You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

89 lines
2.7 KiB
Python

import datetime
from utils.files import create_dir
import pandas as pd
import numpy as np
from config import *
from utils.dataframe import *
from sklearn.preprocessing import QuantileTransformer
from PIL import Image
from loguru import logger
def input_csv_to_df(file_path: str) -> pd.DataFrame:
    """Read the CSV file at ``file_path`` into a DataFrame.

    Args:
        file_path: Location of the CSV file, passed straight to
            ``pd.read_csv`` with default options.

    Returns:
        The parsed DataFrame.
    """
    return pd.read_csv(file_path)
def averaging_df(df: pd.DataFrame):
    """Quantile-scale every non-object column of ``df`` and multiply by 255.

    The non-object columns are run through a freshly fitted
    ``QuantileTransformer`` (default settings) and the transformed values
    are multiplied by 255. ``df`` is modified in place and also returned.

    Args:
        df: Frame whose non-object columns should be rescaled.

    Returns:
        The same ``df`` object. It is returned untouched when it has no
        rows or no non-object columns.
    """
    logger.info(f"Total: {len(df)} averaging...")
    numeric_features = df.dtypes[df.dtypes != 'object'].index

    # The transformer cannot be fitted on zero samples or zero features;
    # bail out so an empty selection (e.g. a label with no rows) does not
    # abort the whole run.
    if df.empty or len(numeric_features) == 0:
        return df

    scaler = QuantileTransformer()
    # NOTE(review): with the transformer's default uniform output the scaled
    # values should lie in [0, 1], hence [0, 255] after scaling - confirm.
    df[numeric_features] = scaler.fit_transform(df[numeric_features]) * 255
    return df
def clean_data(df: pd.DataFrame) -> pd.DataFrame:
    """Remove non-finite rows and unwanted columns from ``df``.

    Steps, in order:
      1. Turn +/-inf into NaN and drop every row containing a NaN.
      2. Hand the result to ``drop_columns`` together with
         ``UNIQUE_COLUMNS`` (presumably removes those columns - verify
         against ``utils.dataframe``).
      3. Keep only the columns from positional index 7 onwards.

    Args:
        df: Raw input frame.

    Returns:
        The cleaned frame.
    """
    # Infinite values are treated like missing ones so a single dropna
    # removes both kinds of bad row.
    finite_rows = df.replace([np.inf, -np.inf], np.nan).dropna(axis=0)
    trimmed = drop_columns(finite_rows, UNIQUE_COLUMNS)
    # NOTE(review): the first seven remaining columns are discarded by
    # position; which columns those are depends on the input layout.
    return trimmed.iloc[:, 7:]
def slice_df(df: pd.DataFrame):
    """Split ``df`` into its 'DDoS' and 'BENIGN' selections.

    Both selections come from ``select_label_rows`` (presumably a filter on
    the label column - verify against ``utils.dataframe``).

    Args:
        df: Frame containing both kinds of rows.

    Returns:
        A ``(ddos, benign)`` tuple of the two selections.
    """
    logger.info(f"Total: {len(df)} slicing...")
    return select_label_rows(df, 'DDoS'), select_label_rows(df, 'BENIGN')
def process(df: pd.DataFrame, label: str = None):
    """Run the full pipeline: split, clean, rescale and write images.

    The DDoS selection is written under ``{IMG_SAVE_PATH}/ddos`` and the
    benign selection under ``{IMG_SAVE_PATH}/benign``; each directory is
    created right before its images are generated.

    Args:
        df: Raw frame holding both DDoS and BENIGN rows.
        label: Accepted for interface compatibility; not used here.
    """
    attack_rows, benign_rows = slice_df(df)
    attack_rows = averaging_df(clean_data(attack_rows))
    benign_rows = averaging_df(clean_data(benign_rows))
    logger.info(f"DDoS: {len(attack_rows)}, Normal: {len(benign_rows)}")

    outputs = (
        (f"{IMG_SAVE_PATH}/ddos", attack_rows),
        (f"{IMG_SAVE_PATH}/benign", benign_rows),
    )
    # DDoS first, then benign: create the directory, then fill it.
    for target_dir, frame in outputs:
        create_dir(target_dir)
        generate_and_save(frame, target_dir)
def generate_and_save(df_clean_data: pd.DataFrame, save_path: str = IMG_SAVE_PATH):
    """Pack consecutive rows of ``df_clean_data`` into square RGB PNG files.

    With ``C`` columns, each image consumes ``3 * C`` consecutive rows. Their
    ``3 * C * C`` values are read row by row, reshaped to ``(C, C, 3)``, cast
    to ``uint8`` and written to ``{save_path}/{k}.png`` where ``k`` counts
    from 1. Windows do not overlap and no row between two windows is skipped.
    Trailing rows that cannot fill a whole image are ignored.

    Args:
        df_clean_data: Frame of values to render; every cell must be
            castable to ``uint8``.
        save_path: Existing directory that receives the PNG files.

    Returns:
        None. The images are written to disk as a side effect.
    """
    side = len(df_clean_data.columns)
    rows_per_image = side * 3
    if rows_per_image == 0:
        # No columns means no pixels; also avoids a zero step in range().
        return

    total_rows = len(df_clean_data)
    # Walk over full, back-to-back windows only. The stop value makes sure a
    # final window that is exactly full is still emitted, while a partial
    # trailing window is left out.
    window_starts = range(0, total_rows - rows_per_image + 1, rows_per_image)
    for saves_count, start in enumerate(window_starts, start=1):
        window = df_clean_data.iloc[start:start + rows_per_image].to_numpy()
        # Row-major reshape: every three consecutive values form one pixel.
        ims = window.reshape(side, side, 3)
        if saves_count % 100 == 0:
            logger.info(f"Saving {saves_count} images")
        if saves_count == 1:
            logger.info(f"Shape: {ims.shape}")
        array = np.array(ims, dtype=np.uint8)
        Image.fromarray(array).save(f"{save_path}/{saves_count}.png")
if __name__ == '__main__':
    # Script entry point: load the configured CSV and run the pipeline on it.
    process(input_csv_to_df(CSV_PATH))