"""Turn the rows of a CSV file into square RGB PNG images.

Pipeline: read the CSV, clean it, quantile-scale the non-object columns to
the 0-255 range, then pack consecutive rows into images saved under
``IMG_SAVE_PATH``.
"""

# NOTE: import order is kept as in the original file on purpose. The wildcard
# imports below may shadow (or be shadowed by) neighbouring names, so
# regrouping them could silently change which object a name refers to.
import datetime
from typing import Optional

from utils.files import create_dir
import pandas as pd
import numpy as np
from config import *
from utils.dataframe import *
from sklearn.preprocessing import QuantileTransformer
from PIL import Image
from loguru import logger


def input_csv_to_df(file_path: str) -> pd.DataFrame:
    """Read the CSV file at ``file_path`` into a DataFrame.

    Args:
        file_path: Path of the CSV file to load.

    Returns:
        The parsed DataFrame, exactly as returned by ``pandas.read_csv``.
    """
    # Read the CSV file with pandas' read_csv function.
    return pd.read_csv(file_path)


def averaging_df(df: pd.DataFrame) -> pd.DataFrame:
    """Quantile-transform every non-object column and scale it by 255.

    ``QuantileTransformer`` is used with its default settings; the result of
    the transform is then multiplied by 255 so values can later be cast to
    8-bit pixel intensities.

    Args:
        df: Frame whose non-object columns should be rescaled.

    Returns:
        A new DataFrame with the rescaled columns. The input frame is left
        untouched (it is frequently a slice of another frame, and writing into
        a slice is unreliable in pandas).
    """
    df = df.copy()
    numeric_features = df.dtypes[df.dtypes != 'object'].index

    # The transformer cannot be fitted on zero rows or zero columns, so there
    # is nothing to rescale in that case.
    if df.empty or numeric_features.empty:
        return df

    scaler = QuantileTransformer()
    df[numeric_features] = scaler.fit_transform(df[numeric_features]) * 255
    return df


def clean_data(df: pd.DataFrame) -> pd.DataFrame:
    """Drop unusable rows and columns before scaling.

    Steps, in order: replace +/-inf with NaN, drop rows containing NaN, filter
    through ``get_ddos_df``, drop ``UNIQUE_COLUMNS`` via ``drop_columns``, and
    finally discard the first seven remaining columns.

    Args:
        df: Raw frame as loaded from the CSV.

    Returns:
        The cleaned DataFrame.
    """
    df = df.replace([np.inf, -np.inf], np.nan)
    df = df.dropna(axis=0)  # drop rows that contain NaN values
    # get_ddos_df / drop_columns / UNIQUE_COLUMNS come from the wildcard
    # imports; their exact semantics are not visible in this file.
    df = get_ddos_df(df)
    df = drop_columns(df, UNIQUE_COLUMNS)
    # NOTE: a call to drop_unique_columns(df) was previously disabled here.
    # NOTE(review): the first 7 columns are skipped - presumably identifier /
    # metadata columns; confirm against the dataset schema.
    df = df.iloc[:, 7:]
    return df


def process(df: pd.DataFrame, label: Optional[str] = None) -> None:
    """Run the full pipeline: clean, scale, and write the images.

    Args:
        df: Raw frame as loaded from the CSV.
        label: Currently unused; kept so existing callers keep working.
    """
    df = clean_data(df)
    df_clean_data = averaging_df(df)
    create_dir(IMG_SAVE_PATH)
    generate_and_save(df_clean_data)


def generate_and_save(df_clean_data: pd.DataFrame) -> None:
    """Pack consecutive rows into square RGB images and save them as PNGs.

    With ``R`` columns in the frame, every ``3 * R`` consecutive rows hold
    ``3 * R * R`` values, which are reshaped (row-major) to ``(R, R, 3)``,
    cast to ``uint8`` and written to ``{IMG_SAVE_PATH}/{n}.png`` where ``n``
    counts images from 1.

    Rows are consumed in back-to-back, non-overlapping batches so that no row
    is skipped between two images and a final batch that is exactly full is
    still written. Trailing rows that do not fill a complete image are
    discarded.

    Args:
        df_clean_data: Cleaned and scaled frame; values are expected to be
            numeric so they can be cast to ``uint8``.
    """
    side = len(df_clean_data.columns)
    total_rows = len(df_clean_data)
    rows_per_image = side * 3

    if side == 0 or total_rows < rows_per_image:
        logger.warning(
            f"Not enough data for a single image: {total_rows} rows, "
            f"{side} columns"
        )
        return

    # One array conversion up front instead of growing a buffer row by row.
    values = df_clean_data.to_numpy()

    # Start offsets of every batch that is completely filled.
    batch_starts = range(0, total_rows - rows_per_image + 1, rows_per_image)
    for saves_count, start in enumerate(batch_starts, start=1):
        ims = values[start:start + rows_per_image].reshape(side, side, 3)
        if saves_count % 100 == 0:
            logger.info(f"Saving {saves_count} images")
        if saves_count == 1:
            logger.info(f"Shape: {ims.shape}")
        array = ims.astype(np.uint8)
        new_image = Image.fromarray(array)
        new_image.save(f"{IMG_SAVE_PATH}/{saves_count}.png")


if __name__ == '__main__':
    df = input_csv_to_df(CSV_PATH)
    process(df)