"""Render rows of a CSV data set as images.

Two renderers are provided: a matplotlib scatter plot per row (``process`` /
``process_single_threaded``) and, in the script entry point, square RGB tiles
built by packing consecutive rows together.
"""
# NOTE: the original import order is kept on purpose -- the wildcard imports
# below may shadow names imported before them, so reordering could change
# which object a name refers to.
import datetime
from utils.files import create_dir
import pandas as pd
import numpy as np
from config import *
import matplotlib.pyplot as plt
from utils.dataframe import *
from sklearn.preprocessing import QuantileTransformer
from PIL import Image
from multiprocessing import Pool, cpu_count
from typing import Optional


def is_in_bypass_list(column_name: str, bypass_list: tuple) -> bool:
    """Return True if any entry of ``bypass_list`` is a substring of ``column_name``."""
    return any(bypass in column_name for bypass in bypass_list)


def input_csv_to_df(file_path: str) -> pd.DataFrame:
    """Read the CSV file at ``file_path`` into a DataFrame using ``pd.read_csv``."""
    return pd.read_csv(file_path)


def averaging_df(df: pd.DataFrame, column_num: Optional[int] = None):
    """Rescale every non-bypassed numeric column to ``value / column_max * column_num``.

    Numeric columns whose name matches an entry of ``BYPASS_COLUMNS`` (see
    ``is_in_bypass_list``) are left untouched. NaN results, such as 0 / 0 for
    an all-zero column, are replaced with 0.

    Args:
        df: Frame to rescale. It is modified in place.
        column_num: Scale factor. When ``None`` it defaults to the number of
            columns that get rescaled.

    Returns:
        A ``(df, column_num)`` tuple: the same frame object and the scale
        factor that was actually used.
    """
    numeric_columns = df.select_dtypes(include=[np.number]).columns
    scaled_columns = [
        column for column in numeric_columns
        if not is_in_bypass_list(column, BYPASS_COLUMNS)
    ]
    if column_num is None:
        column_num = len(scaled_columns)

    # Only the columns being rescaled need a maximum; taking max() of the
    # whole frame is wasted work and may raise on non-comparable object columns.
    max_values = df[scaled_columns].max()
    for column in scaled_columns:
        scaled = df[column] / max_values[column] * column_num
        df[column] = scaled.fillna(0)
    return df, column_num


def iter_df_to_point(df: pd.DataFrame, column_num: int = None):
    """Turn every row of ``df`` into scatter-plot coordinates.

    The first two values of each row are skipped (presumably identifier
    columns -- TODO confirm). The remaining values become the x coordinates
    and their positions ``0 .. n-1`` become the y coordinates.

    Args:
        df: Source frame.
        column_num: Unused; kept so existing callers keep working.

    Returns:
        A list with one single-entry dict per row, mapping the row's index
        label to an ``(x_values, y_values)`` tuple.
    """
    points = []
    for index, row in df.iterrows():
        x_values = row.values[2:]
        y_values = np.linspace(0, len(x_values) - 1, len(x_values))
        points.append({index: (x_values, y_values)})
    return points


def generate_one_plot(x_values, y_values, x_y_size: int) -> plt:
    """Draw one scatter plot on pyplot's current figure and return ``plt``.

    Args:
        x_values: X coordinates of the points.
        y_values: Y coordinates of the points.
        x_y_size: Upper limit of both axes and side length of the background grid.

    Returns:
        The ``matplotlib.pyplot`` module, whose current figure holds the plot.
    """
    # pyplot keeps drawing on the same implicit figure. Without clearing it,
    # artists from earlier calls pile up and every save re-renders all of them.
    plt.clf()
    # Select the colormap before drawing so the mesh created below picks it up.
    plt.set_cmap('BuPu')

    edges = np.linspace(0, x_y_size, x_y_size)
    background = np.zeros((x_y_size, x_y_size))
    # pcolormesh draws a pseudocolor plot on a rectangular grid; the all-zero
    # data gives a uniform background.
    plt.pcolormesh(edges, edges, background)
    plt.scatter(x_values, y_values, marker=',', s=1)
    plt.xlim(0, x_y_size)
    plt.ylim(0, x_y_size)
    plt.ylabel('Attributes')
    plt.xlabel('Attribute values')
    plt.axis('on')
    return plt


def save_plt(plt: plt, base_path: str, num: int):
    """Save the current figure of ``plt`` as ``{base_path}/{num}.png`` with tight bounds.

    The parameter deliberately keeps the name ``plt`` (shadowing the module)
    so keyword callers are unaffected.
    """
    plt.savefig(f"{base_path}/{num}.png", bbox_inches='tight', pad_inches=0)


def process(df: pd.DataFrame):
    """Render every row of ``df`` as a PNG using a pool of worker processes.

    Images are written to a fresh ``./saves/<timestamp>`` directory, one file
    per row, named after the row's index label.

    Raises:
        Exception: Whatever a worker raised while rendering or saving a row.
    """
    df, size = averaging_df(df)
    points = iter_df_to_point(df, size)
    base_path = f'./saves/{datetime.datetime.now().strftime("%Y%m%d%H%M%S")}'
    create_dir(base_path)

    # The context manager releases the pool even if submitting a task fails.
    with Pool(cpu_count()) as pool:
        results = []
        for point_dict in points:
            num, point = next(iter(point_dict.items()))
            results.append(
                pool.apply_async(generate_and_save, args=(base_path, point, size, num))
            )
        pool.close()
        pool.join()
        # get() re-raises any exception from the worker; without it, failed
        # renders would go unnoticed.
        for result in results:
            result.get()


def generate_and_save(base_path: str, point: tuple, size: int, calculate):
    """Plot one ``(x_values, y_values)`` pair and save it as ``{base_path}/{calculate}.png``."""
    plot = generate_one_plot(point[0], point[1], size)
    save_plt(plot, base_path, calculate)


def process_single_threaded(df: pd.DataFrame):
    """Render every row of ``df`` as a PNG under ``IMG_SAVE_PATH``, one row at a time."""
    df, size = averaging_df(df)
    points = iter_df_to_point(df, size)
    base_path = IMG_SAVE_PATH
    create_dir(base_path)
    for point_dict in points:
        num, point = next(iter(point_dict.items()))
        # NOTE(review): unlike process(), the axis size used here is the number
        # of plotted values in the row, not the scale factor returned by
        # averaging_df -- confirm which one is intended.
        size = len(point[0])
        generate_and_save(base_path, point, size, num)


def _save_rows_as_images(df: pd.DataFrame, save_prefix: str) -> int:
    """Pack consecutive rows of ``df`` into square RGB images and save them as PNGs.

    With ``f`` columns, every ``3 * f`` consecutive rows supply the
    ``f * f * 3`` values of one ``(f, f, 3)`` uint8 image (values are taken
    row by row). Trailing rows that cannot fill a whole image are ignored.

    Args:
        df: Frame whose values are already scaled to ``[0, 255]``.
        save_prefix: String prepended to every file name. Each file is named
            ``save_prefix + str(end) + '.png'``, where ``end`` is the position
            one past the last row used for that image.

    Returns:
        The number of images written.
    """
    n_features = len(df.columns)
    rows_per_image = n_features * 3
    if rows_per_image == 0:
        # No columns means no pixels; nothing to save.
        return 0

    values = df.to_numpy()
    saved = 0
    # Step through the rows in whole-image chunks so that no row is skipped
    # between images and a final chunk that exactly fills an image is written.
    for end in range(rows_per_image, len(values) + 1, rows_per_image):
        block = values[end - rows_per_image:end]
        pixels = block.reshape(n_features, n_features, 3).astype(np.uint8)
        Image.fromarray(pixels).save(save_prefix + str(end) + '.png')
        saved += 1
    return saved


if __name__ == '__main__':
    df = input_csv_to_df(CSV_PATH)
    # Alternative renderers (one scatter plot per row):
    # process(df) or process_single_threaded(df).

    # Treat infinities as missing values, then drop rows containing NaN.
    df = df.replace([np.inf, -np.inf], np.nan)
    df = df.dropna(axis=0)
    df = get_ddos_df(df)
    df = drop_columns(df, UNIQUE_COLUMNS)
    # Keep only the columns from position 7 onwards.
    df = df.iloc[:, 7:]

    # Transform the non-object columns with QuantileTransformer (its default
    # output distribution is uniform on [0, 1]) ...
    numeric_features = df.dtypes[df.dtypes != 'object'].index
    scaler = QuantileTransformer()
    df[numeric_features] = scaler.fit_transform(df[numeric_features])
    # ... then multiply by 255 to bring the values into the scale of [0, 255].
    df[numeric_features] = df[numeric_features] * 255

    _save_rows_as_images(df, IMG_SAVE_PATH)
    print(df)