You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
72 lines
2.1 KiB
Python
72 lines
2.1 KiB
Python
import datetime
|
|
from utils.files import create_dir
|
|
import pandas as pd
|
|
import numpy as np
|
|
from config import *
|
|
from utils.dataframe import *
|
|
from sklearn.preprocessing import QuantileTransformer
|
|
from PIL import Image
|
|
from loguru import logger
|
|
|
|
|
|
def input_csv_to_df(file_path: str) -> pd.DataFrame:
    """Load the CSV file at ``file_path`` into a DataFrame.

    Args:
        file_path: Location of the CSV file to read.

    Returns:
        The parsed contents, using pandas' default ``read_csv`` settings.
    """
    return pd.read_csv(file_path)
|
|
|
|
|
|
def averaging_df(df: pd.DataFrame) -> pd.DataFrame:
    """Rescale every non-object column of ``df`` to the 0-255 range.

    Each selected column is passed through a default-configured
    ``QuantileTransformer`` and the result is multiplied by 255, so the
    values can later be used as 8-bit pixel intensities.

    The frame is modified in place and also returned.

    Args:
        df: Frame to rescale. Columns whose dtype is ``object`` are left
            untouched.

    Returns:
        The same ``df`` object, with its non-object columns rescaled.
    """
    numeric_features = df.dtypes[df.dtypes != 'object'].index

    # fit_transform raises ValueError when given zero rows or zero columns;
    # in either case there is nothing to rescale, so hand the frame back.
    if df.empty or numeric_features.empty:
        return df

    scaler = QuantileTransformer()
    # The transformer's output is stretched by 255 to cover the 8-bit range.
    df[numeric_features] = scaler.fit_transform(df[numeric_features]) * 255
    return df
|
|
|
|
|
|
def clean_data(df: pd.DataFrame) -> pd.DataFrame:
    """Strip unusable rows and columns from ``df``.

    Steps, in order:
      1. Rows containing NaN or +/-infinity are removed.
      2. The frame is passed through ``get_ddos_df`` (project helper;
         presumably keeps only DDoS records -- confirm in utils.dataframe).
      3. The columns named in ``UNIQUE_COLUMNS`` are removed via
         ``drop_columns``.
      4. The first seven remaining columns are discarded by position.

    Args:
        df: Raw input frame.

    Returns:
        The cleaned frame.
    """
    # Turning infinities into NaN first lets a single dropna remove both.
    finite_rows = df.replace([np.inf, -np.inf], np.nan).dropna(axis=0)
    ddos_rows = get_ddos_df(finite_rows)
    without_unique = drop_columns(ddos_rows, UNIQUE_COLUMNS)
    # NOTE(review): the offset 7 is positional and depends on the column
    # order left by the steps above -- confirm which columns it is meant
    # to remove.
    return without_unique.iloc[:, 7:]
|
|
|
|
|
|
def process(df: pd.DataFrame, label: str = None):
    """Run the full pipeline: clean, rescale, then write images to disk.

    Args:
        df: Raw input frame.
        label: Accepted but not used by this function.
    """
    scaled_frame = averaging_df(clean_data(df))
    # The output directory is prepared only once the data is ready.
    create_dir(IMG_SAVE_PATH)
    generate_and_save(scaled_frame)
|
|
|
|
|
|
def generate_and_save(df_clean_data: pd.DataFrame):
    """Pack consecutive rows of ``df_clean_data`` into square RGB PNG files.

    With ``W`` columns, every ``3 * W`` consecutive rows are flattened in
    row-major order and reshaped to a ``(W, W, 3)`` array, which is cast to
    ``uint8`` and written to ``{IMG_SAVE_PATH}/{n}.png`` (``n`` starts at 1).
    Trailing rows that do not fill a whole image are ignored.

    Every row belongs to exactly one image: no row is skipped between two
    images, and a final complete batch is written even when no further row
    follows it.

    Args:
        df_clean_data: Frame whose values are used as pixel intensities.
            Values are cast to ``uint8`` as-is; presumably they are already
            scaled to 0-255 by the caller -- confirm.
    """
    side = len(df_clean_data.columns)
    rows_per_image = side * 3
    if rows_per_image == 0:
        # A frame without columns cannot form an image.
        return

    values = df_clean_data.to_numpy()
    image_total = len(values) // rows_per_image

    for index in range(image_total):
        saves_count = index + 1
        start = index * rows_per_image
        # A (3W, W) slice reshaped in C order matches concatenating the
        # rows one after another and reshaping the flat result.
        pixels = values[start:start + rows_per_image].reshape(side, side, 3)

        if saves_count % 100 == 0:
            logger.info(f"Saving {saves_count} images")
        if saves_count == 1:
            logger.info(f"Shape: {pixels.shape}")

        new_image = Image.fromarray(pixels.astype(np.uint8))
        new_image.save(f"{IMG_SAVE_PATH}/{saves_count}.png")
|
|
|
|
|
|
# Script entry point: read the configured CSV and run the whole pipeline.
if __name__ == '__main__':
    df = input_csv_to_df(CSV_PATH)
    process(df)
|