# FlowPicRefresh/main.py

import datetime
from utils.files import create_dir
import pandas as pd
import numpy as np
from config import *
from utils.dataframe import *
from sklearn.preprocessing import QuantileTransformer
from PIL import Image
from loguru import logger


def input_csv_to_df(file_path: str) -> pd.DataFrame:
    """Load the CSV file at ``file_path`` into a DataFrame.

    Args:
        file_path: Path of the CSV file, passed straight to ``pandas.read_csv``.

    Returns:
        The parsed DataFrame, using ``read_csv``'s default options.
    """
    return pd.read_csv(file_path)


def averaging_df(df: pd.DataFrame):
    """Quantile-transform every non-object column and multiply it by 255.

    The columns whose dtype is not ``object`` are passed through a
    ``QuantileTransformer`` built with its default arguments, written back
    into ``df``, and then scaled by 255.

    Args:
        df: Frame to rescale. It is modified in place.

    Returns:
        The same ``df`` object, after the selected columns were overwritten.
    """
    non_object_cols = df.dtypes[df.dtypes != 'object'].index
    transformer = QuantileTransformer()
    transformed = transformer.fit_transform(df[non_object_cols])
    df[non_object_cols] = transformed
    # Stretch the transformed values by 255 (presumably towards a pixel
    # intensity range — depends on the transformer's output range).
    df[non_object_cols] = df[non_object_cols] * 255
    return df


def clean_data(df: pd.DataFrame) -> pd.DataFrame:
    """Drop unusable rows and columns before the frame is rescaled.

    Steps, in order: replace +/-inf with NaN, drop every row that contains a
    NaN, pass the result through ``get_ddos_df``, remove ``UNIQUE_COLUMNS``
    via ``drop_columns``, and finally discard the first seven remaining
    columns by position.

    Args:
        df: Raw frame as loaded from the CSV.

    Returns:
        The filtered frame, starting at the eighth remaining column.
    """
    # Turning infinities into NaN lets a single dropna remove both kinds of
    # bad rows.
    finite_rows = df.replace([np.inf, -np.inf], np.nan).dropna(axis=0)
    selected_rows = get_ddos_df(finite_rows)
    without_unique = drop_columns(selected_rows, UNIQUE_COLUMNS)
    # NOTE(review): the leading 7 columns are dropped positionally; presumably
    # they are identifier/metadata fields — confirm against the CSV schema.
    return without_unique.iloc[:, 7:]


def process(df: pd.DataFrame, label: str = None):
    """Run the full pipeline: clean, rescale, then write images.

    Args:
        df: Raw frame to convert into images.
        label: Accepted for callers' convenience; not used by this function.
    """
    scaled = averaging_df(clean_data(df))
    # The output directory is created only after the data was prepared
    # successfully.
    create_dir(IMG_SAVE_PATH)
    generate_and_save(scaled)


def generate_and_save(df_clean_data: pd.DataFrame):
    """Render consecutive groups of rows as RGB PNG files under IMG_SAVE_PATH.

    With ``W`` columns in the frame, every ``3 * W`` consecutive rows are
    read row by row, reshaped to ``(W, W, 3)``, cast to ``uint8`` and saved
    as ``{IMG_SAVE_PATH}/{k}.png`` where ``k`` counts images from 1.

    Every row belongs to exactly one image: no row is skipped between two
    images, and a final group that is exactly full is written as well.
    Trailing rows that cannot fill a complete image are discarded.

    Args:
        df_clean_data: Frame whose values can be cast to ``uint8``.
    """
    row_length = len(df_clean_data.columns)
    rows_per_image = row_length * 3
    if rows_per_image == 0:
        # A frame without columns has no pixels to draw.
        return

    values = df_clean_data.to_numpy()
    image_count = len(values) // rows_per_image
    for saves_count in range(1, image_count + 1):
        stop = saves_count * rows_per_image
        # reshape reads the chunk in row-major order, i.e. the same order as
        # concatenating the rows one after another.
        ims = values[stop - rows_per_image:stop].reshape(
            row_length, row_length, 3)
        if saves_count % 100 == 0:
            logger.info(f"Saving {saves_count} images")
        if saves_count == 1:
            logger.info(f"Shape: {ims.shape}")
        array = np.array(ims, dtype=np.uint8)
        new_image = Image.fromarray(array)
        new_image.save(f"{IMG_SAVE_PATH}/{saves_count}.png")


if __name__ == '__main__':
    # Script entry point: load the CSV named by CSV_PATH and run the whole
    # pipeline on it.
    process(input_csv_to_df(CSV_PATH))