Initial stage 2: color pic

main
yulonger's Desktop 2 years ago
parent 48fec95dac
commit 847780028c

.gitignore vendored

@ -9,6 +9,7 @@ saves/
*.so
_dataset/MachineLearningCVE/
_dataset/TrafficLabelling_/
_dataset/pcap/
# Distribution / packaging
.Python
build/

@ -0,0 +1,350 @@
#!/usr/bin/env python
# coding: utf-8
# # A Transfer Learning and Optimized CNN Based Intrusion Detection System for Internet of Vehicles
# This is the code for the paper entitled "**A Transfer Learning and Optimized CNN Based Intrusion Detection System for Internet of Vehicles**" accepted in IEEE International Conference on Communications (IEEE ICC).
# Authors: Li Yang (lyang339@uwo.ca) and Abdallah Shami (Abdallah.Shami@uwo.ca)
# Organization: The Optimized Computing and Communications (OC2) Lab, ECE Department, Western University
#
# **Notebook 1: Data pre-processing**
# Procedures:
#   1): Read the dataset
#   2): Transform the tabular data into images
#   3): Display the transformed images
#   4): Split the training and test set
# ## Import libraries
# In[14]:
import numpy as np
import pandas as pd
import os
import cv2
import math
import random
import matplotlib.pyplot as plt
import shutil
from sklearn.preprocessing import QuantileTransformer
from PIL import Image
import warnings
warnings.filterwarnings("ignore")
# ## Read the Car-Hacking/CAN-Intrusion dataset
# The complete Car-Hacking dataset is publicly available at: https://ocslab.hksecurity.net/Datasets/CAN-intrusion-dataset
# In this repository, due to the file size limit of GitHub, we use the 5% subset.
# In[15]:
#Read dataset
df=pd.read_csv('data/Car_Hacking_5%.csv')
# In[16]:
df
# In[17]:
# The labels of the dataset. "R" indicates normal patterns, and there are four attack types (DoS, fuzzy, gear spoofing, and RPM spoofing)
df.Label.value_counts()
# ## Data Transformation
# Convert tabular data to images
# Procedures:
# 1. Use quantile transformation to map the original data samples into the scale of [0,255], representing pixel values
# 2. Generate images for each category (Normal, DoS, Fuzzy, Gear, RPM); each image consists of 27 data samples with 9 features each, so 27*9 = 243 values fill a 9*9*3 image: length 9, width 9, and 3 color channels (RGB).
# In[18]:
# Transform all features into the scale of [0,1]
numeric_features = df.dtypes[df.dtypes != 'object'].index
scaler = QuantileTransformer()
df[numeric_features] = scaler.fit_transform(df[numeric_features])
# In[19]:
# Multiply the feature values by 255 to transform them into the scale of [0,255]
df[numeric_features] = df[numeric_features].apply(
    lambda x: (x*255))
# In[20]:
df.describe()
# All features are in the same scale of [0,255]
# ### Generate images for each class
# In[21]:
df0=df[df['Label']=='R'].drop(['Label'],axis=1)
df1=df[df['Label']=='RPM'].drop(['Label'],axis=1)
df2=df[df['Label']=='gear'].drop(['Label'],axis=1)
df3=df[df['Label']=='DoS'].drop(['Label'],axis=1)
df4=df[df['Label']=='Fuzzy'].drop(['Label'],axis=1)
# In[22]:
# Generate 9*9 color images for class 0 (Normal)
count=0
ims = []
image_path = "train/0/"
os.makedirs(image_path)
for i in range(0, len(df0)):
    count=count+1
    if count<=27:
        im=df0.iloc[i].values
        ims=np.append(ims,im)
    else:
        ims=np.array(ims).reshape(9,9,3)
        array = np.array(ims, dtype=np.uint8)
        new_image = Image.fromarray(array)
        new_image.save(image_path+str(i)+'.png')
        count=0
        ims = []
# In[23]:
# Generate 9*9 color images for class 1 (RPM spoofing)
count=0
ims = []
image_path = "train/1/"
os.makedirs(image_path)
for i in range(0, len(df1)):
    count=count+1
    if count<=27:
        im=df1.iloc[i].values
        ims=np.append(ims,im)
    else:
        ims=np.array(ims).reshape(9,9,3)
        array = np.array(ims, dtype=np.uint8)
        new_image = Image.fromarray(array)
        new_image.save(image_path+str(i)+'.png')
        count=0
        ims = []
# In[24]:
# Generate 9*9 color images for class 2 (Gear spoofing)
count=0
ims = []
image_path = "train/2/"
os.makedirs(image_path)
for i in range(0, len(df2)):
    count=count+1
    if count<=27:
        im=df2.iloc[i].values
        ims=np.append(ims,im)
    else:
        ims=np.array(ims).reshape(9,9,3)
        array = np.array(ims, dtype=np.uint8)
        new_image = Image.fromarray(array)
        new_image.save(image_path+str(i)+'.png')
        count=0
        ims = []
# In[25]:
# Generate 9*9 color images for class 3 (DoS attack)
count=0
ims = []
image_path = "train/3/"
os.makedirs(image_path)
for i in range(0, len(df3)):
    count=count+1
    if count<=27:
        im=df3.iloc[i].values
        ims=np.append(ims,im)
    else:
        ims=np.array(ims).reshape(9,9,3)
        array = np.array(ims, dtype=np.uint8)
        new_image = Image.fromarray(array)
        new_image.save(image_path+str(i)+'.png')
        count=0
        ims = []
# In[26]:
# Generate 9*9 color images for class 4 (Fuzzy attack)
count=0
ims = []
image_path = "train/4/"
os.makedirs(image_path)
for i in range(0, len(df4)):
    count=count+1
    if count<=27:
        im=df4.iloc[i].values
        ims=np.append(ims,im)
    else:
        ims=np.array(ims).reshape(9,9,3)
        array = np.array(ims, dtype=np.uint8)
        new_image = Image.fromarray(array)
        new_image.save(image_path+str(i)+'.png')
        count=0
        ims = []
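# The five generation loops above differ only in the source dataframe and the
# output folder. As a sketch (an illustrative helper, not part of the original
# code), the same logic can be factored into one function that strides cleanly
# over the rows instead of keeping a running counter:
def save_class_images(frame, out_dir, rows_per_image=27):
    # Stack rows_per_image consecutive samples (9 features each) into one
    # 9x9x3 RGB image and save it under out_dir
    os.makedirs(out_dir, exist_ok=True)
    for start in range(0, len(frame) - rows_per_image + 1, rows_per_image):
        block = frame.iloc[start:start + rows_per_image].values  # 27 x 9
        pixels = block.reshape(9, 9, 3).astype(np.uint8)
        Image.fromarray(pixels).save(os.path.join(out_dir, str(start) + '.png'))
# Equivalent usage for the five classes:
# for label, frame in enumerate([df0, df1, df2, df3, df4]):
#     save_class_images(frame, 'train/' + str(label) + '/')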
# ## Split the training and test set
# In[27]:
# Create folders to store images
Train_Dir='./train/'
Val_Dir='./test/'
allimgs=[]
for subdir in os.listdir(Train_Dir):
    for filename in os.listdir(os.path.join(Train_Dir,subdir)):
        filepath=os.path.join(Train_Dir,subdir,filename)
        allimgs.append(filepath)
print(len(allimgs)) # Print the total number of images
# In[28]:
#split a test set from the dataset, train/test size = 80%/20%
Numbers=len(allimgs)//5 #size of test set (20%)
def mymovefile(srcfile,dstfile):
    if not os.path.isfile(srcfile):
        print("%s not exist!"%(srcfile))
    else:
        fpath,fname=os.path.split(dstfile)
        if not os.path.exists(fpath):
            os.makedirs(fpath)
        shutil.move(srcfile,dstfile)
        #print("move %s -> %s"%(srcfile,dstfile))
# In[29]:
# The size of test set
Numbers
# In[30]:
# Create the test set
val_imgs=random.sample(allimgs,Numbers)
for img in val_imgs:
    dest_path=img.replace(Train_Dir,Val_Dir)
    mymovefile(img,dest_path)
print('Finish creating test set')
# In[31]:
#resize the images 224*224 for better CNN training
def get_224(folder,dstdir):
    imgfilepaths=[]
    for root,dirs,imgs in os.walk(folder):
        for thisimg in imgs:
            thisimg_path=os.path.join(root,thisimg)
            imgfilepaths.append(thisimg_path)
    for thisimg_path in imgfilepaths:
        dir_name,filename=os.path.split(thisimg_path)
        dir_name=dir_name.replace(folder,dstdir)
        new_file_path=os.path.join(dir_name,filename)
        if not os.path.exists(dir_name):
            os.makedirs(dir_name)
        img=cv2.imread(thisimg_path)
        img=cv2.resize(img,(224,224))
        cv2.imwrite(new_file_path,img)
    print('Finish resizing')
# In[32]:
DATA_DIR_224='./train_224/'
get_224(folder='./train/',dstdir=DATA_DIR_224)
# In[33]:
DATA_DIR2_224='./test_224/'
get_224(folder='./test/',dstdir=DATA_DIR2_224)
# ### Display samples for each category
# In[34]:
# Read the images for each category, the file name may vary (27.png, 83.png...)
img1 = Image.open('./train_224/0/27.png')
img2 = Image.open('./train_224/1/83.png')
img3 = Image.open('./train_224/2/27.png')
img4 = Image.open('./train_224/3/27.png')
img5 = Image.open('./train_224/4/27.png')
plt.figure(figsize=(10, 10))
plt.subplot(1,5,1)
plt.imshow(img1)
plt.title("Normal")
plt.subplot(1,5,2)
plt.imshow(img2)
plt.title("RPM Spoofing")
plt.subplot(1,5,3)
plt.imshow(img3)
plt.title("Gear Spoofing")
plt.subplot(1,5,4)
plt.imshow(img4)
plt.title("DoS Attack")
plt.subplot(1,5,5)
plt.imshow(img5)
plt.title("Fuzzy Attack")
plt.show() # display it
# In[ ]:

@ -0,0 +1,682 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# A Transfer Learning and Optimized CNN Based Intrusion Detection System for Internet of Vehicles \n",
"This is the code for the paper entitled \"**A Transfer Learning and Optimized CNN Based Intrusion Detection System for Internet of Vehicles**\" accepted in IEEE International Conference on Communications (IEEE ICC). \n",
"Authors: Li Yang (lyang339@uwo.ca) and Abdallah Shami (Abdallah.Shami@uwo.ca) \n",
"Organization: The Optimized Computing and Communications (OC2) Lab, ECE Department, Western University\n",
"\n",
"**Notebook 1: Data pre-processing** \n",
"Procedures: \n",
"&nbsp; 1): Read the dataset \n",
"&nbsp; 2): Transform the tabular data into images \n",
"&nbsp; 3): Display the transformed images \n",
"&nbsp; 4): Split the training and test set "
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Import libraries"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {
"ExecuteTime": {
"end_time": "2023-07-06T09:03:07.788679800Z",
"start_time": "2023-07-06T09:03:07.746481Z"
}
},
"outputs": [],
"source": [
"import numpy as np\n",
"import pandas as pd\n",
"import os\n",
"import cv2\n",
"import math\n",
"import random\n",
"import matplotlib.pyplot as plt\n",
"import shutil\n",
"from sklearn.preprocessing import QuantileTransformer\n",
"from PIL import Image\n",
"import warnings\n",
"warnings.filterwarnings(\"ignore\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Read the Car-Hacking/CAN-Intrusion dataset\n",
"The complete Car-Hacking dataset is publicly available at: https://ocslab.hksecurity.net/Datasets/CAN-intrusion-dataset \n",
"In this repository, due to the file size limit of GitHub, we use the 5% subset."
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {
"collapsed": true,
"ExecuteTime": {
"end_time": "2023-07-06T09:03:08.040220300Z",
"start_time": "2023-07-06T09:03:07.750003500Z"
}
},
"outputs": [],
"source": [
"#Read dataset\n",
"df=pd.read_csv('data/Car_Hacking_5%.csv')"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {
"scrolled": true,
"ExecuteTime": {
"end_time": "2023-07-06T09:03:08.050784400Z",
"start_time": "2023-07-06T09:03:08.042218700Z"
}
},
"outputs": [
{
"data": {
"text/plain": " CAN ID DATA[0] DATA[1] DATA[2] DATA[3] DATA[4] DATA[5] DATA[6] \\\n0 1201 41 39 39 35 0 0 0 \n1 809 64 187 127 20 17 32 0 \n2 1349 216 0 0 136 0 0 0 \n3 1201 41 39 39 35 0 0 0 \n4 2 0 0 0 0 0 3 2 \n... ... ... ... ... ... ... ... ... \n818435 848 5 32 52 104 117 0 0 \n818436 1088 255 0 0 0 255 134 9 \n818437 848 5 32 100 104 117 0 0 \n818438 1349 216 90 0 137 0 0 0 \n818439 790 5 33 48 10 33 30 0 \n\n DATA[7] Label \n0 154 R \n1 20 R \n2 0 R \n3 154 R \n4 228 R \n... ... ... \n818435 12 R \n818436 0 R \n818437 92 R \n818438 0 R \n818439 111 R \n\n[818440 rows x 10 columns]",
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>CAN ID</th>\n <th>DATA[0]</th>\n <th>DATA[1]</th>\n <th>DATA[2]</th>\n <th>DATA[3]</th>\n <th>DATA[4]</th>\n <th>DATA[5]</th>\n <th>DATA[6]</th>\n <th>DATA[7]</th>\n <th>Label</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>1201</td>\n <td>41</td>\n <td>39</td>\n <td>39</td>\n <td>35</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>154</td>\n <td>R</td>\n </tr>\n <tr>\n <th>1</th>\n <td>809</td>\n <td>64</td>\n <td>187</td>\n <td>127</td>\n <td>20</td>\n <td>17</td>\n <td>32</td>\n <td>0</td>\n <td>20</td>\n <td>R</td>\n </tr>\n <tr>\n <th>2</th>\n <td>1349</td>\n <td>216</td>\n <td>0</td>\n <td>0</td>\n <td>136</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>R</td>\n </tr>\n <tr>\n <th>3</th>\n <td>1201</td>\n <td>41</td>\n <td>39</td>\n <td>39</td>\n <td>35</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>154</td>\n <td>R</td>\n </tr>\n <tr>\n <th>4</th>\n <td>2</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>3</td>\n <td>2</td>\n <td>228</td>\n <td>R</td>\n </tr>\n <tr>\n <th>...</th>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n </tr>\n <tr>\n <th>818435</th>\n <td>848</td>\n <td>5</td>\n <td>32</td>\n <td>52</td>\n <td>104</td>\n <td>117</td>\n <td>0</td>\n <td>0</td>\n <td>12</td>\n <td>R</td>\n </tr>\n <tr>\n <th>818436</th>\n <td>1088</td>\n <td>255</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>255</td>\n <td>134</td>\n <td>9</td>\n <td>0</td>\n <td>R</td>\n </tr>\n <tr>\n <th>818437</th>\n <td>848</td>\n <td>5</td>\n <td>32</td>\n <td>100</td>\n <td>104</td>\n <td>117</td>\n <td>0</td>\n <td>0</td>\n <td>92</td>\n <td>R</td>\n </tr>\n <tr>\n <th>818438</th>\n <td>1349</td>\n <td>216</td>\n <td>90</td>\n <td>0</td>\n <td>137</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>R</td>\n </tr>\n <tr>\n <th>818439</th>\n <td>790</td>\n <td>5</td>\n <td>33</td>\n <td>48</td>\n <td>10</td>\n <td>33</td>\n <td>30</td>\n <td>0</td>\n <td>111</td>\n <td>R</td>\n </tr>\n </tbody>\n</table>\n<p>818440 rows × 10 columns</p>\n</div>"
},
"execution_count": 16,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {
"ExecuteTime": {
"end_time": "2023-07-06T09:03:08.131587100Z",
"start_time": "2023-07-06T09:03:08.052784200Z"
}
},
"outputs": [
{
"data": {
"text/plain": "Label\nR 701832\nRPM 32539\ngear 29944\nDoS 29501\nFuzzy 24624\nName: count, dtype: int64"
},
"execution_count": 17,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# The labels of the dataset. \"R\" indicates normal patterns, and there are four types of attack (DoS, fuzzy. gear spoofing, and RPM spoofing zttacks)\n",
"df.Label.value_counts()"
]
},
{
"cell_type": "markdown",
"metadata": {
"collapsed": true
},
"source": [
"## Data Transformation\n",
"Convert tabular data to images\n",
"Procedures:\n",
"1. Use quantile transform to transform the original data samples into the scale of [0,255], representing pixel values\n",
"2. Generate images for each category (Normal, DoS, Fuzzy, Gear, RPM), each image consists of 27 data samples with 9 features. Thus, the size of each image is 9*9*3, length 9, width 9, and 3 color channels (RGB)."
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {
"collapsed": true,
"ExecuteTime": {
"end_time": "2023-07-06T09:03:09.029917800Z",
"start_time": "2023-07-06T09:03:08.087993Z"
}
},
"outputs": [],
"source": [
"# Transform all features into the scale of [0,1]\n",
"numeric_features = df.dtypes[df.dtypes != 'object'].index\n",
"scaler = QuantileTransformer() \n",
"df[numeric_features] = scaler.fit_transform(df[numeric_features])"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {
"ExecuteTime": {
"end_time": "2023-07-06T09:03:09.083315300Z",
"start_time": "2023-07-06T09:03:09.030919300Z"
}
},
"outputs": [],
"source": [
"# Multiply the feature values by 255 to transform them into the scale of [0,255]\n",
"df[numeric_features] = df[numeric_features].apply(\n",
" lambda x: (x*255))"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {
"ExecuteTime": {
"end_time": "2023-07-06T09:03:09.331286300Z",
"start_time": "2023-07-06T09:03:09.084313100Z"
}
},
"outputs": [
{
"data": {
"text/plain": " CAN ID DATA[0] DATA[1] DATA[2] \\\ncount 818440.000000 818440.000000 818440.000000 818440.000000 \nmean 127.457890 113.711554 107.926505 89.813595 \nstd 73.812063 89.982269 93.314034 100.866477 \nmin 0.000000 0.000000 0.000000 0.000000 \n25% 66.621622 0.000000 0.000000 0.000000 \n50% 122.267267 126.223724 115.630631 0.000000 \n75% 190.292793 192.590090 192.972973 199.992492 \nmax 255.000000 255.000000 255.000000 255.000000 \n\n DATA[3] DATA[4] DATA[5] DATA[6] \\\ncount 818440.000000 818440.000000 818440.000000 818440.000000 \nmean 109.978430 105.412321 112.250627 84.973873 \nstd 103.679776 95.557986 91.033532 101.390068 \nmin 0.000000 0.000000 0.000000 0.000000 \n25% 0.000000 0.000000 0.000000 0.000000 \n50% 130.690691 127.244745 129.159159 0.000000 \n75% 191.186186 192.717718 190.420420 192.207207 \nmax 255.000000 255.000000 255.000000 255.000000 \n\n DATA[7] \ncount 818440.000000 \nmean 93.112763 \nstd 100.247486 \nmin 0.000000 \n25% 0.000000 \n50% 0.000000 \n75% 190.675676 \nmax 255.000000 ",
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>CAN ID</th>\n <th>DATA[0]</th>\n <th>DATA[1]</th>\n <th>DATA[2]</th>\n <th>DATA[3]</th>\n <th>DATA[4]</th>\n <th>DATA[5]</th>\n <th>DATA[6]</th>\n <th>DATA[7]</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>count</th>\n <td>818440.000000</td>\n <td>818440.000000</td>\n <td>818440.000000</td>\n <td>818440.000000</td>\n <td>818440.000000</td>\n <td>818440.000000</td>\n <td>818440.000000</td>\n <td>818440.000000</td>\n <td>818440.000000</td>\n </tr>\n <tr>\n <th>mean</th>\n <td>127.457890</td>\n <td>113.711554</td>\n <td>107.926505</td>\n <td>89.813595</td>\n <td>109.978430</td>\n <td>105.412321</td>\n <td>112.250627</td>\n <td>84.973873</td>\n <td>93.112763</td>\n </tr>\n <tr>\n <th>std</th>\n <td>73.812063</td>\n <td>89.982269</td>\n <td>93.314034</td>\n <td>100.866477</td>\n <td>103.679776</td>\n <td>95.557986</td>\n <td>91.033532</td>\n <td>101.390068</td>\n <td>100.247486</td>\n </tr>\n <tr>\n <th>min</th>\n <td>0.000000</td>\n <td>0.000000</td>\n <td>0.000000</td>\n <td>0.000000</td>\n <td>0.000000</td>\n <td>0.000000</td>\n <td>0.000000</td>\n <td>0.000000</td>\n <td>0.000000</td>\n </tr>\n <tr>\n <th>25%</th>\n <td>66.621622</td>\n <td>0.000000</td>\n <td>0.000000</td>\n <td>0.000000</td>\n <td>0.000000</td>\n <td>0.000000</td>\n <td>0.000000</td>\n <td>0.000000</td>\n <td>0.000000</td>\n </tr>\n <tr>\n <th>50%</th>\n <td>122.267267</td>\n <td>126.223724</td>\n <td>115.630631</td>\n <td>0.000000</td>\n <td>130.690691</td>\n <td>127.244745</td>\n <td>129.159159</td>\n <td>0.000000</td>\n <td>0.000000</td>\n </tr>\n <tr>\n <th>75%</th>\n <td>190.292793</td>\n <td>192.590090</td>\n <td>192.972973</td>\n <td>199.992492</td>\n <td>191.186186</td>\n <td>192.717718</td>\n <td>190.420420</td>\n <td>192.207207</td>\n <td>190.675676</td>\n </tr>\n <tr>\n <th>max</th>\n <td>255.000000</td>\n <td>255.000000</td>\n <td>255.000000</td>\n <td>255.000000</td>\n <td>255.000000</td>\n <td>255.000000</td>\n <td>255.000000</td>\n <td>255.000000</td>\n <td>255.000000</td>\n </tr>\n </tbody>\n</table>\n</div>"
},
"execution_count": 20,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.describe()"
]
},
{
"cell_type": "markdown",
"metadata": {
"collapsed": true
},
"source": [
"All features are in the same scale of [0,255]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Generate images for each class"
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {
"collapsed": true,
"ExecuteTime": {
"end_time": "2023-07-06T09:03:09.553237400Z",
"start_time": "2023-07-06T09:03:09.334282600Z"
}
},
"outputs": [],
"source": [
"df0=df[df['Label']=='R'].drop(['Label'],axis=1)\n",
"df1=df[df['Label']=='RPM'].drop(['Label'],axis=1)\n",
"df2=df[df['Label']=='gear'].drop(['Label'],axis=1)\n",
"df3=df[df['Label']=='DoS'].drop(['Label'],axis=1)\n",
"df4=df[df['Label']=='Fuzzy'].drop(['Label'],axis=1)"
]
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {
"ExecuteTime": {
"end_time": "2023-07-06T09:03:09.561532500Z",
"start_time": "2023-07-06T09:03:09.557535700Z"
}
},
"outputs": [],
"source": [
"# Generate 9*9 color images for class 0 (Normal)\n",
"count=0\n",
"ims = []\n",
"\n",
"image_path = \"train/0/\"\n",
"os.makedirs(image_path)\n",
"\n",
"for i in range(0, 2):\n",
" count=count+1\n",
" if count<=27: \n",
" im=df0.iloc[i].values\n",
" ims=np.append(ims,im)\n",
" else:\n",
" print(ims)\n",
" ims=np.array(ims).reshape(9,9,3)\n",
" print(ims)\n",
" array = np.array(ims, dtype=np.uint8)\n",
" new_image = Image.fromarray(array)\n",
" new_image.save(image_path+str(i)+'.png')\n",
" count=0\n",
" ims = []"
]
},
{
"cell_type": "code",
"execution_count": 23,
"metadata": {
"ExecuteTime": {
"end_time": "2023-07-06T09:03:11.704231100Z",
"start_time": "2023-07-06T09:03:09.564039300Z"
}
},
"outputs": [],
"source": [
"# Generate 9*9 color images for class 1 (RPM spoofing)\n",
"count=0\n",
"ims = []\n",
"\n",
"image_path = \"train/1/\"\n",
"os.makedirs(image_path)\n",
"\n",
"for i in range(0, len(df1)): \n",
" count=count+1\n",
" if count<=27: \n",
" im=df1.iloc[i].values\n",
" ims=np.append(ims,im)\n",
" else:\n",
" ims=np.array(ims).reshape(9,9,3)\n",
" array = np.array(ims, dtype=np.uint8)\n",
" new_image = Image.fromarray(array)\n",
" new_image.save(image_path+str(i)+'.png')\n",
" count=0\n",
" ims = []"
]
},
{
"cell_type": "code",
"execution_count": 24,
"metadata": {
"ExecuteTime": {
"end_time": "2023-07-06T09:03:13.510844700Z",
"start_time": "2023-07-06T09:03:11.707374200Z"
}
},
"outputs": [],
"source": [
"# Generate 9*9 color images for class 2 (Gear spoofing)\n",
"count=0\n",
"ims = []\n",
"\n",
"image_path = \"train/2/\"\n",
"os.makedirs(image_path)\n",
"\n",
"for i in range(0, len(df2)): \n",
" count=count+1\n",
" if count<=27: \n",
" im=df2.iloc[i].values\n",
" ims=np.append(ims,im)\n",
" else:\n",
" ims\n",
" ims=np.array(ims).reshape(9,9,3)\n",
" ims\n",
" array = np.array(ims, dtype=np.uint8)\n",
" new_image = Image.fromarray(array)\n",
" new_image.save(image_path+str(i)+'.png')\n",
" count=0\n",
" ims = []"
]
},
{
"cell_type": "code",
"execution_count": 25,
"metadata": {
"collapsed": true,
"ExecuteTime": {
"end_time": "2023-07-06T09:03:15.293229800Z",
"start_time": "2023-07-06T09:03:13.514351300Z"
}
},
"outputs": [],
"source": [
"# Generate 9*9 color images for class 3 (DoS attack)\n",
"count=0\n",
"ims = []\n",
"\n",
"image_path = \"train/3/\"\n",
"os.makedirs(image_path)\n",
"\n",
"\n",
"for i in range(0, len(df3)): \n",
" count=count+1\n",
" if count<=27: \n",
" im=df3.iloc[i].values\n",
" ims=np.append(ims,im)\n",
" else:\n",
" ims=np.array(ims).reshape(9,9,3)\n",
" array = np.array(ims, dtype=np.uint8)\n",
" new_image = Image.fromarray(array)\n",
" new_image.save(image_path+str(i)+'.png')\n",
" count=0\n",
" ims = []"
]
},
{
"cell_type": "code",
"execution_count": 26,
"metadata": {
"collapsed": true,
"ExecuteTime": {
"end_time": "2023-07-06T09:03:16.797229300Z",
"start_time": "2023-07-06T09:03:15.294734300Z"
}
},
"outputs": [],
"source": [
"# Generate 9*9 color images for class 4 (Fuzzy attack)\n",
"count=0\n",
"ims = []\n",
"\n",
"image_path = \"train/4/\"\n",
"os.makedirs(image_path)\n",
"\n",
"\n",
"for i in range(0, len(df4)): \n",
" count=count+1\n",
" if count<=27: \n",
" im=df4.iloc[i].values\n",
" ims=np.append(ims,im)\n",
" else:\n",
" ims=np.array(ims).reshape(9,9,3)\n",
" array = np.array(ims, dtype=np.uint8)\n",
" new_image = Image.fromarray(array)\n",
" new_image.save(image_path+str(i)+'.png')\n",
" count=0\n",
" ims = []"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Split the training and test set "
]
},
{
"cell_type": "code",
"execution_count": 27,
"metadata": {
"ExecuteTime": {
"end_time": "2023-07-06T09:03:16.815834800Z",
"start_time": "2023-07-06T09:03:16.797229300Z"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"4163\n"
]
}
],
"source": [
"# Create folders to store images\n",
"Train_Dir='./train/'\n",
"Val_Dir='./test/'\n",
"allimgs=[]\n",
"for subdir in os.listdir(Train_Dir):\n",
" for filename in os.listdir(os.path.join(Train_Dir,subdir)):\n",
" filepath=os.path.join(Train_Dir,subdir,filename)\n",
" allimgs.append(filepath)\n",
"print(len(allimgs)) # Print the total number of images"
]
},
{
"cell_type": "code",
"execution_count": 28,
"metadata": {
"ExecuteTime": {
"end_time": "2023-07-06T09:03:16.838914900Z",
"start_time": "2023-07-06T09:03:16.818833300Z"
}
},
"outputs": [],
"source": [
"#split a test set from the dataset, train/test size = 80%/20%\n",
"Numbers=len(allimgs)//5 \t#size of test set (20%)\n",
"\n",
"def mymovefile(srcfile,dstfile):\n",
" if not os.path.isfile(srcfile):\n",
" print (\"%s not exist!\"%(srcfile))\n",
" else:\n",
" fpath,fname=os.path.split(dstfile) \n",
" if not os.path.exists(fpath):\n",
" os.makedirs(fpath) \n",
" shutil.move(srcfile,dstfile) \n",
" #print (\"move %s -> %s\"%(srcfile,dstfile))"
]
},
{
"cell_type": "code",
"execution_count": 29,
"metadata": {
"scrolled": true,
"ExecuteTime": {
"end_time": "2023-07-06T09:03:16.838914900Z",
"start_time": "2023-07-06T09:03:16.822343500Z"
}
},
"outputs": [
{
"data": {
"text/plain": "832"
},
"execution_count": 29,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# The size of test set\n",
"Numbers"
]
},
{
"cell_type": "code",
"execution_count": 30,
"metadata": {
"ExecuteTime": {
"end_time": "2023-07-06T09:03:17.654719900Z",
"start_time": "2023-07-06T09:03:16.832397200Z"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Finish creating test set\n"
]
}
],
"source": [
"# Create the test set\n",
"val_imgs=random.sample(allimgs,Numbers)\n",
"for img in val_imgs:\n",
" dest_path=img.replace(Train_Dir,Val_Dir)\n",
" mymovefile(img,dest_path)\n",
"print('Finish creating test set')"
]
},
{
"cell_type": "code",
"execution_count": 31,
"metadata": {
"collapsed": true,
"ExecuteTime": {
"end_time": "2023-07-06T09:03:17.660725400Z",
"start_time": "2023-07-06T09:03:17.658724800Z"
}
},
"outputs": [],
"source": [
"#resize the images 224*224 for better CNN training\n",
"def get_224(folder,dstdir):\n",
" imgfilepaths=[]\n",
" for root,dirs,imgs in os.walk(folder):\n",
" for thisimg in imgs:\n",
" thisimg_path=os.path.join(root,thisimg)\n",
" imgfilepaths.append(thisimg_path)\n",
" for thisimg_path in imgfilepaths:\n",
" dir_name,filename=os.path.split(thisimg_path)\n",
" dir_name=dir_name.replace(folder,dstdir)\n",
" new_file_path=os.path.join(dir_name,filename)\n",
" if not os.path.exists(dir_name):\n",
" os.makedirs(dir_name)\n",
" img=cv2.imread(thisimg_path)\n",
" img=cv2.resize(img,(224,224))\n",
" cv2.imwrite(new_file_path,img)\n",
" print('Finish resizing'.format(folder=folder))"
]
},
{
"cell_type": "code",
"execution_count": 32,
"metadata": {
"ExecuteTime": {
"end_time": "2023-07-06T09:03:22.772090900Z",
"start_time": "2023-07-06T09:03:17.661728600Z"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Finish resizing\n"
]
}
],
"source": [
"DATA_DIR_224='./train_224/'\n",
"get_224(folder='./train/',dstdir=DATA_DIR_224)"
]
},
{
"cell_type": "code",
"execution_count": 33,
"metadata": {
"ExecuteTime": {
"end_time": "2023-07-06T09:03:24.056886300Z",
"start_time": "2023-07-06T09:03:22.772621Z"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Finish resizing\n"
]
}
],
"source": [
"DATA_DIR2_224='./test_224/'\n",
"get_224(folder='./test/',dstdir=DATA_DIR2_224)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Display samples for each category"
]
},
{
"cell_type": "code",
"execution_count": 34,
"metadata": {
"ExecuteTime": {
"end_time": "2023-07-06T09:03:24.562540100Z",
"start_time": "2023-07-06T09:03:24.056886300Z"
}
},
"outputs": [
{
"ename": "FileNotFoundError",
"evalue": "[Errno 2] No such file or directory: './train_224/0/27.png'",
"output_type": "error",
"traceback": [
"\u001B[1;31m---------------------------------------------------------------------------\u001B[0m",
"\u001B[1;31mFileNotFoundError\u001B[0m Traceback (most recent call last)",
"Cell \u001B[1;32mIn[34], line 2\u001B[0m\n\u001B[0;32m 1\u001B[0m \u001B[38;5;66;03m# Read the images for each category, the file name may vary (27.png, 83.png...)\u001B[39;00m\n\u001B[1;32m----> 2\u001B[0m img1 \u001B[38;5;241m=\u001B[39m \u001B[43mImage\u001B[49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mopen\u001B[49m\u001B[43m(\u001B[49m\u001B[38;5;124;43m'\u001B[39;49m\u001B[38;5;124;43m./train_224/0/27.png\u001B[39;49m\u001B[38;5;124;43m'\u001B[39;49m\u001B[43m)\u001B[49m\n\u001B[0;32m 3\u001B[0m img2 \u001B[38;5;241m=\u001B[39m Image\u001B[38;5;241m.\u001B[39mopen(\u001B[38;5;124m'\u001B[39m\u001B[38;5;124m./train_224/1/83.png\u001B[39m\u001B[38;5;124m'\u001B[39m)\n\u001B[0;32m 4\u001B[0m img3 \u001B[38;5;241m=\u001B[39m Image\u001B[38;5;241m.\u001B[39mopen(\u001B[38;5;124m'\u001B[39m\u001B[38;5;124m./train_224/2/27.png\u001B[39m\u001B[38;5;124m'\u001B[39m)\n",
"File \u001B[1;32m~\\anaconda3\\envs\\FlowPicRefresh\\lib\\site-packages\\PIL\\Image.py:3227\u001B[0m, in \u001B[0;36mopen\u001B[1;34m(fp, mode, formats)\u001B[0m\n\u001B[0;32m 3224\u001B[0m filename \u001B[38;5;241m=\u001B[39m fp\n\u001B[0;32m 3226\u001B[0m \u001B[38;5;28;01mif\u001B[39;00m filename:\n\u001B[1;32m-> 3227\u001B[0m fp \u001B[38;5;241m=\u001B[39m \u001B[43mbuiltins\u001B[49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mopen\u001B[49m\u001B[43m(\u001B[49m\u001B[43mfilename\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[38;5;124;43m\"\u001B[39;49m\u001B[38;5;124;43mrb\u001B[39;49m\u001B[38;5;124;43m\"\u001B[39;49m\u001B[43m)\u001B[49m\n\u001B[0;32m 3228\u001B[0m exclusive_fp \u001B[38;5;241m=\u001B[39m \u001B[38;5;28;01mTrue\u001B[39;00m\n\u001B[0;32m 3230\u001B[0m \u001B[38;5;28;01mtry\u001B[39;00m:\n",
"\u001B[1;31mFileNotFoundError\u001B[0m: [Errno 2] No such file or directory: './train_224/0/27.png'"
]
}
],
"source": [
"# Read the images for each category, the file name may vary (27.png, 83.png...)\n",
"img1 = Image.open('./train_224/0/27.png')\n",
"img2 = Image.open('./train_224/1/83.png')\n",
"img3 = Image.open('./train_224/2/27.png')\n",
"img4 = Image.open('./train_224/3/27.png')\n",
"img5 = Image.open('./train_224/4/27.png')\n",
"\n",
"plt.figure(figsize=(10, 10)) \n",
"plt.subplot(1,5,1)\n",
"plt.imshow(img1)\n",
"plt.title(\"Normal\")\n",
"plt.subplot(1,5,2)\n",
"plt.imshow(img2)\n",
"plt.title(\"RPM Spoofing\")\n",
"plt.subplot(1,5,3)\n",
"plt.imshow(img3)\n",
"plt.title(\"Gear Spoofing\")\n",
"plt.subplot(1,5,4)\n",
"plt.imshow(img4)\n",
"plt.title(\"DoS Attack\")\n",
"plt.subplot(1,5,5)\n",
"plt.imshow(img5)\n",
"plt.title(\"Fuzzy Attack\")\n",
"plt.show() # display it"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"ExecuteTime": {
"start_time": "2023-07-06T09:03:24.562540100Z"
}
},
"outputs": [],
"source": []
}
],
"metadata": {
"anaconda-cloud": {},
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.8"
}
},
"nbformat": 4,
"nbformat_minor": 2
}

@ -0,0 +1,348 @@
#!/usr/bin/env python
# coding: utf-8
# # A Transfer Learning and Optimized CNN Based Intrusion Detection System for Internet of Vehicles
# This is the code for the paper entitled "**A Transfer Learning and Optimized CNN Based Intrusion Detection System for Internet of Vehicles**" accepted in IEEE International Conference on Communications (IEEE ICC).
# Authors: Li Yang (lyang339@uwo.ca) and Abdallah Shami (Abdallah.Shami@uwo.ca)
# Organization: The Optimized Computing and Communications (OC2) Lab, ECE Department, Western University
#
# **Notebook 1: Data pre-processing**
# Procedures:
# &nbsp; 1): Read the dataset
# &nbsp; 2): Transform the tabular data into images
# &nbsp; 3): Display the transformed images
# &nbsp; 4): Split the training and test set
# ## Import libraries
# In[14]:
import numpy as np
import pandas as pd
import os
import cv2
import math
import random
import matplotlib.pyplot as plt
import shutil
from sklearn.preprocessing import QuantileTransformer
from PIL import Image
import warnings
warnings.filterwarnings("ignore")
# ## Read the Car-Hacking/CAN-Intrusion dataset
# The complete Car-Hacking dataset is publicly available at: https://ocslab.hksecurity.net/Datasets/CAN-intrusion-dataset
# In this repository, due to the file size limit of GitHub, we use the 5% subset.
# In[15]:
#Read dataset
df=pd.read_csv('data/Car_Hacking_5%.csv')
# In[16]:
df
# In[17]:
# The labels of the dataset. "R" indicates normal patterns, and there are four attack types (DoS, fuzzy, gear spoofing, and RPM spoofing)
df.Label.value_counts()
# ## Data Transformation
# Convert tabular data to images
# Procedures:
# 1. Use quantile transformation to map the original data samples into the scale of [0,255], representing pixel values
# 2. Generate images for each category (Normal, DoS, Fuzzy, Gear, RPM); each image consists of 27 data samples with 9 features each, so 27*9 = 243 values fill a 9*9*3 image: length 9, width 9, and 3 color channels (RGB).
# In[18]:
# Transform all features into the scale of [0,1]
numeric_features = df.dtypes[df.dtypes != 'object'].index
scaler = QuantileTransformer()
df[numeric_features] = scaler.fit_transform(df[numeric_features])
# In[19]:
# Multiply the feature values by 255 to transform them into the scale of [0,255]
df[numeric_features] = df[numeric_features].apply(
lambda x: (x*255))
# In[20]:
df.describe()
# All features are in the same scale of [0,255]
# ### Generate images for each class
# In[21]:
df0=df[df['Label']=='R'].drop(['Label'],axis=1)
df1=df[df['Label']=='RPM'].drop(['Label'],axis=1)
df2=df[df['Label']=='gear'].drop(['Label'],axis=1)
df3=df[df['Label']=='DoS'].drop(['Label'],axis=1)
df4=df[df['Label']=='Fuzzy'].drop(['Label'],axis=1)
# In[22]:
# Generate 9*9 color images for class 0 (Normal)
count=0
ims = []
image_path = "train/0/"
os.makedirs(image_path)
for i in range(0, len(df0)):
    count=count+1
    if count<=27:
        im=df0.iloc[i].values
        ims=np.append(ims,im)
    else:
        ims=np.array(ims).reshape(9,9,3)
        array = np.array(ims, dtype=np.uint8)
        new_image = Image.fromarray(array)
        new_image.save(image_path+str(i)+'.png')
        count=0
        ims = []
# In[23]:
# Generate 9*9 color images for class 1 (RPM spoofing)
count=0
ims = []
image_path = "train/1/"
os.makedirs(image_path)
for i in range(0, len(df1)):
    count=count+1
    if count<=27:
        im=df1.iloc[i].values
        ims=np.append(ims,im)
    else:
        ims=np.array(ims).reshape(9,9,3)
        array = np.array(ims, dtype=np.uint8)
        new_image = Image.fromarray(array)
        new_image.save(image_path+str(i)+'.png')
        count=0
        ims = []
# In[24]:
# Generate 9*9 color images for class 2 (Gear spoofing)
count=0
ims = []
image_path = "train/2/"
os.makedirs(image_path)
for i in range(0, len(df2)):
    count=count+1
    if count<=27:
        im=df2.iloc[i].values
        ims=np.append(ims,im)
    else:
        ims=np.array(ims).reshape(9,9,3)
        array = np.array(ims, dtype=np.uint8)
        new_image = Image.fromarray(array)
        new_image.save(image_path+str(i)+'.png')
        count=0
        ims = []
# In[25]:
# Generate 9*9 color images for class 3 (DoS attack)
count=0
ims = []
image_path = "train/3/"
os.makedirs(image_path)
for i in range(0, len(df3)):
    count=count+1
    if count<=27:
        im=df3.iloc[i].values
        ims=np.append(ims,im)
    else:
        ims=np.array(ims).reshape(9,9,3)
        array = np.array(ims, dtype=np.uint8)
        new_image = Image.fromarray(array)
        new_image.save(image_path+str(i)+'.png')
        count=0
        ims = []
# In[26]:
# Generate 9*9 color images for class 4 (Fuzzy attack)
count=0
ims = []
image_path = "train/4/"
os.makedirs(image_path)
for i in range(0, len(df4)):
    count=count+1
    if count<=27:
        im=df4.iloc[i].values
        ims=np.append(ims,im)
    else:
        ims=np.array(ims).reshape(9,9,3)
        array = np.array(ims, dtype=np.uint8)
        new_image = Image.fromarray(array)
        new_image.save(image_path+str(i)+'.png')
        count=0
        ims = []
# ## Split the training and test set
# In[27]:
# Create folders to store images
Train_Dir='./train/'
Val_Dir='./test/'
allimgs=[]
for subdir in os.listdir(Train_Dir):
    for filename in os.listdir(os.path.join(Train_Dir,subdir)):
        filepath=os.path.join(Train_Dir,subdir,filename)
        allimgs.append(filepath)
print(len(allimgs)) # Print the total number of images
# In[28]:
#split a test set from the dataset, train/test size = 80%/20%
Numbers=len(allimgs)//5 #size of test set (20%)
def mymovefile(srcfile,dstfile):
    if not os.path.isfile(srcfile):
        print("%s not exist!"%(srcfile))
    else:
        fpath,fname=os.path.split(dstfile)
        if not os.path.exists(fpath):
            os.makedirs(fpath)
        shutil.move(srcfile,dstfile)
        #print("move %s -> %s"%(srcfile,dstfile))
# In[29]:
# The size of test set
Numbers
# In[30]:
# Create the test set
val_imgs=random.sample(allimgs,Numbers)
for img in val_imgs:
    dest_path=img.replace(Train_Dir,Val_Dir)
    mymovefile(img,dest_path)
print('Finish creating test set')
# In[31]:
#resize the images 224*224 for better CNN training
def get_224(folder,dstdir):
    imgfilepaths=[]
    for root,dirs,imgs in os.walk(folder):
        for thisimg in imgs:
            thisimg_path=os.path.join(root,thisimg)
            imgfilepaths.append(thisimg_path)
    for thisimg_path in imgfilepaths:
        dir_name,filename=os.path.split(thisimg_path)
        dir_name=dir_name.replace(folder,dstdir)
        new_file_path=os.path.join(dir_name,filename)
        if not os.path.exists(dir_name):
            os.makedirs(dir_name)
        img=cv2.imread(thisimg_path)
        img=cv2.resize(img,(224,224))
        cv2.imwrite(new_file_path,img)
    print('Finish resizing')
# In[32]:
DATA_DIR_224='./train_224/'
get_224(folder='./train/',dstdir=DATA_DIR_224)
# In[33]:
DATA_DIR2_224='./test_224/'
get_224(folder='./test/',dstdir=DATA_DIR2_224)
# ### Display samples for each category
# In[34]:
# Read the images for each category, the file name may vary (27.png, 83.png...)
img1 = Image.open('./train_224/0/27.png')
img2 = Image.open('./train_224/1/83.png')
img3 = Image.open('./train_224/2/27.png')
img4 = Image.open('./train_224/3/27.png')
img5 = Image.open('./train_224/4/27.png')
plt.figure(figsize=(10, 10))
plt.subplot(1,5,1)
plt.imshow(img1)
plt.title("Normal")
plt.subplot(1,5,2)
plt.imshow(img2)
plt.title("RPM Spoofing")
plt.subplot(1,5,3)
plt.imshow(img3)
plt.title("Gear Spoofing")
plt.subplot(1,5,4)
plt.imshow(img4)
plt.title("DoS Attack")
plt.subplot(1,5,5)
plt.imshow(img5)
plt.title("Fuzzy Attack")
plt.show() # display it
# In[ ]:

Binary file not shown (new image, 40 KiB)

@ -0,0 +1,21 @@
MIT License
Copyright (c) 2022 Western OC2 Lab
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

@ -0,0 +1,81 @@
# Intrusion-Detection-System-Using-CNN-and-Transfer-Learning
This is the code for the paper entitled "**[A Transfer Learning and Optimized CNN Based Intrusion Detection System for Internet of Vehicles](https://arxiv.org/pdf/2201.11812.pdf)**" published in **IEEE International Conference on Communications (IEEE ICC)**, doi: [10.1109/ICC45855.2022.9838780](https://ieeexplore.ieee.org/document/9838780).
- Authors: Li Yang and Abdallah Shami
- Organization: The Optimized Computing and Communications (OC2) Lab, ECE Department, Western University
This repository introduces how to use **convolutional neural networks (CNNs)** and **transfer learning** techniques to develop **intrusion detection systems**. **Ensemble learning** and **hyperparameter optimization techniques** are also used to achieve optimized model performance.
- Another **intrusion detection system development code** using **decision tree-based machine learning algorithms (Decision tree, random forest, XGBoost, stacking, etc.)** can be found in: [Intrusion-Detection-System-Using-Machine-Learning](https://github.com/Western-OC2-Lab/Intrusion-Detection-System-Using-Machine-Learning)
- A comprehensive **hyperparameter optimization** tutorial code can be found in: [Hyperparameter-Optimization-of-Machine-Learning-Algorithms](https://github.com/LiYangHart/Hyperparameter-Optimization-of-Machine-Learning-Algorithms)
## Abstract of The Paper
Modern vehicles, including autonomous vehicles and connected vehicles, are increasingly connected to the external world, which enables various functionalities and services. However, this increased connectivity also enlarges the attack surface of the Internet of Vehicles (IoV), making it vulnerable to cyber-threats. Due to the lack of authentication and encryption procedures in vehicular networks, Intrusion Detection Systems (IDSs) are essential for protecting modern vehicle systems from network attacks. In this paper, a transfer learning and ensemble learning-based IDS is proposed for IoV systems using convolutional neural networks (CNNs) and hyper-parameter optimization techniques. In the experiments, the proposed IDS demonstrated over 99.25% detection rates and F1-scores on two well-known public benchmark IoV security datasets: the Car-Hacking dataset and the CICIDS2017 dataset. This shows the effectiveness of the proposed IDS for cyber-attack detection in both intra-vehicle and external vehicular networks.
<p float="left">
<img src="https://github.com/Western-OC2-Lab/Intrusion-Detection-System-Using-CNN-and-Transfer-Learning/blob/main/framework.png" width="500" />
<img src="https://github.com/Western-OC2-Lab/Intrusion-Detection-System-Using-CNN-and-Transfer-Learning/blob/main/CAN.png" width="400" />
</p>
## Implementation
### CNN Models
* VGG16
* VGG19
* Xception
* Inception
* ResNet
* InceptionResNet
### Ensemble Learning Models
* Bagging
* Probability Averaging
* Concatenation
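A minimal sketch of probability averaging, assuming several trained Keras models that each output per-class softmax probabilities (the model file names and `x_test` below are illustrative placeholders, not files from this repository):
```
import numpy as np
from keras.models import load_model

# Load the trained CNN models (illustrative file names)
models = [load_model(p) for p in ('vgg16.h5', 'xception.h5', 'resnet.h5')]

# Average the per-class probabilities predicted by each model,
# then pick the most likely class
probs = np.mean([m.predict(x_test) for m in models], axis=0)
y_pred = probs.argmax(axis=1)
```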
### Hyperparameter Optimization Methods
* Random Search (RS)
* Bayesian Optimization - Tree-structured Parzen Estimator (BO-TPE)
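As a rough sketch of BO-TPE with the [hyperopt](https://github.com/hyperopt/hyperopt) library listed under Libraries below (the search space and the `train_and_score` routine are placeholders, not the paper's actual configuration):
```
from hyperopt import fmin, tpe, hp, Trials

# Hypothetical search space for two common CNN hyperparameters
space = {
    'learning_rate': hp.loguniform('learning_rate', -9, -2),
    'batch_size': hp.choice('batch_size', [16, 32, 64]),
}

def objective(params):
    # hyperopt minimizes the objective, so return 1 - validation accuracy;
    # train_and_score stands in for the user's own training routine
    return 1.0 - train_and_score(**params)

trials = Trials()
best = fmin(fn=objective, space=space, algo=tpe.suggest,
            max_evals=20, trials=trials)
print(best)
```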
### Dataset
1. CAN-intrusion/Car-Hacking dataset, a benchmark network security dataset for intra-vehicle intrusion detection
* Publicly available at: https://ocslab.hksecurity.net/Datasets/CAN-intrusion-dataset
* Can be processed using the same code
2. CICIDS2017 dataset, a popular network traffic dataset for intrusion detection problems
* Publicly available at: https://www.unb.ca/cic/datasets/ids-2017.html
For the purpose of displaying the experimental results in Jupyter Notebook, the sampled subset of the CAN-intrusion dataset is used in the sample code. The subsets are in the "[data](https://github.com/Western-OC2-Lab/Intrusion-Detection-System-Using-CNN-and-Transfer-Learning/tree/main/data)" folder.
### Code
* [1-Data_pre-processing_CAN.ipynb](https://github.com/Western-OC2-Lab/Intrusion-Detection-System-Using-CNN-and-Transfer-Learning/blob/main/1-Data_pre-processing_CAN.ipynb): code for data pre-processing and transformation (tabular data to images).
* [2-CNN_Model_Development&Hyperparameter Optimization.ipynb](https://github.com/Western-OC2-Lab/Intrusion-Detection-System-Using-CNN-and-Transfer-Learning/blob/main/2-CNN_Model_Development%26Hyperparameter%20Optimization.ipynb): code for the development of CNN models and their hyperparameter optimization.
* [3-Ensemble_Models-CAN.ipynb](https://github.com/Western-OC2-Lab/Intrusion-Detection-System-Using-CNN-and-Transfer-Learning/blob/main/3-Ensemble_Models-CAN.ipynb): code for the construction of three ensemble learning techniques.
### Libraries
* Python 3.5+
* [Keras 2.1.0+](https://keras.io/)
* [Tensorflow 1.10.0+](https://www.tensorflow.org/install/gpu)
* [OpenCV-python](https://docs.opencv.org/4.x/d6/d00/tutorial_py_root.html)
* [hyperopt](https://github.com/hyperopt/hyperopt)
## Contact-Info
Please feel free to contact us for any questions or cooperation opportunities. We will be happy to help.
* Email: [liyanghart@gmail.com](mailto:liyanghart@gmail.com) or [Abdallah.Shami@uwo.ca](mailto:Abdallah.Shami@uwo.ca)
* GitHub: [LiYangHart](https://github.com/LiYangHart) and [Western OC2 Lab](https://github.com/Western-OC2-Lab/)
* LinkedIn: [Li Yang](https://www.linkedin.com/in/li-yang-phd-65a190176/)
* Google Scholar: [Li Yang](https://scholar.google.com.eg/citations?user=XEfM7bIAAAAJ&hl=en) and [OC2 Lab](https://scholar.google.com.eg/citations?user=oiebNboAAAAJ&hl=en)
## Citation
If you find this repository useful in your research, please cite this article as:
L. Yang and A. Shami, "A Transfer Learning and Optimized CNN Based Intrusion Detection System for Internet of Vehicles," ICC 2022 - IEEE International Conference on Communications, 2022, pp. 2774-2779, doi: 10.1109/ICC45855.2022.9838780.
```
@INPROCEEDINGS{9838780,
author={Yang, Li and Shami, Abdallah},
booktitle={ICC 2022 - IEEE International Conference on Communications},
title={A Transfer Learning and Optimized CNN Based Intrusion Detection System for Internet of Vehicles},
year={2022},
pages={2774-2779},
doi={10.1109/ICC45855.2022.9838780}}
```

@ -0,0 +1,3 @@
# The sampled datasets used for the experiments in the sample code
**Car_Hacking_5%.csv**: The 5% randomly sampled subset of the [Car Hacking dataset](https://ocslab.hksecurity.net/Datasets/CAN-intrusion-dataset)

Binary file not shown (new image, 80 KiB)

@ -0,0 +1 @@
# The code in this folder shows an example of the pre-processing of the Car-Hacking dataset.

7 file diffs suppressed because one or more lines are too long

@ -0,0 +1,29 @@
BSD 3-Clause License
Copyright (c) 2020, Mahendra Data
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
1. Redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
3. Neither the name of the copyright holder nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

@ -0,0 +1,2 @@
# CICIDS2017-ML
The purpose of this repository is to demonstrate the steps of processing the CICIDS2017 dataset using machine learning algorithms.

@ -0,0 +1,30 @@
import pandas as pd


def merge():
    # Read the three CSV files
    df1 = pd.read_csv("../_dataset/TrafficLabelling_/Friday-WorkingHours-Morning.pcap_ISCX.csv")
    df2 = pd.read_csv("../_dataset/TrafficLabelling_/Friday-WorkingHours-Afternoon-PortScan.pcap_ISCX.csv")
    df3 = pd.read_csv("../_dataset/TrafficLabelling_/Friday-WorkingHours-Afternoon-DDos.pcap_ISCX.csv")
    # Concatenate them into a single DataFrame
    df = pd.concat([df1, df2, df3])
    # Save the result as a new CSV file
    df.to_csv("../_dataset/TrafficLabelling_/Friday-WorkingHours-merged.csv", index=False)


def select():
    df = pd.read_csv('../_dataset/TrafficLabelling_/Friday-WorkingHours-merged.csv')
    df_ddos = df[df.iloc[:, -1] == 'DDoS']
    df_ddos.to_csv('../_dataset/TrafficLabelling_/Friday-WorkingHours-DDoS.csv', index=False)


def search(query: str, row_name: str):
    df = pd.read_csv('../_dataset/TrafficLabelling_/Friday-WorkingHours-merged.csv')
    result = df[df[row_name].str.contains(query)]
    print(result.head())


if __name__ == "__main__":
    # merge()
    # select()
    search("172.16.0.1-192.168.10.50-49533-80-6", "Flow ID")

@ -1,2 +1,39 @@
CSV_PATH = './_dataset/DDos3.csv'
BYPASS_COLUMNS= ('Destination Port', 'Label')
import datetime
CSV_PATH = './_dataset/TrafficLabelling_/Friday-WorkingHours-DDoS.csv'
BYPASS_COLUMNS= ('Destination Port', 'Label')
UNIQUE_COLUMNS = [' Bwd PSH Flags', ' Fwd URG Flags', ' Bwd URG Flags', ' CWE Flag Count',
'Fwd Avg Bytes/Bulk', ' Fwd Avg Packets/Bulk', ' Fwd Avg Bulk Rate',
' Bwd Avg Bytes/Bulk', ' Bwd Avg Packets/Bulk', 'Bwd Avg Bulk Rate']
IMG_SAVE_PATH = f'./saves/{datetime.datetime.now().strftime("%Y%m%d%H%M%S")}'
# Reference: full column list of the CICIDS2017 TrafficLabelling_ CSVs
(['Flow ID', ' Source IP', ' Source Port', ' Destination IP',
' Destination Port', ' Protocol', ' Timestamp', ' Flow Duration',
' Total Fwd Packets', ' Total Backward Packets',
'Total Length of Fwd Packets', ' Total Length of Bwd Packets',
' Fwd Packet Length Max', ' Fwd Packet Length Min',
' Fwd Packet Length Mean', ' Fwd Packet Length Std',
'Bwd Packet Length Max', ' Bwd Packet Length Min',
' Bwd Packet Length Mean', ' Bwd Packet Length Std', 'Flow Bytes/s',
' Flow Packets/s', ' Flow IAT Mean', ' Flow IAT Std', ' Flow IAT Max',
' Flow IAT Min', 'Fwd IAT Total', ' Fwd IAT Mean', ' Fwd IAT Std',
' Fwd IAT Max', ' Fwd IAT Min', 'Bwd IAT Total', ' Bwd IAT Mean',
' Bwd IAT Std', ' Bwd IAT Max', ' Bwd IAT Min', 'Fwd PSH Flags',
' Bwd PSH Flags', ' Fwd URG Flags', ' Bwd URG Flags',
' Fwd Header Length', ' Bwd Header Length', 'Fwd Packets/s',
' Bwd Packets/s', ' Min Packet Length', ' Max Packet Length',
' Packet Length Mean', ' Packet Length Std', ' Packet Length Variance',
'FIN Flag Count', ' SYN Flag Count', ' RST Flag Count',
' PSH Flag Count', ' ACK Flag Count', ' URG Flag Count',
' CWE Flag Count', ' ECE Flag Count', ' Down/Up Ratio',
' Average Packet Size', ' Avg Fwd Segment Size',
' Avg Bwd Segment Size', ' Fwd Header Length.1', 'Fwd Avg Bytes/Bulk',
' Fwd Avg Packets/Bulk', ' Fwd Avg Bulk Rate', ' Bwd Avg Bytes/Bulk',
' Bwd Avg Packets/Bulk', 'Bwd Avg Bulk Rate', 'Subflow Fwd Packets',
' Subflow Fwd Bytes', ' Subflow Bwd Packets', ' Subflow Bwd Bytes',
'Init_Win_bytes_forward', ' Init_Win_bytes_backward',
' act_data_pkt_fwd', ' min_seg_size_forward', 'Active Mean',
' Active Std', ' Active Max', ' Active Min', 'Idle Mean', ' Idle Std',
' Idle Max', ' Idle Min', ' Label'])

@ -4,7 +4,9 @@ import pandas as pd
import numpy as np
from config import *
import matplotlib.pyplot as plt
from utils.dataframe import *
from sklearn.preprocessing import QuantileTransformer
from PIL import Image
def is_in_bypass_list(column_name: str, bypass_list: tuple) -> bool:
    for bypass in bypass_list:
@ -37,6 +39,9 @@ def averaging_df(df: pd.DataFrame, column_num: int = None):
    return df, column_num
def iter_df_to_point(df: pd.DataFrame, column_num: int = None):
    size = 0
    points = []
@ -100,7 +105,7 @@ def generate_and_save(base_path: str, point: tuple, size: int, calculate):
def process_single_threaded(df: pd.DataFrame):
    df, size = averaging_df(df)
    points = iter_df_to_point(df, size)
    base_path = f'./saves/{datetime.datetime.now().strftime("%Y%m%d%H%M%S")}'
    base_path = IMG_SAVE_PATH
    create_dir(base_path)
    for point_dict in points:
        num = list(point_dict.keys())[0]
@ -113,4 +118,41 @@ def process_single_threaded(df: pd.DataFrame):
if __name__ == '__main__':
    df = input_csv_to_df(CSV_PATH)
    process(df)
    # process(df)
    # process_single_threaded(df)
    df = df.replace([np.inf, -np.inf], np.nan)
    df = df.dropna(axis=0)  # Drop rows that contain NaN values
    df = get_ddos_df(df)
    df = drop_columns(df, UNIQUE_COLUMNS)
    # df = drop_unique_columns(df)
    df = df.iloc[:, 7:]
    numeric_features = df.dtypes[df.dtypes != 'object'].index
    scaler = QuantileTransformer()
    df[numeric_features] = scaler.fit_transform(df[numeric_features])
    # Multiply the feature values by 255 to transform them into the scale of [0,255]
    df[numeric_features] = df[numeric_features].apply(
        lambda x: (x * 255))
    df_clean_data = df
    row_length = len(df_clean_data.columns)  # number of features per sample
    col_length = len(df_clean_data)  # number of samples
    # Stack row_length*3 consecutive samples into one row_length x row_length x 3 image
    count = 0
    ims = []
    for i in range(0, col_length):
        count = count + 1
        if count <= (row_length*3):
            im = df_clean_data.iloc[i].values
            ims = np.append(ims, im)
        else:
            ims = np.array(ims).reshape(row_length, row_length, 3)
            array = np.array(ims, dtype=np.uint8)
            new_image = Image.fromarray(array)
            # IMG_SAVE_PATH has no trailing slash, so add a separator
            new_image.save(IMG_SAVE_PATH + '/' + str(i) + '.png')
            count = 0
            ims = []
    print(df)

@ -0,0 +1,27 @@
import pandas as pd


def drop_unique_columns(df: pd.DataFrame):
    nunique = df.nunique()  # Count the number of unique values in each column
    cols_to_drop = nunique[nunique == 1].index  # Find the columns that have only one unique value
    df.drop(cols_to_drop, axis=1, inplace=True)  # Drop those columns
    print(cols_to_drop)  # Print the names of the dropped columns
    return df


# def drop_columns_with_fix_up(df: pd.DataFrame, columns: list):
#     columns = [w.lstrip() for w in columns]
#     df = drop_columns(df, columns)
#     columns = [" " + w for w in columns]
#     df = drop_columns(df, columns)
#     return df


def drop_columns(df: pd.DataFrame, columns: list):
    columns = [w.lstrip() for w in columns]
    for column_name in columns:
        cols_to_drop = df.filter(regex=column_name).columns
        df.drop(cols_to_drop, axis=1, inplace=True)
    return df


def get_ddos_df(df: pd.DataFrame):
    return df[df.iloc[:, -1] == 'DDoS']
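# Example usage (a sketch; the CSV path is illustrative):
#   df = pd.read_csv('../_dataset/TrafficLabelling_/Friday-WorkingHours-merged.csv')
#   df = drop_columns(df, UNIQUE_COLUMNS)  # drop the constant columns listed in config.py
#   ddos_df = get_ddos_df(df)              # keep only the rows labeled 'DDoS'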

@ -0,0 +1,29 @@
from scapy.all import *
from loguru import logger
def split_pcap(file_path: str, chunk_size: int, save_base_path: str = None):
    packets = PcapReader(file_path)
    chunk = []
    counter = 1
    for packet in packets:
        # logger.info(packet.time)
        chunk.append(packet)
        if len(chunk) == chunk_size:
            wrpcap(f'{save_base_path}/chunk_{counter}.pcap', chunk)
            chunk = []
            logger.info(f'chunk_{counter}.pcap saved')
            counter += 1
    if chunk:
        wrpcap(f'{save_base_path}/chunk_{counter}.pcap', chunk)


def get_packet_time(pkt: Packet):
    return pkt.time


if __name__ == '__main__':
    from utils.files import create_dir
    create_dir('../_dataset/pcap/Friday-WorkingHours')
    split_pcap('../_dataset/pcap/Friday-WorkingHours.pcap', 10000, '../_dataset/pcap/Friday-WorkingHours')