Initial stage 2: color pic

main
yulonger's Desktop 2 years ago
parent 48fec95dac
commit 847780028c

.gitignore vendored

@ -9,6 +9,7 @@ saves/
*.so
_dataset/MachineLearningCVE/
_dataset/TrafficLabelling_/
_dataset/pcap/
# Distribution / packaging
.Python
build/

@ -0,0 +1,350 @@
#!/usr/bin/env python
# coding: utf-8
# # A Transfer Learning and Optimized CNN Based Intrusion Detection System for Internet of Vehicles
# This is the code for the paper entitled "**A Transfer Learning and Optimized CNN Based Intrusion Detection System for Internet of Vehicles**" accepted in IEEE International Conference on Communications (IEEE ICC).
# Authors: Li Yang (lyang339@uwo.ca) and Abdallah Shami (Abdallah.Shami@uwo.ca)
# Organization: The Optimized Computing and Communications (OC2) Lab, ECE Department, Western University
#
# **Notebook 1: Data pre-processing**
# Procedures:
#   1): Read the dataset
#   2): Transform the tabular data into images
#   3): Display the transformed images
#   4): Split the training and test set
# ## Import libraries
# In[14]:
import numpy as np
import pandas as pd
import os
import cv2
import math
import random
import matplotlib.pyplot as plt
import shutil
from sklearn.preprocessing import QuantileTransformer
from PIL import Image
import warnings
warnings.filterwarnings("ignore")
# ## Read the Car-Hacking/CAN-Intrusion dataset
# The complete Car-Hacking dataset is publicly available at: https://ocslab.hksecurity.net/Datasets/CAN-intrusion-dataset
# In this repository, due to the file size limit of GitHub, we use the 5% subset.
# In[15]:
#Read dataset
df=pd.read_csv('data/Car_Hacking_5%.csv')
# In[16]:
df
# In[17]:
# The labels of the dataset. "R" indicates normal patterns, and there are four attack types (DoS, fuzzy, gear spoofing, and RPM spoofing)
df.Label.value_counts()
# ## Data Transformation
# Convert tabular data to images
# Procedures:
# 1. Use quantile transformation to map the original data samples into the scale of [0,255], representing pixel values
# 2. Generate images for each category (Normal, DoS, Fuzzy, Gear, RPM); each image consists of 27 data samples with 9 features each, so 27*9 = 243 values fill a 9*9*3 image: length 9, width 9, and 3 color channels (RGB).
# In[18]:
# Transform all features into the scale of [0,1]
numeric_features = df.dtypes[df.dtypes != 'object'].index
scaler = QuantileTransformer()
df[numeric_features] = scaler.fit_transform(df[numeric_features])
# In[19]:
# Multiply the feature values by 255 to transform them into the scale of [0,255]
df[numeric_features] = df[numeric_features].apply(
    lambda x: (x*255))
# In[20]:
df.describe()
# All features are in the same scale of [0,255]
# ### Generate images for each class
# In[21]:
df0=df[df['Label']=='R'].drop(['Label'],axis=1)
df1=df[df['Label']=='RPM'].drop(['Label'],axis=1)
df2=df[df['Label']=='gear'].drop(['Label'],axis=1)
df3=df[df['Label']=='DoS'].drop(['Label'],axis=1)
df4=df[df['Label']=='Fuzzy'].drop(['Label'],axis=1)
# In[22]:
# Generate 9*9 color images for class 0 (Normal)
count=0
ims = []
image_path = "train/0/"
os.makedirs(image_path)
for i in range(0, len(df0)):
    count=count+1
    if count<=27:
        im=df0.iloc[i].values
        ims=np.append(ims,im)
    else:
        ims=np.array(ims).reshape(9,9,3)
        array = np.array(ims, dtype=np.uint8)
        new_image = Image.fromarray(array)
        new_image.save(image_path+str(i)+'.png')
        count=0
        ims = []
# In[23]:
# Generate 9*9 color images for class 1 (RPM spoofing)
count=0
ims = []
image_path = "train/1/"
os.makedirs(image_path)
for i in range(0, len(df1)):
    count=count+1
    if count<=27:
        im=df1.iloc[i].values
        ims=np.append(ims,im)
    else:
        ims=np.array(ims).reshape(9,9,3)
        array = np.array(ims, dtype=np.uint8)
        new_image = Image.fromarray(array)
        new_image.save(image_path+str(i)+'.png')
        count=0
        ims = []
# In[24]:
# Generate 9*9 color images for class 2 (Gear spoofing)
count=0
ims = []
image_path = "train/2/"
os.makedirs(image_path)
for i in range(0, len(df2)):
    count=count+1
    if count<=27:
        im=df2.iloc[i].values
        ims=np.append(ims,im)
    else:
        ims=np.array(ims).reshape(9,9,3)
        array = np.array(ims, dtype=np.uint8)
        new_image = Image.fromarray(array)
        new_image.save(image_path+str(i)+'.png')
        count=0
        ims = []
# In[25]:
# Generate 9*9 color images for class 3 (DoS attack)
count=0
ims = []
image_path = "train/3/"
os.makedirs(image_path)
for i in range(0, len(df3)):
    count=count+1
    if count<=27:
        im=df3.iloc[i].values
        ims=np.append(ims,im)
    else:
        ims=np.array(ims).reshape(9,9,3)
        array = np.array(ims, dtype=np.uint8)
        new_image = Image.fromarray(array)
        new_image.save(image_path+str(i)+'.png')
        count=0
        ims = []
# In[26]:
# Generate 9*9 color images for class 4 (Fuzzy attack)
count=0
ims = []
image_path = "train/4/"
os.makedirs(image_path)
for i in range(0, len(df4)):
    count=count+1
    if count<=27:
        im=df4.iloc[i].values
        ims=np.append(ims,im)
    else:
        ims=np.array(ims).reshape(9,9,3)
        array = np.array(ims, dtype=np.uint8)
        new_image = Image.fromarray(array)
        new_image.save(image_path+str(i)+'.png')
        count=0
        ims = []
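# The five generation loops above differ only in the source dataframe and the
# output folder. As a sketch (an illustrative helper, not part of the original
# code), the same logic can be factored into one function that strides cleanly
# over the rows instead of keeping a running counter:
def save_class_images(frame, out_dir, rows_per_image=27):
    # Stack rows_per_image consecutive samples (9 features each) into one
    # 9x9x3 RGB image and save it under out_dir
    os.makedirs(out_dir, exist_ok=True)
    for start in range(0, len(frame) - rows_per_image + 1, rows_per_image):
        block = frame.iloc[start:start + rows_per_image].values  # 27 x 9
        pixels = block.reshape(9, 9, 3).astype(np.uint8)
        Image.fromarray(pixels).save(os.path.join(out_dir, str(start) + '.png'))
# Equivalent usage for the five classes:
# for label, frame in enumerate([df0, df1, df2, df3, df4]):
#     save_class_images(frame, 'train/' + str(label) + '/')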
# ## Split the training and test set
# In[27]:
# Create folders to store images
Train_Dir='./train/'
Val_Dir='./test/'
allimgs=[]
for subdir in os.listdir(Train_Dir):
    for filename in os.listdir(os.path.join(Train_Dir,subdir)):
        filepath=os.path.join(Train_Dir,subdir,filename)
        allimgs.append(filepath)
print(len(allimgs)) # Print the total number of images
# In[28]:
#split a test set from the dataset, train/test size = 80%/20%
Numbers=len(allimgs)//5 #size of test set (20%)
def mymovefile(srcfile,dstfile):
    if not os.path.isfile(srcfile):
        print("%s not exist!"%(srcfile))
    else:
        fpath,fname=os.path.split(dstfile)
        if not os.path.exists(fpath):
            os.makedirs(fpath)
        shutil.move(srcfile,dstfile)
        #print("move %s -> %s"%(srcfile,dstfile))
# In[29]:
# The size of test set
Numbers
# In[30]:
# Create the test set
val_imgs=random.sample(allimgs,Numbers)
for img in val_imgs:
    dest_path=img.replace(Train_Dir,Val_Dir)
    mymovefile(img,dest_path)
print('Finish creating test set')
# In[31]:
#resize the images 224*224 for better CNN training
def get_224(folder,dstdir):
    imgfilepaths=[]
    for root,dirs,imgs in os.walk(folder):
        for thisimg in imgs:
            thisimg_path=os.path.join(root,thisimg)
            imgfilepaths.append(thisimg_path)
    for thisimg_path in imgfilepaths:
        dir_name,filename=os.path.split(thisimg_path)
        dir_name=dir_name.replace(folder,dstdir)
        new_file_path=os.path.join(dir_name,filename)
        if not os.path.exists(dir_name):
            os.makedirs(dir_name)
        img=cv2.imread(thisimg_path)
        img=cv2.resize(img,(224,224))
        cv2.imwrite(new_file_path,img)
    print('Finish resizing')
# In[32]:
DATA_DIR_224='./train_224/'
get_224(folder='./train/',dstdir=DATA_DIR_224)
# In[33]:
DATA_DIR2_224='./test_224/'
get_224(folder='./test/',dstdir=DATA_DIR2_224)
# ### Display samples for each category
# In[34]:
# Read the images for each category, the file name may vary (27.png, 83.png...)
img1 = Image.open('./train_224/0/27.png')
img2 = Image.open('./train_224/1/83.png')
img3 = Image.open('./train_224/2/27.png')
img4 = Image.open('./train_224/3/27.png')
img5 = Image.open('./train_224/4/27.png')
plt.figure(figsize=(10, 10))
plt.subplot(1,5,1)
plt.imshow(img1)
plt.title("Normal")
plt.subplot(1,5,2)
plt.imshow(img2)
plt.title("RPM Spoofing")
plt.subplot(1,5,3)
plt.imshow(img3)
plt.title("Gear Spoofing")
plt.subplot(1,5,4)
plt.imshow(img4)
plt.title("DoS Attack")
plt.subplot(1,5,5)
plt.imshow(img5)
plt.title("Fuzzy Attack")
plt.show() # display it
# In[ ]:

@ -0,0 +1,682 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# A Transfer Learning and Optimized CNN Based Intrusion Detection System for Internet of Vehicles \n",
"This is the code for the paper entitled \"**A Transfer Learning and Optimized CNN Based Intrusion Detection System for Internet of Vehicles**\" accepted in IEEE International Conference on Communications (IEEE ICC). \n",
"Authors: Li Yang (lyang339@uwo.ca) and Abdallah Shami (Abdallah.Shami@uwo.ca) \n",
"Organization: The Optimized Computing and Communications (OC2) Lab, ECE Department, Western University\n",
"\n",
"**Notebook 1: Data pre-processing** \n",
"Procedures: \n",
"&nbsp; 1): Read the dataset \n",
"&nbsp; 2): Transform the tabular data into images \n",
"&nbsp; 3): Display the transformed images \n",
"&nbsp; 4): Split the training and test set "
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Import libraries"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {
"ExecuteTime": {
"end_time": "2023-07-06T09:03:07.788679800Z",
"start_time": "2023-07-06T09:03:07.746481Z"
}
},
"outputs": [],
"source": [
"import numpy as np\n",
"import pandas as pd\n",
"import os\n",
"import cv2\n",
"import math\n",
"import random\n",
"import matplotlib.pyplot as plt\n",
"import shutil\n",
"from sklearn.preprocessing import QuantileTransformer\n",
"from PIL import Image\n",
"import warnings\n",
"warnings.filterwarnings(\"ignore\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Read the Car-Hacking/CAN-Intrusion dataset\n",
"The complete Car-Hacking dataset is publicly available at: https://ocslab.hksecurity.net/Datasets/CAN-intrusion-dataset \n",
"In this repository, due to the file size limit of GitHub, we use the 5% subset."
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {
"collapsed": true,
"ExecuteTime": {
"end_time": "2023-07-06T09:03:08.040220300Z",
"start_time": "2023-07-06T09:03:07.750003500Z"
}
},
"outputs": [],
"source": [
"#Read dataset\n",
"df=pd.read_csv('data/Car_Hacking_5%.csv')"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {
"scrolled": true,
"ExecuteTime": {
"end_time": "2023-07-06T09:03:08.050784400Z",
"start_time": "2023-07-06T09:03:08.042218700Z"
}
},
"outputs": [
{
"data": {
"text/plain": " CAN ID DATA[0] DATA[1] DATA[2] DATA[3] DATA[4] DATA[5] DATA[6] \\\n0 1201 41 39 39 35 0 0 0 \n1 809 64 187 127 20 17 32 0 \n2 1349 216 0 0 136 0 0 0 \n3 1201 41 39 39 35 0 0 0 \n4 2 0 0 0 0 0 3 2 \n... ... ... ... ... ... ... ... ... \n818435 848 5 32 52 104 117 0 0 \n818436 1088 255 0 0 0 255 134 9 \n818437 848 5 32 100 104 117 0 0 \n818438 1349 216 90 0 137 0 0 0 \n818439 790 5 33 48 10 33 30 0 \n\n DATA[7] Label \n0 154 R \n1 20 R \n2 0 R \n3 154 R \n4 228 R \n... ... ... \n818435 12 R \n818436 0 R \n818437 92 R \n818438 0 R \n818439 111 R \n\n[818440 rows x 10 columns]",
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>CAN ID</th>\n <th>DATA[0]</th>\n <th>DATA[1]</th>\n <th>DATA[2]</th>\n <th>DATA[3]</th>\n <th>DATA[4]</th>\n <th>DATA[5]</th>\n <th>DATA[6]</th>\n <th>DATA[7]</th>\n <th>Label</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>1201</td>\n <td>41</td>\n <td>39</td>\n <td>39</td>\n <td>35</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>154</td>\n <td>R</td>\n </tr>\n <tr>\n <th>1</th>\n <td>809</td>\n <td>64</td>\n <td>187</td>\n <td>127</td>\n <td>20</td>\n <td>17</td>\n <td>32</td>\n <td>0</td>\n <td>20</td>\n <td>R</td>\n </tr>\n <tr>\n <th>2</th>\n <td>1349</td>\n <td>216</td>\n <td>0</td>\n <td>0</td>\n <td>136</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>R</td>\n </tr>\n <tr>\n <th>3</th>\n <td>1201</td>\n <td>41</td>\n <td>39</td>\n <td>39</td>\n <td>35</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>154</td>\n <td>R</td>\n </tr>\n <tr>\n <th>4</th>\n <td>2</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>3</td>\n <td>2</td>\n <td>228</td>\n <td>R</td>\n </tr>\n <tr>\n <th>...</th>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n </tr>\n <tr>\n <th>818435</th>\n <td>848</td>\n <td>5</td>\n <td>32</td>\n <td>52</td>\n <td>104</td>\n <td>117</td>\n <td>0</td>\n <td>0</td>\n <td>12</td>\n <td>R</td>\n </tr>\n <tr>\n <th>818436</th>\n <td>1088</td>\n <td>255</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>255</td>\n <td>134</td>\n <td>9</td>\n <td>0</td>\n <td>R</td>\n </tr>\n <tr>\n <th>818437</th>\n <td>848</td>\n <td>5</td>\n <td>32</td>\n <td>100</td>\n <td>104</td>\n <td>117</td>\n <td>0</td>\n <td>0</td>\n <td>92</td>\n <td>R</td>\n </tr>\n <tr>\n <th>818438</th>\n <td>1349</td>\n <td>216</td>\n <td>90</td>\n <td>0</td>\n <td>137</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>R</td>\n </tr>\n <tr>\n <th>818439</th>\n <td>790</td>\n <td>5</td>\n <td>33</td>\n <td>48</td>\n <td>10</td>\n <td>33</td>\n <td>30</td>\n <td>0</td>\n <td>111</td>\n <td>R</td>\n </tr>\n </tbody>\n</table>\n<p>818440 rows × 10 columns</p>\n</div>"
},
"execution_count": 16,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {
"ExecuteTime": {
"end_time": "2023-07-06T09:03:08.131587100Z",
"start_time": "2023-07-06T09:03:08.052784200Z"
}
},
"outputs": [
{
"data": {
"text/plain": "Label\nR 701832\nRPM 32539\ngear 29944\nDoS 29501\nFuzzy 24624\nName: count, dtype: int64"
},
"execution_count": 17,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# The labels of the dataset. \"R\" indicates normal patterns, and there are four types of attack (DoS, fuzzy. gear spoofing, and RPM spoofing zttacks)\n",
"df.Label.value_counts()"
]
},
{
"cell_type": "markdown",
"metadata": {
"collapsed": true
},
"source": [
"## Data Transformation\n",
"Convert tabular data to images\n",
"Procedures:\n",
"1. Use quantile transform to transform the original data samples into the scale of [0,255], representing pixel values\n",
"2. Generate images for each category (Normal, DoS, Fuzzy, Gear, RPM), each image consists of 27 data samples with 9 features. Thus, the size of each image is 9*9*3, length 9, width 9, and 3 color channels (RGB)."
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {
"collapsed": true,
"ExecuteTime": {
"end_time": "2023-07-06T09:03:09.029917800Z",
"start_time": "2023-07-06T09:03:08.087993Z"
}
},
"outputs": [],
"source": [
"# Transform all features into the scale of [0,1]\n",
"numeric_features = df.dtypes[df.dtypes != 'object'].index\n",
"scaler = QuantileTransformer() \n",
"df[numeric_features] = scaler.fit_transform(df[numeric_features])"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {
"ExecuteTime": {
"end_time": "2023-07-06T09:03:09.083315300Z",
"start_time": "2023-07-06T09:03:09.030919300Z"
}
},
"outputs": [],
"source": [
"# Multiply the feature values by 255 to transform them into the scale of [0,255]\n",
"df[numeric_features] = df[numeric_features].apply(\n",
" lambda x: (x*255))"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {
"ExecuteTime": {
"end_time": "2023-07-06T09:03:09.331286300Z",
"start_time": "2023-07-06T09:03:09.084313100Z"
}
},
"outputs": [
{
"data": {
"text/plain": " CAN ID DATA[0] DATA[1] DATA[2] \\\ncount 818440.000000 818440.000000 818440.000000 818440.000000 \nmean 127.457890 113.711554 107.926505 89.813595 \nstd 73.812063 89.982269 93.314034 100.866477 \nmin 0.000000 0.000000 0.000000 0.000000 \n25% 66.621622 0.000000 0.000000 0.000000 \n50% 122.267267 126.223724 115.630631 0.000000 \n75% 190.292793 192.590090 192.972973 199.992492 \nmax 255.000000 255.000000 255.000000 255.000000 \n\n DATA[3] DATA[4] DATA[5] DATA[6] \\\ncount 818440.000000 818440.000000 818440.000000 818440.000000 \nmean 109.978430 105.412321 112.250627 84.973873 \nstd 103.679776 95.557986 91.033532 101.390068 \nmin 0.000000 0.000000 0.000000 0.000000 \n25% 0.000000 0.000000 0.000000 0.000000 \n50% 130.690691 127.244745 129.159159 0.000000 \n75% 191.186186 192.717718 190.420420 192.207207 \nmax 255.000000 255.000000 255.000000 255.000000 \n\n DATA[7] \ncount 818440.000000 \nmean 93.112763 \nstd 100.247486 \nmin 0.000000 \n25% 0.000000 \n50% 0.000000 \n75% 190.675676 \nmax 255.000000 ",
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>CAN ID</th>\n <th>DATA[0]</th>\n <th>DATA[1]</th>\n <th>DATA[2]</th>\n <th>DATA[3]</th>\n <th>DATA[4]</th>\n <th>DATA[5]</th>\n <th>DATA[6]</th>\n <th>DATA[7]</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>count</th>\n <td>818440.000000</td>\n <td>818440.000000</td>\n <td>818440.000000</td>\n <td>818440.000000</td>\n <td>818440.000000</td>\n <td>818440.000000</td>\n <td>818440.000000</td>\n <td>818440.000000</td>\n <td>818440.000000</td>\n </tr>\n <tr>\n <th>mean</th>\n <td>127.457890</td>\n <td>113.711554</td>\n <td>107.926505</td>\n <td>89.813595</td>\n <td>109.978430</td>\n <td>105.412321</td>\n <td>112.250627</td>\n <td>84.973873</td>\n <td>93.112763</td>\n </tr>\n <tr>\n <th>std</th>\n <td>73.812063</td>\n <td>89.982269</td>\n <td>93.314034</td>\n <td>100.866477</td>\n <td>103.679776</td>\n <td>95.557986</td>\n <td>91.033532</td>\n <td>101.390068</td>\n <td>100.247486</td>\n </tr>\n <tr>\n <th>min</th>\n <td>0.000000</td>\n <td>0.000000</td>\n <td>0.000000</td>\n <td>0.000000</td>\n <td>0.000000</td>\n <td>0.000000</td>\n <td>0.000000</td>\n <td>0.000000</td>\n <td>0.000000</td>\n </tr>\n <tr>\n <th>25%</th>\n <td>66.621622</td>\n <td>0.000000</td>\n <td>0.000000</td>\n <td>0.000000</td>\n <td>0.000000</td>\n <td>0.000000</td>\n <td>0.000000</td>\n <td>0.000000</td>\n <td>0.000000</td>\n </tr>\n <tr>\n <th>50%</th>\n <td>122.267267</td>\n <td>126.223724</td>\n <td>115.630631</td>\n <td>0.000000</td>\n <td>130.690691</td>\n <td>127.244745</td>\n <td>129.159159</td>\n <td>0.000000</td>\n <td>0.000000</td>\n </tr>\n <tr>\n <th>75%</th>\n <td>190.292793</td>\n <td>192.590090</td>\n <td>192.972973</td>\n <td>199.992492</td>\n <td>191.186186</td>\n <td>192.717718</td>\n <td>190.420420</td>\n <td>192.207207</td>\n <td>190.675676</td>\n </tr>\n <tr>\n <th>max</th>\n <td>255.000000</td>\n <td>255.000000</td>\n <td>255.000000</td>\n <td>255.000000</td>\n <td>255.000000</td>\n <td>255.000000</td>\n <td>255.000000</td>\n <td>255.000000</td>\n <td>255.000000</td>\n </tr>\n </tbody>\n</table>\n</div>"
},
"execution_count": 20,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.describe()"
]
},
{
"cell_type": "markdown",
"metadata": {
"collapsed": true
},
"source": [
"All features are in the same scale of [0,255]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Generate images for each class"
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {
"collapsed": true,
"ExecuteTime": {
"end_time": "2023-07-06T09:03:09.553237400Z",
"start_time": "2023-07-06T09:03:09.334282600Z"
}
},
"outputs": [],
"source": [
"df0=df[df['Label']=='R'].drop(['Label'],axis=1)\n",
"df1=df[df['Label']=='RPM'].drop(['Label'],axis=1)\n",
"df2=df[df['Label']=='gear'].drop(['Label'],axis=1)\n",
"df3=df[df['Label']=='DoS'].drop(['Label'],axis=1)\n",
"df4=df[df['Label']=='Fuzzy'].drop(['Label'],axis=1)"
]
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {
"ExecuteTime": {
"end_time": "2023-07-06T09:03:09.561532500Z",
"start_time": "2023-07-06T09:03:09.557535700Z"
}
},
"outputs": [],
"source": [
"# Generate 9*9 color images for class 0 (Normal)\n",
"count=0\n",
"ims = []\n",
"\n",
"image_path = \"train/0/\"\n",
"os.makedirs(image_path)\n",
"\n",
"for i in range(0, 2):\n",
" count=count+1\n",
" if count<=27: \n",
" im=df0.iloc[i].values\n",
" ims=np.append(ims,im)\n",
" else:\n",
" print(ims)\n",
" ims=np.array(ims).reshape(9,9,3)\n",
" print(ims)\n",
" array = np.array(ims, dtype=np.uint8)\n",
" new_image = Image.fromarray(array)\n",
" new_image.save(image_path+str(i)+'.png')\n",
" count=0\n",
" ims = []"
]
},
{
"cell_type": "code",
"execution_count": 23,
"metadata": {
"ExecuteTime": {
"end_time": "2023-07-06T09:03:11.704231100Z",
"start_time": "2023-07-06T09:03:09.564039300Z"
}
},
"outputs": [],
"source": [
"# Generate 9*9 color images for class 1 (RPM spoofing)\n",
"count=0\n",
"ims = []\n",
"\n",
"image_path = \"train/1/\"\n",
"os.makedirs(image_path)\n",
"\n",
"for i in range(0, len(df1)): \n",
" count=count+1\n",
" if count<=27: \n",
" im=df1.iloc[i].values\n",
" ims=np.append(ims,im)\n",
" else:\n",
" ims=np.array(ims).reshape(9,9,3)\n",
" array = np.array(ims, dtype=np.uint8)\n",
" new_image = Image.fromarray(array)\n",
" new_image.save(image_path+str(i)+'.png')\n",
" count=0\n",
" ims = []"
]
},
{
"cell_type": "code",
"execution_count": 24,
"metadata": {
"ExecuteTime": {
"end_time": "2023-07-06T09:03:13.510844700Z",
"start_time": "2023-07-06T09:03:11.707374200Z"
}
},
"outputs": [],
"source": [
"# Generate 9*9 color images for class 2 (Gear spoofing)\n",
"count=0\n",
"ims = []\n",
"\n",
"image_path = \"train/2/\"\n",
"os.makedirs(image_path)\n",
"\n",
"for i in range(0, len(df2)): \n",
" count=count+1\n",
" if count<=27: \n",
" im=df2.iloc[i].values\n",
" ims=np.append(ims,im)\n",
" else:\n",
" ims\n",
" ims=np.array(ims).reshape(9,9,3)\n",
" ims\n",
" array = np.array(ims, dtype=np.uint8)\n",
" new_image = Image.fromarray(array)\n",
" new_image.save(image_path+str(i)+'.png')\n",
" count=0\n",
" ims = []"
]
},
{
"cell_type": "code",
"execution_count": 25,
"metadata": {
"collapsed": true,
"ExecuteTime": {
"end_time": "2023-07-06T09:03:15.293229800Z",
"start_time": "2023-07-06T09:03:13.514351300Z"
}
},
"outputs": [],
"source": [
"# Generate 9*9 color images for class 3 (DoS attack)\n",
"count=0\n",
"ims = []\n",
"\n",
"image_path = \"train/3/\"\n",
"os.makedirs(image_path)\n",
"\n",
"\n",
"for i in range(0, len(df3)): \n",
" count=count+1\n",
" if count<=27: \n",
" im=df3.iloc[i].values\n",
" ims=np.append(ims,im)\n",
" else:\n",
" ims=np.array(ims).reshape(9,9,3)\n",
" array = np.array(ims, dtype=np.uint8)\n",
" new_image = Image.fromarray(array)\n",
" new_image.save(image_path+str(i)+'.png')\n",
" count=0\n",
" ims = []"
]
},
{
"cell_type": "code",
"execution_count": 26,
"metadata": {
"collapsed": true,
"ExecuteTime": {
"end_time": "2023-07-06T09:03:16.797229300Z",
"start_time": "2023-07-06T09:03:15.294734300Z"
}
},
"outputs": [],
"source": [
"# Generate 9*9 color images for class 4 (Fuzzy attack)\n",
"count=0\n",
"ims = []\n",
"\n",
"image_path = \"train/4/\"\n",
"os.makedirs(image_path)\n",
"\n",
"\n",
"for i in range(0, len(df4)): \n",
" count=count+1\n",
" if count<=27: \n",
" im=df4.iloc[i].values\n",
" ims=np.append(ims,im)\n",
" else:\n",
" ims=np.array(ims).reshape(9,9,3)\n",
" array = np.array(ims, dtype=np.uint8)\n",
" new_image = Image.fromarray(array)\n",
" new_image.save(image_path+str(i)+'.png')\n",
" count=0\n",
" ims = []"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Split the training and test set "
]
},
{
"cell_type": "code",
"execution_count": 27,
"metadata": {
"ExecuteTime": {
"end_time": "2023-07-06T09:03:16.815834800Z",
"start_time": "2023-07-06T09:03:16.797229300Z"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"4163\n"
]
}
],
"source": [
"# Create folders to store images\n",
"Train_Dir='./train/'\n",
"Val_Dir='./test/'\n",
"allimgs=[]\n",
"for subdir in os.listdir(Train_Dir):\n",
" for filename in os.listdir(os.path.join(Train_Dir,subdir)):\n",
" filepath=os.path.join(Train_Dir,subdir,filename)\n",
" allimgs.append(filepath)\n",
"print(len(allimgs)) # Print the total number of images"
]
},
{
"cell_type": "code",
"execution_count": 28,
"metadata": {
"ExecuteTime": {
"end_time": "2023-07-06T09:03:16.838914900Z",
"start_time": "2023-07-06T09:03:16.818833300Z"
}
},
"outputs": [],
"source": [
"#split a test set from the dataset, train/test size = 80%/20%\n",
"Numbers=len(allimgs)//5 \t#size of test set (20%)\n",
"\n",
"def mymovefile(srcfile,dstfile):\n",
" if not os.path.isfile(srcfile):\n",
" print (\"%s not exist!\"%(srcfile))\n",
" else:\n",
" fpath,fname=os.path.split(dstfile) \n",
" if not os.path.exists(fpath):\n",
" os.makedirs(fpath) \n",
" shutil.move(srcfile,dstfile) \n",
" #print (\"move %s -> %s\"%(srcfile,dstfile))"
]
},
{
"cell_type": "code",
"execution_count": 29,
"metadata": {
"scrolled": true,
"ExecuteTime": {
"end_time": "2023-07-06T09:03:16.838914900Z",
"start_time": "2023-07-06T09:03:16.822343500Z"
}
},
"outputs": [
{
"data": {
"text/plain": "832"
},
"execution_count": 29,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# The size of test set\n",
"Numbers"
]
},
{
"cell_type": "code",
"execution_count": 30,
"metadata": {
"ExecuteTime": {
"end_time": "2023-07-06T09:03:17.654719900Z",
"start_time": "2023-07-06T09:03:16.832397200Z"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Finish creating test set\n"
]
}
],
"source": [
"# Create the test set\n",
"val_imgs=random.sample(allimgs,Numbers)\n",
"for img in val_imgs:\n",
" dest_path=img.replace(Train_Dir,Val_Dir)\n",
" mymovefile(img,dest_path)\n",
"print('Finish creating test set')"
]
},
{
"cell_type": "code",
"execution_count": 31,
"metadata": {
"collapsed": true,
"ExecuteTime": {
"end_time": "2023-07-06T09:03:17.660725400Z",
"start_time": "2023-07-06T09:03:17.658724800Z"
}
},
"outputs": [],
"source": [
"#resize the images 224*224 for better CNN training\n",
"def get_224(folder,dstdir):\n",
" imgfilepaths=[]\n",
" for root,dirs,imgs in os.walk(folder):\n",
" for thisimg in imgs:\n",
" thisimg_path=os.path.join(root,thisimg)\n",
" imgfilepaths.append(thisimg_path)\n",
" for thisimg_path in imgfilepaths:\n",
" dir_name,filename=os.path.split(thisimg_path)\n",
" dir_name=dir_name.replace(folder,dstdir)\n",
" new_file_path=os.path.join(dir_name,filename)\n",
" if not os.path.exists(dir_name):\n",
" os.makedirs(dir_name)\n",
" img=cv2.imread(thisimg_path)\n",
" img=cv2.resize(img,(224,224))\n",
" cv2.imwrite(new_file_path,img)\n",
" print('Finish resizing'.format(folder=folder))"
]
},
{
"cell_type": "code",
"execution_count": 32,
"metadata": {
"ExecuteTime": {
"end_time": "2023-07-06T09:03:22.772090900Z",
"start_time": "2023-07-06T09:03:17.661728600Z"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Finish resizing\n"
]
}
],
"source": [
"DATA_DIR_224='./train_224/'\n",
"get_224(folder='./train/',dstdir=DATA_DIR_224)"
]
},
{
"cell_type": "code",
"execution_count": 33,
"metadata": {
"ExecuteTime": {
"end_time": "2023-07-06T09:03:24.056886300Z",
"start_time": "2023-07-06T09:03:22.772621Z"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Finish resizing\n"
]
}
],
"source": [
"DATA_DIR2_224='./test_224/'\n",
"get_224(folder='./test/',dstdir=DATA_DIR2_224)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Display samples for each category"
]
},
{
"cell_type": "code",
"execution_count": 34,
"metadata": {
"ExecuteTime": {
"end_time": "2023-07-06T09:03:24.562540100Z",
"start_time": "2023-07-06T09:03:24.056886300Z"
}
},
"outputs": [
{
"ename": "FileNotFoundError",
"evalue": "[Errno 2] No such file or directory: './train_224/0/27.png'",
"output_type": "error",
"traceback": [
"\u001B[1;31m---------------------------------------------------------------------------\u001B[0m",
"\u001B[1;31mFileNotFoundError\u001B[0m Traceback (most recent call last)",
"Cell \u001B[1;32mIn[34], line 2\u001B[0m\n\u001B[0;32m 1\u001B[0m \u001B[38;5;66;03m# Read the images for each category, the file name may vary (27.png, 83.png...)\u001B[39;00m\n\u001B[1;32m----> 2\u001B[0m img1 \u001B[38;5;241m=\u001B[39m \u001B[43mImage\u001B[49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mopen\u001B[49m\u001B[43m(\u001B[49m\u001B[38;5;124;43m'\u001B[39;49m\u001B[38;5;124;43m./train_224/0/27.png\u001B[39;49m\u001B[38;5;124;43m'\u001B[39;49m\u001B[43m)\u001B[49m\n\u001B[0;32m 3\u001B[0m img2 \u001B[38;5;241m=\u001B[39m Image\u001B[38;5;241m.\u001B[39mopen(\u001B[38;5;124m'\u001B[39m\u001B[38;5;124m./train_224/1/83.png\u001B[39m\u001B[38;5;124m'\u001B[39m)\n\u001B[0;32m 4\u001B[0m img3 \u001B[38;5;241m=\u001B[39m Image\u001B[38;5;241m.\u001B[39mopen(\u001B[38;5;124m'\u001B[39m\u001B[38;5;124m./train_224/2/27.png\u001B[39m\u001B[38;5;124m'\u001B[39m)\n",
"File \u001B[1;32m~\\anaconda3\\envs\\FlowPicRefresh\\lib\\site-packages\\PIL\\Image.py:3227\u001B[0m, in \u001B[0;36mopen\u001B[1;34m(fp, mode, formats)\u001B[0m\n\u001B[0;32m 3224\u001B[0m filename \u001B[38;5;241m=\u001B[39m fp\n\u001B[0;32m 3226\u001B[0m \u001B[38;5;28;01mif\u001B[39;00m filename:\n\u001B[1;32m-> 3227\u001B[0m fp \u001B[38;5;241m=\u001B[39m \u001B[43mbuiltins\u001B[49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mopen\u001B[49m\u001B[43m(\u001B[49m\u001B[43mfilename\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[38;5;124;43m\"\u001B[39;49m\u001B[38;5;124;43mrb\u001B[39;49m\u001B[38;5;124;43m\"\u001B[39;49m\u001B[43m)\u001B[49m\n\u001B[0;32m 3228\u001B[0m exclusive_fp \u001B[38;5;241m=\u001B[39m \u001B[38;5;28;01mTrue\u001B[39;00m\n\u001B[0;32m 3230\u001B[0m \u001B[38;5;28;01mtry\u001B[39;00m:\n",
"\u001B[1;31mFileNotFoundError\u001B[0m: [Errno 2] No such file or directory: './train_224/0/27.png'"
]
}
],
"source": [
"# Read the images for each category, the file name may vary (27.png, 83.png...)\n",
"img1 = Image.open('./train_224/0/27.png')\n",
"img2 = Image.open('./train_224/1/83.png')\n",
"img3 = Image.open('./train_224/2/27.png')\n",
"img4 = Image.open('./train_224/3/27.png')\n",
"img5 = Image.open('./train_224/4/27.png')\n",
"\n",
"plt.figure(figsize=(10, 10)) \n",
"plt.subplot(1,5,1)\n",
"plt.imshow(img1)\n",
"plt.title(\"Normal\")\n",
"plt.subplot(1,5,2)\n",
"plt.imshow(img2)\n",
"plt.title(\"RPM Spoofing\")\n",
"plt.subplot(1,5,3)\n",
"plt.imshow(img3)\n",
"plt.title(\"Gear Spoofing\")\n",
"plt.subplot(1,5,4)\n",
"plt.imshow(img4)\n",
"plt.title(\"DoS Attack\")\n",
"plt.subplot(1,5,5)\n",
"plt.imshow(img5)\n",
"plt.title(\"Fuzzy Attack\")\n",
"plt.show() # display it"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"ExecuteTime": {
"start_time": "2023-07-06T09:03:24.562540100Z"
}
},
"outputs": [],
"source": []
}
],
"metadata": {
"anaconda-cloud": {},
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.8"
}
},
"nbformat": 4,
"nbformat_minor": 2
}

@ -0,0 +1,348 @@
#!/usr/bin/env python
# coding: utf-8
# # A Transfer Learning and Optimized CNN Based Intrusion Detection System for Internet of Vehicles
# This is the code for the paper entitled "**A Transfer Learning and Optimized CNN Based Intrusion Detection System for Internet of Vehicles**" accepted in IEEE International Conference on Communications (IEEE ICC).
# Authors: Li Yang (lyang339@uwo.ca) and Abdallah Shami (Abdallah.Shami@uwo.ca)
# Organization: The Optimized Computing and Communications (OC2) Lab, ECE Department, Western University
#
# **Notebook 1: Data pre-processing**
# Procedures:
# &nbsp; 1): Read the dataset
# &nbsp; 2): Transform the tabular data into images
# &nbsp; 3): Display the transformed images
# &nbsp; 4): Split the training and test set
# ## Import libraries
# In[14]:
import numpy as np
import pandas as pd
import os
import cv2
import math
import random
import matplotlib.pyplot as plt
import shutil
from sklearn.preprocessing import QuantileTransformer
from PIL import Image
import warnings
warnings.filterwarnings("ignore")
# ## Read the Car-Hacking/CAN-Intrusion dataset
# The complete Car-Hacking dataset is publicly available at: https://ocslab.hksecurity.net/Datasets/CAN-intrusion-dataset
# In this repository, due to the file size limit of GitHub, we use the 5% subset.
# In[15]:
#Read dataset
df=pd.read_csv('data/Car_Hacking_5%.csv')
# In[16]:
df
# In[17]:
# The labels of the dataset. "R" indicates normal patterns, and there are four attack types (DoS, fuzzy, gear spoofing, and RPM spoofing)
df.Label.value_counts()
# ## Data Transformation
# Convert tabular data to images
# Procedures:
# 1. Use quantile transformation to map the original data samples into the scale of [0,255], representing pixel values
# 2. Generate images for each category (Normal, DoS, Fuzzy, Gear, RPM); each image consists of 27 data samples with 9 features each, so 27*9 = 243 values fill a 9*9*3 image: length 9, width 9, and 3 color channels (RGB).
# In[18]:
# Transform all features into the scale of [0,1]
numeric_features = df.dtypes[df.dtypes != 'object'].index
scaler = QuantileTransformer()
df[numeric_features] = scaler.fit_transform(df[numeric_features])
# In[19]:
# Multiply the feature values by 255 to transform them into the scale of [0,255]
df[numeric_features] = df[numeric_features].apply(
lambda x: (x*255))
# In[20]:
df.describe()
# All features are in the same scale of [0,255]
# ### Generate images for each class
# In[21]:
df0=df[df['Label']=='R'].drop(['Label'],axis=1)
df1=df[df['Label']=='RPM'].drop(['Label'],axis=1)
df2=df[df['Label']=='gear'].drop(['Label'],axis=1)
df3=df[df['Label']=='DoS'].drop(['Label'],axis=1)
df4=df[df['Label']=='Fuzzy'].drop(['Label'],axis=1)
# In[22]:
# Generate 9*9 color images for class 0 (Normal)
count=0
ims = []
image_path = "train/0/"
os.makedirs(image_path)
for i in range(0, len(df0)):
    count=count+1
    if count<=27:
        im=df0.iloc[i].values
        ims=np.append(ims,im)
    else:
        ims=np.array(ims).reshape(9,9,3)
        array = np.array(ims, dtype=np.uint8)
        new_image = Image.fromarray(array)
        new_image.save(image_path+str(i)+'.png')
        count=0
        ims = []
# In[23]:
# Generate 9*9 color images for class 1 (RPM spoofing)
count=0
ims = []
image_path = "train/1/"
os.makedirs(image_path)
for i in range(0, len(df1)):
    count=count+1
    if count<=27:
        im=df1.iloc[i].values
        ims=np.append(ims,im)
    else:
        ims=np.array(ims).reshape(9,9,3)
        array = np.array(ims, dtype=np.uint8)
        new_image = Image.fromarray(array)
        new_image.save(image_path+str(i)+'.png')
        count=0
        ims = []
# In[24]:
# Generate 9*9 color images for class 2 (Gear spoofing)
count=0
ims = []
image_path = "train/2/"
os.makedirs(image_path)
for i in range(0, len(df2)):
    count=count+1
    if count<=27:
        im=df2.iloc[i].values
        ims=np.append(ims,im)
    else:
        ims=np.array(ims).reshape(9,9,3)
        array = np.array(ims, dtype=np.uint8)
        new_image = Image.fromarray(array)
        new_image.save(image_path+str(i)+'.png')
        count=0
        ims = []
# In[25]:
# Generate 9*9 color images for class 3 (DoS attack)
count=0
ims = []
image_path = "train/3/"
os.makedirs(image_path)
for i in range(0, len(df3)):
    count=count+1
    if count<=27:
        im=df3.iloc[i].values
        ims=np.append(ims,im)
    else:
        ims=np.array(ims).reshape(9,9,3)
        array = np.array(ims, dtype=np.uint8)
        new_image = Image.fromarray(array)
        new_image.save(image_path+str(i)+'.png')
        count=0
        ims = []
# In[26]:
# Generate 9*9 color images for class 4 (Fuzzy attack)
count=0
ims = []
image_path = "train/4/"
os.makedirs(image_path)
for i in range(0, len(df4)):
    count=count+1
    if count<=27:
        im=df4.iloc[i].values
        ims=np.append(ims,im)
    else:
        ims=np.array(ims).reshape(9,9,3)
        array = np.array(ims, dtype=np.uint8)
        new_image = Image.fromarray(array)
        new_image.save(image_path+str(i)+'.png')
        count=0
        ims = []
# ## Split the training and test set
# In[27]:
# Create folders to store images
Train_Dir='./train/'
Val_Dir='./test/'
allimgs=[]
for subdir in os.listdir(Train_Dir):
    for filename in os.listdir(os.path.join(Train_Dir,subdir)):
        filepath=os.path.join(Train_Dir,subdir,filename)
        allimgs.append(filepath)
print(len(allimgs)) # Print the total number of images
# In[28]:
#split a test set from the dataset, train/test size = 80%/20%
Numbers=len(allimgs)//5 #size of test set (20%)
def mymovefile(srcfile,dstfile):
    if not os.path.isfile(srcfile):
        print("%s not exist!"%(srcfile))
    else:
        fpath,fname=os.path.split(dstfile)
        if not os.path.exists(fpath):
            os.makedirs(fpath)
        shutil.move(srcfile,dstfile)
        #print("move %s -> %s"%(srcfile,dstfile))
# In[29]:
# The size of test set
Numbers
# In[30]:
# Create the test set
val_imgs=random.sample(allimgs,Numbers)
for img in val_imgs:
    dest_path=img.replace(Train_Dir,Val_Dir)
    mymovefile(img,dest_path)
print('Finish creating test set')
# In[31]:
#resize the images 224*224 for better CNN training
def get_224(folder,dstdir):
    imgfilepaths=[]
    for root,dirs,imgs in os.walk(folder):
        for thisimg in imgs:
            thisimg_path=os.path.join(root,thisimg)
            imgfilepaths.append(thisimg_path)
    for thisimg_path in imgfilepaths:
        dir_name,filename=os.path.split(thisimg_path)
        dir_name=dir_name.replace(folder,dstdir)
        new_file_path=os.path.join(dir_name,filename)
        if not os.path.exists(dir_name):
            os.makedirs(dir_name)
        img=cv2.imread(thisimg_path)
        img=cv2.resize(img,(224,224))
        cv2.imwrite(new_file_path,img)
    print('Finish resizing')
# In[32]:
DATA_DIR_224='./train_224/'
get_224(folder='./train/',dstdir=DATA_DIR_224)
# In[33]:
DATA_DIR2_224='./test_224/'
get_224(folder='./test/',dstdir=DATA_DIR2_224)
# ### Display samples for each category
# In[34]:
# Read the images for each category, the file name may vary (27.png, 83.png...)
img1 = Image.open('./train_224/0/27.png')
img2 = Image.open('./train_224/1/83.png')
img3 = Image.open('./train_224/2/27.png')
img4 = Image.open('./train_224/3/27.png')
img5 = Image.open('./train_224/4/27.png')
plt.figure(figsize=(10, 10))
plt.subplot(1,5,1)
plt.imshow(img1)
plt.title("Normal")
plt.subplot(1,5,2)
plt.imshow(img2)
plt.title("RPM Spoofing")
plt.subplot(1,5,3)
plt.imshow(img3)
plt.title("Gear Spoofing")
plt.subplot(1,5,4)
plt.imshow(img4)
plt.title("DoS Attack")
plt.subplot(1,5,5)
plt.imshow(img5)
plt.title("Fuzzy Attack")
plt.show() # display it
# In[ ]:

Binary file not shown (new image, 40 KiB)

@ -0,0 +1,21 @@
MIT License
Copyright (c) 2022 Western OC2 Lab
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

@ -0,0 +1,81 @@
# Intrusion-Detection-System-Using-CNN-and-Transfer-Learning
This is the code for the paper entitled "**[A Transfer Learning and Optimized CNN Based Intrusion Detection System for Internet of Vehicles](https://arxiv.org/pdf/2201.11812.pdf)**" published in **IEEE International Conference on Communications (IEEE ICC)**, doi: [10.1109/ICC45855.2022.9838780](https://ieeexplore.ieee.org/document/9838780).
- Authors: Li Yang and Abdallah Shami
- Organization: The Optimized Computing and Communications (OC2) Lab, ECE Department, Western University
This repository introduces how to use **convolutional neural networks (CNNs)** and **transfer learning** techniques to develop **intrusion detection systems**. **Ensemble learning** and **hyperparameter optimization techniques** are also used to achieve optimized model performance.
- Another **intrusion detection system development code** using **decision tree-based machine learning algorithms (Decision tree, random forest, XGBoost, stacking, etc.)** can be found in: [Intrusion-Detection-System-Using-Machine-Learning](https://github.com/Western-OC2-Lab/Intrusion-Detection-System-Using-Machine-Learning)
- A comprehensive **hyperparameter optimization** tutorial code can be found in: [Hyperparameter-Optimization-of-Machine-Learning-Algorithms](https://github.com/LiYangHart/Hyperparameter-Optimization-of-Machine-Learning-Algorithms)
## Abstract of The Paper
Modern vehicles, including autonomous vehicles and connected vehicles, are increasingly connected to the external world, which enables various functionalities and services. However, this increased connectivity also enlarges the attack surface of the Internet of Vehicles (IoV), making it vulnerable to cyber-threats. Due to the lack of authentication and encryption procedures in vehicular networks, Intrusion Detection Systems (IDSs) are essential for protecting modern vehicle systems from network attacks. In this paper, a transfer learning and ensemble learning-based IDS is proposed for IoV systems using convolutional neural networks (CNNs) and hyper-parameter optimization techniques. In the experiments, the proposed IDS demonstrated over 99.25% detection rates and F1-scores on two well-known public benchmark IoV security datasets: the Car-Hacking dataset and the CICIDS2017 dataset. This shows the effectiveness of the proposed IDS for cyber-attack detection in both intra-vehicle and external vehicular networks.
<p float="left">
<img src="https://github.com/Western-OC2-Lab/Intrusion-Detection-System-Using-CNN-and-Transfer-Learning/blob/main/framework.png" width="500" />
<img src="https://github.com/Western-OC2-Lab/Intrusion-Detection-System-Using-CNN-and-Transfer-Learning/blob/main/CAN.png" width="400" />
</p>
## Implementation
### CNN Models
* VGG16
* VGG19
* Xception
* Inception
* ResNet
* InceptionResNet
### Ensemble Learning Models
* Bagging
* Probability Averaging
* Concatenation
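A minimal sketch of probability averaging, assuming several trained Keras models that each output per-class softmax probabilities (the model file names and `x_test` below are illustrative placeholders, not files from this repository):
```
import numpy as np
from keras.models import load_model

# Load the trained CNN models (illustrative file names)
models = [load_model(p) for p in ('vgg16.h5', 'xception.h5', 'resnet.h5')]

# Average the per-class probabilities predicted by each model,
# then pick the most likely class
probs = np.mean([m.predict(x_test) for m in models], axis=0)
y_pred = probs.argmax(axis=1)
```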
### Hyperparameter Optimization Methods
* Random Search (RS)
* Bayesian Optimization - Tree-structured Parzen Estimator (BO-TPE)
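As a rough sketch of BO-TPE with the [hyperopt](https://github.com/hyperopt/hyperopt) library listed under Libraries below (the search space and the `train_and_score` routine are placeholders, not the paper's actual configuration):
```
from hyperopt import fmin, tpe, hp, Trials

# Hypothetical search space for two common CNN hyperparameters
space = {
    'learning_rate': hp.loguniform('learning_rate', -9, -2),
    'batch_size': hp.choice('batch_size', [16, 32, 64]),
}

def objective(params):
    # hyperopt minimizes the objective, so return 1 - validation accuracy;
    # train_and_score stands in for the user's own training routine
    return 1.0 - train_and_score(**params)

trials = Trials()
best = fmin(fn=objective, space=space, algo=tpe.suggest,
            max_evals=20, trials=trials)
print(best)
```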
### Dataset
1. CAN-intrusion/Car-Hacking dataset, a benchmark network security dataset for intra-vehicle intrusion detection
* Publicly available at: https://ocslab.hksecurity.net/Datasets/CAN-intrusion-dataset
* Can be processed using the same code
2. CICIDS2017 dataset, a popular network traffic dataset for intrusion detection problems
* Publicly available at: https://www.unb.ca/cic/datasets/ids-2017.html
For the purpose of displaying the experimental results in Jupyter Notebook, the sampled subset of the CAN-intrusion dataset is used in the sample code. The subsets are in the "[data](https://github.com/Western-OC2-Lab/Intrusion-Detection-System-Using-CNN-and-Transfer-Learning/tree/main/data)" folder.
### Code
* [1-Data_pre-processing_CAN.ipynb](https://github.com/Western-OC2-Lab/Intrusion-Detection-System-Using-CNN-and-Transfer-Learning/blob/main/1-Data_pre-processing_CAN.ipynb): code for data pre-processing and transformation (tabular data to images).
* [2-CNN_Model_Development&Hyperparameter Optimization.ipynb](https://github.com/Western-OC2-Lab/Intrusion-Detection-System-Using-CNN-and-Transfer-Learning/blob/main/2-CNN_Model_Development%26Hyperparameter%20Optimization.ipynb): code for the development of CNN models and their hyperparameter optimization.
* [3-Ensemble_Models-CAN.ipynb](https://github.com/Western-OC2-Lab/Intrusion-Detection-System-Using-CNN-and-Transfer-Learning/blob/main/3-Ensemble_Models-CAN.ipynb): code for the construction of three ensemble learning techniques.
### Libraries
* Python 3.5+
* [Keras 2.1.0+](https://keras.io/)
* [Tensorflow 1.10.0+](https://www.tensorflow.org/install/gpu)
* [OpenCV-python](https://docs.opencv.org/4.x/d6/d00/tutorial_py_root.html)
* [hyperopt](https://github.com/hyperopt/hyperopt)
## Contact-Info
Please feel free to contact us for any questions or cooperation opportunities. We will be happy to help.
* Email: [liyanghart@gmail.com](mailto:liyanghart@gmail.com) or [Abdallah.Shami@uwo.ca](mailto:Abdallah.Shami@uwo.ca)
* GitHub: [LiYangHart](https://github.com/LiYangHart) and [Western OC2 Lab](https://github.com/Western-OC2-Lab/)
* LinkedIn: [Li Yang](https://www.linkedin.com/in/li-yang-phd-65a190176/)
* Google Scholar: [Li Yang](https://scholar.google.com.eg/citations?user=XEfM7bIAAAAJ&hl=en) and [OC2 Lab](https://scholar.google.com.eg/citations?user=oiebNboAAAAJ&hl=en)
## Citation
If you find this repository useful in your research, please cite this article as:
L. Yang and A. Shami, "A Transfer Learning and Optimized CNN Based Intrusion Detection System for Internet of Vehicles," ICC 2022 - IEEE International Conference on Communications, 2022, pp. 2774-2779, doi: 10.1109/ICC45855.2022.9838780.
```
@INPROCEEDINGS{9838780,
author={Yang, Li and Shami, Abdallah},
booktitle={ICC 2022 - IEEE International Conference on Communications},
title={A Transfer Learning and Optimized CNN Based Intrusion Detection System for Internet of Vehicles},
year={2022},
pages={2774-2779},
doi={10.1109/ICC45855.2022.9838780}}
```

@ -0,0 +1,3 @@
# The sampled datasets used for the experiments in the sample code
**Car_Hacking_5%.csv**: The 5% randomly sampled subset of the [Car Hacking dataset](https://ocslab.hksecurity.net/Datasets/CAN-intrusion-dataset)

Binary file not shown (new image, 80 KiB)

@ -0,0 +1 @@
# The code in this folder shows an example of the pre-processing of the Car-Hacking dataset.

7 file diffs suppressed because one or more lines are too long

@ -0,0 +1,29 @@
BSD 3-Clause License
Copyright (c) 2020, Mahendra Data
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
1. Redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
3. Neither the name of the copyright holder nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

@ -0,0 +1,2 @@
# CICIDS2017-ML
The purpose of this repository is to demonstrate the steps of processing the CICIDS2017 dataset using machine learning algorithms.

@ -0,0 +1,30 @@
import pandas as pd


def merge():
    # Read the three CSV files
    df1 = pd.read_csv("../_dataset/TrafficLabelling_/Friday-WorkingHours-Morning.pcap_ISCX.csv")
    df2 = pd.read_csv("../_dataset/TrafficLabelling_/Friday-WorkingHours-Afternoon-PortScan.pcap_ISCX.csv")
    df3 = pd.read_csv("../_dataset/TrafficLabelling_/Friday-WorkingHours-Afternoon-DDos.pcap_ISCX.csv")
    # Concatenate them into a single DataFrame
    df = pd.concat([df1, df2, df3])
    # Save the result as a new CSV file
    df.to_csv("../_dataset/TrafficLabelling_/Friday-WorkingHours-merged.csv", index=False)


def select():
    df = pd.read_csv('../_dataset/TrafficLabelling_/Friday-WorkingHours-merged.csv')
    df_ddos = df[df.iloc[:, -1] == 'DDoS']
    df_ddos.to_csv('../_dataset/TrafficLabelling_/Friday-WorkingHours-DDoS.csv', index=False)


def search(query: str, row_name: str):
    df = pd.read_csv('../_dataset/TrafficLabelling_/Friday-WorkingHours-merged.csv')
    result = df[df[row_name].str.contains(query)]
    print(result.head())


if __name__ == "__main__":
    # merge()
    # select()
    search("172.16.0.1-192.168.10.50-49533-80-6", "Flow ID")

@ -1,2 +1,39 @@
CSV_PATH = './_dataset/DDos3.csv'
BYPASS_COLUMNS= ('Destination Port', 'Label')
import datetime
CSV_PATH = './_dataset/TrafficLabelling_/Friday-WorkingHours-DDoS.csv'
BYPASS_COLUMNS= ('Destination Port', 'Label')
UNIQUE_COLUMNS = [' Bwd PSH Flags', ' Fwd URG Flags', ' Bwd URG Flags', ' CWE Flag Count',
'Fwd Avg Bytes/Bulk', ' Fwd Avg Packets/Bulk', ' Fwd Avg Bulk Rate',
' Bwd Avg Bytes/Bulk', ' Bwd Avg Packets/Bulk', 'Bwd Avg Bulk Rate']
IMG_SAVE_PATH = f'./saves/{datetime.datetime.now().strftime("%Y%m%d%H%M%S")}'
# Reference: full column list of the CICIDS2017 TrafficLabelling_ CSVs
(['Flow ID', ' Source IP', ' Source Port', ' Destination IP',
' Destination Port', ' Protocol', ' Timestamp', ' Flow Duration',
' Total Fwd Packets', ' Total Backward Packets',
'Total Length of Fwd Packets', ' Total Length of Bwd Packets',
' Fwd Packet Length Max', ' Fwd Packet Length Min',
' Fwd Packet Length Mean', ' Fwd Packet Length Std',
'Bwd Packet Length Max', ' Bwd Packet Length Min',
' Bwd Packet Length Mean', ' Bwd Packet Length Std', 'Flow Bytes/s',
' Flow Packets/s', ' Flow IAT Mean', ' Flow IAT Std', ' Flow IAT Max',
' Flow IAT Min', 'Fwd IAT Total', ' Fwd IAT Mean', ' Fwd IAT Std',
' Fwd IAT Max', ' Fwd IAT Min', 'Bwd IAT Total', ' Bwd IAT Mean',
' Bwd IAT Std', ' Bwd IAT Max', ' Bwd IAT Min', 'Fwd PSH Flags',
' Bwd PSH Flags', ' Fwd URG Flags', ' Bwd URG Flags',
' Fwd Header Length', ' Bwd Header Length', 'Fwd Packets/s',
' Bwd Packets/s', ' Min Packet Length', ' Max Packet Length',
' Packet Length Mean', ' Packet Length Std', ' Packet Length Variance',
'FIN Flag Count', ' SYN Flag Count', ' RST Flag Count',
' PSH Flag Count', ' ACK Flag Count', ' URG Flag Count',
' CWE Flag Count', ' ECE Flag Count', ' Down/Up Ratio',
' Average Packet Size', ' Avg Fwd Segment Size',
' Avg Bwd Segment Size', ' Fwd Header Length.1', 'Fwd Avg Bytes/Bulk',
' Fwd Avg Packets/Bulk', ' Fwd Avg Bulk Rate', ' Bwd Avg Bytes/Bulk',
' Bwd Avg Packets/Bulk', 'Bwd Avg Bulk Rate', 'Subflow Fwd Packets',
' Subflow Fwd Bytes', ' Subflow Bwd Packets', ' Subflow Bwd Bytes',
'Init_Win_bytes_forward', ' Init_Win_bytes_backward',
' act_data_pkt_fwd', ' min_seg_size_forward', 'Active Mean',
' Active Std', ' Active Max', ' Active Min', 'Idle Mean', ' Idle Std',
' Idle Max', ' Idle Min', ' Label'])

@ -4,7 +4,9 @@ import pandas as pd
import numpy as np
from config import *
import matplotlib.pyplot as plt
from utils.dataframe import *
from sklearn.preprocessing import QuantileTransformer
from PIL import Image
def is_in_bypass_list(column_name: str, bypass_list: tuple) -> bool:
    for bypass in bypass_list:
@ -37,6 +39,9 @@ def averaging_df(df: pd.DataFrame, column_num: int = None):
    return df, column_num
def iter_df_to_point(df: pd.DataFrame, column_num: int = None):
    size = 0
    points = []
@ -100,7 +105,7 @@ def generate_and_save(base_path: str, point: tuple, size: int, calculate):
def process_single_threaded(df: pd.DataFrame):
    df, size = averaging_df(df)
    points = iter_df_to_point(df, size)
    base_path = f'./saves/{datetime.datetime.now().strftime("%Y%m%d%H%M%S")}'
    base_path = IMG_SAVE_PATH
    create_dir(base_path)
    for point_dict in points:
        num = list(point_dict.keys())[0]
@ -113,4 +118,41 @@ def process_single_threaded(df: pd.DataFrame):
if __name__ == '__main__':
    df = input_csv_to_df(CSV_PATH)
    process(df)
    # process(df)
    # process_single_threaded(df)
    df = df.replace([np.inf, -np.inf], np.nan)
    df = df.dropna(axis=0)  # Drop rows that contain NaN values
    df = get_ddos_df(df)
    df = drop_columns(df, UNIQUE_COLUMNS)
    # df = drop_unique_columns(df)
    df = df.iloc[:, 7:]
    numeric_features = df.dtypes[df.dtypes != 'object'].index
    scaler = QuantileTransformer()
    df[numeric_features] = scaler.fit_transform(df[numeric_features])
    # Multiply the feature values by 255 to transform them into the scale of [0,255]
    df[numeric_features] = df[numeric_features].apply(
        lambda x: (x * 255))
    df_clean_data = df
    row_length = len(df_clean_data.columns)  # number of features per sample
    col_length = len(df_clean_data)  # number of samples
    # Stack row_length*3 consecutive samples into one row_length x row_length x 3 image
    count = 0
    ims = []
    for i in range(0, col_length):
        count = count + 1
        if count <= (row_length*3):
            im = df_clean_data.iloc[i].values
            ims = np.append(ims, im)
        else:
            ims = np.array(ims).reshape(row_length, row_length, 3)
            array = np.array(ims, dtype=np.uint8)
            new_image = Image.fromarray(array)
            # IMG_SAVE_PATH has no trailing slash, so add a separator
            new_image.save(IMG_SAVE_PATH + '/' + str(i) + '.png')
            count = 0
            ims = []
    print(df)

@ -0,0 +1,27 @@
import pandas as pd


def drop_unique_columns(df: pd.DataFrame):
    nunique = df.nunique()  # Count the number of unique values in each column
    cols_to_drop = nunique[nunique == 1].index  # Find the columns that have only one unique value
    df.drop(cols_to_drop, axis=1, inplace=True)  # Drop those columns
    print(cols_to_drop)  # Print the names of the dropped columns
    return df


# def drop_columns_with_fix_up(df: pd.DataFrame, columns: list):
#     columns = [w.lstrip() for w in columns]
#     df = drop_columns(df, columns)
#     columns = [" " + w for w in columns]
#     df = drop_columns(df, columns)
#     return df


def drop_columns(df: pd.DataFrame, columns: list):
    columns = [w.lstrip() for w in columns]
    for column_name in columns:
        cols_to_drop = df.filter(regex=column_name).columns
        df.drop(cols_to_drop, axis=1, inplace=True)
    return df


def get_ddos_df(df: pd.DataFrame):
    return df[df.iloc[:, -1] == 'DDoS']
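# Example usage (a sketch; the CSV path is illustrative):
#   df = pd.read_csv('../_dataset/TrafficLabelling_/Friday-WorkingHours-merged.csv')
#   df = drop_columns(df, UNIQUE_COLUMNS)  # drop the constant columns listed in config.py
#   ddos_df = get_ddos_df(df)              # keep only the rows labeled 'DDoS'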

@ -0,0 +1,29 @@
from scapy.all import *
from loguru import logger
def split_pcap(file_path: str, chunk_size: int, save_base_path: str = None):
    packets = PcapReader(file_path)
    chunk = []
    counter = 1
    for packet in packets:
        # logger.info(packet.time)
        chunk.append(packet)
        if len(chunk) == chunk_size:
            wrpcap(f'{save_base_path}/chunk_{counter}.pcap', chunk)
            chunk = []
            logger.info(f'chunk_{counter}.pcap saved')
            counter += 1
    if chunk:
        wrpcap(f'{save_base_path}/chunk_{counter}.pcap', chunk)


def get_packet_time(pkt: Packet):
    return pkt.time


if __name__ == '__main__':
    from utils.files import create_dir
    create_dir('../_dataset/pcap/Friday-WorkingHours')
    split_pcap('../_dataset/pcap/Friday-WorkingHours.pcap', 10000, '../_dataset/pcap/Friday-WorkingHours')