You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
349 lines
7.8 KiB
Python
349 lines
7.8 KiB
Python
#!/usr/bin/env python
|
|
# coding: utf-8
|
|
|
|
# # A Transfer Learning and Optimized CNN Based Intrusion Detection System for Internet of Vehicles
|
|
# This is the code for the paper entitled "**A Transfer Learning and Optimized CNN Based Intrusion Detection System for Internet of Vehicles**" accepted in IEEE International Conference on Communications (IEEE ICC).
|
|
# Authors: Li Yang (lyang339@uwo.ca) and Abdallah Shami (Abdallah.Shami@uwo.ca)
|
|
# Organization: The Optimized Computing and Communications (OC2) Lab, ECE Department, Western University
|
|
#
|
|
# **Notebook 1: Data pre-processing**
|
|
# Procedures:
|
|
# 1): Read the dataset
|
|
# 2): Transform the tabular data into images
|
|
# 3): Display the transformed images
|
|
# 4): Split the training and test set
|
|
|
|
# ## Import libraries
|
|
|
|
# In[14]:
|
|
|
|
|
|
import numpy as np
|
|
import pandas as pd
|
|
import os
|
|
import cv2
|
|
import math
|
|
import random
|
|
import matplotlib.pyplot as plt
|
|
import shutil
|
|
from sklearn.preprocessing import QuantileTransformer
|
|
from PIL import Image
|
|
import warnings
|
|
warnings.filterwarnings("ignore")
|
|
|
|
|
|
# ## Read the Car-Hacking/CAN-Intrusion dataset
|
|
# The complete Car-Hacking dataset is publicly available at: https://ocslab.hksecurity.net/Datasets/CAN-intrusion-dataset
|
|
# In this repository, due to the file size limit of GitHub, we use the 5% subset.
|
|
|
|
# In[15]:
|
|
|
|
|
|
#Read dataset
|
|
# Load the 5% Car-Hacking subset; the path is relative to the working directory
df=pd.read_csv('data/Car_Hacking_5%.csv')
|
|
|
|
|
|
# In[16]:
|
|
|
|
|
|
# Bare expression: shows the DataFrame in a notebook; no visible effect when run as a script
df
|
|
|
|
|
|
# In[17]:
|
|
|
|
|
|
# The labels of the dataset. "R" indicates normal patterns, and there are four types of attack (DoS, fuzzy, gear spoofing, and RPM spoofing attacks)
|
|
# Count the samples per label (the result is only displayed in a notebook)
df.Label.value_counts()
|
|
|
|
|
|
# ## Data Transformation
|
|
# Convert tabular data to images
|
|
# Procedures:
|
|
# 1. Use quantile transform to transform the original data samples into the scale of [0,255], representing pixel values
|
|
# 2. Generate images for each category (Normal, DoS, Fuzzy, Gear, RPM), each image consists of 27 data samples with 9 features. Thus, the size of each image is 9*9*3, length 9, width 9, and 3 color channels (RGB).
|
|
|
|
# In[18]:
|
|
|
|
|
|
# Rescale every non-object column into [0,1] with a quantile transform
column_dtypes = df.dtypes
numeric_features = column_dtypes[column_dtypes != 'object'].index
scaler = QuantileTransformer()
scaled_values = scaler.fit_transform(df[numeric_features])
df[numeric_features] = scaled_values
|
|
|
|
|
|
# In[19]:
|
|
|
|
|
|
# Stretch the scaled features to the pixel range [0,255]
df[numeric_features] = df[numeric_features] * 255
|
|
|
|
|
|
# In[20]:
|
|
|
|
|
|
# Summary statistics of the rescaled data (the result is only displayed in a notebook)
df.describe()
|
|
|
|
|
|
# All features are in the same scale of [0,255]
|
|
|
|
# ### Generate images for each class
|
|
|
|
# In[21]:
|
|
|
|
|
|
# One feature-only DataFrame per class, with the 'Label' column removed:
# df0 = 'R' (normal), df1 = 'RPM', df2 = 'gear', df3 = 'DoS', df4 = 'Fuzzy'
df0, df1, df2, df3, df4 = (
    df.loc[df['Label'] == label].drop(columns=['Label'])
    for label in ('R', 'RPM', 'gear', 'DoS', 'Fuzzy')
)
|
|
|
|
|
|
# In[22]:
|
|
|
|
|
|
# Generate 9*9 color images for class 0 (Normal)
# 27 consecutive rows are flattened into 243 values and reshaped into one
# 9x9x3 image, so each row must contribute 9 feature values.
count = 0
ims = []

image_path = "train/0/"
os.makedirs(image_path)

# Class 0 must be built from df0 (the rows labelled 'R'), not from df1:
# reading df1 here would fill the "Normal" folder with RPM-spoofing images.
for i in range(0, len(df0)):
    count = count + 1
    if count <= 27:
        im = df0.iloc[i].values
        ims = np.append(ims, im)
    else:
        # NOTE(review): the row that triggers the flush (every 28th row) is
        # not added to any image. Kept as-is so the file names
        # (27.png, 55.png, 83.png, ...) match the other class folders.
        ims = np.array(ims).reshape(9, 9, 3)
        array = np.array(ims, dtype=np.uint8)
        new_image = Image.fromarray(array)
        new_image.save(image_path + str(i) + '.png')
        count = 0
        ims = []
|
|
|
|
|
|
# In[23]:
|
|
|
|
|
|
# Generate 9*9 color images for class 1 (RPM spoofing)
image_path = "train/1/"
os.makedirs(image_path)

count = 0
ims = []
for i in range(len(df1)):
    count += 1
    if count > 27:
        # 28th row since the last flush: write the 27 buffered rows as one
        # 9x9x3 image named after the current row index; this row itself
        # is not stored.
        array = np.array(ims).reshape(9, 9, 3).astype(np.uint8)
        Image.fromarray(array).save("{}{}.png".format(image_path, i))
        count, ims = 0, []
        continue
    ims = np.append(ims, df1.iloc[i].values)
|
|
|
|
|
|
# In[24]:
|
|
|
|
|
|
# Generate 9*9 color images for class 2 (Gear spoofing)
image_path = "train/2/"
os.makedirs(image_path)

count = 0
ims = []
for i in range(len(df2)):
    count += 1
    if count > 27:
        # 28th row since the last flush: write the 27 buffered rows as one
        # 9x9x3 image named after the current row index; this row itself
        # is not stored.
        array = np.array(ims).reshape(9, 9, 3).astype(np.uint8)
        Image.fromarray(array).save("{}{}.png".format(image_path, i))
        count, ims = 0, []
        continue
    ims = np.append(ims, df2.iloc[i].values)
|
|
|
|
|
|
# In[25]:
|
|
|
|
|
|
# Generate 9*9 color images for class 3 (DoS attack)
image_path = "train/3/"
os.makedirs(image_path)

count = 0
ims = []
for i in range(len(df3)):
    count += 1
    if count > 27:
        # 28th row since the last flush: write the 27 buffered rows as one
        # 9x9x3 image named after the current row index; this row itself
        # is not stored.
        array = np.array(ims).reshape(9, 9, 3).astype(np.uint8)
        Image.fromarray(array).save("{}{}.png".format(image_path, i))
        count, ims = 0, []
        continue
    ims = np.append(ims, df3.iloc[i].values)
|
|
|
|
|
|
# In[26]:
|
|
|
|
|
|
# Generate 9*9 color images for class 4 (Fuzzy attack)
image_path = "train/4/"
os.makedirs(image_path)

count = 0
ims = []
for i in range(len(df4)):
    count += 1
    if count > 27:
        # 28th row since the last flush: write the 27 buffered rows as one
        # 9x9x3 image named after the current row index; this row itself
        # is not stored.
        array = np.array(ims).reshape(9, 9, 3).astype(np.uint8)
        Image.fromarray(array).save("{}{}.png".format(image_path, i))
        count, ims = 0, []
        continue
    ims = np.append(ims, df4.iloc[i].values)
|
|
|
|
|
|
# ## Split the training and test set
|
|
|
|
# In[27]:
|
|
|
|
|
|
# Collect the path of every generated image under ./train/<class>/
Train_Dir = './train/'
Val_Dir = './test/'
allimgs = [
    os.path.join(Train_Dir, subdir, filename)
    for subdir in os.listdir(Train_Dir)
    for filename in os.listdir(os.path.join(Train_Dir, subdir))
]
print(len(allimgs))  # Print the total number of images
|
|
|
|
|
|
# In[28]:
|
|
|
|
|
|
#split a test set from the dataset, train/test size = 80%/20%
|
|
# Floor division by 5 reserves one fifth of the collected images for the test set
Numbers=len(allimgs)//5 #size of test set (20%)
|
|
|
|
def mymovefile(srcfile, dstfile):
    """Move ``srcfile`` to ``dstfile``, creating the destination folder if needed.

    Args:
        srcfile: Path of the file to move.
        dstfile: Full destination path, including the file name.

    If ``srcfile`` is not an existing regular file, a message is printed and
    nothing is moved.
    """
    if not os.path.isfile(srcfile):
        print("%s not exist!" % (srcfile))
        return

    fpath = os.path.dirname(dstfile)
    # An empty fpath means the destination is in the current directory;
    # os.makedirs('') would raise, so only create a folder when one is named.
    if fpath and not os.path.exists(fpath):
        os.makedirs(fpath)
    shutil.move(srcfile, dstfile)
|
|
|
|
|
|
# In[29]:
|
|
|
|
|
|
# The size of test set
|
|
# Bare expression: shows the value in a notebook; no effect when run as a script
Numbers
|
|
|
|
|
|
# In[30]:
|
|
|
|
|
|
# Create the test set: move a random sample of `Numbers` images
# from the training folder to the same relative location under the test folder
val_imgs = random.sample(allimgs, Numbers)
for src_path in val_imgs:
    mymovefile(src_path, src_path.replace(Train_Dir, Val_Dir))
print('Finish creating test set')
|
|
|
|
|
|
# In[31]:
|
|
|
|
|
|
#resize the images 224*224 for better CNN training
|
|
def get_224(folder, dstdir):
    """Resize every image under ``folder`` to 224x224 and save it under ``dstdir``.

    The sub-directory layout of ``folder`` is mirrored below ``dstdir`` and
    file names are kept. Files that OpenCV cannot decode are reported and
    skipped.

    Args:
        folder: Root directory that is walked recursively for image files.
        dstdir: Root directory that receives the resized copies.
    """
    # Collect all paths first so files written during resizing are never
    # picked up by the walk (matters if dstdir lies inside folder).
    imgfilepaths = []
    for root, dirs, imgs in os.walk(folder):
        for thisimg in imgs:
            imgfilepaths.append(os.path.join(root, thisimg))

    for thisimg_path in imgfilepaths:
        src_dir, filename = os.path.split(thisimg_path)
        # Map the source folder onto dstdir via its path relative to
        # `folder`. Plain str.replace left files located directly in `folder`
        # pointing at their own path (overwriting the source) and built
        # wrong names when dstdir had no trailing separator.
        rel_dir = os.path.relpath(src_dir, folder)
        if rel_dir == os.curdir:
            dir_name = dstdir
        else:
            dir_name = os.path.join(dstdir, rel_dir)
        new_file_path = os.path.join(dir_name, filename)

        img = cv2.imread(thisimg_path)
        if img is None:
            # cv2.imread returns None instead of raising for unreadable files
            print("%s is not a readable image, skipped" % (thisimg_path))
            continue

        if not os.path.exists(dir_name):
            os.makedirs(dir_name)
        img = cv2.resize(img, (224, 224))
        cv2.imwrite(new_file_path, img)
    print('Finish resizing')
|
|
|
|
|
|
# In[32]:
|
|
|
|
|
|
# Write 224x224 copies of the training images to ./train_224/
DATA_DIR_224 = './train_224/'
get_224('./train/', DATA_DIR_224)
|
|
|
|
|
|
# In[33]:
|
|
|
|
|
|
# Write 224x224 copies of the test images to ./test_224/
DATA_DIR2_224 = './test_224/'
get_224('./test/', DATA_DIR2_224)
|
|
|
|
|
|
# ### Display samples for each category
|
|
|
|
# In[34]:
|
|
|
|
|
|
# Read one sample image per category.
# The preferred file names are used when they exist. The test split moves
# randomly chosen files out of ./train/ before resizing, so a preferred name
# may be missing from ./train_224/; in that case the first file (in sorted
# order) of the class folder is shown instead.
def _sample_image_path(class_dir, preferred):
    """Return the path of `preferred` in `class_dir`, else of its first file."""
    candidate = os.path.join(class_dir, preferred)
    if os.path.isfile(candidate):
        return candidate
    available = sorted(os.listdir(class_dir))
    if not available:
        raise FileNotFoundError("no images found in %s" % (class_dir))
    return os.path.join(class_dir, available[0])


img1 = Image.open(_sample_image_path('./train_224/0', '27.png'))
img2 = Image.open(_sample_image_path('./train_224/1', '83.png'))
img3 = Image.open(_sample_image_path('./train_224/2', '27.png'))
img4 = Image.open(_sample_image_path('./train_224/3', '27.png'))
img5 = Image.open(_sample_image_path('./train_224/4', '27.png'))

# Show the five samples side by side, one subplot per category
titled_samples = [
    (img1, "Normal"),
    (img2, "RPM Spoofing"),
    (img3, "Gear Spoofing"),
    (img4, "DoS Attack"),
    (img5, "Fuzzy Attack"),
]
plt.figure(figsize=(10, 10))
for position, (sample, title) in enumerate(titled_samples, start=1):
    plt.subplot(1, 5, position)
    plt.imshow(sample)
    plt.title(title)
plt.show()  # display it
|
|
|
|
|
|
# In[ ]:
|
|
|
|
|
|
|
|
|