#!/usr/bin/env python
# coding: utf-8

# # A Transfer Learning and Optimized CNN Based Intrusion Detection System for Internet of Vehicles
# This is the code for the paper entitled "**A Transfer Learning and Optimized CNN Based Intrusion Detection System for Internet of Vehicles**" accepted in IEEE International Conference on Communications (IEEE ICC).
# Authors: Li Yang (lyang339@uwo.ca) and Abdallah Shami (Abdallah.Shami@uwo.ca)
# Organization: The Optimized Computing and Communications (OC2) Lab, ECE Department, Western University
#
# **Notebook 1: Data pre-processing**
# Procedures:
#   1): Read the dataset
#   2): Transform the tabular data into images
#   3): Display the transformed images
#   4): Split the training and test set

# ## Import libraries

# In[14]:

import numpy as np
import pandas as pd
import os
import cv2
import math
import random
import matplotlib.pyplot as plt
import shutil
from sklearn.preprocessing import QuantileTransformer
from PIL import Image
import warnings
warnings.filterwarnings("ignore")


# ## Read the Car-Hacking/CAN-Intrusion dataset
# The complete Car-Hacking dataset is publicly available at: https://ocslab.hksecurity.net/Datasets/CAN-intrusion-dataset
# In this repository, due to the file size limit of GitHub, we use the 5% subset.

# In[15]:

# Read dataset
df = pd.read_csv('data/Car_Hacking_5%.csv')

# In[16]:

df

# In[17]:

# The labels of the dataset. "R" indicates normal patterns, and there are four
# types of attack (DoS, fuzzy, gear spoofing, and RPM spoofing attacks)
df.Label.value_counts()


# ## Data Transformation
# Convert tabular data to images
# Procedures:
# 1. Use quantile transform to transform the original data samples into the scale of [0,255], representing pixel values
# 2. Generate images for each category (Normal, DoS, Fuzzy, Gear, RPM), each image consists of 27 data samples with 9 features. Thus, the size of each image is 9*9*3, length 9, width 9, and 3 color channels (RGB).

# In[18]:

# Transform all features into the scale of [0,1]
numeric_features = df.dtypes[df.dtypes != 'object'].index
scaler = QuantileTransformer()
df[numeric_features] = scaler.fit_transform(df[numeric_features])

# In[19]:

# Multiply the feature values by 255 to transform them into the scale of [0,255]
df[numeric_features] = df[numeric_features] * 255

# In[20]:

df.describe()

# All features are in the same scale of [0,255]

# ### Generate images for each class

# In[21]:

df0 = df[df['Label'] == 'R'].drop(['Label'], axis=1)
df1 = df[df['Label'] == 'RPM'].drop(['Label'], axis=1)
df2 = df[df['Label'] == 'gear'].drop(['Label'], axis=1)
df3 = df[df['Label'] == 'DoS'].drop(['Label'], axis=1)
df4 = df[df['Label'] == 'Fuzzy'].drop(['Label'], axis=1)

# In[22]:

ROWS_PER_IMAGE = 27       # data samples packed into one image
IMAGE_SHAPE = (9, 9, 3)   # height, width, RGB channels


def generate_images(frame, image_path, rows_per_image=ROWS_PER_IMAGE,
                    image_shape=IMAGE_SHAPE):
    """Write consecutive windows of ``frame`` rows to ``image_path`` as PNGs.

    Each image is built from ``rows_per_image`` consecutive rows, flattened
    row by row, reshaped to ``image_shape`` and cast to ``uint8``.

    Parameters:
        frame: DataFrame holding only numeric feature columns.
        image_path: output directory; created if it does not exist.
        rows_per_image: number of rows packed into one image.
        image_shape: shape each window is reshaped to.

    Returns:
        The number of images written.

    Raises:
        ValueError: if one window of rows does not fill ``image_shape``.
    """
    os.makedirs(image_path, exist_ok=True)  # allow re-running the script
    values = frame.to_numpy(dtype=float)

    n_features = values.shape[1] if values.ndim == 2 else 0
    if rows_per_image * n_features != int(np.prod(image_shape)):
        raise ValueError(
            "%d rows x %d features cannot be reshaped to %s"
            % (rows_per_image, n_features, (image_shape,))
        )

    saved = 0
    # NOTE: windows start every ``rows_per_image + 1`` rows and an image is
    # named after the row index that follows its window (27, 55, 83, ...).
    # That following row is not part of any image. This keeps the file names
    # and image contents stable for code that refers to e.g. '27.png'.
    for trigger in range(rows_per_image, len(values), rows_per_image + 1):
        window = values[trigger - rows_per_image:trigger]
        pixels = window.reshape(image_shape).astype(np.uint8)
        Image.fromarray(pixels).save(
            os.path.join(image_path, str(trigger) + '.png')
        )
        saved += 1
    return saved


# Generate 9*9 color images for every class:
#   0 = Normal, 1 = RPM spoofing, 2 = Gear spoofing, 3 = DoS attack, 4 = Fuzzy attack
# Class 0 is built from df0 (the normal traffic), each class from its own frame.
for class_id, class_frame in enumerate((df0, df1, df2, df3, df4)):
    generate_images(class_frame, "train/" + str(class_id) + "/")


# ## Split the training and test set

# In[27]:

# Collect the paths of all generated images
Train_Dir = './train/'
Val_Dir = './test/'
allimgs = []
for subdir in os.listdir(Train_Dir):
    class_dir = os.path.join(Train_Dir, subdir)
    if not os.path.isdir(class_dir):
        continue  # ignore stray files such as .DS_Store
    for filename in os.listdir(class_dir):
        allimgs.append(os.path.join(Train_Dir, subdir, filename))
print(len(allimgs))  # Print the total number of images

# In[28]:

# Split a test set from the dataset, train/test size = 80%/20%
Numbers = len(allimgs) // 5  # size of test set (20%)


def mymovefile(srcfile, dstfile):
    """Move ``srcfile`` to ``dstfile``, creating the target directory if needed.

    Prints a message and does nothing when ``srcfile`` is not an existing file.
    """
    if not os.path.isfile(srcfile):
        print("%s not exist!" % (srcfile))
        return
    fpath = os.path.dirname(dstfile)
    if fpath:  # an empty directory part means the current directory
        os.makedirs(fpath, exist_ok=True)
    shutil.move(srcfile, dstfile)


# In[29]:

# The size of test set
Numbers

# In[30]:

# Create the test set by moving a random 20% sample out of the training folder
val_imgs = random.sample(allimgs, Numbers)
for img in val_imgs:
    # Only the leading directory prefix is swapped
    dest_path = img.replace(Train_Dir, Val_Dir, 1)
    mymovefile(img, dest_path)
print('Finish creating test set')

# In[31]:

# Resize the images to 224*224 for better CNN training
def get_224(folder, dstdir):
    """Resize every image under ``folder`` to 224x224 and write it to ``dstdir``.

    The sub-directory layout below ``folder`` is mirrored below ``dstdir``.
    Files that OpenCV cannot read are skipped with a message.
    """
    # Collect all paths first so files written during resizing are never
    # picked up by the walk.
    imgfilepaths = []
    for root, dirs, imgs in os.walk(folder):
        for thisimg in imgs:
            imgfilepaths.append(os.path.join(root, thisimg))

    for thisimg_path in imgfilepaths:
        # Path relative to ``folder`` so the output always lands in ``dstdir``
        relative_path = os.path.relpath(thisimg_path, folder)
        new_file_path = os.path.join(dstdir, relative_path)
        os.makedirs(os.path.dirname(new_file_path), exist_ok=True)

        img = cv2.imread(thisimg_path)
        if img is None:  # cv2.imread returns None for unreadable files
            print("%s is not a readable image, skipped" % (thisimg_path))
            continue
        img = cv2.resize(img, (224, 224))
        cv2.imwrite(new_file_path, img)
    print('Finish resizing')


# In[32]:

DATA_DIR_224 = './train_224/'
get_224(folder='./train/', dstdir=DATA_DIR_224)

# In[33]:

DATA_DIR2_224 = './test_224/'
get_224(folder='./test/', dstdir=DATA_DIR2_224)


# ### Display samples for each category

# In[34]:

def _pick_sample(class_dir, preferred):
    """Return the path of ``preferred`` in ``class_dir``, or of the first file there.

    The preferred file may have been moved to the test set by the random
    split, so fall back to any available image of the class.
    """
    candidate = os.path.join(class_dir, preferred)
    if os.path.isfile(candidate):
        return candidate
    available = sorted(os.listdir(class_dir))
    if not available:
        raise FileNotFoundError("no images found in %s" % (class_dir))
    return os.path.join(class_dir, available[0])


# Read one image for each category, the file name may vary (27.png, 83.png...)
samples = [
    ('./train_224/0', '27.png', "Normal"),
    ('./train_224/1', '83.png', "RPM Spoofing"),
    ('./train_224/2', '27.png', "Gear Spoofing"),
    ('./train_224/3', '27.png', "DoS Attack"),
    ('./train_224/4', '27.png', "Fuzzy Attack"),
]

plt.figure(figsize=(10, 10))
for position, (class_dir, preferred, title) in enumerate(samples, start=1):
    plt.subplot(1, 5, position)
    # Load the pixels inside the ``with`` so the file handle is closed promptly
    with Image.open(_pick_sample(class_dir, preferred)) as sample:
        plt.imshow(np.asarray(sample))
    plt.title(title)
plt.show()  # display it

# In[ ]: