Compare commits
3 Commits
| Author | SHA1 | Date |
|---|---|---|
| | 28fff1c924 | 2 years ago |
| | 984d403510 | 2 years ago |
| | 847780028c | 2 years ago |
@@ -0,0 +1,350 @@
#!/usr/bin/env python
# coding: utf-8

# # A Transfer Learning and Optimized CNN Based Intrusion Detection System for Internet of Vehicles

# This is the code for the paper entitled "**A Transfer Learning and Optimized CNN Based Intrusion Detection System for Internet of Vehicles**" accepted at the IEEE International Conference on Communications (IEEE ICC).

# Authors: Li Yang (lyang339@uwo.ca) and Abdallah Shami (Abdallah.Shami@uwo.ca)

# Organization: The Optimized Computing and Communications (OC2) Lab, ECE Department, Western University

# **Notebook 1: Data pre-processing**

# Procedures:
# 1) Read the dataset
# 2) Transform the tabular data into images
# 3) Display the transformed images
# 4) Split the training and test set


# ## Import libraries

# In[14]:


import numpy as np
import pandas as pd
import os
import cv2
import math
import random
import matplotlib.pyplot as plt
import shutil
from sklearn.preprocessing import QuantileTransformer
from PIL import Image
import warnings
warnings.filterwarnings("ignore")


# ## Read the Car-Hacking/CAN-Intrusion dataset

# The complete Car-Hacking dataset is publicly available at: https://ocslab.hksecurity.net/Datasets/CAN-intrusion-dataset

# In this repository, due to the file size limit of GitHub, we use the 5% subset.

# In[15]:


# Read the dataset
df = pd.read_csv('data/Car_Hacking_5%.csv')


# In[16]:


df


# In[17]:


# The labels of the dataset: "R" indicates normal patterns, and there are four
# types of attack (DoS, Fuzzy, gear spoofing, and RPM spoofing attacks)
df.Label.value_counts()


# ## Data Transformation

# Convert tabular data to images

# Procedures:
# 1. Use the quantile transform to map the original data samples into the scale of [0, 255], representing pixel values
# 2. Generate images for each category (Normal, DoS, Fuzzy, Gear, RPM). Each image consists of 27 data samples with 9 features each, i.e. 27 * 9 = 243 values, so the size of each image is 9*9*3: height 9, width 9, and 3 color channels (RGB).

# In[18]:


# Transform all features into the scale of [0,1]
numeric_features = df.dtypes[df.dtypes != 'object'].index
scaler = QuantileTransformer()
df[numeric_features] = scaler.fit_transform(df[numeric_features])


# In[19]:


# Multiply the feature values by 255 to transform them into the scale of [0,255]
df[numeric_features] = df[numeric_features].apply(lambda x: x * 255)


# In[20]:


df.describe()


# All features are now in the same scale of [0,255]

# ### Generate images for each class

# In[21]:


df0 = df[df['Label'] == 'R'].drop(['Label'], axis=1)
df1 = df[df['Label'] == 'RPM'].drop(['Label'], axis=1)
df2 = df[df['Label'] == 'gear'].drop(['Label'], axis=1)
df3 = df[df['Label'] == 'DoS'].drop(['Label'], axis=1)
df4 = df[df['Label'] == 'Fuzzy'].drop(['Label'], axis=1)


# In[22]:


# Generate 9*9 color images for class 0 (Normal)
count = 0
ims = []

image_path = "train/0/"
os.makedirs(image_path)

for i in range(0, len(df0)):
    count = count + 1
    if count <= 27:
        im = df0.iloc[i].values
        ims = np.append(ims, im)
    else:
        ims = np.array(ims).reshape(9, 9, 3)
        array = np.array(ims, dtype=np.uint8)
        new_image = Image.fromarray(array)
        new_image.save(image_path + str(i) + '.png')
        count = 0
        ims = []


# In[23]:


# Generate 9*9 color images for class 1 (RPM spoofing)
count = 0
ims = []

image_path = "train/1/"
os.makedirs(image_path)

for i in range(0, len(df1)):
    count = count + 1
    if count <= 27:
        im = df1.iloc[i].values
        ims = np.append(ims, im)
    else:
        ims = np.array(ims).reshape(9, 9, 3)
        array = np.array(ims, dtype=np.uint8)
        new_image = Image.fromarray(array)
        new_image.save(image_path + str(i) + '.png')
        count = 0
        ims = []


# In[24]:


# Generate 9*9 color images for class 2 (Gear spoofing)
count = 0
ims = []

image_path = "train/2/"
os.makedirs(image_path)

for i in range(0, len(df2)):
    count = count + 1
    if count <= 27:
        im = df2.iloc[i].values
        ims = np.append(ims, im)
    else:
        ims = np.array(ims).reshape(9, 9, 3)
        array = np.array(ims, dtype=np.uint8)
        new_image = Image.fromarray(array)
        new_image.save(image_path + str(i) + '.png')
        count = 0
        ims = []


# In[25]:


# Generate 9*9 color images for class 3 (DoS attack)
count = 0
ims = []

image_path = "train/3/"
os.makedirs(image_path)

for i in range(0, len(df3)):
    count = count + 1
    if count <= 27:
        im = df3.iloc[i].values
        ims = np.append(ims, im)
    else:
        ims = np.array(ims).reshape(9, 9, 3)
        array = np.array(ims, dtype=np.uint8)
        new_image = Image.fromarray(array)
        new_image.save(image_path + str(i) + '.png')
        count = 0
        ims = []


# In[26]:


# Generate 9*9 color images for class 4 (Fuzzy attack)
count = 0
ims = []

image_path = "train/4/"
os.makedirs(image_path)

for i in range(0, len(df4)):
    count = count + 1
    if count <= 27:
        im = df4.iloc[i].values
        ims = np.append(ims, im)
    else:
        ims = np.array(ims).reshape(9, 9, 3)
        array = np.array(ims, dtype=np.uint8)
        new_image = Image.fromarray(array)
        new_image.save(image_path + str(i) + '.png')
        count = 0
        ims = []
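

# A hedged aside (not in the original notebook): the five image-generation
# cells above repeat the same logic, so they can be folded into one helper.
# Minimal sketch; the helper name, the `exist_ok` flag, and the per-chunk
# file naming are our own illustrative choices.

# In[ ]:


def class_to_images(df_class, out_dir, rows_per_image=27):
    # 27 samples x 9 features = 243 values = one 9*9*3 RGB image
    os.makedirs(out_dir, exist_ok=True)
    values = df_class.values
    for k in range(len(values) // rows_per_image):
        chunk = values[k * rows_per_image:(k + 1) * rows_per_image]
        array = chunk.reshape(9, 9, 3).astype(np.uint8)
        Image.fromarray(array).save(os.path.join(out_dir, str(k) + '.png'))

# Example usage (illustrative):
#   for label, d in enumerate([df0, df1, df2, df3, df4]):
#       class_to_images(d, 'train/' + str(label) + '/')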


# ## Split the training and test set

# In[27]:


# Collect the paths of the generated images
Train_Dir = './train/'
Val_Dir = './test/'
allimgs = []
for subdir in os.listdir(Train_Dir):
    for filename in os.listdir(os.path.join(Train_Dir, subdir)):
        filepath = os.path.join(Train_Dir, subdir, filename)
        allimgs.append(filepath)
print(len(allimgs))  # Print the total number of images


# In[28]:


# Split a test set from the dataset, train/test size = 80%/20%
Numbers = len(allimgs) // 5  # Size of the test set (20%)

def mymovefile(srcfile, dstfile):
    if not os.path.isfile(srcfile):
        print("%s does not exist!" % srcfile)
    else:
        fpath, fname = os.path.split(dstfile)
        if not os.path.exists(fpath):
            os.makedirs(fpath)
        shutil.move(srcfile, dstfile)
        # print("move %s -> %s" % (srcfile, dstfile))


# In[29]:


# The size of the test set
Numbers


# In[30]:


# Create the test set
val_imgs = random.sample(allimgs, Numbers)
for img in val_imgs:
    dest_path = img.replace(Train_Dir, Val_Dir)
    mymovefile(img, dest_path)
print('Finished creating the test set')
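

# A hedged alternative (not in the original notebook) to the cell above:
# random.sample draws uniformly over all images, so class proportions in the
# test set are only approximately preserved. Splitting each class folder
# separately with scikit-learn makes the 80%/20% split exactly stratified.
# Sketch only; it would be run instead of, not after, the cell above.

# In[ ]:


# from sklearn.model_selection import train_test_split
# for subdir in os.listdir(Train_Dir):
#     files = [os.path.join(Train_Dir, subdir, f)
#              for f in os.listdir(os.path.join(Train_Dir, subdir))]
#     _, test_files = train_test_split(files, test_size=0.2, random_state=0)
#     for f in test_files:
#         mymovefile(f, f.replace(Train_Dir, Val_Dir))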


# In[31]:


# Resize the images to 224*224 for better CNN training
def get_224(folder, dstdir):
    imgfilepaths = []
    for root, dirs, imgs in os.walk(folder):
        for thisimg in imgs:
            thisimg_path = os.path.join(root, thisimg)
            imgfilepaths.append(thisimg_path)
    for thisimg_path in imgfilepaths:
        dir_name, filename = os.path.split(thisimg_path)
        dir_name = dir_name.replace(folder, dstdir)
        new_file_path = os.path.join(dir_name, filename)
        if not os.path.exists(dir_name):
            os.makedirs(dir_name)
        img = cv2.imread(thisimg_path)
        img = cv2.resize(img, (224, 224))
        cv2.imwrite(new_file_path, img)
    print('Finished resizing {}'.format(folder))


# In[32]:


DATA_DIR_224 = './train_224/'
get_224(folder='./train/', dstdir=DATA_DIR_224)


# In[33]:


DATA_DIR2_224 = './test_224/'
get_224(folder='./test/', dstdir=DATA_DIR2_224)


# ### Display samples for each category

# In[34]:


# Read one image per category; the file names may vary (27.png, 83.png, ...)
img1 = Image.open('./train_224/0/27.png')
img2 = Image.open('./train_224/1/83.png')
img3 = Image.open('./train_224/2/27.png')
img4 = Image.open('./train_224/3/27.png')
img5 = Image.open('./train_224/4/27.png')

plt.figure(figsize=(10, 10))
plt.subplot(1, 5, 1)
plt.imshow(img1)
plt.title("Normal")
plt.subplot(1, 5, 2)
plt.imshow(img2)
plt.title("RPM Spoofing")
plt.subplot(1, 5, 3)
plt.imshow(img3)
plt.title("Gear Spoofing")
plt.subplot(1, 5, 4)
plt.imshow(img4)
plt.title("DoS Attack")
plt.subplot(1, 5, 5)
plt.imshow(img5)
plt.title("Fuzzy Attack")
plt.show()  # Display the five sample images


# In[ ]:
@@ -0,0 +1,21 @@
MIT License

Copyright (c) 2022 Western OC2 Lab

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
@@ -0,0 +1,81 @@
# Intrusion-Detection-System-Using-CNN-and-Transfer-Learning

This is the code for the paper entitled "**[A Transfer Learning and Optimized CNN Based Intrusion Detection System for Internet of Vehicles](https://arxiv.org/pdf/2201.11812.pdf)**" published in the **IEEE International Conference on Communications (IEEE ICC)**, doi: [10.1109/ICC45855.2022.9838780](https://ieeexplore.ieee.org/document/9838780).
- Authors: Li Yang and Abdallah Shami
- Organization: The Optimized Computing and Communications (OC2) Lab, ECE Department, Western University

This repository introduces how to use **convolutional neural networks (CNNs)** and **transfer learning** techniques to develop **intrusion detection systems**. **Ensemble learning** and **hyperparameter optimization techniques** are also used to achieve optimized model performance.

- Another **intrusion detection system development code** using **decision tree-based machine learning algorithms (decision tree, random forest, XGBoost, stacking, etc.)** can be found at: [Intrusion-Detection-System-Using-Machine-Learning](https://github.com/Western-OC2-Lab/Intrusion-Detection-System-Using-Machine-Learning)

- A comprehensive **hyperparameter optimization** tutorial code can be found at: [Hyperparameter-Optimization-of-Machine-Learning-Algorithms](https://github.com/LiYangHart/Hyperparameter-Optimization-of-Machine-Learning-Algorithms)

## Abstract of The Paper
Modern vehicles, including autonomous vehicles and connected vehicles, are increasingly connected to the external world, which enables various functionalities and services. However, the increasing connectivity also enlarges the attack surface of the Internet of Vehicles (IoV), making it vulnerable to cyber-threats. Due to the lack of authentication and encryption procedures in vehicular networks, Intrusion Detection Systems (IDSs) are essential approaches to protect modern vehicle systems from network attacks. In this paper, a transfer learning and ensemble learning-based IDS is proposed for IoV systems using convolutional neural networks (CNNs) and hyper-parameter optimization techniques. In the experiments, the proposed IDS has demonstrated over 99.25% detection rates and F1-scores on two well-known public benchmark IoV security datasets: the Car-Hacking dataset and the CICIDS2017 dataset. This shows the effectiveness of the proposed IDS for cyber-attack detection in both intra-vehicle and external vehicular networks.

<p float="left">
  <img src="https://github.com/Western-OC2-Lab/Intrusion-Detection-System-Using-CNN-and-Transfer-Learning/blob/main/framework.png" width="500" />
  <img src="https://github.com/Western-OC2-Lab/Intrusion-Detection-System-Using-CNN-and-Transfer-Learning/blob/main/CAN.png" width="400" />
</p>

## Implementation
### CNN Models
The following pretrained architectures are used as base models (a minimal transfer-learning sketch follows the list):
* VGG16
* VGG19
* Xception
* Inception
* Resnet
* InceptionResnet
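
As a point of reference, transfer learning with these models typically freezes an ImageNet-pretrained convolutional base and trains a small task-specific head. A minimal, hedged sketch with Keras' built-in VGG16 (the head size, learning rate, and other hyperparameters below are illustrative assumptions, not the paper's tuned configuration):

```python
from keras.applications import VGG16
from keras.models import Model
from keras.layers import Dense, Flatten
from keras.optimizers import Adam

# Freeze the ImageNet-pretrained convolutional base
base = VGG16(weights='imagenet', include_top=False, input_shape=(224, 224, 3))
for layer in base.layers:
    layer.trainable = False

# Small task-specific head; 5 outputs = Normal + 4 attack classes
x = Flatten()(base.output)
x = Dense(256, activation='relu')(x)
out = Dense(5, activation='softmax')(x)

model = Model(inputs=base.input, outputs=out)
model.compile(optimizer=Adam(lr=1e-4),
              loss='categorical_crossentropy', metrics=['accuracy'])
```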

### Ensemble Learning Models
Three ensemble strategies are used to combine the trained CNNs (a minimal probability-averaging sketch follows the list):
* Bagging
* Probability Averaging
* Concatenation
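
For illustration, probability averaging is the simplest of the three: each trained CNN votes with its softmax distribution, and the mean distribution decides the final class. A hedged sketch (assuming `models` is a list of trained Keras models and `x_test` a batch of images):

```python
import numpy as np

def average_predict(models, x_test):
    # Mean of the per-model softmax outputs, then the argmax class
    probs = np.mean([m.predict(x_test) for m in models], axis=0)
    return np.argmax(probs, axis=1)
```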

### Hyperparameter Optimization Methods
Two optimization methods are used to tune the CNN hyperparameters (a hyperopt sketch follows the list):
* Random Search (RS)
* Bayesian Optimization - Tree Parzen Estimator (BO-TPE)
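
Since hyperopt is listed among the libraries below, here is a hedged BO-TPE sketch over two common CNN hyperparameters. The search space and the `train_and_score` objective (a hypothetical helper that trains a CNN with the given parameters and returns `1 - validation_accuracy`) are illustrative assumptions:

```python
from hyperopt import fmin, tpe, hp, Trials

space = {
    'lr': hp.loguniform('lr', -10, -4),              # ~4.5e-5 to ~1.8e-2
    'batch_size': hp.choice('batch_size', [16, 32, 64, 128]),
}

trials = Trials()
best = fmin(fn=train_and_score,   # hypothetical objective, defined elsewhere
            space=space, algo=tpe.suggest, max_evals=20, trials=trials)
print(best)
```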

### Dataset
1. CAN-intrusion/Car-Hacking dataset, a benchmark network security dataset for intra-vehicle intrusion detection
   * Publicly available at: https://ocslab.hksecurity.net/Datasets/CAN-intrusion-dataset
   * Can be processed using the same code

2. CICIDS2017 dataset, a popular network traffic dataset for intrusion detection problems
   * Publicly available at: https://www.unb.ca/cic/datasets/ids-2017.html

For the purpose of displaying the experimental results in Jupyter Notebook, the sampled subset of the CAN-intrusion dataset is used in the sample code. The subsets are in the "[data](https://github.com/Western-OC2-Lab/Intrusion-Detection-System-Using-CNN-and-Transfer-Learning/tree/main/data)" folder.

### Code
* [1-Data_pre-processing_CAN.ipynb](https://github.com/Western-OC2-Lab/Intrusion-Detection-System-Using-CNN-and-Transfer-Learning/blob/main/1-Data_pre-processing_CAN.ipynb): code for data pre-processing and transformation (tabular data to images).
* [2-CNN_Model_Development&Hyperparameter Optimization.ipynb](https://github.com/Western-OC2-Lab/Intrusion-Detection-System-Using-CNN-and-Transfer-Learning/blob/main/2-CNN_Model_Development%26Hyperparameter%20Optimization.ipynb): code for the development of CNN models and their hyperparameter optimization.
* [3-Ensemble_Models-CAN.ipynb](https://github.com/Western-OC2-Lab/Intrusion-Detection-System-Using-CNN-and-Transfer-Learning/blob/main/3-Ensemble_Models-CAN.ipynb): code for the construction of three ensemble learning techniques.

### Libraries
* Python 3.5+
* [Keras 2.1.0+](https://keras.io/)
* [Tensorflow 1.10.0+](https://www.tensorflow.org/install/gpu)
* [OpenCV-python](https://docs.opencv.org/4.x/d6/d00/tutorial_py_root.html)
* [hyperopt](https://github.com/hyperopt/hyperopt)

## Contact-Info
Please feel free to contact us for any questions or cooperation opportunities. We will be happy to help.
* Email: [liyanghart@gmail.com](mailto:liyanghart@gmail.com) or [Abdallah.Shami@uwo.ca](mailto:Abdallah.Shami@uwo.ca)
* GitHub: [LiYangHart](https://github.com/LiYangHart) and [Western OC2 Lab](https://github.com/Western-OC2-Lab/)
* LinkedIn: [Li Yang](https://www.linkedin.com/in/li-yang-phd-65a190176/)
* Google Scholar: [Li Yang](https://scholar.google.com.eg/citations?user=XEfM7bIAAAAJ&hl=en) and [OC2 Lab](https://scholar.google.com.eg/citations?user=oiebNboAAAAJ&hl=en)

## Citation
If you find this repository useful in your research, please cite this article as:

L. Yang and A. Shami, "A Transfer Learning and Optimized CNN Based Intrusion Detection System for Internet of Vehicles," ICC 2022 - IEEE International Conference on Communications, 2022, pp. 2774-2779, doi: 10.1109/ICC45855.2022.9838780.

```
@INPROCEEDINGS{9838780,
  author={Yang, Li and Shami, Abdallah},
  booktitle={ICC 2022 - IEEE International Conference on Communications},
  title={A Transfer Learning and Optimized CNN Based Intrusion Detection System for Internet of Vehicles},
  year={2022},
  pages={2774-2779},
  doi={10.1109/ICC45855.2022.9838780}}
```
@@ -0,0 +1,3 @@
# The sampled datasets used for the experiments in the sample code

**Car_Hacking_5%.csv**: The 5% randomly sampled subset of the [Car Hacking dataset](https://ocslab.hksecurity.net/Datasets/CAN-intrusion-dataset)
@@ -0,0 +1 @@
# The code in this folder shows an example of the pre-processing of the Car-Hacking dataset.
@@ -0,0 +1,29 @@
BSD 3-Clause License

Copyright (c) 2020, Mahendra Data
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

1. Redistributions of source code must retain the above copyright notice, this
   list of conditions and the following disclaimer.

2. Redistributions in binary form must reproduce the above copyright notice,
   this list of conditions and the following disclaimer in the documentation
   and/or other materials provided with the distribution.

3. Neither the name of the copyright holder nor the names of its
   contributors may be used to endorse or promote products derived from
   this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
@@ -0,0 +1,2 @@
# CICIDS2017-ML
The purpose of this repository is to demonstrate the steps of processing the CICIDS2017 dataset using machine learning algorithms.
@@ -0,0 +1,30 @@
import pandas as pd


def merge():
    # Read the three CSV files
    df1 = pd.read_csv("../_dataset/TrafficLabelling_/Friday-WorkingHours-Morning.pcap_ISCX.csv")
    df2 = pd.read_csv("../_dataset/TrafficLabelling_/Friday-WorkingHours-Afternoon-PortScan.pcap_ISCX.csv")
    df3 = pd.read_csv("../_dataset/TrafficLabelling_/Friday-WorkingHours-Afternoon-DDos.pcap_ISCX.csv")
    # Concatenate them into a single DataFrame
    df = pd.concat([df1, df2, df3])
    # Save the result as a new CSV file
    df.to_csv("../_dataset/TrafficLabelling_/Friday-WorkingHours-merged.csv", index=False)


def select():
    # Keep only the rows labeled 'DDoS' (the label is the last column)
    df = pd.read_csv('../_dataset/TrafficLabelling_/Friday-WorkingHours-merged.csv')
    df_ddos = df[df.iloc[:, -1] == 'DDoS']
    df_ddos.to_csv('../_dataset/TrafficLabelling_/Friday-WorkingHours-DDoS.csv', index=False)


def search(query: str, row_name: str):
    # Print the rows whose `row_name` column contains `query`
    df = pd.read_csv('../_dataset/TrafficLabelling_/Friday-WorkingHours-merged.csv')
    result = df[df[row_name].str.contains(query)]
    print(result.head())


if __name__ == "__main__":
    # merge()
    # select()
    search("172.16.0.1-192.168.10.50-49533-80-6", "Flow ID")
@@ -1,2 +1,39 @@
-CSV_PATH = './_dataset/DDos3.csv'
+import datetime
+
+CSV_PATH = './_dataset/TrafficLabelling_/Friday-WorkingHours-merged.csv'
 BYPASS_COLUMNS= ('Destination Port', 'Label')
+UNIQUE_COLUMNS = [' Bwd PSH Flags', ' Fwd URG Flags', ' Bwd URG Flags', ' CWE Flag Count',
+                  'Fwd Avg Bytes/Bulk', ' Fwd Avg Packets/Bulk', ' Fwd Avg Bulk Rate',
+                  ' Bwd Avg Bytes/Bulk', ' Bwd Avg Packets/Bulk', 'Bwd Avg Bulk Rate', 'Label']
+IMG_SAVE_PATH = f'./saves/{datetime.datetime.now().strftime("%Y%m%d%H%M%S")}'
+
+
+# Reference: the full column list of the merged CICIDS2017 CSV
+(['Flow ID', ' Source IP', ' Source Port', ' Destination IP',
+  ' Destination Port', ' Protocol', ' Timestamp', ' Flow Duration',
+  ' Total Fwd Packets', ' Total Backward Packets',
+  'Total Length of Fwd Packets', ' Total Length of Bwd Packets',
+  ' Fwd Packet Length Max', ' Fwd Packet Length Min',
+  ' Fwd Packet Length Mean', ' Fwd Packet Length Std',
+  'Bwd Packet Length Max', ' Bwd Packet Length Min',
+  ' Bwd Packet Length Mean', ' Bwd Packet Length Std', 'Flow Bytes/s',
+  ' Flow Packets/s', ' Flow IAT Mean', ' Flow IAT Std', ' Flow IAT Max',
+  ' Flow IAT Min', 'Fwd IAT Total', ' Fwd IAT Mean', ' Fwd IAT Std',
+  ' Fwd IAT Max', ' Fwd IAT Min', 'Bwd IAT Total', ' Bwd IAT Mean',
+  ' Bwd IAT Std', ' Bwd IAT Max', ' Bwd IAT Min', 'Fwd PSH Flags',
+  ' Bwd PSH Flags', ' Fwd URG Flags', ' Bwd URG Flags',
+  ' Fwd Header Length', ' Bwd Header Length', 'Fwd Packets/s',
+  ' Bwd Packets/s', ' Min Packet Length', ' Max Packet Length',
+  ' Packet Length Mean', ' Packet Length Std', ' Packet Length Variance',
+  'FIN Flag Count', ' SYN Flag Count', ' RST Flag Count',
+  ' PSH Flag Count', ' ACK Flag Count', ' URG Flag Count',
+  ' CWE Flag Count', ' ECE Flag Count', ' Down/Up Ratio',
+  ' Average Packet Size', ' Avg Fwd Segment Size',
+  ' Avg Bwd Segment Size', ' Fwd Header Length.1', 'Fwd Avg Bytes/Bulk',
+  ' Fwd Avg Packets/Bulk', ' Fwd Avg Bulk Rate', ' Bwd Avg Bytes/Bulk',
+  ' Bwd Avg Packets/Bulk', 'Bwd Avg Bulk Rate', 'Subflow Fwd Packets',
+  ' Subflow Fwd Bytes', ' Subflow Bwd Packets', ' Subflow Bwd Bytes',
+  'Init_Win_bytes_forward', ' Init_Win_bytes_backward',
+  ' act_data_pkt_fwd', ' min_seg_size_forward', 'Active Mean',
+  ' Active Std', ' Active Max', ' Active Min', 'Idle Mean', ' Idle Std',
+  ' Idle Max', ' Idle Min', ' Label'])
@@ -1,267 +0,0 @@
#!/usr/bin/env python
"""
Read traffic_csv
"""
import os
import argparse
import csv
import glob
import re

FLAGS = None
INPUT = "../raw_csvs/classes/browsing/reg/CICNTTor_browsing.raw.csv"  # "../dataset/iscxNTVPN2016/CompletePCAPs"
INPUT_DIR = "../raw_csvs/classes/chat/vpn/"
CLASSES_DIR = "../raw_csvs/classes/**/**/"

# LABEL_IND = 1
TPS = 60       # TimePerSession in secs
DELTA_T = 60   # Delta T between split sessions
MIN_TPS = 50

import matplotlib.pyplot as plt
import numpy as np

MTU = 1500


def session_spectogram(ts, sizes, name=None):
    plt.scatter(ts, sizes, marker='.')
    plt.ylim(0, MTU)
    plt.xlim(ts[0], ts[-1])
    # plt.yticks(np.arange(0, MTU, 10))
    # plt.xticks(np.arange(int(ts[0]), int(ts[-1]), 10))
    plt.title(name + " Session Spectogram")
    plt.ylabel('Size [B]')
    plt.xlabel('Time [sec]')
    plt.grid(True)
    plt.show()


def session_atricle_spectogram(ts, sizes, fpath=None, show=True, tps=None):
    if tps is None:
        max_delta_time = ts[-1] - ts[0]
    else:
        max_delta_time = tps

    ts_norm = ((np.array(ts) - ts[0]) / max_delta_time) * MTU
    plt.figure()
    plt.scatter(ts_norm, sizes, marker=',', c='k', s=5)
    plt.ylim(0, MTU)
    plt.xlim(0, MTU)
    plt.ylabel('Packet Size [B]')
    plt.xlabel('Normalized Arrival Time')
    plt.set_cmap('binary')
    plt.axes().set_aspect('equal')
    plt.grid(False)
    if fpath is not None:
        plt.savefig(fpath, bbox_inches='tight')
    if show:
        plt.show()
    plt.close()


def session_histogram(sizes, plot=False):
    hist, bin_edges = np.histogram(sizes, bins=range(0, MTU + 1, 1))
    if plot:
        plt.bar(bin_edges[:-1], hist, width=1)
        plt.xlim(min(bin_edges), max(bin_edges) + 100)
        plt.show()
    return hist.astype(np.uint16)


def session_2d_histogram(ts, sizes, plot=False, tps=None):
    if tps is None:
        max_delta_time = ts[-1] - ts[0]
    else:
        max_delta_time = tps

    # Normalize arrival times to [0, MTU] so the histogram is square
    ts_norm = ((np.array(ts) - ts[0]) / max_delta_time) * MTU
    H, xedges, yedges = np.histogram2d(sizes, ts_norm, bins=(range(0, MTU + 1, 1), range(0, MTU + 1, 1)))

    if plot:
        plt.pcolormesh(xedges, yedges, H)
        plt.colorbar()
        plt.xlim(0, MTU)
        plt.ylim(0, MTU)
        plt.set_cmap('binary')
        plt.show()
    return H.astype(np.uint16)


def export_dataset(dataset):
    print("Start export dataset")
    np.save(os.path.splitext(INPUT)[0], dataset)
    print(dataset.shape)


def export_class_dataset(dataset, class_dir):
    print("Start export dataset")
    np.save(class_dir + "/" + "_".join(re.findall(r"[\w']+", class_dir)[-2:]), dataset)
    print(dataset.shape)


def import_dataset():
    print("Import dataset")
    dataset = np.load(os.path.splitext(INPUT)[0] + ".npy")
    print(dataset.shape)
    return dataset


def traffic_csv_converter(file_path):
    print("Running on " + file_path)
    dataset = []
    counter = 0
    with open(file_path, 'r') as csv_file:
        reader = csv.reader(csv_file)
        for i, row in enumerate(reader):
            session_tuple_key = tuple(row[:8])
            length = int(row[7])
            ts = np.array(row[8:8 + length], dtype=float)
            sizes = np.array(row[9 + length:], dtype=int)

            if length > 10:
                # Slide a TPS-second window over the session in DELTA_T steps
                for t in range(int(ts[-1] / DELTA_T - TPS / DELTA_T) + 1):
                    mask = ((ts >= t * DELTA_T) & (ts <= (t * DELTA_T + TPS)))
                    ts_mask = ts[mask]
                    sizes_mask = sizes[mask]
                    if len(ts_mask) > 10 and ts_mask[-1] - ts_mask[0] > MIN_TPS:
                        h = session_2d_histogram(ts_mask, sizes_mask)
                        dataset.append([h])
                        counter += 1
                        if counter % 100 == 0:
                            print(counter)

    return np.asarray(dataset)


def traffic_csv_converter_splitted(file_path):
    def split_converter(ts, sizes, dataset, counter):
        if ts[-1] - ts[0] > MIN_TPS and len(ts) > 20:
            h = session_2d_histogram(ts - ts[0], sizes)
            dataset.append([h])
            counter += 1

        total_time = ts[-1] - ts[0]
        if total_time > TPS:
            # Integer division: np.split needs integer indices in Python 3
            for ts_split, sizes_split in zip(np.split(ts, [len(ts) // 2]), np.split(sizes, [len(sizes) // 2])):
                split_converter(ts_split, sizes_split, dataset, counter)

    print("Running on " + file_path)
    dataset = []
    counter = 0
    with open(file_path, 'r') as csv_file:
        reader = csv.reader(csv_file)
        for i, row in enumerate(reader):
            session_tuple_key = tuple(row[:8])
            length = int(row[7])
            ts = np.array(row[8:8 + length], dtype=float)
            sizes = np.array(row[9 + length:], dtype=int)

            if length > 10:
                split_converter(ts, sizes, dataset, counter)

    return np.asarray(dataset)


def traffic_class_converter(dir_path):
    dataset_tuple = ()
    for file_path in [os.path.join(dir_path, fn) for fn in next(os.walk(dir_path))[2] if (".csv" in os.path.splitext(fn)[-1])]:
        dataset_tuple += (traffic_csv_converter(file_path),)

    return np.concatenate(dataset_tuple, axis=0)


def iterate_all_classes():
    for class_dir in glob.glob(CLASSES_DIR):
        if "other" not in class_dir:
            print("working on " + class_dir)
            dataset = traffic_class_converter(class_dir)
            print(dataset.shape)
            export_class_dataset(dataset, class_dir)


def random_sampling_dataset(input_array, size=2000):
    print("Import dataset " + input_array)
    dataset = np.load(input_array)
    print(dataset.shape)
    p = size * 1.0 / len(dataset)
    print(p)
    if p >= 1:
        raise Exception

    mask = np.random.choice([True, False], len(dataset), p=[p, 1 - p])
    dataset = dataset[mask]
    print("Start export dataset")

    np.save(os.path.splitext(input_array)[0] + "_samp", dataset)


if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--input', type=str, default=INPUT, help='Path to csv file')

    FLAGS = parser.parse_args()

    # iterate_all_classes()
    # dataset = traffic_class_converter(INPUT_DIR)
    # dataset = traffic_csv_converter(INPUT)

    input_array = "./_dataset/FlowPic/browsing_reg.npy"
    random_sampling_dataset(input_array)

    # export_class_dataset(dataset)
    # import_dataset()
@@ -0,0 +1,40 @@
import pandas as pd


def drop_unique_columns(df: pd.DataFrame):
    nunique = df.nunique()                       # Count the unique values in each column
    cols_to_drop = nunique[nunique == 1].index   # Find the columns with only one unique value
    df.drop(cols_to_drop, axis=1, inplace=True)  # Drop those columns
    print(cols_to_drop)                          # Print the names of the dropped columns
    return df


def select_label_rows(df: pd.DataFrame, label: str):
    # Keep only the rows whose label (the last column) equals `label`
    return df[df.iloc[:, -1] == label]


def drop_columns_with_fix_up(df: pd.DataFrame, columns: list):
    # Drop the columns both with and without the leading space that
    # appears in some CICIDS2017 column names
    columns = [w.lstrip() for w in columns]
    df = drop_columns(df, columns)
    columns = [" " + w for w in columns]
    df = drop_columns(df, columns)
    return df


def drop_columns(df: pd.DataFrame, columns: list):
    # Drop every column whose name matches one of the given patterns
    columns = [w.lstrip() for w in columns]
    for column_name in columns:
        cols_to_drop = df.filter(regex=column_name).columns
        df.drop(cols_to_drop, axis=1, inplace=True)
    return df


def get_ddos_df(df: pd.DataFrame):
    # Convenience wrapper: the DDoS-only subset of the frame
    return df[df.iloc[:, -1] == 'DDoS']


def is_in_bypass_list(column_name: str, bypass_list: tuple) -> bool:
    for bypass in bypass_list:
        if bypass in column_name:
            return True
    return False
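
# A hedged usage example for the helpers above (the toy DataFrame and its
# values are illustrative only, not from the repository):
#
#     toy = pd.DataFrame({'a': [1, 2, 3], 'const': [0, 0, 0],
#                         'Label': ['DDoS', 'BENIGN', 'DDoS']})
#     toy = drop_unique_columns(toy)    # drops 'const' (single unique value)
#     print(select_label_rows(toy, 'DDoS'))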

@@ -0,0 +1,29 @@
from scapy.all import *
from loguru import logger


def split_pcap(file_path: str, chunk_size: int, save_base_path: str = None):
    # Stream the pcap and write it back out in chunks of `chunk_size` packets
    packets = PcapReader(file_path)
    chunk = []
    counter = 1
    for packet in packets:
        # logger.info(packet.time)
        chunk.append(packet)
        if len(chunk) == chunk_size:
            wrpcap(f'{save_base_path}/chunk_{counter}.pcap', chunk)
            chunk = []
            logger.info(f'chunk_{counter}.pcap saved')
            counter += 1

    # Write any remaining packets as a final, smaller chunk
    if chunk:
        wrpcap(f'{save_base_path}/chunk_{counter}.pcap', chunk)


def get_packet_time(pkt: Packet):
    return pkt.time


if __name__ == '__main__':
    from utils.files import create_dir
    create_dir('../_dataset/pcap/Friday-WorkingHours')
    split_pcap('../_dataset/pcap/Friday-WorkingHours.pcap', 10000, '../_dataset/pcap/Friday-WorkingHours')