bump:stable v0.2
parent 847780028c
commit 984d403510
@@ -1,267 +0,0 @@
#!/usr/bin/env python
"""
Read raw traffic CSV files and convert their sessions into 2D histograms (FlowPic images).
"""

import os
import argparse
import csv
import glob
import re

import matplotlib.pyplot as plt
import numpy as np

FLAGS = None
INPUT = "../raw_csvs/classes/browsing/reg/CICNTTor_browsing.raw.csv"  # "../dataset/iscxNTVPN2016/CompletePCAPs"
INPUT_DIR = "../raw_csvs/classes/chat/vpn/"
CLASSES_DIR = "../raw_csvs/classes/**/**/"

# LABEL_IND = 1
TPS = 60  # TimePerSession in secs
DELTA_T = 60  # Delta T between split sessions
MIN_TPS = 50  # minimal time span (secs) a windowed session must cover

# def insert_dataset(dataset, labels, session, label_ind=LABEL_IND):
#     dataset.append(session)
#     labels.append(label_ind)


# def export_dataset(dataset, labels):
#     print "Start export dataset"
#     np.savez(INPUT.split(".")[0] + ".npz", X=dataset, Y=labels)
#     print dataset.shape, labels.shape


# def import_dataset():
#     print "Import dataset"
#     dataset = np.load(INPUT.split(".")[0] + ".npz")
#     print dataset["X"].shape, dataset["Y"].shape


MTU = 1500  # maximum packet size in bytes; also the side length of each 2D histogram


def session_spectogram(ts, sizes, name=None):
    plt.scatter(ts, sizes, marker='.')
    plt.ylim(0, MTU)
    plt.xlim(ts[0], ts[-1])
    # plt.yticks(np.arange(0, MTU, 10))
    # plt.xticks(np.arange(int(ts[0]), int(ts[-1]), 10))
    plt.title(str(name) + " Session Spectrogram")  # str() guards the name=None default
    plt.ylabel('Size [B]')
    plt.xlabel('Time [sec]')

    plt.grid(True)
    plt.show()


def session_atricle_spectogram(ts, sizes, fpath=None, show=True, tps=None):
    if tps is None:
        max_delta_time = ts[-1] - ts[0]
    else:
        max_delta_time = tps

    ts_norm = ((np.array(ts) - ts[0]) / max_delta_time) * MTU
    plt.figure()
    plt.scatter(ts_norm, sizes, marker=',', c='k', s=5)
    plt.ylim(0, MTU)
    plt.xlim(0, MTU)
    plt.ylabel('Packet Size [B]')
    plt.xlabel('Normalized Arrival Time')
    plt.set_cmap('binary')
    plt.gca().set_aspect('equal')  # plt.axes() would open a new Axes; use the current one
    plt.grid(False)
    if fpath is not None:
        # plt.savefig(OUTPUT_DIR + fname, bbox_inches='tight', pad_inches=1)
        plt.savefig(fpath, bbox_inches='tight')
    if show:
        plt.show()
    plt.close()


def session_histogram(sizes, plot=False):
    hist, bin_edges = np.histogram(sizes, bins=range(0, MTU + 1, 1))
    if plot:
        plt.bar(bin_edges[:-1], hist, width=1)
        plt.xlim(min(bin_edges), max(bin_edges) + 100)
        plt.show()
    return hist.astype(np.uint16)


def session_2d_histogram(ts, sizes, plot=False, tps=None):
    if tps is None:
        max_delta_time = ts[-1] - ts[0]
    else:
        max_delta_time = tps

    # Normalize arrival times to [0, MTU] so the histogram is square (MTU x MTU)
    # ts_norm = map(int, ((np.array(ts) - ts[0]) / max_delta_time) * MTU)
    ts_norm = ((np.array(ts) - ts[0]) / max_delta_time) * MTU
    H, xedges, yedges = np.histogram2d(sizes, ts_norm, bins=(range(0, MTU + 1, 1), range(0, MTU + 1, 1)))

    if plot:
        plt.pcolormesh(xedges, yedges, H)
        plt.colorbar()
        plt.xlim(0, MTU)
        plt.ylim(0, MTU)
        plt.set_cmap('binary')
        plt.show()
    return H.astype(np.uint16)


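# A minimal usage sketch (not in the original file; values are synthetic and
# illustrative only): each call on a windowed session yields one MTU x MTU
# uint16 image, which the converters below append to the dataset as [h].
def _demo_session_2d_histogram():
    ts_demo = np.sort(np.random.uniform(0, TPS, size=500))  # fake arrival times within one window
    sizes_demo = np.random.randint(40, MTU, size=500)       # fake packet sizes in bytes
    h = session_2d_histogram(ts_demo, sizes_demo, tps=TPS)
    assert h.shape == (MTU, MTU)

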
def export_dataset(dataset):
    print("Start export dataset")
    np.save(os.path.splitext(INPUT)[0], dataset)
    print(dataset.shape)


def export_class_dataset(dataset, class_dir):
    # File name comes from the last two path components, e.g.
    # ".../classes/chat/vpn/" -> "chat_vpn.npy"
    print("Start export dataset")
    np.save(class_dir + "/" + "_".join(re.findall(r"[\w']+", class_dir)[-2:]), dataset)
    print(dataset.shape)


def import_dataset():
    print("Import dataset")
    dataset = np.load(os.path.splitext(INPUT)[0] + ".npy")
    print(dataset.shape)
    return dataset


def traffic_csv_converter(file_path):
    print("Running on " + file_path)
    dataset = []
    # labels = []
    counter = 0
    with open(file_path, 'r') as csv_file:
        reader = csv.reader(csv_file)
        for i, row in enumerate(reader):
            # Row layout: 8 metadata fields (row[7] is the packet count), then
            # `length` timestamps, one more field (apparently a repeated count,
            # skipped here), then the packet sizes.
            # print row[0], row[7]
            session_tuple_key = tuple(row[:8])
            length = int(row[7])
            ts = np.array(row[8:8 + length], dtype=float)
            sizes = np.array(row[9 + length:], dtype=int)

            # if (sizes > MTU).any():
            #     a = [(sizes[i], i) for i in range(len(sizes)) if (np.array(sizes) > MTU)[i]]
            #     print len(a), session_tuple_key

            if length > 10:
                # print ts[0], ts[-1]
                # h = session_2d_histogram(ts, sizes)
                # session_spectogram(ts, sizes, session_tuple_key[0])
                # dataset.append([h])
                # counter += 1
                # if counter % 100 == 0:
                #     print counter

                # Slide a TPS-second window over the session in steps of DELTA_T
                for t in range(int(ts[-1] / DELTA_T - TPS / DELTA_T) + 1):
                    mask = ((ts >= t * DELTA_T) & (ts <= (t * DELTA_T + TPS)))
                    # print t * DELTA_T, t * DELTA_T + TPS, ts[-1]
                    ts_mask = ts[mask]
                    sizes_mask = sizes[mask]
                    if len(ts_mask) > 10 and ts_mask[-1] - ts_mask[0] > MIN_TPS:
                        # if "facebook" in session_tuple_key[0]:
                        #     session_spectogram(ts[mask], sizes[mask], session_tuple_key[0])
                        #     # session_2d_histogram(ts[mask], sizes[mask], True)
                        #     session_histogram(sizes[mask], True)
                        #     exit()
                        # else:
                        #     continue

                        h = session_2d_histogram(ts_mask, sizes_mask)
                        # session_spectogram(ts_mask, sizes_mask, session_tuple_key[0])
                        dataset.append([h])
                        counter += 1
                        if counter % 100 == 0:
                            print(counter)

    return np.asarray(dataset)  # , np.asarray(labels)


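# Usage sketch (hypothetical file name; directory layout taken from INPUT_DIR):
# converting one raw capture CSV yields an array of shape (n_windows, 1, MTU, MTU).
#
# dataset = traffic_csv_converter(INPUT_DIR + "some_capture.raw.csv")  # illustrative path
# np.save("chat_vpn_demo", dataset)

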
def traffic_csv_converter_splitted(file_path):
    def split_converter(ts, sizes, dataset, counter):
        # Note: `counter` is an int, so increments here never reach the caller;
        # only the shared `dataset` list is mutated in place.
        if ts[-1] - ts[0] > MIN_TPS and len(ts) > 20:
            # print ts[0], ts[-1]
            h = session_2d_histogram(ts - ts[0], sizes)
            # session_spectogram(ts, sizes, session_tuple_key[0])
            dataset.append([h])
            counter += 1
            # if counter % 100 == 0:
            #     print counter

            total_time = ts[-1] - ts[0]
            if total_time > TPS:
                # Recursively halve over-long sessions (integer index for np.split)
                for ts_split, sizes_split in zip(np.split(ts, [len(ts) // 2]), np.split(sizes, [len(sizes) // 2])):
                    split_converter(ts_split, sizes_split, dataset, counter)

    print("Running on " + file_path)
    dataset = []
    # labels = []
    counter = 0
    with open(file_path, 'r') as csv_file:
        reader = csv.reader(csv_file)
        for i, row in enumerate(reader):
            # print row[0], row[7]
            session_tuple_key = tuple(row[:8])
            length = int(row[7])
            ts = np.array(row[8:8 + length], dtype=float)
            sizes = np.array(row[9 + length:], dtype=int)

            # if (sizes > MTU).any():
            #     a = [(sizes[i], i) for i in range(len(sizes)) if (np.array(sizes) > MTU)[i]]
            #     print len(a), session_tuple_key

            if length > 10:
                split_converter(ts, sizes, dataset, counter)

    return np.asarray(dataset)


def traffic_class_converter(dir_path):
    dataset_tuple = ()
    for file_path in [os.path.join(dir_path, fn) for fn in next(os.walk(dir_path))[2] if (".csv" in os.path.splitext(fn)[-1])]:
        dataset_tuple += (traffic_csv_converter(file_path),)

    return np.concatenate(dataset_tuple, axis=0)


def iterate_all_classes():
    for class_dir in glob.glob(CLASSES_DIR):
        if "other" not in class_dir:  # "browsing" not in class_dir and
            print("working on " + class_dir)
            dataset = traffic_class_converter(class_dir)
            print(dataset.shape)
            export_class_dataset(dataset, class_dir)


def random_sampling_dataset(input_array, size=2000):
    print("Import dataset " + input_array)
    dataset = np.load(input_array)
    print(dataset.shape)
    p = size * 1.0 / len(dataset)
    print(p)
    if p >= 1:
        raise ValueError("Requested sample size must be smaller than the dataset")

    # Bernoulli mask: keeps approximately `size` rows, not exactly `size`
    mask = np.random.choice([True, False], len(dataset), p=[p, 1 - p])
    dataset = dataset[mask]
    print("Start export dataset")

    np.save(os.path.splitext(input_array)[0] + "_samp", dataset)


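# A minimal alternative sketch (not part of the original pipeline): the
# Bernoulli mask above keeps only *approximately* `size` rows; when an exact
# count is required, slice a random permutation of the row indices instead.
def random_sampling_dataset_exact(input_array, size=2000):
    dataset = np.load(input_array)
    if size >= len(dataset):
        raise ValueError("Requested sample size must be smaller than the dataset")
    idx = np.random.permutation(len(dataset))[:size]  # exactly `size` distinct rows
    np.save(os.path.splitext(input_array)[0] + "_samp", dataset[idx])

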
if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--input', type=str, default=INPUT, help='Path to csv file')

    FLAGS = parser.parse_args()  # parsed but currently unused below

    # iterate_all_classes()

    # dataset = traffic_class_converter(INPUT_DIR)
    # dataset = traffic_csv_converter(INPUT)

    input_array = "./_dataset/FlowPic/browsing_reg.npy"
    random_sampling_dataset(input_array)

    # export_class_dataset(dataset)
    # import_dataset()
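# Example invocation (script name illustrative; --input is parsed into FLAGS but
# the main block currently operates on the hard-coded `input_array` above):
#   python traffic_csv_converter.py --input ../raw_csvs/classes/chat/vpn/capture.raw.csv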