You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
1 line
7.5 KiB
Plaintext
1 line
7.5 KiB
Plaintext
{"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"name":"3 Dataset differences.ipynb","provenance":[],"collapsed_sections":[],"authorship_tag":"ABX9TyNNN5mDn3v+hKFMCuKQUmSX"},"kernelspec":{"name":"python3","display_name":"Python 3"}},"cells":[{"cell_type":"code","metadata":{"id":"xP2-JKpus40G","colab_type":"code","colab":{"base_uri":"https://localhost:8080/","height":54},"executionInfo":{"status":"ok","timestamp":1597049167487,"user_tz":-540,"elapsed":632,"user":{"displayName":"Mahendra Data","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14Ghn7DAlkRKEg-Y82BqktrBT0ABMFy8r5576xhbKDQ=s64","userId":"08049029618478467489"}},"outputId":"631cf283-ae68-41b2-a90c-5d161fa97408"},"source":["from google.colab import drive\n","drive.mount(\"/content/drive\")"],"execution_count":null,"outputs":[{"output_type":"stream","text":["Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount(\"/content/drive\", force_remount=True).\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"id":"NvDJdbHguhWV","colab_type":"code","colab":{}},"source":["import logging\n","import numpy as np\n","import pandas as pd\n","\n","logging.basicConfig(format=\"%(asctime)s %(levelname)s %(message)s\", datefmt=\"%H:%M:%S\", level=logging.INFO)\n","\n","# Change display.max_rows to show all features.\n","pd.set_option(\"display.max_rows\", 85)"],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"id":"I7GWwpFwuiBF","colab_type":"code","colab":{}},"source":["TRAFFICLABELLING_PATH = \"/content/drive/My Drive/CICIDS2017/TrafficLabelling/TrafficLabelling.csv\"\n","MACHINELEARNINGCVE_PATH = \"/content/drive/My Drive/CICIDS2017/MachineLearningCVE/MachineLearningCVE.csv\""],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"id":"CglmbKQ8cUvg","colab_type":"code","colab":{}},"source":["TrafficLabelling = pd.read_csv(TRAFFICLABELLING_PATH, skipinitialspace=True)\n","MachineLearningCVE = pd.read_csv(MACHINELEARNINGCVE_PATH, 
skipinitialspace=True)"],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"E1T2PJYEm4TW","colab_type":"text"},"source":["# Class Distribution"]},{"cell_type":"code","metadata":{"id":"saSUVDTlcUpV","colab_type":"code","colab":{"base_uri":"https://localhost:8080/","height":289},"executionInfo":{"status":"ok","timestamp":1597049247271,"user_tz":-540,"elapsed":80388,"user":{"displayName":"Mahendra Data","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14Ghn7DAlkRKEg-Y82BqktrBT0ABMFy8r5576xhbKDQ=s64","userId":"08049029618478467489"}},"outputId":"c3503379-1bbf-415b-b13a-561718ad9209"},"source":["TrafficLabelling.Label.value_counts()"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/plain":["BENIGN 2273097\n","DoS Hulk 231073\n","PortScan 158930\n","DDoS 128027\n","DoS GoldenEye 10293\n","FTP-Patator 7938\n","SSH-Patator 5897\n","DoS slowloris 5796\n","DoS Slowhttptest 5499\n","Bot 1966\n","Web Attack-Brute Force 1507\n","Web Attack-XSS 652\n","Infiltration 36\n","Web Attack-Sql Injection 21\n","Heartbleed 11\n","Name: Label, dtype: int64"]},"metadata":{"tags":[]},"execution_count":5}]},{"cell_type":"code","metadata":{"id":"8Ljs4UljcUl1","colab_type":"code","colab":{"base_uri":"https://localhost:8080/","height":289},"executionInfo":{"status":"ok","timestamp":1597049247763,"user_tz":-540,"elapsed":80875,"user":{"displayName":"Mahendra Data","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14Ghn7DAlkRKEg-Y82BqktrBT0ABMFy8r5576xhbKDQ=s64","userId":"08049029618478467489"}},"outputId":"da0edd9f-bb63-462d-ce15-97a0774a39f3"},"source":["MachineLearningCVE.Label.value_counts()"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/plain":["BENIGN 2273097\n","DoS Hulk 231073\n","PortScan 158930\n","DDoS 128027\n","DoS GoldenEye 10293\n","FTP-Patator 7938\n","SSH-Patator 5897\n","DoS slowloris 5796\n","DoS Slowhttptest 5499\n","Bot 1966\n","Web Attack-Brute Force 1507\n","Web Attack-XSS 
652\n","Infiltration 36\n","Web Attack-Sql Injection 21\n","Heartbleed 11\n","Name: Label, dtype: int64"]},"metadata":{"tags":[]},"execution_count":6}]},{"cell_type":"markdown","metadata":{"id":"5eAVWpoenDHN","colab_type":"text"},"source":["# Dataset shape"]},{"cell_type":"code","metadata":{"id":"WifmX6zvnGnS","colab_type":"code","colab":{"base_uri":"https://localhost:8080/","height":34},"executionInfo":{"status":"ok","timestamp":1597050237737,"user_tz":-540,"elapsed":553,"user":{"displayName":"Mahendra Data","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14Ghn7DAlkRKEg-Y82BqktrBT0ABMFy8r5576xhbKDQ=s64","userId":"08049029618478467489"}},"outputId":"c8cd39c6-1b9d-43c4-e245-9d4731762615"},"source":["TrafficLabelling.shape"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/plain":["(2830743, 85)"]},"metadata":{"tags":[]},"execution_count":13}]},{"cell_type":"code","metadata":{"id":"Jhvt8vK3nLy2","colab_type":"code","colab":{"base_uri":"https://localhost:8080/","height":34},"executionInfo":{"status":"ok","timestamp":1597050246991,"user_tz":-540,"elapsed":643,"user":{"displayName":"Mahendra Data","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14Ghn7DAlkRKEg-Y82BqktrBT0ABMFy8r5576xhbKDQ=s64","userId":"08049029618478467489"}},"outputId":"9851a97f-6d4a-4755-9904-a929efbe103f"},"source":["MachineLearningCVE.shape"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/plain":["(2830743, 79)"]},"metadata":{"tags":[]},"execution_count":14}]},{"cell_type":"markdown","metadata":{"id":"xwEWMI22m8wu","colab_type":"text"},"source":["# Feature differences"]},{"cell_type":"code","metadata":{"id":"3D3h6dpUl8DL","colab_type":"code","colab":{"base_uri":"https://localhost:8080/","height":51},"executionInfo":{"status":"ok","timestamp":1597050133673,"user_tz":-540,"elapsed":542,"user":{"displayName":"Mahendra 
Data","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14Ghn7DAlkRKEg-Y82BqktrBT0ABMFy8r5576xhbKDQ=s64","userId":"08049029618478467489"}},"outputId":"a946559b-fcf3-44d5-9a59-d18f995a03eb"},"source":["np.setdiff1d(TrafficLabelling.columns, MachineLearningCVE.columns)"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/plain":["array(['Destination IP', 'Flow ID', 'Protocol', 'Source IP',\n"," 'Source Port', 'Timestamp'], dtype=object)"]},"metadata":{"tags":[]},"execution_count":12}]},{"cell_type":"code","metadata":{"id":"3lFCGF6ApixT","colab_type":"code","colab":{"base_uri":"https://localhost:8080/","height":85},"executionInfo":{"status":"ok","timestamp":1597050874399,"user_tz":-540,"elapsed":597,"user":{"displayName":"Mahendra Data","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14Ghn7DAlkRKEg-Y82BqktrBT0ABMFy8r5576xhbKDQ=s64","userId":"08049029618478467489"}},"outputId":"19b37ab1-76ca-4cd5-d979-6d817ee2bd6e"},"source":["TrafficLabelling.Protocol.value_counts()"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/plain":["6.0 1829554\n","17.0 999493\n","0.0 1696\n","Name: Protocol, dtype: int64"]},"metadata":{"tags":[]},"execution_count":15}]}]} |