diff --git "a/Shallow machine learning/.ipynb_checkpoints/Binary classification-checkpoint.ipynb" "b/Shallow machine learning/.ipynb_checkpoints/Binary classification-checkpoint.ipynb" new file mode 100644--- /dev/null +++ "b/Shallow machine learning/.ipynb_checkpoints/Binary classification-checkpoint.ipynb" @@ -0,0 +1,2502 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import numpy as np\n", + "import matplotlib.pyplot as plt\n", + "import seaborn as sns\n", + "import regex as re\n", + "\n", + "from sklearn.feature_extraction.text import TfidfVectorizer, TfidfTransformer, CountVectorizer\n", + "from sklearn.pipeline import Pipeline\n", + "from sklearn.model_selection import train_test_split\n", + "from sklearn.metrics import recall_score, accuracy_score, precision_score, f1_score, confusion_matrix, classification_report\n", + "\n", + "from sklearn.ensemble import AdaBoostClassifier\n", + "from xgboost import XGBClassifier\n", + "\n", + "from sklearn.linear_model import LogisticRegression, SGDClassifier\n", + "from sklearn.svm import SVC, LinearSVC, NuSVC\n", + "from sklearn.ensemble import RandomForestClassifier\n", + "from sklearn.naive_bayes import MultinomialNB, BernoulliNB\n", + "\n", + "import pickle\n", + "%matplotlib inline" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
VideoIDEffectiveness
0pvuN_WvF1to1.0
1eRLJscAlk1M5.0
2VbiRNT_gWUQ2.0
35dVcn8NjbwYNaN
45scez5dqtAc4.0
.........
186TZ0j6kr4ZJ03.0
1878DiWzvE52ZY1.0
188OwqIy8Ikv-c2.0
189lPgZfhnCAdI1.0
190dSu5sXmsur43.0
\n", + "

191 rows × 2 columns

\n", + "
" + ], + "text/plain": [ + " VideoID Effectiveness\n", + "0 pvuN_WvF1to 1.0\n", + "1 eRLJscAlk1M 5.0\n", + "2 VbiRNT_gWUQ 2.0\n", + "3 5dVcn8NjbwY NaN\n", + "4 5scez5dqtAc 4.0\n", + ".. ... ...\n", + "186 TZ0j6kr4ZJ0 3.0\n", + "187 8DiWzvE52ZY 1.0\n", + "188 OwqIy8Ikv-c 2.0\n", + "189 lPgZfhnCAdI 1.0\n", + "190 dSu5sXmsur4 3.0\n", + "\n", + "[191 rows x 2 columns]" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df = pd.read_excel('../../NLP Preprocessing/02_Resources/Climate_change_links.xlsx')\n", + "data = df[[\"VideoID\", \"Effectiveness\"]]\n", + "data" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXAAAAEMCAYAAADd+e2FAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjQuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8rg+JYAAAACXBIWXMAAAsTAAALEwEAmpwYAAAP+0lEQVR4nO3dfYxldX3H8feHXUTKKixh2G4EHGO2WrS61AlqUYsFFcEITcGHpLJpsGtbrZg0NVv7YJq0zRoTtSZN6kaRteID9YmtVHG7FU2rIrOw8iAYrEVKWXdHq0Uq1QLf/nHPOOMws3N3Zu7c+e28X8nNOed3z7nny+9mP/zmPNyTqkKS1J6jhl2AJGlhDHBJapQBLkmNMsAlqVEGuCQ1au1y7uykk06q0dHR5dylJDVv7969362qkZntyxrgo6OjjI+PL+cuJal5Sb49W7uHUCSpUQa4JDXKAJekRhngktQoA1ySGmWAS1KjDHBJapQBLkmNMsAlqVHLeiempMEb3XbtsEvg7u0XDLuEVaGvAE9yN/BD4GHgoaoaS3Ii8FFgFLgbeEVVfX8wZUqSZjqcQygvrKrNVTXWLW8D9lTVJmBPtyxJWiaLOQZ+IbCzm98JXLToaiRJfes3wAv4XJK9SbZ2bRuqaj9ANz15tg2TbE0ynmR8YmJi8RVLkoD+T2KeVVX3JTkZ2J3kzn53UFU7gB0AY2NjtYAaJUmz6GsEXlX3ddODwCeBM4EDSTYCdNODgypSkvRo8wZ4kuOSPG5yHngxcBuwC9jSrbYFuGZQRUqSHq2fQygbgE8mmVz/Q1X12SQ3AlcnuQy4B7hkcGVKkmaaN8Cr6lvAM2dp/x5wziCKkiTNz1vpJalRBrgkNcoAl6RGGeCS1CgDXJIaZYBLUqMMcElqlAEuSY0ywCWpUQa4JDXKAJekRhngktQoA1ySGmWAS1KjDHBJapQBLkmNMsAlqVEGuCQ1ygCXpEYZ4JLUKANckhplgEtSowxwSWqUAS5JjTLAJalRBrgkNcoAl6RGGeCS1CgDXJIaZYBLUqMMcElqVN8BnmRNkpuTfLpbPjHJ7iR3ddP1gytTkjTT4YzALwfumLa8DdhTVZuAPd2yJGmZ9BXgSU4BLgDeO635QmBnN78TuGhJK5MkHVK/I/B3AW8GHpnWtqGq9gN005Nn2zDJ1iTjScYnJiYWU6skaZp5AzzJy4CDVbV3ITuoqh1VNVZVYyMjIwv5CEnSLNb2sc5ZwMu
TnA88Fnh8kg8CB5JsrKr9STYCBwdZqCTpZ807Aq+qP6qqU6pqFHgV8M9V9ZvALmBLt9oW4JqBVSlJepR+RuBz2Q5cneQy4B7gkqUp6dBGt127HLs5pLu3XzDsEiTp8AK8qq4Hru/mvwecs/QlSZL64Z2YktQoA1ySGmWAS1KjDHBJapQBLkmNMsAlqVEGuCQ1ajE38mjIvKlJWt0cgUtSowxwSWqUAS5JjTLAJalRBrgkNcoAl6RGGeCS1CgDXJIaZYBLUqMMcElqlAEuSY0ywCWpUQa4JDXKAJekRhngktQoA1ySGmWAS1KjDHBJapQBLkmN8pmYOiL4fFCtRo7AJalRBrgkNcoAl6RGGeCS1Kh5AzzJY5N8NcnXktye5M+79hOT7E5yVzddP/hyJUmT+hmB/xj4tap6JrAZOC/Jc4BtwJ6q2gTs6ZYlSctk3gCvnge6xaO7VwEXAju79p3ARYMoUJI0u76OgSdZk2QfcBDYXVU3ABuqaj9ANz15jm23JhlPMj4xMbFEZUuS+grwqnq4qjYDpwBnJnl6vzuoqh1VNVZVYyMjIwssU5I002FdhVJVPwCuB84DDiTZCNBNDy51cZKkufVzFcpIkhO6+WOBc4E7gV3Alm61LcA1A6pRkjSLfn4LZSOwM8kaeoF/dVV9OsmXgauTXAbcA1wywDolSTPMG+BVdQtwxizt3wPOGURRkqT5eSemJDXKAJekRhngktQoA1ySGmWAS1KjDHBJapQBLkmNMsAlqVEGuCQ1ygCXpEYZ4JLUKANckhplgEtSowxwSWqUAS5JjTLAJalRBrgkNcoAl6RGGeCS1CgDXJIaZYBLUqMMcElqlAEuSY0ywCWpUQa4JDXKAJekRhngktQoA1ySGmWAS1KjDHBJapQBLkmNmjfAk5ya5PNJ7khye5LLu/YTk+xOclc3XT/4ciVJk/oZgT8E/EFV/SLwHOD1SU4HtgF7qmoTsKdbliQtk3kDvKr2V9VN3fwPgTuAJwAXAju71XYCFw2oRknSLA7rGHiSUeAM4AZgQ1Xth17IAycveXWSpDn1HeBJ1gEfB95UVfcfxnZbk4wnGZ+YmFhIjZKkWfQV4EmOphfeV1XVJ7rmA0k2du9vBA7Otm1V7aiqsaoaGxkZWYqaJUn0dxVKgPcBd1TVO6a9tQvY0s1vAa5Z+vIkSXNZ28c6ZwGvAW5Nsq9rewuwHbg6yWXAPcAlA6lQkjSreQO8qv4FyBxvn7O05UiS+uWdmJLUKANckhplgEtSowxwSWqUAS5JjTLAJalRBrgkNcoAl6RGGeCS1CgDXJIaZYBLUqMMcElqlAEuSY0ywCWpUQa4JDXKAJekRhngktQoA1ySGmWAS1KjDHBJapQBLkmNMsAlqVEGuCQ1ygCXpEYZ4JLUKANckhplgEtSowxwSWqUAS5JjTLAJalRBrgkNcoAl6RGzRvgSa5IcjDJbdPaTkyyO8ld3XT9YMuUJM3Uzwj8SuC8GW3bgD1VtQnY0y1LkpbRvAFeVV8E/mtG84XAzm5+J3DR0pYlSZrP2gVut6Gq9gNU1f4kJ8+1YpKtwFaA0047bYG7k6TDN7rt2mGXwN3bLxjYZw/8JGZV7aiqsaoaGxkZGfTuJGnVWGiAH0iyEaCbHly6kiRJ/VhogO8CtnTzW4BrlqYcSVK/+rmM8MPAl4GnJLk3yWXAduBFSe4CXtQtS5KW0bwnMavq1XO8dc4S1yJJOgzeiSlJjTLAJalRBrgkNcoAl6RGGeCS1CgDXJIaZYBLUqMMcElqlAEuSY0ywCWpUQa4JDXKAJekRhngktQoA1ySGmWAS1KjDHBJapQBLkmNMsAlqVEGuCQ1ygCXpEYZ4JLUKANckhplgEtSowxwSWqUAS5JjTLAJalRBrgkNcoAl6RGGeCS1CgDXJIaZYBLUqMMcElq1KICPMl5Sb6R5JtJti1VUZKk+S04wJOsAf4GeClwOvDqJKcvVWGSpENbzAj8TOCbVfWtqvo
J8BHgwqUpS5I0n1TVwjZMLgbOq6rXdsuvAZ5dVW+Ysd5WYGu3+BTgGwsvd0mcBHx3yDWsFPbFFPtiin0xZaX0xROramRm49pFfGBmaXvU/w2qagewYxH7WVJJxqtqbNh1rAT2xRT7Yop9MWWl98ViDqHcC5w6bfkU4L7FlSNJ6tdiAvxGYFOSJyV5DPAqYNfSlCVJms+CD6FU1UNJ3gBcB6wBrqiq25esssFZMYdzVgD7Yop9McW+mLKi+2LBJzElScPlnZiS1CgDXJIaZYBLUqMMcElq1GJu5FGDkmwAnkDvpqv7qurAkEsaGvtiin0xpaW+WDVXobT0pQxCks3A3wLHA//ZNZ8C/AD4vaq6aTiVLT/7Yop9MaXFvjjiA7zFL2UQkuwDXldVN8xofw7wnqp65lAKGwL7Yop9MaXFvlgNh1CuZO4v5f3AivtSBuS4mX0AUFVfSXLcMAoaIvtiin0xpbm+WA0B3tyXMiCfSXIt8AHgP7q2U4FLgc8OrarhsC+m2BdTmuuL1XAI5d3Ak5n9S/n3mT9/eyRL8lJ6v9n+BHq/JnkvsKuq/nGohQ2BfTHFvpjSWl8c8QEO7X0pktSPVRHgOrQkW7vfbV/17Isp9sWUldoXq/pGnu5pQZr94RyrlX0xxb6YsiL7YjWcxDyUFfmlDEqSp9I7jHRDVT0w7a1vD6mkoUlyJlBVdWP3MO7zgDur6j1DLm3oknygqi5d7X2R5Hn0nv1720rti9Ue4D8ZdgHLJckbgdcDdwDvS3J5VV3Tvf1XrNCz7IOQ5K3AS4G1SXYDzwauB7YlOaOq/nKY9S2nJDMfwhLghUlOAKiqly97UUOS5KtVdWY3/9v0/r18Enhrkl+uqu1DLXAWq/oYeJJ7quq0YdexHJLcCjy3qh5IMgp8DPi7qvrrJDdX1RnDrXD5dH2xGTgG+A5wSlXdn+RYen+dPGOY9S2nJDcBXwfeS+8u5QAfpveELarqC8OrbnlN/3eQ5Ebg/Kqa6C43/kpV/dJwK3y0I34EnuSWud4CNixnLUO2ZvKwSVXdneRs4GNJnsgqO5QEPFRVDwM/SvJvVXU/QFU9mOSRIde23MaAy4E/Bv6wqvYleXA1Bfc0RyVZT+/cYKpqAqCq/ifJQ8MtbXZHfIDTC+mXAN+f0R7gS8tfztB8J8nmqtoH0I3EXwZcAay4kcWA/STJz1XVj4BnTTYmOR5YVQFeVY8A70zy9930AKsjF2ZzPLCXXjZUkp+vqu8kWccKHeSshi/q08C6yeCaLsn1y17N8FwK/MwooqoeAi5NsiJP0AzQC6rqx/DTAJt0NLBlOCUNV1XdC1yS5ALg/mHXMwxVNTrHW48Av76MpfRtVR8Dl6SWrerrwCWpZQa4JDXKANeKkOThJPumvbZ17c9PcnvXdmySt3fLb1/APt4yY3k1ncTWEchj4FoRkjxQVetmaf9betdmv79bvh8YmTwJuRT7kFrlCFwrVpLXAq8A/izJVd1dg8cBNyR5ZZKRJB9PcmP3Oqvbbl2S9ye5NcktSX4jyXbg2G4kf1W33gPd9KNJzp+23yu7bdZ0I/4bu895Xff+2UmuT/KxJHd2taV771lJvpBkb5Lrkmzs2t+Y5Ovd53yka/vVaX9x3JzkccvWuToyVJUvX0N/AQ8D+6a9Xtm1XwlcPG29B6bNfwh4Xjd/GnBHN/824F3T1ls/c9vpy/QuEdvZzT+G3u/GHwtsBf6kaz8GGAeeBJwN/De9R/MdBXwZeB69yxC/RO8vBIBXAld08/cBx3TzJ3TTfwDO6ubXAWuH/T34auu1Gq4DVxserKrNh7nNucDp3eAX4PHdKPZculvBAapq5k1cM30GeHeSY+j9qNUXq3dX5ouBZyS5uFvveGATvd/Q+Wr1rp2efJbiKL3nrD4d2N3VtAbY3217C3BVkk8Bn+ra/hV4R/cXwScmP0/qlwGulh1F7/ddHpze2B3O6PvkTlX9b3dT10vojZo/PPlRwO9X1XU
zPv9sYPox+Ifp/VsKcHtVPXeW3VwAvAB4OfCnSZ5WVdvTe4TX+cBXkpxbVXf2W7fkMXC17HPATx+Jl2TzHO3ru9n/S3L0HJ/1EeC3gOcDk4F9HfC7k9sk+YUc+jmq3wBGkjy3W//oJE9LchRwalV9HngzcAKwLsmTq+rWqnobvcMzT+3vP1vqMcC1Uhybn72MsJ+f7nwjMNadGPw68Dtd+18A65PcluRrwAu79h3ALZMnMWf4HL0R8j9V1eTPDL+X3i/13ZTkNuA9HOKv1m67i4G3dfvdB/wKvUMpH0zvVxBvBt5ZVT8A3jStxgfpHcqR+uZlhJLUKEfgktQoA1ySGmWAS1KjDHBJapQBLkmNMsAlqVEGuCQ16v8BiQJKNtSjlfEAAAAASUVORK5CYII=\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "# visulaise dataset\n", + "def plot_data(label):\n", + " data.groupby(label).VideoID.count().plot.bar(ylim=0)\n", + " plt.show()\n", + " \n", + "plot_data('Effectiveness')" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
VideoIDEffectiveness
0pvuN_WvF1to1.0
1eRLJscAlk1M5.0
2VbiRNT_gWUQ2.0
35scez5dqtAc4.0
4JDcro7dPqpA2.0
.........
164TZ0j6kr4ZJ03.0
1658DiWzvE52ZY1.0
166OwqIy8Ikv-c2.0
167lPgZfhnCAdI1.0
168dSu5sXmsur43.0
\n", + "

169 rows × 2 columns

\n", + "
" + ], + "text/plain": [ + " VideoID Effectiveness\n", + "0 pvuN_WvF1to 1.0\n", + "1 eRLJscAlk1M 5.0\n", + "2 VbiRNT_gWUQ 2.0\n", + "3 5scez5dqtAc 4.0\n", + "4 JDcro7dPqpA 2.0\n", + ".. ... ...\n", + "164 TZ0j6kr4ZJ0 3.0\n", + "165 8DiWzvE52ZY 1.0\n", + "166 OwqIy8Ikv-c 2.0\n", + "167 lPgZfhnCAdI 1.0\n", + "168 dSu5sXmsur4 3.0\n", + "\n", + "[169 rows x 2 columns]" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# clean data\n", + "data = data.loc[(data[\"Effectiveness\"] == 1) | (data[\"Effectiveness\"] == 2) | (data[\"Effectiveness\"] == 3) | (data[\"Effectiveness\"] == 4) | (data[\"Effectiveness\"] == 5)]\n", + "data = data.reset_index()\n", + "del data[\"index\"]\n", + "data" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXAAAAEMCAYAAADd+e2FAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjQuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8rg+JYAAAACXBIWXMAAAsTAAALEwEAmpwYAAAP+0lEQVR4nO3dfYxldX3H8feHXUTKKixh2G4EHGO2WrS61AlqUYsFFcEITcGHpLJpsGtbrZg0NVv7YJq0zRoTtSZN6kaRteID9YmtVHG7FU2rIrOw8iAYrEVKWXdHq0Uq1QLf/nHPOOMws3N3Zu7c+e28X8nNOed3z7nny+9mP/zmPNyTqkKS1J6jhl2AJGlhDHBJapQBLkmNMsAlqVEGuCQ1au1y7uykk06q0dHR5dylJDVv7969362qkZntyxrgo6OjjI+PL+cuJal5Sb49W7uHUCSpUQa4JDXKAJekRhngktQoA1ySGmWAS1KjDHBJapQBLkmNMsAlqVHLeiempMEb3XbtsEvg7u0XDLuEVaGvAE9yN/BD4GHgoaoaS3Ii8FFgFLgbeEVVfX8wZUqSZjqcQygvrKrNVTXWLW8D9lTVJmBPtyxJWiaLOQZ+IbCzm98JXLToaiRJfes3wAv4XJK9SbZ2bRuqaj9ANz15tg2TbE0ynmR8YmJi8RVLkoD+T2KeVVX3JTkZ2J3kzn53UFU7gB0AY2NjtYAaJUmz6GsEXlX3ddODwCeBM4EDSTYCdNODgypSkvRo8wZ4kuOSPG5yHngxcBuwC9jSrbYFuGZQRUqSHq2fQygbgE8mmVz/Q1X12SQ3AlcnuQy4B7hkcGVKkmaaN8Cr6lvAM2dp/x5wziCKkiTNz1vpJalRBrgkNcoAl6RGGeCS1CgDXJIaZYBLUqMMcElqlAEuSY0ywCWpUQa4JDXKAJekRhngktQoA1ySGmWAS1KjDHBJapQBLkmNMsAlqVEGuCQ1ygCXpEYZ4JLUKANckhplgEtSowxwSWqUAS5JjTLAJalRBrgkNcoAl6RGGeCS1CgDXJIaZYBLUqMMcElqVN8BnmRNkpuTfLpbPjHJ7iR3ddP1gytTkjTT4YzALwfumLa8DdhTVZuAPd2yJGmZ9BXgSU4
BLgDeO635QmBnN78TuGhJK5MkHVK/I/B3AW8GHpnWtqGq9gN005Nn2zDJ1iTjScYnJiYWU6skaZp5AzzJy4CDVbV3ITuoqh1VNVZVYyMjIwv5CEnSLNb2sc5ZwMuTnA88Fnh8kg8CB5JsrKr9STYCBwdZqCTpZ807Aq+qP6qqU6pqFHgV8M9V9ZvALmBLt9oW4JqBVSlJepR+RuBz2Q5cneQy4B7gkqUp6dBGt127HLs5pLu3XzDsEiTp8AK8qq4Hru/mvwecs/QlSZL64Z2YktQoA1ySGmWAS1KjDHBJapQBLkmNMsAlqVEGuCQ1ajE38mjIvKlJWt0cgUtSowxwSWqUAS5JjTLAJalRBrgkNcoAl6RGGeCS1CgDXJIaZYBLUqMMcElqlAEuSY0ywCWpUQa4JDXKAJekRhngktQoA1ySGmWAS1KjDHBJapQBLkmN8pmYOiL4fFCtRo7AJalRBrgkNcoAl6RGGeCS1Kh5AzzJY5N8NcnXktye5M+79hOT7E5yVzddP/hyJUmT+hmB/xj4tap6JrAZOC/Jc4BtwJ6q2gTs6ZYlSctk3gCvnge6xaO7VwEXAju79p3ARYMoUJI0u76OgSdZk2QfcBDYXVU3ABuqaj9ANz15jm23JhlPMj4xMbFEZUuS+grwqnq4qjYDpwBnJnl6vzuoqh1VNVZVYyMjIwssU5I002FdhVJVPwCuB84DDiTZCNBNDy51cZKkufVzFcpIkhO6+WOBc4E7gV3Alm61LcA1A6pRkjSLfn4LZSOwM8kaeoF/dVV9OsmXgauTXAbcA1wywDolSTPMG+BVdQtwxizt3wPOGURRkqT5eSemJDXKAJekRhngktQoA1ySGmWAS1KjDHBJapQBLkmNMsAlqVEGuCQ1ygCXpEYZ4JLUKANckhplgEtSowxwSWqUAS5JjTLAJalRBrgkNcoAl6RGGeCS1CgDXJIaZYBLUqMMcElqlAEuSY0ywCWpUQa4JDXKAJekRhngktQoA1ySGmWAS1KjDHBJapQBLkmNmjfAk5ya5PNJ7khye5LLu/YTk+xOclc3XT/4ciVJk/oZgT8E/EFV/SLwHOD1SU4HtgF7qmoTsKdbliQtk3kDvKr2V9VN3fwPgTuAJwAXAju71XYCFw2oRknSLA7rGHiSUeAM4AZgQ1Xth17IAycveXWSpDn1HeBJ1gEfB95UVfcfxnZbk4wnGZ+YmFhIjZKkWfQV4EmOphfeV1XVJ7rmA0k2du9vBA7Otm1V7aiqsaoaGxkZWYqaJUn0dxVKgPcBd1TVO6a9tQvY0s1vAa5Z+vIkSXNZ28c6ZwGvAW5Nsq9rewuwHbg6yWXAPcAlA6lQkjSreQO8qv4FyBxvn7O05UiS+uWdmJLUKANckhplgEtSowxwSWqUAS5JjTLAJalRBrgkNcoAl6RGGeCS1CgDXJIaZYBLUqMMcElqlAEuSY0ywCWpUQa4JDXKAJekRhngktQoA1ySGmWAS1KjDHBJapQBLkmNMsAlqVEGuCQ1ygCXpEYZ4JLUKANckhplgEtSowxwSWqUAS5JjTLAJalRBrgkNcoAl6RGzRvgSa5IcjDJbdPaTkyyO8ld3XT9YMuUJM3Uzwj8SuC8GW3bgD1VtQnY0y1LkpbRvAFeVV8E/mtG84XAzm5+J3DR0pYlSZrP2gVut6Gq9gNU1f4kJ8+1YpKtwFaA0047bYG7k6TDN7rt2mGXwN3bLxjYZw/8JGZV7aiqsaoaGxkZGfTuJGnVWGiAH0iyEaCbHly6kiRJ/VhogO8CtnTzW4BrlqYcSVK/+rmM8MPAl4GnJLk3yWXAduBFSe4CXtQtS5KW0bwnMavq1XO8dc4S1yJJOgzeiSlJjTLAJalRBrgkNcoAl6RGGeCS1CgDXJIaZYBLUqMMcElqlAEuSY0ywCWpUQa4JDXKAJekRhngktQoA1ySGmWAS1KjDHBJapQBLkmNMsAlqVEGuCQ1ygCXpEYZ4JLUKANckhplgEtSowx
wSWqUAS5JjTLAJalRBrgkNcoAl6RGGeCS1CgDXJIaZYBLUqMMcElq1KICPMl5Sb6R5JtJti1VUZKk+S04wJOsAf4GeClwOvDqJKcvVWGSpENbzAj8TOCbVfWtqvoJ8BHgwqUpS5I0n1TVwjZMLgbOq6rXdsuvAZ5dVW+Ysd5WYGu3+BTgGwsvd0mcBHx3yDWsFPbFFPtiin0xZaX0xROramRm49pFfGBmaXvU/w2qagewYxH7WVJJxqtqbNh1rAT2xRT7Yop9MWWl98ViDqHcC5w6bfkU4L7FlSNJ6tdiAvxGYFOSJyV5DPAqYNfSlCVJms+CD6FU1UNJ3gBcB6wBrqiq25esssFZMYdzVgD7Yop9McW+mLKi+2LBJzElScPlnZiS1CgDXJIaZYBLUqMMcElq1GJu5FGDkmwAnkDvpqv7qurAkEsaGvtiin0xpaW+WDVXobT0pQxCks3A3wLHA//ZNZ8C/AD4vaq6aTiVLT/7Yop9MaXFvjjiA7zFL2UQkuwDXldVN8xofw7wnqp65lAKGwL7Yop9MaXFvlgNh1CuZO4v5f3AivtSBuS4mX0AUFVfSXLcMAoaIvtiin0xpbm+WA0B3tyXMiCfSXIt8AHgP7q2U4FLgc8OrarhsC+m2BdTmuuL1XAI5d3Ak5n9S/n3mT9/eyRL8lJ6v9n+BHq/JnkvsKuq/nGohQ2BfTHFvpjSWl8c8QEO7X0pktSPVRHgOrQkW7vfbV/17Isp9sWUldoXq/pGnu5pQZr94RyrlX0xxb6YsiL7YjWcxDyUFfmlDEqSp9I7jHRDVT0w7a1vD6mkoUlyJlBVdWP3MO7zgDur6j1DLm3oknygqi5d7X2R5Hn0nv1720rti9Ue4D8ZdgHLJckbgdcDdwDvS3J5VV3Tvf1XrNCz7IOQ5K3AS4G1SXYDzwauB7YlOaOq/nKY9S2nJDMfwhLghUlOAKiqly97UUOS5KtVdWY3/9v0/r18Enhrkl+uqu1DLXAWq/oYeJJ7quq0YdexHJLcCjy3qh5IMgp8DPi7qvrrJDdX1RnDrXD5dH2xGTgG+A5wSlXdn+RYen+dPGOY9S2nJDcBXwfeS+8u5QAfpveELarqC8OrbnlN/3eQ5Ebg/Kqa6C43/kpV/dJwK3y0I34EnuSWud4CNixnLUO2ZvKwSVXdneRs4GNJnsgqO5QEPFRVDwM/SvJvVXU/QFU9mOSRIde23MaAy4E/Bv6wqvYleXA1Bfc0RyVZT+/cYKpqAqCq/ifJQ8MtbXZHfIDTC+mXAN+f0R7gS8tfztB8J8nmqtoH0I3EXwZcAay4kcWA/STJz1XVj4BnTTYmOR5YVQFeVY8A70zy9930AKsjF2ZzPLCXXjZUkp+vqu8kWccKHeSshi/q08C6yeCaLsn1y17N8FwK/MwooqoeAi5NsiJP0AzQC6rqx/DTAJt0NLBlOCUNV1XdC1yS5ALg/mHXMwxVNTrHW48Av76MpfRtVR8Dl6SWrerrwCWpZQa4JDXKANeKkOThJPumvbZ17c9PcnvXdmySt3fLb1/APt4yY3k1ncTWEchj4FoRkjxQVetmaf9betdmv79bvh8YmTwJuRT7kFrlCFwrVpLXAq8A/izJVd1dg8cBNyR5ZZKRJB9PcmP3Oqvbbl2S9ye5NcktSX4jyXbg2G4kf1W33gPd9KNJzp+23yu7bdZ0I/4bu895Xff+2UmuT/KxJHd2taV771lJvpBkb5Lrkmzs2t+Y5Ovd53yka/vVaX9x3JzkccvWuToyVJUvX0N/AQ8D+6a9Xtm1XwlcPG29B6bNfwh4Xjd/GnBHN/824F3T1ls/c9vpy/QuEdvZzT+G3u/GHwtsBf6kaz8GGAeeBJwN/De9R/MdBXwZeB69yxC/RO8vBIBXAld08/cBx3TzJ3TTfwDO6ubXAWuH/T34auu1Gq4DVxserKrNh7nNucDp3eAX4PHdKPZculvBAapq5k1cM30GeHeSY+j9qNUXq3dX5ouBZyS
5uFvveGATvd/Q+Wr1rp2efJbiKL3nrD4d2N3VtAbY3217C3BVkk8Bn+ra/hV4R/cXwScmP0/qlwGulh1F7/ddHpze2B3O6PvkTlX9b3dT10vojZo/PPlRwO9X1XUzPv9sYPox+Ifp/VsKcHtVPXeW3VwAvAB4OfCnSZ5WVdvTe4TX+cBXkpxbVXf2W7fkMXC17HPATx+Jl2TzHO3ru9n/S3L0HJ/1EeC3gOcDk4F9HfC7k9sk+YUc+jmq3wBGkjy3W//oJE9LchRwalV9HngzcAKwLsmTq+rWqnobvcMzT+3vP1vqMcC1Uhybn72MsJ+f7nwjMNadGPw68Dtd+18A65PcluRrwAu79h3ALZMnMWf4HL0R8j9V1eTPDL+X3i/13ZTkNuA9HOKv1m67i4G3dfvdB/wKvUMpH0zvVxBvBt5ZVT8A3jStxgfpHcqR+uZlhJLUKEfgktQoA1ySGmWAS1KjDHBJapQBLkmNMsAlqVEGuCQ16v8BiQJKNtSjlfEAAAAASUVORK5CYII=\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "# visulaise \n", + "plot_data('Effectiveness')" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
VideoIDEffectiveness
0pvuN_WvF1toneg
1eRLJscAlk1Mpos
2VbiRNT_gWUQneg
35scez5dqtAcpos
4JDcro7dPqpAneg
.........
132JYZpxRy5Mfgpos
133xXMlFFY9uEIpos
1348DiWzvE52ZYneg
135OwqIy8Ikv-cneg
136lPgZfhnCAdIneg
\n", + "

137 rows × 2 columns

\n", + "
" + ], + "text/plain": [ + " VideoID Effectiveness\n", + "0 pvuN_WvF1to neg\n", + "1 eRLJscAlk1M pos\n", + "2 VbiRNT_gWUQ neg\n", + "3 5scez5dqtAc pos\n", + "4 JDcro7dPqpA neg\n", + ".. ... ...\n", + "132 JYZpxRy5Mfg pos\n", + "133 xXMlFFY9uEI pos\n", + "134 8DiWzvE52ZY neg\n", + "135 OwqIy8Ikv-c neg\n", + "136 lPgZfhnCAdI neg\n", + "\n", + "[137 rows x 2 columns]" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "## Custom encoder\n", + "def custom_encoder(df):\n", + " df.replace(to_replace = 1.0, value = \"neg\", inplace=True)\n", + " df.replace(to_replace = 2.0, value = \"neg\", inplace=True)\n", + " df.replace(to_replace = 4.0, value = \"pos\", inplace=True)\n", + " df.replace(to_replace = 5.0, value = \"pos\", inplace=True)\n", + "\n", + "custom_encoder(df['Effectiveness'])\n", + "\n", + "data = df[[\"VideoID\", \"Effectiveness\"]]\n", + "data = data[data[\"Effectiveness\"] != 3]\n", + "data = data.loc[(data[\"Effectiveness\"] == 'pos') | (data[\"Effectiveness\"] == 'neg')]\n", + "data = data.reset_index()\n", + "del data[\"index\"]\n", + "data" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAXAAAAEPCAYAAABbbZ8rAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjQuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8rg+JYAAAACXBIWXMAAAsTAAALEwEAmpwYAAAQf0lEQVR4nO3df5BdZX3H8feHBBRBSlI2aUbEqI20SBXrjkJRiw1YBEvSKSBMdTIOTlqn/up0tNFpO9OOf4Q6Y7UzjpoiujMiSlVMqlMxTUVHRWSRyK/AxFIESposKEWUiuC3f9yTuiwb92Z3716e5P2aOXPO89xz7vkCux+efe4596SqkCS155BhFyBJmh0DXJIaZYBLUqMMcElqlAEuSY1avJAnO+aYY2rlypULeUpJat71119/X1WNTO1f0ABfuXIl4+PjC3lKSWpeku9P1+8UiiQ1ygCXpEYZ4JLUKANckhplgEtSowxwSWqUAS5JjTLAJalRBrgkNWpB78RsxcoNXxx2CQeUOzeePewSpAOSI3BJapQBLkmNMsAlqVEGuCQ1ygCXpEYZ4JLUKANckhplgEtSowxwSWqUAS5JjTLAJalRBrgkNWrGAE9yfJLtk5YHk7w9ydIkW5Ps7NZLFqJgSVLPjAFeVbdX1UlVdRLwYuAnwJXABmBbVa0CtnVtSdIC2d8plNXAf1TV94E1wFjXPwasnce6JEkz2N8AvwC4vNteXlW7ALr1sukOSLI+yXiS8YmJidlXKkl6nL4DPMlhwDnAP+/PCapqU1WNVtXoyMjI/tYnSdqH/RmBvxr4TlXt7tq7k6wA6NZ75rs4SdK+7U+AX8gvpk8AtgDruu11wOb5KkqSNLO+AjzJ04AzgM9N6t4InJFkZ/faxvkvT5K0L3091LiqfgL86pS+++ldlSJJGgLvxJSkRhngktQoA1ySGmWAS1KjDHBJapQBLkmNMsAlqVEGuCQ1ygCXpEYZ4JLUKANckhplgEtSowxwSWqUAS5JjTLAJalRBrgkNcoAl6RGGeCS1Kh+n4l5dJLPJLktyY4kpyRZmmRrkp3desmgi5Uk/UJfz8QEPgB8qarOTXIY8DTg3cC2qtqYZAOwAfjLAdUpCVi54YvDLuGAcufGs4ddwpzMOAJPchTwCuCjAFX1SFU9AKwBxrrdxoC1gylRkjSdfqZQngNMAB9LckOSS5IcASyvql0A3XrZdAcnWZ9kPMn4xMTEvBUuSQe7fgJ8MfDbwIeq6kXAj+lNl/SlqjZV1WhVjY6MjMyyTEnSVP0E+D3APVV1bdf+DL1A351kBUC33jOYEiVJ05kxwKvqv4G7kxzfda0GbgW2AOu6vnXA5oFUKEmaVr9XobwFuKy7AuUO4A30wv+KJBcBdwHnDaZESdJ0+grwqtoOjE7z0up5rUaS1DfvxJSkRhngktQoA1ySGmWAS1KjDHBJapQBLkmNMsAlqVEGuCQ1ygCXpEYZ4JLUKANckhplgEtSowxwSWqUAS5JjTLAJalRBrgkNcoAl6RGGeCS1Ki+HqmW5E7gR8BjwKNVNZpkKfBpYCVwJ3B+Vf1wMGVKkqbanxH4K6vqpKra+2zMDcC2qloFbOvakqQFMpcplDXAWLc9BqydczWSpL71G+AFfDnJ9UnWd33Lq2oXQLdeNogCJUnT62sOHDi1qu5NsgzYmuS2fk/QBf56gOOOO24WJUqSptPXCLyq7u3We4ArgZcAu5OsAOjWe/Zx7KaqGq2q0ZGRkfmpWpI0c4AnOSLJ0/duA68Cbga2AOu63dYBmwdVpCTpifqZQlkOXJlk7/6frKovJbkOuCLJRcBdwHmDK1OSNNWMAV5VdwAvnKb/fmD1IIqSJM3MOzElqVEGuCQ1ygCXpEYZ4JLUKANckhplgEtSowxwSWqUAS5JjTLAJalRBrgkNcoAl6RGGeCS1CgDXJIaZYBLUqMMcElqlAEuSY0ywCWpUQa4JDXKAJekRvUd4EkWJbkhyRe69tIkW5P
s7NZLBlemJGmq/RmBvw3YMam9AdhWVauAbV1bkrRA+grwJMcCZwOXTOpeA4x122PA2nmtTJL0S/U7An8/8E7g55P6llfVLoBuvWy6A5OsTzKeZHxiYmIutUqSJpkxwJO8BthTVdfP5gRVtamqRqtqdGRkZDZvIUmaxuI+9jkVOCfJWcBTgaOSfALYnWRFVe1KsgLYM8hCJUmPN+MIvKreVVXHVtVK4ALg36vqdcAWYF232zpg88CqlCQ9wVyuA98InJFkJ3BG15YkLZB+plD+X1VdDVzdbd8PrJ7/kiRJ/fBOTElqlAEuSY0ywCWpUQa4JDXKAJekRhngktQoA1ySGmWAS1KjDHBJapQBLkmNMsAlqVEGuCQ1ygCXpEYZ4JLUKANckhplgEtSowxwSWqUAS5JjZoxwJM8Ncm3k3w3yS1J/rbrX5pka5Kd3XrJ4MuVJO3Vzwj8p8DvVdULgZOAM5OcDGwAtlXVKmBb15YkLZAZA7x6Huqah3ZLAWuAsa5/DFg7iAIlSdPraw48yaIk24E9wNaquhZYXlW7ALr1sn0cuz7JeJLxiYmJeSpbktRXgFfVY1V1EnAs8JIkJ/Z7gqraVFWjVTU6MjIyyzIlSVPt11UoVfUAcDVwJrA7yQqAbr1nvouTJO1bP1ehjCQ5uts+HDgduA3YAqzrdlsHbB5QjZKkaSzuY58VwFiSRfQC/4qq+kKSa4ArklwE3AWcN8A6JUlTzBjgVXUj8KJp+u8HVg+iKEnSzLwTU5IaZYBLUqMMcElqlAEuSY0ywCWpUQa4JDXKAJekRhngktQoA1ySGmWAS1KjDHBJapQBLkmNMsAlqVEGuCQ1ygCXpEYZ4JLUKANckhplgEtSo/p5qPEzk3wlyY4ktyR5W9e/NMnWJDu79ZLBlytJ2qufEfijwF9U1W8CJwN/luQEYAOwrapWAdu6tiRpgcwY4FW1q6q+023/CNgBPANYA4x1u40BawdUoyRpGvs1B55kJb0n1F8LLK+qXdALeWDZPo5Zn2Q8yfjExMQcy5Uk7dV3gCc5Evgs8PaqerDf46pqU1WNVtXoyMjIbGqUJE2jrwBPcii98L6sqj7Xde9OsqJ7fQWwZzAlSpKm089VKAE+CuyoqvdNemkLsK7bXgdsnv/yJEn7sriPfU4FXg/clGR71/duYCNwRZKLgLuA8wZSoSRpWjMGeFV9Hcg+Xl49v+VIkvrlnZiS1CgDXJIaZYBLUqMMcElqlAEuSY0ywCWpUQa4JDXKAJekRhngktQoA1ySGmWAS1KjDHBJapQBLkmNMsAlqVEGuCQ1ygCXpEYZ4JLUKANckhrVz0ONL02yJ8nNk/qWJtmaZGe3XjLYMiVJU/UzAv84cOaUvg3AtqpaBWzr2pKkBTRjgFfV14AfTOleA4x122PA2vktS5I0k9nOgS+vql0A3XrZ/JUkSerHwD/ETLI+yXiS8YmJiUGfTpIOGrMN8N1JVgB06z372rGqNlXVaFWNjoyMzPJ0kqSpZhvgW4B13fY6YPP8lCNJ6lc/lxFeDlwDHJ/kniQXARuBM5LsBM7o2pKkBbR4ph2q6sJ9vLR6nmuRJO0H78SUpEYZ4JLUKANckhplgEtSowxwSWqUAS5JjTLAJalRBrgkNcoAl6RGGeCS1CgDXJIaZYBLUqMMcElqlAEuSY0ywCWpUQa4JDXKAJekRhngktQoA1ySGjWnAE9yZpLbk3wvyYb5KkqSNLNZB3iSRcAHgVcDJwAXJjlhvgqTJP1ycxmBvwT4XlXdUVWPAJ8C1sxPWZKkmSyew7HPAO6e1L4HeOnUnZKsB9Z3zYeS3D6Hc+rxjgHuG3YRM8nFw65AQ+DP5vx61nSdcwnwTNNXT+io2gRsmsN5tA9JxqtqdNh1SFP5s7kw5jKFcg/wzEntY4F751aOJKlfcwnw64BVSZ6d5DDgAmDL/JQlSZrJrKdQqurRJG8GrgIWAZdW1S3zVpn64dSUnqz82VwAqXrCtLUkqQH
eiSlJjTLAJalRBrgkNcoAl6RGGeCS5kWSv09yVJJDk2xLcl+S1w27rgOZAd6YJD9K8uCU5e4kVyZ5zrDr00HtVVX1IPAaejf6PQ94x3BLOrDN5VZ6Dcf76N3x+kl6X2dwAfBrwO3ApcBpQ6tMB7tDu/VZwOVV9YNkum/c0HzxOvDGJLm2ql46pe9bVXVyku9W1QuHVZsObkk2AmuBh+l9W+nRwBem/rxq/jiF0p6fJzk/ySHdcv6k1/y/sYamqjYApwCjVfUz4Mf4FdMD5Qi8Md089wfo/aIU8C3gz4H/Al5cVV8fYnk6iCU5FHgT8Iqu66vAh7sw1wAY4JLmRZJL6M2Dj3Vdrwceq6o3Dq+qA5sB3pgkzwM+BCyvqhOTvAA4p6reM+TSdJCb7jMYP5cZLOfA2/NPwLuAnwFU1Y30rkSRhu2xJM/d2+im+x4bYj0HPC8jbM/TqurbUy7PenRYxUiTvAP4SpI7uvZK4A3DK+fA5wi8Pfd1o5wCSHIusGu4JUkAfAP4CPDzbvkIcM1QKzrAOQfemO7P0k3A7wA/BP4T+OOq+v5QC9NBL8kVwIPAZV3XhcCSqjpveFUd2AzwxiR5CnAuvT9Pl9L7hamq+rth1iX5IebCcwqlPZuBP6D3Iea9wEP0bpiQhu2GJCfvbSR5Kb1pFQ2II/DGJLm5qk4cdh3SVEl2AMcDd3VdxwE76M2HV1W9YFi1Hai8CqU930zyW1V107ALkaY4c9gFHGwcgTcmya3Ar9P78PKn9L6R0NGNdBAywBuT5FnT9XsVinTwMcAlqVFehSJJjTLAJalRBrieFJI8lmT7pGVD1//yJLd0fYcneW/Xfu8szvHuKe1vzlf90jA4B64nhSQPVdWR0/R/GLi2qj7WtR8ERqrqp/N1DqlVjsD1pJXkjcD5wN8kuSzJFuAI4Nokr00ykuSzSa7rllO7445M8rEkNyW5Mckfdc9rPLwbyV/W7fdQt/50krMmnffj3TGLuhH/dd37/En3+mlJrk7ymSS3dbWle+3FSb6a5PokVyVZ0fW/Ncmt3ft8quv73Ul/cdyQ5OkL9i9XB4aqcnEZ+kLve6O3T1pe2/V/HDh30n4PTdr+JPCybvs4YEe3fTHw/kn7LZl67OQ28IfAWLd9GHA3cDiwHvirrv8pwDjwbOA04H+AY+kNgq4BXkbvaTTfpPcXAsBrgUu77XuBp3TbR3frfwFO7baPBBYP+7+DS1uLd2LqyeLhqjppP485HThh0nejH9WNYk9n0kMuquqHM7zPvwL/2H1R2JnA16rq4SSvAl7QfWUvwK8Aq4BHgG9X1T0ASbbT+3KxB4ATga1dTYv4xVf93ghcluTzwOe7vm8A7+v+Ivjc3veT+mWAq2WHAKdU1cOTO7vpjL4/3Kmq/01yNfD79EbNl+99K+AtVXXVlPc/jd5dsHs9Ru93KcAtVXXKNKc5m97Dfs8B/jrJ86tqY5IvAmcB30pyelXd1m/dknPgatmXgTfvbSQ5aR/9S7rNn3VPTp/Op+g9PeblwN7Avgp4095jkjwvyRG/pJ7bgZEkp3T7H5rk+UkOAZ5ZVV8B3gkcDRyZ5LlVdVNVXUxveuY3+vvHlnoMcD1ZHJ7HX0a4sY9j3gqMdh8M3gr8adf/HmBJkpuTfBd4Zde/Cbhx74eYU3yZ3gj536rqka7vEuBW4DtJbqb3hJl9/tXaHXcucHF33u30HryxCPhEkpuAG4B/qKoHgLdPqvFhelM5Ut+8jFCSGuUIXJIaZYBLUqMMcElqlAEuSY0ywCWpUQa4JDXKAJekRv0fdiPZqpRdAToAAAAASUVORK5CYII=\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "# visulaise \n", + "plot_data('Effectiveness')" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "# get documnets (pre-processd comments)\n", + "documents = []\n", + "for i in range(len(data)):\n", + " VideoID = data[\"VideoID\"][i]\n", + " comment = pd.read_csv(\"../../NLP Preprocessing/03_Processed_Comments/\"+VideoID+\"/\"+VideoID+\"_all_words.csv\")\n", + " documents.append(list(comment[\"0\"]))" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "# create two new columns of the pre-processed data in list and string form\n", + "data['cleaned'] = documents\n", + "data['cleaned_string'] = [' '.join(map(str, l)) for l in data['cleaned']]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Additional Features" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [], + "source": [ + "## Create more features\n", + "\n", + "# functions for feature creation\n", + "def words_count(text):\n", + " return len(text.split())\n", + "\n", + "def sent_count(text):\n", + " return len(nltk.sent_tokenize(text))\n", + "\n", + "def punct_count(text):\n", + " cnt = 0\n", + " for i in punctuation:\n", + " cnt = cnt + text.count(i)\n", + " return cnt\n", + "\n", + "def emoji_count(text):\n", + " emojis_iter = map(lambda y: y, emoji.UNICODE_EMOJI['en'].keys())\n", + " regex_set = re.compile('|'.join(re.escape(em) for em in emojis_iter))\n", + " new_list = regex_set.findall(text)\n", + " return len(new_list)\n", + "\n", + "def average(lst):\n", + " return sum(lst) / len(lst)" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [], + "source": [ + "# function to create a new feature\n", + "def new_feature(fun):\n", + " ave = []\n", + " for i in 
range(len(data)):\n", + " VideoID = data[\"VideoID\"][i]\n", + " print(i)\n", + " video = pd.read_json(\"../../NLP Preprocessing/01_Comments/\"+VideoID+\".json\", lines=True)\n", + " comments = video[\"text\"]\n", + " feature_lengths = []\n", + " for comment in comments:\n", + " feature_lengths.append(fun(comment))\n", + " ave.append(average(feature_lengths))\n", + " return ave" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [], + "source": [ + "## load pickles ##\n", + "average_word_length_f = open('pickles/average_word_length.pickle', \"rb\")\n", + "average_word_length = pickle.load(average_word_length_f)\n", + "average_word_length_f.close()\n", + "\n", + "average_sent_length_f = open('pickles/average_sent_length.pickle', \"rb\")\n", + "average_sent_length = pickle.load(average_sent_length_f)\n", + "average_sent_length_f.close()\n", + "\n", + "average_punctuation_count_f = open('pickles/average_punctuation_count.pickle', \"rb\")\n", + "average_punctuation_count = pickle.load(average_punctuation_count_f)\n", + "average_punctuation_count_f.close()\n", + "\n", + "average_emoji_count_f = open('pickles/average_emoji_count.pickle', \"rb\")\n", + "average_emoji_count = pickle.load(average_emoji_count_f)\n", + "average_emoji_count_f.close()" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [], + "source": [ + "## Sentiment features\n", + "def average_sentiment():\n", + " ave = []\n", + " for i in range(len(data)):\n", + " VideoID = data[\"VideoID\"][i]\n", + " print(i)\n", + " video = pd.read_json(\"../../NLP Preprocessing/03_Processed_Comments/\"+VideoID+\"/\"+VideoID+\".json\", lines=True)\n", + " sentiments = video[\"sentiment\"]\n", + " sentiment_strengths = []\n", + " for sentiment in sentiments:\n", + " sentiment_strengths.append(sentiment)\n", + " ave.append(average(sentiment_strengths))\n", + " return ave\n", + "\n", + "def neg_neu_pos(lst):\n", + " neg_count, neu_count, 
pos_count = (0,)*3 \n", + " for sentiment in lst:\n", + " if sentiment < 0:\n", + " neg_count += 1\n", + " if sentiment == 0:\n", + " neu_count += 1\n", + " if sentiment > 0:\n", + " pos_count += 1\n", + " return neg_count, neu_count, pos_count\n", + "\n", + "def ratio(count,video):\n", + " return count/len(video)\n", + "\n", + "def sentiment_ratio():\n", + " sentiment_ratios_neg, sentiment_ratios_neu, sentiment_ratios_pos = [],[],[]\n", + " for i in range(len(data)):\n", + " VideoID = data[\"VideoID\"][i]\n", + " print(i)\n", + " video = pd.read_json(\"../../NLP Preprocessing/03_Processed_Comments/\"+VideoID+\"/\"+VideoID+\".json\", lines=True)\n", + " sentiments = video[\"sentiment\"]\n", + " neg_count, neu_count, pos_count = neg_neu_pos(sentiments)\n", + " sentiment_ratios_neg.append(ratio(neg_count,video))\n", + " sentiment_ratios_neu.append(ratio(neu_count,video))\n", + " sentiment_ratios_pos.append(ratio(pos_count,video))\n", + " return sentiment_ratios_neg, sentiment_ratios_neu, sentiment_ratios_pos" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [], + "source": [ + "## load pickles ##\n", + "ave_sentiment_f = open('pickles/ave_sentiment.pickle', \"rb\")\n", + "ave_sentiment = pickle.load(ave_sentiment_f)\n", + "ave_sentiment_f.close()\n", + "\n", + "sentiment_ratios_neg_f = open('pickles/sentiment_ratios_neg.pickle', \"rb\")\n", + "sentiment_ratios_neg = pickle.load(sentiment_ratios_neg_f)\n", + "sentiment_ratios_neg_f.close()\n", + "\n", + "sentiment_ratios_neu_f = open('pickles/sentiment_ratios_neu.pickle', \"rb\")\n", + "sentiment_ratios_neu = pickle.load(sentiment_ratios_neu_f)\n", + "sentiment_ratios_neu_f.close()\n", + "\n", + "sentiment_ratios_pos_f = open('pickles/sentiment_ratios_pos.pickle', \"rb\")\n", + "sentiment_ratios_pos = pickle.load(sentiment_ratios_pos_f)\n", + "sentiment_ratios_pos_f.close()" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [], + 
"source": [ + "num_comments = []\n", + "videoIDs = data[\"VideoID\"]\n", + "\n", + "for videoID in videoIDs:\n", + " video = pd.read_json(\"../../NLP Preprocessing/01_Comments/\"+videoID+\".json\", lines=True)\n", + " num_comments.append(len(video))" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
VideoIDEffectivenesscleanedcleaned_stringnum_commentsaverage_word_lengthaverage_sentence_lengthaverage_punctuation_countaverage_emoji_countaverage_sentimentsentiment_ratio_negativesentiment_ratio_neutralsentiment_ratio_positive
0pvuN_WvF1toneg[clean, version, video, child, love, northeast...clean version video child love northeast india...12553311.3707391.2929592.1235770.5883710.0956330.1372950.5296060.333100
1eRLJscAlk1Mpos[step, take, help, fight, climate, change, wel...step take help fight climate change well equal...16195317.1952291.5949942.7182890.4897040.0376110.2023550.5009050.296740
2VbiRNT_gWUQneg[country, disappear, video, year, old, world, ...country disappear video year old world map did...2761618.3866601.7267893.5407010.1179030.0528460.1960100.4451770.358814
35scez5dqtAcpos[im, watch, trump, biden, ha, already, start, ...im watch trump biden ha already start process ...1377332.3004432.3645545.8706160.0606260.0206080.3013870.3155450.383068
4JDcro7dPqpAneg[fun, fact, cow, belch, fart, adult, version, ...fun fact cow belch fart adult version bill nye...1882134.8694542.5595886.6242500.1069020.0322380.2967960.3134800.389724
..........................................
132JYZpxRy5Mfgpos[usually, consumer_NEG, say_NEG, though_NEG, s...usually consumer_NEG say_NEG though_NEG suppor...41519.0361451.7590363.5084340.2072290.0901640.1493980.5132530.337349
133xXMlFFY9uEIpos[joe, biden, ha, plan, fix, thing, forefront, ...joe biden ha plan fix thing forefront news sev...43137.7749422.7006969.0394430.1531320.0346210.2250580.3967520.378190
1348DiWzvE52ZYneg[marios, leave, hand, doe, intro, impressive, ...marios leave hand doe intro impressive today p...526218.2985561.7797423.7267200.1366400.1434380.1296090.4030790.467313
135OwqIy8Ikv-cneg[lie, interseting, isnt, group_NEG, consist_NE...lie interseting isnt group_NEG consist_NEG com...1442157.6512033.80396613.2889540.0294020.0492500.2499830.2782750.471743
136lPgZfhnCAdIneg[miss, man, wa, hero, didnt, cherish_NEG, enou...miss man wa hero didnt cherish_NEG enough_NEG ...377740.9997353.0145628.4156740.0349480.0178250.2949430.2912360.413820
\n", + "

137 rows × 13 columns

\n", + "
" + ], + "text/plain": [ + " VideoID Effectiveness \\\n", + "0 pvuN_WvF1to neg \n", + "1 eRLJscAlk1M pos \n", + "2 VbiRNT_gWUQ neg \n", + "3 5scez5dqtAc pos \n", + "4 JDcro7dPqpA neg \n", + ".. ... ... \n", + "132 JYZpxRy5Mfg pos \n", + "133 xXMlFFY9uEI pos \n", + "134 8DiWzvE52ZY neg \n", + "135 OwqIy8Ikv-c neg \n", + "136 lPgZfhnCAdI neg \n", + "\n", + " cleaned \\\n", + "0 [clean, version, video, child, love, northeast... \n", + "1 [step, take, help, fight, climate, change, wel... \n", + "2 [country, disappear, video, year, old, world, ... \n", + "3 [im, watch, trump, biden, ha, already, start, ... \n", + "4 [fun, fact, cow, belch, fart, adult, version, ... \n", + ".. ... \n", + "132 [usually, consumer_NEG, say_NEG, though_NEG, s... \n", + "133 [joe, biden, ha, plan, fix, thing, forefront, ... \n", + "134 [marios, leave, hand, doe, intro, impressive, ... \n", + "135 [lie, interseting, isnt, group_NEG, consist_NE... \n", + "136 [miss, man, wa, hero, didnt, cherish_NEG, enou... \n", + "\n", + " cleaned_string num_comments \\\n", + "0 clean version video child love northeast india... 125533 \n", + "1 step take help fight climate change well equal... 161953 \n", + "2 country disappear video year old world map did... 27616 \n", + "3 im watch trump biden ha already start process ... 13773 \n", + "4 fun fact cow belch fart adult version bill nye... 18821 \n", + ".. ... ... \n", + "132 usually consumer_NEG say_NEG though_NEG suppor... 415 \n", + "133 joe biden ha plan fix thing forefront news sev... 431 \n", + "134 marios leave hand doe intro impressive today p... 5262 \n", + "135 lie interseting isnt group_NEG consist_NEG com... 14421 \n", + "136 miss man wa hero didnt cherish_NEG enough_NEG ... 
3777 \n", + "\n", + " average_word_length average_sentence_length average_punctuation_count \\\n", + "0 11.370739 1.292959 2.123577 \n", + "1 17.195229 1.594994 2.718289 \n", + "2 18.386660 1.726789 3.540701 \n", + "3 32.300443 2.364554 5.870616 \n", + "4 34.869454 2.559588 6.624250 \n", + ".. ... ... ... \n", + "132 19.036145 1.759036 3.508434 \n", + "133 37.774942 2.700696 9.039443 \n", + "134 18.298556 1.779742 3.726720 \n", + "135 57.651203 3.803966 13.288954 \n", + "136 40.999735 3.014562 8.415674 \n", + "\n", + " average_emoji_count average_sentiment sentiment_ratio_negative \\\n", + "0 0.588371 0.095633 0.137295 \n", + "1 0.489704 0.037611 0.202355 \n", + "2 0.117903 0.052846 0.196010 \n", + "3 0.060626 0.020608 0.301387 \n", + "4 0.106902 0.032238 0.296796 \n", + ".. ... ... ... \n", + "132 0.207229 0.090164 0.149398 \n", + "133 0.153132 0.034621 0.225058 \n", + "134 0.136640 0.143438 0.129609 \n", + "135 0.029402 0.049250 0.249983 \n", + "136 0.034948 0.017825 0.294943 \n", + "\n", + " sentiment_ratio_neutral sentiment_ratio_positive \n", + "0 0.529606 0.333100 \n", + "1 0.500905 0.296740 \n", + "2 0.445177 0.358814 \n", + "3 0.315545 0.383068 \n", + "4 0.313480 0.389724 \n", + ".. ... ... 
\n", + "132 0.513253 0.337349 \n", + "133 0.396752 0.378190 \n", + "134 0.403079 0.467313 \n", + "135 0.278275 0.471743 \n", + "136 0.291236 0.413820 \n", + "\n", + "[137 rows x 13 columns]" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data[\"num_comments\"] = num_comments\n", + "data[\"average_word_length\"] = average_word_length\n", + "data[\"average_sentence_length\"] = average_sent_length\n", + "data[\"average_punctuation_count\"] = average_punctuation_count\n", + "data[\"average_emoji_count\"] = average_emoji_count\n", + "data[\"average_sentiment\"] = ave_sentiment\n", + "data[\"sentiment_ratio_negative\"] = sentiment_ratios_neg\n", + "data[\"sentiment_ratio_neutral\"] = sentiment_ratios_neu\n", + "data[\"sentiment_ratio_positive\"] = sentiment_ratios_pos\n", + "data" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Classifiers with Feature Engineering" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [], + "source": [ + "# Using TF-IDF\n", + "vectorizer = TfidfVectorizer(min_df=1)\n", + "final_features = vectorizer.fit_transform(data['cleaned_string']).toarray()" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [], + "source": [ + "final = pd.DataFrame(final_features)" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [], + "source": [ + "#lListing all features\n", + "features = ['average_word_length', 'average_sentence_length', 'average_punctuation_count',\n", + " 'average_emoji_count', 'average_sentiment', 'sentiment_ratio_negative',\n", + " 'sentiment_ratio_neutral', 'sentiment_ratio_positive']" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
0123456789...569679569680average_word_lengthaverage_sentence_lengthaverage_punctuation_countaverage_emoji_countaverage_sentimentsentiment_ratio_negativesentiment_ratio_neutralsentiment_ratio_positive
00.0002590.0000000.0003150.0000000.0004350.0000000.0003720.0002370.0000850.000149...0.00.011.3707391.2929592.1235770.5883710.0956330.1372950.5296060.333100
10.0004730.0000530.0002220.0000000.0000000.0000840.0000000.0000670.0000000.000000...0.00.017.1952291.5949942.7182890.4897040.0376110.2023550.5009050.296740
20.0002010.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0003970.000000...0.00.018.3866601.7267893.5407010.1179030.0528460.1960100.4451770.358814
30.0005390.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.000000...0.00.032.3004432.3645545.8706160.0606260.0206080.3013870.3155450.383068
40.0001810.0002630.0000000.0002580.0000000.0000000.0000000.0000000.0000000.000000...0.00.034.8694542.5595886.6242500.1069020.0322380.2967960.3134800.389724
..................................................................
1320.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.000000...0.00.019.0361451.7590363.5084340.2072290.0901640.1493980.5132530.337349
1330.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.000000...0.00.037.7749422.7006969.0394430.1531320.0346210.2250580.3967520.378190
1340.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.000000...0.00.018.2985561.7797423.7267200.1366400.1434380.1296090.4030790.467313
1350.0000000.0002250.0003790.0008830.0000000.0000000.0000000.0000000.0000000.000000...0.00.057.6512033.80396613.2889540.0294020.0492500.2499830.2782750.471743
1360.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.000000...0.00.040.9997353.0145628.4156740.0349480.0178250.2949430.2912360.413820
\n", + "

137 rows × 569689 columns

\n", + "
" + ], + "text/plain": [ + " 0 1 2 3 4 5 6 \\\n", + "0 0.000259 0.000000 0.000315 0.000000 0.000435 0.000000 0.000372 \n", + "1 0.000473 0.000053 0.000222 0.000000 0.000000 0.000084 0.000000 \n", + "2 0.000201 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 \n", + "3 0.000539 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 \n", + "4 0.000181 0.000263 0.000000 0.000258 0.000000 0.000000 0.000000 \n", + ".. ... ... ... ... ... ... ... \n", + "132 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 \n", + "133 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 \n", + "134 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 \n", + "135 0.000000 0.000225 0.000379 0.000883 0.000000 0.000000 0.000000 \n", + "136 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 \n", + "\n", + " 7 8 9 ... 569679 569680 average_word_length \\\n", + "0 0.000237 0.000085 0.000149 ... 0.0 0.0 11.370739 \n", + "1 0.000067 0.000000 0.000000 ... 0.0 0.0 17.195229 \n", + "2 0.000000 0.000397 0.000000 ... 0.0 0.0 18.386660 \n", + "3 0.000000 0.000000 0.000000 ... 0.0 0.0 32.300443 \n", + "4 0.000000 0.000000 0.000000 ... 0.0 0.0 34.869454 \n", + ".. ... ... ... ... ... ... ... \n", + "132 0.000000 0.000000 0.000000 ... 0.0 0.0 19.036145 \n", + "133 0.000000 0.000000 0.000000 ... 0.0 0.0 37.774942 \n", + "134 0.000000 0.000000 0.000000 ... 0.0 0.0 18.298556 \n", + "135 0.000000 0.000000 0.000000 ... 0.0 0.0 57.651203 \n", + "136 0.000000 0.000000 0.000000 ... 0.0 0.0 40.999735 \n", + "\n", + " average_sentence_length average_punctuation_count average_emoji_count \\\n", + "0 1.292959 2.123577 0.588371 \n", + "1 1.594994 2.718289 0.489704 \n", + "2 1.726789 3.540701 0.117903 \n", + "3 2.364554 5.870616 0.060626 \n", + "4 2.559588 6.624250 0.106902 \n", + ".. ... ... ... 
\n", + "132 1.759036 3.508434 0.207229 \n", + "133 2.700696 9.039443 0.153132 \n", + "134 1.779742 3.726720 0.136640 \n", + "135 3.803966 13.288954 0.029402 \n", + "136 3.014562 8.415674 0.034948 \n", + "\n", + " average_sentiment sentiment_ratio_negative sentiment_ratio_neutral \\\n", + "0 0.095633 0.137295 0.529606 \n", + "1 0.037611 0.202355 0.500905 \n", + "2 0.052846 0.196010 0.445177 \n", + "3 0.020608 0.301387 0.315545 \n", + "4 0.032238 0.296796 0.313480 \n", + ".. ... ... ... \n", + "132 0.090164 0.149398 0.513253 \n", + "133 0.034621 0.225058 0.396752 \n", + "134 0.143438 0.129609 0.403079 \n", + "135 0.049250 0.249983 0.278275 \n", + "136 0.017825 0.294943 0.291236 \n", + "\n", + " sentiment_ratio_positive \n", + "0 0.333100 \n", + "1 0.296740 \n", + "2 0.358814 \n", + "3 0.383068 \n", + "4 0.389724 \n", + ".. ... \n", + "132 0.337349 \n", + "133 0.378190 \n", + "134 0.467313 \n", + "135 0.471743 \n", + "136 0.413820 \n", + "\n", + "[137 rows x 569689 columns]" + ] + }, + "execution_count": 20, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# merging all features with the above TF-IDF. \n", + "ff = pd.merge(final,data[features],left_index=True, right_index=True)\n", + "ff" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [], + "source": [ + "# first split the dataset into testing and training set:\n", + "# this block is to split the dataset into training and testing set \n", + "X = ff\n", + "y = data['Effectiveness']\n", + "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.3, random_state = 42)" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "C:\\Users\\Jared\\anaconda3\\lib\\site-packages\\sklearn\\linear_model\\_logistic.py:763: ConvergenceWarning: lbfgs failed to converge (status=1):\n", + "STOP: TOTAL NO. 
of ITERATIONS REACHED LIMIT.\n", + "\n", + "Increase the number of iterations (max_iter) or scale the data as shown in:\n", + " https://scikit-learn.org/stable/modules/preprocessing.html\n", + "Please also refer to the documentation for alternative solver options:\n", + " https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression\n", + " n_iter_i = _check_optimize_result(\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Accuracy => 69.05\n", + "\n", + "Logistic Regression Classifier results: \n", + "\n", + " precision recall f1-score support\n", + "\n", + " neg 0.70 0.73 0.71 22\n", + " pos 0.68 0.65 0.67 20\n", + "\n", + " accuracy 0.69 42\n", + " macro avg 0.69 0.69 0.69 42\n", + "weighted avg 0.69 0.69 0.69 42\n", + "\n" + ] + } + ], + "source": [ + "# Logistic Regression\n", + "pipeline = Pipeline([('clf', LogisticRegression(n_jobs=1, C=1e5))])\n", + "\n", + "LRC = pipeline.fit(X_train, y_train)\n", + "\n", + "ytest = np.array(y_test)\n", + "LRC_prediction = LRC.predict(X_test)\n", + "\n", + "print(\"Accuracy => \", round(accuracy_score(LRC_prediction, ytest)*100, 2))\n", + "print(\"\\nLogistic Regression Classifier results: \\n\")\n", + "print(classification_report(ytest, LRC_prediction))" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Accuracy => 52.38\n", + "\n", + "SGD Classifier results: \n", + "\n", + " precision recall f1-score support\n", + "\n", + " neg 0.52 1.00 0.69 22\n", + " pos 0.00 0.00 0.00 20\n", + "\n", + " accuracy 0.52 42\n", + " macro avg 0.26 0.50 0.34 42\n", + "weighted avg 0.27 0.52 0.36 42\n", + "\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "C:\\Users\\Jared\\anaconda3\\lib\\site-packages\\sklearn\\metrics\\_classification.py:1248: UndefinedMetricWarning: Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted 
samples. Use `zero_division` parameter to control this behavior.\n", + " _warn_prf(average, modifier, msg_start, len(result))\n", + "C:\\Users\\Jared\\anaconda3\\lib\\site-packages\\sklearn\\metrics\\_classification.py:1248: UndefinedMetricWarning: Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.\n", + " _warn_prf(average, modifier, msg_start, len(result))\n", + "C:\\Users\\Jared\\anaconda3\\lib\\site-packages\\sklearn\\metrics\\_classification.py:1248: UndefinedMetricWarning: Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.\n", + " _warn_prf(average, modifier, msg_start, len(result))\n" + ] + } + ], + "source": [ + "# SGD\n", + "SGD = SGDClassifier(loss='hinge', penalty='l2',alpha=1e-3, random_state=42, max_iter=5, tol=None)\n", + "\n", + "SGD.fit(X_train, y_train)\n", + "SGD_prediction = SGD.predict(X_test)\n", + "\n", + "print(\"Accuracy => \", round(accuracy_score(SGD_prediction, y_test)*100, 2))\n", + "print(\"\\nSGD Classifier results: \\n\")\n", + "print(classification_report(y_test, SGD_prediction))" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "C:\\Users\\Jared\\anaconda3\\lib\\site-packages\\sklearn\\svm\\_base.py:985: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations.\n", + " warnings.warn(\"Liblinear failed to converge, increase \"\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Accuracy => 59.52\n", + "\n", + "LinearSVC results: \n", + "\n", + " precision recall f1-score support\n", + "\n", + " neg 0.58 0.82 0.68 22\n", + " pos 0.64 0.35 0.45 20\n", + "\n", + " accuracy 0.60 42\n", + " macro avg 0.61 0.58 0.57 42\n", + "weighted avg 0.61 0.60 0.57 42\n", + "\n" + ] + } + ], + 
"source": [ + "# LinearSVC\n", + "LSVC = LinearSVC()\n", + "\n", + "LSVC.fit(X_train, y_train)\n", + "LSVC_prediction = LSVC.predict(X_test)\n", + "\n", + "print(\"Accuracy => \", round(accuracy_score(LSVC_prediction, y_test)*100, 2))\n", + "print(\"\\nLinearSVC results: \\n\")\n", + "print(classification_report(y_test, LSVC_prediction))" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Accuracy => 69.05\n", + "\n", + "Random Forest Classifier results: \n", + "\n", + " precision recall f1-score support\n", + "\n", + " neg 0.65 0.91 0.75 22\n", + " pos 0.82 0.45 0.58 20\n", + "\n", + " accuracy 0.69 42\n", + " macro avg 0.73 0.68 0.67 42\n", + "weighted avg 0.73 0.69 0.67 42\n", + "\n" + ] + } + ], + "source": [ + "# Random Forest\n", + "RFC = RandomForestClassifier(n_estimators = 1000, min_samples_split = 15, random_state = 42)\n", + "\n", + "RFC.fit(X_train, y_train)\n", + "RFC_prediction = RFC.predict(X_test)\n", + "\n", + "print(\"Accuracy => \", round(accuracy_score(RFC_prediction, y_test)*100, 2))\n", + "print(\"\\nRandom Forest Classifier results: \\n\")\n", + "print(classification_report(y_test, RFC_prediction))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Classification without Feature Engineering" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": {}, + "outputs": [], + "source": [ + "# function to plot confusion matrix\n", + "def plot_conf_matrix(conf_matrix):\n", + " group_counts = [\"{0:0.0f}\".format(value) for value in\n", + " conf_matrix.flatten()]\n", + " group_percentages = [\"{0:.2%}\".format(value) for value in\n", + " conf_matrix.flatten()/np.sum(conf_matrix)]\n", + " labels = [f\"{v1}\\n{v2}\" for v1, v2 in\n", + " zip(group_counts,group_percentages)]\n", + " labels = np.asarray(labels).reshape(2,2)\n", + " \n", + " sns.heatmap(conf_matrix, annot=labels, fmt='', cmap='Blues')" + 
] + }, + { + "cell_type": "code", + "execution_count": 27, + "metadata": {}, + "outputs": [], + "source": [ + "# set vectorizer\n", + "vectorizer = TfidfVectorizer(min_df=3, stop_words=\"english\", sublinear_tf=True, norm='l2', ngram_range=(1, 1))\n", + "\n", + "# split training and test set\n", + "X = data.cleaned_string\n", + "y = data.Effectiveness\n", + "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "C:\\Users\\Jared\\anaconda3\\lib\\site-packages\\sklearn\\metrics\\_classification.py:1248: UndefinedMetricWarning: Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.\n", + " _warn_prf(average, modifier, msg_start, len(result))\n", + "C:\\Users\\Jared\\anaconda3\\lib\\site-packages\\sklearn\\metrics\\_classification.py:1248: UndefinedMetricWarning: Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.\n", + " _warn_prf(average, modifier, msg_start, len(result))\n", + "C:\\Users\\Jared\\anaconda3\\lib\\site-packages\\sklearn\\metrics\\_classification.py:1248: UndefinedMetricWarning: Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. 
Use `zero_division` parameter to control this behavior.\n", + " _warn_prf(average, modifier, msg_start, len(result))\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + " precision recall f1-score support\n", + "\n", + " neg 0.52 1.00 0.69 22\n", + " pos 0.00 0.00 0.00 20\n", + "\n", + " accuracy 0.52 42\n", + " macro avg 0.26 0.50 0.34 42\n", + "weighted avg 0.27 0.52 0.36 42\n", + "\n" + ] + }, + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAWQAAAD4CAYAAADbyJysAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjQuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8rg+JYAAAACXBIWXMAAAsTAAALEwEAmpwYAAAfZElEQVR4nO3deXwV1f3/8dcnC2twaSVhFa0siqjYWhCtChQsIArWDURFRaO1VOlii8UvrijuPy0oIiJVW8SqIGDABRcEtUIREQoIKkgEEqyKIAgk+fz+uBeM4Sa5CTe5c8f308c8uHdmzsy5at89njnnjLk7IiKSfGnJroCIiEQokEVEAkKBLCISEApkEZGAUCCLiARERk3foP6xQzWMQ/by5YIxya6CBFC9DGxfr1GVzNn+3ph9vl8i1Xggi4jUKkvd//BXIItIuFigGr1VokAWkXBRC1lEJCDUQhYRCYi09GTXoNoUyCISLuqyEBEJCHVZiIgEhFrIIiIBoRayiEhAqIUsIhIQGmUhIhIQaiGLiAREmvqQRUSCQS1kEZGA0CgLEZGASOGHeqnbthcRicXS4t8quoxZSzN7zcyWm9kyM7smuv9HZvayma2K/nlgOeV7mdlKM1ttZsPjqboCWUTCxSz+rWJFwB/d/QjgeOC3ZtYeGA7Mcfc2wJzo9zJVsHRgLNAbaA8MjJatkAJZRMIlQS1kd9/g7ouin7cAy4HmQD/g79HT/g70j1G8E7Da3T92953AU9FyFVIgi0i4VKGFbGa5Zraw1JYb+5J2CHAs8G8gx903QCS0gewYRZoD60p9z4/uq5Ae6olIuFRh2Ju7jwfGV3g5syzgWWCYu39t8Y3iiHVSpS9fVSCLSLgkcJSFmWUSCeN/uPtz0d0FZtbU3TeYWVOgMEbRfKBlqe8tgPWV3U9dFiISLokbZWHAo8Byd7+31KHpwODo58HA8zGKLwDamNmhZlYHGBAtVyEFsoiES+JGWZwIXAh0N7PF0a0PMBroaWargJ7R75hZMzPLA3D3ImAo8CKRh4FPu/uyym6oLgsRCZcETZ1293nE7gsG+GWM89cDfUp9zwPyqnJPBbKIhIumTouIBIQWFxIRCQZLUyCLiARCnOOEA0mBLCLhkrp5rEAWkXBRC1lEJCAUyCIiAZGmh3oiIgGRug1kBbKIhIu6LEREAkKBLCISEApkEZGAUCCLiASEpSmQRUQCQS1kEZGAUCCLiARFAvPYzCYCfYFCd+8Q3TcFaBc95QDgK3fvGKPsGmALUAwUuftxld1PgSwioZLgFvIkYAzw+O4d7n5eqXvdA2yuoHw3d/883pspkEUkVBIZyO4+18wOKec+BpwLdE/U/RTINaRFzgFMuOUicn68HyXuTHx2PmMnv85tw/rT5+QO7NxVzCf5n5N7w5Ns3ro92dWVJJn/5lzuGD2KkuISzjzrHIZcnpvsKqW8qqxlYWa5QOm/6ePdfXycxU8CCtx9VTnHHXjJzBx4OJ7rKpBrSFFxCcPvfY7FK/LJalCXt/75F+b8ewVz3lnB//1
tOsXFJdx6dT+uvfRUrn8g1lvEJeyKi4u5bdTNPPzIY+Tk5HD+eWfTtVt3DmvdOtlVS21VaCBHQzLeAC5rIDC5guMnuvt6M8sGXjazFe4+t6ILpu6ySAG38fOvWbwiH4Ct23aw4pONNGt8AHPeWUFxcQkA737wCc1zDkhiLSWZln6whJYtW9GiZUsy69ShV5/TeP21OcmuVsozs7i3fbhHBvBrYEp550TfQo27FwJTgU6VXVeBXAsObvojOrZrwYKla763/6J+XXhx/n+TUylJusKCApo0bbLne3ZODgUFBUmsUTjURiADPYAV7p5fTh0amlmj3Z+BU4GllV1UgVzDGtavw+S7L+Pau59lyzff7tn/5yG/ori4hKfyFiSxdpJMju+1L5XH0AZFIgPZzCYDbwPtzCzfzIZEDw2gTHeFmTUzs7zo1xxgnpm9D7wLvODusyu7n/qQa1BGRhqT776cKbMW8vyr7+/ZP+j0zvQ5uQO9r3ggibWTZMvJacLGDRv3fC8sKCA7OzuJNQqHRE6ddveB5ey/OMa+9UCf6OePgWOqej+1kGvQuBsGsfKTjTzw5Kt79vU84Qj+eHEPzh72MNu/3ZXE2kmyHdnhKD79dA35+evYtXMns/Ne4JRuCRtB9YNVS10WNUIt5BpyQsefMKhvZz748DPeeWo4ADeMmc49155D3ToZzHxoKADvfrCGq0c9lcyqSpJkZGRw3YiR/Cb3MkpKiul/5lm0bt0m2dVKeUEM2niZ+979WIlU/9ihNXsDSUlfLhiT7CpIANXL2PeJz4cOeyHuzPnk/50WqPSutIVsZocD/YDmRAY6rwemu/vyGq6biEjVBSpiq6bCPmQz+wvwFJGf+C6wIPp5spkNr6BcrpktNLOFRZ8vS2R9RUQqFOY+5CHAke7+vadPZnYvsAwYHatQ6dkvYemyWPHCTWz5ZgfFJSUUFZfwi0F3xjUNum6dDF55dBh16mSQkZ7O1Ffe49ZxkZExR7dtzt9GDKBu3UyKiksYdtsUFi5bS5djfsL9fz2PnbuKuOi6x/h43efsn1WfJ+64lDN+OzYZP1/iUNk0aHfnjttHMW/uG9SrX49bRo3miPZHVlj2vnvuYv68ubQ7/AhG3X4nADOmT+PrzZsZdOHg2v2BKSIthReor2yURQnQLMb+ptFjPyi9cu/n+AGj+cWgyP8w5ryzgp+dcxudzrudVWsLufbSU/cqs2NnEb1yH6DzeaPpPOB2Tj2hPZ2OOgSAUcP6M2r8LI4fMJpbHprJqGH9Abjmwu4MvHYCI/82g9xzTgLgutxe3DnxxVr5nVJ1u6dBPzhuAlOnv8DsvJl8tHr1986Z9+ZcPl27hhmzXmLkjbdw6803Vlh2y5YtvL/4PZ6ZOoOS4mJWfbiSb7/9lunTpnLugPNr/0emiFRuIVcWyMOAOWY2y8zGR7fZwBzgmhqvXcDFOw36m+07AcjMSCcjI53dD1LdYb+G9QDYP6s+GzZFVvHbVVRM/bqZNKifya6iYg5tcRDNsg9g3n9Wx7y+JF8806Bfe3UOp5/RHzPj6GM6smXL12zaVFhu2bQ0Y9euXbg73+7YQUZGBpMmTuD8Cy4kMzMzSb80+Mzi34Kmwi4Ld59tZm2JzMFuTqT/OB9Y4O7FtVC/wHB3Zjw4FHfn0WfnM/G5+d87flG/Ljzz0qKYZdPSjLf++RcOa9mYh6fMZcHStQBce/czzBj7W27//ZmkpRndLr4HgLsmvsTY6weyfccuhlz/OLf/4UxuenBmzf5A2SexpkF/sGTJ988pLCCnyXfn5OQ0obCgoNyyDRtm0aPnqZx3Vn86Hd+FrEaNWLZ0KVdeNbTmf1AKC2LLN16VjrJw9xLgnVqoS6B1v+Q+NmzaTOMDs5g5bigr12xk/qKPgMqnQZeUOMcPGM3+WfWZcu/ltD+sKf/9aAO555zEn+95jmlzFnNWz2N56IZBnHblGJZ8+BmnDI6E84k/PYwNmzZjGE+MvoRdRcU
Mv3cqhV9sqbXfLpWLaxp0jCGmZlZh2UuGXM4lQy4H4MaRI7jqd1fz3DP/4u235tGmbTtyr7wqAbUPlxTOY83Ui9fu7oRNX25l+qtL+PmRhwDfTYO+eMSkSq+xeet25i5cxakntI+U7duZaXMWA/Dsy+9x3JGt9ioz/LJe3D5+FiOu6M0t4/KYnLeAqwZ2TcRPkgSKZxp0dk4TCjZ+d05BwUYaZ2fHVXb58sgiVK1aHcKM6dO46977Wb16FWvXrqmBX5Pa0tIs7i1oFMhxaFCvDlkN6u753KPL4Sz7aH1c06APOjCL/bPqA1CvbibdO7dj5ZrIil4bNm3mpJ9FZmZ17dSW1Z9u+l7ZC07vzOw3l/HVlu00qFeHkhKnpMRpUE/9h0ETzzTort26M2P6NNydJe8vJiurEY0bZ8dVduzf7ueqoVdTVFRESXGktzDN0vh2+7fI96VyIGvqdByyf9yIKfdG/rMxIz2dKbMW8vJby1n6/A0xp0E3bbw/D448nzN/9xBNDtqPR26+kPS0NNLSjGdfXsSsNyOr8P32ln9y17Vnk5GRxo4dRQy99bvFo+rXy+SC0zvT96rIjLYHnnyVyXdfxs5dRQy+blLt/g2QSpU3DfrpKZF/pueeN5CTTj6FeXPfoG/vntSrV5+bb72twrK7vTrnFTp0OIrs7BwAju54LGf1P522bdvS7vDDa//HBlwqd1lo6rQkhaZOSyyJmDp99MhX4s6cJTf3CFR8q4UsIqGSyqMs1IcsIqGSyHHIZjbRzArNbGmpfTea2Wdmtji69SmnbC8zW2lmqytaaqI0BbKIhEqCH+pNAnrF2H+fu3eMbnllD5pZOjAW6A20BwaaWftK6x5PjUREUkUip05H3xL9RTWq0QlY7e4fu/tOIou09auskAJZREKllqZODzWzJdEujQNjHG8OrCv1PT+6r0IKZBEJlaq0kEsvFRzdciu/Aw8BhwEdgQ3APbGqEWNfpaM/NMpCREKlKi3f0ksFV6FMwXf3skeAWAvN5AMtS31vQeTlHhVSC1lEQqWml980s6alvp4JLI1x2gKgjZkdamZ1gAHA9MqurRayiIRKIqdEm9lkoCtwkJnlAzcAXc2sI5EuiDXAFdFzmwET3L2PuxeZ2VDgRSAdmOjulb4+SYEsIqGSyHkh7j4wxu5Hyzl3PdCn1Pc8YK8hcRVRIItIqKTyTD0FsoiESgrnsQJZRMJFLWQRkYBQIIuIBEQQF56PlwJZREIlhRvICmQRCRd1WYiIBEQK57ECWUTCJS2FE1mBLCKhood6IiIBkcJ5rEAWkXDRQz0RkYBI4TxWIItIuFjMl3WkBgWyiISK+pBFRAJCoyxERAIikeOQzWwi0BcodPcO0X13AacDO4GPgEvc/asYZdcAW4BioMjdj6u07gmruYhIAJjFv8VhEtCrzL6XgQ7ufjTwIXBdBeW7uXvHeMIYFMgiEjKJfMmpu88Fviiz7yV3L4p+fYfIG6UTQoEsIqFSlRaymeWa2cJSW24Vb3cpMKucYw68ZGb/ife66kMWkVBJr0IfsruPB8ZX5z5mNgIoAv5Rziknuvt6M8sGXjazFdEWd7nUQhaRUElkl0UF9xhM5GHfIHf3WOdE30KNuxcCU4FOlV1XgSwioZJm8W/VYWa9gL8AZ7j7tnLOaWhmjXZ/Bk4FllZa9+pVSUQkmBLZQjazycDbQDszyzezIcAYoBGRbojFZjYuem4zM8uLFs0B5pnZ+8C7wAvuPruy+6kPWURCJZFrWbj7wBi7Hy3n3PVAn+jnj4Fjqno/BbKIhIpWexMRCYh0TZ0WEQmG1I1jBbKIhIzeqSciEhApnMcKZBEJFz3UExEJiBTOYwWyiISLRlmIiASEuiwq0uSwGr+FiMhuqbwehFrIIhIqaiGLiARECnchK5BFJFz0UE9EJCBSOI8VyCISLinchaxAFpFwSeW1LFJ5hIiIyF7SqrBVxswmmlmhmS0tte9HZvayma2K/nlgOWV
7mdlKM1ttZsPjrbuISGiYxb/FYRLQq8y+4cAcd28DzIl+L1MHSwfGAr2B9sBAM2tf2c0UyCISKulpFvdWGXefC3xRZnc/4O/Rz38H+sco2glY7e4fu/tO4KlouQopkEUkVKry1mkzyzWzhaW23DhukePuGwCif2bHOKc5sK7U9/zovgrpoZ6IhEpVHuq5+3hgfA1UI1YlvLJCaiGLSKgkuA85lgIzaxq5lzUFCmOckw+0LPW9BbC+sgsrkEUkVKrSZVFN04HB0c+DgedjnLMAaGNmh5pZHWBAtFzFda92lUREAsiq8Fel1zKbDLwNtDOzfDMbAowGeprZKqBn9Dtm1szM8gDcvQgYCrwILAeedvdlld1PfcgiEioZCWxmuvvAcg79Msa564E+pb7nAXlVuZ8CWURCRctviogEhBYXEhEJiBRuICuQRSRcUnlxIQWyiIRKegqPHVMgi0iopMUxnC2oFMgiEiop3GOhQBaRcNEoCxGRgNBDPRGRgEjhPFYgi0i4xLPwfFApkEUkVFJ41JsCWUTCRWtZiIgEROrGsQJZREJGoyxERAIideM4tfu/RUT2kpZmcW8VMbN2Zra41Pa1mQ0rc05XM9tc6pyR+1J3tZBFJFQS1cp095VARwAzSwc+A6bGOPVNd++biHsqkEUkVGpolMUvgY/cfW1NXHw3dVmISKhYVTazXDNbWGrLLeeyA4DJ5RzrYmbvm9ksMztyX+quFrKIhEpVWsjuPh4YX8n16gBnANfFOLwIaOXuW82sDzANaBN3BcpQC1lEQiXdLO4tTr2BRe5eUPaAu3/t7lujn/OATDM7qLp1VyCLSKhUpcsiTgMpp7vCzJpYtEluZp2IZOr/qlt3dVmISKgk8pmemTUAegJXlNp3JYC7jwPOBn5jZkXAdmCAu3t176dAFpFQSeQrnNx9G/DjMvvGlfo8BhiTqPspkEUkVFJ45rQCWUTCxVJ48rQCWURCpQqjJwJHgVxDWhyUxYQ/9SLnwAaUOEyc9QFjn3+PA7Pq8sR1p9EqZz/WFnzNBbe/wFdbdyS7upIk89+cyx2jR1FSXMKZZ53DkMvLm5cg8UrhPNawt5pSVOwMf2Qux17xOKf8fjJX9D2Gww/+EX86txOvL17HUZdN4vXF6/jTuT9PdlUlSYqLi7lt1M08OG4CU6e/wOy8mXy0enWyq5XyzOLfgkaBXEM2fvkNiz8qBGDr9l2sWPcFzX6cRd8uP+HJV/4LwJOv/JfTuxyWzGpKEi39YAktW7aiRcuWZNapQ68+p/H6a3OSXa2UZ1X4K2gUyLXg4Oz96HhYYxas3Ej2AQ3Y+OU3QCS0G+/fIMm1k2QpLCigSdMme75n5+RQULDXZDCpojSLfwsa9SHXsIb1Mpl8fV+uffgNtmzbmezqSIA4e88fSOX3wQVFKr8xRC3kGpSRnsbk6/sy5bUVPP9WpG+w8KttNDmwIQBNDmzIps3bkllFSaKcnCZs3LBxz/fCggKys7OTWKNwUJeFxDRuWE9WrvuCB6Yu2rPvhXc+5oIe7QG4oEd7Zr79cbKqJ0l2ZIej+PTTNeTnr2PXzp3MznuBU7p1T3a1Up66LGQvJxzZjEE92vPBJ5t4Z8wgAG74+3zufnoBT/71NAb/6kjWbdrCoFEzk1xTSZaMjAyuGzGS3+ReRklJMf3PPIvWrau9cqNEBbHlGy/bh3Uw4lK/9301ewNJSV/O+H2yqyABVC9j39N03qov486cX7Q5MFDprRayiIRKoBK2iqrdh2xml1RwbM9rUYrWvV3dW4iIVFkNLFBfa/blod5N5R1w9/Hufpy7H5fRsss+3EJEpIpqYIX62lJhl4WZLSnvEJCT+OoEW1qaMf+B81n/+VbOuvF5nhjehzYtDgTggKy6fLV1B8cP/cde5fZvWJeHhvWkfasf4+5ced/L/HvFBm4bchJ9Ov+EnUXFfLJhM7n3vsTmb3bQpX0z7h/anZ27irlodB4fb9jM/g3r8sR1fTj
j+lhvIZcgqGxdCnfnjttHMW/uG9SrX49bRo3miPZHVlj2vnvuYv68ubQ7/AhG3X4nADOmT+PrzZsZdOHg2v2BKSKRD/XMbA2wBSgGitz9uDLHDbgf6ANsAy5290VlrxOvyvqQc4BfAV+WrSfwVnVvmqqG9juWlZ9+QaMGdQC4cHTenmOjLzuZzdtiLxJ095VdeWnhGs4fNZPMjDQa1M0EYM57a/m/x+ZRXOLceukvuPa8n3P9xHlc8+ufMvDWmbTK2Y/c045h+IS5XDewM3dOebfmf6RUy+51KR5+5DFycnI4/7yz6dqtO4e1br3nnHlvzuXTtWuYMeslPljyPrfefCP/eOpf5ZbNzsnh/cXv8czUGVz35z+y6sOVtDy4FdOnTeXBhyck78cGXA30RHRz98/LOdabyEtN2wCdgYeif1ZLZV0WM4Esd19bZlsDvF7dm6ai5gdl0avToTz24tKYx886uS1Pv75yr/2NGtThFx2aMylabldRCZu/iQT3nEWfUlwSeSD87ooNND8oa8859etk0KBuBruKSzi06f40OyiLeR98VhM/TRIgnnUpXnt1Dqef0R8z4+hjOrJly9ds2lRYbtm0NGPXrl24O9/u2EFGRgaTJk7g/AsuJDMzM0m/NPhquceiH/C4R7wDHGBmTat7sQoD2d2HuPu8co6dX92bpqK7rujKiEffpKRk7xE1J3ZoTsGX2/ho/Vd7HTu0yf58vnk74/9wKm+PGcSD1/SgQd29/8PkolM78OKCNZF7Pb2Asdf0YGj/nzJuxmJuGnwiNz3+g/sPkpQSz7oUhYUF5DT57pycnCYUFhSUW7Zhwyx69DyV887qT/PmLchq1IhlS5fSrXuPmv9BqawKiVx6AEJ0K7v+qQMvmdl/YhwDaA6sK/U9P7qvWjTsLQ69Ox1K4VfbeG91IScd1WKv4+d2bce/3lgRs2xGehodW2fzh4deY8HKjdx9RVf+dO7PufmJ70af/HlAJ4qLS3jqtcg1lny8iVN+/xQQCfsN/9uKGTwxvA+7iksY/shcCr/SlOsgiWtdihhj/s2swrKXDLmcS4ZcDsCNI0dw1e+u5rln/sXbb82jTdt25F55VQJqHy5VWcvC3ccD4ys45UR3X29m2cDLZrbC3eeWOh7rZtWee6Gp03Ho0r4ZfY//CSsmXcrjw/vQ9ZiWTLy2FwDpaUa/E1rzzNwPY5b97PMtfPb5FhasjKxZMHXeKjq2/m69gkE92tOn06FcfOesmOWHD+zM7ZP/zYhBx3PLk28z+dXlXNWvY2J/oOyzeNalyM5pQsHG784pKNhI4+zsuMouXx5ZsrVVq0OYMX0ad917P6tXr2Lt2jU18GtSWyK7LNx9ffTPQmAq0KnMKflAy1LfWwDrq1t3BXIcRk6aT+sLJ3D4xRO5aHQer7+/jkvvmg1A92MP5sP8L/ns860xyxZ8uY38TVtp0zwyGqNrx5as+PQLAHr+rBV/POc4zr5pOtt3FO1V9oIe7Zn97id8tXUHDepmUuJOifueh4ISHPGsS9G1W3dmTJ+Gu7Pk/cVkZTWicePsuMqO/dv9XDX0aoqKiigpLgYgzdL4dvu3tfYbU0aCEtnMGppZo92fgVOBsg+RpgMXWcTxwGZ331DdqqvLYh+dc0q7vR7mNf1RQx4c1pMzR04D4A8PvcZjf+5Nncw01mzYTO59LwFw31XdqZuZzsxRvwbg3RUbuXpM5EFQ/boZXNCjPX1HPAfAA88tYvKI09lZVMzgO/KQYClvXYqnp0wG4NzzBnLSyacwb+4b9O3dk3r16nPzrbdVWHa3V+e8QocOR5GdHRlpenTHYzmr/+m0bduWdocfXvs/NuASOOwtB5ga7T7KAP7p7rPN7EoAdx8H5BEZ8raayLC3cifMxUNrWUhSaC0LiSURa1ks/nRL3JnT8eBGgZoeohayiIRKAGdEx02BLCKhksrLbyqQRSRU1EIWEQmIFM5jBbKIhEwKJ7ICWURCRX3IIiI
BEcSXl8ZLgSwi4aJAFhEJBnVZiIgEhIa9iYgERArnsQJZREImhRNZgSwioVKVBeqDRoEsIqGSunGsQBaRsEnhRFYgi0iopPKwN73CSURCxSz+reLrWEsze83MlpvZMjO7JsY5Xc1ss5ktjm4j96XuaiGLSKgk8JleEfBHd18Ufbfef8zsZXf/b5nz3nT3vom4oQJZREIlUV0W0ZeVboh+3mJmy4HmQNlAThh1WYhIqFSly8LMcs1sYaktN/Y17RDgWODfMQ53MbP3zWyWmR25L3VXC1lEQqUq7WN3Hw+Mr/B6ZlnAs8Awd/+6zOFFQCt332pmfYBpQBuqSS1kEQmVRD3Ui1zLMomE8T/c/bmyx939a3ffGv2cB2Sa2UHVrbsCWURCxqqwVXAVMwMeBZa7+73lnNMkeh5m1olIpv6vujVXl4WIhEoCF6g/EbgQ+MDMFkf3/RU4GMDdxwFnA78xsyJgOzDA3b26N1Qgi0ioJGrYm7vPo5JmtLuPAcYk5o4KZBEJmVSeqadAFpFwSd08ViCLSLikcB4rkEUkXFJ4OWQFsoiEi6VwIiuQRSRUUjeOFcgiEjIp3EBWIItIuGjYm4hIQKiFLCISEApkEZGAUJeFiEhAqIUsIhIQKZzHCmQRCZkUTmQFsoiEivqQRUQCIoEL1Nc6vcJJRMIlMW9wilzKrJeZrTSz1WY2PMZxM7MHoseXmNlP96XqCmQRCRWrwl8VXscsHRgL9AbaAwPNrH2Z03oTect0GyAXeGhf6q5AFpFQSeBbpzsBq939Y3ffCTwF9CtzTj/gcY94BzjAzJpWt+413oe8fdbvU7hHJ7HMLNfdxye7HhIs+vciseplxP9Uz8xyibRsdxtf6p9Fc2BdqWP5QOcyl4h1TnNgQ9wVLkUt5NqVW/kp8gOkfy+SxN3Hu/txpbbS/8cYK9jLvlE6nnPipkAWEYktH2hZ6nsLYH01zombAllEJLYFQBszO9TM6gADgOllzpkOXBQdbXE8sNndq9VdARqHXNvUTyix6N+LAHL3IjMbCrwIpAMT3X2ZmV0ZPT4OyAP6AKuBbcAl+3JPc692d4eIiCSQuixERAJCgSwiEhAK5FpS2RRM+eExs4lmVmhmS5NdFwkGBXItiHMKpvzwTAJ6JbsSEhwK5NoRzxRM+YFx97nAF8muhwSHArl2lDe9UkRkDwVy7Ujo9EoRCScFcu1I6PRKEQknBXLtiGcKpoj8wCmQa4G7FwG7p2AuB55292XJrZUkm5lNBt4G2plZvpkNSXadJLk0dVpEJCDUQhYRCQgFsohIQCiQRUQCQoEsIhIQCmQRkYBQIIuIBIQCWUQkIP4/EcUDemoGR3AAAAAASUVORK5CYII=\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "## Multinomial Naive Bayes Classifier\n", + "MNB = Pipeline([('vect', vectorizer),\n", + " ('clf', MultinomialNB()),\n", + " ])\n", + "\n", + "MNB.fit(X_train, y_train)\n", + "\n", + "y_pred_MNB = MNB.predict(X_test)\n", + "\n", + "print(classification_report(y_test, y_pred_MNB))\n", + "\n", + "# plot confusion matrix\n", + "conf_matrix = confusion_matrix(y_test, y_pred_MNB)\n", + "plot_conf_matrix(conf_matrix)" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " precision recall f1-score support\n", + "\n", + " neg 0.71 0.68 0.70 22\n", + " pos 0.67 0.70 0.68 20\n", + "\n", + " accuracy 0.69 42\n", + " macro avg 0.69 0.69 0.69 42\n", + "weighted avg 0.69 0.69 0.69 42\n", + "\n" + ] + }, + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAVoAAAD8CAYAAAA2Y2wxAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjQuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8rg+JYAAAACXBIWXMAAAsTAAALEwEAmpwYAAAcvElEQVR4nO3deXwV1f3G8c/3ZoEEZA2bggiKKK7VFLeKCKKoKIpioSpUsdEiWoogUBBQsKAgVuuCQSiKGleo1J8bRRG1qCxuKCAuLMFIEFS2BBJyfn8kpQFC7r3hzr3D8Lx9zYvcuTNnvgP06eHMmRlzziEiIt4JJboAEZGgU9CKiHhMQSsi4jEFrYiIxxS0IiIeU9CKiHhMQSsisg9mNtXM8s1sSbl1o8xsrZl9UrZcFK4dBa2IyL5NAzpXsP5+59zJZcur4RpR0IqI7INzbh6wcX/bSY5BLZVK+1U/3Xome1k+575ElyA+dHi9ara/bUSTOYWfPHwjkFVuVbZzLjuCXfuZWS9gIXCbc+6nyjZWj1ZEgsVCES/OuWznXGa5JZKQfRQ4EjgZyAPC9ho879GKiMSV7XenuFLOuXX/O5RNBl4Jt4+CVkSCxbz9h7qZNXHO5ZV9vBxYUtn2oKAVkaCJYY/WzHKA9kCGmeUCI4H2ZnYy4ICVwI3h2lHQikiwhJJi1pRzrmcFq6dE246CVkSCxeOhg6pQ0IpIsHh8MawqFLQiEizq0YqIeEw9WhERj6lHKyLisRjOOogVBa2IBIt6tCIiHgtpjFZExFvq0YqIeEyzDkREPKaLYSIiHtPQgYiIxzR0ICLiMfVoRUQ85sMerf+iX0Rkf0TxzrCwTZlNNbN8M9vrLQpmNtDMnJllhGtHQSsiwRJKinwJbxrQec+VZtYM6ASsjqikaOoXEfG9GPZonXPzgI0VfHU/cDulr7MJS2O0IhIsHo/RmtmlwFrn3KcW4bEUtCISLFHMOjCzLCCr3Kps51x2JdunA8OA86MpSUErIsESRY+2LFT3GawVOBJoAfy3N9sUWGxmbZ
1zP+xrJwWtiASLh/NonXOfAw13HcpsJZDpnPuxsv10MUxEAsVCoYiXsG2Z5QDzgdZmlmtmfapSk3q0IhIokV6gioRzrmeY74+IpB0FrYgEi/9uDFPQikiwxLJHGysKWhEJFAWtiIjHQhFc5Io3Ba2IBIv/OrQKWhEJFg0diIh4TEErIuIxBa2IiMcUtCIiHrOQglZExFPq0YqIeExBKyLiNf/lrIJWRIJFPVoREY8paA8ik0ZezYXtjmf9xs1kdv8rAMNuvIjru53J+p+2ADDyoVm88d6XiSxTEmjNqu8Yc8ftuz7/sDaX3n/oS7ce1yawqgOfnnVwEJn+rw+Y9Nw7PD66127r//7U2/xt+pwEVSV+0qx5Cx578gUAdu7cSc9Lz+OsczomuKoAiGGH1symAl2AfOfc8WXrRgNdgRIgH/i9c+77ytrxX/QHxPuLv2HjL9sSXYYcID5e+CFNDmtGoyaHJrqUA56ZRbxEYBrQeY91451zJzrnTgZeAUaEa0RBG2c39WjHR88NZdLIq6lzSFqiyxGfmDv7dc7tdGGiywiEWAatc24esHGPdZvKfawBuHDtKGjjaPIL79LmklGc1mMcP/y4iXEDuiW6JPGBoqIi5r83l3M6np/oUgIhmqA1sywzW1huyYrwGHeb2RrgatSj9Zf8jZspKXE455g6430yj2+e6JLEBxbMf4+jWh9L3Xr1E11KIFjIIl6cc9nOucxyS3Ykx3DODXPONQOeBvqF215BG0eNM2rt+rlrh5P48pu8BFYjfvH27Nc0bBBDMR6jDecZ4IpwG2nWgUeeGPt7zj61FRl1avL166MZPelV2p3aihNbN8U5x6q8jdwyJifRZUqCFRYWsOij+fQffEeiSwkMr+fRmlkr59yKso+XAsvC7aOg9UjvodP2WvfEP+fHvxDxterV05jxxruJLiNQYhm0ZpYDtAcyzCwXGAlcZGatKZ3etQq4KVw7YYPWzI6hdM7YYZReXfsemOWcW1rl6kVEvBLDDq1zrmcFq6dE206lY7RmNhh4ltLSPwIWlP2cY2ZDKtlv15W84h+/iLYmEZEqi/MYbUTC9Wj7AMc554rKrzSzicAXwLiKdiq7cpcNkParfmHnmPldtdRk/j2lP6mpySQnJTHz3x8zZtKrEd1S26p5Q6bfc/2uzy0Oq8/oR/+Ph56ZS7fzfsWwmy7imBaNOPvaCSz+cjUAZ5zUkgf+8lt2FBXTa+g/+HbNj9Sumcb0e67n0psfjt+JS6UmjBnBh/95hzp16zH56Zm71v/zhWd4+cUckpKSOe3Ms/lDvwF77btl8yYmjh3Fym++BjMGDruLNiecxJjhg1izeiUAWzdvpsYhh/DYky+w5NOPeXD8GFJSU/nLnfdwWLPD2bJ5E2PuuJ2x9z/qy/v7EyV0AD74uwQ4lNJxiPKalH13UNi+o5jOWQ+ytWAHyckh3po6gDffLw3UcLfUrliVz+k9Sv//KBQyvnnjbma9/SkAX3zzPT1um8xDw3f/18mfru1Az0GP07xJfbK6n82QiTMZmtWZe6e+4dEZSlWcf/GldO3eg3vvGrZr3SeLPuI/897msekvkZqayk8bN1S47yP330Pm6Wcx4q8TKSoqYnthAQDDx4zftc2kBydQo0ZNAF7MeZIRYyeyLu97/jXzeW66dSBP/SObnr1uUMjuwY+/H+GCtj8wx8xWAGvK1h0OHEUEc8eCZGvBDgBSkpNITk7Cueg76ue2bc13uetZnfcTAMu/W1fhdkXFO0mrlkJ6WgpFxTtp0TSDQxvW4b1FX1f9BCTmTvxVJj/krd1t3b9mPE+Pa/uQmpoKUOHc2K1bt/D5J4sYdMcYAFJSUkhJSdltG+cc8+a8wb0PPQ5AcnIyO7ZvZ3thIcnJyXyfu4YN6/M56ZRML07tgObDnK08aJ1zr5vZ0UBbSi+GGZALLHDO7YxDfb4RChn/eWYwRzZrwGPPzWPBklWcf9Zx3NSjHb/r0pbFX65myM
QZ/Ly5YJ9tdL/gVJ5/fVHYY42f+iYPD+9JwfYi+gx/krEDLufOR16J5emIR3LXrOLzTxfxj8ceJCW1Gjfechut2xy/2zZ5a3OpXace48fcwbcrvqLVMcfS98+DSUtL37XN558sok69+jRtVnpTS49efbh/3J1Uq1adwSPvJvvvE+mddXNcz+1A4ccebdgbFpxzJc65D5xzLznnXiz7+aAKWYCSEsfpPcZx1AXDyTy+OW2ObBLVLbUpyUlcfM4JzJj9cdhjffbVWs7pfR+dsx7kiKb1yVv/C4Yxfdx1TB3Ti4b1DonlqUkMlewsZsvmzTz4+NNk9RvAmOED9/rXz86dO1nx1VIu6XYVk558nuppaTz35NTdttnzJoajjj6Gvz/+NBMenkLe2lzqZzQA5xgzfBDjRg3d5xDFwcgs8iVedGdYlH7ZUsC8hSs4/8w2Ud1Se8Fv2vDJsjXkb9wc1fGG3NCZsdmvMezGCxk96VVyXl1A357t9/MsxCsZDRrxm/YdMTOOOe4ELBTil59/2m2bBg0b0aBBI4497kQA2p3biRVf/W+25M7iYt6bO4f2512wV/vOOZ6eNpmrr7uR6VMm0euGvnS84GJmPv+0tyd2AAmFLOIlbjXF7UgHsIy6Nalds/RJW9WrpdDhtNYsX7kuqltqr+qcGdGwQXnXXHIar7/7BT9vLiC9eiolJY6SEkd69ZTwO0tCnNmuAx8v/AiA3NUrKS4qonadurttU69+Bg0aNWLNqu+A0kckNj+i5a7vFy/4gGbNW9CgYeO92n/z1VmcdubZHFKrFtsLCwmFDAuF2F5Y6OFZHVj8GLS6MywCjTNqMfmua0kKhQiFjJdmL+a1d5cwZXSvCm+pbdKgNo+M+B2X3/IoAGnVU+hw2jH02+OW20vPPZGJg7uTUbcmMx68ic+Wr901fSutegrXXHIaXfo+BMCDT71FzoQb2FFUXOFdZxJ/d4+4nc8WL+SXn3+m56Xn0euGvnS+5HLuu3sEf7j6cpKTUxh0xxjMjB/X5zNx7Cj+OvERAG4eMJSxo4ZSXFREk8OaMnDY6F3tvv3vih+ZWFhYwOxXZzHugUkAXNHzWu4cOoCUlBT+ctc98TnpA4APh2ixqlw9j0YQ5tFK7C2fc1+iSxAfOrxetf2OyRNH/DvizPnsrvPiEsvq0YpIoPhx1oGCVkQCxYc5q6AVkWA5EG/BFRE5oGjoQETEYz7MWc2jFZFgieVjEs1sqpnlm9mScuvGm9kyM/vMzGaaWZ1w7ShoRSRQYnwL7jSg8x7rZgPHO+dOBL4ChoZrREErIoESyx6tc24esHGPdW8654rLPn4ANA3XjoJWRAIlmltwy78NpmzJivJw1wOvhdtIF8NEJFCiuRhW/m0w0R/HhgHFQNgn+ihoRSRQ4jG9y8x6A12Aji6C5xgoaEUkULzOWTPrDAwGznHObYtkHwWtiARKLHu0ZpYDtAcyzCwXGEnpLINqwOyyY33gnLupsnYUtCISKLEMWudczwpWT4m2HQWtiASKnnUgIuIxP96Cq6AVkUDRQ2VERDzmw5xV0IpIsIR8mLQKWhEJFF0MExHxmA9zVkErIsGii2EiIh7zYc4qaEUkWAz/Ja2CVkQCRWO0IiIe06wDERGPaR6tiIjHfJizCloRCRY/Tu/SyxlFJFBi+bpxM5tqZvlmtqTcuu5m9oWZlZhZZiQ1KWhFJFCSzCJeIjAN6LzHuiVAN2BepDVp6EBEAiXGb1iYZ2ZH7LFuabTHUY9WRAIlZJEvZpZlZgvLLVle1KQerYgESjQ9TedcNpDtXTWlFLQiEig+nHSgoBWRYPHj9C4FrYgESlIMb8E1sxygPZBhZrnASGAj8HegAfB/ZvaJc+6CytpR0IpIoMSyP+uc67mPr2ZG046CVkQCRc86EBHxmA9zVkErIsGii2EiIh7zYc4qaEUkWGI56yBWFLQiEigH5dDBTwse8voQcgCq2+3RRJcgPlQw64/73YYfH+CiHq
2IBMpB2aMVEYknHw7RKmhFJFh0MUxExGM+zFkFrYgEiw+HaBW0IhIsetaBiIjHNL1LRMRjPuzQ+jL8RUSqLClkES/hmNlUM8s3syXl1tUzs9lmtqLs17rh2lHQikigRPMW3AhMAzrvsW4IMMc51wqYU/a58pqiPAcREV8LmUW8hOOcm0fpq2vK6wo8UfbzE8BlYWuK8hxERHzNLJrFssxsYbklK4JDNHLO5QGU/dow3A66GCYigRLNDQvOuWwg27NiyqhHKyKBYlH8V0XrzKwJQNmv+eF2UNCKSKAkhyJfqmgW0Lvs597Ay2FrqvKhRER8KJaPSTSzHKA9kGFmucBIYBzwvJn1AVYD3cO1o6AVkUCJ5UNlnHM99/FVx2jaUdCKSKD48c4wBa2IBIoeKiMi4rEkH17iV9CKSKCEqj5tyzMKWhEJFB+OHChoRSRY9CobERGP6WKYiIjHfJizCloRCRa9blxExGM+nN2loBWRYInlsw5iRUErIoHiv5hV0IpIwGjWgYiIx/wXswpaEQmYkA9nHfjxAp2ISJWFoljCMbM/mdkSM/vCzPrvT00iIoFhZhEvYdo5HvgD0BY4CehiZq2qUpOCVkQCxaJYwjgW+MA5t805Vwy8A1xelZoUtCISKNH0aM0sy8wWlluyyjW1BGhnZvXNLB24CGhWlZp0MUxEAiUpiuldzrlsIHsf3y01s3uA2cAW4FOguCo1qUcrIoESw6EDnHNTnHOnOOfaARuBFVWpST1aEQmUWN6vYGYNnXP5ZnY40A04oyrtKGhFJFBi/Cqbl8ysPlAE3Oyc+6kqjShoRSRQYtmjdc6dHYt2FLQiEijmw5twFbQiEijRzDqIF806iJNNmzZxW/9b6dqlM5ddciGffvJxokuSBJh0a3tWPfl7Fv79t3t91/+ykyiY9UfqH1I9AZUFh1nkS7woaOPk3rF3c9ZvzublV17nhZdepkXLIxNdkiTA9DnL6Trqlb3WN82oQYeTm7I6f3MCqgoWBe1BasuWLSxatIDLr7gSgJTUVGrVqpXgqiQR3v8ij41btu+1/t4+ZzFs2gc45xJQVbBYFP/Fi8Zo4yB3zRrq1q3HiGFDWb58GW2OO47bhwwjPT090aWJD1zc9gi+37CVz1duSHQpgeDDpySqRxsPO3cWs2zpl3Tv0ZPnX/onaWlpTH28wrv+5CCTlprM4O6ncNczCxJdSmCEzCJe4lZT3I50EGvUqDGNGjXmxBNPAqDT+Z1ZtvTLBFclftCySS2aN6rFRw90Z9nkqzksoybz/3YljeqkJbq0A5aGDg5SGQ0a0KhxY1Z+9y1HtGjJhx/Mp+WRuhgm8MWqjTTvNW3X52WTr+asAS+xYXNh4oo6wPlx6EBBGydD/nIHQwcPpKioiKZNm3HXmLGJLkkS4ImB53H28YeSUas6X0+9ltE5C3hi9rJElxUofrxhwby+yllYjC6jyl7qdns00SWIDxXM+uN+p+R7K36KOHN+06puXFJZPVoRCRT/9Wf342KYmV1XyXe7nlo+ZbKurotI/CSZRbzEy/70aO8E/lHRF+WfWq6hAxGJKx92aSsNWjP7bF9fAY1iX44/jRg+lHnvzKVevfrMeHn32yef+McUJk64l7nvzadu3Xq7ffdDXh7Dht7Ohg0/Yhbiyu5XcfW1vQFYvmwZY+4aybZt2zj00MMYe+8EatasyceLF3H36FGkpqQybvxEDm/enE2bNnH7bX/m0ezHw765U+KnWkoS/x7bldSUJJKTQsx8/1vG5CxgxNW/pstpLSgpcaz/pYCsB94ib+O2iPYF9rn/Gcc25oE/tmNH0U56TZjNt3mbqF0jlemDOnHpqP9LxG+BLx1wF8PMbB1wAbDnw24N+I9z7tBwBwhCj3bRwgWkp6czbOjg3YL2h7w8Ro0YzsrvviXnhZf2Ctr16/P5cf16jm1zHFu3bqFH9yv424MPc+RRR/G7q65gwKDBZP66LTNnvMja3Fz63dqfP/+pH/
0HDOT7tWt5/713GXj7ECbcO47253Yg89dt433qngnKxbAa1ZPZWlhMclKIt8ZdxsDH32fp6o1sLigCoG+XEzimWV1ufXReRPt+tHwdh6SlVLj/s0MvYNi0D2je6BDOP6UZQ6bOZ9z1Z/DKhyt574u8uJ63V2JxMeyjb3+JOHPatqxd6fHM7M/ADYADPgeuc85FPfcu3BjtK0BN59yqPZaVwNxoD3agOjXz19SqXXuv9ePvGcufbxu0z15mgwYNObbNcQDUqFGTli1bkp+/DoCVK7/j1MxfA3DGGWcxZ/abACQnJ7O9sJDCwgKSk5NZs3o1+fnrAhWyQbK1sPRdfSlJIZKTQzjndoUkQHr15H32NCraF9jn/kXFJaRVSyK9WjJFxSW0aFyLQ+vVCEzIxkqs3hlmZocBtwKZzrnjgSSgR1VqqnTowDnXp5LvfleVAwbF3Lfm0LBRQ1ofc0xE269dm8uypUs5oezusKNaHc3ct+dwbofzePON1/nhh9L/sfS54UbuGjWCatWq8ddx47lvwj3cfMufPDsP2T+hkPGfiVdyZJPaPPbqEhZ8lQ/AqGvacvW5rfll2w46D3s5qn33tf/4Fxfz8M3tKdheTJ/75zD2ujO482nduruX2I4cJANpZlYEpAPfV6UR3YJbBQUFBUzOnkTffpEF4LatW7mt/60MGvIXatasCcCdo+/m2Zxn6NG9G9u2bSUlJRWAY449lqdynmfKtOnk5q6hQYOGOOcYdFt/hg4eyIYff/TsvCR6JSWO0/u/wFHXP0lmq4a0Obx0+GjUUx/Rqs90nn3nK266+ISo9t3X/p99t4FzBs2g8/BZHNG4Fnkbt2EG0wd1YuqAjjTUbbtAdM86KD9DqmzJ+m87zrm1wARgNZAH/OKce7NKNcXm1A4uuWtWs3ZtLld168qFnTqwbt0P9LiyGz+uX7/XtkVFRQzofysXXXwJ53U6f9f6Fi2P5LHJU3n2hRl0vuhimjZrttt+zjmyH3uUG2/qy2OPPETfm2+hS5dLeebp6Z6fn0Tvl607mLfke84/Zfc/x+ffWcFlZ7as0r6V7T/kqlMZ+9xChvXIZPQzC8iZ+xV9u1Qc6AebaIYOnHPZzrnMcsuu+ahmVhfoCrQADgVqmNk1ValJQVsFrY5uzdx35/Pa7Ld4bfZbNGrUmGdfnEFGgwa7beecY9SIYbRs2ZJev9992vGGDaWPxCspKWHyY4/S/be7D/3M+udM2rU7h1q1a1NQWIiFQlgoRGFBgbcnJxHLqFWd2jVK/yVSPTWJDic1ZXnuzxzZ5H/j+Re3PYKvcvd+ceq+9gXC7n9Nh9a8vnAVP2/dQXq1ZEqco6TEkV5N9x8BsRukhfOA75xz651zRcAM4MyqlKQ/mQgMHjiAhQs+4ueff6JTh3b88eZb6HZF9wq3zc9fx50jhvPwpMl8vHgRr8x6mVZHH81V3boCcEv/AZzd7hxef/UVns15BoCO53Xissuv2NVGQUEBs16eyaTJUwHo1fs6but/KykpKYwbf5/HZyuRalwvncn9O5AUChEy46X3vua1havIGXIBrQ6rQ4lzrM7fzK2PlM44aFIvnUf6tefyu17d574AY3qfXuH+UPpYxWs6tKbLyNLZLw++/Bk5Qy5gR3EJvSfMjv9vgg/FcHrXauB0M0sHCoCOwMIq1aRnHUgiBGV6l8RWLKZ3fbJ6c8SZc/Lhh4Sb3nUn8FugGPgYuME5t/crMsJQj1ZEAiWW9/Q450YCI/e3HQWtiASKH+8MU9CKSKD48S51Ba2IBIoPc1ZBKyIB48OkVdCKSKBojFZExGN6OaOIiNcUtCIi3tLQgYiIxzS9S0TEYz7MWQWtiASMD5NWQSsigRLy4diBglZEAsV/MaugFZGg8WHSKmhFJFD8OL1Lr7IRkUAxi3ypvB1rbWaflFs2mVn/qtSkHq2IBEqsroU555YDJ5e2aUnAWmBmVdpS0IpIoHg0dNAR+MY5t6oqOytoRSRQPJ
rd1QPIqerOGqMVkUCJ5m3jZpZlZgvLLVl7tWeWClwKvFDVmtSjFZFAiaZH65zLBrLDbHYhsNg5t66qNSloRSRgYj520JP9GDYABa2IBEwsH/xtZulAJ+DG/WlHQSsigRLLi2HOuW1A/f1tR0ErIoHixzvDFLQiEiz+y1kFrYgEiw9zVkErIsHiw8fRKmhFJFjMh0mroBWRQPFfzCpoRSRgfNihVdCKSLBoepeIiMfUoxUR8ZiCVkTEYxo6EBHxmHq0IiIe82HOKmhFJGB8mLQKWhEJFI3Rioh4LJYP/o4VvZxRRIIlmrczhmvKrI6ZvWhmy8xsqZmdUZWS1KMVkUCJ8dDBA8Drzrkry96Gm16VRhS0IhIosZreZWa1gHbA7wGcczuAHVVqyzkXm6okLDPLKnu9scgu+nuROGaWBWSVW5X93z8LMzuZ0leRfwmcBCwC/uSc2xr1cRS08WNmC51zmYmuQ/xFfy/8ycwygQ+As5xzH5rZA8Am59wd0bali2EiIhXLBXKdcx+WfX4ROKUqDSloRUQq4Jz7AVhjZq3LVnWkdBgharoYFl8ah5OK6O+Ff90CPF024+Bb4LqqNKIxWhERj2noQETEYwpaERGPKWjjxMw6m9lyM/vazIYkuh5JPDObamb5ZrYk0bWItxS0cWBmScDDwIVAG6CnmbVJbFXiA9OAzokuQrynoI2PtsDXzrlvy27jexbomuCaJMGcc/OAjYmuQ7ynoI2Pw4A15T7nlq0TkYOAgjY+KnrMhebViRwkFLTxkQs0K/e5KfB9gmoRkThT0MbHAqCVmbUou8OkBzArwTWJSJwoaOPAOVcM9APeAJYCzzvnvkhsVZJoZpYDzAdam1mumfVJdE3iDd2CKyLiMfVoRUQ8pqAVEfGYglZExGMKWhERjyloRUQ8pqAVEfGYglZExGP/D3j3aGJn20cmAAAAAElFTkSuQmCC\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "## Bernoulli Naive Bayes Classifier\n", + "BNB = Pipeline([('vect', TfidfVectorizer(min_df=6)),\n", + " ('clf', BernoulliNB()),\n", + " ])\n", + "\n", + "BNB_train = BNB.fit(X_train, y_train)\n", + "\n", + "y_pred_BNB = BNB_train.predict(X_test)\n", + "\n", + "print(classification_report(y_test, y_pred_BNB))\n", + "\n", + "# plot confusion matrix\n", + "conf_matrix = confusion_matrix(y_test, y_pred_BNB)\n", + "plot_conf_matrix(conf_matrix)" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " precision recall f1-score support\n", + "\n", + " neg 0.76 0.73 0.74 22\n", + " pos 0.71 0.75 0.73 20\n", + "\n", + " accuracy 0.74 42\n", + " macro avg 0.74 0.74 0.74 42\n", + "weighted avg 0.74 0.74 0.74 42\n", + "\n" + ] + }, + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAVoAAAD8CAYAAAA2Y2wxAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjQuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8rg+JYAAAACXBIWXMAAAsTAAALEwEAmpwYAAAa4UlEQVR4nO3de5xN9f7H8ddnzx73DBlCKXelUmpyqZNbKcr9VIc45NIgKkqlOJxSUXSVaJQIuSREHZWcSnekkk5Hd7dk5BYxGPP9/TFz/FzG3Ow1e83yfnqsh5m19vruz/Lg7Tvf9V3fbc45RETEO6FoFyAiEnQKWhERjyloRUQ8pqAVEfGYglZExGMKWhERjyloRUSOw8wmmVmyma0+av+tZrbGzL4xs0eza0dBKyJyfJOBFofvMLOmQFugjnPuXGBMdo0oaEVEjsM5txTYdtTuvsAo59y+jNckZ9dO2IPajlC0bn89eibH+PHdx6NdgvhQxVKF7ETbyE3mpHw5rjeQeNiuJOdcUjan1QQuN7OHgBRgkHNueVYneB60IiL5ynL+g3pGqGYXrEcLA6WBBsAlwGwzq+qyWM9AQwciEixmOd/yZgMw16VbBqQB8VmdoKAVkWCxUM63vJkPNAMws5pAIeD3rE7Q0IGIBEvee6qZNGUzgCZAvJltAIYDk4BJGVO+9gPdsho2AAWtiARNKCZiTTnnOh3nUJfctKOgFZFgyfuQgGcUtCISLBEcOogUBa2IBIt6tCIiHlOPVkTEY+rRioh4LIKzDiJFQSsiwaIerYiIx0IaoxUR8ZZ6tCIiHtOsAxERj+lmmIiIxzR0ICLiMQ0diIh4TD1aERGPqUcrIuIx9WhFRDymWQciIh5Tj1ZExGMaoxUR8Zh6tCIiHlOPVkTEY+rRioh4y0IKWhERT5mGDkREPOa/nFXQikiwqEcrIuIxBa2IiMdCuhkmIuIx/3VoFbQiEiwaOhAR8Zgfg9Z/gxkiIifAzHK85aCtSWaWbGarMzk2y
MycmcVn146CVkQCJZJBC0wGWmTyHpWA5sC6nDSioBWRQLGQ5XjLjnNuKbAtk0NPAHcDLic1KWhFJFBy06M1s0QzW3HYlpiD9tsAG51zX+W0Jt0ME5FAyc3NMOdcEpCUi7aLAUOAq3JTk3q0IhIslost96oBVYCvzOwX4AxgpZmVz+ok9WhFJFC8nN7lnPsaKHfYe/0CJDjnfs/qPPVoRSRQIjy9awbwCVDLzDaYWc+81KQerUcmDO9My0bnsWXbLhKuf/jQ/r4dG9Pnb41IPZjGmx+sZshTr0WxSom23bv+YPRD/+Tnn77HzLh76AOce/6F0S6rQIvkWgfOuU7ZHK+ck3YUtB6ZuvBTJsx6n+dHdD20r1FCDVo1OZ9LbhjJ/gOplC1dIooVih+MffwR6jW8jPtHPc6BAwfYl7I32iUVfP57MExDB175aOWPbNu554h9iddfzpgXF7P/QCoAW7bvjkZp4hN/7t7Nqi8+55o2HQCIjY2lxCklo1xVwRfhBxYiQj3afFT9rHJcVrca9/drTcr+A9z7+Dw+/0+OHiyRANr06wZKlS7NIyOG8uP331Hz7Nr0v+MeihYtFu3SCjStdXCSC8eEKF2yGI26juG+J+Yz7dEe0S5JoujgwYN8t+Zb2nT4GxOnvkKRIkWZMeWFaJdV4PmxR6ugzUcbN+9g/pL0h0lWfLOWtDRHvMZpT1ply51G2XKnUfu8OgA0btac79Z8G+WqCr5IPoIbKQrafLTwvVU0qVcTgOpnlqNQbJjfNU570jq1TDzlypVn3dqfAVi54jMqV6kW5aoKPj/2aDVG65EpI2/i8otrEF+qBD+8OYIRE/7FlPmf8Nw/O7PilfvYf+AgvYZNjXaZEmW3DbqXh4YNJjX1ABUqnsE9/xgR7ZIKPD+O0ZpzOVp8Js+K1u3v7RtIgfTju49HuwTxoYqlCp1wSlYZ8EaOM+fnJ6/Nl1TOtkdrZmcDbYHTSV8S7FdggXNOg0ki4j/+69BmPUZrZvcAM0kvfRmwPOPrGWY2OIvzDi09lvr7N5GsV0QkS34co83uZlhP4BLn3Cjn3LSMbRRQL+NYppxzSc65BOdcQjj+3EjWGxWFC4X5YOogPps1mM/nDGFon2sAqFPzdN6fciefzhzMh9PvJuHcszI9f8LwzqxdMpIVr9x3xP7SJYvx+vj+fP3aMF4f359SpxQFoOEFVVk2614+nHYXVSulf0pGXImiLBjXz8OrlNx6ZMQ/aN+iMd07tT/m2Kxpk2la/3x27th+zLHkzb8xsG8Puv2tDTd1bMecmdMOHfvhuzX069mZHje25747+/Pn7vSbpV9/9QU9O3egz00d2bg+fe717l1/cNdtvfF6+K+gCYUsx1u+1ZTN8TSgYib7K2QcOyns259Ki8Snqf+3UdTvOJKrLq1NvfMr89CAdjyUtIgGHUcxYvzrPDSgXabnT134KW37jTtm/6DuzXlv2RrOb/sA7y1bw6Du6Utc3v73ZnS663mGjV1I4vWXA3BvYgsenfSWZ9coudeiVVseeXL8MfuTN//GimWfcFr5CpmeFxMTQ9/bBzFl1gKefWE6r82ZyS8//QjAmIeHc3O/AUx6eR5/aXwFs6a9CMArL0/h/pFP0Kvvbbw2dxYAL016js439fLlzZ9oKog92gHAEjNbZGZJGdubwBLgds+r85E/9+4HIDYcQzgcg3MO56Bk8SJAeo9z05admZ6b2eO4AK2a1GHaws8AmLbwM1o3TZ9PeSD1IEULx1KsaCwHUg9S5Yx4KpYrxYef/+DFpUkeXVA3gZIl447ZP+6JR+nd/w44zj/kMvFlqXl2bQCKFS/OmZWr8PuWzQCsX/sLF9RNACChfkOWvvsOADHhMPv2pZCSkkI4HGbjhvX8npzMhRdd4sWlFWhmOd/yS5Y3w5xzb5pZTdKHCk4nfXx2A7DcOXcwH+rzjVDI+Pjle6hWqSzPzVrK8tVruWvMHBaO68fIge0JhYymNz2WqzbLl
TmF337/A4Dffv+DsqeeAsDoSW8zbmgn9u47QM+hLzHyjvbc/+zrEb8mibyPlr5LfNlyVK9ZK0ev/+3Xjfzw3X8559z0/2SrVKvOR0vf5S+Nm/HekrdITv4NgM7devHYyAcoXLgw9/3zYcY//Rg9evf37DoKMj/28LOddeCcSwM+zYdafC0tzdGg4yjiShRl1uM3U7taBXr+9TLufmwu85d8yV+b12X88M5c2+eZE36vVd9tpHG39NC+7KJqbNqyE8OYOqo7B1IPMvjxeSRv23XC7yORlZKyl2mTJzL66edy9Pq9e/YwbPBA+g28h+Il0p8QvHvoA4x9bBQvvTCByxo1JTYcC0D1mmfz7KTpAHz1xQrKlC2Lw3H/kEGEw2H63jaIU8tk+6nXJwUf5qyeDMutnbv3snTF91x1aW06t6rP/CVfAvDq4i+OezPseJK37qJ8fPpqTeXjS7Ilk/Ac3KsFI5MWMaR3S0ZM+Bcz/rWcWzo1OdHLEA/8umE9v/26kV5drqNju6vZkryZxK43sG3rsYvvp6YeYNjggVzZ4loaNb3y0P4zK1dl9Ngkkl6aTbOrWlLxjEpHnOecY9qkJLr26M2U58fT/eZbaN6iFXNnv+z59RUUBfFmmADxpUsQVyJ9RkCRwrE0q1+LNb9sZtOWnVx+cQ0AmtSryQ/rtuSq3Tfe/5ouresD0KV1fV5/b9URx7u0rs+bH3zDjl17KVakEGlpjrQ0R7EisRG4Kom0qtVrMu/N95k5/y1mzn+LsuVOI+ml2cf0NJ1zPPrgcM6qXJUbbux2xLHt27YCkJaWxtRJSbRuf8MRx9964zXqX9aIU0rGsS8lBQuFMAtpHdvD+DFo9QhuDpSPL8nEB/5OTChEKGS8ungliz5Yzc5dexh913WEwyH27Uul/4MzAKhQNo5nh91I+1vT70gf73HcMS8uZtojPejWriHrN22n893/v3JT0SKxdGldn1a3pA9FPD3t38wY04v9B1Lpdu/kfP8zkGONGHo3X65czs4dO7i+1RXclNiPazPWlj3a71uSGfPQcEY9OZ7VX33B4kULqVq9Br26XAdAr7630eCyRix5exGvzZkJwOVNr6Bl63aH2khJ2ctbbyxg9Nj0oYnrO3Vl+OCBhMOx/OPBR7292ALEj0MHegRXokKP4EpmIvEIbp1h7+Q4c1Y9cKU/HsEVESlICuSsAxGRgsSHOaugFZFgyc+bXDmloBWRQNHQgYiIx3yYswpaEQkW9WhFRDzmw5xV0IpIsKhHKyLiMc06EBHxmA87tApaEQkWPw4daPUuEQmUSH7CgplNMrNkM1t92L7RZvZfM1tlZvPMrFR27ShoRSRQIvyZYZOBFkftWwyc55yrA3wH3JtdIwpaEQmUSAatc24psO2ofW8751Izvv0UOCO7dhS0IhIouVn428wSzWzFYVtiLt+uB7AouxfpZpiIBEpu7oU555KApLy9jw0BUoHp2b1WQSsigZIfsw7MrBvQCrjC5eDTExS0IhIoXuesmbUA7gEaO+f25OQcBa2IBEoogklrZjOAJkC8mW0AhpM+y6AwsDij9/ypc65PVu0oaEUkUCL5CK5zrlMmu1/IZF+WFLQiEig+XOpAQSsiweLHR3AVtCISKD7MWQWtiASL4b+kVdCKSKBojFZExGNa+FtExGORnEcbKQpaEQkUH+asglZEgkXTu0REPObDnFXQikiwxPgwaRW0IhIoGjoQEfGYD2d3KWhFJFjUoxUR8ZgPc1ZBKyLBoh6tiIjHYnw4SKugFZFA8V/MKmhFJGC01oGIiMd8mLMKWhEJFt0MExHxmA9zVkErIsGiWQciIh47KYcOti9/xuu3kAKodOsnol2C+NDeRQNPuI1QBOqINPVoRSRQTsoerYhIfvLhEK2CVkSCRTfDREQ85sOcVdCKSLD4cIhWQSsiweLHtQ78OBNCRCTPQrnYsmNmk8ws2cxWH7bvVDNbbGbfZ/xeOic1iYgEhlnOtxyYD
LQ4at9gYIlzrgawJOP7LCloRSRQYkKW4y07zrmlwLajdrcFpmR8PQVol107CloRCZSQ5Xwzs0QzW3HYlpiDtzjNObcJIOP3ctmdoJthIhIoubkZ5pxLApK8qyaderQiEigRHqPNzGYzq5D+XlYBSM7uBAWtiARKboYO8mgB0C3j627Aa9mdoKEDEQkUi+DHM5rZDKAJEG9mG4DhwChgtpn1BNYB12fXjoJWRAIlHMGf051znY5z6IrctKOgFZFA0TKJIiIe06IyIiIe82GHVkErIsHix0VlFLQiEigxPpy0qqAVkUAJRXB6V6QoaEUkUHw4cqCgFZFg0awDERGP6WaYiIjHfJizCloRCRZ93LiIiMd8OLtLQSsiwaK1DkREPOa/mFXQikjAaNaBiIjH/BezCloRCZiQZh2IiHhLsw5ERDymWQciIh7zX8wqaEUkYNSjFRHxWIyCVkTEW/6LWQWtiASMDzu0CloRCRZ9lI2IiMfUoxUR8ZipRysi4i3NOjiJtWzejGLFixMTChETjmHG7LnRLkmiYMLA5rSsV5UtO/aQ0HcqAEM6N6BHi/PZsnMPAMOnfMRby3+JYpUFmw9zVkGbn55/cQqlS58a7TIkiqYu/g8TFnzF84OuPmL/2PkrefLVz6NUVbD4MWj9uP6CSGB9tHoj23alRLuMQLNc/Mq2LbOBZvaNma02sxlmViQvNSlo84tBn5t70vH6DsyZPSva1YjP9Gl9Acue7cKEgc0pVaJwtMsp0EKW8y0rZnY6cBuQ4Jw7D4gBOualJg0d5JMp02ZQrtxpbN26lT69ulOlalUuTrgk2mWJD0x8YxUjZ3yGc47hXS9l1M2N6PPE4miXVWBF+BMWwkBRMzsAFAN+zVNNkaxIjq9cudMAKFOmDM2ubM7qr1dFuSLxi+Qde0hLczgHkxatJqFm+WiXVKDlZujAzBLNbMVhW+L/2nHObQTGAOuATcBO59zbealJQZsP9uzZw59/7j709Scff0T16jWiXJX4RfnSxQ993fbSavxn7dYoVlPw5WbowDmX5JxLOGxL+l87ZlYaaAtUASoCxc2sS15q0tBBPti2dSsDb+sHQOrBg1xzbSsuu7xRlKuSaJhyT0sur1OJ+JJF+GFqL0ZM/YRGdSpRp2pZHI61m//g1qeXRLvMAi2CDyxcCfzsnNsCYGZzgUuBabltSEGbD86oVIlX5i2IdhniA90eWXTMvilvfxOFSoIrgkO064AGZlYM2AtcAazIS0MKWhEJlEjlrHPuMzObA6wEUoEvgKSsz8pcnsdozax7FscODTC/MDFPdYmI5EmMWY637DjnhjvnznbOneec+7tzbl9eajqRHu39wIvHKS6JjORPScWdwHuIiOROQXsyzMxWHWf7Gjgtn2qMumFD76XJ5Q3p0LbVoX1vv7WI9m2u5cLzzuab1V8f99zpU6fQoW0r2re5lmkvTT60f+eOHfTu1Z3WLa+id6/u/LFzJwBfrPyc69q35sYb/sq6tWsB+OOPP+hzc0+c0/9ZflI4NoYPnuzEZ+O68PmErgzt0hBIX7vgx6k38+kznfn0mc5cfUnlY86tcXrpQ8c/faYzm1+9hf7t6gLQ4S81+HxCV/58YwAX1fj/f2YNa1dk2bNd+PCpTlStEAdAXPHCLHiwvfcXW4BE8smwSMlu6OA0oCvQOpPtpJmD0rZdB8Y/9/wR+6pXr8kTT43N8qGD77//jlfnvML0ma/wytzXWPr+e6xd+wsAk55Pol79hixc9Db16jfkhefTh1hemvIijz05llsH3MHsWTMASJrwLL0Se/vyQ+dOZvsOHKTF4DnU7zeN+v2mcdXFZ1Hv7PQ5sGPnr6RB/+k06D890wVivt+4/dDxS297mT0pqSz4+AcAvlm7lY4jFvLh6g1HnHN7h4vo9ODrDJv8EYnXXgDAvZ3q8+isZd5eaAFjlvMtv2QXtK8DJZxza4/afgHe87w6n7g44RJKxsUdsa9qtWpUrlI1y
/N+/ulH6lxwAUWLFiUcDnNxwiX8+530J37efXcJbdq1A6BNu3a8++93AAiHw+xLSSElZS/hcJj169aRnLyZhEvqRf7C5IT9mXIAgNhwiHA4RF5+6Gh6YSV+3rSTdcm7AFizfhvfb9x+zOsOpKZRtFCYYoXDHDiYRpUKcVSML8GHX288oWsIGsvFll+yHKN1zvXM4tiNkS8nWKpXr8nYp55kx47tFC5chA8/WErtc88D0ufWli1bDoCyZcuxbds2AHr26s0D/xxG4cKFeXjUaB4b8wj9br09atcgWQuFjI+fvpFqFUvx3OtfsXzNb1yVUJk+rS/gxivOYeX3mxk8cSk7dh//Hsr1jWsx+/3/Zvteo2cvZ9ztV7J3Xyo9x7zJyF6NuP+ljyN5OcHgwx/8NL3LQ1WrVaN7z1707tWDYsWKUbNWLcIxMVmec/Y55zBtxmwAPl+xnLJly+Gc4647BxAOhxl012DKxMfnR/mSA2lpjgb9pxNXvDCz/tGa2meVydXaBbHhENfWr8awFz/K9r1W/bSFxgNnAnDZeaezaetuzGDq4Gs4cDCNwROXkrxjT0SvryCK8FoHEaFHcD3W4a/XM2vOPF58aTpxcaU486yzADi1TBm2bEkGYMuWZE499ch1ap1zJD03nt59buG5Z5/hln630qpVG16ePjXfr0Gyt/PPfSxdtYGrEirnau2CqxMq8+WPybkOyMGd6jNyxmcM6dyAEdM+Yca/v+WWthee4FUEgx+HDhS0Htu6Nf2e4aZff2XJO2/T8pr0mQtNmjZjwfz5ACyYP5+mTa844rwF8+fRqFFjSsbFsTclBQuFsFCIlL1787V+Ob74uKLEFU9f0rBIoRia1T2TNeu35WrtghuanM3s97IfNjhclytr8+ayn9mxex/FCseS5hxpzlGscGzeLiRofJi0GjrIgXsG3cGK5cvYsWM7zZs1om+/W4mLK8Woh0ewfds2+t/Sm1q1zmHCxBdITt7M/cOGMm7CRADuHHArO3fsIBwOc9/Q4YduqvXolchddwxg/tw5lK9QgTGPP3Xo/fbu3cuC1+YxYeIkALp2686dA24jNjaWUaMfy/8/AMlU+dLFmTjoamJCRsiMVz/4jkXLfuaFQS0yXbugwqnFeXZAc9oPmw9A0cJhmtU9k/5Pv3NEu20urcbjfZsSH1eUufe3ZdVPW2gzdN6hc7pcWZtWQ9I/CunpuSuZMaQ1+1MP0u2Rf+XfxfuYHz+c0byem6kHFiQzpVs/Ee0SxIf2Lhp4win55bpdOc6cC888JV9SWT1aEQkUH94LU9CKSLD4cehAQSsigaIerYiIx3yYswpaEQkYHyatglZEAkVjtCIiHgv5L2cVtCISMApaERFvaehARMRjmt4lIuIxH+asglZEAsaHSaugFZFA8ePC3wpaEQkU/8WsglZEgsaHSaugFZFA0fQuERGP+XCIVkErIsGioBUR8ZiGDkREPKYerYiIx3yYs4SiXYCISCSZ5XzLvi0rZWZzzOy/ZvatmTXMS03q0YpIwES0T/sU8KZz7jozKwQUy0sjCloRCZRILfxtZiWBRsBNAM65/cD+PNUUmZJERPwhN0MHZpZoZisO2xIPa6oqsAV40cy+MLPnzax4XmpS0IpIoFgufjnnkpxzCYdtSYc1FQYuAsY75+oCfwKD81KTglZEgsVysWVtA7DBOfdZxvdzSA/eXFPQikigRCpnnXO/AevNrFbGriuA/+SlJt0ME5FAifADC7cC0zNmHPwEdM9LIwpaEQkUi2DSOue+BBJOtB0FrYgEih+fDFPQikigaK0DERGPafUuERGPqUcrIuIxBa2IiMc0dCAi4jH1aEVEPObDnFXQikjA+DBpFbQiEigaoxUR8VikFv6OJAWtiASLglZExFsaOhAR8Zgfp3eZcy7aNZw0zCzxqI/KENHfi5OAPmEhfyVm/xI5CenvRcApaEVEPKagFRHxmII2f2kcTjKjvxcBp5thIiIeU49WR
MRjCloREY8paPOJmbUwszVm9oOZDY52PRJ9ZjbJzJLNbHW0axFvKWjzgZnFAOOAlkBtoJOZ1Y5uVeIDk4EW0S5CvKegzR/1gB+ccz855/YDM4G2Ua5Josw5txTYFu06xHsK2vxxOrD+sO83ZOwTkZOAgjZ/ZLbMhebViZwkFLT5YwNQ6bDvzwB+jVItIpLPFLT5YzlQw8yqmFkhoCOwIMo1iUg+UdDmA+dcKtAfeAv4FpjtnPsmulVJtJnZDOAToJaZbTCzntGuSbyhR3BFRDymHq2IiMcUtCIiHlPQioh4TEErIuIxBa2IiMcUtCIiHlPQioh47P8AGI1JP2QdHdYAAAAASUVORK5CYII=\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "## Stochastic Gradient Descent Classifier \n", + "SGD = Pipeline([('vect', vectorizer),\n", + " ('clf', SGDClassifier(loss='hinge', penalty='l2',alpha=1e-3, max_iter=5, tol=None, random_state=12)),\n", + " ])\n", + "\n", + "SGD_train = SGD.fit(X_train, y_train)\n", + "\n", + "y_pred_SGD = SGD_train.predict(X_test)\n", + "\n", + "print(classification_report(y_test, y_pred_SGD))\n", + "# plot confusion matrix\n", + "conf_matrix = confusion_matrix(y_test, y_pred_SGD)\n", + "plot_conf_matrix(conf_matrix)" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " precision recall f1-score support\n", + "\n", + " neg 0.78 0.82 0.80 22\n", + " pos 0.79 0.75 0.77 20\n", + "\n", + " accuracy 0.79 42\n", + " macro avg 0.79 0.78 0.78 42\n", + "weighted avg 0.79 0.79 0.79 42\n", + "\n" + ] + }, + { + "data": { + "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAVoAAAD8CAYAAAA2Y2wxAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjQuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8rg+JYAAAACXBIWXMAAAsTAAALEwEAmpwYAAAcLElEQVR4nO3de5yOdf7H8dfnnnswyGkdKpJDDpVKkQ6bQ9KGZbWJEoVoaqUiUdKPbCsWHWwnzUoOWZVDxJYOrFSboliHVdFBOeU8KYY5fH9/zKzHYMw995jrvi+X99Pjeux9X9d9fa/P1erdd7739/qOOecQERHvhOJdgIhI0CloRUQ8pqAVEfGYglZExGMKWhERjyloRUQ8pqAVETkOM5toZtvNbE2ufQ3NbKmZrTSz5WbWJFI7CloRkeObBLQ+at9oYLhzriEwNOd9vhS0IiLH4ZxbAuw+ejdQJud1WWBLpHbCRVzXMZIu7qtHz+QYe5Y9G+8SxIdKhLETbSOazElb+dydQHKuXSnOuZQIp/UD3jGzsWR3Vq+MdB3Pg1ZEJKas4D+o54RqpGA92p+A/s65WWbWGXgJaJXfCRo6EJFgMSv4Vjjdgdk5r2cA+jJMRE4xFir4VjhbgOY5r1sC6yOdoKEDEQmWwvdU82jKpgMtgIpmtgkYBtwBjDOzMJDGkWO8eVLQikiwhBKKrCnnXJfjHGoUTTsKWhEJlsIPCXhGQSsiwVKEQwdFRUErIsGiHq2IiMfUoxUR8Zh6tCIiHivCWQdFRUErIsGiHq2IiMdCGqMVEfGWerQiIh7TrAMREY/pyzAREY9p6EBExGMaOhAR8Zh6tCIiHlOPVkTEY+rRioh4TLMOREQ8ph6tiIjHNEYrIuIx9WhFRDymHq2IiMd82KP1X0UiIifAQqECbxHbMptoZtvNbM1R++8xs6/MbK2ZjY7Ujnq0IhIoVrRDB5OAZ4Epudq/GugAXOicO2hmlSM1oh6tiASLRbFF4JxbAuw+avefgFHOuYM5n9keqR0FrYgEiplFsyWb2fJcW3IBLlEXaGpmn5rZB2Z2aaQTNHQgIoESzdCBcy4FSInyEmGgPHA5cCnwupnVcs65/E4QEQmMUAG+5DpBm4DZOcH6mZllARWBHcetyeuKRERiqgjHaI9jDtASwMzqAsWAnfmdoB6tiARKUc46MLPpQAugopltAoYBE4GJOVO+DgHd8xs2AAWtiARMUQatc67LcQ51i6YdBa2IBEoRz6MtEgpaEQkUBa2IiMcspKAVEfGUerQiIh5T0IqIeM1/OaugFZFgUY9WRMRjCtpTyPhhXWnTrAE7du+jcafHAbiwblWeGXIzxYsnkpGZRb/HX2P52o1xrlTiKTMzky6dO1K5ShWeff7FeJcTCDFY6yBq/qsoIKbOW0qHu587Yt+IftczIuVtLr95FI+9MJ8R/a6PT3HiG9OmTqFWrdrxLiNYvF/rIGoKWo98/MU37E7df8Q+56BMqRIAlC2dxNYdqfEoTXzip23b+HDJYv7Y8cZ4lxIo0axHGysaOoihgWNnMu+5uxnZ/4+EQsbVPZ6Id0kSR6NHPU7/AQP59ddf411KoPhxjFY92hhK7tSUQU/Mpk6b/2PQ2Fm8MKxrvEuSOPlg8b+oUKEC553fIN6lBI4fe7QK2hjq2u4y5ixcCcCs91bQ+Pyz41uQxM3KFV+wePEi2lzbkgcfuJ9lny5l8IMPxLusQLCQFXiLFQ0dxNDWHak0bVSHDz9fT4smddnww3EXZJeAu6//AO7rPwCAZZ99yuRJExn517FxrioY/Dh0oKD1yOSRPWjaqA4Vy5Vmw4LHeGz8W9z92D8YM/BGwuEQBw9m0Pcv0+Ndpkjg+DFoLcLC4Ccs6eK+3l5ATkp7lj0b7xLEh0qET3zSVc1+/yxw5nz39O9jksoRe7RmVh/oAFQFHLAFeNM5t87j2kREoue/Dm3+X4aZ2YPAq2SX/hmwLOf1dDN7KJ/zDv+u9Iyda4u
yXhGRfPlx1kGkHm0v4HznXHrunWb2JLAWGJXXSbl/V3qQhg5CIePjaYPYsj2VjveN5/F+19O2WQMOpWfy3aadJA97hdRfDhxz3j1dr6bHH6/EOcfaDVtIHvYKBw9lAPCnm5tz103NyMjMYsGHaxgybi5XXFSLcQ/fxKH0DG4b/DLf/riTsqWTmPrX2/nDUU+biX9MmzqZWTNn4Jyj442d6HZbjyOOL/vsU/rd04eqVasB0LLVtdzVpy/btm5lyOBB7Nq1E7MQN3bqTNdbuwPw1BNj+PijJdSrfy4jRo4GYN6bc/g5NfXwZ+RIIR8u/B1pelcWcGYe+8/IOXZK6XvL1Xz13U+H3y9c+iWNOj1Ok5tGsn7jdgbe/rtjzjmzUln6dGnOb7uOpnGnx0kIheh0XSMAmjWuQ7sWF3Bp55E0unEET09ZCMB9t7aky8AJDH1mHsmdmgIwOLk1oye+E4O7lMJYv/5rZs2cwbRXZzBj9lyWfLCYjRu/P+ZzFzdqzOuz5/L67Lnc1acvAAnhBB4Y9BBz5r3NK9Nf49Xp/+CbDRvYt28f/1m5gplvzCMrM5P1X39FWloab855g8433xLjOzx5+LFHGylo+wELzextM0vJ2RYAC4H7PK/OR6pWLkfrq87n5Tf+fXjfwqVfkpmZ/d+bz1Z/R9Uq5fI8N5yQQFLxRBISQiSVKHb40dvkTk0Z+/J7HErP7t3u2PMLAOkZmSQVT6RkUiLpGZnUrFaRMyuX46PPN3h4h3Iivvv2Gy686CKSkpIIh8M0anwpi95/r0DnVqpUmXPPOx+AUqVKU6tWLbZv/4lQyEhPT8c5R9rBg4TDYSZNnMAt3W4lMTHRy9s5qZkVfIuVfIPWObcAqAsMB94B3gUeBerlHDtljBnYkSHj5pCVlfdIyG0druCdj/97zP4tO1J5espCvn77Mb57bwQ//3KAhUu/BOCcsyvz24trs2TKA7w74T4anVc9+1oT3+W5R7rQ95arGf/qEob3bc/w5+d7d3Nyws45py6fL1/O3r17OHDgAB99uIRt27Yd87lVK1fS6Y9/oM+dvdmwYf0xxzdv3sSX69ZxwYUXUapUaVpd+ztu6ng9VatWo/Rpp7F2zRqubtkqFrd00irKHq2ZTTSz7Wa2Jo9jD5iZM7OKkdqJOOvAOZcFLI1YUYC1adqA7bv3sWLdjzRtVOeY44N6XUdmZhavvrXsmGPlTkuiXYsLOLfdMPbu288/Rvfi5raX8upbywgnhChfpiTNbhtL4/PP5pXRt3Nuu0dZ9fVmmnfPXgfht5fUZuuOVAxj6qiepGdk8tCTb7B99z7P71sKrlbt2vTs1Zs7e99OyZIlqVuvHuGEhCM+c+5557PgvUWULFWKD5d8QP977mbe2+8ePr7/118Z0O9eBj70MKVLlwagZ6876NnrDgAeHTqEPvfcy+yZM/jk3x9Rp249ku/qE7ubPEkUcU91EvAsMOXIa9hZwLXADwVpRI/gFsAVDWvRrvkFfPnP4UwZ1ZMWl9Zl4l9uA6Br+8to26wBPYZMyvPclpfV5/stu9i55xcyMrKYs+g/XH5RTQA2/7SXOQv/A8DytRvJynJULF/6iPMf6t2akSlvM+TONjw2/i2mv7WMPl1aeHavUng3dOzEazPf4OUp0yhbthzVzz7yEevSpUtTslQpAJo2a05GRgZ79uwGID09nfv73Uvb37en1bXHjvWvW5f909LZZ9dg3ptzGPPkODZsWJ/nOPCpLhSyAm+ROOeWALvzOPQUMIjsKa+Ra4rqDk5RQ595k3Na/x/1fz+M2x56mcXLvub2R6Zw7ZXnMqBHK27s9yIH0tLzPPfHbbtpckFNkkpkj6ld3aTe4S/U5i1eRYsmdQE4p3pliiWG2ZkzTgvQrf1lLPhwLXv3HaBkiWJkZTmyshwlS2h8zo927doFwNYtW1j4/ru0advuiOM7d+zgfw8IrV61iqysLMqVK49zjkeHDqFWrVrc1qNnnm0/98w
4+vS9l4yMDLIyMwEIWYi0A2ke3tHJKZqgzT0VNWdLjtS+mf0B2Oyc+09Ba9IjuCfgqQc7U7xYmPkvZH97/Nnq77l3xKucUakszw+9hT/e8wLL1mzkjfdX8Mk/HiQjM4v/fLmJl2Z9DMDkOZ/w4qNdWT7jYQ6lZ9J76NTDbSeVSKRb+8to1yf7Caq/vbKI6WN7cyg9g+6DJ8X8XiWyAf3uIXXvXsLhMA8/MowyZcvy+mvZj1l3vqkL7737Dq+/Np1wQgLFS5Tgr2OfxMz44vPlzH9zLnXq1qXzDR0AuKff/TRt1hyARQvfp0GDC6hcuQoAFza8mI7Xt6du3brUq18/PjfrY9EMHeSeilqwtq0kMAQ49seO/M7TI7gSD3oEV/JSFI/gXjj0/QJnzqo/t4p4PTOrAcx3zjUwswvInnX1v1X9q5H9tGwT59yx337mUI9WRALFy/mxzrnVQOVc1/oeaOyc25nfeRqjFZFAKcp5tGY2HfgEqGdmm8ysV2FqUo9WRAKlKB/Bdc51iXC8RkHaUdCKSKD4cT1aBa2IBIoPc1ZBKyLBoh6tiIjHfJizCloRCRb1aEVEPObHhb8VtCISKD7s0CpoRSRYNHQgIuIxH+asglZEgkU9WhERjyloRUQ8plkHIiIe82GHVkErIsGioQMREY/5MGcVtCISLCEfJq2CVkQCRV+GiYh4zIc5q6AVkWDRl2EiIh7zYc4qaEUkWAz/Ja2CVkQCxY9jtKF4FyAiUpRCISvwFomZTTSz7Wa2Jte+MWb2pZmtMrM3zKxcxJpO7JZERPwlZFbgrQAmAa2P2vce0MA5dyHwNTA4Yk3R3oSIiJ+ZFXyLxDm3BNh91L53nXMZOW+XAtUitaMxWhEJlBhP77odeC3Sh9SjFZFAiaZHa2bJZrY815Zc8OvYECADmBbps+rRikigJETRo3XOpQAp0V7DzLoD7YBrnHMu0ucVtCISKF4PHZhZa+BBoLlzbn9BzlHQikigFOU8WjObDrQAKprZJmAY2bMMigPv5YT6UufcXfm1o6AVkUApyh6tc65LHrtfirYdBa2IBIrWOhAR8ZhW7xIR8ViCDxc7UNCKSKD4L2YVtCISMPqdYSIiHvNhzipoRSRY9GWYiIjHfJizCloRCRbNOhAR8dgpOXSwcclTXl9CTkK1+s6OdwniQ1vG33DCbfhx7Vf1aEUkUE7JHq2ISCz5cIhWQSsiwaIvw0REPObDnFXQikiw+HCIVkErIsGitQ5ERDym6V0iIh7zYYdWQSsiwaJZByIiHvNhzipoRSRY/PhlmB/HjUVECs2s4FvktmyimW03szW59lUws/fMbH3O/5aP1I6CVkQCJWQF3wpgEtD6qH0PAQudc3WAhTnv868pynsQEfE1i+JPJM65JcDuo3Z3ACbnvJ4MXB+pHY3RikighL3vPlZxzm0FcM5tNbPKkU5Qj1ZEAsXMotmSzWx5ri3Zi5rUoxWRQIlmepdzLgVIifISP5nZGTm92TOA7RFrivICIiK+VpSzDo7jTaB7zuvuwNxIJ6hHKyKBUpTzaM1sOtACqGhmm4BhwCjgdTPrBfwAdIrUjoJWRAIloQh/TnfOdTnOoWuiaUdBKyKBEirAtK1YU9CKSKD48AlcBa2IBIsWlRER8ZgfF5VR0IpIoPgwZxW0IhIsWvhbRMRjfnwKS0ErIoFiPhw7UNCKSKD4L2YVtCISMJp1ICLiMf/FrIJWRAImpFkHIiLe0qwDERGPadaBiIjH/BezCloRCRj1aEVEPJagoBUR8Zb/YlZBKyIB48MOrYJWRIJFv8pGRMRj6tGKiHjM1KMVEfGWZh2cwjq1/x0lS5YilBAiISGBCVNfj3dJEgdP3noJrS44nZ37DtLysYUADGh3LrdcVYPd+w4CMHLuWhat+SmeZZ7UijJnzaw/0BtwwGqgp3MuLdp2FLQxNO7FiZQrVz7eZUgcvfbJRl5e/C3jejQ6Yv/fF25g/Hvr41RVsBRV0JpZVeB
e4Dzn3AEzex24GZgUbVt+XH9BJLA+3bCLPfsPxbuMQLMo/hRAGEgyszBQEthSmJoUtDFiZtx/dzK9unXmzdkz4l2O+EzPFrV4/5FrePLWSyhbMjHe5ZzUQlbwzcySzWx5ri35f+045zYDY4EfgK1AqnPu3cLUpKGDGHn+palUrFSZPbt30f/uO6heoyYNL2kc77LEByZ/8C1P/XMdDhj0h/MY1vEC7p/6RbzLOmlF8xsWnHMpQEpex8ysPNABqAnsBWaYWTfn3CtR1xTtCVI4FStVBqB8hd/QrMU1rFu7Os4ViV/s3HeQLAfOwbSPvqdhDY3jn4giHDpoBXznnNvhnEsHZgNXFqYmBW0MHDiwn/2//nr49bJP/02t2nXiXJX4ReUyJQ6/btPwTL7a8nMcqzn5RTN0EMEPwOVmVtKylwS7BlhXmJo0dBADe3bt4uGB9wGQmZnJtde15bIrr4pzVRIPz/e6lCvqVqJC6WIsH9mGJ+b9lyvqVuL8s8riHGzatZ9B01bEu8yTWlE9sOCc+9TMZgJfABnACo4zzBCxJudckRR1PNv3pXt7ATkpNRw4L94liA9tGX/DCafkR+v3FDhzrqpTPiZPN6hHKyKB4r/nwk5gjNbMeuZz7PCUiSkvTyjsJUREopZgVuAtVk6kRzsceDmvA7mnTGjoQERiyodd2nyD1sxWHe8QUKXoy/GnkcMf4d8fLaF8+QpMeX0OAP96/x0mpjzPxu++JWXydOqf1yDPc2dMn8q8N2bhcLS//kY633IrAD+npjJs8AC2bd3C6WecyZ9HPcFpZcqyauUXPDHqMYoVK8awEWOodlZ19u37mWGDH+CJZ1705e9DOlUVD4eY/UAzioVDhEMh/vnFZsbOX1egtQtqVynN+N5NDr+vXrEUY+b9lwmLvqHdJVUZ0O5c6px+Gm1H/YtVP+wF4NLaFRjZ5WIOZWTR56XP+H7Hr5RJSmR87ybc8szHMbtvvzsZV++qAlwH7DlqvwH/9qQiH2rT/npuuOkWRgx9+PC+mrXPYcTopxnz+PDjnvfthvXMe2MWKVOmEw4n8sC9d3HFVc04q/rZvDJpAo2aXE63Hr15ZdIEXpn0En+6935emzaZv4x+mm1bNjNn5mv07T+QyRNe5NaedyhkfeZgRhadnvqQ/QczCYeMOQObs2jtNiDy2gXf/PQL145YBGRPM/piVFveXpn9dOeXW36m94tL+WvXi484585WdbgjZSln/aYUtzWrxZ9nraZf2/r8bcFXHt3hycmP/5pEGqOdD5R2zm08avseWOx5dT7R8JLGlClT9oh9NWrWpnqNmvmet/H7bznvggspUSKJcDhMw0sas+Rf2Ss2ffTBv2jdrgMArdt14MPF2f/ShcNhDh5MIy0tjXA4zOZNP7Bj+09c3OhSD+5MTtT+g5kAJCaESEwIUZhJPE3rV2bjzl/ZvPsAABu27eObn3455nMZmY4SiQkkFUsgIzOLsyuW4oxyJVi6fucJ3UPQWBRbrOTbo3XO9crn2C1FX06w1Kx9DinP/43UvXspXqI4Sz/+kHrnng/Ant27qFixEgAVK1Ziz57dAHTrcQdjRgynePHiPPLnkTz39Fh6/+meuN2D5C9k8M7DLalRqTSTPviGFd/voWWD0+nZohY3XladVRv3MHzWalL3px+3jQ6NqzFn2Y8Rr/XMgq8Y3fUS0tIzuffl5Qzt2IDR8/5blLcTDD7s0Wp6l4dq1KxN19tup//dd1CyZEnOqVOXhISEfM+pU68+L076BwArv1hOxUqVcc4xbPAAEsJh+vYbSIXfVIxF+VIAWQ6uHbGIMkmJvHTX5dQ7s0xUaxckJhi/u+gMHp+zNuK11m5Kpf3oxQBcds5v2JaahmGM792E9Mwshs9czc6cceFTWTRrHcSKHsH1WLvrOzJx2gye/ftkTitblrOqnw1kr3mwc+cOAHbu3EH58hWOOM85x5SXXqRH7zuZ9PcXuP3Ou7muTXtmvjot5vc
gkf18IJ1Pvt7B1edXiWrtgpYNTmf1D3ujDsh+bevz9Ftfcn+7+oyd919mffojvVrWPtHbCAQ/Dh0oaD22Z/cuAH7atpUlixbS6ro2APy2eQsWzJ8LwIL5c7mq+dVHnPf2/LlccVUzTitTlrS0A5iFsJCRlhb14u7ikQqli1EmKXtJwxKJIZrWr8yGbfuiWrvg+sbVmLNsU1TX7XxFdd5fs43U/ekkFQvnhLojqVj+Py2dMnyYtBo6KIBHHx7Iis+Xkbp3Lze0vYbbk/tQpmxZnh4zkr17djOoXx/OqVufJ59NYeeO7fz1sWGM+dsLADwyqD+pqXsJh8P0f3AIp+V8qdate2+GDh7AP+fOpvLpZ/DYqCcPXy8t7QAL5s/lyeeyH6u+qWt3/m9Qf8KJiQwbMTr2/wAkT1XKlmBc98aEQkbIYN7nm3l/9Tb+1qNxnmsXVClbgrG3XsKtz2ZP2ElKTKDpuZWPWdugdcMz+ctNF/Gb0sWY2vdK1v6Yenj6VlJiAp0uP5su4z4C4MX31/P3Oy8jPSOLPi8ti+Hd+5cfp3dprQOJC611IHkpirUOVv6wr8CZ07D6aVrrQEQkWj78LkxBKyLB4sehAwWtiASKerQiIh7zYc4qaEUkYHyYtApaEQkUjdGKiHisAL90MeYUtCISLApaERFvaehARMRjfpzepUVlRCRQinJNGTMrZ2YzzexLM1tnZlcUpib1aEUkWIq2RzsOWOCcu9HMigElC9OIglZEAqWoFv42szJAM6AHgHPuEHCoUDUVSUUiIj4RzdCBmSWb2fJcW3KupmoBO4CXzWyFmU0ws1KFqUlBKyLBEkXSOudSnHONc20puVoKA5cALzjnLgZ+BR4qTEkKWhEJFIviTwSbgE3OuU9z3s8kO3ijpqAVkUAxK/iWH+fcNuBHM6uXs+saoFC/dlhfholIoBTxPNp7gGk5Mw6+BXoWphEFrYgESlE+GeacWwk0PtF2FLQiEih+fDJMQSsigeLDnFXQikiwqEcrIuI5/yWtglZEAkULf4uIeExDByIiHtPC3yIiXvNfzipoRSRYfJizCloRCRaN0YqIeMx8mLQKWhEJFP/FrIJWRALGhx1aBa2IBIumd4mIeEw9WhERjyloRUQ8pqEDERGPqUcrIuIxH+asglZEAsaHSaugFZFA0RitiIjH/LjwdyjeBYiIFCmLYitIc2YJZrbCzOYXtiT1aEUkUDwYOrgPWAeUKWwD6tGKSKCYFXyL3JZVA34PTDihmpxzJ3K+RMHMkp1zKfGuQ/xFfy/ix8ySgeRcu1Jy/39hZjOBkcBpwAPOuXaFuY56tLGVHPkjcgrS34s4cc6lOOca59pyh2w7YLtz7vMTvY6CVkQkb78F/mBm3wOvAi3N7JXCNKSgFRHJg3NusHOumnOuBnAzsMg5160wbSloY0vjcJIX/b0IOH0ZJiLiMfVoRUQ8pqAVEfGYgjZGzKy1mX1lZhvM7KF41yPxZ2YTzWy7ma2Jdy3iLQVtDJhZAvAc0AY4D+hiZufFtyrxgUlA63gXId5T0MZGE2CDc+5b59whsufkdYhzTRJnzrklwO541yHeU9DGRlXgx1zvN+XsE5FTgII2NvJavkLz6kROEQra2NgEnJXrfTVgS5xqEZEYU9DGxjKgjpnVNLNiZD/O92acaxKRGFHQxoBzLgPoC7xD9gLCrzvn1sa3Kok3M5sOfALUM7NNZtYr3jWJN/QIroiIx9SjFRHxmIJWRMRjCloREY8paEVEPKagFRHxmIJWRMRjCloREY/9P3YLuurI/BA+AAAAAElFTkSuQmCC\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "## Logistic Regression Classifier\n", + "from sklearn.linear_model import LogisticRegression\n", + "\n", + "logreg = Pipeline([('vect', vectorizer),\n", + " ('clf', LogisticRegression(n_jobs=1, C=100000.0, penalty='l1', solver='liblinear', random_state=12)),\n", + " ])\n", + "\n", + "logreg_train = logreg.fit(X_train, y_train)\n", + "\n", + "y_pred_logreg = logreg_train.predict(X_test)\n", + "\n", + "print(classification_report(y_test, y_pred_logreg))\n", + "# plot confusion matrix\n", + "plot_conf_matrix(confusion_matrix(y_test, y_pred_logreg))" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " precision recall f1-score support\n", + "\n", + " neg 0.53 0.73 0.62 22\n", + " pos 0.50 0.30 0.37 20\n", + "\n", + " accuracy 0.52 42\n", + " macro avg 0.52 0.51 0.50 42\n", + "weighted avg 0.52 0.52 0.50 42\n", + "\n" + ] + }, + { + "data": { + "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAVoAAAD8CAYAAAA2Y2wxAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjQuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8rg+JYAAAACXBIWXMAAAsTAAALEwEAmpwYAAAbL0lEQVR4nO3de5yOdf7H8dfnnhmMJlIzDkUyolA6odSSRKuw5BSdVFrpoKacYxFZcmjbtsIo1aaoVJKNslodtsihthOVLTHCOEWYyTDf3x8z64eZMfeMue77msv76XE9mOt0fy7M+/Gd7/X9Xpc55xAREe+Eol2AiEjQKWhFRDymoBUR8ZiCVkTEYwpaERGPKWhFRDymoBURKYCZTTezdDP76oj1fc3sWzP72szGF3YeBa2ISMGeA9ocusLMrgA6AA2dcw2AiYWdREErIlIA59wHwPYjVt8JjHPO/Za7T3ph54n1oLbDxF9wj6aeSR47lj0R7RLEh8rFYsd6jqJkTubnT94B9D5kVapzLrWQw+oCzcxsDJAJ9HfOLTvaAZ4HrYhIRFn4P6jnhmphwXqkWKAScAnQGHjFzJLdUZ5noK4DEQkWs/CX4kkDXnc5PgWygcSjHaCgFZFgsVD4S/HMAVoCmFldoAyw9WgHqOtARIKl+C3VfE5lM4EWQKKZpQEjgOnA9NwhX/uAnkfrNgAFrYgETSimxE7lnOtRwKYbi3IeBa2IBEvxuwQ8o6AVkWApwa6DkqKgFZFgUYtWRMRjatGKiHhMLVoREY+V4KiDkqKgFZFgUYtWRMRjIfXRioh4Sy1aERGPadSBiIjHdDNMRMRj6joQEfGYug5ERDymFq2IiMfUohUR8ZhatCIiHtOoAxERj6lFKyLiMfXRioh4TC1aERGPqUUrIuIxtWhFRLxlIQWtiIinTF0HIiIe81/OKmhFJFjUohUR8ZiCVkTEYyHdDBMR8Zj/GrQKWhEJFnUdiIh4TEErIuIxBa2IiMcUtCIiHrOQglZExFN+bNH6b8CZiMgxMLOwlzDONd3M0s3sq3y29TczZ2aJhZ1HQSsiwWJFWAr3HNAmz0eY1QBaA+vCOYmCVkQCpSRbtM65D4Dt+Wz6CzAQcOHUpKAVkUApStCaWW8zW37I0juM8/8B2OCc+0+4NelmmEemjLiBq5ufw5btv9Ko658Prr+z++X0ua45+w9ks+DDrxj61zejWKVE265du3ho+DDWrPkOM+Oh0X/mvPMviHZZpVpRnnXgnEsFUsPd38zKA0OBq4pSk4LWIy+8tYQpL7/P06NvPriueaM6tGtxLo27jWVf1n6SKiVEsULxg/Fjx3DZ75ox6bHHydq3j4zMzGiXVPp5O+igNlAL+E9u10N1YKWZNXHObSroIHUdeOTfK//L9p17D1vXu2szJj67kH1Z+wHYsmN3NEoTn9i9ezcrVizj2s5dAIgrU4YKFSpEuarSryT7aI/knPvSOVfZOXeGc+4MIA248GghCwraiDqzZmUuu6A2H/y9P+8+fR8X1T892iVJFKWtX0+lSiczfOgQunXuyMjhQ9m7d2/hB8pRlfDwrpnAJ8BZZpZmZr2KU5OCNoJiY0JUqlCe5jdP5MG/zGHG+NuiXZJE0YED+1m96hu6du/BK6/NIT4+nulPh91dKAUo4VEHPZxz1Zxzcc656s65Z47YfoZzbmth51HQRtCGzb8wZ1HOjcrlX/9EdrYjUf20x60qVapSpUpVGjY8D4DWV7Vh9apvolxV6WchC3uJFAVtBL21+AtaNKkLwJmnV6ZMXCxb1U973EpMSqJK1aqs/fEHAJYu+YTk2rWjXFXp52UfbXFp1IFHnh97C80uqkPiSQmsWTCa0VPe5vk5nzB15A0sf/VB9mUd4PbhL0S7TImywQ/+iSGD+pOVlUX16jUY9fDYaJdU6vnxWQfmXFgTG4ot/oJ7vP0AKZV2LHsi2iWID5WLPfbBWbVS/hF25vz4WNuIpHKhLVozOxvoAJx
GznSzn4G5zrlVHtcmIlJ0/mvQHr2P1swGAbPIKf1TYFnun2ea2eCjHHdwWtv+rV+XZL0iIkflxz7awm6G9QIaO+fGOedm5C7jgCa52/LlnEt1zjVyzjWKTWxQkvVGRdkysXz4Qn+WvjyYFbOHMqzPNQA0rHsa7z/fjyWzBvPRiwNp1KBmvsdPGXEDPy0ay/JXHzxsfaUK5Zk3+R6+fHM48ybfw0knxgPQ9LxkPn15CB/NGEByjZwnsFVMiGfuk3d7eJVSVMOHDaFFs6Z06tAuz7bnn32G8xqcxY4deZ9HsmnjRnrdchMd21/NtX9oy4svPH9w27erV3PT9dfRuWN7+t7Vh927c26WfrZyBV2ubc/13Tqz7qefgJzpu33+2Auvu/9Km1DIwl4iVlMh27OBU/NZXy1323Hht337adP7cS6+bhwXdx/LVZfWp8m5ZzAmpSNjUudzSfdxjJ48jzEpHfM9/oW3ltDh7ifzrO9/a2sWf/ot53YYxeJPv6X/rTnTp++7qSU9BjzN8L+9Re+uzQAY0rsN46e/49k1StF16NiJyVOfzrN+08aNfPLxx1Srlt+3DsTExtB/4GDmvDWfGTNfZtbMl/jvmjUAPDR8KPfd34/X5rxFy1ateG56zvn//vyzTHrsb/RNeYBXXp4JQOqUp7i99x2+vPkTTaWxRZsCLDKz+WaWmrssABYB93lenY/sydgHQFxsDLGxMTjncA4qnFAOyGlxbtyyM99j85uOC9CuRUNmvLUUgBlvLaX9FQ0ByNp/gPiycZSPjyNr/wFqVU/k1Mon8dGKNV5cmhTTRY0aU6FixTzrJzwylvv7DSjwGzkpqTL16uf8pHfCCQkkJyeTnr4ZgLVrf+SiRo0BaNr0MhYtfBeA2NhYfsvMJDMzg9jYWNavW0d6+mYaNW7ixaWVambhL5Fy1JthzrkFZlaXnK6C08jpn00DljnnDkSgPt8IhYyPXxpE7RpJTH35A5Z99RMDJs7mrSfvZuz91xIKGVfcMqlI56x8yols2roLgE1bd5F08okATJj+Lk8O60HGb1n0GvZ3xj5wLQ89Na/Er0lK3uL3FlG5SmXOOvvssPbfsCGN1atWcW7upIUz69Rl8b8WcUXLVrz7zgI2bdoIQK/b72DUyOGULVuWP4+bwKSJj3B33+OqrRM2P7bwCx114JzLBpZEoBZfy852XNJ9HBUT4nn50T9Sv3Y1enW+jIGTXmfOos/p3PoCJo+4gbZ9jn3Y0hffbeDynjmhfdmFtdm4ZSeG8cK4W8naf4DBj75B+vZfj/lzpGRlZGQwLXUKU6ZND2v/vXv20C/lXgYMfpCEhJwZgg+NHsO4sWOYOvkpWlzRkri4MgCcXa8eM2a+AsCK5ctISqqMc44B/VKIjY2l/4DBnJJY6BtVjgs+zFnNDCuqnbsz+GD591x1aX1uaHcxcxZ9DsBrCz8r8GZYQdK3/UrVxJynNVVNrMCWfMJz8O1tGJs6n6F3XM3oKW8z8+1l3NWjxbFehnggbf06NmxIo1unDlzduiWbN2+ie5dObN2yJc++WVlZPJByL9e0bU+r1v//aNNaybWZOm06s159nTbXtKV6jRqHHeecI3XqZO7ocxdTn3qCu+7uS7t2f+ClFzX55X9K480wARIrJVAxIWdEQLmycbS8+Cy+XbuZjVt20uyiOgC0aFKXNevyfkMdzT/e/5Ib218MwI3tL2be4i8O235j+4tZ8OHX/PJrBuXLlSE725Gd7ShfLq4ErkpKWp26Z7H4w0+Yv/A95i98jypVqjJr9uskJiUdtp9zjpHDh5KcnMzNt9x62LZt27YBkJ2dzbSpk+l6XffDts+d8wbNm19OhYoVycjMxEIhLBQiMyPD24srRfwYtJqCG4aqiRWYNuomYkIhQiHjtYUrmf/hV+z8dS8TBnQhNjbEb7/t556Hc+4GV0uqyFPDr+favpOBgqfjTnx2ITMeuY2eHZuyfuMObhj4/w8Gii8Xx43tL6b
dXTldEY/PeI+ZE29nX9Z+eg55LuJ/B5LXoP4PsHzZp/zyyw5at2zOnXf3pVPnrvnum56+mYeGD+PJKdP4bOUK5s19kzp169KtUwcA+qY8QLPml7Pg7XnMmvkSAFe2ak3HazsfPEdGRgZz33zjYNfEzT1vpV/KvcTFxTFuQtHuDwSZH7sONAVXokJTcCU/JTEFt+Hwf4adOV+MauWPKbgiIqVJqRx1ICJSmvgwZxW0IhIskbzJFS4FrYgEiroOREQ85sOcVdCKSLCoRSsi4jEf5qyCVkSCRS1aERGPadSBiIjHfNigVdCKSLCo60BExGM+zFkFrYgEi1q0IiIeU9CKiHhMow5ERDzmwwatglZEgkVdByIiHvNhzipoRSRYQj5MWr0FV0QCpSTfgmtm080s3cy+OmTdBDNbbWZfmNkbZnZSoTUd2yWJiPhLyMJfwvAc0OaIdQuBc5xzDYHvgCGF1lTEaxAR8TUzC3spjHPuA2D7Eevedc7tz/1yCVC9sPMoaEUkUMyKslhvM1t+yNK7iB93GzC/sJ10M0xEAsUI/2aYcy4VSC3W55gNBfYDLxa2r4JWRAIlEhPDzKwn0A640jnnCttfQSsigeL1FFwzawMMAi53zu0N5xgFrYgESkmOozWzmUALINHM0oAR5IwyKAsszL2htsQ51+do51HQikiglOR8Bedcj3xWP1PU8yhoRSRQ9KwDERGP+TBnFbQiEiwxPkxaBa2IBIq6DkREPObDFywoaEUkWNSiFRHxmA9zVkErIsGiFq2IiMdifNhJq6AVkUDxX8wqaEUkYPz4zjAFrYgEig9zVkErIsGim2EiIh7zYc4qaEUkWDTqQETEY8dl10HFRi28/ggRkYP8+GpvtWhFJFCOyxatiEgk+bCLVkErIsGim2EiIh7zYc4qaEUkWHzYRaugFZFg0bMOREQ8puFdIiIe82GDVkErIsGiUQciIh7zYc4qaEUkWHQzTETEYz7MWQWtiASLug5ERDxmPnw9o4JWRAIl1ocDaRW0IhIoekyiiIjH1EcrIuIxHzZofTktWESk2EJmYS+FMbPpZpZuZl8dsu5kM1toZt/n/l6p0JqO8ZpERHwlJhT+EobngDZHrBsMLHLO1QEW5X59VApaEQmUEBb2Uhjn3AfA9iNWdwCez/3z80DHwmsSEQkQs6Is1tvMlh+y9A7jI6o45zYC5P5eubADdDNMRAKlKKMOnHOpQKpnxeRS0IpIoETgoTKbzayac26jmVUD0gutyeuKREQiqShdB8U0F+iZ++eewJuFHaAWrYgESkk++NvMZgItgEQzSwNGAOOAV8ysF7AO6FrYeRS0IhIoJfljunOuRwGbrizKeRS0IhIoetaBiIjH/BezCloRCRi9ykZExGP+i1kFrYgETMiHz0lU0IpIoPhxcoCCVkQCRaMOREQ85r+YVdCKSMCoRSsi4rEYBa2IiLf8F7MKWhEJGB82aBW0IhIs4byiJtIUtCISKGrRioh4zNSiFRHxlh9HHfhxtlog/KXnRXw1qR2LR7bOs+3Oq+qyaVoXTk4oE4XKxE927dpFv5R76dCuDR3bX81/Pv8s2iWVehF4lU2RqUXrkZc//onp//ovf7ut8WHrT60UT/P6lUnbtidKlYmfjB87hst+14xJjz1O1r59ZGRmRrukUs+HDVq1aL2y5Put/LJnX571o647j9Gzv8S5KBQlvrJ7925WrFjGtZ27ABBXpgwVKlSIclWlnxXhV6SoRRtBV51XjY07MvgmbWe0SxEfSFu/nkqVTmb40CF8++1q6jdowMDBQylfvny0SyvVfPiURLVoIyW+TAwp19Rj/Nyvo12K+MSBA/tZveobunbvwSuvzSE+Pp7pT6dGu6xSL2QW9hKxmiL2Sce5mkkncHpied4b3pplY6+mWqV43h3WiqQKZaNdmkRJlSpVqVKlKg0bngdA66vasHrVN1GuqvRT18FxbPWGXZzTb97Br5eNvZrfj1nE9t15+3H
l+JCYlESVqlVZ++MPnFErmaVLPiG5du1ol1Xq+bHrQEHrkcl/bMKldZM4OaEsK8dfw4S53zDzo7XRLkt8ZvCDf2LIoP5kZWVRvXoNRj08NtollXp+nLBgzuPb31X/OFv31yWPtZO7RLsE8aFysceekh99vyPszPldnUoRSWW1aEUkUPzXnj2Gm2FmdutRtvU2s+Vmtnzv6oXF/QgRkSKLMQt7iZRjGXXwUEEbnHOpzrlGzrlG5c/OOwVVRMQzVoQlQo7adWBmXxS0CahS8uX4U9nYEHMGtqBMbIjYGGPeig1MmPsNAzs0oM351ch2sHXXb9z37DI278wM61igwOMb1z6FR268gH37s+mTupS1W/ZQIT6OqXdcTI/HPorGX4HkY/iwIXzw/mJOPvkUXn9z3mHbnn/2GR6dOJ7FH31CpUonH7Zt08aNDB0ykG3btmIWokvXbtxwU08Avl29modHjWDv3r2ceuppjB0/kYSEBD5buYIxo0dSJq4M4yY8yuk1a7Jr1y4G9rufyalP+/I9WdFS6m6Gmdlm4PfAjiM3AR87504t7AOCcjOsfNkY9v52gNgYY+7AKxj28ud89/MudmfuB6BXyzOpe+qJDJqR96Eg+R278oftJJSLzff4Z+5sysOvfUmNU8rT8pyqjHz1C0Z2bcg7//mZT77bGtHr9koQboatWL6M8uXLM3TIoMOCdtPGjYwcPoy1P/7AzFdfyxO0W7aks3XLFurVb8CePbvp3rUzjz3+JLXPPJPru3XmgQGDaNS4CW+8PpsNaWncc28K9993DykP9OfnDRv490cf0n/gYCaOH0eLK1rSqHGTSF+6Z0riZtinP+wMO3OaJFeMSCoX1nUwD0hwzv10xLIWWOx5dT6y97cDAMTF5LRMneNgSEJOmFLAP29+x0LBx+8/kE25uBjiy8SQdSCbmkknUPWk+MCEbFBc1KgxFSpWzLN+wiNjub/fgAJbmUlJlalXvwEAJ5yQQHJyMunpmwFYu/ZHLmqU8yCipk0vY9HCdwGIjY3lt8xMMjMziI2NZf26daSnbw5UyJYUH/YcHL3rwDnX6yjbri/5cvwrZPDun1pRKymBZxf/l89+3A7A4I4N6Nq0Jr9mZNF54vtFOrag4x+fv5qJN19I5r4D3DN9GSO6NOSRNzV1tzRY/N4iKlepzFlnnx3W/hs2pLF61SrOzZ0ddmaduiz+1yKuaNmKd99ZwKZNGwHodfsdjBo5nLJly/LncROYNPER7u57n2fXUar5r+dAU3DDle2g1ah/csHAf3DBGZU4+9ScpyyNm/M1Fw16m9eWruO2lmcW6diCjv96/U7ajv0XnSd9QM3EE9i0MwMzmNr7Yp7o1ZjEEzVt148yMjKYljqFu+4JLwD37tlDv5R7GTD4QRISEgB4aPQYZs18ie5dO7F37x7i4nKeWXx2vXrMmPkKzzz3Amlp60lKqoxzjgH9UhgyqD/btuqnnf/Rsw4CYFdGFh9/t4Urzql62Po3lq6n7YWnFevYox2f0rYef5m3in7t6zNh7je8tmQdt1+Zf6BLdKWtX8eGDWl069SBq1u3ZPPmTXTv0omtW7bk2TcrK4sHUu7lmrbtadX6qoPrayXXZuq06cx69XXaXNOW6jVqHHacc47UqZO5o89dTH3qCe66uy/t2v2Bl158wfPrKy382HWgoA3DKQllqBAfB0C5uBDN6lVhzaZfqVU54eA+vz+/Gms2/Rr2sUChx193aU3++eVGdu7NIr5MDNnOke0gvozmmfhRnbpnsfjDT5i/8D3mL3yPKlWqMmv26yQmJR22n3OOkcOHkpyczM23HD4cfdu2bQBkZ2czbepkul7X/bDtc+e8QfPml1OhYkUyMjOxUAgLhcjMyPD24kqTEkxaM7vfzL42s6/MbKaZlStOSfqODUPlivE8flsjYkI5P27MXZ7Gwi828nSfSziz6olkO0fatr0MnLESgCoVy/Foz4u44fF/F3gswNBO5+R7POQ
8VrFb05pc99iHAExd+D3P9GnKvgPZ3DltaeT/EiSPQf0fYPmyT/nllx20btmcO+/uS6fOXfPdNz19Mw8NH8aTU6bx2coVzJv7JnXq1qVbpw4A9E15gGbNL2fB2/OYNfMlAK5s1ZqO13Y+eI6MjAzmvvkGU6ZNB+DmnrfSL+Ve4uLiGDdhksdXW3qU1PAuMzsNuBeo75zLMLNXgO7Ac0U+l551INEQhOFdUvJKYnjX5+t+DTtzzj/9xAI/LzdolwDnAbuAOcDjzrl3i1qTug5EJFCK8nLGQx8XkLv0/t95nHMbgInAOmAjsLM4IQvqOhCRgClK14FzLhXI97UWZlYJ6ADUAn4BXjWzG51zM4pak1q0IhIoJfi68VbAj865Lc65LOB14NLi1KSgFZFAKcFBB+uAS8ysvOVM87sSWFWcmtR1ICLBUkIDZJ1zS81sNrAS2A98RgHdDIVR0IpIoJTk07uccyOAEcd6HgWtiASKXs4oIuI1Ba2IiLf8+OBvBa2IBIofXzahoBWRQPFhzipoRSRgfJi0CloRCZRIPtA7XApaEQkU/8WsglZEgsaHSaugFZFA0fAuERGP+bCLVkErIsGioBUR8Zi6DkREPKYWrYiIx3yYswpaEQkWtWhFRDznv6RV0IpIoOjB3yIiHlPXgYiIxzS8S0TEa/7LWQWtiASLD3NWQSsiwaI+WhERj5kPk1ZBKyKB4r+YVdCKSMD4sEGroBWRYNHwLhERj6lFKyLiMQWtiIjH1HUgIuIxtWhFRDzmw5xV0IpIwPgwaRW0IhIo6qMVEfGYHx/8HYp2ASIiJcqKsBR2KrOTzGy2ma02s1Vm1rQ4JalFKyKBUsJdB38FFjjnuphZGaB8cU6ioBWRQCmp4V1mVgFoDtwC4JzbB+wr1rmccyVTlRTKzHo751KjXYf4i/5fRI+Z9QZ6H7Iq9X//FmZ2PpAKfAOcB6wA7nPO7Sny5yhoI8fMljvnGkW7DvEX/b/wJzNrBCwBLnPOLTWzvwK7nHN/Kuq5dDNMRCR/aUCac25p7tezgQuLcyIFrYhIPpxzm4D1ZnZW7qoryelGKDLdDIss9cNJfvT/wr/6Ai/mjjj4Abi1OCdRH62IiMfUdSAi4jEFrYiIxxS0EWJmbczsWzNbY2aDo12PRJ+ZTTezdDP7Ktq1iLcUtBFgZjHAk8DVQH2gh5nVj25V4gPPAW2iXYR4T0EbGU2ANc65H3Kn8c0COkS5Joky59wHwPZo1yHeU9BGxmnA+kO+TstdJyLHAQVtZOT3mAuNqxM5TihoIyMNqHHI19WBn6NUi4hEmII2MpYBdcysVu4Mk+7A3CjXJCIRoqCNAOfcfuAe4B1gFfCKc+7r6FYl0WZmM4FPgLPMLM3MekW7JvGGpuCKiHhMLVoREY8paEVEPKagFRHxmIJWRMRjCloREY8paEVEPKagFRHx2P8BRT7CnVy7dVEAAAAASUVORK5CYII=\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "## Support Vector Classifier\n", + "svc = Pipeline([('vect', CountVectorizer()),\n", + " ('tfidf', TfidfTransformer()),\n", + " ('clf', SVC()),\n", + " ])\n", + "\n", + "svc.fit(X_train, y_train)\n", + "\n", + "y_pred_SVC = svc.predict(X_test)\n", + "\n", + "print(classification_report(y_test, y_pred_SVC))\n", + "# plot confusion matrix\n", + "conf_matrix = confusion_matrix(y_test, y_pred_SVC)\n", + "plot_conf_matrix(conf_matrix)" + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " precision recall f1-score support\n", + "\n", + " neg 0.65 0.77 0.71 22\n", + " pos 0.69 0.55 0.61 20\n", + "\n", + " accuracy 0.67 42\n", + " macro avg 0.67 0.66 0.66 42\n", + "weighted avg 0.67 0.67 0.66 42\n", + "\n" + ] + }, + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAVoAAAD4CAYAAACt8i4nAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjQuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8rg+JYAAAACXBIWXMAAAsTAAALEwEAmpwYAAAaO0lEQVR4nO3deXwV5b3H8c/v5LAk7BCCLCo7bkVFEL0KsmiLggJ1BamI0GAVW+qK1QvXtbTIrVYRGpFNFEVERNywKFAFFVzBIorKJkgCIQEkCYQ894+kuYAhOSdkzpkM3zev50Uy58wzvxHy9eGZZ+aYcw4REfFOKN4FiIgEnYJWRMRjCloREY8paEVEPKagFRHxWNjrAySeOULLGuRndq54It4liA9VD2NH20c0mZPz6RNHfbxIeB60IiIxZf77h7qCVkSCxWIySI2KglZEgkUjWhERj2lEKyLisVBCvCv4GQWtiASLpg5ERDymqQMREY9pRCsi4jGNaEVEPKYRrYiIx7TqQETEYxrRioh4LKQ5WhERb/lwROu/ikREjoZZ5K3MrmyKmaWb2erDtt9iZmvN7Esz+2tZ/WhEKyLBUrEXw6YBTwAz/rPBzLoDfYH2zrk8M0spqxMFrYgESwVOHTjnlppZ88M2/w4Y65zLK3pPeln9aOpARIIliqkDM0s1s5UHtdQIjtAW6GJmH5rZEjPrVNYOGtGKSLBEMaJ1zqUBaVEeIQzUA84BOgGzzaylc+6IH6GjEa2IBEsFXgw7gs3AXFfoI6AASC5tBwWtiASLhSJv5TMP6AFgZm2BqsD20nbQ1IGIBEsFrjows1lANyDZzDYDY4ApwJSiJV/7gMGlTRuAglZEgqZiVx0MOMJLg6LpR0ErIsGixySKiHjMh7fgKmhFJFg0ohUR8ZhGtCIi3rKQglZExFOmqQMREY/5L2cVtCISLBrRioh4TEErIuKxkC6GiYh4zH8DWgWtiASLpg5ERDymoBUR8ZiCVkTEYwpaERGPWUhBKyLiKY1oRUQ
8pqAVEfGa/3JWQSsiwaIRrYiIxxS0x5BJY67l4q6nkZG5m45XPgzAM2OH0KZ5IwDq1koka3cO51wzNp5lSpxdfFEPkmrUICEUIiGcwKzZc+NdUqWnZx0cQ5559QMmvbCEyQ9cV7ztN6OmFn899tb+ZO/JiUdp4jOTp06nXr368S4jOPw3oMV/0R8Q73/yLZnZe4/4+uUXdWD2mx/HsCKRY4OZRdxiRSPaODivQyu2Ze7m240Z8S5F4s3gxt8Oxcy44sqrueKqq+NdUaWnOVoB4KpeHXnxzZXxLkN8YPrMWaSkNGLHjh3cOGwILVq25KyOneJdVqXmx6DV1EGMJSSE6NvjdOa89Um8SxEfSEkpvDjaoEEDelx4EatXfRHniio/C1nELVYUtDHWo3M7vl6/jR/Ss+JdisTZ3r17+emnPcVfL1/2Pq1bt4lzVZWf5miPIdP/fD1dzmpDct2arHvzAR6Y9DrT5y3nyl+dpYtgAkDmjh388fc3A5B/4ACX9O7DeV26xrmqyq8iA9TMpgB9gHTn3GmHvXY7MA5o6JzbXmo/zrkKK6okiWeO8PYAUintXPFEvEsQH6oePvrFWS1GvhZx5nz/aO9Sj2dmXYE9wIyDg9bMjgcmAycBZ5UVtGWOaM3sJKAv0BRwwBZgvnNuTVn7iojEXAXOCDjnlppZ8xJe+htwJ/BKJP2UOkdrZncBz1NY+kfAiqKvZ5nZqFL2SzWzlWa2Mn/7l5HUISJSIaKZoz04q4paagT9Xwb84Jz7PNKayroYNhTo5Jwb65ybWdTGAmcXvVYi51yac66jc65jOPnUSGvxvVDIWD7rLl567EYA6tVOYsHEEax6ZTQLJo6gbq3EiPcFaN+2KUum38YHz4/ivWfvpOOpJwJw7ukt+eiFu3lv5h20PD4ZgDo1E5k/4WYPz06iNfreu+nW5Vx+3bdP8baFb71B/8t6c8ZpJ/Hl6lVH3PfZZ6bz67596H9Zb2bOmFa8PTsri+HDhnDpxb9k+LAh7MrOBuDTTz7miv6XMvCqy9m4YQMAu3bt4sbfDsXr6b/KJhSyiNvBWVXU0krr28ySgHuA0VHVVMbrBUCTErY3LnrtmDJiYHfWfr+t+Pvbh1zE4o/W8ou+97P4o7XcPuSXEe8L8NDIfjyU9gbnXDOWByYu4KGR/QD4w296MOCOyYx+/FVSr+wCwN2pvfjrlLcq/qSk3Pr2+zUT/zH5kG2tW7flb489Xupa2G+++ZqX5rzIs8+/yItzX2HpksVs2LAegCmT0zi787m8+sZCzu58Lk9PLvy5nzF9KuMffZxbRt7K7BdmAZA26UmGpQ735brRePJ41UEroAXwuZmtB5oBn5jZcaXtVFbQjgQWmdkbZpZW1N4EFgF/KE+VlVXTlLr0Ov9Upr68rHhbn27tmfnqhwDMfPVDLu3ePuJ9AZyD2jWqA4Uj1q0ZhaOX/fkHSKxWhaTEKuzPP0CLZsk0SanLex+v8+LUpJzO6tiJ2nXqHLKtZatWNG/RstT9vv/uW9qffjqJiYmEw2HO6tiJd/75NgDvvruIy/r1A+Cyfv14951/AhAOh8nLzSU3N4dwOMymjRtJT99Gx05nV/yJVXJmkbdoOedWOedSnHPNnXPNgc1AB+fcj6XtV+rFMOfcm2bWlsKpgqYUzs9uBlY45w5EX2blNe6Oy7nnsXnUTKpevC2lQS1+3L4LgB+376Jh/VoR7wtwxyNzeHXCzfz5j/0JhYzu148vfP+UhUy4dwA5efsZeu8M/nxrf+57coFHZyax1rp1Wx5/7FGysnZSrVp13vvXUk45tfCCduaOHTRsmAJAw4YpZGZmAjB02HDu/5/RVKtWjYfHjmP8I3/h5luOqbFOxCp4edcsoBuQbGabgTHOuaej7afMVQfOuQLgg6grDJCLu5xGeuZuPl2ziS5nRbegvLR9U6/swp3j5zJv0WdcftGZTBxzLb1vfIIvvv6BCwYXhu55HVqxNSMbw3hm7BD25x9
g1P++THrm7go7P4mtlq1aMWToMIYPu4GkpCTatmtHOCGh1H1OOvlkZs6aDcDHK1fQsGEKzjnuuG0k4XCY2+8YRYPk5FiU73sVOZPinBtQxuvNI+lHd4ZF4NwzWtLngl/w1Wv3MWPsELp1asuUB68jfcdujkuuDcBxybXJKCH8jrQvwLV9OjNv0WcAvPT2p8UXww42algv/pz2BvcMv5gHJr3OrNdXcNOAbp6dq8TGry+/khfmvMzUGc9Sp05dTjix8M++foMGZGSkA5CRkU79+oc+PtE5R9o/JjL8xpv4x5NPcNPNt9Cnz2U89+wzMT8Hv4rmYljMaorZkSqx0Y/Pp3Wv/+ak3mO4btRUFq/4mhvuncFrS1Yx6NLOAAy6tDMLFv/8PvUj7QuwNSO7eJTb7ey2rDvsaV6DLu3Mm//6kqzdOSRVr0pBgaOgwJFUvYrHZyxe27FjBwBbt2xh0T8XcvElhSsXunXvwfx58wCYP28e3bv3PGS/+fNepmvXC6hdpw45ublYKISFQuTm6NnG/+HHoNUtuEfhkalvM/MvNzC437ls2rqTa+8snLpp3LAOT44eSP9bJpa6/80PPMe4O64gHA6Rl5fPiAdnFb+WWL0Kgy7tTJ+bCu+g+vvMd5j1yDD27c9n8N3TPDsnidxdt9/KyhUfkZW1k4t6dOV3N99CnTp1GfvwA+zMzGTETcNp1+5kJj31NOnp27hv9L1MmPQUALeNvIXsrCzC4TB/undM8UW1G4alcsetI5k3dw7HNW7MI//7WPHxcnJymP/Ky0x6agoA1w0ewm0jf0+VKlUYO2587P8D+JQfF2HoFlyJC92CKyWpiFtw24/+Z8SZ88X9F8YkljWiFZFA8eO6YgWtiASKD3NWQSsiwRLLi1yRUtCKSKBo6kBExGM+zFkFrYgEi0a0IiIe82HOKmhFJFg0ohUR8ZhWHYiIeMyHA1oFrYgEi6YOREQ85sOcVdCKSLBoRCsi4jEFrYiIx7TqQETEYz4c0CpoRSRYNHUgIuIxH+asglZEgiXkw6RV0IpIoOhimIiIx3yYswpaEQkWXQwTEfGYD3OWULwLEBGpSBbFrzL7MptiZulmtvqgbePM7Csz+8LMXjazumX1o6AVkUAJWeQtAtOAXodtexs4zTnXHvgauLvMmqI8BxERXwuFLOJWFufcUiDzsG0LnXP5Rd9+ADQrs6bynIiIiF+FzCJuZpZqZisPaqlRHu4G4I2y3qSLYSISKNFcDHPOpQFp5TuO3QPkA8+W9V4FrYgESiyWd5nZYKAP0NM558p6v4JWRALF65w1s17AXcAFzrm9keyjoBWRQEmowKQ1s1lANyDZzDYDYyhcZVANeLto9PyBc+7G0vpR0IpIoFTk1IFzbkAJm5+Oth8FrYgEip51ICLiMT3rQETEYz7MWQWtiASLRrQiIh5L8OEkrYJWRALFfzGroBWRgNFnhomIeMyHOaugFZFg0cUwERGP+TBnFbQiEixadSAi4rFjcurgmWn3eH0IqYRue3VNvEsQH5rQ/+Sj7sOPHxujEa2IBMoxOaIVEYklH07RKmhFJFh0MUxExGM+zFkFrYgEiw+naBW0IhIsetaBiIjHtLxLRMRjPhzQKmhFJFi06kBExGM+zFkFrYgEiy6GiYh4zIc5q6AVkWDR1IGIiMfMhx/P6MclZyIi5RYORd7KYmZTzCzdzFYftK2+mb1tZt8U/V6vrH4UtCISKGYWcYvANKDXYdtGAYucc22ARUXfl0pBKyKBErLIW1mcc0uBzMM29wWmF309HehXZk3RnYKIiL+ZRdMs1cxWHtRSIzhEI+fcVoCi31PK2kEXw0QkUKJZR+ucSwPSvKumkIJWRAIlwft/p28zs8bOua1m1hhIL2sHTR2ISKCEsIhbOc0HBhd9PRh4pawdNKIVkUCpyDvDzGwW0A1INrPNwBhgLDDbzIYCG4Ery+pHQSsigVKRd4Y55wYc4aWe0fSjoBWRQNFDZUREPOb
DnFXQikiw6MHfIiIe8+NSKgWtiARKhM8wiCkFrYgEiv9iVkErIgGjVQciIh7zX8wqaEUkYEJadSAi4i2tOhAR8ZhWHYiIeMx/MaugFZGA0YhWRMRjCQpaERFv+S9mFbQiEjA+HNAqaEUkWI7iI2o8o6AVkUDRiFZExGOmEa2IiLe06uAYtuz1OaxYtAAcdOzZm/N6l/nBmRJAgzo05rTjarI7L5+HFn0PwJlNatH75IY0qlWVcYvXszErN85VVm4+zFlf3hYcONs2fseKRQv43cOTGDFuMms/Wc72rZvjXZbEwQcbspjw/qZDtm3ZnUfah5tZt31vnKoKFrPIW6woaGMg/YeNHN/mFKpWq05CQpjmJ5/Bvz/6V7zLkjhYtyOHn/YfOGTbtt37SN+zL04VBY9F8StWFLQx0Oj4Fqxf8wV7d2ezLy+Xrz/9gOwd6fEuSySQQhZ5ixXN0cZASrMT6dp3AFMevJ1q1RM57sRWhEIJ8S5LJJD0CQvHsI49etOxR28AFj73FLUbNIxzRSLB5MflXZo6iJE92TsByNq+jS8/Wsrp5/WMc0UiwaSpg2PYc+NHs3f3LhLCYS4bOpLEmrXiXZLEwZCOTWjTsAY1qybwYK/WvLYmg737Crjy9EbUrJrA7849ns3ZuUxYtqnszqREFTmiNbM/AsMAB6wChjjnol5/p6CNkdT7H493CeIDU1duKXH751t3x7iS4KqoKVozawr8HjjFOZdjZrOBa4Bp0faloBWRQKngGYEwkGhm+4EkoOT/U5ah3HO0ZjaklNdSzWylma18e87M8h5CRCRqCWYRt4Ozqqil/qcf59wPwCPARmArkO2cW1iemo5mRHsfMLWkF5xzaUAawJzPt7qjOIaISHSiGNIenFU/68asHtAXaAFkAS+a2SDnXNSjx1KD1sy+ONJLQKNoD1ZZZW1PZ86Eh9mTlYlZiE4X9uG/LrmCVcsX886L08j4YQM3PjyRZq1OOmIfBQUHeHLUcGrXT+a6UWMBePv5p1mz8n3MjJp16nH5TaOoXT+ZDV+t4pXJfyNcpQpX/+G/aXBcM3J+2s3zj97P9X/6qy8/E+lYVDcxzOCzmlC7ehjn4L31O1n8beHqkgta1uOClvUocLD6xz3M+/LnN6iU9NwDgKa1q3HNmcdRLSFE5t79TFu5hdz8AlrWT+SaM44jv8AxdcUPZPy0n8QqIW7o1FQXzw5SgRfDLgS+d85lAJjZXOC/gIoNWgrD9FfAzsO2G7As2oNVVqGEBC7+zU00bdmWvJy9TBiVSuv2HWl0fAsG3n4/r6SNL7OPZa+/RMOmJ5KX81Pxti6XXcNF1wwtfv2dOdPpl3ob7y2YzcDb7icr40c+XDifS667iXdfeoZu/a9VyPpIQQHMXZXOpuxcqoVD3NW9OV+l/0StamHaN67Fw+98T36Bo2bVkm9O+WBDFku+3cl1HRsfsv3aDo2ZuyqddTv2cu6JdbiwTQMWrMmgZ5v6PPXhZhokVaVLi3rMXZ3Oxe2SeWvtjlicbqVRgT8iG4FzzCwJyAF6AivL01FZc7QLgJrOuQ2HtfXA4vIcsDKqXa8BTVu2BaBaYhINm57IrsztpDQ7kYZNTihz/+wd6az95AM69ux9yPbqSTWKv96fl1scogkJYfL35bEvL5eEhAR2/PgDuzIzaHHKGRV3UnLUduXlsym7cKVPXn4B23bvo271KnRtUY+FX28nv6Bw1mzPvgMl7l/Scw8AUmpWZd2OwgfMrEn/iTOaFC4FPFAAVRJCVA0bB5wjuUYV6iSGi98rhSyKVhrn3IfAHOATCpd2hTjCNENZSh3ROueGlvLawPIcsLLbmb6Vrd9/Q7PWJ0e8z2vTnqDXoOHk5fz8B2LhrMl8tvQtqiXVYNiYRwHo2n8g89IeIVy1GleO+BNvPDORC68+4h+F+ED9pCo0q1Od9Ttz6H9aCq0bJHHZKSnsLyhg7qr0qB59uHVXHu0b1+SLrXvo0LQ29RILf0w
Xfr2dgWc2Zv+BAqav3EL/X6Sw4N8ZXp1S5VWB/+hzzo0BxhxtP7ozLAp5uXt5bvwYel8/4pDRaGm++ngZNerUo2nLdiW+/ssBw7hz4ouccf5FLH/zZQCaNG/DjQ9NZNiYR8nctpXa9ZLBOZ7/233M/vuD7MnKrLBzkqNXLcH47dlNmbNqG7n5BYRCkFQ1gXFL1vPy6nSGnt00qv5mfrKVri3qcVe35lQPh8h3hSPjzdl5PLJkPY+9t5HkGlXIzs3HzLihU1MGn9WEWtX0/AwofNZBpC1mNcXsSJXcgfx8nhs/htO7XMipnbtGvN+Gtav5auX7jLv5al549H6+W/0ps//+4M/e1/78nnz54ZJDtjnnWDz3Gbpffh3vzJlOz6uu54yuF7HsjblHfT5SMUIGwzo3Y8XmXXy+pfCmg6ycfD4r+nrDzlyc44jztCXZtmcfTyzbxF8Wr2fl5l1s37P/Z+/p1S6ZN77aziUnJfPamgxWbMqmW6v6FXNSlVxFTR1UJN2wEAHnHHMn/ZWUpidwfp+rotr3VwNT+dXAwqV53335Ke+9+gJX/f5eALZv3Uxy42YAfLVy2c/mez9d8ibtOpxDYs1a7MvLxSyEWYj9eXoCv18M6tCYH3fv4511//+vjM+37KZtwyS+2b6XlJpVCYfsiPO0JalZNYE9+w5gQK92DXhv/aHXos85oQ6rt+0hZ38BVRJCOKAAqJqgC6VAbBM0QgraCGxYu4rPli6k0QktefyOwrnSXw74Lfn5+1kw5TF+2pXNjLF307h5a4bcM45dmdt5+R/jGHz3X0rtd+GzaWRs3YhZiLrJjeibemvxa/vycvlkyVsMuecRAM7vcxXPjR9NQrhwyZfEX6sGiXQ+oS4/ZOdyd/cWAMz/dzrLN2QxqEMT7unZgvwCmPFx4c1EdaqHufbMxjy5vHApVknPPVi+IZuOx9ema8t6QGFoL9+QXXzMKglG5xPq8Pj7GwF4Z90Ohp3dlAMF7oi39x5r/Pj0LnPO2/sJdMOClOTd77LiXYL40IT+Jx91Sn62cXfEmXPGCbViksoa0YpIoPhxqbmCVkQCxY9TBwpaEQkUjWhFRDzmw5xV0IpIwPgwaRW0IhIomqMVEfFYLD90MVIKWhEJFgWtiIi3NHUgIuIxLe8SEfGYD3NWQSsiAePDpFXQikigxPKB3pFS0IpIoPgvZhW0IhI0PkxaBa2IBIqWd4mIeMyHU7QKWhEJFgWtiIjHNHUgIuIxjWhFRDzmw5xV0IpIsGhEKyLiOf8lbSjeBYiIVKSQRd7KYmZ1zWyOmX1lZmvM7Nzy1KQRrYgESgVPHTwGvOmcu8LMqgJJ5elEQSsigVJRy7vMrDbQFbgewDm3D9hXnr40dSAiwWKRNzNLNbOVB7XUg3pqCWQAU83sUzObbGY1ylOSglZEAiWKnMU5l+ac63hQSzuoqzDQAZjonDsT+AkYVZ6aFLQiEihmkbcybAY2O+c+LPp+DoXBGzUFrYgEiplF3ErjnPsR2GRm7Yo29QT+XZ6adDFMRAKlglfR3gI8W7Ti4DtgSHk6UdCKSKBU5PIu59xnQMej7UdBKyKBoqd3iYh4TM86EBHxmIJWRMRjmjoQEfGYRrQiIh7zYc4qaEUkYHyYtApaEQkUzdGKiHgskgd6x5qCVkSCRUErIuItTR2IiHjMj8u7zDkX7xqOGWaWetiDhUX09+IYoOfRxlZq2W+RY5D+XgScglZExGMKWhERjyloY0vzcFIS/b0IOF0MExHxmEa0IiIeU9CKiHhMQRsjZtbLzNaa2TozGxXveiT+zGyKmaWb2ep41yLeUtDGgJklABOAi4FTgAFmdkp8qxIfmAb0incR4j0FbWycDaxzzn3nnNsHPA/0jXNNEmfOuaVAZrzrEO8paGOjKbDpoO83F20TkWOAgjY2SnrMhdbViRwjFLSxsRk4/qDvmwFb4lSLiMSYgjY2VgBtzKyFmVUFrgHmx7kmEYkRBW0MOOfygRH
AW8AaYLZz7sv4ViXxZmazgOVAOzPbbGZD412TeEO34IqIeEwjWhERjyloRUQ8pqAVEfGYglZExGMKWhERjyloRUQ8pqAVEfHY/wFDWkV0EXWEnwAAAABJRU5ErkJggg==\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "## Linear Support Vector Classifier\n", + "linear_svc = Pipeline([('vect', vectorizer),\n", + " ('clf', LinearSVC()),\n", + " ])\n", + "\n", + "linear_svc.fit(X_train, y_train)\n", + "\n", + "y_pred_LinearSVC = linear_svc.predict(X_test)\n", + "\n", + "print(classification_report(y_test, y_pred_LinearSVC))\n", + "# plot confusion matrix\n", + "conf_matrix = confusion_matrix(y_test, y_pred_LinearSVC)\n", + "plot_conf_matrix(conf_matrix)" + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " precision recall f1-score support\n", + "\n", + " neg 0.64 0.64 0.64 22\n", + " pos 0.60 0.60 0.60 20\n", + "\n", + " accuracy 0.62 42\n", + " macro avg 0.62 0.62 0.62 42\n", + "weighted avg 0.62 0.62 0.62 42\n", + "\n" + ] + }, + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAVoAAAD8CAYAAAA2Y2wxAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjQuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8rg+JYAAAACXBIWXMAAAsTAAALEwEAmpwYAAAb6klEQVR4nO3deXxU1f3G8c93sgGiKEvCjrKoIKWLaEVakUWhQFVUimgrWmhccEFEgR8WFaWgYBerIlARixKKIoKgCOKCtVJBka3ijhDFBAFBSQIkc35/hKaBhGQmzJ25uT5vX/dF5s6ce7+34tOTc889Y845RETEO6FEFyAiEnQKWhERjyloRUQ8pqAVEfGYglZExGMKWhERjyloRUSOwMxmmFmumW0o570RZubMrH5lx1HQiogc2Uyg1+E7zawZcB6wJZKDKGhFRI7AObcC2FnOW38CbgcieuIrOZZFlafmj2/Qo2dSxq5VDyW6BPGhGsnY0R4jmswpeO/ha4DMUrumOeemVdTGzC4AvnDOrTWLrFzPg1ZEJK4s8l/UD4ZqhcF6yKHNagFjgPOjKUlDByISLGaRb9FrBZwErDWzzUBT4F0za1hRI/VoRSRYoujRRss5tx5ILzlVcdh2dM59XVE79WhFJFhi2KM1syzgLeAUM8s2s8FVKUk9WhEJllBSzA7lnBtYyfsnRnIcBa2IBIuHQwdVpaAVkWCp2k0uTyloRSRY1KMVEfGYerQiIh5Tj1ZExGMxnHUQKwpaEQkW9WhFRDwW0hitiIi31KMVEfGYZh2IiHhMN8NERDymoQMREY9p6EBExGPq0YqIeEw9WhERj6lHKyLiMc06EBHxmHq0IiIe0xitiIjH1KMVEfGYerQiIh5Tj1ZExFsWUtCKiHjKNHQgIuIx/+WsglZEgkU9WhERj/kxaP03aiwichRCoVDEW2XMbIaZ5ZrZhlL77jGzdWb2npktNbPGldZ0lNckIuIvFsVWuZlAr8P2TXLOdXDO/QhYBIyt7CAaO
hCRQInl0IFzboWZnXjYvj2lXh4DuMqOo6AVkUCJxxitmY0HrgR2A10r+7yGDkQkUMwsmi3TzFaX2jIjOYdzboxzrhnwFHBDZZ9Xj1ZEAiWaHq1zbhow7ShONxtYDNxZ0YfUoxWRQLGQRbxV6fhmbUq9vADYVFkb9WhFJFBiOUZrZlnAuUB9M8umuOfa28xOAcLA58C1lR1HQSsigRLjWQcDy9n9WLTHUdCKSLD478EwBa2IBIsfH8FV0IpIoPgxaDXrwCOP3nkFny+fwOqn/6/Me8N+0538NQ9R7/hjElCZ+MmsJ2bS74I+XHxhX0aOGM6+ffsSXVK1F8u1DmJWU9zO9D0z6/mVXDj04TL7m2YcT7ezTmXLtp0JqEr8JCcnh9lP/Z2sufN4dsEiwuEilrywONFlVX+xXesgJhS0Hnnz3U/YuTuvzP77R1zCmL88h3OVPh4t3wNFRUXsKyigsLCQ/IICGqSnJ7qkai+aJ8PiRWO0cdSnyw/4Mvcb1n/4RaJLER/IyMhg0FW/pWePrtSokUansztzduefJbqsak9jtN9jNWukMHJwT8ZN0a+GUmzP7t28+spyXli6nGWvvkF+fj6Lnl+Q6LKqPT/2aBW0cdKyaQNaNKnH2/8YzabFd9Mk/Xjemj2SjHrHJro0SZCVK/9Fk6ZNqVu3LikpKXTvcT5r16xJdFnVnteP4FaFhg7iZOPHX9Ki++iS15sW303nK+5nxzd7E1iVJFLDRo1Zt3Yt+fn51KhRg3+vfIt27dsnuqxqz49DBwpajzwx4Sp+fnob6h9fm4+X3MM9j77AE8+9leiyxEc6dPgh553fk8v69yMpKZlT27bl0v4DEl1WtefHoDWv737X/PENur0uZexa9VCiSxAfqpF89JOuThq2OOLM+ezPfeKSypX2aM3sVOBCoAnFX9nwJbDQOfe+x7WJiETPfx3aim+GmdlIYA7Fpb8NrDr4c5aZjaqgXcmq5YVfb4xlvSIiFfLjrIPKerSDgdOccwdK7zSzPwIbgYnlNSq9ankQhg7SUpN5+bFhpKYmk5yUxPyX13Dvoy8w9vo+9O3SgbBzbN/5LZl3Psm27bsjagscsX2nH7bkL/83gP0HCrly9ON8uvVr6tSuyaz7fssF5TxtJokx9o7RrHj9NerWrcezCxYB8MGmTdw77k7y8vJo3LgJE+6fTO3atcu0ffONFdw3cTzhojD9LunP4N8Vf4PKlIf/yrxn5lL3hLoA3DhsOD8/pwtr3n2H8ffcRWpKKhMn/ZHmLVqwZ88ebr/1FqZM+5svxyUTJRTH2QSRqnCM1sw2AT2dc58ftr8FsNQ5d0plJwhC0AIcUzOVvfn7SU4O8cqM4YyY9Azvf/oV3+4tAOD6gV04tWUjbho/J6K2b6/fzLHH1Ci3/ZzJQxjz4AJaNKrH+Z3bMuqP85k4vB+LXl/PP9/5OK7X7ZUgjNG+s3oVtWrVYszokSVBe/mvLmH4bSPpeMaZzH/2Gb7IzuaGm4Yd0q6oqIgL+vRk6vTHycjI4PIBlzJx0h9p1bo1Ux7+K7Vq1WLQ1YMPaXPLzTcwbPgIvvziC9785xuMuH0Uk++fyLldu9HxjDPjdcmei8UYbZvblkScOR9N6hWXVK5sHu0wYLmZvWhm0w5uS4DlwM2eV+cje/P3A5CSnERychLOuZKQBKhVM+2Ij9WW1xY4YvsDhUXUTEuhVs0UDhQWcVLT+jROPz4wIRsUp3c8g+Pq1Dlk3+bNn3F6xzMA6NSpM8uXLS3TbsP6dTRr1oKmzZqRkppKr959eO3V5RWeKzk5mX0FBRQU5JOcnMzWLVvIzc0JVMjGilnkW7xUOHTgnFtiZicDZ1J8M8yAbGCVc64oDvX5Rihk/Gv2SFo1a8DUf6xg1YbiTv5dQ3/JFX3PZPd3+fTKfDCqtkdqP2nGUh6+YyD5+w4w+I6/M2F4P+5+ZJH3FylHrXWbk3nt1eV07daDpS8t4auvtpX5TG5ODg0bNSx5n
Z6Rwfp160pez5n9FM8vfI52p7VnxG2jOK5OHQYPuYZxd40lLS2NP0ycxAOT72Pojd+rvk7E/DiMUumTYc65sHNupXNunnPumYM/f69CFiAcdpx12URa97yDju1b0K5VIwDuevh52vzi98x5cTXXDjgnqrZHar/uwy/oMugBemU+yIlN67Ft+24MY9bEq5lx75Wk19XTZH519z3jmZM1m8v6X0xe3l5SUlLLfMZR9jef/4bDrwYMZNGSZcydt4AGDdKZPKn4NsipbdvyZNZcHps5i+zsrTRokI5zjttuHcbokSPY8fXX3l5YNeLHHq0ewY3S7u/yWbH6I84/u90h++e+uIqLuv+oSm0raj9qSC8mTHuRMdf8gnsefYGsF1Zx/cBzj+IKxEsntWzF1OkzmPP0s/Tq3YemzZqV+UxGRkO+2vZVyevcnBzSD67aVa9+fZKSkgiFQlx8aX82rF9/SFvnHNOmTuGaa69n6iMPcf3QG+nb9wJmPzXL2wurRkIhi3iLW01xO1M1Vv+E2tSpXROAGmkpdPvpKXywOYdWzRuUfKZPlw58uDkn4rZApe1//cufsuSNjXzzbT61aqQSDjvCYUetGikxv0aJjR07dgAQDoeZPnUK/QdcVuYzp7X/AVu2bCY7eysH9u9nyQuL6dK1GwDbt+eWfO6Vl1+mdZs2h7Rd+Nx8zjmnC8fVqUN+QQEWCmGhEAX5+R5eVfXix6DVI7gRaFj/OKaP+w1JoRChkDFv2bu8+MYGsiYPoU2LdMJhx5ZtO0tmHDRqUIdHxl5OvxunHLEtwL03XVhueyhe7evXv/wpfa8vvjv/4JOvkDV5CPsPFDJo9My4/28gZY0cMZzVq97mm292cV63c7hu6I3k5+UxJ2s2AN17nMdF/S4BIDc3h7vH3sHDj04nOTmZ0WPGcl3mEMLhIi7qdwmtWxcH6p8emMQHmzZhBo0bN+H3d40rOV9+fj4LF8zn0ekzALhy0NXcOuwmUlJSmDjpgThfvX/5cIhWj+BKYgRhepfEXiymd3UY+3LEmbNuXA9/PIIrIlKd+HHWgYJWRALFhzmroBWRYPHjI7gKWhEJFA0diIh4zIc5q3m0IhIssVwm0cxmmFmumW0otW+SmW0ys3VmNt/Mjq/sOApaEQmUGD+COxPoddi+ZUB751wH4ENg9OGNDqegFZFAiWWP1jm3Ath52L6lzrnCgy9XAk0rO46CVkQCJZpHcEt/G8zBLTPK0/0WeLGyD+lmmIgESjQ3w0p/G0z057ExQCHwVGWfVdCKSKDEY3qXmQ0C+gLdXQTrGChoRSRQvM5ZM+sFjAS6OOfyImmjoBWRQIllj9bMsoBzgfpmlg3cSfEsgzRg2cFzrXTOXVvRcRS0IhIosQxa59zAcnY/Fu1xFLQiEiha60BExGN+fARXQSsigaJFZUREPObDnFXQikiwhHyYtApaEQkU3QwTEfGYD3NWQSsiwaKbYSIiHvNhzipoRSRYDP8lrYJWRAJFY7QiIh7TrAMREY9pHq2IiMd8mLMKWhEJFk3vEhHxmA9zVkErIsGS5MOkVdCKSKBo6EBExGM+nN2loBWRYFGPVkTEYz7MWQWtiASLerQiIh5L8uEgrYJWRALFfzGroBWRgNFaByIiHvNhzipoRSRY/HgzLJToAkREYsks8q3yY9kMM8s1sw2l9vU3s41mFjazjpHUpKAVkUBJClnEWwRmAr0O27cBuBhYEWlNGjoQkUCJ5dCBc26FmZ142L73oz2P50G7a9VDXp9CqqFzJ7+e6BLEh1aO6nLUx4jm13QzywQyS+2a5pybdtRFHEY9WhEJlGh6mgdDNebBejgFrYgEig8fDFPQikiw+PERXM06EJFACVnkW2XMLAt4CzjFzLLNbLCZ9TOzbKATsNjMXqrsOOrRikigxPJ5BefcwCO8NT+a4yhoRSRQtNaBiIjH/DgeqqAVkUDxYYdWQSsiweLHWQcKWhEJFB/mrIJWRIJFN8NERDzmw
5xV0IpIsGjoQETEY+bDr2dU0IpIoCT7cCKtglZEAsWP3xmmoBWRQNEYrYiIx3zYoVXQikiwaB6tiIjHknQzTETEWyFN7xIR8ZYPRw4UtCISLJp1ICLiMd0MExHxmA9zVkErIsGihb9FRDzmw9ldCloRCRatdSAi4jH/xayCVkQCRrMOREQ85r+YVdCKSMCEfDjrwI836EREqiwUxVYZM5thZrlmtqHUvrpmtszMPjr45wmR1CQiEhhmFvEWgZlAr8P2jQKWO+faAMsPvq6QglZEAsWi2CrjnFsB7Dxs94XAEwd/fgK4qLLjKGhFJFCi6dGaWaaZrS61ZUZwigzn3DaAg3+mV9ZAN8NEJFCSopje5ZybBkzzrppi6tGKSKDEcujgCHLMrBHAwT9zK2ugoBWRQDGLfKuihcCggz8PAhZU1kBDByISKLH8KhszywLOBeqbWTZwJzARmGtmg4EtQP/KjqOgFZFAieUTuM65gUd4q3s0x1HQikigmA8fwlXQikigRDPrIF4UtHEy64mZPDvvacyMNm1OZtz4CaSlpSW6LImzMb1PpnOreuzKO8AVj60G4IauLflZ63oUFoXJ/qaAexdv4rt9RQmutPryYc5q1kE85OTkMPupv5M1dx7PLlhEOFzEkhcWJ7osSYDF63O4Ze76Q/a9/dkurvjbKn494x227sxjUKfmCaouGOIw6yBqCto4KSoqYl9BAYWFheQXFNAgvdKHSSSA3tu6mz0FBw7Z9/bmXRS54p83fLmH9GP1m87RsCj+iRcNHcRBRkYGg676LT17dKVGjTQ6nd2Zszv/LNFliQ/9skMjXn6/0vnvUgEfrpKoHm087Nm9m1dfWc4LS5ez7NU3yM/PZ9Hzlc5xlu+Zqzo1pzDsWLJRQXs0QmYRb3GrKW5n+h5bufJfNGnalLp165KSkkL3Huezds2aRJclPtK7fQadW9fjzoXvJ7qUas+PQwcK2jho2Kgx69auJT8/H+cc/175Fie1apXossQnzjrpBH5zVjNue2YD+wrDiS6n2gtZ5Fu8aIw2Djp0+CHnnd+Ty/r3IykpmVPbtuXS/gMSXZYkwLgL2vKT5nU4vmYKC68/i+n/3MyVnZqTmmQ8eFkHoPiG2P0vfZTgSqsvPz6wYM45T09QUIi3J5Bq6dzJrye6BPGhlaO6HHVK/vOjXRFnzs/anBCXVFaPVkQCxX/92aMYozWzqyt4r2TV8seme76mrohIiSSziLd4OZoe7d3A4+W9UXrVcg0diEhc+bBLW2HQmtm6I70FZMS+HH8ae8doVrz+GnXr1uPZBYsA+GDTJu4ddyd5eXk0btyECfdPpnbt2mXavvnGCu6bOJ5wUZh+l/Rn8O+Kv5JoysN/Zd4zc6l7Ql0Abhw2nJ+f04U1777D+HvuIjUllYmT/kjzFi3Ys2cPt996C1Om/S3Sb+6UOEg/No07+55KvWNSCDt4bu025q7+gjbpxzCy58mkJocoCjsmLf2I/2z7tkz7+df9lL37Cgk7KAo7rn7iXQDuvbAtzevWAuDYGsl8W1DIlY+/Q4cmx3F7zzbsLwozdsH7ZH9TQO20JO69sB3DDnus9/vMjzfDKuvRZgA9gV2H7TfgX55U5EMXXnQxAy//NWNGjyzZd/fYMQy/bSQdzziT+c8+w8wZf+OGm4Yd0q6oqIg/jB/H1OmPk5GRweUDLuXcrt1o1bo1AL+58ioGXT34kDZ/f+JxHvjzX/nyiy+Y+48sRtw+immPPsKQzGsUsj5TFHY8+MonfJDzHbVSk5h51U94+7Nd3NC1JY+9+TlvfbqTTi3rckPXllw/e225xxiatZbd+YWH7Ltjwf/m0t7UrWXJAjOXn9mU0fP/Q6M6Nbj4J4158JVP+W3nFjzx1hbvLrIa8uN/JpWN0S4CajvnPj9s2wy85nl1PnF6xzM4rk6dQ/Zt3vwZp3c8A4BOnTqzfNnSMu02rF9Hs2YtaNqsGSmpqfTq3YfXXl1e4bmSk5PZV
1BAQUE+ycnJbN2yhdzcHDqecWbsLkhiYsfe/XyQ8x0AefuL2Lwjj/Rj03AOjklNAqB2WhLbv91X5XN0P7UBy/5T/KRYYdiRlhwiLSVEYZGjyfE1aFA7jTVbdx/9xQRIHL4zLGoV9midc4MreO/y2JdTfbRuczKvvbqcrt16sPSlJXz11bYyn8nNyaFho4Ylr9MzMli/7n+jMXNmP8XzC5+j3WntGXHbKI6rU4fBQ65h3F1jSUtL4w8TJ/HA5PsYeuPNcbkmqbpGddI4Ob02G77cw5+Xf8Kff/UDbuzWEjMjc1b5TwE653hwQAccMH/NNhasPfTv0I+a1WHn3gNs3ZUPwBNvbWFUr5PZVxjmrkXvc1PXVkx7Y7PHV1YNVcMerRzB3feMZ07WbC7rfzF5eXtJSUkt8xlXzn3A//76/6sBA1m0ZBlz5y2gQYN0Jk+aCMCpbdvyZNZcHps5i+zsrTRokI5zjttuHcbokSPY8fXX3l6YRK1mSogJ/U7jz8s/IW9/ERf/uBF/eeUTLnzk3/xl+SeM6X1Kue0yn3yPQTPf5Za567n09Mb8qNmhvzWd3zadZaUWmPkody9DZq1haNZamhxfk6+/2w8Uj+ne1fdU6tZK8e4iqxGtdRAgJ7VsxdTpM5jz9LP06t2Hps2alflMRkZDvtr2Vcnr3Jwc0g8uj1ivfn2SkpIIhUJcfGl/Nqw/9GaGc45pU6dwzbXXM/WRh7h+6I307XsBs5+a5e2FSVSSQsaEfqfx0sZcXvuw+P8Ee7dvyKsfFP+8fNN22jU6tty2/w3KXXkHeP3Drw/5XJLBuafUPyRoS7v67ObMePNzhvysBdPf2MySjTn8qmOTWF5ateXHoQMFbRXt2LEDgHA4zPSpU+g/4LIynzmt/Q/YsmUz2dlbObB/P0teWEyXrt0A2L79f/8BvfLyy7Ru0+aQtgufm88553ThuDp1yC8owEIhLBSiID/fw6uSaI3pfTKbd+SRtSq7ZN/X3+3jJ82Le6cdWxxf8qt/aTVSQtQ6OI5bIyXEmSeewKfb95a8f8aJJ7B5Rx7bv91fpm2fH2Tw5ic7+XZfIWkpSYQdhB2kpSTF+vKqJx8mrZ4Mi8DIEcNZveptvvlmF+d1O4frht5Ifl4ec7JmA9C9x3lc1O8SAHJzc7h77B08/Oh0kpOTGT1mLNdlDiEcLuKifpfQunVxoP7pgUl8sGkTZtC4cRN+f9e4kvPl5+ezcMF8Hp0+A4ArB13NrcNuIiUlhYmTHojz1cuR/LDpcfRu35CPc7/j71efDsCU1z9jwpIPuaVHa5JCxv7CMBNe/BCA+rVT+b9fnMzwpzdQt1Yq911yGlA8wX7pf3JZ+dn/Jvec1y695CZYaWnJIXq3b8hN/yge6896O5sJ/dpRGHb8foFW/gJ/Tu/SWgeSEFrrQMoTi7UO3tvybcSZ86Pmx2qtAxGRaPlxHq2CVkQCxY9DBwpaEQkU9WhFRDzmw5zV9C4RCZgYTu8ys5vNbIOZbTSzYVUtSUErIoESqy9nNLP2wO+AM4EfAn3NrE2FjY5AQSsigRLDL2dsC6x0zuU55wqB14F+VaqpKo1ERHwrdkMHG4BzzKyemdUCegNln7WPgG6GiUigRDO9y8wygcxSu6Yd/IYYnHPvm9l9wDLgO2AtUFj2KJVT0IpIoEQzvav0124d4f3HgMeKj2t/ALKP9NmKKGhFJFBiOb3LzNKdc7lm1hy4GOhUleMoaEUkWGI7kXaemdUDDgBDnXOHf61XRBS0IhIosVzQ2zn381gcR0ErIoHixyfDFLQiEiw+TFoFrYgEilbvEhHxmFbvEhHxmIJWRMRjGjoQEfGYerQiIh7zYc4qaEUkWNSjFRHxnP+SVkErIoESwYLecaegFZFA0dCBiIjHNL1LRMRr/stZBa2IBIsPc1ZBKyLBojFaERGPmQ+TVkErIoHiv5hV0IpIwPiwQ6ugFZFg0fQuERGPqUcrIuIxBa2IiMc0dCAi4jH1a
EVEPObDnFXQikjA+DBpFbQiEigaoxUR8ZgfF/4OJboAEZGYsii2yg5ldouZbTSzDWaWZWY1qlKSglZEAsWi+KfC45g1AW4COjrn2gNJwGVVqUlDByISKDGe3pUM1DSzA0At4Msq1eSci2lVcmRmlumcm5boOsRf9PciccwsE8gstWta6X8XZnYzMB7IB5Y6566o0nkUtPFjZqudcx0TXYf4i/5e+JOZnQDMAwYA3wBPA884556M9lgaoxURKV8P4DPn3Hbn3AHgWeDsqhxIQSsiUr4twFlmVsuKv7ahO/B+VQ6koI0vjcNJefT3woecc/8GngHeBdZTnJdV+nelMVoREY+pRysi4jEFrYiIxxS0cWJmvczsAzP72MxGJboeSTwzm2FmuWa2IdG1iLcUtHFgZknAw8AvgHbAQDNrl9iqxAdmAr0SXYR4T0EbH2cCHzvnPnXO7QfmABcmuCZJMOfcCmBnousQ7ylo46MJsLXU6+yD+0Tke0BBGx/lLXOheXUi3xMK2vjIBpqVet2UKq4CJCLVj4I2PlYBbczsJDNLpXhNy4UJrklE4kRBGwfOuULgBuAlip+Vnuuc25jYqiTRzCwLeAs4xcyyzWxwomsSb+gRXBERj6lHKyLiMQWtiIjHFLQiIh5T0IqIeExBKyLiMQWtiIjHFLQiIh77f8Ve3CNYo3J1AAAAAElFTkSuQmCC\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "## Nu-Support Vector Classifier\n", + "Nu_svc = Pipeline([('vect', CountVectorizer()),\n", + " ('tfidf', TfidfTransformer()),\n", + " ('clf', NuSVC()),\n", + " ])\n", + "\n", + "Nu_svc.fit(X_train, y_train)\n", + "\n", + "y_pred_NuSVC = Nu_svc.predict(X_test)\n", + "\n", + "print(classification_report(y_test, y_pred_NuSVC))\n", + "# plot confusion matrix\n", + "conf_matrix = confusion_matrix(y_test, y_pred_NuSVC)\n", + "plot_conf_matrix(conf_matrix)" + ] + }, + { + "cell_type": "code", + "execution_count": 35, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.\n", + "[Parallel(n_jobs=1)]: Done 1000 out of 1000 | elapsed: 6.3s finished\n", + "[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + " precision recall f1-score support\n", + "\n", + " neg 0.68 0.86 0.76 22\n", + " pos 0.79 0.55 0.65 20\n", + "\n", + " accuracy 0.71 42\n", + " macro avg 0.73 0.71 0.70 42\n", + "weighted avg 0.73 0.71 0.71 42\n", + "\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[Parallel(n_jobs=1)]: Done 1000 out of 1000 | elapsed: 5.0s finished\n" + ] + }, + { + "data": { + "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAVoAAAD4CAYAAACt8i4nAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjQuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8rg+JYAAAACXBIWXMAAAsTAAALEwEAmpwYAAAb5ElEQVR4nO3deXgV1f3H8ff3JmwhrIkg+6KIRURUBLVldwF3XFAEoW5orVZtVaBYKdYqVauliPpLFUGxuCAobqgVQZFFUEBWEZUlsoRNCJg95/dH0pQl5OYmd3Inw+fFM483M3fO/Y5P/Hg4c+Zcc84hIiLeCcW6ABGRoFPQioh4TEErIuIxBa2IiMcUtCIiHov3+gNqnHq7pjXIYXYveirWJYgPVY/HyttGJJmTseSpcn9eaXgetCIiFcr89xd1Ba2IBItVSCc1IgpaEQkW9WhFRDymHq2IiMdCcbGu4DAKWhEJFg0diIh4TEMHIiIeU49WRMRj6tGKiHjMhz1a/1UkIlIeobjSb2GY2QQzSzOzFQfs62hmC8xsqZktNrPOYUsq5yWJiPiLhUq/hTcR6HPIvkeB0c65jsADhT+XSEMHIhIsoeiN0TrnPjWzlofuBmoXvq4DbA7XjoJWRIIlgjFaMxsKDD1gV4pzLiXMaXcBH5jZ4xSMCpwd7nMUtCISLBHMOigM1XDBeqjfAHc7594ws/7A88A5JZ2gMVoRCZYo3gw7giHAtMLXrwO6GSYiR5no3gwrzmage+HrXsC34U7Q0IGIBEsUH1gwsylADyDZzFKBUcDNwFgziwcyOXiMt1gKWhEJlig+sOCcG3CEQ6dH0o6CVkSCRY/gioh4zIeP4CpoRSRYtPC3iIjH1KMVEfGYxmhFRDymHq2IiMfUoxUR8Zh6tCIi3rKQglZExFOmoQMREY/5L2cVtCISLOrRioh4TEErIuKxkG6GiYh4zH8dWgWtiASLhg5ERDymoBUR8Zgfg9Z/o8YiIuVgZqXeStHWBDNLM7MVh+y/w8y+MbOVZvZouHbUoxWRQLFQVHu0E4GngBeL2jfrCVwKdHDOZZlZg3CNKGhFJFCiOXTgnPvUzFoesvs3wBjnXFbhe9LCtaOhAxEJlEiGDsxsqJktPmAL+9XhwAlAVzNbaGZzzOyMcCeoRysiwRJBh9Y5lwKkRPgJ8UA94EzgDOA1M2vtnHMlnSAiEhgVMOsgFZhWGKxfmFk+kAxsP9IJGjoQkUCJ5qyDI3gT6FX4WScAVYEdJZ2gHq1Hnh01kL7d2rN9VzqdrnoYgJNPaMK4kddQs0Y1NmzeyfUjJ5G+PzPGlUqsZGVlcf3ggeRkZ5Obl8e5553Pbbf/LtZlVXrRXOvAzKYAPYBkM0sFRgETgAmFU76ygSElDRuAgtYzL729gGdfncNzfxlctO+ZB65l+JPTmfvlOgZfeiZ3D+nNg0+/G8MqJZaqVq3KcxMmkVCzJjk5Ofz6umv5VddudDilY6xLq9yiOHLgnBtwhEODImlHQwce+fyr79i15+eD9rVp0YC5X64DYNaCNVzWu2MMKhO/MDMSatYEIDc3l9zcXF9+sWBlUwFDBxFT0FagVd9t4aIeJwNw+bmn0bRhvRhXJLGWl5dH/8svpWfXsznzrLPp0OGUWJdU6Sloj3K3/Pllbunfjc9fvo/EhGpk5+TFuiSJsbi4OF6b9hYfzprDiuVf8+23a2NdUqXnx6DVGG0FWrt+GxffNh6A45s3oG/Xk2JckfhF7dq1OaNzF+bN/Yw2bU6IdTmVWpQfwY0K9Wgr0DH1EoGC/+MOv/l8/jV1bowrkljatWsXe/fuBSAzM5MF8+fRslXrGFdV+alHexSZ9Miv6Xp6G5LrJrJu5l/4y7PvkVijGrdc3Q2At2Yt5cW3FsS4SomlHdvTuP+Pw8nPzyM/33He+X3o3qNnrMuq9Py4TKKFmf5VbjVOvd3bD5BKafeip2JdgvhQ9fjyT85qdde7pc6cH/5xYYWkctgerZm
dSMGSYE0AB2wGZjjnVntcm4hI5PzXoS15jNbMhgGvUFD6F8CiwtdTzGx4CecVrYiTu2NlNOsVESmRH8dow90MuxE4wzk3xjk3uXAbA3QuPFYs51yKc66Tc65TfHJw7qyHQsb8KcN4Y+ytAIy85QK+++AhFrwynAWvDOf8X7U77JymDesyM+V3LHnjfr6cOpLfDuhx2Hvuuq43GUueIqluweT1s05pzRevjmDu5Htp3SwZgDqJNZgx/rfeXZyUy/ofvqf/5ZcWbWd3Po3JL0486D0/fP8d1117NZ06tmfSC88f1kZeXh79r7iM22+7pWjfk39/jCv7XczIEfcV7Xt7xpu8/NIkz66lsguFrNRbRQk3dJAPNAY2HLK/UeGxo8rt1/bkmx+2Uatm9aJ94yZ/wj9e+viI5+Tm5TP8iWksXZNKYkI15v17GB8vXMOa77cCBUHc68wT2bhlV9E5d17XiwH3PkeLRkkMvaorw5+YzoihfXh0wgfeXZyUS8tWrXlt2ltAQWCe27Mbvc4596D31K5Tl2EjRvLJrOJ/X15+6UVatz6Offv3AZCens6ypUuYOv1tRtz3B75d+w3NmrdgxpvTefr/nvP2gioxP94MC9ejvQv42MzeN7OUwm0m8DFwp+fV+UiTBnXp86uTeGH6vIjO27pjL0vXpAKw7+cs1vywlcbH1C06/ug9VzBy7JsceFMyJzePGtWqkFCjCjm5ebRqmkzjBnWLHt8Vf1u4YD7NmjWjceMmB+1PSkqi/ckdiI8/vH+zbetWPvt0Nv2uuLJoXyhk5OTk4JwjMyuL+Ph4Jk54jmsHXUeVKlU8v47Kyqz0W0UpsUfrnJtZuAxYZwpuhhkFazEucs4dVY81PXZvQSAmJlQ/aP+t13Tj2os689WqjQx/Yho/pWccsY3mjerTsW1TFq1YD8CF3U9mc9pPLF/748GfNeFDxt8/gIysHG68/0Ue+X0/Rj/9TtSvSbwx8/136XPBRRGd8+iYh7n7D/eyf//+on01ayZyzrnncfUVl9H5zLNIrFWLlStWcOttt0e75ECpjD1anHP5zrkFzrk3nHNTC18fVSHbt2t70nals2T1poP2/+v1z2h38Z/pcs0Ytu7Yy5jfX37ENmrWqMqUx2/i3sffIH1/JjWqV2HYjefz4DOHr9719dof6T7k7/QZ+k9aNk1iy/Y9GMZLY65nwkODaVC/VtSvUaIjJzubOZ/M4rzz+5T6nDmzP6F+/fq0O6n9Yceuv/FmXpv2FvfcN5zx48Zy2x2/Y9rU17n393eS8uzT0Sw9MPzYo9WTYaVwVsfWXNT9ZNa8O5oXx1xPjzNOYMJDg0nblU5+vsM5x4Rpn9OpfYtiz4+PDzHl8Zt59f3FvDVrGQCtmx5DiyZJfPHqCNa8O5omDeoy/9/DaJh0cIgOv6kPj6S8z8hb+vKXZ99jynuLuK2YG2riD3PnfsqJ7U4iKTm51OcsXfIVs2fPou+5vRh2z+9ZtHABI4bdc9B7Vq9eBUCLFi15e8abPPbEWNat+5YNG9ZHs/xAqIw3wwR4YNwMHhg3A4Cup7fhrsG9ueH+Fzk2uTZbdxQ8Qnlpr1NY9d2WYs9/dtRAvvlhK/+cPKto38p1m2nRe0TRz2veHc0vBz7Kzp/+91fHQRd3YeZnK/kpPYOE6lXJz3fk5zsSqmt8zq/ef+9d+l5wYUTn3Hn3H7jz7j8AsOiLhUyaOIFH/vb4Qe8ZP24sD/z5QXJzc8nPK/gLZchCZGZo4fhDVWSAlpaCthz+eudldGjbFOccG7bs4o6HpgDQ6Jg6PP3AtfS74xnO7tiagRd1YfnaH1nwSsHU41FPzeCDuatKbLtG9SoMurgLF91W8ATVPyfPYsrjN5Gdk8uQERM9vS4pm4yMDBbMm8efRj1YtO+1Vwt+J/pfPYAd27cz4Oor2L9vH6FQiMkvTWL6jPdITEwssd1ZH/+H9u1PpkGDhgB06HgqV1x2MSeccAJtTzzRuwuqpHw
4RKtHcCU29AiuFCcaj+B2eOA/pc6crx88p0JiWWO0IhIo0XwyzMwmmFla4feDHXrsHjNzZhZ2QF5BKyKBEuVZBxOBw6aQmFkz4FxgY2kaUdCKSKBEc9aBc+5TYFcxh54E7qNgoa3wNUV0BSIiPhfJ0MGBC2AVbkNL0f4lwI/OuWWlrUmzDkQkUCKZdeCcSwFSSt+2JQAjgfMiqUlBKyKB4vEjuMcBrYBlhZ/TFPjKzDo757Ye6SQFrYgEipc565xbDjT432fZeqCTc25HSedpjFZEAiXK07umAPOBtmaWamZHXIe7JOrRikigRPMRXOfcgDDHW5amHQWtiASKHx/BVdCKSKD4cT1aBa2IBIoPc1ZBKyLBoh6tiIjHFLQiIh7Twt8iIh7zYYdWQSsiwaKhAxERj/kwZxW0IhIsIR8mrYJWRAJFN8NERDzmw5xV0IpIsOhmmIiIx3yYswpaEQkWw39Jq6AVkUDRGK2IiMc060BExGN+nEer7wwTkUAxK/0Wvi2bYGZpZrbigH2PmdkaM/vazKabWd1w7ShoRSRQovnljMBEoM8h+z4C2jvnOgBrgRHhGlHQikigRLNH65z7FNh1yL4PnXO5hT8uAJqGa0dBKyKBEmdW6s3MhprZ4gO2oRF+3A3A++HepJthIhIokTwZ5pxLAVLK+DkjgVzg5XDvVdCKSKBUxOwuMxsCXAT0ds65cO9X0IpIoHi91oGZ9QGGAd2dcz+X5hyN0YpIoER5etcUYD7Q1sxSzexG4CmgFvCRmS01s2fDtaMerYgESjR7tM65AcXsfj7SdhS0IhIocXoEV0TEW/6LWQWtiASMH9c6UNCKSKD4MGcVtCISLPoqGxERj/kwZxW0IhIsmnUgIuKxo3LoYHzKfV5/hFRCf3h7daxLEB8a3+8X5W7Dj4+7qkcrIoFyVPZoRUQqkg+HaBW0IhIsuhkmIuIxH+asglZEgsWHQ7QKWhEJFq11ICLiMU3vEhHxmA87tApaEQkWzToQEfGYD3PWl8MZIiJlFjIr9RaOmU0wszQzW3HAvvpm9pGZfVv4z3phayrnNYmI+Eo0vwUXmAj0OWTfcOBj51wb4OPCn0ukoBWRQAlZ6bdwnHOfArsO2X0pMKnw9STgsrA1RXYJIiL+ZpH8MRtqZosP2IaW4iMaOue2ABT+s0G4E3QzTEQCJT6C7qNzLgVI8ayYQgpaEQmUClgmcZuZNXLObTGzRkBauBM0dCAigRLNMdojmAEMKXw9BHgr3Anq0YpIoESzQ2tmU4AeQLKZpQKjgDHAa2Z2I7ARuCpcOwpaEQmUaC4q45wbcIRDvSNpR0ErIoES58MBUQWtiARKCP89g6ugFZFA0epdIiIe8+OiMgpaEQkUfcOCiIjHfJizCloRCRYt/C0i4jEfzu5S0IpIsFTAWgcRU9CKSKD4L2YVtCISMJp1ICLiMf/FrIJWRAImpFkHIiLe0qwDERGPadaBiIjH/BezCloRCRj1aEVEPBbnw6D147ixiEiZWQRb2LbM7jazlWa2wsymmFn1stSkoBWRQDEr/VZyO9YE+B3QyTnXHogDrilLTRo6EJFAifJX2cQDNcwsB0gANpetJhGRAImkR2tmQ81s8QHb0P+245z7EXicgq8U3wLscc59WJaa1KMVkUCxCHq0zrkUIKXYdszqAZcCrYCfgNfNbJBzbnKkNalHKyKBEmdW6i2Mc4AfnHPbnXM5wDTg7LLUpB5tBVk8cxrLZr+Pc45Tel7AGX0uj3VJEgODTmtE+2MTSc/K5a8f/wDAqY1rceEvjqFhrao8Nns9G3/KjHGVlVsUZ3dtBM40swQgA+gNLC5LQ+rRVoDtm35g2ez3GTx6HDc8/H98t2QBu7amxrosiYEFG35i/OebDtq3OT2LlIWprNvxc4yqCpZozTpwzi0EpgJfAcspyMtihxnCUdBWgJ2bN9L4uBOpUq06obg4mp3YgW8Xfx7rsiQG1u3MYH9O3kH7tqVnk7YvO0Y
VBY9F8Ccc59wo59yJzrn2zrnrnHNZZalJQVsBkpu2ZNM3y8lI30tOVibfL/uCvTu3x7oskUAKWem3iqIx2gqQ3KQFXS66mlfHDKNK9Ro0aN6aUFxcrMsSCSR9w8JR7JQefTmlR18A5rz6PLXqHxPjikSCKZLpXRVFQwcVZP+e3QDs3ZHG2sWf0+7snjGuSCSYNHRwFHtz7INk7NtLKD6ec4fcTvWatWJdksTA9Z0a0+aYmiRWjeOhPsfz7urt/Jydz1WnNCSxahy/OasZqXsyGT9vU/jGpFh+7NEqaCvIwAeejHUJ4gMvLC7+UfllW9IruJLg8uEQrYJWRILFhzlb9jFaM7u+hGNFCzXMmf7vsn6EiEjEovgIbtSU52bY6CMdcM6lOOc6Oec6de93bTk+QkQkQtFc+TtKShw6MLOvj3QIaBj9cvxp78403n32Ufbt2YVZiI49L6BTn8tZs3AOc6e9xM7NGxk8ehyNWrc9Yhv5+XlM+tNvqVUvmSvveQiAT1+fyLqv5mFmJNSuywW33Eutesmkrl3Bhy/8k7j4Klzy2z9S79gmZO7fx1tPPUT/+x7x5XciHY3q1ohnyOmNqV09Hudg7vrdzP6uYHZJ99b16N66HvkOVmzdx5sr0w47v7h1DwCa1K7GNaceS7W4ELt+zmHi4s1k5ubTun4Nrul4LLn5jhcW/cj2/TnUqBLihjOa6ObZASrjzbCGwPnA7kP2GzDPk4p8KBSKo+e1t3BsqzZkZfzMpD/dRsuTTye5aUv63TmKDyb8I2wbi2dOJ6lxc7Iz/vc8e5cLr6LbVb8uOP7BdOZNn8z5N9zFovemctmdD7Bn+zaWfPw2vQbeyrw3J3PWJQMUsj6Snw/TlqexaU8m1eJDDOvZkjVp+6lVLZ4OjWrx8KwfyM13JFYt/uGUBRt+Ys53uxncqdFB+wee1ohpy9NYt/NnzmpRh3PaJPHO6u30blOffy1MJSmhKl1b1WPaijT6tk3mg292VsTlVhp+/E8k3NDBO0Cic27DIdt6YLbn1flEYr0kjm3VBoBqNRJIatyc9F07SG7SgqTGzcKev3fndr5furDogYX/qpZQs+h1TlZm0W9IKC6e3OxscrOzCMXFs3vbZtJ376T5L06J4lVJee3NymXTnoKVtrJy89mWnk3d6lXo1qoeH67dQW6+A2Bfdl6x5xe37gFAg8SqrNtZ8D/k1Wn76di4YCpgXj5UiQtRNd7Ic47kmlWoUyO+6L1SwIcjByX3aJ1zN5Zw7KgcfN2zfSvbNqyj8XEnlvqcjyc/Q48BN5OdkXHYsU9fm8CKuf+hWkJNBvzxMQDOvOQaZj7/JFWqVuPCW4fxyZQUul45JGrXINFXP6EKTetUZ/3uDPq1b8DxSQlc0q4BOfn5TFueFtHSh1v2ZtGhUSJfb9nHaU1qU69GwX+mH67dwbWnNiInL59JizfT7+QGvLNKa2Ycxoc9Wk3vikB2ZgbTxz5I70G/Oag3WpJ1SxZQs3Zdjm11AhtXLTvseLf+N9Ct/w3MnzGFLz96i65XDKFhi+MZPHocAJvWfE1i3SRw8Na4hwjFxdNr4C3UrFMvqtcmZVctzri5cxOmLt9GZm4+oRAkVI3jsTnraVGvOjd2bsKoD78rdXuTv9rCVR0a0rdtMsu37iPXFfSMU/dk8fic9QAcn1SDPZm5mBk3nNGEvHzHtBXbSM8qvvd8NPHjWgd6BLeU8nJzmT52NO3O7kXbM7qW+rwf167k26/m88xdg5gx/q9sWLWUt58ec9j72p3di7WL5h60zznHvDdf5pf9BvL59Jf41RWDOemXvfnyg+nlvh6JjpDBTV2asih1L8s2Fzx08FNGLksLX2/YnYlzHHGctjjb9mXz1LxN/G32ehan7mXHvpzD3tOnbTLvr9nBBScm8+7q7SzatIcex9WPzkVVcpVu6EAKOOd4/7m/k9S4OZ0vuDKic7tffSPdry4Ygdm4ahlfvPc6F982HIBdW1Opf2x
TANZ9NZ/6jQ4e713x2Ycc17EL1WvWIicrC7MQFjJyssu0JKZ4YNBpjdians2sdbuK9i3bnM4JxyTw7Y6faZBYlfiQHXGctjiJVePYl52HAX3aJjF3/cH3os9sXocV2/aRkZNPlbgQDsgHqsb5rycXEz7816CgLYUf165k5dz/cEyzVrzwx1uAgr/y5+Xk8NGL48lI38PUx++nQYvjuHrYGNJ372Dmc09w1b0Pl9junFefZ9eWVMyM2skNOf/6O4uO5WRlsuKzj+g/rKD3e0bfK5g+dnTRlC+JveOSatCleV1+3JPJiJ6tAJixKo35G35i0GmNGdm7Fbn58OKXBY/d1qkez8BTG/H0/IKpWMWtezB/wx46NatNt9YFQ0PLNqczf8Oeos+sEmd0aV6HcZ9vBGDWup3c1Llg6OBIj/cebfw4vctc4fiPVyYs2ujtB0il9GXq/liXID40vt8vyp2SSzemlzpzOjavVSGprDFaEQmUaH1nWEFbVtfMpprZGjNbbWZnlaUmDR2ISKBEeehgLDDTOXelmVUFEsrSiIJWRAIlWrO7zKw20A34NYBzLhso07doauhARAIlkuldB640WLgNPaCp1sB24AUzW2Jmz5lZ6SbQH0JBKyLBEkHSHrjSYOGWckBL8cBpwDPOuVOB/cDwspSkoBWRQLEI/oSRCqQ65xYW/jyVguCNmIJWRAIlWl/O6JzbCmwys/+uf9obWFWWmnQzTESCJbozY+8AXi6ccfA9cMRvlimJglZEAiWa07ucc0uBTuVtR0ErIoHiw8W7FLQiEiw+zFkFrYgEjA+TVkErIoHix4W/FbQiEij+i1kFrYgEjQ+TVkErIoHix4W/FbQiEig+HKJV0IpIsChoRUQ8pqEDERGPqUcrIuIxH+asglZEgkU9WhERz/kvaRW0IhIo4Rb0jgUFrYgEioYOREQ8puldIiJe81/O6ssZRSRYIvi28dK1ZxZnZkvM7J2y1qQerYgEigdjtHcCq4HaZW1APVoRCRQzK/VWiraaAhcCz5WnJgWtiARKJEMHZjbUzBYfsA09pLl/APcB+eWpSUMHIhIokQwdOOdSgJTi27GLgDTn3Jdm1qM8NSloRSRQoji965fAJWZ2AVAdqG1mk51zgyJtSEMHIhIoZqXfSuKcG+Gca+qcawlcA8wqS8iCerQiEjB6MkxExGNePBnmnJsNzC7r+QpaEQkU9WhFRDzmw5xV0IpIwPgwaRW0IhIoWr1LRMRjWvhbRMRrCloREW9p6EBExGN+nN5lzrlY13DUMLOhhYtYiBTR70Xwaa2DinXoEmwioN+LwFPQioh4TEErIuIxBW3F0jicFEe/FwGnm2EiIh5Tj1ZExGMKWhERjyloK4iZ9TGzb8xsnZkNj3U9EntmNsHM0sxsRaxrEW8paCuAmcUB44G+QDtggJm1i21V4gMTgT6xLkK8p6CtGJ2Bdc65751z2cArwKUxrklizDn3KbAr1nWI9xS0FaMJsOmAn1ML94nIUUBBWzGKW+ZC8+pEjhIK2oqRCjQ74OemwOYY1SIiFUxBWzEWAW3MrJWZVQWuAWbEuCYRqSAK2grgnMsFbgc+AFYDrznnVsa2Kok1M5sCzAfamlmqmd0Y65rEG3oEV0TEY+rRioh4TEErIuIxBa2IiMcUtCIiHlPQioh4TEErIuIxBa2IiMf+H8UEtRxq80f3AAAAAElFTkSuQmCC\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "## Random Forest Classifier\n", + "randFor = Pipeline([('vect', CountVectorizer()),\n", + " ('tfidf', TfidfTransformer()),\n", + " ('clf', RandomForestClassifier(n_estimators=1000, random_state=1, criterion='entropy', oob_score=True, verbose=1)),\n", + " ])\n", + "\n", + "randFor_train = randFor.fit(X_train, y_train)\n", + "\n", + "y_pred_RandFor = randFor_train.predict(X_test)\n", + "\n", + "print(classification_report(y_test, y_pred_RandFor))\n", + "# plot confusion matrix\n", + "plot_conf_matrix(confusion_matrix(y_test, y_pred_RandFor))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Boosting algorithms" + ] + }, + { + "cell_type": "code", + "execution_count": 36, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "C:\\Users\\Jared\\anaconda3\\lib\\site-packages\\xgboost\\sklearn.py:1146: UserWarning: The use of label encoder in XGBClassifier is deprecated and will be removed in a future release. To remove this warning, do the following: 1) Pass option use_label_encoder=False when constructing XGBClassifier object; and 2) Encode your labels (y) as integers starting with 0, i.e. 0, 1, 2, ..., [num_class - 1].\n", + " warnings.warn(label_encoder_deprecation_msg, UserWarning)\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[12:19:53] WARNING: C:/Users/Administrator/workspace/xgboost-win64_release_1.4.0/src/learner.cc:1095: Starting in XGBoost 1.3.0, the default evaluation metric used with the objective 'binary:logistic' was changed from 'error' to 'logloss'. 
Explicitly set eval_metric if you'd like to restore the old behavior.\n", + " precision recall f1-score support\n", + "\n", + " neg 0.79 0.68 0.73 22\n", + " pos 0.70 0.80 0.74 20\n", + "\n", + " accuracy 0.74 42\n", + " macro avg 0.74 0.74 0.74 42\n", + "weighted avg 0.74 0.74 0.74 42\n", + "\n" + ] + }, + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAVoAAAD8CAYAAAA2Y2wxAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjQuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8rg+JYAAAACXBIWXMAAAsTAAALEwEAmpwYAAAbUElEQVR4nO3de5xN9f7H8ddnzx5jxuSWaymFKEqnIqmjdBEVpaQSpdDkOJQkicopdagcJ0UXuSRKSYiuxE+kEiFRiVMpRaNcQoy5fH9/zBxnMOZmr73XLO9nj/VoZu29vuuzNL1957u+67vNOYeIiHgnFOsCRESCTkErIuIxBa2IiMcUtCIiHlPQioh4TEErIuIxBa2IyCGY2TgzSzWzVQfs72Vma8xstZk9XlA7CloRkUN7EWiVe4eZXQhcBTR0zjUAhhXUiIJWROQQnHMLgC0H7P4bMNQ5l5bzntSC2gl7UNt+ElsO06NncpB5o7rFugTxoaZ1ytvhtpF4Rs9CZ86eFaNuB1Jy7RrtnBtdwGF1gWZm9iiwB+jrnFuS3wGeB62ISFRZ4X9RzwnVgoL1QGGgAnAO0BiYYma1XD7rGWjoQESCxazwW/FsAKa5bJ8BWUCl/A5Q0IpIsFio8FvxzAAuAjCzukAp4Lf8DtDQgYgES/F7qnk0ZZOB5kAlM9sADALGAeNypnztBTrnN2wACloRCZpQXMSacs51OMRLnYrSjoJWRIKl+EMCnlHQikiwRHDoIFIUtCISLOrRioh4TD1aERGPqUcrIuKxCM46iBQFrYgEi3q0IiIeC2mMVkTEW+rRioh4TLMOREQ8ppthIiIe09CBiIjHNHQgIuIx9WhFRDymHq2IiMfUoxUR8ZhmHYiIeEw9WhERj2mMVkTEY+rRioh4TD1aERGPqUcrIuItCyloRUQ8ZRo6EBHxmP9yVkErIsGiHq2IiMf8GLT+GzUWETkMoVCo0FtBzGycmaWa2ao8XutrZs7MKhVYUzGvRUTEn6wIW8FeBFoddAqz44AWwI+FaURBKyKBYmaF3grinFsAbMnjpX8D/QBXmJo0RisigeL1GK2ZXQn87Jz7orDnUtCKSKAUJWjNLAVIybVrtHNudD7vTwIGApcWpSYFrYgESlGCNidUDxmseagNnAj8tzdbA1hmZmc75zYd6iAFrYgEioW8Gzpwzn0JVNl3LrMfgEbOud/yO043w0QkUCJ5M8zMJgOfAPXMbIOZdS1OTerRikigRPJmmHOuQwGvn1CYdhS0IhIs/nswTEErIsHix0dwFbQiEigK2iPIc31aclmT2mze9ieNbn8RgIGdzqXLZaexeftuAAaNX8j7S76PYZUSSxs3rOeZoQP3fb95089c3SmFlm3zHRaUAhRmDYNoU9B6ZOLs1Tw3czlj7rl8v/1PT/+cJ6cujVFV4ifVa9Rk8MhJAGRlZtL75tacdW7z2BYVBP7r0CpovbJo1QaOr1o21mVICfHVF0uoUr0GlapUj3UpJZ4fhw7818cOuO5tzuCzZzvzXJ+WlE9OiHU54hOLF8zhnAuK9FSnHEIk59FGioI2il54awX1bx1Dkx4T2LRlF0NTmse6JPGBjPR0li9eSOO/XhTrUgJBQXuES932J1lZDudg3LsraVRPvyYKrFz6MTVr16NchaNjX
UogWMgKvUWLgjaKqlUss+/rq849ia9+yPfxaDlCfLpgtoYNIsiPPVrdDPPIhP5X0KzhcVQql8i6SbczeOIizm94HA1rV8E5WP/rdno9NSfWZUqMpe3Zw+rln3FLz/tiXUpg+PFmmILWI52Hvn3QvgnvH/SxQ3KESyhdmlGv6i/cSCqRQWtmJwNXAceS/bENvwAznXNfe1ybiEjR+S9n8x+jNbN7gVfJLv0zYEnO15PNrH8+x6WY2VIzW5qx4dNI1isikq+SOEbbFWjgnEvPvdPMhgOrgaF5HZR71fLElsMK9eFlfpYQH8cH/7qBUvFxhONCTF/4LY9M/LhQj9SeVKMCEwe02ff9idXKMXjiIkZOX8Y1zeoy8KZzOfm4o2l2xySWrf0VgKb1j2FErxbsTc/k5qFv8d0v2yhXJoGJA1pz5cA3onfhkq+xTw5mxWeLKFu+Ao8+M3nf/jkzpzD3rdcJxcVxeuPzuL5Lr4OO3bVzB+OfepQN67/DMLr2vp86p5zGM0MHsnHDegD+3LWTpDLJDB45ibVffcGEUY8THx9P936DqXrMcezauYNnHxvI3Q+P8OWvy7ESiuJsgsIqKGizgGOA9Qfsr57z2hEhLT2TVv2msGtPOuG4EPOGd2B2TqAW9Ejt2g1bOafHS0D2D8B/Xu7OzEXrAFj9w2/c8PCbjLxj/zvOd7ZrRIfBb1KzWjlSWv+F/qPnc1/Hpjz+6mKPrlCK46+XtObi1u15YfhD+/Z9/cVSln+6gMGjXiY+vhR/bMvrA1ThldHDOe2spvQcMJSM9HTS0vYA0KP/o/veM3nMCJKSsmeqvDftFXoOGMJvqRuZ9840OnS7k5mvjqP1dbcoZA/gxz+PgoK2NzDXzNYCP+XsOx6oA/T0sC7f2bUnu1MfHw4RjgvhXNE76hf+5Xi+37iNH1P/AGDNT3n/T5iemUViQpikhDDpGZmcWL0cxxydzEdfbij+BUjE1Tv1DDb/+st+++a9M40r2t9MfHwpAMqWr3jQcbv/3MmaVcvpdteDAITj4wnHx+/3HuccSxZ+QL9/jgIgLhwmfW8ae/fsIS4uTOrGDWz9PZWTTzvTi0sr0XyYs/kHrXPuPTOrC5xN9s0wAzYAS5xzmVGozzdCIePjkTdR+5jyPD9rBUvWbOLSxrXo3uYMbry4AcvWbqL/6Pls25l2yDbaNz+ZKfO/KfBcT7y6mFF3XsruvRl0ffwdhtzWnIcmfBTJyxGPbPr5R75dvYI3XnqO+FKluL7rHdSqW3+/96Ru/IWjylVgzL8H89P3azmhzsl0vL0PCaUT973n29UrKFu+ItWOPR6AK9p3ZvzTQyhVKoGUvv/g1bFPcU2n26N6bSWFH3u0BT6w4JzLcs596px7wzk3NefrIypkAbKyHOf0eIk6HZ+nUb1q1K9ZqUiP1MaHQ1xxTm2mLVhT4LlWfreZC3q/Qqt+UzihWnk2/r4TM2PigNaM63c5VconRfDKJJKysjLZtXMHDwwfy/VdevHM0AEH/faTlZXJ+nVruOjya3j46YkklC7NW69P2O89n344mya5HmKoWbsuDw4fR/+hz7J50y9UqFgZgGeGDuT5Jwaxfevv3l9cCWFW+C1a9GRYEW3flcaCL37i0sYnFOmR2paNT2TFulRSt/1ZpPP1v/EchrzyCQM7NWXwxI+ZPO8rerTVr4t+VeHoKpx1bnPMjFr1GmAWYscf2w56T4VKVah98qkANDrvItav+99fwJmZGXz+8f/R5PxLDmrfOcfMV8dx5Q1dmPHKGNp2vI2mF7Zizswpnl5XSRIKWaG3qNUUtTOVYJXKJVKuTPZKW6VLhbnozJqs+WlLkR6pva75KYUaNsitU4sGvPfZd2zbmUZSQjxZWY6sLEdSgp4z8aszm17A119k3xzd9POPZGakc1TZ8vu9p3zFozm6cpV9swu++mIpxxx/4r7XVy9fQvUaJ1CxUtWD2v/og7c5vfF5lDmqLHvT9hAKhQiFjL05N
9PEn0Gr/2MLoVrFMrzQ9zLicn6o31iwhncXf8fYey7L85Ha6hXL8MxdLbn6gWkAJCZkh3PPEbP3a/fKc+swvMfFVCqXyLTB17DyP6n7pm8lJoTp1KIBre+bCsBT05Yy+YEr2ZuRSechBz91JtH37GP3882Xy9j5xzbuurk1bTumcH6LNox98hEG9uhAOBxPtz6DMDO2/r6Z8U89Sp+HngSg4+19ef6JB8nIyKBytWPo1vuBfe0uXjBnv2GD/0rbs4dFc9+m7yNPA9CybQdGPtqfuHCYv937SFSuuSTw4RAtVpy750URhHm0EnnzRnWLdQniQ03rlD/smGz44AeFzpyVD18SlVhWj1ZEAsWPsw4UtCISKD7MWQWtiARLSXwEV0SkRNHQgYiIx3yYs5pHKyLBEsllEs1snJmlmtmqXPueMLNvzGylmU03s/IFtaOgFZFAifAjuC8CrQ7YNwc41TnXEPgWKPBziBS0IhIokezROucWAFsO2DfbOZeR8+2nQI2C2lHQikigFOUR3NyfBpOzpRTxdF2Adwt6k26GiUigFOVmWO5Pgyn6eWwgkAG8XNB7FbQiEijRmN5lZp2B1sDFrhDrGChoRSRQvM5ZM2sF3Atc4Jwr1LqnCloRCZRI9mjNbDLQHKhkZhuAQWTPMkgA5uSc61PnXPf82lHQikigRDJonXMd8tg9tqjtKGhFJFC01oGIiMf8+AiuglZEAkWLyoiIeMyHOaugFZFgCfkwaRW0IhIouhkmIuIxH+asglZEgkU3w0REPObDnFXQikiwGP5LWgWtiASKxmhFRDymWQciIh7TPFoREY/5MGcVtCISLJreJSLiMR/mrIJWRIIlzodJq6AVkUDR0IGIiMd8OLtLQSsiwaIerYiIx3yYswpaEQkW9WhFRDwW58NBWgWtiASK/2JWQSsiAaO1DkREPObDnFXQikiw+PFmWCjWBYiIRJJZ4beC27JxZpZqZqty7atoZnPMbG3OvysU1I6CVkQCJS5khd4K4UWg1QH7+gNznXMnAXNzvs+XglZEAsXMCr0VxDm3ANhywO6rgAk5X08A2hbUjudjtFvf7uv1KaQEqtC4Z6xLEB/avXzkYbdRlN6jmaUAKbl2jXbOjS7gsKrOuY0AzrmNZlaloPPoZpiIBEpRboblhGpBwXrYFLQiEihReDDsVzOrntObrQ6kFliT5yWJiERRhG+G5WUm0Dnn687AmwUdoB6tiARKJHu0ZjYZaA5UMrMNwCBgKDDFzLoCPwLtC2pHQSsigRLJ5xWccx0O8dLFRWlHQSsigaK1DkREPObHG08KWhEJFB92aBW0IhIsWvhbRMRjPsxZBa2IBItuhomIeMyHOaugFZFg0dCBiIjHzIcfz6igFZFACftwIq2CVkQCxY+fGaagFZFA0RitiIjHfNihVdCKSLBoHq2IiMfidDNMRMRbIU3vEhHxlg9HDhS0IhIsmnUgIuIx3QwTEfGYD3NWQSsiwaKFv0VEPObD2V0KWhEJFq11ICLiMf/FrIJWRAJGsw5ERDzmv5hV0IpIwIR8OOvAjzfoRESKLVSErSBmdpeZrTazVWY22cxKF7cmEZHAMLNCbwW0cyxwB9DIOXcqEAfcUJyaNHQgIoES4YGDMJBoZulAEvBLcRpRj1ZEAqUoPVozSzGzpbm2lP+245z7GRgG/AhsBLY752YXpyb1aEUkUOKKML3LOTcaGJ3Xa2ZWAbgKOBHYBrxuZp2cc5OKWpN6tCISKFaErQCXAN875zY759KBacC5xalJPVoRCZQIPq/wI3COmSUBu4GLgaXFaUhBKyKBEqmPsnHOLTazqcAyIANYziGGGQqioBWRQInkE7jOuUHAoMNtR0ErIoFiPnwIV0ErIoFSlFkH0aKgjZLMzEw6XNeOKlWrMvKZ52NdjsTIc4M6ctn5p7J5yw4atf/nvv1/u+ECul9/PhmZWby3cBUDR7wZwypLNh/mrII2Wl6e+BK1atVm566dsS5FY
mjirE957rUPGTP45n37zm90Eq2bn0bj64awNz2DyhWSY1hhyefHoNU82ij4ddMmFi6Yz9Xtro11KRJji5b9hy3b/9xvX0r7ZgwbP4e96RkAbN6qv4wPhxXhn2hR0EbB40P/yV1330MopD9uOVidmlU474zaLHipL7PH3MlZ9Y+PdUklWsgKv0Wtpuid6sj04fz/o2LFitRvcGqsSxGfCseFqFA2ifNvHsaAf89g0uNdYl1SiRYyK/QWLRqj9diK5cuYP38eHy1cQFpaGrt27eS+e/sy5LFhsS5NfOLnX7cxY+4XACxdvZ6sLEelCsn8piGEYtH0riPQnXfdzZ133Q3Aks8WM+HFcQpZ2c+s+StpfnZdFn6+ljrHV6FUfFghexh8+AELClqRaJow5BaanXUSlcons+69wQx+7h0mzPiE5//RkaWvD2BveibdHpwY6zJLND/2aM055+kJ9mTg7QmkRKrQuGesSxAf2r185GGn5EdrtxY6c/56UoWopLJ6tCISKP7rzx7GrAMzuzWf1/atWj72hWItdiMiUixxZoXeouVwerQPAePzeiH3quUaOhCRqPJhlzbfoDWzlYd6Caga+XJKhpcnTuCNqa/jnKPdte3pdPMt+72+5LPF9O7Vg2OPrQHARZe0oHuPnmzauJGB9/Xj999/wyzEte2vo+NNnQH497+eYNFHC6h38ik8OuRxAGbNnMEf27fve4/4S0KpMB+M7U2pUmHCcXFM/2A5jzz3Dg3rHsvTA28gISGejMwsev/zNZauXn/Q8Yda96BC2SQmPtaFmsdUZP0vW+jUbyzbduym6em1GDHgevamZ3DzfeP57qffKJecyMTHunDl30dF89J9zY83wwrq0VYFWgJbD9hvwMeeVORza9d+yxtTX+flV18nPj6eHrd3o9kFzalZ84T93nfGWY0OWjwmLhxH3379OaV+A3bt2skN7dtxTtPzqFK1Kl+sWM7U6bO4r9/drP12DccdX5OZM6bzzPNjonh1UhRpezNolfIUu3bvJRwOMW9cH2Yv+ooH/nYFj45+l9mLvqLlX+vzaO+2tLxtxEHH57XuAUDfW1sw/7M1DBs/h763tqDvrZdy/1NvcudNF9HhnjHUrH40Ke2b0X/4dO5LacXj496P1iWXCCVxrYO3gGTn3PoDth+A+Z5X50Pff/cfGp5+OomJiYTDYc5q1Jh5H8wp1LGVK1fhlPoNAChTJplatWqRmvoroZCRnp6Oc449aWmEw2FeHDeGGzvdRHx8vJeXI4dp1+69AMSH4wiH43DO4RyULVMagHLJiWzcvD3PY/Na9wCgdfOGTJq1GIBJsxbT5sKGAKRnZJKYEE9SYjzpGZmcWKMSx1Qpz0efr/Pi0kqsCH5mWMTk26N1znXN57UbI1+O/9WpU5enRzzJtm1bSUgozUcLF+T5eO3KFStof/WVVK5ShT733EudOift9/rPP2/gm6+/5rSGp1OmTDKXtLiU69u15exzmpJ81FGsXrWK7j00BcrvQiHj41fupfZxlXn+tQUsWbWee4ZNZdaovzPkrqsJhYwLb/lXkdqscvRRbPrtDwA2/fYHlSseBcAT42Yz6v4O7E5Lp+v9LzGkz9U89MxbEb+mEs+HPVpN7yqiWrVrc2vXbtzerQtJSUnUrVePcFzcfu85pX4D3pszj6QyZVi44EPu6vV3Zr37v4+D/3PXLu7ufQf39B9AcnL2kni3dr2NW7veBsA/HhxIj153MG3q63zy8UecVLceKd17RO8ipdCyshzn3DCUcsmJvDb8NurXrk7XdufR71/TmDF3Be1anMGzgzpyRfeRh32uld/+zAWds0P7vDNrs3Hzdgxj4tBbSc/IpP/w6aRu2XHY5ynpormGQWFpUZliuKZde16bOp3xL71MuXLlOb5mzf1eT05OJqlMGQCanX8BGRkZbN26BYD09HT69L6Dy69owyUtLj2o7a+//gqAmjVPYNbMGTwxfATr1q1l/fofvL0oO
Szbd+5mwdK1XHpufTq2bsKMuSsAeGPOcho1qJn/wQdI/X0H1SqVBaBapbJsziM8+3drxZDR7zLw9ssY/Nw7TH5nCT06ND/cywgEPw4dKGiL4ffffwdg4y+/MPeD2Vx2eev9Xv9t82b++8TdlytXkpWVRfnyFXDO8Y8HB1KrVi1uviXvacijnh5Bj553kJGRQVZmJgAhC7Fn9x4Pr0iKo1KFZMolJwJQOiGei5rUY80Pv7Jx83aanZU9VNT87Lqs+3Fzkdp9+8Mv6dSmCQCd2jThrfn7T/7p1KYJ7y1czbYdu0kqXYqsLEdWliOptMbzAV8mrYYOiuHu3r3Yvm0b4XCYAfcPomy5ckx5bTIA113fgTmz32fKa5MJx8WRULo0jw0bjpmx7POlvDXzTU6qW5frrrkKgF69+9Ds/AsAmDf3A0499TSqVMmeOdfwL2fQrm0b6tatS72TT47NxcohVatUlhcevom4UIhQyHhjzjLeXbiK7Tv+5Il7riUcDpGWlkHPR7J/NqpXLsczD97I1b2eBQ697sGw8XOY9FgXOrdtyk8bt9Kx39h950wsHU+nNk1o3SN7KOKpSfOYPKwbe9Mz6Hzfi1H/M/AjP07v0loHEhNa60DyEom1Dlb8uKPQmfOX44/SWgciIkXlw3thCloRCRY/Dh0oaEUkUNSjFRHxmA9zVtO7RCRgIji9y8zKm9lUM/vGzL42s6bFKUk9WhEJlAiP0Y4A3nPOXWtmpYCk4jSioBWRQInUhzOaWVngfOAWAOfcXmBvsWqKTEkiIj4RuaGDWsBmYLyZLTezMWZWpjglKWhFJFCsKP/k+titnC0lV1Nh4EzgWefcGcAuoH9xatLQgYgESlGmd+X+2K08bAA2OOcW53w/lWIGrXq0IhIokRo5cM5tAn4ys3o5uy4GvipOTerRikiwRHYibS/g5ZwZB98Bh/z07/woaEUkUCK58LdzbgXQ6HDbUdCKSKD48ckwBa2IBIsPk1ZBKyKBotW7REQ8ptW7REQ8pqAVEfGYhg5ERDymHq2IiMd8mLMKWhEJFvVoRUQ857+kVdCKSKBEauHvSFLQikigaOhARMRjmt4lIuI1/+WsglZEgsWHOaugFZFg0RitiIjHzIdJq6AVkUDxX8wqaEUkYHzYoVXQikiwaHqXiIjH1KMVEfGYglZExGMaOhAR8Zh6tCIiHvNhzipoRSRgfJi0CloRCRSN0YqIeMyPC3+HYl2AiEhEWRG2wjRnFmdmy83sreKWpB6tiASKB0MHdwJfA2WL24B6tCISKGaF3wpuy2oAVwBjDqsm59zhHC9FYGYpzrnRsa5D/EU/F7FjZilASq5do3P/tzCzqcAQ4Cigr3OudXHOox5tdKUU/BY5AunnIkacc6Odc41ybblDtjWQ6pz7/HDPo6AVEcnbecCVZvYD8CpwkZlNKk5DCloRkTw45+5zztVwzp0A3ADMc851Kk5bCtro0jic5EU/FwGnm2EiIh5Tj1ZExGMKWhERjyloo8TMWpnZGjNbZ2b9Y12PxJ6ZjTOzVDNbFetaxFsK2igwszhgFHAZUB/oYGb1Y1uV+MCLQKtYFyHeU9BGx9nAOufcd865vWTPybsqxjVJjDnnFgBbYl2HeE9BGx3HAj/l+n5Dzj4ROQIoaKMjr+UrNK9O5AihoI2ODcBxub6vAfwSo1pEJMoUtNGxBDjJzE40s1JkP843M8Y1iUiUKGijwDmXAfQE3id7AeEpzrnVsa1KYs3MJgOfAPXMbIOZdY11TeINPYIrIuIx9WhFRDymoBUR8ZiCVkTEYwpaERGPKWhFRDymoBUR8ZiCVkTEY/8PFiR7BIZBB94AAAAASUVORK5CYII=\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "## XGBoost\n", + "XGB = Pipeline([('vect', CountVectorizer()),\n", + " ('tfidf', TfidfTransformer()),\n", + " ('clf', XGBClassifier()),\n", + " ])\n", + "\n", + "XGB_train = XGB.fit(X_train, y_train)\n", + "\n", + "y_pred_XGB = XGB_train.predict(X_test)\n", + "\n", + "print(classification_report(y_test, y_pred_XGB))\n", + "# plot confusion matrix\n", + "plot_conf_matrix(confusion_matrix(y_test, y_pred_XGB))" + ] + }, + { + "cell_type": "code", + "execution_count": 37, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " precision recall f1-score support\n", + "\n", + " neg 0.64 0.64 0.64 22\n", + " pos 0.60 0.60 0.60 20\n", + "\n", + " accuracy 0.62 42\n", + " macro avg 0.62 0.62 0.62 42\n", + "weighted avg 0.62 0.62 0.62 42\n", + "\n" + ] + }, + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAVoAAAD8CAYAAAA2Y2wxAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjQuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8rg+JYAAAACXBIWXMAAAsTAAALEwEAmpwYAAAb6klEQVR4nO3deXxU1f3G8c93sgGiKEvCjrKoIKWLaEVakUWhQFVUimgrWmhccEFEgR8WFaWgYBerIlARixKKIoKgCOKCtVJBka3ijhDFBAFBSQIkc35/hKaBhGQmzJ25uT5vX/dF5s6ce7+34tOTc889Y845RETEO6FEFyAiEnQKWhERjyloRUQ8pqAVEfGYglZExGMKWhERjyloRUSOwMxmmFmumW0o570RZubMrH5lx1HQiogc2Uyg1+E7zawZcB6wJZKDKGhFRI7AObcC2FnOW38CbgcieuIrOZZFlafmj2/Qo2dSxq5VDyW6BPGhGsnY0R4jmswpeO/ha4DMUrumOeemVdTGzC4AvnDOrTWLrFzPg1ZEJK4s8l/UD4ZqhcF6yKHNagFjgPOjKUlDByISLGaRb9FrBZwErDWzzUBT4F0za1hRI/VoRSRYoujRRss5tx5ILzlVcdh2dM59XVE79WhFJFhi2KM1syzgLeAUM8s2s8FVKUk9WhEJllBSzA7lnBtYyfsnRnIcBa2IBIuHQwdVpaAVkWCp2k0uTyloRSRY1KMVEfGYerQiIh5Tj1ZExGMxnHUQKwpaEQkW9WhFRDwW0hitiIi31KMVEfGYZh2IiHhMN8NERDymoQMREY9p6EBExGPq0YqIeEw9WhERj6lHKyLiMc06EBHxmHq0IiIe0xitiIjH1KMVEfGYerQiIh5Tj1ZExFsWUtCKiHjKNHQgIuIx/+WsglZEgkU9WhERj/kxaP03aiwichRCoVDEW2XMbIaZ5ZrZhlL77jGzdWb2npktNbPGldZ0lNckIuIvFsVWuZlAr8P2TXLOdXDO/QhYBIyt7CAaOhCRQInl0IFzboWZnXjYvj2lXh4DuM
qOo6AVkUCJxxitmY0HrgR2A10r+7yGDkQkUMwsmi3TzFaX2jIjOYdzboxzrhnwFHBDZZ9Xj1ZEAiWaHq1zbhow7ShONxtYDNxZ0YfUoxWRQLGQRbxV6fhmbUq9vADYVFkb9WhFJFBiOUZrZlnAuUB9M8umuOfa28xOAcLA58C1lR1HQSsigRLjWQcDy9n9WLTHUdCKSLD478EwBa2IBIsfH8FV0IpIoPgxaDXrwCOP3nkFny+fwOqn/6/Me8N+0538NQ9R7/hjElCZ+MmsJ2bS74I+XHxhX0aOGM6+ffsSXVK1F8u1DmJWU9zO9D0z6/mVXDj04TL7m2YcT7ezTmXLtp0JqEr8JCcnh9lP/Z2sufN4dsEiwuEilrywONFlVX+xXesgJhS0Hnnz3U/YuTuvzP77R1zCmL88h3OVPh4t3wNFRUXsKyigsLCQ/IICGqSnJ7qkai+aJ8PiRWO0cdSnyw/4Mvcb1n/4RaJLER/IyMhg0FW/pWePrtSokUansztzduefJbqsak9jtN9jNWukMHJwT8ZN0a+GUmzP7t28+spyXli6nGWvvkF+fj6Lnl+Q6LKqPT/2aBW0cdKyaQNaNKnH2/8YzabFd9Mk/Xjemj2SjHrHJro0SZCVK/9Fk6ZNqVu3LikpKXTvcT5r16xJdFnVnteP4FaFhg7iZOPHX9Ki++iS15sW303nK+5nxzd7E1iVJFLDRo1Zt3Yt+fn51KhRg3+vfIt27dsnuqxqz49DBwpajzwx4Sp+fnob6h9fm4+X3MM9j77AE8+9leiyxEc6dPgh553fk8v69yMpKZlT27bl0v4DEl1WtefHoDWv737X/PENur0uZexa9VCiSxAfqpF89JOuThq2OOLM+ezPfeKSypX2aM3sVOBCoAnFX9nwJbDQOfe+x7WJiETPfx3aim+GmdlIYA7Fpb8NrDr4c5aZjaqgXcmq5YVfb4xlvSIiFfLjrIPKerSDgdOccwdK7zSzPwIbgYnlNSq9ankQhg7SUpN5+bFhpKYmk5yUxPyX13Dvoy8w9vo+9O3SgbBzbN/5LZl3Psm27bsjagscsX2nH7bkL/83gP0HCrly9ON8uvVr6tSuyaz7fssF5TxtJokx9o7RrHj9NerWrcezCxYB8MGmTdw77k7y8vJo3LgJE+6fTO3atcu0ffONFdw3cTzhojD9LunP4N8Vf4PKlIf/yrxn5lL3hLoA3DhsOD8/pwtr3n2H8ffcRWpKKhMn/ZHmLVqwZ88ebr/1FqZM+5svxyUTJRTH2QSRqnCM1sw2AT2dc58ftr8FsNQ5d0plJwhC0AIcUzOVvfn7SU4O8cqM4YyY9Azvf/oV3+4tAOD6gV04tWUjbho/J6K2b6/fzLHH1Ci3/ZzJQxjz4AJaNKrH+Z3bMuqP85k4vB+LXl/PP9/5OK7X7ZUgjNG+s3oVtWrVYszokSVBe/mvLmH4bSPpeMaZzH/2Gb7IzuaGm4Yd0q6oqIgL+vRk6vTHycjI4PIBlzJx0h9p1bo1Ux7+K7Vq1WLQ1YMPaXPLzTcwbPgIvvziC9785xuMuH0Uk++fyLldu9HxjDPjdcmei8UYbZvblkScOR9N6hWXVK5sHu0wYLmZvWhm0w5uS4DlwM2eV+cje/P3A5CSnERychLOuZKQBKhVM+2Ij9WW1xY4YvsDhUXUTEuhVs0UDhQWcVLT+jROPz4wIRsUp3c8g+Pq1Dlk3+bNn3F6xzMA6NSpM8uXLS3TbsP6dTRr1oKmzZqRkppKr959eO3V5RWeKzk5mX0FBRQU5JOcnMzWLVvIzc0JVMjGilnkW7xUOHTgnFtiZicDZ1J8M8yAbGCVc64oDvX5Rihk/Gv2SFo1a8DUf6xg1YbiTv5dQ3/JFX3PZPd3+fTKfDCqtkdqP2nGUh6+YyD5+w4w+I6/M2F4P+5+ZJH3FylHrXWbk3nt1eV07daDpS8t4auvtpX5TG5ODg0bNSx5nZ6Rwfp160pez5n9FM8vfI52p7VnxG
2jOK5OHQYPuYZxd40lLS2NP0ycxAOT72Pojd+rvk7E/DiMUumTYc65sHNupXNunnPumYM/f69CFiAcdpx12URa97yDju1b0K5VIwDuevh52vzi98x5cTXXDjgnqrZHar/uwy/oMugBemU+yIlN67Ft+24MY9bEq5lx75Wk19XTZH519z3jmZM1m8v6X0xe3l5SUlLLfMZR9jef/4bDrwYMZNGSZcydt4AGDdKZPKn4NsipbdvyZNZcHps5i+zsrTRokI5zjttuHcbokSPY8fXX3l5YNeLHHq0ewY3S7u/yWbH6I84/u90h++e+uIqLuv+oSm0raj9qSC8mTHuRMdf8gnsefYGsF1Zx/cBzj+IKxEsntWzF1OkzmPP0s/Tq3YemzZqV+UxGRkO+2vZVyevcnBzSD67aVa9+fZKSkgiFQlx8aX82rF9/SFvnHNOmTuGaa69n6iMPcf3QG+nb9wJmPzXL2wurRkIhi3iLW01xO1M1Vv+E2tSpXROAGmkpdPvpKXywOYdWzRuUfKZPlw58uDkn4rZApe1//cufsuSNjXzzbT61aqQSDjvCYUetGikxv0aJjR07dgAQDoeZPnUK/QdcVuYzp7X/AVu2bCY7eysH9u9nyQuL6dK1GwDbt+eWfO6Vl1+mdZs2h7Rd+Nx8zjmnC8fVqUN+QQEWCmGhEAX5+R5eVfXix6DVI7gRaFj/OKaP+w1JoRChkDFv2bu8+MYGsiYPoU2LdMJhx5ZtO0tmHDRqUIdHxl5OvxunHLEtwL03XVhueyhe7evXv/wpfa8vvjv/4JOvkDV5CPsPFDJo9My4/28gZY0cMZzVq97mm292cV63c7hu6I3k5+UxJ2s2AN17nMdF/S4BIDc3h7vH3sHDj04nOTmZ0WPGcl3mEMLhIi7qdwmtWxcH6p8emMQHmzZhBo0bN+H3d40rOV9+fj4LF8zn0ekzALhy0NXcOuwmUlJSmDjpgThfvX/5cIhWj+BKYgRhepfEXiymd3UY+3LEmbNuXA9/PIIrIlKd+HHWgYJWRALFhzmroBWRYPHjI7gKWhEJFA0diIh4zIc5q3m0IhIssVwm0cxmmFmumW0otW+SmW0ys3VmNt/Mjq/sOApaEQmUGD+COxPoddi+ZUB751wH4ENg9OGNDqegFZFAiWWP1jm3Ath52L6lzrnCgy9XAk0rO46CVkQCJZpHcEt/G8zBLTPK0/0WeLGyD+lmmIgESjQ3w0p/G0z057ExQCHwVGWfVdCKSKDEY3qXmQ0C+gLdXQTrGChoRSRQvM5ZM+sFjAS6OOfyImmjoBWRQIllj9bMsoBzgfpmlg3cSfEsgzRg2cFzrXTOXVvRcRS0IhIosQxa59zAcnY/Fu1xFLQiEiha60BExGN+fARXQSsigaJFZUREPObDnFXQikiwhHyYtApaEQkU3QwTEfGYD3NWQSsiwaKbYSIiHvNhzipoRSRYDP8lrYJWRAJFY7QiIh7TrAMREY9pHq2IiMd8mLMKWhEJFk3vEhHxmA9zVkErIsGS5MOkVdCKSKBo6EBExGM+nN2loBWRYFGPVkTEYz7MWQWtiASLerQiIh5L8uEgrYJWRALFfzGroBWRgNFaByIiHvNhzipoRSRY/HgzLJToAkREYsks8q3yY9kMM8s1sw2l9vU3s41mFjazjpHUpKAVkUBJClnEWwRmAr0O27cBuBhYEWlNGjoQkUCJ5dCBc26FmZ142L73oz2P50G7a9VDXp9CqqFzJ7+e6BLEh1aO6nLUx4jm13QzywQyS+2a5pybdtRFHEY9WhEJlGh6mgdDNebBejgFrYgEig8fDFPQikiw+PERXM06EJFACVnkW2XMLAt4CzjFzLLNbLCZ9TOzbKATsNjMXqrsOOrRikigxPJ5BefcwCO8NT+a4yhoRSRQtNaBiIjH/DgeqqAVkUDxYYdWQSsiweLHWQcKWhEJFB/mrIJWRIJFN8NERDzmw5xV0IpIsGjoQETEY+bDr2dU0IpIoC
T7cCKtglZEAsWP3xmmoBWRQNEYrYiIx3zYoVXQikiwaB6tiIjHknQzTETEWyFN7xIR8ZYPRw4UtCISLJp1ICLiMd0MExHxmA9zVkErIsGihb9FRDzmw9ldCloRCRatdSAi4jH/xayCVkQCRrMOREQ85r+YVdCKSMCEfDjrwI836EREqiwUxVYZM5thZrlmtqHUvrpmtszMPjr45wmR1CQiEhhmFvEWgZlAr8P2jQKWO+faAMsPvq6QglZEAsWi2CrjnFsB7Dxs94XAEwd/fgK4qLLjKGhFJFCi6dGaWaaZrS61ZUZwigzn3DaAg3+mV9ZAN8NEJFCSopje5ZybBkzzrppi6tGKSKDEcujgCHLMrBHAwT9zK2ugoBWRQDGLfKuihcCggz8PAhZU1kBDByISKLH8KhszywLOBeqbWTZwJzARmGtmg4EtQP/KjqOgFZFAieUTuM65gUd4q3s0x1HQikigmA8fwlXQikigRDPrIF4UtHEy64mZPDvvacyMNm1OZtz4CaSlpSW6LImzMb1PpnOreuzKO8AVj60G4IauLflZ63oUFoXJ/qaAexdv4rt9RQmutPryYc5q1kE85OTkMPupv5M1dx7PLlhEOFzEkhcWJ7osSYDF63O4Ze76Q/a9/dkurvjbKn494x227sxjUKfmCaouGOIw6yBqCto4KSoqYl9BAYWFheQXFNAgvdKHSSSA3tu6mz0FBw7Z9/bmXRS54p83fLmH9GP1m87RsCj+iRcNHcRBRkYGg676LT17dKVGjTQ6nd2Zszv/LNFliQ/9skMjXn6/0vnvUgEfrpKoHm087Nm9m1dfWc4LS5ez7NU3yM/PZ9Hzlc5xlu+Zqzo1pzDsWLJRQXs0QmYRb3GrKW5n+h5bufJfNGnalLp165KSkkL3Huezds2aRJclPtK7fQadW9fjzoXvJ7qUas+PQwcK2jho2Kgx69auJT8/H+cc/175Fie1apXossQnzjrpBH5zVjNue2YD+wrDiS6n2gtZ5Fu8aIw2Djp0+CHnnd+Ty/r3IykpmVPbtuXS/gMSXZYkwLgL2vKT5nU4vmYKC68/i+n/3MyVnZqTmmQ8eFkHoPiG2P0vfZTgSqsvPz6wYM45T09QUIi3J5Bq6dzJrye6BPGhlaO6HHVK/vOjXRFnzs/anBCXVFaPVkQCxX/92aMYozWzqyt4r2TV8seme76mrohIiSSziLd4OZoe7d3A4+W9UXrVcg0diEhc+bBLW2HQmtm6I70FZMS+HH8ae8doVrz+GnXr1uPZBYsA+GDTJu4ddyd5eXk0btyECfdPpnbt2mXavvnGCu6bOJ5wUZh+l/Rn8O+Kv5JoysN/Zd4zc6l7Ql0Abhw2nJ+f04U1777D+HvuIjUllYmT/kjzFi3Ys2cPt996C1Om/S3Sb+6UOEg/No07+55KvWNSCDt4bu025q7+gjbpxzCy58mkJocoCjsmLf2I/2z7tkz7+df9lL37Cgk7KAo7rn7iXQDuvbAtzevWAuDYGsl8W1DIlY+/Q4cmx3F7zzbsLwozdsH7ZH9TQO20JO69sB3DDnus9/vMjzfDKuvRZgA9gV2H7TfgX55U5EMXXnQxAy//NWNGjyzZd/fYMQy/bSQdzziT+c8+w8wZf+OGm4Yd0q6oqIg/jB/H1OmPk5GRweUDLuXcrt1o1bo1AL+58ioGXT34kDZ/f+JxHvjzX/nyiy+Y+48sRtw+immPPsKQzGsUsj5TFHY8+MonfJDzHbVSk5h51U94+7Nd3NC1JY+9+TlvfbqTTi3rckPXllw/e225xxiatZbd+YWH7Ltjwf/m0t7UrWXJAjOXn9mU0fP/Q6M6Nbj4J4158JVP+W3nFjzx1hbvLrIa8uN/JpWN0S4CajvnPj9s2wy85nl1PnF6xzM4rk6dQ/Zt3vwZp3c8A4BOnTqzfNnSMu02rF9Hs2YtaNqsGSmpqfTq3YfXXl1e4bmSk5PZV1BAQUE+ycnJbN2yhdzcHDqecWbsLk
hiYsfe/XyQ8x0AefuL2Lwjj/Rj03AOjklNAqB2WhLbv91X5XN0P7UBy/5T/KRYYdiRlhwiLSVEYZGjyfE1aFA7jTVbdx/9xQRIHL4zLGoV9midc4MreO/y2JdTfbRuczKvvbqcrt16sPSlJXz11bYyn8nNyaFho4Ylr9MzMli/7n+jMXNmP8XzC5+j3WntGXHbKI6rU4fBQ65h3F1jSUtL4w8TJ/HA5PsYeuPNcbkmqbpGddI4Ob02G77cw5+Xf8Kff/UDbuzWEjMjc1b5TwE653hwQAccMH/NNhasPfTv0I+a1WHn3gNs3ZUPwBNvbWFUr5PZVxjmrkXvc1PXVkx7Y7PHV1YNVcMerRzB3feMZ07WbC7rfzF5eXtJSUkt8xlXzn3A//76/6sBA1m0ZBlz5y2gQYN0Jk+aCMCpbdvyZNZcHps5i+zsrTRokI5zjttuHcbokSPY8fXX3l6YRK1mSogJ/U7jz8s/IW9/ERf/uBF/eeUTLnzk3/xl+SeM6X1Kue0yn3yPQTPf5Za567n09Mb8qNmhvzWd3zadZaUWmPkody9DZq1haNZamhxfk6+/2w8Uj+ne1fdU6tZK8e4iqxGtdRAgJ7VsxdTpM5jz9LP06t2Hps2alflMRkZDvtr2Vcnr3Jwc0g8uj1ivfn2SkpIIhUJcfGl/Nqw/9GaGc45pU6dwzbXXM/WRh7h+6I307XsBs5+a5e2FSVSSQsaEfqfx0sZcXvuw+P8Ee7dvyKsfFP+8fNN22jU6tty2/w3KXXkHeP3Drw/5XJLBuafUPyRoS7v67ObMePNzhvysBdPf2MySjTn8qmOTWF5ateXHoQMFbRXt2LEDgHA4zPSpU+g/4LIynzmt/Q/YsmUz2dlbObB/P0teWEyXrt0A2L79f/8BvfLyy7Ru0+aQtgufm88553ThuDp1yC8owEIhLBSiID/fw6uSaI3pfTKbd+SRtSq7ZN/X3+3jJ82Le6cdWxxf8qt/aTVSQtQ6OI5bIyXEmSeewKfb95a8f8aJJ7B5Rx7bv91fpm2fH2Tw5ic7+XZfIWkpSYQdhB2kpSTF+vKqJx8mrZ4Mi8DIEcNZveptvvlmF+d1O4frht5Ifl4ec7JmA9C9x3lc1O8SAHJzc7h77B08/Oh0kpOTGT1mLNdlDiEcLuKifpfQunVxoP7pgUl8sGkTZtC4cRN+f9e4kvPl5+ezcMF8Hp0+A4ArB13NrcNuIiUlhYmTHojz1cuR/LDpcfRu35CPc7/j71efDsCU1z9jwpIPuaVHa5JCxv7CMBNe/BCA+rVT+b9fnMzwpzdQt1Yq911yGlA8wX7pf3JZ+dn/Jvec1y695CZYaWnJIXq3b8hN/yge6896O5sJ/dpRGHb8foFW/gJ/Tu/SWgeSEFrrQMoTi7UO3tvybcSZ86Pmx2qtAxGRaPlxHq2CVkQCxY9DBwpaEQkU9WhFRDzmw5zV9C4RCZgYTu8ys5vNbIOZbTSzYVUtSUErIoESqy9nNLP2wO+AM4EfAn3NrE2FjY5AQSsigRLDL2dsC6x0zuU55wqB14F+VaqpKo1ERHwrdkMHG4BzzKyemdUCegNln7WPgG6GiUigRDO9y8wygcxSu6Yd/IYYnHPvm9l9wDLgO2AtUFj2KJVT0IpIoEQzvav0124d4f3HgMeKj2t/ALKP9NmKKGhFJFBiOb3LzNKdc7lm1hy4GOhUleMoaEUkWGI7kXaemdUDDgBDnXOHf61XRBS0IhIosVzQ2zn381gcR0ErIoHixyfDFLQiEiw+TFoFrYgEilbvEhHxmFbvEhHxmIJWRMRjGjoQEfGYerQiIh7zYc4qaEUkWNSjFRHxnP+SVkErIoESwYLecaegFZFA0dCBiIjHNL1LRMRr/stZBa2IBIsPc1ZBKyLBojFaERGPmQ+TVkErIoHiv5hV0IpIwPiwQ6ugFZFg0fQuERGPqUcrIuIxBa2IiMc0dCAi4jH1aEVEPObDnFXQikjA+DBpFbQiEigaox
UR8ZgfF/4OJboAEZGYsii2yg5ldouZbTSzDWaWZWY1qlKSglZEAsWi+KfC45g1AW4COjrn2gNJwGVVqUlDByISKDGe3pUM1DSzA0At4Msq1eSci2lVcmRmlumcm5boOsRf9PciccwsE8gstWta6X8XZnYzMB7IB5Y6566o0nkUtPFjZqudcx0TXYf4i/5e+JOZnQDMAwYA3wBPA884556M9lgaoxURKV8P4DPn3Hbn3AHgWeDsqhxIQSsiUr4twFlmVsuKv7ahO/B+VQ6koI0vjcNJefT3woecc/8GngHeBdZTnJdV+nelMVoREY+pRysi4jEFrYiIxxS0cWJmvczsAzP72MxGJboeSTwzm2FmuWa2IdG1iLcUtHFgZknAw8AvgHbAQDNrl9iqxAdmAr0SXYR4T0EbH2cCHzvnPnXO7QfmABcmuCZJMOfcCmBnousQ7ylo46MJsLXU6+yD+0Tke0BBGx/lLXOheXUi3xMK2vjIBpqVet2UKq4CJCLVj4I2PlYBbczsJDNLpXhNy4UJrklE4kRBGwfOuULgBuAlip+Vnuuc25jYqiTRzCwLeAs4xcyyzWxwomsSb+gRXBERj6lHKyLiMQWtiIjHFLQiIh5T0IqIeExBKyLiMQWtiIjHFLQiIh77f8Ve3CNYo3J1AAAAAElFTkSuQmCC\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "## AdaBoost\n", + "AdaBoost = Pipeline([('vect', vectorizer),\n", + " ('clf', AdaBoostClassifier()),\n", + " ])\n", + "\n", + "AdaBoost.fit(X_train, y_train)\n", + "\n", + "y_pred_AdaBoost = AdaBoost.predict(X_test)\n", + "\n", + "print(classification_report(y_test, y_pred_AdaBoost))\n", + "# plot confusion matrix\n", + "plot_conf_matrix(confusion_matrix(y_test, y_pred_AdaBoost))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Evaluate Algorithms" + ] + }, + { + "cell_type": "code", + "execution_count": 38, + "metadata": {}, + "outputs": [], + "source": [ + "## Evaluate algorithms\n", + "def evaluate(y_test, y_pred_dict):\n", + " table = pd.DataFrame({}, index=['Accuracy', 'Precision', 'Recall','F1-score']) \n", + " \n", + " for model in y_pred_dict:\n", + " report = classification_report(y_test, y_pred_dict[model], digits=2, output_dict=True)\n", + " \n", + " cols = [report['accuracy'],(report['neg']['precision']+report['pos']['precision'])/2,(report['neg']['recall']+report['pos']['recall'])/2,(report['neg']['f1-score']+report['pos']['f1-score'])/2]\n", + " table[model] = cols\n", + " \n", + " # add CNN results\n", + " table.insert(table.shape[1],\"CNN\", [0.8235,0.8888,0.6153,0.7272], True)\n", + " \n", + " # convert to percentage\n", + " table = table*100\n", + " \n", + " # Add 'Best Score' column\n", + " table['Best Score'] = table.idxmax(axis=1)\n", + " \n", + " return table.round(2)" + ] + }, + { + "cell_type": "code", + "execution_count": 39, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "C:\\Users\\Jared\\anaconda3\\lib\\site-packages\\sklearn\\metrics\\_classification.py:1248: UndefinedMetricWarning: Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. 
Use `zero_division` parameter to control this behavior.\n", + " _warn_prf(average, modifier, msg_start, len(result))\n", + "C:\\Users\\Jared\\anaconda3\\lib\\site-packages\\sklearn\\metrics\\_classification.py:1248: UndefinedMetricWarning: Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.\n", + " _warn_prf(average, modifier, msg_start, len(result))\n", + "C:\\Users\\Jared\\anaconda3\\lib\\site-packages\\sklearn\\metrics\\_classification.py:1248: UndefinedMetricWarning: Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.\n", + " _warn_prf(average, modifier, msg_start, len(result))\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Multinomial Naive BayesBernoulli Naive BayesSGDLogistic RegressionSupport Vector ClassifierLinear Support Vector ClassifierNu-Support Vector ClassifierRandom ForestXGBoostAdaBoost ClassifierCNNBest Score
Accuracy52.3869.0573.8178.5752.3866.6761.9071.4373.8161.9082.35CNN
Precision26.1969.0573.8178.6051.6767.0761.8273.2174.2661.8288.88CNN
Recall50.0069.0973.8678.4151.3666.1461.8270.6874.0961.8261.53Logistic Regression
F1-score34.3869.0373.7978.4649.5265.9761.8270.3573.7961.8272.72Logistic Regression
\n", + "
" + ], + "text/plain": [ + " Multinomial Naive Bayes Bernoulli Naive Bayes SGD \\\n", + "Accuracy 52.38 69.05 73.81 \n", + "Precision 26.19 69.05 73.81 \n", + "Recall 50.00 69.09 73.86 \n", + "F1-score 34.38 69.03 73.79 \n", + "\n", + " Logistic Regression Support Vector Classifier \\\n", + "Accuracy 78.57 52.38 \n", + "Precision 78.60 51.67 \n", + "Recall 78.41 51.36 \n", + "F1-score 78.46 49.52 \n", + "\n", + " Linear Support Vector Classifier Nu-Support Vector Classifier \\\n", + "Accuracy 66.67 61.90 \n", + "Precision 67.07 61.82 \n", + "Recall 66.14 61.82 \n", + "F1-score 65.97 61.82 \n", + "\n", + " Random Forest XGBoost AdaBoost Classifier CNN \\\n", + "Accuracy 71.43 73.81 61.90 82.35 \n", + "Precision 73.21 74.26 61.82 88.88 \n", + "Recall 70.68 74.09 61.82 61.53 \n", + "F1-score 70.35 73.79 61.82 72.72 \n", + "\n", + " Best Score \n", + "Accuracy CNN \n", + "Precision CNN \n", + "Recall Logistic Regression \n", + "F1-score Logistic Regression " + ] + }, + "execution_count": 39, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# dictionary of algorithms and their respective predictions\n", + "y_pred_dict = {\"Multinomial Naive Bayes\": y_pred_MNB, \n", + " \"Bernoulli Naive Bayes\": y_pred_BNB,\n", + " \"SGD\": y_pred_SGD, \n", + " \"Logistic Regression\": y_pred_logreg,\n", + " \"Support Vector Classifier\": y_pred_SVC,\n", + " \"Linear Support Vector Classifier\": y_pred_LinearSVC,\n", + " \"Nu-Support Vector Classifier\": y_pred_NuSVC,\n", + " \"Random Forest\": y_pred_RandFor,\n", + " \"XGBoost\": y_pred_XGB,\n", + " \"AdaBoost Classifier\": y_pred_AdaBoost\n", + " }\n", + "\n", + "table = evaluate(y_test, y_pred_dict)\n", + "table" + ] + }, + { + "cell_type": "code", + "execution_count": 40, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "C:\\Users\\Jared\\anaconda3\\lib\\site-packages\\pandas\\core\\frame.py:4906: SettingWithCopyWarning: \n", + "A value is trying to be 
set on a copy of a slice from a DataFrame\n", + "\n", + "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", + " return super().drop(\n" + ] + }, + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYUAAAFXCAYAAABN1VJsAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjQuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8rg+JYAAAACXBIWXMAAAsTAAALEwEAmpwYAAA4OElEQVR4nO3deZyVZfnH8c9XQNlUVJBUVMjcQVxAS3HFzA0B0cQMcd/CUsuk5WdqWaaVmmZmrpnhlltqiohWopkbKYr7ipILLuyyXb8/7nsOR5wZDjNz5swZvu/Xa15znuc85zzXOQPP9dy7IgIzMzOAFSodgJmZtRxOCmZmVuCkYGZmBU4KZmZW4KRgZmYFTgpmZlbgpGCtlqShkt6SNFPSVmU+13r5PG3KeZ46zn2YpIfK9N5L/VySQtKXynF+a35OCvY5kl6XNCdfDD6SdJekdZvofXev5/ldJC3K550h6QVJhzfilL8CRkVE54h4qhHvs1QR8WY+z8JynUPSGfkCvG25zrGkJT+XpAclHdVc57fm56RgdRkUEZ2BtYB3gYua6bzv5POuApwG/FHSZsvyBpLa5ofrA882JIhK3PHXR5KAEcCHwMhmOmfbpR9lrY2TgtUrIuYCNwOFC7OklST9StKbkt6VdKmkDvm5rpLulPSxpA8l/UvSCpKuBdYD/pZLAt9fynkjIm4DPgI2y+8xWtIrkqZJulHS6vmcPfMd9JGS3gT+JWkm0Ab4r6RX8nGb5jvdjyU9K2m/os90taTfS7pb0ixg11yyOVXS05JmSbpCUndJf88lmXGSVlsihrZ5+0FJP5U0IR87VlLXovMdKumN/Fn+b2mlKGBHYG3gO8BwSSvWdaCkPXIp6xNJl0j6R83dff4ef5zP/Z6kP0latY7vcXzx55J0do7j4vw3vLjotLtLeimXLH+Xk1hN1dYESefn7/1VSdvn/W/lGEYWxb63pOfyd/a2pO/V9+/EyiAi/OOfz/wArwO758cdgWuAPxU9fwFwB7A6sDLwN+AX+blfAJcC7fLPjoCWfN86zrsLMCU/XgEYCswHNgZOAv4N9ABWAv4AjMnH9gQC+BPQCeiQ9wfwpfy4HfAy8ENgRWA3YAawcX7+auATYId87vY53n8D3YF1gPeAJ4GtcgzjgZ8sEUPbvP0g8AqwEdAhb5+Tn9sMmAkMyLH8Kn/O+r6bK4Ab8+eYBuxf9NxhwEP5cVdgOrA/0JaUROYDR+Xnj8jfwxeBzsAtwLV1fY91fK6jlogtgDuBLqTE/z6wZ1FsC4DDSUn6Z8CbwO/yd7hH/jt0zsdPBXbMj1cDtq70/4fl7afiAfin5f3ki+FM4OP8H/odoE9+TsAsYIOi478CvJYfnwXcTr4Y1/K+S0sKi/J5PwQmAsPzc5OBgUXHrpUvdm2LLlxfXOL9ipPCjsD/gBWKnh8DnJEfX01R4iuK95Ci7b8Cvy/aPhG4LT+u7eL546JjTwDuyY9PJye0vN0RmFfXd5Ofnw4Mydt/AG4vev4wFieFQ4FHip4T8BaLk8L9wAlFz29c3/dYx+eqLSkMKNq+ERhdFNtLRc/1ycd3L9o3DdgyP34TOBZYpdL/D5bXH1cfWV2GREQX0t3cKOAfkr4AdCNdpJ7I1QEfA/fk/QDnke5Ex+aqgtHLeN53IqJLRKweEVtGxPV5//rArUXnnAwsJN3F13irnvddG3grIhYV7XuDVAKo7/XvFj2eU8t253r
O+b+ix7OLjl27+FwRMZt0YazLUFJyvjtvXwfsJalbLccu+d4BTFni+TeKtt8gJYRSv8e61PVZ4fPfGRFR1/c4DNgbeCNXe32lAbFYIzgpWL0iYmFE3EK6AA8APiD9J948X7y7RMSqkRqHiYgZEfHdiPgiMAg4RdLAmrdrRChvAXsVnbNLRLSPiLeLw63n9e8A60oq/je/HlDq65vSVFI1GAC5PWaNeo4fSbpovinpf8BNpGqkg0t4bxVvk76H9Yu21yMlnOKLdH3fQ1m/o4h4LCIGA2sCt5FKHdaMnBSsXkoGk+p3J+c77T8C50taMx+zjqSv5cf7SvpSvhhNJyWTmm6a75LqshviUuBsSevn83TLcZXqUVK11/cltZO0CylpXV/fi8rkZmBQbnBdETiTVM3zOZLWAQYC+wJb5p++wC+pvRfSXUAfSUNyo/e3gC8UPT8GOFlSL0mdgZ8DN0TEghJjb8zfsF6SVpR0iKRVI2I+i//9WDNyUrC6/E2pB8904GxgZETUdO88jVRF9G9J04FxpLppgA3z9kzgEeCSiHgwP/cL4Me5CmhZe5VcSGrcHitpBqkBeLtSXxwR84D9gL1IpZ1LgEMj4vlljKPR8vd4IikhTSU1tL4HfFrL4SOAiRExNiL+V/MD/BbYQlLvJd77A+BA4FxSldRmwONF730lcC3wT+A1YG6OpVQXAgfkXka/XYbXlWoE8Hr+d3Uc8M0ynMPqUdMrxMwqJN+xfwxsGBGvNfF7r0BqUzgkIh5oyve21sklBbMKkDRIUkdJnUhdUp8h9XZqivf+mqQuklYidcEVqWRltlRlSwqSrswDUyYV7Vtd0n15kMt9ygN/8nM/kPRyHnTztXLFZdZCDCY1+r5DqnIbHk1XbP8KaYzEB6R2kyERMaeJ3ttaubJVH0naiVSv/KeI6J33nQt8GBHn5K6Kq0XEaUrTGIwBtiV1mRsHbBRlnEfGzMw+r2wlhYj4J2kAUrHBpNGx5N9DivZfHxGf5jrVl0kJwszMmlFzT3jVPSKmAkTE1JoujaQBRMV1nlP47KCiAknHAMcAdOrUaZtNNtmkjOGambU+TzzxxAcRUdvgx2ZPCnWprY92rfVaEXEZcBlAv3794vHHHy9nXGZmrY6kN+p6rrl7H70raS2A/Pu9vH8KUDxffw9SA5yZmTWj5k4Kd7B4FOZI0sRpNfuHK03J3IvUG+M/zRybmdlyr2zVR5LGkGa97CppCvAT4BzgRklHkmZDPBDSCE9JNwLPkeZh+ZZ7HpmZNb+yJYWIqG2yLkjzuNR2/Nmk6RTMzJg/fz5Tpkxh7ty5lQ6larVv354ePXrQrl27kl/TUhqazcw+Y8qUKay88sr07NmTvJCbLYOIYNq0aUyZMoVevXqV/DpPc2FmLdLcuXNZY401nBAaSBJrrLHGMpe0nBTMrMVyQmichnx/TgpmZlbgNgUzqwo9R9/VpO/3+jn7lHTcrbfeyv7778/kyZNZHmZQcEnBzKweY8aMYcCAAVx/ffkW6Vu4sOX0wHdSMDOrw8yZM5kwYQJXXHFFISksXLiQ733ve/Tp04ctttiCiy66CIDHHnuM7bffnr59+7LtttsyY8YMrr76akaNGlV4v3333ZcHH3wQgM6dO3P66aez3Xbb8cgjj3DWWWfRv39/evfuzTHHHEPNDNYvv/wyu+++O3379mXrrbfmlVdeYcSIEdx+++2F9z3kkEO44447muQzu/rIzKwOt912G3vuuScbbbQRq6++Ok8++SSPPvoor732Gk899RRt27blww8/ZN68eRx00EHccMMN9O/fn+nTp9OhQ4d633vWrFn07t2bs846C4DNNtuM008/HYARI0Zw5513MmjQIA455BBGjx7N0KFDmTt3LosWLeKoo47i/PPPZ/DgwXzyySc8/PDDXHPNNfWdrmQuKZiZ1WHMmDEMHz4cgOHDhzNmzBjGjRvHcccdR9u26Z569dVX54UXXmCttdaif//+AKy
yyiqF5+vSpk0bhg0bVth+4IEH2G677ejTpw/jx4/n2WefZcaMGbz99tsMHToUSIPROnbsyM4778zLL7/Me++9x5gxYxg2bNhSz1cqlxTMzGoxbdo0xo8fz6RJk5DEwoULkcQ222zzua6eEVFr98+2bduyaNGiwnbxmIH27dvTpk2bwv4TTjiBxx9/nHXXXZczzjiDuXPnUt8iaCNGjOC6667j+uuv58orr2zsxy1wScHMrBY333wzhx56KG+88Qavv/46b731Fr169WLrrbfm0ksvZcGCBQB8+OGHbLLJJrzzzjs89thjAMyYMYMFCxbQs2dPJk6cyKJFi3jrrbf4z39qn+ezJll07dqVmTNncvPNNwOpxNGjRw9uu+02AD799FNmz54NwGGHHcYFF1wAwOabb95kn9slBTOrCqV2IW0qY8aMYfTo0Z/ZN2zYMCZPnsx6663HFltsQbt27Tj66KMZNWoUN9xwAyeeeCJz5syhQ4cOjBs3jh122IFevXrRp08fevfuzdZbb13rubp06cLRRx9Nnz596NmzZ6EaCuDaa6/l2GOP5fTTT6ddu3bcdNNNfPGLX6R79+5suummDBkypEk/d9nWaG4OXmTHrPWaPHkym266aaXDaLFmz55Nnz59ePLJJ1l11VXrPK6271HSExHRr7bjXX1kZlZlxo0bxyabbMKJJ55Yb0JoCFcfmZlVmd13350333yzLO/tkoKZmRU4KZiZWYGTgpmZFTgpmJlZgRuazaw6nNG0vWw445OlHtKmTRv69OlT2L7ttttYeeWVOeCAA3jsscc47LDDuPjii2t97ezZszn66KN5+umniQi6dOnCPffcQ+fOnZvsI5SDk4KZWR06dOjAxIkTP7Nv1qxZ/PSnP2XSpElMmjSpztdeeOGFdO/enWeeeQaAF154gXbt2jUqngULFjTZHEd1cfWRmdky6NSpEwMGDKB9+/b1Hjd16lTWWWedwvbGG2/MSiutBMCf/vQntthiC/r27cuIESMAeOONNxg4cCBbbLEFAwcOLHQ5PeywwzjllFPYddddOe2003jllVfYc8892Wabbdhxxx15/vnnm/TzuaRgZlaHOXPmsOWWWwLQq1cvbr311pJfe8QRR7DHHntw8803M3DgQEaOHMmGG27Is88+y9lnn82ECRPo2rUrH374IQCjRo3i0EMPZeTIkVx55ZV8+9vfLsx59OKLLzJu3DjatGnDwIEDufTSS9lwww159NFHOeGEExg/fnyTfWYnBTOzOtRWfVSqLbfckldffZWxY8cybtw4+vfvzyOPPML48eM54IAD6Nq1K5Cm3gZ45JFHuOWWW4A0A+r3v//9wnsdeOCBtGnThpkzZ/Lwww9z4IEHFp779NNPG/jpauekYGbWBG699VbOPPNMAC6//HL69etH586d2X///dl///1ZYYUVuPvuu2nXrl2t02wvqfiYTp06AbBo0SK6dOnS4ERVCrcpmJk1gaFDhzJx4kQmTpxIv379mDBhAh999BEA8+bN47nnnmP99ddn4MCB3HjjjUybNg2gUH20/fbbF5b8vO666xgwYMDnzrHKKqvQq1cvbrrpJiCt4/Df//63ST+HSwpmVh1K6ELaXHr27Mn06dOZN28et912G2PHjmWzzTb7zDGvvPIKxx9/PBHBokWL2GeffRg2bBiS+NGPfsTOO+9MmzZt2Gqrrbj66qv57W9/yxFHHMF5551Ht27duOqqq2o993XXXcfxxx/Pz372M+bPn8/w4cPp27dvk302T51tZi2Sp85uGp4628zMGsxJwczMCpwUzKzFqubq7ZagId+fk4KZtUjt27dn2rRpTgwNFBFMmzZtqSOvl+TeR2bWIvXo0YMpU6bw/vvvVzqUqtW+fXt69OixTK9xUjCzFqldu3b06tWr0mF8Rs/RdzX6PV4/Z58miKR8XH1kZmYFFUkKkk6W9KykSZLGSGovaXVJ90l6Kf9erRKxmZktz5o9KUhaB/g
20C8iegNtgOHAaOD+iNgQuD9vm5lZM6pU9VFboIOktkBH4B1gMHBNfv4aYEhlQjMzW37V29AsqQfpLn5HYG1gDjAJuAv4e0QsWtYTRsTbkn4FvJnfb2xEjJXUPSKm5mOmSlqzjpiOAY4BWG+99Zb19GZmVo86SwqSrgKuBOYBvwQOBk4AxgF7Ag9J2mlZT5jbCgYDvUiJppOkb5b6+oi4LCL6RUS/bt26LevpzcysHvWVFH4dEbUtQDoJuEXSikBDbtV3B16LiPcBJN0CbA+8K2mtXEpYC3ivAe9tZmaNUGdSqC0hSNoA6BgRz0TEPODlBpzzTeDLkjqSqo8GAo8Ds4CRwDn59+0NeG9rhCbpg93+G40PpAVNkWy2vCl58JqkHwJ9gEWSFkXEiIacMCIelXQz8CSwAHgKuAzoDNwo6UhS4jiw7ncxM7NyqDMpSDoRuCQiFuZdfSPioPzc0405aUT8BPjJErs/JZUazMysQurrkvoRcI+kQXl7rKR/SPoXcG/5QzMzs+ZWZ1KIiD8Dg4AtJd1OqvffC9g3Ik5tpvjMzKwZLW3w2gbADcCxwCjgAqBDmWMyM7MKqa9N4er8fAfglYg4WtJWwB8l/SciftpMMZqZWTOpr/fRVhHRF0DSUwAR8RQwSNLg5gjOzMyaV31J4R5J/wBWBP5S/EREeAyBmVkrVN/gtdMkrQIsioiZzRiTmZlVSH1tCt8E/lLXpHd5dPNaEfFQuYJrDh7Fa1a/FvF/xP8/mk191UdrAE9JegJ4AngfaA98CdgZ+ACveWBm1qrUV310oaSLgd2AHYAtSHMVTQZGRMSbzROimZk1l3rnPspTXNyXf8zMrLHOWLWRry9vVVqlVl4zM7MWyEnBzMwKSp4622x50iQ9bs7ZpwkiMWteSy0pSOou6QpJf8/bm+U1D8zMrJUppaRwNXAV8KO8/SJpkrwryhSTWevQwhsUzWpTSptC14i4EVgEEBELgIX1v8TMzKpRKUlhlqQ1gACQ9GXAtzBmZq1QKdVHpwB3ABtImgB0Aw4oa1RmZlYR9SYFSW1IU1rsDGwMCHghIuY3Q2xmZtbM6q0+yiOaB0fEgoh4NiImOSGYmbVepVQfTchzIN0AzKrZGRFPli0qMzOriFKSwvb591lF+4I0UZ6ZmbUiS00KEbFrcwRiZmaVV8qI5lUl/UbS4/nn15IaOSrHzMxaolLGKVwJzAC+nn+mk0Y4m5lZK1NKm8IGETGsaPtMSRPLFI+ZmVVQKSWFOZIG1GxI2oG0ApuZmbUypZQUjgeuKWpH+Ag4rGwRmZlZxZTS+2gi0FfSKnl7ermDMjOzyiil99HPJXWJiOkRMV3SapJ+1hzBmZlZ8yqlTWGviPi4ZiMiPgL2LltEZmZWMaUkhTaSVqrZkNQBWKme483MrEqV0tD8Z+B+SVeRprc4ArimrFGZmVlFlNLQfK6kp4Hd866fRsS9jTmppC7A5UBvFieaF0iT7vUEXge+nquqzMysmZRSfURE3AP8ApgAfNAE570QuCciNgH6ApOB0cD9EbEhcH/eNjOzZlRnUpB0p6Te+fFawCTSHf21kk5q6Alz19adgCsAImJebsgezOJqqWuAIQ09h5mZNUx9JYVeETEpPz4cuC8iBgHbkZJDQ30ReB+4StJTki6X1AnoHhFTAfLvNWt7saRjaibne//99xsRhpmZLam+pFC8wtpA4G6AiJgBLGrEOdsCWwO/j4itSAv3lFxVFBGXRUS/iOjXrVu3RoRhZmZLqi8pvCXpRElDSRfxe6DQJbVdI845BZgSEY/m7Zvz+7+bq6lqqqvea8Q5zMysAepLCkcCm5PmOTqoaADbl2nE1NkR8T9Swtk47xoIPAfcAYzM+0YCtzf0HGZm1jB1dkmNiPeA44r3SfpCRDwAPNDI854IXCdpReBVUpvFCsCNko4E3gQObOQ5zMxsGZUyeK3Y3aSqnkbJk+z1q+WpgY19bzMza7iSxikUUVmiMDOzFmF
Zk8IfyxKFmZm1CMuUFCLiEgBJncsTjpmZVdKylhRqPNekUZiZWYtQZ0OzpFPqegpwScHMrBWqr6Twc2A1YOUlfjov5XVmZlal6uuS+iRwW0Q8seQTko4qX0hmZlYp9SWFw4FpdTxX2xgDMzOrcvUlhVciYkFtT0TEu2WKx8zMKqi+toH/1DyQdFEzxGJmZhVWX1IoHr28Q7kDMTOzyqsvKUSzRWFmZi1CfW0Km0h6mlRi2CA/Jm9HRGxR9ujMzKxZ1ZcUNm22KMzMrEWobz2FN5ozEDMzqzyPTDYzswInBTMzK3BSMDOzgvpmSX2GerqluveRmVnrU1/vo33z72/l39fm34cAs8sWkZmZVcxSex9J2iEiikc0j5Y0ATir3MGZmVnzKqVNoZOkATUbkrYHOpUvJDMzq5T6qo9qHAlcKWnVvP0xcETZIjIzs4pZalLIi+z0lbQKoIj4pPxhmZlZJSw1KUhaCRgG9ATaSmny1Ihwm4KZWStTSvXR7cAnwBPAp+UNx8zMKqmUpNAjIvYseyRmZlZxpfQ+elhSn7JHYmZmFVdKSWEAcJik10jVR15PwcyslSolKexV9ijMzKxFKKVLas3I5jWB9mWPyMzMKmapbQqS9pP0EvAa8A/gdeDvZY7LzMwqoJSG5p8CXwZejIhewEBgQlmjMjOziiglKcyPiGnACpJWiIgHgC3LG5aZmVVCKQ3NH0vqDPwTuE7Se8CC8oZlZmaVUEpJYTBp/YSTgXuAV4BBjT2xpDaSnpJ0Z95eXdJ9kl7Kv1dr7DnMzGzZLDUpRMSsiFgUEQsi4pqI+G2uTmqs7wCTi7ZHA/dHxIbA/XnbzMyaUUXWaJbUA9gHuLxo92Dgmvz4GmBIM4dlZrbcq0hSAC4Avg8sKtrXPSKmAuTfa9b2QknHSHpc0uPvv/9+2QM1M1uelDJOYV9JTZY8JO0LvJfXaVhmEXFZRPSLiH7dunVrqrDMzIzSSgrDgZcknStp0yY45w7AfpJeB64HdpP0Z+BdSWsB5N/vNcG5zMxsGZTS0PxNYCtSr6OrJD2Sq3BWbsgJI+IHEdEjInqSEs74fI47gJH5sJGkdRzMzKwZlVQtFBHTgb+S7uzXAoYCT0o6sQljOQf4ap5S46t528zMmlEpy3EOAo4ANgCuBbaNiPckdSR1Kb2ooSePiAeBB/PjaaQpNMzMrEJKGdF8IHB+RPyzeGdEzJZ0RHnCMjOzSiglKfwEmFqzIakDqfvo6xFxf9kiMzOzZldKm8JNfHY8wcK8z8zMWplSkkLbiJhXs5Efr1i+kMzMrFJKSQrvS9qvZkPSYOCD8oVkZmaVUkqbwnGkKbMvBgS8BRxa1qjMzKwiSlmj+RXgy3lNBUXEjPKHZWZmlVBKSQFJ+wCbA+0lARARZ5UxLjMzq4BSJsS7FDgIOJFUfXQgsH6Z4zIzswoopaF5+4g4FPgoIs4EvgKsW96wzMysEkpJCnPz79mS1gbmA73KF5KZmVVKKW0Kf5PUBTgPeBII4I/lDMrMzCqj3qSQF9e5PyI+Bv4q6U6gfUR80hzBmZlZ86q3+igiFgG/Ltr+1AnBzKz1KqVNYaykYarpi2pmZq1WKW0KpwCdgAWS5pK6pUZErFLWyMzMrNmVMqK5QctumplZ9Sll5bWdatu/5KI7ZmZW/UqpPjq16HF7YFvgCWC3skRkZmYVU0r10aDibUnrAueWLSIzM6uYUnofLWkK0LupAzEzs8orpU3hItIoZkhJZEvgv2WMyczMKqSUNoXHix4vAMZExIQyxWNmZhVUSlK4GZgbEQsBJLWR1DEiZpc3NDMza26ltCncD3Qo2u4AjCtPOGZmVkmlJIX2ETGzZiM/7li+kMzMrFJKSQqzJG1dsyFpG2BO+UIyM7NKKaVN4STgJknv5O21SMtzmplZK1PK4LXHJG0CbEyaDO/5iJhf9sjMzKzZLbX6SNK3gE4RMSkingE6Szqh/KGZmVlzK6VN4ei88hoAEfERcHT
ZIjIzs4opJSmsULzAjqQ2wIrlC8nMzCqllIbme4EbJV1Kmu7iOOCeskZlZmYVUUpSOA04Bjie1NA8FvhjOYMyM7PKWGr1UUQsiohLI+KAiBgGPAtc1NATSlpX0gOSJkt6VtJ38v7VJd0n6aX8e7WGnsPMzBqmpKmzJW0p6ZeSXgd+CjzfiHMuAL4bEZsCXwa+JWkzYDRwf0RsSJpaY3QjzmFmZg1QZ/WRpI2A4cDBwDTgBkARsWtjThgRU4Gp+fEMSZOBdYDBwC75sGuAB0lVV2Zm1kzqa1N4HvgXMCgiXgaQdHJTnlxST2Ar4FGge04YRMRUSWvW8ZpjSG0crLfeek0ZjpnZcq++6qNhwP+AByT9UdJAUkNzk5DUGfgrcFJETC/1dRFxWUT0i4h+3bp1a6pwzMyMepJCRNwaEQcBm5Cqck4Gukv6vaQ9GnNSSe1ICeG6iLgl735X0lr5+bWA9xpzDjMzW3al9D6aFRHXRcS+QA9gIo1oBM4D4a4AJkfEb4qeugMYmR+PBG5v6DnMzKxhSup9VCMiPoyIP0TEbo045w7ACGA3SRPzz97AOcBXJb0EfDVvm5lZMypl8FqTioiHqLttYmBzxmJmZp+1TCUFMzNr3ZwUzMyswEnBzMwKnBTMzKzAScHMzAqcFMzMrMBJwczMCpwUzMyswEnBzMwKnBTMzKzAScHMzAqcFMzMrMBJwczMCpwUzMyswEnBzMwKnBTMzKzAScHMzAqcFMzMrMBJwczMCpwUzMyswEnBzMwKnBTMzKzAScHMzAqcFMzMrMBJwczMCpwUzMyswEnBzMwKnBTMzKzAScHMzAqcFMzMrMBJwczMCpwUzMyswEnBzMwKnBTMzKzAScHMzApaXFKQtKekFyS9LGl0peMxM1uetKikIKkN8DtgL2Az4GBJm1U2KjOz5UeLSgrAtsDLEfFqRMwDrgcGVzgmM7PlhiKi0jEUSDoA2DMijsrbI4DtImJU0THHAMfkzY2BF5o90M/rCnxQ6SBaCH8Xi/m7WMzfxWIt4btYPyK61fZE2+aOZClUy77PZK2IuAy4rHnCKY2kxyOiX6XjaAn8XSzm72IxfxeLtfTvoqVVH00B1i3a7gG8U6FYzMyWOy0tKTwGbCipl6QVgeHAHRWOycxsudGiqo8iYoGkUcC9QBvgyoh4tsJhlaJFVWdVmL+LxfxdLObvYrEW/V20qIZmMzOrrJZWfWRmZhXkpGBmZgVOCmZVQlJtXbatyklqUdfhFhVMa9LS/tBW3SStBuyWH+8qaesKh9SkJK0qqWN+vP7ykABr/oYRsaglXS/c0NxEJK0LdAA6R8STeZ/CX3DZ1Hy/ktpGxIK8r01ELKx0bE1N0trAd0hTwbQljfyfVdmomkbufr4z0BvoBKwB/CgiZlc0sDKTdBPQLSJ2ydsrRMSiykbVwrqkVitJ+wA/AmYCHfPEfntHxEdODOWTE8IgYD9Ji4ATI2Jea0oMNReKiHhH0ifAVsB1NQmhpVxIGiP/zd4Bfg58ARgaEbNb09+xDgcBf5H094jYq6bEUOm/Z4spslQrSV8DzgRGA/tGxADgDeA+SavmC1erLwpXgqS+wBnA34GOwBOSVoqIhTkxV7V8Q7EoP94MuIo0oLOtpHOgUPXwhQqG2WBL/L94DphA+lvuI2nd1pgQJA2QtK2k1XOyHw5Mk3QftIyqJFcfNUK+KD0FbBsRj0tqHxFz83M3Al+IiJ0qGmQrJak3cBLwZkSclfddDfQFvhwRn1YuuqaVB3SOAgYA00hVSN8C3gSeAfoD/xcRcyoWZCNIOhlYLSJOl7QVcAiwkHSj1R9oExGPVDLGppCT97+AtYB/kv521wOvAOcB3SNiSD62YiUGlxQaISL+CzxMqjoiIuZKap+fPgJA0kYVCm95sAqwec2aGxFxGPAS8F9llQyuKeTqscOBnSPiA6AXaWbgC0gXl9Gkkf/VnBAOAG7MuyYC15EmwnwQ+BOtYP4zSV0i4n+
kWoXxwJ3AmqS/7b3Ak8Cukm6GVGKoVKxOCg0gaRNJ2wHk6qLOku7P23MltSP9o54FtOrGsuZSc4GX1DeX0GYCRwNzgMGSNgGIiK8DB0VWsYAbqJZEFsANwN6SziRdKC8ApkXEkcBuEfFcswbZRCR1IDUuDwPmSjoCuBVYBJxP+pyDIuKNigXZBCRtAPxM0q4R8WdgLLARcAuptHsOqWT0GtBP0jqVihWcFJaZpD1Jdy+HSNoSICK+CoSk8Xl7PukfentSYrBGyEXpyN/9dcDxpLrnQcDJwAbA8KJV+p6uTKSNU9wpQVI3SV2AfwN9gD2Ah0hVR+1JF1Mi4qPKRLvslkx4uXSzAinpnU/qdfQJ8N2IeDcibo2Il5o/0ib3HukmZrCkARFxMalEewCwJ3BXRFwODAT6R8TblQvVvY+WiaS9gV8CxyxZxxkRu0saJ+k24FpSne9R1fSftqWRtAbwSZ4ocTVSNd13IuL+nADGAx8DvwB+AsyF1CupQiE3SlFCOJl0gehMups8It9o1Pwb3ACohokiC5ZIeN8AVgVmRsThkrYHXs89rL4KjJK0ckTMqGTMjSVpTWBRRHwg6WfAt4FvSCIifpd7zA0D2ksaGxHTKhpw5pJCCXL1dEfgMODU4oQg6XxJ50NKDEA34C/A4RHxTCXibQ0kdSK1y6wDhTvi10iNcuQqk1Gk/vqvAKMi4tUKhdsoxXfQkr4O7A3sR7qb3AeoGYPxDeD/SEni9eaPtOGKEsJxpNJdO2CEpPsi4uGcEE4hNbj+uBUkhC2At4F789+0b0T8HHgX2FPSLhHxe1Kvq90qGOrnOCmUZqU8kGYG8GHNzlwH2o9UD/gHgIjYAfhitdbztiCfAlcCCyT9MF84ZwJ/WOK4NXL305nNHWBTWOIOeg3SheSnwKnAeqRuziHpS8BdwAHVerMhqS2wC3BSRPw2IvYAZkq6Mh/SGTi4Wj9fjVytPAu4jVSq2xI4RdIVpGtuL+BASdtFxK9JPcdazL9fJ4WlyMX1cyVtCHQn/aOuMTkidoyIHYGNtXjYekXrBKtdbkNYkIvTm5Ea5Y6LiBNIF5Hxkn5CqjK6NiIWVnrAT0MVJYQjgYtIg9OuAfpFxNciYr6kY0m9VuZW07+tWhrNF5FKPcXjKn5AugEgIs6KiMnNFF5ZKA1k/QuwNaln2DXAJsDXSY3os0ndpo8Hfq40rqZFVTG7TaEekvYFzgbOjIiXJP0YuFrSexFxdU01kqT9Sf/Y36xguK1CvnNeJGk3UhK+nlTVsJ+kEyJimKQhpOkQRkXEP4rvtquRpC8D+wIjImKmpH7Al3IPtx1I3RaHV9PYiyVKQDuTStkfAJcDf5X0dkT8G/gKabXFTsDsKv877gxcCBwSEY/mfT8iLarz54g4GLgzdzvtDnzQEv+mHrxWB6WBJmOA70fEY7n7nIDBpOL9b0hJYB3gBFKx11VGTSBf9M8AfhARf8/VDjuTemu8Afy6puG1GtVcMPOd9OrAj4G9gB9GxC35mHPz4WsCv6zWO2hJJwDHkHpO7Q2MIFUT/Yq0/O5WwDejOlZYrFduE1kYERdKalfUOaAT8HvS3Ghfb+mJzyWFun0KzCf1n24PnEYaUTqd1ONlMKmRqAvpzsAJoQlIWpV0ERkETM13y1+u+Y8GDCHVtb9SuSgbbolSzYoRMU3S/5F6TvXLpdCHIuL7+fjCZH/VJvfPP4w0l9FrkgaQ6tkHAl8DVgTmV1OVWG2K/qa9SF1qIXcOAIiIWZLOJvVc/AtwcPNHWTqXFOqQ7+JOIfUP3xwYR7rbmUy6Y30oIv6a6wRbXBGwmhTdOa9KajB+kDRqd0VSHexewC0RcbKkrpFG9lY1Sd8m9TpZldSW8G9SN2aAeyPin/m4qqkak9QfWA2YEhHP5W7EvwOOjsUT+H0H6JR74rQqucrzh8BpEfGE8hxGuTr0BOBuYE5EvFvJOJfGDc11yP8R/0CqxjgFODYiLo+ICaTSQed86Ly
KBNhKFCWEPYGz8u7DSaWxP0bEcaTumavnBFyVCUFSD0kr58fDSPP7nAb8mlSVsgNwLqmtZOdcOq2aMReS9iJN2Hc08D1J6+cG1FX57EL1HUndtlujR0k3jgdJ2ibShHeLJB1E+l4WtPSEAC4pLDNJB5L+Mx+U+8dbI+U7rN+TxnY8vMRzQ0k9b34cEXdUIr7Gyj1SjiPdMf9P0ghg44j4cX7+K8BNpMSwEvBRRLxfsYCXUU7oFwL7RcQLkm4BrgbGRpr2ZSypeuxVYCdSdWtVtpEsjdIUFUeSqsgeI33uA0hdiSdVMrZSuU2hRJLWIs1/fjROCE0ijy8Q6T/Nb4B/SzoE2J00++lPSA2TP46IO6qpKqWG0tTqZ5E6LPwvVynMAjaStGJEzIuIRyTdCXSstoulpM6kUbkv5YTQhjSz6UqkwWlPRcQeufPASsDvonVMXVGriHhb0nmk0fa7A1NJyfLFykZWOpcUSpR7H+0GvBARL1c6nmpW3PumqOroN6Q5Yh4hjeTdn7R2wMKImFOlCWET4B5Sr6K/SFqfNJr3B6QGx+mkuZxq9n8tIt6qVLzLqqaHjdIEhUOAdYFtgAsj4mql6StOA86OiP9UMFRbBi4plCjS5F13VTqOaleUCHYjzfz5GPA66aKyICJezReZ44FVIuIdqJ669SW8QmpA3jRXEZ1HmvxsDjBU0i+ArwIbk6oXqikh7AbsIunRiLhL0kosbgv6K0BEPCxpLtC1cpHasnJJwZqdpD1Is2KeQVp3+GFSj43I1S0X5O2qbEOAxWtF54vlZcD2pNHXZ9VybGFxpmqgNMr/TFKSez4ins77e5Ma0FckzdG0A6kb5gFRpfNSLY+cFKxZ5Wq4k0n91VcBLiHNmf+2pB6keWI+jYj7qrHKqJjy6ll5fMUlpFG9vwHeyfs/U41W2WhLk8eN/Jk04OzRov17kOrRvwgcShpsuBppsJbH8FQRJwVrNrnKoRNpLphhpEbmoZFmyNyX1BB5e7UO1qrNEiWGy0ntCOdFlc1yWiP/nXpHxDlFn+080mDDf5MGHm4EHAVc5oRQfTxOwZpFrlo4mLS04h2kuufrc0LYjtRX/5NqTQh5sGPN4zY1j/NFs20e4HgkaVqUE5Wm7qgaknZTmqNpTdKAzprPtjlpWdC9SQMPT8pdL7/vhFCdquofplWffLFcg9Rn+/Y80nNVUr/83SQ9CKxMWqdiXOUibZya6h+l6dR3knQT8HBEfBRpkaC2ETFPaW79rlWY/HYmDUQ7mzS4bjDwt4h4VtLInCBeI01eSER4UGeVcknByqLozrlLHoX8TWB/SXtFxCek1emOAo4FhkTE34rvtqtFzVQG+fFepNLAU6RVtg6WtC5ATgxt8riEalyIfgLQLQ+qewHYFRgKhRLDcNLI81sqF6I1BbcpWJMrakDdFvgZ8NuIuDOPBr8SODAi7qlslI1X3ECcq1E2I41GHpcbXkeQelbdHVW4+LykgcCmpCT3DmlsxR6k0sCRpHWiNyXNVbUfVTRq1+rm6iNrckUD0o4gdU+8RNK8iLhJ0gLgbkl7V3NiWCIhHEua/vot0pxYW0TEWEkLgRNJq8ddVYVVRiuSFov5Kos7CJwC/Is00d2KpNlOpwCXVGvjuX2WSwrW5PL8L7cD34qIRyWNIo1O/kUe6HQgMD0i7q1ooE1A0o6karBTST2L/kDqijk0V6vsArxYpVVGBZJ6kdoTVs4/H5Daii6OiL9WMjZrWm5TsHJ4F3gRaAMQEReTZpD8g9K6tDdFxL1V2oag/HsFSd1Ig+82BNbLA9COJ10wx+dxCg9Wc0LQ4umfXyN1FvggInYhlYCuJlUtWSvipGCNVnSh7CZp7VxN8i6wndIKdpCW1XwJuEzSKlB9U1csMcisTW50PYnU8LqbpA0iYjbpgvksqftpVYvPrn19C4t7F02NiGs8Urn1cZuCNVpuQxgMfA/4RNJ/gRtJk6FtkNsRdiTNMvsD0sLt0ysVb0M
s0YZwHDBQ0hzS5zyVtC4Ckm6LNCPmCRULtnw+AbbOpb1Hl3q0VSWXFKzRJG1FSghDSCvUDYmIR4BRwH2kUsOhQA/SHEBVlRDgM+MQvkFqQL+QNFXH1cAupAT4ZWDPPK1Fa/QJqQdS1UzcZ8vODc22zCStTpqiYk5EzJbUjzRn0QLSuIND8mynm0TE8/k1XyGNTRhSTd0WJW1KmsLh17nh+HhghYj4XX6+H2n6it1Ia0e/V81tCEujKl4z2krjkoItE6U1Au4HLiZ1Le1Omt7gG6R5bw7ICWEP4EpJa+eXPg7sWmUJoQ3wJdJcPiflRtcFpBIRABHxODARWDUiJrbmhABpEF6lY7DyclKwkknaiDRD5oWkmU4nAj/IpYEHSBfMrSUdRpoN9Od5bqMVImJ+VNd6AStExMKI+BvwBLAtMDIi/gh8KukuSRvlz7oFXqvbWglXH1lJ8gRufwI6R8R+ed/mpPr10/I0DiOADUi9bm7KA7iqZlro2kj6NmnwlkgdM26JiMskXUzqibMB8J2IeLaCYZo1GScFK1muX/8N8GBE/FLSqaTFViaQBmxdDExuLT1TlNZ3uAHYLSI+lTSItEzo/RHx53xMx9wN1axVcPWRlSRXp0wmTXOwu6SbSVNhbwN8HbgK2LyCITZaHYPpupKqhyC1pcwCTpZ0Yt43pzliM2suLilYybR4JbENSctpToqI0UXPr5TXDag6S4xD+BKpF9F0SSeRVhO7PCKezlNjf4m0OP27lYvYrDycFGyp8pTPC/PjmsSwCSkx/Bu4KCI+rGiQjbBEQjiJtM5wO+C7pJ5VA4CjgbHAXsBeEfFyZaI1Ky9XH1mtiqau2AkYKqk9pGkPcmJ4njRgbSegS8UCbQJFCWFv0joBOwG/J81j1A24jDTp3T+BPZ0QrDXzNBf2OUWlgT2AS4FD82RvwGcSw7OS9ouIWZWLtmnkKrEjSAPT5pAm75tHWjegM6k31cJKxmjWHFxSsII862fNRX9l0p3yqIh4qGa2zJoSRNFEaVXZ86aWRuWppJ5GK+b1EYiIq4B7gcFAh+aN0KwyXFIwIDUSA6MkXRMRr0bEDEnvAjUX/zb58Zck/S8iZkD1zXQKn2tDGA7MJU3ZcVPOFXvkQy6LiEsljYmImZWM2ay5uKRgNRYC5wFzJP0673sPOEBSu4iYnye+O5+0gHvVKkoI3yGth9ABuEjS4RFxE3APaSrsw/NLqm4CP7OGcklhOSepA9A+Ij6S1BVYG1hP0mkRcbqkMcD1kj4ijUk4KyKmVDLmpiBpa2BPUsPyqcAbwGm5W+2lebrv/0B1lobMGspdUpdjuV59W+AA0kppOwL/R1pm8bvA03nk8leANYF3IuKxapy6QtIAoA/wGmkE9mzSZ9oeODkiBuQZUH8LHBcRV1QsWLMKcvXRcixf2CeRpnw+F7g3It4AniNVE20u6dyIeCQibo+Ix4peVzUkfY3UxXQHYARptTQiYippQfqaNYbnkNYh/kfzR2nWMrj6aDlVdLc/B3iaNEhrb0mPR8QLwOOSLgGOkrRpnuKi6kjaDbgJ2Dgipub5i/ZlcQP6HGCQpC8CewC7V9NsrmZNzdVHy6GahCBpCGmE7reB9qQ76D6kvvmdSIO47omIjysTaeNJ2oK0uPyIiPhL3vcQadW05yPizjxorSPwbLUmP7Om4pLCcignhL2AnwCn5hlA5wF/IA3gug9YHTi8mhMCQJ6vaDvgvtztdm3SjK7bADtKupC0vvJlXkDGzCWF5YaktYCDIuKCvH02ad6ip4DtSPP9XA38i9T4PCMiHq5IsGUgqT9p7qIPI2KDov2DgCcj4u2KBWfWgjgpLCckbUzqWPBxrlv/NrA7qUTwN6BHfnxsax2olauS/gF8OyKurXQ8Zi2Rq4+WHy+SZv68StI7EXFqrlufEREvSdoAuI7UTbNVJoVclfRV4D9KC9BfVemYzFoad0ldfvSMiHnAL4Hukn4REU/mhLA/cDtpTeV
XKxtmeUXE46T2hFZTNWbWlJwUWrGi6a83BO7Jo5SfJiWGdSSdmw9tT6pSuaOO1cdalYh4Kne7NbMluE2hlZO0L2nZzAXA1sANEfEzSZsBZwEvRsQPKxmjmbUcTgqtmKQupO6lJ5OqS/qQRvbeGhHnSeoDtIuIJysXpZm1JG5obt0WAh8Ar+c1EiaRGpNPkTQ7In5X2fDMrKVxm0IrUtSGsHae7XMGaSzCzZI65JXDXgVuBr4mafMKhmtmLZBLCq1IHqm8J2mk8kuS2gA/BAJ4UtKVwLHASNKYhFbfqGxmy8ZJoRXJvYwuAI4G3gWGAH8hrRtQM05hMLAKqVumF48xs89wUqhyS6xtsBB4JCL+JWmFiDhX0vrAfhFxXT6+P2la7MMj4s0KhW1mLZTbFKqUpFWhUGVUUw00A9hJ0rciomZq6GnAF4pe+h4wJCL+23zRmlm1cEmhCuXZPp+UdHFEnJ8TQ7uIeF/SgcCYPAHe48B+5EVlAPIiOmZmtfI4hSqVl8i8HTg9Ii7N+9pFxHxJ6wI/Bd4G/h0Rf6tgqGZWRVxSqFIR8UheHOY+SeTEUFNl1I40aO0vNdVL1baEpplVhtsUqlie3O2rwC8knRARCyXtAvwHeLcmETghmFmpXH3UCkjqB9wN3ArsDPwwIm6pbFRmVo2cFFqJ3NV0PHBERNxU0yPJpQQzWxZOCq2IpM4RMdNtCGbWUG5TaF1mVToAM6tuLimYmVmBSwpmZlbgpGBmZgVOCmZmVuCkYMslSUMlhaRN8nbPvDJdU73/5XkdbCT9sGh/k57HrKk5Kdjy6mDgIWB4U7+xpDYRcVREPJd3/bDeF5i1IE4KttyR1BnYATiSWpKCpI6SbpT0tKQbJD2aR40j6WBJz0iaJOmXRa+ZKeksSY8CX5H0oKR+ks4BOkiaKOm6fHgbSX+U9KyksZI65Pd4UNL5kv4pabKk/pJukfSSpJ/lYzpJukvSf3MMB5X327LljZOCLY+GAPdExIvAh5K2XuL5E4CPImIL0myz20Ba+xr4JbAbsCXQX9KQ/JpOwKSI2C4iHqp5o4gYDcyJiC0j4pC8e0PgdxGxOfAxMKzo3PMiYifgUtIsuN8CegOHSVqDtIreOxHRNyJ6A/c09sswK+akYMujg4Hr8+Pr83axATXPR8Qk4Om8vz/wYES8HxELgOuAnfJzC4G/lnj+1yJiYn78BNCz6Lk78u9ngGcjYmpEfAq8Cqyb9+8u6ZeSdoyIT0o8p1lJPHW2LVfy3fZuQG9JAbQBArik+LC6Xl7PW8+NiIUlhvFp0eOFQIdanlu0xHGLgLYR8aKkbYC9SbPjjo2Is0o8r9lSuaRgy5sDgD9FxPoR0TMi1gVeA3oUHfMQ8HWA3IOoT97/KLCzpK6S2pBKGP8o4ZzzJbVriuBzFdbsiPgz8Ctgyaovs0ZxScGWNwcD5yyx7698tofQJcA1kp4GniJVH30SEVMl/QB4gFRquDsibi/hnJcBT0t6EvhRI+PvA5wnaREwHzi+ke9n9hme+8hsCbkU0C4i5kraALgf2Cgi5lU4NLOyc0nB7PM6Ag/kKh8Bxzsh2PLCJQUzMytwQ7OZmRU4KZiZWYGTgpmZFTgpmJlZgZOCmZkV/D/iGgQI5kDtywAAAABJRU5ErkJggg==\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "## Plot\n", + "# get only accuracy and F1-score\n", + "accuracy_f1 = table.iloc[[0,3]]\n", + "accuracy_f1.drop(accuracy_f1.columns[len(accuracy_f1.columns)-1], axis=1, inplace=True)\n", + "# get all algorithms with accuracy or F1-score > 70\n", + "accuracy_f1 = accuracy_f1.loc[:, accuracy_f1.gt(70).any()]\n", + "\n", + "# convert numpy array into list\n", + "accuracy_values = [item for sublist in accuracy_f1.iloc[[0]].values.tolist() for item in sublist]\n", + "f1_values = [item for sublist in accuracy_f1.iloc[[1]].values.tolist() for item in sublist]\n", + "\n", + "plotdata = pd.DataFrame({\n", + " \"Accuracy\" : accuracy_values,\n", + " \"F1-Score\" : f1_values\n", + " }, \n", + " index = list(accuracy_f1)\n", + ")\n", + "plotdata.plot(kind=\"bar\") \n", + "\n", + "plt.title(\"Best Performing Algorithms\")\n", + "plt.xlabel(\"Algorithms\")\n", + "plt.ylabel(\"Accuracy and F1-Score (%)\")\n", + "plt.xticks(rotation=45, ha=\"center\")\n", + "plt.ylim([0,100])\n", + "plt.legend(loc='best')\n", + "plt.rcParams[\"figure.figsize\"] = plt.rcParamsDefault[\"figure.figsize\"] \n", + "#plt.savefig('Best Performing Algorithms.png', bbox_inches = \"tight\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.12" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +}