matrix-spam-ml/dataset_analysis.ipynb

202 lines
39 KiB
Plaintext

{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"This notebook is mainly used to debug the dataset and to see how the data is distributed. It also cleans the raw data, writes the result to `./input/MatrixData_cleaned.csv`, and generates the dataset statistics."
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import csv\n",
"\n",
"import numpy as np  # numerical computing\n",
"import pandas as pd  # data analysis, working with DataFrames\n",
"import seaborn as sns\n",
"from nltk.corpus import stopwords\n",
"\n",
"RAW_DATA_PATH = \"./input/MatrixData.tsv\"\n",
"CLEANED_DATA_PATH = \"./input/MatrixData_cleaned.csv\"\n",
"MIN_HAM_WORDS = 14  # ham messages with fewer whitespace-separated words are dropped\n",
"SHUFFLE_SEED = 42  # fixes the row order of the shuffled copy `data`\n",
"\n",
"# Some words which might indicate a certain sentiment are kept via a whitelist\n",
"STOPWORD_WHITELIST = frozenset({\"n't\", \"not\", \"no\"})\n",
"# Built once here instead of calling stopwords.words() for every message;\n",
"# a frozenset also gives constant-time membership tests.\n",
"STOPWORDS_TO_DROP = frozenset(stopwords.words(\"english\")) - STOPWORD_WHITELIST\n",
"\n",
"\n",
"def remove_stopwords(input_text):\n",
"    \"\"\"\n",
"    Remove English stopwords and single-character tokens from a string.\n",
"\n",
"    Matching is case-sensitive, so the text should already be lower-cased;\n",
"    otherwise capitalised stopwords (The, You, ...) are not removed.\n",
"    Whitelisted words (n't, not, no) are always kept.\n",
"\n",
"    Parameters:\n",
"        input_text : str, text to clean; it is split on whitespace\n",
"    Output:\n",
"        str containing the remaining words joined by single spaces\n",
"    \"\"\"\n",
"    return \" \".join(\n",
"        word\n",
"        for word in input_text.split()\n",
"        if word not in STOPWORDS_TO_DROP and len(word) > 1\n",
"    )\n",
"\n",
"\n",
"def lower_casing_text(text):\n",
"    \"\"\"\n",
"    Convert text to lower case.\n",
"\n",
"    Arguments:\n",
"        text: str, the text to convert\n",
"\n",
"    Returns:\n",
"        The text with all uppercase letters converted to lowercase.\n",
"\n",
"    Example:\n",
"        Input : The World is Full of Surprises!\n",
"        Output : the world is full of surprises!\n",
"    \"\"\"\n",
"    return text.lower()\n",
"\n",
"\n",
"raw_df = pd.read_csv(RAW_DATA_PATH, sep=\"\\t\", quoting=csv.QUOTE_NONE, encoding=\"utf-8\")\n",
"\n",
"# Keep every spam message, but only ham messages with at least MIN_HAM_WORDS words.\n",
"# Explicit masks avoid relying on DataFrame.query's special precedence for `&`.\n",
"is_spam = raw_df[\"label\"] == \"spam\"\n",
"is_long_ham = (raw_df[\"label\"] == \"ham\") & (\n",
"    raw_df[\"message\"].str.split().str.len() >= MIN_HAM_WORDS\n",
")\n",
"\n",
"# `df` keeps its name because later cells plot from it.\n",
"df = (\n",
"    raw_df.loc[is_long_ham | is_spam]\n",
"    .astype({\"message\": str, \"label\": str})\n",
"    .drop_duplicates()\n",
"    .reset_index(drop=True)\n",
"    # Lower-case BEFORE removing stopwords: the stopword match is case-sensitive,\n",
"    # so the previous order (stopwords first) left capitalised stopwords in the data.\n",
"    # NOTE(review): keep any inference-side preprocessing in sync with this order.\n",
"    .assign(\n",
"        message=lambda frame: frame[\"message\"]\n",
"        .apply(lower_casing_text)\n",
"        .apply(remove_stopwords)\n",
"    )\n",
"    # Cleaning can make previously distinct rows identical, so de-duplicate again.\n",
"    .drop_duplicates()\n",
"    .reset_index(drop=True)\n",
")\n",
"\n",
"# Shuffled copy of the cleaned data; seeded so reruns give the same order.\n",
"# NOTE(review): only `df` (unshuffled) is written to disk below - confirm that is intended.\n",
"data = df.sample(frac=1, random_state=SHUFFLE_SEED).reset_index(drop=True)\n",
"df.to_csv(CLEANED_DATA_PATH, encoding=\"utf-8\", index=False)"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"<AxesSubplot: xlabel='label', ylabel='count'>"
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAkQAAAGwCAYAAABIC3rIAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjYuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8o6BhiAAAACXBIWXMAAA9hAAAPYQGoP6dpAAArHklEQVR4nO3df1TVdYL/8ddFvRdQQEHhwnTFHxWlgRo5xinN0gHRsamcmtRSR1bNwdykVZZdQ7TZMC0zXdfGNrNmKfsxZZO1HpFSy9CSlkgtUgeH5uRFV8UbOPFD7veP/frZuSv2g4B74f18nHPP4fP+vO/n8/7MOeRzPvcD2Lxer1cAAAAGC/L3AgAAAPyNIAIAAMYjiAAAgPEIIgAAYDyCCAAAGI8gAgAAxiOIAACA8br6ewEdQVNTk7766iuFhYXJZrP5ezkAAOB78Hq9+vrrrxUXF6egoG+/B0QQfQ9fffWVXC6Xv5cBAABa4Msvv9Rll132rXMIou8hLCxM0v/8DxoeHu7n1QAAgO/D4/HI5XJZ/45/G4Loe7jwMVl4eDhBBABAB/N9HnfhoWoAAGA8gggAABiPIAIAAMYjiAAAgPEIIgAAYDyCCAAAGI8gAgAAxiOIAACA8QgiAABgPIIIAAAYjyACAADGI4gAAIDxCCIAAGA8gggAABiPIAIAAMbr6u8F4H8lL3ze30sAAlLJymn+XgKATs6vd4h2796tiRMnKi4uTjabTVu2bPHZb7PZmn2tXLnSmtOvX7+L9i9fvtznOGVlZRo5cqSCg4Plcrm0YsWK9rg8AADQQfg1iGprazVkyBCtW7eu2f3Hjx/3eW3cuFE2m02TJk3ymbds2TKfeffff7+1z+PxKDU1VfHx8SopKdHKlSuVl5enDRs2tOm1AQCAjsOvH5mlp6crPT39kvudTqfP9htvvKGbb75ZAwYM8BkPCwu7aO4FBQUFqq+v18aNG2W32zV48GCVlpZq1apVmj17drPvqaurU11dnbXt8Xi+7yUBAIAOqMM8VF1VVaW33npLGRkZF+1bvny5oqKiNGzYMK1cuVKNjY3WvuLiYo0aNUp2u90aS0tLU3l5uc6cOdPsufLz8xUREWG9XC5X618QAAAIGB0miJ577jmFhYXpjjvu8BmfP3++Nm/erHfffVdz5szRI488okWLFln73W63YmJifN5zYdvtdjd7rpycHJ09e9Z6ffnll618NQAAIJB0mJ8y27hxo6ZOnarg4GCf8aysLOvrpKQk2e12zZkzR/n5+XI4HC06l8PhaPF7AQBAx9Mh7hC99957Ki8v19/93d9959wRI0aosbFRx44dk/Q/zyFVVVX5zLmwfannjgAAgFk6RBA988wzSk5O1pAhQ75zbmlpqYKCghQdHS1JSklJ0e7du9XQ0GDNKSwsVEJCgnr16tVmawYAAB2HX4OopqZGpaWlKi0tlSRVVFSotLRUlZWV1hyPx6NXXnml2btDxcXFWr16tT755BP96U9/UkFBgRYsWKB77rnHip0pU6bIbrcrIyNDBw8e1EsvvaQnn3zS56M2AABgNr8+Q7R//37dfPPN1vaFSJk+fbo2bdokSdq8ebO8Xq8mT5580fsdDoc2b96svLw81dXVqX///lqwYIFP7ERERGj79u3KzMxUcnKyevfurdzc3Ev+yD0AADCPzev1ev29iEDn8XgUERGhs2fPKjw8vM3Ow5/uAJrHn+4A0BI/5N/vDvEMEQAAQFsiiAAAgPEIIgAAYDyCCAAAGI8gAgAAxiOIAACA8QgiAABgPIIIAAAYjyACAADGI4gAAIDxCCIAAGA8gggAABiPIAIAAMYjiAAAgPEIIgAAYDyCCAAAGI8gAgAAxiOIAACA8QgiAABgPIIIAAAYjyACAADGI4gAAIDxCCIAAGA8gggAABiPIAIAAMYjiAAAgPEIIgAAYDyCCAAAGI8gAgAAxiOIAACA8QgiAABgPIIIAAAYjyACAA
DGI4gAAIDxCCIAAGA8gggAABiPIAIAAMYjiAAAgPH8GkS7d+/WxIkTFRcXJ5vNpi1btvjsnzFjhmw2m89r3LhxPnNOnz6tqVOnKjw8XD179lRGRoZqamp85pSVlWnkyJEKDg6Wy+XSihUr2vrSAABAB+LXIKqtrdWQIUO0bt26S84ZN26cjh8/br1efPFFn/1Tp07VwYMHVVhYqK1bt2r37t2aPXu2td/j8Sg1NVXx8fEqKSnRypUrlZeXpw0bNrTZdQEAgI6lqz9Pnp6ervT09G+d43A45HQ6m9332Wefadu2bfroo4903XXXSZLWrl2r8ePH67HHHlNcXJwKCgpUX1+vjRs3ym63a/DgwSotLdWqVat8wgkAAJgr4J8h2rlzp6Kjo5WQkKC5c+fq1KlT1r7i4mL17NnTiiFJGjt2rIKCgrRv3z5rzqhRo2S32605aWlpKi8v15kzZ5o9Z11dnTwej88LAAB0XgEdROPGjdPzzz+voqIiPfroo9q1a5fS09N1/vx5SZLb7VZ0dLTPe7p27arIyEi53W5rTkxMjM+cC9sX5vxf+fn5ioiIsF4ul6u1Lw0AAAQQv35k9l3uvvtu6+vExEQlJSVp4MCB2rlzp8aMGdNm583JyVFWVpa17fF4iCIAADqxgL5D9H8NGDBAvXv31pEjRyRJTqdTJ06c8JnT2Nio06dPW88dOZ1OVVVV+cy5sH2pZ5McDofCw8N9XgAAoPPqUEH0l7/8RadOnVJsbKwkKSUlRdXV1SopKbHmvPPOO2pqatKIESOsObt371ZDQ4M1p7CwUAkJCerVq1f7XgAAAAhIfg2impoalZaWqrS0VJJUUVGh0tJSVVZWqqamRgsXLtTevXt17NgxFRUV6Re/+IUuv/xypaWlSZKuvvpqjRs3TrNmzdKHH36oPXv2aN68ebr77rsVFxcnSZoyZYrsdrsyMjJ08OBBvfTSS3ryySd9PhIDAABm82sQ7d+/X8OGDdOwYcMkSVlZWRo2bJhyc3PVpUsXlZWV6dZbb9WVV16pjIwMJScn67333pPD4bCOUVBQoKuuukpjxozR+PHjdeONN/r8jqGIiAht375dFRUVSk5O1oMPPqjc3Fx+5B4AAFhsXq/X6+9FBDqPx6OIiAidPXu2TZ8nSl74fJsdG+jISlZO8/cSAHRAP+Tf7w71DBEAAEBbIIgAAIDxCCIAAGA8gggAABiPIAIAAMYjiAAAgPEIIgAAYDyCCAAAGI8gAgAAxiOIAACA8QgiAABgPIIIAAAYjyACAADGI4gAAIDxCCIAAGA8gggAABiPIAIAAMYjiAAAgPEIIgAAYDyCCAAAGI8gAgAAxiOIAACA8QgiAABgPIIIAAAYjyACAADGI4gAAIDxCCIAAGA8gggAABiPIAIAAMYjiAAAgPEIIgAAYDyCCAAAGI8gAgAAxiOIAACA8QgiAABgPIIIAAAYjyACAADGI4gAAIDxCCIAAGA8vwbR7t27NXHiRMXFxclms2nLli3WvoaGBmVnZysxMVHdu3dXXFycpk2bpq+++srnGP369ZPNZvN5LV++3GdOWVmZRo4cqeDgYLlcLq1YsaI9Lg8AAHQQfg2i2tpaDRkyROvWrbto37lz5/Txxx/roYce0scff6zXXntN5eXluvXWWy+au2zZMh0/ftx63X///dY+j8ej1NRUxcfHq6SkRCtXrlReXp42bNjQptcGAAA6jq7+PHl6errS09Ob3RcREaHCwkKfsX/913/VT3/6U1VWVqpv377WeFhYmJxOZ7PHKSgoUH19vTZu3Ci73a7BgwertLRUq1at0uzZs1vvYgAAQIfVoZ4hOnv2rGw2m3r27Okzvnz5ckVFRWnYsGFauXKlGhsbrX3FxcUaNWqU7Ha7NZaWlqby8nKdOXOm2fPU1dXJ4/H4vAAAQOfl1ztEP8Q333yj7OxsTZ48WeHh4db4/Pnzde211yoyMlIffPCBcnJydPz4ca1atUqS5Ha71b
9/f59jxcTEWPt69ep10bny8/O1dOnSNrwaAAAQSDpEEDU0NOiuu+6S1+vV+vXrffZlZWVZXyclJclut2vOnDnKz8+Xw+Fo0flycnJ8juvxeORyuVq2eAAAEPACPoguxNCf//xnvfPOOz53h5ozYsQINTY26tixY0pISJDT6VRVVZXPnAvbl3ruyOFwtDimAABAxxPQzxBdiKHDhw9rx44dioqK+s73lJaWKigoSNHR0ZKklJQU7d69Ww0NDdacwsJCJSQkNPtxGQAAMI9f7xDV1NToyJEj1nZFRYVKS0sVGRmp2NhY/fKXv9THH3+srVu36vz583K73ZKkyMhI2e12FRcXa9++fbr55psVFham4uJiLViwQPfcc48VO1OmTNHSpUuVkZGh7OxsHThwQE8++aSeeOIJv1wzAAAIPH4Nov379+vmm2+2ti88tzN9+nTl5eXpj3/8oyRp6NChPu979913NXr0aDkcDm3evFl5eXmqq6tT//79tWDBAp/nfyIiIrR9+3ZlZmYqOTlZvXv3Vm5uLj9yDwAALH4NotGjR8vr9V5y/7ftk6Rrr71We/fu/c7zJCUl6b333vvB6wMAAGYI6GeIAAAA2gNBBAAAjEcQAQAA4xFEAADAeAQRAAAwHkEEAACMRxABAADjEUQAAMB4BBEAADAeQQQAAIxHEAEAAOMRRAAAwHgEEQAAMB5BBAAAjEcQAQAA4xFEAADAeAQRAAAwHkEEAACMRxABAADjEUQAAMB4BBEAADAeQQQAAIxHEAEAAOMRRAAAwHgEEQAAMB5BBAAAjEcQAQAA4xFEAADAeAQRAAAwHkEEAACMRxABAADjEUQAAMB4BBEAADAeQQQAAIxHEAEAAOMRRAAAwHgEEQAAMB5BBAAAjEcQAQAA47UoiG655RZVV1dfNO7xeHTLLbd87+Ps3r1bEydOVFxcnGw2m7Zs2eKz3+v1Kjc3V7GxsQoJCdHYsWN1+PBhnzmnT5/W1KlTFR4erp49eyojI0M1NTU+c8rKyjRy5EgFBwfL5XJpxYoV33uNAACg82tREO3cuVP19fUXjX/zzTd67733vvdxamtrNWTIEK1bt67Z/StWrNCaNWv01FNPad++ferevbvS0tL0zTffWHOmTp2qgwcPqrCwUFu3btXu3bs1e/Zsa7/H41Fqaqri4+NVUlKilStXKi8vTxs2bPgBVwwAADqzrj9kcllZmfX1oUOH5Ha7re3z589r27Zt+slPfvK9j5eenq709PRm93m9Xq1evVqLFy/WL37xC0nS888/r5iYGG3ZskV33323PvvsM23btk0fffSRrrvuOknS2rVrNX78eD322GOKi4tTQUGB6uvrtXHjRtntdg0ePFilpaVatWqVTzj9rbq6OtXV1VnbHo/ne18TAADoeH5QEA0dOlQ2m002m63Zj8ZCQkK0du3aVllYRUWF3G63xo4da41FRERoxIgRKi4u1t13363i4mL17NnTiiFJGjt2rIKCgrRv3z7dfvvtKi4u1qhRo2S32605aWlpevTRR3XmzBn16tXronPn5+dr6dKlrXIdAAAg8P2gIKqoqJDX69WAAQP04Ycfqk+fPtY+u92u6OhodenSpVUWduHuU0xMjM94TEyMtc/tdis6Otpnf9euXRUZGekzp3///hcd48K+5oIoJydHWVlZ1rbH45HL5fqRVwQAAALVDwqi+Ph4SVJTU1ObLCZQOBwOORwOfy8DAAC0kx8URH/r8OHDevfdd3XixImLAik3N/dHL8zpdEqSqqqqFBsba41XVVVp6NCh1pwTJ074vK+xsVGnT5+23u90OlVVVeUz58L2hTkAAMBsLQqip59+WnPnzlXv3r3ldDpls9msfTabrVWCqH///nI6nSoqKrICyOPxaN++fZo7d64kKSUlRdXV1SopKVFycrIk6Z133lFTU5NGjBhhzfnnf/5nNTQ0qFu3bpKkwsJCJSQkNPtxGQAAME+Lgui3v/2t/uVf/k
XZ2dk/6uQ1NTU6cuSItV1RUaHS0lJFRkaqb9++euCBB/Tb3/5WV1xxhfr376+HHnpIcXFxuu222yRJV199tcaNG6dZs2bpqaeeUkNDg+bNm6e7775bcXFxkqQpU6Zo6dKlysjIUHZ2tg4cOKAnn3xSTzzxxI9aOwAA6DxaFERnzpzRnXfe+aNPvn//ft18883W9oUHmadPn65NmzZp0aJFqq2t1ezZs1VdXa0bb7xR27ZtU3BwsPWegoICzZs3T2PGjFFQUJAmTZqkNWvWWPsjIiK0fft2ZWZmKjk5Wb1791Zubu4lf+QeAACYx+b1er0/9E0ZGRkaPny47rvvvrZYU8DxeDyKiIjQ2bNnFR4e3mbnSV74fJsdG+jISlZO8/cSAHRAP+Tf7xbdIbr88sv10EMPae/evUpMTLSezblg/vz5LTksAACAX7QoiDZs2KAePXpo165d2rVrl88+m81GEAEAgA6lRUFUUVHR2usAAADwmxb9cVcAAIDOpEV3iGbOnPmt+zdu3NiixQAAAPhDi3/s/m81NDTowIEDqq6ubvaPvgIAAASyFgXR66+/ftFYU1OT5s6dq4EDB/7oRQEAALSnVnuGKCgoSFlZWfwGaAAA0OG06kPVR48eVWNjY2seEgAAoM216COzC39i4wKv16vjx4/rrbfe0vTp01tlYQAAAO2lRUH0X//1Xz7bQUFB6tOnjx5//PHv/Ak0AACAQNOiIHr33Xdbex0AAAB+06IguuDkyZMqLy+XJCUkJKhPnz6tsigAAID21KKHqmtrazVz5kzFxsZq1KhRGjVqlOLi4pSRkaFz58619hoBAADaVIuCKCsrS7t27dKbb76p6upqVVdX64033tCuXbv04IMPtvYaAQAA2lSLPjL7wx/+oFdffVWjR4+2xsaPH6+QkBDdddddWr9+fWutDwAAoM216A7RuXPnFBMTc9F4dHQ0H5kBAIAOp0VBlJKSoiVLluibb76xxv76179q6dKlSklJabXFAQAAtIcWfWS2evVqjRs3TpdddpmGDBkiSfrkk0/kcDi0ffv2Vl0gAABAW2tRECUmJurw4cMqKCjQ559/LkmaPHmypk6dqpCQkFZdIAAAQFtrURDl5+crJiZGs2bN8hnfuHGjTp48qezs7FZZHAAAQHto0TNEv/vd73TVVVddND548GA99dRTP3pRAAAA7alFQeR2uxUbG3vReJ8+fXT8+PEfvSgAAID21KIgcrlc2rNnz0Xje/bsUVxc3I9eFAAAQHtq0TNEs2bN0gMPPKCGhgbdcsstkqSioiItWrSI31QNAAA6nBYF0cKFC3Xq1Cn95je/UX19vSQpODhY2dnZysnJadUFAgAAtLUWBZHNZtOjjz6qhx56SJ999plCQkJ0xRVXyOFwtPb6AAAA2lyLguiCHj16aPjw4a21FgAAAL9o0UPVAAAAnQlBBAAAjEcQAQAA4xFEAADAeAQRAAAwHkEEAACMRxABAADjEUQAAMB4BBEAADAeQQQAAIwX8EHUr18/2Wy2i16ZmZmSpNGjR1+077777vM5RmVlpSZMmKDQ0FBFR0dr4cKFamxs9MflAACAAPSj/pZZe/joo490/vx5a/vAgQP62c9+pjvvvNMamzVrlpYtW2Zth4aGWl+fP39eEyZMkNPp1AcffKDjx49r2rRp6tatmx555JH2uQgAABDQAj6I+vTp47O9fPlyDRw4UDfddJM1FhoaKqfT2ez7t2/frkOHDmnHjh2KiYnR0KFD9fDDDys7O1t5eXmy2+1tun4AABD4Av4js79VX1+v//iP/9DMmTNls9ms8YKCAvXu3VvXXHONcnJydO7cOWtfcXGxEhMTFRMTY42lpaXJ4/Ho4MGDzZ6nrq5OHo/H5wUAADqvgL9D9Le2bNmi6upqzZgxwxqbMmWK4uPjFRcXp7KyMmVnZ6u8vFyvvfaaJMntdvvEkCRr2+12N3ue/Px8LV26tG0uAgAABJwOFUTPPPOM0t
PTFRcXZ43Nnj3b+joxMVGxsbEaM2aMjh49qoEDB7boPDk5OcrKyrK2PR6PXC5XyxcOAAACWocJoj//+c/asWOHdefnUkaMGCFJOnLkiAYOHCin06kPP/zQZ05VVZUkXfK5I4fDIYfD0QqrBgAAHUGHeYbo2WefVXR0tCZMmPCt80pLSyVJsbGxkqSUlBR9+umnOnHihDWnsLBQ4eHhGjRoUJutFwAAdBwd4g5RU1OTnn32WU2fPl1du/7vko8ePaoXXnhB48ePV1RUlMrKyrRgwQKNGjVKSUlJkqTU1FQNGjRI9957r1asWCG3263FixcrMzOTu0AAAEBSBwmiHTt2qLKyUjNnzvQZt9vt2rFjh1avXq3a2lq5XC5NmjRJixcvtuZ06dJFW7du1dy5c5WSkqLu3btr+vTpPr+3CAAAmK1DBFFqaqq8Xu9F4y6XS7t27frO98fHx+vtt99ui6UBAIBOoMM8QwQAANBWCCIAAGA8gggAABiPIAIAAMYjiAAAgPEIIgAAYDyCCAAAGI8gAgAAxiOIAACA8TrEb6oGgI6uclmiv5cABKS+uZ/6ewmSuEMEAABAEAEAABBEAADAeAQRAAAwHkEEAACMRxABAADjEUQAAMB4BBEAADAeQQQAAIxHEAEAAOMRRAAAwHgEEQAAMB5BBAAAjEcQAQAA4xFEAADAeAQRAAAwHkEEAACMRxABAADjEUQAAMB4BBEAADAeQQQAAIxHEAEAAOMRRAAAwHgEEQAAMB5BBAAAjEcQAQAA4xFEAADAeAQRAAAwHkEEAACMF9BBlJeXJ5vN5vO66qqrrP3ffPONMjMzFRUVpR49emjSpEmqqqryOUZlZaUmTJig0NBQRUdHa+HChWpsbGzvSwEAAAGsq78X8F0GDx6sHTt2WNtdu/7vkhcsWKC33npLr7zyiiIiIjRv3jzdcccd2rNnjyTp/PnzmjBhgpxOpz744AMdP35c06ZNU7du3fTII4+0+7UAAIDAFPBB1LVrVzmdzovGz549q2eeeUYvvPCCbrnlFknSs88+q6uvvlp79+7V9ddfr+3bt+vQoUPasWOHYmJiNHToUD388MPKzs5WXl6e7HZ7s+esq6tTXV2dte3xeNrm4gAAQEAI6I/MJOnw4cOKi4vTgAEDNHXqVFVWVkqSSkpK1NDQoLFjx1pzr7rqKvXt21fFxcWSpOLiYiUmJiomJsaak5aWJo/Ho4MHD17ynPn5+YqIiLBeLperja4OAAAEgoAOohEjRmjTpk3atm2b1q9fr4qKCo0cOVJff/213G637Ha7evbs6fOemJgYud1uSZLb7faJoQv7L+y7lJycHJ09e9Z6ffnll617YQAAIKAE9Edm6enp1tdJSUkaMWKE4uPj9fLLLyskJKTNzutwOORwONrs+AAAILAE9B2i/6tnz5668sordeTIETmdTtXX16u6utpnTlVVlfXMkdPpvOinzi5sN/dcEgAAMFOHCqKamhodPXpUsbGxSk5OVrdu3VRUVGTtLy8vV2VlpVJSUiRJKSkp+vTTT3XixAlrTmFhocLDwzVo0KB2Xz8AAAhMAf2R2T/8wz9o4sSJio+P11dffaUlS5aoS5cumjx5siIiIpSRkaGsrCxFRkYqPDxc999/v1JSUnT99ddLklJTUzVo0CDde++9WrFihdxutxYvXqzMzEw+EgMAAJaADqK//OUvmjx5sk6dOqU+ffroxhtv1N69e9WnTx9J0hNPPKGgoCBNmjRJdXV1SktL07/9279Z7+/SpYu2bt2quXPnKiUlRd27d9f06dO1bNkyf10SAAAIQAEdRJs3b/7W/cHBwVq3bp3WrVt3yTnx8fF6++23W3tpAACgE+lQzxABAAC0BYIIAAAYjyACAADGI4gAAIDxCCIAAGA8gggAABiPIAIAAMYjiAAAgPEIIgAAYDyCCAAAGI8gAgAAxiOIAACA8QgiAABgPIIIAAAYjyACAADGI4gAAIDxCCIAAGA8gg
gAABiPIAIAAMYjiAAAgPEIIgAAYDyCCAAAGI8gAgAAxiOIAACA8QgiAABgPIIIAAAYjyACAADGI4gAAIDxCCIAAGA8gggAABiPIAIAAMYjiAAAgPEIIgAAYDyCCAAAGI8gAgAAxiOIAACA8QgiAABgPIIIAAAYL6CDKD8/X8OHD1dYWJiio6N12223qby83GfO6NGjZbPZfF733Xefz5zKykpNmDBBoaGhio6O1sKFC9XY2NielwIAAAJYV38v4Nvs2rVLmZmZGj58uBobG/VP//RPSk1N1aFDh9S9e3dr3qxZs7Rs2TJrOzQ01Pr6/PnzmjBhgpxOpz744AMdP35c06ZNU7du3fTII4+06/UAAIDAFNBBtG3bNp/tTZs2KTo6WiUlJRo1apQ1HhoaKqfT2ewxtm/frkOHDmnHjh2KiYnR0KFD9fDDDys7O1t5eXmy2+1teg0AACDwBfRHZv/X2bNnJUmRkZE+4wUFBerdu7euueYa5eTk6Ny5c9a+4uJiJSYmKiYmxhpLS0uTx+PRwYMHmz1PXV2dPB6PzwsAAHReAX2H6G81NTXpgQce0A033KBrrrnGGp8yZYri4+MVFxensrIyZWdnq7y8XK+99pokye12+8SQJGvb7XY3e678/HwtXbq0ja4EAAAEmg4TRJmZmTpw4IDef/99n/HZs2dbXycmJio2NlZjxozR0aNHNXDgwBadKycnR1lZWda2x+ORy+Vq2cIBAEDA6xAfmc2bN09bt27Vu+++q8suu+xb544YMUKSdOTIEUmS0+lUVVWVz5wL25d67sjhcCg8PNznBQAAOq+ADiKv16t58+bp9ddf1zvvvKP+/ft/53tKS0slSbGxsZKklJQUffrppzpx4oQ1p7CwUOHh4Ro0aFCbrBsAAHQsAf2RWWZmpl544QW98cYbCgsLs575iYiIUEhIiI4ePaoXXnhB48ePV1RUlMrKyrRgwQKNGjVKSUlJkqTU1FQNGjRI9957r1asWCG3263FixcrMzNTDofDn5cHAAACREDfIVq/fr3Onj2r0aNHKzY21nq99NJLkiS73a4dO3YoNTVVV111lR588EFNmjRJb775pnWMLl26aOvWrerSpYtSUlJ0zz33aNq0aT6/twgAAJgtoO8Qeb3eb93vcrm0a9eu7zxOfHy83n777dZaFgAA6GQC+g4RAABAeyCIAACA8QgiAABgPIIIAAAYjyACAADGI4gAAIDxCCIAAGA8gggAABiPIAIAAMYjiAAAgPEIIgAAYDyCCAAAGI8gAgAAxiOIAACA8QgiAABgPIIIAAAYjyACAADGI4gAAIDxCCIAAGA8gggAABiPIAIAAMYjiAAAgPEIIgAAYDyCCAAAGI8gAgAAxiOIAACA8QgiAABgPIIIAAAYjyACAADGI4gAAIDxCCIAAGA8gggAABiPIAIAAMYjiAAAgPEIIgAAYDyCCAAAGI8gAgAAxiOIAACA8QgiAABgPKOCaN26derXr5+Cg4M1YsQIffjhh/5eEgAACADGBNFLL72krKwsLVmyRB9//LGGDBmitLQ0nThxwt9LAwAAfmZMEK1atUqzZs3Sr3/9aw0aNEhPPfWUQkNDtXHjRn8vDQAA+FlXfy+gPdTX16ukpEQ5OTnWWFBQkMaOHavi4uKL5tfV1amurs7aPnv2rCTJ4/G06TrP1/21TY8PdFRt/b3XHr7+5ry/lwAEpLb8/r5wbK/X+51zjQii//7v/9b58+cVExPjMx4TE6PPP//8ovn5+flaunTpReMul6vN1gjg0iLW3ufvJQBoK/kRbX6Kr7/+WhER334eI4Loh8rJyVFWVpa13dTUpNOnTysqKko2m82PK0N78Hg8crlc+vLLLxUeHu7v5QBoRXx/m8Xr9errr79WXFzcd841Ioh69+6tLl26qKqqyme8qqpKTqfzovkOh0MOh8NnrGfPnm25RASg8PBw/oMJdFJ8f5vju+4MXWDEQ9V2u13JyckqKiqyxp
qamlRUVKSUlBQ/rgwAAAQCI+4QSVJWVpamT5+u6667Tj/96U+1evVq1dbW6te//rW/lwYAAPzMmCD61a9+pZMnTyo3N1dut1tDhw7Vtm3bLnrQGnA4HFqyZMlFH5sC6Pj4/sal2Lzf52fRAAAAOjEjniECAAD4NgQRAAAwHkEEAACMRxChUxs9erQeeOABfy8DABDgCCIAAGA8gggAABiPIEKn19TUpEWLFikyMlJOp1N5eXnWvlWrVikxMVHdu3eXy+XSb37zG9XU1Fj7N23apJ49e2rr1q1KSEhQaGiofvnLX+rcuXN67rnn1K9fP/Xq1Uvz58/X+fP8NXOgrb366qtKTExUSEiIoqKiNHbsWNXW1mrGjBm67bbbtHTpUvXp00fh4eG67777VF9fb71327ZtuvHGG9WzZ09FRUXp5z//uY4ePWrtP3bsmGw2m15++WWNHDlSISEhGj58uL744gt99NFHuu6669SjRw+lp6fr5MmT/rh8tCGCCJ3ec889p+7du2vfvn1asWKFli1bpsLCQklSUFCQ1qxZo4MHD+q5557TO++8o0WLFvm8/9y5c1qzZo02b96sbdu2aefOnbr99tv19ttv6+2339bvf/97/e53v9Orr77qj8sDjHH8+HFNnjxZM2fO1GeffaadO3fqjjvu0IVfp1dUVGSNv/jii3rttde0dOlS6/21tbXKysrS/v37VVRUpKCgIN1+++1qamryOc+SJUu0ePFiffzxx+rataumTJmiRYsW6cknn9R7772nI0eOKDc3t12vHe3AC3RiN910k/fGG2/0GRs+fLg3Ozu72fmvvPKKNyoqytp+9tlnvZK8R44cscbmzJnjDQ0N9X799dfWWFpamnfOnDmtvHoAf6ukpMQryXvs2LGL9k2fPt0bGRnpra2ttcbWr1/v7dGjh/f8+fPNHu/kyZNeSd5PP/3U6/V6vRUVFV5J3n//93+35rz44oteSd6ioiJrLD8/35uQkNBal4UAwR0idHpJSUk+27GxsTpx4oQkaceOHRozZox+8pOfKCwsTPfee69OnTqlc+fOWfNDQ0M1cOBAazsmJkb9+vVTjx49fMYuHBNA2xgyZIjGjBmjxMRE3XnnnXr66ad15swZn/2hoaHWdkpKimpqavTll19Kkg4fPqzJkydrwIABCg8PV79+/SRJlZWVPuf52/9mXPjzTomJiT5jfL93PgQROr1u3br5bNtsNjU1NenYsWP6+c9/rqSkJP3hD39QSUmJ1q1bJ0k+zx009/5LHRNA2+nSpYsKCwv1n//5nxo0aJDWrl2rhIQEVVRUfK/3T5w4UadPn9bTTz+tffv2ad++fZJ8v98l3+95m83W7Bjf752PMX/cFfi/SkpK1NTUpMcff1xBQf/z/w1efvllP68KwLex2Wy64YYbdMMNNyg3N1fx8fF6/fXXJUmffPKJ/vrXvyokJESStHfvXvXo0UMul0unTp1SeXm5nn76aY0cOVKS9P777/vtOhB4CCIY6/LLL1dDQ4PWrl2riRMnas+ePXrqqaf8vSwAl7Bv3z4VFRUpNTVV0dHR2rdvn06ePKmrr75aZWVlqq+vV0ZGhhYvXqxjx45pyZIlmjdvnoKCgtSrVy9FRUVpw4YNio2NVWVlpf7xH//R35eEAMJHZjDWkCFDtGrVKj366KO65pprVFBQoPz8fH8vC8AlhIeHa/fu3Ro/fryuvPJKLV68WI8//rjS09MlSWPGjNEVV1yhUaNG6Ve/+pVuvfVW69dsBAUFafPmzSopKdE111yjBQsWaOXKlX68GgQam9f7/39eEQCADmrGjBmqrq7Wli1b/L0UdFDcIQIAAMYjiAAAgPH4yAwAABiPO0QAAMB4BBEAADAeQQQAAIxHEAEAAOMRRAAAwHgEEYBOYfTo0XrggQe+19ydO3fKZrOpurr6R52zX79+Wr169Y86BoDAQBABAADjEUQAAMB4BBGATuf3v/+9rrvuOoWFhcnpdG
rKlCk6ceLERfP27NmjpKQkBQcH6/rrr9eBAwd89r///vsaOXKkQkJC5HK5NH/+fNXW1rbXZQBoRwQRgE6noaFBDz/8sD755BNt2bJFx44d04wZMy6at3DhQj3++OP66KOP1KdPH02cOFENDQ2SpKNHj2rcuHGaNGmSysrK9NJLL+n999/XvHnz2vlqALSHrv5eAAC0tpkzZ1pfDxgwQGvWrNHw4cNVU1OjHj16WPuWLFmin/3sZ5Kk5557Tpdddplef/113XXXXcrPz9fUqVOtB7WvuOIKrVmzRjfddJPWr1+v4ODgdr0mAG2LO0QAOp2SkhJNnDhRffv2VVhYmG666SZJUmVlpc+8lJQU6+vIyEglJCTos88+kyR98skn2rRpk3r06GG90tLS1NTUpIqKiva7GADtgjtEADqV2tpapaWlKS0tTQUFBerTp48qKyuVlpam+vr6732cmpoazZkzR/Pnz79oX9++fVtzyQACAEEEoFP5/PPPderUKS1fvlwul0uStH///mbn7t2714qbM2fO6IsvvtDVV18tSbr22mt16NAhXX755e2zcAB+xUdmADqVvn37ym63a+3atfrTn/6kP/7xj3r44Yebnbts2TIVFRXpwIEDmjFjhnr37q3bbrtNkpSdna0PPvhA8+bNU2lpqQ4fPqw33niDh6qBToogAtCp9OnTR5s2bdIrr7yiQYMGafny5Xrssceanbt8+XL9/d//vZKTk+V2u/Xmm2/KbrdLkpKSkrRr1y598cUXGjlypIYNG6bc3FzFxcW15+UAaCc2r9fr9fciAAAA/Ik7RAAAwHgEEQAAMB5BBAAAjEcQAQAA4xFEAADAeAQRAAAwHkEEAACMRxABAADjEUQAAMB4BBEAADAeQQQAAIz3/wDm+TLDOJ8aIgAAAABJRU5ErkJggg==",
"text/plain": [
"<Figure size 640x480 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"sns.countplot(data=df, x=\"label\")  # number of messages per label"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"<AxesSubplot: ylabel='count'>"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAkMAAAGdCAYAAAAR5XdZAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjYuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8o6BhiAAAACXBIWXMAAA9hAAAPYQGoP6dpAAA4i0lEQVR4nO3deXxU5aH/8e8syWQPZk9ICKugIqiIGBcuIIWERZQornXjakXESqzYtKLF1otdtQsGN8QNNwJSrYKVSqwKWqlc9NrLVbQVlQD1J0wMJKB5fn8k5zAzmQlJSDIh5/N+vZ7XzJzzzDPPnDkz853nLOMyxhgBAAA4lDvaHQAAAIgmwhAAAHA0whAAAHA0whAAAHA0whAAAHA0whAAAHA0whAAAHA0whAAAHA0b7Q70NkaGhr0xRdfKDk5WS6XK9rdAQAArWCMUU1NjfLy8uR2d+7YTY8PQ1988YUKCgqi3Q0AANAO27ZtU35+fqc+Ro8PQ8nJyZIaF2ZKSkqUewMAAFrD7/eroKDA/h7vTD0+DFmbxlJSUghDAAAcYbpiF5eo7kBdUVGhYcOG2UGlqKhIL730kj1/zJgxcrlcQeXaa6+NYo8BAEBPE9WRofz8fN11110aNGiQjDF65JFHNG3aNL377rs67rjjJElXX3217rjjDvs+CQkJ0eouAADogaIahqZOnRp0+84771RFRYU2bNhgh6GEhATl5OREo3sAAMABus15hr799ls99dRTqq2tVVFRkT39iSeeUEZGhoYOHary8nLt3bu3xXbq6+vl9/uDCgAAQCRR34H6vffeU1FRkerq6pSUlKSVK1fq2GOPlSRdfPHFKiwsVF5enjZv3qxbbrlFW7Zs0YoVKyK2t3DhQi1YsKCrug8AAI5wLmOMiWYH9u/fr08//VR79uzR8uXL9eCDD6qqqsoORIH+8pe/6KyzztJHH32kAQMGhG2vvr5e9fX19m3r0Lw9e/ZwNBkAAEcIv9+v1NTULvn+jnoYCjV+/HgNGDBA9913X7N5tbW1SkpK0urVqzVx4sRWtdeVCxMAAHSMrvz+7jb7DFkaGhqCRnYCbdq0SZKUm5vbhT0CAAA9WVT3GSovL1dJSYn69OmjmpoaLVu2TOvWrdOaNWu0detWLVu2TJMmTVJ6ero2b96suXPnavTo0Ro2bFg0uw0AAHqQqIahnTt36rLLLtP27duVmpqqYcOGac2aNfrOd76jbdu26ZVXXtE999yj2tpaFRQUqLS0VLfeems0uwwAAHqYbrfPUEdjnyEAAI48jt5nCAAAoCsRhgAAgKMRhgAAgKMRhoBOUlZZrLLK4mh3AwBwCIQhAADgaIQhAADgaIQhAADgaIQhAADgaIQhAADgaIQhAADgaIQhAADgaIQhAADgaIQhAADgaIQhAADgaIQhAADgaIQhAADgaIQhAADgaIQhAADgaIQhAADgaIQhAADgaIQhAADgaIQhAADgaIQhAADgaIQhAADgaIQhdIgXlpTohSUl0e4GAABtRhgCAACORhgCAACORhgCAACORhgCAACORhgCAACORhgCAACORhgCAACORhgCAACORhgCAACORhhCl6l8uFiVDxdHuxsAAAQhDAEAAEcjDAEAAEcjDAEAAEcjDAEAAEcjDAEAAEcjDAEAAEeLahiqqKjQsGHDlJKSopSUFBUVFemll16y59fV1Wn27NlKT09XUlKSSktLtWPHjij2GAAA9DRRDUP5+fm66667tHHjRr3zzjsaN26cpk2bpv/5n/+RJM2dO1fPP/+8nn32WVVVVemLL77Q9OnTo9llAADQw3ij+eBTp04Nun3nnXeqoqJCGzZsUH5+vh566CEtW7ZM48aNkyQ9/PDDOuaYY7Rhwwadeuqp0egyAADoYbrNPkPffvutnnrqKdXW1qqoqEgbN27UgQMHNH78eLvOkCFD1KdPH61fvz5iO/X19fL7/UEFAAAgkqiODEnSe++9p6
KiItXV1SkpKUkrV67Uscceq02bNik2Nla9evUKqp+dna3q6uqI7S1cuFALFizo5F7D8uJDkxqvuCLXWbmk5JB12uOhRydIkmZe9nLHNgwAcJSojwwNHjxYmzZt0ltvvaVZs2bp8ssv1wcffNDu9srLy7Vnzx67bNu2rQN7CwAAepqojwzFxsZq4MCBkqQRI0bob3/7m37729/qggsu0P79+7V79+6g0aEdO3YoJycnYns+n08+n6+zuw0AAHqIqI8MhWpoaFB9fb1GjBihmJgYrV271p63ZcsWffrppyoqKopiDwEAQE8S1ZGh8vJylZSUqE+fPqqpqdGyZcu0bt06rVmzRqmpqZo5c6bKysqUlpamlJQUzZkzR0VFRRxJBgAAOkxUw9DOnTt12WWXafv27UpNTdWwYcO0Zs0afec735Ek3X333XK73SotLVV9fb0mTpyoe++9N5pdBgAAPUxUw9BDDz3U4vy4uDgtWrRIixYt6qIeAQAAp+l2+wwBAAB0JcIQAABwNMIQAABwNMIQ0INMeu5mTXru5mh3AwCOKIQhAADgaIQhAADgaIQhAADgaIQhAADgaIQhAADgaIQhAADgaIQhAADgaIQhAADgaIQhAADgaIQhNPPqg5P16oOTW6yz+qFJYac/v6REzy8p6YxuHZZFj0/UoscnRuWxb6gs1g2VxVF5bADAoRGGAACAoxGGAACAoxGGAACAoxGGAACAoxGGAACAoxGGAACAoxGGAACAoxGGAACAoxGGAACAoxGGAACAoxGGYKt6IPxfcLzy4CS98mD4v9/AkWfSygWatHJBu+47ecXdmrzi7g7uEQBEF2EIAAA4GmEIAAA4GmEIAAA4GmEIAAA4GmEIAAA4GmEIAAA4GmEIAAA4GmEIAAA4GmEIAAA4GmHI4f76wBT99YEp0e4GjmCTV/xOk1f8LuD2Ik1esahLHnvK8ic0ZfkTXfJYAHouwhAAAHA0whAAAHA0whAAAHA0whAAAHA0whAAAHC0qIahhQsXauTIkUpOTlZWVpbOOeccbdmyJajOmDFj5HK5gsq1114bpR4DAICeJqphqKqqSrNnz9aGDRv05z//WQcOHNCECRNUW1sbVO/qq6/W9u3b7fKLX/wiSj0GAAA9jTeaD7569eqg20uXLlVWVpY2btyo0aNH29MTEhKUk5PT1d0DAAAO0K32GdqzZ48kKS0tLWj6E088oYyMDA0dOlTl5eXau3dvxDbq6+vl9/uDCgAAQCTdJgw1NDToxhtv1Omnn66hQ4fa0y+++GI9/vjjevXVV1VeXq7HHntMl156acR2Fi5cqNTUVLsUFBR0RfcRxqolJR3a3iNLJ3Roe79dNrHd9/3JMxP1k2faf/+eYPKK30a7CwDQIaK6mSzQ7Nmz9f777+v1118Pmn7NNdfY148//njl5ubqrLPO0tatWzVgwIBm7ZSXl6usrMy+7ff7CUQAACCibhGGrr/+er3wwgt67bXXlJ+f32LdUaNGSZI++uijsGHI5/PJ5/N1Sj8BAEDPE9UwZIzRnDlztHLlSq1bt079+vU75H02bdokScrNze3k3gEAACeIahiaPXu2li1bplWrVik5OVnV1dWSpNTUVMXHx2vr1q1atmyZJk2apPT0dG3evFlz587V6NGjNWzYsGh2HQAA9BBRDUMVFRWSGk+sGOjhhx/WFVdcodjYWL3yyiu65557VFtbq4KCApWWlurWW2+NQm8BAEBPFPXNZC0pKChQVVVVF/UGAAA4Ubc5tB4AACAaCEMAAMDRCEMAAMDRCEMAAMDRCEM92N/um6q/3Tc17Lw37p+iN+6f0sU9AoJNrnxQkysflCRNqVwS5d4AcCrCEAAAcDTCEAAAcDTCEAAAcDTCEAAAcDTCEAAAcDTCEAAAcDTCEAAAcDTCEAAAcDTCEAAAcDRvtDuA9nv/3rM19Lo/Rrsb3dLixybq2u+uaTb9909MlCTNuaT5vFB3PdVY94cXHrpue1
36XLEk6fFzVndou5Oe+2HTNY8k6cVz7jys9iav+HXTtcP7/TS58n5J0p9KrzmsdlpjyvKn9MJ5F3b64wA48jEyBAAAHI0wBAAAHI0wBAAAHI0wBAAAHI0wBAAAHI0wBAAAHI0wBAAAHI0wBAAAHI0wBAAAHI0wBAAAHI0w5BBv3TelzfdZ++BkrX1wcif0pnv69ZMT9esnJ4add+fT4ad3ppJV07v8MbuTKZVLNaVyabS7AcABCEMAAMDRCEMAAMDRCEMAAMDRCEMAAMDRCEMAAMDRCEMAAMDRCEMAAMDRCEMAAMDRCEMAAMDRvNHuAJzrmYeLJUkzrlzdaY9R8Xjbzxz9C+ss1K7m8376dOR53d2klbfpxXPvaGXdX0iSXjx3Xmd2qUtNWf6MJOmF82ZEuScAuhtGhgAAgKMRhgAAgKMRhgAAgKMRhgAAgKMRhgAAgKNFNQwtXLhQI0eOVHJysrKysnTOOedoy5YtQXXq6uo0e/ZspaenKykpSaWlpdqxY0eUegwAAHqaqIahqqoqzZ49Wxs2bNCf//xnHThwQBMmTFBtba1dZ+7cuXr++ef17LPPqqqqSl988YWmT58exV4DAICeJKrnGVq9Ovj8MkuXLlVWVpY2btyo0aNHa8+ePXrooYe0bNkyjRs3TpL08MMP65hjjtGGDRt06qmnRqPbAACgB+lW+wzt2bNHkpSWliZJ2rhxow4cOKDx48fbdYYMGaI+ffpo/fr1Yduor6+X3+8PKgAAAJF0mzDU0NCgG2+8UaeffrqGDh0qSaqurlZsbKx69eoVVDc7O1vV1dVh21m4cKFSU1PtUlBQ0Nldd4yXH5qklx+aFO1uNPPgoxP14KNtP9N0dzTpj8Wa9MfiVtcvee76TuzN4Zu8oqJD25uy/NEObW/q8soObQ/AkanbhKHZs2fr/fff11NPPXVY7ZSXl2vPnj122bZtWwf1EAAA9ETd4r/Jrr/+er3wwgt67bXXlJ+fb0/PycnR/v37tXv37qDRoR07dignJydsWz6fTz6fr7O7DAAAeoiojgwZY3T99ddr5cqV+stf/qJ+/foFzR8xYoRiYmK0du1ae9qWLVv06aefqqioqKu7CwAAeqCojgzNnj1by5Yt06pVq5ScnGzvB5Samqr4+HilpqZq5syZKisrU1pamlJSUjRnzhwVFRVxJBkAAOgQ7RoZGjdunHbv3t1sut/vtw+Bb42Kigrt2bNHY8aMUW5url2efvppu87dd9+tKVOmqLS0VKNHj1ZOTo5WrFjRnm4DAAA0066RoXXr1mn//v3NptfV1emvf/1rq9sxxhyyTlxcnBYtWqRFixa1qY8AAACt0aYwtHnzZvv6Bx98EHR4+7fffqvVq1erd+/eHdc7AACATtamMHTCCSfI5XLJ5XKF3RwWHx+v3//+9x3WOQAAgM7WpjD0ySefyBij/v376+2331ZmZqY9LzY2VllZWfJ4PB3eSQAAgM7SpjBUWFgoqfFs0QAAAD1Buw+t//DDD/Xqq69q586dzcLRbbfddtgdQ6MP/zBNkjTo+lWSpA/uPVuSdOx1f2xXe+vvn9IxHTsMyx8+9N9NPL608e81Lr1iTWd354hTsup7kqSXpt3X4W1PWnmXXjz3hx3ebltMqXy46Zqr9fdZvkyS9MJ5F3dCjwD0dO0KQw888IBmzZqljIwM5eTkyOU6+KHlcrkIQwAA4IjRrjD0s5/9THfeeaduueWWju4PAABAl2rXSRe/+uornX/++R3dFwAAgC7XrjB0/vnn6+WXX+7ovgAAAHS5dm0mGzhwoObPn68NGzbo+OOPV0xMTND8G264oUM6BwAA0NnaFYbuv/9+JSUlqaqqSlVVVUHzXC4XYQgAABwx2hWGPvnkk47uBwAAQFS0a58hAACAnqJdI0NXXXVVi/OXLFnSrs4AAAB0tXaFoa+++iro9oEDB/T+++9r9+7dYf/AFZ1rc0XjWamHzWrfWam7g6eWTtSFV6zRsq
YzT1sea7r93U48E/Xdy5oes/UnPD5sM1cW66FzV3fdA7Zg0so7JUkvnvvjw2pn8oo/NF1rviAnVy5uutb2wegplY9Ikl4ovfzQdZc/2Vj3vIva/DgAnKtdYWjlypXNpjU0NGjWrFkaMGDAYXcKAACgq3TYPkNut1tlZWW6++67O6pJAACATtehO1Bv3bpV33zzTUc2CQAA0KnatZmsrKws6LYxRtu3b9ef/vQnXX75obfrAwAAdBftCkPvvvtu0G23263MzEz9+te/PuSRZgAAAN1Ju8LQq6++2tH9AAAAiIp2hSHLrl27tGXLFknS4MGDlZmZ2SGdAgAA6Crt2oG6trZWV111lXJzczV69GiNHj1aeXl5mjlzpvbu3dvRfQQAAOg07QpDZWVlqqqq0vPPP6/du3dr9+7dWrVqlaqqqnTTTTd1dB8BAAA6Tbs2k1VWVmr58uUaM2aMPW3SpEmKj4/XjBkzVFFR0VH9A5p5+JEJjVe68IzRbTFveXHjlS7sX8lzc5se8rC2fHeZyZUPNF3rpi8iAEdp18jQ3r17lZ2d3Wx6VlYWm8kAAMARpV1hqKioSLfffrvq6ursafv27dOCBQtUVFTUYZ0DAADobO0aU7/nnntUXFys/Px8DR8+XJL03//93/L5fHr55Zc7tIMAAACdqV1h6Pjjj9eHH36oJ554Qv/7v/8rSbrooot0ySWXKD4+vkM7CAAA0JnaFYYWLlyo7OxsXX311UHTlyxZol27dumWW27pkM4BAAB0tnbtM3TfffdpyJAhzaYfd9xxWrx48WF3CgAAoKu0KwxVV1crNze32fTMzExt3779sDsFAADQVdoVhgoKCvTGG280m/7GG28oLy/vsDsFAADQVdq1z9DVV1+tG2+8UQcOHNC4ceMkSWvXrtW8efM4AzUAADiitCsM3Xzzzfryyy913XXXaf/+/ZKkuLg43XLLLSovL+/QDgIAAHSmdoUhl8uln//855o/f77+8Y9/KD4+XoMGDZLP5+vo/qEdNi6e2njFYf90cP9jEyVJ13x3TZR70jlKVl3VdC2myx5z8spfNV1z2MrUgrOX/0mS9MfzJke5JwA6ymH9kVFSUpJGjhzZUX0BAADocu3agRoAAKCnIAwBAABHIwwBAABHIwwBAABHi2oYeu211zR16lTl5eXJ5XLpueeeC5p/xRVXyOVyBZXi4uLodBYAAPRIUQ1DtbW1Gj58uBYtWhSxTnFxsbZv326XJ598sgt7CAAAerrDOrT+cJWUlKikpKTFOj6fTzk5OV3UIwAA4DTdfp+hdevWKSsrS4MHD9asWbP05Zdftli/vr5efr8/qAAAAEQS1ZGhQykuLtb06dPVr18/bd26VT/60Y9UUlKi9evXy+PxhL3PwoULtWDBgi7uKZxq/rON+7D99PzV+tGzxfqv81d3+GOUrLqk6VrHnuF90sqfNl3r9r+JAKBTdeswdOGFF9rXjz/+eA0bNkwDBgzQunXrdNZZZ4W9T3l5ucrKyuzbfr9fBQUFnd5XAABwZDqifhL2799fGRkZ+uijjyLW8fl8SklJCSoAAACRHFFh6LPPPtOXX36p3NzcaHcFAAD0EFHdTPb1118HjfJ88skn2rRpk9LS0pSWlqYFCxaotLRUOTk52rp1q+bNm6eBAwdq4sSJUew1AADoSaIaht555x2NHTvWvm3t63P55ZeroqJCmzdv1iOPPKLdu3crLy9PEyZM0E9/+lP5fB27IykAAHCuqIahMWPGyBgTcf6aNWu6sDcAAMCJjqh9hgAAADoaYQgAADgaYQgAADhatz7pItrm3cVTo90FwDGmLX9RkrTqvElR7gmAw8XIEAAAcDTCEAAAcDTCEAAAcDTCEAAAcDTCEAAAcDTCEAAAcDTCEAAAcDTCEAAAcDTCEAAAcDTCEAAAcDT+jgNAjzZl+dNN11yNt599VpL0wvnnR6lHALobRoYAAICjEYYAAICjEYYAAICjEYYAAICjEY
YAAICjEYYAAICjEYYAAICjEYYAAICjEYYAAICjcQbqTrL93lslSbnX/SzKPcGR4LxVxY1XXNHtx5FiyvLHmq51zu+5qctXSZKeP29ap7QPoHthZAgAADgaYQgAADgaYQgAADgaYQgAADgaYQgAADgaYQgAADgaYQgAADgaYQgAADgaYQgAADgaZ6COos/+MFOSlH/9Q1HuCbrCrBWNZ5mumL46yj1BJFOXr2y61vx34tnLn9cfz5vaqnamLX9ZkrTqvAkd1TUAnYiRIQAA4GiEIQAA4GiEIQAA4GiEIQAA4GiEIQAA4GiEIQAA4GhRDUOvvfaapk6dqry8PLlcLj333HNB840xuu2225Sbm6v4+HiNHz9eH374YXQ6CwAAeqSohqHa2loNHz5cixYtCjv/F7/4hX73u99p8eLFeuutt5SYmKiJEyeqrq6ui3sKAAB6qqiedLGkpEQlJSVh5xljdM899+jWW2/VtGnTJEmPPvqosrOz9dxzz+nCCy/syq4CAIAeqtvuM/TJJ5+ourpa48ePt6elpqZq1KhRWr9+fcT71dfXy+/3BxUAAIBIuu3fcVRXV0uSsrOzg6ZnZ2fb88JZuHChFixY0Kl9A9CzTF2+oumaK2j62cv/KEn643lnR7zvtOUvSZJWnRd+lBtA99dtR4baq7y8XHv27LHLtm3bot0lAADQjXXbMJSTkyNJ2rFjR9D0HTt22PPC8fl8SklJCSoAAACRdNsw1K9fP+Xk5Gjt2rX2NL/fr7feektFRUVR7BkAAOhJorrP0Ndff62PPvrIvv3JJ59o06ZNSktLU58+fXTjjTfqZz/7mQYNGqR+/fpp/vz5ysvL0znnnBO9TgMAgB4lqmHonXfe0dixY+3bZWVlkqTLL79cS5cu1bx581RbW6trrrlGu3fv1hlnnKHVq1crLi4uWl0GAAA9TFTD0JgxY2SMiTjf5XLpjjvu0B133NGFvQIAAE7SbfcZAgAA6AqEIQAA4GiEIQAA4Gjd9gzUTvSv350jSSq84Tl9/Ptz1H/Oc/a8LYsa/58t5AS5AADgMDEyBAAAHI0wBAAAHI0wBAAAHI0wBAAAHI0wBAAAHI0wBAAAHI0wBAAAHI0wBAAAHI0wBAAAHI0wBAAdZNryNZ3SbmnlWyqtfKtT2gZAGAIAAA5HGAIAAI5GGAIAAI5GGAIAAI5GGAIAAI5GGAIAAI5GGAIAAI5GGAIAAI5GGAIAAI5GGAIAAI7mjXYHnOKLRXMlSXmz7242b9vvL+7q7gDoAucsf0WS9Nx54yPWObfyNUnSytLRXdInAM0xMgQAAByNMAQAAByNMAQAAByNMAQAAByNMAQAAByNMAQAAByNMAQAAByNMAQAAByNMAQAAByNM1BHweeLrlPv2fdGuxsAusg5y9c2XnE1/v58rnRsFHsDIBQjQwAAwNEIQwAAwNEIQwAAwNEIQwAAwNEIQwAAwNG6dRj6yU9+IpfLFVSGDBkS7W4BAIAepNsfWn/cccfplVdesW97vd2+ywAA4AjS7ZOF1+tVTk5OtLsBAAB6qG69mUySPvzwQ+Xl5al///665JJL9Omnn0a7SwAAoAfp1iNDo0aN0tKlSzV48GBt375dCxYs0Jlnnqn3339fycnJYe9TX1+v+vp6+7bf7++q7gIAgCNQtw5DJSUl9vVhw4Zp1KhRKiws1DPPPKOZM2eGvc/ChQu1YMGCrupiM9UVP4naYwNwhvMqN0qSlpeOaDbv/Mr3JUnPlg7t0j4BR7Juv5ksUK9evXT00Ufro48+ilinvLxce/bsscu2bdu6sIcAAOBIc0SFoa+//lpbt25Vbm5uxDo+n08pKSlBBQAAIJJuHYZ+8IMfqKqqSv/85z/15ptv6txzz5XH49FFF10U7a4BAIAeolvvM/TZZ5/poosu0pdffqnMzEydccYZ2rBhgzIzM6PdNQAA0EN06zD01FNPRbsLAACgh+vWm8kAAAA6G2EIAAA4GmEIAAA4Gm
EIAAA4WrfegRoAeqpzK6uarrki1pleuV4rSou6pkOAgzEyBAAAHI0wBAAAHI0wBAAAHI0wBAAAHI0wBAAAHI0wBAAAHI0wBAAAHI0wBAAAHI0wBAAAHI0w1GTX4ge0a/EDLdbZufge7Vx8T9d0CAA62AUrPpIkXbjiE1244pOwdW5c+VlXdqlVXnjm33rhmX+3+/6vP7ZLrz+267D78T+Ldxx2G+ieCEMAAMDRCEMAAMDRCEMAAMDRCEMAAMDRCEMAAMDRCEMAAMDRCEMAAMDRCEMAAMDRCEMAAMDRCEMAAMDRvNHuQHeza/F9kqTMa79nT9u5+PfKunaOfXtHxa+arn0rScqedYuqK+5Uzqwfd1k/AfQs0ytfb7rW+Bt1RelpzeqUVv5NkuRqxe/YGZX/kCQ9U3pMxDpXrPiXJGnp9MK2dLWZe1ZWS5JuPDcnYp2lKxr/DsPTdPu70zOb1ams/LdKSzNa9Zhrnmr8e46JF7aufmv9/aGdkqSTZmZ1aLvo3hgZAgAAjkYYAgAAjkYYAgAAjkYYAgAAjkYYAgAAjkYYAgAAjkYYAgAAjkYYAgAAjkYYAgAAjuaYM1D/+8GnVR/vkyRlzrpMuyqWKnPWFdq1eEnY+rsW39tY99rrDutxv7h33mHdHwBacl7lJi0vPUHnV25umhL8G3dG5RZJ0jOlgyO2MWvlNklSxbkF9rRbVn4uSYqVS5IU0zQ9pul2+bm5+tXKav0g4KzTi1bukCR5TePt703P1oMrduo/pzc/m/OTldYZqRvbmxFw5umVyxvPLu0xzZ/Ri0//W5MuOFj3z082tuO26jZdjr0kU1WP79J/XHrwTNdvPtpUt6Hx9qlXZOrth3fqlCuz9M6SnUGPtemBnUF9OP6ag8/hHxU7gtqxHnPQ9dn6+HfV6n9Djv55T3VQHc+3jZV635yr7T/frtxbcrX9F59JklyupkpN/2qgpts5Pxig6l9/qJybBqn611uC5klNdcqOU/Xd7yln7vHacfd/B80LrZt940jt+O1byv7+KO347fqmeSaobvYNZ2jH715T9g2jteN365r6Z4LbbbrMmvMd7fzDy8q6foJ2/mF1+DrXT9HORc8ra/ZU7Vy0qsX+ZV13vnbe+5SyrrtQuyqWqWbfXnUVRoYAAICjEYYAAICjEYYAAICjEYYAAICjEYYAAICjHRFhaNGiRerbt6/i4uI0atQovf3229HuEgAA6CG6fRh6+umnVVZWpttvv11///vfNXz4cE2cOFE7d+6MdtcAAEAP0O3D0G9+8xtdffXVuvLKK3Xsscdq8eLFSkhI0JIl4c8PBAAA0Bbd+qSL+/fv18aNG1VeXm5Pc7vdGj9+vNavXx/2PvX19aqvr7dv79mzR5JUU7dP1omdfH6/avbtsy8bNZ3sSsEnv7LqxPn9qtlXJ0ky5pugOvFN8xL8ftXsqw+qY7S/sao5IEny+/2q2bffvmyse6DpkQPrHJDf79fX+5rmuZoesenkV9a8wDrWPPs0Vk338fv9qm2qW2u114Y6pumkXOHq7g3pn3VqrsZ539iXjXVMUJ2GFupadQLb29dUZ5/dnlXH1ax/zeqGecy6vd/Yl0HLOMyyiVjX5bLr1DfVqW+q801IewcC2tvfVPdAU11rXmCdb/YesC8buYKWSuO8/U2X1jpvPZon6LLxseqbLuua5rmDLq15gXVcVh3jOmTdg/1zBdTZZ18GP2Zr6ra9TnDdvfZlUB3TijpN/XS10J4r7GPW2pdBy8+q63IH1Q2c5wr7etS2WLdx3tf2ZXDfPWHaq7EvG+c0zvME9Ku+qU59Ux3r/WWtWd/azzdRdXtr5PcnqK6p7jdN86yTLvr98dq3t0Z+f5z2NdWx18ymOh67vVjt3VtjXwbWcdvtHaxTaz0HY4LqHKzrU+2+GvsyqE7DwTpf72vs39cR6njs9uKa6sYfrBty0kW/P141dY3LpKYuuI510kW/P1E1dTVKbLqUIp90McHvV0
3d1/Zl4Dzr0yxsnQgnNYz3+1VTV2tfNgo+6WK4Oq5mn9qNl43fi7X2ZVB7QXX22pct9S+0buP3tmSM1WYnMt3Y559/biSZN998M2j6zTffbE455ZSw97n99ttN06tBoVAoFArlCC/btm3r9LzR7TeTtVV5ebn27Nljl6+++kqbNm2SJH3wwQdBl9u2bdO2bdvCzmtLne7eXk94Dk5rryc8B9o78h6T9rpXez3hOXREe3l5eeps3XozWUZGhjwej3bs2BE0fceOHcrJyQl7H5/PJ5/PFzTN7W7MfMnJyUGXKSkpdp3QeW2p093b6wnPwWnt9YTnQHtH3mPSXvdqryc8h8Ntr3fv3vZ3eGfq1iNDsbGxGjFihNauXWtPa2ho0Nq1a1VUVBTFngEAgJ6iW48MSVJZWZkuv/xynXzyyTrllFN0zz33qLa2VldeeWW0uwYAAHqAbh+GLrjgAu3atUu33XabqqurdcIJJ2j16tXKzs5udRs+n0+33367UlJSgi6tzWnh5rWlTndvryc8B6e11xOeA+0deY9Je92rvZ7wHA63vdDdXjqLy5iuOGYNAACge+rW+wwBAAB0NsIQAABwNMIQAABwNMIQAABwtk4/x3UXqqqqMlOmTDFpaWlGkklKSjKSTFxcnJFk3G531E8r3pZi9dflchlJxuPx2POsaVY56qijot7fI7m0Zt0IXeYUCoVC6Zxy1FFHmdjY2KBpXq/XnH322aa6utoYY0xDQ4P55S9/afr27WtcLpdxuVwmMTHR/OAHPzAHDhxoU37oUSNDtbW1Gj58uK677jpJUnFxsSQpNzdXkpSTk6PLLrtMMTExcrlc8nga/zIwLS1NknTUUUfZbblcLrma/oTT5/PJ7XbL5/PphhtuUEpK41kyrbNixsfHS5Li4uKCpp955pkaMmSIpOAza0pS//79JUlTp07V+PHjJUler1eFhYWSpJiYGDU0NMjtdsvtdsvj8aihocHuU0pKijIyMuRyuRQfH2//IW1gP+Li4pSUlKTjjjtOiYmJ9n1jY2MlSR6PR3FxcfZyyM7O1uzZs3XaaafJ6/UqLy/Pvo91v6SkJJ166qk66qijlJycrKysLLuO9bjWc/H5fEpMTJQkDRo0SMXFxUpOTlZiYqI8Ho/dD6/XK4/Ho4kTJ+p73/uefvnLX2rQoEGSpISEhKDXxjorqdXnyZMna8aMGZKk/Pz8oGV8+umn28vyhBNOsK+HcrlcamhosNu1Xj/rNbP6aa1HgfdLSEiQ2+1WbGxsxLOkWn0NXJa9evWSdHDdCWfgwIFhp7flbKwulytifWt66OWh2pOkpKSkQ9b1+Xw69thjD1kv8H0nhV9egeuW1XZ3kZqaai+PSMvQWo/dbre83vBnNAm9b1JSUrN10HLMMcdIalxHI7VnsZZnoMBlG2lev379Itbp27dvi49pCe13R2nLutqa55qRkSEp+PPB5XK1uGxTU1PDthXIet0PVS+ShISEoPqB/Qn3WebxeOz61mdvS8KtG53N+szzeDzq3bu3PB6PfD6fBgwYEPS9XFNTo/379yspKUmpqalKSUlRUlKSXn/9dU2fPl2S9P3vf18PPPCAjDE67bTT9Pjjj6u8vFxLly7Vbbfd1raOdcIATbcgyaxcubLZpTHGDB48OCht3nHHHUaSufTSS01ycrKRgkdhJJnzzz/fSDJVVVUmMTHRHm2KjY2161544YVGkpk4caKRZObOnWuMMSYxMdG43e6gkYXs7GwTFxdn/vCHP5hBgwYZSSYrK8seobDa93g8pri42EgyJ554on3/ZcuWmfj4eDNixAgjycTExAT11+fzmcLCQpORkWESExPNaaedZiSZY4891q7fu3dvI8lkZmYaSaZ///7GmMY/uy0sLGw2EjJ48GBzxhlnmFtuucWcccYZJjEx0ZxyyilGkjnuuOPMGWecYSTZI3NDhw41V155pZFkFi5caN/vsssuC1rGPp/PjBgxIuj1S0tLM6
mpqfZ8l8tlvF6vOfnkk43b7TbHHHOMkWTWrFljvv/975vs7GzjcrmCXrdevXqZmJgYc+WVV5q+ffsaSWb48OF2nYKCAhMbG2tcLpc9spabm2vOPPNMI8mkpKSYwsJCe/mcddZZ9mtuvU6VlZXG5XKZ+Ph443a77dctPz/f7sf06dONJDNkyBD7F4/L5QqqYxVrmffv39/86Ec/sh8vtJ5VrNfdul+4Ea6LL764xV9gXq+31b/Wrrjiila1Gfh+O1S9J5544pB1/vM//7PVfWxtae1I39lnn93i/ISEBLN27dqg18Mq1usxcOBAe9qAAQOMJJOXlxe2PWu9OO2008y0adPCvkbW7dNPP93+bLLuV1hYGLZd63NGkvmP//iPiMvAas/63AxdT91ut3nggQeCplnvr9BSUFBgv48jjV4HvmcTExObzbfeN+GKtVyHDh3abN7JJ59sJJkbbrih2byLLrrISDJTp041ksyPf/xjIwWv1x9++GHYZX799dcbSeaXv/ylvRxnzpxprwuB91m8eLF9ffjw4RGfo9T4ORe6HJ588smg13TChAn245x00klB65hVrO+fX/3qV83WydDX+5xzzmnWj1mzZgW9NlZfQu9rvW7h1qHc3NyIr/nNN99sX58+fbqZPHmykWTmzZtnJJmxY8fa863P02effda89NJLQY/15JNPGq/Xa+6//37jdrvt0SJjjKmoqDApKSmmvr6+9ZnhMPJGtxb4Zg4NQzNmzAh6caw3wsMPP2x69eoV9gPghBNOsF/cwBU+cEW86aabjCQ7oMyZM8c8+eSTxuPx2EN4oSHL5/PZK2vgymVt4svKyrLD26hRo+yV4cILLzS9e/c22dnZRjq4STBSse7X0htDkomPjzcpKSkRNxtZ9wl9LrGxsc2eh9vtttuJiYkxbrfbZGZmNvvCsEpMTIxJSkoyJ598somJibFDlfWh5vV6TWpqqklOTrbbWLBggUlPT7ffUKFBNzCsSrJf39DnYIXghIQE+wMiIyPDuN1uO0CmpKQ067P1QW+9aa1ivS5SY1AMfAzrMtKXlvVcrb61FIasD+iCggIjNYao0DrWB36kYrXfmlBUVlZmpINfpi2VtLQ0O0i2VH74wx8esk64ZR+utLS5Mz4+Puh5tlQ38D1uhZdI9U855ZSgD/DDLYGbx9uzab81r2NLdaz3uLVOh36heb1es379+rDLtj19OdRzDAySocUKeIHvN6tY4eKSSy6JeH8roNx+++1GCg4qGRkZRmr+w/j00083kszrr79uTj31VCPJvgztx6OPPmq30adPnxafT7jPRWv5hX5Wt2a9CP1MClfGjBnTbNo111zT4nphPR+rb5Fe+9b2y/putNq3XtOYmBh72rRp08zHH39svxbp6elmypQp5uijjzbjx483MTExprCw0MycOdN8+eWX5uOPPzaSzN///vfWZ4ZOyiJRJ4UPQ6tWrWq2IrlcLjNw4EDzX//1X/a0Y4891h59aOtKGPrmaU19600ZWo4++uig0QZr5TjmmGPsN6vVr/T09KA2XS6XSU5OtkNFYMnKymr2JRgbG2uGDh0a9AslISHB/sJvT7H6G+nXRWCxRoBGjRplJDX7Ir300kubLf/i4uKgadnZ2UG34+Pj2zTy0Z7i9XpbDDah+36FW0fCrVdt+SK0QqAVHENf68DboX21gkbgyEGkYoWhcI8TWhISEszo0aPDzgv84L/qqqsO2V/r12pn77dlBdVwyz7w/RVYTjrpJHuU1Sqh7xnrdrjPhtDnZL1X4uLimn3RhAvGHb0v5KHeLzfffLP56U9/GnZe6I+Rzi7Wcw9dLm63234e4UZiQr+QrfXZ+lFhtZGWlhZ2+WZlZZktW7a0a310u90thntrHbReC+t5xMTEdPpnWXcqgYMDY8eONQMHDrRf5/z8fDN8+HDj8/lMZmamGTlypHn11VfNCSecYMaOHWtqa2uNJPPiiy+2PjN0Yh6JKil8GJo5c6
ZJSUkxCQkJ9iYSa+EHrqAFBQXm0ksvtW9bvxSfffZZe6Qg9E2Wk5NjPB6P/YYaN26cvWKfccYZxuPxBI1MSM1TuMvlsodhrdK3b1+TkJBgvF6vvTI89thjdn2Px2NvJgp8c1sB67zzzjNS4y/1wDper9fk5+cHhaGf//zndpsJCQlm+PDh9i/jwF+qbrfbfv6hH/BWH+Pj4+3HS01NDRopiouLC7qdmZlpbwZMTEy0R7pCd6JLTU01GRkZ9pdLfn6+SUhIMG6324wcOdJkZmYGvY7Tpk2zv3gDPwBzcnLCvvFiYmLszX3W87OWj/UlbX1BZWZmhh2J6axywQUXNJtmBdeWwpD13KzRzkijiJE2dQQWKwwFbrKNVKxf24El9MO8sLCwWTizXm/r9Ro2bJg9uhYalKzSml/BoQElKSnJXj8jfcm43e5mX7Sht30+n71MreUSGuStx2lLWLA2kXVGCRfQrRJu82245RdYrKAYGhwivV7h+hKptCZwWO9xt9sd9oeG9Tn8k5/8JOgxrdfS6mdgX6wRscDPvNBwGvp5av0IDbeetPTcEhISgtqyno/1vRO6+e1Qy9PqszVS1ZbRG6tu6PdbuNG30HKogQCXy2W3Y/3oDSw5OTn2e8nj8dgDEoG7VCQnJ5v8/HwzbNgwI8nMmDHDTJgwwRhjzMaNG40k8+677xqJMGSMCR+GSkpKTO/evY3L5TIZGRnmu9/9rvF6vWbOnDlm0KBBLb6Q/fr1M5LMkiVL7Gl9+/a1t9umpqaaY445xsTGxtovYFJSkh2MIr2hu/IIpaOPPtr+MD7ttNNMZmamSUhIsD+48/LyTF5eXoub0qwvjcAv1Pj4eJOWlmZ/IVkjUYWFhfabMj4+PuLmscA3Uuibdv78+UEfUNaXorWvknTwQyvcsgz8cAp8w3m9XjNlypSgPlubJ63t3VLjr7SWPqwLCwuDvoiTk5PNhAkTzLhx4+z1YtKkSeaoo46yPwTi4uJMUlJSi1+MsbGxzb6gf/Ob3zSrZy2vljaTBY70HW6xwlDgr9dIJVwwszYVBK5PocG0LR/8ga+pdd9IwaY1o3FdWSItw8DNU+3pY7gR7dDn29ZNG4GlpVHQlh7zcEtsbKy54IILgt7P1useOIpgfSZb61GvXr3s+1ibM633nvVetzZzBX5Bh4a/uLg44/P5zN/+9jc7dB1qOU6YMKHFzXSRSuguB9bmrMDPpkgl3G4OkX7o5OXlNft8bE8ACy1WP0N/pLjdbvu7dP78+UZqeRcPa0Biy5YtJj093d73NiMjw0yYMMF4vV4zf/58M3z4cGOMMXv37jWSzCOPPGKktm0m61FHk0Vimv5+bcOGDTr99NNljNHxxx+vtWvXKicnRxs2bNBnn30WdHRAenq6Jk+eLKnxqI4TTzxR0sEjjDwej31klMvl0rfffiu/36/9+/fbR1DExMRo3bp1OuWUUzR16lSdfPLJ9lEQXq9XWVlZ9hFPlvz8fC1cuNA+ksPn82nYsGGSpOOOO86u16dPH/Xp00cnnHCCsrOzNXz4cHuey+VS3759mx0NkpeXpx07dsjtdsvv92v37t3au3evvv76a0lSdXW1fD6fEhISVFZWppSUFN100032UQtJSUn2UVd79+61n4fX61VmZqZiYmLstiVp3759QUfRuVwuJSYmqm/fvkpMTFRJSYm9PNLS0jRkyBDt27ev2dFEmZmZcrlcio2NtY+aGzlypP34l1xyiTIyMnTNNdcoPT292dFgoUd0JCcnBx2pZz0fY4xiY2Pt5yYdPCJt7NixKiwslMvlUlxcnH0E2SOPPKK6ujr7cerq6lRcXKyNGzfK6/WqpqZGw4cP11dffaVx48ZJkurr6zVv3jxt375dN998s5KTk5sdoehyufTNN98E9Xvz5s0KtW/fPknSZ599Jkn65z//2azOxIkTg5bn4aiurpbU/OiucP71r3/Z163nZR21Y/nmm2909NFHB00bPHiwJGnBgg
X2NOvowrFjx4Z9LOs9HvjahbKOxElPT5ckzZ8/354X6ainUaNGae7cuZIOHr0TuhzT0tI0b948SWrV0XOWmpqasNOt9amwsNB+Xm2xc+fOsNMzMzPt69YRPaFHREmNR90Gsp7vgAEDJEl+v7/Z6xh6JKfF+rxoLevo20DWch88eLD+8pe/BC0Taxnm5eVJalwPPv74Y0nS/v37JR18b0vSW2+9JUn6/PPPJUm7du2SJL399tuSFLQuhr4+Bw4c0Nq1azVlyhQ1NDQoNjbWfv/FxcVp8uTJQUdweb1e7du3T2vXrg1qJ/CIL+u5Bb6f3G63/d63PgvfffddSc2PaA3ldruVn58ftNwDjxgLXXd79+4dtDyt7zKpcR0ZMWJEsyPqrPdPZmamjj766LCfBV999ZUkNfsMs5abJPs7K/B7TQo+Sq60tFQxMTF688039f/+3/9TQ0ODYmJi9O9//1uTJk3SN998o759++q9997Tzp079X//93+SGj97UlJS2vR+7FEjQzU1Nebdd981b7zxhpEad2CWDv5SSktLM/Hx8SY5Odm4XC77V2rgcKb1ay0vL89OtVbKTU1NtduUDm5Ws+4fuHOx1DgSYw05W79grHnWaExycnLQvj+TJ082d911l70PgsfjMTExMSYjIyNoM9cFF1xgBg0aZKZPn26GDh1q8vPzg0ZhrBGRwHSfnp5uvF5v0K+L0KFcq87AgQON1+sNu59EuF/eoUfLBe73YD2PwJ3IrfNBWPcNvI/1q83n85nU1FQzZcqUoGUcuL/T4MGDzVFHHWVmzJhhMjMzTUFBgd0Pa/Ng4GNFum1d+nw+uz/W80xJSTEul8teFoEjXKHb/lNTU02vXr2Cfu1Ym1qsEZrY2Fh7c19eXl7QyE3g6EWkX9WB01szsnio/QzaMzrZmpGh1vQlcLmELoPATcrWehS6D1pnl9zc3GabrUP3wcvKyrKPWLJGTaznELj5ta2vi9vttu8XWrcrR5StEjgiFLrJJHS0qDP2bQl8zq15/oHvk9B1LPS9ZX1OWgd6hGvvkksuiTjKmpGRYWJiYpqtl16vt1l74UaTAncgDrccwx3kYLXblhGc0MdoadQ40v5zoZvd21vS09ObLYvAkaSEhASTnp5uTjzxRPt1cbvdpm/fvqaoqMh8++235qSTTjJnnnmmGThwoDn11FPN0KFDzYknnmgyMzNNeXl5m/JDjwpDr776aoe/ATu7BAaE1NTUZjv8ejyeVu2Q3adPnxaHGzvrw7Mj27Xasr4EQ7cpt2Y5WPtzBdZvzQdzW3d6p1AoFErHldbsOxYbG2vOPfdcs337dmOMMZ9//rmZPn26vQ+wx+Mx6enp5qabbmrzSRddxrRjHBYAAKCHcMQ+QwAAAJEQhgAAgKMRhgAAgKMRhgAAgKMRhgAAgKMRhgAAgKMRhgAAgKMRhgAAgKMRhgAAgKMRhgAAgKMRhgAAgKMRhgAAgKP9f3U4QUt1BL2VAAAAAElFTkSuQmCC",
"text/plain": [
"<Figure size 640x480 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Distribution of message lengths in characters (one bar per distinct length).\n",
"# Series.str.len() is vectorized and independent of the DataFrame index, so this\n",
"# keeps working after rows were filtered out and the index is no longer 0..n-1\n",
"# (df.loc[i] is a label lookup and raises KeyError on a missing label).\n",
"sns.countplot(x=df['message'].str.len().tolist())"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"we have 38695 words in our Dataframe\n",
"the average word count in every sentence is 15\n"
]
},
{
"data": {
"text/plain": [
"([14, 22, 19, 11, 17], 38695, 15)"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Number of whitespace-separated words in each message.\n",
"# Series.str.split() with no pattern splits on runs of whitespace like str.split();\n",
"# the vectorized form does not rely on the index labels being 0..n-1, unlike\n",
"# the per-row df.loc[i] lookup, and avoids building a row Series per message.\n",
"text_words_lengths = df['message'].str.split().str.len().tolist()\n",
"total_length = np.sum(text_words_lengths)\n",
"# int() truncates the mean towards zero; it is only used for display.\n",
"text_words_mean = int(np.mean(text_words_lengths))\n",
"print('we have ' + str(total_length) + ' words in our Dataframe')\n",
"print('the average word count in every sentence is ' + str(text_words_mean))\n",
"# Show the first few per-message counts next to the aggregate numbers.\n",
"text_words_lengths[:5], total_length, text_words_mean"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "tf",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.15"
},
"orig_nbformat": 4,
"vscode": {
"interpreter": {
"hash": "86eece18b6898e5d361741678d0e9a4298e9b9ab2411f93d35b863e6e254e93a"
}
}
},
"nbformat": 4,
"nbformat_minor": 2
}