matrix-spam-ml/spam-keras.ipynb

675 lines
141 KiB
Plaintext

{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Spam Model using Keras\n",
"## Imports"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"2022-09-26 11:49:22.756897: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA\n",
"To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n",
"2022-09-26 11:49:23.682758: E tensorflow/stream_executor/cuda/cuda_blas.cc:2981] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered\n",
"2022-09-26 11:49:25.265522: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /home/marcel/libtorch/lib:\n",
"2022-09-26 11:49:25.265770: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /home/marcel/libtorch/lib:\n",
"2022-09-26 11:49:25.265778: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly.\n"
]
}
],
"source": [
"import keras_tuner as kt\n",
"import matplotlib.pyplot as plt\n",
"import numpy as np\n",
"import pandas as pd\n",
"import tensorflow as tf\n",
"import tensorflow_addons as tfa\n",
"from nltk.corpus import stopwords\n",
"from tensorflow.keras.preprocessing.sequence import pad_sequences\n",
"from tensorflow.keras.preprocessing.text import Tokenizer"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Import dataset and normalize"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"label 0 \\\n",
"message count 4845 \n",
" unique 4533 \n",
" top Sorry, I'll call later \n",
" freq 30 \n",
"\n",
"label 1 \n",
"message count 750 \n",
" unique 644 \n",
" top Please call customer service representative FR... \n",
" freq 4 \n",
"label 0 \\\n",
"message count 4845 \n",
" unique 4533 \n",
" top Sorry, I'll call later \n",
" freq 30 \n",
"\n",
"label 1 \n",
"message count 750 \n",
" unique 644 \n",
" top Please call customer service representative FR... \n",
" freq 4 \n"
]
}
],
"source": [
"# Load the tab-separated corpus; the code below uses its 'label' and 'message' columns\n",
"data = pd.read_csv(\n",
"    './input/MatrixData',\n",
"    sep='\\t',\n",
")\n",
"\n",
"\n",
"# Build the lookup once instead of re-reading the NLTK corpus for every message.\n",
"# Some words which might indicate a certain sentiment are kept via a whitelist.\n",
"_STOPWORD_WHITELIST = frozenset([\"n't\", \"not\", \"no\"])\n",
"_STOPWORDS = frozenset(stopwords.words('english')) - _STOPWORD_WHITELIST\n",
"\n",
"\n",
"def remove_stopwords(input_text):\n",
"    '''\n",
"    Remove English stopwords and single-character tokens from one message.\n",
"\n",
"    The text is split on whitespace only, so punctuation stays attached to its\n",
"    word. Stopwords are matched case-insensitively (the NLTK list is lowercase),\n",
"    while the words that are kept retain their original casing.\n",
"\n",
"    Parameters:\n",
"        input_text : str, the message to clean\n",
"    Output:\n",
"        str, the remaining words joined by single spaces\n",
"    '''\n",
"    clean_words = [word for word in input_text.split()\n",
"                   if len(word) > 1 and word.lower() not in _STOPWORDS]\n",
"    return \" \".join(clean_words)\n",
"\n",
"# Drop rows that contain missing values\n",
"data.dropna(inplace=True)\n",
"\n",
"\n",
"def change_labels(x):\n",
"    '''Map a textual label to an integer class: 1 for \"spam\", 0 for anything else.'''\n",
"    if x == \"spam\":\n",
"        return 1\n",
"    return 0\n",
"\n",
"\n",
"# Replace the textual labels with their integer classes\n",
"data['label'] = data['label'].apply(change_labels)\n",
"\n",
"# Running per-class tallies, updated by count_labels()\n",
"ham = 0\n",
"spam = 0\n",
"\n",
"\n",
"def count_labels(x):\n",
"    '''\n",
"    Increment the module-level `spam` counter when x == 1, otherwise the\n",
"    module-level `ham` counter, and return x unchanged.\n",
"    '''\n",
"    global spam, ham\n",
"    if x != 1:\n",
"        ham += 1\n",
"    else:\n",
"        spam += 1\n",
"    return x\n",
"\n",
"\n",
"# The tally is currently disabled:\n",
"#.apply(count_labels)\n",
"#print(\"Spam: \", spam)\n",
"#print(\"Ham: \", ham)\n",
"\n",
"# Fixed seed and split ratio so the train/test partition is identical on every run\n",
"SHUFFLE_SEED = 42\n",
"TRAIN_FRACTION = 0.7\n",
"\n",
"# Remove stopwords\n",
"data['message'] = data['message'].apply(remove_stopwords)\n",
"\n",
"# Per-class summary of the messages (no balancing is applied)\n",
"print(data.groupby('label').describe().T)\n",
"\n",
"# Optional down-sampling of ham to twice the number of spam messages (disabled).\n",
"# Re-enable the summary print at the end when this is switched on.\n",
"#ham_msg = data[data.label == 0]\n",
"#spam_msg = data[data.label == 1]\n",
"#ham_msg = ham_msg.sample(n=len(spam_msg)*2, random_state=42)\n",
"#data = pd.concat([ham_msg, spam_msg]).reset_index(drop=True)\n",
"#print(data.groupby('label').describe().T)\n",
"\n",
"# Shuffle data (seeded, so downstream training and tuning are reproducible)\n",
"data = data.sample(frac=1, random_state=SHUFFLE_SEED).reset_index(drop=True)\n",
"\n",
"# Split data into messages and label sets\n",
"sentences = data['message'].tolist()\n",
"labels = data['label'].tolist()\n",
"\n",
"# The first TRAIN_FRACTION of the shuffled rows is used for training, the rest for testing\n",
"training_size = int(len(sentences) * TRAIN_FRACTION)\n",
"training_sentences = sentences[:training_size]\n",
"testing_sentences = sentences[training_size:]\n",
"training_labels = labels[:training_size]\n",
"testing_labels = labels[training_size:]\n",
"\n",
"# Make labels into numpy arrays for use with the network later\n",
"training_labels_final = np.array(training_labels)\n",
"testing_labels_final = np.array(testing_labels)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Tokenize"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"# Vocabulary cap for the Tokenizer and size of the Embedding layer\n",
"vocab_size = 1000\n",
"embedding_dim = 16  # alternative value kept for reference: 32\n",
"\n",
"# Padding settings handed to pad_sequences. With maxlen=None each call pads to\n",
"# the longest sequence it receives (alternative value kept for reference: 120).\n",
"max_length = None\n",
"padding_type = 'post'\n",
"trunc_type = 'post'\n",
"\n",
"# Placeholder token the Tokenizer substitutes for out-of-vocabulary words\n",
"oov_tok = \"<OOV>\"\n"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"# Learn the vocabulary from the training messages only\n",
"tokenizer = Tokenizer(oov_token=oov_tok, num_words=vocab_size)\n",
"tokenizer.fit_on_texts(training_sentences)\n",
"word_index = tokenizer.word_index\n",
"\n",
"# Padding options shared by the training and the test split\n",
"_pad_options = dict(maxlen=max_length, padding=padding_type, truncating=trunc_type)\n",
"\n",
"# Training split: texts -> integer sequences -> padded matrix\n",
"sequences = tokenizer.texts_to_sequences(training_sentences)\n",
"padded = pad_sequences(sequences, **_pad_options)\n",
"\n",
"# Test split, encoded with the vocabulary fitted on the training split\n",
"testing_sequences = tokenizer.texts_to_sequences(testing_sentences)\n",
"testing_padded = pad_sequences(testing_sequences, **_pad_options)\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Model"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"import os  # needed for os.path.join below; it is not imported anywhere else in the notebook\n",
"from datetime import datetime\n",
"\n",
"from tensorflow import keras\n",
"\n",
"# Take the timestamp once so every log directory of this run carries the same suffix\n",
"run_stamp = datetime.now().strftime(\"%Y%m%d-%H%M%S\")\n",
"\n",
"# TensorBoard logs of the exploratory training run\n",
"logdir = \"logs/scalars/\" + run_stamp\n",
"tensorboard_callback = keras.callbacks.TensorBoard(log_dir=logdir)\n",
"\n",
"# TensorBoard logs of the final (hypermodel) training run\n",
"hypermodel_logdir = \"logs/scalars/\" + run_stamp + \"_hypermodel\"\n",
"hypermodel_tensorboard_callback = keras.callbacks.TensorBoard(\n",
"    log_dir=hypermodel_logdir)\n",
"\n",
"# TensorBoard logs of the hyperparameter search\n",
"hypertuner_logdir = \"hypertuner_logs/scalars/\" + run_stamp\n",
"hypertuner_tensorboard_callback = keras.callbacks.TensorBoard(\n",
"    log_dir=hypertuner_logdir)\n",
"\n",
"# Define the checkpoint directory to store the checkpoints.\n",
"checkpoint_dir = './training_checkpoints'\n",
"# Define the name of the checkpoint files ({epoch} is left for the checkpoint callback to fill in).\n",
"checkpoint_prefix = os.path.join(checkpoint_dir, \"ckpt_{epoch}\")\n",
"\n",
"#es_callback = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=3)"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"def model_builder(hp):\n",
"    '''\n",
"    Build and compile the spam classifier for one KerasTuner trial.\n",
"\n",
"    Search space registered on `hp`:\n",
"        units         : 6..512 in steps of 12 (width of the hidden Dense layer)\n",
"        dropout       : 0.1..0.9 in steps of 0.01 (shared by both Dropout layers)\n",
"        l2            : 0.0001..0.001 in steps of 0.0001 (Dense kernel regularizer)\n",
"        learning_rate : one of 1e-2, 1e-3, 1e-4, 1e-5 (Adam optimizer)\n",
"\n",
"    Returns the compiled tf.keras.Sequential model (binary cross-entropy loss,\n",
"    'accuracy' metric). Reads vocab_size, embedding_dim and max_length from\n",
"    the notebook globals.\n",
"    '''\n",
"    hp_units = hp.Int('units', min_value=6, max_value=512, step=12)\n",
"    hp_dropout = hp.Float('dropout', min_value=.1, max_value=.9, step=.01)\n",
"    hp_l2 = hp.Float('l2', min_value=0.0001, max_value=0.001, step=0.0001)\n",
"\n",
"    layers = tf.keras.layers\n",
"    model = tf.keras.Sequential()\n",
"    # Token ids -> embeddings -> mean over the sequence axis\n",
"    model.add(layers.Embedding(vocab_size, embedding_dim, input_length=max_length))\n",
"    model.add(layers.GlobalAveragePooling1D())\n",
"    model.add(layers.Dropout(hp_dropout))\n",
"    # Single tunable hidden layer with L2-regularized weights\n",
"    model.add(layers.Dense(units=hp_units, activation='relu',\n",
"                           kernel_regularizer=tf.keras.regularizers.l2(hp_l2)))\n",
"    model.add(layers.Dropout(hp_dropout))\n",
"    # One sigmoid unit: output is the spam score in [0, 1]\n",
"    model.add(layers.Dense(1, activation='sigmoid'))\n",
"\n",
"    # Adam was best so far; tf.keras.optimizers.Nadam() gave similar but\n",
"    # slightly worse results (second best)\n",
"    hp_learning_rate = hp.Choice('learning_rate', values=[1e-2, 1e-3, 1e-4, 1e-5])\n",
"    opt = tf.keras.optimizers.Adam(learning_rate=hp_learning_rate)\n",
"\n",
"    model.compile(optimizer=opt,\n",
"                  loss=tf.keras.losses.BinaryCrossentropy(),\n",
"                  metrics=['accuracy'])\n",
"    return model\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Get best hypermodel values"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"INFO:tensorflow:Reloading Oracle from existing project hyper_tuning/spam-keras/oracle.json\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"2022-09-26 11:49:32.695952: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:966] could not open file to read NUMA node: /sys/bus/pci/devices/0000:09:00.0/numa_node\n",
"Your kernel may have been built without NUMA support.\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"INFO:tensorflow:Reloading Tuner from hyper_tuning/spam-keras/tuner0.json\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"2022-09-26 11:49:32.820214: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:966] could not open file to read NUMA node: /sys/bus/pci/devices/0000:09:00.0/numa_node\n",
"Your kernel may have been built without NUMA support.\n",
"2022-09-26 11:49:32.820285: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:966] could not open file to read NUMA node: /sys/bus/pci/devices/0000:09:00.0/numa_node\n",
"Your kernel may have been built without NUMA support.\n",
"2022-09-26 11:49:32.822176: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA\n",
"To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n",
"2022-09-26 11:49:32.822979: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:966] could not open file to read NUMA node: /sys/bus/pci/devices/0000:09:00.0/numa_node\n",
"Your kernel may have been built without NUMA support.\n",
"2022-09-26 11:49:32.823077: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:966] could not open file to read NUMA node: /sys/bus/pci/devices/0000:09:00.0/numa_node\n",
"Your kernel may have been built without NUMA support.\n",
"2022-09-26 11:49:32.823130: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:966] could not open file to read NUMA node: /sys/bus/pci/devices/0000:09:00.0/numa_node\n",
"Your kernel may have been built without NUMA support.\n",
"2022-09-26 11:49:34.550694: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:966] could not open file to read NUMA node: /sys/bus/pci/devices/0000:09:00.0/numa_node\n",
"Your kernel may have been built without NUMA support.\n",
"2022-09-26 11:49:34.550929: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:966] could not open file to read NUMA node: /sys/bus/pci/devices/0000:09:00.0/numa_node\n",
"Your kernel may have been built without NUMA support.\n",
"2022-09-26 11:49:34.550941: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1700] Could not identify NUMA node of platform GPU id 0, defaulting to 0. Your kernel may not have been built with NUMA support.\n",
"2022-09-26 11:49:34.551039: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:966] could not open file to read NUMA node: /sys/bus/pci/devices/0000:09:00.0/numa_node\n",
"Your kernel may have been built without NUMA support.\n",
"2022-09-26 11:49:34.551613: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1616] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 5904 MB memory: -> device: 0, name: NVIDIA GeForce RTX 2080 SUPER, pci bus id: 0000:09:00.0, compute capability: 7.5\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"INFO:tensorflow:Oracle triggered exit\n",
"\n",
"The hyperparameter search is complete. The optimal number of units in the first densely-connected\n",
"layer is 330 and the optimal learning rate for the optimizer is 0.01.\n",
"The optimal dropout rate is 0.5499999999999998 and the optimal l2 rate is 0.0001.\n",
"\n"
]
}
],
"source": [
"# Hyperband search over model_builder's hyperparameters, selecting on validation\n",
"# accuracy; trial state is stored under hyper_tuning/spam-keras\n",
"tuner = kt.Hyperband(\n",
"    model_builder,\n",
"    objective='val_accuracy',\n",
"    max_epochs=750,\n",
"    factor=3,\n",
"    directory='hyper_tuning',\n",
"    project_name='spam-keras',\n",
")\n",
"\n",
"# Stop a trial once val_loss has not improved for 5 epochs\n",
"stop_early = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=5)\n",
"\n",
"# NOTE(review): Hyperband presumably schedules the epochs of each trial itself,\n",
"# which would make `epochs=800` ineffective - confirm against the KerasTuner docs.\n",
"tuner.search(\n",
"    padded,\n",
"    training_labels_final,\n",
"    epochs=800,\n",
"    verbose=0,\n",
"    validation_data=(testing_padded, testing_labels_final),\n",
"    callbacks=[hypertuner_tensorboard_callback, stop_early],\n",
")\n",
"\n",
"# Get the optimal hyperparameters\n",
"best_hps = tuner.get_best_hyperparameters(num_trials=1)[0]\n",
"\n",
"_search_report = f\"\"\"\n",
"The hyperparameter search is complete. The optimal number of units in the first densely-connected\n",
"layer is {best_hps.get('units')} and the optimal learning rate for the optimizer is {best_hps.get('learning_rate')}.\n",
"The optimal dropout rate is {best_hps.get('dropout')} and the optimal l2 rate is {best_hps.get('l2')}.\n",
"\"\"\"\n",
"print(_search_report)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Train"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Best epoch: 6\n",
"Average train loss: 0.006002985829836689\n",
"Average test loss: 0.22046395140389602\n",
"Average train loss(hypermodel_history): 0.10212815863390763\n",
"Average test loss(hypermodel_history): 0.08288264522949855\n"
]
}
],
"source": [
"num_epochs = 200\n",
"\n",
"# Exploratory run: train with the best hyperparameters for the full epoch budget\n",
"# and find the epoch with the highest validation accuracy\n",
"model = tuner.hypermodel.build(best_hps)\n",
"history = model.fit(\n",
"    padded,\n",
"    training_labels_final,\n",
"    validation_data=(testing_padded, testing_labels_final),\n",
"    epochs=num_epochs,\n",
"    callbacks=[tensorboard_callback],\n",
"    verbose=0,\n",
")\n",
"\n",
"val_acc_per_epoch = history.history['val_accuracy']\n",
"# +1 because epochs are counted from 1 while list positions start at 0\n",
"best_epoch = val_acc_per_epoch.index(max(val_acc_per_epoch)) + 1\n",
"print('Best epoch: %d' % (best_epoch,))\n",
"print(\"Average train loss: \", np.average(history.history['loss']))\n",
"print(\"Average test loss: \", np.average(history.history['val_loss']))\n",
"\n",
"# Final run: a fresh model trained for exactly `best_epoch` epochs, validated on\n",
"# the last 20% of the training data, with weights checkpointed every epoch\n",
"hypermodel = tuner.hypermodel.build(best_hps)\n",
"_checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(\n",
"    filepath=checkpoint_prefix, save_weights_only=True)\n",
"hypermodel_history = hypermodel.fit(\n",
"    padded,\n",
"    training_labels_final,\n",
"    validation_split=0.2,\n",
"    epochs=best_epoch,\n",
"    callbacks=[hypermodel_tensorboard_callback, _checkpoint_callback],  # es_callback is not used\n",
"    verbose=0,\n",
")\n",
"\n",
"print(\"Average train loss(hypermodel_history): \",\n",
"      np.average(hypermodel_history.history['loss']))\n",
"print(\"Average test loss(hypermodel_history): \",\n",
"      np.average(hypermodel_history.history['val_loss']))\n"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"INFO:tensorflow:Assets written to: ./models/spam_keras_1664186466.3541195/assets\n"
]
},
{
"data": {
"image/png": "",
"text/plain": [
"<Figure size 640x480 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Save the final model under a name made unique by the current Unix timestamp\n",
"import time\n",
"\n",
"_export_path = f\"./models/spam_keras_{time.time()}\"\n",
"hypermodel.save(_export_path)\n",
"\n",
"# Loss curves (training vs. validation) of the exploratory run\n",
"fig, ax = plt.subplots()\n",
"ax.plot(history.history['loss'])\n",
"ax.plot(history.history['val_loss'])\n",
"ax.set_title('model loss')\n",
"ax.set_ylabel('loss')\n",
"ax.set_xlabel('epoch')\n",
"ax.legend(['train', 'val'], loc='upper left')\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "",
"text/plain": [
"<Figure size 640x480 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Loss curves (training vs. validation) of the final hypermodel run\n",
"fig, ax = plt.subplots()\n",
"ax.plot(hypermodel_history.history['loss'])\n",
"ax.plot(hypermodel_history.history['val_loss'])\n",
"ax.set_title('model loss')\n",
"ax.set_ylabel('loss')\n",
"ax.set_xlabel('epoch')\n",
"ax.legend(['train', 'val'], loc='upper left')\n",
"plt.show()\n"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"1/1 [==============================] - 0s 73ms/step\n",
"Message: \"Greg, can you call me back once you get this?\"\n",
"Likeliness of spam in percentage: 0.7976732850074768\n",
"\n",
"\n",
"Message: \"Congrats on your new iPhone! Click here to claim your prize...\"\n",
"Likeliness of spam in percentage: 0.9998524188995361\n",
"\n",
"\n",
"Message: \"Really like that new photo of you\"\n",
"Likeliness of spam in percentage: 0.008655842393636703\n",
"\n",
"\n",
"Message: \"Did you hear the news today? Terrible what has happened...\"\n",
"Likeliness of spam in percentage: 0.0016309074126183987\n",
"\n",
"\n",
"Message: \"Attend this free COVID webinar today: Book your session now...\"\n",
"Likeliness of spam in percentage: 0.9975067973136902\n",
"\n",
"\n",
"Message: \"Are you coming to the party tonight?\"\n",
"Likeliness of spam in percentage: 0.00012246571714058518\n",
"\n",
"\n",
"Message: \"Your parcel has gone missing\"\n",
"Likeliness of spam in percentage: 0.0040820869617164135\n",
"\n",
"\n",
"Message: \"Do not forget to bring friends!\"\n",
"Likeliness of spam in percentage: 0.013192469254136086\n",
"\n",
"\n",
"Message: \"You have won a million dollars! Fill out your bank details here...\"\n",
"Likeliness of spam in percentage: 0.1410737931728363\n",
"\n",
"\n",
"Message: \"Looking forward to seeing you again\"\n",
"Likeliness of spam in percentage: 0.01070433109998703\n",
"\n",
"\n",
"Message: \"oh wow https://github.com/MGCodesandStats/tensorflow-nlp/blob/master/spam%20detection%20tensorflow%20v2.ipynb works really good on spam detection. Guess I go with that as the base model then lol :D\"\n",
"Likeliness of spam in percentage: 0.0005995486862957478\n",
"\n",
"\n",
"Message: \"ayo\"\n",
"Likeliness of spam in percentage: 0.002324814209714532\n",
"\n",
"\n",
"Message: \"Almost all my spam is coming to my non-gmail address actually\"\n",
"Likeliness of spam in percentage: 2.6484692625672324e-06\n",
"\n",
"\n",
"Message: \"Oh neat I think I found the sizing sweetspot for my data :D\"\n",
"Likeliness of spam in percentage: 5.796300683869049e-05\n",
"\n",
"\n",
"Message: \"would never click on buttons in gmail :D always expecting there to be a bug in gmail that allows js to grab your google credentials :D XSS via email lol. I am too scared for touching spam in gmail\"\n",
"Likeliness of spam in percentage: 0.41252583265304565\n",
"\n",
"\n",
"Message: \"back to cacophony \"\n",
"Likeliness of spam in percentage: 0.3257969617843628\n",
"\n",
"\n",
"Message: \"Room version 11 when\"\n",
"Likeliness of spam in percentage: 0.00024024260346777737\n",
"\n",
"\n",
"Message: \"skip 11 and go straight to 12\"\n",
"Likeliness of spam in percentage: 0.038723770529031754\n",
"\n",
"\n",
"Message: \"100 events should clear out any events that might be causing a request to fail lol\"\n",
"Likeliness of spam in percentage: 0.0003453373210504651\n",
"\n",
"\n"
]
}
],
"source": [
"# Use the model to predict whether a message is spam\n",
"text_messages = ['Greg, can you call me back once you get this?',\n",
"                 'Congrats on your new iPhone! Click here to claim your prize...',\n",
"                 'Really like that new photo of you',\n",
"                 'Did you hear the news today? Terrible what has happened...',\n",
"                 'Attend this free COVID webinar today: Book your session now...',\n",
"                 'Are you coming to the party tonight?',\n",
"                 'Your parcel has gone missing',\n",
"                 'Do not forget to bring friends!',\n",
"                 'You have won a million dollars! Fill out your bank details here...',\n",
"                 'Looking forward to seeing you again',\n",
"                 'oh wow https://github.com/MGCodesandStats/tensorflow-nlp/blob/master/spam%20detection%20tensorflow%20v2.ipynb works really good on spam detection. Guess I go with that as the base model then lol :D',\n",
"                 'ayo',\n",
"                 'Almost all my spam is coming to my non-gmail address actually',\n",
"                 'Oh neat I think I found the sizing sweetspot for my data :D',\n",
"                 'would never click on buttons in gmail :D always expecting there to be a bug in gmail that allows js to grab your google credentials :D XSS via email lol. I am too scared for touching spam in gmail',\n",
"                 'back to cacophony ',\n",
"                 'Room version 11 when',\n",
"                 'skip 11 and go straight to 12',\n",
"                 '100 events should clear out any events that might be causing a request to fail lol']\n",
"\n",
"# Apply the same stopword removal the training messages went through, so the\n",
"# model is queried with inputs preprocessed like its training data\n",
"cleaned_messages = [remove_stopwords(message) for message in text_messages]\n",
"\n",
"# Create the sequences, using the same padding and truncation settings as the training data\n",
"sample_sequences = tokenizer.texts_to_sequences(cleaned_messages)\n",
"fakes_padded = pad_sequences(sample_sequences, maxlen=max_length,\n",
"                             padding=padding_type, truncating=trunc_type)\n",
"\n",
"# One row per message; column 0 holds the sigmoid output\n",
"classes = hypermodel.predict(fakes_padded)\n",
"\n",
"# The closer the score is to 100%, the more likely that the message is spam.\n",
"# The model outputs a value in [0, 1], so it is formatted as a real percentage.\n",
"for message, score in zip(text_messages, classes[:, 0]):\n",
"    print(f\"Message: \\\"{message}\\\"\")\n",
"    print(f\"Likeliness of spam in percentage: {float(score):.2%}\")\n",
"    print('\\n')\n"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "",
"text/plain": [
"<IPython.core.display.Image object>"
]
},
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Render the layer graph of `model` left-to-right, annotated with tensor shapes\n",
"tf.keras.utils.plot_model(\n",
"    model,\n",
"    show_shapes=True,\n",
"    rankdir=\"LR\",\n",
")\n"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3.10.6 64-bit",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.6"
},
"orig_nbformat": 4,
"vscode": {
"interpreter": {
"hash": "916dbcbb3f70747c44a77c7bcd40155683ae19c65e1c03b4aa3499c5328201f1"
}
}
},
"nbformat": 4,
"nbformat_minor": 2
}