matrix-spam-ml/bert.ipynb

575 lines
116 KiB
Plaintext
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

{
"cells": [
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Model: \"model_1\"\n",
"__________________________________________________________________________________________________\n",
" Layer (type) Output Shape Param # Connected to \n",
"==================================================================================================\n",
" text (InputLayer) [(None,)] 0 [] \n",
" \n",
" preprocessing (KerasLayer) {'input_type_ids': 0 ['text[0][0]'] \n",
" (None, 128), \n",
" 'input_word_ids': \n",
" (None, 128), \n",
" 'input_mask': (Non \n",
" e, 128)} \n",
" \n",
" BERT_encoder (KerasLayer) {'sequence_output': 13548801 ['preprocessing[0][0]', \n",
" (None, 128, 256), 'preprocessing[0][1]', \n",
" 'encoder_outputs': 'preprocessing[0][2]'] \n",
" [(None, 128, 256), \n",
" (None, 128, 256), \n",
" (None, 128, 256), \n",
" (None, 128, 256), \n",
" (None, 128, 256), \n",
" (None, 128, 256), \n",
" (None, 128, 256), \n",
" (None, 128, 256), \n",
" (None, 128, 256), \n",
" (None, 128, 256), \n",
" (None, 128, 256), \n",
" (None, 128, 256)], \n",
" 'default': (None, \n",
" 256), \n",
" 'pooled_output': ( \n",
" None, 256)} \n",
" \n",
" dropout_1 (Dropout) (None, 256) 0 ['BERT_encoder[0][13]'] \n",
" \n",
" classifier (Dense) (None, 1) 257 ['dropout_1[0][0]'] \n",
" \n",
"==================================================================================================\n",
"Total params: 13,549,058\n",
"Trainable params: 13,549,057\n",
"Non-trainable params: 1\n",
"__________________________________________________________________________________________________\n",
"Training model with https://tfhub.dev/google/electra_small/2\n",
"Epoch 1/34\n",
"42/42 [==============================] - 37s 255ms/step - loss: 0.5633 - binary_accuracy: 0.7036 - val_loss: 0.3529 - val_binary_accuracy: 0.8482\n",
"Epoch 2/34\n",
"42/42 [==============================] - 10s 237ms/step - loss: 0.2114 - binary_accuracy: 0.9289 - val_loss: 0.0842 - val_binary_accuracy: 0.9773\n",
"Epoch 3/34\n",
"42/42 [==============================] - 10s 237ms/step - loss: 0.0651 - binary_accuracy: 0.9813 - val_loss: 0.0626 - val_binary_accuracy: 0.9791\n",
"Epoch 4/34\n",
"42/42 [==============================] - 10s 237ms/step - loss: 0.0376 - binary_accuracy: 0.9903 - val_loss: 0.0551 - val_binary_accuracy: 0.9843\n",
"Epoch 5/34\n",
"42/42 [==============================] - 10s 236ms/step - loss: 0.0199 - binary_accuracy: 0.9948 - val_loss: 0.0757 - val_binary_accuracy: 0.9773\n",
"Epoch 6/34\n",
"42/42 [==============================] - 10s 237ms/step - loss: 0.0143 - binary_accuracy: 0.9970 - val_loss: 0.0709 - val_binary_accuracy: 0.9773\n",
"Epoch 7/34\n",
"42/42 [==============================] - 10s 235ms/step - loss: 0.0080 - binary_accuracy: 0.9978 - val_loss: 0.0553 - val_binary_accuracy: 0.9843\n",
"Epoch 8/34\n",
"42/42 [==============================] - 10s 239ms/step - loss: 0.0042 - binary_accuracy: 0.9993 - val_loss: 0.0608 - val_binary_accuracy: 0.9860\n",
"Epoch 9/34\n",
"42/42 [==============================] - 10s 244ms/step - loss: 0.0027 - binary_accuracy: 1.0000 - val_loss: 0.0617 - val_binary_accuracy: 0.9860\n",
"Epoch 10/34\n",
"42/42 [==============================] - 10s 239ms/step - loss: 0.0018 - binary_accuracy: 1.0000 - val_loss: 0.0679 - val_binary_accuracy: 0.9843\n",
"Epoch 11/34\n",
"42/42 [==============================] - 10s 239ms/step - loss: 0.0014 - binary_accuracy: 1.0000 - val_loss: 0.0721 - val_binary_accuracy: 0.9843\n",
"Epoch 12/34\n",
"42/42 [==============================] - 10s 236ms/step - loss: 0.0011 - binary_accuracy: 1.0000 - val_loss: 0.0772 - val_binary_accuracy: 0.9843\n",
"Epoch 13/34\n",
"42/42 [==============================] - 10s 235ms/step - loss: 0.0026 - binary_accuracy: 0.9993 - val_loss: 0.0946 - val_binary_accuracy: 0.9756\n",
"Epoch 14/34\n",
"42/42 [==============================] - 10s 236ms/step - loss: 0.0048 - binary_accuracy: 0.9985 - val_loss: 0.0864 - val_binary_accuracy: 0.9791\n",
"Epoch 15/34\n",
"42/42 [==============================] - 10s 237ms/step - loss: 0.0021 - binary_accuracy: 1.0000 - val_loss: 0.0864 - val_binary_accuracy: 0.9791\n",
"Epoch 16/34\n",
"42/42 [==============================] - 10s 238ms/step - loss: 0.0011 - binary_accuracy: 1.0000 - val_loss: 0.0792 - val_binary_accuracy: 0.9825\n",
"Epoch 17/34\n",
"42/42 [==============================] - 10s 232ms/step - loss: 8.2108e-04 - binary_accuracy: 1.0000 - val_loss: 0.0830 - val_binary_accuracy: 0.9808\n",
"Epoch 18/34\n",
"42/42 [==============================] - 10s 235ms/step - loss: 7.7923e-04 - binary_accuracy: 1.0000 - val_loss: 0.0829 - val_binary_accuracy: 0.9825\n",
"Epoch 19/34\n",
"42/42 [==============================] - 10s 237ms/step - loss: 6.6255e-04 - binary_accuracy: 1.0000 - val_loss: 0.0806 - val_binary_accuracy: 0.9843\n",
"Epoch 20/34\n",
"42/42 [==============================] - 10s 238ms/step - loss: 6.3440e-04 - binary_accuracy: 1.0000 - val_loss: 0.0848 - val_binary_accuracy: 0.9825\n",
"Epoch 21/34\n",
"42/42 [==============================] - 10s 237ms/step - loss: 5.3386e-04 - binary_accuracy: 1.0000 - val_loss: 0.0854 - val_binary_accuracy: 0.9825\n",
"Epoch 22/34\n",
"42/42 [==============================] - 10s 236ms/step - loss: 5.6435e-04 - binary_accuracy: 1.0000 - val_loss: 0.0874 - val_binary_accuracy: 0.9825\n",
"Epoch 23/34\n",
"42/42 [==============================] - 10s 235ms/step - loss: 5.6810e-04 - binary_accuracy: 1.0000 - val_loss: 0.0868 - val_binary_accuracy: 0.9825\n",
"Epoch 24/34\n",
"42/42 [==============================] - 10s 235ms/step - loss: 4.5731e-04 - binary_accuracy: 1.0000 - val_loss: 0.0867 - val_binary_accuracy: 0.9825\n",
"Epoch 25/34\n",
"42/42 [==============================] - 10s 237ms/step - loss: 4.4671e-04 - binary_accuracy: 1.0000 - val_loss: 0.0877 - val_binary_accuracy: 0.9825\n",
"Epoch 26/34\n",
"42/42 [==============================] - 10s 237ms/step - loss: 4.9401e-04 - binary_accuracy: 1.0000 - val_loss: 0.0900 - val_binary_accuracy: 0.9825\n",
"Epoch 27/34\n",
"42/42 [==============================] - 10s 238ms/step - loss: 3.6857e-04 - binary_accuracy: 1.0000 - val_loss: 0.0903 - val_binary_accuracy: 0.9825\n",
"Epoch 28/34\n",
"42/42 [==============================] - 10s 235ms/step - loss: 3.8106e-04 - binary_accuracy: 1.0000 - val_loss: 0.0905 - val_binary_accuracy: 0.9825\n",
"Epoch 29/34\n",
"42/42 [==============================] - 10s 236ms/step - loss: 3.7847e-04 - binary_accuracy: 1.0000 - val_loss: 0.0910 - val_binary_accuracy: 0.9825\n",
"Epoch 30/34\n",
"42/42 [==============================] - 10s 236ms/step - loss: 3.7153e-04 - binary_accuracy: 1.0000 - val_loss: 0.0911 - val_binary_accuracy: 0.9825\n",
"Epoch 31/34\n",
"42/42 [==============================] - 10s 237ms/step - loss: 3.4990e-04 - binary_accuracy: 1.0000 - val_loss: 0.0913 - val_binary_accuracy: 0.9825\n",
"Epoch 32/34\n",
"42/42 [==============================] - 10s 236ms/step - loss: 3.6358e-04 - binary_accuracy: 1.0000 - val_loss: 0.0911 - val_binary_accuracy: 0.9825\n",
"Epoch 33/34\n",
"42/42 [==============================] - 10s 236ms/step - loss: 3.5723e-04 - binary_accuracy: 1.0000 - val_loss: 0.0910 - val_binary_accuracy: 0.9825\n",
"Epoch 34/34\n",
"42/42 [==============================] - 10s 234ms/step - loss: 3.7230e-04 - binary_accuracy: 1.0000 - val_loss: 0.0915 - val_binary_accuracy: 0.9825\n",
"18/18 [==============================] - 1s 64ms/step - loss: 0.0915 - binary_accuracy: 0.9825\n",
"Loss: 0.09151919186115265\n",
"Accuracy: 0.9825479984283447\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"WARNING:absl:Found untraced functions such as restored_function_body, restored_function_body, restored_function_body, restored_function_body, restored_function_body while saving (showing 5 of 360). These functions will not be directly callable after loading.\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"INFO:tensorflow:Assets written to: ./bert_models/1670690385.641988/assets\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"INFO:tensorflow:Assets written to: ./bert_models/1670690385.641988/assets\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Message: \"Greg, can you call me back once you get this?\"\n",
"Likeliness of spam in percentage: 0.000258\n",
"Vote by AI: Not Spam\n",
"Model predicted correctly\n",
"\n",
"\n",
"Message: \"Congrats on your new iPhone! Click here to claim your prize...\"\n",
"Likeliness of spam in percentage: 0.858939\n",
"Vote by AI: Spam\n",
"Model predicted correctly\n",
"\n",
"\n",
"Message: \"Really like that new photo of you\"\n",
"Likeliness of spam in percentage: 0.007752\n",
"Vote by AI: Not Spam\n",
"Model predicted correctly\n",
"\n",
"\n",
"Message: \"Did you hear the news today? Terrible what has happened...\"\n",
"Likeliness of spam in percentage: 0.000284\n",
"Vote by AI: Not Spam\n",
"Model predicted correctly\n",
"\n",
"\n",
"Message: \"Attend this free COVID webinar today: Book your session now...\"\n",
"Likeliness of spam in percentage: 0.998953\n",
"Vote by AI: Spam\n",
"Model predicted correctly\n",
"\n",
"\n",
"Message: \"Are you coming to the party tonight?\"\n",
"Likeliness of spam in percentage: 0.007384\n",
"Vote by AI: Not Spam\n",
"Model predicted correctly\n",
"\n",
"\n",
"Message: \"Your parcel has gone missing\"\n",
"Likeliness of spam in percentage: 0.003767\n",
"Vote by AI: Not Spam\n",
"Model predicted correctly\n",
"\n",
"\n",
"Message: \"Do not forget to bring friends!\"\n",
"Likeliness of spam in percentage: 0.000505\n",
"Vote by AI: Not Spam\n",
"Model predicted correctly\n",
"\n",
"\n",
"Message: \"You have won a million dollars! Fill out your bank details here...\"\n",
"Likeliness of spam in percentage: 0.947860\n",
"Vote by AI: Spam\n",
"Model predicted correctly\n",
"\n",
"\n",
"Message: \"Looking forward to seeing you again\"\n",
"Likeliness of spam in percentage: 0.152917\n",
"Vote by AI: Not Spam\n",
"Model predicted correctly\n",
"\n",
"\n",
"Message: \"oh wow https://github.com/MGCodesandStats/tensorflow-nlp/blob/master/spam%20detection%20tensorflow%20v2.ipynb works really good on spam detection. Guess I go with that as the base model then lol :D\"\n",
"Likeliness of spam in percentage: 0.999562\n",
"Vote by AI: Spam\n",
"Model failed to predict correctly\n",
"\n",
"\n",
"Message: \"ayo\"\n",
"Likeliness of spam in percentage: 0.027977\n",
"Vote by AI: Not Spam\n",
"Model predicted correctly\n",
"\n",
"\n",
"Message: \"Almost all my spam is coming to my non-gmail address actually\"\n",
"Likeliness of spam in percentage: 0.057861\n",
"Vote by AI: Not Spam\n",
"Model predicted correctly\n",
"\n",
"\n",
"Message: \"Oh neat I think I found the sizing sweetspot for my data :D\"\n",
"Likeliness of spam in percentage: 0.000136\n",
"Vote by AI: Not Spam\n",
"Model predicted correctly\n",
"\n",
"\n",
"Message: \"would never click on buttons in gmail :D always expecting there to be a bug in gmail that allows js to grab your google credentials :D XSS via email lol. I am too scared for touching spam in gmail\"\n",
"Likeliness of spam in percentage: 0.212154\n",
"Vote by AI: Not Spam\n",
"Model predicted correctly\n",
"\n",
"\n",
"Message: \"back to cacophony \"\n",
"Likeliness of spam in percentage: 0.066419\n",
"Vote by AI: Not Spam\n",
"Model predicted correctly\n",
"\n",
"\n",
"Message: \"Room version 11 when\"\n",
"Likeliness of spam in percentage: 0.944931\n",
"Vote by AI: Spam\n",
"Model failed to predict correctly\n",
"\n",
"\n",
"Message: \"skip 11 and go straight to 12\"\n",
"Likeliness of spam in percentage: 0.006444\n",
"Vote by AI: Not Spam\n",
"Model predicted correctly\n",
"\n",
"\n",
"Message: \"100 events should clear out any events that might be causing a request to fail lol\"\n",
"Likeliness of spam in percentage: 0.019123\n",
"Vote by AI: Not Spam\n",
"Model predicted correctly\n",
"\n",
"\n",
"Message: \"I'll help anyone interested on how to invest and earn $30k, $50k, $100k, $200k or more in just 72hours from the crypto market.But you will have to pay me my commission! when you receive your profit! if interested send me a direct message let's get started or via WhatsApp +1 (605) 9536801\"\n",
"Likeliness of spam in percentage: 0.999778\n",
"Vote by AI: Spam\n",
"Model predicted correctly\n",
"\n",
"\n",
"18 out of 20 are detected correctly\n",
"\n"
]
},
{
"data": {
"image/png": "",
"text/plain": [
"<Figure size 640x480 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"image/png": "",
"text/plain": [
"<Figure size 1000x600 with 2 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# %pip (rather than !pip3) installs into the environment of the running kernel.\n",
"# tensorflow-hub and matplotlib are listed because this notebook imports them directly.\n",
"%pip install --quiet tensorflow-text tensorflow-hub numpy pandas matplotlib tf-models-official\n",
"\n",
"import csv\n",
"import numpy as np\n",
"import tensorflow as tf\n",
"import tensorflow_hub as hub\n",
"import pandas as pd\n",
"import tensorflow as tf\n",
"import tensorflow_hub as hub\n",
"import tensorflow_models as tfm\n",
"#from official.nlp import optimization # to create AdamW optimizer\n",
"import tensorflow_text as text # needed even if unused\n",
"import time\n",
"\n",
"import matplotlib.pyplot as plt\n",
"\n",
"def change_labels(x):\n",
"    \"\"\"Map a raw label to its integer class: 1 for the string `spam`, 0 for anything else.\"\"\"\n",
"    is_spam = x == \"spam\"\n",
"    if is_spam:\n",
"        return 1\n",
"    return 0\n",
"\n",
"# Seed the Python, NumPy and TensorFlow RNGs so repeated runs start from the same state\n",
"SEED = 42\n",
"tf.keras.utils.set_random_seed(SEED)\n",
"\n",
"data = pd.read_csv(\n",
"    \"./input/MatrixData.tsv\", sep=\"\\t\", quoting=csv.QUOTE_NONE, encoding=\"utf-8\"\n",
")\n",
"\n",
"# Minimum length: keep messages with more than 18 whitespace-separated tokens\n",
"data = data[data[\"message\"].str.split().str.len().gt(18)]\n",
"# Remove unknown: drop rows with missing values (builds a new frame instead of\n",
"# mutating the filtered slice in place)\n",
"data = data.dropna().reset_index(drop=True)\n",
"data[\"label\"] = data[\"label\"].apply(change_labels)\n",
"\n",
"# Remove stopwords\n",
"#data[\"message\"] = data[\"message\"].apply(remove_stopwords)\n",
"# Shuffle data (seeded, so the split below is identical on every run)\n",
"data = data.sample(frac=1, random_state=SEED).reset_index(drop=True)\n",
"\n",
"# Split data into messages and label sets\n",
"sentences = data[\"message\"].tolist()\n",
"labels = data[\"label\"].tolist()\n",
"\n",
"# Separate out the sentences and labels into training, validation and test sets.\n",
"# 70% is used for training; the remaining 30% is halved so that the validation set\n",
"# (monitored during fit) and the test set (final evaluate) share no examples.\n",
"# training_size = int(len(sentences) * 0.8)\n",
"training_size = int(len(sentences) * 0.7)\n",
"validation_size = (len(sentences) - training_size) // 2\n",
"validation_end = training_size + validation_size\n",
"assert training_size > 0 and validation_size > 0, \"dataset too small to split into train/validation/test\"\n",
"\n",
"training_sentences = sentences[0:training_size]\n",
"validation_sentences = sentences[training_size:validation_end]\n",
"testing_sentences = sentences[validation_end:]\n",
"training_labels = labels[0:training_size]\n",
"validation_labels = labels[training_size:validation_end]\n",
"testing_labels = labels[validation_end:]\n",
"\n",
"# Make labels into numpy arrays for use with the network later\n",
"test_labels = np.array(testing_labels)\n",
"val_labels = np.array(validation_labels)\n",
"train_labels = np.array(training_labels)\n",
"train_examples = np.array(training_sentences)\n",
"val_examples = np.array(validation_sentences)\n",
"test_examples = np.array(testing_sentences)\n",
"\n",
"# Build dataset\n",
"AUTOTUNE = tf.data.AUTOTUNE\n",
"batch_size = 32\n",
"\n",
"raw_train_ds = tf.data.Dataset.from_tensor_slices((train_examples, train_labels))\n",
"train_ds = raw_train_ds.batch(batch_size).cache().prefetch(buffer_size=AUTOTUNE)\n",
"\n",
"raw_val_ds = tf.data.Dataset.from_tensor_slices((val_examples, val_labels))\n",
"val_ds = raw_val_ds.batch(batch_size).cache().prefetch(buffer_size=AUTOTUNE)\n",
"\n",
"raw_test_ds = tf.data.Dataset.from_tensor_slices((test_examples, test_labels))\n",
"test_ds = raw_test_ds.batch(batch_size).cache().prefetch(buffer_size=AUTOTUNE)\n",
"\n",
"# TF Hub handles for the text preprocessing model and the encoder\n",
"# Alternative https://tfhub.dev/google/electra_small/2\n",
"tfhub_handle_preprocess = 'https://tfhub.dev/tensorflow/bert_en_uncased_preprocess/3'\n",
"tfhub_handle_encoder = 'https://tfhub.dev/google/electra_small/2'\n",
"\n",
"def build_classifier_model():\n",
"    \"\"\"Assemble the Keras classifier used in this notebook.\n",
"\n",
"    Pipeline: scalar string input -> TF Hub preprocessing layer -> TF Hub encoder\n",
"    (created with trainable=True) -> dropout (rate 0.1) on the encoder's\n",
"    `pooled_output` -> one-unit Dense layer with no activation.\n",
"\n",
"    The Hub handles are read from the module-level `tfhub_handle_preprocess` and\n",
"    `tfhub_handle_encoder`. Because the last layer has activation=None, the model\n",
"    returns raw scores rather than probabilities.\n",
"    \"\"\"\n",
"    raw_text = tf.keras.layers.Input(shape=(), dtype=tf.string, name='text')\n",
"    preprocessed = hub.KerasLayer(tfhub_handle_preprocess, name='preprocessing')(raw_text)\n",
"    encoded = hub.KerasLayer(tfhub_handle_encoder, trainable=True, name='BERT_encoder')(preprocessed)\n",
"    pooled = encoded['pooled_output']\n",
"    pooled = tf.keras.layers.Dropout(0.1)(pooled)\n",
"    score = tf.keras.layers.Dense(1, activation=None, name='classifier')(pooled)\n",
"    return tf.keras.Model(raw_text, score)\n",
"\n",
"\n",
"# Build the network, print its layer table and render its graph\n",
"classifier_model = build_classifier_model()\n",
"classifier_model.summary()\n",
"tf.keras.utils.plot_model(classifier_model, show_dtype=True)\n",
"\n",
"# from_logits=True: the loss is given raw model outputs, not probabilities\n",
"loss = tf.keras.losses.BinaryCrossentropy(from_logits=True)\n",
"metrics = tf.metrics.BinaryAccuracy()\n",
"\n",
"# Step budget: batches per epoch times epochs; a tenth of it is reserved for warmup\n",
"epochs = 34\n",
"init_lr = 3e-5\n",
"steps_per_epoch = tf.data.experimental.cardinality(train_ds).numpy()\n",
"num_train_steps = steps_per_epoch * epochs\n",
"num_warmup_steps = int(0.1 * num_train_steps)\n",
"\n",
"# Learning-rate schedule: decay from init_lr to 0 over num_train_steps, wrapped in a\n",
"# LinearWarmup that starts at 0 and lasts num_warmup_steps\n",
"linear_decay = tf.keras.optimizers.schedules.PolynomialDecay(\n",
"    initial_learning_rate=init_lr,\n",
"    decay_steps=num_train_steps,\n",
"    end_learning_rate=0,\n",
")\n",
"warmup_schedule = tfm.optimization.lr_schedule.LinearWarmup(\n",
"    after_warmup_lr_sched=linear_decay,\n",
"    warmup_steps=num_warmup_steps,\n",
"    warmup_learning_rate=0,\n",
")\n",
"\n",
"# Sample the schedule at 1001 evenly spaced steps and plot it\n",
"schedule_steps = tf.linspace(0, num_train_steps, 1001)\n",
"schedule_rates = [warmup_schedule(step) for step in schedule_steps]\n",
"plt.plot(schedule_steps, schedule_rates)\n",
"plt.xlabel('Train step')\n",
"plt.ylabel('Learning rate')\n",
"\n",
"# Plain Adam driven by the schedule (official.nlp's create_optimizer / AdamW is not used here)\n",
"optimizer = tf.keras.optimizers.experimental.Adam(learning_rate=warmup_schedule)\n",
"\n",
"classifier_model.compile(loss=loss, metrics=metrics, optimizer=optimizer)\n",
"\n",
"print(f'Training model with {tfhub_handle_encoder}')\n",
"history = classifier_model.fit(x=train_ds,\n",
"                               validation_data=val_ds,\n",
"                               epochs=epochs)\n",
"\n",
"# Separate names for the evaluation results: `loss` stays the Keras loss object that\n",
"# was passed to compile() and `epochs` stays the epoch count passed to fit().\n",
"test_loss, test_accuracy = classifier_model.evaluate(test_ds)\n",
"\n",
"print(f'Loss: {test_loss}')\n",
"print(f'Accuracy: {test_accuracy}')\n",
"\n",
"\n",
"# Per-epoch metrics recorded by fit()\n",
"history_dict = history.history\n",
"\n",
"acc = history_dict['binary_accuracy']\n",
"val_acc = history_dict['val_binary_accuracy']\n",
"train_loss = history_dict['loss']\n",
"val_loss = history_dict['val_loss']\n",
"\n",
"epoch_numbers = range(1, len(acc) + 1)\n",
"fig, (loss_ax, acc_ax) = plt.subplots(2, 1, figsize=(10, 6))\n",
"\n",
"# r is for \"solid red line\"\n",
"loss_ax.plot(epoch_numbers, train_loss, 'r', label='Training loss')\n",
"# b is for \"solid blue line\"\n",
"loss_ax.plot(epoch_numbers, val_loss, 'b', label='Validation loss')\n",
"loss_ax.set_title('Training and validation loss')\n",
"loss_ax.set_ylabel('Loss')\n",
"loss_ax.legend()\n",
"\n",
"acc_ax.plot(epoch_numbers, acc, 'r', label='Training acc')\n",
"acc_ax.plot(epoch_numbers, val_acc, 'b', label='Validation acc')\n",
"acc_ax.set_title('Training and validation accuracy')\n",
"acc_ax.set_xlabel('Epochs')\n",
"acc_ax.set_ylabel('Accuracy')\n",
"acc_ax.legend(loc='lower right')\n",
"\n",
"# tight_layout must run after the axes, titles and labels exist; calling it on an\n",
"# empty figure has nothing to adjust\n",
"fig.tight_layout()\n",
"\n",
"\n",
"# Save under a timestamped directory so earlier exports are not overwritten\n",
"saved_model_path = f'./bert_models/{time.time()}'\n",
"\n",
"classifier_model.save(saved_model_path, include_optimizer=False)\n",
"\n",
"def test_model(model, threshold=0.8):\n",
"    \"\"\"Run `model` on a fixed set of hand-labelled messages and print a report.\n",
"\n",
"    Args:\n",
"        model: callable taking a 1-D string tensor and returning one raw score per\n",
"            message, indexable as result[i][0]. The scores are passed through a\n",
"            sigmoid here, so the model itself must not apply one.\n",
"        threshold: probability at or above which a message is voted spam.\n",
"            Defaults to 0.8, the value that used to be hard-coded.\n",
"\n",
"    Prints, for every message, the spam likeliness as a percentage, the vote and\n",
"    whether the vote matches the expected label, followed by a final tally.\n",
"    Returns None.\n",
"    \"\"\"\n",
"    # (message, is_spam) pairs kept side by side so texts and labels cannot drift apart\n",
"    labelled_messages = [\n",
"        (\"Greg, can you call me back once you get this?\", False),\n",
"        (\"Congrats on your new iPhone! Click here to claim your prize...\", True),\n",
"        (\"Really like that new photo of you\", False),\n",
"        (\"Did you hear the news today? Terrible what has happened...\", False),\n",
"        (\"Attend this free COVID webinar today: Book your session now...\", True),\n",
"        (\"Are you coming to the party tonight?\", False),\n",
"        (\"Your parcel has gone missing\", False),\n",
"        (\"Do not forget to bring friends!\", False),\n",
"        (\"You have won a million dollars! Fill out your bank details here...\", True),\n",
"        (\"Looking forward to seeing you again\", False),\n",
"        (\"oh wow https://github.com/MGCodesandStats/tensorflow-nlp/blob/master/spam%20detection%20tensorflow%20v2.ipynb works really good on spam detection. Guess I go with that as the base model then lol :D\", False),\n",
"        (\"ayo\", False),\n",
"        (\"Almost all my spam is coming to my non-gmail address actually\", False),\n",
"        (\"Oh neat I think I found the sizing sweetspot for my data :D\", False),\n",
"        (\"would never click on buttons in gmail :D always expecting there to be a bug in gmail that allows js to grab your google credentials :D XSS via email lol. I am too scared for touching spam in gmail\", False),\n",
"        (\"back to cacophony \", False),\n",
"        (\"Room version 11 when\", False),\n",
"        (\"skip 11 and go straight to 12\", False),\n",
"        (\"100 events should clear out any events that might be causing a request to fail lol\", False),\n",
"        (\"I'll help anyone interested on how to invest and earn $30k, $50k, $100k, $200k or more in just 72hours from the crypto market.But you will have to pay me my commission! when you receive your profit! if interested send me a direct message let's get started or via WhatsApp +1 (605) 9536801\", True),\n",
"    ]\n",
"    text_messages = [message for message, _ in labelled_messages]\n",
"\n",
"    # Sigmoid turns the raw scores into probabilities; converting to a NumPy array\n",
"    # lets the comparisons below produce plain Python bools instead of tensors\n",
"    probabilities = tf.sigmoid(model(tf.constant(text_messages))).numpy()[:, 0]\n",
"\n",
"    # The closer the probability is to 1, the more likely that the message is spam\n",
"    correct = 0\n",
"    for (message, is_spam), probability in zip(labelled_messages, probabilities):\n",
"        probability = float(probability)\n",
"        print(f'Message: \"{message}\"')\n",
"        # Scale to 0-100 so the printed number really is a percentage\n",
"        print(f\"Likeliness of spam in percentage: {probability * 100:.4f}%\")\n",
"        voted_spam = probability >= threshold\n",
"        if voted_spam:\n",
"            print(\"Vote by AI: Spam\")\n",
"        else:\n",
"            print(\"Vote by AI: Not Spam\")\n",
"\n",
"        if voted_spam == is_spam:\n",
"            correct += 1\n",
"            print(\"Model predicted correctly\")\n",
"        else:\n",
"            print(\"Model failed to predict correctly\")\n",
"        print(\"\\n\")\n",
"    print(f\"{correct} out of {len(labelled_messages)} are detected correctly\\n\")\n",
"\n",
"\n",
"test_model(classifier_model)\n"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3.9.15 ('tf')",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.15"
},
"orig_nbformat": 4,
"vscode": {
"interpreter": {
"hash": "86eece18b6898e5d361741678d0e9a4298e9b9ab2411f93d35b863e6e254e93a"
}
}
},
"nbformat": 4,
"nbformat_minor": 2
}