Add small bot

This commit is contained in:
MTRNord 2022-12-10 19:02:35 +01:00
parent 6328a8b0e4
commit df29ac59f4
11 changed files with 5226 additions and 199 deletions

6
.gitignore vendored
View File

@ -6,4 +6,8 @@
/wandb
/data
/conda
/model.png
/model.png
node_modules
ml-bot.json
config.yaml
ml-bot-store

View File

@ -12,8 +12,6 @@ The current code base is fast moving. Expect to change rapidly.
To train the model, you need to have a set of labeled data.
This data is at `./input/MatrixData`. It is a TSV file.
Please note that URLs should not be added as well as newlines.
Newlines will be stripped anyway and URLs tend to break the model result.
To train the model, run `python3 model_v2.py`. This will train the model and save it to `./model/`.
Please make sure you installed tensorflow.

File diff suppressed because one or more lines are too long

33
bot/dist/index.js vendored Normal file
View File

@ -0,0 +1,33 @@
import { MatrixClient, SimpleFsStorageProvider, AutojoinRoomsMixin, MessageEvent, } from "matrix-bot-sdk";
import { readFile } from "fs/promises";
import { load } from "js-yaml";
import * as tf from "@tensorflow/tfjs-node";
const config = load(await readFile("./config.yaml", "utf8")); // compiled from ../src/index.ts (see index.js.map); parses ./config.yaml with js-yaml, result is used without validation
const homeserverUrl = config.homeserver; // homeserver URL passed to MatrixClient below
const accessToken = config.accessToken; // access token passed to MatrixClient below
const storage = new SimpleFsStorageProvider("ml-bot.json"); // client state is stored in ml-bot.json
const model = await tf.node.loadSavedModel(config.modelPath); // load the TensorFlow SavedModel once at startup; handleMessage reuses it
const client = new MatrixClient(homeserverUrl, accessToken, storage);
AutojoinRoomsMixin.setupOnClient(client); // accept room invites automatically
client.on("room.message", handleMessage); // handleMessage is a hoisted function declaration, so registering it here is fine
client.start().then(() => console.log("Bot started!")); // start the client; logs once start() resolves (no rejection handler attached)
async function handleMessage(roomId, event) { // "room.message" handler: runs the model on a text message and reacts when it is classified as spam
if (event['content']?.['msgtype'] !== 'm.text') // skip anything that is not an m.text message (also covers events without content)
return;
if (event['sender'] === await client.getUserId()) // skip messages sent by this client's own user
return;
const body = event['content']['body']; // NOTE(review): not checked to be a string before it is handed to tf.tensor
console.log(`Checking: "${body}"`);
const data = tf.tensor([body]); // wrap the body in a one-element tensor as model input
const prediction = model.predict(data); // NOTE(review): neither `data` nor `prediction` is disposed afterwards
const prediction_data = await prediction.array(); // copy the prediction into a plain nested array
console.log(`Prediction: ${prediction_data}`);
const message = new MessageEvent(event);
const textEvent = new MessageEvent(message.raw); // second wrapper around the same raw event; only eventId is read from it
if (((prediction_data[0] ?? [])[0] ?? 0) > 0.8) { // first value of the first row, defaulting to 0 when missing, compared against the 0.8 threshold
await client.unstableApis.addReactionToEvent(roomId, textEvent.eventId, "Classified Spam"); // mark the message with a reaction
}
else { // messages at or below the threshold get no reaction
}
}
//# sourceMappingURL=index.js.map

1
bot/dist/index.js.map vendored Normal file
View File

@ -0,0 +1 @@
{"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,OAAO,EACH,YAAY,EACZ,uBAAuB,EACvB,kBAAkB,EAClB,YAAY,GAGf,MAAM,gBAAgB,CAAC;AACxB,OAAO,EAAE,QAAQ,EAAE,MAAM,aAAa,CAAC;AACvC,OAAO,EAAE,IAAI,EAAE,MAAM,SAAS,CAAC;AAC/B,OAAO,KAAK,EAAE,MAAM,uBAAuB,CAAC;AAS5C,MAAM,MAAM,GAAG,IAAI,CAAC,MAAM,QAAQ,CAAC,eAAe,EAAE,MAAM,CAAC,CAAW,CAAC;AAKvE,MAAM,aAAa,GAAG,MAAM,CAAC,UAAU,CAAC;AAGxC,MAAM,WAAW,GAAG,MAAM,CAAC,WAAW,CAAC;AAKvC,MAAM,OAAO,GAAG,IAAI,uBAAuB,CAAC,aAAa,CAAC,CAAC;AAI3D,MAAM,KAAK,GAAG,MAAM,EAAE,CAAC,IAAI,CAAC,cAAc,CAAC,MAAM,CAAC,SAAS,CAAC,CAAC;AAK7D,MAAM,MAAM,GAAG,IAAI,YAAY,CAAC,aAAa,EAAE,WAAW,EAAE,OAAO,CAAC,CAAC;AACrE,kBAAkB,CAAC,aAAa,CAAC,MAAM,CAAC,CAAC;AAGzC,MAAM,CAAC,EAAE,CAAC,cAAc,EAAE,aAAa,CAAC,CAAC;AAGzC,MAAM,CAAC,KAAK,EAAE,CAAC,IAAI,CAAC,GAAG,EAAE,CAAC,OAAO,CAAC,GAAG,CAAC,cAAc,CAAC,CAAC,CAAC;AAGvD,KAAK,UAAU,aAAa,CAAC,MAAc,EAAE,KAAU;IAEnD,IAAI,KAAK,CAAC,SAAS,CAAC,EAAE,CAAC,SAAS,CAAC,KAAK,QAAQ;QAAE,OAAO;IACvD,IAAI,KAAK,CAAC,QAAQ,CAAC,KAAK,MAAM,MAAM,CAAC,SAAS,EAAE;QAAE,OAAO;IAEzD,MAAM,IAAI,GAAG,KAAK,CAAC,SAAS,CAAC,CAAC,MAAM,CAAC,CAAC;IACtC,OAAO,CAAC,GAAG,CAAC,cAAc,IAAI,GAAG,CAAC,CAAA;IAGlC,MAAM,IAAI,GAAG,EAAE,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,CAAC,CAAA;IAC9B,MAAM,UAAU,GAAiB,KAAK,CAAC,OAAO,CAAC,IAAI,CAAiB,CAAC;IACrE,MAAM,eAAe,GAAe,MAAM,UAAU,CAAC,KAAK,EAAgB,CAAC;IAC3E,OAAO,CAAC,GAAG,CAAC,eAAe,eAAe,EAAE,CAAC,CAAC;IAG9C,MAAM,OAAO,GAAG,IAAI,YAAY,CAAC,KAAK,CAAC,CAAC;IACxC,MAAM,SAAS,GAAG,IAAI,YAAY,CAA6B,OAAO,CAAC,GAAG,CAAC,CAAC;IAC5E,IAAI,CAAC,CAAC,eAAe,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,GAAG,GAAG,EAAE;QAC5C,MAAM,MAAM,CAAC,YAAY,CAAC,kBAAkB,CAAC,MAAM,EAAE,SAAS,CAAC,OAAO,EAAE,iBAAiB,CAAC,CAAA;KAC7F;SAAM;KAEN;AAGL,CAAC"}

4824
bot/package-lock.json generated Normal file

File diff suppressed because it is too large Load Diff

21
bot/package.json Normal file
View File

@ -0,0 +1,21 @@
{
"name": "spam-ml-bot",
"version": "0.1.0",
"description": "A Matrix Bot that warns admins about possible spam",
"main": "dist/index.js",
"type": "module",
"scripts": {
"build": "tsc"
},
"author": "MTRNord <mtrnord@nordgedanken.dev>",
"license": "Apache-2.0",
"devDependencies": {
"@types/js-yaml": "^4.0.5",
"typescript": "^4.9.4"
},
"dependencies": {
"@tensorflow/tfjs-node": "^4.1.0",
"js-yaml": "^4.1.0",
"matrix-bot-sdk": "^0.6.2"
}
}

76
bot/src/index.ts Normal file
View File

@ -0,0 +1,76 @@
import {
MatrixClient,
SimpleFsStorageProvider,
AutojoinRoomsMixin,
MessageEvent,
TextualMessageEventContent,
//RustSdkCryptoStorageProvider,
} from "matrix-bot-sdk";
import { readFile } from "fs/promises";
import { load } from "js-yaml";
import * as tf from "@tensorflow/tfjs-node";
import { Rank, Tensor } from "@tensorflow/tfjs-node";
/** Shape of `./config.yaml` as read by the bot at startup. */
interface Config {
    /** Homeserver URL handed to `MatrixClient`. */
    homeserver: string;
    /** Access token handed to `MatrixClient`. */
    accessToken: string;
    /** Path passed to `tf.node.loadSavedModel`. */
    modelPath: string;
}
/**
 * Type guard for the parsed YAML: true only when every field the bot reads
 * (`homeserver`, `accessToken`, `modelPath`) is a non-empty string.
 */
function isConfig(value: unknown): value is Config {
    if (typeof value !== "object" || value === null) return false;
    const fields = value as Record<string, unknown>;
    return ["homeserver", "accessToken", "modelPath"].every((key) => {
        const field = fields[key];
        return typeof field === "string" && field.length > 0;
    });
}

// js-yaml hands back untyped data, so check it before use instead of casting:
// an empty or incomplete config.yaml should fail here with a readable message.
const parsedConfig: unknown = load(await readFile("./config.yaml", "utf8"));
if (!isConfig(parsedConfig)) {
    throw new Error(
        'config.yaml must define non-empty string values for "homeserver", "accessToken" and "modelPath"',
    );
}
const config: Config = parsedConfig;

// URL where clients can reach the homeserver. Note that this might be different
// from where the web/chat interface is hosted.
const homeserverUrl = config.homeserver;

// Access token of the bot account.
const accessToken = config.accessToken;

// In order to make sure the bot doesn't lose its state between restarts, we give it a
// JSON file to cache any information it needs.
const storage = new SimpleFsStorageProvider("ml-bot.json");

// Crypto storage is currently disabled because it was found to be broken:
//const cryptoProvider = new RustSdkCryptoStorageProvider("./ml-bot-store");

// Load the saved model once at startup; the message handler reuses it.
const model = await tf.node.loadSavedModel(config.modelPath);

// Create the client and set it to autojoin rooms so the bot can easily be added to any room.
//const client = new MatrixClient(homeserverUrl, accessToken, storage, cryptoProvider);
const client = new MatrixClient(homeserverUrl, accessToken, storage);
AutojoinRoomsMixin.setupOnClient(client);

// Register the message handler before starting the bot.
client.on("room.message", handleMessage);

// Start the bot. This starts the sync loop, which runs until the process is killed.
// A failed start rejects this top-level await and ends the process with the error.
await client.start();
console.log("Bot started!");
/**
 * Runs the spam model on a single message body.
 *
 * The input and output tensors are disposed explicitly once the result has
 * been copied out; otherwise every handled message would leak tensor memory.
 *
 * @param text Plain-text message body to classify.
 * @returns The first value of the first prediction row, or 0 when the model
 *          returned no value.
 */
async function predictSpamScore(text: string): Promise<number> {
    const input = tf.tensor([text]);
    try {
        const prediction = model.predict(input) as Tensor<Rank>;
        try {
            const scores = await prediction.array() as number[][];
            console.log(`Prediction: ${scores}`);
            return scores[0]?.[0] ?? 0;
        } finally {
            prediction.dispose();
        }
    } finally {
        input.dispose();
    }
}

/**
 * `room.message` handler: classifies text messages and reacts with
 * "Classified Spam" when the model score exceeds the threshold.
 *
 * Failures are logged and swallowed on purpose. The handler is invoked as an
 * event-emitter callback, so a rejected promise would otherwise surface as an
 * unhandled rejection instead of affecting only this one message.
 *
 * @param roomId Room the event was received in.
 * @param event  Raw Matrix event as delivered by the client.
 */
async function handleMessage(roomId: string, event: any): Promise<void> {
    // Scores strictly above this value are flagged as spam.
    const spamThreshold = 0.8;

    try {
        // Don't handle unhelpful events (ones that aren't text messages, are redacted, or sent by us)
        if (event['content']?.['msgtype'] !== 'm.text') return;
        if (event['sender'] === await client.getUserId()) return;

        // The event comes from the network: make sure the body really is a string
        // before building a tensor from it.
        const body: unknown = event['content']['body'];
        if (typeof body !== 'string') return;
        console.log(`Checking: "${body}"`);

        const score = await predictSpamScore(body);
        if (score > spamThreshold) {
            const textEvent = new MessageEvent<TextualMessageEventContent>(event);
            await client.unstableApis.addReactionToEvent(roomId, textEvent.eventId, "Classified Spam");
        }
        // Messages that are not classified as spam are intentionally left without a reaction.
    } catch (err: unknown) {
        console.error(`Failed to classify message in ${roomId}:`, err);
    }
}

104
bot/tsconfig.json Normal file
View File

@ -0,0 +1,104 @@
{
"compilerOptions": {
/* Visit https://aka.ms/tsconfig to read more about this file */
/* Projects */
// "incremental": true, /* Save .tsbuildinfo files to allow for incremental compilation of projects. */
// "composite": true, /* Enable constraints that allow a TypeScript project to be used with project references. */
// "tsBuildInfoFile": "./.tsbuildinfo", /* Specify the path to .tsbuildinfo incremental compilation file. */
// "disableSourceOfProjectReferenceRedirect": true, /* Disable preferring source files instead of declaration files when referencing composite projects. */
// "disableSolutionSearching": true, /* Opt a project out of multi-project reference checking when editing. */
// "disableReferencedProjectLoad": true, /* Reduce the number of projects loaded automatically by TypeScript. */
/* Language and Environment */
"target": "ES2022", /* Set the JavaScript language version for emitted JavaScript and include compatible library declarations. */
/* Specify a set of bundled library declaration files that describe the target runtime environment. */
"lib": [
"ESNext"
],
// "jsx": "preserve", /* Specify what JSX code is generated. */
// "experimentalDecorators": true, /* Enable experimental support for TC39 stage 2 draft decorators. */
// "emitDecoratorMetadata": true, /* Emit design-type metadata for decorated declarations in source files. */
// "jsxFactory": "", /* Specify the JSX factory function used when targeting React JSX emit, e.g. 'React.createElement' or 'h'. */
// "jsxFragmentFactory": "", /* Specify the JSX Fragment reference used for fragments when targeting React JSX emit e.g. 'React.Fragment' or 'Fragment'. */
// "jsxImportSource": "", /* Specify module specifier used to import the JSX factory functions when using 'jsx: react-jsx*'. */
// "reactNamespace": "", /* Specify the object invoked for 'createElement'. This only applies when targeting 'react' JSX emit. */
// "noLib": true, /* Disable including any library files, including the default lib.d.ts. */
// "useDefineForClassFields": true, /* Emit ECMAScript-standard-compliant class fields. */
// "moduleDetection": "auto", /* Control what method is used to detect module-format JS files. */
/* Modules */
"module": "ESNext", /* Specify what module code is generated. */
// "rootDir": "./", /* Specify the root folder within your source files. */
"moduleResolution": "node", /* Specify how TypeScript looks up a file from a given module specifier. */
// "baseUrl": "./", /* Specify the base directory to resolve non-relative module names. */
// "paths": {}, /* Specify a set of entries that re-map imports to additional lookup locations. */
// "rootDirs": [], /* Allow multiple folders to be treated as one when resolving modules. */
// "typeRoots": [], /* Specify multiple folders that act like './node_modules/@types'. */
// "types": [], /* Specify type package names to be included without being referenced in a source file. */
// "allowUmdGlobalAccess": true, /* Allow accessing UMD globals from modules. */
// "moduleSuffixes": [], /* List of file name suffixes to search when resolving a module. */
// "resolveJsonModule": true, /* Enable importing .json files. */
// "noResolve": true, /* Disallow 'import's, 'require's or '<reference>'s from expanding the number of files TypeScript should add to a project. */
/* JavaScript Support */
// "allowJs": true, /* Allow JavaScript files to be a part of your program. Use the 'checkJS' option to get errors from these files. */
// "checkJs": true, /* Enable error reporting in type-checked JavaScript files. */
// "maxNodeModuleJsDepth": 1, /* Specify the maximum folder depth used for checking JavaScript files from 'node_modules'. Only applicable with 'allowJs'. */
/* Emit */
// "declaration": true, /* Generate .d.ts files from TypeScript and JavaScript files in your project. */
// "declarationMap": true, /* Create sourcemaps for d.ts files. */
// "emitDeclarationOnly": true, /* Only output d.ts files and not JavaScript files. */
"sourceMap": true, /* Create source map files for emitted JavaScript files. */
// "outFile": "./", /* Specify a file that bundles all outputs into one JavaScript file. If 'declaration' is true, also designates a file that bundles all .d.ts output. */
"outDir": "./dist", /* Specify an output folder for all emitted files. */
"removeComments": true, /* Disable emitting comments. */
// "noEmit": true, /* Disable emitting files from a compilation. */
// "importHelpers": true, /* Allow importing helper functions from tslib once per project, instead of including them per-file. */
// "importsNotUsedAsValues": "remove", /* Specify emit/checking behavior for imports that are only used for types. */
// "downlevelIteration": true, /* Emit more compliant, but verbose and less performant JavaScript for iteration. */
// "sourceRoot": "", /* Specify the root path for debuggers to find the reference source code. */
// "mapRoot": "", /* Specify the location where debugger should locate map files instead of generated locations. */
// "inlineSourceMap": true, /* Include sourcemap files inside the emitted JavaScript. */
// "inlineSources": true, /* Include source code in the sourcemaps inside the emitted JavaScript. */
// "emitBOM": true, /* Emit a UTF-8 Byte Order Mark (BOM) in the beginning of output files. */
// "newLine": "crlf", /* Set the newline character for emitting files. */
// "stripInternal": true, /* Disable emitting declarations that have '@internal' in their JSDoc comments. */
// "noEmitHelpers": true, /* Disable generating custom helper functions like '__extends' in compiled output. */
// "noEmitOnError": true, /* Disable emitting files if any type checking errors are reported. */
// "preserveConstEnums": true, /* Disable erasing 'const enum' declarations in generated code. */
// "declarationDir": "./", /* Specify the output directory for generated declaration files. */
// "preserveValueImports": true, /* Preserve unused imported values in the JavaScript output that would otherwise be removed. */
/* Interop Constraints */
// "isolatedModules": true, /* Ensure that each file can be safely transpiled without relying on other imports. */
// "allowSyntheticDefaultImports": true, /* Allow 'import x from y' when a module doesn't have a default export. */
"esModuleInterop": true, /* Emit additional JavaScript to ease support for importing CommonJS modules. This enables 'allowSyntheticDefaultImports' for type compatibility. */
// "preserveSymlinks": true, /* Disable resolving symlinks to their realpath. This correlates to the same flag in node. */
"forceConsistentCasingInFileNames": true, /* Ensure that casing is correct in imports. */
/* Type Checking */
"strict": true, /* Enable all strict type-checking options. */
"noImplicitAny": true, /* Enable error reporting for expressions and declarations with an implied 'any' type. */
"strictNullChecks": true, /* When type checking, take into account 'null' and 'undefined'. */
// "strictFunctionTypes": true, /* When assigning functions, check to ensure parameters and the return values are subtype-compatible. */
// "strictBindCallApply": true, /* Check that the arguments for 'bind', 'call', and 'apply' methods match the original function. */
// "strictPropertyInitialization": true, /* Check for class properties that are declared but not set in the constructor. */
// "noImplicitThis": true, /* Enable error reporting when 'this' is given the type 'any'. */
// "useUnknownInCatchVariables": true, /* Default catch clause variables as 'unknown' instead of 'any'. */
"alwaysStrict": true, /* Ensure 'use strict' is always emitted. */
"noUnusedLocals": true, /* Enable error reporting when local variables aren't read. */
"noUnusedParameters": true, /* Raise an error when a function parameter isn't read. */
// "exactOptionalPropertyTypes": true, /* Interpret optional property types as written, rather than adding 'undefined'. */
"noImplicitReturns": true, /* Enable error reporting for codepaths that do not explicitly return in a function. */
// "noFallthroughCasesInSwitch": true, /* Enable error reporting for fallthrough cases in switch statements. */
"noUncheckedIndexedAccess": true, /* Add 'undefined' to a type when accessed using an index. */
// "noImplicitOverride": true, /* Ensure overriding members in derived classes are marked with an override modifier. */
// "noPropertyAccessFromIndexSignature": true, /* Enforces using indexed accessors for keys declared using an indexed type. */
// "allowUnusedLabels": true, /* Disable error reporting for unused labels. */
"allowUnreachableCode": false, /* Report unreachable code as an error (setting this to true would disable the check). */
/* Completeness */
// "skipDefaultLibCheck": true, /* Skip type checking .d.ts files that are included with TypeScript. */
"skipLibCheck": true /* Skip type checking all .d.ts files. */
},
"include": [
"./src/**/*.ts"
],
"exclude": [
"node_modules/**/*"
]
}

View File

@ -30,8 +30,7 @@ async fn main() -> Result<()> {
};
let mut graph = Graph::new();
let bundle =
SavedModelBundle::load(&SessionOptions::new(), ["serve"], &mut graph, model_path)?;
let bundle = SavedModelBundle::load(&SessionOptions::new(), ["serve"], &mut graph, model_path)?;
GRAPH.set(graph).unwrap();
MODEL.set(bundle).unwrap();
@ -150,15 +149,15 @@ async fn submit_for_review(
// Sanitize
// We remove newlines, html tags and links
let sanitized = strip::strip_tags(&payload.input_data);
let sanitized = sanitized.replace(['\r', '\n'], " ");
//let sanitized = sanitized.replace(['\r', '\n'], " ");
let mut sanitized = trim_whitespace(&sanitized);
let mut finder = LinkFinder::new();
let cloned_sanitized = sanitized.clone();
finder.url_must_have_scheme(false);
let links: Vec<_> = finder.links(&cloned_sanitized).collect();
for link in links {
sanitized = sanitized.replace(link.as_str(), " ");
}
//let mut finder = LinkFinder::new();
//let cloned_sanitized = sanitized.clone();
//finder.url_must_have_scheme(false);
//let links: Vec<_> = finder.links(&cloned_sanitized).collect();
//for link in links {
// sanitized = sanitized.replace(link.as_str(), " ");
//}
match file {
Ok(mut file) => {
if let Err(e) = writeln!(file, "{}", sanitized) {

View File

@ -5625,4 +5625,7 @@ ham Jae (DN0): why's fluff generator do this? yeah this is another weird one...
spam 1: Register to get 5$ free 2: 7% -23% of daily income 3: The referral team can get 19% commission. Contact through the link to get started 👇👇👇👇👇 https://t.me/+KuOs8q7WfH8zZGZk
spam Stay at Home and make money online received minimum $200 every 12hrs (1) No hidden fees (2) No Scam ASk ME How or inbox me for more information https://t.me/+KuOs8q7WfH8zZGZk https://t.me/+KuOs8q7WfH8zZGZk
spam I'll help the community how to earn $30k within three days or seventy-two hours but you will reimburse me 10% of your dividend when you collect it. Note: only interested people should involve, ask me how now or Whatsapp +1 (209) 876-7868 immediately.
spam I started with € 500 and in 24hours of trading I was credited with € 5,500 excluding my € 500 that`s over 8x my startup capital. Imagine how much you will make with a higher startup capital. Thank you https://t.me/Donald_florence_binary_trade https://t.me/Donald_florence_binary_trade
spam I started with € 500 and in 24hours of trading I was credited with € 5,500 excluding my € 500 that`s over 8x my startup capital. Imagine how much you will make with a higher startup capital. Thank you https://t.me/Donald_florence_binary_trade https://t.me/Donald_florence_binary_trade
spam @albasam7:matrix.org in #nim-science:envs.net\n\n> 💨Dont miss this chance to be Rich ✌👍🙏DiscJockey is mine name and am an admin who has his own store link and group link https://t.me/+b7jYjbVzzOVkYjU8 Thats my group link , you can check out my legit works there and proof 🙏UPDATE Services I offer: Sell PayPal account verified PayPal transfer *Sell Bank Transfer Bank Login *Sell Clone card - Secured shipping tunnel *Sell cc Fullz & random Infos 99% valid cards *Sell Dumps with pin track 1 and 2 101 201 *Sell Western Union money transfer services *Sell Gift Card Itune Amazon Ebay Clone/Credit Cards *Sell Booking flight ticket services worldwide *Sell Electronics carding services *Sell SMTP - Pass Mail - PHP - RDP !! Follow Group Stage and contact me if of you are interested to make money ! Lets make more bread ! Follow the rules ! Order or purchase and get instantly ✌Partnership member needed for a long term business 🙏100%. 100% High quality Good and High Quality Dumps+Pins 90% Approval TRANSFER SERVICE BTC/USDT /CASHAPP TN DUA PAYMENT ARIZONA DUA MASS UI DUA has EDD RELOAD ALL STATE DUA PAYMENT METHOD IS AVAILABLE !! FULLZ PAYPAL CASH APP DUMPS+PINS FULLZ + DUMPS PAYPAL Transfer CashaPP transfer BANK LOG and bank 🏦 deposit DUMP + P!N PRODUCTS CVV+SSN/D WU TRANSFER ♛ CARDING ALL APPLE ♛ With good product of good sell QUALITY STUFFS ! With a lot of customers worldwide and also a Valid Vendor 🇺🇸 BESTBUY TAP & PAY WORKING BINS VERIZON TAP & PAY WALMART - TAP & Pay Bins Amazon prime Tap & pay ••Cool random prices Random Bins •• COUNTRIES ! Verizon - AMAZON DUMPS with pin AMAZON TRACK1-2 Verizon- Track2- 1 With Pins Walmart - Track1-2 Pins ZIPCODE + PINS FULLZ + Ssn - details FULLZ - Dob + Pins Cvv - ZIPCODE + ATM Anyone who is ready and willing to follow the rules should immediately message me or contact me ! Telegram ✅ @DarknessWeb715
spam @adsspottipsusa:matrix.org in #edgeware-builders:matrix.org\n\n> I'll help the community how earn $30k within 3 days and hours but you will reimburse me 10% of your dividend when you collect it. Note: only interested people should involve. Whatsapp +1 (209) 876-7868 immediately.
spam @williamjo:matrix.org in #spbpython:matrix.org\n\n> Stay at Home and make money online received minimum $200 every 12hrs (1) No hidden fees (2) No Scam ASk ME How or inbox me for more information https://t.me/+KuOs8q7WfH8zZGZk https://t.me/+KuOs8q7WfH8zZGZk

Can't render this file because it contains an unexpected character in line 68 and column 14.