-
Notifications
You must be signed in to change notification settings - Fork 0
/
server.js
96 lines (76 loc) · 3.4 KB
/
server.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
import express from "express";
import { pipeline, env } from "@xenova/transformers";
import Tokenizer from "sentence-tokenizer";
// Turn off the `allowLocalModels` switch on the @xenova/transformers `env` object.
// NOTE(review): presumably this forces models to be resolved from the remote hub
// rather than a local model directory — confirm against the transformers.js docs.
env.allowLocalModels = false;
// Disabled setting kept for reference. NOTE(review): the library property is
// believed to be spelled `useBrowserCache`; the spelling here was corrected from `userBrowserCache`.
// env.useBrowserCache = false;
/**
 * Lazily-created, shared holder for the zero-shot classification pipeline.
 *
 * The model is loaded on the first `getInstance()` call and reused afterwards.
 * Callers that arrive while the first load is still in flight share that same
 * load instead of each starting their own.
 */
class Pipeline {
  static task = "zero-shot-classification";
  static model = "Xenova/roberta-large-mnli";

  /** The resolved pipeline, or `null` until a load has completed successfully. */
  static instance = null;

  /** Promise of the load currently in flight, or `null` when none is running. */
  static #loading = null;

  /**
   * Returns the shared pipeline, loading it on first use.
   *
   * @param {Function|null} [progressCallback=null] Forwarded to `pipeline()` as
   *   `progress_callback`. Only the callback of the caller that actually starts
   *   the load is used; later callers join the load already in progress.
   * @returns {Promise<*>} The value `pipeline()` resolved with.
   * @throws Whatever `pipeline()` rejects with. A failed load is not cached, so
   *   the next call retries.
   */
  static async getInstance(progressCallback = null) {
    if (this.instance !== null) {
      return this.instance;
    }

    if (Pipeline.#loading === null) {
      // Store the promise itself (not the awaited value) so concurrent callers
      // can see that a load is already running.
      Pipeline.#loading = pipeline(this.task, this.model, { progress_callback: progressCallback });
    }

    const attempt = Pipeline.#loading;
    try {
      this.instance = await attempt;
      return this.instance;
    } finally {
      // Drop the settled promise; the identity check avoids clearing a newer
      // attempt that was started after this one failed.
      if (Pipeline.#loading === attempt) {
        Pipeline.#loading = null;
      }
    }
  }
}
// Express application; the JSON body parser is registered before any route so
// handlers can read the parsed payload from `req.body`.
const app = express();
app.use(express.json());
// Minimum top-label score a sentence needs to be counted as a question
// (compared against `scores[0]` in the /classify-text handler).
const SCORE_THRESHOLD = 0.5;
// NOTE(review): not referenced anywhere in the visible part of this file.
const SCORE_INCREMENT = 0.05;
/**
 * Checks whether a sentence opens with an interrogative word.
 *
 * The opening word is compared as a whole word, case-insensitively and after
 * skipping leading whitespace, so "Is this ok" matches but "island life" and
 * "whatever" do not. Contractions are accepted when the part before the
 * apostrophe is an interrogative word ("what's", "can't") or when the word is
 * a negated auxiliary, with or without its apostrophe ("isn't", "isnt").
 *
 * @param {string} sentence Sentence to inspect.
 * @returns {boolean} `true` when the first word is an interrogative word.
 */
const containsInterrogativeWord = (sentence) => {
  // Common interrogative openers, plus the apostrophe-less negated auxiliaries
  // (quotes may already have been stripped from the text by normalization).
  const interrogativeWords = new Set([
    "what", "how", "why", "when", "where", "who",
    "which", "is", "are", "can", "could", "would", "should",
    "isnt", "arent", "cant", "couldnt", "wouldnt", "shouldnt",
  ]);

  // First run of letters (optionally with apostrophe-joined parts); the
  // lookahead rejects tokens that continue with a digit or underscore.
  const openingWord =
    sentence
      .trimStart()
      .toLowerCase()
      .match(/^[a-z]+(?:['\u2019][a-z]+)*(?![a-z0-9_])/)?.[0] ?? "";

  const stem = openingWord.split(/['\u2019]/)[0]; // "what's" -> "what"
  const fused = openingWord.replace(/['\u2019]/g, ""); // "isn't" -> "isnt"

  return interrogativeWords.has(stem) || interrogativeWords.has(fused);
};
/**
 * Tells whether a sentence finishes with a question mark.
 *
 * @param {string} sentence Sentence to inspect.
 * @returns {boolean} `true` when the final character is `?`.
 */
const endsWithQuestionMark = (sentence) => sentence.endsWith('?');
/**
 * Cleans free-form text before it is split into sentences and classified.
 *
 * Steps, in order:
 *  1. drop hashtags and URLs;
 *  2. drop every character that is not an ASCII word character, whitespace or
 *     one of `. , ! ? ; :` (this covers quotes, brackets, emoji and also any
 *     non-ASCII letters);
 *  3. collapse runs of punctuation to their first character;
 *  4. collapse runs of whitespace (including newlines) to a single space;
 *  5. replace `nostr:<id>` references with a placeholder;
 *  6. trim and lower-case the result.
 *
 * Collapsing happens after all removals, so deleting a character can no longer
 * leave a double space or doubled punctuation mark behind.
 *
 * @param {string} str Raw input text.
 * @returns {string} Normalized, lower-cased text.
 * @throws {TypeError} If `str` is not a string.
 */
const normalizeText = (str) => {
  if (typeof str !== "string") {
    throw new TypeError(`normalizeText expects a string, received ${typeof str}`);
  }

  return str
    .replace(/#\w+/g, "") // remove hashtags
    .replace(/https?:\/\/[^\s]+|www\.[^\s]+/g, "") // remove urls
    .replace(/[^\w\s.,!?;:]/g, "") // keep only word chars, whitespace and . , ! ? ; :
    .replace(/[!?.,;:]{2,}/g, (match) => match[0]) // "?!" -> "?", "..." -> "."
    .replace(/\s+/g, " ") // newlines and repeated whitespace -> single space
    // Done last among the replacements: the placeholder's brackets would
    // otherwise be stripped by the character filter above. It is lower-cased
    // together with the rest of the text below.
    .replace(/nostr:[a-zA-Z0-9]+/g, "[REFERENCE]") // replace bech32 entity references
    .trim()
    .toLowerCase();
};
/**
 * Runs the shared zero-shot classifier over every sentence.
 *
 * All classifier calls are started together and awaited as a group, so the
 * returned array lines up index-for-index with `sentences`.
 *
 * @param {string[]} sentences Sentences to classify.
 * @param {string[]} candidate_labels Labels handed to the classifier.
 * @param {string} hypothesis_template Template passed as the `hypothesis_template` option.
 * @returns {Promise<Array>} One classifier result per sentence, in input order.
 */
const classifySentences = async (sentences, candidate_labels, hypothesis_template) => {
  const runClassifier = await Pipeline.getInstance();
  const classifyOne = (text) => runClassifier(text, candidate_labels, { hypothesis_template });
  return Promise.all(sentences.map((text) => classifyOne(text)));
};
/**
 * POST /classify-text
 *
 * Body: `{ "text": string }`.
 * Responds with `{ result: boolean }`, where `result` is `true` when at least
 * one sentence of the normalized text is classified as a "question" with a
 * top score of at least `SCORE_THRESHOLD`.
 *
 * Responds 400 when `text` is missing or not a string, and 500 when
 * normalization, tokenization or classification fails.
 */
app.post("/classify-text", async (req, res) => {
  const rawText = req.body?.text;
  if (typeof rawText !== "string") {
    res.status(400).json({ error: "Request body must contain a string `text` field." });
    return;
  }

  // The handler is async: without this try/catch a rejected promise would
  // never produce a response and the request would hang.
  try {
    const text = normalizeText(rawText);
    console.log({text});

    // Nothing left after normalization means there is nothing to classify.
    if (text === "") {
      res.json({ result: false });
      return;
    }

    const tokenizer = new Tokenizer();
    tokenizer.setEntry(text);
    const sentences = tokenizer.getSentences();
    console.log({sentences});

    const candidate_labels = ["question", "statement", "command", "exclamation"];
    const hypothesis_template = "This text is a {}.";
    const responses = await classifySentences(sentences, candidate_labels, hypothesis_template);

    // Inspect (and log) every response; only the existence of a confident
    // question matters for the reply, so no sentence lookup by index is needed.
    let hasQuestion = false;
    for (const { labels, scores } of responses) {
      console.log('Response', {labels, scores});
      if (labels[0] === "question" && scores[0] >= SCORE_THRESHOLD) {
        hasQuestion = true;
      }
    }

    res.json({ result: hasQuestion });
  } catch (err) {
    console.error("Failed to classify text", err);
    res.status(500).json({ error: "Failed to classify text." });
  }
});
// TCP port the HTTP server binds to.
const PORT = 3006;

// Start accepting connections and report the local URL once the server is up.
app.listen(PORT, function onListening() {
  console.log(`Server is running on http://localhost:${PORT}`);
});