-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathalgolia.js
111 lines (93 loc) · 3.61 KB
/
algolia.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
/* ALGOLIA SCRAPER
* This script searches for relevant content to index in docsDir.
* Before running it make sure you have the file .algolia-apikey.js
* exporting your ALGOLIA_API_KEY, and make sure it is listed in .gitignore.
*
* example: node algolia.js
*
* IMPORTANT NOTE:
* We have been accepted into the algolia program for FOSS projects.
* So this scraper is not really necessary anymore unless that changes.
* */
const fs = require('fs');
const path = require('path');
const algoliasearch = require('algoliasearch');
const cheerio = require('cheerio');
const { ALGOLIA_API_KEY } = require('./.algolia-apikey');
// MAKE SURE YOUR TOKEN IS CORRECT
// Initialize the Algolia client with your credentials.
const appId = 'S1DLXCNPKJ'; // Your Algolia Application ID
const apiKey = ALGOLIA_API_KEY; // API key exported by ./.algolia-apikey
const client = algoliasearch(appId, apiKey);
// Handle to the 'normalnvim' index; records are saved through it.
const index = client.initIndex('normalnvim');
// Directory containing your Docusaurus build content (<script dir>/build/docs).
const docsDir = path.join(__dirname, 'build', 'docs');
// Toggle DEBUG_MODE to true or false; debugPrint only logs when it is true.
const DEBUG_MODE = false;
/**
 * Log to the console, but only while DEBUG_MODE is enabled.
 *
 * @param {*} message - First value handed to console.log.
 * @param {...*} optionalParams - Any further values, forwarded unchanged.
 * @returns {void}
 */
function debugPrint(message, ...optionalParams) {
  // Stay silent unless debugging has been switched on.
  if (!DEBUG_MODE) {
    return;
  }
  console.log(message, ...optionalParams);
}
/**
 * Score a document by the length of its text.
 *
 * The score grows linearly with `content.length` (one point per 1000
 * characters) and is capped at 1. This is a deliberately simple heuristic
 * that can be replaced with richer logic.
 *
 * @param {string} content - Text to score.
 * @returns {number} A value from 0 up to and including 1.
 */
function calculateRelevancyScore(content) {
  const CHARS_FOR_FULL_SCORE = 1000;
  return Math.min(1, content.length / CHARS_FOR_FULL_SCORE);
}
/**
 * Recursively collect one Algolia record per `.html` file found under `dir`.
 *
 * Each record's `objectID` is the file path relative to `dir`. Its `url` is
 * that same relative path (with forward slashes) appended to the site's
 * `/docs/` base, with a trailing `index.html` path segment removed.
 *
 * @param {string} dir - Root directory to scan.
 * @returns {Array<object>} Records ready to be passed to `index.saveObjects`.
 * @throws {Error} Propagates any error raised by the synchronous `fs` calls
 *   (for example when `dir` does not exist).
 */
function extractDocsFromDir(dir) {
  const records = [];

  // Turn a path relative to `dir` into the public documentation URL.
  function toDocUrl(relativePath) {
    // path.relative uses the platform separator; URLs always need '/'.
    const urlPath = relativePath.split(path.sep).join('/');
    // Strip only a whole `index.html` segment, so e.g. `reindex.html` is kept.
    const withoutIndex = urlPath.replace(/(^|\/)index\.html$/, '$1');
    return `https://normalnvim.github.io/docs/${withoutIndex}`;
  }

  // Walk `currentDir` depth-first, appending a record for every HTML file.
  function readDirRecursive(currentDir) {
    for (const file of fs.readdirSync(currentDir)) {
      const filePath = path.join(currentDir, file);
      const stats = fs.statSync(filePath);

      if (stats.isDirectory()) {
        readDirRecursive(filePath);
        continue;
      }
      if (!stats.isFile() || !file.endsWith('.html')) {
        continue;
      }

      debugPrint(`Processing file: ${filePath}`);
      const content = fs.readFileSync(filePath, 'utf8');
      debugPrint(`Content of ${file}:`, content.slice(0, 100)); // Show first 100 characters

      // Parse the page with cheerio and pull out a title and the main text.
      const $ = cheerio.load(content);
      const title = $('h1').first().text() || $('title').text() || '';
      const bodyContent = $('main').text() || $('body').text() || '';

      // Relative to the directory actually being scanned (the `dir` argument),
      // so the function gives correct paths for whatever root it is given.
      const relativePath = path.relative(dir, filePath);

      const record = {
        objectID: relativePath,
        title,
        content: bodyContent,
        url: toDocUrl(relativePath),
        relevancy: calculateRelevancyScore(bodyContent),
        hierarchy: {
          lvl0: title || '',
          lvl1: '', // Adjust if your content has specific sub-levels
          lvl2: '', // Adjust if needed
          lvl3: '', // Adjust if needed
          lvl4: '', // Adjust if needed
          lvl5: '', // Adjust if needed
        },
      };
      debugPrint('Generated record:', record);
      records.push(record);
    }
  }

  readDirRecursive(dir);
  return records;
}
/**
 * Build records from the docs build directory and upload them to Algolia.
 *
 * Any failure, whether while reading the build output or while saving to the
 * index, is logged and reflected in a non-zero process exit code; the
 * returned promise itself does not reject.
 *
 * @returns {Promise<void>} Settles once the upload attempt has finished.
 */
async function uploadRecords() {
  try {
    // Extraction runs inside the try so that a missing or unreadable build
    // directory is reported here instead of becoming an unhandled rejection.
    const records = extractDocsFromDir(docsDir);
    const { objectIDs } = await index.saveObjects(records);
    console.log('Algolia - Index updated successfully:', objectIDs);
  } catch (error) {
    console.error('Error uploading documents:', error);
    // Let whoever ran the script (shell, CI) see that the run failed.
    process.exitCode = 1;
  }
}
// Script entry point: start the extraction and upload as soon as the file is run.
uploadRecords();