-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathlinkExtractor.js
69 lines (62 loc) · 2.18 KB
/
linkExtractor.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
'use strict';
const Promise = require("bluebird");
const recursive = Promise.promisify(require("recursive-readdir"));
const getUrls = require('get-urls');
const readFile = Promise.promisify(require("fs").readFile);
const path = require('path')
const chalk = require('chalk');
/**
 * Entry point of the link extractor.
 */
const LinkExtractor = {
  /**
   * Walks `options.path` with recursive-readdir and hands the resulting file
   * list to `getUrlsInFiles`.
   *
   * @param {Object} options
   * @param {string} options.path - Directory passed to recursive-readdir.
   * @param {string} options.extension - Extension to keep, compared against
   *   `path.extname(file)` (so it includes the leading dot, e.g. ".md").
   * @returns {Promise} Whatever `getUrlsInFiles` resolves with.
   */
  start: (options) => {
    // Ignore callback: answers `true` (skip) only for regular files whose
    // extension differs from the requested one. Anything that is not a
    // regular file is never skipped here.
    const hasOtherExtension = (file, stats) =>
      stats.isFile() && path.extname(file) !== options.extension;

    return recursive(options.path, [hasOtherExtension])
      .then((matchingFiles) => getUrlsInFiles(matchingFiles));
  }
};
/**
 * Extracts the URLs of every given file and summarises the outcome.
 *
 * Each file is passed to `extractUrlsForFile`; once all of them are done a
 * one-line summary is printed to the console.
 *
 * @param {string[]} files - Paths of the files to scan.
 * @returns {Promise<{fileUrls: Object, totalFiles: number, totalUrls: number}>}
 *   `fileUrls` maps each file path to the list of URLs found in it,
 *   `totalFiles` counts the processed files and `totalUrls` sums the lengths
 *   of all URL lists.
 */
const getUrlsInFiles = (files) => {
  const summary = {
    fileUrls: {},
    totalFiles: 0,
    totalUrls: 0
  };

  // Records the result of a single file in the shared summary as soon as its
  // URLs are available.
  const recordFile = (filePath) =>
    extractUrlsForFile(filePath).then((urls) => {
      summary.totalFiles += 1;
      summary.fileUrls[filePath] = urls;
      summary.totalUrls += urls.length;
    });

  return Promise.map(files, (filePath) => recordFile(filePath)).then(() => {
    const message = `Found ${summary.totalFiles} files, with a total of ${summary.totalUrls} urls inside`;
    console.log(chalk.bold('✓ ') + chalk.white(message));
    return summary;
  });
};
/**
 * Reads a file as UTF-8 text, extracts the URLs it contains with `getUrls`
 * and strips a fixed list of stray fragments from each URL.
 *
 * @param {string} file - Path of the file to read.
 * @returns {Promise<string[]>} Resolves with the cleaned URLs, in the order
 *   in which `getUrls` yields them. Rejects if the file cannot be read.
 */
const extractUrlsForFile = (file) => {
  // Fragments removed from every extracted URL (presumably punctuation and
  // markup that ends up glued to links in the scanned documents — confirm
  // against real input). Order matters: the two-character fragments are
  // tried before the single characters they contain.
  const strayFragments = ['):', '))', '),', ')', '%27', ',', '**', '*'];

  // String#replace with a string pattern only removes the FIRST occurrence,
  // so each fragment is stripped at most once per URL.
  const cleanUrl = (url) =>
    strayFragments.reduce((cleaned, fragment) => cleaned.replace(fragment, ''), url);

  // The clean-up is purely synchronous, so a plain mapping is used instead of
  // Promise.map plus pushing into a shared array: the output order is then
  // tied to the iteration order of the extracted URLs. Array.from accepts
  // whatever iterable `getUrls` returns (an array or a Set).
  return readFile(file, 'utf8')
    .then((content) => Array.from(getUrls(content), (url) => cleanUrl(url)));
};
module.exports = LinkExtractor;