-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathfetch-from-sitemaps.js
79 lines (67 loc) · 2.02 KB
/
fetch-from-sitemaps.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
'use strict';
const axios = require('axios');
const xml2js = require('xml2js');
/**
 * Fetch a sitemap and collect all the URLs listed in its XML.
 *
 * This function never rejects: HTTP failures, non-XML bodies, XML parse
 * errors and documents without a <urlset> root are reported on stderr and
 * an empty list is returned instead, so a caller aggregating several
 * sitemaps is not aborted by a single bad one.
 *
 * @param {string} sitemapUrl Sitemap URL from which the list of URLs is fetched.
 *
 * @returns {Promise<Array>} List of URLs fetched from the sitemap (empty on failure).
 */
async function fetchFromSitemap(sitemapUrl) {
  // ANSI escape sequence: print the message in red, then reset the colour.
  const redFormat = '\x1b[31m%s\x1b[0m';
  const statusMessage = 'Error: ' + sitemapUrl + ' returns with status code ';
  const fetchedUrls = [];

  try {
    const response = await axios.get(sitemapUrl);

    if (response.status !== 200) {
      console.error(redFormat, statusMessage + response.status);
      return fetchedUrls;
    }

    // The body may not be a string (e.g. an already-parsed JSON payload), so
    // check the type before searching for the XML declaration.
    const body = response.data;
    if (typeof body !== 'string' || body.indexOf('<?xml') === -1) {
      console.error(redFormat, 'Error: Not a XML file.');
      return fetchedUrls;
    }

    // Adapt the callback API to a promise so that parse errors are surfaced
    // and the result is awaited regardless of when the callback fires.
    const parsed = await new Promise((resolve, reject) => {
      new xml2js.Parser().parseString(body, (err, result) => {
        if (err) {
          reject(err);
        } else {
          resolve(result);
        }
      });
    });

    // Valid XML is not necessarily a URL sitemap (e.g. a <sitemapindex>).
    if (!parsed || parsed.urlset == null) {
      console.error(redFormat, 'Error: ' + sitemapUrl + ' does not contain a <urlset> element.');
      return fetchedUrls;
    }

    // An empty <urlset> has no `url` member; treat it as an empty list.
    const entries = Array.isArray(parsed.urlset.url) ? parsed.urlset.url : [];
    for (const entry of entries) {
      // Skip malformed <url> entries that carry no <loc>.
      if (entry && Array.isArray(entry.loc) && entry.loc.length > 0) {
        fetchedUrls.push(entry.loc[0]);
      }
    }
  } catch (error) {
    if (error && error.response && error.response.status) {
      console.error(redFormat, statusMessage + error.response.status);
    } else {
      console.error(error);
    }
  }

  return fetchedUrls;
}
/**
 * Fetch URLs from all the given sitemaps and merge them into one list.
 *
 * All sitemap requests are started up front and awaited together. The merged
 * list contains each URL once, in the order it is first encountered (sitemaps
 * in the order given, URLs in the order each per-sitemap list returns them).
 *
 * @param {Iterable<string>} sitemapUrls List of Sitemap URL(s).
 *
 * @returns {Promise<Array>} De-duplicated list of URLs fetched from all the sitemap(s).
 */
async function fetchAllSitemaps(sitemapUrls) {
  const pendingLists = [];
  for (const sitemapUrl of sitemapUrls) {
    pendingLists.push(fetchFromSitemap(sitemapUrl));
  }

  const fetchedLists = await Promise.all(pendingLists);

  // A Set keeps insertion order and gives constant-time membership checks,
  // so de-duplication stays linear even for very large sitemaps.
  const uniqueUrls = new Set();
  for (const singleList of fetchedLists) {
    for (const singleUrl of singleList) {
      uniqueUrls.add(singleUrl);
    }
  }

  return Array.from(uniqueUrls);
}
// The module's export is the fetchAllSitemaps function itself.
module.exports = fetchAllSitemaps;