-
Notifications
You must be signed in to change notification settings - Fork 38
/
Copy pathindex.js
97 lines (85 loc) · 2.64 KB
/
index.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
const fs = require('fs')
const path = require('path')
const axios = require('./speed-limiter')
const cheerio = require('cheerio')
// ISBN-style identifier of the book to download; it is interpolated into baseURL.
const bookID = '9781319081799'

// Root URL under which every EPUB resource of the book is requested.
const baseURL = `https://jigsaw.vitalsource.com/books/${bookID}/epub/`

// Local directory that downloaded files are resolved against.
const fsRelativePath = './epub/'

// Value for the Cookie request header, copied from a browser session on
// jigsaw.vitalsource.com. It should include the reese84 and jigsaw_session
// cookies. It is empty here and has to be filled in before running.
const globalCookieVal = ''
/**
 * Writes `contents` to `filepath`, resolved against `fsRelativePath`,
 * creating any missing parent directories first.
 *
 * @param {string} filepath - Destination path, resolved against `fsRelativePath`.
 * @param {string|Buffer} contents - Data passed straight to `fs.promises.writeFile`.
 * @returns {Promise<string>} The resolved path of the file that was written.
 */
const writeIntoFS = async (filepath, contents) => {
  const target = path.resolve(fsRelativePath, filepath)
  const parentDir = path.dirname(target)
  // `recursive: true` makes mkdir a no-op when the directory already exists.
  await fs.promises.mkdir(parentDir, { recursive: true })
  await fs.promises.writeFile(target, contents)
  return target
}
/**
 * Downloads a text resource located at `baseURL + epubPath` and parses it
 * with cheerio.
 *
 * @param {string} epubPath - Path of the resource, appended to `baseURL`.
 * @returns {Promise<{data: string, $: Function}>} `data` is the response body
 *   guaranteed to start with an XML declaration (one is prepended only when
 *   the body does not already begin with one); `$` is the cheerio handle
 *   loaded from the unmodified body.
 */
const fetchXML = async epubPath => {
  const furl = baseURL + epubPath
  console.log(`Fetching ${furl}`)
  const { data } = await axios.request({
    url: furl,
    method: 'get',
    // Identity transform: keep the body exactly as received instead of
    // letting the client try to parse it.
    transformResponse: [d => d],
    responseType: 'text',
    headers: { Cookie: globalCookieVal }
  })
  const $ = cheerio.load(data)
  const xmlDeclaration = `<?xml version="1.0" encoding="UTF-8"?>`
  // An XML declaration may appear only once, at the very start of a document.
  // Prepending a second one would make the saved file malformed, so only add
  // it when the body (optionally after a BOM) does not already start with one.
  const hasDeclaration = /^\uFEFF?<\?xml\s/.test(data)
  return { data: hasDeclaration ? data : xmlDeclaration + data, $ }
}
/**
 * Downloads the resource located at `baseURL + epubPath` as raw bytes.
 *
 * @param {string} epubPath - Path of the resource, appended to `baseURL`.
 * @returns {Promise<*>} The response body as delivered by the client for
 *   `responseType: 'arraybuffer'`.
 */
const fetchURL = async epubPath => {
  const furl = baseURL + epubPath
  console.log(`Fetching ${furl}`)
  const requestOptions = {
    method: 'get',
    url: furl,
    responseType: 'arraybuffer',
    headers: { Cookie: globalCookieVal },
    // Identity transform: hand the body back untouched.
    transformResponse: [body => body]
  }
  const response = await axios.request(requestOptions)
  return response.data
}
// Specific Fetchers/extractors
/**
 * Fetches `META-INF/container.xml`, saves it locally if it is not on disk
 * yet, and extracts the package document location from it.
 *
 * @returns {Promise<string|undefined>} The `full-path` attribute of the
 *   `rootfile` element, or `undefined` when no such attribute is found.
 */
const getContainer = async () => {
  const containerPath = 'META-INF/container.xml'
  const { data, $ } = await fetchXML(containerPath)
  const packageOPF = $('rootfile').attr('full-path')
  // Skip the write when a previous run already stored the file.
  const alreadySaved = fs.existsSync(path.resolve(fsRelativePath, containerPath))
  if (!alreadySaved) {
    await writeIntoFS(containerPath, Buffer.from(data, 'utf8'))
  }
  return packageOPF
}
/**
 * Fetches the EPUB package document, saves it locally if it is not on disk
 * yet, and lists every manifest resource that still has to be downloaded.
 *
 * @param {string} [packageOPF='OEBPS/package.opf'] - Path of the package
 *   document relative to the EPUB root, e.g. the `full-path` reported by
 *   `META-INF/container.xml`. Falls back to the previously hard-coded
 *   location when omitted or `undefined`.
 * @returns {Promise<string[]>} EPUB-root-relative paths of manifest items
 *   that do not exist under `fsRelativePath` yet.
 */
const getOPF = async (packageOPF = 'OEBPS/package.opf') => {
  // Manifest hrefs are relative to the package document, so resolve them
  // against its directory rather than a fixed 'OEBPS/' prefix. POSIX joins
  // keep forward slashes, since these paths are also appended to a URL.
  const opfDir = path.posix.dirname(packageOPF)
  const { data, $ } = await fetchXML(packageOPF)
  const looperURLs = []
  $('manifest [href]').each((i, el) => {
    const fip = path.posix.join(opfDir, $(el).attr('href'))
    // Only queue resources that a previous run has not already saved.
    if (!fs.existsSync(path.resolve(fsRelativePath, fip))) looperURLs.push(fip)
  })
  if (!fs.existsSync(path.resolve(fsRelativePath, packageOPF))) {
    await writeIntoFS(packageOPF, Buffer.from(data, 'utf8'))
  }
  return looperURLs
}
// Progress counters: `completed` is incremented by getAndSave after each
// successful save; `totalToRun` is set by recursiveGet to the batch size.
let completed = 0
let totalToRun = 0

/**
 * Downloads one resource and stores it under the same relative path, then
 * logs the overall progress percentage.
 *
 * @param {string} url - Resource path relative to the EPUB root; used both
 *   for the request and as the destination path.
 * @returns {Promise<boolean>} Resolves to `true` once the file is written.
 */
const getAndSave = async (url) => {
  const payload = await fetchURL(url)
  await writeIntoFS(url, Buffer.from(payload, 'binary'))
  completed += 1
  const percent = (completed / totalToRun * 100).toFixed(2)
  console.log(`${percent}%\t Saved ${url}`)
  return true
}
/**
 * Downloads and saves every resource in `arrayURLs`. Despite the name this
 * is not recursive: it starts all downloads at once and waits for them.
 *
 * Every download is awaited, so the returned promise settles only after all
 * of them have finished. A failed download is logged and does not abort the
 * remaining ones.
 *
 * @param {string[]} arrayURLs - Resource paths relative to the EPUB root.
 * @returns {Promise<boolean>} Always resolves to `true`; individual failures
 *   are reported on stderr.
 */
const recursiveGet = async (arrayURLs) => {
  totalToRun = arrayURLs.length
  await Promise.all(arrayURLs.map(url =>
    // Catch per item so one failure neither rejects the whole batch nor
    // becomes an unhandled rejection.
    getAndSave(url).catch(err => {
      console.error(`Failed ${url}: ${err && err.message ? err.message : err}`)
      return false
    })
  ))
  return true
}
// Entry point: read the container, read the package manifest, then download
// every missing resource. Any rejection in the chain is reported and turned
// into a non-zero exit code instead of an unhandled promise rejection.
getContainer()
  .then(getOPF)
  .then(recursiveGet)
  .catch(err => {
    console.error(err)
    process.exitCode = 1
  })