-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathapp.js
104 lines (99 loc) · 3.46 KB
/
app.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
const cheerio = require('cheerio');
const superagent = require('superagent');
const { save, saveMany } = require('./db/save');
const getschema = require('./db/getschema');
const trimSurplus = require('./trimSurplus');
const URL = require('./URL');
// Entry point: request the page at URL.QIDIAN.URL_SE, read the book id from
// every entry of its `#result-list`, and crawl all of those books concurrently
// through getBookInfo.
superagent.get(URL.QIDIAN.URL_SE, (err, res) => {
  if (err) {
    // Report the failure; returning silently would make a failed crawl
    // indistinguishable from one that is still running.
    console.log(err);
    return;
  }
  const $ = cheerio.load(res.text);
  const bookids = [];
  $('#result-list li').each((i, el) => {
    const bookid = $(el).find('a.blue-btn').attr('data-bookid');
    // attr() yields undefined when the button or the attribute is missing;
    // skip such entries so no detail URL ending in "undefined" is requested.
    if (bookid) {
      bookids.push(bookid);
    }
  });
  // Promise.all is fail-fast: the first book that rejects is logged below,
  // and 'finish' is printed only when every book completed.
  Promise.all(bookids.map((bookid) => getBookInfo(bookid)))
    .then(() => {
      console.log('finish');
    })
    .catch((crawlErr) => {
      console.log(crawlErr);
    });
});
/**
 * Crawl one book: fetch its detail page, store the book record and its volume
 * list, then crawl every listed volume via getBookChapter.
 *
 * @param {string} bookid - Id appended to URL.QIDIAN.URL_INFO to build the
 *   detail page URL.
 * @returns {Promise<string>} Resolves with 'success' once the book record, the
 *   volume list and all chapters have been processed. Rejects if the request
 *   fails, the page cannot be parsed, or any save / chapter step rejects.
 */
var getBookInfo = (bookid) => {
  // Only the callback-style request is wrapped in a Promise; everything after
  // it is chained, so the success value propagates to the caller and any
  // exception thrown while parsing becomes a rejection.
  const fetchPage = new Promise((resolve, reject) => {
    superagent.get(URL.QIDIAN.URL_INFO + bookid, (err, res) => {
      if (err) {
        reject(err);
        return;
      }
      resolve(res);
    });
  });

  return fetchPage.then((res) => {
    const $ = cheerio.load(res.text);
    const bid = $('.book-detail-wrap #bookImg').attr('data-bid');
    const bname = $('.book-information .book-info h1 em').text();
    const bookintro = trimSurplus($('.book-content-wrap .book-intro p').eq(0).text());
    const booktag = trimSurplus($('.book-intro p.tag-wrap').text());

    const bookvolume = [];
    $('.volume-wrap .volume li').each((i, el) => {
      const link = $(el).find('a');
      bookvolume.push({
        bookid: bid,
        bookname: bname,
        title: link.text(),
        // Drop the first "//" of the href (the links appear to be
        // protocol-relative).
        url: link.attr('href').replace('//', ''),
        status: 0,
      });
    });

    const bookinfo = {
      bookid: bid,
      bookname: bname,
      intro: bookintro,
      tag: booktag,
    };

    return save('BookInfo', bookinfo)
      .then(() => {
        console.log(`正在爬取《${bname}》...`);
        return saveMany('BookVolume', bookvolume);
      })
      .then(() => getBookChapter(bookvolume))
      .then(() => {
        console.log(`爬取《${bname}》完成`);
        return 'success';
      });
  });
};
/**
 * Start a crawl for every volume in the list and wait for all of them.
 *
 * @param {Array<Object>} bookvolume - Volume records; each one is handed to
 *   getBookChapterByUrl.
 * @returns {Promise<Array>} Settles like Promise.all over the per-volume
 *   promises: an array of their results in input order, or the first rejection.
 */
var getBookChapter = (bookvolume) => {
  const pending = [];
  for (const volume of bookvolume) {
    pending.push(getBookChapterByUrl(volume));
  }
  return Promise.all(pending);
};
/**
 * Fetch one volume page, extract the text of `#j_chapterBox .read-content`
 * and store it as a BookChapter record.
 *
 * @param {Object} vol - Volume record; reads `url`, `bookid`, `bookname` and
 *   `title`.
 * @returns {Promise<*>} Resolves with the result of save() on success. When the
 *   request itself fails, the failure is logged and the promise RESOLVES with
 *   the error object, so the failure does not reject the caller. Rejects only
 *   when saving fails.
 */
var getBookChapterByUrl = (vol) => {
  const label = '《' + vol.bookname + '》' + vol.title;
  console.log('正在爬取' + label + '...');

  return new Promise((resolve, reject) => {
    const onSaved = (saved) => {
      console.log('成功爬取' + label);
      resolve(saved);
    };
    const onSaveFailed = (saveErr) => {
      reject(saveErr);
    };

    superagent.get(vol.url, (err, res) => {
      if (!err) {
        const $ = cheerio.load(res.text);
        const record = {
          bookid: vol.bookid,
          chapter: $('#j_chapterBox .read-content').text(),
        };
        save('BookChapter', record).then(onSaved).catch(onSaveFailed);
        return;
      }
      // Request failure: log it and resolve with the error instead of rejecting.
      console.log('爬取失败:' + err.code + label);
      resolve(err);
    });
  });
};