-
Notifications
You must be signed in to change notification settings - Fork 22
/
girl.js
148 lines (130 loc) · 3.17 KB
/
girl.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
/* @Author: xuelangcxy
* @Date: 2016-03-15 22:15:13
* @Last Modified by: xuelangcxy
* @Last Modified time: 2016-03-15 22:15:13
* @this is change
*/
'use strict';
var http = require("http"),
cheerio = require("cheerio"),
fs = require('fs');
// Home page of the site being scraped; the topic links are read from it.
var baseUrl = "http://www.girl13.com";

// picArr[i] holds the image records collected for the topic at urlList[i].
var picArr = [];

// Topic records ({url, title}) gathered from the home page.
var urlList = [];

// Local directory under which one folder per topic is created.
var rootPath = './girls/';
/**
 * Collect the URL and title of every topic linked from the home page.
 *
 * The home page (`baseUrl`) is downloaded, every `#tagcloud-cc a` anchor is
 * turned into a `{url, title}` record appended to the module-level `urlList`,
 * and that list is passed to `callback`. If the page cannot be downloaded
 * (or is empty), "error" is logged and `callback` is not invoked.
 *
 * @author xuelangcxy
 * @param {Function} callback - receives the module-level `urlList` array.
 */
function getUrlList(callback) {
    download(baseUrl, function(html) {
        if (!html) {
            console.log("error");
            return;
        }

        var $ = cheerio.load(html);
        var anchors = $("#tagcloud-cc a");
        anchors.each(function(position, anchor) {
            var link = $(anchor);
            var topic = {
                url: link.attr("href"),
                title: link.text()
            };
            urlList.push(topic);
        });

        callback(urlList);
        console.log("Done getting url list!");
    });
}
// Entry point: once the topic list is known, make sure the root output
// folder exists and then fetch the images of every topic.
getUrlList(function(topics) {
    var ignoreResult = function() {};
    checkFolderExist(rootPath);
    getImgUrl(topics, ignoreResult);
});
/**
 * Fetch every topic page and download the images found on it.
 *
 * For each entry of `urlList`, a folder named after the topic title is
 * ensured under `rootPath`, the topic page is downloaded, the `src`/`alt` of
 * every `.entry-content img` element is recorded in `picArr[index]`, and the
 * collected records are handed to `storeImgToLocal`.
 *
 * @author xuelangcxy
 * @param {Array<{url: string, title: string}>} urlList - topic records, as
 *     produced by getUrlList.
 * @param {Function} [callback] - called once with the module-level `picArr`
 *     after every topic page has been handled (successfully or not). Image
 *     downloads started by storeImgToLocal may still be in flight then.
 */
function getImgUrl(urlList, callback) {
    var pending = urlList.length;

    // Invoke the caller's callback, if one was supplied.
    function notifyDone() {
        if (typeof callback === 'function') {
            callback(picArr);
        }
    }

    // Count one topic page as handled; report when it was the last one.
    function topicHandled() {
        pending -= 1;
        if (pending === 0) {
            notifyDone();
        }
    }

    if (pending === 0) {
        notifyDone();
        return;
    }

    urlList.forEach(function(element, index) {
        console.log(element.url + " : " + index);
        picArr[index] = [];
        var path = rootPath + element.title + '/';
        checkFolderExist(path);
        download(element.url, function(data) {
            if (data) {
                var $ = cheerio.load(data);
                $(".entry-content img").each(function(i, e) {
                    var picUrl = $(e).attr("src");
                    // An <img> without a src has nothing to download, and
                    // slicing `undefined` would throw inside this callback.
                    if (!picUrl) {
                        return;
                    }
                    // File name is whatever follows the last "/" of the URL.
                    var picName = picUrl.slice(picUrl.lastIndexOf("/") + 1);
                    var picTitle = $(e).attr('alt');
                    picArr[index].push({
                        url: picUrl,
                        name: picName,
                        title: picTitle
                    });
                });
                storeImgToLocal(picArr[index], element.title);
            } else {
                console.log("error");
            }
            topicHandled();
        });
    });
}
/**
 * Download the body of `url` over HTTP as UTF-8 text.
 *
 * @param {string} url - address passed to `http.get`.
 * @param {Function} callback - called exactly once: with the response body
 *     as a string when the response ends, or with `null` when the request
 *     or the response stream emits an error.
 */
function download(url, callback) {
    var finished = false;

    // Guarantee a single callback even if both 'end' and 'error' fire.
    function finish(result) {
        if (finished) {
            return;
        }
        finished = true;
        callback(result);
    }

    http.get(url, function(res) {
        var data = "";
        // Decode through the stream's decoder so that a multi-byte UTF-8
        // character split across two chunks is not corrupted, which is what
        // happens when each Buffer chunk is stringified on its own.
        res.setEncoding('utf8');
        res.on('data', function(chunk) {
            data += chunk;
        });
        res.on("end", function() {
            finish(data);
        });
        res.on("error", function() {
            finish(null);
        });
    }).on("error", function() {
        finish(null);
    });
}
/**
 * Download every image in `picArr` into `./girls/<title>/`, skipping files
 * that already exist on disk.
 *
 * Failures (request errors, non-200 responses, response-stream errors and
 * write errors) are logged and only affect the image concerned; nothing is
 * thrown from inside an I/O callback.
 *
 * @author xuelangcxy
 * @param {Array<{url: string, name: string}>} picArr - image records; `url`
 *     is fetched and `name` is used as the file name.
 * @param {string} title - topic title, used as the folder name. The folder
 *     is expected to exist already.
 */
function storeImgToLocal(picArr, title) {
    var num = 0;
    picArr.forEach(function(element, i) {
        var picFullPath = './girls/' + title + '/' + element.name;
        if (fs.existsSync(picFullPath)) {
            // Already downloaded on an earlier run.
            return;
        }
        console.log(i + ":" + element.url);
        http.get(element.url, function(res) {
            // Anything but 200 is not the image itself; saving it would leave
            // a corrupt file that the existsSync check above then never retries.
            if (res.statusCode !== 200) {
                console.log('Skip ' + element.name + ': HTTP ' + res.statusCode);
                res.resume(); // drain so the socket is released
                return;
            }
            // Keep the raw bytes as Buffers; no text decoding must touch
            // image data or the saved file cannot be opened.
            var chunks = [];
            res.on('data', function(chunk) {
                chunks.push(chunk);
            });
            res.on('end', function() {
                fs.writeFile(picFullPath, Buffer.concat(chunks), function(err) {
                    if (err) {
                        console.log('Failed to save ' + element.name + ': ' + err.message);
                        return;
                    }
                    console.log(++num + ':Download ' + element.name + ' success!');
                });
            });
            res.on('error', function() {
                console.log('error!');
            });
        }).on('error', function() {
            // Listener is on the request, so connection-level failures
            // (DNS, refused, reset) are handled instead of crashing.
            console.log('error!');
        });
    });
}
/**
 * Ensure that the directory `path` exists, creating it if necessary.
 *
 * Creation is synchronous so the directory is usable as soon as this
 * function returns; callers create sub-folders and write files right after
 * calling it, which raced against the previous asynchronous mkdir.
 *
 * @param {string} path - directory to create; its parent must already exist.
 * @throws {Error} any `fs.mkdirSync` failure other than the directory
 *     already existing (for example a missing parent directory).
 */
function checkFolderExist(path) {
    if (fs.existsSync(path)) {
        return;
    }
    try {
        fs.mkdirSync(path);
    } catch (err) {
        // Someone else created it between the check and the mkdir: the
        // directory exists, which is all that is required.
        if (err.code !== 'EEXIST') {
            throw err;
        }
    }
}
// Public API. Each helper needs its own `exports.` assignment: in a
// comma-chained expression only the first term targets `exports`, and the
// remaining terms merely reassign the local function bindings to themselves.
exports.download = download;
exports.storeImgToLocal = storeImgToLocal;
exports.checkFolderExist = checkFolderExist;