-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy pathcrawl.json
56 lines (56 loc) · 2.15 KB
/
crawl.json
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
{
"timeout__": 5,
"user-agent": "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.1916.153 Safari/537.36 FirePHP/4Chrome",
"content_DOM": [
"//article",
"//main",
"//div[contains(attribute::class, 'item-page')]",
"//div[contains(attribute::class, 'itemBody')]"
],
"remove_Attributes": [
"class",
"id",
"itemprop",
"itemscope",
"itemtype",
"rel",
"style",
"target"
],
"remove_DOM": [
"//head",
"//header",
"//footer",
"//noscript",
"//aside",
"//nav",
"//div[contains(attribute::class, 'footer-menu')]",
"//ul[contains(attribute::class, 'pagenav')]",
"//ul[(attribute::class) = 'nav']",
"//ul[(attribute::class) = 'breadcrumb']",
"//ul[(attribute::class) = 'pagination-list']",
"//div[(attribute::class) = 'clr']",
"//div[(attribute::class) = 'middleSocialSharing']",
"//div[(attribute::class) = 'share-container']",
"//div[(attribute::class) = 'cat-children']",
"//div[contains(attribute::class, 'pagination')]",
"//div[contains(attribute::class, 'blogDate')]",
"//div[contains(attribute::class, 'items-more')]",
"//div[contains(attribute::class, 'itemContentFooter')]",
"//div[contains(attribute::class, 'blocauteur')]",
"//div[contains(attribute::class, 'blogInfo')]",
"//div[contains(attribute::class, 'itemNavigation')]",
"//div[contains(attribute::class, 'itemRelated')]",
"//div[(attribute::class) = 'itemLinks']",
"//div[(attribute::class) = 'itemComments']",
"//div[(attribute::class) = 'itemBackToTop']",
"//span[(attribute::class) = 'itemAuthor']",
"//span[(attribute::class) = 'itemHits']",
"//span[contains(attribute::class, 'itemDateModified')]",
"//dl[contains(attribute::class, 'article-info')]",
"//dd[(attribute::class) = 'create']",
"//*[@id='system-message-container']",
"//*[@id='itemCommentsAnchor']",
"//*[(attribute::data-mce-bogus)]"
]
}