Skip to content

Commit

Permalink
Update bmc.json for the new site layout
Browse files Browse the repository at this point in the history
Updated bmc.json to work with the standards and HTML5 markup used in the new site layout. Tested on this article: http://ethnobiomed.biomedcentral.com/articles/10.1186/1746-4269-2-29
  • Loading branch information
larsgw authored Jun 22, 2016
1 parent fbedb6e commit d0af021
Showing 1 changed file with 9 additions and 16 deletions.
25 changes: 9 additions & 16 deletions scrapers/bmc.json
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
{
"url": "www\\.biomedcentral\\.com",
"url": "biomedcentral\\.com",
"elements": {
"publisher": {
"selector": "//meta[@name='citation_publisher']",
Expand Down Expand Up @@ -38,11 +38,11 @@
"attribute": "content"
},
"description": {
"selector": "//meta[@name='description']",
"selector": "//meta[@name='dc.description']",
"attribute": "content"
},
"abstract": {
"selector": "//meta[@name='description']",
"selector": "//meta[@name='dc.description']",
"attribute": "content"
},
"fulltext_html": {
Expand All @@ -59,32 +59,25 @@
"rename": "fulltext.pdf"
}
},
"fulltext_xml": {
"selector": "//a[.='Download XML']",
"attribute": "href",
"download": {
"rename": "fulltext.xml"
}
},
"supplementary_material": {
"selector": "//link[starts-with(@title,'Additional file')]",
"selector": "//a[@class='filename']",
"attribute": "href",
"download": true
},
"figure": {
"selector": "//div[@class='fig']/p/a/img",
"selector": "//figure[@class='Figure']/div/img",
"attribute": "src",
"download": true
},
"figure_caption": {
"selector": "//div[@class='fig']//strong"
"selector": "//figure[@class='Figure']/figcaption"
},
"license": {
"selector": "//p[a/@href='http://creativecommons.org/licenses/by/4.0']"
"selector": "//section[@id='CopyrightMessage']//div[@class='CopyrightComment']/p"
},
"copyright": {
"selector": "//p[contains(.,'licensee')]"
"selector": "//meta[@name='dc.copyright']",
"attribute": "content"
}
}
}

0 comments on commit d0af021

Please sign in to comment.