From f7000aa5a36911d725199ec1a5d863e6e01425c8 Mon Sep 17 00:00:00 2001 From: blaine-arcjet <146491715+blaine-arcjet@users.noreply.github.com> Date: Wed, 4 Sep 2024 00:17:43 -0700 Subject: [PATCH] chore: ensure consistent formatting of the JSON file (#365) --- .github/workflows/ci-validation.yml | 7 +- crawler-user-agents.json | 1487 ++++++++++----------------- format.js | 34 + 3 files changed, 584 insertions(+), 944 deletions(-) create mode 100644 format.js diff --git a/.github/workflows/ci-validation.yml b/.github/workflows/ci-validation.yml index 4991c25..f4e4f89 100644 --- a/.github/workflows/ci-validation.yml +++ b/.github/workflows/ci-validation.yml @@ -5,11 +5,11 @@ name: CI validation on: # https://stackoverflow.com/questions/64635032/github-actions-run-on-push-to-all-branches push: - branches: + branches: - '**' pull_request: branches: - - master + - master jobs: build: @@ -19,8 +19,9 @@ jobs: - uses: actions/setup-node@v4 with: node-version: 20 + - run: node format.js --check - run: pip3 install -r requirements.txt - run: py.test -vv - - run: python3 validate.py + - run: python3 validate.py - run: php validate.php - run: go test diff --git a/crawler-user-agents.json b/crawler-user-agents.json index a022b41..ce6f26d 100644 --- a/crawler-user-agents.json +++ b/crawler-user-agents.json @@ -22,37 +22,32 @@ "Nokia6820/2.0 (4.83) Profile/MIDP-1.0 Configuration/CLDC-1.0 (compatible; Googlebot-Mobile/2.1; +http://www.google.com/bot.html)", "SAMSUNG-SGH-E250/1.0 Profile/MIDP-2.0 Configuration/CLDC-1.1 UP.Browser/6.2.3.3.c.1.101 (GUI) MMP/2.0 (compatible; Googlebot-Mobile/2.1; +http://www.google.com/bot.html)" ] - } - , + }, { "pattern": "Googlebot-Image", "instances": [ "Googlebot-Image/1.0" ] - } - , + }, { "pattern": "Googlebot-News", "instances": [ "Googlebot-News" ] - } - , + }, { "pattern": "Googlebot-Video", "instances": [ "Googlebot-Video/1.0" ] - } - , + }, { "pattern": "AdsBot-Google([^-]|$)", "url": 
"https://support.google.com/webmasters/answer/1061943?hl=en", "instances": [ "AdsBot-Google (+http://www.google.com/adsbot.html)" ] - } - , + }, { "pattern": "AdsBot-Google-Mobile", "addition_date": "2017/08/21", @@ -62,8 +57,7 @@ "Mozilla/5.0 (Linux; Android 5.0; SM-G920A) AppleWebKit (KHTML, like Gecko) Chrome Mobile Safari (compatible; AdsBot-Google-Mobile; +http://www.google.com/mobile/adsbot.html)", "Mozilla/5.0 (iPhone; CPU iPhone OS 9_1 like Mac OS X) AppleWebKit/601.1.46 (KHTML, like Gecko) Version/9.0 Mobile/13B143 Safari/601.1 (compatible; AdsBot-Google-Mobile; +http://www.google.com/mobile/adsbot.html)" ] - } - , + }, { "pattern": "Feedfetcher-Google", "addition_date": "2018/06/27", @@ -71,8 +65,7 @@ "instances": [ "Feedfetcher-Google; (+http://www.google.com/feedfetcher.html; 1 subscribers; feed-id=728742641706423)" ] - } - , + }, { "pattern": "Mediapartners-Google", "url": "https://support.google.com/webmasters/answer/1061943?hl=en", @@ -82,15 +75,13 @@ "Mozilla/5.0 (iPhone; U; CPU iPhone OS 10_0 like Mac OS X; en-us) AppleWebKit/602.1.38 (KHTML, like Gecko) Version/10.0 Mobile/14A5297c Safari/602.1 (compatible; Mediapartners-Google/2.1; +http://www.google.com/bot.html)", "Mozilla/5.0 (iPhone; U; CPU iPhone OS 4_1 like Mac OS X; en-us) AppleWebKit/532.9 (KHTML, like Gecko) Version/4.0.5 Mobile/8B117 Safari/6531.22.7 (compatible; Mediapartners-Google/2.1; +http://www.google.com/bot.html)" ] - } - , + }, { "pattern": "Mediapartners \\(Googlebot\\)", "addition_date": "2017/08/08", "url": "https://support.google.com/webmasters/answer/1061943?hl=en", "instances": [] - } - , + }, { "pattern": "APIs-Google", "addition_date": "2017/08/08", @@ -98,8 +89,7 @@ "instances": [ "APIs-Google (+https://developers.google.com/webmasters/APIs-Google.html)" ] - } - , + }, { "pattern": "Google-InspectionTool", "url": "https://developers.google.com/search/docs/crawling-indexing/overview-google-crawlers", @@ -107,8 +97,7 @@ "Mozilla/5.0 (Linux; Android 6.0.1; Nexus 5X 
Build/MMB29P) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/W.X.Y.Z Mobile Safari/537.36 (compatible; Google-InspectionTool/1.0)", "Mozilla/5.0 (compatible; Google-InspectionTool/1.0)" ] - } - , + }, { "pattern": "Storebot-Google", "url": "https://developers.google.com/search/docs/crawling-indexing/overview-google-crawlers", @@ -116,16 +105,14 @@ "Mozilla/5.0 (X11; Linux x86_64; Storebot-Google/1.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.88 Safari/537.36", "Mozilla/5.0 (Linux; Android 8.0; Pixel 2 Build/OPD3.170816.012; Storebot-Google/1.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.138 Mobile Safari/537.36" ] - } - , + }, { "pattern": "GoogleOther", "url": "https://developers.google.com/search/docs/crawling-indexing/overview-google-crawlers", "instances": [ "GoogleOther" ] - } - , + }, { "pattern": "bingbot", "url": "http://www.bing.com/bingbot.htm", @@ -145,8 +132,7 @@ "Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko; compatible; bingbot/2.0; +http://www.bing.com/bingbot.htm) Safari/537.36", "Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko; compatible; bingbot/2.0; +http://www.bing.com/bingbot.htm) Chrome/103.0.5060.134 Safari/537.36" ] - } - , + }, { "pattern": "Slurp", "url": "http://help.yahoo.com/help/us/ysearch/slurp", @@ -155,8 +141,7 @@ "Mozilla/5.0 (compatible; Yahoo! Slurp; http://help.yahoo.com/help/us/ysearch/slurp)", "Mozilla/5.0 (compatible; Yahoo! 
Slurp China; http://misc.yahoo.com.cn/help.html)" ] - } - , + }, { "pattern": "[wW]get", "instances": [ @@ -164,8 +149,7 @@ "Wget/1.14 (linux-gnu)", "Wget/1.20.3 (linux-gnu)" ] - } - , + }, { "pattern": "LinkedInBot", "instances": [ @@ -173,8 +157,7 @@ "LinkedInBot/1.0 (compatible; Mozilla/5.0; Jakarta Commons-HttpClient/4.3 +http://www.linkedin.com)", "LinkedInBot/1.0 (compatible; Mozilla/5.0; Apache-HttpClient +http://www.linkedin.com)" ] - } - , + }, { "pattern": "Python-urllib", "instances": [ @@ -189,9 +172,8 @@ "Python-urllib/3.5", "Python-urllib/3.6", "Python-urllib/3.7" - ] - } - , + ] + }, { "pattern": "python-requests", "addition_date": "2018/05/27", @@ -204,8 +186,7 @@ "python-requests/2.21.0", "python-requests/2.22.0" ] - } - , + }, { "pattern": "aiohttp", "addition_date": "2019/12/23", @@ -215,19 +196,16 @@ "Python/3.7 aiohttp/3.6.2a2" ], "url": "https://docs.aiohttp.org/en/stable/" - } - , + }, { "pattern": "httpx", "addition_date": "2019/12/23", "instances": [ "python-httpx/0.16.1", "python-httpx/0.13.0.dev1" - ], "url": "https://www.python-httpx.org" - } - , + }, { "pattern": "libwww-perl", "instances": [ @@ -235,15 +213,13 @@ "2Bone_LinkChkr/1.0 libwww-perl/6.03", "amibot - http://www.amidalla.de - tech@amidalla.com libwww-perl/5.831" ] - } - , + }, { "pattern": "httpunit", "instances": [ "httpunit/1.x" ] - } - , + }, { "pattern": "Nutch", "instances": [ @@ -251,8 +227,7 @@ "NutchCVS/0.7.1 (Nutch; http://lucene.apache.org/nutch/bot.html; nutch-agent@lucene.apache.org)", "istellabot-nutch/Nutch-1.10" ] - } - , + }, { "pattern": "Go-http-client", "addition_date": "2016/03/26", @@ -261,8 +236,7 @@ "Go-http-client/1.1", "Go-http-client/2.0" ] - } - , + }, { "pattern": "phpcrawl", "addition_date": "2012/09/17", @@ -270,8 +244,7 @@ "instances": [ "phpcrawl" ] - } - , + }, { "pattern": "msnbot", "url": "http://search.msn.com/msnbot.htm", @@ -291,13 +264,11 @@ "msnbot/2.0b (+http://search.msn.com/msnbot.htm).", "msnbot/2.0b 
(+http://search.msn.com/msnbot.htm)._" ] - } - , + }, { "pattern": "jyxobot", "instances": [] - } - , + }, { "pattern": "FAST-WebCrawler", "instances": [ @@ -306,23 +277,20 @@ "FAST-WebCrawler/3.7/FirstPage (atw-crawler at fast dot no;http://fast.no/support/crawler.asp)", "FAST-WebCrawler/3.8" ] - } - , + }, { "pattern": "FAST Enterprise Crawler", "instances": [ "FAST Enterprise Crawler 6 / Scirus scirus-crawler@fast.no; http://www.scirus.com/srsapp/contactus/", "FAST Enterprise Crawler 6 used by Schibsted (webcrawl@schibstedsok.no)" ] - } - , + }, { "pattern": "BIGLOTRON", "instances": [ "BIGLOTRON (Beta 2;GNU/Linux)" ] - } - , + }, { "pattern": "Teoma", "instances": [ @@ -330,24 +298,21 @@ "Mozilla/2.0 (compatible; Ask Jeeves/Teoma; +http://about.ask.com/en/docs/about/webmasters.shtml)" ], "url": "http://about.ask.com/en/docs/about/webmasters.shtml" - } - , + }, { "pattern": "convera", "instances": [ "ConveraCrawler/0.9e (+http://ews.converasearch.com/crawl.htm)" ], "url": "http://ews.converasearch.com/crawl.htm" - } - , + }, { "pattern": "seekbot", "instances": [ "Seekbot/1.0 (http://www.seekbot.net/bot.html) RobotsTxtFetcher/1.2" ], "url": "http://www.seekbot.net/bot.html" - } - , + }, { "pattern": "Gigabot", "instances": [ @@ -355,16 +320,14 @@ "Gigabot/2.0 (http://www.gigablast.com/spider.html)" ], "url": "http://www.gigablast.com/spider.html" - } - , + }, { "pattern": "Gigablast", "instances": [ "GigablastOpenSource/1.0" ], "url": "https://github.com/gigablast/open-source-search-engine" - } - , + }, { "pattern": "exabot", "instances": [ @@ -375,35 +338,30 @@ "Mozilla/5.0 (compatible; Exabot/3.0; +http://www.exabot.com/go/robot)", "Mozilla/5.0 (compatible; Exabot/3.0; http://www.exabot.com/go/robot)" ] - } - , + }, { "pattern": "ia_archiver", "instances": [ "ia_archiver (+http://www.alexa.com/site/help/webmasters; crawler@alexa.com)", "ia_archiver-web.archive.org" ] - } - , + }, { "pattern": "GingerCrawler", "instances": [ "GingerCrawler/1.0 (Language 
Assistant for Dyslexics; www.gingersoftware.com/crawler_agent.htm; support at ginger software dot com)" ] - } - , + }, { "pattern": "webmon ", "instances": [] - } - , + }, { "pattern": "HTTrack", "instances": [ "Mozilla/4.5 (compatible; HTTrack 3.0x; Windows 98)" ] - } - , + }, { "pattern": "grub\\.org", "instances": [ @@ -419,23 +377,19 @@ "Mozilla/4.0 (compatible; grub-client-1.4.3; Crawl your own stuff with http://grub.org)", "Mozilla/4.0 (compatible; grub-client-1.5.3; Crawl your own stuff with http://grub.org)" ] - } - , + }, { "pattern": "UsineNouvelleCrawler", "instances": [] - } - , + }, { "pattern": "antibot", "instances": [] - } - , + }, { "pattern": "netresearchserver", "instances": [] - } - , + }, { "pattern": "speedy", "instances": [ @@ -445,13 +399,11 @@ "Speedy Spider (Entireweb; Beta/1.2; http://www.entireweb.com/about/search_tech/speedyspider/)", "Speedy Spider (http://www.entireweb.com/about/search_tech/speedy_spider/)" ] - } - , + }, { "pattern": "fluffy", "instances": [] - } - , + }, { "pattern": "findlink", "instances": [ @@ -479,20 +431,17 @@ "findlinks/2.5 (+http://wortschatz.uni-leipzig.de/findlinks/)", "findlinks/2.6 (+http://wortschatz.uni-leipzig.de/findlinks/)" ] - } - , + }, { "pattern": "msrbot", "instances": [] - } - , + }, { "pattern": "panscient", "instances": [ "panscient.com" ] - } - , + }, { "pattern": "yacybot", "instances": [ @@ -544,13 +493,11 @@ "yacybot (-global; amd64 Linux 5.2.9-Jinsol; java 12.0.2; Europe/en) http://yacy.net/bot.html", "yacybot (-global; amd64 Linux 5.2.11-Jinsol; java 12.0.2; Europe/en) http://yacy.net/bot.html" ] - } - , + }, { "pattern": "AISearchBot", "instances": [] - } - , + }, { "pattern": "ips-agent", "instances": [ @@ -560,13 +507,11 @@ "Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.2.24; ips-agent) Gecko/20111107 Ubuntu/10.04 (lucid) Firefox/3.6.24", "Mozilla/5.0 (X11; Ubuntu; Linux i686; rv:14.0; ips-agent) Gecko/20100101 Firefox/14.0.1" ] - } - , + }, { "pattern": "tagoobot", "instances": [] 
- } - , + }, { "pattern": "MJ12bot", "instances": [ @@ -591,37 +536,32 @@ "Mozilla/5.0 (compatible; MJ12bot/v1.4.7; http://www.majestic12.co.uk/bot.php?+)", "Mozilla/5.0 (compatible; MJ12bot/v1.4.8; http://mj12bot.com/)" ] - } - , + }, { "pattern": "woriobot", "instances": [ "Mozilla/5.0 (compatible; woriobot +http://worio.com)", "Mozilla/5.0 (compatible; woriobot support [at] zite [dot] com +http://zite.com)" ] - } - , + }, { "pattern": "yanga", "instances": [ "Yanga WorldSearch Bot v1.1/beta (http://www.yanga.co.uk/)" ] - } - , + }, { "pattern": "buzzbot", "instances": [ "Buzzbot/1.0 (Buzzbot; http://www.buzzstream.com; buzzbot@buzzstream.com)" ] - } - , + }, { "pattern": "mlbot", "instances": [ "MLBot (www.metadatalabs.com/mlbot)" ] - } - , + }, { "pattern": "yandex\\.com\\/bots", "url": "https://yandex.ru/support/webmaster/robot-workings/check-yandex-robots.html#robot-in-logs", @@ -669,14 +609,12 @@ "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2228.0 Safari/537.36 (compatible; YandexScreenshotBot/3.0; +http://yandex.com/bots)" ], "addition_date": "2015/04/14" - } - , + }, { "pattern": "purebot", "addition_date": "2010/01/19", "instances": [] - } - , + }, { "pattern": "Linguee Bot", "addition_date": "2010/01/26", @@ -685,8 +623,7 @@ "Linguee Bot (http://www.linguee.com/bot)", "Linguee Bot (http://www.linguee.com/bot; bot@linguee.com)" ] - } - , + }, { "pattern": "CyberPatrol", "addition_date": "2010/02/11", @@ -694,8 +631,7 @@ "instances": [ "CyberPatrol SiteCat Webbot (http://www.cyberpatrol.com/cyberpatrolcrawler.asp)" ] - } - , + }, { "pattern": "voilabot", "addition_date": "2010/05/18", @@ -703,8 +639,7 @@ "Mozilla/5.0 (Windows NT 5.1; U; Win64; fr; rv:1.8.1) VoilaBot BETA 1.2 (support.voilabot@orange-ftgroup.com)", "Mozilla/5.0 (Windows; U; Windows NT 5.1; fr; rv:1.8.1) VoilaBot BETA 1.2 (support.voilabot@orange-ftgroup.com)" ] - } - , + }, { "pattern": "Baiduspider", "addition_date": "2010/07/15", @@ -713,14 +648,12 
@@ "Mozilla/5.0 (compatible; Baiduspider/2.0; +http://www.baidu.com/search/spider.html)", "Mozilla/5.0 (compatible; Baiduspider-render/2.0; +http://www.baidu.com/search/spider.html)" ] - } - , + }, { "pattern": "citeseerxbot", "addition_date": "2010/07/17", "instances": [] - } - , + }, { "pattern": "spbot", "addition_date": "2010/07/31", @@ -761,15 +694,13 @@ "Mozilla/5.0 (compatible; spbot/5.0.3; +http://OpenLinkProfiler.org/bot )", "Mozilla/5.0 (compatible; spbot/5.0; +http://OpenLinkProfiler.org/bot )" ] - } - , + }, { "pattern": "twengabot", "addition_date": "2010/08/03", "url": "http://www.twenga.com/bot.html", "instances": [] - } - , + }, { "pattern": "postrank", "addition_date": "2010/08/03", @@ -778,8 +709,7 @@ "PostRank/2.0 (postrank.com)", "PostRank/2.0 (postrank.com; 1 subscribers)" ] - } - , + }, { "pattern": "Turnitin", "addition_date": "2010/09/26", @@ -788,15 +718,13 @@ "TurnitinBot (https://turnitin.com/robot/crawlerinfo.html)", "Turnitin (https://bit.ly/2UvnfoQ)" ] - } - , + }, { "pattern": "scribdbot", "addition_date": "2010/09/28", "url": "http://www.scribd.com", "instances": [] - } - , + }, { "pattern": "page2rss", "addition_date": "2010/10/07", @@ -804,8 +732,7 @@ "instances": [ "Mozilla/5.0 (compatible; Page2RSS/0.7; +http://page2rss.com/)" ] - } - , + }, { "pattern": "sitebot", "addition_date": "2010/12/15", @@ -813,8 +740,7 @@ "instances": [ "Mozilla/5.0 (compatible; Whoiswebsitebot/0.1; +http://www.whoiswebsite.net)" ] - } - , + }, { "pattern": "linkdex", "addition_date": "2011/01/06", @@ -828,14 +754,12 @@ "linkdex.com/v2.0", "linkdexbot/Nutch-1.0-dev (http://www.linkdex.com/; crawl at linkdex dot com)" ] - } - , + }, { "pattern": "Adidxbot", "url": "http://onlinehelp.microsoft.com/en-us/bing/hh204496.aspx", "instances": [] - } - , + }, { "pattern": "ezooms", "addition_date": "2011/04/27", @@ -843,8 +767,7 @@ "instances": [ "Mozilla/5.0 (compatible; Ezooms/1.0; ezooms.bot@gmail.com)" ] - } - , + }, { "pattern": "dotbot", "addition_date": 
"2011/04/27", @@ -852,8 +775,7 @@ "Mozilla/5.0 (compatible; DotBot/1.1; http://www.opensiteexplorer.org/dotbot, help@moz.com)", "dotbot" ] - } - , + }, { "pattern": "Mail\\.RU_Bot", "addition_date": "2011/04/27", @@ -863,8 +785,7 @@ "Mozilla/5.0 (compatible; Mail.RU_Bot/2.0; +http://go.mail.ru/", "Mozilla/5.0 (compatible; Linux x86_64; Mail.RU_Bot/Robots/2.0; +http://go.mail.ru/help/robots)" ] - } - , + }, { "pattern": "discobot", "addition_date": "2011/05/03", @@ -874,8 +795,7 @@ "Mozilla/5.0 (compatible; discobot/2.0; +http://discoveryengine.com/discobot.html)", "mozilla/5.0 (compatible; discobot/1.1; +http://discoveryengine.com/discobot.html)" ] - } - , + }, { "pattern": "heritrix", "addition_date": "2011/06/21", @@ -904,15 +824,13 @@ "Mozilla/5.0 (compatible; heritrix/3.3.0-SNAPSHOT-20160309-0050; UniLeipzigASV +http://corpora.informatik.uni-leipzig.de/crawler_faq.html)", "Mozilla/5.0 (compatible; sukibot_heritrix/3.1.1 +http://suki.ling.helsinki.fi/eng/webmasters.html)" ] - } - , + }, { "pattern": "findthatfile", "addition_date": "2011/06/21", "url": "http://www.findthatfile.com/", "instances": [] - } - , + }, { "pattern": "europarchive\\.org", "addition_date": "2011/06/21", @@ -920,8 +838,7 @@ "instances": [ "Mozilla/5.0 (compatible; MSIE 7.0 +http://www.europarchive.org)" ] - } - , + }, { "pattern": "NerdByNature\\.Bot", "addition_date": "2011/07/12", @@ -929,14 +846,12 @@ "instances": [ "Mozilla/5.0 (compatible; NerdByNature.Bot; http://www.nerdbynature.net/bot)" ] - } - , + }, { "pattern": "sistrix crawler", "addition_date": "2011/08/02", "instances": [] - } - , + }, { "pattern": "Ahrefs(Bot|SiteAudit)", "addition_date": "2011/08/28", @@ -949,104 +864,91 @@ "Mozilla/5.0 (compatible; AhrefsBot/6.1; News; +http://ahrefs.com/robot/)", "Mozilla/5.0 (compatible; AhrefsBot/7.0; +http://ahrefs.com/robot/)" ] - } - , + }, { "pattern": "fuelbot", "addition_date": "2018/06/28", "instances": [ "fuelbot" ] - } - , + }, { "pattern": "CrunchBot", "addition_date": 
"2018/06/28", "instances": [ "CrunchBot/1.0 (+http://www.leadcrunch.com/crunchbot)" ] - } - , + }, { "pattern": "IndeedBot", "addition_date": "2018/06/28", "instances": [ "Mozilla/5.0 (Windows NT 6.1; rv:38.0) Gecko/20100101 Firefox/38.0 (IndeedBot 1.1)" ] - } - , + }, { "pattern": "mappydata", "addition_date": "2018/06/28", "instances": [ "Mozilla/5.0 (compatible; Mappy/1.0; +http://mappydata.net/bot/)" ] - } - , + }, { "pattern": "woobot", "addition_date": "2018/06/28", "instances": [ "woobot" ] - } - , + }, { "pattern": "ZoominfoBot", "addition_date": "2018/06/28", "instances": [ "ZoominfoBot (zoominfobot at zoominfo dot com)" ] - } - , + }, { "pattern": "PrivacyAwareBot", "addition_date": "2018/06/28", "instances": [ "Mozilla/5.0 (compatible; PrivacyAwareBot/1.1; +http://www.privacyaware.org)" ] - } - , + }, { "pattern": "Multiviewbot", "addition_date": "2018/06/28", "instances": [ "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Multiviewbot" ] - } - , + }, { "pattern": "SWIMGBot", "addition_date": "2018/06/28", "instances": [ "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/45.0.2454.101 Safari/537.36 SWIMGBot" ] - } - , + }, { "pattern": "Grobbot", "addition_date": "2018/06/28", "instances": [ "Mozilla/5.0 (compatible; Grobbot/2.2; +https://grob.it)" ] - } - , + }, { "pattern": "eright", "addition_date": "2018/06/28", "instances": [ "Mozilla/5.0 (compatible; eright/1.0; +bot@eright.com)" ] - } - , + }, { "pattern": "Apercite", "addition_date": "2018/06/28", "instances": [ "Mozilla/5.0 (compatible; Apercite; +http://www.apercite.fr/robot/index.html)" ] - } - , + }, { "pattern": "semanticbot", "addition_date": "2018/06/28", @@ -1054,8 +956,7 @@ "semanticbot", "semanticbot (info@semanticaudience.com)" ] - } - , + }, { "pattern": "Aboundex", "addition_date": "2011/09/28", @@ -1064,23 +965,20 @@ "Aboundex/0.2 (http://www.aboundex.com/crawler/)", "Aboundex/0.3 (http://www.aboundex.com/crawler/)" ] - } - , + }, { 
"pattern": "domaincrawler", "addition_date": "2011/10/21", "instances": [ "CipaCrawler/3.0 (info@domaincrawler.com; http://www.domaincrawler.com/www.example.com)" ] - } - , + }, { "pattern": "wbsearchbot", "addition_date": "2011/12/21", "url": "http://www.warebay.com/bot.html", "instances": [] - } - , + }, { "pattern": "summify", "addition_date": "2012/01/04", @@ -1088,8 +986,7 @@ "instances": [ "Summify (Summify/1.0.1; +http://summify.com)" ] - } - , + }, { "pattern": "CCBot", "addition_date": "2012/02/05", @@ -1098,14 +995,12 @@ "CCBot/2.0 (http://commoncrawl.org/faq/)", "CCBot/2.0 (https://commoncrawl.org/faq/)" ] - } - , + }, { "pattern": "edisterbot", "addition_date": "2012/02/25", "instances": [] - } - , + }, { "pattern": "SeznamBot", "addition_date": "2012/03/14", @@ -1117,36 +1012,31 @@ "Mozilla/5.0 (compatible; SeznamBot/3.2; +http://napoveda.seznam.cz/en/seznambot-intro/)", "Mozilla/5.0 (compatible; SeznamBot/4.0; +http://napoveda.seznam.cz/seznambot-intro/)" ] - } - , + }, { "pattern": "ec2linkfinder", "addition_date": "2012/03/22", "instances": [ "ec2linkfinder" ] - } - , + }, { "pattern": "gslfbot", "addition_date": "2012/04/03", "instances": [] - } - , + }, { "pattern": "aiHitBot", "addition_date": "2012/04/16", "instances": [ "Mozilla/5.0 (compatible; aiHitBot/2.9; +https://www.aihitdata.com/about)" ] - } - , + }, { "pattern": "intelium_bot", "addition_date": "2012/05/07", "instances": [] - } - , + }, { "pattern": "facebookexternalhit", "addition_date": "2012/05/07", @@ -1156,8 +1046,7 @@ "facebookexternalhit/1.1 (+http://www.facebook.com/externalhit_uatext.php)" ], "url": "https://developers.facebook.com/docs/sharing/webmasters/crawler/" - } - , + }, { "pattern": "Yeti", "addition_date": "2012/05/07", @@ -1165,22 +1054,19 @@ "instances": [ "Mozilla/5.0 (compatible; Yeti/1.1; +http://naver.me/bot)" ] - } - , + }, { "pattern": "RetrevoPageAnalyzer", "addition_date": "2012/05/07", "instances": [ "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; 
RetrevoPageAnalyzer; +http://www.retrevo.com/content/about-us)" ] - } - , + }, { "pattern": "lb-spider", "addition_date": "2012/05/07", "instances": [] - } - , + }, { "pattern": "Sogou", "addition_date": "2012/05/13", @@ -1190,21 +1076,18 @@ "Sogou Pic Spider/3.0(+http://www.sogou.com/docs/help/webmasters.htm#07)", "Sogou web spider/4.0(+http://www.sogou.com/docs/help/webmasters.htm#07)" ] - } - , + }, { "pattern": "lssbot", "addition_date": "2012/05/15", "instances": [] - } - , + }, { "pattern": "careerbot", "addition_date": "2012/05/23", "url": "http://www.career-x.de/bot.html", "instances": [] - } - , + }, { "pattern": "wotbox", "addition_date": "2012/06/12", @@ -1213,15 +1096,13 @@ "Wotbox/2.0 (bot@wotbox.com; http://www.wotbox.com)", "Wotbox/2.01 (+http://www.wotbox.com/bot/)" ] - } - , + }, { "pattern": "wocbot", "addition_date": "2012/07/25", "url": "http://www.wocodi.com/crawler", "instances": [] - } - , + }, { "pattern": "ichiro", "addition_date": "2012/08/28", @@ -1243,8 +1124,7 @@ "ichiro/4.0 (http://help.goo.ne.jp/door/crawler.html)", "ichiro/5.0 (http://help.goo.ne.jp/door/crawler.html)" ] - } - , + }, { "pattern": "DuckDuckBot", "addition_date": "2012/09/19", @@ -1255,14 +1135,12 @@ "Mozilla/5.0 (compatible; DuckDuckBot-Https/1.1; https://duckduckgo.com/duckduckbot)", "'Mozilla/5.0 (compatible; DuckDuckBot-Https/1.1; https://duckduckgo.com/duckduckbot)'" ] - } - , + }, { "pattern": "lssrocketcrawler", "addition_date": "2012/09/24", "instances": [] - } - , + }, { "pattern": "drupact", "addition_date": "2012/09/27", @@ -1270,45 +1148,38 @@ "instances": [ "drupact/0.7; http://www.arocom.de/drupact" ] - } - , + }, { "pattern": "webcompanycrawler", "addition_date": "2012/10/03", "instances": [] - } - , + }, { "pattern": "acoonbot", "addition_date": "2012/10/07", "url": "http://www.acoon.de/robot.asp", "instances": [] - } - , + }, { "pattern": "openindexspider", "addition_date": "2012/10/26", "url": "http://www.openindex.io/en/webmasters/spider.html", 
"instances": [] - } - , + }, { "pattern": "gnam gnam spider", "addition_date": "2012/10/31", "instances": [] - } - , + }, { "pattern": "web-archive-net\\.com\\.bot", "instances": [] - } - , + }, { "pattern": "backlinkcrawler", "addition_date": "2013/01/04", "instances": [] - } - , + }, { "pattern": "coccoc", "addition_date": "2013/01/04", @@ -1326,8 +1197,7 @@ "coccoc/1.0 (http://help.coccoc.com/)", "coccoc/1.0 (http://help.coccoc.vn/)" ] - } - , + }, { "pattern": "integromedb", "addition_date": "2013/01/10", @@ -1335,20 +1205,17 @@ "instances": [ "www.integromedb.org/Crawler" ] - } - , + }, { "pattern": "content crawler spider", "addition_date": "2013/01/11", "instances": [] - } - , + }, { "pattern": "toplistbot", "addition_date": "2013/02/05", "instances": [] - } - , + }, { "pattern": "it2media-domain-crawler", "addition_date": "2013/03/12", @@ -1356,14 +1223,12 @@ "it2media-domain-crawler/1.0 on crawler-prod.it2media.de", "it2media-domain-crawler/2.0" ] - } - , + }, { "pattern": "ip-web-crawler\\.com", "addition_date": "2013/03/22", "instances": [] - } - , + }, { "pattern": "siteexplorer\\.info", "addition_date": "2013/05/01", @@ -1371,14 +1236,12 @@ "Mozilla/5.0 (compatible; SiteExplorer/1.0b; +http://siteexplorer.info/)", "Mozilla/5.0 (compatible; SiteExplorer/1.1b; +http://siteexplorer.info/Backlink-Checker-Spider/)" ] - } - , + }, { "pattern": "elisabot", "addition_date": "2013/06/27", "instances": [] - } - , + }, { "pattern": "proximic", "addition_date": "2013/09/12", @@ -1387,8 +1250,7 @@ "Mozilla/5.0 (compatible; proximic; +http://www.proximic.com)", "Mozilla/5.0 (compatible; proximic; +http://www.proximic.com/info/spider.php)" ] - } - , + }, { "pattern": "changedetection", "addition_date": "2013/09/13", @@ -1396,14 +1258,12 @@ "instances": [ "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; http://www.changedetection.com/bot.html )" ] - } - , + }, { "pattern": "arabot", "addition_date": "2013/10/09", "instances": [] - } - , + }, { "pattern": 
"WeSEE:Search", "addition_date": "2013/11/18", @@ -1411,21 +1271,18 @@ "WeSEE:Search", "WeSEE:Search/0.1 (Alpha, http://www.wesee.com/en/support/bot/)" ] - } - , + }, { "pattern": "niki-bot", "addition_date": "2014/01/01", "instances": [] - } - , + }, { "pattern": "CrystalSemanticsBot", "addition_date": "2014/02/17", "url": "http://www.crystalsemantics.com/user-agent/", "instances": [] - } - , + }, { "pattern": "rogerbot", "addition_date": "2014/02/28", @@ -1445,8 +1302,7 @@ "rogerbot/1.1 (http://moz.com/help/guides/search-overview/crawl-diagnostics#more-help, rogerbot-crawler+pr4-crawler-15@moz.com)", "rogerbot/1.2 (http://moz.com/help/pro/what-is-rogerbot-, rogerbot-crawler+phaser-testing-crawler-01@moz.com)" ] - } - , + }, { "pattern": "360Spider", "addition_date": "2014/03/14", @@ -1463,8 +1319,7 @@ "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0); 360Spider(compatible; HaosouSpider; http://www.haosou.com/help/help_3_2.html)", "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.102 Safari/537.36; 360Spider" ] - } - , + }, { "pattern": "psbot", "addition_date": "2014/03/31", @@ -1474,15 +1329,13 @@ "psbot-page (+http://www.picsearch.com/bot.html)", "psbot/0.1 (+http://www.picsearch.com/bot.html)" ] - } - , + }, { "pattern": "InterfaxScanBot", "addition_date": "2014/03/31", "url": "http://scan-interfax.ru", "instances": [] - } - , + }, { "pattern": "CC Metadata Scaper", "addition_date": "2014/04/01", @@ -1490,15 +1343,13 @@ "instances": [ "CC Metadata Scaper http://wiki.creativecommons.org/Metadata_Scraper" ] - } - , + }, { "pattern": "g00g1e\\.net", "addition_date": "2014/04/01", "url": "http://www.g00g1e.net/", "instances": [] - } - , + }, { "pattern": "GrapeshotCrawler", "addition_date": "2014/04/01", @@ -1506,8 +1357,7 @@ "instances": [ "Mozilla/5.0 (compatible; GrapeshotCrawler/2.0; +http://www.grapeshot.co.uk/crawler.php)" ] - } - , + }, { "pattern": "urlappendbot", "addition_date": "2014/05/10", @@ 
-1515,22 +1365,19 @@ "instances": [ "Mozilla/5.0 (compatible; URLAppendBot/1.0; +http://www.profound.net/urlappendbot.html)" ] - } - , + }, { "pattern": "brainobot", "addition_date": "2014/06/24", "instances": [] - } - , + }, { "pattern": "fr-crawler", "addition_date": "2014/07/31", "instances": [ "Mozilla/5.0 (compatible; fr-crawler/1.1)" ] - } - , + }, { "pattern": "binlar", "addition_date": "2014/09/12", @@ -1541,16 +1388,14 @@ "binlar_2.6.3 phanendra_kalapala@McAfee.com", "binlar_2.6.3 test@mgmt.mic" ] - } - , + }, { "pattern": "SimpleCrawler", "addition_date": "2014/09/12", "instances": [ "SimpleCrawler/0.1" ] - } - , + }, { "pattern": "Twitterbot", "addition_date": "2014/09/12", @@ -1559,8 +1404,7 @@ "Twitterbot/0.1", "Twitterbot/1.0" ] - } - , + }, { "pattern": "cXensebot", "addition_date": "2014/10/05", @@ -1568,8 +1412,7 @@ "cXensebot/1.1a" ], "url": "http://www.cxense.com/bot.html" - } - , + }, { "pattern": "smtbot", "addition_date": "2014/10/04", @@ -1581,8 +1424,7 @@ "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.75 Safari/537.36 (compatible; SMTBot/1.0; http://www.similartech.com/smtbot)" ], "url": "http://www.similartech.com/smtbot" - } - , + }, { "pattern": "bnf\\.fr_bot", "addition_date": "2014/11/18", @@ -1591,8 +1433,7 @@ "Mozilla/5.0 (compatible; bnf.fr_bot; +http://bibnum.bnf.fr/robot/bnf.html)", "Mozilla/5.0 (compatible; bnf.fr_bot; +http://www.bnf.fr/fr/outils/a.dl_web_capture_robot.html)" ] - } - , + }, { "pattern": "A6-Indexer", "addition_date": "2014/12/05", @@ -1600,8 +1441,7 @@ "instances": [ "A6-Indexer" ] - } - , + }, { "pattern": "ADmantX", "addition_date": "2014/12/05", @@ -1609,8 +1449,7 @@ "instances": [ "ADmantX Platform Semantic Analyzer - ADmantX Inc. 
- www.admantx.com - support@admantx.com" ] - } - , + }, { "pattern": "Facebot", "url": "https://developers.facebook.com/docs/sharing/best-practices#crawl", @@ -1618,16 +1457,14 @@ "instances": [ "Facebot/1.0" ] - } - , + }, { "pattern": "OrangeBot\\/", "instances": [ "Mozilla/5.0 (compatible; OrangeBot/2.0; support.orangebot@orange.com" ], "addition_date": "2015/01/12" - } - , + }, { "pattern": "memorybot", "url": "http://mignify.com/bot.htm", @@ -1635,8 +1472,7 @@ "Mozilla/5.0 (compatible; memorybot/1.21.14 +http://mignify.com/bot.html)" ], "addition_date": "2015/02/01" - } - , + }, { "pattern": "AdvBot", "url": "http://advbot.net/bot.html", @@ -1644,8 +1480,7 @@ "Mozilla/5.0 (compatible; AdvBot/2.0; +http://advbot.net/bot.html)" ], "addition_date": "2015/02/01" - } - , + }, { "pattern": "MegaIndex", "url": "https://www.megaindex.ru/?tab=linkAnalyze", @@ -1654,8 +1489,7 @@ "Mozilla/5.0 (compatible; MegaIndex.ru/2.0; +http://megaindex.com/crawler)" ], "addition_date": "2015/03/28" - } - , + }, { "pattern": "SemanticScholarBot", "url": "https://www.semanticscholar.org/crawler", @@ -1664,8 +1498,7 @@ "Mozilla/5.0 (compatible) SemanticScholarBot (+https://www.semanticscholar.org/crawler)" ], "addition_date": "2015/03/28" - } - , + }, { "pattern": "ltx71", "url": "http://ltx71.com/", @@ -1673,8 +1506,7 @@ "ltx71 - (http://ltx71.com/)" ], "addition_date": "2015/04/04" - } - , + }, { "pattern": "nerdybot", "url": "http://nerdybot.com/", @@ -1682,8 +1514,7 @@ "nerdybot" ], "addition_date": "2015/04/05" - } - , + }, { "pattern": "xovibot", "url": "http://www.xovibot.net/", @@ -1691,8 +1522,7 @@ "Mozilla/5.0 (compatible; XoviBot/2.0; +http://www.xovibot.net/)" ], "addition_date": "2015/04/05" - } - , + }, { "pattern": "BUbiNG", "url": "http://law.di.unimi.it/BUbiNG.html", @@ -1700,8 +1530,7 @@ "BUbiNG (+http://law.di.unimi.it/BUbiNG.html)" ], "addition_date": "2015/04/06" - } - , + }, { "pattern": "Qwantify", "url": "https://www.qwant.com/", @@ -1712,12 +1541,13 @@ 
"Mozilla/5.0 (compatible; Qwantify/Bleriot/1.2.1; +https://help.qwant.com/bot)" ], "addition_date": "2015/04/06" - } - , + }, { "pattern": "archive\\.org_bot", "url": "http://www.archive.org/details/archive.org_bot", - "depends_on": ["heritrix"], + "depends_on": [ + "heritrix" + ], "instances": [ "Mozilla/5.0 (compatible; heritrix/3.1.1-SNAPSHOT-20120116.200628 +http://www.archive.org/details/archive.org_bot)", "Mozilla/5.0 (compatible; archive.org_bot/heritrix-1.15.4 +http://www.archive.org)", @@ -1727,8 +1557,7 @@ "Mozilla/5.0 (compatible; special_archiver/3.1.1 +http://www.archive.org/details/archive.org_bot)" ], "addition_date": "2015/04/14" - } - , + }, { "pattern": "Applebot", "url": "http://www.apple.com/go/applebot", @@ -1740,8 +1569,7 @@ "Mozilla/5.0 (iPhone; CPU iPhone OS 6_0 like Mac OS X) AppleWebKit/536.26 (KHTML, like Gecko) Version/6.0 Mobile/10A5376e Safari/8536.25 (compatible; Applebot/0.3; +http://www.apple.com/go/applebot)", "Mozilla/5.0 (iPhone; CPU iPhone OS 8_1 like Mac OS X) AppleWebKit/600.1.4 (KHTML, like Gecko) Version/8.0 Mobile/12B410 Safari/600.1.4 (Applebot/0.1; +http://www.apple.com/go/applebot)" ] - } - , + }, { "pattern": "TweetmemeBot", "url": "http://datasift.com/bot.html", @@ -1749,8 +1577,7 @@ "Mozilla/5.0 (TweetmemeBot/4.0; +http://datasift.com/bot.html) Gecko/20100101 Firefox/31.0" ], "addition_date": "2015/04/15" - } - , + }, { "pattern": "crawler4j", "url": "https://github.com/yasserg/crawler4j", @@ -1759,8 +1586,7 @@ "crawler4j (https://github.com/yasserg/crawler4j/)" ], "addition_date": "2015/05/07" - } - , + }, { "pattern": "findxbot", "url": "http://www.findxbot.com", @@ -1768,8 +1594,7 @@ "Mozilla/5.0 (compatible; Findxbot/1.0; +http://www.findxbot.com)" ], "addition_date": "2015/05/07" - } - , + }, { "pattern": "S[eE][mM]rushBot", "url": "http://www.semrush.com/bot.html", @@ -1784,8 +1609,7 @@ "SEMrushBot" ], "addition_date": "2015/05/26" - } - , + }, { "pattern": "yoozBot", "url": "http://yooz.ir", @@ -1793,8 +1617,7 
@@ "Mozilla/5.0 (compatible; yoozBot-2.2; http://yooz.ir; info@yooz.ir)" ], "addition_date": "2015/05/26" - } - , + }, { "pattern": "lipperhey", "url": "http://www.lipperhey.com/", @@ -1805,8 +1628,7 @@ "Mozilla/5.0 (compatible; Lipperhey-Kaus-Australis/5.0; +https://www.lipperhey.com/en/about/)" ], "addition_date": "2015/08/26" - } - , + }, { "pattern": "Y!J", "url": "https://www.yahoo-help.jp/app/answers/detail/p/595/a_id/42716/~/%E3%82%A6%E3%82%A7%E3%83%96%E3%83%9A%E3%83%BC%E3%82%B8%E3%81%AB%E3%82%A2%E3%82%AF%E3%82%BB%E3%82%B9%E3%81%99%E3%82%8B%E3%82%B7%E3%82%B9%E3%83%86%E3%83%A0%E3%81%AE%E3%83%A6%E3%83%BC%E3%82%B6%E3%83%BC%E3%82%A8%E3%83%BC%E3%82%B8%E3%82%A7%E3%83%B3%E3%83%88%E3%81%AB%E3%81%A4%E3%81%84%E3%81%A6", @@ -1819,8 +1641,7 @@ "Mozilla/5.0 (compatible; Y!J SearchMonkey/1.0 (Y!J-AGENT; http://help.yahoo.co.jp/help/jp/search/indexing/indexing-15.html))" ], "addition_date": "2015/05/26" - } - , + }, { "pattern": "Domain Re-Animator Bot", "url": "http://domainreanimator.com", @@ -1828,8 +1649,7 @@ "Domain Re-Animator Bot (http://domainreanimator.com) - support@domainreanimator.com" ], "addition_date": "2015/04/14" - } - , + }, { "pattern": "AddThis", "url": "https://www.addthis.com", @@ -1837,8 +1657,7 @@ "AddThis.com robot tech.support@clearspring.com" ], "addition_date": "2015/06/02" - } - , + }, { "pattern": "Screaming Frog SEO Spider", "url": "http://www.screamingfrog.co.uk/seo-spider", @@ -1846,8 +1665,7 @@ "Screaming Frog SEO Spider/5.1" ], "addition_date": "2016/01/08" - } - , + }, { "pattern": "MetaURI", "url": "http://www.useragentstring.com/MetaURI_id_17683.php", @@ -1855,8 +1673,7 @@ "MetaURI API/2.0 +metauri.com" ], "addition_date": "2016/01/02" - } - , + }, { "pattern": "Scrapy", "url": "http://scrapy.org/", @@ -1864,8 +1681,7 @@ "Scrapy/1.0.3 (+http://scrapy.org)" ], "addition_date": "2016/01/02" - } - , + }, { "pattern": "Livelap[bB]ot", "url": "http://site.livelap.com/crawler", @@ -1874,8 +1690,7 @@ "Livelapbot/0.1" ], "addition_date": 
"2016/01/02" - } - , + }, { "pattern": "OpenHoseBot", "url": "http://www.openhose.org/bot.html", @@ -1883,8 +1698,7 @@ "Mozilla/5.0 (compatible; OpenHoseBot/2.1; +http://www.openhose.org/bot.html)" ], "addition_date": "2016/01/02" - } - , + }, { "pattern": "CapsuleChecker", "url": "http://www.capsulink.com/about", @@ -1892,8 +1706,7 @@ "CapsuleChecker (http://www.capsulink.com/)" ], "addition_date": "2016/01/02" - } - , + }, { "pattern": "collection@infegy\\.com", "url": "http://infegy.com/", @@ -1901,8 +1714,7 @@ "Mozilla/5.0 (compatible) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/47.0.2526.73 Safari/537.36 collection@infegy.com" ], "addition_date": "2016/01/03" - } - , + }, { "pattern": "IstellaBot", "url": "http://www.tiscali.it/", @@ -1910,8 +1722,7 @@ "Mozilla/5.0 (compatible; IstellaBot/1.23.15 +http://www.tiscali.it/)" ], "addition_date": "2016/01/09" - } - , + }, { "pattern": "DeuSu\\/", "addition_date": "2016/01/23", @@ -1920,14 +1731,12 @@ "Mozilla/5.0 (compatible; DeuSu/0.1.0; +https://deusu.org)", "Mozilla/5.0 (compatible; DeuSu/5.0.2; +https://deusu.de/robot.html)" ] - } - , + }, { "pattern": "betaBot", "addition_date": "2016/01/23", "instances": [] - } - , + }, { "pattern": "Cliqzbot\\/", "addition_date": "2016/01/23", @@ -1939,8 +1748,7 @@ "Mozilla/5.0 (compatible; Cliqzbot/0.1 +http://cliqz.com/company/cliqzbot)", "Mozilla/5.0 (compatible; Cliqzbot/1.0 +http://cliqz.com/company/cliqzbot)" ] - } - , + }, { "pattern": "MojeekBot\\/", "addition_date": "2016/01/23", @@ -1953,8 +1761,7 @@ "Mozilla/5.0 (compatible; MojeekBot/0.6; +https://www.mojeek.com/bot.html)", "Mozilla/5.0 (compatible; MojeekBot/0.6; http://www.mojeek.com/bot.html)" ] - } - , + }, { "pattern": "netEstate NE Crawler", "addition_date": "2016/01/23", @@ -1963,8 +1770,7 @@ "netEstate NE Crawler (+http://www.sengine.info/)", "netEstate NE Crawler (+http://www.website-datenbank.de/)" ] - } - , + }, { "pattern": "SafeSearch microdata crawler", "addition_date": "2016/01/23", @@ -1972,8 
+1778,7 @@ "instances": [ "SafeSearch microdata crawler (https://safesearch.avira.com, safesearch-abuse@avira.com)" ] - } - , + }, { "pattern": "Gluten Free Crawler\\/", "addition_date": "2016/01/23", @@ -1981,8 +1786,7 @@ "instances": [ "Mozilla/5.0 (compatible; Gluten Free Crawler/1.0; +http://glutenfreepleasure.com/)" ] - } - , + }, { "pattern": "Sonic", "addition_date": "2016/02/08", @@ -1992,8 +1796,7 @@ "Mozilla/5.0 (compatible; Sonic/1.0; http://www.yama.info.waseda.ac.jp/~crawler/info.html)", "Mozzila/5.0 (compatible; Sonic/1.0; http://www.yama.info.waseda.ac.jp/~crawler/info.html)" ] - } - , + }, { "pattern": "Sysomos", "addition_date": "2016/02/08", @@ -2001,15 +1804,13 @@ "instances": [ "Mozilla/5.0 (compatible; Sysomos/1.0; +http://www.sysomos.com/; Sysomos)" ] - } - , + }, { "pattern": "Trove", "addition_date": "2016/02/08", "url": "http://www.trove.com", "instances": [] - } - , + }, { "pattern": "deadlinkchecker", "addition_date": "2016/02/08", @@ -2019,8 +1820,7 @@ "www.deadlinkchecker.com XMLHTTP/1.0", "www.deadlinkchecker.com XMLHTTP/1.0 Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/46.0.2490.86 Safari/537.36" ] - } - , + }, { "pattern": "Slack-ImgProxy", "addition_date": "2016/04/25", @@ -2033,8 +1833,7 @@ "Slack-ImgProxy 1.138 (+https://api.slack.com/robots)", "Slack-ImgProxy 149 (+https://api.slack.com/robots)" ] - } - , + }, { "pattern": "Embedly", "addition_date": "2016/04/25", @@ -2044,8 +1843,7 @@ "Mozilla/5.0 (compatible; Embedly/0.2; +http://support.embed.ly/)", "Mozilla/5.0 (compatible; Embedly/0.2; snap; +http://support.embed.ly/)" ] - } - , + }, { "pattern": "RankActiveLinkBot", "addition_date": "2016/06/20", @@ -2053,8 +1851,7 @@ "instances": [ "Mozilla/5.0 (compatible; RankActiveLinkBot; +https://rankactive.com/resources/rankactive-linkbot)" ] - } - , + }, { "pattern": "iskanie", "addition_date": "2016/09/02", @@ -2062,8 +1859,7 @@ "instances": [ "iskanie (+http://www.iskanie.com)" ] - } - , + }, { 
"pattern": "SafeDNSBot", "addition_date": "2016/09/10", @@ -2071,16 +1867,14 @@ "instances": [ "SafeDNSBot (https://www.safedns.com/searchbot)" ] - } - , + }, { "pattern": "SkypeUriPreview", "addition_date": "2016/10/10", "instances": [ "Mozilla/5.0 (Windows NT 6.1; WOW64) SkypeUriPreview Preview/0.5" ] - } - , + }, { "pattern": "Veoozbot", "addition_date": "2016/11/03", @@ -2088,8 +1882,7 @@ "instances": [ "Mozilla/5.0 (compatible; Veoozbot/1.0; +http://www.veooz.com/veoozbot.html)" ] - } - , + }, { "pattern": "Slackbot", "addition_date": "2016/11/03", @@ -2099,8 +1892,7 @@ "Slackbot-LinkExpanding 1.0 (+https://api.slack.com/robots)", "Slackbot 1.0 (+https://api.slack.com/robots)" ] - } - , + }, { "pattern": "redditbot", "addition_date": "2016/11/03", @@ -2108,8 +1900,7 @@ "instances": [ "Mozilla/5.0 (compatible; redditbot/1.0; +http://www.reddit.com/feedback)" ] - } - , + }, { "pattern": "datagnionbot", "addition_date": "2016/11/03", @@ -2117,8 +1908,7 @@ "instances": [ "datagnionbot (+http://www.datagnion.com/bot.html)" ] - } - , + }, { "pattern": "Google-Adwords-Instant", "addition_date": "2016/11/03", @@ -2126,8 +1916,7 @@ "instances": [ "Google-Adwords-Instant (+http://www.google.com/adsbot.html)" ] - } - , + }, { "pattern": "adbeat_bot", "addition_date": "2016/11/04", @@ -2135,8 +1924,7 @@ "Mozilla/5.0 (compatible; adbeat_bot; +support@adbeat.com; support@adbeat.com)", "adbeat_bot" ] - } - , + }, { "pattern": "WhatsApp", "addition_date": "2016/11/15", @@ -2168,16 +1956,14 @@ "WhatsApp/2.19.308 A", "WhatsApp/2.19.330 A" ] - } - , + }, { "pattern": "contxbot", "addition_date": "2017/02/25", "instances": [ "Mozilla/5.0 (compatible;contxbot/1.0)" ] - } - , + }, { "pattern": "pinterest\\.com\\/bot", "addition_date": "2017/03/03", @@ -2186,8 +1972,7 @@ "Pinterest/0.2 (+http://www.pinterest.com/bot.html)" ], "url": "http://www.pinterest.com/bot.html" - } - , + }, { "pattern": "electricmonk", "addition_date": "2017/03/04", @@ -2195,8 +1980,7 @@ "Mozilla/5.0 
(compatible; electricmonk/3.2.0 +https://www.duedil.com/our-crawler/)" ], "url": "https://www.duedil.com/our-crawler/" - } - , + }, { "pattern": "GarlikCrawler", "addition_date": "2017/03/18", @@ -2204,8 +1988,7 @@ "GarlikCrawler/1.2 (http://garlik.com/, crawler@garlik.com)" ], "url": "http://garlik.com/" - } - , + }, { "pattern": "BingPreview\\/", "addition_date": "2017/04/23", @@ -2217,8 +2000,7 @@ "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0; WOW64; Trident/5.0; BingPreview/1.0b)", "Mozilla/5.0 (iPhone; CPU iPhone OS 7_0 like Mac OS X) AppleWebKit/537.51.1 (KHTML, like Gecko) Version/7.0 Mobile/11A465 Safari/9537.53 BingPreview/1.0b" ] - } - , + }, { "pattern": "vebidoobot", "addition_date": "2017/05/08", @@ -2226,8 +2008,7 @@ "Mozilla/5.0 (compatible; vebidoobot/1.0; +https://blog.vebidoo.de/vebidoobot/" ], "url": "https://blog.vebidoo.de/vebidoobot/" - } - , + }, { "pattern": "FemtosearchBot", "addition_date": "2017/05/16", @@ -2235,8 +2016,7 @@ "Mozilla/5.0 (compatible; FemtosearchBot/1.0; http://femtosearch.com)" ], "url": "http://femtosearch.com" - } - , + }, { "pattern": "Yahoo Link Preview", "addition_date": "2017/06/28", @@ -2244,8 +2024,7 @@ "Mozilla/5.0 (compatible; Yahoo Link Preview; https://help.yahoo.com/kb/mail/yahoo-link-preview-SLN23615.html)" ], "url": "https://help.yahoo.com/kb/mail/yahoo-link-preview-SLN23615.html" - } - , + }, { "pattern": "MetaJobBot", "addition_date": "2017/08/16", @@ -2253,8 +2032,7 @@ "Mozilla/5.0 (compatible; MetaJobBot; http://www.metajob.de/crawler)" ], "url": "http://www.metajob.de/the/crawler" - } - , + }, { "pattern": "DomainStatsBot", "addition_date": "2017/08/16", @@ -2262,8 +2040,7 @@ "DomainStatsBot/1.0 (http://domainstats.io/our-bot)" ], "url": "http://domainstats.io/our-bot" - } - , + }, { "pattern": "mindUpBot", "addition_date": "2017/08/16", @@ -2271,8 +2048,7 @@ "mindUpBot (datenbutler.de)" ], "url": "http://www.datenbutler.de/" - } - , + }, { "pattern": "Daum\\/", "addition_date": 
"2017/08/16", @@ -2280,8 +2056,7 @@ "Mozilla/5.0 (compatible; Daum/4.1; +http://cs.daum.net/faq/15/4118.html?faqId=28966)" ], "url": "http://cs.daum.net/faq/15/4118.html?faqId=28966" - } - , + }, { "pattern": "Jugendschutzprogramm-Crawler", "addition_date": "2017/08/16", @@ -2289,8 +2064,7 @@ "Jugendschutzprogramm-Crawler; Info: http://www.jugendschutzprogramm.de" ], "url": "http://www.jugendschutzprogramm.de" - } - , + }, { "pattern": "Xenu Link Sleuth", "addition_date": "2017/08/19", @@ -2298,8 +2072,7 @@ "Xenu Link Sleuth/1.3.8" ], "url": "http://home.snafu.de/tilman/xenulink.html" - } - , + }, { "pattern": "Pcore-HTTP", "addition_date": "2017/08/19", @@ -2308,8 +2081,7 @@ "Pcore-HTTP/v0.44.0" ], "url": "https://bitbucket.org/softvisio/pcore/overview" - } - , + }, { "pattern": "moatbot", "addition_date": "2017/09/16", @@ -2318,8 +2090,7 @@ "Mozilla/5.0 (iPhone; CPU iPhone OS 8_0 like Mac OS X) AppleWebKit/600.1.3 (KHTML, like Gecko) Version/8.0 Mobile/12A4345d Safari/600.1.4 moatbot" ], "url": "https://moat.com" - } - , + }, { "pattern": "KosmioBot", "addition_date": "2017/09/16", @@ -2327,8 +2098,7 @@ "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/44.0.2403.125 Safari/537.36 (compatible; KosmioBot/1.0; +http://kosm.io/bot.html)" ], "url": "http://kosm.io/bot.html" - } - , + }, { "pattern": "[pP]ingdom", "addition_date": "2017/09/16", @@ -2344,8 +2114,7 @@ "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Ubuntu Chromium/61.0.3163.100 Chrome/61.0.3163.100 Safari/537.36 PingdomPageSpeed/1.0 (pingbot/2.0; http://www.pingdom.com/)" ], "url": "http://www.pingdom.com" - } - , + }, { "pattern": "AppInsights", "addition_date": "2019/03/09", @@ -2353,8 +2122,7 @@ "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0; AppInsights)" ], "url": "https://docs.microsoft.com/en-us/azure/azure-monitor/app/app-insights-overview" - } - , + }, { "pattern": "PhantomJS", "addition_date": "2017/09/18", @@ 
-2362,8 +2130,7 @@ "Mozilla/5.0 (Unknown; Linux x86_64) AppleWebKit/538.1 (KHTML, like Gecko) PhantomJS/2.1.1 Safari/538.1 bl.uk_lddc_renderbot/2.0.0 (+ http://www.bl.uk/aboutus/legaldeposit/websites/websites/faqswebmaster/index.html)" ], "url": "http://phantomjs.org/" - } - , + }, { "pattern": "Gowikibot", "addition_date": "2017/10/26", @@ -2371,8 +2138,7 @@ "Mozilla/5.0 (compatible; Gowikibot/1.0; +http://www.gowikibot.com)" ], "url": "http://www.gowikibot.com" - } - , + }, { "pattern": "PiplBot", "addition_date": "2017/10/30", @@ -2381,8 +2147,7 @@ "Mozilla/5.0+(compatible;+PiplBot;+http://www.pipl.com/bot/)" ], "url": "http://www.pipl.com/bot/" - } - , + }, { "pattern": "Discordbot", "addition_date": "2017/09/22", @@ -2390,16 +2155,14 @@ "instances": [ "Mozilla/5.0 (compatible; Discordbot/2.0; +https://discordapp.com)" ] - } - , + }, { "pattern": "TelegramBot", "addition_date": "2017/10/01", "instances": [ "TelegramBot (like TwitterBot)" ] - } - , + }, { "pattern": "Jetslide", "addition_date": "2017/09/27", @@ -2407,8 +2170,7 @@ "instances": [ "Mozilla/5.0 (compatible; Jetslide; +http://jetsli.de/crawler)" ] - } - , + }, { "pattern": "newsharecounts", "addition_date": "2017/09/30", @@ -2416,8 +2178,7 @@ "instances": [ "Mozilla/5.0 (compatible; NewShareCounts.com/1.0; +http://newsharecounts.com/crawler)" ] - } - , + }, { "pattern": "James BOT", "addition_date": "2017/10/12", @@ -2425,8 +2186,7 @@ "instances": [ "Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.1.6) Gecko/20070725 Firefox/2.0.0.6 - James BOT - WebCrawler http://cognitiveseo.com/bot.html" ] - } - , + }, { "pattern": "Bark[rR]owler", "addition_date": "2017/10/09", @@ -2437,8 +2197,7 @@ "BarkRowler/0.7 (+http://www.exensa.com/crawling)", "Barkrowler/0.9 (+http://www.exensa.com/crawl)" ] - } - , + }, { "pattern": "TinEye", "addition_date": "2017/10/14", @@ -2447,8 +2206,7 @@ "Mozilla/5.0 (compatible; TinEye-bot/1.31; +http://www.tineye.com/crawler.html)", "TinEye/1.1 
(http://tineye.com/crawler.html)" ] - } - , + }, { "pattern": "SocialRankIOBot", "addition_date": "2017/10/19", @@ -2456,8 +2214,7 @@ "instances": [ "SocialRankIOBot; http://socialrank.io/about" ] - } - , + }, { "pattern": "trendictionbot", "addition_date": "2017/10/30", @@ -2466,16 +2223,14 @@ "Mozilla/5.0 (Windows; U; Windows NT 6.0; en-GB; rv:1.0; trendictionbot0.5.0; trendiction search; http://www.trendiction.de/bot; please let us know of any problems; web at trendiction.com) Gecko/20071127 Firefox/3.0.0.11", "Mozilla/5.0 (Windows NT 10.0; Win64; x64; trendictionbot0.5.0; trendiction search; http://www.trendiction.de/bot; please let us know of any problems; web at trendiction.com) Gecko/20170101 Firefox/67.0" ] - } - , + }, { "pattern": "Ocarinabot", "addition_date": "2017/09/27", "instances": [ "Ocarinabot" ] - } - , + }, { "pattern": "epicbot", "addition_date": "2017/10/31", @@ -2483,8 +2238,7 @@ "instances": [ "Mozilla/5.0 (compatible; epicbot; +http://www.epictions.com/epicbot)" ] - } - , + }, { "pattern": "Primalbot", "addition_date": "2017/09/27", @@ -2492,8 +2246,7 @@ "instances": [ "Mozilla/5.0 (compatible; Primalbot; +https://www.primal.com;)" ] - } - , + }, { "pattern": "DuckDuckGo-Favicons-Bot", "addition_date": "2017/10/06", @@ -2501,8 +2254,7 @@ "instances": [ "Mozilla/5.0 (compatible; DuckDuckGo-Favicons-Bot/1.0; +http://duckduckgo.com)" ] - } - , + }, { "pattern": "GnowitNewsbot", "addition_date": "2017/10/30", @@ -2510,8 +2262,7 @@ "instances": [ "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:49.0) Gecko/20100101 Firefox/49.0 / GnowitNewsbot / Contact information at http://www.gnowit.com" ] - } - , + }, { "pattern": "Leikibot", "addition_date": "2017/09/24", @@ -2519,16 +2270,14 @@ "instances": [ "Mozilla/5.0 (Windows NT 6.3;compatible; Leikibot/1.0; +http://www.leiki.com)" ] - } - , + }, { "pattern": "LinkArchiver", "addition_date": "2017/09/24", "instances": [ "@LinkArchiver twitter bot" ] - } - , + }, { "pattern": "YaK\\/", "addition_date": 
"2017/09/25", @@ -2536,8 +2285,7 @@ "instances": [ "Mozilla/5.0 (compatible; YaK/1.0; http://linkfluence.com/; bot@linkfluence.com)" ] - } - , + }, { "pattern": "PaperLiBot", "addition_date": "2017/09/25", @@ -2545,10 +2293,8 @@ "instances": [ "Mozilla/5.0 (compatible; PaperLiBot/2.1; http://support.paper.li/entries/20023257-what-is-paper-li)", "Mozilla/5.0 (compatible; PaperLiBot/2.1; https://support.paper.li/entries/20023257-what-is-paper-li)" - ] - } - , + }, { "pattern": "Digg Deeper", "addition_date": "2017/09/26", @@ -2556,16 +2302,14 @@ "instances": [ "Digg Deeper/v1 (http://digg.com/about)" ] - } - , + }, { "pattern": "dcrawl", "addition_date": "2017/09/22", "instances": [ "dcrawl/1.0" ] - } - , + }, { "pattern": "Snacktory", "addition_date": "2017/09/23", @@ -2573,8 +2317,7 @@ "instances": [ "Mozilla/5.0 (compatible; Snacktory; +https://github.com/karussell/snacktory)" ] - } - , + }, { "pattern": "AndersPinkBot", "addition_date": "2017/09/24", @@ -2582,16 +2325,14 @@ "instances": [ "Mozilla/5.0 (compatible; AndersPinkBot/1.0; +http://anderspink.com/bot.html)" ] - } - , + }, { "pattern": "Fyrebot", "addition_date": "2017/09/22", "instances": [ "Fyrebot/1.0" ] - } - , + }, { "pattern": "EveryoneSocialBot", "addition_date": "2017/09/22", @@ -2599,8 +2340,7 @@ "instances": [ "Mozilla/5.0 (compatible; EveryoneSocialBot/1.0; support@everyonesocial.com http://everyonesocial.com/)" ] - } - , + }, { "pattern": "Mediatoolkitbot", "addition_date": "2017/10/06", @@ -2608,16 +2348,14 @@ "instances": [ "Mediatoolkitbot (complaints@mediatoolkit.com)" ] - } - , + }, { "pattern": "Luminator-robots", "addition_date": "2017/09/22", "instances": [ "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_2) AppleWebKit/537.13 (KHTML, like Gecko) Chrome/30.0.1599.66 Safari/537.13 Luminator-robots/2.0" ] - } - , + }, { "pattern": "ExtLinksBot", "addition_date": "2017/11/02", @@ -2625,24 +2363,21 @@ "instances": [ "Mozilla/5.0 (compatible; ExtLinksBot/1.5 +https://extlinks.com/Bot.html)" ] - 
} - , + }, { "pattern": "SurveyBot", "addition_date": "2017/11/02", "instances": [ "Mozilla/5.0 (Windows; U; Windows NT 5.1; en; rv:1.9.0.13) Gecko/2009073022 Firefox/3.5.2 (.NET CLR 3.5.30729) SurveyBot/2.3 (DomainTools)" ] - } - , + }, { "pattern": "NING\\/", "addition_date": "2017/11/02", "instances": [ "NING/1.0" ] - } - , + }, { "pattern": "okhttp", "addition_date": "2017/11/02", @@ -2653,16 +2388,14 @@ "okhttp/3.5.0", "okhttp/4.1.0" ] - } - , + }, { "pattern": "Nuzzel", "addition_date": "2017/11/02", "instances": [ "Nuzzel" ] - } - , + }, { "pattern": "omgili", "addition_date": "2017/11/02", @@ -2670,8 +2403,7 @@ "instances": [ "omgili/0.5 +http://omgili.com" ] - } - , + }, { "pattern": "PocketParser", "addition_date": "2017/11/02", @@ -2679,8 +2411,7 @@ "instances": [ "PocketParser/2.0 (+https://getpocket.com/pocketparser_ua)" ] - } - , + }, { "pattern": "YisouSpider", "addition_date": "2017/11/02", @@ -2688,16 +2419,14 @@ "YisouSpider", "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.81 YisouSpider/5.0 Safari/537.36" ] - } - , + }, { "pattern": "um-LN", "addition_date": "2017/11/02", "instances": [ "Mozilla/5.0 (compatible; um-LN/1.0; mailto: techinfo@ubermetrics-technologies.com)" ] - } - , + }, { "pattern": "ToutiaoSpider", "addition_date": "2017/11/02", @@ -2705,8 +2434,7 @@ "instances": [ "Mozilla/5.0 (compatible; ToutiaoSpider/1.0; http://web.toutiao.com/media_cooperation/;)" ] - } - , + }, { "pattern": "MuckRack", "addition_date": "2017/11/02", @@ -2714,8 +2442,7 @@ "instances": [ "Mozilla/5.0 (compatible; MuckRack/1.0; +http://muckrack.com)" ] - } - , + }, { "pattern": "Jamie's Spider", "addition_date": "2017/11/02", @@ -2723,32 +2450,28 @@ "instances": [ "Jamie's Spider (http://jamiembrown.com/)" ] - } - , + }, { "pattern": "AHC\\/", "addition_date": "2017/11/02", "instances": [ "AHC/2.0" ] - } - , + }, { "pattern": "NetcraftSurveyAgent", "addition_date": "2017/11/02", "instances": [ "Mozilla/5.0 
(compatible; NetcraftSurveyAgent/1.0; +info@netcraft.com)" ] - } - , + }, { "pattern": "Laserlikebot", "addition_date": "2017/11/02", "instances": [ "Mozilla/5.0 (iPhone; CPU iPhone OS 8_3 like Mac OS X) AppleWebKit/600.1.4 (KHTML, like Gecko) Version/8.0 Mobile/12F70 Safari/600.1.4 (compatible; Laserlikebot/0.1)" ] - } - , + }, { "pattern": "^Apache-HttpClient", "addition_date": "2017/11/02", @@ -2768,8 +2491,7 @@ "Apache-HttpClient/4.5.7 (Java/11.0.3)", "Apache-HttpClient/4.5.10 (Java/1.8.0_201)" ] - } - , + }, { "pattern": "AppEngine-Google", "addition_date": "2017/11/02", @@ -2777,24 +2499,21 @@ "AppEngine-Google; (+http://code.google.com/appengine; appid: example)", "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3325.181 Safari/537.36 AppEngine-Google; (+http://code.google.com/appengine; appid: s~feedly-nikon3)" ] - } - , + }, { "pattern": "Jetty", "addition_date": "2017/11/02", "instances": [ "Jetty/9.3.z-SNAPSHOT" ] - } - , + }, { "pattern": "Upflow", "addition_date": "2017/11/02", "instances": [ "Upflow/1.0" ] - } - , + }, { "pattern": "Thinklab", "addition_date": "2017/11/02", @@ -2802,8 +2521,7 @@ "instances": [ "Thinklab (thinklab.com)" ] - } - , + }, { "pattern": "Traackr\\.com", "addition_date": "2017/11/02", @@ -2811,8 +2529,7 @@ "instances": [ "Traackr.com" ] - } - , + }, { "pattern": "Twurly", "addition_date": "2017/11/02", @@ -2820,56 +2537,49 @@ "instances": [ "Ruby, Twurly v1.1 (http://twurly.org)" ] - } - , + }, { "pattern": "Mastodon", "addition_date": "2017/11/02", "instances": [ "http.rb/2.2.2 (Mastodon/1.5.1; +https://example-masto-instance.org/)" ] - } - , + }, { "pattern": "http_get", "addition_date": "2017/11/02", "instances": [ "http_get" ] - } - , + }, { "pattern": "DnyzBot", "addition_date": "2017/11/20", "instances": [ "Mozilla/5.0 (compatible; DnyzBot/1.0)" ] - } - , + }, { "pattern": "botify", "addition_date": "2018/02/01", "instances": [ "Mozilla/5.0 (compatible; botify; 
http://botify.com)" ] - } - , + }, { "pattern": "007ac9 Crawler", "addition_date": "2018/02/09", "instances": [ "Mozilla/5.0 (compatible; 007ac9 Crawler; http://crawler.007ac9.net/)" ] - } - , + }, { "pattern": "BehloolBot", "addition_date": "2018/02/09", "instances": [ "Mozilla/5.0 (compatible; BehloolBot/beta; +http://www.webeaver.com/bot)" ] - } - , + }, { "pattern": "BrandVerity", "addition_date": "2018/02/27", @@ -2878,16 +2588,14 @@ "Mozilla/5.0 (iPhone; CPU iPhone OS 7_0 like Mac OS X) AppleWebKit/537.51.1 (KHTML, like Gecko) Mobile/11A465 Twitter for iPhone BrandVerity/1.0 (http://www.brandverity.com/why-is-brandverity-visiting-me)" ], "url": "http://www.brandverity.com/why-is-brandverity-visiting-me" - } - , + }, { "pattern": "check_http", "addition_date": "2018/02/09", "instances": [ "check_http/v2.2.1 (nagios-plugins 2.2.1)" ] - } - , + }, { "pattern": "BDCbot", "addition_date": "2018/02/09", @@ -2895,24 +2603,21 @@ "Mozilla/5.0 (Windows NT 6.1; compatible; BDCbot/1.0; +http://bigweb.bigdatacorp.com.br/faq.aspx) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2272.118 Safari/537.36", "Mozilla/5.0 (Windows NT 10.0; Win64; x64; BDCbot/1.0; +http://bigweb.bigdatacorp.com.br/faq.aspx) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.100 Safari/537.36" ] - } - , + }, { "pattern": "ZumBot", "addition_date": "2018/02/09", "instances": [ "Mozilla/5.0 (compatible; ZumBot/1.0; http://help.zum.com/inquiry)" ] - } - , + }, { "pattern": "EZID", "addition_date": "2018/02/09", "instances": [ "EZID (EZID link checker; https://ezid.cdlib.org/)" ] - } - , + }, { "pattern": "ICC-Crawler", "addition_date": "2018/02/28", @@ -2920,8 +2625,7 @@ "ICC-Crawler/2.0 (Mozilla-compatible; ; http://ucri.nict.go.jp/en/icccrawler.html)" ], "url": "http://ucri.nict.go.jp/en/icccrawler.html" - } - , + }, { "pattern": "ArchiveBot", "addition_date": "2018/02/28", @@ -2929,8 +2633,7 @@ "ArchiveTeam ArchiveBot/20170106.02 (wpull 2.0.2)" ], "url": 
"https://github.com/ArchiveTeam/ArchiveBot" - } - , + }, { "pattern": "^LCC ", "addition_date": "2018/02/28", @@ -2938,8 +2641,7 @@ "LCC (+http://corpora.informatik.uni-leipzig.de/crawler_faq.html)" ], "url": "http://corpora.informatik.uni-leipzig.de/crawler_faq.html" - } - , + }, { "pattern": "filterdb\\.iss\\.net\\/crawler", "addition_date": "2018/03/16", @@ -2947,16 +2649,14 @@ "Mozilla/5.0 (compatible; oBot/2.3.1; +http://filterdb.iss.net/crawler/)" ], "url": "http://filterdb.iss.net/crawler/" - } - , + }, { "pattern": "BLP_bbot", "addition_date": "2018/03/27", "instances": [ "BLP_bbot/0.1" ] - } - , + }, { "pattern": "BomboraBot", "addition_date": "2018/03/27", @@ -2964,8 +2664,7 @@ "Mozilla/5.0 (compatible; BomboraBot/1.0; +http://www.bombora.com/bot)" ], "url": "http://www.bombora.com/bot" - } - , + }, { "pattern": "Buck\\/", "addition_date": "2018/03/27", @@ -2973,8 +2672,7 @@ "Buck/2.2; (+https://app.hypefactors.com/media-monitoring/about.html)" ], "url": "https://app.hypefactors.com/media-monitoring/about.html" - } - , + }, { "pattern": "Companybook-Crawler", "addition_date": "2018/03/27", @@ -2982,8 +2680,7 @@ "Companybook-Crawler (+https://www.companybooknetworking.com/)" ], "url": "https://www.companybooknetworking.com/" - } - , + }, { "pattern": "Genieo", "addition_date": "2018/03/27", @@ -2991,8 +2688,7 @@ "Mozilla/5.0 (compatible; Genieo/1.0 http://www.genieo.com/webfilter.html)" ], "url": "http://www.genieo.com/webfilter.html" - } - , + }, { "pattern": "magpie-crawler", "addition_date": "2018/03/27", @@ -3000,8 +2696,7 @@ "magpie-crawler/1.1 (U; Linux amd64; en-GB; +http://www.brandwatch.net)" ], "url": "http://www.brandwatch.net" - } - , + }, { "pattern": "MeltwaterNews", "addition_date": "2018/03/27", @@ -3009,8 +2704,7 @@ "MeltwaterNews www.meltwater.com" ], "url": "http://www.meltwater.com" - } - , + }, { "pattern": "Moreover", "addition_date": "2018/03/27", @@ -3018,8 +2712,7 @@ "Mozilla/5.0 Moreover/5.1 (+http://www.moreover.com)" ], "url": 
"http://www.moreover.com" - } - , + }, { "pattern": "newspaper\\/", "addition_date": "2018/03/27", @@ -3029,8 +2722,7 @@ "newspaper/0.2.6", "newspaper/0.2.8" ] - } - , + }, { "pattern": "ScoutJet", "addition_date": "2018/03/27", @@ -3038,8 +2730,7 @@ "Mozilla/5.0 (compatible; ScoutJet; +http://www.scoutjet.com/)" ], "url": "http://www.scoutjet.com/" - } - , + }, { "pattern": "(^| )sentry\\/", "addition_date": "2018/03/27", @@ -3047,8 +2738,7 @@ "sentry/8.22.0 (https://sentry.io)" ], "url": "https://sentry.io" - } - , + }, { "pattern": "StorygizeBot", "addition_date": "2018/03/27", @@ -3056,8 +2746,7 @@ "Mozilla/5.0 (compatible; StorygizeBot; http://www.storygize.com)" ], "url": "http://www.storygize.com" - } - , + }, { "pattern": "UptimeRobot", "addition_date": "2018/03/27", @@ -3065,8 +2754,7 @@ "Mozilla/5.0+(compatible; UptimeRobot/2.0; http://www.uptimerobot.com/)" ], "url": "http://www.uptimerobot.com/" - } - , + }, { "pattern": "OutclicksBot", "addition_date": "2018/04/21", @@ -3077,8 +2765,7 @@ "OutclicksBot/2 +https://www.outclicks.net/agent/p2i4sNUh7eylJF1S6SGgRs5mP40ExlYvsr9GBxVQG6h" ], "url": "https://www.outclicks.net" - } - , + }, { "pattern": "seoscanners", "addition_date": "2018/05/27", @@ -3086,8 +2773,7 @@ "Mozilla/5.0 (compatible; seoscanners.net/1; +spider@seoscanners.net)" ], "url": "http://www.seoscanners.net/" - } - , + }, { "pattern": "Hatena", "addition_date": "2018/05/29", @@ -3099,8 +2785,7 @@ "HatenaBookmark/4.0 (Hatena::Bookmark; Analyzer)", "Hatena::Fetcher/0.01 (master) Furl/3.13" ] - } - , + }, { "pattern": "Google Web Preview", "addition_date": "2018/05/31", @@ -3108,16 +2793,14 @@ "Mozilla/5.0 (Linux; U; Android 2.3.4; generic) AppleWebKit/537.36 (KHTML, like Gecko; Google Web Preview) Version/4.0 Mobile Safari/537.36", "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko; Google Web Preview) Chrome/27.0.1453 Safari/537.36" ] - } - , + }, { "pattern": "MauiBot", "addition_date": "2018/06/06", "instances": [ "MauiBot 
(crawler.feedback+wc@gmail.com)" ] - } - , + }, { "pattern": "AlphaBot", "addition_date": "2018/05/27", @@ -3125,8 +2808,7 @@ "Mozilla/5.0 (compatible; AlphaBot/3.2; +http://alphaseobot.com/bot.html)" ], "url": "http://alphaseobot.com/bot.html" - } - , + }, { "pattern": "SBL-BOT", "addition_date": "2018/06/06", @@ -3134,9 +2816,8 @@ "SBL-BOT (http://sbl.net)" ], "url": "http://sbl.net", - "description" : "Bot of SoftByte BlackWidow" - } - , + "description": "Bot of SoftByte BlackWidow" + }, { "pattern": "IAS crawler", "addition_date": "2018/06/06", @@ -3144,17 +2825,15 @@ "IAS crawler (ias_crawler; http://integralads.com/site-indexing-policy/)" ], "url": "http://integralads.com/site-indexing-policy/", - "description" : "Bot of Integral Ad Science, Inc." - } - , + "description": "Bot of Integral Ad Science, Inc." + }, { "pattern": "adscanner", "addition_date": "2018/06/24", "instances": [ "Mozilla/5.0 (compatible; adscanner/)" ] - } - , + }, { "pattern": "Netvibes", "addition_date": "2018/06/24", @@ -3163,8 +2842,7 @@ "Netvibes (crawler; http://www.netvibes.com)" ], "url": "http://www.netvibes.com" - } - , + }, { "pattern": "acapbot", "addition_date": "2018/06/27", @@ -3172,8 +2850,7 @@ "Mozilla/5.0 (compatible;acapbot/0.1;treat like Googlebot)", "Mozilla/5.0 (compatible;acapbot/0.1.;treat like Googlebot)" ] - } - , + }, { "pattern": "Baidu-YunGuanCe", "addition_date": "2018/06/27", @@ -3186,8 +2863,7 @@ ], "url": "https://ce.baidu.com/topic/topic20150908", "description": "Baidu Cloud Watch" - } - , + }, { "pattern": "bitlybot", "addition_date": "2018/06/27", @@ -3197,8 +2873,7 @@ "bitlybot" ], "url": "http://bit.ly/" - } - , + }, { "pattern": "blogmuraBot", "addition_date": "2018/06/27", @@ -3207,8 +2882,7 @@ ], "url": "http://www.blogmura.com", "description": "A blog ranking site which links to blogs on just about every theme possible." 
- } - , + }, { "pattern": "Bot\\.AraTurka\\.com", "addition_date": "2018/06/27", @@ -3216,16 +2890,14 @@ "Bot.AraTurka.com/0.0.1" ], "url": "http://www.araturka.com" - } - , + }, { "pattern": "bot-pge\\.chlooe\\.com", "addition_date": "2018/06/27", "instances": [ "bot-pge.chlooe.com/1.0.0 (+http://www.chlooe.com/)" ] - } - , + }, { "pattern": "BoxcarBot", "addition_date": "2018/06/27", @@ -3233,8 +2905,7 @@ "Mozilla/5.0 (compatible; BoxcarBot/1.1; +awesome@boxcar.io)" ], "url": "https://boxcar.io/" - } - , + }, { "pattern": "BTWebClient", "addition_date": "2018/06/27", @@ -3243,8 +2914,7 @@ ], "url": "http://www.utorrent.com/", "description": "µTorrent BitTorrent Client" - } - , + }, { "pattern": "ContextAd Bot", "addition_date": "2018/06/27", @@ -3252,8 +2922,7 @@ "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.0;.NET CLR 1.0.3705; ContextAd Bot 1.0)", "ContextAd Bot 1.0" ] - } - , + }, { "pattern": "Digincore bot", "addition_date": "2018/06/27", @@ -3261,8 +2930,7 @@ "Mozilla/5.0 (compatible; Digincore bot; https://www.digincore.com/crawler.html for rules and instructions.)" ], "url": "http://www.digincore.com/crawler.html" - } - , + }, { "pattern": "Disqus", "addition_date": "2018/06/27", @@ -3271,8 +2939,7 @@ ], "url": "https://disqus.com/", "description": "validate and quality check pages." - } - , + }, { "pattern": "Feedly", "addition_date": "2018/06/27", @@ -3282,16 +2949,14 @@ ], "url": "https://www.feedly.com/fetcher.html", "description": "Feedly Fetcher is how Feedly grabs RSS or Atom feeds when users choose to add them to their Feedly or any of the other applications built on top of the feedly cloud." 
- } - , + }, { "pattern": "Fetch\\/", "addition_date": "2018/06/27", "instances": [ "Fetch/2.0a (CMS Detection/Web/SEO analysis tool, see http://guess.scritch.org)" ] - } - , + }, { "pattern": "Fever", "addition_date": "2018/06/27", @@ -3299,16 +2964,14 @@ "Fever/1.38 (Feed Parser; http://feedafever.com; Allow like Gecko)" ], "url": "http://feedafever.com" - } - , + }, { "pattern": "Flamingo_SearchEngine", "addition_date": "2018/06/27", "instances": [ "Flamingo_SearchEngine (+http://www.flamingosearch.com/bot)" ] - } - , + }, { "pattern": "FlipboardProxy", "addition_date": "2018/06/27", @@ -3321,8 +2984,7 @@ ], "url": "https://about.flipboard.com/browserproxy/", "description": "a proxy service to fetch, validate, and prepare certain elements of websites for presentation through the Flipboard Application" - } - , + }, { "pattern": "g2reader-bot", "addition_date": "2018/06/27", @@ -3330,8 +2992,7 @@ "g2reader-bot/1.0 (+http://www.g2reader.com/)" ], "url": "http://www.g2reader.com/" - } - , + }, { "pattern": "G2 Web Services", "addition_date": "2019/03/01", @@ -3339,8 +3000,7 @@ "G2 Web Services/1.0 (built with StormCrawler Archetype 1.8; https://www.g2webservices.com/; developers@g2llc.com)" ], "url": "https://www.g2webservices.com/" - } - , + }, { "pattern": "imrbot", "addition_date": "2018/06/27", @@ -3348,8 +3008,7 @@ "Mozilla/5.0 (compatible; imrbot/1.10.8 +http://www.mignify.com)" ], "url": "http://www.mignify.com" - } - , + }, { "pattern": "K7MLWCBot", "addition_date": "2018/06/27", @@ -3358,8 +3017,7 @@ ], "url": "http://www.k7computing.com", "description": "Virus scanner" - } - , + }, { "pattern": "Kemvibot", "addition_date": "2018/06/27", @@ -3367,8 +3025,7 @@ "Kemvibot/1.0 (http://kemvi.com, marco@kemvi.com)" ], "url": "http://kemvi.com" - } - , + }, { "pattern": "Landau-Media-Spider", "addition_date": "2018/06/27", @@ -3376,8 +3033,7 @@ "Landau-Media-Spider/1.0(http://bots.landaumedia.de/bot.html)" ], "url": "http://bots.landaumedia.de/bot.html" - } - , + 
}, { "pattern": "linkapediabot", "addition_date": "2018/06/27", @@ -3385,8 +3041,7 @@ "linkapediabot (+http://www.linkapedia.com)" ], "url": "http://www.linkapedia.com" - } - , + }, { "pattern": "vkShare", "addition_date": "2018/07/02", @@ -3394,8 +3049,7 @@ "Mozilla/5.0 (compatible; vkShare; +http://vk.com/dev/Share)" ], "url": "http://vk.com/dev/Share" - } - , + }, { "pattern": "Siteimprove\\.com", "addition_date": "2018/06/22", @@ -3405,121 +3059,109 @@ "Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.1; Trident/6.0) SiteCheck-sitecrawl by Siteimprove.com", "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.0) LinkCheck by Siteimprove.com" ] - } - , - { - "pattern": "BLEXBot\\/", - "addition_date": "2018/07/07", - "instances": [ - "Mozilla/5.0 (compatible; BLEXBot/1.0; +http://webmeup-crawler.com/)" - ], - "url": "http://webmeup-crawler.com" - } - , - { - "pattern": "DareBoost", - "addition_date": "2018/07/07", - "instances": [ - "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/62.0.3202.75 Safari/537.36 DareBoost" - ], - "url": "https://www.dareboost.com/", - "description": "Bot to test, Analyze and Optimize website" - } - , + }, { - "pattern": "ZuperlistBot\\/", - "addition_date": "2018/07/07", - "instances": [ - "Mozilla/5.0 (compatible; ZuperlistBot/1.0)" - ] - } - , - { - "pattern": "Miniflux\\/", - "addition_date": "2018/07/07", - "instances": [ - "Mozilla/5.0 (compatible; Miniflux/2.0.x-dev; +https://miniflux.net)", - "Mozilla/5.0 (compatible; Miniflux/2.0.3; +https://miniflux.net)", - "Mozilla/5.0 (compatible; Miniflux/2.0.7; +https://miniflux.net)", - "Mozilla/5.0 (compatible; Miniflux/2.0.10; +https://miniflux.net)", - "Mozilla/5.0 (compatibl$; Miniflux/2.0.x-dev; +https://miniflux.app)", - "Mozilla/5.0 (compatible; Miniflux/2.0.11; +https://miniflux.app)", - "Mozilla/5.0 (compatible; Miniflux/2.0.12; +https://miniflux.app)", - "Mozilla/5.0 (compatible; Miniflux/ae1dc1a; +https://miniflux.app)", - "Mozilla/5.0 (compatible; 
Miniflux/3b6e44c; +https://miniflux.app)" - ], - "url": "https://miniflux.net", - "description": "Miniflux is a minimalist and opinionated feed reader." - } - , - { - "pattern": "Feedspot", - "addition_date": "2018/07/07", - "instances": [ - "Mozilla/5.0 (compatible; Feedspotbot/1.0; +http://www.feedspot.com/fs/bot)", - "Mozilla/5.0 (compatible; Feedspot/1.0 (+https://www.feedspot.com/fs/fetcher; like FeedFetcher-Google)" - ], - "url": "http://www.feedspot.com/fs/bot" - } - , - { - "pattern": "Diffbot\\/", - "addition_date": "2018/07/07", - "instances": [ - "Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.9.1.2) Gecko/20090729 Firefox/3.5.2 (.NET CLR 3.5.30729; Diffbot/0.1; +http://www.diffbot.com)" - ], - "url": "http://www.diffbot.com" - } - , - { - "pattern": "SEOkicks", - "addition_date": "2018/08/22", - "instances": [ - "Mozilla/5.0 (compatible; SEOkicks; +https://www.seokicks.de/robot.html)" - ], - "url": "https://www.seokicks.de/robot.html" - } - , + "pattern": "BLEXBot\\/", + "addition_date": "2018/07/07", + "instances": [ + "Mozilla/5.0 (compatible; BLEXBot/1.0; +http://webmeup-crawler.com/)" + ], + "url": "http://webmeup-crawler.com" + }, { - "pattern": "tracemyfile", - "addition_date": "2018/08/23", - "instances": [ - "Mozilla/5.0 (compatible; tracemyfile/1.0; +bot@tracemyfile.com)" - ] - } - , + "pattern": "DareBoost", + "addition_date": "2018/07/07", + "instances": [ + "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/62.0.3202.75 Safari/537.36 DareBoost" + ], + "url": "https://www.dareboost.com/", + "description": "Bot to test, Analyze and Optimize website" + }, { - "pattern": "Nimbostratus-Bot", - "addition_date": "2018/08/29", - "instances": [ - "Mozilla/5.0 (compatible; Nimbostratus-Bot/v1.3.2; http://cloudsystemnetworks.com)" - ] - } - , + "pattern": "ZuperlistBot\\/", + "addition_date": "2018/07/07", + "instances": [ + "Mozilla/5.0 (compatible; ZuperlistBot/1.0)" + ] + }, + { + "pattern": "Miniflux\\/", + 
"addition_date": "2018/07/07", + "instances": [ + "Mozilla/5.0 (compatible; Miniflux/2.0.x-dev; +https://miniflux.net)", + "Mozilla/5.0 (compatible; Miniflux/2.0.3; +https://miniflux.net)", + "Mozilla/5.0 (compatible; Miniflux/2.0.7; +https://miniflux.net)", + "Mozilla/5.0 (compatible; Miniflux/2.0.10; +https://miniflux.net)", + "Mozilla/5.0 (compatibl$; Miniflux/2.0.x-dev; +https://miniflux.app)", + "Mozilla/5.0 (compatible; Miniflux/2.0.11; +https://miniflux.app)", + "Mozilla/5.0 (compatible; Miniflux/2.0.12; +https://miniflux.app)", + "Mozilla/5.0 (compatible; Miniflux/ae1dc1a; +https://miniflux.app)", + "Mozilla/5.0 (compatible; Miniflux/3b6e44c; +https://miniflux.app)" + ], + "url": "https://miniflux.net", + "description": "Miniflux is a minimalist and opinionated feed reader." + }, + { + "pattern": "Feedspot", + "addition_date": "2018/07/07", + "instances": [ + "Mozilla/5.0 (compatible; Feedspotbot/1.0; +http://www.feedspot.com/fs/bot)", + "Mozilla/5.0 (compatible; Feedspot/1.0 (+https://www.feedspot.com/fs/fetcher; like FeedFetcher-Google)" + ], + "url": "http://www.feedspot.com/fs/bot" + }, + { + "pattern": "Diffbot\\/", + "addition_date": "2018/07/07", + "instances": [ + "Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.9.1.2) Gecko/20090729 Firefox/3.5.2 (.NET CLR 3.5.30729; Diffbot/0.1; +http://www.diffbot.com)" + ], + "url": "http://www.diffbot.com" + }, + { + "pattern": "SEOkicks", + "addition_date": "2018/08/22", + "instances": [ + "Mozilla/5.0 (compatible; SEOkicks; +https://www.seokicks.de/robot.html)" + ], + "url": "https://www.seokicks.de/robot.html" + }, + { + "pattern": "tracemyfile", + "addition_date": "2018/08/23", + "instances": [ + "Mozilla/5.0 (compatible; tracemyfile/1.0; +bot@tracemyfile.com)" + ] + }, + { + "pattern": "Nimbostratus-Bot", + "addition_date": "2018/08/29", + "instances": [ + "Mozilla/5.0 (compatible; Nimbostratus-Bot/v1.3.2; http://cloudsystemnetworks.com)" + ] + }, { - "pattern": "zgrab", - "addition_date": 
"2018/08/30", - "instances": [ - "Mozilla/5.0 zgrab/0.x" - ], + "pattern": "zgrab", + "addition_date": "2018/08/30", + "instances": [ + "Mozilla/5.0 zgrab/0.x" + ], "url": "https://zmap.io/" - } - , + }, { - "pattern": "PR-CY\\.RU", - "addition_date": "2018/08/30", - "instances": [ - "Mozilla/5.0 (compatible; PR-CY.RU; + https://a.pr-cy.ru)" - ], + "pattern": "PR-CY\\.RU", + "addition_date": "2018/08/30", + "instances": [ + "Mozilla/5.0 (compatible; PR-CY.RU; + https://a.pr-cy.ru)" + ], "url": "https://a.pr-cy.ru/" - } - , + }, { - "pattern": "AdsTxtCrawler", - "addition_date": "2018/08/30", - "instances": [ - "AdsTxtCrawler/1.0" - ] + "pattern": "AdsTxtCrawler", + "addition_date": "2018/08/30", + "instances": [ + "AdsTxtCrawler/1.0" + ] }, { "pattern": "Datafeedwatch", @@ -3528,8 +3170,7 @@ "Datafeedwatch/2.1.x" ], "url": "https://www.datafeedwatch.com/" - } - , + }, { "pattern": "Zabbix", "addition_date": "2018/09/05", @@ -3537,8 +3178,7 @@ "Zabbix" ], "url": "https://www.zabbix.com/documentation/3.4/manual/web_monitoring" - } - , + }, { "pattern": "TangibleeBot", "addition_date": "2018/09/05", @@ -3546,8 +3186,7 @@ "TangibleeBot/1.0.0.0 (http://tangiblee.com/bot)" ], "url": "http://tangiblee.com/bot" - } - , + }, { "pattern": "google-xrawler", "addition_date": "2018/09/05", @@ -3555,8 +3194,7 @@ "google-xrawler" ], "url": "https://webmasters.stackexchange.com/questions/105560/what-is-the-google-xrawler-user-agent-used-for" - } - , + }, { "pattern": "axios", "addition_date": "2018/09/06", @@ -3565,8 +3203,7 @@ "axios/0.19.0" ], "url": "https://github.com/axios/axios" - } - , + }, { "pattern": "Amazon CloudFront", "addition_date": "2018/09/07", @@ -3574,16 +3211,14 @@ "Amazon CloudFront" ], "url": "https://aws.amazon.com/cloudfront/" - } - , + }, { "pattern": "Pulsepoint", "addition_date": "2018/09/24", "instances": [ "Pulsepoint XT3 web scraper" ] - } - , + }, { "pattern": "CloudFlare-AlwaysOnline", "addition_date": "2018/09/27", @@ -3591,27 +3226,24 @@ 
"Mozilla/5.0 (compatible; CloudFlare-AlwaysOnline/1.0; +http://www.cloudflare.com/always-online) AppleWebKit/534.34", "Mozilla/5.0 (compatible; CloudFlare-AlwaysOnline/1.0; +https://www.cloudflare.com/always-online) AppleWebKit/534.34" ], - "url" : "https://www.cloudflare.com/always-online/" - } - , + "url": "https://www.cloudflare.com/always-online/" + }, { - "pattern": "Google-Structured-Data-Testing-Tool", + "pattern": "Google-Structured-Data-Testing-Tool", "addition_date": "2018/10/02", "instances": [ "Mozilla/5.0 (compatible; Google-Structured-Data-Testing-Tool +https://search.google.com/structured-data/testing-tool)", "Mozilla/5.0 (compatible; Google-Structured-Data-Testing-Tool +http://developers.google.com/structured-data/testing-tool/)" ], "url": "https://search.google.com/structured-data/testing-tool" - } - , + }, { - "pattern": "WordupInfoSearch", + "pattern": "WordupInfoSearch", "addition_date": "2018/10/07", "instances": [ "WordupInfoSearch/1.0" ] - } - , + }, { "pattern": "WebDataStats", "addition_date": "2018/10/08", @@ -3619,16 +3251,14 @@ "Mozilla/5.0 (compatible; WebDataStats/1.0 ; +https://webdatastats.com/policy.html)" ], "url": "https://webdatastats.com/" - } - , + }, { "pattern": "HttpUrlConnection", "addition_date": "2018/10/08", "instances": [ "Jersey/2.25.1 (HttpUrlConnection 1.8.0_141)" ] - } - , + }, { "pattern": "ZoomBot", "addition_date": "2018/10/10", @@ -3636,24 +3266,21 @@ "ZoomBot (Linkbot 1.0 http://suite.seozoom.it/bot.html)" ], "url": "http://suite.seozoom.it/bot.html" - } - , + }, { "pattern": "VelenPublicWebCrawler", "addition_date": "2018/10/09", "instances": [ "VelenPublicWebCrawler (velen.io)" ] - } - , + }, { "pattern": "MoodleBot", "addition_date": "2018/10/10", "instances": [ "MoodleBot/1.0" ] - } - , + }, { "pattern": "jpg-newsbot", "addition_date": "2018/10/10", @@ -3661,8 +3288,7 @@ "jpg-newsbot/2.0; (+https://vipnytt.no/bots/)" ], "url": "https://vipnytt.no/bots/" - } - , + }, { "pattern": "outbrain", "addition_date": 
"2018/10/14", @@ -3670,8 +3296,7 @@ "Mozilla/5.0 (Java) outbrain" ], "url": "https://www.outbrain.com/help/advertisers/invalid-url/" - } - , + }, { "pattern": "W3C_Validator", "addition_date": "2018/10/14", @@ -3679,8 +3304,7 @@ "W3C_Validator/1.3" ], "url": "https://validator.w3.org/services" - } - , + }, { "pattern": "Validator\\.nu", "addition_date": "2018/10/14", @@ -3688,12 +3312,13 @@ "Validator.nu/LV" ], "url": "https://validator.w3.org/services" - } - , + }, { "pattern": "W3C-checklink", "addition_date": "2018/10/14", - "depends_on": ["libwww-perl"], + "depends_on": [ + "libwww-perl" + ], "instances": [ "W3C-checklink/2.90 libwww-perl/5.64", "W3C-checklink/3.6.2.3 libwww-perl/5.64", @@ -3706,8 +3331,7 @@ "W3C-checklink/4.5 [4.160] libwww-perl/5.823" ], "url": "https://validator.w3.org/services" - } - , + }, { "pattern": "W3C-mobileOK", "addition_date": "2018/10/14", @@ -3715,8 +3339,7 @@ "W3C-mobileOK/DDC-1.0" ], "url": "https://validator.w3.org/services" - } - , + }, { "pattern": "W3C_I18n-Checker", "addition_date": "2018/10/14", @@ -3724,8 +3347,7 @@ "W3C_I18n-Checker/1.0" ], "url": "https://validator.w3.org/services" - } - , + }, { "pattern": "FeedValidator", "addition_date": "2018/10/14", @@ -3733,8 +3355,7 @@ "FeedValidator/1.3" ], "url": "https://validator.w3.org/services" - } - , + }, { "pattern": "W3C_CSS_Validator", "addition_date": "2018/10/14", @@ -3742,8 +3363,7 @@ "Jigsaw/2.3.0 W3C_CSS_Validator_JFouffa/2.0" ], "url": "https://validator.w3.org/services" - } - , + }, { "pattern": "W3C_Unicorn", "addition_date": "2018/10/14", @@ -3751,16 +3371,14 @@ "W3C_Unicorn/1.0" ], "url": "https://validator.w3.org/services" - } - , + }, { "pattern": "Google-PhysicalWeb", "addition_date": "2018/10/21", "instances": [ "Mozilla/5.0 (Google-PhysicalWeb)" ] - } - , + }, { "pattern": "Blackboard", "addition_date": "2018/10/28", @@ -3840,8 +3458,7 @@ "Mozilla/5.0 (compatible; theoldreader.com)" ], "url": "https://www.theoldreader.com/" - } - , + }, { "pattern": 
"AnyEvent", "addition_date": "2018/12/07", @@ -3849,8 +3466,7 @@ "Mozilla/5.0 (compatible; U; AnyEvent-HTTP/2.24; +http://software.schmorp.de/pkg/AnyEvent)" ], "url": "http://software.schmorp.de/pkg/AnyEvent.html" - } - , + }, { "pattern": "Uptimebot\\.org", "addition_date": "2019/01/17", @@ -3858,8 +3474,7 @@ "Uptimebot.org - Free website monitoring" ], "url": "http://uptimebot.org/" - } - , + }, { "pattern": "Nmap Scripting Engine", "addition_date": "2019/02/04", @@ -3867,8 +3482,7 @@ "Mozilla/5.0 (compatible; Nmap Scripting Engine; https://nmap.org/book/nse.html)" ], "url": "https://nmap.org/book/nse.html" - } - , + }, { "pattern": "2ip\\.ru", "addition_date": "2019/02/12", @@ -3958,9 +3572,9 @@ "url": "https://developers.google.com/web/updates/2017/04/headless-chrome", "addition_date": "2019/06/17", "instances": [ - "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) HeadlessChrome/74.0.3729.169 Safari/537.36", - "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) HeadlessChrome/69.0.3494.0 Safari/537.36", - "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) HeadlessChrome/76.0.3803.0 Safari/537.36" + "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) HeadlessChrome/74.0.3729.169 Safari/537.36", + "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) HeadlessChrome/69.0.3494.0 Safari/537.36", + "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) HeadlessChrome/76.0.3803.0 Safari/537.36" ] }, { @@ -3978,8 +3592,7 @@ "Mozilla/5.0 (compatible; Uptimebot/1.0; +http://www.uptime.com/uptimebot)" ], "url": "http://www.uptime.com/uptimebot" - } - , + }, { "pattern": "Streamline3Bot\\/", "addition_date": "2019/07/21", @@ -3988,8 +3601,7 @@ "Mozilla/5.0 (Windows NT 6.1; Win64; x64; +https://www.ubtsupport.com/legal/Streamline3Bot.php) Streamline3Bot/1.0" ], "url": "https://www.ubtsupport.com/legal/Streamline3Bot.php" - } - , + }, { "pattern": 
"serpstatbot\\/", "addition_date": "2019/07/25", @@ -3998,8 +3610,7 @@ "serpstatbot/1.0 (advanced backlink tracking bot; curl/7.58.0; http://serpstatbot.com/; abuse@serpstatbot.com)" ], "url": "http://serpstatbot.com" - } - , + }, { "pattern": "MixnodeCache\\/", "addition_date": "2019/08/04", @@ -4007,8 +3618,7 @@ "MixnodeCache/1.8(+https://cache.mixnode.com/)" ], "url": "https://cache.mixnode.com/" - } - , + }, { "pattern": "^curl", "addition_date": "2019/08/15", @@ -4023,8 +3633,7 @@ "curl/7.65.3" ], "url": "https://curl.haxx.se/" - } - , + }, { "pattern": "SimpleScraper", "addition_date": "2019/08/16", @@ -4032,8 +3641,7 @@ "Mozilla/5.0 (compatible; SimpleScraper)" ], "url": "https://github.com/ramonkcom/simple-scraper/" - } - , + }, { "pattern": "RSSingBot", "addition_date": "2019/09/15", @@ -4041,8 +3649,7 @@ "RSSingBot (http://www.rssing.com)" ], "url": "http://www.rssing.com" - } - , + }, { "pattern": "Jooblebot", "addition_date": "2019/09/25", @@ -4050,8 +3657,7 @@ "Mozilla/5.0 (compatible; Jooblebot/2.0; Windows NT 6.1; WOW64; +http://jooble.org/jooble-bot) AppleWebKit/537.36 (KHTML, like Gecko) Safari/537.36" ], "url": "http://jooble.org/jooble-bot" - } - , + }, { "pattern": "fedoraplanet", "addition_date": "2019/09/28", @@ -4059,8 +3665,7 @@ "venus/fedoraplanet" ], "url": "http://fedoraplanet.org/" - } - , + }, { "pattern": "Friendica", "addition_date": "2019/09/28", @@ -4068,8 +3673,7 @@ "Friendica 'The Tazmans Flax-lily' 2019.01-1293; https://hoyer.xyz" ], "url": "https://hoyer.xyz" - } - , + }, { "pattern": "NextCloud", "addition_date": "2019/09/30", @@ -4077,8 +3681,7 @@ "NextCloud-News/1.0" ], "url": "https://nextcloud.com/" - } - , + }, { "pattern": "Tiny Tiny RSS", "addition_date": "2019/10/04", @@ -4089,8 +3692,7 @@ "Tiny Tiny RSS/19.8 (http://tt-rss.org/)" ], "url": "http://tt-rss.org/" - } - , + }, { "pattern": "RegionStuttgartBot", "addition_date": "2019/10/17", @@ -4098,35 +3700,33 @@ "Mozilla/5.0 (compatible; RegionStuttgartBot/1.0; 
+http://it.region-stuttgart.de/competenzatlas/unternehmen-suchen/)" ], "url": "http://it.region-stuttgart.de/competenzatlas/unternehmen-suchen/" - } - , + }, { "pattern": "Bytespider", "addition_date": "2019/11/11", "instances": [ - "Mozilla/5.0 (iPhone; CPU iPhone OS 11_0 like Mac OS X) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/40.0.3754.1902 Mobile Safari/537.36; Bytespider", - "Mozilla/5.0 (iPhone; CPU iPhone OS 11_0 like Mac OS X) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.4454.1745 Mobile Safari/537.36; Bytespider", - "Mozilla/5.0 (iPhone; CPU iPhone OS 11_0 like Mac OS X) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/43.0.7597.1164 Mobile Safari/537.36; Bytespider;bytespider@bytedance.com", - "Mozilla/5.0 (iPhone; CPU iPhone OS 11_0 like Mac OS X) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/46.0.2988.1545 Mobile Safari/537.36; Bytespider", - "Mozilla/5.0 (iPhone; CPU iPhone OS 11_0 like Mac OS X) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.4141.1682 Mobile Safari/537.36; Bytespider", - "Mozilla/5.0 (iPhone; CPU iPhone OS 11_0 like Mac OS X) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.3478.1649 Mobile Safari/537.36; Bytespider", - "Mozilla/5.0 (Linux; Android 5.0; SM-G900P Build/LRX21T) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/47.0.5267.1259 Mobile Safari/537.36; Bytespider", - "Mozilla/5.0 (Linux; Android 5.0; SM-G900P Build/LRX21T) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.7990.1979 Mobile Safari/537.36; Bytespider", - "Mozilla/5.0 (Linux; Android 5.0; SM-G900P Build/LRX21T) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.2268.1523 Mobile Safari/537.36; Bytespider", - "Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/40.0.2576.1836 Mobile Safari/537.36; Bytespider", - "Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/43.0.9681.1227 Mobile Safari/537.36; Bytespider", - "Mozilla/5.0 (Linux; Android 6.0; Nexus 5 
Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/44.0.6023.1635 Mobile Safari/537.36; Bytespider", - "Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.4944.1981 Mobile Safari/537.36; Bytespider", - "Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.3613.1739 Mobile Safari/537.36; Bytespider", - "Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.4022.1033 Mobile Safari/537.36; Bytespider", - "Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/52.0.3248.1547 Mobile Safari/537.36; Bytespider", - "Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/57.0.5527.1507 Mobile Safari/537.36; Bytespider", - "Mozilla/5.0 (Linux; Android 8.0; Pixel 2 Build/OPD3.170816.012) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.5216.1326 Mobile Safari/537.36; Bytespider", - "Mozilla/5.0 (Linux; Android 8.0; Pixel 2 Build/OPD3.170816.012) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.9038.1080 Mobile Safari/537.36; Bytespider" + "Mozilla/5.0 (iPhone; CPU iPhone OS 11_0 like Mac OS X) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/40.0.3754.1902 Mobile Safari/537.36; Bytespider", + "Mozilla/5.0 (iPhone; CPU iPhone OS 11_0 like Mac OS X) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.4454.1745 Mobile Safari/537.36; Bytespider", + "Mozilla/5.0 (iPhone; CPU iPhone OS 11_0 like Mac OS X) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/43.0.7597.1164 Mobile Safari/537.36; Bytespider;bytespider@bytedance.com", + "Mozilla/5.0 (iPhone; CPU iPhone OS 11_0 like Mac OS X) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/46.0.2988.1545 Mobile Safari/537.36; Bytespider", + "Mozilla/5.0 (iPhone; CPU iPhone OS 11_0 like Mac OS X) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.4141.1682 Mobile Safari/537.36; Bytespider", 
+ "Mozilla/5.0 (iPhone; CPU iPhone OS 11_0 like Mac OS X) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.3478.1649 Mobile Safari/537.36; Bytespider", + "Mozilla/5.0 (Linux; Android 5.0; SM-G900P Build/LRX21T) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/47.0.5267.1259 Mobile Safari/537.36; Bytespider", + "Mozilla/5.0 (Linux; Android 5.0; SM-G900P Build/LRX21T) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.7990.1979 Mobile Safari/537.36; Bytespider", + "Mozilla/5.0 (Linux; Android 5.0; SM-G900P Build/LRX21T) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.2268.1523 Mobile Safari/537.36; Bytespider", + "Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/40.0.2576.1836 Mobile Safari/537.36; Bytespider", + "Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/43.0.9681.1227 Mobile Safari/537.36; Bytespider", + "Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/44.0.6023.1635 Mobile Safari/537.36; Bytespider", + "Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.4944.1981 Mobile Safari/537.36; Bytespider", + "Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.3613.1739 Mobile Safari/537.36; Bytespider", + "Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.4022.1033 Mobile Safari/537.36; Bytespider", + "Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/52.0.3248.1547 Mobile Safari/537.36; Bytespider", + "Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/57.0.5527.1507 Mobile Safari/537.36; Bytespider", + "Mozilla/5.0 (Linux; Android 8.0; Pixel 2 Build/OPD3.170816.012) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.5216.1326 Mobile 
Safari/537.36; Bytespider", + "Mozilla/5.0 (Linux; Android 8.0; Pixel 2 Build/OPD3.170816.012) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.9038.1080 Mobile Safari/537.36; Bytespider" ], "url": "https://stackoverflow.com/questions/57908900/what-is-the-bytespider-user-agent" - } - , + }, { "pattern": "Datanyze", "addition_date": "2019/11/17", @@ -4134,8 +3734,7 @@ "Mozilla/5.0 (X11; Datanyze; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3325.181 Safari/537.36" ], "url": "https://www.datanyze.com/dnyzbot/" - } - , + }, { "pattern": "Google-Site-Verification", "addition_date": "2019/12/11", @@ -4143,8 +3742,7 @@ "Mozilla/5.0 (compatible; Google-Site-Verification/1.0)" ], "url": "https://support.google.com/webmasters/answer/9008080" - } - , + }, { "pattern": "TrendsmapResolver", "addition_date": "2020/02/24", @@ -4152,8 +3750,7 @@ "Mozilla/5.0 (compatible; TrendsmapResolver/0.1)" ], "url": "https://www.trendsmap.com/" - } - , + }, { "pattern": "tweetedtimes", "addition_date": "2020/02/24", @@ -4647,8 +4244,8 @@ "pattern": "virustotal", "addition_date": "2021/09/22", "instances": [ - "Mozilla\/5.0 (Windows; U; MSIE 9.0; Windows NT 9.0; en-US) AppEngine-Google; (+http:\/\/code.google.com\/appengine; appid: s~virustotalcloud)", - "AppEngine-Google; (+http:\/\/code.google.com\/appengine; appid: s~virustotalcloud)" + "Mozilla/5.0 (Windows; U; MSIE 9.0; Windows NT 9.0; en-US) AppEngine-Google; (+http://code.google.com/appengine; appid: s~virustotalcloud)", + "AppEngine-Google; (+http://code.google.com/appengine; appid: s~virustotalcloud)" ], "url": "https://www.virustotal.com/gui/home/url" }, @@ -4677,14 +4274,14 @@ "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko; compatible; Cookiebot/1.0; +http://cookiebot.com/) Chrome/97.0.4692.71 Safari/537.36" ] }, - { - "pattern": "trovitBot", - "addition_date": "2022/06/08", - "url": "http://www.trovit.com/bot.html", - "instances": [ - "Mozilla/5.0 (compatible; trovitBot 1.0; 
+http://www.trovit.com/bot.html)" - ] - }, + { + "pattern": "trovitBot", + "addition_date": "2022/06/08", + "url": "http://www.trovit.com/bot.html", + "instances": [ + "Mozilla/5.0 (compatible; trovitBot 1.0; +http://www.trovit.com/bot.html)" + ] + }, { "pattern": "seostar\\.co", "addition_date": "2022/08/04", @@ -4762,7 +4359,9 @@ "pattern": "RuxitSynthetic", "addition_date": "2023/02/16", "url": "https://www.dynatrace.com/support/help/platform-modules/digital-experience/synthetic-monitoring/browser-monitors/configure-browser-monitors#expand--default-user-agent", - "instances" : ["RuxitSynthetic/1.0"] + "instances": [ + "RuxitSynthetic/1.0" + ] }, { "pattern": "Google-Read-Aloud", @@ -4785,7 +4384,7 @@ "addition_date": "2023/05/24", "instances": [ "OdklBot/1.0 (share@odnoklassniki.ru)", - "Mozilla/5.0 (compatible; OdklBot/1.0 like Linux; klass@odnoklassniki.ru)" + "Mozilla/5.0 (compatible; OdklBot/1.0 like Linux; klass@odnoklassniki.ru)" ], "url": "https://odnoklassniki.ru/" }, @@ -4876,7 +4475,7 @@ "addition_date": "2023/08/24", "instances": [ "Mozilla/5.0 (compatible; Linespider/1.1; +https://lin.ee/4dwXkTH)", - "Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko; compatible; Linespider/1.1; +https://lin.ee/4dwXkTH) Chrome/W.X.Y.Z Safari/537.36" + "Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko; compatible; Linespider/1.1; +https://lin.ee/4dwXkTH) Chrome/W.X.Y.Z Safari/537.36" ], "url": "https://help2.line.me/linesearchbot/web/?contentId=50006055&lang=en" }, @@ -5304,19 +4903,25 @@ { "pattern": "Monsidobot", "addition_date": "2024/05/14", - "instances": ["Mozilla/5.0 (compatible; Monsidobot/2.2; +http://monsido.com/bot.html; info@monsido.com)"], + "instances": [ + "Mozilla/5.0 (compatible; Monsidobot/2.2; +http://monsido.com/bot.html; info@monsido.com)" + ], "url": "http://monsido.com/bot.html" }, { "pattern": "GroupMeBot", "addition_date": "2024/05/19", - "instances": ["GroupMeBot/1.0"], + "instances": [ + "GroupMeBot/1.0" + ], "url": "https://groupme.com/" 
}, { "pattern": "Vercelbot", "addition_date": "2024/08/30", - "instances": ["Vercelbot (+https://vercel.com)"], + "instances": [ + "Vercelbot (+https://vercel.com)" + ], "url": "https://github.com/vercel/vercel/discussions/5095#discussioncomment-58705" }, { diff --git a/format.js b/format.js new file mode 100644 index 0000000..9526884 --- /dev/null +++ b/format.js @@ -0,0 +1,34 @@ +/** + * This file is used for checking and updating the format of the JSON file. + * + * You can check the format via `node format.js --check` and regenerate the + * file with the correct formatting using `node format.js --generate`. + * + * The formatting logic uses `JSON.stringify` with 2 spaces, which will keep + * separating commas on the same line as any closing character. This technique + * was chosen for simplicity and to align with common default JSON formatters, + * such as VSCode. + */ + +const fs = require("fs"); +const path = require("path"); + +const jsonFilePath = path.join(__dirname, "crawler-user-agents.json"); + +const original = fs.readFileSync(jsonFilePath, "utf-8"); + +const updated = JSON.stringify(JSON.parse(original), null, 2) + '\n'; + +if (process.argv[2] === "--generate") { + fs.writeFileSync(jsonFilePath, updated); + process.exit(0); +} + +if (process.argv[2] === "--check") { + if (updated !== original) { + console.error("JSON file format is wrong. Run `node format.js --generate` to update."); + console.error("Format must be 2 spaces, with newlines for objects and arrays, and separating commas on the line with the previous closing character."); + process.exit(1); + } +} +