From 738026c738b10624f95d9678137cc5ffcb745cc6 Mon Sep 17 00:00:00 2001 From: David McHealy Date: Fri, 26 Jan 2024 11:14:58 -0500 Subject: [PATCH 1/4] More podcast updates, rename NPR, fix ms project bot detection. --- Tests/Parser/Client/fixtures/mobile_app.yml | 2 +- Tests/fixtures/bots.yml | 13 + Tests/fixtures/podcasting.yml | 348 +++++++++++++++++++- regexes/bots.yml | 12 +- regexes/client/libraries.yml | 15 + regexes/client/mobile_apps.yml | 43 ++- regexes/device/mobiles.yml | 15 +- 7 files changed, 430 insertions(+), 18 deletions(-) diff --git a/Tests/Parser/Client/fixtures/mobile_app.yml b/Tests/Parser/Client/fixtures/mobile_app.yml index 11e1479744..12a1ba5724 100644 --- a/Tests/Parser/Client/fixtures/mobile_app.yml +++ b/Tests/Parser/Client/fixtures/mobile_app.yml @@ -531,7 +531,7 @@ user_agent: NPROneAndroid client: type: mobile app - name: NPR One + name: NPR version: - user_agent: WirtschaftsWoche-iOS-1.1.14-20200824.1315 diff --git a/Tests/fixtures/bots.yml b/Tests/fixtures/bots.yml index 551ce6fd68..a01223a631 100644 --- a/Tests/fixtures/bots.yml +++ b/Tests/fixtures/bots.yml @@ -6337,3 +6337,16 @@ name: GitHubCopilotChat category: Crawler url: https://github.com/aaamoon/copilot-gpt4-service + +- + user_agent: pdrl.fm Analyzer / 1.0.0 + bot: + name: Podroll Analyzer + category: Crawler + url: https://podroll.fm +- + user_agent: Mozilla/5.0 (compatible; PodUptime/1.0; +https://poduptime.com) + bot: + name: PodUptime + category: Site Monitor + url: https://poduptime.com diff --git a/Tests/fixtures/podcasting.yml b/Tests/fixtures/podcasting.yml index 851e5d42d5..be7a746e0d 100644 --- a/Tests/fixtures/podcasting.yml +++ b/Tests/fixtures/podcasting.yml @@ -1314,7 +1314,7 @@ os: [ ] client: type: mobile app - name: NPR One + name: NPR version: "" device: type: "" @@ -1330,7 +1330,7 @@ platform: "" client: type: mobile app - name: NPR One + name: NPR version: "234" device: type: "" @@ -1346,7 +1346,7 @@ platform: "" client: type: mobile app - name: NPR One + name: NPR version: "371" device: type: "" @@ -3939,7 +3939,7 @@ client: type: mobile app name: De Standaard - version: "" + version: "1.0.5" device: type: smartphone brand: Samsung @@ -3955,7 +3955,7 @@ client: type: mobile app name: De Standaard - version: "" + version: "1.0.5" device: type: smartphone brand: Fairphone @@ -3971,7 +3971,7 @@ client: type: mobile app name: De Standaard - version: "" + version: "1.0.5" device: type: smartphone brand: Samsung @@ -8859,7 +8859,7 @@ os: [ ] client: type: mobile app - name: NPR One + name: NPR version: "" device: type: "" @@ -8875,7 +8875,7 @@ platform: "" client: type: mobile app - name: NPR One + name: NPR version: "371" device: type: "" @@ -9069,3 +9069,335 @@ model: '' os_family: iOS browser_family: Unknown +- + user_agent: 'Podgrab - docker' + os: [ ] + client: + type: library + name: Podgrab + version: "" + device: + type: "" + brand: '' + model: '' + os_family: Unknown + browser_family: Unknown + +- + user_agent: 'Podcast Provider 0.34-9e9009e7 for Radio Downloader 0.34-9e9009e7' + os: [ ] + client: + type: library + name: Radio Downloader + version: "0.34" + device: + type: "" + brand: '' + model: '' + os_family: Unknown + browser_family: Unknown + +- + user_agent: 'ESP32 HTTP Client/1.0' + os: [ ] + client: + type: library + name: ESP32 HTTP Client + version: "1.0" + device: + type: "" + brand: '' + model: '' + os_family: Unknown + browser_family: Unknown + +- + user_agent: 'com.google.android.apps.youtube.music/62354340 (Linux; U; Android 14; en_US; Pixel 7 Pro; Build/UP1A.231105.002; Cronet/114.0.5735.84)' + os: + name: Android + version: "14" + platform: "" + client: + type: mobile app + name: Youtube Music + version: "62354340" + device: + type: smartphone + brand: Google + model: 'Pixel 7 Pro' + os_family: Android + browser_family: Unknown + +- + user_agent: 'YouTubeMusic/20231020.1.1 CFNetwork/1474 Darwin/23.0.0' + os: + name: iOS + version: "17.0" + platform: "" + client: + type: mobile app + name: Youtube Music + version: "20231020.1.1" + device: + type: "" + brand: Apple + model: '' + os_family: iOS + browser_family: Unknown + +- + user_agent: 'YouTubeMusic/6.29.2 CFNetwork/1485 Darwin/23.1.0' + os: + name: iOS + version: "17.1" + platform: "" + client: + type: mobile app + name: Youtube Music + version: "6.29.2" + device: + type: "" + brand: Apple + model: '' + os_family: iOS + browser_family: Unknown + +- + user_agent: 'Amazon;Echo_Dot;27d4dfe427b34d57995b463e5d63198d;;tpapi;3.199.422' + os: [ ] + client: + type: mobile app + name: Alexa Media Player + version: "" + device: + type: smart speaker + brand: Amazon + model: 'Echo Dot' + os_family: Unknown + browser_family: Unknown + +- + user_agent: 'Amazon;Echo_Dot_with_clock;27d4dfe427b34d57995b463e5d63198d;;tpapi;3.199.422' + os: [ ] + client: + type: mobile app + name: Alexa Media Player + version: "" + device: + type: smart speaker + brand: Amazon + model: 'Echo Dot' + os_family: Unknown + browser_family: Unknown + +- + user_agent: 'Amazon;Echo_Show_5;27d4dfe427b34d57995b463e5d63198d;;tpapi;3.199.422' + os: [ ] + client: + type: mobile app + name: Alexa Media Player + version: "" + device: + type: tablet + brand: Amazon + model: 'Echo Show 5' + os_family: Unknown + browser_family: Unknown + +- + user_agent: 'Amazon;Echo;27d4dfe427b34d57995b463e5d63198d;;tpapi;3.199.422' + os: [ ] + client: + type: mobile app + name: Alexa Media Player + version: "" + device: + type: smart speaker + brand: Amazon + model: Echo + os_family: Unknown + browser_family: Unknown + +- + user_agent: 'DS%20Podcast/2 CFNetwork/1410.0.3 Darwin/22.6.0' + os: + name: iOS + version: "16.6" + platform: "" + client: + type: mobile app + name: De Standaard + version: "2" + device: + type: "" + brand: Apple + model: '' + os_family: iOS + browser_family: Unknown + +- + user_agent: 'DS podcast/1.0.4 (be.standaard.audio; build:5; Android 11; Sdk:30; Manufacturer:samsung; Model: SM-A505FN) OkHttp/4.9.0' + os: + name: Android + version: "11" + platform: "" + client: + type: mobile app + name: De Standaard + version: "1.0.4" + device: + type: smartphone + brand: 'Samsung' + model: 'Galaxy A50' + os_family: Android + browser_family: Unknown + +- + user_agent: 'SM-T970 (compatible; Tablet2.0) HandelsbladProduction, com.twipemobile.nrc 5.1.4 (511) / Android 33' + os: + name: Android + version: "33" + platform: "" + client: + type: mobile app + name: NRC + version: "5.1.4" + device: + type: "tablet" + brand: Samsung + model: 'Galaxy Tab S7+ 12.4" WiFi' + os_family: Android + browser_family: Unknown + +- + user_agent: 'Dalvik/2.1.0 (Linux; U; Android 13; SM-G991U Build/TP1A.220624.014) nprone_android/3.7.0/OSv:13' + os: + name: Android + version: "13" + platform: "" + client: + type: mobile app + name: NPR + version: "3.7.0" + device: + type: smartphone + brand: Samsung + model: Galaxy S21 5G + os_family: Android + browser_family: Unknown + +- + user_agent: 'com.audials/9.52.0-0+gb3764950b-1058878988 (Linux;Android 11) AndroidXMedia3/1.1.1' + os: + name: Android + version: "11" + platform: "" + client: + type: mobile app + name: Audials + version: "9.52.0" + device: + type: "" + brand: '' + model: '' + os_family: Android + browser_family: Unknown + +- + user_agent: 'com.audials.paid/9.18.2-0 g6a7a08b50-770931356 (Linux;Android 6.0) ExoPlayerLib/2.18.1' + os: + name: Android + version: "6.0" + platform: "" + client: + type: mobile app + name: Audials + version: "9.18.2" + device: + type: "" + brand: '' + model: '' + os_family: Android + browser_family: Unknown + +- + user_agent: 'CoolerFM/1.0 iPhone/15 +https://cooler.fm' + os: + name: iOS + version: "" + platform: "" + client: + type: mobile app + name: Cooler + version: "1.0" + device: + type: "smartphone" + brand: Apple + model: 'iPhone' + os_family: iOS + browser_family: Unknown + +- + user_agent: 'Metacast/189 CFNetwork/1485 Darwin/23.1.0' + os: + name: iOS + version: "17.1" + platform: "" + client: + type: mobile app + name: Metacast + version: "189" + device: + type: "" + brand: Apple + model: '' + os_family: iOS + browser_family: Unknown + +- + user_agent: 'mowPod/1.0 Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36' + os: + name: Mac + version: "10.15.7" + platform: "" + client: + type: mobile app + name: mowPod + version: "1.0" + device: + type: "desktop" + brand: Apple + model: '' + os_family: Mac + browser_family: Unknown + +- + user_agent: 'com.meecel.feedreader.RssDemonAd' + os: [ ] + client: + type: mobile app + name: RSSDemon + version: "" + device: + type: "" + brand: '' + model: '' + os_family: Unknown + browser_family: Unknown + +- + user_agent: 'Virgin%20Radio/45.2.0.22026 / (Linux; Android 14) ExoPlayerLib/2.17.1 / samsung (SM-G996B)' + os: + name: Android + version: "14" + platform: "" + client: + type: mobile app + name: Virgin Radio + version: "45.2.0.22026" + device: + type: "smartphone" + brand: Samsung + model: 'Galaxy S21+ 5G' + os_family: Android + browser_family: Unknown diff --git a/regexes/bots.yml b/regexes/bots.yml index e25ac7f2f3..89d6cb6ee2 100644 --- a/regexes/bots.yml +++ b/regexes/bots.yml @@ -3694,8 +3694,18 @@ category: 'Crawler' url: 'https://github.com/aaamoon/copilot-gpt4-service' +- regex: '^pdrl.fm' + name: 'Podroll Analyzer' + category: 'Crawler' + url: 'https://podroll.fm' + +- regex: 'PodUptime/' + name: 'PodUptime' + category: 'Site Monitor' + url: 'https://poduptime.com' + # Generic detections -- regex: '(A6-Indexer|nuhk|TsolCrawler|Yammybot|Openbot|Gulper Web Bot|grub-client|Download Demon|SearchExpress|Microsoft URL Control|borg|altavista|dataminr.com|tweetedtimes.com|TrendsmapResolver|teoma|blitzbot|oegp|furlbot|http%20client|polybot|htdig|mogimogi|larbin|scrubby|searchsight|seekbot|semanticdiscovery|snappy|vortex(?!(?: Build|Plus))|zeal(?!ot)|fast-webcrawler|converacrawler|dataparksearch|findlinks|BrowserMob|HttpMonitor|ThumbShotsBot|URL2PNG|ZooShot|GomezA|Google SketchUp|Read%20Later|RackspaceBot|robots|SeopultContentAnalyzer|7Siters|centuryb.o.t9|InterNaetBoten|EasyBib AutoCite|Bidtellect|tomnomnom/meg|My User Agent|cortex|CF-UC User Agent|Re-re Studio|adreview|AHC/|NameOfAgent|Request-Promise|ALittle Client|Hello,? world|wp_is_mobile|0xAbyssalDoesntExist|Anarchy99|^revolt|nvd0rz|xfa1|Hakai|gbrmss|fuck-your-hp|IDBTE4M CODE87|Antoine|Insomania|Hells-Net|b3astmode|Linux Gnu \(cow\)|custom_user_agent|Test Certificate Info|iplabel|Magellan|CustomUserAgent|TheSafex?Internetx?Search|kirkland-signature|research|project(?!or))' +- regex: '(A6-Indexer|nuhk|TsolCrawler|Yammybot|Openbot|Gulper Web Bot|grub-client|Download Demon|SearchExpress|Microsoft URL Control|borg|altavista|dataminr.com|tweetedtimes.com|TrendsmapResolver|teoma|blitzbot|oegp|furlbot|http%20client|polybot|htdig|mogimogi|larbin|scrubby|searchsight|seekbot|semanticdiscovery|snappy|vortex(?!(?: Build|Plus))|zeal(?!ot)|fast-webcrawler|converacrawler|dataparksearch|findlinks|BrowserMob|HttpMonitor|ThumbShotsBot|URL2PNG|ZooShot|GomezA|Google SketchUp|Read%20Later|RackspaceBot|robots|SeopultContentAnalyzer|7Siters|centuryb.o.t9|InterNaetBoten|EasyBib AutoCite|Bidtellect|tomnomnom/meg|My User Agent|cortex|CF-UC User Agent|Re-re Studio|adreview|AHC/|NameOfAgent|Request-Promise|ALittle Client|Hello,? world|wp_is_mobile|0xAbyssalDoesntExist|Anarchy99|^revolt|nvd0rz|xfa1|Hakai|gbrmss|fuck-your-hp|IDBTE4M CODE87|Antoine|Insomania|Hells-Net|b3astmode|Linux Gnu \(cow\)|custom_user_agent|Test Certificate Info|iplabel|Magellan|CustomUserAgent|TheSafex?Internetx?Search|kirkland-signature|research|(? Date: Mon, 29 Jan 2024 14:19:04 -0500 Subject: [PATCH 2/4] Hopefully a fix for occasional android misidentification. --- Tests/fixtures/podcasting.yml | 2 +- regexes/oss.yml | 40 +++++++++++++++++++++++++++++++++++ 2 files changed, 41 insertions(+), 1 deletion(-) diff --git a/Tests/fixtures/podcasting.yml b/Tests/fixtures/podcasting.yml index be7a746e0d..316686c391 100644 --- a/Tests/fixtures/podcasting.yml +++ b/Tests/fixtures/podcasting.yml @@ -9256,7 +9256,7 @@ user_agent: 'SM-T970 (compatible; Tablet2.0) HandelsbladProduction, com.twipemobile.nrc 5.1.4 (511) / Android 33' os: name: Android - version: "33" + version: "13" platform: "" client: type: mobile app diff --git a/regexes/oss.yml b/regexes/oss.yml index 688a652d3a..e0b90147b1 100644 --- a/regexes/oss.yml +++ b/regexes/oss.yml @@ -429,6 +429,46 @@ - regex: '(?:(?:Orca-)?(? Date: Wed, 31 Jan 2024 16:54:59 -0500 Subject: [PATCH 3/4] Changing Alexa Media Player -> Alexa, and making it a mediaplayer. --- Tests/fixtures/mobile_apps.yml | 4 ++-- Tests/fixtures/podcasting.yml | 36 ++++++++++++++++---------------- Tests/fixtures/smart_speaker.yml | 4 ++-- regexes/client/mediaplayers.yml | 4 ++++ regexes/client/mobile_apps.yml | 4 ---- 5 files changed, 26 insertions(+), 26 deletions(-) diff --git a/Tests/fixtures/mobile_apps.yml b/Tests/fixtures/mobile_apps.yml index c6b8991dc6..4f3820b16d 100644 --- a/Tests/fixtures/mobile_apps.yml +++ b/Tests/fixtures/mobile_apps.yml @@ -822,8 +822,8 @@ version: 5.1.1 platform: "" client: - type: mobile app - name: Alexa Media Player + type: mediaplayer + name: Alexa version: 2.0.201528.0 device: type: smart speaker diff --git a/Tests/fixtures/podcasting.yml b/Tests/fixtures/podcasting.yml index 316686c391..e0d455c0fc 100644 --- a/Tests/fixtures/podcasting.yml +++ b/Tests/fixtures/podcasting.yml @@ -1149,8 +1149,8 @@ version: "5.1.1" platform: "" client: - type: mobile app - name: Alexa Media Player + type: mediaplayer + name: Alexa version: "vizzini.locked.38.0" device: type: smart speaker @@ -1165,8 +1165,8 @@ version: "9" platform: "" client: - type: mobile app - name: Alexa Media Player + type: mediaplayer + name: Alexa version: "2.2.303087.0" device: type: "" @@ -1181,8 +1181,8 @@ version: "7.0" platform: "" client: - type: mobile app - name: Alexa Media Player + type: mediaplayer + name: Alexa version: "2.2.2768.0" device: type: "" @@ -1197,8 +1197,8 @@ version: "13" platform: "" client: - type: mobile app - name: Alexa Media Player + type: mediaplayer + name: Alexa version: "2.2.521848.0" device: type: "" @@ -1210,8 +1210,8 @@ user_agent: 'Echo/1.0(APNG)' os: [ ] client: - type: mobile app - name: Alexa Media Player + type: mediaplayer + name: Alexa version: "1.0" device: type: smart speaker @@ -9166,8 +9166,8 @@ user_agent: 'Amazon;Echo_Dot;27d4dfe427b34d57995b463e5d63198d;;tpapi;3.199.422' os: [ ] client: - type: mobile app - name: Alexa Media Player + type: mediaplayer + name: Alexa version: "" device: type: smart speaker @@ -9180,8 +9180,8 @@ user_agent: 'Amazon;Echo_Dot_with_clock;27d4dfe427b34d57995b463e5d63198d;;tpapi;3.199.422' os: [ ] client: - type: mobile app - name: Alexa Media Player + type: mediaplayer + name: Alexa version: "" device: type: smart speaker @@ -9194,8 +9194,8 @@ user_agent: 'Amazon;Echo_Show_5;27d4dfe427b34d57995b463e5d63198d;;tpapi;3.199.422' os: [ ] client: - type: mobile app - name: Alexa Media Player + type: mediaplayer + name: Alexa version: "" device: type: tablet @@ -9208,8 +9208,8 @@ user_agent: 'Amazon;Echo;27d4dfe427b34d57995b463e5d63198d;;tpapi;3.199.422' os: [ ] client: - type: mobile app - name: Alexa Media Player + type: mediaplayer + name: Alexa version: "" device: type: smart speaker diff --git a/Tests/fixtures/smart_speaker.yml b/Tests/fixtures/smart_speaker.yml index bc77bc83ff..bb28d0404e 100644 --- a/Tests/fixtures/smart_speaker.yml +++ b/Tests/fixtures/smart_speaker.yml @@ -55,8 +55,8 @@ user_agent: Echo/1.0(APNG) os: [ ] client: - type: mobile app - name: Alexa Media Player + type: mediaplayer + name: Alexa version: "1.0" device: type: smart speaker diff --git a/regexes/client/mediaplayers.yml b/regexes/client/mediaplayers.yml index ebc0a2de12..7765c034f7 100644 --- a/regexes/client/mediaplayers.yml +++ b/regexes/client/mediaplayers.yml @@ -9,6 +9,10 @@ name: 'Audacious' version: '$1' +- regex: '(?:AlexaMediaPlayer/|^AlexaMediaPlayer/|^Echo/|Amazon;Echo(?:_|;)|^AlexaService/|^Alexa Mobile Voice/)([a-z\d]+\.[a-z.\d]+)?' + name: 'Alexa' + version: '$1' + - regex: 'Banshee(?:[ /]([\d\.]+))?' name: 'Banshee' version: '$1' diff --git a/regexes/client/mobile_apps.yml b/regexes/client/mobile_apps.yml index f913ea72c8..425b318be7 100644 --- a/regexes/client/mobile_apps.yml +++ b/regexes/client/mobile_apps.yml @@ -585,10 +585,6 @@ name: 'Podbean' version: '$1' -- regex: '(?:AlexaMediaPlayer/|^AlexaMediaPlayer/|^Echo/|Amazon;Echo(?:_|;)|^AlexaService/|^Alexa Mobile Voice/)([a-z\d]+\.[a-z.\d]+)?' - name: 'Alexa Media Player' - version: '$1' - - regex: 'TuneIn Radio Pro(?:[^/]*)/(\d+[\.\d]+)' name: 'TuneIn Radio Pro' version: '$1' From 153149765dfc9966bc58bebd165e1dc51691507e Mon Sep 17 00:00:00 2001 From: David McHealy Date: Thu, 1 Feb 2024 14:29:33 -0500 Subject: [PATCH 4/4] More tweaks to android sdk version detection. --- Tests/fixtures/mobile_apps.yml | 2 +- Tests/fixtures/podcasting.yml | 2 +- regexes/oss.yml | 81 ++++++++++------------------------ 3 files changed, 25 insertions(+), 60 deletions(-) diff --git a/Tests/fixtures/mobile_apps.yml b/Tests/fixtures/mobile_apps.yml index 4f3820b16d..7e675b56f4 100644 --- a/Tests/fixtures/mobile_apps.yml +++ b/Tests/fixtures/mobile_apps.yml @@ -915,7 +915,7 @@ user_agent: Podimo/1.20.1 build 148/Android 29 os: name: Android - version: "" + version: "10" platform: "" client: type: mobile app diff --git a/Tests/fixtures/podcasting.yml b/Tests/fixtures/podcasting.yml index e0d455c0fc..872eb14bc7 100644 --- a/Tests/fixtures/podcasting.yml +++ b/Tests/fixtures/podcasting.yml @@ -7212,7 +7212,7 @@ user_agent: 'Podimo/1.11.3 build 91/Android 28' os: name: Android - version: "" + version: "9" platform: "" client: type: mobile app diff --git a/regexes/oss.yml b/regexes/oss.yml index e0b90147b1..865e15eccc 100644 --- a/regexes/oss.yml +++ b/regexes/oss.yml @@ -355,49 +355,53 @@ ########## # Android SDK Level Api ########## -- regex: '(?:Android API \d+|\d+/tclwebkit(?:\d+[\.\d]*))' +- regex: '(?:Android API \d+|\d+/tclwebkit(?:\d+[\.\d]*)|(?:(?