From 2b9e30866eca052641f205c904743f31d971dae9 Mon Sep 17 00:00:00 2001 From: Sean Callan Date: Mon, 24 Jun 2024 20:32:51 -0400 Subject: [PATCH] feat: Update pattern file --- priv/patterns.yml | 358 ++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 316 insertions(+), 42 deletions(-) diff --git a/priv/patterns.yml b/priv/patterns.yml index 7065068..5459ca1 100644 --- a/priv/patterns.yml +++ b/priv/patterns.yml @@ -78,6 +78,9 @@ user_agent_parsers: # 'Mozilla/5.0 (Unknown; Linux x86_64) AppleWebKit/534.34 (KHTML, like Gecko) PingdomTMS/0.8.5 Safari/534.34' - regex: '(PingdomTMS)/(\d+)\.(\d+)\.(\d+)' family_replacement: 'PingdomBot' + # 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Ubuntu Chromium/61.0.3163.100 Chrome/61.0.3163.100 Safari/537.36 PingdomPageSpeed/1.0 (pingbot/2.0; +http://www.pingdom.com/)' + - regex: '(PingdomPageSpeed)/(\d+)\.(\d+)' + family_replacement: 'PingdomBot' # PTST / WebPageTest.org crawlers - regex: ' (PTST)/(\d+)(?:\.(\d+)|)$' @@ -128,7 +131,7 @@ user_agent_parsers: # Bots Pattern 'name/0.0.0' - regex: '/((?:Ant-|)Nutch|[A-z]+[Bb]ot|[A-z]+[Ss]pider|Axtaris|fetchurl|Isara|ShopSalad|Tailsweep)[ \-](\d+)(?:\.(\d+)|)(?:\.(\d+)|)' # Bots Pattern 'name/0.0.0' - - regex: '\b(008|Altresium|Argus|BaiduMobaider|BoardReader|DNSGroup|DataparkSearch|EDI|Goodzer|Grub|INGRID|Infohelfer|LinkedInBot|LOOQ|Nutch|OgScrper|PathDefender|Peew|PostPost|Steeler|Twitterbot|VSE|WebCrunch|WebZIP|Y!J-BR[A-Z]|YahooSeeker|envolk|sproose|wminer)/(\d+)(?:\.(\d+)|)(?:\.(\d+)|)' + - regex: '\b(008|Altresium|Argus|BaiduMobaider|BoardReader|DNSGroup|DataparkSearch|EDI|Goodzer|Grub|INGRID|Infohelfer|LinkedInBot|LOOQ|Nutch|OgScrper|Pandora|PathDefender|Peew|PostPost|Steeler|Twitterbot|VSE|WebCrunch|WebZIP|Y!J-BR[A-Z]|YahooSeeker|envolk|sproose|wminer)/(\d+)(?:\.(\d+)|)(?:\.(\d+)|)' # MSIECrawler - regex: '(MSIE) (\d+)\.(\d+)([a-z]\d|[a-z]|);.{0,200} MSIECrawler' @@ -138,7 +141,7 @@ user_agent_parsers: - regex: 
'(DAVdroid)/(\d+)\.(\d+)(?:\.(\d+)|)' # Downloader ... - - regex: '(Google-HTTP-Java-Client|Apache-HttpClient|Go-http-client|scalaj-http|http%20client|Python-urllib|HttpMonitor|TLSProber|WinHTTP|JNLP|okhttp|aihttp|reqwest|axios|unirest-(?:java|python|ruby|nodejs|php|net))(?:[ /](\d+)(?:\.(\d+)|)(?:\.(\d+)|)|)' + - regex: '(Google-HTTP-Java-Client|Apache-HttpClient|PostmanRuntime|Go-http-client|scalaj-http|http%20client|Python-urllib|HttpMonitor|TLSProber|WinHTTP|JNLP|okhttp|aihttp|reqwest|axios|unirest-(?:java|python|ruby|nodejs|php|net))(?:[ /](\d+)(?:\.(\d+)|)(?:\.(\d+)|)|)' # Pinterestbot - regex: '(Pinterest(?:bot|))/(\d+)(?:\.(\d+)|)(?:\.(\d+)|)[;\s(]+\+https://www.pinterest.com/bot.html' @@ -213,11 +216,15 @@ user_agent_parsers: - regex: '(Twitter for (?:iPhone|iPad)|TwitterAndroid)(?:\/(\d+)\.(\d+)|)' family_replacement: 'Twitter' - # aspiegel.com spider (owned by Huawei) - - regex: 'Mozilla.{1,100}Mobile.{1,100}AspiegelBot' + # Phantom app + - regex: 'Mozilla.{1,200}Mobile.{1,100}(Phantom\/ios|Phantom\/android).(\d+)\.(\d+)\.(\d+)' + family_replacement: 'Phantom' + + # aspiegel.com spider (owned by Huawei, later called PetalBot) + - regex: 'Mozilla.{1,100}Mobile.{1,100}(AspiegelBot|PetalBot)' family_replacement: 'Spider' - - regex: 'AspiegelBot' + - regex: 'AspiegelBot|PetalBot' family_replacement: 'Spider' # Basilisk @@ -371,6 +378,29 @@ user_agent_parsers: - regex: '(Nintendo 3DS)' family_replacement: 'NetFront NX' + # Huawei Browser, should go before Safari and Chrome Mobile + - regex: '(HuaweiBrowser)/(\d+)\.(\d+)\.(\d+)\.\d+' + family_replacement: 'Huawei Browser' + + # AVG + - regex: '(AVG)/(\d+)\.(\d+)\.(\d+)\.\d+' + family_replacement: 'AVG' + + # Avast + - regex: '(AvastSecureBrowser|Avast)/(\d+)\.(\d+)\.(\d+)' + family_replacement: 'Avast Secure Browser' + + # Instabridge + - regex: '(Instabridge)/(\d+)(?:\.(\d+)|)(?:\.(\d+)|)' + + # Aloha Browser + - regex: '(AlohaBrowser)/(\d+)\.(\d+)\.(\d+)(?:\.(\d+)|)' + family_replacement: 'Aloha Browser' + + 
# Brave Browser https://brave.com/ , should go before Safari and Chrome Mobile + - regex: '((?:B|b)rave(?:\sChrome)?)/(\d+)(?:\.(\d+)|)(?:\.(\d+)|)(?:\.(\d+)|)' + family_replacement: 'Brave' + # Amazon Silk, should go before Safari and Chrome Mobile - regex: '(Silk)/(\d+)\.(\d+)(?:\.([0-9\-]+)|)' family_replacement: 'Amazon Silk' @@ -381,9 +411,13 @@ user_agent_parsers: # Edge Mobile - regex: 'Windows Phone .{0,200}(Edge)/(\d+)\.(\d+)' family_replacement: 'Edge Mobile' - - regex: '(EdgiOS|EdgA)/(\d+)\.(\d+)\.(\d+)\.(\d+)' + - regex: '(EdgiOS|EdgA)/(\d+)(?:\.(\d+)|)(?:\.(\d+)|)(?:\.(\d+)|)' family_replacement: 'Edge Mobile' + # Oculus Browser, should go before Samsung Internet + - regex: '(OculusBrowser)/(\d+)\.(\d+).0.0(?:\.([0-9\-]+)|)' + family_replacement: 'Oculus Browser' + # Samsung Internet (based on Chrome, but lacking some features) - regex: '(SamsungBrowser)/(\d+)\.(\d+)' family_replacement: 'Samsung Internet' @@ -439,13 +473,25 @@ user_agent_parsers: family_replacement: 'QQ Browser' # DuckDuckGo - - regex: 'Mobile.{0,200}(DuckDuckGo)/(\d+)' + - regex: 'Mozilla.{1,200}Mobile.{1,100}(DuckDuckGo)/(\d+)' family_replacement: 'DuckDuckGo Mobile' + - regex: 'Mozilla.{1,200}(DuckDuckGo)/(\d+)' + family_replacement: 'DuckDuckGo' + - regex: 'Mozilla.{1,200}Mobile.{1,100}(Ddg)/(\d+)(?:\.(\d+)|)' + family_replacement: 'DuckDuckGo Mobile' + - regex: 'Mozilla.{1,200}(Ddg)/(\d+)(?:\.(\d+)|)' + family_replacement: 'DuckDuckGo' # Tenta Browser - regex: '(Tenta/)(\d+)\.(\d+)\.(\d+)' family_replacement: 'Tenta Browser' + # Ecosia on iOS / Android + - regex: '(Ecosia) ios@(\d+)\.(\d+)\.(\d+)\.(\d+)' + family_replacement: 'Ecosia iOS' + - regex: '(Ecosia) android@(\d+)\.(\d+)\.(\d+)\.(\d+)' + family_replacement: 'Ecosia Android' + # Chrome Mobile - regex: 'Version/.{1,300}(Chrome)/(\d+)\.(\d+)\.(\d+)\.(\d+)' family_replacement: 'Chrome Mobile WebView' @@ -453,7 +499,7 @@ user_agent_parsers: family_replacement: 'Chrome Mobile WebView' - regex: 
'(CrMo)/(\d+)\.(\d+)\.(\d+)\.(\d+)' family_replacement: 'Chrome Mobile' - - regex: '(CriOS)/(\d+)\.(\d+)\.(\d+)\.(\d+)' + - regex: '(CriOS)/(\d+)(?:\.(\d+)|)(?:\.(\d+)|)(?:\.(\d+)|)' family_replacement: 'Chrome Mobile iOS' - regex: '(Chrome)/(\d+)\.(\d+)\.(\d+)\.(\d+) Mobile(?:[ /]|$)' family_replacement: 'Chrome Mobile' @@ -485,6 +531,8 @@ user_agent_parsers: # Yandex Browser - regex: '(YaBrowser)/(\d+)\.(\d+)\.(\d+)' family_replacement: 'Yandex Browser' + - regex: '(YaSearchBrowser)/(\d+)\.(\d+)\.(\d+)' + family_replacement: 'Yandex Browser' # Mail.ru Amigo/Internet Browser (Chromium-based) - regex: '(Chrome)/(\d+)\.(\d+)\.(\d+).{0,100} MRCHROME' @@ -517,6 +565,10 @@ user_agent_parsers: # Palo Alto GlobalProtect Linux - regex: 'PAN (GlobalProtect)/(\d+)\.(\d+)\.(\d+) .{1,100} \(X11; Linux x86_64\)' + # Surveyon https://www.surveyon.com/ + - regex: '^(surveyon)/(\d+)\.(\d+)\.(\d+)' + family_replacement: 'Surveyon' + #### END SPECIAL CASES TOP #### #### MAIN CASES - this catches > 50% of all browsers #### @@ -585,18 +637,14 @@ user_agent_parsers: - regex: 'Superhuman' family_replacement: 'Superhuman' - # Vivaldi uses "Vivaldi" - - regex: '(Vivaldi)/(\d+)\.(\d+)\.(\d+)' + # Vivaldi + - regex: '(Vivaldi)/(\d+)(?:\.(\d+)|)(?:\.(\d+)|)' # Edge/major_version.minor_version # Edge with chromium Edg/major_version.minor_version.patch.minor_patch - regex: '(Edge?)/(\d+)(?:\.(\d+)|)(?:\.(\d+)|)(?:\.(\d+)|)' family_replacement: 'Edge' - # Brave Browser https://brave.com/ - - regex: '(brave)/(\d+)\.(\d+)\.(\d+) Chrome' - family_replacement: 'Brave' - # Iron Browser ~since version 50 - regex: '(Chrome)/(\d+)\.(\d+)\.(\d+)[\d.]{0,100} Iron[^/]' family_replacement: 'Iron' @@ -913,7 +961,7 @@ user_agent_parsers: # AFTER THE EDGE CASES ABOVE! 
# AFTER IE11 # BEFORE all other IE - - regex: '(Firefox)/(\d+)\.(\d+)\.(\d+)' + - regex: '(Firefox)/(\d+)\.(\d+)(?:\.(\d+)|$)' - regex: '(Firefox)/(\d+)\.(\d+)(pre|[ab]\d+[a-z]*|)' @@ -924,7 +972,7 @@ user_agent_parsers: family_replacement: 'Python Requests' # headless user-agents - - regex: '\b(Windows-Update-Agent|WindowsPowerShell|Microsoft-CryptoAPI|SophosUpdateManager|SophosAgent|Debian APT-HTTP|Ubuntu APT-HTTP|libcurl-agent|libwww-perl|urlgrabber|curl|PycURL|Wget|wget2|aria2|Axel|OpenBSD ftp|lftp|jupdate|insomnia|fetch libfetch|akka-http|got|CloudCockpitBackend|ReactorNetty|axios|Jersey|Vert.x-WebClient|Apache-CXF|Go-CF-client|go-resty|AHC)(?:[ /](\d+)(?:\.(\d+)|)(?:\.(\d+)|)|)' + - regex: '\b(Windows-Update-Agent|WindowsPowerShell|Microsoft-CryptoAPI|SophosUpdateManager|SophosAgent|Debian APT-HTTP|Ubuntu APT-HTTP|libcurl-agent|libwww-perl|urlgrabber|curl|PycURL|Wget|wget2|aria2|Axel|OpenBSD ftp|lftp|jupdate|insomnia|fetch libfetch|akka-http|got|CloudCockpitBackend|ReactorNetty|axios|Jersey|Vert.x-WebClient|Apache-CXF|Go-CF-client|go-resty|AHC|HTTPie)(?:[ /](\d+)(?:\.(\d+)|)(?:\.(\d+)|)|)' # CloudFoundry - regex: '^(cf)\/(\d+)\.(\d+)\.(\S+)' @@ -953,11 +1001,29 @@ user_agent_parsers: - regex: '(Python/3\.\d{1,3} aiohttp)/(\d+)\.(\d+)\.(\d+)' family_replacement: 'Python aiohttp' - - regex: '(Java)[/ ]?\d+\.(\d+)\.(\d+)[_-]*([a-zA-Z0-9]+|)' + - regex: '(Java)[/ ]?\d{1}\.(\d+)\.(\d+)[_-]*([a-zA-Z0-9]+|)' + + - regex: '(Java)[/ ]?(\d+)\.(\d+)\.(\d+)' + + # minio-go (https://github.com/minio/minio-go) + - regex: '(minio-go)/v(\d+)\.(\d+)\.(\d+)' + + # ureq - minimal request library in rust (https://github.com/algesten/ureq) + - regex: '^(ureq)[/ ](\d+)\.(\d+).(\d+)' + + # http.rb - HTTP (The Gem! a.k.a. 
http.rb) - a fast Ruby HTTP client + # (https://github.com/httprb/http/blob/3aa7470288deb81f7d7b982c1e2381871049dcbb/lib/http/request.rb#L27) + - regex: '^(http\.rb)/(\d+)\.(\d+).(\d+)' + + # Guzzle, PHP HTTP client (https://docs.guzzlephp.org/) + - regex: '^(GuzzleHttp)/(\d+)\.(\d+).(\d+)' + + # lorien/grab - Web Scraping Framework (https://github.com/lorien/grab) + - regex: '^(grab)\b' # Cloud Storage Clients - regex: '^(Cyberduck)/(\d+)\.(\d+)\.(\d+)(?:\.\d+|)' - - regex: '^(S3 Browser) (\d+)-(\d+)-(\d+)(?:\s*http://s3browser\.com|)' + - regex: '^(S3 Browser) (\d+)[.-](\d+)[.-](\d+)(?:\s*https?://s3browser\.com|)' - regex: '(S3Gof3r)' # IBM COS (Cloud Object Storage) API - regex: '\b(ibm-cos-sdk-(?:core|java|js|python))/(\d+)\.(\d+)(?:\.(\d+)|)' @@ -980,6 +1046,22 @@ user_agent_parsers: - regex: '^(ViaFree|Viafree)-(?:tvOS-)?[A-Z]{2}/(\d+)\.(\d+)\.(\d+)' family_replacement: 'ViaFree' + # Transmit (https://library.panic.com/transmit/) + - regex: '(Transmit)/(\d+)\.(\d+)\.(\d+)' + + # Download Master (https://downloadmaster.ru/) + - regex: '(Download Master)' + + # HTTrack crawler + - regex: '\b(HTTrack) (\d+)\.(\d+)(?:[\.\-](\d+)|)' + + # Ladybird Browser (https://ladybird.dev) + # https://github.com/SerenityOS/serenity/blob/6a662e0d43810c1dbd56fbf0c123f258aa1d694e/Userland/Libraries/LibWeb/Loader/ResourceLoader.h#L64 + - regex: '(Ladybird)\/(\d+)\.(\d+)' + + # MullvadBrowser (https://mullvad.net/en/browser) + - regex: '(MullvadBrowser)/(\d+)(?:\.(\d+)|)(?:\.(\d+)|)' + os_parsers: ########## # HbbTV vendors @@ -1039,8 +1121,8 @@ os_parsers: # generic HbbTV, hoping to catch manufacturer name (always after 2nd comma) and the first string that looks like a 2011-2019 year - regex: 'HbbTV/\d+\.\d+\.\d+ \(.{0,30}; ?([a-zA-Z]+) ?;.{0,30}(201[1-9]).{0,30}\)' - # aspiegel.com spider (owned by Huawei) - - regex: 'AspiegelBot' + # aspiegel.com spider (owned by Huawei, later renamed PetalBot) + - regex: 'AspiegelBot|PetalBot' os_replacement: 'Other' ########## @@ -1063,6 
+1145,29 @@ os_parsers: # Special case for new ArcGIS Mobile products - regex: 'ArcGISRuntime-(?:Android|iOS)\/\d+\.\d+(?:\.\d+|) \((Android|iOS) (\d+)(?:\.(\d+)(?:\.(\d+)|)|);' + ########## + # Chromecast + ########## + # Ex: Mozilla/5.0 (Linux; Android 12.0; Build/STTL.240206.002) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.0 Safari/537.36 CrKey/1.56.500000 DeviceType/AndroidTV + # These are the newer Android-based "Google TV" Chromecast devices. + # Google stopped updating the Chromecast firmware version in these, so they always say CrKey/1.56.500000. Therefore we extract the more useful Android version instead. + - regex: '(Android) (\d+)(?:\.(\d+)).*CrKey' + os_replacement: 'Chromecast Android' + + # Ex: Mozilla/5.0 (Fuchsia) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36 CrKey/1.56.500000 + # These are some intermediate "Nest Hub" Chromecast devices running Fuchsia. + - regex: 'Fuchsia.*(CrKey)(?:[/](\d+)\.(\d+)(?:\.(\d+)|)|)' + os_replacement: 'Chromecast Fuchsia' + + # Ex: Mozilla/5.0 (X11; Linux armv7l) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.225 Safari/537.36 CrKey/1.56.500000 DeviceType/SmartSpeaker + - regex: 'Linux.*(CrKey)(?:[/](\d+)\.(\d+)(?:\.(\d+)|)|).*DeviceType/SmartSpeaker' + os_replacement: 'Chromecast SmartSpeaker' + + # Ex: Mozilla/5.0 (X11; Linux armv7l) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.225 Safari/537.36 CrKey/1.56.500000 DeviceType/Chromecast + # These are the oldest Chromecast devices that ran Linux. + - regex: 'Linux.*(CrKey)(?:[/](\d+)\.(\d+)(?:\.(\d+)|)|)' + os_replacement: 'Chromecast Linux' + ########## # Android # can actually detect rooted android os. do we care? 
@@ -1090,6 +1195,7 @@ os_parsers: # Android 9; Android 10; - regex: '(Android) (\d+);' + - regex: '(Android): (\d+)(?:\.(\d+)(?:\.(\d+)|)|);' # UCWEB - regex: '^UCWEB.{0,200}; (Adr) (\d+)\.(\d+)(?:[.\-]([a-z0-9]{1,100})|);' @@ -1106,6 +1212,12 @@ os_parsers: - regex: '(android)\s(?:mobile\/)(\d+)(?:\.(\d+)(?:\.(\d+)|)|)' os_replacement: 'Android' + ########## + # Meta Quest + ########## + - regex: 'Quest' + os_replacement: 'Android' + ########## # Kindle Android ########## @@ -1219,6 +1331,10 @@ os_parsers: ########## - regex: '(Tizen)[/ ](\d+)\.(\d+)' + # Chrome and Edge on iOS with desktop mode contains Mac OS X, so it must be before any Mac OS check + - regex: 'Intel Mac OS X.+(CriOS|EdgiOS)/\d+' + os_replacement: 'iOS' + ########## # Mac OS # @ref: http://en.wikipedia.org/wiki/Mac_OS_X#Versions @@ -1280,7 +1396,7 @@ os_parsers: - regex: '(Apple\s?TV)(?:/(\d+)\.(\d+)|)' os_replacement: 'ATV OS X' - - regex: '(CPU[ +]OS|iPhone[ +]OS|CPU[ +]iPhone|CPU IPhone OS)[ +]+(\d+)[_\.](\d+)(?:[_\.](\d+)|)' + - regex: '(CPU[ +]OS|iPhone[ +]OS|CPU[ +]iPhone|CPU IPhone OS|CPU iPad OS)[ +]+(\d+)[_\.](\d+)(?:[_\.](\d+)|)' os_replacement: 'iOS' # remaining cases are mostly only opera uas, so catch opera as to not catch iphone spoofs @@ -1491,12 +1607,94 @@ os_parsers: os_replacement: 'iOS' os_v1_replacement: '13' os_v2_replacement: '5' - - regex: 'CFNetwork/11.{0,100} Darwin/(19)\.\d+' + - regex: 'CFNetwork/11.{0,100} Darwin/19\.6\.\d+' os_replacement: 'iOS' os_v1_replacement: '13' - - regex: 'CFNetwork/11.{0,100} Darwin/20\.0\.\d+' + os_v2_replacement: '6' + - regex: 'CFNetwork/1[01].{0,100} Darwin/19\.\d+' + os_replacement: 'iOS' + os_v1_replacement: '13' + - regex: 'CFNetwork/12.{0,100} Darwin/20\.1\.\d+' + os_replacement: 'iOS' + os_v1_replacement: '14' + os_v2_replacement: '2' + - regex: 'CFNetwork/12.{0,100} Darwin/20\.2\.\d+' + os_replacement: 'iOS' + os_v1_replacement: '14' + os_v2_replacement: '3' + - regex: 'CFNetwork/12.{0,100} Darwin/20\.3\.\d+' os_replacement: 'iOS' 
os_v1_replacement: '14' + os_v2_replacement: '4' + - regex: 'CFNetwork/12.{0,100} Darwin/20\.4\.\d+' + os_replacement: 'iOS' + os_v1_replacement: '14' + os_v2_replacement: '5' + - regex: 'CFNetwork/12.{0,100} Darwin/20\.5\.\d+' + os_replacement: 'iOS' + os_v1_replacement: '14' + os_v2_replacement: '6' + - regex: 'CFNetwork/12.{0,100} Darwin/20\.6\.\d+' + os_replacement: 'iOS' + os_v1_replacement: '14' + os_v2_replacement: '8' + - regex: 'CFNetwork/.{0,100} Darwin/(20)\.\d+' + os_replacement: 'iOS' + os_v1_replacement: '14' + - regex: 'CFNetwork/13.{0,100} Darwin/21\.0\.\d+' + os_replacement: 'iOS' + os_v1_replacement: '15' + os_v2_replacement: '0' + - regex: 'CFNetwork/13.{0,100} Darwin/21\.1\.\d+' + os_replacement: 'iOS' + os_v1_replacement: '15' + os_v2_replacement: '1' + - regex: 'CFNetwork/13.{0,100} Darwin/21\.2\.\d+' + os_replacement: 'iOS' + os_v1_replacement: '15' + os_v2_replacement: '2' + - regex: 'CFNetwork/13.{0,100} Darwin/21\.3\.\d+' + os_replacement: 'iOS' + os_v1_replacement: '15' + os_v2_replacement: '3' + - regex: 'CFNetwork/13.{0,100} Darwin/21\.4\.\d+' + os_replacement: 'iOS' + os_v1_replacement: '15' + os_v2_replacement: '4' + - regex: 'CFNetwork/13.{0,100} Darwin/21\.5\.\d+' + os_replacement: 'iOS' + os_v1_replacement: '15' + os_v2_replacement: '5' + - regex: 'CFNetwork/13.{0,100} Darwin/21\.6\.\d+' + os_replacement: 'iOS' + os_v1_replacement: '15' + os_v2_replacement: '6' + - regex: 'CFNetwork/.{0,100} Darwin/(21)\.\d+' + os_replacement: 'iOS' + os_v1_replacement: '15' + - regex: 'CFNetwork/.{0,100} Darwin/22\.0\.\d+' + os_replacement: 'iOS' + os_v1_replacement: '16' + os_v2_replacement: '0' + - regex: 'CFNetwork/.{0,100} Darwin/22\.1\.\d+' + os_replacement: 'iOS' + os_v1_replacement: '16' + os_v2_replacement: '1' + - regex: 'CFNetwork/.{0,100} Darwin/22\.2\.\d+' + os_replacement: 'iOS' + os_v1_replacement: '16' + os_v2_replacement: '2' + - regex: 'CFNetwork/.{0,100} Darwin/22\.3\.\d+' + os_replacement: 'iOS' + os_v1_replacement: '16' + 
os_v2_replacement: '3' + - regex: 'CFNetwork/.{0,100} Darwin/22\.4\.\d+' + os_replacement: 'iOS' + os_v1_replacement: '16' + os_v2_replacement: '4' + - regex: 'CFNetwork/.{0,100} Darwin/(22)\.\d+' + os_replacement: 'iOS' + os_v1_replacement: '16' - regex: 'CFNetwork/.{0,100} Darwin/' os_replacement: 'iOS' @@ -1508,7 +1706,7 @@ os_parsers: ########## # Apple Watch ########## - - regex: '(watchOS)/(\d+)\.(\d+)(?:\.(\d+)|)' + - regex: '(watchOS)[/ ](\d+)\.(\d+)(?:\.(\d+)|)' os_replacement: 'WatchOS' ########################## @@ -1651,12 +1849,6 @@ os_parsers: - regex: '(WebTV)/(\d+).(\d+)' - ########## - # Chromecast - ########## - - regex: '(CrKey)(?:[/](\d+)\.(\d+)(?:\.(\d+)|)|)' - os_replacement: 'Chromecast' - ########## # Misc mobile ########## @@ -1679,7 +1871,7 @@ os_parsers: # just os - regex: '(Windows|Android|WeTab|Maemo|Web0S)' - - regex: '(Ubuntu|Kubuntu|Arch Linux|CentOS|Slackware|Gentoo|openSUSE|SUSE|Red Hat|Fedora|PCLinuxOS|Mageia|(?:Free|Open|Net|\b)BSD)' + - regex: '(Ubuntu|Kubuntu|Arch Linux|CentOS|Slackware|Gentoo|openSUSE|SUSE|Red Hat|Fedora|PCLinuxOS|Mageia|SerenityOS|(?:Free|Open|Net|\b)BSD)' # Linux + Kernel Version - regex: '(Linux)(?:[ /](\d+)\.(\d+)(?:\.(\d+)|)|)' - regex: 'SunOS' @@ -1725,11 +1917,11 @@ device_parsers: brand_replacement: 'Spider' # aspiegel.com spider (owned by Huawei) - - regex: 'Mozilla.{1,100}Mobile.{1,100}AspiegelBot' + - regex: 'Mozilla.{1,100}Mobile.{1,100}(AspiegelBot|PetalBot)' device_replacement: 'Spider' brand_replacement: 'Spider' model_replacement: 'Smartphone' - - regex: 'Mozilla.{0,200}AspiegelBot' + - regex: 'Mozilla.{0,200}(AspiegelBot|PetalBot)' device_replacement: 'Spider' brand_replacement: 'Spider' model_replacement: 'Desktop' @@ -2290,6 +2482,39 @@ device_parsers: brand_replacement: 'ChangJia' model_replacement: '$1' + ########## + # Chromecast + # @ref: https://en.wikipedia.org/wiki/Chromecast#Hardware_and_design + ########## + # Ex: Mozilla/5.0 (Linux; Android 12.0; Build/STTL.240206.002) 
AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.0 Safari/537.36 CrKey/1.56.500000 DeviceType/AndroidTV + # Ex: Mozilla/5.0 (X11; Linux armv7l) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.225 Safari/537.36 CrKey/1.56.500000 DeviceType/SmartSpeaker + # Ex: Mozilla/5.0 (X11; Linux armv7l) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.225 Safari/537.36 CrKey/1.56.500000 DeviceType/Chromecast + # These are the newer Chromecast devices, such as smart speakers, Google TVs, etc. that have an explicit device type. + - regex: 'CrKey.*DeviceType/([^/]*)' + brand_replacement: 'Google' + device_replacement: 'Chromecast' + model_replacement: '$1' + + # Ex: Mozilla/5.0 (Fuchsia) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36 CrKey/1.56.500000 + # These are some intermediate "Nest Hub" Chromecast devices running Fuchsia. + - regex: 'Fuchsia.*CrKey' + brand_replacement: 'Google' + device_replacement: 'Chromecast' + model_replacement: 'Nest Hub' + + # Ex: Mozilla/5.0 (X11; Linux armv7l) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.47 Safari/537.36 CrKey/1.36.159268 + # These are the first generation of Chromecast devices that ran Linux. They don't specify a device type. 
+ - regex: 'Linux.*CrKey/1\.36' + brand_replacement: 'Google' + device_replacement: 'Chromecast' + model_replacement: 'First Generation' + + # We have no data on the user agent strings of other models, except that they all report CrKey/ + - regex: 'CrKey/' + brand_replacement: 'Google' + device_replacement: 'Chromecast' + model_replacement: 'Chromecast' + ######### # Cloudfone # @ref: http://www.cloudfonemobile.com/ @@ -2411,10 +2636,6 @@ device_parsers: device_replacement: 'Dell $1' brand_replacement: 'Dell' model_replacement: '$1' - - regex: '; {0,2}Dell ([^;/]{1,100}?)(?: Build|\) AppleWebKit)' - device_replacement: 'Dell $1' - brand_replacement: 'Dell' - model_replacement: '$1' ######### # Denver @@ -2621,10 +2842,14 @@ device_parsers: device_replacement: '$1' brand_replacement: 'Google' model_replacement: '$1' - - regex: '; {0,2}(Pixel.{0,200}?)(?: Build|\) AppleWebKit)' - device_replacement: '$1' + - regex: '; {0,2}([gG]oogle)? (Pixel[ a-zA-Z0-9]{1,100});(?: Build|.{0,50}\) AppleWebKit)' + device_replacement: '$2' brand_replacement: 'Google' - model_replacement: '$1' + model_replacement: '$2' + - regex: '; {0,2}([gG]oogle)? 
(Pixel.{0,200}?)(?: Build|\) AppleWebKit)' + device_replacement: '$2' + brand_replacement: 'Google' + model_replacement: '$2' ######### # Gigabyte @@ -2801,6 +3026,10 @@ device_parsers: device_replacement: 'Huawei $1' brand_replacement: 'Huawei' model_replacement: '$1' + - regex: '; {0,2}([^;]{1,200}) Build/(HONOR|Honor)' + device_replacement: 'Huawei Honor $1' + brand_replacement: 'Huawei' + model_replacement: 'Honor $1' ######### # HTC @@ -3438,6 +3667,30 @@ device_parsers: brand_replacement: 'Meizu' model_replacement: '$1' + ######### + # Meta + # @ref: https://www.meta.com + ######### + - regex: 'Quest 3' + device_replacement: 'Quest' + brand_replacement: 'Meta' + model_replacement: 'Quest 3' + + - regex: 'Quest 2' + device_replacement: 'Quest' + brand_replacement: 'Meta' + model_replacement: 'Quest 2' + + - regex: 'Quest Pro' + device_replacement: 'Quest' + brand_replacement: 'Meta' + model_replacement: 'Quest Pro' + + - regex: 'Quest' + device_replacement: 'Quest' + brand_replacement: 'Meta' + model_replacement: 'Quest' + ######### # Micromax # @ref: http://www.micromaxinfo.com @@ -3747,6 +4000,18 @@ device_parsers: device_replacement: 'OnePlus $1' brand_replacement: 'OnePlus' model_replacement: '$1' + - regex: '; {0,2}(HD1903|GM1917|IN2025|LE2115|LE2127|HD1907|BE2012|BE2025|BE2026|BE2028|BE2029|DE2117|DE2118|EB2101|GM1900|GM1910|GM1915|HD1905|HD1925|IN2015|IN2017|IN2019|KB2005|KB2007|LE2117|LE2125|BE2015|GM1903|HD1900|HD1901|HD1910|HD1913|IN2010|IN2013|IN2020|LE2111|LE2120|LE2121|LE2123|BE2011|IN2023|KB2003|LE2113|NE2215|DN2101)(?: Build|\) AppleWebKit)' + device_replacement: 'OnePlus $1' + brand_replacement: 'OnePlus' + model_replacement: 'OnePlus $1' + - regex: '; (OnePlus[ a-zA-Z0-9]{0,50});((?: Build|.{0,50}\) AppleWebKit))' + device_replacement: '$1' + brand_replacement: 'OnePlus' + model_replacement: '$1' + - regex: '; (OnePlus[ a-zA-Z0-9]{0,50})((?: Build|\) AppleWebKit))' + device_replacement: '$1' + brand_replacement: 'OnePlus' + model_replacement: 
'$1' ######### # Orion @@ -5427,6 +5692,15 @@ device_parsers: device_replacement: 'Motorola $1' brand_replacement: 'Motorola' model_replacement: '$1' + - regex: '; (moto[ a-zA-Z0-9()]{0,50});((?: Build|.{0,50}\) AppleWebKit))' + device_replacement: '$1' + brand_replacement: 'Motorola' + model_replacement: '$1' + - regex: '; {0,2}(moto)(.{0,50})(?: Build|\) AppleWebKit)' + device_replacement: 'Motorola$2' + brand_replacement: 'Motorola' + model_replacement: '$2' + ########## # nintendo @@ -5435,7 +5709,7 @@ device_parsers: device_replacement: 'Nintendo Wii U' brand_replacement: 'Nintendo' model_replacement: 'Wii U' - - regex: 'Nintendo (DS|3DS|DSi|Wii);' + - regex: 'Nintendo (Switch|DS|3DS|DSi|Wii);' device_replacement: 'Nintendo $1' brand_replacement: 'Nintendo' model_replacement: '$1' @@ -5680,4 +5954,4 @@ device_parsers: - regex: 'Mac OS' device_replacement: 'Mac' brand_replacement: 'Apple' - model_replacement: 'Mac' + model_replacement: 'Mac' \ No newline at end of file