Skip to content

Commit

Permalink
Merge pull request #419 from sebastian-nagel/user-agents-from-s3-logs-2
Browse files (browse the repository at this point in the history)
Add user agents from S3 logs
  • Loading branch information
elsigh authored Nov 5, 2019
2 parents bb7d6b7 + 1ffc5c2 commit 7d5f5d9
Show file tree
Hide file tree
Showing 4 changed files with 94 additions and 6 deletions.
18 changes: 15 additions & 3 deletions regexes.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ user_agent_parsers:
# DAVdroid: captures the client name, then major.minor and an optional
# patch component.
- regex: '(DAVdroid)/(\d+)\.(\d+)(?:\.(\d+)|)'

# Downloader / HTTP client libraries.
# The family is the library name; the version is optional and, when present,
# follows a space or '/' with up to three numeric components.
# The two entries below are the "before" and "after" forms of the same line as
# rendered by this diff view; the second adds axios and unirest-<language>.
# NOTE(review): presumably only the second form exists in the resulting file
# - confirm against the repository.
# NOTE(review): 'aihttp' looks like a misspelling of 'aiohttp'. Do not change
# it without checking the dedicated 'Python/3.x aiohttp' pattern and its test
# expectation (family 'Python/3.6 aiohttp'), which this pattern could then
# capture instead.
- regex: '(Google-HTTP-Java-Client|Apache-HttpClient|Go-http-client|scalaj-http|http%20client|Python-urllib|HttpMonitor|TLSProber|WinHTTP|JNLP|okhttp|aihttp|reqwest)(?:[ /](\d+)(?:\.(\d+)|)(?:\.(\d+)|)|)'
- regex: '(Google-HTTP-Java-Client|Apache-HttpClient|Go-http-client|scalaj-http|http%20client|Python-urllib|HttpMonitor|TLSProber|WinHTTP|JNLP|okhttp|aihttp|reqwest|axios|unirest-(?:java|python|ruby|nodejs|php|net))(?:[ /](\d+)(?:\.(\d+)|)(?:\.(\d+)|)|)'

# Pinterestbot
- regex: '(Pinterest(?:bot|))/(\d+)(?:\.(\d+)|)(?:\.(\d+)|)[;\s(]+\+https://www.pinterest.com/bot.html'
Expand All @@ -99,7 +99,7 @@ user_agent_parsers:

# AWS S3 Clients
# must come before "Bots General matcher" to catch "boto"/"boto3" before "bot"
# Requires a word boundary before the client name and at least major.minor
# after the '/'; the patch component is optional. Only three version
# components are captured, so a fourth one (e.g. '3.3.11.0') is left unmatched.
# The two entries below are the "before" and "after" forms of the same line as
# rendered by this diff view; the second adds 'aws-sdk-dotnet-<1-2 digits>' and
# 'aws-sdk-dotnet-core'.
# NOTE(review): presumably only the second form exists in the resulting file
# - confirm against the repository.
- regex: '\b(Boto3?|JetS3t|aws-(?:cli|sdk-(?:cpp|go|java|nodejs|ruby2?))|s3fs)/(\d+)\.(\d+)(?:\.(\d+)|)'
- regex: '\b(Boto3?|JetS3t|aws-(?:cli|sdk-(?:cpp|go|java|nodejs|ruby2?|dotnet-(?:\d{1,2}|core)))|s3fs)/(\d+)\.(\d+)(?:\.(\d+)|)'

# Facebook
# Must come before "Bots General matcher" to catch OrangeBotswana
Expand Down Expand Up @@ -810,13 +810,23 @@ user_agent_parsers:
family_replacement: 'Python Requests'

# headless user-agents
# Command-line tools, updaters and HTTP libraries. The name must start at a
# word boundary; the version is optional and, when present, follows a space or
# '/' with up to three numeric components.
# The two entries below are the "before" and "after" forms of the same line as
# rendered by this diff view; the second adds 'fetch libfetch', 'akka-http'
# and 'got'.
# NOTE(review): there is no boundary after the name, so short alternatives such
# as 'got' or 'curl' also match as the prefix of a longer word - confirm this
# is acceptable for the new entries.
- regex: '\b(Windows-Update-Agent|Microsoft-CryptoAPI|SophosUpdateManager|SophosAgent|Debian APT-HTTP|Ubuntu APT-HTTP|libcurl-agent|libwww-perl|urlgrabber|curl|PycURL|Wget|aria2|Axel|OpenBSD ftp|lftp|jupdate|insomnia)(?:[ /](\d+)(?:\.(\d+)|)(?:\.(\d+)|)|)'
- regex: '\b(Windows-Update-Agent|Microsoft-CryptoAPI|SophosUpdateManager|SophosAgent|Debian APT-HTTP|Ubuntu APT-HTTP|libcurl-agent|libwww-perl|urlgrabber|curl|PycURL|Wget|aria2|Axel|OpenBSD ftp|lftp|jupdate|insomnia|fetch libfetch|akka-http|got)(?:[ /](\d+)(?:\.(\d+)|)(?:\.(\d+)|)|)'

# NOTE(review): the comment/regex pair below appears twice with identical text.
# Presumably this is the removed and the added side of a whitespace-only change
# whose indentation was lost in this view - confirm the file holds it once.
# The family capture includes the 'Python/3.x' prefix; all three version
# components of aiohttp are required.
# Asynchronous HTTP Client/Server for asyncio and Python (https://aiohttp.readthedocs.io/)
- regex: '(Python/3\.\d{1,3} aiohttp)/(\d+)\.(\d+)\.(\d+)'
# Asynchronous HTTP Client/Server for asyncio and Python (https://aiohttp.readthedocs.io/)
- regex: '(Python/3\.\d{1,3} aiohttp)/(\d+)\.(\d+)\.(\d+)'

# Java: the first numeric component after 'Java' is matched but not captured;
# the next two are captured, followed by an optional alphanumeric suffix that
# may be preceded by '_' or '-' characters.
- regex: '(Java)[/ ]{0,1}\d+\.(\d+)\.(\d+)[_-]*([a-zA-Z0-9]+|)'

# Cloud Storage Clients
# Cyberduck: anchored at the start of the string; three version components are
# captured, an optional fourth is matched but not captured.
- regex: '^(Cyberduck)/(\d+)\.(\d+)\.(\d+)(?:\.\d+|)'
# S3 Browser: anchored at the start; the version components are separated by
# hyphens and may be followed by the product URL.
- regex: '^(S3 Browser) (\d+)-(\d+)-(\d+)(?:\s*http://s3browser\.com|)'
# S3Gof3r: name only, matched anywhere in the string; no version is captured.
- regex: '(S3Gof3r)'
# IBM COS (Cloud Object Storage) API
# Requires major.minor after the '/'; the patch component is optional.
- regex: '\b(ibm-cos-sdk-(?:core|java|js|python))/(\d+)\.(\d+)(?:\.(\d+)|)'
# Rusoto - AWS SDK for Rust - https://github.com/rusoto/rusoto
# Anchored at the start; all three version components are required.
- regex: '^(rusoto)/(\d+)\.(\d+)\.(\d+)'
# rclone - rsync for cloud storage - https://rclone.org/
# Anchored at the start; the version carries a literal 'v' prefix and only
# major and minor are captured.
- regex: '^(rclone)/v(\d+)\.(\d+)'

Expand Down Expand Up @@ -1493,6 +1503,8 @@ os_parsers:
os_replacement: 'Red Hat'
# FreeBSD: matches a parenthesised, lowercase 'freebsd' immediately followed by
# major.minor, e.g. '(freebsd11.2)'; the family is rewritten to 'FreeBSD'.
- regex: '\((freebsd)(\d+)\.(\d+)\)'
  os_replacement: 'FreeBSD'
# Linux: bare lowercase 'linux' anywhere in the string, with no capture groups,
# so no version is extracted; the family is set to 'Linux'.
# NOTE(review): presumably placed after the more specific patterns so that it
# only acts as a catch-all - confirm the ordering in the full file.
- regex: 'linux'
  os_replacement: 'Linux'

# Roku Digital-Video-Players https://www.roku.com/
- regex: '^(Roku)/DVP-(\d+)\.(\d+)'
Expand Down
6 changes: 3 additions & 3 deletions test_resources/pgts_browser_list.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -4159,9 +4159,9 @@ test_cases:
patch:

# Test case for 'fetch libfetch/2.0'.
# This diff view interleaves the old and new expectations for the same case.
# NOTE(review): presumably the first family/major/minor triple ('Other', empty
# version) is the removed side and the second ('fetch libfetch', '2', '0') is
# the added side - confirm that the file holds each key only once.
- user_agent_string: 'fetch libfetch/2.0'
family: 'Other'
major:
minor:
family: 'fetch libfetch'
major: '2'
minor: '0'
patch:

- user_agent_string: 'libwww-perl/5.79'
Expand Down
21 changes: 21 additions & 0 deletions tests/test_os.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2700,6 +2700,27 @@ test_cases:
patch:
patch_minor:

# OS expectation for an IBM COS SDK user agent carrying 'Linux/4.9.0-8-amd64':
# family 'Linux' with version 4.9.0 and no fourth component.
- user_agent_string: 'ibm-cos-sdk-java/2.3.0 Linux/4.9.0-8-amd64 Java_HotSpot(TM)_64-Bit_Server_VM/9.0.4+11/9.0.4'
family: 'Linux'
major: '4'
minor: '9'
patch: '0'
patch_minor:

# OS expectation for an AWS .NET SDK user agent carrying
# 'OS/Microsoft_Windows_NT_6.2.9200.0': family 'Windows' with every version
# field left empty.
- user_agent_string: 'aws-sdk-dotnet-45/3.3.11.0 aws-sdk-dotnet-core/3.3.17.10 .NET_Runtime/4.0 .NET_Framework/4.0 OS/Microsoft_Windows_NT_6.2.9200.0 ClientSync'
family: 'Windows'
major:
minor:
patch:
patch_minor:

# OS expectation for a user agent whose only OS hint is a trailing lowercase
# 'linux': family 'Linux' with every version field left empty.
- user_agent_string: 'rusoto/0.36.0 rust/1.35.0 linux'
family: 'Linux'
major:
minor:
patch:
patch_minor:

- user_agent_string: 'SalesforceMobileSDK/5.3.0 android mobile/8.0.0 (SM-G955F) Salesforce1/15.2 Native uid_c4589f605fad8c7e ftr_ Cordova/6.2.3'
family: 'Android'
major: '8'
Expand Down
55 changes: 55 additions & 0 deletions tests/test_ua.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -7664,6 +7664,61 @@ test_cases:
minor: '8'
patch: '1'

# akka-http: name plus a three-component version.
- user_agent_string: 'akka-http/10.0.10'
family: 'akka-http'
major: '10'
minor: '0'
patch: '10'

# aiohttp: the expected family keeps the 'Python/3.6' prefix; the version is
# the aiohttp version, not the Python version.
- user_agent_string: 'Python/3.6 aiohttp/3.5.4'
family: 'Python/3.6 aiohttp'
major: '3'
minor: '5'
patch: '4'

# unirest: the language suffix ('-java') is part of the expected family.
- user_agent_string: 'unirest-java/1.3.11'
family: 'unirest-java'
major: '1'
minor: '3'
patch: '11'

# axios: name plus a three-component version.
- user_agent_string: 'axios/0.18.0'
family: 'axios'
major: '0'
minor: '18'
patch: '0'

# got: the trailing project URL in parentheses does not affect the result.
- user_agent_string: 'got/9.6.0 (https://github.com/sindresorhus/got)'
family: 'got'
major: '9'
minor: '6'
patch: '0'

# S3Gof3r: bare name with no version; all version fields stay empty.
- user_agent_string: 'S3Gof3r'
family: 'S3Gof3r'
major:
minor:
patch:

# rusoto: only the leading 'rusoto/0.36.0' token contributes to the result.
- user_agent_string: 'rusoto/0.36.0 rust/1.35.0 linux'
family: 'rusoto'
major: '0'
minor: '36'
patch: '0'

# IBM COS SDK: the SDK flavour ('-java') is part of the expected family.
- user_agent_string: 'ibm-cos-sdk-java/2.3.0 Linux/4.9.0-8-amd64 Java_HotSpot(TM)_64-Bit_Server_VM/9.0.4+11/9.0.4'
family: 'ibm-cos-sdk-java'
major: '2'
minor: '3'
patch: '0'

# AWS .NET SDK: the first token ('aws-sdk-dotnet-45') supplies the family and
# version; the later 'aws-sdk-dotnet-core' token is not reflected.
# NOTE(review): this is the only case here with a patch_minor key, and the
# 'AWS S3 Clients' pattern captures just three version groups, so the '0'
# cannot come from that regex - confirm whether the test harness reads
# patch_minor for user-agent cases.
- user_agent_string: 'aws-sdk-dotnet-45/3.3.11.0 aws-sdk-dotnet-core/3.3.17.10 .NET_Runtime/4.0 .NET_Framework/4.0 OS/Microsoft_Windows_NT_6.2.9200.0 ClientSync'
family: 'aws-sdk-dotnet-45'
major: '3'
minor: '3'
patch: '11'
patch_minor: '0'

- user_agent_string: 'Boto/2.48.0 Python/2.7.14 Linux/4.2.0-41-generic'
family: 'Boto'
major: '2'
Expand Down

0 comments on commit 7d5f5d9

Please sign in to comment.