From a516573a1481bb6ad9f07cab2e9e71d131f60ea3 Mon Sep 17 00:00:00 2001 From: Andrew Kuklewicz Date: Wed, 11 Oct 2023 22:30:27 -0400 Subject: [PATCH] Handle dash for xff, and region id starting the path --- etc/cdn-log-shipper/log-shipper.yml | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/etc/cdn-log-shipper/log-shipper.yml b/etc/cdn-log-shipper/log-shipper.yml index a38ae64e6..3b84f31c2 100644 --- a/etc/cdn-log-shipper/log-shipper.yml +++ b/etc/cdn-log-shipper/log-shipper.yml @@ -92,6 +92,7 @@ Resources: const crypto = require('crypto'); const IPV4_MASK = /\.[0-9]{1,3}$/; + const maskIp = (ip, field) => { if (ip.match(IPV4_MASK)) { return ip.replace(IPV4_MASK, '.0'); @@ -119,6 +120,16 @@ Resources: }); }; + const findIp = (xff, ip) => { + if (xff === '-') { + return ip; + } else if (xff) { + return xff.split(',').map(s => s.trim()).filter(s => s)[0] || ip; /* xff with no non-empty entries (e.g. "," or " ") falls back to the client ip instead of returning undefined */ + } else { + return ip; + } + }; + const PODCAST_IDS = process.env.PODCAST_IDS.split(',').map(s => s.trim()).filter(s => s); const IGNORE_PATHS = ['/', '/favicon.ico', '/robots.txt']; @@ -146,6 +157,12 @@ Resources: // podcast id and episode guid (only works for dovetail3-cdn requests) const datas = mappedRows.filter(data => { const parts = data['cs-uri-stem'].split('/').filter(s => s); + + // if the path starts with a region like usw2, shift that off + if (parts[0] && parts[0].match(/^[a-z][a-z0-9\-]+$/)) { + parts.shift(); + } + if (parts.length === 4) { data['prx-podcast-id'] = parts[0]; data['prx-episode-guid'] = parts[1]; @@ -163,8 +180,7 @@ Resources: // calculate listener_ids datas.forEach(data => { // use leftmost XFF or IP - const xffParts = (data['x-forwarded-for'] || '').split(',').map(s => s.trim()).filter(s => s); - const leftMostIp = xffParts[0] || data['c-ip']; + const leftMostIp = findIp(data['x-forwarded-for'], data['c-ip']); // truncate ipv6 but not ipv4 const truncatedIp = leftMostIp.includes(':') ? maskIp(leftMostIp, 'listener-id') : leftMostIp;