KSP-CKAN · HebaruSan · Sep 15, 2024 · Sep 15, 2024
diff --git a/netkan/netkan/download_counter.py b/netkan/netkan/download_counter.py
@@ -190,11 +190,8 @@ def empty(self) -> bool:
     def full(self) -> bool:
         return len(self.ids) >= self.MODULES_PER_REQUEST
 
-    def _get_ia_ident(self, ckan: Ckan) -> str:
-        return f'{ckan.identifier}-{ckan.version.string.replace(":", "-")}'
-
     def add(self, ckan: Ckan) -> None:
-        self.ids[ckan.identifier] = self._get_ia_ident(ckan)
+        self.ids[ckan.identifier] = ckan.mirror_item()
 
     def get_result(self, counts: Optional[Dict[str, int]] = None) -> Dict[str, int]:
         if counts is None:

diff --git a/netkan/netkan/metadata.py b/netkan/netkan/metadata.py
@@ -119,6 +119,8 @@ class Ckan:
 
     EPOCH_VERSION_REGEXP = re.compile('^[0-9]+:')
 
+    BUCKET_EXCLUDE_PATTERN = re.compile(r'^[^a-zA-Z0-9]+|[^a-zA-Z0-9._-]')
+
     REDISTRIBUTABLE_LICENSES = {
         "public-domain",
         "Apache", "Apache-1.0", "Apache-2.0",
@@ -443,6 +445,19 @@ def mirror_download(self, with_epoch: bool = True) -> Optional[str]:
             return f'https://archive.org/download/{self.identifier}-{self._format_version(with_epoch)}/{filename}'
         return None
 
+    def mirror_item(self, with_epoch: bool = True) -> str:
+        """Return '<identifier>-<version>' sanitized via _ia_bucket_sanitize."""
+        return self._ia_bucket_sanitize(f'{self.identifier}-{self._format_version(with_epoch)}')
+
+    # InternetArchive says:
+    # Bucket names should be valid archive identifiers;
+    # try someting matching this regular expression:
+    # ^[a-zA-Z0-9][a-zA-Z0-9_.-]{4,100}$
+    @classmethod
+    def _ia_bucket_sanitize(cls, s: str) -> str:
+        """Strip disallowed characters and cap at 100 (minimum length is not enforced)."""
+        return re.sub(cls.BUCKET_EXCLUDE_PATTERN, '', s)[:100]
+
     def _format_version(self, with_epoch: bool) -> Optional[str]:
         if self.version:
             if with_epoch:

diff --git a/netkan/netkan/mirrorer.py b/netkan/netkan/mirrorer.py
@@ -28,8 +28,6 @@ class CkanMirror(Ckan):
     DESCRIPTION_TEMPLATE = Template(
         legacy_read_text('netkan', 'mirror_description_template.jinja2'))
 
-    BUCKET_EXCLUDE_PATTERN = re.compile(r'^[^a-zA-Z0-9]+|[^a-zA-Z0-9._-]')
-
     LICENSE_URLS = {
         "Apache"            : 'http://www.apache.org/licenses/LICENSE-1.0',
         "Apache-1.0"        : 'http://www.apache.org/licenses/LICENSE-1.0',
@@ -133,26 +131,13 @@ def license_urls(self) -> List[str]:
         return [self.LICENSE_URLS[lic]
                 for lic in self.licenses() if lic in self.LICENSE_URLS]
 
-    def mirror_item(self, with_epoch: bool = True) -> str:
-        return self._ia_bucket_sanitize(
-            f'{self.identifier}-{self._format_version(with_epoch)}')
-
     def mirror_source_filename(self, with_epoch: bool = True) -> str:
         return self._ia_bucket_sanitize(
             f'{self.identifier}-{self._format_version(with_epoch)}.source.zip')
 
     def mirror_title(self, with_epoch: bool = True) -> str:
         return f'{self.name} - {self._format_version(with_epoch)}'
 
-    # InternetArchive says:
-    # Bucket names should be valid archive identifiers;
-    # try someting matching this regular expression:
-    # ^[a-zA-Z0-9][a-zA-Z0-9_.-]{4,100}$
-    # (We enforce everything except the minimum of 4 characters)
-    @classmethod
-    def _ia_bucket_sanitize(cls, s: str) -> str:
-        return cls.BUCKET_EXCLUDE_PATTERN.sub('', s)[:100]
-
     @property
     def item_metadata(self) -> Dict[str, Any]:
         lic_urls = self.license_urls()