def get_url_to_purl(url, pkg_manager, oss_name='', oss_version=''):
    """Build a package URL (purl) string for a package link.

    The link is first handed to ``url2purl.get_purl``. If the result already
    starts with ``pkg:<pkg_manager>`` it is returned unchanged. Otherwise the
    purl is assembled by hand, either from the groups captured by the
    ``constant.PKG_PATTERN`` regex registered for ``pkg_manager`` or, when that
    regex does not match, from ``oss_name`` / ``oss_version`` (swift) or from
    the url2purl result plus ``oss_version`` (carthage).

    Args:
        url: Download location or homepage of the package.
        pkg_manager: Package manager name used as the purl type
            (e.g. 'maven', 'pub', 'cocoapods', 'go', 'swift', 'carthage').
        oss_name: Package name; only read for 'cocoapods' and 'swift'.
        oss_version: Package version; only read for 'cocoapods', 'swift'
            and 'carthage'.

    Returns:
        The purl as a string. An empty string is returned when no purl could
        be determined at all (previously the literal text 'None' leaked out).
    """
    purl_prefix = f'pkg:{pkg_manager}'

    # url2purl.get_purl() yields None for links it cannot resolve; calling
    # str() on that would put the literal text 'None' into the report.
    detected = url2purl.get_purl(url)
    purl = '' if detected is None else str(detected)

    # Plain prefix comparison: the prefix is not meant to be a regex.
    if purl.startswith(purl_prefix):
        return purl

    url_match = re.match(constant.PKG_PATTERN.get(pkg_manager, 'not_support'), url)
    try:
        if url_match:
            if pkg_manager == 'maven':
                purl = f'{purl_prefix}/{url_match.group(1)}/{url_match.group(2)}@{url_match.group(3)}'
            elif pkg_manager == 'pub':
                purl = f'{purl_prefix}/{url_match.group(1)}@{url_match.group(2)}'
            elif pkg_manager == 'cocoapods':
                # The name may carry a subspec, ex, GoogleUtilities/NSData+zlib;
                # the part after the slash is appended after '#'.
                name_match = re.match(r'([^\/]+)\/?([^\/]*)', oss_name)
                purl = f'{purl_prefix}/{name_match.group(1)}@{oss_version}'
                if name_match.group(2):
                    purl = f'{purl}#{name_match.group(2)}'
            elif pkg_manager == 'go':
                # 'pkg:go' + 'lang' gives the 'pkg:golang' purl type.
                purl = f'{purl_prefix}lang/{url_match.group(1)}@{url_match.group(2)}'
        elif pkg_manager == 'swift':
            if oss_version:
                purl = f'{purl_prefix}/{oss_name}@{oss_version}'
            else:
                purl = f'{purl_prefix}/{oss_name}'
        elif pkg_manager == 'carthage':
            # Only pin the version when there is a purl to attach it to.
            if purl and oss_version:
                purl = f'{purl}@{oss_version}'
    except Exception:
        # Best effort: keep whatever url2purl produced. The original message
        # lacked the f-prefix and logged the text '{purl}' verbatim.
        logger.debug(f'Fail to get purl. So use the link purl({purl}).')
    return purl
fosslight_util.constant as constant import fosslight_dependency.constant as const from fosslight_dependency._package_manager import PackageManager -from fosslight_dependency._package_manager import connect_github -from fosslight_dependency._package_manager import get_github_license -from fosslight_dependency._package_manager import check_and_run_license_scanner +from fosslight_dependency._package_manager import connect_github, get_github_license, check_and_run_license_scanner +from fosslight_dependency._package_manager import get_url_to_purl logger = logging.getLogger(constant.LOGGER_NAME) @@ -60,9 +59,10 @@ def parse_oss_information(self, f_name): else: homepage = oss_path dn_loc = homepage - oss_version = re_result[0][2] + purl = get_url_to_purl(homepage, self.package_manager_name, oss_origin_name, oss_version) + license_name = '' find_license = False if oss_origin_name in checkout_dir_list: @@ -96,8 +96,8 @@ def parse_oss_information(self, f_name): else: comment = 'transitive' - sheet_list.append([const.SUPPORT_PACKAE.get(self.package_manager_name), - oss_name, oss_version, license_name, dn_loc, homepage, '', '', comment, '']) + sheet_list.append([purl, oss_name, oss_version, license_name, dn_loc, homepage, + '', '', comment, '']) except Exception as e: logger.warning(f"Failed to parse oss information: {e}") diff --git a/src/fosslight_dependency/package_manager/Cocoapods.py b/src/fosslight_dependency/package_manager/Cocoapods.py index 214e5831..56d3af37 100644 --- a/src/fosslight_dependency/package_manager/Cocoapods.py +++ b/src/fosslight_dependency/package_manager/Cocoapods.py @@ -10,7 +10,7 @@ import re import fosslight_util.constant as constant import fosslight_dependency.constant as const -from fosslight_dependency._package_manager import PackageManager +from fosslight_dependency._package_manager import PackageManager, get_url_to_purl logger = logging.getLogger(constant.LOGGER_NAME) @@ -128,6 +128,8 @@ def parse_oss_information(self, f_name): spec_file_path 
= os.path.join(file_path_without_version, pod_oss_version, file_path[-1]) oss_name, oss_version, license_name, dn_loc, homepage = self.get_oss_in_podspec(spec_file_path) + purl = get_url_to_purl(homepage, self.package_manager_name, pod_oss_name_origin, oss_version) + self.purl_dict[f'{pod_oss_name_origin}({oss_version})'] = purl if pod_oss_name in external_source_list: homepage = dn_loc if oss_name == '': @@ -135,8 +137,8 @@ def parse_oss_information(self, f_name): if pod_oss_version != oss_version: logger.warning(f'{pod_oss_name_origin} has different version({pod_oss_version})\ with spec version({oss_version})') - sheet_list.append([const.SUPPORT_PACKAE.get(self.package_manager_name), oss_name_report, - pod_oss_version, license_name, dn_loc, homepage, '', '', comment, deps]) + sheet_list.append([purl, oss_name_report, pod_oss_version, license_name, dn_loc, homepage, + '', '', comment, deps]) except Exception as e: logger.warning(f"Fail to get {pod_oss_name_origin}:{e}") diff --git a/src/fosslight_dependency/package_manager/Go.py b/src/fosslight_dependency/package_manager/Go.py index 256eaa4b..0990dbfd 100644 --- a/src/fosslight_dependency/package_manager/Go.py +++ b/src/fosslight_dependency/package_manager/Go.py @@ -12,7 +12,7 @@ import re import fosslight_util.constant as constant import fosslight_dependency.constant as const -from fosslight_dependency._package_manager import PackageManager +from fosslight_dependency._package_manager import PackageManager, get_url_to_purl logger = logging.getLogger(constant.LOGGER_NAME) @@ -107,6 +107,8 @@ def parse_oss_information(self, f_name): homepage_set = [] homepage = self.dn_url + package_path + purl = get_url_to_purl(f"{homepage}@{oss_version}", self.package_manager_name) + self.purl_dict[f'{package_path}({oss_version})'] = purl if oss_origin_version: tmp_homepage = f"{homepage}@{oss_origin_version}" @@ -148,7 +150,7 @@ def parse_oss_information(self, f_name): comment = ','.join(comment_list) deps = ','.join(deps_list) - 
sheet_list.append([const.SUPPORT_PACKAE.get(self.package_manager_name), - oss_name, oss_version, license_name, dn_loc, homepage, '', '', comment, deps]) + sheet_list.append([purl, oss_name, oss_version, license_name, dn_loc, homepage, + '', '', comment, deps]) return sheet_list diff --git a/src/fosslight_dependency/package_manager/Gradle.py b/src/fosslight_dependency/package_manager/Gradle.py index 615a279f..f7e14b1e 100644 --- a/src/fosslight_dependency/package_manager/Gradle.py +++ b/src/fosslight_dependency/package_manager/Gradle.py @@ -9,7 +9,7 @@ import fosslight_util.constant as constant import fosslight_dependency.constant as const from fosslight_dependency._package_manager import PackageManager -from fosslight_dependency._package_manager import version_refine +from fosslight_dependency._package_manager import version_refine, get_url_to_purl logger = logging.getLogger(constant.LOGGER_NAME) @@ -59,6 +59,7 @@ def parse_oss_information(self, f_name): oss_version = version_refine(oss_ini_version) license_names = [] + purl = '' try: for licenses in d['licenses']: if licenses['name'] != '': @@ -73,6 +74,8 @@ def parse_oss_information(self, f_name): else: dn_loc = f"{self.dn_url}{group_id}/{artifact_id}/{oss_ini_version}" homepage = f"{self.dn_url}{group_id}/{artifact_id}" + purl = get_url_to_purl(dn_loc, 'maven') + self.purl_dict[f'{oss_name}({oss_ini_version})'] = purl comment_list = [] deps_list = [] @@ -90,8 +93,8 @@ def parse_oss_information(self, f_name): comment = ','.join(comment_list) deps = ','.join(deps_list) - sheet_list.append([const.SUPPORT_PACKAE.get(self.package_manager_name), - oss_name, oss_version, license_name, dn_loc, homepage, '', '', comment, deps]) + sheet_list.append([purl, oss_name, oss_version, license_name, dn_loc, homepage, + '', '', comment, deps]) return sheet_list diff --git a/src/fosslight_dependency/package_manager/Helm.py b/src/fosslight_dependency/package_manager/Helm.py index c5127e4a..35c22cb8 100644 --- 
a/src/fosslight_dependency/package_manager/Helm.py +++ b/src/fosslight_dependency/package_manager/Helm.py @@ -10,7 +10,7 @@ import shutil import fosslight_util.constant as constant import fosslight_dependency.constant as const -from fosslight_dependency._package_manager import PackageManager +from fosslight_dependency._package_manager import PackageManager, get_url_to_purl from fosslight_util.download import extract_compressed_dir logger = logging.getLogger(constant.LOGGER_NAME) @@ -73,6 +73,7 @@ def parse_oss_information(self, f_name): for dep in dep_item_list: try: f_path = os.path.join(self.tmp_charts_dir, dep, f_name) + purl = '' with open(f_path, 'r', encoding='utf8') as yaml_fp: yaml_f = yaml.safe_load(yaml_fp) oss_name = f'{self.package_manager_name}:{yaml_f["name"]}' @@ -85,6 +86,8 @@ def parse_oss_information(self, f_name): if yaml_f.get('sources', '') != '': dn_loc = yaml_f.get('sources', '')[0] + purl = get_url_to_purl(dn_loc if dn_loc else homepage, self.package_manager_name) + license_name = '' if yaml_f.get('annotations', '') != '': license_name = yaml_f['annotations'].get('licenses', '') @@ -96,7 +99,7 @@ def parse_oss_information(self, f_name): logging.warning(f"Fail to parse chart info {dep}: {e}") continue - sheet_list.append([const.SUPPORT_PACKAE.get(self.package_manager_name), - oss_name, oss_version, license_name, dn_loc, homepage, '', '', comment, '']) + sheet_list.append([purl, oss_name, oss_version, license_name, dn_loc, homepage, + '', '', comment, '']) return sheet_list diff --git a/src/fosslight_dependency/package_manager/Maven.py b/src/fosslight_dependency/package_manager/Maven.py index 4ab389d3..d9686aaa 100644 --- a/src/fosslight_dependency/package_manager/Maven.py +++ b/src/fosslight_dependency/package_manager/Maven.py @@ -13,7 +13,7 @@ import fosslight_util.constant as constant import fosslight_dependency.constant as const from fosslight_dependency._package_manager import PackageManager -from fosslight_dependency._package_manager 
import version_refine +from fosslight_dependency._package_manager import version_refine, get_url_to_purl logger = logging.getLogger(constant.LOGGER_NAME) @@ -225,6 +225,8 @@ def parse_oss_information(self, f_name): oss_name = f"{groupid}:{artifactid}" dn_loc = f"{self.dn_url}{groupid}/{artifactid}/{version}" homepage = f"{self.dn_url}{groupid}/{artifactid}" + purl = get_url_to_purl(dn_loc, self.package_manager_name) + self.purl_dict[f'{oss_name}({oss_version})'] = purl licenses = d.find("licenses") if len(licenses): @@ -253,7 +255,7 @@ def parse_oss_information(self, f_name): comment = ','.join(comment_list) deps = ','.join(deps_list) - sheet_list.append([const.SUPPORT_PACKAE.get(self.package_manager_name), - oss_name, oss_version, license_name, dn_loc, homepage, '', '', comment, deps]) + sheet_list.append([purl, oss_name, oss_version, license_name, dn_loc, homepage, + '', '', comment, deps]) return sheet_list diff --git a/src/fosslight_dependency/package_manager/Npm.py b/src/fosslight_dependency/package_manager/Npm.py index 230c4324..96e4bc1e 100644 --- a/src/fosslight_dependency/package_manager/Npm.py +++ b/src/fosslight_dependency/package_manager/Npm.py @@ -11,7 +11,7 @@ import re import fosslight_util.constant as constant import fosslight_dependency.constant as const -from fosslight_dependency._package_manager import PackageManager +from fosslight_dependency._package_manager import PackageManager, get_url_to_purl logger = logging.getLogger(constant.LOGGER_NAME) node_modules = 'node_modules' @@ -176,6 +176,8 @@ def parse_oss_information(self, f_name): homepage = self.dn_url + oss_init_name dn_loc = f"{self.dn_url}{oss_init_name}/v/{oss_version}" + purl = get_url_to_purl(dn_loc, self.package_manager_name) + self.purl_dict[f'{oss_init_name}({oss_version})'] = purl if d[_repository]: dn_loc = d[_repository] elif private_pkg: @@ -205,13 +207,13 @@ def parse_oss_information(self, f_name): deps = ','.join(deps_list) if multi_flag: comment = f'{comment}, 
{license_comment}' - sheet_list.append([const.SUPPORT_PACKAE.get(self.package_manager_name), - oss_name, oss_version, multi_license, dn_loc, homepage, '', '', comment, deps]) + sheet_list.append([purl, oss_name, oss_version, multi_license, dn_loc, homepage, + '', '', comment, deps]) else: license_name = license_name.replace(",", "") license_name = check_unknown_license(license_name, manifest_file_path) - sheet_list.append([const.SUPPORT_PACKAE.get(self.package_manager_name), - oss_name, oss_version, license_name, dn_loc, homepage, '', '', comment, deps]) + sheet_list.append([purl, oss_name, oss_version, license_name, dn_loc, homepage, + '', '', comment, deps]) return sheet_list diff --git a/src/fosslight_dependency/package_manager/Nuget.py b/src/fosslight_dependency/package_manager/Nuget.py index 2f4482d2..654ace31 100644 --- a/src/fosslight_dependency/package_manager/Nuget.py +++ b/src/fosslight_dependency/package_manager/Nuget.py @@ -12,7 +12,7 @@ import fosslight_util.constant as constant import fosslight_dependency.constant as const from fosslight_dependency._package_manager import PackageManager -from fosslight_dependency._package_manager import check_and_run_license_scanner +from fosslight_dependency._package_manager import check_and_run_license_scanner, get_url_to_purl logger = logging.getLogger(constant.LOGGER_NAME) @@ -95,8 +95,11 @@ def parse_oss_information(self, f_name): else: if dn_loc.endswith('.git'): dn_loc = dn_loc[:-4] + purl = get_url_to_purl(f'{homepage}/{oss_version}', self.package_manager_name) else: comment_list.append('Fail to response for nuget api') + purl = f'pkg:nuget/{oss_origin_name}@{oss_version}' + self.purl_dict[f'{oss_origin_name}({oss_version})'] = purl deps_list = [] if self.direct_dep and self.packageReference: @@ -112,8 +115,7 @@ def parse_oss_information(self, f_name): comment = ','.join(comment_list) deps = ','.join(deps_list) - sheet_list.append([','.join(self.input_package_list_file), - oss_name, oss_version, license_name, 
dn_loc, homepage, '', '', comment, deps]) + sheet_list.append([purl, oss_name, oss_version, license_name, dn_loc, homepage, '', '', comment, deps]) except Exception as e: logger.warning(f"Failed to parse oss information: {e}") diff --git a/src/fosslight_dependency/package_manager/Pub.py b/src/fosslight_dependency/package_manager/Pub.py index 8f6a9a10..5c462b4b 100644 --- a/src/fosslight_dependency/package_manager/Pub.py +++ b/src/fosslight_dependency/package_manager/Pub.py @@ -13,7 +13,7 @@ import fosslight_util.constant as constant import fosslight_dependency.constant as const from fosslight_dependency._package_manager import PackageManager -from fosslight_dependency._package_manager import check_and_run_license_scanner +from fosslight_dependency._package_manager import check_and_run_license_scanner, get_url_to_purl logger = logging.getLogger(constant.LOGGER_NAME) @@ -124,6 +124,8 @@ def parse_oss_information(self, f_name): if homepage is None: homepage = '' dn_loc = f"{self.dn_url}{oss_origin_name}/versions/{oss_version}" + purl = get_url_to_purl(dn_loc, self.package_manager_name) + self.purl_dict[f'{oss_origin_name}({oss_version})'] = purl license_txt = json_data['license'] tmp_license_txt = open(tmp_license_txt_file_name, 'w', encoding='utf-8') @@ -158,8 +160,8 @@ def parse_oss_information(self, f_name): deps_list.extend(rel_items) comment = ','.join(comment_list) deps = ','.join(deps_list) - sheet_list.append([const.SUPPORT_PACKAE.get(self.package_manager_name), - oss_name, oss_version, license_name, dn_loc, homepage, '', '', comment, deps]) + sheet_list.append([purl, oss_name, oss_version, license_name, dn_loc, homepage, + '', '', comment, deps]) except Exception as e: logger.error(f"Fail to parse pub oss information: {e}") diff --git a/src/fosslight_dependency/package_manager/Pypi.py b/src/fosslight_dependency/package_manager/Pypi.py index 36a9c0ee..81648052 100644 --- a/src/fosslight_dependency/package_manager/Pypi.py +++ 
b/src/fosslight_dependency/package_manager/Pypi.py @@ -13,7 +13,7 @@ import fosslight_util.constant as constant import fosslight_dependency.constant as const from fosslight_dependency._package_manager import PackageManager -from fosslight_dependency._package_manager import check_and_run_license_scanner +from fosslight_dependency._package_manager import check_and_run_license_scanner, get_url_to_purl logger = logging.getLogger(constant.LOGGER_NAME) @@ -282,7 +282,8 @@ def parse_oss_information(self, f_name): homepage = check_UNKNOWN(d['URL']) oss_version = d['Version'] dn_loc = f"{self.dn_url}{oss_init_name}/{oss_version}" - + purl = get_url_to_purl(dn_loc, self.package_manager_name) + self.purl_dict[f'{oss_init_name}({oss_version})'] = purl if license_name is not None: license_name = license_name.replace(';', ',') else: @@ -308,8 +309,7 @@ def parse_oss_information(self, f_name): deps_list.extend(rel_items) comment = ','.join(comment_list) deps = ','.join(deps_list) - sheet_list.append([', '.join(self.manifest_file_name), - oss_name, oss_version, + sheet_list.append([purl, oss_name, oss_version, license_name, dn_loc, homepage, '', '', comment, deps]) except Exception as ex: diff --git a/src/fosslight_dependency/package_manager/Swift.py b/src/fosslight_dependency/package_manager/Swift.py index 7389520a..7198a3ac 100644 --- a/src/fosslight_dependency/package_manager/Swift.py +++ b/src/fosslight_dependency/package_manager/Swift.py @@ -10,8 +10,8 @@ import fosslight_util.constant as constant import fosslight_dependency.constant as const from fosslight_dependency._package_manager import PackageManager -from fosslight_dependency._package_manager import connect_github -from fosslight_dependency._package_manager import get_github_license +from fosslight_dependency._package_manager import connect_github, get_github_license +from fosslight_dependency._package_manager import get_url_to_purl logger = logging.getLogger(constant.LOGGER_NAME) @@ -134,6 +134,8 @@ def 
parse_oss_information(self, f_name): license_name = '' github_repo = "/".join(homepage.split('/')[-2:]) + purl = get_url_to_purl(dn_loc, self.package_manager_name, github_repo, oss_version) + self.purl_dict[f'{oss_origin_name}({oss_version})'] = purl license_name = get_github_license(g, github_repo, self.platform, self.license_scanner_bin) comment_list = [] @@ -150,7 +152,7 @@ def parse_oss_information(self, f_name): deps_list.extend(rel_items) comment = ','.join(comment_list) deps = ','.join(deps_list) - sheet_list.append([const.SUPPORT_PACKAE.get(self.package_manager_name), - oss_name, oss_version, license_name, dn_loc, homepage, '', '', comment, deps]) + sheet_list.append([purl, oss_name, oss_version, license_name, dn_loc, homepage, + '', '', comment, deps]) return sheet_list diff --git a/src/fosslight_dependency/run_dependency_scanner.py b/src/fosslight_dependency/run_dependency_scanner.py index da77d115..515a08d6 100755 --- a/src/fosslight_dependency/run_dependency_scanner.py +++ b/src/fosslight_dependency/run_dependency_scanner.py @@ -26,7 +26,7 @@ logger = logging.getLogger(constant.LOGGER_NAME) warnings.filterwarnings("ignore", category=FutureWarning) _sheet_name = "DEP_FL_Dependency" -EXTENDED_HEADER = {_sheet_name: ['ID', 'Source Name or Path', 'OSS Name', +EXTENDED_HEADER = {_sheet_name: ['ID', 'Purl', 'OSS Name', 'OSS Version', 'License', 'Download Location', 'Homepage', 'Copyright Text', 'Exclude', 'Comment', 'Dependencies']}