From 9603cdd8b26325dd0a0915220d5f3fbef6a49ee5 Mon Sep 17 00:00:00 2001 From: frypam Date: Fri, 30 Aug 2024 10:06:46 +0900 Subject: [PATCH 01/16] Setting.json with source_scanner selection (#109) Signed-off-by: soonhong99 --- src/fosslight_scanner/_parse_setting.py | 31 ++++-- src/fosslight_scanner/cli.py | 120 ++++++++++++++------- src/fosslight_scanner/common.py | 48 ++++----- src/fosslight_scanner/fosslight_scanner.py | 96 +++++++++++------ tests/setting.json | 11 +- 5 files changed, 202 insertions(+), 104 deletions(-) diff --git a/src/fosslight_scanner/_parse_setting.py b/src/fosslight_scanner/_parse_setting.py index 83c73ad..a5d1f37 100644 --- a/src/fosslight_scanner/_parse_setting.py +++ b/src/fosslight_scanner/_parse_setting.py @@ -11,7 +11,7 @@ def parse_setting_json(data): dep_argument = data.get('dep_argument', '') output = data.get('output', '') format = data.get('format', '') - link = data.get('link', "") + link = data.get('link', '') db_url = data.get('db_url', '') timer = data.get('timer', False) raw = data.get('raw', False) @@ -20,34 +20,49 @@ def parse_setting_json(data): correct_fpath = data.get('correct_fpath', '') ui = data.get('ui', False) exclude_path = data.get('exclude', []) - + selected_source_scanner = data.get('selected_source_scanner', '') + source_write_json_file = data.get('source_write_json_file', False) + source_print_matched_text = data.get('source_print_matched_text', False) + source_time_out = data.get('source_time_out', 120) + binary_simple = data.get('binary_simple', False) str_lists = [mode, path, exclude_path] - strings = [dep_argument, output, format, db_url, correct_fpath, link] - booleans = [timer, raw, no_correction, ui] + strings = [ + dep_argument, output, format, db_url, + correct_fpath, link, selected_source_scanner + ] + booleans = [timer, raw, no_correction, ui, source_write_json_file, source_print_matched_text, binary_simple] + is_incorrect = False # check if json file is incorrect format for i, target in enumerate(str_lists): - if not (isinstance(target, list) and all(isinstance(item, str) for item in target)): + if not (isinstance(target, list) and + all(isinstance(item, str) for item in target)): is_incorrect = True str_lists[i] = [] for i, target in enumerate(strings): if not isinstance(target, str): is_incorrect = True - str_lists[i] = '' + strings[i] = '' for i, target in enumerate(booleans): if not isinstance(target, bool): is_incorrect = True - str_lists[i] = False + booleans[i] = False if not isinstance(core, int): is_incorrect = True core = -1 + if not isinstance(source_time_out, int): + is_incorrect = True + source_time_out = 120 + if is_incorrect: print('Ignoring some values with incorrect format in the setting file.') return mode, path, dep_argument, output, format, link, db_url, timer, \ - raw, core, no_correction, correct_fpath, ui, exclude_path + raw, core, no_correction, correct_fpath, ui, exclude_path, \ + selected_source_scanner, source_write_json_file, source_print_matched_text, source_time_out, \ + binary_simple diff --git a/src/fosslight_scanner/cli.py b/src/fosslight_scanner/cli.py index 2a61c65..5b0f691 100644 --- a/src/fosslight_scanner/cli.py +++ b/src/fosslight_scanner/cli.py @@ -4,68 +4,107 @@ # SPDX-License-Identifier: Apache-2.0 import sys import json +import os +import os.path from argparse import ArgumentParser + from ._help import print_help_msg from .fosslight_scanner import run_main, PKG_NAME from ._parse_setting import parse_setting_json from fosslight_util.help import print_package_version -import os.path def set_args(mode, path, dep_argument, output, format, link, db_url, timer, raw, core, no_correction, correct_fpath, ui, setting, exclude_path): + + selected_source_scanner = "all" + source_write_json_file = False + source_print_matched_text = False + source_time_out = 120 + binary_simple = False + if setting and os.path.isfile(setting): try: with open(setting, 'r', encoding='utf-8') as file: data = json.load(file) s_mode, s_path, s_dep_argument, s_output, s_format, s_link, s_db_url, s_timer, s_raw, s_core, \ - s_no_correction, s_correct_fpath, s_ui, s_exclude_path = parse_setting_json(data) + s_no_correction, s_correct_fpath, s_ui, s_exclude_path, \ + s_selected_source_scanner, s_source_write_json_file, s_source_print_matched_text, \ + s_source_time_out, s_binary_simple = parse_setting_json(data) # direct cli arguments have higher priority than setting file - mode = mode if mode else s_mode - path = path if path else s_path - dep_argument = dep_argument if dep_argument else s_dep_argument - output = output if output else s_output - format = format if format else s_format - link = link if link else s_link - db_url = db_url if db_url else s_db_url - timer = timer if timer else s_timer - raw = raw if raw else s_raw - core = core if core else s_core - no_correction = no_correction if no_correction else s_no_correction - correct_fpath = correct_fpath if correct_fpath else s_correct_fpath - ui = ui if ui else s_ui - exclude_path = exclude_path if exclude_path else s_exclude_path + mode = mode or s_mode + path = path or s_path + dep_argument = dep_argument or s_dep_argument + output = output or s_output + format = format or s_format + link = link or s_link + db_url = db_url or s_db_url + timer = timer or s_timer + raw = raw or s_raw + core = core if core != -1 else s_core + no_correction = no_correction or s_no_correction + correct_fpath = correct_fpath or s_correct_fpath + ui = ui or s_ui + exclude_path = exclude_path or s_exclude_path + + # These options are only set from the setting file, not from CLI arguments + selected_source_scanner = s_selected_source_scanner or selected_source_scanner + source_write_json_file = s_source_write_json_file + source_print_matched_text = s_source_print_matched_text + source_time_out = s_source_time_out if s_source_time_out != 120 else source_time_out + binary_simple = s_binary_simple except Exception as e: print(f"Cannot open setting file: {e}") return mode, path, dep_argument, output, format, link, db_url, timer, \ - raw, core, no_correction, correct_fpath, ui, exclude_path + raw, core, no_correction, correct_fpath, ui, exclude_path, \ + selected_source_scanner, source_write_json_file, source_print_matched_text, source_time_out, \ + binary_simple def main(): - parser = ArgumentParser(description='FOSSLight Scanner', prog='fosslight_scanner', add_help=False) - parser.add_argument('mode', nargs='*', help='source| dependency| binary| all| compare', default="") - parser.add_argument('--path', '-p', help='Path to analyze (In compare mode, two FOSSLight reports', + parser = ArgumentParser(description='FOSSLight Scanner', + prog='fosslight_scanner', add_help=False) + parser.add_argument('mode', nargs='*', + help='source| dependency| binary| all| compare', + default="") + parser.add_argument('--path', '-p', + help='Path to analyze (In compare mode, two FOSSLight reports', dest='path', nargs='+', default="") - parser.add_argument('--wget', '-w', help='Link to be analyzed', type=str, dest='link', default="") - parser.add_argument('--format', '-f', help='Scanner output file format (excel,yaml), Compare mode (excel,html,yaml,json)', + parser.add_argument('--wget', '-w', help='Link to be analyzed', + type=str, dest='link', default="") + parser.add_argument('--format', '-f', + help='Scanner output file format (excel,yaml), Compare mode (excel,html,yaml,json)', type=str, dest='format', default="") - parser.add_argument('--output', '-o', help='Output directory or file', type=str, dest='output', default="") - parser.add_argument('--dependency', '-d', help='Dependency arguments', type=str, dest='dep_argument', default="") - parser.add_argument('--url', '-u', help="DB Url", type=str, dest='db_url', default="") - parser.add_argument('--core', '-c', help='Number of processes to analyze source', type=int, dest='core', default=-1) - parser.add_argument('--raw', '-r', help='Keep raw data', action='store_true', dest='raw', default=False) - parser.add_argument('--timer', '-t', help='Hide the progress bar', action='store_true', dest='timer', default=False) - parser.add_argument('--version', '-v', help='Print version', action='store_true', dest='version', default=False) - parser.add_argument('--help', '-h', help='Print help message', action='store_true', dest='help') - parser.add_argument('--exclude', '-e', help='Path to exclude from analysis', dest='exclude_path', nargs='*', default=[]) - parser.add_argument('--setting', '-s', help='Scanner json setting file', type=str, dest='setting', default="") - parser.add_argument('--no_correction', help='No correction with sbom-info.yaml', + parser.add_argument('--output', '-o', help='Output directory or file', + type=str, dest='output', default="") + parser.add_argument('--dependency', '-d', help='Dependency arguments', + type=str, dest='dep_argument', default="") + parser.add_argument('--url', '-u', help="DB Url", + type=str, dest='db_url', default="") + parser.add_argument('--core', '-c', + help='Number of processes to analyze source', + type=int, dest='core', default=-1) + parser.add_argument('--raw', '-r', help='Keep raw data', + action='store_true', dest='raw', default=False) + parser.add_argument('--timer', '-t', help='Hide the progress bar', + action='store_true', dest='timer', default=False) + parser.add_argument('--version', '-v', help='Print version', + action='store_true', dest='version', default=False) + parser.add_argument('--help', '-h', help='Print help message', + action='store_true', dest='help') + parser.add_argument('--exclude', '-e', help='Path to exclude from analysis', + dest='exclude_path', nargs='*', default=[]) + parser.add_argument('--setting', '-s', help='Scanner json setting file', + type=str, dest='setting', default="") + parser.add_argument('--no_correction', + help='No correction with sbom-info.yaml', action='store_true', required=False, default=False) parser.add_argument('--correct_fpath', help='Path to the sbom-info.yaml', type=str, required=False, default='') - parser.add_argument('--ui', help='Generate UI mode result file', action='store_true', required=False, default=False) + parser.add_argument('--ui', help='Generate UI mode result file', + action='store_true', required=False, default=False) try: args = parser.parse_args() @@ -78,12 +117,17 @@ def main(): print_package_version(PKG_NAME, "FOSSLight Scanner Version:") else: mode, path, dep_argument, output, format, link, db_url, timer, raw, core, no_correction, correct_fpath, \ - ui, exclude_path = set_args(args.mode, args.path, args.dep_argument, args.output, args.format, - args.link, args.db_url, args.timer, args.raw, args.core, args.no_correction, - args.correct_fpath, args.ui, args.setting, args.exclude_path) + ui, exclude_path, selected_source_scanner, source_write_json_file, source_print_matched_text, \ + source_time_out, binary_simple, = set_args( + args.mode, args.path, args.dep_argument, args.output, + args.format, args.link, args.db_url, args.timer, args.raw, + args.core, args.no_correction, args.correct_fpath, args.ui, + args.setting, args.exclude_path) run_main(mode, path, dep_argument, output, format, link, db_url, timer, - raw, core, not no_correction, correct_fpath, ui, exclude_path) + raw, core, not no_correction, correct_fpath, ui, exclude_path, + selected_source_scanner, source_write_json_file, source_print_matched_text, + source_time_out, binary_simple) if __name__ == "__main__": diff --git a/src/fosslight_scanner/common.py b/src/fosslight_scanner/common.py index 503cba1..f8ee2eb 100755 --- a/src/fosslight_scanner/common.py +++ b/src/fosslight_scanner/common.py @@ -9,6 +9,7 @@ import shutil import pandas as pd import yaml + import fosslight_util.constant as constant from fosslight_util.parsing_yaml import parsing_yml from fosslight_util.write_yaml import create_yaml_with_ossitem @@ -20,11 +21,14 @@ logger = logging.getLogger(constant.LOGGER_NAME) SRC_SHEET = 'SRC_FL_Source' BIN_SHEET = 'BIN_FL_Binary' -BIN_EXT_HEADER = {'BIN_FL_Binary': ['ID', 'Binary Path', 'OSS Name', - 'OSS Version', 'License', 'Download Location', - 'Homepage', 'Copyright Text', 'Exclude', - 'Comment', 'Vulnerability Link', 'TLSH', 'SHA1']} -BIN_HIDDEN_HEADER = {'TLSH', "SHA1"} +BIN_EXT_HEADER = { + 'BIN_FL_Binary': [ + 'ID', 'Binary Path', 'OSS Name', 'OSS Version', 'License', + 'Download Location', 'Homepage', 'Copyright Text', 'Exclude', + 'Comment', 'Vulnerability Link', 'TLSH', 'SHA1' + ] +} +BIN_HIDDEN_HEADER = {'TLSH', 'SHA1'} def copy_file(source, destination): @@ -38,16 +42,15 @@ def copy_file(source, destination): except Exception as ex: logger.debug(f"Failed to copy {source} to {destination}: {ex}") return False, copied_file - else: - return True, copied_file + return True, copied_file def run_analysis(path_to_run, params, func, str_run_start, output, exe_path): # This function will be replaced by call_analysis_api(). - logger.info("## Start to run "+str_run_start) + logger.info("## Start to run " + str_run_start) return_value = "" try: - if path_to_run != "": + if path_to_run: logger.info(f"|--- Path to analyze : {path_to_run}") os.chdir(output) sys.argv = params @@ -68,7 +71,7 @@ def call_analysis_api(path_to_run, str_run_start, return_idx, func, *args, **kwa success = True result = [] try: - if path_to_run != "": + if path_to_run: logger.info(f"|--- Path to analyze : {path_to_run}") result = func(*args, **kwargs) else: @@ -79,36 +82,33 @@ def call_analysis_api(path_to_run, str_run_start, return_idx, func, *args, **kwa success = False logger.error(f"{str_run_start}:{ex}") try: - if success: - if result and return_idx >= 0: - if len(result) > return_idx: - result = result[return_idx] - else: - success = False + if success and result and return_idx >= 0: + if len(result) > return_idx: + result = result[return_idx] + else: + success = False except Exception as ex: logger.debug(f"Get return value:{ex}") success = False - if not result: - result = [] - return success, result + return success, result or [] def overwrite_excel(excel_file_path, oss_name, column_name='OSS Name'): - if oss_name != "": + if oss_name: try: files = os.listdir(excel_file_path) for file in files: if file.endswith(".xlsx"): - file = os.path.join(excel_file_path, file) - excel_file = pd.ExcelFile(file, engine='openpyxl') + file_path = os.path.join(excel_file_path, file) + excel_file = pd.ExcelFile(file_path, engine='openpyxl') for sheet_name in excel_file.sheet_names: try: - df = pd.read_excel(file, sheet_name=sheet_name, engine='openpyxl') + df = pd.read_excel(file_path, sheet_name=sheet_name, engine='openpyxl') if column_name in df.columns: updated = (df[column_name] == '') | (df[column_name].isnull()) df.loc[updated, column_name] = oss_name - df.to_excel(file, sheet_name=sheet_name, index=False) + df.to_excel(file_path, sheet_name=sheet_name, index=False) except Exception as ex: logger.debug(f"overwrite_sheet {sheet_name}:{ex}") except Exception as ex: diff --git a/src/fosslight_scanner/fosslight_scanner.py b/src/fosslight_scanner/fosslight_scanner.py index bdd8d58..d0a1399 100755 --- a/src/fosslight_scanner/fosslight_scanner.py +++ b/src/fosslight_scanner/fosslight_scanner.py @@ -4,16 +4,17 @@ # Copyright (c) 2020 LG Electronics Inc. # SPDX-License-Identifier: Apache-2.0 import os +import sys +import re import logging import warnings -import re import yaml -import sys import shutil import shlex import subprocess from pathlib import Path from datetime import datetime + from fosslight_binary import binary_analysis from fosslight_dependency.run_dependency_scanner import run_dependency_scanner from fosslight_util.download import cli_download_and_extract, compression_extension @@ -24,13 +25,15 @@ import fosslight_util.constant as constant from fosslight_util.output_format import check_output_format from fosslight_prechecker._precheck import run_lint as prechecker_lint -from .common import (copy_file, call_analysis_api, - overwrite_excel, - merge_yamls, correct_scanner_result, - create_scancodejson) from fosslight_util.write_excel import merge_excels, merge_cover_comment -from ._run_compare import run_compare from fosslight_util.cover import CoverItem + +from .common import ( + copy_file, call_analysis_api, overwrite_excel, + merge_yamls, correct_scanner_result, create_scancodejson +) +from ._run_compare import run_compare + fosslight_source_installed = True try: from fosslight_source.cli import run_scanners as source_analysis @@ -46,10 +49,14 @@ _start_time = "" _executed_path = "" SRC_DIR_FROM_LINK_PREFIX = "fosslight_src_dir_" -SCANNER_MODE = ["all", "compare", "reuse", "prechecker", "binary", "bin", "src", "source", "dependency", "dep"] +SCANNER_MODE = [ + "all", "compare", "reuse", "prechecker", "binary", + "bin", "src", "source", "dependency", "dep" +] -def run_dependency(path_to_analyze, output_file_with_path, params="", path_to_exclude=[]): +def run_dependency(path_to_analyze, output_file_with_path, params="", path_to_exclude=None): + path_to_exclude = [] result_list = [] package_manager = "" @@ -60,7 +67,7 @@ def run_dependency(path_to_analyze, output_file_with_path, params="", path_to_ex github_token = "" try: - if params != "": + if params: match_obj = re.findall( r'\s*(-\s*[a|d|m|c|n|t])\s*\'([^\']+)\'\s*', params) for param, value in match_obj: @@ -84,22 +91,34 @@ def run_dependency(path_to_analyze, output_file_with_path, params="", path_to_ex timer.start() try: - success, result = call_analysis_api(path_to_analyze, "Dependency Analysis", - 1, run_dependency_scanner, - package_manager, - os.path.abspath(path_to_analyze), - output_file_with_path, - pip_activate_cmd, pip_deactivate_cmd, - output_custom_dir, app_name, - github_token, path_to_exclude=path_to_exclude) + success, result = call_analysis_api( + path_to_analyze, "Dependency Analysis", + 1, run_dependency_scanner, + package_manager, + os.path.abspath(path_to_analyze), + output_file_with_path, + pip_activate_cmd, pip_deactivate_cmd, + output_custom_dir, app_name, + github_token, path_to_exclude=path_to_exclude + ) if success: result_list = result.get('SRC_FL_Dependency') except Exception as ex: logger.warning(f"Run dependency: {ex}") - if not result_list: - result_list = [] - return result_list + return result_list or [] + + +def source_analysis_wrapper(*args, **kwargs): + selected_scanner = kwargs.pop('selected_scanner', 'all') + source_write_json_file = kwargs.pop('source_write_json_file', False) + source_print_matched_text = kwargs.pop('source_print_matched_text', False) + source_time_out = kwargs.pop('source_time_out', 120) + args = list(args) + args.insert(2, source_write_json_file) + args.insert(5, source_print_matched_text) + + return source_analysis(*args, selected_scanner=selected_scanner, time_out=source_time_out, **kwargs) def run_scanner(src_path, dep_arguments, output_path, keep_raw_data=False, @@ -107,7 +126,9 @@ def run_scanner(src_path, dep_arguments, output_path, keep_raw_data=False, remove_src_data=True, result_log={}, output_file="", output_extension="", num_cores=-1, db_url="", default_oss_name="", default_oss_version="", url="", - correct_mode=True, correct_fpath="", ui_mode=False, path_to_exclude=[]): + correct_mode=True, correct_fpath="", ui_mode=False, path_to_exclude=[], + selected_source_scanner="all", source_write_json_file=False, source_print_matched_text=False, + source_time_out=120, binary_simple=False): final_excel_dir = output_path success = True temp_output_fiiles = [] @@ -146,12 +167,21 @@ def run_scanner(src_path, dep_arguments, output_path, keep_raw_data=False, try: if fosslight_source_installed: src_output = os.path.join(_output_dir, output_files["SRC"]) - success, result = call_analysis_api(src_path, "Source Analysis", - -1, source_analysis, - abs_path, - src_output, - False, num_cores, False, - path_to_exclude=path_to_exclude) + success, result = call_analysis_api( + src_path, + "Source Analysis", + -1, source_analysis_wrapper, + abs_path, + src_output, + num_cores, + False, + path_to_exclude=path_to_exclude, + selected_scanner=selected_source_scanner, + source_write_json_file=source_write_json_file, + source_print_matched_text=source_print_matched_text, + source_time_out=source_time_out + ) + else: # Run fosslight_source by using docker image src_output = os.path.join("output", output_files["SRC"]) output_rel_path = os.path.relpath(abs_path, os.getcwd()) @@ -170,7 +200,7 @@ def run_scanner(src_path, dep_arguments, output_path, keep_raw_data=False, 1, binary_analysis.find_binaries, abs_path, os.path.join(_output_dir, output_files["BIN"]), - "", db_url, False, + "", db_url, binary_simple, correct_mode, correct_fpath, path_to_exclude=path_to_exclude) @@ -309,7 +339,9 @@ def init(output_path="", make_outdir=True): def run_main(mode_list, path_arg, dep_arguments, output_file_or_dir, file_format, url_to_analyze, db_url, hide_progressbar=False, keep_raw_data=False, num_cores=-1, - correct_mode=True, correct_fpath="", ui_mode=False, path_to_exclude=[]): + correct_mode=True, correct_fpath="", ui_mode=False, path_to_exclude=[], + selected_source_scanner="all", source_write_json_file=False, source_print_matched_text=False, + source_time_out=120, binary_simple=False): global _executed_path, _start_time output_file = "" @@ -427,7 +459,9 @@ def run_main(mode_list, path_arg, dep_arguments, output_file_or_dir, file_format remove_downloaded_source, {}, output_file, output_extension, num_cores, db_url, default_oss_name, default_oss_version, url_to_analyze, - correct_mode, correct_fpath, ui_mode, path_to_exclude) + correct_mode, correct_fpath, ui_mode, path_to_exclude, + selected_source_scanner, source_write_json_file, source_print_matched_text, source_time_out, + binary_simple) if extract_folder: shutil.rmtree(extract_folder) diff --git a/tests/setting.json b/tests/setting.json index 8f0bece..3844d05 100644 --- a/tests/setting.json +++ b/tests/setting.json @@ -1,16 +1,21 @@ { "mode": ["binary", "source"], "path": ["tests"], - "link": "", "dep_argument": "", "output": "test_result_dir", - "exclude": ["test", "sample_license.txt"], "format": "excel", + "link": "", "db_url": "", "timer": false, "raw": true, "core": -1, "no_correction": false, "correct_fpath": "", - "ui": false + "ui": false, + "exclude": ["test", "sample_license.txt"], + "selected_source_scanner": "scancode", + "source_write_json_file": true, + "source_print_matched_text": true, + "source_time_out": 120, + "binary_simple": false } From 10ae4f17cedf34e9d8325c8db2367e748f04e63c Mon Sep 17 00:00:00 2001 From: Soim Date: Fri, 30 Aug 2024 10:09:19 +0900 Subject: [PATCH 02/16] Fix a bug related to path_to_exclude --- src/fosslight_scanner/fosslight_scanner.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/fosslight_scanner/fosslight_scanner.py b/src/fosslight_scanner/fosslight_scanner.py index d0a1399..73940c0 100755 --- a/src/fosslight_scanner/fosslight_scanner.py +++ b/src/fosslight_scanner/fosslight_scanner.py @@ -55,8 +55,7 @@ ] -def run_dependency(path_to_analyze, output_file_with_path, params="", path_to_exclude=None): - path_to_exclude = [] +def run_dependency(path_to_analyze, output_file_with_path, params="", path_to_exclude=[]): result_list = [] package_manager = "" From 8d163e3b301237a1e1769efd3c660040a565bee1 Mon Sep 17 00:00:00 2001 From: "jiyeong.seok" Date: Mon, 2 Sep 2024 16:54:19 +0900 Subject: [PATCH 03/16] Limit installation fosslight package Signed-off-by: jiyeong.seok --- requirements.txt | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/requirements.txt b/requirements.txt index e6a3080..3d9a4b5 100644 --- a/requirements.txt +++ b/requirements.txt @@ -4,8 +4,8 @@ openpyxl progress pyyaml beautifulsoup4 -fosslight_util>=1.4.48 -fosslight_source>=1.7.8 -fosslight_dependency>=3.15.1 -fosslight_binary>=4.1.30 -fosslight_prechecker>=3.0.27 \ No newline at end of file +fosslight_util~=1.4.48 +fosslight_source~=1.7.8 +fosslight_dependency~=3.15.1 +fosslight_binary~=4.1.30 +fosslight_prechecker==3.0.27 \ No newline at end of file From a1a7c7ed3fb03c727e09231a9d2281527a5a0f5a Mon Sep 17 00:00:00 2001 From: "jiyeong.seok" Date: Tue, 3 Sep 2024 17:58:08 +0900 Subject: [PATCH 04/16] Refactoring oss item Signed-off-by: jiyeong.seok --- src/fosslight_scanner/_run_compare.py | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/src/fosslight_scanner/_run_compare.py b/src/fosslight_scanner/_run_compare.py index d547796..51c2978 100644 --- a/src/fosslight_scanner/_run_compare.py +++ b/src/fosslight_scanner/_run_compare.py @@ -14,7 +14,8 @@ from bs4 import BeautifulSoup import fosslight_util.constant as constant from fosslight_util.compare_yaml import compare_yaml -from fosslight_util.convert_excel_to_yaml import convert_excel_to_yaml +from fosslight_util.read_excel import read_oss_report +from fosslight_util.parsing_yaml import parsing_yml logger = logging.getLogger(constant.LOGGER_NAME) ADD = "add" @@ -255,10 +256,18 @@ def run_compare(before_f, after_f, output_path, output_file, file_ext, _start_ti result_file = get_comparison_result_filename(output_path, output_file, file_ext, _start_time) - if before_ext == XLSX_EXT: - convert_excel_to_yaml(before_f, before_yaml) - convert_excel_to_yaml(after_f, after_yaml) - compared_result = compare_yaml(before_yaml, after_yaml) + before_basepath = os.path.dirname(before_f) + after_basepath = os.path.dirname(after_f) + if XLSX_EXT == before_ext: + before_fileitems = read_oss_report(before_f, "", before_basepath) + elif YAML_EXT == before_ext: + before_fileitems, _, _ = parsing_yml(before_yaml, before_basepath) + if XLSX_EXT == after_ext: + after_fileitems = read_oss_report(after_f, after_basepath) + elif YAML_EXT == after_ext: + after_fileitems, _, _ = parsing_yml(after_yaml, after_basepath) + + compared_result = compare_yaml(before_fileitems, after_fileitems) if compared_result != '': count_compared_result(compared_result) ret, result_file = write_compared_result(result_file, compared_result, file_ext, before_yaml, after_yaml) From 4ec250ac3d8f31fdd30543baea1ba7174dc346dd Mon Sep 17 00:00:00 2001 From: "jiyeong.seok" Date: Wed, 4 Sep 2024 19:37:18 +0900 Subject: [PATCH 05/16] Update scanner with ossinfo Signed-off-by: jiyeong.seok --- src/fosslight_scanner/common.py | 165 ++++++--------------- src/fosslight_scanner/fosslight_scanner.py | 93 +++++------- 2 files changed, 83 insertions(+), 175 deletions(-) diff --git a/src/fosslight_scanner/common.py b/src/fosslight_scanner/common.py index f8ee2eb..edab9d0 100755 --- a/src/fosslight_scanner/common.py +++ b/src/fosslight_scanner/common.py @@ -9,16 +9,13 @@ import shutil import pandas as pd import yaml - -import fosslight_util.constant as constant +from fosslight_util.constant import LOGGER_NAME, FOSSLIGHT_SOURCE, FOSSLIGHT_BINARY from fosslight_util.parsing_yaml import parsing_yml -from fosslight_util.write_yaml import create_yaml_with_ossitem from fosslight_util.write_scancodejson import write_scancodejson from fosslight_util.read_excel import read_oss_report -from fosslight_util.output_format import write_output_file from fosslight_util.oss_item import OssItem -logger = logging.getLogger(constant.LOGGER_NAME) +logger = logging.getLogger(LOGGER_NAME) SRC_SHEET = 'SRC_FL_Source' BIN_SHEET = 'BIN_FL_Binary' BIN_EXT_HEADER = { @@ -93,65 +90,18 @@ def call_analysis_api(path_to_run, str_run_start, return_idx, func, *args, **kwa return success, result or [] -def overwrite_excel(excel_file_path, oss_name, column_name='OSS Name'): - if oss_name: - try: - files = os.listdir(excel_file_path) - for file in files: - if file.endswith(".xlsx"): - file_path = os.path.join(excel_file_path, file) - excel_file = pd.ExcelFile(file_path, engine='openpyxl') - - for sheet_name in excel_file.sheet_names: - try: - df = pd.read_excel(file_path, sheet_name=sheet_name, engine='openpyxl') - if column_name in df.columns: - updated = (df[column_name] == '') | (df[column_name].isnull()) - df.loc[updated, column_name] = oss_name - df.to_excel(file_path, sheet_name=sheet_name, index=False) - except Exception as ex: - logger.debug(f"overwrite_sheet {sheet_name}:{ex}") - except Exception as ex: - logger.debug(f"overwrite_excel:{ex}") - - -def merge_yamls(_output_dir, merge_yaml_files, final_report, remove_src_data=False, - default_oss_name='', default_oss_version='', url=''): - success = True - err_msg = '' - - oss_total_list = [] - yaml_dict = {} - try: - for mf in merge_yaml_files: - if os.path.exists(os.path.join(_output_dir, mf)): - oss_list, _, _ = parsing_yml(os.path.join(_output_dir, mf), _output_dir) - - if remove_src_data: - existed_yaml = {} - for oi in oss_list: - oi.name = default_oss_name if oi.name == '' else oi.name - oi.version = default_oss_version if oi.version == '' else oi.version - oi.download_location = url if oi.download_location == '' else oi.download_location - create_yaml_with_ossitem(oi, existed_yaml) - with open(os.path.join(_output_dir, mf), 'w') as f: - yaml.dump(existed_yaml, f, default_flow_style=False, sort_keys=False) - - oss_total_list.extend(oss_list) - - if oss_total_list != []: - for oti in oss_total_list: - create_yaml_with_ossitem(oti, yaml_dict) - with open(os.path.join(_output_dir, final_report), 'w') as f: - yaml.dump(yaml_dict, f, default_flow_style=False, sort_keys=False) - else: - success = False - err_msg = "Output file is not created as no oss items detected." - except Exception as ex: - err_msg = ex - success = False - - return success, err_msg +def update_oss_item(scan_item, oss_name, oss_version, download_loc): + for file_items in scan_item.file_items.values(): + for file_item in file_items: + if file_item.oss_items: + for oi in file_item.oss_items: + if oi.name == '' and oi.version == '' and oi.download_location == '': + oi.name = oss_name + oi.version = oss_version + oi.download_location = download_loc + else: + file_item.oss_items.append(OssItem(oss_name, oss_version, '', download_loc)) + return scan_item def create_scancodejson(final_report, output_extension, ui_mode_report, src_path=""): @@ -199,73 +149,48 @@ def create_scancodejson(final_report, output_extension, ui_mode_report, src_path return success, err_msg -def correct_scanner_result(_output_dir, output_files, output_extension, exist_src, exist_bin): - src_oss_list = [] - bin_oss_list = [] +def correct_scanner_result(all_scan_item): duplicates = False - if exist_src: - src_oss_list = check_exclude_dir(get_osslist(_output_dir, output_files['SRC'], output_extension, SRC_SHEET)) - if exist_bin: - bin_oss_list = check_exclude_dir(get_osslist(_output_dir, output_files['BIN'], output_extension, BIN_SHEET)) - - if exist_src and exist_bin: + keys_needed = {FOSSLIGHT_SOURCE, FOSSLIGHT_BINARY} + is_contained = keys_needed.issubset(all_scan_item.file_items.keys()) + if is_contained: + src_fileitems = all_scan_item.file_items[FOSSLIGHT_SOURCE] + bin_fileitems = all_scan_item.file_items[FOSSLIGHT_BINARY] try: remove_src_idx_list = [] - for idx_src, src_item in enumerate(src_oss_list): + for idx_src, src_fileitem in enumerate(src_fileitems): + src_fileitem.exclude = check_exclude_dir(src_fileitem.source_name_or_path) dup_flag = False - for bin_item in bin_oss_list: - if (not src_item.source_name_or_path): - continue - if src_item.source_name_or_path[0] == bin_item.source_name_or_path[0]: + for bin_fileitem in bin_fileitems: + bin_fileitem.exclude = check_exclude_dir(bin_fileitem.source_name_or_path) + if src_fileitem.source_name_or_path == bin_fileitem.source_name_or_path: dup_flag = True - if not bin_item.license and src_item.license: - src_item.exclude = bin_item.exclude - bin_item.set_sheet_item(src_item.get_print_array(constant.FL_BINARY)[0]) - if bin_item.comment: - bin_item.comment += '/' - bin_item.comment += 'Loaded from SRC OSS info' + src_all_licenses_non_empty = all(oss_item.license for oss_item in src_fileitem.oss_items) + bin_empty_license_exists = all(not oss_item.license for oss_item in bin_fileitem.oss_items) + + if src_all_licenses_non_empty and bin_empty_license_exists: + exclude = bin_fileitem.oss_items[0].exclude + bin_fileitem.oss_items = [] + for src_oss_item in src_fileitem.oss_items: + src_oss_item.exclude = exclude + bin_fileitem.oss_items.append(src_oss_item) + bin_fileitem.comment = 'Loaded from SRC OSS info' if dup_flag: remove_src_idx_list.append(idx_src) if remove_src_idx_list: duplicates = True for i in sorted(remove_src_idx_list, reverse=True): - del src_oss_list[i] + del src_fileitems[i] except Exception as ex: logger.warning(f"correct the scanner result:{ex}") try: - if exist_src: - success, err_msg = write_output_with_osslist(src_oss_list, _output_dir, output_files['SRC'], - output_extension, SRC_SHEET) - if not success: - logger.warning(err_msg) - if exist_bin: - success, err_msg = write_output_with_osslist(bin_oss_list, _output_dir, output_files['BIN'], - output_extension, BIN_SHEET, BIN_EXT_HEADER, BIN_HIDDEN_HEADER) - if not success: - logger.warning(err_msg) if duplicates: logger.info('Success to correct the src/bin scanner result') except Exception as ex: logger.warning(f"Corrected src/bin scanner result:{ex}") - return - - -def write_output_with_osslist(oss_list, output_dir, output_file, output_extension, sheetname, extended_hdr={}, hidden_hdr={}): - new_oss_list = [] - sheet_list = {} - sheet_list[sheetname] = [] - - for src_item in oss_list: - scanner_name = constant.supported_sheet_and_scanner[sheetname] - new_oss_list.append(src_item.get_print_array(scanner_name)[0]) - sheet_list[sheetname].extend(new_oss_list) - if os.path.exists(os.path.join(output_dir, output_file)): - os.remove(os.path.join(output_dir, output_file)) - success, err_msg, _ = write_output_file(os.path.join(output_dir, output_file).rstrip(output_extension), - output_extension, sheet_list, extended_hdr, hidden_hdr) - return success, err_msg + return all_scan_item def get_osslist(_output_dir, output_file, output_extension, sheet_name=''): @@ -285,14 +210,12 @@ def get_osslist(_output_dir, output_file, output_extension, sheet_name=''): return oss_list -def check_exclude_dir(oss_list): +def check_exclude_dir(source_name_or_path): _exclude_dirs = ["venv", "node_modules", "Pods", "Carthage"] + exclude = False - for oss_item in oss_list: - if not oss_item.source_name_or_path: - continue - for exclude_dir in _exclude_dirs: - if exclude_dir in oss_item.source_name_or_path[0].split(os.path.sep): - oss_item.exclude = True - break - return oss_list + for exclude_dir in _exclude_dirs: + if exclude_dir in source_name_or_path.split(os.path.sep): + exclude = True + break + return exclude diff --git a/src/fosslight_scanner/fosslight_scanner.py b/src/fosslight_scanner/fosslight_scanner.py index 73940c0..245f48a 100755 --- a/src/fosslight_scanner/fosslight_scanner.py +++ b/src/fosslight_scanner/fosslight_scanner.py @@ -25,12 +25,13 @@ import fosslight_util.constant as constant from fosslight_util.output_format import check_output_format from fosslight_prechecker._precheck import run_lint as prechecker_lint -from fosslight_util.write_excel import merge_excels, merge_cover_comment from fosslight_util.cover import CoverItem +from fosslight_util.oss_item import ScannerItem +from fosslight_util.output_format import write_output_file from .common import ( - copy_file, call_analysis_api, overwrite_excel, - merge_yamls, correct_scanner_result, create_scancodejson + copy_file, call_analysis_api, update_oss_item, + correct_scanner_result, create_scancodejson ) from ._run_compare import run_compare @@ -56,7 +57,7 @@ def run_dependency(path_to_analyze, output_file_with_path, params="", path_to_exclude=[]): - result_list = [] + result = [] package_manager = "" pip_activate_cmd = "" @@ -90,7 +91,7 @@ def run_dependency(path_to_analyze, output_file_with_path, params="", path_to_ex timer.start() try: - success, result = call_analysis_api( + success, scan_item = call_analysis_api( path_to_analyze, "Dependency Analysis", 1, run_dependency_scanner, package_manager, @@ -101,11 +102,11 @@ def run_dependency(path_to_analyze, output_file_with_path, params="", path_to_ex github_token, path_to_exclude=path_to_exclude ) if success: - result_list = result.get('SRC_FL_Dependency') + result = scan_item except Exception as ex: logger.warning(f"Run dependency: {ex}") - return result_list or [] + return result def source_analysis_wrapper(*args, **kwargs): @@ -130,7 +131,8 @@ def run_scanner(src_path, dep_arguments, output_path, keep_raw_data=False, source_time_out=120, binary_simple=False): final_excel_dir = output_path success = True - temp_output_fiiles = [] + all_cover_items = [] + all_scan_item = ScannerItem(PKG_NAME, _start_time) if not remove_src_data: success, final_excel_dir, result_log = init(output_path) @@ -158,9 +160,6 @@ def run_scanner(src_path, dep_arguments, output_path, keep_raw_data=False, -1, prechecker_lint, abs_path, False, output_prechecker, exclude_path=path_to_exclude) - success_file, copied_file = copy_file(output_prechecker, output_path) - if success_file: - temp_output_fiiles.append(copied_file) if run_src: try: @@ -180,6 +179,9 @@ def run_scanner(src_path, dep_arguments, output_path, keep_raw_data=False, source_print_matched_text=source_print_matched_text, source_time_out=source_time_out ) + if success: + all_scan_item.file_items.update(result[2].file_items) + all_cover_items.append(result[2].cover) else: # Run fosslight_source by using docker image src_output = os.path.join("output", output_files["SRC"]) @@ -195,16 +197,22 @@ def run_scanner(src_path, dep_arguments, output_path, keep_raw_data=False, logger.warning(f"Failed to run source analysis: {ex}") if run_bin: - success, _ = call_analysis_api(src_path, "Binary Analysis", - 1, binary_analysis.find_binaries, - abs_path, - os.path.join(_output_dir, output_files["BIN"]), - "", db_url, binary_simple, - correct_mode, correct_fpath, - path_to_exclude=path_to_exclude) + success, result = call_analysis_api(src_path, "Binary Analysis", + 1, binary_analysis.find_binaries, + abs_path, + os.path.join(_output_dir, output_files["BIN"]), + "", db_url, binary_simple, + correct_mode, correct_fpath, + path_to_exclude=path_to_exclude) + if success: + all_scan_item.file_items.update(result.file_items) + all_cover_items.append(result.cover) if run_dep: - run_dependency(src_path, os.path.join(_output_dir, output_files["DEP"]), dep_arguments, path_to_exclude) + dep_scanitem = run_dependency(src_path, os.path.join(_output_dir, output_files["DEP"]), + dep_arguments, path_to_exclude) + all_scan_item.file_items.update(dep_scanitem.file_items) + all_cover_items.append(dep_scanitem.cover) else: return @@ -215,46 +223,23 @@ def run_scanner(src_path, dep_arguments, output_path, keep_raw_data=False, try: output_file_without_ext = os.path.join(final_excel_dir, output_file) final_report = f"{output_file_without_ext}{output_extension}" - merge_files = [output_files["SRC"], output_files["BIN"], output_files["DEP"]] cover = CoverItem(tool_name=PKG_NAME, start_time=_start_time, input_path=abs_path, exclude_path=path_to_exclude, simple_mode=False) - cover.comment = merge_cover_comment(_output_dir, merge_files) - - if output_extension == ".xlsx": - tmp_dir = f"tmp_{datetime.now().strftime('%y%m%d_%H%M')}" - exist_src = False - exist_bin = False - if correct_mode: - os.makedirs(os.path.join(_output_dir, tmp_dir), exist_ok=True) - if os.path.exists(os.path.join(_output_dir, output_files['SRC'])): - exist_src = True - shutil.copy2(os.path.join(_output_dir, output_files['SRC']), os.path.join(_output_dir, tmp_dir)) - if os.path.exists(os.path.join(_output_dir, output_files['BIN'])): - exist_bin = True - shutil.copy2(os.path.join(_output_dir, output_files['BIN']), os.path.join(_output_dir, tmp_dir)) - if exist_src or exist_bin: - correct_scanner_result(_output_dir, output_files, output_extension, exist_src, exist_bin) - - if remove_src_data: - overwrite_excel(_output_dir, default_oss_name, "OSS Name") - overwrite_excel(_output_dir, default_oss_version, "OSS Version") - overwrite_excel(_output_dir, url, "Download Location") - success, err_msg = merge_excels(_output_dir, final_report, merge_files, cover) - - if correct_mode: - if exist_src: - shutil.move(os.path.join(_output_dir, tmp_dir, output_files['SRC']), - os.path.join(_output_dir, output_files['SRC'])) - if exist_bin: - shutil.move(os.path.join(_output_dir, tmp_dir, output_files['BIN']), - os.path.join(_output_dir, output_files['BIN'])) - shutil.rmtree(os.path.join(_output_dir, tmp_dir), ignore_errors=True) - elif output_extension == ".yaml": - success, err_msg = merge_yamls(_output_dir, merge_files, final_report, - remove_src_data, default_oss_name, default_oss_version, url) + merge_comment = [] + for ci in all_cover_items: + merge_comment.append(str(f'[{ci.tool_name}] {ci.comment}')) + cover.comment = '\n'.join(merge_comment) + all_scan_item.cover = cover + + if correct_mode: + all_scan_item = correct_scanner_result(all_scan_item) + + if remove_src_data: + all_scan_item = update_oss_item(all_scan_item, default_oss_name, default_oss_version, url) + success, err_msg, final_report = write_output_file(output_file_without_ext, output_extension, all_scan_item) if success: if os.path.isfile(final_report): logger.info(f'Generated the result file: {final_report}') From b9e988de65643dd0fb06a5536c8f1605d467922d Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Fri, 6 Sep 2024 05:59:50 +0000 Subject: [PATCH 06/16] =?UTF-8?q?Bump=20version:=201.7.30=20=E2=86=92=201.?= =?UTF-8?q?7.31?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .bumpversion.cfg | 2 +- setup.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.bumpversion.cfg b/.bumpversion.cfg index 3cebbe5..5d84a1f 100644 --- a/.bumpversion.cfg +++ b/.bumpversion.cfg @@ -2,7 +2,7 @@ commit = True tag = False message = Bump version: {current_version} → {new_version} -current_version = 1.7.30 +current_version = 1.7.31 [bumpversion:file:setup.py] search = '{current_version}' diff --git a/setup.py b/setup.py index 2b7d6c3..50f9065 100644 --- a/setup.py +++ b/setup.py @@ -15,7 +15,7 @@ if __name__ == "__main__": setup( name='fosslight_scanner', - version='1.7.30', + version='1.7.31', package_dir={"": "src"}, packages=find_packages(where='src'), description='FOSSLight Scanner', From a17f6bf23e95e65c8486967a6c8b8e40cbbf95bc Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Fri, 6 Sep 2024 06:00:04 +0000 Subject: [PATCH 07/16] Update ChangeLog --- CHANGELOG.md | 25 +++++++++++++++++-------- 1 file changed, 17 insertions(+), 8 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 3c040cc..a255029 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,22 @@ # Changelog +## v1.7.31 (06/09/2024) +## Changes +## 🚀 Features + +- Setting.json with source_scanner selection @soonhong99 (#109) + +## 🐛 Hotfixes + +- Fix a bug related to path_to_exclude @soimkim (#116) + +## 🔧 Maintenance + +- Limit installation fosslight package @dd-jy (#117) +- Add simple_mode parameter to CoverItem constructor @YongGoose (#108) + +--- + ## v1.7.30 (22/07/2024) ## Changes ## 🚀 Features @@ -283,11 +300,3 @@ ## 🚀 Features - Support yaml format of FOSSLight Report @dd-jy (#42) - ---- - -## v1.7.1 (22/07/2022) -## Changes -## 🐛 Hotfixes - -- Change FL Reuse to FL Prechecker @bjk7119 (#43) From 2acef361bd027c1e748f46c691adc20470222a5c Mon Sep 17 00:00:00 2001 From: "jiyeong.seok" Date: Fri, 6 Sep 2024 11:41:57 +0900 Subject: [PATCH 08/16] update ui mode feature Signed-off-by: jiyeong.seok --- requirements.txt | 8 +-- src/fosslight_scanner/common.py | 75 +++++++++------------- src/fosslight_scanner/fosslight_scanner.py | 16 ++--- 3 files changed, 44 insertions(+), 55 deletions(-) diff --git a/requirements.txt b/requirements.txt index 3d9a4b5..cca12ba 100644 --- a/requirements.txt +++ b/requirements.txt @@ -4,8 +4,8 @@ openpyxl progress pyyaml beautifulsoup4 -fosslight_util~=1.4.48 -fosslight_source~=1.7.8 -fosslight_dependency~=3.15.1 -fosslight_binary~=4.1.30 +fosslight_util>=2.0.0 +fosslight_source>=2.0.0 +fosslight_dependency>=4.0.0 +fosslight_binary>=5.0.0 fosslight_prechecker==3.0.27 \ No newline at end of file diff --git a/src/fosslight_scanner/common.py b/src/fosslight_scanner/common.py index edab9d0..b2c6455 100755 --- a/src/fosslight_scanner/common.py +++ b/src/fosslight_scanner/common.py @@ -7,13 +7,10 @@ import sys import logging import shutil -import pandas as pd -import yaml -from fosslight_util.constant import LOGGER_NAME, FOSSLIGHT_SOURCE, FOSSLIGHT_BINARY -from fosslight_util.parsing_yaml import parsing_yml +import copy +from fosslight_util.constant import LOGGER_NAME, FOSSLIGHT_SOURCE, FOSSLIGHT_BINARY, FOSSLIGHT_DEPENDENCY from fosslight_util.write_scancodejson import write_scancodejson -from fosslight_util.read_excel import read_oss_report -from fosslight_util.oss_item import OssItem +from fosslight_util.oss_item import OssItem, FileItem logger = logging.getLogger(LOGGER_NAME) SRC_SHEET = 'SRC_FL_Source' @@ -104,11 +101,9 @@ def update_oss_item(scan_item, oss_name, oss_version, download_loc): return scan_item -def create_scancodejson(final_report, output_extension, ui_mode_report, src_path=""): +def create_scancodejson(all_scan_item_origin, ui_mode_report, src_path=""): success = True err_msg = '' - - oss_total_list = [] root_dir = "" root_strip = "" try: @@ -120,28 +115,37 @@ def create_scancodejson(final_report, output_extension, ui_mode_report, src_path root_dir = "" try: - item_without_oss = OssItem("") - oss_total_list = get_osslist(os.path.dirname(final_report), os.path.basename(final_report), - output_extension, '') + all_scan_item = copy.deepcopy(all_scan_item_origin) + if FOSSLIGHT_DEPENDENCY in all_scan_item.file_items: + del all_scan_item.file_items[FOSSLIGHT_DEPENDENCY] if src_path: - for root, dirs, files in os.walk(src_path): + fileitems_without_oss = [] + for root, _, files in os.walk(src_path): root = root.replace(root_strip, "") for file in files: + fi_without_oss = FileItem('') + included = False item_path = os.path.join(root, file) item_path = item_path.replace(parent + os.path.sep, '', 1) - included = any(item_path in x.source_name_or_path for x in oss_total_list) + + for file_items in all_scan_item.file_items.values(): + for file_item in file_items: + if file_item.source_name_or_path: + if file_item.source_name_or_path == item_path: + included = True + break if not included: - item_without_oss.source_name_or_path = item_path - if len(item_without_oss.source_name_or_path) > 0: - oss_total_list.append(item_without_oss) + fi_without_oss.source_name_or_path = item_path + fileitems_without_oss.append(fi_without_oss) + if len(fileitems_without_oss) > 0: + all_scan_item.file_items[FOSSLIGHT_SOURCE].extend(fileitems_without_oss) if root_dir: - for oss in oss_total_list: - tmp_path_list = oss.source_name_or_path - oss.source_name_or_path = "" - oss.source_name_or_path = [os.path.join(root_dir, path) for path in tmp_path_list] - + for file_items in all_scan_item.file_items.values(): + for fi in file_items: + if fi.source_name_or_path: + fi.source_name_or_path = os.path.join(root_dir, fi.source_name_or_path) write_scancodejson(os.path.dirname(ui_mode_report), os.path.basename(ui_mode_report), - oss_total_list) + all_scan_item) except Exception as ex: err_msg = ex success = False @@ -160,10 +164,10 @@ def correct_scanner_result(all_scan_item): try: remove_src_idx_list = [] for idx_src, src_fileitem in enumerate(src_fileitems): - src_fileitem.exclude = check_exclude_dir(src_fileitem.source_name_or_path) + src_fileitem.exclude = check_exclude_dir(src_fileitem.source_name_or_path, src_fileitem.exclude) dup_flag = False for bin_fileitem in bin_fileitems: - bin_fileitem.exclude = check_exclude_dir(bin_fileitem.source_name_or_path) + bin_fileitem.exclude = check_exclude_dir(bin_fileitem.source_name_or_path, bin_fileitem.exclude) if src_fileitem.source_name_or_path == bin_fileitem.source_name_or_path: dup_flag = True src_all_licenses_non_empty = all(oss_item.license for oss_item in src_fileitem.oss_items) @@ -193,24 +197,9 @@ def correct_scanner_result(all_scan_item): return all_scan_item -def get_osslist(_output_dir, output_file, output_extension, sheet_name=''): - err_reason = '' - oss_list = [] - oss_file_with_fullpath = os.path.join(_output_dir, output_file) - - if os.path.exists(oss_file_with_fullpath): - if output_extension == '.xlsx': - oss_list = read_oss_report(oss_file_with_fullpath, sheet_name) - elif output_extension == '.yaml': - oss_list, _, err_reason = parsing_yml(oss_file_with_fullpath, _output_dir) - else: - err_reason = f'Not supported extension: {output_extension}' - if err_reason: - logger.info(f'get_osslist: {err_reason}') - return oss_list - - -def check_exclude_dir(source_name_or_path): +def check_exclude_dir(source_name_or_path, file_item_exclude): + if file_item_exclude: + return True _exclude_dirs = ["venv", "node_modules", "Pods", "Carthage"] exclude = False diff --git a/src/fosslight_scanner/fosslight_scanner.py b/src/fosslight_scanner/fosslight_scanner.py index 245f48a..09bb2f5 100755 --- a/src/fosslight_scanner/fosslight_scanner.py +++ b/src/fosslight_scanner/fosslight_scanner.py @@ -30,7 +30,7 @@ from fosslight_util.output_format import write_output_file from .common import ( - copy_file, call_analysis_api, update_oss_item, + call_analysis_api, update_oss_item, correct_scanner_result, create_scancodejson ) from ._run_compare import run_compare @@ -198,12 +198,12 @@ def run_scanner(src_path, dep_arguments, output_path, keep_raw_data=False, if run_bin: success, result = call_analysis_api(src_path, "Binary Analysis", - 1, binary_analysis.find_binaries, - abs_path, - os.path.join(_output_dir, output_files["BIN"]), - "", db_url, binary_simple, - correct_mode, correct_fpath, - path_to_exclude=path_to_exclude) + 1, binary_analysis.find_binaries, + abs_path, + os.path.join(_output_dir, output_files["BIN"]), + "", db_url, binary_simple, + correct_mode, correct_fpath, + path_to_exclude=path_to_exclude) if success: all_scan_item.file_items.update(result.file_items) all_cover_items.append(result.cover) @@ -251,7 +251,7 @@ def run_scanner(src_path, dep_arguments, output_path, keep_raw_data=False, if ui_mode: ui_mode_report = f"{output_file_without_ext}.json" - success, err_msg = create_scancodejson(final_report, output_extension, ui_mode_report, src_path) + success, err_msg = create_scancodejson(all_scan_item, ui_mode_report, src_path) if success and os.path.isfile(ui_mode_report): logger.info(f'Generated the ui mode result file: {ui_mode_report}') else: From e2bc299ae1f6f67bc6ab656ea98ccaea625d605e Mon Sep 17 00:00:00 2001 From: "jiyeong.seok" Date: Fri, 6 Sep 2024 20:36:23 +0900 Subject: [PATCH 09/16] Remove prechecker Signed-off-by: jiyeong.seok --- requirements.txt | 3 +-- src/fosslight_scanner/fosslight_scanner.py | 22 ++++------------------ 2 files changed, 5 insertions(+), 20 deletions(-) diff --git a/requirements.txt b/requirements.txt index cca12ba..8de2d2f 100644 --- a/requirements.txt +++ b/requirements.txt @@ -7,5 +7,4 @@ beautifulsoup4 fosslight_util>=2.0.0 fosslight_source>=2.0.0 fosslight_dependency>=4.0.0 -fosslight_binary>=5.0.0 -fosslight_prechecker==3.0.27 \ No newline at end of file +fosslight_binary>=5.0.0 \ No newline at end of file diff --git a/src/fosslight_scanner/fosslight_scanner.py b/src/fosslight_scanner/fosslight_scanner.py index 09bb2f5..83dd63e 100755 --- a/src/fosslight_scanner/fosslight_scanner.py +++ b/src/fosslight_scanner/fosslight_scanner.py @@ -24,7 +24,6 @@ from fosslight_util.timer_thread import TimerThread import fosslight_util.constant as constant from fosslight_util.output_format import check_output_format -from fosslight_prechecker._precheck import run_lint as prechecker_lint from fosslight_util.cover import CoverItem from fosslight_util.oss_item import ScannerItem from fosslight_util.output_format import write_output_file @@ -51,7 +50,7 @@ _executed_path = "" SRC_DIR_FROM_LINK_PREFIX = "fosslight_src_dir_" SCANNER_MODE = [ - "all", "compare", "reuse", "prechecker", "binary", + "all", "compare", "binary", "bin", "src", "source", "dependency", "dep" ] @@ -152,14 +151,7 @@ def run_scanner(src_path, dep_arguments, output_path, keep_raw_data=False, if success: output_files = {"SRC": f"fosslight_src_{_start_time}{output_extension}", "BIN": f"fosslight_bin_{_start_time}{output_extension}", - "DEP": f"fosslight_dep_{_start_time}{output_extension}", - "PRECHECKER": f"fosslight_lint_{_start_time}.yaml"} - if run_prechecker: - output_prechecker = os.path.join(_output_dir, output_files["PRECHECKER"]) - success, result = call_analysis_api(src_path, "Prechecker Lint", - -1, prechecker_lint, - abs_path, False, output_prechecker, - exclude_path=path_to_exclude) + "DEP": f"fosslight_dep_{_start_time}{output_extension}"} if run_src: try: @@ -397,19 +389,13 @@ def run_main(mode_list, path_arg, dep_arguments, output_file_or_dir, file_format run_src = False run_bin = False run_dep = False - run_prechecker = False remove_downloaded_source = False if "all" in mode_list or (not mode_list): run_src = True run_bin = True run_dep = True - run_prechecker = False - if "prechecker" in mode_list or "reuse" in mode_list: - run_prechecker = True else: - if "prechecker" in mode_list or "reuse" in mode_list: - run_prechecker = True if "binary" in mode_list or "bin" in mode_list: run_bin = True if "source" in mode_list or "src" in mode_list: @@ -417,7 +403,7 @@ def run_main(mode_list, path_arg, dep_arguments, output_file_or_dir, file_format if "dependency" in mode_list or "dep" in mode_list: run_dep = True - if run_dep or run_src or run_bin or run_prechecker: + if run_dep or run_src or run_bin: if src_path == "" and url_to_analyze == "": src_path, dep_arguments, url_to_analyze = get_input_mode(_executed_path, mode_list) @@ -439,7 +425,7 @@ def run_main(mode_list, path_arg, dep_arguments, output_file_or_dir, file_format if src_path != "": run_scanner(src_path, dep_arguments, output_path, keep_raw_data, - run_src, run_bin, run_dep, run_prechecker, + run_src, run_bin, run_dep, '', remove_downloaded_source, {}, output_file, output_extension, num_cores, db_url, default_oss_name, default_oss_version, url_to_analyze, From 8907916a9466cfb9b73493f0276e880d9063aff0 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Fri, 6 Sep 2024 11:43:42 +0000 Subject: [PATCH 10/16] =?UTF-8?q?Bump=20version:=201.7.31=20=E2=86=92=202.?= =?UTF-8?q?0.0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .bumpversion.cfg | 2 +- setup.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.bumpversion.cfg b/.bumpversion.cfg index 5d84a1f..b518328 100644 --- a/.bumpversion.cfg +++ b/.bumpversion.cfg @@ -2,7 +2,7 @@ commit = True tag = False message = Bump version: {current_version} → {new_version} -current_version = 1.7.31 +current_version = 2.0.0 [bumpversion:file:setup.py] search = '{current_version}' diff --git a/setup.py b/setup.py index 50f9065..3867570 100644 --- a/setup.py +++ b/setup.py @@ -15,7 +15,7 @@ if __name__ == "__main__": setup( name='fosslight_scanner', - version='1.7.31', + version='2.0.0', package_dir={"": "src"}, packages=find_packages(where='src'), description='FOSSLight Scanner', From 20ee7a17517a9970bac9c1c78a88d3a4b0083043 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Fri, 6 Sep 2024 11:43:51 +0000 Subject: [PATCH 11/16] Update ChangeLog --- CHANGELOG.md | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index a255029..976e17a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,14 @@ # Changelog +## v2.0.0 (06/09/2024) +## Changes +## 🔧 Maintenance + +- Remove prechecker @dd-jy (#119) +- Refactoring OSS item @dd-jy (#118) + +--- + ## v1.7.31 (06/09/2024) ## Changes ## 🚀 Features @@ -292,11 +301,3 @@ ## 🔧 Maintenance - Change the required version of Python to 3.7 @soimkim (#45) - ---- - -## v1.7.2 (16/08/2022) -## Changes -## 🚀 Features - -- Support yaml format of FOSSLight Report @dd-jy (#42) From 799c245ecc47265d79a8e3d22876e3d7657bef51 Mon Sep 17 00:00:00 2001 From: Jiyeong Seok <50347670+dd-jy@users.noreply.github.com> Date: Mon, 9 Sep 2024 10:27:04 +0900 Subject: [PATCH 12/16] Revert "Remove prechecker" --- requirements.txt | 3 ++- src/fosslight_scanner/fosslight_scanner.py | 22 ++++++++++++++++++---- 2 files changed, 20 insertions(+), 5 deletions(-) diff --git a/requirements.txt b/requirements.txt index 8de2d2f..cca12ba 100644 --- a/requirements.txt +++ b/requirements.txt @@ -7,4 +7,5 @@ beautifulsoup4 fosslight_util>=2.0.0 fosslight_source>=2.0.0 fosslight_dependency>=4.0.0 -fosslight_binary>=5.0.0 \ No newline at end of file +fosslight_binary>=5.0.0 +fosslight_prechecker==3.0.27 \ No newline at end of file diff --git a/src/fosslight_scanner/fosslight_scanner.py b/src/fosslight_scanner/fosslight_scanner.py index 83dd63e..09bb2f5 100755 --- a/src/fosslight_scanner/fosslight_scanner.py +++ b/src/fosslight_scanner/fosslight_scanner.py @@ -24,6 +24,7 @@ from fosslight_util.timer_thread import TimerThread import fosslight_util.constant as constant from fosslight_util.output_format import check_output_format +from fosslight_prechecker._precheck import run_lint as prechecker_lint from fosslight_util.cover import CoverItem from fosslight_util.oss_item import ScannerItem from fosslight_util.output_format import write_output_file @@ -50,7 +51,7 @@ _executed_path = "" SRC_DIR_FROM_LINK_PREFIX = "fosslight_src_dir_" SCANNER_MODE = [ - "all", "compare", "binary", + "all", "compare", "reuse", "prechecker", "binary", "bin", "src", "source", "dependency", "dep" ] @@ -151,7 +152,14 @@ def run_scanner(src_path, dep_arguments, output_path, keep_raw_data=False, if success: output_files = {"SRC": f"fosslight_src_{_start_time}{output_extension}", "BIN": f"fosslight_bin_{_start_time}{output_extension}", - "DEP": f"fosslight_dep_{_start_time}{output_extension}"} + "DEP": f"fosslight_dep_{_start_time}{output_extension}", + "PRECHECKER": f"fosslight_lint_{_start_time}.yaml"} + if run_prechecker: + output_prechecker = os.path.join(_output_dir, output_files["PRECHECKER"]) + success, result = call_analysis_api(src_path, "Prechecker Lint", + -1, prechecker_lint, + abs_path, False, output_prechecker, + exclude_path=path_to_exclude) if run_src: try: @@ -389,13 +397,19 @@ def run_main(mode_list, path_arg, dep_arguments, output_file_or_dir, file_format run_src = False run_bin = False run_dep = False + run_prechecker = False remove_downloaded_source = False if "all" in mode_list or (not mode_list): run_src = True run_bin = True run_dep = True + run_prechecker = False + if "prechecker" in mode_list or "reuse" in mode_list: + run_prechecker = True else: + if "prechecker" in mode_list or "reuse" in mode_list: + run_prechecker = True if "binary" in mode_list or "bin" in mode_list: run_bin = True if "source" in mode_list or "src" in mode_list: @@ -403,7 +417,7 @@ def run_main(mode_list, path_arg, dep_arguments, output_file_or_dir, file_format if "dependency" in mode_list or "dep" in mode_list: run_dep = True - if run_dep or run_src or run_bin: + if run_dep or run_src or run_bin or run_prechecker: if src_path == "" and url_to_analyze == "": src_path, dep_arguments, url_to_analyze = get_input_mode(_executed_path, mode_list) @@ -425,7 +439,7 @@ def run_main(mode_list, path_arg, dep_arguments, output_file_or_dir, file_format if src_path != "": run_scanner(src_path, dep_arguments, output_path, keep_raw_data, - run_src, run_bin, run_dep, '', + run_src, run_bin, run_dep, run_prechecker, remove_downloaded_source, {}, output_file, output_extension, num_cores, db_url, default_oss_name, default_oss_version, url_to_analyze, From 5c1b602e88453922731505bf5036bec106d7f77d Mon Sep 17 00:00:00 2001 From: "jiyeong.seok" Date: Mon, 9 Sep 2024 11:42:28 +0900 Subject: [PATCH 13/16] Update prechecker version Signed-off-by: jiyeong.seok --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index cca12ba..a72ca53 100644 --- a/requirements.txt +++ b/requirements.txt @@ -8,4 +8,4 @@ fosslight_util>=2.0.0 fosslight_source>=2.0.0 fosslight_dependency>=4.0.0 fosslight_binary>=5.0.0 -fosslight_prechecker==3.0.27 \ No newline at end of file +fosslight_prechecker>=4.0.0 From 3a4557f8b0ebb202a0577c631bbc739a2bebedcf Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Mon, 9 Sep 2024 02:47:40 +0000 Subject: [PATCH 14/16] =?UTF-8?q?Bump=20version:=202.0.0=20=E2=86=92=202.0?= =?UTF-8?q?.1?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .bumpversion.cfg | 2 +- setup.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.bumpversion.cfg b/.bumpversion.cfg index b518328..c4816f8 100644 --- a/.bumpversion.cfg +++ b/.bumpversion.cfg @@ -2,7 +2,7 @@ commit = True tag = False message = Bump version: {current_version} → {new_version} -current_version = 2.0.0 +current_version = 2.0.1 [bumpversion:file:setup.py] search = '{current_version}' diff --git a/setup.py b/setup.py index 3867570..bf2cf51 100644 --- a/setup.py +++ b/setup.py @@ -15,7 +15,7 @@ if __name__ == "__main__": setup( name='fosslight_scanner', - version='2.0.0', + version='2.0.1', package_dir={"": "src"}, packages=find_packages(where='src'), description='FOSSLight Scanner', From e7d926b1aa84b04ae8398d9168b76e7ee826e4c5 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Mon, 9 Sep 2024 02:47:49 +0000 Subject: [PATCH 15/16] Update ChangeLog --- CHANGELOG.md | 20 ++++++++------------ 1 file changed, 8 insertions(+), 12 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 976e17a..2460a7c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,13 @@ # Changelog +## v2.0.1 (09/09/2024) +## Changes +## 🔧 Maintenance + +- Revert "Remove prechecker" @dd-jy (#120) + +--- + ## v2.0.0 (06/09/2024) ## Changes ## 🔧 Maintenance @@ -289,15 +297,3 @@ - Change the report file name @dd-jy (#48) - Modify help msg if invalid input @bjk7119 (#47) - ---- - -## v1.7.3 (01/09/2022) -## Changes -## 🚀 Features - -- Support 'xlsx' report for Compare mode @dd-jy (#46) - -## 🔧 Maintenance - -- Change the required version of Python to 3.7 @soimkim (#45) From 15ca9b6ac3ce34e0cd65acd57a28c74f4b2e37c0 Mon Sep 17 00:00:00 2001 From: frypam Date: Sat, 28 Sep 2024 16:50:21 +0900 Subject: [PATCH 16/16] ci: Add Docker build and push workflow for automated releases (#122) Signed-off-by: soonhong99 --- .github/workflows/docker-build-push.yml | 34 +++++++++++++++++++++++++ 1 file changed, 34 insertions(+) create mode 100644 .github/workflows/docker-build-push.yml diff --git a/.github/workflows/docker-build-push.yml b/.github/workflows/docker-build-push.yml new file mode 100644 index 0000000..963e54e --- /dev/null +++ b/.github/workflows/docker-build-push.yml @@ -0,0 +1,34 @@ +name: Docker Build and Push + +on: + release: + types: [published] + +jobs: + build-and-push: + runs-on: ubuntu-latest + steps: + - name: Checkout code + uses: actions/checkout@v2 + + - name: Set up QEMU + uses: docker/setup-qemu-action@v1 + + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v1 + + - name: Login to DockerHub + uses: docker/login-action@v1 + with: + username: ${{ secrets.DOCKERHUB_USERNAME }} + password: ${{ secrets.DOCKERHUB_TOKEN }} + + - name: Build and push + uses: docker/build-push-action@v2 + with: + context: . + push: true + platforms: linux/amd64,linux/arm64 + tags: | + fosslight/fosslight_scanner:latest + fosslight/fosslight_scanner:${{ github.event.release.tag_name }} \ No newline at end of file