From adc835d50a5ea2fc5f37593ae4b4aa9a3501affc Mon Sep 17 00:00:00 2001 From: Pablo Santiago Date: Tue, 22 Dec 2020 14:01:29 +0100 Subject: [PATCH 01/22] Reporting feature --- detect_secrets/audit/__init__.py | 1 + detect_secrets/audit/report.py | 137 +++++++++++++++++++++++++++++ detect_secrets/core/usage/audit.py | 29 ++++++ detect_secrets/main.py | 7 ++ 4 files changed, 174 insertions(+) create mode 100644 detect_secrets/audit/report.py diff --git a/detect_secrets/audit/__init__.py b/detect_secrets/audit/__init__.py index a3f9e1c9b..9e1dbdbb4 100644 --- a/detect_secrets/audit/__init__.py +++ b/detect_secrets/audit/__init__.py @@ -1,3 +1,4 @@ from . import analytics # noqa: F401 +from . import report # noqa: F401 from .audit import audit_baseline # noqa: F401 from .compare import compare_baselines # noqa: F401 diff --git a/detect_secrets/audit/report.py b/detect_secrets/audit/report.py new file mode 100644 index 000000000..358886845 --- /dev/null +++ b/detect_secrets/audit/report.py @@ -0,0 +1,137 @@ +import hashlib +import codecs +import json +from enum import Enum + +from .io import print_message +from ..core.plugins.util import Plugin, get_mapping_from_secret_type_to_class +from ..core.scan import _get_lines_from_file, _scan_line +from ..core.potential_secret import PotentialSecret +from ..plugins.base import BasePlugin + + +class SecretClass(Enum): + TRUE_POSITIVE = 1 + FALSE_POSITIVE = 2 + UNKNOWN = 3 + + def from_boolean(is_secret: bool) -> Enum: + if is_secret == None: + return SecretClass.UNKNOWN + elif is_secret: + return SecretClass.TRUE_POSITIVE + else: + return SecretClass.FALSE_POSITIVE + + def to_string(self) -> str: + return self.name + + def get_prioritary(self, secret_class: str) -> Enum: + try: + to_compare = SecretClass[secret_class] + except: + return self + if to_compare.value < self.value: + return secret_class + else: + return self + + +class SecretClassToPrint(Enum): + REAL_SECRET = 1 + FALSE_POSITIVE = 2 + + def from_class(secret_class: 
SecretClass) -> Enum: + if secret_class in [SecretClass.UNKNOWN, SecretClass.TRUE_POSITIVE]: + return SecretClassToPrint.REAL_SECRET + else: + return SecretClassToPrint.FALSE_POSITIVE + + +def print_report( + baseline_file: str, + class_to_print: SecretClassToPrint = None +) -> None: + baseline = json.load(codecs.open(baseline_file, encoding='utf-8')) + details = get_secrets_details_from_baseline(baseline) + plugins = get_mapping_from_secret_type_to_class() + secrets = {} + for filename, secret_type, secret_hash, is_secret in details: + secret_class = SecretClass.from_boolean(is_secret) + if class_to_print != None and SecretClassToPrint.from_class(secret_class) != class_to_print: + continue + try: + detections = get_potential_secrets(filename, plugins[secret_type](), secret_hash) + except: + continue + identifier = hashlib.sha512((secret_hash + filename).encode('utf-8')).hexdigest() + for detection in detections: + if identifier in secrets: + secrets[identifier]['lines'][detection.line_number] = get_line_content(filename, detection.line_number) + if not secret_type in secrets[identifier]['types']: + secrets[identifier]['types'].append(secret_type) + secrets[identifier]['class'] = secret_class.get_prioritary(secrets[identifier]['class']).to_string() + else: + finding = {} + finding['secret'] = detection.secret_value + finding['filename'] = filename + finding['lines'] = {} + finding['lines'][detection.line_number] = get_line_content(filename, detection.line_number) + finding['types'] = [secret_type] + finding['class'] = secret_class.to_string() + secrets[identifier] = finding + + output = [] + for identifier in secrets: + output.append(secrets[identifier]) + + print_message(json.dumps(output, indent=4, sort_keys=True)) + + +def get_secrets_details_from_baseline( + baseline: str +) -> [(str, str, str, bool)]: + """ + :returns: Details of each secret present in the baseline file. 
+ """ + for filename, secrets in baseline['results'].items(): + for secret in secrets: + yield filename, secret['type'], secret['hashed_secret'], secret['is_secret'] + + +def get_secret_class( + is_secret: bool +) -> str: + """ + :returns: Secret class as string. + """ + return 'Unknown' if is_secret == None else 'True positive' if is_secret else 'False positive' + + +def get_potential_secrets( + filename: str, + plugin: Plugin, + secret_to_find: str +) -> [PotentialSecret]: + """ + :returns: List of PotentialSecrets detected by a specific plugin in a file. + """ + for lines in _get_lines_from_file(filename): + for line_number, line in list(enumerate(lines, 1)): + secrets = _scan_line(plugin, filename, line, line_number) + for secret in secrets: + if secret.secret_hash == secret_to_find: + yield secret + + +def get_line_content( + filename: str, + line_number: int +) -> str: + """ + :returns: Line content from filename by line number. + """ + content = codecs.open(filename, encoding='utf-8').read() + if not content: + return None + return content.splitlines()[line_number - 1] \ No newline at end of file diff --git a/detect_secrets/core/usage/audit.py b/detect_secrets/core/usage/audit.py index 453c17a8d..8f55fab41 100644 --- a/detect_secrets/core/usage/audit.py +++ b/detect_secrets/core/usage/audit.py @@ -21,6 +21,7 @@ def add_audit_action(parent: argparse._SubParsersAction) -> argparse.ArgumentPar ) _add_mode_parser(parser) + _add_report_parser(parser) _add_statistics_module(parser) return parser @@ -46,6 +47,34 @@ def _add_mode_parser(parser: argparse.ArgumentParser) -> None: ) +def _add_report_parser(parser: argparse.ArgumentParser) -> None: + parser.add_argument( + '--report', + action='store_true', + help=( + 'Displays a report with the secrets detected' + ) + ) + + parser.add_argument( + '--only-real', + action='store_true', + help=( + 'Only includes real secrets in the report' + ) + ) + + parser.add_argument( + '--only-false', + action='store_true', + help=( 
+ 'Only includes false positives in the report' + ) + ) + + + + def _add_statistics_module(parent: argparse.ArgumentParser) -> None: parser = parent.add_argument_group( title='analytics', diff --git a/detect_secrets/main.py b/detect_secrets/main.py index 7f055d6b5..00f24819c 100644 --- a/detect_secrets/main.py +++ b/detect_secrets/main.py @@ -120,6 +120,13 @@ def handle_audit_action(args: argparse.Namespace) -> None: print(json.dumps(stats.json(), indent=2)) else: print(str(stats)) + elif args.report: + class_to_print = None + if args.only_real: + class_to_print = audit.report.SecretClassToPrint.REAL_SECRET + elif args.only_false: + class_to_print = audit.report.SecretClassToPrint.FALSE_POSITIVE + audit.report.print_report(args.filename[0], class_to_print) else: # Starts interactive session. if args.diff: From f895819443592f559f58b6fdd264511ab1ed61b5 Mon Sep 17 00:00:00 2001 From: Pablo Santiago Date: Wed, 23 Dec 2020 11:08:27 +0100 Subject: [PATCH 02/22] Reporting feature: first corrections --- detect_secrets/audit/report.py | 108 +++++++++++------------------ detect_secrets/core/usage/audit.py | 7 +- detect_secrets/main.py | 2 +- 3 files changed, 47 insertions(+), 70 deletions(-) diff --git a/detect_secrets/audit/report.py b/detect_secrets/audit/report.py index 358886845..f4b328286 100644 --- a/detect_secrets/audit/report.py +++ b/detect_secrets/audit/report.py @@ -3,109 +3,85 @@ import json from enum import Enum -from .io import print_message +from .common import get_baseline_from_file from ..core.plugins.util import Plugin, get_mapping_from_secret_type_to_class from ..core.scan import _get_lines_from_file, _scan_line from ..core.potential_secret import PotentialSecret from ..plugins.base import BasePlugin - - -class SecretClass(Enum): - TRUE_POSITIVE = 1 - FALSE_POSITIVE = 2 - UNKNOWN = 3 - - def from_boolean(is_secret: bool) -> Enum: - if is_secret == None: - return SecretClass.UNKNOWN - elif is_secret: - return SecretClass.TRUE_POSITIVE - else: - return 
SecretClass.FALSE_POSITIVE - - def to_string(self) -> str: - return self.name - - def get_prioritary(self, secret_class: str) -> Enum: - try: - to_compare = SecretClass[secret_class] - except: - return self - if to_compare.value < self.value: - return secret_class - else: - return self +from ..constants import VerifiedResult class SecretClassToPrint(Enum): REAL_SECRET = 1 FALSE_POSITIVE = 2 - def from_class(secret_class: SecretClass) -> Enum: - if secret_class in [SecretClass.UNKNOWN, SecretClass.TRUE_POSITIVE]: + def from_class(secret_class: VerifiedResult) -> Enum: + if secret_class in [VerifiedResult.UNVERIFIED, VerifiedResult.VERIFIED_TRUE]: return SecretClassToPrint.REAL_SECRET else: return SecretClassToPrint.FALSE_POSITIVE -def print_report( +def generate_report( baseline_file: str, class_to_print: SecretClassToPrint = None ) -> None: - baseline = json.load(codecs.open(baseline_file, encoding='utf-8')) - details = get_secrets_details_from_baseline(baseline) plugins = get_mapping_from_secret_type_to_class() secrets = {} - for filename, secret_type, secret_hash, is_secret in details: - secret_class = SecretClass.from_boolean(is_secret) - if class_to_print != None and SecretClassToPrint.from_class(secret_class) != class_to_print: + for filename, secret in get_baseline_from_file(baseline_file): + verified_result = get_verified_result_from_boolean(secret.is_secret) + if class_to_print != None and SecretClassToPrint.from_class(verified_result) != class_to_print: continue try: - detections = get_potential_secrets(filename, plugins[secret_type](), secret_hash) + detections = get_potential_secrets(filename, plugins[secret.type](), secret.secret_hash) except: continue - identifier = hashlib.sha512((secret_hash + filename).encode('utf-8')).hexdigest() + identifier = hashlib.sha512((secret.secret_hash + filename).encode('utf-8')).hexdigest() for detection in detections: if identifier in secrets: secrets[identifier]['lines'][detection.line_number] = 
get_line_content(filename, detection.line_number) - if not secret_type in secrets[identifier]['types']: - secrets[identifier]['types'].append(secret_type) - secrets[identifier]['class'] = secret_class.get_prioritary(secrets[identifier]['class']).to_string() + if not secret.type in secrets[identifier]['types']: + secrets[identifier]['types'].append(secret.type) + secrets[identifier]['category'] = get_prioritary_verified_result(verified_result, VerifiedResult[secrets[identifier]['category']]).name else: - finding = {} - finding['secret'] = detection.secret_value - finding['filename'] = filename - finding['lines'] = {} - finding['lines'][detection.line_number] = get_line_content(filename, detection.line_number) - finding['types'] = [secret_type] - finding['class'] = secret_class.to_string() - secrets[identifier] = finding + secrets[identifier] = { + 'secrets': detection.secret_value, + 'filename': filename, + 'lines': { + detection.line_number: get_line_content(filename, detection.line_number) + }, + 'types': [ + secret.type + ], + 'category': verified_result.name + } output = [] for identifier in secrets: output.append(secrets[identifier]) - print_message(json.dumps(output, indent=4, sort_keys=True)) + return output - -def get_secrets_details_from_baseline( - baseline: str -) -> [(str, str, str, bool)]: - """ - :returns: Details of each secret present in the baseline file. - """ - for filename, secrets in baseline['results'].items(): - for secret in secrets: - yield filename, secret['type'], secret['hashed_secret'], secret['is_secret'] +def get_prioritary_verified_result( + result1: VerifiedResult, + result2: VerifiedResult +) -> VerifiedResult: + if result1.value > result2.value: + return result1 + else: + return result2 -def get_secret_class( + +def get_verified_result_from_boolean( is_secret: bool -) -> str: - """ - :returns: Secret class as string. 
- """ - return 'Unknown' if is_secret == None else 'True positive' if is_secret else 'False positive' +) -> VerifiedResult: + if is_secret == None: + return VerifiedResult.UNVERIFIED + elif is_secret: + return VerifiedResult.VERIFIED_TRUE + else: + return VerifiedResult.VERIFIED_FALSE def get_potential_secrets( diff --git a/detect_secrets/core/usage/audit.py b/detect_secrets/core/usage/audit.py index 8f55fab41..b9a7129b4 100644 --- a/detect_secrets/core/usage/audit.py +++ b/detect_secrets/core/usage/audit.py @@ -55,8 +55,9 @@ def _add_report_parser(parser: argparse.ArgumentParser) -> None: 'Displays a report with the secrets detected' ) ) - - parser.add_argument( + + report_parser = parser.add_mutually_exclusive_group() + report_parser.add_argument( '--only-real', action='store_true', help=( @@ -64,7 +65,7 @@ def _add_report_parser(parser: argparse.ArgumentParser) -> None: ) ) - parser.add_argument( + report_parser.add_argument( '--only-false', action='store_true', help=( diff --git a/detect_secrets/main.py b/detect_secrets/main.py index 00f24819c..2845b2f2d 100644 --- a/detect_secrets/main.py +++ b/detect_secrets/main.py @@ -126,7 +126,7 @@ def handle_audit_action(args: argparse.Namespace) -> None: class_to_print = audit.report.SecretClassToPrint.REAL_SECRET elif args.only_false: class_to_print = audit.report.SecretClassToPrint.FALSE_POSITIVE - audit.report.print_report(args.filename[0], class_to_print) + print(json.dumps(audit.report.generate_report(args.filename[0], class_to_print), indent=4, sort_keys=True)) else: # Starts interactive session. 
if args.diff: From c9633deb89c3f6a9556c883304bc4dadd270ee52 Mon Sep 17 00:00:00 2001 From: Pablo Santiago Date: Fri, 8 Jan 2021 14:29:20 +0100 Subject: [PATCH 03/22] Reporting feature: first test version --- detect_secrets/audit/report.py | 53 +++++++------ detect_secrets/core/usage/audit.py | 10 +-- detect_secrets/main.py | 8 +- tests/audit/report_test.py | 122 +++++++++++++++++++++++++++++ 4 files changed, 161 insertions(+), 32 deletions(-) create mode 100644 tests/audit/report_test.py diff --git a/detect_secrets/audit/report.py b/detect_secrets/audit/report.py index f4b328286..ece9bddd9 100644 --- a/detect_secrets/audit/report.py +++ b/detect_secrets/audit/report.py @@ -1,14 +1,14 @@ -import hashlib import codecs -import json +import hashlib from enum import Enum -from .common import get_baseline_from_file -from ..core.plugins.util import Plugin, get_mapping_from_secret_type_to_class -from ..core.scan import _get_lines_from_file, _scan_line -from ..core.potential_secret import PotentialSecret -from ..plugins.base import BasePlugin from ..constants import VerifiedResult +from ..core.plugins.util import get_mapping_from_secret_type_to_class +from ..core.plugins.util import Plugin +from ..core.potential_secret import PotentialSecret +from ..core.scan import _get_lines_from_file +from ..core.scan import _scan_line +from .common import get_baseline_from_file class SecretClassToPrint(Enum): @@ -19,41 +19,44 @@ def from_class(secret_class: VerifiedResult) -> Enum: if secret_class in [VerifiedResult.UNVERIFIED, VerifiedResult.VERIFIED_TRUE]: return SecretClassToPrint.REAL_SECRET else: - return SecretClassToPrint.FALSE_POSITIVE + return SecretClassToPrint.FALSE_POSITIVE def generate_report( baseline_file: str, - class_to_print: SecretClassToPrint = None + class_to_print: SecretClassToPrint = None, ) -> None: plugins = get_mapping_from_secret_type_to_class() secrets = {} for filename, secret in get_baseline_from_file(baseline_file): verified_result = 
get_verified_result_from_boolean(secret.is_secret) - if class_to_print != None and SecretClassToPrint.from_class(verified_result) != class_to_print: + if class_to_print is not None and SecretClassToPrint.from_class(verified_result) != class_to_print: # noqa: E501 continue try: detections = get_potential_secrets(filename, plugins[secret.type](), secret.secret_hash) - except: + except Exception: continue identifier = hashlib.sha512((secret.secret_hash + filename).encode('utf-8')).hexdigest() for detection in detections: if identifier in secrets: - secrets[identifier]['lines'][detection.line_number] = get_line_content(filename, detection.line_number) - if not secret.type in secrets[identifier]['types']: + secrets[identifier]['lines'][detection.line_number] = get_line_content(filename, detection.line_number) # noqa: E501 + if secret.type not in secrets[identifier]['types']: secrets[identifier]['types'].append(secret.type) - secrets[identifier]['category'] = get_prioritary_verified_result(verified_result, VerifiedResult[secrets[identifier]['category']]).name + secrets[identifier]['category'] = get_prioritary_verified_result( + verified_result, + VerifiedResult[secrets[identifier]['category']], + ).name else: secrets[identifier] = { 'secrets': detection.secret_value, 'filename': filename, 'lines': { - detection.line_number: get_line_content(filename, detection.line_number) + detection.line_number: get_line_content(filename, detection.line_number), }, 'types': [ - secret.type + secret.type, ], - 'category': verified_result.name + 'category': verified_result.name, } output = [] @@ -64,19 +67,19 @@ def generate_report( def get_prioritary_verified_result( - result1: VerifiedResult, - result2: VerifiedResult + result1: VerifiedResult, + result2: VerifiedResult, ) -> VerifiedResult: if result1.value > result2.value: return result1 - else: + else: return result2 def get_verified_result_from_boolean( - is_secret: bool + is_secret: bool, ) -> VerifiedResult: - if is_secret == 
None: + if is_secret is None: return VerifiedResult.UNVERIFIED elif is_secret: return VerifiedResult.VERIFIED_TRUE @@ -87,7 +90,7 @@ def get_verified_result_from_boolean( def get_potential_secrets( filename: str, plugin: Plugin, - secret_to_find: str + secret_to_find: str, ) -> [PotentialSecret]: """ :returns: List of PotentialSecrets detected by a specific plugin in a file. @@ -102,7 +105,7 @@ def get_potential_secrets( def get_line_content( filename: str, - line_number: int + line_number: int, ) -> str: """ :returns: Line content from filename by line number. @@ -110,4 +113,4 @@ def get_line_content( content = codecs.open(filename, encoding='utf-8').read() if not content: return None - return content.splitlines()[line_number - 1] \ No newline at end of file + return content.splitlines()[line_number - 1] diff --git a/detect_secrets/core/usage/audit.py b/detect_secrets/core/usage/audit.py index b9a7129b4..db4dccfc0 100644 --- a/detect_secrets/core/usage/audit.py +++ b/detect_secrets/core/usage/audit.py @@ -53,16 +53,16 @@ def _add_report_parser(parser: argparse.ArgumentParser) -> None: action='store_true', help=( 'Displays a report with the secrets detected' - ) + ), ) - + report_parser = parser.add_mutually_exclusive_group() report_parser.add_argument( '--only-real', action='store_true', help=( 'Only includes real secrets in the report' - ) + ), ) report_parser.add_argument( @@ -70,12 +70,10 @@ def _add_report_parser(parser: argparse.ArgumentParser) -> None: action='store_true', help=( 'Only includes false positives in the report' - ) + ), ) - - def _add_statistics_module(parent: argparse.ArgumentParser) -> None: parser = parent.add_argument_group( title='analytics', diff --git a/detect_secrets/main.py b/detect_secrets/main.py index 2845b2f2d..34c8c23fb 100644 --- a/detect_secrets/main.py +++ b/detect_secrets/main.py @@ -126,7 +126,13 @@ def handle_audit_action(args: argparse.Namespace) -> None: class_to_print = audit.report.SecretClassToPrint.REAL_SECRET elif 
args.only_false: class_to_print = audit.report.SecretClassToPrint.FALSE_POSITIVE - print(json.dumps(audit.report.generate_report(args.filename[0], class_to_print), indent=4, sort_keys=True)) + print( + json.dumps( + audit.report.generate_report(args.filename[0], class_to_print), + indent=4, + sort_keys=True, + ), + ) else: # Starts interactive session. if args.diff: diff --git a/tests/audit/report_test.py b/tests/audit/report_test.py new file mode 100644 index 000000000..308071565 --- /dev/null +++ b/tests/audit/report_test.py @@ -0,0 +1,122 @@ +import os +import random +import string +import tempfile + +import pytest + +from detect_secrets.audit.report import generate_report +from detect_secrets.audit.report import SecretClassToPrint +from detect_secrets.constants import VerifiedResult +from detect_secrets.core import baseline +from detect_secrets.core.secrets_collection import SecretsCollection +from detect_secrets.plugins.basic_auth import BasicAuthDetector +from detect_secrets.plugins.jwt import JwtTokenDetector +from testing.factories import potential_secret_factory as original_potential_secret_factory + + +CREATED_FILES = [] + + +@pytest.mark.parametrize( + 'class_to_print, expected_real, expected_false', + [ + (None, 2, 2), + (SecretClassToPrint.REAL_SECRET, 2, 0), + (SecretClassToPrint.FALSE_POSITIVE, 0, 3), + ], +) +def test_generate_report(class_to_print, expected_real, expected_false): + filename = baseline_file() + output = generate_report(filename, class_to_print) + real, false = count_results(output) + assert real == expected_real + assert false == expected_false + delete_all_temporal_files() + + +def count_results(data): + real_secrets = 0 + false_secrets = 0 + for secret in data: + if SecretClassToPrint.from_class(VerifiedResult[secret['category']]) == SecretClassToPrint.REAL_SECRET: # noqa: E501 + real_secrets += 1 + else: + false_secrets += 1 + return real_secrets, false_secrets + + +def baseline_file(): + # Create our own SecretsCollection 
manually, so that we have fine-tuned control. + url_format = 'http://username:{}@www.example.com/auth' + first_secret = 'value1' + second_secret = 'eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0NTY3ODkwIiwibmFtZSI6IkpvaG4gRG9lIiwiaWF0IjoxNTE2MjM5MDIyfQ' # noqa: E501 + random_secret = ''.join(random.choice(string.ascii_letters) for _ in range(8)) + file_content = 'url = ' + url_format.format(first_secret) \ + + '\nexample = ' + url_format.format(random_secret) \ + + '\nlink = ' + url_format.format(first_secret) + first_file = create_file_with_content(file_content) + file_content = 'url = ' + url_format.format(second_secret) \ + + '\nexample = ' + url_format.format(random_secret) + second_file = create_file_with_content(file_content) + secrets = SecretsCollection() + secrets[first_file] = { + original_potential_secret_factory( + type=BasicAuthDetector.secret_type, + secret=first_secret, + is_secret=True, + line_number=1, + ), + original_potential_secret_factory( + type=BasicAuthDetector.secret_type, + secret=random_secret, + is_secret=False, + line_number=2, + ), + original_potential_secret_factory( + type=BasicAuthDetector.secret_type, + secret=first_secret, + is_secret=True, + line_number=3, + ), + } + secrets[second_file] = { + original_potential_secret_factory( + type=JwtTokenDetector.secret_type, + secret=second_secret, + is_secret=True, + line_number=1, + ), + original_potential_secret_factory( + type=BasicAuthDetector.secret_type, + secret=second_secret, + is_secret=False, + line_number=1, + ), + original_potential_secret_factory( + type=BasicAuthDetector.secret_type, + secret=random_secret, + is_secret=False, + line_number=2, + ), + } + + f = tempfile.NamedTemporaryFile(delete=False) + baseline.save_to_file(secrets, f.name) + f.seek(0) + CREATED_FILES.append(f.name) + return f.name + + +def create_file_with_content(file_content): + f = tempfile.NamedTemporaryFile(mode='w+', encoding='utf-8', delete=False) + f.write(file_content) + f.seek(0) + 
CREATED_FILES.append(f.name) + return f.name + + +def delete_all_temporal_files(): + for file in CREATED_FILES: + if os.path.exists(file): + os.remove(file) From 46d0adb9aff8c90f3fecefbcc7c9a411b8802d07 Mon Sep 17 00:00:00 2001 From: Pablo Santiago Date: Tue, 2 Feb 2021 09:56:59 +0100 Subject: [PATCH 04/22] Reporting feature optimization --- detect_secrets/audit/common.py | 38 +++++++++++++++++++++++++++---- detect_secrets/audit/report.py | 41 +++++----------------------------- tests/audit/report_test.py | 6 +++++ 3 files changed, 45 insertions(+), 40 deletions(-) diff --git a/detect_secrets/audit/common.py b/detect_secrets/audit/common.py index 9413dc475..abf85686e 100644 --- a/detect_secrets/audit/common.py +++ b/detect_secrets/audit/common.py @@ -41,10 +41,7 @@ def get_raw_secret_from_file(secret: PotentialSecret) -> str: :raises: SecretNotFoundOnSpecifiedLineError """ plugin = cast(BasePlugin, plugins.initialize.from_secret_type(secret.type)) - try: - target_line = open_file(secret.filename)[secret.line_number - 1] - except IndexError: - raise SecretNotFoundOnSpecifiedLineError(secret.line_number) + target_line = get_raw_secret_line_from_file(secret) function = plugin.__class__.analyze_line if not hasattr(function, 'injectable_variables'): @@ -69,6 +66,39 @@ def get_raw_secret_from_file(secret: PotentialSecret) -> str: raise SecretNotFoundOnSpecifiedLineError(secret.line_number) +def get_all_raw_secrets_from_file(secret: PotentialSecret) -> [PotentialSecret]: + plugin = cast(BasePlugin, plugins.initialize.from_secret_type(secret.type)) + lines = open_file(secret.filename) + + function = plugin.__class__.analyze_line + if not hasattr(function, 'injectable_variables'): + function.injectable_variables = set( # type: ignore + get_injectable_variables(plugin.analyze_line), + ) + function.path = f'{plugin.__class__.__name__}.analyze_line' # type: ignore + + for line_number, line in enumerate(lines): + identified_secrets = inject_variables_into_function( + 
cast(SelfAwareCallable, function), + self=plugin, + filename=secret.filename, + line=line, + line_number=line_number + 1, # TODO: this will be optional + enable_eager_search=True, + ) + + for identified_secret in (identified_secrets or []): + if identified_secret == secret: + yield identified_secret + + +def get_raw_secret_line_from_file(secret: PotentialSecret) -> str: + try: + return open_file(secret.filename)[secret.line_number - 1] + except IndexError: + raise SecretNotFoundOnSpecifiedLineError(secret.line_number) + + @lru_cache(maxsize=1) def open_file(filename: str) -> List[str]: """ diff --git a/detect_secrets/audit/report.py b/detect_secrets/audit/report.py index ece9bddd9..2b2d73303 100644 --- a/detect_secrets/audit/report.py +++ b/detect_secrets/audit/report.py @@ -8,7 +8,9 @@ from ..core.potential_secret import PotentialSecret from ..core.scan import _get_lines_from_file from ..core.scan import _scan_line +from .common import get_all_raw_secrets_from_file from .common import get_baseline_from_file +from .common import get_raw_secret_line_from_file class SecretClassToPrint(Enum): @@ -26,20 +28,16 @@ def generate_report( baseline_file: str, class_to_print: SecretClassToPrint = None, ) -> None: - plugins = get_mapping_from_secret_type_to_class() secrets = {} for filename, secret in get_baseline_from_file(baseline_file): verified_result = get_verified_result_from_boolean(secret.is_secret) if class_to_print is not None and SecretClassToPrint.from_class(verified_result) != class_to_print: # noqa: E501 continue - try: - detections = get_potential_secrets(filename, plugins[secret.type](), secret.secret_hash) - except Exception: - continue + detections = get_all_raw_secrets_from_file(secret) identifier = hashlib.sha512((secret.secret_hash + filename).encode('utf-8')).hexdigest() for detection in detections: if identifier in secrets: - secrets[identifier]['lines'][detection.line_number] = get_line_content(filename, detection.line_number) # noqa: E501 + 
secrets[identifier]['lines'][detection.line_number] = get_raw_secret_line_from_file(detection) if secret.type not in secrets[identifier]['types']: secrets[identifier]['types'].append(secret.type) secrets[identifier]['category'] = get_prioritary_verified_result( @@ -51,7 +49,7 @@ def generate_report( 'secrets': detection.secret_value, 'filename': filename, 'lines': { - detection.line_number: get_line_content(filename, detection.line_number), + detection.line_number: get_raw_secret_line_from_file(detection), }, 'types': [ secret.type, @@ -85,32 +83,3 @@ def get_verified_result_from_boolean( return VerifiedResult.VERIFIED_TRUE else: return VerifiedResult.VERIFIED_FALSE - - -def get_potential_secrets( - filename: str, - plugin: Plugin, - secret_to_find: str, -) -> [PotentialSecret]: - """ - :returns: List of PotentialSecrets detected by a specific plugin in a file. - """ - for lines in _get_lines_from_file(filename): - for line_number, line in list(enumerate(lines, 1)): - secrets = _scan_line(plugin, filename, line, line_number) - for secret in secrets: - if secret.secret_hash == secret_to_find: - yield secret - - -def get_line_content( - filename: str, - line_number: int, -) -> str: - """ - :returns: Line content from filename by line number. 
- """ - content = codecs.open(filename, encoding='utf-8').read() - if not content: - return None - return content.splitlines()[line_number - 1] diff --git a/tests/audit/report_test.py b/tests/audit/report_test.py index 308071565..887215d4e 100644 --- a/tests/audit/report_test.py +++ b/tests/audit/report_test.py @@ -66,18 +66,21 @@ def baseline_file(): secret=first_secret, is_secret=True, line_number=1, + filename=first_file, ), original_potential_secret_factory( type=BasicAuthDetector.secret_type, secret=random_secret, is_secret=False, line_number=2, + filename=first_file, ), original_potential_secret_factory( type=BasicAuthDetector.secret_type, secret=first_secret, is_secret=True, line_number=3, + filename=first_file, ), } secrets[second_file] = { @@ -86,18 +89,21 @@ def baseline_file(): secret=second_secret, is_secret=True, line_number=1, + filename=second_file, ), original_potential_secret_factory( type=BasicAuthDetector.secret_type, secret=second_secret, is_secret=False, line_number=1, + filename=second_file, ), original_potential_secret_factory( type=BasicAuthDetector.secret_type, secret=random_secret, is_secret=False, line_number=2, + filename=second_file, ), } From f2e2421fcd63ccd91e8b34a35c799c556a2a345c Mon Sep 17 00:00:00 2001 From: Pablo Santiago Date: Tue, 2 Feb 2021 10:01:41 +0100 Subject: [PATCH 05/22] Code correction --- detect_secrets/audit/report.py | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/detect_secrets/audit/report.py b/detect_secrets/audit/report.py index 2b2d73303..9b79d2d24 100644 --- a/detect_secrets/audit/report.py +++ b/detect_secrets/audit/report.py @@ -1,13 +1,7 @@ -import codecs import hashlib from enum import Enum from ..constants import VerifiedResult -from ..core.plugins.util import get_mapping_from_secret_type_to_class -from ..core.plugins.util import Plugin -from ..core.potential_secret import PotentialSecret -from ..core.scan import _get_lines_from_file -from ..core.scan import _scan_line from 
.common import get_all_raw_secrets_from_file from .common import get_baseline_from_file from .common import get_raw_secret_line_from_file @@ -37,7 +31,7 @@ def generate_report( identifier = hashlib.sha512((secret.secret_hash + filename).encode('utf-8')).hexdigest() for detection in detections: if identifier in secrets: - secrets[identifier]['lines'][detection.line_number] = get_raw_secret_line_from_file(detection) + secrets[identifier]['lines'][detection.line_number] = get_raw_secret_line_from_file(detection) # noqa: E501 if secret.type not in secrets[identifier]['types']: secrets[identifier]['types'].append(secret.type) secrets[identifier]['category'] = get_prioritary_verified_result( From 74614cbcd3de2514c219df2942662863710a4f89 Mon Sep 17 00:00:00 2001 From: Pablo Santiago Date: Tue, 2 Feb 2021 11:14:07 +0100 Subject: [PATCH 06/22] Reporting feature documentation --- README.md | 29 +++++++----- docs/audit.md | 123 ++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 140 insertions(+), 12 deletions(-) diff --git a/README.md b/README.md index b4b071aec..12bda5c52 100644 --- a/README.md +++ b/README.md @@ -297,29 +297,34 @@ const secret = "hunter2"; ```bash $ detect-secrets audit --help -usage: detect-secrets audit [-h] [--diff] [--stats] [--json] - filename [filename ...] +usage: detect-secrets audit [-h] [--diff] [--stats] + [--report] [--only-real | --only-false] + [--json] + filename [filename ...] Auditing a baseline allows analysts to label results, and optimize plugins for the highest signal-to-noise ratio for their environment. positional arguments: - filename Audit a given baseline file to distinguish the difference - between false and true positives. + filename Audit a given baseline file to distinguish the difference + between false and true positives. 
optional arguments: - -h, --help show this help message and exit - --diff Allows the comparison of two baseline files, in order to - effectively distinguish the difference between various plugin - configurations. - --stats Displays the results of an interactive auditing session which - have been saved to a baseline file. + -h, --help show this help message and exit + --diff Allows the comparison of two baseline files, in order to + effectively distinguish the difference between various plugin + configurations. + --stats Displays the results of an interactive auditing session which + have been saved to a baseline file. + --report Displays a report with the secrets detected + --only-real Only includes real secrets in the report + --only-false Only includes false positives in the report analytics: - Quantify the success of your plugins based on the labelled results in your + Quantify the success of your plugins based on the labelled results in your baseline. To be used with the statisitcs mode (--stats). - --json Outputs results in a machine-readable format. + --json Outputs results in a machine-readable format. ``` ## Configuration diff --git a/docs/audit.md b/docs/audit.md index 687e5eca2..5fa34bbbc 100644 --- a/docs/audit.md +++ b/docs/audit.md @@ -140,3 +140,126 @@ There are times you want to extract the raw secret values to run further analysi so with the `--raw` flag. TODO: Example when this feature is written up. + +## Report generation + +Maybe, you need to generate a full report with all the detect-secrets findings. 
You can generate +one with the `--report` flag: + +'''bash +$ detect-secrets audit --report .secret.baseline +[ + { + "category": "VERIFIED_TRUE", + "filename": "test.properties", + "lines": { + "1": "secret=value", + "6": "password=value" + }, + "secrets": "value", + "types": [ + "Secret Keyword" + ] + }, + { + "category": "UNVERIFIED", + "filename": "test.properties", + "lines": { + "2": "password=changeit", + "5": "password=changeit" + }, + "secrets": "changeit", + "types": [ + "Secret Keyword" + ] + }, + { + "category": "VERIFIED_TRUE", + "filename": "test.properties", + "lines": { + "3": "password=eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0NTY3ODkwIiwibmFtZSI6IkpvaG4gRG9lIiwiaWF0IjoxNTE2MjM5MDIyfQ.", + "4": "test=eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0NTY3ODkwIiwibmFtZSI6IkpvaG4gRG9lIiwiaWF0IjoxNTE2MjM5MDIyfQ." + }, + "secrets": "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0NTY3ODkwIiwibmFtZSI6IkpvaG4gRG9lIiwiaWF0IjoxNTE2MjM5MDIyfQ.", + "types": [ + "Secret Keyword", + "JSON Web Token" + ] + }, + { + "category": "VERIFIED_FALSE", + "filename": "test.properties", + "lines": { + "7": "password=faketest" + }, + "secrets": "faketest", + "types": [ + "Secret Keyword" + ] + } +] +''' + +You can also select only the real secrets with the option `--only-real`: + +'''bash +$ detect-secrets audit --report --only-real .secret.baseline +[ + { + "category": "VERIFIED_TRUE", + "filename": "test.properties", + "lines": { + "1": "secret=value", + "6": "password=value" + }, + "secrets": "value", + "types": [ + "Secret Keyword" + ] + }, + { + "category": "UNVERIFIED", + "filename": "test.properties", + "lines": { + "2": "password=changeit", + "5": "password=changeit" + }, + "secrets": "changeit", + "types": [ + "Secret Keyword" + ] + }, + { + "category": "VERIFIED_TRUE", + "filename": "test.properties", + "lines": { + "3": "password=eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0NTY3ODkwIiwibmFtZSI6IkpvaG4gRG9lIiwiaWF0IjoxNTE2MjM5MDIyfQ.", 
+ "4": "test=eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0NTY3ODkwIiwibmFtZSI6IkpvaG4gRG9lIiwiaWF0IjoxNTE2MjM5MDIyfQ." + }, + "secrets": "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0NTY3ODkwIiwibmFtZSI6IkpvaG4gRG9lIiwiaWF0IjoxNTE2MjM5MDIyfQ.", + "types": [ + "JSON Web Token", + "Secret Keyword" + ] + } +] +''' + +Or include only the false positives with `--only-false`: + +'''bash +$ detect-secrets audit --report --only-false .secret.baseline +[ + { + "category": "VERIFIED_FALSE", + "filename": "test.properties", + "lines": { + "7": "password=faketest" + }, + "secrets": "faketest", + "types": [ + "Secret Keyword" + ] + } +] +''' \ No newline at end of file From 047325785ad755dcc2c4a9ec19de4672c383be00 Mon Sep 17 00:00:00 2001 From: Pablo Santiago Date: Tue, 2 Feb 2021 12:15:24 +0100 Subject: [PATCH 07/22] Documentation corrections --- README.md | 16 ++++++++-------- docs/audit.md | 2 +- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/README.md b/README.md index dad4f45cf..8804f676e 100644 --- a/README.md +++ b/README.md @@ -357,31 +357,31 @@ const secret = "hunter2"; ```bash $ detect-secrets audit --help -usage: detect-secrets audit [-h] [--diff] [--stats] - [--report] [--only-real | --only-false] - [--json] +usage: detect-secrets audit [-h] [--diff] [--stats] + [--report] [--only-real | --only-false] + [--json] filename [filename ...] Auditing a baseline allows analysts to label results, and optimize plugins for the highest signal-to-noise ratio for their environment. positional arguments: - filename Audit a given baseline file to distinguish the difference + filename Audit a given baseline file to distinguish the difference between false and true positives. 
optional arguments: -h, --help show this help message and exit - --diff Allows the comparison of two baseline files, in order to - effectively distinguish the difference between various plugin + --diff Allows the comparison of two baseline files, in order to + effectively distinguish the difference between various plugin configurations. - --stats Displays the results of an interactive auditing session which + --stats Displays the results of an interactive auditing session which have been saved to a baseline file. --report Displays a report with the secrets detected --only-real Only includes real secrets in the report --only-false Only includes false positives in the report analytics: - Quantify the success of your plugins based on the labelled results in your + Quantify the success of your plugins based on the labelled results in your baseline. To be used with the statisitcs mode (--stats). --json Outputs results in a machine-readable format. diff --git a/docs/audit.md b/docs/audit.md index 5fa34bbbc..ae6e43645 100644 --- a/docs/audit.md +++ b/docs/audit.md @@ -262,4 +262,4 @@ $ detect-secrets audit --report --only-false .secret.baseline ] } ] -''' \ No newline at end of file +''' From 45ec64147e3e66ed662897d32da07d3dfcfe4476 Mon Sep 17 00:00:00 2001 From: Pablo Santiago Date: Tue, 2 Feb 2021 12:19:51 +0100 Subject: [PATCH 08/22] Pre-commit errors fix --- detect_secrets/audit/common.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/detect_secrets/audit/common.py b/detect_secrets/audit/common.py index f36ccf134..93b72485e 100644 --- a/detect_secrets/audit/common.py +++ b/detect_secrets/audit/common.py @@ -126,7 +126,7 @@ def get_all_secrets_from_file( if identified_secret == secret: all_secrets.append(identified_secret) - if len(all_secrets) == 0 and is_first_time_opening_file and not line_getter.use_eager_transformers: + if len(all_secrets) == 0 and is_first_time_opening_file and not line_getter.use_eager_transformers: # noqa: E501 
line_getter.use_eager_transformers = True else: return all_secrets From 8f45d254e08c3223e6da72c05de4a7d163510d45 Mon Sep 17 00:00:00 2001 From: Pablo Santiago Date: Thu, 25 Feb 2021 14:59:41 +0100 Subject: [PATCH 09/22] Reporting feature optimization --- detect_secrets/audit/common.py | 72 ++++++++++++------------------ detect_secrets/audit/report.py | 47 +++++++------------ detect_secrets/constants.py | 11 +++++ detect_secrets/core/usage/audit.py | 10 ++++- 4 files changed, 64 insertions(+), 76 deletions(-) diff --git a/detect_secrets/audit/common.py b/detect_secrets/audit/common.py index 93b72485e..bb28d797c 100644 --- a/detect_secrets/audit/common.py +++ b/detect_secrets/audit/common.py @@ -53,64 +53,44 @@ def get_raw_secret_from_file( :raises: SecretNotFoundOnSpecifiedLineError :raises: NoLineNumberError """ - plugin = cast(BasePlugin, plugins.initialize.from_secret_type(secret.type)) - line_getter = line_getter_factory(secret.filename) - is_first_time_opening_file = not line_getter.has_cached_lines - while True: - if not secret.line_number: - raise NoLineNumberError - - try: - target_line = line_getter.lines[secret.line_number - 1] - except IndexError: - raise SecretNotFoundOnSpecifiedLineError(secret.line_number) - - identified_secrets = call_function_with_arguments( - plugin.analyze_line, - filename=secret.filename, - line=target_line, - line_number=secret.line_number, - - # We enable eager search, because we *know* there's a secret here -- the baseline - # flagged it after all. - enable_eager_search=True, - ) - - for identified_secret in (identified_secrets or []): - if identified_secret == secret: - return cast(str, identified_secret.secret_value) - - # No secret found -- maybe it's due to invalid file transformation. - # However, this only applies to the first execution of the file, since we want a - # consistent transformed file. - # - # NOTE: This is defensive coding. 
If we assume that this is only run on valid baselines, - # then the baseline wouldn't record secrets that were both found with and without an eager - # transformer, in the same file. - if is_first_time_opening_file and not line_getter.use_eager_transformers: - line_getter.use_eager_transformers = True - else: - break + if not secret.line_number: + raise NoLineNumberError + + for item in get_raw_secrets_from_file(secret, line_getter_factory): + return item.secret_value raise SecretNotFoundOnSpecifiedLineError(secret.line_number) -def get_all_secrets_from_file( +def get_raw_secrets_from_file( secret: PotentialSecret, line_getter_factory: Callable[[str], 'LineGetter'] = open_file, ) -> [PotentialSecret]: """ We're analyzing the contents straight from the baseline, and therefore, we don't know the secret value (by design). However, we have secret hashes, filenames, and how we detected - it was a secret in the first place, so we can reverse-engineer it. This method searchs all - the ocurrences of one secret in one file using one plugin. + it was a secret in the first place, so we can reverse-engineer it. This method searches all + the occurrences of one secret in one file using one plugin. 
+ + :raises: SecretNotFoundOnSpecifiedLineError + :raises: NoLineNumberError """ plugin = cast(BasePlugin, plugins.initialize.from_secret_type(secret.type)) line_getter = line_getter_factory(secret.filename) is_first_time_opening_file = not line_getter.has_cached_lines all_secrets = [] while True: - for line_number, line in enumerate(line_getter.lines): + if secret.line_number: + try: + lines_to_scan = [line_getter.lines[secret.line_number - 1]] + line_numbers = [secret.line_number] + except IndexError: + raise SecretNotFoundOnSpecifiedLineError(secret.line_number) + else: + lines_to_scan = line_getter.lines + line_numbers = range(len(lines_to_scan)) + + for line_number, line in zip(line_numbers, lines_to_scan): identified_secrets = call_function_with_arguments( plugin.analyze_line, filename=secret.filename, @@ -119,14 +99,18 @@ def get_all_secrets_from_file( # We enable eager search, because we *know* there's a secret here -- the baseline # flagged it after all. - enable_eager_search=True, + enable_eager_search=bool(secret.line_number), ) for identified_secret in (identified_secrets or []): if identified_secret == secret: all_secrets.append(identified_secret) - if len(all_secrets) == 0 and is_first_time_opening_file and not line_getter.use_eager_transformers: # noqa: E501 + if ( + len(all_secrets) == 0 and + is_first_time_opening_file and + not line_getter.use_eager_transformers + ): line_getter.use_eager_transformers = True else: return all_secrets diff --git a/detect_secrets/audit/report.py b/detect_secrets/audit/report.py index af8c3eefe..25dc11917 100644 --- a/detect_secrets/audit/report.py +++ b/detect_secrets/audit/report.py @@ -1,10 +1,9 @@ -import hashlib from enum import Enum from typing import Callable from ..constants import VerifiedResult -from .common import get_all_secrets_from_file from .common import get_baseline_from_file +from .common import get_raw_secrets_from_file from .common import LineGetter from .common import open_file @@ -27,23 +26,26 @@ 
def generate_report( ) -> None: secrets = {} for filename, secret in get_baseline_from_file(baseline_file): - verified_result = get_verified_result_from_boolean(secret.is_secret) - if class_to_print is not None and SecretClassToPrint.from_class(verified_result) != class_to_print: # noqa: E501 + verified_result = VerifiedResult.from_secret(secret) + if ( + class_to_print is not None and + SecretClassToPrint.from_class(verified_result) != class_to_print + ): continue - detections = get_all_secrets_from_file(secret) - identifier = hashlib.sha512((secret.secret_hash + filename).encode('utf-8')).hexdigest() + secret.line_number = 0 + detections = get_raw_secrets_from_file(secret) line_getter = line_getter_factory(filename) for detection in detections: - if identifier in secrets: - secrets[identifier]['lines'][detection.line_number] = line_getter.lines[detection.line_number - 1] # noqa: E501 - if secret.type not in secrets[identifier]['types']: - secrets[identifier]['types'].append(secret.type) - secrets[identifier]['category'] = get_prioritary_verified_result( + if (secret.secret_hash, filename) in secrets: + secrets[(secret.secret_hash, filename)]['lines'][detection.line_number] = line_getter.lines[detection.line_number - 1] # noqa: E501 + if secret.type not in secrets[(secret.secret_hash, filename)]['types']: + secrets[(secret.secret_hash, filename)]['types'].append(secret.type) + secrets[(secret.secret_hash, filename)]['category'] = get_prioritized_verified_result( # noqa: E501 verified_result, - VerifiedResult[secrets[identifier]['category']], + VerifiedResult[secrets[(secret.secret_hash, filename)]['category']], ).name else: - secrets[identifier] = { + secrets[(secret.secret_hash, filename)] = { 'secrets': detection.secret_value, 'filename': filename, 'lines': { @@ -55,14 +57,10 @@ def generate_report( 'category': verified_result.name, } - output = [] - for identifier in secrets: - output.append(secrets[identifier]) + return list(secrets.values()) - return output - 
-def get_prioritary_verified_result( +def get_prioritized_verified_result( result1: VerifiedResult, result2: VerifiedResult, ) -> VerifiedResult: @@ -70,14 +68,3 @@ def get_prioritary_verified_result( return result1 else: return result2 - - -def get_verified_result_from_boolean( - is_secret: bool, -) -> VerifiedResult: - if is_secret is None: - return VerifiedResult.UNVERIFIED - elif is_secret: - return VerifiedResult.VERIFIED_TRUE - else: - return VerifiedResult.VERIFIED_FALSE diff --git a/detect_secrets/constants.py b/detect_secrets/constants.py index f9763eb59..7c84d1f8a 100644 --- a/detect_secrets/constants.py +++ b/detect_secrets/constants.py @@ -1,7 +1,18 @@ from enum import Enum +from .core.potential_secret import PotentialSecret + class VerifiedResult(Enum): VERIFIED_FALSE = 1 UNVERIFIED = 2 VERIFIED_TRUE = 3 + + @staticmethod + def from_secret(secret: PotentialSecret) -> 'VerifiedResult': + if secret.is_secret is None: + return VerifiedResult.UNVERIFIED + elif secret.is_secret: + return VerifiedResult.VERIFIED_TRUE + else: + return VerifiedResult.VERIFIED_FALSE diff --git a/detect_secrets/core/usage/audit.py b/detect_secrets/core/usage/audit.py index db4dccfc0..9cc784f92 100644 --- a/detect_secrets/core/usage/audit.py +++ b/detect_secrets/core/usage/audit.py @@ -21,7 +21,7 @@ def add_audit_action(parent: argparse._SubParsersAction) -> argparse.ArgumentPar ) _add_mode_parser(parser) - _add_report_parser(parser) + _add_report_module(parser) _add_statistics_module(parser) return parser @@ -47,7 +47,13 @@ def _add_mode_parser(parser: argparse.ArgumentParser) -> None: ) -def _add_report_parser(parser: argparse.ArgumentParser) -> None: +def _add_report_module(parent: argparse.ArgumentParser) -> None: + parser = parent.add_argument_group( + title='reporting', + description=( + 'Display a summary with all the findings and the made decisions' + ), + ) parser.add_argument( '--report', action='store_true', From efd9cdaa7da2062281b30bed25afe8cdc80d7a3f Mon Sep 17 
00:00:00 2001 From: Pablo Santiago Date: Thu, 25 Feb 2021 20:01:36 +0100 Subject: [PATCH 10/22] Reporting test correction --- tests/audit/report_test.py | 260 ++++++++++++++++++++++++------------- 1 file changed, 173 insertions(+), 87 deletions(-) diff --git a/tests/audit/report_test.py b/tests/audit/report_test.py index 887215d4e..d544d21e1 100644 --- a/tests/audit/report_test.py +++ b/tests/audit/report_test.py @@ -1,7 +1,8 @@ -import os import random import string import tempfile +import textwrap +from contextlib import contextmanager import pytest @@ -12,27 +13,144 @@ from detect_secrets.core.secrets_collection import SecretsCollection from detect_secrets.plugins.basic_auth import BasicAuthDetector from detect_secrets.plugins.jwt import JwtTokenDetector -from testing.factories import potential_secret_factory as original_potential_secret_factory +from detect_secrets.settings import transient_settings -CREATED_FILES = [] +url_format = 'http://username:{}@www.example.com/auth' +first_secret = 'value1' +second_secret = 'eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0NTY3ODkwIiwibmFtZSI6IkpvaG4gRG9lIiwiaWF0IjoxNTE2MjM5MDIyfQ' # noqa: E501 +random_secret = ''.join(random.choice(string.ascii_letters) for _ in range(8)) @pytest.mark.parametrize( - 'class_to_print, expected_real, expected_false', + 'class_to_print, expected_real, expected_false, expected_output', [ - (None, 2, 2), - (SecretClassToPrint.REAL_SECRET, 2, 0), - (SecretClassToPrint.FALSE_POSITIVE, 0, 3), + ( + None, 3, 1, [ + { + 'category': 'VERIFIED_TRUE', + 'lines': { + 1: 'url = {}'.format(url_format.format(first_secret)), + 3: 'link = {}'.format(url_format.format(first_secret)), + }, + 'secrets': first_secret, + 'types': [ + BasicAuthDetector.secret_type, + ], + }, + { + 'category': 'UNVERIFIED', + 'lines': { + 2: 'example = {}'.format(url_format.format(random_secret)), + }, + 'secrets': random_secret, + 'types': [ + BasicAuthDetector.secret_type, + ], + }, + { + 'category': 'VERIFIED_TRUE', + 
'lines': { + 1: 'url = {}'.format(url_format.format(second_secret)), + }, + 'secrets': second_secret, + 'types': [ + BasicAuthDetector.secret_type, + JwtTokenDetector.secret_type, + ], + }, + { + 'category': 'VERIFIED_FALSE', + 'lines': { + 2: 'example = {}'.format(url_format.format(random_secret)), + }, + 'secrets': random_secret, + 'types': [ + BasicAuthDetector.secret_type, + ], + }, + ], + ), + ( + SecretClassToPrint.REAL_SECRET, 3, 0, [ + { + 'category': 'VERIFIED_TRUE', + 'lines': { + 1: 'url = {}'.format(url_format.format(first_secret)), + 3: 'link = {}'.format(url_format.format(first_secret)), + }, + 'secrets': first_secret, + 'types': [ + BasicAuthDetector.secret_type, + ], + }, + { + 'category': 'UNVERIFIED', + 'lines': { + 2: 'example = {}'.format(url_format.format(random_secret)), + }, + 'secrets': random_secret, + 'types': [ + BasicAuthDetector.secret_type, + ], + }, + { + 'category': 'VERIFIED_TRUE', + 'lines': { + 1: 'url = {}'.format(url_format.format(second_secret)), + }, + 'secrets': second_secret, + 'types': [ + JwtTokenDetector.secret_type, + ], + }, + ], + ), + ( + SecretClassToPrint.FALSE_POSITIVE, 0, 2, [ + { + 'category': 'VERIFIED_FALSE', + 'lines': { + 1: 'url = {}'.format(url_format.format(second_secret)), + }, + 'secrets': second_secret, + 'types': [ + BasicAuthDetector.secret_type, + ], + }, + { + 'category': 'VERIFIED_FALSE', + 'lines': { + 2: 'example = {}'.format(url_format.format(random_secret)), + }, + 'secrets': random_secret, + 'types': [ + BasicAuthDetector.secret_type, + ], + }, + ], + ), ], ) -def test_generate_report(class_to_print, expected_real, expected_false): - filename = baseline_file() - output = generate_report(filename, class_to_print) +def test_generate_report( + class_to_print, + expected_real, + expected_false, + expected_output, + baseline_file, +): + output = generate_report(baseline_file, class_to_print) real, false = count_results(output) assert real == expected_real assert false == expected_false - 
delete_all_temporal_files() + for expected in expected_output: + found = False + for item in output: + if expected['secrets'] == item['secrets'] and expected['category'] == item['category']: + for key in expected.keys(): + assert item[key] == expected[key] + found = True + assert found def count_results(data): @@ -46,83 +164,51 @@ def count_results(data): return real_secrets, false_secrets -def baseline_file(): - # Create our own SecretsCollection manually, so that we have fine-tuned control. - url_format = 'http://username:{}@www.example.com/auth' - first_secret = 'value1' - second_secret = 'eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0NTY3ODkwIiwibmFtZSI6IkpvaG4gRG9lIiwiaWF0IjoxNTE2MjM5MDIyfQ' # noqa: E501 - random_secret = ''.join(random.choice(string.ascii_letters) for _ in range(8)) - file_content = 'url = ' + url_format.format(first_secret) \ - + '\nexample = ' + url_format.format(random_secret) \ - + '\nlink = ' + url_format.format(first_secret) - first_file = create_file_with_content(file_content) - file_content = 'url = ' + url_format.format(second_secret) \ - + '\nexample = ' + url_format.format(random_secret) - second_file = create_file_with_content(file_content) - secrets = SecretsCollection() - secrets[first_file] = { - original_potential_secret_factory( - type=BasicAuthDetector.secret_type, - secret=first_secret, - is_secret=True, - line_number=1, - filename=first_file, - ), - original_potential_secret_factory( - type=BasicAuthDetector.secret_type, - secret=random_secret, - is_secret=False, - line_number=2, - filename=first_file, - ), - original_potential_secret_factory( - type=BasicAuthDetector.secret_type, - secret=first_secret, - is_secret=True, - line_number=3, - filename=first_file, - ), - } - secrets[second_file] = { - original_potential_secret_factory( - type=JwtTokenDetector.secret_type, - secret=second_secret, - is_secret=True, - line_number=1, - filename=second_file, - ), - original_potential_secret_factory( - 
type=BasicAuthDetector.secret_type, - secret=second_secret, - is_secret=False, - line_number=1, - filename=second_file, - ), - original_potential_secret_factory( - type=BasicAuthDetector.secret_type, - secret=random_secret, - is_secret=False, - line_number=2, - filename=second_file, - ), - } - - f = tempfile.NamedTemporaryFile(delete=False) - baseline.save_to_file(secrets, f.name) - f.seek(0) - CREATED_FILES.append(f.name) - return f.name +@contextmanager +def create_file_with_content(content): + with tempfile.NamedTemporaryFile() as f: + f.write(content.encode()) + f.seek(0) + yield f.name -def create_file_with_content(file_content): - f = tempfile.NamedTemporaryFile(mode='w+', encoding='utf-8', delete=False) - f.write(file_content) - f.seek(0) - CREATED_FILES.append(f.name) - return f.name +@pytest.fixture +def baseline_file(): + # Create our own SecretsCollection manually, so that we have fine-tuned control. + first_content = textwrap.dedent(f""" + url = {url_format.format(first_secret)} + example = {url_format.format(random_secret)} + link = {url_format.format(first_secret)} + """)[1:] + second_content = textwrap.dedent(f""" + url = {url_format.format(second_secret)} + example = {url_format.format(random_secret)} + """)[1:] + with create_file_with_content(first_content) as first_file, \ + create_file_with_content(second_content) as second_file, \ + tempfile.NamedTemporaryFile() as baseline_file, \ + transient_settings({ + 'plugins_used': [ + {'name': 'BasicAuthDetector'}, + {'name': 'JwtTokenDetector'}, -def delete_all_temporal_files(): - for file in CREATED_FILES: - if os.path.exists(file): - os.remove(file) + ], + }): + secrets = SecretsCollection() + secrets.scan_file(first_file) + secrets.scan_file(second_file) + labels = { + (first_file, BasicAuthDetector.secret_type, 1): True, + (first_file, BasicAuthDetector.secret_type, 2): None, + (first_file, BasicAuthDetector.secret_type, 3): True, + (second_file, JwtTokenDetector.secret_type, 1): True, + 
(second_file, BasicAuthDetector.secret_type, 1): False, + (second_file, BasicAuthDetector.secret_type, 2): False, + } + for item in secrets: + _, secret = item + secret.is_secret = labels[(secret.filename, secret.type, secret.line_number)] + baseline.save_to_file(secrets, baseline_file.name) + baseline_file.seek(0) + yield baseline_file.name From d1430e1796deea07c656ed91c8b49bb1858ce81a Mon Sep 17 00:00:00 2001 From: Pablo Santiago Date: Thu, 25 Feb 2021 20:08:18 +0100 Subject: [PATCH 11/22] Documentation upgrade --- README.md | 4 ++++ detect_secrets/core/usage/audit.py | 18 ++++++++++-------- 2 files changed, 14 insertions(+), 8 deletions(-) diff --git a/README.md b/README.md index 8804f676e..9d2aa0356 100644 --- a/README.md +++ b/README.md @@ -377,6 +377,10 @@ optional arguments: --stats Displays the results of an interactive auditing session which have been saved to a baseline file. --report Displays a report with the secrets detected + +reporting: + Display a summary with all the findings and the made decisions. To be used with the report mode (--report). 
+ --only-real Only includes real secrets in the report --only-false Only includes false positives in the report diff --git a/detect_secrets/core/usage/audit.py b/detect_secrets/core/usage/audit.py index 9cc784f92..7d864b9ef 100644 --- a/detect_secrets/core/usage/audit.py +++ b/detect_secrets/core/usage/audit.py @@ -46,14 +46,6 @@ def _add_mode_parser(parser: argparse.ArgumentParser) -> None: ), ) - -def _add_report_module(parent: argparse.ArgumentParser) -> None: - parser = parent.add_argument_group( - title='reporting', - description=( - 'Display a summary with all the findings and the made decisions' - ), - ) parser.add_argument( '--report', action='store_true', @@ -62,6 +54,16 @@ def _add_report_module(parent: argparse.ArgumentParser) -> None: ), ) + +def _add_report_module(parent: argparse.ArgumentParser) -> None: + parser = parent.add_argument_group( + title='reporting', + description=( + 'Display a report with all the findings and the made decisions. ' + 'To be used with the report mode (--report).' 
+ ), + ) + report_parser = parser.add_mutually_exclusive_group() report_parser.add_argument( '--only-real', From c4e4a2c431601cc8d828225b09e953bbdfbaab45 Mon Sep 17 00:00:00 2001 From: Pablo Santiago Date: Fri, 26 Feb 2021 18:00:31 +0100 Subject: [PATCH 12/22] Corrections --- detect_secrets/audit/report.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/detect_secrets/audit/report.py b/detect_secrets/audit/report.py index 25dc11917..9a82df90d 100644 --- a/detect_secrets/audit/report.py +++ b/detect_secrets/audit/report.py @@ -12,7 +12,7 @@ class SecretClassToPrint(Enum): REAL_SECRET = 1 FALSE_POSITIVE = 2 - def from_class(secret_class: VerifiedResult) -> Enum: + def from_class(secret_class: VerifiedResult) -> 'SecretClassToPrint': if secret_class in [VerifiedResult.UNVERIFIED, VerifiedResult.VERIFIED_TRUE]: return SecretClassToPrint.REAL_SECRET else: @@ -32,6 +32,7 @@ def generate_report( SecretClassToPrint.from_class(verified_result) != class_to_print ): continue + # Removal of the stored line number is required to force the complete file scanning to obtain all the secret occurrences. 
secret.line_number = 0 detections = get_raw_secrets_from_file(secret) line_getter = line_getter_factory(filename) From fcbee98e3f9b8a14962d293b096bb25d1de7547d Mon Sep 17 00:00:00 2001 From: Pablo Santiago Date: Fri, 26 Feb 2021 18:25:00 +0100 Subject: [PATCH 13/22] Corrections --- detect_secrets/audit/common.py | 2 +- detect_secrets/audit/report.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/detect_secrets/audit/common.py b/detect_secrets/audit/common.py index bb28d797c..977854b99 100644 --- a/detect_secrets/audit/common.py +++ b/detect_secrets/audit/common.py @@ -83,7 +83,7 @@ def get_raw_secrets_from_file( if secret.line_number: try: lines_to_scan = [line_getter.lines[secret.line_number - 1]] - line_numbers = [secret.line_number] + line_numbers = [secret.line_number - 1] except IndexError: raise SecretNotFoundOnSpecifiedLineError(secret.line_number) else: diff --git a/detect_secrets/audit/report.py b/detect_secrets/audit/report.py index 9a82df90d..164aee80b 100644 --- a/detect_secrets/audit/report.py +++ b/detect_secrets/audit/report.py @@ -32,7 +32,7 @@ def generate_report( SecretClassToPrint.from_class(verified_result) != class_to_print ): continue - # Removal of the stored line number is required to force the complete file scanning to obtain all the secret occurrences. + # Removal of the stored line number is required to force the complete file scanning to obtain all the secret occurrences. 
# noqa: E501 secret.line_number = 0 detections = get_raw_secrets_from_file(secret) line_getter = line_getter_factory(filename) From 0c79989dbf8696514cd93273e8165b33300b2f2d Mon Sep 17 00:00:00 2001 From: Pablo Santiago Date: Mon, 15 Mar 2021 13:07:14 +0100 Subject: [PATCH 14/22] New keywords in the denylist --- detect_secrets/plugins/keyword.py | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/detect_secrets/plugins/keyword.py b/detect_secrets/plugins/keyword.py index 4002e8c03..4173276bf 100644 --- a/detect_secrets/plugins/keyword.py +++ b/detect_secrets/plugins/keyword.py @@ -40,15 +40,23 @@ # Note: All values here should be lowercase DENYLIST = ( - 'apikey', - 'api_key', - 'aws_secret_access_key', - 'db_pass', + 'api_?key', + 'auth_?key', + 'service_?key', + 'account_?key', + 'db_?key', + 'database_?key', + 'priv_?key', + 'private_?key', + 'client_?key', + 'db_?pass', + 'database_?pass', + 'key_?pass', 'password', 'passwd', - 'private_key', 'secret', - 'secrete', + 'contraseña', + 'contrasena', ) # Includes ], ', " as closing CLOSING = r'[]\'"]{0,2}' From 8a509d6fb13b12aeaf9940bd3b4af3bbdaa4145e Mon Sep 17 00:00:00 2001 From: Pablo Santiago Date: Mon, 15 Mar 2021 14:53:09 +0100 Subject: [PATCH 15/22] Pwd keyword --- detect_secrets/plugins/keyword.py | 1 + 1 file changed, 1 insertion(+) diff --git a/detect_secrets/plugins/keyword.py b/detect_secrets/plugins/keyword.py index 4173276bf..d14f1cb0d 100644 --- a/detect_secrets/plugins/keyword.py +++ b/detect_secrets/plugins/keyword.py @@ -54,6 +54,7 @@ 'key_?pass', 'password', 'passwd', + 'pwd', 'secret', 'contraseña', 'contrasena', From b4e9cc4a2ce6bc14b5b495bc318b9e6b3998bba7 Mon Sep 17 00:00:00 2001 From: Julian Fonticoba Mouriz Date: Tue, 13 Apr 2021 16:09:44 +0200 Subject: [PATCH 16/22] Correct mypy issues --- detect_secrets/audit/common.py | 7 ++++--- detect_secrets/audit/report.py | 10 ++++++++-- 2 files changed, 12 insertions(+), 5 deletions(-) diff --git
a/detect_secrets/audit/common.py b/detect_secrets/audit/common.py index 977854b99..0ab6149b8 100644 --- a/detect_secrets/audit/common.py +++ b/detect_secrets/audit/common.py @@ -6,6 +6,7 @@ from typing import Iterator from typing import List from typing import Optional +from typing import Any from . import io from ..core import baseline @@ -44,7 +45,7 @@ def open_file(filename: str) -> 'LineGetter': def get_raw_secret_from_file( secret: PotentialSecret, line_getter_factory: Callable[[str], 'LineGetter'] = open_file, -) -> str: +) -> Any: """ We're analyzing the contents straight from the baseline, and therefore, we don't know the secret value (by design). However, we have line numbers, filenames, and how we detected @@ -65,7 +66,7 @@ def get_raw_secret_from_file( def get_raw_secrets_from_file( secret: PotentialSecret, line_getter_factory: Callable[[str], 'LineGetter'] = open_file, -) -> [PotentialSecret]: +) -> List[PotentialSecret]: """ We're analyzing the contents straight from the baseline, and therefore, we don't know the secret value (by design). 
However, we have secret hashes, filenames, and how we detected @@ -88,7 +89,7 @@ def get_raw_secrets_from_file( raise SecretNotFoundOnSpecifiedLineError(secret.line_number) else: lines_to_scan = line_getter.lines - line_numbers = range(len(lines_to_scan)) + line_numbers = list(range(len(lines_to_scan))) for line_number, line in zip(line_numbers, lines_to_scan): identified_secrets = call_function_with_arguments( diff --git a/detect_secrets/audit/report.py b/detect_secrets/audit/report.py index 164aee80b..0eea28bf5 100644 --- a/detect_secrets/audit/report.py +++ b/detect_secrets/audit/report.py @@ -1,5 +1,9 @@ from enum import Enum from typing import Callable +from typing import Any +from typing import List +from typing import Dict +from typing import Tuple from ..constants import VerifiedResult from .common import get_baseline_from_file @@ -12,6 +16,7 @@ class SecretClassToPrint(Enum): REAL_SECRET = 1 FALSE_POSITIVE = 2 + @staticmethod def from_class(secret_class: VerifiedResult) -> 'SecretClassToPrint': if secret_class in [VerifiedResult.UNVERIFIED, VerifiedResult.VERIFIED_TRUE]: return SecretClassToPrint.REAL_SECRET @@ -23,8 +28,9 @@ def generate_report( baseline_file: str, class_to_print: SecretClassToPrint = None, line_getter_factory: Callable[[str], 'LineGetter'] = open_file, -) -> None: - secrets = {} +) -> List[Dict[str, Any]]: + + secrets: Dict[Tuple[str, str], Any] = {} for filename, secret in get_baseline_from_file(baseline_file): verified_result = VerifiedResult.from_secret(secret) if ( From 14c964f8f72fd7b6fa44642b0f83aaa6814b17ba Mon Sep 17 00:00:00 2001 From: Julian Fonticoba Mouriz Date: Tue, 13 Apr 2021 16:39:48 +0200 Subject: [PATCH 17/22] Reorder imports by precommit --- detect_secrets/audit/common.py | 2 +- detect_secrets/audit/report.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/detect_secrets/audit/common.py b/detect_secrets/audit/common.py index 0ab6149b8..8a2c434ff 100644 --- a/detect_secrets/audit/common.py +++ 
b/detect_secrets/audit/common.py @@ -1,12 +1,12 @@ import json from contextlib import contextmanager from functools import lru_cache +from typing import Any from typing import Callable from typing import cast from typing import Iterator from typing import List from typing import Optional -from typing import Any from . import io from ..core import baseline diff --git a/detect_secrets/audit/report.py b/detect_secrets/audit/report.py index 0eea28bf5..ce1536b04 100644 --- a/detect_secrets/audit/report.py +++ b/detect_secrets/audit/report.py @@ -1,8 +1,8 @@ from enum import Enum -from typing import Callable from typing import Any -from typing import List +from typing import Callable from typing import Dict +from typing import List from typing import Tuple from ..constants import VerifiedResult From 4001e8e110bdc80c11e444cc9df40bbe3ad63b5a Mon Sep 17 00:00:00 2001 From: Julian Fonticoba Mouriz Date: Tue, 13 Apr 2021 17:12:57 +0200 Subject: [PATCH 18/22] Improve mypy issue resolution --- detect_secrets/audit/common.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/detect_secrets/audit/common.py b/detect_secrets/audit/common.py index 8a2c434ff..7359fc419 100644 --- a/detect_secrets/audit/common.py +++ b/detect_secrets/audit/common.py @@ -1,7 +1,6 @@ import json from contextlib import contextmanager from functools import lru_cache -from typing import Any from typing import Callable from typing import cast from typing import Iterator @@ -45,7 +44,7 @@ def open_file(filename: str) -> 'LineGetter': def get_raw_secret_from_file( secret: PotentialSecret, line_getter_factory: Callable[[str], 'LineGetter'] = open_file, -) -> Any: +) -> Optional[str]: """ We're analyzing the contents straight from the baseline, and therefore, we don't know the secret value (by design). 
However, we have line numbers, filenames, and how we detected From adf9aadf1ac4c69d1bd34646ba14d30853bba046 Mon Sep 17 00:00:00 2001 From: Julian Fonticoba Mouriz Date: Wed, 14 Apr 2021 17:06:09 +0200 Subject: [PATCH 19/22] Improve configuration files keyword plugin --- detect_secrets/plugins/keyword.py | 10 +++++++--- detect_secrets/util/filetype.py | 15 +++++++++++++-- tests/plugins/keyword_test.py | 5 +++-- 3 files changed, 23 insertions(+), 7 deletions(-) diff --git a/detect_secrets/plugins/keyword.py b/detect_secrets/plugins/keyword.py index 388685b23..9be37706d 100644 --- a/detect_secrets/plugins/keyword.py +++ b/detect_secrets/plugins/keyword.py @@ -193,7 +193,7 @@ ), flags=re.IGNORECASE, ) -DENYLIST_REGEX_TO_GROUP = { +CONFIG_DENYLIST_REGEX_TO_GROUP = { FOLLOWED_BY_COLON_REGEX: 4, PRECEDED_BY_EQUAL_COMPARISON_SIGNS_QUOTES_REQUIRED_REGEX: 2, FOLLOWED_BY_EQUAL_SIGNS_REGEX: 5, @@ -230,6 +230,11 @@ FileType.PYTHON: QUOTES_REQUIRED_DENYLIST_REGEX_TO_GROUP, FileType.SWIFT: QUOTES_REQUIRED_DENYLIST_REGEX_TO_GROUP, FileType.TERRAFORM: QUOTES_REQUIRED_DENYLIST_REGEX_TO_GROUP, + FileType.YAML: CONFIG_DENYLIST_REGEX_TO_GROUP, + FileType.CONFIG: CONFIG_DENYLIST_REGEX_TO_GROUP, + FileType.INI: CONFIG_DENYLIST_REGEX_TO_GROUP, + FileType.PROPERTIES: CONFIG_DENYLIST_REGEX_TO_GROUP, + FileType.TOML: CONFIG_DENYLIST_REGEX_TO_GROUP, } @@ -260,7 +265,6 @@ def analyze_string( if denylist_regex_to_group is None: attempts = [ QUOTES_REQUIRED_DENYLIST_REGEX_TO_GROUP, - DENYLIST_REGEX_TO_GROUP, ] else: attempts = [denylist_regex_to_group] @@ -284,7 +288,7 @@ def analyze_line( **kwargs: Any, ) -> Set[PotentialSecret]: filetype = determine_file_type(filename) - denylist_regex_to_group = REGEX_BY_FILETYPE.get(filetype, DENYLIST_REGEX_TO_GROUP) + denylist_regex_to_group = REGEX_BY_FILETYPE.get(filetype, QUOTES_REQUIRED_DENYLIST_REGEX_TO_GROUP) return super().analyze_line( filename=filename, line=line, diff --git a/detect_secrets/util/filetype.py b/detect_secrets/util/filetype.py index 
bd3887d9d..4d152db55 100644 --- a/detect_secrets/util/filetype.py +++ b/detect_secrets/util/filetype.py @@ -17,7 +17,11 @@ class FileType(Enum): C_SHARP = 11 C = 12 C_PLUS_PLUS = 13 - OTHER = 14 + CONFIG = 14 + INI = 15 + PROPERTIES = 16 + TOML = 17 + OTHER = 18 def determine_file_type(filename: str) -> FileType: @@ -39,5 +43,12 @@ def determine_file_type(filename: str) -> FileType: '.yml': FileType.YAML, '.cs': FileType.C_SHARP, '.c': FileType.C, - '.cpp': FileType.C_PLUS_PLUS + '.cpp': FileType.C_PLUS_PLUS, + '.cnf': FileType.CONFIG, + '.conf': FileType.CONFIG, + '.cfg': FileType.CONFIG, + '.cf': FileType.CONFIG, + '.ini': FileType.INI, + '.properties': FileType.PROPERTIES, + '.toml': FileType.TOML }.get(file_extension, FileType.OTHER) diff --git a/tests/plugins/keyword_test.py b/tests/plugins/keyword_test.py index 73e9f8bb7..6126d6697 100644 --- a/tests/plugins/keyword_test.py +++ b/tests/plugins/keyword_test.py @@ -15,7 +15,7 @@ LONG_LINE = ''.format(base64.b64encode((str(randint(0, 9)) * 30500).encode())) # noqa: E501 -GENERIC_TEST_CASES = [ +CONFIG_TEST_CASES = [ ('password = "{}"'.format(WHITES_SECRET), WHITES_SECRET), ('password_super_secure = "{}"'.format(WHITES_SECRET), WHITES_SECRET), # Suffix ('my_password_super_secure = "{}"'.format(WHITES_SECRET), WHITES_SECRET), # Prefix/suffix @@ -163,7 +163,7 @@ def parse_test_cases(test_cases): 'file_extension, line, expected_secret', ( parse_test_cases([ - (None, GENERIC_TEST_CASES), + ('conf', CONFIG_TEST_CASES), ('go', GOLANG_TEST_CASES), ('m', COMMON_C_TEST_CASES), ('c', COMMON_C_TEST_CASES), @@ -175,6 +175,7 @@ def parse_test_cases(test_cases): ('js', QUOTES_REQUIRED_TEST_CASES), ('swift', QUOTES_REQUIRED_TEST_CASES), ('tf', QUOTES_REQUIRED_TEST_CASES), + (None, QUOTES_REQUIRED_TEST_CASES), ]) ), ) From 5023252d4ac5e4d120d33a6ecaa6849e81b4e9e5 Mon Sep 17 00:00:00 2001 From: Julian Fonticoba Mouriz Date: Wed, 14 Apr 2021 17:24:57 +0200 Subject: [PATCH 20/22] Fix test cases --- 
tests/filters/heuristic_filter_test.py | 2 +- tests/plugins/keyword_test.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/filters/heuristic_filter_test.py b/tests/filters/heuristic_filter_test.py index 065ade6a7..72d2c31e5 100644 --- a/tests/filters/heuristic_filter_test.py +++ b/tests/filters/heuristic_filter_test.py @@ -90,7 +90,7 @@ def test_failure(self, secret, line): ( ('secret = {hunter2}', False), ('secret = ', False), - ('secret = hunter2', True), + ('secret = "hunter2"', True), ('secret= ${hunter2}', False), ), ) diff --git a/tests/plugins/keyword_test.py b/tests/plugins/keyword_test.py index 6126d6697..3fcd9c953 100644 --- a/tests/plugins/keyword_test.py +++ b/tests/plugins/keyword_test.py @@ -40,7 +40,7 @@ ('password = {}'.format(SYMBOL_SECRET), None), # At least 1 alphanumeric character is required ('api_key = ""', None), # Nothing in the quotes ("secret: ''", None), # Nothing in the quotes - ('secret = "abcdefghi"', None), # Alphabet sequential string + ('password = "somefakekey"', None), # 'fake' in the secret ('password: ${link}', None), # Has a ${ followed by a } ('some_key = "real_secret"', None), # We cannot make 'key' a Keyword, too noisy) ('private_key "hopenobodyfindsthisone\';', None), # Double-quote does not match single-quote) @@ -144,7 +144,7 @@ ('password = {}'.format(COMMON_SECRET), None), # Secret without quotes ('api_key = ""', None), # Nothing in the quotes ("secret: ''", None), # Nothing in the quotes - ('password = "somefakekey"', None), # 'fake' in the secret + ('secret = "abcdefghi"', None), # Alphabet sequential string ('password: ${link}', None), # Has a ${ followed by a } ('some_key = "real_secret"', None), # We cannot make 'key' a Keyword, too noisy) ('private_key "hopenobodyfindsthisone\';', None), # Double-quote does not match single-quote) From 6713e76161c415398c2f74662ff4a2f148b2d3b0 Mon Sep 17 00:00:00 2001 From: Julian Fonticoba Mouriz Date: Wed, 14 Apr 2021 17:32:44 +0200 Subject: [PATCH 
21/22] Change test case --- tests/plugins/keyword_test.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/plugins/keyword_test.py b/tests/plugins/keyword_test.py index 3fcd9c953..ce8b0748c 100644 --- a/tests/plugins/keyword_test.py +++ b/tests/plugins/keyword_test.py @@ -144,7 +144,6 @@ ('password = {}'.format(COMMON_SECRET), None), # Secret without quotes ('api_key = ""', None), # Nothing in the quotes ("secret: ''", None), # Nothing in the quotes - ('secret = "abcdefghi"', None), # Alphabet sequential string ('password: ${link}', None), # Has a ${ followed by a } ('some_key = "real_secret"', None), # We cannot make 'key' a Keyword, too noisy) ('private_key "hopenobodyfindsthisone\';', None), # Double-quote does not match single-quote) From c88f2e1ed7d3f792954042d8418acfd6cc3264ee Mon Sep 17 00:00:00 2001 From: Julian Fonticoba Mouriz Date: Wed, 14 Apr 2021 17:51:28 +0200 Subject: [PATCH 22/22] Formatting text by pre-commit --- detect_secrets/plugins/keyword.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/detect_secrets/plugins/keyword.py b/detect_secrets/plugins/keyword.py index 9be37706d..868ade2b4 100644 --- a/detect_secrets/plugins/keyword.py +++ b/detect_secrets/plugins/keyword.py @@ -288,7 +288,7 @@ def analyze_line( **kwargs: Any, ) -> Set[PotentialSecret]: filetype = determine_file_type(filename) - denylist_regex_to_group = REGEX_BY_FILETYPE.get(filetype, QUOTES_REQUIRED_DENYLIST_REGEX_TO_GROUP) + denylist_regex_to_group = REGEX_BY_FILETYPE.get(filetype, QUOTES_REQUIRED_DENYLIST_REGEX_TO_GROUP) # noqa: E501 return super().analyze_line( filename=filename, line=line,