Merge branch 'master' of github.com:Yelp/detect-secrets

Yelp · Apr 14, 2021 · c5b56f4 · c5b56f4
2 parents 392040a + 16f6625
commit c5b56f4
Show file tree

Hide file tree

Showing 13 changed files with 591 additions and 62 deletions.
diff --git a/README.md b/README.md
@@ -407,29 +407,38 @@ const secret = "hunter2";
 
 ```bash
 $ detect-secrets audit --help
-usage: detect-secrets audit [-h] [--diff] [--stats] [--json]
-                            filename [filename ...]
+usage: detect-secrets audit [-h] [--diff] [--stats]
+                      [--report] [--only-real | --only-false]
+                      [--json]
+                      filename [filename ...]
 
 Auditing a baseline allows analysts to label results, and optimize plugins for
 the highest signal-to-noise ratio for their environment.
 
 positional arguments:
-  filename    Audit a given baseline file to distinguish the difference
-              between false and true positives.
+  filename      Audit a given baseline file to distinguish the difference
+                between false and true positives.
 
 optional arguments:
-  -h, --help  show this help message and exit
-  --diff      Allows the comparison of two baseline files, in order to
-              effectively distinguish the difference between various plugin
-              configurations.
-  --stats     Displays the results of an interactive auditing session which
-              have been saved to a baseline file.
+  -h, --help    show this help message and exit
+  --diff        Allows the comparison of two baseline files, in order to
+                effectively distinguish the difference between various plugin
+                configurations.
+  --stats       Displays the results of an interactive auditing session which
+                have been saved to a baseline file.
+  --report      Displays a report with the secrets detected
+
+reporting:
+  Display a report with all the findings and the made decisions. To be used with the report mode (--report).
+
+  --only-real   Only includes real secrets in the report
+  --only-false  Only includes false positives in the report
 
 analytics:
   Quantify the success of your plugins based on the labelled results in your
   baseline. To be used with the statistics mode (--stats).
 
-  --json      Outputs results in a machine-readable format.
+  --json        Outputs results in a machine-readable format.
 ```
 
 ## Configuration

diff --git a/detect_secrets/audit/__init__.py b/detect_secrets/audit/__init__.py
@@ -1,3 +1,4 @@
 from . import analytics                 # noqa: F401
+from . import report                    # noqa: F401
 from .audit import audit_baseline       # noqa: F401
 from .compare import compare_baselines  # noqa: F401
diff --git a/detect_secrets/audit/common.py b/detect_secrets/audit/common.py
@@ -44,55 +44,76 @@ def open_file(filename: str) -> 'LineGetter':
 def get_raw_secret_from_file(
     secret: PotentialSecret,
     line_getter_factory: Callable[[str], 'LineGetter'] = open_file,
-) -> str:
+) -> Optional[str]:
     """
     We're analyzing the contents straight from the baseline, and therefore, we don't know
     the secret value (by design). However, we have line numbers, filenames, and how we detected
     it was a secret in the first place, so we can reverse-engineer it.
 
+    :raises: SecretNotFoundOnSpecifiedLineError
+    :raises: NoLineNumberError
+    """
+    if not secret.line_number:
+        raise NoLineNumberError
+
+    for item in get_raw_secrets_from_file(secret, line_getter_factory):
+        return item.secret_value
+
+    raise SecretNotFoundOnSpecifiedLineError(secret.line_number)
+
+
+def get_raw_secrets_from_file(
+    secret: PotentialSecret,
+    line_getter_factory: Callable[[str], 'LineGetter'] = open_file,
+) -> List[PotentialSecret]:
+    """
+    We're analyzing the contents straight from the baseline, and therefore, we don't know
+    the secret value (by design). However, we have secret hashes, filenames, and how we detected
+    it was a secret in the first place, so we can reverse-engineer it. This method searches all
+    the occurrences of one secret in one file using one plugin.
+
     :raises: SecretNotFoundOnSpecifiedLineError
     :raises: NoLineNumberError
     """
     plugin = cast(BasePlugin, plugins.initialize.from_secret_type(secret.type))
     line_getter = line_getter_factory(secret.filename)
     is_first_time_opening_file = not line_getter.has_cached_lines
+    all_secrets = []
     while True:
-        if not secret.line_number:
-            raise NoLineNumberError
-
-        try:
-            target_line = line_getter.lines[secret.line_number - 1]
-        except IndexError:
-            raise SecretNotFoundOnSpecifiedLineError(secret.line_number)
-
-        identified_secrets = call_function_with_arguments(
-            plugin.analyze_line,
-            filename=secret.filename,
-            line=target_line,
-            line_number=secret.line_number,
-
-            # We enable eager search, because we *know* there's a secret here -- the baseline
-            # flagged it after all.
-            enable_eager_search=True,
-        )
-
-        for identified_secret in (identified_secrets or []):
-            if identified_secret == secret:
-                return cast(str, identified_secret.secret_value)
-
-        # No secret found -- maybe it's due to invalid file transformation.
-        # However, this only applies to the first execution of the file, since we want a
-        # consistent transformed file.
-        #
-        # NOTE: This is defensive coding. If we assume that this is only run on valid baselines,
-        # then the baseline wouldn't record secrets that were both found with and without an eager
-        # transformer, in the same file.
-        if is_first_time_opening_file and not line_getter.use_eager_transformers:
+        if secret.line_number:
+            try:
+                lines_to_scan = [line_getter.lines[secret.line_number - 1]]
+                line_numbers = [secret.line_number - 1]
+            except IndexError:
+                raise SecretNotFoundOnSpecifiedLineError(secret.line_number)
+        else:
+            lines_to_scan = line_getter.lines
+            line_numbers = list(range(len(lines_to_scan)))
+
+        for line_number, line in zip(line_numbers, lines_to_scan):
+            identified_secrets = call_function_with_arguments(
+                plugin.analyze_line,
+                filename=secret.filename,
+                line=line,
+                line_number=line_number + 1,
+
+                # We enable eager search, because we *know* there's a secret here -- the baseline
+                # flagged it after all.
+                enable_eager_search=bool(secret.line_number),
+            )
+
+            for identified_secret in (identified_secrets or []):
+                if identified_secret == secret:
+                    all_secrets.append(identified_secret)
+
+        if (
+            len(all_secrets) == 0 and
+            is_first_time_opening_file and
+            not line_getter.use_eager_transformers
+        ):
             line_getter.use_eager_transformers = True
         else:
-            break
-
-    raise SecretNotFoundOnSpecifiedLineError(secret.line_number)
+            return all_secrets
 
 
 class LineGetter:

diff --git a/detect_secrets/audit/report.py b/detect_secrets/audit/report.py
@@ -0,0 +1,77 @@
+from enum import Enum
+from typing import Any
+from typing import Callable
+from typing import Dict
+from typing import List
+from typing import Tuple
+
+from ..constants import VerifiedResult
+from .common import get_baseline_from_file
+from .common import get_raw_secrets_from_file
+from .common import LineGetter
+from .common import open_file
+
+
+class SecretClassToPrint(Enum):
+    REAL_SECRET = 1
+    FALSE_POSITIVE = 2
+
+    @staticmethod
+    def from_class(secret_class: VerifiedResult) -> 'SecretClassToPrint':
+        if secret_class in [VerifiedResult.UNVERIFIED, VerifiedResult.VERIFIED_TRUE]:
+            return SecretClassToPrint.REAL_SECRET
+        else:
+            return SecretClassToPrint.FALSE_POSITIVE
+
+
+def generate_report(
+    baseline_file: str,
+    class_to_print: SecretClassToPrint = None,
+    line_getter_factory: Callable[[str], 'LineGetter'] = open_file,
+) -> List[Dict[str, Any]]:
+
+    secrets: Dict[Tuple[str, str], Any] = {}
+    for filename, secret in get_baseline_from_file(baseline_file):
+        verified_result = VerifiedResult.from_secret(secret)
+        if (
+            class_to_print is not None and
+            SecretClassToPrint.from_class(verified_result) != class_to_print
+        ):
+            continue
+        # Removal of the stored line number is required to force the complete file scanning to obtain all the secret occurrences. # noqa: E501
+        secret.line_number = 0
+        detections = get_raw_secrets_from_file(secret)
+        line_getter = line_getter_factory(filename)
+        for detection in detections:
+            if (secret.secret_hash, filename) in secrets:
+                secrets[(secret.secret_hash, filename)]['lines'][detection.line_number] = line_getter.lines[detection.line_number - 1]  # noqa: E501
+                if secret.type not in secrets[(secret.secret_hash, filename)]['types']:
+                    secrets[(secret.secret_hash, filename)]['types'].append(secret.type)
+                secrets[(secret.secret_hash, filename)]['category'] = get_prioritized_verified_result(  # noqa: E501
+                    verified_result,
+                    VerifiedResult[secrets[(secret.secret_hash, filename)]['category']],
+                ).name
+            else:
+                secrets[(secret.secret_hash, filename)] = {
+                    'secrets': detection.secret_value,
+                    'filename': filename,
+                    'lines': {
+                        detection.line_number: line_getter.lines[detection.line_number - 1],
+                    },
+                    'types': [
+                        secret.type,
+                    ],
+                    'category': verified_result.name,
+                }
+
+    return list(secrets.values())
+
+
+def get_prioritized_verified_result(
+    result1: VerifiedResult,
+    result2: VerifiedResult,
+) -> VerifiedResult:
+    if result1.value > result2.value:
+        return result1
+    else:
+        return result2
diff --git a/detect_secrets/constants.py b/detect_secrets/constants.py
@@ -1,7 +1,18 @@
 from enum import Enum
 
+from .core.potential_secret import PotentialSecret
+
 
 class VerifiedResult(Enum):
     VERIFIED_FALSE = 1
     UNVERIFIED = 2
     VERIFIED_TRUE = 3
+
+    @staticmethod
+    def from_secret(secret: PotentialSecret) -> 'VerifiedResult':
+        if secret.is_secret is None:
+            return VerifiedResult.UNVERIFIED
+        elif secret.is_secret:
+            return VerifiedResult.VERIFIED_TRUE
+        else:
+            return VerifiedResult.VERIFIED_FALSE
diff --git a/detect_secrets/core/usage/audit.py b/detect_secrets/core/usage/audit.py
@@ -21,6 +21,7 @@ def add_audit_action(parent: argparse._SubParsersAction) -> argparse.ArgumentPar
     )
 
     _add_mode_parser(parser)
+    _add_report_module(parser)
     _add_statistics_module(parser)
     return parser
 
@@ -45,6 +46,41 @@ def _add_mode_parser(parser: argparse.ArgumentParser) -> None:
         ),
     )
 
+    parser.add_argument(
+        '--report',
+        action='store_true',
+        help=(
+            'Displays a report with the secrets detected'
+        ),
+    )
+
+
+def _add_report_module(parent: argparse.ArgumentParser) -> None:
+    parser = parent.add_argument_group(
+        title='reporting',
+        description=(
+            'Display a report with all the findings and the made decisions. '
+            'To be used with the report mode (--report).'
+        ),
+    )
+
+    report_parser = parser.add_mutually_exclusive_group()
+    report_parser.add_argument(
+        '--only-real',
+        action='store_true',
+        help=(
+            'Only includes real secrets in the report'
+        ),
+    )
+
+    report_parser.add_argument(
+        '--only-false',
+        action='store_true',
+        help=(
+            'Only includes false positives in the report'
+        ),
+    )
+
 
 def _add_statistics_module(parent: argparse.ArgumentParser) -> None:
     parser = parent.add_argument_group(

diff --git a/detect_secrets/main.py b/detect_secrets/main.py
@@ -129,6 +129,19 @@ def handle_audit_action(args: argparse.Namespace) -> None:
                 print(json.dumps(stats.json(), indent=2))
             else:
                 print(str(stats))
+        elif args.report:
+            class_to_print = None
+            if args.only_real:
+                class_to_print = audit.report.SecretClassToPrint.REAL_SECRET
+            elif args.only_false:
+                class_to_print = audit.report.SecretClassToPrint.FALSE_POSITIVE
+            print(
+                json.dumps(
+                    audit.report.generate_report(args.filename[0], class_to_print),
+                    indent=4,
+                    sort_keys=True,
+                ),
+            )
         else:
             # Starts interactive session.
             if args.diff: