Skip to content

Commit

Permalink
Merge branch 'master' of github.com:Yelp/detect-secrets
Browse files Browse the repository at this point in the history
  • Loading branch information
Aaron Loo committed Apr 14, 2021
2 parents 392040a + 16f6625 commit c5b56f4
Show file tree
Hide file tree
Showing 13 changed files with 591 additions and 62 deletions.
31 changes: 20 additions & 11 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -407,29 +407,38 @@ const secret = "hunter2";

```bash
$ detect-secrets audit --help
usage: detect-secrets audit [-h] [--diff] [--stats] [--json]
filename [filename ...]
usage: detect-secrets audit [-h] [--diff] [--stats]
[--report] [--only-real | --only-false]
[--json]
filename [filename ...]

Auditing a baseline allows analysts to label results, and optimize plugins for
the highest signal-to-noise ratio for their environment.

positional arguments:
filename Audit a given baseline file to distinguish the difference
between false and true positives.
filename Audit a given baseline file to distinguish the difference
between false and true positives.

optional arguments:
-h, --help show this help message and exit
--diff Allows the comparison of two baseline files, in order to
effectively distinguish the difference between various plugin
configurations.
--stats Displays the results of an interactive auditing session which
have been saved to a baseline file.
-h, --help show this help message and exit
--diff Allows the comparison of two baseline files, in order to
effectively distinguish the difference between various plugin
configurations.
--stats Displays the results of an interactive auditing session which
have been saved to a baseline file.
--report Displays a report with the secrets detected

reporting:
Display a report with all the findings and the made decisions. To be used with the report mode (--report).

--only-real Only includes real secrets in the report
--only-false Only includes false positives in the report

analytics:
Quantify the success of your plugins based on the labelled results in your
baseline. To be used with the statistics mode (--stats).

--json Outputs results in a machine-readable format.
--json Outputs results in a machine-readable format.
```

## Configuration
Expand Down
1 change: 1 addition & 0 deletions detect_secrets/audit/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
from . import analytics # noqa: F401
from . import report # noqa: F401
from .audit import audit_baseline # noqa: F401
from .compare import compare_baselines # noqa: F401
91 changes: 56 additions & 35 deletions detect_secrets/audit/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,55 +44,76 @@ def open_file(filename: str) -> 'LineGetter':
def get_raw_secret_from_file(
secret: PotentialSecret,
line_getter_factory: Callable[[str], 'LineGetter'] = open_file,
) -> str:
) -> Optional[str]:
"""
We're analyzing the contents straight from the baseline, and therefore, we don't know
the secret value (by design). However, we have line numbers, filenames, and how we detected
it was a secret in the first place, so we can reverse-engineer it.
:raises: SecretNotFoundOnSpecifiedLineError
:raises: NoLineNumberError
"""
if not secret.line_number:
raise NoLineNumberError

for item in get_raw_secrets_from_file(secret, line_getter_factory):
return item.secret_value

raise SecretNotFoundOnSpecifiedLineError(secret.line_number)


def get_raw_secrets_from_file(
secret: PotentialSecret,
line_getter_factory: Callable[[str], 'LineGetter'] = open_file,
) -> List[PotentialSecret]:
"""
We're analyzing the contents straight from the baseline, and therefore, we don't know
the secret value (by design). However, we have secret hashes, filenames, and how we detected
it was a secret in the first place, so we can reverse-engineer it. This method searches all
the occurrences of one secret in one file using one plugin.
:raises: SecretNotFoundOnSpecifiedLineError
:raises: NoLineNumberError
"""
plugin = cast(BasePlugin, plugins.initialize.from_secret_type(secret.type))
line_getter = line_getter_factory(secret.filename)
is_first_time_opening_file = not line_getter.has_cached_lines
all_secrets = []
while True:
if not secret.line_number:
raise NoLineNumberError

try:
target_line = line_getter.lines[secret.line_number - 1]
except IndexError:
raise SecretNotFoundOnSpecifiedLineError(secret.line_number)

identified_secrets = call_function_with_arguments(
plugin.analyze_line,
filename=secret.filename,
line=target_line,
line_number=secret.line_number,

# We enable eager search, because we *know* there's a secret here -- the baseline
# flagged it after all.
enable_eager_search=True,
)

for identified_secret in (identified_secrets or []):
if identified_secret == secret:
return cast(str, identified_secret.secret_value)

# No secret found -- maybe it's due to invalid file transformation.
# However, this only applies to the first execution of the file, since we want a
# consistent transformed file.
#
# NOTE: This is defensive coding. If we assume that this is only run on valid baselines,
# then the baseline wouldn't record secrets that were both found with and without an eager
# transformer, in the same file.
if is_first_time_opening_file and not line_getter.use_eager_transformers:
if secret.line_number:
try:
lines_to_scan = [line_getter.lines[secret.line_number - 1]]
line_numbers = [secret.line_number - 1]
except IndexError:
raise SecretNotFoundOnSpecifiedLineError(secret.line_number)
else:
lines_to_scan = line_getter.lines
line_numbers = list(range(len(lines_to_scan)))

for line_number, line in zip(line_numbers, lines_to_scan):
identified_secrets = call_function_with_arguments(
plugin.analyze_line,
filename=secret.filename,
line=line,
line_number=line_number + 1,

# We enable eager search, because we *know* there's a secret here -- the baseline
# flagged it after all.
enable_eager_search=bool(secret.line_number),
)

for identified_secret in (identified_secrets or []):
if identified_secret == secret:
all_secrets.append(identified_secret)

if (
len(all_secrets) == 0 and
is_first_time_opening_file and
not line_getter.use_eager_transformers
):
line_getter.use_eager_transformers = True
else:
break

raise SecretNotFoundOnSpecifiedLineError(secret.line_number)
return all_secrets


class LineGetter:
Expand Down
77 changes: 77 additions & 0 deletions detect_secrets/audit/report.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
from enum import Enum
from typing import Any
from typing import Callable
from typing import Dict
from typing import List
from typing import Tuple

from ..constants import VerifiedResult
from .common import get_baseline_from_file
from .common import get_raw_secrets_from_file
from .common import LineGetter
from .common import open_file


class SecretClassToPrint(Enum):
    """
    Filter categories that a report can be restricted to.
    """
    REAL_SECRET = 1
    FALSE_POSITIVE = 2

    @staticmethod
    def from_class(secret_class: VerifiedResult) -> 'SecretClassToPrint':
        """
        Map an audit label onto the report category it is listed under.

        Unverified results are grouped together with verified-true ones as
        REAL_SECRET; everything else is reported as FALSE_POSITIVE.
        """
        counts_as_real = secret_class in (
            VerifiedResult.UNVERIFIED,
            VerifiedResult.VERIFIED_TRUE,
        )
        if counts_as_real:
            return SecretClassToPrint.REAL_SECRET

        return SecretClassToPrint.FALSE_POSITIVE


def generate_report(
    baseline_file: str,
    class_to_print: SecretClassToPrint = None,
    line_getter_factory: Callable[[str], 'LineGetter'] = open_file,
) -> List[Dict[str, Any]]:
    """
    Build a report listing every occurrence of the secrets recorded in a baseline.

    Results are grouped by (secret hash, filename). Each entry is a dictionary with
    the keys `secrets` (the raw secret value), `filename`, `lines` (a mapping of line
    number to line content), `types` (the secret types that flagged this secret) and
    `category` (the name of the highest-valued VerifiedResult seen for it).

    :param baseline_file: path of the baseline to report on.
    :param class_to_print: when given, only secrets whose label maps to this
        category are included; None includes everything.
    :param line_getter_factory: used both to locate the secrets and to fetch the
        content of the lines they were found on.
    """
    secrets: Dict[Tuple[str, str], Any] = {}
    for filename, secret in get_baseline_from_file(baseline_file):
        verified_result = VerifiedResult.from_secret(secret)
        if (
            class_to_print is not None and
            SecretClassToPrint.from_class(verified_result) != class_to_print
        ):
            continue

        # Removal of the stored line number is required to force the complete file
        # scanning to obtain all the secret occurrences.
        secret.line_number = 0

        # The same factory must be used for detection and for line lookup, so that
        # the reported line numbers and line contents come from the same source.
        detections = get_raw_secrets_from_file(secret, line_getter_factory)
        line_getter = line_getter_factory(filename)

        key = (secret.secret_hash, filename)
        for detection in detections:
            source_line = line_getter.lines[detection.line_number - 1]
            entry = secrets.get(key)
            if entry is None:
                secrets[key] = {
                    'secrets': detection.secret_value,
                    'filename': filename,
                    'lines': {
                        detection.line_number: source_line,
                    },
                    'types': [
                        secret.type,
                    ],
                    'category': verified_result.name,
                }
                continue

            entry['lines'][detection.line_number] = source_line
            if secret.type not in entry['types']:
                entry['types'].append(secret.type)

            # When several baseline entries share a hash, keep the strongest label.
            entry['category'] = get_prioritized_verified_result(
                verified_result,
                VerifiedResult[entry['category']],
            ).name

    return list(secrets.values())


def get_prioritized_verified_result(
    result1: VerifiedResult,
    result2: VerifiedResult,
) -> VerifiedResult:
    """
    Return whichever result has the larger `value`.

    On a tie, `result2` is returned.
    """
    # `max` returns the first maximal item, so listing `result2` first preserves
    # the tie-breaking rule.
    return max(result2, result1, key=lambda result: result.value)
11 changes: 11 additions & 0 deletions detect_secrets/constants.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,18 @@
from enum import Enum

from .core.potential_secret import PotentialSecret


class VerifiedResult(Enum):
    """
    Outcome of verifying a potential secret.
    """
    VERIFIED_FALSE = 1
    UNVERIFIED = 2
    VERIFIED_TRUE = 3

    @staticmethod
    def from_secret(secret: PotentialSecret) -> 'VerifiedResult':
        """
        Translate the `is_secret` label of a secret into a VerifiedResult.

        A label of None means the secret is unverified; otherwise the truthiness
        of the label decides between VERIFIED_TRUE and VERIFIED_FALSE.
        """
        label = secret.is_secret
        if label is None:
            return VerifiedResult.UNVERIFIED

        return VerifiedResult.VERIFIED_TRUE if label else VerifiedResult.VERIFIED_FALSE
36 changes: 36 additions & 0 deletions detect_secrets/core/usage/audit.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ def add_audit_action(parent: argparse._SubParsersAction) -> argparse.ArgumentPar
)

_add_mode_parser(parser)
_add_report_module(parser)
_add_statistics_module(parser)
return parser

Expand All @@ -45,6 +46,41 @@ def _add_mode_parser(parser: argparse.ArgumentParser) -> None:
),
)

parser.add_argument(
'--report',
action='store_true',
help=(
'Displays a report with the secrets detected'
),
)


def _add_report_module(parent: argparse.ArgumentParser) -> None:
parser = parent.add_argument_group(
title='reporting',
description=(
'Display a report with all the findings and the made decisions. '
'To be used with the report mode (--report).'
),
)

report_parser = parser.add_mutually_exclusive_group()
report_parser.add_argument(
'--only-real',
action='store_true',
help=(
'Only includes real secrets in the report'
),
)

report_parser.add_argument(
'--only-false',
action='store_true',
help=(
'Only includes false positives in the report'
),
)


def _add_statistics_module(parent: argparse.ArgumentParser) -> None:
parser = parent.add_argument_group(
Expand Down
13 changes: 13 additions & 0 deletions detect_secrets/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,19 @@ def handle_audit_action(args: argparse.Namespace) -> None:
print(json.dumps(stats.json(), indent=2))
else:
print(str(stats))
elif args.report:
class_to_print = None
if args.only_real:
class_to_print = audit.report.SecretClassToPrint.REAL_SECRET
elif args.only_false:
class_to_print = audit.report.SecretClassToPrint.FALSE_POSITIVE
print(
json.dumps(
audit.report.generate_report(args.filename[0], class_to_print),
indent=4,
sort_keys=True,
),
)
else:
# Starts interactive session.
if args.diff:
Expand Down
Loading

0 comments on commit c5b56f4

Please sign in to comment.