Add usedforsecurity for sha1 algorithm (apache#44081)
SHA1 is cryptographically weak, and some restricted (FIPS-compliant)
environments block weak algorithms. As of Python 3.9 you can still use
such algorithms in those environments by explicitly stating that they
are not used for security purposes.
potiuk authored Nov 16, 2024
1 parent 80a2f10 commit a85d94e
Showing 4 changed files with 14 additions and 5 deletions.
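
For context (not part of the diff below): hashlib constructors have accepted a usedforsecurity keyword since Python 3.9, and passing False tells a FIPS-restricted OpenSSL build that the digest is used for non-security purposes such as cache keys or jitter. A minimal sketch, with an invented payload:

import hashlib
import sys

# usedforsecurity exists only on Python 3.9+; older interpreters reject the keyword.
if sys.version_info >= (3, 9):
    digest = hashlib.sha1(b"not-a-secret", usedforsecurity=False).hexdigest()
else:
    digest = hashlib.sha1(b"not-a-secret").hexdigest()  # pre-3.9 fallback
print(digest)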
7 changes: 6 additions & 1 deletion airflow/models/dagcode.py
@@ -163,4 +163,9 @@ def dag_fileloc_hash(full_filepath: str) -> int:
         import hashlib

         # Only 7 bytes because MySQL BigInteger can hold only 8 bytes (signed).
-        return struct.unpack(">Q", hashlib.sha1(full_filepath.encode("utf-8")).digest()[-8:])[0] >> 8
+        return (
+            struct.unpack(
+                ">Q", hashlib.sha1(full_filepath.encode("utf-8"), usedforsecurity=False).digest()[-8:]
+            )[0]
+            >> 8
+        )
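
To see why the comment says 7 bytes: the last 8 bytes of the digest are unpacked as an unsigned 64-bit integer and shifted right by 8 bits, leaving 56 bits, which always fits into MySQL's signed BigInteger. A standalone sketch of that arithmetic (the path is invented):

import hashlib
import struct

full_filepath = "/opt/airflow/dags/example_dag.py"  # hypothetical DAG file path
digest = hashlib.sha1(full_filepath.encode("utf-8"), usedforsecurity=False).digest()
fileloc_hash = struct.unpack(">Q", digest[-8:])[0] >> 8  # keep 56 of the 64 bits
assert fileloc_hash < 2**56  # small enough for a signed 8-byte BigInteger column
print(fileloc_hash)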
3 changes: 2 additions & 1 deletion airflow/models/taskinstance.py
@@ -2456,7 +2456,8 @@ def next_retry_datetime(self):
         # deterministic per task instance
         ti_hash = int(
             hashlib.sha1(
-                f"{self.dag_id}#{self.task_id}#{self.logical_date}#{self.try_number}".encode()
+                f"{self.dag_id}#{self.task_id}#{self.logical_date}#{self.try_number}".encode(),
+                usedforsecurity=False,
             ).hexdigest(),
             16,
         )
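
The reason for hashing dag_id, task_id, logical_date and try_number instead of drawing a random number is that the jitter stays deterministic per task instance: recomputing it yields the same value. A hedged sketch of that property, with invented identifiers:

import hashlib

def ti_hash(dag_id: str, task_id: str, logical_date: str, try_number: int) -> int:
    # Same inputs always give the same integer, so retry jitter is reproducible.
    return int(
        hashlib.sha1(
            f"{dag_id}#{task_id}#{logical_date}#{try_number}".encode(),
            usedforsecurity=False,
        ).hexdigest(),
        16,
    )

assert ti_hash("example_dag", "example_task", "2024-11-16", 2) == ti_hash(
    "example_dag", "example_task", "2024-11-16", 2
)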
7 changes: 5 additions & 2 deletions airflow/sensors/base.py
@@ -365,7 +365,8 @@ def _get_next_poke_interval(
         # Calculate the jitter
         run_hash = int(
             hashlib.sha1(
-                f"{self.dag_id}#{self.task_id}#{started_at}#{estimated_poke_count}".encode()
+                f"{self.dag_id}#{self.task_id}#{started_at}#{estimated_poke_count}".encode(),
+                usedforsecurity=False,
             ).hexdigest(),
             16,
         )
@@ -384,7 +385,9 @@ def _get_next_poke_interval(
         min_backoff = max(int(self.poke_interval * (2 ** (poke_count - 2))), 1)

         run_hash = int(
-            hashlib.sha1(f"{self.dag_id}#{self.task_id}#{started_at}#{poke_count}".encode()).hexdigest(),
+            hashlib.sha1(
+                f"{self.dag_id}#{self.task_id}#{started_at}#{poke_count}".encode(), usedforsecurity=False
+            ).hexdigest(),
             16,
         )
         modded_hash = min_backoff + run_hash % min_backoff
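
Taken together, this hunk computes an exponential backoff with deterministic jitter: min_backoff doubles with each poke, and the hash folds the next interval into [min_backoff, 2 * min_backoff). A small sketch of the arithmetic with invented values:

import hashlib

poke_interval = 60  # hypothetical sensor poke interval, in seconds
dag_id, task_id, started_at = "example_dag", "wait_for_file", "2024-11-16T00:00:00"

for poke_count in range(2, 6):
    min_backoff = max(int(poke_interval * (2 ** (poke_count - 2))), 1)
    run_hash = int(
        hashlib.sha1(
            f"{dag_id}#{task_id}#{started_at}#{poke_count}".encode(), usedforsecurity=False
        ).hexdigest(),
        16,
    )
    modded_hash = min_backoff + run_hash % min_backoff  # lands in [min_backoff, 2 * min_backoff)
    print(poke_count, min_backoff, modded_hash)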
2 changes: 1 addition & 1 deletion airflow/utils/file.py
@@ -356,7 +356,7 @@ def iter_airflow_imports(file_path: str) -> Generator[str, None, None]:
 def get_unique_dag_module_name(file_path: str) -> str:
     """Return a unique module name in the format unusual_prefix_{sha1 of module's file path}_{original module name}."""
     if isinstance(file_path, str):
-        path_hash = hashlib.sha1(file_path.encode("utf-8")).hexdigest()
+        path_hash = hashlib.sha1(file_path.encode("utf-8"), usedforsecurity=False).hexdigest()
         org_mod_name = re2.sub(r"[.-]", "_", Path(file_path).stem)
         return MODIFIED_DAG_MODULE_NAME.format(path_hash=path_hash, module_name=org_mod_name)
     raise ValueError("file_path should be a string to generate unique module name")
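
For illustration, a hedged sketch of the resulting module name, reproducing the unusual_prefix_{sha1}_{module name} format described in the docstring; the format constant, the path, and the use of the standard re module (Airflow uses re2) are assumptions here:

import hashlib
import re
from pathlib import Path

MODIFIED_DAG_MODULE_NAME = "unusual_prefix_{path_hash}_{module_name}"  # assumed value

file_path = "/opt/airflow/dags/my-example.dag.py"  # hypothetical DAG file
path_hash = hashlib.sha1(file_path.encode("utf-8"), usedforsecurity=False).hexdigest()
org_mod_name = re.sub(r"[.-]", "_", Path(file_path).stem)  # "my-example.dag" -> "my_example_dag"
print(MODIFIED_DAG_MODULE_NAME.format(path_hash=path_hash, module_name=org_mod_name))
# -> unusual_prefix_<40-hex-char sha1>_my_example_dag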
