Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[BACKPORT 1.6] Exclude password-like fields for considering reparse #9879

Merged
merged 1 commit into from
Apr 8, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions .changes/unreleased/Fixes-20240402-135556.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
kind: Fixes
body: Exclude password-like fields for considering reparse
time: 2024-04-02T13:55:56.169953-07:00
custom:
Author: ChenyuLInx
Issue: "9795"
30 changes: 11 additions & 19 deletions core/dbt/parser/manifest.py
Original file line number Diff line number Diff line change
Expand Up @@ -817,13 +817,6 @@ def is_partial_parsable(self, manifest: Manifest) -> Tuple[bool, Optional[str]]:
)
valid = False
reparse_reason = ReparseReason.proj_env_vars_changed
if (
self.manifest.state_check.profile_env_vars_hash
!= manifest.state_check.profile_env_vars_hash
):
fire_event(UnableToPartialParse(reason="env vars used in profiles.yml have changed"))
valid = False
reparse_reason = ReparseReason.prof_env_vars_changed

missing_keys = {
k
Expand Down Expand Up @@ -962,18 +955,18 @@ def build_manifest_state_check(self):
env_var_str += f"{key}:{config.project_env_vars[key]}|"
project_env_vars_hash = FileHash.from_contents(env_var_str)

# Create a FileHash of the env_vars in the project
key_list = list(config.profile_env_vars.keys())
key_list.sort()
env_var_str = ""
for key in key_list:
env_var_str += f"{key}:{config.profile_env_vars[key]}|"
profile_env_vars_hash = FileHash.from_contents(env_var_str)
# Create a hash of the connection_info, which user has access to in
# jinja context. Thus attributes here may affect the parsing result.
# Ideally we should not expose all of the connection info to the jinja.

# Create a FileHash of the profile file
profile_path = os.path.join(get_flags().PROFILES_DIR, "profiles.yml")
with open(profile_path) as fp:
profile_hash = FileHash.from_contents(fp.read())
# Renaming this variable mean that we will have to do a whole lot more
# change to make sure the previous manifest can be loaded correctly.
# This is an example of naming should be chosen based on the functionality
# rather than the implementation details.
connection_keys = list(config.credentials.connection_info())
# avoid reparsing because of ordering issues
connection_keys.sort()
profile_hash = FileHash.from_contents(pprint.pformat(connection_keys))

# Create a FileHashes for dbt_project for all dependencies
project_hashes = {}
Expand All @@ -985,7 +978,6 @@ def build_manifest_state_check(self):
# Create the ManifestStateCheck object
state_check = ManifestStateCheck(
project_env_vars_hash=project_env_vars_hash,
profile_env_vars_hash=profile_env_vars_hash,
vars_hash=vars_hash,
profile_hash=profile_hash,
project_hashes=project_hashes,
Expand Down
29 changes: 29 additions & 0 deletions tests/functional/partial_parsing/test_partial_parsing.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
run_dbt_and_capture,
rename_dir,
)
import yaml
from tests.functional.utils import up_one
from dbt.tests.fixtures.project import write_project_files
from tests.functional.partial_parsing.fixtures import (
Expand Down Expand Up @@ -824,3 +825,31 @@ def test_pp_renamed_project_dir_changed_project_contents(self, project):
run_dbt(["deps"])
len(run_dbt(["--partial-parse", "seed"])) == 1
len(run_dbt(["--partial-parse", "run"])) == 3


class TestProfileChanges:
@pytest.fixture(scope="class")
def models(self):
return {
"model.sql": "select 1 as id",
}

def test_profile_change(self, project, dbt_profile_data):
# Fist run not partial parsing
_, stdout = run_dbt_and_capture(["parse"])
assert "Unable to do partial parsing because saved manifest not found" in stdout

_, stdout = run_dbt_and_capture(["parse"])
assert "Unable to do partial parsing" not in stdout

# change dbname which is included in the connection_info
dbt_profile_data["test"]["outputs"]["default"]["dbname"] = "dbt2"
write_file(yaml.safe_dump(dbt_profile_data), project.profiles_dir, "profiles.yml")
_, stdout = run_dbt_and_capture(["parse"])
assert "Unable to do partial parsing because profile has changed" in stdout

# Change the password which is not included in the connection_info
dbt_profile_data["test"]["outputs"]["default"]["pass"] = "another_password"
write_file(yaml.safe_dump(dbt_profile_data), project.profiles_dir, "profiles.yml")
_, stdout = run_dbt_and_capture(["parse"])
assert "Unable to do partial parsing" not in stdout
21 changes: 7 additions & 14 deletions tests/functional/partial_parsing/test_pp_vars.py
Original file line number Diff line number Diff line change
Expand Up @@ -313,40 +313,33 @@ def dbt_profile_target(self):
# calls 'load_config' before the tests are run.
# Note: only the specified profile is rendered, so there's no
# point it setting env_vars in non-used profiles.
os.environ["ENV_VAR_USER"] = "root"
os.environ["ENV_VAR_PASS"] = "password"
os.environ["ENV_VAR_HOST"] = "localhost"
return {
"type": "postgres",
"threads": 4,
"host": "localhost",
"host": "{{ env_var('ENV_VAR_HOST') }}",
"port": 5432,
"user": "{{ env_var('ENV_VAR_USER') }}",
"pass": "{{ env_var('ENV_VAR_PASS') }}",
"user": "root",
"pass": "password",
"dbname": "dbt",
}

def test_profile_env_vars(self, project, logs_dir):

# Initial run
os.environ["ENV_VAR_USER"] = "root"
os.environ["ENV_VAR_PASS"] = "password"
os.environ["ENV_VAR_HOST"] = "localhost"

run_dbt(["run"])
manifest = get_manifest(project.project_root)
env_vars_checksum = manifest.state_check.profile_env_vars_hash.checksum

# Change env_vars, the user doesn't exist, this should fail
os.environ["ENV_VAR_USER"] = "fake_user"
os.environ["ENV_VAR_HOST"] = "wrong_host"

# N.B. run_dbt_and_capture won't work here because FailedToConnectError ends the test entirely
with pytest.raises(FailedToConnectError):
run_dbt(["run"], expect_pass=False)

log_output = Path(logs_dir, "dbt.log").read_text()
assert "env vars used in profiles.yml have changed" in log_output

manifest = get_manifest(project.project_root)
assert env_vars_checksum != manifest.state_check.profile_env_vars_hash.checksum
assert "Unable to do partial parsing because profile has changed" in log_output


class TestProfileSecretEnvVars:
Expand Down
23 changes: 23 additions & 0 deletions tests/unit/test_parse_manifest.py
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,18 @@ def _new_file(self, searched, name, match):


class TestPartialParse(unittest.TestCase):
def setUp(self) -> None:
mock_project = MagicMock(RuntimeConfig)
mock_project.cli_vars = ""
mock_project.args = MagicMock()
mock_project.args.profile = "test"
mock_project.args.target = "test"
mock_project.project_env_vars = {}
mock_project.profile_env_vars = {}
mock_project.project_target_path = "mock_target_path"
mock_project.credentials = MagicMock()
self.mock_project = mock_project

@patch("dbt.parser.manifest.ManifestLoader.build_manifest_state_check")
@patch("dbt.parser.manifest.os.path.exists")
@patch("dbt.parser.manifest.open")
Expand All @@ -121,3 +133,14 @@ def test_partial_parse_file_path(self, patched_open, patched_os_exist, patched_s
ManifestLoader(mock_project, {})
# if specified in flags, we use the specified path
patched_open.assert_called_with("specified_partial_parse_path", "rb")

def test_profile_hash_change(self):
# This test validate that the profile_hash is updated when the connection keys change
profile_hash = "750bc99c1d64ca518536ead26b28465a224be5ffc918bf2a490102faa5a1bcf5"
self.mock_project.credentials.connection_info.return_value = "test"
set_from_args(Namespace(), {})
manifest = ManifestLoader(self.mock_project, {})
assert manifest.manifest.state_check.profile_hash.checksum == profile_hash
self.mock_project.credentials.connection_info.return_value = "test1"
manifest = ManifestLoader(self.mock_project, {})
assert manifest.manifest.state_check.profile_hash.checksum != profile_hash
Loading