Skip to content

Commit

Permalink
STAR-843: Update dtests for ULID based generation ID
Browse files: browse the repository at this point in the history
(cherry picked from commit fd2ebac)
  • Loading branch information
jacek-lewandowski committed Mar 9, 2022
1 parent 0465be4 commit b85b0f0
Showing 1 changed file with 25 additions and 30 deletions.
55 changes: 25 additions & 30 deletions scrub_test.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -171,23 +171,25 @@ def standalonescrub(self, table, *indexes, acceptable_errors=None):
self.launch_standalone_scrub(KEYSPACE, '{}.{}'.format(table, index))
return self.get_sstables(table, indexes)

def increment_generation_by(self, sstable, generation_increment):
def get_latest_generation(self, sstables):
"""
Set the generation number for an sstable file name
Get the latest generation ID of the provided sstables
"""
return re.sub('(\d(?!\d))\-', lambda x: str(int(x.group(1)) + generation_increment) + '-', sstable)
latest_gen = None
for table_or_index, table_sstables in list(sstables.items()):
gen = max(parse.search('{}-{generation}-{}.{}', s).named['generation'] for s in table_sstables)
latest_gen = gen if latest_gen is None else max([gen, latest_gen])
return latest_gen

def increase_sstable_generations(self, sstables):
def get_earliest_generation(self, sstables):
"""
After finding the number of existing sstables, increase all of the
generations by that amount.
Get the earliest generation ID of the provided sstables
"""
earliest_gen = None
for table_or_index, table_sstables in list(sstables.items()):
increment_by = len(set(parse.search('{}-{increment_by}-{suffix}.{file_extention}', s).named['increment_by'] for s in table_sstables))
sstables[table_or_index] = [self.increment_generation_by(s, increment_by) for s in table_sstables]

logger.debug('sstables after increment {}'.format(str(sstables)))

gen = min(parse.search('{}-{generation}-{}.{}', s).named['generation'] for s in table_sstables)
earliest_gen = gen if earliest_gen is None else min([gen, earliest_gen])
return earliest_gen

@since('2.2')
class TestScrubIndexes(TestHelper):
Expand Down Expand Up @@ -240,16 +242,15 @@ def test_scrub_static_table(self):
initial_sstables = self.flush('users', 'gender_idx', 'state_idx', 'birth_year_idx')
scrubbed_sstables = self.scrub('users', 'gender_idx', 'state_idx', 'birth_year_idx')

self.increase_sstable_generations(initial_sstables)
assert initial_sstables == scrubbed_sstables
assert self.get_latest_generation(initial_sstables) < self.get_earliest_generation(scrubbed_sstables)

users = self.query_users(session)
assert initial_users == users

# Scrub and check sstables and data again
initial_sstables = scrubbed_sstables
scrubbed_sstables = self.scrub('users', 'gender_idx', 'state_idx', 'birth_year_idx')
self.increase_sstable_generations(initial_sstables)
assert initial_sstables == scrubbed_sstables
assert self.get_latest_generation(initial_sstables) < self.get_earliest_generation(scrubbed_sstables)

users = self.query_users(session)
assert initial_users == users
Expand Down Expand Up @@ -281,8 +282,7 @@ def test_standalone_scrub(self):
cluster.stop()

scrubbed_sstables = self.standalonescrub('users', 'gender_idx', 'state_idx', 'birth_year_idx')
self.increase_sstable_generations(initial_sstables)
assert initial_sstables == scrubbed_sstables
assert self.get_latest_generation(initial_sstables) < self.get_earliest_generation(scrubbed_sstables)

cluster.start()
session = self.patient_cql_connection(node1)
Expand Down Expand Up @@ -315,16 +315,14 @@ def test_scrub_collections_table(self):
initial_sstables = self.flush('users', 'user_uuids_idx')
scrubbed_sstables = self.scrub('users', 'user_uuids_idx')

self.increase_sstable_generations(initial_sstables)
assert initial_sstables == scrubbed_sstables
assert self.get_latest_generation(initial_sstables) < self.get_earliest_generation(scrubbed_sstables)

users = list(session.execute(("SELECT * from users where uuids contains {some_uuid}").format(some_uuid=_id)))
assert initial_users == users

initial_sstables = scrubbed_sstables
scrubbed_sstables = self.scrub('users', 'user_uuids_idx')

self.increase_sstable_generations(initial_sstables)
assert initial_sstables == scrubbed_sstables
assert self.get_latest_generation(initial_sstables) < self.get_earliest_generation(scrubbed_sstables)

users = list(session.execute(("SELECT * from users where uuids contains {some_uuid}").format(some_uuid=_id)))

Expand Down Expand Up @@ -377,16 +375,15 @@ def test_nodetool_scrub(self):
initial_sstables = self.flush('users')
scrubbed_sstables = self.scrub('users')

self.increase_sstable_generations(initial_sstables)
assert initial_sstables == scrubbed_sstables
assert self.get_latest_generation(initial_sstables) < self.get_earliest_generation(scrubbed_sstables)

users = self.query_users(session)
assert initial_users == users

# Scrub and check sstables and data again
initial_sstables = scrubbed_sstables
scrubbed_sstables = self.scrub('users')
self.increase_sstable_generations(initial_sstables)
assert initial_sstables == scrubbed_sstables
assert self.get_latest_generation(initial_sstables) < self.get_earliest_generation(scrubbed_sstables)

users = self.query_users(session)
assert initial_users == users
Expand Down Expand Up @@ -418,8 +415,7 @@ def test_standalone_scrub(self):
cluster.stop()

scrubbed_sstables = self.standalonescrub('users')
self.increase_sstable_generations(initial_sstables)
assert initial_sstables == scrubbed_sstables
assert self.get_latest_generation(initial_sstables) < self.get_earliest_generation(scrubbed_sstables)

cluster.start()
session = self.patient_cql_connection(node1)
Expand Down Expand Up @@ -447,8 +443,7 @@ def test_standalone_scrub_essential_files_only(self):
self.delete_non_essential_sstable_files('users')

scrubbed_sstables = self.standalonescrub(table='users', acceptable_errors=["WARN.*Could not recreate or deserialize existing bloom filter, continuing with a pass-through bloom filter but this will significantly impact reads performance"])
self.increase_sstable_generations(initial_sstables)
assert initial_sstables == scrubbed_sstables
assert self.get_latest_generation(initial_sstables) < self.get_earliest_generation(scrubbed_sstables)

cluster.start()
session = self.patient_cql_connection(node1)
Expand Down

0 comments on commit b85b0f0

Please sign in to comment.