Skip to content

Commit

Permalink
Add logic to generate a new snapshot-id (#37)
Browse files Browse the repository at this point in the history
* Add logic to generate a new snapshot-id

* Use the xor approach

* Closer to the Java way of doing it
  • Loading branch information
Fokko authored Oct 5, 2023
1 parent 76aa6ea commit 6a77195
Show file tree
Hide file tree
Showing 2 changed files with 29 additions and 0 deletions.
23 changes: 23 additions & 0 deletions pyiceberg/table/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
from __future__ import annotations

import itertools
import uuid
from abc import ABC, abstractmethod
from copy import copy
from dataclasses import dataclass
Expand Down Expand Up @@ -498,6 +499,14 @@ def location(self) -> str:
"""Return the table's base location."""
return self.metadata.location

def new_snapshot_id(self) -> int:
    """Return a freshly generated snapshot-id that this table does not use yet.

    Candidates come from ``_generate_snapshot_id()``; a candidate is rejected
    and regenerated for as long as ``self.snapshot_by_id`` finds a snapshot
    registered under it.
    """
    while True:
        candidate = _generate_snapshot_id()
        # ``None`` from the lookup means no snapshot carries this id.
        if self.snapshot_by_id(candidate) is None:
            return candidate

def current_snapshot(self) -> Optional[Snapshot]:
"""Get the current snapshot for this table, or None if there is no current snapshot."""
if snapshot_id := self.metadata.current_snapshot_id:
Expand Down Expand Up @@ -1566,3 +1575,17 @@ def _add_and_move_fields(
elif len(moves) > 0:
return _move_fields(fields, moves)
return None if len(adds) == 0 else tuple(*fields, *adds)


def _generate_snapshot_id() -> int:
    """Generate a new random, non-negative snapshot ID from a UUID.

    The 16 bytes of a random UUID (``uuid.uuid4()``) are folded into 8 bytes by
    XOR-ing the first half with the second half. The folded bytes are read as a
    little-endian signed 64-bit integer, and the sign bit is then cleared.

    Returns:
        An integer in the range ``[0, 2**63 - 1]``, i.e. a value that fits in a
        non-negative signed 64-bit long.
    """
    rnd_uuid = uuid.uuid4()
    folded = bytes(lhs ^ rhs for lhs, rhs in zip(rnd_uuid.bytes[0:8], rnd_uuid.bytes[8:16]))
    snapshot_id = int.from_bytes(folded, byteorder='little', signed=True)

    # Clear the sign bit instead of negating: negating -2**63 would give 2**63,
    # which no longer fits in a signed 64-bit long. Python's ``&`` on a negative
    # int uses two's-complement semantics, so the mask keeps the low 63 bits.
    return snapshot_id & 0x7FFFFFFFFFFFFFFF
6 changes: 6 additions & 0 deletions tests/table/test_init.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@
StaticTable,
Table,
UpdateSchema,
_generate_snapshot_id,
_match_deletes_to_datafile,
)
from pyiceberg.table.metadata import INITIAL_SEQUENCE_NUMBER
Expand Down Expand Up @@ -506,3 +507,8 @@ def test_add_nested_list_type_column(table: Table) -> None:
element_required=False,
)
assert new_schema.highest_field_id == 7


def test_generate_snapshot_id(table: Table) -> None:
    """Check that both snapshot-id entry points hand back an ``int``."""
    # Module-level generator, called without any table.
    standalone_id = _generate_snapshot_id()
    assert isinstance(standalone_id, int)

    # Table-level method on the ``table`` fixture.
    table_level_id = table.new_snapshot_id()
    assert isinstance(table_level_id, int)

0 comments on commit 6a77195

Please sign in to comment.