""" Test UUID v7 generation for database primary keys. UUID v7 is time-ordered and random (NOT deterministic from GHCID). Use for database performance, not for persistent identifiers. """ import uuid import pytest import time from glam_extractor.identifiers.ghcid import GHCIDComponents class TestUUIDv7Generation: """Test UUID v7 generation for database primary keys.""" def test_uuid_v7_format(self): """UUID v7 should have correct version and variant.""" uuid_v7 = GHCIDComponents.generate_uuid_v7() assert isinstance(uuid_v7, uuid.UUID) assert uuid_v7.version == 7 assert uuid_v7.variant == uuid.RFC_4122 def test_uuid_v7_not_deterministic(self): """UUID v7 should be unique on each call (not deterministic).""" uuid1 = GHCIDComponents.generate_uuid_v7() uuid2 = GHCIDComponents.generate_uuid_v7() assert uuid1 != uuid2 assert str(uuid1) != str(uuid2) def test_uuid_v7_time_ordered(self): """UUID v7 should be time-ordered (k-sortable).""" uuid1 = GHCIDComponents.generate_uuid_v7() time.sleep(0.002) # 2ms delay uuid2 = GHCIDComponents.generate_uuid_v7() # UUIDs should sort by creation time assert uuid1 < uuid2 assert str(uuid1) < str(uuid2) def test_uuid_v7_format_rfc_9562(self): """UUID v7 should conform to RFC 9562 format.""" result_uuid = GHCIDComponents.generate_uuid_v7() # UUID v7 format: xxxxxxxx-xxxx-7xxx-yxxx-xxxxxxxxxxxx uuid_str = str(result_uuid) parts = uuid_str.split('-') assert len(parts) == 5 assert len(parts[0]) == 8 # 8 hex chars assert len(parts[1]) == 4 # 4 hex chars assert len(parts[2]) == 4 # 4 hex chars (version) assert len(parts[3]) == 4 # 4 hex chars (variant) assert len(parts[4]) == 12 # 12 hex chars assert parts[2][0] == '7' # Version 7 def test_uuid_v7_collision_resistance(self): """Generate many UUIDs to test collision resistance.""" uuids = set() count = 1000 for _ in range(count): uuids.add(GHCIDComponents.generate_uuid_v7()) # All should be unique assert len(uuids) == count def test_uuid_v7_timestamp_extraction(self): """UUID v7 should contain valid timestamp.""" before = int(time.time() * 1000) uuid_v7 = GHCIDComponents.generate_uuid_v7() after = int(time.time() * 1000) # Extract timestamp from UUID (first 48 bits = 6 bytes) uuid_bytes = uuid_v7.bytes timestamp_ms = int.from_bytes(uuid_bytes[:6], byteorder='big') # Timestamp should be between before and after assert before <= timestamp_ms <= after def test_uuid_v7_batch_generation(self): """Generate batch of UUIDs in same millisecond.""" uuids = [] # Generate 100 UUIDs as fast as possible for _ in range(100): uuids.append(GHCIDComponents.generate_uuid_v7()) # All should be unique despite being in same millisecond assert len(set(uuids)) == len(uuids) # All should be sortable sorted_uuids = sorted(uuids) assert sorted_uuids[0] <= sorted_uuids[-1] class TestUUIDv7UseCases: """Test UUID v7 use cases and integration scenarios.""" def test_database_primary_key_scenario(self): """UUID v7 should work as database primary key.""" # Simulate creating multiple records records = [] for i in range(10): record_id = GHCIDComponents.generate_uuid_v7() records.append({ 'id': record_id, 'name': f'Institution {i}', 'created_at': time.time() }) # Small delay to ensure different timestamps time.sleep(0.001) # All IDs should be unique ids = [r['id'] for r in records] assert len(ids) == len(set(ids)) # IDs should be naturally ordered by creation time assert ids == sorted(ids) def test_uuid_v7_vs_uuid_v5_difference(self): """UUID v7 is random, UUID v5 is deterministic.""" components = GHCIDComponents("US", "CA", "SAN", "A", "IA") # UUID v5 is deterministic uuid_v5_1 = components.to_uuid() uuid_v5_2 = components.to_uuid() assert uuid_v5_1 == uuid_v5_2 # Same every time # UUID v7 is random uuid_v7_1 = GHCIDComponents.generate_uuid_v7() uuid_v7_2 = GHCIDComponents.generate_uuid_v7() assert uuid_v7_1 != uuid_v7_2 # Different every time def test_four_identifier_strategy(self): """Demonstrate the four-identifier strategy.""" components = GHCIDComponents("NL", "NH", "AMS", "M", "RM") # 1. UUID v7 - Database primary key (random, time-ordered) record_id = GHCIDComponents.generate_uuid_v7() assert record_id.version == 7 # 2. UUID v5 - Public PID (deterministic, interoperable) pid_uuid = components.to_uuid() assert pid_uuid.version == 5 # 3. UUID v8 - SOTA PID (deterministic, SHA-256) pid_sha256 = components.to_uuid_sha256() assert pid_sha256.version == 8 # 4. GHCID string - Human-readable ghcid_str = components.to_string() assert ghcid_str == "NL-NH-AMS-M-RM" # All four are different assert record_id != pid_uuid assert record_id != pid_sha256 assert pid_uuid != pid_sha256 # UUID v5 and v8 are deterministic assert components.to_uuid() == pid_uuid assert components.to_uuid_sha256() == pid_sha256 # UUID v7 is random assert GHCIDComponents.generate_uuid_v7() != record_id if __name__ == "__main__": pytest.main([__file__, "-v"])