fix(scripts): force table recreation in web archives migration

Drop the existing tables before creating them so that schema updates are
always applied. The previous CREATE TABLE IF NOT EXISTS statements left an
already-existing table untouched, silently skipping any schema changes.
This commit is contained in:
kempersc 2025-12-07 18:47:46 +01:00
parent 0c4c378e06
commit 400b1c04c1

View file

@ -168,9 +168,14 @@ def build_ducklake_database(mapping: Dict[int, str]):
con = duckdb.connect(str(DUCKLAKE_DB))
# Drop and recreate tables to ensure schema is up to date
con.execute("DROP TABLE IF EXISTS web_claims")
con.execute("DROP TABLE IF EXISTS web_pages")
con.execute("DROP TABLE IF EXISTS web_archives")
# Create tables
con.execute("""
CREATE TABLE IF NOT EXISTS web_archives (
CREATE TABLE web_archives (
ghcid VARCHAR PRIMARY KEY,
entry_index INTEGER,
domain VARCHAR,
@ -186,7 +191,7 @@ def build_ducklake_database(mapping: Dict[int, str]):
""")
con.execute("""
CREATE TABLE IF NOT EXISTS web_pages (
CREATE TABLE web_pages (
id INTEGER PRIMARY KEY,
ghcid VARCHAR,
page_title VARCHAR,
@ -198,7 +203,7 @@ def build_ducklake_database(mapping: Dict[int, str]):
""")
con.execute("""
CREATE TABLE IF NOT EXISTS web_claims (
CREATE TABLE web_claims (
id INTEGER PRIMARY KEY,
ghcid VARCHAR,
claim_id VARCHAR,
@ -217,9 +222,9 @@ def build_ducklake_database(mapping: Dict[int, str]):
""")
# Clear existing data
con.execute("DELETE FROM web_claims")
con.execute("DELETE FROM web_pages")
con.execute("DELETE FROM web_archives")
con.execute("-- Removed: DELETE FROM web_claims")
con.execute("-- Removed: DELETE FROM web_pages")
con.execute("-- Removed: DELETE FROM web_archives")
page_id = 0
claim_id_counter = 0