diff --git a/backend/ducklake/main.py b/backend/ducklake/main.py index 0af5e7e038..5a82333521 100644 --- a/backend/ducklake/main.py +++ b/backend/ducklake/main.py @@ -49,6 +49,12 @@ class Settings(BaseModel): # Database alias in DuckDB db_alias: str = os.getenv("DUCKLAKE_DB_ALIAS", "heritage") + # Web archives database path + web_archives_path: str = os.getenv( + "DUCKLAKE_WEB_ARCHIVES_PATH", + "/var/lib/glam/ducklake/web_archives.duckdb" + ) + # Server settings host: str = os.getenv("DUCKLAKE_HOST", "0.0.0.0") port: int = int(os.getenv("DUCKLAKE_PORT", "8765")) @@ -174,6 +180,28 @@ def get_connection() -> duckdb.DuckDBPyConnection: _conn.execute(attach_sql) print(f"DuckLake attached: {settings.db_alias} -> {settings.data_path}") + # Attach web archives database (read-only) + if Path(settings.web_archives_path).exists(): + try: + _conn.execute(f""" + ATTACH '{settings.web_archives_path}' AS web_archives (READ_ONLY) + """) + # Create views in heritage schema for seamless access + _conn.execute(""" + CREATE OR REPLACE VIEW heritage.web_archives AS SELECT * FROM web_archives.web_archives + """) + _conn.execute(""" + CREATE OR REPLACE VIEW heritage.web_pages AS SELECT * FROM web_archives.web_pages + """) + _conn.execute(""" + CREATE OR REPLACE VIEW heritage.web_claims AS SELECT * FROM web_archives.web_claims + """) + print(f"Web archives attached: {settings.web_archives_path}") + except Exception as wa_err: + print(f"Warning: Could not attach web archives: {wa_err}") + else: + print(f"Web archives not found: {settings.web_archives_path}") + except Exception as e: print(f"DuckLake extension not available: {e}") print("Falling back to standard DuckDB mode") diff --git a/data/custodian/BR-MI-BHO-E-UTL-ufmg_tainacan_lab.yaml b/data/custodian/BR-MI-BHO-E-UTL-ufmg_tainacan_lab.yaml index 83b0edb7ec..0c38bcdb32 100644 --- a/data/custodian/BR-MI-BHO-E-UTL-ufmg_tainacan_lab.yaml +++ b/data/custodian/BR-MI-BHO-E-UTL-ufmg_tainacan_lab.yaml @@ -83,17 +83,19 @@ ghcid: city_name: Belo Horizonte country_code: BR geonames_id: 3470127 - latitude: -19.9191 - longitude: -43.9386 + google_maps_url: https://maps.app.goo.gl/LqXWAtMukbvr4e5AA + latitude: -19.8697 + longitude: -43.9637 method: MANUAL_RESEARCH - notes: UFMG (Federal University of Minas Gerais) is in Belo Horizonte + notes: Tainacan Lab at UFMG School of Information Science (Escola de Ciência da + Informação) region_code: MG region_name: Minas Gerais resolution_date: '2025-12-07T16:44:07.061598+00:00' record_id: 167ba1b7-a62d-42d6-92cd-d91ff4ce72a9 identifiers: - identifier_scheme: GHCID - identifier_value: BR-MI-XXX-E-UTL-ufmg_tainacan_lab + identifier_value: BR-MG-BHO-E-UTL-ufmg_tainacan_lab - identifier_scheme: GHCID_UUID identifier_value: 9dcee694-81b2-5309-a27a-628488d0205e - identifier_scheme: GHCID_UUID_SHA256 @@ -108,6 +110,9 @@ identifiers: - &id002 identifier_scheme: OLD_ID identifier_value: 12840343882751256357 +- identifier_scheme: Website + identifier_url: https://tainacan.eci.ufmg.br/ + identifier_value: https://tainacan.eci.ufmg.br/ original_entry: identifiers: - identifier_scheme: GHCID diff --git a/data/custodian/BR-MI-BHO-E-UTL.yaml b/data/custodian/BR-MI-BHO-E-UTL.yaml index db97106153..c6161805f7 100644 --- a/data/custodian/BR-MI-BHO-E-UTL.yaml +++ b/data/custodian/BR-MI-BHO-E-UTL.yaml @@ -83,17 +83,19 @@ ghcid: city_name: Belo Horizonte country_code: BR geonames_id: 3470127 - latitude: -19.9191 - longitude: -43.9386 + google_maps_url: https://maps.app.goo.gl/LqXWAtMukbvr4e5AA + latitude: -19.8697 + longitude: -43.9637 method: MANUAL_RESEARCH - notes: UFMG (Federal University of Minas Gerais) is in Belo Horizonte + notes: Tainacan Lab at UFMG School of Information Science (Escola de Ciência da + Informação) region_code: MG region_name: Minas Gerais resolution_date: '2025-12-07T16:44:07.052938+00:00' record_id: 3c8e1c49-716c-40ea-a283-a208686138b7 identifiers: - identifier_scheme: GHCID - identifier_value: BR-MI-XXX-E-UTL + identifier_value: BR-MG-BHO-E-UTL - identifier_scheme: GHCID_UUID identifier_value: 562718ae-1d5c-57d7-9829-db40b4242ad1 - identifier_scheme: GHCID_UUID_SHA256 @@ -108,6 +110,9 @@ identifiers: - &id002 identifier_scheme: OLD_ID identifier_value: 12840343882751256357 +- identifier_scheme: Website + identifier_url: https://tainacan.eci.ufmg.br/ + identifier_value: https://tainacan.eci.ufmg.br/ original_entry: identifiers: - identifier_scheme: GHCID diff --git a/frontend/public/schemas/20251121/linkml/manifest.json b/frontend/public/schemas/20251121/linkml/manifest.json index 13808ec44e..317083d99e 100644 --- a/frontend/public/schemas/20251121/linkml/manifest.json +++ b/frontend/public/schemas/20251121/linkml/manifest.json @@ -1,5 +1,5 @@ { - "generated": "2025-12-07T13:27:28.747Z", + "generated": "2025-12-07T16:47:16.823Z", "version": "1.0.0", "categories": [ { diff --git a/frontend/src/components/database/DuckLakePanel.tsx b/frontend/src/components/database/DuckLakePanel.tsx index c4c46b9f06..38bd706879 100644 --- a/frontend/src/components/database/DuckLakePanel.tsx +++ b/frontend/src/components/database/DuckLakePanel.tsx @@ -1252,7 +1252,18 @@ export function DuckLakePanel({ compact = false }: DuckLakePanelProps) {