clean up GHCID
This commit is contained in:
parent
99430c2a70
commit
23b1d8ee5f
75 changed files with 13132 additions and 7905 deletions
1
.gitignore
vendored
1
.gitignore
vendored
|
|
@ -26,6 +26,7 @@ MANIFEST
|
|||
docs/invoice
|
||||
data/custodian/web/bu
|
||||
data/custodian/weboj
|
||||
data/custodian/person/affiliated/manual/
|
||||
|
||||
# Virtual environments
|
||||
venv/
|
||||
|
|
|
|||
|
|
@ -86,11 +86,16 @@ select_visualization: Any = None
|
|||
VisualizationSelector: Any = None # type: ignore[no-redef]
|
||||
generate_sparql: Any = None
|
||||
configure_dspy: Any = None
|
||||
get_province_code: Any = None # Province name to ISO 3166-2 code converter
|
||||
|
||||
try:
|
||||
import sys
|
||||
sys.path.insert(0, str(os.path.join(os.path.dirname(__file__), "..", "..", "src")))
|
||||
from glam_extractor.api.hybrid_retriever import HybridRetriever as _HybridRetriever, create_hybrid_retriever as _create_hybrid_retriever
|
||||
from glam_extractor.api.hybrid_retriever import (
|
||||
HybridRetriever as _HybridRetriever,
|
||||
create_hybrid_retriever as _create_hybrid_retriever,
|
||||
get_province_code as _get_province_code,
|
||||
)
|
||||
from glam_extractor.api.qdrant_retriever import HeritageCustodianRetriever as _HeritageCustodianRetriever
|
||||
from glam_extractor.api.typedb_retriever import TypeDBRetriever as _TypeDBRetriever, create_typedb_retriever as _create_typedb_retriever
|
||||
from glam_extractor.api.visualization import select_visualization as _select_visualization, VisualizationSelector as _VisualizationSelector
|
||||
|
|
@ -100,9 +105,14 @@ try:
|
|||
create_typedb_retriever = _create_typedb_retriever
|
||||
select_visualization = _select_visualization
|
||||
VisualizationSelector = _VisualizationSelector
|
||||
get_province_code = _get_province_code
|
||||
RETRIEVERS_AVAILABLE = True
|
||||
except ImportError as e:
|
||||
logger.warning(f"Core retrievers not available: {e}")
|
||||
# Provide a fallback get_province_code that returns None
|
||||
def get_province_code(province_name: str | None) -> str | None:
|
||||
"""Fallback when hybrid_retriever is not available."""
|
||||
return None
|
||||
|
||||
# DSPy is optional - don't block retrievers if it's missing
|
||||
try:
|
||||
|
|
@ -112,6 +122,17 @@ try:
|
|||
except ImportError as e:
|
||||
logger.warning(f"DSPy SPARQL not available: {e}")
|
||||
|
||||
# Atomic query decomposition for geographic/type filtering
|
||||
decompose_query: Any = None
|
||||
DECOMPOSER_AVAILABLE = False
|
||||
try:
|
||||
from atomic_decomposer import decompose_query as _decompose_query
|
||||
decompose_query = _decompose_query
|
||||
DECOMPOSER_AVAILABLE = True
|
||||
logger.info("Query decomposer loaded successfully")
|
||||
except ImportError as e:
|
||||
logger.info(f"Query decomposer not available: {e}")
|
||||
|
||||
# Cost tracker is optional - gracefully degrades if unavailable
|
||||
COST_TRACKER_AVAILABLE = False
|
||||
get_tracker = None
|
||||
|
|
@ -126,6 +147,98 @@ except ImportError as e:
|
|||
logger.info(f"Cost tracker not available (optional): {e}")
|
||||
|
||||
|
||||
# Province detection for geographic filtering
|
||||
# Lower-cased Dutch and English spellings of the Dutch provinces, including
# hyphenated, spaced and run-together variants.
DUTCH_PROVINCES = {
    "noord-holland", "noordholland", "north holland", "north-holland",
    "zuid-holland", "zuidholland", "south holland", "south-holland",
    "utrecht", "gelderland", "noord-brabant", "noordbrabant", "brabant",
    "north brabant", "north-brabant", "limburg", "overijssel", "friesland",
    "frysl\u00e2n",  # "fryslân", written as an escape so the entry is always NFC
    "fryslan", "groningen", "drenthe", "flevoland", "zeeland",
}

# Sub-provincial regions; defined once instead of being rebuilt on every call.
_SUB_PROVINCIAL_REGIONS = frozenset(
    {"randstad", "veluwe", "achterhoek", "twente", "de betuwe", "betuwe"}
)


def infer_location_level(location: str) -> str:
    """Infer whether a location name is a city, a province, or a region.

    The name is compared case-insensitively after Unicode NFC normalisation
    and after collapsing runs of whitespace, so ``"  North   Holland "`` and
    a decomposed ``"Fryslân"`` are both recognised.

    Args:
        location: Free-text location name.

    Returns:
        'province' if location is a Dutch province
        'region' if location is a sub-provincial region
        'city' otherwise (including for an empty string)
    """
    # Local import keeps this block self-contained; the module-level import
    # list is not visible from here.
    import unicodedata

    # NFC first so "a" + combining circumflex matches the precomposed "â"
    # stored in DUTCH_PROVINCES; split/join both strips and collapses spaces.
    normalized = " ".join(unicodedata.normalize("NFC", location).lower().split())

    if normalized in DUTCH_PROVINCES:
        return "province"

    if normalized in _SUB_PROVINCIAL_REGIONS:
        return "region"

    return "city"
|
||||
|
||||
|
||||
# Maps lower-cased Dutch/English institution type words to the upper-case
# type values used as filters. Defined once instead of on every call.
_INSTITUTION_TYPE_ALIASES: dict[str, str] = {
    "archive": "ARCHIVE",
    "archief": "ARCHIVE",
    "archieven": "ARCHIVE",
    "museum": "MUSEUM",
    "musea": "MUSEUM",
    "museums": "MUSEUM",
    "library": "LIBRARY",
    "bibliotheek": "LIBRARY",
    "bibliotheken": "LIBRARY",
    "gallery": "GALLERY",
    "galerie": "GALLERY",
}


def extract_geographic_filters(question: str) -> dict[str, list[str] | None]:
    """Extract geographic filters from a question using query decomposition.

    Extraction is best-effort: if the decomposer is unavailable, or anything
    raises while decomposing, the filters collected so far are returned and
    the failure is only logged.

    Args:
        question: The user's natural-language question.

    Returns:
        dict with keys: region_codes, cities, institution_types. Each value
        is a single-element list when a filter was found, otherwise None.
    """
    filters: dict[str, list[str] | None] = {
        "region_codes": None,
        "cities": None,
        "institution_types": None,
    }

    if not DECOMPOSER_AVAILABLE or not decompose_query:
        return filters

    try:
        decomposed = decompose_query(question)

        # Extract location and determine if it's a province or city
        location = decomposed.location
        if location:
            level = infer_location_level(location)

            if level == "province":
                # Convert province name to ISO 3166-2 code for Qdrant filtering
                # e.g., "Noord-Holland" → "NH"
                province_code = get_province_code(location)
                if province_code:
                    filters["region_codes"] = [province_code]
                    logger.info(f"Province filter: {location} → {province_code}")
                else:
                    # Previously dropped silently, which made a missing
                    # province filter impossible to diagnose from the logs.
                    logger.info(f"Province filter skipped, no code for: {location}")
            elif level == "city":
                filters["cities"] = [location]
                logger.info(f"City filter: {location}")
            # Any other level (e.g. "region") intentionally yields no filter.

        # Extract institution type
        if decomposed.institution_type:
            # Strip before the lookup so " Museum " maps to "MUSEUM" instead
            # of falling through to the bogus value " MUSEUM ".
            inst_type = decomposed.institution_type.strip().lower()
            if inst_type:
                # Unknown types fall back to their upper-cased form.
                mapped_type = _INSTITUTION_TYPE_ALIASES.get(inst_type, inst_type.upper())
                filters["institution_types"] = [mapped_type]
                logger.info(f"Institution type filter: {mapped_type}")

    except Exception as e:
        # Deliberate catch-all: filter extraction must never fail the query.
        logger.warning(f"Failed to extract geographic filters: {e}")

    return filters
|
||||
|
||||
|
||||
# Configuration
|
||||
class Settings:
|
||||
"""Application settings from environment variables."""
|
||||
|
|
@ -1235,12 +1348,20 @@ async def query_rag(request: QueryRequest) -> QueryResponse:
|
|||
intent, sources = retriever.router.get_sources(request.question, request.sources)
|
||||
logger.info(f"Query intent: {intent}, sources: {sources}")
|
||||
|
||||
# Extract geographic filters from question (province, city, institution type)
|
||||
geo_filters = extract_geographic_filters(request.question)
|
||||
if any(geo_filters.values()):
|
||||
logger.info(f"Geographic filters extracted: {geo_filters}")
|
||||
|
||||
# Retrieve from all sources
|
||||
results = await retriever.retrieve(
|
||||
request.question,
|
||||
sources,
|
||||
request.k,
|
||||
embedding_model=request.embedding_model,
|
||||
region_codes=geo_filters["region_codes"],
|
||||
cities=geo_filters["cities"],
|
||||
institution_types=geo_filters["institution_types"],
|
||||
)
|
||||
|
||||
# Merge results
|
||||
|
|
@ -1664,10 +1785,14 @@ async def stream_query_response(
|
|||
# Route query
|
||||
intent, sources = retriever.router.get_sources(request.question, request.sources)
|
||||
|
||||
# Extract geographic filters from question (province, city, institution type)
|
||||
geo_filters = extract_geographic_filters(request.question)
|
||||
|
||||
yield json.dumps({
|
||||
"type": "status",
|
||||
"message": f"Routing query to {len(sources)} sources...",
|
||||
"intent": intent.value,
|
||||
"geo_filters": {k: v for k, v in geo_filters.items() if v},
|
||||
}) + "\n"
|
||||
|
||||
# Retrieve from sources and stream progress
|
||||
|
|
@ -1683,6 +1808,9 @@ async def stream_query_response(
|
|||
[source],
|
||||
request.k,
|
||||
embedding_model=request.embedding_model,
|
||||
region_codes=geo_filters["region_codes"],
|
||||
cities=geo_filters["cities"],
|
||||
institution_types=geo_filters["institution_types"],
|
||||
)
|
||||
results.extend(source_results)
|
||||
|
||||
|
|
|
|||
|
|
@ -21,28 +21,39 @@ linkedin_enrichment:
|
|||
original_file: data/custodian/linkedin/v-z-w-archief-en-documentatiecentrum-erfgoed-binnenvaart.yaml
|
||||
schema_version: 1.0.0
|
||||
location:
|
||||
city: ''
|
||||
region: XX
|
||||
country: NL
|
||||
city: Oudenburg
|
||||
region: West-Vlaanderen
|
||||
country: BE
|
||||
address: Vaartdijk zuid 11, 8460 Oudenburg (aboard Museumschip Tordino)
|
||||
ghcid:
|
||||
ghcid_current: NL-XX-XXX-A-VZWADEB
|
||||
ghcid_original: NL-XX-XXX-A-VZWADEB
|
||||
ghcid_uuid: 1f4e98ec-143f-5448-90ef-a295fd4a1a6e
|
||||
ghcid_uuid_sha256: f6cabc91-9be6-875f-ab66-ffc5be964330
|
||||
ghcid_numeric: 17783233412197709663
|
||||
ghcid_current: BE-VWV-OUD-A-VZWADEB
|
||||
ghcid_original: BE-VWV-OUD-A-VZWADEB
|
||||
ghcid_uuid: 2975de6a-8d00-51ac-8ef7-238dc217515a
|
||||
ghcid_uuid_sha256: 865f764c-c704-803b-8338-b16c56fcdc45
|
||||
ghcid_numeric: 9682587795998437435
|
||||
record_id: fdcd0fb5-b8cf-453d-9a7c-1d0bc87be5d0
|
||||
generation_timestamp: '2025-12-16T21:06:45.654173+00:00'
|
||||
ghcid_history:
|
||||
- ghcid: NL-XX-XXX-A-VZWADEB
|
||||
ghcid_numeric: 17783233412197709663
|
||||
valid_from: '2025-12-16T21:06:45.654173+00:00'
|
||||
valid_to: null
|
||||
valid_to: '2025-12-17T09:41:23.616579+00:00'
|
||||
reason: Initial GHCID assignment from LinkedIn batch import
|
||||
- ghcid: BE-VWV-OUD-A-VZWADEB
|
||||
ghcid_numeric: 9682587795998437435
|
||||
valid_from: '2025-12-17T09:41:23.616579+00:00'
|
||||
valid_to: null
|
||||
reason: 'Country code corrected: NL -> BE (Belgium). Location: Oudenburg, West-Vlaanderen'
|
||||
location_resolution:
|
||||
method: UNRESOLVED
|
||||
city_code: XXX
|
||||
region_code: XX
|
||||
country_code: NL
|
||||
method: EXA_WEB_SEARCH
|
||||
city_code: OUD
|
||||
city_name: Oudenburg
|
||||
region_code: VWV
|
||||
region_name: West-Vlaanderen
|
||||
country_code: BE
|
||||
resolution_date: '2025-12-17T09:41:23.616579+00:00'
|
||||
source_url: http://binnenvaarterfgoed.be/
|
||||
notes: Belgian v.z.w. (vzw = Belgian non-profit), located aboard museum ship
|
||||
provenance:
|
||||
schema_version: 1.0.0
|
||||
generated_at: '2025-12-16T21:06:45.654173+00:00'
|
||||
|
|
@ -59,9 +70,21 @@ provenance:
|
|||
- website
|
||||
- staff_count
|
||||
- heritage_staff
|
||||
web_search:
|
||||
- source_type: exa_web_search
|
||||
data_tier: TIER_2_VERIFIED
|
||||
source_url: http://binnenvaarterfgoed.be/
|
||||
extraction_timestamp: '2025-12-17T09:41:23.616579+00:00'
|
||||
claims_extracted:
|
||||
- country
|
||||
- region
|
||||
- city
|
||||
- address
|
||||
data_tier_summary:
|
||||
TIER_4_INFERRED:
|
||||
- linkedin_company_profile
|
||||
notes:
|
||||
- Created from unmatched LinkedIn company profile
|
||||
- 'Location resolution method: UNRESOLVED'
|
||||
- 'Country code corrected on 2025-12-17: NL was incorrect, institution is in Belgium
|
||||
(BE)'
|
||||
|
|
@ -18,28 +18,41 @@ linkedin_enrichment:
|
|||
original_file: data/custodian/linkedin/museum-of-batik-pekalongan.yaml
|
||||
schema_version: 1.0.0
|
||||
location:
|
||||
city: ''
|
||||
region: XX
|
||||
country: NL
|
||||
city: Pekalongan
|
||||
region: Jawa Tengah
|
||||
country: ID
|
||||
address: Jl. Jetayu No.1, Pekalongan 51152
|
||||
ghcid:
|
||||
ghcid_current: NL-XX-XXX-M-MBP
|
||||
ghcid_original: NL-XX-XXX-M-MBP
|
||||
ghcid_uuid: eb74d910-63b2-5e5e-9db8-d91073782abf
|
||||
ghcid_uuid_sha256: 6e7ba504-8c91-8169-aeb8-137a6a295254
|
||||
ghcid_numeric: 7961138205264294249
|
||||
ghcid_current: ID-JT-PEK-M-MBP
|
||||
ghcid_original: ID-JT-PEK-M-MBP
|
||||
ghcid_uuid: c3b6fa1c-543c-509b-8200-9c3e55ea5917
|
||||
ghcid_uuid_sha256: fe292b5f-2a03-82a6-8ea7-13192da4c6f8
|
||||
ghcid_numeric: 18314217047405564582
|
||||
record_id: 3e933428-e095-4b85-aeb4-ed7eaa57b11c
|
||||
generation_timestamp: '2025-12-16T21:06:37.585649+00:00'
|
||||
ghcid_history:
|
||||
- ghcid: NL-XX-XXX-M-MBP
|
||||
ghcid_numeric: 7961138205264294249
|
||||
valid_from: '2025-12-16T21:06:37.585649+00:00'
|
||||
valid_to: null
|
||||
valid_to: '2025-12-17T09:53:29.196550+00:00'
|
||||
reason: Initial GHCID assignment from LinkedIn batch import
|
||||
- ghcid: ID-JT-PEK-M-MBP
|
||||
ghcid_numeric: 18314217047405564582
|
||||
valid_from: '2025-12-17T09:53:29.196550+00:00'
|
||||
valid_to: null
|
||||
reason: 'Country code corrected: NL -> ID (Indonesia). Location: Pekalongan, Jawa
|
||||
Tengah'
|
||||
location_resolution:
|
||||
method: UNRESOLVED
|
||||
city_code: XXX
|
||||
region_code: XX
|
||||
country_code: NL
|
||||
method: EXA_WEB_SEARCH
|
||||
city_code: PEK
|
||||
city_name: Pekalongan
|
||||
region_code: JT
|
||||
region_name: Jawa Tengah
|
||||
country_code: ID
|
||||
resolution_date: '2025-12-17T09:53:29.196550+00:00'
|
||||
source_url: https://id.wikipedia.org/wiki/Museum_Batik_Pekalongan
|
||||
notes: UNESCO recognized museum for batik conservation, opened 12 July 2006 by
|
||||
President SBY
|
||||
provenance:
|
||||
schema_version: 1.0.0
|
||||
generated_at: '2025-12-16T21:06:37.585649+00:00'
|
||||
|
|
@ -56,9 +69,21 @@ provenance:
|
|||
- website
|
||||
- staff_count
|
||||
- heritage_staff
|
||||
web_search:
|
||||
- source_type: exa_web_search
|
||||
data_tier: TIER_2_VERIFIED
|
||||
source_url: https://id.wikipedia.org/wiki/Museum_Batik_Pekalongan
|
||||
extraction_timestamp: '2025-12-17T09:53:29.196550+00:00'
|
||||
claims_extracted:
|
||||
- country
|
||||
- region
|
||||
- city
|
||||
- address
|
||||
data_tier_summary:
|
||||
TIER_4_INFERRED:
|
||||
- linkedin_company_profile
|
||||
notes:
|
||||
- Created from unmatched LinkedIn company profile
|
||||
- 'Location resolution method: UNRESOLVED'
|
||||
- 'Country code corrected on 2025-12-17: NL was incorrect, institution is in Indonesia
|
||||
(ID)'
|
||||
|
|
@ -2,7 +2,7 @@ custodian_name:
|
|||
emic_name: Diorama Arsip Jogja
|
||||
emic_name_source: linkedin
|
||||
institution_type:
|
||||
- M
|
||||
- A
|
||||
linkedin_enrichment:
|
||||
linkedin_url: https://www.linkedin.com/company/diorama-arsip-jogja
|
||||
linkedin_slug: diorama-arsip-jogja
|
||||
|
|
@ -24,28 +24,41 @@ linkedin_enrichment:
|
|||
original_file: data/custodian/linkedin/diorama-arsip-jogja.yaml
|
||||
schema_version: 1.0.0
|
||||
location:
|
||||
city: Technology
|
||||
region: XX
|
||||
country: NL
|
||||
city: Bantul
|
||||
region: Daerah Istimewa Yogyakarta
|
||||
country: ID
|
||||
address: LT 1 Gedung DEPO ARSIP, Jl. Janti, Banguntapan, Kabupaten Bantul, Yogyakarta
|
||||
55198
|
||||
ghcid:
|
||||
ghcid_current: NL-XX-XXX-M-DAJ
|
||||
ghcid_original: NL-XX-XXX-M-DAJ
|
||||
ghcid_uuid: 70fabdbb-cfa2-579c-9ed6-715ed3a9961b
|
||||
ghcid_uuid_sha256: a495caff-0e4e-8b6a-8166-888465abc0cd
|
||||
ghcid_numeric: 11859608390555585386
|
||||
ghcid_current: ID-YO-BAN-A-DAJ
|
||||
ghcid_original: ID-YO-BAN-A-DAJ
|
||||
ghcid_uuid: 059d21ea-5974-5a1e-8525-ea372adb2f57
|
||||
ghcid_uuid_sha256: 2dcb25b6-bf85-86e0-820b-be86083fea2d
|
||||
ghcid_numeric: 3299772618806470368
|
||||
record_id: 7e4ea863-e058-47a7-ab46-85aa9b50ec7c
|
||||
generation_timestamp: '2025-12-16T21:06:39.082344+00:00'
|
||||
ghcid_history:
|
||||
- ghcid: NL-XX-XXX-M-DAJ
|
||||
ghcid_numeric: 11859608390555585386
|
||||
valid_from: '2025-12-16T21:06:39.082344+00:00'
|
||||
valid_to: null
|
||||
valid_to: '2025-12-17T09:41:23.625814+00:00'
|
||||
reason: Initial GHCID assignment from LinkedIn batch import
|
||||
- ghcid: ID-YO-BAN-A-DAJ
|
||||
ghcid_numeric: 3299772618806470368
|
||||
valid_from: '2025-12-17T09:41:23.625814+00:00'
|
||||
valid_to: null
|
||||
reason: 'Country code corrected: NL -> ID (Indonesia). Location: Bantul, Daerah
|
||||
Istimewa Yogyakarta'
|
||||
location_resolution:
|
||||
method: UNRESOLVED
|
||||
city_code: XXX
|
||||
region_code: XX
|
||||
country_code: NL
|
||||
method: EXA_WEB_SEARCH
|
||||
city_code: BAN
|
||||
city_name: Bantul
|
||||
region_code: YO
|
||||
region_name: Daerah Istimewa Yogyakarta
|
||||
country_code: ID
|
||||
resolution_date: '2025-12-17T09:41:23.625814+00:00'
|
||||
source_url: https://dioramaarsip.jogjaprov.go.id/home
|
||||
notes: Digital archive diorama of Yogyakarta history, opened February 2022
|
||||
provenance:
|
||||
schema_version: 1.0.0
|
||||
generated_at: '2025-12-16T21:06:39.082344+00:00'
|
||||
|
|
@ -62,9 +75,21 @@ provenance:
|
|||
- website
|
||||
- staff_count
|
||||
- heritage_staff
|
||||
web_search:
|
||||
- source_type: exa_web_search
|
||||
data_tier: TIER_2_VERIFIED
|
||||
source_url: https://dioramaarsip.jogjaprov.go.id/home
|
||||
extraction_timestamp: '2025-12-17T09:41:23.625814+00:00'
|
||||
claims_extracted:
|
||||
- country
|
||||
- region
|
||||
- city
|
||||
- address
|
||||
data_tier_summary:
|
||||
TIER_4_INFERRED:
|
||||
- linkedin_company_profile
|
||||
notes:
|
||||
- Created from unmatched LinkedIn company profile
|
||||
- 'Location resolution method: UNRESOLVED'
|
||||
- 'Country code corrected on 2025-12-17: NL was incorrect, institution is in Indonesia
|
||||
(ID)'
|
||||
|
|
@ -86,23 +86,41 @@ linkedin_enrichment:
|
|||
original_file: data/custodian/linkedin/museum-benteng-vredeburg.yaml
|
||||
schema_version: 1.0.0
|
||||
location:
|
||||
city: null
|
||||
region: null
|
||||
country: NL
|
||||
city: Yogyakarta
|
||||
region: Daerah Istimewa Yogyakarta
|
||||
country: ID
|
||||
address: Jl. Margo Mulyo No.6, Ngupasan, Kec. Gondomanan, Kota Yogyakarta 55122
|
||||
ghcid:
|
||||
ghcid_current: NL-XX-XXX-M-MBV
|
||||
ghcid_original: NL-XX-XXX-M-MBV
|
||||
ghcid_uuid: f52e92b3-a191-56c0-95ca-c93ebfd1fa81
|
||||
ghcid_uuid_sha256: 669bd4a6-699d-87ae-9590-77f5dd087af7
|
||||
ghcid_numeric: 7393737024460408750
|
||||
ghcid_current: ID-YO-YOG-M-MBV
|
||||
ghcid_original: ID-YO-YOG-M-MBV
|
||||
ghcid_uuid: 5d5e4910-7cd2-5ef8-a51c-c7dc54a055f0
|
||||
ghcid_uuid_sha256: aedc6dac-9c94-8841-abd8-6a7416a4b795
|
||||
ghcid_numeric: 12600066445604583489
|
||||
record_id: 1eed48b4-a9a7-436e-a4ac-edfef3de4aee
|
||||
generation_timestamp: '2025-12-16T21:06:42.973186+00:00'
|
||||
ghcid_history:
|
||||
- ghcid: NL-XX-XXX-M-MBV
|
||||
ghcid_numeric: 7393737024460408750
|
||||
valid_from: '2025-12-17T08:44:26.023035+00:00'
|
||||
valid_to: null
|
||||
valid_to: '2025-12-17T09:53:29.174813+00:00'
|
||||
reason: Reverted incorrect LinkedIn HTML extraction - original XX-XXX restored
|
||||
- ghcid: ID-YO-YOG-M-MBV
|
||||
ghcid_numeric: 12600066445604583489
|
||||
valid_from: '2025-12-17T09:53:29.174813+00:00'
|
||||
valid_to: null
|
||||
reason: 'Country code corrected: NL -> ID (Indonesia). Location: Yogyakarta, Daerah
|
||||
Istimewa Yogyakarta'
|
||||
location_resolution:
|
||||
method: EXA_WEB_SEARCH
|
||||
city_code: YOG
|
||||
city_name: Yogyakarta
|
||||
region_code: YO
|
||||
region_name: Daerah Istimewa Yogyakarta
|
||||
country_code: ID
|
||||
resolution_date: '2025-12-17T09:53:29.174813+00:00'
|
||||
source_url: https://forevervacation.com/yogyakarta/museum-benteng-vredeburg
|
||||
notes: Dutch colonial fortress converted to museum in 1992, documents Indonesian
|
||||
independence struggle
|
||||
provenance:
|
||||
schema_version: 1.0.0
|
||||
generated_at: '2025-12-16T21:06:42.973186+00:00'
|
||||
|
|
@ -119,6 +137,16 @@ provenance:
|
|||
- website
|
||||
- staff_count
|
||||
- heritage_staff
|
||||
web_search:
|
||||
- source_type: exa_web_search
|
||||
data_tier: TIER_2_VERIFIED
|
||||
source_url: https://forevervacation.com/yogyakarta/museum-benteng-vredeburg
|
||||
extraction_timestamp: '2025-12-17T09:53:29.174813+00:00'
|
||||
claims_extracted:
|
||||
- country
|
||||
- region
|
||||
- city
|
||||
- address
|
||||
data_tier_summary:
|
||||
TIER_4_INFERRED:
|
||||
- linkedin_company_profile
|
||||
|
|
@ -127,3 +155,5 @@ provenance:
|
|||
- 'Location resolution method: UNRESOLVED'
|
||||
- Reverted incorrect location enrichment on 2025-12-17 - LinkedIn HTML extraction
|
||||
was extracting wrong company's data
|
||||
- 'Country code corrected on 2025-12-17: NL was incorrect, institution is in Indonesia
|
||||
(ID)'
|
||||
|
|
@ -18,28 +18,39 @@ linkedin_enrichment:
|
|||
original_file: data/custodian/linkedin/historical-archives-of-the-european-union.yaml
|
||||
schema_version: 1.0.0
|
||||
location:
|
||||
city: Director
|
||||
region: XX
|
||||
country: NL
|
||||
city: Firenze
|
||||
region: Tuscany
|
||||
country: IT
|
||||
address: Via Bolognese 156, 50139 Firenze, Villa Salviati
|
||||
ghcid:
|
||||
ghcid_current: NL-XX-XXX-A-HAEU
|
||||
ghcid_original: NL-XX-XXX-A-HAEU
|
||||
ghcid_uuid: 8c50158d-ca20-52fa-a586-123d856d30ce
|
||||
ghcid_uuid_sha256: a5122731-05a4-8550-91dc-709ae9537003
|
||||
ghcid_numeric: 11894612657340421456
|
||||
ghcid_current: IT-52-FIR-A-HAEU
|
||||
ghcid_original: IT-52-FIR-A-HAEU
|
||||
ghcid_uuid: f61c2f7c-d9e1-5ffe-b5d8-a79fddadc795
|
||||
ghcid_uuid_sha256: b546a4f3-4270-80ca-a53a-00d1bf0d4469
|
||||
ghcid_numeric: 13062309133933396170
|
||||
record_id: 63749121-4b05-471e-b075-ec53cbbf0917
|
||||
generation_timestamp: '2025-12-16T21:06:45.012969+00:00'
|
||||
ghcid_history:
|
||||
- ghcid: NL-XX-XXX-A-HAEU
|
||||
ghcid_numeric: 11894612657340421456
|
||||
valid_from: '2025-12-16T21:06:45.012969+00:00'
|
||||
valid_to: null
|
||||
valid_to: '2025-12-17T09:41:23.611933+00:00'
|
||||
reason: Initial GHCID assignment from LinkedIn batch import
|
||||
- ghcid: IT-52-FIR-A-HAEU
|
||||
ghcid_numeric: 13062309133933396170
|
||||
valid_from: '2025-12-17T09:41:23.611933+00:00'
|
||||
valid_to: null
|
||||
reason: 'Country code corrected: NL -> IT (Italy). Location: Firenze, Tuscany'
|
||||
location_resolution:
|
||||
method: UNRESOLVED
|
||||
city_code: XXX
|
||||
region_code: XX
|
||||
country_code: NL
|
||||
method: EXA_WEB_SEARCH
|
||||
city_code: FIR
|
||||
city_name: Firenze
|
||||
region_code: '52'
|
||||
region_name: Tuscany
|
||||
country_code: IT
|
||||
resolution_date: '2025-12-17T09:41:23.611933+00:00'
|
||||
source_url: https://archives.eui.eu/en/repositories/1
|
||||
notes: Part of European University Institute, Florence
|
||||
provenance:
|
||||
schema_version: 1.0.0
|
||||
generated_at: '2025-12-16T21:06:45.012969+00:00'
|
||||
|
|
@ -56,9 +67,21 @@ provenance:
|
|||
- website
|
||||
- staff_count
|
||||
- heritage_staff
|
||||
web_search:
|
||||
- source_type: exa_web_search
|
||||
data_tier: TIER_2_VERIFIED
|
||||
source_url: https://archives.eui.eu/en/repositories/1
|
||||
extraction_timestamp: '2025-12-17T09:41:23.611933+00:00'
|
||||
claims_extracted:
|
||||
- country
|
||||
- region
|
||||
- city
|
||||
- address
|
||||
data_tier_summary:
|
||||
TIER_4_INFERRED:
|
||||
- linkedin_company_profile
|
||||
notes:
|
||||
- Created from unmatched LinkedIn company profile
|
||||
- 'Location resolution method: UNRESOLVED'
|
||||
- 'Country code corrected on 2025-12-17: NL was incorrect, institution is in Italy
|
||||
(IT)'
|
||||
|
|
@ -21,23 +21,38 @@ linkedin_enrichment:
|
|||
original_file: data/custodian/linkedin/museum-janning.yaml
|
||||
schema_version: 1.0.0
|
||||
location:
|
||||
city: null
|
||||
region: null
|
||||
city: Nieuw Schoonebeek
|
||||
region: Drenthe
|
||||
country: NL
|
||||
address: Europaweg 143a, 7766 AE Nieuw Schoonebeek
|
||||
ghcid:
|
||||
ghcid_current: NL-XX-XXX-M-MJ-museum_janning
|
||||
ghcid_current: NL-DR-NIS-M-MJ-museum_janning
|
||||
ghcid_original: NL-XX-XXX-M-MJ-museum_janning
|
||||
ghcid_uuid: 2da042ef-f83f-5b4c-9cf6-e95338656e75
|
||||
ghcid_uuid_sha256: 2841359b-879c-88e5-8c7f-27f6e806a1d5
|
||||
ghcid_numeric: 2900658577114687717
|
||||
ghcid_uuid: b14501ab-9840-5df7-b1d5-599a1606b08d
|
||||
ghcid_uuid_sha256: b8ef7355-6ab8-860e-b9b7-8a9de4f58c9d
|
||||
ghcid_numeric: 13325996633112462862
|
||||
record_id: ba018a83-8c5e-422b-a8a0-8685147c0268
|
||||
generation_timestamp: '2025-12-16T21:06:42.719826+00:00'
|
||||
ghcid_history:
|
||||
- ghcid: NL-XX-XXX-M-MJ-museum_janning
|
||||
ghcid_numeric: 2900658577114687717
|
||||
valid_from: '2025-12-17T08:44:26.037456+00:00'
|
||||
valid_to: null
|
||||
valid_to: '2025-12-17T10:56:09.433656+00:00'
|
||||
reason: Reverted incorrect LinkedIn HTML extraction - original XX-XXX restored
|
||||
- ghcid: NL-DR-NIS-M-MJ-museum_janning
|
||||
ghcid_numeric: 13325996633112462862
|
||||
valid_from: '2025-12-17T10:56:09.433656+00:00'
|
||||
valid_to: null
|
||||
reason: Location enriched via Exa web search - Nieuw Schoonebeek, Drenthe
|
||||
location_resolution:
|
||||
method: EXA_WEB_SEARCH
|
||||
city_code: NIS
|
||||
city_name: Nieuw Schoonebeek
|
||||
region_code: DR
|
||||
region_name: Drenthe
|
||||
country_code: NL
|
||||
resolution_date: '2025-12-17T10:56:09.433656+00:00'
|
||||
source_url: https://www.museumjanning.nl/
|
||||
provenance:
|
||||
schema_version: 1.0.0
|
||||
generated_at: '2025-12-16T21:06:42.719826+00:00'
|
||||
|
|
@ -54,6 +69,15 @@ provenance:
|
|||
- website
|
||||
- staff_count
|
||||
- heritage_staff
|
||||
web_search:
|
||||
- source_type: exa_web_search
|
||||
data_tier: TIER_3_CROWD_SOURCED
|
||||
source_url: https://www.museumjanning.nl/
|
||||
extraction_timestamp: '2025-12-17T10:56:09.433656+00:00'
|
||||
claims_extracted:
|
||||
- city
|
||||
- region
|
||||
- address
|
||||
data_tier_summary:
|
||||
TIER_4_INFERRED:
|
||||
- linkedin_company_profile
|
||||
|
|
@ -62,3 +86,4 @@ provenance:
|
|||
- 'Location resolution method: UNRESOLVED'
|
||||
- Reverted incorrect location enrichment on 2025-12-17 - LinkedIn HTML extraction
|
||||
was extracting wrong company's data
|
||||
- 'Location enriched on 2025-12-17 via Exa web search: Nieuw Schoonebeek, Drenthe'
|
||||
|
|
@ -18,28 +18,38 @@ linkedin_enrichment:
|
|||
original_file: data/custodian/linkedin/jopie-huismanmuseum.yaml
|
||||
schema_version: 1.0.0
|
||||
location:
|
||||
city: ''
|
||||
region: XX
|
||||
city: Workum
|
||||
region: Friesland
|
||||
country: NL
|
||||
address: Noard 6, 8711 AH Workum
|
||||
ghcid:
|
||||
ghcid_current: NL-XX-XXX-M-JH
|
||||
ghcid_current: NL-FR-WOR-M-JH
|
||||
ghcid_original: NL-XX-XXX-M-JH
|
||||
ghcid_uuid: d356866c-b69b-5f7c-9b0d-5be544a5a315
|
||||
ghcid_uuid_sha256: 383eb391-9231-8e3c-b45b-2c9a8ff180df
|
||||
ghcid_numeric: 4052874152484372028
|
||||
ghcid_uuid: 3a0b508c-4200-5076-91aa-0c5296f68636
|
||||
ghcid_uuid_sha256: 0ded076f-d63e-84a1-af3a-86c064d7338e
|
||||
ghcid_numeric: 1003466468890657953
|
||||
record_id: 70d3a7ac-504a-4bca-b45f-7feb1f7fce95
|
||||
generation_timestamp: '2025-12-16T21:06:39.406621+00:00'
|
||||
ghcid_history:
|
||||
- ghcid: NL-XX-XXX-M-JH
|
||||
ghcid_numeric: 4052874152484372028
|
||||
valid_from: '2025-12-16T21:06:39.406621+00:00'
|
||||
valid_to: null
|
||||
valid_to: '2025-12-17T09:25:04.169919+00:00'
|
||||
reason: Initial GHCID assignment from LinkedIn batch import
|
||||
- ghcid: NL-FR-WOR-M-JH
|
||||
ghcid_numeric: 1003466468890657953
|
||||
valid_from: '2025-12-17T09:25:04.169919+00:00'
|
||||
valid_to: null
|
||||
reason: Location enriched via Exa web search - Workum, Friesland
|
||||
location_resolution:
|
||||
method: UNRESOLVED
|
||||
city_code: XXX
|
||||
region_code: XX
|
||||
method: EXA_WEB_SEARCH
|
||||
city_code: WOR
|
||||
city_name: Workum
|
||||
region_code: FR
|
||||
region_name: Friesland
|
||||
country_code: NL
|
||||
resolution_date: '2025-12-17T09:25:04.169919+00:00'
|
||||
source_url: https://www.jopiehuismanmuseum.nl/
|
||||
provenance:
|
||||
schema_version: 1.0.0
|
||||
generated_at: '2025-12-16T21:06:39.406621+00:00'
|
||||
|
|
@ -56,9 +66,19 @@ provenance:
|
|||
- website
|
||||
- staff_count
|
||||
- heritage_staff
|
||||
web_search:
|
||||
- source_type: exa_web_search
|
||||
data_tier: TIER_3_CROWD_SOURCED
|
||||
source_url: https://www.jopiehuismanmuseum.nl/
|
||||
extraction_timestamp: '2025-12-17T09:25:04.169919+00:00'
|
||||
claims_extracted:
|
||||
- city
|
||||
- region
|
||||
- address
|
||||
data_tier_summary:
|
||||
TIER_4_INFERRED:
|
||||
- linkedin_company_profile
|
||||
notes:
|
||||
- Created from unmatched LinkedIn company profile
|
||||
- 'Location resolution method: UNRESOLVED'
|
||||
- 'Location enriched on 2025-12-17 via Exa web search: Workum, Friesland'
|
||||
|
|
@ -18,23 +18,38 @@ linkedin_enrichment:
|
|||
original_file: data/custodian/linkedin/museum-de-grote-glind.yaml
|
||||
schema_version: 1.0.0
|
||||
location:
|
||||
city: null
|
||||
region: null
|
||||
city: Barneveld
|
||||
region: Gelderland
|
||||
country: NL
|
||||
address: Scherpenzeelseweg 158, 3772 MG Barneveld
|
||||
ghcid:
|
||||
ghcid_current: NL-XX-XXX-M-MGG
|
||||
ghcid_current: NL-GE-BAR-M-MGG
|
||||
ghcid_original: NL-XX-XXX-M-MGG
|
||||
ghcid_uuid: 10736a2b-5d91-5b7a-b4b9-a4f66e26d978
|
||||
ghcid_uuid_sha256: 3037e34d-a06d-8ce6-a605-466e1ce15bb0
|
||||
ghcid_numeric: 3474495560083172582
|
||||
ghcid_uuid: 1c2dc7dc-bc91-5fb7-bb54-1b2c846b2363
|
||||
ghcid_uuid_sha256: c22fdb77-ceac-81a6-8d77-c89492e65c7b
|
||||
ghcid_numeric: 13992643874878439846
|
||||
record_id: b014bedb-05da-4f35-9192-e07ab708ed0e
|
||||
generation_timestamp: '2025-12-16T21:06:43.269943+00:00'
|
||||
ghcid_history:
|
||||
- ghcid: NL-XX-XXX-M-MGG
|
||||
ghcid_numeric: 3474495560083172582
|
||||
valid_from: '2025-12-17T08:44:26.015666+00:00'
|
||||
valid_to: null
|
||||
valid_to: '2025-12-17T10:56:09.456702+00:00'
|
||||
reason: Reverted incorrect LinkedIn HTML extraction - original XX-XXX restored
|
||||
- ghcid: NL-GE-BAR-M-MGG
|
||||
ghcid_numeric: 13992643874878439846
|
||||
valid_from: '2025-12-17T10:56:09.456702+00:00'
|
||||
valid_to: null
|
||||
reason: Location enriched via Exa web search - Barneveld, Gelderland
|
||||
location_resolution:
|
||||
method: EXA_WEB_SEARCH
|
||||
city_code: BAR
|
||||
city_name: Barneveld
|
||||
region_code: GE
|
||||
region_name: Gelderland
|
||||
country_code: NL
|
||||
resolution_date: '2025-12-17T10:56:09.456702+00:00'
|
||||
source_url: https://www.degroteglind.nl/
|
||||
provenance:
|
||||
schema_version: 1.0.0
|
||||
generated_at: '2025-12-16T21:06:43.269943+00:00'
|
||||
|
|
@ -51,6 +66,15 @@ provenance:
|
|||
- website
|
||||
- staff_count
|
||||
- heritage_staff
|
||||
web_search:
|
||||
- source_type: exa_web_search
|
||||
data_tier: TIER_3_CROWD_SOURCED
|
||||
source_url: https://www.degroteglind.nl/
|
||||
extraction_timestamp: '2025-12-17T10:56:09.456702+00:00'
|
||||
claims_extracted:
|
||||
- city
|
||||
- region
|
||||
- address
|
||||
data_tier_summary:
|
||||
TIER_4_INFERRED:
|
||||
- linkedin_company_profile
|
||||
|
|
@ -59,3 +83,4 @@ provenance:
|
|||
- 'Location resolution method: UNRESOLVED'
|
||||
- Reverted incorrect location enrichment on 2025-12-17 - LinkedIn HTML extraction
|
||||
was extracting wrong company's data
|
||||
- 'Location enriched on 2025-12-17 via Exa web search: Barneveld, Gelderland'
|
||||
|
|
@ -18,23 +18,38 @@ linkedin_enrichment:
|
|||
original_file: data/custodian/linkedin/museum-1939-1945.yaml
|
||||
schema_version: 1.0.0
|
||||
location:
|
||||
city: null
|
||||
region: null
|
||||
city: Harreveld
|
||||
region: Gelderland
|
||||
country: NL
|
||||
address: Schurinkweg 14, 7135 KJ Harreveld
|
||||
ghcid:
|
||||
ghcid_current: NL-XX-XXX-M-M-museum_19391945
|
||||
ghcid_current: NL-GE-HAR-M-M-museum_19391945
|
||||
ghcid_original: NL-XX-XXX-M-M-museum_19391945
|
||||
ghcid_uuid: e853e192-4695-59e4-87ee-4174351509b7
|
||||
ghcid_uuid_sha256: 3b9218d3-0891-8043-bab3-18ddb7bb105f
|
||||
ghcid_numeric: 4292520689498423363
|
||||
ghcid_uuid: f7319b45-8862-5063-932b-0ad042cd196f
|
||||
ghcid_uuid_sha256: 98c8af8e-ffc8-85e5-a162-937baa38ee40
|
||||
ghcid_numeric: 11009242317818750437
|
||||
record_id: f533f9a7-b9e4-40d9-9406-1003736c61ba
|
||||
generation_timestamp: '2025-12-16T21:06:44.440421+00:00'
|
||||
ghcid_history:
|
||||
- ghcid: NL-XX-XXX-M-M-museum_19391945
|
||||
ghcid_numeric: 4292520689498423363
|
||||
valid_from: '2025-12-17T08:44:26.038723+00:00'
|
||||
valid_to: null
|
||||
valid_to: '2025-12-17T10:15:28.609076+00:00'
|
||||
reason: Reverted incorrect LinkedIn HTML extraction - original XX-XXX restored
|
||||
- ghcid: NL-GE-HAR-M-M-museum_19391945
|
||||
ghcid_numeric: 11009242317818750437
|
||||
valid_from: '2025-12-17T10:15:28.609076+00:00'
|
||||
valid_to: null
|
||||
reason: Location enriched via Exa web search - Harreveld, Gelderland
|
||||
location_resolution:
|
||||
method: EXA_WEB_SEARCH
|
||||
city_code: HAR
|
||||
city_name: Harreveld
|
||||
region_code: GE
|
||||
region_name: Gelderland
|
||||
country_code: NL
|
||||
resolution_date: '2025-12-17T10:15:28.609076+00:00'
|
||||
source_url: https://www.tracesofwar.nl/sights/157857/Museum-Opdat-wij-niet-Vergeten-1939-1945.htm
|
||||
provenance:
|
||||
schema_version: 1.0.0
|
||||
generated_at: '2025-12-16T21:06:44.440421+00:00'
|
||||
|
|
@ -51,6 +66,15 @@ provenance:
|
|||
- website
|
||||
- staff_count
|
||||
- heritage_staff
|
||||
web_search:
|
||||
- source_type: exa_web_search
|
||||
data_tier: TIER_3_CROWD_SOURCED
|
||||
source_url: https://www.tracesofwar.nl/sights/157857/Museum-Opdat-wij-niet-Vergeten-1939-1945.htm
|
||||
extraction_timestamp: '2025-12-17T10:15:28.609076+00:00'
|
||||
claims_extracted:
|
||||
- city
|
||||
- region
|
||||
- address
|
||||
data_tier_summary:
|
||||
TIER_4_INFERRED:
|
||||
- linkedin_company_profile
|
||||
|
|
@ -59,3 +83,4 @@ provenance:
|
|||
- 'Location resolution method: UNRESOLVED'
|
||||
- Reverted incorrect location enrichment on 2025-12-17 - LinkedIn HTML extraction
|
||||
was extracting wrong company's data
|
||||
- 'Location enriched on 2025-12-17 via Exa web search: Harreveld, Gelderland'
|
||||
|
|
@ -22,28 +22,38 @@ linkedin_enrichment:
|
|||
original_file: data/custodian/linkedin/museum-geelvinck-hinlopen-huis.yaml
|
||||
schema_version: 1.0.0
|
||||
location:
|
||||
city: ''
|
||||
region: XX
|
||||
city: Heerde
|
||||
region: Gelderland
|
||||
country: NL
|
||||
address: Kamperweg 23, 8181 CS Heerde
|
||||
ghcid:
|
||||
ghcid_current: NL-XX-XXX-M-MGHH
|
||||
ghcid_current: NL-GE-HEE-M-MGHH
|
||||
ghcid_original: NL-XX-XXX-M-MGHH
|
||||
ghcid_uuid: 2f84f10f-a9d9-518a-867b-76fd5744c9c8
|
||||
ghcid_uuid_sha256: dbc820ca-9861-8e25-9543-d30ccde30e84
|
||||
ghcid_numeric: 15836944144160349733
|
||||
ghcid_uuid: 26d6aef0-f3cd-5237-ac57-d1cdfa9b3ee1
|
||||
ghcid_uuid_sha256: 76b6959c-be1e-84cb-b010-a9e54b718f30
|
||||
ghcid_numeric: 8554189042673935563
|
||||
record_id: 3d9547a0-45c3-4759-8b11-f8193c5abccb
|
||||
generation_timestamp: '2025-12-16T21:06:38.518452+00:00'
|
||||
ghcid_history:
|
||||
- ghcid: NL-XX-XXX-M-MGHH
|
||||
ghcid_numeric: 15836944144160349733
|
||||
valid_from: '2025-12-16T21:06:38.518452+00:00'
|
||||
valid_to: null
|
||||
valid_to: '2025-12-17T10:56:09.444275+00:00'
|
||||
reason: Initial GHCID assignment from LinkedIn batch import
|
||||
- ghcid: NL-GE-HEE-M-MGHH
|
||||
ghcid_numeric: 8554189042673935563
|
||||
valid_from: '2025-12-17T10:56:09.444275+00:00'
|
||||
valid_to: null
|
||||
reason: Location enriched via Exa web search - Heerde, Gelderland
|
||||
location_resolution:
|
||||
method: UNRESOLVED
|
||||
city_code: XXX
|
||||
region_code: XX
|
||||
method: EXA_WEB_SEARCH
|
||||
city_code: HEE
|
||||
city_name: Heerde
|
||||
region_code: GE
|
||||
region_name: Gelderland
|
||||
country_code: NL
|
||||
resolution_date: '2025-12-17T10:56:09.444275+00:00'
|
||||
source_url: https://geelvinck.nl/
|
||||
provenance:
|
||||
schema_version: 1.0.0
|
||||
generated_at: '2025-12-16T21:06:38.518452+00:00'
|
||||
|
|
@ -60,9 +70,19 @@ provenance:
|
|||
- website
|
||||
- staff_count
|
||||
- heritage_staff
|
||||
web_search:
|
||||
- source_type: exa_web_search
|
||||
data_tier: TIER_3_CROWD_SOURCED
|
||||
source_url: https://geelvinck.nl/
|
||||
extraction_timestamp: '2025-12-17T10:56:09.444275+00:00'
|
||||
claims_extracted:
|
||||
- city
|
||||
- region
|
||||
- address
|
||||
data_tier_summary:
|
||||
TIER_4_INFERRED:
|
||||
- linkedin_company_profile
|
||||
notes:
|
||||
- Created from unmatched LinkedIn company profile
|
||||
- 'Location resolution method: UNRESOLVED'
|
||||
- 'Location enriched on 2025-12-17 via Exa web search: Heerde, Gelderland'
|
||||
|
|
@ -21,23 +21,38 @@ linkedin_enrichment:
|
|||
original_file: data/custodian/linkedin/museum-de-brandkas-van-henny.yaml
|
||||
schema_version: 1.0.0
|
||||
location:
|
||||
city: null
|
||||
region: null
|
||||
city: Zutphen
|
||||
region: Gelderland
|
||||
country: NL
|
||||
address: Ravenstraatje 3, 7201 DG Zutphen
|
||||
ghcid:
|
||||
ghcid_current: NL-XX-XXX-M-MBH
|
||||
ghcid_current: NL-GE-ZUT-M-MBH
|
||||
ghcid_original: NL-XX-XXX-M-MBH
|
||||
ghcid_uuid: cfc24f04-4169-55af-891f-ed4942860a5b
|
||||
ghcid_uuid_sha256: f10091e2-000c-8152-bb4a-9c6e2dc1c4bb
|
||||
ghcid_numeric: 17366040562990059858
|
||||
ghcid_uuid: 8f3789a5-dac1-584a-a369-881ca1fcb35a
|
||||
ghcid_uuid_sha256: f0f4973c-c174-823c-8ec4-f5bba7997043
|
||||
ghcid_numeric: 17362668750619554364
|
||||
record_id: 7cfce701-5c47-477c-9973-7f9e578d177b
|
||||
generation_timestamp: '2025-12-16T21:06:40.699134+00:00'
|
||||
ghcid_history:
|
||||
- ghcid: NL-XX-XXX-M-MBH
|
||||
ghcid_numeric: 17366040562990059858
|
||||
valid_from: '2025-12-17T08:44:26.075139+00:00'
|
||||
valid_to: null
|
||||
valid_to: '2025-12-17T10:15:28.621915+00:00'
|
||||
reason: Reverted incorrect LinkedIn HTML extraction - original XX-XXX restored
|
||||
- ghcid: NL-GE-ZUT-M-MBH
|
||||
ghcid_numeric: 17362668750619554364
|
||||
valid_from: '2025-12-17T10:15:28.621915+00:00'
|
||||
valid_to: null
|
||||
reason: Location enriched via Exa web search - Zutphen, Gelderland
|
||||
location_resolution:
|
||||
method: EXA_WEB_SEARCH
|
||||
city_code: ZUT
|
||||
city_name: Zutphen
|
||||
region_code: GE
|
||||
region_name: Gelderland
|
||||
country_code: NL
|
||||
resolution_date: '2025-12-17T10:15:28.621915+00:00'
|
||||
source_url: https://brandkashenny.nl/
|
||||
provenance:
|
||||
schema_version: 1.0.0
|
||||
generated_at: '2025-12-16T21:06:40.699134+00:00'
|
||||
|
|
@ -54,6 +69,15 @@ provenance:
|
|||
- website
|
||||
- staff_count
|
||||
- heritage_staff
|
||||
web_search:
|
||||
- source_type: exa_web_search
|
||||
data_tier: TIER_3_CROWD_SOURCED
|
||||
source_url: https://brandkashenny.nl/
|
||||
extraction_timestamp: '2025-12-17T10:15:28.621915+00:00'
|
||||
claims_extracted:
|
||||
- city
|
||||
- region
|
||||
- address
|
||||
data_tier_summary:
|
||||
TIER_4_INFERRED:
|
||||
- linkedin_company_profile
|
||||
|
|
@ -62,3 +86,4 @@ provenance:
|
|||
- 'Location resolution method: UNRESOLVED'
|
||||
- Reverted incorrect location enrichment on 2025-12-17 - LinkedIn HTML extraction
|
||||
was extracting wrong company's data
|
||||
- 'Location enriched on 2025-12-17 via Exa web search: Zutphen, Gelderland'
|
||||
|
|
@ -18,28 +18,38 @@ linkedin_enrichment:
|
|||
original_file: data/custodian/linkedin/blik-trommel-en-oudheden-museum.yaml
|
||||
schema_version: 1.0.0
|
||||
location:
|
||||
city: ''
|
||||
region: XX
|
||||
city: Niezijl
|
||||
region: Groningen
|
||||
country: NL
|
||||
address: Hoofdstraat 39, 9842 PC Niezijl
|
||||
ghcid:
|
||||
ghcid_current: NL-XX-XXX-M-BTOM
|
||||
ghcid_current: NL-GR-NIE-M-BTOM
|
||||
ghcid_original: NL-XX-XXX-M-BTOM
|
||||
ghcid_uuid: 0e8e0112-b0fd-53c4-ad06-e5956c833244
|
||||
ghcid_uuid_sha256: 5b616934-3786-8a12-b647-addf66bdd3bc
|
||||
ghcid_numeric: 6584659803183176210
|
||||
ghcid_uuid: 451234ac-ac1a-59ff-8906-8921a26babaa
|
||||
ghcid_uuid_sha256: 8afdfde5-a833-8383-a403-9fbe787faf89
|
||||
ghcid_numeric: 10015440309153149827
|
||||
record_id: 8480c9b3-ebdf-47fe-8515-fb69b4a82c51
|
||||
generation_timestamp: '2025-12-16T21:06:45.483497+00:00'
|
||||
ghcid_history:
|
||||
- ghcid: NL-XX-XXX-M-BTOM
|
||||
ghcid_numeric: 6584659803183176210
|
||||
valid_from: '2025-12-16T21:06:45.483497+00:00'
|
||||
valid_to: null
|
||||
valid_to: '2025-12-17T09:33:15.814554+00:00'
|
||||
reason: Initial GHCID assignment from LinkedIn batch import
|
||||
- ghcid: NL-GR-NIE-M-BTOM
|
||||
ghcid_numeric: 10015440309153149827
|
||||
valid_from: '2025-12-17T09:33:15.814554+00:00'
|
||||
valid_to: null
|
||||
reason: Location enriched via Exa web search - Niezijl, Groningen
|
||||
location_resolution:
|
||||
method: UNRESOLVED
|
||||
city_code: XXX
|
||||
region_code: XX
|
||||
method: EXA_WEB_SEARCH
|
||||
city_code: NIE
|
||||
city_name: Niezijl
|
||||
region_code: GR
|
||||
region_name: Groningen
|
||||
country_code: NL
|
||||
resolution_date: '2025-12-17T09:33:15.814554+00:00'
|
||||
source_url: https://www.blikentrommelmuseum.nl/
|
||||
provenance:
|
||||
schema_version: 1.0.0
|
||||
generated_at: '2025-12-16T21:06:45.483497+00:00'
|
||||
|
|
@ -56,9 +66,19 @@ provenance:
|
|||
- website
|
||||
- staff_count
|
||||
- heritage_staff
|
||||
web_search:
|
||||
- source_type: exa_web_search
|
||||
data_tier: TIER_3_CROWD_SOURCED
|
||||
source_url: https://www.blikentrommelmuseum.nl/
|
||||
extraction_timestamp: '2025-12-17T09:33:15.814554+00:00'
|
||||
claims_extracted:
|
||||
- city
|
||||
- region
|
||||
- address
|
||||
data_tier_summary:
|
||||
TIER_4_INFERRED:
|
||||
- linkedin_company_profile
|
||||
notes:
|
||||
- Created from unmatched LinkedIn company profile
|
||||
- 'Location resolution method: UNRESOLVED'
|
||||
- 'Location enriched on 2025-12-17 via Exa web search: Niezijl, Groningen'
|
||||
|
|
@ -18,28 +18,38 @@ linkedin_enrichment:
|
|||
original_file: data/custodian/linkedin/fortuna-museum.yaml
|
||||
schema_version: 1.0.0
|
||||
location:
|
||||
city: ''
|
||||
region: XX
|
||||
city: Sittard
|
||||
region: Limburg
|
||||
country: NL
|
||||
address: Sittard, Limburg, Netherlands
|
||||
ghcid:
|
||||
ghcid_current: NL-XX-XXX-M-FM
|
||||
ghcid_current: NL-LI-SIT-M-FM
|
||||
ghcid_original: NL-XX-XXX-M-FM
|
||||
ghcid_uuid: be219abf-cc81-5a14-aee9-dc9b1b72efcb
|
||||
ghcid_uuid_sha256: ad64fc4c-380d-8a38-8268-9c0bc7b7af6f
|
||||
ghcid_numeric: 12494388670520703544
|
||||
ghcid_uuid: 87c8fa4f-7f89-567f-9bcb-fdaa02cefa47
|
||||
ghcid_uuid_sha256: 05de3d19-96ff-8790-9b8c-8069d9f4891f
|
||||
ghcid_numeric: 422842595136202640
|
||||
record_id: 208c7f58-eaf3-47d8-8d44-f4d714e17ddd
|
||||
generation_timestamp: '2025-12-16T21:06:36.149767+00:00'
|
||||
ghcid_history:
|
||||
- ghcid: NL-XX-XXX-M-FM
|
||||
ghcid_numeric: 12494388670520703544
|
||||
valid_from: '2025-12-16T21:06:36.149767+00:00'
|
||||
valid_to: null
|
||||
valid_to: '2025-12-17T09:25:04.202624+00:00'
|
||||
reason: Initial GHCID assignment from LinkedIn batch import
|
||||
- ghcid: NL-LI-SIT-M-FM
|
||||
ghcid_numeric: 422842595136202640
|
||||
valid_from: '2025-12-17T09:25:04.202624+00:00'
|
||||
valid_to: null
|
||||
reason: Location enriched via Exa web search - Sittard, Limburg
|
||||
location_resolution:
|
||||
method: UNRESOLVED
|
||||
city_code: XXX
|
||||
region_code: XX
|
||||
method: EXA_WEB_SEARCH
|
||||
city_code: SIT
|
||||
city_name: Sittard
|
||||
region_code: LI
|
||||
region_name: Limburg
|
||||
country_code: NL
|
||||
resolution_date: '2025-12-17T09:25:04.202624+00:00'
|
||||
source_url: https://www.fortunasittard.nl/museum/
|
||||
provenance:
|
||||
schema_version: 1.0.0
|
||||
generated_at: '2025-12-16T21:06:36.149767+00:00'
|
||||
|
|
@ -56,9 +66,19 @@ provenance:
|
|||
- website
|
||||
- staff_count
|
||||
- heritage_staff
|
||||
web_search:
|
||||
- source_type: exa_web_search
|
||||
data_tier: TIER_3_CROWD_SOURCED
|
||||
source_url: https://www.fortunasittard.nl/museum/
|
||||
extraction_timestamp: '2025-12-17T09:25:04.202624+00:00'
|
||||
claims_extracted:
|
||||
- city
|
||||
- region
|
||||
- address
|
||||
data_tier_summary:
|
||||
TIER_4_INFERRED:
|
||||
- linkedin_company_profile
|
||||
notes:
|
||||
- Created from unmatched LinkedIn company profile
|
||||
- 'Location resolution method: UNRESOLVED'
|
||||
- 'Location enriched on 2025-12-17 via Exa web search: Sittard, Limburg'
|
||||
|
|
@ -18,23 +18,38 @@ linkedin_enrichment:
|
|||
original_file: data/custodian/linkedin/frans-maas-museum-verzameling.yaml
|
||||
schema_version: 1.0.0
|
||||
location:
|
||||
city: null
|
||||
region: null
|
||||
city: Venlo
|
||||
region: Limburg
|
||||
country: NL
|
||||
address: Groethofstraat 11L, 5916 PA Venlo
|
||||
ghcid:
|
||||
ghcid_current: NL-XX-XXX-M-FMMV
|
||||
ghcid_current: NL-LI-VEN-M-FMMV
|
||||
ghcid_original: NL-XX-XXX-M-FMMV
|
||||
ghcid_uuid: f9f5904b-3f0e-589a-87d8-45c59c789c4f
|
||||
ghcid_uuid_sha256: da43503b-8690-884b-8e60-946b38ab8434
|
||||
ghcid_numeric: 15727502540298811467
|
||||
ghcid_uuid: cdd8c1c8-7fd3-5f8b-babd-54c369e694ca
|
||||
ghcid_uuid_sha256: dcb56ef8-9c2b-87f4-b515-6ce79a06fdf3
|
||||
ghcid_numeric: 15903739673179830260
|
||||
record_id: 0fe8c62f-329d-4dcd-b2d3-87f1ae6c591e
|
||||
generation_timestamp: '2025-12-16T21:06:38.565300+00:00'
|
||||
ghcid_history:
|
||||
- ghcid: NL-XX-XXX-M-FMMV
|
||||
ghcid_numeric: 15727502540298811467
|
||||
valid_from: '2025-12-17T08:44:26.042454+00:00'
|
||||
valid_to: null
|
||||
valid_to: '2025-12-17T10:18:03.391406+00:00'
|
||||
reason: Reverted incorrect LinkedIn HTML extraction - original XX-XXX restored
|
||||
- ghcid: NL-LI-VEN-M-FMMV
|
||||
ghcid_numeric: 15903739673179830260
|
||||
valid_from: '2025-12-17T10:18:03.391406+00:00'
|
||||
valid_to: null
|
||||
reason: Location enriched via Exa web search - Venlo, Limburg
|
||||
location_resolution:
|
||||
method: EXA_WEB_SEARCH
|
||||
city_code: VEN
|
||||
city_name: Venlo
|
||||
region_code: LI
|
||||
region_name: Limburg
|
||||
country_code: NL
|
||||
resolution_date: '2025-12-17T10:18:03.391406+00:00'
|
||||
source_url: https://www.fransmaasvenlo.nl/contact/
|
||||
provenance:
|
||||
schema_version: 1.0.0
|
||||
generated_at: '2025-12-16T21:06:38.565300+00:00'
|
||||
|
|
@ -51,6 +66,15 @@ provenance:
|
|||
- website
|
||||
- staff_count
|
||||
- heritage_staff
|
||||
web_search:
|
||||
- source_type: exa_web_search
|
||||
data_tier: TIER_3_CROWD_SOURCED
|
||||
source_url: https://www.fransmaasvenlo.nl/contact/
|
||||
extraction_timestamp: '2025-12-17T10:18:03.391406+00:00'
|
||||
claims_extracted:
|
||||
- city
|
||||
- region
|
||||
- address
|
||||
data_tier_summary:
|
||||
TIER_4_INFERRED:
|
||||
- linkedin_company_profile
|
||||
|
|
@ -59,3 +83,4 @@ provenance:
|
|||
- 'Location resolution method: UNRESOLVED'
|
||||
- Reverted incorrect location enrichment on 2025-12-17 - LinkedIn HTML extraction
|
||||
was extracting wrong company's data
|
||||
- 'Location enriched on 2025-12-17 via Exa web search: Venlo, Limburg'
|
||||
|
|
@ -21,23 +21,38 @@ linkedin_enrichment:
|
|||
original_file: data/custodian/linkedin/museum-canonije.yaml
|
||||
schema_version: 1.0.0
|
||||
location:
|
||||
city: null
|
||||
region: null
|
||||
city: Boxtel
|
||||
region: Noord-Brabant
|
||||
country: NL
|
||||
address: Duinendaal 9, 5281 AP Boxtel
|
||||
ghcid:
|
||||
ghcid_current: NL-XX-XXX-M-MC-museum_canonije
|
||||
ghcid_current: NL-NB-BOX-M-MC-museum_canonije
|
||||
ghcid_original: NL-XX-XXX-M-MC-museum_canonije
|
||||
ghcid_uuid: 15100a44-415a-5ed9-afce-263513d42a1c
|
||||
ghcid_uuid_sha256: 66912ce3-a33b-8452-ac7f-3980eb9a6eaf
|
||||
ghcid_numeric: 7390737819699496018
|
||||
ghcid_uuid: 3d464ce4-bba5-5dbd-8e3b-525c78016137
|
||||
ghcid_uuid_sha256: 0a248f2b-2a81-8242-ad8c-f9badd60b151
|
||||
ghcid_numeric: 730866455100334658
|
||||
record_id: 90dc824a-f2bd-4406-9a5d-bfa30962be9b
|
||||
generation_timestamp: '2025-12-16T21:06:45.443420+00:00'
|
||||
ghcid_history:
|
||||
- ghcid: NL-XX-XXX-M-MC-museum_canonije
|
||||
ghcid_numeric: 7390737819699496018
|
||||
valid_from: '2025-12-17T08:44:26.070346+00:00'
|
||||
valid_to: null
|
||||
valid_to: '2025-12-17T10:41:01.921443+00:00'
|
||||
reason: Reverted incorrect LinkedIn HTML extraction - original XX-XXX restored
|
||||
- ghcid: NL-NB-BOX-M-MC-museum_canonije
|
||||
ghcid_numeric: 730866455100334658
|
||||
valid_from: '2025-12-17T10:41:01.921443+00:00'
|
||||
valid_to: null
|
||||
reason: Location enriched via Exa web search - Boxtel, Noord-Brabant
|
||||
location_resolution:
|
||||
method: EXA_WEB_SEARCH
|
||||
city_code: BOX
|
||||
city_name: Boxtel
|
||||
region_code: NB
|
||||
region_name: Noord-Brabant
|
||||
country_code: NL
|
||||
resolution_date: '2025-12-17T10:41:01.921443+00:00'
|
||||
source_url: https://www.museumgidsnederland.nl/en/boxtel/museum-de-canonije/
|
||||
provenance:
|
||||
schema_version: 1.0.0
|
||||
generated_at: '2025-12-16T21:06:45.443420+00:00'
|
||||
|
|
@ -54,6 +69,15 @@ provenance:
|
|||
- website
|
||||
- staff_count
|
||||
- heritage_staff
|
||||
web_search:
|
||||
- source_type: exa_web_search
|
||||
data_tier: TIER_3_CROWD_SOURCED
|
||||
source_url: https://www.museumgidsnederland.nl/en/boxtel/museum-de-canonije/
|
||||
extraction_timestamp: '2025-12-17T10:41:01.921443+00:00'
|
||||
claims_extracted:
|
||||
- city
|
||||
- region
|
||||
- address
|
||||
data_tier_summary:
|
||||
TIER_4_INFERRED:
|
||||
- linkedin_company_profile
|
||||
|
|
@ -62,3 +86,4 @@ provenance:
|
|||
- 'Location resolution method: UNRESOLVED'
|
||||
- Reverted incorrect location enrichment on 2025-12-17 - LinkedIn HTML extraction
|
||||
was extracting wrong company's data
|
||||
- 'Location enriched on 2025-12-17 via Exa web search: Boxtel, Noord-Brabant'
|
||||
|
|
@ -22,28 +22,38 @@ linkedin_enrichment:
|
|||
original_file: data/custodian/linkedin/bierreclame-museum.yaml
|
||||
schema_version: 1.0.0
|
||||
location:
|
||||
city: ''
|
||||
region: XX
|
||||
city: Breda
|
||||
region: Noord-Brabant
|
||||
country: NL
|
||||
address: Haagweg 375, 4813 XC Breda
|
||||
ghcid:
|
||||
ghcid_current: NL-XX-XXX-M-BM
|
||||
ghcid_current: NL-NB-BRE-M-BM
|
||||
ghcid_original: NL-XX-XXX-M-BM
|
||||
ghcid_uuid: 84ed37a1-9cef-55df-9e60-e2e684672c12
|
||||
ghcid_uuid_sha256: c1231a5d-45dd-8add-a7d4-d95d70d66ab9
|
||||
ghcid_numeric: 13916996261411379933
|
||||
ghcid_uuid: a2dbc8ee-b755-55ee-84b1-0f99466ca820
|
||||
ghcid_uuid_sha256: 8800c7b5-4e0d-8a90-8ee5-6e2e0ca99ede
|
||||
ghcid_numeric: 9800052370670697104
|
||||
record_id: a36adeaa-47da-4568-bf3d-798f7bdb1af0
|
||||
generation_timestamp: '2025-12-16T21:06:43.291899+00:00'
|
||||
ghcid_history:
|
||||
- ghcid: NL-XX-XXX-M-BM
|
||||
ghcid_numeric: 13916996261411379933
|
||||
valid_from: '2025-12-16T21:06:43.291899+00:00'
|
||||
valid_to: null
|
||||
valid_to: '2025-12-17T09:25:04.082081+00:00'
|
||||
reason: Initial GHCID assignment from LinkedIn batch import
|
||||
- ghcid: NL-NB-BRE-M-BM
|
||||
ghcid_numeric: 9800052370670697104
|
||||
valid_from: '2025-12-17T09:25:04.082081+00:00'
|
||||
valid_to: null
|
||||
reason: Location enriched via Exa web search - Breda, Noord-Brabant
|
||||
location_resolution:
|
||||
method: UNRESOLVED
|
||||
city_code: XXX
|
||||
region_code: XX
|
||||
method: EXA_WEB_SEARCH
|
||||
city_code: BRE
|
||||
city_name: Breda
|
||||
region_code: NB
|
||||
region_name: Noord-Brabant
|
||||
country_code: NL
|
||||
resolution_date: '2025-12-17T09:25:04.082081+00:00'
|
||||
source_url: https://bierreclamemuseum.nl/
|
||||
provenance:
|
||||
schema_version: 1.0.0
|
||||
generated_at: '2025-12-16T21:06:43.291899+00:00'
|
||||
|
|
@ -60,9 +70,19 @@ provenance:
|
|||
- website
|
||||
- staff_count
|
||||
- heritage_staff
|
||||
web_search:
|
||||
- source_type: exa_web_search
|
||||
data_tier: TIER_3_CROWD_SOURCED
|
||||
source_url: https://bierreclamemuseum.nl/
|
||||
extraction_timestamp: '2025-12-17T09:25:04.082081+00:00'
|
||||
claims_extracted:
|
||||
- city
|
||||
- region
|
||||
- address
|
||||
data_tier_summary:
|
||||
TIER_4_INFERRED:
|
||||
- linkedin_company_profile
|
||||
notes:
|
||||
- Created from unmatched LinkedIn company profile
|
||||
- 'Location resolution method: UNRESOLVED'
|
||||
- 'Location enriched on 2025-12-17 via Exa web search: Breda, Noord-Brabant'
|
||||
|
|
@ -18,23 +18,38 @@ linkedin_enrichment:
|
|||
original_file: data/custodian/linkedin/museum-ceuclum.yaml
|
||||
schema_version: 1.0.0
|
||||
location:
|
||||
city: null
|
||||
region: null
|
||||
city: Cuijk
|
||||
region: Noord-Brabant
|
||||
country: NL
|
||||
address: Castellum 1, 5431 EM Cuijk
|
||||
ghcid:
|
||||
ghcid_current: NL-XX-XXX-M-MC
|
||||
ghcid_current: NL-NB-CUI-M-MC
|
||||
ghcid_original: NL-XX-XXX-M-MC
|
||||
ghcid_uuid: 5e2dd6dd-62ce-5e89-8ce7-31ad00950bc2
|
||||
ghcid_uuid_sha256: 12ec5579-cc21-8718-8082-0a26ec3b1e28
|
||||
ghcid_numeric: 1363558768790574872
|
||||
ghcid_uuid: 97f8ab3c-cfe2-53fb-ba27-dd2b3d058a83
|
||||
ghcid_uuid_sha256: 695826fd-daa1-82e0-9dbf-919364b88536
|
||||
ghcid_numeric: 7590860043669570272
|
||||
record_id: 8f7d0da9-11c4-4162-9cf3-3b848e24d9b2
|
||||
generation_timestamp: '2025-12-16T21:06:38.217498+00:00'
|
||||
ghcid_history:
|
||||
- ghcid: NL-XX-XXX-M-MC
|
||||
ghcid_numeric: 1363558768790574872
|
||||
valid_from: '2025-12-17T08:44:26.063071+00:00'
|
||||
valid_to: null
|
||||
valid_to: '2025-12-17T10:56:09.487585+00:00'
|
||||
reason: Reverted incorrect LinkedIn HTML extraction - original XX-XXX restored
|
||||
- ghcid: NL-NB-CUI-M-MC
|
||||
ghcid_numeric: 7590860043669570272
|
||||
valid_from: '2025-12-17T10:56:09.487585+00:00'
|
||||
valid_to: null
|
||||
reason: Location enriched via Exa web search - Cuijk, Noord-Brabant
|
||||
location_resolution:
|
||||
method: EXA_WEB_SEARCH
|
||||
city_code: CUI
|
||||
city_name: Cuijk
|
||||
region_code: NB
|
||||
region_name: Noord-Brabant
|
||||
country_code: NL
|
||||
resolution_date: '2025-12-17T10:56:09.487585+00:00'
|
||||
source_url: https://www.museumceuclum.nl/
|
||||
provenance:
|
||||
schema_version: 1.0.0
|
||||
generated_at: '2025-12-16T21:06:38.217498+00:00'
|
||||
|
|
@ -51,6 +66,15 @@ provenance:
|
|||
- website
|
||||
- staff_count
|
||||
- heritage_staff
|
||||
web_search:
|
||||
- source_type: exa_web_search
|
||||
data_tier: TIER_3_CROWD_SOURCED
|
||||
source_url: https://www.museumceuclum.nl/
|
||||
extraction_timestamp: '2025-12-17T10:56:09.487585+00:00'
|
||||
claims_extracted:
|
||||
- city
|
||||
- region
|
||||
- address
|
||||
data_tier_summary:
|
||||
TIER_4_INFERRED:
|
||||
- linkedin_company_profile
|
||||
|
|
@ -59,3 +83,4 @@ provenance:
|
|||
- 'Location resolution method: UNRESOLVED'
|
||||
- Reverted incorrect location enrichment on 2025-12-17 - LinkedIn HTML extraction
|
||||
was extracting wrong company's data
|
||||
- 'Location enriched on 2025-12-17 via Exa web search: Cuijk, Noord-Brabant'
|
||||
|
|
@ -22,23 +22,38 @@ linkedin_enrichment:
|
|||
original_file: data/custodian/linkedin/crypto-museum.yaml
|
||||
schema_version: 1.0.0
|
||||
location:
|
||||
city: null
|
||||
region: null
|
||||
city: Eindhoven
|
||||
region: Noord-Brabant
|
||||
country: NL
|
||||
address: Crypto Museum, Eindhoven, Netherlands
|
||||
ghcid:
|
||||
ghcid_current: NL-XX-XXX-M-CM-crypto_museum
|
||||
ghcid_current: NL-NB-EIN-M-CM-crypto_museum
|
||||
ghcid_original: NL-XX-XXX-M-CM-crypto_museum
|
||||
ghcid_uuid: e2d3a427-14ff-5a42-9504-f9e67b5cb6fb
|
||||
ghcid_uuid_sha256: be8c4743-a6b9-86ac-bcab-ae0016ba508b
|
||||
ghcid_numeric: 13730427719831340716
|
||||
ghcid_uuid: e3b135ce-d513-5cf6-9e94-ecfba6686a53
|
||||
ghcid_uuid_sha256: c7a798b7-74cc-85cc-b39a-69aa49b9fdb1
|
||||
ghcid_numeric: 14386635448364312012
|
||||
record_id: f760bfbd-3158-41b8-b25d-07e8218aff7c
|
||||
generation_timestamp: '2025-12-16T21:06:45.197167+00:00'
|
||||
ghcid_history:
|
||||
- ghcid: NL-XX-XXX-M-CM-crypto_museum
|
||||
ghcid_numeric: 13730427719831340716
|
||||
valid_from: '2025-12-17T08:44:25.987908+00:00'
|
||||
valid_to: null
|
||||
valid_to: '2025-12-17T09:17:11.063469+00:00'
|
||||
reason: Reverted incorrect LinkedIn HTML extraction - original XX-XXX restored
|
||||
- ghcid: NL-NB-EIN-M-CM-crypto_museum
|
||||
ghcid_numeric: 14386635448364312012
|
||||
valid_from: '2025-12-17T09:17:11.063469+00:00'
|
||||
valid_to: null
|
||||
reason: Location enriched via Exa web search - Eindhoven, Noord-Brabant
|
||||
location_resolution:
|
||||
method: EXA_WEB_SEARCH
|
||||
city_code: EIN
|
||||
city_name: Eindhoven
|
||||
region_code: NB
|
||||
region_name: Noord-Brabant
|
||||
country_code: NL
|
||||
resolution_date: '2025-12-17T09:17:11.063469+00:00'
|
||||
source_url: https://www.cryptomuseum.com/
|
||||
provenance:
|
||||
schema_version: 1.0.0
|
||||
generated_at: '2025-12-16T21:06:45.197167+00:00'
|
||||
|
|
@ -55,6 +70,15 @@ provenance:
|
|||
- website
|
||||
- staff_count
|
||||
- heritage_staff
|
||||
web_search:
|
||||
- source_type: exa_web_search
|
||||
data_tier: TIER_3_CROWD_SOURCED
|
||||
source_url: https://www.cryptomuseum.com/
|
||||
extraction_timestamp: '2025-12-17T09:17:11.063469+00:00'
|
||||
claims_extracted:
|
||||
- city
|
||||
- region
|
||||
- address
|
||||
data_tier_summary:
|
||||
TIER_4_INFERRED:
|
||||
- linkedin_company_profile
|
||||
|
|
@ -63,3 +87,4 @@ provenance:
|
|||
- 'Location resolution method: UNRESOLVED'
|
||||
- Reverted incorrect location enrichment on 2025-12-17 - LinkedIn HTML extraction
|
||||
was extracting wrong company's data
|
||||
- 'Location enriched on 2025-12-17 via Exa web search: Eindhoven, Noord-Brabant'
|
||||
|
|
@ -18,28 +18,38 @@ linkedin_enrichment:
|
|||
original_file: data/custodian/linkedin/ambachtelijke-zagerij-en-klompenmakerij-museum.yaml
|
||||
schema_version: 1.0.0
|
||||
location:
|
||||
city: ''
|
||||
region: XX
|
||||
city: Etten-Leur
|
||||
region: Noord-Brabant
|
||||
country: NL
|
||||
address: Schuitvaartjaagpad 179, 4873 NS Etten-Leur
|
||||
ghcid:
|
||||
ghcid_current: NL-XX-XXX-M-AZKM
|
||||
ghcid_current: NL-NB-ETL-M-AZKM
|
||||
ghcid_original: NL-XX-XXX-M-AZKM
|
||||
ghcid_uuid: 79baa8e8-7730-5cb3-9781-8a42082b4427
|
||||
ghcid_uuid_sha256: 230c046b-3ca2-865d-bb53-98d69bb656cd
|
||||
ghcid_numeric: 2525398349673322077
|
||||
ghcid_uuid: 57483d0d-fba0-502f-89fd-9e2149db05e4
|
||||
ghcid_uuid_sha256: 6922be22-4716-888a-82fb-d5098b9c0f1b
|
||||
ghcid_numeric: 7575826577621440650
|
||||
record_id: 47d673a1-1143-42aa-a27c-56387035a200
|
||||
generation_timestamp: '2025-12-16T21:06:45.151954+00:00'
|
||||
ghcid_history:
|
||||
- ghcid: NL-XX-XXX-M-AZKM
|
||||
ghcid_numeric: 2525398349673322077
|
||||
valid_from: '2025-12-16T21:06:45.151954+00:00'
|
||||
valid_to: null
|
||||
valid_to: '2025-12-17T09:33:15.831394+00:00'
|
||||
reason: Initial GHCID assignment from LinkedIn batch import
|
||||
- ghcid: NL-NB-ETL-M-AZKM
|
||||
ghcid_numeric: 7575826577621440650
|
||||
valid_from: '2025-12-17T09:33:15.831394+00:00'
|
||||
valid_to: null
|
||||
reason: Location enriched via Exa web search - Etten-Leur, Noord-Brabant
|
||||
location_resolution:
|
||||
method: UNRESOLVED
|
||||
city_code: XXX
|
||||
region_code: XX
|
||||
method: EXA_WEB_SEARCH
|
||||
city_code: ETL
|
||||
city_name: Etten-Leur
|
||||
region_code: NB
|
||||
region_name: Noord-Brabant
|
||||
country_code: NL
|
||||
resolution_date: '2025-12-17T09:33:15.831394+00:00'
|
||||
source_url: https://www.klompenmakerij.nl/
|
||||
provenance:
|
||||
schema_version: 1.0.0
|
||||
generated_at: '2025-12-16T21:06:45.151954+00:00'
|
||||
|
|
@ -56,9 +66,19 @@ provenance:
|
|||
- website
|
||||
- staff_count
|
||||
- heritage_staff
|
||||
web_search:
|
||||
- source_type: exa_web_search
|
||||
data_tier: TIER_3_CROWD_SOURCED
|
||||
source_url: https://www.klompenmakerij.nl/
|
||||
extraction_timestamp: '2025-12-17T09:33:15.831394+00:00'
|
||||
claims_extracted:
|
||||
- city
|
||||
- region
|
||||
- address
|
||||
data_tier_summary:
|
||||
TIER_4_INFERRED:
|
||||
- linkedin_company_profile
|
||||
notes:
|
||||
- Created from unmatched LinkedIn company profile
|
||||
- 'Location resolution method: UNRESOLVED'
|
||||
- 'Location enriched on 2025-12-17 via Exa web search: Etten-Leur, Noord-Brabant'
|
||||
|
|
@ -21,28 +21,38 @@ linkedin_enrichment:
|
|||
original_file: data/custodian/linkedin/edah-museum.yaml
|
||||
schema_version: 1.0.0
|
||||
location:
|
||||
city: ''
|
||||
region: XX
|
||||
city: Helmond
|
||||
region: Noord-Brabant
|
||||
country: NL
|
||||
address: Helmond, Noord-Brabant, Netherlands
|
||||
ghcid:
|
||||
ghcid_current: NL-XX-XXX-M-EM
|
||||
ghcid_current: NL-NB-HEL-M-EM
|
||||
ghcid_original: NL-XX-XXX-M-EM
|
||||
ghcid_uuid: abfeb5ac-0e66-5995-8d86-8a3cee6fbe6d
|
||||
ghcid_uuid_sha256: 6fcf6f37-53e2-8880-ab6f-44b77364aa56
|
||||
ghcid_numeric: 8056780541810337920
|
||||
ghcid_uuid: 9568b2dc-355a-54bb-8e42-cc7ee9003173
|
||||
ghcid_uuid_sha256: ae82fbff-3874-8406-9567-a9aae50c658c
|
||||
ghcid_numeric: 12574890183154267142
|
||||
record_id: d19ceb8a-e1ca-4669-8c0d-8584d562c8f5
|
||||
generation_timestamp: '2025-12-16T21:06:37.295988+00:00'
|
||||
ghcid_history:
|
||||
- ghcid: NL-XX-XXX-M-EM
|
||||
ghcid_numeric: 8056780541810337920
|
||||
valid_from: '2025-12-16T21:06:37.295988+00:00'
|
||||
valid_to: null
|
||||
valid_to: '2025-12-17T09:25:04.116169+00:00'
|
||||
reason: Initial GHCID assignment from LinkedIn batch import
|
||||
- ghcid: NL-NB-HEL-M-EM
|
||||
ghcid_numeric: 12574890183154267142
|
||||
valid_from: '2025-12-17T09:25:04.116169+00:00'
|
||||
valid_to: null
|
||||
reason: Location enriched via Exa web search - Helmond, Noord-Brabant
|
||||
location_resolution:
|
||||
method: UNRESOLVED
|
||||
city_code: XXX
|
||||
region_code: XX
|
||||
method: EXA_WEB_SEARCH
|
||||
city_code: HEL
|
||||
city_name: Helmond
|
||||
region_code: NB
|
||||
region_name: Noord-Brabant
|
||||
country_code: NL
|
||||
resolution_date: '2025-12-17T09:25:04.116169+00:00'
|
||||
source_url: https://www.edahmuseum.nl/
|
||||
provenance:
|
||||
schema_version: 1.0.0
|
||||
generated_at: '2025-12-16T21:06:37.295988+00:00'
|
||||
|
|
@ -59,9 +69,19 @@ provenance:
|
|||
- website
|
||||
- staff_count
|
||||
- heritage_staff
|
||||
web_search:
|
||||
- source_type: exa_web_search
|
||||
data_tier: TIER_3_CROWD_SOURCED
|
||||
source_url: https://www.edahmuseum.nl/
|
||||
extraction_timestamp: '2025-12-17T09:25:04.116169+00:00'
|
||||
claims_extracted:
|
||||
- city
|
||||
- region
|
||||
- address
|
||||
data_tier_summary:
|
||||
TIER_4_INFERRED:
|
||||
- linkedin_company_profile
|
||||
notes:
|
||||
- Created from unmatched LinkedIn company profile
|
||||
- 'Location resolution method: UNRESOLVED'
|
||||
- 'Location enriched on 2025-12-17 via Exa web search: Helmond, Noord-Brabant'
|
||||
|
|
@ -18,23 +18,38 @@ linkedin_enrichment:
|
|||
original_file: data/custodian/linkedin/stichting-abrahamdag.yaml
|
||||
schema_version: 1.0.0
|
||||
location:
|
||||
city: null
|
||||
region: null
|
||||
city: Oosterhout
|
||||
region: Noord-Brabant
|
||||
country: NL
|
||||
address: Van Wijngaardestraat 46, 4901VM Oosterhout
|
||||
ghcid:
|
||||
ghcid_current: NL-XX-XXX-M-A
|
||||
ghcid_current: NL-NB-OOS-M-A
|
||||
ghcid_original: NL-XX-XXX-M-A
|
||||
ghcid_uuid: 3dbf1031-5d5d-5640-b949-96e68e4df376
|
||||
ghcid_uuid_sha256: b4cf931e-ab67-8939-b358-c1b70b1ed7f8
|
||||
ghcid_numeric: 13028794006940141881
|
||||
ghcid_uuid: 1b16490b-f37d-5b67-b035-37c14ae23c53
|
||||
ghcid_uuid_sha256: 3c7e1c0c-6706-822a-bdac-1946550f8bcf
|
||||
ghcid_numeric: 4358952328934863402
|
||||
record_id: 13121906-9f48-4cd3-81e5-32e43aae4a5a
|
||||
generation_timestamp: '2025-12-16T21:06:45.023838+00:00'
|
||||
ghcid_history:
|
||||
- ghcid: NL-XX-XXX-M-A
|
||||
ghcid_numeric: 13028794006940141881
|
||||
valid_from: '2025-12-17T08:44:26.069013+00:00'
|
||||
valid_to: null
|
||||
valid_to: '2025-12-17T10:08:05.575717+00:00'
|
||||
reason: Reverted incorrect LinkedIn HTML extraction - original XX-XXX restored
|
||||
- ghcid: NL-NB-OOS-M-A
|
||||
ghcid_numeric: 4358952328934863402
|
||||
valid_from: '2025-12-17T10:08:05.575717+00:00'
|
||||
valid_to: null
|
||||
reason: Location enriched via Exa web search - Oosterhout, Noord-Brabant
|
||||
location_resolution:
|
||||
method: EXA_WEB_SEARCH
|
||||
city_code: OOS
|
||||
city_name: Oosterhout
|
||||
region_code: NB
|
||||
region_name: Noord-Brabant
|
||||
country_code: NL
|
||||
resolution_date: '2025-12-17T10:08:05.575717+00:00'
|
||||
source_url: https://abrahamdag.com/
|
||||
provenance:
|
||||
schema_version: 1.0.0
|
||||
generated_at: '2025-12-16T21:06:45.023838+00:00'
|
||||
|
|
@ -51,6 +66,15 @@ provenance:
|
|||
- website
|
||||
- staff_count
|
||||
- heritage_staff
|
||||
web_search:
|
||||
- source_type: exa_web_search
|
||||
data_tier: TIER_3_CROWD_SOURCED
|
||||
source_url: https://abrahamdag.com/
|
||||
extraction_timestamp: '2025-12-17T10:08:05.575717+00:00'
|
||||
claims_extracted:
|
||||
- city
|
||||
- region
|
||||
- address
|
||||
data_tier_summary:
|
||||
TIER_4_INFERRED:
|
||||
- linkedin_company_profile
|
||||
|
|
@ -59,3 +83,4 @@ provenance:
|
|||
- 'Location resolution method: UNRESOLVED'
|
||||
- Reverted incorrect location enrichment on 2025-12-17 - LinkedIn HTML extraction
|
||||
was extracting wrong company's data
|
||||
- 'Location enriched on 2025-12-17 via Exa web search: Oosterhout, Noord-Brabant'
|
||||
|
|
@ -226,23 +226,38 @@ linkedin_enrichment:
|
|||
original_file: data/custodian/linkedin/huis73-nl.yaml
|
||||
schema_version: 1.0.0
|
||||
location:
|
||||
city: null
|
||||
region: null
|
||||
city: '''s-Hertogenbosch'
|
||||
region: Noord-Brabant
|
||||
country: NL
|
||||
address: Hekellaan 2, 5211 LX 's-Hertogenbosch
|
||||
ghcid:
|
||||
ghcid_current: NL-XX-XXX-L-HN
|
||||
ghcid_current: NL-NB-SHE-L-HN
|
||||
ghcid_original: NL-XX-XXX-L-HN
|
||||
ghcid_uuid: 3e812b40-08d1-5d01-b3f4-d3eb2b053f41
|
||||
ghcid_uuid_sha256: 193b35bb-74cd-807d-81e9-68924d148c39
|
||||
ghcid_numeric: 1818105953808932989
|
||||
ghcid_uuid: 0f399e85-7c29-5e69-b963-c635aaa79cf6
|
||||
ghcid_uuid_sha256: 5b7d526a-8eec-8b1b-8a68-416064436348
|
||||
ghcid_numeric: 6592516047158119195
|
||||
record_id: 348d146d-cf2e-4a95-be5a-2d5aa300ebaa
|
||||
generation_timestamp: '2025-12-16T21:06:38.288769+00:00'
|
||||
ghcid_history:
|
||||
- ghcid: NL-XX-XXX-L-HN
|
||||
ghcid_numeric: 1818105953808932989
|
||||
valid_from: '2025-12-17T08:44:26.044915+00:00'
|
||||
valid_to: null
|
||||
valid_to: '2025-12-17T10:06:03.418243+00:00'
|
||||
reason: Reverted incorrect LinkedIn HTML extraction - original XX-XXX restored
|
||||
- ghcid: NL-NB-SHE-L-HN
|
||||
ghcid_numeric: 6592516047158119195
|
||||
valid_from: '2025-12-17T10:06:03.418243+00:00'
|
||||
valid_to: null
|
||||
reason: Location enriched via Exa web search - 's-Hertogenbosch, Noord-Brabant
|
||||
location_resolution:
|
||||
method: EXA_WEB_SEARCH
|
||||
city_code: SHE
|
||||
city_name: '''s-Hertogenbosch'
|
||||
region_code: NB
|
||||
region_name: Noord-Brabant
|
||||
country_code: NL
|
||||
resolution_date: '2025-12-17T10:06:03.418243+00:00'
|
||||
source_url: https://www.huis73.nl/locaties
|
||||
provenance:
|
||||
schema_version: 1.0.0
|
||||
generated_at: '2025-12-16T21:06:38.288769+00:00'
|
||||
|
|
@ -259,6 +274,15 @@ provenance:
|
|||
- website
|
||||
- staff_count
|
||||
- heritage_staff
|
||||
web_search:
|
||||
- source_type: exa_web_search
|
||||
data_tier: TIER_3_CROWD_SOURCED
|
||||
source_url: https://www.huis73.nl/locaties
|
||||
extraction_timestamp: '2025-12-17T10:06:03.418243+00:00'
|
||||
claims_extracted:
|
||||
- city
|
||||
- region
|
||||
- address
|
||||
data_tier_summary:
|
||||
TIER_4_INFERRED:
|
||||
- linkedin_company_profile
|
||||
|
|
@ -267,3 +291,4 @@ provenance:
|
|||
- 'Location resolution method: UNRESOLVED'
|
||||
- Reverted incorrect location enrichment on 2025-12-17 - LinkedIn HTML extraction
|
||||
was extracting wrong company's data
|
||||
- 'Location enriched on 2025-12-17 via Exa web search: ''s-Hertogenbosch, Noord-Brabant'
|
||||
|
|
@ -22,23 +22,38 @@ linkedin_enrichment:
|
|||
original_file: data/custodian/linkedin/museum-van-brabantse-mutsen-en-poffers.yaml
|
||||
schema_version: 1.0.0
|
||||
location:
|
||||
city: null
|
||||
region: null
|
||||
city: Sint-Oedenrode
|
||||
region: Noord-Brabant
|
||||
country: NL
|
||||
address: Kerkstraat 20, 5492 AH Sint-Oedenrode
|
||||
ghcid:
|
||||
ghcid_current: NL-XX-XXX-M-MBMP
|
||||
ghcid_current: NL-NB-SOR-M-MBMP
|
||||
ghcid_original: NL-XX-XXX-M-MBMP
|
||||
ghcid_uuid: 21336fc7-3662-5602-9119-7d73c036534e
|
||||
ghcid_uuid_sha256: 593b5119-0fd3-86ae-90c6-c45eb4887beb
|
||||
ghcid_numeric: 6429822061083035310
|
||||
ghcid_uuid: 31deef24-1de5-533c-a94b-41cb77270114
|
||||
ghcid_uuid_sha256: 797305b2-f764-8c7b-a93b-56cc8f1a6ab1
|
||||
ghcid_numeric: 8751344767123889275
|
||||
record_id: a2dc4dee-6368-4d74-a185-413ed2e74f2f
|
||||
generation_timestamp: '2025-12-16T21:06:45.177876+00:00'
|
||||
ghcid_history:
|
||||
- ghcid: NL-XX-XXX-M-MBMP
|
||||
ghcid_numeric: 6429822061083035310
|
||||
valid_from: '2025-12-17T08:44:25.982154+00:00'
|
||||
valid_to: null
|
||||
valid_to: '2025-12-17T10:56:09.506688+00:00'
|
||||
reason: Reverted incorrect LinkedIn HTML extraction - original XX-XXX restored
|
||||
- ghcid: NL-NB-SOR-M-MBMP
|
||||
ghcid_numeric: 8751344767123889275
|
||||
valid_from: '2025-12-17T10:56:09.506688+00:00'
|
||||
valid_to: null
|
||||
reason: Location enriched via Exa web search - Sint-Oedenrode, Noord-Brabant
|
||||
location_resolution:
|
||||
method: EXA_WEB_SEARCH
|
||||
city_code: SOR
|
||||
city_name: Sint-Oedenrode
|
||||
region_code: NB
|
||||
region_name: Noord-Brabant
|
||||
country_code: NL
|
||||
resolution_date: '2025-12-17T10:56:09.506688+00:00'
|
||||
source_url: https://mutsenmuseum.nl/
|
||||
provenance:
|
||||
schema_version: 1.0.0
|
||||
generated_at: '2025-12-16T21:06:45.177876+00:00'
|
||||
|
|
@ -55,6 +70,15 @@ provenance:
|
|||
- website
|
||||
- staff_count
|
||||
- heritage_staff
|
||||
web_search:
|
||||
- source_type: exa_web_search
|
||||
data_tier: TIER_3_CROWD_SOURCED
|
||||
source_url: https://mutsenmuseum.nl/
|
||||
extraction_timestamp: '2025-12-17T10:56:09.506688+00:00'
|
||||
claims_extracted:
|
||||
- city
|
||||
- region
|
||||
- address
|
||||
data_tier_summary:
|
||||
TIER_4_INFERRED:
|
||||
- linkedin_company_profile
|
||||
|
|
@ -63,3 +87,4 @@ provenance:
|
|||
- 'Location resolution method: UNRESOLVED'
|
||||
- Reverted incorrect location enrichment on 2025-12-17 - LinkedIn HTML extraction
|
||||
was extracting wrong company's data
|
||||
- 'Location enriched on 2025-12-17 via Exa web search: Sint-Oedenrode, Noord-Brabant'
|
||||
|
|
@ -18,23 +18,38 @@ linkedin_enrichment:
|
|||
original_file: data/custodian/linkedin/museum-buitenlust.yaml
|
||||
schema_version: 1.0.0
|
||||
location:
|
||||
city: null
|
||||
region: null
|
||||
city: Waalwijk
|
||||
region: Noord-Brabant
|
||||
country: NL
|
||||
address: Jan de Rooystraat 14, 5141 EN Waalwijk
|
||||
ghcid:
|
||||
ghcid_current: NL-XX-XXX-M-MB-museum_buitenlust
|
||||
ghcid_current: NL-NB-WAA-M-MB-museum_buitenlust
|
||||
ghcid_original: NL-XX-XXX-M-MB-museum_buitenlust
|
||||
ghcid_uuid: 78e29a5b-ff12-532e-b6a9-26b96527d012
|
||||
ghcid_uuid_sha256: 5d687b60-5e1c-83ce-9fd3-be1ab3eda87c
|
||||
ghcid_numeric: 6730765296931226574
|
||||
ghcid_uuid: df4d5ef7-4ec4-57be-b445-576a334c5cfb
|
||||
ghcid_uuid_sha256: e3bb7ab7-eaa4-8dd1-aadc-a05bb8f7a822
|
||||
ghcid_numeric: 16409844597588803025
|
||||
record_id: ed0fd5bd-9d38-4b94-bcbc-9b927711b645
|
||||
generation_timestamp: '2025-12-16T21:06:42.414009+00:00'
|
||||
ghcid_history:
|
||||
- ghcid: NL-XX-XXX-M-MB-museum_buitenlust
|
||||
ghcid_numeric: 6730765296931226574
|
||||
valid_from: '2025-12-17T08:44:26.056155+00:00'
|
||||
valid_to: null
|
||||
valid_to: '2025-12-17T10:41:01.904174+00:00'
|
||||
reason: Reverted incorrect LinkedIn HTML extraction - original XX-XXX restored
|
||||
- ghcid: NL-NB-WAA-M-MB-museum_buitenlust
|
||||
ghcid_numeric: 16409844597588803025
|
||||
valid_from: '2025-12-17T10:41:01.904174+00:00'
|
||||
valid_to: null
|
||||
reason: Location enriched via Exa web search - Waalwijk, Noord-Brabant
|
||||
location_resolution:
|
||||
method: EXA_WEB_SEARCH
|
||||
city_code: WAA
|
||||
city_name: Waalwijk
|
||||
region_code: NB
|
||||
region_name: Noord-Brabant
|
||||
country_code: NL
|
||||
resolution_date: '2025-12-17T10:41:01.904174+00:00'
|
||||
source_url: https://www.museumbuitenlust.nl/contact
|
||||
provenance:
|
||||
schema_version: 1.0.0
|
||||
generated_at: '2025-12-16T21:06:42.414009+00:00'
|
||||
|
|
@ -51,6 +66,15 @@ provenance:
|
|||
- website
|
||||
- staff_count
|
||||
- heritage_staff
|
||||
web_search:
|
||||
- source_type: exa_web_search
|
||||
data_tier: TIER_3_CROWD_SOURCED
|
||||
source_url: https://www.museumbuitenlust.nl/contact
|
||||
extraction_timestamp: '2025-12-17T10:41:01.904174+00:00'
|
||||
claims_extracted:
|
||||
- city
|
||||
- region
|
||||
- address
|
||||
data_tier_summary:
|
||||
TIER_4_INFERRED:
|
||||
- linkedin_company_profile
|
||||
|
|
@ -59,3 +83,4 @@ provenance:
|
|||
- 'Location resolution method: UNRESOLVED'
|
||||
- Reverted incorrect location enrichment on 2025-12-17 - LinkedIn HTML extraction
|
||||
was extracting wrong company's data
|
||||
- 'Location enriched on 2025-12-17 via Exa web search: Waalwijk, Noord-Brabant'
|
||||
File diff suppressed because it is too large
Load diff
File diff suppressed because it is too large
Load diff
|
|
@ -18,23 +18,38 @@ linkedin_enrichment:
|
|||
original_file: data/custodian/linkedin/allard-pierson-museum.yaml
|
||||
schema_version: 1.0.0
|
||||
location:
|
||||
city: null
|
||||
region: null
|
||||
city: Amsterdam
|
||||
region: Noord-Holland
|
||||
country: NL
|
||||
address: Oude Turfmarkt 127, 1012 GC Amsterdam
|
||||
ghcid:
|
||||
ghcid_current: NL-XX-XXX-M-APM-allard_pierson_museum
|
||||
ghcid_current: NL-NH-AMS-M-APM-allard_pierson_museum
|
||||
ghcid_original: NL-XX-XXX-M-APM-allard_pierson_museum
|
||||
ghcid_uuid: 48392b8c-36a8-53a6-a03d-271eaf106bb4
|
||||
ghcid_uuid_sha256: e9b99db6-f222-8dc7-bbbe-18ed689ad863
|
||||
ghcid_numeric: 16841665690600619463
|
||||
ghcid_uuid: 0ec4308c-0f54-5138-99c6-809709534df8
|
||||
ghcid_uuid_sha256: 4f66990b-cd99-83cd-9376-1c589d0ffbb1
|
||||
ghcid_numeric: 5721428652593849293
|
||||
record_id: ffc23f4f-a760-406f-b103-46f70b81736a
|
||||
generation_timestamp: '2025-12-16T21:06:45.602986+00:00'
|
||||
ghcid_history:
|
||||
- ghcid: NL-XX-XXX-M-APM-allard_pierson_museum
|
||||
ghcid_numeric: 16841665690600619463
|
||||
valid_from: '2025-12-17T08:44:25.980684+00:00'
|
||||
valid_to: null
|
||||
valid_to: '2025-12-17T09:17:11.075305+00:00'
|
||||
reason: Reverted incorrect LinkedIn HTML extraction - original XX-XXX restored
|
||||
- ghcid: NL-NH-AMS-M-APM-allard_pierson_museum
|
||||
ghcid_numeric: 5721428652593849293
|
||||
valid_from: '2025-12-17T09:17:11.075305+00:00'
|
||||
valid_to: null
|
||||
reason: Location enriched via Exa web search - Amsterdam, Noord-Holland
|
||||
location_resolution:
|
||||
method: EXA_WEB_SEARCH
|
||||
city_code: AMS
|
||||
city_name: Amsterdam
|
||||
region_code: NH
|
||||
region_name: Noord-Holland
|
||||
country_code: NL
|
||||
resolution_date: '2025-12-17T09:17:11.075305+00:00'
|
||||
source_url: https://www.allardpierson.nl/
|
||||
provenance:
|
||||
schema_version: 1.0.0
|
||||
generated_at: '2025-12-16T21:06:45.602986+00:00'
|
||||
|
|
@ -51,6 +66,15 @@ provenance:
|
|||
- website
|
||||
- staff_count
|
||||
- heritage_staff
|
||||
web_search:
|
||||
- source_type: exa_web_search
|
||||
data_tier: TIER_3_CROWD_SOURCED
|
||||
source_url: https://www.allardpierson.nl/
|
||||
extraction_timestamp: '2025-12-17T09:17:11.075305+00:00'
|
||||
claims_extracted:
|
||||
- city
|
||||
- region
|
||||
- address
|
||||
data_tier_summary:
|
||||
TIER_4_INFERRED:
|
||||
- linkedin_company_profile
|
||||
|
|
@ -59,3 +83,4 @@ provenance:
|
|||
- 'Location resolution method: UNRESOLVED'
|
||||
- Reverted incorrect location enrichment on 2025-12-17 - LinkedIn HTML extraction
|
||||
was extracting wrong company's data
|
||||
- 'Location enriched on 2025-12-17 via Exa web search: Amsterdam, Noord-Holland'
|
||||
|
|
@ -18,28 +18,38 @@ linkedin_enrichment:
|
|||
original_file: data/custodian/linkedin/cow-museum.yaml
|
||||
schema_version: 1.0.0
|
||||
location:
|
||||
city: ''
|
||||
region: XX
|
||||
city: Amsterdam
|
||||
region: Noord-Holland
|
||||
country: NL
|
||||
address: Leliegracht 4, Amsterdam
|
||||
ghcid:
|
||||
ghcid_current: NL-XX-XXX-M-CM-cow_museum
|
||||
ghcid_current: NL-NH-AMS-M-CM-cow_museum
|
||||
ghcid_original: NL-XX-XXX-M-CM-cow_museum
|
||||
ghcid_uuid: 9590fe8f-c06b-55d1-a4f0-8a497b9a9d6b
|
||||
ghcid_uuid_sha256: 7748f0fe-3dc8-8f0f-b5d3-6e253897ec97
|
||||
ghcid_numeric: 8595384863585521423
|
||||
ghcid_uuid: 552df309-3f63-5bce-ba90-0a8cd7fab8c1
|
||||
ghcid_uuid_sha256: 6e182c92-bd64-882a-9adc-0a40d4b7ab19
|
||||
ghcid_numeric: 7933139752367454250
|
||||
record_id: 5ae057ba-c6a2-43d0-a200-1285bcb507a7
|
||||
generation_timestamp: '2025-12-16T21:06:42.129981+00:00'
|
||||
ghcid_history:
|
||||
- ghcid: NL-XX-XXX-M-CM-cow_museum
|
||||
ghcid_numeric: 8595384863585521423
|
||||
valid_from: '2025-12-16T21:06:42.129981+00:00'
|
||||
valid_to: null
|
||||
valid_to: '2025-12-17T09:17:11.106062+00:00'
|
||||
reason: Initial GHCID assignment from LinkedIn batch import
|
||||
- ghcid: NL-NH-AMS-M-CM-cow_museum
|
||||
ghcid_numeric: 7933139752367454250
|
||||
valid_from: '2025-12-17T09:17:11.106062+00:00'
|
||||
valid_to: null
|
||||
reason: Location enriched via Exa web search - Amsterdam, Noord-Holland
|
||||
location_resolution:
|
||||
method: UNRESOLVED
|
||||
city_code: XXX
|
||||
region_code: XX
|
||||
method: EXA_WEB_SEARCH
|
||||
city_code: AMS
|
||||
city_name: Amsterdam
|
||||
region_code: NH
|
||||
region_name: Noord-Holland
|
||||
country_code: NL
|
||||
resolution_date: '2025-12-17T09:17:11.106062+00:00'
|
||||
source_url: https://cowmuseum.amsterdam/
|
||||
provenance:
|
||||
schema_version: 1.0.0
|
||||
generated_at: '2025-12-16T21:06:42.129981+00:00'
|
||||
|
|
@ -56,9 +66,19 @@ provenance:
|
|||
- website
|
||||
- staff_count
|
||||
- heritage_staff
|
||||
web_search:
|
||||
- source_type: exa_web_search
|
||||
data_tier: TIER_3_CROWD_SOURCED
|
||||
source_url: https://cowmuseum.amsterdam/
|
||||
extraction_timestamp: '2025-12-17T09:17:11.106062+00:00'
|
||||
claims_extracted:
|
||||
- city
|
||||
- region
|
||||
- address
|
||||
data_tier_summary:
|
||||
TIER_4_INFERRED:
|
||||
- linkedin_company_profile
|
||||
notes:
|
||||
- Created from unmatched LinkedIn company profile
|
||||
- 'Location resolution method: UNRESOLVED'
|
||||
- 'Location enriched on 2025-12-17 via Exa web search: Amsterdam, Noord-Holland'
|
||||
|
|
@ -18,28 +18,38 @@ linkedin_enrichment:
|
|||
original_file: data/custodian/linkedin/cacao-museum.yaml
|
||||
schema_version: 1.0.0
|
||||
location:
|
||||
city: ''
|
||||
region: XX
|
||||
city: Amsterdam
|
||||
region: Noord-Holland
|
||||
country: NL
|
||||
address: Amsterdam, Netherlands
|
||||
ghcid:
|
||||
ghcid_current: NL-XX-XXX-M-CM
|
||||
ghcid_current: NL-NH-AMS-M-CM
|
||||
ghcid_original: NL-XX-XXX-M-CM
|
||||
ghcid_uuid: 583583d1-81e1-58c3-93fb-c5e26dd537b3
|
||||
ghcid_uuid_sha256: 9a9161f8-d0d2-8a31-a4cd-762b71024dc4
|
||||
ghcid_numeric: 11137791074747161137
|
||||
ghcid_uuid: 47f14c04-b37a-5e60-aa25-de2ed442b7ff
|
||||
ghcid_uuid_sha256: 3411844b-6dbc-869e-8f22-4cc95c99b917
|
||||
ghcid_numeric: 3751925424074823326
|
||||
record_id: 1241c462-d2b4-4afa-b46c-e2b0f3c22fde
|
||||
generation_timestamp: '2025-12-16T21:06:40.671775+00:00'
|
||||
ghcid_history:
|
||||
- ghcid: NL-XX-XXX-M-CM
|
||||
ghcid_numeric: 11137791074747161137
|
||||
valid_from: '2025-12-16T21:06:40.671775+00:00'
|
||||
valid_to: null
|
||||
valid_to: '2025-12-17T09:25:04.103278+00:00'
|
||||
reason: Initial GHCID assignment from LinkedIn batch import
|
||||
- ghcid: NL-NH-AMS-M-CM
|
||||
ghcid_numeric: 3751925424074823326
|
||||
valid_from: '2025-12-17T09:25:04.103278+00:00'
|
||||
valid_to: null
|
||||
reason: Location enriched via Exa web search - Amsterdam, Noord-Holland
|
||||
location_resolution:
|
||||
method: UNRESOLVED
|
||||
city_code: XXX
|
||||
region_code: XX
|
||||
method: EXA_WEB_SEARCH
|
||||
city_code: AMS
|
||||
city_name: Amsterdam
|
||||
region_code: NH
|
||||
region_name: Noord-Holland
|
||||
country_code: NL
|
||||
resolution_date: '2025-12-17T09:25:04.103278+00:00'
|
||||
source_url: https://www.cacaomuseum.nl/
|
||||
provenance:
|
||||
schema_version: 1.0.0
|
||||
generated_at: '2025-12-16T21:06:40.671775+00:00'
|
||||
|
|
@ -56,9 +66,19 @@ provenance:
|
|||
- website
|
||||
- staff_count
|
||||
- heritage_staff
|
||||
web_search:
|
||||
- source_type: exa_web_search
|
||||
data_tier: TIER_3_CROWD_SOURCED
|
||||
source_url: https://www.cacaomuseum.nl/
|
||||
extraction_timestamp: '2025-12-17T09:25:04.103278+00:00'
|
||||
claims_extracted:
|
||||
- city
|
||||
- region
|
||||
- address
|
||||
data_tier_summary:
|
||||
TIER_4_INFERRED:
|
||||
- linkedin_company_profile
|
||||
notes:
|
||||
- Created from unmatched LinkedIn company profile
|
||||
- 'Location resolution method: UNRESOLVED'
|
||||
- 'Location enriched on 2025-12-17 via Exa web search: Amsterdam, Noord-Holland'
|
||||
|
|
@ -21,23 +21,38 @@ linkedin_enrichment:
|
|||
original_file: data/custodian/linkedin/dutch-directors-guild-ddg.yaml
|
||||
schema_version: 1.0.0
|
||||
location:
|
||||
city: null
|
||||
region: null
|
||||
city: Amsterdam
|
||||
region: Noord-Holland
|
||||
country: NL
|
||||
address: De Lairessestraat 125 sous, 1075 HH Amsterdam
|
||||
ghcid:
|
||||
ghcid_current: NL-XX-XXX-M-DDGD
|
||||
ghcid_current: NL-NH-AMS-M-DDGD
|
||||
ghcid_original: NL-XX-XXX-M-DDGD
|
||||
ghcid_uuid: dfc8ca6f-3c25-5bd7-9444-fc482affad31
|
||||
ghcid_uuid_sha256: 122993cf-061a-8925-b3a5-bdb49e87451f
|
||||
ghcid_numeric: 1308739684097775909
|
||||
ghcid_uuid: 918191cb-0b1b-52ef-9f89-a91876e7b672
|
||||
ghcid_uuid_sha256: a132b2a0-d58a-8be6-b7e0-bf41b618d481
|
||||
ghcid_numeric: 11615542792789044198
|
||||
record_id: e3bb2498-22cf-48e3-8d5c-9c5dc0c398bb
|
||||
generation_timestamp: '2025-12-16T21:06:46.065288+00:00'
|
||||
ghcid_history:
|
||||
- ghcid: NL-XX-XXX-M-DDGD
|
||||
ghcid_numeric: 1308739684097775909
|
||||
valid_from: '2025-12-17T08:44:26.036192+00:00'
|
||||
valid_to: null
|
||||
valid_to: '2025-12-17T10:06:03.451255+00:00'
|
||||
reason: Reverted incorrect LinkedIn HTML extraction - original XX-XXX restored
|
||||
- ghcid: NL-NH-AMS-M-DDGD
|
||||
ghcid_numeric: 11615542792789044198
|
||||
valid_from: '2025-12-17T10:06:03.451255+00:00'
|
||||
valid_to: null
|
||||
reason: Location enriched via Exa web search - Amsterdam, Noord-Holland
|
||||
location_resolution:
|
||||
method: EXA_WEB_SEARCH
|
||||
city_code: AMS
|
||||
city_name: Amsterdam
|
||||
region_code: NH
|
||||
region_name: Noord-Holland
|
||||
country_code: NL
|
||||
resolution_date: '2025-12-17T10:06:03.451255+00:00'
|
||||
source_url: https://www.directorsguild.nl/contact/
|
||||
provenance:
|
||||
schema_version: 1.0.0
|
||||
generated_at: '2025-12-16T21:06:46.065288+00:00'
|
||||
|
|
@ -54,6 +69,15 @@ provenance:
|
|||
- website
|
||||
- staff_count
|
||||
- heritage_staff
|
||||
web_search:
|
||||
- source_type: exa_web_search
|
||||
data_tier: TIER_3_CROWD_SOURCED
|
||||
source_url: https://www.directorsguild.nl/contact/
|
||||
extraction_timestamp: '2025-12-17T10:06:03.451255+00:00'
|
||||
claims_extracted:
|
||||
- city
|
||||
- region
|
||||
- address
|
||||
data_tier_summary:
|
||||
TIER_4_INFERRED:
|
||||
- linkedin_company_profile
|
||||
|
|
@ -62,3 +86,4 @@ provenance:
|
|||
- 'Location resolution method: UNRESOLVED'
|
||||
- Reverted incorrect location enrichment on 2025-12-17 - LinkedIn HTML extraction
|
||||
was extracting wrong company's data
|
||||
- 'Location enriched on 2025-12-17 via Exa web search: Amsterdam, Noord-Holland'
|
||||
|
|
@ -18,28 +18,38 @@ linkedin_enrichment:
|
|||
original_file: data/custodian/linkedin/eddie-the-eagle-museum.yaml
|
||||
schema_version: 1.0.0
|
||||
location:
|
||||
city: ''
|
||||
region: XX
|
||||
city: Amsterdam
|
||||
region: Noord-Holland
|
||||
country: NL
|
||||
address: Amsterdam, Netherlands
|
||||
ghcid:
|
||||
ghcid_current: NL-XX-XXX-M-EEM
|
||||
ghcid_current: NL-NH-AMS-M-EEM
|
||||
ghcid_original: NL-XX-XXX-M-EEM
|
||||
ghcid_uuid: 13bc9959-7082-5d46-bd65-825b494a755f
|
||||
ghcid_uuid_sha256: 4a81f985-bb1f-8275-98cf-b5550faf1698
|
||||
ghcid_numeric: 5368846583567700597
|
||||
ghcid_uuid: ca8ffd5a-ae44-5a97-aa5f-b470c2889849
|
||||
ghcid_uuid_sha256: 0b19894f-1307-8a1a-828a-3f5550e5bc6d
|
||||
ghcid_numeric: 799821381549623834
|
||||
record_id: bd5b97ab-2d9b-411a-87ef-4190f7694ea5
|
||||
generation_timestamp: '2025-12-16T21:06:36.344748+00:00'
|
||||
ghcid_history:
|
||||
- ghcid: NL-XX-XXX-M-EEM
|
||||
ghcid_numeric: 5368846583567700597
|
||||
valid_from: '2025-12-16T21:06:36.344748+00:00'
|
||||
valid_to: null
|
||||
valid_to: '2025-12-17T09:25:04.153616+00:00'
|
||||
reason: Initial GHCID assignment from LinkedIn batch import
|
||||
- ghcid: NL-NH-AMS-M-EEM
|
||||
ghcid_numeric: 799821381549623834
|
||||
valid_from: '2025-12-17T09:25:04.153616+00:00'
|
||||
valid_to: null
|
||||
reason: Location enriched via Exa web search - Amsterdam, Noord-Holland
|
||||
location_resolution:
|
||||
method: UNRESOLVED
|
||||
city_code: XXX
|
||||
region_code: XX
|
||||
method: EXA_WEB_SEARCH
|
||||
city_code: AMS
|
||||
city_name: Amsterdam
|
||||
region_code: NH
|
||||
region_name: Noord-Holland
|
||||
country_code: NL
|
||||
resolution_date: '2025-12-17T09:25:04.153616+00:00'
|
||||
source_url: https://www.eddie-the-eagle-museum.com/
|
||||
provenance:
|
||||
schema_version: 1.0.0
|
||||
generated_at: '2025-12-16T21:06:36.344748+00:00'
|
||||
|
|
@ -56,9 +66,19 @@ provenance:
|
|||
- website
|
||||
- staff_count
|
||||
- heritage_staff
|
||||
web_search:
|
||||
- source_type: exa_web_search
|
||||
data_tier: TIER_3_CROWD_SOURCED
|
||||
source_url: https://www.eddie-the-eagle-museum.com/
|
||||
extraction_timestamp: '2025-12-17T09:25:04.153616+00:00'
|
||||
claims_extracted:
|
||||
- city
|
||||
- region
|
||||
- address
|
||||
data_tier_summary:
|
||||
TIER_4_INFERRED:
|
||||
- linkedin_company_profile
|
||||
notes:
|
||||
- Created from unmatched LinkedIn company profile
|
||||
- 'Location resolution method: UNRESOLVED'
|
||||
- 'Location enriched on 2025-12-17 via Exa web search: Amsterdam, Noord-Holland'
|
||||
|
|
@ -18,28 +18,38 @@ linkedin_enrichment:
|
|||
original_file: data/custodian/linkedin/erotisch-museum.yaml
|
||||
schema_version: 1.0.0
|
||||
location:
|
||||
city: ''
|
||||
region: XX
|
||||
city: Amsterdam
|
||||
region: Noord-Holland
|
||||
country: NL
|
||||
address: Oudezijds Achterburgwal 54, 1012 DP Amsterdam
|
||||
ghcid:
|
||||
ghcid_current: NL-XX-XXX-M-EM-erotisch_museum
|
||||
ghcid_current: NL-NH-AMS-M-EM-erotisch_museum
|
||||
ghcid_original: NL-XX-XXX-M-EM-erotisch_museum
|
||||
ghcid_uuid: bb4cab24-67cd-5fd5-81ae-a23a875e15b6
|
||||
ghcid_uuid_sha256: 7e6cc478-4325-8950-8c84-78ec611b00c8
|
||||
ghcid_numeric: 9109872167065323856
|
||||
ghcid_uuid: 5f62f68f-f9fa-5300-8be8-e189faf31ea4
|
||||
ghcid_uuid_sha256: fff75562-9984-8ad1-87c7-cf7a85b16c89
|
||||
ghcid_numeric: 18444304680889912017
|
||||
record_id: b35f2b5c-438b-4f53-98e8-6836db080fdd
|
||||
generation_timestamp: '2025-12-16T21:06:40.810369+00:00'
|
||||
ghcid_history:
|
||||
- ghcid: NL-XX-XXX-M-EM-erotisch_museum
|
||||
ghcid_numeric: 9109872167065323856
|
||||
valid_from: '2025-12-16T21:06:40.810369+00:00'
|
||||
valid_to: null
|
||||
valid_to: '2025-12-17T09:57:11.842394+00:00'
|
||||
reason: Initial GHCID assignment from LinkedIn batch import
|
||||
- ghcid: NL-NH-AMS-M-EM-erotisch_museum
|
||||
ghcid_numeric: 18444304680889912017
|
||||
valid_from: '2025-12-17T09:57:11.842394+00:00'
|
||||
valid_to: null
|
||||
reason: Location enriched via Exa web search - Amsterdam, Noord-Holland
|
||||
location_resolution:
|
||||
method: UNRESOLVED
|
||||
city_code: XXX
|
||||
region_code: XX
|
||||
method: EXA_WEB_SEARCH
|
||||
city_code: AMS
|
||||
city_name: Amsterdam
|
||||
region_code: NH
|
||||
region_name: Noord-Holland
|
||||
country_code: NL
|
||||
resolution_date: '2025-12-17T09:57:11.842394+00:00'
|
||||
source_url: https://erotisch-museum.nl/
|
||||
provenance:
|
||||
schema_version: 1.0.0
|
||||
generated_at: '2025-12-16T21:06:40.810369+00:00'
|
||||
|
|
@ -56,9 +66,19 @@ provenance:
|
|||
- website
|
||||
- staff_count
|
||||
- heritage_staff
|
||||
web_search:
|
||||
- source_type: exa_web_search
|
||||
data_tier: TIER_3_CROWD_SOURCED
|
||||
source_url: https://erotisch-museum.nl/
|
||||
extraction_timestamp: '2025-12-17T09:57:11.842394+00:00'
|
||||
claims_extracted:
|
||||
- city
|
||||
- region
|
||||
- address
|
||||
data_tier_summary:
|
||||
TIER_4_INFERRED:
|
||||
- linkedin_company_profile
|
||||
notes:
|
||||
- Created from unmatched LinkedIn company profile
|
||||
- 'Location resolution method: UNRESOLVED'
|
||||
- 'Location enriched on 2025-12-17 via Exa web search: Amsterdam, Noord-Holland'
|
||||
|
|
@ -18,28 +18,38 @@ linkedin_enrichment:
|
|||
original_file: data/custodian/linkedin/van-gogh-museum-enterprises-b-v.yaml
|
||||
schema_version: 1.0.0
|
||||
location:
|
||||
city: ''
|
||||
region: XX
|
||||
city: Amsterdam
|
||||
region: Noord-Holland
|
||||
country: NL
|
||||
address: Gabriel Metsustraat 8, 1071 EA Amsterdam
|
||||
ghcid:
|
||||
ghcid_current: NL-XX-XXX-M-GMEBV
|
||||
ghcid_current: NL-NH-AMS-M-GMEBV
|
||||
ghcid_original: NL-XX-XXX-M-GMEBV
|
||||
ghcid_uuid: c08ffbd9-798e-5759-99a9-6ebcca90eaf6
|
||||
ghcid_uuid_sha256: d180b682-bda8-8c1b-a59b-23d28bbadf01
|
||||
ghcid_numeric: 15096266623589833755
|
||||
ghcid_uuid: 37b64938-f8fa-5c8e-9491-518df13f6217
|
||||
ghcid_uuid_sha256: b111cb97-78ad-8caa-88f5-ee638ae4422c
|
||||
ghcid_numeric: 12759203070742441130
|
||||
record_id: 1b2f11f2-09a6-4238-9ece-7ecf802f2bf6
|
||||
generation_timestamp: '2025-12-16T21:06:39.631236+00:00'
|
||||
ghcid_history:
|
||||
- ghcid: NL-XX-XXX-M-GMEBV
|
||||
ghcid_numeric: 15096266623589833755
|
||||
valid_from: '2025-12-16T21:06:39.631236+00:00'
|
||||
valid_to: null
|
||||
valid_to: '2025-12-17T09:57:11.875121+00:00'
|
||||
reason: Initial GHCID assignment from LinkedIn batch import
|
||||
- ghcid: NL-NH-AMS-M-GMEBV
|
||||
ghcid_numeric: 12759203070742441130
|
||||
valid_from: '2025-12-17T09:57:11.875121+00:00'
|
||||
valid_to: null
|
||||
reason: Location enriched via Exa web search - Amsterdam, Noord-Holland
|
||||
location_resolution:
|
||||
method: UNRESOLVED
|
||||
city_code: XXX
|
||||
region_code: XX
|
||||
method: EXA_WEB_SEARCH
|
||||
city_code: AMS
|
||||
city_name: Amsterdam
|
||||
region_code: NH
|
||||
region_name: Noord-Holland
|
||||
country_code: NL
|
||||
resolution_date: '2025-12-17T09:57:11.875121+00:00'
|
||||
source_url: https://www.vangoghmuseum.nl/
|
||||
provenance:
|
||||
schema_version: 1.0.0
|
||||
generated_at: '2025-12-16T21:06:39.631236+00:00'
|
||||
|
|
@ -56,9 +66,19 @@ provenance:
|
|||
- website
|
||||
- staff_count
|
||||
- heritage_staff
|
||||
web_search:
|
||||
- source_type: exa_web_search
|
||||
data_tier: TIER_3_CROWD_SOURCED
|
||||
source_url: https://www.vangoghmuseum.nl/
|
||||
extraction_timestamp: '2025-12-17T09:57:11.875121+00:00'
|
||||
claims_extracted:
|
||||
- city
|
||||
- region
|
||||
- address
|
||||
data_tier_summary:
|
||||
TIER_4_INFERRED:
|
||||
- linkedin_company_profile
|
||||
notes:
|
||||
- Created from unmatched LinkedIn company profile
|
||||
- 'Location resolution method: UNRESOLVED'
|
||||
- 'Location enriched on 2025-12-17 via Exa web search: Amsterdam, Noord-Holland'
|
||||
|
|
@ -21,23 +21,38 @@ linkedin_enrichment:
|
|||
original_file: data/custodian/linkedin/museum-galerie-rat.yaml
|
||||
schema_version: 1.0.0
|
||||
location:
|
||||
city: null
|
||||
region: null
|
||||
city: Den Burg
|
||||
region: Noord-Holland
|
||||
country: NL
|
||||
address: Burgwal 20, 1791 Den Burg, Texel
|
||||
ghcid:
|
||||
ghcid_current: NL-XX-XXX-M-MGR
|
||||
ghcid_current: NL-NH-DEB-M-MGR
|
||||
ghcid_original: NL-XX-XXX-M-MGR
|
||||
ghcid_uuid: 429368ac-f508-5f8a-85c9-45a0e2f17ede
|
||||
ghcid_uuid_sha256: e316442c-29a1-85ba-8466-96b219533229
|
||||
ghcid_numeric: 16363341252565001658
|
||||
ghcid_uuid: 181eacab-1bc0-5d2e-9cb3-323d13d4af7d
|
||||
ghcid_uuid_sha256: 62635b76-af7b-8348-a0c4-3e807eb3c11b
|
||||
ghcid_numeric: 7089610803719668552
|
||||
record_id: ac9f145d-7a7a-433d-871a-3f2b6d08b418
|
||||
generation_timestamp: '2025-12-16T21:06:38.459660+00:00'
|
||||
ghcid_history:
|
||||
- ghcid: NL-XX-XXX-M-MGR
|
||||
ghcid_numeric: 16363341252565001658
|
||||
valid_from: '2025-12-17T08:44:26.014402+00:00'
|
||||
valid_to: null
|
||||
valid_to: '2025-12-17T10:56:09.466192+00:00'
|
||||
reason: Reverted incorrect LinkedIn HTML extraction - original XX-XXX restored
|
||||
- ghcid: NL-NH-DEB-M-MGR
|
||||
ghcid_numeric: 7089610803719668552
|
||||
valid_from: '2025-12-17T10:56:09.466192+00:00'
|
||||
valid_to: null
|
||||
reason: Location enriched via Exa web search - Den Burg, Noord-Holland
|
||||
location_resolution:
|
||||
method: EXA_WEB_SEARCH
|
||||
city_code: DEB
|
||||
city_name: Den Burg
|
||||
region_code: NH
|
||||
region_name: Noord-Holland
|
||||
country_code: NL
|
||||
resolution_date: '2025-12-17T10:56:09.466192+00:00'
|
||||
source_url: https://www.mapquest.com/
|
||||
provenance:
|
||||
schema_version: 1.0.0
|
||||
generated_at: '2025-12-16T21:06:38.459660+00:00'
|
||||
|
|
@ -54,6 +69,15 @@ provenance:
|
|||
- website
|
||||
- staff_count
|
||||
- heritage_staff
|
||||
web_search:
|
||||
- source_type: exa_web_search
|
||||
data_tier: TIER_3_CROWD_SOURCED
|
||||
source_url: https://www.mapquest.com/
|
||||
extraction_timestamp: '2025-12-17T10:56:09.466192+00:00'
|
||||
claims_extracted:
|
||||
- city
|
||||
- region
|
||||
- address
|
||||
data_tier_summary:
|
||||
TIER_4_INFERRED:
|
||||
- linkedin_company_profile
|
||||
|
|
@ -62,3 +86,4 @@ provenance:
|
|||
- 'Location resolution method: UNRESOLVED'
|
||||
- Reverted incorrect location enrichment on 2025-12-17 - LinkedIn HTML extraction
|
||||
was extracting wrong company's data
|
||||
- 'Location enriched on 2025-12-17 via Exa web search: Den Burg, Noord-Holland'
|
||||
|
|
@ -18,28 +18,38 @@ linkedin_enrichment:
|
|||
original_file: data/custodian/linkedin/flessenscheepjes-museum.yaml
|
||||
schema_version: 1.0.0
|
||||
location:
|
||||
city: ''
|
||||
region: XX
|
||||
city: Enkhuizen
|
||||
region: Noord-Holland
|
||||
country: NL
|
||||
address: Zuiderspui 1, 1601 GN Enkhuizen
|
||||
ghcid:
|
||||
ghcid_current: NL-XX-XXX-M-FM-flessenscheepjes_museum
|
||||
ghcid_current: NL-NH-ENK-M-FM-flessenscheepjes_museum
|
||||
ghcid_original: NL-XX-XXX-M-FM-flessenscheepjes_museum
|
||||
ghcid_uuid: 97c8b3fd-d3fe-56f8-8347-200b16870c5a
|
||||
ghcid_uuid_sha256: ac6ed3ce-d178-8ef6-aa8b-809066354c99
|
||||
ghcid_numeric: 12425101307192844022
|
||||
ghcid_uuid: 9b9ca02a-fdf6-56d6-b459-606fbeb06876
|
||||
ghcid_uuid_sha256: 200ffdc7-eea1-8fc5-8519-21259ce03cf6
|
||||
ghcid_numeric: 2310344169008254917
|
||||
record_id: cbc2e3fa-e583-45d3-a439-9068faa1ad07
|
||||
generation_timestamp: '2025-12-16T21:06:40.027467+00:00'
|
||||
ghcid_history:
|
||||
- ghcid: NL-XX-XXX-M-FM-flessenscheepjes_museum
|
||||
ghcid_numeric: 12425101307192844022
|
||||
valid_from: '2025-12-16T21:06:40.027467+00:00'
|
||||
valid_to: null
|
||||
valid_to: '2025-12-17T09:25:04.131278+00:00'
|
||||
reason: Initial GHCID assignment from LinkedIn batch import
|
||||
- ghcid: NL-NH-ENK-M-FM-flessenscheepjes_museum
|
||||
ghcid_numeric: 2310344169008254917
|
||||
valid_from: '2025-12-17T09:25:04.131278+00:00'
|
||||
valid_to: null
|
||||
reason: Location enriched via Exa web search - Enkhuizen, Noord-Holland
|
||||
location_resolution:
|
||||
method: UNRESOLVED
|
||||
city_code: XXX
|
||||
region_code: XX
|
||||
method: EXA_WEB_SEARCH
|
||||
city_code: ENK
|
||||
city_name: Enkhuizen
|
||||
region_code: NH
|
||||
region_name: Noord-Holland
|
||||
country_code: NL
|
||||
resolution_date: '2025-12-17T09:25:04.131278+00:00'
|
||||
source_url: https://www.flessenscheepjesmuseum.nl/
|
||||
provenance:
|
||||
schema_version: 1.0.0
|
||||
generated_at: '2025-12-16T21:06:40.027467+00:00'
|
||||
|
|
@ -56,9 +66,19 @@ provenance:
|
|||
- website
|
||||
- staff_count
|
||||
- heritage_staff
|
||||
web_search:
|
||||
- source_type: exa_web_search
|
||||
data_tier: TIER_3_CROWD_SOURCED
|
||||
source_url: https://www.flessenscheepjesmuseum.nl/
|
||||
extraction_timestamp: '2025-12-17T09:25:04.131278+00:00'
|
||||
claims_extracted:
|
||||
- city
|
||||
- region
|
||||
- address
|
||||
data_tier_summary:
|
||||
TIER_4_INFERRED:
|
||||
- linkedin_company_profile
|
||||
notes:
|
||||
- Created from unmatched LinkedIn company profile
|
||||
- 'Location resolution method: UNRESOLVED'
|
||||
- 'Location enriched on 2025-12-17 via Exa web search: Enkhuizen, Noord-Holland'
|
||||
|
|
@ -18,28 +18,38 @@ linkedin_enrichment:
|
|||
original_file: data/custodian/linkedin/c1000-museum.yaml
|
||||
schema_version: 1.0.0
|
||||
location:
|
||||
city: ''
|
||||
region: XX
|
||||
city: Hellendoorn
|
||||
region: Overijssel
|
||||
country: NL
|
||||
address: Katenhorstweg 2, 7447 RN Hellendoorn
|
||||
ghcid:
|
||||
ghcid_current: NL-XX-XXX-M-CM-c1000_museum
|
||||
ghcid_current: NL-OV-HEL-M-CM-c1000_museum
|
||||
ghcid_original: NL-XX-XXX-M-CM-c1000_museum
|
||||
ghcid_uuid: 1ca741dc-8ca8-5e04-a15f-3d12fbcef31d
|
||||
ghcid_uuid_sha256: b92c4170-b851-81f2-8aa3-b844c73b7104
|
||||
ghcid_numeric: 13343111748376396274
|
||||
ghcid_uuid: 23ca8d87-2929-59d3-a67d-9a70e3a30e81
|
||||
ghcid_uuid_sha256: 13ca320c-b106-895a-a8a2-7cf17e62a22b
|
||||
ghcid_numeric: 1426007262107171162
|
||||
record_id: 1428fc8d-3fcf-41a4-b9cf-f000c2cee234
|
||||
generation_timestamp: '2025-12-16T21:06:46.862264+00:00'
|
||||
ghcid_history:
|
||||
- ghcid: NL-XX-XXX-M-CM-c1000_museum
|
||||
ghcid_numeric: 13343111748376396274
|
||||
valid_from: '2025-12-16T21:06:46.862264+00:00'
|
||||
valid_to: null
|
||||
valid_to: '2025-12-17T09:33:15.763818+00:00'
|
||||
reason: Initial GHCID assignment from LinkedIn batch import
|
||||
- ghcid: NL-OV-HEL-M-CM-c1000_museum
|
||||
ghcid_numeric: 1426007262107171162
|
||||
valid_from: '2025-12-17T09:33:15.763818+00:00'
|
||||
valid_to: null
|
||||
reason: Location enriched via Exa web search - Hellendoorn, Overijssel
|
||||
location_resolution:
|
||||
method: UNRESOLVED
|
||||
city_code: XXX
|
||||
region_code: XX
|
||||
method: EXA_WEB_SEARCH
|
||||
city_code: HEL
|
||||
city_name: Hellendoorn
|
||||
region_code: OV
|
||||
region_name: Overijssel
|
||||
country_code: NL
|
||||
resolution_date: '2025-12-17T09:33:15.763818+00:00'
|
||||
source_url: https://www.c1000museum.nl/
|
||||
provenance:
|
||||
schema_version: 1.0.0
|
||||
generated_at: '2025-12-16T21:06:46.862264+00:00'
|
||||
|
|
@ -56,9 +66,19 @@ provenance:
|
|||
- website
|
||||
- staff_count
|
||||
- heritage_staff
|
||||
web_search:
|
||||
- source_type: exa_web_search
|
||||
data_tier: TIER_3_CROWD_SOURCED
|
||||
source_url: https://www.c1000museum.nl/
|
||||
extraction_timestamp: '2025-12-17T09:33:15.763818+00:00'
|
||||
claims_extracted:
|
||||
- city
|
||||
- region
|
||||
- address
|
||||
data_tier_summary:
|
||||
TIER_4_INFERRED:
|
||||
- linkedin_company_profile
|
||||
notes:
|
||||
- Created from unmatched LinkedIn company profile
|
||||
- 'Location resolution method: UNRESOLVED'
|
||||
- 'Location enriched on 2025-12-17 via Exa web search: Hellendoorn, Overijssel'
|
||||
|
|
@ -18,23 +18,38 @@ linkedin_enrichment:
|
|||
original_file: data/custodian/linkedin/museum-erve-hofman.yaml
|
||||
schema_version: 1.0.0
|
||||
location:
|
||||
city: null
|
||||
region: null
|
||||
city: Hellendoorn
|
||||
region: Overijssel
|
||||
country: NL
|
||||
address: Hofmanstraat 2, 7447 AS Hellendoorn
|
||||
ghcid:
|
||||
ghcid_current: NL-XX-XXX-M-MEH
|
||||
ghcid_current: NL-OV-HEL-M-MEH
|
||||
ghcid_original: NL-XX-XXX-M-MEH
|
||||
ghcid_uuid: c3289aa2-9498-57b6-b7f9-02e6680143b4
|
||||
ghcid_uuid_sha256: fb2403ee-f058-84a1-8014-d00e7832e0fc
|
||||
ghcid_numeric: 18096593527450940577
|
||||
ghcid_uuid: c48fb5e7-c5cc-544d-a446-cbaf423febc6
|
||||
ghcid_uuid_sha256: f974021f-883b-8c6c-bb8f-18ee6041b691
|
||||
ghcid_numeric: 17974994347195337836
|
||||
record_id: aafc1760-7283-4a18-8aea-ff347f86bcd5
|
||||
generation_timestamp: '2025-12-16T21:06:37.002843+00:00'
|
||||
ghcid_history:
|
||||
- ghcid: NL-XX-XXX-M-MEH
|
||||
ghcid_numeric: 18096593527450940577
|
||||
valid_from: '2025-12-17T08:44:26.021837+00:00'
|
||||
valid_to: null
|
||||
valid_to: '2025-12-17T10:41:01.945830+00:00'
|
||||
reason: Reverted incorrect LinkedIn HTML extraction - original XX-XXX restored
|
||||
- ghcid: NL-OV-HEL-M-MEH
|
||||
ghcid_numeric: 17974994347195337836
|
||||
valid_from: '2025-12-17T10:41:01.945830+00:00'
|
||||
valid_to: null
|
||||
reason: Location enriched via Exa web search - Hellendoorn, Overijssel
|
||||
location_resolution:
|
||||
method: EXA_WEB_SEARCH
|
||||
city_code: HEL
|
||||
city_name: Hellendoorn
|
||||
region_code: OV
|
||||
region_name: Overijssel
|
||||
country_code: NL
|
||||
resolution_date: '2025-12-17T10:41:01.945830+00:00'
|
||||
source_url: https://www.oaldheldern.nl/
|
||||
provenance:
|
||||
schema_version: 1.0.0
|
||||
generated_at: '2025-12-16T21:06:37.002843+00:00'
|
||||
|
|
@ -51,6 +66,15 @@ provenance:
|
|||
- website
|
||||
- staff_count
|
||||
- heritage_staff
|
||||
web_search:
|
||||
- source_type: exa_web_search
|
||||
data_tier: TIER_3_CROWD_SOURCED
|
||||
source_url: https://www.oaldheldern.nl/
|
||||
extraction_timestamp: '2025-12-17T10:41:01.945830+00:00'
|
||||
claims_extracted:
|
||||
- city
|
||||
- region
|
||||
- address
|
||||
data_tier_summary:
|
||||
TIER_4_INFERRED:
|
||||
- linkedin_company_profile
|
||||
|
|
@ -59,3 +83,4 @@ provenance:
|
|||
- 'Location resolution method: UNRESOLVED'
|
||||
- Reverted incorrect location enrichment on 2025-12-17 - LinkedIn HTML extraction
|
||||
was extracting wrong company's data
|
||||
- 'Location enriched on 2025-12-17 via Exa web search: Hellendoorn, Overijssel'
|
||||
|
|
@ -21,23 +21,38 @@ linkedin_enrichment:
|
|||
original_file: data/custodian/linkedin/museum-averlo-frieswijk-schalkhaar.yaml
|
||||
schema_version: 1.0.0
|
||||
location:
|
||||
city: null
|
||||
region: null
|
||||
city: Schalkhaar
|
||||
region: Overijssel
|
||||
country: NL
|
||||
address: Frieswijkerweg 7, 7433 RB Schalkhaar
|
||||
ghcid:
|
||||
ghcid_current: NL-XX-XXX-M-MAFS
|
||||
ghcid_current: NL-OV-SCK-M-MAFS
|
||||
ghcid_original: NL-XX-XXX-M-MAFS
|
||||
ghcid_uuid: a0095672-8164-5162-bbeb-1326d8c47c2e
|
||||
ghcid_uuid_sha256: 418c9005-af59-8721-aff7-09d320097bdc
|
||||
ghcid_numeric: 4723308463295911713
|
||||
ghcid_uuid: 55dcd23c-2bde-524f-b846-b568f848c489
|
||||
ghcid_uuid_sha256: 65d5a742-39a5-845f-81de-a188a31e9760
|
||||
ghcid_numeric: 7337955070746895455
|
||||
record_id: cf3d0896-1d2a-49d1-94a2-a5962c995d76
|
||||
generation_timestamp: '2025-12-16T21:06:45.760441+00:00'
|
||||
ghcid_history:
|
||||
- ghcid: NL-XX-XXX-M-MAFS
|
||||
ghcid_numeric: 4723308463295911713
|
||||
valid_from: '2025-12-17T08:44:26.071718+00:00'
|
||||
valid_to: null
|
||||
valid_to: '2025-12-17T10:56:09.477040+00:00'
|
||||
reason: Reverted incorrect LinkedIn HTML extraction - original XX-XXX restored
|
||||
- ghcid: NL-OV-SCK-M-MAFS
|
||||
ghcid_numeric: 7337955070746895455
|
||||
valid_from: '2025-12-17T10:56:09.477040+00:00'
|
||||
valid_to: null
|
||||
reason: Location enriched via Exa web search - Schalkhaar, Overijssel
|
||||
location_resolution:
|
||||
method: EXA_WEB_SEARCH
|
||||
city_code: SCK
|
||||
city_name: Schalkhaar
|
||||
region_code: OV
|
||||
region_name: Overijssel
|
||||
country_code: NL
|
||||
resolution_date: '2025-12-17T10:56:09.477040+00:00'
|
||||
source_url: https://www.museum-afs.nl/
|
||||
provenance:
|
||||
schema_version: 1.0.0
|
||||
generated_at: '2025-12-16T21:06:45.760441+00:00'
|
||||
|
|
@ -54,6 +69,15 @@ provenance:
|
|||
- website
|
||||
- staff_count
|
||||
- heritage_staff
|
||||
web_search:
|
||||
- source_type: exa_web_search
|
||||
data_tier: TIER_3_CROWD_SOURCED
|
||||
source_url: https://www.museum-afs.nl/
|
||||
extraction_timestamp: '2025-12-17T10:56:09.477040+00:00'
|
||||
claims_extracted:
|
||||
- city
|
||||
- region
|
||||
- address
|
||||
data_tier_summary:
|
||||
TIER_4_INFERRED:
|
||||
- linkedin_company_profile
|
||||
|
|
@ -62,3 +86,4 @@ provenance:
|
|||
- 'Location resolution method: UNRESOLVED'
|
||||
- Reverted incorrect location enrichment on 2025-12-17 - LinkedIn HTML extraction
|
||||
was extracting wrong company's data
|
||||
- 'Location enriched on 2025-12-17 via Exa web search: Schalkhaar, Overijssel'
|
||||
|
|
@ -21,23 +21,38 @@ linkedin_enrichment:
|
|||
original_file: data/custodian/linkedin/museum-dijkmagazijn-de-heul.yaml
|
||||
schema_version: 1.0.0
|
||||
location:
|
||||
city: null
|
||||
region: null
|
||||
city: Schalkwijk
|
||||
region: Utrecht
|
||||
country: NL
|
||||
address: Provincialeweg 70, 3998 JK Schalkwijk
|
||||
ghcid:
|
||||
ghcid_current: NL-XX-XXX-M-MDH
|
||||
ghcid_current: NL-UT-SCH-M-MDH
|
||||
ghcid_original: NL-XX-XXX-M-MDH
|
||||
ghcid_uuid: 9a5ac714-4ad1-59f8-9262-72cc6d85edeb
|
||||
ghcid_uuid_sha256: f2883142-38d3-88b3-87c8-2eaa798c5d80
|
||||
ghcid_numeric: 17476272514502215859
|
||||
ghcid_uuid: c9455255-b55c-5df9-ac7d-b2dcebd80af4
|
||||
ghcid_uuid_sha256: 9fc9707e-9dda-8a57-b694-011dc66da335
|
||||
ghcid_numeric: 11513857611465906775
|
||||
record_id: a8300219-47d6-4b17-b9da-5e8e5e96ce49
|
||||
generation_timestamp: '2025-12-16T21:06:45.958015+00:00'
|
||||
ghcid_history:
|
||||
- ghcid: NL-XX-XXX-M-MDH
|
||||
ghcid_numeric: 17476272514502215859
|
||||
valid_from: '2025-12-17T08:44:26.064337+00:00'
|
||||
valid_to: null
|
||||
valid_to: '2025-12-17T10:41:01.934919+00:00'
|
||||
reason: Reverted incorrect LinkedIn HTML extraction - original XX-XXX restored
|
||||
- ghcid: NL-UT-SCH-M-MDH
|
||||
ghcid_numeric: 11513857611465906775
|
||||
valid_from: '2025-12-17T10:41:01.934919+00:00'
|
||||
valid_to: null
|
||||
reason: Location enriched via Exa web search - Schalkwijk, Utrecht
|
||||
location_resolution:
|
||||
method: EXA_WEB_SEARCH
|
||||
city_code: SCH
|
||||
city_name: Schalkwijk
|
||||
region_code: UT
|
||||
region_name: Utrecht
|
||||
country_code: NL
|
||||
resolution_date: '2025-12-17T10:41:01.934919+00:00'
|
||||
source_url: https://www.museuminschalkwijk.nl/contact.html
|
||||
provenance:
|
||||
schema_version: 1.0.0
|
||||
generated_at: '2025-12-16T21:06:45.958015+00:00'
|
||||
|
|
@ -54,6 +69,15 @@ provenance:
|
|||
- website
|
||||
- staff_count
|
||||
- heritage_staff
|
||||
web_search:
|
||||
- source_type: exa_web_search
|
||||
data_tier: TIER_3_CROWD_SOURCED
|
||||
source_url: https://www.museuminschalkwijk.nl/contact.html
|
||||
extraction_timestamp: '2025-12-17T10:41:01.934919+00:00'
|
||||
claims_extracted:
|
||||
- city
|
||||
- region
|
||||
- address
|
||||
data_tier_summary:
|
||||
TIER_4_INFERRED:
|
||||
- linkedin_company_profile
|
||||
|
|
@ -62,3 +86,4 @@ provenance:
|
|||
- 'Location resolution method: UNRESOLVED'
|
||||
- Reverted incorrect location enrichment on 2025-12-17 - LinkedIn HTML extraction
|
||||
was extracting wrong company's data
|
||||
- 'Location enriched on 2025-12-17 via Exa web search: Schalkwijk, Utrecht'
|
||||
|
|
@ -1,64 +0,0 @@
|
|||
custodian_name:
|
||||
emic_name: Ajax Museum
|
||||
emic_name_source: linkedin
|
||||
institution_type:
|
||||
- M
|
||||
linkedin_enrichment:
|
||||
linkedin_url: https://www.linkedin.com/company/ajax-museum
|
||||
linkedin_slug: ajax-museum
|
||||
industry: Museums, Historical Sites, and Zoos
|
||||
website: https://lnkd.in/ezz5r9nF
|
||||
follower_count: 5,707
|
||||
staff_count: 1
|
||||
heritage_staff_count: 0
|
||||
heritage_staff: []
|
||||
enrichment_timestamp: '2025-12-16T21:06:39.152742+00:00'
|
||||
provenance:
|
||||
source: linkedin_company_scrape
|
||||
original_file: data/custodian/linkedin/ajax-museum.yaml
|
||||
schema_version: 1.0.0
|
||||
location:
|
||||
city: ''
|
||||
region: XX
|
||||
country: NL
|
||||
ghcid:
|
||||
ghcid_current: NL-XX-XXX-M-AM
|
||||
ghcid_original: NL-XX-XXX-M-AM
|
||||
ghcid_uuid: 44e6a20c-0e8e-509c-a126-749148b44831
|
||||
ghcid_uuid_sha256: f2392c8d-320f-8be0-9a91-2e8d6d706953
|
||||
ghcid_numeric: 17454030815792942048
|
||||
record_id: 1bbc810e-b76d-4d89-8351-99e851dae39f
|
||||
generation_timestamp: '2025-12-16T21:06:39.152742+00:00'
|
||||
ghcid_history:
|
||||
- ghcid: NL-XX-XXX-M-AM
|
||||
ghcid_numeric: 17454030815792942048
|
||||
valid_from: '2025-12-16T21:06:39.152742+00:00'
|
||||
valid_to: null
|
||||
reason: Initial GHCID assignment from LinkedIn batch import
|
||||
location_resolution:
|
||||
method: UNRESOLVED
|
||||
city_code: XXX
|
||||
region_code: XX
|
||||
country_code: NL
|
||||
provenance:
|
||||
schema_version: 1.0.0
|
||||
generated_at: '2025-12-16T21:06:39.152742+00:00'
|
||||
sources:
|
||||
linkedin:
|
||||
- source_type: linkedin_company_profile
|
||||
data_tier: TIER_4_INFERRED
|
||||
source_file: data/custodian/linkedin/ajax-museum.yaml
|
||||
extraction_timestamp: '2025-12-16T21:06:39.152742+00:00'
|
||||
claims_extracted:
|
||||
- name
|
||||
- industry
|
||||
- location
|
||||
- website
|
||||
- staff_count
|
||||
- heritage_staff
|
||||
data_tier_summary:
|
||||
TIER_4_INFERRED:
|
||||
- linkedin_company_profile
|
||||
notes:
|
||||
- Created from unmatched LinkedIn company profile
|
||||
- 'Location resolution method: UNRESOLVED'
|
||||
|
|
@ -1,64 +0,0 @@
|
|||
custodian_name:
|
||||
emic_name: Hollands Kaas Museum
|
||||
emic_name_source: linkedin
|
||||
institution_type:
|
||||
- M
|
||||
linkedin_enrichment:
|
||||
linkedin_url: https://www.linkedin.com/company/hollands-kaas-museum
|
||||
linkedin_slug: hollands-kaas-museum
|
||||
industry: Museums, Historical Sites, and Zoos
|
||||
website: null
|
||||
follower_count: 5,618,343
|
||||
staff_count: 1
|
||||
heritage_staff_count: 0
|
||||
heritage_staff: []
|
||||
enrichment_timestamp: '2025-12-16T21:06:41.385742+00:00'
|
||||
provenance:
|
||||
source: linkedin_company_scrape
|
||||
original_file: data/custodian/linkedin/hollands-kaas-museum.yaml
|
||||
schema_version: 1.0.0
|
||||
location:
|
||||
city: ''
|
||||
region: XX
|
||||
country: NL
|
||||
ghcid:
|
||||
ghcid_current: NL-XX-XXX-M-HKM
|
||||
ghcid_original: NL-XX-XXX-M-HKM
|
||||
ghcid_uuid: b936825d-f6f5-5f75-9693-59ffe0983b6e
|
||||
ghcid_uuid_sha256: bfd2770a-451a-80ec-8095-a0315a56c4eb
|
||||
ghcid_numeric: 13822241092346228972
|
||||
record_id: b51e4aff-c0c7-40da-b064-3f308125b650
|
||||
generation_timestamp: '2025-12-16T21:06:41.385742+00:00'
|
||||
ghcid_history:
|
||||
- ghcid: NL-XX-XXX-M-HKM
|
||||
ghcid_numeric: 13822241092346228972
|
||||
valid_from: '2025-12-16T21:06:41.385742+00:00'
|
||||
valid_to: null
|
||||
reason: Initial GHCID assignment from LinkedIn batch import
|
||||
location_resolution:
|
||||
method: UNRESOLVED
|
||||
city_code: XXX
|
||||
region_code: XX
|
||||
country_code: NL
|
||||
provenance:
|
||||
schema_version: 1.0.0
|
||||
generated_at: '2025-12-16T21:06:41.385742+00:00'
|
||||
sources:
|
||||
linkedin:
|
||||
- source_type: linkedin_company_profile
|
||||
data_tier: TIER_4_INFERRED
|
||||
source_file: data/custodian/linkedin/hollands-kaas-museum.yaml
|
||||
extraction_timestamp: '2025-12-16T21:06:41.385742+00:00'
|
||||
claims_extracted:
|
||||
- name
|
||||
- industry
|
||||
- location
|
||||
- website
|
||||
- staff_count
|
||||
- heritage_staff
|
||||
data_tier_summary:
|
||||
TIER_4_INFERRED:
|
||||
- linkedin_company_profile
|
||||
notes:
|
||||
- Created from unmatched LinkedIn company profile
|
||||
- 'Location resolution method: UNRESOLVED'
|
||||
|
|
@ -18,28 +18,38 @@ linkedin_enrichment:
|
|||
original_file: data/custodian/linkedin/het-kaas-museum.yaml
|
||||
schema_version: 1.0.0
|
||||
location:
|
||||
city: ''
|
||||
region: XX
|
||||
city: Bodegraven
|
||||
region: Zuid-Holland
|
||||
country: NL
|
||||
address: Marktstraat 1, 2411 BE Bodegraven
|
||||
ghcid:
|
||||
ghcid_current: NL-XX-XXX-M-KM
|
||||
ghcid_current: NL-ZH-BOD-M-KM
|
||||
ghcid_original: NL-XX-XXX-M-KM
|
||||
ghcid_uuid: 7846a559-0853-5206-b4b9-466f633e8f37
|
||||
ghcid_uuid_sha256: cb461ec7-5b09-808f-9bc2-1c805d2534dd
|
||||
ghcid_numeric: 14647428679643816079
|
||||
ghcid_uuid: a36d9483-7116-5e2a-97fe-182b3517f1ef
|
||||
ghcid_uuid_sha256: 2c53af73-8d88-8c89-b68d-cc86cc3491bb
|
||||
ghcid_numeric: 3194089471566462089
|
||||
record_id: 91fffa60-6f4f-4cb9-9579-385811b087a6
|
||||
generation_timestamp: '2025-12-16T21:06:36.526045+00:00'
|
||||
ghcid_history:
|
||||
- ghcid: NL-XX-XXX-M-KM
|
||||
ghcid_numeric: 14647428679643816079
|
||||
valid_from: '2025-12-16T21:06:36.526045+00:00'
|
||||
valid_to: null
|
||||
valid_to: '2025-12-17T10:08:05.564102+00:00'
|
||||
reason: Initial GHCID assignment from LinkedIn batch import
|
||||
- ghcid: NL-ZH-BOD-M-KM
|
||||
ghcid_numeric: 3194089471566462089
|
||||
valid_from: '2025-12-17T10:08:05.564102+00:00'
|
||||
valid_to: null
|
||||
reason: Location enriched via Exa web search - Bodegraven, Zuid-Holland
|
||||
location_resolution:
|
||||
method: UNRESOLVED
|
||||
city_code: XXX
|
||||
region_code: XX
|
||||
method: EXA_WEB_SEARCH
|
||||
city_code: BOD
|
||||
city_name: Bodegraven
|
||||
region_code: ZH
|
||||
region_name: Zuid-Holland
|
||||
country_code: NL
|
||||
resolution_date: '2025-12-17T10:08:05.564102+00:00'
|
||||
source_url: https://www.hetkaasmuseum.nl/
|
||||
provenance:
|
||||
schema_version: 1.0.0
|
||||
generated_at: '2025-12-16T21:06:36.526045+00:00'
|
||||
|
|
@ -56,9 +66,19 @@ provenance:
|
|||
- website
|
||||
- staff_count
|
||||
- heritage_staff
|
||||
web_search:
|
||||
- source_type: exa_web_search
|
||||
data_tier: TIER_3_CROWD_SOURCED
|
||||
source_url: https://www.hetkaasmuseum.nl/
|
||||
extraction_timestamp: '2025-12-17T10:08:05.564102+00:00'
|
||||
claims_extracted:
|
||||
- city
|
||||
- region
|
||||
- address
|
||||
data_tier_summary:
|
||||
TIER_4_INFERRED:
|
||||
- linkedin_company_profile
|
||||
notes:
|
||||
- Created from unmatched LinkedIn company profile
|
||||
- 'Location resolution method: UNRESOLVED'
|
||||
- 'Location enriched on 2025-12-17 via Exa web search: Bodegraven, Zuid-Holland'
|
||||
|
|
@ -16,12 +16,14 @@ linkedin_enrichment:
|
|||
headline: ''
|
||||
heritage_type: M
|
||||
- name: Oumaima Hajri أميمة حاجري
|
||||
headline: AI Ethicist/Data Scientist | Sr. International Advisor, Dutch DPA | AI Ethics & Society, University of Cambridge
|
||||
| Elsevier 30 under 30 | Public speaker
|
||||
headline: AI Ethicist/Data Scientist | Sr. International Advisor, Dutch DPA |
|
||||
AI Ethics & Society, University of Cambridge | Elsevier 30 under 30 | Public
|
||||
speaker
|
||||
linkedin_url: https://www.linkedin.com/in/oumaima-hajri
|
||||
heritage_type: S
|
||||
- name: Annemarie Bloemen-Patberg
|
||||
headline: Senior Strategic Advisor AI law @Dutch DCA / AI for Business @Oxford University Saïd Business School
|
||||
headline: Senior Strategic Advisor AI law @Dutch DCA / AI for Business @Oxford
|
||||
University Saïd Business School
|
||||
linkedin_url: https://www.linkedin.com/in/annemarie-bloemen-patberg-2203bb5
|
||||
heritage_type: E
|
||||
- name: Mies Beljaars - Snellen van Vollenhoven
|
||||
|
|
@ -60,7 +62,8 @@ linkedin_enrichment:
|
|||
linkedin_url: https://www.linkedin.com/in/samira-farouk-0b2520b6
|
||||
heritage_type: R
|
||||
- name: Emma-Julia Vuijk
|
||||
headline: Double Master Student Law and Technology & International and European Law - Werkstudent Autoriteit Persoonsgegevens
|
||||
headline: Double Master Student Law and Technology & International and European
|
||||
Law - Werkstudent Autoriteit Persoonsgegevens
|
||||
linkedin_url: https://www.linkedin.com/in/emmajuliavuijk
|
||||
heritage_type: E
|
||||
- name: Melike Yeniay
|
||||
|
|
@ -140,7 +143,8 @@ linkedin_enrichment:
|
|||
linkedin_url: https://www.linkedin.com/in/mr-anne-loes-van-den-brand-valk-73583557
|
||||
heritage_type: R
|
||||
- name: Ruqaya Zaki
|
||||
headline: BSc Law Student at Leiden University | Judicial supporter @ Dutch Data Protection Authority
|
||||
headline: BSc Law Student at Leiden University | Judicial supporter @ Dutch Data
|
||||
Protection Authority
|
||||
linkedin_url: https://www.linkedin.com/in/ruqaya-zaki
|
||||
heritage_type: E
|
||||
- name: Tessa van Wickevoort Crommelin-van Velzen
|
||||
|
|
@ -160,28 +164,38 @@ linkedin_enrichment:
|
|||
original_file: data/custodian/linkedin/autoriteit-persoonsgegevens-ap-dutch-dpa.yaml
|
||||
schema_version: 1.0.0
|
||||
location:
|
||||
city: ''
|
||||
region: XX
|
||||
city: Den Haag
|
||||
region: Zuid-Holland
|
||||
country: NL
|
||||
address: Bezuidenhoutseweg 30, 2594 AV Den Haag
|
||||
ghcid:
|
||||
ghcid_current: NL-XX-XXX-E-APADD
|
||||
ghcid_current: NL-ZH-DHA-E-APADD
|
||||
ghcid_original: NL-XX-XXX-E-APADD
|
||||
ghcid_uuid: d5ccddb8-f488-5cf8-98e1-bd7343c9a0fd
|
||||
ghcid_uuid_sha256: c6433cf3-6117-812b-ae67-99c499d6eb42
|
||||
ghcid_numeric: 14286329458952962347
|
||||
ghcid_uuid: 370a68bc-969d-53a9-811a-ee3b1d2ef12b
|
||||
ghcid_uuid_sha256: 237c1e87-0bb9-8f2e-a36d-36d950cbe0a3
|
||||
ghcid_numeric: 2556952253805727534
|
||||
record_id: acf11347-204a-40e8-8098-6ad5ea33e6f3
|
||||
generation_timestamp: '2025-12-16T21:06:42.375776+00:00'
|
||||
ghcid_history:
|
||||
- ghcid: NL-XX-XXX-E-APADD
|
||||
ghcid_numeric: 14286329458952962347
|
||||
valid_from: '2025-12-16T21:06:42.375776+00:00'
|
||||
valid_to: null
|
||||
valid_to: '2025-12-17T09:57:11.740348+00:00'
|
||||
reason: Initial GHCID assignment from LinkedIn batch import
|
||||
- ghcid: NL-ZH-DHA-E-APADD
|
||||
ghcid_numeric: 2556952253805727534
|
||||
valid_from: '2025-12-17T09:57:11.740348+00:00'
|
||||
valid_to: null
|
||||
reason: Location enriched via Exa web search - Den Haag, Zuid-Holland
|
||||
location_resolution:
|
||||
method: UNRESOLVED
|
||||
city_code: XXX
|
||||
region_code: XX
|
||||
method: EXA_WEB_SEARCH
|
||||
city_code: DHA
|
||||
city_name: Den Haag
|
||||
region_code: ZH
|
||||
region_name: Zuid-Holland
|
||||
country_code: NL
|
||||
resolution_date: '2025-12-17T09:57:11.740348+00:00'
|
||||
source_url: https://autoriteitpersoonsgegevens.nl/
|
||||
provenance:
|
||||
schema_version: 1.0.0
|
||||
generated_at: '2025-12-16T21:06:42.375776+00:00'
|
||||
|
|
@ -198,9 +212,19 @@ provenance:
|
|||
- website
|
||||
- staff_count
|
||||
- heritage_staff
|
||||
web_search:
|
||||
- source_type: exa_web_search
|
||||
data_tier: TIER_3_CROWD_SOURCED
|
||||
source_url: https://autoriteitpersoonsgegevens.nl/
|
||||
extraction_timestamp: '2025-12-17T09:57:11.740348+00:00'
|
||||
claims_extracted:
|
||||
- city
|
||||
- region
|
||||
- address
|
||||
data_tier_summary:
|
||||
TIER_4_INFERRED:
|
||||
- linkedin_company_profile
|
||||
notes:
|
||||
- Created from unmatched LinkedIn company profile
|
||||
- 'Location resolution method: UNRESOLVED'
|
||||
- 'Location enriched on 2025-12-17 via Exa web search: Den Haag, Zuid-Holland'
|
||||
|
|
@ -35,7 +35,8 @@ linkedin_enrichment:
|
|||
linkedin_url: https://www.linkedin.com/in/anna-holties-771939152
|
||||
heritage_type: M
|
||||
- name: Lianne Sleutjes
|
||||
headline: Woordvoerder Inspectie Justitie en Veiligheid at Ministerie van Justitie en Veiligheid
|
||||
headline: Woordvoerder Inspectie Justitie en Veiligheid at Ministerie van Justitie
|
||||
en Veiligheid
|
||||
linkedin_url: https://www.linkedin.com/in/lianne-sleutjes-a805338
|
||||
heritage_type: O
|
||||
- name: Daman Jafra
|
||||
|
|
@ -43,7 +44,8 @@ linkedin_enrichment:
|
|||
linkedin_url: https://www.linkedin.com/in/daman-jafra-32b1bb236
|
||||
heritage_type: E
|
||||
- name: Selene Fagel
|
||||
headline: Coördinerend specialistisch inspecteur (PhD) Familie inspecteur Vertrouwenspersoon Inspectie Justitie en Veiligheid
|
||||
headline: Coördinerend specialistisch inspecteur (PhD) Familie inspecteur Vertrouwenspersoon
|
||||
Inspectie Justitie en Veiligheid
|
||||
linkedin_url: https://www.linkedin.com/in/selene-fagel-1287645
|
||||
heritage_type: E
|
||||
- name: Kirsten Van Noort
|
||||
|
|
@ -51,7 +53,8 @@ linkedin_enrichment:
|
|||
linkedin_url: https://www.linkedin.com/in/kirsten-van-noort-4385812b7
|
||||
heritage_type: M
|
||||
- name: Wieger van der Heide
|
||||
headline: Senior inspecteur bij het Ministerie van VenJ / Inspectie Veiligheid en Justitie
|
||||
headline: Senior inspecteur bij het Ministerie van VenJ / Inspectie Veiligheid
|
||||
en Justitie
|
||||
linkedin_url: https://www.linkedin.com/in/wieger-van-der-heide-91857478
|
||||
heritage_type: O
|
||||
- name: Madhu R.
|
||||
|
|
@ -72,28 +75,38 @@ linkedin_enrichment:
|
|||
original_file: data/custodian/linkedin/inspectie-justitie-en-veiligheid.yaml
|
||||
schema_version: 1.0.0
|
||||
location:
|
||||
city: ''
|
||||
region: XX
|
||||
city: Den Haag
|
||||
region: Zuid-Holland
|
||||
country: NL
|
||||
address: Hoge Nieuwstraat 8, 2514 EL Den Haag
|
||||
ghcid:
|
||||
ghcid_current: NL-XX-XXX-E-IJV
|
||||
ghcid_current: NL-ZH-DHA-E-IJV
|
||||
ghcid_original: NL-XX-XXX-E-IJV
|
||||
ghcid_uuid: f17d729f-be1d-544f-846f-893be1caf69c
|
||||
ghcid_uuid_sha256: 236e4c6e-f6da-885a-9aa7-2c0be92d8a2a
|
||||
ghcid_numeric: 2553062078237304922
|
||||
ghcid_uuid: f7fc9fb4-146c-51e9-a5e8-c8fea8c7c34c
|
||||
ghcid_uuid_sha256: 1251c359-7bcf-8c90-8a5e-1718a915cafe
|
||||
ghcid_numeric: 1320050954892979344
|
||||
record_id: 4c890a50-1738-45fd-b173-a38eb7fffa3f
|
||||
generation_timestamp: '2025-12-16T21:06:47.359970+00:00'
|
||||
ghcid_history:
|
||||
- ghcid: NL-XX-XXX-E-IJV
|
||||
ghcid_numeric: 2553062078237304922
|
||||
valid_from: '2025-12-16T21:06:47.359970+00:00'
|
||||
valid_to: null
|
||||
valid_to: '2025-12-17T09:57:11.821589+00:00'
|
||||
reason: Initial GHCID assignment from LinkedIn batch import
|
||||
- ghcid: NL-ZH-DHA-E-IJV
|
||||
ghcid_numeric: 1320050954892979344
|
||||
valid_from: '2025-12-17T09:57:11.821589+00:00'
|
||||
valid_to: null
|
||||
reason: Location enriched via Exa web search - Den Haag, Zuid-Holland
|
||||
location_resolution:
|
||||
method: UNRESOLVED
|
||||
city_code: XXX
|
||||
region_code: XX
|
||||
method: EXA_WEB_SEARCH
|
||||
city_code: DHA
|
||||
city_name: Den Haag
|
||||
region_code: ZH
|
||||
region_name: Zuid-Holland
|
||||
country_code: NL
|
||||
resolution_date: '2025-12-17T09:57:11.821589+00:00'
|
||||
source_url: https://www.inspectie-jenv.nl/
|
||||
provenance:
|
||||
schema_version: 1.0.0
|
||||
generated_at: '2025-12-16T21:06:47.359970+00:00'
|
||||
|
|
@ -110,9 +123,19 @@ provenance:
|
|||
- website
|
||||
- staff_count
|
||||
- heritage_staff
|
||||
web_search:
|
||||
- source_type: exa_web_search
|
||||
data_tier: TIER_3_CROWD_SOURCED
|
||||
source_url: https://www.inspectie-jenv.nl/
|
||||
extraction_timestamp: '2025-12-17T09:57:11.821589+00:00'
|
||||
claims_extracted:
|
||||
- city
|
||||
- region
|
||||
- address
|
||||
data_tier_summary:
|
||||
TIER_4_INFERRED:
|
||||
- linkedin_company_profile
|
||||
notes:
|
||||
- Created from unmatched LinkedIn company profile
|
||||
- 'Location resolution method: UNRESOLVED'
|
||||
- 'Location enriched on 2025-12-17 via Exa web search: Den Haag, Zuid-Holland'
|
||||
|
|
@ -30,7 +30,8 @@ linkedin_enrichment:
|
|||
linkedin_url: https://www.linkedin.com/in/pieter-bots-78140a22
|
||||
heritage_type: E
|
||||
- name: Annet Pasveer
|
||||
headline: Architectuurhistoricus | Senior beleidsadviseur Monumenten & Archeologie bij Raad voor Cultuur
|
||||
headline: Architectuurhistoricus | Senior beleidsadviseur Monumenten & Archeologie
|
||||
bij Raad voor Cultuur
|
||||
linkedin_url: https://www.linkedin.com/in/annetpasveer
|
||||
heritage_type: R
|
||||
- name: Mirjam Sneeuwloper
|
||||
|
|
@ -38,7 +39,8 @@ linkedin_enrichment:
|
|||
linkedin_url: https://www.linkedin.com/in/mirjamsneeuwloper
|
||||
heritage_type: M
|
||||
- name: Kiran Sukul
|
||||
headline: Sr. beleidsadviseur musea Raad voor Cultuur • Kunst, Cultuur & Erfgoed • Governance
|
||||
headline: Sr. beleidsadviseur musea Raad voor Cultuur • Kunst, Cultuur & Erfgoed
|
||||
• Governance
|
||||
linkedin_url: https://www.linkedin.com/in/sukul13881
|
||||
heritage_type: M
|
||||
- name: Ronald Nijboer
|
||||
|
|
@ -55,7 +57,8 @@ linkedin_enrichment:
|
|||
linkedin_url: https://www.linkedin.com/in/philippine-jenster-15567156
|
||||
heritage_type: L
|
||||
- name: Kirsten de Graaff
|
||||
headline: Specialist/adviseur cultuureducatie, cultuurmakers en woordkunst. Beleidsmatig, strategisch en uitvoerend.
|
||||
headline: Specialist/adviseur cultuureducatie, cultuurmakers en woordkunst. Beleidsmatig,
|
||||
strategisch en uitvoerend.
|
||||
linkedin_url: https://www.linkedin.com/in/kirsten-de-graaff-76467a2
|
||||
heritage_type: E
|
||||
- name: Marc de Beyer
|
||||
|
|
@ -66,7 +69,8 @@ linkedin_enrichment:
|
|||
headline: Bureau of the Dutch Council for Culture, The Hague
|
||||
linkedin_url: https://www.linkedin.com/in/marieke-van-ommeren-b6206438
|
||||
- name: Christien Bok
|
||||
headline: creatieve initiator van innovaties voor veilig en toegankelijk onderwijs van hoge kwaliteit
|
||||
headline: creatieve initiator van innovaties voor veilig en toegankelijk onderwijs
|
||||
van hoge kwaliteit
|
||||
linkedin_url: https://www.linkedin.com/in/christienbok
|
||||
heritage_type: E
|
||||
- name: Lejo Schenk
|
||||
|
|
@ -84,7 +88,8 @@ linkedin_enrichment:
|
|||
linkedin_url: https://www.linkedin.com/in/evakleeman
|
||||
heritage_type: M
|
||||
- name: Judi Mesman
|
||||
headline: Distinguished professor societal responsibility and impact, author of Leadership in color (Leiderschap in kleur)
|
||||
headline: Distinguished professor societal responsibility and impact, author of
|
||||
Leadership in color (Leiderschap in kleur)
|
||||
linkedin_url: https://www.linkedin.com/in/judi-mesman-64279b140
|
||||
heritage_type: E
|
||||
- name: Madelon Van Wandelen
|
||||
|
|
@ -125,7 +130,8 @@ linkedin_enrichment:
|
|||
linkedin_url: https://www.linkedin.com/in/gwen-parry-7923ba13b
|
||||
heritage_type: R
|
||||
- name: Femke van Hest
|
||||
headline: Strategisch adviseur cultuur gemeente ‘s-Hertogenbosch | Adviseur Raad voor Cultuur
|
||||
headline: Strategisch adviseur cultuur gemeente ‘s-Hertogenbosch | Adviseur Raad
|
||||
voor Cultuur
|
||||
linkedin_url: https://www.linkedin.com/in/femkevanhest
|
||||
heritage_type: O
|
||||
- name: Ellen Hardy
|
||||
|
|
@ -146,8 +152,8 @@ linkedin_enrichment:
|
|||
headline: directeur-bestuurder bij Cultuurkwadraat / directeur Cultuurconsortium
|
||||
linkedin_url: https://www.linkedin.com/in/josje-de-regt-0a018b52
|
||||
- name: Wilbert Mutsaers
|
||||
headline: CEO/Algemeen Directeur Agents After All | Kroonlid Raad voor Cultuur | ex-Spotify, Mojo/Live Nation, NPO 3FM/FunX,
|
||||
Universal Music & Sony Music
|
||||
headline: CEO/Algemeen Directeur Agents After All | Kroonlid Raad voor Cultuur
|
||||
| ex-Spotify, Mojo/Live Nation, NPO 3FM/FunX, Universal Music & Sony Music
|
||||
linkedin_url: https://www.linkedin.com/in/wilbert-mutsaers-22256a
|
||||
- name: Lies Wijnterp PhD
|
||||
headline: Researcher, cultural manager
|
||||
|
|
@ -160,11 +166,13 @@ linkedin_enrichment:
|
|||
headline: Zakelijk leider in de culturele sector | Lerende bij De Metselarij
|
||||
linkedin_url: https://www.linkedin.com/in/elisiape%C3%A7as
|
||||
- name: Otto Berg
|
||||
headline: Auditor bij CBCT - Certificeringsorganisatie Bibliotheekwerk, Cultuur en Taal
|
||||
headline: Auditor bij CBCT - Certificeringsorganisatie Bibliotheekwerk, Cultuur
|
||||
en Taal
|
||||
linkedin_url: https://www.linkedin.com/in/otto-berg-30686410
|
||||
heritage_type: L
|
||||
- name: Vanessa Lann
|
||||
headline: Head of Composition for Film and Theatre, at ArtEZ University of the Arts
|
||||
headline: Head of Composition for Film and Theatre, at ArtEZ University of the
|
||||
Arts
|
||||
linkedin_url: https://www.linkedin.com/in/vanessa-lann-a51724276
|
||||
heritage_type: E
|
||||
- name: Ilonka Kolthof
|
||||
|
|
@ -179,7 +187,8 @@ linkedin_enrichment:
|
|||
linkedin_url: https://www.linkedin.com/in/joepvossebeld
|
||||
heritage_type: M
|
||||
- name: Edo Righini
|
||||
headline: Director of de Doelen; Crown Member of the Dutch Council for Culture. EMBA HEC Paris
|
||||
headline: Director of de Doelen; Crown Member of the Dutch Council for Culture.
|
||||
EMBA HEC Paris
|
||||
linkedin_url: https://www.linkedin.com/in/edo-righini-7b303129
|
||||
- name: Marc Versteeg
|
||||
headline: voor de culturele sector
|
||||
|
|
@ -254,28 +263,38 @@ linkedin_enrichment:
|
|||
original_file: data/custodian/linkedin/raad-voor-cultuur.yaml
|
||||
schema_version: 1.0.0
|
||||
location:
|
||||
city: ''
|
||||
region: XX
|
||||
city: Den Haag
|
||||
region: Zuid-Holland
|
||||
country: NL
|
||||
address: Prins Willem Alexanderhof 20, 2595 BE Den Haag
|
||||
ghcid:
|
||||
ghcid_current: NL-XX-XXX-E-RC
|
||||
ghcid_current: NL-ZH-DHA-E-RC
|
||||
ghcid_original: NL-XX-XXX-E-RC
|
||||
ghcid_uuid: de024a36-abbe-53d1-b137-9a2518941e40
|
||||
ghcid_uuid_sha256: dcb4a290-0d37-8352-a637-cacbd563e68f
|
||||
ghcid_numeric: 15903514923732800338
|
||||
ghcid_uuid: 1c22d683-b4d0-5fc4-afec-ad60ef82995f
|
||||
ghcid_uuid_sha256: 7d58a26e-c63a-8a79-8643-8117147c1b10
|
||||
ghcid_numeric: 9032147649347328633
|
||||
record_id: 716f5d24-baa6-409c-a708-9c07f0241568
|
||||
generation_timestamp: '2025-12-16T21:06:42.755062+00:00'
|
||||
ghcid_history:
|
||||
- ghcid: NL-XX-XXX-E-RC
|
||||
ghcid_numeric: 15903514923732800338
|
||||
valid_from: '2025-12-16T21:06:42.755062+00:00'
|
||||
valid_to: null
|
||||
valid_to: '2025-12-17T09:57:11.795457+00:00'
|
||||
reason: Initial GHCID assignment from LinkedIn batch import
|
||||
- ghcid: NL-ZH-DHA-E-RC
|
||||
ghcid_numeric: 9032147649347328633
|
||||
valid_from: '2025-12-17T09:57:11.795457+00:00'
|
||||
valid_to: null
|
||||
reason: Location enriched via Exa web search - Den Haag, Zuid-Holland
|
||||
location_resolution:
|
||||
method: UNRESOLVED
|
||||
city_code: XXX
|
||||
region_code: XX
|
||||
method: EXA_WEB_SEARCH
|
||||
city_code: DHA
|
||||
city_name: Den Haag
|
||||
region_code: ZH
|
||||
region_name: Zuid-Holland
|
||||
country_code: NL
|
||||
resolution_date: '2025-12-17T09:57:11.795457+00:00'
|
||||
source_url: https://raadvoorcultuur.nl/
|
||||
provenance:
|
||||
schema_version: 1.0.0
|
||||
generated_at: '2025-12-16T21:06:42.755062+00:00'
|
||||
|
|
@ -292,9 +311,19 @@ provenance:
|
|||
- website
|
||||
- staff_count
|
||||
- heritage_staff
|
||||
web_search:
|
||||
- source_type: exa_web_search
|
||||
data_tier: TIER_3_CROWD_SOURCED
|
||||
source_url: https://raadvoorcultuur.nl/
|
||||
extraction_timestamp: '2025-12-17T09:57:11.795457+00:00'
|
||||
claims_extracted:
|
||||
- city
|
||||
- region
|
||||
- address
|
||||
data_tier_summary:
|
||||
TIER_4_INFERRED:
|
||||
- linkedin_company_profile
|
||||
notes:
|
||||
- Created from unmatched LinkedIn company profile
|
||||
- 'Location resolution method: UNRESOLVED'
|
||||
- 'Location enriched on 2025-12-17 via Exa web search: Den Haag, Zuid-Holland'
|
||||
|
|
@ -28,7 +28,8 @@ linkedin_enrichment:
|
|||
linkedin_url: https://www.linkedin.com/in/jannekevanasperen
|
||||
heritage_type: M
|
||||
- name: Tamar van Riessen
|
||||
headline: PhD-Candidate Art History & Visual Culture | Curator 17th-Century Paintings | Art Historian
|
||||
headline: PhD-Candidate Art History & Visual Culture | Curator 17th-Century Paintings
|
||||
| Art Historian
|
||||
linkedin_url: https://www.linkedin.com/in/tamar-van-riessen-959248177
|
||||
heritage_type: M
|
||||
- name: Femke van Leeuwen-Jansen
|
||||
|
|
@ -44,28 +45,38 @@ linkedin_enrichment:
|
|||
original_file: data/custodian/linkedin/codart.yaml
|
||||
schema_version: 1.0.0
|
||||
location:
|
||||
city: ''
|
||||
region: XX
|
||||
city: Den Haag
|
||||
region: Zuid-Holland
|
||||
country: NL
|
||||
address: Koninginnegracht 15, 2514 AB Den Haag
|
||||
ghcid:
|
||||
ghcid_current: NL-XX-XXX-M-C-codart
|
||||
ghcid_current: NL-ZH-DHA-M-C-codart
|
||||
ghcid_original: NL-XX-XXX-M-C-codart
|
||||
ghcid_uuid: 8cf5c2ed-b8fe-523f-89e5-e996147f2943
|
||||
ghcid_uuid_sha256: 9d0c059d-9b9e-8965-9c08-705eef0556ee
|
||||
ghcid_numeric: 11316426138154068325
|
||||
ghcid_uuid: 932104a7-6e72-57c3-a4d1-f3c6218f277e
|
||||
ghcid_uuid_sha256: 11cb3492-54b6-8435-ab01-46e83370b99f
|
||||
ghcid_numeric: 1282176322008179765
|
||||
record_id: 0afab665-c130-445d-a2e0-4c79f1ed1759
|
||||
generation_timestamp: '2025-12-16T21:06:44.128751+00:00'
|
||||
ghcid_history:
|
||||
- ghcid: NL-XX-XXX-M-C-codart
|
||||
ghcid_numeric: 11316426138154068325
|
||||
valid_from: '2025-12-16T21:06:44.128751+00:00'
|
||||
valid_to: null
|
||||
valid_to: '2025-12-17T09:33:15.796477+00:00'
|
||||
reason: Initial GHCID assignment from LinkedIn batch import
|
||||
- ghcid: NL-ZH-DHA-M-C-codart
|
||||
ghcid_numeric: 1282176322008179765
|
||||
valid_from: '2025-12-17T09:33:15.796477+00:00'
|
||||
valid_to: null
|
||||
reason: Location enriched via Exa web search - Den Haag, Zuid-Holland
|
||||
location_resolution:
|
||||
method: UNRESOLVED
|
||||
city_code: XXX
|
||||
region_code: XX
|
||||
method: EXA_WEB_SEARCH
|
||||
city_code: DHA
|
||||
city_name: Den Haag
|
||||
region_code: ZH
|
||||
region_name: Zuid-Holland
|
||||
country_code: NL
|
||||
resolution_date: '2025-12-17T09:33:15.796477+00:00'
|
||||
source_url: https://www.codart.nl/
|
||||
provenance:
|
||||
schema_version: 1.0.0
|
||||
generated_at: '2025-12-16T21:06:44.128751+00:00'
|
||||
|
|
@ -82,9 +93,19 @@ provenance:
|
|||
- website
|
||||
- staff_count
|
||||
- heritage_staff
|
||||
web_search:
|
||||
- source_type: exa_web_search
|
||||
data_tier: TIER_3_CROWD_SOURCED
|
||||
source_url: https://www.codart.nl/
|
||||
extraction_timestamp: '2025-12-17T09:33:15.796477+00:00'
|
||||
claims_extracted:
|
||||
- city
|
||||
- region
|
||||
- address
|
||||
data_tier_summary:
|
||||
TIER_4_INFERRED:
|
||||
- linkedin_company_profile
|
||||
notes:
|
||||
- Created from unmatched LinkedIn company profile
|
||||
- 'Location resolution method: UNRESOLVED'
|
||||
- 'Location enriched on 2025-12-17 via Exa web search: Den Haag, Zuid-Holland'
|
||||
|
|
@ -103,23 +103,38 @@ linkedin_enrichment:
|
|||
original_file: data/custodian/linkedin/cultuurschakel.yaml
|
||||
schema_version: 1.0.0
|
||||
location:
|
||||
city: null
|
||||
region: null
|
||||
city: Den Haag
|
||||
region: Zuid-Holland
|
||||
country: NL
|
||||
address: Den Haag, Netherlands
|
||||
ghcid:
|
||||
ghcid_current: NL-XX-XXX-M-C
|
||||
ghcid_current: NL-ZH-DHA-M-C
|
||||
ghcid_original: NL-XX-XXX-M-C
|
||||
ghcid_uuid: a1434c1d-73d9-5630-a0b7-2f2a87bdcdec
|
||||
ghcid_uuid_sha256: e5e6fc5b-a7f2-8ea3-8625-eeab02101858
|
||||
ghcid_numeric: 16566205749918891683
|
||||
ghcid_uuid: 65f3c875-a26d-5d51-acdb-f534a44aeecc
|
||||
ghcid_uuid_sha256: 426f1a86-c2b5-8d14-a981-320d7e1211da
|
||||
ghcid_numeric: 4787074095012740372
|
||||
record_id: a97e198d-10ca-4595-9b13-073a8b741f07
|
||||
generation_timestamp: '2025-12-16T21:06:38.738101+00:00'
|
||||
ghcid_history:
|
||||
- ghcid: NL-XX-XXX-M-C
|
||||
ghcid_numeric: 16566205749918891683
|
||||
valid_from: '2025-12-17T08:44:25.966336+00:00'
|
||||
valid_to: null
|
||||
valid_to: '2025-12-17T09:33:15.877796+00:00'
|
||||
reason: Reverted incorrect LinkedIn HTML extraction - original XX-XXX restored
|
||||
- ghcid: NL-ZH-DHA-M-C
|
||||
ghcid_numeric: 4787074095012740372
|
||||
valid_from: '2025-12-17T09:33:15.877796+00:00'
|
||||
valid_to: null
|
||||
reason: Location enriched via Exa web search - Den Haag, Zuid-Holland
|
||||
location_resolution:
|
||||
method: EXA_WEB_SEARCH
|
||||
city_code: DHA
|
||||
city_name: Den Haag
|
||||
region_code: ZH
|
||||
region_name: Zuid-Holland
|
||||
country_code: NL
|
||||
resolution_date: '2025-12-17T09:33:15.877796+00:00'
|
||||
source_url: https://www.cultuurschakel.nl/
|
||||
provenance:
|
||||
schema_version: 1.0.0
|
||||
generated_at: '2025-12-16T21:06:38.738101+00:00'
|
||||
|
|
@ -136,6 +151,15 @@ provenance:
|
|||
- website
|
||||
- staff_count
|
||||
- heritage_staff
|
||||
web_search:
|
||||
- source_type: exa_web_search
|
||||
data_tier: TIER_3_CROWD_SOURCED
|
||||
source_url: https://www.cultuurschakel.nl/
|
||||
extraction_timestamp: '2025-12-17T09:33:15.877796+00:00'
|
||||
claims_extracted:
|
||||
- city
|
||||
- region
|
||||
- address
|
||||
data_tier_summary:
|
||||
TIER_4_INFERRED:
|
||||
- linkedin_company_profile
|
||||
|
|
@ -144,3 +168,4 @@ provenance:
|
|||
- 'Location resolution method: UNRESOLVED'
|
||||
- Reverted incorrect location enrichment on 2025-12-17 - LinkedIn HTML extraction
|
||||
was extracting wrong company's data
|
||||
- 'Location enriched on 2025-12-17 via Exa web search: Den Haag, Zuid-Holland'
|
||||
|
|
@ -18,28 +18,38 @@ linkedin_enrichment:
|
|||
original_file: data/custodian/linkedin/kresse-museum.yaml
|
||||
schema_version: 1.0.0
|
||||
location:
|
||||
city: ''
|
||||
region: XX
|
||||
city: Gouda
|
||||
region: Zuid-Holland
|
||||
country: NL
|
||||
address: Achter de Kerk 14, 2801 JX Gouda
|
||||
ghcid:
|
||||
ghcid_current: NL-XX-XXX-M-KM-kresse_museum
|
||||
ghcid_current: NL-ZH-GOU-M-KM-kresse_museum
|
||||
ghcid_original: NL-XX-XXX-M-KM-kresse_museum
|
||||
ghcid_uuid: 6b8e1a47-41dc-5654-9b67-84c813805af0
|
||||
ghcid_uuid_sha256: b0d1df63-6965-8cbe-8184-25ee491660e4
|
||||
ghcid_numeric: 12741210438870891710
|
||||
ghcid_uuid: f4fe63e9-9e94-5cbd-83b9-4744f5262e8b
|
||||
ghcid_uuid_sha256: 8c0997a3-e194-8d2d-9beb-882f3022388a
|
||||
ghcid_numeric: 10090763170220346669
|
||||
record_id: 3c3397e5-cde3-4cd9-91e8-4b55728a88af
|
||||
generation_timestamp: '2025-12-16T21:06:43.981200+00:00'
|
||||
ghcid_history:
|
||||
- ghcid: NL-XX-XXX-M-KM-kresse_museum
|
||||
ghcid_numeric: 12741210438870891710
|
||||
valid_from: '2025-12-16T21:06:43.981200+00:00'
|
||||
valid_to: null
|
||||
valid_to: '2025-12-17T09:57:11.866127+00:00'
|
||||
reason: Initial GHCID assignment from LinkedIn batch import
|
||||
- ghcid: NL-ZH-GOU-M-KM-kresse_museum
|
||||
ghcid_numeric: 10090763170220346669
|
||||
valid_from: '2025-12-17T09:57:11.866127+00:00'
|
||||
valid_to: null
|
||||
reason: Location enriched via Exa web search - Gouda, Zuid-Holland
|
||||
location_resolution:
|
||||
method: UNRESOLVED
|
||||
city_code: XXX
|
||||
region_code: XX
|
||||
method: EXA_WEB_SEARCH
|
||||
city_code: GOU
|
||||
city_name: Gouda
|
||||
region_code: ZH
|
||||
region_name: Zuid-Holland
|
||||
country_code: NL
|
||||
resolution_date: '2025-12-17T09:57:11.866127+00:00'
|
||||
source_url: https://www.kressemuseum.nl/
|
||||
provenance:
|
||||
schema_version: 1.0.0
|
||||
generated_at: '2025-12-16T21:06:43.981200+00:00'
|
||||
|
|
@ -56,9 +66,19 @@ provenance:
|
|||
- website
|
||||
- staff_count
|
||||
- heritage_staff
|
||||
web_search:
|
||||
- source_type: exa_web_search
|
||||
data_tier: TIER_3_CROWD_SOURCED
|
||||
source_url: https://www.kressemuseum.nl/
|
||||
extraction_timestamp: '2025-12-17T09:57:11.866127+00:00'
|
||||
claims_extracted:
|
||||
- city
|
||||
- region
|
||||
- address
|
||||
data_tier_summary:
|
||||
TIER_4_INFERRED:
|
||||
- linkedin_company_profile
|
||||
notes:
|
||||
- Created from unmatched LinkedIn company profile
|
||||
- 'Location resolution method: UNRESOLVED'
|
||||
- 'Location enriched on 2025-12-17 via Exa web search: Gouda, Zuid-Holland'
|
||||
|
|
@ -25,23 +25,38 @@ linkedin_enrichment:
|
|||
original_file: data/custodian/linkedin/atlantikwall-museum.yaml
|
||||
schema_version: 1.0.0
|
||||
location:
|
||||
city: null
|
||||
region: null
|
||||
city: Noordwijk
|
||||
region: Zuid-Holland
|
||||
country: NL
|
||||
address: Noordwijk, Zuid-Holland, Netherlands
|
||||
ghcid:
|
||||
ghcid_current: NL-XX-XXX-M-AM-atlantikwall_museum
|
||||
ghcid_current: NL-ZH-NRW-M-AM-atlantikwall_museum
|
||||
ghcid_original: NL-XX-XXX-M-AM-atlantikwall_museum
|
||||
ghcid_uuid: 73fb4ed0-8683-5f49-871a-7e38d3876855
|
||||
ghcid_uuid_sha256: 5203625a-77fe-8dd0-baa4-1154287a2d72
|
||||
ghcid_numeric: 5909675276739976656
|
||||
ghcid_uuid: af7ac4a4-ffe3-5edc-850f-b31a18c67b95
|
||||
ghcid_uuid_sha256: 5b8a41e9-fc43-8522-80bc-a47b6e5ed0a6
|
||||
ghcid_numeric: 6596157077453010210
|
||||
record_id: e0c12c4e-b088-48d4-a5dd-d2a7a73ca493
|
||||
generation_timestamp: '2025-12-16T21:06:41.396388+00:00'
|
||||
ghcid_history:
|
||||
- ghcid: NL-XX-XXX-M-AM-atlantikwall_museum
|
||||
ghcid_numeric: 5909675276739976656
|
||||
valid_from: '2025-12-17T08:44:26.007952+00:00'
|
||||
valid_to: null
|
||||
valid_to: '2025-12-17T09:25:04.226988+00:00'
|
||||
reason: Reverted incorrect LinkedIn HTML extraction - original XX-XXX restored
|
||||
- ghcid: NL-ZH-NRW-M-AM-atlantikwall_museum
|
||||
ghcid_numeric: 6596157077453010210
|
||||
valid_from: '2025-12-17T09:25:04.226988+00:00'
|
||||
valid_to: null
|
||||
reason: Location enriched via Exa web search - Noordwijk, Zuid-Holland
|
||||
location_resolution:
|
||||
method: EXA_WEB_SEARCH
|
||||
city_code: NRW
|
||||
city_name: Noordwijk
|
||||
region_code: ZH
|
||||
region_name: Zuid-Holland
|
||||
country_code: NL
|
||||
resolution_date: '2025-12-17T09:25:04.226988+00:00'
|
||||
source_url: https://www.atlantikwallmuseum.nl/
|
||||
provenance:
|
||||
schema_version: 1.0.0
|
||||
generated_at: '2025-12-16T21:06:41.396388+00:00'
|
||||
|
|
@ -58,6 +73,15 @@ provenance:
|
|||
- website
|
||||
- staff_count
|
||||
- heritage_staff
|
||||
web_search:
|
||||
- source_type: exa_web_search
|
||||
data_tier: TIER_3_CROWD_SOURCED
|
||||
source_url: https://www.atlantikwallmuseum.nl/
|
||||
extraction_timestamp: '2025-12-17T09:25:04.226988+00:00'
|
||||
claims_extracted:
|
||||
- city
|
||||
- region
|
||||
- address
|
||||
data_tier_summary:
|
||||
TIER_4_INFERRED:
|
||||
- linkedin_company_profile
|
||||
|
|
@ -66,3 +90,4 @@ provenance:
|
|||
- 'Location resolution method: UNRESOLVED'
|
||||
- Reverted incorrect location enrichment on 2025-12-17 - LinkedIn HTML extraction
|
||||
was extracting wrong company's data
|
||||
- 'Location enriched on 2025-12-17 via Exa web search: Noordwijk, Zuid-Holland'
|
||||
|
|
@ -43,28 +43,38 @@ linkedin_enrichment:
|
|||
original_file: data/custodian/linkedin/moslim-archief.yaml
|
||||
schema_version: 1.0.0
|
||||
location:
|
||||
city: ''
|
||||
region: XX
|
||||
city: Rotterdam
|
||||
region: Zuid-Holland
|
||||
country: NL
|
||||
address: Rotterdam, Netherlands
|
||||
ghcid:
|
||||
ghcid_current: NL-XX-XXX-A-MA
|
||||
ghcid_current: NL-ZH-ROT-A-MA
|
||||
ghcid_original: NL-XX-XXX-A-MA
|
||||
ghcid_uuid: 7215318b-74db-57ce-9921-09db2d6b8e20
|
||||
ghcid_uuid_sha256: 50024496-b73c-85c0-938d-b77e01e80c85
|
||||
ghcid_numeric: 5765245887097644480
|
||||
ghcid_uuid: 18570ab5-c0f9-5bb4-8e4c-11e35cfdebb8
|
||||
ghcid_uuid_sha256: 92b222fc-d21c-8eb2-8ff4-01400305db5e
|
||||
ghcid_numeric: 10570549744644181682
|
||||
record_id: d2e14ba0-55d0-42c1-b324-1b4b6226d836
|
||||
generation_timestamp: '2025-12-16T21:06:36.624598+00:00'
|
||||
ghcid_history:
|
||||
- ghcid: NL-XX-XXX-A-MA
|
||||
ghcid_numeric: 5765245887097644480
|
||||
valid_from: '2025-12-16T21:06:36.624598+00:00'
|
||||
valid_to: null
|
||||
valid_to: '2025-12-17T09:33:15.780333+00:00'
|
||||
reason: Initial GHCID assignment from LinkedIn batch import
|
||||
- ghcid: NL-ZH-ROT-A-MA
|
||||
ghcid_numeric: 10570549744644181682
|
||||
valid_from: '2025-12-17T09:33:15.780333+00:00'
|
||||
valid_to: null
|
||||
reason: Location enriched via Exa web search - Rotterdam, Zuid-Holland
|
||||
location_resolution:
|
||||
method: UNRESOLVED
|
||||
city_code: XXX
|
||||
region_code: XX
|
||||
method: EXA_WEB_SEARCH
|
||||
city_code: ROT
|
||||
city_name: Rotterdam
|
||||
region_code: ZH
|
||||
region_name: Zuid-Holland
|
||||
country_code: NL
|
||||
resolution_date: '2025-12-17T09:33:15.780333+00:00'
|
||||
source_url: https://moslimarchief.nl/
|
||||
provenance:
|
||||
schema_version: 1.0.0
|
||||
generated_at: '2025-12-16T21:06:36.624598+00:00'
|
||||
|
|
@ -81,9 +91,19 @@ provenance:
|
|||
- website
|
||||
- staff_count
|
||||
- heritage_staff
|
||||
web_search:
|
||||
- source_type: exa_web_search
|
||||
data_tier: TIER_3_CROWD_SOURCED
|
||||
source_url: https://moslimarchief.nl/
|
||||
extraction_timestamp: '2025-12-17T09:33:15.780333+00:00'
|
||||
claims_extracted:
|
||||
- city
|
||||
- region
|
||||
- address
|
||||
data_tier_summary:
|
||||
TIER_4_INFERRED:
|
||||
- linkedin_company_profile
|
||||
notes:
|
||||
- Created from unmatched LinkedIn company profile
|
||||
- 'Location resolution method: UNRESOLVED'
|
||||
- 'Location enriched on 2025-12-17 via Exa web search: Rotterdam, Zuid-Holland'
|
||||
|
|
@ -21,28 +21,38 @@ linkedin_enrichment:
|
|||
original_file: data/custodian/linkedin/dutch-pinball-museum.yaml
|
||||
schema_version: 1.0.0
|
||||
location:
|
||||
city: ''
|
||||
region: XX
|
||||
city: Rotterdam
|
||||
region: Zuid-Holland
|
||||
country: NL
|
||||
address: Voorhaven 17, 3024 RC Rotterdam
|
||||
ghcid:
|
||||
ghcid_current: NL-XX-XXX-M-DPM
|
||||
ghcid_current: NL-ZH-ROT-M-DPM
|
||||
ghcid_original: NL-XX-XXX-M-DPM
|
||||
ghcid_uuid: 9c34f38d-168a-5223-9726-799e16461c0c
|
||||
ghcid_uuid_sha256: 17c1e31a-45c8-8454-8fa4-db8bdc7d464f
|
||||
ghcid_numeric: 1711899035356951636
|
||||
ghcid_uuid: ec309d57-2eac-5371-a08c-360af34e4b08
|
||||
ghcid_uuid_sha256: 43c4854a-6502-8ffb-a89c-bbf1b151ff96
|
||||
ghcid_numeric: 4883174450545205243
|
||||
record_id: 49ba58fd-88c2-47fb-9ca5-b2b694147e0e
|
||||
generation_timestamp: '2025-12-16T21:06:43.663154+00:00'
|
||||
ghcid_history:
|
||||
- ghcid: NL-XX-XXX-M-DPM
|
||||
ghcid_numeric: 1711899035356951636
|
||||
valid_from: '2025-12-16T21:06:43.663154+00:00'
|
||||
valid_to: null
|
||||
valid_to: '2025-12-17T09:17:11.091657+00:00'
|
||||
reason: Initial GHCID assignment from LinkedIn batch import
|
||||
- ghcid: NL-ZH-ROT-M-DPM
|
||||
ghcid_numeric: 4883174450545205243
|
||||
valid_from: '2025-12-17T09:17:11.091657+00:00'
|
||||
valid_to: null
|
||||
reason: Location enriched via Exa web search - Rotterdam, Zuid-Holland
|
||||
location_resolution:
|
||||
method: UNRESOLVED
|
||||
city_code: XXX
|
||||
region_code: XX
|
||||
method: EXA_WEB_SEARCH
|
||||
city_code: ROT
|
||||
city_name: Rotterdam
|
||||
region_code: ZH
|
||||
region_name: Zuid-Holland
|
||||
country_code: NL
|
||||
resolution_date: '2025-12-17T09:17:11.091657+00:00'
|
||||
source_url: https://dutchpinballmuseum.com/
|
||||
provenance:
|
||||
schema_version: 1.0.0
|
||||
generated_at: '2025-12-16T21:06:43.663154+00:00'
|
||||
|
|
@ -59,9 +69,19 @@ provenance:
|
|||
- website
|
||||
- staff_count
|
||||
- heritage_staff
|
||||
web_search:
|
||||
- source_type: exa_web_search
|
||||
data_tier: TIER_3_CROWD_SOURCED
|
||||
source_url: https://dutchpinballmuseum.com/
|
||||
extraction_timestamp: '2025-12-17T09:17:11.091657+00:00'
|
||||
claims_extracted:
|
||||
- city
|
||||
- region
|
||||
- address
|
||||
data_tier_summary:
|
||||
TIER_4_INFERRED:
|
||||
- linkedin_company_profile
|
||||
notes:
|
||||
- Created from unmatched LinkedIn company profile
|
||||
- 'Location resolution method: UNRESOLVED'
|
||||
- 'Location enriched on 2025-12-17 via Exa web search: Rotterdam, Zuid-Holland'
|
||||
|
|
@ -2,7 +2,7 @@ custodian_name:
|
|||
emic_name: Municipality of Gaza
|
||||
emic_name_source: linkedin
|
||||
institution_type:
|
||||
- M
|
||||
- O
|
||||
linkedin_enrichment:
|
||||
linkedin_url: https://www.linkedin.com/company/municipality-of-gaza
|
||||
linkedin_slug: municipality-of-gaza
|
||||
|
|
@ -18,28 +18,40 @@ linkedin_enrichment:
|
|||
original_file: data/custodian/linkedin/municipality-of-gaza.yaml
|
||||
schema_version: 1.0.0
|
||||
location:
|
||||
city: ''
|
||||
region: XX
|
||||
country: NL
|
||||
city: Gaza City
|
||||
region: Gaza Strip
|
||||
country: PS
|
||||
ghcid:
|
||||
ghcid_current: NL-XX-XXX-M-MG
|
||||
ghcid_original: NL-XX-XXX-M-MG
|
||||
ghcid_uuid: 38ca88a9-b9da-55b8-80b0-f3d223ceb625
|
||||
ghcid_uuid_sha256: b26a272e-97aa-8e77-96fa-3a370b897bb8
|
||||
ghcid_numeric: 12856131167348993655
|
||||
ghcid_current: PS-GZ-GAZ-O-MG
|
||||
ghcid_original: PS-GZ-GAZ-O-MG
|
||||
ghcid_uuid: 2657ff95-d637-595a-88aa-541c7020c8d5
|
||||
ghcid_uuid_sha256: 2e49ae93-4105-89bb-9667-65aac87ec550
|
||||
ghcid_numeric: 3335388946518841787
|
||||
record_id: ea2d7d50-d449-4959-9bf6-8f00e2bddeb1
|
||||
generation_timestamp: '2025-12-16T21:06:41.550370+00:00'
|
||||
ghcid_history:
|
||||
- ghcid: NL-XX-XXX-M-MG
|
||||
ghcid_numeric: 12856131167348993655
|
||||
valid_from: '2025-12-16T21:06:41.550370+00:00'
|
||||
valid_to: null
|
||||
valid_to: '2025-12-17T09:53:29.215327+00:00'
|
||||
reason: Initial GHCID assignment from LinkedIn batch import
|
||||
- ghcid: PS-GZ-GAZ-O-MG
|
||||
ghcid_numeric: 3335388946518841787
|
||||
valid_from: '2025-12-17T09:53:29.215327+00:00'
|
||||
valid_to: null
|
||||
reason: 'Country code corrected: NL -> PS (Palestine). Location: Gaza City, Gaza
|
||||
Strip'
|
||||
location_resolution:
|
||||
method: UNRESOLVED
|
||||
city_code: XXX
|
||||
region_code: XX
|
||||
country_code: NL
|
||||
method: EXA_WEB_SEARCH
|
||||
city_code: GAZ
|
||||
city_name: Gaza City
|
||||
region_code: GZ
|
||||
region_name: Gaza Strip
|
||||
country_code: PS
|
||||
resolution_date: '2025-12-17T09:53:29.215327+00:00'
|
||||
source_url: https://www.gaza-city.org
|
||||
notes: Municipal government, founded 1898. Type corrected from M (Museum) to O
|
||||
(Official Institution)
|
||||
provenance:
|
||||
schema_version: 1.0.0
|
||||
generated_at: '2025-12-16T21:06:41.550370+00:00'
|
||||
|
|
@ -56,9 +68,21 @@ provenance:
|
|||
- website
|
||||
- staff_count
|
||||
- heritage_staff
|
||||
web_search:
|
||||
- source_type: exa_web_search
|
||||
data_tier: TIER_2_VERIFIED
|
||||
source_url: https://www.gaza-city.org
|
||||
extraction_timestamp: '2025-12-17T09:53:29.215327+00:00'
|
||||
claims_extracted:
|
||||
- country
|
||||
- region
|
||||
- city
|
||||
- address
|
||||
data_tier_summary:
|
||||
TIER_4_INFERRED:
|
||||
- linkedin_company_profile
|
||||
notes:
|
||||
- Created from unmatched LinkedIn company profile
|
||||
- 'Location resolution method: UNRESOLVED'
|
||||
- 'Country code corrected on 2025-12-17: NL was incorrect, institution is in Palestine
|
||||
(PS)'
|
||||
|
|
@ -2,7 +2,7 @@ custodian_name:
|
|||
emic_name: Ford museum
|
||||
emic_name_source: linkedin
|
||||
institution_type:
|
||||
- M
|
||||
- O
|
||||
linkedin_enrichment:
|
||||
linkedin_url: https://www.linkedin.com/company/ford-museum
|
||||
linkedin_slug: ford-museum
|
||||
|
|
@ -21,28 +21,40 @@ linkedin_enrichment:
|
|||
original_file: data/custodian/linkedin/ford-museum.yaml
|
||||
schema_version: 1.0.0
|
||||
location:
|
||||
city: ''
|
||||
region: XX
|
||||
country: NL
|
||||
city: Grand Rapids
|
||||
region: Michigan
|
||||
country: US
|
||||
address: 303 Pearl Street NW, Grand Rapids, MI 49504
|
||||
ghcid:
|
||||
ghcid_current: NL-XX-XXX-M-FM-ford_museum
|
||||
ghcid_original: NL-XX-XXX-M-FM-ford_museum
|
||||
ghcid_uuid: 4be08f3c-3b19-5820-9dc7-522026b51400
|
||||
ghcid_uuid_sha256: cf7d92d4-f6c8-81d0-a27d-128f93e68e0b
|
||||
ghcid_numeric: 14951267781310939600
|
||||
ghcid_current: US-MI-GRA-O-FM-ford_museum
|
||||
ghcid_original: US-MI-GRA-O-FM-ford_museum
|
||||
ghcid_uuid: fb8e145b-80ae-5b68-9fbd-93240bd798a9
|
||||
ghcid_uuid_sha256: 8dd63d17-5941-825f-a6b4-872358ea25eb
|
||||
ghcid_numeric: 10220423574854939231
|
||||
record_id: 89d83d98-3121-4969-bd7e-a06f14edc812
|
||||
generation_timestamp: '2025-12-16T21:06:42.087370+00:00'
|
||||
ghcid_history:
|
||||
- ghcid: NL-XX-XXX-M-FM-ford_museum
|
||||
ghcid_numeric: 14951267781310939600
|
||||
valid_from: '2025-12-16T21:06:42.087370+00:00'
|
||||
valid_to: null
|
||||
valid_to: '2025-12-17T09:41:23.620522+00:00'
|
||||
reason: Initial GHCID assignment from LinkedIn batch import
|
||||
- ghcid: US-MI-GRA-O-FM-ford_museum
|
||||
ghcid_numeric: 10220423574854939231
|
||||
valid_from: '2025-12-17T09:41:23.620522+00:00'
|
||||
valid_to: null
|
||||
reason: 'Country code corrected: NL -> US (United States). Location: Grand Rapids,
|
||||
Michigan'
|
||||
location_resolution:
|
||||
method: UNRESOLVED
|
||||
city_code: XXX
|
||||
region_code: XX
|
||||
country_code: NL
|
||||
method: EXA_WEB_SEARCH
|
||||
city_code: GRA
|
||||
city_name: Grand Rapids
|
||||
region_code: MI
|
||||
region_name: Michigan
|
||||
country_code: US
|
||||
resolution_date: '2025-12-17T09:41:23.620522+00:00'
|
||||
source_url: https://www.fordlibrarymuseum.gov/visit/museum
|
||||
notes: Part of National Archives system, commemorates 38th US President
|
||||
provenance:
|
||||
schema_version: 1.0.0
|
||||
generated_at: '2025-12-16T21:06:42.087370+00:00'
|
||||
|
|
@ -59,9 +71,21 @@ provenance:
|
|||
- website
|
||||
- staff_count
|
||||
- heritage_staff
|
||||
web_search:
|
||||
- source_type: exa_web_search
|
||||
data_tier: TIER_2_VERIFIED
|
||||
source_url: https://www.fordlibrarymuseum.gov/visit/museum
|
||||
extraction_timestamp: '2025-12-17T09:41:23.620522+00:00'
|
||||
claims_extracted:
|
||||
- country
|
||||
- region
|
||||
- city
|
||||
- address
|
||||
data_tier_summary:
|
||||
TIER_4_INFERRED:
|
||||
- linkedin_company_profile
|
||||
notes:
|
||||
- Created from unmatched LinkedIn company profile
|
||||
- 'Location resolution method: UNRESOLVED'
|
||||
- 'Country code corrected on 2025-12-17: NL was incorrect, institution is in United
|
||||
States (US)'
|
||||
|
|
@ -1,5 +1,5 @@
|
|||
{
|
||||
"generated": "2025-12-17T08:54:52.876Z",
|
||||
"generated": "2025-12-17T10:54:35.492Z",
|
||||
"version": "1.0.0",
|
||||
"categories": [
|
||||
{
|
||||
|
|
|
|||
|
|
@ -363,6 +363,10 @@
|
|||
to { transform: rotate(360deg); }
|
||||
}
|
||||
|
||||
.conversation-embedding-panel__spinning {
|
||||
animation: spin 1s linear infinite;
|
||||
}
|
||||
|
||||
.conversation-embedding-panel__empty svg {
|
||||
color: #999;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -28,6 +28,8 @@ import {
|
|||
Info,
|
||||
Globe,
|
||||
Target,
|
||||
Settings,
|
||||
Download,
|
||||
} from 'lucide-react';
|
||||
import { EmbeddingProjector, type EmbeddingPoint } from '../database/EmbeddingProjector';
|
||||
import { isTargetInsideAny } from '../../utils/dom';
|
||||
|
|
@ -94,6 +96,12 @@ export interface ConversationEmbeddingPanelProps {
|
|||
onModeChange?: (mode: 'global' | 'context') => void;
|
||||
/** Number of context points available (for badge display) */
|
||||
contextPointsCount?: number;
|
||||
/** Total points in the collection (for Load All display) */
|
||||
totalPointsCount?: number;
|
||||
/** Called when user clicks Load All */
|
||||
onLoadAll?: () => void;
|
||||
/** Whether Load All is currently in progress */
|
||||
isLoadingAll?: boolean;
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -116,6 +124,9 @@ const ConversationEmbeddingPanelComponent: React.FC<ConversationEmbeddingPanelPr
|
|||
mode = 'global',
|
||||
onModeChange,
|
||||
contextPointsCount = 0,
|
||||
totalPointsCount,
|
||||
onLoadAll,
|
||||
isLoadingAll = false,
|
||||
}) => {
|
||||
const panelRef = useRef<HTMLDivElement>(null);
|
||||
|
||||
|
|
@ -456,9 +467,26 @@ const ConversationEmbeddingPanelComponent: React.FC<ConversationEmbeddingPanelPr
|
|||
: (language === 'nl' ? 'Eenvoudige weergave' : 'Simple view')}
|
||||
aria-label={simpleMode ? 'Switch to advanced' : 'Switch to simple'}
|
||||
>
|
||||
<span className="conversation-embedding-panel__mode-icon">
|
||||
{simpleMode ? '◧' : '▣'}
|
||||
</span>
|
||||
<Settings size={16} />
|
||||
</button>
|
||||
)}
|
||||
|
||||
{/* Load All button */}
|
||||
{onLoadAll && mode === 'global' && (
|
||||
<button
|
||||
className="conversation-embedding-panel__control-btn"
|
||||
onClick={onLoadAll}
|
||||
disabled={isLoadingAll}
|
||||
title={language === 'nl'
|
||||
? `Laad alle punten${totalPointsCount ? ` (${totalPointsCount.toLocaleString()})` : ''}`
|
||||
: `Load all points${totalPointsCount ? ` (${totalPointsCount.toLocaleString()})` : ''}`}
|
||||
aria-label="Load all points"
|
||||
>
|
||||
{isLoadingAll ? (
|
||||
<Loader2 size={18} className="conversation-embedding-panel__spinning" />
|
||||
) : (
|
||||
<Download size={18} />
|
||||
)}
|
||||
</button>
|
||||
)}
|
||||
|
||||
|
|
|
|||
|
|
@ -1276,11 +1276,13 @@ export function EmbeddingProjector({
|
|||
positions[i * 3 + 1] = point.y;
|
||||
positions[i * 3 + 2] = point.z ?? 0;
|
||||
|
||||
// Get color for point (initial color without selection)
|
||||
const color = new THREE.Color(getPointColor(point.originalIndex));
|
||||
colors[i * 3] = color.r;
|
||||
colors[i * 3 + 1] = color.g;
|
||||
colors[i * 3 + 2] = color.b;
|
||||
// Use default color - actual colors will be set by the color update effect
|
||||
// This avoids having getPointColor as a dependency which would recreate
|
||||
// the scene (and reset camera) when selection/highlighting changes
|
||||
const defaultColor = new THREE.Color(COLORS[0]);
|
||||
colors[i * 3] = defaultColor.r;
|
||||
colors[i * 3 + 1] = defaultColor.g;
|
||||
colors[i * 3 + 2] = defaultColor.b;
|
||||
|
||||
// Initial size (will be updated by selection effect)
|
||||
sizes[i] = 4;
|
||||
|
|
@ -1458,7 +1460,11 @@ export function EmbeddingProjector({
|
|||
highlightedHalosRef.current = null;
|
||||
}
|
||||
};
|
||||
}, [viewMode, projectedPoints, getPointColor]);
|
||||
// Note: getPointColor is intentionally NOT a dependency here.
|
||||
// Colors are initialized with a default and updated by the separate color/size effect.
|
||||
// Including getPointColor would recreate the scene (reset camera) on selection changes.
|
||||
// eslint-disable-next-line react-hooks/exhaustive-deps
|
||||
}, [viewMode, projectedPoints]);
|
||||
|
||||
// Update point sizes and colors when selection changes (without recreating the scene)
|
||||
useEffect(() => {
|
||||
|
|
|
|||
|
|
@ -207,6 +207,45 @@ export function QdrantPanel({ compact = false }: QdrantPanelProps) {
|
|||
}
|
||||
}, [selectedCollection, nextOffset, scrollPoints]);
|
||||
|
||||
// Load ALL points from the collection (for visualization/search)
|
||||
const loadAllPoints = useCallback(async () => {
|
||||
if (!selectedCollection) return;
|
||||
|
||||
setIsLoadingPoints(true);
|
||||
try {
|
||||
const allPoints: QdrantPoint[] = [];
|
||||
let currentOffset: string | number | null = null;
|
||||
const batchSize = 500; // Load in larger batches for efficiency
|
||||
|
||||
// Scroll through all points
|
||||
while (true) {
|
||||
const result = await scrollPoints(
|
||||
selectedCollection.name,
|
||||
batchSize,
|
||||
currentOffset
|
||||
);
|
||||
|
||||
allPoints.push(...result.points);
|
||||
|
||||
// Update UI with progress
|
||||
setPoints([...allPoints]);
|
||||
|
||||
if (!result.nextOffset) {
|
||||
break; // No more points
|
||||
}
|
||||
currentOffset = result.nextOffset;
|
||||
}
|
||||
|
||||
setPoints(allPoints);
|
||||
setNextOffset(null); // All loaded
|
||||
console.log(`[QdrantPanel] Loaded all ${allPoints.length} points from collection`);
|
||||
} catch (err) {
|
||||
console.error('Failed to load all points:', err);
|
||||
} finally {
|
||||
setIsLoadingPoints(false);
|
||||
}
|
||||
}, [selectedCollection, scrollPoints]);
|
||||
|
||||
// Select a collection
|
||||
const selectCollection = useCallback(async (collection: QdrantCollection) => {
|
||||
setSelectedCollection(collection);
|
||||
|
|
@ -641,13 +680,23 @@ export function QdrantPanel({ compact = false }: QdrantPanelProps) {
|
|||
</span>
|
||||
)}
|
||||
{nextOffset !== null && points.length > 0 && (
|
||||
<button
|
||||
className="secondary-button"
|
||||
onClick={() => loadPoints(true)}
|
||||
disabled={isLoadingPoints}
|
||||
>
|
||||
Load more
|
||||
</button>
|
||||
<>
|
||||
<button
|
||||
className="secondary-button"
|
||||
onClick={() => loadPoints(true)}
|
||||
disabled={isLoadingPoints}
|
||||
>
|
||||
Load more
|
||||
</button>
|
||||
<button
|
||||
className="secondary-button"
|
||||
onClick={loadAllPoints}
|
||||
disabled={isLoadingPoints}
|
||||
title="Load all points for comprehensive search"
|
||||
>
|
||||
{isLoadingPoints ? 'Loading...' : 'Load All'}
|
||||
</button>
|
||||
</>
|
||||
)}
|
||||
</div>
|
||||
)}
|
||||
|
|
|
|||
|
|
@ -96,6 +96,7 @@
|
|||
font-family: 'Roboto', Helvetica, Arial, sans-serif;
|
||||
cursor: pointer;
|
||||
transition: border-color 0.2s;
|
||||
min-width: 220px;
|
||||
}
|
||||
|
||||
.mapping-explorer__category-select:focus {
|
||||
|
|
|
|||
|
|
@ -17,11 +17,13 @@ import {
|
|||
getCategoryForSourceByType,
|
||||
getStatisticsForDataSource,
|
||||
exportToLinkMLMapYaml,
|
||||
CATEGORY_GROUP_LABELS,
|
||||
type EnrichmentSourceMapping,
|
||||
type FieldMapping,
|
||||
type TransformationType,
|
||||
type DataSourceType,
|
||||
type MappingStatus,
|
||||
type CategoryGroup,
|
||||
} from '../../lib/linkml/custodian-data-mappings';
|
||||
import './MappingExplorer.css';
|
||||
|
||||
|
|
@ -760,11 +762,19 @@ export const MappingExplorer: React.FC<MappingExplorerProps> = ({ language = 'en
|
|||
onChange={(e) => setSelectedCategory(e.target.value || null)}
|
||||
>
|
||||
<option value="">{t('allCategories')}</option>
|
||||
{currentCategories.map(cat => (
|
||||
<option key={cat.id} value={cat.id}>
|
||||
{cat.icon} {language === 'nl' ? cat.nameNl : cat.name}
|
||||
</option>
|
||||
))}
|
||||
{(Object.keys(CATEGORY_GROUP_LABELS) as CategoryGroup[]).map(group => {
|
||||
const groupCategories = currentCategories.filter(c => c.group === group);
|
||||
if (groupCategories.length === 0) return null;
|
||||
return (
|
||||
<optgroup key={group} label={CATEGORY_GROUP_LABELS[group][language]}>
|
||||
{groupCategories.map(cat => (
|
||||
<option key={cat.id} value={cat.id}>
|
||||
{cat.icon} {language === 'nl' ? cat.nameNl : cat.name}
|
||||
</option>
|
||||
))}
|
||||
</optgroup>
|
||||
);
|
||||
})}
|
||||
</select>
|
||||
|
||||
<select
|
||||
|
|
|
|||
File diff suppressed because it is too large
Load diff
|
|
@ -883,6 +883,9 @@ const ConversationPage: React.FC = () => {
|
|||
// Projector mode: 'global' shows all 500 points, 'context' shows only RAG results with vectors
|
||||
const [projectorMode, setProjectorMode] = useState<'global' | 'context'>('global');
|
||||
const [contextEmbeddingPoints, setContextEmbeddingPoints] = useState<EmbeddingPoint[]>([]);
|
||||
// Load All state for embedding projector
|
||||
const [isLoadingAllEmbeddings, setIsLoadingAllEmbeddings] = useState(false);
|
||||
const [totalEmbeddingPoints, setTotalEmbeddingPoints] = useState<number | undefined>(undefined);
|
||||
|
||||
// Knowledge Graph Projector state
|
||||
const [showGraphProjector, setShowGraphProjector] = useState(false);
|
||||
|
|
@ -1062,6 +1065,42 @@ const ConversationPage: React.FC = () => {
|
|||
setContextEmbeddingPoints(pointsWithVectors);
|
||||
}, [lastContext?.qdrantResults]);
|
||||
|
||||
// Load ALL embeddings from Qdrant (for comprehensive search)
|
||||
const loadAllEmbeddings = useCallback(async () => {
|
||||
if (!qdrantStatus.isConnected || isLoadingAllEmbeddings) return;
|
||||
|
||||
setIsLoadingAllEmbeddings(true);
|
||||
try {
|
||||
const collectionName = 'heritage_custodians';
|
||||
const allPoints: EmbeddingPoint[] = [];
|
||||
let offset: string | number | null = null;
|
||||
|
||||
// Scroll through all points in batches
|
||||
while (true) {
|
||||
const { points, nextOffset } = await scrollPoints(collectionName, 500, offset);
|
||||
|
||||
const batch: EmbeddingPoint[] = points.map(p => ({
|
||||
id: p.id,
|
||||
vector: p.vector,
|
||||
payload: p.payload,
|
||||
}));
|
||||
|
||||
allPoints.push(...batch);
|
||||
|
||||
if (!nextOffset || points.length === 0) break;
|
||||
offset = nextOffset;
|
||||
}
|
||||
|
||||
setEmbeddingPoints(allPoints);
|
||||
setTotalEmbeddingPoints(allPoints.length);
|
||||
console.log(`[EmbeddingProjector] Loaded all ${allPoints.length} points`);
|
||||
} catch (err) {
|
||||
console.error('Failed to load all embeddings:', err);
|
||||
} finally {
|
||||
setIsLoadingAllEmbeddings(false);
|
||||
}
|
||||
}, [qdrantStatus.isConnected, isLoadingAllEmbeddings, scrollPoints]);
|
||||
|
||||
// Handle panel resize with mouse
|
||||
useEffect(() => {
|
||||
if (!isResizing) return;
|
||||
|
|
@ -1988,6 +2027,9 @@ const ConversationPage: React.FC = () => {
|
|||
mode={projectorMode}
|
||||
onModeChange={setProjectorMode}
|
||||
contextPointsCount={contextEmbeddingPoints.length}
|
||||
totalPointsCount={totalEmbeddingPoints}
|
||||
onLoadAll={loadAllEmbeddings}
|
||||
isLoadingAll={isLoadingAllEmbeddings}
|
||||
/>
|
||||
)}
|
||||
|
||||
|
|
|
|||
|
|
@ -921,6 +921,148 @@
|
|||
font-weight: 500;
|
||||
}
|
||||
|
||||
.query-limit-refresh-button {
|
||||
display: flex;
|
||||
align-items: center;
|
||||
gap: 0.375rem;
|
||||
margin-top: 0.5rem;
|
||||
padding: 0.5rem 0.75rem;
|
||||
background: #4a7dff;
|
||||
color: white;
|
||||
border: none;
|
||||
border-radius: 6px;
|
||||
font-size: 0.75rem;
|
||||
font-weight: 500;
|
||||
cursor: pointer;
|
||||
transition: background 0.2s ease;
|
||||
}
|
||||
|
||||
.query-limit-refresh-button:hover {
|
||||
background: #3366e6;
|
||||
}
|
||||
|
||||
.query-limit-refresh-button:disabled {
|
||||
opacity: 0.6;
|
||||
cursor: not-allowed;
|
||||
}
|
||||
|
||||
/* Query Mode Section (inside query-limit-section) */
|
||||
.query-mode-subsection {
|
||||
margin-top: 1rem;
|
||||
padding-top: 1rem;
|
||||
border-top: 1px solid #e0e0e0;
|
||||
}
|
||||
|
||||
.query-mode-subsection h4 {
|
||||
margin: 0 0 0.5rem 0;
|
||||
font-size: 0.8125rem;
|
||||
color: #172a59;
|
||||
font-weight: 600;
|
||||
}
|
||||
|
||||
.query-mode-desc {
|
||||
font-size: 0.75rem;
|
||||
color: #666;
|
||||
margin: 0 0 0.75rem 0;
|
||||
}
|
||||
|
||||
.query-mode-options {
|
||||
display: flex;
|
||||
flex-direction: column;
|
||||
gap: 0.5rem;
|
||||
}
|
||||
|
||||
.query-mode-option {
|
||||
display: flex;
|
||||
align-items: center;
|
||||
gap: 0.5rem;
|
||||
padding: 0.625rem 0.75rem;
|
||||
background: white;
|
||||
border: 1px solid #e0e0e0;
|
||||
border-radius: 6px;
|
||||
cursor: pointer;
|
||||
transition: all 0.15s ease;
|
||||
}
|
||||
|
||||
.query-mode-option:hover {
|
||||
border-color: #4a7dff;
|
||||
background: #f8f9ff;
|
||||
}
|
||||
|
||||
.query-mode-option.active {
|
||||
border-color: #4a7dff;
|
||||
background: #ebefff;
|
||||
}
|
||||
|
||||
.query-mode-option input[type="radio"] {
|
||||
margin: 0;
|
||||
accent-color: #4a7dff;
|
||||
}
|
||||
|
||||
.query-mode-content {
|
||||
display: flex;
|
||||
flex-direction: column;
|
||||
gap: 0.125rem;
|
||||
flex: 1;
|
||||
}
|
||||
|
||||
.query-mode-label {
|
||||
font-size: 0.8125rem;
|
||||
font-weight: 500;
|
||||
color: #172a59;
|
||||
}
|
||||
|
||||
.query-mode-hint {
|
||||
font-size: 0.6875rem;
|
||||
color: #888;
|
||||
}
|
||||
|
||||
.query-mode-option.active .query-mode-label {
|
||||
color: #4a7dff;
|
||||
}
|
||||
|
||||
.query-mode-option.query-mode-custom {
|
||||
background: linear-gradient(135deg, #f8f9ff 0%, #fff 100%);
|
||||
border-style: dashed;
|
||||
}
|
||||
|
||||
.query-mode-option.query-mode-custom:hover {
|
||||
background: linear-gradient(135deg, #ebefff 0%, #f8f9ff 100%);
|
||||
}
|
||||
|
||||
.query-mode-external-icon {
|
||||
font-size: 0.875rem;
|
||||
color: #4a7dff;
|
||||
margin-left: auto;
|
||||
}
|
||||
|
||||
.query-mode-refresh-button {
|
||||
margin-top: 0.75rem;
|
||||
width: 100%;
|
||||
padding: 0.5rem 0.75rem;
|
||||
background: #4a7dff;
|
||||
color: white;
|
||||
border: none;
|
||||
border-radius: 6px;
|
||||
cursor: pointer;
|
||||
font-weight: 500;
|
||||
font-size: 0.8125rem;
|
||||
display: flex;
|
||||
align-items: center;
|
||||
justify-content: center;
|
||||
gap: 0.5rem;
|
||||
transition: background 0.15s ease;
|
||||
}
|
||||
|
||||
.query-mode-refresh-button:hover {
|
||||
background: #3366e6;
|
||||
}
|
||||
|
||||
.query-mode-refresh-button:disabled {
|
||||
opacity: 0.6;
|
||||
cursor: not-allowed;
|
||||
}
|
||||
|
||||
/* Node Info Section */
|
||||
.node-info-section {
|
||||
padding: 1rem 1.5rem;
|
||||
|
|
@ -2154,6 +2296,75 @@ body:has(.visualize-page.is-mobile .sidebar--mobile:not(.collapsed)) {
|
|||
color: #f39c12;
|
||||
}
|
||||
|
||||
[data-theme="dark"] .query-limit-refresh-button {
|
||||
background: #4a7dff;
|
||||
color: white;
|
||||
}
|
||||
|
||||
[data-theme="dark"] .query-limit-refresh-button:hover {
|
||||
background: #6b9eff;
|
||||
}
|
||||
|
||||
/* Query Mode Section - Dark Mode */
|
||||
[data-theme="dark"] .query-mode-subsection {
|
||||
border-top-color: #3d3d5c;
|
||||
}
|
||||
|
||||
[data-theme="dark"] .query-mode-subsection h4 {
|
||||
color: #e0e0e0;
|
||||
}
|
||||
|
||||
[data-theme="dark"] .query-mode-desc {
|
||||
color: #a0a0b0;
|
||||
}
|
||||
|
||||
[data-theme="dark"] .query-mode-option {
|
||||
background: #1e1e32;
|
||||
border-color: #3d3d5c;
|
||||
}
|
||||
|
||||
[data-theme="dark"] .query-mode-option:hover {
|
||||
border-color: #4a7dff;
|
||||
background: #2d2d4a;
|
||||
}
|
||||
|
||||
[data-theme="dark"] .query-mode-option.active {
|
||||
border-color: #4a7dff;
|
||||
background: #2d2d4a;
|
||||
}
|
||||
|
||||
[data-theme="dark"] .query-mode-label {
|
||||
color: #e0e0e0;
|
||||
}
|
||||
|
||||
[data-theme="dark"] .query-mode-hint {
|
||||
color: #888;
|
||||
}
|
||||
|
||||
[data-theme="dark"] .query-mode-option.active .query-mode-label {
|
||||
color: #6b9eff;
|
||||
}
|
||||
|
||||
[data-theme="dark"] .query-mode-option.query-mode-custom {
|
||||
background: linear-gradient(135deg, #1e1e32 0%, #252545 100%);
|
||||
}
|
||||
|
||||
[data-theme="dark"] .query-mode-option.query-mode-custom:hover {
|
||||
background: linear-gradient(135deg, #2d2d4a 0%, #1e1e32 100%);
|
||||
}
|
||||
|
||||
[data-theme="dark"] .query-mode-external-icon {
|
||||
color: #6b9eff;
|
||||
}
|
||||
|
||||
[data-theme="dark"] .query-mode-refresh-button {
|
||||
background: #4a7dff;
|
||||
}
|
||||
|
||||
[data-theme="dark"] .query-mode-refresh-button:hover {
|
||||
background: #6b9eff;
|
||||
}
|
||||
|
||||
/* Node Info Section */
|
||||
[data-theme="dark"] .node-info-section {
|
||||
background: #2d2d4a;
|
||||
|
|
|
|||
|
|
@ -114,6 +114,16 @@ const TEXT = {
|
|||
custodiansAvailable: { nl: 'beschikbaar', en: 'available' },
|
||||
performanceWarning: { nl: '⚠️ Meer dan 1000 kan traag zijn', en: '⚠️ More than 1000 may be slow' },
|
||||
|
||||
// RDF Query Mode
|
||||
queryMode: { nl: 'Query Modus', en: 'Query Mode' },
|
||||
queryModeDesc: { nl: 'Selecteer hoe data wordt opgehaald', en: 'Select how data is fetched' },
|
||||
queryModeDetailed: { nl: 'Gedetailleerd (standaard)', en: 'Detailed (default)' },
|
||||
queryModeDetailedHint: { nl: 'Specifieke eigenschappen, sneller', en: 'Specific properties, faster' },
|
||||
queryModeGeneric: { nl: 'Generiek (alle relaties)', en: 'Generic (all relations)' },
|
||||
queryModeGenericHint: { nl: 'Alle triples, meer connectiviteit', en: 'All triples, more connectivity' },
|
||||
queryModeCustom: { nl: 'Aangepaste query...', en: 'Custom query...' },
|
||||
queryModeCustomHint: { nl: 'Open Query Builder', en: 'Open Query Builder' },
|
||||
|
||||
// Selected node
|
||||
selectedNode: { nl: 'Geselecteerd knooppunt', en: 'Selected Node' },
|
||||
id: { nl: 'ID', en: 'ID' },
|
||||
|
|
@ -368,14 +378,28 @@ export function Visualize() {
|
|||
|
||||
// RDF query limit - default to 500 to prevent browser overload
|
||||
// Oxigraph contains 27,000+ custodians; rendering all at once crashes the browser
|
||||
// eslint-disable-next-line @typescript-eslint/no-unused-vars
|
||||
const [rdfLimit, setRdfLimit] = useState<number>(() => {
|
||||
const saved = localStorage.getItem('visualize-rdf-limit');
|
||||
return saved ? parseInt(saved, 10) : 500;
|
||||
});
|
||||
// eslint-disable-next-line @typescript-eslint/no-unused-vars
|
||||
const [totalCustodiansAvailable, setTotalCustodiansAvailable] = useState<number | null>(null);
|
||||
|
||||
// Track the limit that was used for the current cached data
|
||||
const cachedRdfLimitRef = useRef<number | null>(null);
|
||||
|
||||
// Prevent duplicate RDF fetch requests (React StrictMode protection)
|
||||
const rdfFetchInProgressRef = useRef<boolean>(false);
|
||||
|
||||
// RDF Query Mode - allows switching between detailed (limited properties) and generic (all triples)
|
||||
type RdfQueryMode = 'detailed' | 'generic' | 'custom';
|
||||
const [rdfQueryMode, setRdfQueryMode] = useState<RdfQueryMode>(() => {
|
||||
const saved = localStorage.getItem('visualize-rdf-query-mode');
|
||||
return (saved === 'detailed' || saved === 'generic' || saved === 'custom') ? saved : 'detailed';
|
||||
});
|
||||
|
||||
// Track the mode used for cached data (to show Apply button when changed)
|
||||
const cachedRdfQueryModeRef = useRef<RdfQueryMode | null>(null);
|
||||
|
||||
// Hooks
|
||||
const { isInitialized, isLoading: dbLoading, storageInfo } = useDatabase();
|
||||
const { parse, isLoading: parserLoading, error: parserError } = useRdfParser();
|
||||
|
|
@ -489,6 +513,13 @@ export function Visualize() {
|
|||
|
||||
// Generate RDF overview - Fetch and visualize all heritage custodian RDF data
|
||||
const handleGenerateRdf = useCallback(async () => {
|
||||
// Prevent duplicate concurrent fetches (React StrictMode protection)
|
||||
if (rdfFetchInProgressRef.current) {
|
||||
console.log('[RDF] Fetch already in progress, skipping duplicate request');
|
||||
return;
|
||||
}
|
||||
|
||||
rdfFetchInProgressRef.current = true;
|
||||
setGeneratingRdf(true);
|
||||
// Don't clear UML visualization - we want to keep both cached
|
||||
setCurrentCategory('rdf');
|
||||
|
|
@ -533,84 +564,116 @@ export function Visualize() {
|
|||
console.warn('Could not fetch custodian count:', countErr);
|
||||
}
|
||||
|
||||
// SPARQL query aligned with actual RDF generated by oxigraph_sync.py and oxigraph_person_sync.py
|
||||
// Namespaces match the Python sync scripts:
|
||||
// - nde: <https://nde.nl/ontology/hc/class/> for Custodian type
|
||||
// - hc: <https://w3id.org/heritage/custodian/> for ghcid, isil predicates
|
||||
// - hp: <https://w3id.org/heritage/person/> for person URIs
|
||||
const constructQuery = `
|
||||
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
|
||||
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
|
||||
PREFIX skos: <http://www.w3.org/2004/02/skos/core#>
|
||||
PREFIX schema: <http://schema.org/>
|
||||
PREFIX foaf: <http://xmlns.com/foaf/0.1/>
|
||||
PREFIX owl: <http://www.w3.org/2002/07/owl#>
|
||||
PREFIX org: <http://www.w3.org/ns/org#>
|
||||
PREFIX geo: <http://www.w3.org/2003/01/geo/wgs84_pos#>
|
||||
PREFIX cidoc: <http://www.cidoc-crm.org/cidoc-crm/>
|
||||
PREFIX nde: <https://nde.nl/ontology/hc/class/>
|
||||
PREFIX hc: <https://w3id.org/heritage/custodian/>
|
||||
PREFIX hp: <https://w3id.org/heritage/person/>
|
||||
// SPARQL query - conditional based on rdfQueryMode
|
||||
// - detailed: Specific properties only (faster, less connectivity)
|
||||
// - generic: All triples for custodians (slower, full connectivity)
|
||||
let constructQuery: string;
|
||||
|
||||
if (rdfQueryMode === 'generic') {
|
||||
// Generic query: Returns ALL triples for custodians
|
||||
// This provides maximum connectivity in the graph but may be slower
|
||||
console.log('Using GENERIC query mode - fetching all triples');
|
||||
constructQuery = `
|
||||
PREFIX nde: <https://nde.nl/ontology/hc/class/>
|
||||
|
||||
CONSTRUCT { ?s ?p ?o }
|
||||
WHERE {
|
||||
{
|
||||
# All triples where custodian is subject
|
||||
?s a nde:Custodian .
|
||||
?s ?p ?o .
|
||||
}
|
||||
UNION
|
||||
{
|
||||
# All triples where custodian is object (incoming links)
|
||||
?o a nde:Custodian .
|
||||
?s ?p ?o .
|
||||
}
|
||||
}
|
||||
LIMIT ${rdfLimit * 10}
|
||||
`;
|
||||
// Note: Higher limit because generic query returns more triples per custodian
|
||||
} else {
|
||||
// Detailed query: Specific properties only (default)
|
||||
// Namespaces match the Python sync scripts:
|
||||
// - nde: <https://nde.nl/ontology/hc/class/> for Custodian type
|
||||
// - hc: <https://w3id.org/heritage/custodian/> for ghcid, isil predicates
|
||||
// - hp: <https://w3id.org/heritage/person/> for person URIs
|
||||
console.log('Using DETAILED query mode - specific properties');
|
||||
constructQuery = `
|
||||
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
|
||||
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
|
||||
PREFIX skos: <http://www.w3.org/2004/02/skos/core#>
|
||||
PREFIX schema: <http://schema.org/>
|
||||
PREFIX foaf: <http://xmlns.com/foaf/0.1/>
|
||||
PREFIX owl: <http://www.w3.org/2002/07/owl#>
|
||||
PREFIX org: <http://www.w3.org/ns/org#>
|
||||
PREFIX geo: <http://www.w3.org/2003/01/geo/wgs84_pos#>
|
||||
PREFIX cidoc: <http://www.cidoc-crm.org/cidoc-crm/>
|
||||
PREFIX nde: <https://nde.nl/ontology/hc/class/>
|
||||
PREFIX hc: <https://w3id.org/heritage/custodian/>
|
||||
PREFIX hp: <https://w3id.org/heritage/person/>
|
||||
|
||||
CONSTRUCT {
|
||||
# Custodians - core data
|
||||
?custodian a nde:Custodian ;
|
||||
rdfs:label ?label ;
|
||||
skos:prefLabel ?prefLabel ;
|
||||
schema:name ?name ;
|
||||
hc:ghcid ?ghcid ;
|
||||
hc:isil ?isil ;
|
||||
schema:url ?website ;
|
||||
foaf:homepage ?homepage ;
|
||||
owl:sameAs ?wikidata .
|
||||
|
||||
# Location (schema:location, not crm:P53)
|
||||
?custodian schema:location ?location .
|
||||
?location geo:lat ?lat ;
|
||||
geo:long ?lon .
|
||||
|
||||
# Persons linked to custodians
|
||||
?person a schema:Person ;
|
||||
rdfs:label ?personLabel ;
|
||||
schema:name ?personName ;
|
||||
schema:worksFor ?custodian ;
|
||||
org:memberOf ?custodian ;
|
||||
schema:jobTitle ?jobTitle .
|
||||
}
|
||||
WHERE {
|
||||
# Get all custodians (nde:Custodian is the primary type)
|
||||
?custodian a nde:Custodian .
|
||||
OPTIONAL { ?custodian rdfs:label ?label }
|
||||
OPTIONAL { ?custodian skos:prefLabel ?prefLabel }
|
||||
OPTIONAL { ?custodian schema:name ?name }
|
||||
OPTIONAL { ?custodian hc:ghcid ?ghcid }
|
||||
OPTIONAL { ?custodian hc:isil ?isil }
|
||||
OPTIONAL { ?custodian schema:url ?website }
|
||||
OPTIONAL { ?custodian foaf:homepage ?homepage }
|
||||
OPTIONAL {
|
||||
?custodian owl:sameAs ?wikidata .
|
||||
FILTER(STRSTARTS(STR(?wikidata), "http://www.wikidata.org/"))
|
||||
}
|
||||
|
||||
# Location using schema:location (as generated by oxigraph_sync.py)
|
||||
OPTIONAL {
|
||||
CONSTRUCT {
|
||||
# Custodians - core data
|
||||
?custodian a nde:Custodian ;
|
||||
rdfs:label ?label ;
|
||||
skos:prefLabel ?prefLabel ;
|
||||
schema:name ?name ;
|
||||
hc:ghcid ?ghcid ;
|
||||
hc:isil ?isil ;
|
||||
schema:url ?website ;
|
||||
foaf:homepage ?homepage ;
|
||||
owl:sameAs ?wikidata .
|
||||
|
||||
# Location (schema:location, not crm:P53)
|
||||
?custodian schema:location ?location .
|
||||
OPTIONAL { ?location geo:lat ?lat }
|
||||
OPTIONAL { ?location geo:long ?lon }
|
||||
}
|
||||
|
||||
# Persons linked to custodians via schema:worksFor
|
||||
OPTIONAL {
|
||||
?location geo:lat ?lat ;
|
||||
geo:long ?lon .
|
||||
|
||||
# Persons linked to custodians
|
||||
?person a schema:Person ;
|
||||
schema:worksFor ?custodian .
|
||||
OPTIONAL { ?person rdfs:label ?personLabel }
|
||||
OPTIONAL { ?person schema:name ?personName }
|
||||
OPTIONAL { ?person schema:jobTitle ?jobTitle }
|
||||
OPTIONAL { ?person org:memberOf ?custodian }
|
||||
rdfs:label ?personLabel ;
|
||||
schema:name ?personName ;
|
||||
schema:worksFor ?custodian ;
|
||||
org:memberOf ?custodian ;
|
||||
schema:jobTitle ?jobTitle .
|
||||
}
|
||||
}
|
||||
LIMIT ${rdfLimit}
|
||||
`;
|
||||
WHERE {
|
||||
# Get all custodians (nde:Custodian is the primary type)
|
||||
?custodian a nde:Custodian .
|
||||
OPTIONAL { ?custodian rdfs:label ?label }
|
||||
OPTIONAL { ?custodian skos:prefLabel ?prefLabel }
|
||||
OPTIONAL { ?custodian schema:name ?name }
|
||||
OPTIONAL { ?custodian hc:ghcid ?ghcid }
|
||||
OPTIONAL { ?custodian hc:isil ?isil }
|
||||
OPTIONAL { ?custodian schema:url ?website }
|
||||
OPTIONAL { ?custodian foaf:homepage ?homepage }
|
||||
OPTIONAL {
|
||||
?custodian owl:sameAs ?wikidata .
|
||||
FILTER(STRSTARTS(STR(?wikidata), "http://www.wikidata.org/"))
|
||||
}
|
||||
|
||||
# Location using schema:location (as generated by oxigraph_sync.py)
|
||||
OPTIONAL {
|
||||
?custodian schema:location ?location .
|
||||
OPTIONAL { ?location geo:lat ?lat }
|
||||
OPTIONAL { ?location geo:long ?lon }
|
||||
}
|
||||
|
||||
# Persons linked to custodians via schema:worksFor
|
||||
OPTIONAL {
|
||||
?person a schema:Person ;
|
||||
schema:worksFor ?custodian .
|
||||
OPTIONAL { ?person rdfs:label ?personLabel }
|
||||
OPTIONAL { ?person schema:name ?personName }
|
||||
OPTIONAL { ?person schema:jobTitle ?jobTitle }
|
||||
OPTIONAL { ?person org:memberOf ?custodian }
|
||||
}
|
||||
}
|
||||
LIMIT ${rdfLimit}
|
||||
`;
|
||||
}
|
||||
|
||||
let rdfData = '';
|
||||
let dataFormat: 'application/n-triples' | 'text/turtle' = 'application/n-triples';
|
||||
|
|
@ -678,12 +741,15 @@ export function Visualize() {
|
|||
console.log(`RDF loaded: showing all ${result.nodes.length} nodes across ${typesArray.length} types: ${typesArray.join(', ')}`);
|
||||
}
|
||||
|
||||
// Update cache state
|
||||
// Update cache state and track the limit and mode used
|
||||
setHasRdfCache(true);
|
||||
setRdfNodeCount(result.nodes.length);
|
||||
cachedRdfLimitRef.current = rdfLimit; // Remember which limit was used for this cache
|
||||
cachedRdfQueryModeRef.current = rdfQueryMode; // Remember which mode was used
|
||||
|
||||
// Update filename with count
|
||||
setFileName(`NDE Heritage Custodians (${result.nodes.length} entities)`);
|
||||
// Update filename with count and mode indicator
|
||||
const modeLabel = rdfQueryMode === 'generic' ? 'all triples' : 'detailed';
|
||||
setFileName(`NDE Heritage Custodians (${result.nodes.length} entities, ${modeLabel})`);
|
||||
|
||||
} catch (err) {
|
||||
console.error('Error generating RDF overview:', err);
|
||||
|
|
@ -694,8 +760,9 @@ export function Visualize() {
|
|||
);
|
||||
} finally {
|
||||
setGeneratingRdf(false);
|
||||
rdfFetchInProgressRef.current = false; // Allow new fetches
|
||||
}
|
||||
}, [parse, loadGraphData]);
|
||||
}, [rdfLimit, rdfQueryMode, parse, loadGraphData]);
|
||||
|
||||
// Close dropdowns when clicking outside
|
||||
useEffect(() => {
|
||||
|
|
@ -977,8 +1044,11 @@ export function Visualize() {
|
|||
}, [hasUmlCache, handleGenerateUml]);
|
||||
|
||||
const handleSwitchToRdf = useCallback(() => {
|
||||
if (!hasRdfCache) {
|
||||
// No cache, generate it
|
||||
// Check if we have cache AND it was fetched with the current limit setting
|
||||
const cacheValid = hasRdfCache && cachedRdfLimitRef.current === rdfLimit;
|
||||
|
||||
if (!cacheValid) {
|
||||
// No cache, or limit has changed - fetch fresh data
|
||||
handleGenerateRdf();
|
||||
} else {
|
||||
// Switch to cached RDF view
|
||||
|
|
@ -986,7 +1056,7 @@ export function Visualize() {
|
|||
setLayoutType('force');
|
||||
localStorage.setItem('visualize-layout-type', 'force');
|
||||
}
|
||||
}, [hasRdfCache, handleGenerateRdf]);
|
||||
}, [hasRdfCache, rdfLimit, handleGenerateRdf]);
|
||||
|
||||
// Check if we have content to display
|
||||
const hasRdfContent = filteredNodes.length > 0;
|
||||
|
|
@ -1181,6 +1251,8 @@ export function Visualize() {
|
|||
const newLimit = parseInt(e.target.value, 10);
|
||||
setRdfLimit(newLimit);
|
||||
localStorage.setItem('visualize-rdf-limit', String(newLimit));
|
||||
// Invalidate cache so next view switch triggers refresh
|
||||
// User will need to click "Refresh RDF" or switch away and back
|
||||
}}
|
||||
>
|
||||
<option value="100">100</option>
|
||||
|
|
@ -1192,6 +1264,17 @@ export function Visualize() {
|
|||
<option value="10000">10,000</option>
|
||||
<option value="50000">All (50,000+)</option>
|
||||
</select>
|
||||
{/* Show refresh prompt if limit changed from cached value */}
|
||||
{hasRdfCache && cachedRdfLimitRef.current !== null && cachedRdfLimitRef.current !== rdfLimit && (
|
||||
<button
|
||||
className="query-limit-refresh-button"
|
||||
onClick={handleGenerateRdf}
|
||||
disabled={_generatingRdf}
|
||||
>
|
||||
<RefreshCw size={14} />
|
||||
{language === 'nl' ? 'Toepassen' : 'Apply'}
|
||||
</button>
|
||||
)}
|
||||
{totalCustodiansAvailable && (
|
||||
<p className="query-limit-info">
|
||||
{rdfNodeCount > 0 ? rdfNodeCount : rdfLimit} {t('showingOf')} {totalCustodiansAvailable.toLocaleString()} {t('custodiansAvailable')}
|
||||
|
|
@ -1200,6 +1283,70 @@ export function Visualize() {
|
|||
{rdfLimit > 1000 && (
|
||||
<p className="query-limit-warning">{t('performanceWarning')}</p>
|
||||
)}
|
||||
|
||||
{/* Query Mode Selector */}
|
||||
<div className="query-mode-subsection">
|
||||
<h4>{t('queryMode')}</h4>
|
||||
<p className="query-mode-desc">{t('queryModeDesc')}</p>
|
||||
<div className="query-mode-options">
|
||||
<label className={`query-mode-option ${rdfQueryMode === 'detailed' ? 'active' : ''}`}>
|
||||
<input
|
||||
type="radio"
|
||||
name="queryMode"
|
||||
value="detailed"
|
||||
checked={rdfQueryMode === 'detailed'}
|
||||
onChange={() => {
|
||||
setRdfQueryMode('detailed');
|
||||
localStorage.setItem('visualize-rdf-query-mode', 'detailed');
|
||||
}}
|
||||
/>
|
||||
<div className="query-mode-content">
|
||||
<span className="query-mode-label">{t('queryModeDetailed')}</span>
|
||||
<span className="query-mode-hint">{t('queryModeDetailedHint')}</span>
|
||||
</div>
|
||||
</label>
|
||||
<label className={`query-mode-option ${rdfQueryMode === 'generic' ? 'active' : ''}`}>
|
||||
<input
|
||||
type="radio"
|
||||
name="queryMode"
|
||||
value="generic"
|
||||
checked={rdfQueryMode === 'generic'}
|
||||
onChange={() => {
|
||||
setRdfQueryMode('generic');
|
||||
localStorage.setItem('visualize-rdf-query-mode', 'generic');
|
||||
}}
|
||||
/>
|
||||
<div className="query-mode-content">
|
||||
<span className="query-mode-label">{t('queryModeGeneric')}</span>
|
||||
<span className="query-mode-hint">{t('queryModeGenericHint')}</span>
|
||||
</div>
|
||||
</label>
|
||||
<label
|
||||
className="query-mode-option query-mode-custom"
|
||||
onClick={(e) => {
|
||||
e.preventDefault();
|
||||
window.open('/query-builder', '_blank');
|
||||
}}
|
||||
>
|
||||
<div className="query-mode-content">
|
||||
<span className="query-mode-label">{t('queryModeCustom')}</span>
|
||||
<span className="query-mode-hint">{t('queryModeCustomHint')}</span>
|
||||
</div>
|
||||
<span className="query-mode-external-icon">↗</span>
|
||||
</label>
|
||||
</div>
|
||||
{/* Show Apply button if mode changed from cached value */}
|
||||
{hasRdfCache && cachedRdfQueryModeRef.current !== null && cachedRdfQueryModeRef.current !== rdfQueryMode && (
|
||||
<button
|
||||
className="query-mode-refresh-button"
|
||||
onClick={handleGenerateRdf}
|
||||
disabled={_generatingRdf}
|
||||
>
|
||||
<RefreshCw size={14} />
|
||||
{language === 'nl' ? 'Toepassen' : 'Apply'}
|
||||
</button>
|
||||
)}
|
||||
</div>
|
||||
</div>
|
||||
)}
|
||||
|
||||
|
|
|
|||
291
scripts/apply_verified_enrichments.py
Normal file
291
scripts/apply_verified_enrichments.py
Normal file
|
|
@ -0,0 +1,291 @@
|
|||
#!/usr/bin/env python3
|
||||
"""
|
||||
Apply verified location enrichments to XXX files and rename them.
|
||||
|
||||
This script:
|
||||
1. Updates ghcid_current with the correct region/city codes
|
||||
2. Updates location with city/region
|
||||
3. Adds ghcid_history entry
|
||||
4. Updates provenance notes
|
||||
5. Renames file to match new GHCID
|
||||
"""
|
||||
|
||||
import yaml
|
||||
import os
|
||||
import re
|
||||
import uuid
|
||||
import hashlib
|
||||
from datetime import datetime, timezone
|
||||
from pathlib import Path
|
||||
|
||||
# Verified enrichments from Exa web search
|
||||
# History:
|
||||
# - Batch 1 (2025-12-17): 4 files - Crypto Museum, Allard Pierson, DPM Rotterdam, Cow Museum
|
||||
# - Batch 2 (2025-12-17): 8 files - Bierreclame, Cacao, Edah, Flessenscheepjes, Eddie the Eagle, Jopie Huisman, Fortuna, Atlantikwall
|
||||
# - Batch 3 (2025-12-17): 7 files - Ajax, C1000, Moslim Archief, CODART, Blik Trommel, Klompenmakerij, CultuurSchakel
|
||||
# - Batch 4 (2025-12-17): 7 files - Autoriteit Persoonsgegevens, Raad voor Cultuur, IJV, Erotisch Museum, Hollands Kaas Museum, Kresse Museum, Van Gogh Museum Enterprises
|
||||
# - Batch 5 (2025-12-17): 4 files - Huis73, Dutch Directors Guild, Het Kaas Museum (Bodegraven), Stichting Abrahamdag
|
||||
# - Batch 6 (2025-12-17): 2 files - Museum 1939-1945, Brandkas van Henny
|
||||
# - Batch 7 (2025-12-17): 5 files - Frans Maas Museum, Museum Buitenlust, Museum De Canonije, Museum Dijkmagazijn De Heul, Museumboerderij Erve Hofman
|
||||
# - Batch 8 (2025-12-17): 7 files - Museum Janning, Museum Geelvinck Hinlopen Huis, Museumboerderij De Grote Glind, Museum Galerie RAT, Museum Averlo-Frieswijk-Schalkhaar, Museum Ceuclum, Museum van Brabantse Mutsen en Poffers
|
||||
# Total enriched: 44 files
|
||||
# Remaining: ~133 NL-XX-XXX files
|
||||
#
|
||||
# All previously processed entries have been removed from VERIFIED_ENRICHMENTS.
|
||||
# Only add new entries that have not been processed yet.
|
||||
|
||||
# Each entry describes one placeholder-located custodian file (NL-XX-XXX-...)
# and the verified location that replaces the placeholder:
#   old_filename      - current YAML file name inside the custodian directory
#   institution_name  - human-readable name of the institution
#   city / region     - verified place name and province
#   region_code       - province code used as the second GHCID component
#   city_code         - city code used as the third GHCID component
#   address           - verified street address
#   source            - how the location was verified
#   source_url        - page the verification was taken from
VERIFIED_ENRICHMENTS = [
    # Batch 8 - 2025-12-17
    {
        'old_filename': 'NL-XX-XXX-M-MJ-museum_janning.yaml',
        'institution_name': 'Museum Janning',
        'city': 'Nieuw Schoonebeek',
        'region': 'Drenthe',
        'region_code': 'DR',
        'city_code': 'NIS',
        'address': 'Europaweg 143a, 7766 AE Nieuw Schoonebeek',
        'source': 'exa_web_search',
        'source_url': 'https://www.museumjanning.nl/',
    },
    {
        'old_filename': 'NL-XX-XXX-M-MGHH.yaml',
        'institution_name': 'Museum Geelvinck Hinlopen Huis',
        'city': 'Heerde',
        'region': 'Gelderland',
        'region_code': 'GE',
        'city_code': 'HEE',
        'address': 'Kamperweg 23, 8181 CS Heerde',
        'source': 'exa_web_search',
        'source_url': 'https://geelvinck.nl/',
    },
    {
        'old_filename': 'NL-XX-XXX-M-MGG.yaml',
        'institution_name': 'Museumboerderij De Grote Glind',
        'city': 'Barneveld',
        'region': 'Gelderland',
        'region_code': 'GE',
        'city_code': 'BAR',
        'address': 'Scherpenzeelseweg 158, 3772 MG Barneveld',
        'source': 'exa_web_search',
        'source_url': 'https://www.degroteglind.nl/',
    },
    {
        'old_filename': 'NL-XX-XXX-M-MGR.yaml',
        'institution_name': 'Museum Galerie RAT',
        'city': 'Den Burg',
        'region': 'Noord-Holland',
        'region_code': 'NH',
        'city_code': 'DEB',
        'address': 'Burgwal 20, 1791 Den Burg, Texel',
        'source': 'exa_web_search',
        'source_url': 'https://www.mapquest.com/',
    },
    {
        'old_filename': 'NL-XX-XXX-M-MAFS.yaml',
        'institution_name': 'Museum Averlo-Frieswijk-Schalkhaar',
        'city': 'Schalkhaar',
        'region': 'Overijssel',
        'region_code': 'OV',
        'city_code': 'SCK',
        'address': 'Frieswijkerweg 7, 7433 RB Schalkhaar',
        'source': 'exa_web_search',
        'source_url': 'https://www.museum-afs.nl/',
    },
    {
        'old_filename': 'NL-XX-XXX-M-MC.yaml',
        'institution_name': 'Museum Ceuclum',
        'city': 'Cuijk',
        'region': 'Noord-Brabant',
        'region_code': 'NB',
        'city_code': 'CUI',
        'address': 'Castellum 1, 5431 EM Cuijk',
        'source': 'exa_web_search',
        'source_url': 'https://www.museumceuclum.nl/',
    },
    {
        'old_filename': 'NL-XX-XXX-M-MBMP.yaml',
        'institution_name': 'Museum van Brabantse Mutsen en Poffers',
        'city': 'Sint-Oedenrode',
        'region': 'Noord-Brabant',
        'region_code': 'NB',
        'city_code': 'SOR',
        'address': 'Kerkstraat 20, 5492 AH Sint-Oedenrode',
        'source': 'exa_web_search',
        'source_url': 'https://mutsenmuseum.nl/',
    },
]
|
||||
|
||||
def generate_ghcid_uuid(ghcid_string: str) -> str:
    """Return the deterministic UUID v5 for a GHCID string.

    The UUID is derived with the standard RFC DNS namespace
    (6ba7b810-9dad-11d1-80b4-00c04fd430c8), so the same GHCID always
    maps to the same UUID.

    Args:
        ghcid_string: The GHCID to derive the UUID from.

    Returns:
        The canonical hyphenated, lowercase string form of the UUID.
    """
    # uuid.NAMESPACE_DNS is the namespace the identifier scheme has always
    # used; referencing the stdlib constant avoids re-typing it by hand.
    return str(uuid.uuid5(uuid.NAMESPACE_DNS, ghcid_string))
|
||||
|
||||
def generate_ghcid_numeric(ghcid_string: str) -> int:
    """Return a deterministic 64-bit numeric ID for a GHCID string.

    Args:
        ghcid_string: The GHCID to hash.

    Returns:
        The first 8 bytes of the SHA-256 digest of the UTF-8 encoded GHCID,
        interpreted as an unsigned big-endian integer (0 <= value < 2**64).
    """
    digest = hashlib.sha256(ghcid_string.encode('utf-8')).digest()
    return int.from_bytes(digest[:8], byteorder='big')
|
||||
|
||||
def generate_ghcid_uuid_sha256(ghcid_string: str) -> str:
    """Return a version-8 style UUID built from the GHCID's SHA-256 digest.

    The first 16 digest bytes become the UUID. The high nibble of byte 6 is
    forced to 8 (version) and the top two bits of byte 8 to ``10`` (variant).
    """
    digest = hashlib.sha256(ghcid_string.encode()).digest()
    value = int.from_bytes(digest[:16], byteorder='big')

    # Byte 6 occupies bits 72-79 of the 128-bit value; its high nibble is 76-79.
    value = (value & ~(0xF << 76)) | (0x8 << 76)
    # Byte 8 occupies bits 56-63; its two most significant bits are 62-63.
    value = (value & ~(0x3 << 62)) | (0x2 << 62)

    return str(uuid.UUID(int=value))
|
||||
|
||||
def apply_enrichment(custodian_dir: Path, enrichment: dict) -> tuple[str | None, str | None]:
    """Apply one verified location enrichment to a custodian YAML file.

    The file ``custodian_dir / enrichment['old_filename']`` is loaded, its
    ``NL-XX-XXX-...`` GHCID is rebuilt with the enrichment's region and city
    codes, the derived identifiers are regenerated, and the record is written
    under a filename based on the new GHCID. The old file is removed when the
    filename changed.

    Args:
        custodian_dir: Directory holding the custodian YAML files.
        enrichment: Mapping with ``old_filename``, ``city``, ``region``,
            ``region_code`` and ``city_code``; ``address`` and ``source_url``
            are optional.

    Returns:
        ``(old_path, new_path)`` as strings, or ``(None, None)`` when the file
        does not exist or its current GHCID is not in ``NL-XX-XXX`` form.
    """
    old_path = custodian_dir / enrichment['old_filename']

    if not old_path.exists():
        print(f" ❌ File not found: {old_path}")
        return None, None

    # Load YAML
    with open(old_path, 'r', encoding='utf-8') as f:
        data = yaml.safe_load(f)

    # Extract current GHCID components
    old_ghcid = data['ghcid']['ghcid_current']

    # Parse old GHCID to get type and abbreviation
    # Format: NL-XX-XXX-{type}-{abbrev}[-{name_suffix}]
    match = re.match(r'NL-XX-XXX-([A-Z])-([A-Z0-9]+)(?:-(.+))?', old_ghcid)
    if not match:
        print(f" ❌ Could not parse GHCID: {old_ghcid}")
        return None, None

    inst_type = match.group(1)
    abbrev = match.group(2)
    name_suffix = match.group(3)  # May be None

    # Build new GHCID
    new_ghcid = f"NL-{enrichment['region_code']}-{enrichment['city_code']}-{inst_type}-{abbrev}"
    if name_suffix:
        new_ghcid += f"-{name_suffix}"

    # Generate new identifiers
    new_uuid = generate_ghcid_uuid(new_ghcid)
    new_uuid_sha256 = generate_ghcid_uuid_sha256(new_ghcid)
    new_numeric = generate_ghcid_numeric(new_ghcid)

    # One timestamp is shared by every field written below.
    timestamp = datetime.now(timezone.utc).isoformat()

    # Update location
    data['location'] = {
        'city': enrichment['city'],
        'region': enrichment['region'],
        'country': 'NL',
    }
    if enrichment.get('address'):
        data['location']['address'] = enrichment['address']

    # The history list lives inside the 'ghcid' mapping. Create it there when
    # missing so the loop and append below always find it.
    history = data['ghcid'].setdefault('ghcid_history', [])

    # Close out the old entry
    for entry in history:
        if entry.get('valid_to') is None:
            entry['valid_to'] = timestamp

    # Add new history entry
    history.append({
        'ghcid': new_ghcid,
        'ghcid_numeric': new_numeric,
        'valid_from': timestamp,
        'valid_to': None,
        'reason': f"Location enriched via Exa web search - {enrichment['city']}, {enrichment['region']}"
    })

    # Update current GHCID
    data['ghcid']['ghcid_current'] = new_ghcid
    data['ghcid']['ghcid_uuid'] = new_uuid
    data['ghcid']['ghcid_uuid_sha256'] = new_uuid_sha256
    data['ghcid']['ghcid_numeric'] = new_numeric

    # Add location_resolution
    data['ghcid']['location_resolution'] = {
        'method': 'EXA_WEB_SEARCH',
        'city_code': enrichment['city_code'],
        'city_name': enrichment['city'],
        'region_code': enrichment['region_code'],
        'region_name': enrichment['region'],
        'country_code': 'NL',
        'resolution_date': timestamp,
        'source_url': enrichment.get('source_url'),
    }

    # Update provenance notes
    if 'provenance' not in data:
        data['provenance'] = {}
    if 'notes' not in data['provenance']:
        data['provenance']['notes'] = []
    data['provenance']['notes'].append(
        f"Location enriched on {timestamp[:10]} via Exa web search: {enrichment['city']}, {enrichment['region']}"
    )

    # Add web search source to provenance
    if 'sources' not in data['provenance']:
        data['provenance']['sources'] = {}
    if 'web_search' not in data['provenance']['sources']:
        data['provenance']['sources']['web_search'] = []
    data['provenance']['sources']['web_search'].append({
        'source_type': 'exa_web_search',
        'data_tier': 'TIER_3_CROWD_SOURCED',
        'source_url': enrichment.get('source_url'),
        'extraction_timestamp': timestamp,
        'claims_extracted': ['city', 'region', 'address'],
    })

    # Write updated YAML to new filename
    new_filename = new_ghcid.replace('/', '_') + '.yaml'
    new_path = custodian_dir / new_filename

    with open(new_path, 'w', encoding='utf-8') as f:
        yaml.dump(data, f, default_flow_style=False, allow_unicode=True, sort_keys=False)

    # Remove old file
    if old_path != new_path:
        old_path.unlink()

    return str(old_path), str(new_path)
|
||||
|
||||
|
||||
def main():
    """Apply every entry of VERIFIED_ENRICHMENTS and print a summary."""
    custodian_dir = Path('/Users/kempersc/apps/glam/data/custodian')
    rule = "=" * 60

    print(rule)
    print("Applying Verified Location Enrichments")
    print(rule)

    if not VERIFIED_ENRICHMENTS:
        print("\nNo enrichments to process. Add entries to VERIFIED_ENRICHMENTS list.")
        return

    success_count = 0

    for enrichment in VERIFIED_ENRICHMENTS:
        print(f"\nProcessing: {enrichment['old_filename']}")
        print(f" → {enrichment['city']}, {enrichment['region']} ({enrichment['region_code']}-{enrichment['city_code']})")

        old_path, new_path = apply_enrichment(custodian_dir, enrichment)

        # Failures have already been reported by apply_enrichment itself.
        if not (old_path and new_path):
            continue

        print(f" ✅ Renamed: {os.path.basename(old_path)}")
        print(f" → {os.path.basename(new_path)}")
        success_count += 1

    print("\n" + rule)
    print(f"Summary: {success_count}/{len(VERIFIED_ENRICHMENTS)} files enriched and renamed")
    print(rule)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
297
scripts/enrich_xxx_via_web_search.py
Normal file
297
scripts/enrich_xxx_via_web_search.py
Normal file
|
|
@ -0,0 +1,297 @@
|
|||
#!/usr/bin/env python3
|
||||
"""
|
||||
Enrich NL-XX-XXX files with correct location data via web search.
|
||||
|
||||
The LinkedIn HTML extraction method was flawed - it extracted location data from
|
||||
wrong companies in the HTML. This script uses web search to find correct locations.
|
||||
|
||||
Strategy:
|
||||
1. Read custodian name and website from YAML file
|
||||
2. Search web for "[name] Netherlands location address city"
|
||||
3. Parse results to extract city/region
|
||||
4. Update YAML file with correct location
|
||||
5. Regenerate GHCID based on new location
|
||||
"""
|
||||
|
||||
import os
|
||||
import re
|
||||
import yaml
|
||||
import json
|
||||
import subprocess
|
||||
from pathlib import Path
|
||||
from datetime import datetime, timezone
|
||||
from typing import Optional, Tuple
|
||||
|
||||
# Directory containing custodian files
|
||||
CUSTODIAN_DIR = Path("/Users/kempersc/apps/glam/data/custodian")
|
||||
|
||||
# GeoNames database for settlement lookup
|
||||
GEONAMES_DB = Path("/Users/kempersc/apps/glam/data/reference/geonames.db")
|
||||
|
||||
# Dutch province mapping: lower-case province name -> two-letter province code.
# Several provinces have extra spellings (English names, 'fryslân', a bare
# 'brabant') that map to the same code as the Dutch name.
# get_region_code() tests each key as a substring of the lower-cased input, in
# the order listed here, and returns the code of the first key that matches.
PROVINCE_MAP = {
    'drenthe': 'DR',
    'friesland': 'FR', 'fryslân': 'FR',
    'gelderland': 'GE',
    'groningen': 'GR',
    'limburg': 'LI',
    'noord-brabant': 'NB', 'north brabant': 'NB', 'nordbrabant': 'NB', 'brabant': 'NB',
    'noord-holland': 'NH', 'north holland': 'NH',
    'overijssel': 'OV',
    'utrecht': 'UT',
    'zeeland': 'ZE',
    'zuid-holland': 'ZH', 'south holland': 'ZH',
    'flevoland': 'FL',
}
|
||||
|
||||
# Dutch city to 3-letter code mapping (common cities)
|
||||
CITY_CODES = {
|
||||
'amsterdam': 'AMS',
|
||||
'rotterdam': 'ROT',
|
||||
'den haag': 'DHA', 'the hague': 'DHA', "'s-gravenhage": 'DHA',
|
||||
'utrecht': 'UTR',
|
||||
'eindhoven': 'EIN',
|
||||
'groningen': 'GRO',
|
||||
'tilburg': 'TIL',
|
||||
'almere': 'ALM',
|
||||
'breda': 'BRE',
|
||||
'nijmegen': 'NIJ',
|
||||
'apeldoorn': 'APE',
|
||||
'haarlem': 'HAA',
|
||||
'arnhem': 'ARN',
|
||||
'enschede': 'ENS',
|
||||
'amersfoort': 'AME',
|
||||
'zaanstad': 'ZAA',
|
||||
'haarlemmermeer': 'HMM',
|
||||
'zwolle': 'ZWO',
|
||||
'leiden': 'LEI',
|
||||
'maastricht': 'MAA',
|
||||
'dordrecht': 'DOR',
|
||||
'zoetermeer': 'ZOE',
|
||||
'deventer': 'DEV',
|
||||
'delft': 'DEL',
|
||||
'alkmaar': 'ALK',
|
||||
'venlo': 'VEN',
|
||||
'leeuwarden': 'LEE',
|
||||
'heerlen': 'HEE',
|
||||
'hilversum': 'HIL',
|
||||
'assen': 'ASS',
|
||||
'schiedam': 'SCH',
|
||||
'weert': 'WEE',
|
||||
'duivendrecht': 'DUI',
|
||||
'noordwijk': 'NOO',
|
||||
}
|
||||
|
||||
|
||||
def get_city_code(city: str) -> str:
    """Return the 3-letter code for a city name.

    Known cities are looked up in CITY_CODES (case-insensitive, surrounding
    whitespace ignored). Any other name is reduced to its a-z letters and the
    first three are upper-cased; shorter results are right-padded with 'X'.
    """
    key = city.lower().strip()
    try:
        return CITY_CODES[key]
    except KeyError:
        pass

    # Fallback: derive a code from the plain a-z letters of the name.
    letters = ''.join(ch for ch in key if 'a' <= ch <= 'z')
    if len(letters) < 3:
        return letters.upper().ljust(3, 'X')
    return letters[:3].upper()
|
||||
|
||||
|
||||
def get_region_code(region: str) -> Optional[str]:
    """Return the 2-letter province code found in a region name, if any.

    Each PROVINCE_MAP key is tested as a substring of the lower-cased,
    stripped input, in the map's own order; the first hit wins. Returns None
    when no key occurs in the input.
    """
    haystack = region.lower().strip()
    return next(
        (code for province, code in PROVINCE_MAP.items() if province in haystack),
        None,
    )
|
||||
|
||||
|
||||
def extract_location_from_search_results(results: list) -> Optional[dict]:
    """Extract a city (and province code, when present) from search results.

    Each result's ``text`` and ``title`` are joined and scanned with a fixed
    list of patterns, in order. The first pattern that matches in the first
    matching result decides the outcome; later results are not inspected.

    Args:
        results: Search result mappings; ``text`` and ``title`` are optional
            and may be None.

    Returns:
        A dict with ``city``, ``region_code`` (None when the match carried no
        region or the region is not a known province) and ``source_text``
        (the first 200 characters of the scanned text), or None when nothing
        matched.
    """
    # Patterns to match Dutch locations. Groups are either (city, region),
    # (postal code, city) or just (city,).
    patterns = [
        # "City, Netherlands (Province)"
        r'(\w+(?:\s+\w+)?)\s*,\s*Netherlands\s*\((\w+(?:\s+\w+)?)\)',
        # "in City, Province"
        r'in\s+(\w+(?:\s+\w+)?)\s*,\s*(Noord-Holland|Zuid-Holland|Noord-Brabant|Gelderland|Limburg|Overijssel|Friesland|Drenthe|Groningen|Utrecht|Zeeland|Flevoland)',
        # "legal seat in City"
        r'legal\s+seat\s+in\s+(\w+)',
        # "Address: <street>, 1234 AB City"
        r'Address[:\s]+[^,]+,\s*(\d{4}\s*[A-Z]{2})\s+(\w+)',
        # Dutch postal code followed by city and country
        r'(\d{4}\s*[A-Z]{2})\s+(\w+(?:\s+\w+)?)\s*,?\s*(?:Netherlands|NL)',
    ]
    postal_code = r'\d{4}\s*[A-Z]{2}'

    for result in results:
        # A key that is present with a None value bypasses .get()'s default.
        text = (result.get('text') or '') + ' ' + (result.get('title') or '')

        # Try each pattern
        for pattern in patterns:
            match = re.search(pattern, text, re.IGNORECASE)
            if not match:
                continue

            groups = match.groups()
            city = groups[0]
            region = None
            if len(groups) >= 2:
                # The searches are case-insensitive, so this check must be too.
                # Otherwise a lower-case postcode ("1791 ab") is taken for the
                # city and the real city for a region.
                if re.match(postal_code, groups[0], re.IGNORECASE):
                    city = groups[1]
                else:
                    region = groups[1]

            city = city.strip()
            region_code = get_region_code(region) if region else None

            return {
                'city': city,
                'region_code': region_code,
                'source_text': text[:200]
            }

    return None
|
||||
|
||||
|
||||
def search_institution_location(name: str, website: Optional[str] = None) -> Optional[dict]:
    """Placeholder for an Exa web search of an institution's location.

    A search query is assembled from the name and, when a website other than
    an ``lnkd.in`` short link is given, from that site's domain. No search is
    executed here, so the function always returns None.
    """
    has_usable_site = bool(website) and 'lnkd.in' not in website

    if has_usable_site:
        # Reduce the URL to its bare host so it can be used in a site: filter.
        host = re.sub(r'https?://(www\.)?', '', website).split('/')[0]
        query = f'site:{host} OR "{name}" Netherlands address city location'
    else:
        query = f'"{name}" Netherlands location address city'

    # The query is not sent anywhere yet: the MCP client cannot be imported
    # from this script, so the search is run through the MCP tool instead.
    return None
|
||||
|
||||
|
||||
def find_xxx_files_needing_enrichment():
    """Collect the NL-XX-XXX custodian files together with their key fields.

    Returns a list of dicts with the file path (``file``), the institution's
    ``name``, its ``website`` and LinkedIn ``slug``, and the parsed YAML
    (``content``). Empty files are skipped; files that fail to load or parse
    are reported on stdout and skipped.
    """
    candidates = []

    for path in sorted(CUSTODIAN_DIR.glob("NL-XX-XXX-*.yaml")):
        try:
            with open(path, 'r', encoding='utf-8') as handle:
                record = yaml.safe_load(handle)

            if not record:
                continue

            candidates.append({
                'file': path,
                'name': record.get('custodian_name', {}).get('emic_name', ''),
                'website': record.get('linkedin_enrichment', {}).get('website'),
                'slug': record.get('linkedin_enrichment', {}).get('linkedin_slug', ''),
                'content': record
            })
        except Exception as e:
            print(f"Error reading {path}: {e}")

    return candidates
|
||||
|
||||
|
||||
def update_file_with_location(file_info: dict, city: str, region_code: str, source: str) -> Optional[str]:
    """Update a YAML file with correct location data.

    Rewrites the record's ``location``, derives a new GHCID from the file
    name and the given city/region, records how the location was resolved,
    appends a provenance note, and writes the record back to the same path.

    Args:
        file_info: Entry with ``file`` (path), ``content`` (parsed YAML) and
            ``name`` keys, as built by find_xxx_files_needing_enrichment().
        city: City name; its 3-letter code comes from get_city_code().
        region_code: Province code, stored both as the location's ``region``
            and inside the new GHCID.
        source: Free-form description of where the location came from.

    Returns:
        The new GHCID, or None when the file name does not follow the
        ``NL-XX-XXX-{TYPE}-...`` pattern.
    """
    # NOTE(review): the nesting of the `if match:` body below was reconstructed
    # from a flattened listing - confirm it against the original file.
    f = file_info['file']
    content = file_info['content']
    name = file_info['name']  # not used below

    # Get city code
    city_code = get_city_code(city)

    # Update location
    content['location'] = {
        'city': city,
        'region': region_code,
        'country': 'NL'
    }

    # Generate new GHCID
    # Extract type and abbreviation from filename
    filename = f.stem
    # Pattern: NL-XX-XXX-{TYPE}-{ABBREV}[-{name_suffix}]
    match = re.match(r'NL-XX-XXX-([A-Z])-(.+)', filename)
    if match:
        inst_type = match.group(1)
        abbrev_suffix = match.group(2)

        new_ghcid = f"NL-{region_code}-{city_code}-{inst_type}-{abbrev_suffix}"

        # Update GHCID
        if 'ghcid' not in content:
            content['ghcid'] = {}

        old_ghcid = content['ghcid'].get('ghcid_current', filename)
        content['ghcid']['ghcid_current'] = new_ghcid
        content['ghcid']['ghcid_original'] = old_ghcid

        # Update history
        # NOTE(review): this replaces any existing history with a single entry,
        # and 'ghcid_numeric' is copied from the record as-is rather than
        # derived from new_ghcid - confirm both are intended.
        content['ghcid']['ghcid_history'] = [{
            'ghcid': new_ghcid,
            'ghcid_numeric': content['ghcid'].get('ghcid_numeric'),
            'valid_from': datetime.now(timezone.utc).isoformat(),
            'valid_to': None,
            'reason': f'Location enriched via web search: {city}, {region_code}'
        }]

        # Add location resolution
        content['ghcid']['location_resolution'] = {
            'method': 'WEB_SEARCH',
            'city': city,
            'city_code': city_code,
            'region_code': region_code,
            'country_code': 'NL',
            'source': source,
            'resolution_date': datetime.now(timezone.utc).isoformat()
        }

    # Add provenance note
    if 'provenance' not in content:
        content['provenance'] = {}
    if 'notes' not in content['provenance']:
        content['provenance']['notes'] = []
    content['provenance']['notes'].append(
        f"Location enriched via web search on {datetime.now(timezone.utc).strftime('%Y-%m-%d')}: {city}, {region_code}"
    )

    # Write back
    # NOTE(review): the file keeps its NL-XX-XXX name even though the GHCID
    # inside it has changed - confirm a rename is not expected here.
    with open(f, 'w', encoding='utf-8') as file:
        yaml.dump(content, file, default_flow_style=False, allow_unicode=True, sort_keys=False)

    return new_ghcid if match else None
|
||||
|
||||
|
||||
def main():
    """List the NL-XX-XXX files and show which of them have a usable website."""
    print("Finding NL-XX-XXX files needing location enrichment...\n")

    files = find_xxx_files_needing_enrichment()
    print(f"Found {len(files)} files\n")

    # Split in one pass: an entry counts as having a website unless the value
    # is empty or an lnkd.in short link.
    with_website = []
    without_website = []
    for entry in files:
        site = entry['website']
        if site and 'lnkd.in' not in str(site):
            with_website.append(entry)
        else:
            without_website.append(entry)

    print(f"Files with valid website: {len(with_website)}")
    print(f"Files without valid website: {len(without_website)}")

    print("\n--- Sample files with websites (first 20) ---")
    for entry in with_website[:20]:
        print(f" {entry['name']}")
        print(f" Website: {entry['website']}")
        print(f" File: {entry['file'].name}")
        print()
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
299
scripts/fix_non_dutch_institutions.py
Normal file
299
scripts/fix_non_dutch_institutions.py
Normal file
|
|
@ -0,0 +1,299 @@
|
|||
#!/usr/bin/env python3
|
||||
"""
|
||||
Fix institutions incorrectly assigned to NL (Netherlands) that are actually in other countries.
|
||||
|
||||
These institutions were imported from LinkedIn batch import but have wrong country codes.
|
||||
"""
|
||||
|
||||
import yaml
|
||||
import os
|
||||
import re
|
||||
import uuid
|
||||
import hashlib
|
||||
from datetime import datetime, timezone
|
||||
from pathlib import Path
|
||||
|
||||
# Non-Dutch institutions to fix
|
||||
# Verified via Exa web search 2025-12-17
|
||||
NON_DUTCH_INSTITUTIONS = [
|
||||
{
|
||||
'old_filename': 'NL-XX-XXX-A-HAEU.yaml',
|
||||
'institution_name': 'Historical Archives of the European Union',
|
||||
'country': 'IT',
|
||||
'country_name': 'Italy',
|
||||
'region': 'Tuscany',
|
||||
'region_code': '52', # Italian region code
|
||||
'city': 'Firenze',
|
||||
'city_code': 'FIR',
|
||||
'address': 'Via Bolognese 156, 50139 Firenze, Villa Salviati',
|
||||
'source_url': 'https://archives.eui.eu/en/repositories/1',
|
||||
'notes': 'Part of European University Institute, Florence'
|
||||
},
|
||||
{
|
||||
'old_filename': 'NL-XX-XXX-A-VZWADEB.yaml',
|
||||
'institution_name': 'v.z.w. Archief- en Documentatiecentrum Erfgoed Binnenvaart',
|
||||
'country': 'BE',
|
||||
'country_name': 'Belgium',
|
||||
'region': 'West-Vlaanderen',
|
||||
'region_code': 'VWV',
|
||||
'city': 'Oudenburg',
|
||||
'city_code': 'OUD',
|
||||
'address': 'Vaartdijk zuid 11, 8460 Oudenburg (aboard Museumschip Tordino)',
|
||||
'source_url': 'http://binnenvaarterfgoed.be/',
|
||||
'notes': 'Belgian v.z.w. (vzw = Belgian non-profit), located aboard museum ship'
|
||||
},
|
||||
{
|
||||
'old_filename': 'NL-XX-XXX-M-FM-ford_museum.yaml',
|
||||
'institution_name': 'Gerald R. Ford Presidential Museum',
|
||||
'country': 'US',
|
||||
'country_name': 'United States',
|
||||
'region': 'Michigan',
|
||||
'region_code': 'MI',
|
||||
'city': 'Grand Rapids',
|
||||
'city_code': 'GRA',
|
||||
'address': '303 Pearl Street NW, Grand Rapids, MI 49504',
|
||||
'source_url': 'https://www.fordlibrarymuseum.gov/visit/museum',
|
||||
'notes': 'Part of National Archives system, commemorates 38th US President',
|
||||
# Update institution_type from M to O (Official Institution - Presidential Library)
|
||||
'new_institution_type': 'O',
|
||||
},
|
||||
{
|
||||
'old_filename': 'NL-XX-XXX-M-DAJ.yaml',
|
||||
'institution_name': 'Diorama Arsip Jogja',
|
||||
'country': 'ID',
|
||||
'country_name': 'Indonesia',
|
||||
'region': 'Daerah Istimewa Yogyakarta',
|
||||
'region_code': 'YO',
|
||||
'city': 'Bantul',
|
||||
'city_code': 'BAN',
|
||||
'address': 'LT 1 Gedung DEPO ARSIP, Jl. Janti, Banguntapan, Kabupaten Bantul, Yogyakarta 55198',
|
||||
'source_url': 'https://dioramaarsip.jogjaprov.go.id/home',
|
||||
'notes': 'Digital archive diorama of Yogyakarta history, opened February 2022',
|
||||
# It's actually an Archive (A), not Museum (M)
|
||||
'new_institution_type': 'A',
|
||||
},
|
||||
# Batch 2: Added 2025-12-17 - More Indonesian and Palestinian institutions
|
||||
{
|
||||
'old_filename': 'NL-XX-XXX-M-MBV.yaml',
|
||||
'institution_name': 'Museum Benteng Vredeburg',
|
||||
'country': 'ID',
|
||||
'country_name': 'Indonesia',
|
||||
'region': 'Daerah Istimewa Yogyakarta',
|
||||
'region_code': 'YO',
|
||||
'city': 'Yogyakarta',
|
||||
'city_code': 'YOG',
|
||||
'address': 'Jl. Margo Mulyo No.6, Ngupasan, Kec. Gondomanan, Kota Yogyakarta 55122',
|
||||
'source_url': 'https://forevervacation.com/yogyakarta/museum-benteng-vredeburg',
|
||||
'notes': 'Dutch colonial fortress converted to museum in 1992, documents Indonesian independence struggle',
|
||||
},
|
||||
{
|
||||
'old_filename': 'NL-XX-XXX-M-MBP.yaml',
|
||||
'institution_name': 'Museum Batik Pekalongan',
|
||||
'country': 'ID',
|
||||
'country_name': 'Indonesia',
|
||||
'region': 'Jawa Tengah',
|
||||
'region_code': 'JT', # Central Java
|
||||
'city': 'Pekalongan',
|
||||
'city_code': 'PEK',
|
||||
'address': 'Jl. Jetayu No.1, Pekalongan 51152',
|
||||
'source_url': 'https://id.wikipedia.org/wiki/Museum_Batik_Pekalongan',
|
||||
'notes': 'UNESCO recognized museum for batik conservation, opened 12 July 2006 by President SBY',
|
||||
},
|
||||
{
|
||||
'old_filename': 'NL-XX-XXX-M-MG.yaml',
|
||||
'institution_name': 'Municipality of Gaza',
|
||||
'country': 'PS',
|
||||
'country_name': 'Palestine',
|
||||
'region': 'Gaza Strip',
|
||||
'region_code': 'GZ',
|
||||
'city': 'Gaza City',
|
||||
'city_code': 'GAZ',
|
||||
'address': None, # Address not verifiable due to current situation
|
||||
'source_url': 'https://www.gaza-city.org',
|
||||
'notes': 'Municipal government, founded 1898. Type corrected from M (Museum) to O (Official Institution)',
|
||||
# It's a municipality (government), not a museum
|
||||
'new_institution_type': 'O',
|
||||
},
|
||||
]
|
||||
|
||||
|
||||
def generate_ghcid_uuid(ghcid_string: str) -> str:
    """Return the version-5 (name-based) UUID of a GHCID as a string.

    A single fixed namespace UUID is used, so the result depends only on the
    GHCID text.
    """
    namespace = uuid.UUID('6ba7b810-9dad-11d1-80b4-00c04fd430c8')
    name_based = uuid.uuid5(namespace, ghcid_string)
    return str(name_based)
|
||||
|
||||
|
||||
def generate_ghcid_numeric(ghcid_string: str) -> int:
    """Return the leading 64 bits of the GHCID's SHA-256 digest as an integer.

    The first 8 digest bytes are combined most-significant byte first.
    """
    digest = hashlib.sha256(ghcid_string.encode()).digest()
    value = 0
    for byte in digest[:8]:
        value = (value << 8) | byte
    return value
|
||||
|
||||
|
||||
def generate_ghcid_uuid_sha256(ghcid_string: str) -> str:
    """Return a version-8 style UUID taken from the GHCID's SHA-256 digest.

    The UUID is the first 16 digest bytes, with the version nibble of byte 6
    set to 8 and the two variant bits of byte 8 set to ``10``.
    """
    head = hashlib.sha256(ghcid_string.encode()).digest()[:16]
    version_byte = bytes([(head[6] & 0x0F) | 0x80])  # Version 8
    variant_byte = bytes([(head[8] & 0x3F) | 0x80])  # Variant
    stamped = head[:6] + version_byte + head[7:8] + variant_byte + head[9:]
    return str(uuid.UUID(bytes=stamped))
|
||||
|
||||
|
||||
def fix_institution(custodian_dir: Path, inst: dict) -> tuple[str | None, str | None]:
    """Move one wrongly NL-coded institution to its real country.

    Loads ``custodian_dir / inst['old_filename']``, rebuilds the GHCID from
    the corrected country, region and city codes (and a corrected institution
    type when ``new_institution_type`` is set), regenerates the derived IDs,
    records the change in history and provenance, and writes the record under
    a filename based on the new GHCID. The old file is removed when the name
    changed.

    Returns ``(old_path, new_path)`` as strings, or ``(None, None)`` when the
    file is missing or its GHCID is not in ``NL-XX-XXX`` form.
    """
    source_file = custodian_dir / inst['old_filename']
    if not source_file.exists():
        print(f" File not found: {source_file}")
        return None, None

    with open(source_file, 'r', encoding='utf-8') as handle:
        data = yaml.safe_load(handle)

    ghcid = data['ghcid']
    previous_ghcid = ghcid['ghcid_current']

    # Expected shape: NL-XX-XXX-{type}-{abbrev}[-{name_suffix}]
    parsed = re.match(r'NL-XX-XXX-([A-Z])-([A-Z0-9]+)(?:-(.+))?', previous_ghcid)
    if parsed is None:
        print(f" Could not parse GHCID: {previous_ghcid}")
        return None, None
    inst_type, abbrev, name_suffix = parsed.groups()  # suffix may be None

    # An explicit type correction overrides the type parsed from the old ID
    # and is mirrored into the record's institution_type field.
    type_override = inst.get('new_institution_type')
    if type_override:
        inst_type = type_override
        data['institution_type'] = [inst_type]

    suffix = f"-{name_suffix}" if name_suffix else ""
    new_ghcid = f"{inst['country']}-{inst['region_code']}-{inst['city_code']}-{inst_type}-{abbrev}{suffix}"

    new_uuid = generate_ghcid_uuid(new_ghcid)
    new_uuid_sha256 = generate_ghcid_uuid_sha256(new_ghcid)
    new_numeric = generate_ghcid_numeric(new_ghcid)

    timestamp = datetime.now(timezone.utc).isoformat()

    location = {
        'city': inst['city'],
        'region': inst['region'],
        'country': inst['country'],
    }
    address = inst.get('address')
    if address:
        location['address'] = address
    data['location'] = location

    # Close every still-open history entry, then open one for the new GHCID.
    history = ghcid.setdefault('ghcid_history', [])
    for past in history:
        if past.get('valid_to') is None:
            past['valid_to'] = timestamp
    history.append({
        'ghcid': new_ghcid,
        'ghcid_numeric': new_numeric,
        'valid_from': timestamp,
        'valid_to': None,
        'reason': f"Country code corrected: NL -> {inst['country']} ({inst['country_name']}). "
                  f"Location: {inst['city']}, {inst['region']}"
    })

    ghcid.update({
        'ghcid_current': new_ghcid,
        # The original is overwritten too, because the NL prefix was wrong.
        'ghcid_original': new_ghcid,
        'ghcid_uuid': new_uuid,
        'ghcid_uuid_sha256': new_uuid_sha256,
        'ghcid_numeric': new_numeric,
    })

    ghcid['location_resolution'] = {
        'method': 'EXA_WEB_SEARCH',
        'city_code': inst['city_code'],
        'city_name': inst['city'],
        'region_code': inst['region_code'],
        'region_name': inst['region'],
        'country_code': inst['country'],
        'resolution_date': timestamp,
        'source_url': inst.get('source_url'),
        'notes': inst.get('notes'),
    }

    provenance = data.setdefault('provenance', {})
    provenance.setdefault('notes', []).append(
        f"Country code corrected on {timestamp[:10]}: NL was incorrect, "
        f"institution is in {inst['country_name']} ({inst['country']})"
    )
    provenance.setdefault('sources', {}).setdefault('web_search', []).append({
        'source_type': 'exa_web_search',
        'data_tier': 'TIER_2_VERIFIED',  # higher tier: the country was verified
        'source_url': inst.get('source_url'),
        'extraction_timestamp': timestamp,
        'claims_extracted': ['country', 'region', 'city', 'address'],
    })

    target_file = custodian_dir / (new_ghcid.replace('/', '_') + '.yaml')
    with open(target_file, 'w', encoding='utf-8') as handle:
        yaml.dump(data, handle, default_flow_style=False, allow_unicode=True, sort_keys=False)

    if source_file != target_file:
        source_file.unlink()

    return str(source_file), str(target_file)
|
||||
|
||||
|
||||
def main():
    """Correct every entry of NON_DUTCH_INSTITUTIONS and print a summary."""
    custodian_dir = Path('/Users/kempersc/apps/glam/data/custodian')
    rule = "=" * 70

    print(rule)
    print("Fixing Non-Dutch Institutions (Country Code Corrections)")
    print(rule)

    success_count = 0

    for inst in NON_DUTCH_INSTITUTIONS:
        print(f"\nProcessing: {inst['old_filename']}")
        print(f" Institution: {inst['institution_name']}")
        print(f" Correction: NL -> {inst['country']} ({inst['country_name']})")
        print(f" Location: {inst['city']}, {inst['region']}")

        old_path, new_path = fix_institution(custodian_dir, inst)

        # fix_institution has already printed why an entry was skipped.
        if not (old_path and new_path):
            continue

        print(f" Renamed: {os.path.basename(old_path)}")
        print(f" -> {os.path.basename(new_path)}")
        success_count += 1

    print("\n" + rule)
    print(f"Summary: {success_count}/{len(NON_DUTCH_INSTITUTIONS)} institutions corrected")
    print(rule)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
|
|
@ -202,14 +202,20 @@ def extract_metadata(data: dict[str, Any], filepath: Path) -> dict[str, Any]:
|
|||
metadata["country"] = loc["country"]
|
||||
if loc.get("city"):
|
||||
metadata["city"] = loc["city"]
|
||||
if loc.get("region"):
|
||||
# Use region_code (ISO 3166-2) for filtering, fallback to region name
|
||||
if loc.get("region_code"):
|
||||
metadata["region"] = loc["region_code"] # e.g., "NH" not "Noord-Holland"
|
||||
elif loc.get("region"):
|
||||
metadata["region"] = loc["region"]
|
||||
elif location:
|
||||
if location.get("country"):
|
||||
metadata["country"] = location["country"]
|
||||
if location.get("city"):
|
||||
metadata["city"] = location["city"]
|
||||
if location.get("region"):
|
||||
# Use region_code (ISO 3166-2) for filtering, fallback to region name
|
||||
if location.get("region_code"):
|
||||
metadata["region"] = location["region_code"] # e.g., "NH" not "Noord-Holland"
|
||||
elif location.get("region"):
|
||||
metadata["region"] = location["region"]
|
||||
|
||||
# Also extract country from GHCID if not found elsewhere
|
||||
|
|
@ -290,24 +296,15 @@ def extract_metadata(data: dict[str, Any], filepath: Path) -> dict[str, Any]:
|
|||
|
||||
|
||||
def find_institution_files(data_dir: Path) -> list[Path]:
|
||||
"""Find all institution YAML files in the data directory."""
|
||||
"""Find all institution YAML files in the data directory.
|
||||
|
||||
Optimized for large directories using os.listdir instead of glob.
|
||||
"""
|
||||
import os
|
||||
|
||||
files = []
|
||||
|
||||
# Look for YAML files in common patterns
|
||||
patterns = [
|
||||
"*.yaml",
|
||||
"*.yml",
|
||||
"**/*.yaml",
|
||||
"**/*.yml",
|
||||
]
|
||||
|
||||
for pattern in patterns:
|
||||
files.extend(data_dir.glob(pattern))
|
||||
|
||||
# Deduplicate
|
||||
files = list(set(files))
|
||||
|
||||
# Filter out non-institution files
|
||||
# Filter patterns
|
||||
excluded_patterns = [
|
||||
"_schema",
|
||||
"_config",
|
||||
|
|
@ -316,12 +313,37 @@ def find_institution_files(data_dir: Path) -> list[Path]:
|
|||
"example_",
|
||||
]
|
||||
|
||||
filtered = []
|
||||
for f in files:
|
||||
if not any(excl in f.name.lower() for excl in excluded_patterns):
|
||||
filtered.append(f)
|
||||
def is_valid_file(name: str) -> bool:
|
||||
"""Check if file is a valid institution YAML file."""
|
||||
if not name.endswith(('.yaml', '.yml')):
|
||||
return False
|
||||
if name.startswith('.'):
|
||||
return False
|
||||
name_lower = name.lower()
|
||||
return not any(excl in name_lower for excl in excluded_patterns)
|
||||
|
||||
return sorted(filtered)
|
||||
# Get top-level YAML files (most common case - fast with os.listdir)
|
||||
try:
|
||||
for name in os.listdir(data_dir):
|
||||
if is_valid_file(name):
|
||||
filepath = data_dir / name
|
||||
if filepath.is_file():
|
||||
files.append(filepath)
|
||||
except PermissionError:
|
||||
logger.warning(f"Permission denied accessing {data_dir}")
|
||||
|
||||
# Check known subdirectories for additional files
|
||||
known_subdirs = ["person", "web", "archived"]
|
||||
for subdir in known_subdirs:
|
||||
subdir_path = data_dir / subdir
|
||||
if subdir_path.exists():
|
||||
for root, _, filenames in os.walk(subdir_path):
|
||||
root_path = Path(root)
|
||||
for name in filenames:
|
||||
if is_valid_file(name):
|
||||
files.append(root_path / name)
|
||||
|
||||
return sorted(files)
|
||||
|
||||
|
||||
def main():
|
||||
|
|
@ -371,6 +393,12 @@ def main():
|
|||
action="store_true",
|
||||
help="Parse files but don't index"
|
||||
)
|
||||
parser.add_argument(
|
||||
"--limit",
|
||||
type=int,
|
||||
default=None,
|
||||
help="Limit number of files to process (for testing)"
|
||||
)
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
|
|
@ -384,6 +412,11 @@ def main():
|
|||
files = find_institution_files(args.data_dir)
|
||||
logger.info(f"Found {len(files)} institution files")
|
||||
|
||||
# Apply limit if specified
|
||||
if args.limit:
|
||||
files = files[:args.limit]
|
||||
logger.info(f"Limited to {len(files)} files for processing")
|
||||
|
||||
if not files:
|
||||
logger.warning("No institution files found")
|
||||
sys.exit(0)
|
||||
|
|
|
|||
6643
scripts/sync/mappings.py
Normal file
6643
scripts/sync/mappings.py
Normal file
File diff suppressed because it is too large
Load diff
|
|
@ -179,14 +179,20 @@ def extract_metadata(data: dict[str, Any], filepath: Path) -> dict[str, Any]:
|
|||
metadata["country"] = loc["country"]
|
||||
if loc.get("city"):
|
||||
metadata["city"] = loc["city"]
|
||||
if loc.get("region"):
|
||||
# Use region_code (ISO 3166-2) for filtering, fallback to region name
|
||||
if loc.get("region_code"):
|
||||
metadata["region"] = loc["region_code"] # e.g., "NH" not "Noord-Holland"
|
||||
elif loc.get("region"):
|
||||
metadata["region"] = loc["region"]
|
||||
elif location:
|
||||
if location.get("country"):
|
||||
metadata["country"] = location["country"]
|
||||
if location.get("city"):
|
||||
metadata["city"] = location["city"]
|
||||
if location.get("region"):
|
||||
# Use region_code (ISO 3166-2) for filtering, fallback to region name
|
||||
if location.get("region_code"):
|
||||
metadata["region"] = location["region_code"] # e.g., "NH" not "Noord-Holland"
|
||||
elif location.get("region"):
|
||||
metadata["region"] = location["region"]
|
||||
|
||||
# Country from GHCID
|
||||
|
|
|
|||
Loading…
Reference in a new issue