glam/scripts
2026-01-11 22:26:37 +01:00
..
lib fix: add safety measures to prevent data loss during enrichment 2025-12-09 12:27:09 +01:00
parsers add isil entries 2025-11-19 23:25:22 +01:00
scrapers updated schemata 2025-11-21 22:12:33 +01:00
sync Refactor code structure for improved readability and maintainability 2026-01-09 11:05:26 +01:00
add_alternative_names_category2.py add pids 2025-12-01 23:55:55 +01:00
add_alternative_names_libraries.py add pids 2025-12-01 23:55:55 +01:00
add_ch_annotator_location_claims.py feat(scripts): add city enrichment and location resolution utilities 2025-12-07 14:26:59 +01:00
add_geographic_annotations_to_enum.py feat: Complete Country Class Implementation and Hypernyms Removal 2025-11-23 13:09:38 +01:00
add_isil_gap_notes.py Add comprehensive tests for NLP institution extraction and RDF partnership integration 2025-11-19 23:20:47 +01:00
add_legal_status.py add pids 2025-12-01 23:55:55 +01:00
add_manual_wikidata.py Add comprehensive tests for NLP institution extraction and RDF partnership integration 2025-11-19 23:20:47 +01:00
add_missing_locations.py Add comprehensive tests for NLP institution extraction and RDF partnership integration 2025-11-19 23:20:47 +01:00
add_person_observations_to_custodians.py enrich all custodian timespan 2025-12-15 22:31:41 +01:00
add_provenance_to_enriched.py Refactor code structure for improved readability and maintainability 2025-11-29 12:27:39 +01:00
add_slot_mappings.py Fix LinkML URI conflicts and generate RDF outputs 2026-01-07 12:32:59 +01:00
add_specificity_annotations.py Enhance specificity scoring system integration with existing infrastructure 2026-01-05 17:37:49 +01:00
add_temporal_extent_defunct.py add pids 2025-12-01 23:55:55 +01:00
add_timespan_to_custodians.py add new entries 2025-12-07 23:08:02 +01:00
add_uuids_to_latin_american_institutions.py Add comprehensive tests for NLP institution extraction and RDF partnership integration 2025-11-19 23:20:47 +01:00
add_valid_isil_urls.py enrich entries 2025-12-09 10:46:43 +01:00
add_valid_isil_urls_fast.py enrich entries 2025-12-09 10:46:43 +01:00
add_web_claim_provenance.py add sparql validator and RAG injector 2025-12-30 03:43:31 +01:00
add_xpath_provenance.py archive websites 2025-11-29 18:05:16 +01:00
add_yaml_provenance.py add sparql validator and RAG injector 2025-12-30 03:43:31 +01:00
agentic_annotator.py annotation standards added 2025-12-05 15:30:23 +01:00
analyze_aron_metadata_sample.py updated schemata 2025-11-21 22:12:33 +01:00
analyze_claim_types.py validate enrichments 2025-12-02 14:36:01 +01:00
analyze_egypt_matches.py Add comprehensive tests for NLP institution extraction and RDF partnership integration 2025-11-19 23:20:47 +01:00
analyze_entity_duplicates.py enrich person custodian 2025-12-14 17:09:55 +01:00
analyze_ghcid_collisions.py Add comprehensive tests for NLP institution extraction and RDF partnership integration 2025-11-19 23:20:47 +01:00
analyze_japan_prefectures.py Add comprehensive tests for NLP institution extraction and RDF partnership integration 2025-11-19 23:20:47 +01:00
analyze_layout_patterns.py enrich person custodian 2025-12-14 17:09:55 +01:00
analyze_mexican_geography.py add isil entries 2025-11-19 23:25:22 +01:00
analyze_person_entities.py add person profiles with PPID 2026-01-09 18:26:58 +01:00
analyze_web_validation_results.py Add comprehensive tests for NLP institution extraction and RDF partnership integration 2025-11-19 23:20:47 +01:00
annotate_custodian_types_glm.py enrich entries and persons 2025-12-10 18:04:25 +01:00
apply_ch_annotator_algeria.py add new entries 2025-12-07 00:26:01 +01:00
apply_ch_annotator_batch.py add new entries 2025-12-07 00:26:01 +01:00
apply_collision_resolution_dutch_datasets.py Add comprehensive tests for NLP institution extraction and RDF partnership integration 2025-11-19 23:20:47 +01:00
apply_linkedin_locations.py enrich profiles 2026-01-09 20:35:19 +01:00
apply_verified_enrichments.py remove a,bihguous web-claims 2025-12-21 00:01:54 +01:00
apply_wikidata_links.py enrich person custodian 2025-12-14 17:09:55 +01:00
apply_wikidata_validation.py add isil entries 2025-11-19 23:25:22 +01:00
archive_digital_platforms.py annotation standards added 2025-12-05 15:30:23 +01:00
archive_missing_websites.py enrich entries 2025-12-01 16:06:34 +01:00
archive_platforms_batch.py improve annotation prompt 2025-12-05 15:51:39 +01:00
archive_website_full.py update entries 2025-11-30 23:30:29 +01:00
audit_web_claims.py add pids 2025-12-01 23:55:55 +01:00
authenticate_linkedin_unipile.py correct HCID! 2025-12-10 13:01:13 +01:00
backfill_authoritative_enrichment_history.py Add comprehensive tests for NLP institution extraction and RDF partnership integration 2025-11-19 23:20:47 +01:00
backfill_chilean_enrichment_history.py Add comprehensive tests for NLP institution extraction and RDF partnership integration 2025-11-19 23:20:47 +01:00
backfill_north_africa_enrichment_history.py Add comprehensive tests for NLP institution extraction and RDF partnership integration 2025-11-19 23:20:47 +01:00
backfill_remaining_enrichment_history.py Add comprehensive tests for NLP institution extraction and RDF partnership integration 2025-11-19 23:20:47 +01:00
batch11_final_validation.json Add comprehensive tests for NLP institution extraction and RDF partnership integration 2025-11-19 23:20:47 +01:00
batch11_query_results.json Add comprehensive tests for NLP institution extraction and RDF partnership integration 2025-11-19 23:20:47 +01:00
batch11_reviewed_matches.json Add comprehensive tests for NLP institution extraction and RDF partnership integration 2025-11-19 23:20:47 +01:00
batch12_final_validation.json Add comprehensive tests for NLP institution extraction and RDF partnership integration 2025-11-19 23:20:47 +01:00
batch12_library_query_results.json Add comprehensive tests for NLP institution extraction and RDF partnership integration 2025-11-19 23:20:47 +01:00
batch13_manual_search_results.json Add comprehensive tests for NLP institution extraction and RDF partnership integration 2025-11-19 23:20:47 +01:00
batch14_manual_results.json Add comprehensive tests for NLP institution extraction and RDF partnership integration 2025-11-19 23:20:47 +01:00
batch14_quick_search_results.json Add comprehensive tests for NLP institution extraction and RDF partnership integration 2025-11-19 23:20:47 +01:00
batch_annotate_nde.py annotation standards added 2025-12-05 15:30:23 +01:00
batch_correct_heritage_relevance.py enrich HC profiles 2026-01-02 02:11:04 +01:00
batch_crawl4ai_recrawl.py feat(scripts): Add batch crawling and data quality scripts 2025-12-15 01:47:46 +01:00
batch_extract_institutions.py Add comprehensive tests for NLP institution extraction and RDF partnership integration 2025-11-19 23:20:47 +01:00
batch_extract_linkedin_exa.py enrich person custodian 2025-12-14 17:09:55 +01:00
batch_extract_mission_statements.py enrich CH entries with mission statements 2026-01-04 13:12:32 +01:00
batch_extract_partnerships.py Add comprehensive tests for NLP institution extraction and RDF partnership integration 2025-11-19 23:20:47 +01:00
batch_extract_web_annotations.py annotation standards added 2025-12-05 15:30:23 +01:00
batch_extract_web_claims.py update enriched entries 2025-12-03 17:38:46 +01:00
batch_firecrawl_recrawl.py feat(scripts): Add batch crawling and data quality scripts 2025-12-15 01:47:46 +01:00
batch_fix_validation_errors.py Add comprehensive tests for NLP institution extraction and RDF partnership integration 2025-11-19 23:20:47 +01:00
batch_httpx_scrape.py feat(scripts): Add batch crawling and data quality scripts 2025-12-15 01:47:46 +01:00
batch_httpx_scrape_aggressive.py enrich all custodian timespan 2025-12-15 22:31:41 +01:00
batch_parse_linkedin_html.py enrich person custodian 2025-12-14 17:09:55 +01:00
batch_parse_linkedin_manual.py add sparql validator and RAG injector 2025-12-30 03:43:31 +01:00
batch_parse_linkedin_manual_v2.py add sparql validator and RAG injector 2025-12-30 03:43:31 +01:00
batch_parse_linkedin_orgs.py enrich entries and persons 2025-12-10 18:04:25 +01:00
batch_parse_linkedin_simple.py add sparql validator and RAG injector 2025-12-30 03:43:31 +01:00
batch_scrape_austrian_isil.py add isil entries 2025-11-19 23:25:22 +01:00
batch_scrape_austrian_pages.sh add isil entries 2025-11-19 23:25:22 +01:00
batch_update_helper.py add isil entries 2025-11-19 23:25:22 +01:00
batch_update_zcbs.py update entries 2025-11-30 23:30:29 +01:00
benchmark_geo_filtering.py add new entries and semantic routing 2025-12-17 10:11:56 +01:00
bosnia_isil_scraper.py add isil entries 2025-11-19 23:25:22 +01:00
build_geonames_db.py Add comprehensive tests for NLP institution extraction and RDF partnership integration 2025-11-19 23:20:47 +01:00
build_linkedin_index.py extend ontology 2025-12-16 20:27:39 +01:00
build_unified_database.py updated schemata 2025-11-21 22:12:33 +01:00
build_unified_database_v2.py update enriched entries 2025-12-03 17:38:46 +01:00
categorize_pending_files.py feat(data): merge staff data from 35 PENDING files into enriched custodians 2026-01-09 14:51:17 +01:00
centralize_inline_slots.py refactor(schema): centralize 1515 inline slot definitions per Rule 48 2026-01-11 22:02:14 +01:00
check_argentina_enrichment_status.sh add isil entries 2025-11-19 23:25:22 +01:00
check_austrian_scraping_progress.py add isil entries 2025-11-19 23:25:22 +01:00
check_geocoding_progress.sh Add comprehensive tests for NLP institution extraction and RDF partnership integration 2025-11-19 23:20:47 +01:00
check_switzerland_progress.sh add isil entries 2025-11-19 23:25:22 +01:00
check_validation_progress.py add isil entries 2025-11-19 23:25:22 +01:00
clean_and_enrich_archiveslab.py feat: implement LLM-based extraction for Archives Lab content 2025-12-05 23:16:21 +01:00
clean_person_data.py correct person labels 2025-12-14 17:58:55 +01:00
cleanup_class_descriptions.py Fix LinkML schema for valid RDF generation with proper slot_uri 2026-01-07 13:48:03 +01:00
cleanup_class_descriptions_v2.py Fix LinkML schema for valid RDF generation with proper slot_uri 2026-01-07 13:48:03 +01:00
cleanup_contact_false_positives.py enrich person custodian 2025-12-14 17:09:55 +01:00
cleanup_contact_false_positives_v2.py enrich person custodian 2025-12-14 17:09:55 +01:00
cleanup_entities.py enrich person custodian 2025-12-14 17:09:55 +01:00
cleanup_fabricated_web_enrichment.py remove a,bihguous web-claims 2025-12-21 00:01:54 +01:00
cleanup_linkedin_mismatches.py Evaluate data enrichments of persons 2026-01-11 12:15:27 +01:00
cleanup_person_web_claims.py Evaluate data enrichments of persons 2026-01-11 12:15:27 +01:00
cleanup_redundant_descriptions.py Fix LinkML URI conflicts and generate RDF outputs 2026-01-07 12:32:59 +01:00
cleanup_web_claims.py remove a,bihguous web-claims 2025-12-21 00:01:54 +01:00
cleanup_web_claims_fast.py remove a,bihguous web-claims 2025-12-21 00:01:54 +01:00
combine_latin_american_datasets.py Add comprehensive tests for NLP institution extraction and RDF partnership integration 2025-11-19 23:20:47 +01:00
consolidate_le_passe_duplicates.py remove a,bihguous web-claims 2025-12-21 00:01:54 +01:00
convert_bulgarian_isil_to_linkml.py edit Japanese entries 2025-12-09 09:16:19 +01:00
convert_isil_csv_to_yaml.py edit Japanese entries 2025-12-09 09:16:19 +01:00
convert_library_isil_csv_to_yaml.py edit Japanese entries 2025-12-09 09:16:19 +01:00
convert_nde_csv_to_yaml.py add isil entries 2025-11-19 23:25:22 +01:00
convert_palestinian_to_custodian.py add new entries 2025-12-07 00:26:01 +01:00
convert_switzerland_linkml.py add isil entries 2025-11-19 23:25:22 +01:00
convert_website_enrichment_to_claims.py Refactor code structure for improved readability and maintainability 2025-11-29 12:27:39 +01:00
convert_wikidata_to_linkml.py Add comprehensive tests for NLP institution extraction and RDF partnership integration 2025-11-19 23:20:47 +01:00
crawl_kien_custodians.py annotation standards added 2025-12-05 15:30:23 +01:00
crawl_kien_playwright.py annotation standards added 2025-12-05 15:30:23 +01:00
create_basic_entity_profiles.py enrich person custodian 2025-12-14 17:09:55 +01:00
create_custodian_from_ch_annotator.py feat(scripts): add city enrichment and location resolution utilities 2025-12-07 14:26:59 +01:00
create_custodians_from_linkedin.py add new entries and semantic routing 2025-12-17 10:11:56 +01:00
create_minimal_profiles.py enrich custodians 2025-12-11 22:32:09 +01:00
create_missing_slots.py Refactor code structure for improved readability and maintainability 2026-01-09 11:05:26 +01:00
create_mow_custodians.py add new entries 2025-12-07 00:26:01 +01:00
create_palestinian_linkml_instances.py added web annotations 2025-12-06 19:50:04 +01:00
crosslink_czech_datasets.py add isil entries 2025-11-19 23:25:22 +01:00
crosslink_czech_datasets_quick.py add isil entries 2025-11-19 23:25:22 +01:00
crossref_kien_wikidata.py annotation standards added 2025-12-05 15:30:23 +01:00
debug_diocesan_library.py Add comprehensive tests for NLP institution extraction and RDF partnership integration 2025-11-19 23:20:47 +01:00
debug_enrich_tunisia.py Add comprehensive tests for NLP institution extraction and RDF partnership integration 2025-11-19 23:20:47 +01:00
debug_matching_logic.py Add comprehensive tests for NLP institution extraction and RDF partnership integration 2025-11-19 23:20:47 +01:00
debug_sousse_wikidata.py Add comprehensive tests for NLP institution extraction and RDF partnership integration 2025-11-19 23:20:47 +01:00
debug_unipile_api.py correct HCID! 2025-12-10 13:01:13 +01:00
deduplicate_classes.py Implement fast WCMS migration script with state file checkpointing and batch processing 2026-01-11 22:26:37 +01:00
deduplicate_mexican_institutions.py add isil entries 2025-11-19 23:25:22 +01:00
deduplicate_tunisia.py Add comprehensive tests for NLP institution extraction and RDF partnership integration 2025-11-19 23:20:47 +01:00
demo_enrichment_history.py Add comprehensive tests for NLP institution extraction and RDF partnership integration 2025-11-19 23:20:47 +01:00
demo_v5_success.sh Add comprehensive tests for NLP institution extraction and RDF partnership integration 2025-11-19 23:20:47 +01:00
derive_custodian_name.py enrich entries 2025-12-01 16:06:34 +01:00
derive_custodian_name_v2.py update enriched entries 2025-12-03 17:38:46 +01:00
detect_gmaps_mismatches.py fix: mark 8 more Google Maps false matches detected via name mismatch 2026-01-08 13:26:53 +01:00
detect_name_mismatch.py feat(scripts): Add batch crawling and data quality scripts 2025-12-15 01:47:46 +01:00
detect_youtube_misattributions.py update enriched entries 2025-12-03 17:38:46 +01:00
detect_youtube_misattributions_fast.py update enriched entries 2025-12-03 17:38:46 +01:00
diagnose_egypt_matching.py Add comprehensive tests for NLP institution extraction and RDF partnership integration 2025-11-19 23:20:47 +01:00
discover_custodian_websites.py enrich entries 2025-12-26 14:30:31 +01:00
discover_websites_crawl4ai.py enrich custodian entries with logo 2025-12-27 02:15:17 +01:00
discover_websites_linkup.py enrich custodian entries with logo 2025-12-27 02:15:17 +01:00
download_austrian_isil_via_browser.py add isil entries 2025-11-19 23:25:22 +01:00
download_dutch_municipalities_geojson.py update enriched entries 2025-12-03 17:38:46 +01:00
enhance_tunisia_dataset.py Add comprehensive tests for NLP institution extraction and RDF partnership integration 2025-11-19 23:20:47 +01:00
enrich_algeria_wikidata.py edit Japanese entries 2025-12-09 09:16:19 +01:00
enrich_algeria_wikidata_fuzzy.py edit Japanese entries 2025-12-09 09:16:19 +01:00
enrich_argentina_wikidata.py add isil entries 2025-11-19 23:25:22 +01:00
enrich_austrian_cities.py feat(scripts): add city enrichment and location resolution utilities 2025-12-07 14:26:59 +01:00
enrich_belgian_cities.py feat(scripts): add city enrichment and location resolution utilities 2025-12-07 14:26:59 +01:00
enrich_belgian_locations.py add isil entries 2025-11-19 23:25:22 +01:00
enrich_belgian_v2.py feat(scripts): add city enrichment and location resolution utilities 2025-12-07 14:26:59 +01:00
enrich_belgian_wikidata.py add isil entries 2025-11-19 23:25:22 +01:00
enrich_belgium_eu.py edit Japanese entries 2025-12-09 09:16:19 +01:00
enrich_belgium_isil.py remove a,bihguous web-claims 2025-12-21 00:01:54 +01:00
enrich_belgium_manual.py Add comprehensive tests for NLP institution extraction and RDF partnership integration 2025-11-19 23:20:47 +01:00
enrich_belgium_wikidata_fuzzy.py remove a,bihguous web-claims 2025-12-21 00:01:54 +01:00
enrich_bulgarian_cities.py feat(scripts): add city enrichment and location resolution utilities 2025-12-07 14:26:59 +01:00
enrich_bulgarian_regions.py add isil entries 2025-11-19 23:25:22 +01:00
enrich_bulgarian_wikidata.py add isil entries 2025-11-19 23:25:22 +01:00
enrich_by_location.py remove a,bihguous web-claims 2025-12-21 00:01:54 +01:00
enrich_chilean_batch1_manual.py Add comprehensive tests for NLP institution extraction and RDF partnership integration 2025-11-19 23:20:47 +01:00
enrich_chilean_batch2_corrected.py Add comprehensive tests for NLP institution extraction and RDF partnership integration 2025-11-19 23:20:47 +01:00
enrich_chilean_batch2_universities.py Add comprehensive tests for NLP institution extraction and RDF partnership integration 2025-11-19 23:20:47 +01:00
enrich_chilean_batch2_university_depts.py Add comprehensive tests for NLP institution extraction and RDF partnership integration 2025-11-19 23:20:47 +01:00
enrich_chilean_batch3.py Add comprehensive tests for NLP institution extraction and RDF partnership integration 2025-11-19 23:20:47 +01:00
enrich_chilean_batch4.py Add comprehensive tests for NLP institution extraction and RDF partnership integration 2025-11-19 23:20:47 +01:00
enrich_chilean_batch5.py Add comprehensive tests for NLP institution extraction and RDF partnership integration 2025-11-19 23:20:47 +01:00
enrich_chilean_batch6.py Add comprehensive tests for NLP institution extraction and RDF partnership integration 2025-11-19 23:20:47 +01:00
enrich_chilean_batch7.py Add comprehensive tests for NLP institution extraction and RDF partnership integration 2025-11-19 23:20:47 +01:00
enrich_chilean_batch8.py Add comprehensive tests for NLP institution extraction and RDF partnership integration 2025-11-19 23:20:47 +01:00
enrich_chilean_batch10.py Add comprehensive tests for NLP institution extraction and RDF partnership integration 2025-11-19 23:20:47 +01:00
enrich_chilean_batch11.py Add comprehensive tests for NLP institution extraction and RDF partnership integration 2025-11-19 23:20:47 +01:00
enrich_chilean_batch13.py Add comprehensive tests for NLP institution extraction and RDF partnership integration 2025-11-19 23:20:47 +01:00
enrich_chilean_batch14.py Add comprehensive tests for NLP institution extraction and RDF partnership integration 2025-11-19 23:20:47 +01:00
enrich_chilean_batch15.py Add comprehensive tests for NLP institution extraction and RDF partnership integration 2025-11-19 23:20:47 +01:00
enrich_chilean_batch16.py Add comprehensive tests for NLP institution extraction and RDF partnership integration 2025-11-19 23:20:47 +01:00
enrich_chilean_batch17.py Add comprehensive tests for NLP institution extraction and RDF partnership integration 2025-11-19 23:20:47 +01:00
enrich_chilean_batch18.py Add comprehensive tests for NLP institution extraction and RDF partnership integration 2025-11-19 23:20:47 +01:00
enrich_chilean_batch19.py Add comprehensive tests for NLP institution extraction and RDF partnership integration 2025-11-19 23:20:47 +01:00
enrich_chilean_batch20_v0.2.2.py Add comprehensive tests for NLP institution extraction and RDF partnership integration 2025-11-19 23:20:47 +01:00
enrich_chilean_batch20_v0.2.2.py.backup Add comprehensive tests for NLP institution extraction and RDF partnership integration 2025-11-19 23:20:47 +01:00
enrich_chilean_batch20_v0.2.2_test.py Add comprehensive tests for NLP institution extraction and RDF partnership integration 2025-11-19 23:20:47 +01:00
enrich_chilean_batch20_v0.2.2_test.py.bak Add comprehensive tests for NLP institution extraction and RDF partnership integration 2025-11-19 23:20:47 +01:00
enrich_chilean_institutions.py edit Japanese entries 2025-12-09 09:16:19 +01:00
enrich_cities_google.py enrich JP BE AR profiles 2025-12-30 23:07:03 +01:00
enrich_custodian_emic_names.py feat(enrichment): add emic name enrichment and update CustodianName schema 2025-12-08 14:58:50 +01:00
enrich_custodian_files.py add new entries 2025-12-07 00:26:01 +01:00
enrich_custodian_logos.py enrich entries 2025-12-21 22:12:34 +01:00
enrich_custodian_logos_crawl4ai.py enrich entries 2025-12-26 14:30:31 +01:00
enrich_custodian_logos_playwright.py enrich entries 2025-12-21 22:12:34 +01:00
enrich_custodian_youtube_maps.py Refactor code structure for improved readability and maintainability 2025-12-09 11:15:51 +01:00
enrich_custodians_generic.py remove a,bihguous web-claims 2025-12-21 00:01:54 +01:00
enrich_custodians_google_maps_playwright.py enrich entries 2025-12-09 10:46:43 +01:00
enrich_custodians_wikidata_full.py reconstruct location blocks 2025-12-09 12:25:16 +01:00
enrich_custodians_wikidata_inception.py normalise custodian entries 2025-12-09 07:56:35 +01:00
enrich_czech_cities.py feat(scripts): add city enrichment and location resolution utilities 2025-12-07 14:26:59 +01:00
enrich_czech_cities_fast.py feat(scripts): add city enrichment and location resolution utilities 2025-12-07 14:26:59 +01:00
enrich_czech_sigla.py remove a,bihguous web-claims 2025-12-21 00:01:54 +01:00
enrich_czech_wikidata.py edit Japanese entries 2025-12-09 09:16:19 +01:00
enrich_czech_wikidata_fuzzy.py remove a,bihguous web-claims 2025-12-21 00:01:54 +01:00
enrich_denmark_wikidata.py add isil entries 2025-11-19 23:25:22 +01:00
enrich_descriptions.py feat(enrichment): add emic name enrichment and update CustodianName schema 2025-12-08 14:58:50 +01:00
enrich_digital_platforms.py annotation standards added 2025-12-05 15:30:23 +01:00
enrich_digital_platforms_fast.py annotation standards added 2025-12-05 15:30:23 +01:00
enrich_dutch_custodians_crawl4ai.py feat(scripts): Add batch crawling and data quality scripts 2025-12-15 01:47:46 +01:00
enrich_dutch_custodians_firecrawl.py correct person labels 2025-12-14 17:29:39 +01:00
enrich_dutch_institutions_fuzzy.py Add comprehensive tests for NLP institution extraction and RDF partnership integration 2025-11-19 23:20:47 +01:00
enrich_egypt_viaf.py Add comprehensive tests for NLP institution extraction and RDF partnership integration 2025-11-19 23:20:47 +01:00
enrich_egypt_wikidata.py edit Japanese entries 2025-12-09 09:16:19 +01:00
enrich_empty_experience_linkup.py enrich HC profiles 2026-01-02 02:11:04 +01:00
enrich_from_osm.py Add comprehensive tests for NLP institution extraction and RDF partnership integration 2025-11-19 23:20:47 +01:00
enrich_from_osm_batched.py Add comprehensive tests for NLP institution extraction and RDF partnership integration 2025-11-19 23:20:47 +01:00
enrich_from_viaf.py edit Japanese entries 2025-12-09 09:16:19 +01:00
enrich_from_wikidata.py edit Japanese entries 2025-12-09 09:16:19 +01:00
enrich_gb_batch1.py Add comprehensive tests for NLP institution extraction and RDF partnership integration 2025-11-19 23:20:47 +01:00
enrich_gb_manual.py Add comprehensive tests for NLP institution extraction and RDF partnership integration 2025-11-19 23:20:47 +01:00
enrich_gb_manual_v2.py Add comprehensive tests for NLP institution extraction and RDF partnership integration 2025-11-19 23:20:47 +01:00
enrich_georgia_batch1.py Add comprehensive tests for NLP institution extraction and RDF partnership integration 2025-11-19 23:20:47 +01:00
enrich_georgia_batch2_alternative_names.py Add comprehensive tests for NLP institution extraction and RDF partnership integration 2025-11-19 23:20:47 +01:00
enrich_georgia_batch3_manual.py Add comprehensive tests for NLP institution extraction and RDF partnership integration 2025-11-19 23:20:47 +01:00
enrich_global_with_wikidata.py Add comprehensive tests for NLP institution extraction and RDF partnership integration 2025-11-19 23:20:47 +01:00
enrich_global_with_wikidata_fast.py Add comprehensive tests for NLP institution extraction and RDF partnership integration 2025-11-19 23:20:47 +01:00
enrich_hyponyms_with_ontology.py Add initial versions of custodian hub UML diagrams in Mermaid and PlantUML formats 2025-11-22 14:33:51 +01:00
enrich_hyponyms_with_wikidata.py add isil entries 2025-11-19 23:25:22 +01:00
enrich_hyponyms_with_wikidata.py.bak add isil entries 2025-11-19 23:25:22 +01:00
enrich_institutions_wikidata_sparql.py Add comprehensive tests for NLP institution extraction and RDF partnership integration 2025-11-19 23:20:47 +01:00
enrich_it_manual.py Add comprehensive tests for NLP institution extraction and RDF partnership integration 2025-11-19 23:20:47 +01:00
enrich_japan_wikidata_real.py updated schemata 2025-11-21 22:12:33 +01:00
enrich_japan_with_qnumbers.py Add comprehensive tests for NLP institution extraction and RDF partnership integration 2025-11-19 23:20:47 +01:00
enrich_japanese_cities.py feat(scripts): add city enrichment and location resolution utilities 2025-12-07 14:26:59 +01:00
enrich_kb_libraries_exa.py Add script to validate KB library entries and generate enrichment report 2025-11-28 14:48:33 +01:00
enrich_kb_libraries_exa_provenance.py Refactor code structure for improved readability and maintainability 2025-11-29 12:27:39 +01:00
enrich_kb_libraries_google_maps.py Add script to validate KB library entries and generate enrichment report 2025-11-28 14:48:33 +01:00
enrich_kb_libraries_wikidata.py Add script to validate KB library entries and generate enrichment report 2025-11-28 14:48:33 +01:00
enrich_kien_ghcid.py annotation standards added 2025-12-05 15:30:23 +01:00
enrich_kien_wikidata.py annotation standards added 2025-12-05 15:30:23 +01:00
enrich_known_institutions_manual.py Add comprehensive tests for NLP institution extraction and RDF partnership integration 2025-11-19 23:20:47 +01:00
enrich_latam_alternative_names.py edit Japanese entries 2025-12-09 09:16:19 +01:00
enrich_latam_institutions_fuzzy.py Add comprehensive tests for NLP institution extraction and RDF partnership integration 2025-11-19 23:20:47 +01:00
enrich_libya_wikidata.py edit Japanese entries 2025-12-09 09:16:19 +01:00
enrich_libya_wikidata_fuzzy.py edit Japanese entries 2025-12-09 09:16:19 +01:00
enrich_linkedin_profiles_linkup.py enrich HC profiles 2026-01-02 02:11:04 +01:00
enrich_linkedin_profiles_unipile.py correct HCID! 2025-12-10 13:01:13 +01:00
enrich_linkedin_ultimate.py correct HCID! 2025-12-10 13:01:13 +01:00
enrich_linkedin_ultimate_auth.py correct HCID! 2025-12-10 13:01:13 +01:00
enrich_low_coverage_countries_fuzzy.py Add comprehensive tests for NLP institution extraction and RDF partnership integration 2025-11-19 23:20:47 +01:00
enrich_luxembourg_manual.py Add comprehensive tests for NLP institution extraction and RDF partnership integration 2025-11-19 23:20:47 +01:00
enrich_mexican_cities.py add isil entries 2025-11-19 23:25:22 +01:00
enrich_mexican_institutions.py Add comprehensive tests for NLP institution extraction and RDF partnership integration 2025-11-19 23:20:47 +01:00
enrich_mexico_batch01.py edit Japanese entries 2025-12-09 09:16:19 +01:00
enrich_mexico_batch02.py Add comprehensive tests for NLP institution extraction and RDF partnership integration 2025-11-19 23:20:47 +01:00
enrich_missing_websites.py update entries 2025-11-30 23:30:29 +01:00
enrich_nde_entries.py update entries 2025-11-30 23:30:29 +01:00
enrich_nde_entries_ghcid.py update enriched entries 2025-12-03 17:38:46 +01:00
enrich_nde_fast.py Refactor code structure for improved readability and maintainability 2025-11-28 11:44:21 +01:00
enrich_nde_from_wikidata.py Refactor code structure for improved readability and maintainability 2025-11-27 17:43:14 +01:00
enrich_nde_full_dataset.py add isil entries 2025-11-19 23:25:22 +01:00
enrich_nde_genealogiewerkbalk.py update enriched entries 2025-12-03 17:38:46 +01:00
enrich_nde_google_maps.py Refactor code structure for improved readability and maintainability 2025-11-29 12:27:39 +01:00
enrich_nde_with_wikidata.py add isil entries 2025-11-19 23:25:22 +01:00
enrich_not_found_with_exa.py Refactor code structure for improved readability and maintainability 2025-11-28 11:44:21 +01:00
enrich_palestinian_gaza_2024.py added web annotations 2025-12-06 19:50:04 +01:00
enrich_palestinian_google_maps.py added web annotations 2025-12-06 19:50:04 +01:00
enrich_person_comprehensive.py Evaluate data enrichments of persons 2026-01-11 12:15:27 +01:00
enrich_phase2_mexico.py Add comprehensive tests for NLP institution extraction and RDF partnership integration 2025-11-19 23:20:47 +01:00
enrich_phase2_netherlands.py Add comprehensive tests for NLP institution extraction and RDF partnership integration 2025-11-19 23:20:47 +01:00
enrich_ppids.py feat(scripts): expand university location mappings and add web enrichment 2026-01-09 21:10:14 +01:00
enrich_ppids_linkup.py feat(scripts): add person enrichment and slot mapping utilities 2026-01-10 13:32:32 +01:00
enrich_ppids_web.py feat(scripts): expand university location mappings and add web enrichment 2026-01-09 21:10:14 +01:00
enrich_profiles_linkup.py enrich HC profiles 2026-01-02 02:11:04 +01:00
enrich_ps_service_area.py add new entries 2025-12-07 23:08:02 +01:00
enrich_sachsen_anhalt_archives_manual.py updated schemata 2025-11-21 22:12:33 +01:00
enrich_single_profile_linkup.py enrich HC profiles 2026-01-02 02:11:04 +01:00
enrich_swiss_isil_cities.py feat(scripts): add city enrichment and location resolution utilities 2025-12-07 14:26:59 +01:00
enrich_swiss_wikidata_fuzzy.py remove a,bihguous web-claims 2025-12-21 00:01:54 +01:00
enrich_timespan_linkup.py add timespans 2025-12-16 09:02:52 +01:00
enrich_tunisia_wikidata.py edit Japanese entries 2025-12-09 09:16:19 +01:00
enrich_tunisia_wikidata_fuzzy.py edit Japanese entries 2025-12-09 09:16:19 +01:00
enrich_tunisia_wikidata_validated.py edit Japanese entries 2025-12-09 09:16:19 +01:00
enrich_unesco_ich.py add new entries 2025-12-07 00:26:01 +01:00
enrich_unesco_mow.py add new entries 2025-12-07 00:26:01 +01:00
enrich_unesco_world_heritage.py add new entries 2025-12-07 00:26:01 +01:00
enrich_urls_exa.py archive websites 2025-11-29 18:05:16 +01:00
enrich_us_manual.py Add comprehensive tests for NLP institution extraction and RDF partnership integration 2025-11-19 23:20:47 +01:00
enrich_with_lap_gaza_report.py normalise custodian entries 2025-12-09 07:56:35 +01:00
enrich_xxx_from_linkedin_html.py add new entries and semantic routing 2025-12-17 10:11:56 +01:00
enrich_xxx_placeholders.py add new entries and semantic routing 2025-12-17 10:11:56 +01:00
enrich_xxx_via_web_search.py clean up GHCID 2025-12-17 11:58:40 +01:00
enrich_youtube.py validate enrichments 2025-12-02 14:36:01 +01:00
enrich_youtube_from_wikidata.py enrich entries and persons 2025-12-10 18:04:25 +01:00
enrich_youtube_videos.py normalise custodian entries 2025-12-09 07:56:35 +01:00
exa_search_kien_locations.py annotation standards added 2025-12-05 15:30:23 +01:00
execute_archive_query_corrected.py add isil entries 2025-11-19 23:25:22 +01:00
execute_web_validation.py Add comprehensive tests for NLP institution extraction and RDF partnership integration 2025-11-19 23:20:47 +01:00
export_algeria_to_rdf.py Add comprehensive tests for NLP institution extraction and RDF partnership integration 2025-11-19 23:20:47 +01:00
export_argentina_to_linkml.py add isil entries 2025-11-19 23:25:22 +01:00
export_belgian_institutions.py add isil entries 2025-11-19 23:25:22 +01:00
export_belgian_rdf.py add isil entries 2025-11-19 23:25:22 +01:00
export_denmark_rdf.py add isil entries 2025-11-19 23:25:22 +01:00
export_denmark_rdf_enriched.py add isil entries 2025-11-19 23:25:22 +01:00
export_eu_isil_to_linkml.py Add comprehensive tests for NLP institution extraction and RDF partnership integration 2025-11-19 23:20:47 +01:00
export_hc_storage_rdf.py enrich CH entries with mission statements 2026-01-04 13:12:32 +01:00
export_japanese_isil_to_linkml.py Add comprehensive tests for NLP institution extraction and RDF partnership integration 2025-11-19 23:20:47 +01:00
export_latin_american_datasets.py Add comprehensive tests for NLP institution extraction and RDF partnership integration 2025-11-19 23:20:47 +01:00
export_libya_to_rdf.py Add comprehensive tests for NLP institution extraction and RDF partnership integration 2025-11-19 23:20:47 +01:00
export_nde_enriched.py Refactor code structure for improved readability and maintainability 2025-11-28 11:44:21 +01:00
export_nde_for_duckdb.py added web annotations 2025-12-06 19:50:04 +01:00
export_nde_map_json.py add new entries 2025-12-07 23:08:02 +01:00
export_nde_stats_json.py added web annotations 2025-12-06 19:50:04 +01:00
export_palestinian_rdf.py added web annotations 2025-12-06 19:50:04 +01:00
export_switzerland_csv.py add isil entries 2025-11-19 23:25:22 +01:00
extract-types-vocab.ts feat(scripts): improve types-vocab extraction to derive all vocabulary from schema 2026-01-10 15:37:52 +01:00
extract_about_page_data.py extend ontology 2025-12-16 20:27:39 +01:00
extract_argentina_wikidata.py add isil entries 2025-11-19 23:25:22 +01:00
extract_comprehensive_profiles.py enrich custodians 2025-12-11 22:32:09 +01:00
extract_contact_page_data.py enrich person custodian 2025-12-14 17:09:55 +01:00
extract_custodian_name.py enrich entries 2025-12-01 16:06:34 +01:00
extract_custodian_source_classes.py update frontend 2026-01-08 15:56:28 +01:00
extract_direct.py enrich custodians 2025-12-11 22:32:09 +01:00
extract_egypt_institutions.py Add comprehensive tests for NLP institution extraction and RDF partnership integration 2025-11-19 23:20:47 +01:00
extract_egypt_step2.py Add comprehensive tests for NLP institution extraction and RDF partnership integration 2025-11-19 23:20:47 +01:00
extract_egypt_step3.py Add comprehensive tests for NLP institution extraction and RDF partnership integration 2025-11-19 23:20:47 +01:00
extract_egypt_step4.py Add comprehensive tests for NLP institution extraction and RDF partnership integration 2025-11-19 23:20:47 +01:00
extract_global_wikidata.py Add comprehensive tests for NLP institution extraction and RDF partnership integration 2025-11-19 23:20:47 +01:00
extract_html_claims.py enrich person profiles 2025-12-12 12:51:10 +01:00
extract_hybrid.py enrich person custodian 2025-12-14 17:09:55 +01:00
extract_inline_slots.py Refactor code structure for improved readability and maintainability 2026-01-09 11:05:26 +01:00
extract_isil_from_wikidata.py edit Japanese entries 2025-12-09 09:16:19 +01:00
extract_kien_locations_from_names.py annotation standards added 2025-12-05 15:30:23 +01:00
extract_linkedin_locations.py enrich profiles 2026-01-09 20:35:19 +01:00
extract_linkedin_profile_exa.py enrich custodians 2025-12-11 22:32:09 +01:00
extract_linkedin_profiles.py correct HCID! 2025-12-10 13:01:13 +01:00
extract_linkedin_profiles_v2.py correct HCID! 2025-12-10 13:01:13 +01:00
extract_linkedin_urls_from_html.py enrich entries and persons 2025-12-10 18:04:25 +01:00
extract_locations_ch_annotator.py feat(scripts): add city enrichment and location resolution utilities 2025-12-07 14:26:59 +01:00
extract_mission_statement.py add sparql validator and RAG injector 2025-12-30 03:43:31 +01:00
extract_palestinian_claims.py feat: implement LLM-based extraction for Archives Lab content 2025-12-05 23:16:21 +01:00
extract_person_entities_from_annotations.py enrich person custodian 2025-12-14 17:09:55 +01:00
extract_persons_with_provenance.py feat(scripts): add person enrichment and slot mapping utilities 2026-01-10 13:32:32 +01:00
extract_profiles_direct.py enrich custodians 2025-12-11 22:32:09 +01:00
extract_profiles_simple.py enrich custodians 2025-12-11 22:32:09 +01:00
extract_profiles_working.py enrich custodians 2025-12-11 22:32:09 +01:00
extract_q_numbers_robust.py add isil entries 2025-11-19 23:25:22 +01:00
extract_reenrich_urls_fast.py enrich entries 2025-12-21 22:12:34 +01:00
extract_specific_profiles.py enrich custodians 2025-12-11 22:32:09 +01:00
extract_timeline_events.py enrich HC profiles 2026-01-02 02:11:04 +01:00
extract_trove_contributors.py add isil entries 2025-11-19 23:25:22 +01:00
extract_viaf_from_wikidata.py Add comprehensive tests for NLP institution extraction and RDF partnership integration 2025-11-19 23:20:47 +01:00
extract_website_headers.py validate enrichments 2025-12-02 14:36:01 +01:00
extract_website_layout_docling.py validate enrichments 2025-12-02 14:36:01 +01:00
extract_wikidata_geography.py feat: Complete Country Class Implementation and Hypernyms Removal 2025-11-23 13:09:38 +01:00
extract_with_agents.py Add comprehensive tests for NLP institution extraction and RDF partnership integration 2025-11-19 23:20:47 +01:00
extract_with_patterns.py enrich person custodian 2025-12-14 17:09:55 +01:00
extract_zwolle_profiles.py enrich custodians 2025-12-11 22:32:09 +01:00
fetch_all_linkedin_profiles.py moved web archives 2025-12-12 00:40:26 +01:00
fetch_and_extract_url.py feat: implement LLM-based extraction for Archives Lab content 2025-12-05 23:16:21 +01:00
fetch_linkedin_connections_unipile.py correct HCID! 2025-12-10 13:01:13 +01:00
fetch_linkedin_profiles_complete.py enrich person custodian 2025-12-14 17:09:55 +01:00
fetch_linkedin_profiles_exa.py enrich custodians 2025-12-11 22:32:09 +01:00
fetch_linkedin_profiles_exa_final.py moved web archives 2025-12-12 00:40:26 +01:00
fetch_linkedin_profiles_exa_v2.py enrich custodians 2025-12-11 22:32:09 +01:00
fetch_remaining_custodians.py annotation standards added 2025-12-05 15:30:23 +01:00
fetch_remaining_linkedin_profiles.py enrich custodians 2025-12-11 22:32:09 +01:00
fetch_surname_data.py Refactor code structure for improved readability and maintainability 2026-01-11 15:27:14 +01:00
fetch_website_markdown.py Refactor code structure for improved readability and maintainability 2025-11-29 12:27:39 +01:00
fetch_website_playwright.py Refactor code structure for improved readability and maintainability 2025-11-29 12:27:39 +01:00
final_enrichment_validation_report.py Add comprehensive tests for NLP institution extraction and RDF partnership integration 2025-11-19 23:20:47 +01:00
finalize_batch11.py Add comprehensive tests for NLP institution extraction and RDF partnership integration 2025-11-19 23:20:47 +01:00
finalize_batch12.py Add comprehensive tests for NLP institution extraction and RDF partnership integration 2025-11-19 23:20:47 +01:00
find_wikidata_for_missing_nde.py Refactor code structure for improved readability and maintainability 2025-11-28 11:44:21 +01:00
fix_alias_fields.py Add comprehensive tests for NLP institution extraction and RDF partnership integration 2025-11-19 23:20:47 +01:00
fix_alias_fields_v2.py Add comprehensive tests for NLP institution extraction and RDF partnership integration 2025-11-19 23:20:47 +01:00
fix_ar_region_codes.py remove a,bihguous web-claims 2025-12-21 00:01:54 +01:00
fix_ar_xx_xxx_resolved.py remove a,bihguous web-claims 2025-12-21 00:01:54 +01:00
fix_archive_class_files.py Fix LinkML URI conflicts and generate RDF outputs 2026-01-07 12:32:59 +01:00
fix_argentina_region_codes.py correct HCID! 2025-12-10 13:01:13 +01:00
fix_austrian_region_codes.py correct HCID! 2025-12-10 13:01:13 +01:00
fix_bad_enrichments.py add new entries 2025-12-07 00:26:01 +01:00
fix_bce_dates_final.py Add comprehensive tests for NLP institution extraction and RDF partnership integration 2025-11-19 23:20:47 +01:00
fix_belarus_region_codes.py correct HCID! 2025-12-10 13:01:13 +01:00
fix_belarus_region_codes_v2.py correct HCID! 2025-12-10 13:01:13 +01:00
fix_belgian_cities.py feat(scripts): add city enrichment and location resolution utilities 2025-12-07 14:26:59 +01:00
fix_belgium_region_codes.py correct HCID! 2025-12-10 13:01:13 +01:00
fix_collision_victims.py feat(scripts): Add batch crawling and data quality scripts 2025-12-15 01:47:46 +01:00
fix_czech_collisions.py correct HCID! 2025-12-10 13:01:13 +01:00
fix_czech_region_codes.py correct HCID! 2025-12-10 13:01:13 +01:00
fix_duplicate_temporal_coverage.py Add comprehensive tests for NLP institution extraction and RDF partnership integration 2025-11-19 23:20:47 +01:00
fix_egypt_false_positives.py Add comprehensive tests for NLP institution extraction and RDF partnership integration 2025-11-19 23:20:47 +01:00
fix_fake_isil_urls.py normalise dutch entries 2025-12-09 08:02:27 +01:00
fix_generic_platform_names.py feat(scripts): Add batch crawling and data quality scripts 2025-12-15 01:47:46 +01:00
fix_generic_platform_names_fast.py feat(scripts): Add batch crawling and data quality scripts 2025-12-15 01:47:46 +01:00
fix_geonames_numeric_codes.py correct HCID! 2025-12-10 13:01:13 +01:00
fix_ghcid_diacritics.py feat(ghcid): add diacritics normalization and transliteration scripts 2025-12-08 14:59:28 +01:00
fix_ghcid_location_mismatches.py refactor(scripts): generalize GHCID location fixer for all institution types 2026-01-09 11:54:28 +01:00
fix_ghcid_mismatches.py enrich entries 2025-12-21 22:12:34 +01:00
fix_ghcid_special_chars.py fix: remove custodian files with invalid GHCID special characters 2025-12-07 14:23:50 +01:00
fix_ghcid_type.py feat(scripts): Add batch crawling and data quality scripts 2025-12-15 01:47:46 +01:00
fix_ghriba_geocoding.py Add comprehensive tests for NLP institution extraction and RDF partnership integration 2025-11-19 23:20:47 +01:00
fix_gmaps_false_matches.py fix: mark 8 more Google Maps false matches detected via name mismatch 2026-01-08 13:26:53 +01:00
fix_inst_abbreviations.py correct HCID! 2025-12-10 13:01:13 +01:00
fix_islamic_art_museum.py Add comprehensive tests for NLP institution extraction and RDF partnership integration 2025-11-19 23:20:47 +01:00
fix_japan_region_codes.py correct HCID! 2025-12-10 13:01:13 +01:00
fix_japan_synthetic_qnumbers.py updated schemata 2025-11-21 22:12:33 +01:00
fix_libya_validation_errors.py Add comprehensive tests for NLP institution extraction and RDF partnership integration 2025-11-19 23:20:47 +01:00
fix_metadata_standards.py Add comprehensive tests for NLP institution extraction and RDF partnership integration 2025-11-19 23:20:47 +01:00
fix_mexican_geography.py add isil entries 2025-11-19 23:25:22 +01:00
fix_missing_entity_profiles.py enrich all custodian timespan 2025-12-15 22:31:41 +01:00
fix_moza_false_matches.py fix: mark 8 more Google Maps false matches detected via name mismatch 2026-01-08 13:26:53 +01:00
fix_name_mismatch_false_matches.py fix: mark 8 more Google Maps false matches detected via name mismatch 2026-01-08 13:26:53 +01:00
fix_nl_missing_uuids.py remove a,bihguous web-claims 2025-12-21 00:01:54 +01:00
fix_non_ascii_ghcids.py add new entries and semantic routing 2025-12-17 10:11:56 +01:00
fix_non_dutch_institutions.py clean up GHCID 2025-12-17 11:58:40 +01:00
fix_ps_ghcid_naming.py add new entries 2025-12-07 00:26:01 +01:00
fix_recordsettype_slots.py Fix LinkML schema for valid RDF generation with proper slot_uri 2026-01-07 13:48:03 +01:00
fix_remaining_ar_xx_xxx.py remove a,bihguous web-claims 2025-12-21 00:01:54 +01:00
fix_remaining_false_matches.py fix: mark 15 more Google Maps false matches via comprehensive review 2026-01-08 15:21:31 +01:00
fix_remaining_non_ascii.py add new entries and semantic routing 2025-12-17 10:11:56 +01:00
fix_remaining_numeric_codes.py correct HCID! 2025-12-10 13:01:13 +01:00
fix_remaining_validation_errors.py Add comprehensive tests for NLP institution extraction and RDF partnership integration 2025-11-19 23:20:47 +01:00
fix_sbm_duplicate.py Add comprehensive tests for NLP institution extraction and RDF partnership integration 2025-11-19 23:20:47 +01:00
fix_sbm_duplicate_stream.py Add comprehensive tests for NLP institution extraction and RDF partnership integration 2025-11-19 23:20:47 +01:00
fix_simon_kemper_contamination.py enrich all custodian timespan 2025-12-15 22:31:41 +01:00
fix_slot_imports.py Refactor code structure for improved readability and maintainability 2026-01-09 11:05:26 +01:00
fix_slot_imports_v2.py Refactor code structure for improved readability and maintainability 2026-01-09 11:05:26 +01:00
fix_swiss_region_codes.py correct HCID! 2025-12-10 13:01:13 +01:00
fix_validation_errors.py Add comprehensive tests for NLP institution extraction and RDF partnership integration 2025-11-19 23:20:47 +01:00
fix_validation_errors_by_country.py add isil entries 2025-11-19 23:25:22 +01:00
fix_wie_is_wie_entry.py update enriched entries 2025-12-03 17:38:46 +01:00
fix_xxx_country_codes.py add new entries and semantic routing 2025-12-17 10:11:56 +01:00
fix_xxx_placeholders.py add new entries 2025-12-07 00:26:01 +01:00
fix_yaml_examples.py Fix LinkML schema for valid RDF generation with proper slot_uri 2026-01-07 13:48:03 +01:00
fix_yaml_examples_indentation.py Fix LinkML schema for valid RDF generation with proper slot_uri 2026-01-07 13:48:03 +01:00
fix_yaml_examples_v2.py Fix LinkML schema for valid RDF generation with proper slot_uri 2026-01-07 13:48:03 +01:00
fix_yaml_history.py chore: add YAML history fix scripts and update ducklake/deploy tooling 2025-12-07 18:45:52 +01:00
fix_yaml_history_v2.py chore: add YAML history fix scripts and update ducklake/deploy tooling 2025-12-07 18:45:52 +01:00
fix_youtube_misattribution.py update enriched entries 2025-12-03 17:38:46 +01:00
format_linkedin_profile.py enrich custodians 2025-12-11 22:32:09 +01:00
generate_archive_record_set_types.py Fix LinkML URI conflicts and generate RDF outputs 2026-01-07 12:32:59 +01:00
generate_archive_werkgebied_mapping.py update enriched entries 2025-12-03 17:38:46 +01:00
generate_argentina_uuids.py add isil entries 2025-11-19 23:25:22 +01:00
generate_botanical_query_with_exclusions.py add isil entries 2025-11-19 23:25:22 +01:00
generate_bulgarian_city_regions.py add isil entries 2025-11-19 23:25:22 +01:00
generate_complete_mermaid_diagram.py Refactor code structure for improved readability and maintainability 2025-11-28 11:44:21 +01:00
generate_custodian_type_enums.py Add new enums for PersonalCollectionType, ResearchCenterType, and TasteScentHeritage classifications; implement validation script for custodian names against authoritative sources 2025-12-01 18:39:22 +01:00
generate_entity_stats.py enrich person custodian 2025-12-14 17:09:55 +01:00
generate_feature_type_instances.py update entries 2025-11-30 23:30:29 +01:00
generate_gallery_query_with_exclusions.py add isil entries 2025-11-19 23:25:22 +01:00
generate_ghcids_algeria.py add new entries 2025-12-07 00:26:01 +01:00
generate_ghcids_egypt.py Add comprehensive tests for NLP institution extraction and RDF partnership integration 2025-11-19 23:20:47 +01:00
generate_ghcids_latin_america.py Add comprehensive tests for NLP institution extraction and RDF partnership integration 2025-11-19 23:20:47 +01:00
generate_graphviz_from_owl.py Add UML diagrams and scripts for custodian schema 2025-11-23 23:05:33 +01:00
generate_heritage_form_instances.py annotation standards added 2025-12-05 15:30:23 +01:00
generate_kien_custodian_entries.py annotation standards added 2025-12-05 15:30:23 +01:00
generate_linkedin_custodian_yaml.py extend ontology 2025-12-16 20:27:39 +01:00
generate_mermaid_modular.py enrich entries 2025-12-01 16:06:34 +01:00
generate_mermaid_with_instances.py feat: Add script to generate Mermaid ER diagrams with instance data from LinkML schemas 2025-12-01 16:58:03 +01:00
generate_missing_annotations.py enrich person custodian 2025-12-14 17:09:55 +01:00
generate_nde_map.py Refactor code structure for improved readability and maintainability 2025-11-28 11:44:21 +01:00
generate_palestinian_ghcids.py added web annotations 2025-12-06 19:50:04 +01:00
generate_person_sql.py enrich person profiles 2025-12-12 12:51:10 +01:00
generate_plantuml_modular.py updated schemata 2025-11-21 22:12:33 +01:00
generate_ppids.py fix(ppid): fix unidecode import reference typo 2026-01-09 18:29:36 +01:00
generate_profile_risk_report.py feat(data): add Belgian surnames dataset with metadata and surname counts 2026-01-11 13:50:20 +01:00
generate_shacl.py enrich entries 2025-12-01 16:06:34 +01:00
generate_sparql_validation_rules.py Make AGENTS more succint by referring to opencode rules & enrich custodians 2025-12-28 14:56:35 +01:00
generate_static_rdf_for_frontend.py enrich entries 2025-12-23 13:27:35 +01:00
generate_switzerland_report.py add isil entries 2025-11-19 23:25:22 +01:00
generate_type_prototypes.py add new entries and semantic routing 2025-12-17 10:11:56 +01:00
generate_uml_diagrams.py Add UML diagrams and scripts for custodian schema 2025-11-23 23:05:33 +01:00
generate_werkgebied_mapping.py annotation standards added 2025-12-05 15:30:23 +01:00
generate_wikidata_review_report.py add isil entries 2025-11-19 23:25:22 +01:00
geocode_bulgarian_missing.py add isil entries 2025-11-19 23:25:22 +01:00
geocode_canadian_institutions.py add isil entries 2025-11-19 23:25:22 +01:00
geocode_chilean_institutions.py Add comprehensive tests for NLP institution extraction and RDF partnership integration 2025-11-19 23:20:47 +01:00
geocode_cz_from_ghcid.py geocode: add coordinates to JP compound cities and CZ files from GeoNames 2025-12-09 21:49:40 +01:00
geocode_eg_from_city_code.py geocode: add coordinates to BG and EG custodian files 2025-12-09 21:59:58 +01:00
geocode_from_city_name.py geocode: add coordinates to 147 Swiss custodian files 2025-12-09 22:38:33 +01:00
geocode_from_geonames_id.py geocode: add coordinates to BG and EG custodian files 2025-12-09 21:59:58 +01:00
geocode_global_institutions.py Add comprehensive tests for NLP institution extraction and RDF partnership integration 2025-11-19 23:20:47 +01:00
geocode_japan_postal.py geocode: add coordinates to 540 Japanese custodian files using postal codes 2025-12-10 00:27:33 +01:00
geocode_jp_compound_cities.py geocode: add coordinates to JP compound cities and CZ files from GeoNames 2025-12-09 21:49:40 +01:00
geocode_mexican_institutions.py Add comprehensive tests for NLP institution extraction and RDF partnership integration 2025-11-19 23:20:47 +01:00
geocode_missing_coordinates.py Refactor code structure for improved readability and maintainability 2026-01-11 15:27:14 +01:00
geocode_missing_from_geonames.py feat: Add legal form filtering rule for CustodianName 2025-12-09 16:58:41 +01:00
geocode_remaining_26.py geocode: complete 100% coverage - add coordinates to final 26 files (CZ, BE, AR, LB, ML) 2025-12-10 01:07:34 +01:00
identify_profiles_by_origin.py feat(data): add Belgian surnames dataset with metadata and surname counts 2026-01-11 13:50:20 +01:00
import_argentina_wikidata_institutions.py remove a,bihguous web-claims 2025-12-21 00:01:54 +01:00
import_persons_to_postgres.py enrich person custodian 2025-12-14 17:09:55 +01:00
index_institutions_direct.py remove a,bihguous web-claims 2025-12-21 00:01:54 +01:00
index_institutions_qdrant.py clean up GHCID 2025-12-17 11:58:40 +01:00
index_persons_qdrant.py enrich entries 2025-12-26 21:41:18 +01:00
ingest_typedb_data.py enrich person custodian 2025-12-14 17:09:55 +01:00
integrate_canadian_dataset.py add isil entries 2025-11-19 23:25:22 +01:00
integrate_ch_annotator_to_custodian.py add new entries 2025-12-07 00:26:01 +01:00
integrate_layout_features.py validate enrichments 2025-12-02 14:36:01 +01:00
integrate_museumregister_nl.py validate enrichments 2025-12-02 14:36:01 +01:00
lap_gaza_report_extractor.py normalise custodian entries 2025-12-09 07:56:35 +01:00
link_entities_to_wikidata.py enrich person custodian 2025-12-14 17:09:55 +01:00
link_kien_to_heritage_forms.py annotation standards added 2025-12-05 15:30:23 +01:00
link_person_observations.py enrich person custodian 2025-12-14 17:09:55 +01:00
linkedin_batch_complete.py add sparql validator and RAG injector 2025-12-30 03:43:31 +01:00
linkedin_batch_comprehensive.py add sparql validator and RAG injector 2025-12-30 03:43:31 +01:00
linkedin_batch_fast.py add sparql validator and RAG injector 2025-12-30 03:43:31 +01:00
linkedin_batch_final.py enrich JP BE AR profiles 2025-12-30 23:07:03 +01:00
linkedin_batch_simple.py add sparql validator and RAG injector 2025-12-30 03:43:31 +01:00
linkedin_comprehensive_extraction.py correct HCID! 2025-12-10 13:01:13 +01:00
linkedin_final_extraction.py correct HCID! 2025-12-10 13:01:13 +01:00
linkedin_h1_fast.py add sparql validator and RAG injector 2025-12-30 03:43:31 +01:00
linkedin_ultimate_extraction.py correct HCID! 2025-12-10 13:01:13 +01:00
linkml_validators.py feat: Complete Country Class Implementation and Hypernyms Removal 2025-11-23 13:09:38 +01:00
llm_extract_archiveslab.py feat: implement LLM-based extraction for Archives Lab content 2025-12-05 23:16:21 +01:00
load_boundaries_postgis.py add new entries 2025-12-07 23:08:02 +01:00
load_custodians_to_ducklake.py normalise custodian entries 2025-12-09 07:56:35 +01:00
load_custodians_to_ducklake_v3.py enrich person profiles 2025-12-12 12:51:10 +01:00
load_linkml_to_postgres.py add new entries 2025-12-07 00:26:01 +01:00
load_person_entities.py enrich person profiles 2025-12-12 12:51:10 +01:00
load_typedb_data.py added web annotations 2025-12-06 19:50:04 +01:00
load_typedb_data_with_relations.py feat(loaders): update DuckLake and TypeDB loaders with relation support 2025-12-08 15:00:14 +01:00
load_typedb_schema.py enrich person custodian 2025-12-14 17:09:55 +01:00
load_typedb_schema.sh updated schemata 2025-11-21 22:12:33 +01:00
manual_web_verification_batch14.py Add comprehensive tests for NLP institution extraction and RDF partnership integration 2025-11-19 23:20:47 +01:00
manual_wikidata_search_batch6.py Add comprehensive tests for NLP institution extraction and RDF partnership integration 2025-11-19 23:20:47 +01:00
manual_wikidata_search_batch13.py Add comprehensive tests for NLP institution extraction and RDF partnership integration 2025-11-19 23:20:47 +01:00
manual_wikidata_search_batch14.py Add comprehensive tests for NLP institution extraction and RDF partnership integration 2025-11-19 23:20:47 +01:00
mark_archive_failures.py improve annotator 2025-12-05 16:25:39 +01:00
mark_virtual_itinerant_ar.py remove a,bihguous web-claims 2025-12-21 00:01:54 +01:00
match_custodians_to_yaml.py enrich all custodian timespan 2025-12-15 22:31:41 +01:00
match_custodians_to_yaml_final.py enrich all custodian timespan 2025-12-15 22:31:41 +01:00
match_custodians_to_yaml_v2.py enrich all custodian timespan 2025-12-15 22:31:41 +01:00
match_linkedin_by_name.py extend ontology 2025-12-16 20:27:39 +01:00
match_linkedin_by_name_fast.py extend ontology 2025-12-16 20:27:39 +01:00
match_linkedin_names_ultra.py extend ontology 2025-12-16 20:27:39 +01:00
match_pending_to_linkedin.py enrich profiles 2026-01-09 20:35:19 +01:00
match_zcbs_nde.py update entries 2025-11-30 23:30:29 +01:00
merge_austrian_isil_pages.py add isil entries 2025-11-19 23:25:22 +01:00
merge_batch16.py Add comprehensive tests for NLP institution extraction and RDF partnership integration 2025-11-19 23:20:47 +01:00
merge_bayern_complete.py updated schemata 2025-11-21 22:12:33 +01:00
merge_collision_files.py data(custodian): merge PENDING collision files into existing custodians 2026-01-09 18:33:00 +01:00
merge_egypt_steps.py Add comprehensive tests for NLP institution extraction and RDF partnership integration 2025-11-19 23:20:47 +01:00
merge_enriched_to_global.py Add comprehensive tests for NLP institution extraction and RDF partnership integration 2025-11-19 23:20:47 +01:00
merge_gb_enriched.py Add comprehensive tests for NLP institution extraction and RDF partnership integration 2025-11-19 23:20:47 +01:00
merge_georgia_enrichment.py Add comprehensive tests for NLP institution extraction and RDF partnership integration 2025-11-19 23:20:47 +01:00
merge_georgia_enrichment_streaming.py Add comprehensive tests for NLP institution extraction and RDF partnership integration 2025-11-19 23:20:47 +01:00
merge_global_datasets.py Add comprehensive tests for NLP institution extraction and RDF partnership integration 2025-11-19 23:20:47 +01:00
merge_it_enriched.py Add comprehensive tests for NLP institution extraction and RDF partnership integration 2025-11-19 23:20:47 +01:00
merge_linkedin_to_custodians.py extend ontology 2025-12-16 20:27:39 +01:00
merge_pending_by_name.py feat(merge): add script to merge PENDING files by matching emic names with existing files 2026-01-09 16:42:55 +01:00
merge_sachsen_anhalt_complete.py updated schemata 2025-11-21 22:12:33 +01:00
merge_sachsen_anhalt_datasets.py updated schemata 2025-11-21 22:12:33 +01:00
merge_sachsen_complete.py updated schemata 2025-11-21 22:12:33 +01:00
merge_sparql_and_isil_enrichments.py Add comprehensive tests for NLP institution extraction and RDF partnership integration 2025-11-19 23:20:47 +01:00
merge_staff_data.py feat(data): merge staff data from 35 PENDING files into enriched custodians 2026-01-09 14:51:17 +01:00
merge_us_enriched.py Add comprehensive tests for NLP institution extraction and RDF partnership integration 2025-11-19 23:20:47 +01:00
merge_us_enrichment.py Add comprehensive tests for NLP institution extraction and RDF partnership integration 2025-11-19 23:20:47 +01:00
merge_viaf_mappings.py Add comprehensive tests for NLP institution extraction and RDF partnership integration 2025-11-19 23:20:47 +01:00
migrate_annotations_to_slots.py Migrate all 293 class files to ontology-aligned slots 2026-01-06 12:24:54 +01:00
migrate_claims_to_canonical.py validate enrichments 2025-12-02 14:36:01 +01:00
migrate_class_metadata_imports.py update frontend 2026-01-08 15:56:28 +01:00
migrate_claude_conversation_provenance.py enrich custodian profiles 2026-01-02 02:10:18 +01:00
migrate_custodian_types_rationale.py Refactor code structure for improved readability and maintainability 2026-01-09 11:05:26 +01:00
migrate_custodian_types_to_has_or_had.py Refactor code structure for improved readability and maintainability 2026-01-09 11:05:26 +01:00
migrate_custodian_types_to_uri.py Fix LinkML URI conflicts and generate RDF outputs 2026-01-07 12:32:59 +01:00
migrate_egyptian_from_ch.py feat(scripts): add city enrichment and location resolution utilities 2025-12-07 14:26:59 +01:00
migrate_entity_quick_test.py feat(data): add Belgian surnames dataset with metadata and surname counts 2026-01-11 13:50:20 +01:00
migrate_entity_to_ppid.py feat(data): add Belgian surnames dataset with metadata and surname counts 2026-01-11 13:50:20 +01:00
migrate_entity_to_ppid_v2.py feat(data): add Belgian surnames dataset with metadata and surname counts 2026-01-11 13:50:20 +01:00
migrate_entity_to_ppid_v3.py feat(data): add Belgian surnames dataset with metadata and surname counts 2026-01-11 13:50:20 +01:00
migrate_entity_to_ppid_v4.py feat(data): add Belgian surnames dataset with metadata and surname counts 2026-01-11 13:50:20 +01:00
migrate_entity_to_ppid_v5.py enrich person profiles 2026-01-11 18:08:40 +01:00
migrate_geocode_provenance.py Refactor code structure for improved readability and maintainability 2026-01-11 15:27:14 +01:00
migrate_legal_form_to_iso20275.py updated schemata 2025-11-21 22:12:33 +01:00
migrate_provenance_phase1.py enrich custodian profiles 2026-01-02 02:10:18 +01:00
migrate_provenance_phase1_5.py enrich custodian profiles 2026-01-02 02:10:18 +01:00
migrate_qdrant_to_minilm.py add timespans 2025-12-16 09:02:52 +01:00
migrate_qnumber_to_snakecase.py add new entries 2025-12-07 00:26:01 +01:00
migrate_to_schema_v0.2.2_enrichment.py Add comprehensive tests for NLP institution extraction and RDF partnership integration 2025-11-19 23:20:47 +01:00
migrate_wcms_resume.py Implement fast WCMS migration script with state file checkpointing and batch processing 2026-01-11 22:26:37 +01:00
migrate_wcms_users.py enrich person profiles 2026-01-11 18:08:40 +01:00
migrate_web_archives.py fix(scripts): force table recreation in web archives migration 2025-12-07 18:47:46 +01:00
migrate_web_dirs_to_ghcid.py enrich custodian entries with logo 2025-12-27 02:15:17 +01:00
monitor_enrichment.py add isil entries 2025-11-19 23:25:22 +01:00
museum_register_enrichment.py remove a,bihguous web-claims 2025-12-21 00:01:54 +01:00
nde_to_hc_rdf.py validate enrichments 2025-12-02 14:36:01 +01:00
normalize_custodian_files.py enrich entries 2025-12-09 10:46:43 +01:00
normalize_field_names.py Add comprehensive tests for NLP institution extraction and RDF partnership integration 2025-11-19 23:20:47 +01:00
normalize_mexican_cities.py add isil entries 2025-11-19 23:25:22 +01:00
normalize_platform_types.py annotation standards added 2025-12-05 15:30:23 +01:00
normalize_platform_types_fast.py annotation standards added 2025-12-05 15:30:23 +01:00
optimize_agents_md.py enrich HC profiles 2026-01-02 02:11:04 +01:00
owl_to_mermaid.py Add scripts for converting OWL/Turtle ontology to Mermaid and PlantUML diagrams 2025-11-22 23:01:13 +01:00
owl_to_plantuml.py Add scripts for converting OWL/Turtle ontology to Mermaid and PlantUML diagrams 2025-11-22 23:01:13 +01:00
parse_austrian_isil.py add isil entries 2025-11-19 23:25:22 +01:00
parse_custodian_staff.py enrich entries and persons 2025-12-10 18:04:25 +01:00
parse_iso20275_codes.py Add initial versions of custodian hub UML diagrams in Mermaid and PlantUML formats 2025-11-22 14:33:51 +01:00
parse_japanese_isil.py Add comprehensive tests for NLP institution extraction and RDF partnership integration 2025-11-19 23:20:47 +01:00
parse_linkedin_connections.py feat(scripts): Add batch crawling and data quality scripts 2025-12-15 01:47:46 +01:00
parse_linkedin_html.py enrich all custodian timespan 2025-12-15 22:31:41 +01:00
parse_zcbs_list.py update entries 2025-11-30 23:30:29 +01:00
patch_all_missing_derived_from.py add sparql validator and RAG injector 2025-12-30 03:43:31 +01:00
patch_derived_from_fast.py add sparql validator and RAG injector 2025-12-30 03:43:31 +01:00
patch_web_enrichment_provenance.py add sparql validator and RAG injector 2025-12-30 03:43:31 +01:00
patch_wikidata_derived_from.py add sparql validator and RAG injector 2025-12-30 03:43:31 +01:00
patch_wikidata_provenance.py add sparql validator and RAG injector 2025-12-30 03:43:31 +01:00
populate_cache.py remove a,bihguous web-claims 2025-12-21 00:01:54 +01:00
prefill_obvious_errors.py updated schemata 2025-11-21 22:12:33 +01:00
prepare_wikidata_enrichment.py add isil entries 2025-11-19 23:25:22 +01:00
process_entity_to_ppid.py feat(scripts): add entity-to-PPID processing script 2026-01-10 13:58:06 +01:00
query_biblioteca_nacional_z3950.py add isil entries 2025-11-19 23:25:22 +01:00
query_wikidata_chilean_archives.py Add comprehensive tests for NLP institution extraction and RDF partnership integration 2025-11-19 23:20:47 +01:00
query_wikidata_chilean_batch10.py Add comprehensive tests for NLP institution extraction and RDF partnership integration 2025-11-19 23:20:47 +01:00
query_wikidata_chilean_batch11.py Add comprehensive tests for NLP institution extraction and RDF partnership integration 2025-11-19 23:20:47 +01:00
query_wikidata_chilean_libraries.py Add comprehensive tests for NLP institution extraction and RDF partnership integration 2025-11-19 23:20:47 +01:00
query_wikidata_chilean_museums.py Add comprehensive tests for NLP institution extraction and RDF partnership integration 2025-11-19 23:20:47 +01:00
query_wikidata_libraries_batch12.py Add comprehensive tests for NLP institution extraction and RDF partnership integration 2025-11-19 23:20:47 +01:00
query_wikidata_mexican_institutions.py Add comprehensive tests for NLP institution extraction and RDF partnership integration 2025-11-19 23:20:47 +01:00
quick_ducklake_load.py update postgis data 2025-12-10 23:51:51 +01:00
quick_wikidata_search_batch14.py Add comprehensive tests for NLP institution extraction and RDF partnership integration 2025-11-19 23:20:47 +01:00
README_linkedin_fetcher.md moved web archives 2025-12-12 00:40:26 +01:00
reclassify_kien_taste_smell.py annotation standards added 2025-12-05 15:30:23 +01:00
reclassify_non_dutch_pending.py enrich profiles 2026-01-09 20:35:19 +01:00
reenrich_fabricated_web_claims.py enrich entries 2025-12-21 22:12:34 +01:00
reenrich_wikidata_with_verification.py feat(wikidata): add re-enrichment and duplicate removal scripts 2025-12-08 14:59:38 +01:00
reenrich_with_xpath.py enrich entries 2025-12-21 22:12:34 +01:00
regenerate_all.sh Add initial versions of custodian hub UML diagrams in Mermaid and PlantUML formats 2025-11-22 14:33:51 +01:00
regenerate_ghcids_emic_name.py normalise custodian entries 2025-12-09 07:56:35 +01:00
regenerate_historical_ghcids.py update entries 2025-11-30 23:30:29 +01:00
regenerate_manifest.py update frontend 2026-01-08 15:56:28 +01:00
remove_inline_slots.py Refactor code structure for improved readability and maintainability 2026-01-09 11:05:26 +01:00
remove_wikidata_duplicates.py feat(wikidata): add re-enrichment and duplicate removal scripts 2025-12-08 14:59:38 +01:00
rename_ontology_prefixed_slots.py Refactor code structure for improved readability and maintainability 2026-01-09 11:05:26 +01:00
rename_plural_slot.py feat(scripts): improve types-vocab extraction to derive all vocabulary from schema 2026-01-10 15:37:52 +01:00
reprocess_linkup_archives.py add timespans 2025-12-16 09:02:52 +01:00
reprocess_pico_structured.py enrich person custodian 2025-12-14 17:09:55 +01:00
resolve_ar_xx_regions.py remove a,bihguous web-claims 2025-12-21 00:01:54 +01:00
resolve_cities_from_file_coords.py feat(scripts): add city enrichment and location resolution utilities 2025-12-07 14:26:59 +01:00
resolve_cities_wikidata.py feat(scripts): add city enrichment and location resolution utilities 2025-12-07 14:26:59 +01:00
resolve_country_codes.py feat(scripts): add city enrichment and location resolution utilities 2025-12-07 14:26:59 +01:00
resolve_cz_xx_regions.py feat(scripts): add city enrichment and location resolution utilities 2025-12-07 14:26:59 +01:00
resolve_diacritics_collisions.py feat(ghcid): add diacritics normalization and transliteration scripts 2025-12-08 14:59:28 +01:00
resolve_locations_by_name.py feat(scripts): add city enrichment and location resolution utilities 2025-12-07 14:26:59 +01:00
resolve_locations_geonames.py fix: remove custodian files with invalid GHCID special characters 2025-12-07 14:23:50 +01:00
resolve_locations_p131.py fix: remove custodian files with invalid GHCID special characters 2025-12-07 14:23:50 +01:00
resolve_nl_xxx_locations.py remove a,bihguous web-claims 2025-12-21 00:01:54 +01:00
resolve_pending_by_city_name.py enrich profiles 2026-01-09 20:35:19 +01:00
resolve_pending_by_region.py enrich profiles 2026-01-09 20:35:19 +01:00
resolve_pending_comprehensive.py enrich profiles 2026-01-09 20:35:19 +01:00
resolve_pending_known_orgs.py feat(scripts): expand university location mappings and add web enrichment 2026-01-09 21:10:14 +01:00
resolve_pending_locations.py feat(scripts): add resolve_pending_locations.py for GHCID resolution 2026-01-09 12:18:46 +01:00
resolve_pending_wikidata.py add person profiles with PPID 2026-01-09 18:26:58 +01:00
resolve_pending_wikidata_v2.py enrich profiles 2026-01-09 20:35:19 +01:00
resolve_qp_labels.py Refactor code structure for improved readability and maintainability 2025-11-27 17:43:14 +01:00
resolve_regions_from_city.py feat(scripts): add city enrichment and location resolution utilities 2025-12-07 14:26:59 +01:00
resolve_xx_regions.py add new entries 2025-12-07 00:26:01 +01:00
resolve_xx_regions_wikidata.py add new entries 2025-12-07 00:26:01 +01:00
resume_osm_enrichment.py Add comprehensive tests for NLP institution extraction and RDF partnership integration 2025-11-19 23:20:47 +01:00
retry_archive_failures.py improve annotator 2025-12-05 16:25:39 +01:00
retry_japanese_geocoding.py Add comprehensive tests for NLP institution extraction and RDF partnership integration 2025-11-19 23:20:47 +01:00
reverse_geocode_mexican_cities.py add isil entries 2025-11-19 23:25:22 +01:00
revert_belgium_fuzzy.py remove a,bihguous web-claims 2025-12-21 00:01:54 +01:00
revert_incorrect_xxx_enrichment.py add new entries and semantic routing 2025-12-17 10:11:56 +01:00
review_batch11_matches.py Add comprehensive tests for NLP institution extraction and RDF partnership integration 2025-11-19 23:20:47 +01:00
review_linkup_enrichments.py enrich HC profiles 2026-01-02 02:11:04 +01:00
run_linkedin_fetcher.sh enrich person profiles 2025-12-12 12:51:10 +01:00
scan_dutch_data_quality.py feat(scripts): Add batch crawling and data quality scripts 2025-12-15 01:47:46 +01:00
scan_dutch_fast.py feat(scripts): Add batch crawling and data quality scripts 2025-12-15 01:47:46 +01:00
scrape_austrian_isil.py add isil entries 2025-11-19 23:25:22 +01:00
scrape_austrian_isil_batch.py add isil entries 2025-11-19 23:25:22 +01:00
scrape_austrian_isil_complete.py add isil entries 2025-11-19 23:25:22 +01:00
scrape_austrian_isil_mcp.py add isil entries 2025-11-19 23:25:22 +01:00
scrape_austrian_isil_requests.py add isil entries 2025-11-19 23:25:22 +01:00
scrape_austrian_isil_systematic.py add isil entries 2025-11-19 23:25:22 +01:00
scrape_google_maps_reviews.py enrich entries 2025-12-09 10:46:43 +01:00
scrape_kien_profiles_for_locations.py annotation standards added 2025-12-05 15:30:23 +01:00
scrape_museumregister_nl.py update entries 2025-11-30 23:30:29 +01:00
scrape_pages_14_to_20.py add isil entries 2025-11-19 23:25:22 +01:00
search_major_egypt_wikidata.py Add comprehensive tests for NLP institution extraction and RDF partnership integration 2025-11-19 23:20:47 +01:00
setup_ducklake_views.py add new entries 2025-12-07 00:26:01 +01:00
simplify_municipalities_geojson.py update enriched entries 2025-12-03 17:38:46 +01:00
spot_check_fuzzy_matches.py updated schemata 2025-11-21 22:12:33 +01:00
spot_check_fuzzy_matches_fast.py updated schemata 2025-11-21 22:12:33 +01:00
start_api.sh enrich custodians 2025-12-11 22:32:09 +01:00
structuralize_class_descriptions.py Fix LinkML schema for valid RDF generation with proper slot_uri 2026-01-07 13:48:03 +01:00
structuralize_slot_descriptions.py Fix LinkML schema for valid RDF generation with proper slot_uri 2026-01-07 13:48:03 +01:00
summarize_egypt_enrichment.py Add comprehensive tests for NLP institution extraction and RDF partnership integration 2025-11-19 23:20:47 +01:00
sync_all_databases.py enrich person custodian 2025-12-14 17:09:55 +01:00
sync_custodians_to_oxigraph.py feat(data): add Belgian surnames dataset with metadata and surname counts 2026-01-11 13:50:20 +01:00
test_alternative_names.py Add comprehensive tests for NLP institution extraction and RDF partnership integration 2025-11-19 23:20:47 +01:00
test_argentina_wikidata.py add isil entries 2025-11-19 23:25:22 +01:00
test_collision_resolution.py Add comprehensive tests for NLP institution extraction and RDF partnership integration 2025-11-19 23:20:47 +01:00
test_eu_parser.py Add comprehensive tests for NLP institution extraction and RDF partnership integration 2025-11-19 23:20:47 +01:00
test_extended_annotation.py annotation standards added 2025-12-05 15:30:23 +01:00
test_fetch_profiles.py moved web archives 2025-12-12 00:40:26 +01:00
test_google_maps_parser.py enrich entries 2025-12-09 10:46:43 +01:00
test_hybrid_retriever.py edit Japanese entries 2025-12-09 09:16:19 +01:00
test_library_query.py Add comprehensive tests for NLP institution extraction and RDF partnership integration 2025-11-19 23:20:47 +01:00
test_link_extraction.py enrich JP BE AR profiles 2025-12-30 23:07:03 +01:00
test_linkedin_urls.py enrich custodians 2025-12-11 22:32:09 +01:00
test_live_annotation.py annotation standards added 2025-12-05 15:30:23 +01:00
test_llm_annotator.py annotation standards added 2025-12-05 15:30:23 +01:00
test_nationaalarchief_endpoints.py enrich person custodian 2025-12-14 17:09:55 +01:00
test_pico_arabic_waqf.py Add test script for PiCo extraction from Arabic waqf documents 2025-12-12 17:50:17 +01:00
test_pico_batch.py Add test script for PiCo extraction from Arabic waqf documents 2025-12-12 17:50:17 +01:00
test_production_function.py Add comprehensive tests for NLP institution extraction and RDF partnership integration 2025-11-19 23:20:47 +01:00
test_relationship_validation.py annotation standards added 2025-12-05 15:30:23 +01:00
test_schema_annotation.py annotation standards added 2025-12-05 15:30:23 +01:00
test_sousse_enrichment.py Add comprehensive tests for NLP institution extraction and RDF partnership integration 2025-11-19 23:20:47 +01:00
test_sousse_only.py Add comprehensive tests for NLP institution extraction and RDF partnership integration 2025-11-19 23:20:47 +01:00
test_subagent_extraction.py Add comprehensive tests for NLP institution extraction and RDF partnership integration 2025-11-19 23:20:47 +01:00
test_subagent_v5_integration.py Add comprehensive tests for NLP institution extraction and RDF partnership integration 2025-11-19 23:20:47 +01:00
test_unipile_api.py correct HCID! 2025-12-10 13:01:13 +01:00
test_unipile_simple.py correct HCID! 2025-12-10 13:01:13 +01:00
test_v5_extraction.py Add comprehensive tests for NLP institution extraction and RDF partnership integration 2025-11-19 23:20:47 +01:00
test_web_validation.py Add comprehensive tests for NLP institution extraction and RDF partnership integration 2025-11-19 23:20:47 +01:00
test_webclaim_validator.py annotation standards added 2025-12-05 15:30:23 +01:00
test_wikidata_debug.py Add comprehensive tests for NLP institution extraction and RDF partnership integration 2025-11-19 23:20:47 +01:00
test_yaml_integrity.py add isil entries 2025-11-19 23:25:22 +01:00
transform_crawl4ai_to_digital_platform.py feat(scripts): Add batch crawling and data quality scripts 2025-12-15 01:47:46 +01:00
transliterate_emic_names.py feat(ghcid): add diacritics normalization and transliteration scripts 2025-12-08 14:59:28 +01:00
tsconfig.json feat(archief-assistent): integrate ontology-driven vocabulary into semantic cache 2026-01-10 13:30:30 +01:00
unify_all_datasets.py Add comprehensive tests for NLP institution extraction and RDF partnership integration 2025-11-19 23:20:47 +01:00
update_class_slot_references.py feat(scripts): add person enrichment and slot mapping utilities 2026-01-10 13:32:32 +01:00
update_ghcid_with_geonames.py feat(scripts): add city enrichment and location resolution utilities 2025-12-07 14:26:59 +01:00
update_intangible_heritage_enum.py annotation standards added 2025-12-05 15:30:23 +01:00
update_nde_batch_2.py add isil entries 2025-11-19 23:25:22 +01:00
update_nde_batch_3.py add isil entries 2025-11-19 23:25:22 +01:00
update_nde_yaml_with_wikidata_test_batch.py add isil entries 2025-11-19 23:25:22 +01:00
update_slot_mappings.py feat(scripts): add person enrichment and slot mapping utilities 2026-01-10 13:32:32 +01:00
validate_all_instances.py Add comprehensive tests for NLP institution extraction and RDF partnership integration 2025-11-19 23:20:47 +01:00
validate_combined_dataset.py Add comprehensive tests for NLP institution extraction and RDF partnership integration 2025-11-19 23:20:47 +01:00
validate_csv_to_yaml_conversion.py add isil entries 2025-11-19 23:25:22 +01:00
validate_custodian_name.py Add new enums for PersonalCollectionType, ResearchCenterType, and TasteScentHeritage classifications; implement validation script for custodian names against authoritative sources 2025-12-01 18:39:22 +01:00
validate_custodian_schema.py enrich custodians 2025-12-11 22:32:09 +01:00
validate_dutch_extraction.py Add comprehensive tests for NLP institution extraction and RDF partnership integration 2025-11-19 23:20:47 +01:00
validate_enrichment_history_targeted.py Add comprehensive tests for NLP institution extraction and RDF partnership integration 2025-11-19 23:20:47 +01:00
validate_finding_aid_type.py remove a,bihguous web-claims 2025-12-21 00:01:54 +01:00
validate_geocoding_results.py Add comprehensive tests for NLP institution extraction and RDF partnership integration 2025-11-19 23:20:47 +01:00
validate_geographic_restrictions.py feat: Complete Country Class Implementation and Hypernyms Removal 2025-11-23 13:09:38 +01:00
validate_hc_storage_examples.py enrich CH entries with mission statements 2026-01-04 13:12:32 +01:00
validate_japan_dataset.py Add comprehensive tests for NLP institution extraction and RDF partnership integration 2025-11-19 23:20:47 +01:00
validate_kb_libraries_report.py Add script to validate KB library entries and generate enrichment report 2025-11-28 14:48:33 +01:00
validate_organization_names.py update enriched entries 2025-12-03 17:38:46 +01:00
validate_provenance.py add sparql validator and RAG injector 2025-12-30 03:43:31 +01:00
validate_slot_mappings.py enrich person profiles 2026-01-10 14:14:04 +01:00
validate_social_media_links.py validate enrichments 2025-12-02 14:36:01 +01:00
validate_temporal_consistency.py Add scripts for converting OWL/Turtle ontology to Mermaid and PlantUML diagrams 2025-11-22 23:01:13 +01:00
validate_timeline_events.py enrich HC profiles 2026-01-02 02:11:04 +01:00
validate_web_claims.py validate enrichments 2025-12-02 14:36:01 +01:00
validate_wikidata_enrichments.py normalise custodian entries 2025-12-09 07:56:35 +01:00
validate_with_shacl.py Add SHACL validation shapes and validation script for Heritage Custodian Ontology 2025-11-22 23:22:10 +01:00
validate_yaml_instance.py Add comprehensive tests for NLP institution extraction and RDF partnership integration 2025-11-19 23:20:47 +01:00
verify_enrichment_glm.py normalise custodian entries 2025-12-09 07:56:35 +01:00
verify_phase1_enrichment.py Add comprehensive tests for NLP institution extraction and RDF partnership integration 2025-11-19 23:20:47 +01:00
verify_website_links.py extend ontology 2025-12-16 20:27:39 +01:00
verify_youtube_enrichment_glm.py enrich entries and persons 2025-12-10 18:04:25 +01:00
wikidata_type_mapping.py Add comprehensive tests for NLP institution extraction and RDF partnership integration 2025-11-19 23:20:47 +01:00
youtube_only_from_list.py recover location blocks 2025-12-09 11:34:56 +01:00