data(person): additional person profile enrichments

This commit is contained in:
kempersc 2026-01-11 00:41:59 +01:00
parent 0a888ec682
commit 0df26a6e44
4 changed files with 495 additions and 7 deletions

View file

@ -23,8 +23,21 @@
"source": "linkedin_profile"
},
"birth_date": {
"edtf": "XXXX",
"precision": "unknown"
"edtf": "1963",
"precision": "year",
"provenance": {
"statement_created_at": "2026-01-10T23:40:55.376623+00:00",
"source_archived_at": "2026-01-10T23:40:51.993269+00:00",
"retrieval_agent": "enrich_person_comprehensive.py v1.2.0",
"retrieval_method": "linkup_web_search",
"source_url": "https://rug.academia.edu/Andr%C3%A9vanHolk",
"source_title": "André van Holk | University of Groningen - Academia.edu",
"source_snippet": "André van Holk was born on October 8, 1963, in Hannover, Lower Saxony, Germany. He",
"search_query": "\"André van Holk\" born biography",
"extraction_method": "regex_pattern_matching",
"verified": false,
"verification_status": "machine_extracted"
}
},
"is_living": true,
"heritage_relevance": {
@ -108,6 +121,254 @@
"html_file": "/Volumes/KINGSTON/data/glam/data/custodian/person/affiliated/manual/(23) Rijksmuseum van Oudheden_ People _ LinkedIn.html",
"xpath_match_score": 1.0,
"retrieval_agent": "extract_persons_with_provenance.py"
},
{
"claim_type": "birth_year",
"claim_value": 1963,
"provenance": {
"statement_created_at": "2026-01-10T23:40:55.376623+00:00",
"source_archived_at": "2026-01-10T23:40:51.993269+00:00",
"retrieval_agent": "enrich_person_comprehensive.py v1.2.0",
"retrieval_method": "linkup_web_search",
"api_endpoint": "https://api.linkup.so/v1/search",
"search_query": "\"André van Holk\" born biography",
"search_depth": "standard",
"source_url": "https://rug.academia.edu/Andr%C3%A9vanHolk",
"source_title": "André van Holk | University of Groningen - Academia.edu",
"source_snippet": "André van Holk was born on October 8, 1963, in Hannover, Lower Saxony, Germany. He",
"extraction_method": "regex_pattern_matching",
"pattern_type": "us_date",
"verified": false,
"verification_status": "machine_extracted",
"requires_human_review": true,
"http_status": 200,
"answer_position": "answer[0:82]",
"all_sources": [
{
"url": "https://rug.academia.edu/Andr%C3%A9vanHolk",
"name": "André van Holk | University of Groningen - Academia.edu"
},
{
"url": "https://rug.academia.edu/aVanHolk",
"name": "André F L Van Holk | University of Groningen - Academia.edu"
},
{
"url": "https://scholar.google.com/citations?user=5UIYKewAAAAJ&hl=nl",
"name": "André van Holk"
},
{
"url": "https://www.rug.nl/research/groningen-institute-of-archaeology/about-the-institute/staff/andrevanholk?lang=en",
"name": "Prof. A.(André) F.L. van Holk | About the institute | University of Groningen"
},
{
"url": "https://www.academia.edu/34556245/The_Zuiderzee_the_Netherlands_Highway_fishing_ground_and_power_landscape",
"name": "(PDF) The Zuiderzee (the Netherlands). Highway, fishing ground and power landscape | André van Holk - Academia.edu"
}
],
"source_count": 20,
"answer_content_hash": "1d8f6c63234c966b"
}
},
{
"claim_type": "position",
"claim_value": {
"title": "Professor",
"organization": "Maritime Archaeology at the University of Groningen",
"year": null
},
"provenance": {
"statement_created_at": "2026-01-10T23:41:00.342283+00:00",
"source_archived_at": "2026-01-10T23:40:56.381010+00:00",
"retrieval_agent": "enrich_person_comprehensive.py v1.2.0",
"retrieval_method": "linkup_web_search",
"api_endpoint": "https://api.linkup.so/v1/search",
"search_query": "\"André van Holk\" Professor Maritieme Archeologie education career university",
"search_depth": "standard",
"source_url": "https://rug.academia.edu/aVanHolk",
"source_title": "André F L Van Holk | University of Groningen - Academia.edu",
"source_snippet": "Holk is an endowed Professor of Maritime Archaeology at the University of Groningen, specifically at th",
"extraction_method": "regex_pattern_matching",
"pattern_type": "position",
"verified": false,
"verification_status": "machine_extracted",
"requires_human_review": true,
"http_status": 200,
"answer_position": "answer[15:118]",
"all_sources": [
{
"url": "https://rug.academia.edu/aVanHolk",
"name": "André F L Van Holk | University of Groningen - Academia.edu"
},
{
"url": "https://www.rug.nl/research/groningen-institute-of-archaeology/about-the-institute/staff/andrevanholk?lang=en",
"name": "Prof. A.(André) F.L. van Holk | About the institute | University of Groningen"
},
{
"url": "https://www.rug.nl/research/portal/publications/maritieme-archeologie-van-de-kogge(1d922963-22a4-4ada-b762-89ecb49db429).html",
"name": "Maritieme archeologie van de kogge - Research database - University of Groningen"
},
{
"url": "https://www.academia.edu/34556245/The_Zuiderzee_the_Netherlands_Highway_fishing_ground_and_power_landscape",
"name": "(PDF) The Zuiderzee (the Netherlands). Highway, fishing ground and power landscape | André van Holk - Academia.edu"
},
{
"url": "https://research.rug.nl/en/publications/archeologie-van-de-binnenvaart-wonen-en-werken-aan-boord-van-binn",
"name": "Archeologie van de binnenvaart. Wonen en werken aan boord van binnenvaartschepen (1600-1900) — the University of Groningen research portal"
}
],
"source_count": 20,
"answer_content_hash": "5c3935d6d3e1e026"
}
},
{
"claim_type": "contact_detail",
"claim_value": {
"type": "academia_url",
"value": "https://rug.academia.edu/Andr"
},
"provenance": {
"statement_created_at": "2026-01-10T23:41:27.773332+00:00",
"source_archived_at": "2026-01-10T23:41:11.495059+00:00",
"retrieval_agent": "enrich_person_comprehensive.py v1.2.0",
"retrieval_method": "linkup_web_search",
"api_endpoint": "https://api.linkup.so/v1/search",
"search_query": "\"André van Holk\" researchgate academia.edu google scholar profile",
"search_depth": "standard",
"source_url": "https://scholar.google.com/citations?user=5UIYKewAAAAJ&hl=nl",
"source_title": "André van Holk - Google Scholar",
"source_snippet": "toric studies. \n Links: \n https://rug.academia.edu/Andr%C3%A9vanHolk \n https://rug.",
"extraction_method": "regex_pattern_matching",
"pattern_type": "academia_url",
"verified": false,
"verification_status": "machine_extracted",
"requires_human_review": true,
"http_status": 200,
"answer_position": "answer[692:781]",
"all_sources": [
{
"url": "https://scholar.google.com/citations?user=5UIYKewAAAAJ&hl=nl",
"name": "André van Holk - Google Scholar"
},
{
"url": "https://rug.academia.edu/Andr%C3%A9vanHolk",
"name": "André van Holk | University of Groningen - Academia.edu"
},
{
"url": "https://link.springer.com/article/10.1007/BF02529754",
"name": "The syntax of theslovo-er | Russian Linguistics"
},
{
"url": "https://rug.academia.edu/aVanHolk",
"name": "André F L Van Holk | University of Groningen - Academia.edu"
},
{
"url": "https://link.springer.com/article/10.1007/BF01839411",
"name": "A farewell to L'ubomír Durovič | Russian Linguistics"
}
],
"source_count": 52,
"answer_content_hash": "ef301b9d42117afe"
}
},
{
"claim_type": "contact_detail",
"claim_value": {
"type": "academia_url",
"value": "https://rug.academia.edu/aVanHolk"
},
"provenance": {
"statement_created_at": "2026-01-10T23:41:27.773835+00:00",
"source_archived_at": "2026-01-10T23:41:11.495059+00:00",
"retrieval_agent": "enrich_person_comprehensive.py v1.2.0",
"retrieval_method": "linkup_web_search",
"api_endpoint": "https://api.linkup.so/v1/search",
"search_query": "\"André van Holk\" researchgate academia.edu google scholar profile",
"search_depth": "standard",
"source_url": "https://scholar.google.com/citations?user=5UIYKewAAAAJ&hl=nl",
"source_title": "André van Holk - Google Scholar",
"source_snippet": "mia.edu/Andr%C3%A9vanHolk \n https://rug.academia.edu/aVanHolk\n\n- University of Groningen st",
"extraction_method": "regex_pattern_matching",
"pattern_type": "academia_url",
"verified": false,
"verification_status": "machine_extracted",
"requires_human_review": true,
"http_status": 200,
"answer_position": "answer[739:832]",
"all_sources": [
{
"url": "https://scholar.google.com/citations?user=5UIYKewAAAAJ&hl=nl",
"name": "André van Holk - Google Scholar"
},
{
"url": "https://rug.academia.edu/Andr%C3%A9vanHolk",
"name": "André van Holk | University of Groningen - Academia.edu"
},
{
"url": "https://link.springer.com/article/10.1007/BF02529754",
"name": "The syntax of theslovo-er | Russian Linguistics"
},
{
"url": "https://rug.academia.edu/aVanHolk",
"name": "André F L Van Holk | University of Groningen - Academia.edu"
},
{
"url": "https://link.springer.com/article/10.1007/BF01839411",
"name": "A farewell to L'ubomír Durovič | Russian Linguistics"
}
],
"source_count": 52,
"answer_content_hash": "ef301b9d42117afe"
}
},
{
"claim_type": "contact_detail",
"claim_value": {
"type": "google_scholar_url",
"value": "https://scholar.google.com/citations?user=5UIYKewAAAAJ&hl=nl"
},
"provenance": {
"statement_created_at": "2026-01-10T23:41:27.774002+00:00",
"source_archived_at": "2026-01-10T23:41:11.495059+00:00",
"retrieval_agent": "enrich_person_comprehensive.py v1.2.0",
"retrieval_method": "linkup_web_search",
"api_endpoint": "https://api.linkup.so/v1/search",
"search_query": "\"André van Holk\" researchgate academia.edu google scholar profile",
"search_depth": "standard",
"source_url": "https://scholar.google.com/citations?user=5UIYKewAAAAJ&hl=nl",
"source_title": "André van Holk - Google Scholar",
"source_snippet": "and related fields. \n Link: https://scholar.google.com/citations?user=5UIYKewAAAAJ&hl=nl\n\n- Academia.edu: Has profiles",
"extraction_method": "regex_pattern_matching",
"pattern_type": "google_scholar_url",
"verified": false,
"verification_status": "machine_extracted",
"requires_human_review": true,
"http_status": 200,
"answer_position": "answer[364:484]",
"all_sources": [
{
"url": "https://scholar.google.com/citations?user=5UIYKewAAAAJ&hl=nl",
"name": "André van Holk - Google Scholar"
},
{
"url": "https://rug.academia.edu/Andr%C3%A9vanHolk",
"name": "André van Holk | University of Groningen - Academia.edu"
},
{
"url": "https://link.springer.com/article/10.1007/BF02529754",
"name": "The syntax of theslovo-er | Russian Linguistics"
},
{
"url": "https://rug.academia.edu/aVanHolk",
"name": "André F L Van Holk | University of Groningen - Academia.edu"
},
{
"url": "https://link.springer.com/article/10.1007/BF01839411",
"name": "A farewell to L'ubomír Durovič | Russian Linguistics"
}
],
"source_count": 52,
"answer_content_hash": "ef301b9d42117afe"
}
}
],
"source_observations": [
@ -131,5 +392,21 @@
]
},
"linkedin_slug": "andré-van-holk-770a7630",
"ppid_collision_suffix": "andr__van_holk_770a7630"
"ppid_collision_suffix": "andr__van_holk_770a7630",
"enrichment_history": [
{
"enrichment_timestamp": "2026-01-10T23:40:51.992781+00:00",
"enrichment_agent": "enrich_person_comprehensive.py v1.2.0",
"person_name": "André van Holk",
"context_used": "Professor Maritieme Archeologie",
"searches_performed": [
"\"André van Holk\" born biography",
"\"André van Holk\" Professor Maritieme Archeologie education career university",
"\"André van Holk\" publications awards honors books",
"\"André van Holk\" contact email twitter linkedin orcid profile photo",
"\"André van Holk\" researchgate academia.edu google scholar profile"
],
"data_fabrication_check": "PASSED"
}
]
}

View file

@ -108,6 +108,56 @@
"html_file": "/Volumes/KINGSTON/data/glam/data/custodian/person/affiliated/manual/(30) Museum Ons'_ Lieve Heer op Solder_ People _ LinkedIn.html",
"xpath_match_score": 1.0,
"retrieval_agent": "extract_persons_with_provenance.py"
},
{
"claim_type": "contact_detail",
"claim_value": {
"type": "linkedin_url",
"value": "https://www.linkedin.com/in/birgit-b%C3%BCchner-867aa96"
},
"provenance": {
"statement_created_at": "2026-01-10T23:40:11.115275+00:00",
"source_archived_at": "2026-01-10T23:40:07.498257+00:00",
"retrieval_agent": "enrich_person_comprehensive.py v1.2.0",
"retrieval_method": "linkup_web_search",
"api_endpoint": "https://api.linkup.so/v1/search",
"search_query": "\"Birgit Büchner\" contact email twitter linkedin orcid profile photo",
"search_depth": "standard",
"source_url": "https://www.linkedin.com/in/birgit-b%C3%BCchner-867aa96/",
"source_title": "Birgit Büchner - Director at Museum Ons' Lieve Heer op Solder | LinkedIn",
"source_snippet": ". The LinkedIn profile is at: https://www.linkedin.com/in/birgit-b%C3%BCchner-867aa96/ but no direct contact details",
"extraction_method": "regex_pattern_matching",
"pattern_type": "linkedin_url",
"verified": false,
"verification_status": "machine_extracted",
"requires_human_review": true,
"http_status": 200,
"answer_position": "answer[110:226]",
"all_sources": [
{
"url": "https://www.linkedin.com/in/birgit-b%C3%BCchner-867aa96/",
"name": "Birgit Büchner - Director at Museum Ons' Lieve Heer op Solder | LinkedIn"
},
{
"url": "https://nl.linkedin.com/posts/birgit-b%C3%BCchner-867aa96_ik-ben-erg-blij-met-dit-initiatief-vanuit-activity-7063419090501038080-HrpL",
"name": "Birgit Büchner op LinkedIn: Ik ben erg blij met dit initiatief vanuit ons jongerenprogramma Voices of…"
},
{
"url": "https://au.linkedin.com/posts/birgit-b%C3%BCchner-867aa96_het-was-de-hele-week-nog-nagenieten-de-museumnacht-activity-6997135345909645312-gof-?trk=public_profile_like_view",
"name": "Birgit Büchner on LinkedIn: Het was de hele week nog nagenieten. De Museumnacht Amsterdam beleefde een…"
},
{
"url": "https://orcid.org/signin",
"name": "ORCID"
},
{
"url": "https://support.orcid.org/hc/en-us/articles/360006973833-Add-links-to-personal-websites-to-your-ORCID-record",
"name": "Add links to personal websites to your ORCID record ORCID"
}
],
"source_count": 11,
"answer_content_hash": "7b3d359956b02cd0"
}
}
],
"source_observations": [
@ -130,5 +180,21 @@
"/Users/kempersc/apps/glam/data/custodian/person/entity/birgit-b%C3%BCchner-867aa96_20260109T224531Z.json"
]
},
"linkedin_slug": "birgit-büchner-867aa96"
"linkedin_slug": "birgit-büchner-867aa96",
"enrichment_history": [
{
"enrichment_timestamp": "2026-01-10T23:39:53.751200+00:00",
"enrichment_agent": "enrich_person_comprehensive.py v1.2.0",
"person_name": "Birgit Büchner",
"context_used": "Director at Museum Ons' Lieve Heer op Solder",
"searches_performed": [
"\"Birgit Büchner\" born biography",
"\"Birgit Büchner\" Director at Museum Ons' Lieve Heer op Solder education career university",
"\"Birgit Büchner\" publications awards honors books",
"\"Birgit Büchner\" contact email twitter linkedin orcid profile photo",
"\"Birgit Büchner\" researchgate academia.edu google scholar profile"
],
"data_fabrication_check": "PASSED"
}
]
}

View file

@ -23,8 +23,21 @@
"source": "linkedin_profile"
},
"birth_date": {
"edtf": "XXXX",
"precision": "unknown"
"edtf": "1970",
"precision": "year",
"provenance": {
"statement_created_at": "2026-01-10T23:40:28.951121+00:00",
"source_archived_at": "2026-01-10T23:40:23.786992+00:00",
"retrieval_agent": "enrich_person_comprehensive.py v1.2.0",
"retrieval_method": "linkup_web_search",
"source_url": "https://fr.wikipedia.org/wiki/Fran%C3%A7ois_Bart",
"source_title": "François Bart — Wikipédia",
"source_snippet": "Montaigne. Il est agrégé de géographie (1970) et docteur d'État en géographie (1988),",
"search_query": "\"François Bartique\" born biography",
"extraction_method": "regex_pattern_matching",
"verified": false,
"verification_status": "machine_extracted"
}
},
"is_living": true,
"heritage_relevance": {
@ -108,6 +121,104 @@
"html_file": "/Volumes/KINGSTON/data/glam/data/custodian/person/affiliated/manual/(30) The Museum of Modern Art_ People _ LinkedIn.html",
"xpath_match_score": 1.0,
"retrieval_agent": "extract_persons_with_provenance.py"
},
{
"claim_type": "birth_year",
"claim_value": 1970,
"provenance": {
"statement_created_at": "2026-01-10T23:40:28.951121+00:00",
"source_archived_at": "2026-01-10T23:40:23.786992+00:00",
"retrieval_agent": "enrich_person_comprehensive.py v1.2.0",
"retrieval_method": "linkup_web_search",
"api_endpoint": "https://api.linkup.so/v1/search",
"search_query": "\"François Bartique\" born biography",
"search_depth": "standard",
"source_url": "https://fr.wikipedia.org/wiki/Fran%C3%A7ois_Bart",
"source_title": "François Bart — Wikipédia",
"source_snippet": "Montaigne. Il est agrégé de géographie (1970) et docteur d'État en géographie (1988),",
"extraction_method": "regex_pattern_matching",
"pattern_type": "year_paren",
"verified": false,
"verification_status": "machine_extracted",
"requires_human_review": true,
"http_status": 200,
"answer_position": "answer[263:348]",
"all_sources": [
{
"url": "https://fr.wikipedia.org/wiki/Fran%C3%A7ois_Bart",
"name": "François Bart — Wikipédia"
},
{
"url": "https://rdv-histoire.com/intervenants/francois-bart",
"name": "François BART | Les Rendez-vous de l'histoire"
},
{
"url": "https://www.wikidata.org/wiki/Q33141709",
"name": "François Baratte - Wikidata"
},
{
"url": "https://www.wikidata.org/wiki/Q15967400",
"name": "Philippe-François Bart - Wikidata"
},
{
"url": "https://en.wikipedia.org/wiki/Frédéric_François",
"name": "Frédéric François - Wikipedia"
}
],
"source_count": 20,
"answer_content_hash": "8d7088e8851333d0"
}
},
{
"claim_type": "position",
"claim_value": {
"title": "director",
"organization": "The Museum of Modern Art",
"year": null
},
"provenance": {
"statement_created_at": "2026-01-10T23:40:33.801038+00:00",
"source_archived_at": "2026-01-10T23:40:29.951554+00:00",
"retrieval_agent": "enrich_person_comprehensive.py v1.2.0",
"retrieval_method": "linkup_web_search",
"api_endpoint": "https://api.linkup.so/v1/search",
"search_query": "\"François Bartique\" Freelance artdirector bij The Museum of Modern Art education career university",
"search_depth": "standard",
"source_url": "https://nl.linkedin.com/in/françois-bartique-883b2928a",
"source_title": "François Bartique - Freelance artdirector - The Museum of ...",
"source_snippet": "as a freelance art director at The Museum of Modern Art.",
"extraction_method": "regex_pattern_matching",
"pattern_type": "position",
"verified": false,
"verification_status": "machine_extracted",
"requires_human_review": true,
"http_status": 200,
"answer_position": "answer[111:167]",
"all_sources": [
{
"url": "https://nl.linkedin.com/in/françois-bartique-883b2928a",
"name": "François Bartique - Freelance artdirector - The Museum of ..."
},
{
"url": "https://www.linkedin.com/company/san-francisco-museum-of-modern-art",
"name": "SFMOMA San Francisco Museum of Modern Art | LinkedIn"
},
{
"url": "https://www.linkedin.com/in/meredithlawhead/",
"name": "Meredith Lawhead - The Museum of Modern Art | LinkedIn"
},
{
"url": "https://www.linkedin.com/company/modern-art-museum-of-fort-worth",
"name": "Modern Art Museum of Fort Worth | LinkedIn"
},
{
"url": "https://www.nytimes.com/2018/11/14/arts/design/quentin-bajac-moma-photography.html",
"name": "MoMA Photography Chief Returns to Paris to Direct Museum - The New York Times"
}
],
"source_count": 20,
"answer_content_hash": "b1fcde150413f829"
}
}
],
"source_observations": [
@ -130,5 +241,21 @@
"/Users/kempersc/apps/glam/data/custodian/person/entity/fran%C3%A7ois-bartique-883b2928a_20260109T224539Z.json"
]
},
"linkedin_slug": "françois-bartique-883b2928a"
"linkedin_slug": "françois-bartique-883b2928a",
"enrichment_history": [
{
"enrichment_timestamp": "2026-01-10T23:40:23.786919+00:00",
"enrichment_agent": "enrich_person_comprehensive.py v1.2.0",
"person_name": "François Bartique",
"context_used": "Freelance artdirector bij The Museum of Modern Art",
"searches_performed": [
"\"François Bartique\" born biography",
"\"François Bartique\" Freelance artdirector bij The Museum of Modern Art education career university",
"\"François Bartique\" publications awards honors books",
"\"François Bartique\" contact email twitter linkedin orcid profile photo",
"\"François Bartique\" researchgate academia.edu google scholar profile"
],
"data_fabrication_check": "PASSED"
}
]
}

View file

@ -387,6 +387,24 @@ def enrich_person(name: str, context: str, api_key: str) -> Dict:
for m in extract_media(ans):
enrichment["web_claims"].append(create_claim("media_reference", {"type": m["type"], "value": m["value"]}, url, title, m["snippet"], q4, srcs, meta, ans, m["type"]))
time.sleep(1.0)
# Search 5: Academic Profiles (NEW in v1.2.0)
q5 = f'"{name}" researchgate academia.edu google scholar profile'
r5 = search_linkup(q5, api_key)
enrichment["enrichment_metadata"]["searches_performed"].append(q5)
if "error" not in r5:
ans, srcs = r5.get("answer", ""), r5.get("sources", [])
url, title = (srcs[0].get("url", ""), srcs[0].get("name", "")) if srcs else ("", "")
meta = r5.get("_meta", {})
if ans:
for c in extract_contacts(ans):
# Only add academic profile types from this search
if c["type"] in ["researchgate_url", "academia_url", "google_scholar_url"]:
enrichment["web_claims"].append(create_claim("contact_detail", {"type": c["type"], "value": c["value"]}, url, title, c["snippet"], q5, srcs, meta, ans, c["type"]))
return enrichment