glam/scripts/extract_zwolle_profiles.py
2025-12-11 22:32:09 +01:00

84 lines
No EOL
3.3 KiB
Python

#!/usr/bin/env python3
"""
Simple script to extract LinkedIn profiles using existing working pattern.
"""
import json
import os
import subprocess
import sys
import time
from datetime import datetime, timezone
from pathlib import Path
def extract_profile_simple(linkedin_url: str, name: str, output_file: str, source_file: str, staff_id: str) -> bool:
    """Extract one LinkedIn profile by running the extraction script in a child process.

    Runs ``scripts/extract_linkedin_profile_exa.py`` (a path relative to the
    current working directory) and reports the outcome on stdout.

    Args:
        linkedin_url: Profile URL; passed as the script's first positional argument.
        name: Display name of the person; used only in progress messages.
        output_file: Passed as the script's second positional argument.
        source_file: Value passed to the script's ``--source_file`` option.
        staff_id: Value passed to the script's ``--staff_id`` option.

    Returns:
        True if the child process exited with status 0; False if it exited
        with a non-zero status, timed out, or could not be started.
    """
    timeout_seconds = 120

    print(f"Extracting LinkedIn profile for: {name}")

    # Run the child under the interpreter that is executing this script, so it
    # sees the same virtualenv and installed packages. A bare 'python' may be
    # absent from PATH or resolve to a different installation. sys.executable
    # can be empty when Python cannot determine its own path, hence the fallback.
    interpreter = sys.executable or 'python'
    cmd = [
        interpreter, 'scripts/extract_linkedin_profile_exa.py',
        linkedin_url,
        output_file,
        '--source_file', source_file,
        '--staff_id', staff_id,
    ]

    try:
        result = subprocess.run(cmd, capture_output=True, text=True, timeout=timeout_seconds)
    except subprocess.TimeoutExpired:
        print(f"❌ Timed out after {timeout_seconds}s extracting profile for {name}")
        return False
    except Exception as e:
        # Deliberately broad: this is a best-effort batch step, and a failure to
        # launch or decode one child must not abort the remaining profiles.
        print(f"❌ Exception extracting profile for {name}: {e}")
        return False

    if result.returncode != 0:
        print(f"❌ Failed to extract profile for {name}: {result.stderr}")
        return False

    print(f"✅ Successfully extracted profile for {name}")
    return True
def main():
    """Extract the LinkedIn profiles of selected Academiehuis Grote Kerk Zwolle staff.

    Calls ``extract_profile_simple`` once for each hard-coded profile, pausing
    between consecutive requests, then prints a summary with the number of
    successful extractions, the accumulated cost figure and the output
    directory.
    """
    entity_dir = '/Users/kempersc/apps/glam/data/custodian/person/entity'
    source_file = (
        '/Users/kempersc/apps/glam/data/custodian/person/affiliated/parsed/'
        'academiehuis-grote-kerk-zwolle_staff_20251210T155412Z.json'
    )
    output_timestamp = '20251210T160000Z'
    cost_per_success = 0.001  # flat amount added to the total for every successful extraction
    delay_seconds = 3

    # (LinkedIn profile slug, display name, staff id) for each profile to extract.
    staff = [
        (
            'anja-van-hoorn-657b66223',
            'Anja van Hoorn',
            'academiehuis-grote-kerk-zwolle_staff_0001_anja_van_hoorn',
        ),
        (
            'inez-van-kleef',
            'Inez van Kleef',
            'academiehuis-grote-kerk-zwolle_staff_0002_inez_van_kleef',
        ),
        (
            'marga-edens-a284175',
            'Marga Edens',
            'academiehuis-grote-kerk-zwolle_staff_0003_marga_edens',
        ),
    ]
    profiles = [
        {
            'linkedin_url': f'https://www.linkedin.com/in/{slug}',
            'name': name,
            'output_file': f'{entity_dir}/{slug}_{output_timestamp}.json',
            'source_file': source_file,
            'staff_id': staff_id,
        }
        for slug, name, staff_id in staff
    ]

    success_count = 0
    total_cost = 0.0
    for index, profile in enumerate(profiles):
        if index:
            # Delay between requests. Sleeping before every request except the
            # first keeps the same spacing without a useless wait after the last.
            time.sleep(delay_seconds)
        if extract_profile_simple(**profile):
            success_count += 1
            total_cost += cost_per_success

    print("\n📊 Extraction Summary:")
    print(f"✅ Successfully processed: {success_count}")
    print(f"💰 Total cost: ${total_cost:.3f}")
    print(f"📁 Files saved to: {entity_dir}")
# Run the extraction only when this file is executed as a script, not when it is imported.
if __name__ == "__main__":
    main()