#!/usr/bin/env python3
"""
Simple script to extract LinkedIn profiles using existing working pattern.

Each profile is extracted by running the helper script
``scripts/extract_linkedin_profile_exa.py`` in a child process; the path is
relative, so the script is expected to be launched from the directory that
contains ``scripts/``.
"""

import json
import os
import subprocess
import sys
import time
from datetime import datetime, timezone
from pathlib import Path

# Helper script invoked once per profile (resolved against the current
# working directory).
EXTRACTOR_SCRIPT = 'scripts/extract_linkedin_profile_exa.py'

# Upper bound, in seconds, on a single extraction run.
EXTRACTION_TIMEOUT_SECONDS = 120

# Pause, in seconds, between two consecutive extraction requests.
REQUEST_DELAY_SECONDS = 3

# Amount added to the reported total for every successful extraction.
# NOTE(review): presumably the per-request API price in USD -- confirm.
COST_PER_PROFILE = 0.001

# Directory that receives the extracted profile JSON files.
ENTITY_DIR = '/Users/kempersc/apps/glam/data/custodian/person/entity'

# Parsed staff file that all profiles below are linked to.
STAFF_SOURCE_FILE = (
    '/Users/kempersc/apps/glam/data/custodian/person/affiliated/parsed/'
    'academiehuis-grote-kerk-zwolle_staff_20251210T155412Z.json'
)


def extract_profile_simple(
    linkedin_url: str,
    name: str,
    output_file: str,
    source_file: str,
    staff_id: str,
    *,
    script_path: str = EXTRACTOR_SCRIPT,
    timeout: float = EXTRACTION_TIMEOUT_SECONDS,
) -> bool:
    """Extract LinkedIn profile using existing working pattern.

    Runs the extraction script in a child process and reports whether it
    exited successfully. Progress and errors are printed to stdout.

    Args:
        linkedin_url: Profile URL passed to the extraction script.
        name: Person's name; used only in the printed messages.
        output_file: Output path passed to the extraction script.
        source_file: Value passed as ``--source_file``.
        staff_id: Value passed as ``--staff_id``.
        script_path: Extraction script to run. Defaults to the script this
            file has always used.
        timeout: Seconds to wait for the child process before giving up.

    Returns:
        True if the child process exited with status 0; False if it exited
        with a non-zero status, timed out, or could not be started.
    """
    print(f"Extracting LinkedIn profile for: {name}")

    # Use the exact command pattern that worked before. sys.executable keeps
    # the child on the same interpreter (and virtualenv) as this script
    # instead of whatever `python` happens to be first on PATH.
    cmd = [
        sys.executable, script_path,
        linkedin_url,
        output_file,
        '--source_file', source_file,
        '--staff_id', staff_id,
    ]

    try:
        result = subprocess.run(
            cmd, capture_output=True, text=True, timeout=timeout
        )
    except (subprocess.SubprocessError, OSError, ValueError) as e:
        # Timeout, failure to launch, or undecodable child output: report
        # and carry on so one bad profile does not abort the whole batch.
        print(f"❌ Exception extracting profile for {name}: {e}")
        return False

    if result.returncode != 0:
        print(f"❌ Failed to extract profile for {name}: {result.stderr}")
        return False

    print(f"✅ Successfully extracted profile for {name}")
    return True


def main():
    """Main function to extract specific LinkedIn profiles."""
    # Define specific profiles to extract from Academiehuis Grote Kerk Zwolle
    profiles = [
        {
            'linkedin_url': 'https://www.linkedin.com/in/anja-van-hoorn-657b66223',
            'name': 'Anja van Hoorn',
            'output_file': f'{ENTITY_DIR}/anja-van-hoorn-657b66223_20251210T160000Z.json',
            'source_file': STAFF_SOURCE_FILE,
            'staff_id': 'academiehuis-grote-kerk-zwolle_staff_0001_anja_van_hoorn',
        },
        {
            'linkedin_url': 'https://www.linkedin.com/in/inez-van-kleef',
            'name': 'Inez van Kleef',
            'output_file': f'{ENTITY_DIR}/inez-van-kleef_20251210T160000Z.json',
            'source_file': STAFF_SOURCE_FILE,
            'staff_id': 'academiehuis-grote-kerk-zwolle_staff_0002_inez_van_kleef',
        },
        {
            'linkedin_url': 'https://www.linkedin.com/in/marga-edens-a284175',
            'name': 'Marga Edens',
            'output_file': f'{ENTITY_DIR}/marga-edens-a284175_20251210T160000Z.json',
            'source_file': STAFF_SOURCE_FILE,
            'staff_id': 'academiehuis-grote-kerk-zwolle_staff_0003_marga_edens',
        },
    ]

    success_count = 0
    total_cost = 0.0

    for index, profile in enumerate(profiles):
        # Delay between requests (nothing to wait for before the first one,
        # and no pointless pause after the last).
        if index:
            time.sleep(REQUEST_DELAY_SECONDS)

        if extract_profile_simple(**profile):
            success_count += 1
            total_cost += COST_PER_PROFILE

    print("\n📊 Extraction Summary:")
    print(f"✅ Successfully processed: {success_count}")
    print(f"💰 Total cost: ${total_cost:.3f}")
    print(f"📁 Files saved to: {ENTITY_DIR}")


if __name__ == "__main__":
    main()