fix(ppid): fix unidecode import reference typo

This commit is contained in:
kempersc 2026-01-09 18:29:36 +01:00
parent c45367c60f
commit 04791a7a91

View file

@ -77,9 +77,9 @@ def normalize_name(name: str) -> str:
for c in name
)
if has_non_latin and HAS_UNIDECODE:
if has_non_latin and HAS_UNIDECODE and _unidecode is not None:
# Use unidecode for Hebrew, Arabic, Chinese, etc.
ascii_name = unidecode(name)
ascii_name = _unidecode(name)
else:
# Use NFD decomposition for Latin scripts with diacritics
normalized = unicodedata.normalize('NFD', name)
@ -217,9 +217,46 @@ def load_person_entity(filepath: Path) -> Optional[dict]:
return None
def get_person_name_both(data: dict) -> tuple[str, str]:
    """Extract a person's name from entity data, both romanized and original.

    The original name is the first non-blank ``name`` found in the
    ``profile_data``, ``source_staff_info`` and ``fallback_data`` sections,
    checked in that order. The display name is
    ``profile_data['name_romanized']`` when that holds a non-blank string;
    otherwise the original name is used for both.

    Args:
        data: Entity dictionary. Any section may be missing, null, or lack
            a ``name`` entry.

    Returns:
        tuple: (display_name, original_name)
            - display_name: romanized/ASCII name for PPID
            - original_name: original name (may be non-Latin script)
        Both values are stripped of surrounding whitespace and are ``''``
        when no name is available.
    """

    def _section(key: str) -> dict:
        # A key that is present but null (or not a mapping) must behave like
        # a missing section; `data.get(key, {})` would hand back None and the
        # chained lookup would raise AttributeError.
        value = data.get(key)
        return value if isinstance(value, dict) else {}

    def _clean(value) -> str:
        # Only strings can be names; blank strings collapse to '' so callers
        # can rely on plain truthiness.
        return value.strip() if isinstance(value, str) else ''

    profile = _section('profile_data')

    # Get original name: first section that yields a non-blank name wins.
    candidates = (
        _clean(profile.get('name')),
        _clean(_section('source_staff_info').get('name')),
        _clean(_section('fallback_data').get('name')),
    )
    original_name = next((name for name in candidates if name), '')

    # Get romanized name if available; a whitespace-only value is treated as
    # absent so it never produces an empty display name.
    name_romanized = _clean(profile.get('name_romanized'))
    if name_romanized:
        return name_romanized, original_name

    # Return original name for both if no romanization
    return original_name, original_name
def get_person_name(data: dict) -> str:
"""Extract person name from entity data."""
# Try multiple locations
"""Extract person name from entity data.
Priority:
1. name_romanized (already transliterated)
2. name from profile_data
3. name from source_staff_info
4. name from fallback_data
"""
# First try romanized name (for Hebrew, Arabic, etc.)
name_romanized = data.get('profile_data', {}).get('name_romanized')
if name_romanized:
return name_romanized.strip()
# Try regular name fields
name = (
data.get('profile_data', {}).get('name') or
data.get('source_staff_info', {}).get('name') or
@ -255,7 +292,7 @@ def get_current_location(data: dict) -> Optional[str]:
def create_ppid_entity(data: dict, ppid: str, source_file: str) -> dict:
"""Create a new PPID entity structure from source data."""
name = get_person_name(data)
display_name, original_name = get_person_name_both(data)
entity = {
"ppid": ppid,
@ -266,11 +303,13 @@ def create_ppid_entity(data: dict, ppid: str, source_file: str) -> dict:
"first_date": "XXXX",
"last_location": "XX-XX-XXX",
"last_date": "XXXX",
"name_tokens": extract_name_tokens(name)
"name_tokens": extract_name_tokens(display_name)
},
"name": {
"full_name": name,
"name_tokens": extract_name_tokens(name),
"full_name": original_name,
"display_name": display_name,
"name_romanized": display_name if display_name != original_name else None,
"name_tokens": extract_name_tokens(display_name),
"source": "linkedin_profile"
},
"birth_date": {