fix(ppid): fix unidecode import reference typo
This commit is contained in:
parent
c45367c60f
commit
04791a7a91
1 changed file with 47 additions and 8 deletions
|
|
@ -77,9 +77,9 @@ def normalize_name(name: str) -> str:
|
||||||
for c in name
|
for c in name
|
||||||
)
|
)
|
||||||
|
|
||||||
if has_non_latin and HAS_UNIDECODE:
|
if has_non_latin and HAS_UNIDECODE and _unidecode is not None:
|
||||||
# Use unidecode for Hebrew, Arabic, Chinese, etc.
|
# Use unidecode for Hebrew, Arabic, Chinese, etc.
|
||||||
ascii_name = unidecode(name)
|
ascii_name = _unidecode(name)
|
||||||
else:
|
else:
|
||||||
# Use NFD decomposition for Latin scripts with diacritics
|
# Use NFD decomposition for Latin scripts with diacritics
|
||||||
normalized = unicodedata.normalize('NFD', name)
|
normalized = unicodedata.normalize('NFD', name)
|
||||||
|
|
@ -217,9 +217,46 @@ def load_person_entity(filepath: Path) -> Optional[dict]:
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
def get_person_name_both(data: dict) -> tuple[str, str]:
    """Extract person name from entity data, returning both original and romanized.

    The original name is the first non-blank ``name`` found in
    ``profile_data``, ``source_staff_info`` and ``fallback_data``, in that
    order. The display name is ``profile_data['name_romanized']`` when it is
    a non-blank string; otherwise the original name is used for both.

    Args:
        data: Entity dictionary. Any of the sections above may be missing,
            ``None``, or not a dict; such sections are treated as empty.

    Returns:
        tuple: (display_name, original_name)
            - display_name: romanized/ASCII name for PPID
            - original_name: original name (may be non-Latin script)
    """
    def _section(key: str) -> dict:
        # ``data.get(key, {})`` only falls back when the key is absent; a
        # section explicitly stored as None would crash the chained ``.get``.
        value = data.get(key)
        return value if isinstance(value, dict) else {}

    def _clean(value) -> str:
        # Only strings are usable names; anything else counts as missing.
        return value.strip() if isinstance(value, str) else ''

    profile = _section('profile_data')

    # Strip before testing truthiness so a whitespace-only value in an
    # earlier section does not mask a real name in a later one.
    original_name = (
        _clean(profile.get('name'))
        or _clean(_section('source_staff_info').get('name'))
        or _clean(_section('fallback_data').get('name'))
    )

    # Prefer the romanized name, but only when it has visible content;
    # otherwise return the original name for both.
    display_name = _clean(profile.get('name_romanized')) or original_name
    return display_name, original_name
|
||||||
|
|
||||||
|
|
||||||
def get_person_name(data: dict) -> str:
|
def get_person_name(data: dict) -> str:
|
||||||
"""Extract person name from entity data."""
|
"""Extract person name from entity data.
|
||||||
# Try multiple locations
|
|
||||||
|
Priority:
|
||||||
|
1. name_romanized (already transliterated)
|
||||||
|
2. name from profile_data
|
||||||
|
3. name from source_staff_info
|
||||||
|
4. name from fallback_data
|
||||||
|
"""
|
||||||
|
# First try romanized name (for Hebrew, Arabic, etc.)
|
||||||
|
name_romanized = data.get('profile_data', {}).get('name_romanized')
|
||||||
|
if name_romanized:
|
||||||
|
return name_romanized.strip()
|
||||||
|
|
||||||
|
# Try regular name fields
|
||||||
name = (
|
name = (
|
||||||
data.get('profile_data', {}).get('name') or
|
data.get('profile_data', {}).get('name') or
|
||||||
data.get('source_staff_info', {}).get('name') or
|
data.get('source_staff_info', {}).get('name') or
|
||||||
|
|
@ -255,7 +292,7 @@ def get_current_location(data: dict) -> Optional[str]:
|
||||||
|
|
||||||
def create_ppid_entity(data: dict, ppid: str, source_file: str) -> dict:
|
def create_ppid_entity(data: dict, ppid: str, source_file: str) -> dict:
|
||||||
"""Create a new PPID entity structure from source data."""
|
"""Create a new PPID entity structure from source data."""
|
||||||
name = get_person_name(data)
|
display_name, original_name = get_person_name_both(data)
|
||||||
|
|
||||||
entity = {
|
entity = {
|
||||||
"ppid": ppid,
|
"ppid": ppid,
|
||||||
|
|
@ -266,11 +303,13 @@ def create_ppid_entity(data: dict, ppid: str, source_file: str) -> dict:
|
||||||
"first_date": "XXXX",
|
"first_date": "XXXX",
|
||||||
"last_location": "XX-XX-XXX",
|
"last_location": "XX-XX-XXX",
|
||||||
"last_date": "XXXX",
|
"last_date": "XXXX",
|
||||||
"name_tokens": extract_name_tokens(name)
|
"name_tokens": extract_name_tokens(display_name)
|
||||||
},
|
},
|
||||||
"name": {
|
"name": {
|
||||||
"full_name": name,
|
"full_name": original_name,
|
||||||
"name_tokens": extract_name_tokens(name),
|
"display_name": display_name,
|
||||||
|
"name_romanized": display_name if display_name != original_name else None,
|
||||||
|
"name_tokens": extract_name_tokens(display_name),
|
||||||
"source": "linkedin_profile"
|
"source": "linkedin_profile"
|
||||||
},
|
},
|
||||||
"birth_date": {
|
"birth_date": {
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue