#!/usr/bin/env python3 """ Test the Google Maps snapshot parser with real data from Rijksmuseum. This script demonstrates parsing the Playwright accessibility snapshot to extract structured Google Maps data. """ import json import sys from pathlib import Path # Add scripts to path for imports sys.path.insert(0, str(Path(__file__).parent)) from scrape_google_maps_reviews import ( parse_snapshot_for_reviews, to_json_serializable, GoogleMapsScrapedData, ReviewBreakdown, RelatedPlace, ReviewData, ) # Sample snapshot data extracted from Rijksmuseum Google Maps page # This is a subset of the real accessibility tree for testing SAMPLE_SNAPSHOT = """ - Page URL: https://www.google.com/maps/place/Rijksmuseum/@52.3599976,4.8852188,17z/data=!3m1!4b1!4m6!3m5!1s0x47c609eec1bb16e5:0xd54373ae6a408585!8m2!3d52.3599976!4d4.8852188!16zL20vMDZqc2Y?entry=ttu - Page Title: Rijksmuseum - Google Maps - heading "Rijksmuseum" [level=1] [ref=e131]: Rijksmuseum - img "4.7 stars" [ref=e137] - img "108,805 reviews" [ref=e145]: (108,805) - button "Art museum" [ref=e159] - img "5 stars, 81,221 reviews" [ref=e668]: "5" - img "4 stars, 21,263 reviews" [ref=e671]: "4" - img "3 stars, 4,356 reviews" [ref=e674]: "3" - img "2 stars, 907 reviews" [ref=e677]: "2" - img "1 stars, 1,058 reviews" [ref=e680]: "1" - button "Address: Museumstraat 1, 1071 XX Amsterdam" [ref=e290] - button "Open · Closes 5 pm · See more hours" [ref=e302] - link "Website: rijksmuseum.nl" [ref=e313] - button "Phone: 020 674 7000" [ref=e329] - button "Plus code: 9V5P+X3 Amsterdam" [ref=e345] - img "Currently 28% busy, usually 41% busy." [ref=e487] - radio "rembrandt, mentioned in 2,834 reviews" [ref=e752] - radio "the night watch, mentioned in 1,846 reviews" [ref=e757] - radio "johannes vermeer, mentioned in 1,359 reviews" [ref=e762] - radio "history of the netherlands, mentioned in 527 reviews" [ref=e767] - link "Van Gogh Museum · 4.6 stars · 101,806 reviews · Art museum" [ref=e923] - link "Rembrandt House Museum · 4.5 stars · 14,408 reviews · Art museum" [ref=e935] - link "Anne Frank House · 4.5 stars · 73,577 reviews · Museum" [ref=e947] - link "Stedelijk Museum Amsterdam · 4.4 stars · 14,860 reviews · Art museum" [ref=e959] - generic "Ariel Jewel" [ref=e775]: - button "Ariel Jewel Local Guide · 199 reviews · 682 photos" [ref=e782] - img "5 stars" [ref=e790] - generic [ref=e796]: a week ago - text: The only way I can describe this museum aptly is by comparing it the Louvre. It was so much better than the Louvre in every sense of the word. - generic "Kevin Bourke" [ref=e822]: - button "Kevin Bourke Local Guide · 16 reviews · 15 photos" [ref=e829] - img "4 stars" [ref=e837] - generic [ref=e843]: a month ago - text: Stunning setting on par with the Louvre; goldmine of Dutch Masters but also extensive collections of porcelain, china, silverware, jewelry, firearms, nautical models and other items. - generic "Vidya H" [ref=e866]: - button "Vidya H Local Guide · 258 reviews · 1,274 photos" [ref=e873] - img "5 stars" [ref=e881] - generic [ref=e887]: a month ago - text: The Rijksmuseum is truly massive — a treasure trove filled with everything from exquisite porcelain and jewelry to world-class paintings and sculptures. """ def main(): print("=" * 70) print("Testing Google Maps Snapshot Parser") print("=" * 70) # Parse the sample snapshot data = parse_snapshot_for_reviews(SAMPLE_SNAPSHOT) # Convert to JSON result = to_json_serializable(data) # Print results print("\n📍 Place Information:") print(f" Name: {data.name}") print(f" Rating: {data.rating} ⭐") print(f" Total Reviews: {data.total_reviews:,}") print(f" Address: {data.address}") print(f" Phone: {data.phone}") print(f" Website: {data.website}") print(f" Plus Code: {data.plus_code}") print(f" Hours: {data.hours_text}") print(f" Open Now: {data.is_open_now}") if data.latitude and data.longitude: print(f" Coordinates: {data.latitude}, {data.longitude}") print("\n📊 Review Breakdown:") if data.review_breakdown: print(f" 5 stars: {data.review_breakdown.five_star:,}") print(f" 4 stars: {data.review_breakdown.four_star:,}") print(f" 3 stars: {data.review_breakdown.three_star:,}") print(f" 2 stars: {data.review_breakdown.two_star:,}") print(f" 1 star: {data.review_breakdown.one_star:,}") print("\n💬 Review Topics:") for topic, count in sorted(data.review_topics.items(), key=lambda x: -x[1]): print(f" {topic}: {count:,}") print("\n📈 Live Busyness:") if data.live_busyness: print(f" Currently {data.live_busyness}% busy") print("\n🔗 Related Places:") for place in data.related_places: print(f" {place.name}: {place.rating}⭐ ({place.review_count:,} reviews) - {place.place_type}") print("\n📝 Sample Reviews:") for i, review in enumerate(data.reviews[:3]): print(f"\n Review {i+1}:") print(f" Author: {review.author_name}") print(f" Rating: {review.rating}⭐") print(f" Time: {review.relative_time}") print(f" Local Guide: {review.local_guide}") if review.text: print(f" Text: {review.text[:100]}...") print("\n" + "=" * 70) print("Full JSON Output:") print("=" * 70) print(json.dumps(result, indent=2, ensure_ascii=False)) # Save to file output_path = Path(__file__).parent.parent / "data" / "test" / "rijksmuseum_google_maps.json" output_path.parent.mkdir(parents=True, exist_ok=True) with open(output_path, 'w', encoding='utf-8') as f: json.dump(result, f, indent=2, ensure_ascii=False) print(f"\n✅ Saved to: {output_path}") return 0 if __name__ == "__main__": sys.exit(main())