#!/usr/bin/env python3
"""Mass scrape ideal-investisseur.fr for rows 65-111 + remaining gaps in 2-64"""
import json, re, time, sys
from scrapling.fetchers import Fetcher

# Target Google Sheet for the scraped rows.
# NOTE(review): SHEET_ID is not referenced anywhere in this script — confirm
# whether a downstream step consumes it or it can be dropped.
SHEET_ID = "1dBGv3jIsFDLMJInANzyLvUAF0HYb5iRZ_RQljo-i7XM"

# Build search URL for each SCPI
# (sheet row number, SCPI name) pairs for rows 65-111; the name is the lookup
# key into SCPI_II_URLS below.
SCPI_LIST_65_111 = [
    (65, "LF Croissance et Territoires"),
    (66, "LF Europimmo"),
    (67, "LF Grand Paris Patrimoine"),
    (68, "LF Opportunité Immo"),
    (69, "Selectinvest 1"),
    (70, "Foncière Des Praticiens"),
    (71, "Osmo Energie"),
    (72, "Reason"),
    (73, "My Share Education"),
    (74, "My Share SCPI"),
    (75, "NCap Continent"),
    (76, "NCap Education Santé"),
    (77, "NCap Régions"),
    (78, "Novaxia Neo"),
    (79, "Novapierre 1"),
    (80, "Paref Evo"),
    (81, "Paref Hexa"),
    (82, "Paref Prima"),
    (83, "Perial Grand Paris"),
    (84, "Perial Hospitalité Europe"),
    (85, "Perial O2"),
    (86, "Perial Opportunités Europe"),
    (87, "Perial Opportunités Territoires"),
    (88, "Patrimmo Commerce"),
    (89, "Praemia Hotels Europe"),
    (90, "Primopierre"),
    (91, "Primovie"),
    (92, "Principal Inside"),
    (93, "Remake Live"),
    (94, "Remake UK 2025"),
    (95, "Efimmo 1"),
    (96, "Immorente"),
    (97, "Sofiboutique"),
    (98, "Sofidy Europe Invest"),
    (99, "Sofidynamic"),
    (100, "Sofipierre"),
    (101, "Coeur d'Avenir"),
    (102, "Coeur d'Europe"),
    (103, "Coeur de régions"),
    (104, "Coeur de ville"),
    (105, "Esprit Horizon"),
    (106, "ESG Pierre Capitale"),
    (107, "Mistral Sélection"),
    (108, "Telamon Borea"),
    (109, "LOG IN"),
    (110, "Urban Coeur Commerce"),
    (111, "Wemo One"),
]

# Map SCPI names to ideal-investisseur page IDs (from the comparatif page we fetched)
# SCPI name -> ideal-investisseur review-page URL.  A value of None means no
# page was found on the site's comparatif listing; those rows are skipped by
# the driver loop below.
SCPI_II_URLS = {
    # NOTE(review): slug reads "cm-cic-pierre-investissement", which does not
    # match "LF Croissance et Territoires" — verify this URL maps to the
    # right fund before trusting the scraped figures for row 65.
    "LF Croissance et Territoires": "https://www.ideal-investisseur.fr/scpi-avis/cm-cic-pierre-investissement-1038.html",
    "LF Europimmo": None,
    "LF Grand Paris Patrimoine": None,
    "LF Opportunité Immo": "https://www.ideal-investisseur.fr/scpi-avis/lf-opportunite-immo-1115.html",
    "Selectinvest 1": None,
    "Foncière Des Praticiens": "https://www.ideal-investisseur.fr/scpi-avis/fonciere-des-praticiens-1074.html",
    "Osmo Energie": "https://www.ideal-investisseur.fr/scpi-avis/osmo-energie-10135.html",
    "Reason": "https://www.ideal-investisseur.fr/scpi-avis/reason-10142.html",
    "My Share Education": "https://www.ideal-investisseur.fr/scpi-avis/my-share-education-10124.html",
    "My Share SCPI": "https://www.ideal-investisseur.fr/scpi-avis/my-share-scpi-10125.html",
    "NCap Continent": "https://www.ideal-investisseur.fr/scpi-avis/ncap-continent-10023.html",
    "NCap Education Santé": "https://www.ideal-investisseur.fr/scpi-avis/ncap-education-sante-1108.html",
    "NCap Régions": "https://www.ideal-investisseur.fr/scpi-avis/ncap-regions-1140.html",
    "Novaxia Neo": "https://www.ideal-investisseur.fr/scpi-avis/novaxia-neo-10134.html",
    "Novapierre 1": "https://www.ideal-investisseur.fr/scpi-avis/novapierre-1075.html",
    "Paref Evo": None,
    "Paref Hexa": None,
    "Paref Prima": "https://www.ideal-investisseur.fr/scpi-avis/paref-prima-10163.html",
    "Perial Grand Paris": None,
    "Perial Hospitalité Europe": None,
    "Perial O2": None,
    "Perial Opportunités Europe": None,
    "Perial Opportunités Territoires": "https://www.ideal-investisseur.fr/scpi-avis/perial-opportunites-territoires-10143.html",
    "Patrimmo Commerce": "https://www.ideal-investisseur.fr/scpi-avis/patrimmo-commerce-10144.html",
    "Praemia Hotels Europe": None,
    "Primopierre": "https://www.ideal-investisseur.fr/scpi-avis/primopierre-10148.html",
    "Primovie": "https://www.ideal-investisseur.fr/scpi-avis/primovie-10035.html",
    "Principal Inside": None,
    "Remake Live": "https://www.ideal-investisseur.fr/scpi-avis/remake-live-10017.html",
    "Remake UK 2025": "https://www.ideal-investisseur.fr/scpi-avis/remake-uk-2025-10018.html",
    "Efimmo 1": "https://www.ideal-investisseur.fr/scpi-avis/efimmo-1-10039.html",
    "Immorente": "https://www.ideal-investisseur.fr/scpi-avis/scpi-immorente-1026.html",
    "Sofiboutique": "https://www.ideal-investisseur.fr/scpi-avis/sofiboutique-1110.html",
    "Sofidy Europe Invest": "https://www.ideal-investisseur.fr/scpi-avis/sofidy-europe-invest-10147.html",
    "Sofidynamic": None,
    "Sofipierre": "https://www.ideal-investisseur.fr/scpi-avis/sofipierre-1129.html",
    "Coeur d'Avenir": "https://www.ideal-investisseur.fr/scpi-avis/coeur-d-avenir-10041.html",
    "Coeur d'Europe": "https://www.ideal-investisseur.fr/scpi-avis/coeur-d-europe-sogenial-10042.html",
    "Coeur de régions": "https://www.ideal-investisseur.fr/scpi-avis/sogenial-coeur-de-regions-1103.html",
    "Coeur de ville": "https://www.ideal-investisseur.fr/scpi-avis/coeur-de-ville-1046.html",
    "Esprit Horizon": "https://www.ideal-investisseur.fr/scpi-avis/esprit-horizon-10128.html",
    "ESG Pierre Capitale": "https://www.ideal-investisseur.fr/scpi-avis/esg-pierre-capitale-10021.html",
    "Mistral Sélection": "https://www.ideal-investisseur.fr/scpi-avis/mistral-selection-10130.html",
    "Telamon Borea": "https://www.ideal-investisseur.fr/scpi-avis/telamon-borea-10132.html",
    "LOG IN": "https://www.ideal-investisseur.fr/scpi-avis/log-in-theroreim-10036.html",
    "Urban Coeur Commerce": "https://www.ideal-investisseur.fr/scpi-avis/urban-coeur-commerce-1139.html",
    "Wemo One": "https://www.ideal-investisseur.fr/scpi-avis/wemo-one-10044.html",
}

def classify_label(label):
    """Map a table-row label from an ideal-investisseur SCPI page to our
    internal field key, or return None when the label is not tracked.

    The substring checks mirror the site's French labels.  Order matters:
    the 'Date ...' variants of the reconstitution/réalisation rows must be
    excluded, and more specific labels are tested before generic ones.
    """
    if 'Création' in label:
        return 'date_creation'
    if 'Taux de distribution brut' in label:
        return 'td'
    if 'Performance brute globale' in label:
        return 'pga'
    if 'TRI 10 ans' in label:
        return 'tri_10'
    if 'Prix de souscription' in label:
        return 'prix_souscription'
    if 'Prix de retrait' in label:
        return 'prix_retrait'
    if 'Valeur de reconstitution' in label and 'Date' not in label:
        return 'val_reconstitution'
    if 'Valeur de réalisation' in label and 'Date' not in label:
        return 'val_realisation'
    if 'TOF' in label:
        return 'tof'
    if 'endettement' in label.lower():
        return 'endettement'
    if 'Capitalisation' in label:
        return 'capitalisation'
    if 'Surface totale' in label:
        return 'surface'
    if 'Durée des baux' in label:
        return 'duree_baux'
    # Site spells "Nombre d'immeubles" with a typographic apostrophe in some
    # pages, hence the two-part check on the lowercased label.
    if 'Nombre d' in label and 'meubles' in label.lower():
        return 'nb_immeubles'
    if 'Nombre de locataires' in label:
        return 'nb_locataires'
    if 'Nb parts en retrait' in label:
        return 'parts_retrait'
    if 'Classification SFDR' in label:
        return 'sfdr'
    if 'Label ISR' in label:
        return 'label_isr'
    return None


def extract_data(url):
    """Fetch an ideal-investisseur SCPI page and extract its data table.

    The pages lay figures out as <td>label</td><td>value</td> pairs, so the
    <td> cells are walked two at a time.  Placeholder values ('-', '- %',
    '- €') and empty cells are skipped.

    Returns a dict mapping internal field keys (see classify_label) to the
    raw text value scraped from the page.
    """
    page = Fetcher.get(url)
    data = {}

    cells = page.css('td')
    # range(..., len-1, 2) already guarantees that cells[i+1] exists.
    for i in range(0, len(cells) - 1, 2):
        label = cells[i].text.strip()
        value = cells[i + 1].text.strip()

        if not value or value in ('-', '- %', '- €'):
            continue

        key = classify_label(label)
        if key is not None:
            data[key] = value

    return data

# Drive the scrape: one fetch per sheet row, politely throttled.  Rows with
# no known URL are recorded as skipped so the summary stays complete.
results = {}
for row_num, scpi_name in SCPI_LIST_65_111:
    page_url = SCPI_II_URLS.get(scpi_name)
    if page_url is None:
        print(f"SKIP Row {row_num} ({scpi_name}) — no ideal-investisseur URL")
        results[scpi_name] = {"row": row_num, "source": "none", "data": {}}
        continue

    print(f"Row {row_num} ({scpi_name})...", end=" ", flush=True)
    try:
        fields = extract_data(page_url)
    except Exception as exc:
        # Record the failure but keep scraping the remaining rows.
        print(f"ERROR: {exc}")
        results[scpi_name] = {"row": row_num, "source": "error", "data": {}}
    else:
        results[scpi_name] = {"row": row_num, "source": "ideal-investisseur", "data": fields}
        print(f"OK ({len(fields)} fields)")
        time.sleep(0.4)

# Save results.  Explicit UTF-8 is required: ensure_ascii=False emits the
# accented French characters verbatim, which would raise UnicodeEncodeError
# under a non-UTF-8 default locale encoding.
with open('/tmp/scpi_65_111_results.json', 'w', encoding='utf-8') as f:
    json.dump(results, f, ensure_ascii=False, indent=2)

# Print summary: count only rows that were scraped AND yielded data.
found = sum(
    1 for r in results.values()
    if r['source'] == 'ideal-investisseur' and r['data']
)
print(f"\n✅ Scraped {found}/{len(SCPI_LIST_65_111)} SCPI")
