#!/usr/bin/env python3
"""SCPI Data Fixer - Scrapes ideal-investisseur.fr for verified data"""
import json, re, time, sys
from scrapling.fetchers import Fetcher

# Presumably the ID of the spreadsheet this data is destined for (looks like a
# Google Sheets document ID — TODO confirm). Nothing in the visible part of
# this file reads it.
SHEET_ID = "1dBGv3jIsFDLMJInANzyLvUAF0HYb5iRZ_RQljo-i7XM"

# Maps each SCPI display name to its review page on ideal-investisseur.fr.
# A value of None means no page URL has been identified yet; the fetch loop
# further down skips those entries.
SCPI_URLS = {
    "Eden": "https://www.ideal-investisseur.fr/scpi-avis/eden-10150.html",
    "Elialys": "https://www.ideal-investisseur.fr/scpi-avis/elialys-advenis-1105.html",
    "Eurovalys": "https://www.ideal-investisseur.fr/scpi-avis/eurovalys-1043.html",
    "Aestiam Agora": "https://www.ideal-investisseur.fr/scpi-avis/aestiam-agora-1028.html",
    "Aestiam Horizon": "https://www.ideal-investisseur.fr/scpi-avis/aestiam-horizon-1071.html",
    "Linaclub": "https://www.ideal-investisseur.fr/scpi-avis/linaclub-10139.html",
    "AEW Commerces Europe": None,  # URL not found yet
    "AEW Diversification Allemagne": None,
    "AEW Opportunités Europe": None,
    "AEW Patrimoine Santé": "https://www.ideal-investisseur.fr/scpi-avis/aew-patrimoine-sante-10027.html",
    "Atout Pierre Diversification": None,
    "Activimmo": None,
    "Comète": "https://www.ideal-investisseur.fr/scpi-avis/comete-alderan-10045.html",
    # NOTE(review): the slug below says "allianz-divercity", not "allianz-pierre"
    # — confirm this is the intended page for this key.
    "Allianz Pierre": "https://www.ideal-investisseur.fr/scpi-avis/allianz-divercity-10008.html",
    "Alta Convictions": "https://www.ideal-investisseur.fr/scpi-avis/alta-convictions-10048.html",
    "Altixia Cadence XII": "https://www.ideal-investisseur.fr/scpi-avis/altixia-cadence-XII-12-1101.html",
    "Altixia Commerces": "https://www.ideal-investisseur.fr/scpi-avis/altixia-commerces-1102.html",
    "Edissimmo": None,
    "Genepierre": None,
    "Rivoli Avenir Patrimoine": None,
    "MomenTime": "https://www.ideal-investisseur.fr/scpi-avis/MomenTime-10149.html",
    "Transitions Europe": "https://www.ideal-investisseur.fr/scpi-avis/transitions-europe-10040.html",
    "New Gen": None,
    "Epargne Pierre": None,
    "Epargne Pierre Europe": "https://www.ideal-investisseur.fr/scpi-avis/epargne-pierre-europe-10024.html",
    "Epargne Pierre Sophia": "https://www.ideal-investisseur.fr/scpi-avis/epargne-pierre-sophia-10144.html",
    "Atream Hôtels": "https://www.ideal-investisseur.fr/scpi-avis/scpi-atream-hotels-1032.html",
    "Upeka": "https://www.ideal-investisseur.fr/scpi-avis/upeka-10046.html",
    "Accès Valeur Pierre": "https://www.ideal-investisseur.fr/scpi-avis/scpi-acces-valeur-pierre-1029.html",
    "Accimmo Pierre": None,
    "Imarea Pierre": "https://www.ideal-investisseur.fr/scpi-avis/Imarea-Pierre-10151.html",
    "Opus Real": None,
    "Optimale": "https://www.ideal-investisseur.fr/scpi-avis/optimale-consultim-1150.html",
    "Corum Eurion": "https://www.ideal-investisseur.fr/scpi-avis/corum-eurion-10000.html",
    "Corum Origin": "https://www.ideal-investisseur.fr/scpi-avis/scpi-corum-1000.html",
    "Corum USA": None,
    "Corum XL": "https://www.ideal-investisseur.fr/scpi-avis/corum-xl-1097.html",
    "Darwin RE01": "https://www.ideal-investisseur.fr/scpi-avis/darwin-re01-10138.html",
    "Edmond de Rothschild Europa": "https://www.ideal-investisseur.fr/scpi-avis/edr-europa-10133.html",
    "Elevation Tertiom": "https://www.ideal-investisseur.fr/scpi-avis/elevation-tertiom-10137.html",
    "Epsicap Explore": "https://www.ideal-investisseur.fr/scpi-avis/epsicap-explore-10140.html",
    # NOTE(review): the slug below says "epsilon-360", which does not match the
    # key name — confirm this is the intended page.
    "Epsicap Nano": "https://www.ideal-investisseur.fr/scpi-avis/epsilon-360-10019.html",
    "Euryale Horizons Santé": None,
    "Pierval Santé": "https://www.ideal-investisseur.fr/scpi-avis/pierval-sante-1055.html",
    "Buroboutic Métropoles": None,
    "Ficommerce Proximité": None,
    "Logipierre 3 Résidences Services": "https://www.ideal-investisseur.fr/scpi-avis/logipierre-3-1116.html",
    "Pierre Expansion Santé": None,
    "Selectipierre 2 - Paris": "https://www.ideal-investisseur.fr/scpi-avis/selectipierre-2-1072.html",
    "Cap Foncières et Territoires": "https://www.ideal-investisseur.fr/scpi-avis/cap-foncieres-et-territoire-1045.html",
    "GMA Essentialis": None,
    "Affinités Pierre": None,
    "Attraits Pierre": "https://www.ideal-investisseur.fr/scpi-avis/attraits-pierre-10141.html",
    "Elysées Grand Large": "https://www.ideal-investisseur.fr/scpi-avis/elysee-grand-large-10145.html",
    "Elysées Pierre": None,
    "Cristal Life": "https://www.ideal-investisseur.fr/scpi-avis/cristal-life-10033.html",
    "Cristal Rente": "https://www.ideal-investisseur.fr/scpi-avis/cristal-rente-intergestion-1104.html",
    "Iroko Atlas": "https://www.ideal-investisseur.fr/scpi-avis/iroko-atlas-10136.html",
    "Iroko Zen": "https://www.ideal-investisseur.fr/scpi-avis/iroko-zen-10007.html",
    "Kyaneos Pierre": None,
    "Crédit Mutuel Pierre 1": None,
    "Epargne Foncière": None,
    "LF Avenir Santé": "https://www.ideal-investisseur.fr/scpi-avis/lf-avenir-sante-10038.html",
}

# Ordered label-matching rules; the first rule that matches a label wins.
# Each entry is (needle, output key, reject_if_present, ignore_case):
#   - needle:            substring searched for in the label cell text
#   - output key:        key under which the value cell is stored
#   - reject_if_present: if set and found in the label, the rule does not apply
#                        (keeps "Date ..." rows out of the valuation fields)
#   - ignore_case:       search the lower-cased label instead of the raw one
_LABEL_RULES = (
    ('Société de gestion', 'gestionnaire', None, False),
    ('Création', 'date_creation', None, False),
    ('Taux de distribution brut', 'td', None, False),
    ('Performance brute globale annuelle', 'pga', None, False),
    ('TRI 10 ans', 'tri_10', None, False),
    ('TRI 15 ans', 'tri_15', None, False),
    ('Prix de souscription', 'prix_souscription', None, False),
    ('Prix de retrait', 'prix_retrait', None, False),
    ('Valeur de réalisation', 'val_realisation', 'Date', False),
    ('Valeur de reconstitution', 'val_reconstitution', 'Date', False),
    ('TOF', 'tof', None, False),
    ('endettement', 'endettement', None, True),
    ('Capitalisation', 'capitalisation', None, False),
    ('Surface totale', 'surface', None, False),
    ('Durée des baux', 'duree_baux', None, False),
    ("Nombre d'immeubles", 'nb_immeubles', None, False),
    ('Nombre de locataires', 'nb_locataires', None, False),
    ('Nb parts en retrait', 'parts_retrait', None, False),
    ('Classification SFDR', 'sfdr', None, False),
    ('Label ISR', 'label_isr', None, False),
)


def _field_for_label(label):
    """Return the output key for a label cell's text, or None if unrecognised."""
    for needle, field, reject, ignore_case in _LABEL_RULES:
        haystack = label.lower() if ignore_case else label
        if needle in haystack and not (reject and reject in label):
            return field
    return None


def extract_ideal_investisseur(url):
    """Scrape one ideal-investisseur.fr SCPI page into a flat dict.

    All ``<td>`` elements of the page are taken in document order and
    consumed as consecutive (label, value) pairs: cells 0/1, 2/3, and so on.
    A trailing unpaired cell is ignored. Each label is matched against
    ``_LABEL_RULES``; when a rule matches, the stripped text of the value
    cell is stored under that rule's key.

    Args:
        url: Address of the SCPI page to fetch.

    Returns:
        dict mapping field keys (e.g. ``'td'``, ``'tof'``, ``'endettement'``)
        to the stripped text of the matching value cell. Only fields whose
        label was found are present. If several labels map to the same key,
        the last one on the page wins.

    Exceptions raised by ``Fetcher.get`` or by the selector API are not
    caught here and propagate to the caller.
    """
    # NOTE(review): the response status is not inspected; if the fetcher
    # returns an error page instead of raising, the result is presumably just
    # an empty dict — confirm against the scrapling documentation.
    page = Fetcher.get(url)
    data = {}

    # Passing the same iterator twice to zip() yields non-overlapping pairs
    # (cell 0 with 1, 2 with 3, ...) and drops an unpaired trailing cell.
    cell_iter = iter(page.css('td'))
    for label_cell, value_cell in zip(cell_iter, cell_iter):
        field = _field_for_label(label_cell.text.strip())
        if field is not None:
            data[field] = value_cell.text.strip()

    return data

# SCPI names flagged as priority; the original intent noted here was to
# process rows with errors first.
# NOTE(review): nothing in the visible code reads PRIORITY_ROWS — the fetch
# loop below iterates over every entry of SCPI_URLS. Confirm whether this
# filter is still meant to be applied.
PRIORITY_ROWS = {
    "Eden": True,
    "Elialys": True,
}

# Fetch every SCPI that has a known URL and collect the extracted fields,
# keyed by SCPI name. Entries whose fetch fails are reported and left out.
results = {}
for scpi_name, url in SCPI_URLS.items():
    if url is None:
        print(f"SKIP {scpi_name} (no URL)")
        continue

    print(f"Fetching {scpi_name}...", end=" ", flush=True)
    try:
        data = extract_ideal_investisseur(url)
    except Exception as e:
        # Best-effort run: report the failure and carry on with the next SCPI.
        print(f"ERROR: {e}")
    else:
        results[scpi_name] = data
        print(f"OK ({len(data)} fields)")

    # Pause after every request, failed ones included, so a run of errors
    # does not turn into back-to-back requests against the site.
    time.sleep(0.5)

print("\n\n=== RESULTS ===")
print(json.dumps(results, indent=2, ensure_ascii=False))
