#!/usr/bin/env python3
"""
Pass 8: Extract structured JSON data from ideal-investisseur SCPI pages.
The pages contain <script type="application/json"> with ALL the data we need.
"""
import json, re, subprocess, time, sys
sys.path.insert(0, '/home/shingokuga/.openclaw/workspace/skills/scrapling-web-scraper/venv/lib/python3.12/site-packages')
from scrapling import Fetcher

SHEET_ID = "1dBGv3jIsFDLMJInANzyLvUAF0HYb5iRZ_RQljo-i7XM"

# Complete URL map (manual + sitemap)
URL_MAP = {
    "Eden": "https://www.ideal-investisseur.fr/scpi-avis/eden-10150.html",
    "Elialys": "https://www.ideal-investisseur.fr/scpi-avis/elialys-1003.html",
    "Eurovalys": "https://www.ideal-investisseur.fr/scpi-avis/eurovalys-1002.html",
    "Aestiam Agora": "https://www.ideal-investisseur.fr/scpi-avis/aestiam-agora-1028.html",
    "Aestiam Horizon": "https://www.ideal-investisseur.fr/scpi-avis/aestiam-horizon-1071.html",
    "Linaclub": "https://www.ideal-investisseur.fr/scpi-avis/linaclub-1004.html",
    "AEW Commerces Europe": "https://www.ideal-investisseur.fr/scpi-avis/aew-commerces-europe-10058.html",
    "AEW Diversification Allemagne": "https://www.ideal-investisseur.fr/scpi-avis/aew-diversification-allemagne-10025.html",
    "AEW Opportunités Europe": "https://www.ideal-investisseur.fr/scpi-avis/aew-opportunites-europe-10026.html",
    "AEW Patrimoine Santé": "https://www.ideal-investisseur.fr/scpi-avis/aew-patrimoine-sante-10027.html",
    "Atout Pierre Diversification": "https://www.ideal-investisseur.fr/scpi-avis/atout-pierre-diversification-1146.html",
    "Activimmo": "https://www.ideal-investisseur.fr/scpi-avis/activimmo-10009.html",
    "Comète": "https://www.ideal-investisseur.fr/scpi-avis/comete-alderan-10045.html",
    "Allianz Pierre": "https://www.ideal-investisseur.fr/scpi-avis/allianz-pierre-1030.html",
    "Alta Convictions": "https://www.ideal-investisseur.fr/scpi-avis/alta-convictions-10153.html",
    "Altixia Cadence XII": "https://www.ideal-investisseur.fr/scpi-avis/altixia-cadence-xii-1004.html",
    "Altixia Commerces": "https://www.ideal-investisseur.fr/scpi-avis/altixia-commerces-1006.html",
    "Edissimmo": "https://www.ideal-investisseur.fr/scpi-avis/edissimmo-1054.html",
    "Genepierre": "https://www.ideal-investisseur.fr/scpi-avis/genepierre-1077.html",
    "Rivoli Avenir Patrimoine": "https://www.ideal-investisseur.fr/scpi-avis/scpi-rivoli-avenir-patrimoine-1025.html",
    "MomenTime": "https://www.ideal-investisseur.fr/scpi-avis/MomenTime-10149.html",
    "Transitions Europe": "https://www.ideal-investisseur.fr/scpi-avis/transitions-europe-10040.html",
    "New Gen": "https://www.ideal-investisseur.fr/scpi-avis/newgen-immo-10021.html",
    "Epargne Pierre": "https://www.ideal-investisseur.fr/scpi-avis/epargne-pierre-10014.html",
    "Epargne Pierre Europe": "https://www.ideal-investisseur.fr/scpi-avis/epargne-pierre-europe-10143.html",
    "Epargne Pierre Sophia": "https://www.ideal-investisseur.fr/scpi-avis/epargne-pierre-sophia-10144.html",
    "Atream Hôtels": "https://www.ideal-investisseur.fr/scpi-avis/scpi-atream-hotels-1032.html",
    "Upeka": "https://www.ideal-investisseur.fr/scpi-avis/upeka-10147.html",
    "Accès Valeur Pierre": "https://www.ideal-investisseur.fr/scpi-avis/scpi-acces-valeur-pierre-1029.html",
    "Accimmo Pierre": "https://www.ideal-investisseur.fr/scpi-avis/accimmo-pierre-1050.html",
    "Imarea Pierre": "https://www.ideal-investisseur.fr/scpi-avis/imarea-pierre-10111.html",
    "Opus Real": "https://www.ideal-investisseur.fr/scpi-avis/opus-real-bnp-reim-1119.html",
    "Optimale": "https://www.ideal-investisseur.fr/scpi-avis/optimale-10005.html",
    "Corum Eurion": "https://www.ideal-investisseur.fr/scpi-avis/corum-eurion-10000.html",
    "Corum Origin": "https://www.ideal-investisseur.fr/scpi-avis/scpi-corum-1000.html",
    "Corum USA": "https://www.ideal-investisseur.fr/scpi-avis/corum-usa-1001.html",
    "Corum XL": "https://www.ideal-investisseur.fr/scpi-avis/corum-xl-1097.html",
    "Darwin RE01": "https://www.ideal-investisseur.fr/scpi-avis/darwin-re01-10138.html",
    "Edmond de Rothschild Europa": "https://www.ideal-investisseur.fr/scpi-avis/edr-europa-10133.html",
    "Elevation Tertiom": "https://www.ideal-investisseur.fr/scpi-avis/elevation-tertiom-10137.html",
    "Epsicap Explore": "https://www.ideal-investisseur.fr/scpi-avis/epsilon-360-10019.html",
    "Epsicap Nano": "https://www.ideal-investisseur.fr/scpi-avis/epsicap-nano-10034.html",
    "Euryale Horizons Santé": "https://www.ideal-investisseur.fr/scpi-avis/euryale-horizons-sante-10035.html",
    "Pierval Santé": "https://www.ideal-investisseur.fr/scpi-avis/pierval-sante-1055.html",
    "Buroboutic Métropoles": "https://www.ideal-investisseur.fr/scpi-avis/buroboutic-metropoles-10116.html",
    "Ficommerce Proximité": "https://www.ideal-investisseur.fr/scpi-avis/ficommerce-proximite-10115.html",
    "Logipierre 3 Résidences Services": "https://www.ideal-investisseur.fr/scpi-avis/logipierre-residences-services-10118.html",
    "Pierre Expansion Santé": "https://www.ideal-investisseur.fr/scpi-avis/pierre-expansion-sante-10117.html",
    "Selectipierre 2 - Paris": "https://www.ideal-investisseur.fr/scpi-avis/selectipierre-2-1072.html",
    "Cap Foncières et Territoires": "https://www.ideal-investisseur.fr/scpi-avis/cap-foncieres-et-territoire-1045.html",
    "GMA Essentialis": "https://www.ideal-investisseur.fr/scpi-avis/gma-essentialis-10008.html",
    "Affinités Pierre": "https://www.ideal-investisseur.fr/scpi-avis/scpi-affinites-pierre-1031.html",
    "Attraits Pierre": "https://www.ideal-investisseur.fr/scpi-avis/attraits-pierre-10141.html",
    "Elysées Grand Large": "https://www.ideal-investisseur.fr/scpi-avis/elysees-grand-large-10103.html",
    "Elysées Pierre": "https://www.ideal-investisseur.fr/scpi-avis/elysees-pierre-1041.html",
    "Cristal Life": "https://www.ideal-investisseur.fr/scpi-avis/cristal-life-10033.html",
    "Cristal Rente": "https://www.ideal-investisseur.fr/scpi-avis/cristal-rente-10033.html",
    "Iroko Atlas": "https://www.ideal-investisseur.fr/scpi-avis/iroko-atlas-10006.html",
    "Iroko Zen": "https://www.ideal-investisseur.fr/scpi-avis/iroko-zen-10007.html",
    "Kyaneos Pierre": "https://www.ideal-investisseur.fr/scpi-avis/kyaneos-denormandie-3-10032.html",
    "Crédit Mutuel Pierre 1": "https://www.ideal-investisseur.fr/scpi-avis/credit-mutuel-pierre-1-1065.html",
    "Epargne Foncière": "https://www.ideal-investisseur.fr/scpi-avis/scpi-epargne-fonciere-1001.html",
    "LF Avenir Santé": "https://www.ideal-investisseur.fr/scpi-avis/lf-avenir-sante-1062.html",
    "LF Croissance et Territoires": "https://www.ideal-investisseur.fr/scpi-avis/lf-croissance-et-territoires-10152.html",
    "LF Europimmo": "https://www.ideal-investisseur.fr/scpi-avis/lf-europimmo-1129.html",
    "LF Grand Paris Patrimoine": "https://www.ideal-investisseur.fr/scpi-avis/lf-grand-paris-patrimoine-1114.html",
    "LF Opportunité Immo": "https://www.ideal-investisseur.fr/scpi-avis/lf-opportunite-immo-1115.html",
    "Selectinvest 1": "https://www.ideal-investisseur.fr/scpi-avis/selectinvest-1-1126.html",
    "Foncière Des Praticiens": "https://www.ideal-investisseur.fr/scpi-avis/fonciere-des-praticiens-1144.html",
    "Osmo Energie": "https://www.ideal-investisseur.fr/scpi-avis/osmo-energie-1147.html",
    "Reason": "https://www.ideal-investisseur.fr/scpi-avis/reason-10039.html",
    "My Share Education": "https://www.ideal-investisseur.fr/scpi-avis/my-share-education-10124.html",
    "My Share SCPI": "https://www.ideal-investisseur.fr/scpi-avis/my-share-scpi-1145.html",
    "NCap Continent": "https://www.ideal-investisseur.fr/scpi-avis/ncap-continent-10122.html",
    "NCap Education Santé": "https://www.ideal-investisseur.fr/scpi-avis/ncap-education-sante-10123.html",
    "NCap Régions": "https://www.ideal-investisseur.fr/scpi-avis/ncap-regions-1140.html",
    "Novaxia Neo": "https://www.ideal-investisseur.fr/scpi-avis/novaxia-neo-1099.html",
    "Novapierre 1": "https://www.ideal-investisseur.fr/scpi-avis/novapierre-1075.html",
    "Paref Evo": "https://www.ideal-investisseur.fr/scpi-avis/paref-evo-10162.html",
    "Paref Hexa": "https://www.ideal-investisseur.fr/scpi-avis/paref-hexa-10164.html",
    "Paref Prima": "https://www.ideal-investisseur.fr/scpi-avis/paref-prima-10163.html",
    "Perial Grand Paris": "https://www.ideal-investisseur.fr/scpi-avis/pf-grand-paris-1067.html",
    "Perial Hospitalité Europe": "https://www.ideal-investisseur.fr/scpi-avis/pf-hospitalite-europe-1070.html",
    "Perial O2": "https://www.ideal-investisseur.fr/scpi-avis/pf-o2-1068.html",
    "Perial Opportunités Europe": "https://www.ideal-investisseur.fr/scpi-avis/perial-opportunites-europe-1066.html",
    "Perial Opportunités Territoires": "https://www.ideal-investisseur.fr/scpi-avis/perial-opportunites-territoires-1065.html",
    "Patrimmo Commerce": "https://www.ideal-investisseur.fr/scpi-avis/patrimmo-commerce-10110.html",
    "Praemia Hotels Europe": "https://www.ideal-investisseur.fr/scpi-avis/praemia-hotels-europe-1131.html",
    "Primopierre": "https://www.ideal-investisseur.fr/scpi-avis/primopierre-1081.html",
    "Primovie": "https://www.ideal-investisseur.fr/scpi-avis/scpi-primovie-primonial-1024.html",
    "Principal Inside": "https://www.ideal-investisseur.fr/scpi-avis/principal-inside-1132.html",
    "Remake Live": "https://www.ideal-investisseur.fr/scpi-avis/remake-live-10017.html",
    "Remake UK 2025": "https://www.ideal-investisseur.fr/scpi-avis/remake-uk-1127.html",
    "Efimmo 1": "https://www.ideal-investisseur.fr/scpi-avis/efimmo-1-1063.html",
    "Immorente": "https://www.ideal-investisseur.fr/scpi-avis/scpi-immorente-1026.html",
    "Sofiboutique": "https://www.ideal-investisseur.fr/scpi-avis/sofiboutique-10023.html",
    "Sofidy Europe Invest": "https://www.ideal-investisseur.fr/scpi-avis/sofidy-europe-invest-10129.html",
    "Sofidynamic": "https://www.ideal-investisseur.fr/scpi-avis/sofidynamic-10128.html",
    "Sofipierre": "https://www.ideal-investisseur.fr/scpi-avis/sofipierre-10020.html",
    "Coeur d'Avenir": "https://www.ideal-investisseur.fr/scpi-avis/coeur-d-avenir-1102.html",
    "Coeur d'Europe": "https://www.ideal-investisseur.fr/scpi-avis/coeur-d-europe-sogenial-10042.html",
    "Coeur de régions": "https://www.ideal-investisseur.fr/scpi-avis/sogenial-coeur-de-regions-1103.html",
    "Coeur de ville": "https://www.ideal-investisseur.fr/scpi-avis/coeur-de-ville-10044.html",
    "Esprit Horizon": "https://www.ideal-investisseur.fr/scpi-avis/esprit-horizon-10136.html",
    "ESG Pierre Capitale": "https://www.ideal-investisseur.fr/scpi-avis/esg-pierre-capitale-1134.html",
    "Mistral Sélection": "https://www.ideal-investisseur.fr/scpi-avis/mistral-selection-10130.html",
    "Telamon Borea": "https://www.ideal-investisseur.fr/scpi-avis/borea-1106.html",
    "LOG IN": "https://www.ideal-investisseur.fr/scpi-avis/log-in-theroreim-10036.html",
    "Urban Coeur Commerce": "https://www.ideal-investisseur.fr/scpi-avis/urban-vitalim-3-1141.html",
    "Wemo One": "https://www.ideal-investisseur.fr/scpi-avis/wemo-one-10131.html",
}

def read_sheet():
    cmd = ['gws', 'sheets', 'spreadsheets', 'values', 'get',
           '--params', json.dumps({"spreadsheetId": SHEET_ID, "range": "Listing sites!A1:Y111"})]
    r = subprocess.run(cmd, capture_output=True, text=True, cwd='/home/shingokuga/.openclaw/workspace')
    return json.loads(r.stdout).get('values', [])

def gws_update(range_str, value):
    cmd = ['gws', 'sheets', 'spreadsheets', 'values', 'update',
           '--params', json.dumps({"spreadsheetId": SHEET_ID, "range": f"Listing sites!{range_str}", "valueInputOption": "USER_ENTERED"}),
           '--json', json.dumps({"range": f"Listing sites!{range_str}", "values": [[value]]})]
    r = subprocess.run(cmd, capture_output=True, text=True, cwd='/home/shingokuga/.openclaw/workspace')
    return 'updatedCells' in r.stdout

def extract_json_data(html):
    """Extract JSON data block from ideal-investisseur page"""
    blocks = re.findall(r'<script[^>]*type="application/json"[^>]*>(.*?)</script>', html, re.S)
    for block in blocks:
        try:
            data = json.loads(block)
            if 'latest' in data:
                return data
        except:
            pass
    return None

def json_to_sheet(data):
    """Map JSON fields to sheet columns"""
    d = {}
    latest = data.get('latest', {})
    scpi_data = data.get('scpi', {})
    
    # K: Endettement
    v = latest.get('taux_endettement')
    if v and v != '0' and v != '0.00': d['K'] = str(v) + '%'
    
    # S: TOF
    v = latest.get('tof')
    if v and v != '0' and v != '0.00': d['S'] = str(v) + '%'
    
    # V: Prix souscription
    v = latest.get('prix_souscription')
    if v: d['V'] = str(v) + ' €'
    
    # W: Valeur reconstitution
    v = latest.get('valeur_reconstitution')
    if v and str(v) != '0' and str(v) != '0.00': d['W'] = str(v) + ' €'
    
    # T: TD (taux distribution = performance globale)
    v = latest.get('taux_distribution')
    if v and str(v) != '0' and str(v) != '0.00': d['T'] = str(v) + '%'
    
    # U: TRI 10 ans
    v = latest.get('tri_10_ans')
    if v and str(v) != '0' and str(v) != '0.00': d['U'] = str(v) + '%'
    
    # G: Label ISR
    v = latest.get('label_isr')
    if v is not None:
        d['G'] = 'Oui' if str(v).lower() not in ('null', '', 'non', '0') else 'Non'
    
    # J: Capitalisation
    v = latest.get('capitalisation')
    if v: d['J'] = str(v)
    
    # M: Parts retrait
    v = latest.get('parts_en_attente_retrait')
    if v and str(v) != '0' and str(v) != 'null': d['M'] = str(v)
    
    # Dividende annuel (for context)
    v = latest.get('dividende_annuel')
    
    return d

# Read sheet
rows = read_sheet()
needs = {}
for i in range(1, len(rows)):
    row = rows[i]
    r = i + 1
    scpi = row[1].strip() if len(row) > 1 else ''
    if not scpi: continue
    empty = {}
    for j in range(3, 25):
        val = row[j].strip() if j < len(row) else ''
        if val in ('', 'N/A', 'Non trouvé', '-'):
            col = chr(65+j)
            empty[col] = j
    if empty:
        needs[r] = (scpi, empty)

print(f"Rows needing fixes: {len(needs)}")
print(f"URLs available: {len(URL_MAP)}")

fetcher = Fetcher(auto_match=False)
total_cells = 0
total_rows = 0
no_url = []
no_json = []

for r, (scpi, empty) in sorted(needs.items()):
    if scpi not in URL_MAP:
        no_url.append(scpi)
        continue
    
    url = URL_MAP[scpi]
    try:
        page = fetcher.get(url, headers={'User-Agent': 'Mozilla/5.0 (compatible; Googlebot/2.1)'})
        if page.status != 200 or not page.body or len(page.body) < 500:
            no_json.append(f"{scpi} (HTTP {page.status})")
            continue
        
        html = page.body.decode('utf-8', errors='ignore')
        data = extract_json_data(html)
        if not data or 'latest' not in data:
            no_json.append(f"{scpi} (no JSON)")
            continue
    except Exception as e:
        no_json.append(f"{scpi} (error: {e})")
        continue
    
    mapped = json_to_sheet(data)
    updates = [(col, mapped[col]) for col in empty if col in mapped]
    if not updates:
        no_json.append(f"{scpi} (no matching fields)")
        continue
    
    print(f"Row {r:3d} ({scpi:35s}): ", end="", flush=True)
    row_cells = 0
    for col, value in updates:
        if gws_update(f"{col}{r}", value):
            print(f"{col}✅ ", end="", flush=True)
            row_cells += 1
            time.sleep(0.15)
        else:
            print(f"{col}❌ ", end="", flush=True)
    print(f"({row_cells})")
    total_rows += 1
    total_cells += row_cells
    time.sleep(0.3)

print(f"\n\n🏁 TOTAL: {total_rows} rows, {total_cells} cells filled via JSON extraction")
print(f"\nNo URL ({len(no_url)}): {', '.join(no_url[:10])}")
print(f"No JSON ({len(no_json)}): {', '.join(no_json[:15])}")
