#!/usr/bin/env python3
"""SCPI Batch Filler v4 — Primaliance with proper slug matching from sitemap."""
import json, re, subprocess, time, sys, urllib.request

# ID of the target Google Sheet; all reads/writes go to its "Listing sites" tab.
SHEET_ID = "1dBGv3jIsFDLMJInANzyLvUAF0HYb5iRZ_RQljo-i7XM"

def read_sheet():
    """Fetch rows A1:Y111 of the 'Listing sites' tab via the `gws` CLI.

    Returns:
        list[list[str]]: the sheet rows, or [] when the CLI call fails or
        prints something that is not the expected JSON payload (previously
        this crashed with a JSONDecodeError on any CLI failure).
    """
    cmd = ['gws', 'sheets', 'spreadsheets', 'values', 'get',
           '--params', json.dumps({"spreadsheetId": SHEET_ID, "range": "Listing sites!A1:Y111"})]
    r = subprocess.run(cmd, capture_output=True, text=True, cwd='/home/shingokuga/.openclaw/workspace')
    try:
        return json.loads(r.stdout).get('values', [])
    except (json.JSONDecodeError, AttributeError):
        # CLI error, empty stdout, or a non-object payload: treat as "no data".
        return []

def gws_update(range_str, value):
    """Write a single value into 'Listing sites'!<range_str> via the `gws` CLI.

    Returns True when the CLI output mentions 'updatedCells' (write succeeded).
    """
    full_range = f"Listing sites!{range_str}"
    params = {"spreadsheetId": SHEET_ID, "range": full_range, "valueInputOption": "USER_ENTERED"}
    body = {"range": full_range, "values": [[value]]}
    cmd = ['gws', 'sheets', 'spreadsheets', 'values', 'update',
           '--params', json.dumps(params),
           '--json', json.dumps(body)]
    result = subprocess.run(cmd, capture_output=True, text=True, cwd='/home/shingokuga/.openclaw/workspace')
    return 'updatedCells' in result.stdout

def extract_primaliance(html):
    """Scrape SCPI key figures from a Primaliance detail page's HTML.

    For each known French label, the value is taken from the first
    <span class="major-nb">…</span> occurring within 500 chars after the
    label. Returns a dict containing only the keys that were found.
    """
    # Hoisted: the same value-span pattern is searched for every label.
    major_nb = re.compile(r'<span class="major-nb">([^<]+)</span>')
    data = {}

    def find_val(label, max_dist=500):
        # First major-nb span within max_dist chars after the label, or None.
        idx = html.find(label)
        if idx < 0:
            return None
        m = major_nb.search(html[idx:idx + max_dist])
        return m.group(1).strip() if m else None

    r = find_val('Frais de souscription')
    if r: data['comm_souscription'] = r
    r = find_val('Frais de gestion')
    if r: data['comm_gestion'] = r
    r = find_val('Délai de jouissance')
    if r: data['delai_jouissance'] = f"{r.replace('.',',')} mois"  # French decimal comma
    r = find_val('Nombre de parts')
    if r: data['nb_parts'] = r
    r = find_val('Type de capital')
    if r: data['capital_type'] = r.capitalize()
    # Debt ratio: fixed off-by-one — the old `idx > 0` test silently skipped a
    # match at offset 0, unlike the `idx < 0` convention used everywhere else.
    idx = html.find('endettement')
    if idx >= 0:
        m = major_nb.search(html[idx:idx + 500])
        if m and m.group(1).strip() not in ('-', ''):
            data['taux_endettement'] = m.group(1).strip()
    r = find_val('RAN en jours')
    if r: data['ran_jours'] = r
    # Minimum subscription: first number after "minimum" / "à partir de",
    # stopping at a euro sign or a closing span.
    m = re.search(r'(?:minimum|à\s+partir\s+de)[^0-9]*(\d[\d\s,.]*?)(?:\s*€|\s*</span>)', html, re.S|re.I)
    if m: data['souscription_min'] = m.group(1).strip().rstrip(',').rstrip('.') + ' €'
    return data

def normalize(name):
    """Normalize an SCPI name for fuzzy matching.

    Lowercases, strips outer whitespace, maps common French accented
    characters to their ASCII base letter, and removes apostrophes,
    spaces, hyphens, and parentheses.
    """
    accents = str.maketrans("éèêëâàîïôûüç", "eeeeaaiiouuc")
    cleaned = name.lower().strip().translate(accents)
    for separator in ("'", " ", "-", "(", ")"):
        cleaned = cleaned.replace(separator, "")
    return cleaned

# Load Primaliance slugs (one URL path segment per line, e.g. "95-scpi-iroko-zen"),
# pre-scraped from the site's sitemap into this temp file.
with open('/tmp/primaliance_slugs.txt') as f:
    slugs = [line.strip() for line in f if line.strip()]

# Build normalized name -> slug map
slug_map = {}
for slug in slugs:
    # Drop the leading "<id>-scpi-" prefix to recover just the SCPI name part.
    name_part = re.sub(r'^\d+-scpi-', '', slug)
    norm = normalize(name_part)
    slug_map[norm] = slug

# Manual overrides for tricky matches: normalized sheet name -> Primaliance slug.
# These take precedence over the sitemap-derived entries (see update() below).
MANUAL_MAP = {
    'edissimmo': '55-scpi-edissimmo',  # NOTE(review): id 55 also used for efimmo-1 below — confirm one of them
    'epargnefonciere': '49-scpi-epargne-fonciere',
    'genepierre': '80-scpi-genepierre',
    'primovie': '158-scpi-primovie',
    'primopierre': '33-scpi-primopierre',
    'selectinvest1': '73-scpi-selectinvest-1',
    'immorente': '85-scpi-immorente',
    'sofiboutique': '137-scpi-sofiboutique',
    'coeurdeurope': '84-scpi-coeur-d-europe',
    'coeurdavenir': '79-scpi-coeur-d-avenir',
    'coeurderegions': '83-scpi-coeur-de-regions',
    'coeurdeville': '167-scpi-coeur-de-ville',
    'efimmo1': '55-scpi-efimmo-1',  # NOTE(review): duplicate id 55 (see edissimmo)
    'lfopportuniteimmo': '159-scpi-lf-opportunite-immo',
    'novapierre1': '107-scpi-novapierre',
    'piervalsante': '92-scpi-pierval-sante',
    'logipierre3residencesservices': '42-scpi-logipierre-3',
    'pierreexpansionsante': '43-scpi-pierre-expansion-sante',
    'selectipierre2paris': '46-scpi-selectipierre-2',
    # NOTE(review): key looks like a typo — normalize("LF Avenir Santé") gives
    # 'lfavenirsante', so this override may never fire; verify against the sheet.
    'lfavensante': '74-scpi-lf-avenir-sante',
    'corumxl': '122-scpi-corum-xl',
    'corumeurion': '81-scpi-corum-eurion',
    'lfgrandparispatrimoine': '78-scpi-lf-grand-paris-patrimoine',
    'lfcroissanceetterritoires': '176-scpi-lf-croissance-et-territoires',
    'lfeuropimmo': '105-scpi-lf-europimmo',
    'parefevo': '175-scpi-paref-evo',
    'parefprima': '174-scpi-paref-prima',
    'parefhexa': '108-scpi-paref-hexa',
    'perialgrandparis': '4-scpi-perial-grand-paris',
    'perialo2': '9-scpi-perial-o2',
    'perialhospitaliteeurope': '103-scpi-perial-hospitalite-europe',
    'perialopportuniteeurope': '8-scpi-perial-opportunites-europe',
    'remakelive': '88-scpi-remake-live',
    'cristalrente': '136-scpi-cristal-rente',
    'cristallife': '161-scpi-cristal-life',
    'mistralselection': '165-scpi-mistral-selection',
    'darwinre01': '160-scpi-darwin-re01',
    'principalinside': '156-scpi-principal-inside',
    'praemiahotelseurope': '162-scpi-praemia-hotel-europe',
    'sofidyeuropeinvest': '139-scpi-sofidy-europe-invest',
    'sofidynamic': '140-scpi-sofidynamic',
    'sofipierre': '82-scpi-sofipierre',
    'patrimmocommerce': '147-scpi-patrimmo-commerce',
    'esprithorizon': '178-scpi-esprit-horizon',
    'esgpierrecapitale': '177-scpi-esg-pierre-capitale',
    'telamonborea': '179-scpi-telamon-borea',
    'urbancoeurcommerce': '173-scpi-urban-coeur-commerce',
    'wemoone': '164-scpi-wemo-one',
    'corumusa': '121-scpi-corum-usa',
    'remakeuk2025': '180-scpi-remake-uk-2025',
    'irokozen': '95-scpi-iroko-zen',
    'irokoatlas': '94-scpi-iroko-atlas',
    'log': '102-scpi-log-in',
    'login': '102-scpi-log-in',
    'ncapcontinent': '153-scpi-ncap-continent',
    'ncapeducationsante': '154-scpi-ncap-education-sante',
    'ncapregions': '155-scpi-ncap-regions',
    'mysharescpi': '170-scpi-my-share-scpi',  # NOTE(review): id 170 also used for aestiam-horizon below — confirm
    'myshareeducation': '169-scpi-my-share-education',
    'edmondderothschildeuropa': '128-scpi-edmond-de-rothschild-europa',
    'kyaneospierre': '97-scpi-kyaneos-pierre',
    'euryalehorizonssante': '166-scpi-euryale-horizons-sante',
    'epsicapexplore': '126-scpi-epsicap-explore',
    'epsicapnano': '127-scpi-epsicap-nano',
    'burobouticmetropoles': '106-scpi-buroboutic',
    'ficommerceproximite': '37-scpi-ficommerce',
    'optimale': '100-scpi-optimale',
    'reason': '99-scpi-reason',
    'osmoenergie': '117-scpi-osmo-energie',
    'foncieredespraticiens': '129-scpi-fonciere-des-praticiens',
    'novaxianeo': '138-scpi-novaxia-neo',
    'aestiamhorizon': '170-scpi-aestiam-horizon',  # NOTE(review): duplicate id 170 (see mysharescpi)
    'perialopportunitesterritoires': '171-scpi-perial-opportunites-territoires',
    'activimmo': '113-scpi-activimmo',
    # NOTE(review): key looks like a typo for 'transitionseurope' — verify.
    'transitionsurope': '316-scpi-transitions-europe',
    'aewdiversificationallemagne': '150-scpi-aew-diversification-allemagne',
    'aewopportuniteeurope': '13-scpi-aew-commerces-europe',  # NOTE(review): key/slug names differ — confirm mapping is intended
    'aewpatrimoinesante': '14-scpi-aew-patrimoine-sante',
}
slug_map.update(MANUAL_MAP)

# Read sheet
print("Reading sheet...")
rows = read_sheet()

# Scan every data row and record which target columns are still empty.
# needs: 1-based sheet row -> (SCPI name, {column letter -> 0-based index}).
needs = {}
for i in range(1, len(rows)):  # skip header row (index 0)
    row = rows[i]
    r = i + 1  # 1-based row number as used in A1 notation
    scpi = row[1].strip() if len(row) > 1 else ''  # column B holds the SCPI name
    if not scpi: continue
    empty = {}
    for j in range(3, 25):  # 0-based indices 3..24 == columns D..Y
        val = row[j].strip() if j < len(row) else ''
        if val in ('', 'N/A', 'Non trouvé', '-'):
            col = chr(65+j)  # 0-based index -> A1 column letter
            if col not in {'N', 'O'}:  # Skip RAN/PGE
                empty[col] = j
    if empty:
        needs[r] = (scpi, empty)

print(f"Rows needing data (excl N/O): {len(needs)}")

# Match SCPI names to Primaliance slugs, fetch each detail page, and write
# the scraped values into the corresponding empty sheet cells.
unmatched = []  # sheet names for which no slug could be found at all
total_cells = 0  # cells successfully written across all rows
total_rows = 0  # rows for which at least one update was attempted

for r, (scpi, empty) in sorted(needs.items()):
    norm = normalize(scpi)
    slug = slug_map.get(norm)
    
    if not slug:
        # Try partial match
        # NOTE(review): substring matching over every key is order-dependent and
        # can mis-match short names (e.g. 'log' is a key) — spot-check its hits.
        for s_norm, s_slug in slug_map.items():
            if norm in s_norm or s_norm in norm:
                slug = s_slug
                break
    
    if not slug:
        unmatched.append(scpi)
        continue
    
    url = f"https://www.primaliance.com/scpi-de-rendement/{slug}"
    
    try:
        req = urllib.request.Request(url, headers={'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36'})
        resp = urllib.request.urlopen(req, timeout=10)
        if resp.status != 200: continue
        html = resp.read().decode('utf-8', errors='ignore')
    except:
        # NOTE(review): bare except also swallows KeyboardInterrupt/SystemExit;
        # consider `except Exception`. The response is also never closed —
        # a `with urlopen(...) as resp:` block would fix both. Skipping the
        # row on any fetch error is the intended best-effort behavior.
        continue
    
    data = extract_primaliance(html)
    if not data: continue
    
    # Map each scraped field onto its sheet column, but only where that
    # column was recorded as empty for this row.
    col_updates = []
    if 'X' in empty and 'comm_souscription' in data:
        col_updates.append(('X', f"{data['comm_souscription']}% TTC"))
    if 'Y' in empty and 'comm_gestion' in data:
        col_updates.append(('Y', f"{data['comm_gestion']}% TTC"))
    if 'Q' in empty and 'delai_jouissance' in data:
        col_updates.append(('Q', data['delai_jouissance']))
    if 'L' in empty and 'nb_parts' in data:
        col_updates.append(('L', data['nb_parts']))
    if 'D' in empty and 'capital_type' in data:
        col_updates.append(('D', data['capital_type']))
    if 'K' in empty and 'taux_endettement' in data:
        col_updates.append(('K', f"{data['taux_endettement']}%"))
    if 'H' in empty and 'souscription_min' in data:
        col_updates.append(('H', data['souscription_min']))
    
    if not col_updates: continue
    
    print(f"Row {r:3d} ({scpi:35s}): ", end="", flush=True)
    row_cells = 0
    for col, value in col_updates:
        if gws_update(f"{col}{r}", value):
            print(f"{col}✅ ", end="", flush=True)
            row_cells += 1
            total_cells += 1
        else:
            print(f"{col}❌ ", end="", flush=True)
        time.sleep(0.12)  # throttle per-cell writes (Sheets API quota)
    print(f"({row_cells})")
    total_rows += 1
    time.sleep(0.3)  # extra pause between rows

# Final summary: totals plus any SCPI names that could not be matched.
print(f"\n{'='*60}")
print(f"🏁 TOTAL: {total_rows} rows, {total_cells} cells filled")
if unmatched:
    print(f"\nUnmatched ({len(unmatched)}): {', '.join(unmatched)}")
