
    i<                        d Z ddlZddlZddlZddlZddlZddlZdZd Zd Z	d Z	i ddd	d
dddddddddddddddddddddd d!d"d#d$d%dd&d'i d(d)d*d+d,d-d.d/d0d1d2d3d4d5d6d7d8d9d:d;d<d=d>d?d@dAdBdCdDdEdFdGdHdIi dJdKdLdMdNdOdPdQdRdSdTdUdVdWdXdYdZd[d\d]d^d_d`dadbdcdddedfdgdhdidjdki dldmdndodpdqdrdsdtdudvdudwdxdydzd{d|d}d~ddMddddddddddddi ddddddddddddddddddddddddddddddddddi dd~ddddddddddddddddddddddēddƓddȓddʓdd̓ddΓdddddddddלZ
dddddddddddddޜZi Ze
j                         D ]  \  ZZej!                         Zej                         D ]  \  ZZej)                  ee      Z ej)                  dd      j)                  dd      j)                  dd      j)                  dd      Zde de dee<    ej*                  j-                  dd       ddlmZ d Zd Zd Z ed        e       Zi Z ed e e            D ]~  Z!ee!   Z"e!dz   Z e e"      dkD  re"d   jG                         ndZ$e$s0i Z% edd      D ]7  Z&e& e e"      k  re"e&   jG                         ndZ'e'dv s( e(de&z         Z)e&e%e)<   9 e%sxe$e%fee<     ed e e              h dZ*ddhZ+ ed      Z,dZ-dZ.g Z/g Z0g Z1g Z2 e3ej                               D ]T  \  Z\  Z$Z%e%j                         D  ci c]  \  } }| e*v s| | c}} Z4e4se2jk                  e$       Ge$evre/jk                  e$       ]ee$   Z6 ee6e,      Z7e7se0jk                  e$        ee7e4      Z8e4D  cg c]  } | e8v s| e8|    f c} Z9e9se1jk                  e$        ededde$dddd       dZ:e9D ]P  \  Z)Z; e	e) e e;      r' ee) ddd       e:dz  Z: ejx                  d        : ee) de;dd  ddd       R  ede: d e e4       d       e.dz  Z.e-e:z  Z- ejx                  d       W  edd         ed	e. d
e- d        ed e e/       ddj{                  e/               ed e e0       ddj{                  e0               ed e e1       ddj{                  e1               ed e e2       ddj{                  e2dd        d        e>dd      5 Z? ej                  e-e.e/e0e1de?dd       ddd       yc c}} w c c} w # 1 sw Y   yxY w(  u   
SCPI Batch Filler v2 — Better extraction from ideal-investisseur JSON.
Focuses on fillable columns: K, L, M, H, P, Q, X, Y, D, U, T, S, V, W, G, J
    Nz,1dBGv3jIsFDLMJInANzyLvUAF0HYb5iRZ_RQljo-i7XMc                      ddddddt        j                  t        dd      g} t        j                  | d	d	d
      }t        j
                  |j                        j                  dg       S )Ngwssheetsspreadsheetsvaluesget--paramszListing sites!A1:Y111)spreadsheetIdrangeT$/home/shingokuga/.openclaw/workspacecapture_outputtextcwd)jsondumpsSHEET_ID
subprocessrunloadsstdoutr   )cmdrs     /tmp/scpi_batch_v2.py
read_sheetr   
   s^    (NHetzzHG^"_`bCs4d@fgA::ahh##Hb11    c                     ddddddt        j                  t        d|  dd	      d
t        j                  d|  |ggd      g	}t        j                  |ddd      }d|j
                  v S )Nr   r   spreadsettesr   updater	   Listing sites!USER_ENTEREDr
   r   valueInputOption--jsonr   r   Tr   r   updatedCellsr   r   r   r   r   r   	range_strvaluer   r   s       r   
gws_updater+          (NHhtzzHXaWbGc  zH  #I  JTZZN9+*FTYSZR[ \]_C 	s4d@fgAQXX%%r   c                     ddddddt        j                  t        d|  dd	      d
t        j                  d|  |ggd      g	}t        j                  |ddd      }d|j
                  v S )Nr   r   r   r   r   r	   r    r!   r"   r$   r%   Tr   r   r&   r'   r(   s       r   r+   r+      r,   r   Edeni'  Elialysi  	Eurovalysi  zAestiam Agorai  zAestiam Horizoni/  Linaclubi  zAEW Commerces EuropeiJ'  zAEW Diversification Allemagnei)'  u   AEW Opportunités Europei*'  u   AEW Patrimoine Santéi+'  zAtout Pierre Diversificationiz  	Activimmoi'  u   Comètei='  zAllianz Pierrei  zAlta Convictionsi'  zAltixia Cadence XIIzAltixia Commercesi  	Edissimmoi  
Genepierrei5  zRivoli Avenir Patrimoinei  	MomenTimei'  zTransitions Europei8'  zNew Geni%'  zEpargne Pierrei'  zEpargne Pierre Europei'  zEpargne Pierre Sophiai'  u   Atream Hôtelsi  Upekai'  u   Accès Valeur Pierrei  zAccimmo Pierrei  zImarea Pierrei'  z	Opus Reali_  Optimalei'  zCorum Eurioni'  zCorum Origini  z	Corum USAi  zCorum XLiI  zDarwin RE01i'  zEdmond de Rothschild Europai'  zElevation Tertiomi'  zEpsicap Explorei#'  zEpsicap Nanoi2'  u   Euryale Horizons Santéi3'  u   Pierval Santéi  u   Buroboutic Métropolesi'  u   Ficommerce Proximitéi'  u!   Logipierre 3 Résidences Servicesi'  u   Pierre Expansion Santéi'  zSelectipierre 2 - Parisi0  u   Cap Foncières et Territoiresi  zGMA Essentialisi'  u   Affinités Pierrei  zAttraits Pierrei'  u   Elysées Grand Largeiw'  u   Elysées Pierrei  zCristal Lifei1'  zCristal RentezIroko Atlasi'  z	Iroko Zeni'  zKyaneos Pierrei0'  u   Crédit Mutuel Pierre 1i)  u   Epargne Foncièreu   LF Avenir Santéi&  zLF Croissance et Territoiresi'  zLF Europimmoii  zLF Grand Paris PatrimoineiZ  u   LF Opportunité Immoi[  zSelectinvest 1if  u   Foncière Des Praticiensix  zOsmo Energiei{  Reasoni7'  zMy Share Educationi'  zMy Share SCPIiy  zNCap Continenti'  u   NCap Education Santéi'  u   NCap Régionsit  zNovaxia NeoiK  zNovapierre 1i3  z	Paref Evoi'  z
Paref Hexai'  zParef Primai'  zPerial Grand Parisi+  u   Perial Hospitalité Europei.  z	Perial O2i,  u   Perial Opportunités Europei*  u    Perial Opportunités TerritoireszPatrimmo Commercei~'  zPraemia Hotels Europeik  Primopierrei9  Primoviei   zPrincipal Insideil  zRemake Livei!'  zRemake UK 2025ig  zEfimmo 1i'  	Immorentei  Sofiboutiquei''  zSofidy Europe Investi'  Sofidynamici'  
Sofipierrei$'  zCoeur d'AveniriN  zCoeur d'Europei:'  u   Coeur de régionsiO  i<'  i'  in  i'  iR  i4'  iu  i'  )zCoeur de villezEsprit HorizonzESG Pierre Capitaleu   Mistral SélectionzTelamon BoreazLOG INzUrban Coeur CommercezWemo Oneeaiouc)   é   è   ê   ë   â   à   î   ï   ô   û   ü   ç'  -()z,https://www.ideal-investisseur.fr/scpi-avis/z.htmlzc/home/shingokuga/.openclaw/workspace/skills/scrapling-web-scraper/venv/lib/python3.12/site-packages)Fetcherc                    	 |j                  | ddi      }|j                  dk7  s$|j                  rt        |j                        dk  ry|j                  j	                  dd	      }t        j                  d
|t
        j                        }|D ]   }	 t        j                  |      }d|v r|c S " t        |      S #  Y 2xY w# t        $ r}t        d|        Y d}~yd}~ww xY w)z,Fetch page and extract data from JSON blocksz
User-Agentz'Mozilla/5.0 (compatible; Googlebot/2.1))headers   i  Nzutf-8ignore)errorsz7<script[^>]*type="application/json"[^>]*>(.*?)</script>latestz    Error: )r   statusbodylendecoderefindallSr   r   extract_from_html	Exceptionprint)urlfetcherpagehtmlblocksblockdatar?   s           r   fetch_and_extractro   b   s    {{37`(a{b;;#TYY#dii.32Fyy9 VX\^`^b^bc 	Ezz%(t#K $	 !&&	  A3 s=   AC AC B=-C 0C =C?C 	C%C  C%c           	      l   t        j                  dd| t         j                  t         j                  z        }t        j                  dd|t         j                  t         j                  z        }t        j                  dd|t         j                        }t        j                  dd|t         j                        }t        j                  dd	|      }d
D ]  \  }}|j	                  ||      } t        j                  dd	|      }t        j                  dd|      }ddi}ddgddgddgdgdgdgdgdgd}|j                         D ]W  \  }}|D ]M  }	t        j                  |	|t         j                        }
|
s+|
j                  d      j                         ||<    W Y t        |      dkD  r|S dS )zFallback HTML text extractionz<script[^>]*>.*?</script>rR   )flagsz<style[^>]*>.*?</style>z	<br\s*/?>
z%</?(?:p|div|h[1-6]|li|tr|td|th)[^>]*>z<[^>]+>rS   ))z&nbsp;rS   )z&amp;&)z&euro;   €)z&rsquo;rQ   )z&agrave;rJ   )z&eacute;rE   z[ \t]+z\n\s*\n+_sourcerk   z3Taux\s+d'endettement\s*:?\s*([0-9]+[,.]?[0-9]*)\s*%z&endettement\s*:?\s*([0-9]+[,.]?[0-9]*)z"TOF\s*:?\s*([0-9]+[,.]?[0-9]*)\s*%z/taux\s+d'occupation[^:]*:\s*([0-9]+[,.]?[0-9]*)u2   Prix\s+de\s+souscription\s*:?\s*([0-9\s,.]+)\s*€u&   souscription\s*:?\s*([0-9\s,.]+)\s*€z8Taux\s+de\s+distribution[^:]*:\s*([0-9]+[,.]?[0-9]*)\s*%u8   TRI\s+(?:à\s+)?10\s+ans?\s*:?\s*([0-9]+[,.]?[0-9]*)\s*%zG(?:Commission|Frais)\s+de\s+souscription\s*:?\s*([0-9]+[,.]?[0-9]*)\s*%zB(?:Commission|Frais)\s+de\s+gestion\s*:?\s*([0-9]+[,.]?[0-9]*)\s*%uB   Capitalisation\s*:?\s*([0-9]+[,.]?[0-9]*\s*(?:M€|Mds?€|Md€)))taux_endettementtofprix_souscriptiontaux_distribution
tri_10_anscomm_souscriptioncomm_gestioncapitalisation      N)
rb   subrd   Ireplaceitemssearchgroupstripr`   )rk   html2r   oldnewdpatternskeypatspatms              r   re   re   z   s   FF/TbddKEFF-r5RTT	JEFF<uBDD9EFF;T5PRPTPTUE66*c5)Dw &S||C%&66)S$'D66+tT*D	FA TU~57ijSU~YZRShi^_`a	H ^^% 	T 	C		#tRTT*A))+#		 A
1$$r   c                    i }| j                  di       | j                  di       }d| v}fd}|rd|v rI |d      }|r?|dvr;|j                  dd	      }	 t        |      }|d
kD  r|j                  d	d       d|d<   d|v r |d      }|r	|dvr||d<   d|v rI |d      }|r?|dvr;|j                  dd	      }	 t        |      }|d
kD  r|j                  d	d       d|d<   d|v rI |d      }|r?|dvr;|j                  dd	      }	 t        |      }|d
kD  r|j                  d	d       d|d<   d|v rI |d      }|r?|dvr;|j                  dd	      }	 t        |      }|d
kD  r|j                  d	d       d|d<   d|v r' |d      }|r|dk7  r|j                  d	d       d|d<   d|v r& |d      }|r|dvr|j                  d	d       d|d<   d|v r9 |d      }|/|j                         dv rd|d<   n|j                         dv rd |d<   d!|v r |d"      }|r||d!<   |S d#D ][  \  }	}
}|
|v s| j                  |	      }|s |j                  dd	      j                  d$d%      }	 t        |      }|d
kD  r	| | ||
<   ] | j                  d"      }|r	d!|v r||d!<   | j                  d      }|rd|v r| d|d<   |S #  Y GxY w#  Y xY w#  Y xY w#  Y axY w#  Y xY w)&z*Map extracted data to needed sheet columnsr]   scpiru   c                     j                  |       }|t        |      j                         dv ry t        |      j                         S )N)rR   nullNone)r   strr   )r   vr]   s     r   valzmap_data_to_cols.<locals>.val   s:    JJsO9A*>>1v||~r   Krv   )00.00z0,00,.r   %Mparts_en_attente_retrait)r   r   rd   rw   Try   Urz   Vrx   r   rt   Wvaleur_reconstitutionG	label_isr)true1ouiyesOui)falser   nonnoNonJr}   ))rv   r   r   )rw   rd   r   )ry   r   r   )rz   r   r   )r{   X% TTC)r|   Yr   rS   rR   )r   r   floatlower)rn   
empty_colsresultr   is_jsonr   r   v_cleannumr   colsuffixr]   s               @r   map_data_to_colsr      s   FXXh#F88FBDt#G *&'AQ33))C-.CQw)*3)<(=Q&?s *./AQm+s *E
AQ33))C-.CQw)*3)<(=Q&?s
 *'(AQ33))C-.CQw)*3)<(=Q&?s
 *L!AQ33))C-.CQw)*3)<(=Q&?s
 *'(AQ#X!"3!4 5S9s *+,AQ33!"3!4 5S9s *K A}779 ;;"'F3KWWY"=="'F3K *$%AsB M7!
 	Cf j HHSMiiS199#rBG#Gn7-.Cx.F3K	& HH%&
"F3KHH()
"Cs)F3KMo:js<   (K =(K 
(K (K +K#KKKK #K'zReading sheet...r~         )rR   zN/Au   Non trouvérT   A   zRows needing data: >   Dr   Hr   r   Lr   PQrd   r   r   r   r   r   r   NOF)
auto_matchzRow 3dz (35sz): T)endflushu   ✅ gQ?u   ❌(   z) /z
 fillable)g      ?rr   z<============================================================u   🏁 TOTAL: z rows, z cells filledz	
No URL (z, z	No data (z
No match (zSkipped (only N/O gaps) (   z...z/tmp/scpi_batch_v2_results.jsonw)total_cells
total_rowsno_urlno_datano_matchr   )ensure_asciiindent)A__doc__r   rb   r   timesysosr   r   r+   MANUAL_URLSACCENTSURL_MAPr   nameiidr   slugrD   r   r   pathinsert	scraplingrW   ro   re   r   rg   rowsneedsr   r`   rA   rowr   r   emptyjr   chrr   FILLABLE	NEEDS_PDFri   r   r   r   r   r   skippedsortedfillable_emptyappendrh   rn   mappedupdates	row_cellsr*   sleepjoinopenfdump)r   idxs   00r   <module>r      s2
   + * *92&&4
E4d4$/47F4t4'4/Eu4 $U4 -G4 U	4 %CD	4
 4
 "54
 +;D4 4  5d4 4  +D4 3?4 4 '254 %4 "+E4 e4 5e4 U4 %5d4 U4 +D4 3CD4 U4 (4 0:54 E4 *44 2=d4 4 $U4  "5!4  +>u!4" u#4" -e#4$ u%4$ '7%4& e'4& &=e'4( ()4* u+4* '@+4, $T-4, ,=e-4. /4.  1%/40 E140 $5d142 E342 +E344 5544 &u546 e746 7748 948  2494: #E;4: ,:4;4<  =4< (>t=4> d?4> 7?4@ DA4@ #EA4B %C4B "1$C4D eE4D 5eE4F TG4F )$G4F 1?G4H I4H %eI4H .;EI4J $K4J !=dK4L M4L 5dM4N 'O4P Q4P !8Q4R 4S4R $TS4T U4T ,UU4V dW4V 'W4X Y4X &uY4Z E[4Z $1%[4\ %]4\ *4]4^ e_4^ 1$_4` uuU eg4l #3CScsX[adjmsv
w
""$ UID#::<D "1||Aq!"<<R ((c2::3CKKCQSTDB4&#eTGDMU x y 0!%FOd  | 		q#d)	 !A
q'C	AA X\3q6<<>rDE1b\  !CHc!flln"11bd)CE#J	
 %=a! CJ<( ) \#J	
U
#
	

u{{}- #A}e/4{{}P83xc3hPNt7d
$-CS'*DtdN3F-;Mcsf}VC[!MG	D2bc
#
&Bd;I B
UaSk5)SE,Bd3NIDJJtSEeCRj\,"DAB 
Ai[#n-.j
9:!OJ9KDJJtG#J 6(m  ZL}MB C 
3v;-s499V#4"56 7 	#g,s499W#5"67 8 
3x=/TYYx%8$9: ; !#g,s499WRa[3I2J#N O	
+S1 cQDIIkvbiw  A  CD  SX  ab  cc cU Q  N4c cs$   U5U5<	U;	U; V  V	