
    i8              
       8   d Z ddlZddlZddlZddlZddlZddlZdZd Zd Z	 e
d       ddlmZmZ 	  edd	d
i      Z eed      Zej!                         j#                  d      Z ej&                  de      Z e
d ee       d       i dddddddddddddd d!d"d#d$d%d&d'd(d)d*d+d,d-d.d/d0d1dd2d3i d4d5d6d7d8d9d:d;d<d=d>d?d@dAdBdCdDdEdFdGdHdIdJdKdLdMdNdOdPdQdRdSdTdUi dVdWdXdYdZd[d\d]d^d_d`dadbdcdddedfdgdhdidjdkdldmdndodpdqdrdsdtdudvdwi dxdydzd{d|d}d~dddddddddddddddYddddddddddddi ddddddddddddddddddddddddddddddddddi ddddddddddddēddƓddȓddʓdd̓ddΓddГddғddԓdd֓ddؓddړdddddddddZi Zej5                         D ]  \  ZZej;                         ZdD ]2  ZejA                  eedv rdnedv rdnedv rdnedv rdnedv rdnd      Z4 ejA                  dd      jA                  dd      jA                  dd      jA                  dd      Zde de dee<    ejB                  jE                  dd       ddl#m$Z$ d Z%d Z&d Z' e       Z(i Z) e*d ee(            D ]  Z+e(e+   Z,e+dz   Z- ee,      dkD  re,d   j]                         ndZ/e/s0i Z0 e*dd      D ]9  Z1e1 ee,      k  re,e1   j]                         ndZ2e2d v s) e3de1z         Z4e1e0e4<   ; e0sze/e0fe)e-<     e
d ee)               e
d ee               e$d      Z5dZ6dZ7i Z8 e9e)j5                               D ]  \  Z-\  Z/Z0e/evrde8e/<   ee/   Z: e%e:e5      Z;e;sde8e/<   + e'e;      Z<e0D  cg c]  } | e<v s| e<|    f c} Z=e=sde8e/<   V e
d	e-d
de/dddd       dZ>e=D ]M  \  Z4Z? e	e4 e- e?      r* e
e4 ddd       e>dz  Z> ej                  d       = e
e4 ddd       O  e
de> d       e7dz  Z7e6e>z  Z6e> de8e/<    ej                  d         e
dd         e
de7 de6 d       e8j5                         D cg c]  \  }}|dk(  s| c}}ZAe8j5                         D cg c]  \  }}|dk(  s| c}}ZBe8j5                         D cg c]  \  }}|dk(  s| c}}ZC e
d eeA       ddj                  eAdd                e
d eeB       ddj                  eBdd                e
d eeC       ddj                  eCdd                eEdd       5 ZF ej                  e8eFdd!"       ddd       y# e$ rZg Z e
de        Y dZ[cdZ[ww xY wc c} w c c}}w c c}}w c c}}w # 1 sw Y   yxY w(#  u,  
SCPI Batch Filler — Master script for scpi-scraper agent.
Uses ideal-investisseur JSON extraction + commentbieninvestir as fallback.
Processes ALL rows with gaps.

KEY DISCOVERY: ideal-investisseur pages contain structured JSON in 
<script type="application/json"> blocks with ALL financial data.
    Nz,1dBGv3jIsFDLMJInANzyLvUAF0HYb5iRZ_RQljo-i7XMc                      ddddddt        j                  t        dd      g} t        j                  | d	d	d
      }t        j
                  |j                        j                  dg       S )Ngwssheetsspreadsheetsvaluesget--paramszListing sites!A1:Y111)spreadsheetIdrangeT$/home/shingokuga/.openclaw/workspacecapture_outputtextcwd)jsondumpsSHEET_ID
subprocessrunloadsstdoutr   )cmdrs     /tmp/scpi_agent_batch.py
read_sheetr      s^    (NHetzzHG^"_`bCs4d@fgA::ahh##Hb11    c                     ddddddt        j                  t        d|  dd	      d
t        j                  d|  |ggd      g	}t        j                  |ddd      }d|j
                  v S )Nr   r   r   r   updater	   zListing sites!USER_ENTERED)r
   r   valueInputOptionz--json)r   r   Tr   r   updatedCells)r   r   r   r   r   r   )	range_strvaluer   r   s       r   
gws_updater$      s    (NHhtzzHXaWbGc  zH  #I  JTZZN9+*FTYSZR[ \]_C 	s4d@fgAQXX%%r   z Building URL map from sitemap...)urlopenRequestz-https://www.ideal-investisseur.fr/sitemap.xml
User-AgentzMozilla/5.0headers   )timeoututf-8z@<loc>(https://www\.ideal-investisseur\.fr/scpi-avis/[^<]+)</loc>z  Found z URLs in sitemapz  Sitemap error: Edeni'  Elialysi  	Eurovalysi  zAestiam Agorai  zAestiam Horizoni/  Linaclubi  zAEW Commerces EuropeiJ'  zAEW Diversification Allemagnei)'  u   AEW Opportunités Europei*'  u   AEW Patrimoine Santéi+'  zAtout Pierre Diversificationiz  	Activimmoi'  u   Comètei='  zAllianz Pierrei  zAlta Convictionsi'  zAltixia Cadence XIIzAltixia Commercesi  	Edissimmoi  
Genepierrei5  zRivoli Avenir Patrimoinei  	MomenTimei'  zTransitions Europei8'  zNew Geni%'  zEpargne Pierrei'  zEpargne Pierre Europei'  zEpargne Pierre Sophiai'  u   Atream Hôtelsi  Upekai'  u   Accès Valeur Pierrei  zAccimmo Pierrei  zImarea Pierrei'  z	Opus Reali_  Optimalei'  zCorum Eurioni'  zCorum Origini  z	Corum USAi  zCorum XLiI  zDarwin RE01i'  zEdmond de Rothschild Europai'  zElevation Tertiomi'  zEpsicap Explorei#'  zEpsicap Nanoi2'  u   Euryale Horizons Santéi3'  u   Pierval Santéi  u   Buroboutic Métropolesi'  u   Ficommerce Proximitéi'  u!   Logipierre 3 Résidences Servicesi'  u   Pierre Expansion Santéi'  zSelectipierre 2 - Parisi0  u   Cap Foncières et Territoiresi  zGMA Essentialisi'  u   Affinités Pierrei  zAttraits Pierrei'  u   Elysées Grand Largeiw'  u   Elysées Pierrei  zCristal Lifei1'  zCristal RentezIroko Atlasi'  z	Iroko Zeni'  zKyaneos Pierrei0'  u   Crédit Mutuel Pierre 1i)  u   Epargne Foncièreu   LF Avenir Santéi&  zLF Croissance et Territoiresi'  zLF Europimmoii  zLF Grand Paris PatrimoineiZ  u   LF Opportunité Immoi[  zSelectinvest 1if  u   Foncière Des Praticiensix  zOsmo Energiei{  Reasoni7'  zMy Share Educationi'  zMy Share SCPIiy  zNCap Continenti'  u   NCap Education Santéi'  u   NCap Régionsit  zNovaxia NeoiK  zNovapierre 1i3  z	Paref Evoi'  z
Paref Hexai'  zParef Primai'  zPerial Grand Parisi+  u   Perial Hospitalité Europei.  z	Perial O2i,  u   Perial Opportunités Europei*  u    Perial Opportunités TerritoireszPatrimmo Commercei~'  zPraemia Hotels Europeik  Primopierrei9  Primoviei   zPrincipal Insideil  zRemake Livei!'  zRemake UK 2025ig  zEfimmo 1i'  	Immorentei  Sofiboutiquei''  zSofidy Europe Investi'  Sofidynamici'  
Sofipierrei$'  zCoeur d'AveniriN  zCoeur d'Europei:'  u   Coeur de régionsiO  i<'  i'  in  i'  iR  i4'  iu  i'  )zCoeur de villezEsprit HorizonzESG Pierre Capitaleu   Mistral SélectionzTelamon BoreazLOG INzUrban Coeur CommercezWemo Oneu   éèêëâàîïôûüçu   éèêëeu   âàau   îïi   ôou   ûüuc'  -()z,https://www.ideal-investisseur.fr/scpi-avis/z.htmlzc/home/shingokuga/.openclaw/workspace/skills/scrapling-web-scraper/venv/lib/python3.12/site-packages)Fetcherc                    	 |j                  | ddi      }|j                  dk7  s$|j                  rt        |j                        dk  ry|j                  j	                  dd	      }t        j                  d
|t
        j                        }|D ]   }	 t        j                  |      }d|v r|c S " t        |      S #  Y 2xY w#  Y yxY w)z&Fetch page and extract JSON data blockr'   z'Mozilla/5.0 (compatible; Googlebot/2.1)r(      i  Nr,   ignore)errorsz7<script[^>]*type="application/json"[^>]*>(.*?)</script>latest)r   statusbodylendecoderefindallSr   r   extract_from_html)urlfetcherpagehtmlblocksblockdatas          r   
fetch_jsonr`   l   s    {{37`(a{b;;#TYY#dii.32Fyy9VX\^`^b^bc 	Ezz%(t#K $	 !&&	s1   AC AC B=-C 0C =C?C Cc                 "   t        j                  dd| t         j                  t         j                  z        } t        j                  dd| t         j                  t         j                  z        } t        j                  dd| t         j                        } t        j                  dd| t         j                        } t        j                  dd	|       }d
D ]  \  }}|j	                  ||      } t        j                  dd	|      }t        j                  dd|      }ddi}t        j
                  d|t         j                        }|r"|j                  d      j                         |d<   t        j
                  d|t         j                        }|r|j                  d      |d<   t        j
                  d|t         j                        }|r|j                  d      |d<   t        j
                  d|t         j                        }|rB|j                  d      j                         j	                  dd      j	                  d	d      |d<   t        j
                  d|t         j                        }|rB|j                  d      j                         j	                  dd      j	                  d	d      |d<   t        j
                  d|t         j                        }|r|j                  d      |d<   t        j
                  d|t         j                        }|r|j                  d      |d<   t        j
                  d|t         j                        rd |d!<   n*t        j
                  d"|t         j                        rd#|d!<   t        j
                  d$|t         j                        }|r)|j                  d      d	z   |j                  d%      z   |d&<   t        j
                  d'|t         j                        }|r2|j                  d      j                         j	                  d	d      |d(<   t        j
                  d)|t         j                        }|r|j                  d      |d*<   t        j
                  d+|t         j                        }|r|j                  d      |d,<   t        j
                  d-|t         j                        }|r|j                  d      d.z   |d/<   t        j
                  d0|t         j                        }|r"|j                  d      j                         |d1<   t        j
                  d2|t         j                        }|r2|j                  d      j                         j	                  d	d      |d3<   t        |      d%kD  r|S d4S )5z Fallback: extract from HTML textz<script[^>]*>.*?</script>rF   )flagsz<style[^>]*>.*?</style>z	<br\s*/?>
z%</?(?:p|div|h[1-6]|li|tr|td|th)[^>]*>z<[^>]+>rG   ))z&nbsp;rG   )z&amp;&)z&euro;u   €)z&rsquo;rE   z[ \t]+z\n\s*\n+_sourcer\   zCapital\s+(fixe|variable)   capital_typez(Taux\s+d'endettement\s+(\d+[,.]?\d*)\s*%taux_endettementz\bTOF\s+(\d+[,.]?\d*)\s*%tofu+   Prix\s+de\s+souscription\s+(\d[\d\s,.]*€)    €prix_souscriptionu/   Valeur\s+de\s+reconstitution\s+(\d[\d\s,.]*€)valeur_reconstitutionuK   Taux\s+de\s+distribution\s*(?:\d{4})?\s*:?\s*[−–-]?\s*(\d+[,.]?\d*)\s*%taux_distributionu?   TRI\s+(?:à\s+)?10\s+ans?\s*:?\s*[−–-]?\s*(\d+[,.]?\d*)\s*%
tri_10_anszLabel\s+ISR\s*(Non|No)F	label_isrzLabel\s+ISRTu/   Capitalisation\s+(\d+[,.]?\d*)\s*(M€|Mds?€)   capitalisationz7(\d[\d\s]*)\s*parts?\s*en\s*(?:attente\s*de\s*)?retraitparts_en_attente_retraitz<(?:Commission|Frais)\s+de\s+souscription\s+(\d+[,.]?\d*)\s*%comm_souscriptionz7(?:Commission|Frais)\s+de\s+gestion\s+(\d+[,.]?\d*)\s*%comm_gestionu7   (?:Délai|Durée)\s+de\s+jouissance\s*:?\s*(\d+)\s*moisz moisdelai_jouissanceuD   (?:Souscription|Investissement)\s+minimum\s*:?\s*(\d[\d\s,.]*\s*€)souscription_minu<   (?:Nombre\s+de\s+parts|Parts?\s+émises?)\s*:?\s*(\d[\d\s]*)nb_partsN)
rU   subrW   Ireplacesearchgroup
capitalizestriprS   )r\   r   oldnewdms         r   rX   rX      sB   66.DRTT	JD66,b$bdd244iHD66,d"$$7D66:D$bddSD66*c4(DS &S||C%&66)S$'D66+tT*D	FA
		.bdd;Aaggaj335!N

		=tRTTJA!''!*!

		.bdd;AQWWQZ!E(
		@$MA1771:#3#3#5#=#=fR#H#P#PQTUW#X!
 
		DdBDDQAqwwqz'7'7'9'A'A&'L'T'TUXY['\!#
$
		`bfhjhlhlmA1771:!
 
		TVZ\^\`\`aAAGGAJ!L/	yy*D"$$7%;	>4	.+
		DdBDDQA
S 01771: =!

		LdTVTXTXYA!''!**:*:*<*D*DS*L!&
'
		QSWY[Y]Y]^A1771:!
 
		LdTVTXTXYAaggaj!N

		LdTVTXTXYA!''!*w"6!

		Y[_acaeaefA!''!*"2"2"4!

		QSWY[Y]Y]^A!''!***,44S<!J-A
1$$r   c                    i }| j                  di       }d| v}|r|j                  d      }|rt        |      dvrt        |      dz   |d<   |j                  d      }|rt        |      dvrt        |      dz   |d<   |j                  d	      }|rt        |      d
k7  rt        |      dz   |d<   |j                  d      }|rt        |      dvrt        |      dz   |d<   |j                  d      }|rt        |      dvrt        |      dz   |d<   |j                  d      }|rt        |      dvrt        |      dz   |d<   |j                  d      }|"t        |      j                         dvrdnd|d<   |j                  d      }|rt        |      |d<   |j                  d      }|rt        |      dvrt        |      |d<   |j                  d      xs |j                  d      }|rt        |      dvrt        |      |d<   |j                  d       }|rt        |      d!z   |d"<   |j                  d#      }|rt        |      d!z   |d$<   |j                  d%      }|rt        |      |d&<   |j                  d'      }|rt        |      |d(<   |j                  d)      }|rt        |      |d*<   |j                  d+      }|rt        |      |d,<   | j                  d-i       }|j                  d.      }|rt        |      |d/<   |S d0D ]S  \  }}| j                  |      }|s|d1v rt        |      dz   ||<   0|d2v rt        |      d!z   ||<   Ft        |      ||<   U | j                  d      }|	|rdnd|d<   | j                  d      }|rt        |      d
k7  rt        |      |d<   | j                  d	      }|rt        |      dz   |d<   | j                  d      }|rt        |      d
k7  rt        |      dz   |d<   |S )3z*Map extracted data to sheet column lettersrP   re   rh   )0z0.00null%Kri   rW   rk   r   rj   Vrl   Wrm   Trn   Uro   )r   rF   nonr   OuiNonGrq   Jrr   )r   r   rF   Mrw   nombre_partsLrs   z% TTCXrt   Yru   Qrv   Hrg   D	fiscalitePscpidate_creationry   ))rh   r   )ri   rW   )rm   r   )rn   r   )rq   r   )rs   r   )rt   r   )ru   r   )rv   r   )rw   r   )rg   r   )r   rW   r   r   )r   r   )r   strlower)r_   r   rP   is_jsonvr   keycols           r   map_to_colsr      s   
AXXh#Ft#GJJ)*Q44s1v|afJJuQ44s1v|afJJ*+Q3Q&#JJ./Q44s1vafJJ*+Q44s1v|afJJ|$Q44s1v|afJJ{#=CFLLNBW,W5]b!C&JJ'(s1vafJJ12Q003q6!C&JJz"@fjj&@Q003q6!C&JJ*+s1v'afJJ~&s1v'afJJ)*s1vafJJ)*s1vafJJ~&s1vafJJ{#s1vaf xx#HH_%s1vaf8 H1
 
	&HC A++c!fslQsVI%A0@qv"1vaf
	& HH[!=A55!C&HH/0Q3Q#HH()s1vafHH,-Q3Q&#Hr   rf         )rF   zN/Au   Non trouvérH   A   z
Rows needing fixes: zURLs in map: F)
auto_matchno_urlno_datano_matchzRow 3dz (35sz): T)endflushu   ✅ gQ?u   ❌ z cellsg      ?z

z<============================================================u   🏁 TOTAL: z rows, z cells filledz	
No URL (z, 
   z	No data (z
No match (z/tmp/scpi_batch_results.jsonwrp   )ensure_asciiindent)H__doc__r   rU   r   timesysosr   r   r$   printurllib.requestr%   r&   reqrespreadrT   xmlrV   sitemap_urlsrS   	Exceptionr>   MANUAL_URLSURL_MAPitemsnameiidr   slugrD   rz   pathinsert	scraplingrK   r`   rX   r   rowsneedsr   r@   rowr   r~   r   emptyjvalchrr   rZ   total_cells
total_rowsresultssortedrY   r_   mappedupdates	row_cellsr#   sleepr   r   r   joinopenfdump)r   kr   s   000r   <module>r      s
   + * *92& ( ) +#
ALZgKh
iC3#D
))+

W
%C2::acfgL	HS&''7
894
E4d4$/47F4t4'4/Eu4 $U4 -G4 U	4 %CD	4
 4
 "54
 +;D4 4  5d4 4  +D4 3?4 4 '254 %4 "+E4 e4 5e4 U4 %5d4 U4 +D4 3CD4 U4 (4 0:54 E4 *44 2=d4 4 $U4  "5!4  +>u!4" u#4" -e#4$ u%4$ '7%4& e'4& &=e'4( ()4* u+4* '@+4, $T-4, ,=e-4. /4.  1%/40 E140 $5d142 E342 +E344 5544 &u546 e746 7748 948  2494: #E;4: ,:4;4<  =4< (>t=4> d?4> 7?4@ DA4@ #EA4B %C4B "1$C4D eE4D 5eE4F TG4F )$G4F 1?G4H I4H %eI4H .;EI4J $K4J !=dK4L M4L 5dM4N 'O4P Q4P !8Q4R 4S4R $TS4T U4T ,UU4V dW4V 'W4X Y4X &uY4Z E[4Z $1%[4\ %]4\ *4]4^ e_4^ 1$_4` uuU eg4n ""$ UID#::<D' ^||Aa:os!v+3Z[_eZeSVrsw{r{kn  IJ  NT  IT  BE  Z]  ^^<<R ((c2::3CKKCQSTDB4&#eTGDMU x y *;%zWt |
	q#d)	 !A
q'C	AA X\3q6<<>rDE1b\  !CHc!flln"11bd)CE#J	
 %=a! s5zl+ , c'l^$ %
U
#

u{{}- A}e7 
$-Cc7#D!F-2DccVmVC[!DG"	D2bc
#
&Bd;I 4
UaSk5)SE,Bd3NIDJJtSE,Bd34 
Ai[
!OJ9K k(GDMDJJt=B VHo  ZL}MB C}}	8!!x-!	8
:11	>1
: <#!AAOA< 
3v;-s499VCR[#9":; < 	#g,s499WSb\#:";< = 
3x=/TYYx}%=$>? @ 

(#. 8!DIIgquQ78 8m	  #L	aS
!""#n E. 
9
:<8 8sT   AW 	W9	W9W>W>,X;XX
%X
6XW6W11W6X