#!/usr/bin/env python3
"""
Full pipeline test: VoxCPM2 → Whisper → LLM → Baserow

Smoke-tests the complete "rapport de chantier" (construction-site report)
pipeline end-to-end against live local services:
  1. VoxCPM2 TTS generates a French audio report.
  2. Whisper large-v3 transcribes it (run in a separate venv interpreter).
  3. An LLM extracts observations from the transcription (best-effort).
  4. The result is persisted as a row in a Baserow table.
"""
import requests, json, subprocess, os, tempfile, time
import numpy as np
import soundfile as sf

# Config — all services live on the same LAN host; ports per service.
VOXCPM_URL = "http://192.168.1.127:8101"   # VoxCPM2 TTS (OpenAI-compatible /v1/audio/speech)
WHISPER_PYTHON = "/home/shingokuga/Vllm/vllm/bin/python3"  # venv interpreter with transformers/torch
BASEROW_URL = "http://192.168.1.127:8091"  # Baserow REST API
LLM_URL = "http://192.168.1.127:8090"      # OpenAI-compatible chat-completions endpoint
RCC_TABLE_ID = 715                          # Baserow table receiving the report rows

print("=" * 60)
print("🏗️  Rapport de Chantier — Full Pipeline Test")
print("=" * 60)

# Step 1: Generate audio with VoxCPM2
# Sends the report text (prefixed with a voice-style hint) to the TTS
# endpoint and keeps the raw audio bytes in `r.content` for step 2.
print("\n1️⃣  VoxCPM2 — Génération audio...")
text = "Bonjour et bienvenue dans ce rapport de chantier. Nous sommes le 23 avril 2026 et nous intervenons sur le chantier de rénovation du centre-ville de Montpellier. L'équipe au complet est présente : trois maçons, deux électriciens et un menuisier. Le chantier a démarré à 7 heures du matin et les conditions météorologiques sont excellentes. Les travaux concernent la rénovation des façades du bâtiment principal, avec environ 150 mètres carrés de surface. L'équipe de maçons a été très efficace et a terminé cette partie avant midi."

start = time.time()
r = requests.post(
    f"{VOXCPM_URL}/v1/audio/speech",
    json={
        "model": "openbmb/VoxCPM2",
        # Leading parenthetical is a voice/style prompt understood by VoxCPM2.
        "input": f"(voix masculine professionnelle, ton sérieux) {text}",
        "cfg_value": 2.0,           # classifier-free guidance strength
        "inference_timesteps": 10,  # diffusion steps (speed/quality trade-off)
    },
)
# Fail fast: without this, an HTTP error body would be written to the
# .wav file in step 2 and fail much later with a confusing decode error.
r.raise_for_status()
gen_time = time.time() - start
print(f"   ✅ Audio généré en {gen_time:.1f}s")
# 25 = assumed duration (s) of the generated clip, used for RTF estimates
# throughout this script — TODO confirm against the actual audio length.
print(f"   📊 RTF VoxCPM2: {gen_time / 25:.2f} (target: ~2.5)")

# Step 2: Transcribe with Whisper
# The Whisper stack lives in a different venv, so the transcription runs
# as an inline script under WHISPER_PYTHON and the text comes back on stdout.
print("\n2️⃣  Whisper Large v3 — Transcription...")
wav_path = "/tmp/rcc_test.wav"
with open(wav_path, "wb") as f:
    f.write(r.content)

start = time.time()
# NOTE: wav_path is interpolated into the child script via this f-string;
# it must stay a plain quoted path (no braces/quotes in the value).
result = subprocess.run(
    [WHISPER_PYTHON, "-c", f"""
import torch
from transformers import AutoProcessor, WhisperForConditionalGeneration
processor = AutoProcessor.from_pretrained("openai/whisper-large-v3")
model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-large-v3", dtype=torch.float16, device_map="cuda:0")
import soundfile as sf
samples, sr = sf.read("{wav_path}")
import numpy as np
# Naive linear-interpolation resample to Whisper's required 16 kHz.
# Adequate for a smoke test; a polyphase resampler would be cleaner.
target_sr = 16000
num_samples = int(len(samples) * target_sr / sr)
samples_16k = np.interp(np.linspace(0, len(samples), num_samples), np.linspace(0, len(samples), len(samples)), samples)
input_features = processor(samples_16k, sampling_rate=target_sr, return_tensors="pt").input_features
input_features = input_features.to("cuda:0", dtype=torch.float16)
output_ids = model.generate(input_features, language="fr", max_new_tokens=256)
text = processor.batch_decode(output_ids, skip_special_tokens=True)[0]
print(text)
"""],
    capture_output=True, text=True, timeout=120
)
transcribe_time = time.time() - start
# Surface subprocess failures instead of silently continuing with an
# empty transcription (previously returncode was never checked).
if result.returncode != 0:
    print(f"   ❌ Whisper a échoué (code {result.returncode}): {result.stderr[-500:]}")
transcription = result.stdout.strip()
print(f"   ✅ Transcription en {transcribe_time:.1f}s")
# 25 = assumed clip duration (s) — see step 1.
print(f"   📊 RTF Whisper: {transcribe_time / 25:.2f} (target: 0.06)")
print(f"   📝 {transcription[:200]}...")

# Step 3: Extract with LLM (if available)
# Best-effort: the pipeline continues even when the LLM endpoint is down,
# which is why errors here only warn instead of aborting.
print("\n3️⃣  LLM — Extraction...")
llm_available = False
extracted = ""  # defined even on failure so later steps can reference it safely
start = time.time()  # reset timer — previously this reused the Whisper start time
try:
    r = requests.post(f"{LLM_URL}/v1/chat/completions", json={
        "model": "qwen36-35b-official",
        "messages": [{"role": "user", "content": f"Extrait les observations de ce rapport: {transcription}"}],
        "max_tokens": 200,
    }, timeout=30)
    if r.status_code == 200:
        extracted = r.json()["choices"][0]["message"]["content"]
        print(f"   ✅ Extraction LLM en {time.time()-start:.1f}s")
        print(f"   📝 {extracted[:150]}...")
        llm_available = True
    else:
        print(f"   ⚠️ LLM non disponible (status: {r.status_code})")
# Narrowed from a bare `except:` (which also swallowed KeyboardInterrupt);
# KeyError/ValueError cover a malformed 200 response body.
except (requests.RequestException, KeyError, ValueError):
    print(f"   ⚠️ LLM non disponible")

# Step 4: Save to Baserow
# Authenticates, maps human field names to Baserow field IDs, then inserts
# one row holding the transcription and (when available) the LLM extraction.
print("\n4️⃣  Baserow — Sauvegarde...")
# SECURITY: plaintext credentials were hard-coded here; prefer environment
# variables. The fallbacks keep this smoke test runnable but should be
# removed (and the password rotated) before sharing this script.
auth_r = requests.post(
    f"{BASEROW_URL}/api/user/token-auth/",
    json={
        "email": os.environ.get("BASEROW_EMAIL", "kmorhaim@hotmail.com"),
        "password": os.environ.get("BASEROW_PASSWORD", "nYKG9!x?s8?hCyeb"),
    },
)
auth_r.raise_for_status()  # fail with the HTTP error, not a KeyError below
token = auth_r.json()["access_token"]

fields_r = requests.get(
    f"{BASEROW_URL}/api/database/fields/table/{RCC_TABLE_ID}/",
    headers={"Authorization": f"JWT {token}"},
)
fields_r.raise_for_status()
fields = fields_r.json()
# Baserow rows are keyed by "field_<id>", so map display names → ids once.
field_map = {f["name"]: f["id"] for f in fields}

row_data = {
    f"field_{field_map['Date du chantier']}": "2026-04-23",
    f"field_{field_map['Lieu']}": "Montpellier",
    f"field_{field_map['Équipe']}": "3 maçons, 2 électriciens, 1 menuisier",
    f"field_{field_map['Météo']}": "Ciel dégagé, 22°C",
    f"field_{field_map['Transcription']}": transcription,
    # Store the LLM extraction when step 3 succeeded; previously both
    # branches of this conditional were identical (text[:300]).
    f"field_{field_map['Observations']}": extracted[:300] if llm_available else text[:300],
    f"field_{field_map['Anomalies']}": "",
    f"field_{field_map['Statut']}": "Validé",
    f"field_{field_map['Photos']}": "",
}
row_r = requests.post(
    f"{BASEROW_URL}/api/database/rows/table/{RCC_TABLE_ID}/",
    headers={"Authorization": f"JWT {token}"},
    json=row_data,
)
print(f"   ✅ Sauvegardé en Baserow (row {row_r.json().get('id', '?')})")

# Final summary: banner plus endpoints and speed metrics.
# (25 s is the assumed clip duration used for the RTF figures above.)
closing_lines = [
    "\n" + "=" * 60,
    "✅ Pipeline complet fonctionnel!",
    "=" * 60,
    "\n🌐 Preview: https://rcc.zrok.consulting-ai.eu",
    "🔗 API: http://192.168.1.127:8102",
    f"📊 Vitesse audio: {gen_time:.1f}s pour {25}s (RTF {gen_time/25:.2f})",
    f"⚡ Transcription: {transcribe_time:.1f}s pour {25}s (RTF {transcribe_time/25:.2f})",
]
for line in closing_lines:
    print(line)