diff --git a/api.php b/api.php index d81ca13..d074ccc 100644 --- a/api.php +++ b/api.php @@ -116,64 +116,75 @@ function emptyPhysicalResult() { ]; } -// ── NOUVELLE FONCTION : SCRAPPING GO-UPC.COM (CORRIGÉE) ── -function fetchFromGoUpc($ean) { +// ── NOUVELLE FONCTION : SCRAPPING DVDFR.COM ── +function fetchFromDvdfr($ean) { $empty = emptyPhysicalResult(); - $url = "https://go-upc.com/search?q=" . urlencode($ean); + $url = "https://www.dvdfr.com/listeliv.php?base=dvd&mots_recherche=" . urlencode($ean); $html = httpGet($url, 15); if (!$html) return $empty; - // Recherche du titre dans h1.product-name - if (preg_match('/]*class="product-name"[^>]*>([^<]+)<\/h1>/i', $html, $m)) { - $empty['title'] = trim(strip_tags($m[1])); - } elseif (preg_match('/]*>([^<]+)<\/title>/i', $html, $m)) { - $title = trim($m[1]); - $title = preg_replace('/\s*[-–]\s*EAN\s+\d+\s*[-–]\s*Go-UPC.*$/i', '', $title); - $title = preg_replace('/\s*[-–]\s*Go-UPC.*$/i', '', $title); - $empty['title'] = $title; - } - - // ✅ RÉCUPÉRATION DE L'IMAGE DEPUIS GO-UPC (VERSION ROBUSTE) - // 1. Chercher directement l'URL Amazon S3 de go-upc (plus fiable) - if (preg_match('/src="(https:\/\/go-upc\.s3\.amazonaws\.com\/images\/[^"]+)"/i', $html, $m)) { - $empty['poster'] = trim($m[1]); - } - // 2. Fallback : chercher dans les figures product-image - elseif (preg_match('/]*class="product-image[^"]*"[^>]*>.*?]*src="([^"]+)"/is', $html, $m)) { - $empty['poster'] = trim($m[1]); - } - // 3. Fallback : chercher toute image de go-upc.s3.amazonaws.com - elseif (preg_match('/src="([^"]+go-upc\.s3\.amazonaws\.com[^"]+)"/i', $html, $m)) { - $empty['poster'] = trim($m[1]); + // Recherche du premier produit trouvé dans la liste + if (preg_match('/
  • \s*
    .*?.*?]*src="([^"]+)"[^>]*alt="([^"]+)".*?<\/a>.*?

    .*?]*>([^<]+)<\/a>.*?<\/h2>.*?

    .*?]*>([^<]+)<\/a>.*?<\/h2>.*?
  • .*?]*>([^<]+)<\/span>.*?<\/li>.*?.*?]*>([^<]+)<\/a>.*?.*?]*>([^<]+)<\/a>/is', $html, $m)) { + $empty['title'] = trim($m[1]); + } + + if (preg_match('/

    .*?]*>([^<]+)<\/a>/is', $html, $m)) { + $empty['director'] = trim($m[1]); + } + + if (preg_match('/
  • .*?]*>([^<]+)<\/span>/is', $html, $m)) { + $empty['publisher'] = trim($m[1]); + } + + if (preg_match('/]*id="img' . preg_quote($ean, '/') . '"[^>]*src="([^"]+)"/i', $html, $m)) { + $imageUrl = trim($m[1]); + $imageUrl = str_replace('_v.jpg', '_1.jpg', $imageUrl); + $empty['poster'] = $imageUrl; + } elseif (preg_match('/]*src="(https:\/\/images\.epagine\.fr\/[^"]+)"/i', $html, $m)) { + $imageUrl = trim($m[1]); + $imageUrl = str_replace('_v.jpg', '_1.jpg', $imageUrl); + $empty['poster'] = $imageUrl; + } + + if (preg_match('/.*?]*>([^<]+)<\/a>/is', $html, $m)) { + $format = trim($m[1]); + $empty['format'] = detectFormat($format); + } } // Nettoyage du titre if (!empty($empty['title'])) { $empty['title'] = html_entity_decode($empty['title'], ENT_QUOTES | ENT_HTML5, 'UTF-8'); - - // ✅ APPLIQUER cleanTitle() pour supprimer les suffixes comme "- DVD", "- Édition Collector" $empty['title'] = cleanTitle($empty['title']); - - // ✅ Supprimer le nom du réalisateur au début si présent - // Pattern : "Prénom Nom Titre du film" → "Titre du film" - if (preg_match('/^([A-ZÀ-Ÿ][a-zà-ÿ]+\s+[A-ZÀ-Ÿ][a-zà-ÿ]+)\s+(.+)$/u', $empty['title'], $nameMatch)) { - $possibleDirector = $nameMatch[1]; - $possibleTitle = $nameMatch[2]; - - // Vérifier si le "titre" restant semble être un vrai titre (pas juste un mot) - if (strlen($possibleTitle) > 3 && !preg_match('/^(DVD|Blu-ray|4K|VHS|CD|Vinyl)$/i', $possibleTitle)) { - $empty['director'] = $possibleDirector; - $empty['title'] = $possibleTitle; - } - } - $empty['title'] = trim($empty['title']); } - if (!empty($empty['title'])) { - $empty['format'] = detectFormat($empty['title']); - } - return $empty; } @@ -542,24 +553,25 @@ function fetchFromMovieCovers($title, $year = '') { return $empty; } -// ── AGGREGATEUR PHYSIQUE (VERSION MODIFIÉE) ── +// ── AGGREGATEUR PHYSIQUE (VERSION MODIFIÉE AVEC DVDFR) ── function fetchPhysicalByEan($ean, $pdo = null) { error_log("=== DEBUT fetchPhysicalByEan EAN=$ean ==="); $res = emptyPhysicalResult(); $title = 
''; - // 1. GO-UPC.COM (Priorité 1 - pour titre et image) - $goUpcData = fetchFromGoUpc($ean); - error_log("GO-UPC -> title='" . ($goUpcData['title'] ?? '') . "' poster='" . ($goUpcData['poster'] ?? '') . "'"); - if (!empty($goUpcData['title'])) { - $title = $goUpcData['title']; - $res = $goUpcData; + // 1. DVDFR.COM (Priorité 1 - pour titre, image et métadonnées) + $dvdfrData = fetchFromDvdfr($ean); + error_log("DVDFR -> title='" . ($dvdfrData['title'] ?? '') . "' poster='" . ($dvdfrData['poster'] ?? '') . "' director='" . ($dvdfrData['director'] ?? '') . "'"); + + if (!empty($dvdfrData['title'])) { + $title = $dvdfrData['title']; + $res = $dvdfrData; } // 2. UPCINDEX.COM (Priorité 2 - pour métadonnées physiques détaillées) $upcIndexData = fetchFromUpcIndex($ean); - error_log("UPCINDEX -> title='" . ($upcIndexData['title'] ?? '') . "' director='" . ($upcIndexData['director'] ?? '') . "' actors='" . ($upcIndexData['actors'] ?? '') . "'"); + error_log("UPCINDEX -> title='" . ($upcIndexData['title'] ?? '') . "' director='" . ($upcIndexData['director'] ?? '') . 
"'"); if (!empty($upcIndexData['title']) && empty($title)) { $title = $upcIndexData['title']; @@ -567,15 +579,21 @@ function fetchPhysicalByEan($ean, $pdo = null) { // Fusionner les données UPCINDEX (métadonnées physiques) if (!empty($upcIndexData)) { - if (!empty($upcIndexData['director'])) $res['director'] = $upcIndexData['director']; + if (!empty($upcIndexData['director']) && empty($res['director'])) { + $res['director'] = $upcIndexData['director']; + } if (!empty($upcIndexData['actors'])) $res['actors'] = $upcIndexData['actors']; - if (!empty($upcIndexData['format'])) $res['format'] = $upcIndexData['format']; + if (!empty($upcIndexData['format']) && empty($res['format'])) { + $res['format'] = $upcIndexData['format']; + } if (!empty($upcIndexData['aspect_ratio'])) $res['aspect_ratio'] = $upcIndexData['aspect_ratio']; if (!empty($upcIndexData['number_of_discs'])) $res['number_of_discs'] = $upcIndexData['number_of_discs']; - if (!empty($upcIndexData['publisher'])) $res['publisher'] = $upcIndexData['publisher']; + if (!empty($upcIndexData['publisher']) && empty($res['publisher'])) { + $res['publisher'] = $upcIndexData['publisher']; + } if (!empty($upcIndexData['description'])) $res['description'] = $upcIndexData['description']; if (!empty($upcIndexData['year'])) $res['year'] = $upcIndexData['year']; - // ✅ GARDER L'IMAGE DE GO-UPC EN PRIORITÉ + // Garder l'image de DVD.fr en priorité if (empty($res['poster']) && !empty($upcIndexData['poster'])) { $res['poster'] = $upcIndexData['poster']; }