Actualiser api.php

This commit is contained in:
2026-07-02 16:55:28 +02:00
parent 2331efa4a1
commit de0c2a0b7b
+75 -57
View File
@@ -116,64 +116,75 @@ function emptyPhysicalResult() {
]; ];
} }
// ── NOUVELLE FONCTION : SCRAPING GO-UPC.COM (CORRIGÉE) ── // ── NOUVELLE FONCTION : SCRAPING DVDFR.COM ──
function fetchFromGoUpc($ean) { function fetchFromDvdfr($ean) {
$empty = emptyPhysicalResult(); $empty = emptyPhysicalResult();
$url = "https://go-upc.com/search?q=" . urlencode($ean); $url = "https://www.dvdfr.com/listeliv.php?base=dvd&mots_recherche=" . urlencode($ean);
$html = httpGet($url, 15); $html = httpGet($url, 15);
if (!$html) return $empty; if (!$html) return $empty;
// Recherche du titre dans h1.product-name // Recherche du premier produit trouvé dans la liste
if (preg_match('/<h1[^>]*class="product-name"[^>]*>([^<]+)<\/h1>/i', $html, $m)) { if (preg_match('/<li>\s*<div class="col-xs-4 text-center mb-01 col-md-2">.*?<a href="\/dvd\/[^"]+\/([^"]+)\/">.*?<img[^>]*src="([^"]+)"[^>]*alt="([^"]+)".*?<\/a>.*?<h2 class="livre_titre">.*?<a[^>]*>([^<]+)<\/a>.*?<\/h2>.*?<h2 class="livre_auteur">.*?<a[^>]*>([^<]+)<\/a>.*?<\/h2>.*?<li class="editeur">.*?<span[^>]*>([^<]+)<\/span>.*?<\/li>.*?<span class="item_format">.*?<a[^>]*>([^<]+)<\/a>.*?<span class="item_prix[^>]*>([^<]+)<\/span>/is', $html, $m)) {
$empty['title'] = trim(strip_tags($m[1]));
} elseif (preg_match('/<title[^>]*>([^<]+)<\/title>/i', $html, $m)) { // Titre
$title = trim($m[1]); $empty['title'] = trim($m[4]);
$title = preg_replace('/\s*[-]\s*EAN\s+\d+\s*[-]\s*Go-UPC.*$/i', '', $title);
$title = preg_replace('/\s*[-]\s*Go-UPC.*$/i', '', $title); // Image (version haute qualité)
$empty['title'] = $title; $imageUrl = trim($m[2]);
} // Remplacer _v.jpg par _1.jpg pour avoir l'image en haute qualité
$imageUrl = str_replace('_v.jpg', '_1.jpg', $imageUrl);
// ✅ RÉCUPÉRATION DE L'IMAGE DEPUIS GO-UPC (VERSION ROBUSTE) $empty['poster'] = $imageUrl;
// 1. Chercher directement l'URL Amazon S3 de go-upc (plus fiable)
if (preg_match('/src="(https:\/\/go-upc\.s3\.amazonaws\.com\/images\/[^"]+)"/i', $html, $m)) { // Réalisateur
$empty['poster'] = trim($m[1]); $empty['director'] = trim($m[5]);
}
// 2. Fallback : chercher dans les figures product-image // Éditeur
elseif (preg_match('/<figure[^>]*class="product-image[^"]*"[^>]*>.*?<img[^>]*src="([^"]+)"/is', $html, $m)) { $empty['publisher'] = trim($m[6]);
$empty['poster'] = trim($m[1]);
} // Format
// 3. Fallback : chercher toute image de go-upc.s3.amazonaws.com $format = trim($m[7]);
elseif (preg_match('/src="([^"]+go-upc\.s3\.amazonaws\.com[^"]+)"/i', $html, $m)) { $empty['format'] = detectFormat($format);
$empty['poster'] = trim($m[1]);
// Prix (optionnel)
$price = trim($m[8]);
} else {
// Fallback : recherche individuelle des éléments
if (preg_match('/<h2 class="livre_titre">.*?<a[^>]*>([^<]+)<\/a>/is', $html, $m)) {
$empty['title'] = trim($m[1]);
}
if (preg_match('/<h2 class="livre_auteur">.*?<a[^>]*>([^<]+)<\/a>/is', $html, $m)) {
$empty['director'] = trim($m[1]);
}
if (preg_match('/<li class="editeur">.*?<span[^>]*>([^<]+)<\/span>/is', $html, $m)) {
$empty['publisher'] = trim($m[1]);
}
if (preg_match('/<img[^>]*id="img' . preg_quote($ean, '/') . '"[^>]*src="([^"]+)"/i', $html, $m)) {
$imageUrl = trim($m[1]);
$imageUrl = str_replace('_v.jpg', '_1.jpg', $imageUrl);
$empty['poster'] = $imageUrl;
} elseif (preg_match('/<img[^>]*src="(https:\/\/images\.epagine\.fr\/[^"]+)"/i', $html, $m)) {
$imageUrl = trim($m[1]);
$imageUrl = str_replace('_v.jpg', '_1.jpg', $imageUrl);
$empty['poster'] = $imageUrl;
}
if (preg_match('/<span class="item_format">.*?<a[^>]*>([^<]+)<\/a>/is', $html, $m)) {
$format = trim($m[1]);
$empty['format'] = detectFormat($format);
}
} }
// Nettoyage du titre // Nettoyage du titre
if (!empty($empty['title'])) { if (!empty($empty['title'])) {
$empty['title'] = html_entity_decode($empty['title'], ENT_QUOTES | ENT_HTML5, 'UTF-8'); $empty['title'] = html_entity_decode($empty['title'], ENT_QUOTES | ENT_HTML5, 'UTF-8');
// ✅ APPLIQUER cleanTitle() pour supprimer les suffixes comme "- DVD", "- Édition Collector"
$empty['title'] = cleanTitle($empty['title']); $empty['title'] = cleanTitle($empty['title']);
// ✅ Supprimer le nom du réalisateur au début si présent
// Pattern : "Prénom Nom Titre du film" → "Titre du film"
if (preg_match('/^([A-ZÀ-Ÿ][a-zà-ÿ]+\s+[A-ZÀ-Ÿ][a-zà-ÿ]+)\s+(.+)$/u', $empty['title'], $nameMatch)) {
$possibleDirector = $nameMatch[1];
$possibleTitle = $nameMatch[2];
// Vérifier si le "titre" restant semble être un vrai titre (pas juste un mot)
if (strlen($possibleTitle) > 3 && !preg_match('/^(DVD|Blu-ray|4K|VHS|CD|Vinyl)$/i', $possibleTitle)) {
$empty['director'] = $possibleDirector;
$empty['title'] = $possibleTitle;
}
}
$empty['title'] = trim($empty['title']); $empty['title'] = trim($empty['title']);
} }
if (!empty($empty['title'])) {
$empty['format'] = detectFormat($empty['title']);
}
return $empty; return $empty;
} }
@@ -542,24 +553,25 @@ function fetchFromMovieCovers($title, $year = '') {
return $empty; return $empty;
} }
// ── AGGREGATEUR PHYSIQUE (VERSION MODIFIÉE) ── // ── AGGREGATEUR PHYSIQUE (VERSION MODIFIÉE AVEC DVDFR) ──
function fetchPhysicalByEan($ean, $pdo = null) { function fetchPhysicalByEan($ean, $pdo = null) {
error_log("=== DEBUT fetchPhysicalByEan EAN=$ean ==="); error_log("=== DEBUT fetchPhysicalByEan EAN=$ean ===");
$res = emptyPhysicalResult(); $res = emptyPhysicalResult();
$title = ''; $title = '';
// 1. GO-UPC.COM (Priorité 1 - pour titre et image) // 1. DVDFR.COM (Priorité 1 - pour titre, image et métadonnées)
$goUpcData = fetchFromGoUpc($ean); $dvdfrData = fetchFromDvdfr($ean);
error_log("GO-UPC -> title='" . ($goUpcData['title'] ?? '') . "' poster='" . ($goUpcData['poster'] ?? '') . "'"); error_log("DVDFR -> title='" . ($dvdfrData['title'] ?? '') . "' poster='" . ($dvdfrData['poster'] ?? '') . "' director='" . ($dvdfrData['director'] ?? '') . "'");
if (!empty($goUpcData['title'])) {
$title = $goUpcData['title']; if (!empty($dvdfrData['title'])) {
$res = $goUpcData; $title = $dvdfrData['title'];
$res = $dvdfrData;
} }
// 2. UPCINDEX.COM (Priorité 2 - pour métadonnées physiques détaillées) // 2. UPCINDEX.COM (Priorité 2 - pour métadonnées physiques détaillées)
$upcIndexData = fetchFromUpcIndex($ean); $upcIndexData = fetchFromUpcIndex($ean);
error_log("UPCINDEX -> title='" . ($upcIndexData['title'] ?? '') . "' director='" . ($upcIndexData['director'] ?? '') . "' actors='" . ($upcIndexData['actors'] ?? '') . "'"); error_log("UPCINDEX -> title='" . ($upcIndexData['title'] ?? '') . "' director='" . ($upcIndexData['director'] ?? '') . "'");
if (!empty($upcIndexData['title']) && empty($title)) { if (!empty($upcIndexData['title']) && empty($title)) {
$title = $upcIndexData['title']; $title = $upcIndexData['title'];
@@ -567,15 +579,21 @@ function fetchPhysicalByEan($ean, $pdo = null) {
// Fusionner les données UPCINDEX (métadonnées physiques) // Fusionner les données UPCINDEX (métadonnées physiques)
if (!empty($upcIndexData)) { if (!empty($upcIndexData)) {
if (!empty($upcIndexData['director'])) $res['director'] = $upcIndexData['director']; if (!empty($upcIndexData['director']) && empty($res['director'])) {
$res['director'] = $upcIndexData['director'];
}
if (!empty($upcIndexData['actors'])) $res['actors'] = $upcIndexData['actors']; if (!empty($upcIndexData['actors'])) $res['actors'] = $upcIndexData['actors'];
if (!empty($upcIndexData['format'])) $res['format'] = $upcIndexData['format']; if (!empty($upcIndexData['format']) && empty($res['format'])) {
$res['format'] = $upcIndexData['format'];
}
if (!empty($upcIndexData['aspect_ratio'])) $res['aspect_ratio'] = $upcIndexData['aspect_ratio']; if (!empty($upcIndexData['aspect_ratio'])) $res['aspect_ratio'] = $upcIndexData['aspect_ratio'];
if (!empty($upcIndexData['number_of_discs'])) $res['number_of_discs'] = $upcIndexData['number_of_discs']; if (!empty($upcIndexData['number_of_discs'])) $res['number_of_discs'] = $upcIndexData['number_of_discs'];
if (!empty($upcIndexData['publisher'])) $res['publisher'] = $upcIndexData['publisher']; if (!empty($upcIndexData['publisher']) && empty($res['publisher'])) {
$res['publisher'] = $upcIndexData['publisher'];
}
if (!empty($upcIndexData['description'])) $res['description'] = $upcIndexData['description']; if (!empty($upcIndexData['description'])) $res['description'] = $upcIndexData['description'];
if (!empty($upcIndexData['year'])) $res['year'] = $upcIndexData['year']; if (!empty($upcIndexData['year'])) $res['year'] = $upcIndexData['year'];
// ✅ GARDER L'IMAGE DE GO-UPC EN PRIORITÉ // Garder l'image de DVDFR.com en priorité
if (empty($res['poster']) && !empty($upcIndexData['poster'])) { if (empty($res['poster']) && !empty($upcIndexData['poster'])) {
$res['poster'] = $upcIndexData['poster']; $res['poster'] = $upcIndexData['poster'];
} }