From 064c8a6658d68993fc3307dec5f3b60af50aa0b6 Mon Sep 17 00:00:00 2001 From: Cedric Date: Thu, 2 Jul 2026 11:22:22 +0200 Subject: [PATCH] Actualiser api.php --- api.php | 229 ++++++++++++++++++++++++++++++++++++++------------------ 1 file changed, 157 insertions(+), 72 deletions(-) diff --git a/api.php b/api.php index af66980..7a71f24 100644 --- a/api.php +++ b/api.php @@ -116,47 +116,60 @@ function emptyPhysicalResult() { ]; } -// ── SCRAPPING FNAC (CORRIGÉ) ── +// ── SCRAPPING FNAC (CORRIGÉ POUR PARSER LA PAGE DE RÉSULTATS) ── function fetchFromFnac($ean) { $empty = emptyPhysicalResult(); $url = "https://www.fnac.com/SearchResult/ResultList.aspx?Search=" . urlencode($ean); - - $ch = curl_init($url); - curl_setopt_array($ch, [ - CURLOPT_RETURNTRANSFER => true, - CURLOPT_TIMEOUT => 10, - CURLOPT_SSL_VERIFYPEER => false, - CURLOPT_FOLLOWLOCATION => true, // Important pour suivre la redirection vers la page produit - CURLOPT_USERAGENT => 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.0.0', - ]); - $html = curl_exec($ch); - $finalUrl = curl_getinfo($ch, CURLINFO_EFFECTIVE_URL); // Récupère l'URL après redirection - curl_close($ch); - + $html = httpGet($url, 15); if (!$html) return $empty; - // ✅ CORRECTION : On vérifie si on a bien été redirigé vers une page produit - // Si l'URL finale contient encore "SearchResult", c'est qu'on est sur la page de recherche - // (avec les bannières sponsorisées type "Project Hail Mary"). On ignore pour éviter les pubs. - if (strpos($finalUrl, 'SearchResult/ResultList.aspx') === false && strpos($finalUrl, 'SearchResult') === false) { - - // On est sur la page produit ! On récupère le titre via og:title (infaillible) - if (preg_match('/]*property="og:title"[^>]*content="([^"]+)"/i', $html, $m)) { - $title = html_entity_decode(trim($m[1]), ENT_QUOTES | ENT_HTML5, 'UTF-8'); - // Nettoyage du titre FNAC qui peut contenir " | Fnac" à la fin - $title = preg_replace('/\s*\|\s*Fnac.*$/i', '', $title); - $empty['title'] = trim($title); - } elseif (preg_match('/]*class="[^"]*f-product__name[^"]*"[^>]*>([^<]+)<\/h1>/i', $html, $m)) { - $empty['title'] = trim(strip_tags($m[1])); - } - - // Récupération de l'image via og:image - if (preg_match('/]*property="og:image"[^>]*content="([^"]+)"/i', $html, $m)) { - $empty['poster'] = trim($m[1]); + // 1. Essayer de trouver le JSON-LD (méthode la plus fiable) + if (preg_match_all('/]*type="application\/ld\+json"[^>]*>(.*?)<\/script>/is', $html, $jsonMatches)) { + foreach ($jsonMatches[1] as $jsonBlock) { + if (preg_match('/"@type"\s*:\s*"Product"[^}]*?"name"\s*:\s*"([^"]+)"/is', $jsonBlock, $m)) { + $empty['title'] = trim($m[1]); + break; + } + if (preg_match('/"@type"\s*:\s*"ItemList".*?"name"\s*:\s*"([^"]+)"/is', $jsonBlock, $m)) { + $empty['title'] = trim($m[1]); + break; + } } } - // Si on est resté sur la page de recherche, on retourne un tableau vide. - // Le fallback Blu-ray.com prendra le relais proprement. + + // 2. Si pas de JSON-LD, essayer les classes CSS courantes de la FNAC + if (empty($empty['title'])) { + if (preg_match('/]*class="[^"]*js-ProductTitle[^"]*"[^>]*>([^<]+)<\/a>/i', $html, $m)) { + $empty['title'] = trim(strip_tags($m[1])); + } elseif (preg_match('/]*class="[^"]*f-product__name[^"]*"[^>]*>([^<]+)<\/h2>/i', $html, $m)) { + $empty['title'] = trim(strip_tags($m[1])); + } elseif (preg_match('/]*class="[^"]*product-title[^"]*"[^>]*>([^<]+)<\/a>/i', $html, $m)) { + $empty['title'] = trim(strip_tags($m[1])); + } + } + + // 3. Fallback : chercher le premier lien qui pointe vers une page produit FNAC (/A\d+ ou /tp\d+) + if (empty($empty['title'])) { + if (preg_match('/]*href="\/(A\d+|tp\d+)[^"]*"[^>]*title="([^"]+)"[^>]*>/i', $html, $m)) { + $empty['title'] = trim($m[2]); + } elseif (preg_match('/]*href="\/(A\d+|tp\d+)[^"]*"[^>]*>([^<]+)<\/a>/i', $html, $m)) { + $empty['title'] = trim(strip_tags($m[2])); + } + } + + // Nettoyage du titre + if (!empty($empty['title'])) { + $empty['title'] = html_entity_decode($empty['title'], ENT_QUOTES | ENT_HTML5, 'UTF-8'); + $empty['title'] = preg_replace('/\s*\|\s*Fnac.*$/i', '', $empty['title']); + $empty['title'] = trim($empty['title']); + } + + // Récupération de l'image + if (preg_match('/]*property="og:image"[^>]*content="([^"]+)"/i', $html, $m)) { + $empty['poster'] = trim($m[1]); + } elseif (preg_match('/]*class="[^"]*js-ProductImage[^"]*"[^>]*src="([^"]+)"/i', $html, $m)) { + $empty['poster'] = trim($m[1]); + } if (!empty($empty['title'])) { $empty['format'] = detectFormat($empty['title']); @@ -165,8 +178,82 @@ function fetchFromFnac($ean) { return $empty; } -// ── SCRAPPING BLU-RAY.COM ── -function fetchFromBlurayCom($ean) { +// ── SCRAPPING BLU-RAY.COM (PAR TITRE) ── +function fetchFromBlurayComByTitle($title) { + static $lastRequest = 0; + $empty = emptyPhysicalResult(); + if (empty($title)) return $empty; + + $now = microtime(true); + if ($lastRequest > 0 && ($now - $lastRequest) < 3) { + usleep((int)((3 - ($now - $lastRequest)) * 1000000)); + } + $lastRequest = microtime(true); + + $cleanTitle = cleanTitle($title); + $searchUrl = "https://www.blu-ray.com/movies/search.php?keyword=" . urlencode($cleanTitle) . "&action=search"; + + $ch = curl_init($searchUrl); + curl_setopt_array($ch, [ + CURLOPT_RETURNTRANSFER => true, CURLOPT_TIMEOUT => 15, CURLOPT_CONNECTTIMEOUT => 5, + CURLOPT_SSL_VERIFYPEER => false, CURLOPT_FOLLOWLOCATION => true, + CURLOPT_USERAGENT => 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.0.0 Safari/537.36', + CURLOPT_HTTPHEADER => ['Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8', 'Accept-Language: fr-FR,fr;q=0.9', 'Referer: https://www.blu-ray.com/'] + ]); + $searchHtml = curl_exec($ch); + $httpCode = curl_getinfo($ch, CURLINFO_HTTP_CODE); + curl_close($ch); + + if (!$searchHtml || $httpCode !== 200) return $empty; + + if (!preg_match('/href="(https:\/\/www\.blu-ray\.com\/movies\/[^"]+\/(\d+)\/)"/i', $searchHtml, $matches)) { + return $empty; + } + + $movieUrl = $matches[1]; + sleep(2); + + $ch2 = curl_init($movieUrl); + curl_setopt_array($ch2, [ + CURLOPT_RETURNTRANSFER => true, CURLOPT_TIMEOUT => 15, CURLOPT_CONNECTTIMEOUT => 5, + CURLOPT_SSL_VERIFYPEER => false, CURLOPT_FOLLOWLOCATION => true, + CURLOPT_USERAGENT => 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36', + CURLOPT_HTTPHEADER => ['Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8', 'Referer: https://www.blu-ray.com/'] + ]); + $movieHtml = curl_exec($ch2); + curl_close($ch2); + + if (!$movieHtml) return $empty; + + // On ne récupère PAS le titre ici pour éviter l'anglais, on garde celui de la FNAC/TMDB + + if (preg_match('/]*>([^<]+)<\/h3>\s*(?: )?\((\d{4})\)/i', $movieHtml, $m)) $empty['year'] = $m[2]; + if (preg_match('/href="[^"]*studioid=\d+[^"]*"[^>]*>([^<]+)<\/a>/i', $movieHtml, $m)) $empty['publisher'] = trim($m[1]); + if (preg_match('/(\d+)\s*min<\/span>/i', $movieHtml, $m)) $empty['length'] = $m[1] . ' min'; + if (preg_match('/Aspect[\s-]*ratio:\s*([\d\.]+:[\d\.]+)/i', $movieHtml, $m)) $empty['aspect_ratio'] = trim($m[1]); + + if (preg_match('/(\w+)-disc\s+set/i', $movieHtml, $m)) { + $wordToNum = ['single' => 1, 'one' => 1, 'two' => 2, 'three' => 3, 'four' => 4, 'five' => 5, 'six' => 6]; + $empty['number_of_discs'] = $wordToNum[strtolower($m[1])] ?? 1; + } elseif (preg_match('/(\d+)-disc\s+set/i', $movieHtml, $m)) { + $empty['number_of_discs'] = (int)$m[1]; + } + + if (strpos($movieUrl, '/4k/') !== false || stripos($movieHtml, '4K Ultra HD') !== false) $empty['format'] = '4K Ultra HD'; + elseif (strpos($movieUrl, '/3d/') !== false) $empty['format'] = '3D Blu-ray'; + else $empty['format'] = 'Blu-ray'; + + if (preg_match('/src="(https:\/\/images\.static-bluray\.com\/movies\/covers\/\d+_front\.jpg[^"]*)"/i', $movieHtml, $m)) { + $empty['poster'] = $m[1]; + } elseif (preg_match('/]*class="coverfront"[^>]*src="([^"]+)"/i', $movieHtml, $m)) { + $empty['poster'] = preg_replace('/_large\.jpg/', '_front.jpg', $m[1]); + } + + return $empty; +} + +// ── SCRAPPING BLU-RAY.COM (PAR EAN - FALLBACK) ── +function fetchFromBlurayComByEan($ean) { static $lastRequest = 0; $empty = emptyPhysicalResult(); $ean = preg_replace('/[^0-9]/', '', (string)$ean); @@ -237,21 +324,6 @@ function fetchFromBlurayCom($ean) { $empty['poster'] = preg_replace('/_large\.jpg/', '_front.jpg', $m[1]); } - if (preg_match('/]*id="movie_info"[^>]*>(.*?)]*id="movie_review_intro"/is', $movieHtml, $infoBlock)) { - $infoHtml = $infoBlock[1]; - if (preg_match('/]*>
<\/font>\s*(.*?)


Directors:/is', $infoHtml, $m)) { - $empty['description'] = trim(preg_replace('/\s+/', ' ', strip_tags($m[1]))); - } - if (preg_match('/Directors?:\s*(.*?)(?:
|<\/div>)/is', $infoHtml, $m)) { - preg_match_all('/]*>([^<]+)<\/a>/i', $m[1], $dirMatches); - if (!empty($dirMatches[1])) $empty['director'] = implode(', ', array_map('trim', array_slice($dirMatches[1], 0, 2))); - } - if (preg_match('/Starring:\s*(.*?)(?:
|<\/div>)/is', $infoHtml, $m)) { - preg_match_all('/]*>([^<]+)<\/a>/i', $m[1], $actorMatches); - if (!empty($actorMatches[1])) $empty['actors'] = implode(', ', array_map('trim', array_slice($actorMatches[1], 0, 6))); - } - } - return $empty; } @@ -346,33 +418,46 @@ function fetchFromMovieCovers($title, $year = '') { return $empty; } -// ── AGGREGATEUR PHYSIQUE (FNAC -> BLU-RAY.COM) ── +// ── AGGREGATEUR PHYSIQUE (FNAC -> BLU-RAY.COM -> MOVIECOVERS) ── function fetchPhysicalByEan($ean, $pdo = null) { - // 1. Tenter la FNAC (Titre français garanti) + // 1. Tenter la FNAC pour avoir le titre français $fnacData = fetchFromFnac($ean); - if (!empty($fnacData['title'])) { - $res = $fnacData; - $mc = fetchFromMovieCovers($res['title'], $res['year']); - if (!empty($mc['poster'])) $res['poster'] = $mc['poster']; - if (!empty($mc['director'])) $res['director'] = $mc['director']; - if (!empty($mc['actors'])) $res['actors'] = $mc['actors']; - if (!empty($mc['description'])) $res['description'] = $mc['description']; - return $res; + $title = $fnacData['title'] ?? ''; + $res = $fnacData; + + // 2. Chercher les détails techniques sur Blu-ray.com + $blurayData = []; + if (!empty($title)) { + $blurayData = fetchFromBlurayComByTitle($title); } - - // 2. Fallback sur Blu-ray.com - $blurayData = fetchFromBlurayCom($ean); - if (!empty($blurayData['title'])) { + if (empty($blurayData) && empty($title)) { + // Fallback : chercher sur Blu-ray.com par EAN + $blurayData = fetchFromBlurayComByEan($ean); + $title = $blurayData['title'] ?? ''; $res = $blurayData; - $mc = fetchFromMovieCovers($res['title'], $res['year']); - if (!empty($mc['poster'])) $res['poster'] = $mc['poster']; - if (!empty($mc['director'])) $res['director'] = $mc['director']; - if (!empty($mc['actors'])) $res['actors'] = $mc['actors']; - if (!empty($mc['description'])) $res['description'] = $mc['description']; - return $res; } - return emptyPhysicalResult(); + if (!empty($blurayData)) { + // Les détails techniques de Blu-ray.com sont prioritaires + if (!empty($blurayData['length'])) $res['length'] = $blurayData['length']; + if (!empty($blurayData['number_of_discs'])) $res['number_of_discs'] = $blurayData['number_of_discs']; + if (!empty($blurayData['aspect_ratio'])) $res['aspect_ratio'] = $blurayData['aspect_ratio']; + if (!empty($blurayData['publisher'])) $res['publisher'] = $blurayData['publisher']; + if (!empty($blurayData['format'])) $res['format'] = $blurayData['format']; + if (!empty($blurayData['poster'])) $res['poster'] = $blurayData['poster']; + if (!empty($blurayData['year'])) $res['year'] = $blurayData['year']; + } + + // 3. Compléter avec MovieCovers pour la jaquette HD FR et métadonnées FR + if (!empty($title)) { + $mc = fetchFromMovieCovers($title, $res['year'] ?? ''); + if (!empty($mc['poster'])) $res['poster'] = $mc['poster']; + if (!empty($mc['director'])) $res['director'] = $mc['director']; + if (!empty($mc['actors'])) $res['actors'] = $mc['actors']; + if (!empty($mc['description'])) $res['description'] = $mc['description']; + } + + return $res; } // ── ROUTEUR PRINCIPAL ──