Actualiser api.php
This commit is contained in:
@@ -116,47 +116,60 @@ function emptyPhysicalResult() {
|
|||||||
];
|
];
|
||||||
}
|
}
|
||||||
|
|
||||||
// ── SCRAPPING FNAC (CORRIGÉ) ──
|
// ── SCRAPPING FNAC (CORRIGÉ POUR PARSER LA PAGE DE RÉSULTATS) ──
|
||||||
function fetchFromFnac($ean) {
|
function fetchFromFnac($ean) {
|
||||||
$empty = emptyPhysicalResult();
|
$empty = emptyPhysicalResult();
|
||||||
$url = "https://www.fnac.com/SearchResult/ResultList.aspx?Search=" . urlencode($ean);
|
$url = "https://www.fnac.com/SearchResult/ResultList.aspx?Search=" . urlencode($ean);
|
||||||
|
$html = httpGet($url, 15);
|
||||||
$ch = curl_init($url);
|
|
||||||
curl_setopt_array($ch, [
|
|
||||||
CURLOPT_RETURNTRANSFER => true,
|
|
||||||
CURLOPT_TIMEOUT => 10,
|
|
||||||
CURLOPT_SSL_VERIFYPEER => false,
|
|
||||||
CURLOPT_FOLLOWLOCATION => true, // Important pour suivre la redirection vers la page produit
|
|
||||||
CURLOPT_USERAGENT => 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.0.0',
|
|
||||||
]);
|
|
||||||
$html = curl_exec($ch);
|
|
||||||
$finalUrl = curl_getinfo($ch, CURLINFO_EFFECTIVE_URL); // Récupère l'URL après redirection
|
|
||||||
curl_close($ch);
|
|
||||||
|
|
||||||
if (!$html) return $empty;
|
if (!$html) return $empty;
|
||||||
|
|
||||||
// ✅ CORRECTION : On vérifie si on a bien été redirigé vers une page produit
|
// 1. Essayer de trouver le JSON-LD (méthode la plus fiable)
|
||||||
// Si l'URL finale contient encore "SearchResult", c'est qu'on est sur la page de recherche
|
if (preg_match_all('/<script[^>]*type="application\/ld\+json"[^>]*>(.*?)<\/script>/is', $html, $jsonMatches)) {
|
||||||
// (avec les bannières sponsorisées type "Project Hail Mary"). On ignore pour éviter les pubs.
|
foreach ($jsonMatches[1] as $jsonBlock) {
|
||||||
if (strpos($finalUrl, 'SearchResult/ResultList.aspx') === false && strpos($finalUrl, 'SearchResult') === false) {
|
if (preg_match('/"@type"\s*:\s*"Product"[^}]*?"name"\s*:\s*"([^"]+)"/is', $jsonBlock, $m)) {
|
||||||
|
$empty['title'] = trim($m[1]);
|
||||||
// On est sur la page produit ! On récupère le titre via og:title (infaillible)
|
break;
|
||||||
if (preg_match('/<meta[^>]*property="og:title"[^>]*content="([^"]+)"/i', $html, $m)) {
|
}
|
||||||
$title = html_entity_decode(trim($m[1]), ENT_QUOTES | ENT_HTML5, 'UTF-8');
|
if (preg_match('/"@type"\s*:\s*"ItemList".*?"name"\s*:\s*"([^"]+)"/is', $jsonBlock, $m)) {
|
||||||
// Nettoyage du titre FNAC qui peut contenir " | Fnac" à la fin
|
$empty['title'] = trim($m[1]);
|
||||||
$title = preg_replace('/\s*\|\s*Fnac.*$/i', '', $title);
|
break;
|
||||||
$empty['title'] = trim($title);
|
}
|
||||||
} elseif (preg_match('/<h1[^>]*class="[^"]*f-product__name[^"]*"[^>]*>([^<]+)<\/h1>/i', $html, $m)) {
|
|
||||||
$empty['title'] = trim(strip_tags($m[1]));
|
|
||||||
}
|
|
||||||
|
|
||||||
// Récupération de l'image via og:image
|
|
||||||
if (preg_match('/<meta[^>]*property="og:image"[^>]*content="([^"]+)"/i', $html, $m)) {
|
|
||||||
$empty['poster'] = trim($m[1]);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// Si on est resté sur la page de recherche, on retourne un tableau vide.
|
|
||||||
// Le fallback Blu-ray.com prendra le relais proprement.
|
// 2. Si pas de JSON-LD, essayer les classes CSS courantes de la FNAC
|
||||||
|
if (empty($empty['title'])) {
|
||||||
|
if (preg_match('/<a[^>]*class="[^"]*js-ProductTitle[^"]*"[^>]*>([^<]+)<\/a>/i', $html, $m)) {
|
||||||
|
$empty['title'] = trim(strip_tags($m[1]));
|
||||||
|
} elseif (preg_match('/<h2[^>]*class="[^"]*f-product__name[^"]*"[^>]*>([^<]+)<\/h2>/i', $html, $m)) {
|
||||||
|
$empty['title'] = trim(strip_tags($m[1]));
|
||||||
|
} elseif (preg_match('/<a[^>]*class="[^"]*product-title[^"]*"[^>]*>([^<]+)<\/a>/i', $html, $m)) {
|
||||||
|
$empty['title'] = trim(strip_tags($m[1]));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// 3. Fallback : chercher le premier lien qui pointe vers une page produit FNAC (/A\d+ ou /tp\d+)
|
||||||
|
if (empty($empty['title'])) {
|
||||||
|
if (preg_match('/<a[^>]*href="\/(A\d+|tp\d+)[^"]*"[^>]*title="([^"]+)"[^>]*>/i', $html, $m)) {
|
||||||
|
$empty['title'] = trim($m[2]);
|
||||||
|
} elseif (preg_match('/<a[^>]*href="\/(A\d+|tp\d+)[^"]*"[^>]*>([^<]+)<\/a>/i', $html, $m)) {
|
||||||
|
$empty['title'] = trim(strip_tags($m[2]));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Nettoyage du titre
|
||||||
|
if (!empty($empty['title'])) {
|
||||||
|
$empty['title'] = html_entity_decode($empty['title'], ENT_QUOTES | ENT_HTML5, 'UTF-8');
|
||||||
|
$empty['title'] = preg_replace('/\s*\|\s*Fnac.*$/i', '', $empty['title']);
|
||||||
|
$empty['title'] = trim($empty['title']);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Récupération de l'image
|
||||||
|
if (preg_match('/<meta[^>]*property="og:image"[^>]*content="([^"]+)"/i', $html, $m)) {
|
||||||
|
$empty['poster'] = trim($m[1]);
|
||||||
|
} elseif (preg_match('/<img[^>]*class="[^"]*js-ProductImage[^"]*"[^>]*src="([^"]+)"/i', $html, $m)) {
|
||||||
|
$empty['poster'] = trim($m[1]);
|
||||||
|
}
|
||||||
|
|
||||||
if (!empty($empty['title'])) {
|
if (!empty($empty['title'])) {
|
||||||
$empty['format'] = detectFormat($empty['title']);
|
$empty['format'] = detectFormat($empty['title']);
|
||||||
@@ -165,8 +178,82 @@ function fetchFromFnac($ean) {
|
|||||||
return $empty;
|
return $empty;
|
||||||
}
|
}
|
||||||
|
|
||||||
// ── SCRAPPING BLU-RAY.COM ──
|
// ── SCRAPPING BLU-RAY.COM (PAR TITRE) ──
|
||||||
function fetchFromBlurayCom($ean) {
|
function fetchFromBlurayComByTitle($title) {
|
||||||
|
static $lastRequest = 0;
|
||||||
|
$empty = emptyPhysicalResult();
|
||||||
|
if (empty($title)) return $empty;
|
||||||
|
|
||||||
|
$now = microtime(true);
|
||||||
|
if ($lastRequest > 0 && ($now - $lastRequest) < 3) {
|
||||||
|
usleep((int)((3 - ($now - $lastRequest)) * 1000000));
|
||||||
|
}
|
||||||
|
$lastRequest = microtime(true);
|
||||||
|
|
||||||
|
$cleanTitle = cleanTitle($title);
|
||||||
|
$searchUrl = "https://www.blu-ray.com/movies/search.php?keyword=" . urlencode($cleanTitle) . "&action=search";
|
||||||
|
|
||||||
|
$ch = curl_init($searchUrl);
|
||||||
|
curl_setopt_array($ch, [
|
||||||
|
CURLOPT_RETURNTRANSFER => true, CURLOPT_TIMEOUT => 15, CURLOPT_CONNECTTIMEOUT => 5,
|
||||||
|
CURLOPT_SSL_VERIFYPEER => false, CURLOPT_FOLLOWLOCATION => true,
|
||||||
|
CURLOPT_USERAGENT => 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.0.0 Safari/537.36',
|
||||||
|
CURLOPT_HTTPHEADER => ['Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8', 'Accept-Language: fr-FR,fr;q=0.9', 'Referer: https://www.blu-ray.com/']
|
||||||
|
]);
|
||||||
|
$searchHtml = curl_exec($ch);
|
||||||
|
$httpCode = curl_getinfo($ch, CURLINFO_HTTP_CODE);
|
||||||
|
curl_close($ch);
|
||||||
|
|
||||||
|
if (!$searchHtml || $httpCode !== 200) return $empty;
|
||||||
|
|
||||||
|
if (!preg_match('/href="(https:\/\/www\.blu-ray\.com\/movies\/[^"]+\/(\d+)\/)"/i', $searchHtml, $matches)) {
|
||||||
|
return $empty;
|
||||||
|
}
|
||||||
|
|
||||||
|
$movieUrl = $matches[1];
|
||||||
|
sleep(2);
|
||||||
|
|
||||||
|
$ch2 = curl_init($movieUrl);
|
||||||
|
curl_setopt_array($ch2, [
|
||||||
|
CURLOPT_RETURNTRANSFER => true, CURLOPT_TIMEOUT => 15, CURLOPT_CONNECTTIMEOUT => 5,
|
||||||
|
CURLOPT_SSL_VERIFYPEER => false, CURLOPT_FOLLOWLOCATION => true,
|
||||||
|
CURLOPT_USERAGENT => 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36',
|
||||||
|
CURLOPT_HTTPHEADER => ['Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8', 'Referer: https://www.blu-ray.com/']
|
||||||
|
]);
|
||||||
|
$movieHtml = curl_exec($ch2);
|
||||||
|
curl_close($ch2);
|
||||||
|
|
||||||
|
if (!$movieHtml) return $empty;
|
||||||
|
|
||||||
|
// On ne récupère PAS le titre ici pour éviter l'anglais, on garde celui de la FNAC/TMDB
|
||||||
|
|
||||||
|
if (preg_match('/<h3[^>]*>([^<]+)<\/h3>\s*(?: )?\((\d{4})\)/i', $movieHtml, $m)) $empty['year'] = $m[2];
|
||||||
|
if (preg_match('/href="[^"]*studioid=\d+[^"]*"[^>]*>([^<]+)<\/a>/i', $movieHtml, $m)) $empty['publisher'] = trim($m[1]);
|
||||||
|
if (preg_match('/(\d+)\s*min<\/span>/i', $movieHtml, $m)) $empty['length'] = $m[1] . ' min';
|
||||||
|
if (preg_match('/Aspect[\s-]*ratio:\s*([\d\.]+:[\d\.]+)/i', $movieHtml, $m)) $empty['aspect_ratio'] = trim($m[1]);
|
||||||
|
|
||||||
|
if (preg_match('/(\w+)-disc\s+set/i', $movieHtml, $m)) {
|
||||||
|
$wordToNum = ['single' => 1, 'one' => 1, 'two' => 2, 'three' => 3, 'four' => 4, 'five' => 5, 'six' => 6];
|
||||||
|
$empty['number_of_discs'] = $wordToNum[strtolower($m[1])] ?? 1;
|
||||||
|
} elseif (preg_match('/(\d+)-disc\s+set/i', $movieHtml, $m)) {
|
||||||
|
$empty['number_of_discs'] = (int)$m[1];
|
||||||
|
}
|
||||||
|
|
||||||
|
if (strpos($movieUrl, '/4k/') !== false || stripos($movieHtml, '4K Ultra HD') !== false) $empty['format'] = '4K Ultra HD';
|
||||||
|
elseif (strpos($movieUrl, '/3d/') !== false) $empty['format'] = '3D Blu-ray';
|
||||||
|
else $empty['format'] = 'Blu-ray';
|
||||||
|
|
||||||
|
if (preg_match('/src="(https:\/\/images\.static-bluray\.com\/movies\/covers\/\d+_front\.jpg[^"]*)"/i', $movieHtml, $m)) {
|
||||||
|
$empty['poster'] = $m[1];
|
||||||
|
} elseif (preg_match('/<img[^>]*class="coverfront"[^>]*src="([^"]+)"/i', $movieHtml, $m)) {
|
||||||
|
$empty['poster'] = preg_replace('/_large\.jpg/', '_front.jpg', $m[1]);
|
||||||
|
}
|
||||||
|
|
||||||
|
return $empty;
|
||||||
|
}
|
||||||
|
|
||||||
|
// ── SCRAPPING BLU-RAY.COM (PAR EAN - FALLBACK) ──
|
||||||
|
function fetchFromBlurayComByEan($ean) {
|
||||||
static $lastRequest = 0;
|
static $lastRequest = 0;
|
||||||
$empty = emptyPhysicalResult();
|
$empty = emptyPhysicalResult();
|
||||||
$ean = preg_replace('/[^0-9]/', '', (string)$ean);
|
$ean = preg_replace('/[^0-9]/', '', (string)$ean);
|
||||||
@@ -237,21 +324,6 @@ function fetchFromBlurayCom($ean) {
|
|||||||
$empty['poster'] = preg_replace('/_large\.jpg/', '_front.jpg', $m[1]);
|
$empty['poster'] = preg_replace('/_large\.jpg/', '_front.jpg', $m[1]);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (preg_match('/<div[^>]*id="movie_info"[^>]*>(.*?)<div[^>]*id="movie_review_intro"/is', $movieHtml, $infoBlock)) {
|
|
||||||
$infoHtml = $infoBlock[1];
|
|
||||||
if (preg_match('/<font[^>]*><br><\/font>\s*(.*?)<br><br><br>Directors:/is', $infoHtml, $m)) {
|
|
||||||
$empty['description'] = trim(preg_replace('/\s+/', ' ', strip_tags($m[1])));
|
|
||||||
}
|
|
||||||
if (preg_match('/Directors?:\s*(.*?)(?:<br>|<\/div>)/is', $infoHtml, $m)) {
|
|
||||||
preg_match_all('/<a[^>]*>([^<]+)<\/a>/i', $m[1], $dirMatches);
|
|
||||||
if (!empty($dirMatches[1])) $empty['director'] = implode(', ', array_map('trim', array_slice($dirMatches[1], 0, 2)));
|
|
||||||
}
|
|
||||||
if (preg_match('/Starring:\s*(.*?)(?:<br>|<\/div>)/is', $infoHtml, $m)) {
|
|
||||||
preg_match_all('/<a[^>]*>([^<]+)<\/a>/i', $m[1], $actorMatches);
|
|
||||||
if (!empty($actorMatches[1])) $empty['actors'] = implode(', ', array_map('trim', array_slice($actorMatches[1], 0, 6)));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return $empty;
|
return $empty;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -346,33 +418,46 @@ function fetchFromMovieCovers($title, $year = '') {
|
|||||||
return $empty;
|
return $empty;
|
||||||
}
|
}
|
||||||
|
|
||||||
// ── AGGREGATEUR PHYSIQUE (FNAC -> BLU-RAY.COM) ──
|
// ── AGGREGATEUR PHYSIQUE (FNAC -> BLU-RAY.COM -> MOVIECOVERS) ──
|
||||||
function fetchPhysicalByEan($ean, $pdo = null) {
|
function fetchPhysicalByEan($ean, $pdo = null) {
|
||||||
// 1. Tenter la FNAC (Titre français garanti)
|
// 1. Tenter la FNAC pour avoir le titre français
|
||||||
$fnacData = fetchFromFnac($ean);
|
$fnacData = fetchFromFnac($ean);
|
||||||
if (!empty($fnacData['title'])) {
|
$title = $fnacData['title'] ?? '';
|
||||||
$res = $fnacData;
|
$res = $fnacData;
|
||||||
$mc = fetchFromMovieCovers($res['title'], $res['year']);
|
|
||||||
if (!empty($mc['poster'])) $res['poster'] = $mc['poster'];
|
// 2. Chercher les détails techniques sur Blu-ray.com
|
||||||
if (!empty($mc['director'])) $res['director'] = $mc['director'];
|
$blurayData = [];
|
||||||
if (!empty($mc['actors'])) $res['actors'] = $mc['actors'];
|
if (!empty($title)) {
|
||||||
if (!empty($mc['description'])) $res['description'] = $mc['description'];
|
$blurayData = fetchFromBlurayComByTitle($title);
|
||||||
return $res;
|
|
||||||
}
|
}
|
||||||
|
if (empty($blurayData) && empty($title)) {
|
||||||
// 2. Fallback sur Blu-ray.com
|
// Fallback : chercher sur Blu-ray.com par EAN
|
||||||
$blurayData = fetchFromBlurayCom($ean);
|
$blurayData = fetchFromBlurayComByEan($ean);
|
||||||
if (!empty($blurayData['title'])) {
|
$title = $blurayData['title'] ?? '';
|
||||||
$res = $blurayData;
|
$res = $blurayData;
|
||||||
$mc = fetchFromMovieCovers($res['title'], $res['year']);
|
|
||||||
if (!empty($mc['poster'])) $res['poster'] = $mc['poster'];
|
|
||||||
if (!empty($mc['director'])) $res['director'] = $mc['director'];
|
|
||||||
if (!empty($mc['actors'])) $res['actors'] = $mc['actors'];
|
|
||||||
if (!empty($mc['description'])) $res['description'] = $mc['description'];
|
|
||||||
return $res;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return emptyPhysicalResult();
|
if (!empty($blurayData)) {
|
||||||
|
// Les détails techniques de Blu-ray.com sont prioritaires
|
||||||
|
if (!empty($blurayData['length'])) $res['length'] = $blurayData['length'];
|
||||||
|
if (!empty($blurayData['number_of_discs'])) $res['number_of_discs'] = $blurayData['number_of_discs'];
|
||||||
|
if (!empty($blurayData['aspect_ratio'])) $res['aspect_ratio'] = $blurayData['aspect_ratio'];
|
||||||
|
if (!empty($blurayData['publisher'])) $res['publisher'] = $blurayData['publisher'];
|
||||||
|
if (!empty($blurayData['format'])) $res['format'] = $blurayData['format'];
|
||||||
|
if (!empty($blurayData['poster'])) $res['poster'] = $blurayData['poster'];
|
||||||
|
if (!empty($blurayData['year'])) $res['year'] = $blurayData['year'];
|
||||||
|
}
|
||||||
|
|
||||||
|
// 3. Compléter avec MovieCovers pour la jaquette HD FR et métadonnées FR
|
||||||
|
if (!empty($title)) {
|
||||||
|
$mc = fetchFromMovieCovers($title, $res['year'] ?? '');
|
||||||
|
if (!empty($mc['poster'])) $res['poster'] = $mc['poster'];
|
||||||
|
if (!empty($mc['director'])) $res['director'] = $mc['director'];
|
||||||
|
if (!empty($mc['actors'])) $res['actors'] = $mc['actors'];
|
||||||
|
if (!empty($mc['description'])) $res['description'] = $mc['description'];
|
||||||
|
}
|
||||||
|
|
||||||
|
return $res;
|
||||||
}
|
}
|
||||||
|
|
||||||
// ── ROUTEUR PRINCIPAL ──
|
// ── ROUTEUR PRINCIPAL ──
|
||||||
|
|||||||
Reference in New Issue
Block a user