Actualiser api.php
This commit is contained in:
@@ -105,191 +105,203 @@ function extractYear($dateStr) {
|
||||
return '';
|
||||
}
|
||||
|
||||
// ── DVDFr lookup (no cache, HTML scraping) ──
/**
 * Look up a disc on dvdfr.com by its EAN and scrape the product page.
 *
 * The function performs two HTTP requests: the site search page (to find the
 * product page URL), then the product page itself. Data is extracted with
 * regular expressions, so every field is best-effort and may stay empty.
 *
 * @param mixed $ean Barcode to search for; anything shorter than 8 characters
 *                   (or non-scalar) is rejected without any network access.
 * @param mixed $pdo Unused here (the cache layer was removed); kept so that
 *                   existing callers do not need to change.
 *
 * @return array{poster:string,title:string,publisher:string,format:string,length:string,aspect:string,discs:string}|null
 *         The scraped fields, or null when the lookup failed or nothing
 *         useful (poster, publisher or title) could be extracted.
 */
function fetchDVDFr($ean, $pdo) {
    if (empty($ean) || !is_scalar($ean)) return null;
    $ean = trim((string)$ean);
    if (strlen($ean) < 8) return null;

    $ua = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36';
    // No explicit Accept-Encoding header: CURLOPT_ENCODING => '' below makes
    // libcurl advertise exactly the encodings it is able to decode. Forcing
    // "br" by hand yields undecodable bodies when libcurl lacks brotli.
    $baseHeaders = [
        'Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
        'Accept-Language: fr-FR,fr;q=0.9,en-US;q=0.8,en;q=0.7',
        'Connection: keep-alive',
    ];

    // ── Internal helpers ──

    // GET a URL and return its body, or null on any transport/HTTP failure.
    $curlGet = static function (string $url) use ($ua, $baseHeaders): ?string {
        $ch = curl_init($url);
        if ($ch === false) {
            error_log("DVDFr curlGet: HTTP 0 pour $url");
            return null;
        }
        curl_setopt_array($ch, [
            CURLOPT_RETURNTRANSFER => true,
            CURLOPT_TIMEOUT        => 10,
            CURLOPT_CONNECTTIMEOUT => 5,
            // Keep TLS verification on: the scraped values end up in our data.
            CURLOPT_SSL_VERIFYPEER => true,
            CURLOPT_SSL_VERIFYHOST => 2,
            CURLOPT_USERAGENT      => $ua,
            CURLOPT_FOLLOWLOCATION => true,
            CURLOPT_MAXREDIRS      => 5,
            CURLOPT_ENCODING       => '',   // automatic decompression
            CURLOPT_HTTPHEADER     => $baseHeaders,
            CURLOPT_COOKIEFILE     => '',   // enables the in-memory cookie engine
        ]);
        $body  = curl_exec($ch);
        $code  = (int)curl_getinfo($ch, CURLINFO_HTTP_CODE);
        $error = curl_error($ch);
        curl_close($ch);

        if ($body === false || $body === '' || $code !== 200) {
            error_log("DVDFr curlGet: HTTP $code pour $url" . ($error !== '' ? " ($error)" : ''));
            return null;
        }
        return $body;
    };

    // Turn a scheme-relative, root-relative or bare path into an absolute URL.
    $absoluteUrl = static function (string $src): string {
        if (strpos($src, 'http') === 0) return $src;
        if (strpos($src, '//') === 0)   return 'https:' . $src;
        if (strpos($src, '/') === 0)    return 'https://www.dvdfr.com' . $src;
        return 'https://www.dvdfr.com/' . $src;
    };

    // Strip tags, decode HTML entities and trim a raw captured value.
    $clean = static fn(string $s): string => trim(html_entity_decode(strip_tags($s), ENT_QUOTES | ENT_HTML5, 'UTF-8'));

    // Read <meta property="..." content="...">, whichever attribute comes first.
    $metaContent = static function (string $html, string $property): string {
        $prop = preg_quote($property, '/');
        $candidates = [
            '/<meta[^>]+property=["\']' . $prop . '["\'][^>]+content=["\']([^"\']+)["\']/i',
            '/<meta[^>]+content=["\']([^"\']+)["\'][^>]+property=["\']' . $prop . '["\']/i',
        ];
        foreach ($candidates as $candidate) {
            if (preg_match($candidate, $html, $m)) return $m[1];
        }
        return '';
    };

    // Reduce a JSON-LD "image" value (string, list, or object with "url") to a URL.
    $pickImage = static function ($img): string {
        if (is_array($img) && !isset($img['url'])) {
            $img = reset($img);          // list of images → first entry
        }
        if (is_array($img)) {
            $img = $img['url'] ?? '';    // image object → its url
        }
        return is_string($img) ? $img : '';
    };

    // ── STEP 1: find the product page URL through the site search ──
    $searchHtml = $curlGet('https://www.dvdfr.com/search/?q=' . urlencode($ean));
    if ($searchHtml === null) {
        error_log("DVDFr: échec de la page de recherche pour EAN $ean");
        return null;
    }

    $ficheUrl = null;
    // Patterns in priority order.
    $patterns = [
        // link whose URL mentions a media type and ends in .html
        '@href=["\']([^"\']*(?:dvd|blu-ray|4k|vhs|cd|coffret)[^"\']+\.html)["\']@i',
        // any .html page on the dvdfr.com domain
        '@href=["\'](?:https?://(?:www\.)?dvdfr\.com)?(/[^"\']+\.html)["\']@i',
    ];
    foreach ($patterns as $pattern) {
        if (preg_match($pattern, $searchHtml, $m)) {
            $ficheUrl = $absoluteUrl($m[1]);
            break;
        }
    }
    if (!$ficheUrl) {
        error_log("DVDFr: aucune fiche trouvée pour EAN $ean");
        return null;
    }
    error_log("DVDFr: fiche → $ficheUrl");

    // ── STEP 2: load the product page ──
    $html = $curlGet($ficheUrl);
    if ($html === null) {
        error_log("DVDFr: impossible de charger la fiche $ficheUrl");
        return null;
    }

    // ── STEP 3: data extraction ──
    $result = [
        'poster'    => '',
        'title'     => '',
        'publisher' => '',
        'format'    => '',
        'length'    => '',
        'aspect'    => '',
        'discs'     => '',
    ];

    // --- 3a. POSTER ---
    // Priority 1: og:image.
    $ogImage = $clean($metaContent($html, 'og:image'));
    if ($ogImage !== '') {
        $result['poster'] = $absoluteUrl($ogImage);
        error_log("DVDFr: affiche via og:image → " . $result['poster']);
    }

    // Priority 2: JSON-LD structured data.
    if ($result['poster'] === ''
        && preg_match_all('/<script[^>]+type=["\']application\/ld\+json["\'][^>]*>(.*?)<\/script>/is', $html, $scripts)) {
        foreach ($scripts[1] as $raw) {
            $json = json_decode($raw, true);
            if (!is_array($json)) continue;
            $img = trim($pickImage($json['image'] ?? null));
            if ($img !== '') {
                $result['poster'] = $absoluteUrl($img);
                error_log("DVDFr: affiche via JSON-LD → " . $result['poster']);
                break;
            }
        }
    }

    // Priority 3: first plausible <img>, preferably inside a cover-like container.
    if ($result['poster'] === '') {
        $coverBlock = '';
        if (preg_match('/<(?:div|figure|section)[^>]+(?:class|id)=["\'][^"\']*(?:cover|product|fiche|jaquette|packshot)[^"\']*["\'][^>]*>(.*?)<\/(?:div|figure|section)>/is', $html, $m)) {
            $coverBlock = $m[1];
        }
        $searchArea = $coverBlock ?: $html;

        preg_match_all('/<img[^>]+(?:src|data-src|data-lazy-src|data-original)=["\']([^"\']+)["\'][^>]*>/i', $searchArea, $imgs);
        foreach ($imgs[1] as $src) {
            // Skip data-URIs.
            if (strpos($src, 'data:') === 0) continue;
            // Skip icons, logos, UI pieces and ad banners. Tokens must be
            // delimited by non-letters so that titles such as "mad-max" or
            // paths such as "/uploads/" are not rejected by a bare substring
            // ("ad", "pub", ...).
            if (preg_match('/(?<![a-z])(?:icons?|logos?|sprites?|blank|spacer|pixel|tracking|banners?|ads?|pubs?|stars?|notes?|flags?|arrows?|btn|buttons?|nav)(?![a-z])/i', $src)) continue;
            // Must carry an image extension (optional query string allowed).
            if (!preg_match('/\.(jpe?g|png|webp)(\?[^"\']*)?$/i', $src)) continue;

            $candidate = $clean($src);
            if ($candidate === '') continue;
            $result['poster'] = $absoluteUrl($candidate);
            error_log("DVDFr: affiche via img scan → " . $result['poster']);
            break;
        }
    }

    // --- 3b. TITLE ---
    $result['title'] = $clean($metaContent($html, 'og:title'));
    // Fallback: <h1>.
    if ($result['title'] === '' && preg_match('/<h1[^>]*>([^<]+)<\/h1>/i', $html, $m)) {
        $result['title'] = $clean($m[1]);
    }

    // --- 3c. METADATA: publisher, format, runtime, aspect ratio, discs ---

    // Publisher. The leading "é" of "éditeur" is deliberately left out of the
    // patterns so they do not depend on how the page encodes it.
    $publisherPatterns = [
        '@(?:diteur|distributeur|studio|label)\s*</?\w*>\s*:?\s*</?\w*>\s*([^<]{2,60})@i',
        '@(?:diteur|distributeur)\s*[:<]([^<]{2,60})@i',
        '@<[^>]+class=["\'][^"\']*(?:publisher|editor|editeur)[^"\']*["\'][^>]*>\s*([^<]{2,60})\s*</@i',
    ];
    foreach ($publisherPatterns as $pat) {
        if (!preg_match($pat, $html, $m)) continue;
        // The second pattern may swallow the "<" of a tag; drop the leftover
        // tag tail ("/td>", "b>") before validating the value.
        $val = $clean((string)preg_replace('@^/?\w*>\s*@', '', $m[1]));
        if (strlen($val) > 1) {
            $result['publisher'] = $val;
            break;
        }
    }

    // Format: most specific medium first.
    if (preg_match('/\b(4[Kk]\s*[Uu]ltra\s*[Hh][Dd]|Ultra\s*HD|UHD)\b/', $html)) {
        $result['format'] = '4K Ultra HD';
    } elseif (preg_match('/\b(Blu-?ray)\b/i', $html)) {
        $result['format'] = 'Blu-ray';
    } elseif (preg_match('/\bDVD\b/i', $html)) {
        $result['format'] = 'DVD';
    } elseif (preg_match('/\bVHS\b/i', $html)) {
        $result['format'] = 'VHS';
    }

    // Runtime: "<label> 120 min" first, then an "1h45" style value.
    if (preg_match('/(?:dur[ée]{1,2}e?|duration)\s*[:<\/]?\s*(\d{1,4})\s*(?:min(?:utes?)?|mn)/i', $html, $m)) {
        $result['length'] = $m[1] . ' min';
    } elseif (preg_match('/(\d{1,2})h\s*(\d{2})\s*(?:min)?/i', $html, $m)) {
        $result['length'] = ((int)$m[1] * 60 + (int)$m[2]) . ' min';
    }

    // Aspect ratio. "~" is used as delimiter because the pattern contains a
    // literal "/", and "×" is an alternation branch rather than a class
    // member because it is multibyte and the pattern runs in byte mode.
    if (preg_match('~(?:format\s*(?:image|vid[eé]o)|aspect\s*ratio|ratio)\s*[:<]?\s*([\d,. ]+(?:[:xX/]|×)[\d,. ]+)~i', $html, $m)) {
        $result['aspect'] = trim($m[1], " ,.\t\n\r");
    }

    // Number of discs (kept as a string like every other field).
    if (preg_match('/(?:nombre\s*de\s*disques?|nb\.?\s*disques?|disques?)\s*[:<]?\s*(\d+)/i', $html, $m)) {
        $result['discs'] = (string)(int)$m[1];
    }

    error_log("DVDFr: résultat final → " . json_encode($result, JSON_UNESCAPED_UNICODE));

    // Return null when nothing useful was recovered.
    $hasData = $result['poster'] !== '' || $result['publisher'] !== '' || $result['title'] !== '';
    return $hasData ? $result : null;
}
|
||||
|
||||
// ── 2. API TMDB (SANS CACHE - TITRE FRANÇAIS) ──
|
||||
|
||||
Reference in New Issue
Block a user