Actualiser api.php
This commit is contained in:
@@ -116,7 +116,7 @@ function emptyPhysicalResult() {
|
|||||||
];
|
];
|
||||||
}
|
}
|
||||||
|
|
||||||
// ── NOUVELLE FONCTION : SCRAPPING GO-UPC.COM (MODIFIÉE) ──
|
// ── NOUVELLE FONCTION : SCRAPPING GO-UPC.COM (CORRIGÉE) ──
|
||||||
function fetchFromGoUpc($ean) {
|
function fetchFromGoUpc($ean) {
|
||||||
$empty = emptyPhysicalResult();
|
$empty = emptyPhysicalResult();
|
||||||
$url = "https://go-upc.com/search?q=" . urlencode($ean);
|
$url = "https://go-upc.com/search?q=" . urlencode($ean);
|
||||||
@@ -128,21 +128,45 @@ function fetchFromGoUpc($ean) {
|
|||||||
$empty['title'] = trim(strip_tags($m[1]));
|
$empty['title'] = trim(strip_tags($m[1]));
|
||||||
} elseif (preg_match('/<title[^>]*>([^<]+)<\/title>/i', $html, $m)) {
|
} elseif (preg_match('/<title[^>]*>([^<]+)<\/title>/i', $html, $m)) {
|
||||||
$title = trim($m[1]);
|
$title = trim($m[1]);
|
||||||
|
$title = preg_replace('/\s*[-–]\s*EAN\s+\d+\s*[-–]\s*Go-UPC.*$/i', '', $title);
|
||||||
$title = preg_replace('/\s*[-–]\s*Go-UPC.*$/i', '', $title);
|
$title = preg_replace('/\s*[-–]\s*Go-UPC.*$/i', '', $title);
|
||||||
$empty['title'] = $title;
|
$empty['title'] = $title;
|
||||||
}
|
}
|
||||||
|
|
||||||
// ✅ RÉCUPÉRATION DE L'IMAGE DEPUIS GO-UPC
|
// ✅ RÉCUPÉRATION DE L'IMAGE DEPUIS GO-UPC (VERSION ROBUSTE)
|
||||||
// Chercher dans les figures product-image
|
// 1. Chercher directement l'URL Amazon S3 de go-upc (plus fiable)
|
||||||
if (preg_match('/<figure[^>]*class="product-image[^"]*"[^>]*>.*?<img[^>]*src="([^"]+\.(?:jpg|jpeg|png|webp))"/is', $html, $m)) {
|
if (preg_match('/src="(https:\/\/go-upc\.s3\.amazonaws\.com\/images\/[^"]+)"/i', $html, $m)) {
|
||||||
$empty['poster'] = trim($m[1]);
|
$empty['poster'] = trim($m[1]);
|
||||||
} elseif (preg_match('/<img[^>]*src="([^"]+go-upc\.s3\.amazonaws\.com[^"]+)"/i', $html, $m)) {
|
}
|
||||||
|
// 2. Fallback : chercher dans les figures product-image
|
||||||
|
elseif (preg_match('/<figure[^>]*class="product-image[^"]*"[^>]*>.*?<img[^>]*src="([^"]+)"/is', $html, $m)) {
|
||||||
|
$empty['poster'] = trim($m[1]);
|
||||||
|
}
|
||||||
|
// 3. Fallback : chercher toute image de go-upc.s3.amazonaws.com
|
||||||
|
elseif (preg_match('/src="([^"]+go-upc\.s3\.amazonaws\.com[^"]+)"/i', $html, $m)) {
|
||||||
$empty['poster'] = trim($m[1]);
|
$empty['poster'] = trim($m[1]);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Nettoyage du titre
|
// Nettoyage du titre
|
||||||
if (!empty($empty['title'])) {
|
if (!empty($empty['title'])) {
|
||||||
$empty['title'] = html_entity_decode($empty['title'], ENT_QUOTES | ENT_HTML5, 'UTF-8');
|
$empty['title'] = html_entity_decode($empty['title'], ENT_QUOTES | ENT_HTML5, 'UTF-8');
|
||||||
|
|
||||||
|
// ✅ APPLIQUER cleanTitle() pour supprimer les suffixes comme "- DVD", "- Édition Collector"
|
||||||
|
$empty['title'] = cleanTitle($empty['title']);
|
||||||
|
|
||||||
|
// ✅ Supprimer le nom du réalisateur au début si présent
|
||||||
|
// Pattern : "Prénom Nom Titre du film" → "Titre du film"
|
||||||
|
if (preg_match('/^([A-ZÀ-Ÿ][a-zà-ÿ]+\s+[A-ZÀ-Ÿ][a-zà-ÿ]+)\s+(.+)$/u', $empty['title'], $nameMatch)) {
|
||||||
|
$possibleDirector = $nameMatch[1];
|
||||||
|
$possibleTitle = $nameMatch[2];
|
||||||
|
|
||||||
|
// Vérifier si le "titre" restant semble être un vrai titre (pas juste un mot)
|
||||||
|
if (strlen($possibleTitle) > 3 && !preg_match('/^(DVD|Blu-ray|4K|VHS|CD|Vinyl)$/i', $possibleTitle)) {
|
||||||
|
$empty['director'] = $possibleDirector;
|
||||||
|
$empty['title'] = $possibleTitle;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
$empty['title'] = trim($empty['title']);
|
$empty['title'] = trim($empty['title']);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user