add images updater from url
This commit is contained in:
@ -3,19 +3,13 @@ package fr.tetelie.crawler;
|
||||
import org.jsoup.Jsoup;
|
||||
import org.jsoup.nodes.Document;
|
||||
import org.jsoup.nodes.Element;
|
||||
import org.jsoup.select.Elements;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.sql.Connection;
|
||||
import java.sql.DriverManager;
|
||||
import java.sql.PreparedStatement;
|
||||
import java.sql.SQLException;
|
||||
|
||||
public class Crawler {
|
||||
|
||||
|
||||
public static void main(String[] args) {
|
||||
|
||||
new WebScrapper();
|
||||
|
||||
new DatabaseConfig();
|
||||
boolean isConnected = DatabaseConfig.getInstance().connect();
|
||||
@ -23,7 +17,9 @@ public class Crawler {
|
||||
|
||||
if(!isConnected){return;};
|
||||
|
||||
String url = "https://www.amazon.fr/Victool-temp%C3%A9rature-professionnel-r%C3%A9paration-%C3%A9lectronique/dp/B0FP2D7TBY/?_encoding=UTF8&pd_rd_w=UWXHj&content-id=amzn1.sym.5633189b-a269-4b24-8a80-52a48568a326%3Aamzn1.symc.752cde0b-d2ce-4cce-9121-769ea438869e&pf_rd_p=5633189b-a269-4b24-8a80-52a48568a326&pf_rd_r=G56TKFERTQ9WS62C7WY4&pd_rd_wg=mtTBQ&pd_rd_r=519dfa29-c58c-41b3-89ca-4d01e27bfc2e&ref_=pd_hp_d_atf_ci_mcx_mr_ca_hp_atf_d";
|
||||
DatabaseConfig.getInstance().updatesAllMissingImages();
|
||||
|
||||
/*String url = "https://www.amazon.fr/Victool-temp%C3%A9rature-professionnel-r%C3%A9paration-%C3%A9lectronique/dp/B0FP2D7TBY/?_encoding=UTF8&pd_rd_w=UWXHj&content-id=amzn1.sym.5633189b-a269-4b24-8a80-52a48568a326%3Aamzn1.symc.752cde0b-d2ce-4cce-9121-769ea438869e&pf_rd_p=5633189b-a269-4b24-8a80-52a48568a326&pf_rd_r=G56TKFERTQ9WS62C7WY4&pd_rd_wg=mtTBQ&pd_rd_r=519dfa29-c58c-41b3-89ca-4d01e27bfc2e&ref_=pd_hp_d_atf_ci_mcx_mr_ca_hp_atf_d";
|
||||
|
||||
try {
|
||||
// IMPORTANT : Amazon bloque les requêtes sans "User-Agent"
|
||||
@ -56,6 +52,6 @@ public class Crawler {
|
||||
|
||||
} catch (Exception e) {
|
||||
e.printStackTrace();
|
||||
}
|
||||
}*/
|
||||
}
|
||||
}
|
||||
|
||||
@ -2,10 +2,7 @@ package fr.tetelie.crawler;
|
||||
|
||||
import io.github.cdimascio.dotenv.Dotenv;
|
||||
|
||||
import javax.xml.crypto.Data;
|
||||
import java.sql.Connection;
|
||||
import java.sql.DriverManager;
|
||||
import java.sql.SQLException;
|
||||
import java.sql.*;
|
||||
|
||||
public class DatabaseConfig {
|
||||
|
||||
@ -15,6 +12,8 @@ public class DatabaseConfig {
|
||||
private String dbUser;
|
||||
private String dbPass;
|
||||
|
||||
public Connection connection;
|
||||
|
||||
public static DatabaseConfig getInstance() {
|
||||
return instance;
|
||||
|
||||
@ -38,20 +37,70 @@ public class DatabaseConfig {
|
||||
System.out.println("Configuration chargée avec succès !");
|
||||
}
|
||||
|
||||
public boolean connect()
|
||||
{
|
||||
public boolean connect() {
|
||||
System.out.println("Tentative de connexion à la base de données...");
|
||||
|
||||
try (Connection connection = DriverManager.getConnection(DatabaseConfig.getInstance().dbUrl, DatabaseConfig.getInstance().dbUser, DatabaseConfig.getInstance().dbPass)) {
|
||||
if (connection != null) {
|
||||
try {
|
||||
// On assigne directement à la variable de classe
|
||||
this.connection = DriverManager.getConnection(dbUrl, dbUser, dbPass);
|
||||
|
||||
if (this.connection != null && !this.connection.isClosed()) {
|
||||
System.out.println("✅ SUCCÈS : Connexion établie avec brio !");
|
||||
System.out.println("Serveur distant : " + connection.getMetaData().getDatabaseProductName());
|
||||
return true;
|
||||
}
|
||||
} catch (SQLException e) {
|
||||
System.err.println("❌ ÉCHEC : Impossible de se connecter.");
|
||||
System.err.println("Erreur : " + e.getMessage());
|
||||
return false;
|
||||
System.err.println("❌ ÉCHEC : " + e.getMessage());
|
||||
}
|
||||
return true;
|
||||
return false;
|
||||
}
|
||||
|
||||
public void requestPrice()
|
||||
{
|
||||
// pour toutes les entrée dans le table products
|
||||
// aller chercher le prix
|
||||
// l'enregistrer dans la database price_history
|
||||
|
||||
|
||||
|
||||
}
|
||||
|
||||
public void updatesAllMissingImages() {
|
||||
String selectQuery = "SELECT id, link FROM products WHERE image_url IS NULL OR image_url = ''";
|
||||
String updateQuery = "UPDATE products SET image_url = ? WHERE id = ?";
|
||||
|
||||
|
||||
try (PreparedStatement selectStmt = connection.prepareStatement(selectQuery);
|
||||
ResultSet rs = selectStmt.executeQuery()) {
|
||||
|
||||
while (rs.next()) {
|
||||
int id = rs.getInt("id");
|
||||
String urlProduit = rs.getString("link");
|
||||
|
||||
System.out.println("Traitement de l'ID : " + id);
|
||||
|
||||
String imageUrl = WebScrapper.getInstance().requestImage(urlProduit);
|
||||
|
||||
if (imageUrl != null) {
|
||||
try (PreparedStatement updateStmt = connection.prepareStatement(updateQuery)) {
|
||||
updateStmt.setString(1, imageUrl);
|
||||
updateStmt.setInt(2, id);
|
||||
updateStmt.executeUpdate();
|
||||
System.out.println("Image mise à jour pour ID " + id);
|
||||
}
|
||||
}
|
||||
|
||||
// Petit délai pour ne pas saturer le serveur cible
|
||||
Thread.sleep(1500);
|
||||
}
|
||||
|
||||
} catch (SQLException | InterruptedException e) {
|
||||
e.printStackTrace();
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
}
|
||||
|
||||
43
src/main/java/fr/tetelie/crawler/WebScrapper.java
Normal file
43
src/main/java/fr/tetelie/crawler/WebScrapper.java
Normal file
@ -0,0 +1,43 @@
|
||||
package fr.tetelie.crawler;
|
||||
|
||||
import org.jsoup.Jsoup;
|
||||
import org.jsoup.nodes.Document;
|
||||
import org.jsoup.nodes.Element;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
public class WebScrapper {
|
||||
|
||||
static WebScrapper instance;
|
||||
|
||||
public static WebScrapper getInstance() { return instance; }
|
||||
|
||||
public void requestPrice()
|
||||
{
|
||||
|
||||
}
|
||||
|
||||
public String requestImage(String urlPage) {
|
||||
try {
|
||||
// On ajoute un User-Agent pour éviter d'être bloqué (comme sur Amazon)
|
||||
Document doc = Jsoup.connect(urlPage)
|
||||
.userAgent("Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36")
|
||||
.timeout(10000)
|
||||
.get();
|
||||
|
||||
// Recherche de l'élément par ID
|
||||
Element img = doc.getElementById("landingImage");
|
||||
|
||||
if (img != null) {
|
||||
// On récupère l'attribut "src"
|
||||
return img.attr("src");
|
||||
}
|
||||
} catch (IOException e) {
|
||||
System.err.println("Erreur lors du crawl de " + urlPage + " : " + e.getMessage());
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
|
||||
|
||||
}
|
||||
Reference in New Issue
Block a user