From 2061cc29ff69fbfd29e96a11074686d64eb59b07 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=CE=A3lie=20*?= <44349276+tetelie@users.noreply.github.com> Date: Fri, 27 Feb 2026 18:16:57 +0100 Subject: [PATCH] initial commit --- .gitignore | 39 +++++++++++++++ .idea/.gitignore | 10 ++++ .idea/discord.xml | 7 +++ .idea/encodings.xml | 7 +++ .idea/material_theme_project_new.xml | 13 +++++ .idea/misc.xml | 14 ++++++ .idea/vcs.xml | 6 +++ pom.xml | 24 +++++++++ src/main/java/fr/tetelie/crawler/Crawler.java | 49 +++++++++++++++++++ 9 files changed, 169 insertions(+) create mode 100644 .gitignore create mode 100644 .idea/.gitignore create mode 100644 .idea/discord.xml create mode 100644 .idea/encodings.xml create mode 100644 .idea/material_theme_project_new.xml create mode 100644 .idea/misc.xml create mode 100644 .idea/vcs.xml create mode 100644 pom.xml create mode 100644 src/main/java/fr/tetelie/crawler/Crawler.java diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..480bdf5 --- /dev/null +++ b/.gitignore @@ -0,0 +1,39 @@ +target/ +!.mvn/wrapper/maven-wrapper.jar +!**/src/main/**/target/ +!**/src/test/**/target/ +.kotlin + +### IntelliJ IDEA ### +.idea/modules.xml +.idea/jarRepositories.xml +.idea/compiler.xml +.idea/libraries/ +*.iws +*.iml +*.ipr + +### Eclipse ### +.apt_generated +.classpath +.factorypath +.project +.settings +.springBeans +.sts4-cache + +### NetBeans ### +/nbproject/private/ +/nbbuild/ +/dist/ +/nbdist/ +/.nb-gradle/ +build/ +!**/src/main/**/build/ +!**/src/test/**/build/ + +### VS Code ### +.vscode/ + +### Mac OS ### +.DS_Store \ No newline at end of file diff --git a/.idea/.gitignore b/.idea/.gitignore new file mode 100644 index 0000000..30cf57e --- /dev/null +++ b/.idea/.gitignore @@ -0,0 +1,10 @@ +# Default ignored files +/shelf/ +/workspace.xml +# Editor-based HTTP Client requests +/httpRequests/ +# Ignored default folder with query files +/queries/ +# Datasource local storage ignored files +/dataSources/ +/dataSources.local.xml 
diff --git a/.idea/discord.xml b/.idea/discord.xml new file mode 100644 index 0000000..30bab2a --- /dev/null +++ b/.idea/discord.xml @@ -0,0 +1,7 @@ + + + + + \ No newline at end of file diff --git a/.idea/encodings.xml b/.idea/encodings.xml new file mode 100644 index 0000000..aa00ffa --- /dev/null +++ b/.idea/encodings.xml @@ -0,0 +1,7 @@ + + + + + + + \ No newline at end of file diff --git a/.idea/material_theme_project_new.xml b/.idea/material_theme_project_new.xml new file mode 100644 index 0000000..99309d2 --- /dev/null +++ b/.idea/material_theme_project_new.xml @@ -0,0 +1,13 @@ + + + + + + \ No newline at end of file diff --git a/.idea/misc.xml b/.idea/misc.xml new file mode 100644 index 0000000..dd5e3ad --- /dev/null +++ b/.idea/misc.xml @@ -0,0 +1,14 @@ + + + + + + + + + + \ No newline at end of file diff --git a/.idea/vcs.xml b/.idea/vcs.xml new file mode 100644 index 0000000..94a25f7 --- /dev/null +++ b/.idea/vcs.xml @@ -0,0 +1,6 @@ + + + + + + \ No newline at end of file diff --git a/pom.xml b/pom.xml new file mode 100644 index 0000000..64493bf --- /dev/null +++ b/pom.xml @@ -0,0 +1,24 @@ + + + 4.0.0 + + org.example + WebCrawler + 1.0-SNAPSHOT + + + 20 + 20 + UTF-8 + + + + + org.jsoup + jsoup + 1.18.1 + + + \ No newline at end of file
diff --git a/src/main/java/fr/tetelie/crawler/Crawler.java b/src/main/java/fr/tetelie/crawler/Crawler.java
new file mode 100644
index 0000000..33b7cae
--- /dev/null
+++ b/src/main/java/fr/tetelie/crawler/Crawler.java
@@ -0,0 +1,49 @@
+package fr.tetelie.crawler;
+
+import org.jsoup.Jsoup;
+import org.jsoup.nodes.Document;
+import org.jsoup.nodes.Element;
+import org.jsoup.select.Elements;
+
+import java.io.IOException;
+
+/** Fetches an Amazon product page with jsoup and prints the price found in its markup. */
+public class Crawler {
+
+    /** Product page fetched when no URL is passed on the command line. */
+    private static final String DEFAULT_URL = "https://www.amazon.fr/Victool-temp%C3%A9rature-professionnel-r%C3%A9paration-%C3%A9lectronique/dp/B0FP2D7TBY/?_encoding=UTF8&pd_rd_w=UWXHj&content-id=amzn1.sym.5633189b-a269-4b24-8a80-52a48568a326%3Aamzn1.symc.752cde0b-d2ce-4cce-9121-769ea438869e&pf_rd_p=5633189b-a269-4b24-8a80-52a48568a326&pf_rd_r=G56TKFERTQ9WS62C7WY4&pd_rd_wg=mtTBQ&pd_rd_r=519dfa29-c58c-41b3-89ca-4d01e27bfc2e&ref_=pd_hp_d_atf_ci_mcx_mr_ca_hp_atf_d";
+
+    /** Browser-like User-Agent: Amazon blocks requests that do not send one. */
+    private static final String USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36";
+
+    /**
+     * Downloads the product page and prints its price, or which part of it is missing.
+     *
+     * @param args optional; {@code args[0]} replaces {@link #DEFAULT_URL} when present
+     */
+    public static void main(String[] args) {
+        String url = (args != null && args.length > 0) ? args[0] : DEFAULT_URL;
+        Document doc;
+        try {
+            doc = Jsoup.connect(url).userAgent(USER_AGENT).get();
+        } catch (IOException | IllegalArgumentException e) {
+            // Network/HTTP failure or malformed URL: report it briefly instead of a raw stack trace.
+            System.err.println("Impossible de récupérer la page " + url + " : " + e);
+            return;
+        }
+
+        // The price is split across two elements: whole part and fractional part.
+        Element priceElement = doc.selectFirst(".a-price-whole");
+        Element fractionElement = doc.selectFirst(".a-price-fraction");
+
+        if (priceElement == null) {
+            System.out.println("Prix non trouvé");
+        }
+        if (fractionElement == null) {
+            System.out.println("Fraction non trouvé");
+        }
+        if (priceElement != null && fractionElement != null) {
+            System.out.println("Le prix est de: " + priceElement.text() + fractionElement.text());
+        }
+    }
+}