From f3766955fd92ec3466ed2263968429da216a5fbd Mon Sep 17 00:00:00 2001 From: VinciGit00 Date: Thu, 11 Apr 2024 12:17:04 +0200 Subject: [PATCH 1/5] add examples --- README.md | 39 +++++- examples/gemini/readme.md | 1 + .../{ => Docker}/inputs/books.xml | 0 .../inputs/plain_html_example.txt | 0 examples/local_models/Docker/readme.md | 0 .../{ => Docker}/results/result.csv | 0 .../{ => Docker}/results/result.json | 0 .../scrape_plain_text_docker.py} | 0 .../scrape_xml_docker.py} | 0 .../search_graph_docker.py} | 0 .../smart_scraper_docker.py} | 5 - examples/local_models/Ollama/inputs/books.xml | 120 ++++++++++++++++++ .../Ollama/inputs/plain_html_example.txt | 105 +++++++++++++++ examples/local_models/Ollama/readme.md | 0 examples/local_models/Ollama/result.csv | 2 + examples/local_models/Ollama/result.json | 1 + .../local_models/Ollama/results/result.csv | 2 + .../local_models/Ollama/results/result.json | 1 + .../Ollama/scrape_plain_text_ollama.py | 55 ++++++++ .../local_models/Ollama/scrape_xml_ollama.py | 54 ++++++++ .../Ollama/smart_scraper_ollama.py | 37 ++++++ examples/mixed_models/readme.md | 1 + examples/openai/readme.md | 1 + 23 files changed, 415 insertions(+), 9 deletions(-) create mode 100644 examples/gemini/readme.md rename examples/local_models/{ => Docker}/inputs/books.xml (100%) rename examples/local_models/{ => Docker}/inputs/plain_html_example.txt (100%) create mode 100644 examples/local_models/Docker/readme.md rename examples/local_models/{ => Docker}/results/result.csv (100%) rename examples/local_models/{ => Docker}/results/result.json (100%) rename examples/local_models/{scrape_plain_text_local.py => Docker/scrape_plain_text_docker.py} (100%) rename examples/local_models/{scrape_xml_local.py => Docker/scrape_xml_docker.py} (100%) rename examples/local_models/{search_graph_local.py => Docker/search_graph_docker.py} (100%) rename examples/local_models/{smart_scraper_local.py => Docker/smart_scraper_docker.py} (87%) create mode 100644 
examples/local_models/Ollama/inputs/books.xml create mode 100644 examples/local_models/Ollama/inputs/plain_html_example.txt create mode 100644 examples/local_models/Ollama/readme.md create mode 100644 examples/local_models/Ollama/result.csv create mode 100644 examples/local_models/Ollama/result.json create mode 100644 examples/local_models/Ollama/results/result.csv create mode 100644 examples/local_models/Ollama/results/result.json create mode 100644 examples/local_models/Ollama/scrape_plain_text_ollama.py create mode 100644 examples/local_models/Ollama/scrape_xml_ollama.py create mode 100644 examples/local_models/Ollama/smart_scraper_ollama.py create mode 100644 examples/mixed_models/readme.md create mode 100644 examples/openai/readme.md diff --git a/README.md b/README.md index c86039ce..bd97c42f 100644 --- a/README.md +++ b/README.md @@ -43,14 +43,45 @@ Check out also the docusaurus [documentation](https://scrapegraph-doc.onrender.c You can use the `SmartScraper` class to extract information from a website using a prompt. The `SmartScraper` class is a direct graph implementation that uses the most common nodes present in a web scraping pipeline. For more information, please see the [documentation](https://scrapegraph-ai.readthedocs.io/en/latest/). -### Case 1: Extracting informations using a local LLM +### Case 1: Extracting informations using Ollama +Remember to download the model on Ollama separately! 
+```python +from scrapegraphai.graphs import SmartScraperGraph + +graph_config = { + "llm": { + "model": "ollama/mistral", + "temperature": 0, + "format": "json", # Ollama needs the format to be specified explicitly + "base_url": "http://localhost:11434", # set ollama URL arbitrarily + }, + "embeddings": { + "model": "ollama/nomic-embed-text", + "temperature": 0, + "base_url": "http://localhost:11434", # set ollama URL arbitrarily + } +} + +smart_scraper_graph = SmartScraperGraph( + prompt="List me all the news with their description.", + # also accepts a string with the already downloaded HTML code + source="https://perinim.github.io/projects", + config=graph_config +) + +result = smart_scraper_graph.run() +print(result) + +``` + +### Case 2: Extracting informations using Docker Note: before using the local model remeber to create the docker container! ```text docker-compose up -d docker exec -it ollama ollama run stablelm-zephyr ``` -You can use which model you want instead of stablelm-zephyr +You can use any model available on Ollama, or your own model, instead of stablelm-zephyr ```python from scrapegraphai.graphs import SmartScraperGraph @@ -75,7 +106,7 @@ print(result) ``` -### Case 2: Extracting informations using Openai model +### Case 3: Extracting informations using Openai model ```python from scrapegraphai.graphs import SmartScraperGraph OPENAI_API_KEY = "YOUR_API_KEY" @@ -98,7 +129,7 @@ result = smart_scraper_graph.run() print(result) ``` -### Case 3: Extracting informations using Gemini +### Case 4: Extracting informations using Gemini ```python from scrapegraphai.graphs import SmartScraperGraph GOOGLE_APIKEY = "YOUR_API_KEY" diff --git a/examples/gemini/readme.md b/examples/gemini/readme.md new file mode 100644 index 00000000..7e06773d --- /dev/null +++ b/examples/gemini/readme.md @@ -0,0 +1 @@ +This folder contains an example of how to use ScrapeGraph-AI with Gemini, a large language model (LLM) from Google AI. 
The example shows how to extract information from a website using a natural language prompt. \ No newline at end of file diff --git a/examples/local_models/inputs/books.xml b/examples/local_models/Docker/inputs/books.xml similarity index 100% rename from examples/local_models/inputs/books.xml rename to examples/local_models/Docker/inputs/books.xml diff --git a/examples/local_models/inputs/plain_html_example.txt b/examples/local_models/Docker/inputs/plain_html_example.txt similarity index 100% rename from examples/local_models/inputs/plain_html_example.txt rename to examples/local_models/Docker/inputs/plain_html_example.txt diff --git a/examples/local_models/Docker/readme.md b/examples/local_models/Docker/readme.md new file mode 100644 index 00000000..e69de29b diff --git a/examples/local_models/results/result.csv b/examples/local_models/Docker/results/result.csv similarity index 100% rename from examples/local_models/results/result.csv rename to examples/local_models/Docker/results/result.csv diff --git a/examples/local_models/results/result.json b/examples/local_models/Docker/results/result.json similarity index 100% rename from examples/local_models/results/result.json rename to examples/local_models/Docker/results/result.json diff --git a/examples/local_models/scrape_plain_text_local.py b/examples/local_models/Docker/scrape_plain_text_docker.py similarity index 100% rename from examples/local_models/scrape_plain_text_local.py rename to examples/local_models/Docker/scrape_plain_text_docker.py diff --git a/examples/local_models/scrape_xml_local.py b/examples/local_models/Docker/scrape_xml_docker.py similarity index 100% rename from examples/local_models/scrape_xml_local.py rename to examples/local_models/Docker/scrape_xml_docker.py diff --git a/examples/local_models/search_graph_local.py b/examples/local_models/Docker/search_graph_docker.py similarity index 100% rename from examples/local_models/search_graph_local.py rename to 
examples/local_models/Docker/search_graph_docker.py diff --git a/examples/local_models/smart_scraper_local.py b/examples/local_models/Docker/smart_scraper_docker.py similarity index 87% rename from examples/local_models/smart_scraper_local.py rename to examples/local_models/Docker/smart_scraper_docker.py index 09a18fac..b02aa8c5 100644 --- a/examples/local_models/smart_scraper_local.py +++ b/examples/local_models/Docker/smart_scraper_docker.py @@ -21,12 +21,7 @@ "temperature": 0, "format": "json", # Ollama needs the format to be specified explicitly # "model_tokens": 2000, # set context length arbitrarily, - # "base_url": "http://ollama:11434", # set ollama URL arbitrarily }, - "embeddings": { - "model": "ollama/nomic-embed-text", - "temperature": 0, - } } # ************************************************ diff --git a/examples/local_models/Ollama/inputs/books.xml b/examples/local_models/Ollama/inputs/books.xml new file mode 100644 index 00000000..e3d1fe87 --- /dev/null +++ b/examples/local_models/Ollama/inputs/books.xml @@ -0,0 +1,120 @@ + + + + Gambardella, Matthew + XML Developer's Guide + Computer + 44.95 + 2000-10-01 + An in-depth look at creating applications + with XML. + + + Ralls, Kim + Midnight Rain + Fantasy + 5.95 + 2000-12-16 + A former architect battles corporate zombies, + an evil sorceress, and her own childhood to become queen + of the world. + + + Corets, Eva + Maeve Ascendant + Fantasy + 5.95 + 2000-11-17 + After the collapse of a nanotechnology + society in England, the young survivors lay the + foundation for a new society. + + + Corets, Eva + Oberon's Legacy + Fantasy + 5.95 + 2001-03-10 + In post-apocalypse England, the mysterious + agent known only as Oberon helps to create a new life + for the inhabitants of London. Sequel to Maeve + Ascendant. + + + Corets, Eva + The Sundered Grail + Fantasy + 5.95 + 2001-09-10 + The two daughters of Maeve, half-sisters, + battle one another for control of England. Sequel to + Oberon's Legacy. 
+ + + Randall, Cynthia + Lover Birds + Romance + 4.95 + 2000-09-02 + When Carla meets Paul at an ornithology + conference, tempers fly as feathers get ruffled. + + + Thurman, Paula + Splish Splash + Romance + 4.95 + 2000-11-02 + A deep sea diver finds true love twenty + thousand leagues beneath the sea. + + + Knorr, Stefan + Creepy Crawlies + Horror + 4.95 + 2000-12-06 + An anthology of horror stories about roaches, + centipedes, scorpions and other insects. + + + Kress, Peter + Paradox Lost + Science Fiction + 6.95 + 2000-11-02 + After an inadvertant trip through a Heisenberg + Uncertainty Device, James Salway discovers the problems + of being quantum. + + + O'Brien, Tim + Microsoft .NET: The Programming Bible + Computer + 36.95 + 2000-12-09 + Microsoft's .NET initiative is explored in + detail in this deep programmer's reference. + + + O'Brien, Tim + MSXML3: A Comprehensive Guide + Computer + 36.95 + 2000-12-01 + The Microsoft MSXML3 parser is covered in + detail, with attention to XML DOM interfaces, XSLT processing, + SAX and more. + + + Galos, Mike + Visual Studio 7: A Comprehensive Guide + Computer + 49.95 + 2001-04-16 + Microsoft Visual Studio 7 is explored in depth, + looking at how Visual Basic, Visual C++, C#, and ASP+ are + integrated into a comprehensive development + environment. + + \ No newline at end of file diff --git a/examples/local_models/Ollama/inputs/plain_html_example.txt b/examples/local_models/Ollama/inputs/plain_html_example.txt new file mode 100644 index 00000000..78f814ae --- /dev/null +++ b/examples/local_models/Ollama/inputs/plain_html_example.txt @@ -0,0 +1,105 @@ + +
+ + +
+
+
+
+
+
+

Projects

+

+
+
+ +
+
+
+ +
+ \ No newline at end of file diff --git a/examples/local_models/Ollama/readme.md b/examples/local_models/Ollama/readme.md new file mode 100644 index 00000000..e69de29b diff --git a/examples/local_models/Ollama/result.csv b/examples/local_models/Ollama/result.csv new file mode 100644 index 00000000..97ef817e --- /dev/null +++ b/examples/local_models/Ollama/result.csv @@ -0,0 +1,2 @@ +0,1,2,3 +"{'title': 'Rotary Pendulum RL', 'description': 'Open Source project aimed at controlling a real life rotary pendulum using RL algorithms'}","{'title': 'DQN Implementation from scratch', 'description': 'Developed a Deep Q-Network algorithm to train a simple and double pendulum'}","{'title': 'Multi Agents HAED', 'description': 'University project which focuses on simulating a multi-agent system to perform environment mapping. Agents, equipped with sensors, explore and record their surroundings, considering uncertainties in their readings.'}","{'title': 'Wireless ESC for Modular Drones', 'description': 'Modular drone architecture proposal and proof of concept. The project received maximum grade.'}" diff --git a/examples/local_models/Ollama/result.json b/examples/local_models/Ollama/result.json new file mode 100644 index 00000000..48b3752f --- /dev/null +++ b/examples/local_models/Ollama/result.json @@ -0,0 +1 @@ +{"news": [{"title": "Rotary Pendulum RL", "description": "Open Source project aimed at controlling a real life rotary pendulum using RL algorithms"}, {"title": "DQN Implementation from scratch", "description": "Developed a Deep Q-Network algorithm to train a simple and double pendulum"}, {"title": "Multi Agents HAED", "description": "University project which focuses on simulating a multi-agent system to perform environment mapping. Agents, equipped with sensors, explore and record their surroundings, considering uncertainties in their readings."}, {"title": "Wireless ESC for Modular Drones", "description": "Modular drone architecture proposal and proof of concept. 
The project received maximum grade."}]} \ No newline at end of file diff --git a/examples/local_models/Ollama/results/result.csv b/examples/local_models/Ollama/results/result.csv new file mode 100644 index 00000000..97ef817e --- /dev/null +++ b/examples/local_models/Ollama/results/result.csv @@ -0,0 +1,2 @@ +0,1,2,3 +"{'title': 'Rotary Pendulum RL', 'description': 'Open Source project aimed at controlling a real life rotary pendulum using RL algorithms'}","{'title': 'DQN Implementation from scratch', 'description': 'Developed a Deep Q-Network algorithm to train a simple and double pendulum'}","{'title': 'Multi Agents HAED', 'description': 'University project which focuses on simulating a multi-agent system to perform environment mapping. Agents, equipped with sensors, explore and record their surroundings, considering uncertainties in their readings.'}","{'title': 'Wireless ESC for Modular Drones', 'description': 'Modular drone architecture proposal and proof of concept. The project received maximum grade.'}" diff --git a/examples/local_models/Ollama/results/result.json b/examples/local_models/Ollama/results/result.json new file mode 100644 index 00000000..8a4e7057 --- /dev/null +++ b/examples/local_models/Ollama/results/result.json @@ -0,0 +1 @@ +{"projects": [{"title": "Rotary Pendulum RL", "description": "Open Source project aimed at controlling a real life rotary pendulum using RL algorithms"}, {"title": "DQN Implementation from scratch", "description": "Developed a Deep Q-Network algorithm to train a simple and double pendulum"}, {"title": "Multi Agents HAED", "description": "University project which focuses on simulating a multi-agent system to perform environment mapping. Agents, equipped with sensors, explore and record their surroundings, considering uncertainties in their readings."}, {"title": "Wireless ESC for Modular Drones", "description": "Modular drone architecture proposal and proof of concept. 
The project received maximum grade."}]} \ No newline at end of file diff --git a/examples/local_models/Ollama/scrape_plain_text_ollama.py b/examples/local_models/Ollama/scrape_plain_text_ollama.py new file mode 100644 index 00000000..a9351d70 --- /dev/null +++ b/examples/local_models/Ollama/scrape_plain_text_ollama.py @@ -0,0 +1,55 @@ +""" +Basic example of scraping pipeline using SmartScraper from text +""" + +import os +from scrapegraphai.graphs import SmartScraperGraph +from scrapegraphai.utils import convert_to_csv, convert_to_json + +# ************************************************ +# Read the text file +# ************************************************ + +FILE_NAME = "inputs/plain_html_example.txt" +curr_dir = os.path.dirname(os.path.realpath(__file__)) +file_path = os.path.join(curr_dir, FILE_NAME) + +# It could be also a http request using the request model +with open(file_path, 'r', encoding="utf-8") as file: + text = file.read() + +# ************************************************ +# Define the configuration for the graph +# ************************************************ + +graph_config = { + "llm": { + "model": "ollama/mistral", + "temperature": 0, + "format": "json", # Ollama needs the format to be specified explicitly + # "model_tokens": 2000, # set context length arbitrarily + "base_url": "http://localhost:11434", + }, + "embeddings": { + "model": "ollama/nomic-embed-text", + "temperature": 0, + "base_url": "http://localhost:11434", + } +} + +# ************************************************ +# Create the SmartScraperGraph instance and run it +# ************************************************ + +smart_scraper_graph = SmartScraperGraph( + prompt="List me all the news with their description.", + source=text, + config=graph_config +) + +result = smart_scraper_graph.run() +print(result) + +# Save to json or csv +convert_to_csv(result, "result") +convert_to_json(result, "result") diff --git a/examples/local_models/Ollama/scrape_xml_ollama.py 
b/examples/local_models/Ollama/scrape_xml_ollama.py new file mode 100644 index 00000000..9b3838f1 --- /dev/null +++ b/examples/local_models/Ollama/scrape_xml_ollama.py @@ -0,0 +1,54 @@ +""" +Basic example of scraping pipeline using SmartScraper from XML documents +""" +import os +from scrapegraphai.graphs import SmartScraperGraph +from scrapegraphai.utils import convert_to_csv, convert_to_json + +# ************************************************ +# Read the XML file +# ************************************************ + +FILE_NAME = "inputs/books.xml" +curr_dir = os.path.dirname(os.path.realpath(__file__)) +file_path = os.path.join(curr_dir, FILE_NAME) + +with open(file_path, 'r', encoding="utf-8") as file: + text = file.read() + +# ************************************************ +# Define the configuration for the graph +# ************************************************ + + +graph_config = { + "llm": { + "model": "ollama/mistral", + "temperature": 0, + "format": "json", # Ollama needs the format to be specified explicitly + # "model_tokens": 2000, # set context length arbitrarily + "base_url": "http://localhost:11434", # set ollama URL arbitrarily + }, + "embeddings": { + "model": "ollama/nomic-embed-text", + "temperature": 0, + "base_url": "http://localhost:11434", # set ollama URL arbitrarily + } +} + +# ************************************************ +# Create the SmartScraperGraph instance and run it +# ************************************************ + +smart_scraper_graph = SmartScraperGraph( + prompt="List me all the authors, title and genres of the books", + source=text, # Pass the content of the file, not the file object + config=graph_config +) + +result = smart_scraper_graph.run() +print(result) + +# Save to json or csv +convert_to_csv(result, "result") +convert_to_json(result, "result") diff --git a/examples/local_models/Ollama/smart_scraper_ollama.py b/examples/local_models/Ollama/smart_scraper_ollama.py new file mode 100644 index 
00000000..f8da3cfe --- /dev/null +++ b/examples/local_models/Ollama/smart_scraper_ollama.py @@ -0,0 +1,37 @@ +""" +Basic example of scraping pipeline using SmartScraper +""" +from scrapegraphai.graphs import SmartScraperGraph + +# ************************************************ +# Define the configuration for the graph +# ************************************************ + +graph_config = { + "llm": { + "model": "ollama/mistral", + "temperature": 0, + "format": "json", # Ollama needs the format to be specified explicitly + # "model_tokens": 2000, # set context length arbitrarily, + "base_url": "http://localhost:11434", # set ollama URL arbitrarily + }, + "embeddings": { + "model": "ollama/nomic-embed-text", + "temperature": 0, + "base_url": "http://localhost:11434", # set ollama URL arbitrarily + } +} + +# ************************************************ +# Create the SmartScraperGraph instance and run it +# ************************************************ + +smart_scraper_graph = SmartScraperGraph( + prompt="List me all the news with their description.", + # also accepts a string with the already downloaded HTML code + source="https://perinim.github.io/projects", + config=graph_config +) + +result = smart_scraper_graph.run() +print(result) diff --git a/examples/mixed_models/readme.md b/examples/mixed_models/readme.md new file mode 100644 index 00000000..9e739212 --- /dev/null +++ b/examples/mixed_models/readme.md @@ -0,0 +1 @@ +This folder contains an example of how to use ScrapeGraph-AI with mixed models. The example shows how to extract information from a website using a natural language prompt and a machine learning model. \ No newline at end of file diff --git a/examples/openai/readme.md b/examples/openai/readme.md new file mode 100644 index 00000000..9a517ac6 --- /dev/null +++ b/examples/openai/readme.md @@ -0,0 +1 @@ +This folder contains an example of how to use ScrapeGraph-AI with OpenAI, an artificial intelligence platform. 
The examples show how to extract information from a website using a natural language prompt. \ No newline at end of file From 87d911b7bbc78fd10054db35dbb482bc6ea1acfd Mon Sep 17 00:00:00 2001 From: VinciGit00 Date: Thu, 11 Apr 2024 18:18:22 +0200 Subject: [PATCH 2/5] removed files and refactoring of actions --- .github/workflows/pylint.yml | 2 +- examples/gemini/inputs/books.xml | 120 ------------------ examples/gemini/inputs/plain_html_example.txt | 105 --------------- examples/local_models/Docker/inputs/books.xml | 120 ------------------ .../Docker/inputs/plain_html_example.txt | 105 --------------- examples/local_models/Ollama/inputs/books.xml | 120 ------------------ .../Ollama/inputs/plain_html_example.txt | 105 --------------- examples/openai/inputs/books.xml | 120 ------------------ examples/openai/inputs/plain_html_example.txt | 105 --------------- manual deployement/commit_and_push.sh | 2 +- 10 files changed, 2 insertions(+), 902 deletions(-) delete mode 100644 examples/gemini/inputs/books.xml delete mode 100644 examples/gemini/inputs/plain_html_example.txt delete mode 100644 examples/local_models/Docker/inputs/books.xml delete mode 100644 examples/local_models/Docker/inputs/plain_html_example.txt delete mode 100644 examples/local_models/Ollama/inputs/books.xml delete mode 100644 examples/local_models/Ollama/inputs/plain_html_example.txt delete mode 100644 examples/openai/inputs/books.xml delete mode 100644 examples/openai/inputs/plain_html_example.txt diff --git a/.github/workflows/pylint.yml b/.github/workflows/pylint.yml index f675162f..464080ff 100644 --- a/.github/workflows/pylint.yml +++ b/.github/workflows/pylint.yml @@ -20,4 +20,4 @@ jobs: pip install pylint pip install -r requirements.txt - name: Analysing the code with pylint - run: pylint --disable=C0114,C0115,C0116 --exit-zero scrapegraphai/**/*.py scrapegraphai/*.py examples/**/*.py tests/**/*.py \ No newline at end of file + run: pylint --disable=C0114,C0115,C0116 --exit-zero 
scrapegraphai/**/*.py scrapegraphai/*.py \ No newline at end of file diff --git a/examples/gemini/inputs/books.xml b/examples/gemini/inputs/books.xml deleted file mode 100644 index e3d1fe87..00000000 --- a/examples/gemini/inputs/books.xml +++ /dev/null @@ -1,120 +0,0 @@ - - - - Gambardella, Matthew - XML Developer's Guide - Computer - 44.95 - 2000-10-01 - An in-depth look at creating applications - with XML. - - - Ralls, Kim - Midnight Rain - Fantasy - 5.95 - 2000-12-16 - A former architect battles corporate zombies, - an evil sorceress, and her own childhood to become queen - of the world. - - - Corets, Eva - Maeve Ascendant - Fantasy - 5.95 - 2000-11-17 - After the collapse of a nanotechnology - society in England, the young survivors lay the - foundation for a new society. - - - Corets, Eva - Oberon's Legacy - Fantasy - 5.95 - 2001-03-10 - In post-apocalypse England, the mysterious - agent known only as Oberon helps to create a new life - for the inhabitants of London. Sequel to Maeve - Ascendant. - - - Corets, Eva - The Sundered Grail - Fantasy - 5.95 - 2001-09-10 - The two daughters of Maeve, half-sisters, - battle one another for control of England. Sequel to - Oberon's Legacy. - - - Randall, Cynthia - Lover Birds - Romance - 4.95 - 2000-09-02 - When Carla meets Paul at an ornithology - conference, tempers fly as feathers get ruffled. - - - Thurman, Paula - Splish Splash - Romance - 4.95 - 2000-11-02 - A deep sea diver finds true love twenty - thousand leagues beneath the sea. - - - Knorr, Stefan - Creepy Crawlies - Horror - 4.95 - 2000-12-06 - An anthology of horror stories about roaches, - centipedes, scorpions and other insects. - - - Kress, Peter - Paradox Lost - Science Fiction - 6.95 - 2000-11-02 - After an inadvertant trip through a Heisenberg - Uncertainty Device, James Salway discovers the problems - of being quantum. 
- - - O'Brien, Tim - Microsoft .NET: The Programming Bible - Computer - 36.95 - 2000-12-09 - Microsoft's .NET initiative is explored in - detail in this deep programmer's reference. - - - O'Brien, Tim - MSXML3: A Comprehensive Guide - Computer - 36.95 - 2000-12-01 - The Microsoft MSXML3 parser is covered in - detail, with attention to XML DOM interfaces, XSLT processing, - SAX and more. - - - Galos, Mike - Visual Studio 7: A Comprehensive Guide - Computer - 49.95 - 2001-04-16 - Microsoft Visual Studio 7 is explored in depth, - looking at how Visual Basic, Visual C++, C#, and ASP+ are - integrated into a comprehensive development - environment. - - \ No newline at end of file diff --git a/examples/gemini/inputs/plain_html_example.txt b/examples/gemini/inputs/plain_html_example.txt deleted file mode 100644 index 78f814ae..00000000 --- a/examples/gemini/inputs/plain_html_example.txt +++ /dev/null @@ -1,105 +0,0 @@ - -
- - -
-
-
- - -
- \ No newline at end of file diff --git a/examples/local_models/Docker/inputs/books.xml b/examples/local_models/Docker/inputs/books.xml deleted file mode 100644 index e3d1fe87..00000000 --- a/examples/local_models/Docker/inputs/books.xml +++ /dev/null @@ -1,120 +0,0 @@ - - - - Gambardella, Matthew - XML Developer's Guide - Computer - 44.95 - 2000-10-01 - An in-depth look at creating applications - with XML. - - - Ralls, Kim - Midnight Rain - Fantasy - 5.95 - 2000-12-16 - A former architect battles corporate zombies, - an evil sorceress, and her own childhood to become queen - of the world. - - - Corets, Eva - Maeve Ascendant - Fantasy - 5.95 - 2000-11-17 - After the collapse of a nanotechnology - society in England, the young survivors lay the - foundation for a new society. - - - Corets, Eva - Oberon's Legacy - Fantasy - 5.95 - 2001-03-10 - In post-apocalypse England, the mysterious - agent known only as Oberon helps to create a new life - for the inhabitants of London. Sequel to Maeve - Ascendant. - - - Corets, Eva - The Sundered Grail - Fantasy - 5.95 - 2001-09-10 - The two daughters of Maeve, half-sisters, - battle one another for control of England. Sequel to - Oberon's Legacy. - - - Randall, Cynthia - Lover Birds - Romance - 4.95 - 2000-09-02 - When Carla meets Paul at an ornithology - conference, tempers fly as feathers get ruffled. - - - Thurman, Paula - Splish Splash - Romance - 4.95 - 2000-11-02 - A deep sea diver finds true love twenty - thousand leagues beneath the sea. - - - Knorr, Stefan - Creepy Crawlies - Horror - 4.95 - 2000-12-06 - An anthology of horror stories about roaches, - centipedes, scorpions and other insects. - - - Kress, Peter - Paradox Lost - Science Fiction - 6.95 - 2000-11-02 - After an inadvertant trip through a Heisenberg - Uncertainty Device, James Salway discovers the problems - of being quantum. 
- - - O'Brien, Tim - Microsoft .NET: The Programming Bible - Computer - 36.95 - 2000-12-09 - Microsoft's .NET initiative is explored in - detail in this deep programmer's reference. - - - O'Brien, Tim - MSXML3: A Comprehensive Guide - Computer - 36.95 - 2000-12-01 - The Microsoft MSXML3 parser is covered in - detail, with attention to XML DOM interfaces, XSLT processing, - SAX and more. - - - Galos, Mike - Visual Studio 7: A Comprehensive Guide - Computer - 49.95 - 2001-04-16 - Microsoft Visual Studio 7 is explored in depth, - looking at how Visual Basic, Visual C++, C#, and ASP+ are - integrated into a comprehensive development - environment. - - \ No newline at end of file diff --git a/examples/local_models/Docker/inputs/plain_html_example.txt b/examples/local_models/Docker/inputs/plain_html_example.txt deleted file mode 100644 index 78f814ae..00000000 --- a/examples/local_models/Docker/inputs/plain_html_example.txt +++ /dev/null @@ -1,105 +0,0 @@ - -
- - -
-
-
- - -
- \ No newline at end of file diff --git a/examples/local_models/Ollama/inputs/books.xml b/examples/local_models/Ollama/inputs/books.xml deleted file mode 100644 index e3d1fe87..00000000 --- a/examples/local_models/Ollama/inputs/books.xml +++ /dev/null @@ -1,120 +0,0 @@ - - - - Gambardella, Matthew - XML Developer's Guide - Computer - 44.95 - 2000-10-01 - An in-depth look at creating applications - with XML. - - - Ralls, Kim - Midnight Rain - Fantasy - 5.95 - 2000-12-16 - A former architect battles corporate zombies, - an evil sorceress, and her own childhood to become queen - of the world. - - - Corets, Eva - Maeve Ascendant - Fantasy - 5.95 - 2000-11-17 - After the collapse of a nanotechnology - society in England, the young survivors lay the - foundation for a new society. - - - Corets, Eva - Oberon's Legacy - Fantasy - 5.95 - 2001-03-10 - In post-apocalypse England, the mysterious - agent known only as Oberon helps to create a new life - for the inhabitants of London. Sequel to Maeve - Ascendant. - - - Corets, Eva - The Sundered Grail - Fantasy - 5.95 - 2001-09-10 - The two daughters of Maeve, half-sisters, - battle one another for control of England. Sequel to - Oberon's Legacy. - - - Randall, Cynthia - Lover Birds - Romance - 4.95 - 2000-09-02 - When Carla meets Paul at an ornithology - conference, tempers fly as feathers get ruffled. - - - Thurman, Paula - Splish Splash - Romance - 4.95 - 2000-11-02 - A deep sea diver finds true love twenty - thousand leagues beneath the sea. - - - Knorr, Stefan - Creepy Crawlies - Horror - 4.95 - 2000-12-06 - An anthology of horror stories about roaches, - centipedes, scorpions and other insects. - - - Kress, Peter - Paradox Lost - Science Fiction - 6.95 - 2000-11-02 - After an inadvertant trip through a Heisenberg - Uncertainty Device, James Salway discovers the problems - of being quantum. 
- - - O'Brien, Tim - Microsoft .NET: The Programming Bible - Computer - 36.95 - 2000-12-09 - Microsoft's .NET initiative is explored in - detail in this deep programmer's reference. - - - O'Brien, Tim - MSXML3: A Comprehensive Guide - Computer - 36.95 - 2000-12-01 - The Microsoft MSXML3 parser is covered in - detail, with attention to XML DOM interfaces, XSLT processing, - SAX and more. - - - Galos, Mike - Visual Studio 7: A Comprehensive Guide - Computer - 49.95 - 2001-04-16 - Microsoft Visual Studio 7 is explored in depth, - looking at how Visual Basic, Visual C++, C#, and ASP+ are - integrated into a comprehensive development - environment. - - \ No newline at end of file diff --git a/examples/local_models/Ollama/inputs/plain_html_example.txt b/examples/local_models/Ollama/inputs/plain_html_example.txt deleted file mode 100644 index 78f814ae..00000000 --- a/examples/local_models/Ollama/inputs/plain_html_example.txt +++ /dev/null @@ -1,105 +0,0 @@ - -
- - -
-
-
- - -
- \ No newline at end of file diff --git a/examples/openai/inputs/books.xml b/examples/openai/inputs/books.xml deleted file mode 100644 index e3d1fe87..00000000 --- a/examples/openai/inputs/books.xml +++ /dev/null @@ -1,120 +0,0 @@ - - - - Gambardella, Matthew - XML Developer's Guide - Computer - 44.95 - 2000-10-01 - An in-depth look at creating applications - with XML. - - - Ralls, Kim - Midnight Rain - Fantasy - 5.95 - 2000-12-16 - A former architect battles corporate zombies, - an evil sorceress, and her own childhood to become queen - of the world. - - - Corets, Eva - Maeve Ascendant - Fantasy - 5.95 - 2000-11-17 - After the collapse of a nanotechnology - society in England, the young survivors lay the - foundation for a new society. - - - Corets, Eva - Oberon's Legacy - Fantasy - 5.95 - 2001-03-10 - In post-apocalypse England, the mysterious - agent known only as Oberon helps to create a new life - for the inhabitants of London. Sequel to Maeve - Ascendant. - - - Corets, Eva - The Sundered Grail - Fantasy - 5.95 - 2001-09-10 - The two daughters of Maeve, half-sisters, - battle one another for control of England. Sequel to - Oberon's Legacy. - - - Randall, Cynthia - Lover Birds - Romance - 4.95 - 2000-09-02 - When Carla meets Paul at an ornithology - conference, tempers fly as feathers get ruffled. - - - Thurman, Paula - Splish Splash - Romance - 4.95 - 2000-11-02 - A deep sea diver finds true love twenty - thousand leagues beneath the sea. - - - Knorr, Stefan - Creepy Crawlies - Horror - 4.95 - 2000-12-06 - An anthology of horror stories about roaches, - centipedes, scorpions and other insects. - - - Kress, Peter - Paradox Lost - Science Fiction - 6.95 - 2000-11-02 - After an inadvertant trip through a Heisenberg - Uncertainty Device, James Salway discovers the problems - of being quantum. 
- - - O'Brien, Tim - Microsoft .NET: The Programming Bible - Computer - 36.95 - 2000-12-09 - Microsoft's .NET initiative is explored in - detail in this deep programmer's reference. - - - O'Brien, Tim - MSXML3: A Comprehensive Guide - Computer - 36.95 - 2000-12-01 - The Microsoft MSXML3 parser is covered in - detail, with attention to XML DOM interfaces, XSLT processing, - SAX and more. - - - Galos, Mike - Visual Studio 7: A Comprehensive Guide - Computer - 49.95 - 2001-04-16 - Microsoft Visual Studio 7 is explored in depth, - looking at how Visual Basic, Visual C++, C#, and ASP+ are - integrated into a comprehensive development - environment. - - \ No newline at end of file diff --git a/examples/openai/inputs/plain_html_example.txt b/examples/openai/inputs/plain_html_example.txt deleted file mode 100644 index 78f814ae..00000000 --- a/examples/openai/inputs/plain_html_example.txt +++ /dev/null @@ -1,105 +0,0 @@ - -
- - -
-
-
- - -
- \ No newline at end of file diff --git a/manual deployement/commit_and_push.sh b/manual deployement/commit_and_push.sh index 4a0afbee..cb51c968 100755 --- a/manual deployement/commit_and_push.sh +++ b/manual deployement/commit_and_push.sh @@ -21,7 +21,7 @@ cd .. commit_message="$1" # Run Pylint on the specified Python files -pylint scrapegraphai/**/*.py scrapegraphai/*.py examples/**/*.py tests/**/*.py +pylint pylint scrapegraphai/**/*.py scrapegraphai/*.py #Make the pull git pull From 8e08652d8a150f95bbee7e82b289dc9f32e7f711 Mon Sep 17 00:00:00 2001 From: VinciGit00 Date: Fri, 12 Apr 2024 11:05:52 +0200 Subject: [PATCH 3/5] removed unused file and removed unused documentation --- examples/local_models/Docker/smart_scraper_docker.py | 8 -------- examples/local_models/Ollama/result.csv | 2 -- 2 files changed, 10 deletions(-) delete mode 100644 examples/local_models/Ollama/result.csv diff --git a/examples/local_models/Docker/smart_scraper_docker.py b/examples/local_models/Docker/smart_scraper_docker.py index b02aa8c5..a7e1ce71 100644 --- a/examples/local_models/Docker/smart_scraper_docker.py +++ b/examples/local_models/Docker/smart_scraper_docker.py @@ -6,14 +6,6 @@ # ************************************************ # Define the configuration for the graph # ************************************************ -""" - Avaiable models: - - ollama/llama2 - - ollama/mistral - - ollama/codellama - - ollama/dolphin-mixtral - - ollama/mistral-openorca -""" graph_config = { "llm": { diff --git a/examples/local_models/Ollama/result.csv b/examples/local_models/Ollama/result.csv deleted file mode 100644 index 97ef817e..00000000 --- a/examples/local_models/Ollama/result.csv +++ /dev/null @@ -1,2 +0,0 @@ -0,1,2,3 -"{'title': 'Rotary Pendulum RL', 'description': 'Open Source project aimed at controlling a real life rotary pendulum using RL algorithms'}","{'title': 'DQN Implementation from scratch', 'description': 'Developed a Deep Q-Network algorithm to train a simple and double 
pendulum'}","{'title': 'Multi Agents HAED', 'description': 'University project which focuses on simulating a multi-agent system to perform environment mapping. Agents, equipped with sensors, explore and record their surroundings, considering uncertainties in their readings.'}","{'title': 'Wireless ESC for Modular Drones', 'description': 'Modular drone architecture proposal and proof of concept. The project received maximum grade.'}" From 60a2377bffb97e6b8fcbe08f98fb7a4a5f4af6c6 Mon Sep 17 00:00:00 2001 From: VinciGit00 Date: Fri, 12 Apr 2024 12:53:54 +0200 Subject: [PATCH 4/5] add inputs --- examples/gemini/inputs/books.xml | 120 ++++++++++++++++++ examples/gemini/inputs/plain_html_example.txt | 105 +++++++++++++++ examples/gemini/results/result.csv | 2 - examples/gemini/results/result.json | 1 - examples/local_models/Docker/inputs/books.xml | 120 ++++++++++++++++++ .../Docker/inputs/plain_html_example.txt | 105 +++++++++++++++ .../local_models/Docker/results/result.csv | 2 - .../local_models/Docker/results/result.json | 1 - examples/local_models/Ollama/inputs/books.xml | 120 ++++++++++++++++++ .../Ollama/inputs/plain_html_example.txt | 105 +++++++++++++++ examples/local_models/Ollama/result.json | 1 - .../local_models/Ollama/results/result.csv | 2 - .../local_models/Ollama/results/result.json | 1 - examples/mixed_models/results/result.csv | 2 - examples/mixed_models/results/result.json | 1 - examples/openai/inputs/books.xml | 120 ++++++++++++++++++ examples/openai/inputs/plain_html_example.txt | 105 +++++++++++++++ examples/openai/results/result.csv | 2 - examples/openai/results/result.json | 1 - 19 files changed, 900 insertions(+), 16 deletions(-) create mode 100644 examples/gemini/inputs/books.xml create mode 100644 examples/gemini/inputs/plain_html_example.txt delete mode 100644 examples/gemini/results/result.csv delete mode 100644 examples/gemini/results/result.json create mode 100644 examples/local_models/Docker/inputs/books.xml create mode 100644 
examples/local_models/Docker/inputs/plain_html_example.txt delete mode 100644 examples/local_models/Docker/results/result.csv delete mode 100644 examples/local_models/Docker/results/result.json create mode 100644 examples/local_models/Ollama/inputs/books.xml create mode 100644 examples/local_models/Ollama/inputs/plain_html_example.txt delete mode 100644 examples/local_models/Ollama/result.json delete mode 100644 examples/local_models/Ollama/results/result.csv delete mode 100644 examples/local_models/Ollama/results/result.json delete mode 100644 examples/mixed_models/results/result.csv delete mode 100644 examples/mixed_models/results/result.json create mode 100644 examples/openai/inputs/books.xml create mode 100644 examples/openai/inputs/plain_html_example.txt delete mode 100644 examples/openai/results/result.csv delete mode 100644 examples/openai/results/result.json diff --git a/examples/gemini/inputs/books.xml b/examples/gemini/inputs/books.xml new file mode 100644 index 00000000..e3d1fe87 --- /dev/null +++ b/examples/gemini/inputs/books.xml @@ -0,0 +1,120 @@ + + + + Gambardella, Matthew + XML Developer's Guide + Computer + 44.95 + 2000-10-01 + An in-depth look at creating applications + with XML. + + + Ralls, Kim + Midnight Rain + Fantasy + 5.95 + 2000-12-16 + A former architect battles corporate zombies, + an evil sorceress, and her own childhood to become queen + of the world. + + + Corets, Eva + Maeve Ascendant + Fantasy + 5.95 + 2000-11-17 + After the collapse of a nanotechnology + society in England, the young survivors lay the + foundation for a new society. + + + Corets, Eva + Oberon's Legacy + Fantasy + 5.95 + 2001-03-10 + In post-apocalypse England, the mysterious + agent known only as Oberon helps to create a new life + for the inhabitants of London. Sequel to Maeve + Ascendant. + + + Corets, Eva + The Sundered Grail + Fantasy + 5.95 + 2001-09-10 + The two daughters of Maeve, half-sisters, + battle one another for control of England. 
Sequel to + Oberon's Legacy. + + + Randall, Cynthia + Lover Birds + Romance + 4.95 + 2000-09-02 + When Carla meets Paul at an ornithology + conference, tempers fly as feathers get ruffled. + + + Thurman, Paula + Splish Splash + Romance + 4.95 + 2000-11-02 + A deep sea diver finds true love twenty + thousand leagues beneath the sea. + + + Knorr, Stefan + Creepy Crawlies + Horror + 4.95 + 2000-12-06 + An anthology of horror stories about roaches, + centipedes, scorpions and other insects. + + + Kress, Peter + Paradox Lost + Science Fiction + 6.95 + 2000-11-02 + After an inadvertant trip through a Heisenberg + Uncertainty Device, James Salway discovers the problems + of being quantum. + + + O'Brien, Tim + Microsoft .NET: The Programming Bible + Computer + 36.95 + 2000-12-09 + Microsoft's .NET initiative is explored in + detail in this deep programmer's reference. + + + O'Brien, Tim + MSXML3: A Comprehensive Guide + Computer + 36.95 + 2000-12-01 + The Microsoft MSXML3 parser is covered in + detail, with attention to XML DOM interfaces, XSLT processing, + SAX and more. + + + Galos, Mike + Visual Studio 7: A Comprehensive Guide + Computer + 49.95 + 2001-04-16 + Microsoft Visual Studio 7 is explored in depth, + looking at how Visual Basic, Visual C++, C#, and ASP+ are + integrated into a comprehensive development + environment. + + \ No newline at end of file diff --git a/examples/gemini/inputs/plain_html_example.txt b/examples/gemini/inputs/plain_html_example.txt new file mode 100644 index 00000000..78f814ae --- /dev/null +++ b/examples/gemini/inputs/plain_html_example.txt @@ -0,0 +1,105 @@ + +
+ + +
+
+
+ + +
+ \ No newline at end of file diff --git a/examples/gemini/results/result.csv b/examples/gemini/results/result.csv deleted file mode 100644 index 97ef817e..00000000 --- a/examples/gemini/results/result.csv +++ /dev/null @@ -1,2 +0,0 @@ -0,1,2,3 -"{'title': 'Rotary Pendulum RL', 'description': 'Open Source project aimed at controlling a real life rotary pendulum using RL algorithms'}","{'title': 'DQN Implementation from scratch', 'description': 'Developed a Deep Q-Network algorithm to train a simple and double pendulum'}","{'title': 'Multi Agents HAED', 'description': 'University project which focuses on simulating a multi-agent system to perform environment mapping. Agents, equipped with sensors, explore and record their surroundings, considering uncertainties in their readings.'}","{'title': 'Wireless ESC for Modular Drones', 'description': 'Modular drone architecture proposal and proof of concept. The project received maximum grade.'}" diff --git a/examples/gemini/results/result.json b/examples/gemini/results/result.json deleted file mode 100644 index 8a4e7057..00000000 --- a/examples/gemini/results/result.json +++ /dev/null @@ -1 +0,0 @@ -{"projects": [{"title": "Rotary Pendulum RL", "description": "Open Source project aimed at controlling a real life rotary pendulum using RL algorithms"}, {"title": "DQN Implementation from scratch", "description": "Developed a Deep Q-Network algorithm to train a simple and double pendulum"}, {"title": "Multi Agents HAED", "description": "University project which focuses on simulating a multi-agent system to perform environment mapping. Agents, equipped with sensors, explore and record their surroundings, considering uncertainties in their readings."}, {"title": "Wireless ESC for Modular Drones", "description": "Modular drone architecture proposal and proof of concept. 
The project received maximum grade."}]} \ No newline at end of file diff --git a/examples/local_models/Docker/inputs/books.xml b/examples/local_models/Docker/inputs/books.xml new file mode 100644 index 00000000..e3d1fe87 --- /dev/null +++ b/examples/local_models/Docker/inputs/books.xml @@ -0,0 +1,120 @@ + + + + Gambardella, Matthew + XML Developer's Guide + Computer + 44.95 + 2000-10-01 + An in-depth look at creating applications + with XML. + + + Ralls, Kim + Midnight Rain + Fantasy + 5.95 + 2000-12-16 + A former architect battles corporate zombies, + an evil sorceress, and her own childhood to become queen + of the world. + + + Corets, Eva + Maeve Ascendant + Fantasy + 5.95 + 2000-11-17 + After the collapse of a nanotechnology + society in England, the young survivors lay the + foundation for a new society. + + + Corets, Eva + Oberon's Legacy + Fantasy + 5.95 + 2001-03-10 + In post-apocalypse England, the mysterious + agent known only as Oberon helps to create a new life + for the inhabitants of London. Sequel to Maeve + Ascendant. + + + Corets, Eva + The Sundered Grail + Fantasy + 5.95 + 2001-09-10 + The two daughters of Maeve, half-sisters, + battle one another for control of England. Sequel to + Oberon's Legacy. + + + Randall, Cynthia + Lover Birds + Romance + 4.95 + 2000-09-02 + When Carla meets Paul at an ornithology + conference, tempers fly as feathers get ruffled. + + + Thurman, Paula + Splish Splash + Romance + 4.95 + 2000-11-02 + A deep sea diver finds true love twenty + thousand leagues beneath the sea. + + + Knorr, Stefan + Creepy Crawlies + Horror + 4.95 + 2000-12-06 + An anthology of horror stories about roaches, + centipedes, scorpions and other insects. + + + Kress, Peter + Paradox Lost + Science Fiction + 6.95 + 2000-11-02 + After an inadvertant trip through a Heisenberg + Uncertainty Device, James Salway discovers the problems + of being quantum. 
+ + + O'Brien, Tim + Microsoft .NET: The Programming Bible + Computer + 36.95 + 2000-12-09 + Microsoft's .NET initiative is explored in + detail in this deep programmer's reference. + + + O'Brien, Tim + MSXML3: A Comprehensive Guide + Computer + 36.95 + 2000-12-01 + The Microsoft MSXML3 parser is covered in + detail, with attention to XML DOM interfaces, XSLT processing, + SAX and more. + + + Galos, Mike + Visual Studio 7: A Comprehensive Guide + Computer + 49.95 + 2001-04-16 + Microsoft Visual Studio 7 is explored in depth, + looking at how Visual Basic, Visual C++, C#, and ASP+ are + integrated into a comprehensive development + environment. + + \ No newline at end of file diff --git a/examples/local_models/Docker/inputs/plain_html_example.txt b/examples/local_models/Docker/inputs/plain_html_example.txt new file mode 100644 index 00000000..78f814ae --- /dev/null +++ b/examples/local_models/Docker/inputs/plain_html_example.txt @@ -0,0 +1,105 @@ + +
+ + +
+
+
+ + +
+ \ No newline at end of file diff --git a/examples/local_models/Docker/results/result.csv b/examples/local_models/Docker/results/result.csv deleted file mode 100644 index 97ef817e..00000000 --- a/examples/local_models/Docker/results/result.csv +++ /dev/null @@ -1,2 +0,0 @@ -0,1,2,3 -"{'title': 'Rotary Pendulum RL', 'description': 'Open Source project aimed at controlling a real life rotary pendulum using RL algorithms'}","{'title': 'DQN Implementation from scratch', 'description': 'Developed a Deep Q-Network algorithm to train a simple and double pendulum'}","{'title': 'Multi Agents HAED', 'description': 'University project which focuses on simulating a multi-agent system to perform environment mapping. Agents, equipped with sensors, explore and record their surroundings, considering uncertainties in their readings.'}","{'title': 'Wireless ESC for Modular Drones', 'description': 'Modular drone architecture proposal and proof of concept. The project received maximum grade.'}" diff --git a/examples/local_models/Docker/results/result.json b/examples/local_models/Docker/results/result.json deleted file mode 100644 index 8a4e7057..00000000 --- a/examples/local_models/Docker/results/result.json +++ /dev/null @@ -1 +0,0 @@ -{"projects": [{"title": "Rotary Pendulum RL", "description": "Open Source project aimed at controlling a real life rotary pendulum using RL algorithms"}, {"title": "DQN Implementation from scratch", "description": "Developed a Deep Q-Network algorithm to train a simple and double pendulum"}, {"title": "Multi Agents HAED", "description": "University project which focuses on simulating a multi-agent system to perform environment mapping. Agents, equipped with sensors, explore and record their surroundings, considering uncertainties in their readings."}, {"title": "Wireless ESC for Modular Drones", "description": "Modular drone architecture proposal and proof of concept. 
The project received maximum grade."}]} \ No newline at end of file diff --git a/examples/local_models/Ollama/inputs/books.xml b/examples/local_models/Ollama/inputs/books.xml new file mode 100644 index 00000000..e3d1fe87 --- /dev/null +++ b/examples/local_models/Ollama/inputs/books.xml @@ -0,0 +1,120 @@ + + + + Gambardella, Matthew + XML Developer's Guide + Computer + 44.95 + 2000-10-01 + An in-depth look at creating applications + with XML. + + + Ralls, Kim + Midnight Rain + Fantasy + 5.95 + 2000-12-16 + A former architect battles corporate zombies, + an evil sorceress, and her own childhood to become queen + of the world. + + + Corets, Eva + Maeve Ascendant + Fantasy + 5.95 + 2000-11-17 + After the collapse of a nanotechnology + society in England, the young survivors lay the + foundation for a new society. + + + Corets, Eva + Oberon's Legacy + Fantasy + 5.95 + 2001-03-10 + In post-apocalypse England, the mysterious + agent known only as Oberon helps to create a new life + for the inhabitants of London. Sequel to Maeve + Ascendant. + + + Corets, Eva + The Sundered Grail + Fantasy + 5.95 + 2001-09-10 + The two daughters of Maeve, half-sisters, + battle one another for control of England. Sequel to + Oberon's Legacy. + + + Randall, Cynthia + Lover Birds + Romance + 4.95 + 2000-09-02 + When Carla meets Paul at an ornithology + conference, tempers fly as feathers get ruffled. + + + Thurman, Paula + Splish Splash + Romance + 4.95 + 2000-11-02 + A deep sea diver finds true love twenty + thousand leagues beneath the sea. + + + Knorr, Stefan + Creepy Crawlies + Horror + 4.95 + 2000-12-06 + An anthology of horror stories about roaches, + centipedes, scorpions and other insects. + + + Kress, Peter + Paradox Lost + Science Fiction + 6.95 + 2000-11-02 + After an inadvertant trip through a Heisenberg + Uncertainty Device, James Salway discovers the problems + of being quantum. 
+ + + O'Brien, Tim + Microsoft .NET: The Programming Bible + Computer + 36.95 + 2000-12-09 + Microsoft's .NET initiative is explored in + detail in this deep programmer's reference. + + + O'Brien, Tim + MSXML3: A Comprehensive Guide + Computer + 36.95 + 2000-12-01 + The Microsoft MSXML3 parser is covered in + detail, with attention to XML DOM interfaces, XSLT processing, + SAX and more. + + + Galos, Mike + Visual Studio 7: A Comprehensive Guide + Computer + 49.95 + 2001-04-16 + Microsoft Visual Studio 7 is explored in depth, + looking at how Visual Basic, Visual C++, C#, and ASP+ are + integrated into a comprehensive development + environment. + + \ No newline at end of file diff --git a/examples/local_models/Ollama/inputs/plain_html_example.txt b/examples/local_models/Ollama/inputs/plain_html_example.txt new file mode 100644 index 00000000..78f814ae --- /dev/null +++ b/examples/local_models/Ollama/inputs/plain_html_example.txt @@ -0,0 +1,105 @@ + +
+ + +
+
+
+ + +
+ \ No newline at end of file diff --git a/examples/local_models/Ollama/result.json b/examples/local_models/Ollama/result.json deleted file mode 100644 index 48b3752f..00000000 --- a/examples/local_models/Ollama/result.json +++ /dev/null @@ -1 +0,0 @@ -{"news": [{"title": "Rotary Pendulum RL", "description": "Open Source project aimed at controlling a real life rotary pendulum using RL algorithms"}, {"title": "DQN Implementation from scratch", "description": "Developed a Deep Q-Network algorithm to train a simple and double pendulum"}, {"title": "Multi Agents HAED", "description": "University project which focuses on simulating a multi-agent system to perform environment mapping. Agents, equipped with sensors, explore and record their surroundings, considering uncertainties in their readings."}, {"title": "Wireless ESC for Modular Drones", "description": "Modular drone architecture proposal and proof of concept. The project received maximum grade."}]} \ No newline at end of file diff --git a/examples/local_models/Ollama/results/result.csv b/examples/local_models/Ollama/results/result.csv deleted file mode 100644 index 97ef817e..00000000 --- a/examples/local_models/Ollama/results/result.csv +++ /dev/null @@ -1,2 +0,0 @@ -0,1,2,3 -"{'title': 'Rotary Pendulum RL', 'description': 'Open Source project aimed at controlling a real life rotary pendulum using RL algorithms'}","{'title': 'DQN Implementation from scratch', 'description': 'Developed a Deep Q-Network algorithm to train a simple and double pendulum'}","{'title': 'Multi Agents HAED', 'description': 'University project which focuses on simulating a multi-agent system to perform environment mapping. Agents, equipped with sensors, explore and record their surroundings, considering uncertainties in their readings.'}","{'title': 'Wireless ESC for Modular Drones', 'description': 'Modular drone architecture proposal and proof of concept. 
The project received maximum grade.'}" diff --git a/examples/local_models/Ollama/results/result.json b/examples/local_models/Ollama/results/result.json deleted file mode 100644 index 8a4e7057..00000000 --- a/examples/local_models/Ollama/results/result.json +++ /dev/null @@ -1 +0,0 @@ -{"projects": [{"title": "Rotary Pendulum RL", "description": "Open Source project aimed at controlling a real life rotary pendulum using RL algorithms"}, {"title": "DQN Implementation from scratch", "description": "Developed a Deep Q-Network algorithm to train a simple and double pendulum"}, {"title": "Multi Agents HAED", "description": "University project which focuses on simulating a multi-agent system to perform environment mapping. Agents, equipped with sensors, explore and record their surroundings, considering uncertainties in their readings."}, {"title": "Wireless ESC for Modular Drones", "description": "Modular drone architecture proposal and proof of concept. The project received maximum grade."}]} \ No newline at end of file diff --git a/examples/mixed_models/results/result.csv b/examples/mixed_models/results/result.csv deleted file mode 100644 index 97ef817e..00000000 --- a/examples/mixed_models/results/result.csv +++ /dev/null @@ -1,2 +0,0 @@ -0,1,2,3 -"{'title': 'Rotary Pendulum RL', 'description': 'Open Source project aimed at controlling a real life rotary pendulum using RL algorithms'}","{'title': 'DQN Implementation from scratch', 'description': 'Developed a Deep Q-Network algorithm to train a simple and double pendulum'}","{'title': 'Multi Agents HAED', 'description': 'University project which focuses on simulating a multi-agent system to perform environment mapping. Agents, equipped with sensors, explore and record their surroundings, considering uncertainties in their readings.'}","{'title': 'Wireless ESC for Modular Drones', 'description': 'Modular drone architecture proposal and proof of concept. 
The project received maximum grade.'}" diff --git a/examples/mixed_models/results/result.json b/examples/mixed_models/results/result.json deleted file mode 100644 index 8a4e7057..00000000 --- a/examples/mixed_models/results/result.json +++ /dev/null @@ -1 +0,0 @@ -{"projects": [{"title": "Rotary Pendulum RL", "description": "Open Source project aimed at controlling a real life rotary pendulum using RL algorithms"}, {"title": "DQN Implementation from scratch", "description": "Developed a Deep Q-Network algorithm to train a simple and double pendulum"}, {"title": "Multi Agents HAED", "description": "University project which focuses on simulating a multi-agent system to perform environment mapping. Agents, equipped with sensors, explore and record their surroundings, considering uncertainties in their readings."}, {"title": "Wireless ESC for Modular Drones", "description": "Modular drone architecture proposal and proof of concept. The project received maximum grade."}]} \ No newline at end of file diff --git a/examples/openai/inputs/books.xml b/examples/openai/inputs/books.xml new file mode 100644 index 00000000..e3d1fe87 --- /dev/null +++ b/examples/openai/inputs/books.xml @@ -0,0 +1,120 @@ + + + + Gambardella, Matthew + XML Developer's Guide + Computer + 44.95 + 2000-10-01 + An in-depth look at creating applications + with XML. + + + Ralls, Kim + Midnight Rain + Fantasy + 5.95 + 2000-12-16 + A former architect battles corporate zombies, + an evil sorceress, and her own childhood to become queen + of the world. + + + Corets, Eva + Maeve Ascendant + Fantasy + 5.95 + 2000-11-17 + After the collapse of a nanotechnology + society in England, the young survivors lay the + foundation for a new society. + + + Corets, Eva + Oberon's Legacy + Fantasy + 5.95 + 2001-03-10 + In post-apocalypse England, the mysterious + agent known only as Oberon helps to create a new life + for the inhabitants of London. Sequel to Maeve + Ascendant. 
+ + + Corets, Eva + The Sundered Grail + Fantasy + 5.95 + 2001-09-10 + The two daughters of Maeve, half-sisters, + battle one another for control of England. Sequel to + Oberon's Legacy. + + + Randall, Cynthia + Lover Birds + Romance + 4.95 + 2000-09-02 + When Carla meets Paul at an ornithology + conference, tempers fly as feathers get ruffled. + + + Thurman, Paula + Splish Splash + Romance + 4.95 + 2000-11-02 + A deep sea diver finds true love twenty + thousand leagues beneath the sea. + + + Knorr, Stefan + Creepy Crawlies + Horror + 4.95 + 2000-12-06 + An anthology of horror stories about roaches, + centipedes, scorpions and other insects. + + + Kress, Peter + Paradox Lost + Science Fiction + 6.95 + 2000-11-02 + After an inadvertant trip through a Heisenberg + Uncertainty Device, James Salway discovers the problems + of being quantum. + + + O'Brien, Tim + Microsoft .NET: The Programming Bible + Computer + 36.95 + 2000-12-09 + Microsoft's .NET initiative is explored in + detail in this deep programmer's reference. + + + O'Brien, Tim + MSXML3: A Comprehensive Guide + Computer + 36.95 + 2000-12-01 + The Microsoft MSXML3 parser is covered in + detail, with attention to XML DOM interfaces, XSLT processing, + SAX and more. + + + Galos, Mike + Visual Studio 7: A Comprehensive Guide + Computer + 49.95 + 2001-04-16 + Microsoft Visual Studio 7 is explored in depth, + looking at how Visual Basic, Visual C++, C#, and ASP+ are + integrated into a comprehensive development + environment. + + \ No newline at end of file diff --git a/examples/openai/inputs/plain_html_example.txt b/examples/openai/inputs/plain_html_example.txt new file mode 100644 index 00000000..78f814ae --- /dev/null +++ b/examples/openai/inputs/plain_html_example.txt @@ -0,0 +1,105 @@ + +
+ + +
+
+
+ + +
+ \ No newline at end of file diff --git a/examples/openai/results/result.csv b/examples/openai/results/result.csv deleted file mode 100644 index 97ef817e..00000000 --- a/examples/openai/results/result.csv +++ /dev/null @@ -1,2 +0,0 @@ -0,1,2,3 -"{'title': 'Rotary Pendulum RL', 'description': 'Open Source project aimed at controlling a real life rotary pendulum using RL algorithms'}","{'title': 'DQN Implementation from scratch', 'description': 'Developed a Deep Q-Network algorithm to train a simple and double pendulum'}","{'title': 'Multi Agents HAED', 'description': 'University project which focuses on simulating a multi-agent system to perform environment mapping. Agents, equipped with sensors, explore and record their surroundings, considering uncertainties in their readings.'}","{'title': 'Wireless ESC for Modular Drones', 'description': 'Modular drone architecture proposal and proof of concept. The project received maximum grade.'}" diff --git a/examples/openai/results/result.json b/examples/openai/results/result.json deleted file mode 100644 index 8a4e7057..00000000 --- a/examples/openai/results/result.json +++ /dev/null @@ -1 +0,0 @@ -{"projects": [{"title": "Rotary Pendulum RL", "description": "Open Source project aimed at controlling a real life rotary pendulum using RL algorithms"}, {"title": "DQN Implementation from scratch", "description": "Developed a Deep Q-Network algorithm to train a simple and double pendulum"}, {"title": "Multi Agents HAED", "description": "University project which focuses on simulating a multi-agent system to perform environment mapping. Agents, equipped with sensors, explore and record their surroundings, considering uncertainties in their readings."}, {"title": "Wireless ESC for Modular Drones", "description": "Modular drone architecture proposal and proof of concept. 
The project received maximum grade."}]} \ No newline at end of file From 3640434f5c6117299e2aa297f43755e8a73cebc4 Mon Sep 17 00:00:00 2001 From: VinciGit00 Date: Fri, 12 Apr 2024 12:59:15 +0200 Subject: [PATCH 5/5] add utils --- .../Ollama/smart_scraper_ollama.py | 9 +++- scrapegraphai/utils/__init__.py | 1 + scrapegraphai/utils/prettify_exec_info.py | 48 +++++++++++++++++++ 3 files changed, 57 insertions(+), 1 deletion(-) create mode 100644 scrapegraphai/utils/prettify_exec_info.py diff --git a/examples/local_models/Ollama/smart_scraper_ollama.py b/examples/local_models/Ollama/smart_scraper_ollama.py index f8da3cfe..d710b986 100644 --- a/examples/local_models/Ollama/smart_scraper_ollama.py +++ b/examples/local_models/Ollama/smart_scraper_ollama.py @@ -2,7 +2,7 @@ Basic example of scraping pipeline using SmartScraper """ from scrapegraphai.graphs import SmartScraperGraph - +from scrapegraphai.utils import prettify_exec_info # ************************************************ # Define the configuration for the graph # ************************************************ @@ -35,3 +35,10 @@ result = smart_scraper_graph.run() print(result) + +# ************************************************ +# Get graph execution info +# ************************************************ + +graph_exec_info = smart_scraper_graph.get_execution_info() +print(prettify_exec_info(graph_exec_info)) diff --git a/scrapegraphai/utils/__init__.py b/scrapegraphai/utils/__init__.py index e0bf7d52..2ea30cf1 100644 --- a/scrapegraphai/utils/__init__.py +++ b/scrapegraphai/utils/__init__.py @@ -4,3 +4,4 @@ from .save_audio_from_bytes import save_audio_from_bytes from .convert_to_csv import convert_to_csv from .convert_to_json import convert_to_json +from .prettify_exec_info import prettify_exec_info diff --git a/scrapegraphai/utils/prettify_exec_info.py b/scrapegraphai/utils/prettify_exec_info.py new file mode 100644 index 00000000..7023d6df --- /dev/null +++ b/scrapegraphai/utils/prettify_exec_info.py @@ 
"""
Prettify the execution information of the graph.
"""

import pandas as pd


def prettify_exec_info(complete_result: dict) -> pd.DataFrame:
    """
    Lay out a graph's execution information as a table, one row per node
    followed by a final 'Total' row.

    Args:
        - complete_result (dict): Execution information of the graph. The keys
          read here are 'nodes_info' (a mapping of node name to a dict holding
          'exec_time' and 'model_info'), 'total_exec_time' and
          'total_model_info'.

    Returns:
        - pd.DataFrame: A frame with 'Node' and 'Execution Time' columns plus
          one column per key found in the model-info dicts.

    Raises:
        - KeyError: If any of the keys listed above is missing.
    """

    # Read every top-level key up front so a missing key fails before any
    # frame is built.
    per_node = complete_result['nodes_info']
    overall_time = complete_result['total_exec_time']
    overall_model = complete_result['total_model_info']

    # One flat record per node; the model-info entries become extra columns.
    node_rows = [
        {
            'Node': name,
            'Execution Time': details['exec_time'],
            **details['model_info'],
        }
        for name, details in per_node.items()
    ]

    # Summary record, shaped like the per-node records.
    summary_row = {
        'Node': 'Total',
        'Execution Time': overall_time,
        **overall_model,
    }

    # Stack the per-node frame on top of the one-row summary frame and
    # renumber the index from zero.
    frames = [pd.DataFrame(node_rows), pd.DataFrame([summary_row])]
    return pd.concat(frames, ignore_index=True)