From 6104c801221296ceec113f080ccbe66fb89f3157 Mon Sep 17 00:00:00 2001
From: SilentJMA
Date: Fri, 22 Sep 2023 12:21:31 +0200
Subject: [PATCH 1/2] Added error handling, combined the URL and file name
 pairs, used a list comprehension to simplify the creation, removed the
 duplicate import of the requests module, adjusted readability

---
 wiki_toc.py | 39 +++++++++++++++------------------------
 1 file changed, 15 insertions(+), 24 deletions(-)

diff --git a/wiki_toc.py b/wiki_toc.py
index 6df7b35..89e65bf 100644
--- a/wiki_toc.py
+++ b/wiki_toc.py
@@ -1,45 +1,36 @@
 import csv
 import requests
 from bs4 import BeautifulSoup
-import requests
-
 
 def get_data(url):
     response = requests.get(url)
+    response.raise_for_status()  # Add error handling for request
     soup = BeautifulSoup(response.text, 'lxml')
-    table_of_contents = soup.find("div", id="toc")
-    headings = table_of_contents.find_all("li")
-    data = []
-    for heading in headings:
-        heading_text = heading.find("span", class_="toctext").text
-        heading_number = heading.find("span", class_="tocnumber").text
-        data.append({
-            'heading_number': heading_number,
-            'heading_text': heading_text,
-        })
+    headings = soup.find("div", id="toc").find_all("li")
+
+    data = [{'heading_number': heading.find("span", class_="tocnumber").text,
+             'heading_text': heading.find("span", class_="toctext").text}
+            for heading in headings]
+
     return data
 
-
 def export_data(data, file_name):
     with open(file_name, "w", newline="") as file:
         writer = csv.DictWriter(file, fieldnames=['heading_number', 'heading_text'])
         writer.writeheader()
         writer.writerows(data)
 
-
 def main():
-    url_to_parse = "https://en.wikipedia.org/wiki/Python_(programming_language)"
-    file_name = "python_toc.csv"
-    data = get_data(url_to_parse)
-    export_data(data, file_name)
-
-    url_to_parse = "https://en.wikipedia.org/wiki/Web_scraping"
-    file_name = "web_scraping_toc.csv"
-    data = get_data(url_to_parse)
-    export_data(data, file_name)
+    urls = [
+        ("https://en.wikipedia.org/wiki/Python_(programming_language)", "python_toc.csv"),
+        ("https://en.wikipedia.org/wiki/Web_scraping", "web_scraping_toc.csv")
+    ]
+
+    for url, file_name in urls:
+        data = get_data(url)
+        export_data(data, file_name)
 
     print('Done')
 
-
 if __name__ == '__main__':
     main()

From dd524945f4f3ba0bf946567d57a7e85b2b065978 Mon Sep 17 00:00:00 2001
From: SilentJMA
Date: Fri, 22 Sep 2023 12:32:04 +0200
Subject: [PATCH 2/2] Added error handling, combined the URL and file name
 pairs, used a list comprehension to simplify the creation, removed the
 duplicate import of the requests module, adjusted readability

---
 wiki_toc.py | 23 +++++++++++++++++------
 1 file changed, 17 insertions(+), 6 deletions(-)

diff --git a/wiki_toc.py b/wiki_toc.py
index 89e65bf..f566353 100644
--- a/wiki_toc.py
+++ b/wiki_toc.py
@@ -4,13 +4,24 @@
 
 def get_data(url):
     response = requests.get(url)
-    response.raise_for_status()  # Add error handling for request
-    soup = BeautifulSoup(response.text, 'lxml')
-    headings = soup.find("div", id="toc").find_all("li")
+    response.raise_for_status()
+    soup = BeautifulSoup(response.text, 'html.parser')
 
-    data = [{'heading_number': heading.find("span", class_="tocnumber").text,
-             'heading_text': heading.find("span", class_="toctext").text}
-            for heading in headings]
+    data = []
+
+    toc = soup.find("div", {"id": "toc"})
+
+    if toc:
+        headings = toc.find_all("li")
+        for heading in headings:
+            heading_number = heading.find("span", {"class": "tocnumber"})
+            heading_text = heading.find("span", {"class": "toctext"})
+
+            if heading_number and heading_text:
+                data.append({
+                    'heading_number': heading_number.text.strip(),
+                    'heading_text': heading_text.text.strip(),
+                })
 
     return data
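
Note (a usage sketch, not part of the patch series): assuming the final wiki_toc.py from PATCH 2/2 is on the import path, the refactored functions can be exercised directly. After PATCH 2/2, get_data() returns an empty list when a page lacks a <div id="toc">, so export_data() would then write a header-only CSV instead of raising. The URL and file name below are the same pair main() already uses.

    # Usage sketch, assuming the patched wiki_toc.py is importable.
    from wiki_toc import get_data, export_data

    url = "https://en.wikipedia.org/wiki/Web_scraping"
    data = get_data(url)  # [] if the page has no <div id="toc">
    export_data(data, "web_scraping_toc.csv")
    print(f"Wrote {len(data)} headings")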