-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathn11_scraper.py
74 lines (59 loc) · 3.24 KB
/
n11_scraper.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
import asyncio
import aiohttp
from bs4 import BeautifulSoup
class N11Scraper:
    """Download a set of product pages concurrently and extract product fields.

    Each successfully fetched page is parsed with BeautifulSoup and turned
    into one dict appended to ``self.data``. The CSS class names used by the
    ``extract_*`` methods are tied to the markup of the scraped site and may
    need updating if that markup changes.

    Attributes:
        urls: Iterable of page URLs to fetch.
        data: List of extracted records, one dict per successfully fetched
            page, with the keys ``product_name``, ``price``, ``seller``,
            ``review_count``, ``rating_count``, ``main_image_url`` and
            ``all_images``.
    """

    def __init__(self, urls):
        """Store the URLs to scrape and start with an empty result list."""
        self.urls = urls
        self.data = []

    async def fetch_data(self, session, url):
        """Return the body text of ``url``, or ``None`` on a non-200 status.

        Args:
            session: An open HTTP client session exposing ``get(url)`` as an
                async context manager (``aiohttp.ClientSession`` in ``scrape``).
            url: Address of the page to download.

        Returns:
            The response text when the status code is 200, otherwise ``None``.
            Exceptions raised by the session itself are not caught here.
        """
        async with session.get(url) as response:
            if response.status != 200:
                return None
            return await response.text()

    async def scrape(self):
        """Fetch every URL in ``self.urls`` and append one record per page.

        Pages whose fetch returned ``None`` (non-200 status) are skipped, so
        ``self.data`` may hold fewer entries than ``self.urls``. Results are
        appended to ``self.data``; the method itself returns ``None``.
        """
        async with aiohttp.ClientSession() as session:
            tasks = [self.fetch_data(session, url) for url in self.urls]
            results = await asyncio.gather(*tasks)

        # Parsing needs only the downloaded strings, so it runs after the
        # session has been released.
        for html_content in results:
            if html_content is None:
                # fetch_data signals a failed download with None; handing
                # that to BeautifulSoup would raise a TypeError.
                continue
            soup = BeautifulSoup(html_content, 'html.parser')
            self.data.append(self._build_record(soup))

    def _build_record(self, soup):
        """Assemble the result dict for one parsed page."""
        # NOTE: extraction of the pre-discount price is currently disabled.
        # The previous attempt looked up `del.oldPrice` inside
        # `div.priceContainer` under `div#unf-p-id`.
        return {
            'product_name': self.extract_product_name(soup),
            'price': self.extract_price(soup),
            'seller': self.extract_seller(soup),
            'review_count': self.extract_review_count(soup),
            'rating_count': self.extract_rating_count(soup),
            'main_image_url': self.extract_main_image_url(soup),
            'all_images': self.extract_all_images(soup),
        }

    @staticmethod
    def _text_or_default(tag, default):
        """Return the stripped text of ``tag``, or ``default`` if it is None."""
        if tag is None:
            return default
        return tag.text.strip()

    def extract_price(self, soup):
        """Return the text of the first ``unf-p-summary-price`` element."""
        return self._text_or_default(
            soup.find(class_='unf-p-summary-price'), 'Price not found')

    def extract_product_name(self, soup):
        """Return the text of the first ``proName`` element."""
        return self._text_or_default(
            soup.find(class_='proName'), 'Product name not found')

    def extract_seller(self, soup):
        """Return the text of the first ``unf-p-seller-name`` element."""
        return self._text_or_default(
            soup.find(class_='unf-p-seller-name'), 'Seller not found')

    def extract_review_count(self, soup):
        """Return the text of the first ``reviewNum`` element."""
        return self._text_or_default(
            soup.find(class_='reviewNum'), 'Review count not found')

    def extract_rating_count(self, soup):
        """Return the text of the first ``ratingScore`` element."""
        # TODO: verify that `ratingScore` is the correct class name.
        return self._text_or_default(
            soup.find(class_='ratingScore'), 'Rating count not found')

    def extract_main_image_url(self, soup):
        """Return the ``data-original`` attribute of the main product image.

        Falls back to ``'Main Image Url not found'`` when the ``<img>`` with
        class ``unf-p-img`` is missing or has no usable ``data-original``
        attribute (indexing the tag directly would raise ``KeyError``).
        """
        not_found = 'Main Image Url not found'
        image_tag = soup.find('img', class_='unf-p-img')
        if image_tag is None:
            return not_found
        return image_tag.get('data-original') or not_found

    def extract_all_images(self, soup):
        """Return the non-empty ``data-full`` values of all thumbnail divs."""
        candidates = (
            thumb.get('data-full')
            for thumb in soup.find_all('div', class_='unf-p-thumbs-item')
        )
        return [image_url for image_url in candidates if image_url]