-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathreddit.py
26 lines (24 loc) · 820 Bytes
/
reddit.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
import requests
import bs4
def get_reddit_links(link, pages=5, *, timeout=10.0):
    """Collect subreddit links from the paginated NSFW listing under ``link``.

    Page 1 is fetched from ``<link>/nsfw.html`` and page N (N >= 2) from
    ``<link>/nsfw<N>.html``. On every page, the ``href`` of each ``<a>``
    nested inside a ``<span class="subreddit-url">`` is collected.

    Args:
        link: Base URL of the listing site, with or without a trailing slash.
        pages: Number of listing pages to fetch, starting at page 1. Zero or
            a negative value fetches nothing.
        timeout: Seconds to wait for each HTTP response before giving up.

    Returns:
        A list of ``href`` strings in page order. Duplicates are kept.

    Raises:
        requests.HTTPError: If a page responds with an error status.
        requests.RequestException: On connection failures or timeouts.
    """
    # Strip trailing slashes so the joined URL never contains '//nsfw.html'.
    base_url = link.rstrip('/')
    links = []
    for page in range(1, pages + 1):
        # The first page has no numeric suffix; later pages are nsfw2, nsfw3...
        suffix = '' if page == 1 else str(page)
        # Without a timeout a stalled server would block this call forever.
        res = requests.get(base_url + '/nsfw' + suffix + '.html',
                           timeout=timeout)
        res.raise_for_status()
        soup = bs4.BeautifulSoup(res.text, 'html.parser')
        # Anchors nested in spans carrying the `subreddit-url` CSS class.
        for anchor in soup.select('span.subreddit-url a'):
            href = anchor.get('href')
            # Skip anchors without an href instead of raising KeyError.
            if href is not None:
                links.append(href)
    return links
if __name__ == '__main__':
    # Script entry point: scrape the default listing site and append every
    # collected link, one per line, to the output file.
    import os

    output_path = 'data/nsfw/nsfw_sites.txt'
    subreddit_links = get_reddit_links('https://redditlist.com')
    # Create the target directory if needed so a missing folder does not
    # discard the results after all pages have already been fetched.
    os.makedirs(os.path.dirname(output_path), exist_ok=True)
    # Append mode keeps links written by earlier runs; the explicit encoding
    # makes the output independent of the platform locale.
    with open(output_path, 'a', encoding='utf-8') as f:
        f.writelines(url + '\n' for url in subreddit_links)