search.py
44 lines (40 loc) · 1.41 KB
import sys
import requests
from bs4 import BeautifulSoup


# Recursively collect all files listed under an autoindex-style directory page.
# Returns a list of (file_url, size_string) tuples.
def recurse_search(url):
    sys.stdout.write("\rSearching: {}".format(url))
    files = []
    r = requests.get(url)
    soup = BeautifulSoup(r.text, "html.parser")
    for tr in soup.find_all('tr'):
        is_file = False
        link = ''
        link_text = ''
        size_str = ''
        no_size = False
        has_slash = False
        # Classify the row: a file row has a human-readable size (KiB/MiB);
        # a directory row has no size ('-') and link text ending in '/'.
        for td in tr.find_all('td'):
            if td.get('class') == ['size'] and ('KiB' in td.text or 'MiB' in td.text):
                is_file = True
                size_str = td.text
                break
            elif td.get('class') == ['size'] and td.text == '-':
                no_size = True
            elif td.get('class') == ['link'] and td.text.endswith('/'):
                has_slash = True
        is_dir = no_size and has_slash
        # Extract the row's link target and text.
        for td in tr.find_all('td'):
            if td.get('class') == ['link']:
                a = td.find('a')
                if a and a.get('href'):
                    link = a.get('href')
                    link_text = a.text
        if link and link_text:
            if is_file:
                assert size_str
                files.append((url + link, size_str))
            elif is_dir:
                # Skip the parent-directory entry to avoid infinite recursion.
                if '..' not in link and 'parent' not in link_text.lower():
                    files.extend(recurse_search(url + link))
    return files
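

# Example usage (a minimal sketch, not part of the original file): the base
# URL below is a hypothetical placeholder for an autoindex-style directory
# listing; recurse_search walks it and returns (file_url, size) tuples.
if __name__ == "__main__":
    base_url = "http://example.com/files/"  # placeholder listing URL
    results = recurse_search(base_url)
    sys.stdout.write("\n")
    for file_url, size in results:
        print("{} ({})".format(file_url, size.strip()))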