-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmain.py
225 lines (199 loc) · 8.16 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
from urllib.request import urlretrieve
from urllib.request import urlopen
from mutagen.id3 import ID3, APIC, TIT2, TALB, TRCK, TPE1
import sys
import re
import os
def main(reply):
def f7(seq):
seen = set()
seen_add = seen.add
return [x for x in seq if x not in seen and not seen_add(x)]
def parse_username(user):
url = "http://" + user + ".bandcamp.com"
try:
sock = urlretrieve(url)
except:
print("[!] That's not a valid username,\n\
perhaps try with a full url instead")
sys.exit()
ch = open(sock[0], encoding="ISO-8859-1").read()
#Regex to find the artists/bands name, if it has one
RE = re.compile('<span class="title">(.*?)</span>')
new_user = None
try:
new_user = RE.findall(ch)
for i in new_user:
if i != '':
user = i
except:
"""If it doesn't find a span with class="title",
we know that it's not a valid username, because
then it's not a valid bandcamp page"""
print("[!] That's not a valid username")
sys.exit()
#Regex to find the different albums
RE = re.compile('/album/(.*?)"')
albums = RE.findall(ch)
#Remove duplicates
albums = f7(albums)
for album in albums:
print("Downloading Album: " + album)
print("="*42)
album_url = url + "/album/" + album
#Read in the entire source of the pageon
sock = urlretrieve(album_url)
#Read the source
ch = open(sock[0], encoding="ISO-8859-1").read()
#Regex the title of the album
RE = re.compile('album_title : "(.*?)",')
album_name = RE.findall(ch)[0]
#Regex the name of the artist
RE = re.compile('artist : "(.*?)",')
artist = RE.findall(ch)[0]
#Regex to find the url to the album art
RE = re.compile('<link rel="shortcut icon" href="(.*?)">')
img_url = RE.findall(ch)[0]
try:
#Tries to download a temporary copy of the cover
response = urlretrieve(img_url)[0]
#Reads the data and saves it to img
img = open(response, 'rb').read()
except:
#There might be cases where there is no cover art
print("[!] No cover art found, sorry")
img = False
# Regex to find the parth of the page where
# we find all the mp3's and titles
RE = re.compile('trackinfo :(.*?)\}\],')
# And saves this as it's own variable, this will keep
# any stray duplicates out
matches = RE.findall(ch)[0]
#Regex to find all the different titles
RE = re.compile('"title":"(.*?)","')
titles = RE.findall(matches)
# titles = f7(titles) # Remove stray duplicates, if any
#Regex all the different songs
RE = re.compile('"file":{"mp3-128":"(.*?)"}')
songs = RE.findall(matches)
directory = os.path.join(user + "/" + artist + "-" + album_name)
if not os.path.exists(directory):
os.makedirs(directory)
for i in range(len(songs)):
title = titles[i].replace('"', '')
track = str(i + 1)
download(songs[i], title, album_name,
artist, track, img, user=user)
def simple_parse(url):
# if not url[:7] == "http://": # Failsafe, fails on https
# url = "http://" + url
try:
sock = urlretrieve(url)
except:
print("[!] That's not a valid url")
print("Example: http://derp.bandcamp.com")
sys.exit()
#Reads in the entire source of the page
ch = open(sock[0], encoding="ISO-8859-1").read()
#Regex for album name
RE = re.compile('album_title : "(.*?)",')
try:
#Get's the name of the album
album_name = RE.findall(ch)[0]
except:
#If it does not find an album name, there is no page
print("[!] That's not a valid username")
sys.exit()
#Regex to find the url to the album art
RE = re.compile('<link rel="shortcut icon" href="(.*?)">') # Cover Art
img_url = RE.findall(ch)[0]
try:
#Tries to download a temporary copy of the cover
response = urlretrieve(img_url)[0]
#Reads the data and saves it to img
img = open(response, 'rb').read()
except:
#There might be cases where there is no cover art
print("[!] No cover art found, sorry")
img = False
#Regex to find the name of the artist
RE = re.compile('artist : "(.*?)",')
artist = RE.findall(ch)[0]
# Regex to find the parth of the page where
# we find all the mp3's and titles
RE = re.compile('trackinfo :(.*?)\}\],')
# Saves this as it's own variable, this will keep
# any stray duplicates out
matches = RE.findall(ch)[0]
#Regex all the titles that are stored in matches
RE = re.compile('"title":"(.*?)",')
titles = RE.findall(matches)
#titles = f7(titles) # Remove any stray duplicates, if any
RE = re.compile('"file":{"mp3-128":"(.*?)"}')
songs = RE.findall(matches)
#Make a folder named "Artist name"-"Album name"
directory = os.path.join(artist + "-" + album_name)
if not os.path.exists(directory):
os.makedirs(directory)
for i in range(len(songs)):
title = titles[i].replace('"', '').replace('\\', '')
track = str(i + 1)
download(songs[i], title, album_name, artist, track, img)
def download(download_url, title, album, artist, track, img, user=False):
if user:
file_name = "{}/{}-{}/{}.mp3".format(user, artist, album, title)
else:
file_name = "{}-{}/{}.mp3".format(artist, album, title)
u = urlopen(download_url)
try:
f = open(file_name, 'a+')
except:
print("Can't open file")
pass
finally:
f = open(file_name, 'wb')
meta = u.info()
file_size = int(meta["Content-Length"])
print("Downloading: {} (Bytes: {})".format(title, file_size))
file_size_dl = 0
block_sz = 8192
while True:
buffer = u.read(block_sz)
if not buffer:
break
file_size_dl += len(buffer)
f.write(buffer)
status = r"%10d [%3.2f%%]" % (file_size_dl, file_size_dl *
100. / file_size)
status = status + chr(8)*(len(status)+1)
sys.stdout.write("\r" + status)
sys.stdout.flush()
f.close()
tags = ID3()
tags["TIT2"] = TIT2(encoding=3, text=title)
tags["TALB"] = TALB(encoding=3, text=album)
tags["TPE1"] = TPE1(encoding=3, text=artist)
tags["TRCK"] = TRCK(encoding=3, text=track)
tags["APIC"] = APIC(encoding=3, mime='image/jpeg', type=3, data=img)
# I still haven't figured out a good way to find a suitable genre
# tags["TCON"] = TCON(encoding=3, text=u'mutagen Genre')
if img: # If there is cover art
tags["APIC"] = APIC(encoding=3, mime='image/jpeg',
type=3, data=img)
tags.save(file_name)
print("Hold on, getting the relevant info...")
if "bandcamp" in reply.split("."):
simple_parse(reply)
else:
parse_username(reply)
if __name__ == '__main__':
os.system('clc' if os.name == "nt" else 'clear') # Clear the terminal
print("You have two options")
print("Enter the full url to the album or track you wish to download")
print("Or")
print('Enter the username of the artist, and \n\
I will try to download everything the artist has made available')
print("-"*42)
print('A full url: http://"username".bandcamp.com')
reply = input("[?] You must choose, but choose wisely: ")
main(reply)