-
Notifications
You must be signed in to change notification settings - Fork 0
/
http_file_server.py
313 lines (284 loc) · 12.1 KB
/
http_file_server.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
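# HTTP file server that can be used to host a website locally; built on the socket library.
# Usage: put this file (http_file_server.py) in the same directory as the HTML files (e.g. index.html),
# then run http_file_server.py.
# Command line: python http_file_server.py <port (optional)>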
import socket, sys, os, time, traceback, pprint
from concurrent.futures import ThreadPoolExecutor
from urllib.parse import parse_qs, unquote
import chardet,mimetypes
# Status lines used as the first line of every response this server emits.
HEAD_100 = b"HTTP/1.1 100 Continue\n"
HEAD_OK = b"HTTP/1.1 200 OK\n"
HEAD_206 = b"HTTP/1.1 206 Partial Content\n"
HEAD_404 = b"HTTP/1.1 404 Not Found\n"
RECV_LENGTH = 16 * 1024  # number of bytes requested per sock.recv() call
CHUNK_SIZE = 512 * 1024  # 0.5 MB
SEND_SPEED = 10  # send-rate limit for large files in MB/s; a non-positive value disables it
def _read_file_helper(head,file,chunk_size,start,end): # 分段读取文件使用的生成器
yield head
file.seek(start)
total=0
while total<end-start:
size=min(chunk_size,end-start-total)
data=file.read(size)
total+=size
yield data
file.close()
def _slice_helper(data,size):
n=len(data)
for i in range(0,n,size):
yield data[i:i+size]
def convert_bytes(num):
    """Format a byte count as a string with a binary unit suffix.

    The value is divided by 1024 until it is below 1024 or the largest unit
    ("Y") is reached, and is rendered with two decimals, e.g. ``"1.50KB"``.

    Args:
        num: Number of bytes.

    Returns:
        The formatted string.
    """
    units = ["", "K", "M", "G", "T", "P", "E", "Z", "Y"]
    # Stop before the last unit so that very large values are expressed in
    # the last unit without being divided one extra time.
    for unit in units[:-1]:
        if num < 1024:
            return f"{num:.2f}{unit}B"
        num /= 1024
    return f"{num:.2f}{units[-1]}B"
def check_filetype(path):
    """Build the Content-Type header line for the file at ``path``.

    The MIME type is guessed from the file name. For ``text*`` types the
    beginning of the file is run through chardet, and the detected charset is
    appended when the confidence is above 0.9.

    Returns:
        ``b"Content-Type: ...\\n"``, or ``b""`` when the type is unknown so
        that the browser detects it on its own.
    """
    guessed = mimetypes.guess_type(path)[0]
    if guessed is None:
        return b""
    if not guessed.lower().startswith("text"):
        return b"Content-Type: %s\n" % guessed.encode()

    with open(path, "rb") as stream:
        # Detect the encoding from the first 512 bytes of the file.
        result = chardet.detect(stream.read(512))
        if result["encoding"] == "ascii":
            # No multi-byte encoding seen yet: try the following 3072 bytes.
            more = stream.read(3072)
            if more:
                result = chardet.detect(more)

    charset = result["encoding"]
    if charset is not None and result["confidence"] > 0.9:
        guessed += ";charset=%s" % charset
    return b"Content-Type: %s\n" % guessed.encode()
def parse_head(req_head):
    """Extract the path, query parameters and fragment from a request line.

    The request target is split on ``#`` and ``?`` *before* percent-decoding,
    so an encoded ``%23`` or ``%3F`` inside a file name is not mistaken for a
    delimiter and query values are decoded exactly once (by ``parse_qs``).

    Args:
        req_head: The request line, e.g. ``"GET /a/b?x=1 HTTP/1.1"``.

    Returns:
        A tuple ``(path, query, fragment)``:
        path is the decoded path without its first character (the leading
        slash), with backslashes turned into forward slashes and trailing
        slashes removed, or ``"."`` when empty; query is the ``parse_qs``
        dict (blank values kept), ``{}`` when there is no ``?``; fragment is
        the decoded fragment, or ``None`` when there is no ``#``.

    Raises:
        IndexError: If ``req_head`` contains no space-separated target.
    """
    # The target is the second token of the request line; drop its first
    # character (the leading "/").
    target = req_head.split(' ')[1][1:]

    pieces = target.rsplit("#", 1)
    fragment = unquote(pieces[1]) if len(pieces) == 2 else None

    raw_path, has_query, raw_query = pieces[0].partition("?")
    query = parse_qs(raw_query, keep_blank_values=True) if has_query else {}

    # An empty path means the current directory.
    path = unquote(raw_path).replace("\\", "/").rstrip("/") or "."
    return path, query, fragment
def get_dir_content(dir):
    """Build a complete 200 response containing an HTML listing of a directory.

    Sub-directories are listed first, then files with their sizes, each group
    sorted case-insensitively. Names are HTML-escaped in the visible text and
    percent-encoded in link targets, so names containing characters such as
    ``<``, ``&``, ``#`` or ``?`` neither inject markup nor produce broken
    links.

    Args:
        dir: Directory path relative to the current working directory
            (``"."`` for the working directory itself).

    Returns:
        The full response (status line plus HTML body) as bytes.

    Raises:
        OSError: If the directory cannot be listed or a file size cannot be
            read.
    """
    from html import escape
    from urllib.parse import quote

    path = os.path.join(os.getcwd(), dir)
    shown_path = escape(path)
    parts = [HEAD_OK, f"""
<html><head>
<meta http-equiv="content-type" content="text/html;charset=utf-8">
<title>{shown_path} 的目录</title>
</head><body>
<h1>{shown_path}的目录</h1><p></p>""".encode()]

    # os.listdir() does not tell files from directories, so test each entry.
    subdirs = []
    subfiles = []
    for sub in os.listdir(path):
        if os.path.isfile(os.path.join(path, sub)):
            subfiles.append(sub)
        else:
            subdirs.append(sub)
    subdirs.sort(key=str.lower)
    subfiles.sort(key=str.lower)

    base = "/" + quote(dir)
    if dir != ".":
        parts.append(f'\n<a href="{base}/..">[上级目录]</a><p></p>'.encode())
    for sub in subdirs:
        parts.append(
            f'\n<a href="{base}/{quote(sub, safe="")}">[目录]{escape(sub)}</a><p></p>'.encode())
    for sub in subfiles:
        size = convert_bytes(os.path.getsize(os.path.join(path, sub)))
        link = f'\n<a href="{base}/{quote(sub, safe="")}">{escape(sub)}</a>'
        info = f'<span style="color: #707070;"> {size}</span><p></p>'
        parts.append((link + info).encode())
    parts.append(b"\n</body></html>")
    # Join once at the end instead of growing a bytes object piece by piece.
    return b"".join(parts)
def get_file(path, start=None, end=None):
    """Return a generator yielding the response head and the file's data.

    ``start`` and ``end`` are the two numbers of an HTTP byte range
    (``Range: bytes=start-end``), where the end position is *inclusive*:
      * start and end given: bytes start..end (end clamped to the last byte);
      * only start given:    bytes start..last byte;
      * only end given:      the final ``end`` bytes of the file (suffix range).
    A range that selects no bytes of the file is ignored and the whole file
    is sent with a 200 status.

    Args:
        path: Path of the file to send.
        start: First byte position, or None.
        end: Last byte position (inclusive) or suffix length, or None.

    Returns:
        A generator that yields the head bytes first and then the data chunks.

    Raises:
        OSError: If the file cannot be inspected or opened.
    """
    size = os.path.getsize(path)
    content_type = check_filetype(path)

    first, last = 0, size - 1
    partial = False
    if start is not None or end is not None:
        if start is None:
            # Suffix range: the final `end` bytes of the file.
            first = max(size - end, 0)
        else:
            first = start
            if end is not None:
                last = min(end, size - 1)
        partial = 0 <= first <= last

    if partial:
        head = HEAD_206 + content_type
        head += b"Content-Range: bytes %d-%d/%d\n" % (first, last, size)
        # The head ends with two newlines (\n\n).
        head += b"Content-Length: %d\n\n" % (last - first + 1)
    else:
        first, last = 0, size - 1
        head = HEAD_OK + content_type
        head += b"Content-Length: %d\n\n" % size
    # _read_file_helper expects an exclusive end offset.
    return _read_file_helper(head, open(path, 'rb'), CHUNK_SIZE, first, last + 1)
def getcontent(dir, start=None, end=None):
    """Build the response for the URL path ``dir``.

    Resolution order:
      1. A directory containing a file whose name starts with ``index.`` (or
         is exactly ``index``) serves that file; ``.htm``/``.html`` wins when
         several exist.
      2. An existing file is served (optionally as a byte range).
      3. An existing directory gets an HTML listing.
      4. Otherwise ``dir + ".htm"`` / ``dir + ".html"`` is tried.
    Anything else, including paths containing ``..`` or paths that resolve
    outside the current working directory, yields a 404 page.

    Args:
        dir: Decoded URL path relative to the working directory.
        start: Optional range start forwarded to ``get_file``.
        end: Optional range end forwarded to ``get_file``.

    Returns:
        Response bytes, or a generator of byte chunks for file responses.
    """
    from urllib.parse import quote

    root = os.getcwd()
    # Convert dir to a file-system path.
    path = os.path.join(root, dir)
    try:
        if ".." in dir.split("/"):  # access to parent directories is forbidden
            raise OSError
        # os.path.join() discards the root when dir is absolute (e.g. the URL
        # "//etc/passwd"), so verify that the result is still inside the root.
        try:
            common = os.path.commonpath([os.path.abspath(root),
                                         os.path.abspath(path)])
        except ValueError:  # e.g. paths on different drives
            raise OSError from None
        if os.path.normcase(common) != os.path.normcase(os.path.abspath(root)):
            raise OSError

        if os.path.isdir(path):
            # Look for an index file in the directory and serve it if found.
            index = None
            for name in os.listdir(path):
                segments = name.split(".")
                if segments[0].lower() != "index":
                    continue
                if not os.path.isfile(os.path.join(path, name)):
                    continue
                index = name
                # HTML files take priority when several index files exist; a
                # name without any extension must not raise IndexError.
                if len(segments) > 1 and segments[1].lower() in ("htm", "html"):
                    break
            if index is not None:
                path = os.path.join(path, index)

        if os.path.isfile(path):
            response = get_file(path, start, end)
        elif os.path.isdir(path):
            response = get_dir_content(dir)
        else:
            # Neither a file nor a directory: allow the .htm/.html extension
            # to be omitted. Note that path/index.html takes priority over
            # path.html.
            for ext in (".htm", ".html"):
                candidate = path + ext
                if os.path.isfile(candidate):
                    response = get_file(candidate, start, end)
                    break
            else:
                raise OSError  # handled below as "not found"
    except OSError:
        # dir comes from the client, so percent-encode it before placing it
        # into the link.
        response = HEAD_404 + f"""
<html><head>
<meta http-equiv="content-type" content="text/html;charset=utf-8">
<title>404</title>
</head><body>
<h1>404 Not Found</h1>
<p>来自 Python 服务器测试</p>
<a href="/{quote(dir)}/..">返回上一级</a>
<a href="/">返回首页</a>
</body></html>
""".encode()
    return response
def send_response(sock, response, address):
    """Send a response to the client in chunks, optionally rate-limited.

    Args:
        sock: Connected client socket.
        response: Either the whole response as bytes, or an iterator yielding
            byte chunks. An iterator that has a ``close`` method is closed
            when sending ends, successfully or not.
        address: Client address, used only in the log message.

    Raises:
        OSError: Propagated from the socket (including ConnectionError).
    """
    if isinstance(response, bytes):
        response = _slice_helper(response, CHUNK_SIZE)
    total = 0
    begin = None  # set right after the first chunk has been sent
    try:
        for chunk in response:
            if begin is not None and SEND_SPEED > 0:
                # expected elapsed time - actual elapsed time
                delay = (total / (1 << 20)) / SEND_SPEED - \
                        (time.perf_counter() - begin)
                if delay > 0:
                    time.sleep(delay)  # delay sending to limit the speed
            # sendall() keeps sending until the whole chunk is transmitted;
            # send() may transmit only part of it.
            sock.sendall(chunk)
            if begin is None:
                begin = time.perf_counter()
            total += len(chunk)
    finally:
        # Let generator-based responses run their cleanup even when the
        # connection breaks in the middle of a transfer.
        close = getattr(response, "close", None)
        if close is not None:
            close()
    # Log when the expected send time exceeds one second, or when more than
    # 128 MB was sent without a speed limit.
    if SEND_SPEED > 0 and total >= SEND_SPEED*(1<<20) \
            or SEND_SPEED <= 0 and total >= 1<<27:
        print(address,"较大响应 (%s) 发送完毕" % convert_bytes(total))
def handle_post(sock, req_head, req_info, content, address=None):
    """Receive the rest of a POST body, log the submitted data and build a reply.

    Args:
        sock: Connected client socket; more body data is read from it when
            ``content`` is shorter than the announced Content-Length.
        req_head: The request line (currently unused).
        req_info: Dict of request headers; ``'Content-Length'`` is read.
        content: The part of the body that was already received.
        address: Client address used in log messages. When omitted it is
            taken from ``sock.getpeername()`` rather than from a module-level
            global, so the right client is logged under concurrency.

    Returns:
        Response bytes: a confirmation page, or ``HEAD_100`` when the body is
        still incomplete.
    """
    if address is None:
        try:
            address = sock.getpeername()
        except OSError:
            address = None  # socket is no longer connected
    length = int(req_info.get('Content-Length',-1))
    if not (content.startswith(b"------WebKitFormBoundary") and \
            len(content)<=42): # unless the form is just an empty WebKitFormBoundary marker
        if len(content)<length: # the first recv() did not return the whole body: keep receiving
            while True:
                new_data = sock.recv(RECV_LENGTH)
                content += new_data
                if not new_data or len(content)>=length:break
        if length != -1:content = content[:length] # cut off surplus data
    if content.startswith(b"------WebKitFormBoundary"): # file uploads and similar requests
        if len(content)<=42: # empty form
            content = b''
        else:
            split=content.splitlines()[1:-1]
            content = b"\n".join(split) # drop the first and last WebKitFormBoundary lines
        print(address,"提交文件数据:",content)
    else:
        if len(content)<length: # the body is still incomplete
            return HEAD_100 # ask the client to continue sending data
        else:
            form=parse_qs(content.decode("utf-8"),
                          keep_blank_values=True,encoding="utf-8")
            print(address,"提交数据:",form)
    return HEAD_OK + """
<html><head>
<meta http-equiv="content-type" content="text/html;charset=utf-8">
<title>提交成功</title>
</head><body>
<h1>提交成功</h1>
<a href="javascript:void(0);"
onclick="window.history.back();">返回</a>
</body></html>
""".encode()
def get_request_info(data):
    """Split decoded request text into the request line and a header dict.

    Only the header section is parsed: parsing stops at the first blank line,
    so lines of the body that happen to contain a colon cannot overwrite real
    headers such as Content-Length or Content-Type.

    Args:
        data: The decoded request text.

    Returns:
        A tuple ``(req_head, req_info)``: the first line of the request
        (``""`` for empty input) and a dict mapping header names to values,
        both stripped of surrounding whitespace. Header names keep the case
        in which they were sent.
    """
    lines = data.splitlines()
    req_head = lines[0] if lines else ""
    req_info = {}
    for line in lines[1:]:
        if not line.strip():
            break  # blank line: end of the header section
        key, sep, value = line.partition(":")
        if sep:  # lines without a colon are not header fields
            req_info[key.strip()] = value.strip()
    return req_head, req_info
def handle_get(req_head, req_info, address=None):
    """Handle a GET request and return the response for the requested path.

    A ``Range`` header of the form ``bytes=<start>-<end>`` (either number may
    be omitted) is forwarded to ``getcontent`` for resumable downloads. A
    Range header that cannot be parsed (other unit, several ranges,
    non-numeric or negative values) is ignored and the full content is
    returned instead of raising.

    Args:
        req_head: The request line.
        req_info: Dict of request headers.
        address: Client address used in log messages. When omitted, the
            module-level ``address`` is used if it exists, else ``None``.

    Returns:
        Whatever ``getcontent`` returns (bytes or a generator of bytes).
    """
    if address is None:
        # Fall back to the global set by the accept loop; it may be missing
        # when this module is imported instead of run as a script.
        address = globals().get("address")
    url = unquote(req_head.split(' ')[1])
    dir = parse_head(req_head)[0]

    start = end = None
    range_value = req_info.get("Range")
    if range_value is not None:
        unit, has_unit, spec = range_value.partition("=")
        first, has_dash, last = spec.strip().partition("-")
        try:
            if not (has_unit and has_dash) or unit.strip().lower() != "bytes":
                raise ValueError("unsupported Range header")
            start = int(first) if first.strip() else None
            end = int(last) if last.strip() else None
            if (start is not None and start < 0) or (end is not None and end < 0):
                raise ValueError("negative byte position")
        except ValueError:
            start = end = None  # ignore the header and send everything

    if start is not None or end is not None:  # resumable download
        print(address,"访问URL: %s (从 %s 到 %s 断点续传)" % (url,
              convert_bytes(start) if start is not None else None,
              convert_bytes(end) if end is not None else "末尾"))
        return getcontent(dir, start, end)
    print(address,"访问URL:",url)
    return getcontent(dir)
def handle_client(sock, address):
    """Serve a single request on a client connection, then close it.

    The socket is closed in every case: empty request, normal completion, or
    an exception raised while handling the request.

    Args:
        sock: Connected client socket.
        address: Client address, used for log messages.
    """
    try:
        raw = sock.recv(RECV_LENGTH)
        if not raw:
            return  # ignore empty data
        # Undecodable bytes (e.g. a binary upload body) are replaced rather
        # than raising; the body itself is passed on as raw bytes below.
        data = raw.decode("utf-8", errors="replace")
        req_head, req_info = get_request_info(data)
        # response is either bytes or a generator of byte chunks.
        if req_head.startswith("POST"):
            response = handle_post(sock, req_head, req_info, raw.splitlines()[-1])
        else:
            response = handle_get(req_head, req_info)
        try:
            send_response(sock, response, address)
        except ConnectionError as err:
            print(address,"连接异常 (%s): %s" % (type(err).__name__,str(err)))
    finally:
        sock.close()  # close the client connection
def handle_client_thread(*args, **kwargs):
    """Call ``handle_client`` and print the traceback of any exception.

    Only used for the multi-threaded mode, to output error information when
    an exception occurs.
    """
    try:
        handle_client(*args, **kwargs)
    except Exception:
        traceback.print_exc()
# Port from the first command-line argument; 80 is the default HTTP port.
PORT = int(sys.argv[1]) if len(sys.argv) == 2 else 80
if __name__ == "__main__":
    host = socket.gethostname()
    ips = socket.gethostbyname_ex(host)[2]  # alternatively socket.gethostbyname(host)
    print("服务器的IP:", ips)
    sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    try:
        sock.bind(("", PORT))
        sock.listen(128)  # argument: maximum number of queued connections
        import webbrowser
        webbrowser.open('http://127.0.0.1:%d/' % PORT)
        # Single-threaded alternative, handling one client at a time:
        #     while True:
        #         client_sock, address = sock.accept()
        #         handle_client(client_sock, address)
        # Multi-threaded mode. `address` is deliberately kept as a
        # module-level name because other functions in this file read it.
        with ThreadPoolExecutor(max_workers=os.cpu_count()) as executor:
            while True:
                client_sock, address = sock.accept()
                executor.submit(handle_client_thread, client_sock, address)
    except KeyboardInterrupt:
        pass  # Ctrl+C: shut down without a traceback
    finally:
        # Release the listening socket however the accept loop ends.
        sock.close()