deepseeked.py
from flask import Flask, request, Response
from flask_cors import CORS
import requests
import json

app = Flask(__name__)
CORS(app, resources={r"/*": {"origins": "*", "methods": ["GET", "POST", "OPTIONS"], "allow_headers": "*"}})

# Configuration
OLLAMA_SERVER = "http://localhost:11434"  # Default Ollama host (port 11434)
PROXY_PORT = 11435                        # Port this proxy listens on


def process_thinking_content(message_content, thinking_started):
    """Process content based on thinking tags state"""
    if not message_content:
        return "", thinking_started

    # Handle opening tag
    if '<think>' in message_content:
        thinking_started = True
        # If there's content before <think>, keep it
        content_before = message_content.split('<think>')[0]
        return content_before, thinking_started

    # Handle closing tag
    if '</think>' in message_content:
        thinking_started = False
        # If there's content after </think>, keep it
        content_after = message_content.split('</think>')[-1]
        return content_after, thinking_started

    # If we're in thinking mode, return empty
    if thinking_started:
        return "", thinking_started

    return message_content, thinking_started


def is_empty_content(message_content):
    """Check if the content is empty or just whitespace/newlines"""
    if not message_content:
        return True
    return not message_content.strip()


@app.route('/api/<path:path>', methods=['POST'])
def proxy_api(path):
    if path not in ['generate', 'chat']:
        return Response('Not Found', status=404)

    response = requests.post(
        f"{OLLAMA_SERVER}/api/{path}",
        json=request.json,
        stream=True
    )

    def generate():
        thinking_started = False
        for chunk in response.iter_lines():
            if chunk:
                try:
                    # Parse the JSON response
                    data = json.loads(chunk.decode('utf-8'))

                    # Check if this is a message with content
                    if 'message' in data and 'content' in data['message']:
                        content = data['message']['content']

                        # Process thinking tags and get cleaned content
                        cleaned_content, thinking_started = process_thinking_content(
                            content, thinking_started
                        )

                        # Skip if content is empty after processing
                        if is_empty_content(cleaned_content):
                            continue

                        # Clean up excessive newlines
                        #cleaned_content = '\n'.join(filter(None, cleaned_content.split('\n')))
                        #if not cleaned_content:
                        #    continue

                        # Update the content in the data
                        data['message']['content'] = cleaned_content

                        # Convert back to JSON and send
                        yield json.dumps(data).encode('utf-8') + b'\n'
                    else:
                        # Forward non-content messages (like 'done' messages)
                        yield chunk + b'\n'
                except json.JSONDecodeError:
                    # If we can't parse the JSON, forward it anyway
                    yield chunk + b'\n'

    return Response(
        generate(),
        mimetype='application/json',
        headers={
            'X-Accel-Buffering': 'no',
            'Cache-Control': 'no-cache',
            'Connection': 'keep-alive',
            'Content-Type': 'application/json'
        }
    )


@app.route('/', defaults={'path': ''})
@app.route('/<path:path>', methods=['GET', 'OPTIONS'])
def catch_all(path):
    if request.method == 'OPTIONS':
        return Response('', 204)

    resp = requests.request(
        method=request.method,
        url=f"{OLLAMA_SERVER}/{path}",
        headers={key: value for key, value in request.headers if key != 'Host'},
        data=request.get_data(),
        cookies=request.cookies,
        allow_redirects=False
    )

    excluded_headers = ['content-encoding', 'content-length', 'transfer-encoding', 'connection']
    headers = [(name, value) for (name, value) in resp.raw.headers.items()
               if name.lower() not in excluded_headers]

    return Response(resp.content, resp.status_code, headers)


if __name__ == '__main__':
    print(f"Starting proxy server on port {PROXY_PORT}")
    print(f"Forwarding requests to {OLLAMA_SERVER}")
    app.run(port=PROXY_PORT, debug=True)
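
A minimal client sketch (not part of the original file): with the proxy above running on port 11435, a request can be streamed through it exactly as it would be sent to Ollama, and only content that survives the <think> filtering comes back. The model name "deepseek-r1" and the prompt are illustrative placeholders; substitute whatever model is actually pulled into the local Ollama instance.

# example_client.py -- illustrative only, assumes the proxy is running on port 11435
import json
import requests

PROXY_URL = "http://localhost:11435/api/chat"

payload = {
    "model": "deepseek-r1",  # placeholder model name
    "messages": [{"role": "user", "content": "Why is the sky blue?"}],
    "stream": True,
}

with requests.post(PROXY_URL, json=payload, stream=True) as resp:
    for line in resp.iter_lines():
        if not line:
            continue
        data = json.loads(line.decode("utf-8"))
        # Chunks forwarded by the proxy carry content with <think> blocks stripped
        if "message" in data and "content" in data["message"]:
            print(data["message"]["content"], end="", flush=True)
        if data.get("done"):
            print()
            break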