-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathapp.py
143 lines (127 loc) · 9.07 KB
/
app.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
import yaml
import argparse
import os
import gradio as gr
from func import predict, re_generate, remove_last_turn, like, select_model_change
# Command-line options for the chatbot front-end. The parsed values are kept
# in the module-level `args` namespace.
parser = argparse.ArgumentParser(
    description='Chatbot Interface with Customizable Parameters',
)

# Generation control. NOTE(review): `args.stop_token_ids` is not read anywhere
# in the visible part of this file -- confirm it is still needed.
parser.add_argument(
    '--stop-token-ids',
    type=str,
    default='',
    help='Comma-separated stop token IDs',
)

# Network address the web server is launched on.
parser.add_argument(
    "--host",
    type=str,
    default='0.0.0.0',
)
parser.add_argument(
    "--port",
    type=int,
    default=8101,
)

# Initial values for the model name / URL / API key text boxes.
parser.add_argument(
    "--default_model",
    type=str,
    default="deepseek-r1-distill-qwen-32b",
)
parser.add_argument(
    "--default_model_url",
    type=str,
    default="http://100.107.154.21:12000/v1",
)
parser.add_argument(
    "--default_api_key",
    type=str,
    default="EMPTY",
)

# Read the options from the process command line (sys.argv).
args = parser.parse_args()
def _read_models_file(file_path):
    """Parse one YAML config file and return its model entries as a flat dict.

    The file is expected to hold a top-level ``models`` list whose items are
    mappings. The key/value pairs of all those mappings are merged into one
    dict; when a key repeats, the later entry wins.

    An empty file, or one whose ``models`` key is missing or null, contributes
    no entries instead of raising.
    """
    with open(file_path, 'r', encoding='utf-8') as f:
        document = yaml.safe_load(f)
    # safe_load returns None for an empty document, so fall back to an empty
    # mapping / list rather than subscripting None.
    entries = (document or {}).get('models') or []
    merged = {}
    for entry in entries:
        for key, value in entry.items():
            merged[key] = value
    return merged


def load_models_from_yaml(path):
    """Load supported-model definitions from a YAML file or a directory of them.

    Args:
        path: Either a single ``.yaml`` file, or a directory. For a directory,
            every entry directly inside it whose name ends in ``.yaml`` is
            loaded (no recursion) and the results are merged.

    Returns:
        A dict holding every key/value pair found under the ``models`` list of
        the loaded file(s).

    Raises:
        ValueError: If ``path`` is neither an existing ``.yaml`` file nor an
            existing directory.
    """
    if os.path.isfile(path) and path.endswith('.yaml'):
        return _read_models_file(path)

    if os.path.isdir(path):
        supported_models = {}
        # os.listdir returns names in arbitrary order; sort them so that when
        # two files define the same key, the winner is the same on every run.
        for file_name in sorted(os.listdir(path)):
            if file_name.endswith('.yaml'):
                supported_models.update(
                    _read_models_file(os.path.join(path, file_name))
                )
        return supported_models

    raise ValueError(f"Invalid path: {path}")
# Build the Gradio chat UI (model settings, generation parameters, chat panel
# with action buttons, canned example prompts) and then launch the server.
#
# The message input box is constructed here, before the Blocks context. It is
# placed into the layout later via .render() and is also the target of every
# gr.Examples set further down.
multimodaltextbox = gr.MultimodalTextbox(label="Submit your message here")
with gr.Blocks(fill_height=True, theme=gr.themes.Ocean()) as demo:
with gr.Row():
# Settings column: model selection, endpoint credentials, sampling parameters.
with gr.Column(scale=3):
gr.Markdown("## Minecraft Chatbot@CraftJarvis")
# Model presets are read from the YAML config(s) under configs/api; each
# (key, value) pair of the result becomes one dropdown choice.
# NOTE(review): presumably Gradio treats each tuple as (label, value) --
# confirm against the Dropdown docs.
supported_models = load_models_from_yaml('configs/api')
# print("supported_models:", supported_models)
model_choices = [(key, value) for key, value in supported_models.items()]
# print("model_choices:", model_choices)
select_model = gr.Dropdown(choices=model_choices, value="custom", interactive=True, label="Select Model", allow_custom_value=True)
# These three boxes start from the command-line defaults held in `args`.
model_name = gr.Textbox(value=args.default_model, label="Model name")
model_url = gr.Textbox(value=args.default_model_url, label="Model URL")
api_key = gr.Textbox(value=args.default_api_key, label="API Key", type="password")
# When the dropdown changes, select_model_change (from func) provides new
# values for the model-name and model-URL boxes; api_key is not an output.
select_model.change(select_model_change, inputs=[select_model], outputs=[model_name, model_url])
# `parameter_row` is bound but not referenced again in this section.
with gr.Accordion("Parameters", open=True) as parameter_row:
temp = gr.Slider(minimum=0.0, maximum=2.0, value=0.7, step=0.1, interactive=True, label="Temperature",)
# NOTE(review): maximum=8196 is not a multiple of step=128 (8192 is) --
# confirm whether 8192 was intended.
max_output_tokens = gr.Slider(minimum=0, maximum=8196, value=2048, step=128, interactive=True, label="Max output tokens",)
# repetition_penalty = gr.Slider(minimum=0.0, maximum=1.0, value=1.0, step=0.1, interactive=True, label="Repetition penalty (future)",)
# gradio checkbox for stream mode or not
stream = gr.Checkbox(label="Streaming", value = True)
# system_prompt is not passed to any handler in this section; its label marks
# it as not yet supported.
system_prompt = gr.Textbox(value="You are a helpful assistant that can answer questions and help with tasks in Minecraft.", label="System Prompt (support in the future)", lines = 4)
# Chat column: conversation view, the input box, and the action buttons.
with gr.Column(scale=8):
chatbot = gr.Chatbot(
height=650,
type="messages",
show_copy_button=True,
# additional_inputs=[model, model_url, api_key, temp, max_output_tokens, stream],
# fill_height=True
)
# Place the input box created at the top of this section into the layout here.
multimodaltextbox.render()
# gr.ChatInterface(predict, type="messages", chatbot=chatbot, textbox=multimodaltextbox, multimodal=True, additional_inputs=[model, model_url, api_key, temp, max_output_tokens, stream], fill_height=True)
# Submitting the box calls predict (from func) with the message, the chatbot
# value, the model settings and the sampling parameters; predict's outputs
# are written back to the input box and the chatbot.
multimodaltextbox.submit(predict, [multimodaltextbox, chatbot, model_name, model_url, api_key, temp, max_output_tokens, stream], [multimodaltextbox, chatbot])
# Register the `like` handler (from func) for the chatbot's like event.
chatbot.like(like)
# add three buttons in one row
with gr.Row():
# add a button to submit the chat
# submit = gr.Button(value = "🚀 Submit")
# submit.click(predict, [multimodaltextbox, chatbot, model, model_url, api_key, temp, max_output_tokens, stream], [multimodaltextbox, chatbot])
# add a button to re-generate the response
regenerate = gr.Button(value = "🔄 Regenerate")
# re_generate receives the same inputs and writes the same outputs as the
# submit handler above.
regenerate.click(re_generate, inputs=[multimodaltextbox, chatbot, model_name, model_url, api_key, temp, max_output_tokens, stream], outputs=[multimodaltextbox, chatbot])
# # add a button to download the conversation
# download = gr.Button(value = "📥 Download")
# add a button to remove the last run
remove_last = gr.Button(value = "🧺 Remove Last Turn")
# remove_last_turn reads the chatbot value and writes the chatbot value only.
remove_last.click(remove_last_turn, inputs=[chatbot], outputs=[chatbot])
# add a clear button to clear the chatbot
clear = gr.ClearButton([multimodaltextbox, chatbot], value="🗑 Clear History", )
# NOTE(review): this click() call passes no handler function, and the
# ClearButton above was already given the components to clear -- confirm
# whether this call has any effect or can be dropped.
clear.click(inputs=[multimodaltextbox, chatbot], outputs=[multimodaltextbox, chatbot])
# Example prompts, grouped by task. Every example is a dict with "files" and
# "text" keys and is loaded into multimodaltextbox. Image paths are relative
# ("data/images/..."). `conv_examples` is bound but not referenced again in
# this section.
with gr.Row():
with gr.Accordion("Examples", open=False) as conv_examples:
# Text-only questions (no attached files).
gr.Examples(examples=[
{"files":[], "text": "Can diamond be mined with a stone pickaxe in Minecraft?"},
{"files":[], "text": "Give you nothing in the inventory, generate a step-by-step plan to obtain diamonds."},
{"files":[], "text": "What is the recipe for the enchanting table in Minecraft?"},
{"files":[], "text": "You are a Minecraft expert. You can finish tasks by strict and correct reasoning. Now you face a task: obtain a diamond pickaxe in Minecraft. Reasoning what should you do first when you have nothing in the inventory."}
], inputs=multimodaltextbox, label="Chat")
# Questions about a single attached image.
gr.Examples(examples=[
{"files":["data/images/007-dark_forest.png"], "text": "What are the red structures visible in the background?"},
{"files":["data/images/030-villager.png"], "text": "What is the role of the villager seen in the image?"},
{"files":["data/images/038-inventory.png"], "text": "What type of armor is the player wearing?"},
], inputs=multimodaltextbox, label="Visual Question Answering")
# Image captioning prompts.
gr.Examples(examples=[
{"files":["data/images/004-forest.png"], "text": "Caption the image and answer what biome is this."},
{"files":["data/images/042-ender_dragen.png"], "text": "Caption this image in details."},
{"files":["data/images/033-enderman.png"], "text": "Caption this image."},
], inputs=multimodaltextbox, label="Visual Captioning")
# "Pinpoint the <object>" prompts.
gr.Examples(examples=[
{"files":["data/images/005-diamond_ore.png"], "text": "Pinpoint the diamond ore."},
{"files":["data/images/009-pig.png"], "text": "Pinpoint the pig."},
{"files":["data/images/035-cow.png"], "text": "Pinpoint the cow."},
{"files":["data/images/045-crafting.png"], "text": "Pinpoint the oak_planks."},
{"files":["data/images/023-chicken.png"], "text": "Pinpoint the chicken."},
{"files":["data/images/014-inventory.png"], "text": "Pinpoint the oak_log."},
], inputs=multimodaltextbox, label="Visual Point Grounding")
# Bounding-box prompts for a named object.
gr.Examples(examples=[
{"files":["data/images/022-zombie.png"], "text": "Please provide the bounding box coordinate of the region this sentence describes: zombie."},
{"files":["data/images/037-cow.png"], "text": "Please provide the bounding box coordinate of the region this sentence describes: cow."},
{"files":["data/images/031-sheep.png"], "text": "Output the bounding box of the sheep in the image."},
{"files":["data/images/039-villager.png"], "text": "Output the bounding box of the villager in the image."},
], inputs=multimodaltextbox, label="Visual Box Grounding")
# Caption-with-grounding prompts; all four share the same text.
gr.Examples(examples=[
{"files":["data/images/020-ender_dragen.png"], "text": "Generate the caption with grounding:"},
{"files":["data/images/028-savannah_biome.png"], "text": "Generate the caption with grounding:"},
{"files":["data/images/043-layout.png"], "text": "Generate the caption with grounding:"},
{"files":["data/images/052-inventory.jpg"], "text": "Generate the caption with grounding:"},
], inputs=multimodaltextbox, label="Visual Captioning and Box Grounding")
# Enable queuing, then serve on the host/port taken from the command line.
# NOTE(review): share=True is hard-coded (Gradio's public share-link option)
# -- confirm this is intended for every deployment.
demo.queue().launch(server_name=args.host, server_port=args.port, share=True)