
Commit 04c6f5e

Immediately start processing the prompt before user input has been provided (#476)
1 parent 7a9b6c3 commit 04c6f5e

File tree

alpaca.sh
chat.sh
examples/chatLLaMa
main.cpp

4 files changed, +13 -7 lines changed

alpaca.sh (+1 -1)

@@ -3,4 +3,4 @@
 # Temporary script - will be removed in the future
 #
 
-./main -m ./models/ggml-alpaca-7b-q4.bin --color -f ./prompts/alpaca.txt -ins --top_k 10000 --temp 0.2 --repeat_penalty 1 -t 7
+./main -m ./models/ggml-alpaca-7b-q4.bin --color -f ./prompts/alpaca.txt -ins -b 256 --top_k 10000 --temp 0.2 --repeat_penalty 1 -t 7

chat.sh (+1 -1)

@@ -3,4 +3,4 @@
 # Temporary script - will be removed in the future
 #
 
-./main -m ./models/7B/ggml-model-q4_0.bin -n 256 --repeat_penalty 1.0 --color -i -r "User:" -f prompts/chat-with-bob.txt
+./main -m ./models/7B/ggml-model-q4_0.bin -b 128 -n 256 --repeat_penalty 1.0 --color -i -r "User:" -f prompts/chat-with-bob.txt
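The -b flag now passed by both scripts is main's batch size for prompt processing, i.e. how many prompt tokens are evaluated per call while the prompt is being ingested. A minimal usage sketch, reusing the model and prompt paths from chat.sh above (the value 512 is arbitrary, chosen only for illustration):

# Evaluate the chat prompt in batches of 512 tokens before the first user turn (illustrative value)
./main -m ./models/7B/ggml-model-q4_0.bin -b 512 -n 256 --color -i -r "User:" -f prompts/chat-with-bob.txt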

examples/chatLLaMa (+1 -1)

@@ -13,7 +13,7 @@ N_PREDICTS="${N_PREDICTS:-2048}"
 
 # Note: you can also override the generation options by specifying them on the command line:
 # For example, override the context size by doing: ./chatLLaMa --ctx_size 1024
-GEN_OPTIONS="${GEN_OPTIONS:---ctx_size 2048 --temp 0.7 --top_k 40 --top_p 0.5 --repeat_last_n 256 --repeat_penalty 1.17647}"
+GEN_OPTIONS="${GEN_OPTIONS:---ctx_size 2048 --temp 0.7 --top_k 40 --top_p 0.5 --repeat_last_n 256 --batch_size 1024 --repeat_penalty 1.17647}"
 
 # shellcheck disable=SC2086 # Intended splitting of GEN_OPTIONS
 ./main $GEN_OPTIONS \
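As the script's own comment notes, individual generation options can also be overridden on the chatLLaMa command line; a hedged example that overrides the context size and the new default batch size (both values are illustrative only):

# Override the context size, per the script's comment, and shrink the batch size
./chatLLaMa --ctx_size 1024 --batch_size 512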

main.cpp (+10 -4)

@@ -372,7 +372,7 @@ int main(int argc, char ** argv) {
         n_past += embd.size();
         embd.clear();
 
-        if ((int) embd_inp.size() <= input_consumed) {
+        if ((int) embd_inp.size() <= input_consumed && !is_interacting) {
             // out of user input, sample next token
             const float top_k = params.top_k;
             const float top_p = params.top_p;

@@ -451,13 +451,16 @@
             }
 
             // Check if each of the reverse prompts appears at the end of the output.
-            for (std::string antiprompt : params.antiprompt) {
+            for (std::string & antiprompt : params.antiprompt) {
                 if (last_output.find(antiprompt.c_str(), last_output.length() - antiprompt.length(), antiprompt.length()) != std::string::npos) {
                     is_interacting = true;
+                    set_console_state(CONSOLE_STATE_USER_INPUT);
+                    fflush(stdout);
                     break;
                 }
             }
-            if (is_interacting) {
+
+            if (n_past > 0 && is_interacting) {
                 // potentially set color to indicate we are taking user input
                 set_console_state(CONSOLE_STATE_USER_INPUT);
 

@@ -495,7 +498,10 @@ int main(int argc, char ** argv) {
 
                 input_noecho = true; // do not echo this again
             }
-            is_interacting = false;
+
+            if (n_past > 0) {
+                is_interacting = false;
+            }
         }
 
         // end of text token
