File tree 4 files changed +13
-7
lines changed
4 files changed +13
-7
lines changed Original file line number Diff line number Diff line change 3
3
# Temporary script - will be removed in the future
4
4
#
5
5
6
- ./main -m ./models/ggml-alpaca-7b-q4.bin --color -f ./prompts/alpaca.txt -ins --top_k 10000 --temp 0.2 --repeat_penalty 1 -t 7
6
+ ./main -m ./models/ggml-alpaca-7b-q4.bin --color -f ./prompts/alpaca.txt -ins -b 256 --top_k 10000 --temp 0.2 --repeat_penalty 1 -t 7
Original file line number Diff line number Diff line change 3
3
# Temporary script - will be removed in the future
4
4
#
5
5
6
- ./main -m ./models/7B/ggml-model-q4_0.bin -n 256 --repeat_penalty 1.0 --color -i -r " User:" -f prompts/chat-with-bob.txt
6
+ ./main -m ./models/7B/ggml-model-q4_0.bin -b 128 -n 256 --repeat_penalty 1.0 --color -i -r " User:" -f prompts/chat-with-bob.txt
Original file line number Diff line number Diff line change @@ -13,7 +13,7 @@ N_PREDICTS="${N_PREDICTS:-2048}"
13
13
14
14
# Note: you can also override the generation options by specifying them on the command line:
15
15
# For example, override the context size by doing: ./chatLLaMa --ctx_size 1024
16
- GEN_OPTIONS=" ${GEN_OPTIONS:- --ctx_size 2048 --temp 0.7 --top_k 40 --top_p 0.5 --repeat_last_n 256 --repeat_penalty 1.17647} "
16
+ GEN_OPTIONS=" ${GEN_OPTIONS:- --ctx_size 2048 --temp 0.7 --top_k 40 --top_p 0.5 --repeat_last_n 256 --batch_size 1024 --repeat_penalty 1.17647} "
17
17
18
18
# shellcheck disable=SC2086 # Intended splitting of GEN_OPTIONS
19
19
./main $GEN_OPTIONS \
Original file line number Diff line number Diff line change @@ -372,7 +372,7 @@ int main(int argc, char ** argv) {
372
372
n_past += embd.size ();
373
373
embd.clear ();
374
374
375
- if ((int ) embd_inp.size () <= input_consumed) {
375
+ if ((int ) embd_inp.size () <= input_consumed && !is_interacting ) {
376
376
// out of user input, sample next token
377
377
const float top_k = params.top_k ;
378
378
const float top_p = params.top_p ;
@@ -451,13 +451,16 @@ int main(int argc, char ** argv) {
451
451
}
452
452
453
453
// Check if each of the reverse prompts appears at the end of the output.
454
- for (std::string antiprompt : params.antiprompt ) {
454
+ for (std::string & antiprompt : params.antiprompt ) {
455
455
if (last_output.find (antiprompt.c_str (), last_output.length () - antiprompt.length (), antiprompt.length ()) != std::string::npos) {
456
456
is_interacting = true ;
457
+ set_console_state (CONSOLE_STATE_USER_INPUT);
458
+ fflush (stdout);
457
459
break ;
458
460
}
459
461
}
460
- if (is_interacting) {
462
+
463
+ if (n_past > 0 && is_interacting) {
461
464
// potentially set color to indicate we are taking user input
462
465
set_console_state (CONSOLE_STATE_USER_INPUT);
463
466
@@ -495,7 +498,10 @@ int main(int argc, char ** argv) {
495
498
496
499
input_noecho = true ; // do not echo this again
497
500
}
498
- is_interacting = false ;
501
+
502
+ if (n_past > 0 ) {
503
+ is_interacting = false ;
504
+ }
499
505
}
500
506
501
507
// end of text token
You can’t perform that action at this time.
0 commit comments