llama : work around load crash caused by out-of-vocab token IDs
Upstream issue: ggerganov#2378
cebtenzzre committed May 28, 2024
1 parent fadf113 commit f67f465
Showing 1 changed file with 12 additions and 4 deletions.
llama.cpp: 16 changes (12 additions & 4 deletions)
@@ -4703,10 +4703,18 @@ static void llm_load_print_meta(llama_model_loader & ml, llama_model & model) {
     if (vocab.special_mask_id != -1) { LLAMA_LOG_INFO( "%s: MASK token = %d '%s'\n", __func__, vocab.special_mask_id, vocab.id_to_token[vocab.special_mask_id].text.c_str() ); }
 
     if (vocab.linefeed_id != -1) { LLAMA_LOG_INFO( "%s: LF token = %d '%s'\n", __func__, vocab.linefeed_id, vocab.id_to_token[vocab.linefeed_id].text.c_str() ); }
-    if (vocab.special_prefix_id != -1) { LLAMA_LOG_INFO( "%s: PRE token = %d '%s'\n", __func__, vocab.special_prefix_id, vocab.id_to_token[vocab.special_prefix_id].text.c_str() ); }
-    if (vocab.special_suffix_id != -1) { LLAMA_LOG_INFO( "%s: SUF token = %d '%s'\n", __func__, vocab.special_suffix_id, vocab.id_to_token[vocab.special_suffix_id].text.c_str() ); }
-    if (vocab.special_middle_id != -1) { LLAMA_LOG_INFO( "%s: MID token = %d '%s'\n", __func__, vocab.special_middle_id, vocab.id_to_token[vocab.special_middle_id].text.c_str() ); }
-    if (vocab.special_eot_id != -1) { LLAMA_LOG_INFO( "%s: EOT token = %d '%s'\n", __func__, vocab.special_eot_id, vocab.id_to_token[vocab.special_eot_id].text.c_str() ); }
+    if (vocab.special_prefix_id != -1 && vocab.special_prefix_id < ptrdiff_t(vocab.id_to_token.size())) {
+        LLAMA_LOG_INFO( "%s: PRE token = %d '%s'\n", __func__, vocab.special_prefix_id, vocab.id_to_token[vocab.special_prefix_id].text.c_str() );
+    }
+    if (vocab.special_suffix_id != -1 && vocab.special_suffix_id < ptrdiff_t(vocab.id_to_token.size())) {
+        LLAMA_LOG_INFO( "%s: SUF token = %d '%s'\n", __func__, vocab.special_suffix_id, vocab.id_to_token[vocab.special_suffix_id].text.c_str() );
+    }
+    if (vocab.special_middle_id != -1 && vocab.special_middle_id < ptrdiff_t(vocab.id_to_token.size())) {
+        LLAMA_LOG_INFO( "%s: MID token = %d '%s'\n", __func__, vocab.special_middle_id, vocab.id_to_token[vocab.special_middle_id].text.c_str() );
+    }
+    if (vocab.special_eot_id != -1 && vocab.special_eot_id < ptrdiff_t(vocab.id_to_token.size())) {
+        LLAMA_LOG_INFO( "%s: EOT token = %d '%s'\n", __func__, vocab.special_eot_id, vocab.id_to_token[vocab.special_eot_id].text.c_str() );
+    }
 }
 
 // Returns false if cancelled by progress_callback
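The pattern above is a bounds check added before indexing vocab.id_to_token: a special token ID read from the model file is only dereferenced if it is both set (!= -1) and within the vocabulary size, so an out-of-vocab ID is skipped instead of crashing the load. Below is a minimal standalone sketch of that same guard, not part of the commit; the names toy_vocab and print_special_token are hypothetical and only stand in for the real llama.cpp structures.

// Illustration only: guard a model-supplied token ID before indexing.
#include <cstdint>
#include <cstdio>
#include <string>
#include <vector>

struct toy_vocab {
    std::vector<std::string> id_to_token;
    int32_t special_prefix_id = -1; // comes from the model file, may be out of range
};

// Print the token only when the ID is present and actually indexable.
static void print_special_token(const toy_vocab & vocab, const char * name, int32_t id) {
    if (id != -1 && id < ptrdiff_t(vocab.id_to_token.size())) {
        std::printf("%s token = %d '%s'\n", name, id, vocab.id_to_token[id].c_str());
    }
}

int main() {
    toy_vocab vocab;
    vocab.id_to_token = { "<s>", "</s>", "<unk>" };

    vocab.special_prefix_id = 1;     // in range: printed
    print_special_token(vocab, "PRE", vocab.special_prefix_id);

    vocab.special_prefix_id = 32007; // out of vocab: skipped instead of crashing
    print_special_token(vocab, "PRE", vocab.special_prefix_id);
}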
