diff --git a/DESCRIPTION b/DESCRIPTION index 3f693fe..54f5bff 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: minutemaker Title: GenAI-based meeting and conferences minutes generator -Version: 0.8.0 +Version: 0.9.0 Authors@R: person("Angelo", "D'Ambrosio", , "a.dambrosioMD@gmail.com", role = c("aut", "cre"), comment = c(ORCID = "0000-0002-2045-5155")) diff --git a/NEWS.md b/NEWS.md index da60704..ae93c42 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,23 @@ +# minutemaker 0.9.0 + +### Improve agenda review and add custom LLM support + +#### Enhancements +- Improve user interaction for agenda review: the `speech_to_summary_workflow` function now displays the generated agenda in the console, facilitating review and reducing the need to locate the agenda file separately. (Commit: 3bed1cc). +- Add support for custom language model endpoints: replace `use_local_llm()` with `use_custom_llm()` to send requests to custom (local or remote) language model endpoints compatible with the OpenAI API specification, also allowing the use of API keys. (Commit: 0fdfe57). +- Dynamic output_length parameter as default: dynamically set `summarization_output_length` in the workflow function based on whether a multipart summary is generated (shorter outputs) or not (longer outputs), optimizing the readability and relevance of summaries. (Commit: 2511287). + +#### Fixes +- Fix output_length usage: `output_length` was not being considered in the summarization process. (Commit: 08e95d1). +- Fix agenda file validation: update the validation logic for the 'agenda' argument in the `speech_to_summary_workflow` function to handle character type inputs correctly and provide clearer error messages. (Commit: d200a55). +- Fix agenda validation: add checks for empty agenda elements and missing required items, improve error messages for invalid time formats, and update tests. (Commit: 6d870ee). + +#### Documentation +- Fix message typos: correct typos in messages. (Commit: 0fdfe57). 
+ +#### Summary +This pull request focuses on improving the user experience and adding support for custom language model endpoints. It enhances the agenda review process, ensures dynamic output length adjustment, fixes agenda validation, and adds documentation typo corrections. + # minutemaker 0.8.0 ### Enhanced Agenda Management and Utilization diff --git a/R/LLM_calls.R b/R/LLM_calls.R index 3a034d1..576b8a9 100644 --- a/R/LLM_calls.R +++ b/R/LLM_calls.R @@ -165,7 +165,6 @@ interrogate_llm <- function( ...) { messages <- process_messages(messages) - provider <- match.arg(provider) if (is.null(provider)) { stop("Language model provider is not set. ", @@ -208,7 +207,7 @@ interrogate_llm <- function( if (log_request) tictoc::tic() response <- llm_fun(body, ...) - if (log_request) tictoc::toc() + if (log_request) elapsed <- tictoc::toc() if (httr::status_code(response) == 429) { warning("Rate limit exceeded. Waiting before retrying.", @@ -229,8 +228,10 @@ interrogate_llm <- function( err_message <- if (is.character(err_obj)) { err_obj - } else { + } else if (is.character(err_obj$message)) { err_obj$message + } else { + httr::content(response) } stop("Error in LLM request: ", err_message) @@ -244,6 +245,8 @@ interrogate_llm <- function( paste( "Prompt tokens:", prompt_tokens, "\nResponse tokens:", completion_tokens, + "\nGeneration speed:", paste( + signif(completion_tokens/(elapsed$toc - elapsed$tic), 3), "t/s"), "\nTotal tokens:", total_tokens ) ) |> message() @@ -260,7 +263,7 @@ interrogate_llm <- function( #' Use OpenAI Language Model #' -#' Sends a request to the OpenAI API using the parameters in the `body` +#' Sends a request to the OpenAI API using the parameters in the `body` #' argument. It requires an API key and model identifier set in the R options. #' #' @param body The body of the request. @@ -276,7 +279,7 @@ use_openai_llm <- function( ) { if (is.null(api_key) || is.null(model)) { - stop("OpenAI GPT model and API key are not set. 
", + stop("OpenAI GPT model or API key are not set. ", "Use the following options to set them:\n", "minutemaker_openai_model_gpt, ", "minutemaker_open_api_key options.") @@ -349,27 +352,30 @@ use_azure_llm <- function( } -#' Use Local Language Model +#' Use Custom Language Model #' -#' Sends a request to a local language model endpoint using the parameters in -#' the `body` argument. The endpoint URL should be set in the R options, with a -#' default provided. +#' Sends a request to a custom (local or remote) language model endpoint +#' compatible with the OpenAi API specification, using the parameters in the +#' `body` argument. The user can provide an API key if required. #' #' @param body The body of the request. #' @param endpoint The local endpoint for the language model service. Can be #' obtained from R options. +#' @param api_key Optional API key for the custom language model services that +#' require it. Obtained from R options. +#' #' @return The function returns the response from the local language model #' endpoint. -use_local_llm <- function( +use_custom_llm <- function( body, - endpoint = getOption("minutemaker_local_endpoint_gpt", - "http://localhost:1234/v1/chat/completions") + endpoint = getOption("minutemaker_custom_endpoint_gpt"), + api_key = getOption("minutemaker_custom_api_key") ) { if (is.null(endpoint)) { stop("Local endpoint is not set. ", "Use the following options to set it:\n", - "minutemaker_local_endpoint_gpt." 
+ "minutemaker_custom_endpoint_gpt" ) } @@ -378,7 +384,11 @@ use_local_llm <- function( # Prepare the request httr::POST( url = endpoint, - httr::add_headers(`Content-Type` = "application/json"), + httr::add_headers( + `Content-Type` = "application/json", + if (!is.null(api_key)) { + .headers = c(Authorization = paste0("Bearer ", api_key)) + }), body = jsonlite::toJSON(body, auto_unbox = TRUE) ) diff --git a/R/data_management.R b/R/data_management.R index 8410537..335f5c6 100644 --- a/R/data_management.R +++ b/R/data_management.R @@ -1102,7 +1102,7 @@ add_chat_transcript <- function( #' minutes if the "rolling" method is used. See `summarise_transcript` for #' more details. #' @param summarization_output_length An indication to the LLM regarding the -#' length of the output. See `summarise_transcript` for more details. +#' length of the output in pages. See `summarise_transcript` for more details. #' @param summarization_output_file A string with the path to the output file #' where the summary tree will be written. Should be a .R file. See #' `summarise_full_meeting` for more details. 
@@ -1173,7 +1173,7 @@ speech_to_summary_workflow <- function( llm_provider = getOption("minutemaker_llm_provider"), extra_summarise_args = NULL, summarization_window_size = 15, - summarization_output_length = 3, + summarization_output_length = if (isTRUE(multipart_summary)) 1 else 3, summarization_method = c("simple", "rolling"), summarization_output_file = file.path(target_dir, "event_summary.R"), @@ -1327,8 +1327,8 @@ speech_to_summary_workflow <- function( ## Perform summarization ## - if (length(agenda) > 1) { - stop("The agenda argument should be of length 1.") + if (is.character(agenda) && length(agenda) > 1) { + stop("No more than one agenda file can be provided.") } # If the agenda argument is a character and the file does not exist, stop the @@ -1390,7 +1390,9 @@ speech_to_summary_workflow <- function( # Ask the user if they want to proceed with the generated agenda or review # it first - message("Agenda generated. Please review it before proceeding.") + message("Agenda generated. Please review it before proceeding:") + + cat("\n", format_agenda(agenda), "\n") # Don't ask the user if the process is not interactive, just stop if (!interactive()) { @@ -1428,30 +1430,6 @@ speech_to_summary_workflow <- function( stop("The overwrite_formatted_output argument must be TRUE or FALSE") } - # isFALSE(overwrite_formatted_output) && - # file.exists(formatted_output_file)) { - - # if (interactive()) { - # choice <- utils::menu( - # choices = c( - # "Overwrite the existing formatted summary file", - # "Abort the process" - # ), - # title = "The formatted summary output file already exists and overwrite is FALSE. What do you want to do?" 
- # ) - # - # if (choice == 2) { - # message("Aborted by user.") - # return(invisible(transcript_data)) - # - # } else { - # message("Overwriting the existing formatted summary file.") - # } - # } else { - # message("The formatted summary output file already exists and overwrite is FALSE.\nSet overwrite_formatted_output = TRUE to overwrite it or remove it.") - # return(invisible(transcript_data)) - # } - } # Common summarization arguments @@ -1476,6 +1454,7 @@ speech_to_summary_workflow <- function( if (isFALSE(agenda) || isFALSE(multipart_summary)) { # Summarize as single talk + message("...with single part approach...\n") if (validate_agenda(agenda)) { agenda <- format_agenda(agenda) @@ -1495,6 +1474,7 @@ speech_to_summary_workflow <- function( } else { # Summarize as multiple talks + message("...with multipart approach...\n") # Necessary extra arguments for the summarization of whole events summarization_args$agenda <- agenda diff --git a/R/summarization.R b/R/summarization.R index 3dffe9d..27bfbf9 100644 --- a/R/summarization.R +++ b/R/summarization.R @@ -229,7 +229,7 @@ summarise_transcript <- function( args <- args[ c("event_description", "recording_details", "audience", "vocabulary", - "consider_diarization", "summary_structure", + "consider_diarization", "summary_structure", "output_length", "extra_diarization_instructions", "extra_output_instructions") ] @@ -270,7 +270,7 @@ summarise_transcript <- function( message("\nAggregating summaries") args <- args[ - c("event_description", "recording_details", "audience", + c("event_description", "recording_details", "audience", "output_length", "summary_structure", "extra_output_instructions") ] @@ -890,6 +890,9 @@ entity_extractor <- function( ... 
) { + # Initialize the prompts + set_prompts() + text <- paste(text, collapse = "--------\n\n\n") acro_or_concepts <- entities[entities %in% c("acronyms", "concepts")] diff --git a/R/validation.R b/R/validation.R index 3e2ae30..f539a79 100644 --- a/R/validation.R +++ b/R/validation.R @@ -28,13 +28,39 @@ validate_agenda_element <- function( # Get the arguments as a list args <- as.list(environment()) + # Initialize the validation result + is_valid <- TRUE + + general_warn <- paste( + "Agenda element validation failed:\n", + styler::style_text( + deparse(agenda_element)) |> paste(collapse = "\n"), "\n" + ) + # Remove the 'agenda_element' argument from the list args$agenda_element <- NULL + if (length(agenda_element) == 0) { + warning( + general_warn, "The agenda element is empty.", + call. = FALSE, immediate. = TRUE) + + return(FALSE) + } + # Check if the required items are present in the agenda element - is_valid <- purrr::imap_lgl(args, ~ { + el_checks <- purrr::imap_lgl(args, ~ { !is.null(agenda_element[[.y]]) || isFALSE(.x) - }) |> all() + }) + + if (!all(el_checks)) { + warning( + general_warn, "Some of the required items are missing:\n", + stringr::str_flatten_comma(names(args)[!el_checks]), + call. = FALSE, immediate. = TRUE) + } + + is_valid <- all(el_checks) if (isTRUE(from) || isTRUE(to)) { @@ -48,32 +74,47 @@ validate_agenda_element <- function( if (!inherits(agenda_element[[time]], c("numeric", "POSIXct", "character"))) { - stop(stringr::str_glue( - 'Agenda element "{time}" should be numeric, character or POSIXct,', - "but it's of class {class(agenda_element[[time]])}." - )) + warning( + general_warn, stringr::str_glue( + 'Agenda element "{time}" should be numeric, character or POSIXct,', + "but it's of class {class(agenda_element[[time]])}." + ), call. = FALSE, immediate. 
= TRUE) + + is_valid <- FALSE } if (!is.numeric(agenda_element[[time]]) && is.na(parse_event_time(agenda_element[[time]])) ) { - stop("Agenda element \"", time, "\" time not interpretable: ", - agenda_element[[time]]) + warning( + general_warn, "Agenda element \"", time, + "\" time not interpretable: ", agenda_element[[time]], + call. = FALSE, immediate. = TRUE) + + is_valid <- FALSE } } if (class(agenda_element$from) != class(agenda_element$to)) { - stop("The agenda element times are not of the same class:", - " from: ", agenda_element$from, - " to: ", agenda_element$to) + warning( + general_warn, "The agenda element times are not of the same class:", + " from: ", agenda_element$from, + " to: ", agenda_element$to, + call. = FALSE, immediate. = TRUE) + + is_valid <- FALSE } if ( time_to_numeric(agenda_element$from) > time_to_numeric(agenda_element$to) ) { - stop("Agenda element \"from\" time should preceed \"to\" time:", - " from: ", agenda_element$from, - " to: ", agenda_element$to) + warning( + general_warn, "Agenda element \"from\" time should preceed \"to\" time:", + " from: ", agenda_element$from, + " to: ", agenda_element$to, + call. = FALSE, immediate. = TRUE) + + is_valid <- FALSE } } @@ -106,15 +147,53 @@ validate_agenda_element <- function( #' to = "10:00 AM" #' ), #' list() -#' ), session = TRUE, title = TRUE, speakers = TRUE, moderators = TRUE, -#' type = TRUE, from = TRUE, to = TRUE) +#' )) #' #> [1] FALSE # Because the second element is empty #' +#' validate_agenda(list( +#' list( +#' title = "Opening Session", +#' moderators = "Jane Doe", +#' type = "conference talk", +#' from = "09:00 AM", +#' to = "10:00 AM" +#' ) +#' ), session = TRUE, title = TRUE, speakers = TRUE, moderators = TRUE, +#' type = TRUE, from = TRUE, to = TRUE) +#' +#' #> [1] FALSE # Because the session and speakers are missing +#' validate_agenda <- function( - agenda, - ... + agenda, + ... 
) { + general_warn <- "Agenda validation failed:\n" + + # Check if the agenda is FALSE + if (isFALSE(agenda)) { + return(FALSE) + } + + # Initialize the validation result + is_valid <- TRUE + + if (purrr::is_empty(agenda)) { + warning( + general_warn, "The agenda is empty.", + call. = FALSE, immediate. = TRUE) + + return(FALSE) + } + + if (!class(agenda) %in% c("list", "character")) { + warning( + general_warn, "The agenda is not a list or a file path.", + call. = FALSE, immediate. = TRUE) + + return(FALSE) + } + # Check if the agenda is a file path if (!purrr::is_empty(agenda) && is.character(agenda) && file.exists(agenda)){ agenda <- dget(agenda) @@ -122,16 +201,21 @@ validate_agenda <- function( # Check if the agenda is a list if (!is.list(agenda)) { - return(FALSE) - } + warning( + general_warn, "The agenda is not a list.", + call. = FALSE, immediate. = TRUE) - # Check if the agenda is empty - if (length(agenda) == 0) { return(FALSE) } # Check if the agenda elements are valid - purrr::map_lgl(agenda, ~ validate_agenda_element(.x, ...)) |> all() + for (agenda_element in agenda) { + if (!validate_agenda_element(agenda_element, ...)) { + return(FALSE) + } + } + + return(TRUE) } #' Validate summary tree id consistency diff --git a/man/speech_to_summary_workflow.Rd b/man/speech_to_summary_workflow.Rd index aae3c6e..24b9c8d 100644 --- a/man/speech_to_summary_workflow.Rd +++ b/man/speech_to_summary_workflow.Rd @@ -42,7 +42,7 @@ speech_to_summary_workflow( llm_provider = getOption("minutemaker_llm_provider"), extra_summarise_args = NULL, summarization_window_size = 15, - summarization_output_length = 3, + summarization_output_length = if (isTRUE(multipart_summary)) 1 else 3, summarization_method = c("simple", "rolling"), summarization_output_file = file.path(target_dir, "event_summary.R"), overwrite_summary_tree = FALSE, @@ -191,7 +191,7 @@ minutes if the "rolling" method is used. 
See \code{summarise_transcript} for more details.} \item{summarization_output_length}{An indication to the LLM regarding the -length of the output. See \code{summarise_transcript} for more details.} +length of the output in pages. See \code{summarise_transcript} for more details.} \item{summarization_method}{A string indicating the summarization method to use. See \code{summarise_full_meeting} for more details.} diff --git a/man/use_custom_llm.Rd b/man/use_custom_llm.Rd new file mode 100644 index 0000000..4d37b9c --- /dev/null +++ b/man/use_custom_llm.Rd @@ -0,0 +1,30 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/LLM_calls.R +\name{use_custom_llm} +\alias{use_custom_llm} +\title{Use Custom Language Model} +\usage{ +use_custom_llm( + body, + endpoint = getOption("minutemaker_custom_endpoint_gpt"), + api_key = getOption("minutemaker_custom_api_key") +) +} +\arguments{ +\item{body}{The body of the request.} + +\item{endpoint}{The local endpoint for the language model service. Can be +obtained from R options.} + +\item{api_key}{Optional API key for the custom language model services that +require it. Obtained from R options.} +} +\value{ +The function returns the response from the local language model +endpoint. +} +\description{ +Sends a request to a custom (local or remote) language model endpoint +compatible with the OpenAi API specification, using the parameters in the +\code{body} argument. The user can provide an API key if required. 
+} diff --git a/man/use_local_llm.Rd b/man/use_local_llm.Rd deleted file mode 100644 index 3b502af..0000000 --- a/man/use_local_llm.Rd +++ /dev/null @@ -1,27 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/LLM_calls.R -\name{use_local_llm} -\alias{use_local_llm} -\title{Use Local Language Model} -\usage{ -use_local_llm( - body, - endpoint = getOption("minutemaker_local_endpoint_gpt", - "http://localhost:1234/v1/chat/completions") -) -} -\arguments{ -\item{body}{The body of the request.} - -\item{endpoint}{The local endpoint for the language model service. Can be -obtained from R options.} -} -\value{ -The function returns the response from the local language model -endpoint. -} -\description{ -Sends a request to a local language model endpoint using the parameters in -the \code{body} argument. The endpoint URL should be set in the R options, with a -default provided. -} diff --git a/man/use_openai_llm.Rd b/man/use_openai_llm.Rd index 31dbaca..32de36d 100644 --- a/man/use_openai_llm.Rd +++ b/man/use_openai_llm.Rd @@ -21,6 +21,6 @@ use_openai_llm( The function returns the response from the OpenAI API. } \description{ -Sends a request to the OpenAI API using the parameters in the \code{body} +Sends a request to the OpenAI API using the parameters in the \code{body} argument. It requires an API key and model identifier set in the R options. 
} diff --git a/man/validate_agenda.Rd b/man/validate_agenda.Rd index 88b1581..178a36c 100644 --- a/man/validate_agenda.Rd +++ b/man/validate_agenda.Rd @@ -30,8 +30,20 @@ validate_agenda(list( to = "10:00 AM" ), list() - ), session = TRUE, title = TRUE, speakers = TRUE, moderators = TRUE, - type = TRUE, from = TRUE, to = TRUE) + )) #> [1] FALSE # Because the second element is empty +validate_agenda(list( + list( + title = "Opening Session", + moderators = "Jane Doe", + type = "conference talk", + from = "09:00 AM", + to = "10:00 AM" + ) +), session = TRUE, title = TRUE, speakers = TRUE, moderators = TRUE, +type = TRUE, from = TRUE, to = TRUE) + +#> [1] FALSE # Because the session and speakers are missing + }