Merge pull request #21 from bakaburg1/Dev

Improve agenda review and add custom LLM support
bakaburg1 · Apr 17, 2024 · 8fe8e60 · 8fe8e60
2 parents 5fd6494 + d8c5a50
commit 8fe8e60
Show file tree

Hide file tree

Showing 11 changed files with 213 additions and 101 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,6 +1,6 @@
 Package: minutemaker
 Title: GenAI-based meeting and conferences minutes generator
-Version: 0.8.0
+Version: 0.9.0
 Authors@R: 
     person("Angelo", "D'Ambrosio", , "a.dambrosioMD@gmail.com", role = c("aut", "cre"),
            comment = c(ORCID = "0000-0002-2045-5155"))

diff --git a/NEWS.md b/NEWS.md
@@ -1,3 +1,23 @@
+# minutemaker 0.9.0
+
+### Improve agenda review and add custom LLM support
+
+#### Enhancements
+- Improve user interaction for agenda review: the `speech_to_summary_workflow` function now displays the generated agenda in the console, facilitating review and reducing the need to locate the agenda file separately. (Commit: 3bed1cc).
+- Add support for custom language model endpoints: replace `use_local_llm()` with `use_custom_llm()` to send requests to custom (local or remote) language model endpoints compatible with the OpenAI API specification; an API key can also be provided for endpoints that require one. (Commit: 0fdfe57).
+- Dynamic default for the output length: `summarization_output_length` in the workflow function now defaults to a shorter output when a multipart summary is generated and to a longer one otherwise, optimizing the readability and relevance of summaries. (Commit: 2511287).
+
+#### Fixes
+- Fix output_length usage: `output_length` was not being considered in the summarization process. (Commit: 08e95d1).
+- Fix agenda file validation: update the validation logic for the 'agenda' argument in the `speech_to_summary_workflow` function to handle character type inputs correctly and provide clearer error messages. (Commit: d200a55).
+- Fix agenda validation: add checks for empty agenda elements and missing required items, improve error messages for invalid time formats, and update tests. (Commit: 6d870ee).
+
+#### Documentation
+- Fix message typos: correct typos in user-facing messages. (Commit: 0fdfe57).
+
+#### Summary
+This pull request focuses on improving the user experience and adding support for custom language model endpoints. It enhances the agenda review process, sets the summary output length dynamically, fixes agenda validation, and corrects typos in messages.
+
 # minutemaker 0.8.0
 
 ### Enhanced Agenda Management and Utilization

diff --git a/R/LLM_calls.R b/R/LLM_calls.R
@@ -165,7 +165,6 @@ interrogate_llm <- function(
     ...) {
 
   messages <- process_messages(messages)
-  provider <- match.arg(provider)
 
   if (is.null(provider)) {
     stop("Language model provider is not set. ",
@@ -208,7 +207,7 @@ interrogate_llm <- function(
 
     if (log_request) tictoc::tic()
     response <- llm_fun(body, ...)
-    if (log_request) tictoc::toc()
+    if (log_request) elapsed <- tictoc::toc()
 
     if (httr::status_code(response) == 429) {
       warning("Rate limit exceeded. Waiting before retrying.",
@@ -229,8 +228,10 @@ interrogate_llm <- function(
 
     err_message <- if (is.character(err_obj)) {
       err_obj
-    } else {
+    } else if (is.character(err_obj$message)) {
       err_obj$message
+    } else {
+      httr::content(response)
     }
 
     stop("Error in LLM request: ", err_message)
@@ -244,6 +245,8 @@ interrogate_llm <- function(
          paste(
            "Prompt tokens:", prompt_tokens,
            "\nResponse tokens:", completion_tokens,
+           "\nGeneration speed:", paste(
+             signif(completion_tokens/(elapsed$toc - elapsed$tic), 3), "t/s"),
            "\nTotal tokens:", total_tokens
          )
     ) |> message()
@@ -260,7 +263,7 @@ interrogate_llm <- function(
 
 #' Use OpenAI Language Model
 #'
-#' Sends a request to the OpenAI API  using the parameters in the `body`
+#' Sends a request to the OpenAI API using the parameters in the `body`
 #' argument. It requires an API key and model identifier set in the R options.
 #'
 #' @param body The body of the request.
@@ -276,7 +279,7 @@ use_openai_llm <- function(
 ) {
 
   if (is.null(api_key) || is.null(model)) {
-    stop("OpenAI GPT model and API key are not set. ",
+    stop("OpenAI GPT model or API key are not set. ",
          "Use the following options to set them:\n",
          "minutemaker_openai_model_gpt, ",
          "minutemaker_open_api_key options.")
@@ -349,27 +352,30 @@ use_azure_llm <- function(
 
 }
 
-#' Use Local Language Model
+#' Use Custom Language Model
 #'
-#' Sends a request to a local language model endpoint using the parameters in
-#' the `body` argument. The endpoint URL should be set in the R options, with a
-#' default provided.
+#' Sends a request to a custom (local or remote) language model endpoint
+#' compatible with the OpenAI API specification, using the parameters in the
+#' `body` argument. The user can provide an API key if required.
 #'
 #' @param body The body of the request.
 #' @param endpoint The local endpoint for the language model service. Can be
 #'   obtained from R options.
+#' @param api_key Optional API key for the custom language model services that
+#'   require it. Obtained from R options.
+#'
 #' @return The function returns the response from the local language model
 #'   endpoint.
-use_local_llm <- function(
+use_custom_llm <- function(
     body,
-    endpoint = getOption("minutemaker_local_endpoint_gpt",
-                         "http://localhost:1234/v1/chat/completions")
+    endpoint = getOption("minutemaker_custom_endpoint_gpt"),
+    api_key = getOption("minutemaker_custom_api_key")
 ) {
 
   if (is.null(endpoint)) {
     stop("Local endpoint is not set. ",
          "Use the following options to set it:\n",
-         "minutemaker_local_endpoint_gpt."
+         "minutemaker_custom_endpoint_gpt"
     )
   }
 
@@ -378,7 +384,11 @@ use_local_llm <- function(
   # Prepare the request
   httr::POST(
     url = endpoint,
-    httr::add_headers(`Content-Type` = "application/json"),
+    httr::add_headers(
+      `Content-Type` = "application/json",
+      if (!is.null(api_key)) {
+        .headers = c(Authorization = paste0("Bearer ", api_key))
+      }),
     body = jsonlite::toJSON(body, auto_unbox = TRUE)
   )
 

diff --git a/R/data_management.R b/R/data_management.R
@@ -1102,7 +1102,7 @@ add_chat_transcript <- function(
 #'   minutes if the "rolling"  method is used. See `summarise_transcript` for
 #'   more details.
 #' @param summarization_output_length An indication to the LLM regarding the
-#'   length of the output. See `summarise_transcript` for more details.
+#'   length of the output in pages. See `summarise_transcript` for more details.
 #' @param summarization_output_file A string with the path to the output file
 #'   where the summary tree will be written. Should be a .R file. See
 #'   `summarise_full_meeting` for more details.
@@ -1173,7 +1173,7 @@ speech_to_summary_workflow <- function(
   llm_provider = getOption("minutemaker_llm_provider"),
   extra_summarise_args = NULL,
   summarization_window_size = 15,
-  summarization_output_length = 3,
+  summarization_output_length = if (isTRUE(multipart_summary)) 1 else 3,
   summarization_method = c("simple", "rolling"),
 
   summarization_output_file = file.path(target_dir, "event_summary.R"),
@@ -1327,8 +1327,8 @@ speech_to_summary_workflow <- function(
 
   ## Perform summarization ##
 
-  if (length(agenda) > 1) {
-    stop("The agenda argument should be of length 1.")
+  if (is.character(agenda) && length(agenda) > 1) {
+    stop("No more than one agenda file can be provided.")
   }
 
   # If the agenda argument is a character and the file does not exist, stop the
@@ -1390,7 +1390,9 @@ speech_to_summary_workflow <- function(
 
       # Ask the user if they want to proceed with the generated agenda or review
       # it first
-      message("Agenda generated. Please review it before proceeding.")
+      message("Agenda generated. Please review it before proceeding:")
+
+      cat("\n", format_agenda(agenda), "\n")
 
       # Don't ask the user if the process is not interactive, just stop
       if (!interactive()) {
@@ -1428,30 +1430,6 @@ speech_to_summary_workflow <- function(
       stop("The overwrite_formatted_output argument must be TRUE or FALSE")
     }
 
-      # isFALSE(overwrite_formatted_output) &&
-      # file.exists(formatted_output_file)) {
-
-    # if (interactive()) {
-    #   choice <- utils::menu(
-    #     choices = c(
-    #       "Overwrite the existing formatted summary file",
-    #       "Abort the process"
-    #     ),
-    #     title = "The formatted summary output file already exists and overwrite is FALSE. What do you want to do?"
-    #   )
-    #
-    #   if (choice == 2) {
-    #     message("Aborted by user.")
-    #     return(invisible(transcript_data))
-    #
-    #   } else {
-    #     message("Overwriting the existing formatted summary file.")
-    #   }
-    # } else {
-    #   message("The formatted summary output file already exists and overwrite is FALSE.\nSet overwrite_formatted_output = TRUE to overwrite it or remove it.")
-    #   return(invisible(transcript_data))
-    # }
-
   }
 
   # Common summarization arguments
@@ -1476,6 +1454,7 @@ speech_to_summary_workflow <- function(
 
   if (isFALSE(agenda) || isFALSE(multipart_summary)) {
     # Summarize as single talk
+    message("...with single part approach...\n")
 
     if (validate_agenda(agenda)) {
       agenda <- format_agenda(agenda)
@@ -1495,6 +1474,7 @@ speech_to_summary_workflow <- function(
   } else {
 
     # Summarize as multiple talks
+    message("...with multipart approach...\n")
 
     # Necessary extra arguments for the summarization of whole events
     summarization_args$agenda <- agenda

diff --git a/R/summarization.R b/R/summarization.R
@@ -229,7 +229,7 @@ summarise_transcript <- function(
 
     args <- args[
       c("event_description", "recording_details", "audience", "vocabulary",
-        "consider_diarization", "summary_structure",
+        "consider_diarization", "summary_structure", "output_length",
         "extra_diarization_instructions", "extra_output_instructions")
     ]
 
@@ -270,7 +270,7 @@ summarise_transcript <- function(
   message("\nAggregating summaries")
 
   args <- args[
-    c("event_description", "recording_details", "audience",
+    c("event_description", "recording_details", "audience", "output_length",
       "summary_structure", "extra_output_instructions")
   ]
 
@@ -890,6 +890,9 @@ entity_extractor <- function(
     ...
     ) {
 
+  # Initialize the prompts
+  set_prompts()
+
   text <- paste(text, collapse = "--------\n\n\n")
 
   acro_or_concepts <- entities[entities %in% c("acronyms", "concepts")]