diff --git a/R/data_management.R b/R/data_management.R index 58a73c7..5bce122 100644 --- a/R/data_management.R +++ b/R/data_management.R @@ -1068,16 +1068,22 @@ add_chat_transcript <- function( #' file where the automatically generated agenda will be written. Should be a #' .R file. See `infer_agenda_from_transcript` for more details. #' @param extra_agenda_generation_args Additional arguments passed to the -#' `infer_agenda_from_transcript` function. See `infer_agenda_from_transcript` -#' for more details. Note that the `diarization_instructions` argument for this -#' function will be taken from the `extra_agenda_generation_args` argument. +#' `infer_agenda_from_transcript` function. See `infer_agenda_from_transcript` +#' for more details. Note that the `diarization_instructions` argument for +#' this function will be taken from the `extra_agenda_generation_args` +#' argument. #' @param summarization_method A string indicating the summarization method to #' use. See `summarise_full_meeting` for more details. +#' @param multipart_summary If a valid agenda is provided, this argument allows +#' the user to specify whether the summarisation should be done in parts, one +#' for each agenda element using the `summarise_full_meeting` function, or in +#' one go using the `summarise_transcript` function. See the respective +#' functions for more details. #' @param event_description A string containing a description of the meeting. #' See `summarise_transcript` for more details. -#' @param audience A string containing a description of the audience of -#' the meeting and what to focus on in the summary. See `summarise_transcript` -#' for more details. +#' @param audience A string containing a description of the audience of the +#' meeting and what to focus on in the summary. See `summarise_transcript` for +#' more details. #' @param vocabulary A character vector of specific vocabulary words, names, #' definitions, to help the LLM recognise misspellings and abbreviations. See #' `summarise_transcript` for more details. @@ -1085,9 +1091,9 @@ add_chat_transcript <- function( #' should take into account the diarization of the transcript. See #' `summarise_transcript` for more details. #' @param summary_structure,extra_diarization_instructions,extra_output_instructions -#' Specific instructions necessary to build the summarisation prompt. See -#' `summarise_transcript` for more details and run `get_prompts()` to see the -#' defaults. See `summarise_transcript` for more details. +#' Specific instructions necessary to build the summarisation prompt. See +#' `summarise_transcript` for more details and run `get_prompts()` to see the +#' defaults. See `summarise_transcript` for more details. #' @param llm_provider A string indicating the LLM provider to use for the #' summarization. See `summarise_transcript` for more details. #' @param extra_summarise_args Additional arguments passed to the @@ -1155,6 +1161,8 @@ speech_to_summary_workflow <- function( agenda_generation_output_file = file.path(target_dir, "agenda.R"), extra_agenda_generation_args = NULL, + # Arguments for the actual summarization + multipart_summary = validate_agenda(agenda), event_description = NULL, audience = "An audience with understanding of the topic", vocabulary = NULL, @@ -1162,7 +1170,7 @@ speech_to_summary_workflow <- function( summary_structure = get_prompts("summary_structure"), extra_diarization_instructions = NULL, extra_output_instructions = NULL, - llm_provider = NULL, + llm_provider = getOption("minutemaker_llm_provider"), extra_summarise_args = NULL, summarization_window_size = 15, summarization_output_length = 3, @@ -1258,7 +1266,7 @@ speech_to_summary_workflow <- function( ## Create the transcript file ## # Check if the transcript file doesn't exists or overwrite is TRUE - if (overwrite_transcript || !file.exists(transcript_file)) { + if (isTRUE(overwrite_transcript) || !file.exists(transcript_file)) { # Generate the trascript from the json output data transcript_data <- parse_transcript_json( @@ -1380,8 +1388,11 @@ speech_to_summary_workflow <- function( agenda <- do.call(infer_agenda_from_transcript, agenda_infer_args) + # Ask the user if they want to proceed with the generated agenda or review + # it first message("Agenda generated. Please review it before proceeding.") + # Don't ask the user if the process is not interactive, just stop if (!interactive()) { return(invisible(transcript_data)) } @@ -1401,31 +1412,46 @@ speech_to_summary_workflow <- function( } # Manage situations where the formatted output file exists - if (!is.null(formatted_output_file) && - isFALSE(overwrite_formatted_output) && + if (!purrr::is_empty(formatted_output_file) && file.exists(formatted_output_file)) { - if (interactive()) { - choice <- utils::menu( - choices = c( - "Overwrite the existing formatted summary file", - "Abort the process" - ), - title = "The formatted summary output file already exists and overwrite is FALSE. What do you want to do?" - ) - - if (choice == 2) { - message("Aborted by user.") - return(invisible(transcript_data)) - - } else { - message("Overwriting the existing formatted summary file.") - } - } else { - message("The formatted summary output file already exists and overwrite is FALSE.\nSet overwrite_formatted_output = TRUE to overwrite it or remove it.") + if (isTRUE(overwrite_formatted_output)) { + message("WARNING: Overwriting the existing summary output.\n", + "Stop the process if you want to keep the existing file.") + } else if (isFALSE(overwrite_formatted_output)) { + message( + "The formatted summary output file already exists and", + "overwrite is FALSE.\n", + "Set overwrite_formatted_output = TRUE to overwrite it or remove it.") return(invisible(transcript_data)) + } else { + stop("The overwrite_formatted_output argument must be TRUE or FALSE") } + # isFALSE(overwrite_formatted_output) && + # file.exists(formatted_output_file)) { + + # if (interactive()) { + # choice <- utils::menu( + # choices = c( + # "Overwrite the existing formatted summary file", + # "Abort the process" + # ), + # title = "The formatted summary output file already exists and overwrite is FALSE. What do you want to do?" + # ) + # + # if (choice == 2) { + # message("Aborted by user.") + # return(invisible(transcript_data)) + # + # } else { + # message("Overwriting the existing formatted summary file.") + # } + # } else { + # message("The formatted summary output file already exists and overwrite is FALSE.\nSet overwrite_formatted_output = TRUE to overwrite it or remove it.") + # return(invisible(transcript_data)) + # } + } # Common summarization arguments @@ -1448,9 +1474,18 @@ speech_to_summary_workflow <- function( provider = llm_provider ), extra_summarise_args) - if (isFALSE(agenda)) { + if (isFALSE(agenda) || isFALSE(multipart_summary)) { # Summarize as single talk + if (validate_agenda(agenda)) { + #TODO: put this prompt in the set_prompts function + summarization_args$summary_structure <- stringr::str_glue(" + {summary_structure} + Here is an agenda of the event to keep into account while summarizing: + {agenda} + Stricly follow the agenda to understand which information is worth summarizing.") + } + formatted_summary <- do.call(summarise_transcript, summarization_args) return_vec <- c("transcript_data", "formatted_summary")