Skip to content

Commit

Permalink
feat: add multipart summary option and improve workflow logic
Browse files Browse the repository at this point in the history
- Implemented a new `multipart_summary` argument in the `speech_to_summary_workflow` function to allow users to specify whether the summarization should be done in parts for each agenda element or as a single summary.
- Updated the `speech_to_summary_workflow` function to validate the `overwrite_transcript` argument more robustly and handle the existence of a formatted output file with clearer messaging and logic.
- Enhanced the documentation for several parameters in the `speech_to_summary_workflow` function to improve clarity and consistency.
- Made minor code refactoring for better readability and maintainability.
  • Loading branch information
bakaburg1 committed Apr 16, 2024
1 parent 0d27980 commit 99168d4
Showing 1 changed file with 67 additions and 32 deletions.
99 changes: 67 additions & 32 deletions R/data_management.R
Original file line number Diff line number Diff line change
Expand Up @@ -1068,26 +1068,32 @@ add_chat_transcript <- function(
#' file where the automatically generated agenda will be written. Should be a
#' .R file. See `infer_agenda_from_transcript` for more details.
#' @param extra_agenda_generation_args Additional arguments passed to the
#' `infer_agenda_from_transcript` function. See `infer_agenda_from_transcript`
#' for more details. Note that the `diarization_instructions` argument for this
#' function will be taken from the `extra_agenda_generation_args` argument.
#' `infer_agenda_from_transcript` function. See `infer_agenda_from_transcript`
#' for more details. Note that the `diarization_instructions` argument for
#' this function will be taken from the `extra_agenda_generation_args`
#' argument.
#' @param summarization_method A string indicating the summarization method to
#' use. See `summarise_full_meeting` for more details.
#' @param multipart_summary If a valid agenda is provided, this argument allows
#' the user to specify whether the summarisation should be done in parts, one
#' for each agenda element using the `summarise_full_meeting` function, or in
#' one go using the `summarise_transcript` function. See the respective
#' functions for more details.
#' @param event_description A string containing a description of the meeting.
#' See `summarise_transcript` for more details.
#' @param audience A string containing a description of the audience of
#' the meeting and what to focus on in the summary. See `summarise_transcript`
#' for more details.
#' @param audience A string containing a description of the audience of the
#' meeting and what to focus on in the summary. See `summarise_transcript` for
#' more details.
#' @param vocabulary A character vector of specific vocabulary words, names,
#' definitions, to help the LLM recognise misspellings and abbreviations. See
#' `summarise_transcript` for more details.
#' @param consider_diarization A logical indicating whether the summarisation
#' should take into account the diarization of the transcript. See
#' `summarise_transcript` for more details.
#' @param summary_structure,extra_diarization_instructions,extra_output_instructions
#' Specific instructions necessary to build the summarisation prompt. See
#' `summarise_transcript` for more details and run `get_prompts()` to see the
#' defaults. See `summarise_transcript` for more details.
#' Specific instructions necessary to build the summarisation prompt. Run
#' `get_prompts()` to see the defaults and see `summarise_transcript` for
#' more details.
#' @param llm_provider A string indicating the LLM provider to use for the
#' summarization. See `summarise_transcript` for more details.
#' @param extra_summarise_args Additional arguments passed to the
Expand Down Expand Up @@ -1155,14 +1161,16 @@ speech_to_summary_workflow <- function(
agenda_generation_output_file = file.path(target_dir, "agenda.R"),
extra_agenda_generation_args = NULL,

# Arguments for the actual summarization
multipart_summary = validate_agenda(agenda),
event_description = NULL,
audience = "An audience with understanding of the topic",
vocabulary = NULL,
consider_diarization = TRUE,
summary_structure = get_prompts("summary_structure"),
extra_diarization_instructions = NULL,
extra_output_instructions = NULL,
llm_provider = NULL,
llm_provider = getOption("minutemaker_llm_provider"),
extra_summarise_args = NULL,
summarization_window_size = 15,
summarization_output_length = 3,
Expand Down Expand Up @@ -1258,7 +1266,7 @@ speech_to_summary_workflow <- function(
## Create the transcript file ##

# Check if the transcript file doesn't exist or overwrite is TRUE
if (overwrite_transcript || !file.exists(transcript_file)) {
if (isTRUE(overwrite_transcript) || !file.exists(transcript_file)) {

# Generate the transcript from the json output data
transcript_data <- parse_transcript_json(
Expand Down Expand Up @@ -1380,8 +1388,11 @@ speech_to_summary_workflow <- function(

agenda <- do.call(infer_agenda_from_transcript, agenda_infer_args)

# Ask the user if they want to proceed with the generated agenda or review
# it first
message("Agenda generated. Please review it before proceeding.")

# Don't ask the user if the process is not interactive, just stop
if (!interactive()) {
return(invisible(transcript_data))
}
Expand All @@ -1401,31 +1412,46 @@ speech_to_summary_workflow <- function(
}

# Manage situations where the formatted output file exists
if (!is.null(formatted_output_file) &&
isFALSE(overwrite_formatted_output) &&
if (!purrr::is_empty(formatted_output_file) &&
file.exists(formatted_output_file)) {

if (interactive()) {
choice <- utils::menu(
choices = c(
"Overwrite the existing formatted summary file",
"Abort the process"
),
title = "The formatted summary output file already exists and overwrite is FALSE. What do you want to do?"
)

if (choice == 2) {
message("Aborted by user.")
return(invisible(transcript_data))

} else {
message("Overwriting the existing formatted summary file.")
}
} else {
message("The formatted summary output file already exists and overwrite is FALSE.\nSet overwrite_formatted_output = TRUE to overwrite it or remove it.")
if (isTRUE(overwrite_formatted_output)) {
message("WARNING: Overwriting the existing summary output.\n",
"Stop the process if you want to keep the existing file.")
} else if (isFALSE(overwrite_formatted_output)) {
message(
"The formatted summary output file already exists and",
"overwrite is FALSE.\n",
"Set overwrite_formatted_output = TRUE to overwrite it or remove it.")
return(invisible(transcript_data))
} else {
stop("The overwrite_formatted_output argument must be TRUE or FALSE")
}

# isFALSE(overwrite_formatted_output) &&
# file.exists(formatted_output_file)) {

# if (interactive()) {
# choice <- utils::menu(
# choices = c(
# "Overwrite the existing formatted summary file",
# "Abort the process"
# ),
# title = "The formatted summary output file already exists and overwrite is FALSE. What do you want to do?"
# )
#
# if (choice == 2) {
# message("Aborted by user.")
# return(invisible(transcript_data))
#
# } else {
# message("Overwriting the existing formatted summary file.")
# }
# } else {
# message("The formatted summary output file already exists and overwrite is FALSE.\nSet overwrite_formatted_output = TRUE to overwrite it or remove it.")
# return(invisible(transcript_data))
# }

}

# Common summarization arguments
Expand All @@ -1448,9 +1474,18 @@ speech_to_summary_workflow <- function(
provider = llm_provider
), extra_summarise_args)

if (isFALSE(agenda)) {
if (isFALSE(agenda) || isFALSE(multipart_summary)) {
# Summarize as single talk

if (validate_agenda(agenda)) {
#TODO: put this prompt in the set_prompts function
summarization_args$summary_structure <- stringr::str_glue("
{summary_structure}
Here is an agenda of the event to keep into account while summarizing:
{agenda}
Stricly follow the agenda to understand which information is worth summarizing.")
}

formatted_summary <- do.call(summarise_transcript, summarization_args)

return_vec <- c("transcript_data", "formatted_summary")
Expand Down

0 comments on commit 99168d4

Please sign in to comment.