Skip to content

Commit

Permalink
Merge pull request #17 from bakaburg1/feature/infer-agenda
Browse files Browse the repository at this point in the history
Implement Agenda Inference from Transcripts and Enhance Summarization Workflow
  • Loading branch information
bakaburg1 authored Mar 8, 2024
2 parents 3636930 + 4584c73 commit d7b5a85
Show file tree
Hide file tree
Showing 21 changed files with 2,625 additions and 47 deletions.
2 changes: 1 addition & 1 deletion .Rprofile
Original file line number Diff line number Diff line change
@@ -1 +1 @@

source("renv/activate.R")
9 changes: 6 additions & 3 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@
Package: minutemaker
Title: GenAI-based meeting and conferences minutes generator
Version: 0.5.5
Version: 0.6.0
Authors@R:
person("Angelo", "D'Ambrosio", , "a.dambrosioMD@gmail.com", role = c("aut", "cre"),
comment = c(ORCID = "0000-0002-2045-5155"))
Description: Generate meeting minutes starting from an audio recording or a transcripts using speech-to-text and LLMs.
Description: Generate meeting minutes starting from an audio recording or a
transcript using speech-to-text and LLMs.
License: MIT + file LICENSE
Imports:
dplyr (>= 1.1.4),
Expand All @@ -20,10 +21,12 @@ Imports:
Config/testthat/edition: 3
Encoding: UTF-8
Roxygen: list(markdown = TRUE)
RoxygenNote: 7.2.3
RoxygenNote: 7.3.1
Suggests:
av (>= 0.9.0),
devtools (>= 2.4.5),
parallel (>= 4.3.2),
testthat (>= 3.0.0),
text2vec (>= 0.6.4),
tictoc (>= 1.2),
usethis (>= 2.2.3)
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ export(format_summary_tree)
export(generate_recording_details)
export(get_prompts)
export(import_transcript_from_file)
export(infer_agenda_from_transcript)
export(interrogate_llm)
export(merge_transcripts)
export(parse_transcript_json)
Expand Down
11 changes: 9 additions & 2 deletions R/LLM_calls.R
Original file line number Diff line number Diff line change
Expand Up @@ -35,10 +35,17 @@
#'
process_messages <- function(messages) {

if (missing(messages) || is.null(messages)) {
if (missing(messages) || is.null(messages) || length(messages) == 0) {
stop("User messages are required.")
}

# Assume that a single message is from the user
if (length(messages) == 1 &&
is.character(messages) &&
is.null(names(messages))) {
messages <- c(user = messages)
}

# Convert vector to list format
vector_to_list <- function(msg_vec) {

Expand Down Expand Up @@ -199,7 +206,7 @@ interrogate_llm <- function(

if (httr::status_code(response) == 429) {
warning("Rate limit exceeded. Waiting before retrying.",
immediate. = TRUE)
immediate. = TRUE, call. = FALSE)

to_wait <- as.numeric(httr::headers(response)$`retry-after`)
message("Waiting for ", to_wait, " seconds.\n...")
Expand Down
77 changes: 52 additions & 25 deletions R/data_management.R
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,11 @@ parse_transcript_json <- function(
"Please remove it and try transcription again.")
}

if (length(transcript_list[[i]]$segments) == 0) {
# skip this file, there was nothing to transcribe
next
}

transcript_data <- transcript_list[[i]]$segments |>
bind_rows() |>
# Select only the columns to import
Expand Down Expand Up @@ -280,11 +285,12 @@ extract_text_from_transcript <- function(

# Ignore the `import_diarization` parameter if the transcript does not contain
# speaker information
if (all(is.na(transcript_data$speaker)) ||
if (!"speaker" %in% names(transcript_data) ||
all(is.na(transcript_data$speaker)) ||
n_distinct(transcript_data$speaker, na.rm = T) == 1) {
import_diarization <- FALSE

transcript_data$speaker <- "None"
transcript_data$speaker <- "Unknown"
}

transcript <- transcript_data %>%
Expand Down Expand Up @@ -386,16 +392,6 @@ convert_agenda_times <- function(
validate_agenda_element(agenda[[i]], from = TRUE, to = TRUE)
}

# if (
# convert_to == "clocktime" &&
# inherits(agenda[[1]]["from"], c("POSIXct", "character"))) {
#
# warning("Agenda already in clock time format.",
# call. = FALSE, immediate. = TRUE)
#
# return(agenda_orig)
# }

# Check if agenda times are all of the same class
if (!all(purrr::map_lgl(agenda, ~ is.numeric(.x$from))) &&
!all(purrr::map_lgl(agenda, ~ {
Expand Down Expand Up @@ -988,8 +984,25 @@ add_chat_transcript <- function(
#' generate the chat file. See `add_chat_transcript` for more details.
#' @param agenda The agenda of the meeting, that is, a list of agenda elements
#' each with a session name, a title, speaker and moderator lists, type of
#' talk and start and end times. Alternatively, the path to an R file
#' containing such a list. See `summarise_full_meeting` for more details.
#' talk, talk description and start and end times. Alternatively, the path to
#' an R file containing such a list. See `summarise_full_meeting` for more
#' details. If NULL, the user will be asked if the system should try to
#' generate the agenda automatically, using the `infer_agenda_from_transcript`
#' function.
#' @param expected_agenda A character string. Only used if the `agenda` argument
#' is `NULL` and the user requests the automatic agenda generation. This
#' string will be used to drive the LLM while generating the agenda. See
#' `infer_agenda_from_transcript` for more details.
#' @param agenda_generation_window_size The size of the window in seconds to
#' analyze at once when generating the agenda. See
#' `infer_agenda_from_transcript` for more details.
#' @param agenda_generation_output_file A string with the path to the output
#' file where the automatically generated agenda will be written. Should be a
#' .R file. See `infer_agenda_from_transcript` for more details.
#' @param extra_agenda_generation_args Additional arguments passed to the
#' `infer_agenda_from_transcript` function. See `infer_agenda_from_transcript`
#' for more details. Note that the `diarization_instructions` argument for this
#' function will be taken from the `extra_diarization_instructions` argument.
#' @param summarization_method A string indicating the summarization method to
#' use. See `summarise_full_meeting` for more details.
#' @param event_description A string containing a description of the meeting.
Expand All @@ -1004,9 +1017,9 @@ add_chat_transcript <- function(
#' should take into account the diarization of the transcript. See
#' `summarise_transcript` for more details.
#' @param summary_structure,extra_diarization_instructions,extra_output_instructions
#' Specific instructions necessary to build the summarisation prompt. See
#' `summarise_transcript` for more details and run `get_prompts()` to see the
#' defaults. See `summarise_transcript` for more details.
#' Specific instructions necessary to build the summarisation prompt. See
#' `summarise_transcript` for more details and run `get_prompts()` to see the
#' defaults.
#' @param llm_provider A string indicating the LLM provider to use for the
#' summarization. See `summarise_transcript` for more details.
#' @param extra_summarise_args Additional arguments passed to the
Expand Down Expand Up @@ -1067,8 +1080,12 @@ speech_to_summary_workflow <- function(
, full.names = T)[1],
chat_format = "webex",

# Arguments for `summarise_full_meeting`
# Arguments for `summarise_full_meeting` and `infer_agenda_from_transcript`
agenda = file.path(target_dir, "agenda.R"),
expected_agenda = NULL,
agenda_generation_window_size = 3600,
agenda_generation_output_file = file.path(target_dir, "agenda.R"),
extra_agenda_generation_args = NULL,

event_description = NULL,
event_audience = "An audience with understanding of the topic",
Expand Down Expand Up @@ -1249,7 +1266,7 @@ speech_to_summary_workflow <- function(
} else {
choice <- utils::menu(
choices = c(
"Generate a default agenda (i.e., process the transcript as one talk)",
"Generate the agenda automatically (You will need to review it before proceeding)",
"Exit (write your own agenda)"
),
title = "How do you want to proceed?"
Expand All @@ -1262,12 +1279,22 @@ speech_to_summary_workflow <- function(
}

# Infer the agenda from the transcript with an LLM if none is provided
agenda <- list(
list(
from = min(transcript_data$start),
to = max(transcript_data$end)
)
)
agenda_infer_args <- c(list(
transcript = transcript_data,
event_description = event_description,
vocabulary = vocabulary,
diarization_instructions = extra_diarization_instructions,
start_time = event_start_time,
expected_agenda = expected_agenda,
window_size = agenda_generation_window_size,
output_file = file.path(target_dir, "agenda.R"),
provider = llm_provider
), extra_agenda_generation_args)

agenda <- do.call(infer_agenda_from_transcript, agenda_infer_args)

message("Agenda generated. Please review it before proceeding.")
return(invisible(transcript_data))
}

message("\n### Summarizing transcript...\n")
Expand Down
Loading

0 comments on commit d7b5a85

Please sign in to comment.