Skip to content
New issue

Have a question about this project? # for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “#”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? # to your account

Dev #18

Merged
merged 19 commits into from
Mar 8, 2024
Merged

Dev #18

Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .Rprofile
Original file line number Diff line number Diff line change
@@ -1 +1 @@

source("renv/activate.R")
9 changes: 6 additions & 3 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@
Package: minutemaker
Title: GenAI-based meeting and conference minutes generator
Version: 0.5.5
Version: 0.6.0
Authors@R:
person("Angelo", "D'Ambrosio", , "a.dambrosioMD@gmail.com", role = c("aut", "cre"),
comment = c(ORCID = "0000-0002-2045-5155"))
Description: Generate meeting minutes starting from an audio recording or a transcripts using speech-to-text and LLMs.
Description: Generate meeting minutes starting from an audio recording or a
transcript using speech-to-text and LLMs.
License: MIT + file LICENSE
Imports:
dplyr (>= 1.1.4),
Expand All @@ -20,10 +21,12 @@ Imports:
Config/testthat/edition: 3
Encoding: UTF-8
Roxygen: list(markdown = TRUE)
RoxygenNote: 7.2.3
RoxygenNote: 7.3.1
Suggests:
av (>= 0.9.0),
devtools (>= 2.4.5),
parallel (>= 4.3.2),
testthat (>= 3.0.0),
text2vec (>= 0.6.4),
tictoc (>= 1.2),
usethis (>= 2.2.3)
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ export(format_summary_tree)
export(generate_recording_details)
export(get_prompts)
export(import_transcript_from_file)
export(infer_agenda_from_transcript)
export(interrogate_llm)
export(merge_transcripts)
export(parse_transcript_json)
Expand Down
11 changes: 9 additions & 2 deletions R/LLM_calls.R
Original file line number Diff line number Diff line change
Expand Up @@ -35,10 +35,17 @@
#'
process_messages <- function(messages) {

if (missing(messages) || is.null(messages)) {
if (missing(messages) || is.null(messages) || length(messages) == 0) {
stop("User messages are required.")
}

# Assume that a single message is from the user
if (length(messages) == 1 &&
is.character(messages) &&
is.null(names(messages))) {
messages <- c(user = messages)
}

# Convert vector to list format
vector_to_list <- function(msg_vec) {

Expand Down Expand Up @@ -199,7 +206,7 @@ interrogate_llm <- function(

if (httr::status_code(response) == 429) {
warning("Rate limit exceeded. Waiting before retrying.",
immediate. = TRUE)
immediate. = TRUE, call. = FALSE)

to_wait <- as.numeric(httr::headers(response)$`retry-after`)
message("Waiting for ", to_wait, " seconds.\n...")
Expand Down
77 changes: 52 additions & 25 deletions R/data_management.R
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,11 @@ parse_transcript_json <- function(
"Please remove it and try transcription again.")
}

if (length(transcript_list[[i]]$segments) == 0) {
# skip this file, there was nothing to transcribe
next
}

transcript_data <- transcript_list[[i]]$segments |>
bind_rows() |>
# Select only the columns to import
Expand Down Expand Up @@ -280,11 +285,12 @@ extract_text_from_transcript <- function(

# Ignore the `import_diarization` parameter if the transcript does not contain
# speaker information
if (all(is.na(transcript_data$speaker)) ||
if (!"speaker" %in% names(transcript_data) ||
all(is.na(transcript_data$speaker)) ||
n_distinct(transcript_data$speaker, na.rm = T) == 1) {
import_diarization <- FALSE

transcript_data$speaker <- "None"
transcript_data$speaker <- "Unknown"
}

transcript <- transcript_data %>%
Expand Down Expand Up @@ -386,16 +392,6 @@ convert_agenda_times <- function(
validate_agenda_element(agenda[[i]], from = TRUE, to = TRUE)
}

# if (
# convert_to == "clocktime" &&
# inherits(agenda[[1]]["from"], c("POSIXct", "character"))) {
#
# warning("Agenda already in clock time format.",
# call. = FALSE, immediate. = TRUE)
#
# return(agenda_orig)
# }

# Check if agenda times are all of the same class
if (!all(purrr::map_lgl(agenda, ~ is.numeric(.x$from))) &&
!all(purrr::map_lgl(agenda, ~ {
Expand Down Expand Up @@ -988,8 +984,25 @@ add_chat_transcript <- function(
#' generate the chat file. See `add_chat_transcript` for more details.
#' @param agenda The agenda of the meeting, that is, a list of agenda elements
#' each with a session name, a title, speaker and moderator lists, type of
#' talk and start and end times. Alternatively, the path to an R file
#' containing such a list. See `summarise_full_meeting` for more details.
#' talk, talk description and start and end times. Alternatively, the path to
#' an R file containing such a list. See `summarise_full_meeting` for more
#' details. If NULL, the user will be asked if the system should try to
#' generate the agenda automatically, using the `infer_agenda_from_transcript`
#' function.
#' @param expected_agenda A character string. Only used if the `agenda` argument
#' is `NULL` and the user requests the automatic agenda generation. This
#' string will be used to drive the LLM while generating the agenda. See
#' `infer_agenda_from_transcript` for more details.
#' @param agenda_generation_window_size The size of the window in seconds to
#' analyze at once when generating the agenda. See
#' `infer_agenda_from_transcript` for more details.
#' @param agenda_generation_output_file A string with the path to the output
#' file where the automatically generated agenda will be written. Should be a
#' .R file. See `infer_agenda_from_transcript` for more details.
#' @param extra_agenda_generation_args Additional arguments passed to the
#' `infer_agenda_from_transcript` function. See `infer_agenda_from_transcript`
#' for more details. Note that the `diarization_instructions` argument for this
#' function will be taken from the `extra_diarization_instructions` argument.
#' @param summarization_method A string indicating the summarization method to
#' use. See `summarise_full_meeting` for more details.
#' @param event_description A string containing a description of the meeting.
Expand All @@ -1004,9 +1017,9 @@ add_chat_transcript <- function(
#' should take into account the diarization of the transcript. See
#' `summarise_transcript` for more details.
#' @param summary_structure,extra_diarization_instructions,extra_output_instructions
#' Specific instructions necessary to build the summarisation prompt. See
#' `summarise_transcript` for more details and run `get_prompts()` to see the
#' defaults. See `summarise_transcript` for more details.
#' Specific instructions necessary to build the summarisation prompt. See
#' `summarise_transcript` for more details and run `get_prompts()` to see the
#' defaults.
#' @param llm_provider A string indicating the LLM provider to use for the
#' summarization. See `summarise_transcript` for more details.
#' @param extra_summarise_args Additional arguments passed to the
Expand Down Expand Up @@ -1067,8 +1080,12 @@ speech_to_summary_workflow <- function(
, full.names = T)[1],
chat_format = "webex",

# Arguments for `summarise_full_meeting`
# Arguments for `summarise_full_meeting` and `infer_agenda_from_transcript`
agenda = file.path(target_dir, "agenda.R"),
expected_agenda = NULL,
agenda_generation_window_size = 3600,
agenda_generation_output_file = file.path(target_dir, "agenda.R"),
extra_agenda_generation_args = NULL,

event_description = NULL,
event_audience = "An audience with understanding of the topic",
Expand Down Expand Up @@ -1249,7 +1266,7 @@ speech_to_summary_workflow <- function(
} else {
choice <- utils::menu(
choices = c(
"Generate a default agenda (i.e., process the transcript as one talk)",
"Generate the agenda automatically (You will need to review it before proceeding)",
"Exit (write your own agenda)"
),
title = "How do you want to proceed?"
Expand All @@ -1262,12 +1279,22 @@ speech_to_summary_workflow <- function(
}

# Infer the agenda from the transcript using the LLM if none is provided
agenda <- list(
list(
from = min(transcript_data$start),
to = max(transcript_data$end)
)
)
agenda_infer_args <- c(list(
transcript = transcript_data,
event_description = event_description,
vocabulary = vocabulary,
diarization_instructions = extra_diarization_instructions,
start_time = event_start_time,
expected_agenda = expected_agenda,
window_size = agenda_generation_window_size,
output_file = file.path(target_dir, "agenda.R"),
provider = llm_provider
), extra_agenda_generation_args)

agenda <- do.call(infer_agenda_from_transcript, agenda_infer_args)

message("Agenda generated. Please review it before proceeding.")
return(invisible(transcript_data))
}

message("\n### Summarizing transcript...\n")
Expand Down
Loading