Skip to content

Commit

Permalink
Merge pull request #21 from bakaburg1/Dev
Browse files Browse the repository at this point in the history
Improve agenda review and add custom LLM support
  • Loading branch information
bakaburg1 authored Apr 17, 2024
2 parents 5fd6494 + d8c5a50 commit 8fe8e60
Show file tree
Hide file tree
Showing 11 changed files with 213 additions and 101 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: minutemaker
Title: GenAI-based meeting and conferences minutes generator
Version: 0.8.0
Version: 0.9.0
Authors@R:
person("Angelo", "D'Ambrosio", , "a.dambrosioMD@gmail.com", role = c("aut", "cre"),
comment = c(ORCID = "0000-0002-2045-5155"))
Expand Down
20 changes: 20 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,23 @@
# minutemaker 0.9.0

### Improve agenda review and add custom LLM support

#### Enhancements
- Improve user interaction for agenda review: the `speech_to_summary_workflow` function now displays the generated agenda in the console, facilitating review and reducing the need to locate the agenda file separately. (Commit: 3bed1cc).
- Add support for custom language model endpoints: replace `use_local_llm()` with `use_custom_llm()` to send requests to custom (local or remote) language model endpoints compatible with the OpenAI API specification; an API key can also be provided for endpoints that require one. (Commit: 0fdfe57).
- Dynamic default for the output length: the default of `summarization_output_length` in the workflow function now depends on whether a multipart summary is generated (shorter outputs) or not (longer outputs), optimizing the readability and relevance of summaries. (Commit: 2511287).

#### Fixes
- Fix output_length usage: `output_length` was not being considered in the summarization process. (Commit: 08e95d1).
- Fix agenda file validation: update the validation logic for the 'agenda' argument in the `speech_to_summary_workflow` function to handle character type inputs correctly and provide clearer error messages. (Commit: d200a55).
- Fix agenda validation: add checks for empty agenda elements and missing required items, improve error messages for invalid time formats, and update tests. (Commit: 6d870ee).

#### Documentation
- Fix message typos: correct typos in messages. (Commit: 0fdfe57).

#### Summary
This pull request focuses on improving the user experience and adding support for custom language model endpoints. It enhances the agenda review process, sets the summary output length dynamically, fixes agenda validation, and corrects typos in messages.

# minutemaker 0.8.0

### Enhanced Agenda Management and Utilization
Expand Down
38 changes: 24 additions & 14 deletions R/LLM_calls.R
Original file line number Diff line number Diff line change
Expand Up @@ -165,7 +165,6 @@ interrogate_llm <- function(
...) {

messages <- process_messages(messages)
provider <- match.arg(provider)

if (is.null(provider)) {
stop("Language model provider is not set. ",
Expand Down Expand Up @@ -208,7 +207,7 @@ interrogate_llm <- function(

if (log_request) tictoc::tic()
response <- llm_fun(body, ...)
if (log_request) tictoc::toc()
if (log_request) elapsed <- tictoc::toc()

if (httr::status_code(response) == 429) {
warning("Rate limit exceeded. Waiting before retrying.",
Expand All @@ -229,8 +228,10 @@ interrogate_llm <- function(

err_message <- if (is.character(err_obj)) {
err_obj
} else {
} else if (is.character(err_obj$message)) {
err_obj$message
} else {
httr::content(response)
}

stop("Error in LLM request: ", err_message)
Expand All @@ -244,6 +245,8 @@ interrogate_llm <- function(
paste(
"Prompt tokens:", prompt_tokens,
"\nResponse tokens:", completion_tokens,
"\nGeneration speed:", paste(
signif(completion_tokens/(elapsed$toc - elapsed$tic), 3), "t/s"),
"\nTotal tokens:", total_tokens
)
) |> message()
Expand All @@ -260,7 +263,7 @@ interrogate_llm <- function(

#' Use OpenAI Language Model
#'
#' Sends a request to the OpenAI API using the parameters in the `body`
#' Sends a request to the OpenAI API using the parameters in the `body`
#' argument. It requires an API key and model identifier set in the R options.
#'
#' @param body The body of the request.
Expand All @@ -276,7 +279,7 @@ use_openai_llm <- function(
) {

if (is.null(api_key) || is.null(model)) {
stop("OpenAI GPT model and API key are not set. ",
stop("OpenAI GPT model or API key are not set. ",
"Use the following options to set them:\n",
"minutemaker_openai_model_gpt, ",
"minutemaker_open_api_key options.")
Expand Down Expand Up @@ -349,27 +352,30 @@ use_azure_llm <- function(

}

#' Use Local Language Model
#' Use Custom Language Model
#'
#' Sends a request to a local language model endpoint using the parameters in
#' the `body` argument. The endpoint URL should be set in the R options, with a
#' default provided.
#' Sends a request to a custom (local or remote) language model endpoint
#' compatible with the OpenAI API specification, using the parameters in the
#' `body` argument. The user can provide an API key if required.
#'
#' @param body The body of the request.
#' @param endpoint The endpoint (local or remote) of the language model
#' service. Can be obtained from R options.
#' @param api_key Optional API key for custom language model services that
#' require one. Obtained from R options.
#'
#' @return The function returns the response from the custom language model
#' endpoint.
use_local_llm <- function(
use_custom_llm <- function(
body,
endpoint = getOption("minutemaker_local_endpoint_gpt",
"http://localhost:1234/v1/chat/completions")
endpoint = getOption("minutemaker_custom_endpoint_gpt"),
api_key = getOption("minutemaker_custom_api_key")
) {

if (is.null(endpoint)) {
stop("Local endpoint is not set. ",
"Use the following options to set it:\n",
"minutemaker_local_endpoint_gpt."
"minutemaker_custom_endpoint_gpt"
)
}

Expand All @@ -378,7 +384,11 @@ use_local_llm <- function(
# Prepare the request
httr::POST(
url = endpoint,
httr::add_headers(`Content-Type` = "application/json"),
httr::add_headers(
`Content-Type` = "application/json",
if (!is.null(api_key)) {
.headers = c(Authorization = paste0("Bearer ", api_key))
}),
body = jsonlite::toJSON(body, auto_unbox = TRUE)
)

Expand Down
38 changes: 9 additions & 29 deletions R/data_management.R
Original file line number Diff line number Diff line change
Expand Up @@ -1102,7 +1102,7 @@ add_chat_transcript <- function(
#' minutes if the "rolling" method is used. See `summarise_transcript` for
#' more details.
#' @param summarization_output_length An indication to the LLM regarding the
#' length of the output. See `summarise_transcript` for more details.
#' length of the output in pages. See `summarise_transcript` for more details.
#' @param summarization_output_file A string with the path to the output file
#' where the summary tree will be written. Should be a .R file. See
#' `summarise_full_meeting` for more details.
Expand Down Expand Up @@ -1173,7 +1173,7 @@ speech_to_summary_workflow <- function(
llm_provider = getOption("minutemaker_llm_provider"),
extra_summarise_args = NULL,
summarization_window_size = 15,
summarization_output_length = 3,
summarization_output_length = if (isTRUE(multipart_summary)) 1 else 3,
summarization_method = c("simple", "rolling"),

summarization_output_file = file.path(target_dir, "event_summary.R"),
Expand Down Expand Up @@ -1327,8 +1327,8 @@ speech_to_summary_workflow <- function(

## Perform summarization ##

if (length(agenda) > 1) {
stop("The agenda argument should be of length 1.")
if (is.character(agenda) && length(agenda) > 1) {
stop("No more than one agenda file can be provided.")
}

# If the agenda argument is a character and the file does not exist, stop the
Expand Down Expand Up @@ -1390,7 +1390,9 @@ speech_to_summary_workflow <- function(

# Ask the user if they want to proceed with the generated agenda or review
# it first
message("Agenda generated. Please review it before proceeding.")
message("Agenda generated. Please review it before proceeding:")

cat("\n", format_agenda(agenda), "\n")

# Don't ask the user if the process is not interactive, just stop
if (!interactive()) {
Expand Down Expand Up @@ -1428,30 +1430,6 @@ speech_to_summary_workflow <- function(
stop("The overwrite_formatted_output argument must be TRUE or FALSE")
}

# isFALSE(overwrite_formatted_output) &&
# file.exists(formatted_output_file)) {

# if (interactive()) {
# choice <- utils::menu(
# choices = c(
# "Overwrite the existing formatted summary file",
# "Abort the process"
# ),
# title = "The formatted summary output file already exists and overwrite is FALSE. What do you want to do?"
# )
#
# if (choice == 2) {
# message("Aborted by user.")
# return(invisible(transcript_data))
#
# } else {
# message("Overwriting the existing formatted summary file.")
# }
# } else {
# message("The formatted summary output file already exists and overwrite is FALSE.\nSet overwrite_formatted_output = TRUE to overwrite it or remove it.")
# return(invisible(transcript_data))
# }

}

# Common summarization arguments
Expand All @@ -1476,6 +1454,7 @@ speech_to_summary_workflow <- function(

if (isFALSE(agenda) || isFALSE(multipart_summary)) {
# Summarize as single talk
message("...with single part approach...\n")

if (validate_agenda(agenda)) {
agenda <- format_agenda(agenda)
Expand All @@ -1495,6 +1474,7 @@ speech_to_summary_workflow <- function(
} else {

# Summarize as multiple talks
message("...with multipart approach...\n")

# Necessary extra arguments for the summarization of whole events
summarization_args$agenda <- agenda
Expand Down
7 changes: 5 additions & 2 deletions R/summarization.R
Original file line number Diff line number Diff line change
Expand Up @@ -229,7 +229,7 @@ summarise_transcript <- function(

args <- args[
c("event_description", "recording_details", "audience", "vocabulary",
"consider_diarization", "summary_structure",
"consider_diarization", "summary_structure", "output_length",
"extra_diarization_instructions", "extra_output_instructions")
]

Expand Down Expand Up @@ -270,7 +270,7 @@ summarise_transcript <- function(
message("\nAggregating summaries")

args <- args[
c("event_description", "recording_details", "audience",
c("event_description", "recording_details", "audience", "output_length",
"summary_structure", "extra_output_instructions")
]

Expand Down Expand Up @@ -890,6 +890,9 @@ entity_extractor <- function(
...
) {

# Initialize the prompts
set_prompts()

text <- paste(text, collapse = "--------\n\n\n")

acro_or_concepts <- entities[entities %in% c("acronyms", "concepts")]
Expand Down
Loading

0 comments on commit 8fe8e60

Please # to comment.