diff --git a/DESCRIPTION b/DESCRIPTION
index 2198a07..04da6e8 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,6 +1,6 @@
 Package: minutemaker
 Title: GenAI-based meeting and conferences minutes generator
-Version: 0.10.0
+Version: 0.12.0
 Authors@R: 
     person("Angelo", "D'Ambrosio", , "a.dambrosioMD@gmail.com", role = c("aut", "cre"),
            comment = c(ORCID = "0000-0002-2045-5155"))
@@ -11,6 +11,7 @@ Imports:
     dplyr (>= 1.1.4),
     httr (>= 1.4.7),
     jsonlite (>= 1.8.8),
+    llmR (>= 1.1.0),
     lubridate (>= 1.9.3),
     purrr (>= 1.0.2),
     readr (>= 2.1.4),
@@ -19,14 +20,17 @@ Imports:
     styler (>= 1.10.2),
     tools (>= 4.3.2),
     vctrs (>= 0.6.5)
+Remotes:
+    github::bakaburg1/llmR
 Config/testthat/edition: 3
 Encoding: UTF-8
 Roxygen: list(markdown = TRUE)
-RoxygenNote: 7.3.1
+RoxygenNote: 7.3.2
 Suggests:
     av (>= 0.9.0),
     devtools (>= 2.4.5),
     parallel (>= 4.3.2),
+    reticulate (>= 1.38.0),
     testthat (>= 3.0.0),
     text2vec (>= 0.6.4),
     tictoc (>= 1.2),
diff --git a/NAMESPACE b/NAMESPACE
index 440d251..2e20e68 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -13,15 +13,15 @@ export(infer_agenda_from_transcript)
 export(merge_transcripts)
 export(parse_transcript_json)
 export(perform_speech_to_text)
-export(prompt_llm)
 export(run_in_terminal)
 export(set_prompts)
 export(speech_to_summary_workflow)
 export(split_audio)
 export(summarise_full_meeting)
 export(summarise_transcript)
+export(use_mlx_whisper_local_stt)
+export(use_whisper_local_stt)
 export(validate_agenda)
 import(dplyr)
 importFrom(stats,setNames)
-importFrom(utils,hasName)
 importFrom(utils,tail)
diff --git a/NEWS.md b/NEWS.md
index 841c785..efa7057 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -1,3 +1,19 @@
+# minutemaker 0.12.0
+
+#### Enhancements
+- **Support for Local Whisper Models**: Added functions `use_whisper_local_stt` and `use_mlx_whisper_local_stt` to support local Whisper models via Python with reticulate, with the second being optimized for Mac OS with Apple Silicon (Commit: [69e4f5e](https://github.com/bakaburg1/minutemaker/commit/69e4f5e59518da51d7f757a5076511d4224c6d65)).
+- **Integration with llmR**: Refactored the code to rely on the `llmR` package for LLM interactions, removing redundant LLM-related functions (Commit: [2331b46](https://github.com/bakaburg1/minutemaker/commit/2331b463e0606cd4ee49ecb353b89b163da06d9e)).
+- **Enhanced Speech-to-Text Workflow**: Updated `perform_speech_to_text` to use `whisper_local` as the default model and enhanced `speech_to_summary_workflow` to display the selected speech-to-text model (Commit: [69e4f5e](https://github.com/bakaburg1/minutemaker/commit/69e4f5e59518da51d7f757a5076511d4224c6d65)).
+
+#### Fixes
+- **Dependency Management**: Replaced custom dependency check function with `rlang::check_installed` for better package management (Commit: [3227b0d](https://github.com/bakaburg1/minutemaker/commit/3227b0d7dba8785949c1d66c83d232bb38438c08)).
+
+#### Documentation
+- **Updated README**: Revised README to describe the use of `llmR` for summarization and the addition of new local models for speech-to-text (Commit: [8bff883](https://github.com/bakaburg1/minutemaker/commit/8bff88380c5dc977a52c2207f1ef380904784bf4)).
+
+#### Summary
+This pull request introduces significant enhancements to the `minutemaker` package by adding support for local Whisper models, integrating the `llmR` package for LLM interactions, and improving the speech-to-text workflow. Additionally, it fixes dependency management issues and updates the documentation to reflect these changes.
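To make the new entry points concrete, here is a minimal usage sketch of the two local speech-to-text functions added in this release. The audio file path is a hypothetical example; both functions set up their conda environment and Python dependencies on first use, as shown in the source diff below.

```r
# Sketch: transcribe a recording with the new local Whisper backends;
# "meeting.wav" is a hypothetical file path.
library(minutemaker)

# Pure-Python OpenAI Whisper, works on any platform.
res <- use_whisper_local_stt(
  audio_file = "meeting.wav",
  language = "en",
  model = "turbo"
)

# MLX-accelerated variant, faster on Mac OS with Apple Silicon.
res_mlx <- use_mlx_whisper_local_stt(audio_file = "meeting.wav")

# Both return the full transcript plus per-segment timings.
res$text
str(res$segments[[1]]) # list with id, start, end, text
```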
+ # minutemaker 0.10.0 #### Enhancements diff --git a/R/LLM_calls.R b/R/LLM_calls.R deleted file mode 100644 index 8745236..0000000 --- a/R/LLM_calls.R +++ /dev/null @@ -1,473 +0,0 @@ -#' Process chat message into standard format -#' -#' This function takes one or more (a list of) chat messages and processes them -#' into a standard list format with role and content for each message to be fed -#' to a large language model. -#' -#' The standard format is a list of chat messages with the following structure: -#' message: `c(role = "system", content = "Welcome to the chat!")` -#' list of messages: \code{list( -#' c(role = "system", content = "You are an useful AI assistant."), -#' c(role = "user", content = "Hi there!") -#' )} -#' list format: \code{list( -#' list(role = "system", content = "You are an useful AI assistant."), -#' list(role = "user", content = "Hi there!") -#' )} -#' list of lists format: \code{list( -#' list( -#' list(role = "system", content = "You are an useful AI assistant."), -#' list(role = "user", content = "Hi there!") -#' ), -#' list( -#' list(role = "system", content = "You are an useful AI assistant."), -#' list(role = "user", content = "Hi there!") -#' ) -#' )} -#' -#' @param messages A character vector or list of chat messages. In can be a -#' vector, a specifically structured list or a list of both if the goal is the -#' have the API process multiple messages at once. -#' -#' @return A list of chat messages in standard format. -#' -#' @importFrom utils hasName -#' -process_messages <- function(messages) { - - if (missing(messages) || is.null(messages) || length(messages) == 0) { - stop("User messages are required.") - } - - # Assume that a single message is from the user - if (length(messages) == 1 && - is.character(messages) && - is.null(names(messages))) { - messages <- c(user = messages) - } - - # Convert vector to list format - vector_to_list <- function(msg_vec) { - - # Check if vector is in named format - check <- all( - names(msg_vec) %in% - c("system", "user", "assistant", "function") - , na.rm = TRUE) - - check <- check && !is.null(names(msg_vec)) - - if (check) { - - # Convert from vector to list format - msg_vec <- purrr::imap(msg_vec, function(msg, nm) { - list(role = nm, content = msg) - }) |> setNames(NULL) - - } else { - stop("Invalid format for 'messages' vector.") - } - } - - # Validate list format - validate_list_format <- function(msg_list) { - - # Check if the message is in correct list format - check <- !purrr::every(msg_list, function(msg) { - vctrs::obj_is_list(msg) && - hasName(msg, "role") && - hasName(msg, "content") && - msg$role %in% c("system", "user", "assistant", "function") - }) - - return(!check) - } - - # Check if message is in a valid vector format - if (is.character(messages)) { - return(vector_to_list(messages)) - } - - - if (vctrs::obj_is_list(messages)) { - - # Check if valid list format - if (validate_list_format(messages)) { - return(messages) - } - - # It turned out the API doesn't really support batch calls of - # multiple prompts - - # # Check if list of vectors - # if (purrr::every(messages, is.character)) { - # - # # Convert each to list - # return(purrr::map(messages, vector_to_list)) - # - # } - # - # # Check if list of lists - # if (purrr::every(messages, validate_list_format)) { - # - # return(messages) - # - # } - - } - - stop("Message is neither a valid vector nor a valid list.") - -} - -#' Interrogate a Language Model -#' -#' This function sends requests to a specified language model provider (OpenAI, -#' 
Azure, or a locally running LLM server) and returns the response. It handles -#' rate limiting and retries the request if necessary, and also processes errors -#' in the response. -#' -#' Users can provide their own models by writing a function with the following -#' name pattern: `use__llm`. See the existing functions using the -#' ::: operator for examples. -#' -#' @param messages Messages to be sent to the language model. -#' @param provider The provider of the language model. Defaults to "openai". -#' Other options are "azure" and "local". -#' @param params Additional parameters for the language model request. Defaults -#' to a list with `temperature = 0`. -#' @param force_json A boolean to force the response in JSON format. Default is -#' FALSE. Works only for OpenAI and Azure endpoints. -#' @param log_request A boolean to log the request time. Can be set up globally -#' using the `minutemaker_log_requests` option, which defaults to TRUE. -#' @param ... Additional arguments passed to the language model provider -#' functions. -#' -#' @return Returns the content of the message from the language model response. -#' -#' @export -#' -#' @examples -#' \dontrun{ -#' response <- prompt_llm( -#' messages = c(user = "Hello there!"), -#' provider = "openai") -#' } -#' -prompt_llm <- function( - messages = NULL, - provider = getOption("minutemaker_llm_provider"), - params = list( - temperature = 0 - ), - force_json = FALSE, - log_request = getOption("minutemaker_log_requests", TRUE), - ...) { - - messages <- process_messages(messages) - - if (is.null(provider)) { - stop("Language model provider is not set. ", - "You can use the following option to set it globally:\n", - "minutemaker_llm_provider.") - } - - if (log_request) { - check_and_install_dependencies("tictoc") - } - - # Prepare the body of the request and merge with default - body <- purrr::list_modify(list( - temperature = 0 - ), !!!params) - - body$messages <- messages - - # Force the LLM to answer in JSON format (not all models support this) - if (force_json) { - body$response_format <- list("type" = "json_object") - } - - # Map provider to specific function - llm_fun <- paste0("use_", provider, "_llm") - - if (!exists(llm_fun, mode = "function")) { - stop("Unsupported LLM provider. - You can set it project-wide using the minutemaker_llm_provider option.") - } - - llm_fun <- get(llm_fun) - - # Try to send the request - retry <- FALSE - - while(!exists("response", inherits = FALSE) || retry) { - - #message("Sending message to Azure GPT API.") - retry <- FALSE - - if (log_request) tictoc::tic() - response <- llm_fun(body, ...) - if (log_request) elapsed <- tictoc::toc() - - if (httr::status_code(response) == 429) { - warning("Rate limit exceeded. Waiting before retrying.", - immediate. = TRUE, call. 
= FALSE) - - to_wait <- as.numeric(httr::headers(response)$`retry-after`) - message("Waiting for ", to_wait, " seconds.\n...") - Sys.sleep(to_wait) - message("Retrying...") - retry <- TRUE - } - } - - - # Check for errors in response - if (httr::http_error(response)) { - err_obj <- httr::content(response)$error - - err_message <- if (is.character(err_obj)) { - err_obj - } else if (is.character(err_obj$message)) { - err_obj$message - } else { - httr::content(response) - } - - stop("Error in LLM request: ", err_message) - } - - # Return the response - parsed <- httr::content(response, as = "parsed", encoding = "UTF-8") - - if (log_request) { - with(parsed$usage, - paste( - "Prompt tokens:", prompt_tokens, - "\nResponse tokens:", completion_tokens, - "\nGeneration speed:", paste( - signif(completion_tokens/(elapsed$toc - elapsed$tic), 3), "t/s"), - "\nTotal tokens:", total_tokens - ) - ) |> message() - } - - # Return the response - purrr::imap_chr(parsed$choices, \(ans, i) { - ans_content <- ans$message$content - - # Manage the case when the answer is cut off due to exceeding the - # output token limit - if (ans$finish_reason == "length") { - i <- if (length(parsed$choices) > 1) paste0(" ", i, " ") else " " - - warning("Answer", i, "exhausted the context window!") - - file_name <- paste0("output_", Sys.time(), ".txt") - - warning( - "Answer", i, "exhausted the context window!\n", - "The answer has been saved to a file: ", file_name - ) - - readr::write_lines(ans_content, file_name) - - choice <- utils::menu( - c( - "Try to complete the answer", - "Keep the incomplete answer", - "Stop the process"), - title = "How do you want to proceed?" - ) - - if (choice == 1) { - # Ask the model to continue the answer - messages_new <- c( - messages, - list(list( - role = "assistant", - content = ans_content - )), - list(list( - role = "user", - content = "continue" - )) - ) - - ans_new <- prompt_llm( - messages_new, provider = provider, params = params, - force_json = force_json, - log_request = log_request, ... - ) - - return(paste0(ans_content, ans_new)) - } else if (choice == 2) { - return(ans_content) - } else { - stop("The process has been stopped.") - } - } else ans_content - }) -} - -#' Use OpenAI Language Model -#' -#' Sends a request to the OpenAI API using the parameters in the `body` -#' argument. It requires an API key and model identifier set in the R options. -#' -#' @param body The body of the request. -#' @param model Model identifier for the OpenAI API. Obtained from R options. -#' @param api_key API key for the OpenAI service. Obtained from R options. -#' @param log_request A boolean to log the request time. Can be set up globally -#' using the `minutemaker_log_requests` option, which defaults to TRUE. -#' -#' @return The function returns the response from the OpenAI API. -#' -use_openai_llm <- function( - body, - model = getOption("minutemaker_openai_model_gpt"), - api_key = getOption("minutemaker_openai_api_key"), - log_request = getOption("minutemaker_log_requests", TRUE) -) { - - if (is.null(api_key) || is.null(model)) { - stop("OpenAI GPT model or API key are not set. 
", - "Use the following options to set them:\n", - "minutemaker_openai_model_gpt, ", - "minutemaker_open_api_key options.") - } - - if (log_request) { - message("Interrogating OpenAI: ", model, "...") - } - - body$model = model - - # Prepare the request - httr::POST( - url = "https://api.openai.com/v1/chat/completions", - httr::add_headers( - `Content-Type` = "application/json", - `Authorization` = paste0("Bearer ", api_key) - ), - body = jsonlite::toJSON(body, auto_unbox = TRUE), - encode = "json" - ) - -} - -#' Use Azure Language Model -#' -#' Sends a request to the Azure API for language model completions using the -#' parameters in the `body` argument. This function requires specific Azure -#' configurations (deployment ID, resource name, API key, and API version) set -#' in the R options. -#' -#' @param body The body of the request. -#' @param deployment_id Azure deployment ID for the language model. Obtained -#' from R options. -#' @param resource_name Azure resource name. Obtained from R options. -#' @param api_key API key for the Azure language model service. Obtained from R -#' options. -#' @param api_version API version for the Azure language model service. Obtained -#' from R options. -#' @param log_request A boolean to log the request time. Can be set up globally -#' using the `minutemaker_log_requests` option, which defaults to TRUE. -#' -#' @return The function returns the response from the Azure API. -use_azure_llm <- function( - body, - deployment_id = getOption("minutemaker_azure_deployment_gpt"), - resource_name = getOption("minutemaker_azure_resource_gpt"), - api_key = getOption("minutemaker_azure_api_key_gpt"), - api_version = getOption("minutemaker_azure_api_version"), - log_request = getOption("minutemaker_log_requests", TRUE) -) { - - if (is.null(resource_name) || is.null(deployment_id) || - is.null(api_key) || is.null(api_version)) { - stop("Azure GPT resource name, deployment name,", - ", API key, or API version are not set. ", - "Use the following options to set them:\n", - "minutemaker_azure_deployment_gpt, ", - "minutemaker_azure_resource_gpt, ", - "minutemaker_azure_api_key_gpt, ", - "minutemaker_azure_api_version." - ) - } - - if (log_request) { - message( - "Interrogating Azure OpenAI: ", resource_name, "/", deployment_id, - " (", api_version, ")...") - } - - # Prepare the request - httr::POST( - url = paste0( - "https://", - resource_name, - ".openai.azure.com/openai/deployments/", - deployment_id, - "/chat/completions?api-version=", - api_version), - httr::add_headers(`Content-Type` = "application/json", `api-key` = api_key), - body = jsonlite::toJSON(body, auto_unbox = TRUE) - ) - -} - -#' Use Custom Language Model -#' -#' Sends a request to a custom (local or remote) language model endpoint -#' compatible with the OpenAi API specification, using the parameters in the -#' `body` argument. The user can provide an API key if required. -#' -#' @param body The body of the request. -#' @param endpoint The local endpoint for the language model service. Can be -#' obtained from R options. -#' @param model Model identifier for the custom API, if needed (some API have -#' one model per endpoint, some multiple ones). Obtained from R options. -#' @param api_key Optional API key for the custom language model services that -#' require it. Obtained from R options. -#' @param log_request A boolean to log the request time. Can be set up globally -#' using the `minutemaker_log_requests` option, which defaults to TRUE. 
-#' -#' @return The function returns the response from the local language model -#' endpoint. -use_custom_llm <- function( - body, - endpoint = getOption("minutemaker_custom_endpoint_gpt"), - model = getOption("minutemaker_custom_model_gpt", NULL), - api_key = getOption("minutemaker_custom_api_key"), - log_request = getOption("minutemaker_log_requests", TRUE) -) { - - if (is.null(endpoint)) { - stop("Local endpoint is not set. ", - "Use the following options to set it:\n", - "minutemaker_custom_endpoint_gpt" - ) - } - - if (log_request) { - message("Interrogating custom LLM: ", endpoint, "/", model, "...") - } - - if (!is.null(model)) { - body$model = model - } - - # Prepare the request - httr::POST( - url = endpoint, - httr::add_headers( - `Content-Type` = "application/json", - if (!is.null(api_key)) { - .headers = c(Authorization = paste0("Bearer ", api_key)) - }), - body = jsonlite::toJSON(body, auto_unbox = TRUE) - ) - -} diff --git a/R/NLP_operations.R b/R/NLP_operations.R index f54114d..c47aa4c 100644 --- a/R/NLP_operations.R +++ b/R/NLP_operations.R @@ -11,7 +11,7 @@ #' tokenize_text <- function(text) { - check_and_install_dependencies("text2vec") + rlang::check_installed("text2vec") # Convert text to lowercase, remove special characters and extra spaces, and # tokenize @@ -50,7 +50,7 @@ generate_glove_model <- function( overwrite = FALSE ) { - check_and_install_dependencies("parallel") + rlang::check_installed("parallel") # Create a hash of the data and the model parameters data_hash <- rlang::hash(list( @@ -177,7 +177,7 @@ compute_text_sim <- function(x_text, y_texts, embedding_matrix) { # # if (method == "word2vec") { # -# check_and_install_dependencies("word2vec") +# rlang::check_installed("word2vec") # # # Word2vec returns NA if a segment is a single word with a punctuation # # mark at the end, don't know why @@ -191,7 +191,7 @@ compute_text_sim <- function(x_text, y_texts, embedding_matrix) { # word2vec::doc2vec(model, x_text)) |> # as.vector() # } else { -# check_and_install_dependencies("stringdist") +# rlang::check_installed("stringdist") # # y_probes$similarity <- stringdist::stringsim( # x_text, y_probes$text, method = "cosine") diff --git a/R/data_management.R b/R/data_management.R index a5248fc..0feb783 100644 --- a/R/data_management.R +++ b/R/data_management.R @@ -1100,7 +1100,7 @@ add_chat_transcript <- function( #' @param llm_provider A string indicating the LLM provider to use for the #' summarization. See `summarise_transcript` for more details. #' @param extra_summarise_args Additional arguments passed to the -#' `prompt_llm` function. See `summarise_transcript` for more details. +#' `llmR::prompt_llm` function. See `summarise_transcript` for more details. #' @param summarization_window_size The size of the summarization window in #' minutes if the "rolling" method is used. See `summarise_transcript` for #' more details. 
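A side note on the `rlang::check_installed` swap shown in these hunks: unlike the removed custom helper, it is a stock one-liner that errors when the package is missing and, in interactive sessions, first offers to install it. A minimal sketch, using a package actually guarded in this diff:

```r
# Soft-dependency guard, as now used throughout the package: raises an
# error if "text2vec" is missing, after offering to install it when the
# session is interactive.
rlang::check_installed("text2vec")
```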
@@ -1173,7 +1173,7 @@ speech_to_summary_workflow <- function( summary_structure = get_prompts("summary_structure"), extra_diarization_instructions = NULL, extra_output_instructions = NULL, - llm_provider = getOption("minutemaker_llm_provider"), + llm_provider = getOption("llmr_llm_provider"), extra_summarise_args = NULL, summarization_window_size = 15, summarization_output_length = if (isTRUE(multipart_summary)) 1 else 3, @@ -1239,6 +1239,7 @@ speech_to_summary_workflow <- function( ) { message("\n### Performing speech to text...\n") + message("(stt model: ", stt_model, ")\n") # A speech-to-text model is required if (is.null(stt_model)) { diff --git a/R/speech_to_text.R b/R/speech_to_text.R index 21be5ea..eaad207 100644 --- a/R/speech_to_text.R +++ b/R/speech_to_text.R @@ -33,7 +33,7 @@ perform_speech_to_text <- function( audio_path, output_dir = file.path(dirname(audio_path), "transcription_output_data"), - model, + model = getOption("minutemaker_stt_model", "whisper_local"), initial_prompt = NULL, overwrite = FALSE, language = "en", ... @@ -212,7 +212,7 @@ split_audio <- function( ) { # Check if the av package is installed and ask to install it if not - check_and_install_dependencies("av") + rlang::check_installed("av") # Calculate segment length in seconds segment_length_sec <- segment_duration * 60 @@ -278,7 +278,7 @@ use_whisper_ctranslate2_stt <- function( n_threads = NULL ) { - check_and_install_dependencies("parallel") + rlang::check_installed("parallel") if (is.null(n_threads)) { n_threads <- parallel::detectCores() @@ -315,11 +315,6 @@ use_whisper_ctranslate2_stt <- function( jsonlite::read_json(output_file_path) - # In console, synchronous version, no logging - # p <- processx::process$run( - # "whisper-ctranslate2", - # args = args, - # echo_cmd = T, stdout = "|") } #' Use Azure Whisper Model for Speech-to-Text @@ -395,25 +390,25 @@ use_azure_whisper_stt <- function( warning("Error ", response$status_code, " in Azure Whisper API request: ", httr::content(response, "text"), call. = FALSE, immediate. = TRUE) - wait_for <- stringr::str_extract( - httr::content(response, "text", encoding = "UTF-8"), - "\\d+(?= seconds)") |> as.numeric() + wait_for <- stringr::str_extract( + httr::content(response, "text", encoding = "UTF-8"), + "\\d+(?= seconds)") |> as.numeric() - if (is.na(wait_for) && !interactive()) stop() + if (is.na(wait_for) && !interactive()) stop() - if (is.na(wait_for)) wait_for <- 30 + if (is.na(wait_for)) wait_for <- 30 - message("Retrying in ", wait_for, " seconds...") + message("Retrying in ", wait_for, " seconds...") - Sys.sleep(wait_for) + Sys.sleep(wait_for) - res <- use_azure_whisper_stt( - audio_file = audio_file, - language = language, - initial_prompt = initial_prompt, - temperature = temperature) + res <- use_azure_whisper_stt( + audio_file = audio_file, + language = language, + initial_prompt = initial_prompt, + temperature = temperature) - return(res) + return(res) } # Return the response @@ -489,3 +484,201 @@ use_openai_whisper_stt <- function( # Return the response res <- httr::content(response) } + +#' Use Local Whisper Model for Speech-to-Text +#' +#' This function uses a local Whisper model via Python with reticulate to +#' transcribe audio. It can use the official OpenAI Whisper package or any +#' compatible Python package. +#' +#' @param audio_file The path to the audio file to transcribe. +#' @param language The language of the input audio. Default is "en" for English. +#' If NULL, Whisper will attempt to detect the language. 
+#' @param initial_prompt Text to guide the model's style or continue a previous
+#' segment.
+#' @param model The Whisper model to use. Default is "turbo". Check
+#' https://github.com/openai/whisper for other available models.
+#' @param whisper_package The Python package to use for Whisper (default:
+#' "openai-whisper").
+#'
+#' @return A list with the full transcript and the transcription by segments.
+#'
+#' @export
+use_whisper_local_stt <- function(
+  audio_file,
+  language = "en",
+  initial_prompt = "",
+  model = "turbo",
+  whisper_package = getOption(
+    "minutemaker_whisper_package", "openai-whisper")
+) {
+  # Check if reticulate is installed
+  if (!rlang::is_installed("reticulate")) {
+    stop("Package 'reticulate' is required. ",
+         "Please install it using install.packages('reticulate')")
+  }
+
+  # Check if Miniconda is installed
+  if (length(list.files(reticulate::miniconda_path())) == 0) {
+    message("Miniconda not found. Installing it now...")
+    reticulate::install_miniconda()
+  }
+
+  conda_env <- "minutemaker_env"
+
+  # Check if the conda environment exists
+  if (!reticulate::condaenv_exists(conda_env)) {
+    message(
+      "Conda environment '", conda_env, "' does not exist. Creating it now...")
+
+    reticulate::conda_create(conda_env, python_version = "3.9")
+  }
+
+  # Use the conda environment
+  reticulate::use_miniconda(conda_env, required = TRUE)
+
+  # Check if Whisper is already installed
+  if (!reticulate::py_module_available("whisper")) {
+    message("Whisper not found. Installing dependencies...")
+
+    # Install the required packages
+    reticulate::conda_install(
+      conda_env,
+      c("numpy==1.23.5", "numba==0.56.4", "llvmlite==0.39.1", whisper_package),
+      pip = TRUE)
+  }
+
+  # Import the Whisper module
+  whisper <- reticulate::import("whisper")
+
+  # Load the Whisper model
+  model <- whisper$load_model(model)
+
+  # Prepare transcription options
+  options <- list(
+    language = language,
+    initial_prompt = initial_prompt,
+    fp16 = FALSE
+  )
+
+  # Remove NULL values from options
+  options <- options[!sapply(options, is.null)]
+
+  # Perform transcription
+  result <- do.call(model$transcribe, c(list(audio_file), options))
+
+  # Extract segments
+  segments <- lapply(result$segments, function(seg) {
+    list(
+      id = seg$id,
+      start = seg$start,
+      end = seg$end,
+      text = seg$text
+    )
+  })
+
+  # Return results in the expected format
+  list(
+    text = result$text,
+    segments = segments
+  )
+}
+
+#' Use MLX Whisper Local Model for Speech-to-Text (Mac OS only)
+#'
+#' This function uses a local MLX Whisper model via Python with reticulate to
+#' transcribe audio. It is specifically designed to work with the MLX Whisper
+#' package. MLX allows faster inference on Mac OS with Apple Silicon.
+#'
+#' @param audio_file The path to the audio file to transcribe.
+#' @param language The language of the input audio. Default is "en" for English.
+#' If NULL, Whisper will attempt to detect the language.
+#' @param initial_prompt Text to guide the model's style or continue a previous
+#' segment.
+#' @param model The MLX Whisper model to use. Default is
+#' "mlx-community/distil-whisper-large-v3".
+#' @param whisper_package The Python package to use for MLX Whisper (default:
+#' "mlx_whisper").
+#'
+#' @return A list with the full transcript and the transcription by segments.
+#'
+#' @export
+use_mlx_whisper_local_stt <- function(
+  audio_file,
+  language = "en",
+  initial_prompt = "",
+  model = "mlx-community/distil-whisper-large-v3",
+  whisper_package = getOption("minutemaker_whisper_package", "mlx_whisper")
+) {
+  # Check if reticulate is installed
+  if (!rlang::is_installed("reticulate")) {
+    stop("Package 'reticulate' is required. ",
+         "Please install it using install.packages('reticulate')")
+  }
+
+  # Check if Miniconda is installed
+  if (length(list.files(reticulate::miniconda_path())) == 0) {
+    message("Miniconda not found. Installing it now...")
+    reticulate::install_miniconda()
+  }
+
+  conda_env <- "minutemaker_env"
+
+  # Check if the conda environment exists
+  if (!reticulate::condaenv_exists(conda_env)) {
+    message(
+      "Conda environment '", conda_env, "' does not exist. Creating it now...")
+
+    reticulate::conda_create(conda_env, python_version = "3.9")
+  }
+
+  # Use the conda environment
+  reticulate::use_condaenv(conda_env, required = TRUE)
+
+  # Check if Whisper is already installed
+  if (!reticulate::py_module_available(whisper_package)) {
+    message("Whisper not found. Installing dependencies...")
+
+    # Install the required package
+    reticulate::conda_install(conda_env, whisper_package, pip = TRUE)
+  }
+
+  # Import the Whisper module
+  mlx_whisper <- reticulate::import(whisper_package)
+
+  # Prepare transcription options
+  decode_options <- list(
+    language = language,
+    initial_prompt = initial_prompt
+  )
+
+  # Remove NULL values from options
+  decode_options <- decode_options[!sapply(decode_options, is.null)]
+
+  # Perform transcription, splicing the decode options in via do.call()
+  # (rlang's `!!!` operator only works inside rlang-aware functions)
+  result <- do.call(
+    mlx_whisper$transcribe,
+    c(
+      list(
+        audio_file,
+        path_or_hf_repo = model,
+        fp16 = FALSE,
+        word_timestamps = TRUE
+      ),
+      decode_options
+    )
+  )
+
+  # Extract segments
+  segments <- lapply(result$segments, function(seg) {
+    list(
+      id = seg$id,
+      start = seg$start,
+      end = seg$end,
+      text = seg$text
+    )
+  })
+
+  # Return results in the expected format
+  list(
+    text = result$text,
+    segments = segments
+  )
+}
diff --git a/R/summarization.R b/R/summarization.R
index cd42a1f..0eb05fc 100644
--- a/R/summarization.R
+++ b/R/summarization.R
@@ -111,7 +111,7 @@ generate_recording_details(
 #' get_prompts("output_rolling_aggregation") prompts depending on the task.
 #' @param prompt_only If TRUE, only the prompt is returned, the LLM is not
 #' interrogated. Default is FALSE.
-#' @param ... Additional arguments passed to the `prompt_llm` function,
+#' @param ... Additional arguments passed to the `llmR::prompt_llm` function,
 #' such as the LLM provider.
 #'
 #' @return A summary of the transcript.
@@ -252,7 +252,7 @@ summarise_transcript <- function(
   }
 
   # Interrogate the LLM
-  prompt_llm(
+  llmR::prompt_llm(
     c(
       system = get_prompts("persona"),
       user = prompt),
@@ -280,7 +280,7 @@ summarise_transcript <- function(
     args = args
   )
 
-  prompt_llm(
+  llmR::prompt_llm(
     c(
       system = get_prompts("persona"),
       user = aggregation_prompt),
@@ -328,8 +328,8 @@ summarise_transcript <- function(
 #' `summarise_transcript` for more details and run `get_prompts()` to see the
 #' defaults. See `summarise_transcript` for more details.
 #' @param overwrite Whether to overwrite existing summaries. Default is FALSE.
-#' @param ... Additional arguments passed to `prompt_llm` function, such as
-#' the LLM provider.
+#' @param ... Additional arguments passed to the `llmR::prompt_llm` function,
+#' such as the LLM provider.
#' #' @return The result tree of the meeting summary. Also saves the results in the #' output file as side effect. @@ -503,7 +503,7 @@ summarise_full_meeting <- function( #' LLM context. #' @param output_file An optional file to save the results to. Default is NULL, #' i.e., the results are not saved to a file. -#' @param ... Additional arguments passed to the `prompt_llm` function. +#' @param ... Additional arguments passed to the `llmR::prompt_llm` function. #' Keep in consideration that this function needs LLMs that manages long #' context and that produce valid JSON outputs. The `force_json` argument is #' used with OpenAI based LLM but it's not accepted by other LLMs; therefore @@ -686,7 +686,7 @@ infer_agenda_from_transcript <- function( } # Attempt to interrogate the LLM - result_json <- try(prompt_llm( + result_json <- try(llmR::prompt_llm( prompt_set, ..., force_json = TRUE @@ -834,7 +834,7 @@ infer_agenda_from_transcript <- function( user = prompt ) - result_json <- prompt_llm( + result_json <- llmR::prompt_llm( prompt_set, ..., force_json = TRUE ) @@ -877,7 +877,8 @@ infer_agenda_from_transcript <- function( #' them. #' @param prompt_only If TRUE, only the prompt is returned, the LLM is not #' interrogated. Default is FALSE. -#' @param ... Additional arguments passed to the `prompt_llm` function. +#' @param ... Additional arguments passed to the `llmR::prompt_llm` +#' function. #' #' @return A vector with the entities found in the text. #' @@ -936,7 +937,7 @@ entity_extractor <- function( return(task) } - prompt_llm( + llmR::prompt_llm( c("system" = get_prompts("persona"), "user" = task), force_json = TRUE, ...) |> jsonlite::fromJSON() |> diff --git a/R/utils.R b/R/utils.R index c054efb..614a501 100644 --- a/R/utils.R +++ b/R/utils.R @@ -1,40 +1,3 @@ -#' Check and install dependencies -#' -#' Checks if a list of packages are installed and installs them if not. Trigger -#' an error if the user chooses not to install a package. -#' -#' @param deps A character vector of package names. -#' -#' @return Nothing. -#' -check_and_install_dependencies <- function(deps) { - for (dep in deps) { - stop_message <- paste0(dep, " is required but was not installed.") - # Check if the package is installed - is_installed <- requireNamespace(dep, quietly = TRUE) - - if (!is_installed) { - # If not, ask the user if they want to install it - if (interactive()) { - # Only in interactive sessions, otherwise just stop - do_install <- utils::menu( - c("Yes", "No"), - title = paste0(dep, " is not installed. Install it now?")) == 1 - - if(do_install) { - try({ - utils::install.packages(dep) - # After successful installation, recheck if the package is now installed - is_installed <- requireNamespace(dep, quietly = FALSE) - }) - } - } - } - - # Stop if the package is not installed - if (!is_installed) stop(stop_message) - } -} #' Check if transcript segments are silent #' diff --git a/README.Rmd b/README.Rmd index 203c9ee..dd18763 100644 --- a/README.Rmd +++ b/README.Rmd @@ -58,63 +58,64 @@ Here is an example workflow. ### Setting up the package -You need to set up the API keys for the speech-to-text and text summarization -APIs. You can do this by setting the following options: +You need to set up the infrastructure for the speech-to-text and text +summarization APIs. +The LLM-powered summarization requires the `bakaburg1/llmR` package which is +installed (from GitHub) together with `minutemaker`. 
-```{r, eval = FALSE}
+You can do this by setting the following options:
-# Load the package
+```{r, eval = FALSE}
+# Load the necessary packages
 library(minutemaker)
 
-# Set the API information for the speech-to-text API of choice
-
-# OpenAI example
+# Set up LLM model of choice using llmR functions
 
-options(
-
-  # OpenAI API Key (for both text-to-speech and text summary)
-  minutemaker_openai_api_key = "***",
-
-  minutemaker_openai_model_gpt = "gpt-4"
+# Example: Setting up OpenAI GPT-4 model
+llmR::record_llmr_model(
+  label = "openai",
+  provider = "openai",
+  model = "gpt-4",
+  api_key = "your_openai_api_key"
 )
 
-# Azure example
-
-options(
-
-  # Azure Whisper API (for text-to-speech)
-  minutemaker_azure_resource_whisper = "***",
-  minutemaker_azure_deployment_whisper = "***",
-  minutemaker_azure_api_key_whisper = "***",
-
-  # Azure GPT API (for text summary)
-  minutemaker_azure_resource_gpt = "***",
-  minutemaker_azure_deployment_gpt = "***",
-  minutemaker_azure_api_key_gpt = "***",
-
-  # Azure common parameters (common)
-  minutemaker_azure_api_version = "***"
+# Example: Setting up Azure GPT model
+llmR::record_llmr_model(
+  label = "azure_gpt",
+  provider = "azure",
+  model = "your_azure_deployment_id",
+  endpoint = "https://your-resource-name.openai.azure.com",
+  api_key = "your_azure_api_key",
+  api_version = "2024-06-01"
 )
 
-# Local GPT model example
-
-options(
-  # Local LLM model (for text summary)
-  minutemaker_local_endpoint_gpt = "local-host-path-to-model"
-)
+# Set the preferred LLM globally using one of the labels defined above
+llmR::set_llmr_model("openai")
 
-# Set the preferred LLM globally
+# Set up the speech-to-text (STT) options
 options(
-  minutemaker_llm_provider = "***" # E.g. "openai", "azure", "local" or custom
+  # Choose the STT model among the online models ("azure_whisper",
+  # "openai_whisper"), the Python-based local models ("whisper_local" and
+  # "mlx_whisper_local", the latter optimized for Apple Silicon), or
+  # "whisper_ctranslate2" (CLI-based; requires a separate
+  # whisper-ctranslate2 installation)
+  minutemaker_stt_model = "whisper_local",
+
+  # OpenAI Whisper API Key (for remote OpenAI whisper model)
+  minutemaker_openai_api_key = "your_openai_api_key",
+
+  # Azure Whisper API credentials (for remote Azure whisper model)
+  minutemaker_azure_resource_whisper = "your_azure_resource_name",
+  minutemaker_azure_deployment_whisper = "your_azure_deployment_id",
+  minutemaker_azure_api_key_whisper = "your_azure_api_key",
+  minutemaker_azure_api_version = "2024-06-01"
 )
-
 ```
 
-These setting can be also passed manually to the various functions, but the
-option system is more convenient.
-Of course you just need to set the options for the APIs you want to use (e.g.,
-you don't need the speech-to-text API options if you already have a transcript).
+These settings can also be passed manually to the various functions, but the
+option system is more convenient. You only need to set the options for the APIs
+you want to use (e.g., you don't need the speech-to-text API options if you
+already have a transcript).
 
 ### Transcribing the audio
diff --git a/README.md b/README.md
index 4b4a49a..a854f82 100644
--- a/README.md
+++ b/README.md
@@ -51,62 +51,64 @@ Here is an example workflow.
 
 ### Setting up the package
 
-You need to set up the API keys for the speech-to-text and text
-summarization APIs. You can do this by setting the following options:
+You need to set up the infrastructure for the speech-to-text and text
+summarization APIs. The LLM-powered summarization requires the
+`bakaburg1/llmR` package which is installed (from GitHub) together with
+`minutemaker`.
 
-``` r
+You can do this by setting the following options:
-# Load the package
+``` r
+# Load the necessary packages
 library(minutemaker)
 
-# Set the API information for the speech-to-text API of choice
+# Set up LLM model of choice using llmR functions
 
-# OpenAI example
-
-options(
-
-  # OpenAI API Key (for both text-to-speech and text summary)
-  minutemaker_openai_api_key = "***",
-
-  minutemaker_openai_model_gpt = "gpt-4"
+# Example: Setting up OpenAI GPT-4 model
+llmR::record_llmr_model(
+  label = "openai",
+  provider = "openai",
+  model = "gpt-4",
+  api_key = "your_openai_api_key"
 )
 
-# Azure example
-
-options(
-
-  # Azure Whisper API (for text-to-speech)
-  minutemaker_azure_resource_whisper = "***",
-  minutemaker_azure_deployment_whisper = "***",
-  minutemaker_azure_api_key_whisper = "***",
-
-  # Azure GPT API (for text summary)
-  minutemaker_azure_resource_gpt = "***",
-  minutemaker_azure_deployment_gpt = "***",
-  minutemaker_azure_api_key_gpt = "***",
-
-  # Azure common parameters (common)
-  minutemaker_azure_api_version = "***"
+# Example: Setting up Azure GPT model
+llmR::record_llmr_model(
+  label = "azure_gpt",
+  provider = "azure",
+  model = "your_azure_deployment_id",
+  endpoint = "https://your-resource-name.openai.azure.com",
+  api_key = "your_azure_api_key",
+  api_version = "2024-06-01"
 )
 
-# Local GPT model example
-
-options(
-  # Local LLM model (for text summary)
-  minutemaker_local_endpoint_gpt = "local-host-path-to-model"
-)
+# Set the preferred LLM globally using one of the labels defined above
+llmR::set_llmr_model("openai")
 
-# Set the preferred LLM globally
+# Set up the speech-to-text (STT) options
 options(
-  minutemaker_llm_provider = "***" # E.g. "openai", "azure", "local" or custom
+  # Choose the STT model among the online models ("azure_whisper",
+  # "openai_whisper"), the Python-based local models ("whisper_local" and
+  # "mlx_whisper_local", the latter optimized for Apple Silicon), or
+  # "whisper_ctranslate2" (CLI-based; requires a separate
+  # whisper-ctranslate2 installation)
+  minutemaker_stt_model = "whisper_local",
+
+  # OpenAI Whisper API Key (for remote OpenAI whisper model)
+  minutemaker_openai_api_key = "your_openai_api_key",
+
+  # Azure Whisper API credentials (for remote Azure whisper model)
+  minutemaker_azure_resource_whisper = "your_azure_resource_name",
+  minutemaker_azure_deployment_whisper = "your_azure_deployment_id",
+  minutemaker_azure_api_key_whisper = "your_azure_api_key",
+  minutemaker_azure_api_version = "2024-06-01"
 )
 ```
 
-These setting can be also passed manually to the various functions, but
-the option system is more convenient. Of course you just need to set the
-options for the APIs you want to use (e.g., you don't need the
-speech-to-text API options if you already have a transcript).
+These settings can also be passed manually to the various functions, but
+the option system is more convenient. You only need to set the options
+for the APIs you want to use (e.g., you don't need the speech-to-text
+API options if you already have a transcript).
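To show how the `minutemaker_stt_model` option set above is consumed, here is a minimal sketch of the transcription step; the audio path is a hypothetical example, and per this PR `perform_speech_to_text` now defaults its `model` argument to that option.

``` r
# Sketch: the STT backend is read from getOption("minutemaker_stt_model"),
# so no explicit model argument is needed; "meeting.mp3" is hypothetical.
library(minutemaker)

perform_speech_to_text(
  audio_path = "meeting.mp3",
  language = "en"
)
# By default, results are written to a "transcription_output_data"
# directory next to the audio file.
```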
### Transcribing the audio diff --git a/man/check_and_install_dependencies.Rd b/man/check_and_install_dependencies.Rd deleted file mode 100644 index 27e5eb0..0000000 --- a/man/check_and_install_dependencies.Rd +++ /dev/null @@ -1,18 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/utils.R -\name{check_and_install_dependencies} -\alias{check_and_install_dependencies} -\title{Check and install dependencies} -\usage{ -check_and_install_dependencies(deps) -} -\arguments{ -\item{deps}{A character vector of package names.} -} -\value{ -Nothing. -} -\description{ -Checks if a list of packages are installed and installs them if not. Trigger -an error if the user chooses not to install a package. -} diff --git a/man/entity_extractor.Rd b/man/entity_extractor.Rd index 07fc1be..3026ab9 100644 --- a/man/entity_extractor.Rd +++ b/man/entity_extractor.Rd @@ -21,7 +21,8 @@ them.} \item{prompt_only}{If TRUE, only the prompt is returned, the LLM is not interrogated. Default is FALSE.} -\item{...}{Additional arguments passed to the \code{prompt_llm} function.} +\item{...}{Additional arguments passed to the \code{llmR::prompt_llm} +function.} } \value{ A vector with the entities found in the text. diff --git a/man/infer_agenda_from_transcript.Rd b/man/infer_agenda_from_transcript.Rd index e7e70a2..58dd95b 100644 --- a/man/infer_agenda_from_transcript.Rd +++ b/man/infer_agenda_from_transcript.Rd @@ -49,7 +49,7 @@ LLM context.} \item{output_file}{An optional file to save the results to. Default is NULL, i.e., the results are not saved to a file.} -\item{...}{Additional arguments passed to the \code{prompt_llm} function. +\item{...}{Additional arguments passed to the \code{llmR::prompt_llm} function. Keep in consideration that this function needs LLMs that manages long context and that produce valid JSON outputs. The \code{force_json} argument is used with OpenAI based LLM but it's not accepted by other LLMs; therefore diff --git a/man/perform_speech_to_text.Rd b/man/perform_speech_to_text.Rd index f8cd860..32a8e45 100644 --- a/man/perform_speech_to_text.Rd +++ b/man/perform_speech_to_text.Rd @@ -7,7 +7,7 @@ perform_speech_to_text( audio_path, output_dir = file.path(dirname(audio_path), "transcription_output_data"), - model, + model = getOption("minutemaker_stt_model", "whisper_local"), initial_prompt = NULL, overwrite = FALSE, language = "en", diff --git a/man/process_messages.Rd b/man/process_messages.Rd deleted file mode 100644 index 8478326..0000000 --- a/man/process_messages.Rd +++ /dev/null @@ -1,43 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/LLM_calls.R -\name{process_messages} -\alias{process_messages} -\title{Process chat message into standard format} -\usage{ -process_messages(messages) -} -\arguments{ -\item{messages}{A character vector or list of chat messages. In can be a -vector, a specifically structured list or a list of both if the goal is the -have the API process multiple messages at once.} -} -\value{ -A list of chat messages in standard format. -} -\description{ -This function takes one or more (a list of) chat messages and processes them -into a standard list format with role and content for each message to be fed -to a large language model. 
-} -\details{ -The standard format is a list of chat messages with the following structure: -message: \code{c(role = "system", content = "Welcome to the chat!")} -list of messages: \code{list( - c(role = "system", content = "You are an useful AI assistant."), - c(role = "user", content = "Hi there!") - )} -list format: \code{list( - list(role = "system", content = "You are an useful AI assistant."), - list(role = "user", content = "Hi there!") - )} -list of lists format: \code{list( - list( - list(role = "system", content = "You are an useful AI assistant."), - list(role = "user", content = "Hi there!") - ), - list( - list(role = "system", content = "You are an useful AI assistant."), - list(role = "user", content = "Hi there!") - ) - )} -} diff --git a/man/prompt_llm.Rd b/man/prompt_llm.Rd deleted file mode 100644 index 471cb35..0000000 --- a/man/prompt_llm.Rd +++ /dev/null @@ -1,55 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/LLM_calls.R -\name{prompt_llm} -\alias{prompt_llm} -\title{Interrogate a Language Model} -\usage{ -prompt_llm( - messages = NULL, - provider = getOption("minutemaker_llm_provider"), - params = list(temperature = 0), - force_json = FALSE, - log_request = getOption("minutemaker_log_requests", TRUE), - ... -) -} -\arguments{ -\item{messages}{Messages to be sent to the language model.} - -\item{provider}{The provider of the language model. Defaults to "openai". -Other options are "azure" and "local".} - -\item{params}{Additional parameters for the language model request. Defaults -to a list with \code{temperature = 0}.} - -\item{force_json}{A boolean to force the response in JSON format. Default is -FALSE. Works only for OpenAI and Azure endpoints.} - -\item{log_request}{A boolean to log the request time. Can be set up globally -using the \code{minutemaker_log_requests} option, which defaults to TRUE.} - -\item{...}{Additional arguments passed to the language model provider -functions.} -} -\value{ -Returns the content of the message from the language model response. -} -\description{ -This function sends requests to a specified language model provider (OpenAI, -Azure, or a locally running LLM server) and returns the response. It handles -rate limiting and retries the request if necessary, and also processes errors -in the response. -} -\details{ -Users can provide their own models by writing a function with the following -name pattern: \verb{use__llm}. See the existing functions using the -::: operator for examples. -} -\examples{ -\dontrun{ -response <- prompt_llm( - messages = c(user = "Hello there!"), - provider = "openai") - } - -} diff --git a/man/speech_to_summary_workflow.Rd b/man/speech_to_summary_workflow.Rd index 2700ffd..648393f 100644 --- a/man/speech_to_summary_workflow.Rd +++ b/man/speech_to_summary_workflow.Rd @@ -39,7 +39,7 @@ speech_to_summary_workflow( summary_structure = get_prompts("summary_structure"), extra_diarization_instructions = NULL, extra_output_instructions = NULL, - llm_provider = getOption("minutemaker_llm_provider"), + llm_provider = getOption("llmr_llm_provider"), extra_summarise_args = NULL, summarization_window_size = 15, summarization_output_length = if (isTRUE(multipart_summary)) 1 else 3, @@ -184,7 +184,7 @@ defaults. See \code{summarise_transcript} for more details.} summarization. See \code{summarise_transcript} for more details.} \item{extra_summarise_args}{Additional arguments passed to the -\code{prompt_llm} function. 
See \code{summarise_transcript} for more details.}
+\code{llmR::prompt_llm} function. See \code{summarise_transcript} for more details.}
 
 \item{summarization_window_size}{The size of the summarization window in
 minutes if the "rolling" method is used. See \code{summarise_transcript} for
diff --git a/man/summarise_full_meeting.Rd b/man/summarise_full_meeting.Rd
index e2ce52c..59d1e2b 100644
--- a/man/summarise_full_meeting.Rd
+++ b/man/summarise_full_meeting.Rd
@@ -68,8 +68,8 @@ defaults. See \code{summarise_transcript} for more details.}
 
 \item{overwrite}{Whether to overwrite existing summaries. Default is FALSE.}
 
-\item{...}{Additional arguments passed to \code{prompt_llm} function, such as
-the LLM provider.}
+\item{...}{Additional arguments passed to the \code{llmR::prompt_llm} function,
+such as the LLM provider.}
 }
 \value{
 The result tree of the meeting summary. Also saves the results in the
diff --git a/man/summarise_transcript.Rd b/man/summarise_transcript.Rd
index f489344..0f23e2e 100644
--- a/man/summarise_transcript.Rd
+++ b/man/summarise_transcript.Rd
@@ -71,7 +71,7 @@ get_prompts("output_rolling_aggregation") prompts depending on the task.}
 \item{prompt_only}{If TRUE, only the prompt is returned, the LLM is not
 interrogated. Default is FALSE.}
 
-\item{...}{Additional arguments passed to the \code{prompt_llm} function,
+\item{...}{Additional arguments passed to the \code{llmR::prompt_llm} function,
 such as the LLM provider.}
 }
 \value{
diff --git a/man/use_azure_llm.Rd b/man/use_azure_llm.Rd
deleted file mode 100644
index 17a2207..0000000
--- a/man/use_azure_llm.Rd
+++ /dev/null
@@ -1,41 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/LLM_calls.R
-\name{use_azure_llm}
-\alias{use_azure_llm}
-\title{Use Azure Language Model}
-\usage{
-use_azure_llm(
-  body,
-  deployment_id = getOption("minutemaker_azure_deployment_gpt"),
-  resource_name = getOption("minutemaker_azure_resource_gpt"),
-  api_key = getOption("minutemaker_azure_api_key_gpt"),
-  api_version = getOption("minutemaker_azure_api_version"),
-  log_request = getOption("minutemaker_log_requests", TRUE)
-)
-}
-\arguments{
-\item{body}{The body of the request.}
-
-\item{deployment_id}{Azure deployment ID for the language model. Obtained
-from R options.}
-
-\item{resource_name}{Azure resource name. Obtained from R options.}
-
-\item{api_key}{API key for the Azure language model service. Obtained from R
-options.}
-
-\item{api_version}{API version for the Azure language model service. Obtained
-from R options.}
-
-\item{log_request}{A boolean to log the request time. Can be set up globally
-using the \code{minutemaker_log_requests} option, which defaults to TRUE.}
-}
-\value{
-The function returns the response from the Azure API.
-}
-\description{
-Sends a request to the Azure API for language model completions using the
-parameters in the \code{body} argument. This function requires specific Azure
-configurations (deployment ID, resource name, API key, and API version) set
-in the R options.
-}
diff --git a/man/use_custom_llm.Rd b/man/use_custom_llm.Rd
deleted file mode 100644
index 71845b7..0000000
--- a/man/use_custom_llm.Rd
+++ /dev/null
@@ -1,38 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/LLM_calls.R
-\name{use_custom_llm}
-\alias{use_custom_llm}
-\title{Use Custom Language Model}
-\usage{
-use_custom_llm(
-  body,
-  endpoint = getOption("minutemaker_custom_endpoint_gpt"),
-  model = getOption("minutemaker_custom_model_gpt", NULL),
-  api_key = getOption("minutemaker_custom_api_key"),
-  log_request = getOption("minutemaker_log_requests", TRUE)
-)
-}
-\arguments{
-\item{body}{The body of the request.}
-
-\item{endpoint}{The local endpoint for the language model service. Can be
-obtained from R options.}
-
-\item{model}{Model identifier for the custom API, if needed (some API have
-one model per endpoint, some multiple ones). Obtained from R options.}
-
-\item{api_key}{Optional API key for the custom language model services that
-require it. Obtained from R options.}
-
-\item{log_request}{A boolean to log the request time. Can be set up globally
-using the \code{minutemaker_log_requests} option, which defaults to TRUE.}
-}
-\value{
-The function returns the response from the local language model
-endpoint.
-}
-\description{
-Sends a request to a custom (local or remote) language model endpoint
-compatible with the OpenAi API specification, using the parameters in the
-\code{body} argument. The user can provide an API key if required.
-}
diff --git a/man/use_mlx_whisper_local_stt.Rd b/man/use_mlx_whisper_local_stt.Rd
new file mode 100644
index 0000000..0f6967a
--- /dev/null
+++ b/man/use_mlx_whisper_local_stt.Rd
@@ -0,0 +1,37 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/speech_to_text.R
+\name{use_mlx_whisper_local_stt}
+\alias{use_mlx_whisper_local_stt}
+\title{Use MLX Whisper Local Model for Speech-to-Text (Mac OS only)}
+\usage{
+use_mlx_whisper_local_stt(
+  audio_file,
+  language = "en",
+  initial_prompt = "",
+  model = "mlx-community/distil-whisper-large-v3",
+  whisper_package = getOption("minutemaker_whisper_package", "mlx_whisper")
+)
+}
+\arguments{
+\item{audio_file}{The path to the audio file to transcribe.}
+
+\item{language}{The language of the input audio. Default is "en" for English.
+If NULL, Whisper will attempt to detect the language.}
+
+\item{initial_prompt}{Text to guide the model's style or continue a previous
+segment.}
+
+\item{model}{The MLX Whisper model to use. Default is
+"mlx-community/distil-whisper-large-v3".}
+
+\item{whisper_package}{The Python package to use for MLX Whisper (default:
+"mlx_whisper").}
+}
+\value{
+A list with the full transcript and the transcription by segments.
+}
+\description{
+This function uses a local MLX Whisper model via Python with reticulate to
+transcribe audio. It is specifically designed to work with the MLX Whisper
+package. MLX allows faster inference on Mac OS with Apple Silicon.
+} diff --git a/man/use_openai_llm.Rd b/man/use_openai_llm.Rd deleted file mode 100644 index 2833e3b..0000000 --- a/man/use_openai_llm.Rd +++ /dev/null @@ -1,30 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/LLM_calls.R -\name{use_openai_llm} -\alias{use_openai_llm} -\title{Use OpenAI Language Model} -\usage{ -use_openai_llm( - body, - model = getOption("minutemaker_openai_model_gpt"), - api_key = getOption("minutemaker_openai_api_key"), - log_request = getOption("minutemaker_log_requests", TRUE) -) -} -\arguments{ -\item{body}{The body of the request.} - -\item{model}{Model identifier for the OpenAI API. Obtained from R options.} - -\item{api_key}{API key for the OpenAI service. Obtained from R options.} - -\item{log_request}{A boolean to log the request time. Can be set up globally -using the \code{minutemaker_log_requests} option, which defaults to TRUE.} -} -\value{ -The function returns the response from the OpenAI API. -} -\description{ -Sends a request to the OpenAI API using the parameters in the \code{body} -argument. It requires an API key and model identifier set in the R options. -} diff --git a/man/use_whisper_local_stt.Rd b/man/use_whisper_local_stt.Rd new file mode 100644 index 0000000..0d84ff7 --- /dev/null +++ b/man/use_whisper_local_stt.Rd @@ -0,0 +1,37 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/speech_to_text.R +\name{use_whisper_local_stt} +\alias{use_whisper_local_stt} +\title{Use Local Whisper Model for Speech-to-Text} +\usage{ +use_whisper_local_stt( + audio_file, + language = "en", + initial_prompt = "", + model = "turbo", + whisper_package = getOption("minutemaker_whisper_package", "openai-whisper") +) +} +\arguments{ +\item{audio_file}{The path to the audio file to transcribe.} + +\item{language}{The language of the input audio. Default is "en" for English. +If NULL, Whisper will attempt to detect the language.} + +\item{initial_prompt}{Text to guide the model's style or continue a previous +segment.} + +\item{model}{The Whisper model to use. Default is "turbo". Check +https://github.com/openai/whisper for other available models.} + +\item{whisper_package}{The Python package to use for Whisper (default: +"openai-whisper").} +} +\value{ +A list with the full transcript and the transcription by segments. +} +\description{ +This function uses a local Whisper model via Python with reticulate to +transcribe audio. It can use the official OpenAI Whisper package or any +compatible Python package. +} diff --git a/renv.lock b/renv.lock index 1abe148..e51bcc0 100644 --- a/renv.lock +++ b/renv.lock @@ -1,6 +1,6 @@ { "R": { - "Version": "4.4.0", + "Version": "4.4.1", "Repositories": [ { "Name": "CRAN", @@ -424,14 +424,14 @@ }, "rlang": { "Package": "rlang", - "Version": "1.1.3", + "Version": "1.1.4", "Source": "Repository", - "Repository": "RSPM", + "Repository": "CRAN", "Requirements": [ "R", "utils" ], - "Hash": "42548638fae05fd9a9b5f3f437fbbbe2" + "Hash": "3eec01f8b1dee337674b2e34ab1f9bc1" }, "rprojroot": { "Package": "rprojroot",