From 98c7ba608424e05f060c907d4f79428f7c3a4a31 Mon Sep 17 00:00:00 2001 From: Angelo D'Ambrosio Date: Fri, 2 Feb 2024 18:45:10 +0100 Subject: [PATCH 1/3] fix: better detection of valid file arguments NAs were being accepted as file path for chat and transcripts to be merged. Plus, some reindentation --- R/data_management.R | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/R/data_management.R b/R/data_management.R index 4e9d877..030e70f 100644 --- a/R/data_management.R +++ b/R/data_management.R @@ -452,12 +452,12 @@ convert_agenda_times <- function( } else if (convert_to == "clocktime"){ if (is.numeric(agenda[[i]][[time]])) { - # Convert to clock time - cur_time <- agenda[[i]][[time]] + # Convert to clock time + cur_time <- agenda[[i]][[time]] - agenda[[i]][[time]] <- ( - event_start_time + - lubridate::seconds_to_period(cur_time)) + agenda[[i]][[time]] <- ( + event_start_time + + lubridate::seconds_to_period(cur_time)) } else { # Allow users to change the format agenda[[i]][[time]] <- parse_event_time(agenda[[i]][[time]]) @@ -634,7 +634,7 @@ validate_agenda_element <- function( } if (!is.numeric(agenda_element[[time]]) && - is.na(parse_event_time(agenda_element[[time]])) + is.na(parse_event_time(agenda_element[[time]])) ) { stop("Agenda element \"", time, "\" time not interpretable: ", agenda_element[[time]]) @@ -1241,14 +1241,14 @@ speech_to_summary_workflow <- function( event_start_time = event_start_time) # Merge transcripts - if (!is.null(transcript_to_merge)) { + if (!purrr::is_empty(transcript_to_merge) && !is.na(transcript_to_merge)) { + + message("\n### Merging transcripts...\n ") if (!file.exists(transcript_to_merge)) { stop("Transcript file to merge not valid.") } - message("\n### Merging transcripts...\n ") - # If the transcript to merge is a file path, load the data from the file if (is.character(transcript_to_merge)) { transcript_to_merge <- import_transcript_from_file( @@ -1264,7 +1264,8 @@ speech_to_summary_workflow <- function( } # Add chat transcript - if (!is.null(chat_file)) { + if (!purrr::is_empty(chat_file) && !is.na(chat_file)) { + message("\n### Adding chat transcript...\n") if (is.null(event_start_time)) { @@ -1294,7 +1295,8 @@ speech_to_summary_workflow <- function( ## Perform summarization ## # Agenda is not provided, ask whether to generate a default agenda - if (is.null(agenda) || (is.character(agenda) && !file.exists(agenda))) { + if (purrr::is_empty(agenda) || + (is.character(agenda) && !file.exists(agenda))) { cat("No agenda was provided or found in the target directory.\n") From 5c8f3205a1ce9e14dd257d02f77d209ca0d27d99 Mon Sep 17 00:00:00 2001 From: Angelo D'Ambrosio Date: Fri, 2 Feb 2024 20:13:58 +0100 Subject: [PATCH 2/3] check: agenda and summary tree consistency checks Also, move validation function in their own file --- R/data_management.R | 79 +--------- R/validation.R | 157 +++++++++++++++++++ man/check_agenda_summary_tree_consistency.Rd | 20 +++ man/check_summary_tree_consistency.Rd | 17 ++ man/validate_agenda_element.Rd | 2 +- 5 files changed, 198 insertions(+), 77 deletions(-) create mode 100644 R/validation.R create mode 100644 man/check_agenda_summary_tree_consistency.Rd create mode 100644 man/check_summary_tree_consistency.Rd diff --git a/R/data_management.R b/R/data_management.R index 030e70f..da02a67 100644 --- a/R/data_management.R +++ b/R/data_management.R @@ -510,6 +510,9 @@ format_summary_tree <- function( output_file = NULL ) { + # Check the consistency of the summary tree and the agenda + check_agenda_summary_tree_consistency(agenda, summary_tree) + # If summary_tree is a file path, load the data from the file if (is.character(summary_tree) && file.exists(summary_tree)) { summary_tree <- dget(summary_tree) @@ -582,83 +585,7 @@ format_summary_tree <- function( } -#' Validates an agenda element -#' -#' @param agenda_element A list containing the agenda elements. -#' @param session A boolean indicating whether the `session` item should be -#' present. -#' @param title A boolean indicating whether the `title` item should be present. -#' @param speakers A boolean indicating whether the `speakers` item should be -#' present. -#' @param moderators A boolean indicating whether the `moderators` item should -#' be present. -#' @param type A boolean indicating whether the `type` item should be present. -#' @param from A boolean indicating whether the `from` item should be present. -#' @param to A boolean indicating whether the `to` item should be present. -#' -#' @return A boolean indicating whether the agenda element is valid. -#' -validate_agenda_element <- function( - agenda_element, - session = FALSE, - title = FALSE, - speakers = FALSE, - moderators = FALSE, - type = FALSE, - from = FALSE, - to = FALSE -) { - - # Get the arguments as a list - args <- as.list(environment()) - - # Remove the 'agenda_element' argument from the list - args$agenda_element <- NULL - - # Check if the required items are present in the agenda element - is_valid <- purrr::imap_lgl(args, ~ { - !is.null(agenda_element[[.y]]) || isFALSE(.x) - }) |> all() - - if (isTRUE(from) || isTRUE(to)) { - - # Check if the times are interpretable - for (time in c("from", "to")) { - - if (!inherits(agenda_element[[time]], - c("numeric", "POSIXct", "character"))) { - stop(stringr::str_glue( - 'Agenda element "{time}" should be numeric, character or POSIXct,', - "but it's of class {class(agenda_element[[time]])}." - )) - } - - if (!is.numeric(agenda_element[[time]]) && - is.na(parse_event_time(agenda_element[[time]])) - ) { - stop("Agenda element \"", time, "\" time not interpretable: ", - agenda_element[[time]]) - } - } - if (class(agenda_element$from) != class(agenda_element$to)) { - stop("The agenda element times are not of the same class:", - " from: ", agenda_element$from, - " to: ", agenda_element$to) - } - - if ( - time_to_numeric(agenda_element$from) > time_to_numeric(agenda_element$to) - ) { - stop("Agenda element \"from\" time should preceed \"to\" time:", - " from: ", agenda_element$from, - " to: ", agenda_element$to) - } - } - - # Return the validation result - is_valid -} #' Import transcript from subtitle file #' diff --git a/R/validation.R b/R/validation.R new file mode 100644 index 0000000..31e1ffd --- /dev/null +++ b/R/validation.R @@ -0,0 +1,157 @@ +#' Validates an agenda element +#' +#' @param agenda_element A list containing the agenda elements. +#' @param session A boolean indicating whether the `session` item should be +#' present. +#' @param title A boolean indicating whether the `title` item should be present. +#' @param speakers A boolean indicating whether the `speakers` item should be +#' present. +#' @param moderators A boolean indicating whether the `moderators` item should +#' be present. +#' @param type A boolean indicating whether the `type` item should be present. +#' @param from A boolean indicating whether the `from` item should be present. +#' @param to A boolean indicating whether the `to` item should be present. +#' +#' @return A boolean indicating whether the agenda element is valid. +#' +validate_agenda_element <- function( + agenda_element, + session = FALSE, + title = FALSE, + speakers = FALSE, + moderators = FALSE, + type = FALSE, + from = FALSE, + to = FALSE +) { + + # Get the arguments as a list + args <- as.list(environment()) + + # Remove the 'agenda_element' argument from the list + args$agenda_element <- NULL + + # Check if the required items are present in the agenda element + is_valid <- purrr::imap_lgl(args, ~ { + !is.null(agenda_element[[.y]]) || isFALSE(.x) + }) |> all() + + if (isTRUE(from) || isTRUE(to)) { + + # Check if the times are interpretable + for (time in c("from", "to")) { + + if (!inherits(agenda_element[[time]], + c("numeric", "POSIXct", "character"))) { + stop(stringr::str_glue( + 'Agenda element "{time}" should be numeric, character or POSIXct,', + "but it's of class {class(agenda_element[[time]])}." + )) + } + + if (!is.numeric(agenda_element[[time]]) && + is.na(parse_event_time(agenda_element[[time]])) + ) { + stop("Agenda element \"", time, "\" time not interpretable: ", + agenda_element[[time]]) + } + } + + if (class(agenda_element$from) != class(agenda_element$to)) { + stop("The agenda element times are not of the same class:", + " from: ", agenda_element$from, + " to: ", agenda_element$to) + } + + if ( + time_to_numeric(agenda_element$from) > time_to_numeric(agenda_element$to) + ) { + stop("Agenda element \"from\" time should preceed \"to\" time:", + " from: ", agenda_element$from, + " to: ", agenda_element$to) + } + } + + # Return the validation result + is_valid +} + +#' Validate summary tree id consistency +#' +#' @param summary_tree A list containing the summary tree or a file path to it. +#' +#' @return Nothing, will throw an error if the summary tree is not consistent. +check_summary_tree_consistency <- function(summary_tree) { + + if (is.character(summary_tree)) { + summary_tree <- dget(summary_tree) + } + + if (length(summary_tree) == 0) { + stop("The summary tree is empty.") + } + + obs_ids <- names(summary_tree) + + titles <- purrr::map(summary_tree, \(x) x[["title"]]) + sessions <- purrr::map(summary_tree, \(x) { + if (!purrr::is_empty(x[["session"]])) { + x[["session"]] + } else { + x[["title"]] + } + }) + + exp_ids <- paste(sessions, titles, sep = "_") + + test <- all.equal(obs_ids, exp_ids) + + if (isTRUE(test)) { + return() + } + + stop("The summary tree is not consistent: ", test, ".\n", + purrr::map(seq_along(obs_ids), \(i) { + if (obs_ids[i] != exp_ids[i]) { + sprintf('"%s" != "%s"', obs_ids[i], exp_ids[i]) + } + }) |> purrr::compact() |> unlist() |> paste(collapse = "\n") + ) +} + +#' Check consistency of agenda and summary tree ids +#' +#' @param agenda A list containing the agenda or a file path to it. +#' @param summary_tree A list containing the summary tree or a file path to it. +#' +#' @return Nothing, will throw an error if the agenda and summary tree are not +#' consistent. +check_agenda_summary_tree_consistency <- function(agenda, summary_tree) { + + if (is.character(agenda)) { + agenda <- dget(agenda) + } + + if (is.character(summary_tree)) { + summary_tree <- dget(summary_tree) + } + + check_summary_tree_consistency(summary_tree) + + agenda_ids <- build_ids_from_agenda(agenda) + summary_ids <- names(summary_tree) + + test <- all.equal(agenda_ids, summary_ids) + + if (isTRUE(test)) { + return() + } + + stop("The agenda and summary tree are not consistent: ", test, ".\n", + purrr::map(seq_along(agenda_ids), \(i) { + if (!agenda_ids[i] %in% summary_ids[i]) { + sprintf('"%s" != "%s"', agenda_ids[i], summary_ids[i]) + } + }) |> purrr::compact() |> unlist() |> paste(collapse = "\n") + ) +} diff --git a/man/check_agenda_summary_tree_consistency.Rd b/man/check_agenda_summary_tree_consistency.Rd new file mode 100644 index 0000000..51ff513 --- /dev/null +++ b/man/check_agenda_summary_tree_consistency.Rd @@ -0,0 +1,20 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/validation.R +\name{check_agenda_summary_tree_consistency} +\alias{check_agenda_summary_tree_consistency} +\title{Check consistency of agenda and summary tree ids} +\usage{ +check_agenda_summary_tree_consistency(agenda, summary_tree) +} +\arguments{ +\item{agenda}{A list containing the agenda or a file path to it.} + +\item{summary_tree}{A list containing the summary tree or a file path to it.} +} +\value{ +Nothing, will throw an error if the agenda and summary tree are not +consistent. +} +\description{ +Check consistency of agenda and summary tree ids +} diff --git a/man/check_summary_tree_consistency.Rd b/man/check_summary_tree_consistency.Rd new file mode 100644 index 0000000..8660b10 --- /dev/null +++ b/man/check_summary_tree_consistency.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/validation.R +\name{check_summary_tree_consistency} +\alias{check_summary_tree_consistency} +\title{Validate summary tree id consistency} +\usage{ +check_summary_tree_consistency(summary_tree) +} +\arguments{ +\item{summary_tree}{A list containing the summary tree or a file path to it.} +} +\value{ +Nothing, will throw an error if the summary tree is not consistent. +} +\description{ +Validate summary tree id consistency +} diff --git a/man/validate_agenda_element.Rd b/man/validate_agenda_element.Rd index c7e202d..b7ad2bd 100644 --- a/man/validate_agenda_element.Rd +++ b/man/validate_agenda_element.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/data_management.R +% Please edit documentation in R/validation.R \name{validate_agenda_element} \alias{validate_agenda_element} \title{Validates an agenda element} From 9ab7fa98170cee92d1f97a18962af05197cb38a9 Mon Sep 17 00:00:00 2001 From: Angelo D'Ambrosio Date: Fri, 2 Feb 2024 20:14:40 +0100 Subject: [PATCH 3/3] Update version number to 0.5.3 --- DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index 55eec5f..efb17b3 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: minutemaker Title: GenAI-based meeting and conferences minutes generator -Version: 0.5.2 +Version: 0.5.3 Authors@R: person("Angelo", "D'Ambrosio", , "a.dambrosioMD@gmail.com", role = c("aut", "cre"), comment = c(ORCID = "0000-0002-2045-5155"))