Skip to content

Commit

Permalink
Merge pull request #12 from bakaburg1/Dev
Browse files Browse the repository at this point in the history
Fix file argument validation and add summary tree and agenda consistency checks
  • Loading branch information
bakaburg1 authored Feb 2, 2024
2 parents 5f6fcef + 9ab7fa9 commit e38a23e
Show file tree
Hide file tree
Showing 6 changed files with 211 additions and 88 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: minutemaker
Title: GenAI-based meeting and conferences minutes generator
Version: 0.5.2
Version: 0.5.3
Authors@R:
person("Angelo", "D'Ambrosio", , "a.dambrosioMD@gmail.com", role = c("aut", "cre"),
comment = c(ORCID = "0000-0002-2045-5155"))
Expand Down
101 changes: 15 additions & 86 deletions R/data_management.R
Original file line number Diff line number Diff line change
Expand Up @@ -452,12 +452,12 @@ convert_agenda_times <- function(
} else if (convert_to == "clocktime"){

if (is.numeric(agenda[[i]][[time]])) {
# Convert to clock time
cur_time <- agenda[[i]][[time]]
# Convert to clock time
cur_time <- agenda[[i]][[time]]

agenda[[i]][[time]] <- (
event_start_time +
lubridate::seconds_to_period(cur_time))
agenda[[i]][[time]] <- (
event_start_time +
lubridate::seconds_to_period(cur_time))
} else {
# Allow users to change the format
agenda[[i]][[time]] <- parse_event_time(agenda[[i]][[time]])
Expand Down Expand Up @@ -510,6 +510,9 @@ format_summary_tree <- function(
output_file = NULL
) {

# Check the consistency of the summary tree and the agenda
check_agenda_summary_tree_consistency(agenda, summary_tree)

# If summary_tree is a file path, load the data from the file
if (is.character(summary_tree) && file.exists(summary_tree)) {
summary_tree <- dget(summary_tree)
Expand Down Expand Up @@ -582,83 +585,7 @@ format_summary_tree <- function(
}


#' Validates an agenda element
#'
#' Checks that every item flagged as required is present in the agenda element.
#' When `from` or `to` is required, both times are additionally checked: they
#' must be numeric, character or POSIXct, interpretable by
#' `parse_event_time()`, of compatible type, and correctly ordered.
#'
#' @param agenda_element A list containing the agenda elements.
#' @param session A boolean indicating whether the `session` item should be
#'   present.
#' @param title A boolean indicating whether the `title` item should be present.
#' @param speakers A boolean indicating whether the `speakers` item should be
#'   present.
#' @param moderators A boolean indicating whether the `moderators` item should
#'   be present.
#' @param type A boolean indicating whether the `type` item should be present.
#' @param from A boolean indicating whether the `from` item should be present.
#' @param to A boolean indicating whether the `to` item should be present.
#'
#' @return A boolean indicating whether the agenda element is valid, i.e.
#'   whether all the required items are present. Malformed, inconsistent or
#'   wrongly ordered times do not return `FALSE` but throw an error.
#'
validate_agenda_element <- function(
  agenda_element,
  session = FALSE,
  title = FALSE,
  speakers = FALSE,
  moderators = FALSE,
  type = FALSE,
  from = FALSE,
  to = FALSE
) {

  # Get the arguments as a list. This must remain the first statement: any
  # local variable created before it would be captured in the list as well.
  args <- as.list(environment())

  # Remove the 'agenda_element' argument from the list
  args$agenda_element <- NULL

  # Check if the required items are present in the agenda element
  is_valid <- purrr::imap_lgl(args, \(required, item) {
    !is.null(agenda_element[[item]]) || isFALSE(required)
  }) |> all()

  if (isTRUE(from) || isTRUE(to)) {

    # Both times are checked as soon as one of them is required
    for (time in c("from", "to")) {

      value <- agenda_element[[time]]

      # is.numeric() is used instead of inherits(value, "numeric") because the
      # latter is FALSE for integer values, whose class is "integer"
      if (!is.numeric(value) && !is.character(value) &&
          !inherits(value, "POSIXct")) {

        # Collapse the class vector so that a single message is produced
        value_class <- paste(class(value), collapse = "/")

        stop(stringr::str_glue(
          'Agenda element "{time}" should be numeric, character or POSIXct, ',
          "but it's of class {value_class}."
        ))
      }

      # Check if the times are interpretable
      if (!is.numeric(value) && is.na(parse_event_time(value))) {
        stop("Agenda element \"", time, "\" time not interpretable: ",
             value)
      }
    }

    # class() can have length > 1 (e.g. c("POSIXct", "POSIXt")), so comparing
    # with `!=` would give a non-scalar `if` condition; identical() is scalar.
    # Integer and double times are both numeric and considered compatible.
    same_type <- (
      is.numeric(agenda_element$from) && is.numeric(agenda_element$to)
    ) || identical(class(agenda_element$from), class(agenda_element$to))

    if (!same_type) {
      stop("The agenda element times are not of the same class:",
           " from: ", agenda_element$from,
           " to: ", agenda_element$to)
    }

    if (
      time_to_numeric(agenda_element$from) > time_to_numeric(agenda_element$to)
    ) {
      stop("Agenda element \"from\" time should preceed \"to\" time:",
           " from: ", agenda_element$from,
           " to: ", agenda_element$to)
    }
  }

  # Return the validation result
  is_valid
}

#' Import transcript from subtitle file
#'
Expand Down Expand Up @@ -1241,14 +1168,14 @@ speech_to_summary_workflow <- function(
event_start_time = event_start_time)

# Merge transcripts
if (!is.null(transcript_to_merge)) {
if (!purrr::is_empty(transcript_to_merge) && !is.na(transcript_to_merge)) {

message("\n### Merging transcripts...\n ")

if (!file.exists(transcript_to_merge)) {
stop("Transcript file to merge not valid.")
}

message("\n### Merging transcripts...\n ")

# If the transcript to merge is a file path, load the data from the file
if (is.character(transcript_to_merge)) {
transcript_to_merge <- import_transcript_from_file(
Expand All @@ -1264,7 +1191,8 @@ speech_to_summary_workflow <- function(
}

# Add chat transcript
if (!is.null(chat_file)) {
if (!purrr::is_empty(chat_file) && !is.na(chat_file)) {

message("\n### Adding chat transcript...\n")

if (is.null(event_start_time)) {
Expand Down Expand Up @@ -1294,7 +1222,8 @@ speech_to_summary_workflow <- function(
## Perform summarization ##

# Agenda is not provided, ask whether to generate a default agenda
if (is.null(agenda) || (is.character(agenda) && !file.exists(agenda))) {
if (purrr::is_empty(agenda) ||
(is.character(agenda) && !file.exists(agenda))) {

cat("No agenda was provided or found in the target directory.\n")

Expand Down
157 changes: 157 additions & 0 deletions R/validation.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,157 @@
#' Validates an agenda element
#'
#' Checks that every item flagged as required is present in the agenda element.
#' When `from` or `to` is required, both times are additionally checked: they
#' must be numeric, character or POSIXct, interpretable by
#' `parse_event_time()`, of compatible type, and correctly ordered.
#'
#' @param agenda_element A list containing the agenda elements.
#' @param session A boolean indicating whether the `session` item should be
#'   present.
#' @param title A boolean indicating whether the `title` item should be present.
#' @param speakers A boolean indicating whether the `speakers` item should be
#'   present.
#' @param moderators A boolean indicating whether the `moderators` item should
#'   be present.
#' @param type A boolean indicating whether the `type` item should be present.
#' @param from A boolean indicating whether the `from` item should be present.
#' @param to A boolean indicating whether the `to` item should be present.
#'
#' @return A boolean indicating whether the agenda element is valid, i.e.
#'   whether all the required items are present. Malformed, inconsistent or
#'   wrongly ordered times do not return `FALSE` but throw an error.
#'
validate_agenda_element <- function(
  agenda_element,
  session = FALSE,
  title = FALSE,
  speakers = FALSE,
  moderators = FALSE,
  type = FALSE,
  from = FALSE,
  to = FALSE
) {

  # Get the arguments as a list. This must remain the first statement: any
  # local variable created before it would be captured in the list as well.
  args <- as.list(environment())

  # Remove the 'agenda_element' argument from the list
  args$agenda_element <- NULL

  # Check if the required items are present in the agenda element
  is_valid <- purrr::imap_lgl(args, \(required, item) {
    !is.null(agenda_element[[item]]) || isFALSE(required)
  }) |> all()

  if (isTRUE(from) || isTRUE(to)) {

    # Both times are checked as soon as one of them is required
    for (time in c("from", "to")) {

      value <- agenda_element[[time]]

      # is.numeric() is used instead of inherits(value, "numeric") because the
      # latter is FALSE for integer values, whose class is "integer"
      if (!is.numeric(value) && !is.character(value) &&
          !inherits(value, "POSIXct")) {

        # Collapse the class vector so that a single message is produced
        value_class <- paste(class(value), collapse = "/")

        stop(stringr::str_glue(
          'Agenda element "{time}" should be numeric, character or POSIXct, ',
          "but it's of class {value_class}."
        ))
      }

      # Check if the times are interpretable
      if (!is.numeric(value) && is.na(parse_event_time(value))) {
        stop("Agenda element \"", time, "\" time not interpretable: ",
             value)
      }
    }

    # class() can have length > 1 (e.g. c("POSIXct", "POSIXt")), so comparing
    # with `!=` would give a non-scalar `if` condition; identical() is scalar.
    # Integer and double times are both numeric and considered compatible.
    same_type <- (
      is.numeric(agenda_element$from) && is.numeric(agenda_element$to)
    ) || identical(class(agenda_element$from), class(agenda_element$to))

    if (!same_type) {
      stop("The agenda element times are not of the same class:",
           " from: ", agenda_element$from,
           " to: ", agenda_element$to)
    }

    if (
      time_to_numeric(agenda_element$from) > time_to_numeric(agenda_element$to)
    ) {
      stop("Agenda element \"from\" time should preceed \"to\" time:",
           " from: ", agenda_element$from,
           " to: ", agenda_element$to)
    }
  }

  # Return the validation result
  is_valid
}

#' Validate summary tree id consistency
#'
#' Rebuilds the expected id of each summary tree element as
#' `<session>_<title>` (falling back to the title when the session is empty)
#' and compares it, in order, with the names of the summary tree.
#'
#' @param summary_tree A list containing the summary tree or a file path to it.
#'   A character value is read with `dget()`.
#'
#' @return Nothing (invisible `NULL`), will throw an error if the summary tree
#'   is empty or not consistent.
check_summary_tree_consistency <- function(summary_tree) {

  if (is.character(summary_tree)) {
    summary_tree <- dget(summary_tree)
  }

  if (length(summary_tree) == 0) {
    stop("The summary tree is empty.")
  }

  obs_ids <- names(summary_tree)

  titles <- purrr::map(summary_tree, \(x) x[["title"]])
  sessions <- purrr::map(summary_tree, \(x) {
    if (!purrr::is_empty(x[["session"]])) {
      x[["session"]]
    } else {
      x[["title"]]
    }
  })

  exp_ids <- paste(sessions, titles, sep = "_")

  test <- all.equal(obs_ids, exp_ids)

  if (isTRUE(test)) {
    return(invisible(NULL))
  }

  # names() is NULL for a fully unnamed list: pad with NA so that every
  # expected id is still reported as a mismatch
  reported_ids <- if (is.null(obs_ids)) {
    rep(NA_character_, length(exp_ids))
  } else {
    obs_ids
  }

  # A missing name counts as a mismatch instead of propagating NA
  is_mismatch <- is.na(reported_ids) | reported_ids != exp_ids

  details <- sprintf(
    '"%s" != "%s"', reported_ids[is_mismatch], exp_ids[is_mismatch]
  )

  # all.equal() may return several difference descriptions: join them
  # explicitly, otherwise stop() would glue them together without separators
  stop("The summary tree is not consistent: ",
       paste(test, collapse = "; "), ".\n",
       paste(details, collapse = "\n")
  )
}

#' Check consistency of agenda and summary tree ids
#'
#' First validates the summary tree on its own with
#' `check_summary_tree_consistency()`, then compares, in order, the ids built
#' from the agenda by `build_ids_from_agenda()` with the names of the summary
#' tree.
#'
#' @param agenda A list containing the agenda or a file path to it. A character
#'   value is read with `dget()`.
#' @param summary_tree A list containing the summary tree or a file path to it.
#'   A character value is read with `dget()`.
#'
#' @return Nothing (invisible `NULL`), will throw an error if the agenda and
#'   summary tree are not consistent.
check_agenda_summary_tree_consistency <- function(agenda, summary_tree) {

  if (is.character(agenda)) {
    agenda <- dget(agenda)
  }

  if (is.character(summary_tree)) {
    summary_tree <- dget(summary_tree)
  }

  check_summary_tree_consistency(summary_tree)

  agenda_ids <- build_ids_from_agenda(agenda)
  summary_ids <- names(summary_tree)

  test <- all.equal(agenda_ids, summary_ids)

  if (isTRUE(test)) {
    return(invisible(NULL))
  }

  # Compare over the longest of the two id vectors, so that entries present
  # only in the summary tree are reported too; indexing past the end of the
  # shorter vector pads it with NA.
  # NOTE(review): assumes build_ids_from_agenda() returns a character vector
  positions <- seq_len(max(length(agenda_ids), length(summary_ids)))
  agenda_side <- agenda_ids[positions]
  summary_side <- summary_ids[positions]

  # A missing id on either side counts as a mismatch instead of propagating NA
  is_mismatch <- is.na(agenda_side) | is.na(summary_side) |
    agenda_side != summary_side

  details <- sprintf(
    '"%s" != "%s"', agenda_side[is_mismatch], summary_side[is_mismatch]
  )

  # all.equal() may return several difference descriptions: join them
  # explicitly, otherwise stop() would glue them together without separators
  stop("The agenda and summary tree are not consistent: ",
       paste(test, collapse = "; "), ".\n",
       paste(details, collapse = "\n")
  )
}
20 changes: 20 additions & 0 deletions man/check_agenda_summary_tree_consistency.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

17 changes: 17 additions & 0 deletions man/check_summary_tree_consistency.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion man/validate_agenda_element.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit e38a23e

Please sign in to comment.