diff --git a/DESCRIPTION b/DESCRIPTION index 3bf4ae9..583d739 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: textplot Type: Package Title: Text Plots -Version: 0.2.0 +Version: 0.2.1 Maintainer: Jan Wijffels Authors@R: c( person('Jan', 'Wijffels', role = c('aut', 'cre', 'cph'), email = 'jwijffels@bnosac.be'), diff --git a/NEWS.md b/NEWS.md index f0f8ded..b1103d8 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,8 @@ +## CHANGES IN textplot VERSION 0.2.1 + +- Fix bug in textplot_bitermclusters.default (and plot.BTM as it uses textplot_bitermclusters.default) which + unintentionally allowed a biterm that was assigned to several topics to be shown in either of the 2 topics by chance, depending on the order of the biterms data. This could only occur if the most emitted words by each of these 2 topics were the same. See issue #7. + ## CHANGES IN textplot VERSION 0.2.0 - Added textplot_embedding_2d diff --git a/R/textplot_biterms.R b/R/textplot_biterms.R index 80efe65..ef5fd80 100644 --- a/R/textplot_biterms.R +++ b/R/textplot_biterms.R @@ -227,7 +227,7 @@ textplot_bitermclusters.default <- function(x, biterms, if(!missing(which)){ biterms <- biterms[biterms$topic %in% which, ] } - biterms <- biterms[, topic_freq := .N, by = list(term1, term2)] + biterms <- biterms[, topic_freq := .N, by = list(term1, term2, topic)] biterms <- biterms[, list(best_topic = topic[which.max(topic_freq)], cooc = .N), by = list(term1, term2)] # biterms <- biterms[, list(best_topic = utils::head(base::names(base::sort(base::table(topic), decreasing = TRUE)), 1), # cooc = .N), by = list(term1, term2)]