From 741911c8ad4bbf20ef39d1e5590cc6f8c6dbac49 Mon Sep 17 00:00:00 2001 From: "Brian P. Walenz" Date: Tue, 3 Aug 2021 06:12:20 -0400 Subject: [PATCH] Change default corErrorRate from 50 to 30 (nanopore) and 30 to 25 (pacbio). --- documentation/source/parameter-reference.rst | 59 +++++++++++++++++++- src/pipelines/canu.pl | 4 +- 2 files changed, 60 insertions(+), 3 deletions(-) diff --git a/documentation/source/parameter-reference.rst b/documentation/source/parameter-reference.rst index 55e810474..222c2dca6 100644 --- a/documentation/source/parameter-reference.rst +++ b/documentation/source/parameter-reference.rst @@ -914,7 +914,64 @@ assemble all input data, at the expense of runtime. .. _corErrorRate: corErrorRate - Do not use overlaps with error rate higher than this (estimated error rate for `mhap` and `minimap` overlaps). + Do not use overlaps with error rate higher than this when computing corrected reads. + + In Canu v2.2, this parameter was changed from 0.50 (for -nanopore) and 0.30 + (for -pacbio) to 0.30 and 0.25, respectively. + + The tables below show a significant speedup for Nanopore reads without much + loss in output quantity. There is some indication of a slight improvement in + corrected read quality at lower corErrorRate; however, read quality was not + directly evaluated. + + For PacBio reads, with a smaller change in corErrorRate, the speedup is + about 10%. 
+ + +-----------------+--------------------+------------------+-------------------+------------+ + | CHM13 Chromosome X, nanopore, 105x input coverage | + +-----------------+--------------------+------------------+-------------------+------------+ + | | | | Corrected | | Trimmed | | Bogart | | CPU Time | + | | corErrorRate | | Coverage | | Coverage | | Error Rate | | (hours) | + +=================+====================+==================+===================+============+ + | 5 | 22.0x | 21.8x | 0.3958% | | + +-----------------+--------------------+------------------+-------------------+------------+ + | 10 | 35.7x | 35.2x | | | + +-----------------+--------------------+------------------+-------------------+------------+ + | 15 | 38.1x | 37.5x | | | + +-----------------+--------------------+------------------+-------------------+------------+ + | 20 | 38.6x | 38.0x | | 1160 | + +-----------------+--------------------+------------------+-------------------+------------+ + | 25 | 38.7x | 38.1x | | 1290 | + +-----------------+--------------------+------------------+-------------------+------------+ + | 30 | 38.8x | 38.1x | | 1449 | + +-----------------+--------------------+------------------+-------------------+------------+ + | 40 | 38.8x | 38.1x | | 1625 | + +-----------------+--------------------+------------------+-------------------+------------+ + | 50 | 38.8x | 38.2x | | 3683 | + +-----------------+--------------------+------------------+-------------------+------------+ + + +-----------------+--------------------+------------------+-------------------+------------+ + | HG002 Chromosome X, nanopore, 20x input coverage | + +-----------------+--------------------+------------------+-------------------+------------+ + | | | | Corrected | | Trimmed | | Bogart | | CPU Time | + | | corErrorRate | | Coverage | | Coverage | | Error Rate | | (hours) | + +=================+====================+==================+===================+============+ + | 5 | --.-x | --.-x | -% 
| 31 | + +-----------------+--------------------+------------------+-------------------+------------+ + | 10 | 3.9x | --.-x | -% | 66 | + +-----------------+--------------------+------------------+-------------------+------------+ + | 15 | 9.6x | --.-x | -% | 105 | + +-----------------+--------------------+------------------+-------------------+------------+ + | 20 | 11.4x | 11.2x | 1.71% | 134 | + +-----------------+--------------------+------------------+-------------------+------------+ + | 25 | 11.9x | 11.6x | 1.79% | 154 | + +-----------------+--------------------+------------------+-------------------+------------+ + | 30 | 12.0x | 11.8x | 1.83% | 169 | + +-----------------+--------------------+------------------+-------------------+------------+ + | 40 | 12.2x | 12.0x | 1.94% | 221 | + +-----------------+--------------------+------------------+-------------------+------------+ + | 50 | 12.6x | 12.3x | 2.29% | 709 | + +-----------------+--------------------+------------------+-------------------+------------+ corConsensus Which algorithm to use for computing read consensus sequences. Only 'falcon' and 'falconpipe' are supported. 
diff --git a/src/pipelines/canu.pl b/src/pipelines/canu.pl index 9ca5ffede..50ab1c128 100644 --- a/src/pipelines/canu.pl +++ b/src/pipelines/canu.pl @@ -608,7 +608,7 @@ setGlobalIfUndef("corOvlErrorRate", 0.320); setGlobalIfUndef("obtOvlErrorRate", 0.120); setGlobalIfUndef("utgOvlErrorRate", 0.120); - setGlobalIfUndef("corErrorRate", 0.500); + setGlobalIfUndef("corErrorRate", 0.300); setGlobalIfUndef("obtErrorRate", 0.120); setGlobalIfUndef("oeaErrorRate", getGlobal("utgOvlErrorRate")); setGlobalIfUndef("oeaHaploConfirm", 5); @@ -622,7 +622,7 @@ setGlobalIfUndef("corOvlErrorRate", 0.240); setGlobalIfUndef("obtOvlErrorRate", 0.045); setGlobalIfUndef("utgOvlErrorRate", 0.045); - setGlobalIfUndef("corErrorRate", 0.300); + setGlobalIfUndef("corErrorRate", 0.250); setGlobalIfUndef("obtErrorRate", 0.045); setGlobalIfUndef("oeaErrorRate", getGlobal("utgOvlErrorRate")); setGlobalIfUndef("oeaHaploConfirm", 5);