From 67772921d34e63c7b19b3197e496ba2b7723102f Mon Sep 17 00:00:00 2001 From: Bill Denney Date: Wed, 27 Jan 2021 22:10:03 -0500 Subject: [PATCH] make entrez_search() use POST (and some minor typo fixes) --- NEWS | 8 +++++++- R/base.r | 15 +++++++-------- R/entrez_search.r | 1 + 3 files changed, 15 insertions(+), 9 deletions(-) diff --git a/NEWS b/NEWS index 6bb333d..bca9bc3 100644 --- a/NEWS +++ b/NEWS @@ -1,3 +1,9 @@ +Development Version +----------------- + +* A fix to #163 forces the use of POST for `entrez_search()` so that long query + strings do not get the HTTP 414 error (thanks to @billdenney for the fix). + Version 1.2.3 ----------------- Maintenance release, mostly to prevent issues with rate-limiting errors when the @@ -5,7 +11,7 @@ package is tested in CRAN. * The sleep commands for rate-limiting are slightly increased -* As of this release, the vignette is NOT build by default (to avoid issues with +* As of this release, the vignette is NOT built by default (to avoid issues with automated tests on CRAN). This will not affect most users, but a developers may want to read a wiki page describing how to build the vignette: diff --git a/R/base.r b/R/base.r index 0996201..df1aa97 100755 --- a/R/base.r +++ b/R/base.r @@ -20,15 +20,15 @@ entrez_tool <- function() 'rentrez' # # This function is used by all the API-querying functions in rentrez to build # the appropriate url. Required arguments for each endpoint are handled by -# specific funcitons. All of these functions can use the id_or_webenv() function +# specific functions. All of these functions can use the id_or_webenv() function # (below) to ensure that at least on of these arguments are provided and the -# sleep_time() function to set the approrate time to wait between requests. +# sleep_time() function to set the appropriate time to wait between requests. # # if debug_mode is set to TRUE the function returns a list with the URL and # arguments that would have been passed to GET or POST (useful for debugging # and used in the test suite). -make_entrez_query <- function(util, config, interface=".fcgi?", by_id=FALSE, debug_mode=FALSE, ...){ +make_entrez_query <- function(util, config, interface=".fcgi?", by_id=FALSE, debug_mode=FALSE, use_post=FALSE, ...){ uri <- paste0("https://eutils.ncbi.nlm.nih.gov/entrez/eutils/", util, interface) args <- list(..., email=entrez_email(), tool=entrez_tool()) if(!("api_key" %in% names(args))){ #no api key set, try to use the sytem var @@ -50,12 +50,12 @@ make_entrez_query <- function(util, config, interface=".fcgi?", by_id=FALSE, deb } #Seemingly, NCBI moved to https but not https v2.0? - # (thatnks Jeff Hammerbacher for report/solution) + # (thanks Jeff Hammerbacher for report/solution) # # if no httr::config was passed we will add one if(is.null(config)){ config = httr::config(http_version = 2) - # otherwise add https version, checkign we aren't overwriting something + # otherwise add https version, checking we aren't overwriting something # passed in (seems unlikely, but worth checking?) # } else { @@ -65,8 +65,7 @@ make_entrez_query <- function(util, config, interface=".fcgi?", by_id=FALSE, deb config$options$http_version <- 2 } - - if(length(args$id) > 200){ + if(use_post | (length(args$id) > 200)) { response <- httr::POST(uri, body=args, config= config) } else { response <- httr::GET(uri, query=args, config= config) @@ -122,7 +121,7 @@ entrez_check <- function(req){ } message <- httr::content(req, as="text", encoding="UTF-8") if (req$status_code == 429){ - #too many requests. First sleep to precent us racking up more + #too many requests. First sleep to prevent us racking up more Sys.sleep(0.3) stop(paste("HTTP failure: 429, too many requests. Functions that contact the NCBI should not be called in parallel. If you are using a shared IP, consider registerring for an API key as described in the rate-limiting section of rentrez tutorial. NCBI message:\n", message)) } diff --git a/R/entrez_search.r b/R/entrez_search.r index 6dfb250..c278643 100755 --- a/R/entrez_search.r +++ b/R/entrez_search.r @@ -74,6 +74,7 @@ entrez_search <- function(db, term, config=NULL, retmode="xml", use_history=FALS config=config, retmode=retmode, usehistory=usehistory, + use_post=TRUE, ...) parsed <- parse_response(response, retmode) parse_esearch(parsed, history=use_history)