diff --git a/NEWS.md b/NEWS.md index 23e8d5c873..2930f2311a 100644 --- a/NEWS.md +++ b/NEWS.md @@ -16,6 +16,8 @@ 1. `nafill()`, `setnafill()` extended to work on logical vectors (part of [#3992](https://github.com/Rdatatable/data.table/issues/3992)). Thanks @jangorecki for the request and @MichaelChirico for the PR. +2. `[,showProgress=]` and `options(datatable.showProgress)` now accept an integer to control the progress bar update interval in seconds, allowing finer control over progress reporting frequency; `TRUE` uses the default 3-second interval, [#6514](https://github.com/Rdatatable/data.table/issues/6514). Thanks @ethanbsmith for the report and @ben-schwen for the PR. + ### Notes 1. {data.table} now depends on R 3.5.0 (2018). diff --git a/R/data.table.R b/R/data.table.R index 27c985e44c..1ce6b62cf5 100644 --- a/R/data.table.R +++ b/R/data.table.R @@ -244,7 +244,7 @@ replace_dot_alias = function(e) { if ((isTRUE(which)||is.na(which)) && !missing(j)) stopf("which==%s (meaning return row numbers) but j is also supplied. Either you need row numbers or the result of j, but only one type of result can be returned.", which) if (is.null(nomatch) && is.na(which)) stopf("which=NA with nomatch=0|NULL would always return an empty vector. Please change or remove either which or nomatch.") if (!with && missing(j)) stopf("j must be provided when with=FALSE") - if (!missing(by) && !isTRUEorFALSE(showProgress)) stopf("%s must be TRUE or FALSE", "showProgress") + if (!missing(by) && !(isTRUEorFALSE(showProgress) || (is.numeric(showProgress) && length(showProgress)==1L && showProgress >= 0))) stopf("showProgress must be TRUE, FALSE, or a single non-negative number") # nocov irows = NULL # Meaning all rows. We avoid creating 1:nrow(x) for efficiency. notjoin = FALSE rightcols = leftcols = integer() @@ -1972,7 +1972,7 @@ replace_dot_alias = function(e) { } ans = c(g, ans) } else { - ans = .Call(Cdogroups, x, xcols, groups, grpcols, jiscols, xjiscols, grporder, o__, f__, len__, jsub, SDenv, cols, newnames, !missing(on), verbose, showProgress) + ans = .Call(Cdogroups, x, xcols, groups, grpcols, jiscols, xjiscols, grporder, o__, f__, len__, jsub, SDenv, cols, newnames, !missing(on), verbose, as.integer(showProgress)) } # unlock any locked data.table components of the answer, #4159 # MAX_DEPTH prevents possible infinite recursion from truly recursive object, #4173 diff --git a/man/data.table.Rd b/man/data.table.Rd index a674ecccb0..cfdcb27068 100644 --- a/man/data.table.Rd +++ b/man/data.table.Rd @@ -181,7 +181,7 @@ data.table(\dots, keep.rownames=FALSE, check.names=FALSE, key=NULL, stringsAsFac \item{env}{ List or an environment, passed to \code{\link{substitute2}} for substitution of parameters in \code{i}, \code{j} and \code{by} (or \code{keyby}). Use \code{verbose} to preview constructed expressions. For more details see \href{../doc/datatable-programming.html}{\code{vignette("datatable-programming")}}. } - \item{showProgress}{ \code{TRUE} shows progress indicator with estimated time to completion for lengthy "by" operations. } + \item{showProgress}{ \code{TRUE} (default when \code{interactive()}) shows a progress indicator with estimated time to completion for lengthy "by" operations, updating every 3 seconds. An integer value controls the update interval in seconds (minimum 3). \code{FALSE} disables the progress indicator. } } \details{ \code{data.table} builds on base \R functionality to reduce 2 types of time:\cr diff --git a/src/dogroups.c b/src/dogroups.c index 5200d33f36..d5b9f01bee 100644 --- a/src/dogroups.c +++ b/src/dogroups.c @@ -86,9 +86,10 @@ SEXP dogroups(SEXP dt, SEXP dtcols, SEXP groups, SEXP grpcols, SEXP jiscols, SEX SEXP SDall = PROTECT(findVar(install(".SDall"), env)); nprotect++; // PROTECT for rchk SEXP SD = PROTECT(findVar(install(".SD"), env)); nprotect++; - const bool showProgress = LOGICAL(showProgressArg)[0]==1 && ngrp > 1; // showProgress only if more than 1 group + int updateTime = INTEGER(showProgressArg)[0]; + const bool showProgress = updateTime > 0 && ngrp > 1; // showProgress only if more than 1 group double startTime = (showProgress) ? wallclock() : 0; // For progress printing, startTime is set at the beginning - double nextTime = (showProgress) ? startTime+3 : 0; // wait 3 seconds before printing progress + double nextTime = (showProgress) ? startTime + MAX(updateTime, 3) : 0; // wait at least 3 seconds before starting to print progress hashtab * specials = hash_create(3 + ngrpcols + xlength(SDall)); // .I, .N, .GRP plus columns of .BY plus SDall PROTECT(specials->prot); nprotect++; @@ -451,17 +452,17 @@ SEXP dogroups(SEXP dt, SEXP dtcols, SEXP groups, SEXP grpcols, SEXP jiscols, SEX // could potentially refactor to use fread's progress() function, however we would lose some information in favor of simplicity. double now; if (showProgress && (now=wallclock())>=nextTime) { + // # nocov start. Requires long-running test case double avgTimePerGroup = (now-startTime)/(i+1); int ETA = (int)(avgTimePerGroup*(ngrp-i-1)); if (hasPrinted || ETA >= 0) { - // # nocov start. Requires long-running test case if (verbose && !hasPrinted) Rprintf(_("\n")); Rprintf("\r"); // # notranslate. \r is not internationalizable Rprintf(_("Processed %d groups out of %d. %.0f%% done. Time elapsed: %ds. ETA: %ds."), i+1, ngrp, 100.0*(i+1)/ngrp, (int)(now-startTime), ETA); - // # nocov end } - nextTime = now+1; + nextTime = now+updateTime; hasPrinted = true; + // # nocov end } ansloc += maxn; if (firstalloc) {