diff --git a/CRAN-RELEASE b/CRAN-RELEASE index fb0a555..4761df2 100644 --- a/CRAN-RELEASE +++ b/CRAN-RELEASE @@ -1,2 +1,2 @@ This package was submitted to CRAN on 2021-03-31. -Once it is accepted, delete this file and tag the release (commit 7406be2). +Once it is accepted, delete this file and tag the release (commit 7cdcee0). diff --git a/NEWS.md b/NEWS.md index 0063a06..b0b77f8 100644 --- a/NEWS.md +++ b/NEWS.md @@ -7,7 +7,7 @@ ## Fix CRAN issue -- Moved `h2o.init()` to skip on CRAN per H2O (Erin LeDell) suggestion. The original issue was related to `libxgboost4j_gpu*.so` being left on the user system when the user's temp library. +- Moved `h2o.init()` to skip on CRAN per H2O recommendation. The original issue was related to `libxgboost4j_gpu*.so` being left on the user system when the user's temp library. This is being corrected by H2O. # modeltime.h2o 0.1.0 diff --git a/docs/articles/getting-started.html b/docs/articles/getting-started.html index 68e2b19..1e600e7 100644 --- a/docs/articles/getting-started.html +++ b/docs/articles/getting-started.html @@ -219,17 +219,25 @@

ip = 'localhost', port = 54321 ) -#> Connection successful! +#> +#> H2O is not running yet, starting it now... +#> +#> Note: In case of errors look at the following log files: +#> /var/folders/st/s5vwv9pd27g7z2sffwlqmtv80000gn/T//RtmpOAzLqf/file60373f953c09/h2o_mdancho_started_from_r.out +#> /var/folders/st/s5vwv9pd27g7z2sffwlqmtv80000gn/T//RtmpOAzLqf/file60375e98937b/h2o_mdancho_started_from_r.err +#> +#> +#> Starting H2O JVM and connecting: ... Connection successful! #> #> R is connected to the H2O cluster: -#> H2O cluster uptime: 51 seconds 359 milliseconds +#> H2O cluster uptime: 2 seconds 616 milliseconds #> H2O cluster timezone: America/New_York #> H2O data parsing timezone: UTC #> H2O cluster version: 3.32.0.1 -#> H2O cluster version age: 5 months and 22 days !!! -#> H2O cluster name: H2O_started_from_R_mdancho_ood584 +#> H2O cluster version age: 5 months and 27 days !!! +#> H2O cluster name: H2O_started_from_R_mdancho_ueg906 #> H2O cluster total nodes: 1 -#> H2O cluster total memory: 7.99 GB +#> H2O cluster total memory: 8.00 GB #> H2O cluster total cores: 12 #> H2O cluster allowed cores: 12 #> H2O cluster healthy: TRUE @@ -274,22 +282,22 @@

model_fitted <- model_spec %>% fit(Weekly_Sales ~ ., data = train_tbl) #> model_id mean_residual_deviance -#> 1 StackedEnsemble_AllModels_AutoML_20210331_100707 36017226 -#> 2 XGBoost_3_AutoML_20210331_100707 37869757 -#> 3 XGBoost_2_AutoML_20210331_100707 38767815 -#> 4 XGBoost_1_AutoML_20210331_100707 40512216 -#> rmse mse mae rmsle -#> 1 6001.435 36017226 3649.674 0.1449377 -#> 2 6153.841 37869757 3774.116 0.1485089 -#> 3 6226.381 38767815 3949.343 0.1610428 -#> 4 6364.921 40512216 4162.325 0.1694726 +#> 1 StackedEnsemble_AllModels_AutoML_20210405_115904 38501320 +#> 2 XGBoost_3_AutoML_20210405_115904 42212631 +#> 3 XGBoost_2_AutoML_20210405_115904 58816361 +#> 4 XGBoost_1_AutoML_20210405_115904 2369268925 +#> rmse mse mae rmsle +#> 1 6204.943 38501320 3835.035 0.1444437 +#> 2 6497.125 42212631 4096.153 0.1501578 +#> 3 7669.183 58816361 4875.643 0.1673720 +#> 4 48675.137 2369268925 40066.038 1.2850293 #> #> [4 rows x 6 columns] model_fitted #> parsnip model object #> -#> Fit time: 8s +#> Fit time: 9s #> #> H2O AutoML - Stackedensemble #> -------- @@ -297,7 +305,7 @@

#> ============== #> #> H2ORegressionModel: stackedensemble -#> Model ID: StackedEnsemble_AllModels_AutoML_20210331_100707 +#> Model ID: StackedEnsemble_AllModels_AutoML_20210405_115904 #> Number of Base Models: 3 #> #> Base Models (count by algorithm type): @@ -318,11 +326,11 @@

#> H2ORegressionMetrics: stackedensemble #> ** Reported on training data. ** #> -#> MSE: 12372090 -#> RMSE: 3517.398 -#> MAE: 2215.673 -#> RMSLE: 0.07996155 -#> Mean Residual Deviance : 12372090 +#> MSE: 22728954 +#> RMSE: 4767.489 +#> MAE: 3027.272 +#> RMSLE: 0.1026567 +#> Mean Residual Deviance : 22728954 #> #> #> @@ -330,21 +338,21 @@

#> ** Reported on cross-validation data. ** #> ** 5-fold cross-validation on training data (Metrics computed for combined holdout predictions) ** #> -#> MSE: 36017226 -#> RMSE: 6001.435 -#> MAE: 3649.674 -#> RMSLE: 0.1449377 -#> Mean Residual Deviance : 36017226 +#> MSE: 38501320 +#> RMSE: 6204.943 +#> MAE: 3835.035 +#> RMSLE: 0.1444437 +#> Mean Residual Deviance : 38501320

The best models are stored in the leaderboard, and by default the model with the best value of the metric used to sort the leaderboard is selected. This behavior can be controlled with the sort_metric parameter passed through set_engine (for more information see ?h2o.automl); by default, the leaderboard is sorted by mean_residual_deviance. To list the models created during training that have been stored in the leaderboard, use the automl_leaderboard() function as follows:

 automl_leaderboard(model_fitted)
 #> # A tibble: 4 x 6
-#>   model_id                        mean_residual_devia…  rmse     mse   mae rmsle
-#>   <chr>                                          <dbl> <dbl>   <dbl> <dbl> <dbl>
-#> 1 StackedEnsemble_AllModels_Auto…            36017226. 6001.  3.60e7 3650. 0.145
-#> 2 XGBoost_3_AutoML_20210331_1007…            37869757. 6154.  3.79e7 3774. 0.149
-#> 3 XGBoost_2_AutoML_20210331_1007…            38767815. 6226.  3.88e7 3949. 0.161
-#> 4 XGBoost_1_AutoML_20210331_1007…            40512216. 6365.  4.05e7 4162. 0.169
+#> model_id mean_residual_devi… rmse mse mae rmsle +#> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> +#> 1 StackedEnsemble_AllModels_Au… 38501320. 6205. 3.85e7 3835. 0.144 +#> 2 XGBoost_3_AutoML_20210405_11… 42212631. 6497. 4.22e7 4096. 0.150 +#> 3 XGBoost_2_AutoML_20210405_11… 58816361. 7669. 5.88e7 4876. 0.167 +#> 4 XGBoost_1_AutoML_20210405_11… 2369268925. 48675. 2.37e9 40066. 1.29

To change the default selected model (remember, this is the first one when sorted according to the selected metric), use the automl_update_model() function as follows (do not run the following example as-is: the model id will be different on your machine because there is randomness in the process):

 automl_update_model(model_fitted, model_id = "StackedEnsemble_AllModels_AutoML_20210319_204825")
@@ -354,16 +362,16 @@

#> # A tibble: 84 x 1 #> .pred #> <dbl> -#> 1 18102. -#> 2 31263. -#> 3 38531. -#> 4 40891. -#> 5 75102. -#> 6 82129. -#> 7 135616. -#> 8 18035. -#> 9 36727. -#> 10 37618. +#> 1 19438. +#> 2 31548. +#> 3 37335. +#> 4 40876. +#> 5 77931. +#> 6 79811. +#> 7 132721. +#> 8 19065. +#> 9 36439. +#> 10 36364. #> # … with 74 more rows
@@ -430,15 +438,15 @@

refit_tbl <- modeltime_tbl %>% modeltime_refit(data_prepared_tbl) #> model_id mean_residual_deviance -#> 1 StackedEnsemble_AllModels_AutoML_20210331_100717 33864338 -#> 2 XGBoost_3_AutoML_20210331_100717 36005432 -#> 3 XGBoost_2_AutoML_20210331_100717 37963076 -#> 4 XGBoost_1_AutoML_20210331_100717 38981678 -#> rmse mse mae rmsle -#> 1 5819.307 33864338 3552.083 0.1408208 -#> 2 6000.453 36005432 3656.208 0.1448132 -#> 3 6161.418 37963076 3941.063 0.1610182 -#> 4 6243.531 38981678 4171.108 0.1825377 +#> 1 StackedEnsemble_AllModels_AutoML_20210405_115918 45119504 +#> 2 XGBoost_2_AutoML_20210405_115918 128393878 +#> 3 XGBoost_1_AutoML_20210405_115918 156236498 +#> 4 XGBoost_3_AutoML_20210405_115918 265914814 +#> rmse mse mae rmsle +#> 1 6717.105 45119504 4409.966 0.1535976 +#> 2 11331.102 128393878 7725.048 0.1921754 +#> 3 12499.460 156236498 8734.104 0.2435574 +#> 4 16306.895 265914814 11719.747 0.2776191 #> #> [4 rows x 6 columns] diff --git a/docs/articles/getting-started_files/figure-html/unnamed-chunk-12-1.png b/docs/articles/getting-started_files/figure-html/unnamed-chunk-12-1.png index 4b1c496..e1dd66c 100644 Binary files a/docs/articles/getting-started_files/figure-html/unnamed-chunk-12-1.png and b/docs/articles/getting-started_files/figure-html/unnamed-chunk-12-1.png differ diff --git a/docs/articles/getting-started_files/figure-html/unnamed-chunk-14-1.png b/docs/articles/getting-started_files/figure-html/unnamed-chunk-14-1.png index ddb204f..2f7f00f 100644 Binary files a/docs/articles/getting-started_files/figure-html/unnamed-chunk-14-1.png and b/docs/articles/getting-started_files/figure-html/unnamed-chunk-14-1.png differ diff --git a/docs/news/index.html b/docs/news/index.html index a21f544..ce2e2a1 100644 --- a/docs/news/index.html +++ b/docs/news/index.html @@ -221,7 +221,7 @@

Fix CRAN issue

    -
  • Moved h2o.init() to skip on CRAN per H2O (Erin LeDell) suggestion. The original issue was related to libxgboost4j_gpu*.so being left on the user system when the user’s temp library.
  • +
  • Moved h2o.init() to skip on CRAN per H2O recommendation. The original issue was related to libxgboost4j_gpu*.so being left on the user system when the user’s temp library. This is being corrected by H2O.

diff --git a/docs/pkgdown.yml b/docs/pkgdown.yml index 543062c..3cb853d 100644 --- a/docs/pkgdown.yml +++ b/docs/pkgdown.yml @@ -3,5 +3,5 @@ pkgdown: 1.6.1 pkgdown_sha: ~ articles: getting-started: getting-started.html -last_built: 2021-03-31T14:06Z +last_built: 2021-04-05T15:58Z diff --git a/docs/reference/automl_leaderboard.html b/docs/reference/automl_leaderboard.html index d218d05..2012261 100644 --- a/docs/reference/automl_leaderboard.html +++ b/docs/reference/automl_leaderboard.html @@ -287,6 +287,10 @@

Examp model_id_2 <- leaderboard_tbl$model_id[[2]] model_fit_2 <- automl_update_model(model_fit, model_id_2) model_fit_2 + +# Shutdown H2O when Finished. +# Make sure to save any work before. +h2o.shutdown(prompt = FALSE) } diff --git a/docs/reference/automl_reg.html b/docs/reference/automl_reg.html index ef52149..cce0b7e 100644 --- a/docs/reference/automl_reg.html +++ b/docs/reference/automl_reg.html @@ -260,30 +260,12 @@

See a

fit.model_spec(), set_engine()

Examples

-
# \donttest{ +
if (FALSE) { library(tidymodels) -
#> ── Attaching packages ────────────────────────────────────── tidymodels 0.1.2 ──
#> broom 0.7.5 recipes 0.1.15 -#> dials 0.0.9.9000 rsample 0.0.9 -#> dplyr 1.0.5 tibble 3.1.0 -#> ggplot2 3.3.3 tidyr 1.1.3 -#> infer 0.5.4 tune 0.1.3 -#> modeldata 0.1.0 workflows 0.2.2 -#> parsnip 0.1.5 yardstick 0.0.8 -#> purrr 0.3.4
#> ── Conflicts ───────────────────────────────────────── tidymodels_conflicts() ── -#> x purrr::discard() masks scales::discard() -#> x dplyr::filter() masks stats::filter() -#> x dplyr::lag() masks stats::lag() -#> x recipes::step() masks stats::step()
#> ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.0 ──
#> readr 1.4.0 forcats 0.5.1 -#> stringr 1.4.0
#> ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ── -#> x readr::col_factor() masks scales::col_factor() -#> x purrr::discard() masks scales::discard() -#> x dplyr::filter() masks stats::filter() -#> x stringr::fixed() masks recipes::fixed() -#> x dplyr::lag() masks stats::lag() -#> x readr::spec() masks yardstick::spec()
library(timetk) +library(timetk) data_tbl <- walmart_sales_weekly %>% select(id, Date, Weekly_Sales) @@ -293,7 +275,7 @@

Examp assess = "3 month", cumulative = TRUE ) -

#> Using date_var: Date
#> Data is not ordered by the 'date_var'. Resamples will be arranged by `Date`.
#> Overlapping Timestamps Detected. Processing overlapping time series together using sliding windows.
+ recipe_spec <- recipe(Weekly_Sales ~ ., data = training(splits)) %>% step_timeseries_signature(Date) @@ -307,36 +289,7 @@

Examp ip = 'localhost', port = 54321 ) -

#> -#> H2O is not running yet, starting it now... -#> -#> Note: In case of errors look at the following log files: -#> /var/folders/st/s5vwv9pd27g7z2sffwlqmtv80000gn/T//RtmpMDLL90/file3eb71110bb29/h2o_mdancho_started_from_r.out -#> /var/folders/st/s5vwv9pd27g7z2sffwlqmtv80000gn/T//RtmpMDLL90/file3eb7616f897f/h2o_mdancho_started_from_r.err -#> -#> -#> Starting H2O JVM and connecting: .. Connection successful! -#> -#> R is connected to the H2O cluster: -#> H2O cluster uptime: 2 seconds 484 milliseconds -#> H2O cluster timezone: America/New_York -#> H2O data parsing timezone: UTC -#> H2O cluster version: 3.32.0.1 -#> H2O cluster version age: 5 months and 22 days !!! -#> H2O cluster name: H2O_started_from_R_mdancho_ood584 -#> H2O cluster total nodes: 1 -#> H2O cluster total memory: 8.00 GB -#> H2O cluster total cores: 12 -#> H2O cluster allowed cores: 12 -#> H2O cluster healthy: TRUE -#> H2O Connection ip: localhost -#> H2O Connection port: 54321 -#> H2O Connection proxy: NA -#> H2O Internal Security: FALSE -#> H2O API Extensions: Amazon S3, XGBoost, Algos, AutoML, Core V3, TargetEncoder, Core V4 -#> R Version: R version 4.0.2 (2020-06-22)
#> Warning: -#> Your H2O cluster version is too old (5 months and 22 days)! -#> Please download and install the latest version from http://h2o.ai/download/
#>
+ # ---- MODEL SPEC ---- @@ -353,118 +306,27 @@

Examp ) model_spec -

#> H2O AutoML Model Specification (regression) -#> -#> Engine-Specific Arguments: -#> max_runtime_secs = 30 -#> max_runtime_secs_per_model = 30 -#> project_name = project_01 -#> nfolds = 5 -#> max_models = 1000 -#> exclude_algos = c("DeepLearning") -#> seed = 786 -#> -#> Computational engine: h2o -#>
+ # ---- TRAINING ---- # Important: Make sure the date is included as regressor. # This training process should take 30-40 seconds model_fitted <- model_spec %>% fit(Weekly_Sales ~ ., data = train_tbl) -
#> Converting to H2OFrame...
#> | | | 0% | |======================================================================| 100%
#> -#> Training H2O AutoML...
#> | | | 0% | |=== | 5% | |======= | 9% | |========== | 14% | |============= | 19% | |================ | 23% | |==================== | 28% | |======================= | 33% | |=========================== | 39% | |================================ | 46% | |=================================== | 50% | |====================================== | 55% | |========================================= | 59% | |============================================= | 64% | |================================================ | 69% | |=================================================== | 73% | |====================================================== | 78% | |========================================================== | 83% | |============================================================= | 87% | |================================================================= | 92% | |==================================================================== | 97% | |======================================================================| 100% -#> | | | 0% | |======================================================================| 100%
#> -#> -#> Leaderboard:
#> model_id mean_residual_deviance -#> 1 StackedEnsemble_AllModels_AutoML_20210331_100618 33313950 -#> 2 StackedEnsemble_BestOfFamily_AutoML_20210331_100618 33416037 -#> 3 XGBoost_grid__1_AutoML_20210331_100618_model_3 34331133 -#> 4 XGBoost_grid__1_AutoML_20210331_100618_model_10 35345362 -#> 5 XGBoost_3_AutoML_20210331_100618 36178176 -#> 6 XGBoost_grid__1_AutoML_20210331_100618_model_4 36289402 -#> rmse mse mae rmsle -#> 1 5771.824 33313950 3517.760 0.1395222 -#> 2 5780.661 33416037 3520.183 0.1392016 -#> 3 5859.278 34331133 3628.706 0.1437346 -#> 4 5945.197 35345362 3672.445 0.1501596 -#> 5 6014.830 36178176 3731.129 0.1543888 -#> 6 6024.069 36289402 3736.910 0.1496543 -#> -#> [33 rows x 6 columns]
#> -#> Using top model: StackedEnsemble_AllModels_AutoML_20210331_100618
+ model_fitted -
#> parsnip model object -#> -#> Fit time: 39.4s -#> -#> H2O AutoML - Stackedensemble -#> -------- -#> Model: Model Details: -#> ============== -#> -#> H2ORegressionModel: stackedensemble -#> Model ID: StackedEnsemble_AllModels_AutoML_20210331_100618 -#> Number of Base Models: 31 -#> -#> Base Models (count by algorithm type): -#> -#> drf gbm glm xgboost -#> 2 12 1 16 -#> -#> Metalearner: -#> -#> Metalearner algorithm: glm -#> Metalearner cross-validation fold assignment: -#> Fold assignment scheme: AUTO -#> Number of folds: 5 -#> Fold column: NULL -#> Metalearner hyperparameters: -#> -#> -#> H2ORegressionMetrics: stackedensemble -#> ** Reported on training data. ** -#> -#> MSE: 3951237 -#> RMSE: 1987.772 -#> MAE: 1365.175 -#> RMSLE: 0.0575679 -#> Mean Residual Deviance : 3951237 -#> -#> -#> -#> H2ORegressionMetrics: stackedensemble -#> ** Reported on cross-validation data. ** -#> ** 5-fold cross-validation on training data (Metrics computed for combined holdout predictions) ** -#> -#> MSE: 33313950 -#> RMSE: 5771.824 -#> MAE: 3517.76 -#> RMSLE: 0.1395222 -#> Mean Residual Deviance : 33313950 -#> -#>
+ # ---- PREDICT ---- # - IMPORTANT: New Data must have date feature predict(model_fitted, test_tbl) -
#> Converting to H2OFrame...
#> | | | 0% | |======================================================================| 100% -#> | | | 0% | |======================================================================| 100%
#> # A tibble: 84 x 1 -#> .pred -#> <dbl> -#> 1 17402. -#> 2 31275. -#> 3 37803. -#> 4 40099. -#> 5 72840. -#> 6 81692. -#> 7 136457. -#> 8 17101. -#> 9 38239. -#> 10 37315. -#> # … with 74 more rows
# } - -
+ +# Shutdown H2O when Finished. +# Make sure to save any work before. +h2o.shutdown(prompt = FALSE) +} + +