From 613a986f1517ab5a641771da62eba6c7f5a41577 Mon Sep 17 00:00:00 2001
From: Benjamin Auder <benjamin.auder@somewhere>
Date: Wed, 22 Feb 2017 10:32:16 +0100
Subject: [PATCH] all set-up to prepare ultimate test before last reports

---
 {pkg/data => data}/README                     |  0
 data/data.tar.xz                              |  1 +
 {pkg/data => data}/scripts/augment_meteo.R    |  0
 data/scripts/build_testdata.R                 |  6 ++
 {pkg/data => data}/scripts/fill_NAs.R         |  0
 pkg/DESCRIPTION                               |  4 +-
 pkg/R/F_Neighbors.R                           |  2 +-
 pkg/R/getData.R                               | 31 ++++------
 pkg/R/getForecast.R                           |  2 +-
 pkg/data/data.tar.xz                          |  1 -
 pkg/inst/CITATION                             | 17 ++++++
 .../extdata}/meteo_extra_noNAs.csv            |  0
 .../extdata}/pm10_mesures_H_loc.csv           |  0
 pkg/inst/testdata/exo.csv                     |  0
 pkg/inst/testdata/ts.csv                      |  0
 pkg/tests/testthat/test.Forecaster.R          | 44 +++++++-------
 pkg/tests/testthat/test.dateIndexToInteger.R  | 46 ++++++---------
 pkg/tests/testthat/test.integerIndexToDate.R  | 39 +++++++------
 pkg/vignettes/talweg.Rmd                      | 58 +++++++++++++++++++
 19 files changed, 160 insertions(+), 91 deletions(-)
 rename {pkg/data => data}/README (100%)
 create mode 100644 data/data.tar.xz
 rename {pkg/data => data}/scripts/augment_meteo.R (100%)
 create mode 100644 data/scripts/build_testdata.R
 rename {pkg/data => data}/scripts/fill_NAs.R (100%)
 delete mode 100644 pkg/data/data.tar.xz
 create mode 100644 pkg/inst/CITATION
 rename pkg/{data => inst/extdata}/meteo_extra_noNAs.csv (100%)
 rename pkg/{data => inst/extdata}/pm10_mesures_H_loc.csv (100%)
 create mode 100644 pkg/inst/testdata/exo.csv
 create mode 100644 pkg/inst/testdata/ts.csv
 create mode 100644 pkg/vignettes/talweg.Rmd

diff --git a/pkg/data/README b/data/README
similarity index 100%
rename from pkg/data/README
rename to data/README
diff --git a/data/data.tar.xz b/data/data.tar.xz
new file mode 100644
index 0000000..785b39f
--- /dev/null
+++ b/data/data.tar.xz
@@ -0,0 +1 @@
+#$# git-fat 9d5a273f6f71606d55f37372d1eaf0d8a7fee3f6               609260
diff --git a/pkg/data/scripts/augment_meteo.R b/data/scripts/augment_meteo.R
similarity index 100%
rename from pkg/data/scripts/augment_meteo.R
rename to data/scripts/augment_meteo.R
diff --git a/data/scripts/build_testdata.R b/data/scripts/build_testdata.R
new file mode 100644
index 0000000..9bc4fa4
--- /dev/null
+++ b/data/scripts/build_testdata.R
@@ -0,0 +1,6 @@
+# Only one covariable (so that the "matrix" is always invertible)
+
+# Series are 1,1,...,1 2,2,...,2 ... 7,7,...,7 1,1,...,1 ...etc
+# First serie on monday 2007-01-01
+
+#TODO:
diff --git a/pkg/data/scripts/fill_NAs.R b/data/scripts/fill_NAs.R
similarity index 100%
rename from pkg/data/scripts/fill_NAs.R
rename to data/scripts/fill_NAs.R
diff --git a/pkg/DESCRIPTION b/pkg/DESCRIPTION
index 2a29f90..1453d54 100644
--- a/pkg/DESCRIPTION
+++ b/pkg/DESCRIPTION
@@ -15,7 +15,8 @@ Suggests:
     roxygen2,
     testthat,
     rmarkdown,
-    rainbow
+    rainbow,
+    knitr
 LazyData: yes
 URL: http://git.auder.net/?p=talweg.git
 License: MIT + file LICENSE
@@ -36,3 +37,4 @@ Collate:
     'getForecast.R'
     'plot.R'
     'utils.R'
+VignetteBuilder: knitr
diff --git a/pkg/R/F_Neighbors.R b/pkg/R/F_Neighbors.R
index 7144fad..3abd0ff 100644
--- a/pkg/R/F_Neighbors.R
+++ b/pkg/R/F_Neighbors.R
@@ -173,7 +173,7 @@ NeighborsForecaster = setRefClass(
 				}
 
 				sigma = cov(M) #NOTE: robust covariance is way too slow
-				sigma_inv = qr.solve(sigma)
+				sigma_inv = solve(sigma) #TODO: use pseudo-inverse if needed?
 
 				# Distances from last observed day to days in the past
 				distances2 = rep(NA, nrow(M)-1)
diff --git a/pkg/R/getData.R b/pkg/R/getData.R
index 205ee5d..574522b 100644
--- a/pkg/R/getData.R
+++ b/pkg/R/getData.R
@@ -17,6 +17,11 @@
 #'
 #' @return An object of class Data
 #'
+#' @example
+#' ts_data = read.csv(system.file("extdata",ts_data,package="talweg",mustWork=TRUE))
+#' exo_data = read.csv(system.file("extdata",exo_data,package="talweg",mustWork=TRUE))
+#' getData(ts_data, exo_data, ...)
+#'
 #' @export
 getData = function(ts_data, exo_data, input_tz="GMT", date_format="%d/%m/%Y %H:%M",
 	working_tz="GMT", predict_at=0, limit=Inf)
@@ -28,7 +33,7 @@ getData = function(ts_data, exo_data, input_tz="GMT", date_format="%d/%m/%Y %H:%
 	working_tz = working_tz[1]
 	if ( (!is.data.frame(ts_data) && !is.character(ts_data)) ||
 			(!is.data.frame(exo_data) && !is.character(exo_data)) )
-		stop("Bad time-series / exogenous input (data [frame] or CSV file)")
+		stop("Bad time-series / exogenous input (data frame or CSV file)")
 	if (is.character(ts_data))
 		ts_data = ts_data[1]
 	if (is.character(exo_data))
@@ -41,29 +46,15 @@ getData = function(ts_data, exo_data, input_tz="GMT", date_format="%d/%m/%Y %H:%
 	date_format = date_format[1]
 
 	ts_df =
-		if (is.character(ts_data)) {
-			if (ts_data %in% data(package="talweg")$results[,"Item"])
-				ts_data = 
-	
-					
-					
-					
-					############CONTINUE: http://r-pkgs.had.co.nz/data.html
-					
-					
-					
-					
-					
-					read.csv(ts_data)
-		} else {
+		if (is.character(ts_data))
+			read.csv(ts_data)
+		else
 			ts_data
-		}
 	exo_df =
-		if (is.character(exo_data)) {
+		if (is.character(exo_data))
 			read.csv(exo_data)
-		} else {
+		else
 			exo_data
-		}
 	# Convert to the desired timezone (usually "GMT" or "Europe/Paris")
 	formatted_dates_POSIXlt = strptime(as.character(ts_df[,1]), date_format, tz=input_tz)
 	ts_df[,1] = format(as.POSIXct(formatted_dates_POSIXlt), tz=working_tz, usetz=TRUE)
diff --git a/pkg/R/getForecast.R b/pkg/R/getForecast.R
index b48f3da..6e7720e 100644
--- a/pkg/R/getForecast.R
+++ b/pkg/R/getForecast.R
@@ -24,7 +24,7 @@
 #' @return An object of class Forecast
 #'
 #' @examples
-#' data = getData(ts_data="data/pm10_mesures_H_loc.csv", exo_data="data/meteo_extra_noNAs.csv",
+#' data = getData(ts_data="pm10_mesures_H_loc.csv", exo_data="meteo_extra_noNAs.csv",
 #'   input_tz = "Europe/Paris", working_tz="Europe/Paris", predict_at=7)
 #' pred = getForecast(data, 2200:2230, "Persistence", "Persistence", 500, 12)
 #' \dontrun{#Sketch for real-time mode:
diff --git a/pkg/data/data.tar.xz b/pkg/data/data.tar.xz
deleted file mode 100644
index 6e5e696..0000000
--- a/pkg/data/data.tar.xz
+++ /dev/null
@@ -1 +0,0 @@
-#$# git-fat fb2d21849524e743ce1b3e5589efb40d1182f468               564468
diff --git a/pkg/inst/CITATION b/pkg/inst/CITATION
new file mode 100644
index 0000000..2167e9e
--- /dev/null
+++ b/pkg/inst/CITATION
@@ -0,0 +1,17 @@
+citHeader("To cite talweg in publications use:")
+
+citEntry(entry = "Manual",
+  title        = "Time-series sAmpLes forecasted With ExoGenous variables.",
+  author       = personList(as.person("Benjamin Auder"),
+                   as.person("Jean-Michel Poggi"),
+                   as.person("Bruno Portier")),
+  organization = "Paris-Sud & INSA Rouen",
+  address      = "Orsay & Rouen, France",
+  year         = "2017",
+  url          = "https://git.auder.net/?p=talweg.git",
+
+  textVersion  =
+  paste("Benjamin Auder, Jean-Michel Poggi, Bruno Portier (2017).",
+        "talweg: Time-series sAmpLes forecasted With ExoGenous variables.",
+        "URL https://git.auder.net/?p=talweg.git")
+)
diff --git a/pkg/data/meteo_extra_noNAs.csv b/pkg/inst/extdata/meteo_extra_noNAs.csv
similarity index 100%
rename from pkg/data/meteo_extra_noNAs.csv
rename to pkg/inst/extdata/meteo_extra_noNAs.csv
diff --git a/pkg/data/pm10_mesures_H_loc.csv b/pkg/inst/extdata/pm10_mesures_H_loc.csv
similarity index 100%
rename from pkg/data/pm10_mesures_H_loc.csv
rename to pkg/inst/extdata/pm10_mesures_H_loc.csv
diff --git a/pkg/inst/testdata/exo.csv b/pkg/inst/testdata/exo.csv
new file mode 100644
index 0000000..e69de29
diff --git a/pkg/inst/testdata/ts.csv b/pkg/inst/testdata/ts.csv
new file mode 100644
index 0000000..e69de29
diff --git a/pkg/tests/testthat/test.Forecaster.R b/pkg/tests/testthat/test.Forecaster.R
index b20f104..0986a3d 100644
--- a/pkg/tests/testthat/test.Forecaster.R
+++ b/pkg/tests/testthat/test.Forecaster.R
@@ -2,34 +2,34 @@ context("Check that forecasters behave as expected")
 
 test_that("Average+Zero method behave as expected",
 {
+	ts_data = system.file("testdata","ts",package="talweg")
+	exo_data = system.file("testdata","exo",package="talweg")
+
+	data0 = getData(ts_data, exo_data, input_tz="GMT", date_format="%Y-%m-%d %H:%M",
+		working_tz="GMT", predict_at=0, limit=Inf)
+	#TODO: with and without shift at origin (so series values at least forst ones are required)
+
+	indices = ...
+	pred0 = getForecast(.......)
+
+	for (i in seq_along(indices))
+	{
+		expect_identical(....)
+	}
+
+	data13 = getData(ts_data, exo_data, input_tz="GMT", date_format="%Y-%m-%d %H:%M",
+		working_tz="GMT", predict_at=13, limit=Inf)
+	#Attention: jours deviennent 1 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 (14h-->0h puis 1h-->13h)
+}
 
 test_that("Persistence+Zero method behave as expected",
 {
+}
 
 test_that("Neighbors+Zero method behave as expected",
 {
+}
 
 test_that("Neighbors+Neighbors method behave as expected",
 {
-
-
-
-#TODO: with and without shift at origin (so series values at least forst ones are required)
-
-
-	n = 1500
-	series = list()
-	for (i in seq_len(n))
-	{
-		index = (i%%3) + 1
-		level = mean(s[[index]])
-		serie = s[[index]] - level + rnorm(L,sd=0.05)
-		# 10 series with NAs for index 2
-		if (index == 2 && i >= 60 && i<= 90)
-			serie[sample(seq_len(L),1)] = NA
-		series[[i]] = list("level"=level,"serie"=serie) #no need for more :: si : time !!!
-	}
-	data = new("Data", data=series)
-
-	dateIndexToInteger = function(index, data)
-})
+}
diff --git a/pkg/tests/testthat/test.dateIndexToInteger.R b/pkg/tests/testthat/test.dateIndexToInteger.R
index 7f09847..b2316dd 100644
--- a/pkg/tests/testthat/test.dateIndexToInteger.R
+++ b/pkg/tests/testthat/test.dateIndexToInteger.R
@@ -1,33 +1,25 @@
 context("Check that dateIndexToInteger behaves as expected")
 
-getDataTest = function(n, shift)
+test_that("computed integer index is correct; predict_at == 0",
 {
-	n = 1500
-	series = list()
-	s = rep(0, 24)
-
-	for (i in seq_len(n))
-	{
-		level = i %% 3mean(s[[index]])
-		serie = s
-		# 10 series with NAs for index 2
-		if (index == 2 && i >= 60 && i<= 90)
-			serie[sample(seq_len(L),1)] = NA
-		series[[i]] = list("level"=i%%3, "serie"=s, "time"=)
-	}
-	data = new("Data", data=series)
-}
-
-test_that("integer index matches date in data, predict_at == 0",
-{
-	data = getData(
-	dateIndexToInteger = function(index, data)
+	data0 = getData(ts_data="pm10_mesures_H_loc.csv", exo_data="meteo_extra_noNAs.csv",
+		input_tz="Europe/Paris",working_tz="Europe/Paris", predict_at=0, limit=200)
+	expect_identical( dateIndexToInteger("2008-12-10",data), 1 )
+	expect_identical( dateIndexToInteger("2008-12-11",data), 2 )
+	expect_identical( dateIndexToInteger("2008-12-20",data), 11 )
+	expect_identical( dateIndexToInteger("2009-02-01",data), 53 )
+	expect_identical( dateIndexToInteger("2009-03-01",data), 81 )
+	expect_identical( dateIndexToInteger("2009-05-31",data), 172 )
 })
 
-test_that("integer index matches date in data, predict_at > 0",
+test_that("computed integer index is correct; predict_at > 0",
 {
-
-
-
-
-	####TODO: CSV as raw data in inst/extdata http://r-pkgs.had.co.nz/data.html
+	data7 = getData(ts_data="pm10_mesures_H_loc.csv", exo_data="meteo_extra_noNAs.csv",
+		input_tz="Europe/Paris",working_tz="Europe/Paris", predict_at=7, limit=200)
+	expect_identical( dateIndexToInteger("2008-12-10",data), 2 )
+	expect_identical( dateIndexToInteger("2008-12-11",data), 3 )
+	expect_identical( dateIndexToInteger("2008-12-20",data), 12 )
+	expect_identical( dateIndexToInteger("2009-02-01",data), 54 )
+	expect_identical( dateIndexToInteger("2009-03-01",data), 82 )
+	expect_identical( dateIndexToInteger("2009-05-31",data), 173 )
+}
diff --git a/pkg/tests/testthat/test.integerIndexToDate.R b/pkg/tests/testthat/test.integerIndexToDate.R
index 31cd740..5f3d007 100644
--- a/pkg/tests/testthat/test.integerIndexToDate.R
+++ b/pkg/tests/testthat/test.integerIndexToDate.R
@@ -1,22 +1,25 @@
 context("Check that integerIndexToDate behaves as expected")
 
-test_that("date matches index in data",
+test_that("computed dates match the input indexes; predict_at == 0",
 {
-	#TODO: with and without shift at origin (so series values at least forst ones are required)
-
-	n = 1500
-	series = list()
-	for (i in seq_len(n))
-	{
-		index = (i%%3) + 1
-		level = mean(s[[index]])
-		serie = s[[index]] - level + rnorm(L,sd=0.05)
-		# 10 series with NAs for index 2
-		if (index == 2 && i >= 60 && i<= 90)
-			serie[sample(seq_len(L),1)] = NA
-		series[[i]] = list("level"=level,"serie"=serie) #no need for more :: si : time !!!
-	}
-	data = new("Data", data=series)
-
-	integerIndexToDate = function(index, data)
+	data0 = getData(ts_data="pm10_mesures_H_loc.csv", exo_data="meteo_extra_noNAs.csv",
+		input_tz="Europe/Paris",working_tz="Europe/Paris", predict_at=0, limit=200)
+	expect_identical( integerIndexToDate(1,data), as.Date("2008-12-10") )
+	expect_identical( integerIndexToDate(2,data), as.Date("2008-12-11") )
+	expect_identical( integerIndexToDate(11,data), as.Date("2008-12-20") )
+	expect_identical( integerIndexToDate(53,data), as.Date("2009-02-01") )
+	expect_identical( integerIndexToDate(81,data), as.Date("2009-03-01") )
+	expect_identical( integerIndexToDate(172,data), as.Date("2009-05-31") )
 })
+
+test_that("computed dates match the input indexes; predict_at > 0",
+{
+	data7 = getData(ts_data="pm10_mesures_H_loc.csv", exo_data="meteo_extra_noNAs.csv",
+		input_tz="Europe/Paris",working_tz="Europe/Paris", predict_at=7, limit=200)
+	expect_identical( integerIndexToDate(2,data), as.Date("2008-12-10") )
+	expect_identical( integerIndexToDate(3,data), as.Date("2008-12-11") )
+	expect_identical( integerIndexToDate(12,data), as.Date("2008-12-20") )
+	expect_identical( integerIndexToDate(54,data), as.Date("2009-02-01") )
+	expect_identical( integerIndexToDate(82,data), as.Date("2009-03-01") )
+	expect_identical( integerIndexToDate(173,data), as.Date("2009-05-31") )
+}
diff --git a/pkg/vignettes/talweg.Rmd b/pkg/vignettes/talweg.Rmd
new file mode 100644
index 0000000..aace6af
--- /dev/null
+++ b/pkg/vignettes/talweg.Rmd
@@ -0,0 +1,58 @@
+---
+title: "Vignette Title"
+author: "Vignette Author"
+date: "`r Sys.Date()`"
+output: rmarkdown::html_vignette
+vignette: >
+  %\VignetteIndexEntry{Vignette Title}
+  %\VignetteEngine{knitr::rmarkdown}
+  %\VignetteEncoding{UTF-8}
+---
+
+Vignettes are long form documentation commonly included in packages. Because they are part of the distribution of the package, they need to be as compact as possible. The `html_vignette` output type provides a custom style sheet (and tweaks some options) to ensure that the resulting html is as small as possible. The `html_vignette` format:
+
+- Never uses retina figures
+- Has a smaller default figure size
+- Uses a custom CSS stylesheet instead of the default Twitter Bootstrap style
+
+## Vignette Info
+
+Note the various macros within the `vignette` section of the metadata block above. These are required in order to instruct R how to build the vignette. Note that you should change the `title` field and the `\VignetteIndexEntry` to match the title of your vignette.
+
+## Styles
+
+The `html_vignette` template includes a basic CSS theme. To override this theme you can specify your own CSS in the document metadata as follows:
+
+    output: 
+      rmarkdown::html_vignette:
+        css: mystyles.css
+
+## Figures
+
+The figure sizes have been customised so that you can easily put two images side-by-side. 
+
+```{r, fig.show='hold'}
+plot(1:10)
+plot(10:1)
+```
+
+You can enable figure captions by `fig_caption: yes` in YAML:
+
+    output:
+      rmarkdown::html_vignette:
+        fig_caption: yes
+
+Then you can use the chunk option `fig.cap = "Your figure caption."` in **knitr**.
+
+## More Examples
+
+You can write math expressions, e.g. $Y = X\beta + \epsilon$, footnotes^[A footnote here.], and tables, e.g. using `knitr::kable()`.
+
+```{r, echo=FALSE, results='asis'}
+knitr::kable(head(mtcars, 10))
+```
+
+Also a quote using `>`:
+
+> "He who gives up [code] safety for [code] speed deserves neither."
+([via](https://twitter.com/hadleywickham/status/504368538874703872))
-- 
2.44.0