# pkg/R/z_util.R (aggexp.git, blob 996a5f8) -- shared constants and small
# linear-algebra helpers, restored from a one-line git blobdiff.

#Maximum size of stored data to predict next PM10
MAX_HISTORY = 10000

#Default lambda value (when too few data)
LAMBDA = 2.

#Maximum error to keep a line in (incremental) data
MAX_ERROR = 20.

#Turn a "vector" into 1D matrix if needed (because R auto cast 1D matrices)
#
# x: any object; if it already has a dim attribute it is passed to as.matrix()
#    unchanged in shape, otherwise it is converted to a single-ROW matrix.
# Returns: a matrix.
matricize = function(x)
{
	if (!is.null(dim(x)))
		return (as.matrix(x))
	# as.matrix() on a dimensionless vector yields one column; transpose to one row
	return (t(as.matrix(x)))
}

#Moore-Penrose pseudo inverse
#
# M:       numeric matrix (a plain vector is accepted and handled as a
#          one-column matrix, which is how svd() treats it).
# epsilon: singular values strictly below this absolute threshold are treated
#          as zero (their reciprocal is set to 0). Defaults to 1e-10.
# Returns: the ncol(M) x nrow(M) pseudo-inverse computed from the SVD of M.
mpPsInv = function(M, epsilon = 1e-10)
{
	s = svd(M)
	sd = s$d
	# Mark negligible singular values as Inf so that 1/sd becomes exactly 0
	sd[sd < epsilon] = Inf
	# Size the diagonal from the number of singular values: for a matrix this is
	# min(nrow(M), ncol(M)); it also works when M has no dim attribute, and the
	# explicit size keeps the single-value case a 1x1 matrix.
	sd = diag(1.0 / sd, length(sd))
	return (s$v %*% sd %*% t(s$u))
}

#Heuristic for k in knn algorithms
#
# n: number of available points.
# Returns: ceiling(n^(2/3)) clamped to the range [1, 50].
getKnn = function(n)
{
	return ( max(1, min(50, ceiling(n^(2./3.)))) )
}

#Minimize lambda*||u||^2 + ||Xu - Y||^2
#
# X:      design matrix.
# Y:      response vector or matrix with nrow(X) rows.
# lambda: ridge penalty.
# Returns: the minimizer u, computed through the SVD of X as
#          V diag(d / (d^2 + lambda)) U' Y.
ridgeSolve = function(X, Y, lambda)
{
	s = svd(X)
	deltaDiag = s$d / (s$d^2 + lambda)
	# 0/0 (zero singular value with lambda == 0) gives NaN: drop that direction
	deltaDiag[!is.finite(deltaDiag)] = 0.0
	# diag() on a length-1 vector would build an identity matrix, so only
	# expand to a diagonal matrix when there are several singular values
	if (length(deltaDiag) > 1)
		deltaDiag = diag(deltaDiag)
	return (s$v %*% deltaDiag %*% t(s$u) %*% Y)
}

#Flag the rows (by default) or columns that contain no NA
#
# M:      matrix (must have a dim attribute, as required by apply()).
# margin: 1 to test rows (default), 2 to test columns.
# Returns: a logical vector, TRUE where the row/column has no NA. It is a
#          mask rather than integer positions; use which() to get positions.
getNoNAindices = function(M, margin=1)
{
	return (apply(M, margin, function(z)(!any(is.na(z)))))
}