Skip to content

Commit

Permalink
Enhance test coverage (#18)
Browse files Browse the repository at this point in the history
* CHG: rename function by camel style; add a new test case for spatpca

* CHG: add essential modified files

* CHG: add new helper for validating input data; add test cases; modify the manual

* CHG: add a new helper function - detrend; add more test cases

* CHG: add helper functions for tuning parameter settings; add test cases for them

* CHG: add an auxillary function for selecting K with spatpacCV; add test cases

* CHG: add a helper function for setting gamma; add test cases

* CHG: add a missing maunaul; modify the expected values for testing on higher R version

* CHG: correct the expected values for testing spatpca

* CHG: correct the expected values for testing spatpca

* CHG: correct the expected values for testing spatpca

* CHG: correct the expected values for testing spatpca

* CHG: correct the expected values for testing spatpca

* CHG: correct the expected values for testing spatpca

* CHG: add test cases for spatpca and thin-plate splines
  • Loading branch information
egpivo authored Jan 30, 2021
1 parent c253ea0 commit d6f5a64
Show file tree
Hide file tree
Showing 19 changed files with 546 additions and 141 deletions.
1 change: 1 addition & 0 deletions .Rbuildignore
Original file line number Diff line number Diff line change
Expand Up @@ -6,3 +6,4 @@
.travis.yml
^\.github$
^codecov\.yml$
^.*\.gcno$
149 changes: 63 additions & 86 deletions R/SpatPCA.R
Original file line number Diff line number Diff line change
@@ -1,5 +1,39 @@
# This file was generated by Rcpp::compileAttributes
# Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393
#'
#' Internal function: M-fold CV of SpatPCA with selecting K
#'
#' @keywords internal
#'
#' @param x Location matrix
#' @param Y Data matrix
#' @param M The number of folds for cross validation; default is 5.
#' @param tau1 Vector of a nonnegative smoothness parameter sequence. If NULL, 10 tau1 values in a range are used.
#' @param tau2 Vector of a nonnegative sparseness parameter sequence. If NULL, none of tau2 is used.
#' @param gamma Vector of a nonnegative hyper parameter sequence for tuning eigenvalues. If NULL, 10 values in a range are used.
#' @param shuffle_split Vector of indeces for random splitting Y into training and test sets
#' @param maxit Maximum number of iterations. Default value is 100.
#' @param thr Threshold for convergence. Default value is \eqn{10^{-4}}.
#' @param l2 Vector of a nonnegative tuning parameter sequence for ADMM use
#' @return A list of objects including
#' \item{cv_result}{A list of resultant objects produced by `spatpcaCV`}
#' \item{selected_K}{Selected K based on CV.}
#'
spatpcaCVWithSelectingK <- function(x, Y, M, tau1, tau2, gamma, shuffle_split, maxit, thr, l2) {
upper_bound <- fetchUpperBoundNumberEigenfunctions(Y, M)
prev_cv_selection <- spatpcaCV(x, Y, M, 1, tau1, tau2, gamma, shuffle_split, maxit, thr, l2)

for (k in 2:upper_bound) {
cv_selection <- spatpcaCV(x, Y, M, k, tau1, tau2, gamma, shuffle_split, maxit, thr, l2)
difference <- prev_cv_selection$selected_gamma - cv_selection$selected_gamma
prev_cv_selection <- cv_selection
if (difference <= 0 || abs(difference) <= 1e-8) {
break
}
}
return(list(cv_result = cv_selection, selected_K = k - 1))
}

#'
#' @title Regularized PCA for spatial data
#'
Expand All @@ -12,8 +46,8 @@
#' @param tau1 Optional user-supplied numeric vector of a nonnegative smoothness parameter sequence. If NULL, 10 tau1 values in a range are used.
#' @param tau2 Optional user-supplied numeric vector of a nonnegative sparseness parameter sequence. If NULL, none of tau2 is used.
#' @param gamma Optional user-supplied numeric vector of a nonnegative tuning parameter sequence. If NULL, 10 values in a range are used.
#' @param M Optional number of folds; default is 5.
#' @param is_Y_centered If TRUE, center the columns of Y. Default is FALSE.
#' @param M Optional number of folds for cross validation; default is 5.
#' @param is_Y_detrended If TRUE, center the columns of Y. Default is FALSE.
#' @param maxit Maximum number of iterations. Default value is 100.
#' @param thr Threshold for convergence. Default value is \eqn{10^{-4}}.
#' @param num_cores Number of cores used to parallel computing. Default value is NULL (See `RcppParallel::defaultNumThreads()`)
Expand All @@ -32,7 +66,7 @@
#' \item{tau1}{Sequence of tau1-values used in the process.}
#' \item{tau2}{Sequence of tau2-values used in the process.}
#' \item{gamma}{Sequence of gamma-values used in the process.}
#' \item{centered_Y}{If is_Y_centered is TRUE, centered_Y is the centered Y; else, centered_Y is equal to Y.}
#' \item{detrended_Y}{If is_Y_detrended is TRUE, detrended_Y means Y is detrended; else, detrended_Y is equal to Y.}
#' \item{scaled_x}{Input location matrix. Only scale when it is one-dimensional}
#'
#' @details An ADMM form of the proposed objective function is written as
Expand Down Expand Up @@ -81,7 +115,7 @@
#' K = cv$selected_K,
#' tau1 = cv$selected_tau1,
#' tau2 = cv$selected_tau2)
#' predicted_eof <- predict_eigenfunction(eof, xx_new)
#' predicted_eof <- predictEigenfunction(eof, xx_new)
#' quilt.plot(xx_new,
#' predicted_eof[,1],
#' nx = new_p,
Expand Down Expand Up @@ -115,92 +149,37 @@ spatpca <- function(x,
tau1 = NULL,
tau2 = NULL,
gamma = NULL,
is_Y_centered = FALSE,
is_Y_detrended = FALSE,
maxit = 100,
thr = 1e-04,
num_cores = NULL) {
call2 <- match.call()
checkInputData(Y, x, M)
setCores(num_cores)

# Transform main objects
x <- as.matrix(x)
Y <- detrend(Y, is_Y_detrended)
K <- setNumberEigenfunctions(K, Y, M)
p <- ncol(Y)
n <- nrow(Y)
if (p < 3) {
stop("Number of locations must be larger than 2.")
}
if (nrow(x) != p) {
stop("The number of rows of x should be equal to the number of columns of Y.")
}
if (ncol(x) > 3) {
stop("Dimension of locations must be less than 4.")
}
if (M >= n) {
stop("Number of folds must be less than sample size.")
}
if (!is.null(K)) {
if (K > min(floor(n - n / M), p)) {
K <- min(floor(n - n / M), p)
warning("K must be smaller than min(floor(n - n/M), p).")
}
}
# Remove the mean trend of Y
if (is_Y_centered) {
Y <- Y - apply(Y, 2, "mean")
}
# Initialize candidates of tuning parameters
if (is.null(tau2)) {
tau2 <- 0
num_tau2 <- 1
} else {
num_tau2 <- length(tau2)
}
if (is.null(tau1)) {
num_tau1 <- 11
tau1 <- c(0, exp(seq(log(1e-6), 0, length = num_tau1 - 1)))
} else {
num_tau1 <- length(tau1)
}

if (M < 2 && (num_tau1 > 1 || num_tau2 > 1)) {
num_tau1 <- num_tau2 <- 1
warning("Only produce the result based on the largest tau1 and largest tau2.")
}

stra <- sample(rep(1:M, length.out = nrow(Y)))
if (is.null(gamma)) {
num_gamma <- 11
svd_Y_partial <- svd(Y[which(stra != 1), ])
max_gamma <- svd_Y_partial$d[1]^2 / nrow(Y[which(stra != 1), ])
log_scale_candidates <-
seq(log(max_gamma / 1e4), log(max_gamma), length = num_gamma - 1)
gamma <- c(0, log_scale_candidates)
}

scaled_x <- scaleLocation(x)
shuffle_split <- sample(rep(1:M, length.out = nrow(Y)))

# Initialize candidates of tuning parameters
tau1 <- setTau1(tau1, M)
tau2 <- setTau2(tau2, M)
l2 <- setL2(tau2)
gamma <- setGamma(gamma, Y[which(shuffle_split != 1), ])

if (num_tau2 == 1 && tau2 > 0) {
l2 <- ifelse(tau2 != 0,
c(0, exp(seq(log(tau2 / 1e4), log(tau2), length = 10))),
tau2
)
} else {
l2 <- 1
}

if (is_K_selected) {
cv_result <- spatpcaCV(scaled_x, Y, M, 1, tau1, tau2, gamma, stra, maxit, thr, l2)
for (k in 2:min(floor(n - n / M), p)) {
cv_new_result <- spatpcaCV(scaled_x, Y, M, k, tau1, tau2, gamma, stra, maxit, thr, l2)
difference <- cv_result$selected_gamma - cv_new_result$selected_gamma
if (difference <= 0 || abs(difference) <= 1e-8) {
break
}
cv_result <- cv_new_result
}
selected_K <- k - 1
cv_with_selected_k <- spatpcaCVWithSelectingK(scaled_x, Y, M, tau1, tau2, gamma, shuffle_split, maxit, thr, l2)
cv_result <- cv_with_selected_k$cv_result
selected_K <- cv_with_selected_k$selected_K
}
else {
cv_result <- spatpcaCV(scaled_x, Y, M, K, tau1, tau2, gamma, stra, maxit, thr, l2)
cv_result <- spatpcaCV(scaled_x, Y, M, K, tau1, tau2, gamma, shuffle_split, maxit, thr, l2)
selected_K <- K
}

Expand All @@ -225,14 +204,13 @@ spatpca <- function(x,
tau1 = tau1,
tau2 = tau2,
gamma = gamma,
centered_Y = Y,
detrended_Y = Y,
scaled_x = scaled_x
)
class(obj.cv) <- "spatpca"
return(obj.cv)
}


#' @title Spatial dominant patterns on new locations
#'
#' @description Estimate K eigenfunctions on new locations
Expand All @@ -252,10 +230,10 @@ spatpca <- function(x,
#' Y_1Drm <- Y_1D[, -rm_loc]
#' x_1Dnew <- as.matrix(seq(-5, 5, length = 20))
#' cv_1D <- spatpca(x = x_1Drm, Y = Y_1Drm, tau2 = 1:100, num_cores = 2)
#' dominant_patterns <- predict_eigenfunction(cv_1D, x_new = x_1Dnew)
#' dominant_patterns <- predictEigenfunction(cv_1D, x_new = x_1Dnew)
#'
predict_eigenfunction <- function(spatpca_object, x_new) {
check_new_locations_for_spatpca_object(spatpca_object, x_new)
predictEigenfunction <- function(spatpca_object, x_new) {
checkNewLocationsForSpatpcaObject(spatpca_object, x_new)
scaled_x_new <- scaleLocation(x_new)

predicted_eigenfn <- eigenFunction(
Expand Down Expand Up @@ -286,25 +264,24 @@ predict_eigenfunction <- function(spatpca_object, x_new) {
#' Y_1Drm <- Y_1D[, -rm_loc]
#' x_1Dnew <- as.matrix(seq(-5, 5, length = 20))
#' cv_1D <- spatpca(x = x_1Drm, Y = Y_1Drm, tau2 = 1:100, num_cores = 2)
#' predictions <- predict_on_new_locations(cv_1D, x_new = x_1Dnew)
#' predictions <- predict(cv_1D, x_new = x_1Dnew)
#'
predict_on_new_locations <- function(spatpca_object, x_new, eigen_patterns_on_new_site = NULL) {
check_new_locations_for_spatpca_object(spatpca_object, x_new)
predict <- function(spatpca_object, x_new, eigen_patterns_on_new_site = NULL) {
checkNewLocationsForSpatpcaObject(spatpca_object, x_new)

if (is.null(eigen_patterns_on_new_site)) {
eigen_patterns_on_new_site <- predict_eigenfunction(spatpca_object, x_new)
eigen_patterns_on_new_site <- predictEigenfunction(spatpca_object, x_new)
}

spatial_prediction <- spatialPrediction(
spatpca_object$eigenfn,
spatpca_object$centered_Y,
spatpca_object$detrended_Y,
spatpca_object$selected_gamma,
eigen_patterns_on_new_site
)
return(spatial_prediction$predict)
}


#'
#' @title Display the cross-validation results
#'
Expand Down
Loading

0 comments on commit d6f5a64

Please sign in to comment.