This README gives instructions for running the code accompanying the manuscript “Common Reducing Subspace Model and Network Alternation Analysis” by Wang, Zhang and Li (2018). It gives examples of running the Common Reducing Subspace (CRS) model to estimate (differences between) covariance and precision matrices.
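
Both examples assume that the R packages MASS, ManifoldOptim, and scio are installed, and that the files sg1D.R and sgFG.R (the 1D and FG solvers sourced by the scripts) are in the working directory. A minimal setup sketch, assuming the packages are available from CRAN (scio may need to be installed from the CRAN archive if it has been archived):

# one-time setup (sketch; package versions are not pinned)
install.packages(c("MASS", "ManifoldOptim", "scio"))
# place sg1D.R and sgFG.R in the current working directory before running the code below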

Covariance Matrix Estimation

rm(list = ls())

library(MASS)
#install.packages("ManifoldOptim")
library(ManifoldOptim)
mod <- Module("ManifoldOptim_module", PACKAGE = "ManifoldOptim")
source("sg1D.R")
source("sgFG.R")

K <- 2   # 2 groups
r <- 100  # covariance matrix dimension
n_k <- rep(40, K)  # sample size for each group
d <- 100  # number of observations used to generate each sample covariance matrix
u <- 5  # rank and envelope dimension

set.seed(999)

# Model I data generation
Ok <- matrix(runif(u^2), u, u)
Ok <- qr.Q(qr(Ok))
O <- matrix(runif((r-u)^2), r-u, r-u) 
D <- diag(exp(seq(-2, 2, length.out=(r-u))))
Omega0 <- O %*% D %*% t(O)
Omegak <- array(0, c(u, u, K))
for(j in 1:K){
  Dk <- diag(j*(1:u))
  Omegak[, , j] <- Ok %*% Dk %*% t(Ok)
}

Gamma <- matrix(runif(r*u), r, u)
Gamma <- qr.Q(qr(Gamma))
Gamma0 <- qr.Q(qr(Gamma),complete=T)[,(u+1):r]

delta <- array(0, c(r, r, K))
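# population covariances: delta_k = Gamma %*% Omegak[, , k] %*% t(Gamma) + Gamma0 %*% Omega0 %*% t(Gamma0)
# (+ 0.001*I for numerical stability); only the first term differs across the K groups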
for (j in 1:K) {
  delta[, , j] <- Gamma %*% Omegak[, , j] %*% t(Gamma) + Gamma0 %*% Omega0 %*% t(Gamma0)
  delta[, , j] <- delta[, , j]+0.001*diag(r)
}

D_cov <- array(0, c(r, r, sum(n_k)))
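# each slice of D_cov is one subject's sample covariance, t(X) %*% X / d, computed
# from d mean-zero observations drawn from the group-specific covariance delta[, , j]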
for (j in 1:K) {
  for (m in 1:n_k[j]) {  
    X <- mvrnorm(d, mu = rep(0, r), Sigma = delta[, , j])
    D_cov[, , m + sum(n_k[0:(j-1)])] <- (t(X) %*% X)/d
  }  
}

# define \barD_k, \barD
Dbar_k <- array(0, c(r, r, K))
n <- sum(n_k)
for (j in 1:K) {
  tmp3 <- D_cov[, , (sum(n_k[0:(j-1)]) + 1):sum(n_k[0:j])]
  Dbar_k[, , j] <- apply(tmp3, c(1, 2), sum)/n_k[j]
}
# pooled weighted average: \barD = sum_k (n_k/n) \barD_k
Dbar <- matrix(0, r, r)
for (j in 1:K) Dbar <- Dbar + (n_k[j]/n)*Dbar_k[, , j]

# use the 1D algorithm solution as the initial value for the FG algorithm
X1_init <- gmanifold1D(Dbar, Dbar_k, n_k, d = u)
X1 <- gmanifoldFG(Dbar, Dbar_k, n_k, d=u, X1_init)

# use a randomly generated basis as the initial value for the FG algorithm
X2_init <- matrix(runif(r*u), r, u)
X2_init <- qr.Q(qr(X2_init))
X2 <- gmanifoldFG(Dbar, Dbar_k, n_k, d=u, X2_init)
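# optional sanity check (not part of the original script): the FG solutions are assumed to be
# semi-orthogonal r x u bases, which the projections below rely on
max(abs(crossprod(X1) - diag(u)))  # should be near 0
max(abs(crossprod(X2) - diag(u)))  # should be near 0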


X10 <- qr.Q(qr(X1),complete=T)[, (u+1):r]
X20 <- qr.Q(qr(X2),complete=T)[, (u+1):r]
dlt_k_init <- dlt_k_rdm <- array(0, c(r, r, K))
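# CRS fitted covariances: project \barD_k onto the estimated subspace and \barD onto its
# orthogonal complement; X %*% t(X) and X0 %*% t(X0) are complementary projections,
# assuming the estimated bases are semi-orthogonal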
for (k in 1:K) {
  dlt_k_init[, , k] <- X1 %*% t(X1) %*% Dbar_k[ , , k] %*% X1 %*% t(X1) +
    X10 %*% t(X10) %*% Dbar %*% X10 %*% t(X10)
  dlt_k_rdm[, , k] <- X2 %*% t(X2) %*% Dbar_k[ , , k] %*% X2 %*% t(X2) +
    X20 %*% t(X20) %*% Dbar %*% X20 %*% t(X20)
}

# estimation error of CRS estimator
(mu_env_init <- norm(((delta[, , 1] - delta[, , 2])-(dlt_k_init[, , 1]-dlt_k_init[, , 2])), type = "F"))
## [1] 1.429281
(mu_env_rdm  <- norm(((delta[, , 1] - delta[, , 2])-(dlt_k_rdm[, , 1]-dlt_k_rdm[, , 2])), type = "F"))
## [1] 6.294729
# estimation error of sample estimator
(mu_smp <- norm(((delta[, , 1] - delta[, , 2])-(Dbar_k[, , 1]-Dbar_k[, , 2])), type = "F"))
## [1] 79.65839
# subspace estimation error
(mu_init <- norm((X1 %*% t(X1)-Gamma %*% t(Gamma)), type="F"))
## [1] 0.3805161
(mu_rdm <- norm((X2 %*% t(X2)-Gamma %*% t(Gamma)), type="F"))
## [1] 2.452408

Precision Matrix Estimation

rm(list=ls())
library(MASS)
library(ManifoldOptim)
mod <- Module("ManifoldOptim_module", PACKAGE = "ManifoldOptim")
library("scio")
source("sg1D.R")
source("sgFG.R")

K <- 2   # 2 groups
r <- 100  # covariance matrix dimension
n_k <- rep(40, K)  # sample size for each group
n <- sum(n_k)
d <- 100  # number of observations used to generate each sample covariance matrix
u <- 5  # rank and envelope dimension

# define the loss function: tr(Sig %*% Siginv) - log(det(Siginv))
Loss <- function(Sig, Siginv) {
  sum(diag(Sig %*% Siginv)) - log(det(Siginv))
}
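# quick illustrative check (not part of the original script): for Sig = Siginv = I,
# tr(I %*% I) = r and log(det(I)) = 0, so the loss equals the dimension r
stopifnot(abs(Loss(diag(3), diag(3)) - 3) < 1e-12)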

set.seed(999)
df <- if (r == 100) 30 else 60  # number of nonzero rows in Gamma (sparsity of the true difference)
delta <- 0.5                    # diagonal shift added when forming the precision matrices

# data generation
idx <- sample(1:r, df)
Delta_inv <- array(0, c(r, r, K))
B <- array(0, c(r, r, K))
Gamma.tmp <- matrix(runif(df*u), df, u)
Gamma.tmp <- qr.Q(qr(Gamma.tmp))

Gamma <- matrix(0, r, u)
Gamma[idx, ] <- Gamma.tmp
Gamma0 <- qr.Q(qr(Gamma),complete=T)[,(u+1):r]

Omegak <- array(0, c(u, u, K))
Ok <- matrix(runif(u^2), u, u)
Ok <- qr.Q(qr(Ok))

for(k in 1:K){
  
  Dk <- diag(0.1*k*seq(1, u))
  Omegak[, , k] <- Ok %*% Dk %*% t(Ok)
  B[, , k] <-  Gamma %*% Omegak[, , k] %*% t(Gamma)
  
  Delta_inv[, , k] <- B[, , k]+delta*diag(r)
  
}
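# optional check (not part of the original script): each true precision matrix should be
# positive definite, since B[, , k] is positive semidefinite and delta = 0.5 is added to the diagonal
sapply(1:K, function(k) min(eigen(Delta_inv[, , k], symmetric = TRUE, only.values = TRUE)$values))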

D_cov <- Dte_cov <- array(0, c(r, r, sum(n_k)))
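# for each subject, draw 2*d observations: the first d give the training sample covariance
# (D_cov) and the remaining d give a held-out copy (Dte_cov) used later to tune the SCIO penalty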
for (j in 1:K) {
  for (m in 1:n_k[j]) {  
    X_all <- mvrnorm(2*d, mu = rep(0, r), Sigma = solve(Delta_inv[, , j]))
    X <- X_all[1:d, ]; X_te <- X_all[(d+1):(2*d), ]
    D_cov[, , m + sum(n_k[0:(j-1)])] <- (t(X) %*% X)/d
    Dte_cov[, , m + sum(n_k[0:(j-1)])] <- (t(X_te) %*% X_te)/d
  }  
}

# define \barD_k, \barD
Dbar_k <- Dbarte_k <- array(0, c(r, r, K))
n <- sum(n_k)
for (j in 1:K) {
  tmp3 <- D_cov[, , (sum(n_k[0:(j-1)]) + 1):sum(n_k[0:j])]
  tmp4 <- Dte_cov[, , (sum(n_k[0:(j-1)]) + 1):sum(n_k[0:j])]
  Dbar_k[, , j] <- apply(tmp3, c(1, 2), sum)/n_k[j]
  Dbarte_k[, , j] <- apply(tmp4, c(1, 2), sum)/n_k[j]
}

# pooled weighted average: \barD = sum_k (n_k/n) \barD_k
Dbar <- matrix(0, r, r)
for (j in 1:K) Dbar <- Dbar + (n_k[j]/n)*Dbar_k[, , j]
X1_hat <- gmanifold1D(Dbar, Dbar_k, n_k, d = u)
X1 <- gmanifoldFG(Dbar, Dbar_k, n_k, d = u, X1_hat)


X10 <- qr.Q(qr(X1),complete=T)[, (u+1):r]
dlt_k <- array(0, c(r, r, K))
for (k in 1:K) {
  dlt_k[, , k] <- X1 %*% t(X1) %*% Dbar_k[ , , k] %*% X1 %*% t(X1) +
    X10 %*% t(X10) %*% Dbar %*% X10 %*% t(X10)
}

# apply SCIO to obtain sparse precision estimates over a grid of penalty parameters
lbd1 <- seq(0.05, 0.001, length.out=50)
lbd2 <- seq(0.05, 0.001, length.out = 50)
smp1_scio <- smp2_scio <- env1_scio <- env2_scio <- array(0, c(r, r, 50))
for(j in 1:50) {
  smp1_scio[, , j] <- scio(Dbar_k[, , 1], lambda = lbd1[j])$w
  smp2_scio[, , j] <- scio(Dbar_k[, , 2], lambda = lbd1[j])$w
  env1_scio[, , j] <- scio(dlt_k[, , 1], lambda = lbd2[j])$w
  env2_scio[, , j] <- scio(dlt_k[, , 2], lambda = lbd2[j])$w
}
smp1 <- smp2 <- env1 <- env2 <- rep(0, 50)
for(i in 1:50) {
  smp1[i] <- Loss(Dbarte_k[, , 1], smp1_scio[, , i])
  smp2[i] <- Loss(Dbarte_k[, , 2], smp2_scio[, , i])
  env1[i] <- Loss(Dbarte_k[, , 1], env1_scio[, , i])
  env2[i] <- Loss(Dbarte_k[, , 2], env2_scio[, , i])
}

idxm <- apply(cbind(smp1, smp2, env1, env2), 2, which.min)
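# idxm gives, for each column of cbind(smp1, smp2, env1, env2) (sample group 1, sample group 2,
# CRS group 1, CRS group 2), the index of the lambda with the smallest held-out loss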

fsmp_scio <- fenv_scio <- array(NA, c(r, r, K))
fsmp_scio[, , 1] <- smp1_scio[, , idxm[1]];
fsmp_scio[, , 2] <- smp2_scio[, , idxm[2]];
fenv_scio[, , 1] <- env1_scio[, , idxm[3]];
fenv_scio[, , 2] <- env2_scio[, , idxm[4]];

# sparse estimation error

(mu_smp_scio <- norm(((Delta_inv[, , 1]-Delta_inv[, , 2])-(fsmp_scio[, , 1]-fsmp_scio[, , 2])), type = "F"))
## [1] 0.4484844
(mu_env_scio <- norm(((Delta_inv[, , 1]-Delta_inv[, , 2])-(fenv_scio[, , 1]-fenv_scio[, , 2])), type = "F"))
## [1] 0.2112269
# non-sparse estimator error
(mu_env_inv <- norm(((Delta_inv[, , 1]-Delta_inv[, , 2])-(solve(dlt_k[, , 1])- solve(dlt_k[, , 2]))), type = "F"))
## [1] 0.3005224
(mu_smp_inv <- norm(((Delta_inv[, , 1]-Delta_inv[, , 2])-(solve(Dbar_k[, , 1])- solve(Dbar_k[, , 2]))), type = "F"))
## [1] 1.223634
# subspace estimation error
(norm((X1 %*% t(X1)-Gamma %*% t(Gamma)), type = "F"))
## [1] 1.478918
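# support recovery of the precision-matrix difference: compare the zero pattern of the sparse
# estimated difference with that of the true difference Delta_inv[, , 1] - Delta_inv[, , 2]
# ("tp": zero in both, "tn": nonzero in both, "fp": estimated zero but truly nonzero,
#  "fn": estimated nonzero but truly zero, matching the suffixes of the count variables below)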
zero_stp <- sum((fsmp_scio[, , 1]-fsmp_scio[, , 2])==0 & (Delta_inv[, , 1]-Delta_inv[, , 2])==0)
zero_stn <- sum((fsmp_scio[, , 1]-fsmp_scio[, , 2])!=0 & (Delta_inv[, , 1]-Delta_inv[, , 2])!=0)
zero_sfp <-  sum((fsmp_scio[, , 1]-fsmp_scio[, , 2])==0 & (Delta_inv[, , 1]-Delta_inv[, , 2])!=0)
zero_sfn <-  sum((fsmp_scio[, , 1]-fsmp_scio[, , 2])!=0 & (Delta_inv[, , 1]-Delta_inv[, , 2])==0)

# specificity and sensitivity of sample-SCIO
(smp_spf <- zero_stn/(zero_stn + zero_sfp))
## [1] 0.78
(smp_sst <- zero_stp/(zero_stp + zero_sfn))
## [1] 0.6923077
zero_etp <-  sum((fenv_scio[, , 1]-fenv_scio[, , 2])==0 & (Delta_inv[, , 1]-Delta_inv[, , 2])==0)
zero_etn <-  sum((fenv_scio[, , 1]-fenv_scio[, , 2])!=0 & (Delta_inv[, , 1]-Delta_inv[, , 2])!=0)
zero_efp <-  sum((fenv_scio[, , 1]-fenv_scio[, , 2])==0 & (Delta_inv[, , 1]-Delta_inv[, , 2])!=0)
zero_efn <-  sum((fenv_scio[, , 1]-fenv_scio[, , 2])!=0 & (Delta_inv[, , 1]-Delta_inv[, , 2])==0)

# specificity and sensitivity of CRS-SCIO
(env_spf <- zero_etn/(zero_etn + zero_efp))
## [1] 0.8555556
(env_sst <- zero_etp/(zero_etp + zero_efn))
## [1] 0.7362637
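
The four counts above can be wrapped into a small helper for reuse. This is a hedged convenience sketch (support_metrics is not part of the original scripts); it follows the same counting convention as the code above.

# hedged sketch: support-recovery metrics for an estimated difference `est` against the truth
support_metrics <- function(est, truth) {
  tp <- sum(est == 0 & truth == 0)   # zero in both estimate and truth
  tn <- sum(est != 0 & truth != 0)   # nonzero in both
  fp <- sum(est == 0 & truth != 0)   # estimated zero, truly nonzero
  fn <- sum(est != 0 & truth == 0)   # estimated nonzero, truly zero
  c(specificity = tn/(tn + fp), sensitivity = tp/(tp + fn))
}
# reproduces (env_spf, env_sst) for the CRS-SCIO estimator
support_metrics(fenv_scio[, , 1] - fenv_scio[, , 2], Delta_inv[, , 1] - Delta_inv[, , 2])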