# Step 2: Generate Data

# Prep ----
# Load packages
library(magrittr)
library(dplyr)
library(foreach)
library(parallel)
library(doRNG)

# Load simulation conditions 
sim_conditions <- readRDS("data/simconditions.RDS")

# Set up functions to generate data

# Function to turn continuous into ordinal data with varying thresholds
makeord <- function(Data, vars = NULL, tau = NULL, prop = 0, num_ind = NULL){
  if(length(vars) == 0) vars <- 1:NCOL(Data)
  
  if(prop == 0 | prop == 1) {
    Data[,vars] <- apply(Data[,vars,drop = FALSE], 2,
                         function(x){
                           brks <- c(min(x, na.rm = TRUE) - .1,
                                     tau[[1]],
                                     max(x, na.rm = TRUE) + .1)
                           cut(x, breaks = brks, labels = FALSE)
                           #cut(scale(x,center = FALSE), breaks = brks, labels = FALSE)
                         })
  } else {
    if(prop == .33) {
      if(num_ind == 3) {
        vars1 <- c(1,4,7)
        vars2 <- vars[-c(vars1)]
      } else if (num_ind == 6) {
        vars1 <- c(1,2,7,8,13,14)
        vars2 <- vars[-c(vars1)]
      }
    } else if (prop == .66) {
      if(num_ind == 3) {
        vars1 <- c(1,2,4,5,7.8)
        vars2 <- vars[-c(vars1)]
      } else if (num_ind == 6) {
        vars1 <- c(1,2,3,4,7,8,9,10,13,14,15,16)
        vars2 <- vars[-c(vars1)]
      }
    }

    Data[,vars1] <- apply(Data[,vars1,drop = FALSE], 2,
                          function(x){
                            brks <- c(min(x, na.rm = TRUE) - .1,
                                      tau[[1]],
                                      max(x, na.rm = TRUE) + .1)
                            cut(x, breaks = brks, labels = FALSE)
                            #cut(scale(x,center = FALSE), breaks = brks, labels = FALSE)
                          })
    
    Data[,vars2] <- apply(Data[,vars2,drop = FALSE], 2,
                          function(x){
                            brks <- c(min(x, na.rm = TRUE) - .1,
                                      tau[[2]],
                                      max(x, na.rm = TRUE) + .1)
                            cut(x, breaks = brks, labels = FALSE)
                            #cut(scale(x,center = FALSE), breaks = brks, labels = FALSE)
                          })
    
  }
  
  
  Data
}

# Function to ensure all response options present in data
f1 <- function(x) {length(unique(x))}

# Function to go through simulation conditions
simData <- function(samplesize, resp_option, resp_pattern, num_ind, prop_sparse, loading_mag, condition_number, iter) {
  indrange <- list(1:num_ind, (num_ind+1):(num_ind*2), (num_ind*2+1):(num_ind*3))
  totind <- num_ind*3
  
  # Set up pop model
  f1_l <- paste0(loading_mag, "*x", indrange[[1]], collapse = "+")
  f2_l <- paste0(loading_mag, "*x", indrange[[2]], collapse = "+")
  f3_l <- paste0(loading_mag, "*x", indrange[[3]], collapse = "+")
  
  #Set up residuals
  resid <- 1 - loading_mag^2
  resids <- paste0("x", 1:totind, "~~", resid, "*x", 1:totind, collapse = "\n")
  
  pop.mod <- paste0('f1 =~ ',f1_l,'
                    f2 =~ ', f2_l,'
                    f3 =~ ', f3_l,'
                    
                    f1 ~~ 1*f1
                    f2 ~~ 1*f2
                    f3 ~~ 1*f3
                    
                    f1 ~~ .3*f2
                    f2 ~~ .3*f3
                    f1 ~~ .3*f3\n',
                    resids,
                    collapse = "")  
  
  good <- FALSE
  
  while(good == FALSE) {
    ## continuous data:
    dcont <- lavaan::simulateData(pop.mod, model.type = "cfa", 
                                  sample.nobs = samplesize, std.lv = T)
    
    popsd <- 1
    
    # Set up thresholds
    if(resp_option == 2) {
      if(resp_pattern == "balanced") {
        qtile <- 0.5
        thresholds <- list(qnorm(qtile, sd = popsd))
      } else if (resp_pattern == "sparse1") {
        qtile <- 0.96
        thresholds <- list(qnorm(qtile, sd = popsd),
                           qnorm(0.5, sd = popsd))
      } 
    } else if(resp_option == 3) {
      if(resp_pattern == "balanced") {
        qtile = c(0.25, .75)
        thresholds <- list(qnorm(qtile, sd = popsd))
      } else if (resp_pattern == "sparse1") {
        qtile = c(.90, .96)
        thresholds <- list(qnorm(qtile, sd = popsd),
                           qnorm(c(0.25, .75), sd = popsd))
      } else if (resp_pattern == "sparse2") {
        qtile = c(.05, .95)
        thresholds <- list(qnorm(qtile, sd = popsd),
                           qnorm(c(0.25, .75), sd = popsd))
      } 
    } else if(resp_option == 4) {
      if(resp_pattern == "balanced") {
        qtile = c(0.2, .5, .8)
        thresholds <-  list(qnorm(qtile, sd = popsd))
      } else if (resp_pattern == "sparse1") {
        qtile = c(.80, .90, .96)
        thresholds <- list(qnorm(qtile, sd = popsd),
                           qnorm(c(0.2, .5, .8), sd = popsd))
      } else if (resp_pattern == "sparse2") {
        qtile = c(.05, .5, .95)
        thresholds <- list(qnorm(qtile, sd = popsd),
                           qnorm(c(0.2, .5, .8), sd = popsd))
      } 
    } else if(resp_option == 5) {
      if(resp_pattern == "balanced") {
        qtile = c(.10, .32, .68, .90)
        thresholds <-  list(qnorm(qtile, sd = popsd))
      } else if (resp_pattern == "sparse1") {
        qtile = c(.70, .80, .90, .96)
        thresholds <- list(qnorm(qtile, sd = popsd),
                           qnorm(c(.10, .32, .68, .90), sd = popsd))
      } else if (resp_pattern == "sparse2") {
        qtile = c(.05, .30, .70, .95)
        thresholds <- list(qnorm(qtile, sd = popsd),
                           qnorm(c(.10, .32, .68, .90), sd = popsd))
      } 
    }
    
    dord <- makeord(dcont, tau = thresholds, prop = prop_sparse, num_ind = num_ind)
    
    allcat <- sapply(dord, f1)
    
    cat <- length(qtile) + 1
    
    allcat <- sum(allcat == cat) == ncol(dord)
    
    if(allcat == TRUE) good <- TRUE
  }
  
  dord$condition_number <- condition_number
  dord$iter <- iter
  
  dord
}

# Generate Data ----

# number of iterations per condition
niter <- 500

# expand conditions x niter
sim_iterations <- data.frame(condition_number = rep(1:nrow(sim_conditions), each = niter),
                             iter = rep(1:niter, nrow(sim_conditions)))

# get cores for parallel processing (specified in simulation_execute.R)
n.cores <- n.cores

# create cluster and  make foreach aware of cluster and RNG
my.cluster <- parallel::makeCluster(n.cores)
doParallel::registerDoParallel(cl = my.cluster)
registerDoRNG(seed = 24062016)

# generate data
sim_data <- foreach(i = 1:nrow(sim_iterations)) %dopar% {
  sim_condition <- sim_iterations[i,1]
                     return(simData(sim_conditions[sim_condition,1], sim_conditions[sim_condition,2], 
                                    sim_conditions[sim_condition,3], sim_conditions[sim_condition,4], 
                                    sim_conditions[sim_condition,5], sim_conditions[sim_condition,6],
                                    condition_number = sim_condition, iter = sim_iterations[i,2]))
                   }

# stop cluster
doParallel::stopImplicitCluster()
stopCluster(my.cluster)

# save data
saveRDS(sim_data, file = "data/simdata.RDS")






