% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/meerva.sim.tools_211026.R
\name{meerva.sim.block}
\alias{meerva.sim.block}
\title{Simulation of meerva used to Analyze Data with Measurement Error}
\usage{
meerva.sim.block(
  simfam = "gaussian",
  nsims = 100,
  seed = 0,
  n = 4000,
  m = 400,
  beta = c(-0.5, 0.5, 0.2, 1, 0.5),
  alpha1 = c(-0.05, 0.1, 0.05, 0.1),
  alpha2 = c(0.98, 0.98, 0.95, 0.95),
  bx3s1 = c(0.05, 0, 0, NA, NA),
  bx3s2 = c(0.95, NA, NA),
  bx12 = c(0.25, 0.15),
  sd = 1,
  fewer = 0,
  mncor = 0,
  sigma = NULL,
  vmethod = NA,
  jksize = 0,
  compare = 1,
  diffam = NA,
  simtime = 1
)
}
\arguments{
\item{simfam}{The family for the underlying regression model 
to be simulated, amongst "binomial", "gaussian" and "Cox".}

\item{nsims}{Number of datasets to be simulated}

\item{seed}{A seed for the R random number generator.  The default is 0 in which case the 
program randomly selects and records the seed so one can replicate simulation studies.}

\item{n}{The full dataset size.}

\item{m}{The validation subsample size (m < n).}

\item{beta}{A vector of length 5 for the true regression parameter for the linear 
regression model with 5 predictors including the intercept. For the Cox model
the intercept term of beta is not estimated but determines a basal event rate.}

\item{alpha1}{A vector of length four determining the measurement error or 
misclassification probabilities for the outcome surrogate ys.  
Usage is slightly different 
for the different  simfam  values "gaussian", "binomial" and "Cox".  See the
help pages for meerva.sim.brn, meerva.sim.cox and meerva.sim.nrm 
for clarification.}

\item{alpha2}{A vector describing the correct classification probabilities for x1s, 
the surrogate for x1.  
Usage is slightly different 
for the different  simfam  values "gaussian", "binomial" and "Cox".  See the
help pages for meerva.sim.brn, meerva.sim.cox and meerva.sim.nrm 
for clarification.}

\item{bx3s1}{A vector of length 5 determining the relation between the reference variable x3 
and the mean and SD of the surrogate x3s1.  
Roughly, bx3s1[1] determines a minimal measurement error SD,   
conditional on x3, bx3s1[2] determines a rate of increase in SD for values of x3 greater than bx3s1[3], and 
bx3s1[4] is a value above which the relation between x3 and the mean of x3s is determined by the power bx3s1[5].
The mean values for x3s1 are rescaled to have mean 0 and variance 1.}

\item{bx3s2}{A vector of length 3 determining scale in x3s and potentially x3s2, 
a second surrogate for x3.  
Roughly, the mean previously determined for x3s1 
using bx3s1 is multiplied by bx3s2[1].
Conditional on x3, x3s2 has mean  bx3s2[2] * x3 and variance bx3s2[3].}

\item{bx12}{Bernoulli probabilities for reference variables x1 and x2.
A vector of length 2, default is c(0.25, 0.15).  If mncor (see below)
is positive the correlations between these Bernoulli and continuous
predictors remain positive.}

\item{sd}{In case of simfam == "gaussian" for linear regression, the sd of outcome y. 
In case of simfam == "Cox" for Cox PH regression, the multiplicative error 
term for ys, the surrogate for the time to event y 
(ys = log(sd * a (random variable) * y)).}

\item{fewer}{When set to 1 x3s1 and x4 will be collapsed to one 
variable in the surrogate set.  This demonstrates how the method
works when there are fewer surrogate variables than reference 
variables.  If bx3s2 is specified such that there are 
duplicate surrogate variables for the reference variable x3 
then the number of surrogate predictors will not be reduced.}

\item{mncor}{Correlation of the columns in the x matrix before 
x1 and x2 are dichotomized to Bernoulli random variables. 
Default is 0.}

\item{sigma}{A 4x4 variance-covariance matrix for the 
multivariate normal distribution used to derive the 4 
reference predictor variables.}

\item{vmethod}{Method for robust estimation of variance covariance matrices needed 
for calculation of the augmented estimates (beta aug).
0 for JK or jackknife (slowest but more accurate), 
1 for IJK or the infinitesimal JK using the R default dfbeta's, 
2 for IJK using an alternate formula for the dfbeta, 
3 for all three of these methods to be used, and 
NA to let the program choose a stronger, faster method.}

\item{jksize}{Leave-out number for the grouped jackknife used for non validation data. 
The default is 0 where the program chooses jksize so that the number of leave out 
groups is about the validation subsample size.}

\item{compare}{1 to compare gamma_val with gamma_ful (default) or 0 with gamma_non.}

\item{diffam}{Indicates a cutoff if, for a "gaussian" family in the surrogate, a "binomial" 
family is to be simulated for the reference model.  For example, the
surrogate outcome could be an estimated probit (or logit) based upon
a convolutional neural network. Normal data are simulated and
y_val is replaced by 1*(y_val >= diffam).  Default is NA and
the surrogate and reference have the same model form.  Only 
for use with vmethod of 0 or 1.}

\item{simtime}{1 (default) to print out time during simulation to inform user how long the
simulation may run, 0 to not print out this information.}
}
\value{
meerva.sim.block returns a list object of class meerva.sim.  
The list will contain summary information used to simulate the data, and
for each data set simulated with measurement error,
the augmented estimates based upon the full data set accounting for measurement error,
estimates based upon reference variables from the validation subsample,
estimates based upon the surrogate variables from the whole sample,
along with estimated variances for these estimates.
These can be inspected by the user directly or as shown in the examples.
}
\description{
The meerva package is designed to analyze data with measurement error when 
 there is a validation subsample randomly selected from the full sample.  
 The method assumes surrogate variables measured with error are available 
 for the full sample, and reference variables measured with little or no 
 error are available for this randomly chosen subsample of the full sample.  
 Measurement errors may be differential or non differential, in any or all 
 predictors (simultaneously) as well as outcome.   

 The meerva.sim.block function lets the user specify a model with measurement error, 
 and then simulate and analyze many datasets to 
 examine the model fits and judge how the method works.
 Data sets are generated according to 3 functions for simulating
 Cox PH, linear and logistic regression models.  These functions generate 
 data sets with 4 reference predictor variables and from 3 to 5 surrogate 
 predictor variables.  The user can 
 consider, program and simulate data sets of greater complexity
 but these examples provided with the package should serve as a 
 reasonable introduction to the robustness of the method.
}
\examples{
# Simulation study for logistic reg data with 
# differential misclassification in outcome 
# and a predictor and measurement error in 
# another predictor.  nsims=10 is used as an 
# example only.  Try running nsims=100 or 
# 1000, but be prepared to wait a little while. 
sim.binomial = meerva.sim.block(simfam="binomial", 
    nsims=10, seed=0, n=4000, m=400, 
    beta = c(-0.5, 0.5, 0.2, 1, 0.5) , 
    alpha1 = c(0.95, 0.90, 0.90, 0.95), 
    alpha2 = c(0.98,0.98,0.95,0.95), 
    bx3s1=c(0.05, 0, 0, NA, NA) , 
    bx3s2 = c(NA,NA,NA) , 
    vmethod=2, jksize=0, compare=1) 
    
plot(sim.binomial) 
summary(sim.binomial, 1) 

# Simulation study for linear reg data.   
# For this example there are more surrogate 
# predictors than reference predictors.  
# nsims=10 is used as an example only.  Try 
# running nsims=100 or 1000, but be 
# prepared to wait a little while.   
sim.gaussianm = meerva.sim.block(simfam="gaussian", 
    nsims=10, seed=0, n=4000, m=400, 
    beta = c(-0.5, 0.5, 0.2, 1, 0.5) , 
    alpha1 = c(-0.05, 0.1, 0.05, 0.1) , 
    alpha2 = c(0.98,0.94,0.95,0.95) , 
    bx3s1=c(0.05, 0, 0, NA, NA) , 
    bx3s2  = c(1.1,0.9,0.05) ,  
    sd=1, fewer=0, 
    vmethod=1, jksize=0, compare=1) 

plot(sim.gaussianm)
summary(sim.gaussianm)
 
# Simulation study for Cox PH data.  
# For this example there are fewer surrogates 
# than reference variables yet they provide 
# information to decrease the variance in the 
# augmented estimate.  nsims=10 is used as an 
# example only.  Try running nsims=100 or 
# 1000, but be prepared to wait a little 
# while.   
sim.coxphf = meerva.sim.block(simfam="Cox", 
    nsims=10, seed=0, n=4000, m=400, 
    beta   = c(-0.5, 0.5, 0.2, 1, 0.5) , 
    alpha1 = c(0.95,0.90,0.90,0.95)  , 
    alpha2 = c(0.98,0.94,0.94,0.98) , 
    bx3s1  = c(0.05,0,0,NA,NA) , 
    bx3s2  = c(1.1, NA, NA) , 
    sd=0.1, fewer=1, 
    vmethod=1, jksize=0, compare=1 ) 

plot(sim.coxphf)
summary(sim.coxphf)
 
}
\seealso{
\code{\link{meerva.fit}} , \code{\link{meerva.sim.brn}} , \code{\link{meerva.sim.cox}} , \code{\link{meerva.sim.nrm}}
}
\author{
Walter Kremers (kremers.walter@mayo.edu)
}
