# Detritus Wiki

### Site Tools

This is an old revision of the document!

# Using ADNI data for Cusp model fitting

## Simple way

Auditory Verbal Learning Test fitted with Whole gray matter and covariates.

 # Estevez-Gonzalez, A., Kulisevsky, J., Boltes, A., Otermin, P., & Garcia-Sanchez, C. (2003).
# Rey verbal learning test is a useful tool for differential diagnosis in the preclinical phase
# of Alzheimer's disease: comparison with mild cognitive impairment and normal aging.
# International Journal of Geriatric Psychiatry. 18 (11), 1021.
library("ADNIMERGE")
library(cusp)
library(psych) #for composite scores
# Let's get the data: join the ADAS and neuropsychological battery tables with
# the adnimerge table, matching rows on subject (RID) and visit (VISCODE).
tmp_np <- merge(adas, neurobat, by = c("RID", "VISCODE"))
mt2fa <- merge(tmp_np, adnimerge, by = c("RID", "VISCODE"))
rm(tmp_np)
# Calculate the subject age at every point
# NOTE(review): `mt2fa$Years` relies on `$` partial matching if the merged
# table has no column named exactly "Years" -- confirm the intended column.
mt2fa$vAGE <- mt2fa$AGE + mt2fa$Years
# Building the frame from `mt2fa$...` expressions yields columns named
# `mt2fa.WholeBrain`, `mt2fa.ICV`, ... which the model formulas rely on.
data <- data.frame(mt2fa$WholeBrain, mt2fa$ICV, mt2fa$vAGE, mt2fa$PTGENDER,
                   mt2fa$PTEDUCAT, mt2fa$AVDEL30MIN, mt2fa$AVDELTOT)
# Keep only the rows with no missing value in any selected column
datac <- data[complete.cases(data), ]
# Ratio of WholeBrain to ICV, used as the imaging predictor
datac$WB <- datac$mt2fa.WholeBrain / datac$mt2fa.ICV
# Cusp fit: AVDEL30MIN as the state variable, same predictors for alpha and beta
fit_avd <- cusp(y ~ mt2fa.AVDEL30MIN,
                alpha ~ WB + mt2fa.vAGE + mt2fa.PTGENDER + mt2fa.PTEDUCAT,
                beta ~ WB + mt2fa.vAGE + mt2fa.PTGENDER + mt2fa.PTEDUCAT,
                datac)
summary(fit_avd)

Amazing results

## Z-scores

Now let's compare the weights of each variable on the model. We need to translate everything to z-scores (or just do another linear transformation that carries everything to comparable values)

# Standardize every numeric variable: (x - mean) / sd
datac$zWB <- (datac$WB - mean(datac$WB)) / sd(datac$WB)
datac$zAge <- (datac$mt2fa.vAGE - mean(datac$mt2fa.vAGE)) /
  sd(datac$mt2fa.vAGE)
datac$zEduc <- (datac$mt2fa.PTEDUCAT - mean(datac$mt2fa.PTEDUCAT)) /
  sd(datac$mt2fa.PTEDUCAT)
datac$zAVD <- (datac$mt2fa.AVDEL30MIN - mean(datac$mt2fa.AVDEL30MIN)) /
  sd(datac$mt2fa.AVDEL30MIN)
fit_avd_z <- cusp(y ~ zAVD,
                  alpha ~ zWB + zAge + mt2fa.PTGENDER + zEduc,
                  beta ~ zWB + zAge + mt2fa.PTGENDER + zEduc,
                  datac)
summary(fit_avd_z)

The results are of course the same but the coefficients must be meaningful now.

## Composite scores

First I'm going to try another NP test (Recognition)

# zAVR was used below but never defined in the original script. It is derived
# here from AVDELTOT, the only other test column kept in `data`.
# NOTE(review): presumably AVDELTOT is the Recognition score -- confirm.
datac$zAVR <- (datac$mt2fa.AVDELTOT - mean(datac$mt2fa.AVDELTOT)) /
  sd(datac$mt2fa.AVDELTOT)
fit_avr <- cusp(y ~ zAVR,
                alpha ~ zWB + zAge + mt2fa.PTGENDER + zEduc,
                beta ~ zWB + zAge + mt2fa.PTGENDER + zEduc,
                datac)

and this is not so good but still an improvement is done

Now, let's try a composite score

# Factor-analysis scores over the two standardized test scores
gfam <- data.frame(datac$zAVD, datac$zAVR)
famod <- fa(gfam, scores = "regression")
datac$cs <- famod$scores
fit_cs <- cusp(y ~ cs,
               alpha ~ zWB + zAge + mt2fa.PTGENDER + zEduc,
               beta ~ zWB + zAge + mt2fa.PTGENDER + zEduc,
               datac)

And we get a very bad fit result. That is, the composite score is not related through a cusp model to the independent variable analyzed here

## A try for ADAS-Cog

# Same covariates as before, now with the ADAS items Q4SCORE and Q8SCORE
data <- data.frame(mt2fa$WholeBrain, mt2fa$ICV, mt2fa$vAGE, mt2fa$PTGENDER,
                   mt2fa$PTEDUCAT, mt2fa$Q4SCORE, mt2fa$Q8SCORE)
# Keep only the rows with no missing value in any selected column
datac <- data[complete.cases(data), ]
datac$WB <- datac$mt2fa.WholeBrain / datac$mt2fa.ICV
datac$zWB <- (datac$WB - mean(datac$WB)) / sd(datac$WB)
datac$zAge <- (datac$mt2fa.vAGE - mean(datac$mt2fa.vAGE)) /
  sd(datac$mt2fa.vAGE)
datac$zEduc <- (datac$mt2fa.PTEDUCAT - mean(datac$mt2fa.PTEDUCAT)) /
  sd(datac$mt2fa.PTEDUCAT)
# Sign-reversed z-scores: (mean - x) / sd instead of (x - mean) / sd.
# NOTE(review): presumably because higher ADAS item scores mean worse
# performance -- confirm.
datac$dr <- (mean(datac$mt2fa.Q4SCORE) - datac$mt2fa.Q4SCORE) /
  sd(datac$mt2fa.Q4SCORE)
datac$r <- (mean(datac$mt2fa.Q8SCORE) - datac$mt2fa.Q8SCORE) /
  sd(datac$mt2fa.Q8SCORE)
fit_dr <- cusp(y ~ dr,
               alpha ~ zWB + zAge + mt2fa.PTGENDER + zEduc,
               beta ~ zWB + zAge + mt2fa.PTGENDER + zEduc,
               datac)
fit_r <- cusp(y ~ r,
              alpha ~ zWB + zAge + mt2fa.PTGENDER + zEduc,
              beta ~ zWB + zAge + mt2fa.PTGENDER + zEduc,
              datac)

not bad at all for Delay Recall but worse for Recognition

## Notas para Composite Scores

Lo ideal sería hacer un script con todos los composites posibles y mirarlo contra los biomarcadores disponibles en adnimerge. Pero cada biomarcador lleva un tipo de procesamiento distinto y cada composite ha de ser definido previamente. Por ejemplo, el composite de Delay Recall (drcs) lo construimos a partir de adas.Q4SCORE y neurobat.AVDEL30MIN, pero en cada caso hay que definir las variables de partida.

Hay varios biomarcadores en la tabla adnimerge que pueden estar relacionados con los composites neuropsicológicos. El problema es que cada uno debe ser analizado de manera distinta. Las variables Ventricles, Hippocampus y WholeBrain deben de alguna manera normalizarse por ICV (revisar Entorhinal, Fusiform y MidTemp) mientras que FDG, PIB y AV45 son variables normalizadas.

library("ADNIMERGE")
library(cusp)
library(psych) #for composite scores
# Let's get the data: join the ADAS and neuropsychological battery tables with
# the adnimerge table, matching rows on subject (RID) and visit (VISCODE).
tmp_np <- merge(adas, neurobat, by = c("RID", "VISCODE"))
m <- merge(tmp_np, adnimerge, by = c("RID", "VISCODE"))
rm(tmp_np)
# Select data
# Age at each visit: AGE plus the Years column
m$cAGE <- m$AGE + m$Years
# Building the frame from `m$...` expressions yields columns named
# `m.WholeBrain`, `m.ICV`, ... which the code below relies on.
data <- data.frame(m$WholeBrain, m$ICV, m$cAGE, m$PTGENDER, m$PTEDUCAT,
                   m$AVDEL30MIN, m$Q4SCORE)
# Keep only the rows with no missing value in any selected column
datac <- data[complete.cases(data), ]
# Z-scores and Composite Scores
datac$zavd <- (datac$m.AVDEL30MIN - mean(datac$m.AVDEL30MIN)) /
  sd(datac$m.AVDEL30MIN)
# Q4SCORE is standardized with the sign reversed: (mean - x) / sd.
# NOTE(review): presumably so that it points the same way as AVDEL30MIN
# before the two are combined -- confirm.
datac$zdr <- (mean(datac$m.Q4SCORE) - datac$m.Q4SCORE) / sd(datac$m.Q4SCORE)
datac$zAge <- (datac$m.cAGE - mean(datac$m.cAGE)) / sd(datac$m.cAGE)
datac$zEduc <- (datac$m.PTEDUCAT - mean(datac$m.PTEDUCAT)) /
  sd(datac$m.PTEDUCAT)
# Delay Recall composite score (drcs): factor-analysis scores over the two
# standardized delayed-recall measures
gfam <- data.frame(datac$zavd, datac$zdr)
famod <- fa(gfam, scores = "regression")
datac$drcs <- famod$scores
# NI biomarker: ratio of WholeBrain to ICV, then standardized
datac$wb <- datac$m.WholeBrain / datac$m.ICV
datac$zwb <- (datac$wb - mean(datac$wb)) / sd(datac$wb)
# Fit to Cusp model: composite score as the state variable, same predictors
# for alpha and beta
fit <- cusp(y ~ drcs,
            alpha ~ zwb + zAge + m.PTGENDER + zEduc,
            beta ~ zwb + zAge + m.PTGENDER + zEduc,
            datac)
summary(fit)

++++ Un poco mejor (no mucho) para los Ventrículos |

> summary(fit)

Call:
cusp(formula = y ~ drcs, alpha = alpha ~ zwb + zAge + m.PTGENDER +
zEduc, beta = beta ~ zwb + zAge + m.PTGENDER + zEduc, data = datac)

Deviance Residuals:
Min        1Q    Median        3Q       Max
-1.96597  -0.34590   0.08371   0.71785   3.30025

Coefficients:
Estimate Std. Error z value Pr(>|z|)
a[(Intercept)]      -0.45714    0.02514 -18.184  < 2e-16 ***
a[zwb]              -0.37215    0.01895 -19.643  < 2e-16 ***
a[zAge]              0.02568    0.01398   1.837   0.0662 .
a[m.PTGENDERFemale]  0.26724    0.02818   9.484  < 2e-16 ***
a[zEduc]             0.24629    0.01497  16.455  < 2e-16 ***
b[(Intercept)]       0.68135    0.06771  10.063  < 2e-16 ***
b[zwb]               0.04897    0.02989   1.638   0.1013
b[zAge]             -0.13237    0.02618  -5.057 4.27e-07 ***
b[m.PTGENDERFemale]  0.71151    0.05476  12.994  < 2e-16 ***
b[zEduc]             0.13563    0.02624   5.169 2.35e-07 ***
w[(Intercept)]      -0.28049    0.01186 -23.655  < 2e-16 ***
w[drcs]              1.12939    0.01124 100.486  < 2e-16 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Null deviance: 7186.9  on 6554  degrees of freedom
Linear deviance: 4853.0  on 6549  degrees of freedom
Logist deviance:     NA  on   NA  degrees of freedom
Delay deviance: 4458.2  on 6543  degrees of freedom

R.Squared    logLik npar      AIC     AICc      BIC
Linear model 0.1386990 -8315.823    6 16643.65 16643.66 16684.37
Cusp model   0.4315817 -7959.414   12 15942.83 15942.88 16024.28
---
Note: R.Squared for cusp model is Cobb's pseudo-R^2. This value
can become negative.

Chi-square test of linear vs. cusp model

X-squared = 712.8, df = 6, p-value = 0

Number of optimization iterations: 40