geom_hline(aes(yintercept=0.02), colour="grey", lty=2) + geom_hline(aes(yintercept=0.105), lty=2, colour="grey") + labs(x="SBP Z-score", y="Error", title="") + theme_bw() + scale_color_grey() + ylim(-0.15,0.15) + xlim(-4,4) +  guides(
colour = FALSE,
shape = FALSE,
size = FALSE)
5
ggplot(df, aes(x=s_ref, y=s_diffs, colour=source, shape=source)) + geom_point() + geom_hline(aes(yintercept=-0.06), colour="black", lty=2) + geom_hline(aes(yintercept=0.053), lty=2, colour="black") +
geom_hline(aes(yintercept=0.02), colour="grey", lty=2) + geom_hline(aes(yintercept=0.105), lty=2, colour="grey") + labs(x="SBP Z-score", y="Error", title="") + theme_bw() + scale_color_grey() + ylim(-0.15,0.15) + xlim(-5,5) +  guides(
colour = FALSE,
shape = FALSE,
size = FALSE)
with(df2, myroc(AKI, dscore, plot=TRUE, main="Validation: AKI vs NoAKI"))
# Chunk 1: setup
knitr::opts_chunk$set(
fig.height=4, fig.width=6,
fig.align='center', fig.path='Figs/',
comment='', dev='pdf', dpi=300,
echo=FALSE, warning=FALSE, message=FALSE,
include=TRUE, results="markup", tidy=TRUE)
library(pls);library(caret);library(mi);require(ggplot2)
require(pROC);require(plotROC)
# Chunk 2
d0<-dload("probe_062614.3")
d<-subset(d0, PhenoRecGN=="noGN" & PhenoBKV %in% c("Absent","NotDone"));
cat1<-with(d, paste0(Banff_i, Banff_t));
d$cscore<-NULL
for(i in 1:length(cat1)){
d$cscore[i]<-switch(cat1[i],
"00"=0, "10"=1, "20"=2, "30"=3,
"01"=4, "11"=5, "21"=6, "31"=7,
"02"=8, "12"=9, "22"=10, "32"=11,
"03"=12, "13"=13, "23"=14, "33"=15
)};
d<-subset(d, Banff_g==0 & Banff_t==0 & DSA_Present!="Yes" & MonthsPostTx >1);
# Chunk 3
d<-d[d$BXDayCr!=9999,]
d$pBL<-with(d, BXDayCr/BaselineCr)
with(d, describe(pBL))
ggplot(d, aes(x=pBL)) + geom_histogram()
d$AKI<-factor(ifelse(d$pBL >=1.25,"AKI","NoAKI"), levels=c("NoAKI","AKI"))
d$AKI3<- factor(ifelse(d$pBL >=1.25,"AKI",ifelse(d$pBL<1.1,"NoAKI","Suspicious")), levels=c("NoAKI","Suspicious","AKI"))
with(d, table(AKI))
with(d, table(AKI3))
quantile(d$pBL, prob=seq(0,1, 0.333))
d$pBLcat<-cut(d$pBL, breaks=c(0.0, 1, 1.1, 4.78))
with(d, table(pBLcat))
# Chunk 1: setup
knitr::opts_chunk$set(
fig.height=4, fig.width=6,
fig.align='center', fig.path='Figs/',
comment='', dev='pdf', dpi=300,
echo=FALSE, warning=FALSE, message=FALSE,
include=TRUE, results="markup", tidy=TRUE)
library(pls);library(caret);require(ggplot2)
require(pROC);require(plotROC)
# Chunk 2
d0<-dload("probe_062614.3")
d<-subset(d0, PhenoRecGN=="noGN" & PhenoBKV %in% c("Absent","NotDone"));
cat1<-with(d, paste0(Banff_i, Banff_t));
d$cscore<-NULL
for(i in 1:length(cat1)){
d$cscore[i]<-switch(cat1[i],
"00"=0, "10"=1, "20"=2, "30"=3,
"01"=4, "11"=5, "21"=6, "31"=7,
"02"=8, "12"=9, "22"=10, "32"=11,
"03"=12, "13"=13, "23"=14, "33"=15
)};
d<-subset(d, Banff_g==0 & Banff_t==0 & DSA_Present!="Yes" & MonthsPostTx >1);
# Chunk 3
d<-d[d$BXDayCr!=9999,]
d$pBL<-with(d, BXDayCr/BaselineCr)
with(d, describe(pBL))
ggplot(d, aes(x=pBL)) + geom_histogram()
d$AKI<-factor(ifelse(d$pBL >=1.25,"AKI","NoAKI"), levels=c("NoAKI","AKI"))
d$AKI3<- factor(ifelse(d$pBL >=1.25,"AKI",ifelse(d$pBL<1.1,"NoAKI","Suspicious")), levels=c("NoAKI","Suspicious","AKI"))
with(d, table(AKI))
with(d, table(AKI3))
d$pBLcat<-cut(d$pBL, breaks=c(0.0, 1, 1.1, 4.78))
with(d, table(pBLcat))
d$pBLcat<-cut(d$pBL, breaks=c(0.0, 1.1, 1.25, 4.78))
with(d, table(pBLcat))
with(vdata, table(AKI))
# Chunk 1: setup
knitr::opts_chunk$set(
fig.height=4, fig.width=6,
fig.align='center', fig.path='Figs/',
comment='', dev='pdf', dpi=300,
echo=FALSE, warning=FALSE, message=FALSE,
include=TRUE, results="markup", tidy=TRUE)
library(pls);library(caret);require(ggplot2)
require(pROC);require(plotROC)
# Chunk 2
d0<-dload("probe_062614.3")
d<-subset(d0, PhenoRecGN=="noGN" & PhenoBKV %in% c("Absent","NotDone"));
cat1<-with(d, paste0(Banff_i, Banff_t));
d$cscore<-NULL
for(i in 1:length(cat1)){
d$cscore[i]<-switch(cat1[i],
"00"=0, "10"=1, "20"=2, "30"=3,
"01"=4, "11"=5, "21"=6, "31"=7,
"02"=8, "12"=9, "22"=10, "32"=11,
"03"=12, "13"=13, "23"=14, "33"=15
)};
d<-subset(d, Banff_g==0 & Banff_t==0 & DSA_Present!="Yes" & MonthsPostTx >1);
# Chunk 3
d<-d[d$BXDayCr!=9999,]
d$pBL<-with(d, BXDayCr/BaselineCr)
with(d, describe(pBL))
ggplot(d, aes(x=pBL)) + geom_histogram()
d$AKI<-factor(ifelse(d$pBL >=1.25,"AKI","NoAKI"), levels=c("NoAKI","AKI"))
d$AKI3<- factor(ifelse(d$pBL >=1.25,"AKI",ifelse(d$pBL<1.1,"NoAKI","Suspicious")), levels=c("NoAKI","Suspicious","AKI"))
with(d, table(AKI))
with(d, table(AKI3))
# Chunk 4
#quantile(d$pBL, prob=seq(0,1, 0.333))
#d$pBLcat<-cut(d$pBL, breaks=c(0.41, 0.99, 1.09, 4.78))
d$pBLcat<-cut(d$pBL, breaks=c(0.0, 1.1, 1.25, 4.78))
with(d, table(pBLcat))
# Chunk 5
set.seed(8999);in.train <- createDataPartition(d$pBLcat, p=0.75, list=FALSE)[,1]
out.test<-(1:nrow(d))[!(1:nrow(d) %in% in.train)]
d$dataset<-rep(NA,nrow(d))
d$dataset[in.train]<-"calibration"
d$dataset[out.test]<-"validation"
d$dataset<-factor(d$dataset)
cat("Overall breakdown\n")
table(d$dataset)
tab11<-prop.table(tab1<-table(d[in.train,"pBLcat"]))
cat("Training Data")
tab1;
tab21<-prop.table(tab2<-table(d[out.test,"pBLcat"]))
cat("Test Data")
tab2;
write.csv(d, "Probe_071117.2.csv", row.names=F)
Probe_071117.2<-d;
save(Probe_071117.2, file="Probe_071117.2.Rdata")
tdata<-d[in.train,]
vdata<-d[out.test,]
# Chunk 6
cat("Training data")
with(tdata, table(AKI))
cat("\n\nValidation data")
with(vdata, table(AKI))
# Chunk 7
ggplot(tdata, aes(x=pBL))+geom_histogram() + ggtitle("Training data")
ggplot(vdata, aes(x=pBL))+geom_histogram()+ ggtitle("Validation data")
# Chunk 8
any(vdata$StudyUrineNo %in%  tdata$StudyUrineNo)
# Chunk 9
d1<-tdata
y<-d1$AKI
X<-log(as.matrix(d1[,58:190]),10);#head(X) #log tranformed
m1<-data.frame(y,X);# head(m1)
#names(tdata)
ctrl <- trainControl(method = "repeatedcv",
repeats = 3, # 3 repeats of 10-fold CV by default
classProbs = TRUE, # needed to use ROC metric
summaryFunction = twoClassSummary)
set.seed(112) # seed for all  fits
plsFit <- train(factor(y) ~ .,
data = m1,
method = "pls",
tuneLength = 8,
trControl = ctrl,
metric = "ROC", preProcess = c("center", "scale"))
tmp<-data.frame(roc=plsFit$results$ROC,ncomp=plsFit$results$ncomp);
ggplot(tmp, aes(y=roc, x=ncomp)) + geom_line() + labs(x="number latent factors", y = "AUC", title="k-fold cross-validation")
plsFit
ncomp <- plsFit$bestTune[[1]];
# Chunk 10
# run with minimum 3 components so we can look 3D score plot
set.seed(112);res3<-mvr(unclass(y)~X, ncomp=3, method="oscorespls", scale=TRUE)
Tmatrix<-res3$scores;Pmatrix<-res3$loadings
require(scatterplot3d)
label0<-c(19,19)[unclass(d1$AKI)]
label1<-c("blue","red")[unclass(d1$AKI)]
scatterplot3d(x=Tmatrix[,1],y=Tmatrix[,2],z=Tmatrix[,3], pch=label0, main="3D Score Plot", color=label1, xlab=expression(paste("PLS"[1])), ylab=expression(paste("PLS"[2])), zlab=expression(paste("PLS"[3])), angle=40)
legend("topleft", col=c("blue","red"), pch=c(19,19), legend=c("NoAKI", "AKI"), cex=0.9, bty="n")
# Chunk 11
set.seed(112)
res<-mvr(unclass(y)~X, ncomp=8, method="oscorespls",validation="LOO", scale=TRUE)
save(res,file="tmp_pls") # save PLS object to apply to validation data
Tmatrix<-res$scores[,ncomp];Pmatrix<-res$loadings[,ncomp]
# Chunk 12
df<-data.frame(StudyUrineNo=d1$StudyUrineNo, AKI=d1$AKI,dscore=drop(res$fitted.values[,,ncomp]), pBL=d1$pBL)
df$AKI<-relevel(df$AKI, ref="NoAKI")
ct1<-with(df, cor.test(dscore, pBL, method="pearson"));ct1
# Chunk 13
ggplot(df, aes(y=dscore,x=pBL,colour=AKI)) + geom_point() + geom_smooth(method="lm", aes(group=1)) + annotate(geom="text", x=1, y=0.5, label=paste("correlation =", round(ct1$estimate,2))) + ggtitle("Training Data")
# Chunk 14
ggplot(df, aes(x=AKI, y=dscore)) + geom_point(position=position_jitter(width=0.3),aes(colour=AKI)) + geom_boxplot(outlier.colour=NA) + ggtitle("Training Data")
# Chunk 15
cat("t-test discriminant score vs AKI")
an1<-t.test(dscore~AKI, data=df);an1
with(vdata, table(AKI))
with(vdata, table(AKI3))
df3<-data.frame(StudyUrineNo=vdata$StudyUrineNo, AKI=ifelse(vdata$AKI3=="NoAKI",0,1), dscore=yhatp, pBL=vdata$pBL)
# Chunk 1: setup
knitr::opts_chunk$set(
fig.height=4, fig.width=6,
fig.align='center', fig.path='Figs/',
comment='', dev='pdf', dpi=300,
echo=FALSE, warning=FALSE, message=FALSE,
include=TRUE, results="markup", tidy=TRUE)
library(pls);library(caret);require(ggplot2)
require(pROC);require(plotROC)
# Chunk 2
d0<-dload("probe_062614.3")
d<-subset(d0, PhenoRecGN=="noGN" & PhenoBKV %in% c("Absent","NotDone"));
cat1<-with(d, paste0(Banff_i, Banff_t));
d$cscore<-NULL
for(i in 1:length(cat1)){
d$cscore[i]<-switch(cat1[i],
"00"=0, "10"=1, "20"=2, "30"=3,
"01"=4, "11"=5, "21"=6, "31"=7,
"02"=8, "12"=9, "22"=10, "32"=11,
"03"=12, "13"=13, "23"=14, "33"=15
)};
d<-subset(d, Banff_g==0 & Banff_t==0 & DSA_Present!="Yes" & MonthsPostTx >1);
# Chunk 3
d<-d[d$BXDayCr!=9999,]
d$pBL<-with(d, BXDayCr/BaselineCr)
with(d, describe(pBL))
ggplot(d, aes(x=pBL)) + geom_histogram()
d$AKI<-factor(ifelse(d$pBL >=1.25,"AKI","NoAKI"), levels=c("NoAKI","AKI"))
d$AKI3<- factor(ifelse(d$pBL >=1.25,"AKI",ifelse(d$pBL<1.1,"NoAKI","Suspicious")), levels=c("NoAKI","Suspicious","AKI"))
with(d, table(AKI))
with(d, table(AKI3))
# Chunk 4
#quantile(d$pBL, prob=seq(0,1, 0.333))
#d$pBLcat<-cut(d$pBL, breaks=c(0.41, 0.99, 1.09, 4.78))
d$pBLcat<-cut(d$pBL, breaks=c(0.0, 1.1, 1.25, 4.78))
with(d, table(pBLcat))
# Chunk 5
set.seed(8999);in.train <- createDataPartition(d$pBLcat, p=0.75, list=FALSE)[,1]
out.test<-(1:nrow(d))[!(1:nrow(d) %in% in.train)]
d$dataset<-rep(NA,nrow(d))
d$dataset[in.train]<-"calibration"
d$dataset[out.test]<-"validation"
d$dataset<-factor(d$dataset)
cat("Overall breakdown\n")
table(d$dataset)
tab11<-prop.table(tab1<-table(d[in.train,"pBLcat"]))
cat("Training Data")
tab1;
tab21<-prop.table(tab2<-table(d[out.test,"pBLcat"]))
cat("Test Data")
tab2;
write.csv(d, "Probe_071117.2.csv", row.names=F)
Probe_071117.2<-d;
save(Probe_071117.2, file="Probe_071117.2.Rdata")
tdata<-d[in.train,]
vdata<-d[out.test,]
# Chunk 6
cat("Training data")
with(tdata, table(AKI))
cat("\n\nValidation data")
with(vdata, table(AKI))
# Chunk 7
ggplot(tdata, aes(x=pBL))+geom_histogram() + ggtitle("Training data")
ggplot(vdata, aes(x=pBL))+geom_histogram()+ ggtitle("Validation data")
# Chunk 8
any(vdata$StudyUrineNo %in%  tdata$StudyUrineNo)
# Chunk 9
d1<-tdata
y<-d1$AKI
X<-log(as.matrix(d1[,58:190]),10);#head(X) #log tranformed
m1<-data.frame(y,X);# head(m1)
#names(tdata)
ctrl <- trainControl(method = "repeatedcv",
repeats = 3, # 3 repeats of 10-fold CV by default
classProbs = TRUE, # needed to use ROC metric
summaryFunction = twoClassSummary)
set.seed(112) # seed for all  fits
plsFit <- train(factor(y) ~ .,
data = m1,
method = "pls",
tuneLength = 8,
trControl = ctrl,
metric = "ROC", preProcess = c("center", "scale"))
tmp<-data.frame(roc=plsFit$results$ROC,ncomp=plsFit$results$ncomp);
ggplot(tmp, aes(y=roc, x=ncomp)) + geom_line() + labs(x="number latent factors", y = "AUC", title="k-fold cross-validation")
plsFit
ncomp <- plsFit$bestTune[[1]];
# Chunk 10
# run with minimum 3 components so we can look 3D score plot
set.seed(112);res3<-mvr(unclass(y)~X, ncomp=3, method="oscorespls", scale=TRUE)
Tmatrix<-res3$scores;Pmatrix<-res3$loadings
require(scatterplot3d)
label0<-c(19,19)[unclass(d1$AKI)]
label1<-c("blue","red")[unclass(d1$AKI)]
scatterplot3d(x=Tmatrix[,1],y=Tmatrix[,2],z=Tmatrix[,3], pch=label0, main="3D Score Plot", color=label1, xlab=expression(paste("PLS"[1])), ylab=expression(paste("PLS"[2])), zlab=expression(paste("PLS"[3])), angle=40)
legend("topleft", col=c("blue","red"), pch=c(19,19), legend=c("NoAKI", "AKI"), cex=0.9, bty="n")
# Chunk 11
set.seed(112)
res<-mvr(unclass(y)~X, ncomp=8, method="oscorespls",validation="LOO", scale=TRUE)
save(res,file="tmp_pls") # save PLS object to apply to validation data
Tmatrix<-res$scores[,ncomp];Pmatrix<-res$loadings[,ncomp]
# Chunk 12
df<-data.frame(StudyUrineNo=d1$StudyUrineNo, AKI=d1$AKI,dscore=drop(res$fitted.values[,,ncomp]), pBL=d1$pBL)
df$AKI<-relevel(df$AKI, ref="NoAKI")
ct1<-with(df, cor.test(dscore, pBL, method="pearson"));ct1
# Chunk 13
ggplot(df, aes(y=dscore,x=pBL,colour=AKI)) + geom_point() + geom_smooth(method="lm", aes(group=1)) + annotate(geom="text", x=1, y=0.5, label=paste("correlation =", round(ct1$estimate,2))) + ggtitle("Training Data")
# Chunk 14
ggplot(df, aes(x=AKI, y=dscore)) + geom_point(position=position_jitter(width=0.3),aes(colour=AKI)) + geom_boxplot(outlier.colour=NA) + ggtitle("Training Data")
# Chunk 15
cat("t-test discriminant score vs AKI")
an1<-t.test(dscore~AKI, data=df);an1
# Chunk 16
with(vdata, table(AKI))
yp<-vdata$AKI
Xp<-log(as.matrix(vdata[,58:190]),10); #log tranformed
yhatp <- drop(predict(res, newdata = Xp, ncomp = ncomp) );
df2<-data.frame(StudyUrineNo=vdata$StudyUrineNo, AKI=vdata$AKI, dscore=yhatp, pBL=vdata$pBL)
df2$AKI<-relevel(df2$AKI,ref="NoAKI")
cat("Correlation between dscore and pBL")
ct3<-with(df2, cor.test(dscore, pBL));ct3
# Chunk 17
ggplot(df2, aes(x=pBL, y=dscore, colour=AKI)) + geom_point() + geom_smooth(method="lm", aes(group=1)) + annotate(geom="text", x=1, y=0.5, label=paste("correlation =", round(ct3$estimate,2))) + ggtitle("Validation Data")
with(df2, myroc(AKI, dscore, plot=TRUE, main="Validation: AKI vs NoAKI"))
df3<-data.frame(StudyUrineNo=vdata$StudyUrineNo, AKI=ifelse(vdata$AKI3=="NoAKI",0,1), dscore=yhatp, pBL=vdata$pBL)
with(df2, myroc(AKI, dscore, plot=TRUE, main="Validation: AKI + Suspicious vs NoAKI"))
with(df2, myroc(AKI, dscore, plot=TRUE, main="Validation: AKI vs NoAKI"))
df3<-data.frame(StudyUrineNo=vdata$StudyUrineNo, AKI=ifelse(vdata$AKI3=="NoAKI",0,1), dscore=yhatp, pBL=vdata$pBL)
with(df2, myroc(AKI, dscore, plot=TRUE, main="Validation: AKI  vs NoAKI"))
with(df3, myroc(AKI, dscore, plot=TRUE, main="Validation: AKI + Suspicious vs NoAKI"))
ggplot(df2, aes(x=AKI, y=dscore)) + geom_point(position=position_jitter(width=0.3),aes(colour=AKI)) + geom_boxplot(outlier.colour=NA) + ggtitle("Training Data")
with(df2, myroc(AKI, dscore, plot=TRUE, main="Validation: AKI vs NoAKI"))
df3<-data.frame(StudyUrineNo=vdata$StudyUrineNo, AKI=ifelse(vdata$AKI3=="NoAKI",0,1), dscore=yhatp, pBL=vdata$pBL)
with(df3, myroc(AKI, dscore, plot=TRUE, main="Validation: AKI + Suspicious vs NoAKI"))
ct4<-with(df3, cor.test(dscore, pBL));ct3
with(df3, myroc(AKI, dscore, plot=TRUE, main="Validation: AKI + Suspicious vs NoAKI"))
ggplot(df2, aes(x=pBL, y=dscore, colour=AKI)) + geom_point() + geom_smooth(method="lm", aes(group=1)) + annotate(geom="text", x=1, y=0.5, label=paste("correlation =", round(ct3$estimate,2))) + ggtitle("Validation Data")
df2<-data.frame(StudyUrineNo=vdata$StudyUrineNo, AKI=vdata$AKI, dscore=yhatp, pBL=vdata$pBL)
df2$AKI<-relevel(df2$AKI,ref="NoAKI")
cat("Correlation between dscore and pBL")
ct3<-with(df2, cor.test(dscore, pBL));ct3
with(df2, myroc(AKI, dscore, plot=TRUE, main="Validation: AKI vs NoAKI"))
ggplot(df2, aes(x=AKI, y=dscore)) + geom_point(position=position_jitter(width=0.3),aes(colour=AKI)) + geom_boxplot(outlier.colour=NA) + ggtitle("Training Data")
ggplot(df2, aes(x=pBL, y=dscore, colour=AKI)) + geom_point() + geom_smooth(method="lm", aes(group=1)) + annotate(geom="text", x=1, y=0.5, label=paste("correlation =", round(ct3$estimate,2))) + ggtitle("Validation Data")
with(df2, myroc(AKI, dscore, plot=TRUE, main="Validation: AKI vs NoAKI"))
ggplot(df2, aes(x=AKI, y=dscore)) + geom_point(position=position_jitter(width=0.3),aes(colour=AKI)) + geom_boxplot(outlier.colour=NA) + ggtitle("Training Data")
t.test(dscore~AKI, data=df2)
df3<-data.frame(StudyUrineNo=vdata$StudyUrineNo, AKI=ifelse(vdata$AKI3=="NoAKI",0,1), dscore=yhatp, pBL=vdata$pBL)
with(df3, myroc(AKI, dscore, plot=TRUE, main="Validation: AKI + Suspicious vs NoAKI"))
with(df2, myroc(AKI, dscore, plot=TRUE, main="Validation: AKI vs NoAKI"))
ggplot(df2, aes(x=AKI, y=dscore)) + geom_point(position=position_jitter(width=0.3),aes(colour=AKI)) + geom_boxplot(outlier.colour=NA) + ggtitle("Validation Data")
t.test(dscore~AKI, data=df2)
df3<-data.frame(StudyUrineNo=vdata$StudyUrineNo, AKI=ifelse(vdata$AKI3=="NoAKI",0,1), dscore=yhatp, pBL=vdata$pBL)
with(df3, myroc(AKI, dscore, plot=TRUE, main="Validation: AKI + Suspicious vs NoAKI"))
ggplot(df2, aes(x=AKI, y=dscore)) + geom_point(position=position_jitter(width=0.3),aes(colour=AKI)) + geom_boxplot(outlier.colour=NA) + ggtitle("Training Data")
ggplot(df3, aes(x=AKI, y=dscore)) + geom_point(position=position_jitter(width=0.3),aes(colour=AKI)) + geom_boxplot(outlier.colour=NA) + ggtitle("Training Data")
df3<-data.frame(StudyUrineNo=vdata$StudyUrineNo, AKI=ifelse(vdata$AKI3=="NoAKI",0,1), dscore=yhatp, pBL=vdata$pBL)
df3<-data.frame(StudyUrineNo=vdata$StudyUrineNo, AKI=ifelse(vdata$AKI3=="NoAKI","NoAKI","AKI"), dscore=yhatp, pBL=vdata$pBL)
with(df3, myroc(AKI, dscore, plot=TRUE, main="Validation: AKI + Suspicious vs NoAKI"))
ggplot(df3, aes(x=AKI, y=dscore)) + geom_point(position=position_jitter(width=0.3),aes(colour=AKI)) + geom_boxplot(outlier.colour=NA) + ggtitle("Training Data")
df3<-data.frame(StudyUrineNo=vdata$StudyUrineNo, AKI=relevel(ifelse(vdata$AKI3=="NoAKI","NoAKI","AKI"),ref="NoAKI"), dscore=yhatp, pBL=vdata$pBL)
df3<-data.frame(StudyUrineNo=vdata$StudyUrineNo, AKI=relevel(factor(ifelse(vdata$AKI3=="NoAKI","NoAKI","AKI")),ref="NoAKI"), dscore=yhatp, pBL=vdata$pBL)
with(df3, myroc(AKI, dscore, plot=TRUE, main="Validation: AKI + Suspicious vs NoAKI"))
ggplot(df3, aes(x=AKI, y=dscore)) + geom_point(position=position_jitter(width=0.3),aes(colour=AKI)) + geom_boxplot(outlier.colour=NA) + ggtitle("Training Data")
cat("t-test: dscore ~ AKI")
t.test(dscore~AKI, data=df3)
d1<-subset(tdata, AKI!="Suspicious")
with(d1, table(AKI))
cat("Dropping Suspicious")
with(d1, table(AKI))
cat("Dropping Suspicious cases from training set")
with(d1, table(AKI))
y<-d1$AKI
d1<-subset(tdata, AKI3!="Suspicious")
cat("Dropping Suspicious cases from training set")
with(d1, table(AKI))
y<-d1$AKI
X<-log(as.matrix(d1[,58:190]),10);#head(X) #log tranformed
m1<-data.frame(y,X);# head(m1)
ctrl <- trainControl(method = "repeatedcv",
repeats = 3, # 3 repeats of 10-fold CV by default
classProbs = TRUE, # needed to use ROC metric
summaryFunction = twoClassSummary)
set.seed(112) # seed for all  fits
plsFit <- train(factor(y) ~ .,
data = m1,
method = "pls",
tuneLength = 8,
trControl = ctrl,
metric = "ROC", preProcess = c("center", "scale"))
tmp<-data.frame(roc=plsFit$results$ROC,ncomp=plsFit$results$ncomp);
ggplot(tmp, aes(y=roc, x=ncomp)) + geom_line() + labs(x="number latent factors", y = "AUC", title="k-fold cross-validation")
plsFit
ncomp <- plsFit$bestTune[[1]];
vdata<-subset(vdata0, AKI3!="Suspicious")
vdata0<-vdata;
vdata<-subset(vdata0, AKI3!="Suspicious")
power.t.test(n=NULL, delta=0.2, sd=1, sig.level=0.05, power=0.8,alternative="two.sided'', type="two.sample'')
power.t.test(n=NULL, delta=0.2, sd=1, sig.level=0.05, power=0.8, alternative="two.sided", type="two.sample'')
power.t.test(n=NULL, delta=0.2, sd=1, sig.level=0.05, power=0.8, alternative="two.sided", type="two.sample")
power.t.test(n=NULL, delta=0.2, sd=1, sig.level=0.05, power=0.8, alternative="two.sided", type="two.sample")
power.t.test(n=300, delta=NULL, sd=1, sig.level=0.05, power=0.8, alternative="two.sided", type="two.sample")
power.t.test(n=NULL, delta=0.2, sd=1, sig.level=0.05, power=0.8, alternative="two.sided", type="two.sample")
power.t.test(n=300, delta=NULL, sd=1, sig.level=0.05, power=0.8, alternative="two.sided", type="two.sample")
power.t.test(n=300, delta=0.2, sd=1, sig.level=0.05, power=NULL, alternative="two.sided", type="two.sample")
??ciNormN
library(EnvStats)
ciNormN(half.width=60, sigma=600, conf.level=0.99, sample.type="one.sample”
)
)
/
/
ciNormN(half.width=60, sigma=600, conf.level=0.99, sample.type="one.sample")
library(EnvStats)
ciNormN(half.width=60, sigma=600, conf.level=0.99, sample.type="one.sample")
ciNormN(half.width=60, sigma=600, conf.level=0.99, sample.type="one.sample")
ciBinomN(half.width=0.05,p.hat=0.8,conf.level=0.95)
plotCiBinomDesign(p.hat=0.8)
library(pwr)
pwr.r.test(n=NULL, r=-0.2,sig.level=0.05, power=0.8, alternative="less")
power.t.test(n=NULL, delta=0.2, sd=1, sig.level=0.05, power=0.9, alternative="two.sided", type="one.sample")
power.t.test(n=NULL, delta=0.2, sd=1, sig.level=0.05, power=0.8, alternative="two.sided", type="one.sample")
power.t.test(n=NULL, delta=0.2, sd=1, sig.level=0.05, power=0.8, alternative="two.sided", type="one.sample")
power.prop.test(n=NULL, p1=0.2, p2=0.3, power=0.8, sig.level=0.05, alternative="one.sided")
power.prop.test(n=NULL, p1=0.3, p2=0.52, power=0.9, sig.level=0.025, alternative="one.sided")
library(EnvStats)
ciNormN(half.width=60, sigma=600, conf.level=0.99, sample.type="one.sample")
ciBinomN(half.width=0.05,p.hat=0.8,conf.level=0.95)
plotCiBinomDesign(p.hat=0.8)
library(pwr)
pwr.r.test(n=NULL, r=-0.2,sig.level=0.05, power=0.8, alternative="less")
setwd("~/Desktop/bookdown_shiny/Bookdown/BPR/datasets")
d=read.csv("nations.csv")
summary(d)
plot(infant.mortality~logGDP, data=d)
lm1 <-lm(infant.mortality~logGDP, data=d)
abline(lm1, lty="dashed", col="red")
lines(lowess(d$infant.mortality~d$logGDP), col="blue")
?loess
lowess(d$infant.mortality~d$logGDP)
lowess(infant.mortality~logGDP, data=d)
loess(infant.mortality~logGDP, data=d)
l1=loess(infant.mortality~logGDP, data=d)
predict(l1)
lowessModel=loess(infant.mortality~logGDP, data=d)
length(l1)
length(predict(lowessModel)
)
length(predict(lowessModel))
dim(d)
d1<-d[,c("infant.mortality","logGDP")]
d1<-na.omit(d[, c("infant.mortality","logGDP")])
summary(d1)
plot(infant.mortality~logGDP, data=d)
plot(infant.mortality~logGDP, data=d1)
lm1 <-lm(infant.mortality~logGDP, data=d1)
abline(lm1, lty="dashed", col="red")
lines(lowess(d1$infant.mortality~d1$logGDP), col="blue")
dim(d1)
d1=na.omit(d[, c("infant.mortality","logGDP")])
dim(d1)
summary(d1)
plot(infant.mortality~logGDP, data=d1)
lm1 <-lm(infant.mortality~logGDP, data=d1)
abline(lm1, lty="dashed", col="red")
lines(lowess(d1$infant.mortality~d1$logGDP), col="blue")
head(d1)
lines(lowess(d1$infant.mortality~d1$logGDP, na.action=na.omit), col="blue")
?lowess
delta = 0.01 * diff(range(d$logGDP))
delta
delta = 0.01 * diff(range(d$logGDP, na.action=na.omit))
delta = 0.01 * diff(range(d$logGDP, na.action=na.rm))
delta = 0.01 * diff(range(d$logGDP, na.rm=TRUE))
delta
lines(lowess(d1$infant.mortality~d1$logGDP, delta = 0.01 * diff(range(d$logGDP, na.rm=TRUE))), col="blue")
summary(d)
plot(infant.mortality~logGDP, data=d)
lm1 <-lm(infant.mortality~logGDP, data=d1)
lm1 <-lm(infant.mortality~logGDP, data=d)
abline(lm1, lty="dashed", col="red")
lines(lowess(d$infant.mortality~d$logGDP, delta = 0.01 * diff(range(d$logGDP, na.rm=TRUE))), col="blue")
d=read.csv("nations.csv")
d1=na.omit(d[, c("infant.mortality","logGDP")])
dim(d1)
summary(d1)
plot(infant.mortality~logGDP, data=d1)
lm1 <-lm(infant.mortality~logGDP, data=d1)
abline(lm1, lty="dashed", col="red")
lines(lowess(d1$infant.mortality~d1$logGDP, delta = 0.01 * diff(range(d$logGDP, na.rm=TRUE))), col="blue")
lines(lowess(d1$infant.mortality~d1$logGDP)), col="blue")
lines(lowess(d1$infant.mortality~d1$logGDP), col="blue")
lines(lowess(d1$infant.mortality~d1$logGDP), col="green")
lines(lowess(d1$infant.mortality~d1$logGDP, delta = 0.01 * diff(range(d$logGDP, na.rm=TRUE))), col="blue")
lines(lowess(d$infant.mortality~d$logGDP, delta = 0.01 * diff(range(d$logGDP, na.rm=TRUE))), col="blue")
lines(lowess(d1$infant.mortality~d1$logGDP, f=2/3, delta = 0.01 * diff(range(d$logGDP, na.rm=TRUE))), col="blue")
plot(infant.mortality~logGDP, data=d1)
lm1 <-lm(infant.mortality~logGDP, data=d1)
abline(lm1, lty="dashed", col="red")
lines(lowess(d$infant.mortality~d$logGDP, f=2/3, delta = 0.01 * diff(range(d$logGDP, na.rm=TRUE))), col="blue")
head(d1)
lowessModel=loess(infant.mortality~logGDP, data=d)
length(predict(lowessModel))
?power.t.test
power.t.test(N=50000, delta=NULL, sd=2700, sig.level=0.05 power=0.8)
power.t.test(N=50000, delta=NULL, sd=2700, sig.level=0.05, power=0.8)
power.t.test(n=50000, delta=NULL, sd=2700, sig.level=0.05, power=0.8)
power.t.test(n=50000, delta=NULL, sd=2700, sig.level=0.05, power=0.8)
power.t.test(n=5, delta=NULL, sd=2700, sig.level=0.05, power=0.8)
50000/365
power.t.test(n=365, delta=NULL, sd=20, sig.level=0.05, power=0.8)
?power.prop.test
power.t.test(n=365, delta=NULL, sd=20, sig.level=0.05, power=0.8)
power.t.test(n=365, delta=NULL, sd=30, sig.level=0.05, power=0.8)
50000/365
power.t.test(n=365, delta=NULL, sd=50, sig.level=0.05, power=0.8)
