load("atherosclerosis.RData") sick = Data[which(Data$Atherosclerosis == "Yes"),-1] healthy = Data[which(Data$Atherosclerosis == "No"),-1] Data = Data[,-1] pValuesOriginal <- numeric(dim(Data)[2]) for (i in 1:length(pValuesOriginal)) { pValuesOriginal[i] <- t.test(sick[,i],healthy[,i])$p.value } length(which(pValuesOriginal < 0.05)) nPermutations = 100 pValuesShuffled <- matrix(data = 0, nrow = dim(Data)[2], ncol = nPermutations) for (j in 1:nPermutations) { permute <- sample(nrow(Data)) sickShuffled <- permute[1:nrow(sick)] healthyShuffled <- permute[(nrow(sick)+1):length(permute)] for (i in 1:ncol(Data)) { pValuesShuffled[i,j] <- t.test(x = Data[sickShuffled,i], y = Data[healthyShuffled,i])$p.value } } alphas <- seq(0.001,0.05,0.001) FDR <- numeric(length(alphas)) sigOriginal<-numeric(length(alphas)) for (k in 1:length(alphas)) { sigOriginal[k]<- sum(pValuesOriginal < alphas[k]) sigShuffled <- colSums(pValuesShuffled < alphas[k]) FDR[k]<-mean(sigShuffled)/sigOriginal[k] FDR[k]<-max(FDR[1:k]) # avoid non-monotonic portions } FDR.df = data.frame(alpha = alphas, sigComparison = sigOriginal, FDR = FDR) plot(x = sigOriginal, y = FDR, type='b', xlab='# significant comparisons', ylab='FDR')