#Monte Carlo simulation looking at whether coefficient magnitude is affected by number of levels #It is if the IV's are categorical (in favor of predictor with more values) #The exponentiation is done to convert the coefficients into varbrule factor weights #This is the single-level (individual) simulation nReps<-1000 nVals1<-3 nVals2<-8 nPerCell<-10 nCells<-nVals1*nVals2 SubjCoeffs<-mat.or.vec(i,2) for(i in 1:nReps) { p_yes_response<-rnorm(1,mean=0.7,sd=0.2) if (p_yes_response > 1) {p_yes_response<-1} if (p_yes_response < 0) {p_yes_response<-0} fakeSample<-rbinom(nCells,nPerCell,p_yes_response) fakeIV2<-as.factor(rep(0:(nVals1-1),each=nVals2)) fakeIV8<-rep(0:(nVals2-1),each=nVals1)[sample(nCells,nCells)] fakeIV8<-as.factor(fakeIV8) fake.glm<-glm(cbind(fakeSample, 10-fakeSample) ~ fakeIV2 + fakeIV8, family=binomial(link="logit")) fakeIV2_missing<-sum(fake.glm$coefficients[2:nVals1])*(-1) fakeIV8_missing<-sum(fake.glm$coefficients[(nVals1+1):(nVals1+nVals2-1)])*(-1) coefficients<-c(fake.glm$coefficients[2:nVals1],fakeIV2_missing,fake.glm$coefficients[(nVals1+1):(nVals1+nVals2-1)],fakeIV8_missing) factor.wts<-exp(coefficients)/(1+exp(coefficients)) SubjCoeffs[i,1]<- max(factor.wts[1:nVals1])-min(factor.wts[1:nVals1]) SubjCoeffs[i,2]<- max(factor.wts[(nVals1+1):(nVals1+nVals2)])-min(factor.wts[(nVals1+1):(nVals1+nVals2)]) } hist(SubjCoeffs[,1]-SubjCoeffs[,2],xlab="Individual subjects' differences between ranges of factor 1 and factor 2",ylab="Number of simulated subjects",main="") t.test(SubjCoeffs[,1]-SubjCoeffs[,2]) #Simulation for groups numSpeakers<-20 nReps<-1000 nVals1<-3 nVals2<-8 nPerCell<-10 nCells<-nVals1*nVals2 pVec<-rep(2,nReps) for(i in 1:nReps) { SubjCoeffs<-mat.or.vec(numSpeakers,2) for (j in 1:numSpeakers) { p_yes_response<-rnorm(1,mean=0.5,sd=0.1) if (p_yes_response > 1) {p_yes_response<-1} if (p_yes_response < 0) {p_yes_response<-0} fakeSample<-rbinom(nCells,nPerCell,p_yes_response) fakeIV2<-as.factor(rep(0:(nVals1-1),each=nVals2)) fakeIV8<-rep(0:(nVals2-1),each=nVals1)[sample(nCells,nCells)] fakeIV8<-as.factor(fakeIV8) fake.glm<-glm(cbind(fakeSample, 10-fakeSample) ~ fakeIV2 + fakeIV8, family=binomial(link="logit")) fakeIV2_missing<-sum(fake.glm$coefficients[2:nVals1])*(-1) fakeIV8_missing<-sum(fake.glm$coefficients[(nVals1+1):(nVals1+nVals2-1)])*(-1) coefficients<-c(fake.glm$coefficients[2:nVals1],fakeIV2_missing,fake.glm$coefficients[(nVals1+1):(nVals1+nVals2-1)],fakeIV8_missing) factor.wts<-exp(coefficients)/(1+exp(coefficients)) SubjCoeffs[j,1]<- max(factor.wts[1:nVals1])-min(factor.wts[1:nVals1]) SubjCoeffs[j,2]<- max(factor.wts[(nVals1+1):(nVals1+nVals2)])-min(factor.wts[(nVals1+1):(nVals1+nVals2)]) } pVec[i]<-t.test(SubjCoeffs[,1]-SubjCoeffs[,2])$statistic } hist(pVec, xlab="t",ylab="Frequency in the 1000 samples from the null",main="") t.test(pVec) #Numerical predictors: no bias numSpeakers<-20 nReps<-1000 nVals1<-3 nVals2<-8 nPerCell<-10 nCells<-nVals1*nVals2 pVec<-rep(2,nReps) for(i in 1:nReps) { SubjCoeffs<-mat.or.vec(numSpeakers,2) for (j in 1:numSpeakers) { p_yes_response<-rnorm(1,mean=0.5,sd=0.1) if (p_yes_response > 1) {p_yes_response<-1} if (p_yes_response < 0) {p_yes_response<-0} fakeSample<-rbinom(nCells,nPerCell,p_yes_response) fakeIV2<-rep(c(0,50,100),each=nVals2) fakeIV8<-rep(c(71,121,171,212,254,304,354,418),each=nVals1)[sample(nCells,nCells)] fake.glm<-glm(cbind(fakeSample, 10-fakeSample) ~ fakeIV2 + fakeIV8, family=binomial(link="logit")) factor.wts<-exp(fake.glm$coefficients)/(1+exp(fake.glm$coefficients)) SubjCoeffs[j,1]<- abs(factor.wts[2]) SubjCoeffs[j,2]<- abs(factor.wts[3]) } pVec[i]<-t.test(SubjCoeffs[,1]-SubjCoeffs[,2])$statistic } hist(pVec, xlab="t",ylab="Frequency in the 1000 samples from the null",main="") t.test(pVec)