#Monte Carlo simulation looking at whether coefficient magnitude is affected by number of levels
#It is if the IV's are categorical (in favor of predictor with more values)
#The exponentiation is done to convert the coefficients into varbrule factor weights

#This is the single-level (individual) simulation
nReps<-1000
nVals1<-3
nVals2<-8
nPerCell<-10
nCells<-nVals1*nVals2
SubjCoeffs<-mat.or.vec(i,2)
for(i in 1:nReps)
{
		p_yes_response<-rnorm(1,mean=0.7,sd=0.2)
		if (p_yes_response > 1) {p_yes_response<-1}
		if (p_yes_response < 0) {p_yes_response<-0}
		fakeSample<-rbinom(nCells,nPerCell,p_yes_response)
		fakeIV2<-as.factor(rep(0:(nVals1-1),each=nVals2))
		fakeIV8<-rep(0:(nVals2-1),each=nVals1)[sample(nCells,nCells)]
		fakeIV8<-as.factor(fakeIV8)
		fake.glm<-glm(cbind(fakeSample, 10-fakeSample) ~ fakeIV2 + fakeIV8, family=binomial(link="logit"))
		fakeIV2_missing<-sum(fake.glm$coefficients[2:nVals1])*(-1)
		fakeIV8_missing<-sum(fake.glm$coefficients[(nVals1+1):(nVals1+nVals2-1)])*(-1)
		coefficients<-c(fake.glm$coefficients[2:nVals1],fakeIV2_missing,fake.glm$coefficients[(nVals1+1):(nVals1+nVals2-1)],fakeIV8_missing)
		factor.wts<-exp(coefficients)/(1+exp(coefficients))
		SubjCoeffs[i,1]<- max(factor.wts[1:nVals1])-min(factor.wts[1:nVals1])
		SubjCoeffs[i,2]<- max(factor.wts[(nVals1+1):(nVals1+nVals2)])-min(factor.wts[(nVals1+1):(nVals1+nVals2)])	
}
hist(SubjCoeffs[,1]-SubjCoeffs[,2],xlab="Individual subjects' differences between ranges of factor 1 and factor 2",ylab="Number of simulated subjects",main="")
t.test(SubjCoeffs[,1]-SubjCoeffs[,2])

#Simulation for groups
numSpeakers<-20
nReps<-1000
nVals1<-3
nVals2<-8
nPerCell<-10
nCells<-nVals1*nVals2
pVec<-rep(2,nReps)
for(i in 1:nReps)
{
	SubjCoeffs<-mat.or.vec(numSpeakers,2)
	for (j in 1:numSpeakers)
	{
		p_yes_response<-rnorm(1,mean=0.5,sd=0.1)
		if (p_yes_response > 1) {p_yes_response<-1}
		if (p_yes_response < 0) {p_yes_response<-0}
		fakeSample<-rbinom(nCells,nPerCell,p_yes_response)
		fakeIV2<-as.factor(rep(0:(nVals1-1),each=nVals2))
		fakeIV8<-rep(0:(nVals2-1),each=nVals1)[sample(nCells,nCells)]
		fakeIV8<-as.factor(fakeIV8)
		fake.glm<-glm(cbind(fakeSample, 10-fakeSample) ~ fakeIV2 + fakeIV8, family=binomial(link="logit"))
		fakeIV2_missing<-sum(fake.glm$coefficients[2:nVals1])*(-1)
		fakeIV8_missing<-sum(fake.glm$coefficients[(nVals1+1):(nVals1+nVals2-1)])*(-1)
		coefficients<-c(fake.glm$coefficients[2:nVals1],fakeIV2_missing,fake.glm$coefficients[(nVals1+1):(nVals1+nVals2-1)],fakeIV8_missing)
		factor.wts<-exp(coefficients)/(1+exp(coefficients))
		SubjCoeffs[j,1]<- max(factor.wts[1:nVals1])-min(factor.wts[1:nVals1])
		SubjCoeffs[j,2]<- max(factor.wts[(nVals1+1):(nVals1+nVals2)])-min(factor.wts[(nVals1+1):(nVals1+nVals2)])	
	}	
	pVec[i]<-t.test(SubjCoeffs[,1]-SubjCoeffs[,2])$statistic
}
hist(pVec, xlab="t",ylab="Frequency in the 1000 samples from the null",main="")
t.test(pVec)


#Numerical predictors: no bias
numSpeakers<-20
nReps<-1000
nVals1<-3
nVals2<-8
nPerCell<-10
nCells<-nVals1*nVals2
pVec<-rep(2,nReps)
for(i in 1:nReps)
{
	SubjCoeffs<-mat.or.vec(numSpeakers,2)
	for (j in 1:numSpeakers)
	{
		p_yes_response<-rnorm(1,mean=0.5,sd=0.1)
		if (p_yes_response > 1) {p_yes_response<-1}
		if (p_yes_response < 0) {p_yes_response<-0}
		fakeSample<-rbinom(nCells,nPerCell,p_yes_response)
		fakeIV2<-rep(c(0,50,100),each=nVals2)
		fakeIV8<-rep(c(71,121,171,212,254,304,354,418),each=nVals1)[sample(nCells,nCells)]
		fake.glm<-glm(cbind(fakeSample, 10-fakeSample) ~ fakeIV2 + fakeIV8, family=binomial(link="logit"))
		factor.wts<-exp(fake.glm$coefficients)/(1+exp(fake.glm$coefficients))
		SubjCoeffs[j,1]<- abs(factor.wts[2])
		SubjCoeffs[j,2]<- abs(factor.wts[3])
	}	
	pVec[i]<-t.test(SubjCoeffs[,1]-SubjCoeffs[,2])$statistic
}
hist(pVec, xlab="t",ylab="Frequency in the 1000 samples from the null",main="")
t.test(pVec)