## To execute any command, highlight it with the mouse and hit Control r.

##########################################################################
## DO THIS ALWAYS AT START
##########################################################################

##If your computer doesn't have these, go to the Packages menu, pick a CRAN mirror site
##and download the necessary "packages"; i.e. languageR and arm.
##Then run these commands, as above, to get them into your computer's memory.

library(languageR)
library(arm)

## Make the project folder the working directory, so that the relative file
## names used in this script resolve against it.

setwd(dir = "C:/Dropbox/AR/TheCoinagesInSeuss/Phonesthemes/Analysis/FullModel/R")
 
##########################################################################
## Deal with your data file
##########################################################################

## You want a plain-text input file where all the columns are labeled, and the separator of columns is
## a tab.  

##WARNINGS ABOUT INPUT FILE FORMAT

##These issues can cause huge amounts of wasted time!

##CAUTION:  Column headers should be extremely plain; it's best to use nothing but letters (you can
##  also use noninitial digits).
##CAUTION:  The same is true of your data/candidates:  it is wise to use nothing but letters and numbers.
##CAUTION:  R is case-sensitive; always check variable names with care.
##CAUTION:  apostrophes anywhere in your file will create chaos; remove or replace them before proceeding.

##INFO

## Here is the command to read a data file.  Of course, you have to change it to match the file you have.
## sep="\t" is needed so that it will assume that tab is the column separator.

## Read the tab-delimited input file (first row = column names) into the data frame MyData.

MyData <- read.table(
  file = "RFileForFullModel.txt",
  header = TRUE,
  sep = "\t"
)

##########################################################################
## Eyeballing your data with contingency tables NOT USED HERE
##########################################################################

## This command gives you a list of all the variables you have available from your file to work with -- i.e. the column headers.
colnames(MyData)

## Contingency tables.
## Put the dependent variable first.
## In the schema, Line 1 performs the calculation, Line 2 gives raw counts, Line 3 the proportions, and Line 4 the chi-square.

## The columns are looked up through data = MyData rather than through attach(MyData):
## attach() puts a snapshot of the data frame on the search path, which goes stale
## as soon as MyData is modified and can be masked by variables of the same name.

## Dependent variable with one independent variable.  The code further down creates a graph.
writeLines("")
## Two alternative independent variables.  The second assignment overwrites the first,
## so when working interactively run only the line you want.
MyTable <- xtabs(~ SuffixIden + WeightToStress, data = MyData)

MyTable <- xtabs(~ SuffixIden + StressHigh, data = MyData)
MyTable
round(prop.table(MyTable), digits = 3)
summary(MyTable)

##This makes a table with the relative proportions.
## margin = 2 computes the proportions within each column, i.e. within each
## level of the independent variable.
PropTable <- prop.table(MyTable, margin = 2)
round(PropTable, digits = 3)
ftable(PropTable)
barplot(PropTable)

##Try this plotting method (template copied from the source given below).
##It is kept commented out: dataset.gobble2, TurkeyTime and NapTime are not created in
##the visible part of this script, so substitute your own data frame and columns first.
##The source attribution is now a comment too; as bare text it was not valid R and
##stopped the whole file from being parsed.
# Gobble.model.2<-lm(Sleepyyy~Turkeyyy, data=dataset.gobble2)
# summary(Gobble.model.2)
# plot(TurkeyTime, NapTime, main="Scatterplot of Thanksgiving",
#     xlab="Turkey Consumption in Grams ", ylab="Sleep Time in Minutes ", pch=19)
##Source:
##https://ademos.people.uic.edu/Chapter12.html

##########################################################################
## Logistic regression
##########################################################################

## Here is how you set up the logistic regression model.
## For linguistics, the best R function for logistic regression is probably bayesglm().
## This is because there are often exceptionless principles--
##   you don't want the weights to go sky high without good justification.
##   bayesglm() employs a prior to enforce this principle.
## The reference source for bayesglm() is http://www.stat.columbia.edu/~gelman/research/unpublished/priors7.pdf.
## If you want, you can leave out the word "bayes" in this command and get classical glm instead.

#########################################################
##---SEUSS MODELING---
#########################################################

   ## List the column headers of the data that was read in:
      names(MyData)

   ## Show the first six rows to check the content.  Change n to get a different number of lines.
      head(MyData, n = 6L)
   ## Debugging aid for bad input files: write the table back out as R has parsed it,
   ## so that it can be compared with the original file.
      write.table(x = MyData, file = "Debug.txt", sep = "\t")

## Column headers once more, for convenience.
names(MyData)

##We start by trying all the constraints, then trim back gradually
##to get a model where every constraint has a meaningful contribution.

##Start by trying everything.
##Each constraint is listed exactly once.  Fin_AE1SH and Fin_ER1L used to appear twice
##in this list; R silently drops repeated terms from a formula, so the fitted model is
##the same, but the duplicates made the list misleading.
MyModel <- bayesglm(Status ~ Vowel_AA +
  Vowel_AE +
  Vowel_AH +
  Vowel_AO +
  Vowel_AY +
  Vowel_UW +
  Vowel_EH +
  Vowel_IY +
  Vowel_ER +
  Vowel_IH +
  Vowel_OW +
  Vowel_OY +
  Vowel_EY +
  Vowel_AW +
  Vowel_UH +
  InOns_ +
  InOns_B +
  InOns_BL +
  InOns_BR +
  InOns_CH +
  InOns_D +
  InOns_DR +
  InOns_F +
  InOns_FL +
  InOns_FR +
  InOns_G +
  InOns_GL +
  InOns_GR +
  InOns_GW +
  InOns_HH +
  InOns_JH +
  InOns_K +
  InOns_KL +
  InOns_KR +
  InOns_KW +
  InOns_L +
  InOns_M +
  InOns_N +
  InOns_P +
  InOns_PL +
  InOns_PR +
  InOns_R +
  InOns_S +
  InOns_SF +
  InOns_SK +
  InOns_SKR +
  InOns_SKW +
  InOns_SL +
  InOns_SM +
  InOns_SN +
  InOns_SP +
  InOns_SPR +
  InOns_STR +
  InOns_SW +
  InOns_SHL +
  InOns_SHN +
  InOns_T +
  InOns_TR +
  InOns_TW +
  InOns_TH +
  InOns_THN +
  InOns_THR +
  InOns_THW +
  InOns_V +
  InOns_VL +
  InOns_VR +
  InOns_W +
  InOns_Y +
  InOns_Z +
  InOns_ZL +
  InOns_BY +
  InOns_SH +
  InOns_KY +
  InOns_DW +
  InOns_FY +
  InOns_ZH +
  InOns_GY +
  InOns_HHY +
  InOns_MY +
  InOns_PY +
  InOns_SKL +
  InOns_SHR +
  InOns_SKY +
  InOns_SPY +
  InOns_SPL +
  InOns_ST +
  InOns_DH +
  InOns_VY +
  InOns_ZW +
  PF +
  TS +
  AH0MAH0 +
  AH1M +
  AH1MP +
  AH1NG +
  AH1P +
  GAH0L +
  IH1NG +
  IH1NGK +
  LAH1 +
  MPF +
  NG +
  NGG +
  MedLapse +
  MonoSyl +
  SylsOverTwo +
  VcdOb +
  Fin_AA1P +
  Fin_AE1SH +
  Fin_AE1TER0 +
  Fin_AH0L +
  Fin_AW1NS +
  Fin_EH1R +
  Fin_ER1L +
  Fin_F +
  Fin_IY1M +
  Fin_KS +
  Fin_NG +
  Fin_P +
  Fin_PS +
  Fin_TS +
  Fin_UW1 +
  Fin_AE1K +
  Fin_AE1M +
  Fin_AE1MP +
  Fin_AE1P +
  Fin_AE1SP +
  Fin_AO1L +
  Fin_IH1K +
  Fin_IH1NJH +
  Fin_IH1P +
  Fin_IH1SP +
  Fin_IH1NGK +
  Fin_OY1L +
  Fin_UW1P +
  Fin_AW1L +
  Fin_AH1MP +
  Fin_AH1ST +
  Init_UW1 +
  Init_MUH1, data = MyData, family = "binomial")
## Weights with a significance test for each constraint.
summary(MyModel)
##We can't step on this; it's too big.
## NOTE(review): the note above says stepping is not feasible on the full model, yet the
## call is still here and will be attempted -- skip the next two lines if it does not finish.
step <- stepAIC(MyModel)
step

## Second pass: refit after removing the constraints that failed to test
## as significant at the .05 level.
MyModel <- bayesglm(
  formula = Status ~
    Vowel_AH + Vowel_UW + Vowel_IH +
    InOns_ + InOns_BL + InOns_F + InOns_FL + InOns_G + InOns_GL + InOns_GR +
    InOns_GW + InOns_K + InOns_KW + InOns_L + InOns_N + InOns_R + InOns_S +
    InOns_SKR + InOns_SN + InOns_SHL + InOns_SHN + InOns_THN + InOns_THW +
    InOns_VL + InOns_VR + InOns_W + InOns_Y + InOns_Z + InOns_ZL + InOns_SH +
    InOns_ST +
    PF + TS + AH0MAH0 + AH1M + AH1P + IH1NGK +
    MedLapse + SylsOverTwo + VcdOb +
    Fin_AA1P + Fin_AH0L + Fin_AW1NS + Fin_F + Fin_KS + Fin_P + Fin_PS +
    Fin_TS + Fin_UW1 + Init_UW1,
  data = MyData,
  family = "binomial"
)
summary(object = MyModel)
## Let stepAIC() try to prune the model further.
step <- stepAIC(object = MyModel)
step

## Three constraints regulating UW1 (Vowel_UW, Fin_UW1, Init_UW1) seems extravagant.
## Keep only the context-free one, Vowel_UW, and refit.
MyModel <- bayesglm(
  formula = Status ~
    Vowel_AH + Vowel_UW + Vowel_IH +
    InOns_ + InOns_BL + InOns_F + InOns_FL + InOns_G + InOns_GL + InOns_GR +
    InOns_GW + InOns_K + InOns_KW + InOns_L + InOns_N + InOns_R + InOns_S +
    InOns_SKR + InOns_SN + InOns_SHL + InOns_SHN + InOns_THN + InOns_THW +
    InOns_VL + InOns_VR + InOns_W + InOns_Y + InOns_Z + InOns_ZL + InOns_SH +
    InOns_ST +
    PF + TS + AH0MAH0 + AH1M + AH1P + IH1NGK +
    MedLapse + SylsOverTwo + VcdOb +
    Fin_AA1P + Fin_AH0L + Fin_AW1NS + Fin_F + Fin_KS + Fin_P + Fin_PS +
    Fin_TS,
  data = MyData,
  family = "binomial"
)
summary(object = MyModel)
## Let stepAIC() try to prune the model further.
step <- stepAIC(object = MyModel)
step

## The step procedure removed Fin_AW1NS from the previous model, so refit without it.
MyModel <- bayesglm(
  formula = Status ~
    Vowel_AH + Vowel_UW + Vowel_IH +
    InOns_ + InOns_BL + InOns_F + InOns_FL + InOns_G + InOns_GL + InOns_GR +
    InOns_GW + InOns_K + InOns_KW + InOns_L + InOns_N + InOns_R + InOns_S +
    InOns_SKR + InOns_SN + InOns_SHL + InOns_SHN + InOns_THN + InOns_THW +
    InOns_VL + InOns_VR + InOns_W + InOns_Y + InOns_Z + InOns_ZL + InOns_SH +
    InOns_ST +
    PF + TS + AH0MAH0 + AH1M + AH1P + IH1NGK +
    MedLapse + SylsOverTwo + VcdOb +
    Fin_AA1P + Fin_AH0L + Fin_F + Fin_KS + Fin_P + Fin_PS + Fin_TS,
  data = MyData,
  family = "binomial"
)
summary(object = MyModel)
## Step again to see whether anything else can go.
step <- stepAIC(object = MyModel)
step

## The step procedure removed InOns_L from the previous model, so refit without it.
MyModel <- bayesglm(
  formula = Status ~
    Vowel_AH + Vowel_UW + Vowel_IH +
    InOns_ + InOns_BL + InOns_F + InOns_FL + InOns_G + InOns_GL + InOns_GR +
    InOns_GW + InOns_K + InOns_KW + InOns_N + InOns_R + InOns_S +
    InOns_SKR + InOns_SN + InOns_SHL + InOns_SHN + InOns_THN + InOns_THW +
    InOns_VL + InOns_VR + InOns_W + InOns_Y + InOns_Z + InOns_ZL + InOns_SH +
    InOns_ST +
    PF + TS + AH0MAH0 + AH1M + AH1P + IH1NGK +
    MedLapse + SylsOverTwo + VcdOb +
    Fin_AA1P + Fin_AH0L + Fin_F + Fin_KS + Fin_P + Fin_PS + Fin_TS,
  data = MyData,
  family = "binomial"
)
summary(object = MyModel)
## Step again to see whether anything else can go.
step <- stepAIC(object = MyModel)
step

## The step procedure removed AH1M from the previous model (it is the one term
## missing below), so refit without it.
MyModel <- bayesglm(
  formula = Status ~
    Vowel_AH + Vowel_UW + Vowel_IH +
    InOns_ + InOns_BL + InOns_F + InOns_FL + InOns_G + InOns_GL + InOns_GR +
    InOns_GW + InOns_K + InOns_KW + InOns_N + InOns_R + InOns_S +
    InOns_SKR + InOns_SN + InOns_SHL + InOns_SHN + InOns_THN + InOns_THW +
    InOns_VL + InOns_VR + InOns_W + InOns_Y + InOns_Z + InOns_ZL + InOns_SH +
    InOns_ST +
    PF + TS + AH0MAH0 + AH1P + IH1NGK +
    MedLapse + SylsOverTwo + VcdOb +
    Fin_AA1P + Fin_AH0L + Fin_F + Fin_KS + Fin_P + Fin_PS + Fin_TS,
  data = MyData,
  family = "binomial"
)
summary(object = MyModel)
## Backward elimination only, then summarise whatever model the search settles on.
step <- stepAIC(object = MyModel, direction = "backward")
summary(object = step)
## Stable at last: stepping removes no further constraints.
## AIC is 2966.1


## ------------------------
## (The separator above must stay a comment: a bare run of hyphens is read as unary
## minus signs applied to the next expression, MyModel, which is an error.)

## This command merely reports the weights that were found:
MyModel

## This one is nicer, because it also gives you a significance test for each weight:
summary(MyModel)

## Print out the model's predictions.
## predict(..., type = "response") returns the predicted probabilities directly, i.e. the
## logistic transform exp(x) / (1 + exp(x)) of the linear predictor x.  Letting predict()
## do the transformation computes the predictions once instead of twice and avoids the
## NaN (Inf / Inf) that the hand-written formula produces when exp(x) overflows.
## The probabilities are put into a new column, Prediction, in MyData.
MyData$Prediction <- predict(MyModel, type = "response")
## Print the result out as a tab-delimited file.
## col.names = NA makes write.table() put an empty header cell above the row-name column;
## without it the header line is one cell short and the column names are misaligned.
write.table(MyData, sep = "\t", file = "ModelPredictions.txt", col.names = NA)

## Make a spreadsheet of the grammar.
## coef(summary(...)) is the coefficient table of the model: one row per constraint
## (plus the intercept), holding the weight and the statistics of its significance test.
idx <- coef(summary(MyModel))
idx
## Round to three decimals for readability before writing the table out.
MyConstraints <- round(idx, digits = 3)
write.table(MyConstraints, sep = "\t", file = "ConstraintsAndWeights.txt", col.names = NA)

##########################################################################
## The likelihood ratio test
##########################################################################

## Test each constraint of the final model with Anova() from the car package (type 2 tests).

library(car)
Anova(mod = MyModel, type = 2)
## Show any warnings raised along the way.
warnings()












