I am writing a script (in python with R parts in pypeR), so I need to use a function in R that compares two models with a test for coefficient F.
Models are as follows:
Model 1: Response ~ Predictor A + Predictor B + Predictor C.... + Predictor n
Model 2: Response ~ Predictor 1
Together, the A+B+...n predictors make up Predictor 1 , so there is no embedding problem here (trust me).
When I pass Predictor A + Predictor B + Predictor C.... + Predictor n function I created, I think that it treats them as one variable (because the degrees of freedom are the same as Model 2 ). Perhaps this is because I use paste() ? In any case, the actual number of predictors in model 1 will vary in all runs (which is why I need it as a function), so I'm not sure how to do this except using paste() .
Keep in mind that in fact pasta cannot be a problem; I just wanted to tell people what I thought the problem might be.
Are there any suggestions as to how I can get the true residual deviation and degree of freedom for model 1 ? It could be a hack. For example, I simply subtracted length(vector of predictors) - 1 to get degrees of freedom. I have no idea what a similar hack for residual deviation will be.
Here is the function and example:
make_and_compare_models <- function(fitness_trait_name, data_frame_name, vector_for_multiple_regression, predictor_for_single_regression, fam){ fit1<-glm(formula=as.formula(paste(fitness_trait_name,"~", paste(vector_for_multiple_regression, sep="+"))), family=fam, data=data_frame_name) #print ('length of vector of predictors') additional.degrees.of.freedom.fit1<-length(vector_for_multiple_regression)-1 ##the paste above prevents R from recognizing all of the vectors as separate predictors. This -1 gives you the difference in parameter number between the two models. print ("summary fit 1") print(summary(fit1)) dev1<-(fit1$deviance) print ('residual deviance of fit1') print (dev1) print(fit1$df.residual) ##this is how I'd correct for degrees of freedom #df1=fit1$df.residual-additional.degrees.of.freedom.fit1 #fit1$df.residual=df1 ##if the old way df1=fit1$df.residual print(fit1$df.residual) print ('df1') print (df1) fit2<- glm(data=data_frame_name, formula=as.formula(paste(fitness_trait_name,"~",predictor_for_single_regression)), family=fam) print("summary fit 2") print(summary(fit2)) print ("deviance of fit2") dev2<-(fit2$deviance) print(dev2) df2=fit2$df.residual print ('df2') print (df2) F.ratio<-((dev2-dev1)/(df2-df1))/(dev1/df1) print('F.ratio') print(F.ratio) new.p<-1-pf(F.ratio,abs(df1-df2),max(df2,df1)) print('new.p') print(new.p) } data <- structure(list(ID = c(1L, 2L, 4L, 7L, 9L, 10L, 12L, 13L, 14L, 15L, 16L, 17L, 18L, 20L, 21L, 22L, 23L, 24L, 25L, 27L, 28L, 29L, 31L, 34L, 37L, 38L, 39L, 40L, 41L, 43L, 44L, 45L, 46L, 47L, 48L, 49L, 52L, 55L, 56L, 59L, 60L, 61L, 62L, 63L, 65L, 66L, 67L, 68L, 69L, 71L), QnWeight_initial = c(158L, 165L, 137L, 150L, 153L, 137L, 158L, 163L, 159L, 151L, 145L, 144L, 157L, 144L, 133L, 148L, 151L, 151L, 147L, 158L, 178L, 164L, 134L, 151L, 148L, 142L, 127L, 179L, 162L, 150L, 151L, 153L, 163L, 155L, 163L, 170L, 149L, 165L, 128L, 134L, 145L, 147L, 148L, 160L, 131L, 155L, 169L, 143L, 123L, 151L), Survived_eclosion = c(0L, 1L, 0L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 0L, 1L, 1L, 1L, 1L, 0L, 1L, 1L, 0L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 0L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), Days_wrkr_eclosion_minus20 = c(NA, 1L, NA, 3L, 0L, 2L, 0L, 1L, 0L, 0L, 0L, 1L, NA, 0L, 7L, 1L, 0L, 1L, 0L, 1L, 2L, 2L, NA, 2L, 3L, 2L, 2L, NA, 0L, 1L, NA, NA, 0L, 0L, 0L, 0L, 3L, 3L, 3L, 1L, 0L, 2L, NA, 1L, 0L, 1L, 1L, 3L, 1L, 2L), MLH = c(0.5, 0.666666667, 0.555555556, 0.25, 1, 0.5, 0.333333333, 0.7, 0.5, 0.7, 0.5, 0.666666667, 0.375, 0.4, 0.5, 0.333333333, 0.4, 0.375, 0.3, 0.5, 0.3, 0.2, 0.4, 0.875, 0.6, 0.4, 0.222222222, 0.222222222, 0.6, 0.6, 0.3, 0.4, 0.714285714, 0.4, 0.3, 0.6, 0.4, 0.7, 0.625, 0.555555556, 0.25, 0.5, 0.5, 0.6, 0.25, 0.428571429, 0.3, 0.25, 0.375, 0.555555556), Acon5 = c(0.35387674, 0.35387674, 0.35387674, 0.35387674, 0.35387674, 0.35387674, 0.35387674, 0, 0, 1, 0, 1, 0.35387674, 0, 0, 0.35387674, 1, 1, 0, 0, 0, 1, 0, 0.35387674, 0, 1, 1, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0.35387674), Baez = c(1, 1, 1, 0.467836257, 1, 1, 0, 0, 1, 1, 0, 0.467836257, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0.467836257, 1, 1, 0, 0, 1, 1, 0, 0, 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 1), C294 = c(0, 1, 0, 0, 1, 0.582542694, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0.582542694, 1, 1, 0, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1), C316 = c(1, 1, 0, 0, 0.519685039, 0.519685039, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 0.519685039, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0.519685039, 1, 0, 1, 1, 0, 0.519685039, 1, 0.519685039, 1, 1, 1, 0.519685039, 0.519685039, 0, 0.519685039, 0.519685039, 0), i_120_PigTail = c(1, 1, 0, 1, 0.631236443, 0.631236443, 1, 1, 1, 1, 1, 0, 0.631236443, 1, 1, 1, 0, 0.631236443, 1, 1, 1, 0, 0, 1, 1, 1, 0.631236443, 0, 1, 1, 0, 1, 0.631236443, 1, 0, 1, 0, 0, 1, 0.631236443, 0.631236443, 0, 1, 0, 0.631236443, 0.631236443, 1, 0.631236443, 0.631236443, 1), i129 = c(0L, 1L, 1L, 0L, 1L, 0L, 1L, 1L, 0L, 1L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 1L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 1L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), Jackstraw_PigTail = c(0L, 1L, 1L, 0L, 1L, 1L, 0L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 1L, 1L, 1L, 1L, 0L, 1L, 1L, 1L, 1L, 1L, 0L, 1L, 0L, 1L, 0L, 1L, 1L, 0L, 1L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), Neil_Young = c(0.529636711, 0, 1, 0, 0.529636711, 0.529636711, 1, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 1, 1), Ramble = c(0, 0, 0, 0, 0.215163934, 0.215163934, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0.215163934, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 1, 1, 0.215163934, 0, 0, 0, 0), Sol_18 = c(1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0.404669261, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1)), .Names = c("ID", "QnWeight_initial", "Survived_eclosion", "Days_wrkr_eclosion_minus20", "MLH", "Acon5", "Baez", "C294", "C316", "i_120_PigTail", "i129", "Jackstraw_PigTail", "Neil_Young", "Ramble", "Sol_18"), class = "data.frame", row.names = c(NA, -50L)) make_and_compare_models("QnWeight_initial", data, c("Acon5","Baez","C294","C316","i_120_PigTail","i129","Jackstraw_PigTail","Neil_Young","Ramble","Sol_18"), "MLH", "gaussian")