Thursday, June 6, 2013

PCAs and reweights of the values for each species is finally done and in a format that can be easily used to run the evol.rate.mcmc()


Script to impute data:
#impute missing values by taking mean of all other values of of other species ofthat character 

i<-1 #set the index
species.removed<-c()
#sift through all babbler species
while (i <= length(babblers$UniqueID_1)){
  #missing wing lengths; if the Wing Length is NA for that value
  if(is.na(babblers$Wing.Length[i]) == TRUE){
    #select the wing lengths of all birds which match this species with missing wing length
    wing.species.vector<-babblers$Wing.Length[which(babblers$Species[i] == babblers$Species)]
    #make a new vector without the missing value
    wing.species.filler<-wing.species.vector[which(is.na(wing.species.vector) == FALSE)]
    #calculate the mean
    wing.impute<-mean(wing.species.filler)
    #fill in missing value with the mean for that bird
    babblers$Wing.Length[i]<-wing.impute
  }
  
  #repeat the above method for all characters
  if(is.na(babblers$Tail.Length[i]) == TRUE){
    tail.species.vector<-babblers$Tail.Length[which(babblers$Species[i] == babblers$Species)]
    tail.species.filler<-tail.species.vector[which(is.na(tail.species.vector) == FALSE)]
    tail.impute<-mean(tail.species.filler)
    babblers$Tail.Length[i]<-tail.impute
  }
  
  
  
  if(is.na(babblers$Bill.Length[i]) == TRUE){
    BL.species.vector<-babblers$Bill.Length[which(babblers$Species[i] == babblers$Species)]
    BL.species.filler<-BL.species.vector[which(is.na(BL.species.vector) == FALSE)]
    BL.impute<-mean(BL.species.filler)
    babblers$Bill.Length[i]<-BL.impute
  }
  
  if(is.na(babblers$Bill.Width[i]) == TRUE){
    BW.species.vector<-babblers$Bill.Width[which(babblers$Species[i] == babblers$Species)]
    BW.species.filler<-BW.species.vector[which(is.na(BW.species.vector) == FALSE)]
    BW.impute<-mean(BW.species.filler)
    babblers$Bill.Width[i]<-BW.impute
  }
  
  if(is.na(babblers$Bill.Depth[i]) == TRUE){
    BD.species.vector<-babblers$Bill.Depth[which(babblers$Species[i] == babblers$Species)]
    BD.species.filler<-BD.species.vector[which(is.na(BD.species.vector) == FALSE)]
    BD.impute<-mean(BD.species.filler)
    babblers$Bill.Depth[i]<-BD.impute
  }
  
  if(is.na(babblers$Tarsus.Length[i]) == TRUE){
    TL.species.vector<-babblers$Tarsus.Length[which(babblers$Species[i] == babblers$Species)]
    TL.species.filler<-TL.species.vector[which(is.na(TL.species.vector) == FALSE)]
    TL.impute<-mean(BD.species.filler)
    babblers$Tarsus.Length[i]<-TL.impute
  }
  
  if(is.na(babblers$Hallux[i]) == TRUE){
    hallux.species.vector<-babblers$Hallux[which(babblers$Species[i] == babblers$Species)]
    hallux.species.filler<-hallux.species.vector[which(is.na(hallux.species.vector) == FALSE)]
    hallux.impute<-mean(hallux.species.filler)
    babblers$Hallux[i]<-hallux.impute
  }

  i<-i+1
}

Script to scale by geometric mean:
#make empty columns for geometric mean scale
babblers$gm.wing<-0
babblers$gm.tail<-0
babblers$gm.BL<-0
babblers$gm.BW<-0
babblers$gm.BD<-0
babblers$gm.tarsus<-0
babblers$gm.hallux<-0

#Size-adjust the raw data by dividing each individual by the geometric mean 

j<-1
could.not.scale<-c()
while(j<=length(babblers$UniqueID_1)){
  #grab all the data of that species and put in vector
  temp<-c(babblers$Wing.Length[j], babblers$Tail.Length[j], babblers$Bill.Length[j],
          babblers$Bill.Width[j], babblers$Bill.Depth[j], babblers$Tarsus.Length[j], 
          babblers$Hallux[j])
  temp<-na.omit(temp)
  #calculate the geometric mean of meausrements for that species
  geo.mean<-exp(mean(log(temp)))
  
  if(is.nan(geo.mean)==FALSE){
  #divide all values for that species by the geometric mean
  geo.wing<-(babblers$Wing.Length[j])/geo.mean
  babblers$gm.wing[j]<-geo.wing
  
  geo.tail<-babblers$Tail.Length[j]/geo.mean
  babblers$gm.tail[j]<-geo.tail
  
  geo.BL<-babblers$Bill.Length[j]/geo.mean
  babblers$gm.BL[j]<-geo.BL
  
  geo.BW<-babblers$Bill.Width[j]/geo.mean
  babblers$gm.BW[j]<-geo.BW
  
  geo.BD<-babblers$Bill.Depth[j]/geo.mean
  babblers$gm.BD[j]<-geo.BD
  
  geo.tarsus<-babblers$Tarsus.Length[j]/geo.mean
  babblers$gm.tarsus[j]<-geo.tarsus
  
  geo.hallux<-babblers$Hallux[j]/geo.mean
  babblers$gm.hallux[j]<-geo.hallux
 }
 else{
    append(could.not.scale, babblers$UniqueID_1[j])
  }
  #reset the temp
  temp<-c()
  
  #increment to next species
  j<-j+1
}

cat("The following species could not be scaled:", could.not.scale, "\n")

Script to do PCA with geometric mean values and put rescaled scores into new data frame:
#PCA for all measurements
pca.result <- prcomp(~babblers$gm.wing + babblers$gm.tail + babblers$gm.BL + babblers$gm.BW + babblers$gm.BD + 
       babblers$gm.tarsus + babblers$gm.hallux, data = babblers, retx = T)

#standard deviation of PCA
sd <- pca.result$sdev
#make vector of weights
loadings <- pca.result$rotation
#value of the rotated data (scores)
scores <- pca.result$x

#make a new data frame
new.babblers<-as.data.frame(babblers)

#make empty columns in the data frame
new.babblers$pca1<-0
new.babblers$pca2<-0
new.babblers$pca3<-0
new.babblers$pca4<-0
new.babblers$pca5<-0
new.babblers$pca6<-0
new.babblers$pca7<-0

#put PCA Scores into the data frame
new.babblers$pca1<-scores[,1]
new.babblers$pca2<-scores[,2]
new.babblers$pca3<-scores[,3]
new.babblers$pca4<-scores[,4]
new.babblers$pca5<-scores[,5]
new.babblers$pca6<-scores[,6]
new.babblers$pca7<-scores[,7]

new.babblers$pca# are the traits that will be used in the Bayesian analysis.
This site was really useful for understanding PCA:
http://strata.uga.edu/software/pdf/pcaTutorial.pdf

No comments:

Post a Comment