PCAs and reweights of the values for each species is finally done and in a format that can be easily used to run the evol.rate.mcmc()
Script to impute data:
#impute missing values by taking mean of all other values of of other species ofthat character
i<-1 #set the index
species.removed<-c()
#sift through all babbler species
while (i <= length(babblers$UniqueID_1)){
#missing wing lengths; if the Wing Length is NA for that value
if(is.na(babblers$Wing.Length[i]) == TRUE){
#select the wing lengths of all birds which match this species with missing wing length
wing.species.vector<-babblers$Wing.Length[which(babblers$Species[i] == babblers$Species)]
#make a new vector without the missing value
wing.species.filler<-wing.species.vector[which(is.na(wing.species.vector) == FALSE)]
#calculate the mean
wing.impute<-mean(wing.species.filler)
#fill in missing value with the mean for that bird
babblers$Wing.Length[i]<-wing.impute
}
#repeat the above method for all characters
if(is.na(babblers$Tail.Length[i]) == TRUE){
tail.species.vector<-babblers$Tail.Length[which(babblers$Species[i] == babblers$Species)]
tail.species.filler<-tail.species.vector[which(is.na(tail.species.vector) == FALSE)]
tail.impute<-mean(tail.species.filler)
babblers$Tail.Length[i]<-tail.impute
}
if(is.na(babblers$Bill.Length[i]) == TRUE){
BL.species.vector<-babblers$Bill.Length[which(babblers$Species[i] == babblers$Species)]
BL.species.filler<-BL.species.vector[which(is.na(BL.species.vector) == FALSE)]
BL.impute<-mean(BL.species.filler)
babblers$Bill.Length[i]<-BL.impute
}
if(is.na(babblers$Bill.Width[i]) == TRUE){
BW.species.vector<-babblers$Bill.Width[which(babblers$Species[i] == babblers$Species)]
BW.species.filler<-BW.species.vector[which(is.na(BW.species.vector) == FALSE)]
BW.impute<-mean(BW.species.filler)
babblers$Bill.Width[i]<-BW.impute
}
if(is.na(babblers$Bill.Depth[i]) == TRUE){
BD.species.vector<-babblers$Bill.Depth[which(babblers$Species[i] == babblers$Species)]
BD.species.filler<-BD.species.vector[which(is.na(BD.species.vector) == FALSE)]
BD.impute<-mean(BD.species.filler)
babblers$Bill.Depth[i]<-BD.impute
}
if(is.na(babblers$Tarsus.Length[i]) == TRUE){
TL.species.vector<-babblers$Tarsus.Length[which(babblers$Species[i] == babblers$Species)]
TL.species.filler<-TL.species.vector[which(is.na(TL.species.vector) == FALSE)]
TL.impute<-mean(BD.species.filler)
babblers$Tarsus.Length[i]<-TL.impute
}
if(is.na(babblers$Hallux[i]) == TRUE){
hallux.species.vector<-babblers$Hallux[which(babblers$Species[i] == babblers$Species)]
hallux.species.filler<-hallux.species.vector[which(is.na(hallux.species.vector) == FALSE)]
hallux.impute<-mean(hallux.species.filler)
babblers$Hallux[i]<-hallux.impute
}
i<-i+1
}
Script to scale by geometric mean:
#make empty columns for geometric mean scale
babblers$gm.wing<-0
babblers$gm.tail<-0
babblers$gm.BL<-0
babblers$gm.BW<-0
babblers$gm.BD<-0
babblers$gm.tarsus<-0
babblers$gm.hallux<-0
#Size-adjust the raw data by dividing each individual by the geometric mean
j<-1
could.not.scale<-c()
while(j<=length(babblers$UniqueID_1)){
#grab all the data of that species and put in vector
temp<-c(babblers$Wing.Length[j], babblers$Tail.Length[j], babblers$Bill.Length[j],
babblers$Bill.Width[j], babblers$Bill.Depth[j], babblers$Tarsus.Length[j],
babblers$Hallux[j])
temp<-na.omit(temp)
#calculate the geometric mean of meausrements for that species
geo.mean<-exp(mean(log(temp)))
if(is.nan(geo.mean)==FALSE){
#divide all values for that species by the geometric mean
geo.wing<-(babblers$Wing.Length[j])/geo.mean
babblers$gm.wing[j]<-geo.wing
geo.tail<-babblers$Tail.Length[j]/geo.mean
babblers$gm.tail[j]<-geo.tail
geo.BL<-babblers$Bill.Length[j]/geo.mean
babblers$gm.BL[j]<-geo.BL
geo.BW<-babblers$Bill.Width[j]/geo.mean
babblers$gm.BW[j]<-geo.BW
geo.BD<-babblers$Bill.Depth[j]/geo.mean
babblers$gm.BD[j]<-geo.BD
geo.tarsus<-babblers$Tarsus.Length[j]/geo.mean
babblers$gm.tarsus[j]<-geo.tarsus
geo.hallux<-babblers$Hallux[j]/geo.mean
babblers$gm.hallux[j]<-geo.hallux
}
else{
append(could.not.scale, babblers$UniqueID_1[j])
}
#reset the temp
temp<-c()
#increment to next species
j<-j+1
}
cat("The following species could not be scaled:", could.not.scale, "\n")
Script to do PCA with geometric mean values and put rescaled scores into new data frame:
#PCA for all measurements
pca.result <- prcomp(~babblers$gm.wing + babblers$gm.tail + babblers$gm.BL + babblers$gm.BW + babblers$gm.BD +
babblers$gm.tarsus + babblers$gm.hallux, data = babblers, retx = T)
#standard deviation of PCA
sd <- pca.result$sdev
#make vector of weights
loadings <- pca.result$rotation
#value of the rotated data (scores)
scores <- pca.result$x
#make a new data frame
new.babblers<-as.data.frame(babblers)
#make empty columns in the data frame
new.babblers$pca1<-0
new.babblers$pca2<-0
new.babblers$pca3<-0
new.babblers$pca4<-0
new.babblers$pca5<-0
new.babblers$pca6<-0
new.babblers$pca7<-0
#put PCA Scores into the data frame
new.babblers$pca1<-scores[,1]
new.babblers$pca2<-scores[,2]
new.babblers$pca3<-scores[,3]
new.babblers$pca4<-scores[,4]
new.babblers$pca5<-scores[,5]
new.babblers$pca6<-scores[,6]
new.babblers$pca7<-scores[,7]
new.babblers$pca# are the traits that will be used in the Bayesian analysis.
This site was really useful for understanding PCA:
http://strata.uga.edu/software/pdf/pcaTutorial.pdf
This site was really useful for understanding PCA:
http://strata.uga.edu/software/pdf/pcaTutorial.pdf
No comments:
Post a Comment