Had a minor panic attack because I realized my missing tarsus values were being imputed with the mean of the bill length, which of course would lead to extreme variance in the trait. Anyway, I fixed it right up and still got somewhat similar results. Whew.
New PCA result:
Fixed script:
# Impute missing trait measurements with the species mean: each NA in a trait
# column is replaced by the mean of the observed (non-NA) values of that same
# trait among the rows of `babblers` that share the bird's Species.
species.removed <- c() # not used in this section; presumably filled later in the script -- TODO confirm

# Traits to impute. Driving the imputation from this single list guarantees
# that every trait is filled from its own column, so a copy-paste slip cannot
# make one trait borrow another trait's mean.
trait.columns <- c(
  "Wing.Length", "Tail.Length", "Bill.Length", "Bill.Width",
  "Bill.Depth", "Tarsus.Length", "Hallux"
)

for (trait in trait.columns) {
  trait.values <- babblers[[trait]]
  trait.missing <- is.na(trait.values)
  # Nothing to impute: leave the column (and its storage type) untouched.
  if (!any(trait.missing)) {
    next
  }
  # Mean of the observed values of this trait, one entry per species.
  species.means <- tapply(trait.values, babblers$Species, mean, na.rm = TRUE)
  # Look up the species mean for each row that is missing this trait; rows
  # whose Species is NA match nothing and therefore stay NA.
  species.index <- match(
    as.character(babblers$Species[trait.missing]),
    names(species.means)
  )
  trait.fill <- as.numeric(species.means)[species.index]
  # A species with no observed value for this trait has no mean (NaN); keep
  # those entries as NA so they remain visibly missing.
  trait.fill[is.nan(trait.fill)] <- NA
  trait.values[trait.missing] <- trait.fill
  babblers[[trait]] <- trait.values
}
New PCA result:
Fixed script:
# Impute missing trait measurements with the species mean: each NA in a trait
# column is replaced by the mean of the observed (non-NA) values of that same
# trait among the rows of `babblers` that share the bird's Species.
species.removed <- c() # not used in this section; presumably filled later in the script -- TODO confirm

# Traits to impute. Driving the imputation from this single list guarantees
# that every trait is filled from its own column, so a copy-paste slip cannot
# make one trait borrow another trait's mean.
trait.columns <- c(
  "Wing.Length", "Tail.Length", "Bill.Length", "Bill.Width",
  "Bill.Depth", "Tarsus.Length", "Hallux"
)

for (trait in trait.columns) {
  trait.values <- babblers[[trait]]
  trait.missing <- is.na(trait.values)
  # Nothing to impute: leave the column (and its storage type) untouched.
  if (!any(trait.missing)) {
    next
  }
  # Mean of the observed values of this trait, one entry per species.
  species.means <- tapply(trait.values, babblers$Species, mean, na.rm = TRUE)
  # Look up the species mean for each row that is missing this trait; rows
  # whose Species is NA match nothing and therefore stay NA.
  species.index <- match(
    as.character(babblers$Species[trait.missing]),
    names(species.means)
  )
  trait.fill <- as.numeric(species.means)[species.index]
  # A species with no observed value for this trait has no mean (NaN); keep
  # those entries as NA so they remain visibly missing.
  trait.fill[is.nan(trait.fill)] <- NA
  trait.values[trait.missing] <- trait.fill
  babblers[[trait]] <- trait.values
}
No comments:
Post a Comment