-
Notifications
You must be signed in to change notification settings - Fork 8
/
Copy pathSima_optimization.R
89 lines (71 loc) · 3.54 KB
/
Sima_optimization.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
## A faster function for scraping min/max latitude and longitude for all reptile species
## Sima Bouzid 27 Oct 2015
########################################################################################
# NOTE: the working directory is hard-coded to the author's machine; every
# relative path used below (Reptiles.csv, lizlatlong.csv) resolves against it.
setwd("~/Dropbox/UW copy/Resources/lizlatlong_Tinkering")
# Read in .csv of data. Empty and single-space cells are read as NA, and text
# columns stay character instead of being converted to factors.
reptile <- read.csv("Reptiles.csv", header = TRUE, na.strings = c("", " "),
                    stringsAsFactors = FALSE)
# Slow and clunky lizlatlong function
#~#~#~#~##~#~#~#~#~#~#~#~#~#~#~#~#~#~#~#~#~#~#~#~#~#~#~#~#~#~#~#~#~#~#~#~#~#~#~#~#~#~#~#
lizlatlong <- function(x) {
  # Loop-based summary of the latitude/longitude extent of each species.
  #
  # Args:
  #   x: data frame with the columns scientificname, order, family,
  #      decimallatitude and decimallongitude.
  #
  # Side effect: writes lizlatlong.csv to the working directory with the
  #   columns species, maxLat, minLat, maxLong and minLong -- one row per
  #   species, in order of first appearance.
  # Returns: whatever write.csv() returns; the summary itself lives in the
  #   file, not in the return value.
  #
  # This is the simple baseline implementation: every species triggers a scan
  # over all records, so run time grows with (species count) x (row count).

  # Isolate data ranges of interest
  species <- as.character(x$scientificname)
  order <- as.character(x$order)
  family <- as.character(x$family)
  latitude <- as.numeric(x$decimallatitude)
  longitude <- as.numeric(x$decimallongitude)
  pruned_df <- data.frame(species, order, family, latitude, longitude,
                          stringsAsFactors = FALSE)

  # Drop records that lack a latitude or a longitude
  has_coords <- complete.cases(pruned_df[, c("latitude", "longitude")])
  pruned_df <- pruned_df[has_coords, ]

  # Work out the species list once rather than on every loop iteration
  species_ids <- unique(pruned_df$species)
  n_species <- length(species_ids)

  # Preallocate one slot per species
  maxLat <- numeric(n_species)
  minLat <- numeric(n_species)
  maxLong <- numeric(n_species)
  minLong <- numeric(n_species)

  # seq_along() gives zero iterations when no record survives pruning,
  # whereas 1:length() would run for i = 1 and i = 0
  for (i in seq_along(species_ids)) {
    # %in% never yields NA (unlike ==), so records with a missing species name
    # form a group of their own instead of leaking NA rows into other groups
    y <- pruned_df[pruned_df$species %in% species_ids[i], ]
    maxLat[i] <- max(y$latitude)
    minLat[i] <- min(y$latitude)
    maxLong[i] <- max(y$longitude)
    minLong[i] <- min(y$longitude)  # previously max(), duplicating maxLong
  }

  # Recombine into a data frame
  new_df <- data.frame(species = species_ids, maxLat, minLat, maxLong, minLong)
  write.csv(new_df, file = "lizlatlong.csv")
}
# Testing the function; this takes 6-8 hours to run
# NOTE(review): `rep_test` is never created in this script (the data are read
# into `reptile`) -- presumably a subset built interactively; confirm before
# sourcing, because unless it already exists in the workspace this call stops
# with "object 'rep_test' not found".
# The last expression of lizlatlong() is write.csv(), so the summary ends up
# in lizlatlong.csv rather than in `test`.
test <- lizlatlong(rep_test)
#~#~#~#~##~#~#~#~#~#~#~#~#~#~#~#~#~#~#~#~#~#~#~#~#~#~#~#~#~#~#~#~#~#~#~#~#~#~#~#~#~#~#~#
#~#~#~#~##~#~#~#~#~#~#~#~#~#~#~#~#~#~#~#~#~#~#~#~#~#~#~#~#~#~#~#~#~#~#~#~#~#~#~#~#~#~#~#
### The NEW AND IMPROVED lizlatlong function
# load the package the function depends on
#install.packages("plyr")
library(plyr)
# run new function
########################################################################################
lizlatlong <- function(x) {
  # Per-taxon coordinate extents, computed in a single grouped pass with plyr.
  #
  # x: data frame supplying the columns scientificname, order, family,
  #    decimallatitude and decimallongitude.
  # Writes the summary table to lizlatlong.csv and returns the elapsed time
  # (the difference between two Sys.time() readings).
  started_at <- Sys.time()

  # Keep only the columns the summary needs, coerced to plain types
  records <- data.frame(
    species = as.character(x$scientificname),
    order = as.character(x$order),
    family = as.character(x$family),
    latitude = as.numeric(x$decimallatitude),
    longitude = as.numeric(x$decimallongitude)
  )

  # Discard rows where either coordinate is NA
  has_coords <- !is.na(records$latitude) & !is.na(records$longitude)
  records <- records[has_coords, ]

  # One output row per (species, order, family) combination
  extents <- ddply(
    .data = records,
    .variables = .(species, order, family),
    .fun = summarize,
    max_latitude = max(latitude),
    min_latitude = min(latitude),
    max_longitude = max(longitude),
    min_longitude = min(longitude)
  )

  write.csv(extents, file = "lizlatlong.csv")

  finished_at <- Sys.time()
  finished_at - started_at
}
########################################################################################
# Testing the function; takes only about 15 seconds to run!
# The call writes lizlatlong.csv into the working directory and returns the
# elapsed run time.
lizlatlong(reptile)
#~#~#~#~##~#~#~#~#~#~#~#~#~#~#~#~#~#~#~#~#~#~#~#~#~#~#~#~#~#~#~#~#~#~#~#~#~#~#~#~#~#~#~#