counties = USAboundaries::us_counties() %>%
filter(!state_name %in% c("Alaska", "Hawaii", "Puerto Rico")) %>%
st_transform(5070)
b_counties = counties %>%
group_by(state_name) %>%
summarise()
counties_cen = counties %>%
st_centroid()
simp_count = counties %>%
st_union() %>%
ms_simplify(keep = .025)
mapview::npts(counties)
## [1] 51976
mapview::npts(simp_count)
## [1] 81
The number of points in the original object is 51976, and the number of points in the simplified object is 81, I removed 51895 points. It efficiently decreases the time of computation.
v_counties = counties_cen %>%
st_union() %>%
st_voronoi() %>%
st_cast() %>%
st_sf() %>%
mutate(id = 1:n()) %>%
st_intersection(simp_count)
t_counties = counties_cen %>%
st_union() %>%
st_triangulate() %>%
st_cast() %>%
st_sf() %>%
mutate(id = 1:n()) %>%
st_intersection(simp_count)
sq_counties = counties %>%
st_make_grid(n = 70) %>%
st_sf() %>%
mutate(id = 1:n())
h_counties = counties %>%
st_make_grid(n = 70, square = FALSE) %>%
st_sf() %>%
mutate(id = 1:n())
counties_plot = function(tiles, name){
ggplot() +
geom_sf(data = tiles, col = "navy", fill = "white", size = .2) +
theme_void() +
labs(title = name,
caption = paste("This Tesselation has:", nrow(tiles), "tiles")) +
theme(plot.title = element_text(hjust = .5, color = "navy", face = "bold"))
}
counties_plot(v_counties, "Voronoi Coverage")
counties_plot(t_counties, "Triangulation Coverage")
counties_plot(sq_counties, "Square Coverage")
counties_plot(h_counties, "Hexegonal Coverage")
counties_plot(counties, "Original Data")
tess_calcu = function(tess_type, tess_name){
tess_type = tess_type %>%
mutate(area = st_area(tess_type),
area = units::set_units(area, "km^2"),
area = units::drop_units(area),
total_area = sum(area),
m_area = total_area / n(),
sd_area = sum(area - m_area /n()) ^ (1/2),
number = length(tess_type$id)
)
numberoffearture = length(tess_type$id)
tess_name = tess_type %>%
mutate(number = numberoffearture, name = tess_name) %>%
select(name, number, m_area, sd_area, total_area) %>%
st_drop_geometry() %>%
head(1)
return(tess_name)
}
original = counties %>%
mutate(id = 1:n())
tess_calcu(v_counties, "Voronoi")
## name number m_area sd_area total_area
## 1 Voronoi 3098 2531.911 2800.237 7843859
tess_calcu(t_counties, "Triangulation")
## name number m_area sd_area total_area
## 1 Triangulation 6185 1254.108 2784.853 7756659
tess_calcu(sq_counties, "Square")
## name number m_area sd_area total_area
## 1 Square 3108 2728.126 2911.406 8479014
tess_calcu(h_counties, "Hexegon")
## name number m_area sd_area total_area
## 1 Hexegon 2271 3763.052 2922.692 8545891
tess_calcu(original, "Original")
## name number m_area sd_area total_area
## 1 Original 3108 2521.745 2799.118 7837583
tess_summary = bind_rows(tess_calcu(v_counties, "Voronoi"),
tess_calcu(t_counties, "Triangulation"),
tess_calcu(sq_counties, "Square"),
tess_calcu(h_counties, "Hexagon"),
tess_calcu(original, "Original"),
)
knitr::kable(tess_summary,
caption = "Summary of Tessellations",
col.names = c("Tessellation Type", "Number of Feature", "Mean Area", "Standard Deviation", "Total Area"))
Tessellation Type | Number of Feature | Mean Area | Standard Deviation | Total Area |
---|---|---|---|---|
Voronoi | 3098 | 2531.911 | 2800.237 | 7843859 |
Triangulation | 6185 | 1254.108 | 2784.853 | 7756659 |
Square | 3108 | 2728.126 | 2911.406 | 8479014 |
Hexagon | 2271 | 3763.052 | 2922.692 | 8545891 |
Original | 3108 | 2521.745 | 2799.118 | 7837583 |
dam = readxl::read_excel("data/NID2019_U.xlsx") %>%
filter(!is.na(LATITUDE)) %>%
st_as_sf(coords = c("LONGITUDE", "LATITUDE"), crs = 4326) %>%
st_transform(5070) %>%
select(DAM_NAME = DAM_NAME)
point_in_polygon = function(points, polygon, tess_id){
st_join(polygon, points) %>%
count(get(tess_id))
}
Vor = point_in_polygon(dam, v_counties, "id")
Tri = point_in_polygon(dam, t_counties, "id")
Squ = point_in_polygon(dam, sq_counties, "id")
Hex = point_in_polygon(dam, h_counties, "id")
Ori = point_in_polygon(dam, original, "id")
plot_pip = function(data, name){
ggplot() +
geom_sf(data = data, aes(fill = n),col = NA, size = .2) +
# scale_fill_gradient(low = "white", high = "viridis") +
scale_fill_viridis_c() +
theme_void() +
labs(title = name,
caption = paste0(sum(data$n),"locations represented")) +
theme(plot.title = element_text(hjust = .5, color = "darkgreen", face = "bold"))
}
plot_pip(Vor, "Voronoi Coverage")
plot_pip(Tri, "Triangulation Coverage")
plot_pip(Squ, "Square Coverage")
plot_pip(Hex, "Hexagonal Coverage")
plot_pip(Ori, "Raw Data")
As the data shows in plots, both voronoi and triangulation can mostly keep data precision similar to the original geometric information. However, because the area covered in each tiles is large in square and hexagonal coverage, the distribution of the information is distorted, and the deviation appears. I prefer voronoi tessellation for further analysis. It maintains the raw data precision and also has bigger range of data number than triangulation, which has a more distinctive visual change in the plot that can help us in analysis.
dam2 = readxl::read_excel("data/NID2019_U.xlsx") %>%
filter(!is.na(LATITUDE)) %>%
st_as_sf(coords = c("LONGITUDE", "LATITUDE"), crs = 4326) %>%
st_transform(5070)
dam_purpose = function(data, name){
data %>%
filter(grepl(name, data$PURPOSES))
}
Recreation = dam_purpose(dam2, "R")
Flood_control = dam_purpose(dam2, "C")
Water_supply = dam_purpose(dam2, "S")
Irrigation = dam_purpose(dam2, "I")
I choose recreation, flood control, water supply, and irrigation in the analysis. Four purposes are main functions which most dams have. I am curious that those main functions’ relationship with the geographic location. Does the river system influence them, or other geographic features? From the analysis of these purposes the data would give me some hints.
analysis_R = point_in_polygon(Recreation, v_counties, "id")
analysis_C = point_in_polygon(Flood_control, v_counties, "id")
analysis_S = point_in_polygon(Water_supply, v_counties, "id")
analysis_I = point_in_polygon(Irrigation, v_counties, "id")
plot_pip(analysis_R, "Recreation Dams Location") +
gghighlight::gghighlight(n > mean(n) + sd(n))
plot_pip(analysis_C, "Flood Control Dams Location") +
gghighlight::gghighlight(n > mean(n) + sd(n))
plot_pip(analysis_S, "Water Supply Dams Location") +
gghighlight::gghighlight(n > mean(n) + sd(n))
plot_pip(analysis_I, "Irrigation Dams Location") +
gghighlight::gghighlight(n > mean(n) + sd(n))
Miss = read_sf("data/majorrivers_0_0") %>%
filter(SYSTEM == "Mississippi")
dam_leaflet = readxl::read_excel("data/NID2019_U.xlsx") %>%
filter(!is.na(LATITUDE)) %>%
st_as_sf(coords = c("LONGITUDE", "LATITUDE"), crs = 4326) %>%
st_transform(5070) %>%
select(dam_name = DAM_NAME, storage = NID_STORAGE, purposes = PURPOSES, year_completed = YEAR_COMPLETED, hazard = HAZARD) %>%
filter(hazard == "H")
DAM = st_join(counties, dam_leaflet) %>%
group_by(state_abbr) %>%
slice_max(storage, n = 1) %>%
select(dam_name, purposes) %>%
st_drop_geometry()
DAM_leaflet = left_join(DAM, dam_leaflet)%>%
st_as_sf()
leaflet() %>%
addProviderTiles(providers$CartoDB) %>%
addCircleMarkers(data = st_transform(DAM_leaflet, 4326),
color = "red",
radius = ~storage/1500000,
fillOpacity = 1,
stroke = FALSE,
popup = leafpop::popupTable(st_drop_geometry(DAM_leaflet[1:4]),
feature.id = FALSE,
row.numbers = FALSE)) %>%
addPolylines(data = Miss)