R code
library(tidyverse)
library(maps)
library(mapdata)
library(lubridate)
library(viridis)
library(wesanderson)
library(plotly)
library(tidyverse)
library(maps)
library(mapdata)
library(lubridate)
library(viridis)
library(wesanderson)
library(plotly)
The ideas for this course tutorial came from multiple examples contributed by Prof. Chris Sutherland and these tutorials
For our labs two types of approaches will be used to add data to maps. The first is based on using the longitude and latitude information to add a point at a specific position on a map. The second is two add the information to shapes in a map based on the name of the shape (e.g. states). Although ggmaps is a wonderful tool for mapping using Google Maps and other resources, it is beyond what is needed for now.
As in the previous labs the sources of data are from Github repo for Novel Coronavirus (COVID-19) Cases that supports the dashboard.
For this lab it is important to note that the time series data does not currently have entries for US States. The daily reports include US State and more recently US country/administrative units. Is possible to concatenate the daily reports to create a time series for US States, but cognizant of changes in the formats of the daily reports.
Here is a graph containing all the coordinate information. Note this is not summarized by country. Since there is a point for each US county. There are many points in the US
<- read_csv(url("https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_daily_reports/04-02-2020.csv")) |>
daily_report rename(Long = "Long_")
ggplot(daily_report, aes(x = Long, y = Lat, size = Confirmed/1000)) +
borders("world", colour = NA, fill = "grey90") +
geom_point(shape = 21, color='purple', fill='purple', alpha = 0.5) +
labs(title = 'World COVID-19 Confirmed cases',x = '', y = '',
size="Cases (x1000))") +
theme(legend.position = "right") +
coord_fixed(ratio=1.5) +
theme_bw()
Zoom in on US 48 states. To do this Alaska, Hawaii and US Territories are filtered. Some US State entries have a Lat and Long of zero, so these are filtered as well.
<- daily_report |>
daily_report_filtered_US filter(Country_Region == "US") |>
filter (!Province_State %in% c("Alaska","Hawaii", "American Samoa",
"Puerto Rico","Northern Mariana Islands",
"Virgin Islands", "Recovered", "Guam", "Grand Princess",
"District of Columbia", "Diamond Princess")) |>
filter(Lat > 0)
ggplot(daily_report_filtered_US, aes(x = Long, y = Lat, size = Confirmed)) +
borders("state", colour = "black", fill = "grey90") +
theme_bw() +
geom_point(shape = 21, color='purple', fill='purple', alpha = 0.5) +
labs(title = 'COVID-19 Confirmed Cases in the US', x = '', y = '',
size="Cases") +
theme(legend.position = "right") +
coord_fixed(ratio=1.5)
Here is a prettier version based on an example by Anisa Dhana
<- daily_report |>
daily_report_filtered_Dhana filter(Country_Region == "US") |>
filter (!Province_State %in% c("Alaska","Hawaii", "American Samoa",
"Puerto Rico","Northern Mariana Islands",
"Virgin Islands", "Recovered", "Guam", "Grand Princess",
"District of Columbia", "Diamond Princess")) |>
filter(Lat > 0)
<- c(1, 100, 1000, 10000, 10000)
mybreaks ggplot(daily_report_filtered_Dhana, aes(x = Long, y = Lat, size = Confirmed)) +
borders("state", colour = "white", fill = "grey90") +
geom_point(aes(x=Long, y=Lat, size=Confirmed, color=Confirmed),stroke=F, alpha=0.7) +
scale_size_continuous(name="Cases", trans="log", range=c(1,7),
breaks=mybreaks, labels = c("1-99",
"100-999", "1,000-9,999", "10,000-99,999", "50,000+")) +
scale_color_viridis_c(option="viridis",name="Cases",
trans="log", breaks=mybreaks, labels = c("1-99",
"100-999", "1,000-9,999", "10,000-99,999", "50,000+")) +
# Cleaning up the graph
theme_void() +
guides( colour = guide_legend()) +
labs(title = "Anisa Dhana's lagout for COVID-19 Confirmed Cases in the US'") +
theme(
legend.position = "bottom",
text = element_text(color = "#22211d"),
plot.background = element_rect(fill = "#ffffff", color = NA),
panel.background = element_rect(fill = "#ffffff", color = NA),
legend.background = element_rect(fill = "#ffffff", color = NA)
+
) coord_fixed(ratio=1.5)
<- daily_report |>
daily_report_filtered_shapes filter(Country_Region == "US") |>
group_by(Province_State) |>
summarize(Confirmed = sum(Confirmed)) |>
mutate(Province_State = tolower(Province_State))
# load the US map data
<- map_data("state")
us # We need to join the us map data with our daily report to make one data frame/tibble
<- left_join(us, daily_report_filtered_shapes, by = c("region" = "Province_State"))
state_join # plot state map
This is a bit of a digression back to Labs 3 and 4, but there are many R color palattes to choose from or you can create your own. In the above a simple gradient is used. The example from Anisa Dhana uses the viridis palatte which is designed to be perceived by viewers with common forms of colour blindness. Here is an example using a different color package - Wes Anderson. …and more Top R Color Palettes to Know for Great Data Visualization
# plot state map
ggplot(data = us, mapping = aes(x = long, y = lat, group = group)) +
coord_fixed(1.3) +
# Add data layer
geom_polygon(data = state_join, aes(fill = Confirmed), color = "black") +
scale_fill_gradientn(colours =
wes_palette("Zissou1", 100, type = "continuous"),
trans = "log10") +
labs(title = "COVID-19 Confirmed Cases in the US'")
A look now by the counties using the RColorBrewer
library(RColorBrewer)
# To display only colorblind-friendly brewer palettes, specify the option colorblindFriendly = TRUE as follow:
# display.brewer.all(colorblindFriendly = TRUE)
# Get and format the covid report data
<- daily_report |>
daily_report_filtered_shapes unite(Key, Admin2, Province_State, sep = ".") |>
group_by(Key) |>
summarize(Confirmed = sum(Confirmed)) |>
mutate(Key = tolower(Key))
# dim(report_03_27_2020)
# get and format the map data
<- map_data("state")
us <- map_data("county") |>
counties unite(Key, subregion, region, sep = ".", remove = FALSE)
# Join the 2 tibbles
<- left_join(counties, daily_report_filtered_shapes, by = c("Key"))
state_join # sum(is.na(state_join$Confirmed))
ggplot(data = us, mapping = aes(x = long, y = lat, group = group)) +
coord_fixed(1.3) +
# Add data layer
borders("state", colour = "black") +
geom_polygon(data = state_join, aes(fill = Confirmed)) +
scale_fill_gradientn(colors = brewer.pal(n = 5, name = "PuRd"),
breaks = c(1, 10, 100, 1000, 10000, 100000),
trans = "log10", na.value = "White") +
ggtitle("Number of Confirmed Cases by US County") +
theme_bw()
If we look at just Massachusetts
<- daily_report |>
daily_report_filtered_MA filter(Province_State == "Massachusetts") |>
group_by(Admin2) |>
summarize(Confirmed = sum(Confirmed)) |>
mutate(Admin2 = tolower(Admin2))
<- map_data("state")
us <- subset(us, region == "massachusetts")
ma_us <- map_data("county")
counties <- subset(counties, region == "massachusetts")
ma_county <- left_join(ma_county, daily_report_filtered_MA, by = c("subregion" = "Admin2"))
state_join # plot state map
ggplot(data = ma_county, mapping = aes(x = long, y = lat, group = group)) +
coord_fixed(1.3) +
# Add data layer
geom_polygon(data = state_join, aes(fill = Confirmed), color = "white") +
scale_fill_gradientn(colors = brewer.pal(n = 5, name = "BuGn"),
trans = "log10") +
labs(title = "COVID-19 Confirmed Cases in Massachusetts'")
daily_report
# A tibble: 2,608 × 12
FIPS Admin2 Province_State Country_Region Last_Update Lat Long Confirmed
<dbl> <chr> <chr> <chr> <chr> <dbl> <dbl> <dbl>
1 45001 Abbev… South Carolina US 4/2/20 23:… 34.2 -82.5 6
2 22001 Acadia Louisiana US 4/2/20 23:… 30.3 -92.4 61
3 51001 Accom… Virginia US 4/2/20 23:… 37.8 -75.6 10
4 16001 Ada Idaho US 4/2/20 23:… 43.5 -116. 312
5 19001 Adair Iowa US 4/2/20 23:… 41.3 -94.5 1
6 29001 Adair Missouri US 4/2/20 23:… 40.2 -92.6 6
7 40001 Adair Oklahoma US 4/2/20 23:… 35.9 -94.7 9
8 8001 Adams Colorado US 4/2/20 23:… 39.9 -104. 661
9 16003 Adams Idaho US 4/2/20 23:… 44.9 -116. 1
10 17001 Adams Illinois US 4/2/20 23:… 40.0 -91.2 2
# ℹ 2,598 more rows
# ℹ 4 more variables: Deaths <dbl>, Recovered <dbl>, Active <dbl>,
# Combined_Key <chr>
In Lab 4 plotly was introduced. It is a great simple way to make interactive graphs with the maps
# !!!! This example assumes the prior code was run
library(plotly)
ggplotly(
ggplot(data = ma_county, mapping = aes(x = long, y = lat, group = group)) +
coord_fixed(1.3) +
# Add data layer
geom_polygon(data = state_join, aes(fill = Confirmed), color = "black") +
scale_fill_gradientn(colours =
wes_palette("Zissou1", 100, type = "continuous")) +
ggtitle("COVID-19 Cases in MA") +
# Cleaning up the graph
labs(x=NULL, y=NULL) +
theme(panel.border = element_blank()) +
theme(panel.background = element_blank()) +
theme(axis.ticks = element_blank()) +
theme(axis.text = element_blank())
)
Here is an example with the world map
# Read in the daily report
<- daily_report |>
daily_report_filtered_world group_by(Country_Region) |>
summarize(Confirmed = sum(Confirmed), Deaths = sum(Deaths))
# Read in the world map data
<- as_tibble(map_data("world"))
world # Check to see if there are differences in the naming of countries
setdiff(world$region, daily_report$Country_Region)
[1] "Aruba" "Anguilla"
[3] "American Samoa" "French Southern and Antarctic Lands"
[5] "Antigua" "Barbuda"
[7] "Saint Barthelemy" "Bermuda"
[9] "Ivory Coast" "Democratic Republic of the Congo"
[11] "Republic of Congo" "Cook Islands"
[13] "Comoros" "Cape Verde"
[15] "Curacao" "Cayman Islands"
[17] "Czech Republic" "Canary Islands"
[19] "Falkland Islands" "Reunion"
[21] "Mayotte" "French Guiana"
[23] "Martinique" "Guadeloupe"
[25] "Faroe Islands" "Micronesia"
[27] "UK" "Guernsey"
[29] "Greenland" "Guam"
[31] "Heard Island" "Isle of Man"
[33] "Cocos Islands" "Christmas Island"
[35] "Chagos Archipelago" "Jersey"
[37] "Siachen Glacier" "Nevis"
[39] "Saint Kitts" "South Korea"
[41] "Lesotho" "Saint Martin"
[43] "Marshall Islands" "Myanmar"
[45] "Northern Mariana Islands" "Montserrat"
[47] "New Caledonia" "Norfolk Island"
[49] "Niue" "Bonaire"
[51] "Sint Eustatius" "Saba"
[53] "Pitcairn Islands" "Puerto Rico"
[55] "North Korea" "Madeira Islands"
[57] "Azores" "Palestine"
[59] "French Polynesia" "Western Sahara"
[61] "South Sudan" "South Sandwich Islands"
[63] "South Georgia" "Saint Helena"
[65] "Ascension Island" "Solomon Islands"
[67] "Saint Pierre and Miquelon" "Sao Tome and Principe"
[69] "Swaziland" "Sint Maarten"
[71] "Turks and Caicos Islands" "Tajikistan"
[73] "Turkmenistan" "Trinidad"
[75] "Tobago" "Taiwan"
[77] "USA" "Vatican"
[79] "Grenadines" "Saint Vincent"
[81] "Virgin Islands" "Vanuatu"
[83] "Wallis and Futuna" "Yemen"
# Many of these countries are considered states or territories in the JHU covid reports,
# but let's fix a few of them
<- as_tibble(map_data("world")) |>
world mutate(region = str_replace_all(region, c("USA" = "US", "Czech Republic" = "Czechia",
"Ivory Coast" = "Cote d'Ivoire", "Democratic Republic of the Congo" = "Congo (Kinshasa)",
"Republic of Congo" = "Congo (Brazzaville)")))
# Join the covid report with the map data
<- left_join(world, daily_report_filtered_world, by = c("region" = "Country_Region"))
country_join # Create the graph
ggplotly(
ggplot(data = world, mapping = aes(x = long, y = lat, text = region, group = group)) +
coord_fixed(1.3) +
# Add data layer
geom_polygon(data = country_join, aes(fill = Deaths), color = "black") +
scale_fill_gradientn(colours =
wes_palette("Zissou1", 100, type = "continuous")) +
labs(title = "COVID-19 Deaths")
)
Update Anisa Dhana’s graph layout of the US to 3/09/2023. You may need to adjust the range or change to a linear scale (delete trans=“log”)
Update the above graph “Number of Confirmed Cases by US County” to 3/09/2023 and use a different color scheme or theme
Make an interactive plot using a state of your choosing using a theme different from used in the above examples. To do this change the above code chunk to the abbreviation for your choice state.
ma_us <- subset(us, region == “massachusetts”) counties <- map_data(“county”) ma_county <- subset(counties, region == “massachusetts”) state_join <- left_join(ma_county, daily_report_filtered_MA, by = c(“subregion” = “Admin2”))