R code
# One-time setup: install the neonUtilities and neonOS packages from the
# configured repositories.
install.packages(c("neonUtilities", "neonOS"))
From "Access NEON Data for Metagenomics". See also:
* Update on the changing NEON microbial data
* Soil microbe metagenome sequences
Install packages
The metagenome data product has HARV data collected through 2022. The 2021 and 2022 data are not in the phyloNEON data.
# Keep only the sample identifier column of the metagenome sequencing table.
metaGdata_dnaSampleID <- select(metaGdata_mms_metagenomeSequencing, dnaSampleID)

# The 2023 and 2024 samples are not in the metagenome data product yet, so
# they are read from local sample sheets instead.
neon_ay23_jgi_samples <- read_csv("data/NEON_metadata/neon_ay23_jgi_samples_soil.csv")
neon_ay24_jgi_samples <- read_csv("data/NEON_metadata/neon_ay24_jgi_samples_soil.csv")

# Stack the three ID tables row-wise.
# NOTE(review): rbind() needs all three tables to have the same columns --
# presumably each sheet holds a single dnaSampleID column; confirm.
neon_dnaSampleID <- metaGdata_dnaSampleID |>
  rbind(neon_ay23_jgi_samples, neon_ay24_jgi_samples)
# Split the `dnaSampleID` column of `df` into its components.
#
# The ID is taken apart one delimiter at a time: the relevant "-" is rewritten
# to "_" and the string is then split on "_" with `separate()`.
#
# Columns added (the original `dnaSampleID` is kept):
#   dnaSampleID.site     text before the "_" in the ID
#   dnaSampleID.type     text from "COMP" / "GEN" onwards
#   dnaSampleID.plot     text before the "-M" / "-O" layer marker
#   dnaSampleID.layer    "M" or "O"
#   dnaSampleID.subplot  whatever sits between the layer and the date
#   dnaSampleID.date     collection date, parsed from YYYYMMDD to a Date
#   plotID               "<site>_<plot>"
#
# @param df A data frame with a character column `dnaSampleID`.
# @return `df` with the columns listed above added.
split_dna_sample_id <- function(df) {
  df |>
    separate(
      dnaSampleID,
      c("dnaSampleID.site", "dnaSampleID.sub"),
      sep = "_",
      remove = FALSE
    ) |>
    # Sample type: a lower-case "-comp" is normalised to "COMP".
    mutate(
      dnaSampleID.sub = dnaSampleID.sub |>
        str_replace("-comp", "_COMP") |>
        str_replace("-COMP", "_COMP") |>
        str_replace("-GEN", "_GEN")
    ) |>
    separate(
      dnaSampleID.sub,
      c("dnaSampleID.sub", "dnaSampleID.type"),
      sep = "_"
    ) |>
    # Plot: everything in front of the layer marker.
    mutate(
      dnaSampleID.sub = dnaSampleID.sub |>
        str_replace("-M", "_M") |>
        str_replace("-O", "_O")
    ) |>
    separate(
      dnaSampleID.sub,
      c("dnaSampleID.plot", "dnaSampleID.sub"),
      sep = "_"
    ) |>
    # Layer: the single letter in front of the next "-".
    mutate(
      dnaSampleID.sub = dnaSampleID.sub |>
        str_replace("M-", "M_") |>
        str_replace("O-", "O_")
    ) |>
    separate(
      dnaSampleID.sub,
      c("dnaSampleID.layer", "dnaSampleID.sub"),
      sep = "_"
    ) |>
    # Date: put the "_" in front of a complete 8-digit token starting with
    # 201 or 202 (dropping the "-" before it, if any). Matching the whole
    # token matters: a bare "201" / "202" / "2013" substring also occurs
    # inside dates (20201015, 20220115, 20220130), and splitting there cuts
    # the date itself in two.
    mutate(
      dnaSampleID.sub = str_replace(
        dnaSampleID.sub, "-?\\b(20[12][0-9]{5})\\b", "_\\1"
      )
    ) |>
    separate(
      dnaSampleID.sub,
      c("dnaSampleID.subplot", "dnaSampleID.date"),
      sep = "_"
    ) |>
    # Coerce to numeric first so that any non-numeric remainder becomes NA
    # before the value is parsed as a date.
    mutate(dnaSampleID.date = ymd(as.numeric(dnaSampleID.date))) |>
    unite(
      plotID,
      c(dnaSampleID.site, dnaSampleID.plot),
      sep = "_",
      remove = FALSE
    )
}

# All sample IDs (data product + 2023/2024 sheets), split into components.
neon_dnaSampleID_split <- split_dna_sample_id(neon_dnaSampleID)

# The full sequencing table gets the same component columns. The stray
# `dnaSampleID.data` column that a misspelled assignment used to create here
# is no longer produced.
metaGdata_mms_metagenomeSequencing <-
  split_dna_sample_id(metaGdata_mms_metagenomeSequencing)
# Heatmap of the number of HARV metagenomes per plot (y) and year (x).
neon_dnaSampleID_split |>
  filter(dnaSampleID.site == "HARV") |>
  count(Year = lubridate::year(dnaSampleID.date), dnaSampleID.plot) |>
  # Widen and re-lengthen so that every plot/year combination is present;
  # combinations without samples get an explicit 0 instead of a missing tile.
  pivot_wider(
    names_from = dnaSampleID.plot,
    values_from = n,
    values_fill = 0
  ) |>
  pivot_longer(!Year, names_to = "plot", values_to = "metagenomes") |>
  ggplot(aes(x = Year, y = plot)) +
  geom_tile(aes(fill = metagenomes)) +
  scale_fill_viridis(discrete = FALSE, direction = -1) +
  scale_x_continuous(breaks = seq(2013, 2024, by = 1))
# Heatmap of the number of metagenomes per plot (y) and year (x), one facet
# per site.
neon_dnaSampleID_split |>
  count(
    dnaSampleID.site,
    Year = lubridate::year(dnaSampleID.date),
    dnaSampleID.plot
  ) |>
  # Widen by year and re-lengthen so that every site/plot has a row for every
  # year; years without samples get an explicit 0. Year comes back from the
  # column names as character, so the x axis is discrete.
  pivot_wider(names_from = Year, values_from = n, values_fill = 0) |>
  pivot_longer(
    !c(dnaSampleID.site, dnaSampleID.plot),
    names_to = "Year",
    values_to = "metagenomes"
  ) |>
  ggplot(aes(x = Year, y = dnaSampleID.plot)) +
  geom_tile(aes(fill = metagenomes)) +
  scale_fill_viridis(discrete = FALSE, direction = -1) +
  facet_wrap(~dnaSampleID.site, scales = "free_y", ncol = 3) +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))