remotes::install_github("liibre/Rocc")
library(dplyr)
library(Rocc)
Here, we have a short list of two fern species.
# Names of the taxa to search for, one string per species.
species_search <- c(
  "Asplenium truncorum",
  "Lindsaea lancea"
)
Here, we download occurrence data for the two fern species from speciesLink.
# Query speciesLink once per species. Each call is given a file name built
# from the species name with spaces replaced by underscores plus a "_splink"
# suffix; the messages below show the matching CSV being written under
# results/. The list is named by species so that the names can be carried
# into the combined table.
data_splink <- lapply(
  setNames(nm = species_search),
  function(sp) {
    rspeciesLink(
      species = sp,
      filename = paste0(gsub(" ", "_", sp), "_splink")
    )
  }
)
## Making request to speciesLink...
## Writing results/Asplenium_truncorum_splink.csv on disk.
## Making request to speciesLink...
## Writing results/Lindsaea_lancea_splink.csv on disk.
# Stack the per-species results into one data frame. `.id` stores the list
# names (the searched species) in a `species_search` column, which is the
# column inspected just below.
# NOTE(review): assumes rspeciesLink() returns one data frame per call -
# confirm against the installed Rocc version.
df_splink <- bind_rows(data_splink, .id = "species_search")
dim(df_splink)
## [1] 2812 50
unique(df_splink$species_search)
## [1] "Asplenium truncorum" "Lindsaea lancea"
# Query GBIF once per species. Each call is given a file name built from the
# species name with spaces replaced by underscores plus a "_gbif" suffix; the
# messages below show the matching CSV being written under results/. The
# list is named by species.
data_gbif <- lapply(
  setNames(nm = species_search),
  function(sp) {
    rgbif2(
      species = sp,
      filename = paste0(gsub(" ", "_", sp), "_gbif")
    )
  }
)
## Making request to GBIF...
## Writing results/Asplenium_truncorum_gbif.csv on disk.
## Making request to GBIF...
## Writing results/Lindsaea_lancea_gbif.csv on disk.
# Stack the per-species results into the single `df_gbif` data frame that the
# later bind_dwc() call expects; `.id` stores the list names (the searched
# species) in a `species_search` column.
# NOTE(review): assumes rgbif2() returns one data frame per call with
# column types that are compatible across species - confirm.
df_gbif <- bind_rows(data_gbif, .id = "species_search")
# Pass the speciesLink and GBIF tables to bind_dwc() and keep the result as
# the working occurrence table `df` (presumably one combined data frame -
# confirm against the bind_dwc() documentation).
df <- bind_dwc(
  splink_data = df_splink,
  gbif_data = df_gbif
)
Because the data may come from sources that contain errors, we perform a basic check on the species name strings. First, we select only the unique entries among the species names.
# Distinct values of the scientificName column, in order of first appearance
species_name_raw <- unique(df[["scientificName"]])
For the unique entries, we will perform a basic check on the string.
# Run check_string() on the unique raw names and keep the result. As the
# printed table below shows, the result has one row per input name with the
# original string (verbatimSpecies), a status label (speciesStatus) and a
# processed name (species).
# NOTE(review): the coercion warnings below indicate that a non-atomic value
# reaches stri_detect_regex(); presumably this happens inside check_string()
# - confirm.
species_name_check <- check_string(species_name_raw)
## Warning in stri_detect_regex(string, pattern, negate = negate, opts_regex =
## opts(pattern)): argument is not an atomic vector; coercing
## Warning in stri_detect_regex(string, pattern, negate = negate, opts_regex =
## opts(pattern)): argument is not an atomic vector; coercing
# Auto-print the classification table for inspection.
species_name_check
## verbatimSpecies speciesStatus
## 1 Asplenium truncorum F.B.Matos, Labiak & Sylvestre name_w_wrong_case
## 2 Lindsaea lancea (L.) Bedd. name_w_wrong_case
## 3 Lindsaea leprieurii Hook. name_w_wrong_case
## 4 Lindsaea lancea var. lancea variety
## 5 Asplenium truncorum possibly_ok
## 6 Lindsaea lancea possibly_ok
## 7 Lindsaea lancea elatior not_Genus_epithet_format
## 8 Lindsaea lancea falcata not_Genus_epithet_format
## 9 Lindsaea lancea lancea not_Genus_epithet_format
## 10 Lindsaea lancea leprieurii not_Genus_epithet_format
## 11 Lindsaea lancea remota not_Genus_epithet_format
## 12 Lindsaea lancea x schomburgkii hybrid_species
## 13 Lindsaea lancea var. falcata variety
## 14 Lindsaea lancea var. elatior variety
## 15 Lindsaea lancea var. remota variety
## 16 Lindsaea lancea quadrangularis not_Genus_epithet_format
## 17 Lindsaea lanceae possibly_ok
## 18 Lindsaea lancea longifolia not_Genus_epithet_format
## 19 Lindsaea lancea submontana not_Genus_epithet_format
## 20 Lindsaeae lancea possibly_ok
## 21 Lindsaea cf. lancea conferre
## 22 Lindsaea lancea semilunata not_Genus_epithet_format
## species remove_author
## 1 Asplenium truncorum f.b.matos, labiak & sylvestre FALSE
## 2 Lindsaea lancea (l.) bedd. FALSE
## 3 Lindsaea leprieurii hook. FALSE
## 4 Lindsaea lancea var. lancea TRUE
## 5 Asplenium truncorum FALSE
## 6 Lindsaea lancea FALSE
## 7 Lindsaea lancea elatior FALSE
## 8 Lindsaea lancea falcata FALSE
## 9 Lindsaea lancea lancea FALSE
## 10 Lindsaea lancea leprieurii FALSE
## 11 Lindsaea lancea remota FALSE
## 12 Lindsaea lancea TRUE
## 13 Lindsaea lancea var. falcata TRUE
## 14 Lindsaea lancea var. elatior TRUE
## 15 Lindsaea lancea var. remota TRUE
## 16 Lindsaea lancea quadrangularis FALSE
## 17 Lindsaea lanceae FALSE
## 18 Lindsaea lancea longifolia FALSE
## 19 Lindsaea lancea submontana FALSE
## 20 Lindsaeae lancea FALSE
## 21 Lindsaea lancea FALSE
## 22 Lindsaea lancea semilunata FALSE
Here, we are interested only in the names assigned the status `possibly_ok` or `name_w_authors`. Now we will filter the occurrence data to keep only the records within these categories.
# Status labels whose names are kept.
status_keep <- c("possibly_ok", "name_w_authors")
# Flag the checked names carrying one of those labels, then pull the
# corresponding original (verbatim) name strings.
has_ok_status <- is.element(species_name_check$speciesStatus, status_keep)
verbatimSpecies_ok <- species_name_check$verbatimSpecies[has_ok_status]
# Keep only the occurrence rows whose scientificName is one of those names.
row_is_ok <- df$scientificName %in% verbatimSpecies_ok
df_ok <- df[row_is_ok, ]
In this cleaning we went from a total of 3346 occurrences to 1505 occurrences.
Finally, we can write the resulting occurrence data to disk.
# Save the filtered occurrence table as a CSV file, omitting the row names.
write.csv(
  x = df_ok,
  file = "results/occurrence_data.csv",
  row.names = FALSE
)