This package is a client for the OBIS API. It includes functions for data access, as well as a few helper functions for visualizing occurrence data and extracting nested MeasurementOrFact or DNADerivedData records.
First, load some packages:
library(robis)
library(dplyr)
#>
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#>
#> filter, lag
#> The following objects are masked from 'package:base':
#>
#> intersect, setdiff, setequal, union
library(ggplot2)
The `occurrence()` function provides access to raw occurrence data. For example, to fetch all occurrences by scientific name:
occ <- occurrence("Abra aequalis")
occ
#> # A tibble: 767 x 103
#> date_year scientificNameID year scientificName dynamicProperties
#> <int> <chr> <chr> <chr> <chr>
#> 1 1986 urn:lsid:marinespecies.o… 1986 Abra aequalis temperature=10;
#> 2 1976 urn:lsid:marinespecies.o… 1976 Abra aequalis <NA>
#> 3 1992 urn:lsid:marinespecies.o… 1992 Abra aequalis <NA>
#> 4 2015 urn:lsid:marinespecies.o… <NA> Abra aequalis <NA>
#> 5 2002 urn:lsid:marinespecies.o… 2002 Abra aequalis <NA>
#> 6 2013 urn:lsid:marinespecies.o… <NA> Abra aequalis <NA>
#> 7 2014 urn:lsid:marinespecies.o… <NA> Abra aequalis <NA>
#> 8 1978 urn:lsid:marinespecies.o… 1978 Abra aequalis observedindividualc…
#> 9 1994 urn:lsid:marinespecies.o… <NA> Abra aequalis <NA>
#> 10 2013 urn:lsid:marinespecies.o… <NA> Abra aequalis <NA>
#> # … with 757 more rows, and 98 more variables: superfamilyid <int>,
#> # individualCount <chr>, dropped <lgl>, aphiaID <int>, decimalLatitude <dbl>,
#> # subclassid <int>, phylumid <int>, familyid <int>, catalogNumber <chr>,
#> # basisOfRecord <chr>, subterclassid <int>, maximumDepthInMeters <dbl>,
#> # id <chr>, order <chr>, dataset_id <chr>, locality <chr>,
#> # decimalLongitude <dbl>, collectionCode <chr>, date_end <dbl>,
#> # speciesid <int>, superfamily <chr>, date_start <dbl>, genus <chr>,
#> # subterclass <chr>, eventDate <chr>, superorder <chr>,
#> # scientificNameAuthorship <chr>, absence <lgl>, superorderid <int>,
#> # genusid <int>, originalScientificName <chr>, marine <lgl>,
#> # minimumDepthInMeters <dbl>, infraclassid <int>, institutionCode <chr>,
#> # date_mid <dbl>, infraclass <chr>, identifiedBy <chr>, class <chr>,
#> # orderid <int>, kingdom <chr>, classid <int>, phylum <chr>, species <chr>,
#> # subclass <chr>, family <chr>, kingdomid <int>, node_id <chr>, flags <chr>,
#> # sss <dbl>, depth <dbl>, shoredistance <int>, sst <dbl>, bathymetry <int>,
#> # country <chr>, day <chr>, month <chr>, bibliographicCitation <chr>,
#> # waterBody <chr>, recordedBy <chr>, coordinatePrecision <chr>, type <chr>,
#> # taxonRemarks <chr>, occurrenceStatus <chr>, materialSampleID <chr>,
#> # occurrenceID <chr>, ownerInstitutionCode <chr>, samplingProtocol <chr>,
#> # taxonRank <chr>, datasetName <chr>, datasetID <chr>,
#> # associatedReferences <chr>, fieldNumber <chr>, stateProvince <chr>,
#> # preparations <chr>, county <chr>, modified <lgl>,
#> # infraspecificEpithet <lgl>, recordNumber <chr>, higherGeography <chr>,
#> # continent <chr>, typeStatus <lgl>, geodeticDatum <lgl>,
#> # specificEpithet <chr>, georeferenceSources <lgl>, verbatimDepth <chr>,
#> # occurrenceRemarks <chr>, collectionID <chr>, eventID <chr>,
#> # dateIdentified <chr>, habitat <chr>, institutionID <chr>, organismID <chr>,
#> # eventRemarks <chr>, taxonID <chr>, locationRemarks <chr>,
#> # countryCode <chr>, locationID <chr>
ggplot(occ) +
geom_bar(aes(date_year), stat = "count", width = 1)
#> Warning: Removed 25 rows containing non-finite values (stat_count).
Alternatively, occurrences can be fetched by AphiaID:
occurrence(taxonid = 293683)
#> # A tibble: 767 x 103
#> date_year scientificNameID year scientificName dynamicProperties
#> <int> <chr> <chr> <chr> <chr>
#> 1 1986 urn:lsid:marinespecies.o… 1986 Abra aequalis temperature=10;
#> 2 1976 urn:lsid:marinespecies.o… 1976 Abra aequalis <NA>
#> 3 1992 urn:lsid:marinespecies.o… 1992 Abra aequalis <NA>
#> 4 2015 urn:lsid:marinespecies.o… <NA> Abra aequalis <NA>
#> 5 2002 urn:lsid:marinespecies.o… 2002 Abra aequalis <NA>
#> 6 2013 urn:lsid:marinespecies.o… <NA> Abra aequalis <NA>
#> 7 2014 urn:lsid:marinespecies.o… <NA> Abra aequalis <NA>
#> 8 1978 urn:lsid:marinespecies.o… 1978 Abra aequalis observedindividualc…
#> 9 1994 urn:lsid:marinespecies.o… <NA> Abra aequalis <NA>
#> 10 2013 urn:lsid:marinespecies.o… <NA> Abra aequalis <NA>
#> # … with 757 more rows, and 98 more variables: superfamilyid <int>,
#> # individualCount <chr>, dropped <lgl>, aphiaID <int>, decimalLatitude <dbl>,
#> # subclassid <int>, phylumid <int>, familyid <int>, catalogNumber <chr>,
#> # basisOfRecord <chr>, subterclassid <int>, maximumDepthInMeters <dbl>,
#> # id <chr>, order <chr>, dataset_id <chr>, locality <chr>,
#> # decimalLongitude <dbl>, collectionCode <chr>, date_end <dbl>,
#> # speciesid <int>, superfamily <chr>, date_start <dbl>, genus <chr>,
#> # subterclass <chr>, eventDate <chr>, superorder <chr>,
#> # scientificNameAuthorship <chr>, absence <lgl>, superorderid <int>,
#> # genusid <int>, originalScientificName <chr>, marine <lgl>,
#> # minimumDepthInMeters <dbl>, infraclassid <int>, institutionCode <chr>,
#> # date_mid <dbl>, infraclass <chr>, identifiedBy <chr>, class <chr>,
#> # orderid <int>, kingdom <chr>, classid <int>, phylum <chr>, species <chr>,
#> # subclass <chr>, family <chr>, kingdomid <int>, node_id <chr>, flags <chr>,
#> # sss <dbl>, depth <dbl>, shoredistance <int>, sst <dbl>, bathymetry <int>,
#> # country <chr>, day <chr>, month <chr>, bibliographicCitation <chr>,
#> # waterBody <chr>, recordedBy <chr>, coordinatePrecision <chr>, type <chr>,
#> # taxonRemarks <chr>, occurrenceStatus <chr>, materialSampleID <chr>,
#> # occurrenceID <chr>, ownerInstitutionCode <chr>, samplingProtocol <chr>,
#> # taxonRank <chr>, datasetName <chr>, datasetID <chr>,
#> # associatedReferences <chr>, fieldNumber <chr>, stateProvince <chr>,
#> # preparations <chr>, county <chr>, modified <lgl>,
#> # infraspecificEpithet <lgl>, recordNumber <chr>, higherGeography <chr>,
#> # continent <chr>, typeStatus <lgl>, geodeticDatum <lgl>,
#> # specificEpithet <chr>, georeferenceSources <lgl>, verbatimDepth <chr>,
#> # occurrenceRemarks <chr>, collectionID <chr>, eventID <chr>,
#> # dateIdentified <chr>, habitat <chr>, institutionID <chr>, organismID <chr>,
#> # eventRemarks <chr>, taxonID <chr>, locationRemarks <chr>,
#> # countryCode <chr>, locationID <chr>
Other parameters include `geometry`, which accepts polygons in WKT format:
occurrence("Abra alba", geometry = "POLYGON ((2.59689 51.16772, 2.62436 51.14059, 2.76066 51.19225, 2.73216 51.20946, 2.59689 51.16772))")
#> # A tibble: 319 x 85
#> date_year scientificNameID year scientificName superfamilyid dropped
#> <int> <chr> <chr> <chr> <int> <lgl>
#> 1 2016 urn:lsid:marinespecies.… 2016 Abra alba 14636 FALSE
#> 2 2014 urn:lsid:marinespecies.… 2014 Abra alba 14636 FALSE
#> 3 1995 urn:lsid:marinespecies.… 1995 Abra alba 14636 FALSE
#> 4 2017 urn:lsid:marinespecies.… 2017 Abra alba 14636 FALSE
#> 5 1995 urn:lsid:marinespecies.… 1995 Abra alba 14636 FALSE
#> 6 2016 urn:lsid:marinespecies.… 2016 Abra alba 14636 FALSE
#> 7 1997 urn:lsid:marinespecies.… 1997 Abra alba 14636 FALSE
#> 8 1995 urn:lsid:marinespecies.… 1995 Abra alba 14636 FALSE
#> 9 2009 urn:lsid:marinespecies.… 2009 Abra alba 14636 FALSE
#> 10 2009 urn:lsid:marinespecies.… 2009 Abra alba 14636 FALSE
#> # … with 309 more rows, and 79 more variables: fieldNumber <chr>,
#> # aphiaID <int>, language <chr>, decimalLatitude <dbl>, subclassid <int>,
#> # phylumid <int>, familyid <int>, catalogNumber <chr>,
#> # occurrenceStatus <chr>, basisOfRecord <chr>, subterclassid <int>,
#> # modified <chr>, id <chr>, day <chr>, order <chr>, dataset_id <chr>,
#> # locality <chr>, decimalLongitude <dbl>, collectionCode <chr>,
#> # date_end <dbl>, speciesid <int>, occurrenceID <chr>, superfamily <chr>,
#> # date_start <dbl>, footprintSRS <chr>, month <chr>, genus <chr>,
#> # subterclass <chr>, eventDate <chr>, eventID <chr>, superorder <chr>,
#> # absence <lgl>, superorderid <int>, genusid <int>,
#> # originalScientificName <chr>, marine <lgl>, infraclassid <int>,
#> # date_mid <dbl>, infraclass <chr>, class <chr>, orderid <int>,
#> # datasetName <chr>, geodeticDatum <chr>, kingdom <chr>, recordedBy <chr>,
#> # classid <int>, phylum <chr>, species <chr>, subclass <chr>,
#> # datasetID <chr>, family <chr>, kingdomid <int>, node_id <chr>, flags <chr>,
#> # sss <dbl>, shoredistance <int>, sst <dbl>, bathymetry <dbl>,
#> # verbatimDepth <chr>, country <chr>, references <chr>,
#> # dynamicProperties <chr>, individualCount <chr>,
#> # bibliographicCitation <chr>, continent <chr>,
#> # scientificNameAuthorship <chr>, samplingEffort <chr>,
#> # institutionCode <chr>, specificEpithet <chr>, lifeStage <chr>,
#> # samplingProtocol <chr>, sex <chr>, maximumDepthInMeters <dbl>,
#> # minimumDepthInMeters <dbl>, footprintWKT <chr>, depth <dbl>,
#> # locationID <chr>, waterBody <chr>, occurrenceRemarks <chr>
WKT strings can be created by drawing on a map using the `get_geometry()` function.
A convenience function, `map_leaflet()`, is provided to visualize occurrences on an interactive map:
map_leaflet(occurrence("Abra sibogai"))
The `checklist()` function returns all taxa observed for a given set of filters.
cl <- checklist("Semelidae")
cl
#> # A tibble: 107 x 41
#> scientificName scientificNameAut… taxonID ncbi_id taxonRank taxonomicStatus
#> <chr> <chr> <int> <int> <chr> <chr>
#> 1 Abra alba (W. Wood, 1802) 141433 399303 Species accepted
#> 2 Abra nitida (O. F. Müller, 17… 141435 358434 Species accepted
#> 3 Scrobicularia p… (da Costa, 1778) 141424 665965 Species accepted
#> 4 Abra prismatica (Montagu, 1808) 141436 183592 Species accepted
#> 5 Abra tenuis (Montagu, 1803) 141439 NA Species accepted
#> 6 Abra Lamarck, 1818 138474 121180 Genus accepted
#> 7 Abra segmentum (Récluz, 1843) 141438 NA Species accepted
#> 8 Theora lubrica Gould, 1861 233903 1230554 Species accepted
#> 9 Semelidae Stoliczka, 1870 (… 1781 121179 Family accepted
#> 10 Abra aequalis (Say, 1822) 293683 2175524 Species accepted
#> # … with 97 more rows, and 35 more variables: acceptedNameUsage <chr>,
#> # acceptedNameUsageID <int>, is_marine <lgl>, is_brackish <lgl>,
#> # kingdom <chr>, phylum <chr>, class <chr>, subclass <chr>, infraclass <chr>,
#> # subterclass <chr>, superorder <chr>, order <chr>, superfamily <chr>,
#> # family <chr>, kingdomid <int>, phylumid <int>, classid <int>,
#> # subclassid <int>, infraclassid <int>, subterclassid <int>,
#> # superorderid <int>, orderid <int>, superfamilyid <int>, familyid <int>,
#> # records <int>, genus <chr>, genusid <int>, species <chr>, speciesid <int>,
#> # bold_id <int>, is_freshwater <lgl>, is_terrestrial <lgl>, wrims <lgl>,
#> # subspecies <chr>, subspeciesid <int>
Just like the `occurrence()` function, `checklist()` accepts WKT geometries:
checklist(geometry = "POLYGON ((2.59689 51.16772, 2.62436 51.14059, 2.76066 51.19225, 2.73216 51.20946, 2.59689 51.16772))")
#> # A tibble: 902 x 73
#> scientificName taxonID ncbi_id taxonomicStatus acceptedNameUsage
#> <chr> <int> <int> <chr> <chr>
#> 1 Nematoda 799 6231 accepted Nematoda
#> 2 Abra alba 141433 399303 accepted Abra alba
#> 3 Sabatieria celtica 121360 319964 accepted Sabatieria celtica
#> 4 Sabatieria punctata 153130 320140 accepted Sabatieria punctata
#> 5 Spiophanes bombyx 131187 696728 accepted Spiophanes bombyx
#> 6 Kurtiella bidentata 345281 1177057 accepted Kurtiella bidentata
#> 7 Nephtys hombergii 130359 36121 accepted Nephtys hombergii
#> 8 Oligochaeta 2036 NA accepted Oligochaeta
#> 9 Cirratulidae 919 46590 accepted Cirratulidae
#> 10 Fabulina fabula 146907 NA accepted Fabulina fabula
#> # … with 892 more rows, and 68 more variables: acceptedNameUsageID <int>,
#> # is_marine <lgl>, is_brackish <lgl>, is_freshwater <lgl>,
#> # is_terrestrial <lgl>, records <int>, taxonRank <chr>, kingdom <chr>,
#> # kingdomid <int>, phylum <chr>, phylumid <int>,
#> # scientificNameAuthorship <chr>, class <chr>, classid <int>, subclass <chr>,
#> # order <chr>, superfamily <chr>, family <chr>, subclassid <int>,
#> # orderid <int>, superfamilyid <int>, familyid <int>, infraclass <chr>,
#> # infraclassid <int>, subterclass <chr>, superorder <chr>,
#> # subterclassid <int>, superorderid <int>, suborder <chr>, suborderid <int>,
#> # subfamily <chr>, subfamilyid <int>, subphylum <chr>, subphylumid <int>,
#> # superclass <chr>, superclassid <int>, subkingdom <chr>, infrakingdom <chr>,
#> # subkingdomid <int>, infrakingdomid <int>, genus <chr>, genusid <int>,
#> # infraphylum <chr>, infraphylumid <int>, hab <lgl>, bold_id <int>,
#> # species <chr>, speciesid <int>, infraorder <chr>, parvorder <chr>,
#> # infraorderid <int>, parvorderid <int>, tribe <chr>, tribeid <int>,
#> # wrims <lgl>, subgenus <chr>, subgenusid <int>, category <chr>,
#> # section <chr>, subsection <chr>, sectionid <int>, subsectionid <int>,
#> # subspecies <chr>, subspeciesid <int>, variety <chr>, varietyid <int>,
#> # forma <chr>, formaid <int>
The package also provides access to MeasurementOrFact records associated with occurrences. When calling `occurrence()`, MeasurementOrFact records can be included by setting `mof = TRUE`:
occ <- occurrence("Abra tenuis", mof = TRUE)
MeasurementOrFact records are nested in the occurrence, but the `measurements()` function allows you to extract them to a flat data frame. Use the `fields` parameter to indicate which occurrence fields need to be preserved in the measurements table.
mof <- measurements(occ, fields = c("scientificName", "decimalLongitude", "decimalLatitude"))
mof
#> # A tibble: 19,469 x 18
#> id scientificName decimalLongitude decimalLatitude measurementID
#> <chr> <chr> <dbl> <dbl> <chr>
#> 1 00037215-c2e8-… Abra tenuis -1.22 45.9 <NA>
#> 2 00037215-c2e8-… Abra tenuis -1.22 45.9 <NA>
#> 3 00037215-c2e8-… Abra tenuis -1.22 45.9 <NA>
#> 4 00037215-c2e8-… Abra tenuis -1.22 45.9 <NA>
#> 5 00037215-c2e8-… Abra tenuis -1.22 45.9 <NA>
#> 6 000e9ab4-bee5-… Abra tenuis -1.20 46.3 <NA>
#> 7 000e9ab4-bee5-… Abra tenuis -1.20 46.3 <NA>
#> 8 000e9ab4-bee5-… Abra tenuis -1.20 46.3 <NA>
#> 9 000e9ab4-bee5-… Abra tenuis -1.20 46.3 <NA>
#> 10 000e9ab4-bee5-… Abra tenuis -1.20 46.3 <NA>
#> # … with 19,459 more rows, and 13 more variables: occurrenceID <chr>,
#> # measurementType <chr>, measurementTypeID <chr>, measurementValue <chr>,
#> # measurementValueID <chr>, measurementAccuracy <chr>, measurementUnit <chr>,
#> # measurementUnitID <chr>, measurementDeterminedDate <chr>,
#> # measurementDeterminedBy <chr>, measurementMethod <chr>,
#> # measurementRemarks <chr>, level <int>
Note that the MeasurementOrFact fields can be used as parameters to the `occurrence()` function. For example, to only get occurrences with associated biomass measurements:
library(dplyr)
occurrence("Abra tenuis", mof = TRUE, measurementtype = "biomass") %>%
measurements()
#> # A tibble: 44 x 15
#> id measurementID occurrenceID measurementType measurementTypeID
#> <chr> <chr> <chr> <chr> <chr>
#> 1 08269691… <NA> 476637_urn:lsid… individualCount http://vocab.nerc.a…
#> 2 08269691… <NA> 476637_urn:lsid… biomass <NA>
#> 3 08269691… <NA> <NA> sediment type http://vocab.nerc.a…
#> 4 16af269f… <NA> 475202_urn:lsid… individualCount http://vocab.nerc.a…
#> 5 16af269f… <NA> 475202_urn:lsid… biomass <NA>
#> 6 16af269f… <NA> <NA> sediment type http://vocab.nerc.a…
#> 7 2a20624e… <NA> 475273_urn:lsid… individualCount http://vocab.nerc.a…
#> 8 2a20624e… <NA> 475273_urn:lsid… biomass <NA>
#> 9 2a20624e… <NA> <NA> sediment type http://vocab.nerc.a…
#> 10 2b78df21… <NA> 475141_urn:lsid… individualCount http://vocab.nerc.a…
#> # … with 34 more rows, and 10 more variables: measurementValue <chr>,
#> # measurementValueID <chr>, measurementAccuracy <chr>, measurementUnit <chr>,
#> # measurementUnitID <chr>, measurementDeterminedDate <chr>,
#> # measurementDeterminedBy <chr>, measurementMethod <chr>,
#> # measurementRemarks <chr>, level <int>
Just like MeasurementOrFact records, nested DNADerivedData records can be extracted from the occurrence results.
occ <- occurrence("Prymnesiophyceae", datasetid = "62b97724-da17-4ca7-9b26-b2a22aeaab51", dna = TRUE)
occ
#> # A tibble: 1,136 x 54
#> eventID date_year scientificNameID scientificName absence dropped genusid
#> <chr> <int> <chr> <chr> <lgl> <lgl> <int>
#> 1 28215c01… 2015 urn:lsid:marinesp… Chrysochromul… FALSE FALSE 115090
#> 2 24416c01… 2016 urn:lsid:marinesp… Phaeocystaceae FALSE FALSE NA
#> 3 34916c01… 2016 urn:lsid:marinesp… Prymnesiaceae FALSE FALSE NA
#> 4 11216c01… 2016 urn:lsid:marinesp… Chrysochromul… FALSE FALSE 115090
#> 5 34916c01… 2016 urn:lsid:marinesp… Chrysochromul… FALSE FALSE 115090
#> 6 24416c01… 2016 urn:lsid:marinesp… Chrysochromul… FALSE FALSE 115090
#> 7 30214c01… 2014 urn:lsid:marinesp… Chrysochromul… FALSE FALSE 115090
#> 8 14213c01… 2013 urn:lsid:marinesp… Chrysochromul… FALSE FALSE 115090
#> 9 CANON16c… 2016 urn:lsid:marinesp… Chrysochromul… FALSE FALSE 115090
#> 10 22013c01… 2013 urn:lsid:marinesp… Chrysochromul… FALSE FALSE 115090
#> # … with 1,126 more rows, and 47 more variables: aphiaID <int>,
#> # decimalLatitude <dbl>, taxonID <chr>, originalScientificName <chr>,
#> # marine <lgl>, phylumid <int>, familyid <int>, basisOfRecord <chr>,
#> # taxonConceptID <chr>, subkingdom <chr>, date_mid <dbl>,
#> # identificationRemarks <chr>, nameAccordingTo <chr>, id <chr>, class <chr>,
#> # order <chr>, identificationReferences <chr>, organismQuantity <chr>,
#> # sampleSizeUnit <chr>, orderid <int>, dataset_id <chr>,
#> # decimalLongitude <dbl>, date_end <dbl>, speciesid <int>,
#> # occurrenceID <chr>, kingdom <chr>, subkingdomid <int>, date_start <dbl>,
#> # classid <int>, phylum <chr>, genus <chr>, organismQuantityType <chr>,
#> # species <chr>, associatedSequences <chr>, family <chr>, kingdomid <int>,
#> # sampleSizeValue <chr>, eventDate <chr>, node_id <chr>, flags <chr>,
#> # sss <dbl>, shoredistance <int>, sst <dbl>, bathymetry <int>, dna <list>,
#> # hab <lgl>, brackish <lgl>
dna <- dna_records(occ, fields = c("scientificName"))
dna %>%
select(scientificName, target_gene, DNA_sequence)
#> # A tibble: 1,136 x 3
#> scientificName target_gene DNA_sequence
#> <chr> <chr> <chr>
#> 1 Chrysochromulina st… 18S GCTCCTACCGATTGAATGATCCGGTGAGCTTTTTGGACTGTGG…
#> 2 Phaeocystaceae 18S GCTCCTACCGATTGAATGATCCGGTGAGGCCCCCGGACGGATT…
#> 3 Prymnesiaceae 18S GCTCCTACCGATTGGACGATCCGGTGAAGCCTCCGGACTGTAG…
#> 4 Chrysochromulina sc… 18S GCTCCTACCGATTGAATGATCCGGTGAGCTTTTTGGACTGTGG…
#> 5 Chrysochromulina st… 18S GCTCCTACCGATTGAATGATCCGGTGAGCTTTTTGGACAGTGG…
#> 6 Chrysochromulina 18S GCTCCTACCGATTGAATGATCCGGTGAGGCCCCCGGAATGGGA…
#> 7 Chrysochromulina 18S GCTCCTACCGATTGAATGATCCGGTGAGGCCCCCGGAGTGGGA…
#> 8 Chrysochromulina sp… 18S GCTCCTACCGATGGAAGGTTTAGGTGAGTTTTTCGGAGTTTTC…
#> 9 Chrysochromulina st… 18S GCTCCTACCGATTGAATGATCCGGTGAGCTTTTTGGACAGTGG…
#> 10 Chrysochromulina st… 18S GCTCCTACCGATTGAATGATCCGGTGAGCTTTTTGGACAGTGG…
#> # … with 1,126 more rows