orcidtr provides a modern, user-friendly interface to the ORCID public API. ORCID (Open Researcher and Contributor ID) is a persistent digital identifier that connects researchers with their professional activities. This package allows you to programmatically fetch:
All functions return structured data.table objects for
efficient data manipulation and analysis.
Let’s start by fetching basic biographical information for a researcher. We’ll use Hadley Wickham’s ORCID as an example:
# Employment history for the example ORCID iD
employments <- orcid_employments("0000-0003-4757-117X")
employment_view <- employments[
  ,
  .(organization, role, city, country, start_date, end_date)
]
print(employment_view)

# Education records for the same ORCID iD
education <- orcid_educations("0000-0003-4757-117X")
education_view <- education[, .(organization, role, start_date, end_date)]
print(education_view)

# Fetch publications and other works
works <- orcid_works("0000-0003-4757-117X")

# Display summary: total number of rows and how many are journal articles
# (na.rm = TRUE so a missing `type` does not turn the count into NA).
cat(sprintf("Total works: %d\n", nrow(works)))
cat(sprintf("Journal articles: %d\n", sum(works$type == "journal-article", na.rm = TRUE)))

# View recent publications: sort newest first, keep at most five rows.
# head() is used instead of `[1:5, ...]` because integer indices past the
# last row produce all-NA rows when fewer than five works are returned.
recent_works <- head(works[order(-publication_date)], 5L)[
  ,
  .(title, type, publication_date, doi)
]
print(recent_works)
# Funding information; only printed when at least one row came back
funding <- orcid_funding("0000-0003-4757-117X")
has_funding <- nrow(funding) > 0
if (has_funding) {
  funding_view <- funding[
    ,
    .(title, organization, start_date, amount, currency)
  ]
  print(funding_view)
}

# Peer review activities; report the count when there is anything to report
reviews <- orcid_peer_reviews("0000-0003-4757-117X")
review_count <- nrow(reviews)
if (review_count > 0) {
  cat(sprintf("Total peer reviews: %d\n", review_count))
}

# Fetch distinctions and awards
distinctions <- orcid_distinctions("0000-0003-4757-117X")
has_distinctions <- nrow(distinctions) > 0
if (has_distinctions) {
  distinction_view <- distinctions[, .(organization, role, start_date)]
  print(distinction_view)
}

# Professional memberships, printed only when present
memberships <- orcid_memberships("0000-0003-4757-117X")
has_memberships <- nrow(memberships) > 0
if (has_memberships) {
  membership_view <- memberships[, .(organization, role, start_date, end_date)]
  print(membership_view)
}

# Other affiliation types: invited positions, qualifications, services
invited_positions <- orcid_invited_positions("0000-0003-4757-117X")
qualifications <- orcid_qualifications("0000-0003-4757-117X")
services <- orcid_services("0000-0003-4757-117X")
Instead of calling multiple individual functions, you can fetch all sections at once:
# Fetch the complete record in one call and list the sections it contains
record <- orcid_fetch_record("0000-0003-4757-117X")
names(record)

# Individual sections are accessed by name
record$works
record$employments
record$person

# Restrict the fetch to the sections that are actually needed
wanted_sections <- c("works", "employments", "funding")
record <- orcid_fetch_record("0000-0003-4757-117X", sections = wanted_sections)
The orcid_activities() function provides summaries of
all activity types in a single API request:
When working with multiple researchers, use
orcid_fetch_many():
# ORCID iDs to fetch in one batch.
# NOTE(review): 0000-0002-1825-0097 appears to be ORCID's documented sample
# iD (Josiah Carberry) rather than a real person's record; confirm the name
# labels on the second and third entries before publishing.
orcids <- c(
  "0000-0003-4757-117X", # Hadley Wickham
  "0000-0002-1825-0097", # Yihui Xie
  "0000-0003-1419-2405"  # Jenny Bryan
)

# Works for every iD, combined into one table
all_works <- orcid_fetch_many(orcids, section = "works")

# Row count per researcher
works_by_researcher <- all_works[, .N, by = orcid]
print(works_by_researcher)

# Row count per work type, most frequent type first
type_counts <- all_works[, .N, by = type]
works_by_type <- type_counts[order(-N)]
print(works_by_type)

# Search by family name
results <- orcid_search(family_name = "Wickham")
name_hits <- results[, .(orcid_id, given_names, family_name)]
print(name_hits)

# Search by affiliation, asking for 20 rows
results <- orcid_search(affiliation_org = "Stanford University", rows = 20)

# Several criteria can be supplied in the same call
results <- orcid_search(
  family_name = "Smith",
  given_names = "John",
  affiliation_org = "MIT"
)
For more complex searches, use the orcid() function with
Solr syntax:
# Field-specific query: family name combined with an affiliation name
name_affiliation_query <-
  'family-name:Smith AND affiliation-org-name:"Harvard University"'
results <- orcid(query = name_affiliation_query, rows = 10)

# Keyword query matching either phrase
keyword_query <- 'keyword:("machine learning" OR "artificial intelligence")'
results <- orcid(query = keyword_query, rows = 15)

# Compare the "found" attribute of the result with the rows returned
total_found <- attr(results, "found")
cat(sprintf("Total found: %d\n", total_found))
cat(sprintf("Returned: %d\n", nrow(results)))
library(orcidtr)
# Works for the example ORCID iD
works <- orcid_works("0000-0003-4757-117X")

# Add pub_year by reference: first four characters of publication_date
works[, pub_year := as.integer(substr(publication_date, 1, 4))]

# Publications per year, skipping rows without a year, in ascending order
dated_works <- works[!is.na(pub_year)]
year_counts <- dated_works[, .N, by = pub_year]
pub_by_year <- year_counts[order(pub_year)]
print(pub_by_year)

# Publications per type, most frequent first
type_counts <- works[, .N, by = type]
pub_by_type <- type_counts[order(-N)]
print(pub_by_type)

# Fetch works for multiple researchers
orcids <- c(
  "0000-0003-4757-117X",
  "0000-0002-1825-0097",
  "0000-0003-1419-2405"
)
all_works <- orcid_fetch_many(orcids, section = "works")

# Number of works per researcher
works_count <- all_works[, .N, by = orcid]
print(works_count)

# Keep works dated on or after the cutoff, then count them per researcher
recent_cutoff <- "2020-01-01"
is_recent <- all_works$publication_date >= recent_cutoff
recent_works <- all_works[is_recent]
recent_count <- recent_works[, .N, by = orcid]
print(recent_count)

# Fetch funding for a researcher
funding <- orcid_funding("0000-0003-4757-117X")

# Number of funding rows per organization, largest first
org_counts <- funding[, .N, by = organization]
funding_by_org <- org_counts[order(-N)]
print(funding_by_org)

# Number of funding rows per type, largest first
type_counts <- funding[, .N, by = type]
funding_by_type <- type_counts[order(-N)]
print(funding_by_type)

# Active grants: no end date recorded, or an end date that is not yet past
active_grants <- funding[is.na(end_date) | end_date >= Sys.Date()]
active_view <- active_grants[, .(title, organization, start_date)]
print(active_view)
Important: The ORCID public API does NOT require authentication for reading public data. All functions in this package work without any token by default.
Authentication is entirely optional and only provides:
Most users do not need authentication. Consider getting a token only if:
# With the ORCID_TOKEN environment variable set, no token argument is needed
works <- orcid_works("0000-0003-4757-117X")

# An explicitly passed token overrides the environment variable
works <- orcid_works(
  "0000-0003-4757-117X",
  token = "your-token-here"
)
Note: If you set an ORCID_TOKEN environment variable but find it’s not working, unset it. The public API works fine without any token.
# Production use: catch a failed request, report it, and fall back to an
# empty data.table so downstream code still receives a table.
on_fetch_error <- function(e) {
  message("Failed to fetch works: ", conditionMessage(e))
  data.table::data.table()
}
result <- tryCatch(
  orcid_works("0000-0003-4757-117X"),
  error = on_fetch_error
)

# Batch operations: stop_on_error = FALSE continues despite errors, here
# triggered by the deliberately malformed second iD.
orcids <- c("0000-0003-4757-117X", "invalid-orcid", "0000-0002-1825-0097")
all_works <- orcid_fetch_many(orcids, section = "works", stop_on_error = FALSE)
The ORCID public API has rate limits (~24 requests/second for unauthenticated requests). For large batch operations:
For frequently accessed data, consider caching:
# Simple file-based cache: an RDS file holding a named list of records,
# keyed by ORCID iD.
cache_file <- "orcid_cache.rds"

# Return the record for `orcid_id`, reading it from `cache_file` when it is
# already stored there; otherwise fetch it with orcid_fetch_record(), add it
# to the cache file, and return it.
fetch_with_cache <- function(orcid_id) {
  # A missing cache file is treated as an empty cache
  store <- if (file.exists(cache_file)) readRDS(cache_file) else list()

  if (orcid_id %in% names(store)) {
    message("Using cached data for ", orcid_id)
    return(store[[orcid_id]])
  }

  # Cache miss: fetch fresh data and persist the enlarged cache
  fresh <- orcid_fetch_record(orcid_id)
  store[[orcid_id]] <- fresh
  saveRDS(store, cache_file)
  fresh
}
record <- fetch_with_cache("0000-0003-4757-117X")
All functions return data.table objects, which provide
efficient data manipulation:
library(data.table)
works <- orcid_works("0000-0003-4757-117X")

# Filter by type
articles <- works[type == "journal-article"]

# Select specific columns
works[, .(title, publication_date, doi)]

# Chain operations: journal articles with a DOI, newest first, at most ten.
# head() is used instead of `[1:10, ...]` because integer indices past the
# last row produce all-NA rows when fewer than ten articles match.
head(
  works[type == "journal-article" & !is.na(doi)][order(-publication_date)],
  10L
)[, .(title, doi)]

# Aggregate: row count per type, most frequent first
works[, .N, by = type][order(-N)]
The orcidtr package provides comprehensive access to ORCID data with:
data.table return objects
For more information, see help(package = "orcidtr") or
visit the package website.