# R code for the easyPubMed walkthrough, organised as knitr-style chunks.
# Each "## ----" line is a chunk header; chunks flagged eval = FALSE are kept
# commented out so that sourcing this file never installs packages or runs
# the optional queries. Chunks flagged include = FALSE load the pre-computed
# results shipped in the EPMsamples dataset.

## ----inst__0001, include = TRUE, echo = TRUE, eval = FALSE---------------
# install.packages("easyPubMed")

## ----inst___02, include = TRUE, echo = TRUE, eval = FALSE----------------
# library(easyPubMed)

## ----include = FALSE-----------------------------------------------------
library(easyPubMed)
data("EPMsamples")

## ----inst___04, include = TRUE, echo = TRUE, eval = FALSE----------------
# library(devtools)
# install_github("dami82/easyPubMed")

## ----message = FALSE, warning = FALSE, eval = FALSE----------------------
# my_query <- 'Damiano Fantini[AU] AND "2018"[PDAT]'
# my_entrez_id <- get_pubmed_ids(my_query)
# my_abstracts_txt <- fetch_pubmed_data(my_entrez_id, format = "abstract")

## ----message = FALSE, warning = FALSE, eval = TRUE, echo = FALSE, include=FALSE----
# Loading from the dataset attached to the package
# You may omit this conversion if your system supports UTF8
# ("UTF-8" is the encoding name that R documents as available everywhere)
my_abstracts_txt <- iconv(
  EPMsamples$DF_papers_abs$pm_res,
  from = "UTF-8", to = "ASCII", sub = "."
)

## ----message = FALSE, warning = FALSE, eval = TRUE-----------------------
head(my_abstracts_txt)

## ----message = FALSE, warning = FALSE, eval = FALSE----------------------
# my_abstracts_xml <- fetch_pubmed_data(pubmed_id_list = my_entrez_id)

## ----include=FALSE, echo = FALSE, eval = TRUE----------------------------
# Loading from the dataset attached to the package
# You may omit this conversion if your system supports UTF8
my_abstracts_xml <- iconv(
  EPMsamples$DF_papers_std$pm_res,
  from = "UTF-8", to = "ASCII", sub = "."
)

## ----message = FALSE, warning = FALSE, eval = TRUE-----------------------
class(my_abstracts_xml)
my_titles <- custom_grep(my_abstracts_xml, "ArticleTitle", "char")

# Trim long titles: anything above 75 characters is cut to its first 70
# characters and marked with a trailing ellipsis
TTM <- nchar(my_titles) > 75
my_titles[TTM] <- paste0(substr(my_titles[TTM], 1, 70), "...")

# Preview the first few titles
head(my_titles)

## ----message = FALSE, warning = FALSE, eval=FALSE------------------------
# new_query <- 'Bladder[TIAB] AND Northwestern[AD] AND Chicago[AD] AND "2018"[PDAT]'
# out.A <- batch_pubmed_download(pubmed_query_string = new_query,
#                                format = "xml",
#                                batch_size = 20,
#                                dest_file_prefix = "easyPM_example",
#                                encoding = "ASCII")
#

## ----message = FALSE, warning = FALSE, include = FALSE, echo = FALSE, eval=TRUE----
# Loading from the dataset attached to the package
out.A <- EPMsamples$NUBL_dw18$pm_res

## ----message = FALSE, warning = FALSE, eval=TRUE-------------------------
# this variable stores the name of the output files
print(out.A)

## ----message = FALSE, warning = FALSE, eval=TRUE-------------------------
my_PM_list <- articles_to_list(pubmed_data = my_abstracts_xml)
class(my_PM_list[1])
print(substr(my_PM_list[4], 1, 510))

## ----message = FALSE, warning = FALSE, eval=TRUE-------------------------
curr_PM_record <- my_PM_list[1]
custom_grep(curr_PM_record, tag = "PubDate")
custom_grep(curr_PM_record, tag = "LastName", format = "char")

## ----message = FALSE, warning = FALSE, eval=TRUE-------------------------
# Select a single PubMed record from the internal dataset, NUBL_1618
curr_PM_record <- easyPubMed::EPMsamples$NUBL_1618$rec_lst[[37]]
my.df <- article_to_df(curr_PM_record, max_chars = 18)

# Fields extracted from the PubMed record
head(colnames(my.df))

# Trim long strings and then display some content:
# each row corresponds to one author
my.df$title <- substr(my.df$title, 1, 15)
my.df$address <- substr(my.df$address, 1, 19)
my.df$jabbrv <- substr(my.df$jabbrv, 1, 10)

# Visualize
my.df[, c("pmid", "title", "jabbrv", "firstname", "address")]

## ----message = FALSE, warning = FALSE, eval=TRUE-------------------------
my.df2 <- article_to_df(curr_PM_record, autofill = TRUE)

# Trim long strings and then display some content:
# each row corresponds to one author
my.df2$title <- substr(my.df2$title, 1, 15)
my.df2$jabbrv <- substr(my.df2$jabbrv, 1, 10)
my.df2$address <- substr(my.df2$address, 1, 19)

# Visualize
my.df2[, c("pmid", "title", "jabbrv", "firstname", "address")]

## ----message = FALSE, warning = FALSE, eval=TRUE-------------------------
# Convert every record to a data frame, then stack them in a single call
xx <- lapply(my_PM_list, article_to_df, autofill = TRUE, max_chars = 50)
full_df <- do.call(rbind, xx)
full_df[seq(1, nrow(full_df), by = 10), c("pmid", "lastname", "jabbrv")]

## ----takes_some_time, message = FALSE, warning = FALSE, eval=TRUE--------
new_query <- 'Bladder[TIAB] AND Northwestern[AD] AND Chicago[AD] AND "2018"[PDAT]'
out.B <- batch_pubmed_download(
  pubmed_query_string = new_query,
  dest_file_prefix = "NUBL_18_",
  encoding = "ASCII"
)

# Retrieve the full name of the XML file downloaded in the previous step
new_PM_file <- out.B[[1]]
new_PM_df <- table_articles_byAuth(
  pubmed_data = new_PM_file,
  included_authors = "first",
  max_chars = 0,
  encoding = "ASCII"
)

# Printing a sample of the resulting data frame
new_PM_df$address <- substr(new_PM_df$address, 1, 28)
new_PM_df$jabbrv <- substr(new_PM_df$jabbrv, 1, 9)
sid <- seq(5, nrow(new_PM_df), by = 10)
new_PM_df[sid, c("pmid", "year", "jabbrv", "lastname", "address")]

## ----takes_some_time2, message = FALSE, warning = FALSE, eval=FALSE------
# new_query <- 'Bladder[TIAB] AND Northwestern[AD] AND Chicago[AD] AND "2018"[PDAT]'
# new_query <- get_pubmed_ids(new_query)
# fetched_data <- fetch_pubmed_data(new_query, encoding = "ASCII")

## ----takes_some_time2biz, include = FALSE, echo = FALSE, message = FALSE, warning = FALSE, eval=TRUE----
fetched_data <- EPMsamples$NUBL_1618$pm_res

## ----takes_some_time2triz, message = FALSE, warning = FALSE, eval=TRUE----
new_PM_df <- table_articles_byAuth(
  pubmed_data = fetched_data,
  included_authors = "first",
  max_chars = 0,
  encoding = "ASCII"
)

# Printing a sample of the resulting data frame
new_PM_df$address <- substr(new_PM_df$address, 1, 28)
new_PM_df$jabbrv <- substr(new_PM_df$jabbrv, 1, 9)
sid <- seq(5, nrow(new_PM_df), by = 10)
new_PM_df[sid, c("pmid", "year", "jabbrv", "lastname", "address")]

## ----message = FALSE, warning = FALSE, eval=TRUE-------------------------
sessionInfo()

## ----include = FALSE-----------------------------------------------------
# cleaning
# out.A comes from the bundled dataset, so the files it names may not be on
# disk; file.remove() warns (it does not raise an error) for missing files,
# so only delete the ones that are actually present.
tmp_files <- as.character(c(out.A, out.B))
tmp_files <- tmp_files[file.exists(tmp_files)]
invisible(file.remove(tmp_files))