Accessing the Data Through the PIC-SURE RESTful API

The NHANES Unified Dataset may be accessed through the PIC-SURE RESTful API as described below. The dataset may also be accessed through the i2b2/tranSMART-based Dataset Explorer.

In this example, the PIC-SURE API is used to perform analysis in an R environment such as Jupyter Notebook. If you use Jupyter Notebook, you can use the ipynb file.

To obtain a key for the API, visit this page. A public_user key will be displayed in the top right of your screen. Replace the key in the step labeled "Start a secure session with the API" with the key you obtain.

Retrieving NHANES PCB-153 levels for two age groups (20-39 and 40-59)

Load Required Libraries 

#install.packages('readr', repos='http://cran.us.r-project.org')
#install.packages('httr', repos='http://cran.us.r-project.org')
library(readr)
library(httr)

 

Build Required URLs

These URLs represent the different web service calls to the PIC-SURE API. We build different strings to represent the different functions of the API.

# Root of the NHANES PIC-SURE deployment; every URL below is derived from it.
IRCT_REST_BASE_URL <- "https://nhanes.hms.harvard.edu/"

# REST URL
IRCT_CL_SERVICE_URL <- paste0(IRCT_REST_BASE_URL, "rest/v1/")

# Service URLs (one base per API service)
IRCT_RESOURCE_BASE_URL <- paste0(IRCT_CL_SERVICE_URL, "resourceService/")
IRCT_QUERY_BASE_URL <- paste0(IRCT_CL_SERVICE_URL, "queryService/")
IRCT_RESULTS_BASE_URL <- paste0(IRCT_CL_SERVICE_URL, "resultService/")
IRCT_PROCESS_BASE_URL <- paste0(IRCT_CL_SERVICE_URL, "processService/")

# List resources
IRCT_LIST_RESOURCE_URL <- paste0(IRCT_RESOURCE_BASE_URL, "resources")
IRCT_PATH_RESOURCE_URL <- paste0(IRCT_RESOURCE_BASE_URL, "path")

# Query
IRCT_START_QUERY_URL <- paste0(IRCT_QUERY_BASE_URL, "startQuery")
IRCT_CLAUSE_URL <- paste0(IRCT_QUERY_BASE_URL, "clause")
IRCT_RUN_QUERY_URL <- paste0(IRCT_QUERY_BASE_URL, "runQuery")

# Process
IRCT_START_PROCESS_URL <- paste0(IRCT_PROCESS_BASE_URL, "startProcess")
IRCT_UPDATE_PROCESS_URL <- paste0(IRCT_PROCESS_BASE_URL, "updateProcess")
IRCT_RUN_PROCESS_URL <- paste0(IRCT_PROCESS_BASE_URL, "runProcess")

# Result
IRCT_GET_RESULTS_STATUS_URL <- paste0(IRCT_RESULTS_BASE_URL, "resultStatus")
IRCT_GET_RESULTS_FORMATS_URL <- paste0(IRCT_RESULTS_BASE_URL, "availableFormats")
IRCT_GET_RESULTS_URL <- paste0(IRCT_RESULTS_BASE_URL, "result")

 

Get Age and PCB153 data

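This step gathers clinical data using both i2b2 and i2b2/tranSMART. We first declare the subset of patients we want to pull data on (in the query below, every patient with an AGE record), then we declare the fields to pull for those patients. Patients without a PCB153 measurement are returned with a missing value and are dropped later, before the analysis.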

Start a secure session with the API

# API key for the session. Replace this value with the public_user key you
# obtained for your own account.
IRCT_API_KEY <- "akm44dli9ght7so9m8h7vg0ogr"

# Derive the session endpoint from the service root built earlier, so a change
# to IRCT_REST_BASE_URL applies to this call as well.
IRCT_START_SESSION_URL <- paste0(IRCT_CL_SERVICE_URL, "securityService/startSession")

# Start the session; httr appends the key to the URL as `?key=...`.
startSession <- content(GET(IRCT_START_SESSION_URL, query = list(key = IRCT_API_KEY)))
# Print the parsed response so the status can be checked.
startSession

$status = 'success'

 

Run Query

Check Results

# JSON query sent to the runQuery endpoint.
#   "select": the fields to return - PCB153, AGE, and the female and male SEX
#             paths, which both use the alias "Gender".
#   "where":  a single clause on the AGE path using the CONTAINS predicate.
# NOTE(review): the key "ENOUNTER" looks like a misspelling of "ENCOUNTER".
# It is sent to the server exactly as written, so confirm against the API
# before changing it.
body <- '{
  "select": [
      {
        "field": {
          "pui": "/nhanes/Demo/laboratory/laboratory/pcbs/PCB153 (ng per g)/",
          "dataType": "STRING"
        },
        "alias": "pcb153"
      },
    {
        "field": {
         "pui": "/nhanes/Demo/demographics/demographics/AGE/",
         "dataType": "STRING"
        },
        "alias": "Age"
    },
{
        "field": {
          "pui": "/nhanes/Demo/demographics/demographics/SEX/female/",
          "dataType": "STRING"
        },
        "alias": "Gender"
      }, 
  {
            "field": {
              "pui": "/nhanes/Demo/demographics/demographics/SEX/male/",
              "dataType": "STRING"
            },
            "alias": "Gender"
          } 
  ],
  "where": [
      {
        "field": {
          "pui": "/nhanes/Demo/demographics/demographics/AGE/",
          "dataType": "STRING"
        },
        "predicate": "CONTAINS",
        "fields": {
          "ENOUNTER": "YES"
        }
      }
  ]
}'

# POST the query and keep the resultId field of the parsed response; the
# result-service calls further down append it to their URLs.
resultId <- content(POST(IRCT_RUN_QUERY_URL, body = body))$resultId
# Print the ID.
resultId

175017

 

Available Formats

Download in CSV Format

Transform Results and run t.test

 

# Ask the result service which formats are available for this result ID.
formats_url <- paste(IRCT_GET_RESULTS_FORMATS_URL, resultId, sep = "/")
response <- content(GET(formats_url))
response

  1. 'JSON'
  2. 'XML'
  3. 'XLSX'
  4. 'CSV'
# Fetch the result in CSV format as raw text, then parse it into a data frame.
csv_url <- paste(IRCT_GET_RESULTS_URL, resultId, "CSV", sep = "/")
response <- content(GET(csv_url), as = "text")
results <- read.csv(text = response)
# Preview the first five rows.
results[1:5, ]
No encoding supplied: defaulting to UTF-8.
  PATIENT_NUM Gender Age pcb153
1 10997 male 61 NA
2 10998 male 48 NA
3 10999 male 0 NA
4 22529 female 21 0.128
5 22528 male 16 NA

 

# Treat empty strings as missing, then keep only rows where every column is
# present.
results[results == ""] <- NA
results <- na.omit(results)

# Assign each row an age group:
#   1 = ages 20-39
#   2 = ages 40-59
#   0 = under 20 or over 59 (excluded from the comparison later)
# This is done with whole-column logical indexing instead of a row-by-row
# loop. It also works when no rows survive the filtering above, where
# `1:nrow(results)` would iterate over c(1, 0) and fail.
results$AgeGroup <- rep(NA_real_, nrow(results))
results$AgeGroup[results$Age < 20 | results$Age > 59] <- 0
results$AgeGroup[results$Age >= 20 & results$Age <= 39] <- 1
results$AgeGroup[results$Age >= 40 & results$Age <= 59] <- 2

 

# Keep only the rows whose age group is not 0, i.e. the two groups being
# compared.
keep_rows <- results$AgeGroup != 0
results4analysis <- results[keep_rows, ]

# Box plot of PCB153 by age group.
boxplot(
  as.numeric(unlist(pcb153)) ~ AgeGroup,
  data = results4analysis,
  main = "PCB153 vs AgeGroup",
  ylab = "PCB153 value",
  xlab = "Age Group"
)

 Box Plot

# Two-sample t-test of PCB153 between the two remaining age groups.
# The call is kept exactly as written because its expression text is echoed
# in the "data:" line of the printed result.
t.test(as.numeric(results4analysis$pcb153) ~ results4analysis$AgeGroup)
 
	Welch Two Sample t-test

data:  as.numeric(results4analysis$pcb153) by results4analysis$AgeGroup
t = -19.387, df = 1411.4, p-value < 2.2e-16
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
 -0.2228082 -0.1818622
sample estimates:
mean in group 1 mean in group 2 
      0.1184880       0.3208232 

Return to NHANES Unified Dataset product page.