Examples with DBI and SQLite

Roland Stevenson

2017-11-08

library(whisker)
library(DBI)
library(condusco)
## Loading required package: jsonlite
## Loading required package: assertthat
## Loading required package: bigrquery

Simple Example

Create pipelines that run dynamic queries based on results of a query. A common use case is to dynamically query a range of dates, without hard-coding the any variables.

con <- dbConnect(RSQLite::SQLite(), ":memory:")

pipeline <- function(params){

  query <-"
    SELECT count(*) as n_hits 
    FROM user_hits 
    WHERE date(date_time) BETWEEN date('{{{date_low}}}') AND date('{{{date_high}}}')
  ;"

  whisker.render(query,params)

}

run_pipeline_dbi(pipeline,
  "SELECT date('now', '-5 days') as date_low, date('now') as date_high",
  con
)
## [1] "\n    SELECT count(*) as n_hits \n    FROM user_hits \n    WHERE date(date_time) BETWEEN date('2017-11-03') AND date('2017-11-08')\n  ;"
dbDisconnect(con)

Dynamically generated queries via JSON

If list is defined, convert the JSON string to an object and iterate through name1,name2 pairs. This dynamically generates a query of variable length, based on the JSON object. In this example, we create a trivial JSON object manually. We’ll use a dynamically generated JSON object in the next example.

con <- dbConnect(RSQLite::SQLite(), ":memory:")

pipeline <- function(params){

  query <- "SELECT {{{value}}} as dollars_won,
    {{#list}}
    '{{name1}}' as {{name2}},
    {{/list}}
    '{{{field}}}' as field
  ;"

  whisker.render(query,params)

}


run_pipeline_dbi(
  pipeline,
  "SELECT value,
    field,
    list
  FROM (
    SELECT 1000 as value,
      'word' as field,
      '[{\"name1\":\"foo1\", \"name2\":\"bar1\"},{\"name1\":\"foo2\", \"name2\":\"bar2\"}]' as list
  )
  UNION ALL
  SELECT 2000 as value,
    'word' as field,
    '[{\"name1\":\"foo1\", \"name2\":\"bar1\"},{\"name1\":\"foo2\", \"name2\":\"bar2\"}]' as list
  ",
  con
)
## [1] "SELECT 1000 as dollars_won,\n    'foo1' as bar1,\n    'foo2' as bar2,\n    'word' as field\n  ;"
## [2] "SELECT 2000 as dollars_won,\n    'foo1' as bar1,\n    'foo2' as bar2,\n    'word' as field\n  ;"
dbDisconnect(con)

Feature Generation Query

For the top 5 represented horsepowers in the mtcars dataset, create features for each of those horsepowers for each of the types of cylinders. For example, we dynamically create features like n_hp_110=4, for cyl=6.

con <- dbConnect(RSQLite::SQLite(), ":memory:")

dbWriteTable(con, "mtcars", mtcars)

#for each cylinder count, count the number of top 5 hps it has
pipeline <- function(params){
  
  query <- "SELECT
    {{#list}}
    SUM(CASE WHEN hp='{{val}}' THEN 1 ELSE 0 END )as n_hp_{{val}},
  {{/list}}
    cyl
    FROM mtcars
    GROUP BY cyl
  ;"


  dbGetQuery(
    con,
    whisker.render(query,params)
  )
}


#pass the top 5 most common hps as val params
run_pipeline_dbi(
  pipeline,
  '
  SELECT "[" || GROUP_CONCAT("{ ""val"": """ || hp ||  """ }") || "]" AS list
  FROM (
    SELECT 
      CAST(hp as INTEGER) as HP,
      count(hp) as cnt
    FROM mtcars 
    GROUP BY hp
    ORDER BY cnt DESC
    LIMIT 5
  )
  ',
  con
)
## [[1]]
##   n_hp_110 n_hp_175 n_hp_180 n_hp_66 n_hp_123 cyl
## 1        0        0        0       2        0   4
## 2        3        1        0       0        2   6
## 3        0        2        3       0        0   8
dbDisconnect(con)