library(whisker)
library(DBI)
library(condusco)
## Loading required package: jsonlite
## Loading required package: assertthat
## Loading required package: bigrquery
Create pipelines that run dynamic queries based on results of a query. A common use case is to dynamically query a range of dates, without hard-coding the any variables.
con <- dbConnect(RSQLite::SQLite(), ":memory:")
pipeline <- function(params){
query <-"
SELECT count(*) as n_hits
FROM user_hits
WHERE date(date_time) BETWEEN date('{{{date_low}}}') AND date('{{{date_high}}}')
;"
whisker.render(query,params)
}
run_pipeline_dbi(pipeline,
"SELECT date('now', '-5 days') as date_low, date('now') as date_high",
con
)
## [1] "\n SELECT count(*) as n_hits \n FROM user_hits \n WHERE date(date_time) BETWEEN date('2017-11-03') AND date('2017-11-08')\n ;"
dbDisconnect(con)
If list is defined, convert the JSON string to an object and iterate through name1,name2 pairs. This dynamically generates a query of variable length, based on the JSON object. In this example, we create a trivial JSON object manually. We’ll use a dynamically generated JSON object in the next example.
con <- dbConnect(RSQLite::SQLite(), ":memory:")
pipeline <- function(params){
query <- "SELECT {{{value}}} as dollars_won,
{{#list}}
'{{name1}}' as {{name2}},
{{/list}}
'{{{field}}}' as field
;"
whisker.render(query,params)
}
run_pipeline_dbi(
pipeline,
"SELECT value,
field,
list
FROM (
SELECT 1000 as value,
'word' as field,
'[{\"name1\":\"foo1\", \"name2\":\"bar1\"},{\"name1\":\"foo2\", \"name2\":\"bar2\"}]' as list
)
UNION ALL
SELECT 2000 as value,
'word' as field,
'[{\"name1\":\"foo1\", \"name2\":\"bar1\"},{\"name1\":\"foo2\", \"name2\":\"bar2\"}]' as list
",
con
)
## [1] "SELECT 1000 as dollars_won,\n 'foo1' as bar1,\n 'foo2' as bar2,\n 'word' as field\n ;"
## [2] "SELECT 2000 as dollars_won,\n 'foo1' as bar1,\n 'foo2' as bar2,\n 'word' as field\n ;"
dbDisconnect(con)
For the top 5 represented horsepowers in the mtcars dataset, create features for each of those horsepowers for each of the types of cylinders. For example, we dynamically create features like n_hp_110=4, for cyl=6.
con <- dbConnect(RSQLite::SQLite(), ":memory:")
dbWriteTable(con, "mtcars", mtcars)
#for each cylinder count, count the number of top 5 hps it has
pipeline <- function(params){
query <- "SELECT
{{#list}}
SUM(CASE WHEN hp='{{val}}' THEN 1 ELSE 0 END )as n_hp_{{val}},
{{/list}}
cyl
FROM mtcars
GROUP BY cyl
;"
dbGetQuery(
con,
whisker.render(query,params)
)
}
#pass the top 5 most common hps as val params
run_pipeline_dbi(
pipeline,
'
SELECT "[" || GROUP_CONCAT("{ ""val"": """ || hp || """ }") || "]" AS list
FROM (
SELECT
CAST(hp as INTEGER) as HP,
count(hp) as cnt
FROM mtcars
GROUP BY hp
ORDER BY cnt DESC
LIMIT 5
)
',
con
)
## [[1]]
## n_hp_110 n_hp_175 n_hp_180 n_hp_66 n_hp_123 cyl
## 1 0 0 0 2 0 4
## 2 3 1 0 0 2 6
## 3 0 2 3 0 0 8
dbDisconnect(con)