rquery
1.2.0
now incorporates bquote()
quasi-quotation.
In fact this is enough to allow rqdatatable
to directly work the indirect column names example from our bquote()
articles (1, 2).
First let’s check what packages we have available for these examples.
<- FALSE
have_rqdatatable if (requireNamespace("rqdatatable", quietly = TRUE)) {
library("rqdatatable")
<- TRUE
have_rqdatatable
}<- FALSE
have_db if (requireNamespace("RSQLite", quietly = TRUE) &&
requireNamespace("DBI", quietly = TRUE)) {
<- TRUE
have_db }
library("rquery")
# define our parameters
# pretend these come from far away
# or as function arguments.
<- "am"
group_nm <- as.name("hp")
num_nm <- as.name("cyl")
den_nm <- as.name(paste0(num_nm, "_per_", den_nm))
derived_nm <- as.name(paste0("mean_", derived_nm))
mean_nm <- as.name("group_count") count_nm
Immediate mode example (note we are using newer rquery
1.2.1
notation “extend()
” instead of extend_nse()
).
# apply a parameterized pipeline using bquote
%.>%
mtcars extend(.,
:= .(num_nm)/.(den_nm)) %.>%
.(derived_nm) project(.,
:= mean(.(derived_nm)),
.(mean_nm) := length(.(derived_nm)),
.(count_nm) groupby = group_nm) %.>%
orderby(.,
group_nm)
## am mean_hp_per_cyl group_count
## 1 0 22.71491 19
## 2 1 23.41987 13
Stored operator tree examples.
# make an abstract description of the table to start with
<- mk_td("mtcars",
td as.character(list(group_nm, num_nm, den_nm)))
# helper function to adapt to later database environemnt
<- function(v) { length(v) }
count
# capture the operator pipeline
<- td %.>%
ops extend(.,
:= .(num_nm)/.(den_nm)) %.>%
.(derived_nm) project(.,
:= mean(.(derived_nm)),
.(mean_nm) := count(.(derived_nm)),
.(count_nm) groupby = group_nm) %.>%
orderby(.,
group_nm)
# apply it to data
%.>% ops mtcars
## am mean_hp_per_cyl group_count
## 1 0 22.71491 19
## 2 1 23.41987 13
We can display the pipeline in various forms.
# print the operator sequence
cat(format(ops))
## mk_td("mtcars", c(
## "am",
## "hp",
## "cyl")) %.>%
## extend(.,
## hp_per_cyl := hp / cyl) %.>%
## project(., mean_hp_per_cyl := mean(hp_per_cyl), group_count := count(hp_per_cyl),
## groupby = c('am')) %.>%
## order_rows(.,
## c('am'),
## reverse = c(),
## limit = NULL)
The same example in a database.
# connect to a database
<- DBI::dbConnect(RSQLite::SQLite(), ":memory:")
raw_connection
# build a representation of the database connection
<- rq_connection_tests(raw_connection)
dbopts <- rquery_db_info(connection = raw_connection,
db is_dbi = TRUE,
connection_options = dbopts)
print(db)
## [1] "rquery_db_info(SQLiteConnection, is_dbi=TRUE, note=\"\")"
# copy data to db
<- rquery::rq_copy_to(db, "mtcars", mtcars,
tr temporary = TRUE,
overwrite = TRUE)
print(tr)
## [1] "mk_td(\"mtcars\", c( \"mpg\", \"cyl\", \"disp\", \"hp\", \"drat\", \"wt\", \"qsec\", \"vs\", \"am\", \"gear\", \"carb\"))"
## mpg cyl disp hp drat wt qsec vs am gear carb
## 1 21.0 6 160 110 3.90 2.620 16.46 0 1 4 4
## 2 21.0 6 160 110 3.90 2.875 17.02 0 1 4 4
## 3 22.8 4 108 93 3.85 2.320 18.61 1 1 4 1
## 4 21.4 6 258 110 3.08 3.215 19.44 1 0 3 1
## 5 18.7 8 360 175 3.15 3.440 17.02 0 0 3 2
## 6 18.1 6 225 105 2.76 3.460 20.22 1 0 3 1
## ...
# materialize result remotely (without passing through R)
<- materialize(db, ops)
res ::dbReadTable(raw_connection, res$table_name) DBI
## am mean_hp_per_cyl group_count
## 1 0 22.71491 19
## 2 1 23.41987 13
# or execute and pull results back
execute(db, ops)
## am mean_hp_per_cyl group_count
## 1 0 22.71491 19
## 2 1 23.41987 13
# print the derived sql
<- to_sql(ops, db)
sql cat(sql)
## SELECT * FROM (
## SELECT `am`, AVG ( `hp_per_cyl` ) AS `mean_hp_per_cyl`, count ( `hp_per_cyl` ) AS `group_count` FROM (
## SELECT
## `am`,
## `hp` / `cyl` AS `hp_per_cyl`
## FROM (
## SELECT
## `am`,
## `hp`,
## `cyl`
## FROM
## `mtcars`
## ) tsql_43672649717521417876_0000000000
## ) tsql_43672649717521417876_0000000001
## GROUP BY
## `am`
## ) tsql_43672649717521417876_0000000002 ORDER BY `am`
# disconnect
::dbDisconnect(raw_connection)
DBIrm(list = c("raw_connection", "db"))