The second example produces a demographics summary table of selected variables. The report shows statistics for each of the four treatment groups.
Note the following about this example:
datastep()
function from the libr package allows for a
complex conditional in the middle of a dplyr
pipeline.library(tidyverse)
library(sassy)
# Prepare Log -------------------------------------------------------------
options("logr.autolog" = TRUE,
"logr.notes" = FALSE)
# Get path to temp directory
<- tempdir()
tmp
# Get sample data directory
<- system.file("extdata", package = "sassy")
dir
# Open log
<- log_open(file.path(tmp, "example2.log"))
lgpth
# Load and Prepare Data ---------------------------------------------------
sep("Prepare Data")
# Define data library
libname(sdtm, dir, "csv")
# Loads data into workspace
lib_load(sdtm)
# Prepare data
<- sdtm.DM |>
dm_mod select(USUBJID, SEX, AGE, ARM) |>
filter(ARM != "SCREEN FAILURE") |>
datastep({
if (AGE >= 18 & AGE <= 24)
= "18 to 24"
AGECAT else if (AGE >= 25 & AGE <= 44)
= "25 to 44"
AGECAT else if (AGE >= 45 & AGE <= 64)
<- "45 to 64"
AGECAT else if (AGE >= 65)
<- ">= 65"
AGECAT
|> put()
})
put("Get ARM population counts")
<- count(dm_mod, ARM) |> deframe() |> put()
arm_pop
# Age Summary Block -------------------------------------------------------
sep("Create summary statistics for age")
<-
age_block |>
dm_mod group_by(ARM) |>
summarise( N = fmt_n(AGE),
`Mean (SD)` = fmt_mean_sd(AGE),
Median = fmt_median(AGE),
`Q1 - Q3` = fmt_quantile_range(AGE),
Range = fmt_range(AGE)) |>
pivot_longer(-ARM,
names_to = "label",
values_to = "value") |>
pivot_wider(names_from = ARM,
values_from = "value") |>
add_column(var = "AGE", .before = "label") |>
put()
# Age Group Block ----------------------------------------------------------
sep("Create frequency counts for Age Group")
put("Create age group frequency counts")
<-
ageg_block |>
dm_mod select(ARM, AGECAT) |>
group_by(ARM, AGECAT) |>
summarize(n = n()) |>
pivot_wider(names_from = ARM,
values_from = n,
values_fill = 0) |>
transmute(var = "AGECAT",
label = factor(AGECAT, levels = c("18 to 24",
"25 to 44",
"45 to 64",
">= 65")),
`ARM A` = fmt_cnt_pct(`ARM A`, arm_pop["ARM A"]),
`ARM B` = fmt_cnt_pct(`ARM B`, arm_pop["ARM B"]),
`ARM C` = fmt_cnt_pct(`ARM C`, arm_pop["ARM C"]),
`ARM D` = fmt_cnt_pct(`ARM D`, arm_pop["ARM D"])) |>
arrange(label) |>
put()
# Sex Block ---------------------------------------------------------------
sep("Create frequency counts for SEX")
# Create user-defined format
<- value(condition(is.na(x), "Missing"),
fmt_sex condition(x == "M", "Male"),
condition(x == "F", "Female"),
condition(TRUE, "Other")) |> put()
# Create sex frequency counts
<-
sex_block |>
dm_mod select(ARM, SEX) |>
group_by(ARM, SEX) |>
summarize(n = n()) |>
pivot_wider(names_from = ARM,
values_from = n,
values_fill = 0) |>
transmute(var = "SEX",
label = fct_relevel(SEX, "M", "F"),
`ARM A` = fmt_cnt_pct(`ARM A`, arm_pop["ARM A"]),
`ARM B` = fmt_cnt_pct(`ARM B`, arm_pop["ARM B"]),
`ARM C` = fmt_cnt_pct(`ARM C`, arm_pop["ARM C"]),
`ARM D` = fmt_cnt_pct(`ARM D`, arm_pop["ARM D"])) |>
arrange(label) |>
mutate(label = fapply(label, fmt_sex)) |>
put()
put("Combine blocks into final data frame")
<- bind_rows(age_block, ageg_block, sex_block) |> put()
final
# Report ------------------------------------------------------------------
sep("Create and print report")
<- c("AGE" = "Age", "AGECAT" = "Age Group", "SEX" = "Sex")
var_fmt
# Create Table
<- create_table(final, first_row_blank = TRUE,
tbl borders = c("top", "bottom")) |>
column_defaults(from = `ARM A`, to = `ARM D`,
align = "center", width = 1.25) |>
stub(vars = c("var", "label"), "Variable", width = 2.5) |>
define(var, blank_after = TRUE, dedupe = TRUE, label = "Variable",
format = var_fmt,label_row = TRUE) |>
define(label, indent = .25, label = "Demographic Category") |>
define(`ARM A`, label = "Placebo", n = arm_pop["ARM A"]) |>
define(`ARM B`, label = "Drug 50mg", n = arm_pop["ARM B"]) |>
define(`ARM C`, label = "Drug 100mg", n = arm_pop["ARM C"]) |>
define(`ARM D`, label = "Competitor", n = arm_pop["ARM D"])
<- file.path(tmp, "output/example2.rtf")
pth
<- create_report(pth, output_type = "DOCX", font = "Arial") |>
rpt set_margins(top = 1, bottom = 1) |>
page_header("Sponsor: Company", "Study: ABC") |>
titles("Table 1.0", "Analysis of Demographic Characteristics",
"Safety Population", bold = TRUE, font_size = 11) |>
add_content(tbl) |>
footnotes("Program: DM_Table.R",
"NOTE: Denominator based on number of non-missing responses.") |>
page_footer(paste0("Date Produced: ", fapply(Sys.time(), "%d%b%y %H:%M")),
right = "Page [pg] of [tpg]")
write_report(rpt)
# Clean Up ----------------------------------------------------------------
# Unload library from workspace
lib_unload(sdtm)
# Close log
log_close()
# View files
# file.show(pth)
# file.show(lgpth)
Here is the output report:
And here is the log:
=========================================================================
Log Path: C:/Users/dbosa/AppData/Local/Temp/RtmpKC93lu/log/example2.log
Working Directory: C:/packages/Testing
User Name: dbosa
R Version: 4.0.5 (2021-03-31)
Machine: SOCRATES x86-64
Operating System: Windows 10 x64 build 19041
Log Start Time: 2021-06-27 14:35:47
=========================================================================
=========================================================================
Prepare Data
=========================================================================
# library 'sdtm': 4 items
- attributes: csv not loaded
- path: C:/Users/dbosa/Documents/R/win-library/4.0/sassy/extdata
- items:
Name Extension Rows Cols Size LastModified
1 AE csv 150 27 88.1 Kb 2021-04-04 11:55:58
2 DM csv 87 24 45.2 Kb 2021-04-04 11:55:58
3 SV csv 685 10 69.9 Kb 2021-04-04 11:55:58
4 VS csv 3358 17 467 Kb 2021-04-04 11:55:58
lib_load: library 'sdtm' loaded
select: dropped 20 variables (STUDYID, DOMAIN, SUBJID, RFSTDTC, RFENDTC, …)
filter: removed 2 rows (2%), 85 rows remaining
datastep: columns decreased from 4 to 5
# A tibble: 85 x 5
USUBJID SEX AGE ARM AGECAT
<chr> <chr> <dbl> <chr> <chr>
1 ABC-01-049 M 39 ARM D 25 to 44
2 ABC-01-050 M 47 ARM B 45 to 64
3 ABC-01-051 M 34 ARM A 25 to 44
4 ABC-01-052 F 45 ARM C 45 to 64
5 ABC-01-053 F 26 ARM B 25 to 44
6 ABC-01-054 M 44 ARM D 25 to 44
7 ABC-01-055 F 47 ARM C 45 to 64
8 ABC-01-056 M 31 ARM A 25 to 44
9 ABC-01-113 M 74 ARM D >= 65
10 ABC-01-114 F 72 ARM B >= 65
# ... with 75 more rows
Get ARM population counts
count: now 4 rows and 2 columns, ungrouped
ARM A ARM B ARM C ARM D
20 21 21 23
=========================================================================
Create summary statistics for age
=========================================================================
group_by: one grouping variable (ARM)
summarise: now 4 rows and 6 columns, ungrouped
pivot_longer: reorganized (N, Mean (SD), Median, Q1 - Q3, Range) into (label, value) [was 4x6, now 20x3]
pivot_wider: reorganized (ARM, value) into (ARM A, ARM B, ARM C, ARM D) [was 20x3, now 5x5]
# A tibble: 5 x 6
var label `ARM A` `ARM B` `ARM C` `ARM D`
<chr> <chr> <chr> <chr> <chr> <chr>
1 AGE N 20 21 21 23
2 AGE Mean (SD) 53.1 (11.9) 47.4 (16.3) 45.7 (14.4) 49.7 (14.3)
3 AGE Median 52.5 46.0 46.0 48.0
4 AGE Q1 - Q3 47.8 - 60.0 35.0 - 61.0 38.0 - 53.0 39.0 - 60.5
5 AGE Range 31 - 73 22 - 73 19 - 71 21 - 75
=========================================================================
Create frequency counts for Age Group
=========================================================================
Create age group frequency counts
select: dropped 3 variables (USUBJID, SEX, AGE)
group_by: 2 grouping variables (ARM, AGECAT)
summarize: now 15 rows and 3 columns, one group variable remaining (ARM)
pivot_wider: reorganized (ARM, n) into (ARM A, ARM B, ARM C, ARM D) [was 15x3, now 4x5]
transmute: dropped one variable (AGECAT)
new variable 'var' (character) with one unique value and 0% NA
new variable 'label' (factor) with 4 unique values and 0% NA
converted 'ARM A' from integer to character (0 new NA)
converted 'ARM B' from integer to character (0 new NA)
converted 'ARM C' from integer to character (0 new NA)
converted 'ARM D' from integer to character (0 new NA)
# A tibble: 4 x 6
var label `ARM A` `ARM B` `ARM C` `ARM D`
<chr> <fct> <chr> <chr> <chr> <chr>
1 AGECAT 18 to 24 0 ( 0.0%) 1 ( 4.8%) 3 ( 14.3%) 1 ( 4.3%)
2 AGECAT 25 to 44 4 ( 20.0%) 8 ( 38.1%) 4 ( 19.0%) 7 ( 30.4%)
3 AGECAT 45 to 64 13 ( 65.0%) 7 ( 33.3%) 12 ( 57.1%) 12 ( 52.2%)
4 AGECAT >= 65 3 ( 15.0%) 5 ( 23.8%) 2 ( 9.5%) 3 ( 13.0%)
=========================================================================
Create frequency counts for SEX
=========================================================================
# A user-defined format: 4 conditions
Name Type Expression Label Order
1 x U is.na(x) Missing NA
2 x U x == "M" Male NA
3 x U x == "F" Female NA
4 x U TRUE Other NA
select: dropped 3 variables (USUBJID, AGE, AGECAT)
group_by: 2 grouping variables (ARM, SEX)
summarize: now 8 rows and 3 columns, one group variable remaining (ARM)
pivot_wider: reorganized (ARM, n) into (ARM A, ARM B, ARM C, ARM D) [was 8x3, now 2x5]
transmute: dropped one variable (SEX)
new variable 'var' (character) with one unique value and 0% NA
new variable 'label' (factor) with 2 unique values and 0% NA
converted 'ARM A' from integer to character (0 new NA)
converted 'ARM B' from integer to character (0 new NA)
converted 'ARM C' from integer to character (0 new NA)
converted 'ARM D' from integer to character (0 new NA)
mutate: converted 'label' from factor to character (0 new NA)
# A tibble: 2 x 6
var label `ARM A` `ARM B` `ARM C` `ARM D`
<chr> <chr> <chr> <chr> <chr> <chr>
1 SEX Male 15 ( 75.0%) 10 ( 47.6%) 12 ( 57.1%) 16 ( 69.6%)
2 SEX Female 5 ( 25.0%) 11 ( 52.4%) 9 ( 42.9%) 7 ( 30.4%)
Combine blocks into final data frame
# A tibble: 11 x 6
var label `ARM A` `ARM B` `ARM C` `ARM D`
<chr> <chr> <chr> <chr> <chr> <chr>
1 AGE N 20 21 21 23
2 AGE Mean (SD) 53.1 (11.9) 47.4 (16.3) 45.7 (14.4) 49.7 (14.3)
3 AGE Median 52.5 46.0 46.0 48.0
4 AGE Q1 - Q3 47.8 - 60.0 35.0 - 61.0 38.0 - 53.0 39.0 - 60.5
5 AGE Range 31 - 73 22 - 73 19 - 71 21 - 75
6 AGECAT 18 to 24 0 ( 0.0%) 1 ( 4.8%) 3 ( 14.3%) 1 ( 4.3%)
7 AGECAT 25 to 44 4 ( 20.0%) 8 ( 38.1%) 4 ( 19.0%) 7 ( 30.4%)
8 AGECAT 45 to 64 13 ( 65.0%) 7 ( 33.3%) 12 ( 57.1%) 12 ( 52.2%)
9 AGECAT >= 65 3 ( 15.0%) 5 ( 23.8%) 2 ( 9.5%) 3 ( 13.0%)
10 SEX Male 15 ( 75.0%) 10 ( 47.6%) 12 ( 57.1%) 16 ( 69.6%)
11 SEX Female 5 ( 25.0%) 11 ( 52.4%) 9 ( 42.9%) 7 ( 30.4%)
=========================================================================
Create and print report
=========================================================================
# A report specification: 1 pages
- file_path: 'C:\Users\dbosa\AppData\Local\Temp\RtmpKC93lu/output/example2.rtf'
- output_type: RTF
- units: inches
- orientation: landscape
- margins: top 1 bottom 1 left 1 right 1
- line size/count: 107/41
- page_header: left=Sponsor: Company right=Study: ABC
- title 1: 'Table 1.0'
- title 2: 'Analysis of Demographic Characteristics'
- title 3: 'Safety Population'
- footnote 1: 'Program: DM_Table.R'
- footnote 2: 'NOTE: Denominator based on number of non-missing responses.'
- page_footer: left=Date Produced: 27Jun21 14:35 center= right=Page [pg] of [tpg]
- content:
# A table specification:
- data: tibble 'final' 11 rows 6 cols
- show_cols: all
- use_attributes: all
- stub: var label 'Variable' width=2.5 align='left'
- define: var 'Variable' dedupe='TRUE'
- define: label 'Demographic Category'
- define: ARM A
- define: ARM B
- define: ARM C
- define: ARM D
lib_sync: synchronized data in library 'sdtm'
lib_unload: library 'sdtm' unloaded
=========================================================================
Log End Time: 2021-06-27 14:35:48
Log Elapsed Time: 0 00:00:00
=========================================================================
Next: Example 3: Figures