max.N <- 20
atime.list <- atime::atime(
N=1:max.N,
setup={
subject <- paste(rep("a", N), collapse="")
pattern <- paste(rep(c("(a)?", "\\1"), each=N), collapse="")
},
ICU=stringi::stri_match(subject, regex=pattern),
PCRE=regexpr(pattern, subject, perl=TRUE),
TRE=regexpr(pattern, subject, perl=FALSE)
)
plot(atime.list)
if(require(ggplot2)){
gg <- ggplot()+
geom_ribbon(aes(
N, ymin=min, ymax=max, fill=expr.name),
data=atime.list$meas,
alpha=0.5)+
geom_line(aes(
N, median, color=expr.name),
data=atime.list$meas)+
scale_x_log10()+
scale_y_log10("seconds (median line, min/max band)")
if(require("directlabels")){
direct.label(gg, "top.polygons")
}else{
gg
}
}
The plots above show that ICU/PCRE/TRE are all exponential in N (subject/pattern size) when the pattern contains backreferences.
atime.list <- atime::atime(
ICU=stringi::stri_match(subject, regex=pattern),
RE2=re2::re2_match(subject, pattern),
PCRE=regexpr(pattern, subject, perl=TRUE),
TRE=regexpr(pattern, subject, perl=FALSE),
setup={
subject <- paste(rep("a", N), collapse="")
pattern <- paste(rep(c("a?", "a"), each=N), collapse="")
},
N=1:25,
times=3)
plot(atime.list)
best.list <- atime::references_best(atime.list)
if(require(ggplot2)){
gg <- ggplot()+
geom_ribbon(aes(
N, ymin=min, ymax=max, fill=expr.name),
data=best.list$meas[unit=="seconds"],
alpha=0.5)+
geom_line(aes(
N, empirical, color=expr.name),
data=best.list$meas)+
facet_grid(unit ~ ., scales="free")+
theme(legend.position="none")+
scale_x_log10()+
scale_y_log10("median line, min/max band")
if(require("directlabels")){
gg+
geom_dl(aes(
N, empirical, color=expr.name, label=expr.class),
method="right.polygons",
data=best.list$meas)+
coord_cartesian(xlim=c(1,30))
}else{
gg
}
}
#> Warning: Transformation introduced infinite values in continuous y-axis
#> Transformation introduced infinite values in continuous y-axis
The plots above show that ICU/PCRE are exponential time whereas
RE2/TRE are polynomial time. Exercise for the reader: modify the above
code to use the seconds.limit
argument so that you can see what
happens to ICU/PCRE for larger N (hint: you should see a difference at
larger sizes).