rflashtext can be used to find and replace words in a given text with only one pass over the document.
It’s a pure R implementation of the FlashText algorithm, inspired by the Python library flashtext.
You can install the released version of rflashtext from CRAN with:
install.packages("rflashtext")
And the development version from GitHub with:
# install.packages("devtools")
devtools::install_github("AbrJA/rflashtext")
This is a basic example which shows you how to use the API:
library(rflashtext)
processor <- keyword_processor$new(ignore_case = FALSE, word_chars = c(letters, LETTERS))
processor$show_attrs(attrs = "dict_size")
#> $dict_size
#> [1] 0
processor$add_keys_words(keys = c("NY", "LA"), words = c("New York", "Los Angeles"))
processor$show_attrs(attrs = c("dict", "dict_size"))
#> $dict
#> $dict$`_class_`
#> [1] "keyword_dictionary"
#>
#> $dict$N
#> $dict$N$Y
#> $dict$N$Y$`_word_`
#> [1] "New York"
#>
#>
#>
#> $dict$L
#> $dict$L$A
#> $dict$L$A$`_word_`
#> [1] "Los Angeles"
#>
#>
#>
#>
#> $dict_size
#> [1] 2
words_found <- processor$find_keys(sentence = "I live in LA and I like NY")
words_found
#> [[1]]
#> [[1]]$word
#> [1] "Los Angeles"
#>
#> [[1]]$start
#> [1] 11
#>
#> [[1]]$end
#> [1] 13
#>
#>
#> [[2]]
#> [[2]]$word
#> [1] "New York"
#>
#> [[2]]$start
#> [1] 25
#>
#> [[2]]$end
#> [1] 26
do.call(rbind, words_found)
#> word start end
#> [1,] "Los Angeles" 11 13
#> [2,] "New York" 25 26
processor$replace_keys(sentence = "I live in LA and I like NY")
#> [1] "I live in Los Angeles and I like New York"