regex v r
^ = Beginning of Line or String
$ = End of Line or String
. = Matches any single character, like a Joker Card (inc blank spaces, but not line breaks by default)
\\. = Escape the period when we search on an actual period
\\d or [[:digit:]] = 0, 1, 2, 3, ...
\\w or [[:alnum:]_] = a, b, c, ..., 1, 2, 3, ..., _
[A-Za-z] or [[:alpha:]] = A, B, C, ... a, b, c, ...
[aeiou] = a, e, i, o, u
\\s or [[:space:]] = " ", tabs or line breaks
\\D = match all except digits
\\W = match all except word chars (word chars are letters, numbers and underscore)
\\S = match all except spaces
[^A-Za-z] = match any character that is not a letter A-Z or a-z
\\w{2} = repeat w exactly twice
\\w{2,3} = min repeat \\w twice, max repeat thrice (no space inside the braces)
\\w{2,} = min repeat w twice, no max
\\w+ = 1 or more repetitions
\\w* = 0 or more repetitions
Examples:
1) Find all digits and spaces
stringr::str_match_all("chr", "[\\d\\s]")
regex v R
x <- "1888 is the longest year in Roman numerals: MDCCCLXXXVIII"
str_extract(x, "CC?")
#> [1] "CC"
str_extract(x, "CC+")
#> [1] "CCC"
str_extract(x, 'C[LX]+')
#> [1] "CLXXX"