Mastering Regular Expressions
Full Index
(use your browser's find function to search)
\(...\) 137
\<...\> 21, 25, 50, 133-134, 150
\<...\>, egrep 15
\<...\>, Emacs 101
\<...\>, mimicking in Perl 341-342
\+ 141
\\\\ 190, 380, 444
\? 141
`\+' history 87
\0 117-118
\1 138, 300, 303
\1, Perl 41
\A 112, 129-130
\A, optimization 246
\a 115-116
\B 134
\b 65, 115-116, 134
\b, backspace and word boundary 44, 46
\b, Java 368
\b, Perl 286
\b, PHP 442
\b\B 240
\C 120
\C, PHP 442
\D 49, 120
\d 49, 120
\d, Perl 288
\d, PHP 442
\E 290
\E, Java 368, 395, 403
\e 79, 115-116
\f 115-116
\f, introduced 44
\G 130-133, 212, 315-316, 362, 447
\G, advanced example 132, 399
\G, .NET 408
\G, optimization 246
\kname (see named capture)
\l 290
\L...\E 290
\L...\E, inhibiting 292
\n 49, 115-116
\n, introduced 44
\n, machine-dependency 115
\N{LATIN SMALL LETTER SHARP S} 290
\N{name} 290
\N{name}, inhibiting 292
\p{^...} 288
\p{...} 121, 288
\p{...}, Java 368-369, 402-403
\p{...}, Perl 125
\p{All} 125
\p{All}, Perl 288
\p{all} 369
\p{Any} 125, 442
\p{Any}, Perl 288
\p{Arrows} 124
\p{Assigned} 125-126
\p{Assigned}, Perl 288
\p{Basic_Latin} 124
\p{Box_Drawing} 124
\p{C} 122
\p{C}, Java 369
\p{Cc} 123
\p{Cf} 123
\p{Cherokee} 122
\p{Close_Punctuation} 123
\p{Cn} 123, 125-126, 369, 408
\p{Cn}, Java 369
\p{Co} 123
\p{Connector_Punctuation} 123
\p{Control} 123
\p{Currency} 124
\p{Currency_Symbol} 123
\p{Cyrillic} 122, 124
\p{Dash_Punctuation} 123
\p{Decimal_Digit_Number} 123
\p{Dingbats} 124
\p{Enclosing_Mark} 123
\p{Final_Punctuation} 123
\p{Format} 123
\p{Gujarati} 122
\p{Han} 122
\p{Hangul_Jamo} 124
\p{Hebrew} 122, 124
\p{Hiragana} 122
\p{InArrows} 124
\p{InBasic_Latin} 124
\p{InBox_Drawing} 124
\p{InCurrency} 124
\p{InCyrillic} 124
\p{InDingbats} 124
\p{InHangul_Jamo} 124
\p{InHebrew} 124
\p{Inherited} 122
\p{Initial_Punctuation} 123
\p{InKatakana} 124
\p{InTamil} 124
\p{InTibetan} 124
\p{IsCherokee} 122
\p{IsCommon} 122
\p{IsCyrillic} 122
\p{IsGujarati} 122
\p{IsHan} 122
\p{IsHebrew} 122
\p{IsHiragana} 122
\p{IsKatakana} 122
\p{IsLatin} 122
\p{IsThai} 122
\p{IsTibetan} 124
\p{javaJavaIdentifierStart} 369
\p{Katakana} 122, 124
\pL PHP 442
\p{L} 121-122, 133, 368, 395
\p{L&} 122-123, 125, 442
\p{L&}, Java 369
\p{L&}, Perl 288
\p{Latin} 122
\p{Letter} 122, 288
\p{Letter_Number} 123
\p{Line_Separator} 123
\p{Ll} 123, 406
\p{Lm} 123, 406
\p{Lo} 123, 406
\p{Lowercase_Letter} 123
\p{Lt} 123, 406
\p{Lu} 123, 406
\p{M} 120, 122
\p{Mark} 122
\p{Math_Symbol} 123
\p{Mc} 123
\p{Me} 123
\p{Mn} 123
\p{Modifier_Letter} 123
\p{Modifier_Symbol} 123
\p{N} 122, 395
\pN PHP 442
\p{Nd} 123, 368, 406
\p{Nl} 123
\p{No} 123
\p{Non_Spacing_Mark} 123
\p{Number} 122
\p{Open_Punctuation} 123
\p{Other} 122
\p{Other_Letter} 123
\p{Other_Number} 123
\p{Other_Punctuation} 123
\p{Other_Symbol} 123
\p{P} 122
\p{Paragraph_Separator} 123
\p{Pc} 123, 406
\p{Pd} 123
\p{Pe} 123
\p{Pf} 123
\p{Pf}, Java 369
\p{Pi} 123
\p{Pi}, Java 369
\p{Po} 123
\p{Private_Use} 123
\p{Ps} 123
\p{Punctuation} 122
\p{S} 122
\p{Sc} 123-124
\p{Separator} 122
\p{Sk} 123
\p{Sm} 123
\p{So} 123
\p{Space_Separator} 123
\p{Spacing_Combining_Mark} 123
\p{Symbol} 122
\p{Tamil} 124
\p{Thai} 122
\p{Tibetan} 124
\p{Titlecase_Letter} 123
\p{Unassigned} 123, 125
\p{Unassigned}, Perl 288
\p{Uppercase_Letter} 123
\p{Z} 121-122, 368, 407
\pZ PHP 442
\p{Zl} 123
\p{Zp} 123
\p{Zs} 123
\Q Java 368, 395, 403
\Q...\E 290
\Q...\E, inhibiting 292
\r 49, 115-116
\r, machine-dependency 115
\S 49, 56, 121
\s 49, 121
\s, Emacs 128
\s, introduction 47
\s, Perl 288
\s, PHP 442
\t 49, 115-116
\t, introduced 44
\U 117
\u 117, 290, 406
\U...\E 290
\U...\E, inhibiting 292
\v 115-116, 364
\V 364
\w 49, 65, 120
\w, Emacs 129
\w, Java 368
\w, many different interpretations 93
\w, Perl 288
\w, PHP 442
\W 49, 121
\x 117, 406
\x, Perl 286
\X 108, 120
\Z 112, 129-130
\Z, Java 370
\Z, optimization 246
\z 112, 129-130, 316, 447
\z, optimization 246
\z, PHP 442
// 322
/c 131-132, 315
/e 319-321
/g 61, 132, 307, 311-312, 315, 319
/g, introduced 51
/g, with regex object 354
/i 135
/i, introduced 47
/i, with study 359
/m 135
/o 352-353
/o, with regex object 354
/osmosis 293
/s 135
/x 135, 288
/x, history 90
/x, introduced 72
-Dr 363
-i as -y 86
-y old grep 86
<> 54
<>, and $_ 79
<br/> 481
!~ 309
$& 299-300
$&, checking for 358
$&, mimicking 302, 357
$&, naughty 356
$&, .NET 424
$&, OK for debugging 331
$&, pre-match copy 355
$+ 300-301, 345
$+, example 202
$+, .NET 202, 424
$` 300
$`, checking for 358
$`, mimicking 357
$`, naughty 356
$`, .NET 424
$`, OK for debugging 331
$`, pre-match copy 355
$* 362
$/ 35, 78
$/, Perl 35
$' 300
$', checking for 358
$', mimicking 357
$', naughty 356
$', .NET 424
$', OK for debugging 331
$', pre-match copy 355
$$ .NET 424
$ 112-113, 130, 447
$, escaping 77
$, Java 370
$, optimization 246
$, Perl interpolation 289
$, PHP 442
$_ 79, 308, 311, 314, 318, 322, 353-354, 359
$_, .NET 424
$+[0] (see @+)
$0 300
$0, Java 380
$0, PHP 459
$-[0] (see @-)
${0} 459
$1 137-138, 300, 303
$1, introduced 41
$1, Java 380
$1, .NET 424
$1, in other languages 138
$1, pre-match copy 355
$all_matches 455
$ARGV 79
$HostnameRegex 76, 137, 303, 351
$HttpUrl 303, 305, 345, 351
$LevelN 330, 343
$matches 450
$^N 300-301, 344-346
${name} 409
${name~} 424
$NestedStuffRegex 339, 346
$^R 302, 327
$^W 297
% Perl interpolation 289
(?!) 241, 333, 335, 340-341
(?#...) 99, 136, 420
(?1), Java 402
(?1), PCRE 476
(?1), PHP 476
(?1) PHP 482
[(?-i)] 446
[(?i)] 446
(?n) 408
(?P<...>) 451-452, 457
(?R) 475
(?R), PCRE 475
(?R), PHP 475
(?s) (see: dot-matches-all mode; mode modifier)
(?s:...) (see mode-modified span)
(?x:...) (see mode-modified span)
(?x) (see: comments and free-spacing mode; mode modifier)
*+ (see possessive quantifiers)
* (see star)
+ (see plus)
++ 483
++ (see also possessive quantifiers)
".*" (see double-quoted string example)
.*, introduced 55
.*, mechanics of matching 152
.*, optimization 246
.*, warning about 56
.NET xvii, 405-438
.NET, $+ 202
.NET, after-match data 138
.NET, benchmarking 237
.NET, character-class subtraction 406
.NET, code example 219
.NET, flavor overview 92
.NET, JIT 410
.NET, line anchors 130
.NET, literal-text mode 136
.NET, MSIL 410
.NET, object model 417
.NET, \p{...} 125
.NET, regex approach 96-97
.NET, regex flavor 407
.NET, search and replace 414, 423-424
.NET, URL example 204
.NET, version covered 405
.NET, word boundaries 134
=~ 308-309, 318
=~, introduced 38
? (see question mark)
?...? 308
@"..." 103
@- 300, 302, 339
@+ 300, 302, 314
@ Perl interpolation 289
[=...=] 128
[:...:] 127
[:<:] 91
[.....] 128
^ 112-113, 130
^, Java 370
^, optimization 246
^Subject: example 94, 151-152, 154, 242-243, 245, 289
^Subject: example, Java 95
^Subject: example, .NET 96
^Subject: example, Perl 55
^Subject: example, Perl debugger 361
^Subject: example, PHP 97
^Subject: example, Python 97
{min,max} 20, 141
$+ .NET 202
\0 117-118
$0 300
$0, Java 380
$0, PHP 459
(?1), Java 402
(?1), PCRE 476
(?1), PHP 476
\1 138, 300, 303
\1, Perl 41
$1 137-138, 300, 303
$1, introduced 41
$1, Java 380
$1, .NET 424
$1, in other languages 138
$1, pre-match copy 355
(?1) PHP 482
8859-1 encoding 29, 87, 106, 108, 123
\a 115-116
@ escaping 77
\A 112, 129-130
\A, optimization 246
after-match data, Java 138
after-match data, .NET 138
after-match data, PHP 138
after-match variables, Perl 299
after-match variables, pre-match copy 355
Aho, Alfred 86, 180
\p{all} 369
\p{All} 125
\p{All}, Perl 288
$all_matches 455
$all_matches, collated 455
$all_matches, vs. $matches 454
$all_matches, stacked 456
alternation 139-140
alternation, and backtracking 231
alternation, efficiency 222, 231
alternation, greedy 174-175
alternation, hand tweaking 261
alternation, introduced 13-14
alternation, order of 175-177, 223, 260, 482
alternation, order of, for correctness 28, 189, 197
alternation, order of, for efficiency 224
alternation, and parentheses 13
analogy, backtracking, bread crumbs 158-159
analogy, backtracking, stacking dishes 159
analogy, ball rolling 262
analogy, building a car 31
analogy, charging batteries 179
analogy, engines 143-147
analogy, first come, first served 153
analogy, gas additive 150
analogy, learning regexes, Pascal 36
analogy, learning regexes, playing rummy 33
analogy, regex as a language 5, 27
analogy, regex as filename patterns 4
analogy, regex-directed match (see NFA)
analogy, text-directed match (see DFA)
analogy, transmission 148-149, 228
analogy, transparencies (Perl's local) 298
anchor (see also: word boundaries; enhanced line-anchor mode)
anchor, caret 129
anchor, dollar 129
anchor, end-of-line optimization 246
anchor, exposing 256
anchor, line 87, 112-113, 150
anchor, overview 129
anchored(...) 362
anchored `string' 362
anchoring bounds 388
anchoring bounds, Java 388
AND class set operations 125-126
ANSI escape sequences 79
\p{Any} 125, 442
\p{Any}, Perl 288
any character (see dot)
appendReplacement method 380
appendTail method 381
$ARGV 79
\p{Arrows} 124
ASCII encoding 29, 106-107, 115, 123
Asian character encoding 29
AssemblyName 435
\p{Assigned} 125-126
\p{Assigned}, Perl 288
asterisk (see star)
atomic grouping (see also possessive quantifiers)
atomic grouping, details 170-172
atomic grouping, for efficiency 171-172, 259-260, 268-270
atomic grouping, essence 170-171
atomic grouping, introduced 139
atomic grouping example 198, 201, 213, 271, 330, 340-341, 346
AT&T Bell Labs 86
author email xxiii
auto-lookaheadification 410
automatic possessification 251
awk, after-match data 138
awk, gensub 182
awk, history 87
awk, search and replace 100
awk, version covered 91
awk, word boundaries 134
\b 65, 115-116, 134
\b, backspace and word boundary 44, 46
\b, Java 368
\b, Perl 286
\b, PHP 442
\B 134
\b\B 240
<B>...</B> 165-167
<B>...</B>, unrolling 270
backreferences 118, 137
backreferences, DFA 150, 182
backreferences, introduced with egrep 20-22
backreferences, vs. octal escape 412-413
backreferences, remembering text 21
backspace (see \b)
backtracking 163-177
backtracking, and alternation 231
backtracking, avoiding 171-172
backtracking, computing count 227
backtracking, counting 222, 224
backtracking, detecting excessive 249-250
backtracking, efficiency 179-180
backtracking, essence 168-169
backtracking, exponential match 226
backtracking, global view 228-232
backtracking, introduction 157-163
backtracking, LIFO 159
backtracking, of lookaround 173-174
backtracking, neverending match 226
backtracking, non-match example 160-161
backtracking, POSIX NFA example 229
backtracking, saved states 159
backtracking, simple example 160
backtracking, simple lazy example 161
balanced constructs 328-331, 340-341, 436, 475-478, 481
balancing regex issues 186
Barwise, J. 85
base character 107, 120
Basic Regular Expressions 87-88
\p{Basic_Latin} 124
\b\B 240
benchmarking 232-239
benchmarking, comparative 249
benchmarking, compile caching 351
benchmarking, Java 235-236
benchmarking, for naughty variables 358
benchmarking, .NET 237, 410
benchmarking, with neverending match 227
benchmarking, Perl 360
benchmarking, PHP 234-235
benchmarking, pre-match copy 356
benchmarking, Python 238-239
benchmarking, Ruby 238
benchmarking, Tcl 239
Berkeley 86
Better-Late-Than-Never 236
<B>...</B> 165-167
<B>...</B>, unrolling 270
blocks 124, 288, 369, 402, 407
BLTN 236
BLTN, Java 236
BOL 362
\p{Box_Drawing} 124
Boyer-Moore 245, 247
bracket expressions 127
BRE 87-88
bread-crumb analogy 158-159
<br/> 481
bugs Java 365, 368-369, 387, 392, 399, 403
Bulletin of Math. Biophysics 85
bump-along, avoiding 210
bump-along, distrusting 215-218
bump-along, introduction 148-149
bump-along, optimization 255
bump-along, in overall processing 242
Byington, Ryan xxiv
byte matching 120, 442, 452-453, 456
¢ 124
\p{C} 122
\p{C}, Java 369
\C 120
\C, PHP 442
C# (see also .NET)
C#, strings 103
/c 131-132, 315
C comments, matching 272-276
C comments, unrolling 275-276
caching 242-245
caching, benchmarking 351
caching, compile 242-245
caching, Emacs 244
caching, integrated 243
caching, Java 478
caching, .NET 432
caching, object-oriented 244
caching, Perl 350-352
caching, PHP 478
caching, procedural 244
caching, Tcl 244
caching, unconditional 350
callback PHP 463, 465
Capture 437
CaptureCollection 438
capturing parentheses Java 377
car analogy 83-84
caret anchor introduced 8
carriage return 109, 370
case title 110
case folding 290, 292
case folding, inhibiting 292
case-insensitive mode 110
case-insensitive mode, egrep 14-15
case-insensitive mode, /i 47
case-insensitive mode, introduced 14-15
case-insensitive mode, Ruby 110
case-insensitive mode, with study 359
cast 294-295
\p{Cc} 123
CDATA 483
\p{Cf} 123
chaining (of methods) 389
character, base 120
character, classes xvii
character, combining 107, 120
character, combining, Inherited script 122
character, vs. combining characters 107
character, control 117
character, initial character discrimination 245-248, 252, 257-259, 332, 361
character, machine-dependent codes 115
character, multiple code points 108
character, as opposed to byte 29
character, separating with split 322
character, shorthands 115-116
character class 118
character class, vs. alternation 13
character class, vs. dot 119
character class, elimination optimization 248
character class, introduced 9-10
character class, and lazy quantifiers 167
character class, mechanics of matching 149
character class, negated, must match character 11-12
character class, negated, and newline 119
character class, negated, Tcl 112
character class, positive assertion 119
character class, of POSIX bracket expression 127
character class, range 9, 119
character class, as separate language 10
character class, set operations 125-127
character class, subtraction 406
character class, subtraction (set) 126
character class, subtraction (simple) 125
character equivalent 128
character-class subtraction .NET 406
CharBuffer 373, 376, 387
charnames pragma 290
CharSequence 365, 373, 382, 397
CheckNaughtiness 358
\p{Cherokee} 122
Chinese text processing 29
chr 420
chunk limit, Java 396
chunk limit, Perl 323
chunk limit, PHP 466
CJKV Information Processing 29
class xvii
class, initial class discrimination 245-248, 252, 257-259, 332, 361
Click, Cliff xxiv
client VM 236
clock clicks 239
\p{Close_Punctuation} 123
closures 339
\p{Cn} 123, 125-126, 369, 408
\p{Cn}, Java 369
\p{Co} 123
code example, Java 81, 209, 217, 235, 371, 375, 378-379, 381-384, 389
code example, .NET 219
code point, beyond U+FFFF 109
code point, introduced 107
code point, multiple 108
code point, unassigned in block 124
coerce 294-295
cold VM 236
collated data 455
collating sequences 128
combining character 107, 120
combining character, Inherited script 122
commafying a number example 64-65
commafying a number example, introduced 59
commafying a number example, without lookbehind 67
COMMAND.COM 7
comments 99, 136
comments, Java 98
comments, matching of C comments 272-276
comments, matching of Pascal comments 265
comments, .NET 420
comments, XML 483
comments and free-spacing mode 111
Communications of the ACM 85
Compilation failed 474
compile, caching 242-245
compile, once (/o) 352-353
compile, on-demand 351
compile, regex 410-411
compile method 372
Compiled (.NET) 237, 408, 410, 420, 427-428, 435
Compilers -- Principles, Techniques, and Tools 180
CompileToAssembly 433, 435
conditional 140-141
conditional, with embedded regex 327, 335
conditional, mimicking with lookaround 140
conditional, .NET 409-410
Config module 290, 299
conflicting metacharacters 44-46
\p{Connector_Punctuation} 123
Constable, Robert 85
context, contorting, Perl 294
context, forcing 310
context, metacharacters 44-46
context, regex use 189
continuation lines 178, 186-187
continuation lines, unrolling 270-271
contorting an expression 294-295
\p{Control} 123
control characters 117
Conway, Damian 339
cooking for HTML 68, 414
correctness vs. efficiency 223-224
www.cpan.org 358
CR 109, 370
create_function 463, 465
CR/LF 370
Cruise, Tom 51
crummy analogy 158-159
CSV parsing example, Java 217, 401
CSV parsing example, .NET 435
CSV parsing example, Perl 213-219
CSV parsing example, PHP 480
CSV parsing example, unrolling 271
CSV parsing example, VB.NET 219
\p{Currency} 124
currency, \p{Currency} 124
currency, \p{Currency_Symbol} 123
currency, \p{Sc} 123
currency, Unicode block 123-124
\p{Currency_Symbol} 123
current location Java 374, 383, 398, 400
currentTimeMillis() 236
\p{Cyrillic} 122, 124
\D 49, 120
\d 49, 120
\d, Perl 288
\d, PHP 442
Darth 197
dash in character class 9
\p{Dash_Punctuation} 123
date_default_timezone_set 235
DBIx::DWIW 258
debugcolor 363
debugging 361-363
debugging, with embedded code 331-332
debugging, regex objects 305-306
debugging, run-time 362
\p{Decimal_Digit_Number} 123
default regex 308
define-key 101
delegate 423-424
delimited text 196-198
delimited text, standard formula 196, 273
delimiter, with shell 7
delimiter, with substitution 319
delimiters PHP 445, 448
description Java 365
Devel::FindAmpersand 358
Devel::SawAmpersand 358
DFA, acronym spelled out 156
DFA, backreferences 150, 182
DFA, boring 157
DFA, compared with NFA 224, 227
DFA, efficiency 179
DFA, implementation ease 183
DFA, introduced 145, 155
DFA, lazy evaluation 181
DFA, longest-leftmost match 177-179
DFA, testing for 146-147
dialytika 108
\p{Dingbats} 124
dish-stacking analogy 159
dollar for Perl variable 37
dollar anchor 129
dollar anchor, introduced 8
dollar value example 24-25, 51-52, 167-170, 175, 194-195
DOS 7
dot 119
dot, vs. character class 119
dot, introduced 11-12
dot, Java 370
dot, mechanics of matching 149
dot, Tcl 113
dot 370
dot modes Java 111, 370
.NET xvii, 405-438
.NET, $+ 202
.NET, after-match data 138
.NET, benchmarking 237
.NET, character-class subtraction 406
.NET, code example 219
.NET, flavor overview 92
.NET, JIT 410
.NET, line anchors 130
.NET, literal-text mode 136
.NET, MSIL 410
.NET, object model 417
.NET, \p{...} 125
.NET, regex approach 96-97
.NET, regex flavor 407
.NET, search and replace 414, 423-424
.NET, URL example 204
.NET, version covered 405
.NET, word boundaries 134
dot-matches-all mode 111-112
double-quoted string example, allowing escaped quotes 196
double-quoted string example, egrep 24
double-quoted string example, final regex 264
double-quoted string example, makudonarudo 165, 169, 228-232, 264
double-quoted string example, sobering example 222-228
double-quoted string example, unrolled 262, 268
double-word finder example 81
double-word finder example, description 1
double-word finder example, egrep 22
double-word finder example, Emacs 101
double-word finder example, Java 81
double-word finder example, Perl 35, 77-80
-Dr 363
dragon book 180
DWIW (DBIx) 258
dynamic regex 327-331
dynamic regex, sanitizing 337
dynamic scope 295-299
dynamic scope, vs. lexical scope 299
\E 290
\E, Java 368, 395, 403
\e 79, 115-116
/e 319-321
earliest match wins 148-149
EBCDIC 29
ECMAScript (.NET) 406, 408, 412-413, 421, 427
ed 85
efficiency, and backtracking 179-180
efficiency, correctness 223-224
efficiency, Perl 347-363
efficiency, Perl-specific issues 347-363
efficiency, PHP 478-480
efficiency, regex objects 353-354
efficiency, unlimited lookbehind 134
egrep, after-match data 138
egrep, backreference support 150
egrep, case-insensitive match 15
egrep, doubled-word solution 22
egrep, example use 14
egrep, flavor overview 92
egrep, flavor summary 32
egrep, history 86-87
egrep, introduced 6-8
egrep, metacharacter discussion 8-22
egrep, regex implementation 183
egrep, version covered 91
egrep, word boundaries 134
electric engine analogy 143-147
Emacs, after-match data 138
Emacs, control characters 117
Emacs, flavor overview 92
Emacs, re-search-forward 101
Emacs, search 100
Emacs, strings as regexes 101
Emacs, syntax class 128
Emacs, version covered 91
Emacs, word boundaries 134
email of author xxiii
email address example 70-73, 98
email address example, Java 98
email address example, .NET 99
embedded code, local 336
embedded code, my 338-339
embedded code, regex construct 327, 331-335
embedded code, sanitizing 337
embedded string check optimization 247, 257
Embodiments of Mind 85
Empty 433
empty-element tag 481
\p{Enclosing_Mark} 123
encoding, ASCII 29, 106-107, 115, 123
encoding, introduced 29
encoding, issues overview 105
encoding, Latin-1 29, 87, 106, 108, 123
encoding, UCS-2 107
encoding, UCS-4 107
encoding, UTF-16 107
encoding, UTF-8 107, 442, 447
END block 358
end method 377
end-of-string anchor optimization 246
engine, analogy 143-147
engine, hybrid 182, 239, 243
engine, implementation ease 183
engine, introduced 27
engine, testing type 146-147
engine, testing type, with neverending match 227
engine, type comparison 156-157, 180-183
English module 357
English vs. regex 275
enhanced line-anchor mode 112-113
enhanced line-anchor mode, introduced 69
ERE 87-88
ereg suite 439
errata xxiii
Escape 432
escape, introduced 22
escape, term defined 27
essence, atomic grouping 170-171
essence, greediness, laziness, and backtracking 168-169
essence, NFA (see backtracking)
eval 319
example, atomic grouping 198, 201, 213, 271, 330, 340-341, 346
example, commafying a number 64-65
example, commafying a number, introduced 59
example, commafying a number, without lookbehind 67
example, CSV parsing, Java 217, 401
example, CSV parsing, .NET 435
example, CSV parsing, Perl 213-219
example, CSV parsing, PHP 480
example, CSV parsing, unrolling 271
example, CSV parsing, VB.NET 219
example, dollar value 24-25, 51-52, 167-170, 175, 194-195
example, double-quoted string, allowing escaped quotes 196
example, double-quoted string, egrep 24
example, double-quoted string, final regex 264
example, double-quoted string, makudonarudo 165, 169, 228-232, 264
example, double-quoted string, sobering example 222-228
example, double-quoted string, unrolled 262, 268
example, double-word finder 81
example, double-word finder, description 1
example, double-word finder, egrep 22
example, double-word finder, Emacs 101
example, double-word finder, Java 81
example, double-word finder, Perl 35, 77-80
example, email address 70-73, 98
example, email address, Java 98
example, email address, .NET 99
example, filename 190-192, 444
example, five modifiers 316
example, floating-point number 194
example, form letter 50-51
example, gr[ea]y 9
example, hostname 22, 73, 76, 98-99, 137-138, 203, 260, 267-268, 304, 306, 450-451
example, hostname, egrep 25
example, hostname, Java 209
example, hostname, plucking from text 71-73, 206-208
example, hostname, in URL 74-77
example, hostname, validating 203-205
example, hostname, VB.NET 204
example, HREF 452
example, HTML 443-444, 459, 461, 464, 481, 484
example, HTML, conversion from text 67-77
example, HTML, cooking 68, 414
example, HTML, encoding 414
example, HTML, <HR> 194
example, HTML, link 201-203
example, HTML, optional 140
example, HTML, paired tags 165
example, HTML, parsing 132, 315, 321, 399
example, HTML, tag 9, 18-19, 26, 200-201, 326, 357
example, HTML, URL 74-77, 203, 206-208, 303, 450-451
example, HTML, URL-encoding 320
example, HTTP response 467
example, image tags 397
example, IP 5, 187-189, 267-268, 311, 314, 348-349
example, Jeffs 61-64
example, lookahead 61-64
example, mail processing 53-59
example, makudonarudo 165, 169, 228-232, 264
example, pathname 190-192
example, population 59
example, possessive quantifiers 198, 201
example, postal code 209-212
example, regex overloading 341-345
example, stock pricing 51-52, 167-168
example, stock pricing, with alternation 175
example, stock pricing, with atomic grouping 170
example, stock pricing, with possessive quantifier 169
example, temperature conversion, Java 382
example, temperature conversion, .NET 425
example, temperature conversion, Perl 37, 283
example, temperature conversion, PHP 444
example, text-to-HTML 67-77
example, this|that 133, 139, 243, 245-247, 252, 255, 260-261
example, unrolling the loop 270-271, 477
example, URL 74-77, 201-204, 208, 260, 303-304, 306, 320, 450-451
example, URL, egrep 25
example, URL, Java 209
example, URL, plucking 206-208
example, username 73, 76, 98
example, username, plucking from text 71-73
example, username, in URL 74-77
example, variable names 24
example, XML 481-484
example, ZIP code 209-212
exception, IllegalArgumentException 373, 380
exception, IllegalStateException 376-377
exception, IndexOutOfBoundsException 375-376, 380
exception, IOException 81
exception, PatternSyntaxException 371, 373
Explicit (Option) 415
ExplicitCapture (.NET) 408, 420, 427
exponential match 222-228, 330, 340
exponential match, avoiding 264-266
exponential match, discovery 226-228
exponential match, explanation 226-228
exponential match, non-determinism 264
exponential match, short-circuiting 250
exponential match, solving with atomic grouping 268
exponential match, solving with possessive quantifiers 268
expose literal text 255
expression, context 294-295
expression, contorting 294-295
Extended Regular Expressions 87-88
\f 115-116
\f, introduced 44
Fahrenheit (see temperature conversion example)
failure, atomic grouping 171-172
failure, forcing 241, 333, 335, 340-341
FF 109, 370
file globs 4
file-check example 2, 36
filename, patterns (globs) 4
filename, prepending to line 79
filename example 190-192, 444
Filo, David 397
\p{Final_Punctuation} 123
find method 375
find method, region 384
FindAmpersand 358
Fite, Liz 33
five modifiers example 316
flags method 394
flavor, Perl 286-293
flavor, superficial chart, general 92
flavor, superficial chart, Java 367
flavor, superficial chart, .NET 407
flavor, superficial chart, PCRE 441
flavor, superficial chart, Perl 285, 287
flavor, superficial chart, PHP 441
flavor, superficial chart, POSIX 88
flavor, term defined 27
flex version covered 91
floating `string' 362
floating-point number example 194
forcing failure 241, 333, 335, 340-341
foreach vs. while vs. if 320
form letter example 50-51
\p{Format} 123
freeflowing regex 277-281
Friedl, Alfred 176
Friedl, brothers 33
Friedl, Fumie v, xxiv
Friedl, Fumie, birthday 11-12
Friedl, Jeffrey xxiii
Friedl, Stephen xxiv, 458
fully qualified name 295
functions related to regexes in Perl 285
\G 130-133, 212, 315-316, 362, 447
\G, advanced example 132, 399
\G, .NET 408
\G, optimization 246
/g 61, 132, 307, 311-312, 315, 319
/g, introduced 51
/g, with regex object 354
garbage collection Java benchmarking 236
gas engine analogy 143-147
gensub 182
George, Kit xxiv
GetGroupNames 427-428
GetGroupNumbers 427-428
gettimeofday 234
Gill, Stuart xxiv
global vs. private Perl variables 295
globs filename 4
GNU awk, after-match data 138
GNU awk, gensub 182
GNU awk, version covered 91
GNU awk, word boundaries 134
GNU egrep, after-match data 138
GNU egrep, backreference support 150
GNU egrep, doubled-word solution 22
GNU egrep, -i bug 21
GNU egrep, regex implementation 183
GNU egrep, word boundaries 134
GNU Emacs (see Emacs)
GNU grep, shortest-leftmost match 182
GNU grep, version covered 91
GNU sed, after-match data 138
GNU sed, version covered 91
GNU sed, word boundaries 134
Gosling, James 89
GPOS 362
Greant, Zak xxiv
greatest weakness Perl 286
gr[ea]y example 9
greedy, alternation 174-175
greedy, and backtracking 162-177
greedy, deference to an overall match 153, 274
greedy, essence 159, 168-169
greedy, favors match 167-168
greedy, first come, first served 153
greedy, global vs. local 182
greedy, introduced 151
greedy, vs. lazy 169, 256-257
greedy, localizing 225-226
greedy, quantifier 141
greedy, swapping 447
greedy, too greedy 152
green dragon 180
grep Perl 324
grep, as an acronym 85
grep, flavor overview 92
grep, history 86
grep, regex flavor 86
grep, version covered 91
grep, -y option 86
group method 377
Group object (.NET) 418
Group object (.NET), Capture 437
Group object (.NET), creating 429
Group object (.NET), Index 430
Group object (.NET), Length 430
Group object (.NET), Success 430
Group object (.NET), ToString 430
Group object (.NET), using 430
Group object (.NET), Value 430
GroupCollection 429, 438
groupCount method 377
grouping and capturing 20-22
GroupNameFromNumber 427-428
GroupNumberFromName 427-428
Groups Match object method 429
\p{Gujarati} 122
Gutierrez, David xxiv
\p{Han} 122
hand tweaking, alternation 261
hand tweaking, caveats 253
\p{Hangul_Jamo} 124
hasAnchoringBounds method 388
HASH(0x80f60ac) 257
hasTransparentBounds method 387
Hazel, Philip xxiv, 91, 440
\p{Hebrew} 122, 124
height attribute Java example 397
hex escape 117-118
hex escape, Perl 286
highlighting with ANSI escape sequences 79
\p{Hiragana} 122
history, `\+' 87
history, AT&T Bell Labs 86
history, awk 87
history, Berkeley 86
history, ed trivia 86
history, egrep 86-87
history, grep 86
history, lex 87
history, Perl 88-90, 308
history, PHP 440
history, of regexes 85-91
history, sed 87
history, underscore in \w 89
history, /x 90
hitEnd method 389-392
hostname example 22, 73, 76, 98-99, 137-138, 203, 260, 267-268, 304, 306, 450-451
hostname example, egrep 25
hostname example, Java 209
hostname example, plucking from text 71-73, 206-208
hostname example, in URL 74-77
hostname example, validating 203-205
hostname example, VB.NET 204
$HostnameRegex 76, 137, 303, 351
hot VM 236
HREF example 452
HTML, cooking 68, 414
HTML, matching tag 200-201
HTML example 443-444, 459, 461, 464, 481, 484
HTML example, conversion from text 67-77
HTML example, cooking 68, 414
HTML example, encoding 414
HTML example, <HR> 194
HTML example, link 201-203
HTML example, optional 140
HTML example, paired tags 165
HTML example, parsing 132, 315, 321, 399
HTML example, tag 9, 18-19, 26, 200-201, 326, 357
HTML example, URL 74-77, 203, 206-208, 303, 450-451
HTML example, URL-encoding 320
htmlspecialchars 461
HTTP newlines 115
HTTP response example 467
HTTP URL example 25, 74-77, 201-204, 206-209, 260, 303-304, 306, 320, 450-451
http://regex.info/ xxiii, 7, 345, 471
$HttpUrl 303, 305, 345, 351
hybrid regex engine 182, 239, 243
hyphen in character class 9
/i 135
/i, introduced 47
/i, with study 359
-i as -y 86
identifier matching 24
if vs. while vs. foreach 320
IgnoreCase (.NET) 96, 99, 408, 419, 427
IgnorePatternWhitespace (.NET) 99, 408, 419, 427
IllegalArgumentException 373, 380
IllegalStateException 376-377
image tags Java example 397
image tags example 397
implementation of engine 183
implicit 362
implicit anchor optimization 246
Imports 413, 415, 434
\p{InArrows} 124
\p{InBasic_Latin} 124
\p{InBox_Drawing} 124
\p{InCurrency} 124
\p{InCyrillic} 124
Index, Group object method 430
Index, Match object method 429
IndexOutOfBoundsException 375-376, 380
\p{InDingbats} 124
indispensable TiVo 3
\p{InHangul_Jamo} 124
\p{InHebrew} 124
\p{Inherited} 122
initial class discrimination 245-248, 252, 257-259, 332, 361
\p{Initial_Punctuation} 123
\p{InKatakana} 124
\p{InTamil} 124
integrated handling 94
integrated handling, compile caching 243
interpolation 288-289
interpolation, caching 351
interpolation, introduced 77
interpolation, mimicking 321
interpolation, PHP 103
INTERSECTION class set operations 126
interval 141
interval, introduced 20
interval, [X{0,0}] 141
\p{InTibetan} 124
introduced encoding 29
introduction Perl 37-38
IOException 81
IP example 5, 187-189, 267-268, 311, 314, 348-349
Iraq 11
Is vs. In 121, 124-125
Is vs. In, Java 369
Is vs. In, .NET 407
Is vs. In, Perl 288
\p{IsCherokee} 122
\p{IsCommon} 122
\p{IsCyrillic} 122
\p{IsGujarati} 122
\p{IsHan} 122
\p{IsHebrew} 122
\p{IsHiragana} 122
isJavaIdentifierStart 369
\p{IsKatakana} 122
\p{IsLatin} 122
IsMatch (Regex object method) 421
ISO-8859-1 encoding 29, 87, 106, 108, 123
issues overview encoding 105
\p{IsThai} 122
\p{IsTibetan} 124
Japanese, text processing 29
japhy 246
Java 95-96, 365-403
Java, after-match data 138
Java, anchoring bounds 388
Java, benchmarking 235-236
Java, BLTN 236
Java, bugs 365, 368-369, 387, 392, 399, 403
Java, code example 81, 209, 217, 235, 371, 375, 378-379, 381-384, 389
Java, CSV parsing example 401
Java, description 365
Java, dot modes 111, 370
Java, doubled-word example 81
Java, JIT 236
Java, line anchors 130, 370, 388
Java, line terminators 370
Java, match modes 368
Java, match pointer 374, 383, 398, 400
Java, matching comments 272-276
Java, method chaining 389
Java, method index 366
Java, Mustang 401
Java, object model 371-372
Java, \p{...} 125
Java, regex flavor 366-370
Java, region 384-389
Java, search and replace 378-383
Java, split 395-396
Java, strings 102
Java, transparent bounds 387
Java, Unicode 369
Java, URL example 209
Java, version covered 365
Java, version history 365, 368-369, 392, 401
Java, VM 236
Java, word boundaries 134
java properties 369
\p{javaJavaIdentifierStart} 369
java.lang.Character 369
java.util.Scanner 390
Jeffs example 61-64
JfriedlsRegexLibrary 434-435
JIT, Java 236
JIT, .NET 410
JRE 236
\p{Katakana} 122, 124
keeping in sync 210-211
Keisler, H. J. 85
Kleene, Stephen 85
The Kleene Symposium 85
Korean text processing 29
Kunen, K. 85
£ 124
\l 290
\p{L&} 122-123, 125, 442
\p{L&}, Java 369
\p{L&}, Perl 288
\p{L} 121-122, 133, 368, 395
\p{L}, character class 10, 13
\p{L}, identifiers 24
\p{Latin} 122
Latin-1 encoding 29, 87, 106, 108, 123
lazy 166-167
lazy, essence 159, 168-169
lazy, favors match 167-168
lazy, vs. greedy 169, 256-257
lazy, optimization 248, 257
lazy, quantifier 141
lazy evaluation 181, 355
\L...\E 290
\L...\E, inhibiting 292
lc 290
lcfirst 290
leftmost match 177-179
Length, Group object method 430
Length, Match object method 429
length-cognizance optimization 245, 247
\p{Letter} 122, 288
\p{Letter_Number} 123
$LevelN 330, 343
lex 86
lex, $ 112
lex, dot 111
lex, history 87
lex, and trailing context 182
lexer 132, 389, 399
lexer, building 315
lexical scope 299
LF 109, 370
LIFO backtracking 159
limit, backtracking 239
limit, preg_split 466-467
limit, recursion 249-250
line (see also string)
line, anchor optimization 246
line, vs. string 55
line anchor 112-113
line anchor, mechanics of matching 150
line anchor, variety of implementations 87
line anchors, Java 130, 370, 388
line anchors, .NET 130
line anchors, Perl 130
line anchors, PHP 130
line feed 109, 370
LINE SEPARATOR 109, 123, 370
line terminators 109-111, 129-130, 370
line terminators, with $ and ^ 112
line terminators, Java 370
\p{Line_Separator} 123
link, matching 201
link, matching, Java 209
link, matching, VB.NET 204
list context 294, 310-311
list context, forcing 310
literal string initial string discrimination 245-248, 252, 257-259, 332, 361
literal text, exposing 255
literal text, introduced 5
literal text, mechanics of matching 149
literal text, pre-check optimization 245-248, 252, 257-259, 332, 361
literal-text mode 113, 136, 290
literal-text mode, inhibiting 292
literal-text mode, .NET 136
\p{Ll} 123, 406
\p{Lm} 123, 406
\p{Lo} 123, 406
local 296, 341
local, in embedded code 336
local, vs. my 297
locale 127-128
locale, overview 87
locale, \w 120-121
localizing 296-297
localtime 294, 319, 351
locking in regex literal 352
A logical calculus of the ideas immanent in nervous activity 85
longest match finding 334-335
longest-leftmost match 148, 177-179
lookahead 133
lookahead, auto 410
lookahead, introduced 60
lookahead, mimic atomic grouping 174
lookahead, mimic optimizations 258-259
lookahead, negated, <B>...</B> 167
lookahead, positive vs. negative 66
lookahead example 61-64
lookaround, backtracking 173-174
lookaround, conditional 140-141
lookaround, and DFAs 182
lookaround, doesn't consume text 60
lookaround, introduced 59
lookaround, mimicking class set operations 126
lookaround, mimicking word boundaries 134
lookaround, Perl 288
lookbehind 133
lookbehind, Java 368
lookbehind, .NET 408
lookbehind, Perl 288
lookbehind, PHP 134, 443
lookbehind, positive vs. negative 66
lookbehind, unlimited 408
lookingAt method 376
Lord, Tom 183
\p{Lowercase_Letter} 123
LS 109, 123, 370
\p{Lt} 123, 406
\p{Lu} 123, 406
Lunde, Ken xxiv, 29
\p{M} 120, 122
m/.../ introduced 38
/m 135
machine-dependent character codes 115
MacOS 115
mail processing example 53-59
makudonarudo example 165, 169, 228-232, 264
\p{Mark} 122
match 306-318
match, actions 95
match, context 294-295, 309
match, context, list 294, 310-311
match, context, scalar 294, 310, 312-316
match, DFA vs. NFA 224
match, efficiency 179
match, example with backtracking 160
match, example without backtracking 160
match, lazy example 161
match, leftmost-longest 335
match, longest 334-335
match, m/.../, introduced 38
match, mechanics (see also: greedy; lazy)
match, mechanics, .* 152
match, mechanics, anchors 150
match, mechanics, capturing parentheses 149
match, mechanics, character classes and dot 149
match, mechanics, consequences 156
match, mechanics, greedy introduced 151
match, mechanics, literal text 149
match, modes 110-113
match, modes, Java 368
match, negating 309
match, neverending 222-228, 330, 340
match, neverending, avoiding 264-266
match, neverending, discovery 226-228
match, neverending, explanation 226-228
match, neverending, non-determinism 264
match, neverending, short-circuiting 250
match, neverending, solving with atomic grouping 268
match, neverending, solving with possessive quantifiers 268
match, NFA vs. DFA 156-157, 180-183
match, of nothing 454
match, position (see pos)
match, POSIX, Perl 335
match, shortest-leftmost 182
match, side effects 317
match, side effects, intertwined 43
match, side effects, Perl 40
match, speed 181
match, in a string 27
match, tag-team 132
match, viewing mechanics 331-332
Match Empty 433
match modes Java 368
Match (.NET) Success 96
Match object (.NET) 417
Match object (.NET), Capture 437
Match object (.NET), creating 421, 429
Match object (.NET), Groups 429
Match object (.NET), Index 429
Match object (.NET), Length 429
Match object (.NET), NextMatch 429
Match object (.NET), Result 429
Match object (.NET), Success 427
Match object (.NET), Synchronized 430
Match object (.NET), ToString 427
Match object (.NET), using 427
Match object (.NET), Value 427
match pointer Java 374, 383, 398, 400
Match (Regex object method) 421
match rejected by optimizer 363
match results Java 376
MatchCollection 422
Matcher, appendReplacement 380
Matcher, appendTail 381
Matcher, end 377
Matcher, find 375
Matcher, group 377
Matcher, groupCount 377
Matcher, hasAnchoringBounds 388
Matcher, hasTransparentBounds 387
Matcher, hitEnd 389-392
Matcher, lookingAt 376
Matcher, matches 376
Matcher, pattern 393
Matcher, quoteReplacement 379
Matcher, region 384-389
Matcher, region 386
Matcher, regionEnd 386
Matcher, regionStart 386
Matcher, replaceAll 378
Matcher, replaceFirst 379
Matcher, replacement argument 380
Matcher, requireEnd 389-392
Matcher, reset 392-393
Matcher, start 377
Matcher, text 394
Matcher, toMatchResult 377
Matcher, toString 393
Matcher, useAnchoringBounds 388
Matcher, usePattern 393, 399
Matcher, useTransparentBounds 387
Matcher object 373
Matcher object, reusing 392-393
$matches 450
$matches, vs. $all_matches 454
matches, unexpected 194-195
matches, viewing all 332
matches method 376, 395
Matches (Regex object method) 422
MatchEvaluator 423-424
matching, delimited text 196-198
matching, HTML tag 200
matching, longest-leftmost 177-179
matching comments Java 272-276
MatchObject object (.NET) creating 422
\p{Math_Symbol} 123
Maton, William xxiv, 36
mb_ereg suite 439
MBOL 362
\p{Mc} 123
McCloskey, Mike xxiv
McCulloch, Warren 85
\p{Me} 123
mechanics viewing 331-332
metacharacter, conflicting 44-46
metacharacter, differing contexts 10
metacharacter, first-class 87, 92
metacharacter, introduced 5
metacharacter, vs. metasequence 27
metasequence defined 27
method chaining 389
method chaining, Java 389
method index Java 366
mimic, $' 357
mimic, $` 357
mimic, $& 302, 357
mimic, atomic grouping 174
mimic, class set operations 126
mimic, conditional with lookaround 140
mimic, initial-character discrimination optimization 258-259
mimic, named capture 344-345
mimic, POSIX matching 335
mimic, possessive quantifiers 343-344
mimic, variable interpolation 321
mimic, word boundaries 66, 134, 341-342
minlen length 362
minus in character class 9
MSIL .NET 410
missing functions PHP 471
\p{Mn} 123
mode modifier 110, 135-136
mode-modified span 110, 135-136, 367, 392, 407, 446
modes introduced with egrep 14-15
\p{Modifier_Letter} 123
modifiers 372
modifiers, combining 69
modifiers, example with five 316
modifiers, /g 51
modifiers, /i 47
modifiers, locking in 304-305
modifiers, notation 99
modifiers, /osmosis 293
modifiers, Perl 292-293
modifiers, Perl core 292-293
modifiers, with regex object 304-305
modifiers, unknown 448
\p{Modifier_Symbol} 123
Morse, Ian xxiv
motto Perl 348
multi-character quotes 165-166
Multiline (.NET) 408, 419-420, 427
multiple-byte character encoding 29
MungeRegexLiteral 342-344, 346
Mustang Java 401
my, binding 339
my, in embedded code 338-339
my, vs. local 297
MySQL, after-match data 138
MySQL, DBIx::DWIW 258
MySQL, version covered 91
MySQL, word boundaries 134
\p{N} 122, 395
\n 49, 115-116
\n, introduced 44
\n, machine-dependency 115
$^N 300-301, 344-346
(?n) 408
named capture 138
named capture, mimicking 344-345
named capture, .NET 408-409
named capture, numeric names 451
named capture, PHP 450-452, 457, 476-477
named capture, with unnamed capture 409
naughty variables 356
naughty variables, OK for debugging 331
\p{Nd} 123, 368, 406
negated class, introduced 10-11
negated class, and lazy quantifiers 167
negated class, Tcl 112
negative lookahead (see lookahead, negative)
negative lookbehind (see lookbehind, negative)
NEL 109, 370, 407
nervous system 85
nested constructs, .NET 436
nested constructs, Perl 328-331, 340-341
nested constructs, PHP 475-478, 481
$NestedStuffRegex 339, 346
.NET xvii, 405-438
.NET, $+ 202
.NET, after-match data 138
.NET, benchmarking 237
.NET, character-class subtraction 406
.NET, code example 219
.NET, flavor overview 92
.NET, JIT 410
.NET, line anchors 130
.NET, literal-text mode 136
.NET, MSIL 410
.NET, object model 417
.NET, \p{...} 125
.NET, regex approach 96-97
.NET, regex flavor 407
.NET, search and replace 414, 423-424
.NET, URL example 204
.NET, version covered 405
.NET, word boundaries 134
neurophysiologists early regex study 85
neverending match 222-228, 330, 340
neverending match, avoiding 264-266
neverending match, discovery 226-228
neverending match, explanation 226-228
neverending match, non-determinism 264
neverending match, short-circuiting 250
neverending match, solving with atomic grouping 268
neverending match, solving with possessive quantifiers 268
New Regex 96, 99, 416, 421
newline and HTTP 115
NEXT LINE 109, 370, 407
NextMatch (Match object method) 429
NFA, acronym spelled out 156
NFA, and alternation 174-175
NFA, compared with DFA 156-157, 180-183
NFA, control benefits 155
NFA, efficiency 179
NFA, essence (see backtracking)
NFA, first introduced 145
NFA, freeflowing regex 277-281
NFA, and greediness 162
NFA, implementation ease 183
NFA, introduction 153
NFA, nondeterminism 265
NFA, nondeterminism, checkpoint 264-265
NFA, POSIX efficiency 179
NFA, testing for 146-147
NFA, theory 180
\p{Nl} 123
\N{LATIN SMALL LETTER SHARP S} 290
\N{name} 290
\N{name}, inhibiting 292
\p{No} 123
No Dashes Hall Of Shame 458
no re 'debug' 361
no_match_vars 357
nomenclature 27
non-capturing parentheses 45, 137-138
non-capturing parentheses, (see also parentheses)
Nondeterministic Finite Automaton (see NFA)
None (.NET) 421, 427
nonillion 226
nonparticipation parentheses 450, 453-454, 469
nonregular sets 180
\p{Non_Spacing_Mark} 123
normal 263-266
NUL 117
NUL, with dot 119
NULL 454
\p{Number} 122
/o 352-353
/o, with regex object 354
Obfuscated Perl Contest 320
object model, Java 371-372
object model, .NET 416-417
Object Oriented Perl 339
object-oriented handling 95-97
object-oriented handling, compile caching 244
octal escape 116, 118
octal escape, vs. backreference 412-413
octal escape, Perl 286
offset preg_match 453
on-demand recompilation 351
oneself example 332, 334
\p{Open_Punctuation} 123
operators Perl list 285
optimization 240-252
optimization, automatic possessification 251
optimization, BLTN 236
optimization, with bump-along 255
optimization, end-of-string anchor 246
optimization, excessive backtrack 249-250
optimization, hand tweaking 252-261
optimization, implicit line anchor 191
optimization, initial character discrimination 245-248, 252, 257-259, 332, 361
optimization, JIT 236, 410
optimization, lazy evaluation 181
optimization, lazy quantifier 248, 257
optimization, leading [.*] 246
optimization, literal-string concatenation 247
optimization, need cognizance 252
optimization, needless class elimination 248
optimization, needless parentheses 248
optimization, pre-check of required character 245-248, 252, 257-259, 332, 361
optimization, simple repetition, discussed 247-248
optimization, small quantifier equivalence 251-252
optimization, state suppression 250-251
optimization, string/line anchors 149, 181
optimization, super-linear short-circuiting 250
option, -0 36
option, -c 361
option, -Dr 363
option, -e 36, 53, 361
option, -i 53
option, -M 361
option, -Mre=debug 363
option, -n 36
option, -p 53
option, -w 38, 296, 326, 361
Option (.NET) 415
Option (.NET), whitespace 18
Options (Regex object method) 427
OR class set operations 125-126
Oram, Andy 5
ordered alternation 175-177
ordered alternation, pitfalls 176
osmosis 293
/osmosis 293
\p{Other} 122
\p{Other_Letter} 123
\p{Other_Number} 123
\p{Other_Punctuation} 123
\p{Other_Symbol} 123
our 295, 336
overload pragma 342
\p{...}, Java 125
\p{...}, .NET 125
\p{...}, PHP 125
\p{P} 122
\p{^...} 288
\p{All} 125
\p{All}, Perl 288
\p{all} 369
panic: top_env 332
\p{Any} 125, 442
\p{Any}, Perl 288
Papen, Jeffrey xxiv
PARAGRAPH SEPARATOR 109, 123, 370
\p{Paragraph_Separator} 123
parentheses, as \(...\) 86
parentheses, and alternation 13
parentheses, balanced 328-331, 340-341, 436, 475-478, 481
parentheses, balanced, difficulty 193-194
parentheses, capturing 137, 300
parentheses, capturing, and DFAs 150, 182
parentheses, capturing, introduced with egrep 20-22
parentheses, capturing, mechanics 149
parentheses, capturing, Perl 41
parentheses, capturing only 152
parentheses, counting 21
parentheses, elimination optimization 248
parentheses, grouping-only (see non-capturing parentheses)
parentheses, limiting scope 18
parentheses, named capture 138, 344-345, 408-409, 450-452, 457, 476-477
parentheses, nested 328-331, 340-341, 436, 475-477, 481
parentheses, non-capturing 45, 137-138
parentheses, non-participating 300
parentheses, nonparticipation 450, 453-454, 469
parentheses, with split, .NET 409, 426
parentheses, with split, Perl 326
\p{Arrows} 124
parser 132, 389, 399
parsing regex 410
participate in match 140
Pascal 36, 59, 183
Pascal, matching comments of 265
\p{Assigned} 125-126
\p{Assigned}, Perl 288
patch 88
pathname example 190-192
Pattern, CANON_EQ 108, 368
Pattern, CASE_INSENSITIVE 95, 110, 368, 372
Pattern, CASE_INSENSITIVE bug 392
Pattern, COMMENTS 99, 219, 368, 401
Pattern, compile 372
Pattern, DOTALL 368, 370
Pattern, flags 394
Pattern, matcher 373
Pattern, matches 395
Pattern, MULTILINE 81, 368, 370
Pattern, MULTILINE bug 387
Pattern, pattern 394
Pattern, quote 395
Pattern, split 395-396
Pattern, toString 394
Pattern, UNICODE_CASE 368, 372
Pattern, UNIX_LINES 368, 370
pattern argument 472
pattern argument, array order 462, 464
pattern arguments PHP 444, 448
pattern method 393-394
pattern modifier, A 447
pattern modifier, D 442, 447
pattern modifier, e 459, 465, 478
pattern modifier, m 442
pattern modifier, S 259, 447, 460, 467, 478-480
pattern modifier, u 442, 447-448, 452-453
pattern modifier, U 447
pattern modifier, unknown errors 448
pattern modifier, x 443, 471
pattern modifier, X 447
pattern modifiers PHP 446-448
PatternSyntaxException 371, 373
\p{Basic_Latin} 124
\p{Box_Drawing} 124
\p{C} 122
\p{C}, Java 369
\p{Pc} 123, 406
\p{Cc} 123
\p{Cf} 123
\p{Cherokee} 122
\p{Close_Punctuation} 123
\p{Cn} 123, 125-126, 369, 408
\p{Cn}, Java 369
\p{Co} 123
\p{Connector_Punctuation} 123
\p{Control} 123
PCRE 91, 440
PCRE, extra stuff 447
PCRE, flavor overview 441
PCRE, lookbehind 134
PCRE, recursive matching 475-478
PCRE, study 447
PCRE, version covered 440
PCRE, \w 120
PCRE, web site 91
PCRE, X pattern modifier 447
pcre_study 259
\p{Currency} 124
\p{Currency_Symbol} 123
\p{Cyrillic} 122, 124
\p{Pd} 123
\p{Dash_Punctuation} 123
\p{Decimal_Digit_Number} 123
\p{Dingbats} 124
\p{Pe} 123
PeakWebhosting.com xxiv
\p{Enclosing_Mark} 123
people, Aho, Alfred 86, 180
people, Barwise, J. 85
people, Byington, Ryan xxiv
people, Click, Cliff xxiv
people, Constable, Robert 85
people, Conway, Damian 339
people, Cruise, Tom 51
people, Filo, David 397
people, Fite, Liz 33
people, Friedl, Alfred 176
people, Friedl, brothers 33
people, Friedl, Fumie v, xxiv
people, Friedl, Fumie, birthday 11-12
people, Friedl, Jeffrey xxiii
people, Friedl, Stephen xxiv, 458
people, George, Kit xxiv
people, Gill, Stuart xxiv
people, Gosling, James 89
people, Greant, Zak xxiv
people, Gutierrez, David xxiv
people, Hazel, Philip xxiv, 91, 440
people, Keisler, H. J. 85
people, Kleene, Stephen 85
people, Kunen, K. 85
people, Lord, Tom 183
people, Lunde, Ken xxiv, 29
people, Maton, William xxiv, 36
people, McCloskey, Mike xxiv
people, McCulloch, Warren 85
people, Morse, Ian xxiv
people, Oram, Andy 5
people, Papen, Jeffrey xxiv
people, Perl Porters 90
people, Pinyan, Jeff 246
people, Pitts, Walter 85
people, Reinhold, Mark xxiv
people, Sethi, Ravi 180
people, Spencer, Henry 88, 182-183, 243
people, Thompson, Ken 85-86, 111
people, Tubby 265
people, Ullman, Jeffrey 180
people, Wall, Larry 88-90, 140, 363
people, Zawodny, Jeremy 258
people, Zmievski, Andrei xxiv, 440
Perl, \p{...} 125
Perl, $/ 35
Perl, context (see also match, context)
Perl, context, contorting 294
Perl, efficiency 347-363
Perl, flavor overview 92, 287
Perl, greatest weakness 286
Perl, history 88-90, 308
Perl, introduction 37-38
Perl, line anchors 130
Perl, modifiers 292-293
Perl, motto 348
Perl, option, -0 36
Perl, option, -c 361
Perl, option, -Dr 363
Perl, option, -e 36, 53, 361
Perl, option, -i 53
Perl, option, -M 361
Perl, option, -Mre=debug 363
Perl, option, -n 36
Perl, option, -p 53
Perl, option, -w 38, 296, 326, 361
Perl, regex operators 285
Perl, search and replace 318-321
Perl, Unicode 288
Perl, version covered 283
Perl, warnings 38
Perl, warnings, ($^W variable) 297
Perl, warnings, use warnings 326, 363
Perl Porters 90
perladmin 299
\p{Pf} 123
\p{Pf}, Java 369
\p{Final_Punctuation} 123
\p{Format} 123
\p{Gujarati} 122
\p{Han} 122
\p{Hangul_Jamo} 124
\p{Hebrew} 122, 124
\p{Hiragana} 122
PHP 439-484
PHP, after-match data 138
PHP, benchmarking 234-235
PHP, callback 463, 465
PHP, CSV parsing example 480
PHP, efficiency 478-480
PHP, flavor overview 441
PHP, history 440
PHP, line anchors 130
PHP, lookbehind 134, 443
PHP, missing functions 471
PHP, \p{...} 125
PHP, pattern arguments 444, 448
PHP, recursive matching 475-478
PHP, regex delimiters 445, 448
PHP, search and replace 458-465
PHP, single-quoted string 444
PHP, strings 103-104
PHP, str_replace 458
PHP, study 447
PHP, Unicode 442, 447
PHP, version covered 440
PHP, \w 120
PHP, word boundaries 134
\p{Pi} 123
\p{Pi}, Java 369
\p{InArrows} 124
\p{InBasic_Latin} 124
\p{InBox_Drawing} 124
\p{InCurrency} 124
\p{InCyrillic} 124
\p{InDingbats} 124
\p{InHangul_Jamo} 124
\p{InHebrew} 124
\p{Inherited} 122
\p{Initial_Punctuation} 123
\p{InKatakana} 124
\p{InTamil} 124
\p{InTibetan} 124
Pinyan, Jeff 246
\p{IsCherokee} 122
\p{IsCommon} 122
\p{IsCyrillic} 122
\p{IsGujarati} 122
\p{IsHan} 122
\p{IsHebrew} 122
\p{IsHiragana} 122
\p{IsKatakana} 122
\p{IsLatin} 122
\p{IsThai} 122
\p{IsTibetan} 124
Pitts, Walter 85
\p{javaJavaIdentifierStart} 369
\p{Katakana} 122, 124
\p{L} 121-122, 133, 368, 395
\p{L&} 122-123, 125, 442
\p{L&}, Java 369
\p{L&}, Perl 288
\pL PHP 442
\p{Latin} 122
(?P<...>) 451-452, 457
\p{Letter} 122, 288
\p{Letter_Number} 123
\p{Line_Separator} 123
\p{Ll} 123, 406
\p{Lm} 123, 406
\p{Lo} 123, 406
\p{Lowercase_Letter} 123
\p{Lt} 123, 406
\p{Lu} 123, 406
plus, as \+ 141
plus, backtracking 162
plus, greedy 141, 447
plus, introduced 18-20
plus, lazy 141
plus, possessive 142
\p{M} 120, 122
\p{Mark} 122
\p{Math_Symbol} 123
\p{Mc} 123
\p{Me} 123
\p{Mn} 123
\p{Modifier_Letter} 123
\p{Modifier_Symbol} 123
\pN PHP 442
\p{N} 122, 395
\p{Nd} 123, 368, 406
\p{Nl} 123
\p{No} 123
\p{Non_Spacing_Mark} 123
\p{Number} 122
\p{Po} 123
\p{Open_Punctuation} 123
population example 59
pos 130-133, 313-314, 316
pos, (see also \G)
positive lookahead (see lookahead, positive)
positive lookbehind (see lookbehind, positive)
POSIX, [.....] 128
POSIX, [:...:] 127
POSIX, Basic Regular Expressions 87-88
POSIX, bracket expressions 127
POSIX, character class 127
POSIX, character class and locale 127
POSIX, character equivalent 128
POSIX, collating sequences 128
POSIX, dot 119
POSIX, empty alternatives 140
POSIX, Extended Regular Expressions 87-88
POSIX, superficial flavor chart 88
POSIX, locale 127
POSIX, locale, overview 87
POSIX, longest-leftmost rule 177-179, 335
POSIX NFA, backtracking example 229
POSIX NFA, testing for 146-147
possessive quantifier 477, 483
possessive quantifiers 142, 172-173, 477, 483
possessive quantifiers, automatic 251
possessive quantifiers, for efficiency 259-260, 268-270, 482
possessive quantifiers, mimicking 343-344
possessive quantifiers, optimization 250-251
possessive quantifiers example 198, 201
postal code example 209-212
\p{Other} 122
\p{Other_Letter} 123
\p{Other_Number} 123
\p{Other_Punctuation} 123
\p{Other_Symbol} 123
£ 124
\p{P} 122
\p{Paragraph_Separator} 123
\p{Pc} 123, 406
\p{Pd} 123
\p{Pe} 123
\p{Pf} 123
\p{Pf}, Java 369
\p{Pi} 123
\p{Pi}, Java 369
\p{Po} 123
\p{Private_Use} 123
\p{Ps} 123
\p{Punctuation} 122
pragma, charnames 290
pragma, overload 342
pragma, re 361, 363
pragma, strict 295, 336, 345
pragma, warnings 326, 363
pre-check of required character 245-248, 252, 257-259, 361
pre-check of required character, mimic 258-259
pre-check of required character, viewing 332
preg function interface 443-448
preg suite 439
preg suite, missing functions 471
preg_grep 469-470
PREG_GREP_INVERT 470
preg_match 449-453
preg_match, offset 453
preg_match_all 453-457
PREG_OFFSET_CAPTURE 452, 454, 456
preg_pattern_error 474
PREG_PATTERN_ORDER 455
preg_quote 136, 470-471
preg_regex_error 475
preg_regex_to_pattern 472-474
preg_replace 458-464
preg_replace_callback 463-465
PREG_SET_ORDER 456
preg_split 465-469
PREG_SPLIT_DELIM_CAPTURE 468-469
PREG_SPLIT_DELIM_CAPTURE, split limit 469
PREG_SPLIT_NO_EMPTY 468
PREG_SPLIT_OFFSET_CAPTURE 468
pre-match copy 355
prepending filename to line 79
price rounding example 51-52, 167-168
price rounding example, with alternation 175
price rounding example, with atomic grouping 170
price rounding example, with possessive quantifier 169
Principles of Compiler Design 180
printf 40
private vs. global Perl variables 295
\p{Private_Use} 123
procedural handling 95-97
procedural handling, compile caching 244
processing instructions 483
procmail 94
procmail, version covered 91
Programming Perl 283, 286, 339
promote 294-295
properties 121-123, 125-126, 288, 368-369, 442
PS 109, 123, 370
\p{S} 122
\p{Ps} 123
\p{Sc} 123-124
\p{Separator} 122
\p{Sk} 123
\p{Sm} 123
\p{So} 123
\p{Space_Separator} 123
\p{Spacing_Combining_Mark} 123
\p{Symbol} 122
\p{Tamil} 124
\p{Thai} 122
\p{Tibetan} 124
\p{Titlecase_Letter} 123
publication, Bulletin of Math. Biophysics 85
publication, CJKV Information Processing 29
publication, Communications of the ACM 85
publication, Compilers -- Principles, Techniques, and Tools 180
publication, Embodiments of Mind 85
publication, The Kleene Symposium 85
publication, A logical calculus of the ideas immanent in nervous activity 85
publication, Object Oriented Perl 339
publication, Principles of Compiler Design 180
publication, Programming Perl 283, 286, 339
publication, Regular Expression Search Algorithm 85
publication, The Role of Finite Automata in the Development of Modern Computing Theory 85
\p{Unassigned} 123, 125
\p{Unassigned}, Perl 288
\p{Punctuation} 122
\p{Uppercase_Letter} 123
Python, after-match data 138
Python, benchmarking 238-239
Python, line anchors 130
Python, mode modifiers 135
Python, regex approach 97
Python, strings 104
Python, version covered 91
Python, word boundaries 134
Python, \Z 112
\p{Z} 121-122, 368, 407
\pZ PHP 442
\p{Zl} 123
\p{Zp} 123
\p{Zs} 123
\Q Java 368, 395, 403
Qantas 11
\Q...\E 290
\Q...\E, inhibiting 292
qed 85
qed, introduced 76
quantifier (see also: plus; star; question mark; interval; lazy; greedy; possessive quantifiers)
quantifier, and backtracking 162
quantifier, factor out 255
quantifier, grouping for 18
quantifier, multiple levels 266
quantifier, optimization 247-248
quantifier, and parentheses 18
quantifier, possessive 477, 483
quantifier, possessive quantifiers 142, 172-173, 477, 483
quantifier, possessive quantifiers, for efficiency 259-260, 268-270, 482
quantifier, possessive quantifiers, automatic 251
quantifier, possessive quantifiers, optimization 250-251
quantifier, possessive quantifiers, mimicking 343-344
quantifier, question mark, as \? 141
quantifier, question mark, backtracking 160
quantifier, question mark, greedy 141, 447
quantifier, question mark, introduced 17-18
quantifier, question mark, lazy 141
quantifier, question mark, possessive 142
quantifier, smallest preceding subexpression 29
question mark, as \? 141
question mark, backtracking 160
question mark, greedy 141, 447
question mark, introduced 17-18
question mark, lazy 141
question mark, possessive 142
quote method 136, 395
quoteReplacement method 379
quotes multi-character 165-166
r"..." 104
\r 49, 115-116
\r, machine-dependency 115
(?R) 475
(?R), PCRE 475
(?R), PHP 475
$^R 302, 327
re 361, 363
re pragma 361, 363
reality check 226-228
reality check, Java 402
reality check, .NET 436
reality check, PCRE 475-478
reality check, PHP 475-478, 481-484
red dragon 180
Reflection 435
regex, balancing needs 186
regex, cache 242-245, 350-352, 432, 478
regex, compile 179-180, 350
regex, default 308
regex, delimiters 291-292
regex, DFA (see DFA)
regex, encapsulation (see regex objects)
regex, engine analogy 143-147
regex, vs. English 275
regex, error checking 474
regex, frame of mind 6
regex, freeflowing design 277-281
regex, history 85-91
regex, library 76, 208
regex, longest-leftmost match 177-179
regex, longest-leftmost match, shortest-leftmost 182
regex, mechanics 241-242
regex, NFA (see NFA)
regex, nomenclature 27
regex, operands 288-292
regex, overloading 291, 328
regex, overloading, inhibiting 292
regex, overloading, problems 344
regex, subexpression, defined 29
regex, subroutines 476
regex approach .NET 96-97
regex delimiters PHP 445, 448
regex flavor, Java 366-370
regex flavor, .NET 407
regex literal 288-292, 307
regex literal, inhibiting processing 292
regex literal, locking in 352
regex literal, parsing of 292
regex literal, processing 350
regex literal, regex objects 354
Regex (.NET), CompileToAssembly 433, 435
Regex (.NET), creating, options 419-421
Regex (.NET), Escape 432
Regex (.NET), GetGroupNames 427-428
Regex (.NET), GetGroupNumbers 427-428
Regex (.NET), GroupNameFromNumber 427-428
Regex (.NET), GroupNumberFromName 427-428
Regex (.NET), IsMatch 413, 421, 431
Regex (.NET), Match 96, 414, 416, 421, 431
Regex (.NET), Matches 422, 431
Regex (.NET), object, creating 96, 416, 419-421
Regex (.NET), object, exceptions 419
Regex (.NET), object, using 96, 421
Regex (.NET), Options 427
Regex (.NET), Replace 414-415, 423-424, 431
Regex (.NET), RightToLeft 427
Regex (.NET), Split 425-426, 431
Regex (.NET), ToString 427
Regex (.NET), Unescape 433
regex objects 303-306
regex objects, efficiency 353-354
regex objects, /g 354
regex objects, match modes 304-305
regex objects, /o 354
regex objects, in regex literal 354
regex objects, viewing 305-306
regex operators Perl 285
regex overloading 292
regex overloading example 341-345
http://regex.info/ xxiv, 7, 345, 358, 451
RegexCompilationInfo 435
regex-directed matching 153
regex-directed matching, and backreferences 303
regex-directed matching, and greediness 162
Regex.Escape 136
RegexOptions, Compiled 237, 408, 410, 420, 427-428, 435
RegexOptions, ECMAScript 406, 408, 412-413, 421, 427
RegexOptions, ExplicitCapture 408, 420, 427
RegexOptions, IgnoreCase 96, 99, 408, 419, 427
RegexOptions, IgnorePatternWhitespace 99, 408, 419, 427
RegexOptions, Multiline 408, 419-420, 427
RegexOptions, None 421, 427
RegexOptions, RightToLeft 408, 411-412, 420, 426-427, 429-430
RegexOptions, Singleline 408, 420, 427
region, additional example 398
region, anchoring bounds 388
region, hitEnd 390
region, Java 384-389
region, methods that reset 385
region, requireEnd 390
region, resetting 392-393
region, setting one edge 386
region, transparent bounds 387
region method 386
regionEnd method 386
regionStart method 386
reg_match 454
regsub 100
regular expression origin of term 85
Regular Expression Search Algorithm 85
regular sets 85
Reinhold, Mark xxiv
removing whitespace 199-200
Replace (Regex object method) 423-424
replaceAll method 378
replaceFirst method 379
replacement argument 460
replacement argument, array order 462, 464
replacement argument, Java 380
replacement argument, PHP 459
reproductive organs 5
required character pre-check 245-248, 252, 257-259, 332, 361
requireEnd method 389-392
re-search-forward 100-101
reset method 385, 392-393
Result (Match object method) 429
RightToLeft (Regex property) 427-428
RightToLeft (.NET) 408, 411-412, 420, 426-427, 429-430
The Role of Finite Automata in the Development of Modern Computing Theory 85
Ruby, $ and ^ 112
Ruby, after-match data 138
Ruby, benchmarking 238
Ruby, line anchors 130
Ruby, mode modifiers 135
Ruby, version covered 91
Ruby, word boundaries 134
rule, earliest match wins 148-149
rule, standard quantifiers are greedy 151-153
rx 183
\p{S} 122
s/.../.../ 50, 318-321
\s 49, 121
\s, Emacs 128
\s, introduction 47
\s, Perl 288
\s, PHP 442
(?s) (see: dot-matches-all mode; mode modifier)
\S 49, 56, 121
/s 135
/s, (see also: dot-matches-all mode; mode modifier)
saved states (see backtracking, saved states)
SawAmpersand 358
say what you mean 195, 274
SBOL 362
\p{Sc} 123-124
scalar context 294, 310, 312-316
scalar context, forcing 310
scanner 132, 389, 399
schaffkopf 33
scope lexical vs. dynamic 299
scripts 122, 288, 442
search and replace xvii
search and replace, awk 100
search and replace, Java 378-383
search and replace, .NET 414, 423-424
search and replace, Perl 318-321
search and replace, PHP 458-465
search and replace, Tcl 100
sed, after-match data 138
sed, dot 111
sed, history 87
sed, version covered 91
sed, word boundaries 134
self-closing tag 481
\p{Separator} 122
server VM 236
Sethi, Ravi 180
shell 7
simple quantifier optimization 247-248
single quotes delimiter 292, 319
Singleline (.NET) 408, 420, 427
single-quoted string PHP 444
\p{Sk} 123
\p{Sm} 123
small quantifier equivalence 251-252
\p{So} 123
\p{Space_Separator} 123
\p{Spacing_Combining_Mark} 123
special 263-266
Spencer, Henry 88, 182-183, 243
split, with capturing parentheses, .NET 409, 426
split, with capturing parentheses, Perl 326
split, with capturing parentheses, PHP 468
split, chunk limit, Java 396
split, chunk limit, Perl 323
split, chunk limit, PHP 466
split, into characters 322
split, Java 395-396
split, limit 466-467
split, limit, Java 396
split, limit, Perl 323
split, limit, PHP 466
split, Perl 321-326
split, PHP 465-469
split, trailing empty items 324, 468
split, whitespace 325
split method 395-396
Split (Regex object method) 425-426
stacked data 456
standard formula for matching delimited text 196
star, backtracking 162
star, greedy 141, 447
star, introduced 18-20
star, lazy 141
star, possessive 142
start method 377
start-of-string anchor optimization 246, 255-256, 315
stclass `list' 362
stock pricing example 51-52, 167-168
stock pricing example, with alternation 175
stock pricing example, with atomic grouping 170
stock pricing example, with possessive quantifier 169
Strict (Option) 415
strict pragma 295, 336, 345
String, matches 376
String, replaceAll 378
String, replaceFirst 379
String, split 395
string (see also line)
string, double-quoted (see double-quoted string example)
string, initial string discrimination 245-248, 252, 257-259, 332, 361
string, vs. line 55
string, match position (see pos)
string, pos (see pos)
StringBuffer 373, 380, 382, 397
StringBuilder 373, 382, 397
strings, C# 103
strings, Emacs 101
strings, Java 102
strings, PHP 103-104
strings, Python 104
strings, as regex 101-105, 305
strings, Tcl 104
strings, VB.NET 103
stripping whitespace 199-200
str_replace 458
str_replace, PHP 458
study PHP 447
study 359-360
study, when not to use 359
subexpression, defined 29
subroutines, regex 476
substitution xvii
substitution, delimiter 319
substitution, s/.../.../ 50, 318-321
substring, initial substring discrimination 245-248, 252, 257-259, 332, 361
subtraction, character class 406
subtraction, class (set) 126
subtraction, class (simple) 125
Success, Group object method 430
Success, Match object method 427
Sun's regex package (see java.util.regex)
super-linear (see neverending match)
super-linear short-circuiting 250
\p{Symbol} 122
Synchronized, Match object method 430
syntax class, Emacs 128
System.currentTimeMillis() 236
System.Reflection 435
System.Text.RegularExpressions 413, 415
\t 49, 115-116
\t, introduced 44
tag, matching 200-201
tag, XML 481
tag-team matching 132, 315
\p{Tamil} 124
Tcl, [:<:] 91
Tcl, benchmarking 239
Tcl, dot 111, 113
Tcl, flavor overview 92
Tcl, hand-tweaking 243, 259
Tcl, line anchors 113, 130
Tcl, mode modifiers 135
Tcl, regex implementation 183
Tcl, regsub 100
Tcl, search and replace 100
Tcl, strings 104
Tcl, version covered 91
Tcl, word boundaries 134
temperature conversion example, Java 382
temperature conversion example, .NET 425
temperature conversion example, Perl 37, 283
temperature conversion example, PHP 444
terminators (see line terminators)
testing engine type 146-147
text method 394
text-directed matching 153
text-directed matching, regex appearance 162
text-to-HTML example 67-77
\p{Thai} 122
theory of an NFA 180
There's more than one way to do it 349
this|that example 133, 139, 243, 245-247, 252, 255, 260-261
Thompson, Ken 85-86, 111
thread scheduling, Java benchmarking 236
\p{Tibetan} 124
tied variables 299
time() 232
time of day 26
Time::HiRes 232, 358, 360
Time.new 238
Timer() 237
timezone, PHP 235
title case 110
\p{Titlecase_Letter} 123
TiVo 3
tokenizer 132, 389, 399
tokenizer, building 315
toMatchResult method 377
toothpicks scattered 101
tortilla 128
ToString, Group object method 430
ToString, Match object method 427
ToString, Regex object method 427
toString method 393-394
Traditional NFA, testing for 146-147
trailing context 182
trailing context, optimizations 246-247
transparent bounds 387
transparent bounds, Java 387
Tubby 265
typographical conventions xxi
\u 117, 290, 406
\U 117
\U...\E 290
\U...\E, inhibiting 292
uc 290
U+C0B5 107
ucfirst 290
UCS-2 encoding 107
UCS-4 encoding 107
Ullman, Jeffrey 180
\p{Unassigned} 123, 125
\p{Unassigned}, Perl 288
unconditional caching 350
underscore in \w history 89
Unescape 433
Unicode, block 124
Unicode, block, Java 369, 402
Unicode, block, .NET 407
Unicode, block, Perl 288
Unicode, categories (see Unicode, properties)
Unicode, character, combining 107, 120, 122
Unicode, code point, beyond U+FFFF 109
Unicode, code point, introduced 107
Unicode, code point, multiple 108
Unicode, code point, unassigned in block 124
Unicode, combining character 107, 120, 122
Unicode, Java 368-369, 402-403
Unicode, line terminators 109-111, 370
Unicode, line terminators, Java 370
Unicode, loose matching (see case-insensitive mode)
Unicode, .NET 407
Unicode, official web site 127
Unicode, overview 106-110
Unicode, Perl 288
Unicode, PHP 442, 447
Unicode, properties 121, 369
Unicode, properties, Java 368
Unicode, properties, list 122-123
Unicode, properties, \p{All} 125, 288
Unicode, properties, \p{Any} 125, 288, 442
Unicode, properties, \p{Assigned} 125-126, 288
Unicode, properties, Perl 288
Unicode, properties, PHP 442
Unicode, properties, \p{Unassigned} 123, 125, 288
Unicode, script 122
Unicode, script, Perl 288
Unicode, script, PHP 442
Unicode, Version 3.1 109
Unicode, \w 120
Unicode, whitespace and /x 288
UnicodeData.txt 290
unicore 290
unmatch 152, 161, 163
unmatch, .* 165
unmatch, atomic grouping 171
unrolling the loop 261-276
unrolling the loop, example 270-271, 477
unrolling the loop, general pattern 264
\p{Uppercase_Letter} 123
URL encoding 320
URL example 74-77, 201-204, 208, 260, 303-304, 306, 320, 450-451
URL example, egrep 25
URL example, Java 209
URL example, .NET 204
URL example, plucking 206-208
use charnames 290
use Config 290, 299
use English 357
use overload 342
use re 'debug' 361, 363
use re 'eval' 337
use strict 295, 336, 345
use Time::HiRes 358, 360
use warnings 326, 363
useAnchoringBounds method 388
usePattern method 393, 399
username example 73, 76, 98
username example, plucking from text 71-73
username example, in URL 74-77
useTransparentBounds method 387
using System.Text.RegularExpressions 416
UTF-16 encoding 107
UTF-8 encoding 107, 442, 447
\v 115-116, 364
\V 364
Value, Group object method 430
Value, Match object method 427
variable names example 24
variables, after match, pre-match copy 355
variables, binding 339
variables, fully qualified 295
variables, interpolation 344
variables, naughty 356
variables, tied 299
VB.NET xvii
VB.NET, code example 204, 219
VB.NET, comments 99
VB.NET, regex approach 96-97
VB.NET, strings 103
verbatim strings 103
Version 7 regex 183
Version 8 regex 183
version covered, Java 365
version covered, .NET 405
version covered, Perl 283
version covered, PHP 440
version covered, others 91
version history, Java 365, 368-369, 392, 401
vertical tab 109, 370
vertical tab, Perl \s 288
vi, after-match data 138
Vietnamese text processing 29
virtual machine 236
Visual Basic xvii
Visual Studio .NET 434
VM 236
VM, Java 236
VM, warming up 236
void context 294
VT 109, 370
$^W 297
\w 49, 65, 120
\w, Emacs 129
\w, Java 368
\w, many different interpretations 93
\w, Perl 288
\w, PHP 120, 442
\W 49, 121
Wall, Larry 88-90, 140, 363
warming up Java VM 236
warnings 296
warnings, ($^W variable), Perl 297
warnings, Perl 38
warnings, temporarily turning off 297
warnings, use warnings, Perl 326, 363
warnings pragma 326, 363
while vs. foreach vs. if 320
whitespace, allowing optional 18
whitespace, removing 199-200
width attribute, Java example 397
wildcards, filename 4
word anchor, mechanics of matching 150
word boundaries 133
word boundaries, \<...\>, egrep 15
word boundaries, introduced 15
word boundaries, Java 134
word boundaries, many programs 134
word boundaries, mimicking 66, 134, 341-342
word boundaries, .NET 134
word boundaries, Perl 288
word boundaries, PHP 134
www.cpan.org 358
www.PeakWebhosting.com xxiv
www.regex.info 358
www.unixwiz.net xxiv, 458
\X 108, 120
/x 135, 288
/x, history 90
/x, introduced 72
(?x) (see: comments and free-spacing mode; mode modifier)
\x 117, 406
\x, Perl 286
XML 483
XML, CDATA 483
XML example 481-484
-y, old grep 86
¥ 124
Yahoo! xxiv, 74, 132, 190, 206-207, 258, 314, 397
\Z 112, 129-130
\Z, Java 370
\Z, optimization 246
\p{Z} 121-122, 368, 407
\z 112, 129-130, 316, 447
\z, optimization 246
\z, PHP 442
Zawodny, Jeremy 258
ZIP code example 209-212
\p{Zl} 123
Zmievski, Andrei xxiv, 440
\p{Zp} 123
\p{Zs} 123