Initial commit: Go 1.23 release state
976
src/regexp/all_test.go
Normal file
@@ -0,0 +1,976 @@
// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package regexp

import (
	"reflect"
	"regexp/syntax"
	"slices"
	"strings"
	"testing"
	"unicode/utf8"
)
|
||||
|
||||
var goodRe = []string{
|
||||
``,
|
||||
`.`,
|
||||
`^.$`,
|
||||
`a`,
|
||||
`a*`,
|
||||
`a+`,
|
||||
`a?`,
|
||||
`a|b`,
|
||||
`a*|b*`,
|
||||
`(a*|b)(c*|d)`,
|
||||
`[a-z]`,
|
||||
`[a-abc-c\-\]\[]`,
|
||||
`[a-z]+`,
|
||||
`[abc]`,
|
||||
`[^1234]`,
|
||||
`[^\n]`,
|
||||
`\!\\`,
|
||||
}
|
||||
|
||||
type stringError struct {
|
||||
re string
|
||||
err string
|
||||
}
|
||||
|
||||
var badRe = []stringError{
|
||||
{`*`, "missing argument to repetition operator: `*`"},
|
||||
{`+`, "missing argument to repetition operator: `+`"},
|
||||
{`?`, "missing argument to repetition operator: `?`"},
|
||||
{`(abc`, "missing closing ): `(abc`"},
|
||||
{`abc)`, "unexpected ): `abc)`"},
|
||||
{`x[a-z`, "missing closing ]: `[a-z`"},
|
||||
{`[z-a]`, "invalid character class range: `z-a`"},
|
||||
{`abc\`, "trailing backslash at end of expression"},
|
||||
{`a**`, "invalid nested repetition operator: `**`"},
|
||||
{`a*+`, "invalid nested repetition operator: `*+`"},
|
||||
{`\x`, "invalid escape sequence: `\\x`"},
|
||||
{strings.Repeat(`\pL`, 27000), "expression too large"},
|
||||
}
|
||||
|
||||
func compileTest(t *testing.T, expr string, error string) *Regexp {
|
||||
re, err := Compile(expr)
|
||||
if error == "" && err != nil {
|
||||
t.Error("compiling `", expr, "`; unexpected error: ", err.Error())
|
||||
}
|
||||
if error != "" && err == nil {
|
||||
t.Error("compiling `", expr, "`; missing error")
|
||||
} else if error != "" && !strings.Contains(err.Error(), error) {
|
||||
t.Error("compiling `", expr, "`; wrong error: ", err.Error(), "; want ", error)
|
||||
}
|
||||
return re
|
||||
}
|
||||
|
||||
func TestGoodCompile(t *testing.T) {
|
||||
for i := 0; i < len(goodRe); i++ {
|
||||
compileTest(t, goodRe[i], "")
|
||||
}
|
||||
}
|
||||
|
||||
func TestBadCompile(t *testing.T) {
|
||||
for i := 0; i < len(badRe); i++ {
|
||||
compileTest(t, badRe[i].re, badRe[i].err)
|
||||
}
|
||||
}
|
||||
|
||||
func matchTest(t *testing.T, test *FindTest) {
|
||||
re := compileTest(t, test.pat, "")
|
||||
if re == nil {
|
||||
return
|
||||
}
|
||||
m := re.MatchString(test.text)
|
||||
if m != (len(test.matches) > 0) {
|
||||
t.Errorf("MatchString failure on %s: %t should be %t", test, m, len(test.matches) > 0)
|
||||
}
|
||||
// now try bytes
|
||||
m = re.Match([]byte(test.text))
|
||||
if m != (len(test.matches) > 0) {
|
||||
t.Errorf("Match failure on %s: %t should be %t", test, m, len(test.matches) > 0)
|
||||
}
|
||||
}
|
||||
|
||||
func TestMatch(t *testing.T) {
|
||||
for _, test := range findTests {
|
||||
matchTest(t, &test)
|
||||
}
|
||||
}
|
||||
|
||||
func matchFunctionTest(t *testing.T, test *FindTest) {
|
||||
m, err := MatchString(test.pat, test.text)
|
||||
if err == nil {
|
||||
return
|
||||
}
|
||||
if m != (len(test.matches) > 0) {
|
||||
t.Errorf("Match failure on %s: %t should be %t", test, m, len(test.matches) > 0)
|
||||
}
|
||||
}
|
||||
|
||||
func TestMatchFunction(t *testing.T) {
|
||||
for _, test := range findTests {
|
||||
matchFunctionTest(t, &test)
|
||||
}
|
||||
}
|
||||
|
||||
func copyMatchTest(t *testing.T, test *FindTest) {
|
||||
re := compileTest(t, test.pat, "")
|
||||
if re == nil {
|
||||
return
|
||||
}
|
||||
m1 := re.MatchString(test.text)
|
||||
m2 := re.Copy().MatchString(test.text)
|
||||
if m1 != m2 {
|
||||
t.Errorf("Copied Regexp match failure on %s: original gave %t; copy gave %t; should be %t",
|
||||
test, m1, m2, len(test.matches) > 0)
|
||||
}
|
||||
}
|
||||
|
||||
func TestCopyMatch(t *testing.T) {
|
||||
for _, test := range findTests {
|
||||
copyMatchTest(t, &test)
|
||||
}
|
||||
}
|
||||
|
||||
type ReplaceTest struct {
|
||||
pattern, replacement, input, output string
|
||||
}
|
||||
|
||||
var replaceTests = []ReplaceTest{
|
||||
// Test empty input and/or replacement, with pattern that matches the empty string.
|
||||
{"", "", "", ""},
|
||||
{"", "x", "", "x"},
|
||||
{"", "", "abc", "abc"},
|
||||
{"", "x", "abc", "xaxbxcx"},
|
||||
|
||||
// Test empty input and/or replacement, with pattern that does not match the empty string.
|
||||
{"b", "", "", ""},
|
||||
{"b", "x", "", ""},
|
||||
{"b", "", "abc", "ac"},
|
||||
{"b", "x", "abc", "axc"},
|
||||
{"y", "", "", ""},
|
||||
{"y", "x", "", ""},
|
||||
{"y", "", "abc", "abc"},
|
||||
{"y", "x", "abc", "abc"},
|
||||
|
||||
// Multibyte characters -- verify that we don't try to match in the middle
|
||||
// of a character.
|
||||
{"[a-c]*", "x", "\u65e5", "x\u65e5x"},
|
||||
{"[^\u65e5]", "x", "abc\u65e5def", "xxx\u65e5xxx"},
|
||||
|
||||
// Start and end of a string.
|
||||
{"^[a-c]*", "x", "abcdabc", "xdabc"},
|
||||
{"[a-c]*$", "x", "abcdabc", "abcdx"},
|
||||
{"^[a-c]*$", "x", "abcdabc", "abcdabc"},
|
||||
{"^[a-c]*", "x", "abc", "x"},
|
||||
{"[a-c]*$", "x", "abc", "x"},
|
||||
{"^[a-c]*$", "x", "abc", "x"},
|
||||
{"^[a-c]*", "x", "dabce", "xdabce"},
|
||||
{"[a-c]*$", "x", "dabce", "dabcex"},
|
||||
{"^[a-c]*$", "x", "dabce", "dabce"},
|
||||
{"^[a-c]*", "x", "", "x"},
|
||||
{"[a-c]*$", "x", "", "x"},
|
||||
{"^[a-c]*$", "x", "", "x"},
|
||||
|
||||
{"^[a-c]+", "x", "abcdabc", "xdabc"},
|
||||
{"[a-c]+$", "x", "abcdabc", "abcdx"},
|
||||
{"^[a-c]+$", "x", "abcdabc", "abcdabc"},
|
||||
{"^[a-c]+", "x", "abc", "x"},
|
||||
{"[a-c]+$", "x", "abc", "x"},
|
||||
{"^[a-c]+$", "x", "abc", "x"},
|
||||
{"^[a-c]+", "x", "dabce", "dabce"},
|
||||
{"[a-c]+$", "x", "dabce", "dabce"},
|
||||
{"^[a-c]+$", "x", "dabce", "dabce"},
|
||||
{"^[a-c]+", "x", "", ""},
|
||||
{"[a-c]+$", "x", "", ""},
|
||||
{"^[a-c]+$", "x", "", ""},
|
||||
|
||||
// Other cases.
|
||||
{"abc", "def", "abcdefg", "defdefg"},
|
||||
{"bc", "BC", "abcbcdcdedef", "aBCBCdcdedef"},
|
||||
{"abc", "", "abcdabc", "d"},
|
||||
{"x", "xXx", "xxxXxxx", "xXxxXxxXxXxXxxXxxXx"},
|
||||
{"abc", "d", "", ""},
|
||||
{"abc", "d", "abc", "d"},
|
||||
{".+", "x", "abc", "x"},
|
||||
{"[a-c]*", "x", "def", "xdxexfx"},
|
||||
{"[a-c]+", "x", "abcbcdcdedef", "xdxdedef"},
|
||||
{"[a-c]*", "x", "abcbcdcdedef", "xdxdxexdxexfx"},
|
||||
|
||||
// Substitutions
|
||||
{"a+", "($0)", "banana", "b(a)n(a)n(a)"},
|
||||
{"a+", "(${0})", "banana", "b(a)n(a)n(a)"},
|
||||
{"a+", "(${0})$0", "banana", "b(a)an(a)an(a)a"},
|
||||
{"a+", "(${0})$0", "banana", "b(a)an(a)an(a)a"},
|
||||
{"hello, (.+)", "goodbye, ${1}", "hello, world", "goodbye, world"},
|
||||
{"hello, (.+)", "goodbye, $1x", "hello, world", "goodbye, "},
|
||||
{"hello, (.+)", "goodbye, ${1}x", "hello, world", "goodbye, worldx"},
|
||||
{"hello, (.+)", "<$0><$1><$2><$3>", "hello, world", "<hello, world><world><><>"},
|
||||
{"hello, (?P<noun>.+)", "goodbye, $noun!", "hello, world", "goodbye, world!"},
|
||||
{"hello, (?P<noun>.+)", "goodbye, ${noun}", "hello, world", "goodbye, world"},
|
||||
{"(?P<x>hi)|(?P<x>bye)", "$x$x$x", "hi", "hihihi"},
|
||||
{"(?P<x>hi)|(?P<x>bye)", "$x$x$x", "bye", "byebyebye"},
|
||||
{"(?P<x>hi)|(?P<x>bye)", "$xyz", "hi", ""},
|
||||
{"(?P<x>hi)|(?P<x>bye)", "${x}yz", "hi", "hiyz"},
|
||||
{"(?P<x>hi)|(?P<x>bye)", "hello $$x", "hi", "hello $x"},
|
||||
{"a+", "${oops", "aaa", "${oops"},
|
||||
{"a+", "$$", "aaa", "$"},
|
||||
{"a+", "$", "aaa", "$"},
|
||||
|
||||
// Substitution when subexpression isn't found
|
||||
{"(x)?", "$1", "123", "123"},
|
||||
{"abc", "$1", "123", "123"},
|
||||
|
||||
// Substitutions involving a (x){0}
|
||||
{"(a)(b){0}(c)", ".$1|$3.", "xacxacx", "x.a|c.x.a|c.x"},
|
||||
{"(a)(((b))){0}c", ".$1.", "xacxacx", "x.a.x.a.x"},
|
||||
{"((a(b){0}){3}){5}(h)", "y caramb$2", "say aaaaaaaaaaaaaaaah", "say ay caramba"},
|
||||
{"((a(b){0}){3}){5}h", "y caramb$2", "say aaaaaaaaaaaaaaaah", "say ay caramba"},
|
||||
}
|
||||
|
||||
var replaceLiteralTests = []ReplaceTest{
|
||||
// Substitutions
|
||||
{"a+", "($0)", "banana", "b($0)n($0)n($0)"},
|
||||
{"a+", "(${0})", "banana", "b(${0})n(${0})n(${0})"},
|
||||
{"a+", "(${0})$0", "banana", "b(${0})$0n(${0})$0n(${0})$0"},
|
||||
{"a+", "(${0})$0", "banana", "b(${0})$0n(${0})$0n(${0})$0"},
|
||||
{"hello, (.+)", "goodbye, ${1}", "hello, world", "goodbye, ${1}"},
|
||||
{"hello, (?P<noun>.+)", "goodbye, $noun!", "hello, world", "goodbye, $noun!"},
|
||||
{"hello, (?P<noun>.+)", "goodbye, ${noun}", "hello, world", "goodbye, ${noun}"},
|
||||
{"(?P<x>hi)|(?P<x>bye)", "$x$x$x", "hi", "$x$x$x"},
|
||||
{"(?P<x>hi)|(?P<x>bye)", "$x$x$x", "bye", "$x$x$x"},
|
||||
{"(?P<x>hi)|(?P<x>bye)", "$xyz", "hi", "$xyz"},
|
||||
{"(?P<x>hi)|(?P<x>bye)", "${x}yz", "hi", "${x}yz"},
|
||||
{"(?P<x>hi)|(?P<x>bye)", "hello $$x", "hi", "hello $$x"},
|
||||
{"a+", "${oops", "aaa", "${oops"},
|
||||
{"a+", "$$", "aaa", "$$"},
|
||||
{"a+", "$", "aaa", "$"},
|
||||
}
|
||||
|
||||
type ReplaceFuncTest struct {
|
||||
pattern string
|
||||
replacement func(string) string
|
||||
input, output string
|
||||
}
|
||||
|
||||
var replaceFuncTests = []ReplaceFuncTest{
|
||||
{"[a-c]", func(s string) string { return "x" + s + "y" }, "defabcdef", "defxayxbyxcydef"},
|
||||
{"[a-c]+", func(s string) string { return "x" + s + "y" }, "defabcdef", "defxabcydef"},
|
||||
{"[a-c]*", func(s string) string { return "x" + s + "y" }, "defabcdef", "xydxyexyfxabcydxyexyfxy"},
|
||||
}
|
||||
|
||||
func TestReplaceAll(t *testing.T) {
|
||||
for _, tc := range replaceTests {
|
||||
re, err := Compile(tc.pattern)
|
||||
if err != nil {
|
||||
t.Errorf("Unexpected error compiling %q: %v", tc.pattern, err)
|
||||
continue
|
||||
}
|
||||
actual := re.ReplaceAllString(tc.input, tc.replacement)
|
||||
if actual != tc.output {
|
||||
t.Errorf("%q.ReplaceAllString(%q,%q) = %q; want %q",
|
||||
tc.pattern, tc.input, tc.replacement, actual, tc.output)
|
||||
}
|
||||
// now try bytes
|
||||
actual = string(re.ReplaceAll([]byte(tc.input), []byte(tc.replacement)))
|
||||
if actual != tc.output {
|
||||
t.Errorf("%q.ReplaceAll(%q,%q) = %q; want %q",
|
||||
tc.pattern, tc.input, tc.replacement, actual, tc.output)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestReplaceAllLiteral(t *testing.T) {
|
||||
// Run ReplaceAll tests that do not have $ expansions.
|
||||
for _, tc := range replaceTests {
|
||||
if strings.Contains(tc.replacement, "$") {
|
||||
continue
|
||||
}
|
||||
re, err := Compile(tc.pattern)
|
||||
if err != nil {
|
||||
t.Errorf("Unexpected error compiling %q: %v", tc.pattern, err)
|
||||
continue
|
||||
}
|
||||
actual := re.ReplaceAllLiteralString(tc.input, tc.replacement)
|
||||
if actual != tc.output {
|
||||
t.Errorf("%q.ReplaceAllLiteralString(%q,%q) = %q; want %q",
|
||||
tc.pattern, tc.input, tc.replacement, actual, tc.output)
|
||||
}
|
||||
// now try bytes
|
||||
actual = string(re.ReplaceAllLiteral([]byte(tc.input), []byte(tc.replacement)))
|
||||
if actual != tc.output {
|
||||
t.Errorf("%q.ReplaceAllLiteral(%q,%q) = %q; want %q",
|
||||
tc.pattern, tc.input, tc.replacement, actual, tc.output)
|
||||
}
|
||||
}
|
||||
|
||||
// Run literal-specific tests.
|
||||
for _, tc := range replaceLiteralTests {
|
||||
re, err := Compile(tc.pattern)
|
||||
if err != nil {
|
||||
t.Errorf("Unexpected error compiling %q: %v", tc.pattern, err)
|
||||
continue
|
||||
}
|
||||
actual := re.ReplaceAllLiteralString(tc.input, tc.replacement)
|
||||
if actual != tc.output {
|
||||
t.Errorf("%q.ReplaceAllLiteralString(%q,%q) = %q; want %q",
|
||||
tc.pattern, tc.input, tc.replacement, actual, tc.output)
|
||||
}
|
||||
// now try bytes
|
||||
actual = string(re.ReplaceAllLiteral([]byte(tc.input), []byte(tc.replacement)))
|
||||
if actual != tc.output {
|
||||
t.Errorf("%q.ReplaceAllLiteral(%q,%q) = %q; want %q",
|
||||
tc.pattern, tc.input, tc.replacement, actual, tc.output)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestReplaceAllFunc(t *testing.T) {
|
||||
for _, tc := range replaceFuncTests {
|
||||
re, err := Compile(tc.pattern)
|
||||
if err != nil {
|
||||
t.Errorf("Unexpected error compiling %q: %v", tc.pattern, err)
|
||||
continue
|
||||
}
|
||||
actual := re.ReplaceAllStringFunc(tc.input, tc.replacement)
|
||||
if actual != tc.output {
|
||||
t.Errorf("%q.ReplaceFunc(%q,fn) = %q; want %q",
|
||||
tc.pattern, tc.input, actual, tc.output)
|
||||
}
|
||||
// now try bytes
|
||||
actual = string(re.ReplaceAllFunc([]byte(tc.input), func(s []byte) []byte { return []byte(tc.replacement(string(s))) }))
|
||||
if actual != tc.output {
|
||||
t.Errorf("%q.ReplaceFunc(%q,fn) = %q; want %q",
|
||||
tc.pattern, tc.input, actual, tc.output)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
type MetaTest struct {
|
||||
pattern, output, literal string
|
||||
isLiteral bool
|
||||
}
|
||||
|
||||
var metaTests = []MetaTest{
|
||||
{``, ``, ``, true},
|
||||
{`foo`, `foo`, `foo`, true},
|
||||
{`日本語+`, `日本語\+`, `日本語`, false},
|
||||
{`foo\.\$`, `foo\\\.\\\$`, `foo.$`, true}, // has meta but no operator
|
||||
{`foo.\$`, `foo\.\\\$`, `foo`, false}, // has escaped operators and real operators
|
||||
{`!@#$%^&*()_+-=[{]}\|,<.>/?~`, `!@#\$%\^&\*\(\)_\+-=\[\{\]\}\\\|,<\.>/\?~`, `!@#`, false},
|
||||
}
|
||||
|
||||
var literalPrefixTests = []MetaTest{
|
||||
// See golang.org/issue/11175.
|
||||
// output is unused.
|
||||
{`^0^0$`, ``, `0`, false},
|
||||
{`^0^`, ``, ``, false},
|
||||
{`^0$`, ``, `0`, true},
|
||||
{`$0^`, ``, ``, false},
|
||||
{`$0$`, ``, ``, false},
|
||||
{`^^0$$`, ``, ``, false},
|
||||
{`^$^$`, ``, ``, false},
|
||||
{`$$0^^`, ``, ``, false},
|
||||
{`a\x{fffd}b`, ``, `a`, false},
|
||||
{`\x{fffd}b`, ``, ``, false},
|
||||
{"\ufffd", ``, ``, false},
|
||||
}
|
||||
|
||||
func TestQuoteMeta(t *testing.T) {
|
||||
for _, tc := range metaTests {
|
||||
// Verify that QuoteMeta returns the expected string.
|
||||
quoted := QuoteMeta(tc.pattern)
|
||||
if quoted != tc.output {
|
||||
t.Errorf("QuoteMeta(`%s`) = `%s`; want `%s`",
|
||||
tc.pattern, quoted, tc.output)
|
||||
continue
|
||||
}
|
||||
|
||||
// Verify that the quoted string is in fact treated as expected
|
||||
// by Compile -- i.e. that it matches the original, unquoted string.
|
||||
if tc.pattern != "" {
|
||||
re, err := Compile(quoted)
|
||||
if err != nil {
|
||||
t.Errorf("Unexpected error compiling QuoteMeta(`%s`): %v", tc.pattern, err)
|
||||
continue
|
||||
}
|
||||
src := "abc" + tc.pattern + "def"
|
||||
repl := "xyz"
|
||||
replaced := re.ReplaceAllString(src, repl)
|
||||
expected := "abcxyzdef"
|
||||
if replaced != expected {
|
||||
t.Errorf("QuoteMeta(`%s`).Replace(`%s`,`%s`) = `%s`; want `%s`",
|
||||
tc.pattern, src, repl, replaced, expected)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestLiteralPrefix(t *testing.T) {
|
||||
for _, tc := range append(metaTests, literalPrefixTests...) {
|
||||
// Literal method needs to scan the pattern.
|
||||
re := MustCompile(tc.pattern)
|
||||
str, complete := re.LiteralPrefix()
|
||||
if complete != tc.isLiteral {
|
||||
t.Errorf("LiteralPrefix(`%s`) = %t; want %t", tc.pattern, complete, tc.isLiteral)
|
||||
}
|
||||
if str != tc.literal {
|
||||
t.Errorf("LiteralPrefix(`%s`) = `%s`; want `%s`", tc.pattern, str, tc.literal)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
type subexpIndex struct {
|
||||
name string
|
||||
index int
|
||||
}
|
||||
|
||||
type subexpCase struct {
|
||||
input string
|
||||
num int
|
||||
names []string
|
||||
indices []subexpIndex
|
||||
}
|
||||
|
||||
var emptySubexpIndices = []subexpIndex{{"", -1}, {"missing", -1}}
|
||||
|
||||
var subexpCases = []subexpCase{
|
||||
{``, 0, nil, emptySubexpIndices},
|
||||
{`.*`, 0, nil, emptySubexpIndices},
|
||||
{`abba`, 0, nil, emptySubexpIndices},
|
||||
{`ab(b)a`, 1, []string{"", ""}, emptySubexpIndices},
|
||||
{`ab(.*)a`, 1, []string{"", ""}, emptySubexpIndices},
|
||||
{`(.*)ab(.*)a`, 2, []string{"", "", ""}, emptySubexpIndices},
|
||||
{`(.*)(ab)(.*)a`, 3, []string{"", "", "", ""}, emptySubexpIndices},
|
||||
{`(.*)((a)b)(.*)a`, 4, []string{"", "", "", "", ""}, emptySubexpIndices},
|
||||
{`(.*)(\(ab)(.*)a`, 3, []string{"", "", "", ""}, emptySubexpIndices},
|
||||
{`(.*)(\(a\)b)(.*)a`, 3, []string{"", "", "", ""}, emptySubexpIndices},
|
||||
{`(?P<foo>.*)(?P<bar>(a)b)(?P<foo>.*)a`, 4, []string{"", "foo", "bar", "", "foo"}, []subexpIndex{{"", -1}, {"missing", -1}, {"foo", 1}, {"bar", 2}}},
|
||||
}
|
||||
|
||||
func TestSubexp(t *testing.T) {
|
||||
for _, c := range subexpCases {
|
||||
re := MustCompile(c.input)
|
||||
n := re.NumSubexp()
|
||||
if n != c.num {
|
||||
t.Errorf("%q: NumSubexp = %d, want %d", c.input, n, c.num)
|
||||
continue
|
||||
}
|
||||
names := re.SubexpNames()
|
||||
if len(names) != 1+n {
|
||||
t.Errorf("%q: len(SubexpNames) = %d, want %d", c.input, len(names), n)
|
||||
continue
|
||||
}
|
||||
if c.names != nil {
|
||||
for i := 0; i < 1+n; i++ {
|
||||
if names[i] != c.names[i] {
|
||||
t.Errorf("%q: SubexpNames[%d] = %q, want %q", c.input, i, names[i], c.names[i])
|
||||
}
|
||||
}
|
||||
}
|
||||
for _, subexp := range c.indices {
|
||||
index := re.SubexpIndex(subexp.name)
|
||||
if index != subexp.index {
|
||||
t.Errorf("%q: SubexpIndex(%q) = %d, want %d", c.input, subexp.name, index, subexp.index)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
var splitTests = []struct {
|
||||
s string
|
||||
r string
|
||||
n int
|
||||
out []string
|
||||
}{
|
||||
{"foo:and:bar", ":", -1, []string{"foo", "and", "bar"}},
|
||||
{"foo:and:bar", ":", 1, []string{"foo:and:bar"}},
|
||||
{"foo:and:bar", ":", 2, []string{"foo", "and:bar"}},
|
||||
{"foo:and:bar", "foo", -1, []string{"", ":and:bar"}},
|
||||
{"foo:and:bar", "bar", -1, []string{"foo:and:", ""}},
|
||||
{"foo:and:bar", "baz", -1, []string{"foo:and:bar"}},
|
||||
{"baabaab", "a", -1, []string{"b", "", "b", "", "b"}},
|
||||
{"baabaab", "a*", -1, []string{"b", "b", "b"}},
|
||||
{"baabaab", "ba*", -1, []string{"", "", "", ""}},
|
||||
{"foobar", "f*b*", -1, []string{"", "o", "o", "a", "r"}},
|
||||
{"foobar", "f+.*b+", -1, []string{"", "ar"}},
|
||||
{"foobooboar", "o{2}", -1, []string{"f", "b", "boar"}},
|
||||
{"a,b,c,d,e,f", ",", 3, []string{"a", "b", "c,d,e,f"}},
|
||||
{"a,b,c,d,e,f", ",", 0, nil},
|
||||
{",", ",", -1, []string{"", ""}},
|
||||
{",,,", ",", -1, []string{"", "", "", ""}},
|
||||
{"", ",", -1, []string{""}},
|
||||
{"", ".*", -1, []string{""}},
|
||||
{"", ".+", -1, []string{""}},
|
||||
{"", "", -1, []string{}},
|
||||
{"foobar", "", -1, []string{"f", "o", "o", "b", "a", "r"}},
|
||||
{"abaabaccadaaae", "a*", 5, []string{"", "b", "b", "c", "cadaaae"}},
|
||||
{":x:y:z:", ":", -1, []string{"", "x", "y", "z", ""}},
|
||||
}
|
||||
|
||||
func TestSplit(t *testing.T) {
|
||||
for i, test := range splitTests {
|
||||
re, err := Compile(test.r)
|
||||
if err != nil {
|
||||
t.Errorf("#%d: %q: compile error: %s", i, test.r, err.Error())
|
||||
continue
|
||||
}
|
||||
|
||||
split := re.Split(test.s, test.n)
|
||||
if !slices.Equal(split, test.out) {
|
||||
t.Errorf("#%d: %q: got %q; want %q", i, test.r, split, test.out)
|
||||
}
|
||||
|
||||
if QuoteMeta(test.r) == test.r {
|
||||
strsplit := strings.SplitN(test.s, test.r, test.n)
|
||||
if !slices.Equal(split, strsplit) {
|
||||
t.Errorf("#%d: Split(%q, %q, %d): regexp vs strings mismatch\nregexp=%q\nstrings=%q", i, test.s, test.r, test.n, split, strsplit)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// The following sequence of Match calls used to panic. See issue #12980.
|
||||
func TestParseAndCompile(t *testing.T) {
|
||||
expr := "a$"
|
||||
s := "a\nb"
|
||||
|
||||
for i, tc := range []struct {
|
||||
reFlags syntax.Flags
|
||||
expMatch bool
|
||||
}{
|
||||
{syntax.Perl | syntax.OneLine, false},
|
||||
{syntax.Perl &^ syntax.OneLine, true},
|
||||
} {
|
||||
parsed, err := syntax.Parse(expr, tc.reFlags)
|
||||
if err != nil {
|
||||
t.Fatalf("%d: parse: %v", i, err)
|
||||
}
|
||||
re, err := Compile(parsed.String())
|
||||
if err != nil {
|
||||
t.Fatalf("%d: compile: %v", i, err)
|
||||
}
|
||||
if match := re.MatchString(s); match != tc.expMatch {
|
||||
t.Errorf("%d: %q.MatchString(%q)=%t; expected=%t", i, re, s, match, tc.expMatch)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Check that one-pass cutoff does trigger.
|
||||
func TestOnePassCutoff(t *testing.T) {
|
||||
re, err := syntax.Parse(`^x{1,1000}y{1,1000}$`, syntax.Perl)
|
||||
if err != nil {
|
||||
t.Fatalf("parse: %v", err)
|
||||
}
|
||||
p, err := syntax.Compile(re.Simplify())
|
||||
if err != nil {
|
||||
t.Fatalf("compile: %v", err)
|
||||
}
|
||||
if compileOnePass(p) != nil {
|
||||
t.Fatalf("makeOnePass succeeded; wanted nil")
|
||||
}
|
||||
}
|
||||
|
||||
// Check that the same machine can be used with the standard matcher
|
||||
// and then the backtracker when there are no captures.
|
||||
func TestSwitchBacktrack(t *testing.T) {
|
||||
re := MustCompile(`a|b`)
|
||||
long := make([]byte, maxBacktrackVector+1)
|
||||
|
||||
// The following sequence of Match calls used to panic. See issue #10319.
|
||||
re.Match(long) // triggers standard matcher
|
||||
re.Match(long[:1]) // triggers backtracker
|
||||
}
|
||||
|
||||
func BenchmarkFind(b *testing.B) {
|
||||
b.StopTimer()
|
||||
re := MustCompile("a+b+")
|
||||
wantSubs := "aaabb"
|
||||
s := []byte("acbb" + wantSubs + "dd")
|
||||
b.StartTimer()
|
||||
b.ReportAllocs()
|
||||
for i := 0; i < b.N; i++ {
|
||||
subs := re.Find(s)
|
||||
if string(subs) != wantSubs {
|
||||
b.Fatalf("Find(%q) = %q; want %q", s, subs, wantSubs)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func BenchmarkFindAllNoMatches(b *testing.B) {
|
||||
re := MustCompile("a+b+")
|
||||
s := []byte("acddee")
|
||||
b.ReportAllocs()
|
||||
b.ResetTimer()
|
||||
for i := 0; i < b.N; i++ {
|
||||
all := re.FindAll(s, -1)
|
||||
if all != nil {
|
||||
b.Fatalf("FindAll(%q) = %q; want nil", s, all)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func BenchmarkFindString(b *testing.B) {
|
||||
b.StopTimer()
|
||||
re := MustCompile("a+b+")
|
||||
wantSubs := "aaabb"
|
||||
s := "acbb" + wantSubs + "dd"
|
||||
b.StartTimer()
|
||||
b.ReportAllocs()
|
||||
for i := 0; i < b.N; i++ {
|
||||
subs := re.FindString(s)
|
||||
if subs != wantSubs {
|
||||
b.Fatalf("FindString(%q) = %q; want %q", s, subs, wantSubs)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func BenchmarkFindSubmatch(b *testing.B) {
|
||||
b.StopTimer()
|
||||
re := MustCompile("a(a+b+)b")
|
||||
wantSubs := "aaabb"
|
||||
s := []byte("acbb" + wantSubs + "dd")
|
||||
b.StartTimer()
|
||||
b.ReportAllocs()
|
||||
for i := 0; i < b.N; i++ {
|
||||
subs := re.FindSubmatch(s)
|
||||
if string(subs[0]) != wantSubs {
|
||||
b.Fatalf("FindSubmatch(%q)[0] = %q; want %q", s, subs[0], wantSubs)
|
||||
}
|
||||
if string(subs[1]) != "aab" {
|
||||
b.Fatalf("FindSubmatch(%q)[1] = %q; want %q", s, subs[1], "aab")
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func BenchmarkFindStringSubmatch(b *testing.B) {
|
||||
b.StopTimer()
|
||||
re := MustCompile("a(a+b+)b")
|
||||
wantSubs := "aaabb"
|
||||
s := "acbb" + wantSubs + "dd"
|
||||
b.StartTimer()
|
||||
b.ReportAllocs()
|
||||
for i := 0; i < b.N; i++ {
|
||||
subs := re.FindStringSubmatch(s)
|
||||
if subs[0] != wantSubs {
|
||||
b.Fatalf("FindStringSubmatch(%q)[0] = %q; want %q", s, subs[0], wantSubs)
|
||||
}
|
||||
if subs[1] != "aab" {
|
||||
b.Fatalf("FindStringSubmatch(%q)[1] = %q; want %q", s, subs[1], "aab")
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func BenchmarkLiteral(b *testing.B) {
|
||||
x := strings.Repeat("x", 50) + "y"
|
||||
b.StopTimer()
|
||||
re := MustCompile("y")
|
||||
b.StartTimer()
|
||||
for i := 0; i < b.N; i++ {
|
||||
if !re.MatchString(x) {
|
||||
b.Fatalf("no match!")
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func BenchmarkNotLiteral(b *testing.B) {
|
||||
x := strings.Repeat("x", 50) + "y"
|
||||
b.StopTimer()
|
||||
re := MustCompile(".y")
|
||||
b.StartTimer()
|
||||
for i := 0; i < b.N; i++ {
|
||||
if !re.MatchString(x) {
|
||||
b.Fatalf("no match!")
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func BenchmarkMatchClass(b *testing.B) {
|
||||
b.StopTimer()
|
||||
x := strings.Repeat("xxxx", 20) + "w"
|
||||
re := MustCompile("[abcdw]")
|
||||
b.StartTimer()
|
||||
for i := 0; i < b.N; i++ {
|
||||
if !re.MatchString(x) {
|
||||
b.Fatalf("no match!")
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func BenchmarkMatchClass_InRange(b *testing.B) {
|
||||
b.StopTimer()
|
||||
// 'b' is between 'a' and 'c', so the charclass
|
||||
// range checking is no help here.
|
||||
x := strings.Repeat("bbbb", 20) + "c"
|
||||
re := MustCompile("[ac]")
|
||||
b.StartTimer()
|
||||
for i := 0; i < b.N; i++ {
|
||||
if !re.MatchString(x) {
|
||||
b.Fatalf("no match!")
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func BenchmarkReplaceAll(b *testing.B) {
|
||||
x := "abcdefghijklmnopqrstuvwxyz"
|
||||
b.StopTimer()
|
||||
re := MustCompile("[cjrw]")
|
||||
b.StartTimer()
|
||||
for i := 0; i < b.N; i++ {
|
||||
re.ReplaceAllString(x, "")
|
||||
}
|
||||
}
|
||||
|
||||
func BenchmarkAnchoredLiteralShortNonMatch(b *testing.B) {
|
||||
b.StopTimer()
|
||||
x := []byte("abcdefghijklmnopqrstuvwxyz")
|
||||
re := MustCompile("^zbc(d|e)")
|
||||
b.StartTimer()
|
||||
for i := 0; i < b.N; i++ {
|
||||
re.Match(x)
|
||||
}
|
||||
}
|
||||
|
||||
func BenchmarkAnchoredLiteralLongNonMatch(b *testing.B) {
|
||||
b.StopTimer()
|
||||
x := []byte("abcdefghijklmnopqrstuvwxyz")
|
||||
for i := 0; i < 15; i++ {
|
||||
x = append(x, x...)
|
||||
}
|
||||
re := MustCompile("^zbc(d|e)")
|
||||
b.StartTimer()
|
||||
for i := 0; i < b.N; i++ {
|
||||
re.Match(x)
|
||||
}
|
||||
}
|
||||
|
||||
func BenchmarkAnchoredShortMatch(b *testing.B) {
|
||||
b.StopTimer()
|
||||
x := []byte("abcdefghijklmnopqrstuvwxyz")
|
||||
re := MustCompile("^.bc(d|e)")
|
||||
b.StartTimer()
|
||||
for i := 0; i < b.N; i++ {
|
||||
re.Match(x)
|
||||
}
|
||||
}
|
||||
|
||||
func BenchmarkAnchoredLongMatch(b *testing.B) {
|
||||
b.StopTimer()
|
||||
x := []byte("abcdefghijklmnopqrstuvwxyz")
|
||||
for i := 0; i < 15; i++ {
|
||||
x = append(x, x...)
|
||||
}
|
||||
re := MustCompile("^.bc(d|e)")
|
||||
b.StartTimer()
|
||||
for i := 0; i < b.N; i++ {
|
||||
re.Match(x)
|
||||
}
|
||||
}
|
||||
|
||||
func BenchmarkOnePassShortA(b *testing.B) {
|
||||
b.StopTimer()
|
||||
x := []byte("abcddddddeeeededd")
|
||||
re := MustCompile("^.bc(d|e)*$")
|
||||
b.StartTimer()
|
||||
for i := 0; i < b.N; i++ {
|
||||
re.Match(x)
|
||||
}
|
||||
}
|
||||
|
||||
func BenchmarkNotOnePassShortA(b *testing.B) {
|
||||
b.StopTimer()
|
||||
x := []byte("abcddddddeeeededd")
|
||||
re := MustCompile(".bc(d|e)*$")
|
||||
b.StartTimer()
|
||||
for i := 0; i < b.N; i++ {
|
||||
re.Match(x)
|
||||
}
|
||||
}
|
||||
|
||||
func BenchmarkOnePassShortB(b *testing.B) {
|
||||
b.StopTimer()
|
||||
x := []byte("abcddddddeeeededd")
|
||||
re := MustCompile("^.bc(?:d|e)*$")
|
||||
b.StartTimer()
|
||||
for i := 0; i < b.N; i++ {
|
||||
re.Match(x)
|
||||
}
|
||||
}
|
||||
|
||||
func BenchmarkNotOnePassShortB(b *testing.B) {
|
||||
b.StopTimer()
|
||||
x := []byte("abcddddddeeeededd")
|
||||
re := MustCompile(".bc(?:d|e)*$")
|
||||
b.StartTimer()
|
||||
for i := 0; i < b.N; i++ {
|
||||
re.Match(x)
|
||||
}
|
||||
}
|
||||
|
||||
func BenchmarkOnePassLongPrefix(b *testing.B) {
|
||||
b.StopTimer()
|
||||
x := []byte("abcdefghijklmnopqrstuvwxyz")
|
||||
re := MustCompile("^abcdefghijklmnopqrstuvwxyz.*$")
|
||||
b.StartTimer()
|
||||
for i := 0; i < b.N; i++ {
|
||||
re.Match(x)
|
||||
}
|
||||
}
|
||||
|
||||
func BenchmarkOnePassLongNotPrefix(b *testing.B) {
|
||||
b.StopTimer()
|
||||
x := []byte("abcdefghijklmnopqrstuvwxyz")
|
||||
re := MustCompile("^.bcdefghijklmnopqrstuvwxyz.*$")
|
||||
b.StartTimer()
|
||||
for i := 0; i < b.N; i++ {
|
||||
re.Match(x)
|
||||
}
|
||||
}
|
||||
|
||||
func BenchmarkMatchParallelShared(b *testing.B) {
|
||||
x := []byte("this is a long line that contains foo bar baz")
|
||||
re := MustCompile("foo (ba+r)? baz")
|
||||
b.ResetTimer()
|
||||
b.RunParallel(func(pb *testing.PB) {
|
||||
for pb.Next() {
|
||||
re.Match(x)
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
func BenchmarkMatchParallelCopied(b *testing.B) {
|
||||
x := []byte("this is a long line that contains foo bar baz")
|
||||
re := MustCompile("foo (ba+r)? baz")
|
||||
b.ResetTimer()
|
||||
b.RunParallel(func(pb *testing.PB) {
|
||||
re := re.Copy()
|
||||
for pb.Next() {
|
||||
re.Match(x)
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
var sink string
|
||||
|
||||
func BenchmarkQuoteMetaAll(b *testing.B) {
|
||||
specials := make([]byte, 0)
|
||||
for i := byte(0); i < utf8.RuneSelf; i++ {
|
||||
if special(i) {
|
||||
specials = append(specials, i)
|
||||
}
|
||||
}
|
||||
s := string(specials)
|
||||
b.SetBytes(int64(len(s)))
|
||||
b.ResetTimer()
|
||||
for i := 0; i < b.N; i++ {
|
||||
sink = QuoteMeta(s)
|
||||
}
|
||||
}
|
||||
|
||||
func BenchmarkQuoteMetaNone(b *testing.B) {
|
||||
s := "abcdefghijklmnopqrstuvwxyz"
|
||||
b.SetBytes(int64(len(s)))
|
||||
b.ResetTimer()
|
||||
for i := 0; i < b.N; i++ {
|
||||
sink = QuoteMeta(s)
|
||||
}
|
||||
}
|
||||
|
||||
var compileBenchData = []struct{ name, re string }{
|
||||
{"Onepass", `^a.[l-nA-Cg-j]?e$`},
|
||||
{"Medium", `^((a|b|[d-z0-9])*(日){4,5}.)+$`},
|
||||
{"Hard", strings.Repeat(`((abc)*|`, 50) + strings.Repeat(`)`, 50)},
|
||||
}
|
||||
|
||||
func BenchmarkCompile(b *testing.B) {
|
||||
for _, data := range compileBenchData {
|
||||
b.Run(data.name, func(b *testing.B) {
|
||||
b.ReportAllocs()
|
||||
for i := 0; i < b.N; i++ {
|
||||
if _, err := Compile(data.re); err != nil {
|
||||
b.Fatal(err)
|
||||
}
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestDeepEqual(t *testing.T) {
|
||||
re1 := MustCompile("a.*b.*c.*d")
|
||||
re2 := MustCompile("a.*b.*c.*d")
|
||||
if !reflect.DeepEqual(re1, re2) { // has always been true, since Go 1.
|
||||
t.Errorf("DeepEqual(re1, re2) = false, want true")
|
||||
}
|
||||
|
||||
re1.MatchString("abcdefghijklmn")
|
||||
if !reflect.DeepEqual(re1, re2) {
|
||||
t.Errorf("DeepEqual(re1, re2) = false, want true")
|
||||
}
|
||||
|
||||
re2.MatchString("abcdefghijklmn")
|
||||
if !reflect.DeepEqual(re1, re2) {
|
||||
t.Errorf("DeepEqual(re1, re2) = false, want true")
|
||||
}
|
||||
|
||||
re2.MatchString(strings.Repeat("abcdefghijklmn", 100))
|
||||
if !reflect.DeepEqual(re1, re2) {
|
||||
t.Errorf("DeepEqual(re1, re2) = false, want true")
|
||||
}
|
||||
}
|
||||
|
||||
var minInputLenTests = []struct {
|
||||
Regexp string
|
||||
min int
|
||||
}{
|
||||
{``, 0},
|
||||
{`a`, 1},
|
||||
{`aa`, 2},
|
||||
{`(aa)a`, 3},
|
||||
{`(?:aa)a`, 3},
|
||||
{`a?a`, 1},
|
||||
{`(aaa)|(aa)`, 2},
|
||||
{`(aa)+a`, 3},
|
||||
{`(aa)*a`, 1},
|
||||
{`(aa){3,5}`, 6},
|
||||
{`[a-z]`, 1},
|
||||
{`日`, 3},
|
||||
}
|
||||
|
||||
func TestMinInputLen(t *testing.T) {
|
||||
for _, tt := range minInputLenTests {
|
||||
re, _ := syntax.Parse(tt.Regexp, syntax.Perl)
|
||||
m := minInputLen(re)
|
||||
if m != tt.min {
|
||||
t.Errorf("regexp %#q has minInputLen %d, should be %d", tt.Regexp, m, tt.min)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestUnmarshalText(t *testing.T) {
|
||||
unmarshaled := new(Regexp)
|
||||
for i := range goodRe {
|
||||
re := compileTest(t, goodRe[i], "")
|
||||
marshaled, err := re.MarshalText()
|
||||
if err != nil {
|
||||
t.Errorf("regexp %#q failed to marshal: %s", re, err)
|
||||
continue
|
||||
}
|
||||
if err := unmarshaled.UnmarshalText(marshaled); err != nil {
|
||||
t.Errorf("regexp %#q failed to unmarshal: %s", re, err)
|
||||
continue
|
||||
}
|
||||
if unmarshaled.String() != goodRe[i] {
|
||||
t.Errorf("UnmarshalText returned unexpected value: %s", unmarshaled.String())
|
||||
}
|
||||
}
|
||||
t.Run("invalid pattern", func(t *testing.T) {
|
||||
re := new(Regexp)
|
||||
err := re.UnmarshalText([]byte(`\`))
|
||||
if err == nil {
|
||||
t.Error("unexpected success")
|
||||
}
|
||||
})
|
||||
}
|
||||
365
src/regexp/backtrack.go
Normal file
@@ -0,0 +1,365 @@
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// backtrack is a regular expression search with submatch
// tracking for small regular expressions and texts. It allocates
// a bit vector with (length of input) * (length of prog) bits,
// to make sure it never explores the same (character position, instruction)
// state multiple times. This limits the search to run in time linear in
// the length of the input.
//
// backtrack is a fast replacement for the NFA code on small
// regexps when onepass cannot be used.

package regexp

import (
	"regexp/syntax"
	"sync"
)

// A job is an entry on the backtracker's job stack. It holds
// the instruction pc and the position in the input.
type job struct {
	pc  uint32
	arg bool
	pos int
}

const (
	visitedBits        = 32
	maxBacktrackProg   = 500        // len(prog.Inst) <= max
	maxBacktrackVector = 256 * 1024 // bit vector size <= max (bits)
)

// bitState holds state for the backtracker.
type bitState struct {
	end      int
	cap      []int
	matchcap []int
	jobs     []job
	visited  []uint32

	inputs inputs
}

var bitStatePool sync.Pool

func newBitState() *bitState {
	b, ok := bitStatePool.Get().(*bitState)
	if !ok {
		b = new(bitState)
	}
	return b
}

func freeBitState(b *bitState) {
	b.inputs.clear()
	bitStatePool.Put(b)
}

// maxBitStateLen returns the maximum length of a string to search with
// the backtracker using prog.
func maxBitStateLen(prog *syntax.Prog) int {
	if !shouldBacktrack(prog) {
		return 0
	}
	return maxBacktrackVector / len(prog.Inst)
}
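
// Added note (not in the original source): with maxBacktrackVector set to
// 256*1024 bits, a 128-instruction program can backtrack over inputs of up
// to 256*1024/128 = 2048 bytes, while a program at the maxBacktrackProg
// limit of 500 instructions is capped at roughly 524 bytes; longer inputs
// are handled by the other matchers instead.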

// shouldBacktrack reports whether the program is small enough
// for the backtracker to run.
func shouldBacktrack(prog *syntax.Prog) bool {
	return len(prog.Inst) <= maxBacktrackProg
}

// reset resets the state of the backtracker.
// end is the end position in the input.
// ncap is the number of captures.
func (b *bitState) reset(prog *syntax.Prog, end int, ncap int) {
	b.end = end

	if cap(b.jobs) == 0 {
		b.jobs = make([]job, 0, 256)
	} else {
		b.jobs = b.jobs[:0]
	}

	visitedSize := (len(prog.Inst)*(end+1) + visitedBits - 1) / visitedBits
	if cap(b.visited) < visitedSize {
		b.visited = make([]uint32, visitedSize, maxBacktrackVector/visitedBits)
	} else {
		b.visited = b.visited[:visitedSize]
		clear(b.visited) // set to 0
	}

	if cap(b.cap) < ncap {
		b.cap = make([]int, ncap)
	} else {
		b.cap = b.cap[:ncap]
	}
	for i := range b.cap {
		b.cap[i] = -1
	}

	if cap(b.matchcap) < ncap {
		b.matchcap = make([]int, ncap)
	} else {
		b.matchcap = b.matchcap[:ncap]
	}
	for i := range b.matchcap {
		b.matchcap[i] = -1
	}
}

// shouldVisit reports whether the combination of (pc, pos) has not
// been visited yet.
func (b *bitState) shouldVisit(pc uint32, pos int) bool {
	n := uint(int(pc)*(b.end+1) + pos)
	if b.visited[n/visitedBits]&(1<<(n&(visitedBits-1))) != 0 {
		return false
	}
	b.visited[n/visitedBits] |= 1 << (n & (visitedBits - 1))
	return true
}
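
// Added note (not in the original source): the flat index above treats
// visited as a matrix with one row per instruction and end+1 columns, one
// for every input position including the position just past the end.
// pc*(b.end+1)+pos selects the (pc, pos) cell; the /visitedBits and
// &(visitedBits-1) arithmetic then picks the word and the bit within it.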
|
||||
|
||||
// push pushes (pc, pos, arg) onto the job stack if it should be
|
||||
// visited.
|
||||
func (b *bitState) push(re *Regexp, pc uint32, pos int, arg bool) {
|
||||
// Only check shouldVisit when arg is false.
|
||||
// When arg is true, we are continuing a previous visit.
|
||||
if re.prog.Inst[pc].Op != syntax.InstFail && (arg || b.shouldVisit(pc, pos)) {
|
||||
b.jobs = append(b.jobs, job{pc: pc, arg: arg, pos: pos})
|
||||
}
|
||||
}
|
||||
|
||||
// tryBacktrack runs a backtracking search starting at pos.
|
||||
func (re *Regexp) tryBacktrack(b *bitState, i input, pc uint32, pos int) bool {
|
||||
longest := re.longest
|
||||
|
||||
b.push(re, pc, pos, false)
|
||||
for len(b.jobs) > 0 {
|
||||
l := len(b.jobs) - 1
|
||||
// Pop job off the stack.
|
||||
pc := b.jobs[l].pc
|
||||
pos := b.jobs[l].pos
|
||||
arg := b.jobs[l].arg
|
||||
b.jobs = b.jobs[:l]
|
||||
|
||||
// Optimization: rather than push and pop,
|
||||
// code that is going to Push and continue
|
||||
// the loop simply updates ip, p, and arg
|
||||
// and jumps to CheckAndLoop. We have to
|
||||
// do the ShouldVisit check that Push
|
||||
// would have, but we avoid the stack
|
||||
// manipulation.
|
||||
goto Skip
|
||||
CheckAndLoop:
|
||||
if !b.shouldVisit(pc, pos) {
|
||||
continue
|
||||
}
|
||||
Skip:
|
||||
|
||||
inst := &re.prog.Inst[pc]
|
||||
|
||||
switch inst.Op {
|
||||
default:
|
||||
panic("bad inst")
|
||||
case syntax.InstFail:
|
||||
panic("unexpected InstFail")
|
||||
case syntax.InstAlt:
|
||||
// Cannot just
|
||||
// b.push(inst.Out, pos, false)
|
||||
// b.push(inst.Arg, pos, false)
|
||||
// If during the processing of inst.Out, we encounter
|
||||
// inst.Arg via another path, we want to process it then.
|
||||
// Pushing it here will inhibit that. Instead, re-push
|
||||
// inst with arg==true as a reminder to push inst.Arg out
|
||||
// later.
|
||||
if arg {
|
||||
// Finished inst.Out; try inst.Arg.
|
||||
arg = false
|
||||
pc = inst.Arg
|
||||
goto CheckAndLoop
|
||||
} else {
|
||||
b.push(re, pc, pos, true)
|
||||
pc = inst.Out
|
||||
goto CheckAndLoop
|
||||
}
|
||||
|
||||
case syntax.InstAltMatch:
|
||||
// One opcode consumes runes; the other leads to match.
|
||||
switch re.prog.Inst[inst.Out].Op {
|
||||
case syntax.InstRune, syntax.InstRune1, syntax.InstRuneAny, syntax.InstRuneAnyNotNL:
|
||||
// inst.Arg is the match.
|
||||
b.push(re, inst.Arg, pos, false)
|
||||
pc = inst.Arg
|
||||
pos = b.end
|
||||
goto CheckAndLoop
|
||||
}
|
||||
// inst.Out is the match - non-greedy
|
||||
b.push(re, inst.Out, b.end, false)
|
||||
pc = inst.Out
|
||||
goto CheckAndLoop
|
||||
|
||||
case syntax.InstRune:
|
||||
r, width := i.step(pos)
|
||||
if !inst.MatchRune(r) {
|
||||
continue
|
||||
}
|
||||
pos += width
|
||||
pc = inst.Out
|
||||
goto CheckAndLoop
|
||||
|
||||
case syntax.InstRune1:
|
||||
r, width := i.step(pos)
|
||||
if r != inst.Rune[0] {
|
||||
continue
|
||||
}
|
||||
pos += width
|
||||
pc = inst.Out
|
||||
goto CheckAndLoop
|
||||
|
||||
case syntax.InstRuneAnyNotNL:
|
||||
r, width := i.step(pos)
|
||||
if r == '\n' || r == endOfText {
|
||||
continue
|
||||
}
|
||||
pos += width
|
||||
pc = inst.Out
|
||||
goto CheckAndLoop
|
||||
|
||||
case syntax.InstRuneAny:
|
||||
r, width := i.step(pos)
|
||||
if r == endOfText {
|
||||
continue
|
||||
}
|
||||
pos += width
|
||||
pc = inst.Out
|
||||
goto CheckAndLoop
|
||||
|
||||
case syntax.InstCapture:
|
||||
if arg {
|
||||
// Finished inst.Out; restore the old value.
|
||||
b.cap[inst.Arg] = pos
|
||||
continue
|
||||
} else {
|
||||
if inst.Arg < uint32(len(b.cap)) {
|
||||
// Capture pos to register, but save old value.
|
||||
b.push(re, pc, b.cap[inst.Arg], true) // come back when we're done.
|
||||
b.cap[inst.Arg] = pos
|
||||
}
|
||||
pc = inst.Out
|
||||
goto CheckAndLoop
|
||||
}
|
||||
|
||||
case syntax.InstEmptyWidth:
|
||||
flag := i.context(pos)
|
||||
if !flag.match(syntax.EmptyOp(inst.Arg)) {
|
||||
continue
|
||||
}
|
||||
pc = inst.Out
|
||||
goto CheckAndLoop
|
||||
|
||||
case syntax.InstNop:
|
||||
pc = inst.Out
|
||||
goto CheckAndLoop
|
||||
|
||||
case syntax.InstMatch:
|
||||
// We found a match. If the caller doesn't care
|
||||
// where the match is, no point going further.
|
||||
if len(b.cap) == 0 {
|
||||
return true
|
||||
}
|
||||
|
||||
// Record best match so far.
|
||||
// Only need to check end point, because this entire
|
||||
// call is only considering one start position.
|
||||
if len(b.cap) > 1 {
|
||||
b.cap[1] = pos
|
||||
}
|
||||
if old := b.matchcap[1]; old == -1 || (longest && pos > 0 && pos > old) {
|
||||
copy(b.matchcap, b.cap)
|
||||
}
|
||||
|
||||
// If going for first match, we're done.
|
||||
if !longest {
|
||||
return true
|
||||
}
|
||||
|
||||
// If we used the entire text, no longer match is possible.
|
||||
if pos == b.end {
|
||||
return true
|
||||
}
|
||||
|
||||
// Otherwise, continue on in hope of a longer match.
|
||||
continue
|
||||
}
|
||||
}
|
||||
|
||||
return longest && len(b.matchcap) > 1 && b.matchcap[1] >= 0
|
||||
}
|
||||
|
||||
// backtrack runs a backtracking search of prog on the input starting at pos.
|
||||
func (re *Regexp) backtrack(ib []byte, is string, pos int, ncap int, dstCap []int) []int {
|
||||
startCond := re.cond
|
||||
if startCond == ^syntax.EmptyOp(0) { // impossible
|
||||
return nil
|
||||
}
|
||||
if startCond&syntax.EmptyBeginText != 0 && pos != 0 {
|
||||
// Anchored match, past beginning of text.
|
||||
return nil
|
||||
}
|
||||
|
||||
b := newBitState()
|
||||
i, end := b.inputs.init(nil, ib, is)
|
||||
b.reset(re.prog, end, ncap)
|
||||
|
||||
// Anchored search must start at the beginning of the input
|
||||
if startCond&syntax.EmptyBeginText != 0 {
|
||||
if len(b.cap) > 0 {
|
||||
b.cap[0] = pos
|
||||
}
|
||||
if !re.tryBacktrack(b, i, uint32(re.prog.Start), pos) {
|
||||
freeBitState(b)
|
||||
return nil
|
||||
}
|
||||
} else {
|
||||
|
||||
// Unanchored search, starting from each possible text position.
|
||||
// Notice that we have to try the empty string at the end of
|
||||
// the text, so the loop condition is pos <= end, not pos < end.
|
||||
// This looks like it's quadratic in the size of the text,
|
||||
// but we are not clearing visited between calls to TrySearch,
|
||||
// so no work is duplicated and it ends up still being linear.
|
||||
width := -1
|
||||
for ; pos <= end && width != 0; pos += width {
|
||||
if len(re.prefix) > 0 {
|
||||
// Match requires literal prefix; fast search for it.
|
||||
advance := i.index(re, pos)
|
||||
if advance < 0 {
|
||||
freeBitState(b)
|
||||
return nil
|
||||
}
|
||||
pos += advance
|
||||
}
|
||||
|
||||
if len(b.cap) > 0 {
|
||||
b.cap[0] = pos
|
||||
}
|
||||
if re.tryBacktrack(b, i, uint32(re.prog.Start), pos) {
|
||||
// Match must be leftmost; done.
|
||||
goto Match
|
||||
}
|
||||
_, width = i.step(pos)
|
||||
}
|
||||
freeBitState(b)
|
||||
return nil
|
||||
}
|
||||
|
||||
Match:
|
||||
dstCap = append(dstCap, b.matchcap...)
|
||||
freeBitState(b)
|
||||
return dstCap
|
||||
}
|
||||
447
src/regexp/example_test.go
Normal file
@@ -0,0 +1,447 @@
|
||||
// Copyright 2013 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package regexp_test
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"regexp"
|
||||
"strings"
|
||||
)
|
||||
|
||||
func Example() {
|
||||
// Compile the expression once, usually at init time.
|
||||
// Use raw strings to avoid having to quote the backslashes.
|
||||
var validID = regexp.MustCompile(`^[a-z]+\[[0-9]+\]$`)
|
||||
|
||||
fmt.Println(validID.MatchString("adam[23]"))
|
||||
fmt.Println(validID.MatchString("eve[7]"))
|
||||
fmt.Println(validID.MatchString("Job[48]"))
|
||||
fmt.Println(validID.MatchString("snakey"))
|
||||
// Output:
|
||||
// true
|
||||
// true
|
||||
// false
|
||||
// false
|
||||
}
|
||||
|
||||
func ExampleMatch() {
|
||||
matched, err := regexp.Match(`foo.*`, []byte(`seafood`))
|
||||
fmt.Println(matched, err)
|
||||
matched, err = regexp.Match(`bar.*`, []byte(`seafood`))
|
||||
fmt.Println(matched, err)
|
||||
matched, err = regexp.Match(`a(b`, []byte(`seafood`))
|
||||
fmt.Println(matched, err)
|
||||
|
||||
// Output:
|
||||
// true <nil>
|
||||
// false <nil>
|
||||
// false error parsing regexp: missing closing ): `a(b`
|
||||
}
|
||||
|
||||
func ExampleMatchString() {
|
||||
matched, err := regexp.MatchString(`foo.*`, "seafood")
|
||||
fmt.Println(matched, err)
|
||||
matched, err = regexp.MatchString(`bar.*`, "seafood")
|
||||
fmt.Println(matched, err)
|
||||
matched, err = regexp.MatchString(`a(b`, "seafood")
|
||||
fmt.Println(matched, err)
|
||||
// Output:
|
||||
// true <nil>
|
||||
// false <nil>
|
||||
// false error parsing regexp: missing closing ): `a(b`
|
||||
}
|
||||
|
||||
func ExampleQuoteMeta() {
|
||||
fmt.Println(regexp.QuoteMeta(`Escaping symbols like: .+*?()|[]{}^$`))
|
||||
// Output:
|
||||
// Escaping symbols like: \.\+\*\?\(\)\|\[\]\{\}\^\$
|
||||
}
|
||||
|
||||
func ExampleRegexp_Find() {
|
||||
re := regexp.MustCompile(`foo.?`)
|
||||
fmt.Printf("%q\n", re.Find([]byte(`seafood fool`)))
|
||||
|
||||
// Output:
|
||||
// "food"
|
||||
}
|
||||
|
||||
func ExampleRegexp_FindAll() {
|
||||
re := regexp.MustCompile(`foo.?`)
|
||||
fmt.Printf("%q\n", re.FindAll([]byte(`seafood fool`), -1))
|
||||
|
||||
// Output:
|
||||
// ["food" "fool"]
|
||||
}
|
||||
|
||||
func ExampleRegexp_FindAllSubmatch() {
|
||||
re := regexp.MustCompile(`foo(.?)`)
|
||||
fmt.Printf("%q\n", re.FindAllSubmatch([]byte(`seafood fool`), -1))
|
||||
|
||||
// Output:
|
||||
// [["food" "d"] ["fool" "l"]]
|
||||
}
|
||||
|
||||
func ExampleRegexp_FindSubmatch() {
|
||||
re := regexp.MustCompile(`foo(.?)`)
|
||||
fmt.Printf("%q\n", re.FindSubmatch([]byte(`seafood fool`)))
|
||||
|
||||
// Output:
|
||||
// ["food" "d"]
|
||||
}
|
||||
|
||||
func ExampleRegexp_Match() {
|
||||
re := regexp.MustCompile(`foo.?`)
|
||||
fmt.Println(re.Match([]byte(`seafood fool`)))
|
||||
fmt.Println(re.Match([]byte(`something else`)))
|
||||
|
||||
// Output:
|
||||
// true
|
||||
// false
|
||||
}
|
||||
|
||||
func ExampleRegexp_FindString() {
|
||||
re := regexp.MustCompile(`foo.?`)
|
||||
fmt.Printf("%q\n", re.FindString("seafood fool"))
|
||||
fmt.Printf("%q\n", re.FindString("meat"))
|
||||
// Output:
|
||||
// "food"
|
||||
// ""
|
||||
}
|
||||
|
||||
func ExampleRegexp_FindStringIndex() {
|
||||
re := regexp.MustCompile(`ab?`)
|
||||
fmt.Println(re.FindStringIndex("tablett"))
|
||||
fmt.Println(re.FindStringIndex("foo") == nil)
|
||||
// Output:
|
||||
// [1 3]
|
||||
// true
|
||||
}
|
||||
|
||||
func ExampleRegexp_FindStringSubmatch() {
|
||||
re := regexp.MustCompile(`a(x*)b(y|z)c`)
|
||||
fmt.Printf("%q\n", re.FindStringSubmatch("-axxxbyc-"))
|
||||
fmt.Printf("%q\n", re.FindStringSubmatch("-abzc-"))
|
||||
// Output:
|
||||
// ["axxxbyc" "xxx" "y"]
|
||||
// ["abzc" "" "z"]
|
||||
}
|
||||
|
||||
func ExampleRegexp_FindAllString() {
|
||||
re := regexp.MustCompile(`a.`)
|
||||
fmt.Println(re.FindAllString("paranormal", -1))
|
||||
fmt.Println(re.FindAllString("paranormal", 2))
|
||||
fmt.Println(re.FindAllString("graal", -1))
|
||||
fmt.Println(re.FindAllString("none", -1))
|
||||
// Output:
|
||||
// [ar an al]
|
||||
// [ar an]
|
||||
// [aa]
|
||||
// []
|
||||
}
|
||||
|
||||
func ExampleRegexp_FindAllStringSubmatch() {
|
||||
re := regexp.MustCompile(`a(x*)b`)
|
||||
fmt.Printf("%q\n", re.FindAllStringSubmatch("-ab-", -1))
|
||||
fmt.Printf("%q\n", re.FindAllStringSubmatch("-axxb-", -1))
|
||||
fmt.Printf("%q\n", re.FindAllStringSubmatch("-ab-axb-", -1))
|
||||
fmt.Printf("%q\n", re.FindAllStringSubmatch("-axxb-ab-", -1))
|
||||
// Output:
|
||||
// [["ab" ""]]
|
||||
// [["axxb" "xx"]]
|
||||
// [["ab" ""] ["axb" "x"]]
|
||||
// [["axxb" "xx"] ["ab" ""]]
|
||||
}
|
||||
|
||||
func ExampleRegexp_FindAllStringSubmatchIndex() {
|
||||
re := regexp.MustCompile(`a(x*)b`)
|
||||
// Indices:
|
||||
// 01234567 012345678
|
||||
// -ab-axb- -axxb-ab-
|
||||
fmt.Println(re.FindAllStringSubmatchIndex("-ab-", -1))
|
||||
fmt.Println(re.FindAllStringSubmatchIndex("-axxb-", -1))
|
||||
fmt.Println(re.FindAllStringSubmatchIndex("-ab-axb-", -1))
|
||||
fmt.Println(re.FindAllStringSubmatchIndex("-axxb-ab-", -1))
|
||||
fmt.Println(re.FindAllStringSubmatchIndex("-foo-", -1))
|
||||
// Output:
|
||||
// [[1 3 2 2]]
|
||||
// [[1 5 2 4]]
|
||||
// [[1 3 2 2] [4 7 5 6]]
|
||||
// [[1 5 2 4] [6 8 7 7]]
|
||||
// []
|
||||
}
|
||||
|
||||
func ExampleRegexp_FindSubmatchIndex() {
|
||||
re := regexp.MustCompile(`a(x*)b`)
|
||||
// Indices:
|
||||
// 01234567 012345678
|
||||
// -ab-axb- -axxb-ab-
|
||||
fmt.Println(re.FindSubmatchIndex([]byte("-ab-")))
|
||||
fmt.Println(re.FindSubmatchIndex([]byte("-axxb-")))
|
||||
fmt.Println(re.FindSubmatchIndex([]byte("-ab-axb-")))
|
||||
fmt.Println(re.FindSubmatchIndex([]byte("-axxb-ab-")))
|
||||
fmt.Println(re.FindSubmatchIndex([]byte("-foo-")))
|
||||
// Output:
|
||||
// [1 3 2 2]
|
||||
// [1 5 2 4]
|
||||
// [1 3 2 2]
|
||||
// [1 5 2 4]
|
||||
// []
|
||||
}
|
||||
|
||||
func ExampleRegexp_Longest() {
|
||||
re := regexp.MustCompile(`a(|b)`)
|
||||
fmt.Println(re.FindString("ab"))
|
||||
re.Longest()
|
||||
fmt.Println(re.FindString("ab"))
|
||||
// Output:
|
||||
// a
|
||||
// ab
|
||||
}
|
||||
|
||||
func ExampleRegexp_MatchString() {
|
||||
re := regexp.MustCompile(`(gopher){2}`)
|
||||
fmt.Println(re.MatchString("gopher"))
|
||||
fmt.Println(re.MatchString("gophergopher"))
|
||||
fmt.Println(re.MatchString("gophergophergopher"))
|
||||
// Output:
|
||||
// false
|
||||
// true
|
||||
// true
|
||||
}
|
||||
|
||||
func ExampleRegexp_NumSubexp() {
|
||||
re0 := regexp.MustCompile(`a.`)
|
||||
fmt.Printf("%d\n", re0.NumSubexp())
|
||||
|
||||
re := regexp.MustCompile(`(.*)((a)b)(.*)a`)
|
||||
fmt.Println(re.NumSubexp())
|
||||
// Output:
|
||||
// 0
|
||||
// 4
|
||||
}
|
||||
|
||||
func ExampleRegexp_ReplaceAll() {
|
||||
re := regexp.MustCompile(`a(x*)b`)
|
||||
fmt.Printf("%s\n", re.ReplaceAll([]byte("-ab-axxb-"), []byte("T")))
|
||||
fmt.Printf("%s\n", re.ReplaceAll([]byte("-ab-axxb-"), []byte("$1")))
|
||||
fmt.Printf("%s\n", re.ReplaceAll([]byte("-ab-axxb-"), []byte("$1W")))
|
||||
fmt.Printf("%s\n", re.ReplaceAll([]byte("-ab-axxb-"), []byte("${1}W")))
|
||||
|
||||
re2 := regexp.MustCompile(`a(?P<1W>x*)b`)
|
||||
fmt.Printf("%s\n", re2.ReplaceAll([]byte("-ab-axxb-"), []byte("$1W")))
|
||||
fmt.Printf("%s\n", re2.ReplaceAll([]byte("-ab-axxb-"), []byte("${1}W")))
|
||||
|
||||
// Output:
|
||||
// -T-T-
|
||||
// --xx-
|
||||
// ---
|
||||
// -W-xxW-
|
||||
// --xx-
|
||||
// -W-xxW-
|
||||
}
|
||||
|
||||
func ExampleRegexp_ReplaceAllLiteralString() {
|
||||
re := regexp.MustCompile(`a(x*)b`)
|
||||
fmt.Println(re.ReplaceAllLiteralString("-ab-axxb-", "T"))
|
||||
fmt.Println(re.ReplaceAllLiteralString("-ab-axxb-", "$1"))
|
||||
fmt.Println(re.ReplaceAllLiteralString("-ab-axxb-", "${1}"))
|
||||
// Output:
|
||||
// -T-T-
|
||||
// -$1-$1-
|
||||
// -${1}-${1}-
|
||||
}
|
||||
|
||||
func ExampleRegexp_ReplaceAllString() {
|
||||
re := regexp.MustCompile(`a(x*)b`)
|
||||
fmt.Println(re.ReplaceAllString("-ab-axxb-", "T"))
|
||||
fmt.Println(re.ReplaceAllString("-ab-axxb-", "$1"))
|
||||
fmt.Println(re.ReplaceAllString("-ab-axxb-", "$1W"))
|
||||
fmt.Println(re.ReplaceAllString("-ab-axxb-", "${1}W"))
|
||||
|
||||
re2 := regexp.MustCompile(`a(?P<1W>x*)b`)
|
||||
fmt.Printf("%s\n", re2.ReplaceAllString("-ab-axxb-", "$1W"))
|
||||
fmt.Println(re.ReplaceAllString("-ab-axxb-", "${1}W"))
|
||||
|
||||
// Output:
|
||||
// -T-T-
|
||||
// --xx-
|
||||
// ---
|
||||
// -W-xxW-
|
||||
// --xx-
|
||||
// -W-xxW-
|
||||
}
|
||||
|
||||
func ExampleRegexp_ReplaceAllStringFunc() {
|
||||
re := regexp.MustCompile(`[^aeiou]`)
|
||||
fmt.Println(re.ReplaceAllStringFunc("seafood fool", strings.ToUpper))
|
||||
// Output:
|
||||
// SeaFooD FooL
|
||||
}
|
||||
|
||||
func ExampleRegexp_SubexpNames() {
|
||||
re := regexp.MustCompile(`(?P<first>[a-zA-Z]+) (?P<last>[a-zA-Z]+)`)
|
||||
fmt.Println(re.MatchString("Alan Turing"))
|
||||
fmt.Printf("%q\n", re.SubexpNames())
|
||||
reversed := fmt.Sprintf("${%s} ${%s}", re.SubexpNames()[2], re.SubexpNames()[1])
|
||||
fmt.Println(reversed)
|
||||
fmt.Println(re.ReplaceAllString("Alan Turing", reversed))
|
||||
// Output:
|
||||
// true
|
||||
// ["" "first" "last"]
|
||||
// ${last} ${first}
|
||||
// Turing Alan
|
||||
}
|
||||
|
||||
func ExampleRegexp_SubexpIndex() {
|
||||
re := regexp.MustCompile(`(?P<first>[a-zA-Z]+) (?P<last>[a-zA-Z]+)`)
|
||||
fmt.Println(re.MatchString("Alan Turing"))
|
||||
matches := re.FindStringSubmatch("Alan Turing")
|
||||
lastIndex := re.SubexpIndex("last")
|
||||
fmt.Printf("last => %d\n", lastIndex)
|
||||
fmt.Println(matches[lastIndex])
|
||||
// Output:
|
||||
// true
|
||||
// last => 2
|
||||
// Turing
|
||||
}
|
||||
|
||||
func ExampleRegexp_Split() {
|
||||
a := regexp.MustCompile(`a`)
|
||||
fmt.Println(a.Split("banana", -1))
|
||||
fmt.Println(a.Split("banana", 0))
|
||||
fmt.Println(a.Split("banana", 1))
|
||||
fmt.Println(a.Split("banana", 2))
|
||||
zp := regexp.MustCompile(`z+`)
|
||||
fmt.Println(zp.Split("pizza", -1))
|
||||
fmt.Println(zp.Split("pizza", 0))
|
||||
fmt.Println(zp.Split("pizza", 1))
|
||||
fmt.Println(zp.Split("pizza", 2))
|
||||
// Output:
|
||||
// [b n n ]
|
||||
// []
|
||||
// [banana]
|
||||
// [b nana]
|
||||
// [pi a]
|
||||
// []
|
||||
// [pizza]
|
||||
// [pi a]
|
||||
}
|
||||
|
||||
func ExampleRegexp_Expand() {
|
||||
content := []byte(`
|
||||
# comment line
|
||||
option1: value1
|
||||
option2: value2
|
||||
|
||||
# another comment line
|
||||
option3: value3
|
||||
`)
|
||||
|
||||
// Regex pattern captures "key: value" pair from the content.
|
||||
pattern := regexp.MustCompile(`(?m)(?P<key>\w+):\s+(?P<value>\w+)$`)
|
||||
|
||||
// Template to convert "key: value" to "key=value" by
|
||||
// referencing the values captured by the regex pattern.
|
||||
template := []byte("$key=$value\n")
|
||||
|
||||
result := []byte{}
|
||||
|
||||
// For each match of the regex in the content.
|
||||
for _, submatches := range pattern.FindAllSubmatchIndex(content, -1) {
|
||||
// Apply the captured submatches to the template and append the output
|
||||
// to the result.
|
||||
result = pattern.Expand(result, template, content, submatches)
|
||||
}
|
||||
fmt.Println(string(result))
|
||||
// Output:
|
||||
// option1=value1
|
||||
// option2=value2
|
||||
// option3=value3
|
||||
}
|
||||
|
||||
func ExampleRegexp_ExpandString() {
|
||||
content := `
|
||||
# comment line
|
||||
option1: value1
|
||||
option2: value2
|
||||
|
||||
# another comment line
|
||||
option3: value3
|
||||
`
|
||||
|
||||
// Regex pattern captures "key: value" pair from the content.
|
||||
pattern := regexp.MustCompile(`(?m)(?P<key>\w+):\s+(?P<value>\w+)$`)
|
||||
|
||||
// Template to convert "key: value" to "key=value" by
|
||||
// referencing the values captured by the regex pattern.
|
||||
template := "$key=$value\n"
|
||||
|
||||
result := []byte{}
|
||||
|
||||
// For each match of the regex in the content.
|
||||
for _, submatches := range pattern.FindAllStringSubmatchIndex(content, -1) {
|
||||
// Apply the captured submatches to the template and append the output
|
||||
// to the result.
|
||||
result = pattern.ExpandString(result, template, content, submatches)
|
||||
}
|
||||
fmt.Println(string(result))
|
||||
// Output:
|
||||
// option1=value1
|
||||
// option2=value2
|
||||
// option3=value3
|
||||
}
|
||||
|
||||
func ExampleRegexp_FindIndex() {
|
||||
content := []byte(`
|
||||
# comment line
|
||||
option1: value1
|
||||
option2: value2
|
||||
`)
|
||||
// Regex pattern captures "key: value" pair from the content.
|
||||
pattern := regexp.MustCompile(`(?m)(?P<key>\w+):\s+(?P<value>\w+)$`)
|
||||
|
||||
loc := pattern.FindIndex(content)
|
||||
fmt.Println(loc)
|
||||
fmt.Println(string(content[loc[0]:loc[1]]))
|
||||
// Output:
|
||||
// [18 33]
|
||||
// option1: value1
|
||||
}
|
||||
|
||||
func ExampleRegexp_FindAllSubmatchIndex() {
	content := []byte(`
	# comment line
	option1: value1
	option2: value2
`)
	// Regex pattern captures "key: value" pair from the content.
	pattern := regexp.MustCompile(`(?m)(?P<key>\w+):\s+(?P<value>\w+)$`)
	allIndexes := pattern.FindAllSubmatchIndex(content, -1)
	for _, loc := range allIndexes {
		fmt.Println(loc)
		fmt.Println(string(content[loc[0]:loc[1]]))
		fmt.Println(string(content[loc[2]:loc[3]]))
		fmt.Println(string(content[loc[4]:loc[5]]))
	}
	// Output:
	// [18 33 18 25 27 33]
	// option1: value1
	// option1
	// value1
	// [35 50 35 42 44 50]
	// option2: value2
	// option2
	// value2
}

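// In each loc slice printed above, the pair at positions 2k and 2k+1 holds
// the byte offsets of capture group k (group 0 is the whole match). The
// group number for a named capture can be looked up with SubexpIndex rather
// than hard-coded. A minimal sketch (hypothetical function name):
//
//	func namedGroupOffsets() {
//		content := []byte("option1: value1")
//		pattern := regexp.MustCompile(`(?P<key>\w+):\s+(?P<value>\w+)`)
//		m := pattern.FindSubmatchIndex(content)
//		k := pattern.SubexpIndex("value")
//		fmt.Println(string(content[m[2*k]:m[2*k+1]])) // prints: value1
//	}
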
func ExampleRegexp_FindAllIndex() {
	content := []byte("London")
	re := regexp.MustCompile(`o.`)
	fmt.Println(re.FindAllIndex(content, 1))
	fmt.Println(re.FindAllIndex(content, -1))
	// Output:
	// [[1 3]]
	// [[1 3] [4 6]]
}
554
src/regexp/exec.go
Normal file
@@ -0,0 +1,554 @@
// Copyright 2011 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package regexp

import (
	"io"
	"regexp/syntax"
	"sync"
)

// A queue is a 'sparse array' holding pending threads of execution.
// See https://research.swtch.com/2008/03/using-uninitialized-memory-for-fun-and.html
type queue struct {
	sparse []uint32
	dense  []entry
}

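// The sparse/dense pair above is the classic constant-time sparse set: pc is
// on the queue iff sparse[pc] points at a dense slot that points back at pc,
// so neither slice ever has to be cleared between runs. Roughly (an
// illustrative sketch only; the real checks live in (*machine).add below,
// and dense is pre-sized to the program length):
//
//	func (q *queue) contains(pc uint32) bool {
//		j := q.sparse[pc]
//		return j < uint32(len(q.dense)) && q.dense[j].pc == pc
//	}
//
//	func (q *queue) push(pc uint32) {
//		j := len(q.dense)
//		q.dense = q.dense[:j+1]
//		q.dense[j] = entry{pc: pc}
//		q.sparse[pc] = uint32(j)
//	}
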
// An entry is an entry on a queue.
|
||||
// It holds both the instruction pc and the actual thread.
|
||||
// Some queue entries are just place holders so that the machine
|
||||
// knows it has considered that pc. Such entries have t == nil.
|
||||
type entry struct {
|
||||
pc uint32
|
||||
t *thread
|
||||
}
|
||||
|
||||
// A thread is the state of a single path through the machine:
|
||||
// an instruction and a corresponding capture array.
|
||||
// See https://swtch.com/~rsc/regexp/regexp2.html
|
||||
type thread struct {
|
||||
inst *syntax.Inst
|
||||
cap []int
|
||||
}
|
||||
|
||||
// A machine holds all the state during an NFA simulation for p.
|
||||
type machine struct {
|
||||
re *Regexp // corresponding Regexp
|
||||
p *syntax.Prog // compiled program
|
||||
q0, q1 queue // two queues for runq, nextq
|
||||
pool []*thread // pool of available threads
|
||||
matched bool // whether a match was found
|
||||
matchcap []int // capture information for the match
|
||||
|
||||
inputs inputs
|
||||
}
|
||||
|
||||
type inputs struct {
|
||||
// cached inputs, to avoid allocation
|
||||
bytes inputBytes
|
||||
string inputString
|
||||
reader inputReader
|
||||
}
|
||||
|
||||
func (i *inputs) newBytes(b []byte) input {
|
||||
i.bytes.str = b
|
||||
return &i.bytes
|
||||
}
|
||||
|
||||
func (i *inputs) newString(s string) input {
|
||||
i.string.str = s
|
||||
return &i.string
|
||||
}
|
||||
|
||||
func (i *inputs) newReader(r io.RuneReader) input {
|
||||
i.reader.r = r
|
||||
i.reader.atEOT = false
|
||||
i.reader.pos = 0
|
||||
return &i.reader
|
||||
}
|
||||
|
||||
func (i *inputs) clear() {
|
||||
// We need to clear 1 of these.
|
||||
// Avoid the expense of clearing the others (pointer write barrier).
|
||||
if i.bytes.str != nil {
|
||||
i.bytes.str = nil
|
||||
} else if i.reader.r != nil {
|
||||
i.reader.r = nil
|
||||
} else {
|
||||
i.string.str = ""
|
||||
}
|
||||
}
|
||||
|
||||
func (i *inputs) init(r io.RuneReader, b []byte, s string) (input, int) {
|
||||
if r != nil {
|
||||
return i.newReader(r), 0
|
||||
}
|
||||
if b != nil {
|
||||
return i.newBytes(b), len(b)
|
||||
}
|
||||
return i.newString(s), len(s)
|
||||
}
|
||||
|
||||
func (m *machine) init(ncap int) {
|
||||
for _, t := range m.pool {
|
||||
t.cap = t.cap[:ncap]
|
||||
}
|
||||
m.matchcap = m.matchcap[:ncap]
|
||||
}
|
||||
|
||||
// alloc allocates a new thread with the given instruction.
|
||||
// It uses the free pool if possible.
|
||||
func (m *machine) alloc(i *syntax.Inst) *thread {
|
||||
var t *thread
|
||||
if n := len(m.pool); n > 0 {
|
||||
t = m.pool[n-1]
|
||||
m.pool = m.pool[:n-1]
|
||||
} else {
|
||||
t = new(thread)
|
||||
t.cap = make([]int, len(m.matchcap), cap(m.matchcap))
|
||||
}
|
||||
t.inst = i
|
||||
return t
|
||||
}
|
||||
|
||||
// A lazyFlag is a lazily-evaluated syntax.EmptyOp,
// for checking zero-width flags like ^ $ \A \z \B \b.
// It records the pair of relevant runes and does not
// determine the implied flags until absolutely necessary
// (most of the time, that means never).
type lazyFlag uint64

func newLazyFlag(r1, r2 rune) lazyFlag {
	return lazyFlag(uint64(r1)<<32 | uint64(uint32(r2)))
}

func (f lazyFlag) match(op syntax.EmptyOp) bool {
	if op == 0 {
		return true
	}
	r1 := rune(f >> 32)
	if op&syntax.EmptyBeginLine != 0 {
		if r1 != '\n' && r1 >= 0 {
			return false
		}
		op &^= syntax.EmptyBeginLine
	}
	if op&syntax.EmptyBeginText != 0 {
		if r1 >= 0 {
			return false
		}
		op &^= syntax.EmptyBeginText
	}
	if op == 0 {
		return true
	}
	r2 := rune(f)
	if op&syntax.EmptyEndLine != 0 {
		if r2 != '\n' && r2 >= 0 {
			return false
		}
		op &^= syntax.EmptyEndLine
	}
	if op&syntax.EmptyEndText != 0 {
		if r2 >= 0 {
			return false
		}
		op &^= syntax.EmptyEndText
	}
	if op == 0 {
		return true
	}
	if syntax.IsWordChar(r1) != syntax.IsWordChar(r2) {
		op &^= syntax.EmptyWordBoundary
	} else {
		op &^= syntax.EmptyNoWordBoundary
	}
	return op == 0
}

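// Illustrative values (a sketch, not code from this file): at the start of
// the text there is no preceding rune, encoded as r1 = -1, so the text
// anchors and a word boundary before a word character are both satisfied:
//
//	f := newLazyFlag(-1, 'a')
//	f.match(syntax.EmptyBeginText)    // true: r1 < 0 means start of text
//	f.match(syntax.EmptyWordBoundary) // true: only 'a' is a word character
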
// match runs the machine over the input starting at pos.
|
||||
// It reports whether a match was found.
|
||||
// If so, m.matchcap holds the submatch information.
|
||||
func (m *machine) match(i input, pos int) bool {
|
||||
startCond := m.re.cond
|
||||
if startCond == ^syntax.EmptyOp(0) { // impossible
|
||||
return false
|
||||
}
|
||||
m.matched = false
|
||||
for i := range m.matchcap {
|
||||
m.matchcap[i] = -1
|
||||
}
|
||||
runq, nextq := &m.q0, &m.q1
|
||||
r, r1 := endOfText, endOfText
|
||||
width, width1 := 0, 0
|
||||
r, width = i.step(pos)
|
||||
if r != endOfText {
|
||||
r1, width1 = i.step(pos + width)
|
||||
}
|
||||
var flag lazyFlag
|
||||
if pos == 0 {
|
||||
flag = newLazyFlag(-1, r)
|
||||
} else {
|
||||
flag = i.context(pos)
|
||||
}
|
||||
for {
|
||||
if len(runq.dense) == 0 {
|
||||
if startCond&syntax.EmptyBeginText != 0 && pos != 0 {
|
||||
// Anchored match, past beginning of text.
|
||||
break
|
||||
}
|
||||
if m.matched {
|
||||
// Have match; finished exploring alternatives.
|
||||
break
|
||||
}
|
||||
if len(m.re.prefix) > 0 && r1 != m.re.prefixRune && i.canCheckPrefix() {
|
||||
// Match requires literal prefix; fast search for it.
|
||||
advance := i.index(m.re, pos)
|
||||
if advance < 0 {
|
||||
break
|
||||
}
|
||||
pos += advance
|
||||
r, width = i.step(pos)
|
||||
r1, width1 = i.step(pos + width)
|
||||
}
|
||||
}
|
||||
if !m.matched {
|
||||
if len(m.matchcap) > 0 {
|
||||
m.matchcap[0] = pos
|
||||
}
|
||||
m.add(runq, uint32(m.p.Start), pos, m.matchcap, &flag, nil)
|
||||
}
|
||||
flag = newLazyFlag(r, r1)
|
||||
m.step(runq, nextq, pos, pos+width, r, &flag)
|
||||
if width == 0 {
|
||||
break
|
||||
}
|
||||
if len(m.matchcap) == 0 && m.matched {
|
||||
// Found a match and not paying attention
|
||||
// to where it is, so any match will do.
|
||||
break
|
||||
}
|
||||
pos += width
|
||||
r, width = r1, width1
|
||||
if r != endOfText {
|
||||
r1, width1 = i.step(pos + width)
|
||||
}
|
||||
runq, nextq = nextq, runq
|
||||
}
|
||||
m.clear(nextq)
|
||||
return m.matched
|
||||
}
|
||||
|
||||
// clear frees all threads on the thread queue.
|
||||
func (m *machine) clear(q *queue) {
|
||||
for _, d := range q.dense {
|
||||
if d.t != nil {
|
||||
m.pool = append(m.pool, d.t)
|
||||
}
|
||||
}
|
||||
q.dense = q.dense[:0]
|
||||
}
|
||||
|
||||
// step executes one step of the machine, running each of the threads
|
||||
// on runq and appending new threads to nextq.
|
||||
// The step processes the rune c (which may be endOfText),
|
||||
// which starts at position pos and ends at nextPos.
|
||||
// nextCond gives the setting for the empty-width flags after c.
|
||||
func (m *machine) step(runq, nextq *queue, pos, nextPos int, c rune, nextCond *lazyFlag) {
|
||||
longest := m.re.longest
|
||||
for j := 0; j < len(runq.dense); j++ {
|
||||
d := &runq.dense[j]
|
||||
t := d.t
|
||||
if t == nil {
|
||||
continue
|
||||
}
|
||||
if longest && m.matched && len(t.cap) > 0 && m.matchcap[0] < t.cap[0] {
|
||||
m.pool = append(m.pool, t)
|
||||
continue
|
||||
}
|
||||
i := t.inst
|
||||
add := false
|
||||
switch i.Op {
|
||||
default:
|
||||
panic("bad inst")
|
||||
|
||||
case syntax.InstMatch:
|
||||
if len(t.cap) > 0 && (!longest || !m.matched || m.matchcap[1] < pos) {
|
||||
t.cap[1] = pos
|
||||
copy(m.matchcap, t.cap)
|
||||
}
|
||||
if !longest {
|
||||
// First-match mode: cut off all lower-priority threads.
|
||||
for _, d := range runq.dense[j+1:] {
|
||||
if d.t != nil {
|
||||
m.pool = append(m.pool, d.t)
|
||||
}
|
||||
}
|
||||
runq.dense = runq.dense[:0]
|
||||
}
|
||||
m.matched = true
|
||||
|
||||
case syntax.InstRune:
|
||||
add = i.MatchRune(c)
|
||||
case syntax.InstRune1:
|
||||
add = c == i.Rune[0]
|
||||
case syntax.InstRuneAny:
|
||||
add = true
|
||||
case syntax.InstRuneAnyNotNL:
|
||||
add = c != '\n'
|
||||
}
|
||||
if add {
|
||||
t = m.add(nextq, i.Out, nextPos, t.cap, nextCond, t)
|
||||
}
|
||||
if t != nil {
|
||||
m.pool = append(m.pool, t)
|
||||
}
|
||||
}
|
||||
runq.dense = runq.dense[:0]
|
||||
}
|
||||
|
||||
// add adds an entry to q for pc, unless the q already has such an entry.
|
||||
// It also recursively adds an entry for all instructions reachable from pc by following
|
||||
// empty-width conditions satisfied by cond. pos gives the current position
|
||||
// in the input.
|
||||
func (m *machine) add(q *queue, pc uint32, pos int, cap []int, cond *lazyFlag, t *thread) *thread {
|
||||
Again:
|
||||
if pc == 0 {
|
||||
return t
|
||||
}
|
||||
if j := q.sparse[pc]; j < uint32(len(q.dense)) && q.dense[j].pc == pc {
|
||||
return t
|
||||
}
|
||||
|
||||
j := len(q.dense)
|
||||
q.dense = q.dense[:j+1]
|
||||
d := &q.dense[j]
|
||||
d.t = nil
|
||||
d.pc = pc
|
||||
q.sparse[pc] = uint32(j)
|
||||
|
||||
i := &m.p.Inst[pc]
|
||||
switch i.Op {
|
||||
default:
|
||||
panic("unhandled")
|
||||
case syntax.InstFail:
|
||||
// nothing
|
||||
case syntax.InstAlt, syntax.InstAltMatch:
|
||||
t = m.add(q, i.Out, pos, cap, cond, t)
|
||||
pc = i.Arg
|
||||
goto Again
|
||||
case syntax.InstEmptyWidth:
|
||||
if cond.match(syntax.EmptyOp(i.Arg)) {
|
||||
pc = i.Out
|
||||
goto Again
|
||||
}
|
||||
case syntax.InstNop:
|
||||
pc = i.Out
|
||||
goto Again
|
||||
case syntax.InstCapture:
|
||||
if int(i.Arg) < len(cap) {
|
||||
opos := cap[i.Arg]
|
||||
cap[i.Arg] = pos
|
||||
m.add(q, i.Out, pos, cap, cond, nil)
|
||||
cap[i.Arg] = opos
|
||||
} else {
|
||||
pc = i.Out
|
||||
goto Again
|
||||
}
|
||||
case syntax.InstMatch, syntax.InstRune, syntax.InstRune1, syntax.InstRuneAny, syntax.InstRuneAnyNotNL:
|
||||
if t == nil {
|
||||
t = m.alloc(i)
|
||||
} else {
|
||||
t.inst = i
|
||||
}
|
||||
if len(cap) > 0 && &t.cap[0] != &cap[0] {
|
||||
copy(t.cap, cap)
|
||||
}
|
||||
d.t = t
|
||||
t = nil
|
||||
}
|
||||
return t
|
||||
}
|
||||
|
||||
type onePassMachine struct {
|
||||
inputs inputs
|
||||
matchcap []int
|
||||
}
|
||||
|
||||
var onePassPool sync.Pool
|
||||
|
||||
func newOnePassMachine() *onePassMachine {
|
||||
m, ok := onePassPool.Get().(*onePassMachine)
|
||||
if !ok {
|
||||
m = new(onePassMachine)
|
||||
}
|
||||
return m
|
||||
}
|
||||
|
||||
func freeOnePassMachine(m *onePassMachine) {
|
||||
m.inputs.clear()
|
||||
onePassPool.Put(m)
|
||||
}
|
||||
|
||||
// doOnePass implements r.doExecute using the one-pass execution engine.
|
||||
func (re *Regexp) doOnePass(ir io.RuneReader, ib []byte, is string, pos, ncap int, dstCap []int) []int {
|
||||
startCond := re.cond
|
||||
if startCond == ^syntax.EmptyOp(0) { // impossible
|
||||
return nil
|
||||
}
|
||||
|
||||
m := newOnePassMachine()
|
||||
if cap(m.matchcap) < ncap {
|
||||
m.matchcap = make([]int, ncap)
|
||||
} else {
|
||||
m.matchcap = m.matchcap[:ncap]
|
||||
}
|
||||
|
||||
matched := false
|
||||
for i := range m.matchcap {
|
||||
m.matchcap[i] = -1
|
||||
}
|
||||
|
||||
i, _ := m.inputs.init(ir, ib, is)
|
||||
|
||||
r, r1 := endOfText, endOfText
|
||||
width, width1 := 0, 0
|
||||
r, width = i.step(pos)
|
||||
if r != endOfText {
|
||||
r1, width1 = i.step(pos + width)
|
||||
}
|
||||
var flag lazyFlag
|
||||
if pos == 0 {
|
||||
flag = newLazyFlag(-1, r)
|
||||
} else {
|
||||
flag = i.context(pos)
|
||||
}
|
||||
pc := re.onepass.Start
|
||||
inst := &re.onepass.Inst[pc]
|
||||
// If there is a simple literal prefix, skip over it.
|
||||
if pos == 0 && flag.match(syntax.EmptyOp(inst.Arg)) &&
|
||||
len(re.prefix) > 0 && i.canCheckPrefix() {
|
||||
// Match requires literal prefix; fast search for it.
|
||||
if !i.hasPrefix(re) {
|
||||
goto Return
|
||||
}
|
||||
pos += len(re.prefix)
|
||||
r, width = i.step(pos)
|
||||
r1, width1 = i.step(pos + width)
|
||||
flag = i.context(pos)
|
||||
pc = int(re.prefixEnd)
|
||||
}
|
||||
for {
|
||||
inst = &re.onepass.Inst[pc]
|
||||
pc = int(inst.Out)
|
||||
switch inst.Op {
|
||||
default:
|
||||
panic("bad inst")
|
||||
case syntax.InstMatch:
|
||||
matched = true
|
||||
if len(m.matchcap) > 0 {
|
||||
m.matchcap[0] = 0
|
||||
m.matchcap[1] = pos
|
||||
}
|
||||
goto Return
|
||||
case syntax.InstRune:
|
||||
if !inst.MatchRune(r) {
|
||||
goto Return
|
||||
}
|
||||
case syntax.InstRune1:
|
||||
if r != inst.Rune[0] {
|
||||
goto Return
|
||||
}
|
||||
case syntax.InstRuneAny:
|
||||
// Nothing
|
||||
case syntax.InstRuneAnyNotNL:
|
||||
if r == '\n' {
|
||||
goto Return
|
||||
}
|
||||
// peek at the input rune to see which branch of the Alt to take
|
||||
case syntax.InstAlt, syntax.InstAltMatch:
|
||||
pc = int(onePassNext(inst, r))
|
||||
continue
|
||||
case syntax.InstFail:
|
||||
goto Return
|
||||
case syntax.InstNop:
|
||||
continue
|
||||
case syntax.InstEmptyWidth:
|
||||
if !flag.match(syntax.EmptyOp(inst.Arg)) {
|
||||
goto Return
|
||||
}
|
||||
continue
|
||||
case syntax.InstCapture:
|
||||
if int(inst.Arg) < len(m.matchcap) {
|
||||
m.matchcap[inst.Arg] = pos
|
||||
}
|
||||
continue
|
||||
}
|
||||
if width == 0 {
|
||||
break
|
||||
}
|
||||
flag = newLazyFlag(r, r1)
|
||||
pos += width
|
||||
r, width = r1, width1
|
||||
if r != endOfText {
|
||||
r1, width1 = i.step(pos + width)
|
||||
}
|
||||
}
|
||||
|
||||
Return:
|
||||
if !matched {
|
||||
freeOnePassMachine(m)
|
||||
return nil
|
||||
}
|
||||
|
||||
dstCap = append(dstCap, m.matchcap...)
|
||||
freeOnePassMachine(m)
|
||||
return dstCap
|
||||
}
|
||||
|
||||
// doMatch reports whether either r, b or s match the regexp.
func (re *Regexp) doMatch(r io.RuneReader, b []byte, s string) bool {
	return re.doExecute(r, b, s, 0, 0, nil) != nil
}

// doExecute finds the leftmost match in the input, appends the position
// of its subexpressions to dstCap and returns dstCap.
//
// nil is returned if no matches are found and non-nil if matches are found.
func (re *Regexp) doExecute(r io.RuneReader, b []byte, s string, pos int, ncap int, dstCap []int) []int {
	if dstCap == nil {
		// Make sure 'return dstCap' is non-nil.
		dstCap = arrayNoInts[:0:0]
	}

	if r == nil && len(b)+len(s) < re.minInputLen {
		return nil
	}

	if re.onepass != nil {
		return re.doOnePass(r, b, s, pos, ncap, dstCap)
	}
	if r == nil && len(b)+len(s) < re.maxBitStateLen {
		return re.backtrack(b, s, pos, ncap, dstCap)
	}

	m := re.get()
	i, _ := m.inputs.init(r, b, s)

	m.init(ncap)
	if !m.match(i, pos) {
		re.put(m)
		return nil
	}

	dstCap = append(dstCap, m.matchcap...)
	re.put(m)
	return dstCap
}
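// doExecute picks an engine in this order: the one-pass machine when the
// compiled program admits it (re.onepass != nil), the bit-state backtracker
// when the input is small enough, and otherwise the general NFA simulation
// implemented by (*machine).match above. For a concrete one-pass case, see
// BenchmarkMatch_onepass_regex in exec_test.go, which requires that
// MustCompile(`(?s)\A.*\z`) produce a non-nil onepass program.
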

// arrayNoInts is returned by doExecute match if nil dstCap is passed
// to it with ncap=0.
var arrayNoInts [0]int
20
src/regexp/exec2_test.go
Normal file
@@ -0,0 +1,20 @@
// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

//go:build !race

package regexp

import (
	"testing"
)

// This test is excluded when running under the race detector because
// it is a very expensive test and takes too long.
func TestRE2Exhaustive(t *testing.T) {
	if testing.Short() {
		t.Skip("skipping TestRE2Exhaustive during short test")
	}
	testRE2(t, "testdata/re2-exhaustive.txt.bz2")
}
736
src/regexp/exec_test.go
Normal file
@@ -0,0 +1,736 @@
// Copyright 2010 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package regexp

import (
	"bufio"
	"compress/bzip2"
	"fmt"
	"internal/testenv"
	"io"
	"os"
	"path/filepath"
	"regexp/syntax"
	"slices"
	"strconv"
	"strings"
	"testing"
	"unicode/utf8"
)

// TestRE2 tests this package's regexp API against test cases
// considered during RE2's exhaustive tests, which run all possible
// regexps over a given set of atoms and operators, up to a given
// complexity, over all possible strings over a given alphabet,
// up to a given size. Rather than try to link with RE2, we read a
// log file containing the test cases and the expected matches.
// The log file, re2-exhaustive.txt, is generated by running 'make log'
// in the open source RE2 distribution https://github.com/google/re2/.
//
// The test file format is a sequence of stanzas like:
//
//	strings
//	"abc"
//	"123x"
//	regexps
//	"[a-z]+"
//	0-3;0-3
//	-;-
//	"([0-9])([0-9])([0-9])"
//	-;-
//	-;0-3 0-1 1-2 2-3
//
// The stanza begins by defining a set of strings, quoted
// using Go double-quote syntax, one per line. Then the
// regexps section gives a sequence of regexps to run on
// the strings. In the block that follows a regexp, each line
// gives the semicolon-separated match results of running
// the regexp on the corresponding string.
// Each match result is either a single -, meaning no match, or a
// space-separated sequence of pairs giving the match and
// submatch indices. An unmatched subexpression formats
// its pair as a single - (not illustrated above). For now
// each regexp run produces two match results, one for a
// “full match” that restricts the regexp to matching the entire
// string or nothing, and one for a “partial match” that gives
// the leftmost first match found in the string.
//
// Lines beginning with # are comments. Lines beginning with
// a capital letter are test names printed during RE2's test suite
// and are echoed into t but otherwise ignored.
//
// At time of writing, re2-exhaustive.txt is 59 MB but compresses to 385 kB,
// so we store re2-exhaustive.txt.bz2 in the repository and decompress it on the fly.
func TestRE2Search(t *testing.T) {
	testRE2(t, "testdata/re2-search.txt")
}
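// For example, the result line "0-3 0-1 1-2 2-3" parses (via parseResult
// below) to []int{0, 3, 0, 1, 1, 2, 2, 3}; a line of "-;-" records that
// neither the full-match nor the partial-match run matched; and an unmatched
// subexpression contributes the pair -1, -1.
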
|
||||
func testRE2(t *testing.T, file string) {
|
||||
f, err := os.Open(file)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
defer f.Close()
|
||||
var txt io.Reader
|
||||
if strings.HasSuffix(file, ".bz2") {
|
||||
z := bzip2.NewReader(f)
|
||||
txt = z
|
||||
file = file[:len(file)-len(".bz2")] // for error messages
|
||||
} else {
|
||||
txt = f
|
||||
}
|
||||
lineno := 0
|
||||
scanner := bufio.NewScanner(txt)
|
||||
var (
|
||||
str []string
|
||||
input []string
|
||||
inStrings bool
|
||||
re *Regexp
|
||||
refull *Regexp
|
||||
nfail int
|
||||
ncase int
|
||||
)
|
||||
for lineno := 1; scanner.Scan(); lineno++ {
|
||||
line := scanner.Text()
|
||||
switch {
|
||||
case line == "":
|
||||
t.Fatalf("%s:%d: unexpected blank line", file, lineno)
|
||||
case line[0] == '#':
|
||||
continue
|
||||
case 'A' <= line[0] && line[0] <= 'Z':
|
||||
// Test name.
|
||||
t.Logf("%s\n", line)
|
||||
continue
|
||||
case line == "strings":
|
||||
str = str[:0]
|
||||
inStrings = true
|
||||
case line == "regexps":
|
||||
inStrings = false
|
||||
case line[0] == '"':
|
||||
q, err := strconv.Unquote(line)
|
||||
if err != nil {
|
||||
// Fatal because we'll get out of sync.
|
||||
t.Fatalf("%s:%d: unquote %s: %v", file, lineno, line, err)
|
||||
}
|
||||
if inStrings {
|
||||
str = append(str, q)
|
||||
continue
|
||||
}
|
||||
// Is a regexp.
|
||||
if len(input) != 0 {
|
||||
t.Fatalf("%s:%d: out of sync: have %d strings left before %#q", file, lineno, len(input), q)
|
||||
}
|
||||
re, err = tryCompile(q)
|
||||
if err != nil {
|
||||
if err.Error() == "error parsing regexp: invalid escape sequence: `\\C`" {
|
||||
// We don't and likely never will support \C; keep going.
|
||||
continue
|
||||
}
|
||||
t.Errorf("%s:%d: compile %#q: %v", file, lineno, q, err)
|
||||
if nfail++; nfail >= 100 {
|
||||
t.Fatalf("stopping after %d errors", nfail)
|
||||
}
|
||||
continue
|
||||
}
|
||||
full := `\A(?:` + q + `)\z`
|
||||
refull, err = tryCompile(full)
|
||||
if err != nil {
|
||||
// Fatal because q worked, so this should always work.
|
||||
t.Fatalf("%s:%d: compile full %#q: %v", file, lineno, full, err)
|
||||
}
|
||||
input = str
|
||||
case line[0] == '-' || '0' <= line[0] && line[0] <= '9':
|
||||
// A sequence of match results.
|
||||
ncase++
|
||||
if re == nil {
|
||||
// Failed to compile: skip results.
|
||||
continue
|
||||
}
|
||||
if len(input) == 0 {
|
||||
t.Fatalf("%s:%d: out of sync: no input remaining", file, lineno)
|
||||
}
|
||||
var text string
|
||||
text, input = input[0], input[1:]
|
||||
if !isSingleBytes(text) && strings.Contains(re.String(), `\B`) {
|
||||
// RE2's \B considers every byte position,
|
||||
// so it sees 'not word boundary' in the
|
||||
// middle of UTF-8 sequences. This package
|
||||
// only considers the positions between runes,
|
||||
// so it disagrees. Skip those cases.
|
||||
continue
|
||||
}
|
||||
res := strings.Split(line, ";")
|
||||
if len(res) != len(run) {
|
||||
t.Fatalf("%s:%d: have %d test results, want %d", file, lineno, len(res), len(run))
|
||||
}
|
||||
for i := range res {
|
||||
have, suffix := run[i](re, refull, text)
|
||||
want := parseResult(t, file, lineno, res[i])
|
||||
if !slices.Equal(have, want) {
|
||||
t.Errorf("%s:%d: %#q%s.FindSubmatchIndex(%#q) = %v, want %v", file, lineno, re, suffix, text, have, want)
|
||||
if nfail++; nfail >= 100 {
|
||||
t.Fatalf("stopping after %d errors", nfail)
|
||||
}
|
||||
continue
|
||||
}
|
||||
b, suffix := match[i](re, refull, text)
|
||||
if b != (want != nil) {
|
||||
t.Errorf("%s:%d: %#q%s.MatchString(%#q) = %v, want %v", file, lineno, re, suffix, text, b, !b)
|
||||
if nfail++; nfail >= 100 {
|
||||
t.Fatalf("stopping after %d errors", nfail)
|
||||
}
|
||||
continue
|
||||
}
|
||||
}
|
||||
|
||||
default:
|
||||
t.Fatalf("%s:%d: out of sync: %s\n", file, lineno, line)
|
||||
}
|
||||
}
|
||||
if err := scanner.Err(); err != nil {
|
||||
t.Fatalf("%s:%d: %v", file, lineno, err)
|
||||
}
|
||||
if len(input) != 0 {
|
||||
t.Fatalf("%s:%d: out of sync: have %d strings left at EOF", file, lineno, len(input))
|
||||
}
|
||||
t.Logf("%d cases tested", ncase)
|
||||
}
|
||||
|
||||
var run = []func(*Regexp, *Regexp, string) ([]int, string){
|
||||
runFull,
|
||||
runPartial,
|
||||
runFullLongest,
|
||||
runPartialLongest,
|
||||
}
|
||||
|
||||
func runFull(re, refull *Regexp, text string) ([]int, string) {
|
||||
refull.longest = false
|
||||
return refull.FindStringSubmatchIndex(text), "[full]"
|
||||
}
|
||||
|
||||
func runPartial(re, refull *Regexp, text string) ([]int, string) {
|
||||
re.longest = false
|
||||
return re.FindStringSubmatchIndex(text), ""
|
||||
}
|
||||
|
||||
func runFullLongest(re, refull *Regexp, text string) ([]int, string) {
|
||||
refull.longest = true
|
||||
return refull.FindStringSubmatchIndex(text), "[full,longest]"
|
||||
}
|
||||
|
||||
func runPartialLongest(re, refull *Regexp, text string) ([]int, string) {
|
||||
re.longest = true
|
||||
return re.FindStringSubmatchIndex(text), "[longest]"
|
||||
}
|
||||
|
||||
var match = []func(*Regexp, *Regexp, string) (bool, string){
|
||||
matchFull,
|
||||
matchPartial,
|
||||
matchFullLongest,
|
||||
matchPartialLongest,
|
||||
}
|
||||
|
||||
func matchFull(re, refull *Regexp, text string) (bool, string) {
|
||||
refull.longest = false
|
||||
return refull.MatchString(text), "[full]"
|
||||
}
|
||||
|
||||
func matchPartial(re, refull *Regexp, text string) (bool, string) {
|
||||
re.longest = false
|
||||
return re.MatchString(text), ""
|
||||
}
|
||||
|
||||
func matchFullLongest(re, refull *Regexp, text string) (bool, string) {
|
||||
refull.longest = true
|
||||
return refull.MatchString(text), "[full,longest]"
|
||||
}
|
||||
|
||||
func matchPartialLongest(re, refull *Regexp, text string) (bool, string) {
|
||||
re.longest = true
|
||||
return re.MatchString(text), "[longest]"
|
||||
}
|
||||
|
||||
func isSingleBytes(s string) bool {
|
||||
for _, c := range s {
|
||||
if c >= utf8.RuneSelf {
|
||||
return false
|
||||
}
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
func tryCompile(s string) (re *Regexp, err error) {
|
||||
// Protect against panic during Compile.
|
||||
defer func() {
|
||||
if r := recover(); r != nil {
|
||||
err = fmt.Errorf("panic: %v", r)
|
||||
}
|
||||
}()
|
||||
return Compile(s)
|
||||
}
|
||||
|
||||
func parseResult(t *testing.T, file string, lineno int, res string) []int {
|
||||
// A single - indicates no match.
|
||||
if res == "-" {
|
||||
return nil
|
||||
}
|
||||
// Otherwise, a space-separated list of pairs.
|
||||
n := 1
|
||||
for j := 0; j < len(res); j++ {
|
||||
if res[j] == ' ' {
|
||||
n++
|
||||
}
|
||||
}
|
||||
out := make([]int, 2*n)
|
||||
i := 0
|
||||
n = 0
|
||||
for j := 0; j <= len(res); j++ {
|
||||
if j == len(res) || res[j] == ' ' {
|
||||
// Process a single pair. - means no submatch.
|
||||
pair := res[i:j]
|
||||
if pair == "-" {
|
||||
out[n] = -1
|
||||
out[n+1] = -1
|
||||
} else {
|
||||
loStr, hiStr, _ := strings.Cut(pair, "-")
|
||||
lo, err1 := strconv.Atoi(loStr)
|
||||
hi, err2 := strconv.Atoi(hiStr)
|
||||
if err1 != nil || err2 != nil || lo > hi {
|
||||
t.Fatalf("%s:%d: invalid pair %s", file, lineno, pair)
|
||||
}
|
||||
out[n] = lo
|
||||
out[n+1] = hi
|
||||
}
|
||||
n += 2
|
||||
i = j + 1
|
||||
}
|
||||
}
|
||||
return out
|
||||
}
|
||||
|
||||
// TestFowler runs this package's regexp API against the
|
||||
// POSIX regular expression tests collected by Glenn Fowler
|
||||
// at http://www2.research.att.com/~astopen/testregex/testregex.html.
|
||||
func TestFowler(t *testing.T) {
|
||||
files, err := filepath.Glob("testdata/*.dat")
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
for _, file := range files {
|
||||
t.Log(file)
|
||||
testFowler(t, file)
|
||||
}
|
||||
}
|
||||
|
||||
var notab = MustCompilePOSIX(`[^\t]+`)
|
||||
|
||||
func testFowler(t *testing.T, file string) {
|
||||
f, err := os.Open(file)
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
return
|
||||
}
|
||||
defer f.Close()
|
||||
b := bufio.NewReader(f)
|
||||
lineno := 0
|
||||
lastRegexp := ""
|
||||
Reading:
|
||||
for {
|
||||
lineno++
|
||||
line, err := b.ReadString('\n')
|
||||
if err != nil {
|
||||
if err != io.EOF {
|
||||
t.Errorf("%s:%d: %v", file, lineno, err)
|
||||
}
|
||||
break Reading
|
||||
}
|
||||
|
||||
// http://www2.research.att.com/~astopen/man/man1/testregex.html
|
||||
//
|
||||
// INPUT FORMAT
|
||||
// Input lines may be blank, a comment beginning with #, or a test
|
||||
// specification. A specification is five fields separated by one
|
||||
// or more tabs. NULL denotes the empty string and NIL denotes the
|
||||
// 0 pointer.
|
||||
if line[0] == '#' || line[0] == '\n' {
|
||||
continue Reading
|
||||
}
|
||||
line = line[:len(line)-1]
|
||||
field := notab.FindAllString(line, -1)
|
||||
for i, f := range field {
|
||||
if f == "NULL" {
|
||||
field[i] = ""
|
||||
}
|
||||
if f == "NIL" {
|
||||
t.Logf("%s:%d: skip: %s", file, lineno, line)
|
||||
continue Reading
|
||||
}
|
||||
}
|
||||
if len(field) == 0 {
|
||||
continue Reading
|
||||
}
|
||||
|
||||
// Field 1: the regex(3) flags to apply, one character per REG_feature
|
||||
// flag. The test is skipped if REG_feature is not supported by the
|
||||
// implementation. If the first character is not [BEASKLP] then the
|
||||
// specification is a global control line. One or more of [BEASKLP] may be
|
||||
// specified; the test will be repeated for each mode.
|
||||
//
|
||||
// B basic BRE (grep, ed, sed)
|
||||
// E REG_EXTENDED ERE (egrep)
|
||||
// A REG_AUGMENTED ARE (egrep with negation)
|
||||
// S REG_SHELL SRE (sh glob)
|
||||
// K REG_SHELL|REG_AUGMENTED KRE (ksh glob)
|
||||
// L REG_LITERAL LRE (fgrep)
|
||||
//
|
||||
// a REG_LEFT|REG_RIGHT implicit ^...$
|
||||
// b REG_NOTBOL lhs does not match ^
|
||||
// c REG_COMMENT ignore space and #...\n
|
||||
// d REG_SHELL_DOT explicit leading . match
|
||||
// e REG_NOTEOL rhs does not match $
|
||||
// f REG_MULTIPLE multiple \n separated patterns
|
||||
// g FNM_LEADING_DIR testfnmatch only -- match until /
|
||||
// h REG_MULTIREF multiple digit backref
|
||||
// i REG_ICASE ignore case
|
||||
// j REG_SPAN . matches \n
|
||||
// k REG_ESCAPE \ to escape [...] delimiter
|
||||
// l REG_LEFT implicit ^...
|
||||
// m REG_MINIMAL minimal match
|
||||
// n REG_NEWLINE explicit \n match
|
||||
// o REG_ENCLOSED (|&) magic inside [@|&](...)
|
||||
// p REG_SHELL_PATH explicit / match
|
||||
// q REG_DELIMITED delimited pattern
|
||||
// r REG_RIGHT implicit ...$
|
||||
// s REG_SHELL_ESCAPED \ not special
|
||||
// t REG_MUSTDELIM all delimiters must be specified
|
||||
// u standard unspecified behavior -- errors not counted
|
||||
// v REG_CLASS_ESCAPE \ special inside [...]
|
||||
// w REG_NOSUB no subexpression match array
|
||||
// x REG_LENIENT let some errors slide
|
||||
// y REG_LEFT regexec() implicit ^...
|
||||
// z REG_NULL NULL subexpressions ok
|
||||
// $ expand C \c escapes in fields 2 and 3
|
||||
// / field 2 is a regsubcomp() expression
|
||||
// = field 3 is a regdecomp() expression
|
||||
//
|
||||
// Field 1 control lines:
|
||||
//
|
||||
// C set LC_COLLATE and LC_CTYPE to locale in field 2
|
||||
//
|
||||
// ?test ... output field 5 if passed and != EXPECTED, silent otherwise
|
||||
// &test ... output field 5 if current and previous passed
|
||||
// |test ... output field 5 if current passed and previous failed
|
||||
// ; ... output field 2 if previous failed
|
||||
// {test ... skip if failed until }
|
||||
// } end of skip
|
||||
//
|
||||
// : comment comment copied as output NOTE
|
||||
// :comment:test :comment: ignored
|
||||
// N[OTE] comment comment copied as output NOTE
|
||||
// T[EST] comment comment
|
||||
//
|
||||
// number use number for nmatch (20 by default)
|
||||
flag := field[0]
|
||||
switch flag[0] {
|
||||
case '?', '&', '|', ';', '{', '}':
|
||||
// Ignore all the control operators.
|
||||
// Just run everything.
|
||||
flag = flag[1:]
|
||||
if flag == "" {
|
||||
continue Reading
|
||||
}
|
||||
case ':':
|
||||
var ok bool
|
||||
if _, flag, ok = strings.Cut(flag[1:], ":"); !ok {
|
||||
t.Logf("skip: %s", line)
|
||||
continue Reading
|
||||
}
|
||||
case 'C', 'N', 'T', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
|
||||
t.Logf("skip: %s", line)
|
||||
continue Reading
|
||||
}
|
||||
|
||||
// Can check field count now that we've handled the myriad comment formats.
|
||||
if len(field) < 4 {
|
||||
t.Errorf("%s:%d: too few fields: %s", file, lineno, line)
|
||||
continue Reading
|
||||
}
|
||||
|
||||
// Expand C escapes (a.k.a. Go escapes).
|
||||
if strings.Contains(flag, "$") {
|
||||
f := `"` + field[1] + `"`
|
||||
if field[1], err = strconv.Unquote(f); err != nil {
|
||||
t.Errorf("%s:%d: cannot unquote %s", file, lineno, f)
|
||||
}
|
||||
f = `"` + field[2] + `"`
|
||||
if field[2], err = strconv.Unquote(f); err != nil {
|
||||
t.Errorf("%s:%d: cannot unquote %s", file, lineno, f)
|
||||
}
|
||||
}
|
||||
|
||||
// Field 2: the regular expression pattern; SAME uses the pattern from
|
||||
// the previous specification.
|
||||
//
|
||||
if field[1] == "SAME" {
|
||||
field[1] = lastRegexp
|
||||
}
|
||||
lastRegexp = field[1]
|
||||
|
||||
// Field 3: the string to match.
|
||||
text := field[2]
|
||||
|
||||
// Field 4: the test outcome...
|
||||
ok, shouldCompile, shouldMatch, pos := parseFowlerResult(field[3])
|
||||
if !ok {
|
||||
t.Errorf("%s:%d: cannot parse result %#q", file, lineno, field[3])
|
||||
continue Reading
|
||||
}
|
||||
|
||||
// Field 5: optional comment appended to the report.
|
||||
|
||||
Testing:
|
||||
// Run test once for each specified capital letter mode that we support.
|
||||
for _, c := range flag {
|
||||
pattern := field[1]
|
||||
syn := syntax.POSIX | syntax.ClassNL
|
||||
switch c {
|
||||
default:
|
||||
continue Testing
|
||||
case 'E':
|
||||
// extended regexp (what we support)
|
||||
case 'L':
|
||||
// literal
|
||||
pattern = QuoteMeta(pattern)
|
||||
}
|
||||
|
||||
for _, c := range flag {
|
||||
switch c {
|
||||
case 'i':
|
||||
syn |= syntax.FoldCase
|
||||
}
|
||||
}
|
||||
|
||||
re, err := compile(pattern, syn, true)
|
||||
if err != nil {
|
||||
if shouldCompile {
|
||||
t.Errorf("%s:%d: %#q did not compile", file, lineno, pattern)
|
||||
}
|
||||
continue Testing
|
||||
}
|
||||
if !shouldCompile {
|
||||
t.Errorf("%s:%d: %#q should not compile", file, lineno, pattern)
|
||||
continue Testing
|
||||
}
|
||||
match := re.MatchString(text)
|
||||
if match != shouldMatch {
|
||||
t.Errorf("%s:%d: %#q.Match(%#q) = %v, want %v", file, lineno, pattern, text, match, shouldMatch)
|
||||
continue Testing
|
||||
}
|
||||
have := re.FindStringSubmatchIndex(text)
|
||||
if (len(have) > 0) != match {
|
||||
t.Errorf("%s:%d: %#q.Match(%#q) = %v, but %#q.FindSubmatchIndex(%#q) = %v", file, lineno, pattern, text, match, pattern, text, have)
|
||||
continue Testing
|
||||
}
|
||||
if len(have) > len(pos) {
|
||||
have = have[:len(pos)]
|
||||
}
|
||||
if !slices.Equal(have, pos) {
|
||||
t.Errorf("%s:%d: %#q.FindSubmatchIndex(%#q) = %v, want %v", file, lineno, pattern, text, have, pos)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func parseFowlerResult(s string) (ok, compiled, matched bool, pos []int) {
|
||||
// Field 4: the test outcome. This is either one of the posix error
|
||||
// codes (with REG_ omitted) or the match array, a list of (m,n)
|
||||
// entries with m and n being first and last+1 positions in the
|
||||
// field 3 string, or NULL if REG_NOSUB is in effect and success
|
||||
// is expected. BADPAT is acceptable in place of any regcomp(3)
|
||||
// error code. The match[] array is initialized to (-2,-2) before
|
||||
// each test. All array elements from 0 to nmatch-1 must be specified
|
||||
// in the outcome. Unspecified endpoints (offset -1) are denoted by ?.
|
||||
// Unset endpoints (offset -2) are denoted by X. {x}(o:n) denotes a
|
||||
// matched (?{...}) expression, where x is the text enclosed by {...},
|
||||
// o is the expression ordinal counting from 1, and n is the length of
|
||||
// the unmatched portion of the subject string. If x starts with a
|
||||
// number then that is the return value of re_execf(), otherwise 0 is
|
||||
// returned.
|
||||
switch {
|
||||
case s == "":
|
||||
// Match with no position information.
|
||||
ok = true
|
||||
compiled = true
|
||||
matched = true
|
||||
return
|
||||
case s == "NOMATCH":
|
||||
// Match failure.
|
||||
ok = true
|
||||
compiled = true
|
||||
matched = false
|
||||
return
|
||||
case 'A' <= s[0] && s[0] <= 'Z':
|
||||
// All the other error codes are compile errors.
|
||||
ok = true
|
||||
compiled = false
|
||||
return
|
||||
}
|
||||
compiled = true
|
||||
|
||||
var x []int
|
||||
for s != "" {
|
||||
var end byte = ')'
|
||||
if len(x)%2 == 0 {
|
||||
if s[0] != '(' {
|
||||
ok = false
|
||||
return
|
||||
}
|
||||
s = s[1:]
|
||||
end = ','
|
||||
}
|
||||
i := 0
|
||||
for i < len(s) && s[i] != end {
|
||||
i++
|
||||
}
|
||||
if i == 0 || i == len(s) {
|
||||
ok = false
|
||||
return
|
||||
}
|
||||
var v = -1
|
||||
var err error
|
||||
if s[:i] != "?" {
|
||||
v, err = strconv.Atoi(s[:i])
|
||||
if err != nil {
|
||||
ok = false
|
||||
return
|
||||
}
|
||||
}
|
||||
x = append(x, v)
|
||||
s = s[i+1:]
|
||||
}
|
||||
if len(x)%2 != 0 {
|
||||
ok = false
|
||||
return
|
||||
}
|
||||
ok = true
|
||||
matched = true
|
||||
pos = x
|
||||
return
|
||||
}
|
||||
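// For example, parseFowlerResult("(0,5)(0,3)") reports a successful compile
// and match with pos = []int{0, 5, 0, 3}; "NOMATCH" reports a successful
// compile with no match; and a "?" in place of an offset (an unspecified
// endpoint) becomes -1.
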
|
||||
var text []byte
|
||||
|
||||
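// makeText returns n bytes of deterministic pseudo-random printable ASCII
// (a simple shift/xor generator over 0x20..0x7E, emitting roughly one '\n'
// every 31 bytes), so benchmark inputs are reproducible from run to run.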
func makeText(n int) []byte {
|
||||
if len(text) >= n {
|
||||
return text[:n]
|
||||
}
|
||||
text = make([]byte, n)
|
||||
x := ^uint32(0)
|
||||
for i := range text {
|
||||
x += x
|
||||
x ^= 1
|
||||
if int32(x) < 0 {
|
||||
x ^= 0x88888eef
|
||||
}
|
||||
if x%31 == 0 {
|
||||
text[i] = '\n'
|
||||
} else {
|
||||
text[i] = byte(x%(0x7E+1-0x20) + 0x20)
|
||||
}
|
||||
}
|
||||
return text
|
||||
}
|
||||
|
||||
func BenchmarkMatch(b *testing.B) {
|
||||
isRaceBuilder := strings.HasSuffix(testenv.Builder(), "-race")
|
||||
|
||||
for _, data := range benchData {
|
||||
r := MustCompile(data.re)
|
||||
for _, size := range benchSizes {
|
||||
if (isRaceBuilder || testing.Short()) && size.n > 1<<10 {
|
||||
continue
|
||||
}
|
||||
t := makeText(size.n)
|
||||
b.Run(data.name+"/"+size.name, func(b *testing.B) {
|
||||
b.SetBytes(int64(size.n))
|
||||
for i := 0; i < b.N; i++ {
|
||||
if r.Match(t) {
|
||||
b.Fatal("match!")
|
||||
}
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func BenchmarkMatch_onepass_regex(b *testing.B) {
|
||||
isRaceBuilder := strings.HasSuffix(testenv.Builder(), "-race")
|
||||
r := MustCompile(`(?s)\A.*\z`)
|
||||
if r.onepass == nil {
|
||||
b.Fatalf("want onepass regex, but %q is not onepass", r)
|
||||
}
|
||||
for _, size := range benchSizes {
|
||||
if (isRaceBuilder || testing.Short()) && size.n > 1<<10 {
|
||||
continue
|
||||
}
|
||||
t := makeText(size.n)
|
||||
b.Run(size.name, func(b *testing.B) {
|
||||
b.SetBytes(int64(size.n))
|
||||
b.ReportAllocs()
|
||||
for i := 0; i < b.N; i++ {
|
||||
if !r.Match(t) {
|
||||
b.Fatal("not match!")
|
||||
}
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
var benchData = []struct{ name, re string }{
|
||||
{"Easy0", "ABCDEFGHIJKLMNOPQRSTUVWXYZ$"},
|
||||
{"Easy0i", "(?i)ABCDEFGHIJklmnopqrstuvwxyz$"},
|
||||
{"Easy1", "A[AB]B[BC]C[CD]D[DE]E[EF]F[FG]G[GH]H[HI]I[IJ]J$"},
|
||||
{"Medium", "[XYZ]ABCDEFGHIJKLMNOPQRSTUVWXYZ$"},
|
||||
{"Hard", "[ -~]*ABCDEFGHIJKLMNOPQRSTUVWXYZ$"},
|
||||
{"Hard1", "ABCD|CDEF|EFGH|GHIJ|IJKL|KLMN|MNOP|OPQR|QRST|STUV|UVWX|WXYZ"},
|
||||
}
|
||||
|
||||
var benchSizes = []struct {
|
||||
name string
|
||||
n int
|
||||
}{
|
||||
{"16", 16},
|
||||
{"32", 32},
|
||||
{"1K", 1 << 10},
|
||||
{"32K", 32 << 10},
|
||||
{"1M", 1 << 20},
|
||||
{"32M", 32 << 20},
|
||||
}
|
||||
|
||||
func TestLongest(t *testing.T) {
|
||||
re, err := Compile(`a(|b)`)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if g, w := re.FindString("ab"), "a"; g != w {
|
||||
t.Errorf("first match was %q, want %q", g, w)
|
||||
}
|
||||
re.Longest()
|
||||
if g, w := re.FindString("ab"), "ab"; g != w {
|
||||
t.Errorf("longest match was %q, want %q", g, w)
|
||||
}
|
||||
}
|
||||
|
||||
// TestProgramTooLongForBacktrack tests that a regex which is too long
|
||||
// for the backtracker still executes properly.
|
||||
func TestProgramTooLongForBacktrack(t *testing.T) {
|
||||
longRegex := MustCompile(`(one|two|three|four|five|six|seven|eight|nine|ten|eleven|twelve|thirteen|fourteen|fifteen|sixteen|seventeen|eighteen|nineteen|twenty|twentyone|twentytwo|twentythree|twentyfour|twentyfive|twentysix|twentyseven|twentyeight|twentynine|thirty|thirtyone|thirtytwo|thirtythree|thirtyfour|thirtyfive|thirtysix|thirtyseven|thirtyeight|thirtynine|forty|fortyone|fortytwo|fortythree|fortyfour|fortyfive|fortysix|fortyseven|fortyeight|fortynine|fifty|fiftyone|fiftytwo|fiftythree|fiftyfour|fiftyfive|fiftysix|fiftyseven|fiftyeight|fiftynine|sixty|sixtyone|sixtytwo|sixtythree|sixtyfour|sixtyfive|sixtysix|sixtyseven|sixtyeight|sixtynine|seventy|seventyone|seventytwo|seventythree|seventyfour|seventyfive|seventysix|seventyseven|seventyeight|seventynine|eighty|eightyone|eightytwo|eightythree|eightyfour|eightyfive|eightysix|eightyseven|eightyeight|eightynine|ninety|ninetyone|ninetytwo|ninetythree|ninetyfour|ninetyfive|ninetysix|ninetyseven|ninetyeight|ninetynine|onehundred)`)
|
||||
if !longRegex.MatchString("two") {
|
||||
t.Errorf("longRegex.MatchString(\"two\") was false, want true")
|
||||
}
|
||||
if longRegex.MatchString("xxx") {
|
||||
t.Errorf("longRegex.MatchString(\"xxx\") was true, want false")
|
||||
}
|
||||
}
|
||||
518
src/regexp/find_test.go
Normal file
@@ -0,0 +1,518 @@
// Copyright 2010 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package regexp
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"strings"
|
||||
"testing"
|
||||
)
|
||||
|
||||
// For each pattern/text pair, what is the expected output of each function?
|
||||
// We can derive the textual results from the indexed results, the non-submatch
|
||||
// results from the submatched results, the single results from the 'all' results,
|
||||
// and the byte results from the string results. Therefore the table includes
|
||||
// only the FindAllStringSubmatchIndex result.
|
||||
type FindTest struct {
|
||||
pat string
|
||||
text string
|
||||
matches [][]int
|
||||
}
|
||||
|
||||
func (t FindTest) String() string {
|
||||
return fmt.Sprintf("pat: %#q text: %#q", t.pat, t.text)
|
||||
}
|
||||
|
||||
var findTests = []FindTest{
|
||||
{``, ``, build(1, 0, 0)},
|
||||
{`^abcdefg`, "abcdefg", build(1, 0, 7)},
|
||||
{`a+`, "baaab", build(1, 1, 4)},
|
||||
{"abcd..", "abcdef", build(1, 0, 6)},
|
||||
{`a`, "a", build(1, 0, 1)},
|
||||
{`x`, "y", nil},
|
||||
{`b`, "abc", build(1, 1, 2)},
|
||||
{`.`, "a", build(1, 0, 1)},
|
||||
{`.*`, "abcdef", build(1, 0, 6)},
|
||||
{`^`, "abcde", build(1, 0, 0)},
|
||||
{`$`, "abcde", build(1, 5, 5)},
|
||||
{`^abcd$`, "abcd", build(1, 0, 4)},
|
||||
{`^bcd'`, "abcdef", nil},
|
||||
{`^abcd$`, "abcde", nil},
|
||||
{`a+`, "baaab", build(1, 1, 4)},
|
||||
{`a*`, "baaab", build(3, 0, 0, 1, 4, 5, 5)},
|
||||
{`[a-z]+`, "abcd", build(1, 0, 4)},
|
||||
{`[^a-z]+`, "ab1234cd", build(1, 2, 6)},
|
||||
{`[a\-\]z]+`, "az]-bcz", build(2, 0, 4, 6, 7)},
|
||||
{`[^\n]+`, "abcd\n", build(1, 0, 4)},
|
||||
{`[日本語]+`, "日本語日本語", build(1, 0, 18)},
|
||||
{`日本語+`, "日本語", build(1, 0, 9)},
|
||||
{`日本語+`, "日本語語語語", build(1, 0, 18)},
|
||||
{`()`, "", build(1, 0, 0, 0, 0)},
|
||||
{`(a)`, "a", build(1, 0, 1, 0, 1)},
|
||||
{`(.)(.)`, "日a", build(1, 0, 4, 0, 3, 3, 4)},
|
||||
{`(.*)`, "", build(1, 0, 0, 0, 0)},
|
||||
{`(.*)`, "abcd", build(1, 0, 4, 0, 4)},
|
||||
{`(..)(..)`, "abcd", build(1, 0, 4, 0, 2, 2, 4)},
|
||||
{`(([^xyz]*)(d))`, "abcd", build(1, 0, 4, 0, 4, 0, 3, 3, 4)},
|
||||
{`((a|b|c)*(d))`, "abcd", build(1, 0, 4, 0, 4, 2, 3, 3, 4)},
|
||||
{`(((a|b|c)*)(d))`, "abcd", build(1, 0, 4, 0, 4, 0, 3, 2, 3, 3, 4)},
|
||||
{`\a\f\n\r\t\v`, "\a\f\n\r\t\v", build(1, 0, 6)},
|
||||
{`[\a\f\n\r\t\v]+`, "\a\f\n\r\t\v", build(1, 0, 6)},
|
||||
|
||||
{`a*(|(b))c*`, "aacc", build(1, 0, 4, 2, 2, -1, -1)},
|
||||
{`(.*).*`, "ab", build(1, 0, 2, 0, 2)},
|
||||
{`[.]`, ".", build(1, 0, 1)},
|
||||
{`/$`, "/abc/", build(1, 4, 5)},
|
||||
{`/$`, "/abc", nil},
|
||||
|
||||
// multiple matches
|
||||
{`.`, "abc", build(3, 0, 1, 1, 2, 2, 3)},
|
||||
{`(.)`, "abc", build(3, 0, 1, 0, 1, 1, 2, 1, 2, 2, 3, 2, 3)},
|
||||
{`.(.)`, "abcd", build(2, 0, 2, 1, 2, 2, 4, 3, 4)},
|
||||
{`ab*`, "abbaab", build(3, 0, 3, 3, 4, 4, 6)},
|
||||
{`a(b*)`, "abbaab", build(3, 0, 3, 1, 3, 3, 4, 4, 4, 4, 6, 5, 6)},
|
||||
|
||||
// fixed bugs
|
||||
{`ab$`, "cab", build(1, 1, 3)},
|
||||
{`axxb$`, "axxcb", nil},
|
||||
{`data`, "daXY data", build(1, 5, 9)},
|
||||
{`da(.)a$`, "daXY data", build(1, 5, 9, 7, 8)},
|
||||
{`zx+`, "zzx", build(1, 1, 3)},
|
||||
{`ab$`, "abcab", build(1, 3, 5)},
|
||||
{`(aa)*$`, "a", build(1, 1, 1, -1, -1)},
|
||||
{`(?:.|(?:.a))`, "", nil},
|
||||
{`(?:A(?:A|a))`, "Aa", build(1, 0, 2)},
|
||||
{`(?:A|(?:A|a))`, "a", build(1, 0, 1)},
|
||||
{`(a){0}`, "", build(1, 0, 0, -1, -1)},
|
||||
{`(?-s)(?:(?:^).)`, "\n", nil},
|
||||
{`(?s)(?:(?:^).)`, "\n", build(1, 0, 1)},
|
||||
{`(?:(?:^).)`, "\n", nil},
|
||||
{`\b`, "x", build(2, 0, 0, 1, 1)},
|
||||
{`\b`, "xx", build(2, 0, 0, 2, 2)},
|
||||
{`\b`, "x y", build(4, 0, 0, 1, 1, 2, 2, 3, 3)},
|
||||
{`\b`, "xx yy", build(4, 0, 0, 2, 2, 3, 3, 5, 5)},
|
||||
{`\B`, "x", nil},
|
||||
{`\B`, "xx", build(1, 1, 1)},
|
||||
{`\B`, "x y", nil},
|
||||
{`\B`, "xx yy", build(2, 1, 1, 4, 4)},
|
||||
{`(|a)*`, "aa", build(3, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2)},
|
||||
|
||||
// RE2 tests
|
||||
{`[^\S\s]`, "abcd", nil},
|
||||
{`[^\S[:space:]]`, "abcd", nil},
|
||||
{`[^\D\d]`, "abcd", nil},
|
||||
{`[^\D[:digit:]]`, "abcd", nil},
|
||||
{`(?i)\W`, "x", nil},
|
||||
{`(?i)\W`, "k", nil},
|
||||
{`(?i)\W`, "s", nil},
|
||||
|
||||
// can backslash-escape any punctuation
|
||||
{`\!\"\#\$\%\&\'\(\)\*\+\,\-\.\/\:\;\<\=\>\?\@\[\\\]\^\_\{\|\}\~`,
|
||||
`!"#$%&'()*+,-./:;<=>?@[\]^_{|}~`, build(1, 0, 31)},
|
||||
{`[\!\"\#\$\%\&\'\(\)\*\+\,\-\.\/\:\;\<\=\>\?\@\[\\\]\^\_\{\|\}\~]+`,
|
||||
`!"#$%&'()*+,-./:;<=>?@[\]^_{|}~`, build(1, 0, 31)},
|
||||
{"\\`", "`", build(1, 0, 1)},
|
||||
{"[\\`]+", "`", build(1, 0, 1)},
|
||||
|
||||
{"\ufffd", "\xff", build(1, 0, 1)},
|
||||
{"\ufffd", "hello\xffworld", build(1, 5, 6)},
|
||||
{`.*`, "hello\xffworld", build(1, 0, 11)},
|
||||
{`\x{fffd}`, "\xc2\x00", build(1, 0, 1)},
|
||||
{"[\ufffd]", "\xff", build(1, 0, 1)},
|
||||
{`[\x{fffd}]`, "\xc2\x00", build(1, 0, 1)},
|
||||
|
||||
// long set of matches (longer than startSize)
|
||||
{
|
||||
".",
|
||||
"qwertyuiopasdfghjklzxcvbnm1234567890",
|
||||
build(36, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10,
|
||||
10, 11, 11, 12, 12, 13, 13, 14, 14, 15, 15, 16, 16, 17, 17, 18, 18, 19, 19, 20,
|
||||
20, 21, 21, 22, 22, 23, 23, 24, 24, 25, 25, 26, 26, 27, 27, 28, 28, 29, 29, 30,
|
||||
30, 31, 31, 32, 32, 33, 33, 34, 34, 35, 35, 36),
|
||||
},
|
||||
}
|
||||
|
||||
// build is a helper to construct a [][]int by extracting n sequences from x.
|
||||
// This represents n matches with len(x)/n submatches each.
|
||||
func build(n int, x ...int) [][]int {
|
||||
ret := make([][]int, n)
|
||||
runLength := len(x) / n
|
||||
j := 0
|
||||
for i := range ret {
|
||||
ret[i] = make([]int, runLength)
|
||||
copy(ret[i], x[j:])
|
||||
j += runLength
|
||||
if j > len(x) {
|
||||
panic("invalid build entry")
|
||||
}
|
||||
}
|
||||
return ret
|
||||
}
|
||||
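// For example, build(2, 0, 1, 1, 2) returns [][]int{{0, 1}, {1, 2}}:
// two matches, each described by a single start/end index pair.
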
|
||||
// First the simple cases.
|
||||
|
||||
func TestFind(t *testing.T) {
|
||||
for _, test := range findTests {
|
||||
re := MustCompile(test.pat)
|
||||
if re.String() != test.pat {
|
||||
t.Errorf("String() = `%s`; should be `%s`", re.String(), test.pat)
|
||||
}
|
||||
result := re.Find([]byte(test.text))
|
||||
switch {
|
||||
case len(test.matches) == 0 && len(result) == 0:
|
||||
// ok
|
||||
case test.matches == nil && result != nil:
|
||||
t.Errorf("expected no match; got one: %s", test)
|
||||
case test.matches != nil && result == nil:
|
||||
t.Errorf("expected match; got none: %s", test)
|
||||
case test.matches != nil && result != nil:
|
||||
expect := test.text[test.matches[0][0]:test.matches[0][1]]
|
||||
if len(result) != cap(result) {
|
||||
t.Errorf("expected capacity %d got %d: %s", len(result), cap(result), test)
|
||||
}
|
||||
if expect != string(result) {
|
||||
t.Errorf("expected %q got %q: %s", expect, result, test)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestFindString(t *testing.T) {
|
||||
for _, test := range findTests {
|
||||
result := MustCompile(test.pat).FindString(test.text)
|
||||
switch {
|
||||
case len(test.matches) == 0 && len(result) == 0:
|
||||
// ok
|
||||
case test.matches == nil && result != "":
|
||||
t.Errorf("expected no match; got one: %s", test)
|
||||
case test.matches != nil && result == "":
|
||||
// Tricky because an empty result has two meanings: no match or empty match.
|
||||
if test.matches[0][0] != test.matches[0][1] {
|
||||
t.Errorf("expected match; got none: %s", test)
|
||||
}
|
||||
case test.matches != nil && result != "":
|
||||
expect := test.text[test.matches[0][0]:test.matches[0][1]]
|
||||
if expect != result {
|
||||
t.Errorf("expected %q got %q: %s", expect, result, test)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func testFindIndex(test *FindTest, result []int, t *testing.T) {
|
||||
switch {
|
||||
case len(test.matches) == 0 && len(result) == 0:
|
||||
// ok
|
||||
case test.matches == nil && result != nil:
|
||||
t.Errorf("expected no match; got one: %s", test)
|
||||
case test.matches != nil && result == nil:
|
||||
t.Errorf("expected match; got none: %s", test)
|
||||
case test.matches != nil && result != nil:
|
||||
expect := test.matches[0]
|
||||
if expect[0] != result[0] || expect[1] != result[1] {
|
||||
t.Errorf("expected %v got %v: %s", expect, result, test)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestFindIndex(t *testing.T) {
|
||||
for _, test := range findTests {
|
||||
testFindIndex(&test, MustCompile(test.pat).FindIndex([]byte(test.text)), t)
|
||||
}
|
||||
}
|
||||
|
||||
func TestFindStringIndex(t *testing.T) {
|
||||
for _, test := range findTests {
|
||||
testFindIndex(&test, MustCompile(test.pat).FindStringIndex(test.text), t)
|
||||
}
|
||||
}
|
||||
|
||||
func TestFindReaderIndex(t *testing.T) {
|
||||
for _, test := range findTests {
|
||||
testFindIndex(&test, MustCompile(test.pat).FindReaderIndex(strings.NewReader(test.text)), t)
|
||||
}
|
||||
}
|
||||
|
||||
// Now come the simple All cases.
|
||||
|
||||
func TestFindAll(t *testing.T) {
|
||||
for _, test := range findTests {
|
||||
result := MustCompile(test.pat).FindAll([]byte(test.text), -1)
|
||||
switch {
|
||||
case test.matches == nil && result == nil:
|
||||
// ok
|
||||
case test.matches == nil && result != nil:
|
||||
t.Errorf("expected no match; got one: %s", test)
|
||||
case test.matches != nil && result == nil:
|
||||
t.Fatalf("expected match; got none: %s", test)
|
||||
case test.matches != nil && result != nil:
|
||||
if len(test.matches) != len(result) {
|
||||
t.Errorf("expected %d matches; got %d: %s", len(test.matches), len(result), test)
|
||||
continue
|
||||
}
|
||||
for k, e := range test.matches {
|
||||
got := result[k]
|
||||
if len(got) != cap(got) {
|
||||
t.Errorf("match %d: expected capacity %d got %d: %s", k, len(got), cap(got), test)
|
||||
}
|
||||
expect := test.text[e[0]:e[1]]
|
||||
if expect != string(got) {
|
||||
t.Errorf("match %d: expected %q got %q: %s", k, expect, got, test)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestFindAllString(t *testing.T) {
|
||||
for _, test := range findTests {
|
||||
result := MustCompile(test.pat).FindAllString(test.text, -1)
|
||||
switch {
|
||||
case test.matches == nil && result == nil:
|
||||
// ok
|
||||
case test.matches == nil && result != nil:
|
||||
t.Errorf("expected no match; got one: %s", test)
|
||||
case test.matches != nil && result == nil:
|
||||
t.Errorf("expected match; got none: %s", test)
|
||||
case test.matches != nil && result != nil:
|
||||
if len(test.matches) != len(result) {
|
||||
t.Errorf("expected %d matches; got %d: %s", len(test.matches), len(result), test)
|
||||
continue
|
||||
}
|
||||
for k, e := range test.matches {
|
||||
expect := test.text[e[0]:e[1]]
|
||||
if expect != result[k] {
|
||||
t.Errorf("expected %q got %q: %s", expect, result, test)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func testFindAllIndex(test *FindTest, result [][]int, t *testing.T) {
|
||||
switch {
|
||||
case test.matches == nil && result == nil:
|
||||
// ok
|
||||
case test.matches == nil && result != nil:
|
||||
t.Errorf("expected no match; got one: %s", test)
|
||||
case test.matches != nil && result == nil:
|
||||
t.Errorf("expected match; got none: %s", test)
|
||||
case test.matches != nil && result != nil:
|
||||
if len(test.matches) != len(result) {
|
||||
t.Errorf("expected %d matches; got %d: %s", len(test.matches), len(result), test)
|
||||
return
|
||||
}
|
||||
for k, e := range test.matches {
|
||||
if e[0] != result[k][0] || e[1] != result[k][1] {
|
||||
t.Errorf("match %d: expected %v got %v: %s", k, e, result[k], test)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestFindAllIndex(t *testing.T) {
|
||||
for _, test := range findTests {
|
||||
testFindAllIndex(&test, MustCompile(test.pat).FindAllIndex([]byte(test.text), -1), t)
|
||||
}
|
||||
}
|
||||
|
||||
func TestFindAllStringIndex(t *testing.T) {
|
||||
for _, test := range findTests {
|
||||
testFindAllIndex(&test, MustCompile(test.pat).FindAllStringIndex(test.text, -1), t)
|
||||
}
|
||||
}
|
||||
|
||||
// Now come the Submatch cases.
|
||||
|
||||
func testSubmatchBytes(test *FindTest, n int, submatches []int, result [][]byte, t *testing.T) {
|
||||
if len(submatches) != len(result)*2 {
|
||||
t.Errorf("match %d: expected %d submatches; got %d: %s", n, len(submatches)/2, len(result), test)
|
||||
return
|
||||
}
|
||||
for k := 0; k < len(submatches); k += 2 {
|
||||
if submatches[k] == -1 {
|
||||
if result[k/2] != nil {
|
||||
t.Errorf("match %d: expected nil got %q: %s", n, result, test)
|
||||
}
|
||||
continue
|
||||
}
|
||||
got := result[k/2]
|
||||
if len(got) != cap(got) {
|
||||
t.Errorf("match %d: expected capacity %d got %d: %s", n, len(got), cap(got), test)
|
||||
return
|
||||
}
|
||||
expect := test.text[submatches[k]:submatches[k+1]]
|
||||
if expect != string(got) {
|
||||
t.Errorf("match %d: expected %q got %q: %s", n, expect, got, test)
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestFindSubmatch(t *testing.T) {
|
||||
for _, test := range findTests {
|
||||
result := MustCompile(test.pat).FindSubmatch([]byte(test.text))
|
||||
switch {
|
||||
case test.matches == nil && result == nil:
|
||||
// ok
|
||||
case test.matches == nil && result != nil:
|
||||
t.Errorf("expected no match; got one: %s", test)
|
||||
case test.matches != nil && result == nil:
|
||||
t.Errorf("expected match; got none: %s", test)
|
||||
case test.matches != nil && result != nil:
|
||||
testSubmatchBytes(&test, 0, test.matches[0], result, t)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func testSubmatchString(test *FindTest, n int, submatches []int, result []string, t *testing.T) {
|
||||
if len(submatches) != len(result)*2 {
|
||||
t.Errorf("match %d: expected %d submatches; got %d: %s", n, len(submatches)/2, len(result), test)
|
||||
return
|
||||
}
|
||||
for k := 0; k < len(submatches); k += 2 {
|
||||
if submatches[k] == -1 {
|
||||
if result[k/2] != "" {
|
||||
t.Errorf("match %d: expected nil got %q: %s", n, result, test)
|
||||
}
|
||||
continue
|
||||
}
|
||||
expect := test.text[submatches[k]:submatches[k+1]]
|
||||
if expect != result[k/2] {
|
||||
t.Errorf("match %d: expected %q got %q: %s", n, expect, result, test)
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestFindStringSubmatch(t *testing.T) {
|
||||
for _, test := range findTests {
|
||||
result := MustCompile(test.pat).FindStringSubmatch(test.text)
|
||||
switch {
|
||||
case test.matches == nil && result == nil:
|
||||
// ok
|
||||
case test.matches == nil && result != nil:
|
||||
t.Errorf("expected no match; got one: %s", test)
|
||||
case test.matches != nil && result == nil:
|
||||
t.Errorf("expected match; got none: %s", test)
|
||||
case test.matches != nil && result != nil:
|
||||
testSubmatchString(&test, 0, test.matches[0], result, t)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func testSubmatchIndices(test *FindTest, n int, expect, result []int, t *testing.T) {
|
||||
if len(expect) != len(result) {
|
||||
t.Errorf("match %d: expected %d matches; got %d: %s", n, len(expect)/2, len(result)/2, test)
|
||||
return
|
||||
}
|
||||
for k, e := range expect {
|
||||
if e != result[k] {
|
||||
t.Errorf("match %d: submatch error: expected %v got %v: %s", n, expect, result, test)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func testFindSubmatchIndex(test *FindTest, result []int, t *testing.T) {
|
||||
switch {
|
||||
case test.matches == nil && result == nil:
|
||||
// ok
|
||||
case test.matches == nil && result != nil:
|
||||
t.Errorf("expected no match; got one: %s", test)
|
||||
case test.matches != nil && result == nil:
|
||||
t.Errorf("expected match; got none: %s", test)
|
||||
case test.matches != nil && result != nil:
|
||||
testSubmatchIndices(test, 0, test.matches[0], result, t)
|
||||
}
|
||||
}
|
||||
|
||||
func TestFindSubmatchIndex(t *testing.T) {
|
||||
for _, test := range findTests {
|
||||
testFindSubmatchIndex(&test, MustCompile(test.pat).FindSubmatchIndex([]byte(test.text)), t)
|
||||
}
|
||||
}
|
||||
|
||||
func TestFindStringSubmatchIndex(t *testing.T) {
|
||||
for _, test := range findTests {
|
||||
testFindSubmatchIndex(&test, MustCompile(test.pat).FindStringSubmatchIndex(test.text), t)
|
||||
}
|
||||
}
|
||||
|
||||
func TestFindReaderSubmatchIndex(t *testing.T) {
|
||||
for _, test := range findTests {
|
||||
testFindSubmatchIndex(&test, MustCompile(test.pat).FindReaderSubmatchIndex(strings.NewReader(test.text)), t)
|
||||
}
|
||||
}
|
||||
|
||||
// Now come the monster AllSubmatch cases.
|
||||
|
||||
func TestFindAllSubmatch(t *testing.T) {
|
||||
for _, test := range findTests {
|
||||
result := MustCompile(test.pat).FindAllSubmatch([]byte(test.text), -1)
|
||||
switch {
|
||||
case test.matches == nil && result == nil:
|
||||
// ok
|
||||
case test.matches == nil && result != nil:
|
||||
t.Errorf("expected no match; got one: %s", test)
|
||||
case test.matches != nil && result == nil:
|
||||
t.Errorf("expected match; got none: %s", test)
|
||||
case len(test.matches) != len(result):
|
||||
t.Errorf("expected %d matches; got %d: %s", len(test.matches), len(result), test)
|
||||
case test.matches != nil && result != nil:
|
||||
for k, match := range test.matches {
|
||||
testSubmatchBytes(&test, k, match, result[k], t)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestFindAllStringSubmatch(t *testing.T) {
|
||||
for _, test := range findTests {
|
||||
result := MustCompile(test.pat).FindAllStringSubmatch(test.text, -1)
|
||||
switch {
|
||||
case test.matches == nil && result == nil:
|
||||
// ok
|
||||
case test.matches == nil && result != nil:
|
||||
t.Errorf("expected no match; got one: %s", test)
|
||||
case test.matches != nil && result == nil:
|
||||
t.Errorf("expected match; got none: %s", test)
|
||||
case len(test.matches) != len(result):
|
||||
t.Errorf("expected %d matches; got %d: %s", len(test.matches), len(result), test)
|
||||
case test.matches != nil && result != nil:
|
||||
for k, match := range test.matches {
|
||||
testSubmatchString(&test, k, match, result[k], t)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func testFindAllSubmatchIndex(test *FindTest, result [][]int, t *testing.T) {
|
||||
switch {
|
||||
case test.matches == nil && result == nil:
|
||||
// ok
|
||||
case test.matches == nil && result != nil:
|
||||
t.Errorf("expected no match; got one: %s", test)
|
||||
case test.matches != nil && result == nil:
|
||||
t.Errorf("expected match; got none: %s", test)
|
||||
case len(test.matches) != len(result):
|
||||
t.Errorf("expected %d matches; got %d: %s", len(test.matches), len(result), test)
|
||||
case test.matches != nil && result != nil:
|
||||
for k, match := range test.matches {
|
||||
testSubmatchIndices(test, k, match, result[k], t)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestFindAllSubmatchIndex(t *testing.T) {
|
||||
for _, test := range findTests {
|
||||
testFindAllSubmatchIndex(&test, MustCompile(test.pat).FindAllSubmatchIndex([]byte(test.text), -1), t)
|
||||
}
|
||||
}
|
||||
|
||||
func TestFindAllStringSubmatchIndex(t *testing.T) {
|
||||
for _, test := range findTests {
|
||||
testFindAllSubmatchIndex(&test, MustCompile(test.pat).FindAllStringSubmatchIndex(test.text, -1), t)
|
||||
}
|
||||
}
|
||||
500
src/regexp/onepass.go
Normal file
@@ -0,0 +1,500 @@
|
||||
// Copyright 2014 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package regexp
|
||||
|
||||
import (
|
||||
"regexp/syntax"
|
||||
"slices"
|
||||
"strings"
|
||||
"unicode"
|
||||
"unicode/utf8"
|
||||
)
|
||||
|
||||
// "One-pass" regexp execution.
|
||||
// Some regexps can be analyzed to determine that they never need
|
||||
// backtracking: they are guaranteed to run in one pass over the string
|
||||
// without bothering to save all the usual NFA state.
|
||||
// Detect those and execute them more quickly.
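// Editor's sketch (not part of the original source): the eligibility question in code,
// mirroring TestCompileOnePass in onepass_test.go. `^abcd$` is unambiguous at every
// alternation and qualifies; `^(?:a|(?:a*))$` does not, because on input "a" both
// branches of the alternation can match. The helper name is hypothetical.
func sketchIsOnePass(pattern string) bool {
	re, err := syntax.Parse(pattern, syntax.Perl)
	if err != nil {
		return false
	}
	prog, err := syntax.Compile(re.Simplify())
	if err != nil {
		return false
	}
	return compileOnePass(prog) != nil // true for `^abcd$`, false for `^(?:a|(?:a*))$`
}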
|
||||
|
||||
// A onePassProg is a compiled one-pass regular expression program.
|
||||
// It is the same as syntax.Prog except for the use of onePassInst.
|
||||
type onePassProg struct {
|
||||
Inst []onePassInst
|
||||
Start int // index of start instruction
|
||||
NumCap int // number of InstCapture insts in re
|
||||
}
|
||||
|
||||
// A onePassInst is a single instruction in a one-pass regular expression program.
|
||||
// It is the same as syntax.Inst except for the new 'Next' field.
|
||||
type onePassInst struct {
|
||||
syntax.Inst
|
||||
Next []uint32
|
||||
}
|
||||
|
||||
// onePassPrefix returns a literal string that all matches for the
|
||||
// regexp must start with. Complete is true if the prefix
|
||||
// is the entire match. Pc is the index of the last rune instruction
|
||||
// in the string. The onePassPrefix skips over the mandatory
|
||||
// EmptyBeginText.
|
||||
func onePassPrefix(p *syntax.Prog) (prefix string, complete bool, pc uint32) {
|
||||
i := &p.Inst[p.Start]
|
||||
if i.Op != syntax.InstEmptyWidth || (syntax.EmptyOp(i.Arg))&syntax.EmptyBeginText == 0 {
|
||||
return "", i.Op == syntax.InstMatch, uint32(p.Start)
|
||||
}
|
||||
pc = i.Out
|
||||
i = &p.Inst[pc]
|
||||
for i.Op == syntax.InstNop {
|
||||
pc = i.Out
|
||||
i = &p.Inst[pc]
|
||||
}
|
||||
// Avoid allocation of buffer if prefix is empty.
|
||||
if iop(i) != syntax.InstRune || len(i.Rune) != 1 {
|
||||
return "", i.Op == syntax.InstMatch, uint32(p.Start)
|
||||
}
|
||||
|
||||
// Have prefix; gather characters.
|
||||
var buf strings.Builder
|
||||
for iop(i) == syntax.InstRune && len(i.Rune) == 1 && syntax.Flags(i.Arg)&syntax.FoldCase == 0 && i.Rune[0] != utf8.RuneError {
|
||||
buf.WriteRune(i.Rune[0])
|
||||
pc, i = i.Out, &p.Inst[i.Out]
|
||||
}
|
||||
if i.Op == syntax.InstEmptyWidth &&
|
||||
syntax.EmptyOp(i.Arg)&syntax.EmptyEndText != 0 &&
|
||||
p.Inst[i.Out].Op == syntax.InstMatch {
|
||||
complete = true
|
||||
}
|
||||
return buf.String(), complete, pc
|
||||
}
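// Editor's sketch (not part of the original source): for an anchored literal such as
// `^abc$`, onePassPrefix reports the literal and that it covers the whole match.
// The helper name is hypothetical.
func sketchPrefix() (string, bool) {
	re, _ := syntax.Parse(`^abc$`, syntax.Perl)
	prog, _ := syntax.Compile(re.Simplify())
	prefix, complete, _ := onePassPrefix(prog)
	return prefix, complete // "abc", true for this pattern
}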
|
||||
|
||||
// onePassNext selects the next actionable state of the prog, based on the input character.
|
||||
// It should only be called when i.Op == InstAlt or InstAltMatch, and from the one-pass machine.
|
||||
// One of the alternates may ultimately lead without input to end of line. If the instruction
|
||||
// is InstAltMatch the path to the InstMatch is in i.Out, the normal node in i.Next.
|
||||
func onePassNext(i *onePassInst, r rune) uint32 {
|
||||
next := i.MatchRunePos(r)
|
||||
if next >= 0 {
|
||||
return i.Next[next]
|
||||
}
|
||||
if i.Op == syntax.InstAltMatch {
|
||||
return i.Out
|
||||
}
|
||||
return 0
|
||||
}
|
||||
|
||||
func iop(i *syntax.Inst) syntax.InstOp {
|
||||
op := i.Op
|
||||
switch op {
|
||||
case syntax.InstRune1, syntax.InstRuneAny, syntax.InstRuneAnyNotNL:
|
||||
op = syntax.InstRune
|
||||
}
|
||||
return op
|
||||
}
|
||||
|
||||
// A queueOnePass is a sparse-array set (paired sparse and dense slices) used as a work queue.
|
||||
type queueOnePass struct {
|
||||
sparse []uint32
|
||||
dense []uint32
|
||||
size, nextIndex uint32
|
||||
}
|
||||
|
||||
func (q *queueOnePass) empty() bool {
|
||||
return q.nextIndex >= q.size
|
||||
}
|
||||
|
||||
func (q *queueOnePass) next() (n uint32) {
|
||||
n = q.dense[q.nextIndex]
|
||||
q.nextIndex++
|
||||
return
|
||||
}
|
||||
|
||||
func (q *queueOnePass) clear() {
|
||||
q.size = 0
|
||||
q.nextIndex = 0
|
||||
}
|
||||
|
||||
func (q *queueOnePass) contains(u uint32) bool {
|
||||
if u >= uint32(len(q.sparse)) {
|
||||
return false
|
||||
}
|
||||
return q.sparse[u] < q.size && q.dense[q.sparse[u]] == u
|
||||
}
|
||||
|
||||
func (q *queueOnePass) insert(u uint32) {
|
||||
if !q.contains(u) {
|
||||
q.insertNew(u)
|
||||
}
|
||||
}
|
||||
|
||||
func (q *queueOnePass) insertNew(u uint32) {
|
||||
if u >= uint32(len(q.sparse)) {
|
||||
return
|
||||
}
|
||||
q.sparse[u] = q.size
|
||||
q.dense[q.size] = u
|
||||
q.size++
|
||||
}
|
||||
|
||||
func newQueue(size int) (q *queueOnePass) {
|
||||
return &queueOnePass{
|
||||
sparse: make([]uint32, size),
|
||||
dense: make([]uint32, size),
|
||||
}
|
||||
}
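// Editor's sketch (not part of the original source): the sparse/dense pair gives O(1)
// insert, membership test, and clear without ever zeroing the backing arrays, which is
// why the two queues in makeOnePass can be reused across calls to check. The helper
// name is hypothetical.
func sketchQueue() bool {
	q := newQueue(8)
	q.insert(3)
	q.insert(5)
	q.insert(3) // duplicate, rejected by contains
	return q.contains(5) && !q.contains(4) && q.next() == 3
}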
|
||||
|
||||
// mergeRuneSets merges two non-intersecting runesets, and returns the merged result,
|
||||
// and a NextIp array. The idea is that if a rune matches the OnePassRunes at index
|
||||
// i, NextIp[i/2] is the target. If the input sets intersect, an empty runeset and a
|
||||
// NextIp array with the single element mergeFailed is returned.
|
||||
// The code assumes that both inputs contain ordered and non-intersecting rune pairs.
|
||||
const mergeFailed = uint32(0xffffffff)
|
||||
|
||||
var (
|
||||
noRune = []rune{}
|
||||
noNext = []uint32{mergeFailed}
|
||||
)
|
||||
|
||||
func mergeRuneSets(leftRunes, rightRunes *[]rune, leftPC, rightPC uint32) ([]rune, []uint32) {
|
||||
leftLen := len(*leftRunes)
|
||||
rightLen := len(*rightRunes)
|
||||
if leftLen&0x1 != 0 || rightLen&0x1 != 0 {
|
||||
panic("mergeRuneSets odd length []rune")
|
||||
}
|
||||
var (
|
||||
lx, rx int
|
||||
)
|
||||
merged := make([]rune, 0)
|
||||
next := make([]uint32, 0)
|
||||
ok := true
|
||||
defer func() {
|
||||
if !ok {
|
||||
merged = nil
|
||||
next = nil
|
||||
}
|
||||
}()
|
||||
|
||||
ix := -1
|
||||
extend := func(newLow *int, newArray *[]rune, pc uint32) bool {
|
||||
if ix > 0 && (*newArray)[*newLow] <= merged[ix] {
|
||||
return false
|
||||
}
|
||||
merged = append(merged, (*newArray)[*newLow], (*newArray)[*newLow+1])
|
||||
*newLow += 2
|
||||
ix += 2
|
||||
next = append(next, pc)
|
||||
return true
|
||||
}
|
||||
|
||||
for lx < leftLen || rx < rightLen {
|
||||
switch {
|
||||
case rx >= rightLen:
|
||||
ok = extend(&lx, leftRunes, leftPC)
|
||||
case lx >= leftLen:
|
||||
ok = extend(&rx, rightRunes, rightPC)
|
||||
case (*rightRunes)[rx] < (*leftRunes)[lx]:
|
||||
ok = extend(&rx, rightRunes, rightPC)
|
||||
default:
|
||||
ok = extend(&lx, leftRunes, leftPC)
|
||||
}
|
||||
if !ok {
|
||||
return noRune, noNext
|
||||
}
|
||||
}
|
||||
return merged, next
|
||||
}
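// Editor's sketch (not part of the original source): a concrete merge, mirroring the
// "append right-first" case in runeMergeTests (onepass_test.go). Two disjoint ranges
// interleave in rune order, and next records which branch (by pc) owns each range.
// The helper name is hypothetical.
func sketchMerge() ([]rune, []uint32) {
	left := []rune{69, 69}  // 'E'-'E', reached via pc 1
	right := []rune{71, 71} // 'G'-'G', reached via pc 2
	return mergeRuneSets(&left, &right, 1, 2) // [69 69 71 71], [1 2]
}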
|
||||
|
||||
// cleanupOnePass drops working memory, and restores certain shortcut instructions.
|
||||
func cleanupOnePass(prog *onePassProg, original *syntax.Prog) {
|
||||
for ix, instOriginal := range original.Inst {
|
||||
switch instOriginal.Op {
|
||||
case syntax.InstAlt, syntax.InstAltMatch, syntax.InstRune:
|
||||
case syntax.InstCapture, syntax.InstEmptyWidth, syntax.InstNop, syntax.InstMatch, syntax.InstFail:
|
||||
prog.Inst[ix].Next = nil
|
||||
case syntax.InstRune1, syntax.InstRuneAny, syntax.InstRuneAnyNotNL:
|
||||
prog.Inst[ix].Next = nil
|
||||
prog.Inst[ix] = onePassInst{Inst: instOriginal}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// onePassCopy creates a copy of the original Prog, as we'll be modifying it.
|
||||
func onePassCopy(prog *syntax.Prog) *onePassProg {
|
||||
p := &onePassProg{
|
||||
Start: prog.Start,
|
||||
NumCap: prog.NumCap,
|
||||
Inst: make([]onePassInst, len(prog.Inst)),
|
||||
}
|
||||
for i, inst := range prog.Inst {
|
||||
p.Inst[i] = onePassInst{Inst: inst}
|
||||
}
|
||||
|
||||
// rewrites one or more common Prog constructs that enable some otherwise
// non-onepass Progs to be onepass. A:BD (for example) means an InstAlt at
// ip A, that points to ips B & D.
// A:BC + B:DA => A:BC + B:DC
// A:BC + B:DC => A:DC + B:DC
|
||||
for pc := range p.Inst {
|
||||
switch p.Inst[pc].Op {
|
||||
default:
|
||||
continue
|
||||
case syntax.InstAlt, syntax.InstAltMatch:
|
||||
// A:Bx + B:Ay
|
||||
p_A_Other := &p.Inst[pc].Out
|
||||
p_A_Alt := &p.Inst[pc].Arg
|
||||
// make sure a target is another Alt
|
||||
instAlt := p.Inst[*p_A_Alt]
|
||||
if !(instAlt.Op == syntax.InstAlt || instAlt.Op == syntax.InstAltMatch) {
|
||||
p_A_Alt, p_A_Other = p_A_Other, p_A_Alt
|
||||
instAlt = p.Inst[*p_A_Alt]
|
||||
if !(instAlt.Op == syntax.InstAlt || instAlt.Op == syntax.InstAltMatch) {
|
||||
continue
|
||||
}
|
||||
}
|
||||
instOther := p.Inst[*p_A_Other]
|
||||
// Analyzing both legs pointing to Alts is for another day
|
||||
if instOther.Op == syntax.InstAlt || instOther.Op == syntax.InstAltMatch {
|
||||
// too complicated
|
||||
continue
|
||||
}
|
||||
// simple empty transition loop
|
||||
// A:BC + B:DA => A:BC + B:DC
|
||||
p_B_Alt := &p.Inst[*p_A_Alt].Out
|
||||
p_B_Other := &p.Inst[*p_A_Alt].Arg
|
||||
patch := false
|
||||
if instAlt.Out == uint32(pc) {
|
||||
patch = true
|
||||
} else if instAlt.Arg == uint32(pc) {
|
||||
patch = true
|
||||
p_B_Alt, p_B_Other = p_B_Other, p_B_Alt
|
||||
}
|
||||
if patch {
|
||||
*p_B_Alt = *p_A_Other
|
||||
}
|
||||
|
||||
// empty transition to common target
|
||||
// A:BC + B:DC => A:DC + B:DC
|
||||
if *p_A_Other == *p_B_Alt {
|
||||
*p_A_Alt = *p_B_Other
|
||||
}
|
||||
}
|
||||
}
|
||||
return p
|
||||
}
|
||||
|
||||
var anyRuneNotNL = []rune{0, '\n' - 1, '\n' + 1, unicode.MaxRune}
|
||||
var anyRune = []rune{0, unicode.MaxRune}
|
||||
|
||||
// makeOnePass creates a onepass Prog, if possible. It is possible if at any alt,
|
||||
// the match engine can always tell which branch to take. The routine may modify
|
||||
// p if it is turned into a onepass Prog. If it isn't possible for this to be a
|
||||
// onepass Prog, the Prog nil is returned. makeOnePass is recursive
|
||||
// to the size of the Prog.
|
||||
func makeOnePass(p *onePassProg) *onePassProg {
|
||||
// If the machine is very long, it's not worth the time to check if we can use one pass.
|
||||
if len(p.Inst) >= 1000 {
|
||||
return nil
|
||||
}
|
||||
|
||||
var (
|
||||
instQueue = newQueue(len(p.Inst))
|
||||
visitQueue = newQueue(len(p.Inst))
|
||||
check func(uint32, []bool) bool
|
||||
onePassRunes = make([][]rune, len(p.Inst))
|
||||
)
|
||||
|
||||
// check that paths from Alt instructions are unambiguous, and rebuild the new
|
||||
// program as a onepass program
|
||||
check = func(pc uint32, m []bool) (ok bool) {
|
||||
ok = true
|
||||
inst := &p.Inst[pc]
|
||||
if visitQueue.contains(pc) {
|
||||
return
|
||||
}
|
||||
visitQueue.insert(pc)
|
||||
switch inst.Op {
|
||||
case syntax.InstAlt, syntax.InstAltMatch:
|
||||
ok = check(inst.Out, m) && check(inst.Arg, m)
|
||||
// check no-input paths to InstMatch
|
||||
matchOut := m[inst.Out]
|
||||
matchArg := m[inst.Arg]
|
||||
if matchOut && matchArg {
|
||||
ok = false
|
||||
break
|
||||
}
|
||||
// Match on empty goes in inst.Out
|
||||
if matchArg {
|
||||
inst.Out, inst.Arg = inst.Arg, inst.Out
|
||||
matchOut, matchArg = matchArg, matchOut
|
||||
}
|
||||
if matchOut {
|
||||
m[pc] = true
|
||||
inst.Op = syntax.InstAltMatch
|
||||
}
|
||||
|
||||
// build a dispatch operator from the two legs of the alt.
|
||||
onePassRunes[pc], inst.Next = mergeRuneSets(
|
||||
&onePassRunes[inst.Out], &onePassRunes[inst.Arg], inst.Out, inst.Arg)
|
||||
if len(inst.Next) > 0 && inst.Next[0] == mergeFailed {
|
||||
ok = false
|
||||
break
|
||||
}
|
||||
case syntax.InstCapture, syntax.InstNop:
|
||||
ok = check(inst.Out, m)
|
||||
m[pc] = m[inst.Out]
|
||||
// pass matching runes back through these no-ops.
|
||||
onePassRunes[pc] = append([]rune{}, onePassRunes[inst.Out]...)
|
||||
inst.Next = make([]uint32, len(onePassRunes[pc])/2+1)
|
||||
for i := range inst.Next {
|
||||
inst.Next[i] = inst.Out
|
||||
}
|
||||
case syntax.InstEmptyWidth:
|
||||
ok = check(inst.Out, m)
|
||||
m[pc] = m[inst.Out]
|
||||
onePassRunes[pc] = append([]rune{}, onePassRunes[inst.Out]...)
|
||||
inst.Next = make([]uint32, len(onePassRunes[pc])/2+1)
|
||||
for i := range inst.Next {
|
||||
inst.Next[i] = inst.Out
|
||||
}
|
||||
case syntax.InstMatch, syntax.InstFail:
|
||||
m[pc] = inst.Op == syntax.InstMatch
|
||||
case syntax.InstRune:
|
||||
m[pc] = false
|
||||
if len(inst.Next) > 0 {
|
||||
break
|
||||
}
|
||||
instQueue.insert(inst.Out)
|
||||
if len(inst.Rune) == 0 {
|
||||
onePassRunes[pc] = []rune{}
|
||||
inst.Next = []uint32{inst.Out}
|
||||
break
|
||||
}
|
||||
runes := make([]rune, 0)
|
||||
if len(inst.Rune) == 1 && syntax.Flags(inst.Arg)&syntax.FoldCase != 0 {
|
||||
r0 := inst.Rune[0]
|
||||
runes = append(runes, r0, r0)
|
||||
for r1 := unicode.SimpleFold(r0); r1 != r0; r1 = unicode.SimpleFold(r1) {
|
||||
runes = append(runes, r1, r1)
|
||||
}
|
||||
slices.Sort(runes)
|
||||
} else {
|
||||
runes = append(runes, inst.Rune...)
|
||||
}
|
||||
onePassRunes[pc] = runes
|
||||
inst.Next = make([]uint32, len(onePassRunes[pc])/2+1)
|
||||
for i := range inst.Next {
|
||||
inst.Next[i] = inst.Out
|
||||
}
|
||||
inst.Op = syntax.InstRune
|
||||
case syntax.InstRune1:
|
||||
m[pc] = false
|
||||
if len(inst.Next) > 0 {
|
||||
break
|
||||
}
|
||||
instQueue.insert(inst.Out)
|
||||
runes := []rune{}
|
||||
// expand case-folded runes
|
||||
if syntax.Flags(inst.Arg)&syntax.FoldCase != 0 {
|
||||
r0 := inst.Rune[0]
|
||||
runes = append(runes, r0, r0)
|
||||
for r1 := unicode.SimpleFold(r0); r1 != r0; r1 = unicode.SimpleFold(r1) {
|
||||
runes = append(runes, r1, r1)
|
||||
}
|
||||
slices.Sort(runes)
|
||||
} else {
|
||||
runes = append(runes, inst.Rune[0], inst.Rune[0])
|
||||
}
|
||||
onePassRunes[pc] = runes
|
||||
inst.Next = make([]uint32, len(onePassRunes[pc])/2+1)
|
||||
for i := range inst.Next {
|
||||
inst.Next[i] = inst.Out
|
||||
}
|
||||
inst.Op = syntax.InstRune
|
||||
case syntax.InstRuneAny:
|
||||
m[pc] = false
|
||||
if len(inst.Next) > 0 {
|
||||
break
|
||||
}
|
||||
instQueue.insert(inst.Out)
|
||||
onePassRunes[pc] = append([]rune{}, anyRune...)
|
||||
inst.Next = []uint32{inst.Out}
|
||||
case syntax.InstRuneAnyNotNL:
|
||||
m[pc] = false
|
||||
if len(inst.Next) > 0 {
|
||||
break
|
||||
}
|
||||
instQueue.insert(inst.Out)
|
||||
onePassRunes[pc] = append([]rune{}, anyRuneNotNL...)
|
||||
inst.Next = make([]uint32, len(onePassRunes[pc])/2+1)
|
||||
for i := range inst.Next {
|
||||
inst.Next[i] = inst.Out
|
||||
}
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
instQueue.clear()
|
||||
instQueue.insert(uint32(p.Start))
|
||||
m := make([]bool, len(p.Inst))
|
||||
for !instQueue.empty() {
|
||||
visitQueue.clear()
|
||||
pc := instQueue.next()
|
||||
if !check(pc, m) {
|
||||
p = nil
|
||||
break
|
||||
}
|
||||
}
|
||||
if p != nil {
|
||||
for i := range p.Inst {
|
||||
p.Inst[i].Rune = onePassRunes[i]
|
||||
}
|
||||
}
|
||||
return p
|
||||
}
|
||||
|
||||
// compileOnePass returns a new *syntax.Prog suitable for onePass execution if the original Prog
|
||||
// can be recharacterized as a one-pass regexp program, or nil if the
|
||||
// Prog cannot be converted. For a one pass prog, the fundamental condition that must
|
||||
// be true is: at any InstAlt, there must be no ambiguity about what branch to take.
|
||||
func compileOnePass(prog *syntax.Prog) (p *onePassProg) {
|
||||
if prog.Start == 0 {
|
||||
return nil
|
||||
}
|
||||
// onepass regexp is anchored
|
||||
if prog.Inst[prog.Start].Op != syntax.InstEmptyWidth ||
|
||||
syntax.EmptyOp(prog.Inst[prog.Start].Arg)&syntax.EmptyBeginText != syntax.EmptyBeginText {
|
||||
return nil
|
||||
}
|
||||
// every instruction leading to InstMatch must be EmptyEndText
|
||||
for _, inst := range prog.Inst {
|
||||
opOut := prog.Inst[inst.Out].Op
|
||||
switch inst.Op {
|
||||
default:
|
||||
if opOut == syntax.InstMatch {
|
||||
return nil
|
||||
}
|
||||
case syntax.InstAlt, syntax.InstAltMatch:
|
||||
if opOut == syntax.InstMatch || prog.Inst[inst.Arg].Op == syntax.InstMatch {
|
||||
return nil
|
||||
}
|
||||
case syntax.InstEmptyWidth:
|
||||
if opOut == syntax.InstMatch {
|
||||
if syntax.EmptyOp(inst.Arg)&syntax.EmptyEndText == syntax.EmptyEndText {
|
||||
continue
|
||||
}
|
||||
return nil
|
||||
}
|
||||
}
|
||||
}
|
||||
// Creates a slightly optimized copy of the original Prog
|
||||
// that cleans up some Prog idioms that block valid onepass programs
|
||||
p = onePassCopy(prog)
|
||||
|
||||
// checkAmbiguity on InstAlts, build onepass Prog if possible
|
||||
p = makeOnePass(p)
|
||||
|
||||
if p != nil {
|
||||
cleanupOnePass(p, prog)
|
||||
}
|
||||
return p
|
||||
}
|
||||
225
src/regexp/onepass_test.go
Normal file
@@ -0,0 +1,225 @@
|
||||
// Copyright 2014 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package regexp
|
||||
|
||||
import (
|
||||
"regexp/syntax"
|
||||
"slices"
|
||||
"strings"
|
||||
"testing"
|
||||
)
|
||||
|
||||
var runeMergeTests = []struct {
|
||||
left, right, merged []rune
|
||||
next []uint32
|
||||
leftPC, rightPC uint32
|
||||
}{
|
||||
{
|
||||
// empty rhs
|
||||
[]rune{69, 69},
|
||||
[]rune{},
|
||||
[]rune{69, 69},
|
||||
[]uint32{1},
|
||||
1, 2,
|
||||
},
|
||||
{
|
||||
// identical runes, identical targets
|
||||
[]rune{69, 69},
|
||||
[]rune{69, 69},
|
||||
[]rune{},
|
||||
[]uint32{mergeFailed},
|
||||
1, 1,
|
||||
},
|
||||
{
|
||||
// identical runes, different targets
|
||||
[]rune{69, 69},
|
||||
[]rune{69, 69},
|
||||
[]rune{},
|
||||
[]uint32{mergeFailed},
|
||||
1, 2,
|
||||
},
|
||||
{
|
||||
// append right-first
|
||||
[]rune{69, 69},
|
||||
[]rune{71, 71},
|
||||
[]rune{69, 69, 71, 71},
|
||||
[]uint32{1, 2},
|
||||
1, 2,
|
||||
},
|
||||
{
|
||||
// append, left-first
|
||||
[]rune{71, 71},
|
||||
[]rune{69, 69},
|
||||
[]rune{69, 69, 71, 71},
|
||||
[]uint32{2, 1},
|
||||
1, 2,
|
||||
},
|
||||
{
|
||||
// successful interleave
|
||||
[]rune{60, 60, 71, 71, 101, 101},
|
||||
[]rune{69, 69, 88, 88},
|
||||
[]rune{60, 60, 69, 69, 71, 71, 88, 88, 101, 101},
|
||||
[]uint32{1, 2, 1, 2, 1},
|
||||
1, 2,
|
||||
},
|
||||
{
|
||||
// left surrounds right
|
||||
[]rune{69, 74},
|
||||
[]rune{71, 71},
|
||||
[]rune{},
|
||||
[]uint32{mergeFailed},
|
||||
1, 2,
|
||||
},
|
||||
{
|
||||
// right surrounds left
|
||||
[]rune{69, 74},
|
||||
[]rune{68, 75},
|
||||
[]rune{},
|
||||
[]uint32{mergeFailed},
|
||||
1, 2,
|
||||
},
|
||||
{
|
||||
// overlap at interval begin
|
||||
[]rune{69, 74},
|
||||
[]rune{74, 75},
|
||||
[]rune{},
|
||||
[]uint32{mergeFailed},
|
||||
1, 2,
|
||||
},
|
||||
{
|
||||
// overlap at interval end
|
||||
[]rune{69, 74},
|
||||
[]rune{65, 69},
|
||||
[]rune{},
|
||||
[]uint32{mergeFailed},
|
||||
1, 2,
|
||||
},
|
||||
{
|
||||
// overlap from above
|
||||
[]rune{69, 74},
|
||||
[]rune{71, 74},
|
||||
[]rune{},
|
||||
[]uint32{mergeFailed},
|
||||
1, 2,
|
||||
},
|
||||
{
|
||||
// overlap from below
|
||||
[]rune{69, 74},
|
||||
[]rune{65, 71},
|
||||
[]rune{},
|
||||
[]uint32{mergeFailed},
|
||||
1, 2,
|
||||
},
|
||||
{
|
||||
// out of order []rune
|
||||
[]rune{69, 74, 60, 65},
|
||||
[]rune{66, 67},
|
||||
[]rune{},
|
||||
[]uint32{mergeFailed},
|
||||
1, 2,
|
||||
},
|
||||
}
|
||||
|
||||
func TestMergeRuneSet(t *testing.T) {
|
||||
for ix, test := range runeMergeTests {
|
||||
merged, next := mergeRuneSets(&test.left, &test.right, test.leftPC, test.rightPC)
|
||||
if !slices.Equal(merged, test.merged) {
|
||||
t.Errorf("mergeRuneSet :%d (%v, %v) merged\n have\n%v\nwant\n%v", ix, test.left, test.right, merged, test.merged)
|
||||
}
|
||||
if !slices.Equal(next, test.next) {
|
||||
t.Errorf("mergeRuneSet :%d(%v, %v) next\n have\n%v\nwant\n%v", ix, test.left, test.right, next, test.next)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
var onePassTests = []struct {
|
||||
re string
|
||||
isOnePass bool
|
||||
}{
|
||||
{`^(?:a|(?:a*))$`, false},
|
||||
{`^(?:(a)|(?:a*))$`, false},
|
||||
{`^(?:(?:(?:.(?:$))?))$`, true},
|
||||
{`^abcd$`, true},
|
||||
{`^(?:(?:a{0,})*?)$`, false},
|
||||
{`^(?:(?:a+)*)$`, true},
|
||||
{`^(?:(?:a|(?:aa)))$`, true},
|
||||
{`^(?:[^\s\S])$`, true},
|
||||
{`^(?:(?:a{3,4}){0,})$`, false},
|
||||
{`^(?:(?:(?:a*)+))$`, true},
|
||||
{`^[a-c]+$`, true},
|
||||
{`^[a-c]*$`, true},
|
||||
{`^(?:a*)$`, true},
|
||||
{`^(?:(?:aa)|a)$`, true},
|
||||
{`^[a-c]*`, false},
|
||||
{`^...$`, true},
|
||||
{`^(?:a|(?:aa))$`, true},
|
||||
{`^a((b))c$`, true},
|
||||
{`^a.[l-nA-Cg-j]?e$`, true},
|
||||
{`^a((b))$`, true},
|
||||
{`^a(?:(b)|(c))c$`, true},
|
||||
{`^a(?:(b*)|(c))c$`, false},
|
||||
{`^a(?:b|c)$`, true},
|
||||
{`^a(?:b?|c)$`, true},
|
||||
{`^a(?:b?|c?)$`, false},
|
||||
{`^a(?:b?|c+)$`, true},
|
||||
{`^a(?:b+|(bc))d$`, false},
|
||||
{`^a(?:bc)+$`, true},
|
||||
{`^a(?:[bcd])+$`, true},
|
||||
{`^a((?:[bcd])+)$`, true},
|
||||
{`^a(:?b|c)*d$`, true},
|
||||
{`^.bc(d|e)*$`, true},
|
||||
{`^(?:(?:aa)|.)$`, false},
|
||||
{`^(?:(?:a{1,2}){1,2})$`, false},
|
||||
{`^l` + strings.Repeat("o", 2<<8) + `ng$`, true},
|
||||
}
|
||||
|
||||
func TestCompileOnePass(t *testing.T) {
|
||||
var (
|
||||
p *syntax.Prog
|
||||
re *syntax.Regexp
|
||||
err error
|
||||
)
|
||||
for _, test := range onePassTests {
|
||||
if re, err = syntax.Parse(test.re, syntax.Perl); err != nil {
|
||||
t.Errorf("Parse(%q) got err:%s, want success", test.re, err)
|
||||
continue
|
||||
}
|
||||
// needs to be done before compile...
|
||||
re = re.Simplify()
|
||||
if p, err = syntax.Compile(re); err != nil {
|
||||
t.Errorf("Compile(%q) got err:%s, want success", test.re, err)
|
||||
continue
|
||||
}
|
||||
isOnePass := compileOnePass(p) != nil
|
||||
if isOnePass != test.isOnePass {
|
||||
t.Errorf("CompileOnePass(%q) got isOnePass=%v, expected %v", test.re, isOnePass, test.isOnePass)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// TODO(cespare): Unify with onePassTests and rationalize one-pass test cases.
|
||||
var onePassTests1 = []struct {
|
||||
re string
|
||||
match string
|
||||
}{
|
||||
{`^a(/b+(#c+)*)*$`, "a/b#c"}, // golang.org/issue/11905
|
||||
}
|
||||
|
||||
func TestRunOnePass(t *testing.T) {
|
||||
for _, test := range onePassTests1 {
|
||||
re, err := Compile(test.re)
|
||||
if err != nil {
|
||||
t.Errorf("Compile(%q): got err: %s", test.re, err)
|
||||
continue
|
||||
}
|
||||
if re.onepass == nil {
|
||||
t.Errorf("Compile(%q): got nil, want one-pass", test.re)
|
||||
continue
|
||||
}
|
||||
if !re.MatchString(test.match) {
|
||||
t.Errorf("onepass %q did not match %q", test.re, test.match)
|
||||
}
|
||||
}
|
||||
}
|
||||
1299
src/regexp/regexp.go
Normal file
File diff suppressed because it is too large
296
src/regexp/syntax/compile.go
Normal file
@@ -0,0 +1,296 @@
|
||||
// Copyright 2011 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package syntax
|
||||
|
||||
import "unicode"
|
||||
|
||||
// A patchList is a list of instruction pointers that need to be filled in (patched).
|
||||
// Because the pointers haven't been filled in yet, we can reuse their storage
|
||||
// to hold the list. It's kind of sleazy, but works well in practice.
|
||||
// See https://swtch.com/~rsc/regexp/regexp1.html for inspiration.
|
||||
//
|
||||
// These aren't really pointers: they're integers, so we can reinterpret them
|
||||
// this way without using package unsafe. A value l.head denotes
|
||||
// p.inst[l.head>>1].Out (l.head&1==0) or .Arg (l.head&1==1).
|
||||
// head == 0 denotes the empty list, okay because we start every program
|
||||
// with a fail instruction, so we'll never want to point at its output link.
|
||||
type patchList struct {
|
||||
head, tail uint32
|
||||
}
|
||||
|
||||
func makePatchList(n uint32) patchList {
|
||||
return patchList{n, n}
|
||||
}
|
||||
|
||||
func (l patchList) patch(p *Prog, val uint32) {
|
||||
head := l.head
|
||||
for head != 0 {
|
||||
i := &p.Inst[head>>1]
|
||||
if head&1 == 0 {
|
||||
head = i.Out
|
||||
i.Out = val
|
||||
} else {
|
||||
head = i.Arg
|
||||
i.Arg = val
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (l1 patchList) append(p *Prog, l2 patchList) patchList {
|
||||
if l1.head == 0 {
|
||||
return l2
|
||||
}
|
||||
if l2.head == 0 {
|
||||
return l1
|
||||
}
|
||||
|
||||
i := &p.Inst[l1.tail>>1]
|
||||
if l1.tail&1 == 0 {
|
||||
i.Out = l2.head
|
||||
} else {
|
||||
i.Arg = l2.head
|
||||
}
|
||||
return patchList{l1.head, l2.tail}
|
||||
}
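// Editor's sketch (not part of the original source): the encoding used by makePatchList
// and patch, spelled out — entry n refers to p.Inst[n>>1].Out when n is even and to
// p.Inst[n>>1].Arg when n is odd, so instruction 5's Out slot is stored as 10 and its
// Arg slot as 11. The helper name is hypothetical.
func sketchPatchSlot(instIndex uint32, isArg bool) uint32 {
	n := instIndex << 1
	if isArg {
		n |= 1
	}
	return n
}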
|
||||
|
||||
// A frag represents a compiled program fragment.
|
||||
type frag struct {
|
||||
i uint32 // index of first instruction
|
||||
out patchList // where to record end instruction
|
||||
nullable bool // whether fragment can match empty string
|
||||
}
|
||||
|
||||
type compiler struct {
|
||||
p *Prog
|
||||
}
|
||||
|
||||
// Compile compiles the regexp into a program to be executed.
|
||||
// The regexp should have been simplified already (returned from re.Simplify).
|
||||
func Compile(re *Regexp) (*Prog, error) {
|
||||
var c compiler
|
||||
c.init()
|
||||
f := c.compile(re)
|
||||
f.out.patch(c.p, c.inst(InstMatch).i)
|
||||
c.p.Start = int(f.i)
|
||||
return c.p, nil
|
||||
}
|
||||
|
||||
func (c *compiler) init() {
|
||||
c.p = new(Prog)
|
||||
c.p.NumCap = 2 // implicit ( and ) for whole match $0
|
||||
c.inst(InstFail)
|
||||
}
|
||||
|
||||
var anyRuneNotNL = []rune{0, '\n' - 1, '\n' + 1, unicode.MaxRune}
|
||||
var anyRune = []rune{0, unicode.MaxRune}
|
||||
|
||||
func (c *compiler) compile(re *Regexp) frag {
|
||||
switch re.Op {
|
||||
case OpNoMatch:
|
||||
return c.fail()
|
||||
case OpEmptyMatch:
|
||||
return c.nop()
|
||||
case OpLiteral:
|
||||
if len(re.Rune) == 0 {
|
||||
return c.nop()
|
||||
}
|
||||
var f frag
|
||||
for j := range re.Rune {
|
||||
f1 := c.rune(re.Rune[j:j+1], re.Flags)
|
||||
if j == 0 {
|
||||
f = f1
|
||||
} else {
|
||||
f = c.cat(f, f1)
|
||||
}
|
||||
}
|
||||
return f
|
||||
case OpCharClass:
|
||||
return c.rune(re.Rune, re.Flags)
|
||||
case OpAnyCharNotNL:
|
||||
return c.rune(anyRuneNotNL, 0)
|
||||
case OpAnyChar:
|
||||
return c.rune(anyRune, 0)
|
||||
case OpBeginLine:
|
||||
return c.empty(EmptyBeginLine)
|
||||
case OpEndLine:
|
||||
return c.empty(EmptyEndLine)
|
||||
case OpBeginText:
|
||||
return c.empty(EmptyBeginText)
|
||||
case OpEndText:
|
||||
return c.empty(EmptyEndText)
|
||||
case OpWordBoundary:
|
||||
return c.empty(EmptyWordBoundary)
|
||||
case OpNoWordBoundary:
|
||||
return c.empty(EmptyNoWordBoundary)
|
||||
case OpCapture:
|
||||
bra := c.cap(uint32(re.Cap << 1))
|
||||
sub := c.compile(re.Sub[0])
|
||||
ket := c.cap(uint32(re.Cap<<1 | 1))
|
||||
return c.cat(c.cat(bra, sub), ket)
|
||||
case OpStar:
|
||||
return c.star(c.compile(re.Sub[0]), re.Flags&NonGreedy != 0)
|
||||
case OpPlus:
|
||||
return c.plus(c.compile(re.Sub[0]), re.Flags&NonGreedy != 0)
|
||||
case OpQuest:
|
||||
return c.quest(c.compile(re.Sub[0]), re.Flags&NonGreedy != 0)
|
||||
case OpConcat:
|
||||
if len(re.Sub) == 0 {
|
||||
return c.nop()
|
||||
}
|
||||
var f frag
|
||||
for i, sub := range re.Sub {
|
||||
if i == 0 {
|
||||
f = c.compile(sub)
|
||||
} else {
|
||||
f = c.cat(f, c.compile(sub))
|
||||
}
|
||||
}
|
||||
return f
|
||||
case OpAlternate:
|
||||
var f frag
|
||||
for _, sub := range re.Sub {
|
||||
f = c.alt(f, c.compile(sub))
|
||||
}
|
||||
return f
|
||||
}
|
||||
panic("regexp: unhandled case in compile")
|
||||
}
|
||||
|
||||
func (c *compiler) inst(op InstOp) frag {
|
||||
// TODO: impose length limit
|
||||
f := frag{i: uint32(len(c.p.Inst)), nullable: true}
|
||||
c.p.Inst = append(c.p.Inst, Inst{Op: op})
|
||||
return f
|
||||
}
|
||||
|
||||
func (c *compiler) nop() frag {
|
||||
f := c.inst(InstNop)
|
||||
f.out = makePatchList(f.i << 1)
|
||||
return f
|
||||
}
|
||||
|
||||
func (c *compiler) fail() frag {
|
||||
return frag{}
|
||||
}
|
||||
|
||||
func (c *compiler) cap(arg uint32) frag {
|
||||
f := c.inst(InstCapture)
|
||||
f.out = makePatchList(f.i << 1)
|
||||
c.p.Inst[f.i].Arg = arg
|
||||
|
||||
if c.p.NumCap < int(arg)+1 {
|
||||
c.p.NumCap = int(arg) + 1
|
||||
}
|
||||
return f
|
||||
}
|
||||
|
||||
func (c *compiler) cat(f1, f2 frag) frag {
|
||||
// concat of failure is failure
|
||||
if f1.i == 0 || f2.i == 0 {
|
||||
return frag{}
|
||||
}
|
||||
|
||||
// TODO: elide nop
|
||||
|
||||
f1.out.patch(c.p, f2.i)
|
||||
return frag{f1.i, f2.out, f1.nullable && f2.nullable}
|
||||
}
|
||||
|
||||
func (c *compiler) alt(f1, f2 frag) frag {
|
||||
// alt of failure is other
|
||||
if f1.i == 0 {
|
||||
return f2
|
||||
}
|
||||
if f2.i == 0 {
|
||||
return f1
|
||||
}
|
||||
|
||||
f := c.inst(InstAlt)
|
||||
i := &c.p.Inst[f.i]
|
||||
i.Out = f1.i
|
||||
i.Arg = f2.i
|
||||
f.out = f1.out.append(c.p, f2.out)
|
||||
f.nullable = f1.nullable || f2.nullable
|
||||
return f
|
||||
}
|
||||
|
||||
func (c *compiler) quest(f1 frag, nongreedy bool) frag {
|
||||
f := c.inst(InstAlt)
|
||||
i := &c.p.Inst[f.i]
|
||||
if nongreedy {
|
||||
i.Arg = f1.i
|
||||
f.out = makePatchList(f.i << 1)
|
||||
} else {
|
||||
i.Out = f1.i
|
||||
f.out = makePatchList(f.i<<1 | 1)
|
||||
}
|
||||
f.out = f.out.append(c.p, f1.out)
|
||||
return f
|
||||
}
|
||||
|
||||
// loop returns the fragment for the main loop of a plus or star.
|
||||
// For plus, it can be used after changing the entry to f1.i.
|
||||
// For star, it can be used directly when f1 can't match an empty string.
|
||||
// (When f1 can match an empty string, f1* must be implemented as (f1+)?
|
||||
// to get the priority match order correct.)
|
||||
func (c *compiler) loop(f1 frag, nongreedy bool) frag {
|
||||
f := c.inst(InstAlt)
|
||||
i := &c.p.Inst[f.i]
|
||||
if nongreedy {
|
||||
i.Arg = f1.i
|
||||
f.out = makePatchList(f.i << 1)
|
||||
} else {
|
||||
i.Out = f1.i
|
||||
f.out = makePatchList(f.i<<1 | 1)
|
||||
}
|
||||
f1.out.patch(c.p, f.i)
|
||||
return f
|
||||
}
|
||||
|
||||
func (c *compiler) star(f1 frag, nongreedy bool) frag {
|
||||
if f1.nullable {
|
||||
// Use (f1+)? to get priority match order correct.
|
||||
// See golang.org/issue/46123.
|
||||
return c.quest(c.plus(f1, nongreedy), nongreedy)
|
||||
}
|
||||
return c.loop(f1, nongreedy)
|
||||
}
|
||||
|
||||
func (c *compiler) plus(f1 frag, nongreedy bool) frag {
|
||||
return frag{f1.i, c.loop(f1, nongreedy).out, f1.nullable}
|
||||
}
|
||||
|
||||
func (c *compiler) empty(op EmptyOp) frag {
|
||||
f := c.inst(InstEmptyWidth)
|
||||
c.p.Inst[f.i].Arg = uint32(op)
|
||||
f.out = makePatchList(f.i << 1)
|
||||
return f
|
||||
}
|
||||
|
||||
func (c *compiler) rune(r []rune, flags Flags) frag {
|
||||
f := c.inst(InstRune)
|
||||
f.nullable = false
|
||||
i := &c.p.Inst[f.i]
|
||||
i.Rune = r
|
||||
flags &= FoldCase // only relevant flag is FoldCase
|
||||
if len(r) != 1 || unicode.SimpleFold(r[0]) == r[0] {
|
||||
// and sometimes not even that
|
||||
flags &^= FoldCase
|
||||
}
|
||||
i.Arg = uint32(flags)
|
||||
f.out = makePatchList(f.i << 1)
|
||||
|
||||
// Special cases for exec machine.
|
||||
switch {
|
||||
case flags&FoldCase == 0 && (len(r) == 1 || len(r) == 2 && r[0] == r[1]):
|
||||
i.Op = InstRune1
|
||||
case len(r) == 2 && r[0] == 0 && r[1] == unicode.MaxRune:
|
||||
i.Op = InstRuneAny
|
||||
case len(r) == 4 && r[0] == 0 && r[1] == '\n'-1 && r[2] == '\n'+1 && r[3] == unicode.MaxRune:
|
||||
i.Op = InstRuneAnyNotNL
|
||||
}
|
||||
|
||||
return f
|
||||
}
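// Editor's sketch (not part of the original source): the usual path from a pattern to a
// Prog inside this package — parse, simplify (Compile's documented precondition), then
// compile. The helper name is hypothetical.
func sketchCompile(pattern string) (*Prog, error) {
	re, err := Parse(pattern, Perl)
	if err != nil {
		return nil, err
	}
	return Compile(re.Simplify())
}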
|
||||
142
src/regexp/syntax/doc.go
Normal file
@@ -0,0 +1,142 @@
|
||||
// Copyright 2012 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// Code generated by mksyntaxgo from the RE2 distribution. DO NOT EDIT.
|
||||
|
||||
/*
|
||||
Package syntax parses regular expressions into parse trees and compiles
|
||||
parse trees into programs. Most clients of regular expressions will use the
|
||||
facilities of package [regexp] (such as [regexp.Compile] and [regexp.Match]) instead of this package.
|
||||
|
||||
# Syntax
|
||||
|
||||
The regular expression syntax understood by this package when parsing with the [Perl] flag is as follows.
|
||||
Parts of the syntax can be disabled by passing alternate flags to [Parse].
|
||||
|
||||
Single characters:
|
||||
|
||||
. any character, possibly including newline (flag s=true)
|
||||
[xyz] character class
|
||||
[^xyz] negated character class
|
||||
\d Perl character class
|
||||
\D negated Perl character class
|
||||
[[:alpha:]] ASCII character class
|
||||
[[:^alpha:]] negated ASCII character class
|
||||
\pN Unicode character class (one-letter name)
|
||||
\p{Greek} Unicode character class
|
||||
\PN negated Unicode character class (one-letter name)
|
||||
\P{Greek} negated Unicode character class
|
||||
|
||||
Composites:
|
||||
|
||||
xy x followed by y
|
||||
x|y x or y (prefer x)
|
||||
|
||||
Repetitions:
|
||||
|
||||
x* zero or more x, prefer more
|
||||
x+ one or more x, prefer more
|
||||
x? zero or one x, prefer one
|
||||
x{n,m} n or n+1 or ... or m x, prefer more
|
||||
x{n,} n or more x, prefer more
|
||||
x{n} exactly n x
|
||||
x*? zero or more x, prefer fewer
|
||||
x+? one or more x, prefer fewer
|
||||
x?? zero or one x, prefer zero
|
||||
x{n,m}? n or n+1 or ... or m x, prefer fewer
|
||||
x{n,}? n or more x, prefer fewer
|
||||
x{n}? exactly n x
|
||||
|
||||
Implementation restriction: The counting forms x{n,m}, x{n,}, and x{n}
|
||||
reject forms that create a minimum or maximum repetition count above 1000.
|
||||
Unlimited repetitions are not subject to this restriction.
|
||||
|
||||
Grouping:
|
||||
|
||||
(re) numbered capturing group (submatch)
|
||||
(?P<name>re) named & numbered capturing group (submatch)
|
||||
(?<name>re) named & numbered capturing group (submatch)
|
||||
(?:re) non-capturing group
|
||||
(?flags) set flags within current group; non-capturing
|
||||
(?flags:re) set flags during re; non-capturing
|
||||
|
||||
Flag syntax is xyz (set) or -xyz (clear) or xy-z (set xy, clear z). The flags are:
|
||||
|
||||
i case-insensitive (default false)
|
||||
m multi-line mode: ^ and $ match begin/end line in addition to begin/end text (default false)
|
||||
s let . match \n (default false)
|
||||
U ungreedy: swap meaning of x* and x*?, x+ and x+?, etc (default false)
|
||||
|
||||
Empty strings:
|
||||
|
||||
^ at beginning of text or line (flag m=true)
|
||||
$ at end of text (like \z not \Z) or line (flag m=true)
|
||||
\A at beginning of text
|
||||
\b at ASCII word boundary (\w on one side and \W, \A, or \z on the other)
|
||||
\B not at ASCII word boundary
|
||||
\z at end of text
|
||||
|
||||
Escape sequences:
|
||||
|
||||
\a bell (== \007)
|
||||
\f form feed (== \014)
|
||||
\t horizontal tab (== \011)
|
||||
\n newline (== \012)
|
||||
\r carriage return (== \015)
|
||||
\v vertical tab character (== \013)
|
||||
\* literal *, for any punctuation character *
|
||||
\123 octal character code (up to three digits)
|
||||
\x7F hex character code (exactly two digits)
|
||||
\x{10FFFF} hex character code
|
||||
\Q...\E literal text ... even if ... has punctuation
|
||||
|
||||
Character class elements:
|
||||
|
||||
x single character
|
||||
A-Z character range (inclusive)
|
||||
\d Perl character class
|
||||
[:foo:] ASCII character class foo
|
||||
\p{Foo} Unicode character class Foo
|
||||
\pF Unicode character class F (one-letter name)
|
||||
|
||||
Named character classes as character class elements:
|
||||
|
||||
[\d] digits (== \d)
|
||||
[^\d] not digits (== \D)
|
||||
[\D] not digits (== \D)
|
||||
[^\D] not not digits (== \d)
|
||||
[[:name:]] named ASCII class inside character class (== [:name:])
|
||||
[^[:name:]] named ASCII class inside negated character class (== [:^name:])
|
||||
[\p{Name}] named Unicode property inside character class (== \p{Name})
|
||||
[^\p{Name}] named Unicode property inside negated character class (== \P{Name})
|
||||
|
||||
Perl character classes (all ASCII-only):
|
||||
|
||||
\d digits (== [0-9])
|
||||
\D not digits (== [^0-9])
|
||||
\s whitespace (== [\t\n\f\r ])
|
||||
\S not whitespace (== [^\t\n\f\r ])
|
||||
\w word characters (== [0-9A-Za-z_])
|
||||
\W not word characters (== [^0-9A-Za-z_])
|
||||
|
||||
ASCII character classes:
|
||||
|
||||
[[:alnum:]] alphanumeric (== [0-9A-Za-z])
|
||||
[[:alpha:]] alphabetic (== [A-Za-z])
|
||||
[[:ascii:]] ASCII (== [\x00-\x7F])
|
||||
[[:blank:]] blank (== [\t ])
|
||||
[[:cntrl:]] control (== [\x00-\x1F\x7F])
|
||||
[[:digit:]] digits (== [0-9])
|
||||
[[:graph:]] graphical (== [!-~] == [A-Za-z0-9!"#$%&'()*+,\-./:;<=>?@[\\\]^_`{|}~])
|
||||
[[:lower:]] lower case (== [a-z])
|
||||
[[:print:]] printable (== [ -~] == [ [:graph:]])
|
||||
[[:punct:]] punctuation (== [!-/:-@[-`{-~])
|
||||
[[:space:]] whitespace (== [\t\n\v\f\r ])
|
||||
[[:upper:]] upper case (== [A-Z])
|
||||
[[:word:]] word characters (== [0-9A-Za-z_])
|
||||
[[:xdigit:]] hex digit (== [0-9A-Fa-f])
|
||||
|
||||
Unicode character classes are those in [unicode.Categories] and [unicode.Scripts].
|
||||
*/
|
||||
package syntax
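// Editor's sketch (not part of the original file): one of the equivalences listed above,
// checked through the parser itself. \d and [0-9] produce the same parse tree, so their
// printed forms agree. The helper name is hypothetical.
func sketchPerlDigitClass() bool {
	a, _ := Parse(`\d`, Perl)
	b, _ := Parse(`[0-9]`, Perl)
	return a.String() == b.String() // true: \d is the ASCII-only digit class
}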
|
||||
128
src/regexp/syntax/make_perl_groups.pl
Executable file
@@ -0,0 +1,128 @@
|
||||
#!/usr/bin/perl
|
||||
# Copyright 2008 The Go Authors. All rights reserved.
|
||||
# Use of this source code is governed by a BSD-style
|
||||
# license that can be found in the LICENSE file.
|
||||
|
||||
# Modified version of RE2's make_perl_groups.pl.
|
||||
|
||||
# Generate table entries giving character ranges
|
||||
# for POSIX/Perl character classes. Rather than
|
||||
# figure out what the definition is, it is easier to ask
|
||||
# Perl about each letter from 0-128 and write down
|
||||
# its answer.
|
||||
|
||||
use strict;
|
||||
use warnings;
|
||||
|
||||
my @posixclasses = (
|
||||
"[:alnum:]",
|
||||
"[:alpha:]",
|
||||
"[:ascii:]",
|
||||
"[:blank:]",
|
||||
"[:cntrl:]",
|
||||
"[:digit:]",
|
||||
"[:graph:]",
|
||||
"[:lower:]",
|
||||
"[:print:]",
|
||||
"[:punct:]",
|
||||
"[:space:]",
|
||||
"[:upper:]",
|
||||
"[:word:]",
|
||||
"[:xdigit:]",
|
||||
);
|
||||
|
||||
my @perlclasses = (
|
||||
"\\d",
|
||||
"\\s",
|
||||
"\\w",
|
||||
);
|
||||
|
||||
my %overrides = (
|
||||
# Prior to Perl 5.18, \s did not match vertical tab.
|
||||
# RE2 preserves that original behaviour.
|
||||
"\\s:11" => 0,
|
||||
);
|
||||
|
||||
sub ComputeClass($) {
|
||||
my @ranges;
|
||||
my ($class) = @_;
|
||||
my $regexp = "[$class]";
|
||||
my $start = -1;
|
||||
for (my $i=0; $i<=129; $i++) {
|
||||
if ($i == 129) { $i = 256; }
|
||||
if ($i <= 128 && ($overrides{"$class:$i"} // chr($i) =~ $regexp)) {
|
||||
if ($start < 0) {
|
||||
$start = $i;
|
||||
}
|
||||
} else {
|
||||
if ($start >= 0) {
|
||||
push @ranges, [$start, $i-1];
|
||||
}
|
||||
$start = -1;
|
||||
}
|
||||
}
|
||||
return @ranges;
|
||||
}
|
||||
|
||||
sub PrintClass($$@) {
|
||||
my ($cname, $name, @ranges) = @_;
|
||||
print "var code$cname = []rune{ /* $name */\n";
|
||||
for (my $i=0; $i<@ranges; $i++) {
|
||||
my @a = @{$ranges[$i]};
|
||||
printf "\t0x%x, 0x%x,\n", $a[0], $a[1];
|
||||
}
|
||||
print "}\n\n";
|
||||
my $n = @ranges;
|
||||
my $negname = $name;
|
||||
if ($negname =~ /:/) {
|
||||
$negname =~ s/:/:^/;
|
||||
} else {
|
||||
$negname =~ y/a-z/A-Z/;
|
||||
}
|
||||
return "\t`$name`: {+1, code$cname},\n" .
|
||||
"\t`$negname`: {-1, code$cname},\n";
|
||||
}
|
||||
|
||||
my $gen = 0;
|
||||
|
||||
sub PrintClasses($@) {
|
||||
my ($cname, @classes) = @_;
|
||||
my @entries;
|
||||
foreach my $cl (@classes) {
|
||||
my @ranges = ComputeClass($cl);
|
||||
push @entries, PrintClass(++$gen, $cl, @ranges);
|
||||
}
|
||||
print "var ${cname}Group = map[string]charGroup{\n";
|
||||
foreach my $e (@entries) {
|
||||
print $e;
|
||||
}
|
||||
print "}\n";
|
||||
my $count = @entries;
|
||||
}
|
||||
|
||||
# Prepare gofmt command
|
||||
my $gofmt;
|
||||
|
||||
if (@ARGV > 0 && $ARGV[0] =~ /\.go$/) {
|
||||
# Send the output of gofmt to the given file
|
||||
open($gofmt, '|-', 'gofmt >'.$ARGV[0]) or die;
|
||||
} else {
|
||||
open($gofmt, '|-', 'gofmt') or die;
|
||||
}
|
||||
|
||||
# Redirect STDOUT to gofmt input
|
||||
select $gofmt;
|
||||
|
||||
print <<EOF;
|
||||
// Copyright 2013 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// Code generated by make_perl_groups.pl; DO NOT EDIT.
|
||||
|
||||
package syntax
|
||||
|
||||
EOF
|
||||
|
||||
PrintClasses("perl", @perlclasses);
|
||||
PrintClasses("posix", @posixclasses);
|
||||
52
src/regexp/syntax/op_string.go
Normal file
@@ -0,0 +1,52 @@
|
||||
// Code generated by "stringer -type Op -trimprefix Op"; DO NOT EDIT.
|
||||
|
||||
package syntax
|
||||
|
||||
import "strconv"
|
||||
|
||||
func _() {
|
||||
// An "invalid array index" compiler error signifies that the constant values have changed.
|
||||
// Re-run the stringer command to generate them again.
|
||||
var x [1]struct{}
|
||||
_ = x[OpNoMatch-1]
|
||||
_ = x[OpEmptyMatch-2]
|
||||
_ = x[OpLiteral-3]
|
||||
_ = x[OpCharClass-4]
|
||||
_ = x[OpAnyCharNotNL-5]
|
||||
_ = x[OpAnyChar-6]
|
||||
_ = x[OpBeginLine-7]
|
||||
_ = x[OpEndLine-8]
|
||||
_ = x[OpBeginText-9]
|
||||
_ = x[OpEndText-10]
|
||||
_ = x[OpWordBoundary-11]
|
||||
_ = x[OpNoWordBoundary-12]
|
||||
_ = x[OpCapture-13]
|
||||
_ = x[OpStar-14]
|
||||
_ = x[OpPlus-15]
|
||||
_ = x[OpQuest-16]
|
||||
_ = x[OpRepeat-17]
|
||||
_ = x[OpConcat-18]
|
||||
_ = x[OpAlternate-19]
|
||||
_ = x[opPseudo-128]
|
||||
}
|
||||
|
||||
const (
|
||||
_Op_name_0 = "NoMatchEmptyMatchLiteralCharClassAnyCharNotNLAnyCharBeginLineEndLineBeginTextEndTextWordBoundaryNoWordBoundaryCaptureStarPlusQuestRepeatConcatAlternate"
|
||||
_Op_name_1 = "opPseudo"
|
||||
)
|
||||
|
||||
var (
|
||||
_Op_index_0 = [...]uint8{0, 7, 17, 24, 33, 45, 52, 61, 68, 77, 84, 96, 110, 117, 121, 125, 130, 136, 142, 151}
|
||||
)
|
||||
|
||||
func (i Op) String() string {
|
||||
switch {
|
||||
case 1 <= i && i <= 19:
|
||||
i -= 1
|
||||
return _Op_name_0[_Op_index_0[i]:_Op_index_0[i+1]]
|
||||
case i == 128:
|
||||
return _Op_name_1
|
||||
default:
|
||||
return "Op(" + strconv.FormatInt(int64(i), 10) + ")"
|
||||
}
|
||||
}
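// Editor's sketch (not part of the generated file): the stringer output drops the Op
// prefix, which keeps parser test failures readable. The helper name is hypothetical.
func sketchOpName() string {
	return OpLiteral.String() // "Literal"
}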
|
||||
2134
src/regexp/syntax/parse.go
Normal file
File diff suppressed because it is too large
628
src/regexp/syntax/parse_test.go
Normal file
@@ -0,0 +1,628 @@
|
||||
// Copyright 2011 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package syntax
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"strings"
|
||||
"testing"
|
||||
"unicode"
|
||||
)
|
||||
|
||||
type parseTest struct {
|
||||
Regexp string
|
||||
Dump string
|
||||
}
|
||||
|
||||
var parseTests = []parseTest{
|
||||
// Base cases
|
||||
{`a`, `lit{a}`},
|
||||
{`a.`, `cat{lit{a}dot{}}`},
|
||||
{`a.b`, `cat{lit{a}dot{}lit{b}}`},
|
||||
{`ab`, `str{ab}`},
|
||||
{`a.b.c`, `cat{lit{a}dot{}lit{b}dot{}lit{c}}`},
|
||||
{`abc`, `str{abc}`},
|
||||
{`a|^`, `alt{lit{a}bol{}}`},
|
||||
{`a|b`, `cc{0x61-0x62}`},
|
||||
{`(a)`, `cap{lit{a}}`},
|
||||
{`(a)|b`, `alt{cap{lit{a}}lit{b}}`},
|
||||
{`a*`, `star{lit{a}}`},
|
||||
{`a+`, `plus{lit{a}}`},
|
||||
{`a?`, `que{lit{a}}`},
|
||||
{`a{2}`, `rep{2,2 lit{a}}`},
|
||||
{`a{2,3}`, `rep{2,3 lit{a}}`},
|
||||
{`a{2,}`, `rep{2,-1 lit{a}}`},
|
||||
{`a*?`, `nstar{lit{a}}`},
|
||||
{`a+?`, `nplus{lit{a}}`},
|
||||
{`a??`, `nque{lit{a}}`},
|
||||
{`a{2}?`, `nrep{2,2 lit{a}}`},
|
||||
{`a{2,3}?`, `nrep{2,3 lit{a}}`},
|
||||
{`a{2,}?`, `nrep{2,-1 lit{a}}`},
|
||||
// Malformed { } are treated as literals.
|
||||
{`x{1001`, `str{x{1001}`},
|
||||
{`x{9876543210`, `str{x{9876543210}`},
|
||||
{`x{9876543210,`, `str{x{9876543210,}`},
|
||||
{`x{2,1`, `str{x{2,1}`},
|
||||
{`x{1,9876543210`, `str{x{1,9876543210}`},
|
||||
{``, `emp{}`},
|
||||
{`|`, `emp{}`}, // alt{emp{}emp{}} but got factored
|
||||
{`|x|`, `alt{emp{}lit{x}emp{}}`},
|
||||
{`.`, `dot{}`},
|
||||
{`^`, `bol{}`},
|
||||
{`$`, `eol{}`},
|
||||
{`\|`, `lit{|}`},
|
||||
{`\(`, `lit{(}`},
|
||||
{`\)`, `lit{)}`},
|
||||
{`\*`, `lit{*}`},
|
||||
{`\+`, `lit{+}`},
|
||||
{`\?`, `lit{?}`},
|
||||
{`{`, `lit{{}`},
|
||||
{`}`, `lit{}}`},
|
||||
{`\.`, `lit{.}`},
|
||||
{`\^`, `lit{^}`},
|
||||
{`\$`, `lit{$}`},
|
||||
{`\\`, `lit{\}`},
|
||||
{`[ace]`, `cc{0x61 0x63 0x65}`},
|
||||
{`[abc]`, `cc{0x61-0x63}`},
|
||||
{`[a-z]`, `cc{0x61-0x7a}`},
|
||||
{`[a]`, `lit{a}`},
|
||||
{`\-`, `lit{-}`},
|
||||
{`-`, `lit{-}`},
|
||||
{`\_`, `lit{_}`},
|
||||
{`abc`, `str{abc}`},
|
||||
{`abc|def`, `alt{str{abc}str{def}}`},
|
||||
{`abc|def|ghi`, `alt{str{abc}str{def}str{ghi}}`},
|
||||
|
||||
// Posix and Perl extensions
|
||||
{`[[:lower:]]`, `cc{0x61-0x7a}`},
|
||||
{`[a-z]`, `cc{0x61-0x7a}`},
|
||||
{`[^[:lower:]]`, `cc{0x0-0x60 0x7b-0x10ffff}`},
|
||||
{`[[:^lower:]]`, `cc{0x0-0x60 0x7b-0x10ffff}`},
|
||||
{`(?i)[[:lower:]]`, `cc{0x41-0x5a 0x61-0x7a 0x17f 0x212a}`},
|
||||
{`(?i)[a-z]`, `cc{0x41-0x5a 0x61-0x7a 0x17f 0x212a}`},
|
||||
{`(?i)[^[:lower:]]`, `cc{0x0-0x40 0x5b-0x60 0x7b-0x17e 0x180-0x2129 0x212b-0x10ffff}`},
|
||||
{`(?i)[[:^lower:]]`, `cc{0x0-0x40 0x5b-0x60 0x7b-0x17e 0x180-0x2129 0x212b-0x10ffff}`},
|
||||
{`\d`, `cc{0x30-0x39}`},
|
||||
{`\D`, `cc{0x0-0x2f 0x3a-0x10ffff}`},
|
||||
{`\s`, `cc{0x9-0xa 0xc-0xd 0x20}`},
|
||||
{`\S`, `cc{0x0-0x8 0xb 0xe-0x1f 0x21-0x10ffff}`},
|
||||
{`\w`, `cc{0x30-0x39 0x41-0x5a 0x5f 0x61-0x7a}`},
|
||||
{`\W`, `cc{0x0-0x2f 0x3a-0x40 0x5b-0x5e 0x60 0x7b-0x10ffff}`},
|
||||
{`(?i)\w`, `cc{0x30-0x39 0x41-0x5a 0x5f 0x61-0x7a 0x17f 0x212a}`},
|
||||
{`(?i)\W`, `cc{0x0-0x2f 0x3a-0x40 0x5b-0x5e 0x60 0x7b-0x17e 0x180-0x2129 0x212b-0x10ffff}`},
|
||||
{`[^\\]`, `cc{0x0-0x5b 0x5d-0x10ffff}`},
|
||||
// { `\C`, `byte{}` }, // probably never
|
||||
|
||||
// Unicode, negatives, and a double negative.
|
||||
{`\p{Braille}`, `cc{0x2800-0x28ff}`},
|
||||
{`\P{Braille}`, `cc{0x0-0x27ff 0x2900-0x10ffff}`},
|
||||
{`\p{^Braille}`, `cc{0x0-0x27ff 0x2900-0x10ffff}`},
|
||||
{`\P{^Braille}`, `cc{0x2800-0x28ff}`},
|
||||
{`\pZ`, `cc{0x20 0xa0 0x1680 0x2000-0x200a 0x2028-0x2029 0x202f 0x205f 0x3000}`},
|
||||
{`[\p{Braille}]`, `cc{0x2800-0x28ff}`},
|
||||
{`[\P{Braille}]`, `cc{0x0-0x27ff 0x2900-0x10ffff}`},
|
||||
{`[\p{^Braille}]`, `cc{0x0-0x27ff 0x2900-0x10ffff}`},
|
||||
{`[\P{^Braille}]`, `cc{0x2800-0x28ff}`},
|
||||
{`[\pZ]`, `cc{0x20 0xa0 0x1680 0x2000-0x200a 0x2028-0x2029 0x202f 0x205f 0x3000}`},
|
||||
{`\p{Lu}`, mkCharClass(unicode.IsUpper)},
|
||||
{`[\p{Lu}]`, mkCharClass(unicode.IsUpper)},
|
||||
{`(?i)[\p{Lu}]`, mkCharClass(isUpperFold)},
|
||||
{`\p{Any}`, `dot{}`},
|
||||
{`\p{^Any}`, `cc{}`},
|
||||
|
||||
// Hex, octal.
|
||||
{`[\012-\234]\141`, `cat{cc{0xa-0x9c}lit{a}}`},
|
||||
{`[\x{41}-\x7a]\x61`, `cat{cc{0x41-0x7a}lit{a}}`},
|
||||
|
||||
// More interesting regular expressions.
|
||||
{`a{,2}`, `str{a{,2}}`},
|
||||
{`\.\^\$\\`, `str{.^$\}`},
|
||||
{`[a-zABC]`, `cc{0x41-0x43 0x61-0x7a}`},
|
||||
{`[^a]`, `cc{0x0-0x60 0x62-0x10ffff}`},
|
||||
{`[α-ε☺]`, `cc{0x3b1-0x3b5 0x263a}`}, // utf-8
|
||||
{`a*{`, `cat{star{lit{a}}lit{{}}`},
|
||||
|
||||
// Test precedences
|
||||
{`(?:ab)*`, `star{str{ab}}`},
|
||||
{`(ab)*`, `star{cap{str{ab}}}`},
|
||||
{`ab|cd`, `alt{str{ab}str{cd}}`},
|
||||
{`a(b|c)d`, `cat{lit{a}cap{cc{0x62-0x63}}lit{d}}`},
|
||||
|
||||
// Test flattening.
|
||||
{`(?:a)`, `lit{a}`},
|
||||
{`(?:ab)(?:cd)`, `str{abcd}`},
|
||||
{`(?:a+b+)(?:c+d+)`, `cat{plus{lit{a}}plus{lit{b}}plus{lit{c}}plus{lit{d}}}`},
|
||||
{`(?:a+|b+)|(?:c+|d+)`, `alt{plus{lit{a}}plus{lit{b}}plus{lit{c}}plus{lit{d}}}`},
|
||||
{`(?:a|b)|(?:c|d)`, `cc{0x61-0x64}`},
|
||||
{`a|.`, `dot{}`},
|
||||
{`.|a`, `dot{}`},
|
||||
{`(?:[abc]|A|Z|hello|world)`, `alt{cc{0x41 0x5a 0x61-0x63}str{hello}str{world}}`},
|
||||
{`(?:[abc]|A|Z)`, `cc{0x41 0x5a 0x61-0x63}`},
|
||||
|
||||
// Test Perl quoted literals
|
||||
{`\Q+|*?{[\E`, `str{+|*?{[}`},
|
||||
{`\Q+\E+`, `plus{lit{+}}`},
|
||||
{`\Qab\E+`, `cat{lit{a}plus{lit{b}}}`},
|
||||
{`\Q\\E`, `lit{\}`},
|
||||
{`\Q\\\E`, `str{\\}`},
|
||||
|
||||
// Test Perl \A and \z
|
||||
{`(?m)^`, `bol{}`},
|
||||
{`(?m)$`, `eol{}`},
|
||||
{`(?-m)^`, `bot{}`},
|
||||
{`(?-m)$`, `eot{}`},
|
||||
{`(?m)\A`, `bot{}`},
|
||||
{`(?m)\z`, `eot{\z}`},
|
||||
{`(?-m)\A`, `bot{}`},
|
||||
{`(?-m)\z`, `eot{\z}`},
|
||||
|
||||
// Test named captures
|
||||
{`(?P<name>a)`, `cap{name:lit{a}}`},
|
||||
{`(?<name>a)`, `cap{name:lit{a}}`},
|
||||
|
||||
// Case-folded literals
|
||||
{`[Aa]`, `litfold{A}`},
|
||||
{`[\x{100}\x{101}]`, `litfold{Ā}`},
|
||||
{`[Δδ]`, `litfold{Δ}`},
|
||||
|
||||
// Strings
|
||||
{`abcde`, `str{abcde}`},
|
||||
{`[Aa][Bb]cd`, `cat{strfold{AB}str{cd}}`},
|
||||
|
||||
// Factoring.
|
||||
{`abc|abd|aef|bcx|bcy`, `alt{cat{lit{a}alt{cat{lit{b}cc{0x63-0x64}}str{ef}}}cat{str{bc}cc{0x78-0x79}}}`},
|
||||
{`ax+y|ax+z|ay+w`, `cat{lit{a}alt{cat{plus{lit{x}}lit{y}}cat{plus{lit{x}}lit{z}}cat{plus{lit{y}}lit{w}}}}`},
|
||||
|
||||
// Bug fixes.
|
||||
{`(?:.)`, `dot{}`},
|
||||
{`(?:x|(?:xa))`, `cat{lit{x}alt{emp{}lit{a}}}`},
|
||||
{`(?:.|(?:.a))`, `cat{dot{}alt{emp{}lit{a}}}`},
|
||||
{`(?:A(?:A|a))`, `cat{lit{A}litfold{A}}`},
|
||||
{`(?:A|a)`, `litfold{A}`},
|
||||
{`A|(?:A|a)`, `litfold{A}`},
|
||||
{`(?s).`, `dot{}`},
|
||||
{`(?-s).`, `dnl{}`},
|
||||
{`(?:(?:^).)`, `cat{bol{}dot{}}`},
|
||||
{`(?-s)(?:(?:^).)`, `cat{bol{}dnl{}}`},
|
||||
{`[\s\S]a`, `cat{cc{0x0-0x10ffff}lit{a}}`},
|
||||
|
||||
// RE2 prefix_tests
|
||||
{`abc|abd`, `cat{str{ab}cc{0x63-0x64}}`},
|
||||
{`a(?:b)c|abd`, `cat{str{ab}cc{0x63-0x64}}`},
|
||||
{`abc|abd|aef|bcx|bcy`,
|
||||
`alt{cat{lit{a}alt{cat{lit{b}cc{0x63-0x64}}str{ef}}}` +
|
||||
`cat{str{bc}cc{0x78-0x79}}}`},
|
||||
{`abc|x|abd`, `alt{str{abc}lit{x}str{abd}}`},
|
||||
{`(?i)abc|ABD`, `cat{strfold{AB}cc{0x43-0x44 0x63-0x64}}`},
|
||||
{`[ab]c|[ab]d`, `cat{cc{0x61-0x62}cc{0x63-0x64}}`},
|
||||
{`.c|.d`, `cat{dot{}cc{0x63-0x64}}`},
|
||||
{`x{2}|x{2}[0-9]`,
|
||||
`cat{rep{2,2 lit{x}}alt{emp{}cc{0x30-0x39}}}`},
|
||||
{`x{2}y|x{2}[0-9]y`,
|
||||
`cat{rep{2,2 lit{x}}alt{lit{y}cat{cc{0x30-0x39}lit{y}}}}`},
|
||||
{`a.*?c|a.*?b`,
|
||||
`cat{lit{a}alt{cat{nstar{dot{}}lit{c}}cat{nstar{dot{}}lit{b}}}}`},
|
||||
|
||||
// Valid repetitions.
|
||||
{`((((((((((x{2}){2}){2}){2}){2}){2}){2}){2}){2}))`, ``},
|
||||
{`((((((((((x{1}){2}){2}){2}){2}){2}){2}){2}){2}){2})`, ``},
|
||||
|
||||
// Valid nesting.
|
||||
{strings.Repeat("(", 999) + strings.Repeat(")", 999), ``},
|
||||
{strings.Repeat("(?:", 999) + strings.Repeat(")*", 999), ``},
|
||||
{"(" + strings.Repeat("|", 12345) + ")", ``}, // not nested at all
|
||||
}

const testFlags = MatchNL | PerlX | UnicodeGroups

func TestParseSimple(t *testing.T) {
	testParseDump(t, parseTests, testFlags)
}

var foldcaseTests = []parseTest{
	{`AbCdE`, `strfold{ABCDE}`},
	{`[Aa]`, `litfold{A}`},
	{`a`, `litfold{A}`},

	// 0x17F is an old English long s (looks like an f) and folds to s.
	// 0x212A is the Kelvin symbol and folds to k.
	{`A[F-g]`, `cat{litfold{A}cc{0x41-0x7a 0x17f 0x212a}}`}, // [Aa][A-z...]
	{`[[:upper:]]`, `cc{0x41-0x5a 0x61-0x7a 0x17f 0x212a}`},
	{`[[:lower:]]`, `cc{0x41-0x5a 0x61-0x7a 0x17f 0x212a}`},
}

func TestParseFoldCase(t *testing.T) {
	testParseDump(t, foldcaseTests, FoldCase)
}

var literalTests = []parseTest{
	{"(|)^$.[*+?]{5,10},\\", "str{(|)^$.[*+?]{5,10},\\}"},
}

func TestParseLiteral(t *testing.T) {
	testParseDump(t, literalTests, Literal)
}

var matchnlTests = []parseTest{
	{`.`, `dot{}`},
	{"\n", "lit{\n}"},
	{`[^a]`, `cc{0x0-0x60 0x62-0x10ffff}`},
	{`[a\n]`, `cc{0xa 0x61}`},
}

func TestParseMatchNL(t *testing.T) {
	testParseDump(t, matchnlTests, MatchNL)
}

var nomatchnlTests = []parseTest{
	{`.`, `dnl{}`},
	{"\n", "lit{\n}"},
	{`[^a]`, `cc{0x0-0x9 0xb-0x60 0x62-0x10ffff}`},
	{`[a\n]`, `cc{0xa 0x61}`},
}

func TestParseNoMatchNL(t *testing.T) {
	testParseDump(t, nomatchnlTests, 0)
}

// Test Parse -> Dump.
func testParseDump(t *testing.T, tests []parseTest, flags Flags) {
	for _, tt := range tests {
		re, err := Parse(tt.Regexp, flags)
		if err != nil {
			t.Errorf("Parse(%#q): %v", tt.Regexp, err)
			continue
		}
		if tt.Dump == "" {
			// It parsed. That's all we care about.
			continue
		}
		d := dump(re)
		if d != tt.Dump {
			t.Errorf("Parse(%#q).Dump() = %#q want %#q", tt.Regexp, d, tt.Dump)
		}
	}
}

// dump prints a string representation of the regexp showing
// the structure explicitly.
func dump(re *Regexp) string {
	var b strings.Builder
	dumpRegexp(&b, re)
	return b.String()
}

var opNames = []string{
	OpNoMatch:        "no",
	OpEmptyMatch:     "emp",
	OpLiteral:        "lit",
	OpCharClass:      "cc",
	OpAnyCharNotNL:   "dnl",
	OpAnyChar:        "dot",
	OpBeginLine:      "bol",
	OpEndLine:        "eol",
	OpBeginText:      "bot",
	OpEndText:        "eot",
	OpWordBoundary:   "wb",
	OpNoWordBoundary: "nwb",
	OpCapture:        "cap",
	OpStar:           "star",
	OpPlus:           "plus",
	OpQuest:          "que",
	OpRepeat:         "rep",
	OpConcat:         "cat",
	OpAlternate:      "alt",
}

// dumpRegexp writes an encoding of the syntax tree for the regexp re to b.
// It is used during testing to distinguish between parses that might print
// the same using re's String method.
func dumpRegexp(b *strings.Builder, re *Regexp) {
	if int(re.Op) >= len(opNames) || opNames[re.Op] == "" {
		fmt.Fprintf(b, "op%d", re.Op)
	} else {
		switch re.Op {
		default:
			b.WriteString(opNames[re.Op])
		case OpStar, OpPlus, OpQuest, OpRepeat:
			if re.Flags&NonGreedy != 0 {
				b.WriteByte('n')
			}
			b.WriteString(opNames[re.Op])
		case OpLiteral:
			if len(re.Rune) > 1 {
				b.WriteString("str")
			} else {
				b.WriteString("lit")
			}
			if re.Flags&FoldCase != 0 {
				for _, r := range re.Rune {
					if unicode.SimpleFold(r) != r {
						b.WriteString("fold")
						break
					}
				}
			}
		}
	}
	b.WriteByte('{')
	switch re.Op {
	case OpEndText:
		if re.Flags&WasDollar == 0 {
			b.WriteString(`\z`)
		}
	case OpLiteral:
		for _, r := range re.Rune {
			b.WriteRune(r)
		}
	case OpConcat, OpAlternate:
		for _, sub := range re.Sub {
			dumpRegexp(b, sub)
		}
	case OpStar, OpPlus, OpQuest:
		dumpRegexp(b, re.Sub[0])
	case OpRepeat:
		fmt.Fprintf(b, "%d,%d ", re.Min, re.Max)
		dumpRegexp(b, re.Sub[0])
	case OpCapture:
		if re.Name != "" {
			b.WriteString(re.Name)
			b.WriteByte(':')
		}
		dumpRegexp(b, re.Sub[0])
	case OpCharClass:
		sep := ""
		for i := 0; i < len(re.Rune); i += 2 {
			b.WriteString(sep)
			sep = " "
			lo, hi := re.Rune[i], re.Rune[i+1]
			if lo == hi {
				fmt.Fprintf(b, "%#x", lo)
			} else {
				fmt.Fprintf(b, "%#x-%#x", lo, hi)
			}
		}
	}
	b.WriteByte('}')
}
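
// As a reading aid for the tables above: the notation nests an operator name
// around its children, so (per the parseTests entry) `abc|abd` dumps as
// `cat{str{ab}cc{0x63-0x64}}`: the literal string "ab" followed by the
// character class [c-d].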

func mkCharClass(f func(rune) bool) string {
	re := &Regexp{Op: OpCharClass}
	lo := rune(-1)
	for i := rune(0); i <= unicode.MaxRune; i++ {
		if f(i) {
			if lo < 0 {
				lo = i
			}
		} else {
			if lo >= 0 {
				re.Rune = append(re.Rune, lo, i-1)
				lo = -1
			}
		}
	}
	if lo >= 0 {
		re.Rune = append(re.Rune, lo, unicode.MaxRune)
	}
	return dump(re)
}

func isUpperFold(r rune) bool {
	if unicode.IsUpper(r) {
		return true
	}
	c := unicode.SimpleFold(r)
	for c != r {
		if unicode.IsUpper(c) {
			return true
		}
		c = unicode.SimpleFold(c)
	}
	return false
}

func TestFoldConstants(t *testing.T) {
	last := rune(-1)
	for i := rune(0); i <= unicode.MaxRune; i++ {
		if unicode.SimpleFold(i) == i {
			continue
		}
		if last == -1 && minFold != i {
			t.Errorf("minFold=%#U should be %#U", minFold, i)
		}
		last = i
	}
	if maxFold != last {
		t.Errorf("maxFold=%#U should be %#U", maxFold, last)
	}
}

func TestAppendRangeCollapse(t *testing.T) {
	// AppendRange should collapse each of the new ranges
	// into the earlier ones (it looks back two ranges), so that
	// the slice never grows very large.
	// Note that we are not calling cleanClass.
	var r []rune
	for i := rune('A'); i <= 'Z'; i++ {
		r = appendRange(r, i, i)
		r = appendRange(r, i+'a'-'A', i+'a'-'A')
	}
	if string(r) != "AZaz" {
		t.Errorf("appendRange interlaced A-Z a-z = %s, want AZaz", string(r))
	}
}

var invalidRegexps = []string{
	`(`,
	`)`,
	`(a`,
	`a)`,
	`(a))`,
	`(a|b|`,
	`a|b|)`,
	`(a|b|))`,
	`(a|b`,
	`a|b)`,
	`(a|b))`,
	`[a-z`,
	`([a-z)`,
	`[a-z)`,
	`([a-z]))`,
	`x{1001}`,
	`x{9876543210}`,
	`x{2,1}`,
	`x{1,9876543210}`,
	"\xff", // Invalid UTF-8
	"[\xff]",
	"[\\\xff]",
	"\\\xff",
	`(?P<name>a`,
	`(?P<name>`,
	`(?P<name`,
	`(?P<x y>a)`,
	`(?P<>a)`,
	`(?<name>a`,
	`(?<name>`,
	`(?<name`,
	`(?<x y>a)`,
	`(?<>a)`,
	`[a-Z]`,
	`(?i)[a-Z]`,
	`\Q\E*`,
	`a{100000}`,  // too much repetition
	`a{100000,}`, // too much repetition
	"((((((((((x{2}){2}){2}){2}){2}){2}){2}){2}){2}){2})", // too much repetition
	strings.Repeat("(", 1000) + strings.Repeat(")", 1000), // too deep
	strings.Repeat("(?:", 1000) + strings.Repeat(")*", 1000), // too deep
	"(" + strings.Repeat("(xx?)", 1000) + "){1000}", // too long
	strings.Repeat("(xx?){1000}", 1000),             // too long
	strings.Repeat(`\pL`, 27000),                    // too many runes
}

var onlyPerl = []string{
	`[a-b-c]`,
	`\Qabc\E`,
	`\Q*+?{[\E`,
	`\Q\\E`,
	`\Q\\\E`,
	`\Q\\\\E`,
	`\Q\\\\\E`,
	`(?:a)`,
	`(?P<name>a)`,
}

var onlyPOSIX = []string{
	"a++",
	"a**",
	"a?*",
	"a+*",
	"a{1}*",
	".{1}{2}.{3}",
}

func TestParseInvalidRegexps(t *testing.T) {
	for _, regexp := range invalidRegexps {
		if re, err := Parse(regexp, Perl); err == nil {
			t.Errorf("Parse(%#q, Perl) = %s, should have failed", regexp, dump(re))
		}
		if re, err := Parse(regexp, POSIX); err == nil {
			t.Errorf("Parse(%#q, POSIX) = %s, should have failed", regexp, dump(re))
		}
	}
	for _, regexp := range onlyPerl {
		if _, err := Parse(regexp, Perl); err != nil {
			t.Errorf("Parse(%#q, Perl): %v", regexp, err)
		}
		if re, err := Parse(regexp, POSIX); err == nil {
			t.Errorf("Parse(%#q, POSIX) = %s, should have failed", regexp, dump(re))
		}
	}
	for _, regexp := range onlyPOSIX {
		if re, err := Parse(regexp, Perl); err == nil {
			t.Errorf("Parse(%#q, Perl) = %s, should have failed", regexp, dump(re))
		}
		if _, err := Parse(regexp, POSIX); err != nil {
			t.Errorf("Parse(%#q, POSIX): %v", regexp, err)
		}
	}
}

func TestToStringEquivalentParse(t *testing.T) {
	for _, tt := range parseTests {
		re, err := Parse(tt.Regexp, testFlags)
		if err != nil {
			t.Errorf("Parse(%#q): %v", tt.Regexp, err)
			continue
		}
		if tt.Dump == "" {
			// It parsed. That's all we care about.
			continue
		}
		d := dump(re)
		if d != tt.Dump {
			t.Errorf("Parse(%#q).Dump() = %#q want %#q", tt.Regexp, d, tt.Dump)
			continue
		}

		s := re.String()
		if s != tt.Regexp {
			// If ToString didn't return the original regexp,
			// it must have found one with fewer parens.
			// Unfortunately we can't check the length here, because
			// ToString produces "\\{" for a literal brace,
			// but "{" is a shorter equivalent in some contexts.
			nre, err := Parse(s, testFlags)
			if err != nil {
				t.Errorf("Parse(%#q.String() = %#q): %v", tt.Regexp, s, err)
				continue
			}
			nd := dump(nre)
			if d != nd {
				t.Errorf("Parse(%#q) -> %#q; %#q vs %#q", tt.Regexp, s, d, nd)
			}

			ns := nre.String()
			if s != ns {
				t.Errorf("Parse(%#q) -> %#q -> %#q", tt.Regexp, s, ns)
			}
		}
	}
}

var stringTests = []struct {
	re  string
	out string
}{
	{`x(?i:ab*c|d?e)1`, `x(?i:AB*C|D?E)1`},
	{`x(?i:ab*cd?e)1`, `x(?i:AB*CD?E)1`},
	{`0(?i:ab*c|d?e)1`, `(?i:0(?:AB*C|D?E)1)`},
	{`0(?i:ab*cd?e)1`, `(?i:0AB*CD?E1)`},
	{`x(?i:ab*c|d?e)`, `x(?i:AB*C|D?E)`},
	{`x(?i:ab*cd?e)`, `x(?i:AB*CD?E)`},
	{`0(?i:ab*c|d?e)`, `(?i:0(?:AB*C|D?E))`},
	{`0(?i:ab*cd?e)`, `(?i:0AB*CD?E)`},
	{`(?i:ab*c|d?e)1`, `(?i:(?:AB*C|D?E)1)`},
	{`(?i:ab*cd?e)1`, `(?i:AB*CD?E1)`},
	{`(?i:ab)[123](?i:cd)`, `(?i:AB[1-3]CD)`},
	{`(?i:ab*c|d?e)`, `(?i:AB*C|D?E)`},
	{`[Aa][Bb]`, `(?i:AB)`},
	{`[Aa][Bb]*[Cc]`, `(?i:AB*C)`},
	{`A(?:[Bb][Cc]|[Dd])[Zz]`, `A(?i:(?:BC|D)Z)`},
	{`[Aa](?:[Bb][Cc]|[Dd])Z`, `(?i:A(?:BC|D))Z`},
}

func TestString(t *testing.T) {
	for _, tt := range stringTests {
		re, err := Parse(tt.re, Perl)
		if err != nil {
			t.Errorf("Parse(%#q): %v", tt.re, err)
			continue
		}
		out := re.String()
		if out != tt.out {
			t.Errorf("Parse(%#q).String() = %#q, want %#q", tt.re, out, tt.out)
		}
	}
}
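The tests above drive the exported entry points of regexp/syntax: Parse, the Flags constants, and Regexp.String. As a minimal sketch of that same round trip outside the test harness, using only those exported names:

package main

import (
	"fmt"
	"regexp/syntax"
)

func main() {
	// Parse in Perl mode, as TestString does above.
	re, err := syntax.Parse(`[Aa][Bb]`, syntax.Perl)
	if err != nil {
		panic(err)
	}
	// Per the stringTests table, this prints the factored form (?i:AB).
	fmt.Println(re.String())
}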
133
src/regexp/syntax/perl_groups.go
Normal file
@@ -0,0 +1,133 @@
|
||||
// Copyright 2013 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// Code generated by make_perl_groups.pl; DO NOT EDIT.
|
||||
|
||||
package syntax
|
||||
|
||||
var code1 = []rune{ /* \d */
|
||||
0x30, 0x39,
|
||||
}
|
||||
|
||||
var code2 = []rune{ /* \s */
|
||||
0x9, 0xa,
|
||||
0xc, 0xd,
|
||||
0x20, 0x20,
|
||||
}
|
||||
|
||||
var code3 = []rune{ /* \w */
|
||||
0x30, 0x39,
|
||||
0x41, 0x5a,
|
||||
0x5f, 0x5f,
|
||||
0x61, 0x7a,
|
||||
}
|
||||
|
||||
var perlGroup = map[string]charGroup{
|
||||
`\d`: {+1, code1},
|
||||
`\D`: {-1, code1},
|
||||
`\s`: {+1, code2},
|
||||
`\S`: {-1, code2},
|
||||
`\w`: {+1, code3},
|
||||
`\W`: {-1, code3},
|
||||
}
|
||||
var code4 = []rune{ /* [:alnum:] */
|
||||
0x30, 0x39,
|
||||
0x41, 0x5a,
|
||||
0x61, 0x7a,
|
||||
}
|
||||
|
||||
var code5 = []rune{ /* [:alpha:] */
|
||||
0x41, 0x5a,
|
||||
0x61, 0x7a,
|
||||
}
|
||||
|
||||
var code6 = []rune{ /* [:ascii:] */
|
||||
0x0, 0x7f,
|
||||
}
|
||||
|
||||
var code7 = []rune{ /* [:blank:] */
|
||||
0x9, 0x9,
|
||||
0x20, 0x20,
|
||||
}
|
||||
|
||||
var code8 = []rune{ /* [:cntrl:] */
|
||||
0x0, 0x1f,
|
||||
0x7f, 0x7f,
|
||||
}
|
||||
|
||||
var code9 = []rune{ /* [:digit:] */
|
||||
0x30, 0x39,
|
||||
}
|
||||
|
||||
var code10 = []rune{ /* [:graph:] */
|
||||
0x21, 0x7e,
|
||||
}
|
||||
|
||||
var code11 = []rune{ /* [:lower:] */
|
||||
0x61, 0x7a,
|
||||
}
|
||||
|
||||
var code12 = []rune{ /* [:print:] */
|
||||
0x20, 0x7e,
|
||||
}
|
||||
|
||||
var code13 = []rune{ /* [:punct:] */
|
||||
0x21, 0x2f,
|
||||
0x3a, 0x40,
|
||||
0x5b, 0x60,
|
||||
0x7b, 0x7e,
|
||||
}
|
||||
|
||||
var code14 = []rune{ /* [:space:] */
|
||||
0x9, 0xd,
|
||||
0x20, 0x20,
|
||||
}
|
||||
|
||||
var code15 = []rune{ /* [:upper:] */
|
||||
0x41, 0x5a,
|
||||
}
|
||||
|
||||
var code16 = []rune{ /* [:word:] */
|
||||
0x30, 0x39,
|
||||
0x41, 0x5a,
|
||||
0x5f, 0x5f,
|
||||
0x61, 0x7a,
|
||||
}
|
||||
|
||||
var code17 = []rune{ /* [:xdigit:] */
|
||||
0x30, 0x39,
|
||||
0x41, 0x46,
|
||||
0x61, 0x66,
|
||||
}
|
||||
|
||||
var posixGroup = map[string]charGroup{
|
||||
`[:alnum:]`: {+1, code4},
|
||||
`[:^alnum:]`: {-1, code4},
|
||||
`[:alpha:]`: {+1, code5},
|
||||
`[:^alpha:]`: {-1, code5},
|
||||
`[:ascii:]`: {+1, code6},
|
||||
`[:^ascii:]`: {-1, code6},
|
||||
`[:blank:]`: {+1, code7},
|
||||
`[:^blank:]`: {-1, code7},
|
||||
`[:cntrl:]`: {+1, code8},
|
||||
`[:^cntrl:]`: {-1, code8},
|
||||
`[:digit:]`: {+1, code9},
|
||||
`[:^digit:]`: {-1, code9},
|
||||
`[:graph:]`: {+1, code10},
|
||||
`[:^graph:]`: {-1, code10},
|
||||
`[:lower:]`: {+1, code11},
|
||||
`[:^lower:]`: {-1, code11},
|
||||
`[:print:]`: {+1, code12},
|
||||
`[:^print:]`: {-1, code12},
|
||||
`[:punct:]`: {+1, code13},
|
||||
`[:^punct:]`: {-1, code13},
|
||||
`[:space:]`: {+1, code14},
|
||||
`[:^space:]`: {-1, code14},
|
||||
`[:upper:]`: {+1, code15},
|
||||
`[:^upper:]`: {-1, code15},
|
||||
`[:word:]`: {+1, code16},
|
||||
`[:^word:]`: {-1, code16},
|
||||
`[:xdigit:]`: {+1, code17},
|
||||
`[:^xdigit:]`: {-1, code17},
|
||||
}
|
||||
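These generated tables are what the parser consults when expanding \d, \s, \w and the [[:name:]] groups. The tables themselves are unexported, so a small sketch of their observable effect goes through the public parser:

package main

import (
	"fmt"
	"regexp/syntax"
)

func main() {
	// \d becomes the character class stored in code1 (0x30-0x39).
	re, _ := syntax.Parse(`\d`, syntax.Perl)
	fmt.Println(re.String()) // [0-9]

	// [[:alnum:]] expands to the ranges stored in code4.
	re, _ = syntax.Parse(`[[:alnum:]]`, syntax.Perl)
	fmt.Println(re.String()) // [0-9A-Za-z]
}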
349
src/regexp/syntax/prog.go
Normal file
@@ -0,0 +1,349 @@
|
||||
// Copyright 2011 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package syntax
|
||||
|
||||
import (
|
||||
"strconv"
|
||||
"strings"
|
||||
"unicode"
|
||||
"unicode/utf8"
|
||||
)
|
||||
|
||||
// Compiled program.
|
||||
// May not belong in this package, but convenient for now.
|
||||
|
||||
// A Prog is a compiled regular expression program.
|
||||
type Prog struct {
|
||||
Inst []Inst
|
||||
Start int // index of start instruction
|
||||
NumCap int // number of InstCapture insts in re
|
||||
}
|
||||
|
||||
// An InstOp is an instruction opcode.
|
||||
type InstOp uint8
|
||||
|
||||
const (
|
||||
InstAlt InstOp = iota
|
||||
InstAltMatch
|
||||
InstCapture
|
||||
InstEmptyWidth
|
||||
InstMatch
|
||||
InstFail
|
||||
InstNop
|
||||
InstRune
|
||||
InstRune1
|
||||
InstRuneAny
|
||||
InstRuneAnyNotNL
|
||||
)
|
||||
|
||||
var instOpNames = []string{
|
||||
"InstAlt",
|
||||
"InstAltMatch",
|
||||
"InstCapture",
|
||||
"InstEmptyWidth",
|
||||
"InstMatch",
|
||||
"InstFail",
|
||||
"InstNop",
|
||||
"InstRune",
|
||||
"InstRune1",
|
||||
"InstRuneAny",
|
||||
"InstRuneAnyNotNL",
|
||||
}
|
||||
|
||||
func (i InstOp) String() string {
|
||||
if uint(i) >= uint(len(instOpNames)) {
|
||||
return ""
|
||||
}
|
||||
return instOpNames[i]
|
||||
}
|
||||
|
||||
// An EmptyOp specifies a kind or mixture of zero-width assertions.
|
||||
type EmptyOp uint8
|
||||
|
||||
const (
|
||||
EmptyBeginLine EmptyOp = 1 << iota
|
||||
EmptyEndLine
|
||||
EmptyBeginText
|
||||
EmptyEndText
|
||||
EmptyWordBoundary
|
||||
EmptyNoWordBoundary
|
||||
)
|
||||
|
||||
// EmptyOpContext returns the zero-width assertions
|
||||
// satisfied at the position between the runes r1 and r2.
|
||||
// Passing r1 == -1 indicates that the position is
|
||||
// at the beginning of the text.
|
||||
// Passing r2 == -1 indicates that the position is
|
||||
// at the end of the text.
|
||||
func EmptyOpContext(r1, r2 rune) EmptyOp {
|
||||
var op EmptyOp = EmptyNoWordBoundary
|
||||
var boundary byte
|
||||
switch {
|
||||
case IsWordChar(r1):
|
||||
boundary = 1
|
||||
case r1 == '\n':
|
||||
op |= EmptyBeginLine
|
||||
case r1 < 0:
|
||||
op |= EmptyBeginText | EmptyBeginLine
|
||||
}
|
||||
switch {
|
||||
case IsWordChar(r2):
|
||||
boundary ^= 1
|
||||
case r2 == '\n':
|
||||
op |= EmptyEndLine
|
||||
case r2 < 0:
|
||||
op |= EmptyEndText | EmptyEndLine
|
||||
}
|
||||
if boundary != 0 { // IsWordChar(r1) != IsWordChar(r2)
|
||||
op ^= (EmptyWordBoundary | EmptyNoWordBoundary)
|
||||
}
|
||||
return op
|
||||
}
|
||||
|
||||
// IsWordChar reports whether r is considered a “word character”
|
||||
// during the evaluation of the \b and \B zero-width assertions.
|
||||
// These assertions are ASCII-only: the word characters are [A-Za-z0-9_].
|
||||
func IsWordChar(r rune) bool {
|
||||
// Test for lowercase letters first, as these occur more
|
||||
// frequently than uppercase letters in common cases.
|
||||
return 'a' <= r && r <= 'z' || 'A' <= r && r <= 'Z' || '0' <= r && r <= '9' || r == '_'
|
||||
}
|
||||
|
||||
// An Inst is a single instruction in a regular expression program.
|
||||
type Inst struct {
|
||||
Op InstOp
|
||||
Out uint32 // all but InstMatch, InstFail
|
||||
Arg uint32 // InstAlt, InstAltMatch, InstCapture, InstEmptyWidth
|
||||
Rune []rune
|
||||
}
|
||||
|
||||
func (p *Prog) String() string {
|
||||
var b strings.Builder
|
||||
dumpProg(&b, p)
|
||||
return b.String()
|
||||
}
|
||||
|
||||
// skipNop follows any no-op or capturing instructions.
|
||||
func (p *Prog) skipNop(pc uint32) *Inst {
|
||||
i := &p.Inst[pc]
|
||||
for i.Op == InstNop || i.Op == InstCapture {
|
||||
i = &p.Inst[i.Out]
|
||||
}
|
||||
return i
|
||||
}
|
||||
|
||||
// op returns i.Op but merges all the Rune special cases into InstRune
|
||||
func (i *Inst) op() InstOp {
|
||||
op := i.Op
|
||||
switch op {
|
||||
case InstRune1, InstRuneAny, InstRuneAnyNotNL:
|
||||
op = InstRune
|
||||
}
|
||||
return op
|
||||
}
|
||||
|
||||
// Prefix returns a literal string that all matches for the
|
||||
// regexp must start with. Complete is true if the prefix
|
||||
// is the entire match.
|
||||
func (p *Prog) Prefix() (prefix string, complete bool) {
|
||||
i := p.skipNop(uint32(p.Start))
|
||||
|
||||
// Avoid allocation of buffer if prefix is empty.
|
||||
if i.op() != InstRune || len(i.Rune) != 1 {
|
||||
return "", i.Op == InstMatch
|
||||
}
|
||||
|
||||
// Have prefix; gather characters.
|
||||
var buf strings.Builder
|
||||
for i.op() == InstRune && len(i.Rune) == 1 && Flags(i.Arg)&FoldCase == 0 && i.Rune[0] != utf8.RuneError {
|
||||
buf.WriteRune(i.Rune[0])
|
||||
i = p.skipNop(i.Out)
|
||||
}
|
||||
return buf.String(), i.Op == InstMatch
|
||||
}
|
||||
|
||||
// StartCond returns the leading empty-width conditions that must
|
||||
// be true in any match. It returns ^EmptyOp(0) if no matches are possible.
|
||||
func (p *Prog) StartCond() EmptyOp {
|
||||
var flag EmptyOp
|
||||
pc := uint32(p.Start)
|
||||
i := &p.Inst[pc]
|
||||
Loop:
|
||||
for {
|
||||
switch i.Op {
|
||||
case InstEmptyWidth:
|
||||
flag |= EmptyOp(i.Arg)
|
||||
case InstFail:
|
||||
return ^EmptyOp(0)
|
||||
case InstCapture, InstNop:
|
||||
// skip
|
||||
default:
|
||||
break Loop
|
||||
}
|
||||
pc = i.Out
|
||||
i = &p.Inst[pc]
|
||||
}
|
||||
return flag
|
||||
}
|
||||
|
||||
const noMatch = -1
|
||||
|
||||
// MatchRune reports whether the instruction matches (and consumes) r.
|
||||
// It should only be called when i.Op == [InstRune].
|
||||
func (i *Inst) MatchRune(r rune) bool {
|
||||
return i.MatchRunePos(r) != noMatch
|
||||
}
|
||||
|
||||
// MatchRunePos checks whether the instruction matches (and consumes) r.
|
||||
// If so, MatchRunePos returns the index of the matching rune pair
|
||||
// (or, when len(i.Rune) == 1, rune singleton).
|
||||
// If not, MatchRunePos returns -1.
|
||||
// MatchRunePos should only be called when i.Op == [InstRune].
|
||||
func (i *Inst) MatchRunePos(r rune) int {
|
||||
rune := i.Rune
|
||||
|
||||
switch len(rune) {
|
||||
case 0:
|
||||
return noMatch
|
||||
|
||||
case 1:
|
||||
// Special case: single-rune slice is from literal string, not char class.
|
||||
r0 := rune[0]
|
||||
if r == r0 {
|
||||
return 0
|
||||
}
|
||||
if Flags(i.Arg)&FoldCase != 0 {
|
||||
for r1 := unicode.SimpleFold(r0); r1 != r0; r1 = unicode.SimpleFold(r1) {
|
||||
if r == r1 {
|
||||
return 0
|
||||
}
|
||||
}
|
||||
}
|
||||
return noMatch
|
||||
|
||||
case 2:
|
||||
if r >= rune[0] && r <= rune[1] {
|
||||
return 0
|
||||
}
|
||||
return noMatch
|
||||
|
||||
case 4, 6, 8:
|
||||
// Linear search for a few pairs.
|
||||
// Should handle ASCII well.
|
||||
for j := 0; j < len(rune); j += 2 {
|
||||
if r < rune[j] {
|
||||
return noMatch
|
||||
}
|
||||
if r <= rune[j+1] {
|
||||
return j / 2
|
||||
}
|
||||
}
|
||||
return noMatch
|
||||
}
|
||||
|
||||
// Otherwise binary search.
|
||||
lo := 0
|
||||
hi := len(rune) / 2
|
||||
for lo < hi {
|
||||
m := int(uint(lo+hi) >> 1)
|
||||
if c := rune[2*m]; c <= r {
|
||||
if r <= rune[2*m+1] {
|
||||
return m
|
||||
}
|
||||
lo = m + 1
|
||||
} else {
|
||||
hi = m
|
||||
}
|
||||
}
|
||||
return noMatch
|
||||
}
|
||||
|
||||
// MatchEmptyWidth reports whether the instruction matches
|
||||
// an empty string between the runes before and after.
|
||||
// It should only be called when i.Op == [InstEmptyWidth].
|
||||
func (i *Inst) MatchEmptyWidth(before rune, after rune) bool {
|
||||
switch EmptyOp(i.Arg) {
|
||||
case EmptyBeginLine:
|
||||
return before == '\n' || before == -1
|
||||
case EmptyEndLine:
|
||||
return after == '\n' || after == -1
|
||||
case EmptyBeginText:
|
||||
return before == -1
|
||||
case EmptyEndText:
|
||||
return after == -1
|
||||
case EmptyWordBoundary:
|
||||
return IsWordChar(before) != IsWordChar(after)
|
||||
case EmptyNoWordBoundary:
|
||||
return IsWordChar(before) == IsWordChar(after)
|
||||
}
|
||||
panic("unknown empty width arg")
|
||||
}
|
||||
|
||||
func (i *Inst) String() string {
|
||||
var b strings.Builder
|
||||
dumpInst(&b, i)
|
||||
return b.String()
|
||||
}
|
||||
|
||||
func bw(b *strings.Builder, args ...string) {
|
||||
for _, s := range args {
|
||||
b.WriteString(s)
|
||||
}
|
||||
}
|
||||
|
||||
func dumpProg(b *strings.Builder, p *Prog) {
|
||||
for j := range p.Inst {
|
||||
i := &p.Inst[j]
|
||||
pc := strconv.Itoa(j)
|
||||
if len(pc) < 3 {
|
||||
b.WriteString(" "[len(pc):])
|
||||
}
|
||||
if j == p.Start {
|
||||
pc += "*"
|
||||
}
|
||||
bw(b, pc, "\t")
|
||||
dumpInst(b, i)
|
||||
bw(b, "\n")
|
||||
}
|
||||
}
|
||||
|
||||
func u32(i uint32) string {
|
||||
return strconv.FormatUint(uint64(i), 10)
|
||||
}
|
||||
|
||||
func dumpInst(b *strings.Builder, i *Inst) {
|
||||
switch i.Op {
|
||||
case InstAlt:
|
||||
bw(b, "alt -> ", u32(i.Out), ", ", u32(i.Arg))
|
||||
case InstAltMatch:
|
||||
bw(b, "altmatch -> ", u32(i.Out), ", ", u32(i.Arg))
|
||||
case InstCapture:
|
||||
bw(b, "cap ", u32(i.Arg), " -> ", u32(i.Out))
|
||||
case InstEmptyWidth:
|
||||
bw(b, "empty ", u32(i.Arg), " -> ", u32(i.Out))
|
||||
case InstMatch:
|
||||
bw(b, "match")
|
||||
case InstFail:
|
||||
bw(b, "fail")
|
||||
case InstNop:
|
||||
bw(b, "nop -> ", u32(i.Out))
|
||||
case InstRune:
|
||||
if i.Rune == nil {
|
||||
// shouldn't happen
|
||||
bw(b, "rune <nil>")
|
||||
}
|
||||
bw(b, "rune ", strconv.QuoteToASCII(string(i.Rune)))
|
||||
if Flags(i.Arg)&FoldCase != 0 {
|
||||
bw(b, "/i")
|
||||
}
|
||||
bw(b, " -> ", u32(i.Out))
|
||||
case InstRune1:
|
||||
bw(b, "rune1 ", strconv.QuoteToASCII(string(i.Rune)), " -> ", u32(i.Out))
|
||||
case InstRuneAny:
|
||||
bw(b, "any -> ", u32(i.Out))
|
||||
case InstRuneAnyNotNL:
|
||||
bw(b, "anynotnl -> ", u32(i.Out))
|
||||
}
|
||||
}
|
||||
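Prog, Prefix, and StartCond above are the pieces the regexp package uses to pick a fast entry point for matching. A minimal sketch of compiling a parse tree into a Prog and reading its literal prefix (the pattern `Go+od` is an arbitrary example):

package main

import (
	"fmt"
	"regexp/syntax"
)

func main() {
	re, err := syntax.Parse(`Go+od`, syntax.Perl)
	if err != nil {
		panic(err)
	}
	// Simplify first, as the regexp package does before compiling.
	prog, err := syntax.Compile(re.Simplify())
	if err != nil {
		panic(err)
	}
	prefix, complete := prog.Prefix()
	fmt.Printf("prefix=%q complete=%v\n", prefix, complete) // prefix="Go" complete=false
	fmt.Println(prog.String())                              // instruction listing in the same format as the compileTests below
}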
144
src/regexp/syntax/prog_test.go
Normal file
@@ -0,0 +1,144 @@
|
||||
// Copyright 2011 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package syntax
|
||||
|
||||
import "testing"
|
||||
|
||||
var compileTests = []struct {
|
||||
Regexp string
|
||||
Prog string
|
||||
}{
|
||||
{"a", ` 0 fail
|
||||
1* rune1 "a" -> 2
|
||||
2 match
|
||||
`},
|
||||
{"[A-M][n-z]", ` 0 fail
|
||||
1* rune "AM" -> 2
|
||||
2 rune "nz" -> 3
|
||||
3 match
|
||||
`},
|
||||
{"", ` 0 fail
|
||||
1* nop -> 2
|
||||
2 match
|
||||
`},
|
||||
{"a?", ` 0 fail
|
||||
1 rune1 "a" -> 3
|
||||
2* alt -> 1, 3
|
||||
3 match
|
||||
`},
|
||||
{"a??", ` 0 fail
|
||||
1 rune1 "a" -> 3
|
||||
2* alt -> 3, 1
|
||||
3 match
|
||||
`},
|
||||
{"a+", ` 0 fail
|
||||
1* rune1 "a" -> 2
|
||||
2 alt -> 1, 3
|
||||
3 match
|
||||
`},
|
||||
{"a+?", ` 0 fail
|
||||
1* rune1 "a" -> 2
|
||||
2 alt -> 3, 1
|
||||
3 match
|
||||
`},
|
||||
{"a*", ` 0 fail
|
||||
1 rune1 "a" -> 2
|
||||
2* alt -> 1, 3
|
||||
3 match
|
||||
`},
|
||||
{"a*?", ` 0 fail
|
||||
1 rune1 "a" -> 2
|
||||
2* alt -> 3, 1
|
||||
3 match
|
||||
`},
|
||||
{"a+b+", ` 0 fail
|
||||
1* rune1 "a" -> 2
|
||||
2 alt -> 1, 3
|
||||
3 rune1 "b" -> 4
|
||||
4 alt -> 3, 5
|
||||
5 match
|
||||
`},
|
||||
{"(a+)(b+)", ` 0 fail
|
||||
1* cap 2 -> 2
|
||||
2 rune1 "a" -> 3
|
||||
3 alt -> 2, 4
|
||||
4 cap 3 -> 5
|
||||
5 cap 4 -> 6
|
||||
6 rune1 "b" -> 7
|
||||
7 alt -> 6, 8
|
||||
8 cap 5 -> 9
|
||||
9 match
|
||||
`},
|
||||
{"a+|b+", ` 0 fail
|
||||
1 rune1 "a" -> 2
|
||||
2 alt -> 1, 6
|
||||
3 rune1 "b" -> 4
|
||||
4 alt -> 3, 6
|
||||
5* alt -> 1, 3
|
||||
6 match
|
||||
`},
|
||||
{"A[Aa]", ` 0 fail
|
||||
1* rune1 "A" -> 2
|
||||
2 rune "A"/i -> 3
|
||||
3 match
|
||||
`},
|
||||
{"(?:(?:^).)", ` 0 fail
|
||||
1* empty 4 -> 2
|
||||
2 anynotnl -> 3
|
||||
3 match
|
||||
`},
|
||||
{"(?:|a)+", ` 0 fail
|
||||
1 nop -> 4
|
||||
2 rune1 "a" -> 4
|
||||
3* alt -> 1, 2
|
||||
4 alt -> 3, 5
|
||||
5 match
|
||||
`},
|
||||
{"(?:|a)*", ` 0 fail
|
||||
1 nop -> 4
|
||||
2 rune1 "a" -> 4
|
||||
3 alt -> 1, 2
|
||||
4 alt -> 3, 6
|
||||
5* alt -> 3, 6
|
||||
6 match
|
||||
`},
|
||||
}
|
||||
|
||||
func TestCompile(t *testing.T) {
|
||||
for _, tt := range compileTests {
|
||||
re, _ := Parse(tt.Regexp, Perl)
|
||||
p, _ := Compile(re)
|
||||
s := p.String()
|
||||
if s != tt.Prog {
|
||||
t.Errorf("compiled %#q:\n--- have\n%s---\n--- want\n%s---", tt.Regexp, s, tt.Prog)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func BenchmarkEmptyOpContext(b *testing.B) {
|
||||
for i := 0; i < b.N; i++ {
|
||||
var r1 rune = -1
|
||||
for _, r2 := range "foo, bar, baz\nsome input text.\n" {
|
||||
EmptyOpContext(r1, r2)
|
||||
r1 = r2
|
||||
}
|
||||
EmptyOpContext(r1, -1)
|
||||
}
|
||||
}
|
||||
|
||||
var sink any
|
||||
|
||||
func BenchmarkIsWordChar(b *testing.B) {
|
||||
const chars = "Don't communicate by sharing memory, share memory by communicating."
|
||||
for i := 0; i < b.N; i++ {
|
||||
for _, r := range chars {
|
||||
sink = IsWordChar(r)
|
||||
}
|
||||
}
|
||||
if sink == nil {
|
||||
b.Fatal("Benchmark did not run")
|
||||
}
|
||||
sink = nil
|
||||
}
|
||||
464
src/regexp/syntax/regexp.go
Normal file
@@ -0,0 +1,464 @@
|
||||
// Copyright 2011 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package syntax
|
||||
|
||||
// Note to implementers:
|
||||
// In this package, re is always a *Regexp and r is always a rune.
|
||||
|
||||
import (
|
||||
"slices"
|
||||
"strconv"
|
||||
"strings"
|
||||
"unicode"
|
||||
)
|
||||
|
||||
// A Regexp is a node in a regular expression syntax tree.
|
||||
type Regexp struct {
|
||||
Op Op // operator
|
||||
Flags Flags
|
||||
Sub []*Regexp // subexpressions, if any
|
||||
Sub0 [1]*Regexp // storage for short Sub
|
||||
Rune []rune // matched runes, for OpLiteral, OpCharClass
|
||||
Rune0 [2]rune // storage for short Rune
|
||||
Min, Max int // min, max for OpRepeat
|
||||
Cap int // capturing index, for OpCapture
|
||||
Name string // capturing name, for OpCapture
|
||||
}
|
||||
|
||||
//go:generate stringer -type Op -trimprefix Op
|
||||
|
||||
// An Op is a single regular expression operator.
|
||||
type Op uint8
|
||||
|
||||
// Operators are listed in precedence order, tightest binding to weakest.
|
||||
// Character class operators are listed simplest to most complex
|
||||
// (OpLiteral, OpCharClass, OpAnyCharNotNL, OpAnyChar).
|
||||
|
||||
const (
|
||||
OpNoMatch Op = 1 + iota // matches no strings
|
||||
OpEmptyMatch // matches empty string
|
||||
OpLiteral // matches Runes sequence
|
||||
OpCharClass // matches Runes interpreted as range pair list
|
||||
OpAnyCharNotNL // matches any character except newline
|
||||
OpAnyChar // matches any character
|
||||
OpBeginLine // matches empty string at beginning of line
|
||||
OpEndLine // matches empty string at end of line
|
||||
OpBeginText // matches empty string at beginning of text
|
||||
OpEndText // matches empty string at end of text
|
||||
OpWordBoundary // matches word boundary `\b`
|
||||
OpNoWordBoundary // matches word non-boundary `\B`
|
||||
OpCapture // capturing subexpression with index Cap, optional name Name
|
||||
OpStar // matches Sub[0] zero or more times
|
||||
OpPlus // matches Sub[0] one or more times
|
||||
OpQuest // matches Sub[0] zero or one times
|
||||
OpRepeat // matches Sub[0] at least Min times, at most Max (Max == -1 is no limit)
|
||||
OpConcat // matches concatenation of Subs
|
||||
OpAlternate // matches alternation of Subs
|
||||
)
|
||||
|
||||
const opPseudo Op = 128 // where pseudo-ops start
|
||||
|
||||
// Equal reports whether x and y have identical structure.
|
||||
func (x *Regexp) Equal(y *Regexp) bool {
|
||||
if x == nil || y == nil {
|
||||
return x == y
|
||||
}
|
||||
if x.Op != y.Op {
|
||||
return false
|
||||
}
|
||||
switch x.Op {
|
||||
case OpEndText:
|
||||
// The parse flags remember whether this is \z or \Z.
|
||||
if x.Flags&WasDollar != y.Flags&WasDollar {
|
||||
return false
|
||||
}
|
||||
|
||||
case OpLiteral, OpCharClass:
|
||||
return slices.Equal(x.Rune, y.Rune)
|
||||
|
||||
case OpAlternate, OpConcat:
|
||||
return slices.EqualFunc(x.Sub, y.Sub, (*Regexp).Equal)
|
||||
|
||||
case OpStar, OpPlus, OpQuest:
|
||||
if x.Flags&NonGreedy != y.Flags&NonGreedy || !x.Sub[0].Equal(y.Sub[0]) {
|
||||
return false
|
||||
}
|
||||
|
||||
case OpRepeat:
|
||||
if x.Flags&NonGreedy != y.Flags&NonGreedy || x.Min != y.Min || x.Max != y.Max || !x.Sub[0].Equal(y.Sub[0]) {
|
||||
return false
|
||||
}
|
||||
|
||||
case OpCapture:
|
||||
if x.Cap != y.Cap || x.Name != y.Name || !x.Sub[0].Equal(y.Sub[0]) {
|
||||
return false
|
||||
}
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
// printFlags is a bit set indicating which flags (including non-capturing parens) to print around a regexp.
|
||||
type printFlags uint8
|
||||
|
||||
const (
|
||||
flagI printFlags = 1 << iota // (?i:
|
||||
flagM // (?m:
|
||||
flagS // (?s:
|
||||
flagOff // )
|
||||
flagPrec // (?: )
|
||||
negShift = 5 // flagI<<negShift is (?-i:
|
||||
)
|
||||
|
||||
// addSpan enables the flags f around start..last,
|
||||
// by setting flags[start] = f and flags[last] = flagOff.
|
||||
func addSpan(start, last *Regexp, f printFlags, flags *map[*Regexp]printFlags) {
|
||||
if *flags == nil {
|
||||
*flags = make(map[*Regexp]printFlags)
|
||||
}
|
||||
(*flags)[start] = f
|
||||
(*flags)[last] |= flagOff // maybe start==last
|
||||
}
|
||||
|
||||
// calcFlags calculates the flags to print around each subexpression in re,
|
||||
// storing that information in (*flags)[sub] for each affected subexpression.
|
||||
// The first time an entry needs to be written to *flags, calcFlags allocates the map.
|
||||
// calcFlags also calculates the flags that must be active or can't be active
|
||||
// around re and returns those flags.
|
||||
func calcFlags(re *Regexp, flags *map[*Regexp]printFlags) (must, cant printFlags) {
|
||||
switch re.Op {
|
||||
default:
|
||||
return 0, 0
|
||||
|
||||
case OpLiteral:
|
||||
// If literal is fold-sensitive, return (flagI, 0) or (0, flagI)
|
||||
// according to whether (?i) is active.
|
||||
// If literal is not fold-sensitive, return 0, 0.
|
||||
for _, r := range re.Rune {
|
||||
if minFold <= r && r <= maxFold && unicode.SimpleFold(r) != r {
|
||||
if re.Flags&FoldCase != 0 {
|
||||
return flagI, 0
|
||||
} else {
|
||||
return 0, flagI
|
||||
}
|
||||
}
|
||||
}
|
||||
return 0, 0
|
||||
|
||||
case OpCharClass:
|
||||
// If literal is fold-sensitive, return 0, flagI - (?i) has been compiled out.
|
||||
// If literal is not fold-sensitive, return 0, 0.
|
||||
for i := 0; i < len(re.Rune); i += 2 {
|
||||
lo := max(minFold, re.Rune[i])
|
||||
hi := min(maxFold, re.Rune[i+1])
|
||||
for r := lo; r <= hi; r++ {
|
||||
for f := unicode.SimpleFold(r); f != r; f = unicode.SimpleFold(f) {
|
||||
if !(lo <= f && f <= hi) && !inCharClass(f, re.Rune) {
|
||||
return 0, flagI
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return 0, 0
|
||||
|
||||
case OpAnyCharNotNL: // (?-s).
|
||||
return 0, flagS
|
||||
|
||||
case OpAnyChar: // (?s).
|
||||
return flagS, 0
|
||||
|
||||
case OpBeginLine, OpEndLine: // (?m)^ (?m)$
|
||||
return flagM, 0
|
||||
|
||||
case OpEndText:
|
||||
if re.Flags&WasDollar != 0 { // (?-m)$
|
||||
return 0, flagM
|
||||
}
|
||||
return 0, 0
|
||||
|
||||
case OpCapture, OpStar, OpPlus, OpQuest, OpRepeat:
|
||||
return calcFlags(re.Sub[0], flags)
|
||||
|
||||
case OpConcat, OpAlternate:
|
||||
// Gather the must and cant for each subexpression.
|
||||
// When we find a conflicting subexpression, insert the necessary
|
||||
// flags around the previously identified span and start over.
|
||||
var must, cant, allCant printFlags
|
||||
start := 0
|
||||
last := 0
|
||||
did := false
|
||||
for i, sub := range re.Sub {
|
||||
subMust, subCant := calcFlags(sub, flags)
|
||||
if must&subCant != 0 || subMust&cant != 0 {
|
||||
if must != 0 {
|
||||
addSpan(re.Sub[start], re.Sub[last], must, flags)
|
||||
}
|
||||
must = 0
|
||||
cant = 0
|
||||
start = i
|
||||
did = true
|
||||
}
|
||||
must |= subMust
|
||||
cant |= subCant
|
||||
allCant |= subCant
|
||||
if subMust != 0 {
|
||||
last = i
|
||||
}
|
||||
if must == 0 && start == i {
|
||||
start++
|
||||
}
|
||||
}
|
||||
if !did {
|
||||
// No conflicts: pass the accumulated must and cant upward.
|
||||
return must, cant
|
||||
}
|
||||
if must != 0 {
|
||||
// Conflicts found; need to finish final span.
|
||||
addSpan(re.Sub[start], re.Sub[last], must, flags)
|
||||
}
|
||||
return 0, allCant
|
||||
}
|
||||
}
|
||||
|
||||
// writeRegexp writes the Perl syntax for the regular expression re to b.
|
||||
func writeRegexp(b *strings.Builder, re *Regexp, f printFlags, flags map[*Regexp]printFlags) {
|
||||
f |= flags[re]
|
||||
if f&flagPrec != 0 && f&^(flagOff|flagPrec) != 0 && f&flagOff != 0 {
|
||||
// flagPrec is redundant with other flags being added and terminated
|
||||
f &^= flagPrec
|
||||
}
|
||||
if f&^(flagOff|flagPrec) != 0 {
|
||||
b.WriteString(`(?`)
|
||||
if f&flagI != 0 {
|
||||
b.WriteString(`i`)
|
||||
}
|
||||
if f&flagM != 0 {
|
||||
b.WriteString(`m`)
|
||||
}
|
||||
if f&flagS != 0 {
|
||||
b.WriteString(`s`)
|
||||
}
|
||||
if f&((flagM|flagS)<<negShift) != 0 {
|
||||
b.WriteString(`-`)
|
||||
if f&(flagM<<negShift) != 0 {
|
||||
b.WriteString(`m`)
|
||||
}
|
||||
if f&(flagS<<negShift) != 0 {
|
||||
b.WriteString(`s`)
|
||||
}
|
||||
}
|
||||
b.WriteString(`:`)
|
||||
}
|
||||
if f&flagOff != 0 {
|
||||
defer b.WriteString(`)`)
|
||||
}
|
||||
if f&flagPrec != 0 {
|
||||
b.WriteString(`(?:`)
|
||||
defer b.WriteString(`)`)
|
||||
}
|
||||
|
||||
switch re.Op {
|
||||
default:
|
||||
b.WriteString("<invalid op" + strconv.Itoa(int(re.Op)) + ">")
|
||||
case OpNoMatch:
|
||||
b.WriteString(`[^\x00-\x{10FFFF}]`)
|
||||
case OpEmptyMatch:
|
||||
b.WriteString(`(?:)`)
|
||||
case OpLiteral:
|
||||
for _, r := range re.Rune {
|
||||
escape(b, r, false)
|
||||
}
|
||||
case OpCharClass:
|
||||
if len(re.Rune)%2 != 0 {
|
||||
b.WriteString(`[invalid char class]`)
|
||||
break
|
||||
}
|
||||
b.WriteRune('[')
|
||||
if len(re.Rune) == 0 {
|
||||
b.WriteString(`^\x00-\x{10FFFF}`)
|
||||
} else if re.Rune[0] == 0 && re.Rune[len(re.Rune)-1] == unicode.MaxRune && len(re.Rune) > 2 {
|
||||
// Contains 0 and MaxRune. Probably a negated class.
|
||||
// Print the gaps.
|
||||
b.WriteRune('^')
|
||||
for i := 1; i < len(re.Rune)-1; i += 2 {
|
||||
lo, hi := re.Rune[i]+1, re.Rune[i+1]-1
|
||||
escape(b, lo, lo == '-')
|
||||
if lo != hi {
|
||||
if hi != lo+1 {
|
||||
b.WriteRune('-')
|
||||
}
|
||||
escape(b, hi, hi == '-')
|
||||
}
|
||||
}
|
||||
} else {
|
||||
for i := 0; i < len(re.Rune); i += 2 {
|
||||
lo, hi := re.Rune[i], re.Rune[i+1]
|
||||
escape(b, lo, lo == '-')
|
||||
if lo != hi {
|
||||
if hi != lo+1 {
|
||||
b.WriteRune('-')
|
||||
}
|
||||
escape(b, hi, hi == '-')
|
||||
}
|
||||
}
|
||||
}
|
||||
b.WriteRune(']')
|
||||
case OpAnyCharNotNL, OpAnyChar:
|
||||
b.WriteString(`.`)
|
||||
case OpBeginLine:
|
||||
b.WriteString(`^`)
|
||||
case OpEndLine:
|
||||
b.WriteString(`$`)
|
||||
case OpBeginText:
|
||||
b.WriteString(`\A`)
|
||||
case OpEndText:
|
||||
if re.Flags&WasDollar != 0 {
|
||||
b.WriteString(`$`)
|
||||
} else {
|
||||
b.WriteString(`\z`)
|
||||
}
|
||||
case OpWordBoundary:
|
||||
b.WriteString(`\b`)
|
||||
case OpNoWordBoundary:
|
||||
b.WriteString(`\B`)
|
||||
case OpCapture:
|
||||
if re.Name != "" {
|
||||
b.WriteString(`(?P<`)
|
||||
b.WriteString(re.Name)
|
||||
b.WriteRune('>')
|
||||
} else {
|
||||
b.WriteRune('(')
|
||||
}
|
||||
if re.Sub[0].Op != OpEmptyMatch {
|
||||
writeRegexp(b, re.Sub[0], flags[re.Sub[0]], flags)
|
||||
}
|
||||
b.WriteRune(')')
|
||||
case OpStar, OpPlus, OpQuest, OpRepeat:
|
||||
p := printFlags(0)
|
||||
sub := re.Sub[0]
|
||||
if sub.Op > OpCapture || sub.Op == OpLiteral && len(sub.Rune) > 1 {
|
||||
p = flagPrec
|
||||
}
|
||||
writeRegexp(b, sub, p, flags)
|
||||
|
||||
switch re.Op {
|
||||
case OpStar:
|
||||
b.WriteRune('*')
|
||||
case OpPlus:
|
||||
b.WriteRune('+')
|
||||
case OpQuest:
|
||||
b.WriteRune('?')
|
||||
case OpRepeat:
|
||||
b.WriteRune('{')
|
||||
b.WriteString(strconv.Itoa(re.Min))
|
||||
if re.Max != re.Min {
|
||||
b.WriteRune(',')
|
||||
if re.Max >= 0 {
|
||||
b.WriteString(strconv.Itoa(re.Max))
|
||||
}
|
||||
}
|
||||
b.WriteRune('}')
|
||||
}
|
||||
if re.Flags&NonGreedy != 0 {
|
||||
b.WriteRune('?')
|
||||
}
|
||||
case OpConcat:
|
||||
for _, sub := range re.Sub {
|
||||
p := printFlags(0)
|
||||
if sub.Op == OpAlternate {
|
||||
p = flagPrec
|
||||
}
|
||||
writeRegexp(b, sub, p, flags)
|
||||
}
|
||||
case OpAlternate:
|
||||
for i, sub := range re.Sub {
|
||||
if i > 0 {
|
||||
b.WriteRune('|')
|
||||
}
|
||||
writeRegexp(b, sub, 0, flags)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (re *Regexp) String() string {
|
||||
var b strings.Builder
|
||||
var flags map[*Regexp]printFlags
|
||||
must, cant := calcFlags(re, &flags)
|
||||
must |= (cant &^ flagI) << negShift
|
||||
if must != 0 {
|
||||
must |= flagOff
|
||||
}
|
||||
writeRegexp(&b, re, must, flags)
|
||||
return b.String()
|
||||
}
|
||||
|
||||
const meta = `\.+*?()|[]{}^$`
|
||||
|
||||
func escape(b *strings.Builder, r rune, force bool) {
|
||||
if unicode.IsPrint(r) {
|
||||
if strings.ContainsRune(meta, r) || force {
|
||||
b.WriteRune('\\')
|
||||
}
|
||||
b.WriteRune(r)
|
||||
return
|
||||
}
|
||||
|
||||
switch r {
|
||||
case '\a':
|
||||
b.WriteString(`\a`)
|
||||
case '\f':
|
||||
b.WriteString(`\f`)
|
||||
case '\n':
|
||||
b.WriteString(`\n`)
|
||||
case '\r':
|
||||
b.WriteString(`\r`)
|
||||
case '\t':
|
||||
b.WriteString(`\t`)
|
||||
case '\v':
|
||||
b.WriteString(`\v`)
|
||||
default:
|
||||
if r < 0x100 {
|
||||
b.WriteString(`\x`)
|
||||
s := strconv.FormatInt(int64(r), 16)
|
||||
if len(s) == 1 {
|
||||
b.WriteRune('0')
|
||||
}
|
||||
b.WriteString(s)
|
||||
break
|
||||
}
|
||||
b.WriteString(`\x{`)
|
||||
b.WriteString(strconv.FormatInt(int64(r), 16))
|
||||
b.WriteString(`}`)
|
||||
}
|
||||
}
|
||||
|
||||
// MaxCap walks the regexp to find the maximum capture index.
|
||||
func (re *Regexp) MaxCap() int {
|
||||
m := 0
|
||||
if re.Op == OpCapture {
|
||||
m = re.Cap
|
||||
}
|
||||
for _, sub := range re.Sub {
|
||||
if n := sub.MaxCap(); m < n {
|
||||
m = n
|
||||
}
|
||||
}
|
||||
return m
|
||||
}
|
||||
|
||||
// CapNames walks the regexp to find the names of capturing groups.
|
||||
func (re *Regexp) CapNames() []string {
|
||||
names := make([]string, re.MaxCap()+1)
|
||||
re.capNames(names)
|
||||
return names
|
||||
}
|
||||
|
||||
func (re *Regexp) capNames(names []string) {
|
||||
if re.Op == OpCapture {
|
||||
names[re.Cap] = re.Name
|
||||
}
|
||||
for _, sub := range re.Sub {
|
||||
sub.capNames(names)
|
||||
}
|
||||
}
|
||||
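MaxCap and CapNames above are how the regexp package discovers submatch slots and group names. A small sketch of their use on a parse tree with one named and one unnamed capture:

package main

import (
	"fmt"
	"regexp/syntax"
)

func main() {
	re, err := syntax.Parse(`(?P<year>\d{4})-(\d{2})`, syntax.Perl)
	if err != nil {
		panic(err)
	}
	fmt.Println(re.MaxCap())          // 2: two capturing groups
	fmt.Printf("%q\n", re.CapNames()) // ["" "year" ""]: index 0 stands for the whole match
	fmt.Println(re.String())          // canonical form of the pattern
}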
151
src/regexp/syntax/simplify.go
Normal file
@@ -0,0 +1,151 @@
|
||||
// Copyright 2011 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package syntax
|
||||
|
||||
// Simplify returns a regexp equivalent to re but without counted repetitions
|
||||
// and with various other simplifications, such as rewriting /(?:a+)+/ to /a+/.
|
||||
// The resulting regexp will execute correctly but its string representation
|
||||
// will not produce the same parse tree, because capturing parentheses
|
||||
// may have been duplicated or removed. For example, the simplified form
|
||||
// for /(x){1,2}/ is /(x)(x)?/ but both parentheses capture as $1.
|
||||
// The returned regexp may share structure with or be the original.
|
||||
func (re *Regexp) Simplify() *Regexp {
|
||||
if re == nil {
|
||||
return nil
|
||||
}
|
||||
switch re.Op {
|
||||
case OpCapture, OpConcat, OpAlternate:
|
||||
// Simplify children, building new Regexp if children change.
|
||||
nre := re
|
||||
for i, sub := range re.Sub {
|
||||
nsub := sub.Simplify()
|
||||
if nre == re && nsub != sub {
|
||||
// Start a copy.
|
||||
nre = new(Regexp)
|
||||
*nre = *re
|
||||
nre.Rune = nil
|
||||
nre.Sub = append(nre.Sub0[:0], re.Sub[:i]...)
|
||||
}
|
||||
if nre != re {
|
||||
nre.Sub = append(nre.Sub, nsub)
|
||||
}
|
||||
}
|
||||
return nre
|
||||
|
||||
case OpStar, OpPlus, OpQuest:
|
||||
sub := re.Sub[0].Simplify()
|
||||
return simplify1(re.Op, re.Flags, sub, re)
|
||||
|
||||
case OpRepeat:
|
||||
// Special special case: x{0} matches the empty string
|
||||
// and doesn't even need to consider x.
|
||||
if re.Min == 0 && re.Max == 0 {
|
||||
return &Regexp{Op: OpEmptyMatch}
|
||||
}
|
||||
|
||||
// The fun begins.
|
||||
sub := re.Sub[0].Simplify()
|
||||
|
||||
// x{n,} means at least n matches of x.
|
||||
if re.Max == -1 {
|
||||
// Special case: x{0,} is x*.
|
||||
if re.Min == 0 {
|
||||
return simplify1(OpStar, re.Flags, sub, nil)
|
||||
}
|
||||
|
||||
// Special case: x{1,} is x+.
|
||||
if re.Min == 1 {
|
||||
return simplify1(OpPlus, re.Flags, sub, nil)
|
||||
}
|
||||
|
||||
// General case: x{4,} is xxxx+.
|
||||
nre := &Regexp{Op: OpConcat}
|
||||
nre.Sub = nre.Sub0[:0]
|
||||
for i := 0; i < re.Min-1; i++ {
|
||||
nre.Sub = append(nre.Sub, sub)
|
||||
}
|
||||
nre.Sub = append(nre.Sub, simplify1(OpPlus, re.Flags, sub, nil))
|
||||
return nre
|
||||
}
|
||||
|
||||
// Special case x{0} handled above.
|
||||
|
||||
// Special case: x{1} is just x.
|
||||
if re.Min == 1 && re.Max == 1 {
|
||||
return sub
|
||||
}
|
||||
|
||||
// General case: x{n,m} means n copies of x and m copies of x?
|
||||
// The machine will do less work if we nest the final m copies,
|
||||
// so that x{2,5} = xx(x(x(x)?)?)?
|
||||
|
||||
// Build leading prefix: xx.
|
||||
var prefix *Regexp
|
||||
if re.Min > 0 {
|
||||
prefix = &Regexp{Op: OpConcat}
|
||||
prefix.Sub = prefix.Sub0[:0]
|
||||
for i := 0; i < re.Min; i++ {
|
||||
prefix.Sub = append(prefix.Sub, sub)
|
||||
}
|
||||
}
|
||||
|
||||
// Build and attach suffix: (x(x(x)?)?)?
|
||||
if re.Max > re.Min {
|
||||
suffix := simplify1(OpQuest, re.Flags, sub, nil)
|
||||
for i := re.Min + 1; i < re.Max; i++ {
|
||||
nre2 := &Regexp{Op: OpConcat}
|
||||
nre2.Sub = append(nre2.Sub0[:0], sub, suffix)
|
||||
suffix = simplify1(OpQuest, re.Flags, nre2, nil)
|
||||
}
|
||||
if prefix == nil {
|
||||
return suffix
|
||||
}
|
||||
prefix.Sub = append(prefix.Sub, suffix)
|
||||
}
|
||||
if prefix != nil {
|
||||
return prefix
|
||||
}
|
||||
|
||||
// Some degenerate case like min > max or min < max < 0.
|
||||
// Handle as impossible match.
|
||||
return &Regexp{Op: OpNoMatch}
|
||||
}
|
||||
|
||||
return re
|
||||
}
|
||||
|
||||
// simplify1 implements Simplify for the unary OpStar,
|
||||
// OpPlus, and OpQuest operators. It returns the simple regexp
|
||||
// equivalent to
|
||||
//
|
||||
// Regexp{Op: op, Flags: flags, Sub: {sub}}
|
||||
//
|
||||
// under the assumption that sub is already simple, and
|
||||
// without first allocating that structure. If the regexp
|
||||
// to be returned turns out to be equivalent to re, simplify1
|
||||
// returns re instead.
|
||||
//
|
||||
// simplify1 is factored out of Simplify because the implementation
|
||||
// for other operators generates these unary expressions.
|
||||
// Letting them call simplify1 makes sure the expressions they
|
||||
// generate are simple.
|
||||
func simplify1(op Op, flags Flags, sub, re *Regexp) *Regexp {
|
||||
// Special case: repeat the empty string as much as
|
||||
// you want, but it's still the empty string.
|
||||
if sub.Op == OpEmptyMatch {
|
||||
return sub
|
||||
}
|
||||
// The operators are idempotent if the flags match.
|
||||
if op == sub.Op && flags&NonGreedy == sub.Flags&NonGreedy {
|
||||
return sub
|
||||
}
|
||||
if re != nil && re.Op == op && re.Flags&NonGreedy == flags&NonGreedy && sub == re.Sub[0] {
|
||||
return re
|
||||
}
|
||||
|
||||
re = &Regexp{Op: op, Flags: flags}
|
||||
re.Sub = append(re.Sub0[:0], sub)
|
||||
return re
|
||||
}
|
||||
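Simplify rewrites counted repetition before compilation (the regexp package calls it ahead of Compile). A short sketch, using a case listed in the simplify tests below:

package main

import (
	"fmt"
	"regexp/syntax"
)

func main() {
	re, err := syntax.Parse(`a{2,6}`, syntax.Perl)
	if err != nil {
		panic(err)
	}
	// Per the simplifyTests table, the counted repetition becomes two
	// mandatory copies followed by nested optional suffixes.
	fmt.Println(re.Simplify().String()) // aa(?:a(?:a(?:aa?)?)?)?
}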
153
src/regexp/syntax/simplify_test.go
Normal file
@@ -0,0 +1,153 @@
|
||||
// Copyright 2011 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package syntax
|
||||
|
||||
import "testing"
|
||||
|
||||
var simplifyTests = []struct {
|
||||
Regexp string
|
||||
Simple string
|
||||
}{
|
||||
// Already-simple constructs
|
||||
{`a`, `a`},
|
||||
{`ab`, `ab`},
|
||||
{`a|b`, `[ab]`},
|
||||
{`ab|cd`, `ab|cd`},
|
||||
{`(ab)*`, `(ab)*`},
|
||||
{`(ab)+`, `(ab)+`},
|
||||
{`(ab)?`, `(ab)?`},
|
||||
{`.`, `(?s:.)`},
|
||||
{`^`, `(?m:^)`},
|
||||
{`$`, `(?m:$)`},
|
||||
{`[ac]`, `[ac]`},
|
||||
{`[^ac]`, `[^ac]`},
|
||||
|
||||
// Posix character classes
|
||||
{`[[:alnum:]]`, `[0-9A-Za-z]`},
|
||||
{`[[:alpha:]]`, `[A-Za-z]`},
|
||||
{`[[:blank:]]`, `[\t ]`},
|
||||
{`[[:cntrl:]]`, `[\x00-\x1f\x7f]`},
|
||||
{`[[:digit:]]`, `[0-9]`},
|
||||
{`[[:graph:]]`, `[!-~]`},
|
||||
{`[[:lower:]]`, `[a-z]`},
|
||||
{`[[:print:]]`, `[ -~]`},
|
||||
{`[[:punct:]]`, "[!-/:-@\\[-`\\{-~]"},
|
||||
{`[[:space:]]`, `[\t-\r ]`},
|
||||
{`[[:upper:]]`, `[A-Z]`},
|
||||
{`[[:xdigit:]]`, `[0-9A-Fa-f]`},
|
||||
|
||||
// Perl character classes
|
||||
{`\d`, `[0-9]`},
|
||||
{`\s`, `[\t\n\f\r ]`},
|
||||
{`\w`, `[0-9A-Z_a-z]`},
|
||||
{`\D`, `[^0-9]`},
|
||||
{`\S`, `[^\t\n\f\r ]`},
|
||||
{`\W`, `[^0-9A-Z_a-z]`},
|
||||
{`[\d]`, `[0-9]`},
|
||||
{`[\s]`, `[\t\n\f\r ]`},
|
||||
{`[\w]`, `[0-9A-Z_a-z]`},
|
||||
{`[\D]`, `[^0-9]`},
|
||||
{`[\S]`, `[^\t\n\f\r ]`},
|
||||
{`[\W]`, `[^0-9A-Z_a-z]`},
|
||||
|
||||
// Posix repetitions
|
||||
{`a{1}`, `a`},
|
||||
{`a{2}`, `aa`},
|
||||
{`a{5}`, `aaaaa`},
|
||||
{`a{0,1}`, `a?`},
|
||||
// The next three are illegible because Simplify inserts (?:)
|
||||
// parens instead of () parens to avoid creating extra
|
||||
// captured subexpressions. The comments show a version with fewer parens.
|
||||
{`(a){0,2}`, `(?:(a)(a)?)?`}, // (aa?)?
|
||||
{`(a){0,4}`, `(?:(a)(?:(a)(?:(a)(a)?)?)?)?`}, // (a(a(aa?)?)?)?
|
||||
{`(a){2,6}`, `(a)(a)(?:(a)(?:(a)(?:(a)(a)?)?)?)?`}, // aa(a(a(aa?)?)?)?
|
||||
{`a{0,2}`, `(?:aa?)?`}, // (aa?)?
|
||||
{`a{0,4}`, `(?:a(?:a(?:aa?)?)?)?`}, // (a(a(aa?)?)?)?
|
||||
{`a{2,6}`, `aa(?:a(?:a(?:aa?)?)?)?`}, // aa(a(a(aa?)?)?)?
|
||||
{`a{0,}`, `a*`},
|
||||
{`a{1,}`, `a+`},
|
||||
{`a{2,}`, `aa+`},
|
||||
{`a{5,}`, `aaaaa+`},
|
||||
|
||||
// Test that operators simplify their arguments.
|
||||
{`(?:a{1,}){1,}`, `a+`},
|
||||
{`(a{1,}b{1,})`, `(a+b+)`},
|
||||
{`a{1,}|b{1,}`, `a+|b+`},
|
||||
{`(?:a{1,})*`, `(?:a+)*`},
|
||||
{`(?:a{1,})+`, `a+`},
|
||||
{`(?:a{1,})?`, `(?:a+)?`},
|
||||
{``, `(?:)`},
|
||||
{`a{0}`, `(?:)`},
|
||||
|
||||
// Character class simplification
|
||||
{`[ab]`, `[ab]`},
|
||||
{`[abc]`, `[a-c]`},
|
||||
{`[a-za-za-z]`, `[a-z]`},
|
||||
{`[A-Za-zA-Za-z]`, `[A-Za-z]`},
|
||||
{`[ABCDEFGH]`, `[A-H]`},
|
||||
{`[AB-CD-EF-GH]`, `[A-H]`},
|
||||
{`[W-ZP-XE-R]`, `[E-Z]`},
|
||||
{`[a-ee-gg-m]`, `[a-m]`},
|
||||
{`[a-ea-ha-m]`, `[a-m]`},
|
||||
{`[a-ma-ha-e]`, `[a-m]`},
|
||||
{`[a-zA-Z0-9 -~]`, `[ -~]`},
|
||||
|
||||
// Empty character classes
|
||||
{`[^[:cntrl:][:^cntrl:]]`, `[^\x00-\x{10FFFF}]`},
|
||||
|
||||
// Full character classes
|
||||
{`[[:cntrl:][:^cntrl:]]`, `(?s:.)`},
|
||||
|
||||
// Unicode case folding.
|
||||
{`(?i)A`, `(?i:A)`},
|
||||
{`(?i)a`, `(?i:A)`},
|
||||
{`(?i)[A]`, `(?i:A)`},
|
||||
{`(?i)[a]`, `(?i:A)`},
|
||||
{`(?i)K`, `(?i:K)`},
|
||||
{`(?i)k`, `(?i:K)`},
|
||||
{`(?i)\x{212a}`, "(?i:K)"},
|
||||
{`(?i)[K]`, "[Kk\u212A]"},
|
||||
{`(?i)[k]`, "[Kk\u212A]"},
|
||||
{`(?i)[\x{212a}]`, "[Kk\u212A]"},
|
||||
{`(?i)[a-z]`, "[A-Za-z\u017F\u212A]"},
|
||||
{`(?i)[\x00-\x{FFFD}]`, "[\\x00-\uFFFD]"},
|
||||
{`(?i)[\x00-\x{10FFFF}]`, `(?s:.)`},
|
||||
|
||||
// Empty string as a regular expression.
|
||||
// The empty string must be preserved inside parens in order
|
||||
// to make submatches work right, so these tests are less
|
||||
// interesting than they might otherwise be. String inserts
|
||||
// explicit (?:) in place of non-parenthesized empty strings,
|
||||
// to make them easier to spot for other parsers.
|
||||
{`(a|b|c|)`, `([a-c]|(?:))`},
|
||||
{`(a|b|)`, `([ab]|(?:))`},
|
||||
{`(|)`, `()`},
|
||||
{`a()`, `a()`},
|
||||
{`(()|())`, `(()|())`},
|
||||
{`(a|)`, `(a|(?:))`},
|
||||
{`ab()cd()`, `ab()cd()`},
|
||||
{`()`, `()`},
|
||||
{`()*`, `()*`},
|
||||
{`()+`, `()+`},
|
||||
{`()?`, `()?`},
|
||||
{`(){0}`, `(?:)`},
|
||||
{`(){1}`, `()`},
|
||||
{`(){1,}`, `()+`},
|
||||
{`(){0,2}`, `(?:()()?)?`},
|
||||
}
|
||||
|
||||
func TestSimplify(t *testing.T) {
|
||||
for _, tt := range simplifyTests {
|
||||
re, err := Parse(tt.Regexp, MatchNL|Perl&^OneLine)
|
||||
if err != nil {
|
||||
t.Errorf("Parse(%#q) = error %v", tt.Regexp, err)
|
||||
continue
|
||||
}
|
||||
s := re.Simplify().String()
|
||||
if s != tt.Simple {
|
||||
t.Errorf("Simplify(%#q) = %#q, want %#q", tt.Regexp, s, tt.Simple)
|
||||
}
|
||||
}
|
||||
}
|
||||
24
src/regexp/testdata/README
vendored
Normal file
@@ -0,0 +1,24 @@
|
||||
AT&T POSIX Test Files
|
||||
See testregex.c for copyright + license.
|
||||
|
||||
testregex.c http://www2.research.att.com/~gsf/testregex/testregex.c
|
||||
basic.dat http://www2.research.att.com/~gsf/testregex/basic.dat
|
||||
nullsubexpr.dat http://www2.research.att.com/~gsf/testregex/nullsubexpr.dat
|
||||
repetition.dat http://www2.research.att.com/~gsf/testregex/repetition.dat
|
||||
|
||||
The test data has been edited to reflect RE2/Go differences:
|
||||
* In a star of a possibly empty match like (a*)* matching x,
|
||||
the no match case runs the starred subexpression zero times,
|
||||
not once. This is consistent with (a*)* matching a, which
|
||||
runs the starred subexpression one time, not twice.
|
||||
* The submatch choice is first match, not the POSIX rule.
|
||||
|
||||
Such changes are marked with 'RE2/Go'.
|
||||
|
||||
|
||||
RE2 Test Files
|
||||
|
||||
re2-exhaustive.txt.bz2 and re2-search.txt are built by running
|
||||
'make log' in the RE2 distribution https://github.com/google/re2/
|
||||
|
||||
The exhaustive file is compressed because it is huge.
|
||||
217
src/regexp/testdata/basic.dat
vendored
Normal file
@@ -0,0 +1,217 @@
|
||||
NOTE all standard compliant implementations should pass these : 2002-05-31
|
||||
|
||||
BE abracadabra$ abracadabracadabra (7,18)
|
||||
BE a...b abababbb (2,7)
|
||||
BE XXXXXX ..XXXXXX (2,8)
|
||||
E \) () (1,2)
|
||||
BE a] a]a (0,2)
|
||||
B } } (0,1)
|
||||
E \} } (0,1)
|
||||
BE \] ] (0,1)
|
||||
B ] ] (0,1)
|
||||
E ] ] (0,1)
|
||||
B { { (0,1)
|
||||
B } } (0,1)
|
||||
BE ^a ax (0,1)
|
||||
BE \^a a^a (1,3)
|
||||
BE a\^ a^ (0,2)
|
||||
BE a$ aa (1,2)
|
||||
BE a\$ a$ (0,2)
|
||||
BE ^$ NULL (0,0)
|
||||
E $^ NULL (0,0)
|
||||
E a($) aa (1,2)(2,2)
|
||||
E a*(^a) aa (0,1)(0,1)
|
||||
E (..)*(...)* a (0,0)
|
||||
E (..)*(...)* abcd (0,4)(2,4)
|
||||
E (ab|a)(bc|c) abc (0,3)(0,2)(2,3)
|
||||
E (ab)c|abc abc (0,3)(0,2)
|
||||
E a{0}b ab (1,2)
|
||||
E (a*)(b?)(b+)b{3} aaabbbbbbb (0,10)(0,3)(3,4)(4,7)
|
||||
E (a*)(b{0,1})(b{1,})b{3} aaabbbbbbb (0,10)(0,3)(3,4)(4,7)
|
||||
E a{9876543210} NULL BADBR
|
||||
E ((a|a)|a) a (0,1)(0,1)(0,1)
|
||||
E (a*)(a|aa) aaaa (0,4)(0,3)(3,4)
|
||||
E a*(a.|aa) aaaa (0,4)(2,4)
|
||||
E a(b)|c(d)|a(e)f aef (0,3)(?,?)(?,?)(1,2)
|
||||
E (a|b)?.* b (0,1)(0,1)
|
||||
E (a|b)c|a(b|c) ac (0,2)(0,1)
|
||||
E (a|b)c|a(b|c) ab (0,2)(?,?)(1,2)
|
||||
E (a|b)*c|(a|ab)*c abc (0,3)(1,2)
|
||||
E (a|b)*c|(a|ab)*c xc (1,2)
|
||||
E (.a|.b).*|.*(.a|.b) xa (0,2)(0,2)
|
||||
E a?(ab|ba)ab abab (0,4)(0,2)
|
E a?(ac{0}b|ba)ab abab (0,4)(0,2)
E ab|abab abbabab (0,2)
E aba|bab|bba baaabbbaba (5,8)
E aba|bab baaabbbaba (6,9)
E (aa|aaa)*|(a|aaaaa) aa (0,2)(0,2)
E (a.|.a.)*|(a|.a...) aa (0,2)(0,2)
E ab|a xabc (1,3)
E ab|a xxabc (2,4)
Ei (Ab|cD)* aBcD (0,4)(2,4)
BE [^-] --a (2,3)
BE [a-]* --a (0,3)
BE [a-m-]* --amoma-- (0,4)
E :::1:::0:|:::1:1:0: :::0:::1:::1:::0: (8,17)
E :::1:::0:|:::1:1:1: :::0:::1:::1:::0: (8,17)
{E [[:upper:]] A (0,1) [[<element>]] not supported
E [[:lower:]]+ `az{ (1,3)
E [[:upper:]]+ @AZ[ (1,3)
# No collation in Go
#BE [[-]] [[-]] (2,4)
#BE [[.NIL.]] NULL ECOLLATE
#BE [[=aleph=]] NULL ECOLLATE
}
BE$ \n \n (0,1)
BEn$ \n \n (0,1)
BE$ [^a] \n (0,1)
BE$ \na \na (0,2)
E (a)(b)(c) abc (0,3)(0,1)(1,2)(2,3)
BE xxx xxx (0,3)
E1 (^|[ (,;])((([Ff]eb[^ ]* *|0*2/|\* */?)0*[6-7]))([^0-9]|$) feb 6, (0,6)
E1 (^|[ (,;])((([Ff]eb[^ ]* *|0*2/|\* */?)0*[6-7]))([^0-9]|$) 2/7 (0,3)
E1 (^|[ (,;])((([Ff]eb[^ ]* *|0*2/|\* */?)0*[6-7]))([^0-9]|$) feb 1,Feb 6 (5,11)
E3 ((((((((((((((((((((((((((((((x)))))))))))))))))))))))))))))) x (0,1)(0,1)(0,1)
E3 ((((((((((((((((((((((((((((((x))))))))))))))))))))))))))))))* xx (0,2)(1,2)(1,2)
E a?(ab|ba)* ababababababababababababababababababababababababababababababababababababababababa (0,81)(79,81)
E abaa|abbaa|abbbaa|abbbbaa ababbabbbabbbabbbbabbbbaa (18,25)
E abaa|abbaa|abbbaa|abbbbaa ababbabbbabbbabbbbabaa (18,22)
E aaac|aabc|abac|abbc|baac|babc|bbac|bbbc baaabbbabac (7,11)
BE$ .* \x01\xff (0,2)
E aaaa|bbbb|cccc|ddddd|eeeeee|fffffff|gggg|hhhh|iiiii|jjjjj|kkkkk|llll XaaaXbbbXcccXdddXeeeXfffXgggXhhhXiiiXjjjXkkkXlllXcbaXaaaa (53,57)
L aaaa\nbbbb\ncccc\nddddd\neeeeee\nfffffff\ngggg\nhhhh\niiiii\njjjjj\nkkkkk\nllll XaaaXbbbXcccXdddXeeeXfffXgggXhhhXiiiXjjjXkkkXlllXcbaXaaaa NOMATCH
E a*a*a*a*a*b aaaaaaaaab (0,10)
BE ^ NULL (0,0)
BE $ NULL (0,0)
BE ^$ NULL (0,0)
BE ^a$ a (0,1)
BE abc abc (0,3)
BE abc xabcy (1,4)
BE abc ababc (2,5)
BE ab*c abc (0,3)
BE ab*bc abc (0,3)
BE ab*bc abbc (0,4)
BE ab*bc abbbbc (0,6)
E ab+bc abbc (0,4)
E ab+bc abbbbc (0,6)
E ab?bc abbc (0,4)
E ab?bc abc (0,3)
E ab?c abc (0,3)
BE ^abc$ abc (0,3)
BE ^abc abcc (0,3)
BE abc$ aabc (1,4)
BE ^ abc (0,0)
BE $ abc (3,3)
BE a.c abc (0,3)
BE a.c axc (0,3)
BE a.*c axyzc (0,5)
BE a[bc]d abd (0,3)
BE a[b-d]e ace (0,3)
BE a[b-d] aac (1,3)
BE a[-b] a- (0,2)
BE a[b-] a- (0,2)
BE a] a] (0,2)
BE a[]]b a]b (0,3)
BE a[^bc]d aed (0,3)
BE a[^-b]c adc (0,3)
BE a[^]b]c adc (0,3)
E ab|cd abc (0,2)
E ab|cd abcd (0,2)
E a\(b a(b (0,3)
E a\(*b ab (0,2)
E a\(*b a((b (0,4)
E ((a)) abc (0,1)(0,1)(0,1)
E (a)b(c) abc (0,3)(0,1)(2,3)
E a+b+c aabbabc (4,7)
E a* aaa (0,3)
E (a*)* - (0,0)(0,0)
E (a*)+ - (0,0)(0,0)
E (a*|b)* - (0,0)(0,0)
E (a+|b)* ab (0,2)(1,2)
E (a+|b)+ ab (0,2)(1,2)
E (a+|b)? ab (0,1)(0,1)
BE [^ab]* cde (0,3)
E (^)* - (0,0)(0,0)
BE a* NULL (0,0)
E ([abc])*d abbbcd (0,6)(4,5)
E ([abc])*bcd abcd (0,4)(0,1)
E a|b|c|d|e e (0,1)
E (a|b|c|d|e)f ef (0,2)(0,1)
E ((a*|b))* - (0,0)(0,0)(0,0)
BE abcd*efg abcdefg (0,7)
BE ab* xabyabbbz (1,3)
BE ab* xayabbbz (1,2)
E (ab|cd)e abcde (2,5)(2,4)
BE [abhgefdc]ij hij (0,3)
E (a|b)c*d abcd (1,4)(1,2)
E (ab|ab*)bc abc (0,3)(0,1)
E a([bc]*)c* abc (0,3)(1,3)
E a([bc]*)(c*d) abcd (0,4)(1,3)(3,4)
E a([bc]+)(c*d) abcd (0,4)(1,3)(3,4)
E a([bc]*)(c+d) abcd (0,4)(1,2)(2,4)
E a[bcd]*dcdcde adcdcde (0,7)
E (ab|a)b*c abc (0,3)(0,2)
E ((a)(b)c)(d) abcd (0,4)(0,3)(0,1)(1,2)(3,4)
BE [A-Za-z_][A-Za-z0-9_]* alpha (0,5)
E ^a(bc+|b[eh])g|.h$ abh (1,3)
E (bc+d$|ef*g.|h?i(j|k)) effgz (0,5)(0,5)
E (bc+d$|ef*g.|h?i(j|k)) ij (0,2)(0,2)(1,2)
E (bc+d$|ef*g.|h?i(j|k)) reffgz (1,6)(1,6)
E (((((((((a))))))))) a (0,1)(0,1)(0,1)(0,1)(0,1)(0,1)(0,1)(0,1)(0,1)(0,1)
BE multiple words multiple words yeah (0,14)
E (.*)c(.*) abcde (0,5)(0,2)(3,5)
BE abcd abcd (0,4)
E a(bc)d abcd (0,4)(1,3)
E a[-]?c ac (0,3)
E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Muammar Qaddafi (0,15)(?,?)(10,12)
E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Mo'ammar Gadhafi (0,16)(?,?)(11,13)
E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Muammar Kaddafi (0,15)(?,?)(10,12)
E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Muammar Qadhafi (0,15)(?,?)(10,12)
E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Muammar Gadafi (0,14)(?,?)(10,11)
E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Mu'ammar Qadafi (0,15)(?,?)(11,12)
E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Moamar Gaddafi (0,14)(?,?)(9,11)
E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Mu'ammar Qadhdhafi (0,18)(?,?)(13,15)
E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Muammar Khaddafi (0,16)(?,?)(11,13)
E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Muammar Ghaddafy (0,16)(?,?)(11,13)
E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Muammar Ghadafi (0,15)(?,?)(11,12)
E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Muammar Ghaddafi (0,16)(?,?)(11,13)
E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Muamar Kaddafi (0,14)(?,?)(9,11)
E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Muammar Quathafi (0,16)(?,?)(11,13)
E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Muammar Gheddafi (0,16)(?,?)(11,13)
E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Moammar Khadafy (0,15)(?,?)(11,12)
E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Moammar Qudhafi (0,15)(?,?)(10,12)
E a+(b|c)*d+ aabcdd (0,6)(3,4)
E ^.+$ vivi (0,4)
E ^(.+)$ vivi (0,4)(0,4)
E ^([^!.]+).att.com!(.+)$ gryphon.att.com!eby (0,19)(0,7)(16,19)
E ^([^!]+!)?([^!]+)$ bas (0,3)(?,?)(0,3)
E ^([^!]+!)?([^!]+)$ bar!bas (0,7)(0,4)(4,7)
E ^([^!]+!)?([^!]+)$ foo!bas (0,7)(0,4)(4,7)
E ^.+!([^!]+!)([^!]+)$ foo!bar!bas (0,11)(4,8)(8,11)
E ((foo)|(bar))!bas bar!bas (0,7)(0,3)(?,?)(0,3)
E ((foo)|(bar))!bas foo!bar!bas (4,11)(4,7)(?,?)(4,7)
E ((foo)|(bar))!bas foo!bas (0,7)(0,3)(0,3)
E ((foo)|bar)!bas bar!bas (0,7)(0,3)
E ((foo)|bar)!bas foo!bar!bas (4,11)(4,7)
E ((foo)|bar)!bas foo!bas (0,7)(0,3)(0,3)
E (foo|(bar))!bas bar!bas (0,7)(0,3)(0,3)
E (foo|(bar))!bas foo!bar!bas (4,11)(4,7)(4,7)
E (foo|(bar))!bas foo!bas (0,7)(0,3)
E (foo|bar)!bas bar!bas (0,7)(0,3)
E (foo|bar)!bas foo!bar!bas (4,11)(4,7)
E (foo|bar)!bas foo!bas (0,7)(0,3)
E ^(([^!]+!)?([^!]+)|.+!([^!]+!)([^!]+))$ foo!bar!bas (0,11)(0,11)(?,?)(?,?)(4,8)(8,11)
E ^([^!]+!)?([^!]+)$|^.+!([^!]+!)([^!]+)$ bas (0,3)(?,?)(0,3)
E ^([^!]+!)?([^!]+)$|^.+!([^!]+!)([^!]+)$ bar!bas (0,7)(0,4)(4,7)
E ^([^!]+!)?([^!]+)$|^.+!([^!]+!)([^!]+)$ foo!bar!bas (0,11)(?,?)(?,?)(4,8)(8,11)
E ^([^!]+!)?([^!]+)$|^.+!([^!]+!)([^!]+)$ foo!bas (0,7)(0,4)(4,7)
E ^(([^!]+!)?([^!]+)|.+!([^!]+!)([^!]+))$ bas (0,3)(0,3)(?,?)(0,3)
E ^(([^!]+!)?([^!]+)|.+!([^!]+!)([^!]+))$ bar!bas (0,7)(0,7)(0,4)(4,7)
E ^(([^!]+!)?([^!]+)|.+!([^!]+!)([^!]+))$ foo!bar!bas (0,11)(0,11)(?,?)(?,?)(4,8)(8,11)
E ^(([^!]+!)?([^!]+)|.+!([^!]+!)([^!]+))$ foo!bas (0,7)(0,7)(0,4)(4,7)
E .*(/XXX).* /XXX (0,4)(0,4)
E .*(\\XXX).* \XXX (0,4)(0,4)
E \\XXX \XXX (0,4)
E .*(/000).* /000 (0,4)(0,4)
E .*(\\000).* \000 (0,4)(0,4)
E \\000 \000 (0,4)
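Each row above pairs flags, a pattern, an input string, and the expected byte-offset spans. As a rough illustration of how one row maps onto Go's regexp API, here is a minimal sketch (the checkRow helper is hypothetical and is not the package's actual test driver; the expected span comes from the `BE abc ababc (2,5)` row above):

package main

import (
	"fmt"
	"regexp"
)

// checkRow is a hypothetical, simplified checker for a single data row:
// compile the pattern, find the leftmost match, and compare its span
// with the expected (start,end) pair from the table.
func checkRow(pattern, text string, wantStart, wantEnd int) bool {
	re := regexp.MustCompile(pattern)
	loc := re.FindStringIndex(text) // e.g. [2 5] for "abc" in "ababc"
	return loc != nil && loc[0] == wantStart && loc[1] == wantEnd
}

func main() {
	fmt.Println(checkRow(`abc`, "ababc", 2, 5)) // true
}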
73  src/regexp/testdata/nullsubexpr.dat  vendored  Normal file
@@ -0,0 +1,73 @@
NOTE null subexpression matches : 2002-06-06

E (a*)* a (0,1)(0,1)
E SAME x (0,0)(0,0)
E SAME aaaaaa (0,6)(0,6)
E SAME aaaaaax (0,6)(0,6)
E (a*)+ a (0,1)(0,1)
E SAME x (0,0)(0,0)
E SAME aaaaaa (0,6)(0,6)
E SAME aaaaaax (0,6)(0,6)
E (a+)* a (0,1)(0,1)
E SAME x (0,0)
E SAME aaaaaa (0,6)(0,6)
E SAME aaaaaax (0,6)(0,6)
E (a+)+ a (0,1)(0,1)
E SAME x NOMATCH
E SAME aaaaaa (0,6)(0,6)
E SAME aaaaaax (0,6)(0,6)

E ([a]*)* a (0,1)(0,1)
E SAME x (0,0)(0,0)
E SAME aaaaaa (0,6)(0,6)
E SAME aaaaaax (0,6)(0,6)
E ([a]*)+ a (0,1)(0,1)
E SAME x (0,0)(0,0)
E SAME aaaaaa (0,6)(0,6)
E SAME aaaaaax (0,6)(0,6)
E ([^b]*)* a (0,1)(0,1)
E SAME b (0,0)(0,0)
E SAME aaaaaa (0,6)(0,6)
E SAME aaaaaab (0,6)(0,6)
E ([ab]*)* a (0,1)(0,1)
E SAME aaaaaa (0,6)(0,6)
E SAME ababab (0,6)(0,6)
E SAME bababa (0,6)(0,6)
E SAME b (0,1)(0,1)
E SAME bbbbbb (0,6)(0,6)
E SAME aaaabcde (0,5)(0,5)
E ([^a]*)* b (0,1)(0,1)
E SAME bbbbbb (0,6)(0,6)
E SAME aaaaaa (0,0)(0,0)
E ([^ab]*)* ccccxx (0,6)(0,6)
E SAME ababab (0,0)(0,0)

E ((z)+|a)* zabcde (0,2)(1,2)

#{E a+? aaaaaa (0,1) no *? +? minimal match ops
#E (a) aaa (0,1)(0,1)
#E (a*?) aaa (0,0)(0,0)
#E (a)*? aaa (0,0)
#E (a*?)*? aaa (0,0)
#}

B \(a*\)*\(x\) x (0,1)(0,0)(0,1)
B \(a*\)*\(x\) ax (0,2)(0,1)(1,2)
B \(a*\)*\(x\) axa (0,2)(0,1)(1,2)
B \(a*\)*\(x\)\(\1\) x (0,1)(0,0)(0,1)(1,1)
B \(a*\)*\(x\)\(\1\) ax (0,2)(1,1)(1,2)(2,2)
B \(a*\)*\(x\)\(\1\) axa (0,3)(0,1)(1,2)(2,3)
B \(a*\)*\(x\)\(\1\)\(x\) axax (0,4)(0,1)(1,2)(2,3)(3,4)
B \(a*\)*\(x\)\(\1\)\(x\) axxa (0,3)(1,1)(1,2)(2,2)(2,3)

E (a*)*(x) x (0,1)(0,0)(0,1)
E (a*)*(x) ax (0,2)(0,1)(1,2)
E (a*)*(x) axa (0,2)(0,1)(1,2)

E (a*)+(x) x (0,1)(0,0)(0,1)
E (a*)+(x) ax (0,2)(0,1)(1,2)
E (a*)+(x) axa (0,2)(0,1)(1,2)

E (a*){2}(x) x (0,1)(0,0)(0,1)
E (a*){2}(x) ax (0,2)(1,1)(1,2)
E (a*){2}(x) axa (0,2)(1,1)(1,2)
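The rows above pin down how an empty (null) iteration of a repeated capturing group is reported. As a hedged sketch of what a row such as `E (a*)+ x (0,0)(0,0)` means in terms of Go's submatch indexes (illustrative only, not the package's own harness):

package main

import (
	"fmt"
	"regexp"
)

func main() {
	re := regexp.MustCompile(`(a*)+`)
	// Per the table row above, both the whole match and group 1 are the
	// empty span at offset 0, i.e. the index pairs (0,0)(0,0).
	fmt.Println(re.FindStringSubmatchIndex("x")) // [0 0 0 0]
}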
BIN  src/regexp/testdata/re2-exhaustive.txt.bz2  vendored  Normal file
Binary file not shown.
3779  src/regexp/testdata/re2-search.txt  vendored  Normal file
File diff suppressed because it is too large.
163  src/regexp/testdata/repetition.dat  vendored  Normal file
@@ -0,0 +1,163 @@
NOTE implicit vs. explicit repetitions : 2009-02-02

# Glenn Fowler <gsf@research.att.com>
# conforming matches (column 4) must match one of the following BREs
# NOMATCH
# (0,.)\((\(.\),\(.\))(?,?)(\2,\3)\)*
# (0,.)\((\(.\),\(.\))(\2,\3)(?,?)\)*
# i.e., each 3-tuple has two identical elements and one (?,?)

E ((..)|(.)) NULL NOMATCH
E ((..)|(.))((..)|(.)) NULL NOMATCH
E ((..)|(.))((..)|(.))((..)|(.)) NULL NOMATCH

E ((..)|(.)){1} NULL NOMATCH
E ((..)|(.)){2} NULL NOMATCH
E ((..)|(.)){3} NULL NOMATCH

E ((..)|(.))* NULL (0,0)

E ((..)|(.)) a (0,1)(0,1)(?,?)(0,1)
E ((..)|(.))((..)|(.)) a NOMATCH
E ((..)|(.))((..)|(.))((..)|(.)) a NOMATCH

E ((..)|(.)){1} a (0,1)(0,1)(?,?)(0,1)
E ((..)|(.)){2} a NOMATCH
E ((..)|(.)){3} a NOMATCH

E ((..)|(.))* a (0,1)(0,1)(?,?)(0,1)

E ((..)|(.)) aa (0,2)(0,2)(0,2)(?,?)
E ((..)|(.))((..)|(.)) aa (0,2)(0,1)(?,?)(0,1)(1,2)(?,?)(1,2)
E ((..)|(.))((..)|(.))((..)|(.)) aa NOMATCH

E ((..)|(.)){1} aa (0,2)(0,2)(0,2)(?,?)
E ((..)|(.)){2} aa (0,2)(1,2)(?,?)(1,2)
E ((..)|(.)){3} aa NOMATCH

E ((..)|(.))* aa (0,2)(0,2)(0,2)(?,?)

E ((..)|(.)) aaa (0,2)(0,2)(0,2)(?,?)
E ((..)|(.))((..)|(.)) aaa (0,3)(0,2)(0,2)(?,?)(2,3)(?,?)(2,3)
E ((..)|(.))((..)|(.))((..)|(.)) aaa (0,3)(0,1)(?,?)(0,1)(1,2)(?,?)(1,2)(2,3)(?,?)(2,3)

E ((..)|(.)){1} aaa (0,2)(0,2)(0,2)(?,?)
#E ((..)|(.)){2} aaa (0,3)(2,3)(?,?)(2,3)
E ((..)|(.)){2} aaa (0,3)(2,3)(0,2)(2,3) RE2/Go
E ((..)|(.)){3} aaa (0,3)(2,3)(?,?)(2,3)

#E ((..)|(.))* aaa (0,3)(2,3)(?,?)(2,3)
E ((..)|(.))* aaa (0,3)(2,3)(0,2)(2,3) RE2/Go

E ((..)|(.)) aaaa (0,2)(0,2)(0,2)(?,?)
E ((..)|(.))((..)|(.)) aaaa (0,4)(0,2)(0,2)(?,?)(2,4)(2,4)(?,?)
E ((..)|(.))((..)|(.))((..)|(.)) aaaa (0,4)(0,2)(0,2)(?,?)(2,3)(?,?)(2,3)(3,4)(?,?)(3,4)

E ((..)|(.)){1} aaaa (0,2)(0,2)(0,2)(?,?)
E ((..)|(.)){2} aaaa (0,4)(2,4)(2,4)(?,?)
#E ((..)|(.)){3} aaaa (0,4)(3,4)(?,?)(3,4)
E ((..)|(.)){3} aaaa (0,4)(3,4)(0,2)(3,4) RE2/Go

E ((..)|(.))* aaaa (0,4)(2,4)(2,4)(?,?)

E ((..)|(.)) aaaaa (0,2)(0,2)(0,2)(?,?)
E ((..)|(.))((..)|(.)) aaaaa (0,4)(0,2)(0,2)(?,?)(2,4)(2,4)(?,?)
E ((..)|(.))((..)|(.))((..)|(.)) aaaaa (0,5)(0,2)(0,2)(?,?)(2,4)(2,4)(?,?)(4,5)(?,?)(4,5)

E ((..)|(.)){1} aaaaa (0,2)(0,2)(0,2)(?,?)
E ((..)|(.)){2} aaaaa (0,4)(2,4)(2,4)(?,?)
#E ((..)|(.)){3} aaaaa (0,5)(4,5)(?,?)(4,5)
E ((..)|(.)){3} aaaaa (0,5)(4,5)(2,4)(4,5) RE2/Go

#E ((..)|(.))* aaaaa (0,5)(4,5)(?,?)(4,5)
E ((..)|(.))* aaaaa (0,5)(4,5)(2,4)(4,5) RE2/Go

E ((..)|(.)) aaaaaa (0,2)(0,2)(0,2)(?,?)
E ((..)|(.))((..)|(.)) aaaaaa (0,4)(0,2)(0,2)(?,?)(2,4)(2,4)(?,?)
E ((..)|(.))((..)|(.))((..)|(.)) aaaaaa (0,6)(0,2)(0,2)(?,?)(2,4)(2,4)(?,?)(4,6)(4,6)(?,?)

E ((..)|(.)){1} aaaaaa (0,2)(0,2)(0,2)(?,?)
E ((..)|(.)){2} aaaaaa (0,4)(2,4)(2,4)(?,?)
E ((..)|(.)){3} aaaaaa (0,6)(4,6)(4,6)(?,?)

E ((..)|(.))* aaaaaa (0,6)(4,6)(4,6)(?,?)

NOTE additional repetition tests graciously provided by Chris Kuklewicz www.haskell.org 2009-02-02

# These test a bug in OS X / FreeBSD / NetBSD, and libtree.
# Linux/GLIBC gets the {8,} and {8,8} wrong.

:HA#100:E X(.?){0,}Y X1234567Y (0,9)(7,8)
:HA#101:E X(.?){1,}Y X1234567Y (0,9)(7,8)
:HA#102:E X(.?){2,}Y X1234567Y (0,9)(7,8)
:HA#103:E X(.?){3,}Y X1234567Y (0,9)(7,8)
:HA#104:E X(.?){4,}Y X1234567Y (0,9)(7,8)
:HA#105:E X(.?){5,}Y X1234567Y (0,9)(7,8)
:HA#106:E X(.?){6,}Y X1234567Y (0,9)(7,8)
:HA#107:E X(.?){7,}Y X1234567Y (0,9)(7,8)
:HA#108:E X(.?){8,}Y X1234567Y (0,9)(8,8)
#:HA#110:E X(.?){0,8}Y X1234567Y (0,9)(7,8)
:HA#110:E X(.?){0,8}Y X1234567Y (0,9)(8,8) RE2/Go
#:HA#111:E X(.?){1,8}Y X1234567Y (0,9)(7,8)
:HA#111:E X(.?){1,8}Y X1234567Y (0,9)(8,8) RE2/Go
#:HA#112:E X(.?){2,8}Y X1234567Y (0,9)(7,8)
:HA#112:E X(.?){2,8}Y X1234567Y (0,9)(8,8) RE2/Go
#:HA#113:E X(.?){3,8}Y X1234567Y (0,9)(7,8)
:HA#113:E X(.?){3,8}Y X1234567Y (0,9)(8,8) RE2/Go
#:HA#114:E X(.?){4,8}Y X1234567Y (0,9)(7,8)
:HA#114:E X(.?){4,8}Y X1234567Y (0,9)(8,8) RE2/Go
#:HA#115:E X(.?){5,8}Y X1234567Y (0,9)(7,8)
:HA#115:E X(.?){5,8}Y X1234567Y (0,9)(8,8) RE2/Go
#:HA#116:E X(.?){6,8}Y X1234567Y (0,9)(7,8)
:HA#116:E X(.?){6,8}Y X1234567Y (0,9)(8,8) RE2/Go
#:HA#117:E X(.?){7,8}Y X1234567Y (0,9)(7,8)
:HA#117:E X(.?){7,8}Y X1234567Y (0,9)(8,8) RE2/Go
:HA#118:E X(.?){8,8}Y X1234567Y (0,9)(8,8)

# These test a fixed bug in my regex-tdfa that did not keep the expanded
# form properly grouped, so right association did the wrong thing with
# these ambiguous patterns (crafted just to test my code when I became
# suspicious of my implementation). The first subexpression should use
# "ab" then "a" then "bcd".

# OS X / FreeBSD / NetBSD badly fail many of these, with impossible
# results like (0,6)(4,5)(6,6).

:HA#260:E (a|ab|c|bcd){0,}(d*) ababcd (0,6)(3,6)(6,6)
:HA#261:E (a|ab|c|bcd){1,}(d*) ababcd (0,6)(3,6)(6,6)
:HA#262:E (a|ab|c|bcd){2,}(d*) ababcd (0,6)(3,6)(6,6)
:HA#263:E (a|ab|c|bcd){3,}(d*) ababcd (0,6)(3,6)(6,6)
:HA#264:E (a|ab|c|bcd){4,}(d*) ababcd NOMATCH
:HA#265:E (a|ab|c|bcd){0,10}(d*) ababcd (0,6)(3,6)(6,6)
:HA#266:E (a|ab|c|bcd){1,10}(d*) ababcd (0,6)(3,6)(6,6)
:HA#267:E (a|ab|c|bcd){2,10}(d*) ababcd (0,6)(3,6)(6,6)
:HA#268:E (a|ab|c|bcd){3,10}(d*) ababcd (0,6)(3,6)(6,6)
:HA#269:E (a|ab|c|bcd){4,10}(d*) ababcd NOMATCH
:HA#270:E (a|ab|c|bcd)*(d*) ababcd (0,6)(3,6)(6,6)
:HA#271:E (a|ab|c|bcd)+(d*) ababcd (0,6)(3,6)(6,6)

# The above worked on Linux/GLIBC but the following often fail.
# They also trip up OS X / FreeBSD / NetBSD:

#:HA#280:E (ab|a|c|bcd){0,}(d*) ababcd (0,6)(3,6)(6,6)
:HA#280:E (ab|a|c|bcd){0,}(d*) ababcd (0,6)(4,5)(5,6) RE2/Go
#:HA#281:E (ab|a|c|bcd){1,}(d*) ababcd (0,6)(3,6)(6,6)
:HA#281:E (ab|a|c|bcd){1,}(d*) ababcd (0,6)(4,5)(5,6) RE2/Go
#:HA#282:E (ab|a|c|bcd){2,}(d*) ababcd (0,6)(3,6)(6,6)
:HA#282:E (ab|a|c|bcd){2,}(d*) ababcd (0,6)(4,5)(5,6) RE2/Go
#:HA#283:E (ab|a|c|bcd){3,}(d*) ababcd (0,6)(3,6)(6,6)
:HA#283:E (ab|a|c|bcd){3,}(d*) ababcd (0,6)(4,5)(5,6) RE2/Go
:HA#284:E (ab|a|c|bcd){4,}(d*) ababcd NOMATCH
#:HA#285:E (ab|a|c|bcd){0,10}(d*) ababcd (0,6)(3,6)(6,6)
:HA#285:E (ab|a|c|bcd){0,10}(d*) ababcd (0,6)(4,5)(5,6) RE2/Go
#:HA#286:E (ab|a|c|bcd){1,10}(d*) ababcd (0,6)(3,6)(6,6)
:HA#286:E (ab|a|c|bcd){1,10}(d*) ababcd (0,6)(4,5)(5,6) RE2/Go
#:HA#287:E (ab|a|c|bcd){2,10}(d*) ababcd (0,6)(3,6)(6,6)
:HA#287:E (ab|a|c|bcd){2,10}(d*) ababcd (0,6)(4,5)(5,6) RE2/Go
#:HA#288:E (ab|a|c|bcd){3,10}(d*) ababcd (0,6)(3,6)(6,6)
:HA#288:E (ab|a|c|bcd){3,10}(d*) ababcd (0,6)(4,5)(5,6) RE2/Go
:HA#289:E (ab|a|c|bcd){4,10}(d*) ababcd NOMATCH
#:HA#290:E (ab|a|c|bcd)*(d*) ababcd (0,6)(3,6)(6,6)
:HA#290:E (ab|a|c|bcd)*(d*) ababcd (0,6)(4,5)(5,6) RE2/Go
#:HA#291:E (ab|a|c|bcd)+(d*) ababcd (0,6)(3,6)(6,6)
:HA#291:E (ab|a|c|bcd)+(d*) ababcd (0,6)(4,5)(5,6) RE2/Go
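The `RE2/Go` rows above record where RE2 and Go intentionally differ from the POSIX reference output for counted repetition: the reported capture reflects the last (possibly empty) iteration of the group. A small sketch of the :HA#110 row, with the expected indexes taken from that row (illustrative only, not the package's own test driver):

package main

import (
	"fmt"
	"regexp"
)

func main() {
	re := regexp.MustCompile(`X(.?){0,8}Y`)
	// The :HA#110 RE2/Go row expects (0,9)(8,8): the whole match spans the
	// string and the last iteration of the group captures the empty span
	// just before Y.
	fmt.Println(re.FindStringSubmatchIndex("X1234567Y")) // [0 9 8 8]
}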
2286  src/regexp/testdata/testregex.c  vendored  Normal file
File diff suppressed because it is too large.