Initial commit: Go 1.23 release state

2024-09-21 23:49:08 +10:00
commit 17cd57a668
13231 changed files with 3114330 additions and 0 deletions
--- a/src/unicode/casetables.go
+++ b/src/unicode/casetables.go
@@ -0,0 +1,20 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// TODO: This file contains the special casing rules for Turkish and Azeri only.
+// It should encompass all the languages with special casing rules
+// and be generated automatically, but that requires some API
+// development first.
+
+package unicode
+
+var TurkishCase SpecialCase = _TurkishCase // special case mappings for Turkish; covers U+0049, U+0069, U+0130, U+0131
+var _TurkishCase = SpecialCase{
+	CaseRange{0x0049, 0x0049, d{0, 0x131 - 0x49, 0}},            // U+0049: lower case is U+0131
+	CaseRange{0x0069, 0x0069, d{0x130 - 0x69, 0, 0x130 - 0x69}}, // U+0069: upper and title case are U+0130
+	CaseRange{0x0130, 0x0130, d{0, 0x69 - 0x130, 0}},            // U+0130: lower case is U+0069
+	CaseRange{0x0131, 0x0131, d{0x49 - 0x131, 0, 0x49 - 0x131}}, // U+0131: upper and title case are U+0049
+}
+
+var AzeriCase SpecialCase = _TurkishCase // Azeri shares the Turkish table
--- a/src/unicode/digit.go
+++ b/src/unicode/digit.go
@@ -0,0 +1,13 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package unicode
+
+// IsDigit reports whether r is a decimal digit.
+func IsDigit(r rune) bool {
+	if r > MaxLatin1 {
+		return isExcludingLatin(Digit, r)
+	}
+	return r >= '0' && r <= '9' // up to MaxLatin1 only '0' through '9' are accepted
+}
--- a/src/unicode/digit_test.go
+++ b/src/unicode/digit_test.go
@@ -0,0 +1,126 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package unicode_test
+
+import (
+	"testing"
+	. "unicode"
+)
+
+var testDigit = []rune{ // runes that TestDigit requires IsDigit to accept
+	0x0030,
+	0x0039,
+	0x0661,
+	0x06F1,
+	0x07C9,
+	0x0966,
+	0x09EF,
+	0x0A66,
+	0x0AEF,
+	0x0B66,
+	0x0B6F,
+	0x0BE6,
+	0x0BEF,
+	0x0C66,
+	0x0CEF,
+	0x0D66,
+	0x0D6F,
+	0x0E50,
+	0x0E59,
+	0x0ED0,
+	0x0ED9,
+	0x0F20,
+	0x0F29,
+	0x1040,
+	0x1049,
+	0x1090,
+	0x1091,
+	0x1099,
+	0x17E0,
+	0x17E9,
+	0x1810,
+	0x1819,
+	0x1946,
+	0x194F,
+	0x19D0,
+	0x19D9,
+	0x1B50,
+	0x1B59,
+	0x1BB0,
+	0x1BB9,
+	0x1C40,
+	0x1C49,
+	0x1C50,
+	0x1C59,
+	0xA620,
+	0xA629,
+	0xA8D0,
+	0xA8D9,
+	0xA900,
+	0xA909,
+	0xAA50,
+	0xAA59,
+	0xFF10,
+	0xFF19,
+	0x104A1,
+	0x1D7CE,
+}
+
+var testLetter = []rune{ // runes that TestDigit requires IsDigit to reject
+	0x0041,
+	0x0061,
+	0x00AA,
+	0x00BA,
+	0x00C8,
+	0x00DB,
+	0x00F9,
+	0x02EC,
+	0x0535,
+	0x06E6,
+	0x093D,
+	0x0A15,
+	0x0B99,
+	0x0DC0,
+	0x0EDD,
+	0x1000,
+	0x1200,
+	0x1312,
+	0x1401,
+	0x1885,
+	0x2C00,
+	0xA800,
+	0xF900,
+	0xFA30,
+	0xFFDA,
+	0xFFDC,
+	0x10000,
+	0x10300,
+	0x10400,
+	0x20000,
+	0x2F800,
+	0x2FA1D,
+}
+
+func TestDigit(t *testing.T) {
+	for i := range testDigit {
+		if r := testDigit[i]; !IsDigit(r) {
+			t.Errorf("IsDigit(U+%04X) = false, want true", r)
+		}
+	}
+	for i := range testLetter {
+		if r := testLetter[i]; IsDigit(r) {
+			t.Errorf("IsDigit(U+%04X) = true, want false", r)
+		}
+	}
+}
+
+// TestDigitOptimization checks that the Latin-1 special case in IsDigit agrees with the Digit table.
+func TestDigitOptimization(t *testing.T) {
+	for r := rune(0); r <= MaxLatin1; r++ {
+		if table, fast := Is(Digit, r), IsDigit(r); table != fast {
+			t.Errorf("IsDigit(U+%04X) disagrees with Is(Digit)", r)
+		}
+	}
+}
--- a/src/unicode/example_test.go
+++ b/src/unicode/example_test.go
@@ -0,0 +1,256 @@
+// Copyright 2015 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package unicode_test
+
+import (
+	"fmt"
+	"unicode"
+)
+
+// Functions starting with "Is" can be used to inspect which range table a
+// rune belongs to. Note that a rune may belong to more than one table.
+func Example_is() {
+
+	// A constant holding runes of several different kinds.
+	const mixed = "\b5Ὂg̀9! ℃ᾭG"
+	for _, c := range mixed {
+		fmt.Printf("For %q:\n", c)
+		if unicode.IsControl(c) {
+			fmt.Println("\tis control rune")
+		}
+		if unicode.IsDigit(c) {
+			fmt.Println("\tis digit rune")
+		}
+		if unicode.IsGraphic(c) {
+			fmt.Println("\tis graphic rune")
+		}
+		if unicode.IsLetter(c) {
+			fmt.Println("\tis letter rune")
+		}
+		if unicode.IsLower(c) {
+			fmt.Println("\tis lower case rune")
+		}
+		if unicode.IsMark(c) {
+			fmt.Println("\tis mark rune")
+		}
+		if unicode.IsNumber(c) {
+			fmt.Println("\tis number rune")
+		}
+		if unicode.IsPrint(c) {
+			fmt.Println("\tis printable rune")
+		}
+		if !unicode.IsPrint(c) {
+			fmt.Println("\tis not printable rune")
+		}
+		if unicode.IsPunct(c) {
+			fmt.Println("\tis punct rune")
+		}
+		if unicode.IsSpace(c) {
+			fmt.Println("\tis space rune")
+		}
+		if unicode.IsSymbol(c) {
+			fmt.Println("\tis symbol rune")
+		}
+		if unicode.IsTitle(c) {
+			fmt.Println("\tis title case rune")
+		}
+		if unicode.IsUpper(c) {
+			fmt.Println("\tis upper case rune")
+		}
+	}
+
+	// Output:
+	// For '\b':
+	// 	is control rune
+	// 	is not printable rune
+	// For '5':
+	// 	is digit rune
+	// 	is graphic rune
+	// 	is number rune
+	// 	is printable rune
+	// For 'Ὂ':
+	// 	is graphic rune
+	// 	is letter rune
+	// 	is printable rune
+	// 	is upper case rune
+	// For 'g':
+	// 	is graphic rune
+	// 	is letter rune
+	// 	is lower case rune
+	// 	is printable rune
+	// For '̀':
+	// 	is graphic rune
+	// 	is mark rune
+	// 	is printable rune
+	// For '9':
+	// 	is digit rune
+	// 	is graphic rune
+	// 	is number rune
+	// 	is printable rune
+	// For '!':
+	// 	is graphic rune
+	// 	is printable rune
+	// 	is punct rune
+	// For ' ':
+	// 	is graphic rune
+	// 	is printable rune
+	// 	is space rune
+	// For '℃':
+	// 	is graphic rune
+	// 	is printable rune
+	// 	is symbol rune
+	// For 'ᾭ':
+	// 	is graphic rune
+	// 	is letter rune
+	// 	is printable rune
+	// 	is title case rune
+	// For 'G':
+	// 	is graphic rune
+	// 	is letter rune
+	// 	is printable rune
+	// 	is upper case rune
+}
+
+func ExampleSimpleFold() {
+	fmt.Printf("%#U\n", unicode.SimpleFold('A'))      // 'a'
+	fmt.Printf("%#U\n", unicode.SimpleFold('a'))      // 'A'
+	fmt.Printf("%#U\n", unicode.SimpleFold('K'))      // 'k'
+	fmt.Printf("%#U\n", unicode.SimpleFold('k'))      // '\u212A' (Kelvin symbol, K)
+	fmt.Printf("%#U\n", unicode.SimpleFold('\u212A')) // 'K'
+	fmt.Printf("%#U\n", unicode.SimpleFold('1'))      // '1' (folds to itself)
+
+	// Output:
+	// U+0061 'a'
+	// U+0041 'A'
+	// U+006B 'k'
+	// U+212A 'K'
+	// U+004B 'K'
+	// U+0031 '1'
+}
+
+func ExampleTo() {
+	const lcG = 'g' // lower-case input
+	fmt.Printf("%#U\n", unicode.To(unicode.UpperCase, lcG))
+	fmt.Printf("%#U\n", unicode.To(unicode.LowerCase, lcG))
+	fmt.Printf("%#U\n", unicode.To(unicode.TitleCase, lcG))
+
+	const ucG = 'G' // upper-case input
+	fmt.Printf("%#U\n", unicode.To(unicode.UpperCase, ucG))
+	fmt.Printf("%#U\n", unicode.To(unicode.LowerCase, ucG))
+	fmt.Printf("%#U\n", unicode.To(unicode.TitleCase, ucG))
+
+	// Output:
+	// U+0047 'G'
+	// U+0067 'g'
+	// U+0047 'G'
+	// U+0047 'G'
+	// U+0067 'g'
+	// U+0047 'G'
+}
+
+func ExampleToLower() {
+	const ucG = 'G' // upper-case input
+	fmt.Printf("%#U\n", unicode.ToLower(ucG))
+
+	// Output:
+	// U+0067 'g'
+}
+func ExampleToTitle() {
+	const lcG = 'g' // lower-case input; named lcG as in ExampleTo
+	fmt.Printf("%#U\n", unicode.ToTitle(lcG))
+
+	// Output:
+	// U+0047 'G'
+}
+
+func ExampleToUpper() {
+	const lcG = 'g' // lower-case input; named lcG as in ExampleTo
+	fmt.Printf("%#U\n", unicode.ToUpper(lcG))
+
+	// Output:
+	// U+0047 'G'
+}
+
+func ExampleSpecialCase() {
+	t := unicode.TurkishCase
+
+	const lci = 'i' // ASCII lower-case i
+	fmt.Printf("%#U\n", t.ToLower(lci))
+	fmt.Printf("%#U\n", t.ToTitle(lci))
+	fmt.Printf("%#U\n", t.ToUpper(lci))
+
+	const uci = 'İ' // U+0130, as printed in the output below
+	fmt.Printf("%#U\n", t.ToLower(uci))
+	fmt.Printf("%#U\n", t.ToTitle(uci))
+	fmt.Printf("%#U\n", t.ToUpper(uci))
+
+	// Output:
+	// U+0069 'i'
+	// U+0130 'İ'
+	// U+0130 'İ'
+	// U+0069 'i'
+	// U+0130 'İ'
+	// U+0130 'İ'
+}
+
+func ExampleIsDigit() {
+	fmt.Printf("%t\n", unicode.IsDigit('৩')) // a digit outside ASCII is still a digit
+	fmt.Printf("%t\n", unicode.IsDigit('A'))
+	// Output:
+	// true
+	// false
+}
+
+func ExampleIsNumber() {
+	fmt.Printf("%t\n", unicode.IsNumber('Ⅷ')) // a Roman numeral rune is reported as a number
+	fmt.Printf("%t\n", unicode.IsNumber('A'))
+	// Output:
+	// true
+	// false
+}
+
+func ExampleIsLetter() {
+	fmt.Printf("%t\n", unicode.IsLetter('A'))
+	fmt.Printf("%t\n", unicode.IsLetter('7')) // a digit is not a letter
+	// Output:
+	// true
+	// false
+}
+
+func ExampleIsLower() {
+	fmt.Printf("%t\n", unicode.IsLower('a'))
+	fmt.Printf("%t\n", unicode.IsLower('A')) // an upper-case letter is not lower case
+	// Output:
+	// true
+	// false
+}
+
+func ExampleIsUpper() {
+	fmt.Printf("%t\n", unicode.IsUpper('A'))
+	fmt.Printf("%t\n", unicode.IsUpper('a')) // a lower-case letter is not upper case
+	// Output:
+	// true
+	// false
+}
+
+func ExampleIsTitle() {
+	fmt.Printf("%t\n", unicode.IsTitle('ǅ')) // a single rune that is reported as title case
+	fmt.Printf("%t\n", unicode.IsTitle('a'))
+	// Output:
+	// true
+	// false
+}
+
+func ExampleIsSpace() {
+	fmt.Printf("%t\n", unicode.IsSpace(' '))
+	fmt.Printf("%t\n", unicode.IsSpace('\n')) // newline and tab are space runes as well
+	fmt.Printf("%t\n", unicode.IsSpace('\t'))
+	fmt.Printf("%t\n", unicode.IsSpace('a'))
+	// Output:
+	// true
+	// true
+	// true
+	// false
+}
--- a/src/unicode/graphic.go
+++ b/src/unicode/graphic.go
@@ -0,0 +1,146 @@
+// Copyright 2011 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package unicode
+
+// Bit masks for each code point under U+0100, for fast lookup.
+const (
+	pC     = 1 << iota // a control character.
+	pP                 // a punctuation character.
+	pN                 // a numeral.
+	pS                 // a symbolic character.
+	pZ                 // a spacing character.
+	pLu                // an upper-case letter.
+	pLl                // a lower-case letter.
+	pp                 // a printable character according to Go's definition.
+	pg     = pp | pZ   // a graphical character according to the Unicode definition.
+	pLo    = pLl | pLu // a letter that is neither upper nor lower case.
+	pLmask = pLo       // mask covering both letter bits.
+)
+
+// GraphicRanges defines the set of graphic characters according to Unicode.
+var GraphicRanges = []*RangeTable{
+	L, M, N, P, S, Zs, // the categories listed in the IsGraphic documentation
+}
+
+// PrintRanges defines the set of printable characters according to Go.
+// ASCII space, U+0020, is handled separately.
+var PrintRanges = []*RangeTable{
+	L, M, N, P, S, // GraphicRanges without Zs
+}
+
+// IsGraphic reports whether Unicode defines the rune as a Graphic.
+// Graphic characters are the letters, marks, numbers, punctuation, symbols,
+// and spaces found in categories [L], [M], [N], [P], [S], [Zs].
+func IsGraphic(r rune) bool {
+	// Converting to uint32 avoids a separate test for negative runes,
+	// and indexing with uint8 avoids the range check.
+	if uint32(r) > MaxLatin1 {
+		return In(r, GraphicRanges...)
+	}
+	return properties[uint8(r)]&pg != 0
+}
+
+// IsPrint reports whether the rune is defined as printable by Go. Such
+// characters include letters, marks, numbers, punctuation, and symbols,
+// from categories [L], [M], [N], [P], [S], together with the ASCII space
+// character. This categorization is the same as [IsGraphic] except that the
+// only spacing character is ASCII space, U+0020.
+func IsPrint(r rune) bool {
+	if uint32(r) > MaxLatin1 {
+		return In(r, PrintRanges...)
+	}
+	return properties[uint8(r)]&pp != 0
+}
+
+// IsOneOf reports whether the rune is a member of at least one of the ranges.
+// The function "In" provides a nicer signature and should be used in preference to IsOneOf.
+func IsOneOf(ranges []*RangeTable, r rune) bool {
+	found := false
+	// Stop at the first table that contains r.
+	for i := 0; i < len(ranges) && !found; i++ {
+		found = Is(ranges[i], r)
+	}
+	return found
+}
+
+// In reports whether the rune belongs to at least one of the given tables.
+func In(r rune, ranges ...*RangeTable) bool {
+	for i := range ranges {
+		if table := ranges[i]; Is(table, r) {
+			return true
+		}
+	}
+	return false
+}
+
+// IsControl reports whether the rune is a control character.
+// The [C] ([Other]) Unicode category includes more code points
+// such as surrogates; use [Is](C, r) to test for them.
+func IsControl(r rune) bool {
+	if uint32(r) > MaxLatin1 {
+		// All control characters are < MaxLatin1.
+		return false
+	}
+	return properties[uint8(r)]&pC != 0
+}
+
+// IsLetter reports whether the rune is a letter (category [L]).
+func IsLetter(r rune) bool {
+	if uint32(r) > MaxLatin1 {
+		return isExcludingLatin(Letter, r)
+	}
+	return properties[uint8(r)]&pLmask != 0 // either letter bit set
+}
+
+// IsMark reports whether the rune is a mark character (category [M]).
+func IsMark(r rune) bool {
+	// There are no mark characters in Latin-1, so no Latin-1 special case is needed.
+	return isExcludingLatin(Mark, r)
+}
+
+// IsNumber reports whether the rune is a number (category [N]).
+func IsNumber(r rune) bool {
+	if uint32(r) > MaxLatin1 {
+		return isExcludingLatin(Number, r)
+	}
+	return properties[uint8(r)]&pN != 0
+}
+
+// IsPunct reports whether the rune is a Unicode punctuation character
+// (category [P]).
+func IsPunct(r rune) bool {
+	if uint32(r) > MaxLatin1 {
+		return Is(Punct, r)
+	}
+	return properties[uint8(r)]&pP != 0
+}
+
+// IsSpace reports whether the rune is a space character as defined
+// by Unicode's White Space property; in the Latin-1 space
+// this is
+//
+//	'\t', '\n', '\v', '\f', '\r', ' ', U+0085 (NEL), U+00A0 (NBSP).
+//
+// Other definitions of spacing characters are set by category
+// Z and property [Pattern_White_Space].
+func IsSpace(r rune) bool {
+	// This property isn't the same as Z; special-case it.
+	if uint32(r) > MaxLatin1 {
+		return isExcludingLatin(White_Space, r)
+	}
+	switch r {
+	case '\t', '\n', '\v', '\f', '\r', ' ', 0x85, 0xA0:
+		return true
+	}
+	return false
+}
+
+// IsSymbol reports whether the rune is a symbolic character.
+func IsSymbol(r rune) bool {
+	if uint32(r) > MaxLatin1 {
+		return isExcludingLatin(Symbol, r)
+	}
+	return properties[uint8(r)]&pS != 0
+}
--- a/src/unicode/graphic_test.go
+++ b/src/unicode/graphic_test.go
@@ -0,0 +1,122 @@
+// Copyright 2011 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package unicode_test
+
+import (
+	"testing"
+	. "unicode"
+)
+
+// Independently check that the special "Is" functions work
+// in the Latin-1 range through the property table.
+
+func TestIsControlLatin1(t *testing.T) {
+	for i := rune(0); i <= MaxLatin1; i++ {
+		got := IsControl(i)
+		want := false
+		switch {
+		case 0x00 <= i && i <= 0x1F:
+			want = true
+		case 0x7F <= i && i <= 0x9F:
+			want = true
+		}
+		if got != want {
+			t.Errorf("%U incorrect: got %t; want %t", i, got, want)
+		}
+	}
+}
+
+func TestIsLetterLatin1(t *testing.T) {
+	// Over Latin-1, IsLetter must agree with the Letter table.
+	for r := rune(0); r <= MaxLatin1; r++ {
+		got, want := IsLetter(r), Is(Letter, r)
+		if got != want {
+			t.Errorf("%U incorrect: got %t; want %t", r, got, want)
+		}
+	}
+}
+
+func TestIsUpperLatin1(t *testing.T) {
+	// Over Latin-1, IsUpper must agree with the Upper table.
+	for r := rune(0); r <= MaxLatin1; r++ {
+		got, want := IsUpper(r), Is(Upper, r)
+		if got != want {
+			t.Errorf("%U incorrect: got %t; want %t", r, got, want)
+		}
+	}
+}
+
+func TestIsLowerLatin1(t *testing.T) {
+	// Over Latin-1, IsLower must agree with the Lower table.
+	for r := rune(0); r <= MaxLatin1; r++ {
+		got, want := IsLower(r), Is(Lower, r)
+		if got != want {
+			t.Errorf("%U incorrect: got %t; want %t", r, got, want)
+		}
+	}
+}
+
+func TestNumberLatin1(t *testing.T) {
+	// Over Latin-1, IsNumber must agree with the Number table.
+	for r := rune(0); r <= MaxLatin1; r++ {
+		got, want := IsNumber(r), Is(Number, r)
+		if got != want {
+			t.Errorf("%U incorrect: got %t; want %t", r, got, want)
+		}
+	}
+}
+
+func TestIsPrintLatin1(t *testing.T) {
+	for r := rune(0); r <= MaxLatin1; r++ {
+		want := In(r, PrintRanges...)
+		if r == ' ' {
+			want = true
+		}
+		got := IsPrint(r)
+		if want != got {
+			t.Errorf("%U incorrect: got %t; want %t", r, got, want)
+		}
+	}
+}
+
+func TestIsGraphicLatin1(t *testing.T) {
+	// Over Latin-1, IsGraphic must agree with membership in GraphicRanges.
+	for r := rune(0); r <= MaxLatin1; r++ {
+		got, want := IsGraphic(r), In(r, GraphicRanges...)
+		if got != want {
+			t.Errorf("%U incorrect: got %t; want %t", r, got, want)
+		}
+	}
+}
+
+func TestIsPunctLatin1(t *testing.T) {
+	// Over Latin-1, IsPunct must agree with the Punct table.
+	for r := rune(0); r <= MaxLatin1; r++ {
+		got, want := IsPunct(r), Is(Punct, r)
+		if got != want {
+			t.Errorf("%U incorrect: got %t; want %t", r, got, want)
+		}
+	}
+}
+
+func TestIsSpaceLatin1(t *testing.T) {
+	// Over Latin-1, IsSpace must agree with the White_Space table.
+	for r := rune(0); r <= MaxLatin1; r++ {
+		got, want := IsSpace(r), Is(White_Space, r)
+		if got != want {
+			t.Errorf("%U incorrect: got %t; want %t", r, got, want)
+		}
+	}
+}
+
+func TestIsSymbolLatin1(t *testing.T) {
+	// Over Latin-1, IsSymbol must agree with the Symbol table.
+	for r := rune(0); r <= MaxLatin1; r++ {
+		got, want := IsSymbol(r), Is(Symbol, r)
+		if got != want {
+			t.Errorf("%U incorrect: got %t; want %t", r, got, want)
+		}
+	}
+}
--- a/src/unicode/letter.go
+++ b/src/unicode/letter.go
@@ -0,0 +1,371 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Package unicode provides data and functions to test some properties of
+// Unicode code points.
+package unicode
+
+const (
+	MaxRune         = '\U0010FFFF' // Maximum valid Unicode code point.
+	ReplacementChar = '\uFFFD'     // Represents invalid code points.
+	MaxASCII        = '\u007F'     // Maximum ASCII value.
+	MaxLatin1       = '\u00FF'     // Maximum Latin-1 value.
+)
+
+// RangeTable defines a set of Unicode code points by listing the ranges of
+// code points within the set. The ranges are listed in two slices
+// to save space: a slice of 16-bit ranges and a slice of 32-bit ranges.
+// The two slices must be in sorted order and non-overlapping.
+// Also, R32 should contain only values >= 0x10000 (1<<16).
+type RangeTable struct {
+	R16         []Range16 // sorted, non-overlapping ranges of 16-bit code points
+	R32         []Range32 // sorted, non-overlapping ranges of values >= 1<<16
+	LatinOffset int       // number of entries in R16 with Hi <= MaxLatin1
+}
+
+// Range16 represents a range of 16-bit Unicode code points. The range runs from Lo to Hi
+// inclusive and has the specified stride.
+type Range16 struct {
+	Lo     uint16
+	Hi     uint16
+	Stride uint16
+}
+
+// Range32 represents a range of Unicode code points and is used when one or
+// more of the values will not fit in 16 bits. The range runs from Lo to Hi
+// inclusive and has the specified stride. Lo and Hi must always be >= 1<<16.
+type Range32 struct {
+	Lo     uint32
+	Hi     uint32
+	Stride uint32
+}
+
+// CaseRange represents a range of Unicode code points for simple (one
+// code point to one code point) case conversion.
+// The range runs from Lo to Hi inclusive, with a fixed stride of 1. Deltas
+// are the number to add to the code point to reach the code point for a
+// different case for that character. They may be negative. If zero, it
+// means the character is in the corresponding case. There is a special
+// case representing sequences of alternating corresponding Upper and Lower
+// pairs. It appears with a fixed Delta of
+//
+//	{UpperLower, UpperLower, UpperLower}
+//
+// The constant UpperLower has an otherwise impossible delta value.
+type CaseRange struct {
+	Lo    uint32 // first code point of the range
+	Hi    uint32 // last code point of the range (inclusive)
+	Delta d      // per-case deltas, indexed by UpperCase, LowerCase, TitleCase
+}
+
+// SpecialCase represents language-specific case mappings such as Turkish.
+// Methods of SpecialCase customize (by overriding) the standard mappings.
+type SpecialCase []CaseRange // looked up by binary search, so entries must be sorted by Lo
+
+// BUG(r): There is no mechanism for full case folding, that is, for
+// characters that involve multiple runes in the input or output.
+
+// Indices into the Delta arrays inside CaseRanges for case mapping.
+const (
+	UpperCase = iota
+	LowerCase
+	TitleCase
+	MaxCase // number of case indices; the length of a Delta array
+)
+
+type d [MaxCase]rune // to make the CaseRanges text shorter; indexed by UpperCase, LowerCase, TitleCase
+
+// If the Delta field of a [CaseRange] is UpperLower, it means
+// this CaseRange represents a sequence of the form (say)
+// [Upper] [Lower] [Upper] [Lower].
+const (
+	UpperLower = MaxRune + 1 // (Cannot be a valid delta: it exceeds MaxRune.)
+)
+
+// linearMax is the largest table size for which a non-Latin1 rune is looked
+// up by linear search. Derived by running 'go test -calibrate'.
+const linearMax = 18
+
+// is16 reports whether r is in the sorted slice of 16-bit ranges.
+func is16(ranges []Range16, r uint16) bool {
+	if len(ranges) <= linearMax || r <= MaxLatin1 {
+		// Short tables and Latin-1 runes are scanned linearly from the front.
+		for i := range ranges {
+			rg := &ranges[i]
+			if r < rg.Lo {
+				return false
+			}
+			if r <= rg.Hi {
+				return rg.Stride == 1 || (r-rg.Lo)%rg.Stride == 0
+			}
+		}
+		return false
+	}
+
+	// Otherwise binary search over the half-open window [lo, hi).
+	lo, hi := 0, len(ranges)
+	for lo < hi {
+		m := int(uint(lo+hi) >> 1)
+		rg := &ranges[m]
+		switch {
+		case r < rg.Lo:
+			hi = m
+		case r > rg.Hi:
+			lo = m + 1
+		default:
+			return rg.Stride == 1 || (r-rg.Lo)%rg.Stride == 0
+		}
+	}
+	return false
+}
+
+// is32 reports whether r is in the sorted slice of 32-bit ranges.
+func is32(ranges []Range32, r uint32) bool {
+	if len(ranges) <= linearMax {
+		for i := range ranges {
+			range_ := &ranges[i]
+			if r < range_.Lo {
+				return false
+			}
+			if r <= range_.Hi {
+				return range_.Stride == 1 || (r-range_.Lo)%range_.Stride == 0
+			}
+		}
+		return false
+	}
+
+	// binary search over ranges
+	lo := 0
+	hi := len(ranges)
+	for lo < hi {
+		m := int(uint(lo+hi) >> 1)
+		range_ := &ranges[m] // by pointer, like the linear scan above: no struct copy per probe
+		if range_.Lo <= r && r <= range_.Hi {
+			return range_.Stride == 1 || (r-range_.Lo)%range_.Stride == 0
+		}
+		if r < range_.Lo {
+			hi = m
+		} else {
+			lo = m + 1
+		}
+	}
+	return false
+}
+
+// Is reports whether the rune is in the specified table of ranges.
+func Is(rangeTab *RangeTable, r rune) bool {
+	// Try the 16-bit ranges first. The comparison is done as uint32 so that
+	// a negative rune becomes a huge value and is never sent to is16.
+	if r16 := rangeTab.R16; len(r16) > 0 && uint32(r) <= uint32(r16[len(r16)-1].Hi) {
+		return is16(r16, uint16(r))
+	}
+	// Otherwise consult the 32-bit ranges, if they start at or below r.
+	if r32 := rangeTab.R32; len(r32) > 0 && r >= rune(r32[0].Lo) {
+		return is32(r32, uint32(r))
+	}
+	return false
+}
+
+// isExcludingLatin is like Is but skips the first LatinOffset entries of R16.
+func isExcludingLatin(rangeTab *RangeTable, r rune) bool {
+	off, r16 := rangeTab.LatinOffset, rangeTab.R16
+	// Compare as uint32 to correctly handle negative runes.
+	if len(r16) > off && uint32(r) <= uint32(r16[len(r16)-1].Hi) {
+		return is16(r16[off:], uint16(r))
+	}
+	if r32 := rangeTab.R32; len(r32) > 0 && r >= rune(r32[0].Lo) {
+		return is32(r32, uint32(r))
+	}
+	return false
+}
+
+// IsUpper reports whether the rune is an upper case letter.
+func IsUpper(r rune) bool {
+	if uint32(r) > MaxLatin1 {
+		return isExcludingLatin(Upper, r)
+	}
+	// Latin-1: of the two letter bits, exactly the upper-case one must be set.
+	return properties[uint8(r)]&pLmask == pLu
+}
+
+// IsLower reports whether the rune is a lower case letter.
+func IsLower(r rune) bool {
+	if uint32(r) > MaxLatin1 {
+		return isExcludingLatin(Lower, r)
+	}
+	// Latin-1: of the two letter bits, exactly the lower-case one must be set.
+	return properties[uint8(r)]&pLmask == pLl
+}
+
+// IsTitle reports whether the rune is a title case letter.
+func IsTitle(r rune) bool {
+	if r > MaxLatin1 {
+		return isExcludingLatin(Title, r)
+	}
+	return false // nothing at or below MaxLatin1 is reported as title case
+}
+
+// to maps r to the case selected by _case, looking it up in caseRange.
+// It additionally reports whether caseRange contained a mapping for r.
+func to(_case int, r rune, caseRange []CaseRange) (mappedRune rune, foundMapping bool) {
+	if _case < 0 || MaxCase <= _case {
+		return ReplacementChar, false // as reasonable an error as any
+	}
+	// binary search over ranges; caseRange must therefore be sorted by Lo
+	lo := 0
+	hi := len(caseRange)
+	for lo < hi {
+		m := int(uint(lo+hi) >> 1)
+		cr := caseRange[m]
+		if rune(cr.Lo) <= r && r <= rune(cr.Hi) {
+			delta := cr.Delta[_case]
+			if delta > MaxRune { // true for the UpperLower marker (MaxRune + 1)
+				// In an Upper-Lower sequence, which always starts with
+				// an UpperCase letter, the real deltas always look like:
+				//	{0, 1, 0}    UpperCase (Lower is next)
+				//	{-1, 0, -1}  LowerCase (Upper, Title are previous)
+				// The characters at even offsets from the beginning of the
+				// sequence are upper case; the ones at odd offsets are lower.
+				// The correct mapping can be done by clearing or setting the low
+				// bit in the sequence offset.
+				// The constants UpperCase and TitleCase are even while LowerCase
+				// is odd so we take the low bit from _case.
+				return rune(cr.Lo) + ((r-rune(cr.Lo))&^1 | rune(_case&1)), true
+			}
+			return r + delta, true
+		}
+		if r < rune(cr.Lo) {
+			hi = m
+		} else {
+			lo = m + 1
+		}
+	}
+	return r, false // no range contains r: it is returned unchanged
+}
+
+// To maps the rune to the specified case: [UpperCase], [LowerCase], or [TitleCase].
+func To(_case int, r rune) rune {
+	mapped, _ := to(_case, r, CaseRanges)
+	return mapped
+}
+
+// ToUpper maps the rune to upper case.
+func ToUpper(r rune) rune {
+	switch {
+	case r > MaxASCII:
+		return To(UpperCase, r)
+	case 'a' <= r && r <= 'z':
+		return r - ('a' - 'A')
+	}
+	return r
+}
+
+// ToLower maps the rune to lower case.
+func ToLower(r rune) rune {
+	switch {
+	case r > MaxASCII:
+		return To(LowerCase, r)
+	case 'A' <= r && r <= 'Z':
+		return r + ('a' - 'A')
+	}
+	return r
+}
+
+// ToTitle maps the rune to title case.
+func ToTitle(r rune) rune {
+	switch {
+	case r > MaxASCII:
+		return To(TitleCase, r)
+	case 'a' <= r && r <= 'z': // title case is upper case for ASCII
+		return r - ('a' - 'A')
+	}
+	return r
+}
+
+// ToUpper maps the rune to upper case giving priority to the special mapping.
+func (special SpecialCase) ToUpper(r rune) rune {
+	mapped, found := to(UpperCase, r, []CaseRange(special))
+	if found || mapped != r {
+		return mapped
+	}
+	return ToUpper(r)
+}
+
+// ToTitle maps the rune to title case giving priority to the special mapping.
+func (special SpecialCase) ToTitle(r rune) rune {
+	mapped, found := to(TitleCase, r, []CaseRange(special))
+	if found || mapped != r {
+		return mapped
+	}
+	return ToTitle(r)
+}
+
+// ToLower maps the rune to lower case giving priority to the special mapping.
+func (special SpecialCase) ToLower(r rune) rune {
+	mapped, found := to(LowerCase, r, []CaseRange(special))
+	if found || mapped != r {
+		return mapped
+	}
+	return ToLower(r)
+}
+
+// caseOrbit is defined in tables.go as []foldPair. Right now all the
+// entries fit in uint16, so use uint16. If that changes, compilation
+// will fail (the constants in the composite literal will not fit in uint16)
+// and the types here can change to uint32.
+type foldPair struct {
+	From uint16 // code point that SimpleFold searches for
+	To   uint16 // value SimpleFold returns for From
+}
+
+// SimpleFold iterates over Unicode code points equivalent under
+// the Unicode-defined simple case folding. Among the code points
+// equivalent to r (including r itself), SimpleFold returns the
+// smallest rune > r if one exists, or else the smallest rune >= 0.
+// If r is not a valid Unicode code point, SimpleFold(r) returns r.
+//
+// For example:
+//
+//	SimpleFold('A') = 'a'
+//	SimpleFold('a') = 'A'
+//
+//	SimpleFold('K') = 'k'
+//	SimpleFold('k') = '\u212A' (Kelvin symbol, K)
+//	SimpleFold('\u212A') = 'K'
+//
+//	SimpleFold('1') = '1'
+//
+//	SimpleFold(-2) = -2
+func SimpleFold(r rune) rune {
+	switch {
+	case r < 0 || r > MaxRune:
+		return r
+	case int(r) < len(asciiFold):
+		return rune(asciiFold[r])
+	}
+
+	// Consult caseOrbit table for special cases: binary search for the
+	// first entry whose From is not below r.
+	first, limit := 0, len(caseOrbit)
+	for first < limit {
+		mid := int(uint(first+limit) >> 1)
+		if rune(caseOrbit[mid].From) >= r {
+			limit = mid
+		} else {
+			first = mid + 1
+		}
+	}
+	if first < len(caseOrbit) && rune(caseOrbit[first].From) == r {
+		return rune(caseOrbit[first].To)
+	}
+
+	// No folding specified. This is a one- or two-element
+	// equivalence class containing r and ToLower(r)
+	// and ToUpper(r) if they are different from r.
+	lower := ToLower(r)
+	if lower == r {
+		return ToUpper(r)
+	}
+	return lower
+}
--- a/src/unicode/letter_test.go
+++ b/src/unicode/letter_test.go
@@ -0,0 +1,644 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package unicode_test
+
+import (
+	"flag"
+	"fmt"
+	"runtime"
+	"sort"
+	"strings"
+	"testing"
+	. "unicode"
+)
+
+// upperTest lists runes for which TestIsUpper expects IsUpper to report true
+// and TestIsLetter expects IsLetter to report true.
+var upperTest = []rune{
+	0x41,
+	0xc0,
+	0xd8,
+	0x100,
+	0x139,
+	0x14a,
+	0x178,
+	0x181,
+	0x376,
+	0x3cf,
+	0x13bd,
+	0x1f2a,
+	0x2102,
+	0x2c00,
+	0x2c10,
+	0x2c20,
+	0xa650,
+	0xa722,
+	0xff3a,
+	0x10400,
+	0x1d400,
+	0x1d7ca,
+}
+
+// notupperTest lists runes for which TestIsUpper expects IsUpper to report
+// false.
+var notupperTest = []rune{
+	0x40,
+	0x5b,
+	0x61,
+	0x185,
+	0x1b0,
+	0x377,
+	0x387,
+	0x2150,
+	0xab7d,
+	0xffff,
+	0x10000,
+}
+
+// letterTest lists runes for which TestIsLetter expects IsLetter to report
+// true; TestIsSpace reuses the list as runes that must not be spaces.
+var letterTest = []rune{
+	0x41,
+	0x61,
+	0xaa,
+	0xba,
+	0xc8,
+	0xdb,
+	0xf9,
+	0x2ec,
+	0x535,
+	0x620,
+	0x6e6,
+	0x93d,
+	0xa15,
+	0xb99,
+	0xdc0,
+	0xedd,
+	0x1000,
+	0x1200,
+	0x1312,
+	0x1401,
+	0x2c00,
+	0xa800,
+	0xf900,
+	0xfa30,
+	0xffda,
+	0xffdc,
+	0x10000,
+	0x10300,
+	0x10400,
+	0x20000,
+	0x2f800,
+	0x2fa1d,
+}
+
+// notletterTest lists runes for which both IsLetter (TestIsLetter) and
+// IsUpper (TestIsUpper) are expected to report false.
+var notletterTest = []rune{
+	0x20,
+	0x35,
+	0x375,
+	0x619,
+	0x700,
+	0x1885,
+	0xfffe,
+	0x1ffff,
+	0x10ffff,
+}
+
+// spaceTest lists runes for which TestIsSpace expects IsSpace to report true.
+// Contains all the special cased Latin-1 chars.
+var spaceTest = []rune{
+	0x09,
+	0x0a,
+	0x0b,
+	0x0c,
+	0x0d,
+	0x20,
+	0x85,
+	0xA0,
+	0x2000,
+	0x3000,
+}
+
+// caseT is one case-mapping expectation: mapping in with the case selector
+// cas is expected to yield out.
+type caseT struct {
+	cas     int  // case selector handed to To (UpperCase, LowerCase, TitleCase, or an invalid value)
+	in, out rune // input rune and the expected mapped rune
+}
+
+// caseTest is the table shared by TestTo, TestToUpperCase, TestToLowerCase
+// and TestToTitleCase. Each entry is {case selector, input, expected output}.
+// The semicolon-separated comment lines appear to be the corresponding
+// UnicodeData.txt records.
+var caseTest = []caseT{
+	// errors
+	{-1, '\n', 0xFFFD},
+	{UpperCase, -1, -1},
+	{UpperCase, 1 << 30, 1 << 30},
+
+	// ASCII (special-cased so test carefully)
+	{UpperCase, '\n', '\n'},
+	{UpperCase, 'a', 'A'},
+	{UpperCase, 'A', 'A'},
+	{UpperCase, '7', '7'},
+	{LowerCase, '\n', '\n'},
+	{LowerCase, 'a', 'a'},
+	{LowerCase, 'A', 'a'},
+	{LowerCase, '7', '7'},
+	{TitleCase, '\n', '\n'},
+	{TitleCase, 'a', 'A'},
+	{TitleCase, 'A', 'A'},
+	{TitleCase, '7', '7'},
+
+	// Latin-1: easy to read the tests!
+	{UpperCase, 0x80, 0x80},
+	{UpperCase, 'Å', 'Å'},
+	{UpperCase, 'å', 'Å'},
+	{LowerCase, 0x80, 0x80},
+	{LowerCase, 'Å', 'å'},
+	{LowerCase, 'å', 'å'},
+	{TitleCase, 0x80, 0x80},
+	{TitleCase, 'Å', 'Å'},
+	{TitleCase, 'å', 'Å'},
+
+	// 0131;LATIN SMALL LETTER DOTLESS I;Ll;0;L;;;;;N;;;0049;;0049
+	{UpperCase, 0x0131, 'I'},
+	{LowerCase, 0x0131, 0x0131},
+	{TitleCase, 0x0131, 'I'},
+
+	// 0133;LATIN SMALL LIGATURE IJ;Ll;0;L;<compat> 0069 006A;;;;N;LATIN SMALL LETTER I J;;0132;;0132
+	{UpperCase, 0x0133, 0x0132},
+	{LowerCase, 0x0133, 0x0133},
+	{TitleCase, 0x0133, 0x0132},
+
+	// 212A;KELVIN SIGN;Lu;0;L;004B;;;;N;DEGREES KELVIN;;;006B;
+	{UpperCase, 0x212A, 0x212A},
+	{LowerCase, 0x212A, 'k'},
+	{TitleCase, 0x212A, 0x212A},
+
+	// From an UpperLower sequence
+	// A640;CYRILLIC CAPITAL LETTER ZEMLYA;Lu;0;L;;;;;N;;;;A641;
+	{UpperCase, 0xA640, 0xA640},
+	{LowerCase, 0xA640, 0xA641},
+	{TitleCase, 0xA640, 0xA640},
+	// A641;CYRILLIC SMALL LETTER ZEMLYA;Ll;0;L;;;;;N;;;A640;;A640
+	{UpperCase, 0xA641, 0xA640},
+	{LowerCase, 0xA641, 0xA641},
+	{TitleCase, 0xA641, 0xA640},
+	// A64E;CYRILLIC CAPITAL LETTER NEUTRAL YER;Lu;0;L;;;;;N;;;;A64F;
+	{UpperCase, 0xA64E, 0xA64E},
+	{LowerCase, 0xA64E, 0xA64F},
+	{TitleCase, 0xA64E, 0xA64E},
+	// A65F;CYRILLIC SMALL LETTER YN;Ll;0;L;;;;;N;;;A65E;;A65E
+	{UpperCase, 0xA65F, 0xA65E},
+	{LowerCase, 0xA65F, 0xA65F},
+	{TitleCase, 0xA65F, 0xA65E},
+
+	// From another UpperLower sequence
+	// 0139;LATIN CAPITAL LETTER L WITH ACUTE;Lu;0;L;004C 0301;;;;N;LATIN CAPITAL LETTER L ACUTE;;;013A;
+	{UpperCase, 0x0139, 0x0139},
+	{LowerCase, 0x0139, 0x013A},
+	{TitleCase, 0x0139, 0x0139},
+	// 013F;LATIN CAPITAL LETTER L WITH MIDDLE DOT;Lu;0;L;<compat> 004C 00B7;;;;N;;;;0140;
+	{UpperCase, 0x013f, 0x013f},
+	{LowerCase, 0x013f, 0x0140},
+	{TitleCase, 0x013f, 0x013f},
+	// 0148;LATIN SMALL LETTER N WITH CARON;Ll;0;L;006E 030C;;;;N;LATIN SMALL LETTER N HACEK;;0147;;0147
+	{UpperCase, 0x0148, 0x0147},
+	{LowerCase, 0x0148, 0x0148},
+	{TitleCase, 0x0148, 0x0147},
+
+	// Lowercase lower than uppercase.
+	// AB78;CHEROKEE SMALL LETTER GE;Ll;0;L;;;;;N;;;13A8;;13A8
+	{UpperCase, 0xab78, 0x13a8},
+	{LowerCase, 0xab78, 0xab78},
+	{TitleCase, 0xab78, 0x13a8},
+	{UpperCase, 0x13a8, 0x13a8},
+	{LowerCase, 0x13a8, 0xab78},
+	{TitleCase, 0x13a8, 0x13a8},
+
+	// Last block in the 5.1.0 table
+	// 10400;DESERET CAPITAL LETTER LONG I;Lu;0;L;;;;;N;;;;10428;
+	{UpperCase, 0x10400, 0x10400},
+	{LowerCase, 0x10400, 0x10428},
+	{TitleCase, 0x10400, 0x10400},
+	// 10427;DESERET CAPITAL LETTER EW;Lu;0;L;;;;;N;;;;1044F;
+	{UpperCase, 0x10427, 0x10427},
+	{LowerCase, 0x10427, 0x1044F},
+	{TitleCase, 0x10427, 0x10427},
+	// 10428;DESERET SMALL LETTER LONG I;Ll;0;L;;;;;N;;;10400;;10400
+	{UpperCase, 0x10428, 0x10400},
+	{LowerCase, 0x10428, 0x10428},
+	{TitleCase, 0x10428, 0x10400},
+	// 1044F;DESERET SMALL LETTER EW;Ll;0;L;;;;;N;;;10427;;10427
+	{UpperCase, 0x1044F, 0x10427},
+	{LowerCase, 0x1044F, 0x1044F},
+	{TitleCase, 0x1044F, 0x10427},
+
+	// First one not in the 5.1.0 table
+	// 10450;SHAVIAN LETTER PEEP;Lo;0;L;;;;;N;;;;;
+	{UpperCase, 0x10450, 0x10450},
+	{LowerCase, 0x10450, 0x10450},
+	{TitleCase, 0x10450, 0x10450},
+
+	// Non-letters with case.
+	{LowerCase, 0x2161, 0x2171},
+	{UpperCase, 0x0345, 0x0399},
+}
+
+// TestIsLetter checks that IsLetter accepts every rune in upperTest and
+// letterTest and rejects every rune in notletterTest.
+func TestIsLetter(t *testing.T) {
+	for _, letters := range [][]rune{upperTest, letterTest} {
+		for _, c := range letters {
+			if IsLetter(c) {
+				continue
+			}
+			t.Errorf("IsLetter(U+%04X) = false, want true", c)
+		}
+	}
+	for _, c := range notletterTest {
+		if !IsLetter(c) {
+			continue
+		}
+		t.Errorf("IsLetter(U+%04X) = true, want false", c)
+	}
+}
+
+// TestIsUpper checks that IsUpper accepts every rune in upperTest and rejects
+// every rune in notupperTest and notletterTest.
+func TestIsUpper(t *testing.T) {
+	for _, c := range upperTest {
+		if IsUpper(c) {
+			continue
+		}
+		t.Errorf("IsUpper(U+%04X) = false, want true", c)
+	}
+	for _, others := range [][]rune{notupperTest, notletterTest} {
+		for _, c := range others {
+			if !IsUpper(c) {
+				continue
+			}
+			t.Errorf("IsUpper(U+%04X) = true, want false", c)
+		}
+	}
+}
+
+// caseString names the case selector c for use in failure messages.
+// Any value other than UpperCase, LowerCase or TitleCase yields "ErrorCase".
+func caseString(c int) string {
+	if c == UpperCase {
+		return "UpperCase"
+	}
+	if c == LowerCase {
+		return "LowerCase"
+	}
+	if c == TitleCase {
+		return "TitleCase"
+	}
+	return "ErrorCase"
+}
+
+// TestTo runs every caseTest entry through To.
+func TestTo(t *testing.T) {
+	for _, tc := range caseTest {
+		if got := To(tc.cas, tc.in); got != tc.out {
+			t.Errorf("To(U+%04X, %s) = U+%04X want U+%04X", tc.in, caseString(tc.cas), got, tc.out)
+		}
+	}
+}
+
+// TestToUpperCase runs the UpperCase entries of caseTest through ToUpper.
+func TestToUpperCase(t *testing.T) {
+	for _, tc := range caseTest {
+		if tc.cas == UpperCase {
+			if got := ToUpper(tc.in); got != tc.out {
+				t.Errorf("ToUpper(U+%04X) = U+%04X want U+%04X", tc.in, got, tc.out)
+			}
+		}
+	}
+}
+
+// TestToLowerCase runs the LowerCase entries of caseTest through ToLower.
+func TestToLowerCase(t *testing.T) {
+	for _, tc := range caseTest {
+		if tc.cas == LowerCase {
+			if got := ToLower(tc.in); got != tc.out {
+				t.Errorf("ToLower(U+%04X) = U+%04X want U+%04X", tc.in, got, tc.out)
+			}
+		}
+	}
+}
+
+// TestToTitleCase runs the TitleCase entries of caseTest through ToTitle.
+func TestToTitleCase(t *testing.T) {
+	for _, tc := range caseTest {
+		if tc.cas == TitleCase {
+			if got := ToTitle(tc.in); got != tc.out {
+				t.Errorf("ToTitle(U+%04X) = U+%04X want U+%04X", tc.in, got, tc.out)
+			}
+		}
+	}
+}
+
+// TestIsSpace checks that IsSpace accepts every rune in spaceTest and rejects
+// every rune in letterTest.
+func TestIsSpace(t *testing.T) {
+	for _, r := range spaceTest {
+		if IsSpace(r) {
+			continue
+		}
+		t.Errorf("IsSpace(U+%04X) = false; want true", r)
+	}
+	for _, r := range letterTest {
+		if !IsSpace(r) {
+			continue
+		}
+		t.Errorf("IsSpace(U+%04X) = true; want false", r)
+	}
+}
+
+// TestLetterOptimizations verifies, for every rune in the Latin-1 range, that
+// the optimized predicates and case mappers (IsLetter, ToUpper, ...) agree
+// with the table-driven Is and To. Only the Latin-1 range needs checking.
+func TestLetterOptimizations(t *testing.T) {
+	checks := []struct {
+		format string               // failure message; takes the rune as its only argument
+		agree  func(r rune) bool // reports whether both implementations agree on r
+	}{
+		{"IsLetter(U+%04X) disagrees with Is(Letter)", func(r rune) bool { return Is(Letter, r) == IsLetter(r) }},
+		{"IsUpper(U+%04X) disagrees with Is(Upper)", func(r rune) bool { return Is(Upper, r) == IsUpper(r) }},
+		{"IsLower(U+%04X) disagrees with Is(Lower)", func(r rune) bool { return Is(Lower, r) == IsLower(r) }},
+		{"IsTitle(U+%04X) disagrees with Is(Title)", func(r rune) bool { return Is(Title, r) == IsTitle(r) }},
+		{"IsSpace(U+%04X) disagrees with Is(White_Space)", func(r rune) bool { return Is(White_Space, r) == IsSpace(r) }},
+		{"ToUpper(U+%04X) disagrees with To(Upper)", func(r rune) bool { return To(UpperCase, r) == ToUpper(r) }},
+		{"ToLower(U+%04X) disagrees with To(Lower)", func(r rune) bool { return To(LowerCase, r) == ToLower(r) }},
+		{"ToTitle(U+%04X) disagrees with To(Title)", func(r rune) bool { return To(TitleCase, r) == ToTitle(r) }},
+	}
+	for r := rune(0); r <= MaxLatin1; r++ {
+		for _, c := range checks {
+			if !c.agree(r) {
+				t.Errorf(c.format, r)
+			}
+		}
+	}
+}
+
+// TestTurkishCase checks TurkishCase.ToLower, ToUpper and ToTitle against the
+// Turkish alphabet. lower[i] and upper[i] are the two cases of the same
+// letter, so each mapping must be the identity on its own case and must
+// convert to the paired letter from the other case.
+func TestTurkishCase(t *testing.T) {
+	lower := []rune("abcçdefgğhıijklmnoöprsştuüvyz")
+	upper := []rune("ABCÇDEFGĞHIİJKLMNOÖPRSŞTUÜVYZ")
+	for i, l := range lower {
+		u := upper[i]
+		if got := TurkishCase.ToLower(l); got != l {
+			t.Errorf("lower(U+%04X) is U+%04X not U+%04X", l, got, l)
+		}
+		if got := TurkishCase.ToUpper(u); got != u {
+			t.Errorf("upper(U+%04X) is U+%04X not U+%04X", u, got, u)
+		}
+		if got := TurkishCase.ToUpper(l); got != u {
+			t.Errorf("upper(U+%04X) is U+%04X not U+%04X", l, got, u)
+		}
+		// Report the value actually computed for u; the message used to
+		// print ToLower(l), which is not the call that failed.
+		if got := TurkishCase.ToLower(u); got != l {
+			t.Errorf("lower(U+%04X) is U+%04X not U+%04X", u, got, l)
+		}
+		if got := TurkishCase.ToTitle(u); got != u {
+			t.Errorf("title(U+%04X) is U+%04X not U+%04X", u, got, u)
+		}
+		if got := TurkishCase.ToTitle(l); got != u {
+			t.Errorf("title(U+%04X) is U+%04X not U+%04X", l, got, u)
+		}
+	}
+}
+
+// simpleFoldTests lists SimpleFold cycles. TestSimpleFold reads each string
+// as a sequence of runes in which SimpleFold of one rune must yield the next,
+// with the last rune folding back to the first.
+var simpleFoldTests = []string{
+	// SimpleFold(x) returns the next equivalent rune > x or wraps
+	// around to smaller values.
+
+	// Easy cases.
+	"Aa",
+	"δΔ",
+
+	// ASCII special cases.
+	"KkK",
+	"Ssſ",
+
+	// Non-ASCII special cases.
+	"ρϱΡ",
+	"ͅΙιι",
+
+	// Extra special cases: has lower/upper but no case fold.
+	"İ",
+	"ı",
+
+	// Upper comes before lower (Cherokee).
+	"\u13b0\uab80",
+}
+
+// TestSimpleFold walks every cycle in simpleFoldTests: starting from the last
+// rune of the cycle, SimpleFold of the current rune must equal the next rune
+// in the string. It also checks that an invalid (negative) rune is returned
+// unchanged.
+func TestSimpleFold(t *testing.T) {
+	for _, tt := range simpleFoldTests {
+		cycle := []rune(tt)
+		r := cycle[len(cycle)-1]
+		for _, out := range cycle {
+			// Keep the result in its own variable: shadowing r here made
+			// the failure message print the result in place of the input.
+			if got := SimpleFold(r); got != out {
+				t.Errorf("SimpleFold(%#U) = %#U, want %#U", r, got, out)
+			}
+			r = out
+		}
+	}
+
+	if r := SimpleFold(-42); r != -42 {
+		t.Errorf("SimpleFold(-42) = %v, want -42", r)
+	}
+}
+
+// Running 'go test -calibrate' runs the calibration to find a plausible
+// cutoff point for linear search of a range list vs. binary search.
+// We create a fake table and then time how long it takes to do a
+// sequence of searches within that table, for all possible inputs
+// relative to the ranges (something before all, in each, between each, after all).
+// This assumes that all possible runes are equally likely.
+// In practice most runes are ASCII so this is a conservative estimate
+// of an effective cutoff value. In practice we could probably set it higher
+// than what this function recommends.
+
+// calibrate is the -calibrate flag; TestCalibrate returns immediately unless
+// it is set.
+var calibrate = flag.Bool("calibrate", false, "compute crossover for linear vs. binary search")
+
+// TestCalibrate benchmarks linear against binary over fake tables of
+// increasing size and prints the smallest table size (searched in [0, 64))
+// at which linear search is more than 10% slower than binary search.
+// It does nothing unless the -calibrate flag is set.
+func TestCalibrate(t *testing.T) {
+	if !*calibrate {
+		return
+	}
+
+	// NOTE(review): the warning fires only on amd64; the reason for singling
+	// out that architecture is not evident from this file.
+	if runtime.GOARCH == "amd64" {
+		fmt.Printf("warning: running calibration on %s\n", runtime.GOARCH)
+	}
+
+	// Find the point where binary search wins by more than 10%.
+	// The 10% bias gives linear search an edge when they're close,
+	// because on predominantly ASCII inputs linear search is even
+	// better than our benchmarks measure.
+	n := sort.Search(64, func(n int) bool {
+		tab := fakeTable(n)
+		// Both benchmark bodies probe every value in [0, n*5+20], which
+		// spans the whole fake table plus a margin on either side.
+		blinear := func(b *testing.B) {
+			tab := tab
+			max := n*5 + 20
+			for i := 0; i < b.N; i++ {
+				for j := 0; j <= max; j++ {
+					linear(tab, uint16(j))
+				}
+			}
+		}
+		bbinary := func(b *testing.B) {
+			tab := tab
+			max := n*5 + 20
+			for i := 0; i < b.N; i++ {
+				for j := 0; j <= max; j++ {
+					binary(tab, uint16(j))
+				}
+			}
+		}
+		bmlinear := testing.Benchmark(blinear)
+		bmbinary := testing.Benchmark(bbinary)
+		fmt.Printf("n=%d: linear=%d binary=%d\n", n, bmlinear.NsPerOp(), bmbinary.NsPerOp())
+		// True once linear costs more than 110% of binary.
+		return bmlinear.NsPerOp()*100 > bmbinary.NsPerOp()*110
+	})
+	fmt.Printf("calibration: linear cutoff = %d\n", n)
+}
+
+// fakeTable builds n stride-1 ranges for the calibration benchmark. Range i
+// covers [i*5+10, i*5+12], so consecutive ranges are separated by gaps.
+// It returns nil when n <= 0.
+func fakeTable(n int) []Range16 {
+	var tab []Range16
+	for lo := 10; len(tab) < n; lo += 5 {
+		tab = append(tab, Range16{Lo: uint16(lo), Hi: uint16(lo + 2), Stride: 1})
+	}
+	return tab
+}
+
+// linear reports whether r belongs to one of ranges, scanning them in order.
+// The scan gives up at the first range whose Lo exceeds r, so ranges is
+// expected to be sorted by Lo (as fakeTable's output is). Within a range, r
+// matches only when its offset from Lo is a multiple of Stride.
+func linear(ranges []Range16, r uint16) bool {
+	for i := range ranges {
+		range_ := &ranges[i]
+		if r < range_.Lo {
+			return false
+		}
+		if r <= range_.Hi {
+			return (r-range_.Lo)%range_.Stride == 0
+		}
+	}
+	return false
+}
+
+// binary reports whether r belongs to one of ranges, using binary search.
+// ranges is expected to be sorted and non-overlapping (as fakeTable's output
+// is). Within the range that contains r, r matches only when its offset from
+// Lo is a multiple of Stride.
+func binary(ranges []Range16, r uint16) bool {
+	// binary search over ranges
+	lo := 0
+	hi := len(ranges)
+	for lo < hi {
+		// uint conversion keeps the midpoint correct even if lo+hi overflows int.
+		m := int(uint(lo+hi) >> 1)
+		range_ := &ranges[m]
+		if range_.Lo <= r && r <= range_.Hi {
+			return (r-range_.Lo)%range_.Stride == 0
+		}
+		if r < range_.Lo {
+			hi = m
+		} else {
+			lo = m + 1
+		}
+	}
+	return false
+}
+
+// TestLatinOffset checks, for every table in the exported table maps, that
+// LatinOffset equals the number of leading R16 entries whose Hi is at most
+// MaxLatin1.
+func TestLatinOffset(t *testing.T) {
+	tableMaps := [...]map[string]*RangeTable{
+		Categories,
+		FoldCategory,
+		FoldScript,
+		Properties,
+		Scripts,
+	}
+	for _, tables := range tableMaps {
+		for name, tab := range tables {
+			want := 0
+			for _, r16 := range tab.R16 {
+				if r16.Hi > MaxLatin1 {
+					break
+				}
+				want++
+			}
+			if tab.LatinOffset != want {
+				t.Errorf("%s: LatinOffset=%d, want %d", name, tab.LatinOffset, want)
+			}
+		}
+	}
+}
+
+// TestSpecialCaseNoMapping covers issue 25636: a SpecialCase entry whose
+// deltas are all zero must be honored as "no change" and must not fall
+// through to the default mapping.
+func TestSpecialCaseNoMapping(t *testing.T) {
+	// 'A' maps to itself for upper, lower and title case.
+	keepCapitalA := CaseRange{Lo: 'A', Hi: 'A', Delta: [MaxCase]rune{}}
+	special := SpecialCase{keepCapitalA}
+	const want = "Abc"
+	if got := strings.ToLowerSpecial(special, "ABC"); got != want {
+		t.Errorf("got %q; want %q", got, want)
+	}
+}
+
+// TestNegativeRune covers issue 43254: every classification function must
+// report false for a negative rune, even when the rune's low 8 or 16 bits
+// look like a valid code point.
+func TestNegativeRune(t *testing.T) {
+	// Issue 43254
+	// These tests cover negative rune handling by testing values which,
+	// when cast to uint8 or uint16, look like a particular valid rune.
+	// This package has Latin-1-specific optimizations, so we test all of
+	// Latin-1 and representative non-Latin-1 values in the character
+	// categories covered by IsGraphic, etc.
+	nonLatin1 := []uint32{
+		// Lu: LATIN CAPITAL LETTER A WITH MACRON
+		0x0100,
+		// Ll: LATIN SMALL LETTER A WITH MACRON
+		0x0101,
+		// Lt: LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON
+		0x01C5,
+		// M: COMBINING GRAVE ACCENT
+		0x0300,
+		// Nd: ARABIC-INDIC DIGIT ZERO
+		0x0660,
+		// P: GREEK QUESTION MARK
+		0x037E,
+		// S: MODIFIER LETTER LEFT ARROWHEAD
+		0x02C2,
+		// Z: OGHAM SPACE MARK
+		0x1680,
+	}
+
+	// Every Latin-1 value, 0 through MaxLatin1 inclusive, followed by the
+	// non-Latin-1 samples. The loop bound is inclusive so that 0xFF itself
+	// is exercised.
+	bases := make([]uint32, 0, MaxLatin1+1+len(nonLatin1))
+	for b := uint32(0); b <= MaxLatin1; b++ {
+		bases = append(bases, b)
+	}
+	bases = append(bases, nonLatin1...)
+
+	checks := []struct {
+		format string               // failure message; takes base as its only argument
+		is     func(r rune) bool // predicate that must report false
+	}{
+		{"Is(Letter, 0x%x - 1<<31) = true, want false", func(r rune) bool { return Is(Letter, r) }},
+		{"IsControl(0x%x - 1<<31) = true, want false", IsControl},
+		{"IsDigit(0x%x - 1<<31) = true, want false", IsDigit},
+		{"IsGraphic(0x%x - 1<<31) = true, want false", IsGraphic},
+		{"IsLetter(0x%x - 1<<31) = true, want false", IsLetter},
+		{"IsLower(0x%x - 1<<31) = true, want false", IsLower},
+		{"IsMark(0x%x - 1<<31) = true, want false", IsMark},
+		{"IsNumber(0x%x - 1<<31) = true, want false", IsNumber},
+		{"IsPrint(0x%x - 1<<31) = true, want false", IsPrint},
+		{"IsPunct(0x%x - 1<<31) = true, want false", IsPunct},
+		{"IsSpace(0x%x - 1<<31) = true, want false", IsSpace},
+		{"IsSymbol(0x%x - 1<<31) = true, want false", IsSymbol},
+		{"IsTitle(0x%x - 1<<31) = true, want false", IsTitle},
+		{"IsUpper(0x%x - 1<<31) = true, want false", IsUpper},
+	}
+
+	for _, base := range bases {
+		// Note r is negative, but uint8(r) == uint8(base) and
+		// uint16(r) == uint16(base).
+		r := rune(base - 1<<31)
+		for _, c := range checks {
+			if c.is(r) {
+				t.Errorf(c.format, base)
+			}
+		}
+	}
+}
--- a/src/unicode/script_test.go
+++ b/src/unicode/script_test.go
@@ -0,0 +1,131 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package unicode_test
+
+import (
+	"testing"
+	. "unicode"
+)
+
+// T pairs a rune with the name of a table that is expected to contain it.
+type T struct {
+	rune   rune   // code point under test
+	script string // key into the table map (Categories or Properties in the tests below)
+}
+
+// inCategoryTest gives, for each key of Categories, one rune that
+// TestCategories expects the named category table to contain.
+var inCategoryTest = []T{
+	{0x0081, "Cc"},
+	{0x200B, "Cf"},
+	{0xf0000, "Co"},
+	{0xdb80, "Cs"},
+	{0x0236, "Ll"},
+	{0x1d9d, "Lm"},
+	{0x07cf, "Lo"},
+	{0x1f8a, "Lt"},
+	{0x03ff, "Lu"},
+	{0x0bc1, "Mc"},
+	{0x20df, "Me"},
+	{0x07f0, "Mn"},
+	{0x1bb2, "Nd"},
+	{0x10147, "Nl"},
+	{0x2478, "No"},
+	{0xfe33, "Pc"},
+	{0x2011, "Pd"},
+	{0x301e, "Pe"},
+	{0x2e03, "Pf"},
+	{0x2e02, "Pi"},
+	{0x0022, "Po"},
+	{0x2770, "Ps"},
+	{0x00a4, "Sc"},
+	{0xa711, "Sk"},
+	{0x25f9, "Sm"},
+	{0x2108, "So"},
+	{0x2028, "Zl"},
+	{0x2029, "Zp"},
+	{0x202f, "Zs"},
+	// Unifieds.
+	{0x04aa, "L"},
+	{0x0009, "C"},
+	{0x1712, "M"},
+	{0x0031, "N"},
+	{0x00bb, "P"},
+	{0x00a2, "S"},
+	{0x00a0, "Z"},
+}
+
+// inPropTest gives, for each key of Properties, one rune that TestProperties
+// expects the named property table to contain.
+var inPropTest = []T{
+	{0x0046, "ASCII_Hex_Digit"},
+	{0x200F, "Bidi_Control"},
+	{0x2212, "Dash"},
+	{0xE0001, "Deprecated"},
+	{0x00B7, "Diacritic"},
+	{0x30FE, "Extender"},
+	{0xFF46, "Hex_Digit"},
+	{0x2E17, "Hyphen"},
+	{0x2FFB, "IDS_Binary_Operator"},
+	{0x2FF3, "IDS_Trinary_Operator"},
+	{0xFA6A, "Ideographic"},
+	{0x200D, "Join_Control"},
+	{0x0EC4, "Logical_Order_Exception"},
+	{0x2FFFF, "Noncharacter_Code_Point"},
+	{0x065E, "Other_Alphabetic"},
+	{0x2065, "Other_Default_Ignorable_Code_Point"},
+	{0x0BD7, "Other_Grapheme_Extend"},
+	{0x0387, "Other_ID_Continue"},
+	{0x212E, "Other_ID_Start"},
+	{0x2094, "Other_Lowercase"},
+	{0x2040, "Other_Math"},
+	{0x216F, "Other_Uppercase"},
+	{0x0027, "Pattern_Syntax"},
+	{0x0020, "Pattern_White_Space"},
+	{0x06DD, "Prepended_Concatenation_Mark"},
+	{0x300D, "Quotation_Mark"},
+	{0x2EF3, "Radical"},
+	{0x1f1ff, "Regional_Indicator"},
+	{0x061F, "STerm"}, // Deprecated alias of Sentence_Terminal
+	{0x061F, "Sentence_Terminal"},
+	{0x2071, "Soft_Dotted"},
+	{0x003A, "Terminal_Punctuation"},
+	{0x9FC3, "Unified_Ideograph"},
+	{0xFE0F, "Variation_Selector"},
+	{0x0020, "White_Space"},
+}
+
+// TestCategories checks every inCategoryTest entry against Categories and
+// then reports any category that no entry exercised.
+func TestCategories(t *testing.T) {
+	untested := make(map[string]bool, len(Categories))
+	for name := range Categories {
+		untested[name] = true
+	}
+	for _, tc := range inCategoryTest {
+		tab, known := Categories[tc.script]
+		if !known {
+			t.Fatal(tc.script, "not a known category")
+		}
+		if !Is(tab, tc.rune) {
+			t.Errorf("IsCategory(%U, %s) = false, want true", tc.rune, tc.script)
+		}
+		delete(untested, tc.script)
+	}
+	for name := range untested {
+		t.Error("category not tested:", name)
+	}
+}
+
+// TestProperties checks every inPropTest entry against Properties and then
+// reports any property that no entry exercised.
+func TestProperties(t *testing.T) {
+	notTested := make(map[string]bool)
+	for k := range Properties {
+		notTested[k] = true
+	}
+	for _, test := range inPropTest {
+		if _, ok := Properties[test.script]; !ok {
+			t.Fatal(test.script, "not a known prop")
+		}
+		if !Is(Properties[test.script], test.rune) {
+			// Label the failure as a property check; the message used to
+			// say "IsCategory", copied from TestCategories.
+			t.Errorf("IsProperty(%U, %s) = false, want true", test.rune, test.script)
+		}
+		delete(notTested, test.script)
+	}
+	for k := range notTested {
+		t.Error("property not tested:", k)
+	}
+}
--- a/src/unicode/tables.go
+++ b/src/unicode/tables.go
--- a/src/unicode/utf16/export_test.go
+++ b/src/unicode/utf16/export_test.go
@@ -0,0 +1,14 @@
+// Copyright 2012 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package utf16
+
+// Extra names for constants so we can validate them during testing.
+// Each exported name aliases the unexported constant of the same name
+// declared in utf16.go.
+const (
+	Surr1           = surr1
+	Surr3           = surr3
+	SurrSelf        = surrSelf
+	MaxRune         = maxRune
+	ReplacementChar = replacementChar
+)
--- a/src/unicode/utf16/utf16.go
+++ b/src/unicode/utf16/utf16.go
@@ -0,0 +1,144 @@
+// Copyright 2010 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Package utf16 implements encoding and decoding of UTF-16 sequences.
+package utf16
+
+// replacementChar and maxRune duplicate unicode.ReplacementChar and
+// unicode.MaxRune. The conditions replacementChar==unicode.ReplacementChar
+// and maxRune==unicode.MaxRune are verified in the tests.
+// Defining them locally avoids this package depending on package unicode.
+
+const (
+	replacementChar = '\uFFFD'     // Unicode replacement character
+	maxRune         = '\U0010FFFF' // Maximum valid Unicode code point.
+)
+
+// Boundaries of the surrogate ranges used by the encoder and decoder.
+const (
+	// 0xd800-0xdc00 encodes the high 10 bits of a pair.
+	// 0xdc00-0xe000 encodes the low 10 bits of a pair.
+	// the value is those 20 bits plus 0x10000.
+	surr1 = 0xd800 // first high surrogate
+	surr2 = 0xdc00 // first low surrogate; also one past the last high surrogate
+	surr3 = 0xe000 // one past the last low surrogate
+
+	surrSelf = 0x10000 // smallest code point that is encoded as a surrogate pair
+)
+
+// IsSurrogate reports whether r lies in the surrogate range, that is,
+// whether the code point can appear as either half of a surrogate pair.
+func IsSurrogate(r rune) bool {
+	return r >= surr1 && r < surr3
+}
+
+// DecodeRune combines the surrogate pair r1, r2 into the code point it
+// encodes. When r1 is not a high surrogate or r2 is not a low surrogate,
+// it returns the Unicode replacement code point U+FFFD.
+func DecodeRune(r1, r2 rune) rune {
+	if r1 < surr1 || r1 >= surr2 || r2 < surr2 || r2 >= surr3 {
+		return replacementChar
+	}
+	high, low := r1-surr1, r2-surr2
+	return (high<<10 | low) + surrSelf
+}
+
+// EncodeRune splits r into its UTF-16 surrogate pair r1, r2.
+// It returns U+FFFD, U+FFFD when r is not a valid Unicode code point or
+// does not need a surrogate pair.
+func EncodeRune(r rune) (r1, r2 rune) {
+	if r < surrSelf || r > maxRune {
+		return replacementChar, replacementChar
+	}
+	offset := r - surrSelf
+	r1 = surr1 + (offset>>10)&0x3ff
+	r2 = surr2 + offset&0x3ff
+	return r1, r2
+}
+
+// RuneLen returns how many 16-bit words the UTF-16 encoding of r occupies.
+// It returns -1 when r cannot be encoded in UTF-16: negative values, values
+// above the maximum code point, and surrogates.
+func RuneLen(r rune) int {
+	if r < 0 || r > maxRune {
+		return -1
+	}
+	if r >= surrSelf {
+		return 2
+	}
+	if surr1 <= r && r < surr3 {
+		return -1
+	}
+	return 1
+}
+
+// Encode returns the UTF-16 encoding of the Unicode code point sequence s.
+// A rune that cannot be encoded is written as U+FFFD.
+func Encode(s []rune) []uint16 {
+	// One word per rune, plus one more for each rune at or above surrSelf.
+	size := len(s)
+	for _, r := range s {
+		if r >= surrSelf {
+			size++
+		}
+	}
+
+	out := make([]uint16, size)
+	next := 0
+	for _, r := range s {
+		words := RuneLen(r)
+		if words == 2 {
+			// needs surrogate sequence
+			high, low := EncodeRune(r)
+			out[next], out[next+1] = uint16(high), uint16(low)
+			next += 2
+			continue
+		}
+		unit := uint16(replacementChar)
+		if words == 1 {
+			// normal rune
+			unit = uint16(r)
+		}
+		out[next] = unit
+		next++
+	}
+	return out[:next]
+}
+
+// AppendRune appends the UTF-16 encoding of the Unicode code point r
+// to the end of a and returns the extended buffer. If the rune is not
+// a valid Unicode code point, it appends the encoding of U+FFFD.
+func AppendRune(a []uint16, r rune) []uint16 {
+	// This function is inlineable for fast handling of ASCII.
+	switch {
+	case 0 <= r && r < surr1, surr3 <= r && r < surrSelf:
+		// normal rune
+		return append(a, uint16(r))
+	case surrSelf <= r && r <= maxRune:
+		// needs surrogate sequence
+		r1, r2 := EncodeRune(r)
+		return append(a, uint16(r1), uint16(r2))
+	}
+	// Negative runes, surrogates and values above maxRune end up here.
+	return append(a, replacementChar)
+}
+
+// Decode returns the Unicode code point sequence represented
+// by the UTF-16 encoding s. Invalid surrogate sequences decode to U+FFFD.
+func Decode(s []uint16) []rune {
+	// Preallocate capacity to hold up to 64 runes.
+	// Decode inlines, so the allocation can live on the stack.
+	buf := make([]rune, 0, 64)
+	return decode(s, buf)
+}
+
+// decode appends to buf the Unicode code point sequence represented
+// by the UTF-16 encoding s and returns the extended buffer.
+// Each word that is not part of a valid surrogate pair and is itself a
+// surrogate is decoded as U+FFFD.
+func decode(s []uint16, buf []rune) []rune {
+	for i := 0; i < len(s); i++ {
+		word := s[i]
+		if word < surr1 || word >= surr3 {
+			// normal rune
+			buf = append(buf, rune(word))
+			continue
+		}
+		if word < surr2 && i+1 < len(s) {
+			if next := s[i+1]; surr2 <= next && next < surr3 {
+				// valid surrogate sequence: consume both words
+				buf = append(buf, DecodeRune(rune(word), rune(next)))
+				i++
+				continue
+			}
+		}
+		// invalid surrogate sequence
+		buf = append(buf, replacementChar)
+	}
+	return buf
+}
--- a/src/unicode/utf16/utf16_test.go
+++ b/src/unicode/utf16/utf16_test.go
@@ -0,0 +1,273 @@
+// Copyright 2010 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package utf16_test
+
+import (
+	"internal/testenv"
+	"reflect"
+	"testing"
+	"unicode"
+	. "unicode/utf16"
+)
+
+// TestConstants validates that the constants this package redefines locally
+// still match their counterparts in package unicode.
+func TestConstants(t *testing.T) {
+	checks := []struct {
+		format    string
+		got, want rune
+	}{
+		{"utf16.maxRune is wrong: %x should be %x", MaxRune, unicode.MaxRune},
+		{"utf16.replacementChar is wrong: %x should be %x", ReplacementChar, unicode.ReplacementChar},
+	}
+	for _, c := range checks {
+		if c.got != c.want {
+			t.Errorf(c.format, c.got, c.want)
+		}
+	}
+}
+
+// TestRuneLen checks RuneLen at the boundaries of each encoding class.
+func TestRuneLen(t *testing.T) {
+	type runeLenTest struct {
+		r      rune
+		length int
+	}
+	tests := []runeLenTest{
+		{0, 1},
+		{Surr1 - 1, 1},
+		{Surr3, 1},
+		{SurrSelf - 1, 1},
+		{SurrSelf, 2},
+		{MaxRune, 2},
+		{MaxRune + 1, -1},
+		{-1, -1},
+	}
+	for _, tt := range tests {
+		got := RuneLen(tt.r)
+		if got == tt.length {
+			continue
+		}
+		t.Errorf("RuneLen(%#U) = %d, want %d", tt.r, got, tt.length)
+	}
+}
+
+// encodeTest pairs a rune sequence with its expected UTF-16 encoding.
+type encodeTest struct {
+	in  []rune   // code points to encode
+	out []uint16 // expected UTF-16 words
+}
+
+// encodeTests is shared by TestEncode, TestAppendRune and TestEncodeRune.
+// The last entry covers surrogates and out-of-range values, each of which
+// is expected to encode as 0xfffd.
+var encodeTests = []encodeTest{
+	{[]rune{1, 2, 3, 4}, []uint16{1, 2, 3, 4}},
+	{[]rune{0xffff, 0x10000, 0x10001, 0x12345, 0x10ffff},
+		[]uint16{0xffff, 0xd800, 0xdc00, 0xd800, 0xdc01, 0xd808, 0xdf45, 0xdbff, 0xdfff}},
+	{[]rune{'a', 'b', 0xd7ff, 0xd800, 0xdfff, 0xe000, 0x110000, -1},
+		[]uint16{'a', 'b', 0xd7ff, 0xfffd, 0xfffd, 0xe000, 0xfffd, 0xfffd}},
+}
+
+// TestEncode checks Encode against every entry of encodeTests.
+func TestEncode(t *testing.T) {
+	for _, tt := range encodeTests {
+		if got := Encode(tt.in); !reflect.DeepEqual(got, tt.out) {
+			t.Errorf("Encode(%x) = %x; want %x", tt.in, got, tt.out)
+		}
+	}
+}
+
+// TestAppendRune builds each expected encoding in encodeTests one rune at a
+// time with AppendRune, starting from a nil slice.
+func TestAppendRune(t *testing.T) {
+	for _, tt := range encodeTests {
+		var got []uint16
+		for _, r := range tt.in {
+			got = AppendRune(got, r)
+		}
+		if reflect.DeepEqual(got, tt.out) {
+			continue
+		}
+		t.Errorf("AppendRune(%x) = %x; want %x", tt.in, got, tt.out)
+	}
+}
+
+// TestEncodeRune runs every rune of encodeTests through EncodeRune. Runes
+// that do not need a pair must yield U+FFFD, U+FFFD and account for one
+// expected output word; the others must yield the next two expected words
+// and round-trip through DecodeRune.
+func TestEncodeRune(t *testing.T) {
+	for i, tt := range encodeTests {
+		pos := 0 // index of the next unconsumed word in tt.out
+		for _, r := range tt.in {
+			r1, r2 := EncodeRune(r)
+			needsPair := 0x10000 <= r && r <= unicode.MaxRune
+			if !needsPair {
+				if pos >= len(tt.out) {
+					t.Errorf("#%d: ran out of tt.out", i)
+					break
+				}
+				if r1 != unicode.ReplacementChar || r2 != unicode.ReplacementChar {
+					t.Errorf("EncodeRune(%#x) = %#x, %#x; want 0xfffd, 0xfffd", r, r1, r2)
+				}
+				pos++
+				continue
+			}
+			if pos+1 >= len(tt.out) {
+				t.Errorf("#%d: ran out of tt.out", i)
+				break
+			}
+			want1, want2 := tt.out[pos], tt.out[pos+1]
+			if r1 != rune(want1) || r2 != rune(want2) {
+				t.Errorf("EncodeRune(%#x) = %#x, %#x; want %#x, %#x", r, r1, r2, want1, want2)
+			}
+			pos += 2
+			if dec := DecodeRune(r1, r2); dec != r {
+				t.Errorf("DecodeRune(%#x, %#x) = %#x; want %#x", r1, r2, dec, r)
+			}
+		}
+		if pos != len(tt.out) {
+			t.Errorf("#%d: EncodeRune didn't generate enough output", i)
+		}
+	}
+}
+
+// decodeTest pairs a UTF-16 word sequence with its expected decoding.
+type decodeTest struct {
+	in  []uint16 // UTF-16 words to decode
+	out []rune   // expected code points
+}
+
+// decodeTests is shared by TestDecode and TestAllocationsDecode. The last
+// two entries hold unpaired surrogates, which are expected to decode as
+// 0xfffd.
+var decodeTests = []decodeTest{
+	{[]uint16{1, 2, 3, 4}, []rune{1, 2, 3, 4}},
+	{[]uint16{0xffff, 0xd800, 0xdc00, 0xd800, 0xdc01, 0xd808, 0xdf45, 0xdbff, 0xdfff},
+		[]rune{0xffff, 0x10000, 0x10001, 0x12345, 0x10ffff}},
+	{[]uint16{0xd800, 'a'}, []rune{0xfffd, 'a'}},
+	{[]uint16{0xdfff}, []rune{0xfffd}},
+}
+
+// TestAllocationsDecode checks that Decode performs no heap allocation for
+// any input in decodeTests. It is skipped when compiler optimizations are
+// off.
+func TestAllocationsDecode(t *testing.T) {
+	testenv.SkipIfOptimizationOff(t)
+
+	for _, tt := range decodeTests {
+		allocs := testing.AllocsPerRun(10, func() {
+			out := Decode(tt.in)
+			if out == nil {
+				t.Errorf("Decode(%x) = nil", tt.in)
+			}
+		})
+		if allocs > 0 {
+			t.Errorf("Decode allocated %v times", allocs)
+		}
+	}
+}
+
+// TestDecode checks Decode against every entry of decodeTests.
+func TestDecode(t *testing.T) {
+	for _, tt := range decodeTests {
+		if got := Decode(tt.in); !reflect.DeepEqual(got, tt.out) {
+			t.Errorf("Decode(%x) = %x; want %x", tt.in, got, tt.out)
+		}
+	}
+}
+
+// decodeRuneTests lists surrogate pairs (r1, r2) together with the rune
+// that TestDecodeRune expects DecodeRune to return for them.
+var decodeRuneTests = []struct {
+	r1, r2 rune
+	want   rune
+}{
+	{0xd800, 0xdc00, 0x10000},
+	{0xd800, 0xdc01, 0x10001},
+	{0xd808, 0xdf45, 0x12345},
+	{0xdbff, 0xdfff, 0x10ffff},
+	{0xd800, 'a', 0xfffd}, // illegal, replacement rune substituted
+}
+
+// TestDecodeRune checks DecodeRune against every entry of decodeRuneTests.
+func TestDecodeRune(t *testing.T) {
+	for i, tt := range decodeRuneTests {
+		if got := DecodeRune(tt.r1, tt.r2); got != tt.want {
+			t.Errorf("%d: DecodeRune(%q, %q) = %v; want %v", i, tt.r1, tt.r2, got, tt.want)
+		}
+	}
+}
+
+// surrogateTests lists runes together with the result TestIsSurrogate
+// expects from IsSurrogate, including the values on either side of each
+// surrogate range boundary.
+var surrogateTests = []struct {
+	r    rune
+	want bool
+}{
+	// from https://en.wikipedia.org/wiki/UTF-16
+	{'\u007A', false},     // LATIN SMALL LETTER Z
+	{'\u6C34', false},     // CJK UNIFIED IDEOGRAPH-6C34 (water)
+	{'\uFEFF', false},     // Byte Order Mark
+	{'\U00010000', false}, // LINEAR B SYLLABLE B008 A (first non-BMP code point)
+	{'\U0001D11E', false}, // MUSICAL SYMBOL G CLEF
+	{'\U0010FFFD', false}, // PRIVATE USE CHARACTER-10FFFD (last Unicode code point)
+
+	{rune(0xd7ff), false}, // surr1-1
+	{rune(0xd800), true},  // surr1
+	{rune(0xdc00), true},  // surr2
+	{rune(0xe000), false}, // surr3
+	{rune(0xdfff), true},  // surr3-1
+}
+
+// TestIsSurrogate checks IsSurrogate against every entry of surrogateTests.
+func TestIsSurrogate(t *testing.T) {
+	for i, tt := range surrogateTests {
+		if got := IsSurrogate(tt.r); got != tt.want {
+			t.Errorf("%d: IsSurrogate(%q) = %v; want %v", i, tt.r, got, tt.want)
+		}
+	}
+}
+
+// BenchmarkDecodeValidASCII measures Decode on the ASCII text "hello world".
+func BenchmarkDecodeValidASCII(b *testing.B) {
+	words := []uint16{'h', 'e', 'l', 'l', 'o', ' ', 'w', 'o', 'r', 'l', 'd'}
+	for n := b.N; n > 0; n-- {
+		Decode(words)
+	}
+}
+
+// BenchmarkDecodeValidJapaneseChars measures Decode on "日本語日本語日本語",
+// whose characters each fit in a single UTF-16 word.
+func BenchmarkDecodeValidJapaneseChars(b *testing.B) {
+	words := []uint16{'日', '本', '語', '日', '本', '語', '日', '本', '語'}
+	for n := b.N; n > 0; n-- {
+		Decode(words)
+	}
+}
+
+// BenchmarkDecodeRune measures DecodeRune on five surrogate pairs that are
+// prepared with EncodeRune before the timer is reset.
+func BenchmarkDecodeRune(b *testing.B) {
+	// U+1D4D0 to U+1D4D4: MATHEMATICAL BOLD SCRIPT CAPITAL LETTERS
+	letters := []rune{'𝓐', '𝓑', '𝓒', '𝓓', '𝓔'}
+	pairs := make([]rune, 10)
+	for k, u := range letters {
+		pairs[2*k], pairs[2*k+1] = EncodeRune(u)
+	}
+
+	b.ResetTimer()
+	for n := 0; n < b.N; n++ {
+		for k := 0; k < 5; k++ {
+			DecodeRune(pairs[2*k], pairs[2*k+1])
+		}
+	}
+}
+
+// BenchmarkEncodeValidASCII measures Encode on the ASCII text "hello".
+func BenchmarkEncodeValidASCII(b *testing.B) {
+	runes := []rune{'h', 'e', 'l', 'l', 'o'}
+	for n := b.N; n > 0; n-- {
+		Encode(runes)
+	}
+}
+
+// BenchmarkEncodeValidJapaneseChars measures Encode on three Japanese
+// characters.
+func BenchmarkEncodeValidJapaneseChars(b *testing.B) {
+	runes := []rune{'日', '本', '語'}
+	for n := b.N; n > 0; n-- {
+		Encode(runes)
+	}
+}
+
+// BenchmarkAppendRuneValidASCII measures AppendRune on the ASCII text
+// "hello", reusing one preallocated buffer across iterations.
+func BenchmarkAppendRuneValidASCII(b *testing.B) {
+	runes := []rune{'h', 'e', 'l', 'l', 'o'}
+	buf := make([]uint16, 0, len(runes)*2)
+	for n := 0; n < b.N; n++ {
+		buf = buf[:0]
+		for _, r := range runes {
+			buf = AppendRune(buf, r)
+		}
+	}
+}
+
+// BenchmarkAppendRuneValidJapaneseChars measures AppendRune on three
+// Japanese characters, reusing one preallocated buffer across iterations.
+func BenchmarkAppendRuneValidJapaneseChars(b *testing.B) {
+	runes := []rune{'日', '本', '語'}
+	buf := make([]uint16, 0, len(runes)*2)
+	for n := 0; n < b.N; n++ {
+		buf = buf[:0]
+		for _, r := range runes {
+			buf = AppendRune(buf, r)
+		}
+	}
+}
+
+// BenchmarkEncodeRune measures EncodeRune on five runes that each need a
+// surrogate pair.
+func BenchmarkEncodeRune(b *testing.B) {
+	for n := b.N; n > 0; n-- {
+		for _, r := range []rune{'𝓐', '𝓑', '𝓒', '𝓓', '𝓔'} {
+			EncodeRune(r)
+		}
+	}
+}
--- a/src/unicode/utf8/example_test.go
+++ b/src/unicode/utf8/example_test.go
@@ -0,0 +1,226 @@
+// Copyright 2013 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package utf8_test
+
+import (
+	"fmt"
+	"unicode/utf8"
+)
+
+func ExampleDecodeLastRune() {
+	b := []byte("Hello, 世界")
+
+	// Walk the buffer from back to front, one rune at a time.
+	for end := len(b); end > 0; {
+		r, size := utf8.DecodeLastRune(b[:end])
+		fmt.Printf("%c %v\n", r, size)
+
+		end -= size
+	}
+	// Output:
+	// 界 3
+	// 世 3
+	//   1
+	// , 1
+	// o 1
+	// l 1
+	// l 1
+	// e 1
+	// H 1
+}
+
+func ExampleDecodeLastRuneInString() {
+	str := "Hello, 世界"
+
+	// Walk the string from back to front, one rune at a time.
+	for end := len(str); end > 0; {
+		r, size := utf8.DecodeLastRuneInString(str[:end])
+		fmt.Printf("%c %v\n", r, size)
+
+		end -= size
+	}
+	// Output:
+	// 界 3
+	// 世 3
+	//   1
+	// , 1
+	// o 1
+	// l 1
+	// l 1
+	// e 1
+	// H 1
+}
+
+func ExampleDecodeRune() {
+	b := []byte("Hello, 世界")
+
+	// Advance through the buffer by the width of each decoded rune.
+	for off := 0; off < len(b); {
+		r, size := utf8.DecodeRune(b[off:])
+		fmt.Printf("%c %v\n", r, size)
+
+		off += size
+	}
+	// Output:
+	// H 1
+	// e 1
+	// l 1
+	// l 1
+	// o 1
+	// , 1
+	//   1
+	// 世 3
+	// 界 3
+}
+
+func ExampleDecodeRuneInString() {
+	str := "Hello, 世界"
+
+	// Advance through the string by the width of each decoded rune.
+	for off := 0; off < len(str); {
+		r, size := utf8.DecodeRuneInString(str[off:])
+		fmt.Printf("%c %v\n", r, size)
+
+		off += size
+	}
+	// Output:
+	// H 1
+	// e 1
+	// l 1
+	// l 1
+	// o 1
+	// , 1
+	//   1
+	// 世 3
+	// 界 3
+}
+
+func ExampleEncodeRune() {
+	r := '世'
+	buf := make([]byte, 3)
+
+	n := utf8.EncodeRune(buf, r)
+
+	fmt.Println(buf)
+	fmt.Println(n)
+	// Output:
+	// [228 184 150]
+	// 3
+}
+
+func ExampleEncodeRune_outOfRange() {
+	runes := []rune{
+		// Less than 0, out of range.
+		-1,
+		// Greater than 0x10FFFF, out of range.
+		0x110000,
+		// The Unicode replacement character.
+		utf8.RuneError,
+	}
+	for i, c := range runes {
+		buf := make([]byte, 3)
+		size := utf8.EncodeRune(buf, c)
+		// %[2]s prints buf a second time, as text rather than as numbers:
+		// all three inputs encode to the replacement character U+FFFD.
+		fmt.Printf("%d: %d %[2]s %d\n", i, buf, size)
+	}
+	// Output:
+	// 0: [239 191 189] � 3
+	// 1: [239 191 189] � 3
+	// 2: [239 191 189] � 3
+}
+
+func ExampleFullRune() {
+	encoded := []byte("世") // the three bytes 228, 184, 150
+	whole := utf8.FullRune(encoded)
+	truncated := utf8.FullRune(encoded[:2])
+	fmt.Println(whole)
+	fmt.Println(truncated)
+	// Output:
+	// true
+	// false
+}
+
+func ExampleFullRuneInString() {
+	str := "世"
+	fmt.Println(utf8.FullRuneInString(str))
+	fmt.Println(utf8.FullRuneInString(str[:2]))
+	// Output:
+	// true
+	// false
+}
+
+func ExampleRuneCount() {
+	buf := []byte("Hello, 世界")
+	byteLen, runeLen := len(buf), utf8.RuneCount(buf)
+	fmt.Println("bytes =", byteLen)
+	fmt.Println("runes =", runeLen)
+	// Output:
+	// bytes = 13
+	// runes = 9
+}
+
+func ExampleRuneCountInString() {
+	str := "Hello, 世界"
+	fmt.Println("bytes =", len(str))
+	fmt.Println("runes =", utf8.RuneCountInString(str))
+	// Output:
+	// bytes = 13
+	// runes = 9
+}
+
+func ExampleRuneLen() {
+	for _, r := range []rune{'a', '界'} {
+		fmt.Println(utf8.RuneLen(r))
+	}
+	// Output:
+	// 1
+	// 3
+}
+
+func ExampleRuneStart() {
+	buf := []byte("a界")
+	// Inspect 'a', then the first two bytes of '界'.
+	for _, c := range buf[:3] {
+		fmt.Println(utf8.RuneStart(c))
+	}
+	// Output:
+	// true
+	// true
+	// false
+}
+
+func ExampleValid() {
+	inputs := [][]byte{
+		[]byte("Hello, 世界"), // valid
+		{0xff, 0xfe, 0xfd},  // invalid
+	}
+
+	for _, in := range inputs {
+		fmt.Println(utf8.Valid(in))
+	}
+	// Output:
+	// true
+	// false
+}
+
+func ExampleValidRune() {
+	// 'a' is a legal code point; 0xfffffff lies far above the legal range.
+	for _, r := range []rune{'a', 0xfffffff} {
+		fmt.Println(utf8.ValidRune(r))
+	}
+	// Output:
+	// true
+	// false
+}
+
+func ExampleValidString() {
+	inputs := []string{
+		"Hello, 世界",     // valid
+		"\xff\xfe\xfd", // invalid
+	}
+
+	for _, in := range inputs {
+		fmt.Println(utf8.ValidString(in))
+	}
+	// Output:
+	// true
+	// false
+}
+
+func ExampleAppendRune() {
+	buf1 := utf8.AppendRune(nil, 0x10000)
+	buf2 := utf8.AppendRune([]byte("init"), 0x10000)
+	fmt.Println(string(buf1))
+	fmt.Println(string(buf2))
+	// Output:
+	// 𐀀
+	// init𐀀
+}
--- a/src/unicode/utf8/utf8.go
+++ b/src/unicode/utf8/utf8.go
@@ -0,0 +1,583 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Package utf8 implements functions and constants to support text encoded in
+// UTF-8. It includes functions to translate between runes and UTF-8 byte sequences.
+// See https://en.wikipedia.org/wiki/UTF-8
+package utf8
+
+// The conditions RuneError==unicode.ReplacementChar and
+// MaxRune==unicode.MaxRune are verified in the tests.
+// Defining them locally avoids this package depending on package unicode.
+
+// Numbers fundamental to the encoding.
+const (
+	RuneError = '\uFFFD'     // The "error" rune, also known as the Unicode replacement character.
+	RuneSelf  = 0x80         // Characters below RuneSelf are represented as themselves in a single byte.
+	MaxRune   = '\U0010FFFF' // Maximum valid Unicode code point.
+	UTFMax    = 4            // Maximum number of bytes of a UTF-8 encoded Unicode character.
+)
+
+// Code points in the surrogate range are not valid for UTF-8.
+// Both bounds are inclusive: RuneLen, EncodeRune and ValidRune all treat
+// surrogateMin <= r <= surrogateMax as out of range.
+const (
+	surrogateMin = 0xD800
+	surrogateMax = 0xDFFF
+)
+
+const (
+	// Marker bits of encoded bytes. The encoders OR tx into every
+	// continuation byte and t2, t3 or t4 into the leading byte of a two-,
+	// three- or four-byte sequence.
+	t1 = 0b00000000
+	tx = 0b10000000
+	t2 = 0b11000000
+	t3 = 0b11100000
+	t4 = 0b11110000
+	t5 = 0b11111000
+
+	// Payload masks. maskx keeps the six payload bits of a continuation
+	// byte; mask2, mask3 and mask4 keep the payload bits of the leading byte
+	// of a two-, three- and four-byte sequence.
+	maskx = 0b00111111
+	mask2 = 0b00011111
+	mask3 = 0b00001111
+	mask4 = 0b00000111
+
+	// The largest rune that is encoded in one, two and three bytes.
+	rune1Max = 1<<7 - 1
+	rune2Max = 1<<11 - 1
+	rune3Max = 1<<16 - 1
+
+	// The default lowest and highest continuation byte.
+	locb = 0b10000000
+	hicb = 0b10111111
+
+	// The names of these constants are chosen to give nice alignment in the
+	// table below. The first nibble is an index into acceptRanges or F for
+	// special one-byte cases. The second nibble is the Rune length or the
+	// Status for the special one-byte case.
+	xx = 0xF1 // invalid: size 1
+	as = 0xF0 // ASCII: size 1
+	s1 = 0x02 // accept 0, size 2
+	s2 = 0x13 // accept 1, size 3
+	s3 = 0x03 // accept 0, size 3
+	s4 = 0x23 // accept 2, size 3
+	s5 = 0x34 // accept 3, size 4
+	s6 = 0x04 // accept 0, size 4
+	s7 = 0x44 // accept 4, size 4
+)
+
+// first is information about the first byte in a UTF-8 sequence.
+// It is indexed by that byte. The decoders read the sequence length from the
+// low three bits of an entry (x&7) and the acceptRanges index from its high
+// nibble (x>>4); as and xx mark the one-byte ASCII and invalid cases.
+var first = [256]uint8{
+	//   1   2   3   4   5   6   7   8   9   A   B   C   D   E   F
+	as, as, as, as, as, as, as, as, as, as, as, as, as, as, as, as, // 0x00-0x0F
+	as, as, as, as, as, as, as, as, as, as, as, as, as, as, as, as, // 0x10-0x1F
+	as, as, as, as, as, as, as, as, as, as, as, as, as, as, as, as, // 0x20-0x2F
+	as, as, as, as, as, as, as, as, as, as, as, as, as, as, as, as, // 0x30-0x3F
+	as, as, as, as, as, as, as, as, as, as, as, as, as, as, as, as, // 0x40-0x4F
+	as, as, as, as, as, as, as, as, as, as, as, as, as, as, as, as, // 0x50-0x5F
+	as, as, as, as, as, as, as, as, as, as, as, as, as, as, as, as, // 0x60-0x6F
+	as, as, as, as, as, as, as, as, as, as, as, as, as, as, as, as, // 0x70-0x7F
+	//   1   2   3   4   5   6   7   8   9   A   B   C   D   E   F
+	xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, // 0x80-0x8F
+	xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, // 0x90-0x9F
+	xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, // 0xA0-0xAF
+	xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, // 0xB0-0xBF
+	xx, xx, s1, s1, s1, s1, s1, s1, s1, s1, s1, s1, s1, s1, s1, s1, // 0xC0-0xCF
+	s1, s1, s1, s1, s1, s1, s1, s1, s1, s1, s1, s1, s1, s1, s1, s1, // 0xD0-0xDF
+	s2, s3, s3, s3, s3, s3, s3, s3, s3, s3, s3, s3, s3, s4, s3, s3, // 0xE0-0xEF
+	s5, s6, s6, s6, s7, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, // 0xF0-0xFF
+}
+
+// acceptRange gives the range of valid values for the second byte in a UTF-8
+// sequence. Both bounds are inclusive: callers reject a byte b only when
+// b < lo or hi < b.
+type acceptRange struct {
+	lo uint8 // lowest value for second byte.
+	hi uint8 // highest value for second byte.
+}
+
+// acceptRanges has size 16 to avoid bounds checks in the code that uses it.
+// It is indexed by the high nibble of an entry of first; only indexes 0 to 4
+// are populated.
+var acceptRanges = [16]acceptRange{
+	0: {locb, hicb}, // any continuation byte
+	1: {0xA0, hicb}, // after lead byte 0xE0 (s2)
+	2: {locb, 0x9F}, // after lead byte 0xED (s4)
+	3: {0x90, hicb}, // after lead byte 0xF0 (s5)
+	4: {locb, 0x8F}, // after lead byte 0xF4 (s7)
+}
+
+// FullRune reports whether the bytes in p begin with a full UTF-8 encoding of a rune.
+// An invalid encoding is considered a full Rune since it will convert as a width-1 error rune.
+func FullRune(p []byte) bool {
+	if len(p) == 0 {
+		return false
+	}
+	info := first[p[0]]
+	if need := int(info & 7); len(p) >= need {
+		// Enough bytes are present, whether the encoding is ASCII,
+		// invalid or valid.
+		return true
+	}
+	// Fewer bytes than the leading byte announces: the input is either a
+	// short prefix of a rune or already invalid. It is invalid, and hence
+	// full, as soon as one of the bytes present is out of range.
+	rng := acceptRanges[info>>4]
+	switch {
+	case len(p) > 1 && (p[1] < rng.lo || p[1] > rng.hi):
+		return true
+	case len(p) > 2 && (p[2] < locb || p[2] > hicb):
+		return true
+	}
+	return false
+}
+
+// FullRuneInString is like FullRune but its input is a string.
+func FullRuneInString(s string) bool {
+	if len(s) == 0 {
+		return false
+	}
+	info := first[s[0]]
+	if need := int(info & 7); len(s) >= need {
+		// Enough bytes are present, whether the encoding is ASCII,
+		// invalid or valid.
+		return true
+	}
+	// Fewer bytes than the leading byte announces: the input is either a
+	// short prefix of a rune or already invalid. It is invalid, and hence
+	// full, as soon as one of the bytes present is out of range.
+	rng := acceptRanges[info>>4]
+	switch {
+	case len(s) > 1 && (s[1] < rng.lo || s[1] > rng.hi):
+		return true
+	case len(s) > 2 && (s[2] < locb || s[2] > hicb):
+		return true
+	}
+	return false
+}
+
+// DecodeRune unpacks the first UTF-8 encoding in p and returns the rune and
+// its width in bytes. If p is empty it returns ([RuneError], 0). Otherwise, if
+// the encoding is invalid, it returns (RuneError, 1). Both are impossible
+// results for correct, non-empty UTF-8.
+//
+// An encoding is invalid if it is incorrect UTF-8, encodes a rune that is
+// out of range, or is not the shortest possible UTF-8 encoding for the
+// value. No other validation is performed.
+func DecodeRune(p []byte) (r rune, size int) {
+	n := len(p)
+	if n < 1 {
+		return RuneError, 0
+	}
+	p0 := p[0]
+	x := first[p0]
+	// as (0xF0) and xx (0xF1) are the only table values this large, so this
+	// branch handles exactly the ASCII and invalid-leading-byte cases.
+	if x >= as {
+		// The following code simulates an additional check for x == xx and
+		// handling the ASCII and invalid cases accordingly. This mask-and-or
+		// approach prevents an additional branch.
+		// The low bit of x is moved into the sign bit and shifted back
+		// arithmetically, so mask is all zeros for as and all ones for xx.
+		mask := rune(x) << 31 >> 31
+		return rune(p[0])&^mask | RuneError&mask, 1
+	}
+	sz := int(x & 7)             // sequence length announced by the leading byte
+	accept := acceptRanges[x>>4] // permitted range for the second byte
+	if n < sz {
+		// The input ends before the sequence does.
+		return RuneError, 1
+	}
+	b1 := p[1]
+	if b1 < accept.lo || accept.hi < b1 {
+		return RuneError, 1
+	}
+	if sz <= 2 { // <= instead of == to help the compiler eliminate some bounds checks
+		return rune(p0&mask2)<<6 | rune(b1&maskx), 2
+	}
+	b2 := p[2]
+	if b2 < locb || hicb < b2 {
+		return RuneError, 1
+	}
+	if sz <= 3 {
+		return rune(p0&mask3)<<12 | rune(b1&maskx)<<6 | rune(b2&maskx), 3
+	}
+	b3 := p[3]
+	if b3 < locb || hicb < b3 {
+		return RuneError, 1
+	}
+	return rune(p0&mask4)<<18 | rune(b1&maskx)<<12 | rune(b2&maskx)<<6 | rune(b3&maskx), 4
+}
+
+// DecodeRuneInString is like [DecodeRune] but its input is a string. If s is
+// empty it returns ([RuneError], 0). Otherwise, if the encoding is invalid, it
+// returns (RuneError, 1). Both are impossible results for correct, non-empty
+// UTF-8.
+//
+// An encoding is invalid if it is incorrect UTF-8, encodes a rune that is
+// out of range, or is not the shortest possible UTF-8 encoding for the
+// value. No other validation is performed.
+func DecodeRuneInString(s string) (r rune, size int) {
+	n := len(s)
+	if n < 1 {
+		return RuneError, 0
+	}
+	s0 := s[0]
+	x := first[s0]
+	// as (0xF0) and xx (0xF1) are the only table values this large, so this
+	// branch handles exactly the ASCII and invalid-leading-byte cases.
+	if x >= as {
+		// The following code simulates an additional check for x == xx and
+		// handling the ASCII and invalid cases accordingly. This mask-and-or
+		// approach prevents an additional branch.
+		// The low bit of x is moved into the sign bit and shifted back
+		// arithmetically, so mask is all zeros for as and all ones for xx.
+		mask := rune(x) << 31 >> 31
+		return rune(s[0])&^mask | RuneError&mask, 1
+	}
+	sz := int(x & 7)             // sequence length announced by the leading byte
+	accept := acceptRanges[x>>4] // permitted range for the second byte
+	if n < sz {
+		// The input ends before the sequence does.
+		return RuneError, 1
+	}
+	s1 := s[1]
+	if s1 < accept.lo || accept.hi < s1 {
+		return RuneError, 1
+	}
+	if sz <= 2 { // <= instead of == to help the compiler eliminate some bounds checks
+		return rune(s0&mask2)<<6 | rune(s1&maskx), 2
+	}
+	s2 := s[2]
+	if s2 < locb || hicb < s2 {
+		return RuneError, 1
+	}
+	if sz <= 3 {
+		return rune(s0&mask3)<<12 | rune(s1&maskx)<<6 | rune(s2&maskx), 3
+	}
+	s3 := s[3]
+	if s3 < locb || hicb < s3 {
+		return RuneError, 1
+	}
+	return rune(s0&mask4)<<18 | rune(s1&maskx)<<12 | rune(s2&maskx)<<6 | rune(s3&maskx), 4
+}
+
+// DecodeLastRune unpacks the last UTF-8 encoding in p and returns the rune and
+// its width in bytes. If p is empty it returns ([RuneError], 0). Otherwise, if
+// the encoding is invalid, it returns (RuneError, 1). Both are impossible
+// results for correct, non-empty UTF-8.
+//
+// An encoding is invalid if it is incorrect UTF-8, encodes a rune that is
+// out of range, or is not the shortest possible UTF-8 encoding for the
+// value. No other validation is performed.
+func DecodeLastRune(p []byte) (r rune, size int) {
+	n := len(p)
+	if n == 0 {
+		return RuneError, 0
+	}
+	if last := p[n-1]; last < RuneSelf {
+		return rune(last), 1
+	}
+	// Look backwards for the byte that starts the final rune, but never
+	// further than UTFMax bytes from the end: without that bound, walking
+	// backwards through a long run of invalid UTF-8 would be quadratic.
+	floor := n - UTFMax
+	if floor < 0 {
+		floor = 0
+	}
+	i := n - 2
+	for i >= floor && !RuneStart(p[i]) {
+		i--
+	}
+	if i < 0 {
+		i = 0
+	}
+	// The candidate counts only if its encoding ends exactly at the end of p.
+	r, size = DecodeRune(p[i:n])
+	if i+size == n {
+		return r, size
+	}
+	return RuneError, 1
+}
+
+// DecodeLastRuneInString is like [DecodeLastRune] but its input is a string. If
+// s is empty it returns ([RuneError], 0). Otherwise, if the encoding is invalid,
+// it returns (RuneError, 1). Both are impossible results for correct,
+// non-empty UTF-8.
+//
+// An encoding is invalid if it is incorrect UTF-8, encodes a rune that is
+// out of range, or is not the shortest possible UTF-8 encoding for the
+// value. No other validation is performed.
+func DecodeLastRuneInString(s string) (r rune, size int) {
+	n := len(s)
+	if n == 0 {
+		return RuneError, 0
+	}
+	if last := s[n-1]; last < RuneSelf {
+		return rune(last), 1
+	}
+	// Look backwards for the byte that starts the final rune, but never
+	// further than UTFMax bytes from the end: without that bound, walking
+	// backwards through a long run of invalid UTF-8 would be quadratic.
+	floor := n - UTFMax
+	if floor < 0 {
+		floor = 0
+	}
+	i := n - 2
+	for i >= floor && !RuneStart(s[i]) {
+		i--
+	}
+	if i < 0 {
+		i = 0
+	}
+	// The candidate counts only if its encoding ends exactly at the end of s.
+	r, size = DecodeRuneInString(s[i:n])
+	if i+size == n {
+		return r, size
+	}
+	return RuneError, 1
+}
+
+// RuneLen returns the number of bytes in the UTF-8 encoding of the rune.
+// It returns -1 if the rune is not a valid value to encode in UTF-8.
+func RuneLen(r rune) int {
+	// Reject everything that cannot be encoded: values outside
+	// [0, MaxRune] and the surrogate range.
+	if r < 0 || r > MaxRune {
+		return -1
+	}
+	if surrogateMin <= r && r <= surrogateMax {
+		return -1
+	}
+	// What remains is sized by the smallest class that holds it.
+	if r <= rune1Max {
+		return 1
+	}
+	if r <= rune2Max {
+		return 2
+	}
+	if r <= rune3Max {
+		return 3
+	}
+	return 4
+}
+
+// EncodeRune writes into p (which must be large enough) the UTF-8 encoding of the rune.
+// If the rune is out of range, it writes the encoding of [RuneError].
+// It returns the number of bytes written.
+//
+// Surrogate halves count as out of range here. Indexing p panics if p is
+// shorter than the encoding that is written.
+func EncodeRune(p []byte, r rune) int {
+	// Negative values are erroneous. Making it unsigned addresses the problem.
+	switch i := uint32(r); {
+	case i <= rune1Max:
+		p[0] = byte(r)
+		return 1
+	case i <= rune2Max:
+		_ = p[1] // eliminate bounds checks
+		p[0] = t2 | byte(r>>6)
+		p[1] = tx | byte(r)&maskx
+		return 2
+	case i > MaxRune, surrogateMin <= i && i <= surrogateMax:
+		// Replace the invalid rune and share the three-byte case below,
+		// which is the width of RuneError.
+		r = RuneError
+		fallthrough
+	case i <= rune3Max:
+		_ = p[2] // eliminate bounds checks
+		p[0] = t3 | byte(r>>12)
+		p[1] = tx | byte(r>>6)&maskx
+		p[2] = tx | byte(r)&maskx
+		return 3
+	default:
+		// Everything left lies above rune3Max and at most at MaxRune.
+		_ = p[3] // eliminate bounds checks
+		p[0] = t4 | byte(r>>18)
+		p[1] = tx | byte(r>>12)&maskx
+		p[2] = tx | byte(r>>6)&maskx
+		p[3] = tx | byte(r)&maskx
+		return 4
+	}
+}
+
+// AppendRune appends the UTF-8 encoding of r to the end of p and
+// returns the extended buffer. If the rune is out of range,
+// it appends the encoding of [RuneError].
+func AppendRune(p []byte, r rune) []byte {
+	// This function is inlineable for fast handling of ASCII.
+	// The unsigned comparison also sends negative runes to the slow path.
+	if uint32(r) <= rune1Max {
+		return append(p, byte(r))
+	}
+	return appendRuneNonASCII(p, r)
+}
+
+// appendRuneNonASCII is the part of AppendRune that handles every rune
+// AppendRune's one-byte fast path does not. It appends the two-, three- or
+// four-byte encoding of r to p, substituting RuneError for runes that are
+// out of range or surrogate halves.
+func appendRuneNonASCII(p []byte, r rune) []byte {
+	// Negative values are erroneous. Making it unsigned addresses the problem.
+	switch i := uint32(r); {
+	case i <= rune2Max:
+		return append(p, t2|byte(r>>6), tx|byte(r)&maskx)
+	case i > MaxRune, surrogateMin <= i && i <= surrogateMax:
+		// Replace the invalid rune and share the three-byte case below,
+		// which is the width of RuneError.
+		r = RuneError
+		fallthrough
+	case i <= rune3Max:
+		return append(p, t3|byte(r>>12), tx|byte(r>>6)&maskx, tx|byte(r)&maskx)
+	default:
+		return append(p, t4|byte(r>>18), tx|byte(r>>12)&maskx, tx|byte(r>>6)&maskx, tx|byte(r)&maskx)
+	}
+}
+
+// RuneCount returns the number of runes in p. Erroneous and short
+// encodings are treated as single runes of width 1 byte.
+func RuneCount(p []byte) int {
+	np := len(p)
+	var n int
+	for i := 0; i < np; {
+		// Every pass through the loop accounts for exactly one rune.
+		n++
+		c := p[i]
+		if c < RuneSelf {
+			// ASCII fast path
+			i++
+			continue
+		}
+		x := first[c]
+		if x == xx {
+			i++ // invalid.
+			continue
+		}
+		// The low three bits of x hold the length the leading byte announces.
+		size := int(x & 7)
+		if i+size > np {
+			i++ // Short or invalid.
+			continue
+		}
+		accept := acceptRanges[x>>4]
+		// Check the following bytes in order. A byte that is out of range
+		// demotes the leading byte to a rune of width 1; the empty branches
+		// end the chain once all size bytes have been checked.
+		if c := p[i+1]; c < accept.lo || accept.hi < c {
+			size = 1
+		} else if size == 2 {
+		} else if c := p[i+2]; c < locb || hicb < c {
+			size = 1
+		} else if size == 3 {
+		} else if c := p[i+3]; c < locb || hicb < c {
+			size = 1
+		}
+		i += size
+	}
+	return n
+}
+
+// RuneCountInString is like [RuneCount] but its input is a string.
+func RuneCountInString(s string) (n int) {
+	ns := len(s)
+	// The post statement counts one rune per pass, including the passes
+	// that end in continue.
+	for i := 0; i < ns; n++ {
+		c := s[i]
+		if c < RuneSelf {
+			// ASCII fast path
+			i++
+			continue
+		}
+		x := first[c]
+		if x == xx {
+			i++ // invalid.
+			continue
+		}
+		// The low three bits of x hold the length the leading byte announces.
+		size := int(x & 7)
+		if i+size > ns {
+			i++ // Short or invalid.
+			continue
+		}
+		accept := acceptRanges[x>>4]
+		// Check the following bytes in order. A byte that is out of range
+		// demotes the leading byte to a rune of width 1; the empty branches
+		// end the chain once all size bytes have been checked.
+		if c := s[i+1]; c < accept.lo || accept.hi < c {
+			size = 1
+		} else if size == 2 {
+		} else if c := s[i+2]; c < locb || hicb < c {
+			size = 1
+		} else if size == 3 {
+		} else if c := s[i+3]; c < locb || hicb < c {
+			size = 1
+		}
+		i += size
+	}
+	return n
+}
+
+// RuneStart reports whether the byte could be the first byte of an encoded,
+// possibly invalid rune. Second and subsequent bytes always have the top two
+// bits set to 10.
+func RuneStart(b byte) bool { return b&0xC0 != 0x80 }
+
+// Valid reports whether p consists entirely of valid UTF-8-encoded runes.
+func Valid(p []byte) bool {
+	// This optimization avoids the need to recompute the capacity
+	// when generating code for p[8:], bringing it to parity with
+	// ValidString, which was 20% faster on long ASCII strings.
+	p = p[:len(p):len(p)]
+
+	// Fast path. Check for and skip 8 bytes of ASCII characters per iteration.
+	for len(p) >= 8 {
+		// Combining two 32 bit loads allows the same code to be used
+		// for 32 and 64 bit platforms.
+		// The compiler can generate a 32bit load for first32 and second32
+		// on many platforms. See test/codegen/memcombine.go.
+		first32 := uint32(p[0]) | uint32(p[1])<<8 | uint32(p[2])<<16 | uint32(p[3])<<24
+		second32 := uint32(p[4]) | uint32(p[5])<<8 | uint32(p[6])<<16 | uint32(p[7])<<24
+		// 0x80808080 selects the top bit of each of the four bytes.
+		if (first32|second32)&0x80808080 != 0 {
+			// Found a non ASCII byte (>= RuneSelf).
+			break
+		}
+		p = p[8:]
+	}
+	// Slow path. Validate whatever is left one rune at a time.
+	n := len(p)
+	for i := 0; i < n; {
+		pi := p[i]
+		if pi < RuneSelf {
+			i++
+			continue
+		}
+		x := first[pi]
+		if x == xx {
+			return false // Illegal starter byte.
+		}
+		// The low three bits of x hold the length the leading byte announces.
+		size := int(x & 7)
+		if i+size > n {
+			return false // Short or invalid.
+		}
+		accept := acceptRanges[x>>4]
+		// Check the following bytes in order; the empty branches end the
+		// chain once all size bytes have been checked.
+		if c := p[i+1]; c < accept.lo || accept.hi < c {
+			return false
+		} else if size == 2 {
+		} else if c := p[i+2]; c < locb || hicb < c {
+			return false
+		} else if size == 3 {
+		} else if c := p[i+3]; c < locb || hicb < c {
+			return false
+		}
+		i += size
+	}
+	return true
+}
+
+// ValidString reports whether s consists entirely of valid UTF-8-encoded runes.
+func ValidString(s string) bool {
+	// Fast path. Check for and skip 8 bytes of ASCII characters per iteration.
+	for len(s) >= 8 {
+		// Combining two 32 bit loads allows the same code to be used
+		// for 32 and 64 bit platforms.
+		// The compiler can generate a 32bit load for first32 and second32
+		// on many platforms. See test/codegen/memcombine.go.
+		first32 := uint32(s[0]) | uint32(s[1])<<8 | uint32(s[2])<<16 | uint32(s[3])<<24
+		second32 := uint32(s[4]) | uint32(s[5])<<8 | uint32(s[6])<<16 | uint32(s[7])<<24
+		// 0x80808080 selects the top bit of each of the four bytes.
+		if (first32|second32)&0x80808080 != 0 {
+			// Found a non ASCII byte (>= RuneSelf).
+			break
+		}
+		s = s[8:]
+	}
+	// Slow path. Validate whatever is left one rune at a time.
+	n := len(s)
+	for i := 0; i < n; {
+		si := s[i]
+		if si < RuneSelf {
+			i++
+			continue
+		}
+		x := first[si]
+		if x == xx {
+			return false // Illegal starter byte.
+		}
+		// The low three bits of x hold the length the leading byte announces.
+		size := int(x & 7)
+		if i+size > n {
+			return false // Short or invalid.
+		}
+		accept := acceptRanges[x>>4]
+		// Check the following bytes in order; the empty branches end the
+		// chain once all size bytes have been checked.
+		if c := s[i+1]; c < accept.lo || accept.hi < c {
+			return false
+		} else if size == 2 {
+		} else if c := s[i+2]; c < locb || hicb < c {
+			return false
+		} else if size == 3 {
+		} else if c := s[i+3]; c < locb || hicb < c {
+			return false
+		}
+		i += size
+	}
+	return true
+}
+
+// ValidRune reports whether r can be legally encoded as UTF-8.
+// Code points that are out of range or a surrogate half are illegal.
+func ValidRune(r rune) bool {
+	if r < 0 || r > MaxRune {
+		return false // out of range
+	}
+	// In range: legal unless it falls inside the surrogate block.
+	return r < surrogateMin || r > surrogateMax
+}
--- a/src/unicode/utf8/utf8_test.go
+++ b/src/unicode/utf8/utf8_test.go
@@ -0,0 +1,703 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package utf8_test
+
+import (
+	"bytes"
+	"strings"
+	"testing"
+	"unicode"
+	. "unicode/utf8"
+)
+
+// init panics at start-up if one of the constants redefined from unicode no
+// longer equals its counterpart there.
+func init() {
+	checks := []struct {
+		same bool
+		msg  string
+	}{
+		{MaxRune == unicode.MaxRune, "utf8.MaxRune is wrong"},
+		{RuneError == unicode.ReplacementChar, "utf8.RuneError is wrong"},
+	}
+	for _, c := range checks {
+		if !c.same {
+			panic(c.msg)
+		}
+	}
+}
+
+// TestConstants reports, as ordinary test failures, any constant redefined
+// from unicode that no longer equals its counterpart there.
+func TestConstants(t *testing.T) {
+	if got, want := rune(MaxRune), rune(unicode.MaxRune); got != want {
+		t.Errorf("utf8.MaxRune is wrong: %x should be %x", got, want)
+	}
+	if got, want := rune(RuneError), rune(unicode.ReplacementChar); got != want {
+		t.Errorf("utf8.RuneError is wrong: %x should be %x", got, want)
+	}
+}
+
+// Utf8Map pairs a rune with the byte sequence, held in a string, that the
+// tests associate with it.
+type Utf8Map struct {
+	r   rune   // the code point
+	str string // the bytes of its encoding
+}
+
+// utf8map lists runes with their expected encodings. The encode tests
+// require each rune to produce exactly these bytes, and the decode tests
+// require the bytes to produce the rune with a width equal to their length.
+var utf8map = []Utf8Map{
+	{0x0000, "\x00"},
+	{0x0001, "\x01"},
+	{0x007e, "\x7e"},
+	{0x007f, "\x7f"},
+	{0x0080, "\xc2\x80"},
+	{0x0081, "\xc2\x81"},
+	{0x00bf, "\xc2\xbf"},
+	{0x00c0, "\xc3\x80"},
+	{0x00c1, "\xc3\x81"},
+	{0x00c8, "\xc3\x88"},
+	{0x00d0, "\xc3\x90"},
+	{0x00e0, "\xc3\xa0"},
+	{0x00f0, "\xc3\xb0"},
+	{0x00f8, "\xc3\xb8"},
+	{0x00ff, "\xc3\xbf"},
+	{0x0100, "\xc4\x80"},
+	{0x07ff, "\xdf\xbf"},
+	{0x0400, "\xd0\x80"},
+	{0x0800, "\xe0\xa0\x80"},
+	{0x0801, "\xe0\xa0\x81"},
+	{0x1000, "\xe1\x80\x80"},
+	{0xd000, "\xed\x80\x80"},
+	{0xd7ff, "\xed\x9f\xbf"}, // last code point before surrogate half.
+	{0xe000, "\xee\x80\x80"}, // first code point after surrogate half.
+	{0xfffe, "\xef\xbf\xbe"},
+	{0xffff, "\xef\xbf\xbf"},
+	{0x10000, "\xf0\x90\x80\x80"},
+	{0x10001, "\xf0\x90\x80\x81"},
+	{0x40000, "\xf1\x80\x80\x80"},
+	{0x10fffe, "\xf4\x8f\xbf\xbe"},
+	{0x10ffff, "\xf4\x8f\xbf\xbf"},
+	{0xFFFD, "\xef\xbf\xbd"},
+}
+
+// surrogateMap holds the byte sequences that would encode the two ends of
+// the surrogate range. TestDecodeSurrogateRune requires both to be rejected.
+var surrogateMap = []Utf8Map{
+	{0xd800, "\xed\xa0\x80"}, // surrogate min decodes to (RuneError, 1)
+	{0xdfff, "\xed\xbf\xbf"}, // surrogate max decodes to (RuneError, 1)
+}
+
+// testStrings are the inputs for TestSequencing and TestRuntimeConversion:
+// the empty string, ASCII, multi-byte text of several widths, and a run of
+// bare continuation bytes.
+var testStrings = []string{
+	"",
+	"abcd",
+	"☺☻☹",
+	"日a本b語ç日ð本Ê語þ日¥本¼語i日©",
+	"日a本b語ç日ð本Ê語þ日¥本¼語i日©日a本b語ç日ð本Ê語þ日¥本¼語i日©日a本b語ç日ð本Ê語þ日¥本¼語i日©",
+	"\x80\x80\x80\x80",
+}
+
+// TestFullRune checks FullRune and FullRuneInString three ways: every
+// encoding in utf8map is full, the same encoding without its last byte is
+// not, and a lone 0xC0 or 0xC1 byte counts as full.
+func TestFullRune(t *testing.T) {
+	for _, m := range utf8map {
+		b := []byte(m.str)
+		if !FullRune(b) {
+			t.Errorf("FullRune(%q) (%U) = false, want true", b, m.r)
+		}
+		s := m.str
+		if !FullRuneInString(s) {
+			t.Errorf("FullRuneInString(%q) (%U) = false, want true", s, m.r)
+		}
+		// Dropping the final byte leaves an incomplete (or empty) prefix.
+		b1 := b[0 : len(b)-1]
+		if FullRune(b1) {
+			t.Errorf("FullRune(%q) = true, want false", b1)
+		}
+		s1 := string(b1)
+		if FullRuneInString(s1) {
+			t.Errorf("FullRuneInString(%q) = true, want false", s1)
+		}
+	}
+	// 0xC0 and 0xC1 are invalid leading bytes, and an invalid encoding
+	// counts as full.
+	for _, s := range []string{"\xc0", "\xc1"} {
+		b := []byte(s)
+		if !FullRune(b) {
+			t.Errorf("FullRune(%q) = false, want true", s)
+		}
+		if !FullRuneInString(s) {
+			t.Errorf("FullRuneInString(%q) = false, want true", s)
+		}
+	}
+}
+
+// TestEncodeRune checks that EncodeRune produces the expected bytes for
+// every entry of utf8map.
+func TestEncodeRune(t *testing.T) {
+	for i := range utf8map {
+		entry := utf8map[i]
+		want := []byte(entry.str)
+		var scratch [10]byte
+		got := scratch[:EncodeRune(scratch[:], entry.r)]
+		if bytes.Equal(want, got) {
+			continue
+		}
+		t.Errorf("EncodeRune(%#04x) = %q want %q", entry.r, got, want)
+	}
+}
+
+func TestAppendRune(t *testing.T) {
+	for _, m := range utf8map {
+		if buf := AppendRune(nil, m.r); string(buf) != m.str {
+			t.Errorf("AppendRune(nil, %#04x) = %s, want %s", m.r, buf, m.str)
+		}
+		if buf := AppendRune([]byte("init"), m.r); string(buf) != "init"+m.str {
+			t.Errorf("AppendRune(init, %#04x) = %s, want %s", m.r, buf, "init"+m.str)
+		}
+	}
+}
+
+// TestDecodeRune runs DecodeRune and DecodeRuneInString over every entry of
+// utf8map in four situations: the exact encoding, the encoding followed by
+// extra data, the encoding without its last byte, and the encoding with one
+// byte corrupted.
+func TestDecodeRune(t *testing.T) {
+	for _, m := range utf8map {
+		b := []byte(m.str)
+		n := len(b)
+
+		// The exact encoding decodes to its rune at full width.
+		if r, size := DecodeRune(b); r != m.r || size != n {
+			t.Errorf("DecodeRune(%q) = %#04x, %d want %#04x, %d", b, r, size, m.r, n)
+		}
+		if r, size := DecodeRuneInString(m.str); r != m.r || size != n {
+			t.Errorf("DecodeRuneInString(%q) = %#04x, %d want %#04x, %d", m.str, r, size, m.r, n)
+		}
+
+		// there's an extra byte that bytes left behind - make sure trailing byte works
+		if r, size := DecodeRune(b[:cap(b)]); r != m.r || size != n {
+			t.Errorf("DecodeRune(%q) = %#04x, %d want %#04x, %d", b, r, size, m.r, n)
+		}
+		padded := m.str + "\x00"
+		if r, size := DecodeRuneInString(padded); r != m.r || size != n {
+			t.Errorf("DecodeRuneInString(%q) = %#04x, %d want %#04x, %d", padded, r, size, m.r, n)
+		}
+
+		// make sure missing bytes fail: truncating a one-byte encoding
+		// leaves nothing (width 0), anything longer leaves a bad prefix
+		// (width 1).
+		short := 1
+		if n <= 1 {
+			short = 0
+		}
+		if r, size := DecodeRune(b[:n-1]); r != RuneError || size != short {
+			t.Errorf("DecodeRune(%q) = %#04x, %d want %#04x, %d", b[:n-1], r, size, RuneError, short)
+		}
+		truncated := m.str[:len(m.str)-1]
+		if r, size := DecodeRuneInString(truncated); r != RuneError || size != short {
+			t.Errorf("DecodeRuneInString(%q) = %#04x, %d want %#04x, %d", truncated, r, size, RuneError, short)
+		}
+
+		// make sure bad sequences fail: overwrite the last byte with one
+		// that cannot appear in that position.
+		b[n-1] = 0x7F
+		if n == 1 {
+			b[n-1] = 0x80
+		}
+		if r, size := DecodeRune(b); r != RuneError || size != 1 {
+			t.Errorf("DecodeRune(%q) = %#04x, %d want %#04x, %d", b, r, size, RuneError, 1)
+		}
+		corrupted := string(b)
+		if r, size := DecodeRuneInString(corrupted); r != RuneError || size != 1 {
+			t.Errorf("DecodeRuneInString(%q) = %#04x, %d want %#04x, %d", corrupted, r, size, RuneError, 1)
+		}
+	}
+}
+
+// TestDecodeSurrogateRune checks that the byte sequences in surrogateMap
+// are rejected with (RuneError, 1) by both decoders.
+func TestDecodeSurrogateRune(t *testing.T) {
+	for i := range surrogateMap {
+		s := surrogateMap[i].str
+
+		if r, size := DecodeRune([]byte(s)); r != RuneError || size != 1 {
+			t.Errorf("DecodeRune(%q) = %x, %d want %x, %d", s, r, size, RuneError, 1)
+		}
+		if r, size := DecodeRuneInString(s); r != RuneError || size != 1 {
+			t.Errorf("DecodeRuneInString(%q) = %x, %d want %x, %d", s, r, size, RuneError, 1)
+		}
+	}
+}
+
+// TestSequencing checks that DecodeRune and DecodeLastRune agree with the
+// equivalent range loop. Every encoding in utf8map is placed after, before
+// and in the middle of every test string.
+func TestSequencing(t *testing.T) {
+	for _, ts := range testStrings {
+		for _, m := range utf8map {
+			testSequence(t, ts+m.str)
+			testSequence(t, m.str+ts)
+			testSequence(t, ts+m.str+ts)
+		}
+	}
+}
+
+// runtimeRuneCount counts the runes of s with the len([]rune(s)) idiom, so
+// that TestRuntimeConversion can compare the result with RuneCountInString.
+// The expression must keep exactly this form for the replacement noted
+// below to happen.
+func runtimeRuneCount(s string) int {
+	return len([]rune(s)) // Replaced by gc with call to runtime.countrunes(s).
+}
+
+// TestRuntimeConversion checks that a range loop, the len([]rune(string))
+// optimization and []rune conversions all visit the same runes.
+// This is not really a test of this package, but the package's tests rely
+// on the assumption, so it is good to verify.
+func TestRuntimeConversion(t *testing.T) {
+	for _, ts := range testStrings {
+		want := RuneCountInString(ts)
+		if got := runtimeRuneCount(ts); got != want {
+			t.Errorf("%q: len([]rune()) counted %d runes; got %d from RuneCountInString", ts, got, want)
+			break
+		}
+
+		converted := []rune(ts)
+		if got := len(converted); got != want {
+			t.Errorf("%q: []rune() has length %d; got %d from RuneCountInString", ts, got, want)
+			break
+		}
+
+		// The range loop must yield the converted runes in the same order.
+		pos := 0
+		for _, r := range ts {
+			if expected := converted[pos]; expected != r {
+				t.Errorf("%q[%d]: expected %c (%U); got %c (%U)", ts, pos, expected, expected, r, r)
+			}
+			pos++
+		}
+	}
+}
+
+// invalidSequenceTests lists four-byte inputs whose first rune must decode
+// to RuneError (see TestDecodeInvalidSequence). Apart from the two surrogate
+// entries, the groups are labelled with the class that utf8.go's first table
+// gives their leading byte (xx, s1 to s7).
+var invalidSequenceTests = []string{
+	"\xed\xa0\x80\x80", // surrogate min
+	"\xed\xbf\xbf\x80", // surrogate max
+
+	// xx
+	"\x91\x80\x80\x80",
+
+	// s1
+	"\xC2\x7F\x80\x80",
+	"\xC2\xC0\x80\x80",
+	"\xDF\x7F\x80\x80",
+	"\xDF\xC0\x80\x80",
+
+	// s2
+	"\xE0\x9F\xBF\x80",
+	"\xE0\xA0\x7F\x80",
+	"\xE0\xBF\xC0\x80",
+	"\xE0\xC0\x80\x80",
+
+	// s3
+	"\xE1\x7F\xBF\x80",
+	"\xE1\x80\x7F\x80",
+	"\xE1\xBF\xC0\x80",
+	"\xE1\xC0\x80\x80",
+
+	// s4
+	"\xED\x7F\xBF\x80",
+	"\xED\x80\x7F\x80",
+	"\xED\x9F\xC0\x80",
+	"\xED\xA0\x80\x80",
+
+	// s5
+	"\xF0\x8F\xBF\xBF",
+	"\xF0\x90\x7F\xBF",
+	"\xF0\x90\x80\x7F",
+	"\xF0\xBF\xBF\xC0",
+	"\xF0\xBF\xC0\x80",
+	"\xF0\xC0\x80\x80",
+
+	// s6
+	"\xF1\x7F\xBF\xBF",
+	"\xF1\x80\x7F\xBF",
+	"\xF1\x80\x80\x7F",
+	"\xF1\xBF\xBF\xC0",
+	"\xF1\xBF\xC0\x80",
+	"\xF1\xC0\x80\x80",
+
+	// s7
+	"\xF4\x7F\xBF\xBF",
+	"\xF4\x80\x7F\xBF",
+	"\xF4\x80\x80\x7F",
+	"\xF4\x8F\xBF\xC0",
+	"\xF4\x8F\xC0\x80",
+	"\xF4\x90\x80\x80",
+}
+
+// runtimeDecodeRune returns the first rune that a range loop over s yields,
+// or -1 if s is empty. TestDecodeInvalidSequence compares it with the result
+// of DecodeRuneInString.
+func runtimeDecodeRune(s string) rune {
+	for _, r := range s {
+		// Only the first iteration matters.
+		return r
+	}
+	return -1
+}
+
+// TestDecodeInvalidSequence checks that every entry of invalidSequenceTests
+// decodes to RuneError through DecodeRune, DecodeRuneInString and a range
+// loop alike. It stops at the first failure.
+func TestDecodeInvalidSequence(t *testing.T) {
+	for _, s := range invalidSequenceTests {
+		fromBytes, _ := DecodeRune([]byte(s))
+		if fromBytes != RuneError {
+			t.Errorf("DecodeRune(%#x) = %#04x, want %#04x", s, fromBytes, RuneError)
+			return
+		}
+
+		fromString, _ := DecodeRuneInString(s)
+		if fromString != RuneError {
+			t.Errorf("DecodeRuneInString(%q) = %#04x, want %#04x", s, fromString, RuneError)
+			return
+		}
+		if fromBytes != fromString {
+			t.Errorf("DecodeRune(%#x) = %#04x mismatch with DecodeRuneInString(%q) = %#04x", s, fromBytes, s, fromString)
+			return
+		}
+
+		if fromRange := runtimeDecodeRune(s); fromString != fromRange {
+			t.Errorf("DecodeRuneInString(%q) = %#04x mismatch with runtime.decoderune(%q) = %#04x", s, fromString, s, fromRange)
+			return
+		}
+	}
+}
+
+// testSequence decodes s rune by rune in both directions and reports the
+// first disagreement with a range loop over s. The forward pass records
+// where each rune starts; the backward pass must then meet the same runes
+// and offsets in reverse order and finish at offset 0.
+func testSequence(t *testing.T, s string) {
+	type step struct {
+		offset int
+		r      rune
+	}
+	steps := make([]step, 0, len(s))
+	b := []byte(s)
+
+	// Forward pass with DecodeRune and DecodeRuneInString.
+	next := 0
+	for i, r := range s {
+		if next != i {
+			t.Errorf("Sequence(%q) mismatched index %d, want %d", s, next, i)
+			return
+		}
+		steps = append(steps, step{i, r})
+
+		rb, nb := DecodeRune(b[i:])
+		if rb != r {
+			t.Errorf("DecodeRune(%q) = %#04x, want %#04x", s[i:], rb, r)
+			return
+		}
+		rs, ns := DecodeRuneInString(s[i:])
+		if rs != r {
+			t.Errorf("DecodeRuneInString(%q) = %#04x, want %#04x", s[i:], rs, r)
+			return
+		}
+		if nb != ns {
+			t.Errorf("DecodeRune/DecodeRuneInString(%q) size mismatch %d/%d", s[i:], nb, ns)
+			return
+		}
+		next += nb
+	}
+
+	// Backward pass with DecodeLastRune and DecodeLastRuneInString.
+	end := len(s)
+	for k := len(steps) - 1; end > 0; k-- {
+		rb, nb := DecodeLastRune(b[:end])
+		rs, ns := DecodeLastRuneInString(s[:end])
+		if nb != ns {
+			t.Errorf("DecodeLastRune/DecodeLastRuneInString(%q, %d) size mismatch %d/%d", s, end, nb, ns)
+			return
+		}
+		want := steps[k]
+		if rb != want.r {
+			t.Errorf("DecodeLastRune(%q, %d) = %#04x, want %#04x", s, end, rb, want.r)
+			return
+		}
+		if rs != want.r {
+			t.Errorf("DecodeLastRuneInString(%q, %d) = %#04x, want %#04x", s, end, rs, want.r)
+			return
+		}
+		end -= nb
+		if end != want.offset {
+			t.Errorf("DecodeLastRune(%q) index mismatch at %d, want %d", s, end, want.offset)
+			return
+		}
+	}
+	if end != 0 {
+		t.Errorf("DecodeLastRune(%q) finished at %d, not 0", s, end)
+	}
+}
+
+// TestNegativeRune checks that a negative rune is encoded as U+FFFD.
+func TestNegativeRune(t *testing.T) {
+	// encode returns the bytes EncodeRune writes for r.
+	encode := func(r rune) []byte {
+		buf := make([]byte, UTFMax)
+		return buf[:EncodeRune(buf, r)]
+	}
+
+	want := encode(RuneError)
+	got := encode(-1)
+	if !bytes.Equal(got, want) {
+		t.Errorf("incorrect encoding [% x] for -1; expected [% x]", got, want)
+	}
+}
+
+// RuneCountTest is one table entry for TestRuneCount: in is the input string
+// and out is the rune count expected for it.
+type RuneCountTest struct {
+	in  string
+	out int
+}
+
+// runecounttests lists the inputs checked by TestRuneCount. The last three
+// entries are not well-formed UTF-8 (a \xe2 lead byte followed by at most one
+// more byte); their expected counts equal the number of bytes in the input.
+var runecounttests = []RuneCountTest{
+	{"abcd", 4},
+	{"☺☻☹", 3},
+	{"1,2,3,4", 7},
+	{"\xe2\x00", 2},
+	{"\xe2\x80", 2},
+	{"a\xe2\x80", 3},
+}
+
+// TestRuneCount checks RuneCountInString, and RuneCount on the same input
+// converted to a byte slice, against every entry of runecounttests.
+func TestRuneCount(t *testing.T) {
+	for _, tc := range runecounttests {
+		want := tc.out
+		if got := RuneCountInString(tc.in); got != want {
+			t.Errorf("RuneCountInString(%q) = %d, want %d", tc.in, got, want)
+		}
+		if got := RuneCount([]byte(tc.in)); got != want {
+			t.Errorf("RuneCount(%q) = %d, want %d", tc.in, got, want)
+		}
+	}
+}
+
+// RuneLenTest is one table entry for TestRuneLen: r is the rune passed to
+// RuneLen and size is the result expected for it.
+type RuneLenTest struct {
+	r    rune
+	size int
+}
+
+// runelentests lists the runes checked by TestRuneLen. The expected size is
+// -1 for the surrogate values 0xD800 and 0xDFFF, for MaxRune + 1 and for -1.
+var runelentests = []RuneLenTest{
+	{0, 1},
+	{'e', 1},
+	{'é', 2},
+	{'☺', 3},
+	{RuneError, 3},
+	{MaxRune, 4},
+	{0xD800, -1},
+	{0xDFFF, -1},
+	{MaxRune + 1, -1},
+	{-1, -1},
+}
+
+// TestRuneLen checks RuneLen against every entry of runelentests.
+func TestRuneLen(t *testing.T) {
+	for i := range runelentests {
+		tc := runelentests[i]
+		got := RuneLen(tc.r)
+		if got == tc.size {
+			continue
+		}
+		t.Errorf("RuneLen(%#U) = %d, want %d", tc.r, got, tc.size)
+	}
+}
+
+// ValidTest is one table entry for TestValid: in is the input and out is the
+// result expected from both Valid and ValidString.
+type ValidTest struct {
+	in  string
+	out bool
+}
+
+// validTests lists the inputs checked by TestValid. Entries expected to be
+// rejected include truncated sequences, out-of-range values, a two-byte
+// encoding of U+0000, and encoded surrogates.
+var validTests = []ValidTest{
+	{"", true},
+	{"a", true},
+	{"abc", true},
+	{"Ж", true},
+	{"ЖЖ", true},
+	{"брэд-ЛГТМ", true},
+	{"☺☻☹", true},
+	{"aa\xe2", false},
+	{string([]byte{66, 250}), false},
+	{string([]byte{66, 250, 67}), false},
+	{"a\uFFFDb", true},
+	{string("\xF4\x8F\xBF\xBF"), true},      // U+10FFFF
+	{string("\xF4\x90\x80\x80"), false},     // U+10FFFF+1; out of range
+	{string("\xF7\xBF\xBF\xBF"), false},     // 0x1FFFFF; out of range
+	{string("\xFB\xBF\xBF\xBF\xBF"), false}, // 0x3FFFFFF; out of range
+	{string("\xc0\x80"), false},             // U+0000 encoded in two bytes: incorrect
+	{string("\xed\xa0\x80"), false},         // U+D800 high surrogate (sic)
+	{string("\xed\xbf\xbf"), false},         // U+DFFF low surrogate (sic)
+}
+
+// TestValid checks Valid (on the input converted to a byte slice) and
+// ValidString against every entry of validTests.
+func TestValid(t *testing.T) {
+	for _, tc := range validTests {
+		want := tc.out
+		// A mismatching bool result is necessarily the negation of want.
+		if got := Valid([]byte(tc.in)); got != want {
+			t.Errorf("Valid(%q) = %v; want %v", tc.in, got, want)
+		}
+		if got := ValidString(tc.in); got != want {
+			t.Errorf("ValidString(%q) = %v; want %v", tc.in, got, want)
+		}
+	}
+}
+
+// ValidRuneTest is one table entry for TestValidRune: r is the rune passed to
+// ValidRune and ok is the result expected for it.
+type ValidRuneTest struct {
+	r  rune
+	ok bool
+}
+
+// validrunetests lists the runes checked by TestValidRune, including the
+// values on either side of the 0xD800-0xDFFF range, MaxRune + 1 and -1.
+var validrunetests = []ValidRuneTest{
+	{0, true},
+	{'e', true},
+	{'é', true},
+	{'☺', true},
+	{RuneError, true},
+	{MaxRune, true},
+	{0xD7FF, true},
+	{0xD800, false},
+	{0xDFFF, false},
+	{0xE000, true},
+	{MaxRune + 1, false},
+	{-1, false},
+}
+
+// TestValidRune checks ValidRune against every entry of validrunetests.
+func TestValidRune(t *testing.T) {
+	for i := range validrunetests {
+		tc := validrunetests[i]
+		got := ValidRune(tc.r)
+		if got == tc.ok {
+			continue
+		}
+		t.Errorf("ValidRune(%#U) = %t, want %t", tc.r, got, tc.ok)
+	}
+}
+
+// BenchmarkRuneCountTenASCIIChars calls RuneCount b.N times on a byte slice
+// holding the ten ASCII digits.
+func BenchmarkRuneCountTenASCIIChars(b *testing.B) {
+	digits := []byte("0123456789")
+	for iter := 0; iter < b.N; iter++ {
+		RuneCount(digits)
+	}
+}
+
+// BenchmarkRuneCountTenJapaneseChars calls RuneCount b.N times on a byte
+// slice holding ten Japanese characters.
+func BenchmarkRuneCountTenJapaneseChars(b *testing.B) {
+	japanese := []byte("日本語日本語日本語日")
+	for iter := 0; iter < b.N; iter++ {
+		RuneCount(japanese)
+	}
+}
+
+func BenchmarkRuneCountInStringTenASCIIChars(b *testing.B) {
+	for i := 0; i < b.N; i++ {
+		RuneCountInString("0123456789")
+	}
+}
+
+func BenchmarkRuneCountInStringTenJapaneseChars(b *testing.B) {
+	for i := 0; i < b.N; i++ {
+		RuneCountInString("日本語日本語日本語日")
+	}
+}
+
+// ascii100000 is a 100,000-byte ASCII string: the ten decimal digits
+// repeated 10,000 times.
+var ascii100000 = strings.Repeat("0123456789", 10000)
+
+// BenchmarkValidTenASCIIChars calls Valid b.N times on a byte slice holding
+// the ten ASCII digits.
+func BenchmarkValidTenASCIIChars(b *testing.B) {
+	digits := []byte("0123456789")
+	for iter := 0; iter < b.N; iter++ {
+		Valid(digits)
+	}
+}
+
+// BenchmarkValid100KASCIIChars calls Valid b.N times on the bytes of
+// ascii100000; the conversion to a byte slice happens once, before the loop.
+func BenchmarkValid100KASCIIChars(b *testing.B) {
+	input := []byte(ascii100000)
+	for iter := 0; iter < b.N; iter++ {
+		Valid(input)
+	}
+}
+
+// BenchmarkValidTenJapaneseChars calls Valid b.N times on a byte slice
+// holding ten Japanese characters.
+func BenchmarkValidTenJapaneseChars(b *testing.B) {
+	japanese := []byte("日本語日本語日本語日")
+	for iter := 0; iter < b.N; iter++ {
+		Valid(japanese)
+	}
+}
+// BenchmarkValidLongMostlyASCII calls Valid b.N times on the bytes of
+// longStringMostlyASCII; the conversion happens once, before the loop.
+func BenchmarkValidLongMostlyASCII(b *testing.B) {
+	input := []byte(longStringMostlyASCII)
+	for iter := 0; iter < b.N; iter++ {
+		Valid(input)
+	}
+}
+
+// BenchmarkValidLongJapanese calls Valid b.N times on the bytes of
+// longStringJapanese; the conversion happens once, before the loop.
+func BenchmarkValidLongJapanese(b *testing.B) {
+	input := []byte(longStringJapanese)
+	for iter := 0; iter < b.N; iter++ {
+		Valid(input)
+	}
+}
+
+func BenchmarkValidStringTenASCIIChars(b *testing.B) {
+	for i := 0; i < b.N; i++ {
+		ValidString("0123456789")
+	}
+}
+
+// BenchmarkValidString100KASCIIChars calls ValidString b.N times on
+// ascii100000.
+func BenchmarkValidString100KASCIIChars(b *testing.B) {
+	input := ascii100000
+	for iter := 0; iter < b.N; iter++ {
+		ValidString(input)
+	}
+}
+
+func BenchmarkValidStringTenJapaneseChars(b *testing.B) {
+	for i := 0; i < b.N; i++ {
+		ValidString("日本語日本語日本語日")
+	}
+}
+
+// BenchmarkValidStringLongMostlyASCII calls ValidString b.N times on
+// longStringMostlyASCII.
+func BenchmarkValidStringLongMostlyASCII(b *testing.B) {
+	input := longStringMostlyASCII
+	for iter := 0; iter < b.N; iter++ {
+		ValidString(input)
+	}
+}
+
+// BenchmarkValidStringLongJapanese calls ValidString b.N times on
+// longStringJapanese.
+func BenchmarkValidStringLongJapanese(b *testing.B) {
+	input := longStringJapanese
+	for iter := 0; iter < b.N; iter++ {
+		ValidString(input)
+	}
+}
+
+var longStringMostlyASCII string // ~100KB, ~97% ASCII
+var longStringJapanese string    // ~100KB, non-ASCII
+
+// init fills in the two long benchmark inputs declared above.
+//
+// longStringMostlyASCII is built chunk by chunk until it holds at least
+// 100,000 bytes: every hundredth chunk (starting with the first) is the
+// ten-character Japanese string, and all other chunks are the ten ASCII digits.
+// longStringJapanese is the Japanese string repeated as many whole times as
+// fit in 100,000 bytes.
+func init() {
+	const (
+		japanese = "日本語日本語日本語日"
+		digits   = "0123456789"
+		target   = 100_000
+	)
+	var sb strings.Builder
+	for n := 0; sb.Len() < target; n++ {
+		chunk := digits
+		if n%100 == 0 {
+			chunk = japanese
+		}
+		sb.WriteString(chunk)
+	}
+	longStringMostlyASCII = sb.String()
+	longStringJapanese = strings.Repeat(japanese, target/len(japanese))
+}
+
+func BenchmarkEncodeASCIIRune(b *testing.B) {
+	buf := make([]byte, UTFMax)
+	for i := 0; i < b.N; i++ {
+		EncodeRune(buf, 'a')
+	}
+}
+
+func BenchmarkEncodeJapaneseRune(b *testing.B) {
+	buf := make([]byte, UTFMax)
+	for i := 0; i < b.N; i++ {
+		EncodeRune(buf, '本')
+	}
+}
+
+func BenchmarkAppendASCIIRune(b *testing.B) {
+	buf := make([]byte, UTFMax)
+	for i := 0; i < b.N; i++ {
+		AppendRune(buf[:0], 'a')
+	}
+}
+
+func BenchmarkAppendJapaneseRune(b *testing.B) {
+	buf := make([]byte, UTFMax)
+	for i := 0; i < b.N; i++ {
+		AppendRune(buf[:0], '本')
+	}
+}
+
+// BenchmarkDecodeASCIIRune calls DecodeRune b.N times on a one-byte slice
+// holding 'a'.
+func BenchmarkDecodeASCIIRune(b *testing.B) {
+	input := []byte("a")
+	for iter := 0; iter < b.N; iter++ {
+		DecodeRune(input)
+	}
+}
+
+// BenchmarkDecodeJapaneseRune calls DecodeRune b.N times on the bytes of
+// the single character "本".
+func BenchmarkDecodeJapaneseRune(b *testing.B) {
+	input := []byte("本")
+	for iter := 0; iter < b.N; iter++ {
+		DecodeRune(input)
+	}
+}
+
+// boolSink receives the return value of the benchmarked call (FullRune in
+// BenchmarkFullRune) so that the result is referenced, which avoids dead
+// code elimination of the call.
+var boolSink bool
+
+func BenchmarkFullRune(b *testing.B) {
+	benchmarks := []struct {
+		name string
+		data []byte
+	}{
+		{"ASCII", []byte("a")},
+		{"Incomplete", []byte("\xf0\x90\x80")},
+		{"Japanese", []byte("本")},
+	}
+	for _, bm := range benchmarks {
+		b.Run(bm.name, func(b *testing.B) {
+			for i := 0; i < b.N; i++ {
+				boolSink = FullRune(bm.data)
+			}
+		})
+	}
+}