From 7291da1e5a37bf25c9c4ba1665a60d10d9bdf0e0 Mon Sep 17 00:00:00 2001 From: golangci Date: Mon, 7 May 2018 12:00:17 +0300 Subject: [PATCH] support unconvert --- Gopkg.lock | 21 +- pkg/enabled_linters.go | 1 + pkg/golinters/testdata/govet.go | 9 +- pkg/golinters/testdata/interfacer.go | 2 +- pkg/golinters/testdata/unconvert.go | 9 + pkg/golinters/unconvert.go | 29 + vendor/github.com/golangci/unconvert/LICENSE | 27 + vendor/github.com/golangci/unconvert/README | 36 + .../golangci/unconvert/unconvert.go | 665 ++++++++ vendor/golang.org/x/text/AUTHORS | 3 + vendor/golang.org/x/text/CONTRIBUTORS | 3 + vendor/golang.org/x/text/LICENSE | 27 + vendor/golang.org/x/text/PATENTS | 22 + vendor/golang.org/x/text/internal/gen/code.go | 369 ++++ vendor/golang.org/x/text/internal/gen/gen.go | 333 ++++ .../x/text/internal/triegen/compact.go | 58 + .../x/text/internal/triegen/print.go | 251 +++ .../x/text/internal/triegen/triegen.go | 494 ++++++ vendor/golang.org/x/text/internal/ucd/ucd.go | 371 ++++ .../golang.org/x/text/transform/transform.go | 705 ++++++++ vendor/golang.org/x/text/unicode/cldr/base.go | 105 ++ vendor/golang.org/x/text/unicode/cldr/cldr.go | 130 ++ .../golang.org/x/text/unicode/cldr/collate.go | 359 ++++ .../golang.org/x/text/unicode/cldr/decode.go | 171 ++ .../golang.org/x/text/unicode/cldr/makexml.go | 400 +++++ .../golang.org/x/text/unicode/cldr/resolve.go | 602 +++++++ .../golang.org/x/text/unicode/cldr/slice.go | 144 ++ vendor/golang.org/x/text/unicode/cldr/xml.go | 1494 +++++++++++++++++ vendor/golang.org/x/text/width/gen.go | 115 ++ vendor/golang.org/x/text/width/gen_common.go | 96 ++ vendor/golang.org/x/text/width/gen_trieval.go | 34 + vendor/golang.org/x/text/width/kind_string.go | 16 + .../golang.org/x/text/width/tables10.0.0.go | 1318 +++++++++++++++ vendor/golang.org/x/text/width/tables9.0.0.go | 1286 ++++++++++++++ vendor/golang.org/x/text/width/transform.go | 239 +++ vendor/golang.org/x/text/width/trieval.go | 30 + vendor/golang.org/x/text/width/width.go | 206 +++ 37 files changed, 10175 insertions(+), 5 deletions(-) create mode 100644 pkg/golinters/testdata/unconvert.go create mode 100644 pkg/golinters/unconvert.go create mode 100644 vendor/github.com/golangci/unconvert/LICENSE create mode 100644 vendor/github.com/golangci/unconvert/README create mode 100644 vendor/github.com/golangci/unconvert/unconvert.go create mode 100644 vendor/golang.org/x/text/AUTHORS create mode 100644 vendor/golang.org/x/text/CONTRIBUTORS create mode 100644 vendor/golang.org/x/text/LICENSE create mode 100644 vendor/golang.org/x/text/PATENTS create mode 100644 vendor/golang.org/x/text/internal/gen/code.go create mode 100644 vendor/golang.org/x/text/internal/gen/gen.go create mode 100644 vendor/golang.org/x/text/internal/triegen/compact.go create mode 100644 vendor/golang.org/x/text/internal/triegen/print.go create mode 100644 vendor/golang.org/x/text/internal/triegen/triegen.go create mode 100644 vendor/golang.org/x/text/internal/ucd/ucd.go create mode 100644 vendor/golang.org/x/text/transform/transform.go create mode 100644 vendor/golang.org/x/text/unicode/cldr/base.go create mode 100644 vendor/golang.org/x/text/unicode/cldr/cldr.go create mode 100644 vendor/golang.org/x/text/unicode/cldr/collate.go create mode 100644 vendor/golang.org/x/text/unicode/cldr/decode.go create mode 100644 vendor/golang.org/x/text/unicode/cldr/makexml.go create mode 100644 vendor/golang.org/x/text/unicode/cldr/resolve.go create mode 100644 vendor/golang.org/x/text/unicode/cldr/slice.go create mode 100644 vendor/golang.org/x/text/unicode/cldr/xml.go create mode 100644 vendor/golang.org/x/text/width/gen.go create mode 100644 vendor/golang.org/x/text/width/gen_common.go create mode 100644 vendor/golang.org/x/text/width/gen_trieval.go create mode 100644 vendor/golang.org/x/text/width/kind_string.go create mode 100644 vendor/golang.org/x/text/width/tables10.0.0.go create mode 100644 vendor/golang.org/x/text/width/tables9.0.0.go create mode 100644 vendor/golang.org/x/text/width/transform.go create mode 100644 vendor/golang.org/x/text/width/trieval.go create mode 100644 vendor/golang.org/x/text/width/width.go diff --git a/Gopkg.lock b/Gopkg.lock index 56932356..6ddc13ce 100644 --- a/Gopkg.lock +++ b/Gopkg.lock @@ -120,6 +120,12 @@ packages = ["."] revision = "b1d89398deca2fd3f8578e5a9551e819bd01ca5f" +[[projects]] + branch = "master" + name = "github.com/golangci/unconvert" + packages = ["."] + revision = "28b1c447d1f4a810737ee6ab40ea6c1d0ceae4ad" + [[projects]] name = "github.com/inconshreveable/mousetrap" packages = ["."] @@ -237,6 +243,19 @@ ] revision = "6f686a352de66814cdd080d970febae7767857a3" +[[projects]] + name = "golang.org/x/text" + packages = [ + "internal/gen", + "internal/triegen", + "internal/ucd", + "transform", + "unicode/cldr", + "width" + ] + revision = "f21a4dfb5e38f5895301dc265a8def02365cc3d0" + version = "v0.3.0" + [[projects]] branch = "master" name = "golang.org/x/tools" @@ -281,6 +300,6 @@ [solve-meta] analyzer-name = "dep" analyzer-version = 1 - inputs-digest = "6e4437a7adb255525e8d1cfda07716617c1f28acaa721ac4f910f52f49ecea2f" + inputs-digest = "3c773b17692c1bc6ce7d565b959908aa95160e5d4f2486721deaff48608c864e" solver-name = "gps-cdcl" solver-version = 1 diff --git a/pkg/enabled_linters.go b/pkg/enabled_linters.go index 6563830c..a994f227 100644 --- a/pkg/enabled_linters.go +++ b/pkg/enabled_linters.go @@ -70,6 +70,7 @@ func GetAllSupportedLinterConfigs() []LinterConfig { enabledByDefault(golinters.Dupl{}, "Tool for code clone detection", false, false), enabledByDefault(golinters.Ineffassign{}, "Detects when assignments to existing variables are not used", false, false), enabledByDefault(golinters.Interfacer{}, "Linter that suggests narrower interface types", true, true), + enabledByDefault(golinters.Unconvert{}, "Remove unnecessary type conversions", true, false), disabledByDefault(golinters.Gofmt{}, "Gofmt checks whether code was gofmt-ed. By default this tool runs with -s option to check for code simplification", false, false), disabledByDefault(golinters.Gofmt{UseGoimports: true}, "Goimports does everything that gofmt does. Additionally it checks unused imports", false, false), diff --git a/pkg/golinters/testdata/govet.go b/pkg/golinters/testdata/govet.go index 74124715..1474f9d2 100644 --- a/pkg/golinters/testdata/govet.go +++ b/pkg/golinters/testdata/govet.go @@ -1,14 +1,17 @@ package testdata -import "os" +import ( + "io" + "os" +) func Govet() error { return &os.PathError{"first", "path", os.ErrNotExist} // ERROR "os.PathError composite literal uses unkeyed fields" } -func GovetShadow(f *os.File, buf []byte) (err error) { +func GovetShadow(f io.Reader, buf []byte) (err error) { if f != nil { - _, err := f.Read(buf) // ERROR "declaration of .err. shadows declaration at testdata/govet.go:9" + _, err := f.Read(buf) // ERROR "declaration of .err. shadows declaration at testdata/govet.go:\d+" if err != nil { return err } diff --git a/pkg/golinters/testdata/interfacer.go b/pkg/golinters/testdata/interfacer.go index 36d05742..1d232585 100644 --- a/pkg/golinters/testdata/interfacer.go +++ b/pkg/golinters/testdata/interfacer.go @@ -2,6 +2,6 @@ package testdata import "io" -func InterfacerCheck(f io.ReadCloser) { // ERROR "XXX" +func InterfacerCheck(f io.ReadCloser) { // ERROR "f can be io.Closer" f.Close() } diff --git a/pkg/golinters/testdata/unconvert.go b/pkg/golinters/testdata/unconvert.go new file mode 100644 index 00000000..85e4fb41 --- /dev/null +++ b/pkg/golinters/testdata/unconvert.go @@ -0,0 +1,9 @@ +package testdata + +import "log" + +func Unconvert() { + a := 1 + b := int(a) // ERROR "unnecessary conversion" + log.Print(b) +} diff --git a/pkg/golinters/unconvert.go b/pkg/golinters/unconvert.go new file mode 100644 index 00000000..9f8cc608 --- /dev/null +++ b/pkg/golinters/unconvert.go @@ -0,0 +1,29 @@ +package golinters + +import ( + "context" + + "github.com/golangci/golangci-lint/pkg/result" + unconvertAPI "github.com/golangci/unconvert" +) + +type Unconvert struct{} + +func (Unconvert) Name() string { + return "unconvert" +} + +func (lint Unconvert) Run(ctx context.Context, lintCtx *Context) (*result.Result, error) { + positions := unconvertAPI.Run(lintCtx.Program) + res := &result.Result{} + for _, pos := range positions { + res.Issues = append(res.Issues, result.Issue{ + File: pos.Filename, + LineNumber: pos.Line, + Text: "unnecessary conversion", + FromLinter: lint.Name(), + }) + } + + return res, nil +} diff --git a/vendor/github.com/golangci/unconvert/LICENSE b/vendor/github.com/golangci/unconvert/LICENSE new file mode 100644 index 00000000..74487567 --- /dev/null +++ b/vendor/github.com/golangci/unconvert/LICENSE @@ -0,0 +1,27 @@ +Copyright (c) 2012 The Go Authors. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above +copyright notice, this list of conditions and the following disclaimer +in the documentation and/or other materials provided with the +distribution. + * Neither the name of Google Inc. nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/vendor/github.com/golangci/unconvert/README b/vendor/github.com/golangci/unconvert/README new file mode 100644 index 00000000..dbaea4f5 --- /dev/null +++ b/vendor/github.com/golangci/unconvert/README @@ -0,0 +1,36 @@ +About: + +The unconvert program analyzes Go packages to identify unnecessary +type conversions; i.e., expressions T(x) where x already has type T. + +Install: + + $ go get github.com/mdempsky/unconvert + +Usage: + + $ unconvert -v bytes fmt + GOROOT/src/bytes/reader.go:117:14: unnecessary conversion + abs = int64(r.i) + offset + ^ + GOROOT/src/fmt/print.go:411:21: unnecessary conversion + p.fmt.integer(int64(v), 16, unsigned, udigits) + ^ + +Flags: + +Using the -v flag, unconvert will also print the source line and a +caret to indicate the unnecessary conversion's position therein. + +Using the -apply flag, unconvert will rewrite the Go source files +without the unnecessary type conversions. + +Using the -all flag, unconvert will analyze the Go packages under all +possible GOOS/GOARCH combinations, and only identify conversions that +are unnecessary in all cases. + +E.g., syscall.Timespec's Sec and Nsec fields are int64 under +linux/amd64 but int32 under linux/386. An int64(ts.Sec) conversion +that appears in a linux/amd64-only file will be identified as +unnecessary, but it will be preserved if it occurs in a file that's +compiled for both linux/amd64 and linux/386. diff --git a/vendor/github.com/golangci/unconvert/unconvert.go b/vendor/github.com/golangci/unconvert/unconvert.go new file mode 100644 index 00000000..38737d39 --- /dev/null +++ b/vendor/github.com/golangci/unconvert/unconvert.go @@ -0,0 +1,665 @@ +// Copyright 2015 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Unconvert removes redundant type conversions from Go packages. +package unconvert + +import ( + "bytes" + "flag" + "fmt" + "go/ast" + "go/build" + "go/format" + "go/parser" + "go/token" + "go/types" + "io/ioutil" + "log" + "os" + "reflect" + "runtime/pprof" + "sort" + "sync" + "unicode" + + "github.com/kisielk/gotool" + "golang.org/x/text/width" + "golang.org/x/tools/go/loader" +) + +// Unnecessary conversions are identified by the position +// of their left parenthesis within a source file. + +type editSet map[token.Position]struct{} + +type fileToEditSet map[string]editSet + +func apply(file string, edits editSet) { + if len(edits) == 0 { + return + } + + fset := token.NewFileSet() + f, err := parser.ParseFile(fset, file, nil, parser.ParseComments) + if err != nil { + log.Fatal(err) + } + + // Note: We modify edits during the walk. + v := editor{edits: edits, file: fset.File(f.Package)} + ast.Walk(&v, f) + if len(edits) != 0 { + log.Printf("%s: missing edits %s", file, edits) + } + + // TODO(mdempsky): Write to temporary file and rename. + var buf bytes.Buffer + err = format.Node(&buf, fset, f) + if err != nil { + log.Fatal(err) + } + + err = ioutil.WriteFile(file, buf.Bytes(), 0) + if err != nil { + log.Fatal(err) + } +} + +type editor struct { + edits editSet + file *token.File +} + +func (e *editor) Visit(n ast.Node) ast.Visitor { + if n == nil { + return nil + } + v := reflect.ValueOf(n).Elem() + for i, n := 0, v.NumField(); i < n; i++ { + switch f := v.Field(i).Addr().Interface().(type) { + case *ast.Expr: + e.rewrite(f) + case *[]ast.Expr: + for i := range *f { + e.rewrite(&(*f)[i]) + } + } + } + return e +} + +func (e *editor) rewrite(f *ast.Expr) { + call, ok := (*f).(*ast.CallExpr) + if !ok { + return + } + + pos := e.file.Position(call.Lparen) + if _, ok := e.edits[pos]; !ok { + return + } + *f = call.Args[0] + delete(e.edits, pos) +} + +var ( + cr = []byte{'\r'} + nl = []byte{'\n'} +) + +func print(conversions []token.Position) { + var file string + var lines [][]byte + + for _, pos := range conversions { + fmt.Printf("%s:%d:%d: unnecessary conversion\n", pos.Filename, pos.Line, pos.Column) + if *flagV { + if pos.Filename != file { + buf, err := ioutil.ReadFile(pos.Filename) + if err != nil { + log.Fatal(err) + } + file = pos.Filename + lines = bytes.Split(buf, nl) + } + + line := bytes.TrimSuffix(lines[pos.Line-1], cr) + fmt.Printf("%s\n", line) + + // For files processed by cgo, Column is the + // column location after cgo processing, which + // may be different than the source column + // that we want here. In lieu of a better + // heuristic for detecting this case, at least + // avoid panicking if column is out of bounds. + if pos.Column <= len(line) { + fmt.Printf("%s^\n", rub(line[:pos.Column-1])) + } + } + } +} + +// Rub returns a copy of buf with all non-whitespace characters replaced +// by spaces (like rubbing them out with white out). +func rub(buf []byte) []byte { + // TODO(mdempsky): Handle combining characters? + var res bytes.Buffer + for _, r := range string(buf) { + if unicode.IsSpace(r) { + res.WriteRune(r) + continue + } + switch width.LookupRune(r).Kind() { + case width.EastAsianWide, width.EastAsianFullwidth: + res.WriteString(" ") + default: + res.WriteByte(' ') + } + } + return res.Bytes() +} + +var ( + flagAll = flag.Bool("unconvert.all", false, "type check all GOOS and GOARCH combinations") + flagApply = flag.Bool("unconvert.apply", false, "apply edits to source files") + flagCPUProfile = flag.String("unconvert.cpuprofile", "", "write CPU profile to file") + // TODO(mdempsky): Better description and maybe flag name. + flagSafe = flag.Bool("unconvert.safe", false, "be more conservative (experimental)") + flagV = flag.Bool("unconvert.v", false, "verbose output") +) + +func usage() { + fmt.Fprintf(os.Stderr, "usage: unconvert [flags] [package ...]\n") + flag.PrintDefaults() +} + +func nomain() { + flag.Usage = usage + flag.Parse() + + if *flagCPUProfile != "" { + f, err := os.Create(*flagCPUProfile) + if err != nil { + log.Fatal(err) + } + pprof.StartCPUProfile(f) + defer pprof.StopCPUProfile() + } + + importPaths := gotool.ImportPaths(flag.Args()) + if len(importPaths) == 0 { + return + } + + var m fileToEditSet + if *flagAll { + m = mergeEdits(importPaths) + } else { + m = computeEdits(importPaths, build.Default.GOOS, build.Default.GOARCH, build.Default.CgoEnabled) + } + + if *flagApply { + var wg sync.WaitGroup + for f, e := range m { + wg.Add(1) + f, e := f, e + go func() { + defer wg.Done() + apply(f, e) + }() + } + wg.Wait() + } else { + var conversions []token.Position + for _, positions := range m { + for pos := range positions { + conversions = append(conversions, pos) + } + } + sort.Sort(byPosition(conversions)) + print(conversions) + if len(conversions) > 0 { + os.Exit(1) + } + } +} + +func Run(prog *loader.Program) []token.Position { + m := computeEditsFromProg(prog) + var conversions []token.Position + for _, positions := range m { + for pos := range positions { + conversions = append(conversions, pos) + } + } + return conversions +} + +var plats = [...]struct { + goos, goarch string +}{ + // TODO(mdempsky): buildall.bash also builds linux-386-387 and linux-arm-arm5. + {"android", "386"}, + {"android", "amd64"}, + {"android", "arm"}, + {"android", "arm64"}, + {"darwin", "386"}, + {"darwin", "amd64"}, + {"darwin", "arm"}, + {"darwin", "arm64"}, + {"dragonfly", "amd64"}, + {"freebsd", "386"}, + {"freebsd", "amd64"}, + {"freebsd", "arm"}, + {"linux", "386"}, + {"linux", "amd64"}, + {"linux", "arm"}, + {"linux", "arm64"}, + {"linux", "mips64"}, + {"linux", "mips64le"}, + {"linux", "ppc64"}, + {"linux", "ppc64le"}, + {"linux", "s390x"}, + {"nacl", "386"}, + {"nacl", "amd64p32"}, + {"nacl", "arm"}, + {"netbsd", "386"}, + {"netbsd", "amd64"}, + {"netbsd", "arm"}, + {"openbsd", "386"}, + {"openbsd", "amd64"}, + {"openbsd", "arm"}, + {"plan9", "386"}, + {"plan9", "amd64"}, + {"plan9", "arm"}, + {"solaris", "amd64"}, + {"windows", "386"}, + {"windows", "amd64"}, +} + +func mergeEdits(importPaths []string) fileToEditSet { + m := make(fileToEditSet) + for _, plat := range plats { + for f, e := range computeEdits(importPaths, plat.goos, plat.goarch, false) { + if e0, ok := m[f]; ok { + for k := range e0 { + if _, ok := e[k]; !ok { + delete(e0, k) + } + } + } else { + m[f] = e + } + } + } + return m +} + +type noImporter struct{} + +func (noImporter) Import(path string) (*types.Package, error) { + panic("golang.org/x/tools/go/loader said this wouldn't be called") +} + +func computeEdits(importPaths []string, os, arch string, cgoEnabled bool) fileToEditSet { + ctxt := build.Default + ctxt.GOOS = os + ctxt.GOARCH = arch + ctxt.CgoEnabled = cgoEnabled + + var conf loader.Config + conf.Build = &ctxt + conf.TypeChecker.Importer = noImporter{} + for _, importPath := range importPaths { + conf.Import(importPath) + } + prog, err := conf.Load() + if err != nil { + log.Fatal(err) + } + + return computeEditsFromProg(prog) +} + +func computeEditsFromProg(prog *loader.Program) fileToEditSet { + type res struct { + file string + edits editSet + } + ch := make(chan res) + var wg sync.WaitGroup + for _, pkg := range prog.InitialPackages() { + for _, file := range pkg.Files { + pkg, file := pkg, file + wg.Add(1) + go func() { + defer wg.Done() + v := visitor{pkg: pkg, file: prog.Fset.File(file.Package), edits: make(editSet)} + ast.Walk(&v, file) + ch <- res{v.file.Name(), v.edits} + }() + } + } + go func() { + wg.Wait() + close(ch) + }() + + m := make(fileToEditSet) + for r := range ch { + m[r.file] = r.edits + } + return m +} + +type step struct { + n ast.Node + i int +} + +type visitor struct { + pkg *loader.PackageInfo + file *token.File + edits editSet + path []step +} + +func (v *visitor) Visit(node ast.Node) ast.Visitor { + if node != nil { + v.path = append(v.path, step{n: node}) + } else { + n := len(v.path) + v.path = v.path[:n-1] + if n >= 2 { + v.path[n-2].i++ + } + } + + if call, ok := node.(*ast.CallExpr); ok { + v.unconvert(call) + } + return v +} + +func (v *visitor) unconvert(call *ast.CallExpr) { + // TODO(mdempsky): Handle useless multi-conversions. + + // Conversions have exactly one argument. + if len(call.Args) != 1 || call.Ellipsis != token.NoPos { + return + } + ft, ok := v.pkg.Types[call.Fun] + if !ok { + fmt.Println("Missing type for function") + return + } + if !ft.IsType() { + // Function call; not a conversion. + return + } + at, ok := v.pkg.Types[call.Args[0]] + if !ok { + fmt.Println("Missing type for argument") + return + } + if !types.Identical(ft.Type, at.Type) { + // A real conversion. + return + } + if isUntypedValue(call.Args[0], &v.pkg.Info) { + // Workaround golang.org/issue/13061. + return + } + if *flagSafe && !v.isSafeContext(at.Type) { + // TODO(mdempsky): Remove this message. + fmt.Println("Skipped a possible type conversion because of -safe at", v.file.Position(call.Pos())) + return + } + if v.isCgoCheckPointerContext() { + // cmd/cgo generates explicit type conversions that + // are often redundant when introducing + // _cgoCheckPointer calls (issue #16). Users can't do + // anything about these, so skip over them. + return + } + + v.edits[v.file.Position(call.Lparen)] = struct{}{} +} + +func (v *visitor) isCgoCheckPointerContext() bool { + ctxt := &v.path[len(v.path)-2] + if ctxt.i != 1 { + return false + } + call, ok := ctxt.n.(*ast.CallExpr) + if !ok { + return false + } + ident, ok := call.Fun.(*ast.Ident) + if !ok { + return false + } + return ident.Name == "_cgoCheckPointer" +} + +// isSafeContext reports whether the current context requires +// an expression of type t. +// +// TODO(mdempsky): That's a bad explanation. +func (v *visitor) isSafeContext(t types.Type) bool { + ctxt := &v.path[len(v.path)-2] + switch n := ctxt.n.(type) { + case *ast.AssignStmt: + pos := ctxt.i - len(n.Lhs) + if pos < 0 { + fmt.Println("Type conversion on LHS of assignment?") + return false + } + if n.Tok == token.DEFINE { + // Skip := assignments. + return true + } + // We're a conversion in the pos'th element of n.Rhs. + // Check that the corresponding element of n.Lhs is of type t. + lt, ok := v.pkg.Types[n.Lhs[pos]] + if !ok { + fmt.Println("Missing type for LHS expression") + return false + } + return types.Identical(t, lt.Type) + case *ast.BinaryExpr: + if n.Op == token.SHL || n.Op == token.SHR { + if ctxt.i == 1 { + // RHS of a shift is always safe. + return true + } + // For the LHS, we should inspect up another level. + fmt.Println("TODO(mdempsky): Handle LHS of shift expressions") + return true + } + var other ast.Expr + if ctxt.i == 0 { + other = n.Y + } else { + other = n.X + } + ot, ok := v.pkg.Types[other] + if !ok { + fmt.Println("Missing type for other binop subexpr") + return false + } + return types.Identical(t, ot.Type) + case *ast.CallExpr: + pos := ctxt.i - 1 + if pos < 0 { + // Type conversion in the function subexpr is okay. + return true + } + ft, ok := v.pkg.Types[n.Fun] + if !ok { + fmt.Println("Missing type for function expression") + return false + } + sig, ok := ft.Type.(*types.Signature) + if !ok { + // "Function" is either a type conversion (ok) or a builtin (ok?). + return true + } + params := sig.Params() + var pt types.Type + if sig.Variadic() && n.Ellipsis == token.NoPos && pos >= params.Len()-1 { + pt = params.At(params.Len() - 1).Type().(*types.Slice).Elem() + } else { + pt = params.At(pos).Type() + } + return types.Identical(t, pt) + case *ast.CompositeLit, *ast.KeyValueExpr: + fmt.Println("TODO(mdempsky): Compare against value type of composite literal type at", v.file.Position(n.Pos())) + return true + case *ast.ReturnStmt: + // TODO(mdempsky): Is there a better way to get the corresponding + // return parameter type? + var funcType *ast.FuncType + for i := len(v.path) - 1; funcType == nil && i >= 0; i-- { + switch f := v.path[i].n.(type) { + case *ast.FuncDecl: + funcType = f.Type + case *ast.FuncLit: + funcType = f.Type + } + } + var typeExpr ast.Expr + for i, j := ctxt.i, 0; j < len(funcType.Results.List); j++ { + f := funcType.Results.List[j] + if len(f.Names) == 0 { + if i >= 1 { + i-- + continue + } + } else { + if i >= len(f.Names) { + i -= len(f.Names) + continue + } + } + typeExpr = f.Type + break + } + if typeExpr == nil { + fmt.Println(ctxt) + } + pt, ok := v.pkg.Types[typeExpr] + if !ok { + fmt.Println("Missing type for return parameter at", v.file.Position(n.Pos())) + return false + } + return types.Identical(t, pt.Type) + case *ast.StarExpr, *ast.UnaryExpr: + // TODO(mdempsky): I think these are always safe. + return true + case *ast.SwitchStmt: + // TODO(mdempsky): I think this is always safe? + return true + default: + // TODO(mdempsky): When can this happen? + fmt.Printf("... huh, %T at %v\n", n, v.file.Position(n.Pos())) + return true + } +} + +func isUntypedValue(n ast.Expr, info *types.Info) (res bool) { + switch n := n.(type) { + case *ast.BinaryExpr: + switch n.Op { + case token.SHL, token.SHR: + // Shifts yield an untyped value if their LHS is untyped. + return isUntypedValue(n.X, info) + case token.EQL, token.NEQ, token.LSS, token.GTR, token.LEQ, token.GEQ: + // Comparisons yield an untyped boolean value. + return true + case token.ADD, token.SUB, token.MUL, token.QUO, token.REM, + token.AND, token.OR, token.XOR, token.AND_NOT, + token.LAND, token.LOR: + return isUntypedValue(n.X, info) && isUntypedValue(n.Y, info) + } + case *ast.UnaryExpr: + switch n.Op { + case token.ADD, token.SUB, token.NOT, token.XOR: + return isUntypedValue(n.X, info) + } + case *ast.BasicLit: + // Basic literals are always untyped. + return true + case *ast.ParenExpr: + return isUntypedValue(n.X, info) + case *ast.SelectorExpr: + return isUntypedValue(n.Sel, info) + case *ast.Ident: + if obj, ok := info.Uses[n]; ok { + if obj.Pkg() == nil && obj.Name() == "nil" { + // The universal untyped zero value. + return true + } + if b, ok := obj.Type().(*types.Basic); ok && b.Info()&types.IsUntyped != 0 { + // Reference to an untyped constant. + return true + } + } + case *ast.CallExpr: + if b, ok := asBuiltin(n.Fun, info); ok { + switch b.Name() { + case "real", "imag": + return isUntypedValue(n.Args[0], info) + case "complex": + return isUntypedValue(n.Args[0], info) && isUntypedValue(n.Args[1], info) + } + } + } + + return false +} + +func asBuiltin(n ast.Expr, info *types.Info) (*types.Builtin, bool) { + for { + paren, ok := n.(*ast.ParenExpr) + if !ok { + break + } + n = paren.X + } + + ident, ok := n.(*ast.Ident) + if !ok { + return nil, false + } + + obj, ok := info.Uses[ident] + if !ok { + return nil, false + } + + b, ok := obj.(*types.Builtin) + return b, ok +} + +type byPosition []token.Position + +func (p byPosition) Len() int { + return len(p) +} + +func (p byPosition) Less(i, j int) bool { + if p[i].Filename != p[j].Filename { + return p[i].Filename < p[j].Filename + } + if p[i].Line != p[j].Line { + return p[i].Line < p[j].Line + } + return p[i].Column < p[j].Column +} + +func (p byPosition) Swap(i, j int) { + p[i], p[j] = p[j], p[i] +} diff --git a/vendor/golang.org/x/text/AUTHORS b/vendor/golang.org/x/text/AUTHORS new file mode 100644 index 00000000..15167cd7 --- /dev/null +++ b/vendor/golang.org/x/text/AUTHORS @@ -0,0 +1,3 @@ +# This source code refers to The Go Authors for copyright purposes. +# The master list of authors is in the main Go distribution, +# visible at http://tip.golang.org/AUTHORS. diff --git a/vendor/golang.org/x/text/CONTRIBUTORS b/vendor/golang.org/x/text/CONTRIBUTORS new file mode 100644 index 00000000..1c4577e9 --- /dev/null +++ b/vendor/golang.org/x/text/CONTRIBUTORS @@ -0,0 +1,3 @@ +# This source code was written by the Go contributors. +# The master list of contributors is in the main Go distribution, +# visible at http://tip.golang.org/CONTRIBUTORS. diff --git a/vendor/golang.org/x/text/LICENSE b/vendor/golang.org/x/text/LICENSE new file mode 100644 index 00000000..6a66aea5 --- /dev/null +++ b/vendor/golang.org/x/text/LICENSE @@ -0,0 +1,27 @@ +Copyright (c) 2009 The Go Authors. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above +copyright notice, this list of conditions and the following disclaimer +in the documentation and/or other materials provided with the +distribution. + * Neither the name of Google Inc. nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/vendor/golang.org/x/text/PATENTS b/vendor/golang.org/x/text/PATENTS new file mode 100644 index 00000000..73309904 --- /dev/null +++ b/vendor/golang.org/x/text/PATENTS @@ -0,0 +1,22 @@ +Additional IP Rights Grant (Patents) + +"This implementation" means the copyrightable works distributed by +Google as part of the Go project. + +Google hereby grants to You a perpetual, worldwide, non-exclusive, +no-charge, royalty-free, irrevocable (except as stated in this section) +patent license to make, have made, use, offer to sell, sell, import, +transfer and otherwise run, modify and propagate the contents of this +implementation of Go, where such license applies only to those patent +claims, both currently owned or controlled by Google and acquired in +the future, licensable by Google that are necessarily infringed by this +implementation of Go. This grant does not include claims that would be +infringed only as a consequence of further modification of this +implementation. If you or your agent or exclusive licensee institute or +order or agree to the institution of patent litigation against any +entity (including a cross-claim or counterclaim in a lawsuit) alleging +that this implementation of Go or any code incorporated within this +implementation of Go constitutes direct or contributory patent +infringement, or inducement of patent infringement, then any patent +rights granted to you under this License for this implementation of Go +shall terminate as of the date such litigation is filed. diff --git a/vendor/golang.org/x/text/internal/gen/code.go b/vendor/golang.org/x/text/internal/gen/code.go new file mode 100644 index 00000000..0389509f --- /dev/null +++ b/vendor/golang.org/x/text/internal/gen/code.go @@ -0,0 +1,369 @@ +// Copyright 2015 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package gen + +import ( + "bytes" + "encoding/gob" + "fmt" + "hash" + "hash/fnv" + "io" + "log" + "os" + "reflect" + "strings" + "unicode" + "unicode/utf8" +) + +// This file contains utilities for generating code. + +// TODO: other write methods like: +// - slices, maps, types, etc. + +// CodeWriter is a utility for writing structured code. It computes the content +// hash and size of written content. It ensures there are newlines between +// written code blocks. +type CodeWriter struct { + buf bytes.Buffer + Size int + Hash hash.Hash32 // content hash + gob *gob.Encoder + // For comments we skip the usual one-line separator if they are followed by + // a code block. + skipSep bool +} + +func (w *CodeWriter) Write(p []byte) (n int, err error) { + return w.buf.Write(p) +} + +// NewCodeWriter returns a new CodeWriter. +func NewCodeWriter() *CodeWriter { + h := fnv.New32() + return &CodeWriter{Hash: h, gob: gob.NewEncoder(h)} +} + +// WriteGoFile appends the buffer with the total size of all created structures +// and writes it as a Go file to the the given file with the given package name. +func (w *CodeWriter) WriteGoFile(filename, pkg string) { + f, err := os.Create(filename) + if err != nil { + log.Fatalf("Could not create file %s: %v", filename, err) + } + defer f.Close() + if _, err = w.WriteGo(f, pkg, ""); err != nil { + log.Fatalf("Error writing file %s: %v", filename, err) + } +} + +// WriteVersionedGoFile appends the buffer with the total size of all created +// structures and writes it as a Go file to the the given file with the given +// package name and build tags for the current Unicode version, +func (w *CodeWriter) WriteVersionedGoFile(filename, pkg string) { + tags := buildTags() + if tags != "" { + filename = insertVersion(filename, UnicodeVersion()) + } + f, err := os.Create(filename) + if err != nil { + log.Fatalf("Could not create file %s: %v", filename, err) + } + defer f.Close() + if _, err = w.WriteGo(f, pkg, tags); err != nil { + log.Fatalf("Error writing file %s: %v", filename, err) + } +} + +// WriteGo appends the buffer with the total size of all created structures and +// writes it as a Go file to the the given writer with the given package name. +func (w *CodeWriter) WriteGo(out io.Writer, pkg, tags string) (n int, err error) { + sz := w.Size + w.WriteComment("Total table size %d bytes (%dKiB); checksum: %X\n", sz, sz/1024, w.Hash.Sum32()) + defer w.buf.Reset() + return WriteGo(out, pkg, tags, w.buf.Bytes()) +} + +func (w *CodeWriter) printf(f string, x ...interface{}) { + fmt.Fprintf(w, f, x...) +} + +func (w *CodeWriter) insertSep() { + if w.skipSep { + w.skipSep = false + return + } + // Use at least two newlines to ensure a blank space between the previous + // block. WriteGoFile will remove extraneous newlines. + w.printf("\n\n") +} + +// WriteComment writes a comment block. All line starts are prefixed with "//". +// Initial empty lines are gobbled. The indentation for the first line is +// stripped from consecutive lines. +func (w *CodeWriter) WriteComment(comment string, args ...interface{}) { + s := fmt.Sprintf(comment, args...) + s = strings.Trim(s, "\n") + + // Use at least two newlines to ensure a blank space between the previous + // block. WriteGoFile will remove extraneous newlines. + w.printf("\n\n// ") + w.skipSep = true + + // strip first indent level. + sep := "\n" + for ; len(s) > 0 && (s[0] == '\t' || s[0] == ' '); s = s[1:] { + sep += s[:1] + } + + strings.NewReplacer(sep, "\n// ", "\n", "\n// ").WriteString(w, s) + + w.printf("\n") +} + +func (w *CodeWriter) writeSizeInfo(size int) { + w.printf("// Size: %d bytes\n", size) +} + +// WriteConst writes a constant of the given name and value. +func (w *CodeWriter) WriteConst(name string, x interface{}) { + w.insertSep() + v := reflect.ValueOf(x) + + switch v.Type().Kind() { + case reflect.String: + w.printf("const %s %s = ", name, typeName(x)) + w.WriteString(v.String()) + w.printf("\n") + default: + w.printf("const %s = %#v\n", name, x) + } +} + +// WriteVar writes a variable of the given name and value. +func (w *CodeWriter) WriteVar(name string, x interface{}) { + w.insertSep() + v := reflect.ValueOf(x) + oldSize := w.Size + sz := int(v.Type().Size()) + w.Size += sz + + switch v.Type().Kind() { + case reflect.String: + w.printf("var %s %s = ", name, typeName(x)) + w.WriteString(v.String()) + case reflect.Struct: + w.gob.Encode(x) + fallthrough + case reflect.Slice, reflect.Array: + w.printf("var %s = ", name) + w.writeValue(v) + w.writeSizeInfo(w.Size - oldSize) + default: + w.printf("var %s %s = ", name, typeName(x)) + w.gob.Encode(x) + w.writeValue(v) + w.writeSizeInfo(w.Size - oldSize) + } + w.printf("\n") +} + +func (w *CodeWriter) writeValue(v reflect.Value) { + x := v.Interface() + switch v.Kind() { + case reflect.String: + w.WriteString(v.String()) + case reflect.Array: + // Don't double count: callers of WriteArray count on the size being + // added, so we need to discount it here. + w.Size -= int(v.Type().Size()) + w.writeSlice(x, true) + case reflect.Slice: + w.writeSlice(x, false) + case reflect.Struct: + w.printf("%s{\n", typeName(v.Interface())) + t := v.Type() + for i := 0; i < v.NumField(); i++ { + w.printf("%s: ", t.Field(i).Name) + w.writeValue(v.Field(i)) + w.printf(",\n") + } + w.printf("}") + default: + w.printf("%#v", x) + } +} + +// WriteString writes a string literal. +func (w *CodeWriter) WriteString(s string) { + s = strings.Replace(s, `\`, `\\`, -1) + io.WriteString(w.Hash, s) // content hash + w.Size += len(s) + + const maxInline = 40 + if len(s) <= maxInline { + w.printf("%q", s) + return + } + + // We will render the string as a multi-line string. + const maxWidth = 80 - 4 - len(`"`) - len(`" +`) + + // When starting on its own line, go fmt indents line 2+ an extra level. + n, max := maxWidth, maxWidth-4 + + // As per https://golang.org/issue/18078, the compiler has trouble + // compiling the concatenation of many strings, s0 + s1 + s2 + ... + sN, + // for large N. We insert redundant, explicit parentheses to work around + // that, lowering the N at any given step: (s0 + s1 + ... + s63) + (s64 + + // ... + s127) + etc + (etc + ... + sN). + explicitParens, extraComment := len(s) > 128*1024, "" + if explicitParens { + w.printf(`(`) + extraComment = "; the redundant, explicit parens are for https://golang.org/issue/18078" + } + + // Print "" +\n, if a string does not start on its own line. + b := w.buf.Bytes() + if p := len(bytes.TrimRight(b, " \t")); p > 0 && b[p-1] != '\n' { + w.printf("\"\" + // Size: %d bytes%s\n", len(s), extraComment) + n, max = maxWidth, maxWidth + } + + w.printf(`"`) + + for sz, p, nLines := 0, 0, 0; p < len(s); { + var r rune + r, sz = utf8.DecodeRuneInString(s[p:]) + out := s[p : p+sz] + chars := 1 + if !unicode.IsPrint(r) || r == utf8.RuneError || r == '"' { + switch sz { + case 1: + out = fmt.Sprintf("\\x%02x", s[p]) + case 2, 3: + out = fmt.Sprintf("\\u%04x", r) + case 4: + out = fmt.Sprintf("\\U%08x", r) + } + chars = len(out) + } + if n -= chars; n < 0 { + nLines++ + if explicitParens && nLines&63 == 63 { + w.printf("\") + (\"") + } + w.printf("\" +\n\"") + n = max - len(out) + } + w.printf("%s", out) + p += sz + } + w.printf(`"`) + if explicitParens { + w.printf(`)`) + } +} + +// WriteSlice writes a slice value. +func (w *CodeWriter) WriteSlice(x interface{}) { + w.writeSlice(x, false) +} + +// WriteArray writes an array value. +func (w *CodeWriter) WriteArray(x interface{}) { + w.writeSlice(x, true) +} + +func (w *CodeWriter) writeSlice(x interface{}, isArray bool) { + v := reflect.ValueOf(x) + w.gob.Encode(v.Len()) + w.Size += v.Len() * int(v.Type().Elem().Size()) + name := typeName(x) + if isArray { + name = fmt.Sprintf("[%d]%s", v.Len(), name[strings.Index(name, "]")+1:]) + } + if isArray { + w.printf("%s{\n", name) + } else { + w.printf("%s{ // %d elements\n", name, v.Len()) + } + + switch kind := v.Type().Elem().Kind(); kind { + case reflect.String: + for _, s := range x.([]string) { + w.WriteString(s) + w.printf(",\n") + } + case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64, + reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64: + // nLine and nBlock are the number of elements per line and block. + nLine, nBlock, format := 8, 64, "%d," + switch kind { + case reflect.Uint8: + format = "%#02x," + case reflect.Uint16: + format = "%#04x," + case reflect.Uint32: + nLine, nBlock, format = 4, 32, "%#08x," + case reflect.Uint, reflect.Uint64: + nLine, nBlock, format = 4, 32, "%#016x," + case reflect.Int8: + nLine = 16 + } + n := nLine + for i := 0; i < v.Len(); i++ { + if i%nBlock == 0 && v.Len() > nBlock { + w.printf("// Entry %X - %X\n", i, i+nBlock-1) + } + x := v.Index(i).Interface() + w.gob.Encode(x) + w.printf(format, x) + if n--; n == 0 { + n = nLine + w.printf("\n") + } + } + w.printf("\n") + case reflect.Struct: + zero := reflect.Zero(v.Type().Elem()).Interface() + for i := 0; i < v.Len(); i++ { + x := v.Index(i).Interface() + w.gob.EncodeValue(v) + if !reflect.DeepEqual(zero, x) { + line := fmt.Sprintf("%#v,\n", x) + line = line[strings.IndexByte(line, '{'):] + w.printf("%d: ", i) + w.printf(line) + } + } + case reflect.Array: + for i := 0; i < v.Len(); i++ { + w.printf("%d: %#v,\n", i, v.Index(i).Interface()) + } + default: + panic("gen: slice elem type not supported") + } + w.printf("}") +} + +// WriteType writes a definition of the type of the given value and returns the +// type name. +func (w *CodeWriter) WriteType(x interface{}) string { + t := reflect.TypeOf(x) + w.printf("type %s struct {\n", t.Name()) + for i := 0; i < t.NumField(); i++ { + w.printf("\t%s %s\n", t.Field(i).Name, t.Field(i).Type) + } + w.printf("}\n") + return t.Name() +} + +// typeName returns the name of the go type of x. +func typeName(x interface{}) string { + t := reflect.ValueOf(x).Type() + return strings.Replace(fmt.Sprint(t), "main.", "", 1) +} diff --git a/vendor/golang.org/x/text/internal/gen/gen.go b/vendor/golang.org/x/text/internal/gen/gen.go new file mode 100644 index 00000000..4c3f7606 --- /dev/null +++ b/vendor/golang.org/x/text/internal/gen/gen.go @@ -0,0 +1,333 @@ +// Copyright 2015 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Package gen contains common code for the various code generation tools in the +// text repository. Its usage ensures consistency between tools. +// +// This package defines command line flags that are common to most generation +// tools. The flags allow for specifying specific Unicode and CLDR versions +// in the public Unicode data repository (http://www.unicode.org/Public). +// +// A local Unicode data mirror can be set through the flag -local or the +// environment variable UNICODE_DIR. The former takes precedence. The local +// directory should follow the same structure as the public repository. +// +// IANA data can also optionally be mirrored by putting it in the iana directory +// rooted at the top of the local mirror. Beware, though, that IANA data is not +// versioned. So it is up to the developer to use the right version. +package gen // import "golang.org/x/text/internal/gen" + +import ( + "bytes" + "flag" + "fmt" + "go/build" + "go/format" + "io" + "io/ioutil" + "log" + "net/http" + "os" + "path" + "path/filepath" + "strings" + "sync" + "unicode" + + "golang.org/x/text/unicode/cldr" +) + +var ( + url = flag.String("url", + "http://www.unicode.org/Public", + "URL of Unicode database directory") + iana = flag.String("iana", + "http://www.iana.org", + "URL of the IANA repository") + unicodeVersion = flag.String("unicode", + getEnv("UNICODE_VERSION", unicode.Version), + "unicode version to use") + cldrVersion = flag.String("cldr", + getEnv("CLDR_VERSION", cldr.Version), + "cldr version to use") +) + +func getEnv(name, def string) string { + if v := os.Getenv(name); v != "" { + return v + } + return def +} + +// Init performs common initialization for a gen command. It parses the flags +// and sets up the standard logging parameters. +func Init() { + log.SetPrefix("") + log.SetFlags(log.Lshortfile) + flag.Parse() +} + +const header = `// Code generated by running "go generate" in golang.org/x/text. DO NOT EDIT. + +` + +// UnicodeVersion reports the requested Unicode version. +func UnicodeVersion() string { + return *unicodeVersion +} + +// CLDRVersion reports the requested CLDR version. +func CLDRVersion() string { + return *cldrVersion +} + +var tags = []struct{ version, buildTags string }{ + {"10.0.0", "go1.10"}, + {"", "!go1.10"}, +} + +// buildTags reports the build tags used for the current Unicode version. +func buildTags() string { + v := UnicodeVersion() + for _, x := range tags { + // We should do a numeric comparison, but including the collate package + // would create an import cycle. We approximate it by assuming that + // longer version strings are later. + if len(x.version) <= len(v) { + return x.buildTags + } + if len(x.version) == len(v) && x.version <= v { + return x.buildTags + } + } + return tags[0].buildTags +} + +// IsLocal reports whether data files are available locally. +func IsLocal() bool { + dir, err := localReadmeFile() + if err != nil { + return false + } + if _, err = os.Stat(dir); err != nil { + return false + } + return true +} + +// OpenUCDFile opens the requested UCD file. The file is specified relative to +// the public Unicode root directory. It will call log.Fatal if there are any +// errors. +func OpenUCDFile(file string) io.ReadCloser { + return openUnicode(path.Join(*unicodeVersion, "ucd", file)) +} + +// OpenCLDRCoreZip opens the CLDR core zip file. It will call log.Fatal if there +// are any errors. +func OpenCLDRCoreZip() io.ReadCloser { + return OpenUnicodeFile("cldr", *cldrVersion, "core.zip") +} + +// OpenUnicodeFile opens the requested file of the requested category from the +// root of the Unicode data archive. The file is specified relative to the +// public Unicode root directory. If version is "", it will use the default +// Unicode version. It will call log.Fatal if there are any errors. +func OpenUnicodeFile(category, version, file string) io.ReadCloser { + if version == "" { + version = UnicodeVersion() + } + return openUnicode(path.Join(category, version, file)) +} + +// OpenIANAFile opens the requested IANA file. The file is specified relative +// to the IANA root, which is typically either http://www.iana.org or the +// iana directory in the local mirror. It will call log.Fatal if there are any +// errors. +func OpenIANAFile(path string) io.ReadCloser { + return Open(*iana, "iana", path) +} + +var ( + dirMutex sync.Mutex + localDir string +) + +const permissions = 0755 + +func localReadmeFile() (string, error) { + p, err := build.Import("golang.org/x/text", "", build.FindOnly) + if err != nil { + return "", fmt.Errorf("Could not locate package: %v", err) + } + return filepath.Join(p.Dir, "DATA", "README"), nil +} + +func getLocalDir() string { + dirMutex.Lock() + defer dirMutex.Unlock() + + readme, err := localReadmeFile() + if err != nil { + log.Fatal(err) + } + dir := filepath.Dir(readme) + if _, err := os.Stat(readme); err != nil { + if err := os.MkdirAll(dir, permissions); err != nil { + log.Fatalf("Could not create directory: %v", err) + } + ioutil.WriteFile(readme, []byte(readmeTxt), permissions) + } + return dir +} + +const readmeTxt = `Generated by golang.org/x/text/internal/gen. DO NOT EDIT. + +This directory contains downloaded files used to generate the various tables +in the golang.org/x/text subrepo. + +Note that the language subtag repo (iana/assignments/language-subtag-registry) +and all other times in the iana subdirectory are not versioned and will need +to be periodically manually updated. The easiest way to do this is to remove +the entire iana directory. This is mostly of concern when updating the language +package. +` + +// Open opens subdir/path if a local directory is specified and the file exists, +// where subdir is a directory relative to the local root, or fetches it from +// urlRoot/path otherwise. It will call log.Fatal if there are any errors. +func Open(urlRoot, subdir, path string) io.ReadCloser { + file := filepath.Join(getLocalDir(), subdir, filepath.FromSlash(path)) + return open(file, urlRoot, path) +} + +func openUnicode(path string) io.ReadCloser { + file := filepath.Join(getLocalDir(), filepath.FromSlash(path)) + return open(file, *url, path) +} + +// TODO: automatically periodically update non-versioned files. + +func open(file, urlRoot, path string) io.ReadCloser { + if f, err := os.Open(file); err == nil { + return f + } + r := get(urlRoot, path) + defer r.Close() + b, err := ioutil.ReadAll(r) + if err != nil { + log.Fatalf("Could not download file: %v", err) + } + os.MkdirAll(filepath.Dir(file), permissions) + if err := ioutil.WriteFile(file, b, permissions); err != nil { + log.Fatalf("Could not create file: %v", err) + } + return ioutil.NopCloser(bytes.NewReader(b)) +} + +func get(root, path string) io.ReadCloser { + url := root + "/" + path + fmt.Printf("Fetching %s...", url) + defer fmt.Println(" done.") + resp, err := http.Get(url) + if err != nil { + log.Fatalf("HTTP GET: %v", err) + } + if resp.StatusCode != 200 { + log.Fatalf("Bad GET status for %q: %q", url, resp.Status) + } + return resp.Body +} + +// TODO: use Write*Version in all applicable packages. + +// WriteUnicodeVersion writes a constant for the Unicode version from which the +// tables are generated. +func WriteUnicodeVersion(w io.Writer) { + fmt.Fprintf(w, "// UnicodeVersion is the Unicode version from which the tables in this package are derived.\n") + fmt.Fprintf(w, "const UnicodeVersion = %q\n\n", UnicodeVersion()) +} + +// WriteCLDRVersion writes a constant for the CLDR version from which the +// tables are generated. +func WriteCLDRVersion(w io.Writer) { + fmt.Fprintf(w, "// CLDRVersion is the CLDR version from which the tables in this package are derived.\n") + fmt.Fprintf(w, "const CLDRVersion = %q\n\n", CLDRVersion()) +} + +// WriteGoFile prepends a standard file comment and package statement to the +// given bytes, applies gofmt, and writes them to a file with the given name. +// It will call log.Fatal if there are any errors. +func WriteGoFile(filename, pkg string, b []byte) { + w, err := os.Create(filename) + if err != nil { + log.Fatalf("Could not create file %s: %v", filename, err) + } + defer w.Close() + if _, err = WriteGo(w, pkg, "", b); err != nil { + log.Fatalf("Error writing file %s: %v", filename, err) + } +} + +func insertVersion(filename, version string) string { + suffix := ".go" + if strings.HasSuffix(filename, "_test.go") { + suffix = "_test.go" + } + return fmt.Sprint(filename[:len(filename)-len(suffix)], version, suffix) +} + +// WriteVersionedGoFile prepends a standard file comment, adds build tags to +// version the file for the current Unicode version, and package statement to +// the given bytes, applies gofmt, and writes them to a file with the given +// name. It will call log.Fatal if there are any errors. +func WriteVersionedGoFile(filename, pkg string, b []byte) { + tags := buildTags() + if tags != "" { + filename = insertVersion(filename, UnicodeVersion()) + } + w, err := os.Create(filename) + if err != nil { + log.Fatalf("Could not create file %s: %v", filename, err) + } + defer w.Close() + if _, err = WriteGo(w, pkg, tags, b); err != nil { + log.Fatalf("Error writing file %s: %v", filename, err) + } +} + +// WriteGo prepends a standard file comment and package statement to the given +// bytes, applies gofmt, and writes them to w. +func WriteGo(w io.Writer, pkg, tags string, b []byte) (n int, err error) { + src := []byte(header) + if tags != "" { + src = append(src, fmt.Sprintf("// +build %s\n\n", tags)...) + } + src = append(src, fmt.Sprintf("package %s\n\n", pkg)...) + src = append(src, b...) + formatted, err := format.Source(src) + if err != nil { + // Print the generated code even in case of an error so that the + // returned error can be meaningfully interpreted. + n, _ = w.Write(src) + return n, err + } + return w.Write(formatted) +} + +// Repackage rewrites a Go file from belonging to package main to belonging to +// the given package. +func Repackage(inFile, outFile, pkg string) { + src, err := ioutil.ReadFile(inFile) + if err != nil { + log.Fatalf("reading %s: %v", inFile, err) + } + const toDelete = "package main\n\n" + i := bytes.Index(src, []byte(toDelete)) + if i < 0 { + log.Fatalf("Could not find %q in %s.", toDelete, inFile) + } + w := &bytes.Buffer{} + w.Write(src[i+len(toDelete):]) + WriteGoFile(outFile, pkg, w.Bytes()) +} diff --git a/vendor/golang.org/x/text/internal/triegen/compact.go b/vendor/golang.org/x/text/internal/triegen/compact.go new file mode 100644 index 00000000..397b975c --- /dev/null +++ b/vendor/golang.org/x/text/internal/triegen/compact.go @@ -0,0 +1,58 @@ +// Copyright 2014 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package triegen + +// This file defines Compacter and its implementations. + +import "io" + +// A Compacter generates an alternative, more space-efficient way to store a +// trie value block. A trie value block holds all possible values for the last +// byte of a UTF-8 encoded rune. Excluding ASCII characters, a trie value block +// always has 64 values, as a UTF-8 encoding ends with a byte in [0x80, 0xC0). +type Compacter interface { + // Size returns whether the Compacter could encode the given block as well + // as its size in case it can. len(v) is always 64. + Size(v []uint64) (sz int, ok bool) + + // Store stores the block using the Compacter's compression method. + // It returns a handle with which the block can be retrieved. + // len(v) is always 64. + Store(v []uint64) uint32 + + // Print writes the data structures associated to the given store to w. + Print(w io.Writer) error + + // Handler returns the name of a function that gets called during trie + // lookup for blocks generated by the Compacter. The function should be of + // the form func (n uint32, b byte) uint64, where n is the index returned by + // the Compacter's Store method and b is the last byte of the UTF-8 + // encoding, where 0x80 <= b < 0xC0, for which to do the lookup in the + // block. + Handler() string +} + +// simpleCompacter is the default Compacter used by builder. It implements a +// normal trie block. +type simpleCompacter builder + +func (b *simpleCompacter) Size([]uint64) (sz int, ok bool) { + return blockSize * b.ValueSize, true +} + +func (b *simpleCompacter) Store(v []uint64) uint32 { + h := uint32(len(b.ValueBlocks) - blockOffset) + b.ValueBlocks = append(b.ValueBlocks, v) + return h +} + +func (b *simpleCompacter) Print(io.Writer) error { + // Structures are printed in print.go. + return nil +} + +func (b *simpleCompacter) Handler() string { + panic("Handler should be special-cased for this Compacter") +} diff --git a/vendor/golang.org/x/text/internal/triegen/print.go b/vendor/golang.org/x/text/internal/triegen/print.go new file mode 100644 index 00000000..8d9f120b --- /dev/null +++ b/vendor/golang.org/x/text/internal/triegen/print.go @@ -0,0 +1,251 @@ +// Copyright 2014 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package triegen + +import ( + "bytes" + "fmt" + "io" + "strings" + "text/template" +) + +// print writes all the data structures as well as the code necessary to use the +// trie to w. +func (b *builder) print(w io.Writer) error { + b.Stats.NValueEntries = len(b.ValueBlocks) * blockSize + b.Stats.NValueBytes = len(b.ValueBlocks) * blockSize * b.ValueSize + b.Stats.NIndexEntries = len(b.IndexBlocks) * blockSize + b.Stats.NIndexBytes = len(b.IndexBlocks) * blockSize * b.IndexSize + b.Stats.NHandleBytes = len(b.Trie) * 2 * b.IndexSize + + // If we only have one root trie, all starter blocks are at position 0 and + // we can access the arrays directly. + if len(b.Trie) == 1 { + // At this point we cannot refer to the generated tables directly. + b.ASCIIBlock = b.Name + "Values" + b.StarterBlock = b.Name + "Index" + } else { + // Otherwise we need to have explicit starter indexes in the trie + // structure. + b.ASCIIBlock = "t.ascii" + b.StarterBlock = "t.utf8Start" + } + + b.SourceType = "[]byte" + if err := lookupGen.Execute(w, b); err != nil { + return err + } + + b.SourceType = "string" + if err := lookupGen.Execute(w, b); err != nil { + return err + } + + if err := trieGen.Execute(w, b); err != nil { + return err + } + + for _, c := range b.Compactions { + if err := c.c.Print(w); err != nil { + return err + } + } + + return nil +} + +func printValues(n int, values []uint64) string { + w := &bytes.Buffer{} + boff := n * blockSize + fmt.Fprintf(w, "\t// Block %#x, offset %#x", n, boff) + var newline bool + for i, v := range values { + if i%6 == 0 { + newline = true + } + if v != 0 { + if newline { + fmt.Fprintf(w, "\n") + newline = false + } + fmt.Fprintf(w, "\t%#02x:%#04x, ", boff+i, v) + } + } + return w.String() +} + +func printIndex(b *builder, nr int, n *node) string { + w := &bytes.Buffer{} + boff := nr * blockSize + fmt.Fprintf(w, "\t// Block %#x, offset %#x", nr, boff) + var newline bool + for i, c := range n.children { + if i%8 == 0 { + newline = true + } + if c != nil { + v := b.Compactions[c.index.compaction].Offset + uint32(c.index.index) + if v != 0 { + if newline { + fmt.Fprintf(w, "\n") + newline = false + } + fmt.Fprintf(w, "\t%#02x:%#02x, ", boff+i, v) + } + } + } + return w.String() +} + +var ( + trieGen = template.Must(template.New("trie").Funcs(template.FuncMap{ + "printValues": printValues, + "printIndex": printIndex, + "title": strings.Title, + "dec": func(x int) int { return x - 1 }, + "psize": func(n int) string { + return fmt.Sprintf("%d bytes (%.2f KiB)", n, float64(n)/1024) + }, + }).Parse(trieTemplate)) + lookupGen = template.Must(template.New("lookup").Parse(lookupTemplate)) +) + +// TODO: consider the return type of lookup. It could be uint64, even if the +// internal value type is smaller. We will have to verify this with the +// performance of unicode/norm, which is very sensitive to such changes. +const trieTemplate = `{{$b := .}}{{$multi := gt (len .Trie) 1}} +// {{.Name}}Trie. Total size: {{psize .Size}}. Checksum: {{printf "%08x" .Checksum}}. +type {{.Name}}Trie struct { {{if $multi}} + ascii []{{.ValueType}} // index for ASCII bytes + utf8Start []{{.IndexType}} // index for UTF-8 bytes >= 0xC0 +{{end}}} + +func new{{title .Name}}Trie(i int) *{{.Name}}Trie { {{if $multi}} + h := {{.Name}}TrieHandles[i] + return &{{.Name}}Trie{ {{.Name}}Values[uint32(h.ascii)<<6:], {{.Name}}Index[uint32(h.multi)<<6:] } +} + +type {{.Name}}TrieHandle struct { + ascii, multi {{.IndexType}} +} + +// {{.Name}}TrieHandles: {{len .Trie}} handles, {{.Stats.NHandleBytes}} bytes +var {{.Name}}TrieHandles = [{{len .Trie}}]{{.Name}}TrieHandle{ +{{range .Trie}} { {{.ASCIIIndex}}, {{.StarterIndex}} }, // {{printf "%08x" .Checksum}}: {{.Name}} +{{end}}}{{else}} + return &{{.Name}}Trie{} +} +{{end}} +// lookupValue determines the type of block n and looks up the value for b. +func (t *{{.Name}}Trie) lookupValue(n uint32, b byte) {{.ValueType}}{{$last := dec (len .Compactions)}} { + switch { {{range $i, $c := .Compactions}} + {{if eq $i $last}}default{{else}}case n < {{$c.Cutoff}}{{end}}:{{if ne $i 0}} + n -= {{$c.Offset}}{{end}} + return {{print $b.ValueType}}({{$c.Handler}}){{end}} + } +} + +// {{.Name}}Values: {{len .ValueBlocks}} blocks, {{.Stats.NValueEntries}} entries, {{.Stats.NValueBytes}} bytes +// The third block is the zero block. +var {{.Name}}Values = [{{.Stats.NValueEntries}}]{{.ValueType}} { +{{range $i, $v := .ValueBlocks}}{{printValues $i $v}} +{{end}}} + +// {{.Name}}Index: {{len .IndexBlocks}} blocks, {{.Stats.NIndexEntries}} entries, {{.Stats.NIndexBytes}} bytes +// Block 0 is the zero block. +var {{.Name}}Index = [{{.Stats.NIndexEntries}}]{{.IndexType}} { +{{range $i, $v := .IndexBlocks}}{{printIndex $b $i $v}} +{{end}}} +` + +// TODO: consider allowing zero-length strings after evaluating performance with +// unicode/norm. +const lookupTemplate = ` +// lookup{{if eq .SourceType "string"}}String{{end}} returns the trie value for the first UTF-8 encoding in s and +// the width in bytes of this encoding. The size will be 0 if s does not +// hold enough bytes to complete the encoding. len(s) must be greater than 0. +func (t *{{.Name}}Trie) lookup{{if eq .SourceType "string"}}String{{end}}(s {{.SourceType}}) (v {{.ValueType}}, sz int) { + c0 := s[0] + switch { + case c0 < 0x80: // is ASCII + return {{.ASCIIBlock}}[c0], 1 + case c0 < 0xC2: + return 0, 1 // Illegal UTF-8: not a starter, not ASCII. + case c0 < 0xE0: // 2-byte UTF-8 + if len(s) < 2 { + return 0, 0 + } + i := {{.StarterBlock}}[c0] + c1 := s[1] + if c1 < 0x80 || 0xC0 <= c1 { + return 0, 1 // Illegal UTF-8: not a continuation byte. + } + return t.lookupValue(uint32(i), c1), 2 + case c0 < 0xF0: // 3-byte UTF-8 + if len(s) < 3 { + return 0, 0 + } + i := {{.StarterBlock}}[c0] + c1 := s[1] + if c1 < 0x80 || 0xC0 <= c1 { + return 0, 1 // Illegal UTF-8: not a continuation byte. + } + o := uint32(i)<<6 + uint32(c1) + i = {{.Name}}Index[o] + c2 := s[2] + if c2 < 0x80 || 0xC0 <= c2 { + return 0, 2 // Illegal UTF-8: not a continuation byte. + } + return t.lookupValue(uint32(i), c2), 3 + case c0 < 0xF8: // 4-byte UTF-8 + if len(s) < 4 { + return 0, 0 + } + i := {{.StarterBlock}}[c0] + c1 := s[1] + if c1 < 0x80 || 0xC0 <= c1 { + return 0, 1 // Illegal UTF-8: not a continuation byte. + } + o := uint32(i)<<6 + uint32(c1) + i = {{.Name}}Index[o] + c2 := s[2] + if c2 < 0x80 || 0xC0 <= c2 { + return 0, 2 // Illegal UTF-8: not a continuation byte. + } + o = uint32(i)<<6 + uint32(c2) + i = {{.Name}}Index[o] + c3 := s[3] + if c3 < 0x80 || 0xC0 <= c3 { + return 0, 3 // Illegal UTF-8: not a continuation byte. + } + return t.lookupValue(uint32(i), c3), 4 + } + // Illegal rune + return 0, 1 +} + +// lookup{{if eq .SourceType "string"}}String{{end}}Unsafe returns the trie value for the first UTF-8 encoding in s. +// s must start with a full and valid UTF-8 encoded rune. +func (t *{{.Name}}Trie) lookup{{if eq .SourceType "string"}}String{{end}}Unsafe(s {{.SourceType}}) {{.ValueType}} { + c0 := s[0] + if c0 < 0x80 { // is ASCII + return {{.ASCIIBlock}}[c0] + } + i := {{.StarterBlock}}[c0] + if c0 < 0xE0 { // 2-byte UTF-8 + return t.lookupValue(uint32(i), s[1]) + } + i = {{.Name}}Index[uint32(i)<<6+uint32(s[1])] + if c0 < 0xF0 { // 3-byte UTF-8 + return t.lookupValue(uint32(i), s[2]) + } + i = {{.Name}}Index[uint32(i)<<6+uint32(s[2])] + if c0 < 0xF8 { // 4-byte UTF-8 + return t.lookupValue(uint32(i), s[3]) + } + return 0 +} +` diff --git a/vendor/golang.org/x/text/internal/triegen/triegen.go b/vendor/golang.org/x/text/internal/triegen/triegen.go new file mode 100644 index 00000000..adb01081 --- /dev/null +++ b/vendor/golang.org/x/text/internal/triegen/triegen.go @@ -0,0 +1,494 @@ +// Copyright 2014 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Package triegen implements a code generator for a trie for associating +// unsigned integer values with UTF-8 encoded runes. +// +// Many of the go.text packages use tries for storing per-rune information. A +// trie is especially useful if many of the runes have the same value. If this +// is the case, many blocks can be expected to be shared allowing for +// information on many runes to be stored in little space. +// +// As most of the lookups are done directly on []byte slices, the tries use the +// UTF-8 bytes directly for the lookup. This saves a conversion from UTF-8 to +// runes and contributes a little bit to better performance. It also naturally +// provides a fast path for ASCII. +// +// Space is also an issue. There are many code points defined in Unicode and as +// a result tables can get quite large. So every byte counts. The triegen +// package automatically chooses the smallest integer values to represent the +// tables. Compacters allow further compression of the trie by allowing for +// alternative representations of individual trie blocks. +// +// triegen allows generating multiple tries as a single structure. This is +// useful when, for example, one wants to generate tries for several languages +// that have a lot of values in common. Some existing libraries for +// internationalization store all per-language data as a dynamically loadable +// chunk. The go.text packages are designed with the assumption that the user +// typically wants to compile in support for all supported languages, in line +// with the approach common to Go to create a single standalone binary. The +// multi-root trie approach can give significant storage savings in this +// scenario. +// +// triegen generates both tables and code. The code is optimized to use the +// automatically chosen data types. The following code is generated for a Trie +// or multiple Tries named "foo": +// - type fooTrie +// The trie type. +// +// - func newFooTrie(x int) *fooTrie +// Trie constructor, where x is the index of the trie passed to Gen. +// +// - func (t *fooTrie) lookup(s []byte) (v uintX, sz int) +// The lookup method, where uintX is automatically chosen. +// +// - func lookupString, lookupUnsafe and lookupStringUnsafe +// Variants of the above. +// +// - var fooValues and fooIndex and any tables generated by Compacters. +// The core trie data. +// +// - var fooTrieHandles +// Indexes of starter blocks in case of multiple trie roots. +// +// It is recommended that users test the generated trie by checking the returned +// value for every rune. Such exhaustive tests are possible as the the number of +// runes in Unicode is limited. +package triegen // import "golang.org/x/text/internal/triegen" + +// TODO: Arguably, the internally optimized data types would not have to be +// exposed in the generated API. We could also investigate not generating the +// code, but using it through a package. We would have to investigate the impact +// on performance of making such change, though. For packages like unicode/norm, +// small changes like this could tank performance. + +import ( + "encoding/binary" + "fmt" + "hash/crc64" + "io" + "log" + "unicode/utf8" +) + +// builder builds a set of tries for associating values with runes. The set of +// tries can share common index and value blocks. +type builder struct { + Name string + + // ValueType is the type of the trie values looked up. + ValueType string + + // ValueSize is the byte size of the ValueType. + ValueSize int + + // IndexType is the type of trie index values used for all UTF-8 bytes of + // a rune except the last one. + IndexType string + + // IndexSize is the byte size of the IndexType. + IndexSize int + + // SourceType is used when generating the lookup functions. If the user + // requests StringSupport, all lookup functions will be generated for + // string input as well. + SourceType string + + Trie []*Trie + + IndexBlocks []*node + ValueBlocks [][]uint64 + Compactions []compaction + Checksum uint64 + + ASCIIBlock string + StarterBlock string + + indexBlockIdx map[uint64]int + valueBlockIdx map[uint64]nodeIndex + asciiBlockIdx map[uint64]int + + // Stats are used to fill out the template. + Stats struct { + NValueEntries int + NValueBytes int + NIndexEntries int + NIndexBytes int + NHandleBytes int + } + + err error +} + +// A nodeIndex encodes the index of a node, which is defined by the compaction +// which stores it and an index within the compaction. For internal nodes, the +// compaction is always 0. +type nodeIndex struct { + compaction int + index int +} + +// compaction keeps track of stats used for the compaction. +type compaction struct { + c Compacter + blocks []*node + maxHandle uint32 + totalSize int + + // Used by template-based generator and thus exported. + Cutoff uint32 + Offset uint32 + Handler string +} + +func (b *builder) setError(err error) { + if b.err == nil { + b.err = err + } +} + +// An Option can be passed to Gen. +type Option func(b *builder) error + +// Compact configures the trie generator to use the given Compacter. +func Compact(c Compacter) Option { + return func(b *builder) error { + b.Compactions = append(b.Compactions, compaction{ + c: c, + Handler: c.Handler() + "(n, b)"}) + return nil + } +} + +// Gen writes Go code for a shared trie lookup structure to w for the given +// Tries. The generated trie type will be called nameTrie. newNameTrie(x) will +// return the *nameTrie for tries[x]. A value can be looked up by using one of +// the various lookup methods defined on nameTrie. It returns the table size of +// the generated trie. +func Gen(w io.Writer, name string, tries []*Trie, opts ...Option) (sz int, err error) { + // The index contains two dummy blocks, followed by the zero block. The zero + // block is at offset 0x80, so that the offset for the zero block for + // continuation bytes is 0. + b := &builder{ + Name: name, + Trie: tries, + IndexBlocks: []*node{{}, {}, {}}, + Compactions: []compaction{{ + Handler: name + "Values[n<<6+uint32(b)]", + }}, + // The 0 key in indexBlockIdx and valueBlockIdx is the hash of the zero + // block. + indexBlockIdx: map[uint64]int{0: 0}, + valueBlockIdx: map[uint64]nodeIndex{0: {}}, + asciiBlockIdx: map[uint64]int{}, + } + b.Compactions[0].c = (*simpleCompacter)(b) + + for _, f := range opts { + if err := f(b); err != nil { + return 0, err + } + } + b.build() + if b.err != nil { + return 0, b.err + } + if err = b.print(w); err != nil { + return 0, err + } + return b.Size(), nil +} + +// A Trie represents a single root node of a trie. A builder may build several +// overlapping tries at once. +type Trie struct { + root *node + + hiddenTrie +} + +// hiddenTrie contains values we want to be visible to the template generator, +// but hidden from the API documentation. +type hiddenTrie struct { + Name string + Checksum uint64 + ASCIIIndex int + StarterIndex int +} + +// NewTrie returns a new trie root. +func NewTrie(name string) *Trie { + return &Trie{ + &node{ + children: make([]*node, blockSize), + values: make([]uint64, utf8.RuneSelf), + }, + hiddenTrie{Name: name}, + } +} + +// Gen is a convenience wrapper around the Gen func passing t as the only trie +// and uses the name passed to NewTrie. It returns the size of the generated +// tables. +func (t *Trie) Gen(w io.Writer, opts ...Option) (sz int, err error) { + return Gen(w, t.Name, []*Trie{t}, opts...) +} + +// node is a node of the intermediate trie structure. +type node struct { + // children holds this node's children. It is always of length 64. + // A child node may be nil. + children []*node + + // values contains the values of this node. If it is non-nil, this node is + // either a root or leaf node: + // For root nodes, len(values) == 128 and it maps the bytes in [0x00, 0x7F]. + // For leaf nodes, len(values) == 64 and it maps the bytes in [0x80, 0xBF]. + values []uint64 + + index nodeIndex +} + +// Insert associates value with the given rune. Insert will panic if a non-zero +// value is passed for an invalid rune. +func (t *Trie) Insert(r rune, value uint64) { + if value == 0 { + return + } + s := string(r) + if []rune(s)[0] != r && value != 0 { + // Note: The UCD tables will always assign what amounts to a zero value + // to a surrogate. Allowing a zero value for an illegal rune allows + // users to iterate over [0..MaxRune] without having to explicitly + // exclude surrogates, which would be tedious. + panic(fmt.Sprintf("triegen: non-zero value for invalid rune %U", r)) + } + if len(s) == 1 { + // It is a root node value (ASCII). + t.root.values[s[0]] = value + return + } + + n := t.root + for ; len(s) > 1; s = s[1:] { + if n.children == nil { + n.children = make([]*node, blockSize) + } + p := s[0] % blockSize + c := n.children[p] + if c == nil { + c = &node{} + n.children[p] = c + } + if len(s) > 2 && c.values != nil { + log.Fatalf("triegen: insert(%U): found internal node with values", r) + } + n = c + } + if n.values == nil { + n.values = make([]uint64, blockSize) + } + if n.children != nil { + log.Fatalf("triegen: insert(%U): found leaf node that also has child nodes", r) + } + n.values[s[0]-0x80] = value +} + +// Size returns the number of bytes the generated trie will take to store. It +// needs to be exported as it is used in the templates. +func (b *builder) Size() int { + // Index blocks. + sz := len(b.IndexBlocks) * blockSize * b.IndexSize + + // Skip the first compaction, which represents the normal value blocks, as + // its totalSize does not account for the ASCII blocks, which are managed + // separately. + sz += len(b.ValueBlocks) * blockSize * b.ValueSize + for _, c := range b.Compactions[1:] { + sz += c.totalSize + } + + // TODO: this computation does not account for the fixed overhead of a using + // a compaction, either code or data. As for data, though, the typical + // overhead of data is in the order of bytes (2 bytes for cases). Further, + // the savings of using a compaction should anyway be substantial for it to + // be worth it. + + // For multi-root tries, we also need to account for the handles. + if len(b.Trie) > 1 { + sz += 2 * b.IndexSize * len(b.Trie) + } + return sz +} + +func (b *builder) build() { + // Compute the sizes of the values. + var vmax uint64 + for _, t := range b.Trie { + vmax = maxValue(t.root, vmax) + } + b.ValueType, b.ValueSize = getIntType(vmax) + + // Compute all block allocations. + // TODO: first compute the ASCII blocks for all tries and then the other + // nodes. ASCII blocks are more restricted in placement, as they require two + // blocks to be placed consecutively. Processing them first may improve + // sharing (at least one zero block can be expected to be saved.) + for _, t := range b.Trie { + b.Checksum += b.buildTrie(t) + } + + // Compute the offsets for all the Compacters. + offset := uint32(0) + for i := range b.Compactions { + c := &b.Compactions[i] + c.Offset = offset + offset += c.maxHandle + 1 + c.Cutoff = offset + } + + // Compute the sizes of indexes. + // TODO: different byte positions could have different sizes. So far we have + // not found a case where this is beneficial. + imax := uint64(b.Compactions[len(b.Compactions)-1].Cutoff) + for _, ib := range b.IndexBlocks { + if x := uint64(ib.index.index); x > imax { + imax = x + } + } + b.IndexType, b.IndexSize = getIntType(imax) +} + +func maxValue(n *node, max uint64) uint64 { + if n == nil { + return max + } + for _, c := range n.children { + max = maxValue(c, max) + } + for _, v := range n.values { + if max < v { + max = v + } + } + return max +} + +func getIntType(v uint64) (string, int) { + switch { + case v < 1<<8: + return "uint8", 1 + case v < 1<<16: + return "uint16", 2 + case v < 1<<32: + return "uint32", 4 + } + return "uint64", 8 +} + +const ( + blockSize = 64 + + // Subtract two blocks to offset 0x80, the first continuation byte. + blockOffset = 2 + + // Subtract three blocks to offset 0xC0, the first non-ASCII starter. + rootBlockOffset = 3 +) + +var crcTable = crc64.MakeTable(crc64.ISO) + +func (b *builder) buildTrie(t *Trie) uint64 { + n := t.root + + // Get the ASCII offset. For the first trie, the ASCII block will be at + // position 0. + hasher := crc64.New(crcTable) + binary.Write(hasher, binary.BigEndian, n.values) + hash := hasher.Sum64() + + v, ok := b.asciiBlockIdx[hash] + if !ok { + v = len(b.ValueBlocks) + b.asciiBlockIdx[hash] = v + + b.ValueBlocks = append(b.ValueBlocks, n.values[:blockSize], n.values[blockSize:]) + if v == 0 { + // Add the zero block at position 2 so that it will be assigned a + // zero reference in the lookup blocks. + // TODO: always do this? This would allow us to remove a check from + // the trie lookup, but at the expense of extra space. Analyze + // performance for unicode/norm. + b.ValueBlocks = append(b.ValueBlocks, make([]uint64, blockSize)) + } + } + t.ASCIIIndex = v + + // Compute remaining offsets. + t.Checksum = b.computeOffsets(n, true) + // We already subtracted the normal blockOffset from the index. Subtract the + // difference for starter bytes. + t.StarterIndex = n.index.index - (rootBlockOffset - blockOffset) + return t.Checksum +} + +func (b *builder) computeOffsets(n *node, root bool) uint64 { + // For the first trie, the root lookup block will be at position 3, which is + // the offset for UTF-8 non-ASCII starter bytes. + first := len(b.IndexBlocks) == rootBlockOffset + if first { + b.IndexBlocks = append(b.IndexBlocks, n) + } + + // We special-case the cases where all values recursively are 0. This allows + // for the use of a zero block to which all such values can be directed. + hash := uint64(0) + if n.children != nil || n.values != nil { + hasher := crc64.New(crcTable) + for _, c := range n.children { + var v uint64 + if c != nil { + v = b.computeOffsets(c, false) + } + binary.Write(hasher, binary.BigEndian, v) + } + binary.Write(hasher, binary.BigEndian, n.values) + hash = hasher.Sum64() + } + + if first { + b.indexBlockIdx[hash] = rootBlockOffset - blockOffset + } + + // Compacters don't apply to internal nodes. + if n.children != nil { + v, ok := b.indexBlockIdx[hash] + if !ok { + v = len(b.IndexBlocks) - blockOffset + b.IndexBlocks = append(b.IndexBlocks, n) + b.indexBlockIdx[hash] = v + } + n.index = nodeIndex{0, v} + } else { + h, ok := b.valueBlockIdx[hash] + if !ok { + bestI, bestSize := 0, blockSize*b.ValueSize + for i, c := range b.Compactions[1:] { + if sz, ok := c.c.Size(n.values); ok && bestSize > sz { + bestI, bestSize = i+1, sz + } + } + c := &b.Compactions[bestI] + c.totalSize += bestSize + v := c.c.Store(n.values) + if c.maxHandle < v { + c.maxHandle = v + } + h = nodeIndex{bestI, int(v)} + b.valueBlockIdx[hash] = h + } + n.index = h + } + return hash +} diff --git a/vendor/golang.org/x/text/internal/ucd/ucd.go b/vendor/golang.org/x/text/internal/ucd/ucd.go new file mode 100644 index 00000000..8c45b5f3 --- /dev/null +++ b/vendor/golang.org/x/text/internal/ucd/ucd.go @@ -0,0 +1,371 @@ +// Copyright 2014 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Package ucd provides a parser for Unicode Character Database files, the +// format of which is defined in http://www.unicode.org/reports/tr44/. See +// http://www.unicode.org/Public/UCD/latest/ucd/ for example files. +// +// It currently does not support substitutions of missing fields. +package ucd // import "golang.org/x/text/internal/ucd" + +import ( + "bufio" + "errors" + "fmt" + "io" + "log" + "regexp" + "strconv" + "strings" +) + +// UnicodeData.txt fields. +const ( + CodePoint = iota + Name + GeneralCategory + CanonicalCombiningClass + BidiClass + DecompMapping + DecimalValue + DigitValue + NumericValue + BidiMirrored + Unicode1Name + ISOComment + SimpleUppercaseMapping + SimpleLowercaseMapping + SimpleTitlecaseMapping +) + +// Parse calls f for each entry in the given reader of a UCD file. It will close +// the reader upon return. It will call log.Fatal if any error occurred. +// +// This implements the most common usage pattern of using Parser. +func Parse(r io.ReadCloser, f func(p *Parser)) { + defer r.Close() + + p := New(r) + for p.Next() { + f(p) + } + if err := p.Err(); err != nil { + r.Close() // os.Exit will cause defers not to be called. + log.Fatal(err) + } +} + +// An Option is used to configure a Parser. +type Option func(p *Parser) + +func keepRanges(p *Parser) { + p.keepRanges = true +} + +var ( + // KeepRanges prevents the expansion of ranges. The raw ranges can be + // obtained by calling Range(0) on the parser. + KeepRanges Option = keepRanges +) + +// The Part option register a handler for lines starting with a '@'. The text +// after a '@' is available as the first field. Comments are handled as usual. +func Part(f func(p *Parser)) Option { + return func(p *Parser) { + p.partHandler = f + } +} + +// The CommentHandler option passes comments that are on a line by itself to +// a given handler. +func CommentHandler(f func(s string)) Option { + return func(p *Parser) { + p.commentHandler = f + } +} + +// A Parser parses Unicode Character Database (UCD) files. +type Parser struct { + scanner *bufio.Scanner + + keepRanges bool // Don't expand rune ranges in field 0. + + err error + comment string + field []string + // parsedRange is needed in case Range(0) is called more than once for one + // field. In some cases this requires scanning ahead. + line int + parsedRange bool + rangeStart, rangeEnd rune + + partHandler func(p *Parser) + commentHandler func(s string) +} + +func (p *Parser) setError(err error, msg string) { + if p.err == nil && err != nil { + if msg == "" { + p.err = fmt.Errorf("ucd:line:%d: %v", p.line, err) + } else { + p.err = fmt.Errorf("ucd:line:%d:%s: %v", p.line, msg, err) + } + } +} + +func (p *Parser) getField(i int) string { + if i >= len(p.field) { + return "" + } + return p.field[i] +} + +// Err returns a non-nil error if any error occurred during parsing. +func (p *Parser) Err() error { + return p.err +} + +// New returns a Parser for the given Reader. +func New(r io.Reader, o ...Option) *Parser { + p := &Parser{ + scanner: bufio.NewScanner(r), + } + for _, f := range o { + f(p) + } + return p +} + +// Next parses the next line in the file. It returns true if a line was parsed +// and false if it reached the end of the file. +func (p *Parser) Next() bool { + if !p.keepRanges && p.rangeStart < p.rangeEnd { + p.rangeStart++ + return true + } + p.comment = "" + p.field = p.field[:0] + p.parsedRange = false + + for p.scanner.Scan() && p.err == nil { + p.line++ + s := p.scanner.Text() + if s == "" { + continue + } + if s[0] == '#' { + if p.commentHandler != nil { + p.commentHandler(strings.TrimSpace(s[1:])) + } + continue + } + + // Parse line + if i := strings.IndexByte(s, '#'); i != -1 { + p.comment = strings.TrimSpace(s[i+1:]) + s = s[:i] + } + if s[0] == '@' { + if p.partHandler != nil { + p.field = append(p.field, strings.TrimSpace(s[1:])) + p.partHandler(p) + p.field = p.field[:0] + } + p.comment = "" + continue + } + for { + i := strings.IndexByte(s, ';') + if i == -1 { + p.field = append(p.field, strings.TrimSpace(s)) + break + } + p.field = append(p.field, strings.TrimSpace(s[:i])) + s = s[i+1:] + } + if !p.keepRanges { + p.rangeStart, p.rangeEnd = p.getRange(0) + } + return true + } + p.setError(p.scanner.Err(), "scanner failed") + return false +} + +func parseRune(b string) (rune, error) { + if len(b) > 2 && b[0] == 'U' && b[1] == '+' { + b = b[2:] + } + x, err := strconv.ParseUint(b, 16, 32) + return rune(x), err +} + +func (p *Parser) parseRune(s string) rune { + x, err := parseRune(s) + p.setError(err, "failed to parse rune") + return x +} + +// Rune parses and returns field i as a rune. +func (p *Parser) Rune(i int) rune { + if i > 0 || p.keepRanges { + return p.parseRune(p.getField(i)) + } + return p.rangeStart +} + +// Runes interprets and returns field i as a sequence of runes. +func (p *Parser) Runes(i int) (runes []rune) { + add := func(s string) { + if s = strings.TrimSpace(s); len(s) > 0 { + runes = append(runes, p.parseRune(s)) + } + } + for b := p.getField(i); ; { + i := strings.IndexByte(b, ' ') + if i == -1 { + add(b) + break + } + add(b[:i]) + b = b[i+1:] + } + return +} + +var ( + errIncorrectLegacyRange = errors.New("ucd: unmatched <* First>") + + // reRange matches one line of a legacy rune range. + reRange = regexp.MustCompile("^([0-9A-F]*);<([^,]*), ([^>]*)>(.*)$") +) + +// Range parses and returns field i as a rune range. A range is inclusive at +// both ends. If the field only has one rune, first and last will be identical. +// It supports the legacy format for ranges used in UnicodeData.txt. +func (p *Parser) Range(i int) (first, last rune) { + if !p.keepRanges { + return p.rangeStart, p.rangeStart + } + return p.getRange(i) +} + +func (p *Parser) getRange(i int) (first, last rune) { + b := p.getField(i) + if k := strings.Index(b, ".."); k != -1 { + return p.parseRune(b[:k]), p.parseRune(b[k+2:]) + } + // The first field may not be a rune, in which case we may ignore any error + // and set the range as 0..0. + x, err := parseRune(b) + if err != nil { + // Disable range parsing henceforth. This ensures that an error will be + // returned if the user subsequently will try to parse this field as + // a Rune. + p.keepRanges = true + } + // Special case for UnicodeData that was retained for backwards compatibility. + if i == 0 && len(p.field) > 1 && strings.HasSuffix(p.field[1], "First>") { + if p.parsedRange { + return p.rangeStart, p.rangeEnd + } + mf := reRange.FindStringSubmatch(p.scanner.Text()) + p.line++ + if mf == nil || !p.scanner.Scan() { + p.setError(errIncorrectLegacyRange, "") + return x, x + } + // Using Bytes would be more efficient here, but Text is a lot easier + // and this is not a frequent case. + ml := reRange.FindStringSubmatch(p.scanner.Text()) + if ml == nil || mf[2] != ml[2] || ml[3] != "Last" || mf[4] != ml[4] { + p.setError(errIncorrectLegacyRange, "") + return x, x + } + p.rangeStart, p.rangeEnd = x, p.parseRune(p.scanner.Text()[:len(ml[1])]) + p.parsedRange = true + return p.rangeStart, p.rangeEnd + } + return x, x +} + +// bools recognizes all valid UCD boolean values. +var bools = map[string]bool{ + "": false, + "N": false, + "No": false, + "F": false, + "False": false, + "Y": true, + "Yes": true, + "T": true, + "True": true, +} + +// Bool parses and returns field i as a boolean value. +func (p *Parser) Bool(i int) bool { + f := p.getField(i) + for s, v := range bools { + if f == s { + return v + } + } + p.setError(strconv.ErrSyntax, "error parsing bool") + return false +} + +// Int parses and returns field i as an integer value. +func (p *Parser) Int(i int) int { + x, err := strconv.ParseInt(string(p.getField(i)), 10, 64) + p.setError(err, "error parsing int") + return int(x) +} + +// Uint parses and returns field i as an unsigned integer value. +func (p *Parser) Uint(i int) uint { + x, err := strconv.ParseUint(string(p.getField(i)), 10, 64) + p.setError(err, "error parsing uint") + return uint(x) +} + +// Float parses and returns field i as a decimal value. +func (p *Parser) Float(i int) float64 { + x, err := strconv.ParseFloat(string(p.getField(i)), 64) + p.setError(err, "error parsing float") + return x +} + +// String parses and returns field i as a string value. +func (p *Parser) String(i int) string { + return string(p.getField(i)) +} + +// Strings parses and returns field i as a space-separated list of strings. +func (p *Parser) Strings(i int) []string { + ss := strings.Split(string(p.getField(i)), " ") + for i, s := range ss { + ss[i] = strings.TrimSpace(s) + } + return ss +} + +// Comment returns the comments for the current line. +func (p *Parser) Comment() string { + return string(p.comment) +} + +var errUndefinedEnum = errors.New("ucd: undefined enum value") + +// Enum interprets and returns field i as a value that must be one of the values +// in enum. +func (p *Parser) Enum(i int, enum ...string) string { + f := p.getField(i) + for _, s := range enum { + if f == s { + return s + } + } + p.setError(errUndefinedEnum, "error parsing enum") + return "" +} diff --git a/vendor/golang.org/x/text/transform/transform.go b/vendor/golang.org/x/text/transform/transform.go new file mode 100644 index 00000000..fe47b9b3 --- /dev/null +++ b/vendor/golang.org/x/text/transform/transform.go @@ -0,0 +1,705 @@ +// Copyright 2013 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Package transform provides reader and writer wrappers that transform the +// bytes passing through as well as various transformations. Example +// transformations provided by other packages include normalization and +// conversion between character sets. +package transform // import "golang.org/x/text/transform" + +import ( + "bytes" + "errors" + "io" + "unicode/utf8" +) + +var ( + // ErrShortDst means that the destination buffer was too short to + // receive all of the transformed bytes. + ErrShortDst = errors.New("transform: short destination buffer") + + // ErrShortSrc means that the source buffer has insufficient data to + // complete the transformation. + ErrShortSrc = errors.New("transform: short source buffer") + + // ErrEndOfSpan means that the input and output (the transformed input) + // are not identical. + ErrEndOfSpan = errors.New("transform: input and output are not identical") + + // errInconsistentByteCount means that Transform returned success (nil + // error) but also returned nSrc inconsistent with the src argument. + errInconsistentByteCount = errors.New("transform: inconsistent byte count returned") + + // errShortInternal means that an internal buffer is not large enough + // to make progress and the Transform operation must be aborted. + errShortInternal = errors.New("transform: short internal buffer") +) + +// Transformer transforms bytes. +type Transformer interface { + // Transform writes to dst the transformed bytes read from src, and + // returns the number of dst bytes written and src bytes read. The + // atEOF argument tells whether src represents the last bytes of the + // input. + // + // Callers should always process the nDst bytes produced and account + // for the nSrc bytes consumed before considering the error err. + // + // A nil error means that all of the transformed bytes (whether freshly + // transformed from src or left over from previous Transform calls) + // were written to dst. A nil error can be returned regardless of + // whether atEOF is true. If err is nil then nSrc must equal len(src); + // the converse is not necessarily true. + // + // ErrShortDst means that dst was too short to receive all of the + // transformed bytes. ErrShortSrc means that src had insufficient data + // to complete the transformation. If both conditions apply, then + // either error may be returned. Other than the error conditions listed + // here, implementations are free to report other errors that arise. + Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) + + // Reset resets the state and allows a Transformer to be reused. + Reset() +} + +// SpanningTransformer extends the Transformer interface with a Span method +// that determines how much of the input already conforms to the Transformer. +type SpanningTransformer interface { + Transformer + + // Span returns a position in src such that transforming src[:n] results in + // identical output src[:n] for these bytes. It does not necessarily return + // the largest such n. The atEOF argument tells whether src represents the + // last bytes of the input. + // + // Callers should always account for the n bytes consumed before + // considering the error err. + // + // A nil error means that all input bytes are known to be identical to the + // output produced by the Transformer. A nil error can be be returned + // regardless of whether atEOF is true. If err is nil, then then n must + // equal len(src); the converse is not necessarily true. + // + // ErrEndOfSpan means that the Transformer output may differ from the + // input after n bytes. Note that n may be len(src), meaning that the output + // would contain additional bytes after otherwise identical output. + // ErrShortSrc means that src had insufficient data to determine whether the + // remaining bytes would change. Other than the error conditions listed + // here, implementations are free to report other errors that arise. + // + // Calling Span can modify the Transformer state as a side effect. In + // effect, it does the transformation just as calling Transform would, only + // without copying to a destination buffer and only up to a point it can + // determine the input and output bytes are the same. This is obviously more + // limited than calling Transform, but can be more efficient in terms of + // copying and allocating buffers. Calls to Span and Transform may be + // interleaved. + Span(src []byte, atEOF bool) (n int, err error) +} + +// NopResetter can be embedded by implementations of Transformer to add a nop +// Reset method. +type NopResetter struct{} + +// Reset implements the Reset method of the Transformer interface. +func (NopResetter) Reset() {} + +// Reader wraps another io.Reader by transforming the bytes read. +type Reader struct { + r io.Reader + t Transformer + err error + + // dst[dst0:dst1] contains bytes that have been transformed by t but + // not yet copied out via Read. + dst []byte + dst0, dst1 int + + // src[src0:src1] contains bytes that have been read from r but not + // yet transformed through t. + src []byte + src0, src1 int + + // transformComplete is whether the transformation is complete, + // regardless of whether or not it was successful. + transformComplete bool +} + +const defaultBufSize = 4096 + +// NewReader returns a new Reader that wraps r by transforming the bytes read +// via t. It calls Reset on t. +func NewReader(r io.Reader, t Transformer) *Reader { + t.Reset() + return &Reader{ + r: r, + t: t, + dst: make([]byte, defaultBufSize), + src: make([]byte, defaultBufSize), + } +} + +// Read implements the io.Reader interface. +func (r *Reader) Read(p []byte) (int, error) { + n, err := 0, error(nil) + for { + // Copy out any transformed bytes and return the final error if we are done. + if r.dst0 != r.dst1 { + n = copy(p, r.dst[r.dst0:r.dst1]) + r.dst0 += n + if r.dst0 == r.dst1 && r.transformComplete { + return n, r.err + } + return n, nil + } else if r.transformComplete { + return 0, r.err + } + + // Try to transform some source bytes, or to flush the transformer if we + // are out of source bytes. We do this even if r.r.Read returned an error. + // As the io.Reader documentation says, "process the n > 0 bytes returned + // before considering the error". + if r.src0 != r.src1 || r.err != nil { + r.dst0 = 0 + r.dst1, n, err = r.t.Transform(r.dst, r.src[r.src0:r.src1], r.err == io.EOF) + r.src0 += n + + switch { + case err == nil: + if r.src0 != r.src1 { + r.err = errInconsistentByteCount + } + // The Transform call was successful; we are complete if we + // cannot read more bytes into src. + r.transformComplete = r.err != nil + continue + case err == ErrShortDst && (r.dst1 != 0 || n != 0): + // Make room in dst by copying out, and try again. + continue + case err == ErrShortSrc && r.src1-r.src0 != len(r.src) && r.err == nil: + // Read more bytes into src via the code below, and try again. + default: + r.transformComplete = true + // The reader error (r.err) takes precedence over the + // transformer error (err) unless r.err is nil or io.EOF. + if r.err == nil || r.err == io.EOF { + r.err = err + } + continue + } + } + + // Move any untransformed source bytes to the start of the buffer + // and read more bytes. + if r.src0 != 0 { + r.src0, r.src1 = 0, copy(r.src, r.src[r.src0:r.src1]) + } + n, r.err = r.r.Read(r.src[r.src1:]) + r.src1 += n + } +} + +// TODO: implement ReadByte (and ReadRune??). + +// Writer wraps another io.Writer by transforming the bytes read. +// The user needs to call Close to flush unwritten bytes that may +// be buffered. +type Writer struct { + w io.Writer + t Transformer + dst []byte + + // src[:n] contains bytes that have not yet passed through t. + src []byte + n int +} + +// NewWriter returns a new Writer that wraps w by transforming the bytes written +// via t. It calls Reset on t. +func NewWriter(w io.Writer, t Transformer) *Writer { + t.Reset() + return &Writer{ + w: w, + t: t, + dst: make([]byte, defaultBufSize), + src: make([]byte, defaultBufSize), + } +} + +// Write implements the io.Writer interface. If there are not enough +// bytes available to complete a Transform, the bytes will be buffered +// for the next write. Call Close to convert the remaining bytes. +func (w *Writer) Write(data []byte) (n int, err error) { + src := data + if w.n > 0 { + // Append bytes from data to the last remainder. + // TODO: limit the amount copied on first try. + n = copy(w.src[w.n:], data) + w.n += n + src = w.src[:w.n] + } + for { + nDst, nSrc, err := w.t.Transform(w.dst, src, false) + if _, werr := w.w.Write(w.dst[:nDst]); werr != nil { + return n, werr + } + src = src[nSrc:] + if w.n == 0 { + n += nSrc + } else if len(src) <= n { + // Enough bytes from w.src have been consumed. We make src point + // to data instead to reduce the copying. + w.n = 0 + n -= len(src) + src = data[n:] + if n < len(data) && (err == nil || err == ErrShortSrc) { + continue + } + } + switch err { + case ErrShortDst: + // This error is okay as long as we are making progress. + if nDst > 0 || nSrc > 0 { + continue + } + case ErrShortSrc: + if len(src) < len(w.src) { + m := copy(w.src, src) + // If w.n > 0, bytes from data were already copied to w.src and n + // was already set to the number of bytes consumed. + if w.n == 0 { + n += m + } + w.n = m + err = nil + } else if nDst > 0 || nSrc > 0 { + // Not enough buffer to store the remainder. Keep processing as + // long as there is progress. Without this case, transforms that + // require a lookahead larger than the buffer may result in an + // error. This is not something one may expect to be common in + // practice, but it may occur when buffers are set to small + // sizes during testing. + continue + } + case nil: + if w.n > 0 { + err = errInconsistentByteCount + } + } + return n, err + } +} + +// Close implements the io.Closer interface. +func (w *Writer) Close() error { + src := w.src[:w.n] + for { + nDst, nSrc, err := w.t.Transform(w.dst, src, true) + if _, werr := w.w.Write(w.dst[:nDst]); werr != nil { + return werr + } + if err != ErrShortDst { + return err + } + src = src[nSrc:] + } +} + +type nop struct{ NopResetter } + +func (nop) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) { + n := copy(dst, src) + if n < len(src) { + err = ErrShortDst + } + return n, n, err +} + +func (nop) Span(src []byte, atEOF bool) (n int, err error) { + return len(src), nil +} + +type discard struct{ NopResetter } + +func (discard) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) { + return 0, len(src), nil +} + +var ( + // Discard is a Transformer for which all Transform calls succeed + // by consuming all bytes and writing nothing. + Discard Transformer = discard{} + + // Nop is a SpanningTransformer that copies src to dst. + Nop SpanningTransformer = nop{} +) + +// chain is a sequence of links. A chain with N Transformers has N+1 links and +// N+1 buffers. Of those N+1 buffers, the first and last are the src and dst +// buffers given to chain.Transform and the middle N-1 buffers are intermediate +// buffers owned by the chain. The i'th link transforms bytes from the i'th +// buffer chain.link[i].b at read offset chain.link[i].p to the i+1'th buffer +// chain.link[i+1].b at write offset chain.link[i+1].n, for i in [0, N). +type chain struct { + link []link + err error + // errStart is the index at which the error occurred plus 1. Processing + // errStart at this level at the next call to Transform. As long as + // errStart > 0, chain will not consume any more source bytes. + errStart int +} + +func (c *chain) fatalError(errIndex int, err error) { + if i := errIndex + 1; i > c.errStart { + c.errStart = i + c.err = err + } +} + +type link struct { + t Transformer + // b[p:n] holds the bytes to be transformed by t. + b []byte + p int + n int +} + +func (l *link) src() []byte { + return l.b[l.p:l.n] +} + +func (l *link) dst() []byte { + return l.b[l.n:] +} + +// Chain returns a Transformer that applies t in sequence. +func Chain(t ...Transformer) Transformer { + if len(t) == 0 { + return nop{} + } + c := &chain{link: make([]link, len(t)+1)} + for i, tt := range t { + c.link[i].t = tt + } + // Allocate intermediate buffers. + b := make([][defaultBufSize]byte, len(t)-1) + for i := range b { + c.link[i+1].b = b[i][:] + } + return c +} + +// Reset resets the state of Chain. It calls Reset on all the Transformers. +func (c *chain) Reset() { + for i, l := range c.link { + if l.t != nil { + l.t.Reset() + } + c.link[i].p, c.link[i].n = 0, 0 + } +} + +// TODO: make chain use Span (is going to be fun to implement!) + +// Transform applies the transformers of c in sequence. +func (c *chain) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) { + // Set up src and dst in the chain. + srcL := &c.link[0] + dstL := &c.link[len(c.link)-1] + srcL.b, srcL.p, srcL.n = src, 0, len(src) + dstL.b, dstL.n = dst, 0 + var lastFull, needProgress bool // for detecting progress + + // i is the index of the next Transformer to apply, for i in [low, high]. + // low is the lowest index for which c.link[low] may still produce bytes. + // high is the highest index for which c.link[high] has a Transformer. + // The error returned by Transform determines whether to increase or + // decrease i. We try to completely fill a buffer before converting it. + for low, i, high := c.errStart, c.errStart, len(c.link)-2; low <= i && i <= high; { + in, out := &c.link[i], &c.link[i+1] + nDst, nSrc, err0 := in.t.Transform(out.dst(), in.src(), atEOF && low == i) + out.n += nDst + in.p += nSrc + if i > 0 && in.p == in.n { + in.p, in.n = 0, 0 + } + needProgress, lastFull = lastFull, false + switch err0 { + case ErrShortDst: + // Process the destination buffer next. Return if we are already + // at the high index. + if i == high { + return dstL.n, srcL.p, ErrShortDst + } + if out.n != 0 { + i++ + // If the Transformer at the next index is not able to process any + // source bytes there is nothing that can be done to make progress + // and the bytes will remain unprocessed. lastFull is used to + // detect this and break out of the loop with a fatal error. + lastFull = true + continue + } + // The destination buffer was too small, but is completely empty. + // Return a fatal error as this transformation can never complete. + c.fatalError(i, errShortInternal) + case ErrShortSrc: + if i == 0 { + // Save ErrShortSrc in err. All other errors take precedence. + err = ErrShortSrc + break + } + // Source bytes were depleted before filling up the destination buffer. + // Verify we made some progress, move the remaining bytes to the errStart + // and try to get more source bytes. + if needProgress && nSrc == 0 || in.n-in.p == len(in.b) { + // There were not enough source bytes to proceed while the source + // buffer cannot hold any more bytes. Return a fatal error as this + // transformation can never complete. + c.fatalError(i, errShortInternal) + break + } + // in.b is an internal buffer and we can make progress. + in.p, in.n = 0, copy(in.b, in.src()) + fallthrough + case nil: + // if i == low, we have depleted the bytes at index i or any lower levels. + // In that case we increase low and i. In all other cases we decrease i to + // fetch more bytes before proceeding to the next index. + if i > low { + i-- + continue + } + default: + c.fatalError(i, err0) + } + // Exhausted level low or fatal error: increase low and continue + // to process the bytes accepted so far. + i++ + low = i + } + + // If c.errStart > 0, this means we found a fatal error. We will clear + // all upstream buffers. At this point, no more progress can be made + // downstream, as Transform would have bailed while handling ErrShortDst. + if c.errStart > 0 { + for i := 1; i < c.errStart; i++ { + c.link[i].p, c.link[i].n = 0, 0 + } + err, c.errStart, c.err = c.err, 0, nil + } + return dstL.n, srcL.p, err +} + +// Deprecated: use runes.Remove instead. +func RemoveFunc(f func(r rune) bool) Transformer { + return removeF(f) +} + +type removeF func(r rune) bool + +func (removeF) Reset() {} + +// Transform implements the Transformer interface. +func (t removeF) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) { + for r, sz := rune(0), 0; len(src) > 0; src = src[sz:] { + + if r = rune(src[0]); r < utf8.RuneSelf { + sz = 1 + } else { + r, sz = utf8.DecodeRune(src) + + if sz == 1 { + // Invalid rune. + if !atEOF && !utf8.FullRune(src) { + err = ErrShortSrc + break + } + // We replace illegal bytes with RuneError. Not doing so might + // otherwise turn a sequence of invalid UTF-8 into valid UTF-8. + // The resulting byte sequence may subsequently contain runes + // for which t(r) is true that were passed unnoticed. + if !t(r) { + if nDst+3 > len(dst) { + err = ErrShortDst + break + } + nDst += copy(dst[nDst:], "\uFFFD") + } + nSrc++ + continue + } + } + + if !t(r) { + if nDst+sz > len(dst) { + err = ErrShortDst + break + } + nDst += copy(dst[nDst:], src[:sz]) + } + nSrc += sz + } + return +} + +// grow returns a new []byte that is longer than b, and copies the first n bytes +// of b to the start of the new slice. +func grow(b []byte, n int) []byte { + m := len(b) + if m <= 32 { + m = 64 + } else if m <= 256 { + m *= 2 + } else { + m += m >> 1 + } + buf := make([]byte, m) + copy(buf, b[:n]) + return buf +} + +const initialBufSize = 128 + +// String returns a string with the result of converting s[:n] using t, where +// n <= len(s). If err == nil, n will be len(s). It calls Reset on t. +func String(t Transformer, s string) (result string, n int, err error) { + t.Reset() + if s == "" { + // Fast path for the common case for empty input. Results in about a + // 86% reduction of running time for BenchmarkStringLowerEmpty. + if _, _, err := t.Transform(nil, nil, true); err == nil { + return "", 0, nil + } + } + + // Allocate only once. Note that both dst and src escape when passed to + // Transform. + buf := [2 * initialBufSize]byte{} + dst := buf[:initialBufSize:initialBufSize] + src := buf[initialBufSize : 2*initialBufSize] + + // The input string s is transformed in multiple chunks (starting with a + // chunk size of initialBufSize). nDst and nSrc are per-chunk (or + // per-Transform-call) indexes, pDst and pSrc are overall indexes. + nDst, nSrc := 0, 0 + pDst, pSrc := 0, 0 + + // pPrefix is the length of a common prefix: the first pPrefix bytes of the + // result will equal the first pPrefix bytes of s. It is not guaranteed to + // be the largest such value, but if pPrefix, len(result) and len(s) are + // all equal after the final transform (i.e. calling Transform with atEOF + // being true returned nil error) then we don't need to allocate a new + // result string. + pPrefix := 0 + for { + // Invariant: pDst == pPrefix && pSrc == pPrefix. + + n := copy(src, s[pSrc:]) + nDst, nSrc, err = t.Transform(dst, src[:n], pSrc+n == len(s)) + pDst += nDst + pSrc += nSrc + + // TODO: let transformers implement an optional Spanner interface, akin + // to norm's QuickSpan. This would even allow us to avoid any allocation. + if !bytes.Equal(dst[:nDst], src[:nSrc]) { + break + } + pPrefix = pSrc + if err == ErrShortDst { + // A buffer can only be short if a transformer modifies its input. + break + } else if err == ErrShortSrc { + if nSrc == 0 { + // No progress was made. + break + } + // Equal so far and !atEOF, so continue checking. + } else if err != nil || pPrefix == len(s) { + return string(s[:pPrefix]), pPrefix, err + } + } + // Post-condition: pDst == pPrefix + nDst && pSrc == pPrefix + nSrc. + + // We have transformed the first pSrc bytes of the input s to become pDst + // transformed bytes. Those transformed bytes are discontiguous: the first + // pPrefix of them equal s[:pPrefix] and the last nDst of them equal + // dst[:nDst]. We copy them around, into a new dst buffer if necessary, so + // that they become one contiguous slice: dst[:pDst]. + if pPrefix != 0 { + newDst := dst + if pDst > len(newDst) { + newDst = make([]byte, len(s)+nDst-nSrc) + } + copy(newDst[pPrefix:pDst], dst[:nDst]) + copy(newDst[:pPrefix], s[:pPrefix]) + dst = newDst + } + + // Prevent duplicate Transform calls with atEOF being true at the end of + // the input. Also return if we have an unrecoverable error. + if (err == nil && pSrc == len(s)) || + (err != nil && err != ErrShortDst && err != ErrShortSrc) { + return string(dst[:pDst]), pSrc, err + } + + // Transform the remaining input, growing dst and src buffers as necessary. + for { + n := copy(src, s[pSrc:]) + nDst, nSrc, err := t.Transform(dst[pDst:], src[:n], pSrc+n == len(s)) + pDst += nDst + pSrc += nSrc + + // If we got ErrShortDst or ErrShortSrc, do not grow as long as we can + // make progress. This may avoid excessive allocations. + if err == ErrShortDst { + if nDst == 0 { + dst = grow(dst, pDst) + } + } else if err == ErrShortSrc { + if nSrc == 0 { + src = grow(src, 0) + } + } else if err != nil || pSrc == len(s) { + return string(dst[:pDst]), pSrc, err + } + } +} + +// Bytes returns a new byte slice with the result of converting b[:n] using t, +// where n <= len(b). If err == nil, n will be len(b). It calls Reset on t. +func Bytes(t Transformer, b []byte) (result []byte, n int, err error) { + return doAppend(t, 0, make([]byte, len(b)), b) +} + +// Append appends the result of converting src[:n] using t to dst, where +// n <= len(src), If err == nil, n will be len(src). It calls Reset on t. +func Append(t Transformer, dst, src []byte) (result []byte, n int, err error) { + if len(dst) == cap(dst) { + n := len(src) + len(dst) // It is okay for this to be 0. + b := make([]byte, n) + dst = b[:copy(b, dst)] + } + return doAppend(t, len(dst), dst[:cap(dst)], src) +} + +func doAppend(t Transformer, pDst int, dst, src []byte) (result []byte, n int, err error) { + t.Reset() + pSrc := 0 + for { + nDst, nSrc, err := t.Transform(dst[pDst:], src[pSrc:], true) + pDst += nDst + pSrc += nSrc + if err != ErrShortDst { + return dst[:pDst], pSrc, err + } + + // Grow the destination buffer, but do not grow as long as we can make + // progress. This may avoid excessive allocations. + if nDst == 0 { + dst = grow(dst, pDst) + } + } +} diff --git a/vendor/golang.org/x/text/unicode/cldr/base.go b/vendor/golang.org/x/text/unicode/cldr/base.go new file mode 100644 index 00000000..63cdc16c --- /dev/null +++ b/vendor/golang.org/x/text/unicode/cldr/base.go @@ -0,0 +1,105 @@ +// Copyright 2013 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package cldr + +import ( + "encoding/xml" + "regexp" + "strconv" +) + +// Elem is implemented by every XML element. +type Elem interface { + setEnclosing(Elem) + setName(string) + enclosing() Elem + + GetCommon() *Common +} + +type hidden struct { + CharData string `xml:",chardata"` + Alias *struct { + Common + Source string `xml:"source,attr"` + Path string `xml:"path,attr"` + } `xml:"alias"` + Def *struct { + Common + Choice string `xml:"choice,attr,omitempty"` + Type string `xml:"type,attr,omitempty"` + } `xml:"default"` +} + +// Common holds several of the most common attributes and sub elements +// of an XML element. +type Common struct { + XMLName xml.Name + name string + enclElem Elem + Type string `xml:"type,attr,omitempty"` + Reference string `xml:"reference,attr,omitempty"` + Alt string `xml:"alt,attr,omitempty"` + ValidSubLocales string `xml:"validSubLocales,attr,omitempty"` + Draft string `xml:"draft,attr,omitempty"` + hidden +} + +// Default returns the default type to select from the enclosed list +// or "" if no default value is specified. +func (e *Common) Default() string { + if e.Def == nil { + return "" + } + if e.Def.Choice != "" { + return e.Def.Choice + } else if e.Def.Type != "" { + // Type is still used by the default element in collation. + return e.Def.Type + } + return "" +} + +// Element returns the XML element name. +func (e *Common) Element() string { + return e.name +} + +// GetCommon returns e. It is provided such that Common implements Elem. +func (e *Common) GetCommon() *Common { + return e +} + +// Data returns the character data accumulated for this element. +func (e *Common) Data() string { + e.CharData = charRe.ReplaceAllStringFunc(e.CharData, replaceUnicode) + return e.CharData +} + +func (e *Common) setName(s string) { + e.name = s +} + +func (e *Common) enclosing() Elem { + return e.enclElem +} + +func (e *Common) setEnclosing(en Elem) { + e.enclElem = en +} + +// Escape characters that can be escaped without further escaping the string. +var charRe = regexp.MustCompile(`&#x[0-9a-fA-F]*;|\\u[0-9a-fA-F]{4}|\\U[0-9a-fA-F]{8}|\\x[0-9a-fA-F]{2}|\\[0-7]{3}|\\[abtnvfr]`) + +// replaceUnicode converts hexadecimal Unicode codepoint notations to a one-rune string. +// It assumes the input string is correctly formatted. +func replaceUnicode(s string) string { + if s[1] == '#' { + r, _ := strconv.ParseInt(s[3:len(s)-1], 16, 32) + return string(r) + } + r, _, _, _ := strconv.UnquoteChar(s, 0) + return string(r) +} diff --git a/vendor/golang.org/x/text/unicode/cldr/cldr.go b/vendor/golang.org/x/text/unicode/cldr/cldr.go new file mode 100644 index 00000000..2197f8ac --- /dev/null +++ b/vendor/golang.org/x/text/unicode/cldr/cldr.go @@ -0,0 +1,130 @@ +// Copyright 2013 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +//go:generate go run makexml.go -output xml.go + +// Package cldr provides a parser for LDML and related XML formats. +// This package is intended to be used by the table generation tools +// for the various internationalization-related packages. +// As the XML types are generated from the CLDR DTD, and as the CLDR standard +// is periodically amended, this package may change considerably over time. +// This mostly means that data may appear and disappear between versions. +// That is, old code should keep compiling for newer versions, but data +// may have moved or changed. +// CLDR version 22 is the first version supported by this package. +// Older versions may not work. +package cldr // import "golang.org/x/text/unicode/cldr" + +import ( + "fmt" + "sort" +) + +// CLDR provides access to parsed data of the Unicode Common Locale Data Repository. +type CLDR struct { + parent map[string][]string + locale map[string]*LDML + resolved map[string]*LDML + bcp47 *LDMLBCP47 + supp *SupplementalData +} + +func makeCLDR() *CLDR { + return &CLDR{ + parent: make(map[string][]string), + locale: make(map[string]*LDML), + resolved: make(map[string]*LDML), + bcp47: &LDMLBCP47{}, + supp: &SupplementalData{}, + } +} + +// BCP47 returns the parsed BCP47 LDML data. If no such data was parsed, nil is returned. +func (cldr *CLDR) BCP47() *LDMLBCP47 { + return nil +} + +// Draft indicates the draft level of an element. +type Draft int + +const ( + Approved Draft = iota + Contributed + Provisional + Unconfirmed +) + +var drafts = []string{"unconfirmed", "provisional", "contributed", "approved", ""} + +// ParseDraft returns the Draft value corresponding to the given string. The +// empty string corresponds to Approved. +func ParseDraft(level string) (Draft, error) { + if level == "" { + return Approved, nil + } + for i, s := range drafts { + if level == s { + return Unconfirmed - Draft(i), nil + } + } + return Approved, fmt.Errorf("cldr: unknown draft level %q", level) +} + +func (d Draft) String() string { + return drafts[len(drafts)-1-int(d)] +} + +// SetDraftLevel sets which draft levels to include in the evaluated LDML. +// Any draft element for which the draft level is higher than lev will be excluded. +// If multiple draft levels are available for a single element, the one with the +// lowest draft level will be selected, unless preferDraft is true, in which case +// the highest draft will be chosen. +// It is assumed that the underlying LDML is canonicalized. +func (cldr *CLDR) SetDraftLevel(lev Draft, preferDraft bool) { + // TODO: implement + cldr.resolved = make(map[string]*LDML) +} + +// RawLDML returns the LDML XML for id in unresolved form. +// id must be one of the strings returned by Locales. +func (cldr *CLDR) RawLDML(loc string) *LDML { + return cldr.locale[loc] +} + +// LDML returns the fully resolved LDML XML for loc, which must be one of +// the strings returned by Locales. +func (cldr *CLDR) LDML(loc string) (*LDML, error) { + return cldr.resolve(loc) +} + +// Supplemental returns the parsed supplemental data. If no such data was parsed, +// nil is returned. +func (cldr *CLDR) Supplemental() *SupplementalData { + return cldr.supp +} + +// Locales returns the locales for which there exist files. +// Valid sublocales for which there is no file are not included. +// The root locale is always sorted first. +func (cldr *CLDR) Locales() []string { + loc := []string{"root"} + hasRoot := false + for l, _ := range cldr.locale { + if l == "root" { + hasRoot = true + continue + } + loc = append(loc, l) + } + sort.Strings(loc[1:]) + if !hasRoot { + return loc[1:] + } + return loc +} + +// Get fills in the fields of x based on the XPath path. +func Get(e Elem, path string) (res Elem, err error) { + return walkXPath(e, path) +} diff --git a/vendor/golang.org/x/text/unicode/cldr/collate.go b/vendor/golang.org/x/text/unicode/cldr/collate.go new file mode 100644 index 00000000..80ee28d7 --- /dev/null +++ b/vendor/golang.org/x/text/unicode/cldr/collate.go @@ -0,0 +1,359 @@ +// Copyright 2013 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package cldr + +import ( + "bufio" + "encoding/xml" + "errors" + "fmt" + "strconv" + "strings" + "unicode" + "unicode/utf8" +) + +// RuleProcessor can be passed to Collator's Process method, which +// parses the rules and calls the respective method for each rule found. +type RuleProcessor interface { + Reset(anchor string, before int) error + Insert(level int, str, context, extend string) error + Index(id string) +} + +const ( + // cldrIndex is a Unicode-reserved sentinel value used to mark the start + // of a grouping within an index. + // We ignore any rule that starts with this rune. + // See http://unicode.org/reports/tr35/#Collation_Elements for details. + cldrIndex = "\uFDD0" + + // specialAnchor is the format in which to represent logical reset positions, + // such as "first tertiary ignorable". + specialAnchor = "<%s/>" +) + +// Process parses the rules for the tailorings of this collation +// and calls the respective methods of p for each rule found. +func (c Collation) Process(p RuleProcessor) (err error) { + if len(c.Cr) > 0 { + if len(c.Cr) > 1 { + return fmt.Errorf("multiple cr elements, want 0 or 1") + } + return processRules(p, c.Cr[0].Data()) + } + if c.Rules.Any != nil { + return c.processXML(p) + } + return errors.New("no tailoring data") +} + +// processRules parses rules in the Collation Rule Syntax defined in +// http://www.unicode.org/reports/tr35/tr35-collation.html#Collation_Tailorings. +func processRules(p RuleProcessor, s string) (err error) { + chk := func(s string, e error) string { + if err == nil { + err = e + } + return s + } + i := 0 // Save the line number for use after the loop. + scanner := bufio.NewScanner(strings.NewReader(s)) + for ; scanner.Scan() && err == nil; i++ { + for s := skipSpace(scanner.Text()); s != "" && s[0] != '#'; s = skipSpace(s) { + level := 5 + var ch byte + switch ch, s = s[0], s[1:]; ch { + case '&': // followed by or '[' ']' + if s = skipSpace(s); consume(&s, '[') { + s = chk(parseSpecialAnchor(p, s)) + } else { + s = chk(parseAnchor(p, 0, s)) + } + case '<': // sort relation '<'{1,4}, optionally followed by '*'. + for level = 1; consume(&s, '<'); level++ { + } + if level > 4 { + err = fmt.Errorf("level %d > 4", level) + } + fallthrough + case '=': // identity relation, optionally followed by *. + if consume(&s, '*') { + s = chk(parseSequence(p, level, s)) + } else { + s = chk(parseOrder(p, level, s)) + } + default: + chk("", fmt.Errorf("illegal operator %q", ch)) + break + } + } + } + if chk("", scanner.Err()); err != nil { + return fmt.Errorf("%d: %v", i, err) + } + return nil +} + +// parseSpecialAnchor parses the anchor syntax which is either of the form +// ['before' ] +// or +// [