diff --git a/Gopkg.lock b/Gopkg.lock index 0b1a1744..0b9f9f58 100644 --- a/Gopkg.lock +++ b/Gopkg.lock @@ -72,7 +72,7 @@ "unused", "version" ] - revision = "9a1f066598f8e2c1a58fab3249341c8d808b70ad" + revision = "38c5f4a0efc6bd0efc88bdd3890e5e64e698bdc2" [[projects]] branch = "master" @@ -142,6 +142,20 @@ revision = "0360b2af4f38e8d38c7fce2a9f4e702702d73a39" version = "v0.0.3" +[[projects]] + branch = "master" + name = "github.com/mibk/dupl" + packages = [ + ".", + "job", + "printer", + "suffixtree", + "syntax", + "syntax/golang" + ] + revision = "53b9af5a45362a6f2896cfa39cc17d17ba9667ea" + source = "github.com/golangci/dupl" + [[projects]] name = "github.com/pmezard/go-difflib" packages = ["difflib"] @@ -238,6 +252,6 @@ [solve-meta] analyzer-name = "dep" analyzer-version = 1 - inputs-digest = "c267ef4b3c1b5396f8d60efc4d262a5f563d9542ea535ab6f1402141ceb4aff0" + inputs-digest = "6ded01ff91bda8204ee2c22da5f6665af28fc5e9551a8b3782ff65582f08d738" solver-name = "gps-cdcl" solver-version = 1 diff --git a/Gopkg.toml b/Gopkg.toml index 5576086e..53fc9e9a 100644 --- a/Gopkg.toml +++ b/Gopkg.toml @@ -29,10 +29,6 @@ name = "github.com/bradleyfalzon/revgrep" version = "0.3.0" -[[constraint]] - name = "github.com/golang/mock" - version = "1.1.1" - [[constraint]] branch = "master" name = "github.com/golangci/golangci-shared" @@ -54,6 +50,11 @@ name = "github.com/golangci/govet" branch = "master" +[[constraint]] + name = "github.com/mibk/dupl" + branch = "master" + source = "github.com/golangci/dupl" + [prune] go-tests = true unused-packages = true diff --git a/internal/commands/run.go b/internal/commands/run.go index 1b946e11..508f2a51 100644 --- a/internal/commands/run.go +++ b/internal/commands/run.go @@ -60,6 +60,8 @@ func (e *Executor) initRun() { runCmd.Flags().BoolVar(&rc.Megacheck.EnableStaticcheck, "megacheck.staticcheck", true, "Megacheck: run Staticcheck sub-linter: staticcheck is go vet on steroids, applying a ton of static analysis checks") runCmd.Flags().BoolVar(&rc.Megacheck.EnableGosimple, "megacheck.gosimple", true, "Megacheck: run Gosimple sub-linter: gosimple is a linter for Go source code that specialises on simplifying code") runCmd.Flags().BoolVar(&rc.Megacheck.EnableUnused, "megacheck.unused", true, "Megacheck: run Unused sub-linter: unused checks Go code for unused constants, variables, functions and types") + runCmd.Flags().IntVar(&rc.Dupl.Threshold, "dupl.threshold", + 20, "Minimal threshold to detect copy-paste") runCmd.Flags().StringSliceVarP(&rc.EnabledLinters, "enable", "E", []string{}, "Enable specific linter") runCmd.Flags().StringSliceVarP(&rc.DisabledLinters, "disable", "D", []string{}, "Disable specific linter") @@ -95,7 +97,8 @@ func loadWholeAppIfNeeded(ctx context.Context, linters []pkg.Linter, cfg *config bctx := build.Default bctx.BuildTags = append(bctx.BuildTags, cfg.BuildTags...) 
loadcfg := &loader.Config{ - Build: &bctx, + Build: &bctx, + AllowErrors: true, // Try to analyze event partially } const needTests = true // TODO: configure and take into account in paths resolver rest, err := loadcfg.FromArgs(paths.MixedPaths(), needTests) diff --git a/pkg/config/config.go b/pkg/config/config.go index 13a7c611..70736278 100644 --- a/pkg/config/config.go +++ b/pkg/config/config.go @@ -61,6 +61,9 @@ type Run struct { EnableUnused bool EnableGosimple bool } + Dupl struct { + Threshold int + } EnabledLinters []string DisabledLinters []string diff --git a/pkg/enabled_linters.go b/pkg/enabled_linters.go index 3cc624bc..08547f11 100644 --- a/pkg/enabled_linters.go +++ b/pkg/enabled_linters.go @@ -64,6 +64,7 @@ func GetAllSupportedLinterConfigs() []LinterConfig { enabledByDefault(golinters.Structcheck{}, "Finds unused struct fields", true), enabledByDefault(golinters.Varcheck{}, "Finds unused global variables and constants", true), enabledByDefault(golinters.Megacheck{}, "Megacheck: 3 sub-linters in one: staticcheck, gosimple and unused", true), + enabledByDefault(golinters.Dupl{}, "Tool for code clone detection", false), disabledByDefault(golinters.Gofmt{}, "Gofmt checks whether code was gofmt-ed. By default this tool runs with -s option to check for code simplification", false), disabledByDefault(golinters.Gofmt{UseGoimports: true}, "Goimports does everything that gofmt does. Additionally it checks unused imports", false), diff --git a/pkg/golinters/dupl.go b/pkg/golinters/dupl.go new file mode 100644 index 00000000..c391bdee --- /dev/null +++ b/pkg/golinters/dupl.go @@ -0,0 +1,37 @@ +package golinters + +import ( + "context" + "fmt" + + "github.com/golangci/golangci-lint/pkg/result" + duplAPI "github.com/mibk/dupl" +) + +type Dupl struct{} + +func (Dupl) Name() string { + return "dupl" +} + +func (d Dupl) Run(ctx context.Context, lintCtx *Context) (*result.Result, error) { + issues, err := duplAPI.Run(lintCtx.Paths.Files, lintCtx.RunCfg().Dupl.Threshold) + if err != nil { + return nil, err + } + + res := &result.Result{} + for _, i := range issues { + dupl := fmt.Sprintf("%s:%d-%d", i.To.Filename(), i.To.LineStart(), i.To.LineEnd()) + text := fmt.Sprintf("%d-%d lines are duplicate of %s", + i.From.LineStart(), i.From.LineEnd(), + formatCode(dupl, lintCtx.RunCfg())) + res.Issues = append(res.Issues, result.Issue{ + File: i.From.Filename(), + LineNumber: i.From.LineStart(), + Text: text, + FromLinter: d.Name(), + }) + } + return res, nil +} diff --git a/pkg/golinters/testdata/dupl.go b/pkg/golinters/testdata/dupl.go new file mode 100644 index 00000000..9fe413ee --- /dev/null +++ b/pkg/golinters/testdata/dupl.go @@ -0,0 +1,32 @@ +package testdata + +type DuplLogger struct{} + +func (DuplLogger) level() int { + return 1 +} + +func (DuplLogger) Debug(args ...interface{}) {} +func (DuplLogger) Info(args ...interface{}) {} + +func (logger *DuplLogger) First(args ...interface{}) { // ERROR "12-21 lines are duplicate of `testdata/dupl.go:23-32`" + if logger.level() >= 0 { + logger.Debug(args...) + logger.Debug(args...) + logger.Debug(args...) + logger.Debug(args...) + logger.Debug(args...) + logger.Debug(args...) + } +} + +func (logger *DuplLogger) Second(args ...interface{}) { // ERROR "23-32 lines are duplicate of `testdata/dupl.go:12-21`" + if logger.level() >= 1 { + logger.Info(args...) + logger.Info(args...) + logger.Info(args...) + logger.Info(args...) + logger.Info(args...) + logger.Info(args...) 
+ } +} diff --git a/pkg/golinters/testdata/gocyclo.go b/pkg/golinters/testdata/gocyclo.go index e0775031..b1a9983b 100644 --- a/pkg/golinters/testdata/gocyclo.go +++ b/pkg/golinters/testdata/gocyclo.go @@ -1,15 +1,15 @@ package testdata func GocycloBigComplexity(s string) { // ERROR "cyclomatic complexity .* of func .* is high .*" - if s == "1" || s == "2" || s == "3" || s == "4" || s == "5" || s == "6" || s == "7" { + if s == "1" || s == "2" || s == "3" || s == "4" || s == "5" || s == "6" || s == "7" { // nolint:dupl return } - if s == "1" || s == "2" || s == "3" || s == "4" || s == "5" || s == "6" || s == "7" { + if s == "1" || s == "2" || s == "3" || s == "4" || s == "5" || s == "6" || s == "7" { // nolint:dupl return } - if s == "1" || s == "2" || s == "3" || s == "4" || s == "5" || s == "6" || s == "7" { + if s == "1" || s == "2" || s == "3" || s == "4" || s == "5" || s == "6" || s == "7" { // nolint:dupl return } } diff --git a/pkg/golinters/testdata/gofmt.go b/pkg/golinters/testdata/gofmt.go index 48d605c5..c0370875 100644 --- a/pkg/golinters/testdata/gofmt.go +++ b/pkg/golinters/testdata/gofmt.go @@ -4,5 +4,5 @@ import "fmt" func GofmtNotSimplified() { var x []string - fmt.Print(x[1:len(x)]) // nolint:megacheck // ERROR "File is not gofmt-ed with -s" + fmt.Print(x[1:len(x)]) // nolint:megacheck // ERROR "File is not gofmt-ed with -s" } diff --git a/pkg/runner.go b/pkg/runner.go index 14a4c985..6d6b3130 100644 --- a/pkg/runner.go +++ b/pkg/runner.go @@ -126,15 +126,16 @@ func (r SimpleRunner) processResults(ctx context.Context, results []result.Resul } for _, p := range r.Processors { - var err error startedAt := time.Now() - results, err = p.Process(results) + newResults, err := p.Process(results) elapsed := time.Since(startedAt) if elapsed > 50*time.Millisecond { analytics.Log(ctx).Infof("Result processor %s took %s", p.Name(), elapsed) } if err != nil { - return nil, err + analytics.Log(ctx).Warnf("Can't process result by %s processor: %s", p.Name(), err) + } else { + results = newResults } } diff --git a/vendor/github.com/golangci/go-tools/lint/lint.go b/vendor/github.com/golangci/go-tools/lint/lint.go index 6badd4dc..aad75b84 100644 --- a/vendor/github.com/golangci/go-tools/lint/lint.go +++ b/vendor/github.com/golangci/go-tools/lint/lint.go @@ -23,10 +23,10 @@ import ( "sync" "unicode" - "golang.org/x/tools/go/ast/astutil" - "golang.org/x/tools/go/loader" "github.com/golangci/go-tools/ssa" "github.com/golangci/go-tools/ssa/ssautil" + "golang.org/x/tools/go/ast/astutil" + "golang.org/x/tools/go/loader" ) type Job struct { @@ -836,6 +836,9 @@ func (v *fnVisitor) Visit(node ast.Node) ast.Visitor { switch node := node.(type) { case *ast.FuncDecl: var ssafn *ssa.Function + if v.pkg == nil || v.pkg.Prog == nil { + return nil // partially loaded + } ssafn = v.pkg.Prog.FuncValue(v.pkg.Info.ObjectOf(node.Name).(*types.Func)) v.m[node] = ssafn if ssafn == nil { diff --git a/vendor/github.com/mibk/dupl/.travis.yml b/vendor/github.com/mibk/dupl/.travis.yml new file mode 100644 index 00000000..33de24c0 --- /dev/null +++ b/vendor/github.com/mibk/dupl/.travis.yml @@ -0,0 +1,5 @@ +language: go +go: + - 1.3 + - 1.8 + - 1.9 diff --git a/vendor/github.com/mibk/dupl/LICENSE b/vendor/github.com/mibk/dupl/LICENSE new file mode 100644 index 00000000..ab317d84 --- /dev/null +++ b/vendor/github.com/mibk/dupl/LICENSE @@ -0,0 +1,21 @@ +The MIT License (MIT) + +Copyright (c) 2015 Michal Bohuslávek + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and 
associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/vendor/github.com/mibk/dupl/README.md b/vendor/github.com/mibk/dupl/README.md new file mode 100644 index 00000000..4267fe23 --- /dev/null +++ b/vendor/github.com/mibk/dupl/README.md @@ -0,0 +1,63 @@ +# dupl [](https://travis-ci.org/mibk/dupl) + +**dupl** is a tool written in Go for finding code clones. So far it can find clones only +in the Go source files. The method uses suffix tree for serialized ASTs. It ignores values +of AST nodes. It just operates with their types (e.g. `if a == 13 {}` and `if x == 100 {}` are +considered the same provided it exceeds the minimal token sequence size). + +Due to the used method dupl can report so called "false positives" on the output. These are +the ones we do not consider clones (whether they are too small, or the values of the matched +tokens are completely different). + +## Installation + +```bash +go get -u github.com/mibk/dupl +``` + +## Usage + +``` +Usage of dupl: + dupl [flags] [paths] + +Paths: + If the given path is a file, dupl will use it regardless of + the file extension. If it is a directory it will recursively + search for *.go files in that directory. + + If no path is given dupl will recursively search for *.go + files in the current directory. + +Flags: + -files + read file names from stdin one at each line + -html + output the results as HTML, including duplicate code fragments + -plumbing + plumbing (easy-to-parse) output for consumption by scripts or tools + -t, -threshold size + minimum token sequence size as a clone (default 15) + -vendor + check files in vendor directory + -v, -verbose + explain what is being done + +Examples: + dupl -t 100 + Search clones in the current directory of size at least + 100 tokens. + dupl $(find app/ -name '*_test.go') + Search for clones in tests in the app directory. + find app/ -name '*_test.go' |dupl -files + The same as above. +``` + +## Example + +The reduced output of this command with the following parameters for the [Docker](https://www.docker.com) source code +looks like [this](http://htmlpreview.github.io/?https://github.com/mibk/dupl/blob/master/_output_example/docker.html). 
+ +```bash +$ dupl -t 200 -html >docker.html +``` diff --git a/vendor/github.com/mibk/dupl/job/buildtree.go b/vendor/github.com/mibk/dupl/job/buildtree.go new file mode 100644 index 00000000..2a18e950 --- /dev/null +++ b/vendor/github.com/mibk/dupl/job/buildtree.go @@ -0,0 +1,22 @@ +package job + +import ( + "github.com/mibk/dupl/suffixtree" + "github.com/mibk/dupl/syntax" +) + +func BuildTree(schan chan []*syntax.Node) (t *suffixtree.STree, d *[]*syntax.Node, done chan bool) { + t = suffixtree.New() + data := make([]*syntax.Node, 0, 100) + done = make(chan bool) + go func() { + for seq := range schan { + data = append(data, seq...) + for _, node := range seq { + t.Update(node) + } + } + done <- true + }() + return t, &data, done +} diff --git a/vendor/github.com/mibk/dupl/job/parse.go b/vendor/github.com/mibk/dupl/job/parse.go new file mode 100644 index 00000000..ca14e82a --- /dev/null +++ b/vendor/github.com/mibk/dupl/job/parse.go @@ -0,0 +1,36 @@ +package job + +import ( + "log" + + "github.com/mibk/dupl/syntax" + "github.com/mibk/dupl/syntax/golang" +) + +func Parse(fchan chan string) chan []*syntax.Node { + + // parse AST + achan := make(chan *syntax.Node) + go func() { + for file := range fchan { + ast, err := golang.Parse(file) + if err != nil { + log.Println(err) + continue + } + achan <- ast + } + close(achan) + }() + + // serialize + schan := make(chan []*syntax.Node) + go func() { + for ast := range achan { + seq := syntax.Serialize(ast) + schan <- seq + } + close(schan) + }() + return schan +} diff --git a/vendor/github.com/mibk/dupl/main.go b/vendor/github.com/mibk/dupl/main.go new file mode 100644 index 00000000..02750c36 --- /dev/null +++ b/vendor/github.com/mibk/dupl/main.go @@ -0,0 +1,148 @@ +package dupl + +import ( + "flag" + "fmt" + "io/ioutil" + "os" + "path/filepath" + "sort" + + "github.com/mibk/dupl/job" + "github.com/mibk/dupl/printer" + "github.com/mibk/dupl/syntax" +) + +const defaultThreshold = 15 + +var ( + paths = []string{"."} + vendor = flag.Bool("dupl.vendor", false, "") + verbose = flag.Bool("dupl.verbose", false, "") + files = flag.Bool("dupl.files", false, "") + + html = flag.Bool("dupl.html", false, "") + plumbing = flag.Bool("dupl.plumbing", false, "") +) + +const ( + vendorDirPrefix = "vendor" + string(filepath.Separator) + vendorDirInPath = string(filepath.Separator) + vendorDirPrefix +) + +func init() { + flag.BoolVar(verbose, "dupl.v", false, "alias for -verbose") +} + +func Run(files []string, threshold int) ([]printer.Issue, error) { + fchan := make(chan string, 1024) + go func() { + for _, f := range files { + fchan <- f + } + close(fchan) + }() + schan := job.Parse(fchan) + t, data, done := job.BuildTree(schan) + <-done + + // finish stream + t.Update(&syntax.Node{Type: -1}) + + mchan := t.FindDuplOver(threshold) + duplChan := make(chan syntax.Match) + go func() { + for m := range mchan { + match := syntax.FindSyntaxUnits(*data, m, threshold) + if len(match.Frags) > 0 { + duplChan <- match + } + } + close(duplChan) + }() + + return makeIssues(duplChan) +} + +func makeIssues(duplChan <-chan syntax.Match) ([]printer.Issue, error) { + groups := make(map[string][][]*syntax.Node) + for dupl := range duplChan { + groups[dupl.Hash] = append(groups[dupl.Hash], dupl.Frags...) 
+ } + keys := make([]string, 0, len(groups)) + for k := range groups { + keys = append(keys, k) + } + sort.Strings(keys) + + p := printer.NewPlumbing(ioutil.ReadFile) + + var issues []printer.Issue + for _, k := range keys { + uniq := unique(groups[k]) + if len(uniq) > 1 { + i, err := p.MakeIssues(uniq) + if err != nil { + return nil, err + } + issues = append(issues, i...) + } + } + + return issues, nil +} + +func unique(group [][]*syntax.Node) [][]*syntax.Node { + fileMap := make(map[string]map[int]struct{}) + + var newGroup [][]*syntax.Node + for _, seq := range group { + node := seq[0] + file, ok := fileMap[node.Filename] + if !ok { + file = make(map[int]struct{}) + fileMap[node.Filename] = file + } + if _, ok := file[node.Pos]; !ok { + file[node.Pos] = struct{}{} + newGroup = append(newGroup, seq) + } + } + return newGroup +} + +func usage() { + fmt.Fprintln(os.Stderr, `Usage: dupl [flags] [paths] + +Paths: + If the given path is a file, dupl will use it regardless of + the file extension. If it is a directory, it will recursively + search for *.go files in that directory. + + If no path is given, dupl will recursively search for *.go + files in the current directory. + +Flags: + -files + read file names from stdin one at each line + -html + output the results as HTML, including duplicate code fragments + -plumbing + plumbing (easy-to-parse) output for consumption by scripts or tools + -t, -threshold size + minimum token sequence size as a clone (default 15) + -vendor + check files in vendor directory + -v, -verbose + explain what is being done + +Examples: + dupl -t 100 + Search clones in the current directory of size at least + 100 tokens. + dupl $(find app/ -name '*_test.go') + Search for clones in tests in the app directory. + find app/ -name '*_test.go' |dupl -files + The same as above.`) + os.Exit(2) +} diff --git a/vendor/github.com/mibk/dupl/printer/html.go b/vendor/github.com/mibk/dupl/printer/html.go new file mode 100644 index 00000000..667f0437 --- /dev/null +++ b/vendor/github.com/mibk/dupl/printer/html.go @@ -0,0 +1,120 @@ +package printer + +import ( + "bytes" + "fmt" + "io" + "regexp" + "sort" + + "github.com/mibk/dupl/syntax" +) + +type html struct { + iota int + w io.Writer + ReadFile +} + +func NewHTML(w io.Writer, fread ReadFile) Printer { + return &html{w: w, ReadFile: fread} +} + +func (p *html) PrintHeader() error { + _, err := fmt.Fprint(p.w, ` + +
%s\n", cl.filename, cl.lineStart, cl.fragment) + } + return nil +} + +func (*html) PrintFooter() error { return nil } + +func findLineBeg(file []byte, index int) int { + for i := index; i >= 0; i-- { + if file[i] == '\n' { + return i + 1 + } + } + return 0 +} + +func toWhitespace(str []byte) []byte { + var out []byte + for _, c := range bytes.Runes(str) { + if c == '\t' { + out = append(out, '\t') + } else { + out = append(out, ' ') + } + } + return out +} + +func deindent(block []byte) []byte { + const maxVal = 99 + min := maxVal + re := regexp.MustCompile(`(^|\n)(\t*)\S`) + for _, line := range re.FindAllSubmatch(block, -1) { + indent := line[2] + if len(indent) < min { + min = len(indent) + } + } + if min == 0 || min == maxVal { + return block + } + block = block[min:] +Loop: + for i := 0; i < len(block); i++ { + if block[i] == '\n' && i != len(block)-1 { + for j := 0; j < min; j++ { + if block[i+j+1] != '\t' { + continue Loop + } + } + block = append(block[:i+1], block[i+1+min:]...) + } + } + return block +} diff --git a/vendor/github.com/mibk/dupl/printer/plumbing.go b/vendor/github.com/mibk/dupl/printer/plumbing.go new file mode 100644 index 00000000..284213cd --- /dev/null +++ b/vendor/github.com/mibk/dupl/printer/plumbing.go @@ -0,0 +1,50 @@ +package printer + +import ( + "sort" + + "github.com/mibk/dupl/syntax" +) + +type Clone clone + +func (c Clone) Filename() string { + return c.filename +} + +func (c Clone) LineStart() int { + return c.lineStart +} + +func (c Clone) LineEnd() int { + return c.lineEnd +} + +type Issue struct { + From, To Clone +} + +type Plumbing struct { + ReadFile +} + +func NewPlumbing(fread ReadFile) *Plumbing { + return &Plumbing{fread} +} + +func (p *Plumbing) MakeIssues(dups [][]*syntax.Node) ([]Issue, error) { + clones, err := prepareClonesInfo(p.ReadFile, dups) + if err != nil { + return nil, err + } + sort.Sort(byNameAndLine(clones)) + var issues []Issue + for i, cl := range clones { + nextCl := clones[(i+1)%len(clones)] + issues = append(issues, Issue{ + From: Clone(cl), + To: Clone(nextCl), + }) + } + return issues, nil +} diff --git a/vendor/github.com/mibk/dupl/printer/printer.go b/vendor/github.com/mibk/dupl/printer/printer.go new file mode 100644 index 00000000..855ca099 --- /dev/null +++ b/vendor/github.com/mibk/dupl/printer/printer.go @@ -0,0 +1,11 @@ +package printer + +import "github.com/mibk/dupl/syntax" + +type ReadFile func(filename string) ([]byte, error) + +type Printer interface { + PrintHeader() error + PrintClones(dups [][]*syntax.Node) error + PrintFooter() error +} diff --git a/vendor/github.com/mibk/dupl/printer/text.go b/vendor/github.com/mibk/dupl/printer/text.go new file mode 100644 index 00000000..f8a1071b --- /dev/null +++ b/vendor/github.com/mibk/dupl/printer/text.go @@ -0,0 +1,100 @@ +package printer + +import ( + "fmt" + "io" + "sort" + + "github.com/mibk/dupl/syntax" +) + +type text struct { + cnt int + w io.Writer + ReadFile +} + +func NewText(w io.Writer, fread ReadFile) Printer { + return &text{w: w, ReadFile: fread} +} + +func (p *text) PrintHeader() error { return nil } + +func (p *text) PrintClones(dups [][]*syntax.Node) error { + p.cnt++ + fmt.Fprintf(p.w, "found %d clones:\n", len(dups)) + clones, err := prepareClonesInfo(p.ReadFile, dups) + if err != nil { + return err + } + sort.Sort(byNameAndLine(clones)) + for _, cl := range clones { + fmt.Fprintf(p.w, " %s:%d,%d\n", cl.filename, cl.lineStart, cl.lineEnd) + } + return nil +} + +func (p *text) PrintFooter() error { + _, err := fmt.Fprintf(p.w, "\nFound total %d 
clone groups.\n", p.cnt) + return err +} + +func prepareClonesInfo(fread ReadFile, dups [][]*syntax.Node) ([]clone, error) { + clones := make([]clone, len(dups)) + for i, dup := range dups { + cnt := len(dup) + if cnt == 0 { + panic("zero length dup") + } + nstart := dup[0] + nend := dup[cnt-1] + + file, err := fread(nstart.Filename) + if err != nil { + return nil, err + } + + cl := clone{filename: nstart.Filename} + cl.lineStart, cl.lineEnd = blockLines(file, nstart.Pos, nend.End) + clones[i] = cl + } + return clones, nil +} + +func blockLines(file []byte, from, to int) (int, int) { + line := 1 + lineStart, lineEnd := 0, 0 + for offset, b := range file { + if b == '\n' { + line++ + } + if offset == from { + lineStart = line + } + if offset == to-1 { + lineEnd = line + break + } + } + return lineStart, lineEnd +} + +type clone struct { + filename string + lineStart int + lineEnd int + fragment []byte +} + +type byNameAndLine []clone + +func (c byNameAndLine) Len() int { return len(c) } + +func (c byNameAndLine) Swap(i, j int) { c[i], c[j] = c[j], c[i] } + +func (c byNameAndLine) Less(i, j int) bool { + if c[i].filename == c[j].filename { + return c[i].lineStart < c[j].lineStart + } + return c[i].filename < c[j].filename +} diff --git a/vendor/github.com/mibk/dupl/suffixtree/dupl.go b/vendor/github.com/mibk/dupl/suffixtree/dupl.go new file mode 100644 index 00000000..ab145b4f --- /dev/null +++ b/vendor/github.com/mibk/dupl/suffixtree/dupl.go @@ -0,0 +1,98 @@ +package suffixtree + +import "sort" + +type Match struct { + Ps []Pos + Len Pos +} + +type posList struct { + positions []Pos +} + +func newPosList() *posList { + return &posList{make([]Pos, 0)} +} + +func (p *posList) append(p2 *posList) { + p.positions = append(p.positions, p2.positions...) +} + +func (p *posList) add(pos Pos) { + p.positions = append(p.positions, pos) +} + +type contextList struct { + lists map[int]*posList +} + +func newContextList() *contextList { + return &contextList{make(map[int]*posList)} +} + +func (c *contextList) getAll() []Pos { + keys := make([]int, 0, len(c.lists)) + for k := range c.lists { + keys = append(keys, k) + } + sort.Ints(keys) + var ps []Pos + for _, k := range keys { + ps = append(ps, c.lists[k].positions...) + } + return ps +} + +func (c *contextList) append(c2 *contextList) { + for lc, pl := range c2.lists { + if _, ok := c.lists[lc]; ok { + c.lists[lc].append(pl) + } else { + c.lists[lc] = pl + } + } +} + +// FindDuplOver find pairs of maximal duplicities over a threshold +// length. 
+func (t *STree) FindDuplOver(threshold int) <-chan Match { + auxTran := newTran(0, 0, t.root) + ch := make(chan Match) + go func() { + walkTrans(auxTran, 0, threshold, ch) + close(ch) + }() + return ch +} + +func walkTrans(parent *tran, length, threshold int, ch chan<- Match) *contextList { + s := parent.state + + cl := newContextList() + + if len(s.trans) == 0 { + pl := newPosList() + start := parent.end + 1 - Pos(length) + pl.add(start) + ch := 0 + if start > 0 { + ch = s.tree.data[start-1].Val() + } + cl.lists[ch] = pl + return cl + } + + for _, t := range s.trans { + ln := length + t.len() + cl2 := walkTrans(t, ln, threshold, ch) + if ln >= threshold { + cl.append(cl2) + } + } + if length >= threshold && len(cl.lists) > 1 { + m := Match{cl.getAll(), Pos(length)} + ch <- m + } + return cl +} diff --git a/vendor/github.com/mibk/dupl/suffixtree/suffixtree.go b/vendor/github.com/mibk/dupl/suffixtree/suffixtree.go new file mode 100644 index 00000000..73801502 --- /dev/null +++ b/vendor/github.com/mibk/dupl/suffixtree/suffixtree.go @@ -0,0 +1,216 @@ +package suffixtree + +import ( + "bytes" + "fmt" + "math" + "strings" +) + +const infinity = math.MaxInt32 + +// Pos denotes position in data slice. +type Pos int32 + +type Token interface { + Val() int +} + +// STree is a struct representing a suffix tree. +type STree struct { + data []Token + root *state + auxState *state // auxiliary state + + // active point + s *state + start, end Pos +} + +// New creates new suffix tree. +func New() *STree { + t := new(STree) + t.data = make([]Token, 0, 50) + t.root = newState(t) + t.auxState = newState(t) + t.root.linkState = t.auxState + t.s = t.root + return t +} + +// Update refreshes the suffix tree to by new data. +func (t *STree) Update(data ...Token) { + t.data = append(t.data, data...) + for _ = range data { + t.update() + t.s, t.start = t.canonize(t.s, t.start, t.end) + t.end++ + } +} + +// update transforms suffix tree T(n) to T(n+1). +func (t *STree) update() { + oldr := t.root + + // (s, (start, end)) is the canonical reference pair for the active point + s := t.s + start, end := t.start, t.end + var r *state + for { + var endPoint bool + r, endPoint = t.testAndSplit(s, start, end-1) + if endPoint { + break + } + r.fork(end) + if oldr != t.root { + oldr.linkState = r + } + oldr = r + s, start = t.canonize(s.linkState, start, end-1) + } + if oldr != t.root { + oldr.linkState = r + } + + // update active point + t.s = s + t.start = start +} + +// testAndSplit tests whether a state with canonical ref. pair +// (s, (start, end)) is the end point, that is, a state that have +// a c-transition. If not, then state (exs, (start, end)) is made +// explicit (if not already so). +func (t *STree) testAndSplit(s *state, start, end Pos) (exs *state, endPoint bool) { + c := t.data[t.end] + if start <= end { + tr := s.findTran(t.data[start]) + splitPoint := tr.start + end - start + 1 + if t.data[splitPoint].Val() == c.Val() { + return s, true + } + // make the (s, (start, end)) state explicit + newSt := newState(s.tree) + newSt.addTran(splitPoint, tr.end, tr.state) + tr.end = splitPoint - 1 + tr.state = newSt + return newSt, false + } + if s == t.auxState || s.findTran(c) != nil { + return s, true + } + return s, false +} + +// canonize returns updated state and start position for ref. pair +// (s, (start, end)) of state r so the new ref. pair is canonical, +// that is, referenced from the closest explicit ancestor of r. 
+func (t *STree) canonize(s *state, start, end Pos) (*state, Pos) { + if s == t.auxState { + s, start = t.root, start+1 + } + if start > end { + return s, start + } + + var tr *tran + for { + if start <= end { + tr = s.findTran(t.data[start]) + if tr == nil { + panic(fmt.Sprintf("there should be some transition for '%d' at %d", + t.data[start].Val(), start)) + } + } + if tr.end-tr.start > end-start { + break + } + start += tr.end - tr.start + 1 + s = tr.state + } + if s == nil { + panic("there should always be some suffix link resolution") + } + return s, start +} + +func (t *STree) At(p Pos) Token { + if p < 0 || p >= Pos(len(t.data)) { + panic("position out of bounds") + } + return t.data[p] +} + +func (t *STree) String() string { + buf := new(bytes.Buffer) + printState(buf, t.root, 0) + return buf.String() +} + +func printState(buf *bytes.Buffer, s *state, ident int) { + for _, tr := range s.trans { + fmt.Fprint(buf, strings.Repeat(" ", ident)) + fmt.Fprintf(buf, "* (%d, %d)\n", tr.start, tr.ActEnd()) + printState(buf, tr.state, ident+1) + } +} + +// state is an explicit state of the suffix tree. +type state struct { + tree *STree + trans []*tran + linkState *state +} + +func newState(t *STree) *state { + return &state{ + tree: t, + trans: make([]*tran, 0), + linkState: nil, + } +} + +func (s *state) addTran(start, end Pos, r *state) { + s.trans = append(s.trans, newTran(start, end, r)) +} + +// fork creates a new branch from the state s. +func (s *state) fork(i Pos) *state { + r := newState(s.tree) + s.addTran(i, infinity, r) + return r +} + +// findTran finds c-transition. +func (s *state) findTran(c Token) *tran { + for _, tran := range s.trans { + if s.tree.data[tran.start].Val() == c.Val() { + return tran + } + } + return nil +} + +// tran represents a state's transition. +type tran struct { + start, end Pos + state *state +} + +func newTran(start, end Pos, s *state) *tran { + return &tran{start, end, s} +} + +func (t *tran) len() int { + return int(t.end - t.start + 1) +} + +// ActEnd returns actual end position as consistent with +// the actual length of the data in the STree. +func (t *tran) ActEnd() Pos { + if t.end == infinity { + return Pos(len(t.state.tree.data)) - 1 + } + return t.end +} diff --git a/vendor/github.com/mibk/dupl/syntax/golang/golang.go b/vendor/github.com/mibk/dupl/syntax/golang/golang.go new file mode 100644 index 00000000..8cc304f4 --- /dev/null +++ b/vendor/github.com/mibk/dupl/syntax/golang/golang.go @@ -0,0 +1,392 @@ +package golang + +import ( + "go/ast" + "go/parser" + "go/token" + + "github.com/mibk/dupl/syntax" +) + +const ( + BadNode = iota + File + ArrayType + AssignStmt + BasicLit + BinaryExpr + BlockStmt + BranchStmt + CallExpr + CaseClause + ChanType + CommClause + CompositeLit + DeclStmt + DeferStmt + Ellipsis + EmptyStmt + ExprStmt + Field + FieldList + ForStmt + FuncDecl + FuncLit + FuncType + GenDecl + GoStmt + Ident + IfStmt + IncDecStmt + IndexExpr + InterfaceType + KeyValueExpr + LabeledStmt + MapType + ParenExpr + RangeStmt + ReturnStmt + SelectStmt + SelectorExpr + SendStmt + SliceExpr + StarExpr + StructType + SwitchStmt + TypeAssertExpr + TypeSpec + TypeSwitchStmt + UnaryExpr + ValueSpec +) + +// Parse the given file and return uniform syntax tree. 
+func Parse(filename string) (*syntax.Node, error) { + fset := token.NewFileSet() + file, err := parser.ParseFile(fset, filename, nil, 0) + if err != nil { + return nil, err + } + t := &transformer{ + fileset: fset, + filename: filename, + } + return t.trans(file), nil +} + +type transformer struct { + fileset *token.FileSet + filename string +} + +// trans transforms given golang AST to uniform tree structure. +func (t *transformer) trans(node ast.Node) (o *syntax.Node) { + o = syntax.NewNode() + o.Filename = t.filename + st, end := node.Pos(), node.End() + o.Pos, o.End = t.fileset.File(st).Offset(st), t.fileset.File(end).Offset(end) + + switch n := node.(type) { + case *ast.ArrayType: + o.Type = ArrayType + if n.Len != nil { + o.AddChildren(t.trans(n.Len)) + } + o.AddChildren(t.trans(n.Elt)) + + case *ast.AssignStmt: + o.Type = AssignStmt + for _, e := range n.Rhs { + o.AddChildren(t.trans(e)) + } + + for _, e := range n.Lhs { + o.AddChildren(t.trans(e)) + } + + case *ast.BasicLit: + o.Type = BasicLit + + case *ast.BinaryExpr: + o.Type = BinaryExpr + o.AddChildren(t.trans(n.X), t.trans(n.Y)) + + case *ast.BlockStmt: + o.Type = BlockStmt + for _, stmt := range n.List { + o.AddChildren(t.trans(stmt)) + } + + case *ast.BranchStmt: + o.Type = BranchStmt + if n.Label != nil { + o.AddChildren(t.trans(n.Label)) + } + + case *ast.CallExpr: + o.Type = CallExpr + o.AddChildren(t.trans(n.Fun)) + for _, arg := range n.Args { + o.AddChildren(t.trans(arg)) + } + + case *ast.CaseClause: + o.Type = CaseClause + for _, e := range n.List { + o.AddChildren(t.trans(e)) + } + for _, stmt := range n.Body { + o.AddChildren(t.trans(stmt)) + } + + case *ast.ChanType: + o.Type = ChanType + o.AddChildren(t.trans(n.Value)) + + case *ast.CommClause: + o.Type = CommClause + if n.Comm != nil { + o.AddChildren(t.trans(n.Comm)) + } + for _, stmt := range n.Body { + o.AddChildren(t.trans(stmt)) + } + + case *ast.CompositeLit: + o.Type = CompositeLit + if n.Type != nil { + o.AddChildren(t.trans(n.Type)) + } + for _, e := range n.Elts { + o.AddChildren(t.trans(e)) + } + + case *ast.DeclStmt: + o.Type = DeclStmt + o.AddChildren(t.trans(n.Decl)) + + case *ast.DeferStmt: + o.Type = DeferStmt + o.AddChildren(t.trans(n.Call)) + + case *ast.Ellipsis: + o.Type = Ellipsis + if n.Elt != nil { + o.AddChildren(t.trans(n.Elt)) + } + + case *ast.EmptyStmt: + o.Type = EmptyStmt + + case *ast.ExprStmt: + o.Type = ExprStmt + o.AddChildren(t.trans(n.X)) + + case *ast.Field: + o.Type = Field + for _, name := range n.Names { + o.AddChildren(t.trans(name)) + } + o.AddChildren(t.trans(n.Type)) + + case *ast.FieldList: + o.Type = FieldList + for _, field := range n.List { + o.AddChildren(t.trans(field)) + } + + case *ast.File: + o.Type = File + for _, decl := range n.Decls { + if genDecl, ok := decl.(*ast.GenDecl); ok && genDecl.Tok == token.IMPORT { + // skip import declarations + continue + } + o.AddChildren(t.trans(decl)) + } + + case *ast.ForStmt: + o.Type = ForStmt + if n.Init != nil { + o.AddChildren(t.trans(n.Init)) + } + if n.Cond != nil { + o.AddChildren(t.trans(n.Cond)) + } + if n.Post != nil { + o.AddChildren(t.trans(n.Post)) + } + o.AddChildren(t.trans(n.Body)) + + case *ast.FuncDecl: + o.Type = FuncDecl + if n.Recv != nil { + o.AddChildren(t.trans(n.Recv)) + } + o.AddChildren(t.trans(n.Name), t.trans(n.Type)) + if n.Body != nil { + o.AddChildren(t.trans(n.Body)) + } + + case *ast.FuncLit: + o.Type = FuncLit + o.AddChildren(t.trans(n.Type), t.trans(n.Body)) + + case *ast.FuncType: + o.Type = FuncType + 
o.AddChildren(t.trans(n.Params)) + if n.Results != nil { + o.AddChildren(t.trans(n.Results)) + } + + case *ast.GenDecl: + o.Type = GenDecl + for _, spec := range n.Specs { + o.AddChildren(t.trans(spec)) + } + + case *ast.GoStmt: + o.Type = GoStmt + o.AddChildren(t.trans(n.Call)) + + case *ast.Ident: + o.Type = Ident + + case *ast.IfStmt: + o.Type = IfStmt + if n.Init != nil { + o.AddChildren(t.trans(n.Init)) + } + o.AddChildren(t.trans(n.Cond), t.trans(n.Body)) + if n.Else != nil { + o.AddChildren(t.trans(n.Else)) + } + + case *ast.IncDecStmt: + o.Type = IncDecStmt + o.AddChildren(t.trans(n.X)) + + case *ast.IndexExpr: + o.Type = IndexExpr + o.AddChildren(t.trans(n.X), t.trans(n.Index)) + + case *ast.InterfaceType: + o.Type = InterfaceType + o.AddChildren(t.trans(n.Methods)) + + case *ast.KeyValueExpr: + o.Type = KeyValueExpr + o.AddChildren(t.trans(n.Key), t.trans(n.Value)) + + case *ast.LabeledStmt: + o.Type = LabeledStmt + o.AddChildren(t.trans(n.Label), t.trans(n.Stmt)) + + case *ast.MapType: + o.Type = MapType + o.AddChildren(t.trans(n.Key), t.trans(n.Value)) + + case *ast.ParenExpr: + o.Type = ParenExpr + o.AddChildren(t.trans(n.X)) + + case *ast.RangeStmt: + o.Type = RangeStmt + if n.Key != nil { + o.AddChildren(t.trans(n.Key)) + } + if n.Value != nil { + o.AddChildren(t.trans(n.Value)) + } + o.AddChildren(t.trans(n.X), t.trans(n.Body)) + + case *ast.ReturnStmt: + o.Type = ReturnStmt + for _, e := range n.Results { + o.AddChildren(t.trans(e)) + } + + case *ast.SelectStmt: + o.Type = SelectStmt + o.AddChildren(t.trans(n.Body)) + + case *ast.SelectorExpr: + o.Type = SelectorExpr + o.AddChildren(t.trans(n.X), t.trans(n.Sel)) + + case *ast.SendStmt: + o.Type = SendStmt + o.AddChildren(t.trans(n.Chan), t.trans(n.Value)) + + case *ast.SliceExpr: + o.Type = SliceExpr + o.AddChildren(t.trans(n.X)) + if n.Low != nil { + o.AddChildren(t.trans(n.Low)) + } + if n.High != nil { + o.AddChildren(t.trans(n.High)) + } + if n.Max != nil { + o.AddChildren(t.trans(n.Max)) + } + + case *ast.StarExpr: + o.Type = StarExpr + o.AddChildren(t.trans(n.X)) + + case *ast.StructType: + o.Type = StructType + o.AddChildren(t.trans(n.Fields)) + + case *ast.SwitchStmt: + o.Type = SwitchStmt + if n.Init != nil { + o.AddChildren(t.trans(n.Init)) + } + if n.Tag != nil { + o.AddChildren(t.trans(n.Tag)) + } + o.AddChildren(t.trans(n.Body)) + + case *ast.TypeAssertExpr: + o.Type = TypeAssertExpr + o.AddChildren(t.trans(n.X)) + if n.Type != nil { + o.AddChildren(t.trans(n.Type)) + } + + case *ast.TypeSpec: + o.Type = TypeSpec + o.AddChildren(t.trans(n.Name), t.trans(n.Type)) + + case *ast.TypeSwitchStmt: + o.Type = TypeSwitchStmt + if n.Init != nil { + o.AddChildren(t.trans(n.Init)) + } + o.AddChildren(t.trans(n.Assign), t.trans(n.Body)) + + case *ast.UnaryExpr: + o.Type = UnaryExpr + o.AddChildren(t.trans(n.X)) + + case *ast.ValueSpec: + o.Type = ValueSpec + for _, name := range n.Names { + o.AddChildren(t.trans(name)) + } + if n.Type != nil { + o.AddChildren(t.trans(n.Type)) + } + for _, val := range n.Values { + o.AddChildren(t.trans(val)) + } + + default: + o.Type = BadNode + + } + + return o +} diff --git a/vendor/github.com/mibk/dupl/syntax/syntax.go b/vendor/github.com/mibk/dupl/syntax/syntax.go new file mode 100644 index 00000000..9d737b68 --- /dev/null +++ b/vendor/github.com/mibk/dupl/syntax/syntax.go @@ -0,0 +1,175 @@ +package syntax + +import ( + "crypto/sha1" + + "github.com/mibk/dupl/suffixtree" +) + +type Node struct { + Type int + Filename string + Pos, End int + Children []*Node + Owns int +} + +func 
NewNode() *Node { + return &Node{} +} + +func (n *Node) AddChildren(children ...*Node) { + n.Children = append(n.Children, children...) +} + +func (n *Node) Val() int { + return n.Type +} + +type Match struct { + Hash string + Frags [][]*Node +} + +func Serialize(n *Node) []*Node { + stream := make([]*Node, 0, 10) + serial(n, &stream) + return stream +} + +func serial(n *Node, stream *[]*Node) int { + *stream = append(*stream, n) + var count int + for _, child := range n.Children { + count += serial(child, stream) + } + n.Owns = count + return count + 1 +} + +// FindSyntaxUnits finds all complete syntax units in the match group and returns them +// with the corresponding hash. +func FindSyntaxUnits(data []*Node, m suffixtree.Match, threshold int) Match { + if len(m.Ps) == 0 { + return Match{} + } + firstSeq := data[m.Ps[0] : m.Ps[0]+m.Len] + indexes := getUnitsIndexes(firstSeq, threshold) + + // TODO: is this really working? + indexCnt := len(indexes) + if indexCnt > 0 { + lasti := indexes[indexCnt-1] + firstn := firstSeq[lasti] + for i := 1; i < len(m.Ps); i++ { + n := data[int(m.Ps[i])+lasti] + if firstn.Owns != n.Owns { + indexes = indexes[:indexCnt-1] + break + } + } + } + if len(indexes) == 0 || isCyclic(indexes, firstSeq) || spansMultipleFiles(indexes, firstSeq) { + return Match{} + } + + match := Match{Frags: make([][]*Node, len(m.Ps))} + for i, pos := range m.Ps { + match.Frags[i] = make([]*Node, len(indexes)) + for j, index := range indexes { + match.Frags[i][j] = data[int(pos)+index] + } + } + + lastIndex := indexes[len(indexes)-1] + match.Hash = hashSeq(firstSeq[indexes[0] : lastIndex+firstSeq[lastIndex].Owns]) + return match +} + +func getUnitsIndexes(nodeSeq []*Node, threshold int) []int { + var indexes []int + var split bool + for i := 0; i < len(nodeSeq); { + n := nodeSeq[i] + switch { + case n.Owns >= len(nodeSeq)-i: + // not complete syntax unit + i++ + split = true + continue + case n.Owns+1 < threshold: + split = true + default: + if split { + indexes = indexes[:0] + split = false + } + indexes = append(indexes, i) + } + i += n.Owns + 1 + } + return indexes +} + +// isCyclic finds out whether there is a repetive pattern in the found clone. If positive, +// it return false to point out that the clone would be redundant. +func isCyclic(indexes []int, nodes []*Node) bool { + cnt := len(indexes) + if cnt <= 1 { + return false + } + + alts := make(map[int]bool) + for i := 1; i <= cnt/2; i++ { + if cnt%i == 0 { + alts[i] = true + } + } + + for i := 0; i < indexes[cnt/2]; i++ { + nstart := nodes[i+indexes[0]] + AltLoop: + for alt := range alts { + for j := alt; j < cnt; j += alt { + index := i + indexes[j] + if index < len(nodes) { + nalt := nodes[index] + if nstart.Owns == nalt.Owns && nstart.Type == nalt.Type { + continue + } + } else if i >= indexes[alt] { + return true + } + delete(alts, alt) + continue AltLoop + } + } + if len(alts) == 0 { + return false + } + } + return true +} + +func spansMultipleFiles(indexes []int, nodes []*Node) bool { + if len(indexes) < 2 { + return false + } + f := nodes[indexes[0]].Filename + for i := 1; i < len(indexes); i++ { + if nodes[indexes[i]].Filename != f { + return true + } + } + return false +} + +func hashSeq(nodes []*Node) string { + h := sha1.New() + bytes := make([]byte, len(nodes)) + for i, node := range nodes { + bytes[i] = byte(node.Type) + } + h.Write(bytes) + return string(h.Sum(nil)) +}
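
For reference, a minimal standalone sketch of how the vendored dupl API introduced above is consumed — the same call that the new pkg/golinters/dupl.go makes (`duplAPI.Run(files, threshold)`). The file list and threshold below are illustrative only, and the example assumes the golangci/dupl fork resolves at the `github.com/mibk/dupl` import path, as configured in Gopkg.toml.

```go
// Minimal sketch: drive the vendored dupl API directly, mirroring
// pkg/golinters/dupl.go. File paths and the threshold are illustrative.
package main

import (
	"fmt"
	"log"

	dupl "github.com/mibk/dupl" // resolved to the golangci/dupl fork via Gopkg.toml
)

func main() {
	files := []string{"pkg/golinters/testdata/dupl.go"} // hypothetical input set
	const threshold = 20                                // default of the new --dupl.threshold flag

	issues, err := dupl.Run(files, threshold)
	if err != nil {
		log.Fatal(err)
	}
	for _, i := range issues {
		// Each issue pairs a clone (From) with its duplicate (To),
		// exposed as a filename plus a line range.
		fmt.Printf("%s:%d-%d duplicates %s:%d-%d\n",
			i.From.Filename(), i.From.LineStart(), i.From.LineEnd(),
			i.To.Filename(), i.To.LineStart(), i.To.LineEnd())
	}
}
```

Whether invoked this way or through the linter wrapper, clones shorter than the threshold (measured in serialized AST tokens, per the dupl README) are ignored, which is presumably why the testdata functions above repeat the logger call several times.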