support dupl

This commit is contained in:
golangci 2018-05-07 09:09:10 +03:00
parent 51c1751db0
commit 7c4ab92d00
25 changed files with 1568 additions and 16 deletions

18
Gopkg.lock generated
View File

@ -72,7 +72,7 @@
"unused",
"version"
]
revision = "9a1f066598f8e2c1a58fab3249341c8d808b70ad"
revision = "38c5f4a0efc6bd0efc88bdd3890e5e64e698bdc2"
[[projects]]
branch = "master"
@ -142,6 +142,20 @@
revision = "0360b2af4f38e8d38c7fce2a9f4e702702d73a39"
version = "v0.0.3"
[[projects]]
branch = "master"
name = "github.com/mibk/dupl"
packages = [
".",
"job",
"printer",
"suffixtree",
"syntax",
"syntax/golang"
]
revision = "53b9af5a45362a6f2896cfa39cc17d17ba9667ea"
source = "github.com/golangci/dupl"
[[projects]]
name = "github.com/pmezard/go-difflib"
packages = ["difflib"]
@ -238,6 +252,6 @@
[solve-meta]
analyzer-name = "dep"
analyzer-version = 1
inputs-digest = "c267ef4b3c1b5396f8d60efc4d262a5f563d9542ea535ab6f1402141ceb4aff0"
inputs-digest = "6ded01ff91bda8204ee2c22da5f6665af28fc5e9551a8b3782ff65582f08d738"
solver-name = "gps-cdcl"
solver-version = 1

View File

@ -29,10 +29,6 @@
name = "github.com/bradleyfalzon/revgrep"
version = "0.3.0"
[[constraint]]
name = "github.com/golang/mock"
version = "1.1.1"
[[constraint]]
branch = "master"
name = "github.com/golangci/golangci-shared"
@ -54,6 +50,11 @@
name = "github.com/golangci/govet"
branch = "master"
[[constraint]]
name = "github.com/mibk/dupl"
branch = "master"
source = "github.com/golangci/dupl"
[prune]
go-tests = true
unused-packages = true

View File

@ -60,6 +60,8 @@ func (e *Executor) initRun() {
runCmd.Flags().BoolVar(&rc.Megacheck.EnableStaticcheck, "megacheck.staticcheck", true, "Megacheck: run Staticcheck sub-linter: staticcheck is go vet on steroids, applying a ton of static analysis checks")
runCmd.Flags().BoolVar(&rc.Megacheck.EnableGosimple, "megacheck.gosimple", true, "Megacheck: run Gosimple sub-linter: gosimple is a linter for Go source code that specialises on simplifying code")
runCmd.Flags().BoolVar(&rc.Megacheck.EnableUnused, "megacheck.unused", true, "Megacheck: run Unused sub-linter: unused checks Go code for unused constants, variables, functions and types")
runCmd.Flags().IntVar(&rc.Dupl.Threshold, "dupl.threshold",
20, "Minimal threshold to detect copy-paste")
runCmd.Flags().StringSliceVarP(&rc.EnabledLinters, "enable", "E", []string{}, "Enable specific linter")
runCmd.Flags().StringSliceVarP(&rc.DisabledLinters, "disable", "D", []string{}, "Disable specific linter")
@ -95,7 +97,8 @@ func loadWholeAppIfNeeded(ctx context.Context, linters []pkg.Linter, cfg *config
bctx := build.Default
bctx.BuildTags = append(bctx.BuildTags, cfg.BuildTags...)
loadcfg := &loader.Config{
Build: &bctx,
Build: &bctx,
AllowErrors: true, // Try to analyze event partially
}
const needTests = true // TODO: configure and take into account in paths resolver
rest, err := loadcfg.FromArgs(paths.MixedPaths(), needTests)

View File

@ -61,6 +61,9 @@ type Run struct {
EnableUnused bool
EnableGosimple bool
}
Dupl struct {
Threshold int
}
EnabledLinters []string
DisabledLinters []string

View File

@ -64,6 +64,7 @@ func GetAllSupportedLinterConfigs() []LinterConfig {
enabledByDefault(golinters.Structcheck{}, "Finds unused struct fields", true),
enabledByDefault(golinters.Varcheck{}, "Finds unused global variables and constants", true),
enabledByDefault(golinters.Megacheck{}, "Megacheck: 3 sub-linters in one: staticcheck, gosimple and unused", true),
enabledByDefault(golinters.Dupl{}, "Tool for code clone detection", false),
disabledByDefault(golinters.Gofmt{}, "Gofmt checks whether code was gofmt-ed. By default this tool runs with -s option to check for code simplification", false),
disabledByDefault(golinters.Gofmt{UseGoimports: true}, "Goimports does everything that gofmt does. Additionally it checks unused imports", false),

37
pkg/golinters/dupl.go Normal file
View File

@ -0,0 +1,37 @@
package golinters
import (
"context"
"fmt"
"github.com/golangci/golangci-lint/pkg/result"
duplAPI "github.com/mibk/dupl"
)
// Dupl is the golangci-lint adapter for the dupl code-clone detector.
type Dupl struct{}

// Name returns the linter's name as used in configuration and reports.
func (Dupl) Name() string {
	const linterName = "dupl"
	return linterName
}
// Run executes the dupl clone detector over the files in lintCtx and
// converts every reported duplicate pair into a result.Issue.
func (d Dupl) Run(ctx context.Context, lintCtx *Context) (*result.Result, error) {
	threshold := lintCtx.RunCfg().Dupl.Threshold
	issues, err := duplAPI.Run(lintCtx.Paths.Files, threshold)
	if err != nil {
		return nil, err
	}

	res := &result.Result{}
	for _, issue := range issues {
		// The "To" half of the pair is rendered as file:start-end and
		// embedded into the issue text for the "From" half.
		toRange := fmt.Sprintf("%s:%d-%d", issue.To.Filename(), issue.To.LineStart(), issue.To.LineEnd())
		msg := fmt.Sprintf("%d-%d lines are duplicate of %s",
			issue.From.LineStart(), issue.From.LineEnd(),
			formatCode(toRange, lintCtx.RunCfg()))
		res.Issues = append(res.Issues, result.Issue{
			File:       issue.From.Filename(),
			LineNumber: issue.From.LineStart(),
			Text:       msg,
			FromLinter: d.Name(),
		})
	}
	return res, nil
}

32
pkg/golinters/testdata/dupl.go vendored Normal file
View File

@ -0,0 +1,32 @@
package testdata
type DuplLogger struct{}
func (DuplLogger) level() int {
return 1
}
func (DuplLogger) Debug(args ...interface{}) {}
func (DuplLogger) Info(args ...interface{}) {}
func (logger *DuplLogger) First(args ...interface{}) { // ERROR "12-21 lines are duplicate of `testdata/dupl.go:23-32`"
if logger.level() >= 0 {
logger.Debug(args...)
logger.Debug(args...)
logger.Debug(args...)
logger.Debug(args...)
logger.Debug(args...)
logger.Debug(args...)
}
}
func (logger *DuplLogger) Second(args ...interface{}) { // ERROR "23-32 lines are duplicate of `testdata/dupl.go:12-21`"
if logger.level() >= 1 {
logger.Info(args...)
logger.Info(args...)
logger.Info(args...)
logger.Info(args...)
logger.Info(args...)
logger.Info(args...)
}
}

View File

@ -1,15 +1,15 @@
package testdata
func GocycloBigComplexity(s string) { // ERROR "cyclomatic complexity .* of func .* is high .*"
if s == "1" || s == "2" || s == "3" || s == "4" || s == "5" || s == "6" || s == "7" {
if s == "1" || s == "2" || s == "3" || s == "4" || s == "5" || s == "6" || s == "7" { // nolint:dupl
return
}
if s == "1" || s == "2" || s == "3" || s == "4" || s == "5" || s == "6" || s == "7" {
if s == "1" || s == "2" || s == "3" || s == "4" || s == "5" || s == "6" || s == "7" { // nolint:dupl
return
}
if s == "1" || s == "2" || s == "3" || s == "4" || s == "5" || s == "6" || s == "7" {
if s == "1" || s == "2" || s == "3" || s == "4" || s == "5" || s == "6" || s == "7" { // nolint:dupl
return
}
}

View File

@ -4,5 +4,5 @@ import "fmt"
func GofmtNotSimplified() {
var x []string
fmt.Print(x[1:len(x)]) // nolint:megacheck // ERROR "File is not gofmt-ed with -s"
fmt.Print(x[1:len(x)]) // nolint:megacheck // ERROR "File is not gofmt-ed with -s"
}

View File

@ -126,15 +126,16 @@ func (r SimpleRunner) processResults(ctx context.Context, results []result.Resul
}
for _, p := range r.Processors {
var err error
startedAt := time.Now()
results, err = p.Process(results)
newResults, err := p.Process(results)
elapsed := time.Since(startedAt)
if elapsed > 50*time.Millisecond {
analytics.Log(ctx).Infof("Result processor %s took %s", p.Name(), elapsed)
}
if err != nil {
return nil, err
analytics.Log(ctx).Warnf("Can't process result by %s processor: %s", p.Name(), err)
} else {
results = newResults
}
}

View File

@ -23,10 +23,10 @@ import (
"sync"
"unicode"
"golang.org/x/tools/go/ast/astutil"
"golang.org/x/tools/go/loader"
"github.com/golangci/go-tools/ssa"
"github.com/golangci/go-tools/ssa/ssautil"
"golang.org/x/tools/go/ast/astutil"
"golang.org/x/tools/go/loader"
)
type Job struct {
@ -836,6 +836,9 @@ func (v *fnVisitor) Visit(node ast.Node) ast.Visitor {
switch node := node.(type) {
case *ast.FuncDecl:
var ssafn *ssa.Function
if v.pkg == nil || v.pkg.Prog == nil {
return nil // partially loaded
}
ssafn = v.pkg.Prog.FuncValue(v.pkg.Info.ObjectOf(node.Name).(*types.Func))
v.m[node] = ssafn
if ssafn == nil {

5
vendor/github.com/mibk/dupl/.travis.yml generated vendored Normal file
View File

@ -0,0 +1,5 @@
language: go
go:
- 1.3
- 1.8
- 1.9

21
vendor/github.com/mibk/dupl/LICENSE generated vendored Normal file
View File

@ -0,0 +1,21 @@
The MIT License (MIT)
Copyright (c) 2015 Michal Bohuslávek
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

63
vendor/github.com/mibk/dupl/README.md generated vendored Normal file
View File

@ -0,0 +1,63 @@
# dupl [![Build Status](https://travis-ci.org/mibk/dupl.png)](https://travis-ci.org/mibk/dupl)
**dupl** is a tool written in Go for finding code clones. So far it can find clones only
in the Go source files. The method uses suffix tree for serialized ASTs. It ignores values
of AST nodes. It just operates with their types (e.g. `if a == 13 {}` and `if x == 100 {}` are
considered the same provided it exceeds the minimal token sequence size).
Due to the used method dupl can report so called "false positives" on the output. These are
the ones we do not consider clones (whether they are too small, or the values of the matched
tokens are completely different).
## Installation
```bash
go get -u github.com/mibk/dupl
```
## Usage
```
Usage of dupl:
dupl [flags] [paths]
Paths:
If the given path is a file, dupl will use it regardless of
the file extension. If it is a directory it will recursively
search for *.go files in that directory.
If no path is given dupl will recursively search for *.go
files in the current directory.
Flags:
-files
read file names from stdin one at each line
-html
output the results as HTML, including duplicate code fragments
-plumbing
plumbing (easy-to-parse) output for consumption by scripts or tools
-t, -threshold size
minimum token sequence size as a clone (default 15)
-vendor
check files in vendor directory
-v, -verbose
explain what is being done
Examples:
dupl -t 100
Search clones in the current directory of size at least
100 tokens.
dupl $(find app/ -name '*_test.go')
Search for clones in tests in the app directory.
find app/ -name '*_test.go' |dupl -files
The same as above.
```
## Example
The reduced output of this command with the following parameters for the [Docker](https://www.docker.com) source code
looks like [this](http://htmlpreview.github.io/?https://github.com/mibk/dupl/blob/master/_output_example/docker.html).
```bash
$ dupl -t 200 -html >docker.html
```

22
vendor/github.com/mibk/dupl/job/buildtree.go generated vendored Normal file
View File

@ -0,0 +1,22 @@
package job
import (
"github.com/mibk/dupl/suffixtree"
"github.com/mibk/dupl/syntax"
)
// BuildTree consumes serialized node sequences from schan, feeding every
// node into a fresh suffix tree while also collecting all nodes into one
// flat slice. It returns the tree, a pointer to the collected slice, and a
// channel that receives a single value once schan has been drained; only
// after that signal are the tree and slice safe to read.
func BuildTree(schan chan []*syntax.Node) (t *suffixtree.STree, d *[]*syntax.Node, done chan bool) {
	t = suffixtree.New()
	nodes := make([]*syntax.Node, 0, 100)
	done = make(chan bool)

	go func() {
		for seq := range schan {
			nodes = append(nodes, seq...)
			for _, n := range seq {
				t.Update(n)
			}
		}
		done <- true
	}()

	return t, &nodes, done
}

36
vendor/github.com/mibk/dupl/job/parse.go generated vendored Normal file
View File

@ -0,0 +1,36 @@
package job
import (
"log"
"github.com/mibk/dupl/syntax"
"github.com/mibk/dupl/syntax/golang"
)
// Parse launches a two-stage pipeline: file names read from fchan are
// parsed into uniform ASTs, and each AST is serialized into a flat node
// sequence sent on the returned channel. Files that fail to parse are
// logged and skipped. The returned channel is closed when fchan is drained.
func Parse(fchan chan string) chan []*syntax.Node {
	achan := make(chan *syntax.Node)
	schan := make(chan []*syntax.Node)

	// Stage 1: parse each file into an AST.
	go func() {
		defer close(achan)
		for file := range fchan {
			ast, err := golang.Parse(file)
			if err != nil {
				log.Println(err)
				continue
			}
			achan <- ast
		}
	}()

	// Stage 2: serialize each AST into a node sequence.
	go func() {
		defer close(schan)
		for ast := range achan {
			schan <- syntax.Serialize(ast)
		}
	}()

	return schan
}

148
vendor/github.com/mibk/dupl/main.go generated vendored Normal file
View File

@ -0,0 +1,148 @@
package dupl
import (
"flag"
"fmt"
"io/ioutil"
"os"
"path/filepath"
"sort"
"github.com/mibk/dupl/job"
"github.com/mibk/dupl/printer"
"github.com/mibk/dupl/syntax"
)
// defaultThreshold is the minimum token sequence length treated as a clone
// when no explicit threshold is given.
const defaultThreshold = 15

// Command-line flags. They are prefixed with "dupl." to avoid colliding
// with other flags when this package is linked into a larger binary.
var (
	paths    = []string{"."}
	vendor   = flag.Bool("dupl.vendor", false, "")
	verbose  = flag.Bool("dupl.verbose", false, "")
	files    = flag.Bool("dupl.files", false, "")
	html     = flag.Bool("dupl.html", false, "")
	plumbing = flag.Bool("dupl.plumbing", false, "")
)

const (
	vendorDirPrefix = "vendor" + string(filepath.Separator)
	vendorDirInPath = string(filepath.Separator) + vendorDirPrefix
)

func init() {
	flag.BoolVar(verbose, "dupl.v", false, "alias for -verbose")
}
// Run feeds the given file names through the dupl pipeline
// (parse -> suffix tree -> duplicate search) and returns the duplicates
// found as issues. threshold is the minimum token sequence length reported
// as a clone. Note: the files parameter shadows the package-level files flag.
func Run(files []string, threshold int) ([]printer.Issue, error) {
	// Feed file names into the parsing stage; buffered so the sender
	// rarely blocks.
	fchan := make(chan string, 1024)
	go func() {
		for _, f := range files {
			fchan <- f
		}
		close(fchan)
	}()
	schan := job.Parse(fchan)
	t, data, done := job.BuildTree(schan)
	<-done

	// finish stream: a sentinel token (type -1) terminates the suffix tree
	t.Update(&syntax.Node{Type: -1})

	mchan := t.FindDuplOver(threshold)
	duplChan := make(chan syntax.Match)
	go func() {
		for m := range mchan {
			match := syntax.FindSyntaxUnits(*data, m, threshold)
			if len(match.Frags) > 0 {
				duplChan <- match
			}
		}
		close(duplChan)
	}()
	return makeIssues(duplChan)
}
// makeIssues groups duplicate fragments by their match hash, removes
// repeated fragments, and converts each group that still has more than one
// distinct fragment into plumbing issues.
func makeIssues(duplChan <-chan syntax.Match) ([]printer.Issue, error) {
	groups := make(map[string][][]*syntax.Node)
	for dupl := range duplChan {
		groups[dupl.Hash] = append(groups[dupl.Hash], dupl.Frags...)
	}
	// Sort hashes so the resulting issue order is deterministic.
	keys := make([]string, 0, len(groups))
	for k := range groups {
		keys = append(keys, k)
	}
	sort.Strings(keys)

	p := printer.NewPlumbing(ioutil.ReadFile)

	var issues []printer.Issue
	for _, k := range keys {
		uniq := unique(groups[k])
		if len(uniq) > 1 { // a single unique fragment is not a clone
			i, err := p.MakeIssues(uniq)
			if err != nil {
				return nil, err
			}
			issues = append(issues, i...)
		}
	}
	return issues, nil
}
// unique filters group so that at most one fragment remains per
// (filename, start position) pair, preserving the original order.
func unique(group [][]*syntax.Node) [][]*syntax.Node {
	seen := make(map[string]map[int]struct{})
	var out [][]*syntax.Node

	for _, seq := range group {
		first := seq[0]
		positions, ok := seen[first.Filename]
		if !ok {
			positions = make(map[int]struct{})
			seen[first.Filename] = positions
		}
		if _, dup := positions[first.Pos]; dup {
			continue
		}
		positions[first.Pos] = struct{}{}
		out = append(out, seq)
	}
	return out
}
// usage writes the command-line help text to stderr and terminates the
// process with exit status 2.
func usage() {
	fmt.Fprintln(os.Stderr, `Usage: dupl [flags] [paths]
Paths:
If the given path is a file, dupl will use it regardless of
the file extension. If it is a directory, it will recursively
search for *.go files in that directory.
If no path is given, dupl will recursively search for *.go
files in the current directory.
Flags:
-files
read file names from stdin one at each line
-html
output the results as HTML, including duplicate code fragments
-plumbing
plumbing (easy-to-parse) output for consumption by scripts or tools
-t, -threshold size
minimum token sequence size as a clone (default 15)
-vendor
check files in vendor directory
-v, -verbose
explain what is being done
Examples:
dupl -t 100
Search clones in the current directory of size at least
100 tokens.
dupl $(find app/ -name '*_test.go')
Search for clones in tests in the app directory.
find app/ -name '*_test.go' |dupl -files
The same as above.`)
	os.Exit(2)
}

120
vendor/github.com/mibk/dupl/printer/html.go generated vendored Normal file
View File

@ -0,0 +1,120 @@
package printer
import (
"bytes"
"fmt"
"io"
"regexp"
"sort"
"github.com/mibk/dupl/syntax"
)
// html renders groups of duplicates as a standalone HTML document.
type html struct {
	iota int // running clone-group counter, used in headings
	w    io.Writer
	ReadFile
}

// NewHTML returns a Printer that writes HTML to w, reading source files
// through fread.
func NewHTML(w io.Writer, fread ReadFile) Printer {
	return &html{w: w, ReadFile: fread}
}
// PrintHeader writes the static HTML prologue (doctype, title and styles).
func (p *html) PrintHeader() error {
	_, err := fmt.Fprint(p.w, `<!DOCTYPE html>
<meta charset="utf-8"/>
<title>Duplicates</title>
<style>
pre {
background-color: #FFD;
border: 1px solid #E2E2E2;
padding: 1ex;
}
</style>
`)
	return err
}
// PrintClones writes one heading for the clone group followed by a
// "file:line" subheading and a <pre> code fragment per clone, sorted by
// file name and starting line.
func (p *html) PrintClones(dups [][]*syntax.Node) error {
	p.iota++
	fmt.Fprintf(p.w, "<h1>#%d found %d clones</h1>\n", p.iota, len(dups))

	clones := make([]clone, len(dups))
	for i, dup := range dups {
		cnt := len(dup)
		if cnt == 0 {
			panic("zero length dup")
		}
		nstart := dup[0]
		nend := dup[cnt-1]

		file, err := p.ReadFile(nstart.Filename)
		if err != nil {
			return err
		}

		lineStart, _ := blockLines(file, nstart.Pos, nend.End)
		cl := clone{filename: nstart.Filename, lineStart: lineStart}
		// Replace everything before the fragment on its first line with
		// whitespace so the fragment keeps its visual indentation.
		start := findLineBeg(file, nstart.Pos)
		content := append(toWhitespace(file[start:nstart.Pos]), file[nstart.Pos:nend.End]...)
		cl.fragment = deindent(content)
		clones[i] = cl
	}
	sort.Sort(byNameAndLine(clones))
	for _, cl := range clones {
		fmt.Fprintf(p.w, "<h2>%s:%d</h2>\n<pre>%s</pre>\n", cl.filename, cl.lineStart, cl.fragment)
	}
	return nil
}
// PrintFooter is a no-op: the HTML document needs no closing output.
func (*html) PrintFooter() error { return nil }
// findLineBeg returns the byte offset at which the line containing index
// starts, by scanning backwards for the nearest newline. If file[index] is
// itself a newline, the start of the following line is returned.
func findLineBeg(file []byte, index int) int {
	i := index
	for i >= 0 {
		if file[i] == '\n' {
			return i + 1
		}
		i--
	}
	return 0
}
// toWhitespace maps every rune of str to a single blank byte, preserving
// tabs so that indentation width survives: '\t' stays '\t', any other rune
// becomes a space.
func toWhitespace(str []byte) []byte {
	out := make([]byte, 0, len(str))
	for _, r := range bytes.Runes(str) {
		b := byte(' ')
		if r == '\t' {
			b = '\t'
		}
		out = append(out, b)
	}
	return out
}
// deindentRe matches the run of leading tabs before the first non-space
// character of each line.
var deindentRe = regexp.MustCompile(`(^|\n)(\t*)\S`)

// deindent strips the longest common leading-tab prefix from every line of
// block so the fragment is flush with the left margin. block is mutated in
// place and the (possibly shortened) slice is returned.
//
// The regexp is compiled once at package level: this function runs once per
// clone, and per-call regexp.MustCompile would redo the compilation every
// time.
func deindent(block []byte) []byte {
	const maxVal = 99
	min := maxVal
	// Find the smallest tab indentation over all non-empty lines.
	for _, line := range deindentRe.FindAllSubmatch(block, -1) {
		indent := line[2]
		if len(indent) < min {
			min = len(indent)
		}
	}
	if min == 0 || min == maxVal {
		return block // nothing to strip, or no indented lines at all
	}
	block = block[min:] // strip the first line's indent
Loop:
	for i := 0; i < len(block); i++ {
		if block[i] == '\n' && i != len(block)-1 {
			// Only strip when the next min bytes really are tabs.
			for j := 0; j < min; j++ {
				if block[i+j+1] != '\t' {
					continue Loop
				}
			}
			block = append(block[:i+1], block[i+1+min:]...)
		}
	}
	return block
}

50
vendor/github.com/mibk/dupl/printer/plumbing.go generated vendored Normal file
View File

@ -0,0 +1,50 @@
package printer
import (
"sort"
"github.com/mibk/dupl/syntax"
)
// Clone exposes a clone's location through read-only accessors.
type Clone clone

// Filename returns the file containing the clone.
func (c Clone) Filename() string {
	return c.filename
}

// LineStart returns the clone's first line (1-based).
func (c Clone) LineStart() int {
	return c.lineStart
}

// LineEnd returns the clone's last line (1-based).
func (c Clone) LineEnd() int {
	return c.lineEnd
}

// Issue is one pair of duplicate fragments.
type Issue struct {
	From, To Clone
}

// Plumbing produces machine-consumable issues instead of printed output.
type Plumbing struct {
	ReadFile
}

// NewPlumbing returns a Plumbing that reads source files via fread.
func NewPlumbing(fread ReadFile) *Plumbing {
	return &Plumbing{fread}
}

// MakeIssues converts a group of duplicate fragments into issues, pairing
// each clone with the next one in (filename, line) order; the last clone
// wraps around to the first.
func (p *Plumbing) MakeIssues(dups [][]*syntax.Node) ([]Issue, error) {
	clones, err := prepareClonesInfo(p.ReadFile, dups)
	if err != nil {
		return nil, err
	}
	sort.Sort(byNameAndLine(clones))

	var issues []Issue
	for i, cl := range clones {
		nextCl := clones[(i+1)%len(clones)]
		issues = append(issues, Issue{
			From: Clone(cl),
			To:   Clone(nextCl),
		})
	}
	return issues, nil
}

11
vendor/github.com/mibk/dupl/printer/printer.go generated vendored Normal file
View File

@ -0,0 +1,11 @@
package printer
import "github.com/mibk/dupl/syntax"
// ReadFile reads a whole source file; it matches the signature of
// ioutil.ReadFile so the real implementation can be swapped out in tests.
type ReadFile func(filename string) ([]byte, error)

// Printer renders groups of duplicate fragments.
type Printer interface {
	PrintHeader() error
	PrintClones(dups [][]*syntax.Node) error
	PrintFooter() error
}

100
vendor/github.com/mibk/dupl/printer/text.go generated vendored Normal file
View File

@ -0,0 +1,100 @@
package printer
import (
"fmt"
"io"
"sort"
"github.com/mibk/dupl/syntax"
)
// text renders groups of duplicates as a plain-text report.
type text struct {
	cnt int // number of clone groups printed so far
	w   io.Writer
	ReadFile
}

// NewText returns a Printer that writes plain text to w, reading source
// files through fread.
func NewText(w io.Writer, fread ReadFile) Printer {
	return &text{w: w, ReadFile: fread}
}

// PrintHeader is a no-op for the text format.
func (p *text) PrintHeader() error { return nil }

// PrintClones writes one "filename:start,end" line per clone in the group,
// sorted by file name and line.
func (p *text) PrintClones(dups [][]*syntax.Node) error {
	p.cnt++
	fmt.Fprintf(p.w, "found %d clones:\n", len(dups))
	clones, err := prepareClonesInfo(p.ReadFile, dups)
	if err != nil {
		return err
	}
	sort.Sort(byNameAndLine(clones))
	for _, cl := range clones {
		fmt.Fprintf(p.w, " %s:%d,%d\n", cl.filename, cl.lineStart, cl.lineEnd)
	}
	return nil
}

// PrintFooter writes the total number of clone groups found.
func (p *text) PrintFooter() error {
	_, err := fmt.Fprintf(p.w, "\nFound total %d clone groups.\n", p.cnt)
	return err
}
// prepareClonesInfo converts each duplicate fragment into a clone value,
// resolving its file name and 1-based start/end line numbers by reading the
// source through fread. Panics on an empty fragment, which would indicate a
// bug upstream.
func prepareClonesInfo(fread ReadFile, dups [][]*syntax.Node) ([]clone, error) {
	clones := make([]clone, len(dups))
	for i, dup := range dups {
		cnt := len(dup)
		if cnt == 0 {
			panic("zero length dup")
		}
		nstart := dup[0]
		nend := dup[cnt-1]

		file, err := fread(nstart.Filename)
		if err != nil {
			return nil, err
		}

		cl := clone{filename: nstart.Filename}
		cl.lineStart, cl.lineEnd = blockLines(file, nstart.Pos, nend.End)
		clones[i] = cl
	}
	return clones, nil
}
// blockLines maps the byte range [from, to) of file to 1-based start and
// end line numbers. The newline counter is advanced before the offset
// checks, so a range starting exactly on a '\n' byte is attributed to the
// following line. If to-1 lies beyond the file, the end value stays 0.
func blockLines(file []byte, from, to int) (int, int) {
	var start, end int
	line := 1
	for offset := 0; offset < len(file); offset++ {
		if file[offset] == '\n' {
			line++
		}
		if offset == from {
			start = line
		}
		if offset == to-1 {
			end = line
			break
		}
	}
	return start, end
}
// clone describes one duplicate fragment location: source file, 1-based
// line range, and (for HTML output) the code fragment itself.
type clone struct {
	filename  string
	lineStart int
	lineEnd   int
	fragment  []byte
}

// byNameAndLine orders clones by file name, then by starting line.
type byNameAndLine []clone

func (c byNameAndLine) Len() int      { return len(c) }
func (c byNameAndLine) Swap(i, j int) { c[i], c[j] = c[j], c[i] }
func (c byNameAndLine) Less(i, j int) bool {
	if c[i].filename != c[j].filename {
		return c[i].filename < c[j].filename
	}
	return c[i].lineStart < c[j].lineStart
}

98
vendor/github.com/mibk/dupl/suffixtree/dupl.go generated vendored Normal file
View File

@ -0,0 +1,98 @@
package suffixtree
import "sort"
// Match is a set of equally long duplicate runs in the tree's data slice:
// Ps holds the start position of each run, Len their common length.
type Match struct {
	Ps  []Pos
	Len Pos
}

// posList accumulates start positions of duplicate runs.
type posList struct {
	positions []Pos
}

func newPosList() *posList {
	return &posList{make([]Pos, 0)}
}

// append merges another list's positions into p.
func (p *posList) append(p2 *posList) {
	p.positions = append(p.positions, p2.positions...)
}

func (p *posList) add(pos Pos) {
	p.positions = append(p.positions, pos)
}

// contextList groups position lists by left context — the value of the
// token immediately preceding the run (see walkTrans). Runs that all share
// one left context are not left-maximal repeats.
type contextList struct {
	lists map[int]*posList
}

func newContextList() *contextList {
	return &contextList{make(map[int]*posList)}
}

// getAll flattens all position lists, iterating context keys in sorted
// order so the result is deterministic.
func (c *contextList) getAll() []Pos {
	keys := make([]int, 0, len(c.lists))
	for k := range c.lists {
		keys = append(keys, k)
	}
	sort.Ints(keys)
	var ps []Pos
	for _, k := range keys {
		ps = append(ps, c.lists[k].positions...)
	}
	return ps
}

// append merges c2 into c, combining lists that share a context key.
func (c *contextList) append(c2 *contextList) {
	for lc, pl := range c2.lists {
		if _, ok := c.lists[lc]; ok {
			c.lists[lc].append(pl)
		} else {
			c.lists[lc] = pl
		}
	}
}
// FindDuplOver find pairs of maximal duplicities over a threshold
// length. Matches are delivered on the returned channel from a background
// goroutine; the channel is closed once the whole tree has been walked.
func (t *STree) FindDuplOver(threshold int) <-chan Match {
	// Auxiliary zero-length transition so the walk can start at the root.
	auxTran := newTran(0, 0, t.root)
	ch := make(chan Match)
	go func() {
		walkTrans(auxTran, 0, threshold, ch)
		close(ch)
	}()
	return ch
}
// walkTrans recursively walks the subtree below parent, collecting suffix
// start positions grouped by their left context. length is the string
// depth of parent's target state. A Match is emitted for an internal state
// of depth >= threshold whose suffixes have more than one distinct left
// context, i.e. the repeat is left-maximal.
func walkTrans(parent *tran, length, threshold int, ch chan<- Match) *contextList {
	s := parent.state

	cl := newContextList()

	if len(s.trans) == 0 {
		// Leaf: exactly one suffix, starting at start.
		pl := newPosList()
		start := parent.end + 1 - Pos(length)
		pl.add(start)
		// NOTE: this local ch (the left-context token value, 0 for the
		// suffix at position 0) shadows the channel parameter.
		ch := 0
		if start > 0 {
			ch = s.tree.data[start-1].Val()
		}
		cl.lists[ch] = pl
		return cl
	}

	for _, t := range s.trans {
		ln := length + t.len()
		cl2 := walkTrans(t, ln, threshold, ch)
		if ln >= threshold {
			cl.append(cl2)
		}
	}
	if length >= threshold && len(cl.lists) > 1 {
		m := Match{cl.getAll(), Pos(length)}
		ch <- m
	}
	return cl
}

216
vendor/github.com/mibk/dupl/suffixtree/suffixtree.go generated vendored Normal file
View File

@ -0,0 +1,216 @@
package suffixtree
import (
"bytes"
"fmt"
"math"
"strings"
)
const infinity = math.MaxInt32
// Pos denotes position in data slice.
type Pos int32
type Token interface {
Val() int
}
// STree is a struct representing a suffix tree.
type STree struct {
data []Token
root *state
auxState *state // auxiliary state
// active point
s *state
start, end Pos
}
// New creates new suffix tree.
func New() *STree {
	t := new(STree)
	t.data = make([]Token, 0, 50)
	t.root = newState(t)
	t.auxState = newState(t)
	// The root's suffix link points to the auxiliary state, which stands
	// in for the state "before" the root during canonization.
	t.root.linkState = t.auxState
	t.s = t.root
	return t
}
// Update refreshes the suffix tree with the new data: the tokens are
// appended and the tree is extended one token at a time, keeping the
// active point (t.s, t.start, t.end) canonical after every step.
func (t *STree) Update(data ...Token) {
	t.data = append(t.data, data...)
	// `for range` replaces the non-idiomatic `for _ = range` (gofmt -s).
	for range data {
		t.update()
		t.s, t.start = t.canonize(t.s, t.start, t.end)
		t.end++
	}
}
// update transforms suffix tree T(n) to T(n+1), inserting the token at
// index t.end (one step of Ukkonen-style online construction).
func (t *STree) update() {
	oldr := t.root

	// (s, (start, end)) is the canonical reference pair for the active point
	s := t.s
	start, end := t.start, t.end
	var r *state
	for {
		var endPoint bool
		r, endPoint = t.testAndSplit(s, start, end-1)
		if endPoint {
			break
		}
		r.fork(end)
		if oldr != t.root {
			// Chain the suffix link from the state created in the
			// previous iteration.
			oldr.linkState = r
		}
		oldr = r
		// Follow the suffix link and re-canonize before the next round.
		s, start = t.canonize(s.linkState, start, end-1)
	}
	if oldr != t.root {
		oldr.linkState = r
	}

	// update active point
	t.s = s
	t.start = start
}
// testAndSplit tests whether a state with canonical ref. pair
// (s, (start, end)) is the end point, that is, a state that have
// a c-transition. If not, then state (exs, (start, end)) is made
// explicit (if not already so).
func (t *STree) testAndSplit(s *state, start, end Pos) (exs *state, endPoint bool) {
	c := t.data[t.end]
	if start <= end {
		// The reference pair denotes an implicit state inside the
		// transition selected by t.data[start].
		tr := s.findTran(t.data[start])
		splitPoint := tr.start + end - start + 1
		if t.data[splitPoint].Val() == c.Val() {
			return s, true
		}
		// make the (s, (start, end)) state explicit by splitting the
		// transition at splitPoint
		newSt := newState(s.tree)
		newSt.addTran(splitPoint, tr.end, tr.state)
		tr.end = splitPoint - 1
		tr.state = newSt
		return newSt, false
	}
	// Explicit state: it is the end point when it is the auxiliary state
	// or already has a c-transition.
	if s == t.auxState || s.findTran(c) != nil {
		return s, true
	}
	return s, false
}
// canonize returns updated state and start position for ref. pair
// (s, (start, end)) of state r so the new ref. pair is canonical,
// that is, referenced from the closest explicit ancestor of r.
func (t *STree) canonize(s *state, start, end Pos) (*state, Pos) {
	if s == t.auxState {
		// From the auxiliary state every token leads to the root.
		s, start = t.root, start+1
	}
	if start > end {
		return s, start
	}

	var tr *tran
	for {
		if start <= end {
			tr = s.findTran(t.data[start])
			if tr == nil {
				panic(fmt.Sprintf("there should be some transition for '%d' at %d",
					t.data[start].Val(), start))
			}
		}
		// Stop once the remaining label no longer spans the whole
		// transition; the pair is then canonical.
		if tr.end-tr.start > end-start {
			break
		}
		start += tr.end - tr.start + 1
		s = tr.state
	}
	if s == nil {
		panic("there should always be some suffix link resolution")
	}
	return s, start
}
// At returns the token stored at position p, panicking when p lies outside
// the data slice.
func (t *STree) At(p Pos) Token {
	if p >= 0 && p < Pos(len(t.data)) {
		return t.data[p]
	}
	panic("position out of bounds")
}
// String renders the tree's transitions as an indented list, for debugging.
func (t *STree) String() string {
	buf := new(bytes.Buffer)
	printState(buf, t.root, 0)
	return buf.String()
}

// printState recursively writes the transitions of state s to buf,
// indented by ident levels.
func printState(buf *bytes.Buffer, s *state, ident int) {
	for _, tr := range s.trans {
		fmt.Fprint(buf, strings.Repeat(" ", ident))
		fmt.Fprintf(buf, "* (%d, %d)\n", tr.start, tr.ActEnd())
		printState(buf, tr.state, ident+1)
	}
}
// state is an explicit state of the suffix tree.
type state struct {
	tree      *STree
	trans     []*tran // outgoing transitions
	linkState *state  // suffix link
}

// newState returns an empty state belonging to tree t.
func newState(t *STree) *state {
	return &state{
		tree:      t,
		trans:     make([]*tran, 0),
		linkState: nil,
	}
}

// addTran appends a transition labeled by data[start..end] leading to r.
func (s *state) addTran(start, end Pos, r *state) {
	s.trans = append(s.trans, newTran(start, end, r))
}

// fork creates a new branch from the state s.
func (s *state) fork(i Pos) *state {
	r := newState(s.tree)
	// An open transition: its end is infinity until the tree is finished.
	s.addTran(i, infinity, r)
	return r
}

// findTran finds c-transition.
// It returns nil when s has no transition whose label starts with a token
// equal in value to c.
func (s *state) findTran(c Token) *tran {
	for _, tran := range s.trans {
		if s.tree.data[tran.start].Val() == c.Val() {
			return tran
		}
	}
	return nil
}
// tran represents a state's transition, labeled by data[start..end]
// (inclusive) and leading to state.
type tran struct {
	start, end Pos
	state      *state
}

func newTran(start, end Pos, s *state) *tran {
	return &tran{start, end, s}
}

// len returns the number of tokens on the transition's label.
func (t *tran) len() int {
	return int(t.end - t.start + 1)
}

// ActEnd returns actual end position as consistent with
// the actual length of the data in the STree.
func (t *tran) ActEnd() Pos {
	if t.end == infinity {
		return Pos(len(t.state.tree.data)) - 1
	}
	return t.end
}

392
vendor/github.com/mibk/dupl/syntax/golang/golang.go generated vendored Normal file
View File

@ -0,0 +1,392 @@
package golang
import (
"go/ast"
"go/parser"
"go/token"
"github.com/mibk/dupl/syntax"
)
const (
BadNode = iota
File
ArrayType
AssignStmt
BasicLit
BinaryExpr
BlockStmt
BranchStmt
CallExpr
CaseClause
ChanType
CommClause
CompositeLit
DeclStmt
DeferStmt
Ellipsis
EmptyStmt
ExprStmt
Field
FieldList
ForStmt
FuncDecl
FuncLit
FuncType
GenDecl
GoStmt
Ident
IfStmt
IncDecStmt
IndexExpr
InterfaceType
KeyValueExpr
LabeledStmt
MapType
ParenExpr
RangeStmt
ReturnStmt
SelectStmt
SelectorExpr
SendStmt
SliceExpr
StarExpr
StructType
SwitchStmt
TypeAssertExpr
TypeSpec
TypeSwitchStmt
UnaryExpr
ValueSpec
)
// Parse the given file and return uniform syntax tree.
func Parse(filename string) (*syntax.Node, error) {
	fset := token.NewFileSet()
	f, err := parser.ParseFile(fset, filename, nil, 0)
	if err != nil {
		return nil, err
	}
	tr := &transformer{fileset: fset, filename: filename}
	return tr.trans(f), nil
}
type transformer struct {
fileset *token.FileSet
filename string
}
// trans transforms given golang AST to uniform tree structure.
func (t *transformer) trans(node ast.Node) (o *syntax.Node) {
o = syntax.NewNode()
o.Filename = t.filename
st, end := node.Pos(), node.End()
o.Pos, o.End = t.fileset.File(st).Offset(st), t.fileset.File(end).Offset(end)
switch n := node.(type) {
case *ast.ArrayType:
o.Type = ArrayType
if n.Len != nil {
o.AddChildren(t.trans(n.Len))
}
o.AddChildren(t.trans(n.Elt))
case *ast.AssignStmt:
o.Type = AssignStmt
for _, e := range n.Rhs {
o.AddChildren(t.trans(e))
}
for _, e := range n.Lhs {
o.AddChildren(t.trans(e))
}
case *ast.BasicLit:
o.Type = BasicLit
case *ast.BinaryExpr:
o.Type = BinaryExpr
o.AddChildren(t.trans(n.X), t.trans(n.Y))
case *ast.BlockStmt:
o.Type = BlockStmt
for _, stmt := range n.List {
o.AddChildren(t.trans(stmt))
}
case *ast.BranchStmt:
o.Type = BranchStmt
if n.Label != nil {
o.AddChildren(t.trans(n.Label))
}
case *ast.CallExpr:
o.Type = CallExpr
o.AddChildren(t.trans(n.Fun))
for _, arg := range n.Args {
o.AddChildren(t.trans(arg))
}
case *ast.CaseClause:
o.Type = CaseClause
for _, e := range n.List {
o.AddChildren(t.trans(e))
}
for _, stmt := range n.Body {
o.AddChildren(t.trans(stmt))
}
case *ast.ChanType:
o.Type = ChanType
o.AddChildren(t.trans(n.Value))
case *ast.CommClause:
o.Type = CommClause
if n.Comm != nil {
o.AddChildren(t.trans(n.Comm))
}
for _, stmt := range n.Body {
o.AddChildren(t.trans(stmt))
}
case *ast.CompositeLit:
o.Type = CompositeLit
if n.Type != nil {
o.AddChildren(t.trans(n.Type))
}
for _, e := range n.Elts {
o.AddChildren(t.trans(e))
}
case *ast.DeclStmt:
o.Type = DeclStmt
o.AddChildren(t.trans(n.Decl))
case *ast.DeferStmt:
o.Type = DeferStmt
o.AddChildren(t.trans(n.Call))
case *ast.Ellipsis:
o.Type = Ellipsis
if n.Elt != nil {
o.AddChildren(t.trans(n.Elt))
}
case *ast.EmptyStmt:
o.Type = EmptyStmt
case *ast.ExprStmt:
o.Type = ExprStmt
o.AddChildren(t.trans(n.X))
case *ast.Field:
o.Type = Field
for _, name := range n.Names {
o.AddChildren(t.trans(name))
}
o.AddChildren(t.trans(n.Type))
case *ast.FieldList:
o.Type = FieldList
for _, field := range n.List {
o.AddChildren(t.trans(field))
}
case *ast.File:
o.Type = File
for _, decl := range n.Decls {
if genDecl, ok := decl.(*ast.GenDecl); ok && genDecl.Tok == token.IMPORT {
// skip import declarations
continue
}
o.AddChildren(t.trans(decl))
}
case *ast.ForStmt:
o.Type = ForStmt
if n.Init != nil {
o.AddChildren(t.trans(n.Init))
}
if n.Cond != nil {
o.AddChildren(t.trans(n.Cond))
}
if n.Post != nil {
o.AddChildren(t.trans(n.Post))
}
o.AddChildren(t.trans(n.Body))
case *ast.FuncDecl:
o.Type = FuncDecl
if n.Recv != nil {
o.AddChildren(t.trans(n.Recv))
}
o.AddChildren(t.trans(n.Name), t.trans(n.Type))
if n.Body != nil {
o.AddChildren(t.trans(n.Body))
}
case *ast.FuncLit:
o.Type = FuncLit
o.AddChildren(t.trans(n.Type), t.trans(n.Body))
case *ast.FuncType:
o.Type = FuncType
o.AddChildren(t.trans(n.Params))
if n.Results != nil {
o.AddChildren(t.trans(n.Results))
}
case *ast.GenDecl:
o.Type = GenDecl
for _, spec := range n.Specs {
o.AddChildren(t.trans(spec))
}
case *ast.GoStmt:
o.Type = GoStmt
o.AddChildren(t.trans(n.Call))
case *ast.Ident:
o.Type = Ident
case *ast.IfStmt:
o.Type = IfStmt
if n.Init != nil {
o.AddChildren(t.trans(n.Init))
}
o.AddChildren(t.trans(n.Cond), t.trans(n.Body))
if n.Else != nil {
o.AddChildren(t.trans(n.Else))
}
case *ast.IncDecStmt:
o.Type = IncDecStmt
o.AddChildren(t.trans(n.X))
case *ast.IndexExpr:
o.Type = IndexExpr
o.AddChildren(t.trans(n.X), t.trans(n.Index))
case *ast.InterfaceType:
o.Type = InterfaceType
o.AddChildren(t.trans(n.Methods))
case *ast.KeyValueExpr:
o.Type = KeyValueExpr
o.AddChildren(t.trans(n.Key), t.trans(n.Value))
case *ast.LabeledStmt:
o.Type = LabeledStmt
o.AddChildren(t.trans(n.Label), t.trans(n.Stmt))
case *ast.MapType:
o.Type = MapType
o.AddChildren(t.trans(n.Key), t.trans(n.Value))
case *ast.ParenExpr:
o.Type = ParenExpr
o.AddChildren(t.trans(n.X))
case *ast.RangeStmt:
o.Type = RangeStmt
if n.Key != nil {
o.AddChildren(t.trans(n.Key))
}
if n.Value != nil {
o.AddChildren(t.trans(n.Value))
}
o.AddChildren(t.trans(n.X), t.trans(n.Body))
case *ast.ReturnStmt:
o.Type = ReturnStmt
for _, e := range n.Results {
o.AddChildren(t.trans(e))
}
case *ast.SelectStmt:
o.Type = SelectStmt
o.AddChildren(t.trans(n.Body))
case *ast.SelectorExpr:
o.Type = SelectorExpr
o.AddChildren(t.trans(n.X), t.trans(n.Sel))
case *ast.SendStmt:
o.Type = SendStmt
o.AddChildren(t.trans(n.Chan), t.trans(n.Value))
case *ast.SliceExpr:
o.Type = SliceExpr
o.AddChildren(t.trans(n.X))
if n.Low != nil {
o.AddChildren(t.trans(n.Low))
}
if n.High != nil {
o.AddChildren(t.trans(n.High))
}
if n.Max != nil {
o.AddChildren(t.trans(n.Max))
}
case *ast.StarExpr:
o.Type = StarExpr
o.AddChildren(t.trans(n.X))
case *ast.StructType:
o.Type = StructType
o.AddChildren(t.trans(n.Fields))
case *ast.SwitchStmt:
o.Type = SwitchStmt
if n.Init != nil {
o.AddChildren(t.trans(n.Init))
}
if n.Tag != nil {
o.AddChildren(t.trans(n.Tag))
}
o.AddChildren(t.trans(n.Body))
case *ast.TypeAssertExpr:
o.Type = TypeAssertExpr
o.AddChildren(t.trans(n.X))
if n.Type != nil {
o.AddChildren(t.trans(n.Type))
}
case *ast.TypeSpec:
o.Type = TypeSpec
o.AddChildren(t.trans(n.Name), t.trans(n.Type))
case *ast.TypeSwitchStmt:
o.Type = TypeSwitchStmt
if n.Init != nil {
o.AddChildren(t.trans(n.Init))
}
o.AddChildren(t.trans(n.Assign), t.trans(n.Body))
case *ast.UnaryExpr:
o.Type = UnaryExpr
o.AddChildren(t.trans(n.X))
case *ast.ValueSpec:
o.Type = ValueSpec
for _, name := range n.Names {
o.AddChildren(t.trans(name))
}
if n.Type != nil {
o.AddChildren(t.trans(n.Type))
}
for _, val := range n.Values {
o.AddChildren(t.trans(val))
}
default:
o.Type = BadNode
}
return o
}

175
vendor/github.com/mibk/dupl/syntax/syntax.go generated vendored Normal file
View File

@ -0,0 +1,175 @@
package syntax
import (
"crypto/sha1"
"github.com/mibk/dupl/suffixtree"
)
// Node is a lightweight syntax-tree node used for clone detection. It keeps
// only the information needed to compare node sequences, not full AST detail.
type Node struct {
	// Type identifies the kind of syntax construct this node represents.
	Type int
	// Filename is the source file the node was parsed from.
	Filename string
	// Pos and End are offsets of the node within Filename — presumably
	// byte offsets from the fileset; TODO confirm against the translator.
	Pos, End int
	// Children are the node's direct descendants.
	Children []*Node
	// Owns is the total number of descendants (set by serial during
	// serialization; excludes the node itself).
	Owns int
}
// NewNode returns a fresh, zero-valued Node.
func NewNode() *Node {
	return new(Node)
}
// AddChildren appends the given nodes, in order, to n's child list.
func (n *Node) AddChildren(children ...*Node) {
	for _, child := range children {
		n.Children = append(n.Children, child)
	}
}
// Val returns the node's Type code. NOTE(review): presumably this satisfies
// a token/value interface required by the suffixtree package — confirm there.
func (n *Node) Val() int {
	return n.Type
}
// Match describes one group of duplicated code fragments.
type Match struct {
	// Hash identifies the clone group (computed by hashSeq over node types).
	Hash string
	// Frags holds one node sequence per duplicated occurrence; each inner
	// slice lists the top-level syntax-unit nodes of that occurrence.
	Frags [][]*Node
}
// Serialize flattens the tree rooted at n into a pre-order node sequence,
// filling in each node's Owns counter along the way.
func Serialize(n *Node) []*Node {
	nodes := make([]*Node, 0, 10)
	serial(n, &nodes)
	return nodes
}
// serial appends n and (recursively) all its descendants to stream in
// pre-order, records the descendant count in n.Owns, and returns the total
// number of nodes appended (descendants plus n itself).
func serial(n *Node, stream *[]*Node) int {
	*stream = append(*stream, n)
	owned := 0
	for _, child := range n.Children {
		owned += serial(child, stream)
	}
	n.Owns = owned
	return owned + 1
}
// FindSyntaxUnits finds all complete syntax units in the match group and returns them
// with the corresponding hash. data is the serialized node stream the suffix
// tree was built over; m locates the duplicated subsequences within it;
// threshold is the minimum unit size (in nodes). An empty Match is returned
// when no acceptable units remain.
func FindSyntaxUnits(data []*Node, m suffixtree.Match, threshold int) Match {
	if len(m.Ps) == 0 {
		return Match{}
	}
	// Analyze the first occurrence; all occurrences matched the same sequence.
	firstSeq := data[m.Ps[0] : m.Ps[0]+m.Len]
	indexes := getUnitsIndexes(firstSeq, threshold)
	// TODO: is this really working?
	// Drop the trailing unit if its Owns count differs in any other
	// occurrence, i.e. the last unit is not complete everywhere.
	indexCnt := len(indexes)
	if indexCnt > 0 {
		lasti := indexes[indexCnt-1]
		firstn := firstSeq[lasti]
		for i := 1; i < len(m.Ps); i++ {
			n := data[int(m.Ps[i])+lasti]
			if firstn.Owns != n.Owns {
				indexes = indexes[:indexCnt-1]
				break
			}
		}
	}
	// Discard clones that are repetitive patterns or that straddle files.
	if len(indexes) == 0 || isCyclic(indexes, firstSeq) || spansMultipleFiles(indexes, firstSeq) {
		return Match{}
	}
	// Collect the unit nodes for every occurrence.
	match := Match{Frags: make([][]*Node, len(m.Ps))}
	for i, pos := range m.Ps {
		match.Frags[i] = make([]*Node, len(indexes))
		for j, index := range indexes {
			match.Frags[i][j] = data[int(pos)+index]
		}
	}
	// Hash the span from the first unit through the last unit's subtree.
	lastIndex := indexes[len(indexes)-1]
	match.Hash = hashSeq(firstSeq[indexes[0] : lastIndex+firstSeq[lastIndex].Owns])
	return match
}
// getUnitsIndexes returns the start indexes (within nodeSeq) of the complete
// syntax units forming the clone. A unit is complete when all the nodes it
// owns fit inside nodeSeq; units smaller than threshold nodes mark a split,
// which discards any indexes collected so far — so only the final contiguous
// run of large-enough, complete units is returned.
func getUnitsIndexes(nodeSeq []*Node, threshold int) []int {
	var indexes []int
	var split bool
	for i := 0; i < len(nodeSeq); {
		n := nodeSeq[i]
		switch {
		case n.Owns >= len(nodeSeq)-i:
			// not complete syntax unit
			// Advance one node at a time looking for a smaller unit inside.
			i++
			split = true
			continue
		case n.Owns+1 < threshold:
			// Unit too small to count on its own; break the current run.
			split = true
		default:
			if split {
				// Start a fresh run after a split.
				indexes = indexes[:0]
				split = false
			}
			indexes = append(indexes, i)
		}
		// Skip past this unit and every node it owns.
		i += n.Owns + 1
	}
	return indexes
}
// isCyclic finds out whether there is a repetitive pattern in the found clone.
// If positive, it returns true to point out that the clone would be redundant.
func isCyclic(indexes []int, nodes []*Node) bool {
	cnt := len(indexes)
	if cnt <= 1 {
		return false
	}
	// alts holds the candidate cycle lengths: every proper divisor of cnt.
	alts := make(map[int]bool)
	for i := 1; i <= cnt/2; i++ {
		if cnt%i == 0 {
			alts[i] = true
		}
	}
	for i := 0; i < indexes[cnt/2]; i++ {
		nstart := nodes[i+indexes[0]]
	AltLoop:
		// Compare the node at offset i of the first unit against the same
		// offset in every alt-th unit; a mismatch eliminates that cycle length.
		for alt := range alts {
			for j := alt; j < cnt; j += alt {
				index := i + indexes[j]
				if index < len(nodes) {
					nalt := nodes[index]
					if nstart.Owns == nalt.Owns && nstart.Type == nalt.Type {
						continue
					}
				} else if i >= indexes[alt] {
					// Ran past the sequence while the pattern still held:
					// treat the clone as cyclic.
					return true
				}
				// Mismatch: rule this cycle length out and try the next.
				delete(alts, alt)
				continue AltLoop
			}
		}
		if len(alts) == 0 {
			// No candidate cycle lengths survived; not cyclic.
			return false
		}
	}
	return true
}
// spansMultipleFiles reports whether the indexed nodes originate from more
// than one source file. Such clones are discarded by the caller.
func spansMultipleFiles(indexes []int, nodes []*Node) bool {
	if len(indexes) < 2 {
		return false
	}
	first := nodes[indexes[0]].Filename
	for _, idx := range indexes[1:] {
		if nodes[idx].Filename != first {
			return true
		}
	}
	return false
}
// hashSeq computes a SHA-1 digest over the Type codes of the given node
// sequence and returns it as a raw (binary) string.
func hashSeq(nodes []*Node) string {
	types := make([]byte, len(nodes))
	for i, node := range nodes {
		types[i] = byte(node.Type)
	}
	sum := sha1.Sum(types)
	return string(sum[:])
}