247 lines
		
	
	
		
			5.9 KiB
		
	
	
	
		
			Go
		
	
	
	
	
	
			
		
		
	
	
			247 lines
		
	
	
		
			5.9 KiB
		
	
	
	
		
			Go
		
	
	
	
	
	
package misspell
 | 
						|
 | 
						|
import (
 | 
						|
	"bufio"
 | 
						|
	"bytes"
 | 
						|
	"io"
 | 
						|
	"regexp"
 | 
						|
	"strings"
 | 
						|
	"text/scanner"
 | 
						|
)
 | 
						|
 | 
						|
// max returns the larger of x and y.
func max(x, y int) int {
	if x > y {
		return x
	}
	return y
}
 | 
						|
 | 
						|
// inArray reports whether needle is an element of haystack, using exact
// (case-sensitive) string comparison. A nil or empty haystack yields false.
func inArray(haystack []string, needle string) bool {
	for _, word := range haystack {
		if needle == word {
			return true
		}
	}
	return false
}
 | 
						|
 | 
						|
// wordRegexp matches a "word": a maximal run of ASCII letters, digits and
// apostrophes. It is compiled once at package scope.
var wordRegexp = regexp.MustCompile(`[a-zA-Z0-9']+`)
 | 
						|
 | 
						|
// Diff is a data structure describing a single correction made on one line.
type Diff struct {
	// Filename is the name of the file the line came from. It is not set by
	// the Replacer methods in this file; presumably callers fill it in.
	Filename string

	// FullLine is the complete original (uncorrected) line of input.
	FullLine string

	// Line is the 1-based line number of FullLine within the input.
	Line int

	// Column is the 0-based byte offset of Original within FullLine.
	Column int

	// Original is the word exactly as it appeared in the input.
	Original string

	// Corrected is the replacement that was written in place of Original.
	Corrected string
}
 | 
						|
 | 
						|
// Replacer is the main struct for spelling correction
 | 
						|
type Replacer struct {
 | 
						|
	Replacements []string
 | 
						|
	Debug        bool
 | 
						|
	engine       *StringReplacer
 | 
						|
	corrected    map[string]string
 | 
						|
}
 | 
						|
 | 
						|
// New creates a new default Replacer using the main rule list
 | 
						|
func New() *Replacer {
 | 
						|
	r := Replacer{
 | 
						|
		Replacements: DictMain,
 | 
						|
	}
 | 
						|
	r.Compile()
 | 
						|
	return &r
 | 
						|
}
 | 
						|
 | 
						|
// RemoveRule deletes existings rules.
 | 
						|
// TODO: make inplace to save memory
 | 
						|
func (r *Replacer) RemoveRule(ignore []string) {
 | 
						|
	newwords := make([]string, 0, len(r.Replacements))
 | 
						|
	for i := 0; i < len(r.Replacements); i += 2 {
 | 
						|
		if inArray(ignore, r.Replacements[i]) {
 | 
						|
			continue
 | 
						|
		}
 | 
						|
		newwords = append(newwords, r.Replacements[i:i+2]...)
 | 
						|
	}
 | 
						|
	r.engine = nil
 | 
						|
	r.Replacements = newwords
 | 
						|
}
 | 
						|
 | 
						|
// AddRuleList appends new rules.
 | 
						|
// Input is in the same form as Strings.Replacer: [ old1, new1, old2, new2, ....]
 | 
						|
// Note: does not check for duplictes
 | 
						|
func (r *Replacer) AddRuleList(additions []string) {
 | 
						|
	r.engine = nil
 | 
						|
	r.Replacements = append(r.Replacements, additions...)
 | 
						|
}
 | 
						|
 | 
						|
// Compile compiles the rules.  Required before using the Replace functions
 | 
						|
func (r *Replacer) Compile() {
 | 
						|
 | 
						|
	r.corrected = make(map[string]string, len(r.Replacements)/2)
 | 
						|
	for i := 0; i < len(r.Replacements); i += 2 {
 | 
						|
		r.corrected[r.Replacements[i]] = r.Replacements[i+1]
 | 
						|
	}
 | 
						|
	r.engine = NewStringReplacer(r.Replacements...)
 | 
						|
}
 | 
						|
 | 
						|
/*
 | 
						|
line1 and line2 are different
 | 
						|
extract words from each line1
 | 
						|
 | 
						|
replace word -> newword
 | 
						|
if word == new-word
 | 
						|
  continue
 | 
						|
if new-word in list of replacements
 | 
						|
  continue
 | 
						|
new word not original, and not in list of replacements
 | 
						|
  some substring got mixed up.  UNdo
 | 
						|
*/
 | 
						|
func (r *Replacer) recheckLine(s string, lineNum int, buf io.Writer, next func(Diff)) {
 | 
						|
	first := 0
 | 
						|
	redacted := RemoveNotWords(s)
 | 
						|
 | 
						|
	idx := wordRegexp.FindAllStringIndex(redacted, -1)
 | 
						|
	for _, ab := range idx {
 | 
						|
		word := s[ab[0]:ab[1]]
 | 
						|
		newword := r.engine.Replace(word)
 | 
						|
		if newword == word {
 | 
						|
			// no replacement done
 | 
						|
			continue
 | 
						|
		}
 | 
						|
 | 
						|
		// ignore camelCase words
 | 
						|
		// https://github.com/client9/misspell/issues/113
 | 
						|
		if CaseStyle(word) == CaseUnknown {
 | 
						|
			continue
 | 
						|
		}
 | 
						|
 | 
						|
		if StringEqualFold(r.corrected[strings.ToLower(word)], newword) {
 | 
						|
			// word got corrected into something we know
 | 
						|
			io.WriteString(buf, s[first:ab[0]])
 | 
						|
			io.WriteString(buf, newword)
 | 
						|
			first = ab[1]
 | 
						|
			next(Diff{
 | 
						|
				FullLine:  s,
 | 
						|
				Line:      lineNum,
 | 
						|
				Original:  word,
 | 
						|
				Corrected: newword,
 | 
						|
				Column:    ab[0],
 | 
						|
			})
 | 
						|
			continue
 | 
						|
		}
 | 
						|
		// Word got corrected into something unknown. Ignore it
 | 
						|
	}
 | 
						|
	io.WriteString(buf, s[first:])
 | 
						|
}
 | 
						|
 | 
						|
// ReplaceGo is a specialized routine for correcting Golang source
 | 
						|
// files.  Currently only checks comments, not identifiers for
 | 
						|
// spelling.
 | 
						|
func (r *Replacer) ReplaceGo(input string) (string, []Diff) {
 | 
						|
	var s scanner.Scanner
 | 
						|
	s.Init(strings.NewReader(input))
 | 
						|
	s.Mode = scanner.ScanIdents | scanner.ScanFloats | scanner.ScanChars | scanner.ScanStrings | scanner.ScanRawStrings | scanner.ScanComments
 | 
						|
	lastPos := 0
 | 
						|
	output := ""
 | 
						|
Loop:
 | 
						|
	for {
 | 
						|
		switch s.Scan() {
 | 
						|
		case scanner.Comment:
 | 
						|
			origComment := s.TokenText()
 | 
						|
			newComment := r.engine.Replace(origComment)
 | 
						|
 | 
						|
			if origComment != newComment {
 | 
						|
				// s.Pos().Offset is the end of the current token
 | 
						|
				// subtract len(origComment) to get the start of the token
 | 
						|
				offset := s.Pos().Offset
 | 
						|
				output = output + input[lastPos:offset-len(origComment)] + newComment
 | 
						|
				lastPos = offset
 | 
						|
			}
 | 
						|
		case scanner.EOF:
 | 
						|
			break Loop
 | 
						|
		}
 | 
						|
	}
 | 
						|
 | 
						|
	if lastPos == 0 {
 | 
						|
		// no changes, no copies
 | 
						|
		return input, nil
 | 
						|
	}
 | 
						|
	if lastPos < len(input) {
 | 
						|
		output = output + input[lastPos:]
 | 
						|
	}
 | 
						|
	diffs := make([]Diff, 0, 8)
 | 
						|
	buf := bytes.NewBuffer(make([]byte, 0, max(len(input), len(output))+100))
 | 
						|
	// faster that making a bytes.Buffer and bufio.ReadString
 | 
						|
	outlines := strings.SplitAfter(output, "\n")
 | 
						|
	inlines := strings.SplitAfter(input, "\n")
 | 
						|
	for i := 0; i < len(inlines); i++ {
 | 
						|
		if inlines[i] == outlines[i] {
 | 
						|
			buf.WriteString(outlines[i])
 | 
						|
			continue
 | 
						|
		}
 | 
						|
		r.recheckLine(inlines[i], i+1, buf, func(d Diff) {
 | 
						|
			diffs = append(diffs, d)
 | 
						|
		})
 | 
						|
	}
 | 
						|
 | 
						|
	return buf.String(), diffs
 | 
						|
 | 
						|
}
 | 
						|
 | 
						|
// Replace is corrects misspellings in input, returning corrected version
 | 
						|
//  along with a list of diffs.
 | 
						|
func (r *Replacer) Replace(input string) (string, []Diff) {
 | 
						|
	output := r.engine.Replace(input)
 | 
						|
	if input == output {
 | 
						|
		return input, nil
 | 
						|
	}
 | 
						|
	diffs := make([]Diff, 0, 8)
 | 
						|
	buf := bytes.NewBuffer(make([]byte, 0, max(len(input), len(output))+100))
 | 
						|
	// faster that making a bytes.Buffer and bufio.ReadString
 | 
						|
	outlines := strings.SplitAfter(output, "\n")
 | 
						|
	inlines := strings.SplitAfter(input, "\n")
 | 
						|
	for i := 0; i < len(inlines); i++ {
 | 
						|
		if inlines[i] == outlines[i] {
 | 
						|
			buf.WriteString(outlines[i])
 | 
						|
			continue
 | 
						|
		}
 | 
						|
		r.recheckLine(inlines[i], i+1, buf, func(d Diff) {
 | 
						|
			diffs = append(diffs, d)
 | 
						|
		})
 | 
						|
	}
 | 
						|
 | 
						|
	return buf.String(), diffs
 | 
						|
}
 | 
						|
 | 
						|
// ReplaceReader applies spelling corrections to a reader stream.  Diffs are
 | 
						|
// emitted through a callback.
 | 
						|
func (r *Replacer) ReplaceReader(raw io.Reader, w io.Writer, next func(Diff)) error {
 | 
						|
	var (
 | 
						|
		err     error
 | 
						|
		line    string
 | 
						|
		lineNum int
 | 
						|
	)
 | 
						|
	reader := bufio.NewReader(raw)
 | 
						|
	for err == nil {
 | 
						|
		lineNum++
 | 
						|
		line, err = reader.ReadString('\n')
 | 
						|
 | 
						|
		// if it's EOF, then line has the last line
 | 
						|
		// don't like the check of err here and
 | 
						|
		// in for loop
 | 
						|
		if err != nil && err != io.EOF {
 | 
						|
			return err
 | 
						|
		}
 | 
						|
		// easily 5x faster than regexp+map
 | 
						|
		if line == r.engine.Replace(line) {
 | 
						|
			io.WriteString(w, line)
 | 
						|
			continue
 | 
						|
		}
 | 
						|
		// but it can be inaccurate, so we need to double check
 | 
						|
		r.recheckLine(line, lineNum, w, next)
 | 
						|
	}
 | 
						|
	return nil
 | 
						|
}
 |