golang社工库数据扫描程序

时间:2021-07-24 14:09:33

https://github.com/xiaojiong/scanfile

演示站点: http://www.weigongkai.com/   7G数据 2s完成扫描

package scanfile

/*
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

// IndexStr searches haystack for the first occurrence of needle, starting
// the search at byte offset begin.
// Returns the offset of the match measured from the start of haystack,
// or -1 when needle does not occur at or after begin.
int IndexStr(char *haystack, char *needle, unsigned int begin) {
    char *match = strstr(haystack + begin, needle);
    if (match == NULL) {
        return -1;
    }
    return match - haystack;
}

// IndexChar searches haystack for the first occurrence of c, starting at
// byte offset begin.
// Returns the offset of the match measured from begin (NOT from the start
// of haystack), or -1 when the terminating NUL is reached first.
int IndexChar(char *haystack, char c, unsigned int begin) {
    char *start = haystack + begin;
    char *p = start;

    // Walk forward until the NUL terminator of the C string.
    while (*p != '\0') {
        if (*p == c) {
            return p - start;
        }
        ++p;
    }
    return -1;
}

// LastIndexChar searches haystack backwards for c, starting at byte offset
// begin and moving towards the start of the string.
// A begin of 0 means "start from the last character of haystack".
// Returns the offset of the match measured from the start of haystack,
// or -1 when c is not found, haystack is empty, or begin lies past the
// terminating NUL.
int LastIndexChar(char *haystack, char c, unsigned int begin) {
    size_t len = strlen(haystack);

    // An empty string has nothing to search; without this guard the
    // "begin = len - 1" below would wrap around and read out of bounds.
    if (len == 0) {
        return -1;
    }
    if (begin == 0) {
        begin = (unsigned int)(len - 1);
    } else if (begin > len) {
        return -1;
    }

    for (;;) {
        if (haystack[begin] == c) {
            return (int)begin;
        }
        if (begin == 0) {
            return -1;
        }
        --begin;
    }
}
*/
import"C"
import"unsafe"

// strScan collects every line of *str that contains *key.
//
// The text is copied into C memory and searched with the C helpers from the
// cgo preamble. For each match the enclosing line is taken to run from just
// after the previous '\n' (or the start of the text) up to, but not
// including, the next '\n' (or the end of the text). Searching then resumes
// at the end of that line, so a line is reported at most once.
//
// counter is consulted before each result is stored: once counter.IsMax()
// reports true the scan stops, otherwise counter.Add() is called for the
// stored line. An empty key yields no results.
func strScan(str *string, key *string, counter *Counter) []string {
	// An empty needle matches at every offset, including on a line feed,
	// which would produce an inverted slice range below.
	if *key == "" {
		return nil
	}

	cStr := C.CString(*str)
	cKey := C.CString(*key)
	// C.CString allocates with malloc; release both copies on return.
	defer func() {
		C.free(unsafe.Pointer(cStr))
		C.free(unsafe.Pointer(cKey))
	}()

	var res []string
	begin := 0
	for {
		index := int(C.IndexStr(cStr, cKey, C.uint(begin)))
		if index == -1 {
			break
		}

		// Start of the matching line: one past the preceding line feed.
		startIndex := 0
		if index > 0 {
			if pos := int(C.LastIndexChar(cStr, '\n', C.uint(index))); pos != -1 {
				startIndex = pos + 1
			}
		}

		// End of the matching line. IndexChar reports the offset relative
		// to index, so it is added back to get an absolute position.
		endIndex := len(*str)
		if pos := int(C.IndexChar(cStr, '\n', C.uint(index))); pos != -1 {
			endIndex = pos + index
		}
		begin = endIndex

		if counter.IsMax() {
			break
		}
		res = append(res, (*str)[startIndex:endIndex])
		counter.Add()

		if begin == len(*str) {
			break
		}
	}
	return res
}
package scanfile

import (
"io"
"os"
"sync"
)

var (
	// LineFeed is the byte that marks the end of a line of text.
	LineFeed = byte('\n')
	// BufSize is the size, in bytes, of the buffer used for each file read.
	BufSize = 1024 * 1024
)

func Scan(files []string, searchStr *string) string {

 var result ScanResult
//计数器
 counter := InitCounter(10)

//扫描结果输出通道
 out := make(chan *FileRes, 10)

 fileCount := len(files)

 for i := 0; i < fileCount; i++ {
 go ScanFile(files[i], searchStr, counter, out)
}

 for i := 0; i < fileCount; i++ {
result.AddFileRes(<-out)
}

result.AddCounter(counter)
 return result.ToJson()
}

// ScanFile searches one file for *searchStr and sends the resulting FileRes
// on out.
//
// The file is read in chunks by fileRead; three worker goroutines take
// chunks from that channel and pass them to strScan, adding every returned
// line to the file's result. ScanFile blocks until all workers have
// finished and then performs exactly one send on out.
//
// When counter.IsMax() reports true a worker stops scanning and drains the
// remaining chunks, so the reader goroutine is never left blocked on a send.
func ScanFile(fileName string, searchStr *string, counter *Counter, out chan *FileRes) {
	// File I/O happens in the goroutine started by fileRead.
	fileContentChan := fileRead(fileName, counter)

	fileRes := InitFileRes(fileName)

	const workers = 3
	var wg sync.WaitGroup
	wg.Add(workers)
	for i := 0; i < workers; i++ {
		go func() {
			defer wg.Done()
			for text := range fileContentChan {
				if counter.IsMax() {
					// Discard unread chunks so the reader can finish
					// and close the channel.
					clearFileContentChan(fileContentChan)
					return
				}
				// NOTE(review): fileRes.Add is called from several
				// goroutines; confirm FileRes is safe for concurrent use.
				for _, line := range strScan(text, searchStr, counter) {
					fileRes.Add(line)
				}
			}
		}()
	}
	wg.Wait()
	out <- fileRes
}

// clearFileContentChan receives and discards values from c until c is
// closed. It blocks while c is open and empty.
func clearFileContentChan(c chan *string) {
	for range c {
		// Values are intentionally dropped.
	}
}

// fileRead streams the contents of fileName as a sequence of text chunks.
//
// It returns a buffered channel immediately and fills it from a background
// goroutine. Each chunk holds up to BufSize bytes and ends on the last
// LineFeed found in the bytes actually read, so lines are not split across
// chunks; the unread tail is re-read at the start of the next chunk. A
// chunk that contains no LineFeed at all (an unterminated final line, or a
// single line longer than BufSize) is sent whole so the reader always makes
// progress.
//
// Reading stops at end of file or as soon as counter.IsMax() reports true,
// after which the channel is closed. Failure to open, read or seek the file
// panics in the background goroutine.
func fileRead(fileName string, counter *Counter) chan *string {
	fileContentChan := make(chan *string, 5)
	go func() {
		// Always close the channel so receivers ranging over it terminate.
		defer close(fileContentChan)

		fh, err := os.Open(fileName)
		if err != nil {
			panic(err)
		}
		defer fh.Close()

		buf := make([]byte, BufSize)

		// start is the file offset of the next byte not yet sent.
		var start int64
		// Stop reading once the counter has reached its maximum.
		for !counter.IsMax() {
			n, err := fh.Read(buf)
			if err != nil && err != io.EOF {
				panic(err)
			}
			if n == 0 {
				break
			}

			// Only the first n bytes are valid; the rest of buf may
			// still hold data from an earlier, longer read.
			chunk := buf[:n]
			consumed := lastByteIndex(chunk, LineFeed) + 1
			if consumed == 0 {
				// No line feed in this chunk: send all of it rather
				// than re-reading the same bytes forever.
				consumed = n
			}
			content := string(chunk[:consumed])
			start += int64(consumed)

			// Rewind to just after the last complete line so the
			// partial line is read again with the next chunk.
			if consumed < n {
				if _, err := fh.Seek(start, io.SeekStart); err != nil {
					panic(err)
				}
			}
			fileContentChan <- &content
		}
	}()
	return fileContentChan
}

// lastByteIndex returns the index of the last occurrence of sep in s,
// or -1 if sep does not appear in s.
func lastByteIndex(s []byte, sep byte) int {
	pos := len(s)
	for pos > 0 {
		pos--
		if s[pos] == sep {
			return pos
		}
	}
	return -1
}