用Golang与perl脚本比较, 初想至多差一倍吧...结果可不是一般的坑爹, 简直就是坑爷了.
Perl脚本
#!/bin/bash
# perl.sh - extract selected fields from an access log and gzip the result.
#
# Usage: perl.sh <out_gzip_file> <in_log_file>
source /etc/profile;

# extractAndZip GZIP_FILE LOG_FILE
#
# Runs a perl one-liner over LOG_FILE; every line that matches the record
# pattern is printed as 14 fields separated by \001 (fields 17 and 19 are
# multiplied by 1000), and the stream is gzip-compressed into GZIP_FILE.
# Lines that do not match the pattern are skipped.
function extractAndZip(){
    # NOTE(review): _debug is not defined in this script; presumably it is
    # expected to come from the sourced profile - confirm it exists there.
    _debug "$FUNCNAME,$@";
    local logFile="${2}"
    local gzipFile="${1}"
    # Both paths are quoted so names containing spaces or glob characters
    # are passed through as a single argument.
    perl -ne 'if(m/([^ ]*) \- ([^ ]*) \[([^ ]*) [\+\-][0-9]{4}\] \"(\-|(([^ ]*) )?([^\?\;\% ]*)([\?\;\%]([^ ]*))?( ([^\"]*))?)\" ([^ ]*) ([^ ]*) \"([^\"]*)\" \"([^\"]*)\" \"([^\"]*)\" ([^ ]*) (\-|([^\-]+))/){printf("%s\001%s\001%s\001%s\001%s\001%s\001%s\001%s\001%s\001%s\001%s\001%s\001%s\001%s\n", ${1}, ${2}, ${3}, ${6}, ${7}, ${9}, ${11}, ${12}, ${13}, ${14}, ${15}, ${16}, ${17}*1000, ${19}*1000)}' "${logFile}" | gzip > "${gzipFile}";
}
extractAndZip "$@"
www-data@dc26:/data2/rsynclog/gotest$ time bash perl.sh result.gz 2014-06-17+yyexplorer+58.215.138.18+yyexplorer-access.log
/data/sa/profile_common: line 23: ulimit: open files: cannot modify limit: Operation not permitted
perl.sh: line 6: _debug: command not found
real 4m5.222s
user 5m54.630s
sys 0m9.720s
墙钟时间约4分钟(user CPU 时间约6分钟)就全部搞定了...
golang代码:
package main
import (
"bufio"
"compress/gzip"
"fmt"
"os"
"regexp"
"strconv"
//"strings"
)
// recordPattern describes one access-log line (presumably an nginx/Apache
// style access log; confirm against the producer). Capture groups used by
// the rest of the program:
//
//	1, 2   two space-free fields separated by " - "
//	3      the bracketed field before the +NNNN/-NNNN offset
//	4      the first quoted field as a whole ("-" or the parts below)
//	6      optional leading word of that field
//	7      the part up to the first '?', ';', '%' or space
//	9      optional remainder after that delimiter
//	11     optional trailing part after a space
//	12, 13 two space-free fields
//	14-16  three further quoted fields
//	17     a space-free field
//	19     a run of non-dash characters (unset when group 18 is "-")
const recordPattern = `([^ ]*) \- ([^ ]*) \[([^ ]*) [\+\-][0-9]{4}\] \"(\-|(([^ ]*) )?([^\?\;\% ]*)([\?\;\%]([^ ]*))?( ([^\"]*))?)\" ([^ ]*) ([^ ]*) \"([^\"]*)\" \"([^\"]*)\" \"([^\"]*)\" ([^ ]*) (\-|([^\-]+))`

// recordRegExp is recordPattern compiled once at package initialisation so
// the per-line loop never recompiles it.
var recordRegExp = regexp.MustCompile(recordPattern)
// toInt parses str as a base-10 integer.
//
// It returns the parsed value on success and 0 when str is not a valid
// integer (empty string, non-numeric text, or a fractional value such as
// "0.005", which strconv.Atoi rejects).
func toInt(str string) int {
	val, err := strconv.Atoi(str)
	if err != nil {
		// Unparseable input is treated as zero rather than reported.
		return 0
	}
	return val
}
// main converts one or more access-log files into a single gzip-compressed,
// tab-separated file.
//
// Usage: <program> <out_zip_file> <in_txt_file1...>
//
// On any failure the error is printed to standard error and the process
// exits with status 1.
func main() {
	if len(os.Args) < 3 {
		fmt.Println("Usage:", os.Args[0], "<out_zip_file>", "<in_txt_file1...>")
		os.Exit(1)
	}
	// All work happens in a helper so its deferred closes run before os.Exit.
	if err := extractToGzip(os.Args[1], os.Args[2:]); err != nil {
		fmt.Fprintf(os.Stderr, "错误:%s\n", err.Error())
		os.Exit(1)
	}
}

// extractToGzip creates outPath and writes the extracted records of every
// file in inPaths into it as one gzip stream. It returns the first error
// encountered, including errors from flushing and closing the output.
func extractToGzip(outPath string, inPaths []string) (err error) {
	outZipFile, err := os.Create(outPath)
	if err != nil {
		return err
	}
	defer func() {
		// A failed close can mean lost data, so surface it unless an
		// earlier error is already being returned.
		if cerr := outZipFile.Close(); err == nil {
			err = cerr
		}
	}()

	// Open every input up front so a bad path fails before any output is
	// produced. The slice starts empty (length 0) so it holds only real files.
	inTxtFiles := make([]*os.File, 0, len(inPaths))
	defer func() {
		for _, f := range inTxtFiles {
			// Inputs are read-only; a close error carries no data loss.
			_ = f.Close()
		}
	}()
	for _, path := range inPaths {
		file, openErr := os.Open(path)
		if openErr != nil {
			return openErr
		}
		inTxtFiles = append(inTxtFiles, file)
	}

	zipIo := gzip.NewWriter(outZipFile)
	out := bufio.NewWriter(zipIo)
	for _, file := range inTxtFiles {
		if scanErr := writeRecords(out, file); scanErr != nil {
			return fmt.Errorf("%s: %w", file.Name(), scanErr)
		}
	}
	// Order matters: drain the buffer into gzip, then finish the gzip stream;
	// the file itself is closed by the deferred function above.
	if flushErr := out.Flush(); flushErr != nil {
		return flushErr
	}
	return zipIo.Close()
}

// writeRecords scans file line by line and writes one tab-separated record to
// out for every line matched by recordRegExp. Lines that do not match are
// skipped. It returns the scanner's error, if any (for example a line longer
// than bufio.Scanner's token limit).
func writeRecords(out *bufio.Writer, file *os.File) error {
	// Capture groups copied verbatim, in output order.
	fields := [...]int{1, 2, 3, 6, 7, 9, 11, 12, 13, 14, 15, 16}
	var num []byte // scratch buffer reused for the two numeric columns

	scan := bufio.NewScanner(file)
	for scan.Scan() {
		items := recordRegExp.FindSubmatch(scan.Bytes())
		if items == nil {
			// No match: indexing items would panic, so skip the line.
			continue
		}
		// Write errors on a bufio.Writer are sticky and are reported by the
		// caller's Flush, so they are not checked on each call here.
		for _, idx := range fields {
			out.Write(items[idx])
			out.WriteByte('\t')
		}
		num = strconv.AppendInt(num[:0], int64(toInt(string(items[17]))*1000), 10)
		out.Write(num)
		out.WriteByte('\t')
		num = strconv.AppendInt(num[:0], int64(toInt(string(items[19]))*1000), 10)
		out.Write(num)
		out.WriteByte('\n')
	}
	return scan.Err()
}
结果手工kill时:
16m才完成了3分之1左右...坑你爷了...