1.导包
gopm get -g -v github.com/lestrrat-go/libxml2
2.使用示例
func ExampleHTML() {
res, err := http.Get("http://golang.org")
if err != nil {
panic("failed to get golang.org: " + err.Error())
}
doc, err := libxml2.ParseHTMLReader(res.Body)
if err != nil {
panic("failed to parse HTML: " + err.Error())
}
defer doc.Free()
doc.Walk(func(n types.Node) error {
log.Printf(n.NodeName())
return nil
})
nodes := xpath.NodeList(doc.Find(`//div[@id="menu"]/a`))
for i := 0; i < len(nodes); i++ {
log.Printf("Found node: %s", nodes[i].NodeName())
}
}
//bytes[]转io.Reader()的例子
package parser
import (
"bytes"
"fmt"
"github.com/lestrrat-go/libxml2"
"goproject/crawler/Fetcher"
"testing"
)
func TestParseCityList(t *testing.T) {
contents, err := Fetcher.Fetch("http://www.zhenai.com/zhenghun")
if err != nil {
panic(err)
}
//bytes[]转io.Reader()
doc, err := libxml2.ParseHTMLReader(bytes.NewReader(contents))
defer doc.Free()
nodes, err := doc.Find("//dl[@class='city-list clearfix']/dd/a")
fmt.Println(nodes.NodeList()[0].TextContent(), err)
}