下面是一个可直接运行的最小示例,可原样放入新建的 Go 模块中:
package main
import (
"encoding/csv"
"encoding/json"
"fmt"
"log"
"os"
"strings"
"github.com/gocolly/colly/v2"
)
// tableData holds one scraped table row. Every column is kept as the raw
// cell text, so all fields are strings. Field order matches the column order
// used when the row is written out.
type tableData struct {
	Name      string
	Position  string
	Office    string
	Age       string
	StartDate string
	Salary    string
}
// main scrapes the rows of the "example" table on the DataTables demo page,
// then writes them to employees.json and employees.csv (paths relative to the
// working directory) and prints the number of rows collected.
//
// Any failure while fetching the page or writing either file terminates the
// program with a non-zero exit status via log.Fatal.
func main() {
	var rows []tableData

	c := colly.NewCollector()
	// Each <tr> inside the table body becomes one tableData; cells are
	// selected by their column position.
	c.OnHTML("table#example > tbody", func(h *colly.HTMLElement) {
		h.ForEach("tr", func(_ int, el *colly.HTMLElement) {
			rows = append(rows, tableData{
				Name:      strings.TrimSpace(el.ChildText("td:nth-child(1)")),
				Position:  strings.TrimSpace(el.ChildText("td:nth-child(2)")),
				Office:    strings.TrimSpace(el.ChildText("td:nth-child(3)")),
				Age:       strings.TrimSpace(el.ChildText("td:nth-child(4)")),
				StartDate: strings.TrimSpace(el.ChildText("td:nth-child(5)")),
				Salary:    strings.TrimSpace(el.ChildText("td:nth-child(6)")),
			})
		})
	})

	if err := c.Visit("https://datatables.net/examples/styling/display.html"); err != nil {
		log.Fatal(err)
	}

	// The writers return errors instead of calling log.Fatal themselves so
	// that their deferred cleanup runs before the process exits.
	if err := writeJSON("employees.json", rows); err != nil {
		log.Fatal(err)
	}
	if err := writeCSV("employees.csv", rows); err != nil {
		log.Fatal(err)
	}

	fmt.Println("scraped:", len(rows), "rows")
}

// writeJSON encodes rows as indented JSON and writes the result to path.
func writeJSON(path string, rows []tableData) error {
	j, err := json.MarshalIndent(rows, "", " ")
	if err != nil {
		return fmt.Errorf("encoding %s: %w", path, err)
	}
	if err := os.WriteFile(path, j, 0644); err != nil {
		return fmt.Errorf("writing %s: %w", path, err)
	}
	return nil
}

// writeCSV writes a header line followed by one record per row to path.
// It reports create, write, flush, and close errors.
func writeCSV(path string, rows []tableData) (err error) {
	f, err := os.Create(path)
	if err != nil {
		return fmt.Errorf("creating %s: %w", path, err)
	}
	defer func() {
		// Surface a close failure only if nothing went wrong earlier.
		if cerr := f.Close(); cerr != nil && err == nil {
			err = fmt.Errorf("closing %s: %w", path, cerr)
		}
	}()

	w := csv.NewWriter(f)
	if err = w.Write([]string{"Name", "Position", "Office", "Age", "StartDate", "Salary"}); err != nil {
		return fmt.Errorf("writing %s header: %w", path, err)
	}
	for _, r := range rows {
		if err = w.Write([]string{r.Name, r.Position, r.Office, r.Age, r.StartDate, r.Salary}); err != nil {
			return fmt.Errorf("writing %s record: %w", path, err)
		}
	}

	// csv.Writer buffers its output; Flush pushes it to the file and Error
	// reports any failure that happened during a previous Write or Flush.
	w.Flush()
	if err = w.Error(); err != nil {
		return fmt.Errorf("flushing %s: %w", path, err)
	}
	return nil
}
本文撰写时,上述代码已在 Go 1.22 搭配 Colly v2 的环境下测试通过。当您的抓取目标不再局限于演示 URL 时,请逐步加入速率限制、代理切换器以及用户代理(User-Agent)扩展功能。我们关于使用 Go 进行网页抓取的完整指南涵盖了相关工具链。