1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77
| package main
import (
	"fmt"
	"io"
	"log"
	"net/http"
	"os"
	"strconv"
	"strings"
	"time"

	"github.com/PuerkitoBio/goquery"
)
func main() {
file, err := os.Create("Douban-Book-TOP250") if err != nil { fmt.Println(err) } defer file.Close()
var client = http.Client{}
for i := 0; i < 250; i += 25 { req, _ := http.NewRequest("GET", "https://book.douban.com/top250?start="+strconv.Itoa(i), nil) req.Header.Set("User-Agent", "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1)")
resp, err := client.Do(req) if err != nil { fmt.Println("http get error", err) return } defer resp.Body.Close()
doc, err := goquery.NewDocumentFromReader(resp.Body) if err != nil { log.Fatal(err) }
doc.Find("div.indent>table>tbody>tr.item").Each(func(i int, s *goquery.Selection) { item := s.Find("td[valign=top]")
bookName := strings.Replace(strings.Replace(item.Find("div.pl2>a").Text(), "\n", "", -1), " ", "", -1)
author := strings.Split(s.Find("p.pl").Text(), "/")[0]
quote := strings.Replace(strings.Replace(s.Find("p.quote").Text(), "\n", "", -1), " ", "", -1)
bookName = bookName + strings.Repeat(" ", (120-len(bookName))) author = author + strings.Repeat(" ", (50-len(author)))
content := "TOP" + strconv.Itoa(i) + "\t" + bookName + author + quote + "\n"
file.WriteString(content) }) }
fmt.Print(" 程序执行完毕,请查看结果。")
}
|