美文网首页
使用golang爬取 斗鱼 所有颜值主播照片

使用golang爬取 斗鱼 所有颜值主播照片

作者: FredricZhu | 来源:发表于2020-02-01 15:14 被阅读0次
package main

import (
    "fmt"
    "io"
    "net/http"
    "os"
    "regexp"
    "strconv"
    "strings"
)

// HttpGet issues an HTTP GET request for url and returns the whole response
// body as a string.
//
// A non-nil error is returned when the request cannot be built or sent, when
// the server answers with a status outside the 2xx range, or when reading the
// body fails. On error the returned string is always empty, so callers never
// receive a silently truncated body.
//
// The client is created without a timeout, so a server that stops responding
// blocks the caller until the connection is torn down.
func HttpGet(url string) (result string, err error) {
    req, err := http.NewRequest("GET", url, nil)
    if err != nil {
        return "", err
    }

    resp, err := (&http.Client{}).Do(req)
    if err != nil {
        return "", err
    }
    defer resp.Body.Close()

    // An error page is not the payload the caller asked for; report it
    // instead of handing it back as if it were data.
    if resp.StatusCode < 200 || resp.StatusCode > 299 {
        return "", fmt.Errorf("GET %s: unexpected status %s", url, resp.Status)
    }

    // io.Copy treats io.EOF as the normal end of the stream and returns every
    // other read error, including one delivered together with zero bytes.
    var body strings.Builder
    if _, err = io.Copy(&body, resp.Body); err != nil {
        return "", fmt.Errorf("GET %s: reading body: %w", url, err)
    }
    return body.String(), nil
}

// SaveImage downloads the resource at url and writes it to the file
// imageName + ".jpg".
//
// Failures are reported on standard output and the function simply returns;
// it has no return value. The download is started before the file is created,
// and a file that could not be written completely is removed again, so a
// failed download does not leave an empty or truncated .jpg behind.
//
// The client is created without a timeout, so a server that stops responding
// blocks the caller until the connection is torn down.
func SaveImage(imageName, url string) {
    req, err := http.NewRequest("GET", url, nil)
    if err != nil {
        fmt.Println("http.NewRequest error:", err)
        return
    }

    resp, err := (&http.Client{}).Do(req)
    if err != nil {
        fmt.Println("client.Do error:", err)
        return
    }
    defer resp.Body.Close()

    // Do not store an HTTP error page under an image file name.
    if resp.StatusCode < 200 || resp.StatusCode > 299 {
        fmt.Println("unexpected HTTP status:", resp.Status, url)
        return
    }

    fileName := imageName + ".jpg"
    f, err := os.Create(fileName)
    if err != nil {
        fmt.Println("os.Create error:", err)
        return
    }

    _, err = io.Copy(f, resp.Body)
    // Close explicitly (not deferred) so a failed flush is noticed too.
    if closeErr := f.Close(); err == nil {
        err = closeErr
    }
    if err != nil {
        fmt.Println("save image error:", err)
        // Best effort: the partial file is useless, and a failure to remove
        // it changes nothing for the caller.
        _ = os.Remove(fileName)
    }
}

// SaveImages2File downloads every image in imageURLArr and stores it under the
// name found at the same index of imageNameArr (SaveImage appends ".jpg").
//
// Only the positions present in both slices are processed, so slices of
// different lengths no longer cause an index-out-of-range panic.
//
// Names are cleaned before they are used as file names: "?" is dropped, as
// before, and the path separators "/" and "\" are replaced with "_" so that a
// name cannot point into another directory.
func SaveImages2File(imageNameArr, imageURLArr []string) {
    n := len(imageNameArr)
    if len(imageURLArr) < n {
        n = len(imageURLArr)
    }

    sanitizer := strings.NewReplacer("?", "", "/", "_", `\`, "_")
    for i := 0; i < n; i++ {
        SaveImage(sanitizer.Replace(imageNameArr[i]), imageURLArr[i])
    }
}

// Patterns used by SpidePage to pull image URLs and display names out of the
// raw response text. They are compiled once instead of on every page.
var (
    imageURLExp  = regexp.MustCompile(`"rs1":"(?s:(.*?))/dy2"`)
    imageNameExp = regexp.MustCompile(`"nn":"(?s:(.*?))"`)
)

// SpidePage fetches listing page idx, extracts the image URLs and names found
// in the response and saves the images through SaveImages2File.
//
// idx is always sent on page before the function returns, whether or not the
// page could be fetched, so a receiver that waits for one message per page
// cannot block forever. A fetch failure is reported on standard output first.
//
// URLs and names are paired by their order of appearance in the response;
// this assumes both patterns match once per entry, in the same order.
func SpidePage(idx int, page chan int) {
    defer func() { page <- idx }()

    url := "https://www.douyu.com/gapi/rknc/directory/yzRec/" + strconv.Itoa(idx)
    result, err := HttpGet(url)
    if err != nil {
        fmt.Println("HttpGet error:", err)
        return
    }

    imageURLs := imageURLExp.FindAllStringSubmatch(result, -1)
    imageNames := imageNameExp.FindAllStringSubmatch(result, -1)

    // Pair only as many entries as both match lists provide; indexing the
    // names by the URL count alone panics when fewer names were found.
    n := len(imageURLs)
    if len(imageNames) < n {
        n = len(imageNames)
    }

    imageNameArr := make([]string, 0, n)
    imageURLArr := make([]string, 0, n)
    for i := 0; i < n; i++ {
        imageNameArr = append(imageNameArr, imageNames[i][1])
        imageURLArr = append(imageURLArr, imageURLs[i][1])
    }

    SaveImages2File(imageNameArr, imageURLArr)
}

// toWork crawls pages start through end (inclusive), one goroutine per page,
// and prints a line for every page index reported on the page channel.
//
// Worker termination is tracked on a separate done channel instead of by
// counting page messages, so the function returns once every worker has
// finished even if a worker returns without reporting its page. When end is
// smaller than start no worker is started and the function returns at once.
func toWork(start, end int) {
    fmt.Printf("正在爬取第%d页到第%d页\n", start, end)

    page := make(chan int)
    done := make(chan struct{})
    for i := start; i <= end; i++ {
        go func(idx int) {
            SpidePage(idx, page)
            done <- struct{}{}
        }(i)
    }

    // A worker's page message is sent before SpidePage returns, so it is
    // always received before that worker's done signal.
    for running := end - start + 1; running > 0; {
        select {
        case idx := <-page:
            fmt.Printf("第%d页爬取完毕\n", idx)
        case <-done:
            running--
        }
    }
}

// main asks for the first and last page to crawl on standard input and starts
// the crawl. Input that cannot be read as an integer, or a range that does
// not satisfy 1 <= start <= end, is reported and the program returns without
// crawling.
func main() {
    var start, end int

    fmt.Print("请输入开始爬取的起始页(>=1):")
    if _, err := fmt.Scan(&start); err != nil {
        fmt.Println("fmt.Scan error:", err)
        return
    }

    fmt.Print("请输入结束爬取的终止页(>=start):")
    if _, err := fmt.Scan(&end); err != nil {
        fmt.Println("fmt.Scan error:", err)
        return
    }

    // Enforce the constraints the prompts advertise.
    if start < 1 || end < start {
        fmt.Println("页码范围无效:需要 1 <= 起始页 <= 终止页")
        return
    }

    toWork(start, end)
}

爬取完毕的效果如下(加了滤镜颜值也不咋地):

image.png image.png

相关文章

网友评论

      本文标题:使用golang爬取 斗鱼 所有颜值主播照片

      本文链接:https://www.haomeiwen.com/subject/cpcxxhtx.html