美文网首页
使用tkinter制作爬虫GUI程序

使用tkinter制作爬虫GUI程序

作者: 天雨流芳hodo | 来源:发表于2019-08-21 09:17 被阅读0次

1.如何使用tkinter弹出目录选择对话框,获取本地存放路径
2.构造URL,将图片分类、抓取页数参数化
3.使用tkinter获取界面输入的参数,并传给执行代码
4.利用tkinter模块做成GUI程序
直接上代码

# coding:utf-8
import tkinter as tk
from tkinter import ttk
from tkinter.filedialog import askdirectory
import requests
import re
import os

"""图形界面爬取图片cj20190807"""


class BXS:
    """Tkinter front end for a small image scraper.

    The window lets the user pick a save directory, an image category and
    the number of listing pages to crawl.  Pressing the run button fetches
    each listing page, extracts (title, image URL) pairs with regular
    expressions and saves every image into the chosen directory, writing
    one progress line per image into the text box.
    """

    # Listing page address; ``cid`` and ``pager_offset`` are appended to it.
    BASE_URL = "https://www.dbmeinv.com/index.htm?"

    # Category label shown in the combobox -> ``cid`` query value.
    # Insertion order is also the display order of the drop-down.
    CATEGORY_IDS = {
        '大胸妹': 2,
        '小翘臀': 6,
        '黑丝袜': 7,
        '美腿控': 3,
        '有颜值': 4,
        '大杂烩': 5,
    }

    # Seconds to wait for the server; without a timeout a stalled
    # connection would block the (single-threaded) GUI indefinitely.
    REQUEST_TIMEOUT = 10

    # Compiled once: title and image address of each listed picture.
    _NAME_PATTERN = re.compile(r'<img class="height_min".*?title="(.*?)"', re.S)
    _SRC_PATTERN = re.compile(r'<img class="height_min".*?src="(.*?\.jpg)"', re.S)

    # Characters that cannot appear in a file name on common file systems.
    _INVALID_FILENAME_CHARS = re.compile(r'[\\/:*?"<>|\r\n\t]')

    def __init__(self):
        """Create the main window and all widgets (layout is done in gui())."""
        self.window = tk.Tk()
        self.window.title("爬取不羞涩图片")
        self.window.geometry("580x580")

        self.label1 = tk.Label(self.window, text='存放路径')
        self.label2 = tk.Label(self.window, text='选择分类')
        self.label3 = tk.Label(self.window, text='爬取页数')

        # Text box that shows download progress.
        self.info = tk.Text(self.window, width=70)

        # Drop-down: image category; the first entry is preselected.
        self.num1 = tk.StringVar()
        self.cmb1 = ttk.Combobox(self.window, textvariable=self.num1)
        self.cmb1['values'] = tuple(self.CATEGORY_IDS)
        self.cmb1.current(0)

        # Drop-down: number of pages to crawl; the first entry is preselected.
        self.num2 = tk.StringVar()
        self.cmb2 = ttk.Combobox(self.window, textvariable=self.num2)
        self.cmb2['values'] = (1, 2, 3, 4, 5)
        self.cmb2.current(0)

        # Entry that displays (and allows editing of) the save directory.
        self.path = tk.StringVar()
        self.e = tk.Entry(self.window, show=None, width=8, textvariable=self.path)

        # Buttons: choose directory, clear directory, start crawling.
        self.t1_button = tk.Button(self.window, text='选择路径', relief=tk.RAISED,
                                   width=8, height=1, command=self.select_Path)
        self.t2_button = tk.Button(self.window, text='清空路径', relief=tk.RAISED,
                                   width=8, height=1, command=self.clear_Path)
        self.run_button = tk.Button(self.window, text="开始提取", relief=tk.RAISED,
                                    command=self.run)

    def gui(self):
        """Place every widget on the window grid."""
        self.label1.grid(row=1, column=1, padx=10, pady=10, ipadx=10, ipady=10)
        self.label2.grid(row=2, column=1, padx=10, pady=20, ipadx=10, ipady=10)
        self.cmb1.grid(row=2, column=2, padx=10, pady=20, ipadx=30, ipady=8)
        self.label3.grid(row=3, column=1, padx=10, pady=10, ipadx=10, ipady=10)
        self.cmb2.grid(row=3, column=2, padx=10, pady=20, ipadx=30, ipady=8)
        self.e.grid(row=1, column=2, padx=1, pady=10, ipadx=130, ipady=8)
        self.t1_button.grid(row=1, column=3, padx=10)
        self.t2_button.grid(row=1, column=4)
        self.run_button.grid(row=2, column=3)
        self.info.grid(row=4, rowspan=5, column=1, columnspan=4)

    def select_Path(self):
        """Open a directory chooser and store the selection in the entry."""
        filepath = askdirectory()
        # askdirectory() returns an empty string when the dialog is
        # cancelled; keep the previously chosen path in that case.
        if filepath:
            self.path.set(filepath)

    def clear_Path(self):
        """Remove all text from the save-directory entry."""
        self.e.delete(0, "end")  # from the first character to the last

    def get_cid(self):
        """Return the ``cid`` query value for the selected category.

        Raises:
            ValueError: the combobox text is not one of the known
                categories (the combobox is editable, so this can happen).
        """
        label = self.cmb1.get()
        try:
            return self.CATEGORY_IDS[label]
        except KeyError:
            raise ValueError("未知的分类: " + repr(label))

    def get_pagenum(self):
        """Return the raw text of the page-count combobox."""
        return self.cmb2.get()

    def get_url(self):
        """Return the listing URLs for pages 1..N of the selected category.

        Raises:
            ValueError: the page count is not an integer or the category
                is unknown.
        """
        page_count = int(self.get_pagenum())
        cid = self.get_cid()
        return [
            self.BASE_URL + "cid=" + str(cid) + "&pager_offset=" + str(page)
            for page in range(1, page_count + 1)
        ]

    @classmethod
    def _parse_items(cls, html):
        """Extract ``[title, image_url]`` pairs from a listing page's HTML."""
        titles = cls._NAME_PATTERN.findall(html)
        sources = cls._SRC_PATTERN.findall(html)
        # The two patterns are matched independently; zip() stops at the
        # shorter list so a length mismatch cannot raise IndexError.
        return [[title, src] for title, src in zip(titles, sources)]

    @classmethod
    def _safe_name(cls, name):
        """Replace characters that are invalid in file names with ``_``."""
        return cls._INVALID_FILENAME_CHARS.sub("_", name)

    def _log(self, message):
        """Append a line to the progress box and repaint it immediately."""
        self.info.insert('end', message + '\n')
        self.info.see('end')
        # Downloads run inside the button callback, so the event loop is
        # blocked; flush pending redraws so progress is visible meanwhile.
        self.info.update_idletasks()

    def get_dictdata(self, url):
        """Download a listing page and return its ``[title, image_url]`` pairs.

        A list of pairs is returned rather than a dict because titles may
        repeat and a dict would keep only one link per title.

        Raises:
            requests.RequestException: the page could not be fetched.
        """
        response = requests.get(url=url, timeout=self.REQUEST_TIMEOUT)
        return self._parse_items(response.text)

    def get_content(self, url):
        """Return the body bytes of ``url``, or None on any request failure
        or non-200 status."""
        try:
            response = requests.get(url, timeout=self.REQUEST_TIMEOUT)
        except requests.RequestException:
            return None
        if response.status_code == 200:
            return response.content
        return None

    def download(self, url):
        """Save every image listed on the page ``url`` into the chosen directory.

        Files that already exist are left untouched.  Failures are reported
        in the progress box and do not stop the remaining downloads.
        """
        save_dir = self.e.get()

        try:
            items = self.get_dictdata(url)
        except requests.RequestException as exc:
            self._log("页面获取失败:" + url + " " + str(exc))
            return

        for title, src in items:
            # Titles may repeat, so four characters taken from the end of
            # the link are appended to keep file names distinct.
            file_name = self._safe_name(title + src[-8:-4]) + ".jpg"
            file_path = os.path.join(save_dir, file_name)

            print("正在下载:" + title + " " + src)
            self._log("正在下载:" + title + " " + src)

            if os.path.exists(file_path):
                continue

            # Fetch before opening the file: a failed download must not
            # leave an empty file behind that would block later retries.
            content = self.get_content(src)
            if content is None:
                self._log("下载失败:" + src)
                continue

            try:
                with open(file_path, "wb") as f:
                    f.write(content)
            except OSError as exc:
                self._log("保存失败:" + file_path + " " + str(exc))

    def run(self):
        """Validate the inputs, then download every requested listing page."""
        if not self.e.get().strip():
            self._log("请先选择存放路径")
            return
        try:
            urls = self.get_url()
        except ValueError as exc:
            self._log("参数错误:" + str(exc))
            return
        for page_url in urls:
            self.download(page_url)

def main():
    """Create the scraper window, lay out its widgets and enter the Tk event loop."""
    app = BXS()
    app.gui()
    tk.mainloop()


# Start the GUI only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()


相关文章

网友评论

      本文标题:使用tkinter制作爬虫GUI程序

      本文链接:https://www.haomeiwen.com/subject/howvsctx.html