美文网首页
爬取蔬菜网(完整版)

爬取蔬菜网(完整版)

作者: My_sh | 来源:发表于2022-04-21 08:03 被阅读0次

# Standard library
import json
import urllib.parse
import urllib.request

# Third-party
# BUG FIX: the scrape fused two tokens — original read "from lxmlimport etree".
from lxml import etree

def load_page(url):
    """Download `url` through an HTTP proxy and pass the HTML to parse().

    Parameters
    ----------
    url : str
        Address of the price-list page to fetch.
    """
    # NOTE(review): this UA string looks garbled ("...Safari/537.360.3497.92
    # Safari/537.36") — kept byte-for-byte to preserve behavior; confirm
    # against the original source.
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.142 Safari/537.360.3497.92 Safari/537.360.3497.92 Safari/537.36".replace(" Safari/537.360.3497.92 Safari/537.360.3497.92", " Safari/537.360.3497.92")
    }
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.142 Safari/537.360.3497.92 Safari/537.36"
    }
    request = urllib.request.Request(url, headers=headers)
    # NOTE(review): hardcoded free proxy — almost certainly dead by now;
    # consider making it a parameter or dropping the proxy entirely.
    proxies = {'http': '39.130.150.44:80'}
    proxy_handler = urllib.request.ProxyHandler(proxies)
    opener = urllib.request.build_opener(proxy_handler)
    html = opener.open(request).read().decode('utf-8')
    parse(html)

def Write_file(veg_items):
    """Append each record in `veg_items` to veg.json, one JSON object per line.

    Parameters
    ----------
    veg_items : list[dict]
        Records to persist; each is serialized on its own line (JSON Lines).
    """
    # Open the file once instead of once per record; append mode keeps
    # records from earlier pages/runs.
    with open('veg.json', 'a', encoding='UTF-8') as file:
        for content in veg_items:
            # BUG FIX: was `ensure_ascii=false` (NameError) — Python's
            # constant is `False`. ensure_ascii=False keeps Chinese text
            # readable in the output file.
            file.write(json.dumps(content, ensure_ascii=False))
            file.write("\n")

def parse(html):
    """Extract vegetable price rows from the listing HTML and persist them.

    Each row lives in a ``<div class="pri_k">``: date, breed, terminal-market
    name and link, low/high/average price and unit. One dict is built per
    record and the batch is appended to veg.json via Write_file().

    Parameters
    ----------
    html : str
        Raw HTML of http://www.vegnet.com.cn/Price/List.html.
    """
    # BUG FIX: the original bound the parsed tree to a local named `parse`,
    # shadowing this very function.
    root = etree.HTML(html)
    # Every price row on the page.
    node_list = root.xpath('//div[@class="pri_k"]')
    for node in node_list:
        try:
            veg_date = node.xpath(".//span[@class='k_1']/text()")
            veg_breed = node.xpath(".//span[2]/text()")
            veg_terminal_market_title = node.xpath(".//span[@class='k_3']/a/text()")
            # BUG FIX: attribute access in XPath needs '@href'; the original
            # './/...a/href' selected (nonexistent) <href> child elements.
            veg_terminal_market_href = node.xpath(".//span[@class='k_3']/a/@href")
            veg_price_low = node.xpath(".//span[4]/text()")
            veg_price_high = node.xpath(".//span[5]/text()")
            veg_price_avg = node.xpath(".//span[6]/text()")
            veg_unit = node.xpath(".//span[7]/text()")

            veg_items = []
            for i in range(len(veg_date)):
                veg_item = {}
                veg_item['日期'] = veg_date[i]
                # NOTE(review): all dict keys except '日期' were lost when
                # this code was scraped (they came through as ''); the keys
                # below are reconstructed from the variable names — confirm
                # against the original script.
                veg_item['品种'] = veg_breed[i]
                veg_item['市场'] = veg_terminal_market_title[i]
                veg_item['链接'] = veg_terminal_market_href[i]
                # Strip whitespace/newlines that pad the price cell.
                veg_item['最低价'] = veg_price_low[i].replace(" ", "").replace("\n", "").replace("\r", "")
                veg_item['最高价'] = veg_price_high[i]
                veg_item['平均价'] = veg_price_avg[i]
                veg_item['单位'] = veg_unit[i]
                # BUG FIX: original did `veg_item.append(veg_item)` — appended
                # the dict to itself (AttributeError) instead of collecting it.
                veg_items.append(veg_item)
            Write_file(veg_items)
        except IndexError:
            # A row missing one of the expected spans is skipped rather than
            # aborting the whole page (original was a bare `except: pass`,
            # which also hid the bugs fixed above).
            continue

if __name__ == "__main__":
    # Entry point: scrape the first page of the vegnet price list.
    url = "http://www.vegnet.com.cn/Price/List.html"
    load_page(url)

相关文章

网友评论

      本文标题:爬取蔬菜网(完整版)

      本文链接:https://www.haomeiwen.com/subject/zxpvertx.html