import json
import urllib.parse
import urllib.request

from lxml import etree
def load_page(url):
    """Fetch *url* through an HTTP proxy and hand the HTML to parse().

    Side effects only: downloads the page, decodes it as UTF-8 and triggers
    parsing/writing via parse(); returns nothing.
    """
    headers = {
        # Desktop Chrome UA so the site serves the regular HTML page.
        # (Original string was corrupted: "...Safari/537.360.3497.92 Safari/537.36".)
        "User-Agent": ("Mozilla/5.0 (Windows NT 6.1; Win64; x64) "
                       "AppleWebKit/537.36 (KHTML, like Gecko) "
                       "Chrome/75.0.3770.142 Safari/537.36"),
    }
    request = urllib.request.Request(url, headers=headers)
    # NOTE(review): hard-coded free HTTP proxy — likely dead by now; TODO make
    # this configurable or drop the proxy entirely.
    proxies = {'http': '39.130.150.44:80'}
    proxy_handler = urllib.request.ProxyHandler(proxies)
    opener = urllib.request.build_opener(proxy_handler)
    html = opener.open(request).read().decode('utf-8')
    parse(html)
def Write_file(veg_items):
    """Append each record in *veg_items* to veg.json, one JSON object per line.

    ensure_ascii=False keeps the Chinese field names and values readable in
    the output file.
    """
    # Open the file once instead of re-opening it for every record.
    # Fixed: original used lowercase `false` (NameError in Python).
    with open('veg.json', 'a', encoding='UTF-8') as file:
        for content in veg_items:
            file.write(json.dumps(content, ensure_ascii=False))
            file.write("\n")
def parse(html):
    """Extract price rows from the listing HTML and append them to veg.json.

    Each div.pri_k row yields a dict with date, breed, market name/link,
    low/high/average price and unit; each row's records are written out via
    Write_file().
    """
    # Renamed local (was `parse`) — shadowed this function's own name.
    tree = etree.HTML(html)
    # All price rows on the page.
    node_list = tree.xpath('//div[@class="pri_k"]')
    for node in node_list:
        try:
            veg_date = node.xpath(".//span[@class='k_1']/text()")
            veg_breed = node.xpath(".//span[2]/text()")
            veg_terminal_market_title = node.xpath(".//span[@class='k_3']/a/text()")
            # Fixed: attribute access needs '@href' — 'a/href' selects
            # (nonexistent) child elements and always returned [].
            veg_terminal_market_href = node.xpath(".//span[@class='k_3']/a/@href")
            veg_price_low = node.xpath(".//span[4]/text()")
            veg_price_high = node.xpath(".//span[5]/text()")
            veg_price_avg = node.xpath(".//span[6]/text()")
            veg_unit = node.xpath(".//span[7]/text()")
            veg_items = []
            for i in range(len(veg_date)):
                veg_item = {}
                veg_item['日期'] = veg_date[i]
                # NOTE(review): the original keys below were lost (all empty
                # strings, so each assignment overwrote the previous one);
                # reconstructed plausible Chinese field names — confirm against
                # the intended output schema.
                veg_item['品种'] = veg_breed[i]
                veg_item['市场'] = veg_terminal_market_title[i]
                veg_item['链接'] = veg_terminal_market_href[i]
                # Strip whitespace/newlines embedded in the low-price cell.
                veg_item['最低价'] = veg_price_low[i].replace(" ", "").replace("\n", "").replace("\r", "")
                veg_item['最高价'] = veg_price_high[i]
                veg_item['平均价'] = veg_price_avg[i]
                veg_item['单位'] = veg_unit[i]
                # Fixed: was `veg_item.append(veg_item)` — a dict has no
                # .append(), so every row raised AttributeError and was
                # silently swallowed by the bare except.
                veg_items.append(veg_item)
            Write_file(veg_items)
        except IndexError:
            # A row missing some spans is skipped rather than aborting the
            # whole page (was a bare `except: pass`, which also hid real bugs).
            pass
if __name__ == "__main__":
    # Entry point: scrape the vegetable price listing page.
    target_url = "http://www.vegnet.com.cn/Price/List.html"
    load_page(target_url)











# 网友评论 ("netizen comments") — stray page-footer text captured with the
# script; kept as a comment so the file remains valid Python.