在使用 etree.HTML 时，发现报如下错误：
# Direct import of etree from lxml -- the form the author reports as failing.
# `text` is assumed to hold the HTML source to parse; it is not defined here.
from lxml import etree
selector = etree.HTML(text)

可以改用 lxml 中的 html 模块来避免该错误：
# Workaround: import lxml's html module and reach etree through it.
# `text` is assumed to hold the HTML source to parse; it is not defined here.
from lxml import html
selector = html.etree.HTML(text)
另附获取百度首页 title 的测试代码：
#!/usr/bin/python3
import requests
from lxml import html
import time
#//img[@id="mainphoto"]'
# Browser-like User-Agent passed to requests.get().
# The product token must read "Chrome/<version>"; the two adjacent string
# literals are concatenated as-is, so the "/" has to be written explicitly.
headers = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 '
        '(KHTML, like Gecko) Chrome/49.0.2623.112 Safari/537.36'
    ),
}
def get_baidu():
    """Fetch the Baidu home page and print the ``title`` attribute found by XPath.

    Requests ``https://www.baidu.com`` with the module-level ``headers`` and
    an 8-second timeout, decodes the response body as UTF-8, parses it with
    ``html.etree.HTML`` and evaluates a fixed absolute XPath expression.
    Both the parsed root and the XPath result are printed.

    Returns:
        The value returned by ``selector.xpath(...)`` for the ``@title``
        expression.
    """
    url = 'https://www.baidu.com'
    resp = requests.get(url, headers=headers, timeout=8)
    text = resp.content.decode('utf-8')
    # Reach etree through lxml.html rather than importing lxml.etree directly.
    selector = html.etree.HTML(text)
    print('selector', selector)
    # NOTE(review): this absolute path depends on the exact DOM layout of the
    # page; presumably it yields nothing if the layout differs -- confirm.
    title = selector.xpath(
        '/html/body/div[1]/div[1]/div[5]/div/div/div[1]/map/area/@title'
    )
    print('title', title)
    return title
if __name__ == '__main__':
    # Run the Baidu demo; the previously called save_amazon_html() is not
    # defined in this script and would raise NameError.
    get_baidu()
网友评论