import urllib.request
import urllib.parse
def main():
    """Prompt for a page range and fetch every page in it.

    Reads the first and last page numbers (both inclusive) from standard
    input, then builds a request for each page of the Douban Top 250
    listing and hands it to ``handle_response``.
    """
    # First page to fetch.
    first_page = int(input("开始页码:"))
    # Last page to fetch (inclusive).
    last_page = int(input("结束页码:"))
    # Base URL; the per-page query string is appended by handle_request.
    base_url = 'https://movie.douban.com/top250?'
    # Walk the inclusive range one page at a time.
    current = first_page
    while current <= last_page:
        handle_response(handle_request(base_url, current))
        current += 1
# Build the request for one page of the listing.
def handle_request(url, page):
    """Return a ``urllib.request.Request`` for the given 1-based page.

    The page number is converted to a ``start`` offset (25 items per page)
    and appended to ``url`` as a query parameter.

    Args:
        url: Base URL. It may end with ``?`` or ``&``, already contain a
            query string, or have no query string at all; the correct
            separator is chosen in each case.
        page: 1-based page number.

    Returns:
        A ``urllib.request.Request`` carrying the final URL and a
        browser-like ``User-Agent`` header.
    """
    items_per_page = 25
    offset = (page - 1) * items_per_page
    query = urllib.parse.urlencode({'start': offset})

    # Pick the separator from the URL instead of assuming it ends in '?',
    # so a base URL without '?' (or with existing parameters) still works.
    if url.endswith(('?', '&')):
        separator = ''
    elif '?' in url:
        separator = '&'
    else:
        separator = '?'
    full_url = url + separator + query

    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36',
    }
    return urllib.request.Request(url=full_url, headers=headers)
def handle_response(request):
    """Open ``request``, decode the body as UTF-8 and print it.

    Args:
        request: A ``urllib.request.Request`` (or URL string) to open.

    Raises:
        urllib.error.URLError: Propagated from ``urlopen`` on failure.
        UnicodeDecodeError: If the body is not valid UTF-8.
    """
    # The context manager closes the response even if read/decode raises,
    # so the underlying connection is not leaked.
    with urllib.request.urlopen(request) as response:
        html = response.read().decode("utf8")
    print(html)
# Run the scraper only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()










# Web-page residue: "Reader comments" section heading (not part of the script).