from pyquery import PyQuery as pq
import requests
import pandas as pd
# Module-level accumulator: get_one_page() appends one
# [rank, title, actors, time, score] list per scraped entry, and main()
# reads it to build the DataFrame.
results = []
def get_one_page(offset):
    """Scrape one page of the Maoyan board and append its rows to ``results``.

    Requests ``https://maoyan.com/board/4?offset=<offset>``, selects every
    ``.board-wrapper dd`` element in the returned HTML and appends one
    ``[rank, title, actors, time, score]`` list per element to the
    module-level ``results`` list. Nothing is returned.

    Args:
        offset: Value for the ``offset`` query parameter; it is converted
            with ``str()`` before being concatenated onto the URL.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.
        ValueError: If a rank or score text cannot be parsed as a number.
    """
    url = 'https://maoyan.com/board/4?offset=' + str(offset)
    headers = {
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.90 Safari/537.36'
    }
    # requests has no default timeout, so without one a stalled connection
    # would block the whole scrape indefinitely.
    response = requests.get(url, headers=headers, timeout=10)
    # Fail loudly on 4xx/5xx instead of parsing an error page into zero rows.
    response.raise_for_status()

    doc = pq(response.text)
    for item in doc('.board-wrapper dd').items():
        rank = item('.board-index').text()
        title = item('.name').text()
        # The slices drop the first 3 / 5 characters of the element text --
        # presumably a fixed label prefix in the page markup; TODO confirm.
        actors = item('.star').text().strip()[3:]
        release_time = item.find('.releasetime').text().strip()[5:]
        score = item.find('.score').text()
        results.append([int(rank), title, actors, release_time, float(score)])
def main():
    """Scrape ten board pages and return the collected rows as a DataFrame.

    Calls ``get_one_page`` with offsets 0, 10, ..., 90 and then builds a
    DataFrame from the module-level ``results`` list.

    Returns:
        pandas.DataFrame: One row per entry in ``results``, with columns
        ``rank``, ``title``, ``actors``, ``time`` and ``score``.
    """
    for page in range(10):
        get_one_page(page * 10)
    # Return the frame so the scraped data reaches the caller instead of
    # being dropped together with a local variable.
    return pd.DataFrame(
        results,
        columns=['rank', 'title', 'actors', 'time', 'score'],
    )
# Runs the scrape at module load time; there is no __main__ guard, so this
# also executes when the file is imported.
main()
关于 pyquery 的使用，请查看 pyquery 简介。
网友评论