Python脚本
基于**阁网站写的一个Python爬虫
代码如下(注意:代码中用到的 URL 和 base_url 两个变量并未在下面给出,运行前需要自行定义):
import requests
from bs4 import BeautifulSoup
def simple_crawel(url):
    """Download one page and append its main text to ``./sancunrenjian.txt``.

    The page is fetched with ``requests``, decoded as UTF-8, parsed with
    BeautifulSoup (``lxml`` parser), and the text of the element
    ``<div id="content">`` is appended to the output file.

    Args:
        url: Address of the page to fetch.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.
        UnicodeDecodeError: If the response body is not valid UTF-8.
        AttributeError: If the page contains no ``<div id="content">``.
    """
    # A timeout keeps one stalled connection from hanging the whole crawl.
    response = requests.get(url, timeout=30)
    # Fail on 4xx/5xx instead of parsing an error page as if it were content.
    response.raise_for_status()
    html = response.content.decode('utf-8')

    soup = BeautifulSoup(html, 'lxml')
    content_div = soup.find('div', id='content')
    if content_div is None:
        # Same exception type the unchecked attribute access used to raise,
        # but with a message that names the page at fault.
        raise AttributeError(
            "no <div id='content'> found at {}".format(url)
        )

    # The context manager guarantees the text is flushed and the handle is
    # closed before returning.
    with open("./sancunrenjian.txt", 'a', encoding='utf-8') as f:
        f.write(content_div.get_text())
def get_all():
    """Crawl every linked page of the index at ``URL`` into one text file.

    The index page is fetched and parsed, then each child of the ``<dl>``
    inside ``<div id="list">`` that contains a link is processed in order:
    the link text is appended to ``./sancunrenjian.txt`` as a heading and
    printed, and the linked page is appended via ``simple_crawel``.

    NOTE(review): ``URL`` and ``base_url`` are read as module-level names
    that are not defined in the visible part of this file; they must be
    defined before this function is called.

    Raises:
        requests.RequestException: If the index page cannot be fetched.
        AttributeError: If the index page lacks the expected
            ``<div id="list">`` / ``<dl>`` structure.
    """
    # A timeout keeps a stalled connection from blocking forever.
    response = requests.get(URL, timeout=30)
    response.raise_for_status()
    html = response.content.decode('utf-8')
    # Strip newlines and tabs before parsing, as the original code did.
    html = html.replace("\n", "").replace('\t', '')

    soup = BeautifulSoup(html, 'lxml')
    index_div = soup.find('div', id='list')
    if index_div is None or index_div.dl is None:
        raise AttributeError(
            "no <div id='list'> with a <dl> found at {}".format(URL)
        )

    for entry in index_div.dl.children:
        # Children may be bare text nodes or tags without a link; those have
        # no usable ``a`` and are skipped rather than handled by exception.
        link = getattr(entry, 'a', None)
        if link is None:
            continue
        href = link.get('href')
        if not href:
            continue
        title = link.get_text()

        try:
            # Close the handle before simple_crawel() reopens the same file,
            # so the buffered heading is flushed ahead of the page text.
            with open("./sancunrenjian.txt", 'a', encoding='utf-8') as f:
                f.write('\n\n\n' + title + '\n')
            print(title)
            simple_crawel(base_url + href)
        except (requests.RequestException, UnicodeDecodeError,
                AttributeError, OSError) as exc:
            # Best-effort crawl: report the failed page and keep going.
            print("skipped {!r}: {}".format(title, exc))
if __name__ == "__main__":
    # Run the full crawl only when executed as a script, not on import.
    get_all()
Python 确实很强大,不到 30 行代码就完成了这个爬虫。
网友评论