1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30
import csv
import re

import requests
# Base address of the Douban Top 250 listing; pages are selected with ?start=N.
url = "https://movie.douban.com/top250"

# Browser-like User-Agent sent with every request. The value previously began
# with a stray ": " (copy/paste residue), which produced a malformed header.
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.45 Safari/537.36"
}

# One match per <li> entry: the first "title" span, the <p class=""> info
# paragraph and the rating. re.S lets ".*?" span line breaks in the HTML.
obj = re.compile(r'<li>.*?<span class="title">(?P<name>.*?)</span>.*?'
                 r'<p class="">(?P<actors>.*?)</p>.*?'
                 r'<span class="rating_num" property="v:average">(?P<score>.*?)</span>.*?', re.S)

_PAGE_SIZE = 25          # step of the ?start= offset between requests
_TOTAL_ENTRIES = 250     # offsets 0, 25, ..., 225 are requested
_REQUEST_TIMEOUT = 10    # seconds; without a timeout a stalled socket blocks forever


def _clean_actors(raw: str) -> str:
    """Return the info paragraph with spaces and newlines removed."""
    cleaned = raw.replace(" ", "").replace("\n", "")
    # NOTE(review): once every space has been stripped above, this replace
    # cannot match as written. It presumably targeted a non-breaking space or
    # "&nbsp;" before the source was reformatted -- confirm before changing.
    return cleaned.replace(" ", " ")


def format_page(content: str) -> str:
    """Build the output text for every entry matched in one page of HTML.

    Each match of ``obj`` contributes three lines: the captured title, the
    cleaned info paragraph and the score. Returns "" when nothing matches.
    """
    lines = []
    for match in obj.finditer(content):
        # NOTE(review): the "作者:" label is written in front of the captured
        # title group; it is kept verbatim so the output format is unchanged.
        lines.append(f"作者:{match.group('name')}\n")
        lines.append(f"{_clean_actors(match.group('actors'))}\n")
        lines.append(f"评分:{match.group('score')}\n")
    return "".join(lines)


def main() -> None:
    """Fetch every listing page and write the extracted entries to peaTop250.csv.

    Raises:
        requests.RequestException: on a timeout, a connection failure or a
            non-2xx response, instead of silently writing an empty file.
    """
    # The context manager closes the file, so no explicit close() is needed.
    with open("peaTop250.csv", mode="w", encoding="utf-8") as f:
        for start in range(0, _TOTAL_ENTRIES, _PAGE_SIZE):
            page_url = f"{url}?start={start}&filter="
            print(page_url)
            response = requests.get(url=page_url, headers=headers,
                                    timeout=_REQUEST_TIMEOUT)
            # Fail loudly on a blocked or error response rather than parsing it.
            response.raise_for_status()
            f.write(format_page(response.text))
    print("over")


if __name__ == "__main__":
    main()
|