通过 BeautifulSoup 定位标签，获取图片链接。此方法仅适用于图片链接直接内嵌在网页源代码中的情况；有的网站把图片链接藏在 js 文件里，用这种方式无法爬取。
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26
|
import os
from urllib.parse import urljoin, urlsplit

import requests
from bs4 import BeautifulSoup
# Download every image found in the "TypeList" container of a listing page.
#
# Only works when the <img> tags (and their src attributes) are present in the
# HTML returned by the server; links injected later by JavaScript are not seen.

# Listing page to scrape; relative image links are resolved against this URL.
domain = "https://umei.cc/katongdongman/dongmantupian/"
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.45 Safari/537.36"
}
# Seconds to wait for the server; without a timeout requests can block forever.
timeout = 10

# Create the output directory up front so open() cannot fail with
# FileNotFoundError on a first run.
save_dir = "img"
os.makedirs(save_dir, exist_ok=True)

res = requests.get(domain, headers=headers, timeout=timeout)
# Fail loudly on 4xx/5xx instead of parsing an error page.
res.raise_for_status()
res.encoding = 'utf-8'
content = res.text

mainPage = BeautifulSoup(content, "html.parser")
type_list = mainPage.find("div", class_="TypeList")
# find() returns None when the container is absent; treat that as "no images"
# rather than crashing with AttributeError on None.find_all.
img_list = type_list.find_all("img") if type_list is not None else []

for img in img_list:
    img_src = img.get("src")
    if not img_src:
        # <img> without a usable src attribute: nothing to download.
        continue

    # Resolve relative and protocol-relative links against the page URL.
    img_url = urljoin(domain, img_src)
    # Take the name from the URL path only, so a query string or fragment
    # never ends up in the file name.
    file_name = os.path.basename(urlsplit(img_url).path)
    if not file_name:
        continue
    imgName = os.path.join(save_dir, file_name)

    # Fetch before opening the file so a failed download does not leave an
    # empty or bogus file behind.
    try:
        img_res = requests.get(img_url, headers=headers, timeout=timeout)
        img_res.raise_for_status()
    except requests.RequestException as err:
        print("failed: {} ({})".format(img_url, err))
        continue

    # The with-statement closes the file; no explicit close() is needed.
    with open(imgName, mode="wb") as f:
        f.write(img_res.content)
    print("over")

print("all over!")
|