import requests
from bs4 import BeautifulSoup
import os
def download_img(url, save_path):
    """Fetch *url* and write the response body to *save_path*.

    Nothing is written when the server answers with an HTTP error status or
    with a ``text/*`` body (for example an HTML error page): saving such a
    response under an image file name produces a file that no image viewer
    can open.

    Args:
        url: Absolute URL of the image to download.
        save_path: Destination file path. ``open`` does not create missing
            directories, so the parent directory must already exist.

    Returns:
        True if the file was written, False if the response was skipped.

    Raises:
        requests.RequestException: On connection failure or timeout.
        OSError: If *save_path* cannot be opened for writing.
    """
    print(f'正在下载图片……{url}')
    # Without a timeout requests waits indefinitely on a stalled server.
    response = requests.get(url, timeout=30)
    content_type = response.headers.get('Content-Type', '').lower()

    saved = False
    if not response.ok:
        print(f'下载失败,HTTP状态码:{response.status_code}')
    elif content_type.startswith('text/'):
        # A text/HTML body means we were handed a web page, not image bytes.
        print(f'返回的不是图片(Content-Type:{content_type}),已跳过')
    else:
        with open(save_path, 'wb') as f:
            f.write(response.content)
        saved = True
    print('-'*30)
    return saved
def main():
    """Download every image linked from one hard-coded PTT Beauty article.

    The article page is fetched, the text of its third
    ``span.article-meta-value`` element is used as the name of a folder under
    ``images/``, and every ``<a href>`` whose URL path ends in an allowed
    image extension is passed to ``download_img``.

    Raises:
        requests.RequestException: If the article page itself cannot be
            fetched (connection error, timeout or HTTP error status).
    """
    # Local import so this function stays self-contained within the file.
    from urllib.parse import urljoin, urlsplit

    url = 'https://www.ptt.cc/bbs/Beauty/M.1686997472.A.FDA.html'
    # NOTE(review): presumably this cookie passes PTT's over-18 confirmation
    # page -- confirm against the site.
    headers = {"Cookie": "over18=1"}
    response = requests.get(url, headers=headers, timeout=30)
    # Fail with a clear HTTP error instead of parsing an error page.
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'html.parser')

    # The third meta value is used as the title; guard against pages that
    # carry fewer meta spans instead of raising IndexError.
    spans = soup.find_all('span', class_='article-meta-value')
    title = spans[2].text.strip() if len(spans) > 2 else ''
    # Replace characters that are path separators or are rejected in file
    # names on common filesystems, so the title maps to exactly one folder.
    unsafe_chars = str.maketrans({c: '_' for c in '\\/:*?"<>|'})
    safe_title = title.translate(unsafe_chars).rstrip('. ') or 'untitled'
    dir_name = os.path.join('images', safe_title)
    os.makedirs(dir_name, exist_ok=True)

    # Find every link on the page that points at an image file.
    allowed_extensions = {"jpg", "jpeg", "png", "gif"}
    for link in soup.find_all('a'):
        href = link.get('href')
        if not href:
            continue
        # Resolve relative links against the article URL, and take the name
        # and extension from the URL path only so that a query string or
        # fragment cannot leak into them.
        img_url = urljoin(url, href)
        file_name = urlsplit(img_url).path.split('/')[-1]
        extension = os.path.splitext(file_name)[1].lstrip('.').lower()
        if extension not in allowed_extensions:
            continue
        print(f"图片类型:{extension} ")
        print(f'url:{img_url}')
        # The save path must not carry trailing whitespace, otherwise the
        # file is stored with a broken extension such as "jpg ".
        save_path = os.path.join(dir_name, file_name)
        try:
            download_img(img_url, save_path)
        except requests.RequestException as err:
            # One unreachable image should not abort the remaining downloads.
            print(f'下载失败:{err}')
# Run the downloader only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == "__main__":
    main()
这个程序下载下来的图片不能打开,其实是还没有找到真正的图片地址,后期有时间再修改代码。
继续阅读












