使用selenium
安装依赖：pip install requests beautifulsoup4 selenium
具体参考代码如下：
import os
import time
from urllib.parse import parse_qs, urljoin, urlparse

import requests
from bs4 import BeautifulSoup
from selenium import webdriver


def download_file(url, folder, filename, timeout=30):
    """Download ``url`` and save the response body as ``folder/filename``.

    Args:
        url: Absolute HTTP(S) URL of the resource.
        folder: Existing directory the file is written into.
        filename: Name of the file to create inside ``folder``.
        timeout: Seconds to wait for the server before giving up, so a
            stalled connection cannot hang the whole scrape.

    Returns:
        True if the server answered with status 200 and the file was
        written, False for any other status code (nothing is written).

    Raises:
        requests.RequestException: If the request itself fails.
    """
    response = requests.get(url, timeout=timeout)
    if response.status_code != 200:
        return False
    with open(os.path.join(folder, filename), 'wb') as f:
        f.write(response.content)
    return True


def _extract_product_id(url):
    """Return the value of the ``id`` query parameter, or 'unknown' if absent."""
    ids = parse_qs(urlparse(url).query).get('id')
    return ids[0] if ids else 'unknown'


def _fetch_rendered_html(url, wait_seconds):
    """Load ``url`` in Chrome and return the page source after waiting."""
    # NOTE(review): depending on the installed Selenium version, ChromeDriver
    # may need to be downloaded and put on PATH manually - confirm locally.
    driver = webdriver.Chrome()
    try:
        driver.get(url)
        # Fixed delay that gives dynamically loaded content time to render.
        time.sleep(wait_seconds)
        return driver.page_source
    finally:
        # Always close the browser, even when loading the page fails.
        driver.quit()


def _download_media(tags, page_url, folder, prefix, extension):
    """Download the ``src`` of every tag; return how many files were saved."""
    saved = 0
    for i, tag in enumerate(tags):
        src = tag.get('src')
        if not src:
            continue
        if src.startswith('//'):
            # Protocol-relative URL: force https.
            media_url = 'https:' + src
        else:
            # Resolves relative paths; absolute URLs pass through unchanged.
            media_url = urljoin(page_url, src)
        # Skip inline ``data:`` URIs and other schemes requests cannot fetch.
        if urlparse(media_url).scheme not in ('http', 'https'):
            continue
        try:
            if download_file(media_url, folder, f'{prefix}_{i}.{extension}'):
                saved += 1
        except requests.RequestException as exc:
            # Best effort: one broken link must not abort the remaining files.
            print(f'Skipping {media_url}: {exc}')
    return saved


def scrape_taobao_product(url, wait_seconds=5):
    """Download all images and videos referenced by a Taobao product page.

    The page is rendered with Selenium, parsed with BeautifulSoup, and every
    ``<img>`` / ``<video>`` ``src`` is saved into a folder named
    ``taobao_product_<id>`` in the current working directory. Files are named
    ``image_<n>.jpg`` and ``video_<n>.mp4``, where ``n`` is the position of the
    tag among the tags of its kind.

    Args:
        url: Product page URL; the ``id`` query parameter names the folder.
        wait_seconds: Seconds to wait after navigation for the page to render.

    Returns:
        The name of the folder the files were written to.
    """
    page_source = _fetch_rendered_html(url, wait_seconds)
    soup = BeautifulSoup(page_source, 'html.parser')

    # Create the output folder.
    folder = f'taobao_product_{_extract_product_id(url)}'
    os.makedirs(folder, exist_ok=True)

    _download_media(soup.find_all('img'), url, folder, 'image', 'jpg')
    _download_media(soup.find_all('video'), url, folder, 'video', 'mp4')
    return folder


# Usage example
if __name__ == '__main__':
    product_url = 'https://item.taobao.com/item.htm?id=YOUR_PRODUCT_ID'
    scrape_taobao_product(product_url)
网友回复