# 使用 Selenium 加载动态页面内容。
# 安装依赖：pip install requests beautifulsoup4 selenium
# 具体参考以下代码：
import requests
from bs4 import BeautifulSoup
from selenium import webdriver
import time
import os
def download_file(url, folder, filename, timeout=30):
    """Download *url* and save the response body as ``folder/filename``.

    Args:
        url: Absolute HTTP(S) URL of the resource to fetch.
        folder: Existing directory the file is written into.
        filename: Name of the file to create inside *folder*.
        timeout: Seconds to wait for the server before giving up. Without
            a timeout ``requests.get`` can block forever on a stalled
            connection.

    Returns:
        True if the server answered 200 and the file was written,
        False if the server answered with any other status code.

    Raises:
        requests.RequestException: On connection errors or timeouts.
        OSError: If the target file cannot be written.
    """
    response = requests.get(url, timeout=timeout)
    if response.status_code != 200:
        # Nothing is written for error responses; report it to the caller
        # instead of failing silently.
        return False

    # The body is fully downloaded before the file is opened, so a failed
    # transfer never leaves a truncated file behind.
    with open(os.path.join(folder, filename), 'wb') as f:
        f.write(response.content)
    return True
def _extract_product_id(url):
    """Return the value of the ``id`` query parameter of a product URL."""
    from urllib.parse import parse_qs, urlsplit

    ids = parse_qs(urlsplit(url).query).get('id')
    if not ids:
        raise ValueError(f'no "id" query parameter in product URL: {url!r}')
    return ids[0]


def _resolve_media_url(page_url, src):
    """Return an absolute http(s) URL for *src*, or None if not downloadable."""
    from urllib.parse import urljoin, urlsplit

    if not src:
        return None
    src = src.strip()
    if src.startswith('//'):
        # Protocol-relative URL: always fetch over HTTPS.
        return 'https:' + src
    absolute = urljoin(page_url, src)
    if urlsplit(absolute).scheme not in ('http', 'https'):
        # Inline data:/blob: sources cannot be fetched over HTTP.
        return None
    return absolute


def _download_media(tags, page_url, folder, name_template):
    """Download the ``src`` of every tag, naming files by the tag's index."""
    for index, tag in enumerate(tags):
        media_url = _resolve_media_url(page_url, tag.get('src'))
        if media_url is None:
            continue
        try:
            download_file(media_url, folder, name_template.format(index))
        except requests.RequestException as exc:
            # One unreachable resource must not abort the remaining downloads.
            print(f'Skipping {media_url}: {exc}')


def scrape_taobao_product(url):
    """Download all images and videos found on a Taobao product page.

    The page is rendered with Selenium (Chrome) so that dynamically loaded
    content is present, then every ``<img>`` and ``<video>`` tag with a
    ``src`` attribute is downloaded into ``taobao_product_<id>/`` in the
    current working directory as ``image_<n>.jpg`` / ``video_<n>.mp4``,
    where ``<n>`` is the tag's position among tags of the same kind.

    Args:
        url: Product page URL; must carry the product id in an ``id``
            query parameter.

    Raises:
        ValueError: If *url* has no ``id`` query parameter.
    """
    # Validate the URL first so a bad argument fails before a browser starts.
    product_id = _extract_product_id(url)

    # Use Selenium to load dynamic content.
    driver = webdriver.Chrome()  # requires ChromeDriver to be installed and on PATH
    try:
        driver.get(url)
        time.sleep(5)  # wait for the page to finish loading
        page_source = driver.page_source
    finally:
        # Always close the browser, even if loading the page failed; it is
        # not needed once the page source has been captured.
        driver.quit()

    soup = BeautifulSoup(page_source, 'html.parser')

    # Create the output folder.
    folder = f'taobao_product_{product_id}'
    os.makedirs(folder, exist_ok=True)

    # Download images, then videos.
    _download_media(soup.find_all('img'), url, folder, 'image_{}.jpg')
    _download_media(soup.find_all('video'), url, folder, 'video_{}.mp4')
# Usage example: replace YOUR_PRODUCT_ID with a real Taobao product id.
product_url = 'https://item.taobao.com/item.htm?id=YOUR_PRODUCT_ID'

# Only start scraping when run as a script, not when the module is imported.
if __name__ == '__main__':
    scrape_taobao_product(product_url)


