使用 Selenium 加载动态页面,再用 BeautifulSoup 解析并下载图片和视频。先安装依赖:
pip install requests beautifulsoup4 selenium
具体参考以下代码:
import os
import time
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup
from selenium import webdriver
def download_file(url, folder, filename, timeout=30):
    """Fetch ``url`` and save the response body as ``folder/filename``.

    The file is written only when the server answers with HTTP 200; any
    other status code is skipped silently, so a missing asset does not
    leave an empty or error-page file on disk.

    Args:
        url: Absolute URL of the resource to download.
        folder: Existing directory the file is written into.
        filename: Name of the file to create inside ``folder``.
        timeout: Seconds to wait for the server before giving up. Without
            a timeout ``requests.get`` can block indefinitely on a stalled
            connection.

    Raises:
        requests.RequestException: On connection errors or when the
            timeout expires.
    """
    response = requests.get(url, timeout=timeout)
    if response.status_code != 200:
        return

    target_path = os.path.join(folder, filename)
    with open(target_path, 'wb') as f:
        f.write(response.content)
def _resolve_media_url(page_url, src):
    """Return an absolute http(s) URL for ``src``, or ``None`` if unusable."""
    if not src:
        return None
    if src.startswith('//'):
        # Protocol-relative URL: force https.
        return 'https:' + src
    # Resolve site-relative paths against the page; absolute URLs pass through.
    absolute = urljoin(page_url, src)
    if not absolute.startswith(('http://', 'https://')):
        # Inline ``data:`` images, ``blob:`` videos etc. cannot be fetched
        # over HTTP, so they are skipped instead of being mangled.
        return None
    return absolute


def scrape_taobao_product(url):
    """Download every image and video referenced by a product page.

    The page is loaded in Chrome through Selenium so that dynamically
    inserted content is present, the rendered HTML is parsed with
    BeautifulSoup, and the ``src`` of each ``<img>`` and ``<video>`` tag is
    saved into a ``taobao_product_<id>`` folder under the current working
    directory as ``image_<n>.jpg`` / ``video_<n>.mp4``.

    Args:
        url: Product page URL; it must contain an ``id=`` query parameter,
            whose value names the output folder.

    Raises:
        IndexError: If ``url`` contains no ``id=`` substring.
    """
    # Derive the folder name first so a malformed URL fails before a browser
    # is launched.
    product_id = url.split('id=')[1].split('&')[0]
    folder = f'taobao_product_{product_id}'

    # Use Selenium to load the dynamic content.
    driver = webdriver.Chrome()  # requires ChromeDriver to be installed and on the path
    try:
        driver.get(url)
        time.sleep(5)  # fixed wait for the page to finish loading
        page_source = driver.page_source
    finally:
        # The browser is only needed to obtain the HTML; always close it,
        # even when loading fails, so no Chrome process is leaked.
        driver.quit()

    soup = BeautifulSoup(page_source, 'html.parser')
    os.makedirs(folder, exist_ok=True)

    # Download images.
    for i, img in enumerate(soup.find_all('img')):
        img_url = _resolve_media_url(url, img.get('src'))
        if img_url:
            download_file(img_url, folder, f'image_{i}.jpg')

    # Download videos.
    for i, video in enumerate(soup.find_all('video')):
        video_url = _resolve_media_url(url, video.get('src'))
        if video_url:
            download_file(video_url, folder, f'video_{i}.mp4')
# Usage example: replace YOUR_PRODUCT_ID with a real product id before running.
product_url = 'https://item.taobao.com/item.htm?id=YOUR_PRODUCT_ID'
scrape_taobao_product(product_url)
# NOTE: the page section heading "网友回复" ("Replies from other users") had been
# fused onto the call above; it is kept here as a comment so the call parses.
如何破解绕开seedance2.0真人照片生成视频 限制?
python有哪些算法可以将视频中的每个帧图片去除指定区域水印合成新的视频?
iphone的激光雷达数据能否实时传输到three三维空间中?
豆包sora等ai视频生成大模型生成的视频水印如何去除?
python如何实现在电脑上拨号打电话给手机?
具身机器人与人形机器人区别?
nodejs如何将一个完整的js代码文件切割成不同的部分混淆后动态加载进入html运行?
为啥window.onerror捕获js错误是这样的{"message":"Script error.","source":"","lineno":0,"colno":0,"stack":null,…}?
2026年ai将全面接管编程?
WebMCP是干啥的?


