使用 Selenium。先安装依赖:
pip install requests beautifulsoup4 selenium
具体参考以下代码:
import os
import time
from urllib.parse import parse_qs, urljoin, urlparse

import requests
from bs4 import BeautifulSoup
from selenium import webdriver
def download_file(url, folder, filename, timeout=30):
    """Download ``url`` and save it as ``folder/filename``.

    The body is streamed to a temporary ``.part`` file and renamed into
    place only once the transfer is complete, so a failed download never
    leaves a truncated file under the final name.

    Args:
        url: Absolute HTTP(S) URL of the resource.
        folder: Directory to write into; it must already exist.
        filename: Name of the file to create inside ``folder``.
        timeout: Seconds to wait for the connection and for each read
            before giving up (passed straight to ``requests.get``).

    Returns:
        True if the file was saved, False if the server answered with a
        status other than 200 (nothing is written in that case).

    Raises:
        requests.RequestException: On connection errors or timeouts.
        OSError: If the file cannot be written.
    """
    target = os.path.join(folder, filename)
    partial = target + '.part'
    # stream=True avoids holding a whole video in memory at once.
    with requests.get(url, stream=True, timeout=timeout) as response:
        if response.status_code != 200:
            return False
        try:
            with open(partial, 'wb') as f:
                for chunk in response.iter_content(chunk_size=64 * 1024):
                    f.write(chunk)
            os.replace(partial, target)
        finally:
            # After a successful rename the temp file is gone, so this
            # only fires when the transfer or the write failed midway.
            if os.path.exists(partial):
                os.remove(partial)
    return True
def _extract_product_id(url):
    """Return a filesystem-safe product id from the ``id`` query parameter.

    Falls back to ``'unknown'`` when the URL has no usable ``id`` value.
    """
    ids = parse_qs(urlparse(url).query).get('id')
    raw = ids[0] if ids else ''
    # The id becomes part of a directory name, so drop anything that could
    # act as a path separator or otherwise escape the target folder.
    safe = ''.join(ch for ch in raw if ch.isalnum() or ch in '-_')
    return safe or 'unknown'


def _resolve_media_url(page_url, src):
    """Return ``src`` as an absolute http(s) URL, or None if it is not one."""
    if not src:
        return None
    # urljoin handles protocol-relative ("//host/x"), root-relative and
    # path-relative references alike.
    absolute = urljoin(page_url, src.strip())
    if urlparse(absolute).scheme not in ('http', 'https'):
        # e.g. inline "data:" or "blob:" sources, which cannot be fetched.
        return None
    return absolute


def _save_media(page_url, src, folder, filename):
    """Download one media reference, skipping it if the request fails."""
    media_url = _resolve_media_url(page_url, src)
    if media_url is None:
        return
    try:
        download_file(media_url, folder, filename)
    except requests.RequestException as exc:
        # Best effort: one broken asset should not abort the whole scrape.
        print(f'Skipping {media_url}: {exc}')


def scrape_taobao_product(url):
    """Save the images and videos found on a product page.

    The page is rendered in Chrome via Selenium so that dynamically
    inserted content is present, then every ``<img>`` and ``<video>``
    source is downloaded into ``taobao_product_<id>`` in the current
    working directory. Files are named ``image_<n>.jpg`` and
    ``video_<n>.mp4``, where ``n`` is the tag's position on the page.

    Args:
        url: Product page URL. The folder name uses its ``id`` query
            parameter, or ``unknown`` when that parameter is absent.
    """
    product_id = _extract_product_id(url)

    # ChromeDriver must be downloaded and its path configured.
    driver = webdriver.Chrome()
    try:
        driver.get(url)
        time.sleep(5)  # Give the page time to load its dynamic content.
        page_source = driver.page_source
        page_url = driver.current_url  # Base for resolving relative URLs.
    finally:
        # Always close the browser, even if loading the page failed.
        driver.quit()

    soup = BeautifulSoup(page_source, 'html.parser')

    folder = f'taobao_product_{product_id}'
    os.makedirs(folder, exist_ok=True)

    # Download images.
    for i, img in enumerate(soup.find_all('img')):
        _save_media(page_url, img.get('src'), folder, f'image_{i}.jpg')

    # Download videos; the URL may sit on a nested <source> tag instead.
    for i, video in enumerate(soup.find_all('video')):
        src = video.get('src')
        if not src:
            source = video.find('source')
            src = source.get('src') if source else None
        _save_media(page_url, src, folder, f'video_{i}.mp4')
# Usage example: replace YOUR_PRODUCT_ID with a real product id.
if __name__ == '__main__':
    product_url = 'https://item.taobao.com/item.htm?id=YOUR_PRODUCT_ID'
    scrape_taobao_product(product_url)

# The text below is not code: it is the "User replies" (网友回复) section
# heading and related-question list from the web page this was copied from.
如何写ai提示词让大模型根据主题生成视频脚本json,然后让Hyperframe渲染出mp4视频?
有哪些等宽字体适合用来展示编程代码?
如何让演唱会歌迷的上万手机屏幕和闪光灯一起被现场中控控制闪烁?
Midjourney为啥进军医疗领域了?
python如何跟踪足球比赛指定球员全场运动标注打聚光灯合成
如何将linux服务器的文件目录映射到windows电脑磁盘?
Docling 与 MarkItDown 两个库有啥不同?
豆包收费后国产其他ai软件也会跟进收费吗?
JPEG 与 HEIF图片格式区别?
centos7版本太旧无法安装python3.11,如何在docker中运行python3.11?


