使用selenium
先安装依赖：pip install requests beautifulsoup4 selenium，具体参考以下代码：
import os
import time
from urllib.parse import parse_qs, urljoin, urlparse

import requests
from bs4 import BeautifulSoup
from selenium import webdriver

# Seconds to wait on a single HTTP download before giving up, so that one
# stalled connection cannot hang the whole run.
DOWNLOAD_TIMEOUT = 30


def download_file(url, folder, filename):
    """Download ``url`` and save the response body as ``folder/filename``.

    The file is written only when the server answers with HTTP 200; any
    other status code is ignored and nothing is written.

    Args:
        url: Absolute URL of the resource to fetch.
        folder: Existing directory to write into.
        filename: Name of the file to create inside ``folder``.

    Raises:
        requests.RequestException: If the request fails or times out.
    """
    response = requests.get(url, timeout=DOWNLOAD_TIMEOUT)
    if response.status_code == 200:
        with open(os.path.join(folder, filename), 'wb') as f:
            f.write(response.content)


def _product_id_from_url(url):
    """Return the ``id`` query parameter of ``url``, or ``'unknown'``."""
    # Parsing the query string (instead of splitting on 'id=') avoids
    # matching other parameters that merely end in "id=" and drops any
    # trailing "#fragment".
    ids = parse_qs(urlparse(url).query).get('id')
    return ids[0] if ids else 'unknown'


def _download_media(soup, tag_name, base_url, folder, prefix, extension):
    """Download the ``src`` of every ``tag_name`` element found in ``soup``.

    Files are named ``{prefix}_{i}.{extension}`` where ``i`` is the index of
    the element among all ``tag_name`` elements on the page.
    """
    for i, tag in enumerate(soup.find_all(tag_name)):
        src = tag.get('src')
        if not src:
            continue
        # urljoin resolves protocol-relative ("//host/x"), root-relative
        # ("/x") and relative ("x") references against the page URL.
        media_url = urljoin(base_url, src)
        if not media_url.startswith(('http://', 'https://')):
            # e.g. inline "data:" or "blob:" URIs, which cannot be fetched.
            continue
        try:
            download_file(media_url, folder, f'{prefix}_{i}.{extension}')
        except requests.RequestException as exc:
            # One broken resource should not abort the remaining downloads.
            print(f'Skipping {media_url}: {exc}')


def scrape_taobao_product(url):
    """Save the images and videos of a Taobao product page to disk.

    The page is rendered with Selenium (Chrome) so that dynamically loaded
    content is present, then every ``<img>`` and ``<video>`` element with a
    ``src`` attribute is downloaded into ``taobao_product_<id>/`` in the
    current working directory, where ``<id>`` is the ``id`` query parameter
    of ``url`` (``unknown`` when the URL has none).

    Args:
        url: URL of the product page, e.g.
            ``https://item.taobao.com/item.htm?id=123``.
    """
    product_id = _product_id_from_url(url)

    # Use Selenium to load the dynamic content. Requires Chrome and a
    # matching ChromeDriver to be available.
    driver = webdriver.Chrome()
    try:
        driver.get(url)
        time.sleep(5)  # Wait for the page to finish loading.
        page_source = driver.page_source
        # The browser may have been redirected; relative URLs in the page
        # must be resolved against the URL it actually ended up on.
        base_url = driver.current_url or url
    finally:
        # Always release the browser, even if loading the page failed.
        driver.quit()

    soup = BeautifulSoup(page_source, 'html.parser')

    # Create the output folder.
    folder = f'taobao_product_{product_id}'
    os.makedirs(folder, exist_ok=True)

    _download_media(soup, 'img', base_url, folder, 'image', 'jpg')
    _download_media(soup, 'video', base_url, folder, 'video', 'mp4')


# Usage example
product_url = 'https://item.taobao.com/item.htm?id=YOUR_PRODUCT_ID'

if __name__ == '__main__':
    scrape_taobao_product(product_url)
网友回复
js如何流式输出ai的回答并折叠代码块,点击代码块右侧可预览代码?
ai大模型如何将文章转换成可视化一目了然的图片流程图图表?
大模型生成html版本的ui原型图和ppt演示文档的系统提示词怎么写?
rtsp视频直播流如何转换成websocket流在h5页面上观看?
为啥coze会开源工作流agent coze studio?
如何检测网页是通过收藏夹打开的?
python如何实现类似php的http动态脚本请求处理响应代码?
js如何实现类似php的http动态脚本请求处理响应代码?
trae与solo有啥区别不同?
vue如何让ai动态生成问卷调查多步骤表单式收集基础信息自动规划执行任务?