使用selenium
先安装依赖:pip install requests beautifulsoup4 selenium。具体可参考下面的代码:
"""Download the images and videos referenced by a Taobao product page.

The page is rendered with Selenium (Chrome) so that dynamically inserted
content is present, then parsed with BeautifulSoup. Every ``<img>`` and
``<video>`` tag that has a usable ``src`` is saved into a folder named after
the product id.

Third-party requirements: ``requests``, ``beautifulsoup4``, ``selenium``.
"""
import logging
import os
import time
from urllib.parse import parse_qs, urljoin, urlparse

import requests
from bs4 import BeautifulSoup
from selenium import webdriver

logger = logging.getLogger(__name__)

# Seconds to wait for a media server before giving up on a single file.
REQUEST_TIMEOUT = 30
# Bytes written per chunk when streaming a download to disk.
CHUNK_SIZE = 64 * 1024
# Seconds to let the product page finish rendering before reading its HTML.
PAGE_LOAD_WAIT = 5


def download_file(url, folder, filename):
    """Save the resource at *url* as ``folder/filename``.

    The download is best-effort: a non-200 response or a network error is
    logged and skipped so that one bad link does not abort the whole scrape.

    Args:
        url: Absolute http(s) URL of the resource.
        folder: Existing directory to write into.
        filename: Name of the file to create inside *folder*.

    Returns:
        True if the file was written, False otherwise.
    """
    try:
        # stream=True avoids holding a whole video in memory; the timeout
        # prevents a stalled server from hanging the script forever.
        with requests.get(url, stream=True, timeout=REQUEST_TIMEOUT) as response:
            if response.status_code != 200:
                logger.warning("Skipping %s: HTTP %s", url, response.status_code)
                return False
            with open(os.path.join(folder, filename), 'wb') as f:
                for chunk in response.iter_content(chunk_size=CHUNK_SIZE):
                    f.write(chunk)
    except requests.RequestException as exc:
        logger.warning("Skipping %s: %s", url, exc)
        return False
    return True


def _product_id(url):
    """Return the value of the ``id`` query parameter of a product URL."""
    # Parse the query string properly: a plain split on 'id=' would also
    # match parameters such as 'skuid=' that merely end in 'id'.
    ids = parse_qs(urlparse(url).query).get('id')
    if not ids:
        raise ValueError(f"No 'id' query parameter in product URL: {url!r}")
    return ids[0]


def _absolute_media_url(page_url, src):
    """Turn a tag's ``src`` into a downloadable http(s) URL, or return None."""
    if not src:
        return None
    if src.startswith('//'):
        # Protocol-relative URL: default to https.
        candidate = 'https:' + src
    else:
        # Resolves relative paths against the page; absolute URLs pass through.
        candidate = urljoin(page_url, src)
    # Inline 'data:' images and 'blob:' videos cannot be fetched over HTTP.
    if urlparse(candidate).scheme not in ('http', 'https'):
        return None
    return candidate


def scrape_taobao_product(url):
    """Download all images and videos found on the product page at *url*.

    Files are written to ``taobao_product_<id>/`` in the current working
    directory as ``image_<n>.jpg`` and ``video_<n>.mp4``, where ``<n>`` is the
    index of the tag on the page.

    Args:
        url: Product page URL containing an ``id`` query parameter.

    Raises:
        ValueError: If *url* has no ``id`` query parameter.
    """
    # Validate the URL before launching a browser.
    product_id = _product_id(url)
    folder = f'taobao_product_{product_id}'

    # Use Selenium to load dynamic content (ChromeDriver must be available).
    driver = webdriver.Chrome()
    try:
        driver.get(url)
        time.sleep(PAGE_LOAD_WAIT)  # wait for the page to finish loading
        page_source = driver.page_source
    finally:
        # Always release the browser, even if loading the page failed.
        driver.quit()

    soup = BeautifulSoup(page_source, 'html.parser')
    os.makedirs(folder, exist_ok=True)

    # Download images.
    for i, img in enumerate(soup.find_all('img')):
        img_url = _absolute_media_url(url, img.get('src'))
        if img_url:
            download_file(img_url, folder, f'image_{i}.jpg')

    # Download videos.
    for i, video in enumerate(soup.find_all('video')):
        video_url = _absolute_media_url(url, video.get('src'))
        if video_url:
            download_file(video_url, folder, f'video_{i}.mp4')


if __name__ == '__main__':
    # Usage example.
    product_url = 'https://item.taobao.com/item.htm?id=YOUR_PRODUCT_ID'
    scrape_taobao_product(product_url)
网友回复
腾讯混元模型广场里都是混元模型的垂直小模型,如何api调用?
为啥所有的照片分辨率提升工具都会修改照片上的图案细节?
js如何在浏览器中将webm视频的声音分离为单独音频?
微信小程序如何播放第三方域名url的mp4视频?
ai多模态大模型能实时识别视频中的手语为文字吗?
如何远程调试别人的chrome浏览器获取调试信息?
为啥js打开新网页window.open设置窗口宽高无效?
浏览器中js的navigator.mediaDevices.getDisplayMedia屏幕录像无法录制SpeechSynthesisUtterance产生的说话声音?
js中mediaRecorder如何录制window.speechSynthesis声音音频并下载?
python如何直接获取抖音短视频的音频文件url?