Answer

Use Selenium to render the dynamically loaded page, then parse it with BeautifulSoup. First install the dependencies:

pip install requests beautifulsoup4 selenium

Reference code:

import requests
from bs4 import BeautifulSoup
from selenium import webdriver
import time
import os

def download_file(url, folder, filename):
    # Fetch the resource and write it to disk only if the request succeeds
    response = requests.get(url)
    if response.status_code == 200:
        with open(os.path.join(folder, filename), 'wb') as f:
            f.write(response.content)

def scrape_taobao_product(url):
    # Use Selenium to render the dynamically loaded content
    driver = webdriver.Chrome()  # Requires ChromeDriver; Selenium 4.6+ can manage it automatically
    driver.get(url)
    time.sleep(5)  # Wait for the page to finish loading

    # Grab the rendered page source
    page_source = driver.page_source
    soup = BeautifulSoup(page_source, 'html.parser')

    # Create a download folder named after the product id
    product_id = url.split('id=')[1].split('&')[0]
    folder = f'taobao_product_{product_id}'
    os.makedirs(folder, exist_ok=True)

    # Download images
    img_tags = soup.find_all('img')
    for i, img in enumerate(img_tags):
        img_url = img.get('src')
        if img_url:
            if not img_url.startswith('http'):
                img_url = 'https:' + img_url
            download_file(img_url, folder, f'image_{i}.jpg')

    # Download videos
    video_tags = soup.find_all('video')
    for i, video in enumerate(video_tags):
        video_url = video.get('src')
        if video_url:
            if not video_url.startswith('http'):
                video_url = 'https:' + video_url
            download_file(video_url, folder, f'video_{i}.mp4')

    driver.quit()

# Example usage
product_url = 'https://item.taobao.com/item.htm?id=YOUR_PRODUCT_ID'
scrape_taobao_product(product_url)
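
Note that Taobao product pages usually require login and tend to reject bare requests.get downloads that carry no cookies or User-Agent, and a fixed time.sleep(5) may be too short or too long. Below is a minimal sketch of two possible refinements, assuming Selenium 4.x and the page structure used above; open_with_wait and session_from_driver are hypothetical helper names, not part of any library.

import requests
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

def open_with_wait(url, timeout=15):
    # Hypothetical helper: wait until at least one <img> element is present
    # instead of sleeping for a fixed number of seconds
    driver = webdriver.Chrome()
    driver.get(url)
    WebDriverWait(driver, timeout).until(
        EC.presence_of_element_located((By.TAG_NAME, 'img'))
    )
    return driver

def session_from_driver(driver):
    # Hypothetical helper: copy the browser's cookies and User-Agent into a
    # requests.Session so image/video downloads carry the same login state
    session = requests.Session()
    session.headers['User-Agent'] = driver.execute_script('return navigator.userAgent')
    for cookie in driver.get_cookies():
        session.cookies.set(cookie['name'], cookie['value'])
    return session

With these in place, download_file could accept the session as an extra argument and call session.get(url) instead of requests.get(url), so the downloads reuse the logged-in Selenium session.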
