搜索

回答

步骤：

1. 提取PPT中的文本和图片
2. 使用Google Text-to-Speech生成语音
3. 为每页PPT创建视频片段
4. 自动排版图片和文字
5. 合成最终视频

参考代码：

import os
import shutil
import tempfile

from gtts import gTTS
from moviepy.editor import (
    AudioFileClip,
    ColorClip,
    CompositeVideoClip,
    ImageClip,
    TextClip,
    concatenate_videoclips,
)
from pptx import Presentation

class PPTVideoGenerator:
    """Turn a PowerPoint file into a narrated video, one clip per slide.

    For each slide the text and embedded pictures are extracted, the text is
    converted to speech with gTTS, and a clip is composed on a black
    background. All clips are concatenated and written to ``output_path``.
    """

    # Value compared against ``shape.shape_type`` to detect picture shapes
    # (MSO_SHAPE_TYPE.PICTURE in python-pptx).
    _PICTURE_SHAPE_TYPE = 13

    def __init__(self, 
                 ppt_path: str,
                 output_path: str,
                 slide_duration: float = 5.0,
                 language: str = 'zh-cn',
                 size: tuple = (1920, 1080)):
        """
        Initialise the PPT video generator.

        Args:
            ppt_path: Path of the PowerPoint file to read.
            output_path: Path of the video file to write.
            slide_duration: Minimum duration of each slide clip, in seconds.
            language: Language code passed to gTTS for text-to-speech.
            size: Video frame size as ``(width, height)`` in pixels.
        """
        self.ppt_path = ppt_path
        self.output_path = output_path
        self.slide_duration = slide_duration
        self.language = language
        self.size = size
        self.temp_dir = tempfile.mkdtemp()

    def _ensure_temp_dir(self):
        """Recreate the scratch directory if a previous run removed it."""
        # generate() deletes temp_dir when it finishes; without this check a
        # second call on the same instance would write into a missing folder.
        if not os.path.isdir(self.temp_dir):
            self.temp_dir = tempfile.mkdtemp()

    @staticmethod
    def _close_quietly(clip):
        """Close a clip, ignoring errors so cleanup never masks the real failure."""
        if clip is None:
            return
        try:
            clip.close()
        except Exception:
            # Best-effort cleanup: a failing close() must not replace the
            # exception (if any) that is already propagating from generate().
            pass

    def extract_slide_content(self, slide):
        """Extract the texts and pictures of one slide.

        Args:
            slide: A slide object exposing ``shapes``.

        Returns:
            A ``(texts, images)`` tuple: the stripped, non-empty text of every
            shape that has a ``text`` attribute, and the paths of the picture
            files written into ``self.temp_dir``.
        """
        self._ensure_temp_dir()
        texts = []
        images = []

        for shape in slide.shapes:
            # Collect text from any shape that carries it.
            if hasattr(shape, "text"):
                stripped = shape.text.strip()
                if stripped:
                    texts.append(stripped)

            # Dump embedded pictures to disk.
            if shape.shape_type == self._PICTURE_SHAPE_TYPE:
                image = shape.image
                # mkstemp guarantees a unique name, so pictures from different
                # slides never overwrite each other; the suffix follows the
                # real image format instead of assuming PNG.
                fd, image_path = tempfile.mkstemp(
                    prefix="image_",
                    suffix=f".{image.ext}",
                    dir=self.temp_dir,
                )
                with os.fdopen(fd, 'wb') as f:
                    f.write(image.blob)
                images.append(image_path)

        return texts, images

    def create_slide_video(self, texts: list, images: list, audio_path: str = None) -> "CompositeVideoClip":
        """Build the video clip for a single slide.

        Args:
            texts: Text blocks to render, stacked downwards from 70% of the
                frame height.
            images: Paths of the pictures to show, each centred and scaled to
                80% of the frame width.
            audio_path: Optional narration file; ignored when it is falsy or
                does not exist on disk.

        Returns:
            A composite clip on a black background whose duration is the
            longer of ``self.slide_duration`` and the narration length.
        """
        # Pictures and text both occupy 80% of the frame width. Use an int so
        # the value is a valid pixel count wherever it is passed.
        content_width = int(self.size[0] * 0.8)
        clips = []

        # Pictures.
        for img_path in images or []:
            img_clip = (ImageClip(img_path)
                        .resize(width=content_width)
                        .set_position('center'))
            clips.append(img_clip)

        # Text blocks, stacked with a 10 px gap.
        y_position = self.size[1] * 0.7
        for text in texts:
            text_clip = (TextClip(text,
                                  font='Microsoft YaHei',
                                  fontsize=40,
                                  color='white',
                                  size=(content_width, None),
                                  method='label')
                         .set_position(('center', y_position)))
            clips.append(text_clip)
            y_position += text_clip.h + 10

        # Black background that fixes the frame size of the composite.
        background = ColorClip(size=self.size, color=(0, 0, 0))

        # Narration, if available, may extend the clip beyond the default.
        audio = None
        duration = self.slide_duration
        if audio_path and os.path.exists(audio_path):
            audio = AudioFileClip(audio_path)
            duration = max(audio.duration, self.slide_duration)

        # Every layer must last exactly as long as the slide.
        clips = [clip.set_duration(duration) for clip in clips]
        background = background.set_duration(duration)

        video = CompositeVideoClip([background] + clips)
        if audio is not None:
            video = video.set_audio(audio)

        return video

    def generate(self):
        """Generate the complete video and write it to ``self.output_path``.

        Raises:
            ValueError: If the presentation contains no slides.
            Exception: Any error raised while reading the presentation,
                synthesising speech or encoding the video is reported and
                re-raised.
        """
        final_clips = []
        final_video = None
        try:
            self._ensure_temp_dir()

            # Load the presentation.
            prs = Presentation(self.ppt_path)

            for i, slide in enumerate(prs.slides):
                print(f"处理第 {i+1} 页...")

                texts, images = self.extract_slide_content(slide)

                # Narration is only synthesised when the slide has text.
                audio_path = None
                if texts:
                    text_for_tts = ' '.join(texts)
                    audio_path = os.path.join(self.temp_dir, f"audio_{i}.mp3")
                    tts = gTTS(text=text_for_tts, lang=self.language)
                    tts.save(audio_path)

                video_clip = self.create_slide_video(texts, images, audio_path)
                final_clips.append(video_clip)

            # Fail with a clear message instead of an obscure error from
            # concatenating an empty list.
            if not final_clips:
                raise ValueError("PPT中没有可处理的幻灯片")

            final_video = concatenate_videoclips(final_clips)

            final_video.write_videofile(
                self.output_path,
                fps=24,
                codec='libx264',
                audio_codec='aac',
                threads=4
            )

            print(f"视频已生成: {self.output_path}")

        except Exception as e:
            print(f"生成视频时出错: {str(e)}")
            raise

        finally:
            # Release clip resources before deleting the files they read;
            # otherwise open handles can keep the temp files alive.
            self._close_quietly(final_video)
            for clip in final_clips:
                self._close_quietly(clip)
            shutil.rmtree(self.temp_dir, ignore_errors=True)
            
def main():
    """Run the generator once with the example settings."""
    # Example configuration: input deck, output file and rendering options.
    settings = {
        "ppt_path": "presentation.pptx",
        "output_path": "output_video.mp4",
        "slide_duration": 5.0,
        "language": 'zh-cn',
        "size": (1920, 1080),
    }
    PPTVideoGenerator(**settings).generate()

# Run the example only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()

网友回复

我知道答案，我要回答

我有问题

私活外包

回答

开发了一个网站ai聊天助手

一个月开发一套类似coze的智能体平台

部署一套内网离线ai助理

私有ai助理开发

类似如家的租房app开发

h5手机端考试网站开发

开发一个短剧解锁剧集的小程序

我要开发一个酒类拍卖交易平台

开发艺术品拍卖收藏买画卖画h5网站

帮我做个数字货币交易所网站

并发与并行的区别？

跨平台开发工具kuikly与wails有啥不同？

swoole+html如何实现中转式即时通讯websocket应用？

go如何实现一个mysql读写分离代理？

如何用go编写一个类似mysql的数据库？

python的cmd命令行样式如何修改？

如果用ai来写一个新的操作系统？

如果用ai开发一款新的编程语言？

有没有调用gemini api、国内可访问的类似gemini cli的ai编程助手代码？

python如何将word文档中的标注文本分离成试卷和答案？