步骤:
1. 提取PPT中的文本和图片
2. 使用Google Text-to-Speech生成语音
3. 为每页PPT创建视频片段
4. 自动排版图片和文字
5. 合成最终视频
参考代码:
import os
import shutil
import tempfile
from typing import List, Optional, Tuple

from gtts import gTTS
from moviepy.editor import (
    AudioFileClip,
    ColorClip,
    CompositeVideoClip,
    ImageClip,
    TextClip,
    concatenate_videoclips,
)
from pptx import Presentation
from pptx.enum.shapes import MSO_SHAPE_TYPE


class PPTVideoGenerator:
    """Convert a PowerPoint deck into a narrated video, one clip per slide.

    For every slide the generator extracts the text and embedded pictures,
    synthesizes speech from the text with gTTS, lays the pictures and text
    out over a black background, and finally concatenates all slide clips
    into a single video file.
    """

    def __init__(self, ppt_path: str, output_path: str,
                 slide_duration: float = 5.0, language: str = 'zh-cn',
                 size: tuple = (1920, 1080)):
        """Store the configuration and create a scratch directory.

        Args:
            ppt_path: Path of the .pptx file to read.
            output_path: Path of the video file to write.
            slide_duration: Minimum duration of each slide clip, in seconds.
            language: Language code passed to gTTS.
            size: Output frame size as ``(width, height)`` in pixels.
        """
        self.ppt_path = ppt_path
        self.output_path = output_path
        self.slide_duration = slide_duration
        self.language = language
        self.size = size
        # Scratch directory for extracted images and generated audio;
        # removed at the end of generate().
        self.temp_dir = tempfile.mkdtemp()

    def extract_slide_content(self, slide) -> Tuple[List[str], List[str]]:
        """Extract the text and the embedded pictures of one slide.

        Args:
            slide: A python-pptx slide object.

        Returns:
            A ``(texts, images)`` tuple: ``texts`` holds the stripped,
            non-empty text of every shape exposing a ``text`` attribute;
            ``images`` holds paths of the picture files written into
            ``self.temp_dir``.
        """
        texts = []
        images = []

        for shape in slide.shapes:
            text = getattr(shape, "text", "")
            if text and text.strip():
                texts.append(text.strip())

            if shape.shape_type == MSO_SHAPE_TYPE.PICTURE:
                try:
                    image = shape.image
                except ValueError:
                    # Linked (non-embedded) pictures carry no image data.
                    continue
                # mkstemp guarantees a unique name, so pictures from
                # different slides never overwrite each other, and the real
                # extension keeps the file type honest for the decoder.
                fd, image_path = tempfile.mkstemp(
                    prefix="image_", suffix=f".{image.ext}", dir=self.temp_dir
                )
                with os.fdopen(fd, 'wb') as f:
                    f.write(image.blob)
                images.append(image_path)

        return texts, images

    def create_slide_video(self, texts: list, images: list,
                           audio_path: Optional[str] = None) -> CompositeVideoClip:
        """Build the video clip for a single slide.

        Args:
            texts: Text lines to render, stacked starting at 70% of the
                frame height.
            images: Paths of images to show, each scaled to 80% of the frame
                width and centered.
            audio_path: Optional narration file. When it exists, the clip
                lasts ``max(audio duration, slide_duration)``.

        Returns:
            A composite clip: black background, then images, then text,
            with the narration attached when available.
        """
        # Pixel dimensions must be integers.
        content_width = int(self.size[0] * 0.8)
        clips = []

        for img_path in images:
            img_clip = (ImageClip(img_path)
                        .resize(width=content_width)
                        .set_position('center'))
            clips.append(img_clip)

        y_position = self.size[1] * 0.7
        for text in texts:
            text_clip = (TextClip(text,
                                  font='Microsoft YaHei',
                                  fontsize=40,
                                  color='white',
                                  size=(content_width, None),
                                  method='label')
                         .set_position(('center', y_position)))
            clips.append(text_clip)
            y_position += text_clip.h + 10

        background = ColorClip(size=self.size, color=(0, 0, 0))

        # Load the narration once and reuse it for duration and audio track.
        audio = None
        if audio_path and os.path.exists(audio_path):
            audio = AudioFileClip(audio_path)
            duration = max(audio.duration, self.slide_duration)
        else:
            duration = self.slide_duration

        clips = [clip.set_duration(duration) for clip in clips]
        background = background.set_duration(duration)

        video = CompositeVideoClip([background] + clips)
        if audio is not None:
            video = video.set_audio(audio)
        return video

    def generate(self):
        """Render the whole presentation to ``self.output_path``.

        Raises:
            ValueError: If the presentation contains no slides.
            Exception: Any error from loading, speech synthesis or encoding
                is printed and re-raised.
        """
        final_clips = []
        final_video = None
        try:
            # A previous generate() call removes the scratch directory in
            # its cleanup, so make sure it exists again.
            os.makedirs(self.temp_dir, exist_ok=True)

            prs = Presentation(self.ppt_path)

            for i, slide in enumerate(prs.slides):
                print(f"处理第 {i+1} 页...")

                texts, images = self.extract_slide_content(slide)

                audio_path = None
                if texts:
                    text_for_tts = ' '.join(texts)
                    audio_path = os.path.join(self.temp_dir, f"audio_{i}.mp3")
                    tts = gTTS(text=text_for_tts, lang=self.language)
                    tts.save(audio_path)

                video_clip = self.create_slide_video(texts, images, audio_path)
                final_clips.append(video_clip)

            if not final_clips:
                raise ValueError("PPT中没有可处理的幻灯片")

            final_video = concatenate_videoclips(final_clips)
            final_video.write_videofile(
                self.output_path,
                fps=24,
                codec='libx264',
                audio_codec='aac',
                threads=4
            )

            print(f"视频已生成: {self.output_path}")

        except Exception as e:
            print(f"生成视频时出错: {str(e)}")
            raise

        finally:
            # Release audio readers before deleting the files they hold
            # open; otherwise the directory removal can silently fail.
            for clip in final_clips:
                audio = getattr(clip, "audio", None)
                if audio is not None:
                    audio.close()
                clip.close()
            if final_video is not None:
                final_video.close()
            shutil.rmtree(self.temp_dir, ignore_errors=True)


def main():
    """Example usage: render ``presentation.pptx`` to ``output_video.mp4``."""
    generator = PPTVideoGenerator(
        ppt_path="presentation.pptx",
        output_path="output_video.mp4",
        slide_duration=5.0,
        language='zh-cn',
        size=(1920, 1080)
    )
    generator.generate()


if __name__ == "__main__":
    main()
网友回复