步骤:
1. 提取PPT中的文本和图片
2. 使用Google Text-to-Speech生成语音
3. 为每页PPT创建视频片段
4. 自动排版图片和文字
5. 合成最终视频
参考代码:
import os
import shutil
import tempfile

from gtts import gTTS
from moviepy.editor import (
    AudioFileClip,
    ColorClip,
    CompositeVideoClip,
    ImageClip,
    TextClip,
    concatenate_videoclips,
)
from pptx import Presentation
class PPTVideoGenerator:
    """Turn a PowerPoint deck into a narrated video, one clip per slide.

    For every slide the text and pictures are extracted, the text is spoken
    with gTTS, and a clip is composed from a black background, the pictures
    and the text. The per-slide clips are concatenated and written to
    ``output_path``.
    """

    # Value that shape.shape_type is compared against to detect pictures
    # (MSO_SHAPE_TYPE.PICTURE).
    _PICTURE_SHAPE_TYPE = 13

    def __init__(self,
                 ppt_path: str,
                 output_path: str,
                 slide_duration: float = 5.0,
                 language: str = 'zh-cn',
                 size: tuple = (1920, 1080)):
        """Store the settings and create a scratch directory.

        Args:
            ppt_path: Path of the presentation to read.
            output_path: Path of the video file to write.
            slide_duration: Minimum duration of each slide clip, in seconds.
            language: Language code handed to gTTS.
            size: Video frame size as ``(width, height)``.
        """
        self.ppt_path = ppt_path
        self.output_path = output_path
        self.slide_duration = slide_duration
        self.language = language
        self.size = size
        self.temp_dir = tempfile.mkdtemp()

    def extract_slide_content(self, slide):
        """Collect the text and the pictures of one slide.

        Args:
            slide: Object exposing a ``shapes`` iterable.

        Returns:
            A ``(texts, images)`` tuple: the stripped, non-empty shape texts
            and the paths of the picture files written to ``self.temp_dir``.
        """
        # generate() removes the scratch directory when it finishes, so make
        # sure it exists again before writing into it.
        os.makedirs(self.temp_dir, exist_ok=True)

        texts = []
        images = []
        for shape in slide.shapes:
            if hasattr(shape, "text"):
                stripped = shape.text.strip()
                if stripped:
                    texts.append(stripped)

            if shape.shape_type != self._PICTURE_SHAPE_TYPE:
                continue
            try:
                image = shape.image
            except ValueError:
                # python-pptx raises ValueError for a picture that has no
                # embedded image (e.g. a linked one); there is nothing to save.
                continue

            # mkstemp yields a name that is unique across slides, so pictures
            # from different slides never overwrite each other, and the
            # suffix reflects the real image format instead of assuming PNG.
            fd, image_path = tempfile.mkstemp(
                prefix="image_", suffix=f".{image.ext}", dir=self.temp_dir)
            with os.fdopen(fd, 'wb') as f:
                f.write(image.blob)
            images.append(image_path)
        return texts, images

    def create_slide_video(self, texts: list, images: list,
                           audio_path: str = None) -> "CompositeVideoClip":
        """Compose the clip for a single slide.

        Args:
            texts: Text lines drawn below each other, starting at 70% of the
                frame height.
            images: Paths of pictures, each scaled to 80% of the frame width
                and centered.
            audio_path: Optional narration file. When it exists, the clip
                lasts at least as long as the audio.

        Returns:
            The composited clip, with the narration attached when available.
        """
        frame_width, frame_height = self.size
        # Pixel sizes must be integers; 80% of the frame width is shared by
        # pictures and text boxes.
        content_width = int(frame_width * 0.8)

        layers = []
        for img_path in images or ():
            layers.append(
                ImageClip(img_path)
                .resize(width=content_width)
                .set_position('center')
            )

        y_position = int(frame_height * 0.7)
        for text in texts or ():
            text_clip = TextClip(
                text,
                font='Microsoft YaHei',
                fontsize=40,
                color='white',
                size=(content_width, None),
                method='label',
            ).set_position(('center', y_position))
            layers.append(text_clip)
            # Stack the next line below this one with a 10 px gap.
            y_position += text_clip.h + 10

        audio = None
        if audio_path and os.path.exists(audio_path):
            audio = AudioFileClip(audio_path)

        # Never cut the narration short, never go below the default duration.
        if audio is not None:
            duration = max(audio.duration, self.slide_duration)
        else:
            duration = self.slide_duration

        background = ColorClip(size=self.size, color=(0, 0, 0))
        background = background.set_duration(duration)
        layers = [layer.set_duration(duration) for layer in layers]

        video = CompositeVideoClip([background] + layers)
        if audio is not None:
            video = video.set_audio(audio)
        return video

    def generate(self):
        """Render the whole presentation to ``self.output_path``.

        Raises:
            ValueError: If the presentation contains no slides.
            Exception: Any error from loading, speech synthesis or encoding
                is printed and re-raised unchanged.
        """
        slide_clips = []
        final_video = None
        try:
            # A previous run deleted the scratch directory; recreate it so the
            # generator can be used more than once.
            os.makedirs(self.temp_dir, exist_ok=True)

            prs = Presentation(self.ppt_path)
            for i, slide in enumerate(prs.slides):
                print(f"处理第 {i+1} 页...")
                texts, images = self.extract_slide_content(slide)

                # Narration is only produced for slides that contain text.
                audio_path = None
                if texts:
                    text_for_tts = ' '.join(texts)
                    audio_path = os.path.join(self.temp_dir, f"audio_{i}.mp3")
                    tts = gTTS(text=text_for_tts, lang=self.language)
                    tts.save(audio_path)

                slide_clips.append(
                    self.create_slide_video(texts, images, audio_path))

            if not slide_clips:
                raise ValueError(
                    f"No slides found in presentation: {self.ppt_path}")

            final_video = concatenate_videoclips(slide_clips)
            final_video.write_videofile(
                self.output_path,
                fps=24,
                codec='libx264',
                audio_codec='aac',
                threads=4
            )
            print(f"视频已生成: {self.output_path}")
        except Exception as e:
            print(f"生成视频时出错: {str(e)}")
            raise
        finally:
            try:
                # Release audio readers before deleting the files they read.
                if final_video is not None:
                    final_video.close()
                for clip in slide_clips:
                    clip.close()
            finally:
                shutil.rmtree(self.temp_dir, ignore_errors=True)
def main():
    """Render the sample presentation with the example settings."""
    # Example usage: every option is spelled out so it is easy to tweak.
    settings = {
        "ppt_path": "presentation.pptx",
        "output_path": "output_video.mp4",
        "slide_duration": 5.0,
        "language": 'zh-cn',
        "size": (1920, 1080),
    }
    PPTVideoGenerator(**settings).generate()
# Run the example only when the file is executed as a script.
if __name__ == "__main__":
    main()


