在Python中,可以使用python-pptx库来批量替换PPT(PowerPoint)文件中的文字。以下是详细的实现方法:
安装必要库

pip install python-pptx

基本替换方法
from pptx import Presentation
def replace_text_in_ppt(input_path, output_path, replace_dict):
    """Replace text in a PowerPoint file and save the result to a new path.

    Every slide is scanned. Shapes that have a text frame and shapes that
    hold a table are processed; other shapes are left untouched.
    Replacement is done run by run and only the run's text is reassigned,
    so a search string that is split across several runs is not matched.

    :param input_path: path of the presentation to read
    :param output_path: path the modified presentation is written to
    :param replace_dict: mapping of ``{'old text': 'new text'}``; entries are
        applied in dictionary order, each seeing the result of the earlier
        ones
    """

    def replace_in_paragraphs(paragraphs):
        # Shared by text frames and table cells so both follow the exact
        # same replacement rules.
        for paragraph in paragraphs:
            for run in paragraph.runs:
                for old_text, new_text in replace_dict.items():
                    # Only write to the run when there is something to change.
                    if old_text in run.text:
                        run.text = run.text.replace(old_text, new_text)

    prs = Presentation(input_path)
    for slide in prs.slides:
        for shape in slide.shapes:
            if shape.has_text_frame:
                # Ordinary text boxes / placeholders.
                replace_in_paragraphs(shape.text_frame.paragraphs)
            elif shape.has_table:
                # Text inside table cells.
                for row in shape.table.rows:
                    for cell in row.cells:
                        replace_in_paragraphs(cell.text_frame.paragraphs)

    prs.save(output_path)
    print(f"文件已保存到: {output_path}")
# Usage example: keys are the strings to search for, values are their replacements.
replace_dict = {
"旧文本1": "新文本1",
"旧文本2": "新文本2",
"{日期}": "2023-11-15"
}
replace_text_in_ppt("input.pptx", "output.pptx", replace_dict)

高级功能扩展

1. 批量处理多个PPT文件

import os
from pptx import Presentation
def batch_replace_ppt(folder_path, output_folder, replace_dict):
    """Apply the same text replacements to every .pptx file in a folder.

    Each matching file in ``folder_path`` is opened, processed and saved
    under the same file name in ``output_folder``. Sub-folders are not
    descended into. Text in text frames and in table cells is replaced run
    by run, so a search string split across several runs is not matched.

    Only ``.pptx`` files are processed (case-insensitive match): python-pptx
    cannot open the legacy binary ``.ppt`` format, and trying to would abort
    the whole batch. ``~$`` lock files that PowerPoint creates next to an
    open presentation are skipped for the same reason.

    :param folder_path: folder containing the presentations to process
    :param output_folder: folder the processed copies are written to; it is
        created if it does not exist
    :param replace_dict: mapping of ``{'old text': 'new text'}``
    """

    def replace_in_paragraphs(paragraphs):
        # Shared by text frames and table cells.
        for paragraph in paragraphs:
            for run in paragraph.runs:
                for old_text, new_text in replace_dict.items():
                    if old_text in run.text:
                        run.text = run.text.replace(old_text, new_text)

    # exist_ok avoids the check-then-create race of os.path.exists().
    os.makedirs(output_folder, exist_ok=True)

    for filename in os.listdir(folder_path):
        if filename.startswith('~$') or not filename.lower().endswith('.pptx'):
            continue

        input_path = os.path.join(folder_path, filename)
        output_path = os.path.join(output_folder, filename)

        prs = Presentation(input_path)
        for slide in prs.slides:
            for shape in slide.shapes:
                if shape.has_text_frame:
                    replace_in_paragraphs(shape.text_frame.paragraphs)
                elif shape.has_table:
                    for row in shape.table.rows:
                        for cell in row.cells:
                            replace_in_paragraphs(cell.text_frame.paragraphs)

        prs.save(output_path)
        print(f"已处理: {filename}")
# Usage example: keys are the strings to search for, values are their replacements.
replace_dict = {
"公司名称": "ABC科技有限公司",
"联系电话": "400-123-4567"
}
batch_replace_ppt("./ppt_files", "./output_ppt", replace_dict)

2. 保留原始格式的替换

from pptx import Presentation
from pptx.util import Pt
from pptx.dml.color import RGBColor
def replace_text_with_format(input_path, output_path, replace_dict):
    """Replace text in a presentation, optionally restyling the changed runs.

    Only shapes that have a text frame are processed. Replacement is done
    run by run, so a search string split across several runs is not matched.

    :param input_path: path of the presentation to read
    :param output_path: path the modified presentation is written to
    :param replace_dict: mapping whose keys are the strings to search for.
        A value is either the replacement string, or a dict with:

        * ``'text'``  - the replacement string (required; a missing key
          raises ``KeyError``)
        * ``'font'``  - optional font name assigned to ``run.font.name``
        * ``'size'``  - optional size, passed to ``Pt``
        * ``'color'`` - optional sequence unpacked into ``RGBColor``
          (the usage example passes an ``(r, g, b)`` tuple)

        Formatting is applied to the whole run that contained the match,
        not just to the replaced substring.
    """
    prs = Presentation(input_path)

    for slide in prs.slides:
        for shape in slide.shapes:
            if not shape.has_text_frame:
                continue
            for paragraph in shape.text_frame.paragraphs:
                for run in paragraph.runs:
                    for old_text, new_text in replace_dict.items():
                        if old_text not in run.text:
                            continue

                        if not isinstance(new_text, dict):
                            # Plain string: text changes, formatting stays.
                            run.text = run.text.replace(old_text, new_text)
                            continue

                        # Dict value: replace the text, then apply whichever
                        # formatting keys were supplied.
                        run.text = run.text.replace(old_text, new_text['text'])
                        if 'font' in new_text:
                            run.font.name = new_text['font']
                        if 'size' in new_text:
                            run.font.size = Pt(new_text['size'])
                        if 'color' in new_text:
                            run.font.color.rgb = RGBColor(*new_text['color'])

    prs.save(output_path)
# Usage example (including formatting settings)
advanced_replace_dict = {
"普通替换": "新文本",
"格式替换": {
'text': "带格式的新文本",
'font': '微软雅黑',
'size': 18,
'color': (255, 0, 0) # RGB red
}
}
replace_text_with_format("input.pptx", "formatted_output.pptx", advanced_replace_dict)

3. 使用正则表达式替换

import re
from pptx import Presentation
def regex_replace_in_ppt(input_path, output_path, replace_patterns):
    """Apply regular-expression substitutions to the text of a presentation.

    Only shapes that have a text frame are processed. Substitution is done
    run by run, so a match cannot span several runs.

    :param input_path: path of the presentation to read
    :param output_path: path the modified presentation is written to
    :param replace_patterns: iterable of ``(pattern, repl)`` tuples;
        ``pattern`` may be a pattern string or an already compiled pattern,
        ``repl`` is anything ``re.sub`` accepts. The pairs are applied in
        order, each seeing the result of the previous ones.
    """
    # Compile once up front instead of resolving every pattern again for
    # every run. Materialising the list also keeps a one-shot iterable
    # (e.g. a generator) usable for every run, not just the first one.
    compiled_patterns = [
        (re.compile(pattern), repl) for pattern, repl in replace_patterns
    ]

    prs = Presentation(input_path)
    for slide in prs.slides:
        for shape in slide.shapes:
            if not shape.has_text_frame:
                continue
            for paragraph in shape.text_frame.paragraphs:
                for run in paragraph.runs:
                    original_text = run.text
                    updated_text = original_text
                    for regex, repl in compiled_patterns:
                        updated_text = regex.sub(repl, updated_text)
                    # Leave runs without any match untouched.
                    if updated_text != original_text:
                        run.text = updated_text

    prs.save(output_path)
# Usage example: each entry is a (pattern, replacement) tuple.
regex_patterns = [
(r'\d{4}-\d{2}-\d{2}', '2023-11-15'), # replace every YYYY-MM-DD style date
(r'#\w+#', 'REPLACED') # replace every word wrapped in # characters
]
regex_replace_in_ppt("input.pptx", "regex_output.pptx", regex_patterns)

注意事项

备份原始文件:在进行批量替换前,务必备份原始PPT文件。
格式保留:
- 简单替换会保留原始文本格式
- 如果需要修改格式,可以使用高级替换方法

图表和SmartArt:
- 上述代码主要处理文本框和表格中的文本
- 图表和SmartArt中的文本可能需要特殊处理

性能考虑:
- 对于大型PPT文件,处理可能需要一些时间
- 可以考虑添加进度显示

错误处理:
- 添加适当的异常处理来应对文件损坏等情况

Word/Excel中的替换:
- 如果需要处理Word或Excel中的文本替换,可以使用python-docx和openpyxl库

通过以上方法,你可以灵活地实现PPT文件中文本的批量替换,从简单替换到复杂的格式保留和正则表达式替换都能覆盖。
网友回复


