方法思路
"""Batch document scanner: detect, deskew and binarize photographed documents.

Approach:

1. Edge detection: find the document edges with the Canny algorithm.
2. Contour detection: pick the largest four-sided contour (the document is
   assumed to be the largest quadrilateral in the picture).
3. Perspective transform: warp the skewed quadrilateral into a rectangle.
4. Enhancement: binarize the result to improve readability.
5. Batch processing: walk a folder and process every image in it.
"""

import glob
import os

import cv2
import numpy as np

# File extensions (lower-case) picked up by the batch driver.
IMAGE_EXTENSIONS = (".jpg", ".jpeg", ".png")


def order_points(pts):
    """Order four corners as top-left, top-right, bottom-right, bottom-left.

    The corners are sorted by their angle around the centroid, which in image
    coordinates (y pointing down) yields a clockwise walk. The walk is then
    rotated so that the corner with the smallest ``x + y`` comes first.
    Unlike picking corners by min/max of ``x + y`` and ``y - x``, this never
    returns the same input point twice when sums or differences tie.

    Args:
        pts: Array-like holding exactly four (x, y) points; any shape that
            reshapes to (4, 2) is accepted.

    Returns:
        A (4, 2) float32 array ordered tl, tr, br, bl.

    Raises:
        ValueError: If ``pts`` cannot be reshaped to (4, 2).
    """
    pts = np.asarray(pts, dtype="float32").reshape(4, 2)
    center = pts.mean(axis=0)
    angles = np.arctan2(pts[:, 1] - center[1], pts[:, 0] - center[0])
    ring = pts[np.argsort(angles, kind="stable")]
    # The cyclic order is fixed by the sort; only the starting corner is
    # still arbitrary, so anchor it on the corner closest to the origin.
    start = int(np.argmin(ring.sum(axis=1)))
    return np.roll(ring, -start, axis=0)


def four_point_transform(image, pts):
    """Warp the quadrilateral ``pts`` of ``image`` into an upright rectangle.

    Args:
        image: Source image as accepted by ``cv2.warpPerspective``.
        pts: Four (x, y) corner points of the region, in any order.

    Returns:
        The warped image. Its width and height are the longer of the two
        opposite edge lengths of the quadrilateral, truncated to integers.

    Raises:
        ValueError: If the quadrilateral is narrower or shorter than 2 px,
            which would make the target rectangle degenerate.
    """
    rect = order_points(pts)
    tl, tr, br, bl = rect

    # Output size: the longer of each pair of opposite edges.
    max_width = max(int(np.linalg.norm(br - bl)), int(np.linalg.norm(tr - tl)))
    max_height = max(int(np.linalg.norm(tr - br)), int(np.linalg.norm(tl - bl)))
    if max_width < 2 or max_height < 2:
        raise ValueError(f"文档区域过小,无法进行透视变换:{max_width}x{max_height}")

    # Target corners, in the same tl, tr, br, bl order as ``rect``.
    dst = np.array(
        [
            [0, 0],
            [max_width - 1, 0],
            [max_width - 1, max_height - 1],
            [0, max_height - 1],
        ],
        dtype="float32",
    )

    matrix = cv2.getPerspectiveTransform(rect, dst)
    return cv2.warpPerspective(image, matrix, (max_width, max_height))


def _find_document_contour(edged):
    """Return the largest 4-vertex contour approximation in ``edged``, or None.

    Only the five contours with the largest area are examined.
    """
    # findContours returns (contours, hierarchy) on OpenCV 2.x/4.x and
    # (image, contours, hierarchy) on 3.x; [-2] is the contour list in both.
    contours = cv2.findContours(edged, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)[-2]
    for contour in sorted(contours, key=cv2.contourArea, reverse=True)[:5]:
        perimeter = cv2.arcLength(contour, True)
        approx = cv2.approxPolyDP(contour, 0.02 * perimeter, True)
        if len(approx) == 4:
            return approx
    return None


def process_image(
    image_path,
    output_dir,
    *,
    work_height=500,
    canny_thresholds=(75, 200),
    threshold_block_size=51,
    threshold_c=10,
):
    """Scan one photo: locate the document, deskew it, binarize it, save it.

    The result is written to ``output_dir`` under the input's file name. If
    no four-sided contour is found, a message is printed and nothing is
    written.

    Args:
        image_path: Path of the image to process.
        output_dir: Directory the result is written to.
        work_height: Height in pixels of the downscaled copy used for
            contour detection.
        canny_thresholds: (low, high) hysteresis thresholds for ``cv2.Canny``.
        threshold_block_size: Neighbourhood size for
            ``cv2.adaptiveThreshold``.
        threshold_c: Constant ``C`` passed to ``cv2.adaptiveThreshold``.

    Raises:
        ValueError: If the image cannot be read, or the detected region is
            too small to warp.
        OSError: If the result cannot be written.
    """
    image = cv2.imread(image_path)
    if image is None:
        # imread signals failure by returning None instead of raising.
        raise ValueError(f"无法读取图片:{image_path}")

    # Detect on a downscaled copy; ``ratio`` maps its coordinates back to
    # the full-resolution original.
    ratio = image.shape[0] / float(work_height)
    small_width = max(1, int(image.shape[1] / ratio))
    small = cv2.resize(image, (small_width, work_height))

    gray = cv2.cvtColor(small, cv2.COLOR_BGR2GRAY)
    gray = cv2.GaussianBlur(gray, (5, 5), 0)
    low, high = canny_thresholds
    edged = cv2.Canny(gray, low, high)

    screen_cnt = _find_document_contour(edged)
    if screen_cnt is None:
        print(f"未找到文档轮廓:{image_path}")
        return

    # Warp the full-resolution image so no detail is lost to the downscale.
    corners = screen_cnt.reshape(4, 2).astype("float32") * ratio
    warped = four_point_transform(image, corners)

    warped = cv2.cvtColor(warped, cv2.COLOR_BGR2GRAY)
    warped = cv2.adaptiveThreshold(
        warped,
        255,
        cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
        cv2.THRESH_BINARY,
        threshold_block_size,
        threshold_c,
    )

    output_path = os.path.join(output_dir, os.path.basename(image_path))
    if not cv2.imwrite(output_path, warped):
        # imwrite reports failure through its return value.
        raise OSError(f"无法保存图片:{output_path}")


def _list_images(input_dir):
    """Return the sorted paths of files in ``input_dir`` with an image extension."""
    candidates = glob.glob(os.path.join(input_dir, "*"))
    return sorted(
        path
        for path in candidates
        if os.path.isfile(path) and path.lower().endswith(IMAGE_EXTENSIONS)
    )


def main():
    """Process every image in ``input_images/`` into ``output_images/``."""
    input_dir = "input_images/"
    output_dir = "output_images/"

    os.makedirs(output_dir, exist_ok=True)

    for image_path in _list_images(input_dir):
        print(f"正在处理:{image_path}")
        try:
            process_image(image_path, output_dir)
        except Exception as e:
            # Top-level boundary: report the failure and carry on with the
            # remaining images instead of aborting the whole batch.
            print(f"处理失败:{image_path},错误:{e}")


if __name__ == "__main__":
    main()

# Usage (使用说明)
安装依赖库:
pip install opencv-python numpy
准备文件结构:
├── input_images/        # 存放原始图片
│   ├── doc1.jpg
│   └── doc2.png
├── output_images/       # 自动创建
└── document_scanner.py
运行脚本:
python document_scanner.py
注意事项
图片质量要求:
- 文档需要与背景有一定对比度
- 拍摄时尽量包含完整四边
- 避免严重反光或阴影
参数调整建议:
- 调整cv2.Canny参数(75, 200)优化边缘检测
- 修改cv2.adaptiveThreshold参数优化二值化效果
- 调整图片缩放比例(当前为500px高度)
支持格式:
- 支持常见图片格式:JPG、PNG等
- 自动保持原始文件名
错误处理:
- 自动跳过无法识别的图片
- 错误信息会在控制台显示

此方案适用于大多数文档扫描场景,对轻度弯曲或阴影的文档有较好处理效果。对于复杂背景或严重变形的文档,可能需要结合更复杂的图像处理算法。
网友回复