Python提供了多种库和方法来实现图片摆正(倾斜校正)。以下是几种常用的方法:
1. 基于OpenCV的图像校正
OpenCV是最常用的图像处理库,提供了多种图像校正方法:
1.1 基于边缘检测和霍夫变换的校正
import cv2
import numpy as np
import math


def deskew_image(image_path, output_path=None):
    """Straighten a tilted image using Canny edges and the Hough line transform.

    Each detected Hough line contributes its signed deviation from the
    nearest axis (horizontal or vertical); the median deviation is taken as
    the skew and the image is rotated about its centre to cancel it.

    Args:
        image_path: Path of the image to read.
        output_path: Optional path; when given, the result is written there.

    Returns:
        The rotated image, or the unmodified input image when no Hough
        lines are detected.

    Raises:
        FileNotFoundError: If ``cv2.imread`` cannot load ``image_path``.
    """
    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise FileNotFoundError(f"Could not read image: {image_path}")

    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    edges = cv2.Canny(gray, 50, 150, apertureSize=3)
    lines = cv2.HoughLines(edges, 1, np.pi / 180, 200)

    result = image
    if lines is not None:
        # HoughLines yields (rho, theta) pairs with theta in [0, pi), so
        # near-vertical lines sit close to 0 or 180 degrees and
        # near-horizontal lines close to 90 degrees.  Folding theta into
        # [-45, 45) gives the signed deviation from the nearest axis, which
        # keeps thetas such as 2 and 178 degrees from averaging to nonsense.
        theta_deg = np.degrees(lines[:, 0, 1])
        deviations = (theta_deg + 45.0) % 90.0 - 45.0

        # The median is robust against a few stray diagonal lines.
        angle = float(np.median(deviations))

        (h, w) = image.shape[:2]
        center = (w // 2, h // 2)
        M = cv2.getRotationMatrix2D(center, angle, 1.0)
        result = cv2.warpAffine(
            image,
            M,
            (w, h),
            flags=cv2.INTER_CUBIC,
            borderMode=cv2.BORDER_REPLICATE,
        )

    if output_path:
        cv2.imwrite(output_path, result)
    return result


# Usage example
deskew_image("tilted_image.jpg", "corrected_image.jpg")
1.2 基于文本行的校正(适用于文档图像)
import cv2
import numpy as np


def correct_skew(image_path, output_path=None, delta=1, limit=5):
    """Deskew a document image by searching for the best text-line angle.

    The image is binarised with an inverted Otsu threshold, rotated through
    every candidate angle in ``[-limit, limit]`` (step ``delta``, degrees),
    and scored by the variance of its row-sum projection.  The original
    image is then rotated by the highest-scoring angle.

    Args:
        image_path: Path of the image to read.
        output_path: Optional path; when given, the result is written there.
        delta: Step between candidate angles, in degrees.
        limit: Largest absolute candidate angle, in degrees.

    Returns:
        The original image rotated by the best-scoring angle.
    """

    def rotate_about_center(img, degrees):
        # Rotate around the image centre, keeping the canvas size unchanged.
        height, width = img.shape[:2]
        matrix = cv2.getRotationMatrix2D((width // 2, height // 2), degrees, 1.0)
        return cv2.warpAffine(
            img,
            matrix,
            (width, height),
            flags=cv2.INTER_CUBIC,
            borderMode=cv2.BORDER_REPLICATE,
        )

    def projection_score(degrees):
        # Sum every row of the rotated binary image; the variance of that
        # profile is the score used to rank candidate angles.
        row_sums = cv2.reduce(
            rotate_about_center(thresh, degrees),
            1,
            cv2.REDUCE_SUM,
            dtype=cv2.CV_32S,
        )
        return np.var(row_sums)

    image = cv2.imread(image_path)
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    _, thresh = cv2.threshold(
        gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU
    )

    angles = np.arange(-limit, limit + delta, delta)
    scores = [projection_score(candidate) for candidate in angles]
    best_angle = angles[np.argmax(scores)]

    rotated = rotate_about_center(image, best_angle)
    if output_path:
        cv2.imwrite(output_path, rotated)
    return rotated


# Usage example
correct_skew("skewed_document.jpg", "corrected_document.jpg")
2. 基于特征点匹配的图像对齐
当你有一个参考图像(正确摆放的图像)时,可以使用特征点匹配来对齐新图像:
import cv2
import numpy as np


def align_images(template_path, image_path, output_path=None):
    """Align an image to a reference template via SIFT feature matching.

    SIFT keypoints of both images are matched with a FLANN KD-tree matcher,
    filtered with a 0.7 ratio test, and used to estimate a RANSAC homography
    that warps the image into the template's frame.

    Args:
        template_path: Path of the correctly oriented reference image.
        image_path: Path of the image to align.
        output_path: Optional path; when given, the aligned image is
            written there.

    Returns:
        The image warped to the template's size.  If alignment is not
        possible (no descriptors, too few good matches, or no homography
        found) a message is printed and the unaligned input image is
        returned without being saved.

    Raises:
        FileNotFoundError: If either image cannot be loaded.
    """
    template = cv2.imread(template_path)
    if template is None:
        raise FileNotFoundError(f"Could not read image: {template_path}")
    image = cv2.imread(image_path)
    if image is None:
        raise FileNotFoundError(f"Could not read image: {image_path}")

    template_gray = cv2.cvtColor(template, cv2.COLOR_BGR2GRAY)
    image_gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    sift = cv2.SIFT_create()
    kp1, des1 = sift.detectAndCompute(template_gray, None)
    kp2, des2 = sift.detectAndCompute(image_gray, None)

    good_matches = []
    # detectAndCompute returns None descriptors for a featureless image, and
    # a k=2 search needs at least two candidates on the train side.
    if des1 is not None and des2 is not None and len(des2) >= 2:
        FLANN_INDEX_KDTREE = 1
        index_params = dict(algorithm=FLANN_INDEX_KDTREE, trees=5)
        search_params = dict(checks=50)
        flann = cv2.FlannBasedMatcher(index_params, search_params)
        matches = flann.knnMatch(des1, des2, k=2)

        for pair in matches:
            # knnMatch may return fewer than k neighbours for a descriptor;
            # the ratio test is undefined for those, so skip them.
            if len(pair) < 2:
                continue
            best, runner_up = pair
            if best.distance < 0.7 * runner_up.distance:
                good_matches.append(best)

    H = None
    if len(good_matches) > 10:
        src_pts = np.float32(
            [kp1[m.queryIdx].pt for m in good_matches]
        ).reshape(-1, 1, 2)
        dst_pts = np.float32(
            [kp2[m.trainIdx].pt for m in good_matches]
        ).reshape(-1, 1, 2)
        # Map image coordinates (dst_pts) onto template coordinates (src_pts).
        H, mask = cv2.findHomography(dst_pts, src_pts, cv2.RANSAC, 5.0)

    if H is None:
        # Also reached when RANSAC cannot estimate a homography; the same
        # fallback as for too few matches applies.
        print("没有足够的匹配点")
        return image

    h, w = template.shape[:2]
    aligned = cv2.warpPerspective(image, H, (w, h))
    if output_path:
        cv2.imwrite(output_path, aligned)
    return aligned


# Usage example
align_images("reference_image.jpg", "misaligned_image.jpg", "aligned_image.jpg")
3. 基于深度学习的图像校正
对于更复杂的场景,可以使用深度学习方法:
# Requires: pip install tensorflow keras
import cv2
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import load_model


def correct_orientation_with_dl(image_path, model_path, output_path=None):
    """Rotate an image by the orientation class predicted by a Keras model.

    The model is assumed to classify the input into four classes meaning
    0, 90, 180 and 270 degrees; the image is rotated counter-clockwise by
    the predicted amount.  Quarter turns are applied losslessly, so a
    90/270 degree rotation swaps the output width and height instead of
    cropping a non-square image.

    Args:
        image_path: Path of the image to read.
        model_path: Path of the saved Keras model.
        output_path: Optional path; when given, the result is written there.

    Returns:
        The rotated image (the loaded image itself for class 0).

    Raises:
        FileNotFoundError: If ``cv2.imread`` cannot load ``image_path``.
    """
    model = load_model(model_path)

    original = cv2.imread(image_path)
    if original is None:
        raise FileNotFoundError(f"Could not read image: {image_path}")

    # NOTE(review): cv2.imread yields BGR channel order; confirm the model
    # was trained on BGR input scaled to [0, 1] at 224x224.
    batch = cv2.resize(original, (224, 224)) / 255.0
    batch = np.expand_dims(batch, axis=0)

    pred = model.predict(batch)
    # Class index k stands for a k * 90 degree counter-clockwise rotation.
    quarter_turns = int(np.argmax(pred)) % 4

    rotate_codes = {
        1: cv2.ROTATE_90_COUNTERCLOCKWISE,
        2: cv2.ROTATE_180,
        3: cv2.ROTATE_90_CLOCKWISE,  # 270 degrees CCW == 90 degrees CW
    }
    if quarter_turns == 0:
        rotated = original
    else:
        rotated = cv2.rotate(original, rotate_codes[quarter_turns])

    if output_path:
        cv2.imwrite(output_path, rotated)
    return rotated


# Usage example (requires a pre-trained model)
# correct_orientation_with_dl("rotated_image.jpg", "orientation_model.h5", "corrected_image.jpg")
4. 基于透视变换的文档校正
对于文档图像,可以检测四个角点并应用透视变换:
import cv2
import numpy as np


def four_point_transform(image, pts):
    """Warp the quadrilateral ``pts`` of ``image`` into an upright rectangle.

    Args:
        image: Source image.
        pts: Array of four (x, y) corner points in any order.

    Returns:
        The perspective-corrected crop, sized from the longer of each pair
        of opposite sides.
    """

    def side_length(p, q):
        # Euclidean distance between two corner points.
        return np.sqrt(((p[0] - q[0]) ** 2) + ((p[1] - q[1]) ** 2))

    # Order the corners: x + y is smallest at the top-left and largest at
    # the bottom-right; y - x is smallest at the top-right and largest at
    # the bottom-left.
    coord_sum = pts.sum(axis=1)
    coord_diff = np.diff(pts, axis=1)
    rect = np.zeros((4, 2), dtype="float32")
    rect[0] = pts[np.argmin(coord_sum)]   # top-left
    rect[1] = pts[np.argmin(coord_diff)]  # top-right
    rect[2] = pts[np.argmax(coord_sum)]   # bottom-right
    rect[3] = pts[np.argmax(coord_diff)]  # bottom-left
    top_left, top_right, bottom_right, bottom_left = rect

    # Output size: the longer of the two opposite sides in each direction.
    maxWidth = max(
        int(side_length(bottom_right, bottom_left)),
        int(side_length(top_right, top_left)),
    )
    maxHeight = max(
        int(side_length(top_right, bottom_right)),
        int(side_length(top_left, bottom_left)),
    )

    dst = np.array(
        [
            [0, 0],
            [maxWidth - 1, 0],
            [maxWidth - 1, maxHeight - 1],
            [0, maxHeight - 1],
        ],
        dtype="float32",
    )

    matrix = cv2.getPerspectiveTransform(rect, dst)
    return cv2.warpPerspective(image, matrix, (maxWidth, maxHeight))


def document_scanner(image_path, output_path=None):
    """Detect a four-cornered document outline and flatten it.

    The five largest external contours of the Canny edge map are examined
    in order of decreasing area; the first one whose polygonal
    approximation has four vertices is treated as the document.

    Args:
        image_path: Path of the photo to read.
        output_path: Optional path; when given, the flattened document is
            written there.

    Returns:
        The perspective-corrected document, or the input image (after
        printing a message, without saving) when no four-point contour is
        found.
    """

    def approximate(contour):
        # Simplify the contour with a tolerance of 2% of its perimeter.
        perimeter = cv2.arcLength(contour, True)
        return cv2.approxPolyDP(contour, 0.02 * perimeter, True)

    image = cv2.imread(image_path)
    orig = image.copy()

    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    blurred = cv2.GaussianBlur(gray, (5, 5), 0)
    edged = cv2.Canny(blurred, 75, 200)

    contours, _ = cv2.findContours(
        edged.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE
    )
    largest = sorted(contours, key=cv2.contourArea, reverse=True)[:5]

    # Lazy search: stops at the first contour that simplifies to 4 points.
    polygons = (approximate(contour) for contour in largest)
    document_contour = next(
        (polygon for polygon in polygons if len(polygon) == 4), None
    )

    if document_contour is None:
        print("未能检测到文档边界")
        return image

    warped = four_point_transform(orig, document_contour.reshape(4, 2))
    if output_path:
        cv2.imwrite(output_path, warped)
    return warped


# Usage example
document_scanner("document_photo.jpg", "scanned_document.jpg")
总结
- 基于边缘和线条的方法:适用于有明显直线的图像,如文档、表格等。
- 基于文本行的方法:适用于文本密集的文档图像。
- 特征点匹配:当有参考图像时,可以实现精确对齐。
- 深度学习方法:适用于复杂场景,但需要训练数据。
- 透视变换:适用于拍摄的文档图像,可以校正透视变形。
网友回复