Python提供了多种库和方法来实现图片摆正(倾斜校正)。以下是几种常用的图片摆正方法:
1. 基于OpenCV的图像校正
OpenCV是最常用的图像处理库,提供了多种图像校正方法:
1.1 基于边缘检测和霍夫变换的校正
import cv2
import numpy as np
import math
def _skew_degrees_from_thetas(thetas):
    """Return the median skew, in degrees, implied by Hough line normals.

    ``cv2.HoughLines`` reports each line by the angle ``theta`` of its normal,
    in radians within ``[0, pi)``: 0 is a vertical line and pi/2 a horizontal
    one. Folding every angle into ``[-45, 45)`` measures how far each line is
    from the nearest image axis, so near-horizontal lines, near-vertical lines
    and vertical lines on either side of the 0/pi wrap-around all vote for the
    same skew instead of cancelling or inflating each other.

    Args:
        thetas: Iterable of normal angles in radians.

    Returns:
        The median folded deviation as a Python float, in degrees.
    """
    degrees = np.degrees(np.asarray(thetas, dtype=np.float64))
    deviations = (degrees + 45.0) % 90.0 - 45.0
    return float(np.median(deviations))


def deskew_image(image_path, output_path=None):
    """Straighten an image using Canny edges and the standard Hough transform.

    Args:
        image_path: Path of the image to read.
        output_path: Optional path; when given, the result is written there.

    Returns:
        The rotated image, or the unmodified image when no Hough lines were
        detected.

    Raises:
        OSError: If ``cv2.imread`` could not load ``image_path``.
    """
    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread signals failure by returning None rather than raising.
        raise OSError("Could not read image: {}".format(image_path))

    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    edges = cv2.Canny(gray, 50, 150, apertureSize=3)
    lines = cv2.HoughLines(edges, 1, np.pi / 180, 200)

    result = image
    if lines is not None:
        # Each entry is [[rho, theta]]; only theta matters for the skew.
        angle = _skew_degrees_from_thetas(lines[:, 0, 1])

        (h, w) = image.shape[:2]
        center = (w // 2, h // 2)
        # A positive angle rotates counter-clockwise, which undoes a
        # clockwise tilt of the same size.
        M = cv2.getRotationMatrix2D(center, angle, 1.0)
        result = cv2.warpAffine(
            image,
            M,
            (w, h),
            flags=cv2.INTER_CUBIC,
            borderMode=cv2.BORDER_REPLICATE,
        )

    if output_path:
        cv2.imwrite(output_path, result)
    return result
# 使用示例
deskew_image("tilted_image.jpg", "corrected_image.jpg")
1.2 基于文本行的校正(适用于文档图像)
import cv2
import numpy as np
def correct_skew(image_path, output_path=None, delta=1, limit=5):
    """Deskew a document image by searching for the best projection profile.

    Every candidate angle in ``[-limit, limit]`` (step ``delta``) is applied
    to an Otsu-binarised copy of the image; the angle whose row-sum profile
    has the largest variance is then applied to the original image.

    Args:
        image_path: Path of the image to read.
        output_path: Optional path; when given, the result is written there.
        delta: Step between candidate angles, in degrees.
        limit: Largest absolute candidate angle, in degrees.

    Returns:
        The original image rotated by the best-scoring angle.
    """
    source = cv2.imread(image_path)
    grayscale = cv2.cvtColor(source, cv2.COLOR_BGR2GRAY)
    _, binary = cv2.threshold(
        grayscale, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU
    )

    def rotate_about_center(picture, degrees):
        # Rotate around the integer centre, keeping the canvas size.
        rows, cols = picture.shape[:2]
        pivot = (cols // 2, rows // 2)
        matrix = cv2.getRotationMatrix2D(pivot, degrees, 1.0)
        return cv2.warpAffine(
            picture,
            matrix,
            (cols, rows),
            flags=cv2.INTER_CUBIC,
            borderMode=cv2.BORDER_REPLICATE,
        )

    def profile_variance(degrees):
        # Sum each row of the rotated mask; the variance of these sums is
        # used as the alignment score (larger is treated as better).
        row_sums = cv2.reduce(
            rotate_about_center(binary, degrees),
            1,
            cv2.REDUCE_SUM,
            dtype=cv2.CV_32S,
        )
        return np.var(row_sums)

    candidates = np.arange(-limit, limit + delta, delta)
    variances = [profile_variance(candidate) for candidate in candidates]
    winner = candidates[np.argmax(variances)]

    deskewed = rotate_about_center(source, winner)
    if output_path:
        cv2.imwrite(output_path, deskewed)
    return deskewed
# 使用示例
correct_skew("skewed_document.jpg", "corrected_document.jpg")
2. 基于特征点匹配的图像对齐
当你有一个参考图像(正确摆放的图像)时,可以使用特征点匹配来对齐新图像:
import cv2
import numpy as np
def align_images(template_path, image_path, output_path=None):
    """Warp an image onto a reference template using SIFT feature matches.

    Args:
        template_path: Path of the correctly oriented reference image.
        image_path: Path of the image to align.
        output_path: Optional path; when given, the aligned image is written
            there. Nothing is written when alignment is not possible.

    Returns:
        The image warped into the template's frame (template width and
        height), or the unmodified input image when too few good matches
        were found or no homography could be estimated.

    Raises:
        OSError: If either file could not be loaded by ``cv2.imread``.
    """
    template = cv2.imread(template_path)
    if template is None:
        # cv2.imread signals failure by returning None rather than raising.
        raise OSError("Could not read image: {}".format(template_path))
    image = cv2.imread(image_path)
    if image is None:
        raise OSError("Could not read image: {}".format(image_path))

    template_gray = cv2.cvtColor(template, cv2.COLOR_BGR2GRAY)
    image_gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    sift = cv2.SIFT_create()
    kp1, des1 = sift.detectAndCompute(template_gray, None)
    kp2, des2 = sift.detectAndCompute(image_gray, None)

    good_matches = []
    # Descriptors are None when an image yields no keypoints, and a k=2
    # search needs at least two candidates to compare against.
    if des1 is not None and des2 is not None and len(des2) >= 2:
        FLANN_INDEX_KDTREE = 1
        index_params = dict(algorithm=FLANN_INDEX_KDTREE, trees=5)
        search_params = dict(checks=50)
        flann = cv2.FlannBasedMatcher(index_params, search_params)
        matches = flann.knnMatch(des1, des2, k=2)

        # Ratio test: keep a match only when it is clearly better than the
        # runner-up. Skip entries that came back with fewer than two
        # neighbours, which cannot be ratio-tested.
        for pair in matches:
            if len(pair) < 2:
                continue
            best, runner_up = pair
            if best.distance < 0.7 * runner_up.distance:
                good_matches.append(best)

    H = None
    if len(good_matches) > 10:
        src_pts = np.float32(
            [kp1[m.queryIdx].pt for m in good_matches]
        ).reshape(-1, 1, 2)
        dst_pts = np.float32(
            [kp2[m.trainIdx].pt for m in good_matches]
        ).reshape(-1, 1, 2)
        # Points are passed image-first so the homography maps the input
        # image into the template's coordinates.
        H, _ = cv2.findHomography(dst_pts, src_pts, cv2.RANSAC, 5.0)

    if H is None:
        # Either too few good matches or RANSAC found no usable model;
        # both cases use the existing fallback.
        print("没有足够的匹配点")
        return image

    h, w = template.shape[:2]
    aligned = cv2.warpPerspective(image, H, (w, h))
    if output_path:
        cv2.imwrite(output_path, aligned)
    return aligned
# 使用示例
align_images("reference_image.jpg", "misaligned_image.jpg", "aligned_image.jpg")
3. 基于深度学习的图像校正
对于更复杂的场景,可以使用深度学习方法:
# 需要安装:pip install tensorflow keras
import cv2
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import load_model
def correct_orientation_with_dl(image_path, model_path, output_path=None):
    """Rotate an image by the quarter-turn class predicted by a Keras model.

    The model's arg-max class index ``k`` is interpreted as ``k * 90``
    degrees and the image is rotated counter-clockwise by that amount.
    Quarter turns are applied exactly, so non-square images are neither
    cropped nor padded: for 90 and 270 degrees the output has the width and
    height swapped.

    Args:
        image_path: Path of the image to read.
        model_path: Path of the saved Keras model.
        output_path: Optional path; when given, the result is written there.

    Returns:
        The rotated image.

    Raises:
        OSError: If ``cv2.imread`` could not load ``image_path``.
    """
    model = load_model(model_path)

    original = cv2.imread(image_path)
    if original is None:
        # cv2.imread signals failure by returning None rather than raising.
        raise OSError("Could not read image: {}".format(image_path))

    # Resize to 224x224, scale to [0, 1] and add a batch axis.
    # NOTE(review): the pixels stay in OpenCV's BGR order; confirm that the
    # model was trained on the same channel order and input size.
    batch = cv2.resize(original, (224, 224)) / 255.0
    batch = np.expand_dims(batch, axis=0)

    pred = model.predict(batch)
    # Assumes the model outputs four classes for 0, 90, 180 and 270 degrees.
    # NOTE(review): whether a class means "rotate by" or "is rotated by"
    # depends on how the model was trained; the original direction is kept.
    quarter_turns = int(np.argmax(pred)) % 4

    # np.rot90 turns counter-clockwise, the same direction as a positive
    # angle in cv2.getRotationMatrix2D, but without interpolating or
    # cropping to the original canvas. The result is made contiguous before
    # it is passed to OpenCV.
    rotated = np.ascontiguousarray(np.rot90(original, k=quarter_turns))

    if output_path:
        cv2.imwrite(output_path, rotated)
    return rotated
# 使用示例(需要预训练模型)
# correct_orientation_with_dl("rotated_image.jpg", "orientation_model.h5", "corrected_image.jpg")
4. 基于透视变换的文档校正
对于文档图像,可以检测四个角点并应用透视变换:
import cv2
import numpy as np
def four_point_transform(image, pts):
    """Warp the quadrilateral described by ``pts`` to an upright rectangle.

    Args:
        image: Source image.
        pts: Array of four (x, y) corner points, in any order.

    Returns:
        The perspective-corrected crop. Its width and height are the longer
        of each pair of opposite sides, truncated to integers.
    """
    # Order the corners: the smallest / largest x + y give top-left /
    # bottom-right, the smallest / largest y - x give top-right / bottom-left.
    coord_sum = pts.sum(axis=1)
    coord_diff = np.diff(pts, axis=1)
    corners = np.array(
        [
            pts[np.argmin(coord_sum)],
            pts[np.argmin(coord_diff)],
            pts[np.argmax(coord_sum)],
            pts[np.argmax(coord_diff)],
        ],
        dtype="float32",
    )
    top_left, top_right, bottom_right, bottom_left = corners

    def side_length(p, q):
        # Euclidean distance between two corners.
        return np.sqrt(((p[0] - q[0]) ** 2) + ((p[1] - q[1]) ** 2))

    out_width = max(
        int(side_length(bottom_right, bottom_left)),
        int(side_length(top_right, top_left)),
    )
    out_height = max(
        int(side_length(top_right, bottom_right)),
        int(side_length(top_left, bottom_left)),
    )

    # Destination corners in the same order as ``corners``.
    target = np.array(
        [
            [0, 0],
            [out_width - 1, 0],
            [out_width - 1, out_height - 1],
            [0, out_height - 1],
        ],
        dtype="float32",
    )

    transform = cv2.getPerspectiveTransform(corners, target)
    return cv2.warpPerspective(image, transform, (out_width, out_height))
def document_scanner(image_path, output_path=None):
    """Find a four-cornered outline in a photo and flatten it.

    The five largest external contours of the Canny edge map are examined in
    order of decreasing area; the first one whose polygon approximation has
    exactly four vertices is treated as the document.

    Args:
        image_path: Path of the image to read.
        output_path: Optional path; when given and an outline was found, the
            flattened result is written there.

    Returns:
        The perspective-corrected document, or the unmodified input image
        when no four-cornered contour was found.
    """
    photo = cv2.imread(image_path)
    untouched = photo.copy()

    # Grayscale -> blur -> edges.
    grayscale = cv2.cvtColor(photo, cv2.COLOR_BGR2GRAY)
    smoothed = cv2.GaussianBlur(grayscale, (5, 5), 0)
    edge_map = cv2.Canny(smoothed, 75, 200)

    found, _ = cv2.findContours(
        edge_map.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE
    )
    largest = sorted(found, key=cv2.contourArea, reverse=True)[:5]

    for candidate in largest:
        # Simplify with a tolerance of 2% of the closed perimeter.
        perimeter = cv2.arcLength(candidate, True)
        polygon = cv2.approxPolyDP(candidate, 0.02 * perimeter, True)
        if len(polygon) == 4:
            break
    else:
        # No candidate had exactly four vertices.
        print("未能检测到文档边界")
        return photo

    flattened = four_point_transform(untouched, polygon.reshape(4, 2))
    if output_path:
        cv2.imwrite(output_path, flattened)
    return flattened
# 使用示例
document_scanner("document_photo.jpg", "scanned_document.jpg")
总结
基于边缘和线条的方法:适用于有明显直线的图像,如文档、表格等。
基于文本行的方法:适用于文本密集的文档图像。
特征点匹配:当有参考图像时,可以实现精确对齐。
深度学习方法:适用于复杂场景,但需要训练数据。
透视变换:适用于拍摄的文档图像,可以校正透视变形。
网友回复


