image_to_pixle_params_yoloSAM/main/main.py

238 lines
9.5 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
car_measure.py
--------------
1. 读取显著性图 -> 阈值化生成纯白掩模
2. 形态学闭运算 -> 去噪 & 填孔
3. 计算 + 绘制外接矩形 (显示宽、高像素)
4. 霍夫圆检测 -> 仅画圆心 & 连线 + 距离标注
所有可视化与结果文件统一写到 out_dir
"""
import os
import cv2
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image, ImageDraw, ImageFont
from u2net_saliency import generate_saliency_map
# ----------------------------------------------------------------------
# -------------- 辅助:显著性图增强 & 调试可视化(可选) -------------------
# ----------------------------------------------------------------------
def enhance_saliency_map(saliency_map):
    """Boost the contrast of a saliency map (optional helper for tuning).

    Pipeline: min-max stretch to the 0..255 range as 8-bit, CLAHE local
    contrast equalisation, then an edge-preserving bilateral filter.

    Args:
        saliency_map: Image array to enhance. CLAHE only accepts
            single-channel input, so a grayscale map is expected.

    Returns:
        The enhanced map as an 8-bit array.
    """
    # Force an 8-bit result. Without an explicit dtype cv2.normalize keeps the
    # input depth, so a float map would be rejected by CLAHE (8/16-bit unsigned
    # only) and a 16-bit map by bilateralFilter (8-bit or float only).
    # For input that is already uint8 this is identical to the old behaviour.
    stretched = cv2.normalize(saliency_map, None, 0, 255, cv2.NORM_MINMAX,
                              dtype=cv2.CV_8U)
    clahe = cv2.createCLAHE(clipLimit=3.0, tileGridSize=(8, 8))
    equalised = clahe.apply(stretched)
    return cv2.bilateralFilter(equalised, 9, 75, 75)
# ----------------------------------------------------------------------
# ------------------------- 圆心检测 & 距离标注 -------------------------
# ----------------------------------------------------------------------
import os
import cv2
import numpy as np
def detect_and_draw_circles(salient_path, original_path, output_dir):
    """Run Hough circle detection on the saliency image and annotate both images.

    Of all detected circles, the two with the largest y coordinate (i.e. the
    lowest ones in the image) are kept.

    - Saliency image: each kept circle is drawn with its outline and centre.
    - Original image: when two circles were kept, their centres, the line
      joining them and the centre-to-centre distance in pixels are drawn.

    Two files are always written to ``output_dir`` (created if missing), even
    when no circle is found:

    - ``detected_centers_salient.png``
    - ``detected_centers_original.png``

    Args:
        salient_path: Path of the saliency image; read as grayscale.
        original_path: Path of the original image.
        output_dir: Directory that receives the two annotated images.

    Raises:
        FileNotFoundError: If either input image cannot be read.
    """
    salient_img = cv2.imread(salient_path, cv2.IMREAD_GRAYSCALE)
    original_img = cv2.imread(original_path)
    if salient_img is None or original_img is None:
        raise FileNotFoundError("Salient 或 original 图片路径有误")

    # Blur first to suppress noise, then search for circles.
    blurred = cv2.GaussianBlur(salient_img, (9, 9), 2)
    circles = cv2.HoughCircles(
        blurred, cv2.HOUGH_GRADIENT,
        dp=1.2, minDist=290,
        param1=50, param2=17,
        minRadius=85, maxRadius=95
    )

    output_salient = cv2.cvtColor(salient_img, cv2.COLOR_GRAY2BGR)
    output_original = original_img.copy()

    if circles is not None:
        # Round to plain Python ints. The coordinates used to be cast to
        # np.uint16, so `x1 - x2` wrapped around (to roughly 65536 - |dx|)
        # whenever the lower circle was left of the other one, and the
        # reported distance was wrong. Python ints subtract without wrap-around.
        detected = np.rint(circles[0]).astype(int).tolist()
        centers = sorted(detected, key=lambda c: c[1], reverse=True)[:2]

        # Saliency image: full circle outline plus centre dot.
        for x, y, r in centers:
            cv2.circle(output_salient, (x, y), r, (0, 255, 0), 2)
            cv2.circle(output_salient, (x, y), 3, (0, 0, 255), -1)

        # Original image: centre dots, connecting line and distance label.
        if len(centers) >= 2:
            (x1, y1, _), (x2, y2, _) = centers
            cv2.circle(output_original, (x1, y1), 3, (0, 255, 0), -1)
            cv2.circle(output_original, (x2, y2), 3, (0, 255, 0), -1)
            cv2.line(output_original, (x1, y1), (x2, y2), (0, 0, 255), 2)

            dist = np.hypot(x1 - x2, y1 - y2)
            # Label sits at the midpoint of the line, shifted 10 px upwards.
            mid_pt = ((x1 + x2) // 2, (y1 + y2) // 2 - 10)
            cv2.putText(output_original, f"{dist:.1f}px", mid_pt,
                        cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 0, 0), 2)
            print(f"[Circle] 两圆心距离:{dist:.2f} px")
        else:
            print("[Circle] 检测到的圆少于 2 个")
    else:
        print("[Circle] 未检测到圆")

    os.makedirs(output_dir, exist_ok=True)
    cv2.imwrite(os.path.join(output_dir, 'detected_centers_salient.png'), output_salient)
    cv2.imwrite(os.path.join(output_dir, 'detected_centers_original.png'), output_original)
# ----------------------------------------------------------------------
# ----------------------- 外接矩形 & 像素尺寸 ---------------------------
# ----------------------------------------------------------------------
def _get_font(size):
    """Load a TrueType font at ``size``, trying known locations in order.

    Falls back to Pillow's built-in default font when none of the candidate
    font files can be opened.
    """
    candidates = (
        "/usr/share/fonts/truetype/ubuntu/Ubuntu-B.ttf",
        "arial.ttf",
    )
    for candidate in candidates:
        try:
            font = ImageFont.truetype(candidate, size)
        except IOError:
            # This candidate could not be opened; try the next one.
            pass
        else:
            return font
    return ImageFont.load_default()
def calculate_and_draw_bbox(mask_path,
                            output_mask_path,
                            original_path=None,
                            output_original_path=None,
                            display_width=None,
                            display_height=None):
    """Measure the bounding box of a mask and draw width/height guide lines.

    Only two lines are drawn: one along the top edge of the box (width) and
    one along its right edge (height), each labelled with a pixel size.

    Args:
        mask_path: Path of the mask image; every pixel > 0 counts as foreground.
        output_mask_path: Where the annotated mask is saved.
        original_path: Optional original image to annotate in the same way.
        output_original_path: Where the annotated original is saved. The
            original is only processed when both this and ``original_path``
            are given.
        display_width: Optional value shown in the width label instead of the
            measured width.
        display_height: Optional value shown in the height label instead of
            the measured height.

    Returns:
        ``(w_px, h_px)``: the measured bounding-box width and height in
        pixels (never the display overrides).

    Raises:
        RuntimeError: If the mask has no non-zero pixel.
    """
    # ---------- Bounding box of all non-zero pixels ----------
    mask_img = Image.open(mask_path).convert("L")
    nonzero = np.argwhere(np.array(mask_img) > 0)
    if nonzero.size == 0:
        raise RuntimeError("掩模为空,无法测量尺寸")
    # argwhere rows are (row, col), i.e. (y, x).
    (ymin, xmin), (ymax, xmax) = nonzero.min(axis=0), nonzero.max(axis=0)
    w_px = xmax - xmin + 1
    h_px = ymax - ymin + 1

    # ---------- Labels and their anchor points ----------
    show_w = display_width if display_width is not None else w_px
    show_h = display_height if display_height is not None else h_px
    w_text = f"W:{int(round(show_w))}px"
    h_text = f"H:{int(round(show_h))}px"
    font = _get_font(34)

    # Width label: near the middle of the top edge, 40 px above it; if that
    # would leave the image, put it 10 px below the line instead.
    w_label_y = ymin - 40
    if w_label_y < 0:
        w_label_y = ymin + 10
    w_anchor = (int((xmin + xmax) / 2) - 40, w_label_y)
    # Height label: 10 px right of the right edge, around its vertical middle.
    h_anchor = (xmax + 10, int((ymin + ymax) / 2) - 20)

    def _annotate(image):
        """Draw both guide lines and both labels onto ``image`` in place."""
        pen = ImageDraw.Draw(image)
        pen.line([(xmin, ymin), (xmax, ymin)], fill="red", width=4)  # top edge
        pen.line([(xmax, ymin), (xmax, ymax)], fill="red", width=4)  # right edge
        pen.text(w_anchor, w_text, fill="yellow", font=font)
        pen.text(h_anchor, h_text, fill="yellow", font=font)

    # ---------- Annotated mask ----------
    vis_mask = mask_img.convert("RGB")
    _annotate(vis_mask)
    vis_mask.save(output_mask_path)
    print(f"[Size] 掩模可视化已保存: {output_mask_path}")

    # ---------- Same annotation on the original image ----------
    if original_path and output_original_path:
        orig = Image.open(original_path).convert("RGB")
        _annotate(orig)
        orig.save(output_original_path)
        print(f"[Size] 原图可视化已保存: {output_original_path}")

    return w_px, h_px
# ----------------------------------------------------------------------
# ------------------------------- 主程序 -------------------------------
# ----------------------------------------------------------------------
if __name__ == '__main__':
    # ======================= Path configuration =======================
    # Each entry: (tag, original image path, saliency / mask image path)
    triplets = (
        ('front', './image/front_2.jpg', './saliency/front_2.jpg'),  # front view
        ('rear', './image/rear_2.jpg', './saliency/rear_2.jpg'),  # rear view
        ('side', './image/side_2.jpg', './saliency/side_2.jpg'),  # side view (circle detection)
    )
    out_dir, thresh_dir = './result2', './thresh2'
    for directory in (out_dir, thresh_dir):
        os.makedirs(directory, exist_ok=True)

    # Offset added to the Otsu threshold (the original note suggests keeping
    # its magnitude within about 0-20). A negative value lowers the threshold,
    # so more of the saliency map is kept as foreground.
    offset = -10
    # Small elliptical kernel for a light closing: fills tiny holes without
    # destroying detail.
    close_kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (3, 3))

    for tag, source_image, saliency_file in triplets:
        # ---------- 0) Generate the saliency map ----------
        # Can be commented out if the maps were already produced by
        # u2net_saliency.
        print(f"处理 {tag} 图像中...")
        generate_saliency_map(source_image, saliency_file)

        print(f'\n===== 处理 {tag} =====')

        # ---------- 1) Threshold the saliency map into a binary mask ----------
        saliency_gray = cv2.imread(saliency_file, cv2.IMREAD_GRAYSCALE)
        if saliency_gray is None:
            raise FileNotFoundError(saliency_file)

        # First pass only obtains the Otsu threshold value; the second pass
        # applies the shifted threshold, clamped to the 0..255 range.
        otsu_val, _ = cv2.threshold(
            saliency_gray, 0, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU)
        final_thresh = max(0, min(255, otsu_val + offset))
        _, binary_mask = cv2.threshold(
            saliency_gray, final_thresh, 255, cv2.THRESH_BINARY)
        print(f'[Mask-{tag}] Otsu阈值={otsu_val:.1f}, 最终阈值={final_thresh}')

        binary_mask = cv2.morphologyEx(
            binary_mask, cv2.MORPH_CLOSE, close_kernel, iterations=1)

        # Save the thresholded mask.
        mask_path = os.path.join(thresh_dir, f'{tag}_1_mask_thresh.png')
        cv2.imwrite(mask_path, binary_mask)
        print(f'[Mask-{tag}] 阈值化掩模已保存: {mask_path}')

        # ---------- 2) Draw width/height lines and pixel sizes ----------
        calculate_and_draw_bbox(
            mask_path=mask_path,  # binary mask
            output_mask_path=os.path.join(out_dir, f'{tag}_size_lines_mask.png'),
            original_path=source_image,
            output_original_path=os.path.join(out_dir, f'{tag}_size_lines_orig.png'),
        )

        # ---------- 3) Circle-centre detection, 'side' view only ----------
        if tag == 'side':
            detect_and_draw_circles(saliency_file, source_image, out_dir)