commit 02db9e80eb042927c2e510c70fed67a3f96cb5e6
Author: shenyiwei22 <1365821736@qq.com>
Date:   Thu Aug 7 17:45:41 2025 +0800

    Upload files to /

diff --git a/best_model.pth b/best_model.pth
new file mode 100644
index 0000000..a05540f
Binary files /dev/null and b/best_model.pth differ
diff --git a/inference.py b/inference.py
new file mode 100644
index 0000000..fc328a9
--- /dev/null
+++ b/inference.py
@@ -0,0 +1,96 @@
+import os
+import torch
+import numpy as np
+from PIL import Image
+import albumentations as A
+from albumentations.pytorch import ToTensorV2
+from torch.nn import functional as F
+from tqdm import tqdm
+import shutil
+
+from torch.serialization import add_safe_globals
+from efficientnet_pytorch.model import EfficientNet as EfficientNetClass
+add_safe_globals([EfficientNetClass])
+
+
+model_path = "model_save/best_model.pth"
+image_root = "test_images"          # source images (subdirectories are supported)
+output_root = "classified_images"   # output folder for the classified copies
+HW = 260
+device = "cuda" if torch.cuda.is_available() else "cpu"
+
+idx_to_label = {
+    0: "front",
+    1: "rear",
+    2: "side",
+    3: "front-left",
+    4: "front-right",
+    5: "rear-left",
+    6: "rear-right",
+    7: "others"
+}
+num_classes = len(idx_to_label)
+tau_others = 0.75  # confidence threshold for the "others" class
+
+# ---------- Image preprocessing ----------
+val_transform = A.Compose([
+    A.Resize(HW, HW),
+    A.Normalize(mean=(0.485, 0.456, 0.406),
+                std=(0.229, 0.224, 0.225)),
+    ToTensorV2()
+])
+
+# ---------- Load model ----------
+print("🔍 Loading model...")
+model = torch.load(model_path, map_location=device, weights_only=False)
+model.eval().to(device)
+
+# ---------- Collect all image paths ----------
+def collect_image_paths(root_dir):
+    image_paths = []
+    for dirpath, _, filenames in os.walk(root_dir):
+        for filename in filenames:
+            if filename.lower().endswith((".jpg", ".jpeg", ".png", ".bmp")):
+                image_paths.append(os.path.join(dirpath, filename))
+    return image_paths
+
+# ---------- Inference ----------
+def infer_and_save(image_paths):
+    os.makedirs(output_root, exist_ok=True)
+    for label in idx_to_label.values():
+        os.makedirs(os.path.join(output_root, label), exist_ok=True)
+
+    for img_path in tqdm(image_paths, desc="Inferring"):
+        try:
+            image = np.array(Image.open(img_path).convert("RGB"))
+            image = val_transform(image=image)["image"].unsqueeze(0).to(device)
+
+            with torch.no_grad():
+                logits = model(image)
+                probs = F.softmax(logits, dim=1).cpu().numpy()[0]
+
+            sorted_indices = np.argsort(probs)[::-1]
+            top1_idx = sorted_indices[0]
+            top1_prob = probs[top1_idx]
+
+            if top1_idx == 7 and top1_prob < tau_others:
+                pred_idx = sorted_indices[1]  # low confidence for "others" -> fall back to the second-best class
+            else:
+                pred_idx = top1_idx
+
+            pred_label = idx_to_label[pred_idx]
+
+            # Copy the image into the folder of its predicted class
+            filename = os.path.basename(img_path)
+            dst_path = os.path.join(output_root, pred_label, filename)
+            shutil.copy(img_path, dst_path)
+
+        except Exception as e:
+            print(f"❌ Error processing {img_path}: {e}")
+
+# ---------- Main ----------
+if __name__ == "__main__":
+    image_paths = collect_image_paths(image_root)
+    print(f"✅ Found {len(image_paths)} images.")
+    infer_and_save(image_paths)
+    print("🎉 Classification finished ✅")
diff --git a/train.py b/train.py
new file mode 100644
index 0000000..bd5048b
--- /dev/null
+++ b/train.py
@@ -0,0 +1,220 @@
+import random
+import os
+from efficientnet_pytorch import EfficientNet
+from PIL import Image
+from torch.utils.data import Dataset, DataLoader, WeightedRandomSampler
+from tqdm import tqdm
+import time
+import torch.nn as nn
+import matplotlib
+import cv2
+import numpy as np
+import torch
+import albumentations as A
+from albumentations.pytorch import ToTensorV2
+matplotlib.use('TkAgg')
+import matplotlib.pyplot as plt
+
+# Fix random seeds for reproducibility
+def seed_everything(seed):
+    torch.manual_seed(seed)
+    torch.cuda.manual_seed(seed)
+    torch.cuda.manual_seed_all(seed)
+    torch.backends.cudnn.benchmark = False
+    torch.backends.cudnn.deterministic = True
+    random.seed(seed)
+    np.random.seed(seed)
+    os.environ['PYTHONHASHSEED'] = str(seed)
+seed_everything(0)
+
+HW = 260
+
+train_transform = A.Compose([
+    A.LongestMaxSize(max_size=260, interpolation=cv2.INTER_AREA),
+    A.PadIfNeeded(min_height=260, min_width=260, border_mode=cv2.BORDER_CONSTANT, value=(114, 114, 114)),
+    A.ShiftScaleRotate(
+        shift_limit=0.02,
+        scale_limit=0.05,
+        rotate_limit=15,
+        interpolation=cv2.INTER_LINEAR,
+        border_mode=cv2.BORDER_CONSTANT, value=(114, 114, 114),
+        p=0.5
+    ),
+
+    A.ColorJitter(
+        brightness=0.3,
+        contrast=0.3,
+        saturation=0.15,
+        hue=0.05,
+        p=0.5
+    ),
+
+    A.ImageCompression(quality_lower=75, quality_upper=100, p=0.2),
+
+    A.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
+    ToTensorV2(),
+])
+
+val_transform = A.Compose([
+    A.LongestMaxSize(max_size=260, interpolation=cv2.INTER_AREA),
+    A.PadIfNeeded(260, 260, border_mode=cv2.BORDER_CONSTANT, value=(114, 114, 114)),
+    A.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
+    ToTensorV2(),
+])
+
+def read_file(path):
+    label_map = {
+        "front": 0,
+        "rear": 1,
+        "side": 2,
+        "front-left": 3,
+        "front-right": 4,
+        "rear-left": 5,
+        "rear-right": 6,
+        "others": 7
+    }
+    img_paths, labels = [], []
+    for folder_name in tqdm(os.listdir(path)):
+        folder_path = os.path.join(path, folder_name)
+        if not os.path.isdir(folder_path) or folder_name not in label_map:
+            continue
+        label = label_map[folder_name]
+        for img_name in os.listdir(folder_path):
+            img_paths.append(os.path.join(folder_path, img_name))
+            labels.append(label)
+    print("Loaded %d samples" % len(labels))
+    return img_paths, torch.LongTensor(labels)
+
+class Carview_Dataset(Dataset):
+    def __init__(self, path, mode="train"):
+        self.paths, self.Y = read_file(path)
+        self.transform = train_transform if mode == "train" else val_transform
+
+    def __getitem__(self, idx):
+        img = np.array(Image.open(self.paths[idx]).convert("RGB"))
+        if self.transform:
+            img = self.transform(image=img)['image']
+        label = self.Y[idx]
+        return img, label
+
+    def __len__(self):
+        return len(self.Y)
+
+
+train_path = r"D:\jili\car_viewpoint_classification\dataset\train"
+val_path = r"D:\jili\car_viewpoint_classification\dataset\val"
+train_set = Carview_Dataset(train_path, "train")
+val_set = Carview_Dataset(val_path, "val")
+
+
+labels = train_set.Y.cpu().numpy()
+class_counts = np.bincount(labels, minlength=8)
+class_weights = 1.0 / (class_counts + 1e-6)
+sample_weights = class_weights[labels]
+
+sampler = WeightedRandomSampler(
+    weights=torch.from_numpy(sample_weights).double(),
+    num_samples=len(sample_weights),  # draw this many samples per epoch
+    replacement=True                  # allow oversampling of minority classes
+)
+
+train_loader = DataLoader(
+    train_set, batch_size=16, sampler=sampler,
+    num_workers=0, pin_memory=True
+)
+
+# Evaluate the validation set in its original order
+val_loader = DataLoader(
+    val_set, batch_size=16, shuffle=False,
+    num_workers=0, pin_memory=True
+)
+
+
+def build_efficientnet_model(num_classes=8):
+    model = EfficientNet.from_pretrained('efficientnet-b2')
+    in_features = model._fc.in_features
+    model._fc = nn.Linear(in_features, num_classes)
+    return model
+
+
+def train_val(model, train_loader, val_loader, device, epochs, optimizer, loss, scheduler, save_path, patience=10):
+    model = model.to(device)
+    plt_train_loss = []
+    plt_val_loss = []
+    plt_train_acc = []
+    plt_val_acc = []
+    max_acc = 0.0
+
+    for epoch in range(epochs):
+        train_loss = 0.0
+        val_loss = 0.0
+        train_acc = 0.0
+        val_acc = 0.0
+        start_time = time.time()
+        # Training phase
+        model.train()
+        for batch_x, batch_y in train_loader:
+            x, target = batch_x.to(device), batch_y.to(device)
+            pred = model(x)                      # forward pass
+            train_bat_loss = loss(pred, target)  # loss between predictions and targets
+            pred_labels = torch.argmax(pred, dim=1)
+            true_labels = target
+            optimizer.zero_grad()        # clear old gradients
+            train_bat_loss.backward()    # backpropagate to compute gradients
+            optimizer.step()             # update parameters
+            train_loss += train_bat_loss.cpu().item()  # accumulate batch loss
+            train_acc += (pred_labels == true_labels).sum().item()
+        plt_train_loss.append(train_loss / len(train_loader))
+        plt_train_acc.append(train_acc / len(train_loader.dataset))
+        # Validation phase
+        model.eval()
+        with torch.no_grad():  # no gradient computation during validation
+            for batch_x, batch_y in val_loader:
+                x, target = batch_x.to(device), batch_y.to(device)
+                pred = model(x)
+                val_bat_loss = loss(pred, target)
+                pred_labels = torch.argmax(pred, dim=1)
+                true_labels = target
+                val_loss += val_bat_loss.cpu().item()
+                val_acc += (pred_labels == true_labels).sum().item()
+        plt_val_loss.append(val_loss / len(val_loader))
+        plt_val_acc.append(val_acc / len(val_loader.dataset))
+        val_acc_avg = val_acc / len(val_loader.dataset)
+        if val_acc_avg > max_acc:
+            max_acc = val_acc_avg
+            torch.save(model, save_path)  # save the full model (loaded with weights_only=False in inference.py)
+        print('[%03d/%03d] %2.2f sec(s) TrainLoss : %.6f | valLoss: %.6f Trainacc : %.6f | valacc: %.6f' % \
+              (epoch + 1, epochs, time.time() - start_time, plt_train_loss[-1], plt_val_loss[-1], plt_train_acc[-1], plt_val_acc[-1])
+        )
+        scheduler.step()
+    plt.plot(plt_train_loss)
+    plt.plot(plt_val_loss)
+    plt.title("loss")
+    plt.legend(["train", "val"])
+    plt.show()
+
+    plt.plot(plt_train_acc)
+    plt.plot(plt_val_acc)
+    plt.title("acc")
+    plt.legend(["train", "val"])
+    plt.show()
+
+
+model = build_efficientnet_model(num_classes=8)
+
+lr = 0.0003
+
+loss = nn.CrossEntropyLoss(label_smoothing=0.1)
+
+optimizer = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=1e-4)
+
+
+scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=7, gamma=0.5)
+device = "cuda" if torch.cuda.is_available() else "cpu"
+save_path = "model_save/best_model.pth"
+epochs = 15
+os.makedirs("model_save", exist_ok=True)
+train_val(model, train_loader, val_loader, device, epochs, optimizer, loss, scheduler, save_path)
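Note: train_val() accepts a patience argument but the committed loop never uses it, so no early stopping actually happens. A minimal sketch of how that parameter could be wired in is shown below; the EarlyStopping helper and its placement are illustrative assumptions, not part of this commit.

# Sketch only (assumption): one way to use the unused `patience` parameter.
class EarlyStopping:
    def __init__(self, patience=10):
        self.patience = patience   # allowed epochs without improvement
        self.best = 0.0            # best validation accuracy seen so far
        self.bad_epochs = 0        # consecutive epochs without improvement

    def step(self, val_acc):
        """Update the counters and return True when training should stop."""
        if val_acc > self.best:
            self.best = val_acc
            self.bad_epochs = 0
        else:
            self.bad_epochs += 1
        return self.bad_epochs >= self.patience

# Possible use inside train_val's epoch loop (hypothetical wiring):
#     stopper = EarlyStopping(patience)
#     ...
#     if stopper.step(val_acc_avg):
#         print("Early stopping: no val-accuracy improvement for %d epochs" % patience)
#         break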