# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license

from copy import copy
from typing import Optional

from ultralytics.models.yolo.detect import DetectionTrainer
from ultralytics.nn.tasks import RTDETRDetectionModel
from ultralytics.utils import RANK, colorstr

from .val import RTDETRDataset, RTDETRValidator


class RTDETRTrainer(DetectionTrainer):
    """
    Trainer class for the RT-DETR model developed by Baidu for real-time object detection.

    This class extends the DetectionTrainer class for YOLO to adapt to the specific features and architecture of
    RT-DETR. The model leverages Vision Transformers and has capabilities like IoU-aware query selection and
    adaptable inference speed.

    Attributes:
        loss_names (tuple): Names of the loss components used for training.
        data (dict): Dataset configuration containing class count and other parameters.
        args (dict): Training arguments and hyperparameters.
        save_dir (Path): Directory to save training results.
        test_loader (DataLoader): DataLoader for validation/testing data.

    Methods:
        get_model: Initialize and return an RT-DETR model for object detection tasks.
        build_dataset: Build and return an RT-DETR dataset for training or validation.
        get_validator: Return a DetectionValidator suitable for RT-DETR model validation.

    Notes:
        - F.grid_sample used in RT-DETR does not support the `deterministic=True` argument.
        - AMP training can lead to NaN outputs and may produce errors during bipartite graph matching.

    Examples:
        >>> from ultralytics.models.rtdetr.train import RTDETRTrainer
        >>> args = dict(model="rtdetr-l.yaml", data="coco8.yaml", imgsz=640, epochs=3)
        >>> trainer = RTDETRTrainer(overrides=args)
        >>> trainer.train()
    """

    def get_model(self, cfg: Optional[dict] = None, weights: Optional[str] = None, verbose: bool = True):
        """
        Initialize and return an RT-DETR model for object detection tasks.

        Args:
            cfg (dict, optional): Model configuration.
            weights (str, optional): Path to pre-trained model weights.
            verbose (bool): Verbose logging if True.

        Returns:
            (RTDETRDetectionModel): Initialized model.
        """
        model = RTDETRDetectionModel(cfg, nc=self.data["nc"], ch=self.data["channels"], verbose=verbose and RANK == -1)
        if weights:
            model.load(weights)
        return model

    def build_dataset(self, img_path: str, mode: str = "val", batch: Optional[int] = None):
        """
        Build and return an RT-DETR dataset for training or validation.

        Args:
            img_path (str): Path to the folder containing images.
            mode (str): Dataset mode, either 'train' or 'val'.
            batch (int, optional): Batch size for rectangle training.

        Returns:
            (RTDETRDataset): Dataset object for the specific mode.
        """
        return RTDETRDataset(
            img_path=img_path,
            imgsz=self.args.imgsz,
            batch_size=batch,
            augment=mode == "train",
            hyp=self.args,
            rect=False,
            cache=self.args.cache or None,
            single_cls=self.args.single_cls or False,
            prefix=colorstr(f"{mode}: "),
            classes=self.args.classes,
            data=self.data,
            fraction=self.args.fraction if mode == "train" else 1.0,
        )

    def get_validator(self):
        """Return a DetectionValidator suitable for RT-DETR model validation."""
        self.loss_names = "giou_loss", "cls_loss", "l1_loss"
        return RTDETRValidator(self.test_loader, save_dir=self.save_dir, args=copy(self.args))
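

# Illustrative usage sketch (not part of the upstream module): running this file directly trains RT-DETR
# following the docstring example above. The config names "rtdetr-l.yaml" and "coco8.yaml" are taken from
# that example and assume the corresponding assets are resolvable by Ultralytics.
if __name__ == "__main__":
    # Minimal 3-epoch training run on the small COCO8 sample dataset.
    overrides = dict(model="rtdetr-l.yaml", data="coco8.yaml", imgsz=640, epochs=3)
    trainer = RTDETRTrainer(overrides=overrides)
    trainer.train()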