I have trained a YOLO-NAS-S model with PyTorch to detect products in images from a retail store. I trained the model on approximately 8,000 images for 20 epochs using the training code below. Due to memory issues, training was interrupted partway through, so I resumed it from the epoch stored in the checkpoint file. However, when I test the model on an image, it simply displays the image without any bounding boxes. Please help; the code details are below.
-------------------------------------
# The part below declares the image and label paths for the YOLO-format dataset.
from super_gradients.training.dataloaders.dataloaders import (
    coco_detection_yolo_format_train,
    coco_detection_yolo_format_val,
)

BATCH_SIZE = 1

# One real class ('product') followed by the placeholder names '0'..'78',
# which brings the list to 80 class names in total.
CLASSES = ['product']
CLASSES += [str(i) for i in range(80 - len(CLASSES))]

# Dataset root plus the image/label directory of each split.
dataset_params = {
    'data_dir': r"C:\Users\Giriraj\Documents\Prernas ML Models\SKU110KDataset\SKU110K_fixed",
    'train_images_dir': r'C:\Users\Giriraj\Documents\Prernas ML Models\SKU110KDataset\SKU110K_fixed\images\train',
    'train_labels_dir': r'C:\Users\Giriraj\Documents\Prernas ML Models\SKU110KDataset\SKU110K_fixed\labels\train',
    'val_images_dir': r'C:\Users\Giriraj\Documents\Prernas ML Models\SKU110KDataset\SKU110K_fixed\images\val',
    'val_labels_dir': r'C:\Users\Giriraj\Documents\Prernas ML Models\SKU110KDataset\SKU110K_fixed\labels\val',
    'test_images_dir': r'C:\Users\Giriraj\Documents\Prernas ML Models\SKU110KDataset\SKU110K_fixed\images\test',
    'test_labels_dir': r'C:\Users\Giriraj\Documents\Prernas ML Models\SKU110KDataset\SKU110K_fixed\labels\test',
    'classes': CLASSES,
}


def _loader_kwargs(images_key, labels_key):
    """Build the keyword arguments passed to a dataloader factory for one split.

    Args:
        images_key: Key in ``dataset_params`` holding the split's image directory.
        labels_key: Key in ``dataset_params`` holding the split's label directory.

    Returns:
        A dict with the ``dataset_params`` and ``dataloader_params`` arguments.
    """
    return {
        'dataset_params': {
            'data_dir': dataset_params['data_dir'],
            'images_dir': dataset_params[images_key],
            'labels_dir': dataset_params[labels_key],
            'classes': dataset_params['classes'],
        },
        'dataloader_params': {
            'batch_size': BATCH_SIZE,
            'num_workers': 2,
        },
    }


train_data = coco_detection_yolo_format_train(
    **_loader_kwargs('train_images_dir', 'train_labels_dir')
)
val_data = coco_detection_yolo_format_val(
    **_loader_kwargs('val_images_dir', 'val_labels_dir')
)
# The test split is built with the validation factory, as in the original code.
test_data = coco_detection_yolo_format_val(
    **_loader_kwargs('test_images_dir', 'test_labels_dir')
)
# The part below declares the model and the device.
import torch
from super_gradients.training import models
from super_gradients.training import Trainer

# Use the GPU when one is available, otherwise fall back to the CPU.
DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'

# YOLO-NAS-S requested with the "coco" pretrained weights, then moved to DEVICE.
model = models.get('yolo_nas_s', pretrained_weights="coco").to(DEVICE)

# Earlier experiments with a hand-built optimizer, kept for reference only:
# optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
# optimizer = torch.optim.Adam(model.parameters())

# Trainer for the "SKU110K" experiment, with "./weights" as the checkpoint root.
trainer = Trainer(experiment_name="SKU110K", ckpt_root_dir="./weights")
# ---------------------------------------------------------------
# The part below declares the training parameters, such as the maximum number of epochs.
from super_gradients.training.losses import PPYoloELoss
from super_gradients.training.metrics import DetectionMetrics_050
from super_gradients.training.models.detection_models.pp_yolo_e import PPYoloEPostPredictionCallback

MAX_EPOCHS = 20

# Training configuration handed to trainer.train() as `training_params`.
train_params = {
    'silent_mode': False,
    "average_best_models": True,
    # Learning-rate warm-up settings.
    "warmup_mode": "linear_epoch_step",
    "warmup_initial_lr": 1e-6,
    "lr_warmup_epochs": 3,
    # Main learning-rate schedule.
    "initial_lr": 5e-4,
    "lr_mode": "cosine",
    "cosine_final_lr_ratio": 0.1,
    # Optimizer is selected by name here rather than built with torch.optim.
    "optimizer": "Adam",
    "optimizer_params": {"weight_decay": 0.0001},
    "zero_weight_decay_on_bias_and_bn": True,
    "ema": True,
    "resume": True,
    "ema_params": {"decay": 0.9, "decay_type": "threshold"},
    "max_epochs": MAX_EPOCHS,
    "mixed_precision": True,
    # The loss is sized to the number of entries in the class list.
    "loss": PPYoloELoss(
        use_static_assigner=False,
        num_classes=len(dataset_params['classes']),
        reg_max=16,
    ),
    "valid_metrics_list": [
        DetectionMetrics_050(
            score_thres=0.1,
            top_k_predictions=50,
            num_cls=len(dataset_params['classes']),
            normalize_targets=True,
            # NOTE(review): max_predictions=20 and top_k_predictions=50 look
            # low for densely packed shelf images - confirm they do not hold
            # the validation metric down.
            post_prediction_callback=PPYoloEPostPredictionCallback(
                score_threshold=0.01,
                nms_top_k=100,
                max_predictions=20,
                nms_threshold=0.7,
            ),
        )
    ],
    "metric_to_watch": 'mAP@0.50',
}
# The part below trains the model; the same code is reused when resuming training by changing the start epoch number.
import torch

# Load the weights from the average checkpoint file.
# map_location='cpu' lets a checkpoint that was saved on the GPU be opened on
# a machine without CUDA; load_state_dict() then copies the tensors into the
# model's existing parameters.
checkpoint_file = torch.load(
    r'C:\Users\Giriraj\Documents\Prernas ML Models\yolo-nas-retail-training-main\yolo-nas-retail-training-main\weights\SKU110K\average_model.pth',
    map_location='cpu',
)
model.load_state_dict(checkpoint_file['net'])

# Epoch to continue from: the one after the epoch recorded in the checkpoint.
START_EPOCH = checkpoint_file['epoch'] + 1
max_epochs = 20

# train_params already carries "max_epochs", so trainer.train() is called
# once instead of once per remaining epoch as the previous for-loop did.
# The guard keeps the old behaviour of not training at all when the
# checkpoint has already reached max_epochs.
# NOTE(review): train_params also sets "resume": True, which presumably makes
# the Trainer restore its own latest checkpoint for this experiment - confirm
# whether the manual load above is still needed.
if START_EPOCH < max_epochs:
    trainer.train(
        model=model,
        training_params=train_params,
        train_loader=train_data,
        valid_loader=val_data,
    )