# get_FPS.py — benchmark a YOLO model's per-image inference latency and FPS.
import warnings
warnings.filterwarnings('ignore')

import argparse
import logging
import math
import os
import random
import sys
import time
from copy import deepcopy
from pathlib import Path
from threading import Thread

import numpy as np
import torch
import torch.distributed as dist
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torch.optim.lr_scheduler as lr_scheduler
import torch.utils.data
import yaml
from torch.cuda import amp
from torch.nn.parallel import DistributedDataParallel as DDP
from tqdm import tqdm
from ultralytics import YOLO
from ultralytics.nn.tasks import attempt_load_weights
from ultralytics.utils.torch_utils import select_device
  27. def get_weight_size(path):
  28. stats = os.stat(path)
  29. return f'{stats.st_size / 1024 / 1024:.1f}'
  30. if __name__ == '__main__':
  31. parser = argparse.ArgumentParser()
  32. parser.add_argument('--weights', type=str, default='yolov8n.pt', help='trained weights path')
  33. parser.add_argument('--batch', type=int, default=1, help='total batch size for all GPUs')
  34. parser.add_argument('--imgs', nargs='+', type=int, default=[640, 640], help='[height, width] image sizes')
  35. parser.add_argument('--device', default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu')
  36. parser.add_argument('--warmup', default=200, type=int, help='warmup time')
  37. parser.add_argument('--testtime', default=1000, type=int, help='test time')
  38. parser.add_argument('--half', action='store_true', default=False, help='fp16 mode.')
  39. opt = parser.parse_args()
  40. device = select_device(opt.device, batch=opt.batch)
  41. # Model
  42. weights = opt.weights
  43. if weights.endswith('.pt'):
  44. model = attempt_load_weights(weights, device=device, fuse=True)
  45. print(f'Loaded {weights}') # report
  46. else:
  47. model = YOLO(weights).model
  48. model.fuse()
  49. model = model.to(device)
  50. example_inputs = torch.randn((opt.batch, 3, *opt.imgs)).to(device)
  51. if opt.half:
  52. model = model.half()
  53. example_inputs = example_inputs.half()
  54. print('begin warmup...')
  55. for i in tqdm(range(opt.warmup), desc='warmup....'):
  56. model(example_inputs)
  57. print('begin test latency...')
  58. time_arr = []
  59. for i in tqdm(range(opt.testtime), desc='test latency....'):
  60. if device.type == 'cuda':
  61. torch.cuda.synchronize()
  62. start_time = time.time()
  63. model(example_inputs)
  64. if device.type == 'cuda':
  65. torch.cuda.synchronize()
  66. end_time = time.time()
  67. time_arr.append(end_time - start_time)
  68. std_time = np.std(time_arr)
  69. infer_time_per_image = np.sum(time_arr) / (opt.testtime * opt.batch)
  70. if weights.endswith('.pt'):
  71. print(f'model weights:{opt.weights} size:{get_weight_size(opt.weights)}M (bs:{opt.batch})Latency:{infer_time_per_image:.5f}s +- {std_time:.5f}s fps:{1 / infer_time_per_image:.1f}')
  72. else:
  73. print(f'model yaml:{opt.weights} (bs:{opt.batch})Latency:{infer_time_per_image:.5f}s +- {std_time:.5f}s fps:{1 / infer_time_per_image:.1f}')