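"""
Export a StrongSORT ReID model (e.g. OSNet) to TorchScript, ONNX, OpenVINO, TensorRT engine or TFLite.

Usage sketch (flag names come from the argparse section at the bottom of this file;
the script name is assumed from the error messages below):
    $ python export.py --weights weights/osnet_x0_25_msmt17.pt --include onnx --device 0 --half
    $ python export.py --weights weights/osnet_x0_25_msmt17.pt --include torchscript --optimize
"""
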
import argparse

import os
# limit the number of cpus used by high performance libraries
os.environ["OMP_NUM_THREADS"] = "1"
os.environ["OPENBLAS_NUM_THREADS"] = "1"
os.environ["MKL_NUM_THREADS"] = "1"
os.environ["VECLIB_MAXIMUM_THREADS"] = "1"
os.environ["NUMEXPR_NUM_THREADS"] = "1"

import sys
import numpy as np
from pathlib import Path
import torch
import time
import platform
import pandas as pd
import subprocess
import torch.backends.cudnn as cudnn
from torch.utils.mobile_optimizer import optimize_for_mobile

FILE = Path(__file__).resolve()
ROOT = FILE.parents[0].parents[0]  # yolov5 strongsort root directory
WEIGHTS = ROOT / 'weights'

if str(ROOT) not in sys.path:
    sys.path.append(str(ROOT))  # add ROOT to PATH
if str(ROOT / 'yolov5') not in sys.path:
    sys.path.append(str(ROOT / 'yolov5'))  # add yolov5 ROOT to PATH

ROOT = Path(os.path.relpath(ROOT, Path.cwd()))  # relative

import logging
from ultralytics.yolo.utils.torch_utils import select_device
from ultralytics.yolo.utils import LOGGER, colorstr, ops
from ultralytics.yolo.utils.checks import check_requirements, check_version
from trackers.strongsort.deep.models import build_model
from trackers.strongsort.deep.reid_model_factory import get_model_name, load_pretrained_weights


def file_size(path):
    # Return file/dir size (MB)
    path = Path(path)
    if path.is_file():
        return path.stat().st_size / 1E6
    elif path.is_dir():
        return sum(f.stat().st_size for f in path.glob('**/*') if f.is_file()) / 1E6
    else:
        return 0.0


def export_formats():
    # YOLOv5 export formats
    x = [
        ['PyTorch', '-', '.pt', True, True],
        ['TorchScript', 'torchscript', '.torchscript', True, True],
        ['ONNX', 'onnx', '.onnx', True, True],
        ['OpenVINO', 'openvino', '_openvino_model', True, False],
        ['TensorRT', 'engine', '.engine', False, True],
        ['TensorFlow Lite', 'tflite', '.tflite', True, False],
    ]
    return pd.DataFrame(x, columns=['Format', 'Argument', 'Suffix', 'CPU', 'GPU'])
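
# Illustrative note: the 'Argument' column above drives --include validation in the
# __main__ block below, i.e. export_formats()['Argument'][1:] yields
# ('torchscript', 'onnx', 'openvino', 'engine', 'tflite').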


def export_torchscript(model, im, file, optimize, prefix=colorstr('TorchScript:')):
    # YOLOv5 TorchScript model export
    try:
        LOGGER.info(f'\n{prefix} starting export with torch {torch.__version__}...')
        f = file.with_suffix('.torchscript')

        ts = torch.jit.trace(model, im, strict=False)
        if optimize:  # https://pytorch.org/tutorials/recipes/mobile_interpreter.html
            optimize_for_mobile(ts)._save_for_lite_interpreter(str(f))
        else:
            ts.save(str(f))

        LOGGER.info(f'{prefix} export success, saved as {f} ({file_size(f):.1f} MB)')
        return f
    except Exception as e:
        LOGGER.info(f'{prefix} export failure: {e}')
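
# Minimal usage sketch for the TorchScript artifact produced above (path is illustrative;
# input shape matches the --imgsz default of (256, 128)):
#   ts_model = torch.jit.load('weights/osnet_x0_25_msmt17.torchscript')
#   features = ts_model(torch.zeros(1, 3, 256, 128))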


def export_onnx(model, im, file, opset, dynamic, simplify, prefix=colorstr('ONNX:')):
    # ONNX export
    try:
        check_requirements(('onnx',))
        import onnx

        f = file.with_suffix('.onnx')
        LOGGER.info(f'\n{prefix} starting export with onnx {onnx.__version__}...')

        if dynamic:
            dynamic = {'images': {0: 'batch'}}  # input, e.g. shape(batch, 3, 256, 128)
            dynamic['output'] = {0: 'batch'}  # output feature vector, e.g. shape(batch, feat_dim)

        torch.onnx.export(
            model.cpu() if dynamic else model,  # --dynamic only compatible with cpu
            im.cpu() if dynamic else im,
            f,
            verbose=False,
            opset_version=opset,
            do_constant_folding=True,
            input_names=['images'],
            output_names=['output'],
            dynamic_axes=dynamic or None
        )
        # Checks
        model_onnx = onnx.load(f)  # load onnx model
        onnx.checker.check_model(model_onnx)  # check onnx model
        onnx.save(model_onnx, f)

        # Simplify
        if simplify:
            try:
                cuda = torch.cuda.is_available()
                check_requirements(('onnxruntime-gpu' if cuda else 'onnxruntime', 'onnx-simplifier>=0.4.1'))
                import onnxsim

                LOGGER.info(f'simplifying with onnx-simplifier {onnxsim.__version__}...')
                model_onnx, check = onnxsim.simplify(model_onnx)
                assert check, 'simplified ONNX model could not be validated'
                onnx.save(model_onnx, f)
            except Exception as e:
                LOGGER.info(f'simplifier failure: {e}')
        LOGGER.info(f'{prefix} export success, saved as {f} ({file_size(f):.1f} MB)')
        return f
    except Exception as e:
        LOGGER.info(f'export failure: {e}')
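
# Minimal usage sketch for the exported ONNX model, assuming onnxruntime is installed
# (the input name 'images' matches input_names above; the path is illustrative):
#   import onnxruntime as ort
#   session = ort.InferenceSession('weights/osnet_x0_25_msmt17.onnx')
#   features = session.run(None, {'images': np.zeros((1, 3, 256, 128), dtype=np.float32)})[0]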


def export_openvino(file, half, prefix=colorstr('OpenVINO:')):
    # YOLOv5 OpenVINO export
    check_requirements(('openvino-dev',))  # requires openvino-dev: https://pypi.org/project/openvino-dev/
    import openvino.inference_engine as ie
    try:
        LOGGER.info(f'\n{prefix} starting export with openvino {ie.__version__}...')
        f = str(file).replace('.pt', f'_openvino_model{os.sep}')

        cmd = f"mo --input_model {file.with_suffix('.onnx')} --output_dir {f} --data_type {'FP16' if half else 'FP32'}"
        subprocess.check_output(cmd.split())  # export
        LOGGER.info(f'{prefix} export success, saved as {f} ({file_size(f):.1f} MB)')
        return f
    except Exception as e:
        LOGGER.info(f'{prefix} export failure: {e}')


def export_tflite(file, half, prefix=colorstr('TFLite:')):
    # YOLOv5 TFLite export via openvino2tensorflow (consumes an OpenVINO model directory)
    try:
        check_requirements(('openvino2tensorflow', 'tensorflow', 'tensorflow_datasets'))
        import openvino.inference_engine as ie
        LOGGER.info(f'\n{prefix} starting export with openvino {ie.__version__}...')
        output = Path(str(file).replace(f'_openvino_model{os.sep}', f'_tflite_model{os.sep}'))
        modelxml = list(Path(file).glob('*.xml'))[0]
        cmd = f"openvino2tensorflow \
            --model_path {modelxml} \
            --model_output_path {output} \
            --output_pb \
            --output_saved_model \
            --output_no_quant_float32_tflite \
            --output_dynamic_range_quant_tflite"
        subprocess.check_output(cmd.split())  # export

        LOGGER.info(f'{prefix} export success, results saved in {output} ({file_size(output):.1f} MB)')
        return output
    except Exception as e:
        LOGGER.info(f'\n{prefix} export failure: {e}')
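
# Minimal usage sketch for a TFLite artifact, assuming tensorflow is installed
# (openvino2tensorflow chooses the output filename; the one below is an assumption):
#   import tensorflow as tf
#   interpreter = tf.lite.Interpreter(model_path='weights/osnet_x0_25_msmt17_tflite_model/model_float32.tflite')
#   interpreter.allocate_tensors()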


def export_engine(model, im, file, half, dynamic, simplify, workspace=4, verbose=False, prefix=colorstr('TensorRT:')):
    # YOLOv5 TensorRT export https://developer.nvidia.com/tensorrt
    try:
        assert im.device.type != 'cpu', 'export running on CPU but must be on GPU, i.e. `python export.py --device 0`'
        try:
            import tensorrt as trt
        except Exception:
            if platform.system() == 'Linux':
                check_requirements(('nvidia-tensorrt',), cmds=('-U --index-url https://pypi.ngc.nvidia.com',))
            import tensorrt as trt

        if trt.__version__[0] == '7':  # TensorRT 7 handling https://github.com/ultralytics/yolov5/issues/6012
            grid = model.model[-1].anchor_grid
            model.model[-1].anchor_grid = [a[..., :1, :1, :] for a in grid]
            export_onnx(model, im, file, 12, dynamic, simplify)  # opset 12
            model.model[-1].anchor_grid = grid
        else:  # TensorRT >= 8
            check_version(trt.__version__, '8.0.0', hard=True)  # require tensorrt>=8.0.0
            export_onnx(model, im, file, 12, dynamic, simplify)  # opset 12
        onnx = file.with_suffix('.onnx')

        LOGGER.info(f'\n{prefix} starting export with TensorRT {trt.__version__}...')
        assert onnx.exists(), f'failed to export ONNX file: {onnx}'
        f = file.with_suffix('.engine')  # TensorRT engine file
        logger = trt.Logger(trt.Logger.INFO)
        if verbose:
            logger.min_severity = trt.Logger.Severity.VERBOSE

        builder = trt.Builder(logger)
        config = builder.create_builder_config()
        config.max_workspace_size = workspace * 1 << 30
        # config.set_memory_pool_limit(trt.MemoryPoolType.WORKSPACE, workspace << 30)  # fix TRT 8.4 deprecation notice

        flag = (1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH))
        network = builder.create_network(flag)
        parser = trt.OnnxParser(network, logger)
        if not parser.parse_from_file(str(onnx)):
            raise RuntimeError(f'failed to load ONNX file: {onnx}')

        inputs = [network.get_input(i) for i in range(network.num_inputs)]
        outputs = [network.get_output(i) for i in range(network.num_outputs)]
        LOGGER.info(f'{prefix} Network Description:')
        for inp in inputs:
            LOGGER.info(f'{prefix}\tinput "{inp.name}" with shape {inp.shape} and dtype {inp.dtype}')
        for out in outputs:
            LOGGER.info(f'{prefix}\toutput "{out.name}" with shape {out.shape} and dtype {out.dtype}')

        if dynamic:
            if im.shape[0] <= 1:
                LOGGER.warning(f'{prefix} WARNING: --dynamic model requires maximum --batch-size argument')
            profile = builder.create_optimization_profile()
            for inp in inputs:
                profile.set_shape(inp.name, (1, *im.shape[1:]), (max(1, im.shape[0] // 2), *im.shape[1:]), im.shape)
            config.add_optimization_profile(profile)

        LOGGER.info(f'{prefix} building FP{16 if builder.platform_has_fast_fp16 and half else 32} engine in {f}')
        if builder.platform_has_fast_fp16 and half:
            config.set_flag(trt.BuilderFlag.FP16)
        with builder.build_engine(network, config) as engine, open(f, 'wb') as t:
            t.write(engine.serialize())
        LOGGER.info(f'{prefix} export success, saved as {f} ({file_size(f):.1f} MB)')
        return f
    except Exception as e:
        LOGGER.info(f'\n{prefix} export failure: {e}')
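
# Minimal usage sketch for the serialized engine, assuming tensorrt is installed
# (deserialization only; building an execution context and bindings is omitted):
#   import tensorrt as trt
#   trt_logger = trt.Logger(trt.Logger.INFO)
#   with open('weights/osnet_x0_25_msmt17.engine', 'rb') as f, trt.Runtime(trt_logger) as runtime:
#       engine = runtime.deserialize_cuda_engine(f.read())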


if __name__ == "__main__":

    parser = argparse.ArgumentParser(description="ReID export")
    parser.add_argument('--batch-size', type=int, default=1, help='batch size')
    parser.add_argument('--imgsz', '--img', '--img-size', nargs='+', type=int, default=[256, 128], help='image (h, w)')
    parser.add_argument('--device', default='cpu', help='cuda device, i.e. 0 or 0,1,2,3 or cpu')
    parser.add_argument('--optimize', action='store_true', help='TorchScript: optimize for mobile')
    parser.add_argument('--dynamic', action='store_true', help='ONNX/TF/TensorRT: dynamic axes')
    parser.add_argument('--simplify', action='store_true', help='ONNX: simplify model')
    parser.add_argument('--opset', type=int, default=12, help='ONNX: opset version')
    parser.add_argument('--workspace', type=int, default=4, help='TensorRT: workspace size (GB)')
    parser.add_argument('--verbose', action='store_true', help='TensorRT: verbose log')
    parser.add_argument('--weights', nargs='+', type=str, default=WEIGHTS / 'osnet_x0_25_msmt17.pt', help='model.pt path(s)')
    parser.add_argument('--half', action='store_true', help='FP16 half-precision export')
    parser.add_argument('--include',
                        nargs='+',
                        default=['torchscript'],
                        help='torchscript, onnx, openvino, engine, tflite')
    args = parser.parse_args()

    t = time.time()

    include = [x.lower() for x in args.include]  # to lowercase
    fmts = tuple(export_formats()['Argument'][1:])  # --include arguments
    flags = [x in include for x in fmts]
    assert sum(flags) == len(include), f'ERROR: Invalid --include {include}, valid --include arguments are {fmts}'
    jit, onnx, openvino, engine, tflite = flags  # export booleans

    args.device = select_device(args.device)
    if args.half:
        assert args.device.type != 'cpu', '--half only compatible with GPU export, i.e. use --device 0'
        assert not args.dynamic, '--half not compatible with --dynamic, i.e. use either --half or --dynamic but not both'

    if type(args.weights) is list:
        args.weights = Path(args.weights[0])

    model = build_model(
        get_model_name(args.weights),
        num_classes=1,
        pretrained=not (args.weights and args.weights.is_file() and args.weights.suffix == '.pt'),
        use_gpu=args.device
    ).to(args.device)
    load_pretrained_weights(model, args.weights)
    model.eval()

    if args.optimize:
        assert args.device.type == 'cpu', '--optimize not compatible with cuda devices, i.e. use --device cpu'

    im = torch.zeros(args.batch_size, 3, args.imgsz[0], args.imgsz[1]).to(args.device)  # input image BCHW, e.g. (1, 3, 256, 128)
    for _ in range(2):
        y = model(im)  # dry runs
    if args.half:
        im, model = im.half(), model.half()  # to FP16
    shape = tuple((y[0] if isinstance(y, tuple) else y).shape)  # model output shape
    LOGGER.info(f"\n{colorstr('PyTorch:')} starting from {args.weights} with output shape {shape} ({file_size(args.weights):.1f} MB)")

    # Exports
    f = [''] * len(fmts)  # exported filenames
    if jit:
        f[0] = export_torchscript(model, im, args.weights, args.optimize)
    if engine:  # TensorRT runs its own ONNX export internally, so it comes before the ONNX step
        f[1] = export_engine(model, im, args.weights, args.half, args.dynamic, args.simplify, args.workspace, args.verbose)
    if onnx:  # OpenVINO requires ONNX
        f[2] = export_onnx(model, im, args.weights, args.opset, args.dynamic, args.simplify)
    if openvino:
        f[3] = export_openvino(args.weights, args.half)
    if tflite:
        f[4] = export_tflite(Path(f[3]), False)  # consumes the OpenVINO model directory

    # Finish
    f = [str(x) for x in f if x]  # filter out '' and None
    if any(f):
        LOGGER.info(f'\nExport complete ({time.time() - t:.1f}s)'
                    f"\nResults saved to {colorstr('bold', args.weights.parent.resolve())}"
                    f"\nVisualize: https://netron.app")