fix: GPU memory leaks

This commit is contained in:
Siwat Sirichai 2025-11-10 22:10:46 +07:00
parent 3a47920186
commit 593611cdb7
13 changed files with 420 additions and 166 deletions

View file

@ -125,12 +125,19 @@ class PTConverter:
mapping = self.mapping_db[pt_hash]
trt_hash = mapping["trt_hash"]
# Check both .engine and .trt extensions (Ultralytics uses .engine, generic uses .trt)
engine_key = f"trt/{trt_hash}.engine"
trt_key = f"trt/{trt_hash}.trt"
# Verify TRT file still exists in storage
if not self.storage.exists(trt_key):
# Try .engine first (Ultralytics native format)
if self.storage.exists(engine_key):
cached_key = engine_key
elif self.storage.exists(trt_key):
cached_key = trt_key
else:
logger.warning(
f"Mapping exists for PT hash {pt_hash[:16]}... but TRT file missing. "
f"Mapping exists for PT hash {pt_hash[:16]}... but engine file missing. "
f"Will reconvert."
)
# Remove stale mapping
@ -139,16 +146,16 @@ class PTConverter:
return None
# Get local path
trt_path = self.storage.get_local_path(trt_key)
if trt_path is None:
logger.error(f"Could not get local path for TRT file {trt_key}")
cached_path = self.storage.get_local_path(cached_key)
if cached_path is None:
logger.error(f"Could not get local path for engine file {cached_key}")
return None
logger.info(
f"Found cached conversion for PT hash {pt_hash[:16]}... -> "
f"TRT hash {trt_hash[:16]}..."
f"Engine hash {trt_hash[:16]}... ({cached_key})"
)
return (trt_hash, trt_path)
return (trt_hash, cached_path)
def convert(
self,
@ -241,24 +248,21 @@ class PTConverter:
precision: torch.dtype,
) -> Tuple[str, str]:
"""
Convert ultralytics YOLO model using ONNX TensorRT pipeline.
Uses the same approach as scripts/convert_pt_to_tensorrt.py
Convert ultralytics YOLO model using native .engine export.
This produces .engine files with embedded metadata (no manual input_shapes needed).
Args:
pt_path: Path to PT file
pt_hash: PT file hash
input_shapes: Input tensor shapes
input_shapes: Input tensor shapes (IGNORED for Ultralytics - auto-detected)
precision: Target precision
Returns:
Tuple of (trt_hash, trt_file_path)
Tuple of (engine_hash, engine_file_path)
"""
import tensorrt as trt
import tempfile
import os
import shutil
logger.info("Detected ultralytics YOLO model, using ONNX → TensorRT pipeline...")
logger.info("Detected ultralytics YOLO model, using native .engine export...")
# Load ultralytics model
try:
@ -267,83 +271,48 @@ class PTConverter:
except ImportError:
raise ImportError("ultralytics package not found. Install with: pip install ultralytics")
# Determine input shape
if not input_shapes:
raise ValueError("input_shapes required for ultralytics conversion")
# Export to native .engine format with embedded metadata
logger.info(f"Exporting to native TensorRT .engine (precision: {'FP16' if precision == torch.float16 else 'FP32'})...")
input_key = 'images' if 'images' in input_shapes else list(input_shapes.keys())[0]
input_shape = input_shapes[input_key]
# Ultralytics export creates .engine file in same directory as .pt
engine_path = model.export(
format='engine',
half=(precision == torch.float16),
device=self.gpu_id,
batch=1,
simplify=True
)
# Export to ONNX first
logger.info(f"Exporting to ONNX (input shape: {input_shape})...")
with tempfile.NamedTemporaryFile(suffix='.onnx', delete=False) as tmp_onnx:
onnx_path = tmp_onnx.name
# Convert to string (Ultralytics returns Path object)
engine_path = str(engine_path)
logger.info(f"Native .engine export complete: {engine_path}")
logger.info("Metadata embedded in .engine file (stride, imgsz, names, etc.)")
try:
# Use ultralytics export to ONNX
model.export(format='onnx', imgsz=input_shape[2], batch=input_shape[0])
# Ultralytics saves as model_name.onnx in same directory
pt_dir = os.path.dirname(pt_path)
pt_name = os.path.splitext(os.path.basename(pt_path))[0]
onnx_export_path = os.path.join(pt_dir, f"{pt_name}.onnx")
# Read the exported .engine file
with open(engine_path, 'rb') as f:
engine_data = f.read()
# Move to our temp location (use shutil.move for cross-device support)
if os.path.exists(onnx_export_path):
shutil.move(onnx_export_path, onnx_path)
else:
raise RuntimeError(f"ONNX export failed, file not found: {onnx_export_path}")
# Compute hash of the .engine file
engine_hash = hashlib.sha256(engine_data).hexdigest()
logger.info(f"ONNX export complete: {onnx_path}")
# Store in our cache (as .engine to preserve metadata)
engine_key = f"trt/{engine_hash}.engine"
self.storage.write(engine_key, engine_data)
# Build TensorRT engine from ONNX
logger.info("Building TensorRT engine from ONNX...")
trt_logger = trt.Logger(trt.Logger.WARNING)
builder = trt.Builder(trt_logger)
network = builder.create_network(1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH))
parser = trt.OnnxParser(network, trt_logger)
cached_path = self.storage.get_local_path(engine_key)
if cached_path is None:
raise RuntimeError("Failed to get local path for .engine file")
# Parse ONNX
with open(onnx_path, 'rb') as f:
if not parser.parse(f.read()):
errors = [parser.get_error(i) for i in range(parser.num_errors)]
raise RuntimeError(f"Failed to parse ONNX: {errors}")
# Clean up the original export (we've cached it)
# Only delete if it's different from cached path
if os.path.exists(engine_path) and os.path.abspath(engine_path) != os.path.abspath(cached_path):
logger.info(f"Removing original export (cached): {engine_path}")
os.unlink(engine_path)
else:
logger.info(f"Keeping original export at: {engine_path}")
# Configure builder
config = builder.create_builder_config()
config.set_memory_pool_limit(trt.MemoryPoolType.WORKSPACE, 4 << 30) # 4GB
# Set precision
if precision == torch.float16:
if builder.platform_has_fast_fp16:
config.set_flag(trt.BuilderFlag.FP16)
logger.info("FP16 mode enabled")
# Build engine
logger.info("Building TensorRT engine (this may take a few minutes)...")
serialized_engine = builder.build_serialized_network(network, config)
if serialized_engine is None:
raise RuntimeError("Failed to build TensorRT engine")
# Convert IHostMemory to bytes
engine_bytes = bytes(serialized_engine)
# Save to storage
trt_hash = hashlib.sha256(engine_bytes).hexdigest()
trt_key = f"trt/{trt_hash}.trt"
self.storage.write(trt_key, engine_bytes)
trt_path = self.storage.get_local_path(trt_key)
if trt_path is None:
raise RuntimeError("Failed to get local path for TRT file")
logger.info(f"TensorRT engine built successfully: {trt_path}")
return (trt_hash, trt_path)
finally:
# Cleanup ONNX file
if os.path.exists(onnx_path):
os.unlink(onnx_path)
logger.info(f"Cached .engine file: {cached_path}")
return (engine_hash, cached_path)
def _perform_conversion(
self,
@ -387,11 +356,21 @@ class PTConverter:
# Check if this is an ultralytics model
if self._is_ultralytics_model(model):
logger.info("Detected ultralytics model, using ultralytics export API")
logger.info("Detected Ultralytics YOLO model, using native .engine export")
logger.info("Note: input_shapes parameter is ignored for Ultralytics models (auto-detected)")
return self._convert_ultralytics_model(pt_path, pt_hash, input_shapes, precision)
# For non-ultralytics models, use torch_tensorrt
logger.info("Using torch_tensorrt for conversion")
logger.info("Using torch_tensorrt for conversion (non-Ultralytics model)")
# Non-Ultralytics models REQUIRE input_shapes
if input_shapes is None:
raise ValueError(
"input_shapes required for non-Ultralytics PyTorch models. "
"For Ultralytics YOLO models, input_shapes is auto-detected. "
"Example: input_shapes={'images': (1, 3, 640, 640)}"
)
model.eval()
# Convert model to target precision to avoid mixed precision issues