fix: gpu memory leaks
parent 3a47920186
commit 593611cdb7

13 changed files with 420 additions and 166 deletions
@@ -125,12 +125,19 @@ class PTConverter:
         mapping = self.mapping_db[pt_hash]
         trt_hash = mapping["trt_hash"]

+        # Check both .engine and .trt extensions (Ultralytics uses .engine, generic uses .trt)
+        engine_key = f"trt/{trt_hash}.engine"
         trt_key = f"trt/{trt_hash}.trt"

         # Verify TRT file still exists in storage
-        if not self.storage.exists(trt_key):
+        # Try .engine first (Ultralytics native format)
+        if self.storage.exists(engine_key):
+            cached_key = engine_key
+        elif self.storage.exists(trt_key):
+            cached_key = trt_key
+        else:
             logger.warning(
-                f"Mapping exists for PT hash {pt_hash[:16]}... but TRT file missing. "
+                f"Mapping exists for PT hash {pt_hash[:16]}... but engine file missing. "
                 f"Will reconvert."
             )
             # Remove stale mapping
@@ -139,16 +146,16 @@ class PTConverter:
             return None

         # Get local path
-        trt_path = self.storage.get_local_path(trt_key)
-        if trt_path is None:
-            logger.error(f"Could not get local path for TRT file {trt_key}")
+        cached_path = self.storage.get_local_path(cached_key)
+        if cached_path is None:
+            logger.error(f"Could not get local path for engine file {cached_key}")
             return None

         logger.info(
             f"Found cached conversion for PT hash {pt_hash[:16]}... -> "
-            f"TRT hash {trt_hash[:16]}..."
+            f"Engine hash {trt_hash[:16]}... ({cached_key})"
         )
-        return (trt_hash, trt_path)
+        return (trt_hash, cached_path)

     def convert(
         self,
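For readers skimming the two hunks above: the cache lookup now prefers the Ultralytics .engine artifact and falls back to the generic .trt key. The snippet below is only a sketch of that lookup order; resolve_cached_key and the bare storage parameter are illustrative names, not the repository's API (the real code works against self.storage inside PTConverter).

from typing import Optional

def resolve_cached_key(storage, trt_hash: str) -> Optional[str]:
    """Prefer the Ultralytics .engine artifact, fall back to the generic .trt."""
    engine_key = f"trt/{trt_hash}.engine"   # Ultralytics native export
    trt_key = f"trt/{trt_hash}.trt"         # generic TensorRT build
    for key in (engine_key, trt_key):
        if storage.exists(key):             # same exists() check as in the diff
            return key
    return None                             # caller treats this as a stale mapping and reconverts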
@@ -241,24 +248,21 @@ class PTConverter:
         precision: torch.dtype,
     ) -> Tuple[str, str]:
         """
-        Convert ultralytics YOLO model using ONNX → TensorRT pipeline.
-        Uses the same approach as scripts/convert_pt_to_tensorrt.py
+        Convert ultralytics YOLO model using native .engine export.
+        This produces .engine files with embedded metadata (no manual input_shapes needed).

         Args:
             pt_path: Path to PT file
             pt_hash: PT file hash
-            input_shapes: Input tensor shapes
+            input_shapes: Input tensor shapes (IGNORED for Ultralytics - auto-detected)
             precision: Target precision

         Returns:
-            Tuple of (trt_hash, trt_file_path)
+            Tuple of (engine_hash, engine_file_path)
         """
-        import tensorrt as trt
-        import tempfile
         import os
-        import shutil

-        logger.info("Detected ultralytics YOLO model, using ONNX → TensorRT pipeline...")
+        logger.info("Detected ultralytics YOLO model, using native .engine export...")

         # Load ultralytics model
         try:
@@ -267,83 +271,48 @@ class PTConverter:
         except ImportError:
             raise ImportError("ultralytics package not found. Install with: pip install ultralytics")

-        # Determine input shape
-        if not input_shapes:
-            raise ValueError("input_shapes required for ultralytics conversion")
+        # Export to native .engine format with embedded metadata
+        logger.info(f"Exporting to native TensorRT .engine (precision: {'FP16' if precision == torch.float16 else 'FP32'})...")

-        input_key = 'images' if 'images' in input_shapes else list(input_shapes.keys())[0]
-        input_shape = input_shapes[input_key]
+        # Ultralytics export creates .engine file in same directory as .pt
+        engine_path = model.export(
+            format='engine',
+            half=(precision == torch.float16),
+            device=self.gpu_id,
+            batch=1,
+            simplify=True
+        )

-        # Export to ONNX first
-        logger.info(f"Exporting to ONNX (input shape: {input_shape})...")
-        with tempfile.NamedTemporaryFile(suffix='.onnx', delete=False) as tmp_onnx:
-            onnx_path = tmp_onnx.name
+        # Convert to string (Ultralytics returns Path object)
+        engine_path = str(engine_path)
+        logger.info(f"Native .engine export complete: {engine_path}")
+        logger.info("Metadata embedded in .engine file (stride, imgsz, names, etc.)")

-        try:
-            # Use ultralytics export to ONNX
-            model.export(format='onnx', imgsz=input_shape[2], batch=input_shape[0])
-            # Ultralytics saves as model_name.onnx in same directory
-            pt_dir = os.path.dirname(pt_path)
-            pt_name = os.path.splitext(os.path.basename(pt_path))[0]
-            onnx_export_path = os.path.join(pt_dir, f"{pt_name}.onnx")
+        # Read the exported .engine file
+        with open(engine_path, 'rb') as f:
+            engine_data = f.read()

-            # Move to our temp location (use shutil.move for cross-device support)
-            if os.path.exists(onnx_export_path):
-                shutil.move(onnx_export_path, onnx_path)
-            else:
-                raise RuntimeError(f"ONNX export failed, file not found: {onnx_export_path}")
+        # Compute hash of the .engine file
+        engine_hash = hashlib.sha256(engine_data).hexdigest()

-            logger.info(f"ONNX export complete: {onnx_path}")
+        # Store in our cache (as .engine to preserve metadata)
+        engine_key = f"trt/{engine_hash}.engine"
+        self.storage.write(engine_key, engine_data)

-            # Build TensorRT engine from ONNX
-            logger.info("Building TensorRT engine from ONNX...")
-            trt_logger = trt.Logger(trt.Logger.WARNING)
-            builder = trt.Builder(trt_logger)
-            network = builder.create_network(1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH))
-            parser = trt.OnnxParser(network, trt_logger)
+        cached_path = self.storage.get_local_path(engine_key)
+        if cached_path is None:
+            raise RuntimeError("Failed to get local path for .engine file")

-            # Parse ONNX
-            with open(onnx_path, 'rb') as f:
-                if not parser.parse(f.read()):
-                    errors = [parser.get_error(i) for i in range(parser.num_errors)]
-                    raise RuntimeError(f"Failed to parse ONNX: {errors}")
+        # Clean up the original export (we've cached it)
+        # Only delete if it's different from cached path
+        if os.path.exists(engine_path) and os.path.abspath(engine_path) != os.path.abspath(cached_path):
+            logger.info(f"Removing original export (cached): {engine_path}")
+            os.unlink(engine_path)
+        else:
+            logger.info(f"Keeping original export at: {engine_path}")

-            # Configure builder
-            config = builder.create_builder_config()
-            config.set_memory_pool_limit(trt.MemoryPoolType.WORKSPACE, 4 << 30) # 4GB
-
-            # Set precision
-            if precision == torch.float16:
-                if builder.platform_has_fast_fp16:
-                    config.set_flag(trt.BuilderFlag.FP16)
-                    logger.info("FP16 mode enabled")
-
-            # Build engine
-            logger.info("Building TensorRT engine (this may take a few minutes)...")
-            serialized_engine = builder.build_serialized_network(network, config)
-
-            if serialized_engine is None:
-                raise RuntimeError("Failed to build TensorRT engine")
-
-            # Convert IHostMemory to bytes
-            engine_bytes = bytes(serialized_engine)
-
-            # Save to storage
-            trt_hash = hashlib.sha256(engine_bytes).hexdigest()
-            trt_key = f"trt/{trt_hash}.trt"
-            self.storage.write(trt_key, engine_bytes)
-
-            trt_path = self.storage.get_local_path(trt_key)
-            if trt_path is None:
-                raise RuntimeError("Failed to get local path for TRT file")
-
-            logger.info(f"TensorRT engine built successfully: {trt_path}")
-            return (trt_hash, trt_path)
-
-        finally:
-            # Cleanup ONNX file
-            if os.path.exists(onnx_path):
-                os.unlink(onnx_path)
+        logger.info(f"Cached .engine file: {cached_path}")
+        return (engine_hash, cached_path)

     def _perform_conversion(
         self,
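Outside the converter, the same export-and-cache flow can be reproduced with the public Ultralytics API. A minimal sketch, assuming a local yolov8n.pt checkpoint and a GPU with TensorRT available; the cache-key format mirrors the diff, but the storage layer is omitted.

import hashlib
from ultralytics import YOLO

model = YOLO("yolov8n.pt")            # any Ultralytics detection checkpoint
engine_path = str(model.export(       # returns the path of the exported file
    format="engine",                  # native TensorRT .engine with embedded metadata
    half=True,                        # FP16
    device=0,
    batch=1,
    simplify=True,
))

with open(engine_path, "rb") as f:
    engine_data = f.read()

engine_hash = hashlib.sha256(engine_data).hexdigest()
print(f"trt/{engine_hash}.engine")    # cache key, matching the scheme in the diff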
@@ -387,11 +356,21 @@ class PTConverter:

         # Check if this is an ultralytics model
         if self._is_ultralytics_model(model):
-            logger.info("Detected ultralytics model, using ultralytics export API")
+            logger.info("Detected Ultralytics YOLO model, using native .engine export")
+            logger.info("Note: input_shapes parameter is ignored for Ultralytics models (auto-detected)")
             return self._convert_ultralytics_model(pt_path, pt_hash, input_shapes, precision)

         # For non-ultralytics models, use torch_tensorrt
-        logger.info("Using torch_tensorrt for conversion")
+        logger.info("Using torch_tensorrt for conversion (non-Ultralytics model)")
+
+        # Non-Ultralytics models REQUIRE input_shapes
+        if input_shapes is None:
+            raise ValueError(
+                "input_shapes required for non-Ultralytics PyTorch models. "
+                "For Ultralytics YOLO models, input_shapes is auto-detected. "
+                "Example: input_shapes={'images': (1, 3, 640, 640)}"
+            )
+
         model.eval()

         # Convert model to target precision to avoid mixed precision issues
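For the non-Ultralytics branch, the explicit input_shapes requirement maps onto torch_tensorrt's Input specification. The sketch below assumes torch_tensorrt is installed and a CUDA GPU is present; the toy module and the (1, 3, 640, 640) shape are placeholders, not the repository's models or its exact compile call.

import torch
import torch_tensorrt

# Placeholder network standing in for a generic (non-Ultralytics) PyTorch model.
model = torch.nn.Sequential(
    torch.nn.Conv2d(3, 16, kernel_size=3, padding=1),
    torch.nn.ReLU(),
).eval().half().cuda()

# input_shapes={'images': (1, 3, 640, 640)} from the error message translates
# into an explicit Input spec for torch_tensorrt.compile().
trt_model = torch_tensorrt.compile(
    model,
    inputs=[torch_tensorrt.Input(shape=(1, 3, 640, 640), dtype=torch.half)],
    enabled_precisions={torch.half},
)

x = torch.randn(1, 3, 640, 640, device="cuda", dtype=torch.half)
print(trt_model(x).shape)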