More T train fixes

This commit is contained in:
Dobromir Popov
2025-11-17 21:12:20 +02:00
parent a8d59a946e
commit 2d1d036c07

View File

@@ -2128,13 +2128,16 @@ class RealTrainingAdapter:
# MEMORY FIX: Final cleanup
logger.info(" Final memory cleanup...")
# Clear cached batches
for batch in cached_batches:
for key in list(batch.keys()):
if isinstance(batch[key], torch.Tensor):
del batch[key]
cached_batches.clear()
del cached_batches
# Clear grouped batches (cached_batches was already cleared earlier)
# Note: Do not delete the contents of the individual batches, as they may still be referenced elsewhere.
# Only empty the list and drop its name; Python's garbage collector reclaims whatever is no longer referenced.
try:
if grouped_batches:
grouped_batches.clear()
del grouped_batches
except NameError:
# grouped_batches already cleaned up or doesn't exist
pass
gc.collect()
if torch.cuda.is_available():