{
"version": "2.0.0",
"tasks": [
{
  "label": "Kill Stale Processes",
  "detail": "Kills other python/tensorboard processes whose command line contains scalping, training or tensorboard.",
  "type": "process",
  "command": "python",
  "args": [
    "-c",
    "import os, psutil; skip = {os.getpid(), os.getppid()}; [p.kill() for p in psutil.process_iter(['name', 'cmdline']) if p.pid not in skip and any(x in (p.info['name'] or '').lower() for x in ['python', 'tensorboard']) and any(x in ' '.join(p.info['cmdline'] or []) for x in ['scalping', 'training', 'tensorboard'])]; print('Stale processes killed')"
  ],
  "presentation": {
    "reveal": "silent",
    "panel": "shared"
  },
  "problemMatcher": []
},
{
  "label": "Start TensorBoard",
  "type": "shell",
  "command": "python",
  "args": ["run_tensorboard.py"],
  "isBackground": true,
  "runOptions": { "runOn": "folderOpen" },
  "presentation": {
    "group": "monitoring",
    "panel": "new",
    "reveal": "always"
  },
  "problemMatcher": {
    "background": {
      "activeOnStart": true,
      "beginsPattern": ".*Starting TensorBoard.*",
      "endsPattern": ".*TensorBoard.*available.*"
    },
    "pattern": {
      "regexp": "^.*$",
      "file": 1,
      "location": 2,
      "message": 3
    }
  }
},
{
  "label": "Monitor GPU Usage",
  "detail": "Every 5 seconds, prints load and memory use for each GPU reported by GPUtil.",
  "type": "process",
  "command": "python",
  "args": [
    "-c",
    "import GPUtil, itertools, time; any(([print(f'GPU {gpu.id}: {gpu.load*100:.1f}% load, {gpu.memoryUsed}/{gpu.memoryTotal}MB memory ({gpu.memoryUsed/gpu.memoryTotal*100:.1f}%)') for gpu in GPUtil.getGPUs()], time.sleep(5))[1] for _ in itertools.count())"
  ],
  "isBackground": true,
  "presentation": {
    "reveal": "always",
    "panel": "new",
    "group": "monitoring"
  },
  "problemMatcher": []
},
{
  "label": "Check CUDA Setup",
  "detail": "Prints the PyTorch version, CUDA availability and version, and the name of each CUDA device.",
  "type": "process",
  "command": "python",
  "args": [
    "-c",
    "import torch; print(f'PyTorch: {torch.__version__}'); print(f'CUDA Available: {torch.cuda.is_available()}'); print(f'CUDA Version: {torch.version.cuda}' if torch.cuda.is_available() else 'CUDA not available'); [print(f'GPU {i}: {torch.cuda.get_device_name(i)}') for i in range(torch.cuda.device_count())] if torch.cuda.is_available() else None"
  ],
  "presentation": {
    "reveal": "always",
    "panel": "shared"
  },
  "problemMatcher": []
},
{
  "label": "Setup Training Environment",
  "detail": "Creates the models/, logs/, reports/ and plots/ output directories if they do not exist.",
  "type": "process",
  "command": "python",
  "args": [
    "-c",
    "import os; [os.makedirs(d, exist_ok=True) for d in ('models/rl', 'models/cnn', 'logs/overnight_training', 'reports/overnight_training', 'plots/overnight_training')]; print('Training directories created')"
  ],
  "presentation": {
    "reveal": "silent",
    "panel": "shared"
  },
  "problemMatcher": []
},
{
  "label": "Validate Model Parameters",
  "type": "shell",
  "command": "python",
  "args": ["model_parameter_audit.py"],
  "problemMatcher": [],
  "presentation": {
    "panel": "shared",
    "reveal": "always"
  }
}
]
}