8. 工程可视化
日志保存与指标监控
- 字段 | 含义 | 可视化意义
# Minimal end-to-end example: train a small classifier on random data while
# logging metrics, saving periodic checkpoints, and plotting the loss /
# accuracy / learning-rate curves at the end.
import os

import matplotlib.pyplot as plt
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim

# ========== Configuration ==========
epochs = 10
log_step = 20  # print a log line every `log_step` batches
checkpoint_step = 2  # save a checkpoint every `checkpoint_step` epochs
save_dir = "./checkpoints"
os.makedirs(save_dir, exist_ok=True)

# ========== Model & data ==========
model = nn.Sequential(
    nn.Linear(10, 50),
    nn.ReLU(),
    nn.Linear(50, 2),
)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=1e-3)
# StepLR multiplies the learning rate by `gamma` every `step_size` epochs.
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.1)

# Fake data: 200 samples with 10 features each and binary labels,
# pre-sliced into 10 batches of 20 samples.
x_train = torch.randn(200, 10)
y_train = torch.randint(0, 2, (200,))
train_loader = [
    (x_train[i:i + 20], y_train[i:i + 20]) for i in range(0, 200, 20)
]

# ========== Training loop ==========
# One entry per epoch in each list; these feed the plots below.
train_losses, train_accs, lrs = [], [], []

for epoch in range(epochs):
    model.train()
    batch_losses, batch_accs = [], []

    for step, (x, y) in enumerate(train_loader):
        optimizer.zero_grad()
        out = model(x)
        loss = criterion(out, y)
        loss.backward()
        optimizer.step()

        batch_losses.append(loss.item())
        preds = out.argmax(dim=1)
        batch_accs.append((preds == y).float().mean().item())

        # Throttle logging: print every `log_step` batches, and always on the
        # last batch so each epoch reports at least once (the running means
        # cover all batches seen so far in this epoch).
        if (step + 1) % log_step == 0 or (step + 1) == len(train_loader):
            print(
                f"Epoch {epoch+1}, Step {step+1}, "
                f"Loss {np.mean(batch_losses):.4f}, Acc {np.mean(batch_accs):.4f}"
            )

    train_losses.append(np.mean(batch_losses))
    train_accs.append(np.mean(batch_accs))
    # Record the learning rate *before* stepping the scheduler so the value
    # logged is the one that was actually used during this epoch.
    lrs.append(optimizer.param_groups[0]['lr'])
    scheduler.step()

    # Throttle checkpointing: save the model weights every
    # `checkpoint_step` epochs.
    if (epoch + 1) % checkpoint_step == 0:
        torch.save(
            model.state_dict(),
            os.path.join(save_dir, f"model_epoch{epoch+1}.pt"),
        )

# ========== Plots ==========
# Per-epoch training loss and accuracy on a shared axis.
plt.figure(figsize=(8, 4))
plt.plot(train_losses, label="Train Loss")
plt.plot(train_accs, label="Train Acc")
plt.xlabel("Epoch")
plt.ylabel("Value")
plt.legend()
plt.title("Training Metrics")
plt.show()

# Learning rate used in each epoch.
plt.figure(figsize=(8, 4))
plt.plot(lrs, label="Learning Rate")
plt.xlabel("Epoch")
plt.ylabel("LR")
plt.legend()  # without this call the label passed to plot() is never shown
plt.title("Learning Rate Schedule")
plt.show()
wandb
- 配置 | 含义 | 使用场景
- 方法 | 作用 | 备注
TensorBoard
- 方法 | 功能 | 使用示例
PyTorch Profiler
- 方法 | 作用 | 示例
CUDA Profiling
Last updated
Was this helpful?