Weights & Biases (wandb)



=== Example ===
The following is an example of how to use wandb to track experiments on Béluga. Since compute nodes on Béluga do not have direct internet access, the <tt>httpproxy</tt> module is loaded so that wandb can reach its servers; to reproduce this on Cedar, loading <tt>httpproxy</tt> is not necessary.
{{File
  |name=wandb-test.sh
  |lang="bash"
  |contents=
#!/bin/bash
#SBATCH --cpus-per-task=1
#SBATCH --mem=2G
#SBATCH --time=0-03:00
#SBATCH --output=%N-%j.out
### httpproxy gives the job access to the internet, which wandb needs to stream results:
module load python/3.6 httpproxy
virtualenv --no-download $SLURM_TMPDIR/env
source $SLURM_TMPDIR/env/bin/activate
pip install torch wandb --no-index
### Save your wandb API key in your .bash_profile or replace $API_KEY with your actual API key:
wandb login $API_KEY
python wandb-test.py
}}
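Submit the job script with <tt>sbatch wandb-test.sh</tt>. If your jobs have no way to reach the wandb servers at all, wandb can instead run in offline mode and the results can be synchronized afterwards from a node with internet access; a minimal sketch (the <tt>offline-run-*</tt> directory names follow wandb's defaults):
 wandb offline                      # store runs locally instead of streaming them
 sbatch wandb-test.sh               # run the experiment as usual
 wandb sync ./wandb/offline-run-*   # later, from a login node with internet access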
The script wandb-test.py calls the <tt>watch()</tt> method, which logs the model's gradients and parameters to Weights & Biases. See their [https://docs.wandb.ai full documentation] for more options; a sketch of logging additional metrics explicitly follows the script.
{{File
  |name=wandb-test.py
  |lang="python"
  |contents=
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision.transforms as transforms
from torchvision.datasets import CIFAR10
from torch.utils.data import DataLoader
import argparse
import wandb
parser = argparse.ArgumentParser(description='cifar10 classification models, wandb test')
parser.add_argument('--lr', type=float, default=0.1, help='learning rate')
parser.add_argument('--batch_size', type=int, default=768, help='input batch size')
parser.add_argument('--max_epochs', type=int, default=4, help='number of training epochs')
parser.add_argument('--num_workers', type=int, default=0, help='number of data-loading workers')
def main():
    args = parser.parse_args()

    print("Starting Wandb...")
    # Start a wandb run; all argparse settings are saved in the run's config
    wandb.init(project="wandb-pytorch-test", config=args)
    class Net(nn.Module):
        def __init__(self):
            super(Net, self).__init__()
            self.conv1 = nn.Conv2d(3, 6, 5)
            self.pool = nn.MaxPool2d(2, 2)
            self.conv2 = nn.Conv2d(6, 16, 5)
            self.fc1 = nn.Linear(16 * 5 * 5, 120)
            self.fc2 = nn.Linear(120, 84)
            self.fc3 = nn.Linear(84, 10)

        def forward(self, x):
            x = self.pool(F.relu(self.conv1(x)))
            x = self.pool(F.relu(self.conv2(x)))
            x = x.view(-1, 16 * 5 * 5)
            x = F.relu(self.fc1(x))
            x = F.relu(self.fc2(x))
            x = self.fc3(x)
            return x
    net = Net()
    # Normalize CIFAR10 images to the [-1, 1] range
    transform_train = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])
    # download=False: the dataset must already be present under ./data,
    # since compute nodes do not have unrestricted internet access
    dataset_train = CIFAR10(root='./data', train=True, download=False, transform=transform_train)
    train_loader = DataLoader(dataset_train, batch_size=args.batch_size, num_workers=args.num_workers)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(net.parameters(), lr=args.lr)
    # watch() asks wandb to log the model's gradients and parameter histograms
    wandb.watch(net)
    for epoch in range(args.max_epochs):
        train(epoch, net, criterion, optimizer, train_loader)
def train(epoch, net, criterion, optimizer, train_loader):
    for batch_idx, (inputs, targets) in enumerate(train_loader):
        outputs = net(inputs)
        loss = criterion(outputs, targets)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
if __name__ == '__main__':
    main()
}}
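Note that <tt>watch()</tt> logs gradients and parameter histograms, but not the training loss itself. The loss can be recorded as well by calling <tt>wandb.log()</tt> inside the loop; a minimal sketch of the training function above with this addition (the metric names <tt>loss</tt> and <tt>epoch</tt> are arbitrary):
 def train(epoch, net, criterion, optimizer, train_loader):
     for batch_idx, (inputs, targets) in enumerate(train_loader):
         outputs = net(inputs)
         loss = criterion(outputs, targets)
         optimizer.zero_grad()
         loss.backward()
         optimizer.step()
         # Send the scalar loss to wandb after each batch
         wandb.log({"loss": loss.item(), "epoch": epoch})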