Mie-informed tandem neural network#

Here, we demonstrate how to train a design generator network capable of suggesting core-shell particles with a specific spectral response, using PyMieDiff as a differentiable forward evaluator. The training pipeline follows the “Tandem” model:

target spectrum –> generator NN –> design –> Mie –> real spectrum

training loss is: MSE(target spec., real spec.)

author: O. Jackson, P. Wiecha, 06/2025

imports#

import time

import numpy as np
from tqdm import tqdm
import matplotlib.pyplot as plt
import torch
from torch import nn

import pymiediff as pmd

setup optimization target#

We set up the main configuration here: pymiediff backend, torch device, parameter limits and wavelengths.

# Backend used by pymiediff for the Mie evaluation, and the torch device
# on which all tensors of this example are allocated.
backend = "torch"
device = "cpu"

# Global settings of the example.
N_samples = 25000  # number of random reference particles to generate
n_max = 4  # Mie expansion is truncated at this order (fixed for performance)
eps_env = torch.tensor(1.0, device=device)  # permittivity of the environment

# [min, max] limits of the random sampling. `lim_r` is applied to both the
# core radius and the shell thickness; `lim_n_re` / `lim_n_im` bound the real
# and imaginary parts of the refractive indices.
lim_r = torch.tensor([40, 100], device=device)
lim_n_re = torch.tensor([1.5, 4.0], device=device)
lim_n_im = torch.tensor([0.0, 0.1], device=device)

# Wavelengths at which the spectra are evaluated, and matching wavenumbers.
wl0 = torch.linspace(400, 800, steps=40, device=device)
k0 = (2 * torch.pi) / wl0

generate reference spectra#

We generate a large number of reference Mie spectra of existing particles, which will be used as design targets during training.

Note: this step could also be done without any physics knowledge, for example with artificial spectra (e.g. Lorentzians), or a scattering maximization loss.

# datagen: draw random core-shell particles and compute their Mie spectra.
# Only the spectra serve as training targets; the geometries are not used.
# The order of the torch.rand calls is kept fixed so that seeded runs
# reproduce the same dataset.
r_c = lim_r[0] + (lim_r[1] - lim_r[0]) * torch.rand(N_samples, device=device)
d_s = lim_r[0] + (lim_r[1] - lim_r[0]) * torch.rand(N_samples, device=device)
r_s = r_c + d_s  # outer radius = core radius + shell thickness
n_re = lim_n_re[0] + (lim_n_re[1] - lim_n_re[0]) * torch.rand(
    N_samples, 2, device=device
)
n_im = lim_n_im[0] + (lim_n_im[1] - lim_n_im[0]) * torch.rand(
    N_samples, 2, device=device
)
n = n_re + 1j * n_im  # column 0: core index, column 1: shell index

# low-level API: the permittivities have to be given as spectra (for
# vectorization), so each particle's constant value n**2 is repeated
# along the wavelength axis.
eps_c = torch.ones_like(k0)[None, :] * n[:, 0, None] ** 2
eps_s = torch.ones_like(k0)[None, :] * n[:, 1, None] ** 2

all_particles = pmd.coreshell.cross_sections(
    k0,
    r_c=r_c,
    r_s=r_s,
    eps_c=eps_c,
    eps_s=eps_s,
    eps_env=eps_env,
    n_max=n_max,
    backend=backend,
)

# The first N_test spectra are held out for testing, the rest is for training.
N_test = 128
q_sca_target_test = all_particles["q_sca"][:N_test].float()
q_sca_target = all_particles["q_sca"][N_test:].float()

# plot one of the training target spectra as a quick visual check
plt.plot(q_sca_target[30].detach().cpu().numpy())
ex 05 tandem
[<matplotlib.lines.Line2D object at 0x7ff455aee1b0>]

Neural network classes / functions#

define the network model (simple MLP) and training loop

class FullyConnected(nn.Module):
    """Simple MLP that maps a target spectrum to normalized design parameters.

    The network consists of three hidden, ReLU-activated linear layers of
    width ``hidden_dim`` followed by a linear output layer with a sigmoid,
    so every output value lies between 0 and 1.

    Args:
        hidden_dim: width of the hidden layers.
        in_dim: number of input features (spectral points). If ``None``
            (default), the length of the module-level ``k0`` is used, which
            reproduces the original behavior.
        out_dim: number of output values. Defaults to 6, the number of
            columns read by ``nn_pred_to_mie_geometry``.
    """

    def __init__(self, hidden_dim=1024, in_dim=None, out_dim=6):
        super().__init__()
        if in_dim is None:
            # fall back to the wavenumber grid defined at module level
            in_dim = len(k0)

        # Attribute names and creation order are kept unchanged so that
        # state_dict keys and random initialization stay the same.
        self.fc_in = nn.Linear(in_dim, hidden_dim)
        self.relu1 = nn.ReLU()
        self.fc_1 = nn.Linear(hidden_dim, hidden_dim)
        self.relu2 = nn.ReLU()
        self.fc_2 = nn.Linear(hidden_dim, hidden_dim)
        self.relu3 = nn.ReLU()
        self.fc_out = nn.Linear(hidden_dim, out_dim)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Return the sigmoid-normalized prediction for the input batch ``x``."""
        x = self.relu1(self.fc_in(x))
        x = self.relu2(self.fc_1(x))
        x = self.relu3(self.fc_2(x))
        return self.sigmoid(self.fc_out(x))


def nn_pred_to_mie_geometry(pred):
    """Convert normalized network outputs into core-shell Mie parameters.

    The columns of ``pred`` are read as: 0 core radius, 1 shell thickness,
    2/3 real/imaginary part of the core index, 4/5 real/imaginary part of
    the shell index. Each column is scaled by the maximum of the matching
    module-level limit (``lim_r``, ``lim_n_re``, ``lim_n_im``).

    Returns:
        Tuple ``(r_c, r_s, eps_c, eps_s)``. The permittivities are the
        squared complex indices, repeated along the ``k0`` axis.
    """
    # implicit normalization: the upper limits act as scale factors
    r_max = lim_r.max()
    n_re_max = lim_n_re.max()
    n_im_max = lim_n_im.max()

    frac_core = pred[:, 0]
    frac_shell = pred[:, 1]
    r_c = r_max * frac_core
    r_s = r_max * (frac_core + frac_shell)  # shell radius is never below core radius

    n_c = n_re_max * pred[:, 2] + n_im_max * (1j * pred[:, 3])
    n_s = n_re_max * pred[:, 4] + n_im_max * (1j * pred[:, 5])

    # repeat the constant permittivity of each particle for every wavenumber
    flat_spectrum = torch.ones_like(k0).unsqueeze(0)
    eps_c = flat_spectrum * n_c.unsqueeze(1) ** 2
    eps_s = flat_spectrum * n_s.unsqueeze(1) ** 2

    return r_c, r_s, eps_c, eps_s


def train_loop(dataloader, model, loss_fn, optimizer):
    """Train ``model`` for one epoch in the tandem configuration.

    For every batch of target spectra, the model proposes core-shell designs,
    their scattering efficiency is computed with pymiediff, and the loss
    between the computed and the target spectra is back-propagated through
    the Mie evaluation into the model.

    Args:
        dataloader: yields batches of target spectra; must support ``len()``.
        model: generator network mapping spectra to normalized designs.
        loss_fn: callable ``loss_fn(prediction, target)`` returning a scalar.
        optimizer: torch optimizer holding the parameters of ``model``.
    """
    size = len(dataloader.dataset)
    # Set the model to training mode - important for batch normalization and
    # dropout layers. Unnecessary here, but kept as best practice.
    model.train()

    # len(dataloader) is the true number of batches: it includes a final
    # partial batch, which `size // batch_size` would miss.
    prog_bar = tqdm(enumerate(dataloader), total=len(dataloader))
    current = 0  # number of samples processed so far
    for i_batch, X in prog_bar:
        # model prediction: generate core-shell particles
        pred = model(X)

        # evaluate Mie
        r_c, r_s, eps_c, eps_s = nn_pred_to_mie_geometry(pred)
        res_mie = pmd.coreshell.cross_sections(
            k0,
            r_c=r_c,
            eps_c=eps_c,
            r_s=r_s,
            eps_s=eps_s,
            eps_env=eps_env,
            backend=backend,
            n_max=n_max,
        )
        q_sca_mie = res_mie["q_sca"].to(dtype=torch.float32)

        # calc. loss between the spectrum of the proposed design and the target
        loss = loss_fn(q_sca_mie, X)

        # Backpropagation
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()

        # progress report; keep the loss tensor and its float value separate
        loss_value = loss.item()
        current += len(X)
        prog_bar.set_description(
            f"loss: {loss_value:>7f}  [{current:>5d}/{size:>5d}]"
        )

training the Mie-informed network#

Here we use a simple, manually optimized training schedule.

model = FullyConnected().to(device)

# Training schedule, one entry per stage:
# bs = batch size, lr = learning rate, n_ep = number of epochs.
confs = [
    {"bs": 32, "lr": 1e-4, "n_ep": 5},
    {"bs": 64, "lr": 1e-4, "n_ep": 5},
    {"bs": 128, "lr": 1e-4, "n_ep": 6},
    {"bs": 256, "lr": 1e-5, "n_ep": 6},
]

t_start = time.time()
for conf in confs:
    batch_size, learning_rate, epochs = conf["bs"], conf["lr"], conf["n_ep"]

    print("-------------------------------")
    print(f"LR={learning_rate}, batch_size={batch_size}")
    print("-------------------------------")

    # A fresh optimizer and dataloader are created for every stage, while
    # the model itself keeps training across stages.
    loss_fn = nn.MSELoss()
    optimizer = torch.optim.AdamW(model.parameters(), lr=learning_rate)
    train_dataloader = torch.utils.data.DataLoader(
        q_sca_target,
        batch_size=batch_size,
    )

    for t in range(epochs):
        elapsed = time.time() - t_start
        print(f"Epoch {t+1}, time={elapsed:.2f}s")
        train_loop(train_dataloader, model, loss_fn, optimizer)

print("Done!")
-------------------------------
LR=0.0001, batch_size=32
-------------------------------
Epoch 1, time=0.00s

  0%|          | 0/777 [00:00<?, ?it/s]
loss: 6.182572  [   32/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 3.334477  [   64/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 3.108343  [   96/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 2.594806  [  128/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 2.844598  [  160/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 2.672254  [  192/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 1.921931  [  224/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 1.853018  [  256/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 1.622688  [  288/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 1.216375  [  320/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 1.715949  [  352/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 1.650837  [  384/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 1.526762  [  416/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 1.614265  [  448/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 1.356275  [  480/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 1.488001  [  512/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 1.352542  [  544/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 1.450811  [  576/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 1.607834  [  608/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 1.281707  [  640/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.873309  [  672/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 1.139984  [  704/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 1.418243  [  736/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 1.244231  [  768/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.997306  [  800/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 1.153756  [  832/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.859865  [  864/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 1.801029  [  896/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 1.482426  [  928/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 1.537925  [  960/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 1.206968  [  992/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.808883  [ 1024/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 1.273452  [ 1056/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 1.159672  [ 1088/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 1.055174  [ 1120/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.956229  [ 1152/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 1.101278  [ 1184/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.929632  [ 1216/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 1.420024  [ 1248/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 1.146415  [ 1280/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.852853  [ 1312/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.998428  [ 1344/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.928108  [ 1376/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.794232  [ 1408/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 1.030994  [ 1440/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 1.257447  [ 1472/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.857558  [ 1504/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 1.045353  [ 1536/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 1.110978  [ 1568/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 1.056010  [ 1600/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.706728  [ 1632/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.913445  [ 1664/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 1.084007  [ 1696/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.784323  [ 1728/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 1.185635  [ 1760/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.762800  [ 1792/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.925333  [ 1824/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.823907  [ 1856/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.882497  [ 1888/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.755688  [ 1920/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.833456  [ 1952/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 1.070379  [ 1984/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.695112  [ 2016/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.852664  [ 2048/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.874298  [ 2080/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.781030  [ 2112/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.801805  [ 2144/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.926717  [ 2176/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 1.022391  [ 2208/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.681406  [ 2240/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.781619  [ 2272/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.838514  [ 2304/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.901780  [ 2336/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.690984  [ 2368/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.573145  [ 2400/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.812639  [ 2432/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.735887  [ 2464/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.910911  [ 2496/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.759571  [ 2528/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.857392  [ 2560/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 1.082136  [ 2592/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.853489  [ 2624/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.477143  [ 2656/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.925727  [ 2688/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.668492  [ 2720/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.837689  [ 2752/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 1.025828  [ 2784/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.877934  [ 2816/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.950735  [ 2848/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.715084  [ 2880/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.919077  [ 2912/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.927000  [ 2944/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.868034  [ 2976/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.859462  [ 3008/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.917695  [ 3040/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.645564  [ 3072/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.771818  [ 3104/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.754923  [ 3136/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 1.095286  [ 3168/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.665557  [ 3200/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 1.015794  [ 3232/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.869047  [ 3264/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.633646  [ 3296/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.861033  [ 3328/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.745661  [ 3360/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.544293  [ 3392/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.803379  [ 3424/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.735419  [ 3456/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.608767  [ 3488/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.768060  [ 3520/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.821219  [ 3552/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.733536  [ 3584/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.768524  [ 3616/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.720838  [ 3648/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.707083  [ 3680/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.932173  [ 3712/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.715108  [ 3744/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.877462  [ 3776/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.843866  [ 3808/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.549525  [ 3840/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.929485  [ 3872/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.725561  [ 3904/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.729177  [ 3936/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.634928  [ 3968/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.764124  [ 4000/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.940358  [ 4032/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.697993  [ 4064/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.934334  [ 4096/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.734423  [ 4128/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.829414  [ 4160/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.720620  [ 4192/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.781999  [ 4224/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.783253  [ 4256/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.766130  [ 4288/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.708845  [ 4320/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.507006  [ 4352/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.636664  [ 4384/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.837828  [ 4416/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.715154  [ 4448/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.686331  [ 4480/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.533245  [ 4512/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.560344  [ 4544/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.738786  [ 4576/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.624795  [ 4608/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.699484  [ 4640/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.821406  [ 4672/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.748804  [ 4704/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.697531  [ 4736/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.601929  [ 4768/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.534576  [ 4800/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.745200  [ 4832/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.695427  [ 4864/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.691930  [ 4896/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.505526  [ 4928/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.917637  [ 4960/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.518932  [ 4992/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.652021  [ 5024/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.604701  [ 5056/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.760159  [ 5088/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.706993  [ 5120/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.912276  [ 5152/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.853322  [ 5184/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.479915  [ 5216/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.642461  [ 5248/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.805953  [ 5280/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.758529  [ 5312/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.562470  [ 5344/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.651807  [ 5376/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.693378  [ 5408/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.477579  [ 5440/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.440845  [ 5472/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.755653  [ 5504/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.543157  [ 5536/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.525984  [ 5568/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.738688  [ 5600/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.505031  [ 5632/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.603095  [ 5664/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.495685  [ 5696/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.649035  [ 5728/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.626231  [ 5760/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.624574  [ 5792/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.539009  [ 5824/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.608602  [ 5856/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.517679  [ 5888/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.645958  [ 5920/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.633316  [ 5952/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.456997  [ 5984/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.474314  [ 6016/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.585139  [ 6048/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.695903  [ 6080/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.426125  [ 6112/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.650087  [ 6144/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.549055  [ 6176/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.719173  [ 6208/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.478238  [ 6240/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.570678  [ 6272/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.565205  [ 6304/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.443512  [ 6336/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.571824  [ 6368/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.499425  [ 6400/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.587146  [ 6432/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.484214  [ 6464/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.591078  [ 6496/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.529968  [ 6528/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.574853  [ 6560/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.558849  [ 6592/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.477197  [ 6624/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.366215  [ 6656/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.419809  [ 6688/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.556782  [ 6720/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.456732  [ 6752/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.501543  [ 6784/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.547164  [ 6816/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.443912  [ 6848/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.691703  [ 6880/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.586831  [ 6912/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.483213  [ 6944/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.596832  [ 6976/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.554465  [ 7008/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.521209  [ 7040/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.491609  [ 7072/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.387293  [ 7104/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.794813  [ 7136/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.425498  [ 7168/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.389268  [ 7200/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.420405  [ 7232/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.542045  [ 7264/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.487033  [ 7296/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.399472  [ 7328/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.448121  [ 7360/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.406630  [ 7392/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.591825  [ 7424/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.627374  [ 7456/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.447223  [ 7488/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.565641  [ 7520/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.624734  [ 7552/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.451864  [ 7584/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.415504  [ 7616/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.691492  [ 7648/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.648329  [ 7680/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.595378  [ 7712/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.359191  [ 7744/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.518852  [ 7776/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.610308  [ 7808/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.353026  [ 7840/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.380109  [ 7872/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.606776  [ 7904/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.590940  [ 7936/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.511027  [ 7968/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.414065  [ 8000/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.376860  [ 8032/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.442762  [ 8064/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.452412  [ 8096/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.528809  [ 8128/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.394380  [ 8160/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.465563  [ 8192/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.566898  [ 8224/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.474321  [ 8256/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.663122  [ 8288/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.516909  [ 8320/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.398049  [ 8352/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.659187  [ 8384/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.659491  [ 8416/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.484123  [ 8448/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.606162  [ 8480/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.388272  [ 8512/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.389152  [ 8544/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.465150  [ 8576/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.599031  [ 8608/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.521233  [ 8640/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.741320  [ 8672/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.648986  [ 8704/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.331249  [ 8736/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.425979  [ 8768/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.389425  [ 8800/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.479067  [ 8832/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.398535  [ 8864/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.400271  [ 8896/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.489922  [ 8928/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.517244  [ 8960/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.549319  [ 8992/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.408617  [ 9024/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.604844  [ 9056/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.494191  [ 9088/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.536059  [ 9120/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.328432  [ 9152/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.642036  [ 9184/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.454632  [ 9216/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.615949  [ 9248/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.544591  [ 9280/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.364256  [ 9312/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.426344  [ 9344/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.616912  [ 9376/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.385636  [ 9408/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.628750  [ 9440/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.631596  [ 9472/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.461516  [ 9504/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.524834  [ 9536/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.431266  [ 9568/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.571660  [ 9600/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.389676  [ 9632/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.347735  [ 9664/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.334361  [ 9696/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.485077  [ 9728/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.619223  [ 9760/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.404663  [ 9792/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.430423  [ 9824/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.418580  [ 9856/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.473991  [ 9888/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.610956  [ 9920/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.435558  [ 9952/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.416647  [ 9984/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.440566  [10016/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.552326  [10048/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.617116  [10080/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.520148  [10112/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.370968  [10144/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.439676  [10176/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.462503  [10208/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.547763  [10240/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.418268  [10272/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.683885  [10304/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.608562  [10336/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.545290  [10368/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.402975  [10400/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.532819  [10432/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.473393  [10464/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.501363  [10496/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.352207  [10528/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.472003  [10560/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.452334  [10592/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.455455  [10624/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.498290  [10656/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.496919  [10688/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.488055  [10720/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.425092  [10752/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.503697  [10784/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.459601  [10816/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.381308  [10848/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.382794  [10880/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.422137  [10912/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.550531  [10944/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.439775  [10976/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.459610  [11008/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.597789  [11040/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.508992  [11072/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.416530  [11104/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.432492  [11136/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.454946  [11168/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.449624  [11200/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.450659  [11232/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.475472  [11264/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.442137  [11296/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.378373  [11328/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.487064  [11360/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.410949  [11392/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.615205  [11424/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.378739  [11456/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.405390  [11488/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.537704  [11520/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.498317  [11552/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.377109  [11584/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.477670  [11616/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.531565  [11648/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.538401  [11680/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.481005  [11712/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.533812  [11744/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.233503  [11776/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.479941  [11808/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.481281  [11840/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.592981  [11872/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.298879  [11904/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.495679  [11936/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.400152  [11968/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.373099  [12000/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.439246  [12032/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.316510  [12064/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.425532  [12096/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.531142  [12128/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.411219  [12160/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.451959  [12192/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.470364  [12224/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.409591  [12256/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.453683  [12288/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.324992  [12320/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.323058  [12352/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.370793  [12384/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.467052  [12416/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.398762  [12448/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.482727  [12480/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.485286  [12512/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.405564  [12544/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.537323  [12576/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.521915  [12608/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.433399  [12640/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.660012  [12672/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.444537  [12704/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.560645  [12736/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.393242  [12768/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.416149  [12800/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.633953  [12832/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.616306  [12864/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.596975  [12896/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.580615  [12928/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.474852  [12960/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.343369  [12992/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.372915  [13024/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.592035  [13056/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.423999  [13088/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.493554  [13120/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.349277  [13152/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.446138  [13184/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.350674  [13216/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.377281  [13248/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.352433  [13280/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.343349  [13312/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.491824  [13344/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.537138  [13376/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.403118  [13408/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.531287  [13440/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.451567  [13472/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.418220  [13504/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.460424  [13536/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.449146  [13568/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.618755  [13600/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.376046  [13632/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.488001  [13664/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.400850  [13696/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.520549  [13728/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.358996  [13760/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.373340  [13792/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.368646  [13824/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.514603  [13856/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.517549  [13888/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.360675  [13920/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.488427  [13952/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.494517  [13984/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.410651  [14016/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.314336  [14048/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.293638  [14080/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.476437  [14112/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.555156  [14144/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.324506  [14176/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.388002  [14208/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.442456  [14240/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.350908  [14272/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.319808  [14304/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.424010  [14336/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.392068  [14368/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.398116  [14400/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.554329  [14432/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.561060  [14464/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.386620  [14496/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.380240  [14528/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.497517  [14560/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.597805  [14592/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.545099  [14624/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.374111  [14656/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.385943  [14688/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.525957  [14720/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.385689  [14752/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.407729  [14784/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.416215  [14816/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.424749  [14848/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.431131  [14880/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.320945  [14912/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.372937  [14944/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.404364  [14976/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.337524  [15008/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.444859  [15040/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.542595  [15072/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.256636  [15104/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.398489  [15136/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.374267  [15168/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.381496  [15200/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.469922  [15232/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.494171  [15264/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.445082  [15296/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.286583  [15328/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.427008  [15360/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.486539  [15392/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.363454  [15424/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.292049  [15456/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.426690  [15488/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.436988  [15520/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.390411  [15552/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.365706  [15584/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.217163  [15616/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.365467  [15648/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.548497  [15680/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.352967  [15712/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.418756  [15744/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.294217  [15776/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.427893  [15808/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.486844  [15840/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.396313  [15872/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.399575  [15904/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.269003  [15936/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.246287  [15968/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.422631  [16000/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.330260  [16032/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.392084  [16064/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.309957  [16096/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.345493  [16128/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.418565  [16160/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.424585  [16192/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.216848  [16224/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.354302  [16256/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.529730  [16288/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.302277  [16320/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.290925  [16352/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.404665  [16384/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.411469  [16416/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.332088  [16448/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.336992  [16480/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.344732  [16512/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.520639  [16544/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.346569  [16576/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.420527  [16608/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.436842  [16640/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.423338  [16672/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.489803  [16704/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.331524  [16736/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.381501  [16768/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.515839  [16800/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.460788  [16832/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.391189  [16864/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.296698  [16896/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.407435  [16928/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.320570  [16960/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.339994  [16992/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.354870  [17024/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.309354  [17056/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.421184  [17088/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.313710  [17120/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.349468  [17152/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.277739  [17184/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.504557  [17216/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.359512  [17248/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.341378  [17280/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.400877  [17312/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.346884  [17344/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.287972  [17376/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.347368  [17408/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.432697  [17440/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.451170  [17472/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.325171  [17504/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.312805  [17536/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.372722  [17568/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.291218  [17600/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.381447  [17632/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.358265  [17664/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.322823  [17696/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.230518  [17728/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.339897  [17760/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.525428  [17792/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.374745  [17824/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.456367  [17856/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.276648  [17888/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.280241  [17920/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.434213  [17952/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.316658  [17984/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.306616  [18016/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.380962  [18048/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.496836  [18080/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.322949  [18112/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.326471  [18144/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.351888  [18176/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.377811  [18208/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.300397  [18240/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.393547  [18272/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.357333  [18304/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.401228  [18336/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.403133  [18368/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.411685  [18400/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.319454  [18432/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.572745  [18464/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.417655  [18496/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.281128  [18528/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.334777  [18560/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.495568  [18592/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.450313  [18624/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.396474  [18656/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.445340  [18688/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.361275  [18720/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.401411  [18752/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.355343  [18784/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.354503  [18816/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.371070  [18848/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.295151  [18880/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.398222  [18912/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.322440  [18944/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.365946  [18976/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.237211  [19008/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.298748  [19040/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.347349  [19072/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.216835  [19104/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.410963  [19136/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.360430  [19168/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.224400  [19200/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.437457  [19232/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.383434  [19264/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.308794  [19296/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.396776  [19328/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.423565  [19360/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.434738  [19392/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.303214  [19424/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.259686  [19456/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.334746  [19488/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.457153  [19520/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.268551  [19552/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.385748  [19584/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.403775  [19616/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.422714  [19648/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.325159  [19680/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.467854  [19712/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.305899  [19744/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.263826  [19776/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.424393  [19808/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.316809  [19840/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.268500  [19872/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.419471  [19904/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.277837  [19936/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.380701  [19968/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.424425  [20000/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.320018  [20032/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.256738  [20064/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.269271  [20096/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.390987  [20128/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.321418  [20160/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.350246  [20192/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.337882  [20224/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.399227  [20256/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.312488  [20288/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.270803  [20320/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.430310  [20352/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.309064  [20384/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.220802  [20416/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.276694  [20448/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.430284  [20480/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.407599  [20512/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.482259  [20544/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.373769  [20576/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.371380  [20608/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.367435  [20640/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.454970  [20672/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.236993  [20704/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.308130  [20736/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.374640  [20768/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.427230  [20800/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.408032  [20832/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.392227  [20864/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.395829  [20896/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.602216  [20928/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.455932  [20960/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.363813  [20992/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.302878  [21024/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.488638  [21056/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.441445  [21088/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.371620  [21120/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.303022  [21152/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.393241  [21184/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.536671  [21216/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.405784  [21248/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.426318  [21280/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.436494  [21312/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.271370  [21344/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.394010  [21376/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.317655  [21408/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.221180  [21440/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.432571  [21472/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.273507  [21504/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.422910  [21536/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.408548  [21568/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.359962  [21600/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.372469  [21632/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.283168  [21664/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.341756  [21696/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.380907  [21728/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.341792  [21760/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.422473  [21792/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.383469  [21824/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.297440  [21856/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.254103  [21888/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.294036  [21920/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.274219  [21952/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.342979  [21984/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.323180  [22016/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.537353  [22048/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.356895  [22080/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.479990  [22112/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.397183  [22144/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.390467  [22176/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.324865  [22208/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.253007  [22240/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.343804  [22272/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.516181  [22304/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.288809  [22336/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.335244  [22368/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.301399  [22400/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.267387  [22432/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.403985  [22464/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.355732  [22496/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.440882  [22528/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.323637  [22560/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.306853  [22592/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.425291  [22624/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.286754  [22656/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.500194  [22688/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.339172  [22720/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.376058  [22752/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.375402  [22784/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.299163  [22816/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.362746  [22848/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.192124  [22880/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.277537  [22912/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.252056  [22944/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.369647  [22976/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.218066  [23008/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.327654  [23040/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.303217  [23072/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.358150  [23104/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.299988  [23136/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.375925  [23168/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.360307  [23200/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.347974  [23232/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.321921  [23264/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.332618  [23296/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.359337  [23328/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.302615  [23360/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.283038  [23392/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.519574  [23424/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.407474  [23456/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.382742  [23488/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.418022  [23520/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.421506  [23552/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.322997  [23584/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.347220  [23616/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.267300  [23648/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.547674  [23680/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.332545  [23712/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.406222  [23744/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.311692  [23776/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.499420  [23808/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.370106  [23840/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.438601  [23872/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.504836  [23904/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.304768  [23936/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.343738  [23968/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.290378  [24000/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.241026  [24032/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.297337  [24064/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.268155  [24096/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.275849  [24128/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.336545  [24160/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.308935  [24192/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.394887  [24224/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.367017  [24256/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.382758  [24288/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.541768  [24320/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.287573  [24352/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.343699  [24384/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.249040  [24416/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.345051  [24448/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.335610  [24480/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.259211  [24512/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.301738  [24544/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.290498  [24576/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.385153  [24608/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.362023  [24640/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.334413  [24672/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.233532  [24704/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.288486  [24736/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.343870  [24768/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.290819  [24800/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.299607  [24832/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.267837  [24864/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.451986  [24872/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.451986  [24872/24872]: : 778it [00:25, 30.20it/s]
Epoch 2, time=25.79s

  0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.303133  [   32/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.151487  [   64/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.318765  [   96/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.285219  [  128/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.351937  [  160/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.288636  [  192/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.343785  [  224/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.235235  [  256/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.413842  [  288/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.475065  [  320/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.434514  [  352/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.236320  [  384/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.311150  [  416/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.398568  [  448/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.273421  [  480/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.349302  [  512/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.283959  [  544/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.301722  [  576/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.350492  [  608/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.360828  [  640/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.248867  [  672/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.471385  [  704/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.381764  [  736/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.348401  [  768/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.282063  [  800/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.380425  [  832/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.248235  [  864/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.460929  [  896/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.308660  [  928/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.308168  [  960/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.388460  [  992/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.240443  [ 1024/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.307169  [ 1056/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.407995  [ 1088/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.260567  [ 1120/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.408227  [ 1152/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.359861  [ 1184/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.276127  [ 1216/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.378850  [ 1248/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.357901  [ 1280/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.358293  [ 1312/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.260461  [ 1344/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.417661  [ 1376/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.422087  [ 1408/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.400386  [ 1440/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.384299  [ 1472/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.236981  [ 1504/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.346517  [ 1536/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.372406  [ 1568/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.348665  [ 1600/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.286150  [ 1632/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.358694  [ 1664/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.352164  [ 1696/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.267228  [ 1728/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.363219  [ 1760/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.216982  [ 1792/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.447193  [ 1824/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.272310  [ 1856/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.272730  [ 1888/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.252365  [ 1920/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.259615  [ 1952/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.313676  [ 1984/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.254766  [ 2016/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.413763  [ 2048/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.326718  [ 2080/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.237399  [ 2112/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.257179  [ 2144/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.366702  [ 2176/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.322454  [ 2208/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.321178  [ 2240/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.271390  [ 2272/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.352137  [ 2304/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.368009  [ 2336/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.241145  [ 2368/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.200141  [ 2400/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.243110  [ 2432/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.207349  [ 2464/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.377745  [ 2496/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.321258  [ 2528/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.281937  [ 2560/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.319191  [ 2592/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.388786  [ 2624/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.152815  [ 2656/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.410009  [ 2688/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.276721  [ 2720/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.308788  [ 2752/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.313796  [ 2784/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.292488  [ 2816/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.382648  [ 2848/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.256953  [ 2880/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.377486  [ 2912/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.393062  [ 2944/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.378297  [ 2976/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.233271  [ 3008/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.416272  [ 3040/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.213773  [ 3072/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.294011  [ 3104/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.347402  [ 3136/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.326073  [ 3168/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.278834  [ 3200/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.494409  [ 3232/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.283643  [ 3264/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.263155  [ 3296/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.316357  [ 3328/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.344766  [ 3360/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.213819  [ 3392/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.262157  [ 3424/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.368405  [ 3456/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.357644  [ 3488/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.349842  [ 3520/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.296208  [ 3552/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.409783  [ 3584/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.386074  [ 3616/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.211272  [ 3648/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.357898  [ 3680/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.456691  [ 3712/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.248428  [ 3744/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.431181  [ 3776/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.336599  [ 3808/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.248759  [ 3840/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.305417  [ 3872/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.296353  [ 3904/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.396774  [ 3936/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.314589  [ 3968/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.362420  [ 4000/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.378754  [ 4032/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.234301  [ 4064/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.455254  [ 4096/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.249636  [ 4128/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.304295  [ 4160/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.365081  [ 4192/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.368285  [ 4224/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.310641  [ 4256/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.325222  [ 4288/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.268216  [ 4320/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.217904  [ 4352/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.195660  [ 4384/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.379354  [ 4416/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.297099  [ 4448/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.356829  [ 4480/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.247164  [ 4512/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.242046  [ 4544/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.355358  [ 4576/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.219030  [ 4608/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.375675  [ 4640/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.384247  [ 4672/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.306783  [ 4704/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.268594  [ 4736/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.296113  [ 4768/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.289031  [ 4800/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.382115  [ 4832/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.258346  [ 4864/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.311838  [ 4896/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.211418  [ 4928/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.480172  [ 4960/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.207919  [ 4992/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.310867  [ 5024/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.280997  [ 5056/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.320377  [ 5088/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.350414  [ 5120/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.497772  [ 5152/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.363241  [ 5184/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.210222  [ 5216/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.337163  [ 5248/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.439612  [ 5280/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.351622  [ 5312/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.352642  [ 5344/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.385271  [ 5376/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.313662  [ 5408/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.308999  [ 5440/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.254717  [ 5472/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.360613  [ 5504/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.339263  [ 5536/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.314112  [ 5568/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.372617  [ 5600/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.203795  [ 5632/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.295589  [ 5664/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.294730  [ 5696/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.418652  [ 5728/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.313686  [ 5760/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.332274  [ 5792/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.301104  [ 5824/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.322097  [ 5856/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.278526  [ 5888/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.427306  [ 5920/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.320515  [ 5952/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.232794  [ 5984/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.213078  [ 6016/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.196728  [ 6048/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.288600  [ 6080/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.244271  [ 6112/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.295850  [ 6144/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.295084  [ 6176/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.353055  [ 6208/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.256223  [ 6240/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.211577  [ 6272/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.381216  [ 6304/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.284269  [ 6336/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.290153  [ 6368/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.247505  [ 6400/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.327339  [ 6432/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.280928  [ 6464/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.336600  [ 6496/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.213314  [ 6528/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.323938  [ 6560/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.332015  [ 6592/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.249672  [ 6624/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.216879  [ 6656/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.296798  [ 6688/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.276672  [ 6720/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.248381  [ 6752/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.216865  [ 6784/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.356984  [ 6816/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.261532  [ 6848/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.470693  [ 6880/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.283144  [ 6912/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.254472  [ 6944/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.392836  [ 6976/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.287748  [ 7008/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.233608  [ 7040/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.269309  [ 7072/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.214611  [ 7104/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.598052  [ 7136/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.255252  [ 7168/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.231600  [ 7200/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.199889  [ 7232/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.401102  [ 7264/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.348018  [ 7296/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.181148  [ 7328/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.268579  [ 7360/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.246925  [ 7392/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.332897  [ 7424/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.389495  [ 7456/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.292720  [ 7488/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.374388  [ 7520/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.414756  [ 7552/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.249407  [ 7584/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.193524  [ 7616/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.363473  [ 7648/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.463107  [ 7680/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.313540  [ 7712/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.253451  [ 7744/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.334758  [ 7776/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.363350  [ 7808/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.212543  [ 7840/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.245239  [ 7872/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.462380  [ 7904/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.290131  [ 7936/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.324035  [ 7968/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.239098  [ 8000/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.225909  [ 8032/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.267676  [ 8064/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.281602  [ 8096/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.289612  [ 8128/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.292887  [ 8160/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.353696  [ 8192/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.356935  [ 8224/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.265488  [ 8256/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.362858  [ 8288/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.358608  [ 8320/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.241614  [ 8352/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.359301  [ 8384/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.439738  [ 8416/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.283827  [ 8448/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.347467  [ 8480/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.225025  [ 8512/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.225355  [ 8544/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.265922  [ 8576/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.397294  [ 8608/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.351067  [ 8640/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.412683  [ 8672/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.318631  [ 8704/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.174167  [ 8736/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.246119  [ 8768/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.212205  [ 8800/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.254155  [ 8832/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.236251  [ 8864/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.277684  [ 8896/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.328624  [ 8928/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.327932  [ 8960/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.335622  [ 8992/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.244634  [ 9024/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.336718  [ 9056/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.332665  [ 9088/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.328690  [ 9120/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.200213  [ 9152/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.446267  [ 9184/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.250528  [ 9216/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.333183  [ 9248/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.341000  [ 9280/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.166288  [ 9312/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.284169  [ 9344/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.329725  [ 9376/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.262552  [ 9408/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.362154  [ 9440/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.332740  [ 9472/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.275097  [ 9504/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.310746  [ 9536/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.260296  [ 9568/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.333383  [ 9600/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.216976  [ 9632/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.223088  [ 9664/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.183558  [ 9696/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.253209  [ 9728/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.308288  [ 9760/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.234160  [ 9792/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.256225  [ 9824/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.259891  [ 9856/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.278649  [ 9888/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.318951  [ 9920/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.250676  [ 9952/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.327347  [ 9984/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.240595  [10016/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.277525  [10048/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.389894  [10080/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.275317  [10112/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.229365  [10144/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.302348  [10176/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.223232  [10208/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.362307  [10240/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.289681  [10272/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.396656  [10304/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.267646  [10336/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.290277  [10368/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.253198  [10400/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.278985  [10432/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.273663  [10464/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.260337  [10496/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.232959  [10528/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.305088  [10560/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.286901  [10592/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.303053  [10624/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.313963  [10656/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.320910  [10688/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.290774  [10720/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.269046  [10752/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.262283  [10784/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.312536  [10816/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.231918  [10848/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.244936  [10880/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.270064  [10912/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.402147  [10944/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.326589  [10976/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.296078  [11008/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.371504  [11040/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.341900  [11072/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.273013  [11104/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.271616  [11136/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.307845  [11168/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.272735  [11200/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.306107  [11232/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.329905  [11264/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.259639  [11296/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.261085  [11328/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.293442  [11360/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.224815  [11392/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.384917  [11424/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.271664  [11456/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.286021  [11488/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.342144  [11520/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.276867  [11552/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.197430  [11584/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.372719  [11616/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.277789  [11648/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.350666  [11680/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.323493  [11712/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.284939  [11744/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.149889  [11776/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.291829  [11808/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.228500  [11840/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.426203  [11872/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.159113  [11904/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.251576  [11936/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.285275  [11968/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.215936  [12000/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.285926  [12032/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.241284  [12064/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.293944  [12096/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.307429  [12128/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.244891  [12160/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.390276  [12192/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.285725  [12224/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.255911  [12256/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.308401  [12288/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.199325  [12320/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.207913  [12352/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.202352  [12384/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.323255  [12416/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.272744  [12448/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.295949  [12480/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.390260  [12512/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.258522  [12544/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.404557  [12576/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.326143  [12608/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.274634  [12640/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.445921  [12672/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.280693  [12704/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.391015  [12736/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.202972  [12768/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.270509  [12800/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.440166  [12832/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.357475  [12864/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.387353  [12896/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.324554  [12928/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.324412  [12960/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.219365  [12992/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.275552  [13024/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.397977  [13056/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.289492  [13088/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.399457  [13120/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.211956  [13152/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.270755  [13184/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.238808  [13216/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.296015  [13248/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.236756  [13280/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.258413  [13312/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.347807  [13344/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.357918  [13376/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.269763  [13408/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.415229  [13440/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.284834  [13472/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.315424  [13504/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.320082  [13536/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.334028  [13568/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.426173  [13600/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.259601  [13632/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.293818  [13664/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.239248  [13696/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.391869  [13728/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.222464  [13760/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.237191  [13792/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.253453  [13824/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.314276  [13856/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.401094  [13888/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.242208  [13920/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.326807  [13952/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.323126  [13984/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.294869  [14016/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.222796  [14048/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.218435  [14080/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.358977  [14112/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.400003  [14144/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.188797  [14176/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.234490  [14208/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.347323  [14240/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.252103  [14272/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.207874  [14304/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.290380  [14336/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.263102  [14368/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.285539  [14400/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.332403  [14432/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.372083  [14464/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.250543  [14496/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.236166  [14528/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.292430  [14560/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.296883  [14592/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.343973  [14624/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.265554  [14656/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.205606  [14688/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.271599  [14720/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.256223  [14752/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.186307  [14784/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.228002  [14816/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.293524  [14848/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.264713  [14880/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.206909  [14912/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.204052  [14944/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.246196  [14976/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.230419  [15008/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.286018  [15040/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.359338  [15072/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.174416  [15104/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.300374  [15136/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.254701  [15168/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.259431  [15200/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.342193  [15232/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.382313  [15264/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.326361  [15296/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.240418  [15328/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.314700  [15360/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.366739  [15392/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.301770  [15424/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.210980  [15456/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.278877  [15488/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.344644  [15520/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.294641  [15552/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.275871  [15584/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.189176  [15616/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.317312  [15648/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.410210  [15680/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.230495  [15712/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.337459  [15744/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.190412  [15776/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.329188  [15808/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.373150  [15840/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.274105  [15872/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.276758  [15904/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.161294  [15936/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.192966  [15968/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.304727  [16000/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.237223  [16032/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.290962  [16064/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.214022  [16096/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.265724  [16128/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.300678  [16160/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.339999  [16192/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.152751  [16224/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.233744  [16256/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.429419  [16288/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.208122  [16320/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.209372  [16352/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.349976  [16384/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.296406  [16416/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.265580  [16448/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.346201  [16480/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.292108  [16512/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.406158  [16544/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.258977  [16576/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.367486  [16608/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.351955  [16640/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.347997  [16672/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.304599  [16704/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.217212  [16736/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.243842  [16768/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.386387  [16800/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.323660  [16832/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.283327  [16864/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.229813  [16896/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.317713  [16928/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.213633  [16960/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.252861  [16992/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.281653  [17024/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.211963  [17056/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.246053  [17088/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.225357  [17120/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.241846  [17152/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.191816  [17184/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.402124  [17216/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.257863  [17248/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.230556  [17280/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.297148  [17312/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.279942  [17344/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.224683  [17376/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.277290  [17408/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.349480  [17440/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.360346  [17472/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.235744  [17504/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.228038  [17536/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.283239  [17568/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.206871  [17600/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.277635  [17632/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.265183  [17664/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.247978  [17696/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.175265  [17728/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.254162  [17760/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.414983  [17792/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.292880  [17824/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.318824  [17856/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.201217  [17888/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.197148  [17920/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.282633  [17952/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.227035  [17984/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.201345  [18016/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.328046  [18048/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.263796  [18080/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.229658  [18112/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.236355  [18144/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.271864  [18176/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.272884  [18208/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.176701  [18240/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.296985  [18272/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.230988  [18304/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.309428  [18336/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.282127  [18368/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.304954  [18400/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.246903  [18432/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.358340  [18464/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.269290  [18496/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.195571  [18528/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.200206  [18560/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.246740  [18592/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.304646  [18624/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.258973  [18656/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.317295  [18688/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.252912  [18720/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.242716  [18752/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.217068  [18784/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.257769  [18816/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.284054  [18848/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.169886  [18880/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.243300  [18912/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.197740  [18944/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.290609  [18976/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.178745  [19008/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.198900  [19040/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.237155  [19072/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.152844  [19104/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.276023  [19136/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.291355  [19168/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.133390  [19200/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.359730  [19232/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.279950  [19264/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.237443  [19296/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.306100  [19328/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.328075  [19360/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.315068  [19392/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.215919  [19424/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.164590  [19456/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.285745  [19488/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.344997  [19520/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.197502  [19552/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.281358  [19584/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.311932  [19616/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.357053  [19648/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.248726  [19680/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.382287  [19712/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.218087  [19744/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.176243  [19776/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.334624  [19808/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.248178  [19840/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.179909  [19872/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.344811  [19904/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.161862  [19936/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.306887  [19968/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.311564  [20000/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.249135  [20032/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.201237  [20064/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.188803  [20096/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.275602  [20128/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.247099  [20160/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.258894  [20192/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.259421  [20224/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.320954  [20256/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.218188  [20288/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.238401  [20320/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.356239  [20352/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.231858  [20384/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.162773  [20416/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.185307  [20448/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.303748  [20480/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.364712  [20512/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.379899  [20544/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.288685  [20576/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.293558  [20608/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.258540  [20640/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.374775  [20672/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.185943  [20704/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.252642  [20736/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.265647  [20768/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.324871  [20800/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.295063  [20832/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.331432  [20864/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.316155  [20896/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.438002  [20928/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.277831  [20960/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.226185  [20992/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.247540  [21024/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.343280  [21056/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.348798  [21088/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.262779  [21120/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.232653  [21152/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.289281  [21184/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.390659  [21216/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.254055  [21248/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.305377  [21280/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.327732  [21312/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.169941  [21344/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.272807  [21376/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.250862  [21408/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.164074  [21440/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.334611  [21472/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.237527  [21504/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.277351  [21536/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.302444  [21568/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.304827  [21600/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.302240  [21632/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.246095  [21664/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.261681  [21696/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.289353  [21728/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.298168  [21760/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.359767  [21792/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.276318  [21824/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.218288  [21856/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.190514  [21888/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.236717  [21920/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.224375  [21952/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.286538  [21984/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.274712  [22016/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.382420  [22048/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.269880  [22080/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.370807  [22112/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.303565  [22144/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.282563  [22176/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.249236  [22208/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.202789  [22240/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.247546  [22272/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.379700  [22304/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.204498  [22336/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.277383  [22368/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.247470  [22400/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.212669  [22432/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.291487  [22464/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.304226  [22496/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.359521  [22528/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.238555  [22560/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.262983  [22592/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.394989  [22624/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.220864  [22656/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.386830  [22688/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.234881  [22720/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.305209  [22752/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.261686  [22784/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.244508  [22816/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.284644  [22848/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.149995  [22880/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.183516  [22912/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.176404  [22944/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.250626  [22976/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.169945  [23008/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.237774  [23040/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.243029  [23072/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.269865  [23104/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.240894  [23136/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.322879  [23168/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.291763  [23200/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.263074  [23232/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.262996  [23264/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.239295  [23296/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.275148  [23328/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.246467  [23360/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.216884  [23392/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.355339  [23424/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.328829  [23456/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.292813  [23488/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.357123  [23520/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.299480  [23552/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.241856  [23584/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.245799  [23616/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.212699  [23648/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.420962  [23680/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.268863  [23712/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.329218  [23744/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.231208  [23776/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.406248  [23808/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.282387  [23840/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.290265  [23872/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.359932  [23904/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.235806  [23936/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.267296  [23968/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.208566  [24000/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.174631  [24032/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.270326  [24064/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.197406  [24096/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.211010  [24128/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.252246  [24160/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.232171  [24192/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.278593  [24224/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.288373  [24256/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.290347  [24288/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.451115  [24320/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.236581  [24352/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.282577  [24384/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.206184  [24416/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.266583  [24448/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.225022  [24480/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.205182  [24512/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.245083  [24544/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.230072  [24576/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.326534  [24608/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.277316  [24640/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.265430  [24672/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.197444  [24704/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.236102  [24736/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.260714  [24768/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.236401  [24800/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.239560  [24832/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.232556  [24864/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.366332  [24872/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.366332  [24872/24872]: : 778it [00:25, 30.43it/s]
Epoch 3, time=51.36s

  0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.259917  [   32/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.133448  [   64/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.257873  [   96/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.226052  [  128/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.339895  [  160/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.231713  [  192/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.298509  [  224/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.198882  [  256/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.343592  [  288/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.368277  [  320/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.376619  [  352/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.174103  [  384/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.261486  [  416/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.316372  [  448/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.202255  [  480/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.243246  [  512/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.181049  [  544/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.239581  [  576/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.242148  [  608/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.296759  [  640/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.216131  [  672/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.370782  [  704/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.319695  [  736/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.281411  [  768/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.217297  [  800/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.308868  [  832/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.192286  [  864/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.370300  [  896/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.248558  [  928/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.233442  [  960/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.310863  [  992/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.178946  [ 1024/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.215138  [ 1056/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.319469  [ 1088/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.196824  [ 1120/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.292250  [ 1152/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.297258  [ 1184/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.223693  [ 1216/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.283594  [ 1248/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.230443  [ 1280/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.284839  [ 1312/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.189376  [ 1344/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.321194  [ 1376/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.349125  [ 1408/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.329215  [ 1440/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.300142  [ 1472/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.164003  [ 1504/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.300856  [ 1536/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.285098  [ 1568/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.253601  [ 1600/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.226194  [ 1632/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.276605  [ 1664/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.248555  [ 1696/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.215476  [ 1728/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.296749  [ 1760/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.171113  [ 1792/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.323289  [ 1824/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.215258  [ 1856/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.202022  [ 1888/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.190667  [ 1920/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.193275  [ 1952/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.278926  [ 1984/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.201555  [ 2016/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.343263  [ 2048/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.270567  [ 2080/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.171627  [ 2112/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.198513  [ 2144/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.284093  [ 2176/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.279404  [ 2208/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.231045  [ 2240/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.217605  [ 2272/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.319346  [ 2304/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.330966  [ 2336/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.169071  [ 2368/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.193549  [ 2400/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.209680  [ 2432/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.167133  [ 2464/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.292905  [ 2496/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.284207  [ 2528/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.217026  [ 2560/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.273270  [ 2592/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.283612  [ 2624/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.122817  [ 2656/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.345089  [ 2688/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.241586  [ 2720/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.234237  [ 2752/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.260582  [ 2784/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.197454  [ 2816/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.325249  [ 2848/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.231117  [ 2880/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.291106  [ 2912/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.306838  [ 2944/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.318577  [ 2976/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.184495  [ 3008/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.267041  [ 3040/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.177663  [ 3072/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.208256  [ 3104/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.292494  [ 3136/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.286138  [ 3168/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.207910  [ 3200/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.357816  [ 3232/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.235342  [ 3264/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.211518  [ 3296/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.254697  [ 3328/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.338541  [ 3360/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.185375  [ 3392/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.237856  [ 3424/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.284345  [ 3456/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.311840  [ 3488/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.231553  [ 3520/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.216513  [ 3552/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.225019  [ 3584/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.337229  [ 3616/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.181323  [ 3648/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.283847  [ 3680/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.395832  [ 3712/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.242331  [ 3744/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.401717  [ 3776/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.231947  [ 3808/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.245750  [ 3840/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.329342  [ 3872/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.263291  [ 3904/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.350760  [ 3936/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.269715  [ 3968/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.286917  [ 4000/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.291706  [ 4032/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.244544  [ 4064/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.381661  [ 4096/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.220869  [ 4128/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.283097  [ 4160/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.314917  [ 4192/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.316092  [ 4224/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.253730  [ 4256/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.262474  [ 4288/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.221142  [ 4320/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.167506  [ 4352/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.143155  [ 4384/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.319452  [ 4416/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.262687  [ 4448/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.276052  [ 4480/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.202040  [ 4512/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.208690  [ 4544/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.297428  [ 4576/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.171564  [ 4608/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.322281  [ 4640/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.322240  [ 4672/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.267004  [ 4704/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.186321  [ 4736/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.248065  [ 4768/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.239040  [ 4800/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.290792  [ 4832/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.196189  [ 4864/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.261187  [ 4896/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.157313  [ 4928/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.389824  [ 4960/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.146317  [ 4992/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.212021  [ 5024/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.230537  [ 5056/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.240115  [ 5088/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.268675  [ 5120/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.408089  [ 5152/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.295832  [ 5184/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.196929  [ 5216/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.289348  [ 5248/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.312176  [ 5280/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.287764  [ 5312/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.264762  [ 5344/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.323825  [ 5376/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.253335  [ 5408/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.241468  [ 5440/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.194344  [ 5472/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.277783  [ 5504/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.294577  [ 5536/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.223120  [ 5568/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.290405  [ 5600/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.164381  [ 5632/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.248161  [ 5664/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.183970  [ 5696/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.340037  [ 5728/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.270642  [ 5760/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.234937  [ 5792/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.226297  [ 5824/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.259604  [ 5856/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.211422  [ 5888/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.275512  [ 5920/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.274842  [ 5952/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.158804  [ 5984/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.165263  [ 6016/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.147210  [ 6048/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.218948  [ 6080/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.181547  [ 6112/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.258224  [ 6144/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.217771  [ 6176/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.311941  [ 6208/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.189189  [ 6240/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.161905  [ 6272/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.298004  [ 6304/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.232341  [ 6336/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.230152  [ 6368/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.185888  [ 6400/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.259177  [ 6432/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.249232  [ 6464/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.255151  [ 6496/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.167400  [ 6528/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.240027  [ 6560/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.299642  [ 6592/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.204676  [ 6624/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.195192  [ 6656/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.229543  [ 6688/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.215474  [ 6720/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.186669  [ 6752/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.166817  [ 6784/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.262543  [ 6816/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.221373  [ 6848/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.345002  [ 6880/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.234489  [ 6912/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.205097  [ 6944/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.296815  [ 6976/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.208081  [ 7008/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.183096  [ 7040/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.211310  [ 7072/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.141329  [ 7104/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.402029  [ 7136/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.195386  [ 7168/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.173314  [ 7200/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.147041  [ 7232/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.309373  [ 7264/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.279198  [ 7296/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.147959  [ 7328/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.235827  [ 7360/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.193601  [ 7392/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.287469  [ 7424/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.272506  [ 7456/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.267424  [ 7488/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.319756  [ 7520/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.393181  [ 7552/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.231678  [ 7584/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.161236  [ 7616/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.291546  [ 7648/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.407158  [ 7680/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.242340  [ 7712/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.212140  [ 7744/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.294089  [ 7776/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.310650  [ 7808/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.172313  [ 7840/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.229934  [ 7872/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.312317  [ 7904/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.238250  [ 7936/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.219365  [ 7968/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.221485  [ 8000/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.211503  [ 8032/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.205028  [ 8064/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.233227  [ 8096/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.280661  [ 8128/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.228702  [ 8160/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.215751  [ 8192/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.311365  [ 8224/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.233110  [ 8256/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.259886  [ 8288/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.346322  [ 8320/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.227183  [ 8352/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.276408  [ 8384/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.326453  [ 8416/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.264186  [ 8448/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.271820  [ 8480/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.171903  [ 8512/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.215494  [ 8544/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.214273  [ 8576/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.321551  [ 8608/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.322574  [ 8640/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.298144  [ 8672/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.280490  [ 8704/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.154354  [ 8736/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.226651  [ 8768/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.180264  [ 8800/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.211269  [ 8832/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.196281  [ 8864/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.217213  [ 8896/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.264608  [ 8928/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.278295  [ 8960/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.268731  [ 8992/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.221184  [ 9024/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.295999  [ 9056/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.260257  [ 9088/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.264174  [ 9120/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.152802  [ 9152/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.383222  [ 9184/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.202278  [ 9216/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.295135  [ 9248/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.263502  [ 9280/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.150187  [ 9312/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.237786  [ 9344/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.237437  [ 9376/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.232956  [ 9408/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.309592  [ 9440/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.277174  [ 9472/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.232796  [ 9504/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.231270  [ 9536/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.194771  [ 9568/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.297863  [ 9600/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.189612  [ 9632/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.173611  [ 9664/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.151066  [ 9696/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.186478  [ 9728/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.248958  [ 9760/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.187405  [ 9792/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.199996  [ 9824/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.210459  [ 9856/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.216282  [ 9888/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.250086  [ 9920/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.250735  [ 9952/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.242513  [ 9984/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.173583  [10016/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.223578  [10048/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.318513  [10080/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.223835  [10112/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.167080  [10144/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.258499  [10176/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.195231  [10208/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.310748  [10240/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.243302  [10272/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.313932  [10304/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.223290  [10336/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.234476  [10368/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.224577  [10400/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.242397  [10432/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.213297  [10464/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.219361  [10496/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.180115  [10528/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.259696  [10560/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.238192  [10592/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.267369  [10624/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.225783  [10656/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.275526  [10688/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.267887  [10720/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.238545  [10752/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.240198  [10784/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.259523  [10816/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.179656  [10848/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.203456  [10880/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.215303  [10912/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.300403  [10944/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.294362  [10976/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.226630  [11008/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.334805  [11040/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.274114  [11072/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.229412  [11104/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.225405  [11136/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.265916  [11168/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.246810  [11200/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.224748  [11232/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.253950  [11264/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.201075  [11296/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.210392  [11328/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.210528  [11360/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.194229  [11392/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.317965  [11424/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.234362  [11456/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.240912  [11488/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.277551  [11520/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.209548  [11552/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.160196  [11584/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.296273  [11616/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.215045  [11648/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.289761  [11680/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.270645  [11712/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.258560  [11744/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.128173  [11776/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.265594  [11808/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.194880  [11840/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.386499  [11872/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.167454  [11904/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.205936  [11936/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.262073  [11968/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.182021  [12000/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.244531  [12032/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.233178  [12064/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.264112  [12096/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.243710  [12128/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.214578  [12160/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.299682  [12192/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.248152  [12224/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.223311  [12256/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.270476  [12288/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.177286  [12320/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.170483  [12352/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.144388  [12384/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.281063  [12416/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.237007  [12448/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.232190  [12480/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.298494  [12512/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.211307  [12544/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.329322  [12576/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.245668  [12608/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.216092  [12640/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.351085  [12672/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.256483  [12704/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.352272  [12736/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.186267  [12768/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.223722  [12800/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.370025  [12832/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.287044  [12864/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.382696  [12896/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.228721  [12928/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.258223  [12960/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.199664  [12992/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.195529  [13024/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.294829  [13056/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.247827  [13088/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.319405  [13120/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.198425  [13152/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.198567  [13184/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.214266  [13216/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.258472  [13248/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.201444  [13280/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.237183  [13312/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.251081  [13344/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.298955  [13376/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.248409  [13408/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.371551  [13440/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.233283  [13472/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.305346  [13504/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.230876  [13536/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.282234  [13568/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.352999  [13600/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.259379  [13632/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.290953  [13664/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.227558  [13696/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.319647  [13728/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.185801  [13760/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.195016  [13792/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.200038  [13824/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.215244  [13856/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.343351  [13888/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.209499  [13920/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.279436  [13952/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.267820  [13984/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.261441  [14016/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.170516  [14048/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.195342  [14080/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.319262  [14112/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.294356  [14144/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.155751  [14176/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.183959  [14208/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.291507  [14240/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.188965  [14272/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.163963  [14304/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.246936  [14336/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.202078  [14368/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.246225  [14400/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.303233  [14432/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.290910  [14464/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.215511  [14496/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.200522  [14528/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.247468  [14560/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.261085  [14592/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.339841  [14624/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.193078  [14656/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.189173  [14688/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.235494  [14720/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.211398  [14752/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.148213  [14784/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.190912  [14816/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.243371  [14848/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.215669  [14880/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.169197  [14912/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.176123  [14944/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.194862  [14976/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.203661  [15008/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.238262  [15040/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.329295  [15072/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.145623  [15104/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.248604  [15136/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.211265  [15168/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.231651  [15200/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.282548  [15232/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.327522  [15264/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.250838  [15296/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.197841  [15328/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.245601  [15360/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.291849  [15392/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.275916  [15424/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.166287  [15456/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.212691  [15488/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.257553  [15520/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.242608  [15552/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.235472  [15584/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.156086  [15616/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.236467  [15648/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.295418  [15680/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.197350  [15712/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.256031  [15744/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.175184  [15776/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.315035  [15808/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.332682  [15840/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.264982  [15872/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.210997  [15904/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.142088  [15936/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.191162  [15968/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.256318  [16000/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.181382  [16032/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.245139  [16064/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.190683  [16096/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.216413  [16128/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.290746  [16160/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.282196  [16192/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.122098  [16224/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.234072  [16256/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.369487  [16288/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.180677  [16320/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.193113  [16352/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.337277  [16384/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.265208  [16416/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.262669  [16448/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.293449  [16480/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.248336  [16512/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.381682  [16544/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.210488  [16576/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.297353  [16608/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.307239  [16640/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.293780  [16672/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.262830  [16704/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.225258  [16736/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.209408  [16768/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.354732  [16800/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.289113  [16832/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.245865  [16864/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.245420  [16896/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.343679  [16928/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.185871  [16960/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.221855  [16992/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.213311  [17024/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.191249  [17056/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.206536  [17088/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.214319  [17120/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.222978  [17152/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.179849  [17184/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.391196  [17216/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.253232  [17248/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.236022  [17280/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.243289  [17312/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.244301  [17344/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.215551  [17376/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.290312  [17408/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.339300  [17440/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.359274  [17472/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.221125  [17504/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.203924  [17536/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.260224  [17568/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.178287  [17600/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.248800  [17632/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.247698  [17664/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.259031  [17696/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.172052  [17728/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.221501  [17760/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.400682  [17792/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.275554  [17824/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.352417  [17856/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.197196  [17888/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.212693  [17920/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.296924  [17952/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.193646  [17984/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.187362  [18016/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.336501  [18048/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.245470  [18080/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.228910  [18112/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.317726  [18144/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.215619  [18176/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.249990  [18208/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.170383  [18240/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.304470  [18272/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.204958  [18304/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.309301  [18336/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.267794  [18368/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.323445  [18400/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.244736  [18432/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.333562  [18464/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.290588  [18496/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.192296  [18528/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.225856  [18560/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.190842  [18592/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.265692  [18624/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.228016  [18656/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.279088  [18688/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.243506  [18720/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.249890  [18752/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.189667  [18784/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.246745  [18816/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.261651  [18848/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.163163  [18880/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.223198  [18912/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.171335  [18944/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.262808  [18976/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.167001  [19008/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.188874  [19040/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.219421  [19072/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.141352  [19104/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.315034  [19136/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.262178  [19168/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.112934  [19200/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.383430  [19232/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.225213  [19264/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.217929  [19296/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.296653  [19328/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.334721  [19360/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.284167  [19392/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.230429  [19424/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.197629  [19456/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.282298  [19488/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.323674  [19520/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.198270  [19552/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.309101  [19584/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.273271  [19616/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.383388  [19648/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.222627  [19680/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.343308  [19712/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.244312  [19744/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.193330  [19776/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.339656  [19808/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.211226  [19840/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.173756  [19872/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.334964  [19904/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.158177  [19936/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.229633  [19968/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.322492  [20000/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.256172  [20032/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.185431  [20064/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.143254  [20096/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.256250  [20128/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.222652  [20160/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.223486  [20192/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.252727  [20224/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.295576  [20256/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.196721  [20288/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.219200  [20320/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.379318  [20352/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.214188  [20384/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.138697  [20416/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.203189  [20448/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.289095  [20480/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.287460  [20512/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.410413  [20544/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.191641  [20576/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.207316  [20608/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.232563  [20640/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.340270  [20672/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.142700  [20704/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.220905  [20736/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.218943  [20768/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.256748  [20800/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.264992  [20832/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.279587  [20864/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.240057  [20896/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.396746  [20928/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.197722  [20960/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.177783  [20992/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.237491  [21024/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.290867  [21056/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.320859  [21088/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.245761  [21120/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.183968  [21152/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.240413  [21184/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.302792  [21216/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.270728  [21248/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.338218  [21280/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.257223  [21312/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.144631  [21344/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.251811  [21376/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.206455  [21408/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.146936  [21440/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.276798  [21472/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.226126  [21504/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.241575  [21536/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.268472  [21568/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.262064  [21600/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.279933  [21632/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.241662  [21664/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.221624  [21696/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.272946  [21728/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.206676  [21760/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.314812  [21792/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.258609  [21824/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.175822  [21856/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.180292  [21888/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.193939  [21920/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.198491  [21952/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.248054  [21984/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.237408  [22016/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.355932  [22048/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.243555  [22080/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.325389  [22112/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.265030  [22144/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.218707  [22176/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.228372  [22208/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.189167  [22240/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.238556  [22272/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.330373  [22304/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.190457  [22336/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.213339  [22368/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.201129  [22400/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.155077  [22432/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.205796  [22464/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.270510  [22496/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.316961  [22528/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.187409  [22560/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.248273  [22592/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.315164  [22624/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.196658  [22656/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.308614  [22688/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.182606  [22720/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.247947  [22752/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.248126  [22784/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.208982  [22816/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.243404  [22848/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.130744  [22880/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.154663  [22912/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.138735  [22944/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.224049  [22976/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.124504  [23008/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.213373  [23040/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.207826  [23072/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.249063  [23104/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.257725  [23136/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.301310  [23168/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.280369  [23200/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.252953  [23232/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.234502  [23264/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.216934  [23296/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.262053  [23328/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.212884  [23360/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.192764  [23392/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.341757  [23424/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.315590  [23456/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.236546  [23488/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.302758  [23520/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.270098  [23552/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.204905  [23584/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.230853  [23616/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.167697  [23648/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.362016  [23680/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.258344  [23712/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.231601  [23744/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.191444  [23776/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.395695  [23808/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.221760  [23840/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.228403  [23872/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.307847  [23904/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.210763  [23936/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.200769  [23968/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.177718  [24000/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.153095  [24032/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.238598  [24064/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.211421  [24096/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.198866  [24128/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.238747  [24160/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.196147  [24192/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.247746  [24224/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.225107  [24256/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.238882  [24288/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.417105  [24320/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.170018  [24352/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.248273  [24384/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.207109  [24416/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.242141  [24448/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.191656  [24480/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.181119  [24512/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.228254  [24544/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.210715  [24576/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.272530  [24608/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.253632  [24640/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.251849  [24672/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.154865  [24704/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.185394  [24736/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.204700  [24768/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.217671  [24800/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.198603  [24832/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.190060  [24864/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.324468  [24872/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.324468  [24872/24872]: : 778it [00:25, 30.21it/s]
Epoch 4, time=77.11s

  0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.229137  [   32/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.115430  [   64/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.260233  [   96/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.193371  [  128/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.306043  [  160/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.196072  [  192/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.253413  [  224/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.197863  [  256/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.341282  [  288/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.342348  [  320/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.394056  [  352/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.141439  [  384/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.232247  [  416/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.305493  [  448/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.194196  [  480/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.238496  [  512/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.152531  [  544/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.228968  [  576/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.205876  [  608/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.252938  [  640/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.216376  [  672/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.303535  [  704/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.261250  [  736/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.273585  [  768/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.181899  [  800/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.268625  [  832/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.166549  [  864/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.318496  [  896/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.191807  [  928/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.221348  [  960/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.265097  [  992/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.139490  [ 1024/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.202750  [ 1056/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.284964  [ 1088/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.147856  [ 1120/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.222365  [ 1152/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.290630  [ 1184/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.185340  [ 1216/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.266233  [ 1248/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.199120  [ 1280/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.248111  [ 1312/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.165602  [ 1344/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.271174  [ 1376/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.307323  [ 1408/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.307557  [ 1440/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.254487  [ 1472/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.174024  [ 1504/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.195439  [ 1536/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.248530  [ 1568/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.249487  [ 1600/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.222999  [ 1632/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.226646  [ 1664/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.183667  [ 1696/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.204611  [ 1728/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.238586  [ 1760/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.129869  [ 1792/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.296865  [ 1824/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.173689  [ 1856/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.142256  [ 1888/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.165062  [ 1920/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.179955  [ 1952/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.216331  [ 1984/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.171883  [ 2016/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.312439  [ 2048/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.230140  [ 2080/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.141145  [ 2112/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.172990  [ 2144/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.249892  [ 2176/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.233810  [ 2208/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.200136  [ 2240/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.190875  [ 2272/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.288086  [ 2304/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.272147  [ 2336/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.137088  [ 2368/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.149323  [ 2400/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.192358  [ 2432/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.137373  [ 2464/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.267044  [ 2496/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.242220  [ 2528/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.156356  [ 2560/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.231234  [ 2592/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.228126  [ 2624/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.115121  [ 2656/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.297800  [ 2688/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.212814  [ 2720/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.201503  [ 2752/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.242346  [ 2784/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.166593  [ 2816/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.259547  [ 2848/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.208023  [ 2880/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.248117  [ 2912/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.258057  [ 2944/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.292537  [ 2976/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.160387  [ 3008/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.224940  [ 3040/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.157378  [ 3072/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.175899  [ 3104/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.259803  [ 3136/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.238393  [ 3168/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.166572  [ 3200/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.339481  [ 3232/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.213730  [ 3264/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.194394  [ 3296/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.229719  [ 3328/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.318574  [ 3360/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.168573  [ 3392/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.222694  [ 3424/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.242664  [ 3456/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.299655  [ 3488/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.195822  [ 3520/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.183814  [ 3552/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.206970  [ 3584/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.304999  [ 3616/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.169169  [ 3648/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.260885  [ 3680/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.350206  [ 3712/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.237155  [ 3744/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.325396  [ 3776/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.206216  [ 3808/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.198491  [ 3840/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.265571  [ 3872/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.230072  [ 3904/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.309027  [ 3936/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.239606  [ 3968/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.260489  [ 4000/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.247163  [ 4032/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.191527  [ 4064/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.311670  [ 4096/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.200683  [ 4128/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.272754  [ 4160/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.280228  [ 4192/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.288332  [ 4224/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.222057  [ 4256/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.246583  [ 4288/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.178516  [ 4320/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.160641  [ 4352/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.127669  [ 4384/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.274056  [ 4416/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.232207  [ 4448/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.257553  [ 4480/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.185044  [ 4512/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.178394  [ 4544/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.265838  [ 4576/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.136488  [ 4608/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.309901  [ 4640/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.301284  [ 4672/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.225423  [ 4704/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.149709  [ 4736/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.221891  [ 4768/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.217960  [ 4800/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.266266  [ 4832/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.175798  [ 4864/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.222051  [ 4896/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.137811  [ 4928/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.330141  [ 4960/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.123000  [ 4992/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.182576  [ 5024/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.206478  [ 5056/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.217102  [ 5088/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.236084  [ 5120/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.387905  [ 5152/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.249394  [ 5184/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.172475  [ 5216/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.264106  [ 5248/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.274683  [ 5280/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.247053  [ 5312/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.265208  [ 5344/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.256910  [ 5376/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.232811  [ 5408/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.213998  [ 5440/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.179962  [ 5472/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.255561  [ 5504/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.214954  [ 5536/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.146029  [ 5568/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.260821  [ 5600/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.120278  [ 5632/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.213001  [ 5664/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.165778  [ 5696/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.250009  [ 5728/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.222928  [ 5760/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.236212  [ 5792/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.209924  [ 5824/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.248001  [ 5856/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.183770  [ 5888/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.236668  [ 5920/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.215126  [ 5952/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.136778  [ 5984/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.148797  [ 6016/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.126319  [ 6048/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.208600  [ 6080/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.164590  [ 6112/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.236569  [ 6144/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.199915  [ 6176/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.268137  [ 6208/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.159682  [ 6240/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.157083  [ 6272/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.302389  [ 6304/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.240395  [ 6336/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.221025  [ 6368/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.172067  [ 6400/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.247050  [ 6432/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.253472  [ 6464/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.226650  [ 6496/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.160569  [ 6528/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.218044  [ 6560/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.279959  [ 6592/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.183084  [ 6624/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.192506  [ 6656/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.178745  [ 6688/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.192153  [ 6720/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.164385  [ 6752/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.151949  [ 6784/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.234765  [ 6816/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.174078  [ 6848/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.296372  [ 6880/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.229505  [ 6912/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.196197  [ 6944/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.243328  [ 6976/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.177817  [ 7008/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.155138  [ 7040/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.189459  [ 7072/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.126992  [ 7104/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.280099  [ 7136/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.175875  [ 7168/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.158129  [ 7200/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.164702  [ 7232/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.277001  [ 7264/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.210509  [ 7296/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.123481  [ 7328/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.226150  [ 7360/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.154819  [ 7392/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.233503  [ 7424/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.220385  [ 7456/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.187288  [ 7488/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.276369  [ 7520/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.334941  [ 7552/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.197451  [ 7584/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.143598  [ 7616/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.245081  [ 7648/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.360295  [ 7680/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.203139  [ 7712/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.194223  [ 7744/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.270007  [ 7776/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.230305  [ 7808/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.152431  [ 7840/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.191935  [ 7872/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.249609  [ 7904/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.220676  [ 7936/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.206100  [ 7968/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.192009  [ 8000/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.183450  [ 8032/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.173722  [ 8064/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.200684  [ 8096/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.224084  [ 8128/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.214537  [ 8160/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.189935  [ 8192/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.245387  [ 8224/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.177492  [ 8256/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.241513  [ 8288/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.305691  [ 8320/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.211412  [ 8352/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.250193  [ 8384/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.295288  [ 8416/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.229402  [ 8448/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.244670  [ 8480/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.149557  [ 8512/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.183073  [ 8544/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.212229  [ 8576/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.288090  [ 8608/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.279171  [ 8640/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.266588  [ 8672/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.253061  [ 8704/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.138534  [ 8736/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.173121  [ 8768/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.132802  [ 8800/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.201729  [ 8832/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.172576  [ 8864/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.199443  [ 8896/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.217102  [ 8928/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.244003  [ 8960/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.279398  [ 8992/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.193641  [ 9024/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.268952  [ 9056/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.225186  [ 9088/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.245573  [ 9120/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.108579  [ 9152/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.314519  [ 9184/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.171780  [ 9216/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.273603  [ 9248/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.229656  [ 9280/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.138488  [ 9312/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.221423  [ 9344/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.232574  [ 9376/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.210066  [ 9408/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.297898  [ 9440/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.248950  [ 9472/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.202373  [ 9504/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.205766  [ 9536/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.181967  [ 9568/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.254840  [ 9600/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.186254  [ 9632/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.148409  [ 9664/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.130409  [ 9696/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.161464  [ 9728/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.208292  [ 9760/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.180636  [ 9792/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.183695  [ 9824/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.172057  [ 9856/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.204111  [ 9888/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.232008  [ 9920/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.232224  [ 9952/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.223755  [ 9984/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.149436  [10016/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.217344  [10048/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.316035  [10080/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.209150  [10112/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.154807  [10144/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.255307  [10176/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.172469  [10208/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.294119  [10240/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.200856  [10272/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.311626  [10304/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.211747  [10336/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.206505  [10368/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.211683  [10400/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.226631  [10432/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.200263  [10464/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.212335  [10496/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.181286  [10528/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.249905  [10560/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.216955  [10592/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.245049  [10624/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.196793  [10656/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.244641  [10688/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.208036  [10720/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.223739  [10752/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.220024  [10784/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.234750  [10816/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.150610  [10848/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.212684  [10880/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.202939  [10912/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.267466  [10944/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.273974  [10976/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.200842  [11008/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.306530  [11040/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.238354  [11072/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.253187  [11104/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.195061  [11136/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.242094  [11168/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.209779  [11200/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.202064  [11232/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.222127  [11264/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.188260  [11296/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.181366  [11328/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.200751  [11360/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.166633  [11392/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.274091  [11424/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.220228  [11456/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.190726  [11488/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.267203  [11520/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.192319  [11552/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.137862  [11584/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.240537  [11616/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.194731  [11648/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.263322  [11680/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.243187  [11712/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.245530  [11744/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.124146  [11776/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.263128  [11808/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.179014  [11840/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.347486  [11872/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.160716  [11904/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.183134  [11936/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.224966  [11968/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.175730  [12000/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.216601  [12032/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.204837  [12064/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.235793  [12096/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.190376  [12128/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.186091  [12160/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.288791  [12192/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.202720  [12224/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.217059  [12256/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.262589  [12288/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.165220  [12320/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.159530  [12352/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.145644  [12384/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.245007  [12416/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.201984  [12448/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.244171  [12480/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.271882  [12512/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.216339  [12544/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.324662  [12576/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.232038  [12608/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.241010  [12640/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.340556  [12672/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.256851  [12704/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.387833  [12736/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.183579  [12768/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.211246  [12800/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.378077  [12832/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.315023  [12864/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.319278  [12896/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.227820  [12928/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.295771  [12960/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.203950  [12992/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.194704  [13024/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.323154  [13056/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.238219  [13088/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.267464  [13120/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.201145  [13152/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.196906  [13184/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.203677  [13216/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.263554  [13248/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.193726  [13280/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.198828  [13312/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.238259  [13344/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.308062  [13376/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.256295  [13408/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.333683  [13440/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.252994  [13472/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.268362  [13504/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.227659  [13536/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.284095  [13568/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.371464  [13600/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.203985  [13632/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.242077  [13664/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.240195  [13696/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.267135  [13728/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.194992  [13760/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.206432  [13792/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.191620  [13824/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.201569  [13856/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.323337  [13888/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.196177  [13920/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.261937  [13952/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.251222  [13984/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.219835  [14016/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.136323  [14048/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.182986  [14080/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.288272  [14112/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.270266  [14144/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.142259  [14176/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.171108  [14208/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.236305  [14240/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.174552  [14272/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.126953  [14304/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.237397  [14336/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.173166  [14368/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.212297  [14400/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.243950  [14432/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.271612  [14464/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.204678  [14496/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.181931  [14528/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.204002  [14560/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.243872  [14592/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.238708  [14624/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.164005  [14656/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.178919  [14688/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.212821  [14720/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.184249  [14752/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.138164  [14784/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.192193  [14816/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.223106  [14848/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.194680  [14880/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.152574  [14912/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.149429  [14944/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.177374  [14976/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.195126  [15008/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.212739  [15040/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.307032  [15072/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.139463  [15104/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.230076  [15136/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.188955  [15168/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.203653  [15200/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.256737  [15232/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.297091  [15264/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.216233  [15296/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.178064  [15328/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.238862  [15360/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.264962  [15392/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.264734  [15424/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.158293  [15456/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.186746  [15488/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.230235  [15520/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.202321  [15552/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.203030  [15584/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.138795  [15616/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.215941  [15648/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.258951  [15680/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.182581  [15712/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.232647  [15744/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.140191  [15776/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.239489  [15808/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.319775  [15840/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.214280  [15872/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.185741  [15904/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.121622  [15936/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.168076  [15968/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.206719  [16000/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.167060  [16032/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.225868  [16064/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.159247  [16096/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.188460  [16128/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.246527  [16160/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.262945  [16192/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.104999  [16224/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.209233  [16256/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.308121  [16288/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.153163  [16320/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.174002  [16352/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.296604  [16384/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.249524  [16416/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.250250  [16448/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.260052  [16480/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.223199  [16512/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.316725  [16544/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.183175  [16576/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.272867  [16608/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.296211  [16640/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.265999  [16672/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.231721  [16704/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.195683  [16736/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.213650  [16768/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.286673  [16800/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.263476  [16832/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.234497  [16864/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.150373  [16896/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.302963  [16928/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.176755  [16960/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.214825  [16992/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.199393  [17024/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.147685  [17056/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.193601  [17088/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.200121  [17120/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.212253  [17152/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.185324  [17184/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.379382  [17216/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.210509  [17248/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.190974  [17280/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.221303  [17312/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.236319  [17344/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.160153  [17376/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.199367  [17408/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.360566  [17440/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.268990  [17472/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.211084  [17504/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.182210  [17536/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.219046  [17568/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.170215  [17600/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.206004  [17632/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.254731  [17664/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.200516  [17696/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.158702  [17728/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.194522  [17760/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.345973  [17792/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.266225  [17824/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.279849  [17856/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.163907  [17888/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.174108  [17920/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.227016  [17952/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.171964  [17984/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.187405  [18016/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.197508  [18048/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.191537  [18080/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.204899  [18112/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.173480  [18144/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.186809  [18176/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.221090  [18208/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.137874  [18240/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.249297  [18272/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.165768  [18304/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.244533  [18336/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.239737  [18368/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.241896  [18400/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.189370  [18432/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.250210  [18464/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.216707  [18496/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.155109  [18528/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.159431  [18560/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.162089  [18592/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.274022  [18624/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.185149  [18656/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.233813  [18688/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.217691  [18720/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.212723  [18752/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.159225  [18784/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.185577  [18816/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.241097  [18848/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.161478  [18880/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.156518  [18912/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.139688  [18944/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.232818  [18976/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.145945  [19008/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.189076  [19040/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.226806  [19072/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.127456  [19104/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.266221  [19136/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.218490  [19168/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.097862  [19200/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.289249  [19232/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.147940  [19264/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.234521  [19296/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.247782  [19328/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.267581  [19360/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.256745  [19392/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.207757  [19424/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.139942  [19456/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.217612  [19488/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.285373  [19520/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.142084  [19552/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.223353  [19584/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.250938  [19616/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.302070  [19648/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.146938  [19680/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.316811  [19712/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.169301  [19744/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.127351  [19776/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.267716  [19808/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.194572  [19840/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.149062  [19872/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.262702  [19904/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.128083  [19936/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.245276  [19968/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.267508  [20000/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.227068  [20032/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.169155  [20064/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.120882  [20096/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.215232  [20128/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.199171  [20160/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.179092  [20192/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.222220  [20224/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.267710  [20256/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.197963  [20288/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.165684  [20320/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.298664  [20352/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.235801  [20384/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.144519  [20416/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.154535  [20448/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.259273  [20480/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.276344  [20512/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.325690  [20544/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.183084  [20576/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.184644  [20608/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.167353  [20640/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.347487  [20672/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.121812  [20704/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.207398  [20736/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.214216  [20768/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.220034  [20800/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.185389  [20832/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.262640  [20864/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.251275  [20896/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.330999  [20928/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.186302  [20960/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.168784  [20992/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.208204  [21024/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.298147  [21056/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.304344  [21088/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.212848  [21120/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.170375  [21152/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.211850  [21184/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.297419  [21216/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.203983  [21248/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.235733  [21280/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.263904  [21312/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.129099  [21344/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.228037  [21376/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.218525  [21408/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.129618  [21440/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.305955  [21472/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.219280  [21504/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.247484  [21536/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.273206  [21568/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.224433  [21600/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.259896  [21632/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.244686  [21664/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.212818  [21696/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.244019  [21728/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.217285  [21760/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.275797  [21792/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.240123  [21824/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.174152  [21856/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.157241  [21888/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.190499  [21920/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.190707  [21952/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.233388  [21984/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.180262  [22016/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.317126  [22048/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.220327  [22080/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.272859  [22112/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.299664  [22144/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.229596  [22176/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.248504  [22208/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.162580  [22240/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.231606  [22272/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.303593  [22304/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.172868  [22336/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.210748  [22368/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.167460  [22400/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.147383  [22432/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.164735  [22464/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.249734  [22496/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.276678  [22528/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.178584  [22560/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.212889  [22592/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.272670  [22624/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.189566  [22656/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.316943  [22688/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.167052  [22720/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.231092  [22752/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.196497  [22784/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.303791  [22816/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.263763  [22848/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.134698  [22880/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.140981  [22912/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.163067  [22944/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.223385  [22976/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.130540  [23008/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.192235  [23040/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.200063  [23072/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.250664  [23104/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.211039  [23136/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.302456  [23168/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.227083  [23200/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.268214  [23232/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.273416  [23264/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.234224  [23296/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.255429  [23328/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.200053  [23360/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.173686  [23392/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.348766  [23424/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.307109  [23456/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.222225  [23488/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.295277  [23520/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.238522  [23552/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.199476  [23584/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.257934  [23616/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.207991  [23648/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.336085  [23680/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.256725  [23712/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.193944  [23744/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.201569  [23776/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.346324  [23808/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.215513  [23840/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.213656  [23872/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.292028  [23904/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.193876  [23936/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.204560  [23968/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.191703  [24000/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.162407  [24032/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.242294  [24064/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.161897  [24096/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.166683  [24128/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.217233  [24160/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.194068  [24192/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.266831  [24224/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.210604  [24256/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.243981  [24288/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.371541  [24320/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.171825  [24352/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.238861  [24384/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.247933  [24416/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.226811  [24448/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.181774  [24480/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.201145  [24512/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.239167  [24544/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.211622  [24576/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.291568  [24608/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.250099  [24640/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.252220  [24672/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.145238  [24704/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.201767  [24736/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.198486  [24768/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.250616  [24800/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.204755  [24832/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.185662  [24864/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.295531  [24872/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.295531  [24872/24872]: : 778it [00:26, 29.80it/s]
Epoch 5, time=103.22s

  0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.247508  [   32/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.107770  [   64/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.191895  [   96/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.185435  [  128/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.273653  [  160/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.197334  [  192/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.206585  [  224/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.168245  [  256/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.354287  [  288/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.315022  [  320/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.350037  [  352/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.152254  [  384/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.213365  [  416/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.248350  [  448/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.203811  [  480/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.196706  [  512/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.143516  [  544/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.249365  [  576/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.188079  [  608/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.263245  [  640/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.200564  [  672/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.317188  [  704/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.249287  [  736/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.241221  [  768/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.186920  [  800/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.233776  [  832/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.150031  [  864/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.287429  [  896/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.190570  [  928/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.220570  [  960/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.288047  [  992/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.179356  [ 1024/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.230236  [ 1056/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.276638  [ 1088/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.175973  [ 1120/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.214085  [ 1152/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.225440  [ 1184/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.222119  [ 1216/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.250723  [ 1248/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.223197  [ 1280/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.241433  [ 1312/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.177675  [ 1344/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.242223  [ 1376/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.317011  [ 1408/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.304138  [ 1440/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.245571  [ 1472/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.124739  [ 1504/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.175748  [ 1536/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.261992  [ 1568/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.203177  [ 1600/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.203423  [ 1632/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.226729  [ 1664/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.183304  [ 1696/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.174823  [ 1728/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.234233  [ 1760/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.130547  [ 1792/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.273105  [ 1824/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.162498  [ 1856/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.115217  [ 1888/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.159306  [ 1920/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.171839  [ 1952/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.202890  [ 1984/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.168796  [ 2016/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.287259  [ 2048/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.202491  [ 2080/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.126466  [ 2112/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.155766  [ 2144/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.246015  [ 2176/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.214302  [ 2208/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.180958  [ 2240/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.213087  [ 2272/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.268096  [ 2304/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.232260  [ 2336/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.134445  [ 2368/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.128096  [ 2400/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.169859  [ 2432/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.123063  [ 2464/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.301201  [ 2496/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.197454  [ 2528/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.121975  [ 2560/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.212761  [ 2592/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.225658  [ 2624/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.105566  [ 2656/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.233698  [ 2688/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.262342  [ 2720/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.192893  [ 2752/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.212394  [ 2784/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.158042  [ 2816/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.244831  [ 2848/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.192648  [ 2880/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.247019  [ 2912/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.255004  [ 2944/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.286649  [ 2976/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.157880  [ 3008/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.207636  [ 3040/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.169127  [ 3072/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.189640  [ 3104/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.257230  [ 3136/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.239012  [ 3168/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.144183  [ 3200/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.298920  [ 3232/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.208603  [ 3264/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.175511  [ 3296/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.203100  [ 3328/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.266310  [ 3360/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.144680  [ 3392/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.178288  [ 3424/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.225451  [ 3456/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.207763  [ 3488/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.189666  [ 3520/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.184755  [ 3552/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.174529  [ 3584/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.293467  [ 3616/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.155563  [ 3648/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.238449  [ 3680/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.322177  [ 3712/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.161467  [ 3744/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.267087  [ 3776/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.187396  [ 3808/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.176611  [ 3840/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.182544  [ 3872/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.193825  [ 3904/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.329341  [ 3936/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.202257  [ 3968/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.249214  [ 4000/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.238769  [ 4032/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.134370  [ 4064/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.238305  [ 4096/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.161641  [ 4128/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.239942  [ 4160/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.226567  [ 4192/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.264315  [ 4224/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.226197  [ 4256/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.230171  [ 4288/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.149618  [ 4320/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.141170  [ 4352/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.099565  [ 4384/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.261980  [ 4416/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.176185  [ 4448/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.229652  [ 4480/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.165650  [ 4512/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.145513  [ 4544/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.246008  [ 4576/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.122437  [ 4608/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.232757  [ 4640/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.270647  [ 4672/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.195059  [ 4704/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.138158  [ 4736/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.213085  [ 4768/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.290771  [ 4800/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.264968  [ 4832/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.171512  [ 4864/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.201805  [ 4896/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.126105  [ 4928/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.316081  [ 4960/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.137034  [ 4992/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.178077  [ 5024/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.200271  [ 5056/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.208012  [ 5088/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.215928  [ 5120/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.356353  [ 5152/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.250612  [ 5184/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.156720  [ 5216/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.255849  [ 5248/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.256303  [ 5280/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.232425  [ 5312/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.245643  [ 5344/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.242419  [ 5376/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.223038  [ 5408/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.209865  [ 5440/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.165906  [ 5472/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.225257  [ 5504/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.199320  [ 5536/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.135503  [ 5568/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.251455  [ 5600/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.115998  [ 5632/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.203420  [ 5664/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.148431  [ 5696/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.235961  [ 5728/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.206691  [ 5760/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.210224  [ 5792/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.183741  [ 5824/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.209713  [ 5856/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.173321  [ 5888/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.222689  [ 5920/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.201670  [ 5952/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.126265  [ 5984/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.134253  [ 6016/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.113076  [ 6048/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.190462  [ 6080/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.150892  [ 6112/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.226339  [ 6144/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.176824  [ 6176/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.235889  [ 6208/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.150273  [ 6240/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.143641  [ 6272/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.271235  [ 6304/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.190144  [ 6336/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.211342  [ 6368/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.167190  [ 6400/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.232733  [ 6432/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.223113  [ 6464/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.206491  [ 6496/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.154523  [ 6528/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.227253  [ 6560/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.262261  [ 6592/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.169113  [ 6624/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.165750  [ 6656/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.183133  [ 6688/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.178251  [ 6720/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.147637  [ 6752/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.152851  [ 6784/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.201335  [ 6816/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.153556  [ 6848/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.272862  [ 6880/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.242918  [ 6912/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.197100  [ 6944/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.217622  [ 6976/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.165381  [ 7008/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.139402  [ 7040/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.182072  [ 7072/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.114737  [ 7104/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.249631  [ 7136/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.170580  [ 7168/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.158931  [ 7200/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.110436  [ 7232/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.251782  [ 7264/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.185439  [ 7296/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.107795  [ 7328/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.217161  [ 7360/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.148459  [ 7392/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.210315  [ 7424/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.222556  [ 7456/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.172128  [ 7488/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.275211  [ 7520/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.331196  [ 7552/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.196378  [ 7584/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.137247  [ 7616/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.235294  [ 7648/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.362841  [ 7680/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.189168  [ 7712/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.181004  [ 7744/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.252733  [ 7776/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.213108  [ 7808/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.148658  [ 7840/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.190155  [ 7872/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.222807  [ 7904/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.202676  [ 7936/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.176224  [ 7968/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.172070  [ 8000/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.155767  [ 8032/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.165599  [ 8064/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.193346  [ 8096/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.185500  [ 8128/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.207330  [ 8160/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.177091  [ 8192/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.210450  [ 8224/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.162254  [ 8256/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.228665  [ 8288/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.292407  [ 8320/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.204844  [ 8352/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.229784  [ 8384/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.267839  [ 8416/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.225809  [ 8448/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.235837  [ 8480/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.137724  [ 8512/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.164164  [ 8544/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.196807  [ 8576/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.263497  [ 8608/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.270165  [ 8640/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.242788  [ 8672/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.216690  [ 8704/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.133095  [ 8736/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.165847  [ 8768/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.121086  [ 8800/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.186255  [ 8832/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.150729  [ 8864/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.193898  [ 8896/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.192652  [ 8928/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.219541  [ 8960/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.277546  [ 8992/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.169681  [ 9024/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.253011  [ 9056/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.177170  [ 9088/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.222629  [ 9120/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.108920  [ 9152/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.273092  [ 9184/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.154700  [ 9216/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.218985  [ 9248/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.220054  [ 9280/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.126910  [ 9312/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.210224  [ 9344/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.183356  [ 9376/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.199658  [ 9408/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.289492  [ 9440/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.254268  [ 9472/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.199278  [ 9504/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.233597  [ 9536/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.163353  [ 9568/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.249097  [ 9600/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.160562  [ 9632/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.140268  [ 9664/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.129684  [ 9696/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.156645  [ 9728/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.156258  [ 9760/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.159889  [ 9792/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.184656  [ 9824/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.148294  [ 9856/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.227532  [ 9888/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.233673  [ 9920/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.148888  [ 9952/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.209379  [ 9984/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.143813  [10016/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.186751  [10048/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.302387  [10080/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.202299  [10112/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.148842  [10144/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.236497  [10176/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.177302  [10208/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.299292  [10240/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.205802  [10272/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.371802  [10304/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.197504  [10336/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.210421  [10368/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.229371  [10400/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.240836  [10432/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.172733  [10464/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.227181  [10496/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.158819  [10528/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.215702  [10560/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.240556  [10592/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.220990  [10624/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.185463  [10656/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.315996  [10688/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.203269  [10720/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.210756  [10752/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.224285  [10784/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.230184  [10816/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.148410  [10848/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.184887  [10880/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.217372  [10912/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.270042  [10944/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.267989  [10976/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.211244  [11008/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.310693  [11040/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.235077  [11072/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.169195  [11104/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.190278  [11136/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.247734  [11168/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.196276  [11200/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.188102  [11232/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.221072  [11264/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.171037  [11296/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.184853  [11328/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.176064  [11360/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.173173  [11392/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.274672  [11424/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.212240  [11456/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.168762  [11488/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.249401  [11520/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.197696  [11552/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.157797  [11584/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.230841  [11616/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.178271  [11648/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.247963  [11680/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.244281  [11712/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.233837  [11744/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.114347  [11776/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.302478  [11808/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.157859  [11840/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.372856  [11872/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.149265  [11904/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.201164  [11936/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.182170  [11968/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.175074  [12000/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.200924  [12032/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.181783  [12064/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.206998  [12096/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.176619  [12128/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.185017  [12160/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.272565  [12192/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.212487  [12224/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.203197  [12256/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.244044  [12288/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.159910  [12320/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.160865  [12352/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.125429  [12384/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.198813  [12416/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.172970  [12448/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.213912  [12480/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.248921  [12512/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.176658  [12544/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.305529  [12576/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.219589  [12608/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.211535  [12640/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.365839  [12672/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.198785  [12704/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.296669  [12736/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.158382  [12768/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.178897  [12800/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.285128  [12832/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.267766  [12864/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.283559  [12896/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.188823  [12928/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.217540  [12960/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.172569  [12992/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.177102  [13024/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.242473  [13056/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.199408  [13088/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.255144  [13120/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.171487  [13152/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.185129  [13184/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.185782  [13216/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.236279  [13248/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.133608  [13280/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.202357  [13312/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.237526  [13344/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.261440  [13376/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.208343  [13408/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.280436  [13440/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.186130  [13472/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.207987  [13504/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.209745  [13536/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.251543  [13568/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.352739  [13600/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.167130  [13632/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.208868  [13664/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.205086  [13696/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.236874  [13728/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.160045  [13760/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.158523  [13792/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.169635  [13824/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.182511  [13856/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.287796  [13888/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.179064  [13920/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.253593  [13952/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.244165  [13984/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.197946  [14016/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.134705  [14048/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.171018  [14080/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.269732  [14112/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.262816  [14144/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.132004  [14176/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.157615  [14208/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.207860  [14240/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.152994  [14272/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.126422  [14304/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.221026  [14336/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.170114  [14368/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.196365  [14400/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.218124  [14432/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.254258  [14464/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.195850  [14496/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.176781  [14528/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.190524  [14560/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.244443  [14592/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.214383  [14624/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.149467  [14656/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.171658  [14688/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.196224  [14720/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.169640  [14752/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.129997  [14784/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.183199  [14816/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.213874  [14848/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.187771  [14880/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.146269  [14912/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.140021  [14944/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.160234  [14976/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.183723  [15008/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.204388  [15040/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.295938  [15072/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.130085  [15104/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.222190  [15136/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.179690  [15168/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.202846  [15200/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.212662  [15232/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.268404  [15264/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.206478  [15296/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.173849  [15328/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.217304  [15360/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.259610  [15392/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.217900  [15424/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.158644  [15456/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.168078  [15488/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.220365  [15520/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.184349  [15552/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.220990  [15584/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.148588  [15616/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.201541  [15648/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.247374  [15680/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.181644  [15712/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.203527  [15744/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.130473  [15776/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.223455  [15808/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.312939  [15840/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.183315  [15872/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.174071  [15904/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.103600  [15936/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.134437  [15968/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.181711  [16000/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.159613  [16032/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.220307  [16064/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.153396  [16096/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.192372  [16128/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.225377  [16160/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.244734  [16192/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.113850  [16224/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.188969  [16256/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.288497  [16288/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.141324  [16320/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.166621  [16352/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.249977  [16384/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.236134  [16416/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.184115  [16448/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.235798  [16480/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.204944  [16512/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.250042  [16544/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.184391  [16576/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.272976  [16608/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.271391  [16640/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.241098  [16672/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.214935  [16704/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.192067  [16736/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.183790  [16768/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.259282  [16800/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.242484  [16832/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.236282  [16864/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.141057  [16896/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.286503  [16928/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.157681  [16960/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.195680  [16992/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.186801  [17024/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.150122  [17056/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.175086  [17088/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.188172  [17120/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.197018  [17152/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.179202  [17184/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.351348  [17216/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.188835  [17248/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.172498  [17280/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.208911  [17312/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.212363  [17344/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.151236  [17376/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.182299  [17408/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.350897  [17440/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.250088  [17472/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.176898  [17504/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.171316  [17536/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.198807  [17568/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.169363  [17600/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.184010  [17632/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.226215  [17664/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.177780  [17696/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.133549  [17728/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.180349  [17760/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.316981  [17792/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.264614  [17824/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.279382  [17856/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.143900  [17888/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.160202  [17920/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.232302  [17952/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.160265  [17984/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.167604  [18016/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.177793  [18048/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.188289  [18080/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.198893  [18112/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.186278  [18144/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.172507  [18176/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.204215  [18208/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.130652  [18240/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.201629  [18272/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.147360  [18304/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.237929  [18336/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.250633  [18368/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.219960  [18400/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.177439  [18432/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.232267  [18464/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.202983  [18496/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.142400  [18528/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.138345  [18560/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.155356  [18592/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.243271  [18624/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.184351  [18656/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.212904  [18688/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.213517  [18720/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.194587  [18752/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.140571  [18784/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.161093  [18816/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.239187  [18848/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.155477  [18880/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.158432  [18912/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.129022  [18944/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.231216  [18976/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.133290  [19008/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.169377  [19040/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.206397  [19072/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.119073  [19104/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.262641  [19136/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.207476  [19168/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.102448  [19200/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.286786  [19232/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.128933  [19264/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.225220  [19296/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.247869  [19328/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.245628  [19360/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.250395  [19392/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.213789  [19424/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.142537  [19456/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.219244  [19488/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.265136  [19520/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.138637  [19552/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.214988  [19584/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.242010  [19616/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.307417  [19648/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.140510  [19680/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.312215  [19712/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.156168  [19744/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.123452  [19776/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.265549  [19808/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.189731  [19840/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.146729  [19872/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.243617  [19904/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.115797  [19936/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.238741  [19968/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.219978  [20000/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.225330  [20032/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.181529  [20064/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.114161  [20096/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.215235  [20128/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.226567  [20160/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.163598  [20192/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.226241  [20224/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.261433  [20256/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.176619  [20288/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.152696  [20320/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.279440  [20352/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.201909  [20384/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.123842  [20416/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.151668  [20448/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.269098  [20480/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.284852  [20512/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.297632  [20544/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.184770  [20576/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.181546  [20608/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.160485  [20640/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.316723  [20672/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.127855  [20704/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.186993  [20736/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.203413  [20768/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.217267  [20800/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.170487  [20832/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.257547  [20864/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.251880  [20896/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.314565  [20928/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.180826  [20960/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.183479  [20992/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.197496  [21024/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.258218  [21056/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.306564  [21088/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.177421  [21120/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.149210  [21152/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.203659  [21184/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.262192  [21216/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.191984  [21248/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.217407  [21280/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.218462  [21312/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.115107  [21344/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.206755  [21376/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.166911  [21408/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.115886  [21440/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.264001  [21472/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.189434  [21504/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.238090  [21536/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.264603  [21568/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.216812  [21600/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.236615  [21632/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.226862  [21664/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.191937  [21696/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.217208  [21728/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.183312  [21760/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.252311  [21792/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.212096  [21824/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.168643  [21856/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.129594  [21888/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.180189  [21920/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.176878  [21952/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.225498  [21984/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.177756  [22016/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.274748  [22048/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.207351  [22080/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.239114  [22112/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.302270  [22144/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.195189  [22176/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.190007  [22208/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.160698  [22240/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.224011  [22272/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.260529  [22304/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.144000  [22336/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.195128  [22368/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.157393  [22400/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.144305  [22432/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.148379  [22464/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.237341  [22496/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.252153  [22528/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.175248  [22560/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.213259  [22592/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.257127  [22624/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.172523  [22656/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.302998  [22688/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.147909  [22720/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.224967  [22752/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.184078  [22784/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.277167  [22816/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.246037  [22848/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.122506  [22880/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.127886  [22912/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.158597  [22944/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.190603  [22976/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.135003  [23008/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.161976  [23040/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.163299  [23072/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.199570  [23104/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.185233  [23136/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.261228  [23168/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.230479  [23200/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.244232  [23232/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.219994  [23264/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.198829  [23296/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.218773  [23328/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.163774  [23360/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.157422  [23392/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.290444  [23424/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.255668  [23456/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.190272  [23488/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.262361  [23520/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.244520  [23552/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.175543  [23584/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.164477  [23616/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.130075  [23648/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.311188  [23680/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.155061  [23712/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.193287  [23744/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.155188  [23776/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.321149  [23808/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.219336  [23840/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.193887  [23872/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.258977  [23904/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.163867  [23936/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.166623  [23968/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.145518  [24000/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.129750  [24032/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.209367  [24064/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.147102  [24096/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.148828  [24128/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.196962  [24160/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.175421  [24192/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.227778  [24224/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.164391  [24256/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.173856  [24288/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.340976  [24320/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.142517  [24352/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.222114  [24384/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.174540  [24416/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.205554  [24448/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.149958  [24480/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.138430  [24512/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.218126  [24544/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.158507  [24576/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.229717  [24608/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.187091  [24640/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.182804  [24672/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.124053  [24704/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.194424  [24736/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.161039  [24768/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.216243  [24800/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.163729  [24832/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.171278  [24864/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.279357  [24872/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.279357  [24872/24872]: : 778it [00:26, 29.45it/s]
-------------------------------
LR=0.0001, batch_size=64
-------------------------------
Epoch 1, time=129.64s

  0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.133366  [   64/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.326509  [  128/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.325199  [  192/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.355387  [  256/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.293687  [  320/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.308483  [  384/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.308965  [  448/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.203022  [  512/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.214851  [  576/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.294941  [  640/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.307943  [  704/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.259772  [  768/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.191066  [  832/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.210546  [  896/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.212936  [  960/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.203774  [ 1024/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.282132  [ 1088/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.177345  [ 1152/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.223716  [ 1216/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.252527  [ 1280/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.219695  [ 1344/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.262135  [ 1408/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.282792  [ 1472/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.141393  [ 1536/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.218497  [ 1600/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.240207  [ 1664/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.184582  [ 1728/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.176843  [ 1792/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.194721  [ 1856/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.131786  [ 1920/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.197942  [ 1984/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.208849  [ 2048/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.182575  [ 2112/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.206571  [ 2176/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.184141  [ 2240/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.214068  [ 2304/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.178484  [ 2368/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.135989  [ 2432/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.178147  [ 2496/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.148615  [ 2560/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.206434  [ 2624/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.163136  [ 2688/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.193882  [ 2752/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.155130  [ 2816/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.197887  [ 2880/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.214459  [ 2944/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.204185  [ 3008/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.164303  [ 3072/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.190950  [ 3136/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.173251  [ 3200/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.236935  [ 3264/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.168717  [ 3328/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.177948  [ 3392/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.184629  [ 3456/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.177435  [ 3520/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.151363  [ 3584/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.199391  [ 3648/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.255873  [ 3712/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.194871  [ 3776/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.169156  [ 3840/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.169487  [ 3904/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.240689  [ 3968/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.230403  [ 4032/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.159089  [ 4096/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.181625  [ 4160/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.221735  [ 4224/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.227257  [ 4288/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.131202  [ 4352/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.158625  [ 4416/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.190837  [ 4480/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.146930  [ 4544/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.163294  [ 4608/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.213105  [ 4672/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.150272  [ 4736/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.240600  [ 4800/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.201634  [ 4864/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.141715  [ 4928/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.180824  [ 4992/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.168242  [ 5056/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.187154  [ 5120/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.271973  [ 5184/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.193207  [ 5248/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.225593  [ 5312/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.213886  [ 5376/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.186173  [ 5440/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.179756  [ 5504/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.156615  [ 5568/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.170035  [ 5632/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.161685  [ 5696/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.193922  [ 5760/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.174961  [ 5824/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.175302  [ 5888/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.186055  [ 5952/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.121212  [ 6016/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.136798  [ 6080/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.166369  [ 6144/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.188559  [ 6208/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.127903  [ 6272/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.201999  [ 6336/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.178250  [ 6400/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.222933  [ 6464/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.165756  [ 6528/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.220571  [ 6592/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.151626  [ 6656/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.166558  [ 6720/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.123148  [ 6784/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.150391  [ 6848/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.214993  [ 6912/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.174884  [ 6976/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.132659  [ 7040/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.120999  [ 7104/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.177120  [ 7168/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.123543  [ 7232/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.200391  [ 7296/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.137669  [ 7360/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.141243  [ 7424/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.163485  [ 7488/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.273054  [ 7552/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.135979  [ 7616/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.276608  [ 7680/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.159490  [ 7744/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.209934  [ 7808/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.136751  [ 7872/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.162265  [ 7936/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.132551  [ 8000/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.134825  [ 8064/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.158195  [ 8128/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.164824  [ 8192/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.173572  [ 8256/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.225060  [ 8320/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.170527  [ 8384/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.218285  [ 8448/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.150272  [ 8512/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.133625  [ 8576/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.238535  [ 8640/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.189558  [ 8704/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.135930  [ 8768/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.137357  [ 8832/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.166591  [ 8896/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.175790  [ 8960/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.202972  [ 9024/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.178389  [ 9088/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.140743  [ 9152/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.174452  [ 9216/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.187299  [ 9280/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.155173  [ 9344/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.160811  [ 9408/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.210892  [ 9472/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.156258  [ 9536/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.174203  [ 9600/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.134615  [ 9664/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.126607  [ 9728/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.139438  [ 9792/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.144796  [ 9856/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.196952  [ 9920/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.152886  [ 9984/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.137573  [10048/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.186161  [10112/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.173481  [10176/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.182654  [10240/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.229356  [10304/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.175341  [10368/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.180665  [10432/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.142846  [10496/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.161497  [10560/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.189039  [10624/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.239473  [10688/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.184373  [10752/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.178806  [10816/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.137413  [10880/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.171836  [10944/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.180752  [11008/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.210429  [11072/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.148429  [11136/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.209502  [11200/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.194127  [11264/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.159846  [11328/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.149638  [11392/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.211487  [11456/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.219176  [11520/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.135006  [11584/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.193055  [11648/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.212627  [11712/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.150065  [11776/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.189432  [11840/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.179838  [11904/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.168510  [11968/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.173489  [12032/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.158893  [12096/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.157273  [12160/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.197268  [12224/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.180402  [12288/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.141144  [12352/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.142577  [12416/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.170030  [12480/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.186192  [12544/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.216878  [12608/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.230309  [12672/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.213298  [12736/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.140170  [12800/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.212651  [12864/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.199103  [12928/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.158151  [12992/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.187512  [13056/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.173922  [13120/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.156914  [13184/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.167743  [13248/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.134397  [13312/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.188661  [13376/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.216547  [13440/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.174825  [13504/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.196487  [13568/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.255848  [13632/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.186590  [13696/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.162368  [13760/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.156290  [13824/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.217423  [13888/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.200581  [13952/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.173493  [14016/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.133542  [14080/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.244333  [14144/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.111312  [14208/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.152576  [14272/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.142898  [14336/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.171330  [14400/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.202308  [14464/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.142568  [14528/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.166106  [14592/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.149810  [14656/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.175661  [14720/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.130899  [14784/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.190066  [14848/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.165681  [14912/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.149854  [14976/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.191838  [15040/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.163529  [15104/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.184118  [15168/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.179958  [15232/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.206280  [15296/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.157448  [15360/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.207729  [15424/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.137487  [15488/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.171686  [15552/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.187326  [15616/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.191365  [15680/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.146203  [15744/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.160742  [15808/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.207917  [15872/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.144751  [15936/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.147069  [16000/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.171630  [16064/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.149827  [16128/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.215683  [16192/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.139163  [16256/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.177713  [16320/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.177380  [16384/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.189262  [16448/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.190021  [16512/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.188711  [16576/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.237488  [16640/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.207170  [16704/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.169288  [16768/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.218069  [16832/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.169668  [16896/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.191287  [16960/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.160681  [17024/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.128080  [17088/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.170648  [17152/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.239830  [17216/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.169874  [17280/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.193673  [17344/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.143785  [17408/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.246633  [17472/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.159175  [17536/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.167990  [17600/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.153034  [17664/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.128885  [17728/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.188206  [17792/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.228901  [17856/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.136013  [17920/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.171578  [17984/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.135050  [18048/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.149606  [18112/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.140483  [18176/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.135811  [18240/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.161396  [18304/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.173972  [18368/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.166932  [18432/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.195368  [18496/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.113958  [18560/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.163612  [18624/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.179606  [18688/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.165775  [18752/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.129534  [18816/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.145984  [18880/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.115991  [18944/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.135087  [19008/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.149978  [19072/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.142073  [19136/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.129902  [19200/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.169214  [19264/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.184922  [19328/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.220730  [19392/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.117113  [19456/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.196686  [19520/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.128044  [19584/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.194744  [19648/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.175873  [19712/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.110328  [19776/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.165426  [19840/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.157409  [19904/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.153667  [19968/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.160936  [20032/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.108374  [20096/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.176724  [20160/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.141364  [20224/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.157007  [20288/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.175053  [20352/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.112780  [20416/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.166766  [20480/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.219687  [20544/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.140653  [20608/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.184016  [20672/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.130835  [20736/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.167980  [20800/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.189809  [20864/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.212056  [20928/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.117394  [20992/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.173080  [21056/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.186306  [21120/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.140258  [21184/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.190644  [21248/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.177605  [21312/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.121780  [21376/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.128702  [21440/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.180445  [21504/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.146468  [21568/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.188789  [21632/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.153426  [21696/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.184967  [21760/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.199600  [21824/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.126863  [21888/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.147453  [21952/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.160082  [22016/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.209494  [22080/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.221971  [22144/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.175902  [22208/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.160886  [22272/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.164972  [22336/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.146695  [22400/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.123840  [22464/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.189016  [22528/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.175538  [22592/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.170306  [22656/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.188685  [22720/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.157568  [22784/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.184678  [22848/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.136545  [22912/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.140964  [22976/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.125604  [23040/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.164640  [23104/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.189047  [23168/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.218105  [23232/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.185461  [23296/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.179036  [23360/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.190118  [23424/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.189230  [23488/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.208049  [23552/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.148944  [23616/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.195873  [23680/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.137984  [23744/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.189436  [23808/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.199210  [23872/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.218320  [23936/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.142636  [24000/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.174528  [24064/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.134548  [24128/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.157875  [24192/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.212446  [24256/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.256906  [24320/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.157928  [24384/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.209396  [24448/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.127353  [24512/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.171139  [24576/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.179462  [24640/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.141369  [24704/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.190760  [24768/24872]:   0%|          | 0/388 [00:17<?, ?it/s]
loss: 0.182720  [24832/24872]:   0%|          | 0/388 [00:17<?, ?it/s]
loss: 0.192798  [24872/24872]:   0%|          | 0/388 [00:17<?, ?it/s]
loss: 0.192798  [24872/24872]: : 389it [00:17, 22.73it/s]
Epoch 2, time=146.75s

  0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.153127  [   64/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.139508  [  128/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.175046  [  192/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.161584  [  256/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.247147  [  320/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.211305  [  384/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.198388  [  448/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.154755  [  512/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.180241  [  576/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.172434  [  640/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.187744  [  704/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.198697  [  768/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.164316  [  832/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.172504  [  896/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.168386  [  960/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.149413  [ 1024/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.219847  [ 1088/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.144735  [ 1152/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.158594  [ 1216/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.190114  [ 1280/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.169058  [ 1344/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.219379  [ 1408/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.222583  [ 1472/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.120177  [ 1536/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.190531  [ 1600/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.188300  [ 1664/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.140966  [ 1728/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.162330  [ 1792/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.162021  [ 1856/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.110977  [ 1920/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.155462  [ 1984/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.176826  [ 2048/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.144904  [ 2112/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.189658  [ 2176/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.143212  [ 2240/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.178990  [ 2304/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.157189  [ 2368/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.111909  [ 2432/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.144637  [ 2496/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.125878  [ 2560/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.166497  [ 2624/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.140939  [ 2688/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.174989  [ 2752/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.121717  [ 2816/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.176149  [ 2880/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.163736  [ 2944/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.158127  [ 3008/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.121892  [ 3072/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.159129  [ 3136/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.158165  [ 3200/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.203688  [ 3264/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.137229  [ 3328/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.150478  [ 3392/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.171562  [ 3456/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.156247  [ 3520/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.120609  [ 3584/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.166549  [ 3648/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.216781  [ 3712/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.165652  [ 3776/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.167092  [ 3840/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.138901  [ 3904/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.204779  [ 3968/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.203013  [ 4032/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.134880  [ 4096/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.158663  [ 4160/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.208128  [ 4224/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.213989  [ 4288/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.117230  [ 4352/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.143975  [ 4416/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.173899  [ 4480/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.130009  [ 4544/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.139001  [ 4608/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.169583  [ 4672/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.133356  [ 4736/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.221562  [ 4800/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.158587  [ 4864/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.119094  [ 4928/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.146199  [ 4992/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.133218  [ 5056/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.158833  [ 5120/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.236753  [ 5184/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.177883  [ 5248/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.186966  [ 5312/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.200773  [ 5376/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.157111  [ 5440/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.174470  [ 5504/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.163077  [ 5568/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.164969  [ 5632/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.151903  [ 5696/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.167498  [ 5760/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.149744  [ 5824/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.164182  [ 5888/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.137516  [ 5952/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.119027  [ 6016/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.130808  [ 6080/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.142099  [ 6144/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.170916  [ 6208/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.124619  [ 6272/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.174905  [ 6336/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.171572  [ 6400/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.207616  [ 6464/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.157898  [ 6528/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.215106  [ 6592/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.130360  [ 6656/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.155118  [ 6720/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.116212  [ 6784/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.131683  [ 6848/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.186417  [ 6912/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.156351  [ 6976/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.123992  [ 7040/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.110771  [ 7104/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.167088  [ 7168/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.109063  [ 7232/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.179579  [ 7296/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.133720  [ 7360/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.127362  [ 7424/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.160118  [ 7488/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.232772  [ 7552/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.148088  [ 7616/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.243491  [ 7680/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.158968  [ 7744/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.190201  [ 7808/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.125625  [ 7872/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.147102  [ 7936/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.119913  [ 8000/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.111235  [ 8064/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.125626  [ 8128/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.163835  [ 8192/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.154150  [ 8256/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.222695  [ 8320/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.157577  [ 8384/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.184876  [ 8448/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.129264  [ 8512/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.124867  [ 8576/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.198023  [ 8640/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.165810  [ 8704/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.120222  [ 8768/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.135000  [ 8832/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.148297  [ 8896/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.162105  [ 8960/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.197082  [ 9024/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.159731  [ 9088/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.131613  [ 9152/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.156532  [ 9216/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.181093  [ 9280/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.147347  [ 9344/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.148276  [ 9408/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.213498  [ 9472/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.135637  [ 9536/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.157520  [ 9600/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.122931  [ 9664/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.118930  [ 9728/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.126739  [ 9792/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.127244  [ 9856/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.184343  [ 9920/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.133016  [ 9984/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.115161  [10048/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.173453  [10112/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.172230  [10176/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.169308  [10240/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.203515  [10304/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.152574  [10368/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.171364  [10432/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.150459  [10496/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.134896  [10560/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.166158  [10624/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.222076  [10688/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.161491  [10752/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.150307  [10816/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.110211  [10880/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.172832  [10944/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.147228  [11008/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.191407  [11072/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.132294  [11136/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.187994  [11200/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.182377  [11264/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.124875  [11328/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.128042  [11392/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.189690  [11456/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.176962  [11520/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.114304  [11584/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.154994  [11648/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.173673  [11712/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.131340  [11776/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.141511  [11840/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.155784  [11904/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.154201  [11968/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.128197  [12032/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.142880  [12096/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.121996  [12160/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.173509  [12224/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.157994  [12288/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.113938  [12352/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.127031  [12416/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.143425  [12480/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.169611  [12544/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.198466  [12608/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.207992  [12672/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.196928  [12736/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.129021  [12800/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.189860  [12864/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.187040  [12928/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.133553  [12992/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.174734  [13056/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.159039  [13120/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.142490  [13184/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.164245  [13248/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.107537  [13312/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.185548  [13376/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.188764  [13440/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.180918  [13504/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.177975  [13568/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.244117  [13632/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.191490  [13696/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.148172  [13760/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.151967  [13824/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.217882  [13888/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.182348  [13952/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.171058  [14016/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.127825  [14080/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.217544  [14144/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.114157  [14208/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.150593  [14272/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.134807  [14336/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.160778  [14400/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.203829  [14464/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.137679  [14528/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.150763  [14592/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.142343  [14656/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.180275  [14720/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.132265  [14784/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.185787  [14848/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.164606  [14912/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.136768  [14976/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.185903  [15040/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.153535  [15104/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.170276  [15168/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.166898  [15232/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.187526  [15296/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.148430  [15360/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.184593  [15424/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.118113  [15488/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.153959  [15552/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.169823  [15616/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.163167  [15680/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.127574  [15744/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.146627  [15808/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.198272  [15872/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.119022  [15936/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.130621  [16000/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.147472  [16064/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.135757  [16128/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.186232  [16192/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.119497  [16256/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.152141  [16320/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.166705  [16384/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.153089  [16448/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.169481  [16512/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.153984  [16576/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.207960  [16640/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.162292  [16704/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.143875  [16768/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.180700  [16832/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.163965  [16896/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.172932  [16960/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.144628  [17024/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.110916  [17088/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.155694  [17152/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.215729  [17216/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.139817  [17280/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.180204  [17344/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.131223  [17408/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.240944  [17472/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.137300  [17536/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.154541  [17600/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.138524  [17664/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.111773  [17728/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.168397  [17792/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.183923  [17856/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.114207  [17920/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.149790  [17984/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.124857  [18048/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.128253  [18112/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.120373  [18176/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.115717  [18240/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.139280  [18304/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.154039  [18368/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.148768  [18432/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.167685  [18496/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.099908  [18560/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.143165  [18624/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.148479  [18688/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.140793  [18752/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.112152  [18816/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.129147  [18880/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.098721  [18944/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.120619  [19008/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.125317  [19072/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.127776  [19136/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.113285  [19200/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.161967  [19264/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.154757  [19328/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.203927  [19392/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.107234  [19456/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.182854  [19520/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.117301  [19584/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.174213  [19648/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.166859  [19712/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.098775  [19776/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.142947  [19840/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.140251  [19904/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.141602  [19968/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.141291  [20032/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.101481  [20096/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.152086  [20160/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.126075  [20224/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.141580  [20288/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.157709  [20352/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.101134  [20416/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.137804  [20480/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.196426  [20544/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.130069  [20608/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.172093  [20672/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.115589  [20736/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.150067  [20800/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.179186  [20864/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.192129  [20928/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.105787  [20992/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.157286  [21056/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.167446  [21120/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.126381  [21184/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.178681  [21248/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.160904  [21312/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.106307  [21376/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.116328  [21440/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.170325  [21504/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.122338  [21568/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.175800  [21632/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.132517  [21696/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.166068  [21760/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.182935  [21824/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.119629  [21888/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.136303  [21952/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.151746  [22016/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.198420  [22080/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.203093  [22144/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.145727  [22208/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.148666  [22272/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.144187  [22336/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.132944  [22400/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.111073  [22464/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.167830  [22528/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.152749  [22592/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.163565  [22656/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.182216  [22720/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.132501  [22784/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.162875  [22848/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.109640  [22912/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.121576  [22976/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.106244  [23040/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.127562  [23104/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.175882  [23168/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.184688  [23232/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.169939  [23296/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.158899  [23360/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.162949  [23424/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.166768  [23488/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.171726  [23552/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.142870  [23616/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.172641  [23680/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.128357  [23744/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.179956  [23808/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.182724  [23872/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.201997  [23936/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.131861  [24000/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.174904  [24064/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.125611  [24128/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.150098  [24192/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.209089  [24256/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.223271  [24320/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.156775  [24384/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.191948  [24448/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.109328  [24512/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.155200  [24576/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.149295  [24640/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.116538  [24704/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.158871  [24768/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.152416  [24832/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.173006  [24872/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.173006  [24872/24872]: : 389it [00:16, 23.36it/s]
Epoch 3, time=163.41s

  0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.128567  [   64/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.130031  [  128/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.169425  [  192/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.134189  [  256/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.214461  [  320/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.162720  [  384/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.164600  [  448/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.143451  [  512/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.148668  [  576/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.153749  [  640/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.172608  [  704/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.167979  [  768/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.142618  [  832/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.153021  [  896/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.153692  [  960/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.130351  [ 1024/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.169204  [ 1088/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.130264  [ 1152/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.139379  [ 1216/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.156771  [ 1280/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.134147  [ 1344/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.208397  [ 1408/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.181793  [ 1472/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.105362  [ 1536/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.171905  [ 1600/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.172748  [ 1664/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.123863  [ 1728/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.149150  [ 1792/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.135181  [ 1856/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.106245  [ 1920/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.129181  [ 1984/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.159087  [ 2048/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.149532  [ 2112/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.156249  [ 2176/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.124402  [ 2240/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.169772  [ 2304/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.139311  [ 2368/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.101386  [ 2432/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.126165  [ 2496/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.112761  [ 2560/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.153434  [ 2624/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.129690  [ 2688/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.145254  [ 2752/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.114850  [ 2816/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.163931  [ 2880/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.155113  [ 2944/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.138496  [ 3008/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.119365  [ 3072/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.144327  [ 3136/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.134504  [ 3200/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.182582  [ 3264/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.122708  [ 3328/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.157618  [ 3392/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.157441  [ 3456/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.154547  [ 3520/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.113507  [ 3584/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.147706  [ 3648/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.205235  [ 3712/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.167800  [ 3776/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.160051  [ 3840/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.122474  [ 3904/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.177568  [ 3968/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.180463  [ 4032/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.132042  [ 4096/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.140848  [ 4160/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.189176  [ 4224/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.156976  [ 4288/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.099151  [ 4352/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.144056  [ 4416/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.164788  [ 4480/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.133504  [ 4544/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.137223  [ 4608/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.161772  [ 4672/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.120464  [ 4736/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.166105  [ 4800/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.126030  [ 4864/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.106341  [ 4928/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.129042  [ 4992/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.132961  [ 5056/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.146263  [ 5120/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.259194  [ 5184/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.149937  [ 5248/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.185214  [ 5312/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.168053  [ 5376/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.157313  [ 5440/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.142178  [ 5504/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.138589  [ 5568/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.154732  [ 5632/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.145531  [ 5696/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.166824  [ 5760/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.170775  [ 5824/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.186273  [ 5888/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.129159  [ 5952/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.111487  [ 6016/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.121206  [ 6080/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.136584  [ 6144/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.176544  [ 6208/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.141064  [ 6272/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.176542  [ 6336/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.145625  [ 6400/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.184241  [ 6464/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.149607  [ 6528/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.198038  [ 6592/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.127658  [ 6656/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.115881  [ 6720/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.118514  [ 6784/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.163754  [ 6848/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.176315  [ 6912/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.171017  [ 6976/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.108714  [ 7040/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.104299  [ 7104/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.167426  [ 7168/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.097813  [ 7232/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.168274  [ 7296/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.142704  [ 7360/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.125893  [ 7424/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.149907  [ 7488/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.246342  [ 7552/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.123352  [ 7616/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.202357  [ 7680/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.134500  [ 7744/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.178272  [ 7808/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.133571  [ 7872/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.135341  [ 7936/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.126822  [ 8000/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.103333  [ 8064/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.112139  [ 8128/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.145017  [ 8192/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.129808  [ 8256/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.207208  [ 8320/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.129251  [ 8384/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.152072  [ 8448/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.116166  [ 8512/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.111605  [ 8576/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.204963  [ 8640/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.138401  [ 8704/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.108321  [ 8768/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.111708  [ 8832/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.147429  [ 8896/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.141054  [ 8960/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.177374  [ 9024/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.144821  [ 9088/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.116917  [ 9152/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.139516  [ 9216/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.179714  [ 9280/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.129068  [ 9344/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.137191  [ 9408/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.187103  [ 9472/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.114260  [ 9536/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.162461  [ 9600/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.114314  [ 9664/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.113200  [ 9728/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.128327  [ 9792/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.112513  [ 9856/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.205296  [ 9920/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.126496  [ 9984/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.108267  [10048/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.207468  [10112/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.158521  [10176/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.167879  [10240/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.201560  [10304/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.194922  [10368/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.186186  [10432/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.134467  [10496/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.176274  [10560/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.175340  [10624/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.218573  [10688/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.212181  [10752/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.152056  [10816/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.139324  [10880/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.206736  [10944/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.169147  [11008/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.198010  [11072/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.158362  [11136/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.196037  [11200/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.217747  [11264/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.143332  [11328/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.144943  [11392/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.204412  [11456/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.172692  [11520/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.129608  [11584/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.155123  [11648/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.166769  [11712/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.156810  [11776/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.138762  [11840/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.147027  [11904/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.159088  [11968/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.118207  [12032/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.140527  [12096/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.121596  [12160/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.170926  [12224/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.160071  [12288/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.104422  [12352/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.114699  [12416/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.134730  [12480/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.159328  [12544/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.178308  [12608/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.200683  [12672/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.184814  [12736/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.123687  [12800/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.161822  [12864/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.159905  [12928/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.123795  [12992/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.156308  [13056/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.141329  [13120/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.140347  [13184/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.138539  [13248/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.094336  [13312/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.167651  [13376/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.176404  [13440/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.139656  [13504/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.164042  [13568/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.209585  [13632/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.142585  [13696/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.134354  [13760/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.114380  [13824/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.166664  [13888/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.164756  [13952/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.127618  [14016/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.121312  [14080/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.179100  [14144/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.100870  [14208/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.103787  [14272/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.118762  [14336/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.119588  [14400/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.161557  [14464/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.112899  [14528/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.153544  [14592/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.110306  [14656/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.151225  [14720/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.101581  [14784/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.154017  [14848/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.135661  [14912/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.104329  [14976/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.134446  [15040/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.130665  [15104/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.166650  [15168/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.142895  [15232/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.166311  [15296/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.127112  [15360/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.163366  [15424/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.106538  [15488/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.130023  [15552/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.148083  [15616/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.150037  [15680/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.136284  [15744/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.133891  [15808/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.168276  [15872/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.105799  [15936/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.118251  [16000/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.132523  [16064/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.118919  [16128/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.159558  [16192/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.105014  [16256/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.139289  [16320/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.151410  [16384/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.142872  [16448/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.159288  [16512/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.145891  [16576/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.201459  [16640/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.157354  [16704/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.135787  [16768/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.182502  [16832/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.149721  [16896/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.172993  [16960/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.130358  [17024/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.109810  [17088/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.156231  [17152/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.156829  [17216/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.142607  [17280/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.173903  [17344/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.129783  [17408/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.226474  [17472/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.114599  [17536/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.148505  [17600/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.126269  [17664/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.097337  [17728/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.158161  [17792/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.167917  [17856/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.102535  [17920/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.134460  [17984/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.113225  [18048/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.125332  [18112/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.123355  [18176/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.113760  [18240/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.120761  [18304/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.160967  [18368/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.135199  [18432/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.152228  [18496/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.096324  [18560/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.145969  [18624/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.140102  [18688/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.131109  [18752/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.119087  [18816/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.121416  [18880/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.094087  [18944/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.111511  [19008/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.127839  [19072/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.125457  [19136/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.105385  [19200/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.157277  [19264/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.144635  [19328/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.193414  [19392/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.109342  [19456/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.192913  [19520/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.116963  [19584/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.168445  [19648/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.146978  [19712/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.094890  [19776/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.136153  [19840/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.133880  [19904/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.140193  [19968/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.127459  [20032/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.109475  [20096/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.131493  [20160/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.118529  [20224/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.138263  [20288/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.163749  [20352/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.116009  [20416/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.117084  [20480/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.182817  [20544/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.136970  [20608/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.164798  [20672/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.112216  [20736/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.147578  [20800/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.175785  [20864/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.193150  [20928/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.097465  [20992/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.147134  [21056/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.155635  [21120/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.119246  [21184/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.165648  [21248/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.149052  [21312/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.097276  [21376/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.114593  [21440/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.168997  [21504/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.125680  [21568/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.163906  [21632/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.126723  [21696/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.146595  [21760/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.170508  [21824/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.120913  [21888/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.120121  [21952/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.141858  [22016/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.186105  [22080/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.178809  [22144/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.139238  [22208/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.143665  [22272/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.139205  [22336/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.127275  [22400/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.119806  [22464/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.161918  [22528/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.161664  [22592/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.155806  [22656/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.207917  [22720/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.139042  [22784/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.164632  [22848/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.094965  [22912/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.120561  [22976/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.110674  [23040/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.141644  [23104/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.178596  [23168/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.195233  [23232/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.171941  [23296/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.168871  [23360/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.171530  [23424/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.161155  [23488/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.166159  [23552/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.146863  [23616/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.167582  [23680/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.146454  [23744/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.144665  [23808/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.178004  [23872/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.143357  [23936/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.139506  [24000/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.156455  [24064/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.117177  [24128/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.176298  [24192/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.172884  [24256/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.199133  [24320/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.126438  [24384/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.176594  [24448/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.106799  [24512/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.151900  [24576/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.147147  [24640/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.108888  [24704/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.105874  [24768/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.118743  [24832/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.161859  [24872/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.161859  [24872/24872]: : 389it [00:16, 23.66it/s]
Epoch 4, time=179.85s

  0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.112099  [   64/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.128977  [  128/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.159523  [  192/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.113006  [  256/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.236121  [  320/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.150129  [  384/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.154497  [  448/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.129142  [  512/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.131036  [  576/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.151273  [  640/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.167274  [  704/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.151103  [  768/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.134983  [  832/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.146992  [  896/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.140264  [  960/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.126964  [ 1024/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.170745  [ 1088/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.130408  [ 1152/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.125425  [ 1216/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.137542  [ 1280/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.158778  [ 1344/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.167529  [ 1408/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.183268  [ 1472/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.102821  [ 1536/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.161315  [ 1600/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.209533  [ 1664/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.122508  [ 1728/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.143378  [ 1792/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.169759  [ 1856/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.100954  [ 1920/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.109417  [ 1984/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.157832  [ 2048/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.124035  [ 2112/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.145800  [ 2176/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.108372  [ 2240/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.161142  [ 2304/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.140393  [ 2368/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.099065  [ 2432/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.121081  [ 2496/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.105945  [ 2560/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.152750  [ 2624/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.117185  [ 2688/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.141732  [ 2752/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.110606  [ 2816/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.164693  [ 2880/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.143678  [ 2944/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.125217  [ 3008/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.133587  [ 3072/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.131701  [ 3136/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.124170  [ 3200/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.169109  [ 3264/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.112293  [ 3328/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.124431  [ 3392/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.141608  [ 3456/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.151892  [ 3520/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.106051  [ 3584/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.152158  [ 3648/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.180479  [ 3712/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.142411  [ 3776/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.175167  [ 3840/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.113615  [ 3904/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.165662  [ 3968/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.169391  [ 4032/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.126649  [ 4096/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.133866  [ 4160/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.153807  [ 4224/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.185225  [ 4288/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.105317  [ 4352/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.148316  [ 4416/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.174843  [ 4480/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.114830  [ 4544/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.154126  [ 4608/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.157464  [ 4672/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.122067  [ 4736/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.180931  [ 4800/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.158594  [ 4864/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.111485  [ 4928/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.147701  [ 4992/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.127033  [ 5056/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.153207  [ 5120/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.242842  [ 5184/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.135645  [ 5248/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.175444  [ 5312/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.152122  [ 5376/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.164153  [ 5440/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.146594  [ 5504/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.138545  [ 5568/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.154268  [ 5632/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.146511  [ 5696/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.152316  [ 5760/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.160523  [ 5824/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.154531  [ 5888/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.139504  [ 5952/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.104932  [ 6016/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.121044  [ 6080/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.137826  [ 6144/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.181790  [ 6208/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.106400  [ 6272/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.171867  [ 6336/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.144551  [ 6400/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.197561  [ 6464/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.144417  [ 6528/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.171255  [ 6592/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.126393  [ 6656/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.107634  [ 6720/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.091854  [ 6784/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.141274  [ 6848/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.158857  [ 6912/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.148564  [ 6976/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.109925  [ 7040/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.096317  [ 7104/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.150043  [ 7168/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.095592  [ 7232/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.154342  [ 7296/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.125231  [ 7360/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.114432  [ 7424/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.133424  [ 7488/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.231613  [ 7552/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.114824  [ 7616/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.214302  [ 7680/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.126044  [ 7744/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.159536  [ 7808/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.116602  [ 7872/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.123693  [ 7936/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.099781  [ 8000/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.099274  [ 8064/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.107231  [ 8128/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.139131  [ 8192/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.126285  [ 8256/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.196466  [ 8320/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.137710  [ 8384/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.156153  [ 8448/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.104082  [ 8512/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.104716  [ 8576/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.193374  [ 8640/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.142842  [ 8704/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.110824  [ 8768/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.107875  [ 8832/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.140657  [ 8896/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.154418  [ 8960/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.169031  [ 9024/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.121102  [ 9088/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.113712  [ 9152/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.143457  [ 9216/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.156581  [ 9280/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.125986  [ 9344/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.150345  [ 9408/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.173942  [ 9472/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.099818  [ 9536/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.148422  [ 9600/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.124318  [ 9664/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.110612  [ 9728/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.120353  [ 9792/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.117701  [ 9856/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.175580  [ 9920/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.132825  [ 9984/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.107819  [10048/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.155694  [10112/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.166528  [10176/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.147625  [10240/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.219336  [10304/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.146034  [10368/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.154301  [10432/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.184737  [10496/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.108577  [10560/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.176694  [10624/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.212975  [10688/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.145923  [10752/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.152206  [10816/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.102650  [10880/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.157126  [10944/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.161775  [11008/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.161159  [11072/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.168591  [11136/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.179705  [11200/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.168099  [11264/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.130930  [11328/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.113136  [11392/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.171600  [11456/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.174083  [11520/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.118414  [11584/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.151544  [11648/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.149602  [11712/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.130368  [11776/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.124676  [11840/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.159068  [11904/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.174589  [11968/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.146792  [12032/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.137735  [12096/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.119056  [12160/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.181018  [12224/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.174932  [12288/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.104281  [12352/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.134598  [12416/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.125559  [12480/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.167175  [12544/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.163215  [12608/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.179854  [12672/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.179322  [12736/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.126904  [12800/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.198524  [12864/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.160764  [12928/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.135596  [12992/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.152877  [13056/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.163846  [13120/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.143642  [13184/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.133454  [13248/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.100192  [13312/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.175209  [13376/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.179400  [13440/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.155157  [13504/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.154561  [13568/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.206355  [13632/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.142033  [13696/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.126693  [13760/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.113940  [13824/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.146349  [13888/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.162715  [13952/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.137574  [14016/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.108341  [14080/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.169479  [14144/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.091690  [14208/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.097335  [14272/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.132073  [14336/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.102472  [14400/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.176299  [14464/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.098871  [14528/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.112278  [14592/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.096675  [14656/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.142478  [14720/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.095429  [14784/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.140602  [14848/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.132929  [14912/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.098072  [14976/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.129309  [15040/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.116213  [15104/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.141323  [15168/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.126520  [15232/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.159679  [15296/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.123986  [15360/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.166042  [15424/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.108299  [15488/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.131244  [15552/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.121557  [15616/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.145037  [15680/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.112066  [15744/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.127260  [15808/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.144614  [15872/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.101241  [15936/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.116677  [16000/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.149729  [16064/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.109881  [16128/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.177565  [16192/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.110859  [16256/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.128052  [16320/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.159100  [16384/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.147679  [16448/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.170513  [16512/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.147950  [16576/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.180049  [16640/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.148188  [16704/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.128121  [16768/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.177893  [16832/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.137246  [16896/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.159044  [16960/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.127687  [17024/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.122637  [17088/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.176479  [17152/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.173991  [17216/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.140646  [17280/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.172350  [17344/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.117919  [17408/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.218729  [17472/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.113961  [17536/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.144978  [17600/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.130867  [17664/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.096718  [17728/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.163581  [17792/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.157457  [17856/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.102610  [17920/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.147181  [17984/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.115012  [18048/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.129042  [18112/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.128309  [18176/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.108573  [18240/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.113824  [18304/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.152912  [18368/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.118468  [18432/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.147363  [18496/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.092562  [18560/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.128287  [18624/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.125822  [18688/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.124344  [18752/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.109575  [18816/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.116436  [18880/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.094980  [18944/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.112625  [19008/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.097685  [19072/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.116600  [19136/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.094742  [19200/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.153409  [19264/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.138041  [19328/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.174703  [19392/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.095442  [19456/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.168263  [19520/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.113852  [19584/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.156028  [19648/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.147562  [19712/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.091588  [19776/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.137949  [19840/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.131395  [19904/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.131612  [19968/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.133610  [20032/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.090399  [20096/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.119987  [20160/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.108078  [20224/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.121230  [20288/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.147420  [20352/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.095908  [20416/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.107917  [20480/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.174144  [20544/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.130808  [20608/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.157405  [20672/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.110702  [20736/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.132857  [20800/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.164523  [20864/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.176328  [20928/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.093987  [20992/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.137734  [21056/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.144144  [21120/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.113320  [21184/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.161037  [21248/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.140941  [21312/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.081417  [21376/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.094490  [21440/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.139799  [21504/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.094226  [21568/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.154110  [21632/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.098780  [21696/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.116690  [21760/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.150228  [21824/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.098691  [21888/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.106219  [21952/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.127807  [22016/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.163302  [22080/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.162989  [22144/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.117685  [22208/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.120681  [22272/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.130448  [22336/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.102601  [22400/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.100944  [22464/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.145102  [22528/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.128819  [22592/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.136212  [22656/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.159549  [22720/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.117918  [22784/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.129408  [22848/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.081262  [22912/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.095195  [22976/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.084172  [23040/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.111864  [23104/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.158214  [23168/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.158965  [23232/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.155022  [23296/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.149405  [23360/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.153448  [23424/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.150478  [23488/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.146456  [23552/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.130772  [23616/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.155088  [23680/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.109409  [23744/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.154228  [23808/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.163112  [23872/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.162200  [23936/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.130795  [24000/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.142287  [24064/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.109636  [24128/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.153231  [24192/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.150080  [24256/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.195478  [24320/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.116763  [24384/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.159409  [24448/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.121127  [24512/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.124822  [24576/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.152258  [24640/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.119899  [24704/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.115407  [24768/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.124312  [24832/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.191032  [24872/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.191032  [24872/24872]: : 389it [00:16, 23.62it/s]
Epoch 5, time=196.32s

  0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.106798  [   64/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.137706  [  128/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.166715  [  192/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.113610  [  256/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.211569  [  320/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.149909  [  384/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.148275  [  448/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.119354  [  512/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.124071  [  576/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.126543  [  640/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.158801  [  704/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.146109  [  768/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.126904  [  832/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.144905  [  896/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.138445  [  960/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.115154  [ 1024/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.159452  [ 1088/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.140735  [ 1152/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.112319  [ 1216/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.108575  [ 1280/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.144928  [ 1344/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.180317  [ 1408/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.145077  [ 1472/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.099639  [ 1536/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.165182  [ 1600/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.167129  [ 1664/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.145723  [ 1728/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.118810  [ 1792/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.150760  [ 1856/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.089760  [ 1920/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.099737  [ 1984/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.138503  [ 2048/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.110397  [ 2112/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.123904  [ 2176/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.102552  [ 2240/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.143136  [ 2304/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.141443  [ 2368/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.091358  [ 2432/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.110392  [ 2496/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.095485  [ 2560/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.132434  [ 2624/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.104214  [ 2688/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.137010  [ 2752/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.105065  [ 2816/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.160837  [ 2880/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.130883  [ 2944/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.117348  [ 3008/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.122136  [ 3072/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.122333  [ 3136/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.131640  [ 3200/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.170559  [ 3264/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.107703  [ 3328/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.136015  [ 3392/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.135361  [ 3456/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.153442  [ 3520/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.114733  [ 3584/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.142890  [ 3648/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.183534  [ 3712/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.144513  [ 3776/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.169675  [ 3840/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.127837  [ 3904/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.141175  [ 3968/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.190530  [ 4032/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.131197  [ 4096/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.134427  [ 4160/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.179907  [ 4224/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.160833  [ 4288/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.119763  [ 4352/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.157364  [ 4416/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.146727  [ 4480/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.107556  [ 4544/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.125678  [ 4608/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.130333  [ 4672/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.131329  [ 4736/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.177210  [ 4800/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.122284  [ 4864/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.133630  [ 4928/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.133111  [ 4992/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.135136  [ 5056/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.170690  [ 5120/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.205092  [ 5184/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.125272  [ 5248/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.193143  [ 5312/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.151063  [ 5376/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.190092  [ 5440/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.158094  [ 5504/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.124334  [ 5568/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.182261  [ 5632/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.139165  [ 5696/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.163844  [ 5760/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.151817  [ 5824/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.138982  [ 5888/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.136609  [ 5952/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.105312  [ 6016/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.121490  [ 6080/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.117748  [ 6144/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.138689  [ 6208/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.096432  [ 6272/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.150178  [ 6336/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.142006  [ 6400/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.163744  [ 6464/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.119938  [ 6528/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.172283  [ 6592/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.112202  [ 6656/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.107823  [ 6720/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.084592  [ 6784/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.117513  [ 6848/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.146230  [ 6912/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.133363  [ 6976/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.092399  [ 7040/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.090051  [ 7104/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.149456  [ 7168/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.097840  [ 7232/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.148954  [ 7296/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.126386  [ 7360/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.109061  [ 7424/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.122518  [ 7488/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.218252  [ 7552/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.110672  [ 7616/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.190246  [ 7680/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.112463  [ 7744/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.151963  [ 7808/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.108481  [ 7872/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.114355  [ 7936/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.097038  [ 8000/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.093878  [ 8064/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.095400  [ 8128/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.131091  [ 8192/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.121960  [ 8256/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.176377  [ 8320/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.116515  [ 8384/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.131886  [ 8448/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.103876  [ 8512/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.095454  [ 8576/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.177126  [ 8640/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.119111  [ 8704/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.094153  [ 8768/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.102576  [ 8832/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.123544  [ 8896/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.141846  [ 8960/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.157633  [ 9024/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.118075  [ 9088/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.117021  [ 9152/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.147857  [ 9216/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.159304  [ 9280/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.118228  [ 9344/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.124896  [ 9408/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.162065  [ 9472/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.125312  [ 9536/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.140245  [ 9600/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.113340  [ 9664/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.115085  [ 9728/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.113595  [ 9792/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.126363  [ 9856/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.170213  [ 9920/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.123281  [ 9984/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.096273  [10048/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.166492  [10112/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.151892  [10176/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.196253  [10240/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.152971  [10304/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.161704  [10368/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.138273  [10432/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.117721  [10496/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.108402  [10560/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.124378  [10624/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.176952  [10688/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.122314  [10752/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.135301  [10816/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.092093  [10880/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.131034  [10944/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.133744  [11008/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.140734  [11072/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.104260  [11136/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.151803  [11200/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.144294  [11264/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.120235  [11328/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.106725  [11392/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.151315  [11456/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.165230  [11520/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.108967  [11584/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.142957  [11648/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.151200  [11712/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.120043  [11776/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.126550  [11840/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.140871  [11904/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.141049  [11968/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.126535  [12032/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.131851  [12096/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.116183  [12160/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.175873  [12224/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.155207  [12288/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.101250  [12352/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.137586  [12416/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.120356  [12480/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.145853  [12544/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.163197  [12608/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.188744  [12672/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.172705  [12736/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.122666  [12800/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.160706  [12864/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.153419  [12928/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.130605  [12992/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.147208  [13056/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.159561  [13120/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.137538  [13184/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.116673  [13248/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.082891  [13312/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.179675  [13376/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.181039  [13440/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.130992  [13504/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.146476  [13568/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.196037  [13632/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.128258  [13696/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.118208  [13760/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.101101  [13824/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.154225  [13888/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.167914  [13952/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.107207  [14016/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.105612  [14080/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.163359  [14144/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.078930  [14208/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.091284  [14272/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.107411  [14336/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.109154  [14400/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.161247  [14464/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.095574  [14528/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.144496  [14592/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.104981  [14656/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.148882  [14720/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.104403  [14784/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.139131  [14848/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.118965  [14912/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.108017  [14976/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.148062  [15040/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.121020  [15104/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.152680  [15168/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.132968  [15232/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.157071  [15296/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.118119  [15360/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.165903  [15424/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.090495  [15488/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.120574  [15552/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.123914  [15616/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.140807  [15680/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.112958  [15744/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.144036  [15808/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.134582  [15872/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.096468  [15936/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.115021  [16000/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.129513  [16064/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.112162  [16128/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.182993  [16192/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.106274  [16256/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.121145  [16320/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.145858  [16384/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.151259  [16448/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.157064  [16512/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.143562  [16576/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.183419  [16640/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.141245  [16704/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.125791  [16768/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.169506  [16832/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.135965  [16896/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.163002  [16960/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.134137  [17024/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.116979  [17088/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.175638  [17152/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.133016  [17216/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.148104  [17280/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.173661  [17344/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.128680  [17408/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.224841  [17472/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.111000  [17536/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.134954  [17600/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.110026  [17664/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.097082  [17728/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.173718  [17792/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.163522  [17856/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.091734  [17920/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.135291  [17984/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.108004  [18048/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.116084  [18112/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.127235  [18176/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.107304  [18240/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.120904  [18304/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.142978  [18368/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.117640  [18432/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.132521  [18496/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.086455  [18560/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.144536  [18624/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.115652  [18688/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.112206  [18752/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.102201  [18816/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.104853  [18880/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.081660  [18944/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.101374  [19008/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.119478  [19072/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.105833  [19136/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.091370  [19200/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.150770  [19264/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.123183  [19328/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.173173  [19392/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.107393  [19456/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.172872  [19520/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.107151  [19584/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.160615  [19648/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.143834  [19712/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.086864  [19776/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.144307  [19840/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.123104  [19904/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.117970  [19968/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.134663  [20032/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.104965  [20096/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.124940  [20160/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.115213  [20224/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.122191  [20288/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.130015  [20352/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.102495  [20416/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.113653  [20480/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.150182  [20544/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.120628  [20608/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.154880  [20672/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.094343  [20736/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.138240  [20800/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.152564  [20864/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.155426  [20928/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.082665  [20992/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.120307  [21056/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.124703  [21120/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.101850  [21184/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.142431  [21248/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.130125  [21312/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.083917  [21376/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.088299  [21440/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.142293  [21504/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.084126  [21568/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.163427  [21632/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.089910  [21696/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.100711  [21760/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.138599  [21824/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.092398  [21888/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.101683  [21952/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.124547  [22016/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.166795  [22080/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.165912  [22144/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.107025  [22208/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.137311  [22272/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.123390  [22336/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.101641  [22400/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.099970  [22464/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.148323  [22528/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.123451  [22592/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.154363  [22656/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.149519  [22720/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.110666  [22784/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.128374  [22848/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.085248  [22912/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.091474  [22976/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.090902  [23040/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.130448  [23104/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.172940  [23168/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.160047  [23232/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.149607  [23296/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.160536  [23360/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.155293  [23424/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.158200  [23488/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.179090  [23552/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.117256  [23616/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.180366  [23680/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.102616  [23744/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.139525  [23808/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.159403  [23872/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.138817  [23936/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.125802  [24000/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.130404  [24064/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.102359  [24128/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.154934  [24192/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.141856  [24256/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.208381  [24320/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.114953  [24384/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.154392  [24448/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.117111  [24512/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.173359  [24576/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.179067  [24640/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.104396  [24704/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.112175  [24768/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.137737  [24832/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.159709  [24872/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.159709  [24872/24872]: : 389it [00:16, 24.10it/s]
-------------------------------
LR=0.0001, batch_size=128
-------------------------------
Epoch 1, time=212.47s

  0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.122156  [  128/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.532525  [  256/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.230023  [  384/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.260358  [  512/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.371063  [  640/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.264400  [  768/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.167511  [  896/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.172567  [ 1024/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.258811  [ 1152/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.223883  [ 1280/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.181570  [ 1408/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.135147  [ 1536/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.193150  [ 1664/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.198653  [ 1792/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.155479  [ 1920/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.129085  [ 2048/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.117739  [ 2176/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.148112  [ 2304/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.158476  [ 2432/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.140710  [ 2560/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.130181  [ 2688/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.114181  [ 2816/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.147795  [ 2944/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.127318  [ 3072/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.137527  [ 3200/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.130001  [ 3328/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.130665  [ 3456/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.112193  [ 3584/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.162338  [ 3712/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.133151  [ 3840/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.116251  [ 3968/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.135087  [ 4096/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.132628  [ 4224/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.111989  [ 4352/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.132752  [ 4480/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.103715  [ 4608/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.112738  [ 4736/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.126775  [ 4864/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.095134  [ 4992/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.106934  [ 5120/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.153226  [ 5248/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.131216  [ 5376/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.124267  [ 5504/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.118697  [ 5632/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.125684  [ 5760/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.114984  [ 5888/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.092487  [ 6016/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.101726  [ 6144/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.121520  [ 6272/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.126651  [ 6400/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.148728  [ 6528/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.123171  [ 6656/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.081301  [ 6784/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.113527  [ 6912/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.098887  [ 7040/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.102354  [ 7168/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.099034  [ 7296/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.100529  [ 7424/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.159161  [ 7552/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.127866  [ 7680/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.124128  [ 7808/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.108941  [ 7936/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.086589  [ 8064/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.101496  [ 8192/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.136796  [ 8320/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.111602  [ 8448/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.093196  [ 8576/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.126907  [ 8704/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.085682  [ 8832/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.112823  [ 8960/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.123603  [ 9088/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.111171  [ 9216/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.116388  [ 9344/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.117584  [ 9472/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.102638  [ 9600/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.111112  [ 9728/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.084484  [ 9856/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.115327  [ 9984/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.107592  [10112/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.127313  [10240/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.128376  [10368/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.106718  [10496/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.095497  [10624/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.132650  [10752/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.088796  [10880/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.115453  [11008/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.097108  [11136/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.130016  [11264/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.084015  [11392/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.125042  [11520/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.096446  [11648/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.122071  [11776/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.102391  [11904/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.111101  [12032/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.105743  [12160/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.130560  [12288/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.081451  [12416/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.118350  [12544/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.156366  [12672/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.129613  [12800/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.126635  [12928/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.113681  [13056/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.129017  [13184/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.094417  [13312/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.152595  [13440/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.135923  [13568/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.142917  [13696/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.096708  [13824/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.145853  [13952/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.094536  [14080/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.098455  [14208/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.087935  [14336/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.111246  [14464/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.093643  [14592/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.100872  [14720/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.100851  [14848/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.093974  [14976/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.107065  [15104/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.121461  [15232/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.121991  [15360/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.103023  [15488/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.117235  [15616/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.102476  [15744/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.112138  [15872/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.087372  [16000/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.101009  [16128/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.117300  [16256/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.105270  [16384/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.127102  [16512/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.144113  [16640/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.106421  [16768/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.131738  [16896/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.118189  [17024/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.111562  [17152/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.122858  [17280/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.125352  [17408/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.142434  [17536/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.113171  [17664/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.102457  [17792/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.110022  [17920/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.107978  [18048/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.105614  [18176/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.102217  [18304/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.114702  [18432/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.098478  [18560/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.112646  [18688/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.091055  [18816/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.094796  [18944/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.091717  [19072/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.093144  [19200/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.122519  [19328/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.114869  [19456/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.122085  [19584/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.129574  [19712/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.093639  [19840/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.120236  [19968/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.095610  [20096/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.097554  [20224/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.121313  [20352/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.083390  [20480/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.131705  [20608/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.117952  [20736/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.137019  [20864/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.109513  [20992/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.120105  [21120/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.119260  [21248/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.097090  [21376/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.109732  [21504/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.104594  [21632/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.087010  [21760/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.105615  [21888/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.104891  [22016/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.144621  [22144/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.103614  [22272/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.105911  [22400/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.103315  [22528/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.126820  [22656/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.117499  [22784/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.096184  [22912/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.083021  [23040/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.124367  [23168/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.152178  [23296/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.132283  [23424/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.127272  [23552/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.119083  [23680/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.096152  [23808/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.112712  [23936/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.114094  [24064/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.102791  [24192/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.156620  [24320/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.124944  [24448/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.095747  [24576/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.098979  [24704/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.106164  [24832/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.147257  [24872/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.147257  [24872/24872]: : 195it [00:11, 16.28it/s]
Epoch 2, time=224.44s

  0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.107305  [  128/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.114779  [  256/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.148364  [  384/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.112922  [  512/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.098234  [  640/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.131202  [  768/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.138700  [  896/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.101445  [ 1024/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.136815  [ 1152/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.099743  [ 1280/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.125671  [ 1408/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.097025  [ 1536/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.129602  [ 1664/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.095975  [ 1792/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.089459  [ 1920/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.093792  [ 2048/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.102970  [ 2176/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.103681  [ 2304/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.092324  [ 2432/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.091613  [ 2560/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.118171  [ 2688/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.095688  [ 2816/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.125213  [ 2944/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.098872  [ 3072/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.107371  [ 3200/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.111503  [ 3328/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.119554  [ 3456/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.096953  [ 3584/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.137932  [ 3712/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.130063  [ 3840/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.114319  [ 3968/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.121181  [ 4096/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.127258  [ 4224/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.103830  [ 4352/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.130245  [ 4480/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.090845  [ 4608/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.104880  [ 4736/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.120434  [ 4864/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.093167  [ 4992/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.103372  [ 5120/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.132143  [ 5248/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.135441  [ 5376/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.131234  [ 5504/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.104222  [ 5632/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.119869  [ 5760/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.107856  [ 5888/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.090401  [ 6016/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.098265  [ 6144/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.116023  [ 6272/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.121026  [ 6400/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.142599  [ 6528/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.117303  [ 6656/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.074856  [ 6784/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.108502  [ 6912/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.095109  [ 7040/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.101717  [ 7168/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.093891  [ 7296/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.090138  [ 7424/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.143951  [ 7552/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.124596  [ 7680/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.122153  [ 7808/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.105557  [ 7936/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.085692  [ 8064/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.101006  [ 8192/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.129942  [ 8320/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.099649  [ 8448/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.090209  [ 8576/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.125162  [ 8704/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.082322  [ 8832/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.101510  [ 8960/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.110344  [ 9088/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.104484  [ 9216/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.112453  [ 9344/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.107816  [ 9472/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.103367  [ 9600/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.092806  [ 9728/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.082589  [ 9856/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.115746  [ 9984/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.106666  [10112/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.124718  [10240/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.127615  [10368/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.103030  [10496/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.086819  [10624/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.119901  [10752/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.098744  [10880/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.104625  [11008/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.093589  [11136/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.121132  [11264/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.081412  [11392/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.119990  [11520/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.093227  [11648/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.117389  [11776/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.099345  [11904/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.112254  [12032/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.100856  [12160/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.128793  [12288/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.075338  [12416/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.112519  [12544/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.147571  [12672/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.121459  [12800/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.131828  [12928/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.112557  [13056/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.125178  [13184/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.082635  [13312/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.146190  [13440/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.120961  [13568/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.132144  [13696/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.088938  [13824/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.132646  [13952/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.086337  [14080/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.091380  [14208/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.075749  [14336/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.095876  [14464/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.091470  [14592/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.096439  [14720/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.097224  [14848/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.087271  [14976/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.105551  [15104/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.120611  [15232/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.121883  [15360/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.100296  [15488/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.114565  [15616/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.110895  [15744/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.108937  [15872/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.082311  [16000/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.102282  [16128/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.110154  [16256/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.102684  [16384/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.125887  [16512/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.133543  [16640/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.110772  [16768/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.118487  [16896/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.111522  [17024/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.107627  [17152/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.113483  [17280/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.119772  [17408/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.135429  [17536/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.106568  [17664/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.099056  [17792/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.108056  [17920/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.105499  [18048/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.106079  [18176/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.094195  [18304/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.112190  [18432/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.097404  [18560/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.109099  [18688/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.090790  [18816/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.088754  [18944/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.085361  [19072/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.085985  [19200/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.119850  [19328/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.110984  [19456/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.116506  [19584/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.125185  [19712/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.086749  [19840/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.109954  [19968/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.107057  [20096/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.095951  [20224/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.122111  [20352/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.087552  [20480/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.140860  [20608/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.110987  [20736/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.135036  [20864/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.107754  [20992/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.121696  [21120/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.114767  [21248/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.102066  [21376/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.102520  [21504/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.102217  [21632/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.076849  [21760/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.099550  [21888/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.098687  [22016/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.124170  [22144/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.101147  [22272/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.096961  [22400/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.099713  [22528/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.117695  [22656/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.106618  [22784/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.088248  [22912/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.070960  [23040/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.119358  [23168/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.134639  [23296/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.126058  [23424/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.126277  [23552/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.114280  [23680/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.096110  [23808/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.111149  [23936/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.125303  [24064/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.100773  [24192/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.146991  [24320/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.115098  [24448/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.090025  [24576/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.096830  [24704/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.095789  [24832/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.146779  [24872/24872]:   0%|          | 0/194 [00:12<?, ?it/s]
loss: 0.146779  [24872/24872]: : 195it [00:12, 16.24it/s]
Epoch 3, time=236.46s

  0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.095773  [  128/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.116752  [  256/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.155022  [  384/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.100028  [  512/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.095155  [  640/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.123847  [  768/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.113446  [  896/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.107440  [ 1024/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.128190  [ 1152/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.099410  [ 1280/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.113363  [ 1408/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.092703  [ 1536/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.129406  [ 1664/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.097643  [ 1792/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.081637  [ 1920/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.105902  [ 2048/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.100972  [ 2176/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.100209  [ 2304/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.091705  [ 2432/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.086306  [ 2560/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.108733  [ 2688/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.092021  [ 2816/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.114824  [ 2944/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.101971  [ 3072/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.100559  [ 3200/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.104831  [ 3328/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.111304  [ 3456/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.093903  [ 3584/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.128623  [ 3712/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.124233  [ 3840/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.103535  [ 3968/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.122377  [ 4096/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.115306  [ 4224/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.095817  [ 4352/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.120287  [ 4480/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.085213  [ 4608/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.100989  [ 4736/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.117907  [ 4864/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.094395  [ 4992/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.105650  [ 5120/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.127287  [ 5248/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.138750  [ 5376/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.123998  [ 5504/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.110407  [ 5632/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.113664  [ 5760/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.099429  [ 5888/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.098197  [ 6016/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.091191  [ 6144/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.105902  [ 6272/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.120664  [ 6400/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.141437  [ 6528/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.126964  [ 6656/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.076101  [ 6784/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.116484  [ 6912/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.094742  [ 7040/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.112411  [ 7168/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.089831  [ 7296/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.100991  [ 7424/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.155167  [ 7552/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.133941  [ 7680/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.119206  [ 7808/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.105543  [ 7936/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.101784  [ 8064/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.096085  [ 8192/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.133418  [ 8320/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.106210  [ 8448/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.093895  [ 8576/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.147552  [ 8704/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.087243  [ 8832/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.156310  [ 8960/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.121189  [ 9088/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.115764  [ 9216/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.124492  [ 9344/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.126963  [ 9472/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.124432  [ 9600/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.114537  [ 9728/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.108429  [ 9856/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.109656  [ 9984/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.120960  [10112/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.121768  [10240/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.129859  [10368/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.103853  [10496/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.094527  [10624/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.132651  [10752/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.083706  [10880/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.106801  [11008/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.090775  [11136/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.123774  [11264/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.082434  [11392/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.124011  [11520/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.092589  [11648/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.115287  [11776/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.100992  [11904/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.110120  [12032/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.102789  [12160/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.122817  [12288/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.078775  [12416/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.114761  [12544/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.154039  [12672/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.117400  [12800/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.125357  [12928/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.104627  [13056/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.117017  [13184/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.079760  [13312/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.135770  [13440/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.120972  [13568/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.126105  [13696/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.086944  [13824/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.134212  [13952/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.095726  [14080/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.096567  [14208/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.077348  [14336/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.089476  [14464/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.089030  [14592/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.094709  [14720/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.096561  [14848/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.088089  [14976/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.102665  [15104/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.117732  [15232/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.116001  [15360/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.092786  [15488/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.120790  [15616/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.097581  [15744/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.098279  [15872/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.081567  [16000/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.095743  [16128/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.110129  [16256/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.094847  [16384/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.127687  [16512/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.131729  [16640/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.108397  [16768/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.116477  [16896/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.106595  [17024/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.111949  [17152/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.117197  [17280/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.119948  [17408/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.123975  [17536/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.100788  [17664/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.096331  [17792/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.100552  [17920/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.101883  [18048/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.102852  [18176/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.089266  [18304/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.108225  [18432/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.092448  [18560/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.105987  [18688/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.085369  [18816/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.084743  [18944/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.087964  [19072/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.081903  [19200/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.121035  [19328/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.105449  [19456/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.109307  [19584/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.117960  [19712/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.084615  [19840/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.102304  [19968/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.082229  [20096/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.088304  [20224/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.111533  [20352/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.080760  [20480/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.115795  [20608/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.109970  [20736/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.137034  [20864/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.104878  [20992/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.131528  [21120/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.111493  [21248/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.100254  [21376/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.099310  [21504/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.098161  [21632/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.073454  [21760/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.097872  [21888/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.100735  [22016/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.126354  [22144/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.102897  [22272/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.087911  [22400/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.099909  [22528/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.109609  [22656/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.106766  [22784/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.083233  [22912/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.069284  [23040/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.113453  [23168/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.135038  [23296/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.122645  [23424/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.117060  [23552/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.108473  [23680/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.085530  [23808/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.108587  [23936/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.107182  [24064/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.098679  [24192/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.136856  [24320/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.127295  [24448/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.093150  [24576/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.111618  [24704/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.142508  [24832/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.132718  [24872/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.132718  [24872/24872]: : 195it [00:11, 16.54it/s]
Epoch 4, time=248.24s

  0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.135374  [  128/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.111067  [  256/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.174731  [  384/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.131826  [  512/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.089872  [  640/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.151521  [  768/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.130673  [  896/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.117038  [ 1024/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.143231  [ 1152/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.110184  [ 1280/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.121925  [ 1408/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.091830  [ 1536/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.140403  [ 1664/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.093813  [ 1792/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.099321  [ 1920/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.104722  [ 2048/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.120762  [ 2176/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.112117  [ 2304/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.092448  [ 2432/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.099880  [ 2560/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.115898  [ 2688/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.108887  [ 2816/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.135578  [ 2944/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.106474  [ 3072/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.127278  [ 3200/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.114720  [ 3328/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.112347  [ 3456/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.095900  [ 3584/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.143814  [ 3712/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.123865  [ 3840/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.114486  [ 3968/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.127136  [ 4096/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.123374  [ 4224/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.106330  [ 4352/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.129601  [ 4480/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.085254  [ 4608/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.102772  [ 4736/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.132967  [ 4864/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.098226  [ 4992/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.101491  [ 5120/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.127849  [ 5248/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.136067  [ 5376/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.130126  [ 5504/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.100074  [ 5632/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.109950  [ 5760/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.106406  [ 5888/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.087880  [ 6016/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.096539  [ 6144/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.101768  [ 6272/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.119012  [ 6400/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.141337  [ 6528/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.109615  [ 6656/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.075213  [ 6784/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.103281  [ 6912/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.097409  [ 7040/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.097977  [ 7168/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.088561  [ 7296/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.083947  [ 7424/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.139651  [ 7552/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.116696  [ 7680/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.107785  [ 7808/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.098728  [ 7936/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.081264  [ 8064/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.091918  [ 8192/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.134417  [ 8320/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.089920  [ 8448/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.085423  [ 8576/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.116639  [ 8704/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.075259  [ 8832/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.102642  [ 8960/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.109253  [ 9088/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.094938  [ 9216/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.106973  [ 9344/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.105862  [ 9472/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.090705  [ 9600/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.101729  [ 9728/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.076310  [ 9856/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.112538  [ 9984/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.095887  [10112/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.126436  [10240/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.113729  [10368/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.106162  [10496/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.092710  [10624/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.123580  [10752/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.092798  [10880/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.122788  [11008/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.103671  [11136/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.123786  [11264/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.080669  [11392/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.113593  [11520/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.094745  [11648/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.111181  [11776/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.102095  [11904/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.101364  [12032/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.103757  [12160/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.128469  [12288/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.077625  [12416/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.112598  [12544/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.145020  [12672/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.114227  [12800/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.120188  [12928/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.102672  [13056/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.114247  [13184/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.085894  [13312/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.136619  [13440/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.119337  [13568/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.119039  [13696/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.085528  [13824/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.128632  [13952/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.095237  [14080/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.097954  [14208/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.082459  [14336/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.090644  [14464/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.087860  [14592/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.101392  [14720/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.093513  [14848/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.096861  [14976/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.105424  [15104/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.113805  [15232/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.116030  [15360/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.089805  [15488/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.118213  [15616/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.097049  [15744/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.102782  [15872/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.077135  [16000/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.099780  [16128/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.106601  [16256/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.094622  [16384/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.121412  [16512/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.128271  [16640/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.098476  [16768/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.112262  [16896/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.098852  [17024/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.107311  [17152/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.107885  [17280/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.114229  [17408/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.116632  [17536/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.100863  [17664/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.100975  [17792/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.100443  [17920/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.111365  [18048/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.110719  [18176/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.085903  [18304/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.122988  [18432/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.097310  [18560/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.114838  [18688/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.079089  [18816/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.082534  [18944/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.096263  [19072/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.083646  [19200/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.120293  [19328/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.110200  [19456/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.115442  [19584/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.115414  [19712/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.081498  [19840/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.106288  [19968/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.097937  [20096/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.088837  [20224/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.108773  [20352/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.078210  [20480/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.113033  [20608/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.112928  [20736/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.127399  [20864/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.103494  [20992/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.109910  [21120/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.108070  [21248/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.098932  [21376/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.098786  [21504/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.100337  [21632/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.071494  [21760/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.096207  [21888/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.100615  [22016/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.133218  [22144/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.099954  [22272/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.085585  [22400/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.097673  [22528/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.110981  [22656/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.096133  [22784/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.086632  [22912/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.072652  [23040/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.127028  [23168/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.133073  [23296/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.121845  [23424/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.117922  [23552/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.103372  [23680/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.093321  [23808/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.112725  [23936/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.110604  [24064/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.092192  [24192/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.132547  [24320/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.099465  [24448/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.092470  [24576/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.097940  [24704/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.095011  [24832/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.125390  [24872/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.125390  [24872/24872]: : 195it [00:11, 16.51it/s]
Epoch 5, time=260.06s

  0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.086734  [  128/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.113330  [  256/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.153846  [  384/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.105647  [  512/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.085745  [  640/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.129264  [  768/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.115729  [  896/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.104496  [ 1024/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.113628  [ 1152/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.107210  [ 1280/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.106721  [ 1408/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.097848  [ 1536/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.140879  [ 1664/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.097755  [ 1792/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.085540  [ 1920/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.112676  [ 2048/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.099909  [ 2176/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.096779  [ 2304/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.090117  [ 2432/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.084252  [ 2560/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.112068  [ 2688/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.089418  [ 2816/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.117494  [ 2944/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.105551  [ 3072/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.101948  [ 3200/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.108484  [ 3328/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.108632  [ 3456/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.091723  [ 3584/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.121193  [ 3712/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.116417  [ 3840/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.103275  [ 3968/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.117264  [ 4096/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.115402  [ 4224/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.099520  [ 4352/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.117024  [ 4480/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.081660  [ 4608/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.094448  [ 4736/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.111009  [ 4864/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.087784  [ 4992/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.091584  [ 5120/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.114355  [ 5248/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.119125  [ 5376/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.115439  [ 5504/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.096546  [ 5632/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.109863  [ 5760/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.094782  [ 5888/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.085500  [ 6016/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.088183  [ 6144/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.089292  [ 6272/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.116973  [ 6400/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.126372  [ 6528/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.106114  [ 6656/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.069902  [ 6784/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.106315  [ 6912/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.095162  [ 7040/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.104339  [ 7168/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.086201  [ 7296/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.087326  [ 7424/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.137546  [ 7552/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.117452  [ 7680/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.106768  [ 7808/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.098493  [ 7936/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.089917  [ 8064/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.091400  [ 8192/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.125611  [ 8320/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.099362  [ 8448/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.083160  [ 8576/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.122269  [ 8704/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.079102  [ 8832/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.129776  [ 8960/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.117235  [ 9088/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.105427  [ 9216/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.109238  [ 9344/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.109366  [ 9472/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.102864  [ 9600/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.113972  [ 9728/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.088334  [ 9856/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.110209  [ 9984/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.116375  [10112/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.122775  [10240/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.143907  [10368/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.098149  [10496/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.105562  [10624/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.111171  [10752/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.110890  [10880/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.100376  [11008/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.112896  [11136/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.122415  [11264/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.080991  [11392/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.121167  [11520/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.090630  [11648/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.120199  [11776/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.095638  [11904/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.120215  [12032/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.097962  [12160/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.124119  [12288/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.077408  [12416/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.111671  [12544/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.150175  [12672/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.112540  [12800/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.131821  [12928/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.098987  [13056/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.117806  [13184/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.078095  [13312/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.136928  [13440/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.108011  [13568/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.121579  [13696/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.081153  [13824/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.127181  [13952/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.086385  [14080/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.087310  [14208/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.070868  [14336/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.081380  [14464/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.081112  [14592/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.088942  [14720/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.086706  [14848/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.083129  [14976/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.102596  [15104/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.109019  [15232/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.106676  [15360/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.086323  [15488/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.106982  [15616/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.093570  [15744/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.094113  [15872/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.088369  [16000/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.092959  [16128/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.115068  [16256/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.091986  [16384/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.121271  [16512/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.119999  [16640/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.100182  [16768/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.113229  [16896/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.095942  [17024/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.109780  [17152/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.095973  [17280/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.115897  [17408/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.112721  [17536/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.094265  [17664/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.095343  [17792/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.098880  [17920/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.104944  [18048/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.098531  [18176/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.080899  [18304/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.111875  [18432/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.088326  [18560/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.098957  [18688/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.076304  [18816/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.077784  [18944/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.090157  [19072/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.082571  [19200/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.111141  [19328/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.106464  [19456/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.097029  [19584/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.113249  [19712/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.079545  [19840/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.089927  [19968/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.091489  [20096/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.088602  [20224/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.104613  [20352/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.072484  [20480/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.103270  [20608/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.109971  [20736/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.129122  [20864/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.094443  [20992/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.115960  [21120/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.103204  [21248/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.094947  [21376/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.092436  [21504/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.089319  [21632/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.070861  [21760/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.089867  [21888/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.098957  [22016/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.118739  [22144/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.100127  [22272/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.087348  [22400/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.096856  [22528/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.109438  [22656/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.092895  [22784/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.092426  [22912/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.073941  [23040/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.108748  [23168/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.133846  [23296/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.123218  [23424/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.108719  [23552/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.100829  [23680/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.082881  [23808/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.114438  [23936/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.104261  [24064/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.091799  [24192/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.143965  [24320/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.092299  [24448/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.088934  [24576/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.084905  [24704/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.093388  [24832/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.119240  [24872/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.119240  [24872/24872]: : 195it [00:11, 16.58it/s]
Epoch 6, time=271.82s

  0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.089978  [  128/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.097510  [  256/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.134250  [  384/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.099042  [  512/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.078824  [  640/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.111546  [  768/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.112137  [  896/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.090104  [ 1024/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.112216  [ 1152/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.081722  [ 1280/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.100992  [ 1408/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.080644  [ 1536/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.118227  [ 1664/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.081838  [ 1792/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.080418  [ 1920/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.091068  [ 2048/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.105800  [ 2176/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.086459  [ 2304/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.087434  [ 2432/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.078494  [ 2560/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.102631  [ 2688/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.087619  [ 2816/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.108094  [ 2944/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.104099  [ 3072/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.094748  [ 3200/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.103652  [ 3328/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.114145  [ 3456/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.095169  [ 3584/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.121442  [ 3712/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.117018  [ 3840/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.096357  [ 3968/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.110104  [ 4096/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.105597  [ 4224/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.095251  [ 4352/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.108159  [ 4480/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.077132  [ 4608/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.091196  [ 4736/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.113955  [ 4864/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.089608  [ 4992/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.095208  [ 5120/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.111980  [ 5248/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.112791  [ 5376/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.117910  [ 5504/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.096388  [ 5632/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.099514  [ 5760/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.093931  [ 5888/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.083099  [ 6016/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.087119  [ 6144/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.088613  [ 6272/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.113439  [ 6400/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.113884  [ 6528/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.100652  [ 6656/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.066689  [ 6784/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.095393  [ 6912/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.087704  [ 7040/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.090307  [ 7168/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.081560  [ 7296/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.092488  [ 7424/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.123035  [ 7552/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.102320  [ 7680/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.106450  [ 7808/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.096580  [ 7936/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.084258  [ 8064/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.089448  [ 8192/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.127187  [ 8320/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.082507  [ 8448/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.081428  [ 8576/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.100322  [ 8704/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.073122  [ 8832/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.100338  [ 8960/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.105269  [ 9088/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.097959  [ 9216/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.096389  [ 9344/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.098683  [ 9472/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.094094  [ 9600/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.088014  [ 9728/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.071243  [ 9856/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.100963  [ 9984/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.100369  [10112/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.110423  [10240/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.105602  [10368/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.090433  [10496/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.076926  [10624/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.110837  [10752/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.078361  [10880/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.095650  [11008/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.087548  [11136/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.114357  [11264/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.074214  [11392/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.114810  [11520/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.084231  [11648/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.104605  [11776/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.092587  [11904/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.100420  [12032/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.092173  [12160/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.115180  [12288/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.067031  [12416/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.096756  [12544/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.132679  [12672/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.107433  [12800/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.110645  [12928/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.089449  [13056/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.113611  [13184/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.072472  [13312/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.129041  [13440/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.104773  [13568/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.115107  [13696/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.079521  [13824/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.128622  [13952/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.082759  [14080/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.087243  [14208/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.074627  [14336/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.093604  [14464/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.088274  [14592/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.089390  [14720/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.092463  [14848/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.088297  [14976/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.099423  [15104/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.116228  [15232/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.104428  [15360/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.100301  [15488/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.099587  [15616/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.095550  [15744/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.089489  [15872/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.074197  [16000/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.093647  [16128/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.103075  [16256/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.090960  [16384/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.119221  [16512/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.121708  [16640/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.097291  [16768/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.107872  [16896/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.097154  [17024/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.094398  [17152/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.096166  [17280/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.110396  [17408/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.114369  [17536/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.095184  [17664/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.092425  [17792/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.084882  [17920/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.103864  [18048/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.100141  [18176/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.082516  [18304/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.116720  [18432/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.090227  [18560/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.107126  [18688/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.070657  [18816/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.080097  [18944/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.096103  [19072/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.078128  [19200/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.111681  [19328/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.102159  [19456/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.100453  [19584/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.119626  [19712/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.078265  [19840/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.095018  [19968/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.089357  [20096/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.086828  [20224/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.121732  [20352/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.073293  [20480/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.116543  [20608/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.109598  [20736/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.126783  [20864/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.093351  [20992/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.109621  [21120/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.115976  [21248/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.103990  [21376/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.100838  [21504/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.105953  [21632/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.070661  [21760/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.092944  [21888/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.093128  [22016/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.110312  [22144/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.085797  [22272/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.083524  [22400/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.086886  [22528/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.100244  [22656/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.088413  [22784/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.083506  [22912/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.067924  [23040/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.122870  [23168/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.124282  [23296/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.113686  [23424/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.110884  [23552/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.096449  [23680/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.084182  [23808/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.100223  [23936/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.108465  [24064/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.092804  [24192/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.123724  [24320/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.100985  [24448/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.084884  [24576/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.084692  [24704/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.085637  [24832/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.112173  [24872/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.112173  [24872/24872]: : 195it [00:11, 16.73it/s]
-------------------------------
LR=1e-05, batch_size=256
-------------------------------
Epoch 1, time=283.48s

  0%|          | 0/97 [00:00<?, ?it/s]
loss: 0.094213  [  256/24872]:   0%|          | 0/97 [00:00<?, ?it/s]
loss: 0.116586  [  512/24872]:   0%|          | 0/97 [00:00<?, ?it/s]
loss: 0.090711  [  768/24872]:   0%|          | 0/97 [00:00<?, ?it/s]
loss: 0.098942  [ 1024/24872]:   0%|          | 0/97 [00:00<?, ?it/s]
loss: 0.091897  [ 1280/24872]:   0%|          | 0/97 [00:00<?, ?it/s]
loss: 0.087287  [ 1536/24872]:   0%|          | 0/97 [00:00<?, ?it/s]
loss: 0.097673  [ 1792/24872]:   0%|          | 0/97 [00:00<?, ?it/s]
loss: 0.083586  [ 2048/24872]:   0%|          | 0/97 [00:00<?, ?it/s]
loss: 0.092909  [ 2304/24872]:   0%|          | 0/97 [00:00<?, ?it/s]
loss: 0.079073  [ 2560/24872]:   0%|          | 0/97 [00:00<?, ?it/s]
loss: 0.096799  [ 2816/24872]:   0%|          | 0/97 [00:00<?, ?it/s]
loss: 0.084060  [ 3072/24872]:   0%|          | 0/97 [00:01<?, ?it/s]
loss: 0.090736  [ 3328/24872]:   0%|          | 0/97 [00:01<?, ?it/s]
loss: 0.086189  [ 3584/24872]:   0%|          | 0/97 [00:01<?, ?it/s]
loss: 0.109929  [ 3840/24872]:   0%|          | 0/97 [00:01<?, ?it/s]
loss: 0.099515  [ 4096/24872]:   0%|          | 0/97 [00:01<?, ?it/s]
loss: 0.103230  [ 4352/24872]:   0%|          | 0/97 [00:01<?, ?it/s]
loss: 0.085613  [ 4608/24872]:   0%|          | 0/97 [00:01<?, ?it/s]
loss: 0.098835  [ 4864/24872]:   0%|          | 0/97 [00:01<?, ?it/s]
loss: 0.078482  [ 5120/24872]:   0%|          | 0/97 [00:01<?, ?it/s]
loss: 0.106432  [ 5376/24872]:   0%|          | 0/97 [00:01<?, ?it/s]
loss: 0.093882  [ 5632/24872]:   0%|          | 0/97 [00:01<?, ?it/s]
loss: 0.097403  [ 5888/24872]:   0%|          | 0/97 [00:02<?, ?it/s]
loss: 0.076952  [ 6144/24872]:   0%|          | 0/97 [00:02<?, ?it/s]
loss: 0.095374  [ 6400/24872]:   0%|          | 0/97 [00:02<?, ?it/s]
loss: 0.091011  [ 6656/24872]:   0%|          | 0/97 [00:02<?, ?it/s]
loss: 0.074658  [ 6912/24872]:   0%|          | 0/97 [00:02<?, ?it/s]
loss: 0.084081  [ 7168/24872]:   0%|          | 0/97 [00:02<?, ?it/s]
loss: 0.076612  [ 7424/24872]:   0%|          | 0/97 [00:02<?, ?it/s]
loss: 0.108627  [ 7680/24872]:   0%|          | 0/97 [00:02<?, ?it/s]
loss: 0.084431  [ 7936/24872]:   0%|          | 0/97 [00:02<?, ?it/s]
loss: 0.076183  [ 8192/24872]:   0%|          | 0/97 [00:02<?, ?it/s]
loss: 0.095250  [ 8448/24872]:   0%|          | 0/97 [00:02<?, ?it/s]
loss: 0.085050  [ 8704/24872]:   0%|          | 0/97 [00:02<?, ?it/s]
loss: 0.072636  [ 8960/24872]:   0%|          | 0/97 [00:03<?, ?it/s]
loss: 0.088514  [ 9216/24872]:   0%|          | 0/97 [00:03<?, ?it/s]
loss: 0.087395  [ 9472/24872]:   0%|          | 0/97 [00:03<?, ?it/s]
loss: 0.074884  [ 9728/24872]:   0%|          | 0/97 [00:03<?, ?it/s]
loss: 0.080439  [ 9984/24872]:   0%|          | 0/97 [00:03<?, ?it/s]
loss: 0.087580  [10240/24872]:   0%|          | 0/97 [00:03<?, ?it/s]
loss: 0.090822  [10496/24872]:   0%|          | 0/97 [00:03<?, ?it/s]
loss: 0.084174  [10752/24872]:   0%|          | 0/97 [00:03<?, ?it/s]
loss: 0.074893  [11008/24872]:   0%|          | 0/97 [00:03<?, ?it/s]
loss: 0.089407  [11264/24872]:   0%|          | 0/97 [00:03<?, ?it/s]
loss: 0.080884  [11520/24872]:   0%|          | 0/97 [00:03<?, ?it/s]
loss: 0.080058  [11776/24872]:   0%|          | 0/97 [00:04<?, ?it/s]
loss: 0.081448  [12032/24872]:   0%|          | 0/97 [00:04<?, ?it/s]
loss: 0.093950  [12288/24872]:   0%|          | 0/97 [00:04<?, ?it/s]
loss: 0.073966  [12544/24872]:   0%|          | 0/97 [00:04<?, ?it/s]
loss: 0.108944  [12800/24872]:   0%|          | 0/97 [00:04<?, ?it/s]
loss: 0.090052  [13056/24872]:   0%|          | 0/97 [00:04<?, ?it/s]
loss: 0.081634  [13312/24872]:   0%|          | 0/97 [00:04<?, ?it/s]
loss: 0.107067  [13568/24872]:   0%|          | 0/97 [00:04<?, ?it/s]
loss: 0.086885  [13824/24872]:   0%|          | 0/97 [00:04<?, ?it/s]
loss: 0.093123  [14080/24872]:   0%|          | 0/97 [00:04<?, ?it/s]
loss: 0.069636  [14336/24872]:   0%|          | 0/97 [00:04<?, ?it/s]
loss: 0.069092  [14592/24872]:   0%|          | 0/97 [00:04<?, ?it/s]
loss: 0.078140  [14848/24872]:   0%|          | 0/97 [00:05<?, ?it/s]
loss: 0.079016  [15104/24872]:   0%|          | 0/97 [00:05<?, ?it/s]
loss: 0.098032  [15360/24872]:   0%|          | 0/97 [00:05<?, ?it/s]
loss: 0.084350  [15616/24872]:   0%|          | 0/97 [00:05<?, ?it/s]
loss: 0.084317  [15872/24872]:   0%|          | 0/97 [00:05<?, ?it/s]
loss: 0.076037  [16128/24872]:   0%|          | 0/97 [00:05<?, ?it/s]
loss: 0.088324  [16384/24872]:   0%|          | 0/97 [00:05<?, ?it/s]
loss: 0.109893  [16640/24872]:   0%|          | 0/97 [00:05<?, ?it/s]
loss: 0.093443  [16896/24872]:   0%|          | 0/97 [00:05<?, ?it/s]
loss: 0.087997  [17152/24872]:   0%|          | 0/97 [00:05<?, ?it/s]
loss: 0.097372  [17408/24872]:   0%|          | 0/97 [00:05<?, ?it/s]
loss: 0.097424  [17664/24872]:   0%|          | 0/97 [00:06<?, ?it/s]
loss: 0.083162  [17920/24872]:   0%|          | 0/97 [00:06<?, ?it/s]
loss: 0.085789  [18176/24872]:   0%|          | 0/97 [00:06<?, ?it/s]
loss: 0.085701  [18432/24872]:   0%|          | 0/97 [00:06<?, ?it/s]
loss: 0.083752  [18688/24872]:   0%|          | 0/97 [00:06<?, ?it/s]
loss: 0.070727  [18944/24872]:   0%|          | 0/97 [00:06<?, ?it/s]
loss: 0.074059  [19200/24872]:   0%|          | 0/97 [00:06<?, ?it/s]
loss: 0.097433  [19456/24872]:   0%|          | 0/97 [00:06<?, ?it/s]
loss: 0.089015  [19712/24872]:   0%|          | 0/97 [00:06<?, ?it/s]
loss: 0.074259  [19968/24872]:   0%|          | 0/97 [00:06<?, ?it/s]
loss: 0.078170  [20224/24872]:   0%|          | 0/97 [00:06<?, ?it/s]
loss: 0.077103  [20480/24872]:   0%|          | 0/97 [00:06<?, ?it/s]
loss: 0.092410  [20736/24872]:   0%|          | 0/97 [00:07<?, ?it/s]
loss: 0.096658  [20992/24872]:   0%|          | 0/97 [00:07<?, ?it/s]
loss: 0.091319  [21248/24872]:   0%|          | 0/97 [00:07<?, ?it/s]
loss: 0.085325  [21504/24872]:   0%|          | 0/97 [00:07<?, ?it/s]
loss: 0.073085  [21760/24872]:   0%|          | 0/97 [00:07<?, ?it/s]
loss: 0.082501  [22016/24872]:   0%|          | 0/97 [00:07<?, ?it/s]
loss: 0.093628  [22272/24872]:   0%|          | 0/97 [00:07<?, ?it/s]
loss: 0.079266  [22528/24872]:   0%|          | 0/97 [00:07<?, ?it/s]
loss: 0.084848  [22784/24872]:   0%|          | 0/97 [00:07<?, ?it/s]
loss: 0.063658  [23040/24872]:   0%|          | 0/97 [00:07<?, ?it/s]
loss: 0.106770  [23296/24872]:   0%|          | 0/97 [00:07<?, ?it/s]
loss: 0.104110  [23552/24872]:   0%|          | 0/97 [00:08<?, ?it/s]
loss: 0.078660  [23808/24872]:   0%|          | 0/97 [00:08<?, ?it/s]
loss: 0.092491  [24064/24872]:   0%|          | 0/97 [00:08<?, ?it/s]
loss: 0.094260  [24320/24872]:   0%|          | 0/97 [00:08<?, ?it/s]
loss: 0.080091  [24576/24872]:   0%|          | 0/97 [00:08<?, ?it/s]
loss: 0.073125  [24832/24872]:   0%|          | 0/97 [00:08<?, ?it/s]
loss: 0.106891  [24872/24872]:   0%|          | 0/97 [00:08<?, ?it/s]
loss: 0.106891  [24872/24872]: : 98it [00:08, 11.51it/s]
Epoch 2, time=291.99s

  0%|          | 0/97 [00:00<?, ?it/s]
loss: 0.082805  [  256/24872]:   0%|          | 0/97 [00:00<?, ?it/s]
loss: 0.101750  [  512/24872]:   0%|          | 0/97 [00:00<?, ?it/s]
loss: 0.084893  [  768/24872]:   0%|          | 0/97 [00:00<?, ?it/s]
loss: 0.090809  [ 1024/24872]:   0%|          | 0/97 [00:00<?, ?it/s]
loss: 0.086020  [ 1280/24872]:   0%|          | 0/97 [00:00<?, ?it/s]
loss: 0.082086  [ 1536/24872]:   0%|          | 0/97 [00:00<?, ?it/s]
loss: 0.089244  [ 1792/24872]:   0%|          | 0/97 [00:00<?, ?it/s]
loss: 0.080428  [ 2048/24872]:   0%|          | 0/97 [00:00<?, ?it/s]
loss: 0.088275  [ 2304/24872]:   0%|          | 0/97 [00:00<?, ?it/s]
loss: 0.076363  [ 2560/24872]:   0%|          | 0/97 [00:00<?, ?it/s]
loss: 0.093737  [ 2816/24872]:   0%|          | 0/97 [00:00<?, ?it/s]
loss: 0.079251  [ 3072/24872]:   0%|          | 0/97 [00:01<?, ?it/s]
loss: 0.088477  [ 3328/24872]:   0%|          | 0/97 [00:01<?, ?it/s]
loss: 0.083839  [ 3584/24872]:   0%|          | 0/97 [00:01<?, ?it/s]
loss: 0.105294  [ 3840/24872]:   0%|          | 0/97 [00:01<?, ?it/s]
loss: 0.097829  [ 4096/24872]:   0%|          | 0/97 [00:01<?, ?it/s]
loss: 0.099408  [ 4352/24872]:   0%|          | 0/97 [00:01<?, ?it/s]
loss: 0.083745  [ 4608/24872]:   0%|          | 0/97 [00:01<?, ?it/s]
loss: 0.092101  [ 4864/24872]:   0%|          | 0/97 [00:01<?, ?it/s]
loss: 0.075847  [ 5120/24872]:   0%|          | 0/97 [00:01<?, ?it/s]
loss: 0.105517  [ 5376/24872]:   0%|          | 0/97 [00:01<?, ?it/s]
loss: 0.091249  [ 5632/24872]:   0%|          | 0/97 [00:01<?, ?it/s]
loss: 0.093576  [ 5888/24872]:   0%|          | 0/97 [00:02<?, ?it/s]
loss: 0.075257  [ 6144/24872]:   0%|          | 0/97 [00:02<?, ?it/s]
loss: 0.092287  [ 6400/24872]:   0%|          | 0/97 [00:02<?, ?it/s]
loss: 0.088650  [ 6656/24872]:   0%|          | 0/97 [00:02<?, ?it/s]
loss: 0.072178  [ 6912/24872]:   0%|          | 0/97 [00:02<?, ?it/s]
loss: 0.079838  [ 7168/24872]:   0%|          | 0/97 [00:02<?, ?it/s]
loss: 0.073444  [ 7424/24872]:   0%|          | 0/97 [00:02<?, ?it/s]
loss: 0.098744  [ 7680/24872]:   0%|          | 0/97 [00:02<?, ?it/s]
loss: 0.082209  [ 7936/24872]:   0%|          | 0/97 [00:02<?, ?it/s]
loss: 0.074963  [ 8192/24872]:   0%|          | 0/97 [00:02<?, ?it/s]
loss: 0.091284  [ 8448/24872]:   0%|          | 0/97 [00:02<?, ?it/s]
loss: 0.084904  [ 8704/24872]:   0%|          | 0/97 [00:03<?, ?it/s]
loss: 0.074025  [ 8960/24872]:   0%|          | 0/97 [00:03<?, ?it/s]
loss: 0.085706  [ 9216/24872]:   0%|          | 0/97 [00:03<?, ?it/s]
loss: 0.086985  [ 9472/24872]:   0%|          | 0/97 [00:03<?, ?it/s]
loss: 0.080180  [ 9728/24872]:   0%|          | 0/97 [00:03<?, ?it/s]
loss: 0.078058  [ 9984/24872]:   0%|          | 0/97 [00:03<?, ?it/s]
loss: 0.086914  [10240/24872]:   0%|          | 0/97 [00:03<?, ?it/s]
loss: 0.091526  [10496/24872]:   0%|          | 0/97 [00:03<?, ?it/s]
loss: 0.086341  [10752/24872]:   0%|          | 0/97 [00:03<?, ?it/s]
loss: 0.072348  [11008/24872]:   0%|          | 0/97 [00:03<?, ?it/s]
loss: 0.090982  [11264/24872]:   0%|          | 0/97 [00:03<?, ?it/s]
loss: 0.080346  [11520/24872]:   0%|          | 0/97 [00:03<?, ?it/s]
loss: 0.078043  [11776/24872]:   0%|          | 0/97 [00:04<?, ?it/s]
loss: 0.080256  [12032/24872]:   0%|          | 0/97 [00:04<?, ?it/s]
loss: 0.096111  [12288/24872]:   0%|          | 0/97 [00:04<?, ?it/s]
loss: 0.072834  [12544/24872]:   0%|          | 0/97 [00:04<?, ?it/s]
loss: 0.108325  [12800/24872]:   0%|          | 0/97 [00:04<?, ?it/s]
loss: 0.088881  [13056/24872]:   0%|          | 0/97 [00:04<?, ?it/s]
loss: 0.080755  [13312/24872]:   0%|          | 0/97 [00:04<?, ?it/s]
loss: 0.111474  [13568/24872]:   0%|          | 0/97 [00:04<?, ?it/s]
loss: 0.085013  [13824/24872]:   0%|          | 0/97 [00:04<?, ?it/s]
loss: 0.092690  [14080/24872]:   0%|          | 0/97 [00:04<?, ?it/s]
loss: 0.068797  [14336/24872]:   0%|          | 0/97 [00:04<?, ?it/s]
loss: 0.066769  [14592/24872]:   0%|          | 0/97 [00:05<?, ?it/s]
loss: 0.077365  [14848/24872]:   0%|          | 0/97 [00:05<?, ?it/s]
loss: 0.077262  [15104/24872]:   0%|          | 0/97 [00:05<?, ?it/s]
loss: 0.096980  [15360/24872]:   0%|          | 0/97 [00:05<?, ?it/s]
loss: 0.081922  [15616/24872]:   0%|          | 0/97 [00:05<?, ?it/s]
loss: 0.079205  [15872/24872]:   0%|          | 0/97 [00:05<?, ?it/s]
loss: 0.075253  [16128/24872]:   0%|          | 0/97 [00:05<?, ?it/s]
loss: 0.085663  [16384/24872]:   0%|          | 0/97 [00:05<?, ?it/s]
loss: 0.105200  [16640/24872]:   0%|          | 0/97 [00:05<?, ?it/s]
loss: 0.092458  [16896/24872]:   0%|          | 0/97 [00:05<?, ?it/s]
loss: 0.083709  [17152/24872]:   0%|          | 0/97 [00:05<?, ?it/s]
loss: 0.093082  [17408/24872]:   0%|          | 0/97 [00:06<?, ?it/s]
loss: 0.092877  [17664/24872]:   0%|          | 0/97 [00:06<?, ?it/s]
loss: 0.080572  [17920/24872]:   0%|          | 0/97 [00:06<?, ?it/s]
loss: 0.084242  [18176/24872]:   0%|          | 0/97 [00:06<?, ?it/s]
loss: 0.087011  [18432/24872]:   0%|          | 0/97 [00:06<?, ?it/s]
loss: 0.082381  [18688/24872]:   0%|          | 0/97 [00:06<?, ?it/s]
loss: 0.068107  [18944/24872]:   0%|          | 0/97 [00:06<?, ?it/s]
loss: 0.071558  [19200/24872]:   0%|          | 0/97 [00:06<?, ?it/s]
loss: 0.095729  [19456/24872]:   0%|          | 0/97 [00:06<?, ?it/s]
loss: 0.087071  [19712/24872]:   0%|          | 0/97 [00:06<?, ?it/s]
loss: 0.073448  [19968/24872]:   0%|          | 0/97 [00:06<?, ?it/s]
loss: 0.078611  [20224/24872]:   0%|          | 0/97 [00:06<?, ?it/s]
loss: 0.076077  [20480/24872]:   0%|          | 0/97 [00:07<?, ?it/s]
loss: 0.091027  [20736/24872]:   0%|          | 0/97 [00:07<?, ?it/s]
loss: 0.095250  [20992/24872]:   0%|          | 0/97 [00:07<?, ?it/s]
loss: 0.090685  [21248/24872]:   0%|          | 0/97 [00:07<?, ?it/s]
loss: 0.083847  [21504/24872]:   0%|          | 0/97 [00:07<?, ?it/s]
loss: 0.071178  [21760/24872]:   0%|          | 0/97 [00:07<?, ?it/s]
loss: 0.081360  [22016/24872]:   0%|          | 0/97 [00:07<?, ?it/s]
loss: 0.091122  [22272/24872]:   0%|          | 0/97 [00:07<?, ?it/s]
loss: 0.078179  [22528/24872]:   0%|          | 0/97 [00:07<?, ?it/s]
loss: 0.083583  [22784/24872]:   0%|          | 0/97 [00:07<?, ?it/s]
loss: 0.063274  [23040/24872]:   0%|          | 0/97 [00:07<?, ?it/s]
loss: 0.105656  [23296/24872]:   0%|          | 0/97 [00:08<?, ?it/s]
loss: 0.104004  [23552/24872]:   0%|          | 0/97 [00:08<?, ?it/s]
loss: 0.077727  [23808/24872]:   0%|          | 0/97 [00:08<?, ?it/s]
loss: 0.091738  [24064/24872]:   0%|          | 0/97 [00:08<?, ?it/s]
loss: 0.092727  [24320/24872]:   0%|          | 0/97 [00:08<?, ?it/s]
loss: 0.078961  [24576/24872]:   0%|          | 0/97 [00:08<?, ?it/s]
loss: 0.071645  [24832/24872]:   0%|          | 0/97 [00:08<?, ?it/s]
loss: 0.103781  [24872/24872]:   0%|          | 0/97 [00:08<?, ?it/s]
loss: 0.103781  [24872/24872]: : 98it [00:08, 11.41it/s]
Epoch 3, time=300.59s

  0%|          | 0/97 [00:00<?, ?it/s]
loss: 0.080721  [  256/24872]:   0%|          | 0/97 [00:00<?, ?it/s]
loss: 0.100496  [  512/24872]:   0%|          | 0/97 [00:00<?, ?it/s]
loss: 0.083580  [  768/24872]:   0%|          | 0/97 [00:00<?, ?it/s]
loss: 0.090124  [ 1024/24872]:   0%|          | 0/97 [00:00<?, ?it/s]
loss: 0.085342  [ 1280/24872]:   0%|          | 0/97 [00:00<?, ?it/s]
loss: 0.080981  [ 1536/24872]:   0%|          | 0/97 [00:00<?, ?it/s]
loss: 0.086950  [ 1792/24872]:   0%|          | 0/97 [00:00<?, ?it/s]
loss: 0.079358  [ 2048/24872]:   0%|          | 0/97 [00:00<?, ?it/s]
loss: 0.085183  [ 2304/24872]:   0%|          | 0/97 [00:00<?, ?it/s]
loss: 0.074113  [ 2560/24872]:   0%|          | 0/97 [00:00<?, ?it/s]
loss: 0.091179  [ 2816/24872]:   0%|          | 0/97 [00:00<?, ?it/s]
loss: 0.076848  [ 3072/24872]:   0%|          | 0/97 [00:01<?, ?it/s]
loss: 0.086388  [ 3328/24872]:   0%|          | 0/97 [00:01<?, ?it/s]
loss: 0.082872  [ 3584/24872]:   0%|          | 0/97 [00:01<?, ?it/s]
loss: 0.102404  [ 3840/24872]:   0%|          | 0/97 [00:01<?, ?it/s]
loss: 0.096539  [ 4096/24872]:   0%|          | 0/97 [00:01<?, ?it/s]
loss: 0.094900  [ 4352/24872]:   0%|          | 0/97 [00:01<?, ?it/s]
loss: 0.082795  [ 4608/24872]:   0%|          | 0/97 [00:01<?, ?it/s]
loss: 0.086604  [ 4864/24872]:   0%|          | 0/97 [00:01<?, ?it/s]
loss: 0.075205  [ 5120/24872]:   0%|          | 0/97 [00:01<?, ?it/s]
loss: 0.104727  [ 5376/24872]:   0%|          | 0/97 [00:01<?, ?it/s]
loss: 0.089960  [ 5632/24872]:   0%|          | 0/97 [00:01<?, ?it/s]
loss: 0.087702  [ 5888/24872]:   0%|          | 0/97 [00:02<?, ?it/s]
loss: 0.074787  [ 6144/24872]:   0%|          | 0/97 [00:02<?, ?it/s]
loss: 0.088691  [ 6400/24872]:   0%|          | 0/97 [00:02<?, ?it/s]
loss: 0.087954  [ 6656/24872]:   0%|          | 0/97 [00:02<?, ?it/s]
loss: 0.071115  [ 6912/24872]:   0%|          | 0/97 [00:02<?, ?it/s]
loss: 0.078559  [ 7168/24872]:   0%|          | 0/97 [00:02<?, ?it/s]
loss: 0.072509  [ 7424/24872]:   0%|          | 0/97 [00:02<?, ?it/s]
loss: 0.099391  [ 7680/24872]:   0%|          | 0/97 [00:02<?, ?it/s]
loss: 0.080933  [ 7936/24872]:   0%|          | 0/97 [00:02<?, ?it/s]
loss: 0.074169  [ 8192/24872]:   0%|          | 0/97 [00:02<?, ?it/s]
loss: 0.089439  [ 8448/24872]:   0%|          | 0/97 [00:02<?, ?it/s]
loss: 0.082660  [ 8704/24872]:   0%|          | 0/97 [00:03<?, ?it/s]
loss: 0.070691  [ 8960/24872]:   0%|          | 0/97 [00:03<?, ?it/s]
loss: 0.084248  [ 9216/24872]:   0%|          | 0/97 [00:03<?, ?it/s]
loss: 0.086013  [ 9472/24872]:   0%|          | 0/97 [00:03<?, ?it/s]
loss: 0.080274  [ 9728/24872]:   0%|          | 0/97 [00:03<?, ?it/s]
loss: 0.077178  [ 9984/24872]:   0%|          | 0/97 [00:03<?, ?it/s]
loss: 0.086887  [10240/24872]:   0%|          | 0/97 [00:03<?, ?it/s]
loss: 0.091201  [10496/24872]:   0%|          | 0/97 [00:03<?, ?it/s]
loss: 0.090802  [10752/24872]:   0%|          | 0/97 [00:03<?, ?it/s]
loss: 0.071140  [11008/24872]:   0%|          | 0/97 [00:03<?, ?it/s]
loss: 0.087468  [11264/24872]:   0%|          | 0/97 [00:03<?, ?it/s]
loss: 0.079865  [11520/24872]:   0%|          | 0/97 [00:04<?, ?it/s]
loss: 0.076824  [11776/24872]:   0%|          | 0/97 [00:04<?, ?it/s]
loss: 0.080708  [12032/24872]:   0%|          | 0/97 [00:04<?, ?it/s]
loss: 0.095564  [12288/24872]:   0%|          | 0/97 [00:04<?, ?it/s]
loss: 0.071748  [12544/24872]:   0%|          | 0/97 [00:04<?, ?it/s]
loss: 0.109829  [12800/24872]:   0%|          | 0/97 [00:04<?, ?it/s]
loss: 0.088006  [13056/24872]:   0%|          | 0/97 [00:04<?, ?it/s]
loss: 0.080252  [13312/24872]:   0%|          | 0/97 [00:04<?, ?it/s]
loss: 0.112197  [13568/24872]:   0%|          | 0/97 [00:04<?, ?it/s]
loss: 0.084423  [13824/24872]:   0%|          | 0/97 [00:04<?, ?it/s]
loss: 0.092777  [14080/24872]:   0%|          | 0/97 [00:04<?, ?it/s]
loss: 0.068101  [14336/24872]:   0%|          | 0/97 [00:05<?, ?it/s]
loss: 0.065700  [14592/24872]:   0%|          | 0/97 [00:05<?, ?it/s]
loss: 0.076886  [14848/24872]:   0%|          | 0/97 [00:05<?, ?it/s]
loss: 0.076262  [15104/24872]:   0%|          | 0/97 [00:05<?, ?it/s]
loss: 0.097174  [15360/24872]:   0%|          | 0/97 [00:05<?, ?it/s]
loss: 0.081189  [15616/24872]:   0%|          | 0/97 [00:05<?, ?it/s]
loss: 0.077630  [15872/24872]:   0%|          | 0/97 [00:05<?, ?it/s]
loss: 0.074660  [16128/24872]:   0%|          | 0/97 [00:05<?, ?it/s]
loss: 0.084855  [16384/24872]:   0%|          | 0/97 [00:05<?, ?it/s]
loss: 0.103580  [16640/24872]:   0%|          | 0/97 [00:05<?, ?it/s]
loss: 0.092566  [16896/24872]:   0%|          | 0/97 [00:05<?, ?it/s]
loss: 0.082691  [17152/24872]:   0%|          | 0/97 [00:05<?, ?it/s]
loss: 0.092215  [17408/24872]:   0%|          | 0/97 [00:06<?, ?it/s]
loss: 0.090832  [17664/24872]:   0%|          | 0/97 [00:06<?, ?it/s]
loss: 0.079401  [17920/24872]:   0%|          | 0/97 [00:06<?, ?it/s]
loss: 0.084631  [18176/24872]:   0%|          | 0/97 [00:06<?, ?it/s]
loss: 0.085921  [18432/24872]:   0%|          | 0/97 [00:06<?, ?it/s]
loss: 0.081711  [18688/24872]:   0%|          | 0/97 [00:06<?, ?it/s]
loss: 0.066162  [18944/24872]:   0%|          | 0/97 [00:06<?, ?it/s]
loss: 0.071086  [19200/24872]:   0%|          | 0/97 [00:06<?, ?it/s]
loss: 0.094446  [19456/24872]:   0%|          | 0/97 [00:06<?, ?it/s]
loss: 0.085940  [19712/24872]:   0%|          | 0/97 [00:06<?, ?it/s]
loss: 0.073759  [19968/24872]:   0%|          | 0/97 [00:06<?, ?it/s]
loss: 0.079514  [20224/24872]:   0%|          | 0/97 [00:07<?, ?it/s]
loss: 0.075561  [20480/24872]:   0%|          | 0/97 [00:07<?, ?it/s]
loss: 0.090293  [20736/24872]:   0%|          | 0/97 [00:07<?, ?it/s]
loss: 0.094390  [20992/24872]:   0%|          | 0/97 [00:07<?, ?it/s]
loss: 0.090522  [21248/24872]:   0%|          | 0/97 [00:07<?, ?it/s]
loss: 0.083047  [21504/24872]:   0%|          | 0/97 [00:07<?, ?it/s]
loss: 0.069979  [21760/24872]:   0%|          | 0/97 [00:07<?, ?it/s]
loss: 0.080729  [22016/24872]:   0%|          | 0/97 [00:07<?, ?it/s]
loss: 0.089907  [22272/24872]:   0%|          | 0/97 [00:07<?, ?it/s]
loss: 0.077353  [22528/24872]:   0%|          | 0/97 [00:07<?, ?it/s]
loss: 0.083033  [22784/24872]:   0%|          | 0/97 [00:07<?, ?it/s]
loss: 0.064046  [23040/24872]:   0%|          | 0/97 [00:08<?, ?it/s]
loss: 0.105281  [23296/24872]:   0%|          | 0/97 [00:08<?, ?it/s]
loss: 0.102955  [23552/24872]:   0%|          | 0/97 [00:08<?, ?it/s]
loss: 0.077064  [23808/24872]:   0%|          | 0/97 [00:08<?, ?it/s]
loss: 0.091703  [24064/24872]:   0%|          | 0/97 [00:08<?, ?it/s]
loss: 0.091981  [24320/24872]:   0%|          | 0/97 [00:08<?, ?it/s]
loss: 0.077447  [24576/24872]:   0%|          | 0/97 [00:08<?, ?it/s]
loss: 0.070873  [24832/24872]:   0%|          | 0/97 [00:08<?, ?it/s]
loss: 0.101979  [24872/24872]:   0%|          | 0/97 [00:08<?, ?it/s]
loss: 0.101979  [24872/24872]: : 98it [00:08, 11.26it/s]
Epoch 4, time=309.30s

  0%|          | 0/97 [00:00<?, ?it/s]
loss: 0.079267  [  256/24872]:   0%|          | 0/97 [00:00<?, ?it/s]
loss: 0.098893  [  512/24872]:   0%|          | 0/97 [00:00<?, ?it/s]
loss: 0.083140  [  768/24872]:   0%|          | 0/97 [00:00<?, ?it/s]
loss: 0.089850  [ 1024/24872]:   0%|          | 0/97 [00:00<?, ?it/s]
loss: 0.086182  [ 1280/24872]:   0%|          | 0/97 [00:00<?, ?it/s]
loss: 0.080758  [ 1536/24872]:   0%|          | 0/97 [00:00<?, ?it/s]
loss: 0.084613  [ 1792/24872]:   0%|          | 0/97 [00:00<?, ?it/s]
loss: 0.077202  [ 2048/24872]:   0%|          | 0/97 [00:00<?, ?it/s]
loss: 0.081525  [ 2304/24872]:   0%|          | 0/97 [00:00<?, ?it/s]
loss: 0.072634  [ 2560/24872]:   0%|          | 0/97 [00:00<?, ?it/s]
loss: 0.086762  [ 2816/24872]:   0%|          | 0/97 [00:00<?, ?it/s]
loss: 0.076436  [ 3072/24872]:   0%|          | 0/97 [00:01<?, ?it/s]
loss: 0.085035  [ 3328/24872]:   0%|          | 0/97 [00:01<?, ?it/s]
loss: 0.082315  [ 3584/24872]:   0%|          | 0/97 [00:01<?, ?it/s]
loss: 0.101225  [ 3840/24872]:   0%|          | 0/97 [00:01<?, ?it/s]
loss: 0.094473  [ 4096/24872]:   0%|          | 0/97 [00:01<?, ?it/s]
loss: 0.092112  [ 4352/24872]:   0%|          | 0/97 [00:01<?, ?it/s]
loss: 0.082011  [ 4608/24872]:   0%|          | 0/97 [00:01<?, ?it/s]
loss: 0.085008  [ 4864/24872]:   0%|          | 0/97 [00:01<?, ?it/s]
loss: 0.074412  [ 5120/24872]:   0%|          | 0/97 [00:01<?, ?it/s]
loss: 0.099987  [ 5376/24872]:   0%|          | 0/97 [00:01<?, ?it/s]
loss: 0.089791  [ 5632/24872]:   0%|          | 0/97 [00:01<?, ?it/s]
loss: 0.085194  [ 5888/24872]:   0%|          | 0/97 [00:02<?, ?it/s]
loss: 0.074496  [ 6144/24872]:   0%|          | 0/97 [00:02<?, ?it/s]
loss: 0.086186  [ 6400/24872]:   0%|          | 0/97 [00:02<?, ?it/s]
loss: 0.088266  [ 6656/24872]:   0%|          | 0/97 [00:02<?, ?it/s]
loss: 0.070604  [ 6912/24872]:   0%|          | 0/97 [00:02<?, ?it/s]
loss: 0.078278  [ 7168/24872]:   0%|          | 0/97 [00:02<?, ?it/s]
loss: 0.072087  [ 7424/24872]:   0%|          | 0/97 [00:02<?, ?it/s]
loss: 0.100721  [ 7680/24872]:   0%|          | 0/97 [00:02<?, ?it/s]
loss: 0.080249  [ 7936/24872]:   0%|          | 0/97 [00:02<?, ?it/s]
loss: 0.073933  [ 8192/24872]:   0%|          | 0/97 [00:02<?, ?it/s]
loss: 0.088682  [ 8448/24872]:   0%|          | 0/97 [00:02<?, ?it/s]
loss: 0.080909  [ 8704/24872]:   0%|          | 0/97 [00:03<?, ?it/s]
loss: 0.068724  [ 8960/24872]:   0%|          | 0/97 [00:03<?, ?it/s]
loss: 0.083760  [ 9216/24872]:   0%|          | 0/97 [00:03<?, ?it/s]
loss: 0.085372  [ 9472/24872]:   0%|          | 0/97 [00:03<?, ?it/s]
loss: 0.080210  [ 9728/24872]:   0%|          | 0/97 [00:03<?, ?it/s]
loss: 0.076461  [ 9984/24872]:   0%|          | 0/97 [00:03<?, ?it/s]
loss: 0.087224  [10240/24872]:   0%|          | 0/97 [00:03<?, ?it/s]
loss: 0.090346  [10496/24872]:   0%|          | 0/97 [00:03<?, ?it/s]
loss: 0.093405  [10752/24872]:   0%|          | 0/97 [00:03<?, ?it/s]
loss: 0.070437  [11008/24872]:   0%|          | 0/97 [00:03<?, ?it/s]
loss: 0.085374  [11264/24872]:   0%|          | 0/97 [00:03<?, ?it/s]
loss: 0.079516  [11520/24872]:   0%|          | 0/97 [00:04<?, ?it/s]
loss: 0.076098  [11776/24872]:   0%|          | 0/97 [00:04<?, ?it/s]
loss: 0.081031  [12032/24872]:   0%|          | 0/97 [00:04<?, ?it/s]
loss: 0.094233  [12288/24872]:   0%|          | 0/97 [00:04<?, ?it/s]
loss: 0.071156  [12544/24872]:   0%|          | 0/97 [00:04<?, ?it/s]
loss: 0.112222  [12800/24872]:   0%|          | 0/97 [00:04<?, ?it/s]
loss: 0.087513  [13056/24872]:   0%|          | 0/97 [00:04<?, ?it/s]
loss: 0.079966  [13312/24872]:   0%|          | 0/97 [00:04<?, ?it/s]
loss: 0.108995  [13568/24872]:   0%|          | 0/97 [00:04<?, ?it/s]
loss: 0.084605  [13824/24872]:   0%|          | 0/97 [00:04<?, ?it/s]
loss: 0.093326  [14080/24872]:   0%|          | 0/97 [00:04<?, ?it/s]
loss: 0.067628  [14336/24872]:   0%|          | 0/97 [00:04<?, ?it/s]
loss: 0.065080  [14592/24872]:   0%|          | 0/97 [00:05<?, ?it/s]
loss: 0.076385  [14848/24872]:   0%|          | 0/97 [00:05<?, ?it/s]
loss: 0.075470  [15104/24872]:   0%|          | 0/97 [00:05<?, ?it/s]
loss: 0.098633  [15360/24872]:   0%|          | 0/97 [00:05<?, ?it/s]
loss: 0.080826  [15616/24872]:   0%|          | 0/97 [00:05<?, ?it/s]
loss: 0.076340  [15872/24872]:   0%|          | 0/97 [00:05<?, ?it/s]
loss: 0.074086  [16128/24872]:   0%|          | 0/97 [00:05<?, ?it/s]
loss: 0.084416  [16384/24872]:   0%|          | 0/97 [00:05<?, ?it/s]
loss: 0.102783  [16640/24872]:   0%|          | 0/97 [00:05<?, ?it/s]
loss: 0.093899  [16896/24872]:   0%|          | 0/97 [00:05<?, ?it/s]
loss: 0.082481  [17152/24872]:   0%|          | 0/97 [00:05<?, ?it/s]
loss: 0.091577  [17408/24872]:   0%|          | 0/97 [00:06<?, ?it/s]
loss: 0.089705  [17664/24872]:   0%|          | 0/97 [00:06<?, ?it/s]
loss: 0.078465  [17920/24872]:   0%|          | 0/97 [00:06<?, ?it/s]
loss: 0.085937  [18176/24872]:   0%|          | 0/97 [00:06<?, ?it/s]
loss: 0.083810  [18432/24872]:   0%|          | 0/97 [00:06<?, ?it/s]
loss: 0.081145  [18688/24872]:   0%|          | 0/97 [00:06<?, ?it/s]
loss: 0.064432  [18944/24872]:   0%|          | 0/97 [00:06<?, ?it/s]
loss: 0.071778  [19200/24872]:   0%|          | 0/97 [00:06<?, ?it/s]
loss: 0.093398  [19456/24872]:   0%|          | 0/97 [00:06<?, ?it/s]
loss: 0.085308  [19712/24872]:   0%|          | 0/97 [00:06<?, ?it/s]
loss: 0.074306  [19968/24872]:   0%|          | 0/97 [00:06<?, ?it/s]
loss: 0.079209  [20224/24872]:   0%|          | 0/97 [00:07<?, ?it/s]
loss: 0.075086  [20480/24872]:   0%|          | 0/97 [00:07<?, ?it/s]
loss: 0.089645  [20736/24872]:   0%|          | 0/97 [00:07<?, ?it/s]
loss: 0.093787  [20992/24872]:   0%|          | 0/97 [00:07<?, ?it/s]
loss: 0.090537  [21248/24872]:   0%|          | 0/97 [00:07<?, ?it/s]
loss: 0.082255  [21504/24872]:   0%|          | 0/97 [00:07<?, ?it/s]
loss: 0.068499  [21760/24872]:   0%|          | 0/97 [00:07<?, ?it/s]
loss: 0.080005  [22016/24872]:   0%|          | 0/97 [00:07<?, ?it/s]
loss: 0.088790  [22272/24872]:   0%|          | 0/97 [00:07<?, ?it/s]
loss: 0.076506  [22528/24872]:   0%|          | 0/97 [00:07<?, ?it/s]
loss: 0.082495  [22784/24872]:   0%|          | 0/97 [00:07<?, ?it/s]
loss: 0.064621  [23040/24872]:   0%|          | 0/97 [00:08<?, ?it/s]
loss: 0.104798  [23296/24872]:   0%|          | 0/97 [00:08<?, ?it/s]
loss: 0.101409  [23552/24872]:   0%|          | 0/97 [00:08<?, ?it/s]
loss: 0.076478  [23808/24872]:   0%|          | 0/97 [00:08<?, ?it/s]
loss: 0.092327  [24064/24872]:   0%|          | 0/97 [00:08<?, ?it/s]
loss: 0.092872  [24320/24872]:   0%|          | 0/97 [00:08<?, ?it/s]
loss: 0.077058  [24576/24872]:   0%|          | 0/97 [00:08<?, ?it/s]
loss: 0.070568  [24832/24872]:   0%|          | 0/97 [00:08<?, ?it/s]
loss: 0.101072  [24872/24872]:   0%|          | 0/97 [00:08<?, ?it/s]
loss: 0.101072  [24872/24872]: : 98it [00:08, 11.24it/s]
Epoch 5, time=318.02s

  0%|          | 0/97 [00:00<?, ?it/s]
loss: 0.078784  [  256/24872]:   0%|          | 0/97 [00:00<?, ?it/s]
loss: 0.098791  [  512/24872]:   0%|          | 0/97 [00:00<?, ?it/s]
loss: 0.081490  [  768/24872]:   0%|          | 0/97 [00:00<?, ?it/s]
loss: 0.088289  [ 1024/24872]:   0%|          | 0/97 [00:00<?, ?it/s]
loss: 0.085420  [ 1280/24872]:   0%|          | 0/97 [00:00<?, ?it/s]
loss: 0.079472  [ 1536/24872]:   0%|          | 0/97 [00:00<?, ?it/s]
loss: 0.083717  [ 1792/24872]:   0%|          | 0/97 [00:00<?, ?it/s]
loss: 0.075143  [ 2048/24872]:   0%|          | 0/97 [00:00<?, ?it/s]
loss: 0.078780  [ 2304/24872]:   0%|          | 0/97 [00:00<?, ?it/s]
loss: 0.070884  [ 2560/24872]:   0%|          | 0/97 [00:00<?, ?it/s]
loss: 0.084611  [ 2816/24872]:   0%|          | 0/97 [00:00<?, ?it/s]
loss: 0.075588  [ 3072/24872]:   0%|          | 0/97 [00:01<?, ?it/s]
loss: 0.083793  [ 3328/24872]:   0%|          | 0/97 [00:01<?, ?it/s]
loss: 0.081463  [ 3584/24872]:   0%|          | 0/97 [00:01<?, ?it/s]
loss: 0.099244  [ 3840/24872]:   0%|          | 0/97 [00:01<?, ?it/s]
loss: 0.092508  [ 4096/24872]:   0%|          | 0/97 [00:01<?, ?it/s]
loss: 0.089386  [ 4352/24872]:   0%|          | 0/97 [00:01<?, ?it/s]
loss: 0.080700  [ 4608/24872]:   0%|          | 0/97 [00:01<?, ?it/s]
loss: 0.083627  [ 4864/24872]:   0%|          | 0/97 [00:01<?, ?it/s]
loss: 0.073758  [ 5120/24872]:   0%|          | 0/97 [00:01<?, ?it/s]
loss: 0.096869  [ 5376/24872]:   0%|          | 0/97 [00:01<?, ?it/s]
loss: 0.088522  [ 5632/24872]:   0%|          | 0/97 [00:01<?, ?it/s]
loss: 0.083411  [ 5888/24872]:   0%|          | 0/97 [00:02<?, ?it/s]
loss: 0.073582  [ 6144/24872]:   0%|          | 0/97 [00:02<?, ?it/s]
loss: 0.084750  [ 6400/24872]:   0%|          | 0/97 [00:02<?, ?it/s]
loss: 0.087760  [ 6656/24872]:   0%|          | 0/97 [00:02<?, ?it/s]
loss: 0.069685  [ 6912/24872]:   0%|          | 0/97 [00:02<?, ?it/s]
loss: 0.077658  [ 7168/24872]:   0%|          | 0/97 [00:02<?, ?it/s]
loss: 0.071114  [ 7424/24872]:   0%|          | 0/97 [00:02<?, ?it/s]
loss: 0.100236  [ 7680/24872]:   0%|          | 0/97 [00:02<?, ?it/s]
loss: 0.079620  [ 7936/24872]:   0%|          | 0/97 [00:02<?, ?it/s]
loss: 0.073347  [ 8192/24872]:   0%|          | 0/97 [00:02<?, ?it/s]
loss: 0.087920  [ 8448/24872]:   0%|          | 0/97 [00:02<?, ?it/s]
loss: 0.079876  [ 8704/24872]:   0%|          | 0/97 [00:03<?, ?it/s]
loss: 0.067395  [ 8960/24872]:   0%|          | 0/97 [00:03<?, ?it/s]
loss: 0.083220  [ 9216/24872]:   0%|          | 0/97 [00:03<?, ?it/s]
loss: 0.084438  [ 9472/24872]:   0%|          | 0/97 [00:03<?, ?it/s]
loss: 0.079512  [ 9728/24872]:   0%|          | 0/97 [00:03<?, ?it/s]
loss: 0.076189  [ 9984/24872]:   0%|          | 0/97 [00:03<?, ?it/s]
loss: 0.086984  [10240/24872]:   0%|          | 0/97 [00:03<?, ?it/s]
loss: 0.089412  [10496/24872]:   0%|          | 0/97 [00:03<?, ?it/s]
loss: 0.093171  [10752/24872]:   0%|          | 0/97 [00:03<?, ?it/s]
loss: 0.069701  [11008/24872]:   0%|          | 0/97 [00:03<?, ?it/s]
loss: 0.084410  [11264/24872]:   0%|          | 0/97 [00:03<?, ?it/s]
loss: 0.078874  [11520/24872]:   0%|          | 0/97 [00:04<?, ?it/s]
loss: 0.075257  [11776/24872]:   0%|          | 0/97 [00:04<?, ?it/s]
loss: 0.080646  [12032/24872]:   0%|          | 0/97 [00:04<?, ?it/s]
loss: 0.093012  [12288/24872]:   0%|          | 0/97 [00:04<?, ?it/s]
loss: 0.070566  [12544/24872]:   0%|          | 0/97 [00:04<?, ?it/s]
loss: 0.111865  [12800/24872]:   0%|          | 0/97 [00:04<?, ?it/s]
loss: 0.086537  [13056/24872]:   0%|          | 0/97 [00:04<?, ?it/s]
loss: 0.079540  [13312/24872]:   0%|          | 0/97 [00:04<?, ?it/s]
loss: 0.107472  [13568/24872]:   0%|          | 0/97 [00:04<?, ?it/s]
loss: 0.083933  [13824/24872]:   0%|          | 0/97 [00:04<?, ?it/s]
loss: 0.092945  [14080/24872]:   0%|          | 0/97 [00:04<?, ?it/s]
loss: 0.067196  [14336/24872]:   0%|          | 0/97 [00:04<?, ?it/s]
loss: 0.064374  [14592/24872]:   0%|          | 0/97 [00:05<?, ?it/s]
loss: 0.075735  [14848/24872]:   0%|          | 0/97 [00:05<?, ?it/s]
loss: 0.074984  [15104/24872]:   0%|          | 0/97 [00:05<?, ?it/s]
loss: 0.098305  [15360/24872]:   0%|          | 0/97 [00:05<?, ?it/s]
loss: 0.080056  [15616/24872]:   0%|          | 0/97 [00:05<?, ?it/s]
loss: 0.075088  [15872/24872]:   0%|          | 0/97 [00:05<?, ?it/s]
loss: 0.073740  [16128/24872]:   0%|          | 0/97 [00:05<?, ?it/s]
loss: 0.083704  [16384/24872]:   0%|          | 0/97 [00:05<?, ?it/s]
loss: 0.102047  [16640/24872]:   0%|          | 0/97 [00:05<?, ?it/s]
loss: 0.093668  [16896/24872]:   0%|          | 0/97 [00:05<?, ?it/s]
loss: 0.081837  [17152/24872]:   0%|          | 0/97 [00:05<?, ?it/s]
loss: 0.091042  [17408/24872]:   0%|          | 0/97 [00:06<?, ?it/s]
loss: 0.089022  [17664/24872]:   0%|          | 0/97 [00:06<?, ?it/s]
loss: 0.077224  [17920/24872]:   0%|          | 0/97 [00:06<?, ?it/s]
loss: 0.086085  [18176/24872]:   0%|          | 0/97 [00:06<?, ?it/s]
loss: 0.082645  [18432/24872]:   0%|          | 0/97 [00:06<?, ?it/s]
loss: 0.080524  [18688/24872]:   0%|          | 0/97 [00:06<?, ?it/s]
loss: 0.063452  [18944/24872]:   0%|          | 0/97 [00:06<?, ?it/s]
loss: 0.071610  [19200/24872]:   0%|          | 0/97 [00:06<?, ?it/s]
loss: 0.092826  [19456/24872]:   0%|          | 0/97 [00:06<?, ?it/s]
loss: 0.084497  [19712/24872]:   0%|          | 0/97 [00:06<?, ?it/s]
loss: 0.073584  [19968/24872]:   0%|          | 0/97 [00:06<?, ?it/s]
loss: 0.078745  [20224/24872]:   0%|          | 0/97 [00:07<?, ?it/s]
loss: 0.074558  [20480/24872]:   0%|          | 0/97 [00:07<?, ?it/s]
loss: 0.089126  [20736/24872]:   0%|          | 0/97 [00:07<?, ?it/s]
loss: 0.093276  [20992/24872]:   0%|          | 0/97 [00:07<?, ?it/s]
loss: 0.090214  [21248/24872]:   0%|          | 0/97 [00:07<?, ?it/s]
loss: 0.081436  [21504/24872]:   0%|          | 0/97 [00:07<?, ?it/s]
loss: 0.067355  [21760/24872]:   0%|          | 0/97 [00:07<?, ?it/s]
loss: 0.079273  [22016/24872]:   0%|          | 0/97 [00:07<?, ?it/s]
loss: 0.087659  [22272/24872]:   0%|          | 0/97 [00:07<?, ?it/s]
loss: 0.075767  [22528/24872]:   0%|          | 0/97 [00:07<?, ?it/s]
loss: 0.082192  [22784/24872]:   0%|          | 0/97 [00:07<?, ?it/s]
loss: 0.064333  [23040/24872]:   0%|          | 0/97 [00:07<?, ?it/s]
loss: 0.104009  [23296/24872]:   0%|          | 0/97 [00:08<?, ?it/s]
loss: 0.100599  [23552/24872]:   0%|          | 0/97 [00:08<?, ?it/s]
loss: 0.075846  [23808/24872]:   0%|          | 0/97 [00:08<?, ?it/s]
loss: 0.092407  [24064/24872]:   0%|          | 0/97 [00:08<?, ?it/s]
loss: 0.091440  [24320/24872]:   0%|          | 0/97 [00:08<?, ?it/s]
loss: 0.075919  [24576/24872]:   0%|          | 0/97 [00:08<?, ?it/s]
loss: 0.070175  [24832/24872]:   0%|          | 0/97 [00:08<?, ?it/s]
loss: 0.100423  [24872/24872]:   0%|          | 0/97 [00:08<?, ?it/s]
loss: 0.100423  [24872/24872]: : 98it [00:08, 11.33it/s]
Epoch 6, time=326.67s

  0%|          | 0/97 [00:00<?, ?it/s]
loss: 0.078181  [  256/24872]:   0%|          | 0/97 [00:00<?, ?it/s]
loss: 0.097793  [  512/24872]:   0%|          | 0/97 [00:00<?, ?it/s]
loss: 0.080580  [  768/24872]:   0%|          | 0/97 [00:00<?, ?it/s]
loss: 0.087432  [ 1024/24872]:   0%|          | 0/97 [00:00<?, ?it/s]
loss: 0.084689  [ 1280/24872]:   0%|          | 0/97 [00:00<?, ?it/s]
loss: 0.078568  [ 1536/24872]:   0%|          | 0/97 [00:00<?, ?it/s]
loss: 0.082771  [ 1792/24872]:   0%|          | 0/97 [00:00<?, ?it/s]
loss: 0.074167  [ 2048/24872]:   0%|          | 0/97 [00:00<?, ?it/s]
loss: 0.077487  [ 2304/24872]:   0%|          | 0/97 [00:00<?, ?it/s]
loss: 0.070069  [ 2560/24872]:   0%|          | 0/97 [00:00<?, ?it/s]
loss: 0.083533  [ 2816/24872]:   0%|          | 0/97 [00:01<?, ?it/s]
loss: 0.075839  [ 3072/24872]:   0%|          | 0/97 [00:01<?, ?it/s]
loss: 0.083339  [ 3328/24872]:   0%|          | 0/97 [00:01<?, ?it/s]
loss: 0.080968  [ 3584/24872]:   0%|          | 0/97 [00:01<?, ?it/s]
loss: 0.099202  [ 3840/24872]:   0%|          | 0/97 [00:01<?, ?it/s]
loss: 0.091588  [ 4096/24872]:   0%|          | 0/97 [00:01<?, ?it/s]
loss: 0.089384  [ 4352/24872]:   0%|          | 0/97 [00:01<?, ?it/s]
loss: 0.079473  [ 4608/24872]:   0%|          | 0/97 [00:01<?, ?it/s]
loss: 0.083145  [ 4864/24872]:   0%|          | 0/97 [00:01<?, ?it/s]
loss: 0.073589  [ 5120/24872]:   0%|          | 0/97 [00:01<?, ?it/s]
loss: 0.094968  [ 5376/24872]:   0%|          | 0/97 [00:01<?, ?it/s]
loss: 0.088257  [ 5632/24872]:   0%|          | 0/97 [00:01<?, ?it/s]
loss: 0.082600  [ 5888/24872]:   0%|          | 0/97 [00:02<?, ?it/s]
loss: 0.073406  [ 6144/24872]:   0%|          | 0/97 [00:02<?, ?it/s]
loss: 0.084197  [ 6400/24872]:   0%|          | 0/97 [00:02<?, ?it/s]
loss: 0.087514  [ 6656/24872]:   0%|          | 0/97 [00:02<?, ?it/s]
loss: 0.069199  [ 6912/24872]:   0%|          | 0/97 [00:02<?, ?it/s]
loss: 0.077134  [ 7168/24872]:   0%|          | 0/97 [00:02<?, ?it/s]
loss: 0.071067  [ 7424/24872]:   0%|          | 0/97 [00:02<?, ?it/s]
loss: 0.099126  [ 7680/24872]:   0%|          | 0/97 [00:02<?, ?it/s]
loss: 0.079061  [ 7936/24872]:   0%|          | 0/97 [00:02<?, ?it/s]
loss: 0.073193  [ 8192/24872]:   0%|          | 0/97 [00:02<?, ?it/s]
loss: 0.087081  [ 8448/24872]:   0%|          | 0/97 [00:02<?, ?it/s]
loss: 0.079429  [ 8704/24872]:   0%|          | 0/97 [00:03<?, ?it/s]
loss: 0.066909  [ 8960/24872]:   0%|          | 0/97 [00:03<?, ?it/s]
loss: 0.082242  [ 9216/24872]:   0%|          | 0/97 [00:03<?, ?it/s]
loss: 0.083784  [ 9472/24872]:   0%|          | 0/97 [00:03<?, ?it/s]
loss: 0.079159  [ 9728/24872]:   0%|          | 0/97 [00:03<?, ?it/s]
loss: 0.075567  [ 9984/24872]:   0%|          | 0/97 [00:03<?, ?it/s]
loss: 0.086876  [10240/24872]:   0%|          | 0/97 [00:03<?, ?it/s]
loss: 0.088948  [10496/24872]:   0%|          | 0/97 [00:03<?, ?it/s]
loss: 0.092778  [10752/24872]:   0%|          | 0/97 [00:03<?, ?it/s]
loss: 0.069180  [11008/24872]:   0%|          | 0/97 [00:03<?, ?it/s]
loss: 0.083891  [11264/24872]:   0%|          | 0/97 [00:03<?, ?it/s]
loss: 0.078392  [11520/24872]:   0%|          | 0/97 [00:04<?, ?it/s]
loss: 0.074676  [11776/24872]:   0%|          | 0/97 [00:04<?, ?it/s]
loss: 0.080393  [12032/24872]:   0%|          | 0/97 [00:04<?, ?it/s]
loss: 0.092405  [12288/24872]:   0%|          | 0/97 [00:04<?, ?it/s]
loss: 0.069990  [12544/24872]:   0%|          | 0/97 [00:04<?, ?it/s]
loss: 0.111550  [12800/24872]:   0%|          | 0/97 [00:04<?, ?it/s]
loss: 0.085647  [13056/24872]:   0%|          | 0/97 [00:04<?, ?it/s]
loss: 0.078889  [13312/24872]:   0%|          | 0/97 [00:04<?, ?it/s]
loss: 0.106112  [13568/24872]:   0%|          | 0/97 [00:04<?, ?it/s]
loss: 0.083363  [13824/24872]:   0%|          | 0/97 [00:04<?, ?it/s]
loss: 0.092432  [14080/24872]:   0%|          | 0/97 [00:04<?, ?it/s]
loss: 0.066444  [14336/24872]:   0%|          | 0/97 [00:04<?, ?it/s]
loss: 0.063787  [14592/24872]:   0%|          | 0/97 [00:05<?, ?it/s]
loss: 0.075055  [14848/24872]:   0%|          | 0/97 [00:05<?, ?it/s]
loss: 0.074396  [15104/24872]:   0%|          | 0/97 [00:05<?, ?it/s]
loss: 0.097767  [15360/24872]:   0%|          | 0/97 [00:05<?, ?it/s]
loss: 0.079408  [15616/24872]:   0%|          | 0/97 [00:05<?, ?it/s]
loss: 0.074024  [15872/24872]:   0%|          | 0/97 [00:05<?, ?it/s]
loss: 0.073348  [16128/24872]:   0%|          | 0/97 [00:05<?, ?it/s]
loss: 0.083116  [16384/24872]:   0%|          | 0/97 [00:05<?, ?it/s]
loss: 0.101377  [16640/24872]:   0%|          | 0/97 [00:05<?, ?it/s]
loss: 0.093296  [16896/24872]:   0%|          | 0/97 [00:05<?, ?it/s]
loss: 0.081314  [17152/24872]:   0%|          | 0/97 [00:05<?, ?it/s]
loss: 0.090512  [17408/24872]:   0%|          | 0/97 [00:06<?, ?it/s]
loss: 0.088511  [17664/24872]:   0%|          | 0/97 [00:06<?, ?it/s]
loss: 0.076117  [17920/24872]:   0%|          | 0/97 [00:06<?, ?it/s]
loss: 0.086020  [18176/24872]:   0%|          | 0/97 [00:06<?, ?it/s]
loss: 0.081778  [18432/24872]:   0%|          | 0/97 [00:06<?, ?it/s]
loss: 0.079907  [18688/24872]:   0%|          | 0/97 [00:06<?, ?it/s]
loss: 0.062807  [18944/24872]:   0%|          | 0/97 [00:06<?, ?it/s]
loss: 0.071391  [19200/24872]:   0%|          | 0/97 [00:06<?, ?it/s]
loss: 0.092179  [19456/24872]:   0%|          | 0/97 [00:06<?, ?it/s]
loss: 0.083607  [19712/24872]:   0%|          | 0/97 [00:06<?, ?it/s]
loss: 0.072706  [19968/24872]:   0%|          | 0/97 [00:06<?, ?it/s]
loss: 0.078236  [20224/24872]:   0%|          | 0/97 [00:07<?, ?it/s]
loss: 0.073965  [20480/24872]:   0%|          | 0/97 [00:07<?, ?it/s]
loss: 0.088561  [20736/24872]:   0%|          | 0/97 [00:07<?, ?it/s]
loss: 0.092750  [20992/24872]:   0%|          | 0/97 [00:07<?, ?it/s]
loss: 0.089845  [21248/24872]:   0%|          | 0/97 [00:07<?, ?it/s]
loss: 0.080725  [21504/24872]:   0%|          | 0/97 [00:07<?, ?it/s]
loss: 0.066760  [21760/24872]:   0%|          | 0/97 [00:07<?, ?it/s]
loss: 0.078606  [22016/24872]:   0%|          | 0/97 [00:07<?, ?it/s]
loss: 0.086789  [22272/24872]:   0%|          | 0/97 [00:07<?, ?it/s]
loss: 0.075057  [22528/24872]:   0%|          | 0/97 [00:07<?, ?it/s]
loss: 0.081731  [22784/24872]:   0%|          | 0/97 [00:07<?, ?it/s]
loss: 0.064044  [23040/24872]:   0%|          | 0/97 [00:07<?, ?it/s]
loss: 0.103373  [23296/24872]:   0%|          | 0/97 [00:08<?, ?it/s]
loss: 0.099864  [23552/24872]:   0%|          | 0/97 [00:08<?, ?it/s]
loss: 0.075277  [23808/24872]:   0%|          | 0/97 [00:08<?, ?it/s]
loss: 0.092325  [24064/24872]:   0%|          | 0/97 [00:08<?, ?it/s]
loss: 0.089697  [24320/24872]:   0%|          | 0/97 [00:08<?, ?it/s]
loss: 0.075146  [24576/24872]:   0%|          | 0/97 [00:08<?, ?it/s]
loss: 0.070050  [24832/24872]:   0%|          | 0/97 [00:08<?, ?it/s]
loss: 0.099753  [24872/24872]:   0%|          | 0/97 [00:08<?, ?it/s]
loss: 0.099753  [24872/24872]: : 98it [00:08, 11.35it/s]
Done!

test the network#

Do some qualitative tests: let the trained network predict some particle geometries and compare their Mie spectra with the target spectrum.

# Qualitative test: feed target spectra to the trained generator, evaluate
# the predicted core-shell designs with Mie theory, and plot four randomly
# chosen target/prediction pairs.
# Note: Ideally tests should be done on separate samples!
# NOTE(review): the original comment called these "training samples", while
# the variable name suggests a held-out set -- confirm where
# `q_sca_target_test` is defined.
sca_test = q_sca_target_test

# Inference only: neither the network prediction nor the Mie evaluation is
# back-propagated here, so disable gradient tracking to avoid building an
# autograd graph over the whole test batch.
with torch.no_grad():
    pred = model(sca_test)

    # evaluate Mie
    r_c_test, r_s_test, eps_c_test, eps_s_test = nn_pred_to_mie_geometry(pred)
    res_mie = pmd.coreshell.cross_sections(
        k0,
        r_c=r_c_test,
        eps_c=eps_c_test,
        r_s=r_s_test,
        eps_s=eps_s_test,
        eps_env=eps_env,
        n_max=n_max,
    )

# plot
# the wavelength axis is the same for every subplot: convert it only once
wl_np = wl0.detach().cpu().numpy()
# random sample indices (drawn with replacement, so duplicates are possible)
i_plot = np.random.randint(len(sca_test), size=4)
plt.figure(figsize=(12, 10))
for i_n, i in enumerate(i_plot):
    plt.subplot(2, 2, i_n + 1)
    plt.plot(
        wl_np,
        sca_test[i].detach().cpu().numpy(),
        label="reference",
    )
    plt.plot(
        wl_np,
        res_mie["q_sca"][i].detach().cpu().numpy(),
        label="predicted particle",
    )
    plt.legend()
    plt.xlabel("wavelength (nm)")
    plt.ylabel("scat. efficiency")
plt.show()
ex 05 tandem

Total running time of the script: (5 minutes 41.364 seconds)

Estimated memory usage: 5356 MB

Gallery generated by Sphinx-Gallery