Mie-informed tandem neural network#

Here, we demonstrate how to train a design generator network capable of suggesting core-shell particles with a specific spectral response, using PyMieDiff as a differentiable forward evaluator. The training pipeline follows the “Tandem” model:

target spectrum –> generator NN –> design –> Mie –> real spectrum

training loss is: MSE(target spec., real spec.)

author: O. Jackson, P. Wiecha, 06/2025

imports#

import time

import numpy as np
from tqdm import tqdm
import matplotlib.pyplot as plt
import torch
from torch import nn

import pymiediff as pmd

setup optimization target#

We set up the main configuration here: torch device, parameter limits and wavelengths.

# device on which all tensors are created
device = "cpu"

# --- general configuration ---
# number of random particles for which reference spectra are generated
N_samples = 25000
# maximum Mie order (kept fixed for performance)
n_max = 4
# permittivity of the surrounding medium
eps_env = torch.tensor(1.0, device=device)

# [lower, upper] limits used for random sampling / output scaling:
# radius, real part and imaginary part of the refractive index
lim_r = torch.tensor([40, 100], device=device)
lim_n_re = torch.tensor([1.5, 4.0], device=device)
lim_n_im = torch.tensor([0.0, 0.1], device=device)

# 40 wavelengths from 400 to 800 and the corresponding wavenumbers
wl0 = torch.linspace(400, 800, steps=40, device=device)
k0 = 2 * torch.pi / wl0

generate reference spectra#

We generate a large number of reference Mie spectra for existing particles, which will be used as design targets during training.

Note: this step could also be done without any physics knowledge, for example with artificial spectra (e.g. Lorentzians), or a scattering maximization loss.

# datagen: spectra of randomly drawn particles serve as training targets
# (the geometries themselves are not used for training)
def _uniform(shape, limits):
    # uniform random samples between limits[0] and limits[1]
    return torch.rand(shape, device=device) * torch.diff(limits)[0] + limits[0]


def _as_spectrum(n_complex):
    # low-level API: permittivity required as spectra (for vectorization),
    # so the squared index is repeated for every wavenumber
    return torch.ones_like(k0).unsqueeze(0) * n_complex.unsqueeze(1) ** 2


# core radius and shell thickness are both drawn from the radius limits;
# the outer radius is their sum
r_c = _uniform((N_samples,), lim_r)
d_s = _uniform((N_samples,), lim_r)
r_s = r_c + d_s

# complex refractive indices, column 0: core, column 1: shell
n_re = _uniform((N_samples, 2), lim_n_re)
n_im = _uniform((N_samples, 2), lim_n_im)
n = n_re + 1j * n_im

eps_c = _as_spectrum(n[:, 0])
eps_s = _as_spectrum(n[:, 1])

all_particles = pmd.multishell.cross_sections(
    k0,
    r_c=r_c,
    eps_c=eps_c,
    r_s=r_s,
    eps_s=eps_s,
    eps_env=eps_env,
    n_max=n_max,
)

# the first N_test spectra are held back for testing, the rest is for training
N_test = 128
q_sca_target = all_particles["q_sca"][N_test:].to(dtype=torch.float32)
q_sca_target_test = all_particles["q_sca"][:N_test].to(dtype=torch.float32)

# show one of the training target spectra
plt.plot(q_sca_target[30].detach().cpu().numpy())
ex 09 tandem
[<matplotlib.lines.Line2D object at 0x7f5d621a5340>]

Neural network classes / functions#

define the network model (simple MLP) and training loop

class FullyConnected(nn.Module):
    """Plain MLP used as design generator.

    The input width is the number of wavenumbers (``len(k0)``). Three
    ReLU-activated linear layers of width ``hidden_dim`` are followed by
    a six-unit linear output layer with a sigmoid, so each of the six
    outputs lies between 0 and 1.
    """

    def __init__(self, hidden_dim=1024):
        super().__init__()
        n_in, n_out = len(k0), 6

        # input stage
        self.fc_in = nn.Linear(n_in, hidden_dim)
        self.relu1 = nn.ReLU()

        # two hidden stages of identical width
        self.fc_1 = nn.Linear(hidden_dim, hidden_dim)
        self.relu2 = nn.ReLU()
        self.fc_2 = nn.Linear(hidden_dim, hidden_dim)
        self.relu3 = nn.ReLU()

        # output stage
        self.fc_out = nn.Linear(hidden_dim, n_out)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Return the six sigmoid-activated outputs for input ``x``."""
        hidden = self.relu1(self.fc_in(x))
        hidden = self.relu2(self.fc_1(hidden))
        hidden = self.relu3(self.fc_2(hidden))
        return self.sigmoid(self.fc_out(hidden))


def nn_pred_to_mie_geometry(pred):
    """Convert network outputs into core-shell parameters for the Mie solver.

    ``pred`` is indexed as a 2D tensor whose columns are, in order:
    core radius, shell thickness, Re(n_core), Im(n_core), Re(n_shell),
    Im(n_shell). Each column is multiplied by the upper bound of the
    matching user-defined limit (implicit normalization).

    Returns:
        Tuple ``(r_c, r_s, eps_c, eps_s)``: core radius, outer radius and
        the core / shell permittivities repeated for every entry of ``k0``.
    """
    r_max = lim_r.max()
    n_re_max = lim_n_re.max()
    n_im_max = lim_n_im.max()

    def _eps_spectrum(n_complex):
        # square the refractive index and repeat it along the k0 axis
        return torch.ones_like(k0).unsqueeze(0) * n_complex.unsqueeze(1) ** 2

    # radii: the outer radius is core radius plus shell thickness
    r_c = r_max * (pred[:, 0])
    r_s = r_max * (pred[:, 0] + pred[:, 1])

    # complex refractive indices of core and shell
    n_c = n_re_max * pred[:, 2] + n_im_max * (1j * pred[:, 3])
    n_s = n_re_max * pred[:, 4] + n_im_max * (1j * pred[:, 5])

    return r_c, r_s, _eps_spectrum(n_c), _eps_spectrum(n_s)


def train_loop(dataloader, model, loss_fn, optimizer):
    """Train ``model`` for one epoch in the tandem configuration.

    For every batch of target spectra the generator network proposes
    core-shell designs, the differentiable Mie solver evaluates their
    scattering spectra, and the loss between evaluated and target spectra
    is backpropagated to update the network.

    Args:
        dataloader: iterable over batches of target spectra. It must
            support ``len()`` and expose a sized ``dataset`` attribute.
        model: generator network mapping a batch of spectra to design values.
        loss_fn: callable ``loss_fn(prediction, target)`` returning a scalar
            tensor.
        optimizer: optimizer updating the parameters of ``model``.
    """
    size = len(dataloader.dataset)
    # Set the model to training mode - important for batch normalization and
    # dropout layers. Unnecessary in this situation but added for best practices
    model.train()

    # len(dataloader) is the true number of batches: unlike
    # ``size // batch_size`` it includes a final partial batch and does not
    # require ``dataloader.batch_size`` to be set.
    prog_bar = tqdm(dataloader, total=len(dataloader))
    n_seen = 0  # number of samples processed so far in this epoch
    for X in prog_bar:
        # model prediction: generate core-shell particles
        pred = model(X)

        # evaluate Mie
        r_c, r_s, eps_c, eps_s = nn_pred_to_mie_geometry(pred)
        res_mie = pmd.multishell.cross_sections(
            k0,
            r_c=r_c,
            eps_c=eps_c,
            r_s=r_s,
            eps_s=eps_s,
            eps_env=eps_env,
            n_max=n_max,
        )
        q_sca_mie = res_mie["q_sca"].to(dtype=torch.float32)

        # calc. loss
        loss = loss_fn(q_sca_mie, X)

        # Backpropagation
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()

        # progress report: current loss and samples seen
        n_seen += len(X)
        prog_bar.set_description(
            f"loss: {loss.item():>7f}  [{n_seen:>5d}/{size:>5d}]"
        )

training the Mie-informed network#

Here we use a simple, manually optimized training schedule.

model = FullyConnected().to(device)

# training schedule, one entry per stage:
# bs = batch size, lr = learning rate, n_ep = number of epochs
confs = [
    {"bs": 32, "lr": 1e-4, "n_ep": 5},
    {"bs": 64, "lr": 1e-4, "n_ep": 5},
    {"bs": 128, "lr": 1e-4, "n_ep": 6},
    {"bs": 256, "lr": 1e-5, "n_ep": 6},
]

# the same loss is used in every stage
loss_fn = nn.MSELoss()
rule = "-------------------------------"

t_start = time.time()
for conf in confs:
    batch_size, learning_rate, epochs = (conf[key] for key in ("bs", "lr", "n_ep"))
    print(rule)
    print(f"LR={learning_rate}, batch_size={batch_size}")
    print(rule)

    # every stage gets its own data loader and a freshly created optimizer
    train_dataloader = torch.utils.data.DataLoader(q_sca_target, batch_size=batch_size)
    optimizer = torch.optim.AdamW(model.parameters(), lr=learning_rate)
    for t in range(epochs):
        print(f"Epoch {t+1}, time={time.time()-t_start:.2f}s")
        train_loop(train_dataloader, model, loss_fn, optimizer)
print("Done!")
-------------------------------
LR=0.0001, batch_size=32
-------------------------------
Epoch 1, time=0.00s

  0%|          | 0/777 [00:00<?, ?it/s]
loss: 6.164481  [   32/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 2.737760  [   64/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 2.875986  [   96/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 2.720785  [  128/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 2.874957  [  160/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 2.450446  [  192/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 2.243979  [  224/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 1.964936  [  256/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 2.352512  [  288/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 1.009158  [  320/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 1.603234  [  352/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 1.629543  [  384/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 1.831257  [  416/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 1.614799  [  448/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 1.220146  [  480/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.890595  [  512/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.974652  [  544/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 1.636398  [  576/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 1.938516  [  608/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 1.319782  [  640/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 1.623832  [  672/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 1.313473  [  704/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 1.091473  [  736/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 1.252733  [  768/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 1.260256  [  800/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 1.270293  [  832/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 1.324409  [  864/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 1.109571  [  896/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 1.162570  [  928/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.819867  [  960/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 1.104579  [  992/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 1.353768  [ 1024/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 1.453144  [ 1056/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.992753  [ 1088/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.958361  [ 1120/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 1.169763  [ 1152/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 1.124153  [ 1184/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 1.091988  [ 1216/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 1.108627  [ 1248/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 1.054809  [ 1280/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.978839  [ 1312/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.858665  [ 1344/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 1.057194  [ 1376/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 1.104313  [ 1408/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.858314  [ 1440/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 1.017307  [ 1472/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.961133  [ 1504/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 1.002170  [ 1536/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 1.022681  [ 1568/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 1.068828  [ 1600/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.915891  [ 1632/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 1.022553  [ 1664/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 1.121984  [ 1696/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.817024  [ 1728/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.849087  [ 1760/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.732544  [ 1792/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 1.140341  [ 1824/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 1.233776  [ 1856/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.670891  [ 1888/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.960571  [ 1920/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.897371  [ 1952/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.909648  [ 1984/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.818682  [ 2016/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.795169  [ 2048/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.679846  [ 2080/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.670422  [ 2112/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.963210  [ 2144/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.920963  [ 2176/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.765692  [ 2208/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.568434  [ 2240/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.954701  [ 2272/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 1.133265  [ 2304/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 1.247346  [ 2336/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.753909  [ 2368/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.709689  [ 2400/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.883656  [ 2432/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.999551  [ 2464/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.742481  [ 2496/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 1.023800  [ 2528/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.774954  [ 2560/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.763109  [ 2592/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.916699  [ 2624/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.746391  [ 2656/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.915866  [ 2688/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 1.024970  [ 2720/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.610243  [ 2752/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.741972  [ 2784/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.794500  [ 2816/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.667142  [ 2848/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.648914  [ 2880/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.652748  [ 2912/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.900908  [ 2944/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.967347  [ 2976/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.755115  [ 3008/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 1.019181  [ 3040/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.874483  [ 3072/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.882794  [ 3104/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.752526  [ 3136/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.758126  [ 3168/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.665612  [ 3200/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.589667  [ 3232/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.696020  [ 3264/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.894405  [ 3296/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.757941  [ 3328/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.878593  [ 3360/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.797711  [ 3392/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.715678  [ 3424/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.736084  [ 3456/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.708245  [ 3488/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.708949  [ 3520/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.720824  [ 3552/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.984631  [ 3584/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.786663  [ 3616/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.931745  [ 3648/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.868698  [ 3680/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.770071  [ 3712/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.616093  [ 3744/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.638998  [ 3776/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.513895  [ 3808/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.795441  [ 3840/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.810363  [ 3872/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.933426  [ 3904/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.918894  [ 3936/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.634410  [ 3968/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.714248  [ 4000/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.619162  [ 4032/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.742969  [ 4064/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.628853  [ 4096/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.680418  [ 4128/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.737965  [ 4160/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.613960  [ 4192/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.697630  [ 4224/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.745663  [ 4256/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.880557  [ 4288/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.617042  [ 4320/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.718834  [ 4352/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.763029  [ 4384/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.609054  [ 4416/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.792580  [ 4448/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.542121  [ 4480/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.532433  [ 4512/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.588701  [ 4544/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.702374  [ 4576/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.691495  [ 4608/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.728311  [ 4640/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.504064  [ 4672/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.487640  [ 4704/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.949446  [ 4736/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.791738  [ 4768/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.409219  [ 4800/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.632877  [ 4832/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.585334  [ 4864/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.568657  [ 4896/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.671711  [ 4928/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.601464  [ 4960/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.545707  [ 4992/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.774876  [ 5024/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.691815  [ 5056/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.612153  [ 5088/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.639408  [ 5120/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.535402  [ 5152/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.698626  [ 5184/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.855511  [ 5216/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.629847  [ 5248/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.685521  [ 5280/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.747082  [ 5312/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.873829  [ 5344/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.538996  [ 5376/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.603878  [ 5408/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.576532  [ 5440/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.569921  [ 5472/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.601796  [ 5504/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.664271  [ 5536/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.591459  [ 5568/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.568649  [ 5600/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.470834  [ 5632/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.614080  [ 5664/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.726296  [ 5696/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.596752  [ 5728/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.555599  [ 5760/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.477164  [ 5792/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.511473  [ 5824/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.721067  [ 5856/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.460814  [ 5888/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.587306  [ 5920/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.559756  [ 5952/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.735799  [ 5984/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.604562  [ 6016/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.759143  [ 6048/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.567662  [ 6080/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.514022  [ 6112/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.627488  [ 6144/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.646896  [ 6176/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.553794  [ 6208/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.461629  [ 6240/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.697395  [ 6272/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.553990  [ 6304/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.471745  [ 6336/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.520045  [ 6368/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.790232  [ 6400/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.546048  [ 6432/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.674127  [ 6464/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.518294  [ 6496/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.405453  [ 6528/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.448474  [ 6560/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.683180  [ 6592/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.421975  [ 6624/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.660372  [ 6656/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.573604  [ 6688/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.593175  [ 6720/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.816774  [ 6752/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.633772  [ 6784/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.687386  [ 6816/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.610047  [ 6848/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.601501  [ 6880/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.497183  [ 6912/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.605369  [ 6944/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.594093  [ 6976/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.729000  [ 7008/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.499710  [ 7040/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.764414  [ 7072/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.507794  [ 7104/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.532414  [ 7136/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.574363  [ 7168/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.601733  [ 7200/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.600877  [ 7232/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.740827  [ 7264/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.628635  [ 7296/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.533615  [ 7328/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.681888  [ 7360/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.467761  [ 7392/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.509412  [ 7424/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.543921  [ 7456/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.519471  [ 7488/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.562687  [ 7520/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.476441  [ 7552/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.499534  [ 7584/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.592533  [ 7616/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.630596  [ 7648/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.511322  [ 7680/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.675612  [ 7712/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.470358  [ 7744/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.426941  [ 7776/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.531452  [ 7808/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.602817  [ 7840/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.591064  [ 7872/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.464712  [ 7904/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.610520  [ 7936/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.471653  [ 7968/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.381827  [ 8000/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.545269  [ 8032/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.566303  [ 8064/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.447763  [ 8096/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.456645  [ 8128/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.557786  [ 8160/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.667689  [ 8192/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.570170  [ 8224/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.547727  [ 8256/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.524546  [ 8288/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.476321  [ 8320/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.494603  [ 8352/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.500116  [ 8384/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.384330  [ 8416/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.437409  [ 8448/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.660500  [ 8480/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.483352  [ 8512/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.342019  [ 8544/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.485371  [ 8576/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.539697  [ 8608/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.645562  [ 8640/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.551717  [ 8672/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.499212  [ 8704/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.481490  [ 8736/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.558849  [ 8768/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.499785  [ 8800/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.441693  [ 8832/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.528619  [ 8864/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.484029  [ 8896/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.336259  [ 8928/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.485258  [ 8960/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.446639  [ 8992/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.488524  [ 9024/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.446786  [ 9056/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.442423  [ 9088/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.557848  [ 9120/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.432377  [ 9152/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.547403  [ 9184/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.600596  [ 9216/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.350248  [ 9248/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.393484  [ 9280/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.520538  [ 9312/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.298799  [ 9344/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.404730  [ 9376/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.512682  [ 9408/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.533546  [ 9440/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.573727  [ 9472/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.463179  [ 9504/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.565591  [ 9536/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.611485  [ 9568/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.507239  [ 9600/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.553235  [ 9632/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.614904  [ 9664/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.500576  [ 9696/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.403709  [ 9728/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.578954  [ 9760/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.422608  [ 9792/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.490583  [ 9824/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.454019  [ 9856/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.550727  [ 9888/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.433561  [ 9920/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.483665  [ 9952/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.551014  [ 9984/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.471864  [10016/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.480280  [10048/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.591849  [10080/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.494830  [10112/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.534624  [10144/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.423455  [10176/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.377640  [10208/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.442957  [10240/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.565998  [10272/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.494577  [10304/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.541149  [10336/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.482146  [10368/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.492007  [10400/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.492258  [10432/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.321000  [10464/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.448232  [10496/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.491537  [10528/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.364877  [10560/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.495876  [10592/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.481563  [10624/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.479789  [10656/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.443664  [10688/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.502584  [10720/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.385685  [10752/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.363244  [10784/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.344523  [10816/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.361157  [10848/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.307963  [10880/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.479893  [10912/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.564132  [10944/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.255729  [10976/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.493022  [11008/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.359864  [11040/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.469917  [11072/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.486194  [11104/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.566673  [11136/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.513065  [11168/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.544822  [11200/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.527525  [11232/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.381965  [11264/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.343938  [11296/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.401624  [11328/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.432339  [11360/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.520971  [11392/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.505763  [11424/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.271518  [11456/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.405863  [11488/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.321346  [11520/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.419883  [11552/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.466337  [11584/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.383390  [11616/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.416236  [11648/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.387445  [11680/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.471862  [11712/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.531733  [11744/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.351787  [11776/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.333254  [11808/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.305919  [11840/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.387773  [11872/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.353196  [11904/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.480133  [11936/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.401466  [11968/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.417345  [12000/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.425895  [12032/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.478947  [12064/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.399976  [12096/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.451708  [12128/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.364525  [12160/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.439444  [12192/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.382794  [12224/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.431667  [12256/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.316090  [12288/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.400298  [12320/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.462672  [12352/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.342367  [12384/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.330446  [12416/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.476799  [12448/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.450942  [12480/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.336902  [12512/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.364853  [12544/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.431173  [12576/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.550652  [12608/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.403326  [12640/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.353696  [12672/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.300439  [12704/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.329600  [12736/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.402558  [12768/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.579772  [12800/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.608851  [12832/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.773836  [12864/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.404577  [12896/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.387139  [12928/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.575440  [12960/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.559383  [12992/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.410424  [13024/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.546545  [13056/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.308444  [13088/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.371378  [13120/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.360736  [13152/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.369335  [13184/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.496179  [13216/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.308689  [13248/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.499237  [13280/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.374767  [13312/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.451602  [13344/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.439136  [13376/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.370242  [13408/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.392848  [13440/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.410021  [13472/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.420327  [13504/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.457675  [13536/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.355162  [13568/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.474337  [13600/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.264589  [13632/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.457975  [13664/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.394797  [13696/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.262375  [13728/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.446653  [13760/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.428418  [13792/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.275490  [13824/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.492444  [13856/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.319403  [13888/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.327027  [13920/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.199375  [13952/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.426176  [13984/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.452906  [14016/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.455700  [14048/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.421430  [14080/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.457690  [14112/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.336038  [14144/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.315600  [14176/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.431287  [14208/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.413768  [14240/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.426661  [14272/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.474721  [14304/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.526632  [14336/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.470570  [14368/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.421071  [14400/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.447360  [14432/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.596888  [14464/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.347469  [14496/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.516718  [14528/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.592666  [14560/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.386945  [14592/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.458444  [14624/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.661369  [14656/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.504070  [14688/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.435507  [14720/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.468812  [14752/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.559835  [14784/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.558869  [14816/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.604500  [14848/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.476561  [14880/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.486849  [14912/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.374346  [14944/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.424815  [14976/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.486685  [15008/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.532743  [15040/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.376641  [15072/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.489207  [15104/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.283214  [15136/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.396738  [15168/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.306378  [15200/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.622631  [15232/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.449364  [15264/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.425564  [15296/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.467183  [15328/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.367500  [15360/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.479868  [15392/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.480306  [15424/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.419648  [15456/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.414952  [15488/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.314269  [15520/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.337400  [15552/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.349557  [15584/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.436922  [15616/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.452547  [15648/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.494932  [15680/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.457965  [15712/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.471573  [15744/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.378888  [15776/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.369957  [15808/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.453205  [15840/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.251479  [15872/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.290858  [15904/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.426465  [15936/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.525461  [15968/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.442222  [16000/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.505852  [16032/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.358162  [16064/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.301516  [16096/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.375555  [16128/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.524034  [16160/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.488457  [16192/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.428641  [16224/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.439230  [16256/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.438243  [16288/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.463229  [16320/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.503034  [16352/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.382813  [16384/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.419839  [16416/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.333028  [16448/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.446058  [16480/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.391882  [16512/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.317515  [16544/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.222456  [16576/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.323043  [16608/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.538504  [16640/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.354220  [16672/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.331572  [16704/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.501393  [16736/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.443438  [16768/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.346256  [16800/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.449919  [16832/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.355996  [16864/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.404949  [16896/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.389284  [16928/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.335724  [16960/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.545809  [16992/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.445820  [17024/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.407232  [17056/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.383329  [17088/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.437687  [17120/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.428839  [17152/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.401619  [17184/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.465694  [17216/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.408829  [17248/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.413443  [17280/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.416003  [17312/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.503340  [17344/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.566891  [17376/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.173165  [17408/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.434798  [17440/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.293052  [17472/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.394416  [17504/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.452694  [17536/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.366316  [17568/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.450925  [17600/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.445385  [17632/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.285724  [17664/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.430205  [17696/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.450709  [17728/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.488189  [17760/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.302865  [17792/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.324937  [17824/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.504436  [17856/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.444063  [17888/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.330172  [17920/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.344504  [17952/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.546266  [17984/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.327963  [18016/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.324779  [18048/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.358217  [18080/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.416842  [18112/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.294857  [18144/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.332755  [18176/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.457839  [18208/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.389204  [18240/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.394812  [18272/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.467936  [18304/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.473143  [18336/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.354342  [18368/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.318570  [18400/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.568328  [18432/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.418184  [18464/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.370156  [18496/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.444739  [18528/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.523204  [18560/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.311710  [18592/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.305019  [18624/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.335084  [18656/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.436217  [18688/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.396871  [18720/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.400851  [18752/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.479577  [18784/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.386131  [18816/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.454749  [18848/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.341027  [18880/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.304143  [18912/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.397492  [18944/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.550720  [18976/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.405478  [19008/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.310278  [19040/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.367927  [19072/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.503123  [19104/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.413198  [19136/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.334535  [19168/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.310399  [19200/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.382061  [19232/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.228281  [19264/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.483055  [19296/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.388800  [19328/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.372158  [19360/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.359320  [19392/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.240768  [19424/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.342772  [19456/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.386745  [19488/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.570611  [19520/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.396872  [19552/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.307266  [19584/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.395296  [19616/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.294635  [19648/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.197110  [19680/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.291161  [19712/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.292942  [19744/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.306654  [19776/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.407621  [19808/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.293375  [19840/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.349851  [19872/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.291374  [19904/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.392952  [19936/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.365261  [19968/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.401104  [20000/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.425249  [20032/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.355111  [20064/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.269007  [20096/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.459430  [20128/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.341830  [20160/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.299207  [20192/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.333568  [20224/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.322445  [20256/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.316737  [20288/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.343932  [20320/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.471067  [20352/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.318223  [20384/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.272798  [20416/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.359059  [20448/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.425376  [20480/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.340415  [20512/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.366516  [20544/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.298011  [20576/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.278228  [20608/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.280206  [20640/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.320808  [20672/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.295453  [20704/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.452652  [20736/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.209483  [20768/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.397573  [20800/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.416354  [20832/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.342583  [20864/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.415048  [20896/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.438201  [20928/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.368795  [20960/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.380330  [20992/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.423544  [21024/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.404173  [21056/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.272423  [21088/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.408709  [21120/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.416119  [21152/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.416682  [21184/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.362662  [21216/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.361368  [21248/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.516891  [21280/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.376424  [21312/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.318392  [21344/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.323624  [21376/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.381395  [21408/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.376439  [21440/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.347040  [21472/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.291771  [21504/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.268036  [21536/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.277370  [21568/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.321673  [21600/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.372983  [21632/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.432959  [21664/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.218108  [21696/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.291943  [21728/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.324434  [21760/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.231176  [21792/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.340811  [21824/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.269099  [21856/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.292310  [21888/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.411023  [21920/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.217042  [21952/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.434015  [21984/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.386317  [22016/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.330260  [22048/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.297162  [22080/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.336870  [22112/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.358483  [22144/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.302095  [22176/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.300444  [22208/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.392768  [22240/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.240118  [22272/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.282357  [22304/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.350900  [22336/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.362419  [22368/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.384580  [22400/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.412200  [22432/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.293520  [22464/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.347582  [22496/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.289150  [22528/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.411087  [22560/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.370978  [22592/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.374850  [22624/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.309391  [22656/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.389198  [22688/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.399159  [22720/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.339568  [22752/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.343452  [22784/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.391304  [22816/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.374068  [22848/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.343143  [22880/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.396483  [22912/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.397452  [22944/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.506754  [22976/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.335878  [23008/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.397195  [23040/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.268821  [23072/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.326466  [23104/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.292304  [23136/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.343123  [23168/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.407657  [23200/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.402419  [23232/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.401593  [23264/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.406507  [23296/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.402965  [23328/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.414556  [23360/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.332363  [23392/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.320690  [23424/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.257641  [23456/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.366760  [23488/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.328922  [23520/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.345113  [23552/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.256553  [23584/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.339989  [23616/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.314803  [23648/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.414970  [23680/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.315931  [23712/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.455096  [23744/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.222365  [23776/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.344899  [23808/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.376577  [23840/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.457034  [23872/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.376500  [23904/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.361700  [23936/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.401792  [23968/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.263831  [24000/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.409723  [24032/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.353604  [24064/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.403589  [24096/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.379990  [24128/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.310417  [24160/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.249213  [24192/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.327041  [24224/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.350532  [24256/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.357905  [24288/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.289430  [24320/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.269042  [24352/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.319808  [24384/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.311022  [24416/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.375248  [24448/24872]:   0%|          | 0/777 [00:30<?, ?it/s]
loss: 0.375248  [24448/24872]:  98%|█████████▊| 764/777 [00:30<00:00, 25.46it/s]
loss: 0.350070  [24480/24872]:  98%|█████████▊| 764/777 [00:30<00:00, 25.46it/s]
loss: 0.268360  [24512/24872]:  98%|█████████▊| 764/777 [00:30<00:00, 25.46it/s]
loss: 0.315079  [24544/24872]:  98%|█████████▊| 764/777 [00:30<00:00, 25.46it/s]
loss: 0.262206  [24576/24872]:  98%|█████████▊| 764/777 [00:30<00:00, 25.46it/s]
loss: 0.436917  [24608/24872]:  98%|█████████▊| 764/777 [00:30<00:00, 25.46it/s]
loss: 0.294415  [24640/24872]:  98%|█████████▊| 764/777 [00:30<00:00, 25.46it/s]
loss: 0.242801  [24672/24872]:  98%|█████████▊| 764/777 [00:30<00:00, 25.46it/s]
loss: 0.342124  [24704/24872]:  98%|█████████▊| 764/777 [00:30<00:00, 25.46it/s]
loss: 0.479245  [24736/24872]:  98%|█████████▊| 764/777 [00:30<00:00, 25.46it/s]
loss: 0.214594  [24768/24872]:  98%|█████████▊| 764/777 [00:30<00:00, 25.46it/s]
loss: 0.261813  [24800/24872]:  98%|█████████▊| 764/777 [00:30<00:00, 25.46it/s]
loss: 0.429851  [24832/24872]:  98%|█████████▊| 764/777 [00:30<00:00, 25.46it/s]
loss: 0.491314  [24864/24872]:  98%|█████████▊| 764/777 [00:30<00:00, 25.46it/s]
loss: 0.479038  [24872/24872]:  98%|█████████▊| 764/777 [00:30<00:00, 25.46it/s]
loss: 0.479038  [24872/24872]: : 778it [00:30, 25.47it/s]
Epoch 2, time=30.54s

  0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.333662  [   32/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.349014  [   64/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.360450  [   96/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.276553  [  128/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.290454  [  160/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.302002  [  192/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.314912  [  224/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.277260  [  256/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.400150  [  288/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.258316  [  320/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.418012  [  352/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.295628  [  384/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.358790  [  416/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.321909  [  448/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.337019  [  480/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.173866  [  512/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.311167  [  544/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.272052  [  576/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.329776  [  608/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.290595  [  640/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.267222  [  672/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.382368  [  704/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.192103  [  736/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.386307  [  768/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.294742  [  800/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.310129  [  832/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.360554  [  864/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.353763  [  896/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.502205  [  928/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.292156  [  960/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.357291  [  992/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.473111  [ 1024/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.427209  [ 1056/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.288267  [ 1088/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.324332  [ 1120/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.331146  [ 1152/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.356051  [ 1184/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.314671  [ 1216/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.297612  [ 1248/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.217868  [ 1280/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.271724  [ 1312/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.286501  [ 1344/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.275884  [ 1376/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.373024  [ 1408/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.387251  [ 1440/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.358557  [ 1472/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.278860  [ 1504/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.424750  [ 1536/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.361930  [ 1568/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.347943  [ 1600/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.403776  [ 1632/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.311494  [ 1664/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.310218  [ 1696/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.381897  [ 1728/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.315516  [ 1760/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.267412  [ 1792/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.374829  [ 1824/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.370299  [ 1856/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.243289  [ 1888/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.324852  [ 1920/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.322733  [ 1952/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.339508  [ 1984/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.305992  [ 2016/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.291732  [ 2048/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.281408  [ 2080/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.225140  [ 2112/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.430692  [ 2144/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.319322  [ 2176/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.286176  [ 2208/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.224133  [ 2240/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.262513  [ 2272/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.583305  [ 2304/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.447943  [ 2336/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.334720  [ 2368/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.230506  [ 2400/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.306729  [ 2432/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.335475  [ 2464/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.365699  [ 2496/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.331867  [ 2528/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.305401  [ 2560/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.412135  [ 2592/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.316049  [ 2624/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.254667  [ 2656/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.358729  [ 2688/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.487864  [ 2720/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.215994  [ 2752/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.288173  [ 2784/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.288142  [ 2816/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.233857  [ 2848/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.277111  [ 2880/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.267570  [ 2912/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.273528  [ 2944/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.501639  [ 2976/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.328003  [ 3008/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.420068  [ 3040/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.326933  [ 3072/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.378846  [ 3104/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.314695  [ 3136/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.304697  [ 3168/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.219396  [ 3200/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.217817  [ 3232/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.258255  [ 3264/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.381601  [ 3296/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.308898  [ 3328/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.327224  [ 3360/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.368106  [ 3392/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.281741  [ 3424/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.322092  [ 3456/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.339191  [ 3488/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.312534  [ 3520/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.328836  [ 3552/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.391011  [ 3584/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.288623  [ 3616/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.443602  [ 3648/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.331256  [ 3680/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.303500  [ 3712/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.344091  [ 3744/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.277718  [ 3776/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.198588  [ 3808/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.358906  [ 3840/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.258335  [ 3872/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.390056  [ 3904/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.321377  [ 3936/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.323549  [ 3968/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.354413  [ 4000/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.259258  [ 4032/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.381091  [ 4064/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.221828  [ 4096/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.279956  [ 4128/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.264845  [ 4160/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.227555  [ 4192/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.312054  [ 4224/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.267738  [ 4256/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.353125  [ 4288/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.338191  [ 4320/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.271698  [ 4352/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.357593  [ 4384/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.308508  [ 4416/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.330600  [ 4448/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.256457  [ 4480/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.282604  [ 4512/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.250066  [ 4544/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.297412  [ 4576/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.276115  [ 4608/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.302953  [ 4640/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.255195  [ 4672/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.189585  [ 4704/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.373437  [ 4736/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.455258  [ 4768/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.185465  [ 4800/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.324415  [ 4832/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.296348  [ 4864/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.203471  [ 4896/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.320163  [ 4928/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.327058  [ 4960/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.259402  [ 4992/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.312399  [ 5024/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.300966  [ 5056/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.177044  [ 5088/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.243214  [ 5120/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.189319  [ 5152/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.380807  [ 5184/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.486743  [ 5216/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.276429  [ 5248/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.276338  [ 5280/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.421337  [ 5312/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.486388  [ 5344/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.310246  [ 5376/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.297424  [ 5408/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.329259  [ 5440/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.301472  [ 5472/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.284946  [ 5504/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.368204  [ 5536/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.263718  [ 5568/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.218967  [ 5600/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.295736  [ 5632/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.342689  [ 5664/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.361587  [ 5696/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.399150  [ 5728/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.296118  [ 5760/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.259203  [ 5792/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.315267  [ 5824/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.296735  [ 5856/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.283577  [ 5888/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.331517  [ 5920/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.293614  [ 5952/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.363191  [ 5984/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.283115  [ 6016/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.435685  [ 6048/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.297982  [ 6080/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.284114  [ 6112/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.355839  [ 6144/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.354456  [ 6176/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.299152  [ 6208/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.256803  [ 6240/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.413729  [ 6272/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.320670  [ 6304/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.266943  [ 6336/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.408054  [ 6368/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.411520  [ 6400/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.270302  [ 6432/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.372657  [ 6464/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.285970  [ 6496/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.213246  [ 6528/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.236605  [ 6560/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.429324  [ 6592/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.273269  [ 6624/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.334261  [ 6656/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.295368  [ 6688/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.364055  [ 6720/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.520895  [ 6752/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.340066  [ 6784/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.330878  [ 6816/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.335517  [ 6848/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.234673  [ 6880/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.229231  [ 6912/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.377571  [ 6944/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.283670  [ 6976/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.366444  [ 7008/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.250581  [ 7040/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.337642  [ 7072/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.265040  [ 7104/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.285162  [ 7136/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.397851  [ 7168/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.321533  [ 7200/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.317888  [ 7232/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.487886  [ 7264/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.367594  [ 7296/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.307303  [ 7328/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.282627  [ 7360/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.309760  [ 7392/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.374023  [ 7424/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.292146  [ 7456/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.263324  [ 7488/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.289356  [ 7520/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.329887  [ 7552/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.301179  [ 7584/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.325360  [ 7616/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.326158  [ 7648/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.265991  [ 7680/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.367283  [ 7712/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.285715  [ 7744/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.248153  [ 7776/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.356849  [ 7808/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.381140  [ 7840/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.301175  [ 7872/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.300012  [ 7904/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.319923  [ 7936/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.251021  [ 7968/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.190480  [ 8000/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.262860  [ 8032/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.342131  [ 8064/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.260703  [ 8096/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.275862  [ 8128/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.297545  [ 8160/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.356133  [ 8192/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.323678  [ 8224/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.273888  [ 8256/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.264468  [ 8288/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.286691  [ 8320/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.201698  [ 8352/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.239893  [ 8384/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.212030  [ 8416/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.217940  [ 8448/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.332626  [ 8480/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.250826  [ 8512/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.224092  [ 8544/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.294648  [ 8576/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.334474  [ 8608/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.244143  [ 8640/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.272304  [ 8672/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.248776  [ 8704/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.236801  [ 8736/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.351312  [ 8768/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.282992  [ 8800/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.284401  [ 8832/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.315879  [ 8864/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.257987  [ 8896/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.218293  [ 8928/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.283785  [ 8960/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.238112  [ 8992/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.270765  [ 9024/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.201032  [ 9056/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.256824  [ 9088/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.291213  [ 9120/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.250036  [ 9152/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.294276  [ 9184/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.371360  [ 9216/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.216326  [ 9248/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.254083  [ 9280/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.288402  [ 9312/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.213711  [ 9344/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.288061  [ 9376/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.306592  [ 9408/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.219546  [ 9440/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.406717  [ 9472/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.225004  [ 9504/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.246351  [ 9536/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.352601  [ 9568/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.329529  [ 9600/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.309755  [ 9632/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.340247  [ 9664/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.342147  [ 9696/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.205080  [ 9728/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.371367  [ 9760/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.211815  [ 9792/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.337978  [ 9824/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.246320  [ 9856/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.337757  [ 9888/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.221522  [ 9920/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.215361  [ 9952/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.347924  [ 9984/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.283904  [10016/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.309413  [10048/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.398091  [10080/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.325905  [10112/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.330359  [10144/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.223254  [10176/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.275808  [10208/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.264588  [10240/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.305086  [10272/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.328821  [10304/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.392488  [10336/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.344126  [10368/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.325513  [10400/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.309011  [10432/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.173308  [10464/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.299149  [10496/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.305011  [10528/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.293194  [10560/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.263004  [10592/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.261627  [10624/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.297477  [10656/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.217682  [10688/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.242494  [10720/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.277709  [10752/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.278736  [10784/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.227439  [10816/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.220715  [10848/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.219350  [10880/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.362961  [10912/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.364962  [10944/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.234692  [10976/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.313069  [11008/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.238241  [11040/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.312389  [11072/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.395267  [11104/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.383638  [11136/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.323298  [11168/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.384198  [11200/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.383910  [11232/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.260251  [11264/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.230734  [11296/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.264591  [11328/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.310192  [11360/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.329151  [11392/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.340499  [11424/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.183658  [11456/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.292619  [11488/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.219576  [11520/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.260324  [11552/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.343636  [11584/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.230621  [11616/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.336100  [11648/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.257930  [11680/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.325813  [11712/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.340691  [11744/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.249297  [11776/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.232648  [11808/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.154038  [11840/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.240171  [11872/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.276124  [11904/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.330718  [11936/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.281028  [11968/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.283600  [12000/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.270754  [12032/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.321137  [12064/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.250133  [12096/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.317639  [12128/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.218482  [12160/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.295666  [12192/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.229208  [12224/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.280715  [12256/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.191019  [12288/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.240327  [12320/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.318328  [12352/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.208415  [12384/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.238480  [12416/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.308743  [12448/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.325238  [12480/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.225321  [12512/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.322952  [12544/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.272918  [12576/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.415179  [12608/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.302019  [12640/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.272180  [12672/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.297415  [12704/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.290299  [12736/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.287946  [12768/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.453190  [12800/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.413962  [12832/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.568120  [12864/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.258321  [12896/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.303407  [12928/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.392291  [12960/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.241241  [12992/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.271148  [13024/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.428967  [13056/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.187378  [13088/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.317974  [13120/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.270949  [13152/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.221169  [13184/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.370894  [13216/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.214032  [13248/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.313517  [13280/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.308995  [13312/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.370509  [13344/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.285800  [13376/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.288886  [13408/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.301898  [13440/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.277526  [13472/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.331393  [13504/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.404142  [13536/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.217426  [13568/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.371635  [13600/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.199060  [13632/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.344587  [13664/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.225772  [13696/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.234668  [13728/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.297227  [13760/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.286245  [13792/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.224682  [13824/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.400495  [13856/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.241421  [13888/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.274594  [13920/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.162559  [13952/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.236334  [13984/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.236630  [14016/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.354420  [14048/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.281390  [14080/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.291800  [14112/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.250855  [14144/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.249632  [14176/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.229820  [14208/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.305142  [14240/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.372875  [14272/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.287656  [14304/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.331205  [14336/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.299539  [14368/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.276524  [14400/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.313443  [14432/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.456411  [14464/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.230812  [14496/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.282348  [14528/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.323397  [14560/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.287294  [14592/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.316359  [14624/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.473413  [14656/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.352708  [14688/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.297632  [14720/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.409644  [14752/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.347123  [14784/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.337190  [14816/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.407489  [14848/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.300910  [14880/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.257518  [14912/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.249652  [14944/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.261534  [14976/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.247700  [15008/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.360160  [15040/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.278006  [15072/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.369430  [15104/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.165772  [15136/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.265705  [15168/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.203560  [15200/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.337938  [15232/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.242409  [15264/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.324270  [15296/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.301081  [15328/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.213829  [15360/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.295116  [15392/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.329836  [15424/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.290851  [15456/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.216773  [15488/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.196852  [15520/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.276741  [15552/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.238869  [15584/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.272049  [15616/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.341711  [15648/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.274786  [15680/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.340176  [15712/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.306559  [15744/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.224221  [15776/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.236086  [15808/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.311208  [15840/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.173088  [15872/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.189793  [15904/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.299574  [15936/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.377981  [15968/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.340869  [16000/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.370267  [16032/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.287814  [16064/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.188634  [16096/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.258318  [16128/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.407266  [16160/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.321295  [16192/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.318085  [16224/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.280787  [16256/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.272650  [16288/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.313987  [16320/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.377614  [16352/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.323279  [16384/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.305994  [16416/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.202459  [16448/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.278045  [16480/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.289863  [16512/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.270830  [16544/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.148834  [16576/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.243450  [16608/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.351300  [16640/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.261401  [16672/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.177747  [16704/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.304993  [16736/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.326760  [16768/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.224582  [16800/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.249761  [16832/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.202732  [16864/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.245243  [16896/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.293654  [16928/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.221124  [16960/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.340165  [16992/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.365684  [17024/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.280074  [17056/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.287963  [17088/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.303696  [17120/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.300454  [17152/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.298348  [17184/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.336458  [17216/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.325204  [17248/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.369152  [17280/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.241293  [17312/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.275797  [17344/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.431501  [17376/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.125302  [17408/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.279439  [17440/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.154528  [17472/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.280597  [17504/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.278537  [17536/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.251537  [17568/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.346785  [17600/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.293841  [17632/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.174662  [17664/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.291145  [17696/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.318407  [17728/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.330506  [17760/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.268574  [17792/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.233980  [17824/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.396663  [17856/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.337663  [17888/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.237223  [17920/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.232802  [17952/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.405258  [17984/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.224653  [18016/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.219555  [18048/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.242796  [18080/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.304610  [18112/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.212652  [18144/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.236145  [18176/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.300465  [18208/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.292426  [18240/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.328310  [18272/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.289371  [18304/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.280913  [18336/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.256953  [18368/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.191231  [18400/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.333056  [18432/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.234347  [18464/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.271578  [18496/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.283541  [18528/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.328160  [18560/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.240418  [18592/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.232172  [18624/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.265513  [18656/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.277241  [18688/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.336689  [18720/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.322044  [18752/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.363585  [18784/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.237175  [18816/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.354819  [18848/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.245459  [18880/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.240582  [18912/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.293992  [18944/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.411892  [18976/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.293034  [19008/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.212780  [19040/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.328966  [19072/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.324225  [19104/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.324708  [19136/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.274728  [19168/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.222923  [19200/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.246438  [19232/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.163862  [19264/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.368915  [19296/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.277766  [19328/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.279296  [19360/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.235189  [19392/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.185122  [19424/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.279813  [19456/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.276873  [19488/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.312538  [19520/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.285531  [19552/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.220507  [19584/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.276709  [19616/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.208410  [19648/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.165127  [19680/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.264335  [19712/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.220955  [19744/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.202356  [19776/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.302022  [19808/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.230940  [19840/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.262317  [19872/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.252112  [19904/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.288713  [19936/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.292759  [19968/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.309881  [20000/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.318861  [20032/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.257853  [20064/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.234322  [20096/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.283935  [20128/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.285969  [20160/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.215502  [20192/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.256071  [20224/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.225006  [20256/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.241752  [20288/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.272036  [20320/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.334188  [20352/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.251534  [20384/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.190620  [20416/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.294281  [20448/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.311833  [20480/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.249426  [20512/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.264290  [20544/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.184965  [20576/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.193417  [20608/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.242133  [20640/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.224907  [20672/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.176888  [20704/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.310586  [20736/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.163753  [20768/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.321388  [20800/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.304044  [20832/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.200366  [20864/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.276464  [20896/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.347830  [20928/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.289133  [20960/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.218021  [20992/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.297522  [21024/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.277666  [21056/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.204479  [21088/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.250844  [21120/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.337471  [21152/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.270978  [21184/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.275385  [21216/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.246445  [21248/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.303283  [21280/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.303521  [21312/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.227798  [21344/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.224096  [21376/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.282651  [21408/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.296054  [21440/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.276915  [21472/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.199752  [21504/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.202250  [21536/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.227346  [21568/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.234010  [21600/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.297349  [21632/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.333100  [21664/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.168165  [21696/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.228204  [21728/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.269761  [21760/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.183146  [21792/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.280274  [21824/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.200641  [21856/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.205127  [21888/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.310102  [21920/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.137318  [21952/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.317195  [21984/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.357445  [22016/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.279956  [22048/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.222648  [22080/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.291737  [22112/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.251600  [22144/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.200738  [22176/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.235657  [22208/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.327505  [22240/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.199353  [22272/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.223175  [22304/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.298678  [22336/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.303335  [22368/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.332146  [22400/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.296107  [22432/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.233639  [22464/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.229368  [22496/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.247421  [22528/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.301864  [22560/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.271340  [22592/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.268579  [22624/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.223420  [22656/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.341874  [22688/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.312844  [22720/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.218590  [22752/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.282670  [22784/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.253623  [22816/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.252675  [22848/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.202941  [22880/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.335842  [22912/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.295719  [22944/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.343551  [22976/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.259059  [23008/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.300057  [23040/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.173613  [23072/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.277245  [23104/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.251806  [23136/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.259618  [23168/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.274300  [23200/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.371583  [23232/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.333385  [23264/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.280552  [23296/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.242706  [23328/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.381220  [23360/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.275629  [23392/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.219440  [23424/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.160105  [23456/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.270468  [23488/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.253815  [23520/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.217184  [23552/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.196547  [23584/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.221890  [23616/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.288683  [23648/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.311481  [23680/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.263913  [23712/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.338102  [23744/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.184229  [23776/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.244779  [23808/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.280916  [23840/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.350682  [23872/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.281124  [23904/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.288701  [23936/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.313398  [23968/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.217370  [24000/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.318364  [24032/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.231403  [24064/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.323322  [24096/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.273000  [24128/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.232374  [24160/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.154024  [24192/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.259830  [24224/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.286905  [24256/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.255403  [24288/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.196503  [24320/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.191573  [24352/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.256030  [24384/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.223394  [24416/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.285006  [24448/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.237743  [24480/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.198739  [24512/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.255882  [24544/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.206427  [24576/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.312141  [24608/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.239735  [24640/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.182137  [24672/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.286549  [24704/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.359794  [24736/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.166845  [24768/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.202555  [24800/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.285530  [24832/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.375678  [24864/24872]:   0%|          | 0/777 [00:30<?, ?it/s]
loss: 0.375678  [24864/24872]: 100%|██████████| 777/777 [00:30<00:00, 25.88it/s]
loss: 0.362807  [24872/24872]: 100%|██████████| 777/777 [00:30<00:00, 25.88it/s]
loss: 0.362807  [24872/24872]: : 778it [00:30, 25.89it/s]
Epoch 3, time=60.59s

  0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.301910  [   32/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.313962  [   64/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.283696  [   96/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.226864  [  128/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.260233  [  160/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.238716  [  192/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.268689  [  224/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.209364  [  256/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.317398  [  288/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.229434  [  320/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.305135  [  352/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.227345  [  384/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.274983  [  416/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.240712  [  448/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.293548  [  480/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.140112  [  512/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.254038  [  544/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.217035  [  576/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.258236  [  608/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.214194  [  640/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.238955  [  672/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.243748  [  704/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.142151  [  736/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.263032  [  768/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.215014  [  800/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.267440  [  832/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.312933  [  864/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.257482  [  896/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.384124  [  928/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.250062  [  960/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.283238  [  992/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.371741  [ 1024/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.337240  [ 1056/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.244553  [ 1088/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.236774  [ 1120/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.290053  [ 1152/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.273364  [ 1184/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.225263  [ 1216/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.220715  [ 1248/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.191774  [ 1280/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.249773  [ 1312/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.238042  [ 1344/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.242152  [ 1376/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.305678  [ 1408/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.327578  [ 1440/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.294987  [ 1472/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.248376  [ 1504/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.307448  [ 1536/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.341088  [ 1568/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.299039  [ 1600/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.306835  [ 1632/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.270611  [ 1664/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.258807  [ 1696/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.302925  [ 1728/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.290093  [ 1760/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.244977  [ 1792/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.271597  [ 1824/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.297067  [ 1856/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.208064  [ 1888/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.278322  [ 1920/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.241084  [ 1952/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.281068  [ 1984/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.257750  [ 2016/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.229738  [ 2048/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.193299  [ 2080/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.195618  [ 2112/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.356297  [ 2144/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.225894  [ 2176/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.212059  [ 2208/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.198079  [ 2240/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.219898  [ 2272/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.440711  [ 2304/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.376870  [ 2336/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.290352  [ 2368/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.170111  [ 2400/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.258707  [ 2432/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.228305  [ 2464/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.387548  [ 2496/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.257630  [ 2528/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.243787  [ 2560/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.337912  [ 2592/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.314058  [ 2624/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.222000  [ 2656/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.297878  [ 2688/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.328900  [ 2720/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.177242  [ 2752/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.208040  [ 2784/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.202499  [ 2816/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.211641  [ 2848/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.199420  [ 2880/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.197849  [ 2912/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.264029  [ 2944/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.422879  [ 2976/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.262657  [ 3008/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.329962  [ 3040/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.279530  [ 3072/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.277060  [ 3104/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.287793  [ 3136/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.230497  [ 3168/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.188396  [ 3200/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.183942  [ 3232/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.223818  [ 3264/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.288230  [ 3296/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.245419  [ 3328/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.258755  [ 3360/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.308062  [ 3392/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.236467  [ 3424/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.267525  [ 3456/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.314715  [ 3488/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.237917  [ 3520/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.237371  [ 3552/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.302293  [ 3584/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.282977  [ 3616/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.342834  [ 3648/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.265543  [ 3680/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.221804  [ 3712/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.266307  [ 3744/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.288609  [ 3776/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.138358  [ 3808/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.275383  [ 3840/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.210882  [ 3872/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.285226  [ 3904/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.276308  [ 3936/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.210777  [ 3968/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.243043  [ 4000/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.210864  [ 4032/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.348689  [ 4064/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.176917  [ 4096/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.174577  [ 4128/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.246606  [ 4160/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.199439  [ 4192/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.261917  [ 4224/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.243772  [ 4256/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.309400  [ 4288/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.272913  [ 4320/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.203977  [ 4352/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.288314  [ 4384/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.236365  [ 4416/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.275992  [ 4448/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.248398  [ 4480/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.240124  [ 4512/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.243478  [ 4544/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.251113  [ 4576/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.245655  [ 4608/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.271129  [ 4640/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.180838  [ 4672/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.161625  [ 4704/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.337037  [ 4736/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.342851  [ 4768/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.161059  [ 4800/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.254270  [ 4832/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.244557  [ 4864/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.199285  [ 4896/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.332098  [ 4928/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.297924  [ 4960/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.255462  [ 4992/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.249527  [ 5024/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.231505  [ 5056/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.151399  [ 5088/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.214604  [ 5120/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.161945  [ 5152/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.338692  [ 5184/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.352626  [ 5216/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.242660  [ 5248/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.225555  [ 5280/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.365169  [ 5312/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.472600  [ 5344/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.233345  [ 5376/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.303757  [ 5408/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.306420  [ 5440/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.252787  [ 5472/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.235571  [ 5504/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.292786  [ 5536/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.213831  [ 5568/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.185646  [ 5600/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.251083  [ 5632/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.294134  [ 5664/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.325017  [ 5696/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.336204  [ 5728/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.254217  [ 5760/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.186509  [ 5792/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.249319  [ 5824/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.270312  [ 5856/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.187161  [ 5888/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.292120  [ 5920/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.205413  [ 5952/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.272958  [ 5984/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.243241  [ 6016/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.356655  [ 6048/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.288470  [ 6080/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.260425  [ 6112/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.280293  [ 6144/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.281970  [ 6176/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.215228  [ 6208/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.218215  [ 6240/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.317990  [ 6272/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.281792  [ 6304/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.228515  [ 6336/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.314905  [ 6368/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.305538  [ 6400/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.209933  [ 6432/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.253202  [ 6464/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.177065  [ 6496/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.189784  [ 6528/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.207827  [ 6560/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.296531  [ 6592/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.239628  [ 6624/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.319648  [ 6656/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.225112  [ 6688/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.255935  [ 6720/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.433411  [ 6752/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.268767  [ 6784/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.273885  [ 6816/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.224703  [ 6848/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.167955  [ 6880/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.237408  [ 6912/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.254635  [ 6944/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.208037  [ 6976/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.276204  [ 7008/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.181042  [ 7040/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.250726  [ 7072/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.235709  [ 7104/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.212916  [ 7136/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.320137  [ 7168/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.242241  [ 7200/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.291527  [ 7232/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.413801  [ 7264/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.338405  [ 7296/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.250475  [ 7328/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.307598  [ 7360/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.269574  [ 7392/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.287917  [ 7424/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.269668  [ 7456/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.220533  [ 7488/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.257430  [ 7520/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.352585  [ 7552/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.246265  [ 7584/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.240595  [ 7616/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.241092  [ 7648/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.227920  [ 7680/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.313589  [ 7712/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.238892  [ 7744/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.196129  [ 7776/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.272223  [ 7808/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.353057  [ 7840/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.232937  [ 7872/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.282419  [ 7904/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.275140  [ 7936/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.188730  [ 7968/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.185993  [ 8000/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.232909  [ 8032/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.289516  [ 8064/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.250848  [ 8096/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.215756  [ 8128/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.262548  [ 8160/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.311475  [ 8192/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.258694  [ 8224/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.246108  [ 8256/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.213613  [ 8288/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.235054  [ 8320/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.170213  [ 8352/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.215431  [ 8384/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.181026  [ 8416/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.169923  [ 8448/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.277081  [ 8480/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.213909  [ 8512/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.243607  [ 8544/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.272606  [ 8576/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.285220  [ 8608/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.233941  [ 8640/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.235929  [ 8672/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.206580  [ 8704/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.206495  [ 8736/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.275226  [ 8768/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.262485  [ 8800/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.264493  [ 8832/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.267451  [ 8864/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.222325  [ 8896/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.217934  [ 8928/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.256414  [ 8960/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.192818  [ 8992/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.240401  [ 9024/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.174072  [ 9056/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.223378  [ 9088/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.244322  [ 9120/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.225050  [ 9152/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.229409  [ 9184/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.323355  [ 9216/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.214379  [ 9248/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.228908  [ 9280/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.226594  [ 9312/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.191845  [ 9344/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.227000  [ 9376/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.255791  [ 9408/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.160984  [ 9440/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.355943  [ 9472/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.167052  [ 9504/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.187920  [ 9536/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.279190  [ 9568/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.254679  [ 9600/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.258303  [ 9632/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.296020  [ 9664/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.255552  [ 9696/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.168505  [ 9728/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.321480  [ 9760/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.174775  [ 9792/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.273067  [ 9824/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.212860  [ 9856/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.273704  [ 9888/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.162176  [ 9920/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.176305  [ 9952/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.287699  [ 9984/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.243308  [10016/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.256503  [10048/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.356106  [10080/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.285182  [10112/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.271486  [10144/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.200098  [10176/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.189283  [10208/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.212731  [10240/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.274795  [10272/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.283650  [10304/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.305143  [10336/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.259761  [10368/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.240048  [10400/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.246655  [10432/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.169732  [10464/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.270059  [10496/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.228240  [10528/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.205273  [10560/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.228488  [10592/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.265670  [10624/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.247114  [10656/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.173576  [10688/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.172895  [10720/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.232602  [10752/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.230778  [10784/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.176689  [10816/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.149563  [10848/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.180300  [10880/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.293703  [10912/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.303381  [10944/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.199910  [10976/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.257846  [11008/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.220654  [11040/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.235708  [11072/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.347853  [11104/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.332460  [11136/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.293428  [11168/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.316551  [11200/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.311902  [11232/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.220729  [11264/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.198240  [11296/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.223916  [11328/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.214434  [11360/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.260253  [11392/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.311610  [11424/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.149144  [11456/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.223067  [11488/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.171987  [11520/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.195429  [11552/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.295382  [11584/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.196839  [11616/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.286940  [11648/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.212678  [11680/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.287258  [11712/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.311789  [11744/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.229161  [11776/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.183573  [11808/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.143757  [11840/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.181259  [11872/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.228671  [11904/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.294048  [11936/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.223816  [11968/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.224439  [12000/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.232181  [12032/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.249296  [12064/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.201202  [12096/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.257312  [12128/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.195432  [12160/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.253858  [12192/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.185688  [12224/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.254766  [12256/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.170149  [12288/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.212318  [12320/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.238732  [12352/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.188680  [12384/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.201975  [12416/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.257705  [12448/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.296830  [12480/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.178220  [12512/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.257986  [12544/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.222077  [12576/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.334746  [12608/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.240826  [12640/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.228630  [12672/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.260499  [12704/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.215406  [12736/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.248871  [12768/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.422762  [12800/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.356642  [12832/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.494219  [12864/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.212405  [12896/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.247235  [12928/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.328760  [12960/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.211421  [12992/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.257588  [13024/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.395942  [13056/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.145503  [13088/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.240145  [13120/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.186323  [13152/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.175900  [13184/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.326283  [13216/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.179081  [13248/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.262746  [13280/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.240018  [13312/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.321233  [13344/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.230935  [13376/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.267431  [13408/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.271931  [13440/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.225209  [13472/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.264005  [13504/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.332301  [13536/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.199234  [13568/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.356736  [13600/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.174452  [13632/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.266638  [13664/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.185781  [13696/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.176604  [13728/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.256327  [13760/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.232242  [13792/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.180137  [13824/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.338359  [13856/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.206913  [13888/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.237314  [13920/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.133683  [13952/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.203639  [13984/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.198968  [14016/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.288243  [14048/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.236321  [14080/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.238360  [14112/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.223239  [14144/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.223496  [14176/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.187205  [14208/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.250556  [14240/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.333265  [14272/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.238916  [14304/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.283320  [14336/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.237794  [14368/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.242152  [14400/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.297060  [14432/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.371521  [14464/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.210712  [14496/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.224152  [14528/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.225118  [14560/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.228005  [14592/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.259377  [14624/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.358386  [14656/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.304979  [14688/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.236810  [14720/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.279255  [14752/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.255347  [14784/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.303069  [14816/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.318023  [14848/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.231958  [14880/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.212110  [14912/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.185337  [14944/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.200937  [14976/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.187840  [15008/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.329507  [15040/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.216195  [15072/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.308297  [15104/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.139604  [15136/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.209269  [15168/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.166479  [15200/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.278552  [15232/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.223651  [15264/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.266329  [15296/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.227818  [15328/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.183935  [15360/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.255053  [15392/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.277745  [15424/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.233422  [15456/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.188146  [15488/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.164358  [15520/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.189289  [15552/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.219426  [15584/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.221284  [15616/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.260466  [15648/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.235096  [15680/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.324431  [15712/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.270999  [15744/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.187291  [15776/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.222521  [15808/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.263766  [15840/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.136447  [15872/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.140715  [15904/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.313597  [15936/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.344802  [15968/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.320605  [16000/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.325173  [16032/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.242319  [16064/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.160293  [16096/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.263837  [16128/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.348049  [16160/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.269294  [16192/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.298533  [16224/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.261926  [16256/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.235646  [16288/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.265712  [16320/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.360068  [16352/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.234033  [16384/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.314285  [16416/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.181560  [16448/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.247490  [16480/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.284392  [16512/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.254220  [16544/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.141916  [16576/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.229268  [16608/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.291720  [16640/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.218562  [16672/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.148060  [16704/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.231174  [16736/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.234656  [16768/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.188378  [16800/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.196267  [16832/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.189547  [16864/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.209474  [16896/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.263125  [16928/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.179984  [16960/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.274732  [16992/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.314494  [17024/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.219570  [17056/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.252821  [17088/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.239978  [17120/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.251836  [17152/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.276641  [17184/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.328171  [17216/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.317398  [17248/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.342044  [17280/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.239640  [17312/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.266093  [17344/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.428239  [17376/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.123067  [17408/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.231667  [17440/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.138240  [17472/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.254473  [17504/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.235128  [17536/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.225698  [17568/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.314371  [17600/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.266042  [17632/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.150655  [17664/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.251981  [17696/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.267055  [17728/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.295185  [17760/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.210714  [17792/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.214349  [17824/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.311812  [17856/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.284953  [17888/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.190413  [17920/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.187390  [17952/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.352518  [17984/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.188077  [18016/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.192366  [18048/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.223197  [18080/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.227278  [18112/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.170285  [18144/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.187362  [18176/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.248960  [18208/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.263743  [18240/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.263074  [18272/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.217722  [18304/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.205207  [18336/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.209600  [18368/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.150148  [18400/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.243470  [18432/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.192187  [18464/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.223674  [18496/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.244353  [18528/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.294921  [18560/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.202265  [18592/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.201665  [18624/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.240857  [18656/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.205845  [18688/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.287564  [18720/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.287329  [18752/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.276849  [18784/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.208051  [18816/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.292954  [18848/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.198927  [18880/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.208287  [18912/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.286567  [18944/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.327330  [18976/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.263515  [19008/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.190764  [19040/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.257701  [19072/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.283536  [19104/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.284400  [19136/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.221426  [19168/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.190944  [19200/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.215923  [19232/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.137344  [19264/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.329905  [19296/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.280737  [19328/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.259836  [19360/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.201279  [19392/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.156552  [19424/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.250233  [19456/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.224488  [19488/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.254581  [19520/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.215338  [19552/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.199591  [19584/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.191058  [19616/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.172372  [19648/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.143303  [19680/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.182187  [19712/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.197094  [19744/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.171778  [19776/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.229789  [19808/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.181541  [19840/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.239245  [19872/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.183713  [19904/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.227468  [19936/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.236074  [19968/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.225614  [20000/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.274312  [20032/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.213254  [20064/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.204517  [20096/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.248855  [20128/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.238047  [20160/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.205067  [20192/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.234785  [20224/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.206679  [20256/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.221364  [20288/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.226912  [20320/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.290699  [20352/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.208501  [20384/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.170742  [20416/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.259268  [20448/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.261471  [20480/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.194176  [20512/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.240171  [20544/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.146028  [20576/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.171587  [20608/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.214173  [20640/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.194982  [20672/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.149702  [20704/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.272087  [20736/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.138415  [20768/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.281073  [20800/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.268337  [20832/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.176554  [20864/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.253742  [20896/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.303975  [20928/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.280542  [20960/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.218185  [20992/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.268315  [21024/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.253576  [21056/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.147113  [21088/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.212208  [21120/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.283800  [21152/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.227171  [21184/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.217273  [21216/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.214213  [21248/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.289587  [21280/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.259980  [21312/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.188675  [21344/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.203853  [21376/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.230620  [21408/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.218193  [21440/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.218717  [21472/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.185679  [21504/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.172080  [21536/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.164251  [21568/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.213762  [21600/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.276272  [21632/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.278473  [21664/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.147515  [21696/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.185333  [21728/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.237502  [21760/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.167356  [21792/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.261301  [21824/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.186724  [21856/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.181631  [21888/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.284781  [21920/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.117157  [21952/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.285796  [21984/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.344655  [22016/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.245013  [22048/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.200327  [22080/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.296920  [22112/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.204849  [22144/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.197224  [22176/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.227508  [22208/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.267246  [22240/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.179567  [22272/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.214630  [22304/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.247205  [22336/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.250130  [22368/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.288257  [22400/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.239920  [22432/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.218674  [22464/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.188656  [22496/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.219285  [22528/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.285543  [22560/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.248165  [22592/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.241530  [22624/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.207601  [22656/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.298300  [22688/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.280380  [22720/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.216990  [22752/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.265478  [22784/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.212102  [22816/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.226610  [22848/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.162499  [22880/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.291090  [22912/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.288131  [22944/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.292578  [22976/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.208391  [23008/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.244359  [23040/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.168244  [23072/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.242074  [23104/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.250815  [23136/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.295657  [23168/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.255730  [23200/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.298411  [23232/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.303410  [23264/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.273758  [23296/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.213465  [23328/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.351793  [23360/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.287603  [23392/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.201828  [23424/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.160718  [23456/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.273952  [23488/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.229384  [23520/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.185468  [23552/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.177431  [23584/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.186940  [23616/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.281135  [23648/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.279291  [23680/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.187950  [23712/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.272469  [23744/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.179874  [23776/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.178834  [23808/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.262398  [23840/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.299538  [23872/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.239439  [23904/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.218344  [23936/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.305019  [23968/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.185201  [24000/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.255327  [24032/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.251081  [24064/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.302270  [24096/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.226319  [24128/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.194088  [24160/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.165070  [24192/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.231640  [24224/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.250517  [24256/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.211954  [24288/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.183166  [24320/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.134150  [24352/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.221549  [24384/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.213591  [24416/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.260087  [24448/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.220361  [24480/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.230312  [24512/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.240253  [24544/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.201570  [24576/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.302171  [24608/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.220828  [24640/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.140052  [24672/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.269702  [24704/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.306668  [24736/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.152992  [24768/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.183702  [24800/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.251309  [24832/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.296726  [24864/24872]:   0%|          | 0/777 [00:30<?, ?it/s]
loss: 0.296726  [24864/24872]: 100%|██████████| 777/777 [00:30<00:00, 25.87it/s]
loss: 0.290230  [24872/24872]: 100%|██████████| 777/777 [00:30<00:00, 25.87it/s]
loss: 0.290230  [24872/24872]: : 778it [00:30, 25.87it/s]
Epoch 4, time=90.66s

  0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.264867  [   32/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.282281  [   64/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.271430  [   96/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.211236  [  128/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.241508  [  160/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.198573  [  192/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.292206  [  224/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.211072  [  256/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.282663  [  288/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.235954  [  320/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.265727  [  352/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.183597  [  384/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.287379  [  416/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.219509  [  448/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.286856  [  480/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.136665  [  512/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.222649  [  544/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.198580  [  576/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.222333  [  608/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.207624  [  640/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.210230  [  672/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.208209  [  704/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.121176  [  736/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.226594  [  768/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.191902  [  800/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.235997  [  832/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.264292  [  864/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.244102  [  896/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.355591  [  928/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.231996  [  960/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.253206  [  992/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.336823  [ 1024/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.304797  [ 1056/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.240277  [ 1088/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.235878  [ 1120/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.290370  [ 1152/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.256683  [ 1184/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.199273  [ 1216/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.191770  [ 1248/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.177916  [ 1280/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.236676  [ 1312/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.228050  [ 1344/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.196793  [ 1376/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.273705  [ 1408/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.302998  [ 1440/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.249805  [ 1472/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.187926  [ 1504/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.250100  [ 1536/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.261728  [ 1568/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.260614  [ 1600/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.261501  [ 1632/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.219039  [ 1664/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.222840  [ 1696/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.249138  [ 1728/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.252344  [ 1760/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.209107  [ 1792/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.231804  [ 1824/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.239981  [ 1856/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.185196  [ 1888/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.246272  [ 1920/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.196535  [ 1952/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.253917  [ 1984/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.205874  [ 2016/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.197466  [ 2048/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.169948  [ 2080/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.187177  [ 2112/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.303497  [ 2144/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.189347  [ 2176/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.182467  [ 2208/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.172183  [ 2240/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.213558  [ 2272/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.375988  [ 2304/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.306059  [ 2336/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.275342  [ 2368/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.145614  [ 2400/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.223850  [ 2432/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.191394  [ 2464/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.340147  [ 2496/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.215298  [ 2528/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.231754  [ 2560/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.286213  [ 2592/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.267888  [ 2624/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.224047  [ 2656/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.273231  [ 2688/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.259427  [ 2720/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.177662  [ 2752/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.203200  [ 2784/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.172167  [ 2816/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.199427  [ 2848/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.167254  [ 2880/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.135075  [ 2912/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.202331  [ 2944/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.401421  [ 2976/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.226436  [ 3008/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.277694  [ 3040/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.225461  [ 3072/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.235317  [ 3104/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.265593  [ 3136/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.227834  [ 3168/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.169513  [ 3200/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.161557  [ 3232/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.207142  [ 3264/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.250912  [ 3296/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.172953  [ 3328/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.199794  [ 3360/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.292983  [ 3392/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.216733  [ 3424/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.232260  [ 3456/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.277948  [ 3488/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.194704  [ 3520/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.201714  [ 3552/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.253998  [ 3584/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.228257  [ 3616/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.364167  [ 3648/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.214752  [ 3680/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.198279  [ 3712/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.239576  [ 3744/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.254938  [ 3776/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.125892  [ 3808/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.236944  [ 3840/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.181396  [ 3872/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.228906  [ 3904/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.220730  [ 3936/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.176664  [ 3968/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.219890  [ 4000/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.190414  [ 4032/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.316527  [ 4064/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.180220  [ 4096/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.152316  [ 4128/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.217070  [ 4160/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.191194  [ 4192/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.232117  [ 4224/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.187148  [ 4256/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.222778  [ 4288/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.225321  [ 4320/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.168331  [ 4352/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.244673  [ 4384/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.202939  [ 4416/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.176249  [ 4448/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.227595  [ 4480/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.220862  [ 4512/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.173707  [ 4544/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.232183  [ 4576/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.224410  [ 4608/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.221138  [ 4640/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.147669  [ 4672/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.146966  [ 4704/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.269341  [ 4736/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.295527  [ 4768/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.139253  [ 4800/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.243111  [ 4832/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.241427  [ 4864/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.162058  [ 4896/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.282250  [ 4928/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.253505  [ 4960/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.208718  [ 4992/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.210461  [ 5024/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.159020  [ 5056/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.128258  [ 5088/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.190030  [ 5120/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.116470  [ 5152/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.298509  [ 5184/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.338323  [ 5216/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.194728  [ 5248/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.233177  [ 5280/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.337787  [ 5312/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.354185  [ 5344/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.210030  [ 5376/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.233140  [ 5408/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.227139  [ 5440/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.218468  [ 5472/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.207745  [ 5504/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.275716  [ 5536/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.204330  [ 5568/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.238616  [ 5600/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.240718  [ 5632/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.227909  [ 5664/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.234689  [ 5696/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.286128  [ 5728/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.263770  [ 5760/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.161378  [ 5792/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.215297  [ 5824/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.241974  [ 5856/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.161839  [ 5888/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.249036  [ 5920/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.194066  [ 5952/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.260100  [ 5984/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.223053  [ 6016/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.311615  [ 6048/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.244128  [ 6080/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.211518  [ 6112/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.246787  [ 6144/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.251685  [ 6176/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.175739  [ 6208/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.218761  [ 6240/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.264585  [ 6272/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.244694  [ 6304/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.199842  [ 6336/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.244134  [ 6368/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.298155  [ 6400/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.153363  [ 6432/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.173712  [ 6464/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.151709  [ 6496/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.147228  [ 6528/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.159403  [ 6560/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.244269  [ 6592/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.219900  [ 6624/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.261369  [ 6656/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.205868  [ 6688/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.204227  [ 6720/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.353782  [ 6752/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.260982  [ 6784/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.244435  [ 6816/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.199352  [ 6848/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.121549  [ 6880/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.184425  [ 6912/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.188263  [ 6944/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.184610  [ 6976/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.268457  [ 7008/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.151480  [ 7040/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.209954  [ 7072/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.176400  [ 7104/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.204111  [ 7136/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.242234  [ 7168/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.212029  [ 7200/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.246393  [ 7232/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.317591  [ 7264/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.301543  [ 7296/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.226469  [ 7328/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.210742  [ 7360/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.237918  [ 7392/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.230987  [ 7424/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.224458  [ 7456/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.181593  [ 7488/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.213853  [ 7520/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.278943  [ 7552/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.188540  [ 7584/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.238879  [ 7616/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.281964  [ 7648/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.191769  [ 7680/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.291951  [ 7712/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.195988  [ 7744/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.176434  [ 7776/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.223479  [ 7808/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.272543  [ 7840/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.217742  [ 7872/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.255374  [ 7904/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.242334  [ 7936/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.178684  [ 7968/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.183263  [ 8000/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.220113  [ 8032/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.284379  [ 8064/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.204019  [ 8096/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.191202  [ 8128/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.241588  [ 8160/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.284967  [ 8192/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.209370  [ 8224/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.245360  [ 8256/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.181522  [ 8288/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.199484  [ 8320/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.154039  [ 8352/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.187857  [ 8384/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.153767  [ 8416/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.118067  [ 8448/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.232452  [ 8480/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.189661  [ 8512/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.172798  [ 8544/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.239588  [ 8576/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.236781  [ 8608/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.187077  [ 8640/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.199643  [ 8672/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.196825  [ 8704/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.171835  [ 8736/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.257840  [ 8768/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.232559  [ 8800/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.204909  [ 8832/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.225699  [ 8864/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.200345  [ 8896/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.193944  [ 8928/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.213869  [ 8960/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.180458  [ 8992/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.222806  [ 9024/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.127197  [ 9056/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.196672  [ 9088/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.203337  [ 9120/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.173737  [ 9152/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.204008  [ 9184/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.282166  [ 9216/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.181143  [ 9248/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.174652  [ 9280/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.202544  [ 9312/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.171672  [ 9344/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.192609  [ 9376/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.239878  [ 9408/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.127143  [ 9440/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.320560  [ 9472/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.133529  [ 9504/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.156075  [ 9536/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.238453  [ 9568/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.233555  [ 9600/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.231955  [ 9632/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.267076  [ 9664/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.209442  [ 9696/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.134873  [ 9728/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.306516  [ 9760/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.155203  [ 9792/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.230636  [ 9824/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.187507  [ 9856/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.234707  [ 9888/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.128281  [ 9920/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.155868  [ 9952/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.254299  [ 9984/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.206535  [10016/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.243707  [10048/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.286424  [10080/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.234648  [10112/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.228431  [10144/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.160606  [10176/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.151905  [10208/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.186424  [10240/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.230375  [10272/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.258318  [10304/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.262306  [10336/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.218041  [10368/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.200809  [10400/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.209948  [10432/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.148398  [10464/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.242569  [10496/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.218293  [10528/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.186866  [10560/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.188585  [10592/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.212921  [10624/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.208478  [10656/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.152813  [10688/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.141816  [10720/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.205196  [10752/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.218389  [10784/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.168231  [10816/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.110039  [10848/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.163076  [10880/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.251806  [10912/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.265959  [10944/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.184171  [10976/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.229104  [11008/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.181653  [11040/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.205377  [11072/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.246512  [11104/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.267879  [11136/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.254523  [11168/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.247046  [11200/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.270357  [11232/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.195047  [11264/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.169174  [11296/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.200066  [11328/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.181320  [11360/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.212538  [11392/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.261860  [11424/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.122846  [11456/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.164711  [11488/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.158178  [11520/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.152550  [11552/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.283210  [11584/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.179708  [11616/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.261079  [11648/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.191266  [11680/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.235292  [11712/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.262973  [11744/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.182423  [11776/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.128955  [11808/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.092167  [11840/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.145830  [11872/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.214454  [11904/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.245148  [11936/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.195109  [11968/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.194647  [12000/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.172872  [12032/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.219252  [12064/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.163955  [12096/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.219632  [12128/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.153608  [12160/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.213226  [12192/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.180156  [12224/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.223231  [12256/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.146268  [12288/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.168493  [12320/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.195390  [12352/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.171258  [12384/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.179800  [12416/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.212911  [12448/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.231300  [12480/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.161828  [12512/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.220490  [12544/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.202506  [12576/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.270685  [12608/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.212117  [12640/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.185871  [12672/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.242859  [12704/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.194069  [12736/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.227132  [12768/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.342085  [12800/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.280306  [12832/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.401276  [12864/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.186940  [12896/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.212935  [12928/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.230878  [12960/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.155685  [12992/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.213876  [13024/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.319732  [13056/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.135774  [13088/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.193003  [13120/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.151161  [13152/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.169074  [13184/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.280675  [13216/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.161234  [13248/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.216908  [13280/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.186762  [13312/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.253871  [13344/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.181946  [13376/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.248214  [13408/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.222190  [13440/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.192256  [13472/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.202476  [13504/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.280576  [13536/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.157881  [13568/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.283625  [13600/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.153780  [13632/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.234194  [13664/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.156483  [13696/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.144899  [13728/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.224243  [13760/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.184766  [13792/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.147257  [13824/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.294443  [13856/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.180835  [13888/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.197188  [13920/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.105997  [13952/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.165484  [13984/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.162773  [14016/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.249189  [14048/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.183984  [14080/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.199061  [14112/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.186636  [14144/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.194245  [14176/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.140351  [14208/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.209292  [14240/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.304814  [14272/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.193772  [14304/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.256936  [14336/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.217682  [14368/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.199712  [14400/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.250751  [14432/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.318822  [14464/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.173860  [14496/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.196695  [14528/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.198272  [14560/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.193813  [14592/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.213889  [14624/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.314170  [14656/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.268324  [14688/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.218090  [14720/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.234893  [14752/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.222239  [14784/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.261305  [14816/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.265469  [14848/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.210634  [14880/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.186909  [14912/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.159344  [14944/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.198262  [14976/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.181301  [15008/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.332319  [15040/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.222935  [15072/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.234318  [15104/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.116228  [15136/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.225333  [15168/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.141094  [15200/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.232460  [15232/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.216747  [15264/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.252076  [15296/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.208744  [15328/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.187415  [15360/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.222217  [15392/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.240034  [15424/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.217300  [15456/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.177215  [15488/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.148374  [15520/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.161055  [15552/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.189037  [15584/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.216480  [15616/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.234981  [15648/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.211096  [15680/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.283539  [15712/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.241488  [15744/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.170057  [15776/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.187189  [15808/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.259353  [15840/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.127545  [15872/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.135296  [15904/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.284127  [15936/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.319810  [15968/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.284437  [16000/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.307225  [16032/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.196623  [16064/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.136025  [16096/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.210950  [16128/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.288210  [16160/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.231507  [16192/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.241663  [16224/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.231446  [16256/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.213321  [16288/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.241758  [16320/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.306314  [16352/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.193873  [16384/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.243626  [16416/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.153518  [16448/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.209447  [16480/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.237399  [16512/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.214239  [16544/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.125936  [16576/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.178163  [16608/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.219404  [16640/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.208489  [16672/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.141818  [16704/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.187045  [16736/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.181965  [16768/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.171211  [16800/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.152969  [16832/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.167943  [16864/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.182500  [16896/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.222576  [16928/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.150731  [16960/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.232634  [16992/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.267942  [17024/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.159385  [17056/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.251923  [17088/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.202630  [17120/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.203776  [17152/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.260811  [17184/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.303433  [17216/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.233368  [17248/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.284601  [17280/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.214552  [17312/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.204220  [17344/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.361509  [17376/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.112285  [17408/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.173251  [17440/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.118067  [17472/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.229454  [17504/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.224376  [17536/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.200839  [17568/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.277573  [17600/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.221989  [17632/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.110232  [17664/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.258178  [17696/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.212389  [17728/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.241102  [17760/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.200622  [17792/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.184511  [17824/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.278000  [17856/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.271146  [17888/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.171249  [17920/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.169129  [17952/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.324917  [17984/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.159745  [18016/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.184453  [18048/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.211045  [18080/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.205880  [18112/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.133638  [18144/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.153079  [18176/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.240558  [18208/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.228645  [18240/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.220977  [18272/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.183279  [18304/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.148425  [18336/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.181207  [18368/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.138910  [18400/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.193386  [18432/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.164624  [18464/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.172194  [18496/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.202705  [18528/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.269039  [18560/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.181856  [18592/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.163811  [18624/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.208691  [18656/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.162991  [18688/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.262225  [18720/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.299795  [18752/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.240350  [18784/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.184635  [18816/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.267426  [18848/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.180375  [18880/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.189588  [18912/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.236844  [18944/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.282286  [18976/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.236251  [19008/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.167157  [19040/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.233593  [19072/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.239979  [19104/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.258118  [19136/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.235755  [19168/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.170270  [19200/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.204328  [19232/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.128977  [19264/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.290490  [19296/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.255234  [19328/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.220416  [19360/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.190295  [19392/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.158468  [19424/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.233327  [19456/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.192344  [19488/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.243414  [19520/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.200777  [19552/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.190457  [19584/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.167073  [19616/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.156472  [19648/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.136836  [19680/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.158181  [19712/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.170255  [19744/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.161723  [19776/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.194064  [19808/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.176812  [19840/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.214021  [19872/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.168947  [19904/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.220028  [19936/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.219281  [19968/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.206938  [20000/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.243478  [20032/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.185906  [20064/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.190830  [20096/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.187443  [20128/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.224292  [20160/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.175533  [20192/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.213112  [20224/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.191557  [20256/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.191686  [20288/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.203791  [20320/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.266468  [20352/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.194398  [20384/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.152142  [20416/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.213954  [20448/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.245687  [20480/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.189010  [20512/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.246812  [20544/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.161145  [20576/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.147676  [20608/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.195963  [20640/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.172895  [20672/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.140902  [20704/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.257209  [20736/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.114133  [20768/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.244727  [20800/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.254849  [20832/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.169998  [20864/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.257006  [20896/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.283215  [20928/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.265447  [20960/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.213598  [20992/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.243172  [21024/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.249697  [21056/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.140662  [21088/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.184535  [21120/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.239762  [21152/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.241489  [21184/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.211486  [21216/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.196750  [21248/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.216596  [21280/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.247800  [21312/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.197456  [21344/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.173530  [21376/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.244761  [21408/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.218070  [21440/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.195646  [21472/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.173242  [21504/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.159986  [21536/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.122678  [21568/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.190187  [21600/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.249473  [21632/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.227190  [21664/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.138481  [21696/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.177256  [21728/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.214371  [21760/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.151108  [21792/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.227653  [21824/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.150266  [21856/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.164606  [21888/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.262391  [21920/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.110083  [21952/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.233941  [21984/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.334705  [22016/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.200756  [22048/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.179652  [22080/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.256766  [22112/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.171879  [22144/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.158616  [22176/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.213056  [22208/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.234892  [22240/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.134239  [22272/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.181695  [22304/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.216283  [22336/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.207085  [22368/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.237181  [22400/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.197962  [22432/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.175656  [22464/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.140862  [22496/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.188505  [22528/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.233443  [22560/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.219180  [22592/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.211590  [22624/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.165980  [22656/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.260562  [22688/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.248485  [22720/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.172839  [22752/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.228073  [22784/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.203187  [22816/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.205261  [22848/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.136391  [22880/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.268088  [22912/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.241450  [22944/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.260025  [22976/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.174223  [23008/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.223408  [23040/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.127609  [23072/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.213503  [23104/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.204819  [23136/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.208496  [23168/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.207235  [23200/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.293022  [23232/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.264567  [23264/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.194182  [23296/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.177790  [23328/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.281217  [23360/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.242773  [23392/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.143431  [23424/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.135044  [23456/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.203378  [23488/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.209989  [23520/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.173347  [23552/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.153648  [23584/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.194846  [23616/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.241736  [23648/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.224240  [23680/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.151076  [23712/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.266269  [23744/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.158830  [23776/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.160254  [23808/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.251524  [23840/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.271896  [23872/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.193396  [23904/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.199737  [23936/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.266809  [23968/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.154054  [24000/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.260841  [24032/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.197032  [24064/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.285915  [24096/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.224271  [24128/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.176113  [24160/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.135924  [24192/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.201747  [24224/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.232128  [24256/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.177111  [24288/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.172032  [24320/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.126001  [24352/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.201112  [24384/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.192696  [24416/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.230327  [24448/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.198134  [24480/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.178471  [24512/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.210241  [24544/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.172607  [24576/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.250248  [24608/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.196459  [24640/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.137137  [24672/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.227525  [24704/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.264550  [24736/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.158048  [24768/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.188190  [24800/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.229177  [24832/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.284411  [24864/24872]:   0%|          | 0/777 [00:30<?, ?it/s]
loss: 0.284411  [24864/24872]: 100%|██████████| 777/777 [00:30<00:00, 25.88it/s]
loss: 0.271566  [24872/24872]: 100%|██████████| 777/777 [00:30<00:00, 25.88it/s]
loss: 0.271566  [24872/24872]: : 778it [00:30, 25.89it/s]
Epoch 5, time=120.71s

  0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.233206  [   32/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.258621  [   64/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.276393  [   96/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.194673  [  128/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.201848  [  160/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.217859  [  192/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.227068  [  224/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.199223  [  256/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.268839  [  288/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.221731  [  320/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.248601  [  352/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.182355  [  384/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.250459  [  416/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.190718  [  448/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.276698  [  480/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.124110  [  512/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.189507  [  544/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.160847  [  576/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.212038  [  608/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.181564  [  640/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.198255  [  672/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.259203  [  704/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.111003  [  736/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.224090  [  768/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.177266  [  800/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.190445  [  832/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.242993  [  864/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.210573  [  896/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.344135  [  928/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.187360  [  960/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.228025  [  992/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.327983  [ 1024/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.258682  [ 1056/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.251038  [ 1088/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.189198  [ 1120/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.295488  [ 1152/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.233714  [ 1184/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.189266  [ 1216/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.173415  [ 1248/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.169474  [ 1280/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.185476  [ 1312/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.184376  [ 1344/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.151111  [ 1376/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.247202  [ 1408/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.266858  [ 1440/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.265350  [ 1472/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.161626  [ 1504/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.227664  [ 1536/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.213821  [ 1568/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.232806  [ 1600/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.235293  [ 1632/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.187719  [ 1664/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.196653  [ 1696/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.223736  [ 1728/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.224148  [ 1760/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.191200  [ 1792/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.200568  [ 1824/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.201222  [ 1856/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.167681  [ 1888/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.212241  [ 1920/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.166586  [ 1952/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.231681  [ 1984/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.190466  [ 2016/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.164494  [ 2048/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.150845  [ 2080/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.157048  [ 2112/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.249790  [ 2144/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.169820  [ 2176/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.159738  [ 2208/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.145509  [ 2240/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.183594  [ 2272/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.325912  [ 2304/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.269687  [ 2336/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.234603  [ 2368/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.133870  [ 2400/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.194484  [ 2432/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.158917  [ 2464/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.307099  [ 2496/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.193230  [ 2528/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.193215  [ 2560/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.262857  [ 2592/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.240179  [ 2624/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.162476  [ 2656/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.242641  [ 2688/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.218907  [ 2720/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.155114  [ 2752/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.170679  [ 2784/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.137928  [ 2816/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.160171  [ 2848/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.138704  [ 2880/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.117383  [ 2912/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.184359  [ 2944/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.339533  [ 2976/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.203084  [ 3008/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.267434  [ 3040/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.189832  [ 3072/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.201068  [ 3104/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.249542  [ 3136/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.204398  [ 3168/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.167639  [ 3200/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.152711  [ 3232/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.170724  [ 3264/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.255646  [ 3296/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.151159  [ 3328/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.181063  [ 3360/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.248015  [ 3392/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.200523  [ 3424/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.222978  [ 3456/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.257025  [ 3488/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.185647  [ 3520/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.192816  [ 3552/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.209165  [ 3584/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.202939  [ 3616/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.292354  [ 3648/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.209780  [ 3680/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.184048  [ 3712/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.221101  [ 3744/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.262611  [ 3776/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.117544  [ 3808/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.243528  [ 3840/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.162204  [ 3872/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.195268  [ 3904/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.221093  [ 3936/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.160597  [ 3968/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.197847  [ 4000/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.182172  [ 4032/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.271458  [ 4064/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.138085  [ 4096/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.150794  [ 4128/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.197260  [ 4160/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.163665  [ 4192/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.218095  [ 4224/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.155924  [ 4256/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.191854  [ 4288/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.215113  [ 4320/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.155478  [ 4352/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.212244  [ 4384/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.172788  [ 4416/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.182679  [ 4448/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.190145  [ 4480/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.197678  [ 4512/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.175027  [ 4544/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.213471  [ 4576/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.208777  [ 4608/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.199208  [ 4640/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.151623  [ 4672/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.137749  [ 4704/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.272650  [ 4736/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.285363  [ 4768/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.147014  [ 4800/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.262820  [ 4832/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.252312  [ 4864/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.173777  [ 4896/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.326897  [ 4928/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.215404  [ 4960/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.211066  [ 4992/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.218804  [ 5024/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.165720  [ 5056/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.158217  [ 5088/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.185786  [ 5120/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.118296  [ 5152/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.309934  [ 5184/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.330247  [ 5216/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.175047  [ 5248/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.220401  [ 5280/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.291733  [ 5312/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.293006  [ 5344/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.200002  [ 5376/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.184022  [ 5408/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.204549  [ 5440/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.176546  [ 5472/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.160829  [ 5504/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.206214  [ 5536/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.191740  [ 5568/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.182832  [ 5600/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.207686  [ 5632/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.193928  [ 5664/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.196905  [ 5696/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.249710  [ 5728/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.224103  [ 5760/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.136488  [ 5792/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.161494  [ 5824/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.217620  [ 5856/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.139654  [ 5888/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.235763  [ 5920/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.186876  [ 5952/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.237890  [ 5984/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.189806  [ 6016/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.274201  [ 6048/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.201073  [ 6080/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.193498  [ 6112/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.215529  [ 6144/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.232927  [ 6176/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.149871  [ 6208/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.183634  [ 6240/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.255455  [ 6272/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.211341  [ 6304/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.183841  [ 6336/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.210324  [ 6368/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.248173  [ 6400/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.142897  [ 6432/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.185033  [ 6464/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.124059  [ 6496/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.148146  [ 6528/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.132022  [ 6560/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.219437  [ 6592/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.197647  [ 6624/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.235821  [ 6656/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.186220  [ 6688/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.161552  [ 6720/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.336767  [ 6752/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.245906  [ 6784/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.238579  [ 6816/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.188588  [ 6848/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.121327  [ 6880/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.137391  [ 6912/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.186201  [ 6944/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.162221  [ 6976/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.233811  [ 7008/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.140166  [ 7040/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.187746  [ 7072/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.183871  [ 7104/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.192281  [ 7136/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.261559  [ 7168/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.193578  [ 7200/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.242208  [ 7232/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.302912  [ 7264/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.277480  [ 7296/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.184862  [ 7328/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.199351  [ 7360/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.210117  [ 7392/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.216937  [ 7424/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.217074  [ 7456/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.162865  [ 7488/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.155257  [ 7520/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.269817  [ 7552/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.191959  [ 7584/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.234562  [ 7616/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.223948  [ 7648/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.196096  [ 7680/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.262502  [ 7712/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.198945  [ 7744/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.170978  [ 7776/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.222793  [ 7808/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.219147  [ 7840/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.190939  [ 7872/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.206654  [ 7904/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.200469  [ 7936/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.154249  [ 7968/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.130021  [ 8000/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.181075  [ 8032/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.244651  [ 8064/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.200634  [ 8096/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.158735  [ 8128/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.232448  [ 8160/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.248202  [ 8192/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.174943  [ 8224/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.205775  [ 8256/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.171924  [ 8288/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.160468  [ 8320/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.130678  [ 8352/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.180597  [ 8384/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.131127  [ 8416/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.106027  [ 8448/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.238019  [ 8480/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.168435  [ 8512/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.153042  [ 8544/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.234819  [ 8576/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.224902  [ 8608/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.144467  [ 8640/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.179814  [ 8672/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.193384  [ 8704/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.159370  [ 8736/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.234367  [ 8768/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.200845  [ 8800/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.168438  [ 8832/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.213478  [ 8864/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.168049  [ 8896/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.179186  [ 8928/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.192634  [ 8960/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.162110  [ 8992/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.197362  [ 9024/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.118652  [ 9056/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.169261  [ 9088/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.188014  [ 9120/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.159523  [ 9152/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.193624  [ 9184/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.271748  [ 9216/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.161237  [ 9248/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.156597  [ 9280/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.173999  [ 9312/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.145518  [ 9344/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.168037  [ 9376/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.201932  [ 9408/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.098486  [ 9440/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.301431  [ 9472/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.108632  [ 9504/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.157847  [ 9536/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.202878  [ 9568/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.224892  [ 9600/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.226033  [ 9632/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.256368  [ 9664/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.191608  [ 9696/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.132255  [ 9728/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.288751  [ 9760/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.139427  [ 9792/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.221301  [ 9824/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.182314  [ 9856/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.209548  [ 9888/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.123355  [ 9920/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.140832  [ 9952/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.234074  [ 9984/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.193815  [10016/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.239873  [10048/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.260571  [10080/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.221429  [10112/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.204015  [10144/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.168900  [10176/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.133377  [10208/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.177272  [10240/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.226287  [10272/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.226697  [10304/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.210892  [10336/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.191371  [10368/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.190663  [10400/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.203171  [10432/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.132466  [10464/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.237565  [10496/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.202346  [10528/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.179914  [10560/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.182099  [10592/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.178405  [10624/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.193487  [10656/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.132987  [10688/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.137511  [10720/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.179021  [10752/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.185262  [10784/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.172182  [10816/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.086019  [10848/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.150152  [10880/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.222288  [10912/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.234249  [10944/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.188362  [10976/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.211582  [11008/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.169553  [11040/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.188811  [11072/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.234151  [11104/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.244542  [11136/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.213653  [11168/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.212798  [11200/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.231179  [11232/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.183667  [11264/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.153425  [11296/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.177173  [11328/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.174315  [11360/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.185205  [11392/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.234958  [11424/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.110748  [11456/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.145334  [11488/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.129545  [11520/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.144108  [11552/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.267095  [11584/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.161932  [11616/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.247894  [11648/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.165382  [11680/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.245783  [11712/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.234879  [11744/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.166632  [11776/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.109464  [11808/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.081730  [11840/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.156624  [11872/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.199886  [11904/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.238378  [11936/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.195844  [11968/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.174939  [12000/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.155742  [12032/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.210753  [12064/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.157194  [12096/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.208251  [12128/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.145814  [12160/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.200806  [12192/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.166892  [12224/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.212506  [12256/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.137881  [12288/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.153235  [12320/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.169851  [12352/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.172650  [12384/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.151797  [12416/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.181804  [12448/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.214999  [12480/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.154592  [12512/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.201619  [12544/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.166199  [12576/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.242886  [12608/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.199201  [12640/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.159333  [12672/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.221088  [12704/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.180027  [12736/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.221953  [12768/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.302542  [12800/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.247981  [12832/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.385218  [12864/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.178091  [12896/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.188457  [12928/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.203068  [12960/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.124555  [12992/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.200916  [13024/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.299114  [13056/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.121975  [13088/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.184549  [13120/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.138529  [13152/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.154927  [13184/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.244248  [13216/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.147047  [13248/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.196898  [13280/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.165049  [13312/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.232817  [13344/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.155593  [13376/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.236873  [13408/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.201620  [13440/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.179941  [13472/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.174272  [13504/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.244948  [13536/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.149596  [13568/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.247597  [13600/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.133886  [13632/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.222305  [13664/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.133465  [13696/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.137654  [13728/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.224760  [13760/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.168710  [13792/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.131185  [13824/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.293236  [13856/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.172271  [13888/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.149675  [13920/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.102904  [13952/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.150100  [13984/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.165364  [14016/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.231364  [14048/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.186322  [14080/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.192337  [14112/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.184259  [14144/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.178602  [14176/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.119341  [14208/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.224350  [14240/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.298413  [14272/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.174250  [14304/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.255381  [14336/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.189549  [14368/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.167227  [14400/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.235507  [14432/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.318511  [14464/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.163982  [14496/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.193184  [14528/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.192138  [14560/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.192992  [14592/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.253502  [14624/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.270729  [14656/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.266743  [14688/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.203093  [14720/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.241453  [14752/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.228604  [14784/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.253045  [14816/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.259810  [14848/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.192195  [14880/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.151924  [14912/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.138547  [14944/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.174856  [14976/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.149429  [15008/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.283451  [15040/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.177484  [15072/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.229648  [15104/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.092944  [15136/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.174228  [15168/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.122322  [15200/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.226264  [15232/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.205465  [15264/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.208147  [15296/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.184908  [15328/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.164990  [15360/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.180725  [15392/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.207951  [15424/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.191658  [15456/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.150113  [15488/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.140736  [15520/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.143482  [15552/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.169229  [15584/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.216333  [15616/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.196053  [15648/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.195005  [15680/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.268216  [15712/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.235173  [15744/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.159525  [15776/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.175682  [15808/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.228649  [15840/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.116974  [15872/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.145758  [15904/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.241501  [15936/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.284646  [15968/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.305428  [16000/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.307109  [16032/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.221501  [16064/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.154970  [16096/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.204310  [16128/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.295478  [16160/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.210255  [16192/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.220052  [16224/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.207307  [16256/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.234152  [16288/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.251741  [16320/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.309285  [16352/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.213651  [16384/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.226302  [16416/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.155270  [16448/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.209091  [16480/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.272056  [16512/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.192990  [16544/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.134530  [16576/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.169888  [16608/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.205770  [16640/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.194505  [16672/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.137042  [16704/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.178051  [16736/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.182897  [16768/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.162212  [16800/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.145214  [16832/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.141534  [16864/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.179231  [16896/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.220092  [16928/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.127978  [16960/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.188809  [16992/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.257210  [17024/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.164662  [17056/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.230094  [17088/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.204426  [17120/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.216686  [17152/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.232734  [17184/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.303735  [17216/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.211262  [17248/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.269967  [17280/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.221569  [17312/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.180099  [17344/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.270592  [17376/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.118489  [17408/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.183192  [17440/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.084143  [17472/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.209818  [17504/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.188097  [17536/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.196062  [17568/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.258091  [17600/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.226039  [17632/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.133051  [17664/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.228244  [17696/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.214558  [17728/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.189740  [17760/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.181348  [17792/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.188545  [17824/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.272805  [17856/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.267767  [17888/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.184821  [17920/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.187446  [17952/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.284621  [17984/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.165399  [18016/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.205340  [18048/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.239733  [18080/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.168659  [18112/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.169819  [18144/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.199825  [18176/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.206665  [18208/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.214198  [18240/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.255078  [18272/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.206697  [18304/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.130244  [18336/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.198272  [18368/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.164746  [18400/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.200778  [18432/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.171386  [18464/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.183316  [18496/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.210208  [18528/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.251276  [18560/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.190771  [18592/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.174096  [18624/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.197835  [18656/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.148801  [18688/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.246425  [18720/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.257543  [18752/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.245152  [18784/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.164152  [18816/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.264096  [18848/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.165396  [18880/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.178622  [18912/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.224972  [18944/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.294432  [18976/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.208952  [19008/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.160500  [19040/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.239479  [19072/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.216942  [19104/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.257887  [19136/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.222595  [19168/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.162218  [19200/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.190914  [19232/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.116624  [19264/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.243147  [19296/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.246491  [19328/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.207272  [19360/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.166886  [19392/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.156365  [19424/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.227595  [19456/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.185213  [19488/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.230692  [19520/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.152432  [19552/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.202892  [19584/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.159317  [19616/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.148466  [19648/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.132041  [19680/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.168578  [19712/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.151555  [19744/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.158480  [19776/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.230343  [19808/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.154227  [19840/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.208698  [19872/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.194789  [19904/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.228631  [19936/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.217319  [19968/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.234594  [20000/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.227975  [20032/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.185581  [20064/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.193243  [20096/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.163276  [20128/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.215031  [20160/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.175012  [20192/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.227982  [20224/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.204107  [20256/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.178493  [20288/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.217438  [20320/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.273382  [20352/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.186332  [20384/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.164271  [20416/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.249008  [20448/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.215631  [20480/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.167602  [20512/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.227261  [20544/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.133664  [20576/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.138515  [20608/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.204408  [20640/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.166779  [20672/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.126932  [20704/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.228774  [20736/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.133187  [20768/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.239437  [20800/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.235246  [20832/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.161889  [20864/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.204874  [20896/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.250560  [20928/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.246691  [20960/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.158408  [20992/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.216442  [21024/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.207868  [21056/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.125384  [21088/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.166913  [21120/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.227889  [21152/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.187356  [21184/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.185138  [21216/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.173837  [21248/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.217427  [21280/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.208266  [21312/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.159975  [21344/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.162028  [21376/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.190348  [21408/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.191888  [21440/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.181129  [21472/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.163488  [21504/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.155848  [21536/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.117109  [21568/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.176323  [21600/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.248998  [21632/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.212577  [21664/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.112850  [21696/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.185293  [21728/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.188729  [21760/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.140533  [21792/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.205998  [21824/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.144775  [21856/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.153138  [21888/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.259005  [21920/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.101546  [21952/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.205449  [21984/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.280236  [22016/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.169710  [22048/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.177092  [22080/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.214072  [22112/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.155382  [22144/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.145285  [22176/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.195767  [22208/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.205023  [22240/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.117348  [22272/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.171441  [22304/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.222243  [22336/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.205768  [22368/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.215605  [22400/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.200102  [22432/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.166611  [22464/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.141134  [22496/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.190753  [22528/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.194594  [22560/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.235279  [22592/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.202312  [22624/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.148384  [22656/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.236691  [22688/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.256000  [22720/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.153216  [22752/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.240532  [22784/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.186745  [22816/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.168194  [22848/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.129337  [22880/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.281054  [22912/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.230663  [22944/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.267364  [22976/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.170600  [23008/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.212105  [23040/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.113639  [23072/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.214995  [23104/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.205297  [23136/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.176786  [23168/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.187279  [23200/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.248516  [23232/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.217622  [23264/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.152960  [23296/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.154062  [23328/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.274431  [23360/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.236455  [23392/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.134992  [23424/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.131515  [23456/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.230264  [23488/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.194307  [23520/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.186691  [23552/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.156687  [23584/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.194172  [23616/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.280358  [23648/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.206794  [23680/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.144651  [23712/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.252547  [23744/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.147897  [23776/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.151195  [23808/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.232495  [23840/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.294899  [23872/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.166862  [23904/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.214978  [23936/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.238441  [23968/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.124222  [24000/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.226068  [24032/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.182080  [24064/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.266394  [24096/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.218482  [24128/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.161441  [24160/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.106893  [24192/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.205305  [24224/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.226584  [24256/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.160835  [24288/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.143314  [24320/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.112701  [24352/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.185692  [24384/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.165173  [24416/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.210892  [24448/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.170234  [24480/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.152281  [24512/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.206369  [24544/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.159857  [24576/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.198994  [24608/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.169750  [24640/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.107735  [24672/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.182742  [24704/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.217402  [24736/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.131530  [24768/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.146410  [24800/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.196665  [24832/24872]:   0%|          | 0/777 [00:30<?, ?it/s]
loss: 0.196665  [24832/24872]: 100%|█████████▉| 776/777 [00:30<00:00, 25.85it/s]
loss: 0.255172  [24864/24872]: 100%|█████████▉| 776/777 [00:30<00:00, 25.85it/s]
loss: 0.216973  [24872/24872]: 100%|█████████▉| 776/777 [00:30<00:00, 25.85it/s]
loss: 0.216973  [24872/24872]: : 778it [00:30, 25.86it/s]
-------------------------------
LR=0.0001, batch_size=64
-------------------------------
Epoch 1, time=150.80s

  0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.224360  [   64/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.323039  [  128/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.190750  [  192/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.225940  [  256/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.279759  [  320/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.207430  [  384/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.219011  [  448/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.189572  [  512/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.198800  [  576/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.212103  [  640/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.199084  [  704/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.143425  [  768/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.202329  [  832/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.201144  [  896/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.278848  [  960/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.287935  [ 1024/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.225810  [ 1088/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.189680  [ 1152/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.194416  [ 1216/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.174352  [ 1280/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.170757  [ 1344/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.184881  [ 1408/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.236444  [ 1472/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.177214  [ 1536/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.211490  [ 1600/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.190419  [ 1664/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.202995  [ 1728/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.191653  [ 1792/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.190382  [ 1856/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.190233  [ 1920/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.186161  [ 1984/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.166626  [ 2048/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.137072  [ 2112/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.190475  [ 2176/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.133974  [ 2240/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.248272  [ 2304/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.225604  [ 2368/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.143382  [ 2432/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.229768  [ 2496/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.170291  [ 2560/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.237875  [ 2624/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.186834  [ 2688/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.152934  [ 2752/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.140209  [ 2816/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.147356  [ 2880/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.139527  [ 2944/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.250241  [ 3008/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.216285  [ 3072/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.219142  [ 3136/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.166835  [ 3200/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.161717  [ 3264/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.170390  [ 3328/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.199549  [ 3392/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.175983  [ 3456/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.203124  [ 3520/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.183599  [ 3584/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.227468  [ 3648/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.177053  [ 3712/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.201257  [ 3776/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.162824  [ 3840/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.170623  [ 3904/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.177072  [ 3968/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.176337  [ 4032/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.177763  [ 4096/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.157542  [ 4160/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.174613  [ 4224/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.153965  [ 4288/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.168832  [ 4352/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.179650  [ 4416/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.144315  [ 4480/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.153421  [ 4544/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.195390  [ 4608/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.166887  [ 4672/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.193653  [ 4736/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.177502  [ 4800/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.191702  [ 4864/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.164308  [ 4928/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.164071  [ 4992/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.138598  [ 5056/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.130634  [ 5120/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.176894  [ 5184/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.213576  [ 5248/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.222879  [ 5312/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.238797  [ 5376/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.162734  [ 5440/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.136376  [ 5504/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.208482  [ 5568/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.153024  [ 5632/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.178774  [ 5696/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.231541  [ 5760/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.132914  [ 5824/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.156069  [ 5888/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.179936  [ 5952/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.176761  [ 6016/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.226102  [ 6080/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.178492  [ 6144/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.162314  [ 6208/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.192901  [ 6272/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.172536  [ 6336/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.206220  [ 6400/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.127297  [ 6464/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.115378  [ 6528/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.170496  [ 6592/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.186843  [ 6656/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.161332  [ 6720/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.232961  [ 6784/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.203176  [ 6848/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.113072  [ 6912/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.149510  [ 6976/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.143012  [ 7040/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.151033  [ 7104/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.179897  [ 7168/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.160731  [ 7232/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.226462  [ 7296/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.156292  [ 7360/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.172920  [ 7424/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.147676  [ 7488/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.170439  [ 7552/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.173801  [ 7616/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.161572  [ 7680/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.183345  [ 7744/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.183384  [ 7808/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.166144  [ 7872/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.191970  [ 7936/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.121947  [ 8000/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.194928  [ 8064/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.162115  [ 8128/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.204189  [ 8192/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.159439  [ 8256/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.152431  [ 8320/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.142132  [ 8384/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.099776  [ 8448/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.174275  [ 8512/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.192744  [ 8576/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.178259  [ 8640/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.165728  [ 8704/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.179925  [ 8768/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.164607  [ 8832/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.176960  [ 8896/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.168741  [ 8960/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.152540  [ 9024/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.114541  [ 9088/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.140188  [ 9152/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.221841  [ 9216/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.132101  [ 9280/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.132016  [ 9344/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.179839  [ 9408/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.187727  [ 9472/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.122218  [ 9536/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.206027  [ 9600/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.212121  [ 9664/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.132302  [ 9728/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.182682  [ 9792/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.179198  [ 9856/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.133305  [ 9920/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.162358  [ 9984/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.175327  [10048/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.204775  [10112/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.140491  [10176/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.132374  [10240/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.218994  [10304/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.188614  [10368/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.191047  [10432/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.156780  [10496/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.174080  [10560/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.157912  [10624/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.126018  [10688/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.122821  [10752/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.141505  [10816/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.112079  [10880/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.196478  [10944/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.147442  [11008/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.152328  [11072/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.248258  [11136/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.196522  [11200/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.175320  [11264/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.167407  [11328/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.225273  [11392/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.165330  [11456/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.123280  [11520/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.149554  [11584/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.198325  [11648/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.182304  [11712/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.181232  [11776/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.086646  [11840/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.154604  [11904/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.214817  [11968/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.147585  [12032/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.162705  [12096/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.161913  [12160/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.183116  [12224/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.163328  [12288/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.150311  [12352/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.150006  [12416/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.189431  [12480/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.137822  [12544/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.189380  [12608/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.139922  [12672/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.168554  [12736/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.256704  [12800/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.282787  [12864/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.154425  [12928/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.141428  [12992/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.191503  [13056/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.120371  [13120/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.130635  [13184/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.157577  [13248/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.154891  [13312/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.159433  [13376/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.197924  [13440/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.161894  [13504/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.175019  [13568/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.153268  [13632/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.135390  [13696/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.145209  [13760/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.138479  [13824/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.216883  [13888/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.125336  [13952/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.137018  [14016/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.185612  [14080/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.207235  [14144/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.141058  [14208/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.209394  [14272/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.183218  [14336/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.161921  [14400/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.228959  [14464/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.143901  [14528/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.176258  [14592/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.210122  [14656/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.176039  [14720/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.195808  [14784/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.213096  [14848/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.153247  [14912/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.141506  [14976/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.156503  [15040/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.202480  [15104/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.114792  [15168/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.136443  [15232/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.177266  [15296/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.152384  [15360/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.180410  [15424/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.160191  [15488/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.142647  [15552/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.160101  [15616/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.202719  [15680/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.221910  [15744/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.152951  [15808/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.184534  [15872/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.154752  [15936/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.257908  [16000/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.223201  [16064/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.164220  [16128/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.218975  [16192/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.233200  [16256/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.154870  [16320/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.203559  [16384/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.172401  [16448/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.188548  [16512/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.146820  [16576/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.160012  [16640/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.148431  [16704/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.149766  [16768/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.130879  [16832/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.146067  [16896/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.149214  [16960/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.214894  [17024/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.172393  [17088/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.153055  [17152/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.207805  [17216/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.204180  [17280/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.172904  [17344/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.164359  [17408/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.097903  [17472/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.174202  [17536/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.207797  [17600/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.135967  [17664/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.205379  [17728/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.174282  [17792/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.181347  [17856/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.186443  [17920/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.206723  [17984/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.142670  [18048/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.154824  [18112/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.112803  [18176/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.169060  [18240/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.171692  [18304/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.143413  [18368/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.133633  [18432/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.133082  [18496/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.194732  [18560/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.172540  [18624/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.152692  [18688/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.219232  [18752/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.199225  [18816/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.197486  [18880/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.191449  [18944/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.245302  [19008/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.175047  [19072/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.195897  [19136/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.155798  [19200/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.114815  [19264/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.215207  [19328/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.171827  [19392/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.170525  [19456/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.183622  [19520/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.151809  [19584/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.136131  [19648/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.130690  [19712/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.131401  [19776/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.133821  [19840/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.150408  [19904/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.173449  [19968/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.215200  [20032/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.159150  [20096/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.161196  [20160/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.144254  [20224/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.154086  [20288/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.196807  [20352/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.163817  [20416/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.209115  [20480/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.169166  [20544/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.122847  [20608/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.165517  [20672/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.184950  [20736/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.161155  [20800/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.158213  [20864/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.189552  [20928/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.169353  [20992/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.186530  [21056/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.137857  [21120/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.177191  [21184/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.188283  [21248/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.174154  [21312/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.130111  [21376/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.209610  [21440/24872]:   0%|          | 0/388 [00:17<?, ?it/s]
loss: 0.144459  [21504/24872]:   0%|          | 0/388 [00:17<?, ?it/s]
loss: 0.123016  [21568/24872]:   0%|          | 0/388 [00:17<?, ?it/s]
loss: 0.210215  [21632/24872]:   0%|          | 0/388 [00:17<?, ?it/s]
loss: 0.150610  [21696/24872]:   0%|          | 0/388 [00:17<?, ?it/s]
loss: 0.161301  [21760/24872]:   0%|          | 0/388 [00:17<?, ?it/s]
loss: 0.181237  [21824/24872]:   0%|          | 0/388 [00:17<?, ?it/s]
loss: 0.132175  [21888/24872]:   0%|          | 0/388 [00:17<?, ?it/s]
loss: 0.162137  [21952/24872]:   0%|          | 0/388 [00:17<?, ?it/s]
loss: 0.190258  [22016/24872]:   0%|          | 0/388 [00:17<?, ?it/s]
loss: 0.163881  [22080/24872]:   0%|          | 0/388 [00:17<?, ?it/s]
loss: 0.171392  [22144/24872]:   0%|          | 0/388 [00:17<?, ?it/s]
loss: 0.149636  [22208/24872]:   0%|          | 0/388 [00:17<?, ?it/s]
loss: 0.130584  [22272/24872]:   0%|          | 0/388 [00:17<?, ?it/s]
loss: 0.174209  [22336/24872]:   0%|          | 0/388 [00:17<?, ?it/s]
loss: 0.182838  [22400/24872]:   0%|          | 0/388 [00:17<?, ?it/s]
loss: 0.166563  [22464/24872]:   0%|          | 0/388 [00:17<?, ?it/s]
loss: 0.151710  [22528/24872]:   0%|          | 0/388 [00:17<?, ?it/s]
loss: 0.170099  [22592/24872]:   0%|          | 0/388 [00:17<?, ?it/s]
loss: 0.148597  [22656/24872]:   0%|          | 0/388 [00:18<?, ?it/s]
loss: 0.210671  [22720/24872]:   0%|          | 0/388 [00:18<?, ?it/s]
loss: 0.156423  [22784/24872]:   0%|          | 0/388 [00:18<?, ?it/s]
loss: 0.159811  [22848/24872]:   0%|          | 0/388 [00:18<?, ?it/s]
loss: 0.173755  [22912/24872]:   0%|          | 0/388 [00:18<?, ?it/s]
loss: 0.204628  [22976/24872]:   0%|          | 0/388 [00:18<?, ?it/s]
loss: 0.167357  [23040/24872]:   0%|          | 0/388 [00:18<?, ?it/s]
loss: 0.136300  [23104/24872]:   0%|          | 0/388 [00:18<?, ?it/s]
loss: 0.177461  [23168/24872]:   0%|          | 0/388 [00:18<?, ?it/s]
loss: 0.213785  [23232/24872]:   0%|          | 0/388 [00:18<?, ?it/s]
loss: 0.177689  [23296/24872]:   0%|          | 0/388 [00:18<?, ?it/s]
loss: 0.175117  [23360/24872]:   0%|          | 0/388 [00:18<?, ?it/s]
loss: 0.188008  [23424/24872]:   0%|          | 0/388 [00:18<?, ?it/s]
loss: 0.157386  [23488/24872]:   0%|          | 0/388 [00:18<?, ?it/s]
loss: 0.177400  [23552/24872]:   0%|          | 0/388 [00:18<?, ?it/s]
loss: 0.144238  [23616/24872]:   0%|          | 0/388 [00:18<?, ?it/s]
loss: 0.174391  [23680/24872]:   0%|          | 0/388 [00:18<?, ?it/s]
loss: 0.168405  [23744/24872]:   0%|          | 0/388 [00:18<?, ?it/s]
loss: 0.144638  [23808/24872]:   0%|          | 0/388 [00:18<?, ?it/s]
loss: 0.219659  [23872/24872]:   0%|          | 0/388 [00:18<?, ?it/s]
loss: 0.155970  [23936/24872]:   0%|          | 0/388 [00:19<?, ?it/s]
loss: 0.167313  [24000/24872]:   0%|          | 0/388 [00:19<?, ?it/s]
loss: 0.174153  [24064/24872]:   0%|          | 0/388 [00:19<?, ?it/s]
loss: 0.238840  [24128/24872]:   0%|          | 0/388 [00:19<?, ?it/s]
loss: 0.122745  [24192/24872]:   0%|          | 0/388 [00:19<?, ?it/s]
loss: 0.191032  [24256/24872]:   0%|          | 0/388 [00:19<?, ?it/s]
loss: 0.154806  [24320/24872]:   0%|          | 0/388 [00:19<?, ?it/s]
loss: 0.148528  [24384/24872]:   0%|          | 0/388 [00:19<?, ?it/s]
loss: 0.189236  [24448/24872]:   0%|          | 0/388 [00:19<?, ?it/s]
loss: 0.143329  [24512/24872]:   0%|          | 0/388 [00:19<?, ?it/s]
loss: 0.152824  [24576/24872]:   0%|          | 0/388 [00:19<?, ?it/s]
loss: 0.156312  [24640/24872]:   0%|          | 0/388 [00:19<?, ?it/s]
loss: 0.136785  [24704/24872]:   0%|          | 0/388 [00:19<?, ?it/s]
loss: 0.184824  [24768/24872]:   0%|          | 0/388 [00:19<?, ?it/s]
loss: 0.153393  [24832/24872]:   0%|          | 0/388 [00:19<?, ?it/s]
loss: 0.241909  [24872/24872]:   0%|          | 0/388 [00:19<?, ?it/s]
loss: 0.241909  [24872/24872]: : 389it [00:19, 19.68it/s]
Epoch 2, time=170.57s

  0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.226799  [   64/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.187878  [  128/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.144602  [  192/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.148643  [  256/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.200000  [  320/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.154045  [  384/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.170625  [  448/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.154905  [  512/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.135267  [  576/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.143953  [  640/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.142777  [  704/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.106687  [  768/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.161049  [  832/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.173789  [  896/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.218005  [  960/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.220708  [ 1024/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.183077  [ 1088/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.180435  [ 1152/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.162046  [ 1216/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.134629  [ 1280/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.138817  [ 1344/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.173437  [ 1408/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.220060  [ 1472/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.164721  [ 1536/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.203289  [ 1600/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.165449  [ 1664/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.182053  [ 1728/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.164631  [ 1792/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.157082  [ 1856/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.168437  [ 1920/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.168236  [ 1984/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.144759  [ 2048/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.119594  [ 2112/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.181881  [ 2176/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.141777  [ 2240/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.206312  [ 2304/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.192824  [ 2368/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.135459  [ 2432/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.208792  [ 2496/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.150609  [ 2560/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.226457  [ 2624/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.164603  [ 2688/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.145639  [ 2752/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.125749  [ 2816/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.126718  [ 2880/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.120909  [ 2944/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.225633  [ 3008/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.201444  [ 3072/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.195915  [ 3136/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.159526  [ 3200/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.128119  [ 3264/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.166084  [ 3328/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.194094  [ 3392/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.165781  [ 3456/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.185343  [ 3520/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.180538  [ 3584/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.203624  [ 3648/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.176657  [ 3712/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.166355  [ 3776/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.148900  [ 3840/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.150467  [ 3904/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.162308  [ 3968/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.160817  [ 4032/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.162048  [ 4096/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.167945  [ 4160/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.161278  [ 4224/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.147031  [ 4288/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.159202  [ 4352/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.164027  [ 4416/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.142277  [ 4480/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.145253  [ 4544/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.195724  [ 4608/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.136390  [ 4672/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.177245  [ 4736/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.187914  [ 4800/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.177502  [ 4864/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.181309  [ 4928/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.154747  [ 4992/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.135610  [ 5056/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.127553  [ 5120/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.151507  [ 5184/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.197644  [ 5248/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.221854  [ 5312/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.202248  [ 5376/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.163179  [ 5440/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.122386  [ 5504/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.182350  [ 5568/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.141978  [ 5632/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.176550  [ 5696/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.216532  [ 5760/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.124710  [ 5824/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.141453  [ 5888/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.156820  [ 5952/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.170421  [ 6016/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.204202  [ 6080/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.155012  [ 6144/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.166308  [ 6208/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.173587  [ 6272/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.170378  [ 6336/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.186004  [ 6400/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.129353  [ 6464/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.106033  [ 6528/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.159678  [ 6592/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.193134  [ 6656/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.157285  [ 6720/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.247357  [ 6784/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.192265  [ 6848/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.125424  [ 6912/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.147326  [ 6976/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.144184  [ 7040/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.161939  [ 7104/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.188136  [ 7168/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.152200  [ 7232/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.215624  [ 7296/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.154433  [ 7360/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.177737  [ 7424/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.146678  [ 7488/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.154517  [ 7552/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.171650  [ 7616/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.154262  [ 7680/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.190648  [ 7744/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.176600  [ 7808/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.155068  [ 7872/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.169365  [ 7936/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.129715  [ 8000/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.174526  [ 8064/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.134637  [ 8128/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.191576  [ 8192/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.148169  [ 8256/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.149556  [ 8320/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.157931  [ 8384/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.093966  [ 8448/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.180294  [ 8512/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.184902  [ 8576/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.172712  [ 8640/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.163132  [ 8704/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.158690  [ 8768/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.160100  [ 8832/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.171292  [ 8896/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.157576  [ 8960/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.144583  [ 9024/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.108323  [ 9088/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.129249  [ 9152/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.215228  [ 9216/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.119184  [ 9280/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.132360  [ 9344/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.165002  [ 9408/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.169761  [ 9472/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.115117  [ 9536/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.198809  [ 9600/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.202534  [ 9664/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.123661  [ 9728/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.179837  [ 9792/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.162333  [ 9856/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.127505  [ 9920/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.143932  [ 9984/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.156314  [10048/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.189347  [10112/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.132438  [10176/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.125775  [10240/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.189900  [10304/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.166109  [10368/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.184930  [10432/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.148668  [10496/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.159025  [10560/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.150914  [10624/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.111424  [10688/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.119995  [10752/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.136176  [10816/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.100914  [10880/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.178061  [10944/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.143903  [11008/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.153204  [11072/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.236568  [11136/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.201985  [11200/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.181268  [11264/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.162267  [11328/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.202578  [11392/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.167023  [11456/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.120250  [11520/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.156881  [11584/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.180343  [11648/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.175531  [11712/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.187557  [11776/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.098569  [11840/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.153132  [11904/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.195393  [11968/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.148840  [12032/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.164808  [12096/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.161619  [12160/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.168844  [12224/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.151564  [12288/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.155774  [12352/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.142678  [12416/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.182943  [12480/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.149764  [12544/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.164439  [12608/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.145075  [12672/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.161759  [12736/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.251630  [12800/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.277185  [12864/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.135158  [12928/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.145865  [12992/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.185770  [13056/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.116555  [13120/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.117859  [13184/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.136398  [13248/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.154156  [13312/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.146510  [13376/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.178095  [13440/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.145922  [13504/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.163031  [13568/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.146102  [13632/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.155185  [13696/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.133546  [13760/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.121210  [13824/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.221343  [13888/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.116830  [13952/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.131677  [14016/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.175460  [14080/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.168574  [14144/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.133466  [14208/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.235749  [14272/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.169001  [14336/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.161939  [14400/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.228464  [14464/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.128721  [14528/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.167961  [14592/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.208481  [14656/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.198700  [14720/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.182050  [14784/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.206637  [14848/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.151062  [14912/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.139211  [14976/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.147080  [15040/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.164208  [15104/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.125194  [15168/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.120329  [15232/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.154805  [15296/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.144412  [15360/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.163867  [15424/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.153338  [15488/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.121562  [15552/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.146468  [15616/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.191778  [15680/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.207597  [15744/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.141808  [15808/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.165564  [15872/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.166773  [15936/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.239412  [16000/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.224928  [16064/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.144673  [16128/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.196748  [16192/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.204630  [16256/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.145992  [16320/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.168278  [16384/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.180100  [16448/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.145121  [16512/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.140215  [16576/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.139517  [16640/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.153253  [16704/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.159300  [16768/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.117411  [16832/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.136581  [16896/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.136242  [16960/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.205138  [17024/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.169928  [17088/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.151294  [17152/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.214047  [17216/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.207689  [17280/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.168499  [17344/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.156172  [17408/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.116773  [17472/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.165303  [17536/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.204161  [17600/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.133709  [17664/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.195396  [17728/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.157886  [17792/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.190109  [17856/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.182625  [17920/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.181604  [17984/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.126499  [18048/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.164775  [18112/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.101791  [18176/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.174155  [18240/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.162327  [18304/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.133615  [18368/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.123758  [18432/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.128145  [18496/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.185884  [18560/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.141657  [18624/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.146851  [18688/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.184281  [18752/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.169481  [18816/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.177592  [18880/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.173552  [18944/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.206248  [19008/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.145918  [19072/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.187751  [19136/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.136625  [19200/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.112054  [19264/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.200558  [19328/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.156100  [19392/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.159957  [19456/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.158014  [19520/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.136850  [19584/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.132921  [19648/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.116274  [19712/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.115922  [19776/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.125142  [19840/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.154143  [19904/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.147837  [19968/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.180672  [20032/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.130812  [20096/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.153992  [20160/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.127256  [20224/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.147266  [20288/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.178296  [20352/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.140929  [20416/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.199337  [20480/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.158471  [20544/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.103676  [20608/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.137047  [20672/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.133693  [20736/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.149399  [20800/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.152236  [20864/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.160356  [20928/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.149296  [20992/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.160830  [21056/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.123169  [21120/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.162612  [21184/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.148859  [21248/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.148455  [21312/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.119468  [21376/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.164758  [21440/24872]:   0%|          | 0/388 [00:17<?, ?it/s]
loss: 0.134797  [21504/24872]:   0%|          | 0/388 [00:17<?, ?it/s]
loss: 0.115303  [21568/24872]:   0%|          | 0/388 [00:17<?, ?it/s]
loss: 0.176266  [21632/24872]:   0%|          | 0/388 [00:17<?, ?it/s]
loss: 0.142413  [21696/24872]:   0%|          | 0/388 [00:17<?, ?it/s]
loss: 0.126265  [21760/24872]:   0%|          | 0/388 [00:17<?, ?it/s]
loss: 0.160067  [21824/24872]:   0%|          | 0/388 [00:17<?, ?it/s]
loss: 0.120783  [21888/24872]:   0%|          | 0/388 [00:17<?, ?it/s]
loss: 0.152982  [21952/24872]:   0%|          | 0/388 [00:17<?, ?it/s]
loss: 0.200937  [22016/24872]:   0%|          | 0/388 [00:17<?, ?it/s]
loss: 0.149153  [22080/24872]:   0%|          | 0/388 [00:17<?, ?it/s]
loss: 0.164500  [22144/24872]:   0%|          | 0/388 [00:17<?, ?it/s]
loss: 0.139551  [22208/24872]:   0%|          | 0/388 [00:17<?, ?it/s]
loss: 0.118039  [22272/24872]:   0%|          | 0/388 [00:17<?, ?it/s]
loss: 0.168935  [22336/24872]:   0%|          | 0/388 [00:17<?, ?it/s]
loss: 0.172481  [22400/24872]:   0%|          | 0/388 [00:17<?, ?it/s]
loss: 0.156437  [22464/24872]:   0%|          | 0/388 [00:17<?, ?it/s]
loss: 0.144541  [22528/24872]:   0%|          | 0/388 [00:17<?, ?it/s]
loss: 0.160987  [22592/24872]:   0%|          | 0/388 [00:17<?, ?it/s]
loss: 0.147471  [22656/24872]:   0%|          | 0/388 [00:18<?, ?it/s]
loss: 0.191361  [22720/24872]:   0%|          | 0/388 [00:18<?, ?it/s]
loss: 0.155541  [22784/24872]:   0%|          | 0/388 [00:18<?, ?it/s]
loss: 0.142159  [22848/24872]:   0%|          | 0/388 [00:18<?, ?it/s]
loss: 0.167109  [22912/24872]:   0%|          | 0/388 [00:18<?, ?it/s]
loss: 0.211038  [22976/24872]:   0%|          | 0/388 [00:18<?, ?it/s]
loss: 0.163749  [23040/24872]:   0%|          | 0/388 [00:18<?, ?it/s]
loss: 0.133903  [23104/24872]:   0%|          | 0/388 [00:18<?, ?it/s]
loss: 0.166767  [23168/24872]:   0%|          | 0/388 [00:18<?, ?it/s]
loss: 0.174875  [23232/24872]:   0%|          | 0/388 [00:18<?, ?it/s]
loss: 0.187595  [23296/24872]:   0%|          | 0/388 [00:18<?, ?it/s]
loss: 0.187225  [23360/24872]:   0%|          | 0/388 [00:18<?, ?it/s]
loss: 0.195871  [23424/24872]:   0%|          | 0/388 [00:18<?, ?it/s]
loss: 0.143733  [23488/24872]:   0%|          | 0/388 [00:18<?, ?it/s]
loss: 0.164157  [23552/24872]:   0%|          | 0/388 [00:18<?, ?it/s]
loss: 0.134785  [23616/24872]:   0%|          | 0/388 [00:18<?, ?it/s]
loss: 0.171393  [23680/24872]:   0%|          | 0/388 [00:18<?, ?it/s]
loss: 0.160308  [23744/24872]:   0%|          | 0/388 [00:18<?, ?it/s]
loss: 0.128884  [23808/24872]:   0%|          | 0/388 [00:18<?, ?it/s]
loss: 0.201262  [23872/24872]:   0%|          | 0/388 [00:18<?, ?it/s]
loss: 0.169319  [23936/24872]:   0%|          | 0/388 [00:19<?, ?it/s]
loss: 0.143179  [24000/24872]:   0%|          | 0/388 [00:19<?, ?it/s]
loss: 0.165489  [24064/24872]:   0%|          | 0/388 [00:19<?, ?it/s]
loss: 0.209429  [24128/24872]:   0%|          | 0/388 [00:19<?, ?it/s]
loss: 0.112110  [24192/24872]:   0%|          | 0/388 [00:19<?, ?it/s]
loss: 0.189885  [24256/24872]:   0%|          | 0/388 [00:19<?, ?it/s]
loss: 0.132037  [24320/24872]:   0%|          | 0/388 [00:19<?, ?it/s]
loss: 0.141500  [24384/24872]:   0%|          | 0/388 [00:19<?, ?it/s]
loss: 0.174399  [24448/24872]:   0%|          | 0/388 [00:19<?, ?it/s]
loss: 0.128585  [24512/24872]:   0%|          | 0/388 [00:19<?, ?it/s]
loss: 0.149247  [24576/24872]:   0%|          | 0/388 [00:19<?, ?it/s]
loss: 0.149666  [24640/24872]:   0%|          | 0/388 [00:19<?, ?it/s]
loss: 0.118593  [24704/24872]:   0%|          | 0/388 [00:19<?, ?it/s]
loss: 0.177663  [24768/24872]:   0%|          | 0/388 [00:19<?, ?it/s]
loss: 0.145794  [24832/24872]:   0%|          | 0/388 [00:19<?, ?it/s]
loss: 0.209816  [24872/24872]:   0%|          | 0/388 [00:19<?, ?it/s]
loss: 0.209816  [24872/24872]: : 389it [00:19, 19.67it/s]
Epoch 3, time=190.35s

  0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.206027  [   64/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.180794  [  128/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.140994  [  192/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.131737  [  256/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.179276  [  320/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.156884  [  384/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.162523  [  448/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.153564  [  512/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.116056  [  576/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.133208  [  640/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.131977  [  704/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.106940  [  768/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.154130  [  832/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.163505  [  896/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.195965  [  960/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.198227  [ 1024/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.179498  [ 1088/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.182055  [ 1152/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.147770  [ 1216/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.144783  [ 1280/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.133786  [ 1344/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.170196  [ 1408/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.204068  [ 1472/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.144919  [ 1536/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.193744  [ 1600/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.183378  [ 1664/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.171403  [ 1728/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.158234  [ 1792/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.146673  [ 1856/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.165687  [ 1920/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.178710  [ 1984/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.136244  [ 2048/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.127917  [ 2112/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.169856  [ 2176/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.143453  [ 2240/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.227760  [ 2304/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.192959  [ 2368/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.123977  [ 2432/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.216582  [ 2496/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.151833  [ 2560/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.223845  [ 2624/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.157087  [ 2688/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.143332  [ 2752/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.124457  [ 2816/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.122905  [ 2880/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.116535  [ 2944/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.202473  [ 3008/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.173806  [ 3072/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.188288  [ 3136/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.173013  [ 3200/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.111817  [ 3264/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.169943  [ 3328/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.183341  [ 3392/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.167881  [ 3456/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.183239  [ 3520/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.161780  [ 3584/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.211802  [ 3648/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.161469  [ 3712/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.169090  [ 3776/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.147819  [ 3840/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.151974  [ 3904/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.161644  [ 3968/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.163032  [ 4032/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.143958  [ 4096/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.153267  [ 4160/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.150554  [ 4224/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.143473  [ 4288/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.151724  [ 4352/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.149971  [ 4416/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.116367  [ 4480/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.143895  [ 4544/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.176619  [ 4608/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.130117  [ 4672/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.160976  [ 4736/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.164846  [ 4800/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.165687  [ 4864/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.162040  [ 4928/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.128236  [ 4992/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.119026  [ 5056/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.114617  [ 5120/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.126053  [ 5184/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.191408  [ 5248/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.204731  [ 5312/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.195586  [ 5376/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.137357  [ 5440/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.124712  [ 5504/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.168411  [ 5568/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.130790  [ 5632/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.171782  [ 5696/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.202323  [ 5760/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.111576  [ 5824/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.134205  [ 5888/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.150850  [ 5952/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.180527  [ 6016/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.208757  [ 6080/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.151671  [ 6144/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.191146  [ 6208/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.166334  [ 6272/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.173861  [ 6336/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.187861  [ 6400/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.135059  [ 6464/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.107572  [ 6528/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.155338  [ 6592/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.209040  [ 6656/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.159607  [ 6720/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.211057  [ 6784/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.200175  [ 6848/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.102540  [ 6912/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.125394  [ 6976/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.123654  [ 7040/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.128174  [ 7104/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.180673  [ 7168/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.134100  [ 7232/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.187987  [ 7296/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.144358  [ 7360/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.151759  [ 7424/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.129266  [ 7488/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.145720  [ 7552/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.154671  [ 7616/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.146875  [ 7680/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.158419  [ 7744/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.149844  [ 7808/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.145972  [ 7872/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.158638  [ 7936/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.111827  [ 8000/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.176323  [ 8064/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.136535  [ 8128/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.182433  [ 8192/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.143652  [ 8256/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.130685  [ 8320/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.158764  [ 8384/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.087103  [ 8448/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.164728  [ 8512/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.171069  [ 8576/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.147237  [ 8640/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.142313  [ 8704/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.142536  [ 8768/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.141803  [ 8832/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.163896  [ 8896/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.147824  [ 8960/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.142305  [ 9024/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.105704  [ 9088/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.119391  [ 9152/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.204849  [ 9216/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.114505  [ 9280/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.119475  [ 9344/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.142476  [ 9408/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.164750  [ 9472/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.111940  [ 9536/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.212296  [ 9600/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.213849  [ 9664/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.123209  [ 9728/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.184795  [ 9792/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.158791  [ 9856/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.113903  [ 9920/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.135907  [ 9984/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.143716  [10048/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.189690  [10112/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.141338  [10176/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.116387  [10240/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.186321  [10304/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.162608  [10368/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.175968  [10432/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.146677  [10496/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.166662  [10560/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.149494  [10624/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.120758  [10688/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.107652  [10752/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.139317  [10816/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.085540  [10880/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.161919  [10944/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.142200  [11008/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.133960  [11072/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.229635  [11136/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.164184  [11200/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.154168  [11264/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.141060  [11328/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.140457  [11392/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.154578  [11456/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.106817  [11520/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.132699  [11584/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.165563  [11648/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.157725  [11712/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.153186  [11776/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.091269  [11840/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.147011  [11904/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.191594  [11968/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.138175  [12032/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.157523  [12096/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.156105  [12160/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.161984  [12224/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.160165  [12288/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.130389  [12352/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.151487  [12416/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.158234  [12480/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.128533  [12544/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.187926  [12608/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.120798  [12672/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.169885  [12736/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.227553  [12800/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.282292  [12864/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.142160  [12928/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.145571  [12992/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.155422  [13056/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.107414  [13120/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.121735  [13184/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.141349  [13248/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.141302  [13312/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.147496  [13376/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.172765  [13440/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.141679  [13504/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.146692  [13568/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.132887  [13632/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.135937  [13696/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.116835  [13760/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.120624  [13824/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.198283  [13888/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.132885  [13952/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.117381  [14016/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.180843  [14080/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.187623  [14144/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.125906  [14208/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.188394  [14272/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.154369  [14336/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.149475  [14400/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.203875  [14464/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.116340  [14528/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.157525  [14592/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.178755  [14656/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.166000  [14720/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.136212  [14784/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.202743  [14848/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.137605  [14912/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.133220  [14976/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.143586  [15040/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.195938  [15104/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.106739  [15168/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.114784  [15232/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.136906  [15296/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.130520  [15360/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.152103  [15424/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.127074  [15488/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.113690  [15552/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.147294  [15616/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.158773  [15680/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.194566  [15744/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.140268  [15808/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.149309  [15872/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.132594  [15936/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.207410  [16000/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.214523  [16064/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.128642  [16128/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.195911  [16192/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.201321  [16256/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.158008  [16320/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.204614  [16384/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.178725  [16448/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.169278  [16512/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.119658  [16576/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.178706  [16640/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.139916  [16704/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.172673  [16768/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.123599  [16832/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.138454  [16896/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.141773  [16960/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.202578  [17024/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.173637  [17088/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.141359  [17152/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.196235  [17216/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.183284  [17280/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.144667  [17344/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.205074  [17408/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.082330  [17472/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.165178  [17536/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.197176  [17600/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.123684  [17664/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.192654  [17728/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.140254  [17792/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.172342  [17856/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.184116  [17920/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.156495  [17984/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.141321  [18048/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.131972  [18112/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.089105  [18176/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.165237  [18240/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.168137  [18304/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.145080  [18368/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.117414  [18432/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.115390  [18496/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.175289  [18560/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.137256  [18624/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.128882  [18688/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.167518  [18752/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.161536  [18816/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.167744  [18880/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.187447  [18944/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.229147  [19008/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.138312  [19072/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.187203  [19136/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.136326  [19200/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.103065  [19264/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.209142  [19328/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.146267  [19392/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.193814  [19456/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.165298  [19520/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.170703  [19584/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.138932  [19648/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.115036  [19712/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.144117  [19776/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.116961  [19840/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.139123  [19904/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.177687  [19968/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.187834  [20032/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.154476  [20096/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.159234  [20160/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.126265  [20224/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.142430  [20288/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.183475  [20352/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.141796  [20416/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.228318  [20480/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.156066  [20544/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.103413  [20608/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.144426  [20672/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.137436  [20736/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.153311  [20800/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.144123  [20864/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.168285  [20928/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.159732  [20992/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.158850  [21056/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.118900  [21120/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.165070  [21184/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.154468  [21248/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.133760  [21312/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.121980  [21376/24872]:   0%|          | 0/388 [00:17<?, ?it/s]
loss: 0.154454  [21440/24872]:   0%|          | 0/388 [00:17<?, ?it/s]
loss: 0.129338  [21504/24872]:   0%|          | 0/388 [00:17<?, ?it/s]
loss: 0.111628  [21568/24872]:   0%|          | 0/388 [00:17<?, ?it/s]
loss: 0.160826  [21632/24872]:   0%|          | 0/388 [00:17<?, ?it/s]
loss: 0.146593  [21696/24872]:   0%|          | 0/388 [00:17<?, ?it/s]
loss: 0.149637  [21760/24872]:   0%|          | 0/388 [00:17<?, ?it/s]
loss: 0.156634  [21824/24872]:   0%|          | 0/388 [00:17<?, ?it/s]
loss: 0.109792  [21888/24872]:   0%|          | 0/388 [00:17<?, ?it/s]
loss: 0.137293  [21952/24872]:   0%|          | 0/388 [00:17<?, ?it/s]
loss: 0.186173  [22016/24872]:   0%|          | 0/388 [00:17<?, ?it/s]
loss: 0.142718  [22080/24872]:   0%|          | 0/388 [00:17<?, ?it/s]
loss: 0.143083  [22144/24872]:   0%|          | 0/388 [00:17<?, ?it/s]
loss: 0.141821  [22208/24872]:   0%|          | 0/388 [00:17<?, ?it/s]
loss: 0.113134  [22272/24872]:   0%|          | 0/388 [00:17<?, ?it/s]
loss: 0.160680  [22336/24872]:   0%|          | 0/388 [00:17<?, ?it/s]
loss: 0.163181  [22400/24872]:   0%|          | 0/388 [00:17<?, ?it/s]
loss: 0.147314  [22464/24872]:   0%|          | 0/388 [00:17<?, ?it/s]
loss: 0.180894  [22528/24872]:   0%|          | 0/388 [00:17<?, ?it/s]
loss: 0.156230  [22592/24872]:   0%|          | 0/388 [00:18<?, ?it/s]
loss: 0.147931  [22656/24872]:   0%|          | 0/388 [00:18<?, ?it/s]
loss: 0.208664  [22720/24872]:   0%|          | 0/388 [00:18<?, ?it/s]
loss: 0.148777  [22784/24872]:   0%|          | 0/388 [00:18<?, ?it/s]
loss: 0.154222  [22848/24872]:   0%|          | 0/388 [00:18<?, ?it/s]
loss: 0.160296  [22912/24872]:   0%|          | 0/388 [00:18<?, ?it/s]
loss: 0.194357  [22976/24872]:   0%|          | 0/388 [00:18<?, ?it/s]
loss: 0.157241  [23040/24872]:   0%|          | 0/388 [00:18<?, ?it/s]
loss: 0.124670  [23104/24872]:   0%|          | 0/388 [00:18<?, ?it/s]
loss: 0.145566  [23168/24872]:   0%|          | 0/388 [00:18<?, ?it/s]
loss: 0.154838  [23232/24872]:   0%|          | 0/388 [00:18<?, ?it/s]
loss: 0.153552  [23296/24872]:   0%|          | 0/388 [00:18<?, ?it/s]
loss: 0.169921  [23360/24872]:   0%|          | 0/388 [00:18<?, ?it/s]
loss: 0.165200  [23424/24872]:   0%|          | 0/388 [00:18<?, ?it/s]
loss: 0.131358  [23488/24872]:   0%|          | 0/388 [00:18<?, ?it/s]
loss: 0.171010  [23552/24872]:   0%|          | 0/388 [00:18<?, ?it/s]
loss: 0.127431  [23616/24872]:   0%|          | 0/388 [00:18<?, ?it/s]
loss: 0.152758  [23680/24872]:   0%|          | 0/388 [00:18<?, ?it/s]
loss: 0.171831  [23744/24872]:   0%|          | 0/388 [00:18<?, ?it/s]
loss: 0.133299  [23808/24872]:   0%|          | 0/388 [00:18<?, ?it/s]
loss: 0.181328  [23872/24872]:   0%|          | 0/388 [00:19<?, ?it/s]
loss: 0.134829  [23936/24872]:   0%|          | 0/388 [00:19<?, ?it/s]
loss: 0.145522  [24000/24872]:   0%|          | 0/388 [00:19<?, ?it/s]
loss: 0.152938  [24064/24872]:   0%|          | 0/388 [00:19<?, ?it/s]
loss: 0.194631  [24128/24872]:   0%|          | 0/388 [00:19<?, ?it/s]
loss: 0.108033  [24192/24872]:   0%|          | 0/388 [00:19<?, ?it/s]
loss: 0.172032  [24256/24872]:   0%|          | 0/388 [00:19<?, ?it/s]
loss: 0.116078  [24320/24872]:   0%|          | 0/388 [00:19<?, ?it/s]
loss: 0.146246  [24384/24872]:   0%|          | 0/388 [00:19<?, ?it/s]
loss: 0.152971  [24448/24872]:   0%|          | 0/388 [00:19<?, ?it/s]
loss: 0.152410  [24512/24872]:   0%|          | 0/388 [00:19<?, ?it/s]
loss: 0.136519  [24576/24872]:   0%|          | 0/388 [00:19<?, ?it/s]
loss: 0.136574  [24640/24872]:   0%|          | 0/388 [00:19<?, ?it/s]
loss: 0.110985  [24704/24872]:   0%|          | 0/388 [00:19<?, ?it/s]
loss: 0.145889  [24768/24872]:   0%|          | 0/388 [00:19<?, ?it/s]
loss: 0.143266  [24832/24872]:   0%|          | 0/388 [00:19<?, ?it/s]
loss: 0.189865  [24872/24872]:   0%|          | 0/388 [00:19<?, ?it/s]
loss: 0.189865  [24872/24872]: : 389it [00:19, 19.61it/s]
Epoch 4, time=210.19s

  0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.177299  [   64/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.162023  [  128/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.139654  [  192/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.128009  [  256/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.145208  [  320/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.121951  [  384/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.153169  [  448/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.135642  [  512/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.104632  [  576/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.116182  [  640/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.126537  [  704/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.110396  [  768/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.143564  [  832/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.148143  [  896/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.177973  [  960/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.172165  [ 1024/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.151403  [ 1088/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.138500  [ 1152/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.127265  [ 1216/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.115336  [ 1280/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.122609  [ 1344/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.121698  [ 1408/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.161800  [ 1472/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.123306  [ 1536/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.144031  [ 1600/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.128414  [ 1664/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.145805  [ 1728/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.117666  [ 1792/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.142098  [ 1856/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.159829  [ 1920/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.138032  [ 1984/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.135851  [ 2048/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.113937  [ 2112/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.159291  [ 2176/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.127265  [ 2240/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.179529  [ 2304/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.189891  [ 2368/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.122436  [ 2432/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.176811  [ 2496/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.148169  [ 2560/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.186356  [ 2624/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.146478  [ 2688/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.136515  [ 2752/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.121455  [ 2816/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.142430  [ 2880/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.093808  [ 2944/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.183796  [ 3008/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.168618  [ 3072/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.175940  [ 3136/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.170293  [ 3200/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.103574  [ 3264/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.153979  [ 3328/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.180057  [ 3392/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.149761  [ 3456/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.168483  [ 3520/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.164021  [ 3584/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.197261  [ 3648/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.146819  [ 3712/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.156877  [ 3776/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.147777  [ 3840/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.160353  [ 3904/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.167400  [ 3968/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.151770  [ 4032/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.134435  [ 4096/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.162149  [ 4160/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.159360  [ 4224/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.162378  [ 4288/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.134942  [ 4352/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.148636  [ 4416/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.139209  [ 4480/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.148910  [ 4544/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.169858  [ 4608/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.112086  [ 4672/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.145567  [ 4736/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.161064  [ 4800/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.173965  [ 4864/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.127340  [ 4928/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.121210  [ 4992/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.121202  [ 5056/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.108737  [ 5120/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.128442  [ 5184/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.181385  [ 5248/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.190934  [ 5312/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.192495  [ 5376/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.125518  [ 5440/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.117985  [ 5504/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.165682  [ 5568/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.138214  [ 5632/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.175239  [ 5696/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.207018  [ 5760/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.115644  [ 5824/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.133798  [ 5888/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.124284  [ 5952/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.158988  [ 6016/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.185467  [ 6080/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.142046  [ 6144/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.152819  [ 6208/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.147047  [ 6272/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.153249  [ 6336/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.176597  [ 6400/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.125866  [ 6464/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.094016  [ 6528/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.153964  [ 6592/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.178736  [ 6656/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.135971  [ 6720/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.206883  [ 6784/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.190519  [ 6848/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.095379  [ 6912/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.129533  [ 6976/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.117825  [ 7040/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.106099  [ 7104/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.165062  [ 7168/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.121313  [ 7232/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.171762  [ 7296/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.126966  [ 7360/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.131942  [ 7424/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.116674  [ 7488/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.125804  [ 7552/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.139603  [ 7616/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.134550  [ 7680/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.162063  [ 7744/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.140025  [ 7808/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.141293  [ 7872/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.153931  [ 7936/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.099857  [ 8000/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.190518  [ 8064/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.141292  [ 8128/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.174100  [ 8192/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.159306  [ 8256/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.127917  [ 8320/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.132598  [ 8384/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.076404  [ 8448/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.145792  [ 8512/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.163500  [ 8576/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.159618  [ 8640/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.143509  [ 8704/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.137205  [ 8768/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.132493  [ 8832/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.160607  [ 8896/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.132648  [ 8960/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.131583  [ 9024/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.099921  [ 9088/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.151193  [ 9152/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.185983  [ 9216/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.100245  [ 9280/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.110567  [ 9344/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.147777  [ 9408/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.130691  [ 9472/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.104585  [ 9536/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.178062  [ 9600/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.176148  [ 9664/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.116058  [ 9728/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.176540  [ 9792/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.151075  [ 9856/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.102779  [ 9920/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.138906  [ 9984/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.141794  [10048/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.145732  [10112/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.122851  [10176/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.120708  [10240/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.179251  [10304/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.143430  [10368/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.149747  [10432/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.142223  [10496/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.166895  [10560/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.141573  [10624/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.110141  [10688/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.113780  [10752/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.126925  [10816/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.075299  [10880/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.143669  [10944/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.132284  [11008/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.125784  [11072/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.197472  [11136/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.152608  [11200/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.140892  [11264/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.134888  [11328/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.218635  [11392/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.157796  [11456/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.114439  [11520/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.121690  [11584/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.156035  [11648/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.155359  [11712/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.170675  [11776/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.073990  [11840/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.137081  [11904/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.175860  [11968/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.162013  [12032/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.176588  [12096/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.140667  [12160/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.143249  [12224/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.145080  [12288/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.124442  [12352/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.149309  [12416/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.128653  [12480/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.117410  [12544/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.146448  [12608/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.094869  [12672/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.131225  [12736/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.193810  [12800/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.245492  [12864/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.117328  [12928/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.118042  [12992/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.140321  [13056/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.096745  [13120/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.098983  [13184/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.119897  [13248/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.127168  [13312/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.135184  [13376/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.147191  [13440/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.142662  [13504/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.150510  [13568/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.136996  [13632/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.140191  [13696/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.126101  [13760/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.124155  [13824/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.189432  [13888/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.112151  [13952/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.133400  [14016/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.177391  [14080/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.170908  [14144/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.117127  [14208/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.185089  [14272/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.140682  [14336/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.143892  [14400/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.185917  [14464/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.108732  [14528/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.161891  [14592/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.180632  [14656/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.187921  [14720/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.192390  [14784/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.168534  [14848/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.204392  [14912/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.157303  [14976/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.149065  [15040/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.212540  [15104/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.105687  [15168/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.131712  [15232/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.154317  [15296/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.141878  [15360/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.191183  [15424/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.128942  [15488/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.138422  [15552/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.146143  [15616/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.162987  [15680/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.199520  [15744/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.143661  [15808/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.144788  [15872/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.114211  [15936/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.204518  [16000/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.187532  [16064/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.135166  [16128/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.191965  [16192/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.195290  [16256/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.162878  [16320/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.201292  [16384/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.176708  [16448/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.176509  [16512/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.104806  [16576/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.166781  [16640/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.141046  [16704/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.134056  [16768/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.126946  [16832/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.136798  [16896/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.125046  [16960/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.186189  [17024/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.168679  [17088/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.135116  [17152/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.182558  [17216/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.165040  [17280/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.153160  [17344/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.177090  [17408/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.092486  [17472/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.170451  [17536/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.176600  [17600/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.131154  [17664/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.201800  [17728/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.144356  [17792/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.164538  [17856/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.174829  [17920/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.149893  [17984/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.129331  [18048/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.124154  [18112/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.084547  [18176/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.157864  [18240/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.166641  [18304/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.118579  [18368/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.133131  [18432/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.123489  [18496/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.158506  [18560/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.129134  [18624/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.125733  [18688/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.165390  [18752/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.165939  [18816/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.148228  [18880/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.141913  [18944/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.192929  [19008/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.136223  [19072/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.160853  [19136/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.135943  [19200/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.084872  [19264/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.204249  [19328/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.137319  [19392/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.141148  [19456/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.128018  [19520/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.129345  [19584/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.132703  [19648/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.101430  [19712/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.116338  [19776/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.105394  [19840/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.127191  [19904/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.137725  [19968/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.176369  [20032/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.129919  [20096/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.144246  [20160/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.105297  [20224/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.123855  [20288/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.156565  [20352/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.131045  [20416/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.197437  [20480/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.151689  [20544/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.096602  [20608/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.122231  [20672/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.174626  [20736/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.135441  [20800/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.140763  [20864/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.170470  [20928/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.152066  [20992/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.167322  [21056/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.110860  [21120/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.167490  [21184/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.195135  [21248/24872]:   0%|          | 0/388 [00:17<?, ?it/s]
loss: 0.121414  [21312/24872]:   0%|          | 0/388 [00:17<?, ?it/s]
loss: 0.156392  [21376/24872]:   0%|          | 0/388 [00:17<?, ?it/s]
loss: 0.139542  [21440/24872]:   0%|          | 0/388 [00:17<?, ?it/s]
loss: 0.150515  [21504/24872]:   0%|          | 0/388 [00:17<?, ?it/s]
loss: 0.113901  [21568/24872]:   0%|          | 0/388 [00:17<?, ?it/s]
loss: 0.162586  [21632/24872]:   0%|          | 0/388 [00:17<?, ?it/s]
loss: 0.141547  [21696/24872]:   0%|          | 0/388 [00:17<?, ?it/s]
loss: 0.149840  [21760/24872]:   0%|          | 0/388 [00:17<?, ?it/s]
loss: 0.144932  [21824/24872]:   0%|          | 0/388 [00:17<?, ?it/s]
loss: 0.106639  [21888/24872]:   0%|          | 0/388 [00:17<?, ?it/s]
loss: 0.154551  [21952/24872]:   0%|          | 0/388 [00:17<?, ?it/s]
loss: 0.161880  [22016/24872]:   0%|          | 0/388 [00:17<?, ?it/s]
loss: 0.144256  [22080/24872]:   0%|          | 0/388 [00:17<?, ?it/s]
loss: 0.157013  [22144/24872]:   0%|          | 0/388 [00:17<?, ?it/s]
loss: 0.184201  [22208/24872]:   0%|          | 0/388 [00:17<?, ?it/s]
loss: 0.114827  [22272/24872]:   0%|          | 0/388 [00:17<?, ?it/s]
loss: 0.163948  [22336/24872]:   0%|          | 0/388 [00:17<?, ?it/s]
loss: 0.154944  [22400/24872]:   0%|          | 0/388 [00:17<?, ?it/s]
loss: 0.139739  [22464/24872]:   0%|          | 0/388 [00:18<?, ?it/s]
loss: 0.138096  [22528/24872]:   0%|          | 0/388 [00:18<?, ?it/s]
loss: 0.179234  [22592/24872]:   0%|          | 0/388 [00:18<?, ?it/s]
loss: 0.157128  [22656/24872]:   0%|          | 0/388 [00:18<?, ?it/s]
loss: 0.204103  [22720/24872]:   0%|          | 0/388 [00:18<?, ?it/s]
loss: 0.158353  [22784/24872]:   0%|          | 0/388 [00:18<?, ?it/s]
loss: 0.136966  [22848/24872]:   0%|          | 0/388 [00:18<?, ?it/s]
loss: 0.168314  [22912/24872]:   0%|          | 0/388 [00:18<?, ?it/s]
loss: 0.193080  [22976/24872]:   0%|          | 0/388 [00:18<?, ?it/s]
loss: 0.150364  [23040/24872]:   0%|          | 0/388 [00:18<?, ?it/s]
loss: 0.120249  [23104/24872]:   0%|          | 0/388 [00:18<?, ?it/s]
loss: 0.144352  [23168/24872]:   0%|          | 0/388 [00:18<?, ?it/s]
loss: 0.137695  [23232/24872]:   0%|          | 0/388 [00:18<?, ?it/s]
loss: 0.181930  [23296/24872]:   0%|          | 0/388 [00:18<?, ?it/s]
loss: 0.156533  [23360/24872]:   0%|          | 0/388 [00:18<?, ?it/s]
loss: 0.163551  [23424/24872]:   0%|          | 0/388 [00:18<?, ?it/s]
loss: 0.135220  [23488/24872]:   0%|          | 0/388 [00:18<?, ?it/s]
loss: 0.143057  [23552/24872]:   0%|          | 0/388 [00:18<?, ?it/s]
loss: 0.137721  [23616/24872]:   0%|          | 0/388 [00:18<?, ?it/s]
loss: 0.166580  [23680/24872]:   0%|          | 0/388 [00:19<?, ?it/s]
loss: 0.177266  [23744/24872]:   0%|          | 0/388 [00:19<?, ?it/s]
loss: 0.124242  [23808/24872]:   0%|          | 0/388 [00:19<?, ?it/s]
loss: 0.181511  [23872/24872]:   0%|          | 0/388 [00:19<?, ?it/s]
loss: 0.110491  [23936/24872]:   0%|          | 0/388 [00:19<?, ?it/s]
loss: 0.149192  [24000/24872]:   0%|          | 0/388 [00:19<?, ?it/s]
loss: 0.150522  [24064/24872]:   0%|          | 0/388 [00:19<?, ?it/s]
loss: 0.180686  [24128/24872]:   0%|          | 0/388 [00:19<?, ?it/s]
loss: 0.111569  [24192/24872]:   0%|          | 0/388 [00:19<?, ?it/s]
loss: 0.170487  [24256/24872]:   0%|          | 0/388 [00:19<?, ?it/s]
loss: 0.109343  [24320/24872]:   0%|          | 0/388 [00:19<?, ?it/s]
loss: 0.162729  [24384/24872]:   0%|          | 0/388 [00:19<?, ?it/s]
loss: 0.148238  [24448/24872]:   0%|          | 0/388 [00:19<?, ?it/s]
loss: 0.144041  [24512/24872]:   0%|          | 0/388 [00:19<?, ?it/s]
loss: 0.138098  [24576/24872]:   0%|          | 0/388 [00:19<?, ?it/s]
loss: 0.130320  [24640/24872]:   0%|          | 0/388 [00:19<?, ?it/s]
loss: 0.108722  [24704/24872]:   0%|          | 0/388 [00:19<?, ?it/s]
loss: 0.143479  [24768/24872]:   0%|          | 0/388 [00:19<?, ?it/s]
loss: 0.135272  [24832/24872]:   0%|          | 0/388 [00:19<?, ?it/s]
loss: 0.189853  [24872/24872]:   0%|          | 0/388 [00:19<?, ?it/s]
loss: 0.189853  [24872/24872]: : 389it [00:19, 19.47it/s]
Epoch 5, time=230.17s

  0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.177485  [   64/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.149602  [  128/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.115501  [  192/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.116775  [  256/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.140029  [  320/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.120097  [  384/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.146540  [  448/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.122035  [  512/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.105566  [  576/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.123344  [  640/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.120620  [  704/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.107684  [  768/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.139536  [  832/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.141102  [  896/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.167602  [  960/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.158396  [ 1024/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.142042  [ 1088/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.144526  [ 1152/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.120582  [ 1216/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.116806  [ 1280/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.113832  [ 1344/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.115704  [ 1408/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.156035  [ 1472/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.116003  [ 1536/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.130709  [ 1600/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.137635  [ 1664/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.134907  [ 1728/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.116336  [ 1792/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.137964  [ 1856/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.144668  [ 1920/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.146552  [ 1984/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.123022  [ 2048/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.116838  [ 2112/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.138212  [ 2176/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.124763  [ 2240/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.184876  [ 2304/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.168490  [ 2368/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.116432  [ 2432/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.185520  [ 2496/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.133257  [ 2560/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.186824  [ 2624/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.144269  [ 2688/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.124173  [ 2752/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.127750  [ 2816/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.139414  [ 2880/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.089570  [ 2944/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.218108  [ 3008/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.151165  [ 3072/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.176663  [ 3136/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.167736  [ 3200/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.102054  [ 3264/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.148092  [ 3328/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.155234  [ 3392/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.144370  [ 3456/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.160977  [ 3520/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.137406  [ 3584/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.162100  [ 3648/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.142525  [ 3712/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.164498  [ 3776/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.129123  [ 3840/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.133868  [ 3904/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.151651  [ 3968/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.144261  [ 4032/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.141631  [ 4096/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.154271  [ 4160/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.160675  [ 4224/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.160316  [ 4288/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.129969  [ 4352/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.150563  [ 4416/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.145449  [ 4480/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.135243  [ 4544/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.164145  [ 4608/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.120086  [ 4672/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.145174  [ 4736/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.142293  [ 4800/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.153928  [ 4864/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.134873  [ 4928/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.118530  [ 4992/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.104136  [ 5056/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.131451  [ 5120/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.108505  [ 5184/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.170983  [ 5248/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.177194  [ 5312/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.169468  [ 5376/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.121640  [ 5440/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.115693  [ 5504/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.143049  [ 5568/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.152199  [ 5632/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.128748  [ 5696/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.163487  [ 5760/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.102332  [ 5824/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.122815  [ 5888/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.125974  [ 5952/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.145628  [ 6016/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.162280  [ 6080/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.120616  [ 6144/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.141869  [ 6208/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.139762  [ 6272/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.136413  [ 6336/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.159644  [ 6400/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.112926  [ 6464/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.087194  [ 6528/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.141739  [ 6592/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.165288  [ 6656/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.124295  [ 6720/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.185788  [ 6784/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.164500  [ 6848/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.096096  [ 6912/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.116397  [ 6976/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.117189  [ 7040/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.130602  [ 7104/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.164562  [ 7168/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.123016  [ 7232/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.169360  [ 7296/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.129772  [ 7360/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.151089  [ 7424/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.118317  [ 7488/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.113295  [ 7552/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.148934  [ 7616/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.130812  [ 7680/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.160479  [ 7744/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.137603  [ 7808/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.139541  [ 7872/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.168280  [ 7936/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.088367  [ 8000/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.178871  [ 8064/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.107825  [ 8128/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.171389  [ 8192/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.125142  [ 8256/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.129181  [ 8320/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.125578  [ 8384/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.066434  [ 8448/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.145197  [ 8512/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.145761  [ 8576/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.136805  [ 8640/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.139686  [ 8704/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.118999  [ 8768/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.115134  [ 8832/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.155564  [ 8896/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.132178  [ 8960/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.134031  [ 9024/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.101505  [ 9088/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.131330  [ 9152/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.186222  [ 9216/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.098242  [ 9280/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.107890  [ 9344/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.150943  [ 9408/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.132096  [ 9472/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.102766  [ 9536/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.173895  [ 9600/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.188731  [ 9664/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.104017  [ 9728/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.173249  [ 9792/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.150272  [ 9856/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.107212  [ 9920/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.135588  [ 9984/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.136045  [10048/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.132295  [10112/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.118077  [10176/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.115621  [10240/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.175215  [10304/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.137072  [10368/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.143266  [10432/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.134339  [10496/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.160069  [10560/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.132989  [10624/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.100144  [10688/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.104560  [10752/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.117686  [10816/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.063108  [10880/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.135284  [10944/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.125190  [11008/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.119317  [11072/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.186912  [11136/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.130046  [11200/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.130149  [11264/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.128084  [11328/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.169342  [11392/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.143665  [11456/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.099913  [11520/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.094732  [11584/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.153219  [11648/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.126589  [11712/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.161773  [11776/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.076064  [11840/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.128961  [11904/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.148341  [11968/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.120454  [12032/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.160798  [12096/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.140863  [12160/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.144470  [12224/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.140816  [12288/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.120329  [12352/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.151385  [12416/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.124923  [12480/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.114129  [12544/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.122244  [12608/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.099336  [12672/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.120947  [12736/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.198126  [12800/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.251330  [12864/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.112041  [12928/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.108491  [12992/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.137785  [13056/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.098380  [13120/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.105513  [13184/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.101118  [13248/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.115808  [13312/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.117352  [13376/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.141833  [13440/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.127056  [13504/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.146880  [13568/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.130229  [13632/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.121359  [13696/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.104484  [13760/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.098509  [13824/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.187268  [13888/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.118169  [13952/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.104265  [14016/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.192382  [14080/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.158828  [14144/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.109580  [14208/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.178792  [14272/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.135412  [14336/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.141752  [14400/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.177109  [14464/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.104398  [14528/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.149036  [14592/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.171407  [14656/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.135521  [14720/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.125751  [14784/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.174980  [14848/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.119613  [14912/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.150317  [14976/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.139216  [15040/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.161542  [15104/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.105089  [15168/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.107309  [15232/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.126156  [15296/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.108212  [15360/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.151373  [15424/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.125362  [15488/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.108866  [15552/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.129592  [15616/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.144757  [15680/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.174563  [15744/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.125868  [15808/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.131494  [15872/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.104487  [15936/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.195307  [16000/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.175671  [16064/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.120248  [16128/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.157572  [16192/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.192034  [16256/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.124153  [16320/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.154153  [16384/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.121668  [16448/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.139356  [16512/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.098131  [16576/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.128098  [16640/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.125438  [16704/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.115941  [16768/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.108689  [16832/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.114772  [16896/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.107985  [16960/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.149056  [17024/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.144572  [17088/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.129120  [17152/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.140863  [17216/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.152544  [17280/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.126813  [17344/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.157077  [17408/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.074770  [17472/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.138576  [17536/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.164184  [17600/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.118303  [17664/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.188256  [17728/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.134575  [17792/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.148065  [17856/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.166514  [17920/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.121286  [17984/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.110218  [18048/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.097677  [18112/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.076531  [18176/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.148063  [18240/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.151555  [18304/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.124267  [18368/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.117973  [18432/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.103297  [18496/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.143981  [18560/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.127650  [18624/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.112584  [18688/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.144691  [18752/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.148171  [18816/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.147251  [18880/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.129910  [18944/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.176634  [19008/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.121733  [19072/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.158529  [19136/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.121553  [19200/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.080773  [19264/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.181739  [19328/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.134962  [19392/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.145199  [19456/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.118184  [19520/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.125263  [19584/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.125148  [19648/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.103794  [19712/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.105180  [19776/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.092707  [19840/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.122945  [19904/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.120985  [19968/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.157593  [20032/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.121244  [20096/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.146671  [20160/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.107392  [20224/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.118260  [20288/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.167102  [20352/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.119456  [20416/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.193651  [20480/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.137345  [20544/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.086523  [20608/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.116783  [20672/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.163548  [20736/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.127326  [20800/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.136000  [20864/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.151175  [20928/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.138709  [20992/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.143822  [21056/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.114867  [21120/24872]:   0%|          | 0/388 [00:17<?, ?it/s]
loss: 0.137578  [21184/24872]:   0%|          | 0/388 [00:17<?, ?it/s]
loss: 0.147624  [21248/24872]:   0%|          | 0/388 [00:17<?, ?it/s]
loss: 0.106638  [21312/24872]:   0%|          | 0/388 [00:17<?, ?it/s]
loss: 0.115910  [21376/24872]:   0%|          | 0/388 [00:17<?, ?it/s]
loss: 0.149040  [21440/24872]:   0%|          | 0/388 [00:17<?, ?it/s]
loss: 0.139311  [21504/24872]:   0%|          | 0/388 [00:17<?, ?it/s]
loss: 0.103814  [21568/24872]:   0%|          | 0/388 [00:17<?, ?it/s]
loss: 0.143892  [21632/24872]:   0%|          | 0/388 [00:17<?, ?it/s]
loss: 0.120008  [21696/24872]:   0%|          | 0/388 [00:17<?, ?it/s]
loss: 0.121984  [21760/24872]:   0%|          | 0/388 [00:17<?, ?it/s]
loss: 0.145318  [21824/24872]:   0%|          | 0/388 [00:17<?, ?it/s]
loss: 0.108885  [21888/24872]:   0%|          | 0/388 [00:17<?, ?it/s]
loss: 0.133800  [21952/24872]:   0%|          | 0/388 [00:17<?, ?it/s]
loss: 0.150413  [22016/24872]:   0%|          | 0/388 [00:17<?, ?it/s]
loss: 0.121849  [22080/24872]:   0%|          | 0/388 [00:17<?, ?it/s]
loss: 0.136674  [22144/24872]:   0%|          | 0/388 [00:17<?, ?it/s]
loss: 0.170491  [22208/24872]:   0%|          | 0/388 [00:17<?, ?it/s]
loss: 0.099038  [22272/24872]:   0%|          | 0/388 [00:17<?, ?it/s]
loss: 0.150310  [22336/24872]:   0%|          | 0/388 [00:17<?, ?it/s]
loss: 0.131006  [22400/24872]:   0%|          | 0/388 [00:18<?, ?it/s]
loss: 0.126651  [22464/24872]:   0%|          | 0/388 [00:18<?, ?it/s]
loss: 0.120064  [22528/24872]:   0%|          | 0/388 [00:18<?, ?it/s]
loss: 0.175028  [22592/24872]:   0%|          | 0/388 [00:18<?, ?it/s]
loss: 0.139721  [22656/24872]:   0%|          | 0/388 [00:18<?, ?it/s]
loss: 0.179270  [22720/24872]:   0%|          | 0/388 [00:18<?, ?it/s]
loss: 0.129869  [22784/24872]:   0%|          | 0/388 [00:18<?, ?it/s]
loss: 0.135388  [22848/24872]:   0%|          | 0/388 [00:18<?, ?it/s]
loss: 0.149340  [22912/24872]:   0%|          | 0/388 [00:18<?, ?it/s]
loss: 0.181397  [22976/24872]:   0%|          | 0/388 [00:18<?, ?it/s]
loss: 0.141559  [23040/24872]:   0%|          | 0/388 [00:18<?, ?it/s]
loss: 0.114655  [23104/24872]:   0%|          | 0/388 [00:18<?, ?it/s]
loss: 0.138458  [23168/24872]:   0%|          | 0/388 [00:18<?, ?it/s]
loss: 0.143826  [23232/24872]:   0%|          | 0/388 [00:18<?, ?it/s]
loss: 0.157243  [23296/24872]:   0%|          | 0/388 [00:18<?, ?it/s]
loss: 0.150166  [23360/24872]:   0%|          | 0/388 [00:18<?, ?it/s]
loss: 0.134869  [23424/24872]:   0%|          | 0/388 [00:18<?, ?it/s]
loss: 0.126725  [23488/24872]:   0%|          | 0/388 [00:18<?, ?it/s]
loss: 0.132826  [23552/24872]:   0%|          | 0/388 [00:18<?, ?it/s]
loss: 0.105133  [23616/24872]:   0%|          | 0/388 [00:19<?, ?it/s]
loss: 0.145093  [23680/24872]:   0%|          | 0/388 [00:19<?, ?it/s]
loss: 0.165173  [23744/24872]:   0%|          | 0/388 [00:19<?, ?it/s]
loss: 0.116723  [23808/24872]:   0%|          | 0/388 [00:19<?, ?it/s]
loss: 0.187418  [23872/24872]:   0%|          | 0/388 [00:19<?, ?it/s]
loss: 0.105211  [23936/24872]:   0%|          | 0/388 [00:19<?, ?it/s]
loss: 0.168236  [24000/24872]:   0%|          | 0/388 [00:19<?, ?it/s]
loss: 0.163364  [24064/24872]:   0%|          | 0/388 [00:19<?, ?it/s]
loss: 0.170498  [24128/24872]:   0%|          | 0/388 [00:19<?, ?it/s]
loss: 0.128022  [24192/24872]:   0%|          | 0/388 [00:19<?, ?it/s]
loss: 0.196865  [24256/24872]:   0%|          | 0/388 [00:19<?, ?it/s]
loss: 0.105973  [24320/24872]:   0%|          | 0/388 [00:19<?, ?it/s]
loss: 0.169872  [24384/24872]:   0%|          | 0/388 [00:19<?, ?it/s]
loss: 0.156948  [24448/24872]:   0%|          | 0/388 [00:19<?, ?it/s]
loss: 0.119890  [24512/24872]:   0%|          | 0/388 [00:19<?, ?it/s]
loss: 0.139893  [24576/24872]:   0%|          | 0/388 [00:19<?, ?it/s]
loss: 0.143686  [24640/24872]:   0%|          | 0/388 [00:19<?, ?it/s]
loss: 0.105379  [24704/24872]:   0%|          | 0/388 [00:19<?, ?it/s]
loss: 0.142047  [24768/24872]:   0%|          | 0/388 [00:19<?, ?it/s]
loss: 0.153661  [24832/24872]:   0%|          | 0/388 [00:19<?, ?it/s]
loss: 0.207047  [24872/24872]:   0%|          | 0/388 [00:20<?, ?it/s]
loss: 0.207047  [24872/24872]: : 389it [00:20, 19.41it/s]
-------------------------------
LR=0.0001, batch_size=128
-------------------------------
Epoch 1, time=250.21s

  0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.184328  [  128/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.526290  [  256/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.293090  [  384/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.160216  [  512/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.288923  [  640/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.277782  [  768/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.217089  [  896/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.188940  [ 1024/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.200620  [ 1152/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.217390  [ 1280/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.228989  [ 1408/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.205655  [ 1536/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.155941  [ 1664/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.170909  [ 1792/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.212387  [ 1920/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.219201  [ 2048/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.158549  [ 2176/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.157084  [ 2304/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.157353  [ 2432/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.170801  [ 2560/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.209588  [ 2688/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.141036  [ 2816/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.121339  [ 2944/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.180418  [ 3072/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.170605  [ 3200/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.140936  [ 3328/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.161001  [ 3456/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.144684  [ 3584/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.161396  [ 3712/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.156037  [ 3840/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.161511  [ 3968/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.128355  [ 4096/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.132297  [ 4224/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.123834  [ 4352/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.132007  [ 4480/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.149202  [ 4608/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.131517  [ 4736/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.141937  [ 4864/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.110573  [ 4992/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.111668  [ 5120/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.137275  [ 5248/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.159918  [ 5376/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.107293  [ 5504/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.133541  [ 5632/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.161222  [ 5760/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.104533  [ 5888/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.128909  [ 6016/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.150190  [ 6144/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.125021  [ 6272/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.143403  [ 6400/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.091829  [ 6528/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.140035  [ 6656/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.149807  [ 6784/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.126763  [ 6912/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.098608  [ 7040/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.116976  [ 7168/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.120198  [ 7296/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.112341  [ 7424/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.101311  [ 7552/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.125130  [ 7680/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.120776  [ 7808/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.132845  [ 7936/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.123376  [ 8064/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.127714  [ 8192/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.113622  [ 8320/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.086480  [ 8448/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.142205  [ 8576/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.127368  [ 8704/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.104758  [ 8832/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.130787  [ 8960/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.102000  [ 9088/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.143185  [ 9216/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.093352  [ 9344/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.125487  [ 9472/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.116994  [ 9600/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.128324  [ 9728/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.139495  [ 9856/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.107277  [ 9984/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.128663  [10112/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.097902  [10240/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.134968  [10368/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.129804  [10496/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.139709  [10624/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.090190  [10752/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.085326  [10880/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.124745  [11008/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.146519  [11136/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.113917  [11264/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.145190  [11392/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.111326  [11520/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.112804  [11648/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.132981  [11776/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.090881  [11904/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.128084  [12032/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.146728  [12160/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.132097  [12288/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.110137  [12416/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.109831  [12544/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.110787  [12672/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.148514  [12800/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.167804  [12928/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.109851  [13056/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.092419  [13184/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.105834  [13312/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.113286  [13440/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.121145  [13568/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.123893  [13696/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.096706  [13824/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.139796  [13952/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.131218  [14080/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.111367  [14208/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.149313  [14336/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.149582  [14464/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.107028  [14592/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.127822  [14720/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.145724  [14848/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.111697  [14976/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.118215  [15104/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.093540  [15232/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.103486  [15360/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.130588  [15488/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.115152  [15616/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.136274  [15744/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.116740  [15872/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.137610  [16000/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.129587  [16128/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.152061  [16256/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.116933  [16384/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.115363  [16512/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.102824  [16640/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.104317  [16768/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.094644  [16896/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.119657  [17024/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.125260  [17152/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.141817  [17280/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.132921  [17408/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.094175  [17536/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.128579  [17664/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.150486  [17792/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.143521  [17920/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.104833  [18048/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.080561  [18176/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.141242  [18304/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.115273  [18432/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.107450  [18560/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.107057  [18688/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.129384  [18816/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.124957  [18944/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.135630  [19072/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.129601  [19200/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.118232  [19328/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.120622  [19456/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.124785  [19584/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.095270  [19712/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.086074  [19840/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.118045  [19968/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.121839  [20096/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.115244  [20224/24872]:   0%|          | 0/194 [00:12<?, ?it/s]
loss: 0.124110  [20352/24872]:   0%|          | 0/194 [00:12<?, ?it/s]
loss: 0.152150  [20480/24872]:   0%|          | 0/194 [00:12<?, ?it/s]
loss: 0.097362  [20608/24872]:   0%|          | 0/194 [00:12<?, ?it/s]
loss: 0.106724  [20736/24872]:   0%|          | 0/194 [00:12<?, ?it/s]
loss: 0.115842  [20864/24872]:   0%|          | 0/194 [00:12<?, ?it/s]
loss: 0.124433  [20992/24872]:   0%|          | 0/194 [00:12<?, ?it/s]
loss: 0.107954  [21120/24872]:   0%|          | 0/194 [00:12<?, ?it/s]
loss: 0.116226  [21248/24872]:   0%|          | 0/194 [00:12<?, ?it/s]
loss: 0.096028  [21376/24872]:   0%|          | 0/194 [00:12<?, ?it/s]
loss: 0.127474  [21504/24872]:   0%|          | 0/194 [00:12<?, ?it/s]
loss: 0.107751  [21632/24872]:   0%|          | 0/194 [00:12<?, ?it/s]
loss: 0.102969  [21760/24872]:   0%|          | 0/194 [00:12<?, ?it/s]
loss: 0.105862  [21888/24872]:   0%|          | 0/194 [00:13<?, ?it/s]
loss: 0.126078  [22016/24872]:   0%|          | 0/194 [00:13<?, ?it/s]
loss: 0.109503  [22144/24872]:   0%|          | 0/194 [00:13<?, ?it/s]
loss: 0.121975  [22272/24872]:   0%|          | 0/194 [00:13<?, ?it/s]
loss: 0.126109  [22400/24872]:   0%|          | 0/194 [00:13<?, ?it/s]
loss: 0.112858  [22528/24872]:   0%|          | 0/194 [00:13<?, ?it/s]
loss: 0.126110  [22656/24872]:   0%|          | 0/194 [00:13<?, ?it/s]
loss: 0.128932  [22784/24872]:   0%|          | 0/194 [00:13<?, ?it/s]
loss: 0.125451  [22912/24872]:   0%|          | 0/194 [00:13<?, ?it/s]
loss: 0.145104  [23040/24872]:   0%|          | 0/194 [00:13<?, ?it/s]
loss: 0.134471  [23168/24872]:   0%|          | 0/194 [00:13<?, ?it/s]
loss: 0.126398  [23296/24872]:   0%|          | 0/194 [00:13<?, ?it/s]
loss: 0.128858  [23424/24872]:   0%|          | 0/194 [00:13<?, ?it/s]
loss: 0.115076  [23552/24872]:   0%|          | 0/194 [00:14<?, ?it/s]
loss: 0.109419  [23680/24872]:   0%|          | 0/194 [00:14<?, ?it/s]
loss: 0.127643  [23808/24872]:   0%|          | 0/194 [00:14<?, ?it/s]
loss: 0.125291  [23936/24872]:   0%|          | 0/194 [00:14<?, ?it/s]
loss: 0.124710  [24064/24872]:   0%|          | 0/194 [00:14<?, ?it/s]
loss: 0.124007  [24192/24872]:   0%|          | 0/194 [00:14<?, ?it/s]
loss: 0.115649  [24320/24872]:   0%|          | 0/194 [00:14<?, ?it/s]
loss: 0.128155  [24448/24872]:   0%|          | 0/194 [00:14<?, ?it/s]
loss: 0.120446  [24576/24872]:   0%|          | 0/194 [00:14<?, ?it/s]
loss: 0.103994  [24704/24872]:   0%|          | 0/194 [00:14<?, ?it/s]
loss: 0.117835  [24832/24872]:   0%|          | 0/194 [00:14<?, ?it/s]
loss: 0.182717  [24872/24872]:   0%|          | 0/194 [00:14<?, ?it/s]
loss: 0.182717  [24872/24872]: : 195it [00:14, 13.16it/s]
Epoch 2, time=265.03s

  0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.136163  [  128/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.112555  [  256/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.137353  [  384/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.120693  [  512/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.097423  [  640/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.110444  [  768/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.129257  [  896/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.145044  [ 1024/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.118952  [ 1152/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.113159  [ 1280/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.115006  [ 1408/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.136447  [ 1536/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.114365  [ 1664/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.118033  [ 1792/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.127632  [ 1920/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.130054  [ 2048/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.106334  [ 2176/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.140446  [ 2304/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.125330  [ 2432/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.120026  [ 2560/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.150360  [ 2688/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.099229  [ 2816/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.101247  [ 2944/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.147836  [ 3072/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.136105  [ 3200/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.101510  [ 3328/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.133436  [ 3456/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.135128  [ 3584/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.132683  [ 3712/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.115318  [ 3840/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.122392  [ 3968/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.112420  [ 4096/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.111077  [ 4224/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.112322  [ 4352/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.107107  [ 4480/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.124303  [ 4608/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.116918  [ 4736/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.126085  [ 4864/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.099510  [ 4992/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.095964  [ 5120/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.121831  [ 5248/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.144887  [ 5376/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.103015  [ 5504/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.120375  [ 5632/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.140683  [ 5760/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.100997  [ 5888/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.130469  [ 6016/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.123246  [ 6144/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.114817  [ 6272/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.139198  [ 6400/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.089315  [ 6528/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.129926  [ 6656/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.124858  [ 6784/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.113709  [ 6912/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.092196  [ 7040/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.112229  [ 7168/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.111212  [ 7296/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.105184  [ 7424/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.122609  [ 7552/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.110713  [ 7680/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.115441  [ 7808/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.134860  [ 7936/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.124487  [ 8064/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.130489  [ 8192/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.109194  [ 8320/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.087392  [ 8448/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.146335  [ 8576/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.121273  [ 8704/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.102497  [ 8832/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.134545  [ 8960/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.099605  [ 9088/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.136842  [ 9216/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.087981  [ 9344/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.125117  [ 9472/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.108947  [ 9600/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.126079  [ 9728/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.128108  [ 9856/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.107217  [ 9984/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.114513  [10112/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.091748  [10240/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.128304  [10368/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.123770  [10496/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.134052  [10624/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.094585  [10752/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.085371  [10880/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.116383  [11008/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.141033  [11136/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.112327  [11264/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.143885  [11392/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.102809  [11520/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.117729  [11648/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.122909  [11776/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.086789  [11904/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.118460  [12032/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.134216  [12160/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.131411  [12288/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.101055  [12416/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.109550  [12544/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.095526  [12672/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.142429  [12800/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.159001  [12928/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.109367  [13056/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.091356  [13184/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.098813  [13312/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.106699  [13440/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.119954  [13568/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.111183  [13696/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.091128  [13824/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.130041  [13952/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.119126  [14080/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.108222  [14208/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.143585  [14336/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.139759  [14464/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.106139  [14592/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.121590  [14720/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.134290  [14848/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.110831  [14976/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.108424  [15104/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.086243  [15232/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.099846  [15360/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.119096  [15488/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.105658  [15616/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.124102  [15744/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.114215  [15872/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.131135  [16000/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.122757  [16128/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.141426  [16256/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.111394  [16384/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.107708  [16512/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.104176  [16640/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.106854  [16768/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.100631  [16896/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.115975  [17024/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.120129  [17152/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.130766  [17280/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.126888  [17408/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.083863  [17536/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.130192  [17664/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.145113  [17792/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.140870  [17920/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.102842  [18048/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.075324  [18176/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.138397  [18304/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.109406  [18432/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.101347  [18560/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.108291  [18688/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.127649  [18816/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.130288  [18944/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.133761  [19072/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.130268  [19200/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.118363  [19328/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.121316  [19456/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.130453  [19584/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.089407  [19712/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.094535  [19840/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.110746  [19968/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.148076  [20096/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.119864  [20224/24872]:   0%|          | 0/194 [00:12<?, ?it/s]
loss: 0.138704  [20352/24872]:   0%|          | 0/194 [00:12<?, ?it/s]
loss: 0.160213  [20480/24872]:   0%|          | 0/194 [00:12<?, ?it/s]
loss: 0.097861  [20608/24872]:   0%|          | 0/194 [00:12<?, ?it/s]
loss: 0.123726  [20736/24872]:   0%|          | 0/194 [00:12<?, ?it/s]
loss: 0.135678  [20864/24872]:   0%|          | 0/194 [00:12<?, ?it/s]
loss: 0.150027  [20992/24872]:   0%|          | 0/194 [00:12<?, ?it/s]
loss: 0.101427  [21120/24872]:   0%|          | 0/194 [00:12<?, ?it/s]
loss: 0.117258  [21248/24872]:   0%|          | 0/194 [00:12<?, ?it/s]
loss: 0.106761  [21376/24872]:   0%|          | 0/194 [00:12<?, ?it/s]
loss: 0.118527  [21504/24872]:   0%|          | 0/194 [00:12<?, ?it/s]
loss: 0.117355  [21632/24872]:   0%|          | 0/194 [00:12<?, ?it/s]
loss: 0.120786  [21760/24872]:   0%|          | 0/194 [00:12<?, ?it/s]
loss: 0.123009  [21888/24872]:   0%|          | 0/194 [00:13<?, ?it/s]
loss: 0.136268  [22016/24872]:   0%|          | 0/194 [00:13<?, ?it/s]
loss: 0.104166  [22144/24872]:   0%|          | 0/194 [00:13<?, ?it/s]
loss: 0.135294  [22272/24872]:   0%|          | 0/194 [00:13<?, ?it/s]
loss: 0.124962  [22400/24872]:   0%|          | 0/194 [00:13<?, ?it/s]
loss: 0.131824  [22528/24872]:   0%|          | 0/194 [00:13<?, ?it/s]
loss: 0.131991  [22656/24872]:   0%|          | 0/194 [00:13<?, ?it/s]
loss: 0.129898  [22784/24872]:   0%|          | 0/194 [00:13<?, ?it/s]
loss: 0.134928  [22912/24872]:   0%|          | 0/194 [00:13<?, ?it/s]
loss: 0.145737  [23040/24872]:   0%|          | 0/194 [00:13<?, ?it/s]
loss: 0.135520  [23168/24872]:   0%|          | 0/194 [00:13<?, ?it/s]
loss: 0.127886  [23296/24872]:   0%|          | 0/194 [00:13<?, ?it/s]
loss: 0.131576  [23424/24872]:   0%|          | 0/194 [00:13<?, ?it/s]
loss: 0.119867  [23552/24872]:   0%|          | 0/194 [00:13<?, ?it/s]
loss: 0.103512  [23680/24872]:   0%|          | 0/194 [00:14<?, ?it/s]
loss: 0.132680  [23808/24872]:   0%|          | 0/194 [00:14<?, ?it/s]
loss: 0.119032  [23936/24872]:   0%|          | 0/194 [00:14<?, ?it/s]
loss: 0.129379  [24064/24872]:   0%|          | 0/194 [00:14<?, ?it/s]
loss: 0.120548  [24192/24872]:   0%|          | 0/194 [00:14<?, ?it/s]
loss: 0.114199  [24320/24872]:   0%|          | 0/194 [00:14<?, ?it/s]
loss: 0.133059  [24448/24872]:   0%|          | 0/194 [00:14<?, ?it/s]
loss: 0.115061  [24576/24872]:   0%|          | 0/194 [00:14<?, ?it/s]
loss: 0.100265  [24704/24872]:   0%|          | 0/194 [00:14<?, ?it/s]
loss: 0.115475  [24832/24872]:   0%|          | 0/194 [00:14<?, ?it/s]
loss: 0.187644  [24872/24872]:   0%|          | 0/194 [00:14<?, ?it/s]
loss: 0.187644  [24872/24872]: : 195it [00:14, 13.18it/s]
Epoch 3, time=279.82s

  0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.127454  [  128/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.107924  [  256/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.114203  [  384/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.110405  [  512/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.103475  [  640/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.099587  [  768/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.133570  [  896/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.140484  [ 1024/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.121729  [ 1152/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.114575  [ 1280/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.099326  [ 1408/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.122761  [ 1536/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.117960  [ 1664/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.118854  [ 1792/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.147213  [ 1920/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.125292  [ 2048/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.131598  [ 2176/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.141197  [ 2304/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.129700  [ 2432/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.137518  [ 2560/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.142412  [ 2688/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.107809  [ 2816/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.098904  [ 2944/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.151156  [ 3072/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.153996  [ 3200/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.106629  [ 3328/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.132971  [ 3456/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.132825  [ 3584/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.142968  [ 3712/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.130023  [ 3840/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.121885  [ 3968/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.110758  [ 4096/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.114110  [ 4224/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.112933  [ 4352/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.104076  [ 4480/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.129900  [ 4608/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.119118  [ 4736/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.130072  [ 4864/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.096254  [ 4992/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.101905  [ 5120/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.137175  [ 5248/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.140795  [ 5376/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.098070  [ 5504/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.134338  [ 5632/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.132195  [ 5760/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.094833  [ 5888/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.118395  [ 6016/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.127960  [ 6144/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.107873  [ 6272/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.138746  [ 6400/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.082207  [ 6528/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.136927  [ 6656/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.128588  [ 6784/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.104015  [ 6912/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.097484  [ 7040/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.105629  [ 7168/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.107498  [ 7296/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.110320  [ 7424/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.089532  [ 7552/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.105729  [ 7680/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.111550  [ 7808/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.123767  [ 7936/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.122321  [ 8064/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.116285  [ 8192/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.112473  [ 8320/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.084700  [ 8448/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.128193  [ 8576/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.123579  [ 8704/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.101357  [ 8832/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.124029  [ 8960/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.089408  [ 9088/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.124369  [ 9216/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.085964  [ 9344/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.113511  [ 9472/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.103410  [ 9600/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.118705  [ 9728/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.127077  [ 9856/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.094056  [ 9984/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.112528  [10112/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.089075  [10240/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.136947  [10368/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.114284  [10496/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.126036  [10624/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.085711  [10752/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.082802  [10880/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.114153  [11008/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.134858  [11136/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.102602  [11264/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.119595  [11392/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.100625  [11520/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.103556  [11648/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.117981  [11776/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.084352  [11904/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.119906  [12032/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.120694  [12160/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.123186  [12288/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.097208  [12416/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.103291  [12544/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.090087  [12672/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.143706  [12800/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.156374  [12928/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.095012  [13056/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.080267  [13184/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.088939  [13312/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.098326  [13440/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.116886  [13568/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.102835  [13696/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.084601  [13824/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.131861  [13952/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.117380  [14080/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.092674  [14208/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.136264  [14336/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.137236  [14464/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.104778  [14592/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.120080  [14720/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.123572  [14848/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.111836  [14976/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.101390  [15104/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.084468  [15232/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.101551  [15360/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.119681  [15488/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.107952  [15616/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.122716  [15744/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.111638  [15872/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.132821  [16000/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.124991  [16128/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.138371  [16256/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.108789  [16384/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.105815  [16512/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.099153  [16640/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.103318  [16768/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.092949  [16896/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.108847  [17024/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.113051  [17152/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.127353  [17280/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.130167  [17408/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.088372  [17536/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.115943  [17664/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.141706  [17792/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.132049  [17920/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.092417  [18048/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.078811  [18176/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.133313  [18304/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.118456  [18432/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.091289  [18560/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.100692  [18688/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.122515  [18816/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.128696  [18944/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.123612  [19072/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.120009  [19200/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.115667  [19328/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.113877  [19456/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.111976  [19584/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.095416  [19712/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.093971  [19840/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.111484  [19968/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.134231  [20096/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.101494  [20224/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.123209  [20352/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.147810  [20480/24872]:   0%|          | 0/194 [00:12<?, ?it/s]
loss: 0.099690  [20608/24872]:   0%|          | 0/194 [00:12<?, ?it/s]
loss: 0.112766  [20736/24872]:   0%|          | 0/194 [00:12<?, ?it/s]
loss: 0.134298  [20864/24872]:   0%|          | 0/194 [00:12<?, ?it/s]
loss: 0.138117  [20992/24872]:   0%|          | 0/194 [00:12<?, ?it/s]
loss: 0.096287  [21120/24872]:   0%|          | 0/194 [00:12<?, ?it/s]
loss: 0.120182  [21248/24872]:   0%|          | 0/194 [00:12<?, ?it/s]
loss: 0.091277  [21376/24872]:   0%|          | 0/194 [00:12<?, ?it/s]
loss: 0.133292  [21504/24872]:   0%|          | 0/194 [00:12<?, ?it/s]
loss: 0.105936  [21632/24872]:   0%|          | 0/194 [00:12<?, ?it/s]
loss: 0.113643  [21760/24872]:   0%|          | 0/194 [00:12<?, ?it/s]
loss: 0.108435  [21888/24872]:   0%|          | 0/194 [00:12<?, ?it/s]
loss: 0.129035  [22016/24872]:   0%|          | 0/194 [00:12<?, ?it/s]
loss: 0.110704  [22144/24872]:   0%|          | 0/194 [00:13<?, ?it/s]
loss: 0.121451  [22272/24872]:   0%|          | 0/194 [00:13<?, ?it/s]
loss: 0.131486  [22400/24872]:   0%|          | 0/194 [00:13<?, ?it/s]
loss: 0.120357  [22528/24872]:   0%|          | 0/194 [00:13<?, ?it/s]
loss: 0.122530  [22656/24872]:   0%|          | 0/194 [00:13<?, ?it/s]
loss: 0.128165  [22784/24872]:   0%|          | 0/194 [00:13<?, ?it/s]
loss: 0.122141  [22912/24872]:   0%|          | 0/194 [00:13<?, ?it/s]
loss: 0.147010  [23040/24872]:   0%|          | 0/194 [00:13<?, ?it/s]
loss: 0.134809  [23168/24872]:   0%|          | 0/194 [00:13<?, ?it/s]
loss: 0.118553  [23296/24872]:   0%|          | 0/194 [00:13<?, ?it/s]
loss: 0.122137  [23424/24872]:   0%|          | 0/194 [00:13<?, ?it/s]
loss: 0.121055  [23552/24872]:   0%|          | 0/194 [00:13<?, ?it/s]
loss: 0.103856  [23680/24872]:   0%|          | 0/194 [00:13<?, ?it/s]
loss: 0.121974  [23808/24872]:   0%|          | 0/194 [00:14<?, ?it/s]
loss: 0.120406  [23936/24872]:   0%|          | 0/194 [00:14<?, ?it/s]
loss: 0.122261  [24064/24872]:   0%|          | 0/194 [00:14<?, ?it/s]
loss: 0.120461  [24192/24872]:   0%|          | 0/194 [00:14<?, ?it/s]
loss: 0.105582  [24320/24872]:   0%|          | 0/194 [00:14<?, ?it/s]
loss: 0.122582  [24448/24872]:   0%|          | 0/194 [00:14<?, ?it/s]
loss: 0.114443  [24576/24872]:   0%|          | 0/194 [00:14<?, ?it/s]
loss: 0.094994  [24704/24872]:   0%|          | 0/194 [00:14<?, ?it/s]
loss: 0.110063  [24832/24872]:   0%|          | 0/194 [00:14<?, ?it/s]
loss: 0.162960  [24872/24872]:   0%|          | 0/194 [00:14<?, ?it/s]
loss: 0.162960  [24872/24872]: : 195it [00:14, 13.30it/s]
Epoch 4, time=294.49s

  0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.110463  [  128/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.105584  [  256/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.106876  [  384/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.114686  [  512/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.089062  [  640/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.099185  [  768/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.123730  [  896/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.120897  [ 1024/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.111680  [ 1152/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.104473  [ 1280/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.100675  [ 1408/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.118769  [ 1536/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.128741  [ 1664/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.117827  [ 1792/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.144716  [ 1920/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.109520  [ 2048/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.142536  [ 2176/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.132424  [ 2304/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.131866  [ 2432/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.143968  [ 2560/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.141159  [ 2688/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.109336  [ 2816/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.088874  [ 2944/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.141539  [ 3072/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.143655  [ 3200/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.106126  [ 3328/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.123610  [ 3456/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.123152  [ 3584/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.132364  [ 3712/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.119172  [ 3840/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.117674  [ 3968/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.103461  [ 4096/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.104357  [ 4224/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.104625  [ 4352/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.097834  [ 4480/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.118302  [ 4608/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.113485  [ 4736/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.127713  [ 4864/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.094358  [ 4992/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.099007  [ 5120/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.120059  [ 5248/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.133769  [ 5376/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.092116  [ 5504/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.131918  [ 5632/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.126567  [ 5760/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.094062  [ 5888/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.113532  [ 6016/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.124410  [ 6144/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.102729  [ 6272/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.137252  [ 6400/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.074781  [ 6528/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.126861  [ 6656/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.118239  [ 6784/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.098533  [ 6912/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.090621  [ 7040/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.099352  [ 7168/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.106499  [ 7296/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.108671  [ 7424/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.088876  [ 7552/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.100461  [ 7680/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.111344  [ 7808/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.120274  [ 7936/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.115268  [ 8064/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.114822  [ 8192/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.106377  [ 8320/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.078742  [ 8448/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.123343  [ 8576/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.117451  [ 8704/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.093508  [ 8832/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.116343  [ 8960/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.085441  [ 9088/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.109142  [ 9216/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.083528  [ 9344/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.112582  [ 9472/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.098686  [ 9600/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.118618  [ 9728/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.123003  [ 9856/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.095925  [ 9984/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.102811  [10112/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.094450  [10240/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.126959  [10368/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.108237  [10496/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.115144  [10624/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.084729  [10752/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.080207  [10880/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.109690  [11008/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.132893  [11136/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.105939  [11264/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.119891  [11392/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.095730  [11520/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.106685  [11648/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.113159  [11776/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.082103  [11904/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.107763  [12032/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.111951  [12160/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.118214  [12288/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.096032  [12416/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.100530  [12544/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.086582  [12672/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.137470  [12800/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.151012  [12928/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.098132  [13056/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.078421  [13184/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.092991  [13312/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.093502  [13440/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.113357  [13568/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.104882  [13696/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.081029  [13824/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.131736  [13952/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.115639  [14080/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.092830  [14208/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.134754  [14336/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.137186  [14464/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.104057  [14592/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.117051  [14720/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.111876  [14848/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.105500  [14976/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.091757  [15104/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.077255  [15232/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.100180  [15360/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.118366  [15488/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.107475  [15616/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.128446  [15744/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.103645  [15872/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.135976  [16000/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.119591  [16128/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.129887  [16256/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.107658  [16384/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.105595  [16512/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.100349  [16640/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.106567  [16768/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.093365  [16896/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.106231  [17024/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.103960  [17152/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.132500  [17280/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.122338  [17408/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.086144  [17536/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.116522  [17664/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.130511  [17792/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.129782  [17920/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.093528  [18048/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.072569  [18176/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.133705  [18304/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.109532  [18432/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.090417  [18560/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.097679  [18688/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.117376  [18816/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.127633  [18944/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.129434  [19072/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.119783  [19200/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.107175  [19328/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.113759  [19456/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.114448  [19584/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.082249  [19712/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.077360  [19840/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.098578  [19968/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.121334  [20096/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.100322  [20224/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.116148  [20352/24872]:   0%|          | 0/194 [00:12<?, ?it/s]
loss: 0.144950  [20480/24872]:   0%|          | 0/194 [00:12<?, ?it/s]
loss: 0.090493  [20608/24872]:   0%|          | 0/194 [00:12<?, ?it/s]
loss: 0.102522  [20736/24872]:   0%|          | 0/194 [00:12<?, ?it/s]
loss: 0.121596  [20864/24872]:   0%|          | 0/194 [00:12<?, ?it/s]
loss: 0.128786  [20992/24872]:   0%|          | 0/194 [00:12<?, ?it/s]
loss: 0.093819  [21120/24872]:   0%|          | 0/194 [00:12<?, ?it/s]
loss: 0.113522  [21248/24872]:   0%|          | 0/194 [00:12<?, ?it/s]
loss: 0.080673  [21376/24872]:   0%|          | 0/194 [00:12<?, ?it/s]
loss: 0.112314  [21504/24872]:   0%|          | 0/194 [00:12<?, ?it/s]
loss: 0.097544  [21632/24872]:   0%|          | 0/194 [00:12<?, ?it/s]
loss: 0.103232  [21760/24872]:   0%|          | 0/194 [00:12<?, ?it/s]
loss: 0.097685  [21888/24872]:   0%|          | 0/194 [00:12<?, ?it/s]
loss: 0.117581  [22016/24872]:   0%|          | 0/194 [00:13<?, ?it/s]
loss: 0.094514  [22144/24872]:   0%|          | 0/194 [00:13<?, ?it/s]
loss: 0.110663  [22272/24872]:   0%|          | 0/194 [00:13<?, ?it/s]
loss: 0.121229  [22400/24872]:   0%|          | 0/194 [00:13<?, ?it/s]
loss: 0.108201  [22528/24872]:   0%|          | 0/194 [00:13<?, ?it/s]
loss: 0.113670  [22656/24872]:   0%|          | 0/194 [00:13<?, ?it/s]
loss: 0.131064  [22784/24872]:   0%|          | 0/194 [00:13<?, ?it/s]
loss: 0.115950  [22912/24872]:   0%|          | 0/194 [00:13<?, ?it/s]
loss: 0.140320  [23040/24872]:   0%|          | 0/194 [00:13<?, ?it/s]
loss: 0.111664  [23168/24872]:   0%|          | 0/194 [00:13<?, ?it/s]
loss: 0.108110  [23296/24872]:   0%|          | 0/194 [00:13<?, ?it/s]
loss: 0.118967  [23424/24872]:   0%|          | 0/194 [00:13<?, ?it/s]
loss: 0.112195  [23552/24872]:   0%|          | 0/194 [00:13<?, ?it/s]
loss: 0.096003  [23680/24872]:   0%|          | 0/194 [00:14<?, ?it/s]
loss: 0.098652  [23808/24872]:   0%|          | 0/194 [00:14<?, ?it/s]
loss: 0.106374  [23936/24872]:   0%|          | 0/194 [00:14<?, ?it/s]
loss: 0.116248  [24064/24872]:   0%|          | 0/194 [00:14<?, ?it/s]
loss: 0.112757  [24192/24872]:   0%|          | 0/194 [00:14<?, ?it/s]
loss: 0.108822  [24320/24872]:   0%|          | 0/194 [00:14<?, ?it/s]
loss: 0.110083  [24448/24872]:   0%|          | 0/194 [00:14<?, ?it/s]
loss: 0.110138  [24576/24872]:   0%|          | 0/194 [00:14<?, ?it/s]
loss: 0.086616  [24704/24872]:   0%|          | 0/194 [00:14<?, ?it/s]
loss: 0.099276  [24832/24872]:   0%|          | 0/194 [00:14<?, ?it/s]
loss: 0.144752  [24872/24872]:   0%|          | 0/194 [00:14<?, ?it/s]
loss: 0.144752  [24872/24872]: : 195it [00:14, 13.21it/s]
Epoch 5, time=309.25s

  0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.100315  [  128/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.096467  [  256/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.109522  [  384/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.112700  [  512/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.103652  [  640/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.094186  [  768/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.130961  [  896/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.121548  [ 1024/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.113590  [ 1152/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.103299  [ 1280/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.107425  [ 1408/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.112877  [ 1536/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.109000  [ 1664/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.098623  [ 1792/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.126144  [ 1920/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.112432  [ 2048/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.112108  [ 2176/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.118718  [ 2304/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.116106  [ 2432/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.108697  [ 2560/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.125772  [ 2688/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.090284  [ 2816/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.099911  [ 2944/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.131131  [ 3072/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.129027  [ 3200/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.099083  [ 3328/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.106451  [ 3456/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.123740  [ 3584/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.119420  [ 3712/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.113288  [ 3840/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.111000  [ 3968/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.105202  [ 4096/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.103318  [ 4224/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.108243  [ 4352/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.096299  [ 4480/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.110521  [ 4608/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.114264  [ 4736/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.122136  [ 4864/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.085973  [ 4992/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.098732  [ 5120/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.114114  [ 5248/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.130468  [ 5376/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.087907  [ 5504/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.129612  [ 5632/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.127114  [ 5760/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.091291  [ 5888/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.121384  [ 6016/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.124195  [ 6144/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.113802  [ 6272/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.131538  [ 6400/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.078282  [ 6528/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.139483  [ 6656/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.129745  [ 6784/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.101469  [ 6912/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.098343  [ 7040/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.098039  [ 7168/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.120307  [ 7296/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.109832  [ 7424/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.091669  [ 7552/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.107290  [ 7680/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.120184  [ 7808/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.119349  [ 7936/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.115221  [ 8064/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.113156  [ 8192/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.120960  [ 8320/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.077828  [ 8448/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.126292  [ 8576/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.116415  [ 8704/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.095697  [ 8832/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.116635  [ 8960/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.082727  [ 9088/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.116676  [ 9216/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.083840  [ 9344/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.132168  [ 9472/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.096393  [ 9600/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.104687  [ 9728/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.129024  [ 9856/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.092253  [ 9984/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.106809  [10112/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.098347  [10240/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.126094  [10368/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.128139  [10496/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.114076  [10624/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.095817  [10752/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.084649  [10880/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.110300  [11008/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.149146  [11136/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.111408  [11264/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.105221  [11392/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.096561  [11520/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.097534  [11648/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.126763  [11776/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.093109  [11904/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.094747  [12032/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.104818  [12160/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.129912  [12288/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.088090  [12416/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.102102  [12544/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.090876  [12672/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.140841  [12800/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.161285  [12928/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.096170  [13056/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.077759  [13184/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.097774  [13312/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.101434  [13440/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.120291  [13568/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.098201  [13696/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.083828  [13824/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.123807  [13952/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.115502  [14080/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.096048  [14208/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.132311  [14336/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.134838  [14464/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.097808  [14592/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.108293  [14720/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.107050  [14848/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.109425  [14976/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.090481  [15104/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.078694  [15232/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.086082  [15360/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.105707  [15488/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.097771  [15616/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.111264  [15744/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.098232  [15872/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.122723  [16000/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.107318  [16128/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.115004  [16256/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.101583  [16384/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.108024  [16512/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.096805  [16640/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.109685  [16768/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.087417  [16896/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.097664  [17024/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.096735  [17152/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.120306  [17280/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.122256  [17408/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.075436  [17536/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.124992  [17664/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.121240  [17792/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.138519  [17920/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.093167  [18048/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.082824  [18176/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.136373  [18304/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.102956  [18432/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.112905  [18560/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.093533  [18688/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.134303  [18816/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.128121  [18944/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.144809  [19072/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.116675  [19200/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.130064  [19328/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.105289  [19456/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.118733  [19584/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.078792  [19712/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.084383  [19840/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.109351  [19968/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.115243  [20096/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.110845  [20224/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.115216  [20352/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.147312  [20480/24872]:   0%|          | 0/194 [00:12<?, ?it/s]
loss: 0.083832  [20608/24872]:   0%|          | 0/194 [00:12<?, ?it/s]
loss: 0.092310  [20736/24872]:   0%|          | 0/194 [00:12<?, ?it/s]
loss: 0.110013  [20864/24872]:   0%|          | 0/194 [00:12<?, ?it/s]
loss: 0.106021  [20992/24872]:   0%|          | 0/194 [00:12<?, ?it/s]
loss: 0.087370  [21120/24872]:   0%|          | 0/194 [00:12<?, ?it/s]
loss: 0.105873  [21248/24872]:   0%|          | 0/194 [00:12<?, ?it/s]
loss: 0.076603  [21376/24872]:   0%|          | 0/194 [00:12<?, ?it/s]
loss: 0.100853  [21504/24872]:   0%|          | 0/194 [00:12<?, ?it/s]
loss: 0.092297  [21632/24872]:   0%|          | 0/194 [00:12<?, ?it/s]
loss: 0.096177  [21760/24872]:   0%|          | 0/194 [00:12<?, ?it/s]
loss: 0.099443  [21888/24872]:   0%|          | 0/194 [00:12<?, ?it/s]
loss: 0.118122  [22016/24872]:   0%|          | 0/194 [00:12<?, ?it/s]
loss: 0.094459  [22144/24872]:   0%|          | 0/194 [00:13<?, ?it/s]
loss: 0.091663  [22272/24872]:   0%|          | 0/194 [00:13<?, ?it/s]
loss: 0.112700  [22400/24872]:   0%|          | 0/194 [00:13<?, ?it/s]
loss: 0.097481  [22528/24872]:   0%|          | 0/194 [00:13<?, ?it/s]
loss: 0.116737  [22656/24872]:   0%|          | 0/194 [00:13<?, ?it/s]
loss: 0.116924  [22784/24872]:   0%|          | 0/194 [00:13<?, ?it/s]
loss: 0.111815  [22912/24872]:   0%|          | 0/194 [00:13<?, ?it/s]
loss: 0.149304  [23040/24872]:   0%|          | 0/194 [00:13<?, ?it/s]
loss: 0.100747  [23168/24872]:   0%|          | 0/194 [00:13<?, ?it/s]
loss: 0.095638  [23296/24872]:   0%|          | 0/194 [00:13<?, ?it/s]
loss: 0.116302  [23424/24872]:   0%|          | 0/194 [00:13<?, ?it/s]
loss: 0.106189  [23552/24872]:   0%|          | 0/194 [00:13<?, ?it/s]
loss: 0.088004  [23680/24872]:   0%|          | 0/194 [00:13<?, ?it/s]
loss: 0.092248  [23808/24872]:   0%|          | 0/194 [00:14<?, ?it/s]
loss: 0.107165  [23936/24872]:   0%|          | 0/194 [00:14<?, ?it/s]
loss: 0.107942  [24064/24872]:   0%|          | 0/194 [00:14<?, ?it/s]
loss: 0.107816  [24192/24872]:   0%|          | 0/194 [00:14<?, ?it/s]
loss: 0.108913  [24320/24872]:   0%|          | 0/194 [00:14<?, ?it/s]
loss: 0.107540  [24448/24872]:   0%|          | 0/194 [00:14<?, ?it/s]
loss: 0.100484  [24576/24872]:   0%|          | 0/194 [00:14<?, ?it/s]
loss: 0.083515  [24704/24872]:   0%|          | 0/194 [00:14<?, ?it/s]
loss: 0.093126  [24832/24872]:   0%|          | 0/194 [00:14<?, ?it/s]
loss: 0.124772  [24872/24872]:   0%|          | 0/194 [00:14<?, ?it/s]
loss: 0.124772  [24872/24872]: : 195it [00:14, 13.33it/s]
Epoch 6, time=323.88s

  0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.091822  [  128/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.096668  [  256/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.094848  [  384/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.103363  [  512/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.083706  [  640/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.092442  [  768/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.115004  [  896/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.118062  [ 1024/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.109516  [ 1152/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.098408  [ 1280/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.099470  [ 1408/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.111615  [ 1536/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.104842  [ 1664/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.099875  [ 1792/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.137292  [ 1920/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.104875  [ 2048/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.118369  [ 2176/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.112156  [ 2304/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.125209  [ 2432/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.105115  [ 2560/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.136188  [ 2688/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.088082  [ 2816/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.107938  [ 2944/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.138275  [ 3072/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.112694  [ 3200/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.105477  [ 3328/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.100896  [ 3456/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.119105  [ 3584/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.106522  [ 3712/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.124355  [ 3840/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.110176  [ 3968/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.107890  [ 4096/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.095129  [ 4224/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.114029  [ 4352/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.099208  [ 4480/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.115744  [ 4608/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.111914  [ 4736/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.123174  [ 4864/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.091999  [ 4992/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.097734  [ 5120/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.113600  [ 5248/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.139459  [ 5376/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.092765  [ 5504/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.104677  [ 5632/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.109384  [ 5760/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.099762  [ 5888/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.110302  [ 6016/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.112601  [ 6144/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.109027  [ 6272/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.131335  [ 6400/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.075915  [ 6528/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.124084  [ 6656/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.118948  [ 6784/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.092663  [ 6912/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.079871  [ 7040/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.096844  [ 7168/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.098359  [ 7296/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.099518  [ 7424/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.087188  [ 7552/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.096747  [ 7680/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.099882  [ 7808/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.107265  [ 7936/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.116041  [ 8064/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.109740  [ 8192/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.118171  [ 8320/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.082516  [ 8448/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.119701  [ 8576/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.121472  [ 8704/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.103721  [ 8832/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.111701  [ 8960/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.087313  [ 9088/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.114269  [ 9216/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.090009  [ 9344/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.125898  [ 9472/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.099183  [ 9600/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.105139  [ 9728/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.118013  [ 9856/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.090765  [ 9984/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.108184  [10112/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.083962  [10240/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.111584  [10368/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.121515  [10496/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.109855  [10624/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.083633  [10752/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.079796  [10880/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.110608  [11008/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.133072  [11136/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.100136  [11264/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.120878  [11392/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.094991  [11520/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.100375  [11648/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.112485  [11776/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.081969  [11904/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.084770  [12032/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.095736  [12160/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.119570  [12288/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.089514  [12416/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.101947  [12544/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.084746  [12672/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.127046  [12800/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.136411  [12928/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.100458  [13056/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.069464  [13184/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.104177  [13312/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.092611  [13440/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.114082  [13568/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.086004  [13696/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.074878  [13824/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.116946  [13952/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.109377  [14080/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.100364  [14208/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.135354  [14336/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.120729  [14464/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.099814  [14592/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.104518  [14720/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.102323  [14848/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.104081  [14976/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.086855  [15104/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.068980  [15232/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.082469  [15360/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.092838  [15488/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.093016  [15616/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.117154  [15744/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.094054  [15872/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.120533  [16000/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.106119  [16128/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.116145  [16256/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.099848  [16384/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.112625  [16512/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.092130  [16640/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.104050  [16768/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.088098  [16896/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.095386  [17024/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.097598  [17152/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.121560  [17280/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.103962  [17408/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.076838  [17536/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.105788  [17664/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.117811  [17792/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.122153  [17920/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.084114  [18048/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.066695  [18176/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.125007  [18304/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.091585  [18432/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.093625  [18560/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.091357  [18688/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.112079  [18816/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.119787  [18944/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.121412  [19072/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.111179  [19200/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.098577  [19328/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.113987  [19456/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.091505  [19584/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.079279  [19712/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.073405  [19840/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.100941  [19968/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.106398  [20096/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.103247  [20224/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.128538  [20352/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.141076  [20480/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.084928  [20608/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.095284  [20736/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.111475  [20864/24872]:   0%|          | 0/194 [00:12<?, ?it/s]
loss: 0.104325  [20992/24872]:   0%|          | 0/194 [00:12<?, ?it/s]
loss: 0.091193  [21120/24872]:   0%|          | 0/194 [00:12<?, ?it/s]
loss: 0.110282  [21248/24872]:   0%|          | 0/194 [00:12<?, ?it/s]
loss: 0.075233  [21376/24872]:   0%|          | 0/194 [00:12<?, ?it/s]
loss: 0.106533  [21504/24872]:   0%|          | 0/194 [00:12<?, ?it/s]
loss: 0.095245  [21632/24872]:   0%|          | 0/194 [00:12<?, ?it/s]
loss: 0.123990  [21760/24872]:   0%|          | 0/194 [00:12<?, ?it/s]
loss: 0.096269  [21888/24872]:   0%|          | 0/194 [00:12<?, ?it/s]
loss: 0.130149  [22016/24872]:   0%|          | 0/194 [00:12<?, ?it/s]
loss: 0.103049  [22144/24872]:   0%|          | 0/194 [00:12<?, ?it/s]
loss: 0.102684  [22272/24872]:   0%|          | 0/194 [00:12<?, ?it/s]
loss: 0.110286  [22400/24872]:   0%|          | 0/194 [00:12<?, ?it/s]
loss: 0.100130  [22528/24872]:   0%|          | 0/194 [00:13<?, ?it/s]
loss: 0.119766  [22656/24872]:   0%|          | 0/194 [00:13<?, ?it/s]
loss: 0.127693  [22784/24872]:   0%|          | 0/194 [00:13<?, ?it/s]
loss: 0.119007  [22912/24872]:   0%|          | 0/194 [00:13<?, ?it/s]
loss: 0.130231  [23040/24872]:   0%|          | 0/194 [00:13<?, ?it/s]
loss: 0.112852  [23168/24872]:   0%|          | 0/194 [00:13<?, ?it/s]
loss: 0.096060  [23296/24872]:   0%|          | 0/194 [00:13<?, ?it/s]
loss: 0.139485  [23424/24872]:   0%|          | 0/194 [00:13<?, ?it/s]
loss: 0.109489  [23552/24872]:   0%|          | 0/194 [00:13<?, ?it/s]
loss: 0.095780  [23680/24872]:   0%|          | 0/194 [00:13<?, ?it/s]
loss: 0.094318  [23808/24872]:   0%|          | 0/194 [00:13<?, ?it/s]
loss: 0.108704  [23936/24872]:   0%|          | 0/194 [00:13<?, ?it/s]
loss: 0.105757  [24064/24872]:   0%|          | 0/194 [00:13<?, ?it/s]
loss: 0.110573  [24192/24872]:   0%|          | 0/194 [00:13<?, ?it/s]
loss: 0.113355  [24320/24872]:   0%|          | 0/194 [00:14<?, ?it/s]
loss: 0.111766  [24448/24872]:   0%|          | 0/194 [00:14<?, ?it/s]
loss: 0.105013  [24576/24872]:   0%|          | 0/194 [00:14<?, ?it/s]
loss: 0.081332  [24704/24872]:   0%|          | 0/194 [00:14<?, ?it/s]
loss: 0.096849  [24832/24872]:   0%|          | 0/194 [00:14<?, ?it/s]
loss: 0.139450  [24872/24872]:   0%|          | 0/194 [00:14<?, ?it/s]
loss: 0.139450  [24872/24872]: : 195it [00:14, 13.54it/s]
-------------------------------
LR=1e-05, batch_size=256
-------------------------------
Epoch 1, time=338.28s

  0%|          | 0/97 [00:00<?, ?it/s]
loss: 0.093246  [  256/24872]:   0%|          | 0/97 [00:00<?, ?it/s]
loss: 0.095403  [  512/24872]:   0%|          | 0/97 [00:00<?, ?it/s]
loss: 0.077396  [  768/24872]:   0%|          | 0/97 [00:00<?, ?it/s]
loss: 0.110306  [ 1024/24872]:   0%|          | 0/97 [00:00<?, ?it/s]
loss: 0.098825  [ 1280/24872]:   0%|          | 0/97 [00:00<?, ?it/s]
loss: 0.089099  [ 1536/24872]:   0%|          | 0/97 [00:00<?, ?it/s]
loss: 0.088410  [ 1792/24872]:   0%|          | 0/97 [00:00<?, ?it/s]
loss: 0.102336  [ 2048/24872]:   0%|          | 0/97 [00:00<?, ?it/s]
loss: 0.102642  [ 2304/24872]:   0%|          | 0/97 [00:01<?, ?it/s]
loss: 0.099597  [ 2560/24872]:   0%|          | 0/97 [00:01<?, ?it/s]
loss: 0.094959  [ 2816/24872]:   0%|          | 0/97 [00:01<?, ?it/s]
loss: 0.100008  [ 3072/24872]:   0%|          | 0/97 [00:01<?, ?it/s]
loss: 0.100530  [ 3328/24872]:   0%|          | 0/97 [00:01<?, ?it/s]
loss: 0.100087  [ 3584/24872]:   0%|          | 0/97 [00:01<?, ?it/s]
loss: 0.098867  [ 3840/24872]:   0%|          | 0/97 [00:01<?, ?it/s]
loss: 0.094118  [ 4096/24872]:   0%|          | 0/97 [00:01<?, ?it/s]
loss: 0.086679  [ 4352/24872]:   0%|          | 0/97 [00:01<?, ?it/s]
loss: 0.093098  [ 4608/24872]:   0%|          | 0/97 [00:02<?, ?it/s]
loss: 0.102306  [ 4864/24872]:   0%|          | 0/97 [00:02<?, ?it/s]
loss: 0.081787  [ 5120/24872]:   0%|          | 0/97 [00:02<?, ?it/s]
loss: 0.121646  [ 5376/24872]:   0%|          | 0/97 [00:02<?, ?it/s]
loss: 0.095188  [ 5632/24872]:   0%|          | 0/97 [00:02<?, ?it/s]
loss: 0.091741  [ 5888/24872]:   0%|          | 0/97 [00:02<?, ?it/s]
loss: 0.098806  [ 6144/24872]:   0%|          | 0/97 [00:02<?, ?it/s]
loss: 0.105582  [ 6400/24872]:   0%|          | 0/97 [00:02<?, ?it/s]
loss: 0.087629  [ 6656/24872]:   0%|          | 0/97 [00:02<?, ?it/s]
loss: 0.093413  [ 6912/24872]:   0%|          | 0/97 [00:03<?, ?it/s]
loss: 0.079745  [ 7168/24872]:   0%|          | 0/97 [00:03<?, ?it/s]
loss: 0.087189  [ 7424/24872]:   0%|          | 0/97 [00:03<?, ?it/s]
loss: 0.083366  [ 7680/24872]:   0%|          | 0/97 [00:03<?, ?it/s]
loss: 0.098129  [ 7936/24872]:   0%|          | 0/97 [00:03<?, ?it/s]
loss: 0.098485  [ 8192/24872]:   0%|          | 0/97 [00:03<?, ?it/s]
loss: 0.085324  [ 8448/24872]:   0%|          | 0/97 [00:03<?, ?it/s]
loss: 0.104890  [ 8704/24872]:   0%|          | 0/97 [00:03<?, ?it/s]
loss: 0.093260  [ 8960/24872]:   0%|          | 0/97 [00:03<?, ?it/s]
loss: 0.087556  [ 9216/24872]:   0%|          | 0/97 [00:04<?, ?it/s]
loss: 0.095259  [ 9472/24872]:   0%|          | 0/97 [00:04<?, ?it/s]
loss: 0.088047  [ 9728/24872]:   0%|          | 0/97 [00:04<?, ?it/s]
loss: 0.095193  [ 9984/24872]:   0%|          | 0/97 [00:04<?, ?it/s]
loss: 0.081033  [10240/24872]:   0%|          | 0/97 [00:04<?, ?it/s]
loss: 0.098194  [10496/24872]:   0%|          | 0/97 [00:04<?, ?it/s]
loss: 0.086099  [10752/24872]:   0%|          | 0/97 [00:04<?, ?it/s]
loss: 0.080787  [11008/24872]:   0%|          | 0/97 [00:04<?, ?it/s]
loss: 0.108619  [11264/24872]:   0%|          | 0/97 [00:04<?, ?it/s]
loss: 0.094372  [11520/24872]:   0%|          | 0/97 [00:05<?, ?it/s]
loss: 0.094655  [11776/24872]:   0%|          | 0/97 [00:05<?, ?it/s]
loss: 0.072060  [12032/24872]:   0%|          | 0/97 [00:05<?, ?it/s]
loss: 0.099571  [12288/24872]:   0%|          | 0/97 [00:05<?, ?it/s]
loss: 0.079788  [12544/24872]:   0%|          | 0/97 [00:05<?, ?it/s]
loss: 0.094921  [12800/24872]:   0%|          | 0/97 [00:05<?, ?it/s]
loss: 0.106443  [13056/24872]:   0%|          | 0/97 [00:05<?, ?it/s]
loss: 0.069765  [13312/24872]:   0%|          | 0/97 [00:05<?, ?it/s]
loss: 0.086552  [13568/24872]:   0%|          | 0/97 [00:05<?, ?it/s]
loss: 0.071451  [13824/24872]:   0%|          | 0/97 [00:06<?, ?it/s]
loss: 0.100383  [14080/24872]:   0%|          | 0/97 [00:06<?, ?it/s]
loss: 0.101025  [14336/24872]:   0%|          | 0/97 [00:06<?, ?it/s]
loss: 0.094490  [14592/24872]:   0%|          | 0/97 [00:06<?, ?it/s]
loss: 0.090526  [14848/24872]:   0%|          | 0/97 [00:06<?, ?it/s]
loss: 0.083783  [15104/24872]:   0%|          | 0/97 [00:06<?, ?it/s]
loss: 0.068100  [15360/24872]:   0%|          | 0/97 [00:06<?, ?it/s]
loss: 0.085527  [15616/24872]:   0%|          | 0/97 [00:06<?, ?it/s]
loss: 0.096502  [15872/24872]:   0%|          | 0/97 [00:06<?, ?it/s]
loss: 0.105394  [16128/24872]:   0%|          | 0/97 [00:06<?, ?it/s]
loss: 0.092955  [16384/24872]:   0%|          | 0/97 [00:07<?, ?it/s]
loss: 0.092187  [16640/24872]:   0%|          | 0/97 [00:07<?, ?it/s]
loss: 0.083260  [16896/24872]:   0%|          | 0/97 [00:07<?, ?it/s]
loss: 0.086154  [17152/24872]:   0%|          | 0/97 [00:07<?, ?it/s]
loss: 0.104153  [17408/24872]:   0%|          | 0/97 [00:07<?, ?it/s]
loss: 0.079289  [17664/24872]:   0%|          | 0/97 [00:07<?, ?it/s]
loss: 0.111564  [17920/24872]:   0%|          | 0/97 [00:07<?, ?it/s]
loss: 0.068870  [18176/24872]:   0%|          | 0/97 [00:07<?, ?it/s]
loss: 0.097638  [18432/24872]:   0%|          | 0/97 [00:07<?, ?it/s]
loss: 0.080310  [18688/24872]:   0%|          | 0/97 [00:08<?, ?it/s]
loss: 0.104357  [18944/24872]:   0%|          | 0/97 [00:08<?, ?it/s]
loss: 0.107523  [19200/24872]:   0%|          | 0/97 [00:08<?, ?it/s]
loss: 0.094145  [19456/24872]:   0%|          | 0/97 [00:08<?, ?it/s]
loss: 0.074020  [19712/24872]:   0%|          | 0/97 [00:08<?, ?it/s]
loss: 0.072680  [19968/24872]:   0%|          | 0/97 [00:08<?, ?it/s]
loss: 0.086833  [20224/24872]:   0%|          | 0/97 [00:08<?, ?it/s]
loss: 0.116504  [20480/24872]:   0%|          | 0/97 [00:08<?, ?it/s]
loss: 0.075384  [20736/24872]:   0%|          | 0/97 [00:08<?, ?it/s]
loss: 0.096887  [20992/24872]:   0%|          | 0/97 [00:09<?, ?it/s]
loss: 0.086657  [21248/24872]:   0%|          | 0/97 [00:09<?, ?it/s]
loss: 0.076445  [21504/24872]:   0%|          | 0/97 [00:09<?, ?it/s]
loss: 0.082927  [21760/24872]:   0%|          | 0/97 [00:09<?, ?it/s]
loss: 0.092022  [22016/24872]:   0%|          | 0/97 [00:09<?, ?it/s]
loss: 0.081189  [22272/24872]:   0%|          | 0/97 [00:09<?, ?it/s]
loss: 0.092954  [22528/24872]:   0%|          | 0/97 [00:09<?, ?it/s]
loss: 0.101148  [22784/24872]:   0%|          | 0/97 [00:09<?, ?it/s]
loss: 0.110285  [23040/24872]:   0%|          | 0/97 [00:09<?, ?it/s]
loss: 0.083493  [23296/24872]:   0%|          | 0/97 [00:10<?, ?it/s]
loss: 0.101128  [23552/24872]:   0%|          | 0/97 [00:10<?, ?it/s]
loss: 0.076803  [23808/24872]:   0%|          | 0/97 [00:10<?, ?it/s]
loss: 0.092983  [24064/24872]:   0%|          | 0/97 [00:10<?, ?it/s]
loss: 0.095587  [24320/24872]:   0%|          | 0/97 [00:10<?, ?it/s]
loss: 0.093730  [24576/24872]:   0%|          | 0/97 [00:10<?, ?it/s]
loss: 0.078168  [24832/24872]:   0%|          | 0/97 [00:10<?, ?it/s]
loss: 0.111018  [24872/24872]:   0%|          | 0/97 [00:10<?, ?it/s]
loss: 0.111018  [24872/24872]: : 98it [00:10,  9.10it/s]
Epoch 2, time=349.05s

  0%|          | 0/97 [00:00<?, ?it/s]
loss: 0.081773  [  256/24872]:   0%|          | 0/97 [00:00<?, ?it/s]
loss: 0.084935  [  512/24872]:   0%|          | 0/97 [00:00<?, ?it/s]
loss: 0.072697  [  768/24872]:   0%|          | 0/97 [00:00<?, ?it/s]
loss: 0.100892  [ 1024/24872]:   0%|          | 0/97 [00:00<?, ?it/s]
loss: 0.088550  [ 1280/24872]:   0%|          | 0/97 [00:00<?, ?it/s]
loss: 0.085162  [ 1536/24872]:   0%|          | 0/97 [00:00<?, ?it/s]
loss: 0.082653  [ 1792/24872]:   0%|          | 0/97 [00:00<?, ?it/s]
loss: 0.096416  [ 2048/24872]:   0%|          | 0/97 [00:00<?, ?it/s]
loss: 0.096993  [ 2304/24872]:   0%|          | 0/97 [00:00<?, ?it/s]
loss: 0.094264  [ 2560/24872]:   0%|          | 0/97 [00:01<?, ?it/s]
loss: 0.091900  [ 2816/24872]:   0%|          | 0/97 [00:01<?, ?it/s]
loss: 0.095201  [ 3072/24872]:   0%|          | 0/97 [00:01<?, ?it/s]
loss: 0.099384  [ 3328/24872]:   0%|          | 0/97 [00:01<?, ?it/s]
loss: 0.093551  [ 3584/24872]:   0%|          | 0/97 [00:01<?, ?it/s]
loss: 0.096738  [ 3840/24872]:   0%|          | 0/97 [00:01<?, ?it/s]
loss: 0.091124  [ 4096/24872]:   0%|          | 0/97 [00:01<?, ?it/s]
loss: 0.083353  [ 4352/24872]:   0%|          | 0/97 [00:01<?, ?it/s]
loss: 0.088897  [ 4608/24872]:   0%|          | 0/97 [00:01<?, ?it/s]
loss: 0.099826  [ 4864/24872]:   0%|          | 0/97 [00:02<?, ?it/s]
loss: 0.077990  [ 5120/24872]:   0%|          | 0/97 [00:02<?, ?it/s]
loss: 0.109934  [ 5376/24872]:   0%|          | 0/97 [00:02<?, ?it/s]
loss: 0.090507  [ 5632/24872]:   0%|          | 0/97 [00:02<?, ?it/s]
loss: 0.090140  [ 5888/24872]:   0%|          | 0/97 [00:02<?, ?it/s]
loss: 0.096968  [ 6144/24872]:   0%|          | 0/97 [00:02<?, ?it/s]
loss: 0.102796  [ 6400/24872]:   0%|          | 0/97 [00:02<?, ?it/s]
loss: 0.084254  [ 6656/24872]:   0%|          | 0/97 [00:02<?, ?it/s]
loss: 0.089231  [ 6912/24872]:   0%|          | 0/97 [00:02<?, ?it/s]
loss: 0.076609  [ 7168/24872]:   0%|          | 0/97 [00:03<?, ?it/s]
loss: 0.085326  [ 7424/24872]:   0%|          | 0/97 [00:03<?, ?it/s]
loss: 0.081692  [ 7680/24872]:   0%|          | 0/97 [00:03<?, ?it/s]
loss: 0.093930  [ 7936/24872]:   0%|          | 0/97 [00:03<?, ?it/s]
loss: 0.097064  [ 8192/24872]:   0%|          | 0/97 [00:03<?, ?it/s]
loss: 0.088516  [ 8448/24872]:   0%|          | 0/97 [00:03<?, ?it/s]
loss: 0.101595  [ 8704/24872]:   0%|          | 0/97 [00:03<?, ?it/s]
loss: 0.090388  [ 8960/24872]:   0%|          | 0/97 [00:03<?, ?it/s]
loss: 0.083882  [ 9216/24872]:   0%|          | 0/97 [00:03<?, ?it/s]
loss: 0.092616  [ 9472/24872]:   0%|          | 0/97 [00:04<?, ?it/s]
loss: 0.084721  [ 9728/24872]:   0%|          | 0/97 [00:04<?, ?it/s]
loss: 0.091982  [ 9984/24872]:   0%|          | 0/97 [00:04<?, ?it/s]
loss: 0.079919  [10240/24872]:   0%|          | 0/97 [00:04<?, ?it/s]
loss: 0.094600  [10496/24872]:   0%|          | 0/97 [00:04<?, ?it/s]
loss: 0.084941  [10752/24872]:   0%|          | 0/97 [00:04<?, ?it/s]
loss: 0.078594  [11008/24872]:   0%|          | 0/97 [00:04<?, ?it/s]
loss: 0.103668  [11264/24872]:   0%|          | 0/97 [00:04<?, ?it/s]
loss: 0.090911  [11520/24872]:   0%|          | 0/97 [00:04<?, ?it/s]
loss: 0.091761  [11776/24872]:   0%|          | 0/97 [00:05<?, ?it/s]
loss: 0.073481  [12032/24872]:   0%|          | 0/97 [00:05<?, ?it/s]
loss: 0.097066  [12288/24872]:   0%|          | 0/97 [00:05<?, ?it/s]
loss: 0.079707  [12544/24872]:   0%|          | 0/97 [00:05<?, ?it/s]
loss: 0.093041  [12800/24872]:   0%|          | 0/97 [00:05<?, ?it/s]
loss: 0.103344  [13056/24872]:   0%|          | 0/97 [00:05<?, ?it/s]
loss: 0.073561  [13312/24872]:   0%|          | 0/97 [00:05<?, ?it/s]
loss: 0.083032  [13568/24872]:   0%|          | 0/97 [00:05<?, ?it/s]
loss: 0.070500  [13824/24872]:   0%|          | 0/97 [00:05<?, ?it/s]
loss: 0.097100  [14080/24872]:   0%|          | 0/97 [00:06<?, ?it/s]
loss: 0.098167  [14336/24872]:   0%|          | 0/97 [00:06<?, ?it/s]
loss: 0.092353  [14592/24872]:   0%|          | 0/97 [00:06<?, ?it/s]
loss: 0.087861  [14848/24872]:   0%|          | 0/97 [00:06<?, ?it/s]
loss: 0.084190  [15104/24872]:   0%|          | 0/97 [00:06<?, ?it/s]
loss: 0.065841  [15360/24872]:   0%|          | 0/97 [00:06<?, ?it/s]
loss: 0.085030  [15616/24872]:   0%|          | 0/97 [00:06<?, ?it/s]
loss: 0.092771  [15872/24872]:   0%|          | 0/97 [00:06<?, ?it/s]
loss: 0.105051  [16128/24872]:   0%|          | 0/97 [00:06<?, ?it/s]
loss: 0.090103  [16384/24872]:   0%|          | 0/97 [00:06<?, ?it/s]
loss: 0.092786  [16640/24872]:   0%|          | 0/97 [00:07<?, ?it/s]
loss: 0.079388  [16896/24872]:   0%|          | 0/97 [00:07<?, ?it/s]
loss: 0.084696  [17152/24872]:   0%|          | 0/97 [00:07<?, ?it/s]
loss: 0.102136  [17408/24872]:   0%|          | 0/97 [00:07<?, ?it/s]
loss: 0.078569  [17664/24872]:   0%|          | 0/97 [00:07<?, ?it/s]
loss: 0.109616  [17920/24872]:   0%|          | 0/97 [00:07<?, ?it/s]
loss: 0.067808  [18176/24872]:   0%|          | 0/97 [00:07<?, ?it/s]
loss: 0.096736  [18432/24872]:   0%|          | 0/97 [00:07<?, ?it/s]
loss: 0.079910  [18688/24872]:   0%|          | 0/97 [00:07<?, ?it/s]
loss: 0.104034  [18944/24872]:   0%|          | 0/97 [00:08<?, ?it/s]
loss: 0.105627  [19200/24872]:   0%|          | 0/97 [00:08<?, ?it/s]
loss: 0.092250  [19456/24872]:   0%|          | 0/97 [00:08<?, ?it/s]
loss: 0.072201  [19712/24872]:   0%|          | 0/97 [00:08<?, ?it/s]
loss: 0.072766  [19968/24872]:   0%|          | 0/97 [00:08<?, ?it/s]
loss: 0.084983  [20224/24872]:   0%|          | 0/97 [00:08<?, ?it/s]
loss: 0.114553  [20480/24872]:   0%|          | 0/97 [00:08<?, ?it/s]
loss: 0.073938  [20736/24872]:   0%|          | 0/97 [00:08<?, ?it/s]
loss: 0.096590  [20992/24872]:   0%|          | 0/97 [00:08<?, ?it/s]
loss: 0.085335  [21248/24872]:   0%|          | 0/97 [00:09<?, ?it/s]
loss: 0.074211  [21504/24872]:   0%|          | 0/97 [00:09<?, ?it/s]
loss: 0.081502  [21760/24872]:   0%|          | 0/97 [00:09<?, ?it/s]
loss: 0.090958  [22016/24872]:   0%|          | 0/97 [00:09<?, ?it/s]
loss: 0.080468  [22272/24872]:   0%|          | 0/97 [00:09<?, ?it/s]
loss: 0.091595  [22528/24872]:   0%|          | 0/97 [00:09<?, ?it/s]
loss: 0.099271  [22784/24872]:   0%|          | 0/97 [00:09<?, ?it/s]
loss: 0.109512  [23040/24872]:   0%|          | 0/97 [00:09<?, ?it/s]
loss: 0.082638  [23296/24872]:   0%|          | 0/97 [00:09<?, ?it/s]
loss: 0.100232  [23552/24872]:   0%|          | 0/97 [00:10<?, ?it/s]
loss: 0.074942  [23808/24872]:   0%|          | 0/97 [00:10<?, ?it/s]
loss: 0.091660  [24064/24872]:   0%|          | 0/97 [00:10<?, ?it/s]
loss: 0.094227  [24320/24872]:   0%|          | 0/97 [00:10<?, ?it/s]
loss: 0.092979  [24576/24872]:   0%|          | 0/97 [00:10<?, ?it/s]
loss: 0.076757  [24832/24872]:   0%|          | 0/97 [00:10<?, ?it/s]
loss: 0.107665  [24872/24872]:   0%|          | 0/97 [00:10<?, ?it/s]
loss: 0.107665  [24872/24872]: : 98it [00:10,  9.20it/s]
Epoch 3, time=359.70s

  0%|          | 0/97 [00:00<?, ?it/s]
loss: 0.081535  [  256/24872]:   0%|          | 0/97 [00:00<?, ?it/s]
loss: 0.083804  [  512/24872]:   0%|          | 0/97 [00:00<?, ?it/s]
loss: 0.071962  [  768/24872]:   0%|          | 0/97 [00:00<?, ?it/s]
loss: 0.100061  [ 1024/24872]:   0%|          | 0/97 [00:00<?, ?it/s]
loss: 0.087641  [ 1280/24872]:   0%|          | 0/97 [00:00<?, ?it/s]
loss: 0.084171  [ 1536/24872]:   0%|          | 0/97 [00:00<?, ?it/s]
loss: 0.081502  [ 1792/24872]:   0%|          | 0/97 [00:00<?, ?it/s]
loss: 0.095900  [ 2048/24872]:   0%|          | 0/97 [00:00<?, ?it/s]
loss: 0.095609  [ 2304/24872]:   0%|          | 0/97 [00:00<?, ?it/s]
loss: 0.092941  [ 2560/24872]:   0%|          | 0/97 [00:01<?, ?it/s]
loss: 0.090780  [ 2816/24872]:   0%|          | 0/97 [00:01<?, ?it/s]
loss: 0.092975  [ 3072/24872]:   0%|          | 0/97 [00:01<?, ?it/s]
loss: 0.098579  [ 3328/24872]:   0%|          | 0/97 [00:01<?, ?it/s]
loss: 0.092315  [ 3584/24872]:   0%|          | 0/97 [00:01<?, ?it/s]
loss: 0.095029  [ 3840/24872]:   0%|          | 0/97 [00:01<?, ?it/s]
loss: 0.089750  [ 4096/24872]:   0%|          | 0/97 [00:01<?, ?it/s]
loss: 0.082477  [ 4352/24872]:   0%|          | 0/97 [00:01<?, ?it/s]
loss: 0.087644  [ 4608/24872]:   0%|          | 0/97 [00:01<?, ?it/s]
loss: 0.098522  [ 4864/24872]:   0%|          | 0/97 [00:02<?, ?it/s]
loss: 0.077229  [ 5120/24872]:   0%|          | 0/97 [00:02<?, ?it/s]
loss: 0.108390  [ 5376/24872]:   0%|          | 0/97 [00:02<?, ?it/s]
loss: 0.089936  [ 5632/24872]:   0%|          | 0/97 [00:02<?, ?it/s]
loss: 0.089186  [ 5888/24872]:   0%|          | 0/97 [00:02<?, ?it/s]
loss: 0.096317  [ 6144/24872]:   0%|          | 0/97 [00:02<?, ?it/s]
loss: 0.101679  [ 6400/24872]:   0%|          | 0/97 [00:02<?, ?it/s]
loss: 0.082743  [ 6656/24872]:   0%|          | 0/97 [00:02<?, ?it/s]
loss: 0.087111  [ 6912/24872]:   0%|          | 0/97 [00:02<?, ?it/s]
loss: 0.075565  [ 7168/24872]:   0%|          | 0/97 [00:03<?, ?it/s]
loss: 0.084020  [ 7424/24872]:   0%|          | 0/97 [00:03<?, ?it/s]
loss: 0.080596  [ 7680/24872]:   0%|          | 0/97 [00:03<?, ?it/s]
loss: 0.092142  [ 7936/24872]:   0%|          | 0/97 [00:03<?, ?it/s]
loss: 0.095659  [ 8192/24872]:   0%|          | 0/97 [00:03<?, ?it/s]
loss: 0.087555  [ 8448/24872]:   0%|          | 0/97 [00:03<?, ?it/s]
loss: 0.100400  [ 8704/24872]:   0%|          | 0/97 [00:03<?, ?it/s]
loss: 0.089562  [ 8960/24872]:   0%|          | 0/97 [00:03<?, ?it/s]
loss: 0.082639  [ 9216/24872]:   0%|          | 0/97 [00:03<?, ?it/s]
loss: 0.091041  [ 9472/24872]:   0%|          | 0/97 [00:04<?, ?it/s]
loss: 0.083878  [ 9728/24872]:   0%|          | 0/97 [00:04<?, ?it/s]
loss: 0.090270  [ 9984/24872]:   0%|          | 0/97 [00:04<?, ?it/s]
loss: 0.079171  [10240/24872]:   0%|          | 0/97 [00:04<?, ?it/s]
loss: 0.093683  [10496/24872]:   0%|          | 0/97 [00:04<?, ?it/s]
loss: 0.084416  [10752/24872]:   0%|          | 0/97 [00:04<?, ?it/s]
loss: 0.077772  [11008/24872]:   0%|          | 0/97 [00:04<?, ?it/s]
loss: 0.102112  [11264/24872]:   0%|          | 0/97 [00:04<?, ?it/s]
loss: 0.089163  [11520/24872]:   0%|          | 0/97 [00:04<?, ?it/s]
loss: 0.090948  [11776/24872]:   0%|          | 0/97 [00:05<?, ?it/s]
loss: 0.072801  [12032/24872]:   0%|          | 0/97 [00:05<?, ?it/s]
loss: 0.096711  [12288/24872]:   0%|          | 0/97 [00:05<?, ?it/s]
loss: 0.079108  [12544/24872]:   0%|          | 0/97 [00:05<?, ?it/s]
loss: 0.092005  [12800/24872]:   0%|          | 0/97 [00:05<?, ?it/s]
loss: 0.101917  [13056/24872]:   0%|          | 0/97 [00:05<?, ?it/s]
loss: 0.073365  [13312/24872]:   0%|          | 0/97 [00:05<?, ?it/s]
loss: 0.081628  [13568/24872]:   0%|          | 0/97 [00:05<?, ?it/s]
loss: 0.069828  [13824/24872]:   0%|          | 0/97 [00:05<?, ?it/s]
loss: 0.095954  [14080/24872]:   0%|          | 0/97 [00:06<?, ?it/s]
loss: 0.097262  [14336/24872]:   0%|          | 0/97 [00:06<?, ?it/s]
loss: 0.090736  [14592/24872]:   0%|          | 0/97 [00:06<?, ?it/s]
loss: 0.086486  [14848/24872]:   0%|          | 0/97 [00:06<?, ?it/s]
loss: 0.082993  [15104/24872]:   0%|          | 0/97 [00:06<?, ?it/s]
loss: 0.064777  [15360/24872]:   0%|          | 0/97 [00:06<?, ?it/s]
loss: 0.083627  [15616/24872]:   0%|          | 0/97 [00:06<?, ?it/s]
loss: 0.091386  [15872/24872]:   0%|          | 0/97 [00:06<?, ?it/s]
loss: 0.103803  [16128/24872]:   0%|          | 0/97 [00:06<?, ?it/s]
loss: 0.088908  [16384/24872]:   0%|          | 0/97 [00:07<?, ?it/s]
loss: 0.091903  [16640/24872]:   0%|          | 0/97 [00:07<?, ?it/s]
loss: 0.078476  [16896/24872]:   0%|          | 0/97 [00:07<?, ?it/s]
loss: 0.083701  [17152/24872]:   0%|          | 0/97 [00:07<?, ?it/s]
loss: 0.101380  [17408/24872]:   0%|          | 0/97 [00:07<?, ?it/s]
loss: 0.078278  [17664/24872]:   0%|          | 0/97 [00:07<?, ?it/s]
loss: 0.108581  [17920/24872]:   0%|          | 0/97 [00:07<?, ?it/s]
loss: 0.066948  [18176/24872]:   0%|          | 0/97 [00:07<?, ?it/s]
loss: 0.095845  [18432/24872]:   0%|          | 0/97 [00:07<?, ?it/s]
loss: 0.079232  [18688/24872]:   0%|          | 0/97 [00:08<?, ?it/s]
loss: 0.102615  [18944/24872]:   0%|          | 0/97 [00:08<?, ?it/s]
loss: 0.104397  [19200/24872]:   0%|          | 0/97 [00:08<?, ?it/s]
loss: 0.091168  [19456/24872]:   0%|          | 0/97 [00:08<?, ?it/s]
loss: 0.071242  [19712/24872]:   0%|          | 0/97 [00:08<?, ?it/s]
loss: 0.072496  [19968/24872]:   0%|          | 0/97 [00:08<?, ?it/s]
loss: 0.083991  [20224/24872]:   0%|          | 0/97 [00:08<?, ?it/s]
loss: 0.113449  [20480/24872]:   0%|          | 0/97 [00:08<?, ?it/s]
loss: 0.072797  [20736/24872]:   0%|          | 0/97 [00:08<?, ?it/s]
loss: 0.095948  [20992/24872]:   0%|          | 0/97 [00:09<?, ?it/s]
loss: 0.084366  [21248/24872]:   0%|          | 0/97 [00:09<?, ?it/s]
loss: 0.072987  [21504/24872]:   0%|          | 0/97 [00:09<?, ?it/s]
loss: 0.080919  [21760/24872]:   0%|          | 0/97 [00:09<?, ?it/s]
loss: 0.090461  [22016/24872]:   0%|          | 0/97 [00:09<?, ?it/s]
loss: 0.079811  [22272/24872]:   0%|          | 0/97 [00:09<?, ?it/s]
loss: 0.090501  [22528/24872]:   0%|          | 0/97 [00:09<?, ?it/s]
loss: 0.098395  [22784/24872]:   0%|          | 0/97 [00:09<?, ?it/s]
loss: 0.108802  [23040/24872]:   0%|          | 0/97 [00:09<?, ?it/s]
loss: 0.081808  [23296/24872]:   0%|          | 0/97 [00:10<?, ?it/s]
loss: 0.099641  [23552/24872]:   0%|          | 0/97 [00:10<?, ?it/s]
loss: 0.073628  [23808/24872]:   0%|          | 0/97 [00:10<?, ?it/s]
loss: 0.090679  [24064/24872]:   0%|          | 0/97 [00:10<?, ?it/s]
loss: 0.093272  [24320/24872]:   0%|          | 0/97 [00:10<?, ?it/s]
loss: 0.092006  [24576/24872]:   0%|          | 0/97 [00:10<?, ?it/s]
loss: 0.075784  [24832/24872]:   0%|          | 0/97 [00:10<?, ?it/s]
loss: 0.105596  [24872/24872]:   0%|          | 0/97 [00:10<?, ?it/s]
loss: 0.105596  [24872/24872]: : 98it [00:10,  9.16it/s]
Epoch 4, time=370.41s

  0%|          | 0/97 [00:00<?, ?it/s]
loss: 0.080823  [  256/24872]:   0%|          | 0/97 [00:00<?, ?it/s]
loss: 0.082912  [  512/24872]:   0%|          | 0/97 [00:00<?, ?it/s]
loss: 0.071619  [  768/24872]:   0%|          | 0/97 [00:00<?, ?it/s]
loss: 0.098795  [ 1024/24872]:   0%|          | 0/97 [00:00<?, ?it/s]
loss: 0.086884  [ 1280/24872]:   0%|          | 0/97 [00:00<?, ?it/s]
loss: 0.083496  [ 1536/24872]:   0%|          | 0/97 [00:00<?, ?it/s]
loss: 0.080685  [ 1792/24872]:   0%|          | 0/97 [00:00<?, ?it/s]
loss: 0.095119  [ 2048/24872]:   0%|          | 0/97 [00:00<?, ?it/s]
loss: 0.094816  [ 2304/24872]:   0%|          | 0/97 [00:00<?, ?it/s]
loss: 0.092065  [ 2560/24872]:   0%|          | 0/97 [00:01<?, ?it/s]
loss: 0.089973  [ 2816/24872]:   0%|          | 0/97 [00:01<?, ?it/s]
loss: 0.091509  [ 3072/24872]:   0%|          | 0/97 [00:01<?, ?it/s]
loss: 0.097909  [ 3328/24872]:   0%|          | 0/97 [00:01<?, ?it/s]
loss: 0.091316  [ 3584/24872]:   0%|          | 0/97 [00:01<?, ?it/s]
loss: 0.093824  [ 3840/24872]:   0%|          | 0/97 [00:01<?, ?it/s]
loss: 0.088761  [ 4096/24872]:   0%|          | 0/97 [00:01<?, ?it/s]
loss: 0.081723  [ 4352/24872]:   0%|          | 0/97 [00:01<?, ?it/s]
loss: 0.086894  [ 4608/24872]:   0%|          | 0/97 [00:01<?, ?it/s]
loss: 0.097497  [ 4864/24872]:   0%|          | 0/97 [00:02<?, ?it/s]
loss: 0.076550  [ 5120/24872]:   0%|          | 0/97 [00:02<?, ?it/s]
loss: 0.107417  [ 5376/24872]:   0%|          | 0/97 [00:02<?, ?it/s]
loss: 0.089476  [ 5632/24872]:   0%|          | 0/97 [00:02<?, ?it/s]
loss: 0.088424  [ 5888/24872]:   0%|          | 0/97 [00:02<?, ?it/s]
loss: 0.095688  [ 6144/24872]:   0%|          | 0/97 [00:02<?, ?it/s]
loss: 0.100698  [ 6400/24872]:   0%|          | 0/97 [00:02<?, ?it/s]
loss: 0.081640  [ 6656/24872]:   0%|          | 0/97 [00:02<?, ?it/s]
loss: 0.085701  [ 6912/24872]:   0%|          | 0/97 [00:02<?, ?it/s]
loss: 0.074831  [ 7168/24872]:   0%|          | 0/97 [00:03<?, ?it/s]
loss: 0.082999  [ 7424/24872]:   0%|          | 0/97 [00:03<?, ?it/s]
loss: 0.079965  [ 7680/24872]:   0%|          | 0/97 [00:03<?, ?it/s]
loss: 0.090851  [ 7936/24872]:   0%|          | 0/97 [00:03<?, ?it/s]
loss: 0.094454  [ 8192/24872]:   0%|          | 0/97 [00:03<?, ?it/s]
loss: 0.086742  [ 8448/24872]:   0%|          | 0/97 [00:03<?, ?it/s]
loss: 0.099308  [ 8704/24872]:   0%|          | 0/97 [00:03<?, ?it/s]
loss: 0.088856  [ 8960/24872]:   0%|          | 0/97 [00:03<?, ?it/s]
loss: 0.081708  [ 9216/24872]:   0%|          | 0/97 [00:03<?, ?it/s]
loss: 0.089805  [ 9472/24872]:   0%|          | 0/97 [00:04<?, ?it/s]
loss: 0.083062  [ 9728/24872]:   0%|          | 0/97 [00:04<?, ?it/s]
loss: 0.089107  [ 9984/24872]:   0%|          | 0/97 [00:04<?, ?it/s]
loss: 0.078437  [10240/24872]:   0%|          | 0/97 [00:04<?, ?it/s]
loss: 0.092912  [10496/24872]:   0%|          | 0/97 [00:04<?, ?it/s]
loss: 0.083921  [10752/24872]:   0%|          | 0/97 [00:04<?, ?it/s]
loss: 0.077041  [11008/24872]:   0%|          | 0/97 [00:04<?, ?it/s]
loss: 0.100980  [11264/24872]:   0%|          | 0/97 [00:04<?, ?it/s]
loss: 0.087946  [11520/24872]:   0%|          | 0/97 [00:04<?, ?it/s]
loss: 0.090290  [11776/24872]:   0%|          | 0/97 [00:05<?, ?it/s]
loss: 0.072051  [12032/24872]:   0%|          | 0/97 [00:05<?, ?it/s]
loss: 0.096400  [12288/24872]:   0%|          | 0/97 [00:05<?, ?it/s]
loss: 0.078486  [12544/24872]:   0%|          | 0/97 [00:05<?, ?it/s]
loss: 0.091209  [12800/24872]:   0%|          | 0/97 [00:05<?, ?it/s]
loss: 0.100863  [13056/24872]:   0%|          | 0/97 [00:05<?, ?it/s]
loss: 0.072851  [13312/24872]:   0%|          | 0/97 [00:05<?, ?it/s]
loss: 0.080695  [13568/24872]:   0%|          | 0/97 [00:05<?, ?it/s]
loss: 0.069269  [13824/24872]:   0%|          | 0/97 [00:05<?, ?it/s]
loss: 0.095017  [14080/24872]:   0%|          | 0/97 [00:06<?, ?it/s]
loss: 0.096559  [14336/24872]:   0%|          | 0/97 [00:06<?, ?it/s]
loss: 0.089565  [14592/24872]:   0%|          | 0/97 [00:06<?, ?it/s]
loss: 0.085410  [14848/24872]:   0%|          | 0/97 [00:06<?, ?it/s]
loss: 0.082031  [15104/24872]:   0%|          | 0/97 [00:06<?, ?it/s]
loss: 0.064035  [15360/24872]:   0%|          | 0/97 [00:06<?, ?it/s]
loss: 0.082536  [15616/24872]:   0%|          | 0/97 [00:06<?, ?it/s]
loss: 0.089980  [15872/24872]:   0%|          | 0/97 [00:06<?, ?it/s]
loss: 0.102642  [16128/24872]:   0%|          | 0/97 [00:06<?, ?it/s]
loss: 0.087982  [16384/24872]:   0%|          | 0/97 [00:07<?, ?it/s]
loss: 0.091072  [16640/24872]:   0%|          | 0/97 [00:07<?, ?it/s]
loss: 0.077777  [16896/24872]:   0%|          | 0/97 [00:07<?, ?it/s]
loss: 0.082832  [17152/24872]:   0%|          | 0/97 [00:07<?, ?it/s]
loss: 0.100728  [17408/24872]:   0%|          | 0/97 [00:07<?, ?it/s]
loss: 0.077938  [17664/24872]:   0%|          | 0/97 [00:07<?, ?it/s]
loss: 0.107698  [17920/24872]:   0%|          | 0/97 [00:07<?, ?it/s]
loss: 0.066177  [18176/24872]:   0%|          | 0/97 [00:07<?, ?it/s]
loss: 0.095027  [18432/24872]:   0%|          | 0/97 [00:07<?, ?it/s]
loss: 0.078535  [18688/24872]:   0%|          | 0/97 [00:07<?, ?it/s]
loss: 0.101366  [18944/24872]:   0%|          | 0/97 [00:08<?, ?it/s]
loss: 0.103506  [19200/24872]:   0%|          | 0/97 [00:08<?, ?it/s]
loss: 0.090394  [19456/24872]:   0%|          | 0/97 [00:08<?, ?it/s]
loss: 0.070520  [19712/24872]:   0%|          | 0/97 [00:08<?, ?it/s]
loss: 0.072179  [19968/24872]:   0%|          | 0/97 [00:08<?, ?it/s]
loss: 0.083196  [20224/24872]:   0%|          | 0/97 [00:08<?, ?it/s]
loss: 0.112464  [20480/24872]:   0%|          | 0/97 [00:08<?, ?it/s]
loss: 0.071855  [20736/24872]:   0%|          | 0/97 [00:08<?, ?it/s]
loss: 0.095327  [20992/24872]:   0%|          | 0/97 [00:08<?, ?it/s]
loss: 0.083139  [21248/24872]:   0%|          | 0/97 [00:09<?, ?it/s]
loss: 0.072245  [21504/24872]:   0%|          | 0/97 [00:09<?, ?it/s]
loss: 0.080300  [21760/24872]:   0%|          | 0/97 [00:09<?, ?it/s]
loss: 0.089943  [22016/24872]:   0%|          | 0/97 [00:09<?, ?it/s]
loss: 0.079159  [22272/24872]:   0%|          | 0/97 [00:09<?, ?it/s]
loss: 0.089572  [22528/24872]:   0%|          | 0/97 [00:09<?, ?it/s]
loss: 0.097742  [22784/24872]:   0%|          | 0/97 [00:09<?, ?it/s]
loss: 0.108024  [23040/24872]:   0%|          | 0/97 [00:09<?, ?it/s]
loss: 0.081005  [23296/24872]:   0%|          | 0/97 [00:09<?, ?it/s]
loss: 0.099073  [23552/24872]:   0%|          | 0/97 [00:10<?, ?it/s]
loss: 0.072694  [23808/24872]:   0%|          | 0/97 [00:10<?, ?it/s]
loss: 0.089809  [24064/24872]:   0%|          | 0/97 [00:10<?, ?it/s]
loss: 0.092429  [24320/24872]:   0%|          | 0/97 [00:10<?, ?it/s]
loss: 0.091219  [24576/24872]:   0%|          | 0/97 [00:10<?, ?it/s]
loss: 0.074965  [24832/24872]:   0%|          | 0/97 [00:10<?, ?it/s]
loss: 0.103916  [24872/24872]:   0%|          | 0/97 [00:10<?, ?it/s]
loss: 0.103916  [24872/24872]: : 98it [00:10,  9.19it/s]
Epoch 5, time=381.07s

  0%|          | 0/97 [00:00<?, ?it/s]
loss: 0.080048  [  256/24872]:   0%|          | 0/97 [00:00<?, ?it/s]
loss: 0.081974  [  512/24872]:   0%|          | 0/97 [00:00<?, ?it/s]
loss: 0.071051  [  768/24872]:   0%|          | 0/97 [00:00<?, ?it/s]
loss: 0.097718  [ 1024/24872]:   0%|          | 0/97 [00:00<?, ?it/s]
loss: 0.086225  [ 1280/24872]:   0%|          | 0/97 [00:00<?, ?it/s]
loss: 0.082693  [ 1536/24872]:   0%|          | 0/97 [00:00<?, ?it/s]
loss: 0.079635  [ 1792/24872]:   0%|          | 0/97 [00:00<?, ?it/s]
loss: 0.094266  [ 2048/24872]:   0%|          | 0/97 [00:00<?, ?it/s]
loss: 0.094154  [ 2304/24872]:   0%|          | 0/97 [00:00<?, ?it/s]
loss: 0.091112  [ 2560/24872]:   0%|          | 0/97 [00:01<?, ?it/s]
loss: 0.089120  [ 2816/24872]:   0%|          | 0/97 [00:01<?, ?it/s]
loss: 0.090262  [ 3072/24872]:   0%|          | 0/97 [00:01<?, ?it/s]
loss: 0.097440  [ 3328/24872]:   0%|          | 0/97 [00:01<?, ?it/s]
loss: 0.090126  [ 3584/24872]:   0%|          | 0/97 [00:01<?, ?it/s]
loss: 0.092622  [ 3840/24872]:   0%|          | 0/97 [00:01<?, ?it/s]
loss: 0.087873  [ 4096/24872]:   0%|          | 0/97 [00:01<?, ?it/s]
loss: 0.080819  [ 4352/24872]:   0%|          | 0/97 [00:01<?, ?it/s]
loss: 0.085866  [ 4608/24872]:   0%|          | 0/97 [00:01<?, ?it/s]
loss: 0.096633  [ 4864/24872]:   0%|          | 0/97 [00:02<?, ?it/s]
loss: 0.075907  [ 5120/24872]:   0%|          | 0/97 [00:02<?, ?it/s]
loss: 0.106556  [ 5376/24872]:   0%|          | 0/97 [00:02<?, ?it/s]
loss: 0.088993  [ 5632/24872]:   0%|          | 0/97 [00:02<?, ?it/s]
loss: 0.087647  [ 5888/24872]:   0%|          | 0/97 [00:02<?, ?it/s]
loss: 0.095013  [ 6144/24872]:   0%|          | 0/97 [00:02<?, ?it/s]
loss: 0.099803  [ 6400/24872]:   0%|          | 0/97 [00:02<?, ?it/s]
loss: 0.080794  [ 6656/24872]:   0%|          | 0/97 [00:02<?, ?it/s]
loss: 0.084640  [ 6912/24872]:   0%|          | 0/97 [00:02<?, ?it/s]
loss: 0.074247  [ 7168/24872]:   0%|          | 0/97 [00:03<?, ?it/s]
loss: 0.082201  [ 7424/24872]:   0%|          | 0/97 [00:03<?, ?it/s]
loss: 0.079445  [ 7680/24872]:   0%|          | 0/97 [00:03<?, ?it/s]
loss: 0.089848  [ 7936/24872]:   0%|          | 0/97 [00:03<?, ?it/s]
loss: 0.093391  [ 8192/24872]:   0%|          | 0/97 [00:03<?, ?it/s]
loss: 0.085995  [ 8448/24872]:   0%|          | 0/97 [00:03<?, ?it/s]
loss: 0.098285  [ 8704/24872]:   0%|          | 0/97 [00:03<?, ?it/s]
loss: 0.088277  [ 8960/24872]:   0%|          | 0/97 [00:03<?, ?it/s]
loss: 0.080807  [ 9216/24872]:   0%|          | 0/97 [00:03<?, ?it/s]
loss: 0.088740  [ 9472/24872]:   0%|          | 0/97 [00:04<?, ?it/s]
loss: 0.082239  [ 9728/24872]:   0%|          | 0/97 [00:04<?, ?it/s]
loss: 0.088140  [ 9984/24872]:   0%|          | 0/97 [00:04<?, ?it/s]
loss: 0.077904  [10240/24872]:   0%|          | 0/97 [00:04<?, ?it/s]
loss: 0.092231  [10496/24872]:   0%|          | 0/97 [00:04<?, ?it/s]
loss: 0.083449  [10752/24872]:   0%|          | 0/97 [00:04<?, ?it/s]
loss: 0.076414  [11008/24872]:   0%|          | 0/97 [00:04<?, ?it/s]
loss: 0.099969  [11264/24872]:   0%|          | 0/97 [00:04<?, ?it/s]
loss: 0.086906  [11520/24872]:   0%|          | 0/97 [00:04<?, ?it/s]
loss: 0.089649  [11776/24872]:   0%|          | 0/97 [00:05<?, ?it/s]
loss: 0.071506  [12032/24872]:   0%|          | 0/97 [00:05<?, ?it/s]
loss: 0.096183  [12288/24872]:   0%|          | 0/97 [00:05<?, ?it/s]
loss: 0.078016  [12544/24872]:   0%|          | 0/97 [00:05<?, ?it/s]
loss: 0.090468  [12800/24872]:   0%|          | 0/97 [00:05<?, ?it/s]
loss: 0.100036  [13056/24872]:   0%|          | 0/97 [00:05<?, ?it/s]
loss: 0.072381  [13312/24872]:   0%|          | 0/97 [00:05<?, ?it/s]
loss: 0.079947  [13568/24872]:   0%|          | 0/97 [00:05<?, ?it/s]
loss: 0.068752  [13824/24872]:   0%|          | 0/97 [00:05<?, ?it/s]
loss: 0.094263  [14080/24872]:   0%|          | 0/97 [00:06<?, ?it/s]
loss: 0.095950  [14336/24872]:   0%|          | 0/97 [00:06<?, ?it/s]
loss: 0.088448  [14592/24872]:   0%|          | 0/97 [00:06<?, ?it/s]
loss: 0.084424  [14848/24872]:   0%|          | 0/97 [00:06<?, ?it/s]
loss: 0.081295  [15104/24872]:   0%|          | 0/97 [00:06<?, ?it/s]
loss: 0.063316  [15360/24872]:   0%|          | 0/97 [00:06<?, ?it/s]
loss: 0.081676  [15616/24872]:   0%|          | 0/97 [00:06<?, ?it/s]
loss: 0.088741  [15872/24872]:   0%|          | 0/97 [00:06<?, ?it/s]
loss: 0.101596  [16128/24872]:   0%|          | 0/97 [00:06<?, ?it/s]
loss: 0.087291  [16384/24872]:   0%|          | 0/97 [00:06<?, ?it/s]
loss: 0.090336  [16640/24872]:   0%|          | 0/97 [00:07<?, ?it/s]
loss: 0.077266  [16896/24872]:   0%|          | 0/97 [00:07<?, ?it/s]
loss: 0.082098  [17152/24872]:   0%|          | 0/97 [00:07<?, ?it/s]
loss: 0.100182  [17408/24872]:   0%|          | 0/97 [00:07<?, ?it/s]
loss: 0.077597  [17664/24872]:   0%|          | 0/97 [00:07<?, ?it/s]
loss: 0.107090  [17920/24872]:   0%|          | 0/97 [00:07<?, ?it/s]
loss: 0.065672  [18176/24872]:   0%|          | 0/97 [00:07<?, ?it/s]
loss: 0.094299  [18432/24872]:   0%|          | 0/97 [00:07<?, ?it/s]
loss: 0.077956  [18688/24872]:   0%|          | 0/97 [00:07<?, ?it/s]
loss: 0.100463  [18944/24872]:   0%|          | 0/97 [00:08<?, ?it/s]
loss: 0.102748  [19200/24872]:   0%|          | 0/97 [00:08<?, ?it/s]
loss: 0.089718  [19456/24872]:   0%|          | 0/97 [00:08<?, ?it/s]
loss: 0.069943  [19712/24872]:   0%|          | 0/97 [00:08<?, ?it/s]
loss: 0.071905  [19968/24872]:   0%|          | 0/97 [00:08<?, ?it/s]
loss: 0.082536  [20224/24872]:   0%|          | 0/97 [00:08<?, ?it/s]
loss: 0.111569  [20480/24872]:   0%|          | 0/97 [00:08<?, ?it/s]
loss: 0.071138  [20736/24872]:   0%|          | 0/97 [00:08<?, ?it/s]
loss: 0.094794  [20992/24872]:   0%|          | 0/97 [00:08<?, ?it/s]
loss: 0.082571  [21248/24872]:   0%|          | 0/97 [00:09<?, ?it/s]
loss: 0.071611  [21504/24872]:   0%|          | 0/97 [00:09<?, ?it/s]
loss: 0.079793  [21760/24872]:   0%|          | 0/97 [00:09<?, ?it/s]
loss: 0.089467  [22016/24872]:   0%|          | 0/97 [00:09<?, ?it/s]
loss: 0.078618  [22272/24872]:   0%|          | 0/97 [00:09<?, ?it/s]
loss: 0.088844  [22528/24872]:   0%|          | 0/97 [00:09<?, ?it/s]
loss: 0.097197  [22784/24872]:   0%|          | 0/97 [00:09<?, ?it/s]
loss: 0.107277  [23040/24872]:   0%|          | 0/97 [00:09<?, ?it/s]
loss: 0.080328  [23296/24872]:   0%|          | 0/97 [00:09<?, ?it/s]
loss: 0.098461  [23552/24872]:   0%|          | 0/97 [00:10<?, ?it/s]
loss: 0.071920  [23808/24872]:   0%|          | 0/97 [00:10<?, ?it/s]
loss: 0.089052  [24064/24872]:   0%|          | 0/97 [00:10<?, ?it/s]
loss: 0.091577  [24320/24872]:   0%|          | 0/97 [00:10<?, ?it/s]
loss: 0.090448  [24576/24872]:   0%|          | 0/97 [00:10<?, ?it/s]
loss: 0.074290  [24832/24872]:   0%|          | 0/97 [00:10<?, ?it/s]
loss: 0.102464  [24872/24872]:   0%|          | 0/97 [00:10<?, ?it/s]
loss: 0.102464  [24872/24872]: : 98it [00:10,  9.20it/s]
Epoch 6, time=391.72s

  0%|          | 0/97 [00:00<?, ?it/s]
loss: 0.079305  [  256/24872]:   0%|          | 0/97 [00:00<?, ?it/s]
loss: 0.081145  [  512/24872]:   0%|          | 0/97 [00:00<?, ?it/s]
loss: 0.070559  [  768/24872]:   0%|          | 0/97 [00:00<?, ?it/s]
loss: 0.097067  [ 1024/24872]:   0%|          | 0/97 [00:00<?, ?it/s]
loss: 0.085621  [ 1280/24872]:   0%|          | 0/97 [00:00<?, ?it/s]
loss: 0.081999  [ 1536/24872]:   0%|          | 0/97 [00:00<?, ?it/s]
loss: 0.078786  [ 1792/24872]:   0%|          | 0/97 [00:00<?, ?it/s]
loss: 0.093513  [ 2048/24872]:   0%|          | 0/97 [00:00<?, ?it/s]
loss: 0.093606  [ 2304/24872]:   0%|          | 0/97 [00:00<?, ?it/s]
loss: 0.090366  [ 2560/24872]:   0%|          | 0/97 [00:01<?, ?it/s]
loss: 0.088306  [ 2816/24872]:   0%|          | 0/97 [00:01<?, ?it/s]
loss: 0.089146  [ 3072/24872]:   0%|          | 0/97 [00:01<?, ?it/s]
loss: 0.097037  [ 3328/24872]:   0%|          | 0/97 [00:01<?, ?it/s]
loss: 0.089082  [ 3584/24872]:   0%|          | 0/97 [00:01<?, ?it/s]
loss: 0.091470  [ 3840/24872]:   0%|          | 0/97 [00:01<?, ?it/s]
loss: 0.087060  [ 4096/24872]:   0%|          | 0/97 [00:01<?, ?it/s]
loss: 0.080023  [ 4352/24872]:   0%|          | 0/97 [00:01<?, ?it/s]
loss: 0.085012  [ 4608/24872]:   0%|          | 0/97 [00:01<?, ?it/s]
loss: 0.095842  [ 4864/24872]:   0%|          | 0/97 [00:02<?, ?it/s]
loss: 0.075338  [ 5120/24872]:   0%|          | 0/97 [00:02<?, ?it/s]
loss: 0.105782  [ 5376/24872]:   0%|          | 0/97 [00:02<?, ?it/s]
loss: 0.088537  [ 5632/24872]:   0%|          | 0/97 [00:02<?, ?it/s]
loss: 0.086963  [ 5888/24872]:   0%|          | 0/97 [00:02<?, ?it/s]
loss: 0.094393  [ 6144/24872]:   0%|          | 0/97 [00:02<?, ?it/s]
loss: 0.098932  [ 6400/24872]:   0%|          | 0/97 [00:02<?, ?it/s]
loss: 0.080049  [ 6656/24872]:   0%|          | 0/97 [00:02<?, ?it/s]
loss: 0.083821  [ 6912/24872]:   0%|          | 0/97 [00:02<?, ?it/s]
loss: 0.073752  [ 7168/24872]:   0%|          | 0/97 [00:03<?, ?it/s]
loss: 0.081451  [ 7424/24872]:   0%|          | 0/97 [00:03<?, ?it/s]
loss: 0.079013  [ 7680/24872]:   0%|          | 0/97 [00:03<?, ?it/s]
loss: 0.089045  [ 7936/24872]:   0%|          | 0/97 [00:03<?, ?it/s]
loss: 0.092371  [ 8192/24872]:   0%|          | 0/97 [00:03<?, ?it/s]
loss: 0.085237  [ 8448/24872]:   0%|          | 0/97 [00:03<?, ?it/s]
loss: 0.097353  [ 8704/24872]:   0%|          | 0/97 [00:03<?, ?it/s]
loss: 0.087748  [ 8960/24872]:   0%|          | 0/97 [00:03<?, ?it/s]
loss: 0.080081  [ 9216/24872]:   0%|          | 0/97 [00:03<?, ?it/s]
loss: 0.087917  [ 9472/24872]:   0%|          | 0/97 [00:04<?, ?it/s]
loss: 0.081514  [ 9728/24872]:   0%|          | 0/97 [00:04<?, ?it/s]
loss: 0.087382  [ 9984/24872]:   0%|          | 0/97 [00:04<?, ?it/s]
loss: 0.077421  [10240/24872]:   0%|          | 0/97 [00:04<?, ?it/s]
loss: 0.091625  [10496/24872]:   0%|          | 0/97 [00:04<?, ?it/s]
loss: 0.082925  [10752/24872]:   0%|          | 0/97 [00:04<?, ?it/s]
loss: 0.075822  [11008/24872]:   0%|          | 0/97 [00:04<?, ?it/s]
loss: 0.099131  [11264/24872]:   0%|          | 0/97 [00:04<?, ?it/s]
loss: 0.086214  [11520/24872]:   0%|          | 0/97 [00:04<?, ?it/s]
loss: 0.089061  [11776/24872]:   0%|          | 0/97 [00:05<?, ?it/s]
loss: 0.070794  [12032/24872]:   0%|          | 0/97 [00:05<?, ?it/s]
loss: 0.095763  [12288/24872]:   0%|          | 0/97 [00:05<?, ?it/s]
loss: 0.077394  [12544/24872]:   0%|          | 0/97 [00:05<?, ?it/s]
loss: 0.089741  [12800/24872]:   0%|          | 0/97 [00:05<?, ?it/s]
loss: 0.099352  [13056/24872]:   0%|          | 0/97 [00:05<?, ?it/s]
loss: 0.071813  [13312/24872]:   0%|          | 0/97 [00:05<?, ?it/s]
loss: 0.079237  [13568/24872]:   0%|          | 0/97 [00:05<?, ?it/s]
loss: 0.068202  [13824/24872]:   0%|          | 0/97 [00:05<?, ?it/s]
loss: 0.093478  [14080/24872]:   0%|          | 0/97 [00:06<?, ?it/s]
loss: 0.095339  [14336/24872]:   0%|          | 0/97 [00:06<?, ?it/s]
loss: 0.087405  [14592/24872]:   0%|          | 0/97 [00:06<?, ?it/s]
loss: 0.083576  [14848/24872]:   0%|          | 0/97 [00:06<?, ?it/s]
loss: 0.080654  [15104/24872]:   0%|          | 0/97 [00:06<?, ?it/s]
loss: 0.062654  [15360/24872]:   0%|          | 0/97 [00:06<?, ?it/s]
loss: 0.080958  [15616/24872]:   0%|          | 0/97 [00:06<?, ?it/s]
loss: 0.087790  [15872/24872]:   0%|          | 0/97 [00:06<?, ?it/s]
loss: 0.100734  [16128/24872]:   0%|          | 0/97 [00:06<?, ?it/s]
loss: 0.086683  [16384/24872]:   0%|          | 0/97 [00:06<?, ?it/s]
loss: 0.089742  [16640/24872]:   0%|          | 0/97 [00:07<?, ?it/s]
loss: 0.076707  [16896/24872]:   0%|          | 0/97 [00:07<?, ?it/s]
loss: 0.081455  [17152/24872]:   0%|          | 0/97 [00:07<?, ?it/s]
loss: 0.099535  [17408/24872]:   0%|          | 0/97 [00:07<?, ?it/s]
loss: 0.077151  [17664/24872]:   0%|          | 0/97 [00:07<?, ?it/s]
loss: 0.106418  [17920/24872]:   0%|          | 0/97 [00:07<?, ?it/s]
loss: 0.065126  [18176/24872]:   0%|          | 0/97 [00:07<?, ?it/s]
loss: 0.093560  [18432/24872]:   0%|          | 0/97 [00:07<?, ?it/s]
loss: 0.077453  [18688/24872]:   0%|          | 0/97 [00:07<?, ?it/s]
loss: 0.099754  [18944/24872]:   0%|          | 0/97 [00:08<?, ?it/s]
loss: 0.102015  [19200/24872]:   0%|          | 0/97 [00:08<?, ?it/s]
loss: 0.089064  [19456/24872]:   0%|          | 0/97 [00:08<?, ?it/s]
loss: 0.069401  [19712/24872]:   0%|          | 0/97 [00:08<?, ?it/s]
loss: 0.071635  [19968/24872]:   0%|          | 0/97 [00:08<?, ?it/s]
loss: 0.081856  [20224/24872]:   0%|          | 0/97 [00:08<?, ?it/s]
loss: 0.110678  [20480/24872]:   0%|          | 0/97 [00:08<?, ?it/s]
loss: 0.070477  [20736/24872]:   0%|          | 0/97 [00:08<?, ?it/s]
loss: 0.094229  [20992/24872]:   0%|          | 0/97 [00:08<?, ?it/s]
loss: 0.080556  [21248/24872]:   0%|          | 0/97 [00:09<?, ?it/s]
loss: 0.070920  [21504/24872]:   0%|          | 0/97 [00:09<?, ?it/s]
loss: 0.079379  [21760/24872]:   0%|          | 0/97 [00:09<?, ?it/s]
loss: 0.089015  [22016/24872]:   0%|          | 0/97 [00:09<?, ?it/s]
loss: 0.078206  [22272/24872]:   0%|          | 0/97 [00:09<?, ?it/s]
loss: 0.088392  [22528/24872]:   0%|          | 0/97 [00:09<?, ?it/s]
loss: 0.096822  [22784/24872]:   0%|          | 0/97 [00:09<?, ?it/s]
loss: 0.106387  [23040/24872]:   0%|          | 0/97 [00:09<?, ?it/s]
loss: 0.079841  [23296/24872]:   0%|          | 0/97 [00:09<?, ?it/s]
loss: 0.097949  [23552/24872]:   0%|          | 0/97 [00:10<?, ?it/s]
loss: 0.071224  [23808/24872]:   0%|          | 0/97 [00:10<?, ?it/s]
loss: 0.088309  [24064/24872]:   0%|          | 0/97 [00:10<?, ?it/s]
loss: 0.090664  [24320/24872]:   0%|          | 0/97 [00:10<?, ?it/s]
loss: 0.089607  [24576/24872]:   0%|          | 0/97 [00:10<?, ?it/s]
loss: 0.073694  [24832/24872]:   0%|          | 0/97 [00:10<?, ?it/s]
loss: 0.101283  [24872/24872]:   0%|          | 0/97 [00:10<?, ?it/s]
loss: 0.101283  [24872/24872]: : 98it [00:10,  9.20it/s]
Done!

test the network#

Do some qualitative tests: let the trained network predict some particle geometries and compare their Mie spectra with the target spectra.

# Qualitative check of the trained generator network: feed target spectra to
# the model, evaluate the suggested core-shell designs with the Mie solver and
# overlay target and obtained scattering spectra for 4 randomly drawn samples.

# pick a few of the training samples for testing.
# Note: Ideally tests should be done on separate samples!
sca_test = q_sca_target_test

# Pure inference: no backward pass follows, so gradient tracking is disabled
# to avoid recording an autograd graph through the network and the Mie solver.
with torch.no_grad():
    pred = model(sca_test)

    # evaluate Mie
    r_c_test, r_s_test, eps_c_test, eps_s_test = nn_pred_to_mie_geometry(pred)
    res_mie = pmd.multishell.cross_sections(
        k0,
        r_c=r_c_test,
        eps_c=eps_c_test,
        r_s=r_s_test,
        eps_s=eps_s_test,
        eps_env=eps_env,
        n_max=n_max,
    )

# plot
# The wavelength axis is identical for every curve: convert it only once.
wl_np = wl0.detach().cpu().numpy()
q_sca_mie = res_mie["q_sca"]

# random sample indices (drawn with replacement, so repeats are possible)
i_plot = np.random.randint(len(sca_test), size=4)
plt.figure(figsize=(12, 10))
for i_n, i in enumerate(i_plot):
    plt.subplot(2, 2, i_n + 1)
    plt.plot(
        wl_np,
        sca_test[i].detach().cpu().numpy(),
        label="reference",
    )
    plt.plot(
        wl_np,
        q_sca_mie[i].detach().cpu().numpy(),
        label="predicted particle",
    )
    plt.legend()
    plt.xlabel("wavelength (nm)")
    plt.ylabel("scat. efficiency")
plt.show()
ex 09 tandem

Total running time of the script: (6 minutes 49.999 seconds)

Estimated memory usage: 675 MB

Gallery generated by Sphinx-Gallery