Mie-informed tandem neural network#

Here, we demonstrate how to train a design generator network capable of suggesting core-shell particles with a specific spectral response, using PyMieDiff as a differentiable forward evaluator. The training pipeline follows the “Tandem” model:

target spectrum –> generator NN –> design –> Mie –> real spectrum

training loss is: MSE(target spec., real spec.)

author: O. Jackson, P. Wiecha, 06/2025

imports#

import time

import numpy as np
from tqdm import tqdm
import matplotlib.pyplot as plt
import torch
from torch import nn

import pymiediff as pmd

set up optimization target#

We set up the main configuration here: the pymiediff backend, the torch device, the parameter limits and the wavelengths.

# Global configuration shared by data generation, the network and training.

# PyMieDiff computational backend and the torch device holding all tensors
backend = "torch"
device = "cpu"

# size of the reference data set and truncation of the Mie series
N_samples = 25000
n_max = 4  # highest Mie order evaluated; kept fixed for performance
eps_env = torch.tensor(1.0, device=device)  # permittivity of the environment

# [lower, upper] bounds used when sampling geometries and when rescaling
# the network output (lengths share the unit of `wl0`)
lim_r = torch.tensor([40, 100], device=device)
lim_n_re = torch.tensor([1.5, 4.0], device=device)
lim_n_im = torch.tensor([0.0, 0.1], device=device)

# evaluation wavelengths and the matching wavenumbers k0 = 2*pi / wl0
wl0 = torch.linspace(start=400, end=800, steps=40, device=device)
k0 = (2 * torch.pi) / wl0

generate reference spectra#

We generate a large number of reference Mie spectra for existing particles; these will be used as design targets during training.

Note: this step could also be done without any physics knowledge, for example with artificial spectra (e.g. Lorentzians), or a scattering maximization loss.

# datagen: build reference spectra from random core-shell particles.
# Only the spectra are kept for training; the geometries are not used again.
def _sample_uniform(limits, *shape):
    """Draw uniform random values in [limits[0], limits[1]) with shape `shape`."""
    return torch.rand(*shape, device=device) * torch.diff(limits)[0] + limits[0]


# core radius and shell thickness are both drawn from the radius limits;
# the outer (shell) radius is their sum
r_c = _sample_uniform(lim_r, N_samples)
d_s = _sample_uniform(lim_r, N_samples)
r_s = r_c + d_s

# complex refractive indices: column 0 is the core, column 1 the shell
n_re = _sample_uniform(lim_n_re, N_samples, 2)
n_im = _sample_uniform(lim_n_im, N_samples, 2)
n = n_re + 1j * n_im

# low-level API: permittivities must be given as spectra (for vectorization),
# so each constant value is broadcast along the wavenumber axis
_flat_spectrum = torch.ones_like(k0).unsqueeze(0)
eps_c = _flat_spectrum * n[:, 0].unsqueeze(1) ** 2
eps_s = _flat_spectrum * n[:, 1].unsqueeze(1) ** 2

_particle_kwargs = dict(
    r_c=r_c,
    eps_c=eps_c,
    r_s=r_s,
    eps_s=eps_s,
    eps_env=eps_env,
    backend=backend,
    n_max=n_max,
)
all_particles = pmd.farfield.cross_sections(k0, **_particle_kwargs)

# the first N_test spectra are held back for testing, the rest are training targets
N_test = 128
_q_sca_all = all_particles["q_sca"]
q_sca_target = _q_sca_all[N_test:].to(dtype=torch.float32)
q_sca_target_test = _q_sca_all[:N_test].to(dtype=torch.float32)

# sanity check: plot one of the training target spectra
plt.plot(q_sca_target[30].detach().cpu().numpy())
ex 05 tandem
[<matplotlib.lines.Line2D object at 0x7f2052fde850>]

Neural network classes / functions#

Define the network model (a simple MLP), the mapping from the network output to a Mie geometry, and the training loop.

class FullyConnected(nn.Module):
    """Simple MLP that maps a target spectrum to normalized design parameters.

    The network has three hidden ReLU layers of equal width, followed by a
    linear output layer with a sigmoid, so every output lies in (0, 1).

    Args:
        hidden_dim: width of each hidden layer.
        in_dim: number of input features (spectral points). If ``None``
            (default), the length of the module-level ``k0`` wavenumber grid
            is used, which is the original behavior.
        out_dim: number of output values. The default of 6 matches the six
            columns read by ``nn_pred_to_mie_geometry``.
    """

    def __init__(self, hidden_dim=1024, in_dim=None, out_dim=6):
        super().__init__()
        if in_dim is None:
            # fall back to the global wavenumber grid only when the caller
            # did not specify the input width explicitly
            in_dim = len(k0)

        # attribute names and creation order are kept stable so that
        # state_dict keys and seeded initialisation do not change
        self.fc_in = nn.Linear(in_dim, hidden_dim)
        self.relu1 = nn.ReLU()
        self.fc_1 = nn.Linear(hidden_dim, hidden_dim)
        self.relu2 = nn.ReLU()
        self.fc_2 = nn.Linear(hidden_dim, hidden_dim)
        self.relu3 = nn.ReLU()
        self.fc_out = nn.Linear(hidden_dim, out_dim)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Return the sigmoid-normalized prediction for the input batch `x`."""
        x = self.fc_in(x)
        x = self.relu1(x)
        x = self.fc_1(x)
        x = self.relu2(x)
        x = self.fc_2(x)
        x = self.relu3(x)
        x = self.fc_out(x)
        x = self.sigmoid(x)
        return x


def nn_pred_to_mie_geometry(pred):
    """Convert normalized network outputs into core-shell Mie inputs.

    The columns of `pred` are read as: 0 -> core radius, 1 -> additional
    shell thickness, 2/3 -> real/imaginary part of the core refractive
    index, 4/5 -> real/imaginary part of the shell refractive index. Each
    column is scaled by the upper bound of the matching user-defined limit
    (`lim_r`, `lim_n_re`, `lim_n_im`).

    Returns:
        Tuple ``(r_c, r_s, eps_c, eps_s)``; the permittivities are broadcast
        along the wavenumber grid `k0`, giving one constant spectrum per sample.
    """
    # implicit normalization: the network output is multiplied by the
    # upper limits defined in the configuration
    r_upper = lim_r.max()
    n_re_upper = lim_n_re.max()
    n_im_upper = lim_n_im.max()

    def _constant_eps_spectrum(col_re, col_im):
        # complex refractive index per sample, squared to a permittivity and
        # repeated for every wavenumber
        n_complex = n_re_upper * pred[:, col_re] + n_im_upper * (1j * pred[:, col_im])
        return torch.ones_like(k0).unsqueeze(0) * n_complex.unsqueeze(1) ** 2

    r_c = r_upper * (pred[:, 0])
    # the shell radius is built on top of the core radius, so r_s >= r_c
    r_s = r_upper * (pred[:, 0] + pred[:, 1])

    eps_c = _constant_eps_spectrum(2, 3)
    eps_s = _constant_eps_spectrum(4, 5)

    return r_c, r_s, eps_c, eps_s


def train_loop(dataloader, model, loss_fn, optimizer):
    """Run one training epoch of the Mie-informed tandem network.

    For every batch of target spectra the model proposes core-shell designs,
    the designs are evaluated with Mie theory, and the loss between the
    resulting scattering spectra and the targets is backpropagated through
    the Mie calculation into the model.

    Args:
        dataloader: iterable yielding batches of target spectra; must support
            ``len()`` and expose a ``dataset`` attribute with a length.
        model: generator network mapping a batch of spectra to design
            parameters accepted by ``nn_pred_to_mie_geometry``.
        loss_fn: callable ``loss_fn(prediction, target)`` returning a scalar.
        optimizer: torch optimizer holding the parameters of `model`.
    """
    size = len(dataloader.dataset)
    # Set the model to training mode - important for batch normalization and
    # dropout layers. Unnecessary for this model, but kept as best practice.
    model.train()

    # len(dataloader) is the real number of batches, including a final
    # partial batch; `size // batch_size` under-counts in that case and
    # fails when the loader has no fixed batch_size.
    prog_bar = tqdm(enumerate(dataloader), total=len(dataloader))
    n_seen = 0  # running number of samples processed in this epoch
    for i_batch, X in prog_bar:
        # model prediction: generate core-shell particles
        pred = model(X)

        # evaluate Mie
        r_c, r_s, eps_c, eps_s = nn_pred_to_mie_geometry(pred)
        res_mie = pmd.farfield.cross_sections(
            k0,
            r_c=r_c,
            eps_c=eps_c,
            r_s=r_s,
            eps_s=eps_s,
            eps_env=eps_env,
            backend=backend,
            n_max=n_max,
        )
        q_sca_mie = res_mie["q_sca"].to(dtype=torch.float32)

        # calc. loss: spectrum of the proposed design vs. requested spectrum
        loss = loss_fn(q_sca_mie, X)

        # Backpropagation
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()

        # progress report: current loss and number of samples seen so far
        n_seen += len(X)
        prog_bar.set_description(
            f"loss: {loss.item():>7f}  [{n_seen:>5d}/{size:>5d}]"
        )

training the Mie-informed network#

Here we use a simple, manually optimized training schedule.

model = FullyConnected().to(device)

# manually tuned schedule: each entry is one training stage with its own
# batch size ("bs"), learning rate ("lr") and number of epochs ("n_ep")
confs = [
    {"bs": 32, "lr": 1e-4, "n_ep": 5},
    {"bs": 64, "lr": 1e-4, "n_ep": 5},
    {"bs": 128, "lr": 1e-4, "n_ep": 6},
    {"bs": 256, "lr": 1e-5, "n_ep": 6},
]

t_start = time.time()
for conf in confs:
    learning_rate, batch_size, epochs = conf["lr"], conf["bs"], conf["n_ep"]

    # banner announcing the settings of this stage
    rule = "-------------------------------"
    print(rule)
    print(f"LR={learning_rate}, batch_size={batch_size}")
    print(rule)

    # every stage starts with a fresh loss object, optimizer and data loader
    loss_fn = nn.MSELoss()
    optimizer = torch.optim.AdamW(model.parameters(), lr=learning_rate)
    train_dataloader = torch.utils.data.DataLoader(q_sca_target, batch_size=batch_size)

    for t in range(epochs):
        # elapsed time is measured from the start of the whole schedule
        print(f"Epoch {t+1}, time={time.time()-t_start:.2f}s")
        train_loop(train_dataloader, model, loss_fn, optimizer)
print("Done!")
-------------------------------
LR=0.0001, batch_size=32
-------------------------------
Epoch 1, time=0.00s

  0%|          | 0/777 [00:00<?, ?it/s]
loss: 6.329720  [   32/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 3.893605  [   64/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 2.541288  [   96/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 2.287702  [  128/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 2.912999  [  160/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 3.401494  [  192/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 3.521006  [  224/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 3.176595  [  256/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 2.333042  [  288/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 2.404341  [  320/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 1.987450  [  352/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 1.796337  [  384/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 1.087246  [  416/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 1.320459  [  448/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 2.110011  [  480/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 1.583368  [  512/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 1.274186  [  544/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 1.436142  [  576/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 1.275631  [  608/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 1.271340  [  640/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 1.306494  [  672/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 1.583768  [  704/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 1.279917  [  736/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 1.194271  [  768/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 1.499229  [  800/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 1.285443  [  832/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 1.175751  [  864/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 1.329294  [  896/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 1.234848  [  928/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 1.085934  [  960/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 1.258007  [  992/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 1.242399  [ 1024/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 1.236269  [ 1056/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 1.590491  [ 1088/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.943659  [ 1120/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.907389  [ 1152/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 1.193053  [ 1184/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.759891  [ 1216/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 1.179532  [ 1248/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 1.114766  [ 1280/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.923390  [ 1312/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.965239  [ 1344/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.950130  [ 1376/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.768834  [ 1408/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.819987  [ 1440/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.849760  [ 1472/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 1.004565  [ 1504/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.957278  [ 1536/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.868088  [ 1568/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 1.104992  [ 1600/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 1.021800  [ 1632/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 1.080884  [ 1664/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.796763  [ 1696/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.838064  [ 1728/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.895536  [ 1760/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.787959  [ 1792/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.837106  [ 1824/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.635757  [ 1856/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 1.013808  [ 1888/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.760683  [ 1920/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.968864  [ 1952/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.663567  [ 1984/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.915392  [ 2016/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.703101  [ 2048/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.944710  [ 2080/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.702104  [ 2112/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 1.008613  [ 2144/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.799965  [ 2176/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.963099  [ 2208/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 1.094274  [ 2240/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.837107  [ 2272/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.742020  [ 2304/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.873137  [ 2336/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.792163  [ 2368/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.934145  [ 2400/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 1.052865  [ 2432/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.885965  [ 2464/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.593445  [ 2496/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.713300  [ 2528/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.735547  [ 2560/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.793051  [ 2592/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.714031  [ 2624/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.805911  [ 2656/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.626959  [ 2688/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.709088  [ 2720/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.945150  [ 2752/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.794535  [ 2784/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.690588  [ 2816/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.728939  [ 2848/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.794471  [ 2880/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.616390  [ 2912/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.769166  [ 2944/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 1.009405  [ 2976/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.672475  [ 3008/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.712349  [ 3040/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.818138  [ 3072/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.834494  [ 3104/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.936695  [ 3136/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.947686  [ 3168/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.720525  [ 3200/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 1.275525  [ 3232/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.816448  [ 3264/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.640830  [ 3296/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.699512  [ 3328/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.724363  [ 3360/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.807510  [ 3392/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.525330  [ 3424/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.668358  [ 3456/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.835441  [ 3488/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 1.020612  [ 3520/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.741274  [ 3552/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.954894  [ 3584/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.719355  [ 3616/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.754294  [ 3648/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.712077  [ 3680/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.696096  [ 3712/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.764472  [ 3744/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.820821  [ 3776/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.784483  [ 3808/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.684618  [ 3840/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.542005  [ 3872/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.755062  [ 3904/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.638852  [ 3936/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.861135  [ 3968/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.738408  [ 4000/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.513977  [ 4032/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.579039  [ 4064/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.613166  [ 4096/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.450309  [ 4128/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.630916  [ 4160/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.688500  [ 4192/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.548975  [ 4224/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.571193  [ 4256/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.658275  [ 4288/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.666288  [ 4320/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.777157  [ 4352/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.600445  [ 4384/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.790007  [ 4416/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.661294  [ 4448/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.788263  [ 4480/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.487343  [ 4512/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.725966  [ 4544/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.510801  [ 4576/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.818338  [ 4608/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.464693  [ 4640/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.542450  [ 4672/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.615558  [ 4704/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.511763  [ 4736/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.630955  [ 4768/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.536544  [ 4800/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.706015  [ 4832/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.547444  [ 4864/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.594467  [ 4896/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.561115  [ 4928/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.624614  [ 4960/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.528944  [ 4992/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.616714  [ 5024/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.595726  [ 5056/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.498392  [ 5088/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.746957  [ 5120/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.494175  [ 5152/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.563299  [ 5184/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.452949  [ 5216/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.602027  [ 5248/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.498712  [ 5280/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.901854  [ 5312/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.510574  [ 5344/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.456499  [ 5376/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.721079  [ 5408/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.396174  [ 5440/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.570616  [ 5472/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.565692  [ 5504/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.535340  [ 5536/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.552577  [ 5568/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.628922  [ 5600/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.587739  [ 5632/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.589990  [ 5664/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.618312  [ 5696/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.625898  [ 5728/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.517803  [ 5760/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.554988  [ 5792/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.596735  [ 5824/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.665656  [ 5856/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.431821  [ 5888/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.441411  [ 5920/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.528411  [ 5952/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.579911  [ 5984/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.569648  [ 6016/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.685786  [ 6048/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.540844  [ 6080/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.629485  [ 6112/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.605282  [ 6144/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.464564  [ 6176/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.499036  [ 6208/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.536212  [ 6240/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.575692  [ 6272/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.728562  [ 6304/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.602117  [ 6336/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.495480  [ 6368/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.592051  [ 6400/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.643655  [ 6432/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.448587  [ 6464/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.568217  [ 6496/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.508891  [ 6528/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.675744  [ 6560/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.590426  [ 6592/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.528237  [ 6624/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.549971  [ 6656/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.634583  [ 6688/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.431949  [ 6720/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.514315  [ 6752/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.544474  [ 6784/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.494088  [ 6816/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.575113  [ 6848/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.501115  [ 6880/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.413808  [ 6912/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.610156  [ 6944/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.506997  [ 6976/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.608127  [ 7008/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.765208  [ 7040/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.610621  [ 7072/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.575672  [ 7104/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.366271  [ 7136/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.632053  [ 7168/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.567512  [ 7200/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.493878  [ 7232/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.455430  [ 7264/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.441612  [ 7296/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.478241  [ 7328/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.344425  [ 7360/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.503458  [ 7392/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.606787  [ 7424/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.529565  [ 7456/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.616328  [ 7488/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.436726  [ 7520/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.444392  [ 7552/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.585151  [ 7584/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.503471  [ 7616/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.531072  [ 7648/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.442733  [ 7680/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.559384  [ 7712/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.572397  [ 7744/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.564770  [ 7776/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.512597  [ 7808/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.631590  [ 7840/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.463754  [ 7872/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.568251  [ 7904/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.441372  [ 7936/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.631293  [ 7968/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.655692  [ 8000/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.423480  [ 8032/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.486350  [ 8064/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.386745  [ 8096/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.339537  [ 8128/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.446084  [ 8160/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.494765  [ 8192/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.443422  [ 8224/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.689093  [ 8256/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.655775  [ 8288/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.463297  [ 8320/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.491424  [ 8352/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.437737  [ 8384/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.404442  [ 8416/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.363106  [ 8448/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.429480  [ 8480/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.545790  [ 8512/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.522688  [ 8544/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.441147  [ 8576/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.399463  [ 8608/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.351862  [ 8640/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.559543  [ 8672/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.441236  [ 8704/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.514182  [ 8736/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.521758  [ 8768/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.466865  [ 8800/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.591162  [ 8832/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.386237  [ 8864/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.447706  [ 8896/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.587160  [ 8928/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.504463  [ 8960/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.415875  [ 8992/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.613529  [ 9024/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.461859  [ 9056/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.586805  [ 9088/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.412972  [ 9120/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.347374  [ 9152/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.530750  [ 9184/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.461045  [ 9216/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.386964  [ 9248/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.630727  [ 9280/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.501271  [ 9312/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.624975  [ 9344/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.385834  [ 9376/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.423949  [ 9408/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.454632  [ 9440/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.510700  [ 9472/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.278991  [ 9504/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.499247  [ 9536/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.365536  [ 9568/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.518824  [ 9600/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.419262  [ 9632/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.411808  [ 9664/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.484756  [ 9696/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.590918  [ 9728/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.618465  [ 9760/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.346579  [ 9792/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.482966  [ 9824/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.535122  [ 9856/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.317654  [ 9888/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.415425  [ 9920/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.364217  [ 9952/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.445557  [ 9984/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.753826  [10016/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.417078  [10048/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.528188  [10080/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.387783  [10112/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.457370  [10144/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.567906  [10176/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.387482  [10208/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.433058  [10240/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.460627  [10272/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.487534  [10304/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.463480  [10336/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.374397  [10368/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.486705  [10400/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.476468  [10432/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.362256  [10464/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.610280  [10496/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.616680  [10528/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.432170  [10560/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.514641  [10592/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.612449  [10624/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.351663  [10656/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.443617  [10688/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.458123  [10720/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.395775  [10752/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.579036  [10784/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.455268  [10816/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.455789  [10848/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.610950  [10880/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.361585  [10912/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.405387  [10944/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.436155  [10976/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.547301  [11008/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.427332  [11040/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.540416  [11072/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.443819  [11104/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.319156  [11136/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.417077  [11168/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.382108  [11200/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.349716  [11232/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.428245  [11264/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.408576  [11296/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.465712  [11328/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.441941  [11360/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.479455  [11392/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.502815  [11424/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.411159  [11456/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.395825  [11488/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.358852  [11520/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.456885  [11552/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.443839  [11584/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.463998  [11616/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.645580  [11648/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.348103  [11680/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.405294  [11712/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.392282  [11744/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.516236  [11776/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.479260  [11808/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.260089  [11840/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.420909  [11872/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.669966  [11904/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.417503  [11936/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.364877  [11968/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.379258  [12000/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.421287  [12032/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.362983  [12064/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.480573  [12096/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.513045  [12128/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.503120  [12160/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.519856  [12192/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.537149  [12224/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.308155  [12256/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.567256  [12288/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.505042  [12320/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.398751  [12352/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.587813  [12384/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.308555  [12416/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.636667  [12448/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.383471  [12480/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.479893  [12512/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.471464  [12544/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.407253  [12576/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.465827  [12608/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.311624  [12640/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.486688  [12672/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.473448  [12704/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.419422  [12736/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.466677  [12768/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.509296  [12800/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.472341  [12832/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.322334  [12864/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.352006  [12896/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.330168  [12928/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.455473  [12960/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.449922  [12992/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.456925  [13024/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.346309  [13056/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.412112  [13088/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.457046  [13120/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.464270  [13152/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.653894  [13184/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.343713  [13216/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.400381  [13248/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.496303  [13280/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.430535  [13312/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.360687  [13344/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.384525  [13376/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.481699  [13408/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.365675  [13440/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.369410  [13472/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.456542  [13504/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.409076  [13536/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.364806  [13568/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.527385  [13600/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.470875  [13632/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.415899  [13664/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.408331  [13696/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.377803  [13728/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.585184  [13760/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.386991  [13792/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.353043  [13824/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.410608  [13856/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.360129  [13888/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.252392  [13920/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.356516  [13952/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.349988  [13984/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.363187  [14016/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.427543  [14048/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.512881  [14080/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.463041  [14112/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.473239  [14144/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.385928  [14176/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.318924  [14208/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.455328  [14240/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.465207  [14272/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.375355  [14304/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.531881  [14336/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.398017  [14368/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.476002  [14400/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.306869  [14432/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.368717  [14464/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.271533  [14496/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.426079  [14528/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.389640  [14560/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.410404  [14592/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.601122  [14624/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.457782  [14656/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.481887  [14688/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.421771  [14720/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.484228  [14752/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.390803  [14784/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.393531  [14816/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.562275  [14848/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.370039  [14880/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.472965  [14912/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.518762  [14944/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.427457  [14976/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.403530  [15008/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.472418  [15040/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.366718  [15072/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.504167  [15104/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.363588  [15136/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.347451  [15168/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.378815  [15200/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.404185  [15232/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.339570  [15264/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.396061  [15296/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.401181  [15328/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.342418  [15360/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.474247  [15392/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.495032  [15424/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.512848  [15456/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.492040  [15488/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.367435  [15520/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.339502  [15552/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.506927  [15584/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.402065  [15616/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.259568  [15648/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.358387  [15680/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.415173  [15712/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.400764  [15744/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.424271  [15776/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.387451  [15808/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.433150  [15840/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.357338  [15872/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.313768  [15904/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.381364  [15936/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.467482  [15968/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.336002  [16000/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.333676  [16032/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.276012  [16064/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.461586  [16096/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.450690  [16128/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.342616  [16160/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.322037  [16192/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.298591  [16224/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.402644  [16256/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.449331  [16288/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.329494  [16320/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.339286  [16352/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.312135  [16384/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.414330  [16416/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.421167  [16448/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.452694  [16480/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.381357  [16512/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.414124  [16544/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.388635  [16576/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.433554  [16608/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.402536  [16640/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.341655  [16672/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.251612  [16704/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.381467  [16736/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.346403  [16768/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.541098  [16800/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.341047  [16832/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.336242  [16864/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.469228  [16896/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.299508  [16928/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.391402  [16960/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.519481  [16992/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.462513  [17024/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.338760  [17056/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.479337  [17088/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.340652  [17120/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.411665  [17152/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.344544  [17184/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.447766  [17216/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.432575  [17248/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.406267  [17280/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.337121  [17312/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.404347  [17344/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.702723  [17376/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.332910  [17408/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.445242  [17440/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.401157  [17472/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.284364  [17504/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.277158  [17536/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.468967  [17568/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.373510  [17600/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.324514  [17632/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.453248  [17664/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.401592  [17696/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.405795  [17728/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.332801  [17760/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.556869  [17792/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.420163  [17824/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.348372  [17856/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.435545  [17888/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.241980  [17920/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.394157  [17952/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.376585  [17984/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.339272  [18016/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.332349  [18048/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.500175  [18080/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.502744  [18112/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.493726  [18144/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.457253  [18176/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.237166  [18208/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.306373  [18240/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.385917  [18272/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.358460  [18304/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.393964  [18336/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.395847  [18368/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.283819  [18400/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.277966  [18432/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.378287  [18464/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.428370  [18496/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.383507  [18528/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.517246  [18560/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.290662  [18592/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.309014  [18624/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.488887  [18656/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.363337  [18688/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.444522  [18720/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.410293  [18752/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.530221  [18784/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.312412  [18816/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.365819  [18848/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.304132  [18880/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.357592  [18912/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.419300  [18944/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.436834  [18976/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.325914  [19008/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.412800  [19040/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.312843  [19072/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.240095  [19104/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.321073  [19136/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.334156  [19168/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.329189  [19200/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.383284  [19232/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.328358  [19264/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.374887  [19296/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.253884  [19328/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.339660  [19360/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.432111  [19392/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.447442  [19424/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.390074  [19456/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.375595  [19488/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.415793  [19520/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.309229  [19552/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.412419  [19584/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.370680  [19616/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.410280  [19648/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.312040  [19680/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.341263  [19712/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.282403  [19744/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.342683  [19776/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.310862  [19808/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.396040  [19840/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.345692  [19872/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.294340  [19904/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.335053  [19936/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.520679  [19968/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.383999  [20000/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.334889  [20032/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.387295  [20064/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.277892  [20096/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.303444  [20128/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.393313  [20160/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.400956  [20192/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.317335  [20224/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.421159  [20256/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.382120  [20288/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.358520  [20320/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.312756  [20352/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.269654  [20384/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.265919  [20416/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.408255  [20448/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.261578  [20480/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.301855  [20512/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.416620  [20544/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.472022  [20576/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.376215  [20608/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.540278  [20640/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.330407  [20672/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.416372  [20704/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.380810  [20736/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.369488  [20768/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.358772  [20800/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.389535  [20832/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.404725  [20864/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.233137  [20896/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.308009  [20928/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.477145  [20960/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.407479  [20992/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.379924  [21024/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.637139  [21056/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.365646  [21088/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.548720  [21120/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.349041  [21152/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.434521  [21184/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.421085  [21216/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.507729  [21248/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.376329  [21280/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.463043  [21312/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.383256  [21344/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.415021  [21376/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.467562  [21408/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.438886  [21440/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.367024  [21472/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.423843  [21504/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.405867  [21536/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.331084  [21568/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.401996  [21600/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.340189  [21632/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.314676  [21664/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.332276  [21696/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.367900  [21728/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.243867  [21760/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.303965  [21792/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.451398  [21824/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.379777  [21856/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.312852  [21888/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.432240  [21920/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.251165  [21952/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.391785  [21984/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.359098  [22016/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.414725  [22048/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.401304  [22080/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.407675  [22112/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.511489  [22144/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.246595  [22176/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.314853  [22208/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.306523  [22240/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.474073  [22272/24872]:   0%|          | 0/777 [00:30<?, ?it/s]
loss: 0.474073  [22272/24872]:  90%|████████▉ | 696/777 [00:30<00:03, 23.17it/s]
loss: 0.318255  [22304/24872]:  90%|████████▉ | 696/777 [00:30<00:03, 23.17it/s]
loss: 0.385439  [22336/24872]:  90%|████████▉ | 696/777 [00:30<00:03, 23.17it/s]
loss: 0.304020  [22368/24872]:  90%|████████▉ | 696/777 [00:30<00:03, 23.17it/s]
loss: 0.300978  [22400/24872]:  90%|████████▉ | 696/777 [00:30<00:03, 23.17it/s]
loss: 0.367343  [22432/24872]:  90%|████████▉ | 696/777 [00:30<00:03, 23.17it/s]
loss: 0.524941  [22464/24872]:  90%|████████▉ | 696/777 [00:30<00:03, 23.17it/s]
loss: 0.399037  [22496/24872]:  90%|████████▉ | 696/777 [00:30<00:03, 23.17it/s]
loss: 0.329817  [22528/24872]:  90%|████████▉ | 696/777 [00:30<00:03, 23.17it/s]
loss: 0.291814  [22560/24872]:  90%|████████▉ | 696/777 [00:30<00:03, 23.17it/s]
loss: 0.294189  [22592/24872]:  90%|████████▉ | 696/777 [00:30<00:03, 23.17it/s]
loss: 0.207017  [22624/24872]:  90%|████████▉ | 696/777 [00:30<00:03, 23.17it/s]
loss: 0.284693  [22656/24872]:  90%|████████▉ | 696/777 [00:30<00:03, 23.17it/s]
loss: 0.336812  [22688/24872]:  90%|████████▉ | 696/777 [00:30<00:03, 23.17it/s]
loss: 0.347748  [22720/24872]:  90%|████████▉ | 696/777 [00:30<00:03, 23.17it/s]
loss: 0.369530  [22752/24872]:  90%|████████▉ | 696/777 [00:30<00:03, 23.17it/s]
loss: 0.270879  [22784/24872]:  90%|████████▉ | 696/777 [00:30<00:03, 23.17it/s]
loss: 0.312260  [22816/24872]:  90%|████████▉ | 696/777 [00:30<00:03, 23.17it/s]
loss: 0.375742  [22848/24872]:  90%|████████▉ | 696/777 [00:30<00:03, 23.17it/s]
loss: 0.274678  [22880/24872]:  90%|████████▉ | 696/777 [00:30<00:03, 23.17it/s]
loss: 0.302937  [22912/24872]:  90%|████████▉ | 696/777 [00:30<00:03, 23.17it/s]
loss: 0.423826  [22944/24872]:  90%|████████▉ | 696/777 [00:30<00:03, 23.17it/s]
loss: 0.434443  [22976/24872]:  90%|████████▉ | 696/777 [00:30<00:03, 23.17it/s]
loss: 0.357803  [23008/24872]:  90%|████████▉ | 696/777 [00:31<00:03, 23.17it/s]
loss: 0.372709  [23040/24872]:  90%|████████▉ | 696/777 [00:31<00:03, 23.17it/s]
loss: 0.450180  [23072/24872]:  90%|████████▉ | 696/777 [00:31<00:03, 23.17it/s]
loss: 0.323311  [23104/24872]:  90%|████████▉ | 696/777 [00:31<00:03, 23.17it/s]
loss: 0.455277  [23136/24872]:  90%|████████▉ | 696/777 [00:31<00:03, 23.17it/s]
loss: 0.404404  [23168/24872]:  90%|████████▉ | 696/777 [00:31<00:03, 23.17it/s]
loss: 0.327071  [23200/24872]:  90%|████████▉ | 696/777 [00:31<00:03, 23.17it/s]
loss: 0.396691  [23232/24872]:  90%|████████▉ | 696/777 [00:31<00:03, 23.17it/s]
loss: 0.211265  [23264/24872]:  90%|████████▉ | 696/777 [00:31<00:03, 23.17it/s]
loss: 0.295775  [23296/24872]:  90%|████████▉ | 696/777 [00:31<00:03, 23.17it/s]
loss: 0.313645  [23328/24872]:  90%|████████▉ | 696/777 [00:31<00:03, 23.17it/s]
loss: 0.264674  [23360/24872]:  90%|████████▉ | 696/777 [00:31<00:03, 23.17it/s]
loss: 0.341770  [23392/24872]:  90%|████████▉ | 696/777 [00:31<00:03, 23.17it/s]
loss: 0.391698  [23424/24872]:  90%|████████▉ | 696/777 [00:31<00:03, 23.17it/s]
loss: 0.317377  [23456/24872]:  90%|████████▉ | 696/777 [00:31<00:03, 23.17it/s]
loss: 0.352614  [23488/24872]:  90%|████████▉ | 696/777 [00:31<00:03, 23.17it/s]
loss: 0.318121  [23520/24872]:  90%|████████▉ | 696/777 [00:31<00:03, 23.17it/s]
loss: 0.246367  [23552/24872]:  90%|████████▉ | 696/777 [00:31<00:03, 23.17it/s]
loss: 0.320248  [23584/24872]:  90%|████████▉ | 696/777 [00:31<00:03, 23.17it/s]
loss: 0.355455  [23616/24872]:  90%|████████▉ | 696/777 [00:31<00:03, 23.17it/s]
loss: 0.499092  [23648/24872]:  90%|████████▉ | 696/777 [00:31<00:03, 23.17it/s]
loss: 0.293613  [23680/24872]:  90%|████████▉ | 696/777 [00:31<00:03, 23.17it/s]
loss: 0.312403  [23712/24872]:  90%|████████▉ | 696/777 [00:31<00:03, 23.17it/s]
loss: 0.411463  [23744/24872]:  90%|████████▉ | 696/777 [00:32<00:03, 23.17it/s]
loss: 0.325645  [23776/24872]:  90%|████████▉ | 696/777 [00:32<00:03, 23.17it/s]
loss: 0.416594  [23808/24872]:  90%|████████▉ | 696/777 [00:32<00:03, 23.17it/s]
loss: 0.277857  [23840/24872]:  90%|████████▉ | 696/777 [00:32<00:03, 23.17it/s]
loss: 0.333782  [23872/24872]:  90%|████████▉ | 696/777 [00:32<00:03, 23.17it/s]
loss: 0.331621  [23904/24872]:  90%|████████▉ | 696/777 [00:32<00:03, 23.17it/s]
loss: 0.424663  [23936/24872]:  90%|████████▉ | 696/777 [00:32<00:03, 23.17it/s]
loss: 0.345341  [23968/24872]:  90%|████████▉ | 696/777 [00:32<00:03, 23.17it/s]
loss: 0.338926  [24000/24872]:  90%|████████▉ | 696/777 [00:32<00:03, 23.17it/s]
loss: 0.262642  [24032/24872]:  90%|████████▉ | 696/777 [00:32<00:03, 23.17it/s]
loss: 0.348744  [24064/24872]:  90%|████████▉ | 696/777 [00:32<00:03, 23.17it/s]
loss: 0.293378  [24096/24872]:  90%|████████▉ | 696/777 [00:32<00:03, 23.17it/s]
loss: 0.339389  [24128/24872]:  90%|████████▉ | 696/777 [00:32<00:03, 23.17it/s]
loss: 0.303116  [24160/24872]:  90%|████████▉ | 696/777 [00:32<00:03, 23.17it/s]
loss: 0.211559  [24192/24872]:  90%|████████▉ | 696/777 [00:32<00:03, 23.17it/s]
loss: 0.156832  [24224/24872]:  90%|████████▉ | 696/777 [00:32<00:03, 23.17it/s]
loss: 0.462913  [24256/24872]:  90%|████████▉ | 696/777 [00:32<00:03, 23.17it/s]
loss: 0.207359  [24288/24872]:  90%|████████▉ | 696/777 [00:32<00:03, 23.17it/s]
loss: 0.268026  [24320/24872]:  90%|████████▉ | 696/777 [00:32<00:03, 23.17it/s]
loss: 0.343841  [24352/24872]:  90%|████████▉ | 696/777 [00:32<00:03, 23.17it/s]
loss: 0.285829  [24384/24872]:  90%|████████▉ | 696/777 [00:32<00:03, 23.17it/s]
loss: 0.383778  [24416/24872]:  90%|████████▉ | 696/777 [00:32<00:03, 23.17it/s]
loss: 0.326222  [24448/24872]:  90%|████████▉ | 696/777 [00:32<00:03, 23.17it/s]
loss: 0.301138  [24480/24872]:  90%|████████▉ | 696/777 [00:32<00:03, 23.17it/s]
loss: 0.229546  [24512/24872]:  90%|████████▉ | 696/777 [00:33<00:03, 23.17it/s]
loss: 0.294926  [24544/24872]:  90%|████████▉ | 696/777 [00:33<00:03, 23.17it/s]
loss: 0.275142  [24576/24872]:  90%|████████▉ | 696/777 [00:33<00:03, 23.17it/s]
loss: 0.294738  [24608/24872]:  90%|████████▉ | 696/777 [00:33<00:03, 23.17it/s]
loss: 0.353833  [24640/24872]:  90%|████████▉ | 696/777 [00:33<00:03, 23.17it/s]
loss: 0.305140  [24672/24872]:  90%|████████▉ | 696/777 [00:33<00:03, 23.17it/s]
loss: 0.352088  [24704/24872]:  90%|████████▉ | 696/777 [00:33<00:03, 23.17it/s]
loss: 0.254173  [24736/24872]:  90%|████████▉ | 696/777 [00:33<00:03, 23.17it/s]
loss: 0.389734  [24768/24872]:  90%|████████▉ | 696/777 [00:33<00:03, 23.17it/s]
loss: 0.332022  [24800/24872]:  90%|████████▉ | 696/777 [00:33<00:03, 23.17it/s]
loss: 0.321424  [24832/24872]:  90%|████████▉ | 696/777 [00:33<00:03, 23.17it/s]
loss: 0.315439  [24864/24872]:  90%|████████▉ | 696/777 [00:33<00:03, 23.17it/s]
loss: 0.178564  [24872/24872]:  90%|████████▉ | 696/777 [00:33<00:03, 23.17it/s]
loss: 0.178564  [24872/24872]: : 778it [00:33, 23.21it/s]
Epoch 2, time=33.54s

  0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.404187  [   32/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.222783  [   64/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.416474  [   96/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.366615  [  128/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.366640  [  160/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.281307  [  192/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.447916  [  224/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.468226  [  256/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.294580  [  288/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.309417  [  320/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.337174  [  352/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.352606  [  384/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.393122  [  416/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.294407  [  448/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.449272  [  480/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.448022  [  512/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.439467  [  544/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.459876  [  576/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.409790  [  608/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.359737  [  640/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.468563  [  672/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.300970  [  704/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.426303  [  736/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.417236  [  768/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.358353  [  800/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.372056  [  832/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.537962  [  864/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.519733  [  896/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.370769  [  928/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.393825  [  960/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.297405  [  992/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.243266  [ 1024/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.324053  [ 1056/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.435341  [ 1088/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.346470  [ 1120/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.311440  [ 1152/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.501615  [ 1184/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.355588  [ 1216/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.294676  [ 1248/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.286669  [ 1280/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.389193  [ 1312/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.276141  [ 1344/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.287590  [ 1376/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.314632  [ 1408/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.396786  [ 1440/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.332931  [ 1472/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.360740  [ 1504/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.280905  [ 1536/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.328996  [ 1568/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.365030  [ 1600/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.338332  [ 1632/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.336948  [ 1664/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.335865  [ 1696/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.275579  [ 1728/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.376058  [ 1760/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.177390  [ 1792/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.304054  [ 1824/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.149808  [ 1856/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.337018  [ 1888/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.305091  [ 1920/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.376227  [ 1952/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.373562  [ 1984/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.267463  [ 2016/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.288083  [ 2048/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.306503  [ 2080/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.267116  [ 2112/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.343400  [ 2144/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.311010  [ 2176/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.278639  [ 2208/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.452647  [ 2240/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.315521  [ 2272/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.316880  [ 2304/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.428423  [ 2336/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.368057  [ 2368/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.340791  [ 2400/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.494836  [ 2432/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.349736  [ 2464/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.330815  [ 2496/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.291714  [ 2528/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.289061  [ 2560/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.385267  [ 2592/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.237267  [ 2624/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.356911  [ 2656/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.304261  [ 2688/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.346562  [ 2720/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.353015  [ 2752/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.311497  [ 2784/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.430441  [ 2816/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.384459  [ 2848/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.297296  [ 2880/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.241826  [ 2912/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.390781  [ 2944/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.419642  [ 2976/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.292457  [ 3008/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.340277  [ 3040/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.368538  [ 3072/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.288387  [ 3104/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.349304  [ 3136/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.411761  [ 3168/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.285357  [ 3200/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.411088  [ 3232/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.282391  [ 3264/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.239853  [ 3296/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.242545  [ 3328/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.304132  [ 3360/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.345762  [ 3392/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.221415  [ 3424/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.243758  [ 3456/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.282584  [ 3488/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.434496  [ 3520/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.305887  [ 3552/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.447281  [ 3584/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.343345  [ 3616/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.389238  [ 3648/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.344813  [ 3680/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.393981  [ 3712/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.324465  [ 3744/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.295813  [ 3776/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.271267  [ 3808/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.349221  [ 3840/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.226667  [ 3872/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.228687  [ 3904/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.283285  [ 3936/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.365009  [ 3968/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.379714  [ 4000/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.234699  [ 4032/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.292937  [ 4064/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.308575  [ 4096/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.255450  [ 4128/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.345025  [ 4160/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.249680  [ 4192/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.296573  [ 4224/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.242584  [ 4256/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.267263  [ 4288/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.293558  [ 4320/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.246383  [ 4352/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.266444  [ 4384/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.370522  [ 4416/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.390460  [ 4448/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.420678  [ 4480/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.195339  [ 4512/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.326830  [ 4544/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.223725  [ 4576/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.321615  [ 4608/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.271304  [ 4640/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.232657  [ 4672/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.264665  [ 4704/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.255390  [ 4736/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.332312  [ 4768/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.312686  [ 4800/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.299886  [ 4832/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.307673  [ 4864/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.346623  [ 4896/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.366023  [ 4928/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.283363  [ 4960/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.289699  [ 4992/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.370162  [ 5024/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.348422  [ 5056/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.327698  [ 5088/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.363501  [ 5120/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.291138  [ 5152/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.353512  [ 5184/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.227888  [ 5216/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.247526  [ 5248/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.285905  [ 5280/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.495619  [ 5312/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.310965  [ 5344/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.267652  [ 5376/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.518278  [ 5408/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.188295  [ 5440/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.361686  [ 5472/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.338128  [ 5504/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.334654  [ 5536/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.362402  [ 5568/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.330193  [ 5600/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.440170  [ 5632/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.395013  [ 5664/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.432696  [ 5696/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.402553  [ 5728/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.293071  [ 5760/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.339086  [ 5792/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.336945  [ 5824/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.343448  [ 5856/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.243287  [ 5888/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.256809  [ 5920/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.332577  [ 5952/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.311804  [ 5984/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.298718  [ 6016/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.385464  [ 6048/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.285886  [ 6080/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.399443  [ 6112/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.372270  [ 6144/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.236679  [ 6176/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.290068  [ 6208/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.306590  [ 6240/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.352756  [ 6272/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.253629  [ 6304/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.374739  [ 6336/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.280361  [ 6368/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.469634  [ 6400/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.388553  [ 6432/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.239059  [ 6464/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.399801  [ 6496/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.317513  [ 6528/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.316906  [ 6560/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.306220  [ 6592/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.369219  [ 6624/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.280867  [ 6656/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.427372  [ 6688/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.216892  [ 6720/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.284760  [ 6752/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.336783  [ 6784/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.266012  [ 6816/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.329342  [ 6848/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.250642  [ 6880/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.298598  [ 6912/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.349032  [ 6944/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.306966  [ 6976/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.280143  [ 7008/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.458781  [ 7040/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.368771  [ 7072/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.387121  [ 7104/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.203997  [ 7136/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.353558  [ 7168/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.315715  [ 7200/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.311955  [ 7232/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.337573  [ 7264/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.262616  [ 7296/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.283238  [ 7328/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.209279  [ 7360/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.293008  [ 7392/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.298489  [ 7424/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.306633  [ 7456/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.319069  [ 7488/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.234782  [ 7520/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.256492  [ 7552/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.295178  [ 7584/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.323459  [ 7616/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.320010  [ 7648/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.241534  [ 7680/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.360000  [ 7712/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.426168  [ 7744/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.359310  [ 7776/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.248258  [ 7808/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.252643  [ 7840/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.270086  [ 7872/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.318997  [ 7904/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.285932  [ 7936/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.340727  [ 7968/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.359038  [ 8000/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.327194  [ 8032/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.321555  [ 8064/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.210491  [ 8096/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.224990  [ 8128/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.260065  [ 8160/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.324513  [ 8192/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.301487  [ 8224/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.497893  [ 8256/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.449199  [ 8288/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.348342  [ 8320/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.296160  [ 8352/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.328815  [ 8384/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.272400  [ 8416/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.296241  [ 8448/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.268350  [ 8480/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.349449  [ 8512/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.334523  [ 8544/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.285676  [ 8576/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.321084  [ 8608/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.252711  [ 8640/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.394883  [ 8672/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.265245  [ 8704/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.402638  [ 8736/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.392680  [ 8768/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.375532  [ 8800/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.389293  [ 8832/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.253674  [ 8864/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.319077  [ 8896/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.296862  [ 8928/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.398439  [ 8960/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.232143  [ 8992/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.437296  [ 9024/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.310883  [ 9056/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.402364  [ 9088/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.303923  [ 9120/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.317515  [ 9152/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.397684  [ 9184/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.318533  [ 9216/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.319588  [ 9248/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.411402  [ 9280/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.406419  [ 9312/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.410094  [ 9344/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.252225  [ 9376/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.242737  [ 9408/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.307084  [ 9440/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.388508  [ 9472/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.200347  [ 9504/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.350593  [ 9536/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.300989  [ 9568/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.383450  [ 9600/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.351921  [ 9632/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.336720  [ 9664/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.402645  [ 9696/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.412636  [ 9728/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.383352  [ 9760/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.280165  [ 9792/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.356883  [ 9824/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.309602  [ 9856/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.223391  [ 9888/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.227026  [ 9920/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.246219  [ 9952/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.261220  [ 9984/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.470905  [10016/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.330083  [10048/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.448709  [10080/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.220374  [10112/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.321000  [10144/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.407456  [10176/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.267270  [10208/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.343823  [10240/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.442397  [10272/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.434061  [10304/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.308188  [10336/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.252437  [10368/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.349576  [10400/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.272344  [10432/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.273428  [10464/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.310989  [10496/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.526844  [10528/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.277507  [10560/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.312454  [10592/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.358572  [10624/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.275493  [10656/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.311259  [10688/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.316188  [10720/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.240363  [10752/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.394201  [10784/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.292890  [10816/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.265709  [10848/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.340715  [10880/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.270533  [10912/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.281121  [10944/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.246585  [10976/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.399455  [11008/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.271340  [11040/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.305677  [11072/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.256788  [11104/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.202451  [11136/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.226117  [11168/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.211703  [11200/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.210075  [11232/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.323807  [11264/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.229664  [11296/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.295731  [11328/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.243962  [11360/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.304177  [11392/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.397897  [11424/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.295433  [11456/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.279442  [11488/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.204203  [11520/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.274268  [11552/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.320692  [11584/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.314593  [11616/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.419662  [11648/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.313130  [11680/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.219781  [11712/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.279706  [11744/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.359939  [11776/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.381696  [11808/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.172126  [11840/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.292853  [11872/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.516563  [11904/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.259997  [11936/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.292091  [11968/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.264924  [12000/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.254121  [12032/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.273956  [12064/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.338114  [12096/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.398801  [12128/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.331722  [12160/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.345047  [12192/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.464785  [12224/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.189037  [12256/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.348822  [12288/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.283465  [12320/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.289327  [12352/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.341995  [12384/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.206763  [12416/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.487000  [12448/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.296177  [12480/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.325304  [12512/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.268940  [12544/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.256029  [12576/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.301209  [12608/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.208601  [12640/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.363241  [12672/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.339706  [12704/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.243451  [12736/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.344279  [12768/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.363852  [12800/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.323082  [12832/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.252399  [12864/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.252165  [12896/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.219570  [12928/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.286515  [12960/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.326460  [12992/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.283498  [13024/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.306454  [13056/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.254491  [13088/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.388216  [13120/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.344106  [13152/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.412341  [13184/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.290031  [13216/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.290956  [13248/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.320097  [13280/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.307864  [13312/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.366827  [13344/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.351775  [13376/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.320801  [13408/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.243664  [13440/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.268974  [13472/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.360649  [13504/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.250265  [13536/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.275754  [13568/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.454216  [13600/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.357454  [13632/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.265860  [13664/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.385996  [13696/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.265286  [13728/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.416871  [13760/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.267191  [13792/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.243273  [13824/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.248959  [13856/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.314400  [13888/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.197834  [13920/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.262697  [13952/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.233506  [13984/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.222538  [14016/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.253393  [14048/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.372436  [14080/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.323163  [14112/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.307986  [14144/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.229529  [14176/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.271663  [14208/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.313225  [14240/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.432033  [14272/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.279519  [14304/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.318640  [14336/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.275702  [14368/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.338103  [14400/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.229650  [14432/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.210635  [14464/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.210573  [14496/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.283859  [14528/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.238172  [14560/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.295504  [14592/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.354808  [14624/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.391939  [14656/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.319352  [14688/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.226607  [14720/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.348103  [14752/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.238016  [14784/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.256586  [14816/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.395583  [14848/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.285351  [14880/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.329104  [14912/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.465744  [14944/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.320771  [14976/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.294489  [15008/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.345198  [15040/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.238203  [15072/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.349707  [15104/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.286099  [15136/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.257103  [15168/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.257189  [15200/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.257008  [15232/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.235838  [15264/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.293637  [15296/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.279381  [15328/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.261288  [15360/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.343668  [15392/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.268121  [15424/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.420003  [15456/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.325066  [15488/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.252595  [15520/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.208970  [15552/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.347247  [15584/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.256623  [15616/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.141329  [15648/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.209532  [15680/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.318564  [15712/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.287385  [15744/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.320325  [15776/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.263103  [15808/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.303626  [15840/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.282835  [15872/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.230926  [15904/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.272082  [15936/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.331788  [15968/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.237012  [16000/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.248460  [16032/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.238833  [16064/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.355090  [16096/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.352567  [16128/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.248712  [16160/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.254282  [16192/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.238556  [16224/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.327216  [16256/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.316663  [16288/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.283011  [16320/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.282302  [16352/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.216563  [16384/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.342446  [16416/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.395954  [16448/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.342284  [16480/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.312585  [16512/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.338483  [16544/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.307981  [16576/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.323758  [16608/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.319060  [16640/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.311441  [16672/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.230287  [16704/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.279660  [16736/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.274584  [16768/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.420618  [16800/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.253333  [16832/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.246271  [16864/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.335362  [16896/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.194994  [16928/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.280700  [16960/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.410838  [16992/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.345684  [17024/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.228678  [17056/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.292124  [17088/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.249538  [17120/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.254231  [17152/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.270003  [17184/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.357505  [17216/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.324518  [17248/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.241968  [17280/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.269314  [17312/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.349189  [17344/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.559540  [17376/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.229467  [17408/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.361576  [17440/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.332507  [17472/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.243955  [17504/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.212564  [17536/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.378786  [17568/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.316863  [17600/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.301648  [17632/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.387508  [17664/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.270123  [17696/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.294147  [17728/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.261480  [17760/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.359473  [17792/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.335499  [17824/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.274487  [17856/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.304894  [17888/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.177809  [17920/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.304081  [17952/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.253184  [17984/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.301571  [18016/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.272544  [18048/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.311051  [18080/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.339038  [18112/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.385356  [18144/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.373268  [18176/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.188310  [18208/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.203181  [18240/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.287964  [18272/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.300931  [18304/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.278614  [18336/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.364993  [18368/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.248773  [18400/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.199407  [18432/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.306955  [18464/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.359762  [18496/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.296046  [18528/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.380188  [18560/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.231846  [18592/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.244490  [18624/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.392271  [18656/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.283235  [18688/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.337450  [18720/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.375280  [18752/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.408402  [18784/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.241304  [18816/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.287370  [18848/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.258334  [18880/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.279727  [18912/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.294688  [18944/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.304290  [18976/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.258872  [19008/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.348694  [19040/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.219793  [19072/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.150871  [19104/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.292667  [19136/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.298407  [19168/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.277483  [19200/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.309332  [19232/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.267056  [19264/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.291566  [19296/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.194036  [19328/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.282356  [19360/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.354813  [19392/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.304988  [19424/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.350156  [19456/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.302077  [19488/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.332002  [19520/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.274609  [19552/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.292531  [19584/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.258912  [19616/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.361263  [19648/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.204122  [19680/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.272575  [19712/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.202510  [19744/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.267457  [19776/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.246829  [19808/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.271347  [19840/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.262415  [19872/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.233462  [19904/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.242443  [19936/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.324369  [19968/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.294034  [20000/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.229191  [20032/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.270525  [20064/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.208865  [20096/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.206740  [20128/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.299406  [20160/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.269340  [20192/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.260271  [20224/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.305995  [20256/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.268686  [20288/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.260224  [20320/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.244167  [20352/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.181409  [20384/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.203661  [20416/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.332365  [20448/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.205709  [20480/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.239966  [20512/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.288247  [20544/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.277318  [20576/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.294088  [20608/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.370477  [20640/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.266949  [20672/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.293571  [20704/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.275205  [20736/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.283663  [20768/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.244079  [20800/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.231219  [20832/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.320830  [20864/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.165628  [20896/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.216472  [20928/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.320410  [20960/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.236453  [20992/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.239322  [21024/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.361837  [21056/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.229757  [21088/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.365846  [21120/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.228806  [21152/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.325530  [21184/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.252340  [21216/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.374684  [21248/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.237147  [21280/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.304604  [21312/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.254577  [21344/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.282884  [21376/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.304489  [21408/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.340912  [21440/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.307930  [21472/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.300965  [21504/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.340691  [21536/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.220342  [21568/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.299362  [21600/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.252016  [21632/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.262662  [21664/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.285231  [21696/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.280752  [21728/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.182458  [21760/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.249164  [21792/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.360709  [21824/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.307535  [21856/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.195823  [21888/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.314979  [21920/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.168208  [21952/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.284077  [21984/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.269507  [22016/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.338146  [22048/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.280872  [22080/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.308379  [22112/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.296550  [22144/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.176737  [22176/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.253822  [22208/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.221412  [22240/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.304450  [22272/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.276507  [22304/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.327541  [22336/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.239820  [22368/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.249446  [22400/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.286531  [22432/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.401572  [22464/24872]:   0%|          | 0/777 [00:30<?, ?it/s]
loss: 0.401572  [22464/24872]:  90%|█████████ | 702/777 [00:30<00:03, 23.39it/s]
loss: 0.375996  [22496/24872]:  90%|█████████ | 702/777 [00:30<00:03, 23.39it/s]
loss: 0.234310  [22528/24872]:  90%|█████████ | 702/777 [00:30<00:03, 23.39it/s]
loss: 0.218044  [22560/24872]:  90%|█████████ | 702/777 [00:30<00:03, 23.39it/s]
loss: 0.246361  [22592/24872]:  90%|█████████ | 702/777 [00:30<00:03, 23.39it/s]
loss: 0.155620  [22624/24872]:  90%|█████████ | 702/777 [00:30<00:03, 23.39it/s]
loss: 0.200772  [22656/24872]:  90%|█████████ | 702/777 [00:30<00:03, 23.39it/s]
loss: 0.231084  [22688/24872]:  90%|█████████ | 702/777 [00:30<00:03, 23.39it/s]
loss: 0.205220  [22720/24872]:  90%|█████████ | 702/777 [00:30<00:03, 23.39it/s]
loss: 0.242573  [22752/24872]:  90%|█████████ | 702/777 [00:30<00:03, 23.39it/s]
loss: 0.209850  [22784/24872]:  90%|█████████ | 702/777 [00:30<00:03, 23.39it/s]
loss: 0.236021  [22816/24872]:  90%|█████████ | 702/777 [00:30<00:03, 23.39it/s]
loss: 0.279787  [22848/24872]:  90%|█████████ | 702/777 [00:30<00:03, 23.39it/s]
loss: 0.147888  [22880/24872]:  90%|█████████ | 702/777 [00:30<00:03, 23.39it/s]
loss: 0.215168  [22912/24872]:  90%|█████████ | 702/777 [00:30<00:03, 23.39it/s]
loss: 0.298108  [22944/24872]:  90%|█████████ | 702/777 [00:30<00:03, 23.39it/s]
loss: 0.347877  [22976/24872]:  90%|█████████ | 702/777 [00:30<00:03, 23.39it/s]
loss: 0.267019  [23008/24872]:  90%|█████████ | 702/777 [00:30<00:03, 23.39it/s]
loss: 0.265988  [23040/24872]:  90%|█████████ | 702/777 [00:30<00:03, 23.39it/s]
loss: 0.355325  [23072/24872]:  90%|█████████ | 702/777 [00:30<00:03, 23.39it/s]
loss: 0.235236  [23104/24872]:  90%|█████████ | 702/777 [00:30<00:03, 23.39it/s]
loss: 0.363528  [23136/24872]:  90%|█████████ | 702/777 [00:30<00:03, 23.39it/s]
loss: 0.343803  [23168/24872]:  90%|█████████ | 702/777 [00:30<00:03, 23.39it/s]
loss: 0.248549  [23200/24872]:  90%|█████████ | 702/777 [00:31<00:03, 23.39it/s]
loss: 0.297807  [23232/24872]:  90%|█████████ | 702/777 [00:31<00:03, 23.39it/s]
loss: 0.152982  [23264/24872]:  90%|█████████ | 702/777 [00:31<00:03, 23.39it/s]
loss: 0.221969  [23296/24872]:  90%|█████████ | 702/777 [00:31<00:03, 23.39it/s]
loss: 0.253528  [23328/24872]:  90%|█████████ | 702/777 [00:31<00:03, 23.39it/s]
loss: 0.208997  [23360/24872]:  90%|█████████ | 702/777 [00:31<00:03, 23.39it/s]
loss: 0.269488  [23392/24872]:  90%|█████████ | 702/777 [00:31<00:03, 23.39it/s]
loss: 0.309640  [23424/24872]:  90%|█████████ | 702/777 [00:31<00:03, 23.39it/s]
loss: 0.223017  [23456/24872]:  90%|█████████ | 702/777 [00:31<00:03, 23.39it/s]
loss: 0.245581  [23488/24872]:  90%|█████████ | 702/777 [00:31<00:03, 23.39it/s]
loss: 0.251413  [23520/24872]:  90%|█████████ | 702/777 [00:31<00:03, 23.39it/s]
loss: 0.180492  [23552/24872]:  90%|█████████ | 702/777 [00:31<00:03, 23.39it/s]
loss: 0.295872  [23584/24872]:  90%|█████████ | 702/777 [00:31<00:03, 23.39it/s]
loss: 0.266918  [23616/24872]:  90%|█████████ | 702/777 [00:31<00:03, 23.39it/s]
loss: 0.421467  [23648/24872]:  90%|█████████ | 702/777 [00:31<00:03, 23.39it/s]
loss: 0.206171  [23680/24872]:  90%|█████████ | 702/777 [00:31<00:03, 23.39it/s]
loss: 0.247079  [23712/24872]:  90%|█████████ | 702/777 [00:31<00:03, 23.39it/s]
loss: 0.252082  [23744/24872]:  90%|█████████ | 702/777 [00:31<00:03, 23.39it/s]
loss: 0.272842  [23776/24872]:  90%|█████████ | 702/777 [00:31<00:03, 23.39it/s]
loss: 0.344202  [23808/24872]:  90%|█████████ | 702/777 [00:31<00:03, 23.39it/s]
loss: 0.240192  [23840/24872]:  90%|█████████ | 702/777 [00:31<00:03, 23.39it/s]
loss: 0.257380  [23872/24872]:  90%|█████████ | 702/777 [00:31<00:03, 23.39it/s]
loss: 0.264775  [23904/24872]:  90%|█████████ | 702/777 [00:31<00:03, 23.39it/s]
loss: 0.395548  [23936/24872]:  90%|█████████ | 702/777 [00:31<00:03, 23.39it/s]
loss: 0.270442  [23968/24872]:  90%|█████████ | 702/777 [00:32<00:03, 23.39it/s]
loss: 0.303282  [24000/24872]:  90%|█████████ | 702/777 [00:32<00:03, 23.39it/s]
loss: 0.292070  [24032/24872]:  90%|█████████ | 702/777 [00:32<00:03, 23.39it/s]
loss: 0.280198  [24064/24872]:  90%|█████████ | 702/777 [00:32<00:03, 23.39it/s]
loss: 0.213080  [24096/24872]:  90%|█████████ | 702/777 [00:32<00:03, 23.39it/s]
loss: 0.261187  [24128/24872]:  90%|█████████ | 702/777 [00:32<00:03, 23.39it/s]
loss: 0.197120  [24160/24872]:  90%|█████████ | 702/777 [00:32<00:03, 23.39it/s]
loss: 0.125185  [24192/24872]:  90%|█████████ | 702/777 [00:32<00:03, 23.39it/s]
loss: 0.133198  [24224/24872]:  90%|█████████ | 702/777 [00:32<00:03, 23.39it/s]
loss: 0.395609  [24256/24872]:  90%|█████████ | 702/777 [00:32<00:03, 23.39it/s]
loss: 0.165101  [24288/24872]:  90%|█████████ | 702/777 [00:32<00:03, 23.39it/s]
loss: 0.250410  [24320/24872]:  90%|█████████ | 702/777 [00:32<00:03, 23.39it/s]
loss: 0.318926  [24352/24872]:  90%|█████████ | 702/777 [00:32<00:03, 23.39it/s]
loss: 0.238304  [24384/24872]:  90%|█████████ | 702/777 [00:32<00:03, 23.39it/s]
loss: 0.305084  [24416/24872]:  90%|█████████ | 702/777 [00:32<00:03, 23.39it/s]
loss: 0.288512  [24448/24872]:  90%|█████████ | 702/777 [00:32<00:03, 23.39it/s]
loss: 0.244254  [24480/24872]:  90%|█████████ | 702/777 [00:32<00:03, 23.39it/s]
loss: 0.174634  [24512/24872]:  90%|█████████ | 702/777 [00:32<00:03, 23.39it/s]
loss: 0.219351  [24544/24872]:  90%|█████████ | 702/777 [00:32<00:03, 23.39it/s]
loss: 0.228837  [24576/24872]:  90%|█████████ | 702/777 [00:32<00:03, 23.39it/s]
loss: 0.224965  [24608/24872]:  90%|█████████ | 702/777 [00:32<00:03, 23.39it/s]
loss: 0.280613  [24640/24872]:  90%|█████████ | 702/777 [00:32<00:03, 23.39it/s]
loss: 0.266388  [24672/24872]:  90%|█████████ | 702/777 [00:32<00:03, 23.39it/s]
loss: 0.287963  [24704/24872]:  90%|█████████ | 702/777 [00:33<00:03, 23.39it/s]
loss: 0.196905  [24736/24872]:  90%|█████████ | 702/777 [00:33<00:03, 23.39it/s]
loss: 0.303016  [24768/24872]:  90%|█████████ | 702/777 [00:33<00:03, 23.39it/s]
loss: 0.263844  [24800/24872]:  90%|█████████ | 702/777 [00:33<00:03, 23.39it/s]
loss: 0.256359  [24832/24872]:  90%|█████████ | 702/777 [00:33<00:03, 23.39it/s]
loss: 0.271165  [24864/24872]:  90%|█████████ | 702/777 [00:33<00:03, 23.39it/s]
loss: 0.163905  [24872/24872]:  90%|█████████ | 702/777 [00:33<00:03, 23.39it/s]
loss: 0.163905  [24872/24872]: : 778it [00:33, 23.40it/s]
Epoch 3, time=66.79s

  0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.273572  [   32/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.188469  [   64/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.407836  [   96/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.331124  [  128/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.331691  [  160/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.191334  [  192/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.318810  [  224/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.354601  [  256/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.255331  [  288/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.217922  [  320/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.271434  [  352/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.265648  [  384/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.237126  [  416/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.208607  [  448/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.317505  [  480/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.318959  [  512/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.192692  [  544/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.277884  [  576/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.291694  [  608/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.253489  [  640/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.273887  [  672/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.261450  [  704/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.361013  [  736/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.283125  [  768/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.249632  [  800/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.250333  [  832/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.398857  [  864/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.397942  [  896/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.240074  [  928/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.324335  [  960/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.226735  [  992/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.206477  [ 1024/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.190896  [ 1056/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.283431  [ 1088/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.277476  [ 1120/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.243638  [ 1152/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.382407  [ 1184/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.264359  [ 1216/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.231116  [ 1248/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.197647  [ 1280/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.234303  [ 1312/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.184496  [ 1344/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.216267  [ 1376/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.261125  [ 1408/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.252114  [ 1440/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.293738  [ 1472/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.309655  [ 1504/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.206137  [ 1536/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.226137  [ 1568/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.236457  [ 1600/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.270115  [ 1632/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.211460  [ 1664/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.241190  [ 1696/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.234334  [ 1728/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.280741  [ 1760/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.096198  [ 1792/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.240458  [ 1824/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.105051  [ 1856/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.275285  [ 1888/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.227685  [ 1920/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.283113  [ 1952/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.283504  [ 1984/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.202874  [ 2016/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.222462  [ 2048/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.238822  [ 2080/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.208170  [ 2112/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.311170  [ 2144/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.249274  [ 2176/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.230219  [ 2208/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.327822  [ 2240/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.280102  [ 2272/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.252496  [ 2304/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.339852  [ 2336/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.248592  [ 2368/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.294132  [ 2400/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.388006  [ 2432/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.313610  [ 2464/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.221809  [ 2496/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.228648  [ 2528/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.214829  [ 2560/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.300429  [ 2592/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.197279  [ 2624/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.321347  [ 2656/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.242591  [ 2688/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.271701  [ 2720/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.256308  [ 2752/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.255880  [ 2784/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.314939  [ 2816/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.264994  [ 2848/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.261774  [ 2880/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.206347  [ 2912/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.291539  [ 2944/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.285086  [ 2976/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.225218  [ 3008/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.284172  [ 3040/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.309281  [ 3072/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.214900  [ 3104/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.265518  [ 3136/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.341211  [ 3168/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.232359  [ 3200/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.372777  [ 3232/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.251095  [ 3264/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.213037  [ 3296/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.206806  [ 3328/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.234919  [ 3360/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.278872  [ 3392/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.178378  [ 3424/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.223845  [ 3456/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.240281  [ 3488/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.342332  [ 3520/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.258487  [ 3552/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.330823  [ 3584/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.300710  [ 3616/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.283940  [ 3648/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.278292  [ 3680/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.350073  [ 3712/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.265235  [ 3744/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.254657  [ 3776/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.222700  [ 3808/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.342141  [ 3840/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.168587  [ 3872/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.176097  [ 3904/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.219769  [ 3936/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.301551  [ 3968/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.327521  [ 4000/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.193697  [ 4032/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.243738  [ 4064/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.287562  [ 4096/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.222106  [ 4128/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.255988  [ 4160/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.208690  [ 4192/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.249877  [ 4224/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.210911  [ 4256/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.212361  [ 4288/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.215257  [ 4320/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.195216  [ 4352/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.236663  [ 4384/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.284576  [ 4416/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.321733  [ 4448/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.329775  [ 4480/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.176091  [ 4512/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.276746  [ 4544/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.171182  [ 4576/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.260997  [ 4608/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.233042  [ 4640/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.195902  [ 4672/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.251148  [ 4704/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.210520  [ 4736/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.281737  [ 4768/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.259614  [ 4800/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.242503  [ 4832/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.261921  [ 4864/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.279716  [ 4896/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.352681  [ 4928/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.244579  [ 4960/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.223354  [ 4992/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.253857  [ 5024/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.321742  [ 5056/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.235050  [ 5088/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.356255  [ 5120/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.231523  [ 5152/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.268944  [ 5184/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.184871  [ 5216/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.246646  [ 5248/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.292379  [ 5280/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.312886  [ 5312/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.251336  [ 5344/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.209220  [ 5376/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.398969  [ 5408/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.137093  [ 5440/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.252252  [ 5472/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.247908  [ 5504/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.248529  [ 5536/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.301672  [ 5568/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.261204  [ 5600/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.294529  [ 5632/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.296271  [ 5664/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.319463  [ 5696/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.314465  [ 5728/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.277315  [ 5760/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.326544  [ 5792/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.283117  [ 5824/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.257629  [ 5856/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.207365  [ 5888/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.140372  [ 5920/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.260284  [ 5952/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.237991  [ 5984/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.233061  [ 6016/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.319560  [ 6048/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.197991  [ 6080/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.342851  [ 6112/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.339488  [ 6144/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.194494  [ 6176/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.264691  [ 6208/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.237663  [ 6240/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.298303  [ 6272/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.213383  [ 6304/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.269098  [ 6336/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.247150  [ 6368/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.395573  [ 6400/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.337905  [ 6432/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.198095  [ 6464/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.322550  [ 6496/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.271383  [ 6528/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.289044  [ 6560/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.254907  [ 6592/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.339413  [ 6624/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.223560  [ 6656/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.348204  [ 6688/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.206809  [ 6720/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.213100  [ 6752/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.217453  [ 6784/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.246996  [ 6816/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.304602  [ 6848/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.207277  [ 6880/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.199434  [ 6912/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.314940  [ 6944/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.295344  [ 6976/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.214038  [ 7008/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.468854  [ 7040/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.340363  [ 7072/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.314151  [ 7104/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.192753  [ 7136/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.304694  [ 7168/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.300866  [ 7200/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.284149  [ 7232/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.228430  [ 7264/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.189053  [ 7296/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.238512  [ 7328/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.197196  [ 7360/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.249134  [ 7392/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.228957  [ 7424/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.214979  [ 7456/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.293157  [ 7488/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.203890  [ 7520/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.210700  [ 7552/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.266445  [ 7584/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.243972  [ 7616/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.313819  [ 7648/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.219625  [ 7680/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.338556  [ 7712/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.397001  [ 7744/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.331723  [ 7776/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.199193  [ 7808/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.228757  [ 7840/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.234809  [ 7872/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.272131  [ 7904/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.234618  [ 7936/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.281973  [ 7968/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.290619  [ 8000/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.288735  [ 8032/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.279079  [ 8064/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.195521  [ 8096/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.211422  [ 8128/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.212893  [ 8160/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.270451  [ 8192/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.257488  [ 8224/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.379826  [ 8256/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.357024  [ 8288/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.321522  [ 8320/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.258560  [ 8352/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.323431  [ 8384/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.270983  [ 8416/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.276107  [ 8448/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.278522  [ 8480/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.297930  [ 8512/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.308241  [ 8544/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.237331  [ 8576/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.299183  [ 8608/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.170825  [ 8640/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.411495  [ 8672/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.220983  [ 8704/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.297298  [ 8736/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.410523  [ 8768/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.294442  [ 8800/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.417953  [ 8832/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.209633  [ 8864/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.244515  [ 8896/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.265909  [ 8928/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.364763  [ 8960/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.229473  [ 8992/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.386714  [ 9024/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.301022  [ 9056/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.348925  [ 9088/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.247059  [ 9120/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.254197  [ 9152/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.307902  [ 9184/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.232596  [ 9216/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.264698  [ 9248/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.404395  [ 9280/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.308714  [ 9312/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.372595  [ 9344/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.191940  [ 9376/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.205946  [ 9408/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.254846  [ 9440/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.313991  [ 9472/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.166507  [ 9504/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.328086  [ 9536/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.291517  [ 9568/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.305964  [ 9600/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.293166  [ 9632/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.282083  [ 9664/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.327012  [ 9696/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.323747  [ 9728/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.290056  [ 9760/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.219848  [ 9792/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.280581  [ 9824/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.262730  [ 9856/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.200933  [ 9888/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.171420  [ 9920/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.204607  [ 9952/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.217650  [ 9984/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.420284  [10016/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.283996  [10048/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.292194  [10080/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.175738  [10112/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.253308  [10144/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.292654  [10176/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.202705  [10208/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.268699  [10240/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.335022  [10272/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.382535  [10304/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.232110  [10336/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.194374  [10368/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.247935  [10400/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.218148  [10432/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.234051  [10464/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.238711  [10496/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.429198  [10528/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.221705  [10560/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.245252  [10592/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.301494  [10624/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.200727  [10656/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.247991  [10688/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.259605  [10720/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.216890  [10752/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.297114  [10784/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.274636  [10816/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.235944  [10848/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.226856  [10880/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.209618  [10912/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.217202  [10944/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.222413  [10976/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.319268  [11008/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.234925  [11040/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.254008  [11072/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.216786  [11104/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.181773  [11136/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.183522  [11168/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.162169  [11200/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.193865  [11232/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.289694  [11264/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.192370  [11296/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.251944  [11328/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.216735  [11360/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.265074  [11392/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.282301  [11424/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.276029  [11456/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.207320  [11488/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.166997  [11520/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.260799  [11552/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.263111  [11584/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.277320  [11616/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.357143  [11648/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.232957  [11680/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.179009  [11712/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.237953  [11744/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.265030  [11776/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.356368  [11808/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.115702  [11840/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.223302  [11872/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.433091  [11904/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.207464  [11936/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.210197  [11968/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.198586  [12000/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.209280  [12032/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.207801  [12064/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.257538  [12096/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.368126  [12128/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.243871  [12160/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.264564  [12192/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.412481  [12224/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.155290  [12256/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.290496  [12288/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.221186  [12320/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.250427  [12352/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.273407  [12384/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.163577  [12416/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.397405  [12448/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.245589  [12480/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.283979  [12512/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.223712  [12544/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.211909  [12576/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.265876  [12608/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.181232  [12640/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.306339  [12672/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.270606  [12704/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.186297  [12736/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.282237  [12768/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.277203  [12800/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.266264  [12832/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.213742  [12864/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.190214  [12896/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.180543  [12928/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.239913  [12960/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.271964  [12992/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.215860  [13024/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.278026  [13056/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.208133  [13088/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.330443  [13120/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.262283  [13152/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.336306  [13184/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.204003  [13216/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.235617  [13248/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.268859  [13280/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.276807  [13312/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.287144  [13344/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.272123  [13376/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.354345  [13408/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.207740  [13440/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.202647  [13472/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.271008  [13504/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.203191  [13536/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.246212  [13568/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.358294  [13600/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.286777  [13632/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.230721  [13664/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.275645  [13696/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.196699  [13728/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.377105  [13760/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.208485  [13792/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.186426  [13824/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.203730  [13856/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.255197  [13888/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.168354  [13920/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.202840  [13952/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.207112  [13984/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.185853  [14016/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.223778  [14048/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.308385  [14080/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.272809  [14112/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.239465  [14144/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.194898  [14176/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.230263  [14208/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.327892  [14240/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.318222  [14272/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.255795  [14304/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.256554  [14336/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.245357  [14368/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.305220  [14400/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.226644  [14432/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.198938  [14464/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.186971  [14496/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.235520  [14528/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.226847  [14560/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.230892  [14592/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.305864  [14624/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.382452  [14656/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.333190  [14688/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.156962  [14720/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.315994  [14752/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.221718  [14784/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.265897  [14816/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.334090  [14848/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.245418  [14880/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.253045  [14912/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.370567  [14944/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.281816  [14976/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.253148  [15008/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.313021  [15040/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.217073  [15072/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.310680  [15104/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.230105  [15136/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.178694  [15168/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.182067  [15200/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.214968  [15232/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.225763  [15264/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.245754  [15296/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.260592  [15328/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.206859  [15360/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.335706  [15392/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.224933  [15424/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.374921  [15456/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.282825  [15488/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.237988  [15520/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.177749  [15552/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.358171  [15584/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.196821  [15616/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.162709  [15648/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.230075  [15680/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.297156  [15712/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.296935  [15744/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.347301  [15776/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.280374  [15808/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.263005  [15840/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.263702  [15872/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.209557  [15904/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.247088  [15936/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.315711  [15968/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.218502  [16000/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.241669  [16032/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.160677  [16064/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.321759  [16096/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.277831  [16128/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.224448  [16160/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.264924  [16192/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.208819  [16224/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.265690  [16256/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.258005  [16288/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.227599  [16320/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.264774  [16352/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.195949  [16384/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.284016  [16416/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.284924  [16448/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.322165  [16480/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.268462  [16512/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.305934  [16544/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.226256  [16576/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.282880  [16608/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.274104  [16640/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.295960  [16672/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.226742  [16704/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.295503  [16736/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.287045  [16768/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.321874  [16800/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.243708  [16832/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.136978  [16864/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.246890  [16896/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.168094  [16928/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.264787  [16960/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.338366  [16992/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.289024  [17024/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.191126  [17056/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.243062  [17088/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.212391  [17120/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.222853  [17152/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.186117  [17184/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.315178  [17216/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.250277  [17248/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.206110  [17280/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.235824  [17312/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.284271  [17344/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.457578  [17376/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.152015  [17408/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.325584  [17440/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.238858  [17472/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.226003  [17504/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.188904  [17536/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.345940  [17568/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.275647  [17600/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.272080  [17632/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.358614  [17664/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.243053  [17696/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.258522  [17728/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.239918  [17760/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.297593  [17792/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.362883  [17824/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.245437  [17856/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.298437  [17888/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.137191  [17920/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.223296  [17952/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.241043  [17984/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.232097  [18016/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.244458  [18048/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.273599  [18080/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.287245  [18112/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.352970  [18144/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.299097  [18176/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.181722  [18208/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.188355  [18240/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.223779  [18272/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.263769  [18304/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.227070  [18336/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.302889  [18368/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.173315  [18400/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.164192  [18432/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.249948  [18464/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.283919  [18496/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.235769  [18528/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.309751  [18560/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.217014  [18592/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.196742  [18624/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.338664  [18656/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.228012  [18688/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.269401  [18720/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.279024  [18752/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.329810  [18784/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.191332  [18816/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.200069  [18848/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.221535  [18880/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.244399  [18912/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.229476  [18944/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.270048  [18976/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.210538  [19008/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.319310  [19040/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.202767  [19072/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.123468  [19104/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.264666  [19136/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.261626  [19168/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.221727  [19200/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.265961  [19232/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.198479  [19264/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.228473  [19296/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.144078  [19328/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.242426  [19360/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.318568  [19392/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.284166  [19424/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.253020  [19456/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.278254  [19488/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.257217  [19520/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.229707  [19552/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.248763  [19584/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.227853  [19616/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.330662  [19648/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.209017  [19680/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.281797  [19712/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.160933  [19744/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.229441  [19776/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.249412  [19808/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.202982  [19840/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.258848  [19872/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.202615  [19904/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.217955  [19936/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.276884  [19968/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.222065  [20000/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.188158  [20032/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.256165  [20064/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.190711  [20096/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.178788  [20128/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.250366  [20160/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.233391  [20192/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.233012  [20224/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.282628  [20256/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.269314  [20288/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.246030  [20320/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.232475  [20352/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.153235  [20384/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.174442  [20416/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.292643  [20448/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.145113  [20480/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.205352  [20512/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.266045  [20544/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.221143  [20576/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.276373  [20608/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.305564  [20640/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.233948  [20672/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.287249  [20704/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.259754  [20736/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.260929  [20768/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.222033  [20800/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.198817  [20832/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.328426  [20864/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.171759  [20896/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.212018  [20928/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.288888  [20960/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.193553  [20992/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.224788  [21024/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.274257  [21056/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.223164  [21088/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.342478  [21120/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.203069  [21152/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.246083  [21184/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.219072  [21216/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.351199  [21248/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.211810  [21280/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.240648  [21312/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.249949  [21344/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.221599  [21376/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.266394  [21408/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.305768  [21440/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.278002  [21472/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.302999  [21504/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.308683  [21536/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.212209  [21568/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.282320  [21600/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.180098  [21632/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.226466  [21664/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.255676  [21696/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.260165  [21728/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.180711  [21760/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.222950  [21792/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.386797  [21824/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.296744  [21856/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.192789  [21888/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.319842  [21920/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.195819  [21952/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.263659  [21984/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.264197  [22016/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.344655  [22048/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.275592  [22080/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.309751  [22112/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.286676  [22144/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.158863  [22176/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.221286  [22208/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.212161  [22240/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.267888  [22272/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.228520  [22304/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.312835  [22336/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.224962  [22368/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.223749  [22400/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.246352  [22432/24872]:   0%|          | 0/777 [00:30<?, ?it/s]
loss: 0.246352  [22432/24872]:  90%|█████████ | 701/777 [00:30<00:03, 23.35it/s]
loss: 0.363382  [22464/24872]:  90%|█████████ | 701/777 [00:30<00:03, 23.35it/s]
loss: 0.329814  [22496/24872]:  90%|█████████ | 701/777 [00:30<00:03, 23.35it/s]
loss: 0.223499  [22528/24872]:  90%|█████████ | 701/777 [00:30<00:03, 23.35it/s]
loss: 0.214133  [22560/24872]:  90%|█████████ | 701/777 [00:30<00:03, 23.35it/s]
loss: 0.209479  [22592/24872]:  90%|█████████ | 701/777 [00:30<00:03, 23.35it/s]
loss: 0.143487  [22624/24872]:  90%|█████████ | 701/777 [00:30<00:03, 23.35it/s]
loss: 0.183908  [22656/24872]:  90%|█████████ | 701/777 [00:30<00:03, 23.35it/s]
loss: 0.201975  [22688/24872]:  90%|█████████ | 701/777 [00:30<00:03, 23.35it/s]
loss: 0.187572  [22720/24872]:  90%|█████████ | 701/777 [00:30<00:03, 23.35it/s]
loss: 0.228796  [22752/24872]:  90%|█████████ | 701/777 [00:30<00:03, 23.35it/s]
loss: 0.184587  [22784/24872]:  90%|█████████ | 701/777 [00:30<00:03, 23.35it/s]
loss: 0.217871  [22816/24872]:  90%|█████████ | 701/777 [00:30<00:03, 23.35it/s]
loss: 0.227331  [22848/24872]:  90%|█████████ | 701/777 [00:30<00:03, 23.35it/s]
loss: 0.137081  [22880/24872]:  90%|█████████ | 701/777 [00:30<00:03, 23.35it/s]
loss: 0.188964  [22912/24872]:  90%|█████████ | 701/777 [00:30<00:03, 23.35it/s]
loss: 0.246081  [22944/24872]:  90%|█████████ | 701/777 [00:30<00:03, 23.35it/s]
loss: 0.307684  [22976/24872]:  90%|█████████ | 701/777 [00:30<00:03, 23.35it/s]
loss: 0.241574  [23008/24872]:  90%|█████████ | 701/777 [00:30<00:03, 23.35it/s]
loss: 0.262530  [23040/24872]:  90%|█████████ | 701/777 [00:30<00:03, 23.35it/s]
loss: 0.332299  [23072/24872]:  90%|█████████ | 701/777 [00:30<00:03, 23.35it/s]
loss: 0.217284  [23104/24872]:  90%|█████████ | 701/777 [00:30<00:03, 23.35it/s]
loss: 0.327700  [23136/24872]:  90%|█████████ | 701/777 [00:30<00:03, 23.35it/s]
loss: 0.299627  [23168/24872]:  90%|█████████ | 701/777 [00:31<00:03, 23.35it/s]
loss: 0.213108  [23200/24872]:  90%|█████████ | 701/777 [00:31<00:03, 23.35it/s]
loss: 0.239554  [23232/24872]:  90%|█████████ | 701/777 [00:31<00:03, 23.35it/s]
loss: 0.139881  [23264/24872]:  90%|█████████ | 701/777 [00:31<00:03, 23.35it/s]
loss: 0.185709  [23296/24872]:  90%|█████████ | 701/777 [00:31<00:03, 23.35it/s]
loss: 0.227399  [23328/24872]:  90%|█████████ | 701/777 [00:31<00:03, 23.35it/s]
loss: 0.198884  [23360/24872]:  90%|█████████ | 701/777 [00:31<00:03, 23.35it/s]
loss: 0.237886  [23392/24872]:  90%|█████████ | 701/777 [00:31<00:03, 23.35it/s]
loss: 0.275088  [23424/24872]:  90%|█████████ | 701/777 [00:31<00:03, 23.35it/s]
loss: 0.206420  [23456/24872]:  90%|█████████ | 701/777 [00:31<00:03, 23.35it/s]
loss: 0.212096  [23488/24872]:  90%|█████████ | 701/777 [00:31<00:03, 23.35it/s]
loss: 0.213801  [23520/24872]:  90%|█████████ | 701/777 [00:31<00:03, 23.35it/s]
loss: 0.159702  [23552/24872]:  90%|█████████ | 701/777 [00:31<00:03, 23.35it/s]
loss: 0.258162  [23584/24872]:  90%|█████████ | 701/777 [00:31<00:03, 23.35it/s]
loss: 0.234310  [23616/24872]:  90%|█████████ | 701/777 [00:31<00:03, 23.35it/s]
loss: 0.387855  [23648/24872]:  90%|█████████ | 701/777 [00:31<00:03, 23.35it/s]
loss: 0.180971  [23680/24872]:  90%|█████████ | 701/777 [00:31<00:03, 23.35it/s]
loss: 0.208723  [23712/24872]:  90%|█████████ | 701/777 [00:31<00:03, 23.35it/s]
loss: 0.251756  [23744/24872]:  90%|█████████ | 701/777 [00:31<00:03, 23.35it/s]
loss: 0.246751  [23776/24872]:  90%|█████████ | 701/777 [00:31<00:03, 23.35it/s]
loss: 0.330614  [23808/24872]:  90%|█████████ | 701/777 [00:31<00:03, 23.35it/s]
loss: 0.204379  [23840/24872]:  90%|█████████ | 701/777 [00:31<00:03, 23.35it/s]
loss: 0.259910  [23872/24872]:  90%|█████████ | 701/777 [00:31<00:03, 23.35it/s]
loss: 0.243573  [23904/24872]:  90%|█████████ | 701/777 [00:32<00:03, 23.35it/s]
loss: 0.369093  [23936/24872]:  90%|█████████ | 701/777 [00:32<00:03, 23.35it/s]
loss: 0.245186  [23968/24872]:  90%|█████████ | 701/777 [00:32<00:03, 23.35it/s]
loss: 0.259659  [24000/24872]:  90%|█████████ | 701/777 [00:32<00:03, 23.35it/s]
loss: 0.225917  [24032/24872]:  90%|█████████ | 701/777 [00:32<00:03, 23.35it/s]
loss: 0.261260  [24064/24872]:  90%|█████████ | 701/777 [00:32<00:03, 23.35it/s]
loss: 0.181426  [24096/24872]:  90%|█████████ | 701/777 [00:32<00:03, 23.35it/s]
loss: 0.226778  [24128/24872]:  90%|█████████ | 701/777 [00:32<00:03, 23.35it/s]
loss: 0.186456  [24160/24872]:  90%|█████████ | 701/777 [00:32<00:03, 23.35it/s]
loss: 0.095960  [24192/24872]:  90%|█████████ | 701/777 [00:32<00:03, 23.35it/s]
loss: 0.117769  [24224/24872]:  90%|█████████ | 701/777 [00:32<00:03, 23.35it/s]
loss: 0.338371  [24256/24872]:  90%|█████████ | 701/777 [00:32<00:03, 23.35it/s]
loss: 0.160846  [24288/24872]:  90%|█████████ | 701/777 [00:32<00:03, 23.35it/s]
loss: 0.203330  [24320/24872]:  90%|█████████ | 701/777 [00:32<00:03, 23.35it/s]
loss: 0.296838  [24352/24872]:  90%|█████████ | 701/777 [00:32<00:03, 23.35it/s]
loss: 0.216351  [24384/24872]:  90%|█████████ | 701/777 [00:32<00:03, 23.35it/s]
loss: 0.282950  [24416/24872]:  90%|█████████ | 701/777 [00:32<00:03, 23.35it/s]
loss: 0.232048  [24448/24872]:  90%|█████████ | 701/777 [00:32<00:03, 23.35it/s]
loss: 0.258124  [24480/24872]:  90%|█████████ | 701/777 [00:32<00:03, 23.35it/s]
loss: 0.150086  [24512/24872]:  90%|█████████ | 701/777 [00:32<00:03, 23.35it/s]
loss: 0.200803  [24544/24872]:  90%|█████████ | 701/777 [00:32<00:03, 23.35it/s]
loss: 0.227350  [24576/24872]:  90%|█████████ | 701/777 [00:32<00:03, 23.35it/s]
loss: 0.245504  [24608/24872]:  90%|█████████ | 701/777 [00:32<00:03, 23.35it/s]
loss: 0.247224  [24640/24872]:  90%|█████████ | 701/777 [00:33<00:03, 23.35it/s]
loss: 0.200322  [24672/24872]:  90%|█████████ | 701/777 [00:33<00:03, 23.35it/s]
loss: 0.265170  [24704/24872]:  90%|█████████ | 701/777 [00:33<00:03, 23.35it/s]
loss: 0.164157  [24736/24872]:  90%|█████████ | 701/777 [00:33<00:03, 23.35it/s]
loss: 0.242595  [24768/24872]:  90%|█████████ | 701/777 [00:33<00:03, 23.35it/s]
loss: 0.221884  [24800/24872]:  90%|█████████ | 701/777 [00:33<00:03, 23.35it/s]
loss: 0.191837  [24832/24872]:  90%|█████████ | 701/777 [00:33<00:03, 23.35it/s]
loss: 0.215214  [24864/24872]:  90%|█████████ | 701/777 [00:33<00:03, 23.35it/s]
loss: 0.147651  [24872/24872]:  90%|█████████ | 701/777 [00:33<00:03, 23.35it/s]
loss: 0.147651  [24872/24872]: : 778it [00:33, 23.33it/s]
Epoch 4, time=100.14s

  0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.225064  [   32/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.136552  [   64/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.333158  [   96/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.278143  [  128/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.253046  [  160/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.172922  [  192/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.302867  [  224/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.320890  [  256/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.236604  [  288/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.176873  [  320/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.235216  [  352/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.231853  [  384/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.195195  [  416/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.210284  [  448/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.265932  [  480/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.276758  [  512/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.162968  [  544/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.249925  [  576/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.241108  [  608/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.234733  [  640/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.238388  [  672/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.214867  [  704/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.323487  [  736/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.243315  [  768/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.216930  [  800/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.217194  [  832/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.337830  [  864/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.299683  [  896/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.209866  [  928/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.273664  [  960/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.205417  [  992/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.182309  [ 1024/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.152186  [ 1056/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.258314  [ 1088/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.253187  [ 1120/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.224316  [ 1152/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.344423  [ 1184/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.218989  [ 1216/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.202610  [ 1248/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.168436  [ 1280/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.220571  [ 1312/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.175758  [ 1344/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.218386  [ 1376/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.237559  [ 1408/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.229563  [ 1440/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.267966  [ 1472/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.296402  [ 1504/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.188766  [ 1536/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.194769  [ 1568/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.226278  [ 1600/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.275120  [ 1632/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.183199  [ 1664/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.263105  [ 1696/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.217710  [ 1728/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.244229  [ 1760/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.091992  [ 1792/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.194243  [ 1824/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.087504  [ 1856/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.247673  [ 1888/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.201865  [ 1920/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.263492  [ 1952/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.242123  [ 1984/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.152112  [ 2016/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.216380  [ 2048/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.231974  [ 2080/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.175942  [ 2112/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.271504  [ 2144/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.221851  [ 2176/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.181080  [ 2208/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.270132  [ 2240/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.276284  [ 2272/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.225739  [ 2304/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.304345  [ 2336/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.222707  [ 2368/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.242858  [ 2400/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.380534  [ 2432/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.319709  [ 2464/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.172106  [ 2496/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.209302  [ 2528/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.197281  [ 2560/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.282368  [ 2592/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.191886  [ 2624/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.299582  [ 2656/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.231179  [ 2688/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.251856  [ 2720/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.213817  [ 2752/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.220032  [ 2784/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.331448  [ 2816/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.236974  [ 2848/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.246620  [ 2880/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.197573  [ 2912/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.272486  [ 2944/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.269950  [ 2976/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.213922  [ 3008/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.267964  [ 3040/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.262188  [ 3072/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.216033  [ 3104/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.221832  [ 3136/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.300242  [ 3168/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.228353  [ 3200/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.309151  [ 3232/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.212604  [ 3264/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.195725  [ 3296/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.179730  [ 3328/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.237885  [ 3360/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.235524  [ 3392/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.147889  [ 3424/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.172151  [ 3456/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.185193  [ 3488/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.320140  [ 3520/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.217085  [ 3552/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.296217  [ 3584/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.307890  [ 3616/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.252144  [ 3648/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.256154  [ 3680/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.325900  [ 3712/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.216590  [ 3744/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.213185  [ 3776/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.215844  [ 3808/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.251009  [ 3840/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.148652  [ 3872/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.164008  [ 3904/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.185335  [ 3936/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.259191  [ 3968/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.325066  [ 4000/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.166882  [ 4032/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.240109  [ 4064/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.258513  [ 4096/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.237257  [ 4128/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.231237  [ 4160/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.210958  [ 4192/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.212162  [ 4224/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.194567  [ 4256/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.189544  [ 4288/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.190763  [ 4320/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.175247  [ 4352/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.212584  [ 4384/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.266061  [ 4416/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.303229  [ 4448/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.295310  [ 4480/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.118712  [ 4512/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.251783  [ 4544/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.134287  [ 4576/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.238052  [ 4608/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.200040  [ 4640/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.185387  [ 4672/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.199387  [ 4704/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.179572  [ 4736/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.245088  [ 4768/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.210381  [ 4800/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.215327  [ 4832/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.193374  [ 4864/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.245005  [ 4896/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.295564  [ 4928/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.177954  [ 4960/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.180004  [ 4992/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.301845  [ 5024/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.268975  [ 5056/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.205195  [ 5088/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.289760  [ 5120/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.175796  [ 5152/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.243722  [ 5184/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.176407  [ 5216/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.210246  [ 5248/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.221471  [ 5280/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.283230  [ 5312/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.201185  [ 5344/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.189196  [ 5376/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.366072  [ 5408/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.116251  [ 5440/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.230446  [ 5472/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.219057  [ 5504/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.222889  [ 5536/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.286759  [ 5568/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.232828  [ 5600/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.229216  [ 5632/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.257760  [ 5664/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.295571  [ 5696/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.262012  [ 5728/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.252495  [ 5760/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.324385  [ 5792/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.240432  [ 5824/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.220969  [ 5856/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.209518  [ 5888/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.119607  [ 5920/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.222978  [ 5952/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.246463  [ 5984/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.204906  [ 6016/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.264401  [ 6048/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.178326  [ 6080/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.300517  [ 6112/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.290859  [ 6144/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.211712  [ 6176/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.212239  [ 6208/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.199765  [ 6240/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.274697  [ 6272/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.198698  [ 6304/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.202895  [ 6336/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.220357  [ 6368/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.363507  [ 6400/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.264777  [ 6432/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.166431  [ 6464/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.318165  [ 6496/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.240104  [ 6528/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.260504  [ 6560/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.222532  [ 6592/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.272825  [ 6624/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.166810  [ 6656/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.291242  [ 6688/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.170848  [ 6720/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.187830  [ 6752/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.188253  [ 6784/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.249050  [ 6816/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.261308  [ 6848/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.198117  [ 6880/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.172191  [ 6912/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.238810  [ 6944/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.250522  [ 6976/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.171375  [ 7008/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.355831  [ 7040/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.297607  [ 7072/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.260350  [ 7104/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.150260  [ 7136/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.250208  [ 7168/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.246276  [ 7200/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.230105  [ 7232/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.191060  [ 7264/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.175745  [ 7296/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.229865  [ 7328/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.161633  [ 7360/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.211885  [ 7392/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.194219  [ 7424/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.194183  [ 7456/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.275363  [ 7488/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.169492  [ 7520/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.184189  [ 7552/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.238641  [ 7584/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.211858  [ 7616/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.312037  [ 7648/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.165435  [ 7680/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.262826  [ 7712/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.337032  [ 7744/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.294836  [ 7776/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.176381  [ 7808/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.206169  [ 7840/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.205781  [ 7872/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.251526  [ 7904/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.201670  [ 7936/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.249894  [ 7968/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.257805  [ 8000/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.222730  [ 8032/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.223466  [ 8064/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.183956  [ 8096/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.175604  [ 8128/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.199685  [ 8160/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.241012  [ 8192/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.224001  [ 8224/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.308417  [ 8256/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.295836  [ 8288/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.227018  [ 8320/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.187582  [ 8352/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.235485  [ 8384/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.234580  [ 8416/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.194697  [ 8448/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.212474  [ 8480/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.273855  [ 8512/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.256954  [ 8544/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.173242  [ 8576/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.181074  [ 8608/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.145106  [ 8640/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.372006  [ 8672/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.177506  [ 8704/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.230710  [ 8736/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.325025  [ 8768/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.262012  [ 8800/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.371396  [ 8832/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.172176  [ 8864/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.175979  [ 8896/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.193922  [ 8928/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.305125  [ 8960/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.189572  [ 8992/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.331717  [ 9024/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.247526  [ 9056/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.289984  [ 9088/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.224058  [ 9120/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.202465  [ 9152/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.282182  [ 9184/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.192889  [ 9216/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.210926  [ 9248/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.356006  [ 9280/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.202445  [ 9312/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.330042  [ 9344/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.165300  [ 9376/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.185112  [ 9408/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.225576  [ 9440/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.229516  [ 9472/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.138306  [ 9504/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.252050  [ 9536/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.191591  [ 9568/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.281465  [ 9600/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.243169  [ 9632/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.216656  [ 9664/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.252793  [ 9696/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.290772  [ 9728/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.233957  [ 9760/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.181779  [ 9792/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.216354  [ 9824/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.261251  [ 9856/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.173068  [ 9888/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.146514  [ 9920/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.197001  [ 9952/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.275393  [ 9984/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.369247  [10016/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.248549  [10048/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.316464  [10080/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.199685  [10112/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.289417  [10144/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.280280  [10176/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.193319  [10208/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.242984  [10240/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.314446  [10272/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.310268  [10304/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.240397  [10336/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.172537  [10368/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.230959  [10400/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.256914  [10432/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.219239  [10464/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.239867  [10496/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.427339  [10528/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.242992  [10560/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.266945  [10592/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.298757  [10624/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.186330  [10656/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.248838  [10688/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.223893  [10720/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.212852  [10752/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.297877  [10784/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.256316  [10816/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.214236  [10848/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.206240  [10880/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.197754  [10912/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.193867  [10944/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.227642  [10976/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.251706  [11008/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.239931  [11040/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.254701  [11072/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.212363  [11104/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.151641  [11136/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.182575  [11168/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.146739  [11200/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.166056  [11232/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.241778  [11264/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.192370  [11296/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.252551  [11328/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.192188  [11360/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.240467  [11392/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.229124  [11424/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.251792  [11456/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.229289  [11488/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.148862  [11520/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.234332  [11552/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.278471  [11584/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.254570  [11616/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.355939  [11648/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.197729  [11680/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.179394  [11712/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.222581  [11744/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.235912  [11776/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.281726  [11808/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.087768  [11840/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.215861  [11872/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.454509  [11904/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.191095  [11936/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.211583  [11968/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.195771  [12000/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.208139  [12032/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.210211  [12064/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.237656  [12096/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.359629  [12128/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.224473  [12160/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.223404  [12192/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.348965  [12224/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.135595  [12256/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.268808  [12288/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.200096  [12320/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.212606  [12352/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.250187  [12384/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.143604  [12416/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.375093  [12448/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.240118  [12480/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.320097  [12512/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.204343  [12544/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.213317  [12576/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.234849  [12608/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.176068  [12640/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.242354  [12672/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.243560  [12704/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.165244  [12736/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.282723  [12768/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.263644  [12800/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.224844  [12832/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.206866  [12864/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.179887  [12896/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.190075  [12928/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.254655  [12960/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.278653  [12992/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.234420  [13024/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.241858  [13056/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.201370  [13088/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.274970  [13120/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.232508  [13152/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.312425  [13184/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.190932  [13216/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.223148  [13248/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.268312  [13280/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.274699  [13312/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.218555  [13344/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.210219  [13376/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.278317  [13408/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.172900  [13440/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.208903  [13472/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.267471  [13504/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.185238  [13536/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.249910  [13568/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.361337  [13600/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.263208  [13632/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.212039  [13664/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.229422  [13696/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.186608  [13728/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.350802  [13760/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.254277  [13792/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.167396  [13824/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.211655  [13856/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.213180  [13888/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.153507  [13920/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.182727  [13952/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.181972  [13984/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.193631  [14016/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.233035  [14048/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.270484  [14080/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.263787  [14112/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.275967  [14144/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.204472  [14176/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.244301  [14208/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.274065  [14240/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.291315  [14272/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.186465  [14304/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.244679  [14336/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.245749  [14368/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.302099  [14400/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.195844  [14432/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.176850  [14464/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.191056  [14496/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.200387  [14528/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.196298  [14560/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.226534  [14592/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.286171  [14624/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.343737  [14656/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.284630  [14688/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.181573  [14720/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.316051  [14752/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.196693  [14784/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.157947  [14816/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.313496  [14848/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.207265  [14880/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.221362  [14912/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.290522  [14944/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.254525  [14976/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.231578  [15008/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.282798  [15040/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.220819  [15072/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.268902  [15104/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.316846  [15136/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.152568  [15168/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.151438  [15200/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.177338  [15232/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.223574  [15264/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.241237  [15296/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.237284  [15328/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.181492  [15360/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.261991  [15392/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.180840  [15424/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.339537  [15456/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.240479  [15488/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.214973  [15520/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.150028  [15552/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.290509  [15584/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.202511  [15616/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.115604  [15648/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.206651  [15680/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.251419  [15712/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.228216  [15744/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.259691  [15776/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.167834  [15808/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.253599  [15840/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.258790  [15872/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.150286  [15904/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.283835  [15936/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.284697  [15968/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.191219  [16000/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.186934  [16032/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.146114  [16064/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.267557  [16096/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.269356  [16128/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.203740  [16160/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.211325  [16192/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.203750  [16224/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.242302  [16256/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.239374  [16288/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.231989  [16320/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.254803  [16352/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.174336  [16384/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.280364  [16416/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.268866  [16448/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.302574  [16480/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.249386  [16512/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.306974  [16544/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.209248  [16576/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.243772  [16608/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.275629  [16640/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.225044  [16672/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.170345  [16704/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.285288  [16736/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.244730  [16768/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.322454  [16800/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.198148  [16832/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.122061  [16864/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.237759  [16896/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.155540  [16928/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.285933  [16960/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.314901  [16992/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.257553  [17024/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.175577  [17056/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.217225  [17088/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.208601  [17120/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.218725  [17152/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.157402  [17184/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.265713  [17216/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.228527  [17248/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.197427  [17280/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.207951  [17312/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.247018  [17344/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.396110  [17376/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.143959  [17408/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.277292  [17440/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.239649  [17472/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.211773  [17504/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.156791  [17536/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.331445  [17568/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.236640  [17600/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.258211  [17632/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.352614  [17664/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.220766  [17696/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.220263  [17728/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.218054  [17760/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.281000  [17792/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.352699  [17824/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.191900  [17856/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.263003  [17888/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.122123  [17920/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.199862  [17952/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.227741  [17984/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.231192  [18016/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.226815  [18048/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.241602  [18080/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.255648  [18112/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.323675  [18144/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.273112  [18176/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.179402  [18208/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.163053  [18240/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.223045  [18272/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.245984  [18304/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.197027  [18336/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.262179  [18368/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.163248  [18400/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.158097  [18432/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.227085  [18464/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.267345  [18496/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.241007  [18528/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.262142  [18560/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.202661  [18592/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.191359  [18624/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.307808  [18656/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.217092  [18688/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.280635  [18720/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.270670  [18752/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.311000  [18784/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.218275  [18816/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.218988  [18848/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.209020  [18880/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.254079  [18912/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.240454  [18944/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.239569  [18976/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.222509  [19008/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.313369  [19040/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.194354  [19072/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.126941  [19104/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.272707  [19136/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.223891  [19168/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.211358  [19200/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.250156  [19232/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.195554  [19264/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.211350  [19296/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.149511  [19328/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.243401  [19360/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.287456  [19392/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.272917  [19424/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.236116  [19456/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.264273  [19488/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.239709  [19520/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.227804  [19552/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.221280  [19584/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.223345  [19616/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.283347  [19648/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.168507  [19680/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.230906  [19712/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.144096  [19744/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.200659  [19776/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.242786  [19808/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.180506  [19840/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.229905  [19872/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.177948  [19904/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.209555  [19936/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.251126  [19968/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.211062  [20000/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.180706  [20032/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.265352  [20064/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.166337  [20096/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.167199  [20128/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.248268  [20160/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.216469  [20192/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.222491  [20224/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.313198  [20256/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.269049  [20288/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.219019  [20320/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.247200  [20352/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.167697  [20384/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.142532  [20416/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.297232  [20448/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.149801  [20480/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.177135  [20512/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.231956  [20544/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.246206  [20576/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.252988  [20608/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.286760  [20640/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.224099  [20672/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.260810  [20704/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.235292  [20736/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.249934  [20768/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.182684  [20800/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.184644  [20832/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.267317  [20864/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.152782  [20896/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.187585  [20928/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.281423  [20960/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.158600  [20992/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.204567  [21024/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.244372  [21056/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.197369  [21088/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.311181  [21120/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.184714  [21152/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.225025  [21184/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.202497  [21216/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.317596  [21248/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.193022  [21280/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.232298  [21312/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.261989  [21344/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.206769  [21376/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.250790  [21408/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.289563  [21440/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.262204  [21472/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.265548  [21504/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.301907  [21536/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.202928  [21568/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.258937  [21600/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.183991  [21632/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.212428  [21664/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.202083  [21696/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.246002  [21728/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.163383  [21760/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.199054  [21792/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.325951  [21824/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.261402  [21856/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.167911  [21888/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.282795  [21920/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.140088  [21952/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.247733  [21984/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.244786  [22016/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.324357  [22048/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.250041  [22080/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.312410  [22112/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.244743  [22144/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.139498  [22176/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.192547  [22208/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.192119  [22240/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.245313  [22272/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.220430  [22304/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.291756  [22336/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.215050  [22368/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.219208  [22400/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.230280  [22432/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.336175  [22464/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.315494  [22496/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.227042  [22528/24872]:   0%|          | 0/777 [00:30<?, ?it/s]
loss: 0.227042  [22528/24872]:  91%|█████████ | 704/777 [00:30<00:03, 23.43it/s]
loss: 0.206393  [22560/24872]:  91%|█████████ | 704/777 [00:30<00:03, 23.43it/s]
loss: 0.183595  [22592/24872]:  91%|█████████ | 704/777 [00:30<00:03, 23.43it/s]
loss: 0.124736  [22624/24872]:  91%|█████████ | 704/777 [00:30<00:03, 23.43it/s]
loss: 0.179131  [22656/24872]:  91%|█████████ | 704/777 [00:30<00:03, 23.43it/s]
loss: 0.194689  [22688/24872]:  91%|█████████ | 704/777 [00:30<00:03, 23.43it/s]
loss: 0.166881  [22720/24872]:  91%|█████████ | 704/777 [00:30<00:03, 23.43it/s]
loss: 0.233766  [22752/24872]:  91%|█████████ | 704/777 [00:30<00:03, 23.43it/s]
loss: 0.173770  [22784/24872]:  91%|█████████ | 704/777 [00:30<00:03, 23.43it/s]
loss: 0.225703  [22816/24872]:  91%|█████████ | 704/777 [00:30<00:03, 23.43it/s]
loss: 0.184712  [22848/24872]:  91%|█████████ | 704/777 [00:30<00:03, 23.43it/s]
loss: 0.126143  [22880/24872]:  91%|█████████ | 704/777 [00:30<00:03, 23.43it/s]
loss: 0.168810  [22912/24872]:  91%|█████████ | 704/777 [00:30<00:03, 23.43it/s]
loss: 0.227040  [22944/24872]:  91%|█████████ | 704/777 [00:30<00:03, 23.43it/s]
loss: 0.297863  [22976/24872]:  91%|█████████ | 704/777 [00:30<00:03, 23.43it/s]
loss: 0.218347  [23008/24872]:  91%|█████████ | 704/777 [00:30<00:03, 23.43it/s]
loss: 0.229356  [23040/24872]:  91%|█████████ | 704/777 [00:30<00:03, 23.43it/s]
loss: 0.265692  [23072/24872]:  91%|█████████ | 704/777 [00:30<00:03, 23.43it/s]
loss: 0.199016  [23104/24872]:  91%|█████████ | 704/777 [00:30<00:03, 23.43it/s]
loss: 0.313258  [23136/24872]:  91%|█████████ | 704/777 [00:30<00:03, 23.43it/s]
loss: 0.250108  [23168/24872]:  91%|█████████ | 704/777 [00:30<00:03, 23.43it/s]
loss: 0.204867  [23200/24872]:  91%|█████████ | 704/777 [00:30<00:03, 23.43it/s]
loss: 0.205961  [23232/24872]:  91%|█████████ | 704/777 [00:30<00:03, 23.43it/s]
loss: 0.109376  [23264/24872]:  91%|█████████ | 704/777 [00:31<00:03, 23.43it/s]
loss: 0.163363  [23296/24872]:  91%|█████████ | 704/777 [00:31<00:03, 23.43it/s]
loss: 0.202983  [23328/24872]:  91%|█████████ | 704/777 [00:31<00:03, 23.43it/s]
loss: 0.198397  [23360/24872]:  91%|█████████ | 704/777 [00:31<00:03, 23.43it/s]
loss: 0.229026  [23392/24872]:  91%|█████████ | 704/777 [00:31<00:03, 23.43it/s]
loss: 0.248102  [23424/24872]:  91%|█████████ | 704/777 [00:31<00:03, 23.43it/s]
loss: 0.191411  [23456/24872]:  91%|█████████ | 704/777 [00:31<00:03, 23.43it/s]
loss: 0.168108  [23488/24872]:  91%|█████████ | 704/777 [00:31<00:03, 23.43it/s]
loss: 0.205255  [23520/24872]:  91%|█████████ | 704/777 [00:31<00:03, 23.43it/s]
loss: 0.139436  [23552/24872]:  91%|█████████ | 704/777 [00:31<00:03, 23.43it/s]
loss: 0.224699  [23584/24872]:  91%|█████████ | 704/777 [00:31<00:03, 23.43it/s]
loss: 0.215581  [23616/24872]:  91%|█████████ | 704/777 [00:31<00:03, 23.43it/s]
loss: 0.336249  [23648/24872]:  91%|█████████ | 704/777 [00:31<00:03, 23.43it/s]
loss: 0.173522  [23680/24872]:  91%|█████████ | 704/777 [00:31<00:03, 23.43it/s]
loss: 0.183959  [23712/24872]:  91%|█████████ | 704/777 [00:31<00:03, 23.43it/s]
loss: 0.268512  [23744/24872]:  91%|█████████ | 704/777 [00:31<00:03, 23.43it/s]
loss: 0.238595  [23776/24872]:  91%|█████████ | 704/777 [00:31<00:03, 23.43it/s]
loss: 0.322949  [23808/24872]:  91%|█████████ | 704/777 [00:31<00:03, 23.43it/s]
loss: 0.183946  [23840/24872]:  91%|█████████ | 704/777 [00:31<00:03, 23.43it/s]
loss: 0.218420  [23872/24872]:  91%|█████████ | 704/777 [00:31<00:03, 23.43it/s]
loss: 0.231957  [23904/24872]:  91%|█████████ | 704/777 [00:31<00:03, 23.43it/s]
loss: 0.320227  [23936/24872]:  91%|█████████ | 704/777 [00:31<00:03, 23.43it/s]
loss: 0.234192  [23968/24872]:  91%|█████████ | 704/777 [00:31<00:03, 23.43it/s]
loss: 0.214343  [24000/24872]:  91%|█████████ | 704/777 [00:32<00:03, 23.43it/s]
loss: 0.203002  [24032/24872]:  91%|█████████ | 704/777 [00:32<00:03, 23.43it/s]
loss: 0.250765  [24064/24872]:  91%|█████████ | 704/777 [00:32<00:03, 23.43it/s]
loss: 0.184166  [24096/24872]:  91%|█████████ | 704/777 [00:32<00:03, 23.43it/s]
loss: 0.208170  [24128/24872]:  91%|█████████ | 704/777 [00:32<00:03, 23.43it/s]
loss: 0.150018  [24160/24872]:  91%|█████████ | 704/777 [00:32<00:03, 23.43it/s]
loss: 0.099855  [24192/24872]:  91%|█████████ | 704/777 [00:32<00:03, 23.43it/s]
loss: 0.110312  [24224/24872]:  91%|█████████ | 704/777 [00:32<00:03, 23.43it/s]
loss: 0.307379  [24256/24872]:  91%|█████████ | 704/777 [00:32<00:03, 23.43it/s]
loss: 0.154112  [24288/24872]:  91%|█████████ | 704/777 [00:32<00:03, 23.43it/s]
loss: 0.211317  [24320/24872]:  91%|█████████ | 704/777 [00:32<00:03, 23.43it/s]
loss: 0.259524  [24352/24872]:  91%|█████████ | 704/777 [00:32<00:03, 23.43it/s]
loss: 0.208296  [24384/24872]:  91%|█████████ | 704/777 [00:32<00:03, 23.43it/s]
loss: 0.282749  [24416/24872]:  91%|█████████ | 704/777 [00:32<00:03, 23.43it/s]
loss: 0.236840  [24448/24872]:  91%|█████████ | 704/777 [00:32<00:03, 23.43it/s]
loss: 0.272361  [24480/24872]:  91%|█████████ | 704/777 [00:32<00:03, 23.43it/s]
loss: 0.158533  [24512/24872]:  91%|█████████ | 704/777 [00:32<00:03, 23.43it/s]
loss: 0.247574  [24544/24872]:  91%|█████████ | 704/777 [00:32<00:03, 23.43it/s]
loss: 0.186421  [24576/24872]:  91%|█████████ | 704/777 [00:32<00:03, 23.43it/s]
loss: 0.184364  [24608/24872]:  91%|█████████ | 704/777 [00:32<00:03, 23.43it/s]
loss: 0.284857  [24640/24872]:  91%|█████████ | 704/777 [00:32<00:03, 23.43it/s]
loss: 0.202164  [24672/24872]:  91%|█████████ | 704/777 [00:32<00:03, 23.43it/s]
loss: 0.228512  [24704/24872]:  91%|█████████ | 704/777 [00:32<00:03, 23.43it/s]
loss: 0.202060  [24736/24872]:  91%|█████████ | 704/777 [00:32<00:03, 23.43it/s]
loss: 0.304767  [24768/24872]:  91%|█████████ | 704/777 [00:33<00:03, 23.43it/s]
loss: 0.210959  [24800/24872]:  91%|█████████ | 704/777 [00:33<00:03, 23.43it/s]
loss: 0.197831  [24832/24872]:  91%|█████████ | 704/777 [00:33<00:03, 23.43it/s]
loss: 0.235669  [24864/24872]:  91%|█████████ | 704/777 [00:33<00:03, 23.43it/s]
loss: 0.165323  [24872/24872]:  91%|█████████ | 704/777 [00:33<00:03, 23.43it/s]
loss: 0.165323  [24872/24872]: : 778it [00:33, 23.44it/s]
Epoch 5, time=133.34s

  0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.215962  [   32/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.181025  [   64/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.356721  [   96/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.294840  [  128/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.267851  [  160/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.236777  [  192/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.255681  [  224/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.368363  [  256/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.218348  [  288/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.178353  [  320/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.232199  [  352/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.247413  [  384/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.192254  [  416/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.197739  [  448/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.297298  [  480/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.308295  [  512/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.181816  [  544/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.253343  [  576/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.278672  [  608/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.235063  [  640/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.222367  [  672/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.241854  [  704/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.301622  [  736/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.207183  [  768/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.237348  [  800/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.260142  [  832/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.318137  [  864/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.312979  [  896/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.212119  [  928/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.262888  [  960/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.197865  [  992/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.188460  [ 1024/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.146935  [ 1056/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.248907  [ 1088/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.246469  [ 1120/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.164940  [ 1152/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.326878  [ 1184/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.202975  [ 1216/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.196983  [ 1248/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.151887  [ 1280/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.213923  [ 1312/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.167031  [ 1344/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.192473  [ 1376/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.212002  [ 1408/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.218445  [ 1440/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.239584  [ 1472/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.268312  [ 1504/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.168538  [ 1536/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.192694  [ 1568/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.211805  [ 1600/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.256871  [ 1632/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.170481  [ 1664/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.231860  [ 1696/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.189357  [ 1728/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.256134  [ 1760/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.086396  [ 1792/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.187405  [ 1824/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.096029  [ 1856/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.231108  [ 1888/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.183861  [ 1920/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.248100  [ 1952/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.243519  [ 1984/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.150933  [ 2016/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.214766  [ 2048/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.224143  [ 2080/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.158316  [ 2112/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.247253  [ 2144/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.199461  [ 2176/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.168934  [ 2208/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.244627  [ 2240/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.241449  [ 2272/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.209790  [ 2304/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.284796  [ 2336/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.212722  [ 2368/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.223148  [ 2400/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.320967  [ 2432/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.261504  [ 2464/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.148595  [ 2496/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.185230  [ 2528/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.169083  [ 2560/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.247116  [ 2592/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.156693  [ 2624/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.275082  [ 2656/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.210185  [ 2688/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.193891  [ 2720/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.179966  [ 2752/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.199770  [ 2784/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.285528  [ 2816/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.205632  [ 2848/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.224542  [ 2880/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.177005  [ 2912/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.244324  [ 2944/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.252665  [ 2976/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.189786  [ 3008/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.234965  [ 3040/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.203833  [ 3072/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.207197  [ 3104/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.192626  [ 3136/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.299414  [ 3168/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.211292  [ 3200/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.261442  [ 3232/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.194004  [ 3264/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.166943  [ 3296/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.161015  [ 3328/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.186879  [ 3360/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.206798  [ 3392/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.129879  [ 3424/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.162915  [ 3456/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.157637  [ 3488/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.267228  [ 3520/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.208340  [ 3552/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.255768  [ 3584/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.237510  [ 3616/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.216429  [ 3648/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.224508  [ 3680/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.320360  [ 3712/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.207841  [ 3744/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.199437  [ 3776/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.190387  [ 3808/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.226493  [ 3840/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.128884  [ 3872/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.143828  [ 3904/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.147235  [ 3936/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.230967  [ 3968/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.281604  [ 4000/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.162677  [ 4032/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.199543  [ 4064/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.237985  [ 4096/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.217102  [ 4128/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.177520  [ 4160/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.192549  [ 4192/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.191483  [ 4224/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.187687  [ 4256/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.175069  [ 4288/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.172920  [ 4320/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.151591  [ 4352/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.195770  [ 4384/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.239541  [ 4416/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.278662  [ 4448/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.271011  [ 4480/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.107489  [ 4512/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.249002  [ 4544/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.120460  [ 4576/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.220939  [ 4608/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.176157  [ 4640/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.170008  [ 4672/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.178885  [ 4704/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.174408  [ 4736/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.226473  [ 4768/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.187414  [ 4800/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.191332  [ 4832/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.174658  [ 4864/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.208260  [ 4896/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.280138  [ 4928/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.168676  [ 4960/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.153139  [ 4992/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.248721  [ 5024/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.247742  [ 5056/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.199580  [ 5088/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.279947  [ 5120/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.144268  [ 5152/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.226417  [ 5184/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.168429  [ 5216/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.205069  [ 5248/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.200517  [ 5280/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.257504  [ 5312/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.187593  [ 5344/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.186274  [ 5376/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.350888  [ 5408/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.111499  [ 5440/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.224220  [ 5472/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.211012  [ 5504/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.201021  [ 5536/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.267515  [ 5568/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.214169  [ 5600/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.214126  [ 5632/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.250257  [ 5664/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.282870  [ 5696/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.236324  [ 5728/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.247099  [ 5760/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.327622  [ 5792/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.223935  [ 5824/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.228436  [ 5856/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.204782  [ 5888/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.100193  [ 5920/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.231530  [ 5952/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.261360  [ 5984/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.197429  [ 6016/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.228075  [ 6048/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.166965  [ 6080/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.280025  [ 6112/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.263532  [ 6144/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.174938  [ 6176/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.200099  [ 6208/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.199101  [ 6240/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.272097  [ 6272/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.188395  [ 6304/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.198986  [ 6336/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.200679  [ 6368/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.325152  [ 6400/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.238906  [ 6432/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.178499  [ 6464/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.272805  [ 6496/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.212777  [ 6528/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.244678  [ 6560/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.207667  [ 6592/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.264171  [ 6624/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.156266  [ 6656/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.294802  [ 6688/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.169881  [ 6720/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.182922  [ 6752/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.153522  [ 6784/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.229359  [ 6816/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.242227  [ 6848/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.187748  [ 6880/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.160036  [ 6912/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.198663  [ 6944/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.187625  [ 6976/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.163559  [ 7008/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.316015  [ 7040/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.322563  [ 7072/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.256024  [ 7104/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.170435  [ 7136/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.222564  [ 7168/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.221267  [ 7200/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.200093  [ 7232/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.163029  [ 7264/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.187030  [ 7296/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.195076  [ 7328/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.154242  [ 7360/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.203852  [ 7392/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.194619  [ 7424/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.169556  [ 7456/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.269034  [ 7488/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.159256  [ 7520/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.176068  [ 7552/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.224596  [ 7584/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.205969  [ 7616/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.238413  [ 7648/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.143515  [ 7680/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.255574  [ 7712/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.303603  [ 7744/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.276561  [ 7776/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.205842  [ 7808/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.186051  [ 7840/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.189542  [ 7872/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.229085  [ 7904/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.183733  [ 7936/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.230332  [ 7968/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.223473  [ 8000/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.191017  [ 8032/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.220434  [ 8064/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.175796  [ 8096/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.189732  [ 8128/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.143082  [ 8160/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.232209  [ 8192/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.215203  [ 8224/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.262843  [ 8256/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.294491  [ 8288/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.196199  [ 8320/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.165925  [ 8352/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.218154  [ 8384/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.206039  [ 8416/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.195147  [ 8448/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.169634  [ 8480/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.249369  [ 8512/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.226135  [ 8544/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.159088  [ 8576/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.172688  [ 8608/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.133747  [ 8640/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.293467  [ 8672/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.158104  [ 8704/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.201479  [ 8736/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.279812  [ 8768/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.237360  [ 8800/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.347457  [ 8832/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.148175  [ 8864/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.166439  [ 8896/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.185748  [ 8928/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.276973  [ 8960/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.175227  [ 8992/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.309622  [ 9024/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.231327  [ 9056/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.267635  [ 9088/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.195569  [ 9120/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.162749  [ 9152/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.232958  [ 9184/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.176982  [ 9216/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.197249  [ 9248/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.316655  [ 9280/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.185667  [ 9312/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.280085  [ 9344/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.175219  [ 9376/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.179675  [ 9408/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.211954  [ 9440/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.208965  [ 9472/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.153314  [ 9504/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.230654  [ 9536/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.160090  [ 9568/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.247533  [ 9600/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.221331  [ 9632/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.193209  [ 9664/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.221482  [ 9696/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.270110  [ 9728/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.197794  [ 9760/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.150164  [ 9792/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.212460  [ 9824/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.242396  [ 9856/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.143665  [ 9888/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.146043  [ 9920/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.176859  [ 9952/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.208277  [ 9984/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.325269  [10016/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.230881  [10048/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.264469  [10080/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.185770  [10112/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.238098  [10144/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.233706  [10176/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.172749  [10208/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.231487  [10240/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.278571  [10272/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.260879  [10304/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.209669  [10336/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.155585  [10368/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.186773  [10400/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.217960  [10432/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.214407  [10464/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.203074  [10496/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.370068  [10528/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.200477  [10560/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.226893  [10592/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.273090  [10624/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.157040  [10656/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.231347  [10688/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.206187  [10720/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.185797  [10752/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.306362  [10784/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.222995  [10816/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.195010  [10848/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.202757  [10880/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.176474  [10912/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.208237  [10944/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.190543  [10976/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.233590  [11008/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.224886  [11040/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.226051  [11072/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.179940  [11104/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.198592  [11136/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.173285  [11168/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.147207  [11200/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.166801  [11232/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.226723  [11264/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.183065  [11296/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.251619  [11328/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.193544  [11360/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.208774  [11392/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.212962  [11424/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.232359  [11456/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.202618  [11488/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.135371  [11520/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.240221  [11552/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.237293  [11584/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.252126  [11616/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.343867  [11648/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.205522  [11680/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.177823  [11712/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.232268  [11744/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.253111  [11776/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.266863  [11808/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.092591  [11840/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.230477  [11872/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.426498  [11904/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.182091  [11936/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.190575  [11968/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.201733  [12000/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.224198  [12032/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.223559  [12064/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.236032  [12096/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.313222  [12128/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.248690  [12160/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.228542  [12192/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.328068  [12224/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.178522  [12256/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.238319  [12288/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.204765  [12320/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.213019  [12352/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.265311  [12384/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.135121  [12416/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.403498  [12448/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.233080  [12480/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.250566  [12512/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.215257  [12544/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.194655  [12576/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.215145  [12608/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.189201  [12640/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.214352  [12672/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.218361  [12704/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.169780  [12736/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.258674  [12768/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.241576  [12800/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.208135  [12832/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.195104  [12864/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.159011  [12896/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.157909  [12928/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.243725  [12960/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.266548  [12992/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.209956  [13024/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.229052  [13056/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.193214  [13088/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.245356  [13120/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.205079  [13152/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.299684  [13184/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.164837  [13216/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.206979  [13248/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.245810  [13280/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.226149  [13312/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.205419  [13344/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.220311  [13376/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.244809  [13408/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.148553  [13440/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.192339  [13472/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.236882  [13504/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.164591  [13536/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.255424  [13568/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.338008  [13600/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.237168  [13632/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.182205  [13664/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.213526  [13696/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.165888  [13728/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.282815  [13760/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.213016  [13792/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.148018  [13824/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.178340  [13856/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.205742  [13888/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.129562  [13920/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.155975  [13952/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.165469  [13984/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.223440  [14016/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.206937  [14048/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.229159  [14080/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.225689  [14112/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.203195  [14144/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.179022  [14176/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.227563  [14208/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.246815  [14240/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.263784  [14272/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.199501  [14304/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.186328  [14336/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.215941  [14368/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.329740  [14400/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.185012  [14432/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.181909  [14464/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.179730  [14496/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.181625  [14528/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.187967  [14560/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.214423  [14592/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.226985  [14624/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.324201  [14656/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.268321  [14688/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.135660  [14720/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.303257  [14752/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.160216  [14784/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.139426  [14816/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.281561  [14848/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.192813  [14880/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.176972  [14912/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.245060  [14944/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.234582  [14976/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.195679  [15008/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.241845  [15040/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.176026  [15072/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.239251  [15104/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.254083  [15136/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.158115  [15168/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.137316  [15200/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.156153  [15232/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.219628  [15264/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.195583  [15296/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.236158  [15328/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.165437  [15360/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.213545  [15392/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.158570  [15424/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.315187  [15456/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.203581  [15488/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.220020  [15520/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.128313  [15552/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.264935  [15584/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.189827  [15616/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.091312  [15648/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.173918  [15680/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.201651  [15712/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.211354  [15744/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.234586  [15776/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.147423  [15808/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.221853  [15840/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.241709  [15872/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.132513  [15904/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.264586  [15936/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.273397  [15968/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.180477  [16000/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.200498  [16032/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.127940  [16064/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.239798  [16096/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.277956  [16128/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.194145  [16160/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.248276  [16192/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.246966  [16224/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.227541  [16256/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.257113  [16288/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.227303  [16320/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.281393  [16352/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.185467  [16384/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.301276  [16416/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.267183  [16448/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.314866  [16480/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.216750  [16512/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.300972  [16544/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.210733  [16576/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.215267  [16608/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.252403  [16640/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.209285  [16672/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.171102  [16704/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.273486  [16736/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.248432  [16768/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.357491  [16800/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.222298  [16832/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.109463  [16864/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.239561  [16896/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.152241  [16928/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.269413  [16960/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.290491  [16992/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.250447  [17024/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.160690  [17056/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.186781  [17088/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.192514  [17120/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.187287  [17152/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.152121  [17184/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.223558  [17216/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.206994  [17248/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.176729  [17280/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.222679  [17312/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.226296  [17344/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.357538  [17376/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.123827  [17408/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.236466  [17440/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.192820  [17472/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.197101  [17504/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.135884  [17536/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.287584  [17568/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.188932  [17600/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.232409  [17632/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.259761  [17664/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.220272  [17696/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.201059  [17728/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.209227  [17760/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.247421  [17792/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.255972  [17824/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.197421  [17856/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.257303  [17888/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.112479  [17920/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.191409  [17952/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.151274  [17984/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.213042  [18016/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.251040  [18048/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.200591  [18080/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.240675  [18112/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.288123  [18144/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.253383  [18176/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.142197  [18208/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.167114  [18240/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.187088  [18272/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.228075  [18304/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.188373  [18336/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.246186  [18368/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.176973  [18400/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.151154  [18432/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.195201  [18464/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.273568  [18496/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.198805  [18528/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.228948  [18560/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.200383  [18592/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.165847  [18624/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.193924  [18656/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.175335  [18688/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.215149  [18720/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.190066  [18752/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.278645  [18784/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.136560  [18816/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.161417  [18848/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.200890  [18880/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.193459  [18912/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.153290  [18944/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.201215  [18976/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.172471  [19008/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.238663  [19040/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.168845  [19072/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.116669  [19104/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.245297  [19136/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.212202  [19168/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.173239  [19200/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.203303  [19232/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.169377  [19264/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.176766  [19296/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.124457  [19328/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.168653  [19360/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.270061  [19392/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.222892  [19424/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.181891  [19456/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.207197  [19488/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.166915  [19520/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.213938  [19552/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.183834  [19584/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.190496  [19616/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.262403  [19648/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.134143  [19680/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.227174  [19712/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.120829  [19744/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.164969  [19776/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.193118  [19808/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.165269  [19840/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.196786  [19872/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.181845  [19904/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.170833  [19936/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.213956  [19968/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.209582  [20000/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.161123  [20032/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.195003  [20064/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.133550  [20096/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.120775  [20128/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.189265  [20160/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.165089  [20192/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.191678  [20224/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.232535  [20256/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.209430  [20288/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.221583  [20320/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.185288  [20352/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.115240  [20384/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.115610  [20416/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.212768  [20448/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.108299  [20480/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.189118  [20512/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.159148  [20544/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.156078  [20576/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.271207  [20608/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.225071  [20640/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.214637  [20672/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.234361  [20704/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.209486  [20736/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.225043  [20768/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.207766  [20800/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.168573  [20832/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.235499  [20864/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.195630  [20896/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.169156  [20928/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.234921  [20960/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.158728  [20992/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.189451  [21024/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.188152  [21056/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.167643  [21088/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.297248  [21120/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.178461  [21152/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.190312  [21184/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.182262  [21216/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.311035  [21248/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.191354  [21280/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.196491  [21312/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.202553  [21344/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.176777  [21376/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.215598  [21408/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.198906  [21440/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.242312  [21472/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.190209  [21504/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.231886  [21536/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.185837  [21568/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.223620  [21600/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.164325  [21632/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.218866  [21664/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.189554  [21696/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.193158  [21728/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.173062  [21760/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.212538  [21792/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.295970  [21824/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.241928  [21856/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.135749  [21888/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.252492  [21920/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.119436  [21952/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.235623  [21984/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.197065  [22016/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.250227  [22048/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.225321  [22080/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.238068  [22112/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.196819  [22144/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.135339  [22176/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.182736  [22208/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.167340  [22240/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.240033  [22272/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.175500  [22304/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.258435  [22336/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.216147  [22368/24872]:   0%|          | 0/777 [00:30<?, ?it/s]
loss: 0.216147  [22368/24872]:  90%|████████▉ | 699/777 [00:30<00:03, 23.30it/s]
loss: 0.231576  [22400/24872]:  90%|████████▉ | 699/777 [00:30<00:03, 23.30it/s]
loss: 0.276778  [22432/24872]:  90%|████████▉ | 699/777 [00:30<00:03, 23.30it/s]
loss: 0.338171  [22464/24872]:  90%|████████▉ | 699/777 [00:30<00:03, 23.30it/s]
loss: 0.276720  [22496/24872]:  90%|████████▉ | 699/777 [00:30<00:03, 23.30it/s]
loss: 0.238136  [22528/24872]:  90%|████████▉ | 699/777 [00:30<00:03, 23.30it/s]
loss: 0.208543  [22560/24872]:  90%|████████▉ | 699/777 [00:30<00:03, 23.30it/s]
loss: 0.142007  [22592/24872]:  90%|████████▉ | 699/777 [00:30<00:03, 23.30it/s]
loss: 0.122726  [22624/24872]:  90%|████████▉ | 699/777 [00:30<00:03, 23.30it/s]
loss: 0.213651  [22656/24872]:  90%|████████▉ | 699/777 [00:30<00:03, 23.30it/s]
loss: 0.205730  [22688/24872]:  90%|████████▉ | 699/777 [00:30<00:03, 23.30it/s]
loss: 0.159760  [22720/24872]:  90%|████████▉ | 699/777 [00:30<00:03, 23.30it/s]
loss: 0.199936  [22752/24872]:  90%|████████▉ | 699/777 [00:30<00:03, 23.30it/s]
loss: 0.172316  [22784/24872]:  90%|████████▉ | 699/777 [00:30<00:03, 23.30it/s]
loss: 0.206833  [22816/24872]:  90%|████████▉ | 699/777 [00:30<00:03, 23.30it/s]
loss: 0.180921  [22848/24872]:  90%|████████▉ | 699/777 [00:30<00:03, 23.30it/s]
loss: 0.123071  [22880/24872]:  90%|████████▉ | 699/777 [00:30<00:03, 23.30it/s]
loss: 0.151300  [22912/24872]:  90%|████████▉ | 699/777 [00:30<00:03, 23.30it/s]
loss: 0.228418  [22944/24872]:  90%|████████▉ | 699/777 [00:30<00:03, 23.30it/s]
loss: 0.301283  [22976/24872]:  90%|████████▉ | 699/777 [00:30<00:03, 23.30it/s]
loss: 0.187222  [23008/24872]:  90%|████████▉ | 699/777 [00:30<00:03, 23.30it/s]
loss: 0.188639  [23040/24872]:  90%|████████▉ | 699/777 [00:30<00:03, 23.30it/s]
loss: 0.262789  [23072/24872]:  90%|████████▉ | 699/777 [00:30<00:03, 23.30it/s]
loss: 0.204827  [23104/24872]:  90%|████████▉ | 699/777 [00:30<00:03, 23.30it/s]
loss: 0.257445  [23136/24872]:  90%|████████▉ | 699/777 [00:31<00:03, 23.30it/s]
loss: 0.206571  [23168/24872]:  90%|████████▉ | 699/777 [00:31<00:03, 23.30it/s]
loss: 0.169577  [23200/24872]:  90%|████████▉ | 699/777 [00:31<00:03, 23.30it/s]
loss: 0.183833  [23232/24872]:  90%|████████▉ | 699/777 [00:31<00:03, 23.30it/s]
loss: 0.090571  [23264/24872]:  90%|████████▉ | 699/777 [00:31<00:03, 23.30it/s]
loss: 0.154409  [23296/24872]:  90%|████████▉ | 699/777 [00:31<00:03, 23.30it/s]
loss: 0.167371  [23328/24872]:  90%|████████▉ | 699/777 [00:31<00:03, 23.30it/s]
loss: 0.211348  [23360/24872]:  90%|████████▉ | 699/777 [00:31<00:03, 23.30it/s]
loss: 0.203880  [23392/24872]:  90%|████████▉ | 699/777 [00:31<00:03, 23.30it/s]
loss: 0.223213  [23424/24872]:  90%|████████▉ | 699/777 [00:31<00:03, 23.30it/s]
loss: 0.193430  [23456/24872]:  90%|████████▉ | 699/777 [00:31<00:03, 23.30it/s]
loss: 0.137384  [23488/24872]:  90%|████████▉ | 699/777 [00:31<00:03, 23.30it/s]
loss: 0.174576  [23520/24872]:  90%|████████▉ | 699/777 [00:31<00:03, 23.30it/s]
loss: 0.118255  [23552/24872]:  90%|████████▉ | 699/777 [00:31<00:03, 23.30it/s]
loss: 0.232129  [23584/24872]:  90%|████████▉ | 699/777 [00:31<00:03, 23.30it/s]
loss: 0.186960  [23616/24872]:  90%|████████▉ | 699/777 [00:31<00:03, 23.30it/s]
loss: 0.287638  [23648/24872]:  90%|████████▉ | 699/777 [00:31<00:03, 23.30it/s]
loss: 0.151503  [23680/24872]:  90%|████████▉ | 699/777 [00:31<00:03, 23.30it/s]
loss: 0.133722  [23712/24872]:  90%|████████▉ | 699/777 [00:31<00:03, 23.30it/s]
loss: 0.219353  [23744/24872]:  90%|████████▉ | 699/777 [00:31<00:03, 23.30it/s]
loss: 0.194315  [23776/24872]:  90%|████████▉ | 699/777 [00:31<00:03, 23.30it/s]
loss: 0.272305  [23808/24872]:  90%|████████▉ | 699/777 [00:31<00:03, 23.30it/s]
loss: 0.130208  [23840/24872]:  90%|████████▉ | 699/777 [00:31<00:03, 23.30it/s]
loss: 0.147776  [23872/24872]:  90%|████████▉ | 699/777 [00:31<00:03, 23.30it/s]
loss: 0.184080  [23904/24872]:  90%|████████▉ | 699/777 [00:32<00:03, 23.30it/s]
loss: 0.280891  [23936/24872]:  90%|████████▉ | 699/777 [00:32<00:03, 23.30it/s]
loss: 0.210585  [23968/24872]:  90%|████████▉ | 699/777 [00:32<00:03, 23.30it/s]
loss: 0.170937  [24000/24872]:  90%|████████▉ | 699/777 [00:32<00:03, 23.30it/s]
loss: 0.176377  [24032/24872]:  90%|████████▉ | 699/777 [00:32<00:03, 23.30it/s]
loss: 0.199637  [24064/24872]:  90%|████████▉ | 699/777 [00:32<00:03, 23.30it/s]
loss: 0.132566  [24096/24872]:  90%|████████▉ | 699/777 [00:32<00:03, 23.30it/s]
loss: 0.149191  [24128/24872]:  90%|████████▉ | 699/777 [00:32<00:03, 23.30it/s]
loss: 0.135714  [24160/24872]:  90%|████████▉ | 699/777 [00:32<00:03, 23.30it/s]
loss: 0.067088  [24192/24872]:  90%|████████▉ | 699/777 [00:32<00:03, 23.30it/s]
loss: 0.080955  [24224/24872]:  90%|████████▉ | 699/777 [00:32<00:03, 23.30it/s]
loss: 0.223045  [24256/24872]:  90%|████████▉ | 699/777 [00:32<00:03, 23.30it/s]
loss: 0.131048  [24288/24872]:  90%|████████▉ | 699/777 [00:32<00:03, 23.30it/s]
loss: 0.157651  [24320/24872]:  90%|████████▉ | 699/777 [00:32<00:03, 23.30it/s]
loss: 0.189970  [24352/24872]:  90%|████████▉ | 699/777 [00:32<00:03, 23.30it/s]
loss: 0.157780  [24384/24872]:  90%|████████▉ | 699/777 [00:32<00:03, 23.30it/s]
loss: 0.201600  [24416/24872]:  90%|████████▉ | 699/777 [00:32<00:03, 23.30it/s]
loss: 0.212442  [24448/24872]:  90%|████████▉ | 699/777 [00:32<00:03, 23.30it/s]
loss: 0.215208  [24480/24872]:  90%|████████▉ | 699/777 [00:32<00:03, 23.30it/s]
loss: 0.104911  [24512/24872]:  90%|████████▉ | 699/777 [00:32<00:03, 23.30it/s]
loss: 0.161324  [24544/24872]:  90%|████████▉ | 699/777 [00:32<00:03, 23.30it/s]
loss: 0.150344  [24576/24872]:  90%|████████▉ | 699/777 [00:32<00:03, 23.30it/s]
loss: 0.147008  [24608/24872]:  90%|████████▉ | 699/777 [00:32<00:03, 23.30it/s]
loss: 0.201246  [24640/24872]:  90%|████████▉ | 699/777 [00:33<00:03, 23.30it/s]
loss: 0.145471  [24672/24872]:  90%|████████▉ | 699/777 [00:33<00:03, 23.30it/s]
loss: 0.214398  [24704/24872]:  90%|████████▉ | 699/777 [00:33<00:03, 23.30it/s]
loss: 0.164375  [24736/24872]:  90%|████████▉ | 699/777 [00:33<00:03, 23.30it/s]
loss: 0.225740  [24768/24872]:  90%|████████▉ | 699/777 [00:33<00:03, 23.30it/s]
loss: 0.174293  [24800/24872]:  90%|████████▉ | 699/777 [00:33<00:03, 23.30it/s]
loss: 0.143668  [24832/24872]:  90%|████████▉ | 699/777 [00:33<00:03, 23.30it/s]
loss: 0.186313  [24864/24872]:  90%|████████▉ | 699/777 [00:33<00:03, 23.30it/s]
loss: 0.134436  [24872/24872]:  90%|████████▉ | 699/777 [00:33<00:03, 23.30it/s]
loss: 0.134436  [24872/24872]: : 778it [00:33, 23.34it/s]
-------------------------------
LR=0.0001, batch_size=64
-------------------------------
Epoch 1, time=166.67s

  0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.144158  [   64/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.938114  [  128/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.216651  [  192/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.333802  [  256/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.399317  [  320/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.428313  [  384/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.291134  [  448/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.264903  [  512/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.229243  [  576/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.354436  [  640/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.300824  [  704/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.279724  [  768/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.246788  [  832/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.277786  [  896/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.266368  [  960/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.244360  [ 1024/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.242879  [ 1088/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.209085  [ 1152/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.264400  [ 1216/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.162860  [ 1280/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.173941  [ 1344/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.200744  [ 1408/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.227800  [ 1472/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.200097  [ 1536/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.188385  [ 1600/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.161728  [ 1664/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.214589  [ 1728/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.156758  [ 1792/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.121906  [ 1856/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.199353  [ 1920/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.220811  [ 1984/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.187175  [ 2048/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.196131  [ 2112/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.206815  [ 2176/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.184157  [ 2240/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.180445  [ 2304/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.234566  [ 2368/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.241754  [ 2432/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.181813  [ 2496/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.173994  [ 2560/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.178993  [ 2624/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.209888  [ 2688/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.169515  [ 2752/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.200038  [ 2816/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.189498  [ 2880/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.184609  [ 2944/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.176138  [ 3008/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.184421  [ 3072/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.158317  [ 3136/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.225818  [ 3200/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.172568  [ 3264/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.139140  [ 3328/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.163853  [ 3392/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.121372  [ 3456/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.174323  [ 3520/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.180451  [ 3584/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.179633  [ 3648/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.227829  [ 3712/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.181421  [ 3776/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.182915  [ 3840/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.111076  [ 3904/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.158244  [ 3968/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.200597  [ 4032/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.176800  [ 4096/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.158035  [ 4160/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.170064  [ 4224/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.157136  [ 4288/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.134802  [ 4352/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.188117  [ 4416/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.257061  [ 4480/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.150202  [ 4544/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.131180  [ 4608/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.136870  [ 4672/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.144282  [ 4736/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.177554  [ 4800/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.157625  [ 4864/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.195341  [ 4928/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.132661  [ 4992/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.200287  [ 5056/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.205789  [ 5120/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.144657  [ 5184/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.161607  [ 5248/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.195541  [ 5312/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.165166  [ 5376/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.168982  [ 5440/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.183404  [ 5504/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.211256  [ 5568/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.172738  [ 5632/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.214477  [ 5696/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.186167  [ 5760/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.236867  [ 5824/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.170520  [ 5888/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.134491  [ 5952/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.186401  [ 6016/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.152312  [ 6080/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.249595  [ 6144/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.156942  [ 6208/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.191051  [ 6272/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.169561  [ 6336/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.222326  [ 6400/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.172449  [ 6464/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.199601  [ 6528/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.192800  [ 6592/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.157470  [ 6656/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.191787  [ 6720/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.138800  [ 6784/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.186611  [ 6848/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.153492  [ 6912/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.163359  [ 6976/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.195769  [ 7040/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.242099  [ 7104/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.179939  [ 7168/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.181576  [ 7232/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.151813  [ 7296/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.144172  [ 7360/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.169324  [ 7424/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.188829  [ 7488/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.150889  [ 7552/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.195110  [ 7616/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.153907  [ 7680/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.263301  [ 7744/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.206343  [ 7808/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.141907  [ 7872/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.176950  [ 7936/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.184638  [ 8000/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.176437  [ 8064/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.151714  [ 8128/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.155199  [ 8192/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.185403  [ 8256/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.224320  [ 8320/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.152071  [ 8384/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.171942  [ 8448/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.179121  [ 8512/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.160633  [ 8576/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.136041  [ 8640/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.202637  [ 8704/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.183075  [ 8768/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.239430  [ 8832/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.139389  [ 8896/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.189289  [ 8960/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.190109  [ 9024/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.192792  [ 9088/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.146582  [ 9152/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.165873  [ 9216/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.216944  [ 9280/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.199044  [ 9344/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.150808  [ 9408/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.167700  [ 9472/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.167845  [ 9536/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.164878  [ 9600/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.181394  [ 9664/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.196384  [ 9728/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.139403  [ 9792/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.194606  [ 9856/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.136174  [ 9920/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.137648  [ 9984/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.235520  [10048/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.164732  [10112/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.161386  [10176/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.157338  [10240/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.215528  [10304/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.153288  [10368/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.133960  [10432/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.163721  [10496/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.244327  [10560/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.203101  [10624/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.155353  [10688/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.158202  [10752/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.218096  [10816/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.141387  [10880/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.160720  [10944/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.178667  [11008/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.171935  [11072/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.138038  [11136/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.123044  [11200/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.152991  [11264/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.176629  [11328/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.165948  [11392/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.181134  [11456/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.123079  [11520/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.200205  [11584/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.229329  [11648/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.137667  [11712/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.185984  [11776/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.148808  [11840/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.247056  [11904/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.153572  [11968/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.161747  [12032/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.145781  [12096/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.189694  [12160/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.211665  [12224/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.127834  [12288/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.163445  [12352/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.147238  [12416/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.251615  [12480/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.152941  [12544/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.147805  [12608/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.177388  [12672/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.135101  [12736/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.212210  [12800/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.160790  [12864/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.127197  [12928/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.211107  [12992/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.179172  [13056/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.166725  [13120/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.178327  [13184/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.144532  [13248/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.204152  [13312/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.166884  [13376/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.163914  [13440/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.174869  [13504/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.161635  [13568/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.234777  [13632/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.153057  [13696/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.163051  [13760/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.157300  [13824/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.162136  [13888/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.115503  [13952/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.153728  [14016/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.179210  [14080/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.170523  [14144/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.158819  [14208/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.187970  [14272/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.175608  [14336/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.223222  [14400/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.149458  [14464/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.152445  [14528/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.169683  [14592/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.235063  [14656/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.157225  [14720/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.212271  [14784/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.193205  [14848/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.150423  [14912/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.204599  [14976/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.196050  [15040/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.192309  [15104/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.192356  [15168/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.140494  [15232/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.181326  [15296/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.177352  [15360/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.183116  [15424/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.236355  [15488/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.142910  [15552/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.235558  [15616/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.136062  [15680/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.164414  [15744/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.159751  [15808/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.192582  [15872/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.150228  [15936/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.196307  [16000/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.102892  [16064/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.189261  [16128/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.159775  [16192/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.184205  [16256/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.179411  [16320/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.189648  [16384/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.226946  [16448/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.186242  [16512/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.212849  [16576/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.219846  [16640/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.150357  [16704/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.205292  [16768/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.194070  [16832/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.161726  [16896/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.162523  [16960/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.216178  [17024/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.141556  [17088/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.162644  [17152/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.148931  [17216/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.141381  [17280/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.181968  [17344/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.176305  [17408/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.175055  [17472/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.138035  [17536/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.200540  [17600/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.282810  [17664/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.160777  [17728/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.201345  [17792/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.171931  [17856/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.132491  [17920/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.149304  [17984/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.186585  [18048/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.175539  [18112/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.240344  [18176/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.102235  [18240/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.166466  [18304/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.177666  [18368/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.115077  [18432/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.172527  [18496/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.180815  [18560/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.143014  [18624/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.146115  [18688/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.171022  [18752/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.185284  [18816/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.150688  [18880/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.144963  [18944/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.169418  [19008/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.169687  [19072/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.150342  [19136/24872]:   0%|          | 0/388 [00:17<?, ?it/s]
loss: 0.157838  [19200/24872]:   0%|          | 0/388 [00:17<?, ?it/s]
loss: 0.172254  [19264/24872]:   0%|          | 0/388 [00:17<?, ?it/s]
loss: 0.119745  [19328/24872]:   0%|          | 0/388 [00:17<?, ?it/s]
loss: 0.184877  [19392/24872]:   0%|          | 0/388 [00:17<?, ?it/s]
loss: 0.158548  [19456/24872]:   0%|          | 0/388 [00:17<?, ?it/s]
loss: 0.160986  [19520/24872]:   0%|          | 0/388 [00:17<?, ?it/s]
loss: 0.141557  [19584/24872]:   0%|          | 0/388 [00:17<?, ?it/s]
loss: 0.194198  [19648/24872]:   0%|          | 0/388 [00:17<?, ?it/s]
loss: 0.147157  [19712/24872]:   0%|          | 0/388 [00:17<?, ?it/s]
loss: 0.120654  [19776/24872]:   0%|          | 0/388 [00:17<?, ?it/s]
loss: 0.157699  [19840/24872]:   0%|          | 0/388 [00:17<?, ?it/s]
loss: 0.162253  [19904/24872]:   0%|          | 0/388 [00:17<?, ?it/s]
loss: 0.170691  [19968/24872]:   0%|          | 0/388 [00:17<?, ?it/s]
loss: 0.172888  [20032/24872]:   0%|          | 0/388 [00:17<?, ?it/s]
loss: 0.149126  [20096/24872]:   0%|          | 0/388 [00:17<?, ?it/s]
loss: 0.138272  [20160/24872]:   0%|          | 0/388 [00:17<?, ?it/s]
loss: 0.153478  [20224/24872]:   0%|          | 0/388 [00:18<?, ?it/s]
loss: 0.194284  [20288/24872]:   0%|          | 0/388 [00:18<?, ?it/s]
loss: 0.185828  [20352/24872]:   0%|          | 0/388 [00:18<?, ?it/s]
loss: 0.102227  [20416/24872]:   0%|          | 0/388 [00:18<?, ?it/s]
loss: 0.149702  [20480/24872]:   0%|          | 0/388 [00:18<?, ?it/s]
loss: 0.131688  [20544/24872]:   0%|          | 0/388 [00:18<?, ?it/s]
loss: 0.184380  [20608/24872]:   0%|          | 0/388 [00:18<?, ?it/s]
loss: 0.208491  [20672/24872]:   0%|          | 0/388 [00:18<?, ?it/s]
loss: 0.175052  [20736/24872]:   0%|          | 0/388 [00:18<?, ?it/s]
loss: 0.155147  [20800/24872]:   0%|          | 0/388 [00:18<?, ?it/s]
loss: 0.170095  [20864/24872]:   0%|          | 0/388 [00:18<?, ?it/s]
loss: 0.143670  [20928/24872]:   0%|          | 0/388 [00:18<?, ?it/s]
loss: 0.156000  [20992/24872]:   0%|          | 0/388 [00:18<?, ?it/s]
loss: 0.158235  [21056/24872]:   0%|          | 0/388 [00:18<?, ?it/s]
loss: 0.197649  [21120/24872]:   0%|          | 0/388 [00:18<?, ?it/s]
loss: 0.166921  [21184/24872]:   0%|          | 0/388 [00:18<?, ?it/s]
loss: 0.209182  [21248/24872]:   0%|          | 0/388 [00:18<?, ?it/s]
loss: 0.179632  [21312/24872]:   0%|          | 0/388 [00:18<?, ?it/s]
loss: 0.161195  [21376/24872]:   0%|          | 0/388 [00:19<?, ?it/s]
loss: 0.165799  [21440/24872]:   0%|          | 0/388 [00:19<?, ?it/s]
loss: 0.192891  [21504/24872]:   0%|          | 0/388 [00:19<?, ?it/s]
loss: 0.137531  [21568/24872]:   0%|          | 0/388 [00:19<?, ?it/s]
loss: 0.163331  [21632/24872]:   0%|          | 0/388 [00:19<?, ?it/s]
loss: 0.177518  [21696/24872]:   0%|          | 0/388 [00:19<?, ?it/s]
loss: 0.152814  [21760/24872]:   0%|          | 0/388 [00:19<?, ?it/s]
loss: 0.213328  [21824/24872]:   0%|          | 0/388 [00:19<?, ?it/s]
loss: 0.171854  [21888/24872]:   0%|          | 0/388 [00:19<?, ?it/s]
loss: 0.147944  [21952/24872]:   0%|          | 0/388 [00:19<?, ?it/s]
loss: 0.168628  [22016/24872]:   0%|          | 0/388 [00:19<?, ?it/s]
loss: 0.191429  [22080/24872]:   0%|          | 0/388 [00:19<?, ?it/s]
loss: 0.174203  [22144/24872]:   0%|          | 0/388 [00:19<?, ?it/s]
loss: 0.117497  [22208/24872]:   0%|          | 0/388 [00:19<?, ?it/s]
loss: 0.175690  [22272/24872]:   0%|          | 0/388 [00:19<?, ?it/s]
loss: 0.181269  [22336/24872]:   0%|          | 0/388 [00:19<?, ?it/s]
loss: 0.174409  [22400/24872]:   0%|          | 0/388 [00:19<?, ?it/s]
loss: 0.243074  [22464/24872]:   0%|          | 0/388 [00:19<?, ?it/s]
loss: 0.195544  [22528/24872]:   0%|          | 0/388 [00:20<?, ?it/s]
loss: 0.129129  [22592/24872]:   0%|          | 0/388 [00:20<?, ?it/s]
loss: 0.123174  [22656/24872]:   0%|          | 0/388 [00:20<?, ?it/s]
loss: 0.140171  [22720/24872]:   0%|          | 0/388 [00:20<?, ?it/s]
loss: 0.153418  [22784/24872]:   0%|          | 0/388 [00:20<?, ?it/s]
loss: 0.142164  [22848/24872]:   0%|          | 0/388 [00:20<?, ?it/s]
loss: 0.112045  [22912/24872]:   0%|          | 0/388 [00:20<?, ?it/s]
loss: 0.233135  [22976/24872]:   0%|          | 0/388 [00:20<?, ?it/s]
loss: 0.135737  [23040/24872]:   0%|          | 0/388 [00:20<?, ?it/s]
loss: 0.213059  [23104/24872]:   0%|          | 0/388 [00:20<?, ?it/s]
loss: 0.208916  [23168/24872]:   0%|          | 0/388 [00:20<?, ?it/s]
loss: 0.145352  [23232/24872]:   0%|          | 0/388 [00:20<?, ?it/s]
loss: 0.104388  [23296/24872]:   0%|          | 0/388 [00:20<?, ?it/s]
loss: 0.159143  [23360/24872]:   0%|          | 0/388 [00:20<?, ?it/s]
loss: 0.182331  [23424/24872]:   0%|          | 0/388 [00:20<?, ?it/s]
loss: 0.140811  [23488/24872]:   0%|          | 0/388 [00:20<?, ?it/s]
loss: 0.127064  [23552/24872]:   0%|          | 0/388 [00:21<?, ?it/s]
loss: 0.208059  [23616/24872]:   0%|          | 0/388 [00:21<?, ?it/s]
loss: 0.195159  [23680/24872]:   0%|          | 0/388 [00:21<?, ?it/s]
loss: 0.195969  [23744/24872]:   0%|          | 0/388 [00:21<?, ?it/s]
loss: 0.227035  [23808/24872]:   0%|          | 0/388 [00:21<?, ?it/s]
loss: 0.126470  [23872/24872]:   0%|          | 0/388 [00:21<?, ?it/s]
loss: 0.211717  [23936/24872]:   0%|          | 0/388 [00:21<?, ?it/s]
loss: 0.180392  [24000/24872]:   0%|          | 0/388 [00:21<?, ?it/s]
loss: 0.197398  [24064/24872]:   0%|          | 0/388 [00:21<?, ?it/s]
loss: 0.115586  [24128/24872]:   0%|          | 0/388 [00:21<?, ?it/s]
loss: 0.093552  [24192/24872]:   0%|          | 0/388 [00:21<?, ?it/s]
loss: 0.142782  [24256/24872]:   0%|          | 0/388 [00:21<?, ?it/s]
loss: 0.138936  [24320/24872]:   0%|          | 0/388 [00:21<?, ?it/s]
loss: 0.157615  [24384/24872]:   0%|          | 0/388 [00:21<?, ?it/s]
loss: 0.172042  [24448/24872]:   0%|          | 0/388 [00:21<?, ?it/s]
loss: 0.145335  [24512/24872]:   0%|          | 0/388 [00:21<?, ?it/s]
loss: 0.151493  [24576/24872]:   0%|          | 0/388 [00:21<?, ?it/s]
loss: 0.163186  [24640/24872]:   0%|          | 0/388 [00:21<?, ?it/s]
loss: 0.165942  [24704/24872]:   0%|          | 0/388 [00:22<?, ?it/s]
loss: 0.193864  [24768/24872]:   0%|          | 0/388 [00:22<?, ?it/s]
loss: 0.159247  [24832/24872]:   0%|          | 0/388 [00:22<?, ?it/s]
loss: 0.148817  [24872/24872]:   0%|          | 0/388 [00:22<?, ?it/s]
loss: 0.148817  [24872/24872]: : 389it [00:22, 17.52it/s]
Epoch 2, time=188.88s

  0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.117926  [   64/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.234955  [  128/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.153669  [  192/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.223500  [  256/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.163820  [  320/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.176037  [  384/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.158497  [  448/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.192161  [  512/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.138778  [  576/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.187142  [  640/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.164111  [  704/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.218250  [  768/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.183511  [  832/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.242875  [  896/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.194694  [  960/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.185493  [ 1024/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.155363  [ 1088/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.183778  [ 1152/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.206733  [ 1216/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.154360  [ 1280/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.140729  [ 1344/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.163866  [ 1408/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.183636  [ 1472/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.156602  [ 1536/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.164443  [ 1600/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.145207  [ 1664/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.180595  [ 1728/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.113295  [ 1792/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.108923  [ 1856/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.186074  [ 1920/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.207876  [ 1984/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.167292  [ 2048/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.163692  [ 2112/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.169755  [ 2176/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.161703  [ 2240/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.153230  [ 2304/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.227024  [ 2368/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.218592  [ 2432/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.168101  [ 2496/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.147298  [ 2560/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.161769  [ 2624/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.213295  [ 2688/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.153864  [ 2752/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.177859  [ 2816/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.169789  [ 2880/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.173673  [ 2944/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.163730  [ 3008/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.177804  [ 3072/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.142609  [ 3136/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.206260  [ 3200/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.164447  [ 3264/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.136610  [ 3328/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.164943  [ 3392/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.106837  [ 3456/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.159031  [ 3520/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.168550  [ 3584/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.166106  [ 3648/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.225630  [ 3712/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.177834  [ 3776/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.182298  [ 3840/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.112277  [ 3904/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.151748  [ 3968/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.197454  [ 4032/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.168454  [ 4096/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.167119  [ 4160/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.170057  [ 4224/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.140817  [ 4288/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.123754  [ 4352/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.185972  [ 4416/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.263131  [ 4480/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.135263  [ 4544/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.123029  [ 4608/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.132816  [ 4672/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.139309  [ 4736/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.167592  [ 4800/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.143989  [ 4864/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.185370  [ 4928/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.124501  [ 4992/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.162801  [ 5056/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.183547  [ 5120/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.137843  [ 5184/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.138788  [ 5248/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.190911  [ 5312/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.156730  [ 5376/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.153540  [ 5440/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.178675  [ 5504/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.217629  [ 5568/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.154752  [ 5632/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.192255  [ 5696/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.167748  [ 5760/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.208854  [ 5824/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.158388  [ 5888/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.130929  [ 5952/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.167483  [ 6016/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.151688  [ 6080/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.237416  [ 6144/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.146303  [ 6208/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.186888  [ 6272/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.159511  [ 6336/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.223734  [ 6400/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.188168  [ 6464/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.185903  [ 6528/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.200000  [ 6592/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.149109  [ 6656/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.189656  [ 6720/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.137202  [ 6784/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.186018  [ 6848/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.169898  [ 6912/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.160009  [ 6976/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.195117  [ 7040/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.265044  [ 7104/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.159485  [ 7168/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.180731  [ 7232/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.141231  [ 7296/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.129134  [ 7360/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.180921  [ 7424/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.211422  [ 7488/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.146721  [ 7552/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.227159  [ 7616/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.149692  [ 7680/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.228626  [ 7744/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.205912  [ 7808/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.153350  [ 7872/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.159306  [ 7936/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.195654  [ 8000/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.178341  [ 8064/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.142068  [ 8128/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.134292  [ 8192/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.180446  [ 8256/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.202860  [ 8320/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.144343  [ 8384/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.161386  [ 8448/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.161643  [ 8512/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.149965  [ 8576/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.134901  [ 8640/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.211319  [ 8704/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.181994  [ 8768/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.215396  [ 8832/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.147732  [ 8896/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.171225  [ 8960/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.202439  [ 9024/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.170740  [ 9088/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.140000  [ 9152/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.156233  [ 9216/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.211525  [ 9280/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.194288  [ 9344/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.146039  [ 9408/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.191195  [ 9472/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.168891  [ 9536/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.164829  [ 9600/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.180529  [ 9664/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.193526  [ 9728/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.138363  [ 9792/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.197064  [ 9856/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.135673  [ 9920/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.145665  [ 9984/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.238171  [10048/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.151467  [10112/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.182695  [10176/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.157721  [10240/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.203452  [10304/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.162549  [10368/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.118399  [10432/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.151662  [10496/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.233755  [10560/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.194311  [10624/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.163050  [10688/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.147847  [10752/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.207608  [10816/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.157740  [10880/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.170698  [10944/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.163651  [11008/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.201783  [11072/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.151039  [11136/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.115051  [11200/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.175117  [11264/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.183431  [11328/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.162003  [11392/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.192837  [11456/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.127385  [11520/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.185276  [11584/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.223745  [11648/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.141305  [11712/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.185309  [11776/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.154283  [11840/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.253513  [11904/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.148753  [11968/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.170915  [12032/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.136883  [12096/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.187341  [12160/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.224566  [12224/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.124854  [12288/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.154344  [12352/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.159527  [12416/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.238626  [12480/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.168095  [12544/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.159036  [12608/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.178164  [12672/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.165397  [12736/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.192700  [12800/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.171056  [12864/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.132837  [12928/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.208912  [12992/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.201228  [13056/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.162262  [13120/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.189236  [13184/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.145364  [13248/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.193576  [13312/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.162890  [13376/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.162567  [13440/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.171829  [13504/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.151062  [13568/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.212329  [13632/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.151473  [13696/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.159439  [13760/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.149195  [13824/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.157869  [13888/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.109933  [13952/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.147834  [14016/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.171004  [14080/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.168866  [14144/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.155187  [14208/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.183532  [14272/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.154542  [14336/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.221765  [14400/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.139958  [14464/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.145943  [14528/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.171465  [14592/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.220764  [14656/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.169625  [14720/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.183445  [14784/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.187302  [14848/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.126714  [14912/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.179709  [14976/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.178156  [15040/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.173891  [15104/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.174504  [15168/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.128815  [15232/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.162523  [15296/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.167028  [15360/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.168310  [15424/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.211386  [15488/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.134509  [15552/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.207198  [15616/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.108804  [15680/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.165399  [15744/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.143440  [15808/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.174060  [15872/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.144290  [15936/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.176065  [16000/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.099100  [16064/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.159420  [16128/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.150558  [16192/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.175694  [16256/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.152513  [16320/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.173713  [16384/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.196413  [16448/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.166315  [16512/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.214101  [16576/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.195898  [16640/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.152185  [16704/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.179469  [16768/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.184750  [16832/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.167192  [16896/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.157400  [16960/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.207009  [17024/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.136797  [17088/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.176737  [17152/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.152514  [17216/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.139917  [17280/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.168395  [17344/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.188654  [17408/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.165159  [17472/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.134860  [17536/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.219302  [17600/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.273981  [17664/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.169926  [17728/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.182532  [17792/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.172316  [17856/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.139722  [17920/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.141642  [17984/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.197293  [18048/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.187303  [18112/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.243670  [18176/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.108122  [18240/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.162092  [18304/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.166655  [18368/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.129833  [18432/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.165259  [18496/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.174067  [18560/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.145269  [18624/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.157426  [18688/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.161034  [18752/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.193480  [18816/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.145448  [18880/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.154642  [18944/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.153990  [19008/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.158169  [19072/24872]:   0%|          | 0/388 [00:17<?, ?it/s]
loss: 0.144179  [19136/24872]:   0%|          | 0/388 [00:17<?, ?it/s]
loss: 0.151214  [19200/24872]:   0%|          | 0/388 [00:17<?, ?it/s]
loss: 0.161270  [19264/24872]:   0%|          | 0/388 [00:17<?, ?it/s]
loss: 0.128903  [19328/24872]:   0%|          | 0/388 [00:17<?, ?it/s]
loss: 0.189257  [19392/24872]:   0%|          | 0/388 [00:17<?, ?it/s]
loss: 0.150511  [19456/24872]:   0%|          | 0/388 [00:17<?, ?it/s]
loss: 0.142436  [19520/24872]:   0%|          | 0/388 [00:17<?, ?it/s]
loss: 0.140509  [19584/24872]:   0%|          | 0/388 [00:17<?, ?it/s]
loss: 0.180986  [19648/24872]:   0%|          | 0/388 [00:17<?, ?it/s]
loss: 0.136254  [19712/24872]:   0%|          | 0/388 [00:17<?, ?it/s]
loss: 0.115826  [19776/24872]:   0%|          | 0/388 [00:17<?, ?it/s]
loss: 0.142784  [19840/24872]:   0%|          | 0/388 [00:17<?, ?it/s]
loss: 0.164365  [19904/24872]:   0%|          | 0/388 [00:17<?, ?it/s]
loss: 0.163634  [19968/24872]:   0%|          | 0/388 [00:17<?, ?it/s]
loss: 0.174100  [20032/24872]:   0%|          | 0/388 [00:17<?, ?it/s]
loss: 0.143330  [20096/24872]:   0%|          | 0/388 [00:17<?, ?it/s]
loss: 0.144127  [20160/24872]:   0%|          | 0/388 [00:17<?, ?it/s]
loss: 0.134422  [20224/24872]:   0%|          | 0/388 [00:18<?, ?it/s]
loss: 0.180661  [20288/24872]:   0%|          | 0/388 [00:18<?, ?it/s]
loss: 0.168721  [20352/24872]:   0%|          | 0/388 [00:18<?, ?it/s]
loss: 0.106523  [20416/24872]:   0%|          | 0/388 [00:18<?, ?it/s]
loss: 0.157981  [20480/24872]:   0%|          | 0/388 [00:18<?, ?it/s]
loss: 0.126697  [20544/24872]:   0%|          | 0/388 [00:18<?, ?it/s]
loss: 0.159450  [20608/24872]:   0%|          | 0/388 [00:18<?, ?it/s]
loss: 0.180535  [20672/24872]:   0%|          | 0/388 [00:18<?, ?it/s]
loss: 0.168344  [20736/24872]:   0%|          | 0/388 [00:18<?, ?it/s]
loss: 0.158344  [20800/24872]:   0%|          | 0/388 [00:18<?, ?it/s]
loss: 0.157213  [20864/24872]:   0%|          | 0/388 [00:18<?, ?it/s]
loss: 0.141387  [20928/24872]:   0%|          | 0/388 [00:18<?, ?it/s]
loss: 0.169833  [20992/24872]:   0%|          | 0/388 [00:18<?, ?it/s]
loss: 0.144012  [21056/24872]:   0%|          | 0/388 [00:18<?, ?it/s]
loss: 0.197937  [21120/24872]:   0%|          | 0/388 [00:18<?, ?it/s]
loss: 0.150839  [21184/24872]:   0%|          | 0/388 [00:18<?, ?it/s]
loss: 0.182124  [21248/24872]:   0%|          | 0/388 [00:18<?, ?it/s]
loss: 0.153007  [21312/24872]:   0%|          | 0/388 [00:19<?, ?it/s]
loss: 0.153817  [21376/24872]:   0%|          | 0/388 [00:19<?, ?it/s]
loss: 0.167008  [21440/24872]:   0%|          | 0/388 [00:19<?, ?it/s]
loss: 0.185802  [21504/24872]:   0%|          | 0/388 [00:19<?, ?it/s]
loss: 0.120471  [21568/24872]:   0%|          | 0/388 [00:19<?, ?it/s]
loss: 0.144688  [21632/24872]:   0%|          | 0/388 [00:19<?, ?it/s]
loss: 0.172951  [21696/24872]:   0%|          | 0/388 [00:19<?, ?it/s]
loss: 0.152907  [21760/24872]:   0%|          | 0/388 [00:19<?, ?it/s]
loss: 0.190717  [21824/24872]:   0%|          | 0/388 [00:19<?, ?it/s]
loss: 0.157107  [21888/24872]:   0%|          | 0/388 [00:19<?, ?it/s]
loss: 0.148580  [21952/24872]:   0%|          | 0/388 [00:19<?, ?it/s]
loss: 0.174680  [22016/24872]:   0%|          | 0/388 [00:19<?, ?it/s]
loss: 0.177418  [22080/24872]:   0%|          | 0/388 [00:19<?, ?it/s]
loss: 0.161256  [22144/24872]:   0%|          | 0/388 [00:19<?, ?it/s]
loss: 0.121843  [22208/24872]:   0%|          | 0/388 [00:19<?, ?it/s]
loss: 0.158382  [22272/24872]:   0%|          | 0/388 [00:19<?, ?it/s]
loss: 0.182522  [22336/24872]:   0%|          | 0/388 [00:19<?, ?it/s]
loss: 0.162353  [22400/24872]:   0%|          | 0/388 [00:19<?, ?it/s]
loss: 0.212829  [22464/24872]:   0%|          | 0/388 [00:20<?, ?it/s]
loss: 0.186551  [22528/24872]:   0%|          | 0/388 [00:20<?, ?it/s]
loss: 0.133498  [22592/24872]:   0%|          | 0/388 [00:20<?, ?it/s]
loss: 0.104712  [22656/24872]:   0%|          | 0/388 [00:20<?, ?it/s]
loss: 0.138579  [22720/24872]:   0%|          | 0/388 [00:20<?, ?it/s]
loss: 0.134104  [22784/24872]:   0%|          | 0/388 [00:20<?, ?it/s]
loss: 0.128846  [22848/24872]:   0%|          | 0/388 [00:20<?, ?it/s]
loss: 0.107163  [22912/24872]:   0%|          | 0/388 [00:20<?, ?it/s]
loss: 0.200706  [22976/24872]:   0%|          | 0/388 [00:20<?, ?it/s]
loss: 0.120847  [23040/24872]:   0%|          | 0/388 [00:20<?, ?it/s]
loss: 0.166933  [23104/24872]:   0%|          | 0/388 [00:20<?, ?it/s]
loss: 0.191825  [23168/24872]:   0%|          | 0/388 [00:20<?, ?it/s]
loss: 0.134298  [23232/24872]:   0%|          | 0/388 [00:20<?, ?it/s]
loss: 0.088867  [23296/24872]:   0%|          | 0/388 [00:20<?, ?it/s]
loss: 0.165746  [23360/24872]:   0%|          | 0/388 [00:20<?, ?it/s]
loss: 0.176469  [23424/24872]:   0%|          | 0/388 [00:20<?, ?it/s]
loss: 0.134668  [23488/24872]:   0%|          | 0/388 [00:20<?, ?it/s]
loss: 0.114653  [23552/24872]:   0%|          | 0/388 [00:20<?, ?it/s]
loss: 0.168694  [23616/24872]:   0%|          | 0/388 [00:21<?, ?it/s]
loss: 0.176212  [23680/24872]:   0%|          | 0/388 [00:21<?, ?it/s]
loss: 0.163146  [23744/24872]:   0%|          | 0/388 [00:21<?, ?it/s]
loss: 0.215519  [23808/24872]:   0%|          | 0/388 [00:21<?, ?it/s]
loss: 0.121452  [23872/24872]:   0%|          | 0/388 [00:21<?, ?it/s]
loss: 0.197088  [23936/24872]:   0%|          | 0/388 [00:21<?, ?it/s]
loss: 0.175607  [24000/24872]:   0%|          | 0/388 [00:21<?, ?it/s]
loss: 0.163482  [24064/24872]:   0%|          | 0/388 [00:21<?, ?it/s]
loss: 0.124969  [24128/24872]:   0%|          | 0/388 [00:21<?, ?it/s]
loss: 0.077306  [24192/24872]:   0%|          | 0/388 [00:21<?, ?it/s]
loss: 0.134598  [24256/24872]:   0%|          | 0/388 [00:21<?, ?it/s]
loss: 0.139288  [24320/24872]:   0%|          | 0/388 [00:21<?, ?it/s]
loss: 0.149131  [24384/24872]:   0%|          | 0/388 [00:21<?, ?it/s]
loss: 0.163334  [24448/24872]:   0%|          | 0/388 [00:21<?, ?it/s]
loss: 0.162910  [24512/24872]:   0%|          | 0/388 [00:21<?, ?it/s]
loss: 0.139080  [24576/24872]:   0%|          | 0/388 [00:21<?, ?it/s]
loss: 0.159032  [24640/24872]:   0%|          | 0/388 [00:21<?, ?it/s]
loss: 0.167365  [24704/24872]:   0%|          | 0/388 [00:22<?, ?it/s]
loss: 0.174494  [24768/24872]:   0%|          | 0/388 [00:22<?, ?it/s]
loss: 0.143378  [24832/24872]:   0%|          | 0/388 [00:22<?, ?it/s]
loss: 0.124543  [24872/24872]:   0%|          | 0/388 [00:22<?, ?it/s]
loss: 0.124543  [24872/24872]: : 389it [00:22, 17.54it/s]
Epoch 3, time=211.06s

  0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.124900  [   64/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.214100  [  128/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.139726  [  192/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.227676  [  256/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.152446  [  320/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.183770  [  384/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.161304  [  448/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.179754  [  512/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.174716  [  576/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.185183  [  640/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.184583  [  704/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.207570  [  768/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.165869  [  832/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.222844  [  896/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.193872  [  960/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.152354  [ 1024/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.152395  [ 1088/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.172868  [ 1152/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.222874  [ 1216/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.137915  [ 1280/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.122025  [ 1344/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.158903  [ 1408/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.167958  [ 1472/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.138219  [ 1536/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.158473  [ 1600/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.136222  [ 1664/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.137971  [ 1728/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.119170  [ 1792/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.102445  [ 1856/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.169657  [ 1920/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.165805  [ 1984/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.145666  [ 2048/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.152112  [ 2112/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.157124  [ 2176/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.156686  [ 2240/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.141525  [ 2304/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.190522  [ 2368/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.210955  [ 2432/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.159031  [ 2496/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.130493  [ 2560/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.143842  [ 2624/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.171943  [ 2688/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.138036  [ 2752/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.165401  [ 2816/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.172823  [ 2880/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.178502  [ 2944/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.156739  [ 3008/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.157225  [ 3072/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.122154  [ 3136/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.190614  [ 3200/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.160761  [ 3264/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.131699  [ 3328/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.153518  [ 3392/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.112356  [ 3456/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.162152  [ 3520/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.146801  [ 3584/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.170599  [ 3648/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.199060  [ 3712/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.175273  [ 3776/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.165154  [ 3840/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.106207  [ 3904/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.147021  [ 3968/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.186027  [ 4032/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.180476  [ 4096/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.127245  [ 4160/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.171076  [ 4224/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.157243  [ 4288/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.115991  [ 4352/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.202084  [ 4416/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.261960  [ 4480/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.153742  [ 4544/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.131615  [ 4608/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.146841  [ 4672/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.164975  [ 4736/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.186917  [ 4800/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.157638  [ 4864/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.204916  [ 4928/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.121606  [ 4992/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.166762  [ 5056/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.196366  [ 5120/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.145028  [ 5184/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.128730  [ 5248/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.181393  [ 5312/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.160510  [ 5376/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.164608  [ 5440/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.176851  [ 5504/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.204917  [ 5568/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.174222  [ 5632/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.212136  [ 5696/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.170341  [ 5760/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.199404  [ 5824/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.144972  [ 5888/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.139989  [ 5952/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.169605  [ 6016/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.143533  [ 6080/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.241150  [ 6144/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.147966  [ 6208/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.168314  [ 6272/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.160834  [ 6336/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.196668  [ 6400/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.145345  [ 6464/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.169654  [ 6528/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.173934  [ 6592/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.127858  [ 6656/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.184273  [ 6720/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.118855  [ 6784/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.182129  [ 6848/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.145201  [ 6912/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.150923  [ 6976/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.166928  [ 7040/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.223784  [ 7104/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.142540  [ 7168/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.163069  [ 7232/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.136191  [ 7296/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.106684  [ 7360/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.140235  [ 7424/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.168763  [ 7488/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.147174  [ 7552/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.180668  [ 7616/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.153486  [ 7680/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.223630  [ 7744/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.185728  [ 7808/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.137994  [ 7872/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.142670  [ 7936/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.154812  [ 8000/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.165743  [ 8064/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.122488  [ 8128/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.129502  [ 8192/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.157211  [ 8256/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.197588  [ 8320/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.122681  [ 8384/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.157716  [ 8448/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.152217  [ 8512/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.139310  [ 8576/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.126936  [ 8640/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.181947  [ 8704/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.165534  [ 8768/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.207197  [ 8832/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.113067  [ 8896/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.157420  [ 8960/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.169043  [ 9024/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.149610  [ 9088/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.121549  [ 9152/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.151094  [ 9216/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.197110  [ 9280/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.138587  [ 9344/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.142694  [ 9408/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.135203  [ 9472/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.143059  [ 9536/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.150111  [ 9600/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.159014  [ 9664/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.162708  [ 9728/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.121365  [ 9792/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.170066  [ 9856/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.125597  [ 9920/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.115546  [ 9984/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.196210  [10048/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.143270  [10112/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.135376  [10176/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.148555  [10240/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.180296  [10304/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.136703  [10368/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.118354  [10432/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.142386  [10496/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.181851  [10560/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.170290  [10624/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.160811  [10688/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.128919  [10752/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.173695  [10816/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.152715  [10880/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.139711  [10944/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.173562  [11008/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.158954  [11072/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.115622  [11136/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.119852  [11200/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.142791  [11264/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.166381  [11328/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.144196  [11392/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.167259  [11456/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.116308  [11520/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.183233  [11584/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.233820  [11648/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.114730  [11712/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.150283  [11776/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.139209  [11840/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.238146  [11904/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.155904  [11968/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.162236  [12032/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.167099  [12096/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.212398  [12160/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.209142  [12224/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.131664  [12288/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.152267  [12352/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.135477  [12416/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.218137  [12480/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.146640  [12544/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.148400  [12608/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.194506  [12672/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.145626  [12736/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.183855  [12800/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.146504  [12864/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.103844  [12928/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.173879  [12992/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.166499  [13056/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.154876  [13120/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.168030  [13184/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.133866  [13248/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.168297  [13312/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.149739  [13376/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.148231  [13440/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.156045  [13504/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.117115  [13568/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.217429  [13632/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.154071  [13696/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.161132  [13760/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.142485  [13824/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.173249  [13888/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.102156  [13952/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.135660  [14016/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.171816  [14080/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.144498  [14144/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.152022  [14208/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.189892  [14272/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.161427  [14336/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.217129  [14400/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.125989  [14464/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.162346  [14528/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.159954  [14592/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.209859  [14656/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.186748  [14720/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.166125  [14784/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.194181  [14848/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.140033  [14912/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.178257  [14976/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.172502  [15040/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.190614  [15104/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.129806  [15168/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.136243  [15232/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.166531  [15296/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.151407  [15360/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.192773  [15424/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.217420  [15488/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.128763  [15552/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.234533  [15616/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.105471  [15680/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.149415  [15744/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.184641  [15808/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.150110  [15872/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.134676  [15936/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.178952  [16000/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.101259  [16064/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.155181  [16128/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.145600  [16192/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.159169  [16256/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.157831  [16320/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.163580  [16384/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.186873  [16448/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.171035  [16512/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.200325  [16576/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.206104  [16640/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.136614  [16704/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.175628  [16768/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.180085  [16832/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.154351  [16896/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.144968  [16960/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.189390  [17024/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.138235  [17088/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.131430  [17152/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.145008  [17216/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.139827  [17280/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.160167  [17344/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.162467  [17408/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.160803  [17472/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.116373  [17536/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.192374  [17600/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.237447  [17664/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.148416  [17728/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.172559  [17792/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.147971  [17856/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.125012  [17920/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.135922  [17984/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.178037  [18048/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.151892  [18112/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.218452  [18176/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.094096  [18240/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.146560  [18304/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.155416  [18368/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.116783  [18432/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.158793  [18496/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.159928  [18560/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.134889  [18624/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.134652  [18688/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.156045  [18752/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.184320  [18816/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.139634  [18880/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.157482  [18944/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.150901  [19008/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.153759  [19072/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.135873  [19136/24872]:   0%|          | 0/388 [00:17<?, ?it/s]
loss: 0.123177  [19200/24872]:   0%|          | 0/388 [00:17<?, ?it/s]
loss: 0.147085  [19264/24872]:   0%|          | 0/388 [00:17<?, ?it/s]
loss: 0.130375  [19328/24872]:   0%|          | 0/388 [00:17<?, ?it/s]
loss: 0.160519  [19392/24872]:   0%|          | 0/388 [00:17<?, ?it/s]
loss: 0.133296  [19456/24872]:   0%|          | 0/388 [00:17<?, ?it/s]
loss: 0.149222  [19520/24872]:   0%|          | 0/388 [00:17<?, ?it/s]
loss: 0.125637  [19584/24872]:   0%|          | 0/388 [00:17<?, ?it/s]
loss: 0.161563  [19648/24872]:   0%|          | 0/388 [00:17<?, ?it/s]
loss: 0.125890  [19712/24872]:   0%|          | 0/388 [00:17<?, ?it/s]
loss: 0.136180  [19776/24872]:   0%|          | 0/388 [00:17<?, ?it/s]
loss: 0.145247  [19840/24872]:   0%|          | 0/388 [00:17<?, ?it/s]
loss: 0.156817  [19904/24872]:   0%|          | 0/388 [00:17<?, ?it/s]
loss: 0.167574  [19968/24872]:   0%|          | 0/388 [00:17<?, ?it/s]
loss: 0.157187  [20032/24872]:   0%|          | 0/388 [00:17<?, ?it/s]
loss: 0.130796  [20096/24872]:   0%|          | 0/388 [00:17<?, ?it/s]
loss: 0.132834  [20160/24872]:   0%|          | 0/388 [00:17<?, ?it/s]
loss: 0.146856  [20224/24872]:   0%|          | 0/388 [00:18<?, ?it/s]
loss: 0.183953  [20288/24872]:   0%|          | 0/388 [00:18<?, ?it/s]
loss: 0.169330  [20352/24872]:   0%|          | 0/388 [00:18<?, ?it/s]
loss: 0.091053  [20416/24872]:   0%|          | 0/388 [00:18<?, ?it/s]
loss: 0.166084  [20480/24872]:   0%|          | 0/388 [00:18<?, ?it/s]
loss: 0.123779  [20544/24872]:   0%|          | 0/388 [00:18<?, ?it/s]
loss: 0.146187  [20608/24872]:   0%|          | 0/388 [00:18<?, ?it/s]
loss: 0.183075  [20672/24872]:   0%|          | 0/388 [00:18<?, ?it/s]
loss: 0.164371  [20736/24872]:   0%|          | 0/388 [00:18<?, ?it/s]
loss: 0.158045  [20800/24872]:   0%|          | 0/388 [00:18<?, ?it/s]
loss: 0.148529  [20864/24872]:   0%|          | 0/388 [00:18<?, ?it/s]
loss: 0.142336  [20928/24872]:   0%|          | 0/388 [00:18<?, ?it/s]
loss: 0.147760  [20992/24872]:   0%|          | 0/388 [00:18<?, ?it/s]
loss: 0.146257  [21056/24872]:   0%|          | 0/388 [00:18<?, ?it/s]
loss: 0.194032  [21120/24872]:   0%|          | 0/388 [00:18<?, ?it/s]
loss: 0.154300  [21184/24872]:   0%|          | 0/388 [00:18<?, ?it/s]
loss: 0.182000  [21248/24872]:   0%|          | 0/388 [00:18<?, ?it/s]
loss: 0.141517  [21312/24872]:   0%|          | 0/388 [00:18<?, ?it/s]
loss: 0.152644  [21376/24872]:   0%|          | 0/388 [00:19<?, ?it/s]
loss: 0.158585  [21440/24872]:   0%|          | 0/388 [00:19<?, ?it/s]
loss: 0.172213  [21504/24872]:   0%|          | 0/388 [00:19<?, ?it/s]
loss: 0.131917  [21568/24872]:   0%|          | 0/388 [00:19<?, ?it/s]
loss: 0.142056  [21632/24872]:   0%|          | 0/388 [00:19<?, ?it/s]
loss: 0.173218  [21696/24872]:   0%|          | 0/388 [00:19<?, ?it/s]
loss: 0.141044  [21760/24872]:   0%|          | 0/388 [00:19<?, ?it/s]
loss: 0.184532  [21824/24872]:   0%|          | 0/388 [00:19<?, ?it/s]
loss: 0.151090  [21888/24872]:   0%|          | 0/388 [00:19<?, ?it/s]
loss: 0.160347  [21952/24872]:   0%|          | 0/388 [00:19<?, ?it/s]
loss: 0.165973  [22016/24872]:   0%|          | 0/388 [00:19<?, ?it/s]
loss: 0.169552  [22080/24872]:   0%|          | 0/388 [00:19<?, ?it/s]
loss: 0.214675  [22144/24872]:   0%|          | 0/388 [00:19<?, ?it/s]
loss: 0.113152  [22208/24872]:   0%|          | 0/388 [00:19<?, ?it/s]
loss: 0.172663  [22272/24872]:   0%|          | 0/388 [00:19<?, ?it/s]
loss: 0.201564  [22336/24872]:   0%|          | 0/388 [00:19<?, ?it/s]
loss: 0.177380  [22400/24872]:   0%|          | 0/388 [00:19<?, ?it/s]
loss: 0.256814  [22464/24872]:   0%|          | 0/388 [00:20<?, ?it/s]
loss: 0.185814  [22528/24872]:   0%|          | 0/388 [00:20<?, ?it/s]
loss: 0.124264  [22592/24872]:   0%|          | 0/388 [00:20<?, ?it/s]
loss: 0.113708  [22656/24872]:   0%|          | 0/388 [00:20<?, ?it/s]
loss: 0.124920  [22720/24872]:   0%|          | 0/388 [00:20<?, ?it/s]
loss: 0.139220  [22784/24872]:   0%|          | 0/388 [00:20<?, ?it/s]
loss: 0.149076  [22848/24872]:   0%|          | 0/388 [00:20<?, ?it/s]
loss: 0.106773  [22912/24872]:   0%|          | 0/388 [00:20<?, ?it/s]
loss: 0.222154  [22976/24872]:   0%|          | 0/388 [00:20<?, ?it/s]
loss: 0.118973  [23040/24872]:   0%|          | 0/388 [00:20<?, ?it/s]
loss: 0.169938  [23104/24872]:   0%|          | 0/388 [00:20<?, ?it/s]
loss: 0.193204  [23168/24872]:   0%|          | 0/388 [00:20<?, ?it/s]
loss: 0.124334  [23232/24872]:   0%|          | 0/388 [00:20<?, ?it/s]
loss: 0.090532  [23296/24872]:   0%|          | 0/388 [00:20<?, ?it/s]
loss: 0.146033  [23360/24872]:   0%|          | 0/388 [00:20<?, ?it/s]
loss: 0.167404  [23424/24872]:   0%|          | 0/388 [00:20<?, ?it/s]
loss: 0.129873  [23488/24872]:   0%|          | 0/388 [00:20<?, ?it/s]
loss: 0.113111  [23552/24872]:   0%|          | 0/388 [00:20<?, ?it/s]
loss: 0.185407  [23616/24872]:   0%|          | 0/388 [00:21<?, ?it/s]
loss: 0.170561  [23680/24872]:   0%|          | 0/388 [00:21<?, ?it/s]
loss: 0.161038  [23744/24872]:   0%|          | 0/388 [00:21<?, ?it/s]
loss: 0.215967  [23808/24872]:   0%|          | 0/388 [00:21<?, ?it/s]
loss: 0.115836  [23872/24872]:   0%|          | 0/388 [00:21<?, ?it/s]
loss: 0.188124  [23936/24872]:   0%|          | 0/388 [00:21<?, ?it/s]
loss: 0.150942  [24000/24872]:   0%|          | 0/388 [00:21<?, ?it/s]
loss: 0.148598  [24064/24872]:   0%|          | 0/388 [00:21<?, ?it/s]
loss: 0.107620  [24128/24872]:   0%|          | 0/388 [00:21<?, ?it/s]
loss: 0.073511  [24192/24872]:   0%|          | 0/388 [00:21<?, ?it/s]
loss: 0.126053  [24256/24872]:   0%|          | 0/388 [00:21<?, ?it/s]
loss: 0.121869  [24320/24872]:   0%|          | 0/388 [00:21<?, ?it/s]
loss: 0.129282  [24384/24872]:   0%|          | 0/388 [00:21<?, ?it/s]
loss: 0.146354  [24448/24872]:   0%|          | 0/388 [00:21<?, ?it/s]
loss: 0.145785  [24512/24872]:   0%|          | 0/388 [00:21<?, ?it/s]
loss: 0.124202  [24576/24872]:   0%|          | 0/388 [00:21<?, ?it/s]
loss: 0.141116  [24640/24872]:   0%|          | 0/388 [00:21<?, ?it/s]
loss: 0.152554  [24704/24872]:   0%|          | 0/388 [00:22<?, ?it/s]
loss: 0.144728  [24768/24872]:   0%|          | 0/388 [00:22<?, ?it/s]
loss: 0.126532  [24832/24872]:   0%|          | 0/388 [00:22<?, ?it/s]
loss: 0.116112  [24872/24872]:   0%|          | 0/388 [00:22<?, ?it/s]
loss: 0.116112  [24872/24872]: : 389it [00:22, 17.55it/s]
Epoch 4, time=233.22s

  0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.107082  [   64/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.194477  [  128/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.131928  [  192/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.198677  [  256/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.144585  [  320/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.180696  [  384/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.151230  [  448/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.159870  [  512/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.125566  [  576/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.166842  [  640/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.165718  [  704/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.181032  [  768/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.172251  [  832/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.204335  [  896/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.184223  [  960/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.145556  [ 1024/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.149984  [ 1088/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.170313  [ 1152/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.205026  [ 1216/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.128146  [ 1280/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.113322  [ 1344/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.149434  [ 1408/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.153336  [ 1472/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.135850  [ 1536/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.150879  [ 1600/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.140838  [ 1664/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.116635  [ 1728/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.102077  [ 1792/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.089773  [ 1856/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.146177  [ 1920/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.165694  [ 1984/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.143275  [ 2048/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.152106  [ 2112/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.148342  [ 2176/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.145365  [ 2240/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.134940  [ 2304/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.189028  [ 2368/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.200011  [ 2432/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.146958  [ 2496/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.124294  [ 2560/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.136583  [ 2624/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.164971  [ 2688/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.127582  [ 2752/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.151352  [ 2816/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.169508  [ 2880/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.151977  [ 2944/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.140597  [ 3008/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.145150  [ 3072/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.111307  [ 3136/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.180620  [ 3200/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.128617  [ 3264/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.118168  [ 3328/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.149060  [ 3392/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.090695  [ 3456/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.134144  [ 3520/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.127518  [ 3584/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.148941  [ 3648/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.186796  [ 3712/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.151719  [ 3776/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.149508  [ 3840/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.087978  [ 3904/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.127929  [ 3968/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.154220  [ 4032/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.158187  [ 4096/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.113467  [ 4160/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.141792  [ 4224/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.129771  [ 4288/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.105212  [ 4352/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.165897  [ 4416/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.238123  [ 4480/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.134178  [ 4544/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.104781  [ 4608/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.139804  [ 4672/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.128005  [ 4736/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.167901  [ 4800/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.156060  [ 4864/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.154519  [ 4928/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.104371  [ 4992/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.146075  [ 5056/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.163061  [ 5120/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.114979  [ 5184/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.123202  [ 5248/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.151666  [ 5312/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.146643  [ 5376/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.156638  [ 5440/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.156287  [ 5504/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.199373  [ 5568/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.161905  [ 5632/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.167280  [ 5696/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.158486  [ 5760/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.168012  [ 5824/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.130086  [ 5888/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.124211  [ 5952/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.163503  [ 6016/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.127212  [ 6080/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.213102  [ 6144/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.132789  [ 6208/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.154327  [ 6272/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.131618  [ 6336/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.172333  [ 6400/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.132370  [ 6464/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.151272  [ 6528/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.162213  [ 6592/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.119339  [ 6656/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.154739  [ 6720/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.104882  [ 6784/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.167624  [ 6848/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.139067  [ 6912/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.129406  [ 6976/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.167611  [ 7040/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.196688  [ 7104/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.116095  [ 7168/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.143394  [ 7232/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.127464  [ 7296/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.097729  [ 7360/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.145572  [ 7424/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.148158  [ 7488/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.122486  [ 7552/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.175058  [ 7616/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.123460  [ 7680/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.218907  [ 7744/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.180184  [ 7808/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.127789  [ 7872/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.137048  [ 7936/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.154215  [ 8000/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.150500  [ 8064/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.125575  [ 8128/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.109770  [ 8192/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.154780  [ 8256/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.177165  [ 8320/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.125428  [ 8384/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.146376  [ 8448/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.147466  [ 8512/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.120369  [ 8576/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.120670  [ 8640/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.166982  [ 8704/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.155183  [ 8768/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.185883  [ 8832/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.123462  [ 8896/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.146906  [ 8960/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.148170  [ 9024/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.146364  [ 9088/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.128306  [ 9152/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.137953  [ 9216/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.187661  [ 9280/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.132771  [ 9344/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.137937  [ 9408/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.139506  [ 9472/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.120404  [ 9536/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.140437  [ 9600/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.144548  [ 9664/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.154249  [ 9728/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.122385  [ 9792/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.155511  [ 9856/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.110870  [ 9920/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.104919  [ 9984/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.197298  [10048/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.125862  [10112/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.119852  [10176/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.137007  [10240/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.169871  [10304/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.113207  [10368/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.093426  [10432/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.124918  [10496/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.177977  [10560/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.164597  [10624/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.139319  [10688/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.119830  [10752/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.170953  [10816/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.119030  [10880/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.132666  [10944/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.148936  [11008/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.147002  [11072/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.121365  [11136/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.101357  [11200/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.121215  [11264/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.147312  [11328/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.134137  [11392/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.148868  [11456/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.093681  [11520/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.148089  [11584/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.198452  [11648/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.121807  [11712/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.135509  [11776/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.118084  [11840/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.213452  [11904/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.130318  [11968/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.139570  [12032/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.128990  [12096/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.162935  [12160/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.169558  [12224/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.110307  [12288/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.132253  [12352/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.117335  [12416/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.183428  [12480/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.111076  [12544/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.128497  [12608/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.152616  [12672/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.110909  [12736/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.175511  [12800/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.142766  [12864/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.096789  [12928/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.195844  [12992/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.145734  [13056/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.119661  [13120/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.147358  [13184/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.122491  [13248/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.175575  [13312/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.131406  [13376/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.132926  [13440/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.150289  [13504/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.103695  [13568/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.193914  [13632/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.136577  [13696/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.145474  [13760/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.143509  [13824/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.133839  [13888/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.086995  [13952/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.116555  [14016/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.130225  [14080/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.148769  [14144/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.124641  [14208/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.201683  [14272/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.127800  [14336/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.202595  [14400/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.125786  [14464/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.128100  [14528/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.155736  [14592/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.205686  [14656/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.170634  [14720/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.158646  [14784/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.168640  [14848/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.116011  [14912/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.146930  [14976/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.149996  [15040/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.170439  [15104/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.147950  [15168/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.102131  [15232/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.153344  [15296/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.152896  [15360/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.151018  [15424/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.193934  [15488/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.114128  [15552/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.191935  [15616/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.097745  [15680/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.136150  [15744/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.131090  [15808/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.151834  [15872/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.120368  [15936/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.148701  [16000/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.092410  [16064/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.134121  [16128/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.118938  [16192/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.143266  [16256/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.136411  [16320/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.151766  [16384/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.155661  [16448/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.146858  [16512/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.169858  [16576/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.173613  [16640/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.120724  [16704/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.158102  [16768/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.170551  [16832/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.135693  [16896/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.136732  [16960/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.172324  [17024/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.118094  [17088/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.138001  [17152/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.132921  [17216/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.097794  [17280/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.152951  [17344/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.147030  [17408/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.157274  [17472/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.110317  [17536/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.215895  [17600/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.228540  [17664/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.140955  [17728/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.175278  [17792/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.141998  [17856/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.113594  [17920/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.129727  [17984/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.160539  [18048/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.153049  [18112/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.208649  [18176/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.084698  [18240/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.125727  [18304/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.167714  [18368/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.104801  [18432/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.150183  [18496/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.159275  [18560/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.123476  [18624/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.125070  [18688/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.140948  [18752/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.164559  [18816/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.128362  [18880/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.150855  [18944/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.132336  [19008/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.147487  [19072/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.128781  [19136/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.113570  [19200/24872]:   0%|          | 0/388 [00:17<?, ?it/s]
loss: 0.147859  [19264/24872]:   0%|          | 0/388 [00:17<?, ?it/s]
loss: 0.128679  [19328/24872]:   0%|          | 0/388 [00:17<?, ?it/s]
loss: 0.144389  [19392/24872]:   0%|          | 0/388 [00:17<?, ?it/s]
loss: 0.131376  [19456/24872]:   0%|          | 0/388 [00:17<?, ?it/s]
loss: 0.120842  [19520/24872]:   0%|          | 0/388 [00:17<?, ?it/s]
loss: 0.115021  [19584/24872]:   0%|          | 0/388 [00:17<?, ?it/s]
loss: 0.149401  [19648/24872]:   0%|          | 0/388 [00:17<?, ?it/s]
loss: 0.121035  [19712/24872]:   0%|          | 0/388 [00:17<?, ?it/s]
loss: 0.107063  [19776/24872]:   0%|          | 0/388 [00:17<?, ?it/s]
loss: 0.131160  [19840/24872]:   0%|          | 0/388 [00:17<?, ?it/s]
loss: 0.138565  [19904/24872]:   0%|          | 0/388 [00:17<?, ?it/s]
loss: 0.146662  [19968/24872]:   0%|          | 0/388 [00:17<?, ?it/s]
loss: 0.157237  [20032/24872]:   0%|          | 0/388 [00:17<?, ?it/s]
loss: 0.119103  [20096/24872]:   0%|          | 0/388 [00:17<?, ?it/s]
loss: 0.126079  [20160/24872]:   0%|          | 0/388 [00:17<?, ?it/s]
loss: 0.124916  [20224/24872]:   0%|          | 0/388 [00:17<?, ?it/s]
loss: 0.172820  [20288/24872]:   0%|          | 0/388 [00:18<?, ?it/s]
loss: 0.156403  [20352/24872]:   0%|          | 0/388 [00:18<?, ?it/s]
loss: 0.087463  [20416/24872]:   0%|          | 0/388 [00:18<?, ?it/s]
loss: 0.134604  [20480/24872]:   0%|          | 0/388 [00:18<?, ?it/s]
loss: 0.129132  [20544/24872]:   0%|          | 0/388 [00:18<?, ?it/s]
loss: 0.130168  [20608/24872]:   0%|          | 0/388 [00:18<?, ?it/s]
loss: 0.170460  [20672/24872]:   0%|          | 0/388 [00:18<?, ?it/s]
loss: 0.150874  [20736/24872]:   0%|          | 0/388 [00:18<?, ?it/s]
loss: 0.136398  [20800/24872]:   0%|          | 0/388 [00:18<?, ?it/s]
loss: 0.136133  [20864/24872]:   0%|          | 0/388 [00:18<?, ?it/s]
loss: 0.149196  [20928/24872]:   0%|          | 0/388 [00:18<?, ?it/s]
loss: 0.154820  [20992/24872]:   0%|          | 0/388 [00:18<?, ?it/s]
loss: 0.126299  [21056/24872]:   0%|          | 0/388 [00:18<?, ?it/s]
loss: 0.191567  [21120/24872]:   0%|          | 0/388 [00:18<?, ?it/s]
loss: 0.147834  [21184/24872]:   0%|          | 0/388 [00:18<?, ?it/s]
loss: 0.157230  [21248/24872]:   0%|          | 0/388 [00:18<?, ?it/s]
loss: 0.158875  [21312/24872]:   0%|          | 0/388 [00:18<?, ?it/s]
loss: 0.154182  [21376/24872]:   0%|          | 0/388 [00:18<?, ?it/s]
loss: 0.161702  [21440/24872]:   0%|          | 0/388 [00:19<?, ?it/s]
loss: 0.187319  [21504/24872]:   0%|          | 0/388 [00:19<?, ?it/s]
loss: 0.120865  [21568/24872]:   0%|          | 0/388 [00:19<?, ?it/s]
loss: 0.136849  [21632/24872]:   0%|          | 0/388 [00:19<?, ?it/s]
loss: 0.169578  [21696/24872]:   0%|          | 0/388 [00:19<?, ?it/s]
loss: 0.120489  [21760/24872]:   0%|          | 0/388 [00:19<?, ?it/s]
loss: 0.179082  [21824/24872]:   0%|          | 0/388 [00:19<?, ?it/s]
loss: 0.178180  [21888/24872]:   0%|          | 0/388 [00:19<?, ?it/s]
loss: 0.143503  [21952/24872]:   0%|          | 0/388 [00:19<?, ?it/s]
loss: 0.170535  [22016/24872]:   0%|          | 0/388 [00:19<?, ?it/s]
loss: 0.170988  [22080/24872]:   0%|          | 0/388 [00:19<?, ?it/s]
loss: 0.171099  [22144/24872]:   0%|          | 0/388 [00:19<?, ?it/s]
loss: 0.122997  [22208/24872]:   0%|          | 0/388 [00:19<?, ?it/s]
loss: 0.207125  [22272/24872]:   0%|          | 0/388 [00:19<?, ?it/s]
loss: 0.183622  [22336/24872]:   0%|          | 0/388 [00:19<?, ?it/s]
loss: 0.165982  [22400/24872]:   0%|          | 0/388 [00:19<?, ?it/s]
loss: 0.178413  [22464/24872]:   0%|          | 0/388 [00:19<?, ?it/s]
loss: 0.182471  [22528/24872]:   0%|          | 0/388 [00:19<?, ?it/s]
loss: 0.118715  [22592/24872]:   0%|          | 0/388 [00:20<?, ?it/s]
loss: 0.095764  [22656/24872]:   0%|          | 0/388 [00:20<?, ?it/s]
loss: 0.149792  [22720/24872]:   0%|          | 0/388 [00:20<?, ?it/s]
loss: 0.120192  [22784/24872]:   0%|          | 0/388 [00:20<?, ?it/s]
loss: 0.120348  [22848/24872]:   0%|          | 0/388 [00:20<?, ?it/s]
loss: 0.093164  [22912/24872]:   0%|          | 0/388 [00:20<?, ?it/s]
loss: 0.192975  [22976/24872]:   0%|          | 0/388 [00:20<?, ?it/s]
loss: 0.125128  [23040/24872]:   0%|          | 0/388 [00:20<?, ?it/s]
loss: 0.165843  [23104/24872]:   0%|          | 0/388 [00:20<?, ?it/s]
loss: 0.165466  [23168/24872]:   0%|          | 0/388 [00:20<?, ?it/s]
loss: 0.110897  [23232/24872]:   0%|          | 0/388 [00:20<?, ?it/s]
loss: 0.080241  [23296/24872]:   0%|          | 0/388 [00:20<?, ?it/s]
loss: 0.152573  [23360/24872]:   0%|          | 0/388 [00:20<?, ?it/s]
loss: 0.166362  [23424/24872]:   0%|          | 0/388 [00:20<?, ?it/s]
loss: 0.134658  [23488/24872]:   0%|          | 0/388 [00:20<?, ?it/s]
loss: 0.102671  [23552/24872]:   0%|          | 0/388 [00:20<?, ?it/s]
loss: 0.166250  [23616/24872]:   0%|          | 0/388 [00:20<?, ?it/s]
loss: 0.159775  [23680/24872]:   0%|          | 0/388 [00:21<?, ?it/s]
loss: 0.129358  [23744/24872]:   0%|          | 0/388 [00:21<?, ?it/s]
loss: 0.217326  [23808/24872]:   0%|          | 0/388 [00:21<?, ?it/s]
loss: 0.107198  [23872/24872]:   0%|          | 0/388 [00:21<?, ?it/s]
loss: 0.200769  [23936/24872]:   0%|          | 0/388 [00:21<?, ?it/s]
loss: 0.144689  [24000/24872]:   0%|          | 0/388 [00:21<?, ?it/s]
loss: 0.166595  [24064/24872]:   0%|          | 0/388 [00:21<?, ?it/s]
loss: 0.117226  [24128/24872]:   0%|          | 0/388 [00:21<?, ?it/s]
loss: 0.074414  [24192/24872]:   0%|          | 0/388 [00:21<?, ?it/s]
loss: 0.125434  [24256/24872]:   0%|          | 0/388 [00:21<?, ?it/s]
loss: 0.122700  [24320/24872]:   0%|          | 0/388 [00:21<?, ?it/s]
loss: 0.121873  [24384/24872]:   0%|          | 0/388 [00:21<?, ?it/s]
loss: 0.125663  [24448/24872]:   0%|          | 0/388 [00:21<?, ?it/s]
loss: 0.120311  [24512/24872]:   0%|          | 0/388 [00:21<?, ?it/s]
loss: 0.124289  [24576/24872]:   0%|          | 0/388 [00:21<?, ?it/s]
loss: 0.132919  [24640/24872]:   0%|          | 0/388 [00:21<?, ?it/s]
loss: 0.143914  [24704/24872]:   0%|          | 0/388 [00:21<?, ?it/s]
loss: 0.134413  [24768/24872]:   0%|          | 0/388 [00:21<?, ?it/s]
loss: 0.119440  [24832/24872]:   0%|          | 0/388 [00:22<?, ?it/s]
loss: 0.111493  [24872/24872]:   0%|          | 0/388 [00:22<?, ?it/s]
loss: 0.111493  [24872/24872]: : 389it [00:22, 17.62it/s]
Epoch 5, time=255.30s

  0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.098102  [   64/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.180392  [  128/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.125928  [  192/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.186247  [  256/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.127555  [  320/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.152701  [  384/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.127903  [  448/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.139308  [  512/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.113619  [  576/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.142107  [  640/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.152003  [  704/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.169609  [  768/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.140716  [  832/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.199444  [  896/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.160111  [  960/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.142251  [ 1024/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.127357  [ 1088/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.155315  [ 1152/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.182428  [ 1216/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.124333  [ 1280/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.118075  [ 1344/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.137705  [ 1408/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.150387  [ 1472/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.119437  [ 1536/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.149899  [ 1600/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.138557  [ 1664/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.110491  [ 1728/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.112131  [ 1792/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.104220  [ 1856/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.139924  [ 1920/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.179947  [ 1984/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.130095  [ 2048/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.150064  [ 2112/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.154794  [ 2176/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.137216  [ 2240/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.156749  [ 2304/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.185974  [ 2368/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.215562  [ 2432/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.149662  [ 2496/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.118080  [ 2560/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.147766  [ 2624/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.156988  [ 2688/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.125089  [ 2752/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.153081  [ 2816/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.156498  [ 2880/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.147844  [ 2944/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.130270  [ 3008/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.147040  [ 3072/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.111652  [ 3136/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.175882  [ 3200/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.129306  [ 3264/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.118030  [ 3328/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.158451  [ 3392/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.094680  [ 3456/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.132241  [ 3520/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.144141  [ 3584/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.150552  [ 3648/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.211626  [ 3712/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.164016  [ 3776/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.153108  [ 3840/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.106439  [ 3904/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.144457  [ 3968/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.169170  [ 4032/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.188502  [ 4096/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.114463  [ 4160/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.147539  [ 4224/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.154199  [ 4288/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.115478  [ 4352/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.175507  [ 4416/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.259461  [ 4480/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.134273  [ 4544/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.106933  [ 4608/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.160784  [ 4672/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.130127  [ 4736/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.175402  [ 4800/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.149628  [ 4864/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.142595  [ 4928/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.109013  [ 4992/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.159187  [ 5056/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.167112  [ 5120/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.128124  [ 5184/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.124097  [ 5248/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.139333  [ 5312/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.152548  [ 5376/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.176241  [ 5440/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.153904  [ 5504/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.201635  [ 5568/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.162991  [ 5632/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.155856  [ 5696/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.138303  [ 5760/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.168978  [ 5824/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.135092  [ 5888/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.125588  [ 5952/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.155856  [ 6016/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.122130  [ 6080/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.212107  [ 6144/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.144109  [ 6208/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.162103  [ 6272/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.137904  [ 6336/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.180876  [ 6400/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.146545  [ 6464/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.161971  [ 6528/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.156047  [ 6592/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.122798  [ 6656/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.140374  [ 6720/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.104654  [ 6784/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.174819  [ 6848/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.148028  [ 6912/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.129537  [ 6976/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.164692  [ 7040/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.189256  [ 7104/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.116347  [ 7168/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.147350  [ 7232/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.124100  [ 7296/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.102235  [ 7360/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.137811  [ 7424/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.149804  [ 7488/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.124840  [ 7552/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.170315  [ 7616/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.127958  [ 7680/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.222819  [ 7744/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.174738  [ 7808/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.115316  [ 7872/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.127210  [ 7936/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.139185  [ 8000/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.150846  [ 8064/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.119764  [ 8128/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.109217  [ 8192/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.133117  [ 8256/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.173570  [ 8320/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.114065  [ 8384/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.148327  [ 8448/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.146283  [ 8512/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.124391  [ 8576/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.112245  [ 8640/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.165685  [ 8704/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.147640  [ 8768/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.170551  [ 8832/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.124825  [ 8896/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.149796  [ 8960/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.132747  [ 9024/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.146158  [ 9088/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.128830  [ 9152/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.135245  [ 9216/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.187445  [ 9280/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.143422  [ 9344/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.134345  [ 9408/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.138691  [ 9472/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.116881  [ 9536/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.139965  [ 9600/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.144550  [ 9664/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.153000  [ 9728/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.118070  [ 9792/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.164534  [ 9856/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.111457  [ 9920/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.104161  [ 9984/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.180187  [10048/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.120076  [10112/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.129857  [10176/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.136175  [10240/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.158380  [10304/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.108859  [10368/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.092805  [10432/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.125097  [10496/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.172252  [10560/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.166099  [10624/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.130105  [10688/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.106803  [10752/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.141858  [10816/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.121790  [10880/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.135915  [10944/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.149072  [11008/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.145165  [11072/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.115883  [11136/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.106345  [11200/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.114631  [11264/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.156993  [11328/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.151433  [11392/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.146640  [11456/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.105420  [11520/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.143382  [11584/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.185902  [11648/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.117888  [11712/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.155780  [11776/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.128699  [11840/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.212024  [11904/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.135143  [11968/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.136652  [12032/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.112369  [12096/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.163142  [12160/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.189589  [12224/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.106751  [12288/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.133465  [12352/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.135687  [12416/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.193045  [12480/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.155383  [12544/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.134665  [12608/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.146123  [12672/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.135577  [12736/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.171010  [12800/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.142263  [12864/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.094888  [12928/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.194298  [12992/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.155320  [13056/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.127485  [13120/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.170081  [13184/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.122968  [13248/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.169300  [13312/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.137635  [13376/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.144973  [13440/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.163422  [13504/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.146145  [13568/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.180076  [13632/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.136053  [13696/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.145129  [13760/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.146275  [13824/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.143412  [13888/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.092544  [13952/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.122909  [14016/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.157728  [14080/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.154713  [14144/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.120920  [14208/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.176919  [14272/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.130362  [14336/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.182907  [14400/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.122979  [14464/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.135065  [14528/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.149334  [14592/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.217169  [14656/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.175707  [14720/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.179221  [14784/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.179208  [14848/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.128179  [14912/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.141945  [14976/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.149457  [15040/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.160106  [15104/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.132790  [15168/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.110248  [15232/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.152040  [15296/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.139796  [15360/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.150356  [15424/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.187059  [15488/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.116324  [15552/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.183576  [15616/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.084624  [15680/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.133077  [15744/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.101715  [15808/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.149576  [15872/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.121901  [15936/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.142296  [16000/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.090709  [16064/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.132394  [16128/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.121590  [16192/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.160245  [16256/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.130079  [16320/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.156307  [16384/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.163143  [16448/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.147061  [16512/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.178531  [16576/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.159962  [16640/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.117289  [16704/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.160437  [16768/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.187474  [16832/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.126785  [16896/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.138376  [16960/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.172381  [17024/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.117495  [17088/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.148940  [17152/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.129595  [17216/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.096055  [17280/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.147845  [17344/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.130613  [17408/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.155521  [17472/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.113488  [17536/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.186921  [17600/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.205879  [17664/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.129864  [17728/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.166077  [17792/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.129691  [17856/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.096001  [17920/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.129846  [17984/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.149906  [18048/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.159629  [18112/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.186547  [18176/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.074430  [18240/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.116064  [18304/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.145041  [18368/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.093771  [18432/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.138420  [18496/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.135894  [18560/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.118980  [18624/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.120647  [18688/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.127264  [18752/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.160304  [18816/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.109598  [18880/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.125036  [18944/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.131859  [19008/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.136705  [19072/24872]:   0%|          | 0/388 [00:17<?, ?it/s]
loss: 0.116392  [19136/24872]:   0%|          | 0/388 [00:17<?, ?it/s]
loss: 0.111399  [19200/24872]:   0%|          | 0/388 [00:17<?, ?it/s]
loss: 0.131855  [19264/24872]:   0%|          | 0/388 [00:17<?, ?it/s]
loss: 0.114267  [19328/24872]:   0%|          | 0/388 [00:17<?, ?it/s]
loss: 0.133292  [19392/24872]:   0%|          | 0/388 [00:17<?, ?it/s]
loss: 0.118496  [19456/24872]:   0%|          | 0/388 [00:17<?, ?it/s]
loss: 0.110922  [19520/24872]:   0%|          | 0/388 [00:17<?, ?it/s]
loss: 0.121373  [19584/24872]:   0%|          | 0/388 [00:17<?, ?it/s]
loss: 0.147010  [19648/24872]:   0%|          | 0/388 [00:17<?, ?it/s]
loss: 0.120240  [19712/24872]:   0%|          | 0/388 [00:17<?, ?it/s]
loss: 0.093121  [19776/24872]:   0%|          | 0/388 [00:17<?, ?it/s]
loss: 0.125561  [19840/24872]:   0%|          | 0/388 [00:17<?, ?it/s]
loss: 0.137928  [19904/24872]:   0%|          | 0/388 [00:17<?, ?it/s]
loss: 0.122265  [19968/24872]:   0%|          | 0/388 [00:17<?, ?it/s]
loss: 0.150081  [20032/24872]:   0%|          | 0/388 [00:17<?, ?it/s]
loss: 0.124118  [20096/24872]:   0%|          | 0/388 [00:17<?, ?it/s]
loss: 0.109694  [20160/24872]:   0%|          | 0/388 [00:18<?, ?it/s]
loss: 0.128954  [20224/24872]:   0%|          | 0/388 [00:18<?, ?it/s]
loss: 0.164268  [20288/24872]:   0%|          | 0/388 [00:18<?, ?it/s]
loss: 0.137215  [20352/24872]:   0%|          | 0/388 [00:18<?, ?it/s]
loss: 0.099416  [20416/24872]:   0%|          | 0/388 [00:18<?, ?it/s]
loss: 0.130792  [20480/24872]:   0%|          | 0/388 [00:18<?, ?it/s]
loss: 0.121671  [20544/24872]:   0%|          | 0/388 [00:18<?, ?it/s]
loss: 0.150076  [20608/24872]:   0%|          | 0/388 [00:18<?, ?it/s]
loss: 0.170944  [20672/24872]:   0%|          | 0/388 [00:18<?, ?it/s]
loss: 0.152894  [20736/24872]:   0%|          | 0/388 [00:18<?, ?it/s]
loss: 0.141506  [20800/24872]:   0%|          | 0/388 [00:18<?, ?it/s]
loss: 0.130039  [20864/24872]:   0%|          | 0/388 [00:18<?, ?it/s]
loss: 0.135640  [20928/24872]:   0%|          | 0/388 [00:18<?, ?it/s]
loss: 0.146021  [20992/24872]:   0%|          | 0/388 [00:18<?, ?it/s]
loss: 0.131294  [21056/24872]:   0%|          | 0/388 [00:18<?, ?it/s]
loss: 0.176596  [21120/24872]:   0%|          | 0/388 [00:18<?, ?it/s]
loss: 0.153603  [21184/24872]:   0%|          | 0/388 [00:18<?, ?it/s]
loss: 0.154447  [21248/24872]:   0%|          | 0/388 [00:18<?, ?it/s]
loss: 0.135890  [21312/24872]:   0%|          | 0/388 [00:19<?, ?it/s]
loss: 0.144715  [21376/24872]:   0%|          | 0/388 [00:19<?, ?it/s]
loss: 0.138458  [21440/24872]:   0%|          | 0/388 [00:19<?, ?it/s]
loss: 0.165144  [21504/24872]:   0%|          | 0/388 [00:19<?, ?it/s]
loss: 0.099068  [21568/24872]:   0%|          | 0/388 [00:19<?, ?it/s]
loss: 0.120525  [21632/24872]:   0%|          | 0/388 [00:19<?, ?it/s]
loss: 0.156774  [21696/24872]:   0%|          | 0/388 [00:19<?, ?it/s]
loss: 0.105649  [21760/24872]:   0%|          | 0/388 [00:19<?, ?it/s]
loss: 0.162911  [21824/24872]:   0%|          | 0/388 [00:19<?, ?it/s]
loss: 0.152032  [21888/24872]:   0%|          | 0/388 [00:19<?, ?it/s]
loss: 0.126651  [21952/24872]:   0%|          | 0/388 [00:19<?, ?it/s]
loss: 0.156207  [22016/24872]:   0%|          | 0/388 [00:19<?, ?it/s]
loss: 0.157210  [22080/24872]:   0%|          | 0/388 [00:19<?, ?it/s]
loss: 0.149191  [22144/24872]:   0%|          | 0/388 [00:19<?, ?it/s]
loss: 0.127292  [22208/24872]:   0%|          | 0/388 [00:19<?, ?it/s]
loss: 0.165573  [22272/24872]:   0%|          | 0/388 [00:19<?, ?it/s]
loss: 0.145899  [22336/24872]:   0%|          | 0/388 [00:19<?, ?it/s]
loss: 0.161747  [22400/24872]:   0%|          | 0/388 [00:20<?, ?it/s]
loss: 0.161386  [22464/24872]:   0%|          | 0/388 [00:20<?, ?it/s]
loss: 0.192491  [22528/24872]:   0%|          | 0/388 [00:20<?, ?it/s]
loss: 0.102298  [22592/24872]:   0%|          | 0/388 [00:20<?, ?it/s]
loss: 0.074863  [22656/24872]:   0%|          | 0/388 [00:20<?, ?it/s]
loss: 0.114040  [22720/24872]:   0%|          | 0/388 [00:20<?, ?it/s]
loss: 0.107189  [22784/24872]:   0%|          | 0/388 [00:20<?, ?it/s]
loss: 0.117784  [22848/24872]:   0%|          | 0/388 [00:20<?, ?it/s]
loss: 0.099382  [22912/24872]:   0%|          | 0/388 [00:20<?, ?it/s]
loss: 0.181747  [22976/24872]:   0%|          | 0/388 [00:20<?, ?it/s]
loss: 0.117398  [23040/24872]:   0%|          | 0/388 [00:20<?, ?it/s]
loss: 0.151781  [23104/24872]:   0%|          | 0/388 [00:20<?, ?it/s]
loss: 0.154723  [23168/24872]:   0%|          | 0/388 [00:20<?, ?it/s]
loss: 0.106208  [23232/24872]:   0%|          | 0/388 [00:20<?, ?it/s]
loss: 0.083356  [23296/24872]:   0%|          | 0/388 [00:20<?, ?it/s]
loss: 0.122291  [23360/24872]:   0%|          | 0/388 [00:20<?, ?it/s]
loss: 0.175734  [23424/24872]:   0%|          | 0/388 [00:20<?, ?it/s]
loss: 0.116325  [23488/24872]:   0%|          | 0/388 [00:20<?, ?it/s]
loss: 0.090157  [23552/24872]:   0%|          | 0/388 [00:21<?, ?it/s]
loss: 0.146625  [23616/24872]:   0%|          | 0/388 [00:21<?, ?it/s]
loss: 0.151039  [23680/24872]:   0%|          | 0/388 [00:21<?, ?it/s]
loss: 0.139661  [23744/24872]:   0%|          | 0/388 [00:21<?, ?it/s]
loss: 0.202819  [23808/24872]:   0%|          | 0/388 [00:21<?, ?it/s]
loss: 0.119001  [23872/24872]:   0%|          | 0/388 [00:21<?, ?it/s]
loss: 0.179631  [23936/24872]:   0%|          | 0/388 [00:21<?, ?it/s]
loss: 0.128584  [24000/24872]:   0%|          | 0/388 [00:21<?, ?it/s]
loss: 0.152361  [24064/24872]:   0%|          | 0/388 [00:21<?, ?it/s]
loss: 0.099071  [24128/24872]:   0%|          | 0/388 [00:21<?, ?it/s]
loss: 0.065351  [24192/24872]:   0%|          | 0/388 [00:21<?, ?it/s]
loss: 0.126815  [24256/24872]:   0%|          | 0/388 [00:21<?, ?it/s]
loss: 0.118329  [24320/24872]:   0%|          | 0/388 [00:21<?, ?it/s]
loss: 0.120411  [24384/24872]:   0%|          | 0/388 [00:21<?, ?it/s]
loss: 0.115055  [24448/24872]:   0%|          | 0/388 [00:21<?, ?it/s]
loss: 0.113363  [24512/24872]:   0%|          | 0/388 [00:21<?, ?it/s]
loss: 0.128628  [24576/24872]:   0%|          | 0/388 [00:21<?, ?it/s]
loss: 0.116593  [24640/24872]:   0%|          | 0/388 [00:22<?, ?it/s]
loss: 0.127630  [24704/24872]:   0%|          | 0/388 [00:22<?, ?it/s]
loss: 0.129138  [24768/24872]:   0%|          | 0/388 [00:22<?, ?it/s]
loss: 0.125214  [24832/24872]:   0%|          | 0/388 [00:22<?, ?it/s]
loss: 0.101060  [24872/24872]:   0%|          | 0/388 [00:22<?, ?it/s]
loss: 0.101060  [24872/24872]: : 389it [00:22, 17.49it/s]
-------------------------------
LR=0.0001, batch_size=128
-------------------------------
Epoch 1, time=277.54s

  0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.135103  [  128/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.321957  [  256/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.229976  [  384/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.187619  [  512/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.154031  [  640/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.206922  [  768/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.181292  [  896/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.150995  [ 1024/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.192712  [ 1152/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.200737  [ 1280/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.147492  [ 1408/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.145935  [ 1536/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.156060  [ 1664/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.107976  [ 1792/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.118166  [ 1920/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.154300  [ 2048/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.161464  [ 2176/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.129964  [ 2304/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.185968  [ 2432/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.121035  [ 2560/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.145747  [ 2688/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.129586  [ 2816/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.143524  [ 2944/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.129118  [ 3072/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.135740  [ 3200/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.112121  [ 3328/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.113046  [ 3456/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.128907  [ 3584/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.150142  [ 3712/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.132255  [ 3840/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.094338  [ 3968/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.143698  [ 4096/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.117093  [ 4224/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.099643  [ 4352/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.167877  [ 4480/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.103702  [ 4608/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.108158  [ 4736/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.134271  [ 4864/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.098747  [ 4992/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.142518  [ 5120/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.099173  [ 5248/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.127884  [ 5376/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.142005  [ 5504/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.149459  [ 5632/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.142788  [ 5760/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.134181  [ 5888/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.114715  [ 6016/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.147298  [ 6144/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.133585  [ 6272/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.132070  [ 6400/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.118629  [ 6528/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.127280  [ 6656/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.111195  [ 6784/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.143236  [ 6912/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.130231  [ 7040/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.140862  [ 7168/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.122563  [ 7296/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.098756  [ 7424/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.113051  [ 7552/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.131550  [ 7680/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.173952  [ 7808/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.114270  [ 7936/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.131563  [ 8064/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.103196  [ 8192/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.129438  [ 8320/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.119159  [ 8448/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.118657  [ 8576/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.122387  [ 8704/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.146886  [ 8832/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.120148  [ 8960/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.122219  [ 9088/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.117849  [ 9216/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.140454  [ 9344/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.120822  [ 9472/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.118732  [ 9600/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.126020  [ 9728/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.122008  [ 9856/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.097809  [ 9984/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.138266  [10112/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.115572  [10240/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.119649  [10368/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.099095  [10496/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.144325  [10624/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.110755  [10752/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.118294  [10880/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.125334  [11008/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.096729  [11136/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.096893  [11264/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.126278  [11392/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.108094  [11520/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.153788  [11648/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.107735  [11776/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.140412  [11904/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.112355  [12032/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.114622  [12160/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.117809  [12288/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.110851  [12416/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.129297  [12544/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.116626  [12672/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.121335  [12800/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.096234  [12928/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.151823  [13056/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.113965  [13184/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.124690  [13312/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.123452  [13440/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.122093  [13568/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.146955  [13696/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.118070  [13824/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.092420  [13952/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.113148  [14080/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.111540  [14208/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.141372  [14336/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.130369  [14464/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.112531  [14592/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.158012  [14720/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.154802  [14848/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.112596  [14976/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.134024  [15104/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.112954  [15232/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.141262  [15360/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.150711  [15488/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.140781  [15616/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.096757  [15744/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.129141  [15872/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.117516  [16000/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.095626  [16128/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.123065  [16256/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.130875  [16384/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.144697  [16512/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.152156  [16640/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.138795  [16768/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.138472  [16896/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.152871  [17024/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.117316  [17152/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.100221  [17280/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.133177  [17408/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.117454  [17536/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.171227  [17664/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.149270  [17792/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.104478  [17920/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.131374  [18048/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.145912  [18176/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.090308  [18304/24872]:   0%|          | 0/194 [00:12<?, ?it/s]
loss: 0.109451  [18432/24872]:   0%|          | 0/194 [00:12<?, ?it/s]
loss: 0.127162  [18560/24872]:   0%|          | 0/194 [00:12<?, ?it/s]
loss: 0.106699  [18688/24872]:   0%|          | 0/194 [00:12<?, ?it/s]
loss: 0.126348  [18816/24872]:   0%|          | 0/194 [00:12<?, ?it/s]
loss: 0.114990  [18944/24872]:   0%|          | 0/194 [00:12<?, ?it/s]
loss: 0.113924  [19072/24872]:   0%|          | 0/194 [00:12<?, ?it/s]
loss: 0.092845  [19200/24872]:   0%|          | 0/194 [00:12<?, ?it/s]
loss: 0.115676  [19328/24872]:   0%|          | 0/194 [00:12<?, ?it/s]
loss: 0.111438  [19456/24872]:   0%|          | 0/194 [00:12<?, ?it/s]
loss: 0.105450  [19584/24872]:   0%|          | 0/194 [00:12<?, ?it/s]
loss: 0.115310  [19712/24872]:   0%|          | 0/194 [00:13<?, ?it/s]
loss: 0.108173  [19840/24872]:   0%|          | 0/194 [00:13<?, ?it/s]
loss: 0.121238  [19968/24872]:   0%|          | 0/194 [00:13<?, ?it/s]
loss: 0.124288  [20096/24872]:   0%|          | 0/194 [00:13<?, ?it/s]
loss: 0.101147  [20224/24872]:   0%|          | 0/194 [00:13<?, ?it/s]
loss: 0.143865  [20352/24872]:   0%|          | 0/194 [00:13<?, ?it/s]
loss: 0.096053  [20480/24872]:   0%|          | 0/194 [00:13<?, ?it/s]
loss: 0.106864  [20608/24872]:   0%|          | 0/194 [00:13<?, ?it/s]
loss: 0.145836  [20736/24872]:   0%|          | 0/194 [00:13<?, ?it/s]
loss: 0.112516  [20864/24872]:   0%|          | 0/194 [00:13<?, ?it/s]
loss: 0.118908  [20992/24872]:   0%|          | 0/194 [00:13<?, ?it/s]
loss: 0.136474  [21120/24872]:   0%|          | 0/194 [00:13<?, ?it/s]
loss: 0.127798  [21248/24872]:   0%|          | 0/194 [00:14<?, ?it/s]
loss: 0.125084  [21376/24872]:   0%|          | 0/194 [00:14<?, ?it/s]
loss: 0.134151  [21504/24872]:   0%|          | 0/194 [00:14<?, ?it/s]
loss: 0.098046  [21632/24872]:   0%|          | 0/194 [00:14<?, ?it/s]
loss: 0.119111  [21760/24872]:   0%|          | 0/194 [00:14<?, ?it/s]
loss: 0.151327  [21888/24872]:   0%|          | 0/194 [00:14<?, ?it/s]
loss: 0.140663  [22016/24872]:   0%|          | 0/194 [00:14<?, ?it/s]
loss: 0.128907  [22144/24872]:   0%|          | 0/194 [00:14<?, ?it/s]
loss: 0.126545  [22272/24872]:   0%|          | 0/194 [00:14<?, ?it/s]
loss: 0.133641  [22400/24872]:   0%|          | 0/194 [00:14<?, ?it/s]
loss: 0.169426  [22528/24872]:   0%|          | 0/194 [00:14<?, ?it/s]
loss: 0.080970  [22656/24872]:   0%|          | 0/194 [00:14<?, ?it/s]
loss: 0.096725  [22784/24872]:   0%|          | 0/194 [00:15<?, ?it/s]
loss: 0.093444  [22912/24872]:   0%|          | 0/194 [00:15<?, ?it/s]
loss: 0.133327  [23040/24872]:   0%|          | 0/194 [00:15<?, ?it/s]
loss: 0.143793  [23168/24872]:   0%|          | 0/194 [00:15<?, ?it/s]
loss: 0.079170  [23296/24872]:   0%|          | 0/194 [00:15<?, ?it/s]
loss: 0.139908  [23424/24872]:   0%|          | 0/194 [00:15<?, ?it/s]
loss: 0.098632  [23552/24872]:   0%|          | 0/194 [00:15<?, ?it/s]
loss: 0.145354  [23680/24872]:   0%|          | 0/194 [00:15<?, ?it/s]
loss: 0.147763  [23808/24872]:   0%|          | 0/194 [00:15<?, ?it/s]
loss: 0.122602  [23936/24872]:   0%|          | 0/194 [00:15<?, ?it/s]
loss: 0.130356  [24064/24872]:   0%|          | 0/194 [00:15<?, ?it/s]
loss: 0.076087  [24192/24872]:   0%|          | 0/194 [00:15<?, ?it/s]
loss: 0.108572  [24320/24872]:   0%|          | 0/194 [00:16<?, ?it/s]
loss: 0.101167  [24448/24872]:   0%|          | 0/194 [00:16<?, ?it/s]
loss: 0.105814  [24576/24872]:   0%|          | 0/194 [00:16<?, ?it/s]
loss: 0.109110  [24704/24872]:   0%|          | 0/194 [00:16<?, ?it/s]
loss: 0.119798  [24832/24872]:   0%|          | 0/194 [00:16<?, ?it/s]
loss: 0.099133  [24872/24872]:   0%|          | 0/194 [00:16<?, ?it/s]
loss: 0.099133  [24872/24872]: : 195it [00:16, 11.88it/s]
Epoch 2, time=293.96s

  0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.127088  [  128/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.141200  [  256/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.137555  [  384/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.124292  [  512/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.124911  [  640/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.139220  [  768/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.163631  [  896/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.140909  [ 1024/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.147022  [ 1152/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.136737  [ 1280/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.133885  [ 1408/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.131317  [ 1536/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.130436  [ 1664/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.100668  [ 1792/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.113499  [ 1920/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.137258  [ 2048/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.121962  [ 2176/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.115643  [ 2304/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.167726  [ 2432/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.114855  [ 2560/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.131904  [ 2688/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.120681  [ 2816/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.133159  [ 2944/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.124417  [ 3072/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.124081  [ 3200/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.104280  [ 3328/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.109182  [ 3456/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.114852  [ 3584/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.144865  [ 3712/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.128781  [ 3840/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.089445  [ 3968/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.141628  [ 4096/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.114987  [ 4224/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.092270  [ 4352/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.167206  [ 4480/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.104921  [ 4608/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.108292  [ 4736/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.129221  [ 4864/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.099222  [ 4992/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.142227  [ 5120/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.095173  [ 5248/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.121809  [ 5376/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.134011  [ 5504/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.146214  [ 5632/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.138819  [ 5760/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.132697  [ 5888/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.114466  [ 6016/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.144195  [ 6144/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.130503  [ 6272/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.133756  [ 6400/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.114070  [ 6528/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.121644  [ 6656/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.109906  [ 6784/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.147050  [ 6912/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.129700  [ 7040/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.129995  [ 7168/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.118778  [ 7296/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.096652  [ 7424/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.112036  [ 7552/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.128978  [ 7680/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.169228  [ 7808/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.112927  [ 7936/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.127807  [ 8064/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.101259  [ 8192/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.124433  [ 8320/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.121911  [ 8448/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.115631  [ 8576/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.121609  [ 8704/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.142697  [ 8832/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.121571  [ 8960/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.116054  [ 9088/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.117408  [ 9216/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.142025  [ 9344/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.117279  [ 9472/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.113854  [ 9600/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.121047  [ 9728/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.119925  [ 9856/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.089761  [ 9984/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.139789  [10112/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.109132  [10240/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.112898  [10368/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.093240  [10496/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.141808  [10624/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.107454  [10752/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.111442  [10880/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.126390  [11008/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.101753  [11136/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.096980  [11264/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.124053  [11392/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.106425  [11520/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.152050  [11648/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.105175  [11776/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.140913  [11904/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.111424  [12032/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.115847  [12160/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.115844  [12288/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.109555  [12416/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.114624  [12544/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.118502  [12672/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.113735  [12800/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.094622  [12928/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.143663  [13056/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.104032  [13184/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.122501  [13312/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.115273  [13440/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.112653  [13568/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.135294  [13696/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.112866  [13824/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.095614  [13952/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.109519  [14080/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.106222  [14208/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.133520  [14336/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.127567  [14464/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.110521  [14592/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.159017  [14720/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.151810  [14848/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.103161  [14976/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.120420  [15104/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.110892  [15232/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.125969  [15360/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.139185  [15488/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.125665  [15616/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.101883  [15744/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.124594  [15872/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.111045  [16000/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.091261  [16128/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.122283  [16256/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.127358  [16384/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.135315  [16512/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.146812  [16640/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.127322  [16768/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.137028  [16896/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.137683  [17024/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.118452  [17152/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.098645  [17280/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.129093  [17408/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.111675  [17536/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.169347  [17664/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.152431  [17792/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.105441  [17920/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.129147  [18048/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.149411  [18176/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.087413  [18304/24872]:   0%|          | 0/194 [00:12<?, ?it/s]
loss: 0.107444  [18432/24872]:   0%|          | 0/194 [00:12<?, ?it/s]
loss: 0.127498  [18560/24872]:   0%|          | 0/194 [00:12<?, ?it/s]
loss: 0.112358  [18688/24872]:   0%|          | 0/194 [00:12<?, ?it/s]
loss: 0.120889  [18816/24872]:   0%|          | 0/194 [00:12<?, ?it/s]
loss: 0.126029  [18944/24872]:   0%|          | 0/194 [00:12<?, ?it/s]
loss: 0.114754  [19072/24872]:   0%|          | 0/194 [00:12<?, ?it/s]
loss: 0.095427  [19200/24872]:   0%|          | 0/194 [00:12<?, ?it/s]
loss: 0.117954  [19328/24872]:   0%|          | 0/194 [00:12<?, ?it/s]
loss: 0.110531  [19456/24872]:   0%|          | 0/194 [00:12<?, ?it/s]
loss: 0.102071  [19584/24872]:   0%|          | 0/194 [00:12<?, ?it/s]
loss: 0.119405  [19712/24872]:   0%|          | 0/194 [00:12<?, ?it/s]
loss: 0.106887  [19840/24872]:   0%|          | 0/194 [00:13<?, ?it/s]
loss: 0.123786  [19968/24872]:   0%|          | 0/194 [00:13<?, ?it/s]
loss: 0.121922  [20096/24872]:   0%|          | 0/194 [00:13<?, ?it/s]
loss: 0.096809  [20224/24872]:   0%|          | 0/194 [00:13<?, ?it/s]
loss: 0.142259  [20352/24872]:   0%|          | 0/194 [00:13<?, ?it/s]
loss: 0.095430  [20480/24872]:   0%|          | 0/194 [00:13<?, ?it/s]
loss: 0.105153  [20608/24872]:   0%|          | 0/194 [00:13<?, ?it/s]
loss: 0.143353  [20736/24872]:   0%|          | 0/194 [00:13<?, ?it/s]
loss: 0.104027  [20864/24872]:   0%|          | 0/194 [00:13<?, ?it/s]
loss: 0.110210  [20992/24872]:   0%|          | 0/194 [00:13<?, ?it/s]
loss: 0.135857  [21120/24872]:   0%|          | 0/194 [00:13<?, ?it/s]
loss: 0.124146  [21248/24872]:   0%|          | 0/194 [00:13<?, ?it/s]
loss: 0.120879  [21376/24872]:   0%|          | 0/194 [00:14<?, ?it/s]
loss: 0.133698  [21504/24872]:   0%|          | 0/194 [00:14<?, ?it/s]
loss: 0.104613  [21632/24872]:   0%|          | 0/194 [00:14<?, ?it/s]
loss: 0.116504  [21760/24872]:   0%|          | 0/194 [00:14<?, ?it/s]
loss: 0.148450  [21888/24872]:   0%|          | 0/194 [00:14<?, ?it/s]
loss: 0.131907  [22016/24872]:   0%|          | 0/194 [00:14<?, ?it/s]
loss: 0.127916  [22144/24872]:   0%|          | 0/194 [00:14<?, ?it/s]
loss: 0.117918  [22272/24872]:   0%|          | 0/194 [00:14<?, ?it/s]
loss: 0.122209  [22400/24872]:   0%|          | 0/194 [00:14<?, ?it/s]
loss: 0.158813  [22528/24872]:   0%|          | 0/194 [00:14<?, ?it/s]
loss: 0.077221  [22656/24872]:   0%|          | 0/194 [00:14<?, ?it/s]
loss: 0.097209  [22784/24872]:   0%|          | 0/194 [00:14<?, ?it/s]
loss: 0.090701  [22912/24872]:   0%|          | 0/194 [00:15<?, ?it/s]
loss: 0.132273  [23040/24872]:   0%|          | 0/194 [00:15<?, ?it/s]
loss: 0.127990  [23168/24872]:   0%|          | 0/194 [00:15<?, ?it/s]
loss: 0.077333  [23296/24872]:   0%|          | 0/194 [00:15<?, ?it/s]
loss: 0.130936  [23424/24872]:   0%|          | 0/194 [00:15<?, ?it/s]
loss: 0.091562  [23552/24872]:   0%|          | 0/194 [00:15<?, ?it/s]
loss: 0.136439  [23680/24872]:   0%|          | 0/194 [00:15<?, ?it/s]
loss: 0.143330  [23808/24872]:   0%|          | 0/194 [00:15<?, ?it/s]
loss: 0.117498  [23936/24872]:   0%|          | 0/194 [00:15<?, ?it/s]
loss: 0.127413  [24064/24872]:   0%|          | 0/194 [00:15<?, ?it/s]
loss: 0.078787  [24192/24872]:   0%|          | 0/194 [00:15<?, ?it/s]
loss: 0.108859  [24320/24872]:   0%|          | 0/194 [00:15<?, ?it/s]
loss: 0.097398  [24448/24872]:   0%|          | 0/194 [00:16<?, ?it/s]
loss: 0.108236  [24576/24872]:   0%|          | 0/194 [00:16<?, ?it/s]
loss: 0.107005  [24704/24872]:   0%|          | 0/194 [00:16<?, ?it/s]
loss: 0.110402  [24832/24872]:   0%|          | 0/194 [00:16<?, ?it/s]
loss: 0.101464  [24872/24872]:   0%|          | 0/194 [00:16<?, ?it/s]
loss: 0.101464  [24872/24872]: : 195it [00:16, 11.91it/s]
Epoch 3, time=310.34s

  0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.119402  [  128/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.131286  [  256/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.131531  [  384/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.116048  [  512/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.109930  [  640/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.128121  [  768/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.142619  [  896/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.131710  [ 1024/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.137871  [ 1152/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.129417  [ 1280/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.119745  [ 1408/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.115567  [ 1536/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.126239  [ 1664/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.088024  [ 1792/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.108964  [ 1920/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.119260  [ 2048/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.122249  [ 2176/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.106290  [ 2304/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.161427  [ 2432/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.105941  [ 2560/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.124007  [ 2688/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.115489  [ 2816/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.122746  [ 2944/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.115904  [ 3072/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.116148  [ 3200/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.104947  [ 3328/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.100740  [ 3456/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.102755  [ 3584/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.140414  [ 3712/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.119608  [ 3840/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.079331  [ 3968/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.131910  [ 4096/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.099608  [ 4224/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.084702  [ 4352/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.156537  [ 4480/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.097046  [ 4608/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.098765  [ 4736/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.120611  [ 4864/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.097527  [ 4992/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.140191  [ 5120/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.090891  [ 5248/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.117774  [ 5376/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.133018  [ 5504/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.137859  [ 5632/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.128812  [ 5760/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.126515  [ 5888/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.111136  [ 6016/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.140248  [ 6144/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.129658  [ 6272/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.119200  [ 6400/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.106938  [ 6528/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.118539  [ 6656/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.105835  [ 6784/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.142429  [ 6912/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.124147  [ 7040/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.125275  [ 7168/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.116629  [ 7296/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.090050  [ 7424/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.111292  [ 7552/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.132641  [ 7680/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.165942  [ 7808/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.108522  [ 7936/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.125122  [ 8064/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.099202  [ 8192/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.120634  [ 8320/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.120501  [ 8448/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.110470  [ 8576/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.120224  [ 8704/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.139168  [ 8832/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.120933  [ 8960/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.116996  [ 9088/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.124582  [ 9216/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.132996  [ 9344/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.116990  [ 9472/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.114004  [ 9600/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.119640  [ 9728/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.115269  [ 9856/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.097144  [ 9984/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.140464  [10112/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.117845  [10240/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.106558  [10368/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.095945  [10496/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.148242  [10624/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.121420  [10752/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.137261  [10880/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.142510  [11008/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.116779  [11136/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.116028  [11264/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.136984  [11392/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.103319  [11520/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.169156  [11648/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.105944  [11776/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.154509  [11904/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.107585  [12032/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.116278  [12160/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.118739  [12288/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.115331  [12416/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.125173  [12544/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.127902  [12672/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.129557  [12800/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.103445  [12928/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.155285  [13056/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.109323  [13184/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.134495  [13312/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.117870  [13440/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.132260  [13568/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.137041  [13696/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.113521  [13824/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.096010  [13952/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.108155  [14080/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.104059  [14208/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.132742  [14336/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.122762  [14464/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.106073  [14592/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.151552  [14720/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.146078  [14848/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.106745  [14976/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.115599  [15104/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.097168  [15232/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.119822  [15360/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.142026  [15488/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.122076  [15616/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.098601  [15744/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.127162  [15872/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.116880  [16000/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.090164  [16128/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.114801  [16256/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.128108  [16384/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.125145  [16512/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.138362  [16640/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.123330  [16768/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.131663  [16896/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.136164  [17024/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.117579  [17152/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.094894  [17280/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.121010  [17408/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.114749  [17536/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.157325  [17664/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.142598  [17792/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.091436  [17920/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.124785  [18048/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.139205  [18176/24872]:   0%|          | 0/194 [00:12<?, ?it/s]
loss: 0.083500  [18304/24872]:   0%|          | 0/194 [00:12<?, ?it/s]
loss: 0.106509  [18432/24872]:   0%|          | 0/194 [00:12<?, ?it/s]
loss: 0.119171  [18560/24872]:   0%|          | 0/194 [00:12<?, ?it/s]
loss: 0.100534  [18688/24872]:   0%|          | 0/194 [00:12<?, ?it/s]
loss: 0.118109  [18816/24872]:   0%|          | 0/194 [00:12<?, ?it/s]
loss: 0.114641  [18944/24872]:   0%|          | 0/194 [00:12<?, ?it/s]
loss: 0.110090  [19072/24872]:   0%|          | 0/194 [00:12<?, ?it/s]
loss: 0.097528  [19200/24872]:   0%|          | 0/194 [00:12<?, ?it/s]
loss: 0.111155  [19328/24872]:   0%|          | 0/194 [00:12<?, ?it/s]
loss: 0.108491  [19456/24872]:   0%|          | 0/194 [00:12<?, ?it/s]
loss: 0.092854  [19584/24872]:   0%|          | 0/194 [00:12<?, ?it/s]
loss: 0.114485  [19712/24872]:   0%|          | 0/194 [00:13<?, ?it/s]
loss: 0.100731  [19840/24872]:   0%|          | 0/194 [00:13<?, ?it/s]
loss: 0.117809  [19968/24872]:   0%|          | 0/194 [00:13<?, ?it/s]
loss: 0.118460  [20096/24872]:   0%|          | 0/194 [00:13<?, ?it/s]
loss: 0.098989  [20224/24872]:   0%|          | 0/194 [00:13<?, ?it/s]
loss: 0.135839  [20352/24872]:   0%|          | 0/194 [00:13<?, ?it/s]
loss: 0.088366  [20480/24872]:   0%|          | 0/194 [00:13<?, ?it/s]
loss: 0.099137  [20608/24872]:   0%|          | 0/194 [00:13<?, ?it/s]
loss: 0.141747  [20736/24872]:   0%|          | 0/194 [00:13<?, ?it/s]
loss: 0.102643  [20864/24872]:   0%|          | 0/194 [00:13<?, ?it/s]
loss: 0.104523  [20992/24872]:   0%|          | 0/194 [00:13<?, ?it/s]
loss: 0.134112  [21120/24872]:   0%|          | 0/194 [00:13<?, ?it/s]
loss: 0.121684  [21248/24872]:   0%|          | 0/194 [00:14<?, ?it/s]
loss: 0.128377  [21376/24872]:   0%|          | 0/194 [00:14<?, ?it/s]
loss: 0.126917  [21504/24872]:   0%|          | 0/194 [00:14<?, ?it/s]
loss: 0.111346  [21632/24872]:   0%|          | 0/194 [00:14<?, ?it/s]
loss: 0.113520  [21760/24872]:   0%|          | 0/194 [00:14<?, ?it/s]
loss: 0.146568  [21888/24872]:   0%|          | 0/194 [00:14<?, ?it/s]
loss: 0.129307  [22016/24872]:   0%|          | 0/194 [00:14<?, ?it/s]
loss: 0.120659  [22144/24872]:   0%|          | 0/194 [00:14<?, ?it/s]
loss: 0.115699  [22272/24872]:   0%|          | 0/194 [00:14<?, ?it/s]
loss: 0.110425  [22400/24872]:   0%|          | 0/194 [00:14<?, ?it/s]
loss: 0.160060  [22528/24872]:   0%|          | 0/194 [00:14<?, ?it/s]
loss: 0.073516  [22656/24872]:   0%|          | 0/194 [00:14<?, ?it/s]
loss: 0.089481  [22784/24872]:   0%|          | 0/194 [00:15<?, ?it/s]
loss: 0.083037  [22912/24872]:   0%|          | 0/194 [00:15<?, ?it/s]
loss: 0.127750  [23040/24872]:   0%|          | 0/194 [00:15<?, ?it/s]
loss: 0.124128  [23168/24872]:   0%|          | 0/194 [00:15<?, ?it/s]
loss: 0.075343  [23296/24872]:   0%|          | 0/194 [00:15<?, ?it/s]
loss: 0.122463  [23424/24872]:   0%|          | 0/194 [00:15<?, ?it/s]
loss: 0.093852  [23552/24872]:   0%|          | 0/194 [00:15<?, ?it/s]
loss: 0.129486  [23680/24872]:   0%|          | 0/194 [00:15<?, ?it/s]
loss: 0.141139  [23808/24872]:   0%|          | 0/194 [00:15<?, ?it/s]
loss: 0.112702  [23936/24872]:   0%|          | 0/194 [00:15<?, ?it/s]
loss: 0.121834  [24064/24872]:   0%|          | 0/194 [00:15<?, ?it/s]
loss: 0.071237  [24192/24872]:   0%|          | 0/194 [00:15<?, ?it/s]
loss: 0.102999  [24320/24872]:   0%|          | 0/194 [00:16<?, ?it/s]
loss: 0.102082  [24448/24872]:   0%|          | 0/194 [00:16<?, ?it/s]
loss: 0.104919  [24576/24872]:   0%|          | 0/194 [00:16<?, ?it/s]
loss: 0.105393  [24704/24872]:   0%|          | 0/194 [00:16<?, ?it/s]
loss: 0.122163  [24832/24872]:   0%|          | 0/194 [00:16<?, ?it/s]
loss: 0.094382  [24872/24872]:   0%|          | 0/194 [00:16<?, ?it/s]
loss: 0.094382  [24872/24872]: : 195it [00:16, 11.87it/s]
Epoch 4, time=326.77s

  0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.115482  [  128/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.144683  [  256/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.125683  [  384/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.108536  [  512/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.113327  [  640/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.130784  [  768/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.139284  [  896/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.125854  [ 1024/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.135503  [ 1152/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.143905  [ 1280/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.118188  [ 1408/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.127203  [ 1536/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.124014  [ 1664/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.093733  [ 1792/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.088874  [ 1920/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.134144  [ 2048/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.117296  [ 2176/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.104763  [ 2304/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.158449  [ 2432/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.110865  [ 2560/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.144001  [ 2688/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.115270  [ 2816/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.134674  [ 2944/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.115287  [ 3072/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.119714  [ 3200/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.104830  [ 3328/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.101238  [ 3456/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.118576  [ 3584/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.138360  [ 3712/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.138665  [ 3840/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.081910  [ 3968/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.142385  [ 4096/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.105375  [ 4224/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.089501  [ 4352/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.161819  [ 4480/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.094685  [ 4608/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.109611  [ 4736/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.124955  [ 4864/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.095641  [ 4992/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.135199  [ 5120/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.089601  [ 5248/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.115090  [ 5376/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.130950  [ 5504/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.134421  [ 5632/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.123954  [ 5760/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.119401  [ 5888/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.113794  [ 6016/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.139460  [ 6144/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.118391  [ 6272/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.121229  [ 6400/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.105131  [ 6528/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.122467  [ 6656/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.101605  [ 6784/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.144358  [ 6912/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.113774  [ 7040/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.115402  [ 7168/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.111304  [ 7296/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.084502  [ 7424/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.103583  [ 7552/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.121720  [ 7680/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.141434  [ 7808/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.103826  [ 7936/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.131780  [ 8064/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.081197  [ 8192/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.117298  [ 8320/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.121775  [ 8448/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.102202  [ 8576/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.123913  [ 8704/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.141926  [ 8832/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.118986  [ 8960/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.107718  [ 9088/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.108832  [ 9216/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.144605  [ 9344/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.102989  [ 9472/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.109706  [ 9600/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.106086  [ 9728/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.120786  [ 9856/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.086797  [ 9984/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.121720  [10112/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.107013  [10240/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.105406  [10368/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.092806  [10496/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.139725  [10624/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.103633  [10752/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.113153  [10880/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.118551  [11008/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.089305  [11136/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.088159  [11264/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.122389  [11392/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.103332  [11520/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.135404  [11648/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.094497  [11776/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.131116  [11904/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.104762  [12032/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.107577  [12160/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.108508  [12288/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.109574  [12416/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.107051  [12544/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.101733  [12672/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.102539  [12800/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.092039  [12928/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.123712  [13056/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.110517  [13184/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.107324  [13312/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.112747  [13440/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.102082  [13568/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.138933  [13696/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.112692  [13824/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.092728  [13952/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.105109  [14080/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.095822  [14208/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.133552  [14336/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.121105  [14464/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.107695  [14592/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.153457  [14720/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.138965  [14848/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.122834  [14976/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.129796  [15104/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.102767  [15232/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.119751  [15360/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.145643  [15488/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.128304  [15616/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.086833  [15744/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.138334  [15872/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.112518  [16000/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.102812  [16128/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.115104  [16256/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.133585  [16384/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.129380  [16512/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.144541  [16640/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.138490  [16768/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.123494  [16896/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.152053  [17024/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.113483  [17152/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.097124  [17280/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.112748  [17408/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.116563  [17536/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.156056  [17664/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.149255  [17792/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.091398  [17920/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.125281  [18048/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.144054  [18176/24872]:   0%|          | 0/194 [00:12<?, ?it/s]
loss: 0.077866  [18304/24872]:   0%|          | 0/194 [00:12<?, ?it/s]
loss: 0.109147  [18432/24872]:   0%|          | 0/194 [00:12<?, ?it/s]
loss: 0.108301  [18560/24872]:   0%|          | 0/194 [00:12<?, ?it/s]
loss: 0.104942  [18688/24872]:   0%|          | 0/194 [00:12<?, ?it/s]
loss: 0.106434  [18816/24872]:   0%|          | 0/194 [00:12<?, ?it/s]
loss: 0.110041  [18944/24872]:   0%|          | 0/194 [00:12<?, ?it/s]
loss: 0.115125  [19072/24872]:   0%|          | 0/194 [00:12<?, ?it/s]
loss: 0.090754  [19200/24872]:   0%|          | 0/194 [00:12<?, ?it/s]
loss: 0.113472  [19328/24872]:   0%|          | 0/194 [00:12<?, ?it/s]
loss: 0.105000  [19456/24872]:   0%|          | 0/194 [00:12<?, ?it/s]
loss: 0.095629  [19584/24872]:   0%|          | 0/194 [00:12<?, ?it/s]
loss: 0.102823  [19712/24872]:   0%|          | 0/194 [00:13<?, ?it/s]
loss: 0.101037  [19840/24872]:   0%|          | 0/194 [00:13<?, ?it/s]
loss: 0.111497  [19968/24872]:   0%|          | 0/194 [00:13<?, ?it/s]
loss: 0.131481  [20096/24872]:   0%|          | 0/194 [00:13<?, ?it/s]
loss: 0.091065  [20224/24872]:   0%|          | 0/194 [00:13<?, ?it/s]
loss: 0.142334  [20352/24872]:   0%|          | 0/194 [00:13<?, ?it/s]
loss: 0.113262  [20480/24872]:   0%|          | 0/194 [00:13<?, ?it/s]
loss: 0.095543  [20608/24872]:   0%|          | 0/194 [00:13<?, ?it/s]
loss: 0.151740  [20736/24872]:   0%|          | 0/194 [00:13<?, ?it/s]
loss: 0.098334  [20864/24872]:   0%|          | 0/194 [00:13<?, ?it/s]
loss: 0.115295  [20992/24872]:   0%|          | 0/194 [00:13<?, ?it/s]
loss: 0.127461  [21120/24872]:   0%|          | 0/194 [00:13<?, ?it/s]
loss: 0.125243  [21248/24872]:   0%|          | 0/194 [00:14<?, ?it/s]
loss: 0.114011  [21376/24872]:   0%|          | 0/194 [00:14<?, ?it/s]
loss: 0.130069  [21504/24872]:   0%|          | 0/194 [00:14<?, ?it/s]
loss: 0.101189  [21632/24872]:   0%|          | 0/194 [00:14<?, ?it/s]
loss: 0.113782  [21760/24872]:   0%|          | 0/194 [00:14<?, ?it/s]
loss: 0.144200  [21888/24872]:   0%|          | 0/194 [00:14<?, ?it/s]
loss: 0.115605  [22016/24872]:   0%|          | 0/194 [00:14<?, ?it/s]
loss: 0.123985  [22144/24872]:   0%|          | 0/194 [00:14<?, ?it/s]
loss: 0.113765  [22272/24872]:   0%|          | 0/194 [00:14<?, ?it/s]
loss: 0.122737  [22400/24872]:   0%|          | 0/194 [00:14<?, ?it/s]
loss: 0.150093  [22528/24872]:   0%|          | 0/194 [00:14<?, ?it/s]
loss: 0.083130  [22656/24872]:   0%|          | 0/194 [00:14<?, ?it/s]
loss: 0.090427  [22784/24872]:   0%|          | 0/194 [00:15<?, ?it/s]
loss: 0.085570  [22912/24872]:   0%|          | 0/194 [00:15<?, ?it/s]
loss: 0.140976  [23040/24872]:   0%|          | 0/194 [00:15<?, ?it/s]
loss: 0.122604  [23168/24872]:   0%|          | 0/194 [00:15<?, ?it/s]
loss: 0.092501  [23296/24872]:   0%|          | 0/194 [00:15<?, ?it/s]
loss: 0.119329  [23424/24872]:   0%|          | 0/194 [00:15<?, ?it/s]
loss: 0.094846  [23552/24872]:   0%|          | 0/194 [00:15<?, ?it/s]
loss: 0.135145  [23680/24872]:   0%|          | 0/194 [00:15<?, ?it/s]
loss: 0.154619  [23808/24872]:   0%|          | 0/194 [00:15<?, ?it/s]
loss: 0.116604  [23936/24872]:   0%|          | 0/194 [00:15<?, ?it/s]
loss: 0.122716  [24064/24872]:   0%|          | 0/194 [00:15<?, ?it/s]
loss: 0.097331  [24192/24872]:   0%|          | 0/194 [00:15<?, ?it/s]
loss: 0.106681  [24320/24872]:   0%|          | 0/194 [00:16<?, ?it/s]
loss: 0.107646  [24448/24872]:   0%|          | 0/194 [00:16<?, ?it/s]
loss: 0.102283  [24576/24872]:   0%|          | 0/194 [00:16<?, ?it/s]
loss: 0.104617  [24704/24872]:   0%|          | 0/194 [00:16<?, ?it/s]
loss: 0.125635  [24832/24872]:   0%|          | 0/194 [00:16<?, ?it/s]
loss: 0.093095  [24872/24872]:   0%|          | 0/194 [00:16<?, ?it/s]
loss: 0.093095  [24872/24872]: : 195it [00:16, 11.85it/s]
Epoch 5, time=343.23s

  0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.135984  [  128/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.128137  [  256/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.131898  [  384/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.120286  [  512/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.099057  [  640/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.126843  [  768/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.135402  [  896/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.131316  [ 1024/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.120182  [ 1152/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.130453  [ 1280/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.129970  [ 1408/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.112572  [ 1536/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.125295  [ 1664/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.089624  [ 1792/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.084192  [ 1920/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.124132  [ 2048/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.110510  [ 2176/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.104088  [ 2304/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.150071  [ 2432/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.110105  [ 2560/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.118032  [ 2688/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.114455  [ 2816/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.123708  [ 2944/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.114467  [ 3072/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.110160  [ 3200/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.094648  [ 3328/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.098920  [ 3456/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.099874  [ 3584/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.141364  [ 3712/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.114826  [ 3840/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.082215  [ 3968/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.130116  [ 4096/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.097985  [ 4224/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.088803  [ 4352/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.157218  [ 4480/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.089015  [ 4608/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.092691  [ 4736/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.108970  [ 4864/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.097380  [ 4992/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.130168  [ 5120/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.084251  [ 5248/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.112017  [ 5376/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.132685  [ 5504/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.127327  [ 5632/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.124640  [ 5760/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.113205  [ 5888/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.112144  [ 6016/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.133050  [ 6144/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.116337  [ 6272/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.114535  [ 6400/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.094502  [ 6528/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.116093  [ 6656/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.096158  [ 6784/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.138742  [ 6912/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.108635  [ 7040/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.113965  [ 7168/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.104538  [ 7296/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.079217  [ 7424/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.096691  [ 7552/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.115788  [ 7680/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.129772  [ 7808/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.097143  [ 7936/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.128147  [ 8064/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.074052  [ 8192/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.107239  [ 8320/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.111993  [ 8448/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.096041  [ 8576/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.112699  [ 8704/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.130263  [ 8832/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.110460  [ 8960/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.098393  [ 9088/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.104670  [ 9216/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.134739  [ 9344/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.095942  [ 9472/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.108405  [ 9600/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.101723  [ 9728/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.113841  [ 9856/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.085481  [ 9984/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.119706  [10112/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.098946  [10240/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.101602  [10368/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.090460  [10496/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.135039  [10624/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.099612  [10752/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.112503  [10880/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.113689  [11008/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.086888  [11136/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.084999  [11264/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.121243  [11392/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.103250  [11520/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.129619  [11648/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.096567  [11776/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.125914  [11904/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.102248  [12032/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.099967  [12160/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.111232  [12288/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.103727  [12416/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.100132  [12544/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.105163  [12672/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.093817  [12800/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.098030  [12928/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.131305  [13056/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.098686  [13184/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.119736  [13312/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.101004  [13440/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.112283  [13568/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.122194  [13696/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.126203  [13824/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.092567  [13952/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.097047  [14080/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.113086  [14208/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.129298  [14336/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.153042  [14464/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.098931  [14592/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.160450  [14720/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.145849  [14848/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.109042  [14976/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.130558  [15104/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.081714  [15232/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.123637  [15360/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.165067  [15488/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.123385  [15616/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.102661  [15744/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.105451  [15872/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.125136  [16000/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.086909  [16128/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.121628  [16256/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.127699  [16384/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.129342  [16512/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.148079  [16640/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.129475  [16768/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.125167  [16896/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.134648  [17024/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.110171  [17152/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.103254  [17280/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.120943  [17408/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.116608  [17536/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.148499  [17664/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.137325  [17792/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.088322  [17920/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.131541  [18048/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.142651  [18176/24872]:   0%|          | 0/194 [00:12<?, ?it/s]
loss: 0.085015  [18304/24872]:   0%|          | 0/194 [00:12<?, ?it/s]
loss: 0.090563  [18432/24872]:   0%|          | 0/194 [00:12<?, ?it/s]
loss: 0.108414  [18560/24872]:   0%|          | 0/194 [00:12<?, ?it/s]
loss: 0.096026  [18688/24872]:   0%|          | 0/194 [00:12<?, ?it/s]
loss: 0.104778  [18816/24872]:   0%|          | 0/194 [00:12<?, ?it/s]
loss: 0.104299  [18944/24872]:   0%|          | 0/194 [00:12<?, ?it/s]
loss: 0.105430  [19072/24872]:   0%|          | 0/194 [00:12<?, ?it/s]
loss: 0.093099  [19200/24872]:   0%|          | 0/194 [00:12<?, ?it/s]
loss: 0.099475  [19328/24872]:   0%|          | 0/194 [00:12<?, ?it/s]
loss: 0.121786  [19456/24872]:   0%|          | 0/194 [00:12<?, ?it/s]
loss: 0.088534  [19584/24872]:   0%|          | 0/194 [00:12<?, ?it/s]
loss: 0.112324  [19712/24872]:   0%|          | 0/194 [00:13<?, ?it/s]
loss: 0.094913  [19840/24872]:   0%|          | 0/194 [00:13<?, ?it/s]
loss: 0.116084  [19968/24872]:   0%|          | 0/194 [00:13<?, ?it/s]
loss: 0.117050  [20096/24872]:   0%|          | 0/194 [00:13<?, ?it/s]
loss: 0.093903  [20224/24872]:   0%|          | 0/194 [00:13<?, ?it/s]
loss: 0.126809  [20352/24872]:   0%|          | 0/194 [00:13<?, ?it/s]
loss: 0.090933  [20480/24872]:   0%|          | 0/194 [00:13<?, ?it/s]
loss: 0.108169  [20608/24872]:   0%|          | 0/194 [00:13<?, ?it/s]
loss: 0.137668  [20736/24872]:   0%|          | 0/194 [00:13<?, ?it/s]
loss: 0.097962  [20864/24872]:   0%|          | 0/194 [00:13<?, ?it/s]
loss: 0.101505  [20992/24872]:   0%|          | 0/194 [00:13<?, ?it/s]
loss: 0.135520  [21120/24872]:   0%|          | 0/194 [00:13<?, ?it/s]
loss: 0.131987  [21248/24872]:   0%|          | 0/194 [00:14<?, ?it/s]
loss: 0.130989  [21376/24872]:   0%|          | 0/194 [00:14<?, ?it/s]
loss: 0.132646  [21504/24872]:   0%|          | 0/194 [00:14<?, ?it/s]
loss: 0.095964  [21632/24872]:   0%|          | 0/194 [00:14<?, ?it/s]
loss: 0.115399  [21760/24872]:   0%|          | 0/194 [00:14<?, ?it/s]
loss: 0.132165  [21888/24872]:   0%|          | 0/194 [00:14<?, ?it/s]
loss: 0.116633  [22016/24872]:   0%|          | 0/194 [00:14<?, ?it/s]
loss: 0.113660  [22144/24872]:   0%|          | 0/194 [00:14<?, ?it/s]
loss: 0.108729  [22272/24872]:   0%|          | 0/194 [00:14<?, ?it/s]
loss: 0.110341  [22400/24872]:   0%|          | 0/194 [00:14<?, ?it/s]
loss: 0.150427  [22528/24872]:   0%|          | 0/194 [00:14<?, ?it/s]
loss: 0.075423  [22656/24872]:   0%|          | 0/194 [00:14<?, ?it/s]
loss: 0.084903  [22784/24872]:   0%|          | 0/194 [00:15<?, ?it/s]
loss: 0.084419  [22912/24872]:   0%|          | 0/194 [00:15<?, ?it/s]
loss: 0.123150  [23040/24872]:   0%|          | 0/194 [00:15<?, ?it/s]
loss: 0.127589  [23168/24872]:   0%|          | 0/194 [00:15<?, ?it/s]
loss: 0.080810  [23296/24872]:   0%|          | 0/194 [00:15<?, ?it/s]
loss: 0.117638  [23424/24872]:   0%|          | 0/194 [00:15<?, ?it/s]
loss: 0.092276  [23552/24872]:   0%|          | 0/194 [00:15<?, ?it/s]
loss: 0.130594  [23680/24872]:   0%|          | 0/194 [00:15<?, ?it/s]
loss: 0.133716  [23808/24872]:   0%|          | 0/194 [00:15<?, ?it/s]
loss: 0.117592  [23936/24872]:   0%|          | 0/194 [00:15<?, ?it/s]
loss: 0.113938  [24064/24872]:   0%|          | 0/194 [00:15<?, ?it/s]
loss: 0.073992  [24192/24872]:   0%|          | 0/194 [00:16<?, ?it/s]
loss: 0.109925  [24320/24872]:   0%|          | 0/194 [00:16<?, ?it/s]
loss: 0.091012  [24448/24872]:   0%|          | 0/194 [00:16<?, ?it/s]
loss: 0.110299  [24576/24872]:   0%|          | 0/194 [00:16<?, ?it/s]
loss: 0.104093  [24704/24872]:   0%|          | 0/194 [00:16<?, ?it/s]
loss: 0.122265  [24832/24872]:   0%|          | 0/194 [00:16<?, ?it/s]
loss: 0.111512  [24872/24872]:   0%|          | 0/194 [00:16<?, ?it/s]
loss: 0.111512  [24872/24872]: : 195it [00:16, 11.83it/s]
Epoch 6, time=359.71s

  0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.112614  [  128/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.151051  [  256/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.123797  [  384/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.116394  [  512/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.101415  [  640/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.121237  [  768/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.131907  [  896/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.129855  [ 1024/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.118213  [ 1152/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.119219  [ 1280/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.115996  [ 1408/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.106437  [ 1536/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.117904  [ 1664/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.097768  [ 1792/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.085648  [ 1920/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.127223  [ 2048/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.110564  [ 2176/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.097328  [ 2304/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.148503  [ 2432/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.106677  [ 2560/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.113639  [ 2688/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.118133  [ 2816/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.119934  [ 2944/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.113868  [ 3072/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.107213  [ 3200/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.102406  [ 3328/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.093730  [ 3456/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.096802  [ 3584/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.138280  [ 3712/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.112403  [ 3840/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.080992  [ 3968/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.133848  [ 4096/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.099750  [ 4224/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.082470  [ 4352/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.156670  [ 4480/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.089928  [ 4608/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.093648  [ 4736/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.103966  [ 4864/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.085654  [ 4992/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.121495  [ 5120/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.086325  [ 5248/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.118050  [ 5376/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.128985  [ 5504/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.129457  [ 5632/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.118591  [ 5760/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.113647  [ 5888/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.103646  [ 6016/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.136006  [ 6144/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.113741  [ 6272/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.107247  [ 6400/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.096015  [ 6528/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.116269  [ 6656/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.098891  [ 6784/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.127646  [ 6912/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.109426  [ 7040/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.110737  [ 7168/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.099353  [ 7296/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.077775  [ 7424/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.094458  [ 7552/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.115419  [ 7680/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.130187  [ 7808/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.094735  [ 7936/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.125467  [ 8064/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.079398  [ 8192/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.106829  [ 8320/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.113897  [ 8448/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.104033  [ 8576/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.107701  [ 8704/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.135369  [ 8832/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.108716  [ 8960/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.105990  [ 9088/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.111708  [ 9216/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.134618  [ 9344/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.098323  [ 9472/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.109131  [ 9600/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.101998  [ 9728/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.122315  [ 9856/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.080655  [ 9984/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.130575  [10112/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.104963  [10240/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.098897  [10368/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.083755  [10496/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.134214  [10624/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.103977  [10752/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.106544  [10880/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.108929  [11008/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.087149  [11136/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.088213  [11264/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.119119  [11392/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.087727  [11520/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.133344  [11648/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.085568  [11776/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.134618  [11904/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.110622  [12032/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.098544  [12160/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.101300  [12288/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.111231  [12416/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.097279  [12544/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.109546  [12672/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.093816  [12800/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.092787  [12928/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.148867  [13056/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.095717  [13184/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.122655  [13312/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.101683  [13440/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.114929  [13568/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.142226  [13696/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.129663  [13824/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.110762  [13952/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.099591  [14080/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.134331  [14208/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.130047  [14336/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.136396  [14464/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.119316  [14592/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.140500  [14720/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.162187  [14848/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.118233  [14976/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.133536  [15104/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.096246  [15232/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.115895  [15360/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.152743  [15488/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.121932  [15616/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.086575  [15744/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.121166  [15872/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.110939  [16000/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.088159  [16128/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.103482  [16256/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.112855  [16384/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.122954  [16512/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.140929  [16640/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.113824  [16768/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.126029  [16896/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.130452  [17024/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.110590  [17152/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.086723  [17280/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.109352  [17408/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.101764  [17536/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.138443  [17664/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.139048  [17792/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.084551  [17920/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.113246  [18048/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.153265  [18176/24872]:   0%|          | 0/194 [00:12<?, ?it/s]
loss: 0.072396  [18304/24872]:   0%|          | 0/194 [00:12<?, ?it/s]
loss: 0.099416  [18432/24872]:   0%|          | 0/194 [00:12<?, ?it/s]
loss: 0.105226  [18560/24872]:   0%|          | 0/194 [00:12<?, ?it/s]
loss: 0.088648  [18688/24872]:   0%|          | 0/194 [00:12<?, ?it/s]
loss: 0.099920  [18816/24872]:   0%|          | 0/194 [00:12<?, ?it/s]
loss: 0.103490  [18944/24872]:   0%|          | 0/194 [00:12<?, ?it/s]
loss: 0.103307  [19072/24872]:   0%|          | 0/194 [00:12<?, ?it/s]
loss: 0.077590  [19200/24872]:   0%|          | 0/194 [00:12<?, ?it/s]
loss: 0.093775  [19328/24872]:   0%|          | 0/194 [00:12<?, ?it/s]
loss: 0.091811  [19456/24872]:   0%|          | 0/194 [00:12<?, ?it/s]
loss: 0.081388  [19584/24872]:   0%|          | 0/194 [00:12<?, ?it/s]
loss: 0.103663  [19712/24872]:   0%|          | 0/194 [00:13<?, ?it/s]
loss: 0.085748  [19840/24872]:   0%|          | 0/194 [00:13<?, ?it/s]
loss: 0.105019  [19968/24872]:   0%|          | 0/194 [00:13<?, ?it/s]
loss: 0.113398  [20096/24872]:   0%|          | 0/194 [00:13<?, ?it/s]
loss: 0.079944  [20224/24872]:   0%|          | 0/194 [00:13<?, ?it/s]
loss: 0.122621  [20352/24872]:   0%|          | 0/194 [00:13<?, ?it/s]
loss: 0.084828  [20480/24872]:   0%|          | 0/194 [00:13<?, ?it/s]
loss: 0.087333  [20608/24872]:   0%|          | 0/194 [00:13<?, ?it/s]
loss: 0.128510  [20736/24872]:   0%|          | 0/194 [00:13<?, ?it/s]
loss: 0.088029  [20864/24872]:   0%|          | 0/194 [00:13<?, ?it/s]
loss: 0.088811  [20992/24872]:   0%|          | 0/194 [00:13<?, ?it/s]
loss: 0.123448  [21120/24872]:   0%|          | 0/194 [00:13<?, ?it/s]
loss: 0.110268  [21248/24872]:   0%|          | 0/194 [00:14<?, ?it/s]
loss: 0.109451  [21376/24872]:   0%|          | 0/194 [00:14<?, ?it/s]
loss: 0.119476  [21504/24872]:   0%|          | 0/194 [00:14<?, ?it/s]
loss: 0.088493  [21632/24872]:   0%|          | 0/194 [00:14<?, ?it/s]
loss: 0.107962  [21760/24872]:   0%|          | 0/194 [00:14<?, ?it/s]
loss: 0.141233  [21888/24872]:   0%|          | 0/194 [00:14<?, ?it/s]
loss: 0.105010  [22016/24872]:   0%|          | 0/194 [00:14<?, ?it/s]
loss: 0.108928  [22144/24872]:   0%|          | 0/194 [00:14<?, ?it/s]
loss: 0.114662  [22272/24872]:   0%|          | 0/194 [00:14<?, ?it/s]
loss: 0.103149  [22400/24872]:   0%|          | 0/194 [00:14<?, ?it/s]
loss: 0.138827  [22528/24872]:   0%|          | 0/194 [00:14<?, ?it/s]
loss: 0.076665  [22656/24872]:   0%|          | 0/194 [00:14<?, ?it/s]
loss: 0.086757  [22784/24872]:   0%|          | 0/194 [00:15<?, ?it/s]
loss: 0.081258  [22912/24872]:   0%|          | 0/194 [00:15<?, ?it/s]
loss: 0.120326  [23040/24872]:   0%|          | 0/194 [00:15<?, ?it/s]
loss: 0.114432  [23168/24872]:   0%|          | 0/194 [00:15<?, ?it/s]
loss: 0.073790  [23296/24872]:   0%|          | 0/194 [00:15<?, ?it/s]
loss: 0.114386  [23424/24872]:   0%|          | 0/194 [00:15<?, ?it/s]
loss: 0.084926  [23552/24872]:   0%|          | 0/194 [00:15<?, ?it/s]
loss: 0.130714  [23680/24872]:   0%|          | 0/194 [00:15<?, ?it/s]
loss: 0.131563  [23808/24872]:   0%|          | 0/194 [00:15<?, ?it/s]
loss: 0.117874  [23936/24872]:   0%|          | 0/194 [00:15<?, ?it/s]
loss: 0.111914  [24064/24872]:   0%|          | 0/194 [00:15<?, ?it/s]
loss: 0.076596  [24192/24872]:   0%|          | 0/194 [00:15<?, ?it/s]
loss: 0.113624  [24320/24872]:   0%|          | 0/194 [00:16<?, ?it/s]
loss: 0.086416  [24448/24872]:   0%|          | 0/194 [00:16<?, ?it/s]
loss: 0.115562  [24576/24872]:   0%|          | 0/194 [00:16<?, ?it/s]
loss: 0.106703  [24704/24872]:   0%|          | 0/194 [00:16<?, ?it/s]
loss: 0.110491  [24832/24872]:   0%|          | 0/194 [00:16<?, ?it/s]
loss: 0.095379  [24872/24872]:   0%|          | 0/194 [00:16<?, ?it/s]
loss: 0.095379  [24872/24872]: : 195it [00:16, 11.85it/s]
-------------------------------
LR=1e-05, batch_size=256
-------------------------------
Epoch 1, time=376.16s

  0%|          | 0/97 [00:00<?, ?it/s]
loss: 0.112657  [  256/24872]:   0%|          | 0/97 [00:00<?, ?it/s]
loss: 0.107971  [  512/24872]:   0%|          | 0/97 [00:00<?, ?it/s]
loss: 0.101506  [  768/24872]:   0%|          | 0/97 [00:00<?, ?it/s]
loss: 0.108027  [ 1024/24872]:   0%|          | 0/97 [00:00<?, ?it/s]
loss: 0.114396  [ 1280/24872]:   0%|          | 0/97 [00:00<?, ?it/s]
loss: 0.100568  [ 1536/24872]:   0%|          | 0/97 [00:00<?, ?it/s]
loss: 0.096691  [ 1792/24872]:   0%|          | 0/97 [00:00<?, ?it/s]
loss: 0.092114  [ 2048/24872]:   0%|          | 0/97 [00:01<?, ?it/s]
loss: 0.093056  [ 2304/24872]:   0%|          | 0/97 [00:01<?, ?it/s]
loss: 0.120716  [ 2560/24872]:   0%|          | 0/97 [00:01<?, ?it/s]
loss: 0.103184  [ 2816/24872]:   0%|          | 0/97 [00:01<?, ?it/s]
loss: 0.107511  [ 3072/24872]:   0%|          | 0/97 [00:01<?, ?it/s]
loss: 0.095672  [ 3328/24872]:   0%|          | 0/97 [00:01<?, ?it/s]
loss: 0.085505  [ 3584/24872]:   0%|          | 0/97 [00:01<?, ?it/s]
loss: 0.112139  [ 3840/24872]:   0%|          | 0/97 [00:01<?, ?it/s]
loss: 0.093192  [ 4096/24872]:   0%|          | 0/97 [00:02<?, ?it/s]
loss: 0.078020  [ 4352/24872]:   0%|          | 0/97 [00:02<?, ?it/s]
loss: 0.112541  [ 4608/24872]:   0%|          | 0/97 [00:02<?, ?it/s]
loss: 0.095606  [ 4864/24872]:   0%|          | 0/97 [00:02<?, ?it/s]
loss: 0.091945  [ 5120/24872]:   0%|          | 0/97 [00:02<?, ?it/s]
loss: 0.090856  [ 5376/24872]:   0%|          | 0/97 [00:02<?, ?it/s]
loss: 0.115465  [ 5632/24872]:   0%|          | 0/97 [00:02<?, ?it/s]
loss: 0.109417  [ 5888/24872]:   0%|          | 0/97 [00:02<?, ?it/s]
loss: 0.109315  [ 6144/24872]:   0%|          | 0/97 [00:03<?, ?it/s]
loss: 0.101016  [ 6400/24872]:   0%|          | 0/97 [00:03<?, ?it/s]
loss: 0.094011  [ 6656/24872]:   0%|          | 0/97 [00:03<?, ?it/s]
loss: 0.105549  [ 6912/24872]:   0%|          | 0/97 [00:03<?, ?it/s]
loss: 0.103654  [ 7168/24872]:   0%|          | 0/97 [00:03<?, ?it/s]
loss: 0.081958  [ 7424/24872]:   0%|          | 0/97 [00:03<?, ?it/s]
loss: 0.096639  [ 7680/24872]:   0%|          | 0/97 [00:03<?, ?it/s]
loss: 0.106678  [ 7936/24872]:   0%|          | 0/97 [00:03<?, ?it/s]
loss: 0.088477  [ 8192/24872]:   0%|          | 0/97 [00:04<?, ?it/s]
loss: 0.097680  [ 8448/24872]:   0%|          | 0/97 [00:04<?, ?it/s]
loss: 0.095653  [ 8704/24872]:   0%|          | 0/97 [00:04<?, ?it/s]
loss: 0.119726  [ 8960/24872]:   0%|          | 0/97 [00:04<?, ?it/s]
loss: 0.092152  [ 9216/24872]:   0%|          | 0/97 [00:04<?, ?it/s]
loss: 0.104932  [ 9472/24872]:   0%|          | 0/97 [00:04<?, ?it/s]
loss: 0.095256  [ 9728/24872]:   0%|          | 0/97 [00:04<?, ?it/s]
loss: 0.088017  [ 9984/24872]:   0%|          | 0/97 [00:04<?, ?it/s]
loss: 0.101753  [10240/24872]:   0%|          | 0/97 [00:05<?, ?it/s]
loss: 0.087794  [10496/24872]:   0%|          | 0/97 [00:05<?, ?it/s]
loss: 0.110123  [10752/24872]:   0%|          | 0/97 [00:05<?, ?it/s]
loss: 0.096305  [11008/24872]:   0%|          | 0/97 [00:05<?, ?it/s]
loss: 0.077289  [11264/24872]:   0%|          | 0/97 [00:05<?, ?it/s]
loss: 0.094520  [11520/24872]:   0%|          | 0/97 [00:05<?, ?it/s]
loss: 0.097824  [11776/24872]:   0%|          | 0/97 [00:05<?, ?it/s]
loss: 0.109740  [12032/24872]:   0%|          | 0/97 [00:05<?, ?it/s]
loss: 0.095577  [12288/24872]:   0%|          | 0/97 [00:06<?, ?it/s]
loss: 0.091647  [12544/24872]:   0%|          | 0/97 [00:06<?, ?it/s]
loss: 0.086681  [12800/24872]:   0%|          | 0/97 [00:06<?, ?it/s]
loss: 0.096699  [13056/24872]:   0%|          | 0/97 [00:06<?, ?it/s]
loss: 0.091751  [13312/24872]:   0%|          | 0/97 [00:06<?, ?it/s]
loss: 0.084704  [13568/24872]:   0%|          | 0/97 [00:06<?, ?it/s]
loss: 0.102956  [13824/24872]:   0%|          | 0/97 [00:06<?, ?it/s]
loss: 0.080058  [14080/24872]:   0%|          | 0/97 [00:06<?, ?it/s]
loss: 0.095639  [14336/24872]:   0%|          | 0/97 [00:07<?, ?it/s]
loss: 0.098426  [14592/24872]:   0%|          | 0/97 [00:07<?, ?it/s]
loss: 0.117832  [14848/24872]:   0%|          | 0/97 [00:07<?, ?it/s]
loss: 0.097480  [15104/24872]:   0%|          | 0/97 [00:07<?, ?it/s]
loss: 0.088544  [15360/24872]:   0%|          | 0/97 [00:07<?, ?it/s]
loss: 0.113282  [15616/24872]:   0%|          | 0/97 [00:07<?, ?it/s]
loss: 0.092652  [15872/24872]:   0%|          | 0/97 [00:07<?, ?it/s]
loss: 0.084007  [16128/24872]:   0%|          | 0/97 [00:07<?, ?it/s]
loss: 0.098087  [16384/24872]:   0%|          | 0/97 [00:08<?, ?it/s]
loss: 0.113525  [16640/24872]:   0%|          | 0/97 [00:08<?, ?it/s]
loss: 0.104181  [16896/24872]:   0%|          | 0/97 [00:08<?, ?it/s]
loss: 0.110269  [17152/24872]:   0%|          | 0/97 [00:08<?, ?it/s]
loss: 0.085347  [17408/24872]:   0%|          | 0/97 [00:08<?, ?it/s]
loss: 0.109109  [17664/24872]:   0%|          | 0/97 [00:08<?, ?it/s]
loss: 0.103992  [17920/24872]:   0%|          | 0/97 [00:08<?, ?it/s]
loss: 0.121398  [18176/24872]:   0%|          | 0/97 [00:08<?, ?it/s]
loss: 0.074626  [18432/24872]:   0%|          | 0/97 [00:09<?, ?it/s]
loss: 0.089627  [18688/24872]:   0%|          | 0/97 [00:09<?, ?it/s]
loss: 0.090340  [18944/24872]:   0%|          | 0/97 [00:09<?, ?it/s]
loss: 0.081105  [19200/24872]:   0%|          | 0/97 [00:09<?, ?it/s]
loss: 0.086651  [19456/24872]:   0%|          | 0/97 [00:09<?, ?it/s]
loss: 0.085800  [19712/24872]:   0%|          | 0/97 [00:09<?, ?it/s]
loss: 0.086774  [19968/24872]:   0%|          | 0/97 [00:09<?, ?it/s]
loss: 0.088039  [20224/24872]:   0%|          | 0/97 [00:09<?, ?it/s]
loss: 0.094978  [20480/24872]:   0%|          | 0/97 [00:10<?, ?it/s]
loss: 0.104850  [20736/24872]:   0%|          | 0/97 [00:10<?, ?it/s]
loss: 0.084735  [20992/24872]:   0%|          | 0/97 [00:10<?, ?it/s]
loss: 0.113286  [21248/24872]:   0%|          | 0/97 [00:10<?, ?it/s]
loss: 0.109647  [21504/24872]:   0%|          | 0/97 [00:10<?, ?it/s]
loss: 0.088397  [21760/24872]:   0%|          | 0/97 [00:10<?, ?it/s]
loss: 0.108713  [22016/24872]:   0%|          | 0/97 [00:10<?, ?it/s]
loss: 0.100162  [22272/24872]:   0%|          | 0/97 [00:10<?, ?it/s]
loss: 0.109163  [22528/24872]:   0%|          | 0/97 [00:11<?, ?it/s]
loss: 0.068858  [22784/24872]:   0%|          | 0/97 [00:11<?, ?it/s]
loss: 0.084093  [23040/24872]:   0%|          | 0/97 [00:11<?, ?it/s]
loss: 0.082324  [23296/24872]:   0%|          | 0/97 [00:11<?, ?it/s]
loss: 0.090278  [23552/24872]:   0%|          | 0/97 [00:11<?, ?it/s]
loss: 0.117197  [23808/24872]:   0%|          | 0/97 [00:11<?, ?it/s]
loss: 0.102644  [24064/24872]:   0%|          | 0/97 [00:11<?, ?it/s]
loss: 0.075780  [24320/24872]:   0%|          | 0/97 [00:11<?, ?it/s]
loss: 0.085093  [24576/24872]:   0%|          | 0/97 [00:12<?, ?it/s]
loss: 0.089627  [24832/24872]:   0%|          | 0/97 [00:12<?, ?it/s]
loss: 0.077942  [24872/24872]:   0%|          | 0/97 [00:12<?, ?it/s]
loss: 0.077942  [24872/24872]: : 98it [00:12,  7.99it/s]
Epoch 2, time=388.43s

  0%|          | 0/97 [00:00<?, ?it/s]
loss: 0.098276  [  256/24872]:   0%|          | 0/97 [00:00<?, ?it/s]
loss: 0.098225  [  512/24872]:   0%|          | 0/97 [00:00<?, ?it/s]
loss: 0.095949  [  768/24872]:   0%|          | 0/97 [00:00<?, ?it/s]
loss: 0.101868  [ 1024/24872]:   0%|          | 0/97 [00:00<?, ?it/s]
loss: 0.104763  [ 1280/24872]:   0%|          | 0/97 [00:00<?, ?it/s]
loss: 0.093533  [ 1536/24872]:   0%|          | 0/97 [00:00<?, ?it/s]
loss: 0.090559  [ 1792/24872]:   0%|          | 0/97 [00:00<?, ?it/s]
loss: 0.086880  [ 2048/24872]:   0%|          | 0/97 [00:01<?, ?it/s]
loss: 0.088917  [ 2304/24872]:   0%|          | 0/97 [00:01<?, ?it/s]
loss: 0.118116  [ 2560/24872]:   0%|          | 0/97 [00:01<?, ?it/s]
loss: 0.100200  [ 2816/24872]:   0%|          | 0/97 [00:01<?, ?it/s]
loss: 0.105986  [ 3072/24872]:   0%|          | 0/97 [00:01<?, ?it/s]
loss: 0.092538  [ 3328/24872]:   0%|          | 0/97 [00:01<?, ?it/s]
loss: 0.082997  [ 3584/24872]:   0%|          | 0/97 [00:01<?, ?it/s]
loss: 0.107080  [ 3840/24872]:   0%|          | 0/97 [00:01<?, ?it/s]
loss: 0.090431  [ 4096/24872]:   0%|          | 0/97 [00:02<?, ?it/s]
loss: 0.075836  [ 4352/24872]:   0%|          | 0/97 [00:02<?, ?it/s]
loss: 0.110823  [ 4608/24872]:   0%|          | 0/97 [00:02<?, ?it/s]
loss: 0.092404  [ 4864/24872]:   0%|          | 0/97 [00:02<?, ?it/s]
loss: 0.089388  [ 5120/24872]:   0%|          | 0/97 [00:02<?, ?it/s]
loss: 0.087525  [ 5376/24872]:   0%|          | 0/97 [00:02<?, ?it/s]
loss: 0.113149  [ 5632/24872]:   0%|          | 0/97 [00:02<?, ?it/s]
loss: 0.107447  [ 5888/24872]:   0%|          | 0/97 [00:02<?, ?it/s]
loss: 0.106404  [ 6144/24872]:   0%|          | 0/97 [00:03<?, ?it/s]
loss: 0.097540  [ 6400/24872]:   0%|          | 0/97 [00:03<?, ?it/s]
loss: 0.089465  [ 6656/24872]:   0%|          | 0/97 [00:03<?, ?it/s]
loss: 0.103651  [ 6912/24872]:   0%|          | 0/97 [00:03<?, ?it/s]
loss: 0.099489  [ 7168/24872]:   0%|          | 0/97 [00:03<?, ?it/s]
loss: 0.079627  [ 7424/24872]:   0%|          | 0/97 [00:03<?, ?it/s]
loss: 0.094793  [ 7680/24872]:   0%|          | 0/97 [00:03<?, ?it/s]
loss: 0.104957  [ 7936/24872]:   0%|          | 0/97 [00:03<?, ?it/s]
loss: 0.086384  [ 8192/24872]:   0%|          | 0/97 [00:04<?, ?it/s]
loss: 0.096332  [ 8448/24872]:   0%|          | 0/97 [00:04<?, ?it/s]
loss: 0.094666  [ 8704/24872]:   0%|          | 0/97 [00:04<?, ?it/s]
loss: 0.117849  [ 8960/24872]:   0%|          | 0/97 [00:04<?, ?it/s]
loss: 0.090261  [ 9216/24872]:   0%|          | 0/97 [00:04<?, ?it/s]
loss: 0.101845  [ 9472/24872]:   0%|          | 0/97 [00:04<?, ?it/s]
loss: 0.093668  [ 9728/24872]:   0%|          | 0/97 [00:04<?, ?it/s]
loss: 0.086401  [ 9984/24872]:   0%|          | 0/97 [00:04<?, ?it/s]
loss: 0.100306  [10240/24872]:   0%|          | 0/97 [00:05<?, ?it/s]
loss: 0.084925  [10496/24872]:   0%|          | 0/97 [00:05<?, ?it/s]
loss: 0.108753  [10752/24872]:   0%|          | 0/97 [00:05<?, ?it/s]
loss: 0.095120  [11008/24872]:   0%|          | 0/97 [00:05<?, ?it/s]
loss: 0.076254  [11264/24872]:   0%|          | 0/97 [00:05<?, ?it/s]
loss: 0.092893  [11520/24872]:   0%|          | 0/97 [00:05<?, ?it/s]
loss: 0.096284  [11776/24872]:   0%|          | 0/97 [00:05<?, ?it/s]
loss: 0.108196  [12032/24872]:   0%|          | 0/97 [00:05<?, ?it/s]
loss: 0.092698  [12288/24872]:   0%|          | 0/97 [00:06<?, ?it/s]
loss: 0.088842  [12544/24872]:   0%|          | 0/97 [00:06<?, ?it/s]
loss: 0.084736  [12800/24872]:   0%|          | 0/97 [00:06<?, ?it/s]
loss: 0.095886  [13056/24872]:   0%|          | 0/97 [00:06<?, ?it/s]
loss: 0.089934  [13312/24872]:   0%|          | 0/97 [00:06<?, ?it/s]
loss: 0.082736  [13568/24872]:   0%|          | 0/97 [00:06<?, ?it/s]
loss: 0.101976  [13824/24872]:   0%|          | 0/97 [00:06<?, ?it/s]
loss: 0.079801  [14080/24872]:   0%|          | 0/97 [00:06<?, ?it/s]
loss: 0.094488  [14336/24872]:   0%|          | 0/97 [00:07<?, ?it/s]
loss: 0.097156  [14592/24872]:   0%|          | 0/97 [00:07<?, ?it/s]
loss: 0.115967  [14848/24872]:   0%|          | 0/97 [00:07<?, ?it/s]
loss: 0.096360  [15104/24872]:   0%|          | 0/97 [00:07<?, ?it/s]
loss: 0.087538  [15360/24872]:   0%|          | 0/97 [00:07<?, ?it/s]
loss: 0.110757  [15616/24872]:   0%|          | 0/97 [00:07<?, ?it/s]
loss: 0.091656  [15872/24872]:   0%|          | 0/97 [00:07<?, ?it/s]
loss: 0.082977  [16128/24872]:   0%|          | 0/97 [00:07<?, ?it/s]
loss: 0.097009  [16384/24872]:   0%|          | 0/97 [00:08<?, ?it/s]
loss: 0.112168  [16640/24872]:   0%|          | 0/97 [00:08<?, ?it/s]
loss: 0.102328  [16896/24872]:   0%|          | 0/97 [00:08<?, ?it/s]
loss: 0.108078  [17152/24872]:   0%|          | 0/97 [00:08<?, ?it/s]
loss: 0.084368  [17408/24872]:   0%|          | 0/97 [00:08<?, ?it/s]
loss: 0.107993  [17664/24872]:   0%|          | 0/97 [00:08<?, ?it/s]
loss: 0.102799  [17920/24872]:   0%|          | 0/97 [00:08<?, ?it/s]
loss: 0.119084  [18176/24872]:   0%|          | 0/97 [00:08<?, ?it/s]
loss: 0.073746  [18432/24872]:   0%|          | 0/97 [00:09<?, ?it/s]
loss: 0.088996  [18688/24872]:   0%|          | 0/97 [00:09<?, ?it/s]
loss: 0.089009  [18944/24872]:   0%|          | 0/97 [00:09<?, ?it/s]
loss: 0.079954  [19200/24872]:   0%|          | 0/97 [00:09<?, ?it/s]
loss: 0.085054  [19456/24872]:   0%|          | 0/97 [00:09<?, ?it/s]
loss: 0.084674  [19712/24872]:   0%|          | 0/97 [00:09<?, ?it/s]
loss: 0.086000  [19968/24872]:   0%|          | 0/97 [00:09<?, ?it/s]
loss: 0.086574  [20224/24872]:   0%|          | 0/97 [00:09<?, ?it/s]
loss: 0.094326  [20480/24872]:   0%|          | 0/97 [00:10<?, ?it/s]
loss: 0.104387  [20736/24872]:   0%|          | 0/97 [00:10<?, ?it/s]
loss: 0.083687  [20992/24872]:   0%|          | 0/97 [00:10<?, ?it/s]
loss: 0.112401  [21248/24872]:   0%|          | 0/97 [00:10<?, ?it/s]
loss: 0.108340  [21504/24872]:   0%|          | 0/97 [00:10<?, ?it/s]
loss: 0.088148  [21760/24872]:   0%|          | 0/97 [00:10<?, ?it/s]
loss: 0.105667  [22016/24872]:   0%|          | 0/97 [00:10<?, ?it/s]
loss: 0.099235  [22272/24872]:   0%|          | 0/97 [00:10<?, ?it/s]
loss: 0.108621  [22528/24872]:   0%|          | 0/97 [00:11<?, ?it/s]
loss: 0.068308  [22784/24872]:   0%|          | 0/97 [00:11<?, ?it/s]
loss: 0.083186  [23040/24872]:   0%|          | 0/97 [00:11<?, ?it/s]
loss: 0.080116  [23296/24872]:   0%|          | 0/97 [00:11<?, ?it/s]
loss: 0.089757  [23552/24872]:   0%|          | 0/97 [00:11<?, ?it/s]
loss: 0.115488  [23808/24872]:   0%|          | 0/97 [00:11<?, ?it/s]
loss: 0.099705  [24064/24872]:   0%|          | 0/97 [00:11<?, ?it/s]
loss: 0.074788  [24320/24872]:   0%|          | 0/97 [00:11<?, ?it/s]
loss: 0.084149  [24576/24872]:   0%|          | 0/97 [00:12<?, ?it/s]
loss: 0.088968  [24832/24872]:   0%|          | 0/97 [00:12<?, ?it/s]
loss: 0.076861  [24872/24872]:   0%|          | 0/97 [00:12<?, ?it/s]
loss: 0.076861  [24872/24872]: : 98it [00:12,  8.04it/s]
Epoch 3, time=400.62s

  0%|          | 0/97 [00:00<?, ?it/s]
loss: 0.097483  [  256/24872]:   0%|          | 0/97 [00:00<?, ?it/s]
loss: 0.097280  [  512/24872]:   0%|          | 0/97 [00:00<?, ?it/s]
loss: 0.094773  [  768/24872]:   0%|          | 0/97 [00:00<?, ?it/s]
loss: 0.100796  [ 1024/24872]:   0%|          | 0/97 [00:00<?, ?it/s]
loss: 0.103097  [ 1280/24872]:   0%|          | 0/97 [00:00<?, ?it/s]
loss: 0.092651  [ 1536/24872]:   0%|          | 0/97 [00:00<?, ?it/s]
loss: 0.089715  [ 1792/24872]:   0%|          | 0/97 [00:00<?, ?it/s]
loss: 0.086125  [ 2048/24872]:   0%|          | 0/97 [00:01<?, ?it/s]
loss: 0.087845  [ 2304/24872]:   0%|          | 0/97 [00:01<?, ?it/s]
loss: 0.116692  [ 2560/24872]:   0%|          | 0/97 [00:01<?, ?it/s]
loss: 0.099544  [ 2816/24872]:   0%|          | 0/97 [00:01<?, ?it/s]
loss: 0.105142  [ 3072/24872]:   0%|          | 0/97 [00:01<?, ?it/s]
loss: 0.091692  [ 3328/24872]:   0%|          | 0/97 [00:01<?, ?it/s]
loss: 0.081978  [ 3584/24872]:   0%|          | 0/97 [00:01<?, ?it/s]
loss: 0.105725  [ 3840/24872]:   0%|          | 0/97 [00:01<?, ?it/s]
loss: 0.089465  [ 4096/24872]:   0%|          | 0/97 [00:02<?, ?it/s]
loss: 0.075047  [ 4352/24872]:   0%|          | 0/97 [00:02<?, ?it/s]
loss: 0.109899  [ 4608/24872]:   0%|          | 0/97 [00:02<?, ?it/s]
loss: 0.090958  [ 4864/24872]:   0%|          | 0/97 [00:02<?, ?it/s]
loss: 0.088561  [ 5120/24872]:   0%|          | 0/97 [00:02<?, ?it/s]
loss: 0.086980  [ 5376/24872]:   0%|          | 0/97 [00:02<?, ?it/s]
loss: 0.112047  [ 5632/24872]:   0%|          | 0/97 [00:02<?, ?it/s]
loss: 0.106370  [ 5888/24872]:   0%|          | 0/97 [00:02<?, ?it/s]
loss: 0.105152  [ 6144/24872]:   0%|          | 0/97 [00:03<?, ?it/s]
loss: 0.096132  [ 6400/24872]:   0%|          | 0/97 [00:03<?, ?it/s]
loss: 0.087978  [ 6656/24872]:   0%|          | 0/97 [00:03<?, ?it/s]
loss: 0.102826  [ 6912/24872]:   0%|          | 0/97 [00:03<?, ?it/s]
loss: 0.097877  [ 7168/24872]:   0%|          | 0/97 [00:03<?, ?it/s]
loss: 0.078879  [ 7424/24872]:   0%|          | 0/97 [00:03<?, ?it/s]
loss: 0.093693  [ 7680/24872]:   0%|          | 0/97 [00:03<?, ?it/s]
loss: 0.103678  [ 7936/24872]:   0%|          | 0/97 [00:03<?, ?it/s]
loss: 0.085361  [ 8192/24872]:   0%|          | 0/97 [00:04<?, ?it/s]
loss: 0.095629  [ 8448/24872]:   0%|          | 0/97 [00:04<?, ?it/s]
loss: 0.093584  [ 8704/24872]:   0%|          | 0/97 [00:04<?, ?it/s]
loss: 0.117193  [ 8960/24872]:   0%|          | 0/97 [00:04<?, ?it/s]
loss: 0.089304  [ 9216/24872]:   0%|          | 0/97 [00:04<?, ?it/s]
loss: 0.100784  [ 9472/24872]:   0%|          | 0/97 [00:04<?, ?it/s]
loss: 0.092569  [ 9728/24872]:   0%|          | 0/97 [00:04<?, ?it/s]
loss: 0.085587  [ 9984/24872]:   0%|          | 0/97 [00:04<?, ?it/s]
loss: 0.099684  [10240/24872]:   0%|          | 0/97 [00:05<?, ?it/s]
loss: 0.083826  [10496/24872]:   0%|          | 0/97 [00:05<?, ?it/s]
loss: 0.107976  [10752/24872]:   0%|          | 0/97 [00:05<?, ?it/s]
loss: 0.094618  [11008/24872]:   0%|          | 0/97 [00:05<?, ?it/s]
loss: 0.075624  [11264/24872]:   0%|          | 0/97 [00:05<?, ?it/s]
loss: 0.092034  [11520/24872]:   0%|          | 0/97 [00:05<?, ?it/s]
loss: 0.095470  [11776/24872]:   0%|          | 0/97 [00:05<?, ?it/s]
loss: 0.106617  [12032/24872]:   0%|          | 0/97 [00:05<?, ?it/s]
loss: 0.090684  [12288/24872]:   0%|          | 0/97 [00:06<?, ?it/s]
loss: 0.087619  [12544/24872]:   0%|          | 0/97 [00:06<?, ?it/s]
loss: 0.083120  [12800/24872]:   0%|          | 0/97 [00:06<?, ?it/s]
loss: 0.095129  [13056/24872]:   0%|          | 0/97 [00:06<?, ?it/s]
loss: 0.088622  [13312/24872]:   0%|          | 0/97 [00:06<?, ?it/s]
loss: 0.081740  [13568/24872]:   0%|          | 0/97 [00:06<?, ?it/s]
loss: 0.101517  [13824/24872]:   0%|          | 0/97 [00:06<?, ?it/s]
loss: 0.079522  [14080/24872]:   0%|          | 0/97 [00:06<?, ?it/s]
loss: 0.093595  [14336/24872]:   0%|          | 0/97 [00:07<?, ?it/s]
loss: 0.096164  [14592/24872]:   0%|          | 0/97 [00:07<?, ?it/s]
loss: 0.114855  [14848/24872]:   0%|          | 0/97 [00:07<?, ?it/s]
loss: 0.095822  [15104/24872]:   0%|          | 0/97 [00:07<?, ?it/s]
loss: 0.086622  [15360/24872]:   0%|          | 0/97 [00:07<?, ?it/s]
loss: 0.109273  [15616/24872]:   0%|          | 0/97 [00:07<?, ?it/s]
loss: 0.090797  [15872/24872]:   0%|          | 0/97 [00:07<?, ?it/s]
loss: 0.082344  [16128/24872]:   0%|          | 0/97 [00:07<?, ?it/s]
loss: 0.096245  [16384/24872]:   0%|          | 0/97 [00:08<?, ?it/s]
loss: 0.111215  [16640/24872]:   0%|          | 0/97 [00:08<?, ?it/s]
loss: 0.101128  [16896/24872]:   0%|          | 0/97 [00:08<?, ?it/s]
loss: 0.106861  [17152/24872]:   0%|          | 0/97 [00:08<?, ?it/s]
loss: 0.083748  [17408/24872]:   0%|          | 0/97 [00:08<?, ?it/s]
loss: 0.107169  [17664/24872]:   0%|          | 0/97 [00:08<?, ?it/s]
loss: 0.101865  [17920/24872]:   0%|          | 0/97 [00:08<?, ?it/s]
loss: 0.117628  [18176/24872]:   0%|          | 0/97 [00:08<?, ?it/s]
loss: 0.073186  [18432/24872]:   0%|          | 0/97 [00:09<?, ?it/s]
loss: 0.088375  [18688/24872]:   0%|          | 0/97 [00:09<?, ?it/s]
loss: 0.088009  [18944/24872]:   0%|          | 0/97 [00:09<?, ?it/s]
loss: 0.079037  [19200/24872]:   0%|          | 0/97 [00:09<?, ?it/s]
loss: 0.083812  [19456/24872]:   0%|          | 0/97 [00:09<?, ?it/s]
loss: 0.082916  [19712/24872]:   0%|          | 0/97 [00:09<?, ?it/s]
loss: 0.085393  [19968/24872]:   0%|          | 0/97 [00:09<?, ?it/s]
loss: 0.085577  [20224/24872]:   0%|          | 0/97 [00:09<?, ?it/s]
loss: 0.093884  [20480/24872]:   0%|          | 0/97 [00:10<?, ?it/s]
loss: 0.103612  [20736/24872]:   0%|          | 0/97 [00:10<?, ?it/s]
loss: 0.082612  [20992/24872]:   0%|          | 0/97 [00:10<?, ?it/s]
loss: 0.111720  [21248/24872]:   0%|          | 0/97 [00:10<?, ?it/s]
loss: 0.107456  [21504/24872]:   0%|          | 0/97 [00:10<?, ?it/s]
loss: 0.087639  [21760/24872]:   0%|          | 0/97 [00:10<?, ?it/s]
loss: 0.104481  [22016/24872]:   0%|          | 0/97 [00:10<?, ?it/s]
loss: 0.098644  [22272/24872]:   0%|          | 0/97 [00:10<?, ?it/s]
loss: 0.107964  [22528/24872]:   0%|          | 0/97 [00:11<?, ?it/s]
loss: 0.067819  [22784/24872]:   0%|          | 0/97 [00:11<?, ?it/s]
loss: 0.082536  [23040/24872]:   0%|          | 0/97 [00:11<?, ?it/s]
loss: 0.078984  [23296/24872]:   0%|          | 0/97 [00:11<?, ?it/s]
loss: 0.089259  [23552/24872]:   0%|          | 0/97 [00:11<?, ?it/s]
loss: 0.114014  [23808/24872]:   0%|          | 0/97 [00:11<?, ?it/s]
loss: 0.098009  [24064/24872]:   0%|          | 0/97 [00:11<?, ?it/s]
loss: 0.074127  [24320/24872]:   0%|          | 0/97 [00:11<?, ?it/s]
loss: 0.083437  [24576/24872]:   0%|          | 0/97 [00:12<?, ?it/s]
loss: 0.087836  [24832/24872]:   0%|          | 0/97 [00:12<?, ?it/s]
loss: 0.076112  [24872/24872]:   0%|          | 0/97 [00:12<?, ?it/s]
loss: 0.076112  [24872/24872]: : 98it [00:12,  8.01it/s]
Epoch 4, time=412.86s

  0%|          | 0/97 [00:00<?, ?it/s]
loss: 0.096653  [  256/24872]:   0%|          | 0/97 [00:00<?, ?it/s]
loss: 0.096121  [  512/24872]:   0%|          | 0/97 [00:00<?, ?it/s]
loss: 0.093393  [  768/24872]:   0%|          | 0/97 [00:00<?, ?it/s]
loss: 0.099648  [ 1024/24872]:   0%|          | 0/97 [00:00<?, ?it/s]
loss: 0.101898  [ 1280/24872]:   0%|          | 0/97 [00:00<?, ?it/s]
loss: 0.091980  [ 1536/24872]:   0%|          | 0/97 [00:00<?, ?it/s]
loss: 0.088971  [ 1792/24872]:   0%|          | 0/97 [00:00<?, ?it/s]
loss: 0.086038  [ 2048/24872]:   0%|          | 0/97 [00:01<?, ?it/s]
loss: 0.087124  [ 2304/24872]:   0%|          | 0/97 [00:01<?, ?it/s]
loss: 0.115477  [ 2560/24872]:   0%|          | 0/97 [00:01<?, ?it/s]
loss: 0.098776  [ 2816/24872]:   0%|          | 0/97 [00:01<?, ?it/s]
loss: 0.104122  [ 3072/24872]:   0%|          | 0/97 [00:01<?, ?it/s]
loss: 0.091036  [ 3328/24872]:   0%|          | 0/97 [00:01<?, ?it/s]
loss: 0.081142  [ 3584/24872]:   0%|          | 0/97 [00:01<?, ?it/s]
loss: 0.104706  [ 3840/24872]:   0%|          | 0/97 [00:01<?, ?it/s]
loss: 0.088871  [ 4096/24872]:   0%|          | 0/97 [00:02<?, ?it/s]
loss: 0.074585  [ 4352/24872]:   0%|          | 0/97 [00:02<?, ?it/s]
loss: 0.109115  [ 4608/24872]:   0%|          | 0/97 [00:02<?, ?it/s]
loss: 0.089982  [ 4864/24872]:   0%|          | 0/97 [00:02<?, ?it/s]
loss: 0.088036  [ 5120/24872]:   0%|          | 0/97 [00:02<?, ?it/s]
loss: 0.086438  [ 5376/24872]:   0%|          | 0/97 [00:02<?, ?it/s]
loss: 0.111143  [ 5632/24872]:   0%|          | 0/97 [00:02<?, ?it/s]
loss: 0.105399  [ 5888/24872]:   0%|          | 0/97 [00:02<?, ?it/s]
loss: 0.104373  [ 6144/24872]:   0%|          | 0/97 [00:03<?, ?it/s]
loss: 0.095160  [ 6400/24872]:   0%|          | 0/97 [00:03<?, ?it/s]
loss: 0.086923  [ 6656/24872]:   0%|          | 0/97 [00:03<?, ?it/s]
loss: 0.102228  [ 6912/24872]:   0%|          | 0/97 [00:03<?, ?it/s]
loss: 0.096801  [ 7168/24872]:   0%|          | 0/97 [00:03<?, ?it/s]
loss: 0.078208  [ 7424/24872]:   0%|          | 0/97 [00:03<?, ?it/s]
loss: 0.092870  [ 7680/24872]:   0%|          | 0/97 [00:03<?, ?it/s]
loss: 0.102461  [ 7936/24872]:   0%|          | 0/97 [00:03<?, ?it/s]
loss: 0.084664  [ 8192/24872]:   0%|          | 0/97 [00:04<?, ?it/s]
loss: 0.094850  [ 8448/24872]:   0%|          | 0/97 [00:04<?, ?it/s]
loss: 0.092736  [ 8704/24872]:   0%|          | 0/97 [00:04<?, ?it/s]
loss: 0.116458  [ 8960/24872]:   0%|          | 0/97 [00:04<?, ?it/s]
loss: 0.088360  [ 9216/24872]:   0%|          | 0/97 [00:04<?, ?it/s]
loss: 0.100137  [ 9472/24872]:   0%|          | 0/97 [00:04<?, ?it/s]
loss: 0.091757  [ 9728/24872]:   0%|          | 0/97 [00:04<?, ?it/s]
loss: 0.084914  [ 9984/24872]:   0%|          | 0/97 [00:04<?, ?it/s]
loss: 0.099132  [10240/24872]:   0%|          | 0/97 [00:05<?, ?it/s]
loss: 0.082867  [10496/24872]:   0%|          | 0/97 [00:05<?, ?it/s]
loss: 0.107290  [10752/24872]:   0%|          | 0/97 [00:05<?, ?it/s]
loss: 0.093656  [11008/24872]:   0%|          | 0/97 [00:05<?, ?it/s]
loss: 0.075032  [11264/24872]:   0%|          | 0/97 [00:05<?, ?it/s]
loss: 0.091386  [11520/24872]:   0%|          | 0/97 [00:05<?, ?it/s]
loss: 0.094826  [11776/24872]:   0%|          | 0/97 [00:05<?, ?it/s]
loss: 0.105723  [12032/24872]:   0%|          | 0/97 [00:05<?, ?it/s]
loss: 0.089218  [12288/24872]:   0%|          | 0/97 [00:06<?, ?it/s]
loss: 0.086796  [12544/24872]:   0%|          | 0/97 [00:06<?, ?it/s]
loss: 0.082068  [12800/24872]:   0%|          | 0/97 [00:06<?, ?it/s]
loss: 0.094279  [13056/24872]:   0%|          | 0/97 [00:06<?, ?it/s]
loss: 0.087556  [13312/24872]:   0%|          | 0/97 [00:06<?, ?it/s]
loss: 0.080939  [13568/24872]:   0%|          | 0/97 [00:06<?, ?it/s]
loss: 0.101067  [13824/24872]:   0%|          | 0/97 [00:06<?, ?it/s]
loss: 0.078938  [14080/24872]:   0%|          | 0/97 [00:06<?, ?it/s]
loss: 0.092854  [14336/24872]:   0%|          | 0/97 [00:07<?, ?it/s]
loss: 0.095329  [14592/24872]:   0%|          | 0/97 [00:07<?, ?it/s]
loss: 0.114025  [14848/24872]:   0%|          | 0/97 [00:07<?, ?it/s]
loss: 0.095378  [15104/24872]:   0%|          | 0/97 [00:07<?, ?it/s]
loss: 0.085766  [15360/24872]:   0%|          | 0/97 [00:07<?, ?it/s]
loss: 0.108255  [15616/24872]:   0%|          | 0/97 [00:07<?, ?it/s]
loss: 0.090065  [15872/24872]:   0%|          | 0/97 [00:07<?, ?it/s]
loss: 0.081734  [16128/24872]:   0%|          | 0/97 [00:07<?, ?it/s]
loss: 0.095559  [16384/24872]:   0%|          | 0/97 [00:08<?, ?it/s]
loss: 0.110365  [16640/24872]:   0%|          | 0/97 [00:08<?, ?it/s]
loss: 0.100200  [16896/24872]:   0%|          | 0/97 [00:08<?, ?it/s]
loss: 0.105961  [17152/24872]:   0%|          | 0/97 [00:08<?, ?it/s]
loss: 0.083164  [17408/24872]:   0%|          | 0/97 [00:08<?, ?it/s]
loss: 0.106527  [17664/24872]:   0%|          | 0/97 [00:08<?, ?it/s]
loss: 0.101089  [17920/24872]:   0%|          | 0/97 [00:08<?, ?it/s]
loss: 0.116352  [18176/24872]:   0%|          | 0/97 [00:08<?, ?it/s]
loss: 0.072679  [18432/24872]:   0%|          | 0/97 [00:09<?, ?it/s]
loss: 0.087764  [18688/24872]:   0%|          | 0/97 [00:09<?, ?it/s]
loss: 0.087108  [18944/24872]:   0%|          | 0/97 [00:09<?, ?it/s]
loss: 0.078309  [19200/24872]:   0%|          | 0/97 [00:09<?, ?it/s]
loss: 0.082803  [19456/24872]:   0%|          | 0/97 [00:09<?, ?it/s]
loss: 0.081565  [19712/24872]:   0%|          | 0/97 [00:09<?, ?it/s]
loss: 0.084845  [19968/24872]:   0%|          | 0/97 [00:09<?, ?it/s]
loss: 0.084885  [20224/24872]:   0%|          | 0/97 [00:09<?, ?it/s]
loss: 0.093522  [20480/24872]:   0%|          | 0/97 [00:10<?, ?it/s]
loss: 0.102858  [20736/24872]:   0%|          | 0/97 [00:10<?, ?it/s]
loss: 0.081758  [20992/24872]:   0%|          | 0/97 [00:10<?, ?it/s]
loss: 0.111109  [21248/24872]:   0%|          | 0/97 [00:10<?, ?it/s]
loss: 0.106757  [21504/24872]:   0%|          | 0/97 [00:10<?, ?it/s]
loss: 0.087106  [21760/24872]:   0%|          | 0/97 [00:10<?, ?it/s]
loss: 0.103900  [22016/24872]:   0%|          | 0/97 [00:10<?, ?it/s]
loss: 0.098233  [22272/24872]:   0%|          | 0/97 [00:10<?, ?it/s]
loss: 0.106951  [22528/24872]:   0%|          | 0/97 [00:11<?, ?it/s]
loss: 0.067425  [22784/24872]:   0%|          | 0/97 [00:11<?, ?it/s]
loss: 0.081706  [23040/24872]:   0%|          | 0/97 [00:11<?, ?it/s]
loss: 0.078498  [23296/24872]:   0%|          | 0/97 [00:11<?, ?it/s]
loss: 0.088907  [23552/24872]:   0%|          | 0/97 [00:11<?, ?it/s]
loss: 0.113081  [23808/24872]:   0%|          | 0/97 [00:11<?, ?it/s]
loss: 0.096720  [24064/24872]:   0%|          | 0/97 [00:11<?, ?it/s]
loss: 0.073777  [24320/24872]:   0%|          | 0/97 [00:11<?, ?it/s]
loss: 0.082967  [24576/24872]:   0%|          | 0/97 [00:12<?, ?it/s]
loss: 0.087265  [24832/24872]:   0%|          | 0/97 [00:12<?, ?it/s]
loss: 0.075601  [24872/24872]:   0%|          | 0/97 [00:12<?, ?it/s]
loss: 0.075601  [24872/24872]: : 98it [00:12,  8.05it/s]
Epoch 5, time=425.04s

  0%|          | 0/97 [00:00<?, ?it/s]
loss: 0.096313  [  256/24872]:   0%|          | 0/97 [00:00<?, ?it/s]
loss: 0.095463  [  512/24872]:   0%|          | 0/97 [00:00<?, ?it/s]
loss: 0.092881  [  768/24872]:   0%|          | 0/97 [00:00<?, ?it/s]
loss: 0.098686  [ 1024/24872]:   0%|          | 0/97 [00:00<?, ?it/s]
loss: 0.100793  [ 1280/24872]:   0%|          | 0/97 [00:00<?, ?it/s]
loss: 0.091576  [ 1536/24872]:   0%|          | 0/97 [00:00<?, ?it/s]
loss: 0.088430  [ 1792/24872]:   0%|          | 0/97 [00:00<?, ?it/s]
loss: 0.085338  [ 2048/24872]:   0%|          | 0/97 [00:00<?, ?it/s]
loss: 0.086437  [ 2304/24872]:   0%|          | 0/97 [00:01<?, ?it/s]
loss: 0.114603  [ 2560/24872]:   0%|          | 0/97 [00:01<?, ?it/s]
loss: 0.098087  [ 2816/24872]:   0%|          | 0/97 [00:01<?, ?it/s]
loss: 0.103576  [ 3072/24872]:   0%|          | 0/97 [00:01<?, ?it/s]
loss: 0.090376  [ 3328/24872]:   0%|          | 0/97 [00:01<?, ?it/s]
loss: 0.080568  [ 3584/24872]:   0%|          | 0/97 [00:01<?, ?it/s]
loss: 0.103775  [ 3840/24872]:   0%|          | 0/97 [00:01<?, ?it/s]
loss: 0.088422  [ 4096/24872]:   0%|          | 0/97 [00:01<?, ?it/s]
loss: 0.074140  [ 4352/24872]:   0%|          | 0/97 [00:02<?, ?it/s]
loss: 0.108590  [ 4608/24872]:   0%|          | 0/97 [00:02<?, ?it/s]
loss: 0.089247  [ 4864/24872]:   0%|          | 0/97 [00:02<?, ?it/s]
loss: 0.087350  [ 5120/24872]:   0%|          | 0/97 [00:02<?, ?it/s]
loss: 0.085984  [ 5376/24872]:   0%|          | 0/97 [00:02<?, ?it/s]
loss: 0.110297  [ 5632/24872]:   0%|          | 0/97 [00:02<?, ?it/s]
loss: 0.104722  [ 5888/24872]:   0%|          | 0/97 [00:02<?, ?it/s]
loss: 0.103548  [ 6144/24872]:   0%|          | 0/97 [00:03<?, ?it/s]
loss: 0.094308  [ 6400/24872]:   0%|          | 0/97 [00:03<?, ?it/s]
loss: 0.086079  [ 6656/24872]:   0%|          | 0/97 [00:03<?, ?it/s]
loss: 0.101661  [ 6912/24872]:   0%|          | 0/97 [00:03<?, ?it/s]
loss: 0.095651  [ 7168/24872]:   0%|          | 0/97 [00:03<?, ?it/s]
loss: 0.077624  [ 7424/24872]:   0%|          | 0/97 [00:03<?, ?it/s]
loss: 0.092191  [ 7680/24872]:   0%|          | 0/97 [00:03<?, ?it/s]
loss: 0.101538  [ 7936/24872]:   0%|          | 0/97 [00:03<?, ?it/s]
loss: 0.084113  [ 8192/24872]:   0%|          | 0/97 [00:04<?, ?it/s]
loss: 0.094347  [ 8448/24872]:   0%|          | 0/97 [00:04<?, ?it/s]
loss: 0.091998  [ 8704/24872]:   0%|          | 0/97 [00:04<?, ?it/s]
loss: 0.115901  [ 8960/24872]:   0%|          | 0/97 [00:04<?, ?it/s]
loss: 0.087513  [ 9216/24872]:   0%|          | 0/97 [00:04<?, ?it/s]
loss: 0.099490  [ 9472/24872]:   0%|          | 0/97 [00:04<?, ?it/s]
loss: 0.091131  [ 9728/24872]:   0%|          | 0/97 [00:04<?, ?it/s]
loss: 0.084301  [ 9984/24872]:   0%|          | 0/97 [00:04<?, ?it/s]
loss: 0.098665  [10240/24872]:   0%|          | 0/97 [00:05<?, ?it/s]
loss: 0.082070  [10496/24872]:   0%|          | 0/97 [00:05<?, ?it/s]
loss: 0.106833  [10752/24872]:   0%|          | 0/97 [00:05<?, ?it/s]
loss: 0.092845  [11008/24872]:   0%|          | 0/97 [00:05<?, ?it/s]
loss: 0.074593  [11264/24872]:   0%|          | 0/97 [00:05<?, ?it/s]
loss: 0.090857  [11520/24872]:   0%|          | 0/97 [00:05<?, ?it/s]
loss: 0.094257  [11776/24872]:   0%|          | 0/97 [00:05<?, ?it/s]
loss: 0.104809  [12032/24872]:   0%|          | 0/97 [00:05<?, ?it/s]
loss: 0.088154  [12288/24872]:   0%|          | 0/97 [00:06<?, ?it/s]
loss: 0.086048  [12544/24872]:   0%|          | 0/97 [00:06<?, ?it/s]
loss: 0.081251  [12800/24872]:   0%|          | 0/97 [00:06<?, ?it/s]
loss: 0.093574  [13056/24872]:   0%|          | 0/97 [00:06<?, ?it/s]
loss: 0.086624  [13312/24872]:   0%|          | 0/97 [00:06<?, ?it/s]
loss: 0.080274  [13568/24872]:   0%|          | 0/97 [00:06<?, ?it/s]
loss: 0.100592  [13824/24872]:   0%|          | 0/97 [00:06<?, ?it/s]
loss: 0.078468  [14080/24872]:   0%|          | 0/97 [00:06<?, ?it/s]
loss: 0.092211  [14336/24872]:   0%|          | 0/97 [00:07<?, ?it/s]
loss: 0.094674  [14592/24872]:   0%|          | 0/97 [00:07<?, ?it/s]
loss: 0.113292  [14848/24872]:   0%|          | 0/97 [00:07<?, ?it/s]
loss: 0.094841  [15104/24872]:   0%|          | 0/97 [00:07<?, ?it/s]
loss: 0.085031  [15360/24872]:   0%|          | 0/97 [00:07<?, ?it/s]
loss: 0.107464  [15616/24872]:   0%|          | 0/97 [00:07<?, ?it/s]
loss: 0.089532  [15872/24872]:   0%|          | 0/97 [00:07<?, ?it/s]
loss: 0.081166  [16128/24872]:   0%|          | 0/97 [00:07<?, ?it/s]
loss: 0.094906  [16384/24872]:   0%|          | 0/97 [00:08<?, ?it/s]
loss: 0.109732  [16640/24872]:   0%|          | 0/97 [00:08<?, ?it/s]
loss: 0.099434  [16896/24872]:   0%|          | 0/97 [00:08<?, ?it/s]
loss: 0.105199  [17152/24872]:   0%|          | 0/97 [00:08<?, ?it/s]
loss: 0.082578  [17408/24872]:   0%|          | 0/97 [00:08<?, ?it/s]
loss: 0.105921  [17664/24872]:   0%|          | 0/97 [00:08<?, ?it/s]
loss: 0.100521  [17920/24872]:   0%|          | 0/97 [00:08<?, ?it/s]
loss: 0.115227  [18176/24872]:   0%|          | 0/97 [00:08<?, ?it/s]
loss: 0.072234  [18432/24872]:   0%|          | 0/97 [00:09<?, ?it/s]
loss: 0.087185  [18688/24872]:   0%|          | 0/97 [00:09<?, ?it/s]
loss: 0.086305  [18944/24872]:   0%|          | 0/97 [00:09<?, ?it/s]
loss: 0.077681  [19200/24872]:   0%|          | 0/97 [00:09<?, ?it/s]
loss: 0.082042  [19456/24872]:   0%|          | 0/97 [00:09<?, ?it/s]
loss: 0.080371  [19712/24872]:   0%|          | 0/97 [00:09<?, ?it/s]
loss: 0.084334  [19968/24872]:   0%|          | 0/97 [00:09<?, ?it/s]
loss: 0.084240  [20224/24872]:   0%|          | 0/97 [00:09<?, ?it/s]
loss: 0.093140  [20480/24872]:   0%|          | 0/97 [00:10<?, ?it/s]
loss: 0.102108  [20736/24872]:   0%|          | 0/97 [00:10<?, ?it/s]
loss: 0.080938  [20992/24872]:   0%|          | 0/97 [00:10<?, ?it/s]
loss: 0.110528  [21248/24872]:   0%|          | 0/97 [00:10<?, ?it/s]
loss: 0.106072  [21504/24872]:   0%|          | 0/97 [00:10<?, ?it/s]
loss: 0.086561  [21760/24872]:   0%|          | 0/97 [00:10<?, ?it/s]
loss: 0.103313  [22016/24872]:   0%|          | 0/97 [00:10<?, ?it/s]
loss: 0.097747  [22272/24872]:   0%|          | 0/97 [00:10<?, ?it/s]
loss: 0.106198  [22528/24872]:   0%|          | 0/97 [00:10<?, ?it/s]
loss: 0.066991  [22784/24872]:   0%|          | 0/97 [00:11<?, ?it/s]
loss: 0.081127  [23040/24872]:   0%|          | 0/97 [00:11<?, ?it/s]
loss: 0.077782  [23296/24872]:   0%|          | 0/97 [00:11<?, ?it/s]
loss: 0.088407  [23552/24872]:   0%|          | 0/97 [00:11<?, ?it/s]
loss: 0.112427  [23808/24872]:   0%|          | 0/97 [00:11<?, ?it/s]
loss: 0.096003  [24064/24872]:   0%|          | 0/97 [00:11<?, ?it/s]
loss: 0.073273  [24320/24872]:   0%|          | 0/97 [00:11<?, ?it/s]
loss: 0.082522  [24576/24872]:   0%|          | 0/97 [00:11<?, ?it/s]
loss: 0.086475  [24832/24872]:   0%|          | 0/97 [00:12<?, ?it/s]
loss: 0.075057  [24872/24872]:   0%|          | 0/97 [00:12<?, ?it/s]
loss: 0.075057  [24872/24872]: : 98it [00:12,  8.05it/s]
Epoch 6, time=437.21s

  0%|          | 0/97 [00:00<?, ?it/s]
loss: 0.095569  [  256/24872]:   0%|          | 0/97 [00:00<?, ?it/s]
loss: 0.094727  [  512/24872]:   0%|          | 0/97 [00:00<?, ?it/s]
loss: 0.092092  [  768/24872]:   0%|          | 0/97 [00:00<?, ?it/s]
loss: 0.097689  [ 1024/24872]:   0%|          | 0/97 [00:00<?, ?it/s]
loss: 0.099825  [ 1280/24872]:   0%|          | 0/97 [00:00<?, ?it/s]
loss: 0.091094  [ 1536/24872]:   0%|          | 0/97 [00:00<?, ?it/s]
loss: 0.087878  [ 1792/24872]:   0%|          | 0/97 [00:00<?, ?it/s]
loss: 0.084914  [ 2048/24872]:   0%|          | 0/97 [00:01<?, ?it/s]
loss: 0.085781  [ 2304/24872]:   0%|          | 0/97 [00:01<?, ?it/s]
loss: 0.113804  [ 2560/24872]:   0%|          | 0/97 [00:01<?, ?it/s]
loss: 0.097401  [ 2816/24872]:   0%|          | 0/97 [00:01<?, ?it/s]
loss: 0.102879  [ 3072/24872]:   0%|          | 0/97 [00:01<?, ?it/s]
loss: 0.089698  [ 3328/24872]:   0%|          | 0/97 [00:01<?, ?it/s]
loss: 0.080031  [ 3584/24872]:   0%|          | 0/97 [00:01<?, ?it/s]
loss: 0.103045  [ 3840/24872]:   0%|          | 0/97 [00:01<?, ?it/s]
loss: 0.088036  [ 4096/24872]:   0%|          | 0/97 [00:02<?, ?it/s]
loss: 0.073696  [ 4352/24872]:   0%|          | 0/97 [00:02<?, ?it/s]
loss: 0.108011  [ 4608/24872]:   0%|          | 0/97 [00:02<?, ?it/s]
loss: 0.088716  [ 4864/24872]:   0%|          | 0/97 [00:02<?, ?it/s]
loss: 0.086774  [ 5120/24872]:   0%|          | 0/97 [00:02<?, ?it/s]
loss: 0.085482  [ 5376/24872]:   0%|          | 0/97 [00:02<?, ?it/s]
loss: 0.109483  [ 5632/24872]:   0%|          | 0/97 [00:02<?, ?it/s]
loss: 0.104104  [ 5888/24872]:   0%|          | 0/97 [00:02<?, ?it/s]
loss: 0.102879  [ 6144/24872]:   0%|          | 0/97 [00:03<?, ?it/s]
loss: 0.093512  [ 6400/24872]:   0%|          | 0/97 [00:03<?, ?it/s]
loss: 0.085313  [ 6656/24872]:   0%|          | 0/97 [00:03<?, ?it/s]
loss: 0.101163  [ 6912/24872]:   0%|          | 0/97 [00:03<?, ?it/s]
loss: 0.094707  [ 7168/24872]:   0%|          | 0/97 [00:03<?, ?it/s]
loss: 0.077069  [ 7424/24872]:   0%|          | 0/97 [00:03<?, ?it/s]
loss: 0.091512  [ 7680/24872]:   0%|          | 0/97 [00:03<?, ?it/s]
loss: 0.100691  [ 7936/24872]:   0%|          | 0/97 [00:03<?, ?it/s]
loss: 0.083625  [ 8192/24872]:   0%|          | 0/97 [00:04<?, ?it/s]
loss: 0.093819  [ 8448/24872]:   0%|          | 0/97 [00:04<?, ?it/s]
loss: 0.091326  [ 8704/24872]:   0%|          | 0/97 [00:04<?, ?it/s]
loss: 0.115424  [ 8960/24872]:   0%|          | 0/97 [00:04<?, ?it/s]
loss: 0.086651  [ 9216/24872]:   0%|          | 0/97 [00:04<?, ?it/s]
loss: 0.098917  [ 9472/24872]:   0%|          | 0/97 [00:04<?, ?it/s]
loss: 0.090531  [ 9728/24872]:   0%|          | 0/97 [00:04<?, ?it/s]
loss: 0.083863  [ 9984/24872]:   0%|          | 0/97 [00:04<?, ?it/s]
loss: 0.098200  [10240/24872]:   0%|          | 0/97 [00:05<?, ?it/s]
loss: 0.081394  [10496/24872]:   0%|          | 0/97 [00:05<?, ?it/s]
loss: 0.106338  [10752/24872]:   0%|          | 0/97 [00:05<?, ?it/s]
loss: 0.091983  [11008/24872]:   0%|          | 0/97 [00:05<?, ?it/s]
loss: 0.074107  [11264/24872]:   0%|          | 0/97 [00:05<?, ?it/s]
loss: 0.090322  [11520/24872]:   0%|          | 0/97 [00:05<?, ?it/s]
loss: 0.093675  [11776/24872]:   0%|          | 0/97 [00:05<?, ?it/s]
loss: 0.103947  [12032/24872]:   0%|          | 0/97 [00:05<?, ?it/s]
loss: 0.087226  [12288/24872]:   0%|          | 0/97 [00:06<?, ?it/s]
loss: 0.085329  [12544/24872]:   0%|          | 0/97 [00:06<?, ?it/s]
loss: 0.080587  [12800/24872]:   0%|          | 0/97 [00:06<?, ?it/s]
loss: 0.092962  [13056/24872]:   0%|          | 0/97 [00:06<?, ?it/s]
loss: 0.085832  [13312/24872]:   0%|          | 0/97 [00:06<?, ?it/s]
loss: 0.079716  [13568/24872]:   0%|          | 0/97 [00:06<?, ?it/s]
loss: 0.100139  [13824/24872]:   0%|          | 0/97 [00:06<?, ?it/s]
loss: 0.077909  [14080/24872]:   0%|          | 0/97 [00:06<?, ?it/s]
loss: 0.091640  [14336/24872]:   0%|          | 0/97 [00:06<?, ?it/s]
loss: 0.094070  [14592/24872]:   0%|          | 0/97 [00:07<?, ?it/s]
loss: 0.112618  [14848/24872]:   0%|          | 0/97 [00:07<?, ?it/s]
loss: 0.094387  [15104/24872]:   0%|          | 0/97 [00:07<?, ?it/s]
loss: 0.084422  [15360/24872]:   0%|          | 0/97 [00:07<?, ?it/s]
loss: 0.106820  [15616/24872]:   0%|          | 0/97 [00:07<?, ?it/s]
loss: 0.088966  [15872/24872]:   0%|          | 0/97 [00:07<?, ?it/s]
loss: 0.080629  [16128/24872]:   0%|          | 0/97 [00:07<?, ?it/s]
loss: 0.094295  [16384/24872]:   0%|          | 0/97 [00:07<?, ?it/s]
loss: 0.109097  [16640/24872]:   0%|          | 0/97 [00:08<?, ?it/s]
loss: 0.098678  [16896/24872]:   0%|          | 0/97 [00:08<?, ?it/s]
loss: 0.104534  [17152/24872]:   0%|          | 0/97 [00:08<?, ?it/s]
loss: 0.082043  [17408/24872]:   0%|          | 0/97 [00:08<?, ?it/s]
loss: 0.105249  [17664/24872]:   0%|          | 0/97 [00:08<?, ?it/s]
loss: 0.099907  [17920/24872]:   0%|          | 0/97 [00:08<?, ?it/s]
loss: 0.114352  [18176/24872]:   0%|          | 0/97 [00:08<?, ?it/s]
loss: 0.071777  [18432/24872]:   0%|          | 0/97 [00:08<?, ?it/s]
loss: 0.086570  [18688/24872]:   0%|          | 0/97 [00:09<?, ?it/s]
loss: 0.085568  [18944/24872]:   0%|          | 0/97 [00:09<?, ?it/s]
loss: 0.077078  [19200/24872]:   0%|          | 0/97 [00:09<?, ?it/s]
loss: 0.081484  [19456/24872]:   0%|          | 0/97 [00:09<?, ?it/s]
loss: 0.079337  [19712/24872]:   0%|          | 0/97 [00:09<?, ?it/s]
loss: 0.083836  [19968/24872]:   0%|          | 0/97 [00:09<?, ?it/s]
loss: 0.083705  [20224/24872]:   0%|          | 0/97 [00:09<?, ?it/s]
loss: 0.092772  [20480/24872]:   0%|          | 0/97 [00:09<?, ?it/s]
loss: 0.101411  [20736/24872]:   0%|          | 0/97 [00:10<?, ?it/s]
loss: 0.080217  [20992/24872]:   0%|          | 0/97 [00:10<?, ?it/s]
loss: 0.109956  [21248/24872]:   0%|          | 0/97 [00:10<?, ?it/s]
loss: 0.105414  [21504/24872]:   0%|          | 0/97 [00:10<?, ?it/s]
loss: 0.086074  [21760/24872]:   0%|          | 0/97 [00:10<?, ?it/s]
loss: 0.102896  [22016/24872]:   0%|          | 0/97 [00:10<?, ?it/s]
loss: 0.097427  [22272/24872]:   0%|          | 0/97 [00:10<?, ?it/s]
loss: 0.105458  [22528/24872]:   0%|          | 0/97 [00:10<?, ?it/s]
loss: 0.066590  [22784/24872]:   0%|          | 0/97 [00:11<?, ?it/s]
loss: 0.080477  [23040/24872]:   0%|          | 0/97 [00:11<?, ?it/s]
loss: 0.077406  [23296/24872]:   0%|          | 0/97 [00:11<?, ?it/s]
loss: 0.087946  [23552/24872]:   0%|          | 0/97 [00:11<?, ?it/s]
loss: 0.111579  [23808/24872]:   0%|          | 0/97 [00:11<?, ?it/s]
loss: 0.095308  [24064/24872]:   0%|          | 0/97 [00:11<?, ?it/s]
loss: 0.072902  [24320/24872]:   0%|          | 0/97 [00:11<?, ?it/s]
loss: 0.082181  [24576/24872]:   0%|          | 0/97 [00:11<?, ?it/s]
loss: 0.085867  [24832/24872]:   0%|          | 0/97 [00:12<?, ?it/s]
loss: 0.074690  [24872/24872]:   0%|          | 0/97 [00:12<?, ?it/s]
loss: 0.074690  [24872/24872]: : 98it [00:12,  8.09it/s]
Done!

test the network#

Do some qualitative tests: let the trained network predict some particle geometries and compare their Mie spectra with the target spectrum.

# Qualitative check of the trained generator: feed target spectra to the
# network, re-evaluate the predicted designs with Mie theory and compare the
# resulting spectra with the targets.
# Note: Ideally tests should be done on separate samples!
# NOTE(review): the name `q_sca_target_test` suggests a held-out split --
# confirm against where it is defined.
sca_test = q_sca_target_test

# Pure inference: no gradients are needed here, so avoid building (and
# keeping in memory) the autograd graph through the network and the Mie solver.
with torch.no_grad():
    pred = model(sca_test)

    # evaluate Mie
    r_c_test, r_s_test, eps_c_test, eps_s_test = nn_pred_to_mie_geometry(pred)
    res_mie = pmd.farfield.cross_sections(
        k0,
        r_c=r_c_test,
        eps_c=eps_c_test,
        r_s=r_s_test,
        eps_s=eps_s_test,
        eps_env=eps_env,
        n_max=n_max,
    )

# plot
# the wavelength axis is identical for every curve: convert it only once
wl_np = wl0.detach().cpu().numpy()
q_sca_pred = res_mie["q_sca"]

# four randomly drawn samples, shown on a 2x2 grid
i_plot = np.random.randint(len(sca_test), size=4)
plt.figure(figsize=(12, 10))
for i_n, i in enumerate(i_plot):
    plt.subplot(2, 2, i_n + 1)
    plt.plot(
        wl_np,
        sca_test[i].detach().cpu().numpy(),
        label="reference",
    )
    plt.plot(
        wl_np,
        q_sca_pred[i].detach().cpu().numpy(),
        label="predicted particle",
    )
    plt.legend()
    plt.xlabel("wavelength (nm)")
    plt.ylabel("scat. efficiency")
plt.show()
ex 05 tandem

Total running time of the script: (7 minutes 35.560 seconds)

Estimated memory usage: 3012 MB

Gallery generated by Sphinx-Gallery