Mie-informed tandem neural network#

Here, we demonstrate how to train a design generator network capable of suggesting core-shell particles with a specific spectral response, using PyMieDiff as a differentiable forward evaluator. The training pipeline follows the “Tandem” model:

target spectrum –> generator NN –> design –> Mie –> real spectrum

training loss is: MSE(target spec., real spec.)

author: O. Jackson, P. Wiecha, 06/2025

imports#

import time

import numpy as np
from tqdm import tqdm
import matplotlib.pyplot as plt
import torch
from torch import nn

import pymiediff as pmd

setup optimization target#

We set up the main configuration here: torch device, parameter limits and wavelengths.

# compute device on which all tensors of this example are created
device = "cpu"

# --- general configuration ---
N_samples = 25000  # number of random reference particles / spectra
n_max = 4  # Mie expansion is truncated at this order (fixed for performance)
eps_env = torch.tensor(1.0, device=device)  # permittivity of the environment

# [lower, upper] limits for the radii and for the real / imaginary
# part of the refractive index
lim_r = torch.tensor([40, 100], device=device)
lim_n_re = torch.tensor([1.5, 4.0], device=device)
lim_n_im = torch.tensor([0.0, 0.1], device=device)

# evaluation wavelengths and the matching wavenumbers k0 = 2*pi / wl0
wl0 = torch.linspace(start=400, end=800, steps=40, device=device)
k0 = (2 * torch.pi) / wl0

generate reference spectra#

We generate a large number of reference Mie spectra for existing particles, which will be used as design targets during training.

Note: this step could also be done without any physics knowledge, for example with artificial spectra (e.g. Lorentzians), or a scattering maximization loss.

# datagen: draw random core-shell particles and compute their spectra
# (only the spectra serve as training targets, the geometries are not used)
# NOTE: the order of the torch.rand calls is kept (r_c, d_s, n_re, n_im) so
# that a seeded run produces the same samples.
r_c = lim_r[0] + torch.rand(N_samples, device=device) * torch.diff(lim_r)[0]
d_s = lim_r[0] + torch.rand(N_samples, device=device) * torch.diff(lim_r)[0]
r_s = d_s + r_c  # outer radius = core radius + shell thickness

# complex refractive indices, column 0: core, column 1: shell
n_re = lim_n_re[0] + torch.rand((N_samples, 2), device=device) * torch.diff(lim_n_re)[0]
n_im = lim_n_im[0] + torch.rand((N_samples, 2), device=device) * torch.diff(lim_n_im)[0]
n = 1j * n_im + n_re

# low-level API: permittivity required as spectra (for vectorization),
# so each constant n**2 is broadcast against a row of ones over k0
_ones_k0 = torch.ones_like(k0).unsqueeze(0)
eps_c = _ones_k0 * n[:, :1] ** 2
eps_s = _ones_k0 * n[:, 1:] ** 2

all_particles = pmd.multishell.cross_sections(
    k0,
    r_c=r_c,
    r_s=r_s,
    eps_c=eps_c,
    eps_s=eps_s,
    eps_env=eps_env,
    n_max=n_max,
)

# hold back the first N_test spectra for testing, train on the rest
N_test = 128
q_sca_target_test = all_particles["q_sca"][:N_test].to(dtype=torch.float32)
q_sca_target = all_particles["q_sca"][N_test:].to(dtype=torch.float32)

# show one spectrum of the training set
plt.plot(q_sca_target[30].detach().cpu().numpy())
ex 09 tandem
[<matplotlib.lines.Line2D object at 0x7f06a0074590>]

Neural network classes / functions#

define the network model (simple MLP) and training loop

class FullyConnected(nn.Module):
    """Simple MLP generator mapping a target spectrum to normalized design values.

    The network consists of an input layer, two hidden layers (each followed
    by a ReLU) and a linear output layer followed by a sigmoid, so every
    output value is squashed by the sigmoid into the unit interval.

    Args:
        hidden_dim: width of all hidden layers.
        in_dim: number of input features (spectral points). If ``None``
            (default), ``len(k0)`` of the module-level wavenumber tensor is
            used, which is the original behavior.
        out_dim: number of output values. The default of 6 matches the six
            columns read by ``nn_pred_to_mie_geometry``.
    """

    def __init__(self, hidden_dim=1024, in_dim=None, out_dim=6):
        super().__init__()
        if in_dim is None:
            # fall back to the globally configured spectrum length
            in_dim = len(k0)

        # layer names and creation order are kept unchanged so that
        # state_dict keys and seeded initializations stay compatible
        self.fc_in = nn.Linear(in_dim, hidden_dim)
        self.relu1 = nn.ReLU()
        self.fc_1 = nn.Linear(hidden_dim, hidden_dim)
        self.relu2 = nn.ReLU()
        self.fc_2 = nn.Linear(hidden_dim, hidden_dim)
        self.relu3 = nn.ReLU()
        self.fc_out = nn.Linear(hidden_dim, out_dim)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Return the sigmoid-activated network output for the input batch ``x``."""
        x = self.relu1(self.fc_in(x))
        x = self.relu2(self.fc_1(x))
        x = self.relu3(self.fc_2(x))
        return self.sigmoid(self.fc_out(x))


def nn_pred_to_mie_geometry(pred):
    """Translate raw network outputs into core-shell Mie parameters.

    Implicit normalization: every column of ``pred`` is multiplied by the
    upper bound of the corresponding user-defined limit.

    Column usage of ``pred`` (indexed as ``pred[:, i]``):
        0: core radius, 1: added to column 0 to give the outer radius,
        2 / 3: real / imaginary part of the core refractive index,
        4 / 5: real / imaginary part of the shell refractive index.

    Returns:
        Tuple ``(r_c, r_s, eps_c, eps_s)``. The permittivities are the
        squared refractive indices, broadcast against a row of ones shaped
        like ``k0`` so that each particle gets a (constant) spectrum.
    """
    r_max = lim_r.max()
    n_re_max = lim_n_re.max()
    n_im_max = lim_n_im.max()

    def refractive_index(col_re, col_im):
        # complex index assembled from one real and one imaginary column
        return n_re_max * pred[:, col_re] + n_im_max * (1j * pred[:, col_im])

    def as_eps_spectrum(n_complex):
        # n**2, repeated along the wavenumber axis via broadcasting
        return torch.ones_like(k0).unsqueeze(0) * n_complex.unsqueeze(1) ** 2

    radius_core = r_max * (pred[:, 0])
    radius_shell = r_max * (pred[:, 0] + pred[:, 1])
    eps_core = as_eps_spectrum(refractive_index(2, 3))
    eps_shell = as_eps_spectrum(refractive_index(4, 5))

    return radius_core, radius_shell, eps_core, eps_shell


def train_loop(dataloader, model, loss_fn, optimizer):
    """Run one training epoch of the tandem (generator -> Mie) pipeline.

    Each batch of target spectra is passed through ``model``; the predicted
    designs are evaluated with ``pmd.multishell.cross_sections`` and the loss
    between the resulting and the target scattering spectra is
    backpropagated to update the network.

    Args:
        dataloader: iterable yielding batches of target spectra. Must
            support ``len()`` and expose a sized ``dataset`` attribute.
        model: generator network; its output is converted to Mie parameters
            by ``nn_pred_to_mie_geometry``.
        loss_fn: callable ``loss_fn(prediction, target)`` returning a scalar
            tensor.
        optimizer: optimizer used to update the network after each batch.
    """
    size = len(dataloader.dataset)
    n_seen = 0  # running number of processed samples, for the progress text

    # Set the model to training mode - important for batch normalization and
    # dropout layers. Unnecessary here, but kept as best practice.
    model.train()

    # len(dataloader) is the true number of batches: unlike
    # `size // batch_size` it includes a final partial batch and does not
    # depend on `dataloader.batch_size` being set.
    prog_bar = tqdm(dataloader, total=len(dataloader))
    for X in prog_bar:
        # model prediction: generate core-shell particles
        pred = model(X)

        # evaluate Mie
        r_c, r_s, eps_c, eps_s = nn_pred_to_mie_geometry(pred)
        res_mie = pmd.multishell.cross_sections(
            k0,
            r_c=r_c,
            eps_c=eps_c,
            r_s=r_s,
            eps_s=eps_s,
            eps_env=eps_env,
            n_max=n_max,
        )
        q_sca_mie = res_mie["q_sca"].to(dtype=torch.float32)

        # calc. loss
        loss = loss_fn(q_sca_mie, X)

        # Backpropagation; gradients are cleared first so that nothing left
        # over from outside this loop can leak into the update
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # progress report
        n_seen += len(X)
        prog_bar.set_description(
            f"loss: {loss.item():>7f}  [{n_seen:>5d}/{size:>5d}]"
        )

training the Mie-informed network#

Here we use a simple, manually optimized training schedule.

# fresh generator network, moved to the configured device
model = FullyConnected().to(device)

# training stages; per stage: batch size "bs", learning rate "lr"
# and number of epochs "n_ep"
confs = [
    {"bs": 32, "lr": 1e-4, "n_ep": 5},
    {"bs": 64, "lr": 1e-4, "n_ep": 5},
    {"bs": 128, "lr": 1e-4, "n_ep": 6},
    {"bs": 256, "lr": 1e-5, "n_ep": 6},
]

t_start = time.time()
for conf in confs:
    learning_rate, batch_size, epochs = conf["lr"], conf["bs"], conf["n_ep"]
    print(
        "-------------------------------",
        f"LR={learning_rate}, batch_size={batch_size}",
        "-------------------------------",
        sep="\n",
    )

    # every stage gets its own loss object, a new optimizer and a
    # dataloader with the stage's batch size
    loss_fn = nn.MSELoss()
    optimizer = torch.optim.AdamW(model.parameters(), lr=learning_rate)
    train_dataloader = torch.utils.data.DataLoader(q_sca_target, batch_size=batch_size)
    for t in range(epochs):
        print(f"Epoch {t+1}, time={time.time()-t_start:.2f}s")
        train_loop(train_dataloader, model, loss_fn, optimizer)
print("Done!")
-------------------------------
LR=0.0001, batch_size=32
-------------------------------
Epoch 1, time=0.00s

  0%|          | 0/777 [00:00<?, ?it/s]
loss: 4.549628  [   32/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 3.024712  [   64/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 2.603119  [   96/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 2.700456  [  128/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 2.396730  [  160/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 1.976609  [  192/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 1.351362  [  224/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 2.066803  [  256/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 1.822288  [  288/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 1.593468  [  320/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 1.564855  [  352/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 1.606593  [  384/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 1.178725  [  416/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 1.213516  [  448/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 1.375950  [  480/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 1.206874  [  512/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 1.248866  [  544/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 1.326798  [  576/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 1.103084  [  608/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 1.184518  [  640/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.954216  [  672/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 1.145559  [  704/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 1.104081  [  736/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.864190  [  768/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 1.018764  [  800/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 1.194699  [  832/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 1.168010  [  864/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.903035  [  896/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 1.134447  [  928/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 1.369251  [  960/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.968039  [  992/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 1.177012  [ 1024/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 1.044216  [ 1056/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.944270  [ 1088/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.990864  [ 1120/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 1.117964  [ 1152/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 1.297089  [ 1184/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.850444  [ 1216/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 1.030533  [ 1248/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.962755  [ 1280/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.870597  [ 1312/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 1.255670  [ 1344/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.842951  [ 1376/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 1.094264  [ 1408/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.956608  [ 1440/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.813043  [ 1472/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.981611  [ 1504/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 1.115376  [ 1536/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 1.215330  [ 1568/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.693462  [ 1600/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.941374  [ 1632/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.938547  [ 1664/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.744120  [ 1696/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.768484  [ 1728/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.966859  [ 1760/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 1.073991  [ 1792/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 1.004996  [ 1824/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.826588  [ 1856/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 1.100463  [ 1888/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.803903  [ 1920/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.837029  [ 1952/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 1.114654  [ 1984/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.692557  [ 2016/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.856841  [ 2048/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 1.027091  [ 2080/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 1.206690  [ 2112/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.947298  [ 2144/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.905120  [ 2176/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.988815  [ 2208/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.619839  [ 2240/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 1.049874  [ 2272/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 1.056211  [ 2304/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.707493  [ 2336/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 1.032326  [ 2368/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.869738  [ 2400/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.830498  [ 2432/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.681917  [ 2464/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.950300  [ 2496/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.983554  [ 2528/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.650350  [ 2560/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.830604  [ 2592/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.696787  [ 2624/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.605493  [ 2656/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.925351  [ 2688/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.797087  [ 2720/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.869690  [ 2752/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.724087  [ 2784/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.830089  [ 2816/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.694161  [ 2848/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.692300  [ 2880/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.717461  [ 2912/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.909331  [ 2944/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.699228  [ 2976/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.767702  [ 3008/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.966523  [ 3040/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.739716  [ 3072/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.708228  [ 3104/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.623273  [ 3136/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.765987  [ 3168/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.841981  [ 3200/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.614877  [ 3232/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.811593  [ 3264/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.759134  [ 3296/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.735067  [ 3328/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.648065  [ 3360/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.796157  [ 3392/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.541324  [ 3424/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 1.046082  [ 3456/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.913262  [ 3488/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.777712  [ 3520/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.786183  [ 3552/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.797318  [ 3584/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.685859  [ 3616/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.692348  [ 3648/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.766447  [ 3680/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.905225  [ 3712/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.781734  [ 3744/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.581652  [ 3776/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.897002  [ 3808/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.581007  [ 3840/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.775189  [ 3872/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.757537  [ 3904/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.743633  [ 3936/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.708249  [ 3968/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.686087  [ 4000/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.686662  [ 4032/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.737527  [ 4064/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.525106  [ 4096/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.630956  [ 4128/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.667601  [ 4160/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.627045  [ 4192/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.724834  [ 4224/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.637911  [ 4256/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.780280  [ 4288/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.569397  [ 4320/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.515206  [ 4352/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.584434  [ 4384/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.655563  [ 4416/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.665454  [ 4448/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.703536  [ 4480/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.522763  [ 4512/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.656305  [ 4544/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.536123  [ 4576/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.796360  [ 4608/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.682935  [ 4640/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.459931  [ 4672/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.657267  [ 4704/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.554008  [ 4736/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.594858  [ 4768/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.644254  [ 4800/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.710495  [ 4832/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.617788  [ 4864/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.614180  [ 4896/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.629451  [ 4928/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.533078  [ 4960/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.480037  [ 4992/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.601435  [ 5024/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.444080  [ 5056/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.672689  [ 5088/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.503242  [ 5120/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.686830  [ 5152/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.702820  [ 5184/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.578752  [ 5216/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.544487  [ 5248/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 1.196646  [ 5280/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.627913  [ 5312/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.527364  [ 5344/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.606205  [ 5376/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.505998  [ 5408/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.363671  [ 5440/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.634818  [ 5472/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.691325  [ 5504/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.613241  [ 5536/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.611454  [ 5568/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.545395  [ 5600/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.495668  [ 5632/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.478125  [ 5664/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.576021  [ 5696/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.613744  [ 5728/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.485815  [ 5760/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.514641  [ 5792/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.534588  [ 5824/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.736430  [ 5856/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.483989  [ 5888/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.675932  [ 5920/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.528801  [ 5952/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.557654  [ 5984/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.504843  [ 6016/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.609743  [ 6048/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.616528  [ 6080/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.535119  [ 6112/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.486834  [ 6144/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.841753  [ 6176/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.499647  [ 6208/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.508176  [ 6240/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.525578  [ 6272/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.554933  [ 6304/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.671742  [ 6336/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.530646  [ 6368/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.524304  [ 6400/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.488595  [ 6432/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.617125  [ 6464/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.440587  [ 6496/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.374608  [ 6528/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.643205  [ 6560/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.600344  [ 6592/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.572962  [ 6624/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.544347  [ 6656/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.531797  [ 6688/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.568195  [ 6720/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.478528  [ 6752/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.545627  [ 6784/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.566370  [ 6816/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.526727  [ 6848/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.550577  [ 6880/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.656246  [ 6912/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.557936  [ 6944/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.545366  [ 6976/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.419109  [ 7008/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.441224  [ 7040/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.650576  [ 7072/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.523005  [ 7104/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.477916  [ 7136/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.616433  [ 7168/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.380292  [ 7200/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.603326  [ 7232/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.653603  [ 7264/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.487595  [ 7296/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.571038  [ 7328/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.590345  [ 7360/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.517546  [ 7392/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.513353  [ 7424/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.474648  [ 7456/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.762551  [ 7488/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.415614  [ 7520/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.534361  [ 7552/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.488098  [ 7584/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.448855  [ 7616/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.598816  [ 7648/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.582722  [ 7680/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.662912  [ 7712/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.589332  [ 7744/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.689269  [ 7776/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.731560  [ 7808/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.614626  [ 7840/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.449892  [ 7872/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.428820  [ 7904/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.412033  [ 7936/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.426400  [ 7968/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.299821  [ 8000/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.564144  [ 8032/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.539655  [ 8064/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.437809  [ 8096/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.507565  [ 8128/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.693444  [ 8160/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.479798  [ 8192/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.336999  [ 8224/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.411812  [ 8256/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.529656  [ 8288/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.554582  [ 8320/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.503436  [ 8352/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.353426  [ 8384/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.405522  [ 8416/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.467683  [ 8448/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.445175  [ 8480/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.483850  [ 8512/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.404057  [ 8544/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.464224  [ 8576/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.520994  [ 8608/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.604560  [ 8640/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.719930  [ 8672/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.388810  [ 8704/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.408840  [ 8736/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.426316  [ 8768/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.308958  [ 8800/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.407226  [ 8832/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.303953  [ 8864/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.419643  [ 8896/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.516337  [ 8928/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.390661  [ 8960/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.539882  [ 8992/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.694293  [ 9024/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.522615  [ 9056/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.509678  [ 9088/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.385783  [ 9120/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.371328  [ 9152/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.447466  [ 9184/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.460427  [ 9216/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.397998  [ 9248/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.387884  [ 9280/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.553470  [ 9312/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.433575  [ 9344/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.392388  [ 9376/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.495469  [ 9408/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.590276  [ 9440/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.421869  [ 9472/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.412991  [ 9504/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.379875  [ 9536/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.396954  [ 9568/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.392329  [ 9600/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.388244  [ 9632/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.408485  [ 9664/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.512046  [ 9696/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.334044  [ 9728/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.454203  [ 9760/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.456139  [ 9792/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.351116  [ 9824/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.472090  [ 9856/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.622287  [ 9888/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.453783  [ 9920/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.698191  [ 9952/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.553079  [ 9984/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.389329  [10016/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.428926  [10048/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.524185  [10080/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.449711  [10112/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.411744  [10144/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.547091  [10176/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.419536  [10208/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.394449  [10240/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.344872  [10272/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.483099  [10304/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.526175  [10336/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.382574  [10368/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.352601  [10400/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.727609  [10432/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.467378  [10464/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.482972  [10496/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.463137  [10528/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.578260  [10560/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.374397  [10592/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.441601  [10624/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.494875  [10656/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.474518  [10688/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.336959  [10720/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.537207  [10752/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.398783  [10784/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.430813  [10816/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.441473  [10848/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.389757  [10880/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.321666  [10912/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.504341  [10944/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.315483  [10976/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.473437  [11008/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.454556  [11040/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.566269  [11072/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.587890  [11104/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.525940  [11136/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.363633  [11168/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.637930  [11200/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.439117  [11232/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.619214  [11264/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.411787  [11296/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.426952  [11328/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.401679  [11360/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.315194  [11392/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.461923  [11424/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.544348  [11456/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.430461  [11488/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.559629  [11520/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.529309  [11552/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.500499  [11584/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.459413  [11616/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.523774  [11648/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.476505  [11680/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.497862  [11712/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.455932  [11744/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.415073  [11776/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.399761  [11808/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.590684  [11840/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.546350  [11872/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.447200  [11904/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.495819  [11936/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.384262  [11968/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.449788  [12000/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.321127  [12032/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.558259  [12064/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.579662  [12096/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.504516  [12128/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.368363  [12160/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.624155  [12192/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.527096  [12224/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.501503  [12256/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.332513  [12288/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.530861  [12320/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.679022  [12352/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.396214  [12384/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.373015  [12416/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.435047  [12448/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.466307  [12480/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.610844  [12512/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.486569  [12544/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.554915  [12576/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.326728  [12608/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.384791  [12640/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.450395  [12672/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.416946  [12704/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.582915  [12736/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.541123  [12768/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.401172  [12800/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.488855  [12832/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.411621  [12864/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.438094  [12896/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.317467  [12928/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.329888  [12960/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.513414  [12992/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.301342  [13024/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.493120  [13056/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.373194  [13088/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.389756  [13120/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.376352  [13152/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.426434  [13184/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.337656  [13216/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.521722  [13248/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.370779  [13280/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.471989  [13312/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.410423  [13344/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.373401  [13376/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.357253  [13408/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.437061  [13440/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.506767  [13472/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.350172  [13504/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.495880  [13536/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.380744  [13568/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.391955  [13600/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.622314  [13632/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.290438  [13664/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.417524  [13696/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.331813  [13728/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.499983  [13760/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.534947  [13792/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.339916  [13824/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.356018  [13856/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.507206  [13888/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.381724  [13920/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.352162  [13952/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.521444  [13984/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.445944  [14016/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.330387  [14048/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.343746  [14080/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.388855  [14112/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.450348  [14144/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.346402  [14176/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.392995  [14208/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.391251  [14240/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.362365  [14272/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.580504  [14304/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.347771  [14336/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.445925  [14368/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.322515  [14400/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.319873  [14432/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.309469  [14464/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.442309  [14496/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.423842  [14528/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.293089  [14560/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.361317  [14592/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.338995  [14624/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.412417  [14656/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.508890  [14688/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.304614  [14720/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.402269  [14752/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.467089  [14784/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.328270  [14816/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.375803  [14848/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.472397  [14880/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.385040  [14912/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.463952  [14944/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.356906  [14976/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.383532  [15008/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.387146  [15040/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.419348  [15072/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.319023  [15104/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.323296  [15136/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.335487  [15168/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.361660  [15200/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.318061  [15232/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.362902  [15264/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.425487  [15296/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.299420  [15328/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.344759  [15360/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.320463  [15392/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.333913  [15424/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.358150  [15456/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.397917  [15488/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.263146  [15520/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.361221  [15552/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.494278  [15584/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.348267  [15616/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.387530  [15648/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.479746  [15680/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.408308  [15712/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.438234  [15744/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.362222  [15776/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.335714  [15808/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.443347  [15840/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.381987  [15872/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.384488  [15904/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.384167  [15936/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.436453  [15968/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.383691  [16000/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.340311  [16032/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.591009  [16064/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.353621  [16096/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.322694  [16128/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.523676  [16160/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.362979  [16192/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.509963  [16224/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.424580  [16256/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.246852  [16288/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.405392  [16320/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.480509  [16352/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.364842  [16384/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.500431  [16416/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.320942  [16448/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.418386  [16480/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.317397  [16512/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.577402  [16544/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.327841  [16576/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.383394  [16608/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.401530  [16640/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.480230  [16672/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.358451  [16704/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.418698  [16736/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.397019  [16768/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.468379  [16800/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.386432  [16832/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.350056  [16864/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.428424  [16896/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.341039  [16928/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.300668  [16960/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.315750  [16992/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.273489  [17024/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.322349  [17056/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.386824  [17088/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.487324  [17120/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.355576  [17152/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.330146  [17184/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.431542  [17216/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.407376  [17248/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.430952  [17280/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.319604  [17312/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.336517  [17344/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.333201  [17376/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.499510  [17408/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.476563  [17440/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.318845  [17472/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.489813  [17504/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.337632  [17536/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.467893  [17568/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.380177  [17600/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.474777  [17632/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.400263  [17664/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.377945  [17696/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.280561  [17728/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.412686  [17760/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.340615  [17792/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.414274  [17824/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.412331  [17856/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.534486  [17888/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.406893  [17920/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.251940  [17952/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.440566  [17984/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.322921  [18016/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.341320  [18048/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.388748  [18080/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.262315  [18112/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.535959  [18144/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.377091  [18176/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.425502  [18208/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.354572  [18240/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.390230  [18272/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.379879  [18304/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.368647  [18336/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.498882  [18368/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.334948  [18400/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.463478  [18432/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.308384  [18464/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.400624  [18496/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.380907  [18528/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.314939  [18560/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.486488  [18592/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.402198  [18624/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.387731  [18656/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.270144  [18688/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.317397  [18720/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.344869  [18752/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.325180  [18784/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.369843  [18816/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.489812  [18848/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.351519  [18880/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.312784  [18912/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.361861  [18944/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.378031  [18976/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.512727  [19008/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.273538  [19040/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.370248  [19072/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.492742  [19104/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.365299  [19136/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.325249  [19168/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.372361  [19200/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.358694  [19232/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.292300  [19264/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.447966  [19296/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.418573  [19328/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.313550  [19360/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.500564  [19392/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.259482  [19424/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.354409  [19456/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.359396  [19488/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.419542  [19520/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.432468  [19552/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.470844  [19584/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.461298  [19616/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.395918  [19648/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.401139  [19680/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.379364  [19712/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.306118  [19744/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.551871  [19776/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.351526  [19808/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.293987  [19840/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.392219  [19872/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.390839  [19904/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.271481  [19936/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.326211  [19968/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.348391  [20000/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.352165  [20032/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.418399  [20064/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.494591  [20096/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.474764  [20128/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.354494  [20160/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.271876  [20192/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.450272  [20224/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.451023  [20256/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.373034  [20288/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.555970  [20320/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.362142  [20352/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.515864  [20384/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.369445  [20416/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.508467  [20448/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.320921  [20480/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.455923  [20512/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.356555  [20544/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.262221  [20576/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.400646  [20608/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.505278  [20640/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.371812  [20672/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.301972  [20704/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.408529  [20736/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.249654  [20768/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.370492  [20800/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.543774  [20832/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.335540  [20864/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.326588  [20896/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.384606  [20928/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.309123  [20960/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.298672  [20992/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.363904  [21024/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.435579  [21056/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.266862  [21088/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.404633  [21120/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.301502  [21152/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.447788  [21184/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.267154  [21216/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.408455  [21248/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.339121  [21280/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.339151  [21312/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.464472  [21344/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.474612  [21376/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.509788  [21408/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.499553  [21440/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.333445  [21472/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.327020  [21504/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.389921  [21536/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.348973  [21568/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.257835  [21600/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.320440  [21632/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.296033  [21664/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.414327  [21696/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.272428  [21728/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.378775  [21760/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.286031  [21792/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.333130  [21824/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.303862  [21856/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.270307  [21888/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.292891  [21920/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.197499  [21952/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.240744  [21984/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.334758  [22016/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.340978  [22048/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.308360  [22080/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.321220  [22112/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.333909  [22144/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.388880  [22176/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.325752  [22208/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.365014  [22240/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.346643  [22272/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.410152  [22304/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.437560  [22336/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.353799  [22368/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.267648  [22400/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.275618  [22432/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.237514  [22464/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.355445  [22496/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.380280  [22528/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.306545  [22560/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.334275  [22592/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.431062  [22624/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.333115  [22656/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.321273  [22688/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.313332  [22720/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.303174  [22752/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.416200  [22784/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.323261  [22816/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.340338  [22848/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.170217  [22880/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.250049  [22912/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.291061  [22944/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.287828  [22976/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.265184  [23008/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.449629  [23040/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.293007  [23072/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.439108  [23104/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.293973  [23136/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.433186  [23168/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.270668  [23200/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.389370  [23232/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.348672  [23264/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.424665  [23296/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.306012  [23328/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.269034  [23360/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.334453  [23392/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.259544  [23424/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.424439  [23456/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.318240  [23488/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.372348  [23520/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.356491  [23552/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.358771  [23584/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.222680  [23616/24872]:   0%|          | 0/777 [00:30<?, ?it/s]
loss: 0.222680  [23616/24872]:  95%|█████████▍| 738/777 [00:30<00:01, 24.57it/s]
loss: 0.346880  [23648/24872]:  95%|█████████▍| 738/777 [00:30<00:01, 24.57it/s]
loss: 0.344657  [23680/24872]:  95%|█████████▍| 738/777 [00:30<00:01, 24.57it/s]
loss: 0.335473  [23712/24872]:  95%|█████████▍| 738/777 [00:30<00:01, 24.57it/s]
loss: 0.355527  [23744/24872]:  95%|█████████▍| 738/777 [00:30<00:01, 24.57it/s]
loss: 0.236474  [23776/24872]:  95%|█████████▍| 738/777 [00:30<00:01, 24.57it/s]
loss: 0.371519  [23808/24872]:  95%|█████████▍| 738/777 [00:30<00:01, 24.57it/s]
loss: 0.296287  [23840/24872]:  95%|█████████▍| 738/777 [00:30<00:01, 24.57it/s]
loss: 0.409068  [23872/24872]:  95%|█████████▍| 738/777 [00:30<00:01, 24.57it/s]
loss: 0.300034  [23904/24872]:  95%|█████████▍| 738/777 [00:30<00:01, 24.57it/s]
loss: 0.475202  [23936/24872]:  95%|█████████▍| 738/777 [00:30<00:01, 24.57it/s]
loss: 0.275134  [23968/24872]:  95%|█████████▍| 738/777 [00:30<00:01, 24.57it/s]
loss: 0.267775  [24000/24872]:  95%|█████████▍| 738/777 [00:30<00:01, 24.57it/s]
loss: 0.255639  [24032/24872]:  95%|█████████▍| 738/777 [00:30<00:01, 24.57it/s]
loss: 0.307331  [24064/24872]:  95%|█████████▍| 738/777 [00:30<00:01, 24.57it/s]
loss: 0.263582  [24096/24872]:  95%|█████████▍| 738/777 [00:30<00:01, 24.57it/s]
loss: 0.423794  [24128/24872]:  95%|█████████▍| 738/777 [00:30<00:01, 24.57it/s]
loss: 0.355937  [24160/24872]:  95%|█████████▍| 738/777 [00:30<00:01, 24.57it/s]
loss: 0.299370  [24192/24872]:  95%|█████████▍| 738/777 [00:30<00:01, 24.57it/s]
loss: 0.329409  [24224/24872]:  95%|█████████▍| 738/777 [00:30<00:01, 24.57it/s]
loss: 0.259210  [24256/24872]:  95%|█████████▍| 738/777 [00:30<00:01, 24.57it/s]
loss: 0.311036  [24288/24872]:  95%|█████████▍| 738/777 [00:30<00:01, 24.57it/s]
loss: 0.360720  [24320/24872]:  95%|█████████▍| 738/777 [00:30<00:01, 24.57it/s]
loss: 0.308461  [24352/24872]:  95%|█████████▍| 738/777 [00:30<00:01, 24.57it/s]
loss: 0.201081  [24384/24872]:  95%|█████████▍| 738/777 [00:30<00:01, 24.57it/s]
loss: 0.433952  [24416/24872]:  95%|█████████▍| 738/777 [00:31<00:01, 24.57it/s]
loss: 0.278161  [24448/24872]:  95%|█████████▍| 738/777 [00:31<00:01, 24.57it/s]
loss: 0.335278  [24480/24872]:  95%|█████████▍| 738/777 [00:31<00:01, 24.57it/s]
loss: 0.338501  [24512/24872]:  95%|█████████▍| 738/777 [00:31<00:01, 24.57it/s]
loss: 0.292395  [24544/24872]:  95%|█████████▍| 738/777 [00:31<00:01, 24.57it/s]
loss: 0.299277  [24576/24872]:  95%|█████████▍| 738/777 [00:31<00:01, 24.57it/s]
loss: 0.290700  [24608/24872]:  95%|█████████▍| 738/777 [00:31<00:01, 24.57it/s]
loss: 0.435608  [24640/24872]:  95%|█████████▍| 738/777 [00:31<00:01, 24.57it/s]
loss: 0.245576  [24672/24872]:  95%|█████████▍| 738/777 [00:31<00:01, 24.57it/s]
loss: 0.245214  [24704/24872]:  95%|█████████▍| 738/777 [00:31<00:01, 24.57it/s]
loss: 0.423037  [24736/24872]:  95%|█████████▍| 738/777 [00:31<00:01, 24.57it/s]
loss: 0.309382  [24768/24872]:  95%|█████████▍| 738/777 [00:31<00:01, 24.57it/s]
loss: 0.285760  [24800/24872]:  95%|█████████▍| 738/777 [00:31<00:01, 24.57it/s]
loss: 0.316207  [24832/24872]:  95%|█████████▍| 738/777 [00:31<00:01, 24.57it/s]
loss: 0.251803  [24864/24872]:  95%|█████████▍| 738/777 [00:31<00:01, 24.57it/s]
loss: 0.266870  [24872/24872]:  95%|█████████▍| 738/777 [00:31<00:01, 24.57it/s]
loss: 0.266870  [24872/24872]: : 778it [00:31, 24.59it/s]
Epoch 2, time=31.64s

  0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.283125  [   32/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.411228  [   64/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.269505  [   96/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.350134  [  128/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.335825  [  160/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.346835  [  192/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.380049  [  224/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.389290  [  256/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.464753  [  288/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.328631  [  320/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.484095  [  352/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.378010  [  384/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.236295  [  416/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.287581  [  448/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.311718  [  480/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.438121  [  512/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.279064  [  544/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.290778  [  576/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.393975  [  608/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.332091  [  640/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.384161  [  672/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.291357  [  704/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.280420  [  736/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.230342  [  768/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.322365  [  800/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.313599  [  832/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.536940  [  864/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.221580  [  896/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.400341  [  928/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.350411  [  960/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.250367  [  992/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.366883  [ 1024/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.270114  [ 1056/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.330985  [ 1088/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.301963  [ 1120/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.299400  [ 1152/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.395208  [ 1184/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.233439  [ 1216/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.338958  [ 1248/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.193938  [ 1280/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.378336  [ 1312/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.310304  [ 1344/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.243454  [ 1376/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.398139  [ 1408/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.244661  [ 1440/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.392542  [ 1472/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.270657  [ 1504/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.369263  [ 1536/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.283508  [ 1568/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.254809  [ 1600/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.344699  [ 1632/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.224397  [ 1664/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.258598  [ 1696/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.253651  [ 1728/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.431209  [ 1760/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.390843  [ 1792/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.434032  [ 1824/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.277459  [ 1856/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.284102  [ 1888/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.257802  [ 1920/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.318801  [ 1952/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.259288  [ 1984/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.223426  [ 2016/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.322985  [ 2048/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.349482  [ 2080/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.439607  [ 2112/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.284697  [ 2144/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.307575  [ 2176/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.444187  [ 2208/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.233325  [ 2240/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.466372  [ 2272/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.271507  [ 2304/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.253530  [ 2336/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.398921  [ 2368/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.299402  [ 2400/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.373275  [ 2432/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.309798  [ 2464/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.504842  [ 2496/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.351272  [ 2528/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.302515  [ 2560/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.288677  [ 2592/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.226544  [ 2624/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.248727  [ 2656/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.385478  [ 2688/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.367891  [ 2720/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.315276  [ 2752/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.307414  [ 2784/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.321228  [ 2816/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.285854  [ 2848/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.273705  [ 2880/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.258222  [ 2912/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.348028  [ 2944/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.288998  [ 2976/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.201752  [ 3008/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.413191  [ 3040/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.289343  [ 3072/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.314572  [ 3104/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.292626  [ 3136/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.363701  [ 3168/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.449250  [ 3200/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.261443  [ 3232/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.335878  [ 3264/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.375275  [ 3296/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.308554  [ 3328/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.255685  [ 3360/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.259187  [ 3392/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.213246  [ 3424/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.400277  [ 3456/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.326957  [ 3488/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.473284  [ 3520/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.411850  [ 3552/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.344118  [ 3584/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.275703  [ 3616/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.399979  [ 3648/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.321730  [ 3680/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.259960  [ 3712/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.285948  [ 3744/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.269583  [ 3776/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.408090  [ 3808/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.206076  [ 3840/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.301462  [ 3872/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.334388  [ 3904/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.417729  [ 3936/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.266612  [ 3968/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.351207  [ 4000/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.272201  [ 4032/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.510018  [ 4064/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.251985  [ 4096/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.240293  [ 4128/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.315287  [ 4160/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.354413  [ 4192/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.357082  [ 4224/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.385048  [ 4256/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.375314  [ 4288/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.313760  [ 4320/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.307647  [ 4352/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.302591  [ 4384/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.390135  [ 4416/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.319135  [ 4448/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.288436  [ 4480/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.266418  [ 4512/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.347196  [ 4544/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.236476  [ 4576/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.379059  [ 4608/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.512721  [ 4640/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.325025  [ 4672/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.414932  [ 4704/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.319960  [ 4736/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.342416  [ 4768/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.304565  [ 4800/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.366954  [ 4832/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.336471  [ 4864/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.354645  [ 4896/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.324350  [ 4928/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.244702  [ 4960/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.230151  [ 4992/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.249894  [ 5024/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.251706  [ 5056/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.367758  [ 5088/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.342536  [ 5120/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.344872  [ 5152/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.362867  [ 5184/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.394302  [ 5216/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.288501  [ 5248/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.570035  [ 5280/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.296051  [ 5312/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.341469  [ 5344/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.321411  [ 5376/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.288694  [ 5408/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.207944  [ 5440/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.297923  [ 5472/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.381821  [ 5504/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.287498  [ 5536/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.341360  [ 5568/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.372690  [ 5600/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.225668  [ 5632/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.308513  [ 5664/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.353651  [ 5696/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.343213  [ 5728/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.225829  [ 5760/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.259179  [ 5792/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.268429  [ 5824/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.452516  [ 5856/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.271319  [ 5888/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.313505  [ 5920/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.306439  [ 5952/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.234428  [ 5984/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.279250  [ 6016/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.269530  [ 6048/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.359317  [ 6080/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.279197  [ 6112/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.273001  [ 6144/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.427209  [ 6176/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.228566  [ 6208/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.257173  [ 6240/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.265612  [ 6272/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.255955  [ 6304/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.389432  [ 6336/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.291519  [ 6368/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.300908  [ 6400/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.296905  [ 6432/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.271939  [ 6464/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.302886  [ 6496/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.212850  [ 6528/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.386339  [ 6560/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.316571  [ 6592/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.321973  [ 6624/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.278355  [ 6656/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.326488  [ 6688/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.319433  [ 6720/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.260900  [ 6752/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.315254  [ 6784/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.356268  [ 6816/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.320472  [ 6848/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.233872  [ 6880/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.353544  [ 6912/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.257432  [ 6944/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.332819  [ 6976/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.236242  [ 7008/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.241539  [ 7040/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.296871  [ 7072/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.267780  [ 7104/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.211294  [ 7136/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.385634  [ 7168/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.220884  [ 7200/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.306814  [ 7232/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.421682  [ 7264/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.329465  [ 7296/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.351548  [ 7328/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.262606  [ 7360/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.260161  [ 7392/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.316669  [ 7424/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.294544  [ 7456/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.432232  [ 7488/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.257959  [ 7520/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.284458  [ 7552/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.228584  [ 7584/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.235188  [ 7616/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.337516  [ 7648/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.329253  [ 7680/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.438873  [ 7712/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.434022  [ 7744/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.353854  [ 7776/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.414820  [ 7808/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.408301  [ 7840/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.313158  [ 7872/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.253104  [ 7904/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.240295  [ 7936/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.267382  [ 7968/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.219396  [ 8000/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.328134  [ 8032/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.350492  [ 8064/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.256752  [ 8096/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.354131  [ 8128/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.436695  [ 8160/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.235047  [ 8192/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.210712  [ 8224/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.254201  [ 8256/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.315851  [ 8288/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.360827  [ 8320/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.287297  [ 8352/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.200231  [ 8384/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.230466  [ 8416/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.238478  [ 8448/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.292137  [ 8480/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.328957  [ 8512/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.313162  [ 8544/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.250183  [ 8576/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.432714  [ 8608/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.355828  [ 8640/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.653207  [ 8672/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.262356  [ 8704/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.278370  [ 8736/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.274638  [ 8768/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.173467  [ 8800/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.243358  [ 8832/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.202503  [ 8864/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.254270  [ 8896/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.281973  [ 8928/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.238935  [ 8960/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.348737  [ 8992/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.455748  [ 9024/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.368188  [ 9056/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.336085  [ 9088/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.290611  [ 9120/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.252552  [ 9152/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.308165  [ 9184/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.341043  [ 9216/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.232957  [ 9248/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.284980  [ 9280/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.367221  [ 9312/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.289757  [ 9344/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.338949  [ 9376/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.315924  [ 9408/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.391519  [ 9440/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.309442  [ 9472/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.324944  [ 9504/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.286630  [ 9536/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.207541  [ 9568/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.268363  [ 9600/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.237888  [ 9632/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.240158  [ 9664/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.338208  [ 9696/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.256907  [ 9728/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.283721  [ 9760/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.326248  [ 9792/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.252465  [ 9824/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.328722  [ 9856/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.347461  [ 9888/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.295555  [ 9920/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.487708  [ 9952/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.450508  [ 9984/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.265394  [10016/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.254454  [10048/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.371183  [10080/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.280824  [10112/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.300214  [10144/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.342829  [10176/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.337153  [10208/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.220623  [10240/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.226935  [10272/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.347403  [10304/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.383249  [10336/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.267275  [10368/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.252889  [10400/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.494623  [10432/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.356170  [10464/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.397078  [10496/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.333082  [10528/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.378337  [10560/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.306573  [10592/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.376200  [10624/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.292871  [10656/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.368518  [10688/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.264547  [10720/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.412769  [10752/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.237073  [10784/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.310263  [10816/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.301891  [10848/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.250352  [10880/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.236736  [10912/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.334746  [10944/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.201470  [10976/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.308255  [11008/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.233776  [11040/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.372350  [11072/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.403429  [11104/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.382165  [11136/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.217276  [11168/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.340724  [11200/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.354264  [11232/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.403458  [11264/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.269134  [11296/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.327926  [11328/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.356637  [11360/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.204031  [11392/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.292600  [11424/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.421179  [11456/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.432080  [11488/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.361935  [11520/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.373924  [11552/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.382705  [11584/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.304896  [11616/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.293582  [11648/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.286836  [11680/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.361164  [11712/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.256195  [11744/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.324247  [11776/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.270204  [11808/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.447708  [11840/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.291288  [11872/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.268740  [11904/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.327126  [11936/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.242892  [11968/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.294443  [12000/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.240311  [12032/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.355534  [12064/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.402110  [12096/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.338846  [12128/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.250733  [12160/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.387843  [12192/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.324227  [12224/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.307363  [12256/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.273750  [12288/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.331122  [12320/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.382723  [12352/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.276902  [12384/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.274959  [12416/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.275893  [12448/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.307337  [12480/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.455947  [12512/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.298646  [12544/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.405907  [12576/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.222697  [12608/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.285629  [12640/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.274512  [12672/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.280437  [12704/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.419032  [12736/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.402568  [12768/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.302870  [12800/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.349057  [12832/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.253546  [12864/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.316808  [12896/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.211420  [12928/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.244912  [12960/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.340049  [12992/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.200072  [13024/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.365378  [13056/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.300885  [13088/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.286990  [13120/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.302635  [13152/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.280980  [13184/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.193788  [13216/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.343860  [13248/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.250760  [13280/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.295179  [13312/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.294683  [13344/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.195121  [13376/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.298824  [13408/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.256920  [13440/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.398392  [13472/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.232464  [13504/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.358769  [13536/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.252718  [13568/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.249935  [13600/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.444748  [13632/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.210771  [13664/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.333124  [13696/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.241222  [13728/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.328214  [13760/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.360327  [13792/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.276640  [13824/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.259483  [13856/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.364306  [13888/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.257225  [13920/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.228175  [13952/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.327080  [13984/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.319745  [14016/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.254768  [14048/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.225923  [14080/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.270361  [14112/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.287317  [14144/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.268560  [14176/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.268844  [14208/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.247540  [14240/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.262881  [14272/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.345443  [14304/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.292779  [14336/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.302470  [14368/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.278669  [14400/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.164424  [14432/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.213713  [14464/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.264976  [14496/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.290851  [14528/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.215406  [14560/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.242225  [14592/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.239785  [14624/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.313312  [14656/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.406784  [14688/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.219770  [14720/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.394031  [14752/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.307792  [14784/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.245003  [14816/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.332450  [14848/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.296359  [14880/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.258599  [14912/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.396527  [14944/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.278687  [14976/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.262632  [15008/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.325456  [15040/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.382625  [15072/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.197688  [15104/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.288563  [15136/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.282949  [15168/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.215819  [15200/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.239022  [15232/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.302304  [15264/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.270064  [15296/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.231045  [15328/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.253516  [15360/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.254414  [15392/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.259087  [15424/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.267647  [15456/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.337119  [15488/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.204591  [15520/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.302182  [15552/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.409223  [15584/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.292441  [15616/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.296205  [15648/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.424672  [15680/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.328123  [15712/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.344667  [15744/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.261303  [15776/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.246445  [15808/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.356770  [15840/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.308815  [15872/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.253945  [15904/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.283525  [15936/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.290659  [15968/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.340692  [16000/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.245996  [16032/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.447661  [16064/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.276570  [16096/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.267608  [16128/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.452782  [16160/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.250493  [16192/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.361832  [16224/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.341269  [16256/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.175367  [16288/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.345972  [16320/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.379525  [16352/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.325949  [16384/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.347248  [16416/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.233543  [16448/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.254702  [16480/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.228529  [16512/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.375041  [16544/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.226247  [16576/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.274755  [16608/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.293790  [16640/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.363546  [16672/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.200898  [16704/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.297025  [16736/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.288971  [16768/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.257371  [16800/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.266654  [16832/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.250066  [16864/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.294934  [16896/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.217626  [16928/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.234650  [16960/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.255740  [16992/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.190071  [17024/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.203783  [17056/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.299582  [17088/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.341935  [17120/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.265988  [17152/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.208472  [17184/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.307397  [17216/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.307391  [17248/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.302836  [17280/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.284320  [17312/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.298571  [17344/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.301814  [17376/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.360180  [17408/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.353715  [17440/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.241761  [17472/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.388926  [17504/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.292070  [17536/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.355464  [17568/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.277623  [17600/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.307966  [17632/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.300545  [17664/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.295483  [17696/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.207195  [17728/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.342486  [17760/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.254482  [17792/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.338374  [17824/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.321208  [17856/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.403231  [17888/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.318407  [17920/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.183099  [17952/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.332952  [17984/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.247948  [18016/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.272869  [18048/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.271882  [18080/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.195860  [18112/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.376769  [18144/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.256583  [18176/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.315612  [18208/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.277402  [18240/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.313100  [18272/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.245571  [18304/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.244779  [18336/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.335378  [18368/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.266667  [18400/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.289508  [18432/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.206267  [18464/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.283084  [18496/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.345027  [18528/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.236577  [18560/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.354842  [18592/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.330507  [18624/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.250222  [18656/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.247999  [18688/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.231640  [18720/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.237408  [18752/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.203409  [18784/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.241765  [18816/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.358400  [18848/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.252885  [18880/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.254202  [18912/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.258679  [18944/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.236516  [18976/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.354075  [19008/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.200970  [19040/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.227542  [19072/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.363360  [19104/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.281444  [19136/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.208310  [19168/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.262927  [19200/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.306742  [19232/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.166761  [19264/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.336331  [19296/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.309400  [19328/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.231713  [19360/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.379656  [19392/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.201803  [19424/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.246939  [19456/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.228569  [19488/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.330752  [19520/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.338145  [19552/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.363791  [19584/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.383342  [19616/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.262816  [19648/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.308388  [19680/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.306549  [19712/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.217581  [19744/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.433382  [19776/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.269089  [19808/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.223126  [19840/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.309200  [19872/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.356300  [19904/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.235808  [19936/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.233596  [19968/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.263861  [20000/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.235534  [20032/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.316401  [20064/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.340608  [20096/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.331362  [20128/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.253219  [20160/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.108887  [20192/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.269381  [20224/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.293828  [20256/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.216889  [20288/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.347065  [20320/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.207600  [20352/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.343189  [20384/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.222312  [20416/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.253830  [20448/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.182829  [20480/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.294978  [20512/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.247168  [20544/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.193570  [20576/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.336162  [20608/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.323476  [20640/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.261333  [20672/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.174719  [20704/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.301761  [20736/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.180892  [20768/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.282023  [20800/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.410580  [20832/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.252878  [20864/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.191051  [20896/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.248791  [20928/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.211562  [20960/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.228746  [20992/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.223679  [21024/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.350975  [21056/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.200432  [21088/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.317935  [21120/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.242440  [21152/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.357959  [21184/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.197261  [21216/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.349344  [21248/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.250858  [21280/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.265313  [21312/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.387489  [21344/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.370432  [21376/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.465018  [21408/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.347911  [21440/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.246725  [21472/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.255353  [21504/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.315152  [21536/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.280347  [21568/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.232140  [21600/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.250781  [21632/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.226980  [21664/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.273166  [21696/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.207491  [21728/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.244837  [21760/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.213336  [21792/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.279910  [21824/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.230611  [21856/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.197361  [21888/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.232272  [21920/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.146780  [21952/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.185711  [21984/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.235739  [22016/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.286285  [22048/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.245558  [22080/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.273690  [22112/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.223294  [22144/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.288927  [22176/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.256182  [22208/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.256402  [22240/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.277075  [22272/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.329837  [22304/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.297052  [22336/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.242542  [22368/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.179541  [22400/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.202728  [22432/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.169913  [22464/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.265438  [22496/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.273574  [22528/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.239752  [22560/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.268433  [22592/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.283642  [22624/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.262078  [22656/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.264919  [22688/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.239864  [22720/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.212984  [22752/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.342134  [22784/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.265762  [22816/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.268547  [22848/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.131165  [22880/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.234624  [22912/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.215642  [22944/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.214907  [22976/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.212157  [23008/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.379055  [23040/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.198605  [23072/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.373102  [23104/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.226813  [23136/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.342227  [23168/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.162817  [23200/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.285306  [23232/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.274991  [23264/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.355267  [23296/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.234675  [23328/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.208324  [23360/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.263041  [23392/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.255555  [23424/24872]:   0%|          | 0/777 [00:30<?, ?it/s]
loss: 0.255555  [23424/24872]:  94%|█████████▍| 732/777 [00:30<00:01, 24.39it/s]
loss: 0.350746  [23456/24872]:  94%|█████████▍| 732/777 [00:30<00:01, 24.39it/s]
loss: 0.275276  [23488/24872]:  94%|█████████▍| 732/777 [00:30<00:01, 24.39it/s]
loss: 0.295389  [23520/24872]:  94%|█████████▍| 732/777 [00:30<00:01, 24.39it/s]
loss: 0.247013  [23552/24872]:  94%|█████████▍| 732/777 [00:30<00:01, 24.39it/s]
loss: 0.278580  [23584/24872]:  94%|█████████▍| 732/777 [00:30<00:01, 24.39it/s]
loss: 0.176218  [23616/24872]:  94%|█████████▍| 732/777 [00:30<00:01, 24.39it/s]
loss: 0.325020  [23648/24872]:  94%|█████████▍| 732/777 [00:30<00:01, 24.39it/s]
loss: 0.237580  [23680/24872]:  94%|█████████▍| 732/777 [00:30<00:01, 24.39it/s]
loss: 0.265217  [23712/24872]:  94%|█████████▍| 732/777 [00:30<00:01, 24.39it/s]
loss: 0.283457  [23744/24872]:  94%|█████████▍| 732/777 [00:30<00:01, 24.39it/s]
loss: 0.198589  [23776/24872]:  94%|█████████▍| 732/777 [00:30<00:01, 24.39it/s]
loss: 0.296756  [23808/24872]:  94%|█████████▍| 732/777 [00:30<00:01, 24.39it/s]
loss: 0.199334  [23840/24872]:  94%|█████████▍| 732/777 [00:30<00:01, 24.39it/s]
loss: 0.290444  [23872/24872]:  94%|█████████▍| 732/777 [00:30<00:01, 24.39it/s]
loss: 0.227193  [23904/24872]:  94%|█████████▍| 732/777 [00:30<00:01, 24.39it/s]
loss: 0.401452  [23936/24872]:  94%|█████████▍| 732/777 [00:30<00:01, 24.39it/s]
loss: 0.226327  [23968/24872]:  94%|█████████▍| 732/777 [00:30<00:01, 24.39it/s]
loss: 0.192956  [24000/24872]:  94%|█████████▍| 732/777 [00:30<00:01, 24.39it/s]
loss: 0.206366  [24032/24872]:  94%|█████████▍| 732/777 [00:30<00:01, 24.39it/s]
loss: 0.278068  [24064/24872]:  94%|█████████▍| 732/777 [00:30<00:01, 24.39it/s]
loss: 0.205089  [24096/24872]:  94%|█████████▍| 732/777 [00:30<00:01, 24.39it/s]
loss: 0.355511  [24128/24872]:  94%|█████████▍| 732/777 [00:30<00:01, 24.39it/s]
loss: 0.275013  [24160/24872]:  94%|█████████▍| 732/777 [00:30<00:01, 24.39it/s]
loss: 0.268794  [24192/24872]:  94%|█████████▍| 732/777 [00:30<00:01, 24.39it/s]
loss: 0.260093  [24224/24872]:  94%|█████████▍| 732/777 [00:31<00:01, 24.39it/s]
loss: 0.192018  [24256/24872]:  94%|█████████▍| 732/777 [00:31<00:01, 24.39it/s]
loss: 0.221487  [24288/24872]:  94%|█████████▍| 732/777 [00:31<00:01, 24.39it/s]
loss: 0.305547  [24320/24872]:  94%|█████████▍| 732/777 [00:31<00:01, 24.39it/s]
loss: 0.263421  [24352/24872]:  94%|█████████▍| 732/777 [00:31<00:01, 24.39it/s]
loss: 0.169625  [24384/24872]:  94%|█████████▍| 732/777 [00:31<00:01, 24.39it/s]
loss: 0.373240  [24416/24872]:  94%|█████████▍| 732/777 [00:31<00:01, 24.39it/s]
loss: 0.220283  [24448/24872]:  94%|█████████▍| 732/777 [00:31<00:01, 24.39it/s]
loss: 0.268430  [24480/24872]:  94%|█████████▍| 732/777 [00:31<00:01, 24.39it/s]
loss: 0.277551  [24512/24872]:  94%|█████████▍| 732/777 [00:31<00:01, 24.39it/s]
loss: 0.216989  [24544/24872]:  94%|█████████▍| 732/777 [00:31<00:01, 24.39it/s]
loss: 0.218353  [24576/24872]:  94%|█████████▍| 732/777 [00:31<00:01, 24.39it/s]
loss: 0.221749  [24608/24872]:  94%|█████████▍| 732/777 [00:31<00:01, 24.39it/s]
loss: 0.338623  [24640/24872]:  94%|█████████▍| 732/777 [00:31<00:01, 24.39it/s]
loss: 0.194554  [24672/24872]:  94%|█████████▍| 732/777 [00:31<00:01, 24.39it/s]
loss: 0.208256  [24704/24872]:  94%|█████████▍| 732/777 [00:31<00:01, 24.39it/s]
loss: 0.293702  [24736/24872]:  94%|█████████▍| 732/777 [00:31<00:01, 24.39it/s]
loss: 0.249164  [24768/24872]:  94%|█████████▍| 732/777 [00:31<00:01, 24.39it/s]
loss: 0.220105  [24800/24872]:  94%|█████████▍| 732/777 [00:31<00:01, 24.39it/s]
loss: 0.258191  [24832/24872]:  94%|█████████▍| 732/777 [00:31<00:01, 24.39it/s]
loss: 0.187857  [24864/24872]:  94%|█████████▍| 732/777 [00:31<00:01, 24.39it/s]
loss: 0.242622  [24872/24872]:  94%|█████████▍| 732/777 [00:31<00:01, 24.39it/s]
loss: 0.242622  [24872/24872]: : 778it [00:31, 24.41it/s]
Epoch 3, time=63.50s

  0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.219644  [   32/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.328128  [   64/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.227656  [   96/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.285419  [  128/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.284449  [  160/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.252201  [  192/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.322977  [  224/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.249440  [  256/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.341812  [  288/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.241536  [  320/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.368137  [  352/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.299879  [  384/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.190878  [  416/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.217680  [  448/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.253465  [  480/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.338251  [  512/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.219741  [  544/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.245202  [  576/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.312780  [  608/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.287664  [  640/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.302664  [  672/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.224215  [  704/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.198540  [  736/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.189870  [  768/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.259084  [  800/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.241543  [  832/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.430814  [  864/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.195703  [  896/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.333608  [  928/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.283829  [  960/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.193870  [  992/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.296555  [ 1024/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.212226  [ 1056/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.219256  [ 1088/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.218557  [ 1120/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.209060  [ 1152/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.307712  [ 1184/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.185443  [ 1216/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.260597  [ 1248/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.133019  [ 1280/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.320840  [ 1312/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.236119  [ 1344/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.193061  [ 1376/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.278651  [ 1408/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.182960  [ 1440/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.282757  [ 1472/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.226524  [ 1504/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.266534  [ 1536/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.253580  [ 1568/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.209213  [ 1600/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.287619  [ 1632/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.179154  [ 1664/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.193298  [ 1696/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.223150  [ 1728/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.329959  [ 1760/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.286555  [ 1792/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.345604  [ 1824/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.207188  [ 1856/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.210749  [ 1888/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.220632  [ 1920/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.253038  [ 1952/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.209667  [ 1984/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.167523  [ 2016/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.262714  [ 2048/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.272972  [ 2080/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.347449  [ 2112/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.235044  [ 2144/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.250170  [ 2176/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.333258  [ 2208/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.188699  [ 2240/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.387687  [ 2272/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.201442  [ 2304/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.198009  [ 2336/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.335516  [ 2368/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.227308  [ 2400/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.309478  [ 2432/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.270351  [ 2464/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.441436  [ 2496/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.253114  [ 2528/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.264736  [ 2560/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.245393  [ 2592/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.170810  [ 2624/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.167386  [ 2656/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.291336  [ 2688/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.279992  [ 2720/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.251140  [ 2752/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.270387  [ 2784/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.263132  [ 2816/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.237214  [ 2848/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.220850  [ 2880/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.220749  [ 2912/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.296729  [ 2944/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.236371  [ 2976/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.148607  [ 3008/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.356595  [ 3040/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.244326  [ 3072/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.257911  [ 3104/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.219209  [ 3136/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.294501  [ 3168/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.338753  [ 3200/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.190482  [ 3232/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.277905  [ 3264/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.279705  [ 3296/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.224811  [ 3328/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.202864  [ 3360/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.187796  [ 3392/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.175602  [ 3424/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.334948  [ 3456/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.263179  [ 3488/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.402449  [ 3520/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.327533  [ 3552/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.279472  [ 3584/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.210577  [ 3616/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.301551  [ 3648/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.252880  [ 3680/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.204654  [ 3712/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.220518  [ 3744/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.223525  [ 3776/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.317242  [ 3808/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.169324  [ 3840/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.207955  [ 3872/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.253972  [ 3904/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.322258  [ 3936/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.220815  [ 3968/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.302460  [ 4000/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.237450  [ 4032/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.394116  [ 4064/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.243188  [ 4096/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.212593  [ 4128/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.226118  [ 4160/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.263908  [ 4192/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.306094  [ 4224/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.333919  [ 4256/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.421927  [ 4288/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.146974  [ 4320/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.250101  [ 4352/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.283240  [ 4384/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.259443  [ 4416/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.241671  [ 4448/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.215254  [ 4480/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.191482  [ 4512/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.233835  [ 4544/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.185185  [ 4576/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.299944  [ 4608/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.347302  [ 4640/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.236797  [ 4672/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.337775  [ 4704/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.229700  [ 4736/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.238750  [ 4768/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.260113  [ 4800/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.298459  [ 4832/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.266374  [ 4864/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.286883  [ 4896/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.273307  [ 4928/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.199436  [ 4960/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.154560  [ 4992/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.194934  [ 5024/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.210042  [ 5056/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.263428  [ 5088/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.219958  [ 5120/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.271480  [ 5152/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.304205  [ 5184/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.304212  [ 5216/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.255634  [ 5248/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.495393  [ 5280/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.258433  [ 5312/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.263927  [ 5344/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.250971  [ 5376/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.199691  [ 5408/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.187781  [ 5440/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.247665  [ 5472/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.267799  [ 5504/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.224625  [ 5536/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.277905  [ 5568/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.270329  [ 5600/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.196176  [ 5632/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.243441  [ 5664/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.308874  [ 5696/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.280994  [ 5728/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.172752  [ 5760/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.184301  [ 5792/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.257909  [ 5824/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.407176  [ 5856/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.192155  [ 5888/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.272464  [ 5920/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.257723  [ 5952/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.193394  [ 5984/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.226249  [ 6016/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.205866  [ 6048/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.310797  [ 6080/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.253548  [ 6112/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.196611  [ 6144/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.352438  [ 6176/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.203755  [ 6208/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.211782  [ 6240/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.209322  [ 6272/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.201518  [ 6304/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.320265  [ 6336/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.203654  [ 6368/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.261709  [ 6400/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.245552  [ 6432/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.220605  [ 6464/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.224710  [ 6496/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.176409  [ 6528/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.349678  [ 6560/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.238305  [ 6592/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.260202  [ 6624/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.236581  [ 6656/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.239229  [ 6688/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.267024  [ 6720/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.198179  [ 6752/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.254251  [ 6784/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.304834  [ 6816/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.271144  [ 6848/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.188422  [ 6880/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.296805  [ 6912/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.220334  [ 6944/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.287911  [ 6976/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.193025  [ 7008/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.213215  [ 7040/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.258705  [ 7072/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.216135  [ 7104/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.170216  [ 7136/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.297482  [ 7168/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.193571  [ 7200/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.275106  [ 7232/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.328217  [ 7264/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.280563  [ 7296/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.295487  [ 7328/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.192555  [ 7360/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.232840  [ 7392/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.276891  [ 7424/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.233945  [ 7456/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.351537  [ 7488/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.223204  [ 7520/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.229558  [ 7552/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.173370  [ 7584/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.168646  [ 7616/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.294313  [ 7648/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.259510  [ 7680/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.335880  [ 7712/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.374870  [ 7744/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.296272  [ 7776/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.300534  [ 7808/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.356469  [ 7840/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.264221  [ 7872/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.206863  [ 7904/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.202252  [ 7936/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.236768  [ 7968/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.183628  [ 8000/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.262167  [ 8032/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.281463  [ 8064/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.201663  [ 8096/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.304637  [ 8128/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.331779  [ 8160/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.181127  [ 8192/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.167595  [ 8224/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.210007  [ 8256/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.252889  [ 8288/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.318501  [ 8320/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.229340  [ 8352/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.158894  [ 8384/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.191389  [ 8416/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.185720  [ 8448/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.262757  [ 8480/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.269285  [ 8512/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.249676  [ 8544/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.199533  [ 8576/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.381033  [ 8608/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.291238  [ 8640/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.556406  [ 8672/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.225179  [ 8704/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.232200  [ 8736/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.233786  [ 8768/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.149119  [ 8800/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.192060  [ 8832/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.157043  [ 8864/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.204735  [ 8896/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.221979  [ 8928/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.190035  [ 8960/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.267714  [ 8992/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.340151  [ 9024/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.260246  [ 9056/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.227412  [ 9088/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.230493  [ 9120/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.189893  [ 9152/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.218124  [ 9184/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.227727  [ 9216/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.177840  [ 9248/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.182214  [ 9280/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.251234  [ 9312/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.242455  [ 9344/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.269437  [ 9376/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.234500  [ 9408/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.319231  [ 9440/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.236138  [ 9472/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.221543  [ 9504/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.225747  [ 9536/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.167077  [ 9568/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.208223  [ 9600/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.153385  [ 9632/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.187888  [ 9664/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.263009  [ 9696/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.227978  [ 9728/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.217582  [ 9760/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.278217  [ 9792/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.212465  [ 9824/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.252160  [ 9856/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.294494  [ 9888/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.231133  [ 9920/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.398147  [ 9952/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.298098  [ 9984/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.194446  [10016/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.251984  [10048/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.305234  [10080/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.240804  [10112/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.264420  [10144/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.277710  [10176/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.302518  [10208/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.200164  [10240/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.174641  [10272/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.266947  [10304/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.285834  [10336/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.183706  [10368/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.184066  [10400/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.407249  [10432/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.291107  [10464/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.315510  [10496/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.274405  [10528/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.353530  [10560/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.268593  [10592/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.209162  [10624/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.222513  [10656/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.282850  [10688/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.201958  [10720/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.329189  [10752/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.184871  [10784/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.234980  [10816/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.262435  [10848/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.210693  [10880/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.212486  [10912/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.286202  [10944/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.172759  [10976/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.247454  [11008/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.205124  [11040/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.313775  [11072/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.349360  [11104/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.318243  [11136/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.173956  [11168/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.296160  [11200/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.280686  [11232/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.293035  [11264/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.207029  [11296/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.221651  [11328/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.263427  [11360/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.167695  [11392/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.236517  [11424/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.308934  [11456/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.309403  [11488/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.329610  [11520/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.299427  [11552/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.283955  [11584/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.240507  [11616/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.250928  [11648/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.233832  [11680/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.301147  [11712/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.206996  [11744/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.266721  [11776/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.223287  [11808/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.323111  [11840/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.237018  [11872/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.225063  [11904/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.285147  [11936/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.197901  [11968/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.227594  [12000/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.183639  [12032/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.306658  [12064/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.340780  [12096/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.279408  [12128/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.241289  [12160/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.324841  [12192/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.274397  [12224/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.252631  [12256/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.227473  [12288/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.287165  [12320/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.349448  [12352/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.235095  [12384/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.273399  [12416/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.275800  [12448/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.242092  [12480/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.421312  [12512/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.295039  [12544/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.329299  [12576/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.186200  [12608/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.240408  [12640/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.232166  [12672/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.237033  [12704/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.393726  [12736/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.351635  [12768/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.275019  [12800/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.307211  [12832/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.199403  [12864/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.266263  [12896/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.184867  [12928/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.239427  [12960/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.240185  [12992/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.187640  [13024/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.307710  [13056/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.257461  [13088/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.263651  [13120/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.229820  [13152/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.241979  [13184/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.164302  [13216/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.288061  [13248/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.225038  [13280/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.268909  [13312/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.245732  [13344/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.141810  [13376/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.277796  [13408/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.213535  [13440/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.347491  [13472/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.203940  [13504/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.307947  [13536/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.200396  [13568/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.217631  [13600/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.388996  [13632/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.175892  [13664/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.259899  [13696/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.215903  [13728/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.298325  [13760/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.325096  [13792/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.248282  [13824/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.213705  [13856/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.317264  [13888/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.226506  [13920/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.195550  [13952/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.290788  [13984/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.291869  [14016/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.234551  [14048/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.209613  [14080/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.247544  [14112/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.268358  [14144/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.220083  [14176/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.219826  [14208/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.198515  [14240/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.238050  [14272/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.305885  [14304/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.243456  [14336/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.262523  [14368/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.247951  [14400/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.142300  [14432/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.194179  [14464/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.217768  [14496/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.249563  [14528/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.190905  [14560/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.215144  [14592/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.206832  [14624/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.263767  [14656/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.346308  [14688/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.181433  [14720/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.342634  [14752/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.273505  [14784/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.226524  [14816/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.278530  [14848/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.264909  [14880/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.249661  [14912/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.319869  [14944/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.250757  [14976/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.292042  [15008/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.258884  [15040/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.294429  [15072/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.206702  [15104/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.260134  [15136/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.212335  [15168/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.206722  [15200/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.232551  [15232/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.286552  [15264/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.240452  [15296/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.249125  [15328/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.239717  [15360/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.218056  [15392/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.249263  [15424/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.255267  [15456/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.287795  [15488/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.180289  [15520/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.255527  [15552/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.350789  [15584/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.248626  [15616/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.274726  [15648/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.402955  [15680/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.272388  [15712/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.302713  [15744/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.223652  [15776/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.228942  [15808/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.302137  [15840/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.277698  [15872/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.199613  [15904/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.239813  [15936/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.228075  [15968/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.310361  [16000/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.232915  [16032/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.353055  [16064/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.247902  [16096/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.216836  [16128/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.341128  [16160/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.223126  [16192/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.309240  [16224/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.271195  [16256/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.144442  [16288/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.306802  [16320/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.323198  [16352/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.265516  [16384/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.344272  [16416/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.208250  [16448/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.182723  [16480/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.180815  [16512/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.334779  [16544/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.169427  [16576/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.213519  [16608/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.273321  [16640/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.330060  [16672/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.153784  [16704/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.251251  [16736/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.257673  [16768/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.205314  [16800/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.267310  [16832/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.223395  [16864/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.240747  [16896/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.177008  [16928/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.206198  [16960/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.225041  [16992/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.165508  [17024/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.158881  [17056/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.249028  [17088/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.301023  [17120/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.210799  [17152/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.165595  [17184/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.252411  [17216/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.216263  [17248/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.253395  [17280/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.221270  [17312/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.250899  [17344/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.264430  [17376/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.309987  [17408/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.269731  [17440/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.189261  [17472/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.352788  [17504/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.270197  [17536/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.289355  [17568/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.263497  [17600/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.265845  [17632/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.257111  [17664/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.267664  [17696/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.175832  [17728/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.321690  [17760/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.221381  [17792/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.269859  [17824/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.278091  [17856/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.360298  [17888/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.266004  [17920/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.164916  [17952/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.294573  [17984/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.212724  [18016/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.252062  [18048/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.235575  [18080/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.164370  [18112/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.357549  [18144/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.236984  [18176/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.294025  [18208/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.240797  [18240/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.286620  [18272/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.224286  [18304/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.205093  [18336/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.252879  [18368/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.257170  [18400/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.305663  [18432/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.161855  [18464/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.259745  [18496/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.329260  [18528/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.180992  [18560/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.337421  [18592/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.315489  [18624/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.222501  [18656/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.182215  [18688/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.212677  [18720/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.193676  [18752/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.192810  [18784/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.181897  [18816/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.278642  [18848/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.217405  [18880/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.228425  [18912/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.187452  [18944/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.216195  [18976/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.313190  [19008/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.172105  [19040/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.190405  [19072/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.290407  [19104/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.248300  [19136/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.188988  [19168/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.212936  [19200/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.286495  [19232/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.150107  [19264/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.331972  [19296/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.232368  [19328/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.172374  [19360/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.355163  [19392/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.167607  [19424/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.183842  [19456/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.184638  [19488/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.290336  [19520/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.306515  [19552/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.306828  [19584/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.360615  [19616/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.220905  [19648/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.303081  [19680/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.263791  [19712/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.189543  [19744/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.356794  [19776/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.232681  [19808/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.181840  [19840/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.291256  [19872/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.273473  [19904/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.193189  [19936/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.203109  [19968/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.226796  [20000/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.200095  [20032/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.261006  [20064/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.286587  [20096/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.292828  [20128/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.220321  [20160/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.109919  [20192/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.260754  [20224/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.251850  [20256/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.181957  [20288/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.308342  [20320/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.187274  [20352/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.298057  [20384/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.194629  [20416/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.214207  [20448/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.155369  [20480/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.230134  [20512/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.219616  [20544/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.158264  [20576/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.297839  [20608/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.292524  [20640/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.217662  [20672/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.155571  [20704/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.294752  [20736/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.161366  [20768/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.278711  [20800/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.378722  [20832/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.228468  [20864/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.144932  [20896/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.210516  [20928/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.185379  [20960/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.206058  [20992/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.187220  [21024/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.299715  [21056/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.189724  [21088/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.298247  [21120/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.215558  [21152/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.297780  [21184/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.184660  [21216/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.301554  [21248/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.204830  [21280/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.230119  [21312/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.316877  [21344/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.342335  [21376/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.410574  [21408/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.299405  [21440/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.232090  [21472/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.221928  [21504/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.273348  [21536/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.259750  [21568/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.215221  [21600/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.232325  [21632/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.200895  [21664/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.243785  [21696/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.186628  [21728/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.225161  [21760/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.215951  [21792/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.245577  [21824/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.203683  [21856/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.191279  [21888/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.212547  [21920/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.134899  [21952/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.175179  [21984/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.222288  [22016/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.270764  [22048/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.232338  [22080/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.241867  [22112/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.180241  [22144/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.294908  [22176/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.216660  [22208/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.231606  [22240/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.245150  [22272/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.295014  [22304/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.267560  [22336/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.210962  [22368/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.148716  [22400/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.181478  [22432/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.146233  [22464/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.234669  [22496/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.219350  [22528/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.207407  [22560/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.228558  [22592/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.227355  [22624/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.225238  [22656/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.218112  [22688/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.218202  [22720/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.177299  [22752/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.293570  [22784/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.237814  [22816/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.246724  [22848/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.108730  [22880/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.188866  [22912/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.176134  [22944/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.192262  [22976/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.192921  [23008/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.322374  [23040/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.174806  [23072/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.319148  [23104/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.197924  [23136/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.325776  [23168/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.142494  [23200/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.260466  [23232/24872]:   0%|          | 0/777 [00:30<?, ?it/s]
loss: 0.260466  [23232/24872]:  93%|█████████▎| 726/777 [00:30<00:02, 24.18it/s]
loss: 0.244684  [23264/24872]:  93%|█████████▎| 726/777 [00:30<00:02, 24.18it/s]
loss: 0.342234  [23296/24872]:  93%|█████████▎| 726/777 [00:30<00:02, 24.18it/s]
loss: 0.212300  [23328/24872]:  93%|█████████▎| 726/777 [00:30<00:02, 24.18it/s]
loss: 0.181741  [23360/24872]:  93%|█████████▎| 726/777 [00:30<00:02, 24.18it/s]
loss: 0.230813  [23392/24872]:  93%|█████████▎| 726/777 [00:30<00:02, 24.18it/s]
loss: 0.236332  [23424/24872]:  93%|█████████▎| 726/777 [00:30<00:02, 24.18it/s]
loss: 0.329861  [23456/24872]:  93%|█████████▎| 726/777 [00:30<00:02, 24.18it/s]
loss: 0.241722  [23488/24872]:  93%|█████████▎| 726/777 [00:30<00:02, 24.18it/s]
loss: 0.276854  [23520/24872]:  93%|█████████▎| 726/777 [00:30<00:02, 24.18it/s]
loss: 0.217380  [23552/24872]:  93%|█████████▎| 726/777 [00:30<00:02, 24.18it/s]
loss: 0.245094  [23584/24872]:  93%|█████████▎| 726/777 [00:30<00:02, 24.18it/s]
loss: 0.156858  [23616/24872]:  93%|█████████▎| 726/777 [00:30<00:02, 24.18it/s]
loss: 0.316753  [23648/24872]:  93%|█████████▎| 726/777 [00:30<00:02, 24.18it/s]
loss: 0.214877  [23680/24872]:  93%|█████████▎| 726/777 [00:30<00:02, 24.18it/s]
loss: 0.249560  [23712/24872]:  93%|█████████▎| 726/777 [00:30<00:02, 24.18it/s]
loss: 0.249726  [23744/24872]:  93%|█████████▎| 726/777 [00:30<00:02, 24.18it/s]
loss: 0.173725  [23776/24872]:  93%|█████████▎| 726/777 [00:30<00:02, 24.18it/s]
loss: 0.278541  [23808/24872]:  93%|█████████▎| 726/777 [00:30<00:02, 24.18it/s]
loss: 0.195228  [23840/24872]:  93%|█████████▎| 726/777 [00:30<00:02, 24.18it/s]
loss: 0.249049  [23872/24872]:  93%|█████████▎| 726/777 [00:30<00:02, 24.18it/s]
loss: 0.206104  [23904/24872]:  93%|█████████▎| 726/777 [00:30<00:02, 24.18it/s]
loss: 0.363155  [23936/24872]:  93%|█████████▎| 726/777 [00:30<00:02, 24.18it/s]
loss: 0.188020  [23968/24872]:  93%|█████████▎| 726/777 [00:30<00:02, 24.18it/s]
loss: 0.174132  [24000/24872]:  93%|█████████▎| 726/777 [00:31<00:02, 24.18it/s]
loss: 0.172741  [24032/24872]:  93%|█████████▎| 726/777 [00:31<00:02, 24.18it/s]
loss: 0.256323  [24064/24872]:  93%|█████████▎| 726/777 [00:31<00:02, 24.18it/s]
loss: 0.166860  [24096/24872]:  93%|█████████▎| 726/777 [00:31<00:02, 24.18it/s]
loss: 0.320422  [24128/24872]:  93%|█████████▎| 726/777 [00:31<00:02, 24.18it/s]
loss: 0.242151  [24160/24872]:  93%|█████████▎| 726/777 [00:31<00:02, 24.18it/s]
loss: 0.226131  [24192/24872]:  93%|█████████▎| 726/777 [00:31<00:02, 24.18it/s]
loss: 0.234046  [24224/24872]:  93%|█████████▎| 726/777 [00:31<00:02, 24.18it/s]
loss: 0.168483  [24256/24872]:  93%|█████████▎| 726/777 [00:31<00:02, 24.18it/s]
loss: 0.179039  [24288/24872]:  93%|█████████▎| 726/777 [00:31<00:02, 24.18it/s]
loss: 0.278832  [24320/24872]:  93%|█████████▎| 726/777 [00:31<00:02, 24.18it/s]
loss: 0.248030  [24352/24872]:  93%|█████████▎| 726/777 [00:31<00:02, 24.18it/s]
loss: 0.154076  [24384/24872]:  93%|█████████▎| 726/777 [00:31<00:02, 24.18it/s]
loss: 0.341630  [24416/24872]:  93%|█████████▎| 726/777 [00:31<00:02, 24.18it/s]
loss: 0.198793  [24448/24872]:  93%|█████████▎| 726/777 [00:31<00:02, 24.18it/s]
loss: 0.254220  [24480/24872]:  93%|█████████▎| 726/777 [00:31<00:02, 24.18it/s]
loss: 0.240209  [24512/24872]:  93%|█████████▎| 726/777 [00:31<00:02, 24.18it/s]
loss: 0.208710  [24544/24872]:  93%|█████████▎| 726/777 [00:31<00:02, 24.18it/s]
loss: 0.198859  [24576/24872]:  93%|█████████▎| 726/777 [00:31<00:02, 24.18it/s]
loss: 0.199115  [24608/24872]:  93%|█████████▎| 726/777 [00:31<00:02, 24.18it/s]
loss: 0.303159  [24640/24872]:  93%|█████████▎| 726/777 [00:31<00:02, 24.18it/s]
loss: 0.171682  [24672/24872]:  93%|█████████▎| 726/777 [00:31<00:02, 24.18it/s]
loss: 0.189002  [24704/24872]:  93%|█████████▎| 726/777 [00:31<00:02, 24.18it/s]
loss: 0.275190  [24736/24872]:  93%|█████████▎| 726/777 [00:31<00:02, 24.18it/s]
loss: 0.234884  [24768/24872]:  93%|█████████▎| 726/777 [00:31<00:02, 24.18it/s]
loss: 0.189010  [24800/24872]:  93%|█████████▎| 726/777 [00:32<00:02, 24.18it/s]
loss: 0.254258  [24832/24872]:  93%|█████████▎| 726/777 [00:32<00:02, 24.18it/s]
loss: 0.170808  [24864/24872]:  93%|█████████▎| 726/777 [00:32<00:02, 24.18it/s]
loss: 0.207310  [24872/24872]:  93%|█████████▎| 726/777 [00:32<00:02, 24.18it/s]
loss: 0.207310  [24872/24872]: : 778it [00:32, 24.22it/s]
Epoch 4, time=95.63s

  0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.190080  [   32/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.301387  [   64/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.203102  [   96/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.254241  [  128/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.244961  [  160/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.249300  [  192/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.276830  [  224/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.199123  [  256/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.341630  [  288/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.210239  [  320/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.316476  [  352/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.282204  [  384/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.165451  [  416/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.212533  [  448/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.233706  [  480/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.318023  [  512/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.218333  [  544/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.217420  [  576/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.296859  [  608/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.252815  [  640/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.280344  [  672/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.197031  [  704/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.187240  [  736/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.172691  [  768/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.228611  [  800/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.227169  [  832/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.392719  [  864/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.184004  [  896/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.331351  [  928/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.266399  [  960/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.182308  [  992/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.276334  [ 1024/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.195540  [ 1056/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.198070  [ 1088/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.217743  [ 1120/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.186887  [ 1152/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.270635  [ 1184/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.176710  [ 1216/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.225471  [ 1248/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.129584  [ 1280/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.303552  [ 1312/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.256096  [ 1344/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.182209  [ 1376/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.261631  [ 1408/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.180003  [ 1440/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.254820  [ 1472/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.214903  [ 1504/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.250638  [ 1536/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.229622  [ 1568/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.194191  [ 1600/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.269187  [ 1632/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.163699  [ 1664/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.176856  [ 1696/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.220197  [ 1728/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.295724  [ 1760/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.248367  [ 1792/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.304797  [ 1824/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.176635  [ 1856/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.182034  [ 1888/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.201943  [ 1920/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.214503  [ 1952/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.170670  [ 1984/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.149021  [ 2016/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.246953  [ 2048/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.244774  [ 2080/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.297486  [ 2112/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.225273  [ 2144/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.229919  [ 2176/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.275570  [ 2208/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.179063  [ 2240/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.325462  [ 2272/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.169168  [ 2304/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.175192  [ 2336/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.314853  [ 2368/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.206381  [ 2400/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.276510  [ 2432/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.238534  [ 2464/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.398650  [ 2496/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.204620  [ 2528/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.249807  [ 2560/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.218929  [ 2592/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.158823  [ 2624/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.139573  [ 2656/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.254816  [ 2688/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.259329  [ 2720/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.189586  [ 2752/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.232047  [ 2784/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.242140  [ 2816/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.205357  [ 2848/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.183056  [ 2880/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.192042  [ 2912/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.255571  [ 2944/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.217249  [ 2976/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.137355  [ 3008/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.313204  [ 3040/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.225783  [ 3072/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.219741  [ 3104/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.189852  [ 3136/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.231407  [ 3168/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.287167  [ 3200/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.158671  [ 3232/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.239089  [ 3264/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.237780  [ 3296/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.191808  [ 3328/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.186321  [ 3360/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.159179  [ 3392/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.153776  [ 3424/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.285393  [ 3456/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.230827  [ 3488/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.347835  [ 3520/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.284850  [ 3552/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.237400  [ 3584/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.186115  [ 3616/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.263193  [ 3648/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.236696  [ 3680/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.159856  [ 3712/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.194236  [ 3744/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.207028  [ 3776/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.285995  [ 3808/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.165248  [ 3840/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.184529  [ 3872/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.230760  [ 3904/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.290154  [ 3936/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.214472  [ 3968/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.266113  [ 4000/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.188519  [ 4032/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.378324  [ 4064/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.225192  [ 4096/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.194578  [ 4128/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.189853  [ 4160/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.243498  [ 4192/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.293876  [ 4224/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.327112  [ 4256/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.323878  [ 4288/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.129700  [ 4320/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.220681  [ 4352/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.208879  [ 4384/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.289250  [ 4416/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.210308  [ 4448/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.191352  [ 4480/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.173851  [ 4512/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.223070  [ 4544/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.166935  [ 4576/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.269083  [ 4608/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.296928  [ 4640/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.212171  [ 4672/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.315206  [ 4704/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.215578  [ 4736/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.228496  [ 4768/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.244535  [ 4800/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.269723  [ 4832/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.231058  [ 4864/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.293236  [ 4896/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.255197  [ 4928/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.176997  [ 4960/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.141809  [ 4992/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.183467  [ 5024/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.193743  [ 5056/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.251657  [ 5088/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.194235  [ 5120/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.253542  [ 5152/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.281459  [ 5184/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.274859  [ 5216/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.224625  [ 5248/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.427793  [ 5280/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.251655  [ 5312/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.245225  [ 5344/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.212337  [ 5376/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.196467  [ 5408/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.178912  [ 5440/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.214206  [ 5472/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.256455  [ 5504/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.217632  [ 5536/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.253575  [ 5568/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.236200  [ 5600/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.181710  [ 5632/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.222043  [ 5664/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.254605  [ 5696/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.259590  [ 5728/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.161534  [ 5760/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.178688  [ 5792/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.230491  [ 5824/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.390864  [ 5856/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.142612  [ 5888/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.271075  [ 5920/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.229739  [ 5952/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.169574  [ 5984/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.206146  [ 6016/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.186535  [ 6048/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.281382  [ 6080/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.235333  [ 6112/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.176501  [ 6144/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.317434  [ 6176/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.187748  [ 6208/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.172405  [ 6240/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.165760  [ 6272/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.176170  [ 6304/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.278212  [ 6336/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.203146  [ 6368/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.229675  [ 6400/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.210721  [ 6432/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.188858  [ 6464/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.213393  [ 6496/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.163210  [ 6528/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.286229  [ 6560/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.218088  [ 6592/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.235212  [ 6624/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.232540  [ 6656/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.225314  [ 6688/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.265417  [ 6720/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.178536  [ 6752/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.248203  [ 6784/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.291760  [ 6816/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.261929  [ 6848/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.164656  [ 6880/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.260793  [ 6912/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.200820  [ 6944/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.265668  [ 6976/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.170366  [ 7008/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.193448  [ 7040/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.235461  [ 7072/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.204775  [ 7104/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.148720  [ 7136/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.261378  [ 7168/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.162363  [ 7200/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.266121  [ 7232/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.322526  [ 7264/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.273058  [ 7296/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.255782  [ 7328/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.186913  [ 7360/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.231592  [ 7392/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.275175  [ 7424/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.198697  [ 7456/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.343224  [ 7488/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.203617  [ 7520/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.210044  [ 7552/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.159326  [ 7584/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.157360  [ 7616/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.267409  [ 7648/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.237860  [ 7680/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.310999  [ 7712/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.361820  [ 7744/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.282218  [ 7776/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.281281  [ 7808/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.328137  [ 7840/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.257787  [ 7872/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.197587  [ 7904/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.198933  [ 7936/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.221198  [ 7968/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.177459  [ 8000/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.227992  [ 8032/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.242623  [ 8064/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.177472  [ 8096/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.275805  [ 8128/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.281497  [ 8160/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.162189  [ 8192/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.151129  [ 8224/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.189163  [ 8256/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.219845  [ 8288/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.271524  [ 8320/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.214962  [ 8352/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.155640  [ 8384/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.191097  [ 8416/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.169606  [ 8448/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.246302  [ 8480/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.223645  [ 8512/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.267050  [ 8544/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.187675  [ 8576/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.363297  [ 8608/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.280201  [ 8640/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.501747  [ 8672/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.205733  [ 8704/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.232415  [ 8736/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.233803  [ 8768/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.151997  [ 8800/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.170933  [ 8832/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.144006  [ 8864/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.198224  [ 8896/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.194043  [ 8928/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.170959  [ 8960/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.236665  [ 8992/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.370803  [ 9024/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.230694  [ 9056/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.220040  [ 9088/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.173747  [ 9120/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.182107  [ 9152/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.207925  [ 9184/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.214337  [ 9216/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.171321  [ 9248/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.178680  [ 9280/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.244063  [ 9312/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.231880  [ 9344/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.270143  [ 9376/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.209298  [ 9408/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.291940  [ 9440/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.214666  [ 9472/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.205402  [ 9504/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.202189  [ 9536/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.169625  [ 9568/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.187721  [ 9600/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.170970  [ 9632/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.173284  [ 9664/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.242622  [ 9696/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.231012  [ 9728/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.207080  [ 9760/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.231562  [ 9792/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.183713  [ 9824/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.238219  [ 9856/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.279004  [ 9888/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.238795  [ 9920/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.363090  [ 9952/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.255188  [ 9984/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.173244  [10016/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.201303  [10048/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.296097  [10080/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.207584  [10112/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.244841  [10144/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.264408  [10176/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.275789  [10208/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.182544  [10240/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.164556  [10272/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.255078  [10304/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.255917  [10336/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.169508  [10368/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.163254  [10400/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.398363  [10432/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.283183  [10464/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.291335  [10496/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.251927  [10528/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.329747  [10560/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.252815  [10592/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.191730  [10624/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.207473  [10656/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.288728  [10688/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.193670  [10720/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.309780  [10752/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.158905  [10784/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.224689  [10816/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.241453  [10848/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.194509  [10880/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.184011  [10912/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.277185  [10944/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.151486  [10976/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.211477  [11008/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.166175  [11040/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.283933  [11072/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.327457  [11104/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.302701  [11136/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.163049  [11168/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.255621  [11200/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.264876  [11232/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.263796  [11264/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.196371  [11296/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.200901  [11328/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.238821  [11360/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.142725  [11392/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.208912  [11424/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.275796  [11456/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.289367  [11488/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.281339  [11520/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.274592  [11552/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.263906  [11584/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.221131  [11616/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.225812  [11648/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.188858  [11680/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.281414  [11712/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.184972  [11744/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.210486  [11776/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.195625  [11808/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.287675  [11840/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.237115  [11872/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.224644  [11904/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.277680  [11936/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.190389  [11968/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.202245  [12000/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.171066  [12032/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.288278  [12064/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.297484  [12096/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.274684  [12128/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.200135  [12160/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.282745  [12192/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.255370  [12224/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.216711  [12256/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.213316  [12288/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.254961  [12320/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.270241  [12352/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.225629  [12384/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.227501  [12416/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.240441  [12448/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.211346  [12480/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.342546  [12512/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.257347  [12544/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.294554  [12576/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.160704  [12608/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.210013  [12640/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.203554  [12672/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.207844  [12704/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.354136  [12736/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.317982  [12768/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.257058  [12800/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.287973  [12832/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.168374  [12864/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.240295  [12896/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.181685  [12928/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.170789  [12960/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.209566  [12992/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.178840  [13024/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.266345  [13056/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.252033  [13088/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.233276  [13120/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.205793  [13152/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.239803  [13184/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.151477  [13216/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.270320  [13248/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.216547  [13280/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.218766  [13312/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.218660  [13344/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.126222  [13376/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.257157  [13408/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.198463  [13440/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.312367  [13472/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.186784  [13504/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.260183  [13536/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.165805  [13568/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.203299  [13600/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.387337  [13632/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.160982  [13664/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.237319  [13696/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.216329  [13728/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.279432  [13760/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.305871  [13792/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.240264  [13824/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.198654  [13856/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.291549  [13888/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.198745  [13920/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.177232  [13952/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.247323  [13984/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.260933  [14016/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.237592  [14048/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.189026  [14080/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.205103  [14112/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.247486  [14144/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.198471  [14176/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.198809  [14208/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.185461  [14240/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.211802  [14272/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.270914  [14304/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.220554  [14336/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.258038  [14368/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.223366  [14400/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.137141  [14432/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.186535  [14464/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.187439  [14496/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.233578  [14528/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.204905  [14560/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.201238  [14592/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.182727  [14624/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.235013  [14656/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.326347  [14688/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.172750  [14720/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.301196  [14752/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.248235  [14784/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.202684  [14816/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.230764  [14848/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.258443  [14880/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.221862  [14912/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.251565  [14944/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.242550  [14976/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.232674  [15008/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.226030  [15040/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.265746  [15072/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.166094  [15104/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.231024  [15136/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.195077  [15168/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.183605  [15200/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.212789  [15232/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.222327  [15264/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.227931  [15296/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.215729  [15328/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.215260  [15360/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.202409  [15392/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.235710  [15424/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.226977  [15456/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.266925  [15488/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.168541  [15520/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.250570  [15552/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.314220  [15584/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.193345  [15616/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.262140  [15648/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.327166  [15680/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.219048  [15712/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.283041  [15744/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.189547  [15776/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.198086  [15808/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.300327  [15840/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.251962  [15872/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.215221  [15904/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.220968  [15936/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.227535  [15968/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.265413  [16000/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.194197  [16032/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.286229  [16064/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.224032  [16096/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.170210  [16128/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.328671  [16160/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.194582  [16192/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.273256  [16224/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.328911  [16256/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.132040  [16288/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.347475  [16320/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.297849  [16352/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.240490  [16384/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.288676  [16416/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.206121  [16448/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.179049  [16480/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.178115  [16512/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.300009  [16544/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.147072  [16576/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.233040  [16608/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.270829  [16640/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.297284  [16672/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.149285  [16704/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.279406  [16736/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.231918  [16768/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.182886  [16800/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.230514  [16832/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.205498  [16864/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.214849  [16896/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.155262  [16928/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.217948  [16960/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.202629  [16992/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.163073  [17024/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.145377  [17056/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.235567  [17088/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.276143  [17120/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.197458  [17152/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.162876  [17184/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.246031  [17216/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.213977  [17248/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.233668  [17280/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.203687  [17312/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.246341  [17344/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.244295  [17376/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.284871  [17408/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.234649  [17440/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.183281  [17472/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.311950  [17504/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.271464  [17536/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.292274  [17568/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.244865  [17600/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.265468  [17632/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.254332  [17664/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.305537  [17696/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.166632  [17728/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.307052  [17760/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.178404  [17792/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.247684  [17824/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.219778  [17856/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.344027  [17888/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.249295  [17920/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.180807  [17952/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.290438  [17984/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.199424  [18016/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.252731  [18048/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.228502  [18080/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.158037  [18112/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.292850  [18144/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.231292  [18176/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.276007  [18208/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.208405  [18240/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.241874  [18272/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.221780  [18304/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.181693  [18336/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.240556  [18368/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.214279  [18400/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.263452  [18432/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.141439  [18464/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.223954  [18496/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.302876  [18528/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.167155  [18560/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.304705  [18592/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.304343  [18624/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.204974  [18656/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.170875  [18688/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.216363  [18720/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.187717  [18752/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.179163  [18784/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.171353  [18816/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.275506  [18848/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.202587  [18880/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.223976  [18912/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.184109  [18944/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.181905  [18976/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.279718  [19008/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.159604  [19040/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.197042  [19072/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.252109  [19104/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.211105  [19136/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.186713  [19168/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.196711  [19200/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.243253  [19232/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.137044  [19264/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.283992  [19296/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.205381  [19328/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.153217  [19360/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.308686  [19392/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.153729  [19424/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.174174  [19456/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.159215  [19488/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.258382  [19520/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.288724  [19552/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.286249  [19584/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.333474  [19616/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.214481  [19648/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.273245  [19680/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.207961  [19712/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.175073  [19744/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.335151  [19776/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.211228  [19808/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.163130  [19840/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.291289  [19872/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.252679  [19904/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.185128  [19936/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.188494  [19968/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.181014  [20000/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.169496  [20032/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.243250  [20064/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.247319  [20096/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.272650  [20128/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.197908  [20160/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.099666  [20192/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.229242  [20224/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.211123  [20256/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.162186  [20288/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.302013  [20320/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.164569  [20352/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.265502  [20384/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.175000  [20416/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.182259  [20448/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.147319  [20480/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.200092  [20512/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.201428  [20544/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.151182  [20576/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.243694  [20608/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.249467  [20640/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.187040  [20672/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.132657  [20704/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.245599  [20736/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.150113  [20768/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.249957  [20800/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.319454  [20832/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.185782  [20864/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.132298  [20896/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.182195  [20928/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.160231  [20960/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.193834  [20992/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.169843  [21024/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.267396  [21056/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.198245  [21088/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.282943  [21120/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.200786  [21152/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.273568  [21184/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.174639  [21216/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.267693  [21248/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.187718  [21280/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.208589  [21312/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.284181  [21344/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.314038  [21376/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.397660  [21408/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.275944  [21440/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.222124  [21472/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.192027  [21504/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.272951  [21536/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.236063  [21568/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.192968  [21600/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.241313  [21632/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.186410  [21664/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.249590  [21696/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.223477  [21728/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.216919  [21760/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.199807  [21792/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.239471  [21824/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.204861  [21856/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.174544  [21888/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.209838  [21920/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.128947  [21952/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.166584  [21984/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.261183  [22016/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.268770  [22048/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.201490  [22080/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.219246  [22112/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.205388  [22144/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.311159  [22176/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.207269  [22208/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.256144  [22240/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.232049  [22272/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.291443  [22304/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.294765  [22336/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.231425  [22368/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.189118  [22400/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.190046  [22432/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.158145  [22464/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.241266  [22496/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.208627  [22528/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.202678  [22560/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.201407  [22592/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.223594  [22624/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.237786  [22656/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.202593  [22688/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.209869  [22720/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.169872  [22752/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.287376  [22784/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.229877  [22816/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.229068  [22848/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.104239  [22880/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.169200  [22912/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.158002  [22944/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.174990  [22976/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.179601  [23008/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.278446  [23040/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.162705  [23072/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.292155  [23104/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.187773  [23136/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.285316  [23168/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.138999  [23200/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.221685  [23232/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.208961  [23264/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.310506  [23296/24872]:   0%|          | 0/777 [00:30<?, ?it/s]
loss: 0.310506  [23296/24872]:  94%|█████████▎| 728/777 [00:30<00:02, 24.24it/s]
loss: 0.188899  [23328/24872]:  94%|█████████▎| 728/777 [00:30<00:02, 24.24it/s]
loss: 0.165502  [23360/24872]:  94%|█████████▎| 728/777 [00:30<00:02, 24.24it/s]
loss: 0.210971  [23392/24872]:  94%|█████████▎| 728/777 [00:30<00:02, 24.24it/s]
loss: 0.196140  [23424/24872]:  94%|█████████▎| 728/777 [00:30<00:02, 24.24it/s]
loss: 0.287855  [23456/24872]:  94%|█████████▎| 728/777 [00:30<00:02, 24.24it/s]
loss: 0.215361  [23488/24872]:  94%|█████████▎| 728/777 [00:30<00:02, 24.24it/s]
loss: 0.239329  [23520/24872]:  94%|█████████▎| 728/777 [00:30<00:02, 24.24it/s]
loss: 0.196779  [23552/24872]:  94%|█████████▎| 728/777 [00:30<00:02, 24.24it/s]
loss: 0.237473  [23584/24872]:  94%|█████████▎| 728/777 [00:30<00:02, 24.24it/s]
loss: 0.149353  [23616/24872]:  94%|█████████▎| 728/777 [00:30<00:02, 24.24it/s]
loss: 0.282091  [23648/24872]:  94%|█████████▎| 728/777 [00:30<00:02, 24.24it/s]
loss: 0.201593  [23680/24872]:  94%|█████████▎| 728/777 [00:30<00:02, 24.24it/s]
loss: 0.224037  [23712/24872]:  94%|█████████▎| 728/777 [00:30<00:02, 24.24it/s]
loss: 0.244668  [23744/24872]:  94%|█████████▎| 728/777 [00:30<00:02, 24.24it/s]
loss: 0.174341  [23776/24872]:  94%|█████████▎| 728/777 [00:30<00:02, 24.24it/s]
loss: 0.223451  [23808/24872]:  94%|█████████▎| 728/777 [00:30<00:02, 24.24it/s]
loss: 0.160269  [23840/24872]:  94%|█████████▎| 728/777 [00:30<00:02, 24.24it/s]
loss: 0.208183  [23872/24872]:  94%|█████████▎| 728/777 [00:30<00:02, 24.24it/s]
loss: 0.195317  [23904/24872]:  94%|█████████▎| 728/777 [00:30<00:02, 24.24it/s]
loss: 0.344156  [23936/24872]:  94%|█████████▎| 728/777 [00:30<00:02, 24.24it/s]
loss: 0.175776  [23968/24872]:  94%|█████████▎| 728/777 [00:30<00:02, 24.24it/s]
loss: 0.159474  [24000/24872]:  94%|█████████▎| 728/777 [00:30<00:02, 24.24it/s]
loss: 0.144877  [24032/24872]:  94%|█████████▎| 728/777 [00:30<00:02, 24.24it/s]
loss: 0.231458  [24064/24872]:  94%|█████████▎| 728/777 [00:31<00:02, 24.24it/s]
loss: 0.153034  [24096/24872]:  94%|█████████▎| 728/777 [00:31<00:02, 24.24it/s]
loss: 0.287441  [24128/24872]:  94%|█████████▎| 728/777 [00:31<00:02, 24.24it/s]
loss: 0.243038  [24160/24872]:  94%|█████████▎| 728/777 [00:31<00:02, 24.24it/s]
loss: 0.172962  [24192/24872]:  94%|█████████▎| 728/777 [00:31<00:02, 24.24it/s]
loss: 0.211541  [24224/24872]:  94%|█████████▎| 728/777 [00:31<00:02, 24.24it/s]
loss: 0.166176  [24256/24872]:  94%|█████████▎| 728/777 [00:31<00:02, 24.24it/s]
loss: 0.164050  [24288/24872]:  94%|█████████▎| 728/777 [00:31<00:02, 24.24it/s]
loss: 0.258868  [24320/24872]:  94%|█████████▎| 728/777 [00:31<00:02, 24.24it/s]
loss: 0.240365  [24352/24872]:  94%|█████████▎| 728/777 [00:31<00:02, 24.24it/s]
loss: 0.149504  [24384/24872]:  94%|█████████▎| 728/777 [00:31<00:02, 24.24it/s]
loss: 0.317467  [24416/24872]:  94%|█████████▎| 728/777 [00:31<00:02, 24.24it/s]
loss: 0.180037  [24448/24872]:  94%|█████████▎| 728/777 [00:31<00:02, 24.24it/s]
loss: 0.244772  [24480/24872]:  94%|█████████▎| 728/777 [00:31<00:02, 24.24it/s]
loss: 0.215650  [24512/24872]:  94%|█████████▎| 728/777 [00:31<00:02, 24.24it/s]
loss: 0.212149  [24544/24872]:  94%|█████████▎| 728/777 [00:31<00:02, 24.24it/s]
loss: 0.181679  [24576/24872]:  94%|█████████▎| 728/777 [00:31<00:02, 24.24it/s]
loss: 0.174152  [24608/24872]:  94%|█████████▎| 728/777 [00:31<00:02, 24.24it/s]
loss: 0.267714  [24640/24872]:  94%|█████████▎| 728/777 [00:31<00:02, 24.24it/s]
loss: 0.160238  [24672/24872]:  94%|█████████▎| 728/777 [00:31<00:02, 24.24it/s]
loss: 0.171379  [24704/24872]:  94%|█████████▎| 728/777 [00:31<00:02, 24.24it/s]
loss: 0.263279  [24736/24872]:  94%|█████████▎| 728/777 [00:31<00:02, 24.24it/s]
loss: 0.213443  [24768/24872]:  94%|█████████▎| 728/777 [00:31<00:02, 24.24it/s]
loss: 0.174841  [24800/24872]:  94%|█████████▎| 728/777 [00:31<00:02, 24.24it/s]
loss: 0.252641  [24832/24872]:  94%|█████████▎| 728/777 [00:32<00:02, 24.24it/s]
loss: 0.149507  [24864/24872]:  94%|█████████▎| 728/777 [00:32<00:02, 24.24it/s]
loss: 0.217583  [24872/24872]:  94%|█████████▎| 728/777 [00:32<00:02, 24.24it/s]
loss: 0.217583  [24872/24872]: : 778it [00:32, 24.24it/s]
Epoch 5, time=127.73s

  0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.184487  [   32/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.268028  [   64/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.184546  [   96/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.238333  [  128/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.238619  [  160/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.241502  [  192/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.248014  [  224/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.187908  [  256/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.308608  [  288/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.189654  [  320/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.299007  [  352/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.274413  [  384/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.155499  [  416/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.185493  [  448/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.227523  [  480/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.325771  [  512/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.210656  [  544/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.204029  [  576/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.278616  [  608/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.245539  [  640/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.265067  [  672/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.195817  [  704/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.171693  [  736/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.156800  [  768/24872]:   0%|          | 0/777 [00:00<?, ?it/s]
loss: 0.210632  [  800/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.223507  [  832/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.359056  [  864/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.171858  [  896/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.316232  [  928/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.250520  [  960/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.176117  [  992/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.279893  [ 1024/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.191590  [ 1056/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.185357  [ 1088/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.207776  [ 1120/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.180038  [ 1152/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.238349  [ 1184/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.169577  [ 1216/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.222956  [ 1248/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.139792  [ 1280/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.298308  [ 1312/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.257465  [ 1344/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.180492  [ 1376/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.247313  [ 1408/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.173595  [ 1440/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.226574  [ 1472/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.198266  [ 1504/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.222018  [ 1536/24872]:   0%|          | 0/777 [00:01<?, ?it/s]
loss: 0.213598  [ 1568/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.188487  [ 1600/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.254637  [ 1632/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.175812  [ 1664/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.162722  [ 1696/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.206478  [ 1728/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.277153  [ 1760/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.225374  [ 1792/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.277928  [ 1824/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.166010  [ 1856/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.163041  [ 1888/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.194185  [ 1920/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.185212  [ 1952/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.141050  [ 1984/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.140585  [ 2016/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.207821  [ 2048/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.223443  [ 2080/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.280901  [ 2112/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.199359  [ 2144/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.201745  [ 2176/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.242774  [ 2208/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.176942  [ 2240/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.301763  [ 2272/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.156402  [ 2304/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.156821  [ 2336/24872]:   0%|          | 0/777 [00:02<?, ?it/s]
loss: 0.295274  [ 2368/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.175190  [ 2400/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.266213  [ 2432/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.209021  [ 2464/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.386048  [ 2496/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.183170  [ 2528/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.226629  [ 2560/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.193772  [ 2592/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.158141  [ 2624/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.131498  [ 2656/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.232991  [ 2688/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.235987  [ 2720/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.192181  [ 2752/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.214179  [ 2784/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.219568  [ 2816/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.177956  [ 2848/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.154159  [ 2880/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.168615  [ 2912/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.218368  [ 2944/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.209492  [ 2976/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.124455  [ 3008/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.294902  [ 3040/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.205312  [ 3072/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.190666  [ 3104/24872]:   0%|          | 0/777 [00:03<?, ?it/s]
loss: 0.170737  [ 3136/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.216147  [ 3168/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.281551  [ 3200/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.139902  [ 3232/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.217451  [ 3264/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.217823  [ 3296/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.175555  [ 3328/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.167431  [ 3360/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.132396  [ 3392/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.145414  [ 3424/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.257855  [ 3456/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.220469  [ 3488/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.321512  [ 3520/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.257178  [ 3552/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.222912  [ 3584/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.168306  [ 3616/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.239199  [ 3648/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.210007  [ 3680/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.140636  [ 3712/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.182428  [ 3744/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.185261  [ 3776/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.233902  [ 3808/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.159175  [ 3840/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.187733  [ 3872/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.225390  [ 3904/24872]:   0%|          | 0/777 [00:04<?, ?it/s]
loss: 0.266089  [ 3936/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.194197  [ 3968/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.231465  [ 4000/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.185335  [ 4032/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.341022  [ 4064/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.165785  [ 4096/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.167842  [ 4128/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.176901  [ 4160/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.195296  [ 4192/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.270394  [ 4224/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.314879  [ 4256/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.237131  [ 4288/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.122665  [ 4320/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.189103  [ 4352/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.173810  [ 4384/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.223680  [ 4416/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.185419  [ 4448/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.177030  [ 4480/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.168867  [ 4512/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.202805  [ 4544/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.151421  [ 4576/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.249384  [ 4608/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.263369  [ 4640/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.190023  [ 4672/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.291236  [ 4704/24872]:   0%|          | 0/777 [00:05<?, ?it/s]
loss: 0.179663  [ 4736/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.206802  [ 4768/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.222480  [ 4800/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.233041  [ 4832/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.209469  [ 4864/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.276064  [ 4896/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.231135  [ 4928/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.153929  [ 4960/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.124786  [ 4992/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.160978  [ 5024/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.169353  [ 5056/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.216516  [ 5088/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.184550  [ 5120/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.221687  [ 5152/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.246030  [ 5184/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.245780  [ 5216/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.200677  [ 5248/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.394160  [ 5280/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.225466  [ 5312/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.226198  [ 5344/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.187757  [ 5376/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.170377  [ 5408/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.162241  [ 5440/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.184210  [ 5472/24872]:   0%|          | 0/777 [00:06<?, ?it/s]
loss: 0.249458  [ 5504/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.197046  [ 5536/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.240921  [ 5568/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.214630  [ 5600/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.171587  [ 5632/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.201181  [ 5664/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.219871  [ 5696/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.246956  [ 5728/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.162874  [ 5760/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.169593  [ 5792/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.198671  [ 5824/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.389442  [ 5856/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.127707  [ 5888/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.248250  [ 5920/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.214452  [ 5952/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.153689  [ 5984/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.189549  [ 6016/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.176826  [ 6048/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.260629  [ 6080/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.219094  [ 6112/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.160869  [ 6144/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.279108  [ 6176/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.160330  [ 6208/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.165577  [ 6240/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.158131  [ 6272/24872]:   0%|          | 0/777 [00:07<?, ?it/s]
loss: 0.170436  [ 6304/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.248961  [ 6336/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.206759  [ 6368/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.227307  [ 6400/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.193735  [ 6432/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.172946  [ 6464/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.198560  [ 6496/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.157590  [ 6528/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.275918  [ 6560/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.188258  [ 6592/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.213988  [ 6624/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.220092  [ 6656/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.180888  [ 6688/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.248899  [ 6720/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.176714  [ 6752/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.239144  [ 6784/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.273687  [ 6816/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.224503  [ 6848/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.143452  [ 6880/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.247562  [ 6912/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.193948  [ 6944/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.248761  [ 6976/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.151374  [ 7008/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.170274  [ 7040/24872]:   0%|          | 0/777 [00:08<?, ?it/s]
loss: 0.224763  [ 7072/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.201737  [ 7104/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.154792  [ 7136/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.244247  [ 7168/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.154688  [ 7200/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.226519  [ 7232/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.293908  [ 7264/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.264042  [ 7296/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.261079  [ 7328/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.184937  [ 7360/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.249922  [ 7392/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.233020  [ 7424/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.184837  [ 7456/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.301918  [ 7488/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.221730  [ 7520/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.190455  [ 7552/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.142915  [ 7584/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.152851  [ 7616/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.251529  [ 7648/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.216069  [ 7680/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.273279  [ 7712/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.321129  [ 7744/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.260381  [ 7776/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.272932  [ 7808/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.310966  [ 7840/24872]:   0%|          | 0/777 [00:09<?, ?it/s]
loss: 0.210424  [ 7872/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.208786  [ 7904/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.176979  [ 7936/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.189728  [ 7968/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.165676  [ 8000/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.221412  [ 8032/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.222315  [ 8064/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.162752  [ 8096/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.249454  [ 8128/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.280687  [ 8160/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.144309  [ 8192/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.139452  [ 8224/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.194847  [ 8256/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.208179  [ 8288/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.226479  [ 8320/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.203353  [ 8352/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.158960  [ 8384/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.188171  [ 8416/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.182359  [ 8448/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.218488  [ 8480/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.206799  [ 8512/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.258811  [ 8544/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.181351  [ 8576/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.329634  [ 8608/24872]:   0%|          | 0/777 [00:10<?, ?it/s]
loss: 0.253220  [ 8640/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.358961  [ 8672/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.178169  [ 8704/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.229813  [ 8736/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.249194  [ 8768/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.132241  [ 8800/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.162429  [ 8832/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.147840  [ 8864/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.194007  [ 8896/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.181347  [ 8928/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.161351  [ 8960/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.228689  [ 8992/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.319909  [ 9024/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.203834  [ 9056/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.201564  [ 9088/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.167393  [ 9120/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.155417  [ 9152/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.163907  [ 9184/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.177658  [ 9216/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.170634  [ 9248/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.148079  [ 9280/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.209062  [ 9312/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.216462  [ 9344/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.230856  [ 9376/24872]:   0%|          | 0/777 [00:11<?, ?it/s]
loss: 0.211876  [ 9408/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.300563  [ 9440/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.220088  [ 9472/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.183788  [ 9504/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.173661  [ 9536/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.172475  [ 9568/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.176068  [ 9600/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.163555  [ 9632/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.184684  [ 9664/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.253230  [ 9696/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.234412  [ 9728/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.269971  [ 9760/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.190410  [ 9792/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.183432  [ 9824/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.264421  [ 9856/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.299855  [ 9888/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.250576  [ 9920/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.326980  [ 9952/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.325761  [ 9984/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.173842  [10016/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.222986  [10048/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.289973  [10080/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.201933  [10112/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.267757  [10144/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.240842  [10176/24872]:   0%|          | 0/777 [00:12<?, ?it/s]
loss: 0.275648  [10208/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.170299  [10240/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.184602  [10272/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.255797  [10304/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.263590  [10336/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.181506  [10368/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.198269  [10400/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.394338  [10432/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.318124  [10464/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.360026  [10496/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.257232  [10528/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.402894  [10560/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.245394  [10592/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.184805  [10624/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.222991  [10656/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.325542  [10688/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.199724  [10720/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.351470  [10752/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.162471  [10784/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.278717  [10816/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.240311  [10848/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.218861  [10880/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.218323  [10912/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.345283  [10944/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.156457  [10976/24872]:   0%|          | 0/777 [00:13<?, ?it/s]
loss: 0.266801  [11008/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.191330  [11040/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.281041  [11072/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.307290  [11104/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.317760  [11136/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.179871  [11168/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.248855  [11200/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.288953  [11232/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.278400  [11264/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.199702  [11296/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.215189  [11328/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.250183  [11360/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.136757  [11392/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.195541  [11424/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.258006  [11456/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.307404  [11488/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.270499  [11520/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.267315  [11552/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.263414  [11584/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.202039  [11616/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.211774  [11648/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.174018  [11680/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.275765  [11712/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.171159  [11744/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.199638  [11776/24872]:   0%|          | 0/777 [00:14<?, ?it/s]
loss: 0.175453  [11808/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.281067  [11840/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.228057  [11872/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.205150  [11904/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.266104  [11936/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.175079  [11968/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.175769  [12000/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.158471  [12032/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.265265  [12064/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.276859  [12096/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.246964  [12128/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.175443  [12160/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.280843  [12192/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.226496  [12224/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.191900  [12256/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.187271  [12288/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.246101  [12320/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.239643  [12352/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.190593  [12384/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.188295  [12416/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.223619  [12448/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.197898  [12480/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.316466  [12512/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.236978  [12544/24872]:   0%|          | 0/777 [00:15<?, ?it/s]
loss: 0.277822  [12576/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.146687  [12608/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.187377  [12640/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.176804  [12672/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.188567  [12704/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.279751  [12736/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.320720  [12768/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.245011  [12800/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.260546  [12832/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.160632  [12864/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.219612  [12896/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.152440  [12928/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.161621  [12960/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.188991  [12992/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.129737  [13024/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.236509  [13056/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.217441  [13088/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.193203  [13120/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.180964  [13152/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.218673  [13184/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.113852  [13216/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.228320  [13248/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.185991  [13280/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.191892  [13312/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.186313  [13344/24872]:   0%|          | 0/777 [00:16<?, ?it/s]
loss: 0.118403  [13376/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.226494  [13408/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.188894  [13440/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.282231  [13472/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.173238  [13504/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.232437  [13536/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.154373  [13568/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.209089  [13600/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.339260  [13632/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.167654  [13664/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.217741  [13696/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.173460  [13728/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.307790  [13760/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.285600  [13792/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.198876  [13824/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.193789  [13856/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.263346  [13888/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.180522  [13920/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.174520  [13952/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.228566  [13984/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.218705  [14016/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.219347  [14048/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.172117  [14080/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.200195  [14112/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.248676  [14144/24872]:   0%|          | 0/777 [00:17<?, ?it/s]
loss: 0.190908  [14176/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.196968  [14208/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.178270  [14240/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.191559  [14272/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.243417  [14304/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.211964  [14336/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.247274  [14368/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.188830  [14400/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.147929  [14432/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.180822  [14464/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.164672  [14496/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.225546  [14528/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.202903  [14560/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.181402  [14592/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.168620  [14624/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.230261  [14656/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.294027  [14688/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.158400  [14720/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.264110  [14752/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.200962  [14784/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.193726  [14816/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.201205  [14848/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.238540  [14880/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.208971  [14912/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.280177  [14944/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.214193  [14976/24872]:   0%|          | 0/777 [00:18<?, ?it/s]
loss: 0.213784  [15008/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.217948  [15040/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.223132  [15072/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.143530  [15104/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.223080  [15136/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.173652  [15168/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.160746  [15200/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.193044  [15232/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.219789  [15264/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.199969  [15296/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.196262  [15328/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.196708  [15360/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.185539  [15392/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.215282  [15424/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.188438  [15456/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.235755  [15488/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.154073  [15520/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.213893  [15552/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.287137  [15584/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.166068  [15616/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.197423  [15648/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.266139  [15680/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.186469  [15712/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.223197  [15744/24872]:   0%|          | 0/777 [00:19<?, ?it/s]
loss: 0.158668  [15776/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.161818  [15808/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.236076  [15840/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.221046  [15872/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.191332  [15904/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.179100  [15936/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.177516  [15968/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.242800  [16000/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.157098  [16032/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.244844  [16064/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.179355  [16096/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.157621  [16128/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.280550  [16160/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.160210  [16192/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.250372  [16224/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.248580  [16256/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.112442  [16288/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.281110  [16320/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.250437  [16352/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.198963  [16384/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.266970  [16416/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.183987  [16448/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.155860  [16480/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.143509  [16512/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.285870  [16544/24872]:   0%|          | 0/777 [00:20<?, ?it/s]
loss: 0.114743  [16576/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.185451  [16608/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.235392  [16640/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.266339  [16672/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.125498  [16704/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.234702  [16736/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.178062  [16768/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.148218  [16800/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.197787  [16832/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.182083  [16864/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.184380  [16896/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.145602  [16928/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.184404  [16960/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.180320  [16992/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.150624  [17024/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.147330  [17056/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.217215  [17088/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.249063  [17120/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.169716  [17152/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.120778  [17184/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.201157  [17216/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.162079  [17248/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.180297  [17280/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.189659  [17312/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.228647  [17344/24872]:   0%|          | 0/777 [00:21<?, ?it/s]
loss: 0.201874  [17376/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.276566  [17408/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.226167  [17440/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.196564  [17472/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.264162  [17504/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.248460  [17536/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.268874  [17568/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.196166  [17600/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.215904  [17632/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.230269  [17664/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.251043  [17696/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.162418  [17728/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.248222  [17760/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.153611  [17792/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.227349  [17824/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.230989  [17856/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.321469  [17888/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.206423  [17920/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.132472  [17952/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.238983  [17984/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.178915  [18016/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.224837  [18048/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.204553  [18080/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.148392  [18112/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.305528  [18144/24872]:   0%|          | 0/777 [00:22<?, ?it/s]
loss: 0.182119  [18176/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.243129  [18208/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.196100  [18240/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.230752  [18272/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.213495  [18304/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.158886  [18336/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.235075  [18368/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.197038  [18400/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.213581  [18432/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.111312  [18464/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.204669  [18496/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.270054  [18528/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.170217  [18560/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.300832  [18592/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.238461  [18624/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.191592  [18656/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.166080  [18688/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.218148  [18720/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.184979  [18752/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.172978  [18784/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.175530  [18816/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.263918  [18848/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.179543  [18880/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.225042  [18912/24872]:   0%|          | 0/777 [00:23<?, ?it/s]
loss: 0.223385  [18944/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.178276  [18976/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.262281  [19008/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.156209  [19040/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.188742  [19072/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.220405  [19104/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.167597  [19136/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.153090  [19168/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.178007  [19200/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.230411  [19232/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.126273  [19264/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.259132  [19296/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.187442  [19328/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.162142  [19360/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.312344  [19392/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.124134  [19424/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.179196  [19456/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.139016  [19488/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.236216  [19520/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.272284  [19552/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.277810  [19584/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.316352  [19616/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.199405  [19648/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.261601  [19680/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.216947  [19712/24872]:   0%|          | 0/777 [00:24<?, ?it/s]
loss: 0.162551  [19744/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.318559  [19776/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.201454  [19808/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.174720  [19840/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.275013  [19872/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.207977  [19904/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.158325  [19936/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.191578  [19968/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.218957  [20000/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.188816  [20032/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.231126  [20064/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.236942  [20096/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.233877  [20128/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.182716  [20160/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.112936  [20192/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.210450  [20224/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.203484  [20256/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.170230  [20288/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.257277  [20320/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.160853  [20352/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.264457  [20384/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.202295  [20416/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.186025  [20448/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.174534  [20480/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.231911  [20512/24872]:   0%|          | 0/777 [00:25<?, ?it/s]
loss: 0.180832  [20544/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.162922  [20576/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.323971  [20608/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.278555  [20640/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.187062  [20672/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.169742  [20704/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.280803  [20736/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.142973  [20768/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.226349  [20800/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.314026  [20832/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.201804  [20864/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.127747  [20896/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.180813  [20928/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.169859  [20960/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.191793  [20992/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.168820  [21024/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.320643  [21056/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.195283  [21088/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.263558  [21120/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.203475  [21152/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.328041  [21184/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.170692  [21216/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.280063  [21248/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.222936  [21280/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.232620  [21312/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.222466  [21344/24872]:   0%|          | 0/777 [00:26<?, ?it/s]
loss: 0.313402  [21376/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.393149  [21408/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.269758  [21440/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.224764  [21472/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.187728  [21504/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.243637  [21536/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.218983  [21568/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.173330  [21600/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.213436  [21632/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.173946  [21664/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.212529  [21696/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.143591  [21728/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.159568  [21760/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.156704  [21792/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.191097  [21824/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.183755  [21856/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.146881  [21888/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.176003  [21920/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.132528  [21952/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.138967  [21984/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.202791  [22016/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.244094  [22048/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.203790  [22080/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.191983  [22112/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.153552  [22144/24872]:   0%|          | 0/777 [00:27<?, ?it/s]
loss: 0.207769  [22176/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.176555  [22208/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.231793  [22240/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.236986  [22272/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.317226  [22304/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.219293  [22336/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.214391  [22368/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.155736  [22400/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.176400  [22432/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.113130  [22464/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.201848  [22496/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.189903  [22528/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.156784  [22560/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.182320  [22592/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.222461  [22624/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.217577  [22656/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.184329  [22688/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.179371  [22720/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.160131  [22752/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.252535  [22784/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.211395  [22816/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.215233  [22848/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.107393  [22880/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.159843  [22912/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.128523  [22944/24872]:   0%|          | 0/777 [00:28<?, ?it/s]
loss: 0.128255  [22976/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.162524  [23008/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.253909  [23040/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.141670  [23072/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.262513  [23104/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.155989  [23136/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.266513  [23168/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.118596  [23200/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.200894  [23232/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.192943  [23264/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.308918  [23296/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.153642  [23328/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.156911  [23360/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.200682  [23392/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.173011  [23424/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.275910  [23456/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.216240  [23488/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.228651  [23520/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.203906  [23552/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.211696  [23584/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.131109  [23616/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.257107  [23648/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.192307  [23680/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.189760  [23712/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.198439  [23744/24872]:   0%|          | 0/777 [00:29<?, ?it/s]
loss: 0.142595  [23776/24872]:   0%|          | 0/777 [00:30<?, ?it/s]
loss: 0.142595  [23776/24872]:  96%|█████████▌| 743/777 [00:30<00:01, 24.75it/s]
loss: 0.202703  [23808/24872]:  96%|█████████▌| 743/777 [00:30<00:01, 24.75it/s]
loss: 0.137616  [23840/24872]:  96%|█████████▌| 743/777 [00:30<00:01, 24.75it/s]
loss: 0.181918  [23872/24872]:  96%|█████████▌| 743/777 [00:30<00:01, 24.75it/s]
loss: 0.173316  [23904/24872]:  96%|█████████▌| 743/777 [00:30<00:01, 24.75it/s]
loss: 0.307944  [23936/24872]:  96%|█████████▌| 743/777 [00:30<00:01, 24.75it/s]
loss: 0.133633  [23968/24872]:  96%|█████████▌| 743/777 [00:30<00:01, 24.75it/s]
loss: 0.140645  [24000/24872]:  96%|█████████▌| 743/777 [00:30<00:01, 24.75it/s]
loss: 0.131274  [24032/24872]:  96%|█████████▌| 743/777 [00:30<00:01, 24.75it/s]
loss: 0.218077  [24064/24872]:  96%|█████████▌| 743/777 [00:30<00:01, 24.75it/s]
loss: 0.125636  [24096/24872]:  96%|█████████▌| 743/777 [00:30<00:01, 24.75it/s]
loss: 0.274108  [24128/24872]:  96%|█████████▌| 743/777 [00:30<00:01, 24.75it/s]
loss: 0.231587  [24160/24872]:  96%|█████████▌| 743/777 [00:30<00:01, 24.75it/s]
loss: 0.139581  [24192/24872]:  96%|█████████▌| 743/777 [00:30<00:01, 24.75it/s]
loss: 0.161746  [24224/24872]:  96%|█████████▌| 743/777 [00:30<00:01, 24.75it/s]
loss: 0.106568  [24256/24872]:  96%|█████████▌| 743/777 [00:30<00:01, 24.75it/s]
loss: 0.140085  [24288/24872]:  96%|█████████▌| 743/777 [00:30<00:01, 24.75it/s]
loss: 0.221657  [24320/24872]:  96%|█████████▌| 743/777 [00:30<00:01, 24.75it/s]
loss: 0.190212  [24352/24872]:  96%|█████████▌| 743/777 [00:30<00:01, 24.75it/s]
loss: 0.131423  [24384/24872]:  96%|█████████▌| 743/777 [00:30<00:01, 24.75it/s]
loss: 0.244949  [24416/24872]:  96%|█████████▌| 743/777 [00:30<00:01, 24.75it/s]
loss: 0.144375  [24448/24872]:  96%|█████████▌| 743/777 [00:30<00:01, 24.75it/s]
loss: 0.196630  [24480/24872]:  96%|█████████▌| 743/777 [00:30<00:01, 24.75it/s]
loss: 0.184308  [24512/24872]:  96%|█████████▌| 743/777 [00:30<00:01, 24.75it/s]
loss: 0.177361  [24544/24872]:  96%|█████████▌| 743/777 [00:30<00:01, 24.75it/s]
loss: 0.173422  [24576/24872]:  96%|█████████▌| 743/777 [00:31<00:01, 24.75it/s]
loss: 0.150845  [24608/24872]:  96%|█████████▌| 743/777 [00:31<00:01, 24.75it/s]
loss: 0.233674  [24640/24872]:  96%|█████████▌| 743/777 [00:31<00:01, 24.75it/s]
loss: 0.140672  [24672/24872]:  96%|█████████▌| 743/777 [00:31<00:01, 24.75it/s]
loss: 0.139329  [24704/24872]:  96%|█████████▌| 743/777 [00:31<00:01, 24.75it/s]
loss: 0.237641  [24736/24872]:  96%|█████████▌| 743/777 [00:31<00:01, 24.75it/s]
loss: 0.189219  [24768/24872]:  96%|█████████▌| 743/777 [00:31<00:01, 24.75it/s]
loss: 0.156036  [24800/24872]:  96%|█████████▌| 743/777 [00:31<00:01, 24.75it/s]
loss: 0.237722  [24832/24872]:  96%|█████████▌| 743/777 [00:31<00:01, 24.75it/s]
loss: 0.141117  [24864/24872]:  96%|█████████▌| 743/777 [00:31<00:01, 24.75it/s]
loss: 0.199975  [24872/24872]:  96%|█████████▌| 743/777 [00:31<00:01, 24.75it/s]
loss: 0.199975  [24872/24872]: : 778it [00:31, 24.77it/s]
-------------------------------
LR=0.0001, batch_size=64
-------------------------------
Epoch 1, time=159.13s

  0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.191362  [   64/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.307791  [  128/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.224230  [  192/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.279302  [  256/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.277204  [  320/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.258955  [  384/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.157431  [  448/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.249664  [  512/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.232416  [  576/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.284636  [  640/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.215462  [  704/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.169824  [  768/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.201766  [  832/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.251067  [  896/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.235157  [  960/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.200749  [ 1024/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.158294  [ 1088/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.160163  [ 1152/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.203251  [ 1216/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.153660  [ 1280/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.236096  [ 1344/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.188776  [ 1408/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.176138  [ 1472/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.174874  [ 1536/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.211362  [ 1600/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.183136  [ 1664/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.153854  [ 1728/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.236833  [ 1792/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.207110  [ 1856/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.141218  [ 1920/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.142771  [ 1984/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.165459  [ 2048/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.201413  [ 2112/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.191127  [ 2176/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.184677  [ 2240/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.192441  [ 2304/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.208922  [ 2368/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.180912  [ 2432/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.266760  [ 2496/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.173942  [ 2560/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.154204  [ 2624/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.153277  [ 2688/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.163186  [ 2752/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.183419  [ 2816/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.144250  [ 2880/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.149126  [ 2944/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.161401  [ 3008/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.228985  [ 3072/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.154530  [ 3136/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.198548  [ 3200/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.153870  [ 3264/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.160367  [ 3328/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.114213  [ 3392/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.165266  [ 3456/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.216061  [ 3520/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.166851  [ 3584/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.174313  [ 3648/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.132399  [ 3712/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.156650  [ 3776/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.171712  [ 3840/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.162389  [ 3904/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.185253  [ 3968/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.188641  [ 4032/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.217297  [ 4096/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.147008  [ 4160/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.170709  [ 4224/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.224025  [ 4288/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.122374  [ 4352/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.193254  [ 4416/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.156426  [ 4480/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.154314  [ 4544/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.171296  [ 4608/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.185334  [ 4672/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.202489  [ 4736/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.177009  [ 4800/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.171319  [ 4864/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.224285  [ 4928/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.122154  [ 4992/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.143881  [ 5056/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.160525  [ 5120/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.194334  [ 5184/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.201236  [ 5248/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.248181  [ 5312/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.187355  [ 5376/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.139467  [ 5440/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.187508  [ 5504/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.192309  [ 5568/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.177676  [ 5632/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.184939  [ 5696/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.171288  [ 5760/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.144461  [ 5824/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.235083  [ 5888/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.205166  [ 5952/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.145589  [ 6016/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.191503  [ 6080/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.164601  [ 6144/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.196013  [ 6208/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.136793  [ 6272/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.181422  [ 6336/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.194735  [ 6400/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.153929  [ 6464/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.164904  [ 6528/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.197826  [ 6592/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.188723  [ 6656/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.189161  [ 6720/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.153877  [ 6784/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.195797  [ 6848/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.151477  [ 6912/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.198056  [ 6976/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.143211  [ 7040/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.174628  [ 7104/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.150134  [ 7168/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.157433  [ 7232/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.222027  [ 7296/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.209115  [ 7360/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.209856  [ 7424/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.198880  [ 7488/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.175044  [ 7552/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.124525  [ 7616/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.211628  [ 7680/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.227821  [ 7744/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.231248  [ 7808/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.231708  [ 7872/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.146223  [ 7936/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.153288  [ 8000/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.194328  [ 8064/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.171856  [ 8128/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.194196  [ 8192/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.148274  [ 8256/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.182268  [ 8320/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.153135  [ 8384/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.138534  [ 8448/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.188656  [ 8512/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.209938  [ 8576/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.231830  [ 8640/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.239458  [ 8704/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.191799  [ 8768/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.118470  [ 8832/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.132665  [ 8896/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.142667  [ 8960/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.226490  [ 9024/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.162547  [ 9088/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.129348  [ 9152/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.144468  [ 9216/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.132564  [ 9280/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.186043  [ 9344/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.187937  [ 9408/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.225720  [ 9472/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.142412  [ 9536/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.134751  [ 9600/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.110381  [ 9664/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.207834  [ 9728/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.159677  [ 9792/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.188222  [ 9856/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.197355  [ 9920/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.190176  [ 9984/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.146310  [10048/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.176556  [10112/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.189737  [10176/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.169496  [10240/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.157873  [10304/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.166252  [10368/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.224744  [10432/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.216103  [10496/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.210091  [10560/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.155900  [10624/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.173766  [10688/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.166703  [10752/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.152078  [10816/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.165315  [10880/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.179400  [10944/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.145410  [11008/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.170469  [11072/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.245824  [11136/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.170745  [11200/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.188804  [11264/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.163741  [11328/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.145611  [11392/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.198746  [11456/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.227862  [11520/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.241638  [11584/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.174385  [11648/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.180816  [11712/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.162147  [11776/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.188008  [11840/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.187769  [11904/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.192126  [11968/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.141225  [12032/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.236379  [12096/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.192910  [12160/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.210445  [12224/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.142417  [12288/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.214021  [12352/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.155134  [12416/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.173745  [12480/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.236087  [12544/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.167062  [12608/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.150535  [12672/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.194657  [12736/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.237225  [12800/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.182546  [12864/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.151600  [12928/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.125304  [12992/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.145920  [13056/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.176961  [13120/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.166390  [13184/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.137884  [13248/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.161340  [13312/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.122917  [13376/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.174683  [13440/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.189548  [13504/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.168348  [13568/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.232053  [13632/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.165732  [13696/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.181615  [13760/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.186899  [13824/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.195955  [13888/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.141730  [13952/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.197233  [14016/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.164106  [14080/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.191446  [14144/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.163587  [14208/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.151089  [14272/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.182244  [14336/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.162573  [14400/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.120099  [14464/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.178337  [14528/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.162420  [14592/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.185498  [14656/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.170237  [14720/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.195925  [14784/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.181056  [14848/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.183677  [14912/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.183207  [14976/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.166272  [15040/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.144007  [15104/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.157270  [15168/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.146148  [15232/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.149305  [15296/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.142038  [15360/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.161974  [15424/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.180417  [15488/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.165704  [15552/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.182878  [15616/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.189827  [15680/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.171934  [15744/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.118874  [15808/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.196679  [15872/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.150053  [15936/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.193279  [16000/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.169311  [16064/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.140757  [16128/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.181083  [16192/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.221789  [16256/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.168815  [16320/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.196727  [16384/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.200696  [16448/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.123924  [16512/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.168527  [16576/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.181823  [16640/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.167213  [16704/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.193390  [16768/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.142375  [16832/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.173038  [16896/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.153437  [16960/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.152288  [17024/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.162343  [17088/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.187951  [17152/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.148487  [17216/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.161406  [17280/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.174881  [17344/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.206882  [17408/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.163933  [17472/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.228457  [17536/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.207315  [17600/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.193357  [17664/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.172890  [17728/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.159039  [17792/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.196978  [17856/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.219481  [17920/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.157065  [17984/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.139856  [18048/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.157789  [18112/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.182541  [18176/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.183185  [18240/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.178027  [18304/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.170979  [18368/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.177213  [18432/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.128326  [18496/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.188371  [18560/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.218649  [18624/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.135990  [18688/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.157174  [18752/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.142531  [18816/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.158168  [18880/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.160452  [18944/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.179417  [19008/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.133338  [19072/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.148712  [19136/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.143772  [19200/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.153172  [19264/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.183945  [19328/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.177424  [19392/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.118514  [19456/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.140955  [19520/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.218406  [19584/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.206742  [19648/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.212327  [19712/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.196401  [19776/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.156396  [19840/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.219986  [19904/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.135914  [19968/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.178394  [20032/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.191993  [20096/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.178183  [20160/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.114442  [20224/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.148953  [20288/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.179676  [20352/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.176485  [20416/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.162694  [20480/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.158898  [20544/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.178236  [20608/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.164135  [20672/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.153740  [20736/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.156097  [20800/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.194022  [20864/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.116789  [20928/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.144730  [20992/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.176914  [21056/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.183792  [21120/24872]:   0%|          | 0/388 [00:17<?, ?it/s]
loss: 0.204086  [21184/24872]:   0%|          | 0/388 [00:17<?, ?it/s]
loss: 0.184484  [21248/24872]:   0%|          | 0/388 [00:17<?, ?it/s]
loss: 0.166381  [21312/24872]:   0%|          | 0/388 [00:17<?, ?it/s]
loss: 0.261444  [21376/24872]:   0%|          | 0/388 [00:17<?, ?it/s]
loss: 0.275528  [21440/24872]:   0%|          | 0/388 [00:17<?, ?it/s]
loss: 0.183229  [21504/24872]:   0%|          | 0/388 [00:17<?, ?it/s]
loss: 0.178120  [21568/24872]:   0%|          | 0/388 [00:17<?, ?it/s]
loss: 0.180821  [21632/24872]:   0%|          | 0/388 [00:17<?, ?it/s]
loss: 0.158650  [21696/24872]:   0%|          | 0/388 [00:17<?, ?it/s]
loss: 0.115606  [21760/24872]:   0%|          | 0/388 [00:17<?, ?it/s]
loss: 0.150495  [21824/24872]:   0%|          | 0/388 [00:17<?, ?it/s]
loss: 0.138664  [21888/24872]:   0%|          | 0/388 [00:17<?, ?it/s]
loss: 0.118069  [21952/24872]:   0%|          | 0/388 [00:17<?, ?it/s]
loss: 0.124951  [22016/24872]:   0%|          | 0/388 [00:17<?, ?it/s]
loss: 0.200384  [22080/24872]:   0%|          | 0/388 [00:17<?, ?it/s]
loss: 0.132858  [22144/24872]:   0%|          | 0/388 [00:17<?, ?it/s]
loss: 0.169561  [22208/24872]:   0%|          | 0/388 [00:17<?, ?it/s]
loss: 0.184719  [22272/24872]:   0%|          | 0/388 [00:17<?, ?it/s]
loss: 0.229883  [22336/24872]:   0%|          | 0/388 [00:18<?, ?it/s]
loss: 0.133548  [22400/24872]:   0%|          | 0/388 [00:18<?, ?it/s]
loss: 0.138497  [22464/24872]:   0%|          | 0/388 [00:18<?, ?it/s]
loss: 0.149706  [22528/24872]:   0%|          | 0/388 [00:18<?, ?it/s]
loss: 0.143221  [22592/24872]:   0%|          | 0/388 [00:18<?, ?it/s]
loss: 0.189214  [22656/24872]:   0%|          | 0/388 [00:18<?, ?it/s]
loss: 0.161408  [22720/24872]:   0%|          | 0/388 [00:18<?, ?it/s]
loss: 0.171697  [22784/24872]:   0%|          | 0/388 [00:18<?, ?it/s]
loss: 0.164743  [22848/24872]:   0%|          | 0/388 [00:18<?, ?it/s]
loss: 0.111293  [22912/24872]:   0%|          | 0/388 [00:18<?, ?it/s]
loss: 0.120728  [22976/24872]:   0%|          | 0/388 [00:18<?, ?it/s]
loss: 0.182607  [23040/24872]:   0%|          | 0/388 [00:18<?, ?it/s]
loss: 0.177272  [23104/24872]:   0%|          | 0/388 [00:18<?, ?it/s]
loss: 0.184914  [23168/24872]:   0%|          | 0/388 [00:18<?, ?it/s]
loss: 0.138318  [23232/24872]:   0%|          | 0/388 [00:18<?, ?it/s]
loss: 0.219400  [23296/24872]:   0%|          | 0/388 [00:18<?, ?it/s]
loss: 0.128814  [23360/24872]:   0%|          | 0/388 [00:18<?, ?it/s]
loss: 0.160478  [23424/24872]:   0%|          | 0/388 [00:18<?, ?it/s]
loss: 0.219212  [23488/24872]:   0%|          | 0/388 [00:18<?, ?it/s]
loss: 0.167286  [23552/24872]:   0%|          | 0/388 [00:18<?, ?it/s]
loss: 0.160536  [23616/24872]:   0%|          | 0/388 [00:19<?, ?it/s]
loss: 0.185509  [23680/24872]:   0%|          | 0/388 [00:19<?, ?it/s]
loss: 0.163449  [23744/24872]:   0%|          | 0/388 [00:19<?, ?it/s]
loss: 0.143578  [23808/24872]:   0%|          | 0/388 [00:19<?, ?it/s]
loss: 0.130626  [23872/24872]:   0%|          | 0/388 [00:19<?, ?it/s]
loss: 0.219252  [23936/24872]:   0%|          | 0/388 [00:19<?, ?it/s]
loss: 0.123358  [24000/24872]:   0%|          | 0/388 [00:19<?, ?it/s]
loss: 0.154104  [24064/24872]:   0%|          | 0/388 [00:19<?, ?it/s]
loss: 0.176036  [24128/24872]:   0%|          | 0/388 [00:19<?, ?it/s]
loss: 0.165314  [24192/24872]:   0%|          | 0/388 [00:19<?, ?it/s]
loss: 0.122759  [24256/24872]:   0%|          | 0/388 [00:19<?, ?it/s]
loss: 0.157528  [24320/24872]:   0%|          | 0/388 [00:19<?, ?it/s]
loss: 0.141191  [24384/24872]:   0%|          | 0/388 [00:19<?, ?it/s]
loss: 0.169432  [24448/24872]:   0%|          | 0/388 [00:19<?, ?it/s]
loss: 0.169002  [24512/24872]:   0%|          | 0/388 [00:19<?, ?it/s]
loss: 0.161568  [24576/24872]:   0%|          | 0/388 [00:19<?, ?it/s]
loss: 0.162856  [24640/24872]:   0%|          | 0/388 [00:19<?, ?it/s]
loss: 0.127481  [24704/24872]:   0%|          | 0/388 [00:19<?, ?it/s]
loss: 0.193814  [24768/24872]:   0%|          | 0/388 [00:19<?, ?it/s]
loss: 0.180551  [24832/24872]:   0%|          | 0/388 [00:19<?, ?it/s]
loss: 0.136429  [24872/24872]:   0%|          | 0/388 [00:20<?, ?it/s]
loss: 0.136429  [24872/24872]: : 389it [00:20, 19.44it/s]
Epoch 2, time=179.15s

  0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.170052  [   64/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.168175  [  128/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.165809  [  192/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.187426  [  256/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.211198  [  320/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.224520  [  384/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.133040  [  448/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.224583  [  512/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.150611  [  576/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.220894  [  640/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.168977  [  704/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.144770  [  768/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.197849  [  832/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.235137  [  896/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.209035  [  960/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.178416  [ 1024/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.152840  [ 1088/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.133815  [ 1152/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.172062  [ 1216/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.151535  [ 1280/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.218506  [ 1344/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.178149  [ 1408/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.156520  [ 1472/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.156033  [ 1536/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.178810  [ 1600/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.154950  [ 1664/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.155652  [ 1728/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.205672  [ 1792/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.177482  [ 1856/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.133026  [ 1920/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.145595  [ 1984/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.164178  [ 2048/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.187324  [ 2112/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.176176  [ 2176/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.171510  [ 2240/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.183103  [ 2304/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.201595  [ 2368/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.156182  [ 2432/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.240845  [ 2496/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.174082  [ 2560/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.157034  [ 2624/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.152619  [ 2688/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.151360  [ 2752/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.174931  [ 2816/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.138166  [ 2880/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.143772  [ 2944/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.151869  [ 3008/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.216714  [ 3072/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.143085  [ 3136/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.194244  [ 3200/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.139414  [ 3264/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.140370  [ 3328/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.106944  [ 3392/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.161106  [ 3456/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.206932  [ 3520/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.161825  [ 3584/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.171725  [ 3648/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.131483  [ 3712/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.155166  [ 3776/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.164893  [ 3840/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.156705  [ 3904/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.182922  [ 3968/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.171821  [ 4032/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.209078  [ 4096/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.137142  [ 4160/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.167682  [ 4224/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.217210  [ 4288/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.121861  [ 4352/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.176868  [ 4416/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.159164  [ 4480/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.145604  [ 4544/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.152646  [ 4608/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.186996  [ 4672/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.177955  [ 4736/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.175740  [ 4800/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.169580  [ 4864/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.200430  [ 4928/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.125417  [ 4992/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.122235  [ 5056/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.152676  [ 5120/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.195575  [ 5184/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.175050  [ 5248/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.241311  [ 5312/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.176986  [ 5376/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.126720  [ 5440/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.188578  [ 5504/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.194336  [ 5568/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.165975  [ 5632/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.184777  [ 5696/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.166682  [ 5760/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.145588  [ 5824/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.231029  [ 5888/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.183418  [ 5952/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.145688  [ 6016/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.190171  [ 6080/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.159911  [ 6144/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.202276  [ 6208/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.134719  [ 6272/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.171652  [ 6336/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.210542  [ 6400/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.151844  [ 6464/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.153744  [ 6528/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.200651  [ 6592/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.180578  [ 6656/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.166956  [ 6720/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.159444  [ 6784/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.170257  [ 6848/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.149395  [ 6912/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.180961  [ 6976/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.134384  [ 7040/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.164881  [ 7104/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.140950  [ 7168/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.143671  [ 7232/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.204450  [ 7296/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.182027  [ 7360/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.187341  [ 7424/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.182169  [ 7488/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.151966  [ 7552/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.123638  [ 7616/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.194875  [ 7680/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.224297  [ 7744/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.226170  [ 7808/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.199554  [ 7872/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.141987  [ 7936/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.129833  [ 8000/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.189086  [ 8064/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.157104  [ 8128/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.181631  [ 8192/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.136255  [ 8256/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.166937  [ 8320/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.146766  [ 8384/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.128571  [ 8448/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.168690  [ 8512/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.197059  [ 8576/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.224616  [ 8640/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.224178  [ 8704/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.183657  [ 8768/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.106022  [ 8832/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.133392  [ 8896/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.137348  [ 8960/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.205048  [ 9024/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.135906  [ 9088/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.114033  [ 9152/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.127665  [ 9216/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.120592  [ 9280/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.167380  [ 9344/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.181157  [ 9408/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.203741  [ 9472/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.132580  [ 9536/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.122624  [ 9600/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.104601  [ 9664/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.169848  [ 9728/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.140748  [ 9792/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.160128  [ 9856/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.179851  [ 9920/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.178858  [ 9984/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.135719  [10048/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.164486  [10112/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.181971  [10176/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.164530  [10240/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.131379  [10304/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.147915  [10368/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.214490  [10432/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.203550  [10496/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.186777  [10560/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.138492  [10624/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.150386  [10688/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.165811  [10752/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.146862  [10816/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.155020  [10880/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.153619  [10944/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.129364  [11008/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.166914  [11072/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.222011  [11136/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.165381  [11200/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.166159  [11264/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.150184  [11328/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.118094  [11392/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.178785  [11456/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.212047  [11520/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.214918  [11584/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.165354  [11648/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.175704  [11712/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.143549  [11776/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.172394  [11840/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.180687  [11904/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.171690  [11968/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.117028  [12032/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.219895  [12096/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.203852  [12160/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.189370  [12224/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.162627  [12288/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.203864  [12352/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.143180  [12416/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.165833  [12480/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.220197  [12544/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.152467  [12608/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.140125  [12672/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.177746  [12736/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.217694  [12800/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.174271  [12864/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.144362  [12928/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.120473  [12992/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.128719  [13056/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.169882  [13120/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.149341  [13184/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.113284  [13248/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.148102  [13312/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.117310  [13376/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.159185  [13440/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.160033  [13504/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.155057  [13568/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.220219  [13632/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.150115  [13696/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.180039  [13760/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.184351  [13824/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.192152  [13888/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.134817  [13952/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.186468  [14016/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.150692  [14080/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.198766  [14144/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.158535  [14208/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.154945  [14272/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.176282  [14336/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.152334  [14400/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.115972  [14464/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.166663  [14528/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.154438  [14592/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.172513  [14656/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.174686  [14720/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.181484  [14784/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.174989  [14848/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.183225  [14912/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.191169  [14976/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.157216  [15040/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.137436  [15104/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.145794  [15168/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.139934  [15232/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.146065  [15296/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.140603  [15360/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.148951  [15424/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.169523  [15488/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.161564  [15552/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.167119  [15616/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.190392  [15680/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.157061  [15744/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.105261  [15808/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.197775  [15872/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.144101  [15936/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.172483  [16000/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.145005  [16064/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.128079  [16128/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.169581  [16192/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.222009  [16256/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.168476  [16320/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.192406  [16384/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.184000  [16448/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.135158  [16512/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.154544  [16576/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.173558  [16640/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.162761  [16704/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.174537  [16768/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.138099  [16832/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.159209  [16896/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.153856  [16960/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.136083  [17024/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.152836  [17088/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.163442  [17152/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.136629  [17216/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.144283  [17280/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.168253  [17344/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.196364  [17408/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.155871  [17472/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.202834  [17536/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.185006  [17600/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.186778  [17664/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.177182  [17728/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.160956  [17792/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.181685  [17856/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.207789  [17920/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.150254  [17984/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.127478  [18048/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.142558  [18112/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.194352  [18176/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.170768  [18240/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.169964  [18304/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.163954  [18368/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.181635  [18432/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.114251  [18496/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.173884  [18560/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.188726  [18624/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.129852  [18688/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.142317  [18752/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.131761  [18816/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.141166  [18880/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.150909  [18944/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.169045  [19008/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.117163  [19072/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.164841  [19136/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.125989  [19200/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.139848  [19264/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.190103  [19328/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.180654  [19392/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.114231  [19456/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.127797  [19520/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.224407  [19584/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.215293  [19648/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.160693  [19712/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.185470  [19776/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.153616  [19840/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.222224  [19904/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.127030  [19968/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.203481  [20032/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.176807  [20096/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.174259  [20160/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.120902  [20224/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.130891  [20288/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.168846  [20352/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.176322  [20416/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.140243  [20480/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.166548  [20544/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.155077  [20608/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.150706  [20672/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.151372  [20736/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.131160  [20800/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.171643  [20864/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.102480  [20928/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.121561  [20992/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.154328  [21056/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.176691  [21120/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.157944  [21184/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.173912  [21248/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.161189  [21312/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.218886  [21376/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.229566  [21440/24872]:   0%|          | 0/388 [00:17<?, ?it/s]
loss: 0.143052  [21504/24872]:   0%|          | 0/388 [00:17<?, ?it/s]
loss: 0.178934  [21568/24872]:   0%|          | 0/388 [00:17<?, ?it/s]
loss: 0.157477  [21632/24872]:   0%|          | 0/388 [00:17<?, ?it/s]
loss: 0.164984  [21696/24872]:   0%|          | 0/388 [00:17<?, ?it/s]
loss: 0.125620  [21760/24872]:   0%|          | 0/388 [00:17<?, ?it/s]
loss: 0.138995  [21824/24872]:   0%|          | 0/388 [00:17<?, ?it/s]
loss: 0.136486  [21888/24872]:   0%|          | 0/388 [00:17<?, ?it/s]
loss: 0.111081  [21952/24872]:   0%|          | 0/388 [00:17<?, ?it/s]
loss: 0.111101  [22016/24872]:   0%|          | 0/388 [00:17<?, ?it/s]
loss: 0.197640  [22080/24872]:   0%|          | 0/388 [00:17<?, ?it/s]
loss: 0.111320  [22144/24872]:   0%|          | 0/388 [00:17<?, ?it/s]
loss: 0.142223  [22208/24872]:   0%|          | 0/388 [00:17<?, ?it/s]
loss: 0.186543  [22272/24872]:   0%|          | 0/388 [00:17<?, ?it/s]
loss: 0.205143  [22336/24872]:   0%|          | 0/388 [00:17<?, ?it/s]
loss: 0.122507  [22400/24872]:   0%|          | 0/388 [00:17<?, ?it/s]
loss: 0.111421  [22464/24872]:   0%|          | 0/388 [00:17<?, ?it/s]
loss: 0.146392  [22528/24872]:   0%|          | 0/388 [00:17<?, ?it/s]
loss: 0.125647  [22592/24872]:   0%|          | 0/388 [00:17<?, ?it/s]
loss: 0.168502  [22656/24872]:   0%|          | 0/388 [00:17<?, ?it/s]
loss: 0.153622  [22720/24872]:   0%|          | 0/388 [00:18<?, ?it/s]
loss: 0.158848  [22784/24872]:   0%|          | 0/388 [00:18<?, ?it/s]
loss: 0.153974  [22848/24872]:   0%|          | 0/388 [00:18<?, ?it/s]
loss: 0.098766  [22912/24872]:   0%|          | 0/388 [00:18<?, ?it/s]
loss: 0.104514  [22976/24872]:   0%|          | 0/388 [00:18<?, ?it/s]
loss: 0.158981  [23040/24872]:   0%|          | 0/388 [00:18<?, ?it/s]
loss: 0.160547  [23104/24872]:   0%|          | 0/388 [00:18<?, ?it/s]
loss: 0.167569  [23168/24872]:   0%|          | 0/388 [00:18<?, ?it/s]
loss: 0.131260  [23232/24872]:   0%|          | 0/388 [00:18<?, ?it/s]
loss: 0.204798  [23296/24872]:   0%|          | 0/388 [00:18<?, ?it/s]
loss: 0.132713  [23360/24872]:   0%|          | 0/388 [00:18<?, ?it/s]
loss: 0.152785  [23424/24872]:   0%|          | 0/388 [00:18<?, ?it/s]
loss: 0.208184  [23488/24872]:   0%|          | 0/388 [00:18<?, ?it/s]
loss: 0.145479  [23552/24872]:   0%|          | 0/388 [00:18<?, ?it/s]
loss: 0.147457  [23616/24872]:   0%|          | 0/388 [00:18<?, ?it/s]
loss: 0.176215  [23680/24872]:   0%|          | 0/388 [00:18<?, ?it/s]
loss: 0.158778  [23744/24872]:   0%|          | 0/388 [00:18<?, ?it/s]
loss: 0.137886  [23808/24872]:   0%|          | 0/388 [00:18<?, ?it/s]
loss: 0.116825  [23872/24872]:   0%|          | 0/388 [00:18<?, ?it/s]
loss: 0.206726  [23936/24872]:   0%|          | 0/388 [00:18<?, ?it/s]
loss: 0.114050  [24000/24872]:   0%|          | 0/388 [00:19<?, ?it/s]
loss: 0.150502  [24064/24872]:   0%|          | 0/388 [00:19<?, ?it/s]
loss: 0.166932  [24128/24872]:   0%|          | 0/388 [00:19<?, ?it/s]
loss: 0.166165  [24192/24872]:   0%|          | 0/388 [00:19<?, ?it/s]
loss: 0.107743  [24256/24872]:   0%|          | 0/388 [00:19<?, ?it/s]
loss: 0.143552  [24320/24872]:   0%|          | 0/388 [00:19<?, ?it/s]
loss: 0.128938  [24384/24872]:   0%|          | 0/388 [00:19<?, ?it/s]
loss: 0.166512  [24448/24872]:   0%|          | 0/388 [00:19<?, ?it/s]
loss: 0.152860  [24512/24872]:   0%|          | 0/388 [00:19<?, ?it/s]
loss: 0.145089  [24576/24872]:   0%|          | 0/388 [00:19<?, ?it/s]
loss: 0.155415  [24640/24872]:   0%|          | 0/388 [00:19<?, ?it/s]
loss: 0.119741  [24704/24872]:   0%|          | 0/388 [00:19<?, ?it/s]
loss: 0.184134  [24768/24872]:   0%|          | 0/388 [00:19<?, ?it/s]
loss: 0.158453  [24832/24872]:   0%|          | 0/388 [00:19<?, ?it/s]
loss: 0.110952  [24872/24872]:   0%|          | 0/388 [00:19<?, ?it/s]
loss: 0.110952  [24872/24872]: : 389it [00:19, 19.71it/s]
Epoch 3, time=198.88s

  0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.164502  [   64/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.155964  [  128/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.147437  [  192/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.174289  [  256/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.170255  [  320/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.185828  [  384/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.095009  [  448/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.201174  [  512/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.149209  [  576/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.192433  [  640/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.153900  [  704/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.137268  [  768/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.160290  [  832/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.218222  [  896/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.181089  [  960/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.164969  [ 1024/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.122184  [ 1088/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.131601  [ 1152/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.149327  [ 1216/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.127515  [ 1280/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.184833  [ 1344/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.160685  [ 1408/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.155060  [ 1472/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.137869  [ 1536/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.148442  [ 1600/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.138698  [ 1664/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.117168  [ 1728/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.169553  [ 1792/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.158298  [ 1856/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.101623  [ 1920/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.120771  [ 1984/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.131587  [ 2048/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.168886  [ 2112/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.153889  [ 2176/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.159363  [ 2240/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.152299  [ 2304/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.163431  [ 2368/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.132586  [ 2432/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.209446  [ 2496/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.151507  [ 2560/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.133130  [ 2624/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.110953  [ 2688/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.134228  [ 2752/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.162318  [ 2816/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.129602  [ 2880/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.122298  [ 2944/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.139105  [ 3008/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.199985  [ 3072/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.130011  [ 3136/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.174577  [ 3200/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.119905  [ 3264/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.133213  [ 3328/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.109346  [ 3392/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.152598  [ 3456/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.176661  [ 3520/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.140769  [ 3584/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.166126  [ 3648/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.124129  [ 3712/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.134036  [ 3776/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.145304  [ 3840/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.146001  [ 3904/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.154589  [ 3968/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.166673  [ 4032/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.192803  [ 4096/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.118532  [ 4160/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.156283  [ 4224/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.210616  [ 4288/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.109315  [ 4352/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.151979  [ 4416/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.135559  [ 4480/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.134820  [ 4544/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.140668  [ 4608/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.174282  [ 4672/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.159357  [ 4736/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.161062  [ 4800/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.152843  [ 4864/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.206172  [ 4928/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.110045  [ 4992/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.112005  [ 5056/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.149929  [ 5120/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.172539  [ 5184/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.174244  [ 5248/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.228650  [ 5312/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.170096  [ 5376/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.126967  [ 5440/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.164415  [ 5504/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.181343  [ 5568/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.155876  [ 5632/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.167020  [ 5696/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.165455  [ 5760/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.132630  [ 5824/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.217711  [ 5888/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.161824  [ 5952/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.110908  [ 6016/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.165739  [ 6080/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.144785  [ 6144/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.158055  [ 6208/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.119669  [ 6272/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.143893  [ 6336/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.178977  [ 6400/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.139454  [ 6464/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.134342  [ 6528/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.172785  [ 6592/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.177497  [ 6656/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.155229  [ 6720/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.141153  [ 6784/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.157880  [ 6848/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.134262  [ 6912/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.172991  [ 6976/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.132823  [ 7040/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.149464  [ 7104/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.116448  [ 7168/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.143783  [ 7232/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.181661  [ 7296/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.186394  [ 7360/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.191051  [ 7424/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.173390  [ 7488/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.139520  [ 7552/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.117890  [ 7616/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.180808  [ 7680/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.201919  [ 7744/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.205312  [ 7808/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.192693  [ 7872/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.139238  [ 7936/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.120350  [ 8000/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.183305  [ 8064/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.148779  [ 8128/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.162149  [ 8192/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.135903  [ 8256/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.159089  [ 8320/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.145246  [ 8384/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.134535  [ 8448/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.137371  [ 8512/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.204103  [ 8576/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.234834  [ 8640/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.222958  [ 8704/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.174677  [ 8768/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.107667  [ 8832/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.131573  [ 8896/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.129653  [ 8960/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.190307  [ 9024/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.141658  [ 9088/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.131636  [ 9152/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.129327  [ 9216/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.135023  [ 9280/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.178022  [ 9344/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.167884  [ 9408/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.203631  [ 9472/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.128054  [ 9536/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.113460  [ 9600/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.098710  [ 9664/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.180721  [ 9728/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.154470  [ 9792/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.161385  [ 9856/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.176709  [ 9920/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.164065  [ 9984/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.127906  [10048/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.159565  [10112/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.180387  [10176/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.141611  [10240/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.137167  [10304/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.140074  [10368/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.219419  [10432/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.175699  [10496/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.191268  [10560/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.155926  [10624/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.160648  [10688/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.144244  [10752/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.141596  [10816/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.151481  [10880/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.145222  [10944/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.130607  [11008/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.164581  [11072/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.213883  [11136/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.156916  [11200/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.166515  [11264/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.143788  [11328/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.115474  [11392/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.165563  [11456/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.193008  [11520/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.214653  [11584/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.165093  [11648/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.165012  [11712/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.126490  [11776/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.156861  [11840/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.156851  [11904/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.154807  [11968/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.105382  [12032/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.206804  [12096/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.183578  [12160/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.182939  [12224/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.125473  [12288/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.198857  [12352/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.133217  [12416/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.160578  [12480/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.207351  [12544/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.143380  [12608/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.133659  [12672/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.167793  [12736/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.210906  [12800/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.164279  [12864/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.127706  [12928/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.111994  [12992/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.131052  [13056/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.174048  [13120/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.149329  [13184/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.115214  [13248/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.140427  [13312/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.110157  [13376/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.146184  [13440/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.146775  [13504/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.146223  [13568/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.218962  [13632/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.141791  [13696/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.175910  [13760/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.184140  [13824/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.176148  [13888/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.117613  [13952/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.157896  [14016/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.137874  [14080/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.173362  [14144/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.162423  [14208/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.140347  [14272/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.166022  [14336/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.148215  [14400/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.109613  [14464/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.158723  [14528/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.148294  [14592/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.186317  [14656/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.153450  [14720/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.186909  [14784/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.164811  [14848/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.186613  [14912/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.157107  [14976/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.151480  [15040/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.134722  [15104/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.144317  [15168/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.146747  [15232/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.116280  [15296/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.126336  [15360/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.139974  [15424/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.170079  [15488/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.160964  [15552/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.150388  [15616/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.184518  [15680/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.159552  [15744/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.101086  [15808/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.199864  [15872/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.128630  [15936/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.167939  [16000/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.163638  [16064/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.133376  [16128/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.191071  [16192/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.233187  [16256/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.177199  [16320/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.218369  [16384/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.185835  [16448/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.137264  [16512/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.169783  [16576/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.169657  [16640/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.186419  [16704/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.196142  [16768/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.159390  [16832/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.182568  [16896/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.145045  [16960/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.167564  [17024/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.157725  [17088/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.159709  [17152/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.154052  [17216/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.159632  [17280/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.156433  [17344/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.198264  [17408/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.147586  [17472/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.213050  [17536/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.203857  [17600/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.186002  [17664/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.155869  [17728/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.155291  [17792/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.172677  [17856/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.192150  [17920/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.136239  [17984/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.135480  [18048/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.147135  [18112/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.167650  [18176/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.170520  [18240/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.164241  [18304/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.160157  [18368/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.170550  [18432/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.125493  [18496/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.164629  [18560/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.177480  [18624/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.114313  [18688/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.142279  [18752/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.127930  [18816/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.124684  [18880/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.136512  [18944/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.160568  [19008/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.119044  [19072/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.134895  [19136/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.120170  [19200/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.131099  [19264/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.161124  [19328/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.161320  [19392/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.094753  [19456/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.122078  [19520/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.211619  [19584/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.185051  [19648/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.207301  [19712/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.180324  [19776/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.139152  [19840/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.208914  [19904/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.123090  [19968/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.160679  [20032/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.184447  [20096/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.156666  [20160/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.120395  [20224/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.126769  [20288/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.152683  [20352/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.178398  [20416/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.131487  [20480/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.139501  [20544/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.134863  [20608/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.128111  [20672/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.131519  [20736/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.116805  [20800/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.154499  [20864/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.104885  [20928/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.120432  [20992/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.146283  [21056/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.166522  [21120/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.158257  [21184/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.161768  [21248/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.141410  [21312/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.247408  [21376/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.260909  [21440/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.171944  [21504/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.168457  [21568/24872]:   0%|          | 0/388 [00:17<?, ?it/s]
loss: 0.154547  [21632/24872]:   0%|          | 0/388 [00:17<?, ?it/s]
loss: 0.187575  [21696/24872]:   0%|          | 0/388 [00:17<?, ?it/s]
loss: 0.135745  [21760/24872]:   0%|          | 0/388 [00:17<?, ?it/s]
loss: 0.130110  [21824/24872]:   0%|          | 0/388 [00:17<?, ?it/s]
loss: 0.139478  [21888/24872]:   0%|          | 0/388 [00:17<?, ?it/s]
loss: 0.107971  [21952/24872]:   0%|          | 0/388 [00:17<?, ?it/s]
loss: 0.108595  [22016/24872]:   0%|          | 0/388 [00:17<?, ?it/s]
loss: 0.195299  [22080/24872]:   0%|          | 0/388 [00:17<?, ?it/s]
loss: 0.151728  [22144/24872]:   0%|          | 0/388 [00:17<?, ?it/s]
loss: 0.146344  [22208/24872]:   0%|          | 0/388 [00:17<?, ?it/s]
loss: 0.230636  [22272/24872]:   0%|          | 0/388 [00:17<?, ?it/s]
loss: 0.203026  [22336/24872]:   0%|          | 0/388 [00:17<?, ?it/s]
loss: 0.138262  [22400/24872]:   0%|          | 0/388 [00:17<?, ?it/s]
loss: 0.144604  [22464/24872]:   0%|          | 0/388 [00:17<?, ?it/s]
loss: 0.147974  [22528/24872]:   0%|          | 0/388 [00:17<?, ?it/s]
loss: 0.116733  [22592/24872]:   0%|          | 0/388 [00:17<?, ?it/s]
loss: 0.209666  [22656/24872]:   0%|          | 0/388 [00:17<?, ?it/s]
loss: 0.157019  [22720/24872]:   0%|          | 0/388 [00:17<?, ?it/s]
loss: 0.170354  [22784/24872]:   0%|          | 0/388 [00:17<?, ?it/s]
loss: 0.181074  [22848/24872]:   0%|          | 0/388 [00:18<?, ?it/s]
loss: 0.103406  [22912/24872]:   0%|          | 0/388 [00:18<?, ?it/s]
loss: 0.104526  [22976/24872]:   0%|          | 0/388 [00:18<?, ?it/s]
loss: 0.186670  [23040/24872]:   0%|          | 0/388 [00:18<?, ?it/s]
loss: 0.179545  [23104/24872]:   0%|          | 0/388 [00:18<?, ?it/s]
loss: 0.172914  [23168/24872]:   0%|          | 0/388 [00:18<?, ?it/s]
loss: 0.141027  [23232/24872]:   0%|          | 0/388 [00:18<?, ?it/s]
loss: 0.192844  [23296/24872]:   0%|          | 0/388 [00:18<?, ?it/s]
loss: 0.117575  [23360/24872]:   0%|          | 0/388 [00:18<?, ?it/s]
loss: 0.151837  [23424/24872]:   0%|          | 0/388 [00:18<?, ?it/s]
loss: 0.196320  [23488/24872]:   0%|          | 0/388 [00:18<?, ?it/s]
loss: 0.149488  [23552/24872]:   0%|          | 0/388 [00:18<?, ?it/s]
loss: 0.162849  [23616/24872]:   0%|          | 0/388 [00:18<?, ?it/s]
loss: 0.166823  [23680/24872]:   0%|          | 0/388 [00:18<?, ?it/s]
loss: 0.159816  [23744/24872]:   0%|          | 0/388 [00:18<?, ?it/s]
loss: 0.134926  [23808/24872]:   0%|          | 0/388 [00:18<?, ?it/s]
loss: 0.115035  [23872/24872]:   0%|          | 0/388 [00:18<?, ?it/s]
loss: 0.208341  [23936/24872]:   0%|          | 0/388 [00:18<?, ?it/s]
loss: 0.115880  [24000/24872]:   0%|          | 0/388 [00:18<?, ?it/s]
loss: 0.150401  [24064/24872]:   0%|          | 0/388 [00:19<?, ?it/s]
loss: 0.160453  [24128/24872]:   0%|          | 0/388 [00:19<?, ?it/s]
loss: 0.160490  [24192/24872]:   0%|          | 0/388 [00:19<?, ?it/s]
loss: 0.101335  [24256/24872]:   0%|          | 0/388 [00:19<?, ?it/s]
loss: 0.146997  [24320/24872]:   0%|          | 0/388 [00:19<?, ?it/s]
loss: 0.121959  [24384/24872]:   0%|          | 0/388 [00:19<?, ?it/s]
loss: 0.163458  [24448/24872]:   0%|          | 0/388 [00:19<?, ?it/s]
loss: 0.145880  [24512/24872]:   0%|          | 0/388 [00:19<?, ?it/s]
loss: 0.149887  [24576/24872]:   0%|          | 0/388 [00:19<?, ?it/s]
loss: 0.158731  [24640/24872]:   0%|          | 0/388 [00:19<?, ?it/s]
loss: 0.115718  [24704/24872]:   0%|          | 0/388 [00:19<?, ?it/s]
loss: 0.167446  [24768/24872]:   0%|          | 0/388 [00:19<?, ?it/s]
loss: 0.153452  [24832/24872]:   0%|          | 0/388 [00:19<?, ?it/s]
loss: 0.114273  [24872/24872]:   0%|          | 0/388 [00:19<?, ?it/s]
loss: 0.114273  [24872/24872]: : 389it [00:19, 19.77it/s]
Epoch 4, time=218.56s

  0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.163906  [   64/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.157845  [  128/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.156643  [  192/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.158009  [  256/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.181489  [  320/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.202169  [  384/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.113293  [  448/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.229643  [  512/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.137166  [  576/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.214431  [  640/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.185228  [  704/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.133230  [  768/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.168967  [  832/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.234095  [  896/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.197803  [  960/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.171834  [ 1024/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.166122  [ 1088/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.153474  [ 1152/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.149272  [ 1216/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.175406  [ 1280/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.185740  [ 1344/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.164930  [ 1408/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.180795  [ 1472/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.130450  [ 1536/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.151757  [ 1600/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.164020  [ 1664/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.114799  [ 1728/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.168585  [ 1792/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.164045  [ 1856/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.102429  [ 1920/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.112570  [ 1984/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.136152  [ 2048/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.170438  [ 2112/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.137809  [ 2176/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.166270  [ 2240/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.151395  [ 2304/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.163473  [ 2368/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.143605  [ 2432/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.217687  [ 2496/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.149967  [ 2560/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.152003  [ 2624/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.131352  [ 2688/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.131207  [ 2752/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.176059  [ 2816/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.133798  [ 2880/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.119779  [ 2944/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.132711  [ 3008/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.228813  [ 3072/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.129140  [ 3136/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.200003  [ 3200/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.136717  [ 3264/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.122133  [ 3328/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.098111  [ 3392/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.157205  [ 3456/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.170429  [ 3520/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.136883  [ 3584/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.133469  [ 3648/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.113742  [ 3712/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.137613  [ 3776/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.140853  [ 3840/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.138468  [ 3904/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.147913  [ 3968/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.154373  [ 4032/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.187143  [ 4096/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.120754  [ 4160/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.134879  [ 4224/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.189065  [ 4288/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.110887  [ 4352/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.150669  [ 4416/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.120876  [ 4480/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.135463  [ 4544/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.130694  [ 4608/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.160335  [ 4672/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.128666  [ 4736/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.144207  [ 4800/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.145292  [ 4864/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.178028  [ 4928/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.094050  [ 4992/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.109334  [ 5056/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.127037  [ 5120/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.158005  [ 5184/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.154729  [ 5248/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.226753  [ 5312/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.151384  [ 5376/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.113010  [ 5440/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.152197  [ 5504/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.168344  [ 5568/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.135448  [ 5632/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.150642  [ 5696/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.157878  [ 5760/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.109419  [ 5824/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.189272  [ 5888/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.140913  [ 5952/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.101387  [ 6016/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.159709  [ 6080/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.144221  [ 6144/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.162464  [ 6208/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.108663  [ 6272/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.141046  [ 6336/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.175151  [ 6400/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.130860  [ 6464/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.127759  [ 6528/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.174551  [ 6592/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.163876  [ 6656/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.158489  [ 6720/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.131051  [ 6784/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.140807  [ 6848/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.151853  [ 6912/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.166845  [ 6976/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.130920  [ 7040/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.153479  [ 7104/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.114246  [ 7168/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.130506  [ 7232/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.171837  [ 7296/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.153359  [ 7360/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.171767  [ 7424/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.174262  [ 7488/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.130054  [ 7552/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.113995  [ 7616/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.195102  [ 7680/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.178630  [ 7744/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.195620  [ 7808/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.183737  [ 7872/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.116574  [ 7936/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.104681  [ 8000/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.168957  [ 8064/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.142068  [ 8128/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.149040  [ 8192/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.117451  [ 8256/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.142035  [ 8320/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.137716  [ 8384/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.118906  [ 8448/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.126731  [ 8512/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.189207  [ 8576/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.200744  [ 8640/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.197137  [ 8704/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.151700  [ 8768/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.094535  [ 8832/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.114869  [ 8896/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.125248  [ 8960/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.205786  [ 9024/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.123029  [ 9088/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.105696  [ 9152/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.116622  [ 9216/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.105978  [ 9280/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.158612  [ 9344/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.157183  [ 9408/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.181260  [ 9472/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.124842  [ 9536/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.105175  [ 9600/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.104327  [ 9664/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.173022  [ 9728/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.127620  [ 9792/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.159325  [ 9856/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.156649  [ 9920/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.135679  [ 9984/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.119467  [10048/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.132788  [10112/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.169585  [10176/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.145479  [10240/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.123372  [10304/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.123913  [10368/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.192986  [10432/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.162409  [10496/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.184932  [10560/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.138203  [10624/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.151020  [10688/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.134723  [10752/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.131025  [10816/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.138923  [10880/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.147975  [10944/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.114675  [11008/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.163536  [11072/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.214837  [11136/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.148452  [11200/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.160538  [11264/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.156956  [11328/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.114210  [11392/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.183248  [11456/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.206463  [11520/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.181110  [11584/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.165261  [11648/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.179060  [11712/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.121733  [11776/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.161299  [11840/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.154814  [11904/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.145632  [11968/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.115328  [12032/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.210300  [12096/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.169283  [12160/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.171520  [12224/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.132904  [12288/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.175768  [12352/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.145136  [12416/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.153064  [12480/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.195653  [12544/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.136685  [12608/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.118317  [12672/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.172029  [12736/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.194361  [12800/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.154096  [12864/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.119591  [12928/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.087358  [12992/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.114039  [13056/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.157121  [13120/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.142725  [13184/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.096812  [13248/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.124414  [13312/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.100810  [13376/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.136495  [13440/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.134077  [13504/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.139516  [13568/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.216983  [13632/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.128291  [13696/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.158075  [13760/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.154240  [13824/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.156091  [13888/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.104057  [13952/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.158211  [14016/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.115249  [14080/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.150895  [14144/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.140490  [14208/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.132920  [14272/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.152967  [14336/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.146465  [14400/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.098268  [14464/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.142783  [14528/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.133942  [14592/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.188619  [14656/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.137153  [14720/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.163076  [14784/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.144574  [14848/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.161244  [14912/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.142131  [14976/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.143942  [15040/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.125117  [15104/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.134449  [15168/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.128022  [15232/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.121916  [15296/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.120927  [15360/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.137439  [15424/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.148087  [15488/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.132679  [15552/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.150805  [15616/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.149999  [15680/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.142499  [15744/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.092210  [15808/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.178685  [15872/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.116271  [15936/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.152487  [16000/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.124623  [16064/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.118076  [16128/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.137356  [16192/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.206423  [16256/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.158031  [16320/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.168833  [16384/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.162999  [16448/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.108912  [16512/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.138920  [16576/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.138406  [16640/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.132938  [16704/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.161264  [16768/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.119162  [16832/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.142195  [16896/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.133528  [16960/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.104709  [17024/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.140016  [17088/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.135437  [17152/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.128950  [17216/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.130748  [17280/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.133626  [17344/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.188511  [17408/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.126197  [17472/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.203206  [17536/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.180749  [17600/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.169994  [17664/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.147477  [17728/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.141788  [17792/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.154999  [17856/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.162185  [17920/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.124416  [17984/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.102904  [18048/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.135410  [18112/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.153859  [18176/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.151566  [18240/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.161762  [18304/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.148868  [18368/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.156088  [18432/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.115660  [18496/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.144533  [18560/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.168512  [18624/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.103439  [18688/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.129575  [18752/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.107232  [18816/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.110094  [18880/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.137672  [18944/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.151116  [19008/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.109504  [19072/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.113635  [19136/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.129629  [19200/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.125650  [19264/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.152757  [19328/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.149783  [19392/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.087074  [19456/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.116658  [19520/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.176758  [19584/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.177509  [19648/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.190384  [19712/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.156691  [19776/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.127193  [19840/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.183041  [19904/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.115785  [19968/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.164848  [20032/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.164221  [20096/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.149306  [20160/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.115265  [20224/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.134275  [20288/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.137729  [20352/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.163283  [20416/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.115095  [20480/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.141497  [20544/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.132172  [20608/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.121662  [20672/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.129293  [20736/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.109935  [20800/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.145707  [20864/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.107943  [20928/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.109441  [20992/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.129516  [21056/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.160503  [21120/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.127246  [21184/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.166259  [21248/24872]:   0%|          | 0/388 [00:17<?, ?it/s]
loss: 0.131143  [21312/24872]:   0%|          | 0/388 [00:17<?, ?it/s]
loss: 0.188685  [21376/24872]:   0%|          | 0/388 [00:17<?, ?it/s]
loss: 0.251485  [21440/24872]:   0%|          | 0/388 [00:17<?, ?it/s]
loss: 0.120733  [21504/24872]:   0%|          | 0/388 [00:17<?, ?it/s]
loss: 0.160609  [21568/24872]:   0%|          | 0/388 [00:17<?, ?it/s]
loss: 0.135510  [21632/24872]:   0%|          | 0/388 [00:17<?, ?it/s]
loss: 0.143903  [21696/24872]:   0%|          | 0/388 [00:17<?, ?it/s]
loss: 0.122559  [21760/24872]:   0%|          | 0/388 [00:17<?, ?it/s]
loss: 0.121776  [21824/24872]:   0%|          | 0/388 [00:17<?, ?it/s]
loss: 0.122878  [21888/24872]:   0%|          | 0/388 [00:17<?, ?it/s]
loss: 0.101503  [21952/24872]:   0%|          | 0/388 [00:17<?, ?it/s]
loss: 0.093052  [22016/24872]:   0%|          | 0/388 [00:17<?, ?it/s]
loss: 0.167860  [22080/24872]:   0%|          | 0/388 [00:17<?, ?it/s]
loss: 0.121097  [22144/24872]:   0%|          | 0/388 [00:17<?, ?it/s]
loss: 0.120841  [22208/24872]:   0%|          | 0/388 [00:17<?, ?it/s]
loss: 0.183161  [22272/24872]:   0%|          | 0/388 [00:17<?, ?it/s]
loss: 0.180401  [22336/24872]:   0%|          | 0/388 [00:17<?, ?it/s]
loss: 0.110714  [22400/24872]:   0%|          | 0/388 [00:17<?, ?it/s]
loss: 0.106681  [22464/24872]:   0%|          | 0/388 [00:17<?, ?it/s]
loss: 0.123199  [22528/24872]:   0%|          | 0/388 [00:18<?, ?it/s]
loss: 0.105127  [22592/24872]:   0%|          | 0/388 [00:18<?, ?it/s]
loss: 0.162528  [22656/24872]:   0%|          | 0/388 [00:18<?, ?it/s]
loss: 0.140588  [22720/24872]:   0%|          | 0/388 [00:18<?, ?it/s]
loss: 0.159138  [22784/24872]:   0%|          | 0/388 [00:18<?, ?it/s]
loss: 0.153671  [22848/24872]:   0%|          | 0/388 [00:18<?, ?it/s]
loss: 0.104100  [22912/24872]:   0%|          | 0/388 [00:18<?, ?it/s]
loss: 0.108770  [22976/24872]:   0%|          | 0/388 [00:18<?, ?it/s]
loss: 0.156796  [23040/24872]:   0%|          | 0/388 [00:18<?, ?it/s]
loss: 0.156916  [23104/24872]:   0%|          | 0/388 [00:18<?, ?it/s]
loss: 0.140305  [23168/24872]:   0%|          | 0/388 [00:18<?, ?it/s]
loss: 0.112665  [23232/24872]:   0%|          | 0/388 [00:18<?, ?it/s]
loss: 0.203345  [23296/24872]:   0%|          | 0/388 [00:18<?, ?it/s]
loss: 0.122076  [23360/24872]:   0%|          | 0/388 [00:18<?, ?it/s]
loss: 0.147217  [23424/24872]:   0%|          | 0/388 [00:18<?, ?it/s]
loss: 0.175025  [23488/24872]:   0%|          | 0/388 [00:18<?, ?it/s]
loss: 0.152526  [23552/24872]:   0%|          | 0/388 [00:18<?, ?it/s]
loss: 0.146315  [23616/24872]:   0%|          | 0/388 [00:18<?, ?it/s]
loss: 0.143831  [23680/24872]:   0%|          | 0/388 [00:18<?, ?it/s]
loss: 0.131240  [23744/24872]:   0%|          | 0/388 [00:19<?, ?it/s]
loss: 0.125723  [23808/24872]:   0%|          | 0/388 [00:19<?, ?it/s]
loss: 0.111189  [23872/24872]:   0%|          | 0/388 [00:19<?, ?it/s]
loss: 0.192106  [23936/24872]:   0%|          | 0/388 [00:19<?, ?it/s]
loss: 0.103306  [24000/24872]:   0%|          | 0/388 [00:19<?, ?it/s]
loss: 0.137442  [24064/24872]:   0%|          | 0/388 [00:19<?, ?it/s]
loss: 0.144793  [24128/24872]:   0%|          | 0/388 [00:19<?, ?it/s]
loss: 0.153901  [24192/24872]:   0%|          | 0/388 [00:19<?, ?it/s]
loss: 0.100311  [24256/24872]:   0%|          | 0/388 [00:19<?, ?it/s]
loss: 0.129913  [24320/24872]:   0%|          | 0/388 [00:19<?, ?it/s]
loss: 0.126040  [24384/24872]:   0%|          | 0/388 [00:19<?, ?it/s]
loss: 0.176261  [24448/24872]:   0%|          | 0/388 [00:19<?, ?it/s]
loss: 0.140712  [24512/24872]:   0%|          | 0/388 [00:19<?, ?it/s]
loss: 0.143256  [24576/24872]:   0%|          | 0/388 [00:19<?, ?it/s]
loss: 0.155171  [24640/24872]:   0%|          | 0/388 [00:19<?, ?it/s]
loss: 0.113284  [24704/24872]:   0%|          | 0/388 [00:19<?, ?it/s]
loss: 0.172543  [24768/24872]:   0%|          | 0/388 [00:19<?, ?it/s]
loss: 0.152373  [24832/24872]:   0%|          | 0/388 [00:19<?, ?it/s]
loss: 0.114209  [24872/24872]:   0%|          | 0/388 [00:19<?, ?it/s]
loss: 0.114209  [24872/24872]: : 389it [00:19, 19.53it/s]
Epoch 5, time=238.48s

  0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.157690  [   64/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.155700  [  128/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.139376  [  192/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.156539  [  256/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.166700  [  320/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.169294  [  384/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.093200  [  448/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.182094  [  512/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.120945  [  576/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.189477  [  640/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.140213  [  704/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.124483  [  768/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.159179  [  832/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.190067  [  896/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.169327  [  960/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.157011  [ 1024/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.109676  [ 1088/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.106707  [ 1152/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.142970  [ 1216/24872]:   0%|          | 0/388 [00:00<?, ?it/s]
loss: 0.129350  [ 1280/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.158433  [ 1344/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.151423  [ 1408/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.135320  [ 1472/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.123936  [ 1536/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.135317  [ 1600/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.120682  [ 1664/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.106133  [ 1728/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.163803  [ 1792/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.136886  [ 1856/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.110736  [ 1920/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.114762  [ 1984/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.131193  [ 2048/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.161880  [ 2112/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.142376  [ 2176/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.152509  [ 2240/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.136976  [ 2304/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.151216  [ 2368/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.121362  [ 2432/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.203750  [ 2496/24872]:   0%|          | 0/388 [00:01<?, ?it/s]
loss: 0.137440  [ 2560/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.124216  [ 2624/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.110766  [ 2688/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.124112  [ 2752/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.142506  [ 2816/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.111559  [ 2880/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.113908  [ 2944/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.121165  [ 3008/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.185791  [ 3072/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.119279  [ 3136/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.161951  [ 3200/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.097334  [ 3264/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.126753  [ 3328/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.098806  [ 3392/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.153131  [ 3456/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.162717  [ 3520/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.136167  [ 3584/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.136076  [ 3648/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.117374  [ 3712/24872]:   0%|          | 0/388 [00:02<?, ?it/s]
loss: 0.125989  [ 3776/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.140331  [ 3840/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.124097  [ 3904/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.146732  [ 3968/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.158128  [ 4032/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.189545  [ 4096/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.132539  [ 4160/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.131018  [ 4224/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.189901  [ 4288/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.120885  [ 4352/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.147780  [ 4416/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.120391  [ 4480/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.133805  [ 4544/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.123583  [ 4608/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.144233  [ 4672/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.132717  [ 4736/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.144281  [ 4800/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.137193  [ 4864/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.186479  [ 4928/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.097479  [ 4992/24872]:   0%|          | 0/388 [00:03<?, ?it/s]
loss: 0.115992  [ 5056/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.116999  [ 5120/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.159975  [ 5184/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.148388  [ 5248/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.214266  [ 5312/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.149073  [ 5376/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.106551  [ 5440/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.145720  [ 5504/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.161114  [ 5568/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.142186  [ 5632/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.137076  [ 5696/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.147749  [ 5760/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.117297  [ 5824/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.176977  [ 5888/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.144759  [ 5952/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.096725  [ 6016/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.170025  [ 6080/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.147391  [ 6144/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.129466  [ 6208/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.110968  [ 6272/24872]:   0%|          | 0/388 [00:04<?, ?it/s]
loss: 0.129109  [ 6336/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.165717  [ 6400/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.123625  [ 6464/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.116273  [ 6528/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.154503  [ 6592/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.155006  [ 6656/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.137187  [ 6720/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.123860  [ 6784/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.150084  [ 6848/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.124739  [ 6912/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.159300  [ 6976/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.128578  [ 7040/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.126880  [ 7104/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.108590  [ 7168/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.130074  [ 7232/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.165846  [ 7296/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.156612  [ 7360/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.183318  [ 7424/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.177524  [ 7488/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.120986  [ 7552/24872]:   0%|          | 0/388 [00:05<?, ?it/s]
loss: 0.106870  [ 7616/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.182586  [ 7680/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.173173  [ 7744/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.199449  [ 7808/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.176761  [ 7872/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.117905  [ 7936/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.112316  [ 8000/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.158481  [ 8064/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.142775  [ 8128/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.148690  [ 8192/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.105868  [ 8256/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.145579  [ 8320/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.131567  [ 8384/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.110591  [ 8448/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.137514  [ 8512/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.169618  [ 8576/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.196025  [ 8640/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.182950  [ 8704/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.161891  [ 8768/24872]:   0%|          | 0/388 [00:06<?, ?it/s]
loss: 0.095758  [ 8832/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.109868  [ 8896/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.117452  [ 8960/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.186199  [ 9024/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.122369  [ 9088/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.103455  [ 9152/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.123062  [ 9216/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.099746  [ 9280/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.158957  [ 9344/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.157307  [ 9408/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.169877  [ 9472/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.123995  [ 9536/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.109233  [ 9600/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.102715  [ 9664/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.177888  [ 9728/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.130280  [ 9792/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.147955  [ 9856/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.176279  [ 9920/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.141229  [ 9984/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.120502  [10048/24872]:   0%|          | 0/388 [00:07<?, ?it/s]
loss: 0.136394  [10112/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.164097  [10176/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.128463  [10240/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.130279  [10304/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.118010  [10368/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.181392  [10432/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.147848  [10496/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.182807  [10560/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.126278  [10624/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.134791  [10688/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.127164  [10752/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.129557  [10816/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.132342  [10880/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.122256  [10944/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.107230  [11008/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.143851  [11072/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.204010  [11136/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.143530  [11200/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.146152  [11264/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.141914  [11328/24872]:   0%|          | 0/388 [00:08<?, ?it/s]
loss: 0.101739  [11392/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.164095  [11456/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.176845  [11520/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.166638  [11584/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.145821  [11648/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.165703  [11712/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.119736  [11776/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.149170  [11840/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.139845  [11904/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.143941  [11968/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.104277  [12032/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.199128  [12096/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.163068  [12160/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.154280  [12224/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.120143  [12288/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.166309  [12352/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.133227  [12416/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.141009  [12480/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.180822  [12544/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.111994  [12608/24872]:   0%|          | 0/388 [00:09<?, ?it/s]
loss: 0.121407  [12672/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.175519  [12736/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.180304  [12800/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.142916  [12864/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.103890  [12928/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.083670  [12992/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.106376  [13056/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.138947  [13120/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.136995  [13184/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.093202  [13248/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.120291  [13312/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.094172  [13376/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.147104  [13440/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.125525  [13504/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.133866  [13568/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.189888  [13632/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.135265  [13696/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.154062  [13760/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.151848  [13824/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.179931  [13888/24872]:   0%|          | 0/388 [00:10<?, ?it/s]
loss: 0.112546  [13952/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.164613  [14016/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.136450  [14080/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.142437  [14144/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.146542  [14208/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.132734  [14272/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.156485  [14336/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.146196  [14400/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.099733  [14464/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.149261  [14528/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.133193  [14592/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.174064  [14656/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.142341  [14720/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.176402  [14784/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.144498  [14848/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.180208  [14912/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.154315  [14976/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.137402  [15040/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.114836  [15104/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.135437  [15168/24872]:   0%|          | 0/388 [00:11<?, ?it/s]
loss: 0.118008  [15232/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.106971  [15296/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.114195  [15360/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.124987  [15424/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.133874  [15488/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.127643  [15552/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.139272  [15616/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.145890  [15680/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.124609  [15744/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.102669  [15808/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.185536  [15872/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.106214  [15936/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.152988  [16000/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.125000  [16064/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.111004  [16128/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.113200  [16192/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.203400  [16256/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.152531  [16320/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.164330  [16384/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.164855  [16448/24872]:   0%|          | 0/388 [00:12<?, ?it/s]
loss: 0.092235  [16512/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.131351  [16576/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.129981  [16640/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.150377  [16704/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.157210  [16768/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.110965  [16832/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.127223  [16896/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.115556  [16960/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.102594  [17024/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.130648  [17088/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.139175  [17152/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.120541  [17216/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.123920  [17280/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.120499  [17344/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.171945  [17408/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.131730  [17472/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.180337  [17536/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.161718  [17600/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.149497  [17664/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.159292  [17728/24872]:   0%|          | 0/388 [00:13<?, ?it/s]
loss: 0.134463  [17792/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.135665  [17856/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.139370  [17920/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.133246  [17984/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.109077  [18048/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.129571  [18112/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.151928  [18176/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.140067  [18240/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.155883  [18304/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.142929  [18368/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.150098  [18432/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.107290  [18496/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.130730  [18560/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.151092  [18624/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.105519  [18688/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.126802  [18752/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.099010  [18816/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.110620  [18880/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.128372  [18944/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.143060  [19008/24872]:   0%|          | 0/388 [00:14<?, ?it/s]
loss: 0.100515  [19072/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.111327  [19136/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.123999  [19200/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.122930  [19264/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.153534  [19328/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.147008  [19392/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.092781  [19456/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.116898  [19520/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.182186  [19584/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.172362  [19648/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.179713  [19712/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.150564  [19776/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.112351  [19840/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.163104  [19904/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.112830  [19968/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.133487  [20032/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.156377  [20096/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.138216  [20160/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.103090  [20224/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.118833  [20288/24872]:   0%|          | 0/388 [00:15<?, ?it/s]
loss: 0.151998  [20352/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.141158  [20416/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.098200  [20480/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.123014  [20544/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.117387  [20608/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.125663  [20672/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.119959  [20736/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.104232  [20800/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.133940  [20864/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.103398  [20928/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.105736  [20992/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.120056  [21056/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.161626  [21120/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.122978  [21184/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.144330  [21248/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.122847  [21312/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.207689  [21376/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.228017  [21440/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.121795  [21504/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.141580  [21568/24872]:   0%|          | 0/388 [00:16<?, ?it/s]
loss: 0.131513  [21632/24872]:   0%|          | 0/388 [00:17<?, ?it/s]
loss: 0.146737  [21696/24872]:   0%|          | 0/388 [00:17<?, ?it/s]
loss: 0.108097  [21760/24872]:   0%|          | 0/388 [00:17<?, ?it/s]
loss: 0.115661  [21824/24872]:   0%|          | 0/388 [00:17<?, ?it/s]
loss: 0.112336  [21888/24872]:   0%|          | 0/388 [00:17<?, ?it/s]
loss: 0.086015  [21952/24872]:   0%|          | 0/388 [00:17<?, ?it/s]
loss: 0.087985  [22016/24872]:   0%|          | 0/388 [00:17<?, ?it/s]
loss: 0.161084  [22080/24872]:   0%|          | 0/388 [00:17<?, ?it/s]
loss: 0.113683  [22144/24872]:   0%|          | 0/388 [00:17<?, ?it/s]
loss: 0.115813  [22208/24872]:   0%|          | 0/388 [00:17<?, ?it/s]
loss: 0.152712  [22272/24872]:   0%|          | 0/388 [00:17<?, ?it/s]
loss: 0.186569  [22336/24872]:   0%|          | 0/388 [00:17<?, ?it/s]
loss: 0.098770  [22400/24872]:   0%|          | 0/388 [00:17<?, ?it/s]
loss: 0.113191  [22464/24872]:   0%|          | 0/388 [00:17<?, ?it/s]
loss: 0.128276  [22528/24872]:   0%|          | 0/388 [00:17<?, ?it/s]
loss: 0.090402  [22592/24872]:   0%|          | 0/388 [00:17<?, ?it/s]
loss: 0.186313  [22656/24872]:   0%|          | 0/388 [00:17<?, ?it/s]
loss: 0.138341  [22720/24872]:   0%|          | 0/388 [00:17<?, ?it/s]
loss: 0.145689  [22784/24872]:   0%|          | 0/388 [00:17<?, ?it/s]
loss: 0.165147  [22848/24872]:   0%|          | 0/388 [00:18<?, ?it/s]
loss: 0.097239  [22912/24872]:   0%|          | 0/388 [00:18<?, ?it/s]
loss: 0.094309  [22976/24872]:   0%|          | 0/388 [00:18<?, ?it/s]
loss: 0.160694  [23040/24872]:   0%|          | 0/388 [00:18<?, ?it/s]
loss: 0.163976  [23104/24872]:   0%|          | 0/388 [00:18<?, ?it/s]
loss: 0.146698  [23168/24872]:   0%|          | 0/388 [00:18<?, ?it/s]
loss: 0.101932  [23232/24872]:   0%|          | 0/388 [00:18<?, ?it/s]
loss: 0.193247  [23296/24872]:   0%|          | 0/388 [00:18<?, ?it/s]
loss: 0.104029  [23360/24872]:   0%|          | 0/388 [00:18<?, ?it/s]
loss: 0.136115  [23424/24872]:   0%|          | 0/388 [00:18<?, ?it/s]
loss: 0.155174  [23488/24872]:   0%|          | 0/388 [00:18<?, ?it/s]
loss: 0.149022  [23552/24872]:   0%|          | 0/388 [00:18<?, ?it/s]
loss: 0.134683  [23616/24872]:   0%|          | 0/388 [00:18<?, ?it/s]
loss: 0.141567  [23680/24872]:   0%|          | 0/388 [00:18<?, ?it/s]
loss: 0.136841  [23744/24872]:   0%|          | 0/388 [00:18<?, ?it/s]
loss: 0.122603  [23808/24872]:   0%|          | 0/388 [00:18<?, ?it/s]
loss: 0.100385  [23872/24872]:   0%|          | 0/388 [00:18<?, ?it/s]
loss: 0.182681  [23936/24872]:   0%|          | 0/388 [00:18<?, ?it/s]
loss: 0.096504  [24000/24872]:   0%|          | 0/388 [00:18<?, ?it/s]
loss: 0.125284  [24064/24872]:   0%|          | 0/388 [00:18<?, ?it/s]
loss: 0.132583  [24128/24872]:   0%|          | 0/388 [00:18<?, ?it/s]
loss: 0.138479  [24192/24872]:   0%|          | 0/388 [00:19<?, ?it/s]
loss: 0.087493  [24256/24872]:   0%|          | 0/388 [00:19<?, ?it/s]
loss: 0.114234  [24320/24872]:   0%|          | 0/388 [00:19<?, ?it/s]
loss: 0.120471  [24384/24872]:   0%|          | 0/388 [00:19<?, ?it/s]
loss: 0.158643  [24448/24872]:   0%|          | 0/388 [00:19<?, ?it/s]
loss: 0.129835  [24512/24872]:   0%|          | 0/388 [00:19<?, ?it/s]
loss: 0.128388  [24576/24872]:   0%|          | 0/388 [00:19<?, ?it/s]
loss: 0.129500  [24640/24872]:   0%|          | 0/388 [00:19<?, ?it/s]
loss: 0.098307  [24704/24872]:   0%|          | 0/388 [00:19<?, ?it/s]
loss: 0.165975  [24768/24872]:   0%|          | 0/388 [00:19<?, ?it/s]
loss: 0.141528  [24832/24872]:   0%|          | 0/388 [00:19<?, ?it/s]
loss: 0.095943  [24872/24872]:   0%|          | 0/388 [00:19<?, ?it/s]
loss: 0.095943  [24872/24872]: : 389it [00:19, 19.86it/s]
-------------------------------
LR=0.0001, batch_size=128
-------------------------------
Epoch 1, time=258.07s

  0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.142783  [  128/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.318197  [  256/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.272213  [  384/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.183752  [  512/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.180756  [  640/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.211264  [  768/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.254027  [  896/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.205519  [ 1024/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.148999  [ 1152/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.160338  [ 1280/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.173786  [ 1408/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.149375  [ 1536/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.154195  [ 1664/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.161542  [ 1792/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.143039  [ 1920/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.137011  [ 2048/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.163445  [ 2176/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.138007  [ 2304/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.144709  [ 2432/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.176787  [ 2560/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.124077  [ 2688/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.138912  [ 2816/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.117473  [ 2944/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.155616  [ 3072/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.145639  [ 3200/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.110078  [ 3328/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.114347  [ 3456/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.143942  [ 3584/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.109119  [ 3712/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.120746  [ 3840/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.121587  [ 3968/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.161608  [ 4096/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.117964  [ 4224/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.141754  [ 4352/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.131479  [ 4480/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.121943  [ 4608/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.136237  [ 4736/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.124513  [ 4864/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.130321  [ 4992/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.103479  [ 5120/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.141081  [ 5248/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.169484  [ 5376/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.113561  [ 5504/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.134486  [ 5632/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.143212  [ 5760/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.131322  [ 5888/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.107292  [ 6016/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.145813  [ 6144/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.113650  [ 6272/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.135935  [ 6400/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.114826  [ 6528/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.143539  [ 6656/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.119433  [ 6784/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.122040  [ 6912/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.129901  [ 7040/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.104806  [ 7168/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.125230  [ 7296/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.142775  [ 7424/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.129610  [ 7552/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.131659  [ 7680/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.152315  [ 7808/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.129623  [ 7936/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.122329  [ 8064/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.124513  [ 8192/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.106749  [ 8320/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.112249  [ 8448/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.139957  [ 8576/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.179237  [ 8704/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.121034  [ 8832/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.098906  [ 8960/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.129472  [ 9088/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.096442  [ 9216/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.119094  [ 9344/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.143925  [ 9472/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.103307  [ 9600/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.116584  [ 9728/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.116228  [ 9856/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.131462  [ 9984/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.112036  [10112/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.139025  [10240/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.108428  [10368/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.155180  [10496/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.135141  [10624/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.114785  [10752/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.124782  [10880/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.109052  [11008/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.159262  [11136/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.141523  [11264/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.107452  [11392/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.144349  [11520/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.140401  [11648/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.123751  [11776/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.132009  [11904/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.109795  [12032/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.168952  [12160/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.132988  [12288/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.127884  [12416/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.148005  [12544/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.109067  [12672/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.158999  [12800/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.113808  [12928/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.091711  [13056/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.138068  [13184/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.108629  [13312/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.105467  [13440/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.125233  [13568/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.128647  [13696/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.142498  [13824/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.113421  [13952/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.120031  [14080/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.123283  [14208/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.125616  [14336/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.103725  [14464/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.126271  [14592/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.145357  [14720/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.140381  [14848/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.148719  [14976/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.115973  [15104/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.117101  [15232/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.099240  [15360/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.125595  [15488/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.115461  [15616/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.123114  [15744/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.119111  [15872/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.114060  [16000/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.107653  [16128/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.151406  [16256/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.141402  [16384/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.117543  [16512/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.126884  [16640/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.135094  [16768/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.108448  [16896/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.110245  [17024/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.120682  [17152/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.100950  [17280/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.134823  [17408/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.146753  [17536/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.151681  [17664/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.126841  [17792/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.131457  [17920/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.099320  [18048/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.134232  [18176/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.144548  [18304/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.140356  [18432/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.108015  [18560/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.116562  [18688/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.105685  [18816/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.102667  [18944/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.115381  [19072/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.105418  [19200/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.126483  [19328/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.107248  [19456/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.126438  [19584/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.151572  [19712/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.116784  [19840/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.116939  [19968/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.126402  [20096/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.109497  [20224/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.129648  [20352/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.117655  [20480/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.115244  [20608/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.112004  [20736/24872]:   0%|          | 0/194 [00:12<?, ?it/s]
loss: 0.111157  [20864/24872]:   0%|          | 0/194 [00:12<?, ?it/s]
loss: 0.099139  [20992/24872]:   0%|          | 0/194 [00:12<?, ?it/s]
loss: 0.129789  [21120/24872]:   0%|          | 0/194 [00:12<?, ?it/s]
loss: 0.131012  [21248/24872]:   0%|          | 0/194 [00:12<?, ?it/s]
loss: 0.131856  [21376/24872]:   0%|          | 0/194 [00:12<?, ?it/s]
loss: 0.163520  [21504/24872]:   0%|          | 0/194 [00:12<?, ?it/s]
loss: 0.125261  [21632/24872]:   0%|          | 0/194 [00:12<?, ?it/s]
loss: 0.108464  [21760/24872]:   0%|          | 0/194 [00:12<?, ?it/s]
loss: 0.108581  [21888/24872]:   0%|          | 0/194 [00:12<?, ?it/s]
loss: 0.078611  [22016/24872]:   0%|          | 0/194 [00:12<?, ?it/s]
loss: 0.121967  [22144/24872]:   0%|          | 0/194 [00:12<?, ?it/s]
loss: 0.126070  [22272/24872]:   0%|          | 0/194 [00:12<?, ?it/s]
loss: 0.128368  [22400/24872]:   0%|          | 0/194 [00:12<?, ?it/s]
loss: 0.096561  [22528/24872]:   0%|          | 0/194 [00:13<?, ?it/s]
loss: 0.115507  [22656/24872]:   0%|          | 0/194 [00:13<?, ?it/s]
loss: 0.122155  [22784/24872]:   0%|          | 0/194 [00:13<?, ?it/s]
loss: 0.110513  [22912/24872]:   0%|          | 0/194 [00:13<?, ?it/s]
loss: 0.100221  [23040/24872]:   0%|          | 0/194 [00:13<?, ?it/s]
loss: 0.136212  [23168/24872]:   0%|          | 0/194 [00:13<?, ?it/s]
loss: 0.126761  [23296/24872]:   0%|          | 0/194 [00:13<?, ?it/s]
loss: 0.110471  [23424/24872]:   0%|          | 0/194 [00:13<?, ?it/s]
loss: 0.148485  [23552/24872]:   0%|          | 0/194 [00:13<?, ?it/s]
loss: 0.128029  [23680/24872]:   0%|          | 0/194 [00:13<?, ?it/s]
loss: 0.116628  [23808/24872]:   0%|          | 0/194 [00:13<?, ?it/s]
loss: 0.135596  [23936/24872]:   0%|          | 0/194 [00:13<?, ?it/s]
loss: 0.107224  [24064/24872]:   0%|          | 0/194 [00:13<?, ?it/s]
loss: 0.131007  [24192/24872]:   0%|          | 0/194 [00:13<?, ?it/s]
loss: 0.099302  [24320/24872]:   0%|          | 0/194 [00:14<?, ?it/s]
loss: 0.129039  [24448/24872]:   0%|          | 0/194 [00:14<?, ?it/s]
loss: 0.131201  [24576/24872]:   0%|          | 0/194 [00:14<?, ?it/s]
loss: 0.116311  [24704/24872]:   0%|          | 0/194 [00:14<?, ?it/s]
loss: 0.153669  [24832/24872]:   0%|          | 0/194 [00:14<?, ?it/s]
loss: 0.086235  [24872/24872]:   0%|          | 0/194 [00:14<?, ?it/s]
loss: 0.086235  [24872/24872]: : 195it [00:14, 13.53it/s]
Epoch 2, time=272.48s

  0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.134623  [  128/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.119232  [  256/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.136745  [  384/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.114870  [  512/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.134568  [  640/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.115862  [  768/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.155697  [  896/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.136399  [ 1024/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.102779  [ 1152/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.118638  [ 1280/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.137590  [ 1408/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.127191  [ 1536/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.117703  [ 1664/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.114808  [ 1792/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.111397  [ 1920/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.117870  [ 2048/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.137275  [ 2176/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.128198  [ 2304/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.123082  [ 2432/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.151196  [ 2560/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.106674  [ 2688/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.127233  [ 2816/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.098807  [ 2944/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.142984  [ 3072/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.127330  [ 3200/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.099469  [ 3328/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.106055  [ 3456/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.136757  [ 3584/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.100580  [ 3712/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.116262  [ 3840/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.116563  [ 3968/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.144449  [ 4096/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.111656  [ 4224/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.136463  [ 4352/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.122267  [ 4480/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.112985  [ 4608/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.122177  [ 4736/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.117762  [ 4864/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.119056  [ 4992/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.100033  [ 5120/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.130728  [ 5248/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.154210  [ 5376/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.108162  [ 5504/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.128323  [ 5632/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.131798  [ 5760/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.118803  [ 5888/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.099345  [ 6016/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.129726  [ 6144/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.110366  [ 6272/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.129185  [ 6400/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.104669  [ 6528/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.139017  [ 6656/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.116219  [ 6784/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.116885  [ 6912/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.128986  [ 7040/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.105740  [ 7168/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.122081  [ 7296/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.138013  [ 7424/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.115484  [ 7552/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.133302  [ 7680/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.146749  [ 7808/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.129267  [ 7936/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.119135  [ 8064/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.117060  [ 8192/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.102598  [ 8320/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.107959  [ 8448/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.134262  [ 8576/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.177738  [ 8704/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.113840  [ 8832/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.093147  [ 8960/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.121403  [ 9088/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.097801  [ 9216/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.113830  [ 9344/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.145325  [ 9472/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.102440  [ 9600/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.115192  [ 9728/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.111840  [ 9856/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.126908  [ 9984/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.111235  [10112/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.134948  [10240/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.099732  [10368/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.144566  [10496/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.128858  [10624/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.107292  [10752/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.116218  [10880/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.106663  [11008/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.160715  [11136/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.133773  [11264/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.101235  [11392/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.134379  [11520/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.132942  [11648/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.115131  [11776/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.125841  [11904/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.106869  [12032/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.160978  [12160/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.125563  [12288/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.124402  [12416/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.152891  [12544/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.104275  [12672/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.161321  [12800/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.111567  [12928/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.090528  [13056/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.134615  [13184/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.099569  [13312/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.120183  [13440/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.112915  [13568/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.129986  [13696/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.139516  [13824/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.112888  [13952/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.119465  [14080/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.122788  [14208/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.125716  [14336/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.105962  [14464/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.130771  [14592/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.144068  [14720/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.147262  [14848/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.142891  [14976/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.119377  [15104/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.115934  [15232/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.099897  [15360/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.119468  [15488/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.119953  [15616/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.118484  [15744/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.120725  [15872/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.113261  [16000/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.107098  [16128/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.153171  [16256/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.142657  [16384/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.117940  [16512/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.123757  [16640/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.129992  [16768/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.110345  [16896/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.106863  [17024/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.111898  [17152/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.099189  [17280/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.134159  [17408/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.141361  [17536/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.145049  [17664/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.121648  [17792/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.118776  [17920/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.094608  [18048/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.132361  [18176/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.137304  [18304/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.137545  [18432/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.106975  [18560/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.113879  [18688/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.107258  [18816/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.093988  [18944/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.113488  [19072/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.104167  [19200/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.128724  [19328/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.103781  [19456/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.122168  [19584/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.161501  [19712/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.113619  [19840/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.119271  [19968/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.122325  [20096/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.110657  [20224/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.121937  [20352/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.120951  [20480/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.113933  [20608/24872]:   0%|          | 0/194 [00:12<?, ?it/s]
loss: 0.105454  [20736/24872]:   0%|          | 0/194 [00:12<?, ?it/s]
loss: 0.113970  [20864/24872]:   0%|          | 0/194 [00:12<?, ?it/s]
loss: 0.096267  [20992/24872]:   0%|          | 0/194 [00:12<?, ?it/s]
loss: 0.125225  [21120/24872]:   0%|          | 0/194 [00:12<?, ?it/s]
loss: 0.120753  [21248/24872]:   0%|          | 0/194 [00:12<?, ?it/s]
loss: 0.123184  [21376/24872]:   0%|          | 0/194 [00:12<?, ?it/s]
loss: 0.157893  [21504/24872]:   0%|          | 0/194 [00:12<?, ?it/s]
loss: 0.122317  [21632/24872]:   0%|          | 0/194 [00:12<?, ?it/s]
loss: 0.100133  [21760/24872]:   0%|          | 0/194 [00:12<?, ?it/s]
loss: 0.108847  [21888/24872]:   0%|          | 0/194 [00:12<?, ?it/s]
loss: 0.072674  [22016/24872]:   0%|          | 0/194 [00:12<?, ?it/s]
loss: 0.119451  [22144/24872]:   0%|          | 0/194 [00:12<?, ?it/s]
loss: 0.118693  [22272/24872]:   0%|          | 0/194 [00:12<?, ?it/s]
loss: 0.125782  [22400/24872]:   0%|          | 0/194 [00:13<?, ?it/s]
loss: 0.096010  [22528/24872]:   0%|          | 0/194 [00:13<?, ?it/s]
loss: 0.109022  [22656/24872]:   0%|          | 0/194 [00:13<?, ?it/s]
loss: 0.121198  [22784/24872]:   0%|          | 0/194 [00:13<?, ?it/s]
loss: 0.108757  [22912/24872]:   0%|          | 0/194 [00:13<?, ?it/s]
loss: 0.095744  [23040/24872]:   0%|          | 0/194 [00:13<?, ?it/s]
loss: 0.125610  [23168/24872]:   0%|          | 0/194 [00:13<?, ?it/s]
loss: 0.125804  [23296/24872]:   0%|          | 0/194 [00:13<?, ?it/s]
loss: 0.109478  [23424/24872]:   0%|          | 0/194 [00:13<?, ?it/s]
loss: 0.143524  [23552/24872]:   0%|          | 0/194 [00:13<?, ?it/s]
loss: 0.122485  [23680/24872]:   0%|          | 0/194 [00:13<?, ?it/s]
loss: 0.111418  [23808/24872]:   0%|          | 0/194 [00:13<?, ?it/s]
loss: 0.130316  [23936/24872]:   0%|          | 0/194 [00:13<?, ?it/s]
loss: 0.096271  [24064/24872]:   0%|          | 0/194 [00:14<?, ?it/s]
loss: 0.128617  [24192/24872]:   0%|          | 0/194 [00:14<?, ?it/s]
loss: 0.092377  [24320/24872]:   0%|          | 0/194 [00:14<?, ?it/s]
loss: 0.127228  [24448/24872]:   0%|          | 0/194 [00:14<?, ?it/s]
loss: 0.124644  [24576/24872]:   0%|          | 0/194 [00:14<?, ?it/s]
loss: 0.124227  [24704/24872]:   0%|          | 0/194 [00:14<?, ?it/s]
loss: 0.143582  [24832/24872]:   0%|          | 0/194 [00:14<?, ?it/s]
loss: 0.082755  [24872/24872]:   0%|          | 0/194 [00:14<?, ?it/s]
loss: 0.082755  [24872/24872]: : 195it [00:14, 13.41it/s]
Epoch 3, time=287.02s

  0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.131183  [  128/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.115411  [  256/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.135472  [  384/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.111713  [  512/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.131303  [  640/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.112330  [  768/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.146282  [  896/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.135175  [ 1024/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.092074  [ 1152/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.124216  [ 1280/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.130108  [ 1408/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.124492  [ 1536/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.114300  [ 1664/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.120357  [ 1792/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.110993  [ 1920/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.114858  [ 2048/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.132353  [ 2176/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.121585  [ 2304/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.116004  [ 2432/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.140288  [ 2560/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.100123  [ 2688/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.121718  [ 2816/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.093496  [ 2944/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.139320  [ 3072/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.118435  [ 3200/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.091724  [ 3328/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.102656  [ 3456/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.133786  [ 3584/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.094763  [ 3712/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.113444  [ 3840/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.111846  [ 3968/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.138560  [ 4096/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.102418  [ 4224/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.132393  [ 4352/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.117541  [ 4480/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.109601  [ 4608/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.117767  [ 4736/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.109326  [ 4864/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.118783  [ 4992/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.092367  [ 5120/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.126557  [ 5248/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.146719  [ 5376/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.101276  [ 5504/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.120897  [ 5632/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.127130  [ 5760/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.112517  [ 5888/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.095301  [ 6016/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.126319  [ 6144/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.105550  [ 6272/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.121382  [ 6400/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.099205  [ 6528/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.132966  [ 6656/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.116213  [ 6784/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.116450  [ 6912/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.125870  [ 7040/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.100209  [ 7168/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.113273  [ 7296/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.136517  [ 7424/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.113304  [ 7552/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.123326  [ 7680/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.140467  [ 7808/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.125016  [ 7936/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.112649  [ 8064/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.109381  [ 8192/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.099857  [ 8320/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.104996  [ 8448/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.128408  [ 8576/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.164930  [ 8704/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.109327  [ 8832/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.092242  [ 8960/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.123328  [ 9088/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.094966  [ 9216/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.107485  [ 9344/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.146740  [ 9472/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.096786  [ 9600/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.111345  [ 9728/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.109797  [ 9856/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.126275  [ 9984/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.113937  [10112/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.132377  [10240/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.094756  [10368/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.138999  [10496/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.123384  [10624/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.108331  [10752/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.120514  [10880/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.109300  [11008/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.155128  [11136/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.134474  [11264/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.110377  [11392/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.133934  [11520/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.136011  [11648/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.114347  [11776/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.136989  [11904/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.111065  [12032/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.156486  [12160/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.123032  [12288/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.126556  [12416/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.140693  [12544/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.104426  [12672/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.151503  [12800/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.105915  [12928/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.085069  [13056/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.129436  [13184/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.093734  [13312/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.109588  [13440/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.105945  [13568/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.127582  [13696/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.138494  [13824/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.119249  [13952/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.114235  [14080/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.132059  [14208/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.122163  [14336/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.121669  [14464/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.127640  [14592/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.169965  [14720/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.148645  [14848/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.174580  [14976/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.113422  [15104/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.127839  [15232/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.107144  [15360/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.122315  [15488/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.112438  [15616/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.118583  [15744/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.120122  [15872/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.109015  [16000/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.111203  [16128/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.143609  [16256/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.134874  [16384/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.120774  [16512/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.117680  [16640/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.125879  [16768/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.106628  [16896/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.100233  [17024/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.111217  [17152/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.110894  [17280/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.136958  [17408/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.147019  [17536/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.146062  [17664/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.126774  [17792/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.114958  [17920/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.095776  [18048/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.131184  [18176/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.132970  [18304/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.140649  [18432/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.102592  [18560/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.116196  [18688/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.103257  [18816/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.101324  [18944/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.110814  [19072/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.102128  [19200/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.121497  [19328/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.102310  [19456/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.121376  [19584/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.156073  [19712/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.118237  [19840/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.117037  [19968/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.127026  [20096/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.104937  [20224/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.124237  [20352/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.111089  [20480/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.115952  [20608/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.108748  [20736/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.114660  [20864/24872]:   0%|          | 0/194 [00:12<?, ?it/s]
loss: 0.097005  [20992/24872]:   0%|          | 0/194 [00:12<?, ?it/s]
loss: 0.124526  [21120/24872]:   0%|          | 0/194 [00:12<?, ?it/s]
loss: 0.119477  [21248/24872]:   0%|          | 0/194 [00:12<?, ?it/s]
loss: 0.124000  [21376/24872]:   0%|          | 0/194 [00:12<?, ?it/s]
loss: 0.155048  [21504/24872]:   0%|          | 0/194 [00:12<?, ?it/s]
loss: 0.118285  [21632/24872]:   0%|          | 0/194 [00:12<?, ?it/s]
loss: 0.102102  [21760/24872]:   0%|          | 0/194 [00:12<?, ?it/s]
loss: 0.102854  [21888/24872]:   0%|          | 0/194 [00:12<?, ?it/s]
loss: 0.070230  [22016/24872]:   0%|          | 0/194 [00:12<?, ?it/s]
loss: 0.112320  [22144/24872]:   0%|          | 0/194 [00:12<?, ?it/s]
loss: 0.114210  [22272/24872]:   0%|          | 0/194 [00:12<?, ?it/s]
loss: 0.123069  [22400/24872]:   0%|          | 0/194 [00:12<?, ?it/s]
loss: 0.096218  [22528/24872]:   0%|          | 0/194 [00:12<?, ?it/s]
loss: 0.111220  [22656/24872]:   0%|          | 0/194 [00:13<?, ?it/s]
loss: 0.134113  [22784/24872]:   0%|          | 0/194 [00:13<?, ?it/s]
loss: 0.112398  [22912/24872]:   0%|          | 0/194 [00:13<?, ?it/s]
loss: 0.094525  [23040/24872]:   0%|          | 0/194 [00:13<?, ?it/s]
loss: 0.124890  [23168/24872]:   0%|          | 0/194 [00:13<?, ?it/s]
loss: 0.125654  [23296/24872]:   0%|          | 0/194 [00:13<?, ?it/s]
loss: 0.104904  [23424/24872]:   0%|          | 0/194 [00:13<?, ?it/s]
loss: 0.143613  [23552/24872]:   0%|          | 0/194 [00:13<?, ?it/s]
loss: 0.123411  [23680/24872]:   0%|          | 0/194 [00:13<?, ?it/s]
loss: 0.105253  [23808/24872]:   0%|          | 0/194 [00:13<?, ?it/s]
loss: 0.126238  [23936/24872]:   0%|          | 0/194 [00:13<?, ?it/s]
loss: 0.095436  [24064/24872]:   0%|          | 0/194 [00:13<?, ?it/s]
loss: 0.125939  [24192/24872]:   0%|          | 0/194 [00:13<?, ?it/s]
loss: 0.087280  [24320/24872]:   0%|          | 0/194 [00:13<?, ?it/s]
loss: 0.120026  [24448/24872]:   0%|          | 0/194 [00:14<?, ?it/s]
loss: 0.115811  [24576/24872]:   0%|          | 0/194 [00:14<?, ?it/s]
loss: 0.108638  [24704/24872]:   0%|          | 0/194 [00:14<?, ?it/s]
loss: 0.148531  [24832/24872]:   0%|          | 0/194 [00:14<?, ?it/s]
loss: 0.079670  [24872/24872]:   0%|          | 0/194 [00:14<?, ?it/s]
loss: 0.079670  [24872/24872]: : 195it [00:14, 13.62it/s]
Epoch 4, time=301.34s

  0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.131819  [  128/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.110132  [  256/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.136175  [  384/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.112239  [  512/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.134733  [  640/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.108992  [  768/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.146473  [  896/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.131443  [ 1024/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.099937  [ 1152/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.115626  [ 1280/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.133235  [ 1408/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.128370  [ 1536/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.113402  [ 1664/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.115748  [ 1792/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.107494  [ 1920/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.110735  [ 2048/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.134484  [ 2176/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.116377  [ 2304/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.117496  [ 2432/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.141698  [ 2560/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.100171  [ 2688/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.124069  [ 2816/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.093274  [ 2944/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.137288  [ 3072/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.114658  [ 3200/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.100126  [ 3328/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.111982  [ 3456/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.129430  [ 3584/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.097874  [ 3712/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.116814  [ 3840/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.110053  [ 3968/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.138603  [ 4096/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.110583  [ 4224/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.135824  [ 4352/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.116050  [ 4480/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.116418  [ 4608/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.131558  [ 4736/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.109786  [ 4864/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.121636  [ 4992/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.097637  [ 5120/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.124613  [ 5248/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.150021  [ 5376/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.095697  [ 5504/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.124753  [ 5632/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.129429  [ 5760/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.121563  [ 5888/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.092486  [ 6016/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.119901  [ 6144/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.106892  [ 6272/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.119599  [ 6400/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.102217  [ 6528/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.130353  [ 6656/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.111529  [ 6784/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.117489  [ 6912/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.122810  [ 7040/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.105058  [ 7168/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.107461  [ 7296/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.141280  [ 7424/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.107477  [ 7552/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.120831  [ 7680/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.145936  [ 7808/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.123732  [ 7936/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.110937  [ 8064/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.115023  [ 8192/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.100991  [ 8320/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.105866  [ 8448/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.125093  [ 8576/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.155260  [ 8704/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.107787  [ 8832/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.086298  [ 8960/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.107986  [ 9088/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.092380  [ 9216/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.104810  [ 9344/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.139197  [ 9472/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.098919  [ 9600/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.108215  [ 9728/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.108893  [ 9856/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.122436  [ 9984/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.106942  [10112/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.131091  [10240/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.088060  [10368/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.132457  [10496/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.118488  [10624/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.107550  [10752/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.115106  [10880/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.097515  [11008/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.151900  [11136/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.131238  [11264/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.107524  [11392/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.115849  [11520/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.132889  [11648/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.113571  [11776/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.125540  [11904/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.110698  [12032/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.154756  [12160/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.127722  [12288/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.115718  [12416/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.153086  [12544/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.104798  [12672/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.158382  [12800/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.105768  [12928/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.082447  [13056/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.133555  [13184/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.090361  [13312/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.107109  [13440/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.111904  [13568/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.129294  [13696/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.134384  [13824/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.107849  [13952/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.115574  [14080/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.120240  [14208/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.121741  [14336/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.104873  [14464/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.123284  [14592/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.136953  [14720/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.148041  [14848/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.149127  [14976/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.109158  [15104/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.110266  [15232/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.089882  [15360/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.107385  [15488/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.102670  [15616/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.106940  [15744/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.115440  [15872/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.099042  [16000/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.097060  [16128/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.141495  [16256/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.126009  [16384/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.114234  [16512/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.110865  [16640/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.123773  [16768/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.098922  [16896/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.099696  [17024/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.104422  [17152/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.101988  [17280/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.128203  [17408/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.142216  [17536/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.141351  [17664/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.119595  [17792/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.111129  [17920/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.088943  [18048/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.123242  [18176/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.124783  [18304/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.137231  [18432/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.091335  [18560/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.106250  [18688/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.092939  [18816/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.092253  [18944/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.101571  [19072/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.095096  [19200/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.117975  [19328/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.094658  [19456/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.110679  [19584/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.147018  [19712/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.105733  [19840/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.106206  [19968/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.112905  [20096/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.096843  [20224/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.113894  [20352/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.108496  [20480/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.104927  [20608/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.100431  [20736/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.104987  [20864/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.090293  [20992/24872]:   0%|          | 0/194 [00:12<?, ?it/s]
loss: 0.114833  [21120/24872]:   0%|          | 0/194 [00:12<?, ?it/s]
loss: 0.110074  [21248/24872]:   0%|          | 0/194 [00:12<?, ?it/s]
loss: 0.119553  [21376/24872]:   0%|          | 0/194 [00:12<?, ?it/s]
loss: 0.142086  [21504/24872]:   0%|          | 0/194 [00:12<?, ?it/s]
loss: 0.114770  [21632/24872]:   0%|          | 0/194 [00:12<?, ?it/s]
loss: 0.087989  [21760/24872]:   0%|          | 0/194 [00:12<?, ?it/s]
loss: 0.096026  [21888/24872]:   0%|          | 0/194 [00:12<?, ?it/s]
loss: 0.064134  [22016/24872]:   0%|          | 0/194 [00:12<?, ?it/s]
loss: 0.109899  [22144/24872]:   0%|          | 0/194 [00:12<?, ?it/s]
loss: 0.105860  [22272/24872]:   0%|          | 0/194 [00:12<?, ?it/s]
loss: 0.116549  [22400/24872]:   0%|          | 0/194 [00:12<?, ?it/s]
loss: 0.088718  [22528/24872]:   0%|          | 0/194 [00:12<?, ?it/s]
loss: 0.102508  [22656/24872]:   0%|          | 0/194 [00:12<?, ?it/s]
loss: 0.116250  [22784/24872]:   0%|          | 0/194 [00:13<?, ?it/s]
loss: 0.103099  [22912/24872]:   0%|          | 0/194 [00:13<?, ?it/s]
loss: 0.090342  [23040/24872]:   0%|          | 0/194 [00:13<?, ?it/s]
loss: 0.118584  [23168/24872]:   0%|          | 0/194 [00:13<?, ?it/s]
loss: 0.113713  [23296/24872]:   0%|          | 0/194 [00:13<?, ?it/s]
loss: 0.098214  [23424/24872]:   0%|          | 0/194 [00:13<?, ?it/s]
loss: 0.134687  [23552/24872]:   0%|          | 0/194 [00:13<?, ?it/s]
loss: 0.116145  [23680/24872]:   0%|          | 0/194 [00:13<?, ?it/s]
loss: 0.104334  [23808/24872]:   0%|          | 0/194 [00:13<?, ?it/s]
loss: 0.122931  [23936/24872]:   0%|          | 0/194 [00:13<?, ?it/s]
loss: 0.088084  [24064/24872]:   0%|          | 0/194 [00:13<?, ?it/s]
loss: 0.124307  [24192/24872]:   0%|          | 0/194 [00:13<?, ?it/s]
loss: 0.085473  [24320/24872]:   0%|          | 0/194 [00:13<?, ?it/s]
loss: 0.123072  [24448/24872]:   0%|          | 0/194 [00:14<?, ?it/s]
loss: 0.109664  [24576/24872]:   0%|          | 0/194 [00:14<?, ?it/s]
loss: 0.119419  [24704/24872]:   0%|          | 0/194 [00:14<?, ?it/s]
loss: 0.134288  [24832/24872]:   0%|          | 0/194 [00:14<?, ?it/s]
loss: 0.093022  [24872/24872]:   0%|          | 0/194 [00:14<?, ?it/s]
loss: 0.093022  [24872/24872]: : 195it [00:14, 13.65it/s]
Epoch 5, time=315.63s

  0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.137590  [  128/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.119256  [  256/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.137168  [  384/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.114021  [  512/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.134218  [  640/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.123077  [  768/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.144491  [  896/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.148835  [ 1024/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.093064  [ 1152/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.129541  [ 1280/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.131895  [ 1408/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.125880  [ 1536/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.119533  [ 1664/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.113792  [ 1792/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.120089  [ 1920/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.105405  [ 2048/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.148101  [ 2176/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.108919  [ 2304/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.123825  [ 2432/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.138459  [ 2560/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.105741  [ 2688/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.127338  [ 2816/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.089622  [ 2944/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.137716  [ 3072/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.118430  [ 3200/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.091397  [ 3328/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.105139  [ 3456/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.125403  [ 3584/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.099156  [ 3712/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.110578  [ 3840/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.114320  [ 3968/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.131220  [ 4096/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.113018  [ 4224/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.130038  [ 4352/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.120542  [ 4480/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.103373  [ 4608/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.115529  [ 4736/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.109406  [ 4864/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.120603  [ 4992/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.087242  [ 5120/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.132065  [ 5248/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.147542  [ 5376/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.096116  [ 5504/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.118912  [ 5632/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.127992  [ 5760/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.122630  [ 5888/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.080941  [ 6016/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.131834  [ 6144/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.096346  [ 6272/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.115440  [ 6400/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.099619  [ 6528/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.127709  [ 6656/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.124440  [ 6784/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.113790  [ 6912/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.120603  [ 7040/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.092718  [ 7168/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.113159  [ 7296/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.132649  [ 7424/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.116827  [ 7552/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.108842  [ 7680/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.134836  [ 7808/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.124693  [ 7936/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.103361  [ 8064/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.143659  [ 8192/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.101109  [ 8320/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.110589  [ 8448/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.134380  [ 8576/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.168423  [ 8704/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.122220  [ 8832/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.086950  [ 8960/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.124604  [ 9088/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.099307  [ 9216/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.106536  [ 9344/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.138139  [ 9472/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.095416  [ 9600/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.109421  [ 9728/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.116491  [ 9856/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.134610  [ 9984/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.129069  [10112/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.135629  [10240/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.106665  [10368/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.139741  [10496/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.130227  [10624/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.106398  [10752/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.127370  [10880/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.116084  [11008/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.157410  [11136/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.151888  [11264/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.113793  [11392/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.161509  [11520/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.136447  [11648/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.150747  [11776/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.118696  [11904/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.129229  [12032/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.177383  [12160/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.128321  [12288/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.161420  [12416/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.146295  [12544/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.149435  [12672/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.166109  [12800/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.112277  [12928/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.131646  [13056/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.129287  [13184/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.119797  [13312/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.112212  [13440/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.134179  [13568/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.140154  [13696/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.141359  [13824/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.117427  [13952/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.113662  [14080/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.120130  [14208/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.124672  [14336/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.103514  [14464/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.125431  [14592/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.141355  [14720/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.137648  [14848/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.151379  [14976/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.109059  [15104/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.111422  [15232/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.094234  [15360/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.118125  [15488/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.101204  [15616/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.110946  [15744/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.105649  [15872/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.104658  [16000/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.093996  [16128/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.139824  [16256/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.140329  [16384/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.110740  [16512/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.128654  [16640/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.113116  [16768/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.112224  [16896/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.095240  [17024/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.106570  [17152/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.100255  [17280/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.139633  [17408/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.132981  [17536/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.132696  [17664/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.126255  [17792/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.122774  [17920/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.088238  [18048/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.126808  [18176/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.124274  [18304/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.133747  [18432/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.090333  [18560/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.108478  [18688/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.096918  [18816/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.089130  [18944/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.111404  [19072/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.093377  [19200/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.119005  [19328/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.097407  [19456/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.115518  [19584/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.151030  [19712/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.117003  [19840/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.109576  [19968/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.114894  [20096/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.099277  [20224/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.116169  [20352/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.108952  [20480/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.108094  [20608/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.104682  [20736/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.103601  [20864/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.089387  [20992/24872]:   0%|          | 0/194 [00:12<?, ?it/s]
loss: 0.121680  [21120/24872]:   0%|          | 0/194 [00:12<?, ?it/s]
loss: 0.112756  [21248/24872]:   0%|          | 0/194 [00:12<?, ?it/s]
loss: 0.131191  [21376/24872]:   0%|          | 0/194 [00:12<?, ?it/s]
loss: 0.146375  [21504/24872]:   0%|          | 0/194 [00:12<?, ?it/s]
loss: 0.116694  [21632/24872]:   0%|          | 0/194 [00:12<?, ?it/s]
loss: 0.114014  [21760/24872]:   0%|          | 0/194 [00:12<?, ?it/s]
loss: 0.098410  [21888/24872]:   0%|          | 0/194 [00:12<?, ?it/s]
loss: 0.073564  [22016/24872]:   0%|          | 0/194 [00:12<?, ?it/s]
loss: 0.117909  [22144/24872]:   0%|          | 0/194 [00:12<?, ?it/s]
loss: 0.112806  [22272/24872]:   0%|          | 0/194 [00:12<?, ?it/s]
loss: 0.118973  [22400/24872]:   0%|          | 0/194 [00:12<?, ?it/s]
loss: 0.091619  [22528/24872]:   0%|          | 0/194 [00:12<?, ?it/s]
loss: 0.108238  [22656/24872]:   0%|          | 0/194 [00:12<?, ?it/s]
loss: 0.120878  [22784/24872]:   0%|          | 0/194 [00:13<?, ?it/s]
loss: 0.105933  [22912/24872]:   0%|          | 0/194 [00:13<?, ?it/s]
loss: 0.093504  [23040/24872]:   0%|          | 0/194 [00:13<?, ?it/s]
loss: 0.123811  [23168/24872]:   0%|          | 0/194 [00:13<?, ?it/s]
loss: 0.114011  [23296/24872]:   0%|          | 0/194 [00:13<?, ?it/s]
loss: 0.103896  [23424/24872]:   0%|          | 0/194 [00:13<?, ?it/s]
loss: 0.135402  [23552/24872]:   0%|          | 0/194 [00:13<?, ?it/s]
loss: 0.120973  [23680/24872]:   0%|          | 0/194 [00:13<?, ?it/s]
loss: 0.110659  [23808/24872]:   0%|          | 0/194 [00:13<?, ?it/s]
loss: 0.129478  [23936/24872]:   0%|          | 0/194 [00:13<?, ?it/s]
loss: 0.095601  [24064/24872]:   0%|          | 0/194 [00:13<?, ?it/s]
loss: 0.121487  [24192/24872]:   0%|          | 0/194 [00:13<?, ?it/s]
loss: 0.101734  [24320/24872]:   0%|          | 0/194 [00:13<?, ?it/s]
loss: 0.116658  [24448/24872]:   0%|          | 0/194 [00:13<?, ?it/s]
loss: 0.121540  [24576/24872]:   0%|          | 0/194 [00:14<?, ?it/s]
loss: 0.110265  [24704/24872]:   0%|          | 0/194 [00:14<?, ?it/s]
loss: 0.149018  [24832/24872]:   0%|          | 0/194 [00:14<?, ?it/s]
loss: 0.085656  [24872/24872]:   0%|          | 0/194 [00:14<?, ?it/s]
loss: 0.085656  [24872/24872]: : 195it [00:14, 13.69it/s]
Epoch 6, time=329.87s

  0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.134137  [  128/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.110612  [  256/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.139018  [  384/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.115517  [  512/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.119020  [  640/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.113061  [  768/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.138482  [  896/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.128189  [ 1024/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.092866  [ 1152/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.113975  [ 1280/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.127168  [ 1408/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.114337  [ 1536/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.108250  [ 1664/24872]:   0%|          | 0/194 [00:00<?, ?it/s]
loss: 0.103087  [ 1792/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.098561  [ 1920/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.101804  [ 2048/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.124222  [ 2176/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.109715  [ 2304/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.113548  [ 2432/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.130200  [ 2560/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.092233  [ 2688/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.116961  [ 2816/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.087786  [ 2944/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.129353  [ 3072/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.114649  [ 3200/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.082553  [ 3328/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.104462  [ 3456/24872]:   0%|          | 0/194 [00:01<?, ?it/s]
loss: 0.125414  [ 3584/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.091200  [ 3712/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.117250  [ 3840/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.099045  [ 3968/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.135341  [ 4096/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.098832  [ 4224/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.139267  [ 4352/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.115358  [ 4480/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.108932  [ 4608/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.125217  [ 4736/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.114053  [ 4864/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.125561  [ 4992/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.093612  [ 5120/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.129126  [ 5248/24872]:   0%|          | 0/194 [00:02<?, ?it/s]
loss: 0.149738  [ 5376/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.093303  [ 5504/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.132159  [ 5632/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.124177  [ 5760/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.126998  [ 5888/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.084849  [ 6016/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.122916  [ 6144/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.095847  [ 6272/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.108029  [ 6400/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.095377  [ 6528/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.129020  [ 6656/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.109150  [ 6784/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.114417  [ 6912/24872]:   0%|          | 0/194 [00:03<?, ?it/s]
loss: 0.128099  [ 7040/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.093441  [ 7168/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.108748  [ 7296/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.125193  [ 7424/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.113423  [ 7552/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.099983  [ 7680/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.134876  [ 7808/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.119904  [ 7936/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.101767  [ 8064/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.122994  [ 8192/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.109261  [ 8320/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.102913  [ 8448/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.126033  [ 8576/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.140794  [ 8704/24872]:   0%|          | 0/194 [00:04<?, ?it/s]
loss: 0.114285  [ 8832/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.093641  [ 8960/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.107880  [ 9088/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.094826  [ 9216/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.100763  [ 9344/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.139321  [ 9472/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.093949  [ 9600/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.100279  [ 9728/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.109016  [ 9856/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.113281  [ 9984/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.113341  [10112/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.128662  [10240/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.085905  [10368/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.132086  [10496/24872]:   0%|          | 0/194 [00:05<?, ?it/s]
loss: 0.110434  [10624/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.103635  [10752/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.110090  [10880/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.099561  [11008/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.140769  [11136/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.133724  [11264/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.094479  [11392/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.120784  [11520/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.121219  [11648/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.098241  [11776/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.115778  [11904/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.099495  [12032/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.153667  [12160/24872]:   0%|          | 0/194 [00:06<?, ?it/s]
loss: 0.113109  [12288/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.113176  [12416/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.131324  [12544/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.100558  [12672/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.156365  [12800/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.099964  [12928/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.084029  [13056/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.116693  [13184/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.092485  [13312/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.094180  [13440/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.110004  [13568/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.119312  [13696/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.146285  [13824/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.115738  [13952/24872]:   0%|          | 0/194 [00:07<?, ?it/s]
loss: 0.102809  [14080/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.116945  [14208/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.117585  [14336/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.100475  [14464/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.125208  [14592/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.139306  [14720/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.138471  [14848/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.138715  [14976/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.103248  [15104/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.108418  [15232/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.092233  [15360/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.117818  [15488/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.100584  [15616/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.107023  [15744/24872]:   0%|          | 0/194 [00:08<?, ?it/s]
loss: 0.114250  [15872/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.093594  [16000/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.086161  [16128/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.136947  [16256/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.130386  [16384/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.111395  [16512/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.111068  [16640/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.104959  [16768/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.106656  [16896/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.090164  [17024/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.103537  [17152/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.091773  [17280/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.118975  [17408/24872]:   0%|          | 0/194 [00:09<?, ?it/s]
loss: 0.134241  [17536/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.141536  [17664/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.138072  [17792/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.100548  [17920/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.088636  [18048/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.128640  [18176/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.122943  [18304/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.123983  [18432/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.088342  [18560/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.112204  [18688/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.087098  [18816/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.090309  [18944/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.110186  [19072/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.088190  [19200/24872]:   0%|          | 0/194 [00:10<?, ?it/s]
loss: 0.115488  [19328/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.093798  [19456/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.117234  [19584/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.125436  [19712/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.110252  [19840/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.102482  [19968/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.110100  [20096/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.096619  [20224/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.113607  [20352/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.105015  [20480/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.101021  [20608/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.095846  [20736/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.101375  [20864/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.092782  [20992/24872]:   0%|          | 0/194 [00:11<?, ?it/s]
loss: 0.119663  [21120/24872]:   0%|          | 0/194 [00:12<?, ?it/s]
loss: 0.106763  [21248/24872]:   0%|          | 0/194 [00:12<?, ?it/s]
loss: 0.125835  [21376/24872]:   0%|          | 0/194 [00:12<?, ?it/s]
loss: 0.133247  [21504/24872]:   0%|          | 0/194 [00:12<?, ?it/s]
loss: 0.113288  [21632/24872]:   0%|          | 0/194 [00:12<?, ?it/s]
loss: 0.091302  [21760/24872]:   0%|          | 0/194 [00:12<?, ?it/s]
loss: 0.093961  [21888/24872]:   0%|          | 0/194 [00:12<?, ?it/s]
loss: 0.066603  [22016/24872]:   0%|          | 0/194 [00:12<?, ?it/s]
loss: 0.109814  [22144/24872]:   0%|          | 0/194 [00:12<?, ?it/s]
loss: 0.099878  [22272/24872]:   0%|          | 0/194 [00:12<?, ?it/s]
loss: 0.121629  [22400/24872]:   0%|          | 0/194 [00:12<?, ?it/s]
loss: 0.086501  [22528/24872]:   0%|          | 0/194 [00:12<?, ?it/s]
loss: 0.100002  [22656/24872]:   0%|          | 0/194 [00:12<?, ?it/s]
loss: 0.113477  [22784/24872]:   0%|          | 0/194 [00:12<?, ?it/s]
loss: 0.102238  [22912/24872]:   0%|          | 0/194 [00:13<?, ?it/s]
loss: 0.085228  [23040/24872]:   0%|          | 0/194 [00:13<?, ?it/s]
loss: 0.115822  [23168/24872]:   0%|          | 0/194 [00:13<?, ?it/s]
loss: 0.104329  [23296/24872]:   0%|          | 0/194 [00:13<?, ?it/s]
loss: 0.092152  [23424/24872]:   0%|          | 0/194 [00:13<?, ?it/s]
loss: 0.109461  [23552/24872]:   0%|          | 0/194 [00:13<?, ?it/s]
loss: 0.117103  [23680/24872]:   0%|          | 0/194 [00:13<?, ?it/s]
loss: 0.108090  [23808/24872]:   0%|          | 0/194 [00:13<?, ?it/s]
loss: 0.121995  [23936/24872]:   0%|          | 0/194 [00:13<?, ?it/s]
loss: 0.089504  [24064/24872]:   0%|          | 0/194 [00:13<?, ?it/s]
loss: 0.123766  [24192/24872]:   0%|          | 0/194 [00:13<?, ?it/s]
loss: 0.096302  [24320/24872]:   0%|          | 0/194 [00:13<?, ?it/s]
loss: 0.106290  [24448/24872]:   0%|          | 0/194 [00:13<?, ?it/s]
loss: 0.101002  [24576/24872]:   0%|          | 0/194 [00:14<?, ?it/s]
loss: 0.111388  [24704/24872]:   0%|          | 0/194 [00:14<?, ?it/s]
loss: 0.118918  [24832/24872]:   0%|          | 0/194 [00:14<?, ?it/s]
loss: 0.089259  [24872/24872]:   0%|          | 0/194 [00:14<?, ?it/s]
loss: 0.089259  [24872/24872]: : 195it [00:14, 13.73it/s]
-------------------------------
LR=1e-05, batch_size=256
-------------------------------
Epoch 1, time=344.07s

  0%|          | 0/97 [00:00<?, ?it/s]
loss: 0.108217  [  256/24872]:   0%|          | 0/97 [00:00<?, ?it/s]
loss: 0.110070  [  512/24872]:   0%|          | 0/97 [00:00<?, ?it/s]
loss: 0.096328  [  768/24872]:   0%|          | 0/97 [00:00<?, ?it/s]
loss: 0.125296  [ 1024/24872]:   0%|          | 0/97 [00:00<?, ?it/s]
loss: 0.090557  [ 1280/24872]:   0%|          | 0/97 [00:00<?, ?it/s]
loss: 0.115912  [ 1536/24872]:   0%|          | 0/97 [00:00<?, ?it/s]
loss: 0.092941  [ 1792/24872]:   0%|          | 0/97 [00:00<?, ?it/s]
loss: 0.092029  [ 2048/24872]:   0%|          | 0/97 [00:00<?, ?it/s]
loss: 0.109067  [ 2304/24872]:   0%|          | 0/97 [00:01<?, ?it/s]
loss: 0.100311  [ 2560/24872]:   0%|          | 0/97 [00:01<?, ?it/s]
loss: 0.097294  [ 2816/24872]:   0%|          | 0/97 [00:01<?, ?it/s]
loss: 0.101872  [ 3072/24872]:   0%|          | 0/97 [00:01<?, ?it/s]
loss: 0.096516  [ 3328/24872]:   0%|          | 0/97 [00:01<?, ?it/s]
loss: 0.101936  [ 3584/24872]:   0%|          | 0/97 [00:01<?, ?it/s]
loss: 0.088370  [ 3840/24872]:   0%|          | 0/97 [00:01<?, ?it/s]
loss: 0.114843  [ 4096/24872]:   0%|          | 0/97 [00:01<?, ?it/s]
loss: 0.109611  [ 4352/24872]:   0%|          | 0/97 [00:01<?, ?it/s]
loss: 0.097164  [ 4608/24872]:   0%|          | 0/97 [00:02<?, ?it/s]
loss: 0.093797  [ 4864/24872]:   0%|          | 0/97 [00:02<?, ?it/s]
loss: 0.093911  [ 5120/24872]:   0%|          | 0/97 [00:02<?, ?it/s]
loss: 0.129108  [ 5376/24872]:   0%|          | 0/97 [00:02<?, ?it/s]
loss: 0.096821  [ 5632/24872]:   0%|          | 0/97 [00:02<?, ?it/s]
loss: 0.101076  [ 5888/24872]:   0%|          | 0/97 [00:02<?, ?it/s]
loss: 0.088607  [ 6144/24872]:   0%|          | 0/97 [00:02<?, ?it/s]
loss: 0.091945  [ 6400/24872]:   0%|          | 0/97 [00:02<?, ?it/s]
loss: 0.102855  [ 6656/24872]:   0%|          | 0/97 [00:02<?, ?it/s]
loss: 0.101746  [ 6912/24872]:   0%|          | 0/97 [00:03<?, ?it/s]
loss: 0.091180  [ 7168/24872]:   0%|          | 0/97 [00:03<?, ?it/s]
loss: 0.094524  [ 7424/24872]:   0%|          | 0/97 [00:03<?, ?it/s]
loss: 0.098454  [ 7680/24872]:   0%|          | 0/97 [00:03<?, ?it/s]
loss: 0.112366  [ 7936/24872]:   0%|          | 0/97 [00:03<?, ?it/s]
loss: 0.115194  [ 8192/24872]:   0%|          | 0/97 [00:03<?, ?it/s]
loss: 0.092214  [ 8448/24872]:   0%|          | 0/97 [00:03<?, ?it/s]
loss: 0.111684  [ 8704/24872]:   0%|          | 0/97 [00:03<?, ?it/s]
loss: 0.089302  [ 8960/24872]:   0%|          | 0/97 [00:03<?, ?it/s]
loss: 0.084426  [ 9216/24872]:   0%|          | 0/97 [00:03<?, ?it/s]
loss: 0.107188  [ 9472/24872]:   0%|          | 0/97 [00:04<?, ?it/s]
loss: 0.085098  [ 9728/24872]:   0%|          | 0/97 [00:04<?, ?it/s]
loss: 0.099955  [ 9984/24872]:   0%|          | 0/97 [00:04<?, ?it/s]
loss: 0.106329  [10240/24872]:   0%|          | 0/97 [00:04<?, ?it/s]
loss: 0.095469  [10496/24872]:   0%|          | 0/97 [00:04<?, ?it/s]
loss: 0.098792  [10752/24872]:   0%|          | 0/97 [00:04<?, ?it/s]
loss: 0.094446  [11008/24872]:   0%|          | 0/97 [00:04<?, ?it/s]
loss: 0.122694  [11264/24872]:   0%|          | 0/97 [00:04<?, ?it/s]
loss: 0.092548  [11520/24872]:   0%|          | 0/97 [00:04<?, ?it/s]
loss: 0.103076  [11776/24872]:   0%|          | 0/97 [00:05<?, ?it/s]
loss: 0.095084  [12032/24872]:   0%|          | 0/97 [00:05<?, ?it/s]
loss: 0.117496  [12288/24872]:   0%|          | 0/97 [00:05<?, ?it/s]
loss: 0.112452  [12544/24872]:   0%|          | 0/97 [00:05<?, ?it/s]
loss: 0.113610  [12800/24872]:   0%|          | 0/97 [00:05<?, ?it/s]
loss: 0.080341  [13056/24872]:   0%|          | 0/97 [00:05<?, ?it/s]
loss: 0.090865  [13312/24872]:   0%|          | 0/97 [00:05<?, ?it/s]
loss: 0.091854  [13568/24872]:   0%|          | 0/97 [00:05<?, ?it/s]
loss: 0.101819  [13824/24872]:   0%|          | 0/97 [00:05<?, ?it/s]
loss: 0.092054  [14080/24872]:   0%|          | 0/97 [00:06<?, ?it/s]
loss: 0.106429  [14336/24872]:   0%|          | 0/97 [00:06<?, ?it/s]
loss: 0.096450  [14592/24872]:   0%|          | 0/97 [00:06<?, ?it/s]
loss: 0.116510  [14848/24872]:   0%|          | 0/97 [00:06<?, ?it/s]
loss: 0.107026  [15104/24872]:   0%|          | 0/97 [00:06<?, ?it/s]
loss: 0.087298  [15360/24872]:   0%|          | 0/97 [00:06<?, ?it/s]
loss: 0.091552  [15616/24872]:   0%|          | 0/97 [00:06<?, ?it/s]
loss: 0.098521  [15872/24872]:   0%|          | 0/97 [00:06<?, ?it/s]
loss: 0.082908  [16128/24872]:   0%|          | 0/97 [00:06<?, ?it/s]
loss: 0.121091  [16384/24872]:   0%|          | 0/97 [00:07<?, ?it/s]
loss: 0.099262  [16640/24872]:   0%|          | 0/97 [00:07<?, ?it/s]
loss: 0.097785  [16896/24872]:   0%|          | 0/97 [00:07<?, ?it/s]
loss: 0.088848  [17152/24872]:   0%|          | 0/97 [00:07<?, ?it/s]
loss: 0.091694  [17408/24872]:   0%|          | 0/97 [00:07<?, ?it/s]
loss: 0.130108  [17664/24872]:   0%|          | 0/97 [00:07<?, ?it/s]
loss: 0.095430  [17920/24872]:   0%|          | 0/97 [00:07<?, ?it/s]
loss: 0.093790  [18176/24872]:   0%|          | 0/97 [00:07<?, ?it/s]
loss: 0.113521  [18432/24872]:   0%|          | 0/97 [00:07<?, ?it/s]
loss: 0.083267  [18688/24872]:   0%|          | 0/97 [00:08<?, ?it/s]
loss: 0.083952  [18944/24872]:   0%|          | 0/97 [00:08<?, ?it/s]
loss: 0.088877  [19200/24872]:   0%|          | 0/97 [00:08<?, ?it/s]
loss: 0.094408  [19456/24872]:   0%|          | 0/97 [00:08<?, ?it/s]
loss: 0.105448  [19712/24872]:   0%|          | 0/97 [00:08<?, ?it/s]
loss: 0.098053  [19968/24872]:   0%|          | 0/97 [00:08<?, ?it/s]
loss: 0.094364  [20224/24872]:   0%|          | 0/97 [00:08<?, ?it/s]
loss: 0.102520  [20480/24872]:   0%|          | 0/97 [00:08<?, ?it/s]
loss: 0.089437  [20736/24872]:   0%|          | 0/97 [00:08<?, ?it/s]
loss: 0.089354  [20992/24872]:   0%|          | 0/97 [00:09<?, ?it/s]
loss: 0.101166  [21248/24872]:   0%|          | 0/97 [00:09<?, ?it/s]
loss: 0.111342  [21504/24872]:   0%|          | 0/97 [00:09<?, ?it/s]
loss: 0.092791  [21760/24872]:   0%|          | 0/97 [00:09<?, ?it/s]
loss: 0.071775  [22016/24872]:   0%|          | 0/97 [00:09<?, ?it/s]
loss: 0.096323  [22272/24872]:   0%|          | 0/97 [00:09<?, ?it/s]
loss: 0.097537  [22528/24872]:   0%|          | 0/97 [00:09<?, ?it/s]
loss: 0.093437  [22784/24872]:   0%|          | 0/97 [00:09<?, ?it/s]
loss: 0.086087  [23040/24872]:   0%|          | 0/97 [00:09<?, ?it/s]
loss: 0.099953  [23296/24872]:   0%|          | 0/97 [00:10<?, ?it/s]
loss: 0.089185  [23552/24872]:   0%|          | 0/97 [00:10<?, ?it/s]
loss: 0.102087  [23808/24872]:   0%|          | 0/97 [00:10<?, ?it/s]
loss: 0.095477  [24064/24872]:   0%|          | 0/97 [00:10<?, ?it/s]
loss: 0.100683  [24320/24872]:   0%|          | 0/97 [00:10<?, ?it/s]
loss: 0.095420  [24576/24872]:   0%|          | 0/97 [00:10<?, ?it/s]
loss: 0.104947  [24832/24872]:   0%|          | 0/97 [00:10<?, ?it/s]
loss: 0.080749  [24872/24872]:   0%|          | 0/97 [00:10<?, ?it/s]
loss: 0.080749  [24872/24872]: : 98it [00:10,  9.13it/s]
Epoch 2, time=354.81s

  0%|          | 0/97 [00:00<?, ?it/s]
loss: 0.099365  [  256/24872]:   0%|          | 0/97 [00:00<?, ?it/s]
loss: 0.102729  [  512/24872]:   0%|          | 0/97 [00:00<?, ?it/s]
loss: 0.091872  [  768/24872]:   0%|          | 0/97 [00:00<?, ?it/s]
loss: 0.119069  [ 1024/24872]:   0%|          | 0/97 [00:00<?, ?it/s]
loss: 0.085731  [ 1280/24872]:   0%|          | 0/97 [00:00<?, ?it/s]
loss: 0.109031  [ 1536/24872]:   0%|          | 0/97 [00:00<?, ?it/s]
loss: 0.089242  [ 1792/24872]:   0%|          | 0/97 [00:00<?, ?it/s]
loss: 0.090156  [ 2048/24872]:   0%|          | 0/97 [00:00<?, ?it/s]
loss: 0.105308  [ 2304/24872]:   0%|          | 0/97 [00:00<?, ?it/s]
loss: 0.096462  [ 2560/24872]:   0%|          | 0/97 [00:01<?, ?it/s]
loss: 0.095333  [ 2816/24872]:   0%|          | 0/97 [00:01<?, ?it/s]
loss: 0.098911  [ 3072/24872]:   0%|          | 0/97 [00:01<?, ?it/s]
loss: 0.091997  [ 3328/24872]:   0%|          | 0/97 [00:01<?, ?it/s]
loss: 0.099408  [ 3584/24872]:   0%|          | 0/97 [00:01<?, ?it/s]
loss: 0.085358  [ 3840/24872]:   0%|          | 0/97 [00:01<?, ?it/s]
loss: 0.108740  [ 4096/24872]:   0%|          | 0/97 [00:01<?, ?it/s]
loss: 0.105579  [ 4352/24872]:   0%|          | 0/97 [00:01<?, ?it/s]
loss: 0.094042  [ 4608/24872]:   0%|          | 0/97 [00:01<?, ?it/s]
loss: 0.091163  [ 4864/24872]:   0%|          | 0/97 [00:02<?, ?it/s]
loss: 0.089644  [ 5120/24872]:   0%|          | 0/97 [00:02<?, ?it/s]
loss: 0.124077  [ 5376/24872]:   0%|          | 0/97 [00:02<?, ?it/s]
loss: 0.093196  [ 5632/24872]:   0%|          | 0/97 [00:02<?, ?it/s]
loss: 0.099393  [ 5888/24872]:   0%|          | 0/97 [00:02<?, ?it/s]
loss: 0.086619  [ 6144/24872]:   0%|          | 0/97 [00:02<?, ?it/s]
loss: 0.088522  [ 6400/24872]:   0%|          | 0/97 [00:02<?, ?it/s]
loss: 0.100332  [ 6656/24872]:   0%|          | 0/97 [00:02<?, ?it/s]
loss: 0.098775  [ 6912/24872]:   0%|          | 0/97 [00:02<?, ?it/s]
loss: 0.088652  [ 7168/24872]:   0%|          | 0/97 [00:03<?, ?it/s]
loss: 0.091481  [ 7424/24872]:   0%|          | 0/97 [00:03<?, ?it/s]
loss: 0.094267  [ 7680/24872]:   0%|          | 0/97 [00:03<?, ?it/s]
loss: 0.108011  [ 7936/24872]:   0%|          | 0/97 [00:03<?, ?it/s]
loss: 0.109504  [ 8192/24872]:   0%|          | 0/97 [00:03<?, ?it/s]
loss: 0.090827  [ 8448/24872]:   0%|          | 0/97 [00:03<?, ?it/s]
loss: 0.108515  [ 8704/24872]:   0%|          | 0/97 [00:03<?, ?it/s]
loss: 0.087284  [ 8960/24872]:   0%|          | 0/97 [00:03<?, ?it/s]
loss: 0.082089  [ 9216/24872]:   0%|          | 0/97 [00:03<?, ?it/s]
loss: 0.105112  [ 9472/24872]:   0%|          | 0/97 [00:04<?, ?it/s]
loss: 0.084911  [ 9728/24872]:   0%|          | 0/97 [00:04<?, ?it/s]
loss: 0.099163  [ 9984/24872]:   0%|          | 0/97 [00:04<?, ?it/s]
loss: 0.103898  [10240/24872]:   0%|          | 0/97 [00:04<?, ?it/s]
loss: 0.093089  [10496/24872]:   0%|          | 0/97 [00:04<?, ?it/s]
loss: 0.097299  [10752/24872]:   0%|          | 0/97 [00:04<?, ?it/s]
loss: 0.091984  [11008/24872]:   0%|          | 0/97 [00:04<?, ?it/s]
loss: 0.119832  [11264/24872]:   0%|          | 0/97 [00:04<?, ?it/s]
loss: 0.089681  [11520/24872]:   0%|          | 0/97 [00:04<?, ?it/s]
loss: 0.101378  [11776/24872]:   0%|          | 0/97 [00:05<?, ?it/s]
loss: 0.094047  [12032/24872]:   0%|          | 0/97 [00:05<?, ?it/s]
loss: 0.115870  [12288/24872]:   0%|          | 0/97 [00:05<?, ?it/s]
loss: 0.109771  [12544/24872]:   0%|          | 0/97 [00:05<?, ?it/s]
loss: 0.111430  [12800/24872]:   0%|          | 0/97 [00:05<?, ?it/s]
loss: 0.078878  [13056/24872]:   0%|          | 0/97 [00:05<?, ?it/s]
loss: 0.087858  [13312/24872]:   0%|          | 0/97 [00:05<?, ?it/s]
loss: 0.090621  [13568/24872]:   0%|          | 0/97 [00:05<?, ?it/s]
loss: 0.100615  [13824/24872]:   0%|          | 0/97 [00:05<?, ?it/s]
loss: 0.090473  [14080/24872]:   0%|          | 0/97 [00:05<?, ?it/s]
loss: 0.104374  [14336/24872]:   0%|          | 0/97 [00:06<?, ?it/s]
loss: 0.095820  [14592/24872]:   0%|          | 0/97 [00:06<?, ?it/s]
loss: 0.114463  [14848/24872]:   0%|          | 0/97 [00:06<?, ?it/s]
loss: 0.104053  [15104/24872]:   0%|          | 0/97 [00:06<?, ?it/s]
loss: 0.086163  [15360/24872]:   0%|          | 0/97 [00:06<?, ?it/s]
loss: 0.090467  [15616/24872]:   0%|          | 0/97 [00:06<?, ?it/s]
loss: 0.095957  [15872/24872]:   0%|          | 0/97 [00:06<?, ?it/s]
loss: 0.081628  [16128/24872]:   0%|          | 0/97 [00:06<?, ?it/s]
loss: 0.119585  [16384/24872]:   0%|          | 0/97 [00:06<?, ?it/s]
loss: 0.099024  [16640/24872]:   0%|          | 0/97 [00:07<?, ?it/s]
loss: 0.097244  [16896/24872]:   0%|          | 0/97 [00:07<?, ?it/s]
loss: 0.086191  [17152/24872]:   0%|          | 0/97 [00:07<?, ?it/s]
loss: 0.090041  [17408/24872]:   0%|          | 0/97 [00:07<?, ?it/s]
loss: 0.128586  [17664/24872]:   0%|          | 0/97 [00:07<?, ?it/s]
loss: 0.094702  [17920/24872]:   0%|          | 0/97 [00:07<?, ?it/s]
loss: 0.092230  [18176/24872]:   0%|          | 0/97 [00:07<?, ?it/s]
loss: 0.111931  [18432/24872]:   0%|          | 0/97 [00:07<?, ?it/s]
loss: 0.082786  [18688/24872]:   0%|          | 0/97 [00:07<?, ?it/s]
loss: 0.083355  [18944/24872]:   0%|          | 0/97 [00:08<?, ?it/s]
loss: 0.087008  [19200/24872]:   0%|          | 0/97 [00:08<?, ?it/s]
loss: 0.092766  [19456/24872]:   0%|          | 0/97 [00:08<?, ?it/s]
loss: 0.104114  [19712/24872]:   0%|          | 0/97 [00:08<?, ?it/s]
loss: 0.097143  [19968/24872]:   0%|          | 0/97 [00:08<?, ?it/s]
loss: 0.092933  [20224/24872]:   0%|          | 0/97 [00:08<?, ?it/s]
loss: 0.101929  [20480/24872]:   0%|          | 0/97 [00:08<?, ?it/s]
loss: 0.088154  [20736/24872]:   0%|          | 0/97 [00:08<?, ?it/s]
loss: 0.088293  [20992/24872]:   0%|          | 0/97 [00:08<?, ?it/s]
loss: 0.099140  [21248/24872]:   0%|          | 0/97 [00:09<?, ?it/s]
loss: 0.108494  [21504/24872]:   0%|          | 0/97 [00:09<?, ?it/s]
loss: 0.092092  [21760/24872]:   0%|          | 0/97 [00:09<?, ?it/s]
loss: 0.070918  [22016/24872]:   0%|          | 0/97 [00:09<?, ?it/s]
loss: 0.093733  [22272/24872]:   0%|          | 0/97 [00:09<?, ?it/s]
loss: 0.097884  [22528/24872]:   0%|          | 0/97 [00:09<?, ?it/s]
loss: 0.092840  [22784/24872]:   0%|          | 0/97 [00:09<?, ?it/s]
loss: 0.084954  [23040/24872]:   0%|          | 0/97 [00:09<?, ?it/s]
loss: 0.099314  [23296/24872]:   0%|          | 0/97 [00:09<?, ?it/s]
loss: 0.086647  [23552/24872]:   0%|          | 0/97 [00:10<?, ?it/s]
loss: 0.100788  [23808/24872]:   0%|          | 0/97 [00:10<?, ?it/s]
loss: 0.094540  [24064/24872]:   0%|          | 0/97 [00:10<?, ?it/s]
loss: 0.099071  [24320/24872]:   0%|          | 0/97 [00:10<?, ?it/s]
loss: 0.094659  [24576/24872]:   0%|          | 0/97 [00:10<?, ?it/s]
loss: 0.103678  [24832/24872]:   0%|          | 0/97 [00:10<?, ?it/s]
loss: 0.076256  [24872/24872]:   0%|          | 0/97 [00:10<?, ?it/s]
loss: 0.076256  [24872/24872]: : 98it [00:10,  9.24it/s]
Epoch 3, time=365.42s

  0%|          | 0/97 [00:00<?, ?it/s]
loss: 0.097903  [  256/24872]:   0%|          | 0/97 [00:00<?, ?it/s]
loss: 0.100975  [  512/24872]:   0%|          | 0/97 [00:00<?, ?it/s]
loss: 0.091634  [  768/24872]:   0%|          | 0/97 [00:00<?, ?it/s]
loss: 0.118348  [ 1024/24872]:   0%|          | 0/97 [00:00<?, ?it/s]
loss: 0.085420  [ 1280/24872]:   0%|          | 0/97 [00:00<?, ?it/s]
loss: 0.108674  [ 1536/24872]:   0%|          | 0/97 [00:00<?, ?it/s]
loss: 0.088207  [ 1792/24872]:   0%|          | 0/97 [00:00<?, ?it/s]
loss: 0.089395  [ 2048/24872]:   0%|          | 0/97 [00:00<?, ?it/s]
loss: 0.103751  [ 2304/24872]:   0%|          | 0/97 [00:00<?, ?it/s]
loss: 0.095408  [ 2560/24872]:   0%|          | 0/97 [00:01<?, ?it/s]
loss: 0.094225  [ 2816/24872]:   0%|          | 0/97 [00:01<?, ?it/s]
loss: 0.098036  [ 3072/24872]:   0%|          | 0/97 [00:01<?, ?it/s]
loss: 0.090465  [ 3328/24872]:   0%|          | 0/97 [00:01<?, ?it/s]
loss: 0.098950  [ 3584/24872]:   0%|          | 0/97 [00:01<?, ?it/s]
loss: 0.084106  [ 3840/24872]:   0%|          | 0/97 [00:01<?, ?it/s]
loss: 0.107932  [ 4096/24872]:   0%|          | 0/97 [00:01<?, ?it/s]
loss: 0.104078  [ 4352/24872]:   0%|          | 0/97 [00:01<?, ?it/s]
loss: 0.092926  [ 4608/24872]:   0%|          | 0/97 [00:01<?, ?it/s]
loss: 0.089826  [ 4864/24872]:   0%|          | 0/97 [00:02<?, ?it/s]
loss: 0.087559  [ 5120/24872]:   0%|          | 0/97 [00:02<?, ?it/s]
loss: 0.119803  [ 5376/24872]:   0%|          | 0/97 [00:02<?, ?it/s]
loss: 0.092072  [ 5632/24872]:   0%|          | 0/97 [00:02<?, ?it/s]
loss: 0.098110  [ 5888/24872]:   0%|          | 0/97 [00:02<?, ?it/s]
loss: 0.086777  [ 6144/24872]:   0%|          | 0/97 [00:02<?, ?it/s]
loss: 0.087275  [ 6400/24872]:   0%|          | 0/97 [00:02<?, ?it/s]
loss: 0.099715  [ 6656/24872]:   0%|          | 0/97 [00:02<?, ?it/s]
loss: 0.097095  [ 6912/24872]:   0%|          | 0/97 [00:02<?, ?it/s]
loss: 0.087362  [ 7168/24872]:   0%|          | 0/97 [00:03<?, ?it/s]
loss: 0.090250  [ 7424/24872]:   0%|          | 0/97 [00:03<?, ?it/s]
loss: 0.092651  [ 7680/24872]:   0%|          | 0/97 [00:03<?, ?it/s]
loss: 0.106994  [ 7936/24872]:   0%|          | 0/97 [00:03<?, ?it/s]
loss: 0.105575  [ 8192/24872]:   0%|          | 0/97 [00:03<?, ?it/s]
loss: 0.090338  [ 8448/24872]:   0%|          | 0/97 [00:03<?, ?it/s]
loss: 0.106818  [ 8704/24872]:   0%|          | 0/97 [00:03<?, ?it/s]
loss: 0.086032  [ 8960/24872]:   0%|          | 0/97 [00:03<?, ?it/s]
loss: 0.080827  [ 9216/24872]:   0%|          | 0/97 [00:03<?, ?it/s]
loss: 0.104259  [ 9472/24872]:   0%|          | 0/97 [00:04<?, ?it/s]
loss: 0.084417  [ 9728/24872]:   0%|          | 0/97 [00:04<?, ?it/s]
loss: 0.099634  [ 9984/24872]:   0%|          | 0/97 [00:04<?, ?it/s]
loss: 0.103203  [10240/24872]:   0%|          | 0/97 [00:04<?, ?it/s]
loss: 0.091669  [10496/24872]:   0%|          | 0/97 [00:04<?, ?it/s]
loss: 0.096017  [10752/24872]:   0%|          | 0/97 [00:04<?, ?it/s]
loss: 0.090259  [11008/24872]:   0%|          | 0/97 [00:04<?, ?it/s]
loss: 0.118311  [11264/24872]:   0%|          | 0/97 [00:04<?, ?it/s]
loss: 0.088468  [11520/24872]:   0%|          | 0/97 [00:04<?, ?it/s]
loss: 0.100874  [11776/24872]:   0%|          | 0/97 [00:05<?, ?it/s]
loss: 0.093695  [12032/24872]:   0%|          | 0/97 [00:05<?, ?it/s]
loss: 0.114312  [12288/24872]:   0%|          | 0/97 [00:05<?, ?it/s]
loss: 0.108672  [12544/24872]:   0%|          | 0/97 [00:05<?, ?it/s]
loss: 0.110520  [12800/24872]:   0%|          | 0/97 [00:05<?, ?it/s]
loss: 0.078232  [13056/24872]:   0%|          | 0/97 [00:05<?, ?it/s]
loss: 0.086505  [13312/24872]:   0%|          | 0/97 [00:05<?, ?it/s]
loss: 0.089885  [13568/24872]:   0%|          | 0/97 [00:05<?, ?it/s]
loss: 0.098923  [13824/24872]:   0%|          | 0/97 [00:05<?, ?it/s]
loss: 0.089586  [14080/24872]:   0%|          | 0/97 [00:05<?, ?it/s]
loss: 0.103009  [14336/24872]:   0%|          | 0/97 [00:06<?, ?it/s]
loss: 0.095002  [14592/24872]:   0%|          | 0/97 [00:06<?, ?it/s]
loss: 0.113360  [14848/24872]:   0%|          | 0/97 [00:06<?, ?it/s]
loss: 0.103081  [15104/24872]:   0%|          | 0/97 [00:06<?, ?it/s]
loss: 0.085742  [15360/24872]:   0%|          | 0/97 [00:06<?, ?it/s]
loss: 0.089554  [15616/24872]:   0%|          | 0/97 [00:06<?, ?it/s]
loss: 0.094092  [15872/24872]:   0%|          | 0/97 [00:06<?, ?it/s]
loss: 0.080923  [16128/24872]:   0%|          | 0/97 [00:06<?, ?it/s]
loss: 0.118585  [16384/24872]:   0%|          | 0/97 [00:06<?, ?it/s]
loss: 0.098411  [16640/24872]:   0%|          | 0/97 [00:07<?, ?it/s]
loss: 0.096008  [16896/24872]:   0%|          | 0/97 [00:07<?, ?it/s]
loss: 0.084820  [17152/24872]:   0%|          | 0/97 [00:07<?, ?it/s]
loss: 0.089128  [17408/24872]:   0%|          | 0/97 [00:07<?, ?it/s]
loss: 0.127455  [17664/24872]:   0%|          | 0/97 [00:07<?, ?it/s]
loss: 0.093837  [17920/24872]:   0%|          | 0/97 [00:07<?, ?it/s]
loss: 0.091139  [18176/24872]:   0%|          | 0/97 [00:07<?, ?it/s]
loss: 0.111029  [18432/24872]:   0%|          | 0/97 [00:07<?, ?it/s]
loss: 0.082238  [18688/24872]:   0%|          | 0/97 [00:07<?, ?it/s]
loss: 0.082443  [18944/24872]:   0%|          | 0/97 [00:08<?, ?it/s]
loss: 0.086037  [19200/24872]:   0%|          | 0/97 [00:08<?, ?it/s]
loss: 0.091983  [19456/24872]:   0%|          | 0/97 [00:08<?, ?it/s]
loss: 0.103495  [19712/24872]:   0%|          | 0/97 [00:08<?, ?it/s]
loss: 0.096391  [19968/24872]:   0%|          | 0/97 [00:08<?, ?it/s]
loss: 0.091825  [20224/24872]:   0%|          | 0/97 [00:08<?, ?it/s]
loss: 0.101543  [20480/24872]:   0%|          | 0/97 [00:08<?, ?it/s]
loss: 0.087350  [20736/24872]:   0%|          | 0/97 [00:08<?, ?it/s]
loss: 0.087586  [20992/24872]:   0%|          | 0/97 [00:08<?, ?it/s]
loss: 0.098138  [21248/24872]:   0%|          | 0/97 [00:09<?, ?it/s]
loss: 0.107019  [21504/24872]:   0%|          | 0/97 [00:09<?, ?it/s]
loss: 0.091729  [21760/24872]:   0%|          | 0/97 [00:09<?, ?it/s]
loss: 0.070401  [22016/24872]:   0%|          | 0/97 [00:09<?, ?it/s]
loss: 0.092185  [22272/24872]:   0%|          | 0/97 [00:09<?, ?it/s]
loss: 0.097738  [22528/24872]:   0%|          | 0/97 [00:09<?, ?it/s]
loss: 0.092322  [22784/24872]:   0%|          | 0/97 [00:09<?, ?it/s]
loss: 0.084367  [23040/24872]:   0%|          | 0/97 [00:09<?, ?it/s]
loss: 0.098724  [23296/24872]:   0%|          | 0/97 [00:09<?, ?it/s]
loss: 0.085788  [23552/24872]:   0%|          | 0/97 [00:10<?, ?it/s]
loss: 0.099621  [23808/24872]:   0%|          | 0/97 [00:10<?, ?it/s]
loss: 0.093984  [24064/24872]:   0%|          | 0/97 [00:10<?, ?it/s]
loss: 0.098130  [24320/24872]:   0%|          | 0/97 [00:10<?, ?it/s]
loss: 0.094082  [24576/24872]:   0%|          | 0/97 [00:10<?, ?it/s]
loss: 0.102679  [24832/24872]:   0%|          | 0/97 [00:10<?, ?it/s]
loss: 0.073965  [24872/24872]:   0%|          | 0/97 [00:10<?, ?it/s]
loss: 0.073965  [24872/24872]: : 98it [00:10,  9.24it/s]
Epoch 4, time=376.03s

  0%|          | 0/97 [00:00<?, ?it/s]
loss: 0.096944  [  256/24872]:   0%|          | 0/97 [00:00<?, ?it/s]
loss: 0.100035  [  512/24872]:   0%|          | 0/97 [00:00<?, ?it/s]
loss: 0.091391  [  768/24872]:   0%|          | 0/97 [00:00<?, ?it/s]
loss: 0.117738  [ 1024/24872]:   0%|          | 0/97 [00:00<?, ?it/s]
loss: 0.085046  [ 1280/24872]:   0%|          | 0/97 [00:00<?, ?it/s]
loss: 0.107875  [ 1536/24872]:   0%|          | 0/97 [00:00<?, ?it/s]
loss: 0.087481  [ 1792/24872]:   0%|          | 0/97 [00:00<?, ?it/s]
loss: 0.088621  [ 2048/24872]:   0%|          | 0/97 [00:00<?, ?it/s]
loss: 0.102622  [ 2304/24872]:   0%|          | 0/97 [00:00<?, ?it/s]
loss: 0.094618  [ 2560/24872]:   0%|          | 0/97 [00:01<?, ?it/s]
loss: 0.093594  [ 2816/24872]:   0%|          | 0/97 [00:01<?, ?it/s]
loss: 0.097431  [ 3072/24872]:   0%|          | 0/97 [00:01<?, ?it/s]
loss: 0.089465  [ 3328/24872]:   0%|          | 0/97 [00:01<?, ?it/s]
loss: 0.098550  [ 3584/24872]:   0%|          | 0/97 [00:01<?, ?it/s]
loss: 0.083253  [ 3840/24872]:   0%|          | 0/97 [00:01<?, ?it/s]
loss: 0.107129  [ 4096/24872]:   0%|          | 0/97 [00:01<?, ?it/s]
loss: 0.103200  [ 4352/24872]:   0%|          | 0/97 [00:01<?, ?it/s]
loss: 0.092341  [ 4608/24872]:   0%|          | 0/97 [00:01<?, ?it/s]
loss: 0.088627  [ 4864/24872]:   0%|          | 0/97 [00:02<?, ?it/s]
loss: 0.086424  [ 5120/24872]:   0%|          | 0/97 [00:02<?, ?it/s]
loss: 0.117105  [ 5376/24872]:   0%|          | 0/97 [00:02<?, ?it/s]
loss: 0.091068  [ 5632/24872]:   0%|          | 0/97 [00:02<?, ?it/s]
loss: 0.097228  [ 5888/24872]:   0%|          | 0/97 [00:02<?, ?it/s]
loss: 0.086750  [ 6144/24872]:   0%|          | 0/97 [00:02<?, ?it/s]
loss: 0.086498  [ 6400/24872]:   0%|          | 0/97 [00:02<?, ?it/s]
loss: 0.099034  [ 6656/24872]:   0%|          | 0/97 [00:02<?, ?it/s]
loss: 0.096116  [ 6912/24872]:   0%|          | 0/97 [00:02<?, ?it/s]
loss: 0.086519  [ 7168/24872]:   0%|          | 0/97 [00:03<?, ?it/s]
loss: 0.089315  [ 7424/24872]:   0%|          | 0/97 [00:03<?, ?it/s]
loss: 0.091577  [ 7680/24872]:   0%|          | 0/97 [00:03<?, ?it/s]
loss: 0.106267  [ 7936/24872]:   0%|          | 0/97 [00:03<?, ?it/s]
loss: 0.103288  [ 8192/24872]:   0%|          | 0/97 [00:03<?, ?it/s]
loss: 0.089913  [ 8448/24872]:   0%|          | 0/97 [00:03<?, ?it/s]
loss: 0.105429  [ 8704/24872]:   0%|          | 0/97 [00:03<?, ?it/s]
loss: 0.085231  [ 8960/24872]:   0%|          | 0/97 [00:03<?, ?it/s]
loss: 0.080073  [ 9216/24872]:   0%|          | 0/97 [00:03<?, ?it/s]
loss: 0.103210  [ 9472/24872]:   0%|          | 0/97 [00:04<?, ?it/s]
loss: 0.083904  [ 9728/24872]:   0%|          | 0/97 [00:04<?, ?it/s]
loss: 0.098909  [ 9984/24872]:   0%|          | 0/97 [00:04<?, ?it/s]
loss: 0.102616  [10240/24872]:   0%|          | 0/97 [00:04<?, ?it/s]
loss: 0.090466  [10496/24872]:   0%|          | 0/97 [00:04<?, ?it/s]
loss: 0.094952  [10752/24872]:   0%|          | 0/97 [00:04<?, ?it/s]
loss: 0.089243  [11008/24872]:   0%|          | 0/97 [00:04<?, ?it/s]
loss: 0.116736  [11264/24872]:   0%|          | 0/97 [00:04<?, ?it/s]
loss: 0.087621  [11520/24872]:   0%|          | 0/97 [00:04<?, ?it/s]
loss: 0.100220  [11776/24872]:   0%|          | 0/97 [00:04<?, ?it/s]
loss: 0.093355  [12032/24872]:   0%|          | 0/97 [00:05<?, ?it/s]
loss: 0.113054  [12288/24872]:   0%|          | 0/97 [00:05<?, ?it/s]
loss: 0.107750  [12544/24872]:   0%|          | 0/97 [00:05<?, ?it/s]
loss: 0.109552  [12800/24872]:   0%|          | 0/97 [00:05<?, ?it/s]
loss: 0.077617  [13056/24872]:   0%|          | 0/97 [00:05<?, ?it/s]
loss: 0.085676  [13312/24872]:   0%|          | 0/97 [00:05<?, ?it/s]
loss: 0.089203  [13568/24872]:   0%|          | 0/97 [00:05<?, ?it/s]
loss: 0.097441  [13824/24872]:   0%|          | 0/97 [00:05<?, ?it/s]
loss: 0.088834  [14080/24872]:   0%|          | 0/97 [00:05<?, ?it/s]
loss: 0.101996  [14336/24872]:   0%|          | 0/97 [00:06<?, ?it/s]
loss: 0.094255  [14592/24872]:   0%|          | 0/97 [00:06<?, ?it/s]
loss: 0.112549  [14848/24872]:   0%|          | 0/97 [00:06<?, ?it/s]
loss: 0.102225  [15104/24872]:   0%|          | 0/97 [00:06<?, ?it/s]
loss: 0.085369  [15360/24872]:   0%|          | 0/97 [00:06<?, ?it/s]
loss: 0.088696  [15616/24872]:   0%|          | 0/97 [00:06<?, ?it/s]
loss: 0.092888  [15872/24872]:   0%|          | 0/97 [00:06<?, ?it/s]
loss: 0.080331  [16128/24872]:   0%|          | 0/97 [00:06<?, ?it/s]
loss: 0.117677  [16384/24872]:   0%|          | 0/97 [00:06<?, ?it/s]
loss: 0.097649  [16640/24872]:   0%|          | 0/97 [00:07<?, ?it/s]
loss: 0.094941  [16896/24872]:   0%|          | 0/97 [00:07<?, ?it/s]
loss: 0.083989  [17152/24872]:   0%|          | 0/97 [00:07<?, ?it/s]
loss: 0.088446  [17408/24872]:   0%|          | 0/97 [00:07<?, ?it/s]
loss: 0.126576  [17664/24872]:   0%|          | 0/97 [00:07<?, ?it/s]
loss: 0.093110  [17920/24872]:   0%|          | 0/97 [00:07<?, ?it/s]
loss: 0.090247  [18176/24872]:   0%|          | 0/97 [00:07<?, ?it/s]
loss: 0.110163  [18432/24872]:   0%|          | 0/97 [00:07<?, ?it/s]
loss: 0.081742  [18688/24872]:   0%|          | 0/97 [00:07<?, ?it/s]
loss: 0.081740  [18944/24872]:   0%|          | 0/97 [00:08<?, ?it/s]
loss: 0.085282  [19200/24872]:   0%|          | 0/97 [00:08<?, ?it/s]
loss: 0.091344  [19456/24872]:   0%|          | 0/97 [00:08<?, ?it/s]
loss: 0.103042  [19712/24872]:   0%|          | 0/97 [00:08<?, ?it/s]
loss: 0.095586  [19968/24872]:   0%|          | 0/97 [00:08<?, ?it/s]
loss: 0.090978  [20224/24872]:   0%|          | 0/97 [00:08<?, ?it/s]
loss: 0.101211  [20480/24872]:   0%|          | 0/97 [00:08<?, ?it/s]
loss: 0.086608  [20736/24872]:   0%|          | 0/97 [00:08<?, ?it/s]
loss: 0.086958  [20992/24872]:   0%|          | 0/97 [00:08<?, ?it/s]
loss: 0.097337  [21248/24872]:   0%|          | 0/97 [00:09<?, ?it/s]
loss: 0.105897  [21504/24872]:   0%|          | 0/97 [00:09<?, ?it/s]
loss: 0.091322  [21760/24872]:   0%|          | 0/97 [00:09<?, ?it/s]
loss: 0.070003  [22016/24872]:   0%|          | 0/97 [00:09<?, ?it/s]
loss: 0.091042  [22272/24872]:   0%|          | 0/97 [00:09<?, ?it/s]
loss: 0.097342  [22528/24872]:   0%|          | 0/97 [00:09<?, ?it/s]
loss: 0.091823  [22784/24872]:   0%|          | 0/97 [00:09<?, ?it/s]
loss: 0.083667  [23040/24872]:   0%|          | 0/97 [00:09<?, ?it/s]
loss: 0.097962  [23296/24872]:   0%|          | 0/97 [00:09<?, ?it/s]
loss: 0.085032  [23552/24872]:   0%|          | 0/97 [00:09<?, ?it/s]
loss: 0.098688  [23808/24872]:   0%|          | 0/97 [00:10<?, ?it/s]
loss: 0.093460  [24064/24872]:   0%|          | 0/97 [00:10<?, ?it/s]
loss: 0.097352  [24320/24872]:   0%|          | 0/97 [00:10<?, ?it/s]
loss: 0.093560  [24576/24872]:   0%|          | 0/97 [00:10<?, ?it/s]
loss: 0.101821  [24832/24872]:   0%|          | 0/97 [00:10<?, ?it/s]
loss: 0.071918  [24872/24872]:   0%|          | 0/97 [00:10<?, ?it/s]
loss: 0.071918  [24872/24872]: : 98it [00:10,  9.27it/s]
Epoch 5, time=386.60s

  0%|          | 0/97 [00:00<?, ?it/s]
loss: 0.096075  [  256/24872]:   0%|          | 0/97 [00:00<?, ?it/s]
loss: 0.099436  [  512/24872]:   0%|          | 0/97 [00:00<?, ?it/s]
loss: 0.091104  [  768/24872]:   0%|          | 0/97 [00:00<?, ?it/s]
loss: 0.117138  [ 1024/24872]:   0%|          | 0/97 [00:00<?, ?it/s]
loss: 0.084944  [ 1280/24872]:   0%|          | 0/97 [00:00<?, ?it/s]
loss: 0.107370  [ 1536/24872]:   0%|          | 0/97 [00:00<?, ?it/s]
loss: 0.086825  [ 1792/24872]:   0%|          | 0/97 [00:00<?, ?it/s]
loss: 0.087701  [ 2048/24872]:   0%|          | 0/97 [00:00<?, ?it/s]
loss: 0.101554  [ 2304/24872]:   0%|          | 0/97 [00:00<?, ?it/s]
loss: 0.093927  [ 2560/24872]:   0%|          | 0/97 [00:01<?, ?it/s]
loss: 0.093098  [ 2816/24872]:   0%|          | 0/97 [00:01<?, ?it/s]
loss: 0.096849  [ 3072/24872]:   0%|          | 0/97 [00:01<?, ?it/s]
loss: 0.088273  [ 3328/24872]:   0%|          | 0/97 [00:01<?, ?it/s]
loss: 0.098090  [ 3584/24872]:   0%|          | 0/97 [00:01<?, ?it/s]
loss: 0.082350  [ 3840/24872]:   0%|          | 0/97 [00:01<?, ?it/s]
loss: 0.106082  [ 4096/24872]:   0%|          | 0/97 [00:01<?, ?it/s]
loss: 0.102607  [ 4352/24872]:   0%|          | 0/97 [00:01<?, ?it/s]
loss: 0.091970  [ 4608/24872]:   0%|          | 0/97 [00:01<?, ?it/s]
loss: 0.087776  [ 4864/24872]:   0%|          | 0/97 [00:02<?, ?it/s]
loss: 0.085331  [ 5120/24872]:   0%|          | 0/97 [00:02<?, ?it/s]
loss: 0.114442  [ 5376/24872]:   0%|          | 0/97 [00:02<?, ?it/s]
loss: 0.090099  [ 5632/24872]:   0%|          | 0/97 [00:02<?, ?it/s]
loss: 0.096319  [ 5888/24872]:   0%|          | 0/97 [00:02<?, ?it/s]
loss: 0.087087  [ 6144/24872]:   0%|          | 0/97 [00:02<?, ?it/s]
loss: 0.085837  [ 6400/24872]:   0%|          | 0/97 [00:02<?, ?it/s]
loss: 0.098321  [ 6656/24872]:   0%|          | 0/97 [00:02<?, ?it/s]
loss: 0.095241  [ 6912/24872]:   0%|          | 0/97 [00:02<?, ?it/s]
loss: 0.085733  [ 7168/24872]:   0%|          | 0/97 [00:03<?, ?it/s]
loss: 0.088486  [ 7424/24872]:   0%|          | 0/97 [00:03<?, ?it/s]
loss: 0.090431  [ 7680/24872]:   0%|          | 0/97 [00:03<?, ?it/s]
loss: 0.105673  [ 7936/24872]:   0%|          | 0/97 [00:03<?, ?it/s]
loss: 0.102312  [ 8192/24872]:   0%|          | 0/97 [00:03<?, ?it/s]
loss: 0.089527  [ 8448/24872]:   0%|          | 0/97 [00:03<?, ?it/s]
loss: 0.104702  [ 8704/24872]:   0%|          | 0/97 [00:03<?, ?it/s]
loss: 0.084354  [ 8960/24872]:   0%|          | 0/97 [00:03<?, ?it/s]
loss: 0.079498  [ 9216/24872]:   0%|          | 0/97 [00:03<?, ?it/s]
loss: 0.102426  [ 9472/24872]:   0%|          | 0/97 [00:04<?, ?it/s]
loss: 0.083563  [ 9728/24872]:   0%|          | 0/97 [00:04<?, ?it/s]
loss: 0.099138  [ 9984/24872]:   0%|          | 0/97 [00:04<?, ?it/s]
loss: 0.102357  [10240/24872]:   0%|          | 0/97 [00:04<?, ?it/s]
loss: 0.089408  [10496/24872]:   0%|          | 0/97 [00:04<?, ?it/s]
loss: 0.093885  [10752/24872]:   0%|          | 0/97 [00:04<?, ?it/s]
loss: 0.088347  [11008/24872]:   0%|          | 0/97 [00:04<?, ?it/s]
loss: 0.115643  [11264/24872]:   0%|          | 0/97 [00:04<?, ?it/s]
loss: 0.086998  [11520/24872]:   0%|          | 0/97 [00:04<?, ?it/s]
loss: 0.099831  [11776/24872]:   0%|          | 0/97 [00:04<?, ?it/s]
loss: 0.092865  [12032/24872]:   0%|          | 0/97 [00:05<?, ?it/s]
loss: 0.111811  [12288/24872]:   0%|          | 0/97 [00:05<?, ?it/s]
loss: 0.106983  [12544/24872]:   0%|          | 0/97 [00:05<?, ?it/s]
loss: 0.108721  [12800/24872]:   0%|          | 0/97 [00:05<?, ?it/s]
loss: 0.077117  [13056/24872]:   0%|          | 0/97 [00:05<?, ?it/s]
loss: 0.084928  [13312/24872]:   0%|          | 0/97 [00:05<?, ?it/s]
loss: 0.088745  [13568/24872]:   0%|          | 0/97 [00:05<?, ?it/s]
loss: 0.096323  [13824/24872]:   0%|          | 0/97 [00:05<?, ?it/s]
loss: 0.088213  [14080/24872]:   0%|          | 0/97 [00:05<?, ?it/s]
loss: 0.100953  [14336/24872]:   0%|          | 0/97 [00:06<?, ?it/s]
loss: 0.093628  [14592/24872]:   0%|          | 0/97 [00:06<?, ?it/s]
loss: 0.111750  [14848/24872]:   0%|          | 0/97 [00:06<?, ?it/s]
loss: 0.101891  [15104/24872]:   0%|          | 0/97 [00:06<?, ?it/s]
loss: 0.085024  [15360/24872]:   0%|          | 0/97 [00:06<?, ?it/s]
loss: 0.088056  [15616/24872]:   0%|          | 0/97 [00:06<?, ?it/s]
loss: 0.091668  [15872/24872]:   0%|          | 0/97 [00:06<?, ?it/s]
loss: 0.079752  [16128/24872]:   0%|          | 0/97 [00:06<?, ?it/s]
loss: 0.116878  [16384/24872]:   0%|          | 0/97 [00:06<?, ?it/s]
loss: 0.096961  [16640/24872]:   0%|          | 0/97 [00:07<?, ?it/s]
loss: 0.093744  [16896/24872]:   0%|          | 0/97 [00:07<?, ?it/s]
loss: 0.083289  [17152/24872]:   0%|          | 0/97 [00:07<?, ?it/s]
loss: 0.087681  [17408/24872]:   0%|          | 0/97 [00:07<?, ?it/s]
loss: 0.125328  [17664/24872]:   0%|          | 0/97 [00:07<?, ?it/s]
loss: 0.092389  [17920/24872]:   0%|          | 0/97 [00:07<?, ?it/s]
loss: 0.089361  [18176/24872]:   0%|          | 0/97 [00:07<?, ?it/s]
loss: 0.109311  [18432/24872]:   0%|          | 0/97 [00:07<?, ?it/s]
loss: 0.081184  [18688/24872]:   0%|          | 0/97 [00:07<?, ?it/s]
loss: 0.080959  [18944/24872]:   0%|          | 0/97 [00:08<?, ?it/s]
loss: 0.084505  [19200/24872]:   0%|          | 0/97 [00:08<?, ?it/s]
loss: 0.090808  [19456/24872]:   0%|          | 0/97 [00:08<?, ?it/s]
loss: 0.102603  [19712/24872]:   0%|          | 0/97 [00:08<?, ?it/s]
loss: 0.094908  [19968/24872]:   0%|          | 0/97 [00:08<?, ?it/s]
loss: 0.090159  [20224/24872]:   0%|          | 0/97 [00:08<?, ?it/s]
loss: 0.100965  [20480/24872]:   0%|          | 0/97 [00:08<?, ?it/s]
loss: 0.085992  [20736/24872]:   0%|          | 0/97 [00:08<?, ?it/s]
loss: 0.086364  [20992/24872]:   0%|          | 0/97 [00:08<?, ?it/s]
loss: 0.096794  [21248/24872]:   0%|          | 0/97 [00:08<?, ?it/s]
loss: 0.105008  [21504/24872]:   0%|          | 0/97 [00:09<?, ?it/s]
loss: 0.090827  [21760/24872]:   0%|          | 0/97 [00:09<?, ?it/s]
loss: 0.069599  [22016/24872]:   0%|          | 0/97 [00:09<?, ?it/s]
loss: 0.090073  [22272/24872]:   0%|          | 0/97 [00:09<?, ?it/s]
loss: 0.096966  [22528/24872]:   0%|          | 0/97 [00:09<?, ?it/s]
loss: 0.091365  [22784/24872]:   0%|          | 0/97 [00:09<?, ?it/s]
loss: 0.083004  [23040/24872]:   0%|          | 0/97 [00:09<?, ?it/s]
loss: 0.097206  [23296/24872]:   0%|          | 0/97 [00:09<?, ?it/s]
loss: 0.084298  [23552/24872]:   0%|          | 0/97 [00:09<?, ?it/s]
loss: 0.097826  [23808/24872]:   0%|          | 0/97 [00:10<?, ?it/s]
loss: 0.092910  [24064/24872]:   0%|          | 0/97 [00:10<?, ?it/s]
loss: 0.096586  [24320/24872]:   0%|          | 0/97 [00:10<?, ?it/s]
loss: 0.093064  [24576/24872]:   0%|          | 0/97 [00:10<?, ?it/s]
loss: 0.101091  [24832/24872]:   0%|          | 0/97 [00:10<?, ?it/s]
loss: 0.070741  [24872/24872]:   0%|          | 0/97 [00:10<?, ?it/s]
loss: 0.070741  [24872/24872]: : 98it [00:10,  9.29it/s]
Epoch 6, time=397.15s

  0%|          | 0/97 [00:00<?, ?it/s]
loss: 0.095327  [  256/24872]:   0%|          | 0/97 [00:00<?, ?it/s]
loss: 0.098963  [  512/24872]:   0%|          | 0/97 [00:00<?, ?it/s]
loss: 0.090715  [  768/24872]:   0%|          | 0/97 [00:00<?, ?it/s]
loss: 0.116529  [ 1024/24872]:   0%|          | 0/97 [00:00<?, ?it/s]
loss: 0.084623  [ 1280/24872]:   0%|          | 0/97 [00:00<?, ?it/s]
loss: 0.106807  [ 1536/24872]:   0%|          | 0/97 [00:00<?, ?it/s]
loss: 0.086247  [ 1792/24872]:   0%|          | 0/97 [00:00<?, ?it/s]
loss: 0.086931  [ 2048/24872]:   0%|          | 0/97 [00:00<?, ?it/s]
loss: 0.100634  [ 2304/24872]:   0%|          | 0/97 [00:00<?, ?it/s]
loss: 0.093270  [ 2560/24872]:   0%|          | 0/97 [00:01<?, ?it/s]
loss: 0.092625  [ 2816/24872]:   0%|          | 0/97 [00:01<?, ?it/s]
loss: 0.096370  [ 3072/24872]:   0%|          | 0/97 [00:01<?, ?it/s]
loss: 0.087511  [ 3328/24872]:   0%|          | 0/97 [00:01<?, ?it/s]
loss: 0.097617  [ 3584/24872]:   0%|          | 0/97 [00:01<?, ?it/s]
loss: 0.081669  [ 3840/24872]:   0%|          | 0/97 [00:01<?, ?it/s]
loss: 0.105266  [ 4096/24872]:   0%|          | 0/97 [00:01<?, ?it/s]
loss: 0.102177  [ 4352/24872]:   0%|          | 0/97 [00:01<?, ?it/s]
loss: 0.091567  [ 4608/24872]:   0%|          | 0/97 [00:01<?, ?it/s]
loss: 0.087249  [ 4864/24872]:   0%|          | 0/97 [00:02<?, ?it/s]
loss: 0.084559  [ 5120/24872]:   0%|          | 0/97 [00:02<?, ?it/s]
loss: 0.112894  [ 5376/24872]:   0%|          | 0/97 [00:02<?, ?it/s]
loss: 0.089299  [ 5632/24872]:   0%|          | 0/97 [00:02<?, ?it/s]
loss: 0.095508  [ 5888/24872]:   0%|          | 0/97 [00:02<?, ?it/s]
loss: 0.086823  [ 6144/24872]:   0%|          | 0/97 [00:02<?, ?it/s]
loss: 0.085252  [ 6400/24872]:   0%|          | 0/97 [00:02<?, ?it/s]
loss: 0.097734  [ 6656/24872]:   0%|          | 0/97 [00:02<?, ?it/s]
loss: 0.094712  [ 6912/24872]:   0%|          | 0/97 [00:02<?, ?it/s]
loss: 0.085097  [ 7168/24872]:   0%|          | 0/97 [00:03<?, ?it/s]
loss: 0.087822  [ 7424/24872]:   0%|          | 0/97 [00:03<?, ?it/s]
loss: 0.089554  [ 7680/24872]:   0%|          | 0/97 [00:03<?, ?it/s]
loss: 0.105078  [ 7936/24872]:   0%|          | 0/97 [00:03<?, ?it/s]
loss: 0.101668  [ 8192/24872]:   0%|          | 0/97 [00:03<?, ?it/s]
loss: 0.089224  [ 8448/24872]:   0%|          | 0/97 [00:03<?, ?it/s]
loss: 0.103895  [ 8704/24872]:   0%|          | 0/97 [00:03<?, ?it/s]
loss: 0.083969  [ 8960/24872]:   0%|          | 0/97 [00:03<?, ?it/s]
loss: 0.078987  [ 9216/24872]:   0%|          | 0/97 [00:03<?, ?it/s]
loss: 0.101514  [ 9472/24872]:   0%|          | 0/97 [00:04<?, ?it/s]
loss: 0.083216  [ 9728/24872]:   0%|          | 0/97 [00:04<?, ?it/s]
loss: 0.098878  [ 9984/24872]:   0%|          | 0/97 [00:04<?, ?it/s]
loss: 0.102005  [10240/24872]:   0%|          | 0/97 [00:04<?, ?it/s]
loss: 0.088577  [10496/24872]:   0%|          | 0/97 [00:04<?, ?it/s]
loss: 0.092822  [10752/24872]:   0%|          | 0/97 [00:04<?, ?it/s]
loss: 0.087657  [11008/24872]:   0%|          | 0/97 [00:04<?, ?it/s]
loss: 0.114440  [11264/24872]:   0%|          | 0/97 [00:04<?, ?it/s]
loss: 0.086451  [11520/24872]:   0%|          | 0/97 [00:04<?, ?it/s]
loss: 0.099225  [11776/24872]:   0%|          | 0/97 [00:05<?, ?it/s]
loss: 0.092356  [12032/24872]:   0%|          | 0/97 [00:05<?, ?it/s]
loss: 0.110790  [12288/24872]:   0%|          | 0/97 [00:05<?, ?it/s]
loss: 0.106115  [12544/24872]:   0%|          | 0/97 [00:05<?, ?it/s]
loss: 0.107864  [12800/24872]:   0%|          | 0/97 [00:05<?, ?it/s]
loss: 0.076630  [13056/24872]:   0%|          | 0/97 [00:05<?, ?it/s]
loss: 0.084215  [13312/24872]:   0%|          | 0/97 [00:05<?, ?it/s]
loss: 0.088278  [13568/24872]:   0%|          | 0/97 [00:05<?, ?it/s]
loss: 0.095494  [13824/24872]:   0%|          | 0/97 [00:05<?, ?it/s]
loss: 0.087687  [14080/24872]:   0%|          | 0/97 [00:05<?, ?it/s]
loss: 0.100112  [14336/24872]:   0%|          | 0/97 [00:06<?, ?it/s]
loss: 0.093042  [14592/24872]:   0%|          | 0/97 [00:06<?, ?it/s]
loss: 0.111093  [14848/24872]:   0%|          | 0/97 [00:06<?, ?it/s]
loss: 0.101323  [15104/24872]:   0%|          | 0/97 [00:06<?, ?it/s]
loss: 0.084599  [15360/24872]:   0%|          | 0/97 [00:06<?, ?it/s]
loss: 0.087428  [15616/24872]:   0%|          | 0/97 [00:06<?, ?it/s]
loss: 0.090848  [15872/24872]:   0%|          | 0/97 [00:06<?, ?it/s]
loss: 0.079151  [16128/24872]:   0%|          | 0/97 [00:06<?, ?it/s]
loss: 0.116214  [16384/24872]:   0%|          | 0/97 [00:06<?, ?it/s]
loss: 0.096346  [16640/24872]:   0%|          | 0/97 [00:07<?, ?it/s]
loss: 0.092896  [16896/24872]:   0%|          | 0/97 [00:07<?, ?it/s]
loss: 0.082781  [17152/24872]:   0%|          | 0/97 [00:07<?, ?it/s]
loss: 0.087126  [17408/24872]:   0%|          | 0/97 [00:07<?, ?it/s]
loss: 0.124403  [17664/24872]:   0%|          | 0/97 [00:07<?, ?it/s]
loss: 0.091715  [17920/24872]:   0%|          | 0/97 [00:07<?, ?it/s]
loss: 0.088609  [18176/24872]:   0%|          | 0/97 [00:07<?, ?it/s]
loss: 0.108369  [18432/24872]:   0%|          | 0/97 [00:07<?, ?it/s]
loss: 0.080703  [18688/24872]:   0%|          | 0/97 [00:07<?, ?it/s]
loss: 0.080300  [18944/24872]:   0%|          | 0/97 [00:08<?, ?it/s]
loss: 0.083706  [19200/24872]:   0%|          | 0/97 [00:08<?, ?it/s]
loss: 0.090303  [19456/24872]:   0%|          | 0/97 [00:08<?, ?it/s]
loss: 0.102226  [19712/24872]:   0%|          | 0/97 [00:08<?, ?it/s]
loss: 0.094262  [19968/24872]:   0%|          | 0/97 [00:08<?, ?it/s]
loss: 0.089361  [20224/24872]:   0%|          | 0/97 [00:08<?, ?it/s]
loss: 0.100687  [20480/24872]:   0%|          | 0/97 [00:08<?, ?it/s]
loss: 0.085418  [20736/24872]:   0%|          | 0/97 [00:08<?, ?it/s]
loss: 0.085752  [20992/24872]:   0%|          | 0/97 [00:08<?, ?it/s]
loss: 0.096263  [21248/24872]:   0%|          | 0/97 [00:09<?, ?it/s]
loss: 0.104317  [21504/24872]:   0%|          | 0/97 [00:09<?, ?it/s]
loss: 0.090328  [21760/24872]:   0%|          | 0/97 [00:09<?, ?it/s]
loss: 0.069265  [22016/24872]:   0%|          | 0/97 [00:09<?, ?it/s]
loss: 0.089251  [22272/24872]:   0%|          | 0/97 [00:09<?, ?it/s]
loss: 0.096639  [22528/24872]:   0%|          | 0/97 [00:09<?, ?it/s]
loss: 0.090888  [22784/24872]:   0%|          | 0/97 [00:09<?, ?it/s]
loss: 0.082190  [23040/24872]:   0%|          | 0/97 [00:09<?, ?it/s]
loss: 0.096483  [23296/24872]:   0%|          | 0/97 [00:09<?, ?it/s]
loss: 0.083633  [23552/24872]:   0%|          | 0/97 [00:10<?, ?it/s]
loss: 0.097105  [23808/24872]:   0%|          | 0/97 [00:10<?, ?it/s]
loss: 0.092342  [24064/24872]:   0%|          | 0/97 [00:10<?, ?it/s]
loss: 0.095951  [24320/24872]:   0%|          | 0/97 [00:10<?, ?it/s]
loss: 0.092618  [24576/24872]:   0%|          | 0/97 [00:10<?, ?it/s]
loss: 0.100383  [24832/24872]:   0%|          | 0/97 [00:10<?, ?it/s]
loss: 0.069638  [24872/24872]:   0%|          | 0/97 [00:10<?, ?it/s]
loss: 0.069638  [24872/24872]: : 98it [00:10,  9.25it/s]
Done!

test the network#

Do some qualitative tests: let the trained network predict some particle geometries and compare their Mie spectra with the target spectra.

# Qualitative check of the trained generator: feed it target scattering
# spectra, evaluate the suggested core-shell designs with Mie theory, and
# overlay target and obtained spectra for a few randomly chosen samples.
# Note: Ideally tests should be done on samples not used for training!
# NOTE(review): `q_sca_target_test` is defined earlier in the script --
# confirm that it is actually held out from the training set.
sca_test = q_sca_target_test

# Inference only: nothing is back-propagated below, so avoid building the
# autograd graph through the network and the Mie evaluation.
with torch.no_grad():
    pred = model(sca_test)

    # evaluate Mie
    r_c_test, r_s_test, eps_c_test, eps_s_test = nn_pred_to_mie_geometry(pred)
    res_mie = pmd.multishell.cross_sections(
        k0,
        r_c=r_c_test,
        eps_c=eps_c_test,
        r_s=r_s_test,
        eps_s=eps_s_test,
        eps_env=eps_env,
        n_max=n_max,
    )

# plot
# Draw distinct sample indices (no repeats), at most one per subplot of the
# 2x2 grid; fewer panels are drawn if fewer than 4 samples are available.
n_plot = min(4, len(sca_test))
i_plot = np.random.choice(len(sca_test), size=n_plot, replace=False)

# The wavelength axis is identical for every panel: convert it only once.
wl_np = wl0.detach().cpu().numpy()
q_sca_pred = res_mie["q_sca"]

plt.figure(figsize=(12, 10))
for i_n, i in enumerate(i_plot):
    plt.subplot(2, 2, i_n + 1)
    plt.plot(
        wl_np,
        sca_test[i].detach().cpu().numpy(),
        label="reference",
    )
    plt.plot(
        wl_np,
        q_sca_pred[i].detach().cpu().numpy(),
        label="predicted particle",
    )
    plt.legend()
    plt.xlabel("wavelength (nm)")
    plt.ylabel("scat. efficiency")
plt.show()
ex 09 tandem

Total running time of the script: (6 minutes 55.181 seconds)

Estimated memory usage: 873 MB

Gallery generated by Sphinx-Gallery