Note
Go to the end to download the full example code.
Mie-informed tandem neural network#
Here, we demonstrate how to train a design generator network capable of suggesting core-shell particles with a specific spectral response, using PyMieDiff as a differentiable forward evaluator. The training pipeline follows the “Tandem” model:
target spectrum –> generator NN –> design –> Mie –> real spectrum
training loss is: MSE(target spec., real spec.)
author: O. Jackson, P. Wiecha, 06/2025
imports#
import time
import numpy as np
from tqdm import tqdm
import matplotlib.pyplot as plt
import torch
from torch import nn
import pymiediff as pmd
setup optimization target#
We set up the main configuration here: torch device, parameter limits and wavelengths.
# torch compute device on which all tensors below are allocated
device = "cpu"
# general config
# number of random core-shell particles generated for the reference spectra
N_samples = 25000
n_max = 4 # maximum Mie order fixed for performance
# permittivity of the surrounding medium, passed as `eps_env` to the Mie solver
eps_env = torch.tensor(1.0, device=device)
# [min, max] limits; the data generation samples both the core radius and the
# shell thickness from this range (same length unit as the wavelengths,
# presumably nm -- TODO confirm)
lim_r = torch.as_tensor([40, 100], device=device)
# [min, max] limits for the real part of the refractive index
lim_n_re = torch.as_tensor([1.5, 4.0], device=device)
# [min, max] limits for the imaginary part of the refractive index
lim_n_im = torch.as_tensor([0.0, 0.1], device=device)
# 40 equally spaced wavelengths from 400 to 800 (presumably nm -- TODO confirm)
wl0 = torch.linspace(400, 800, 40, device=device)
# wavenumbers matching the wavelengths: k0 = 2*pi / wavelength
k0 = 2 * torch.pi / wl0
generate reference spectra#
We generate a large number of reference Mie spectra for existing particles, which will be used as design targets during training.
Note: this step could also be done without any physics knowledge, for example with artificial spectra (e.g. Lorentzians), or a scattering maximization loss.
# datagen: draw random core-shell particles and evaluate their Mie spectra.
# Only the spectra serve as training targets; the sampled geometries are not
# fed to the network.

# core radius and shell thickness are both drawn uniformly from `lim_r`
r_c = lim_r[0] + (lim_r[1] - lim_r[0]) * torch.rand(N_samples, device=device)
d_s = lim_r[0] + (lim_r[1] - lim_r[0]) * torch.rand(N_samples, device=device)
r_s = d_s + r_c  # outer radius = core radius + shell thickness

# complex refractive indices, column 0: core, column 1: shell
n_re = lim_n_re[0] + (lim_n_re[1] - lim_n_re[0]) * torch.rand(
    (N_samples, 2), device=device
)
n_im = lim_n_im[0] + (lim_n_im[1] - lim_n_im[0]) * torch.rand(
    (N_samples, 2), device=device
)
n = n_re + 1j * n_im

# low-level API: the permittivities have to be given as spectra (one value per
# wavenumber) so the evaluation can be vectorized over particles
eps_c = torch.ones_like(k0)[None, :] * n[:, 0:1] ** 2
eps_s = torch.ones_like(k0)[None, :] * n[:, 1:2] ** 2

all_particles = pmd.multishell.cross_sections(
    k0,
    r_c=r_c,
    eps_c=eps_c,
    r_s=r_s,
    eps_s=eps_s,
    eps_env=eps_env,
    n_max=n_max,
)

# the first `N_test` spectra are held back for testing, the rest is used for training
N_test = 128
q_sca_target_test = all_particles["q_sca"][:N_test].to(dtype=torch.float32)
q_sca_target = all_particles["q_sca"][N_test:].to(dtype=torch.float32)

# quick visual check: plot one of the training target spectra
plt.plot(q_sca_target[30].detach().cpu().numpy())

[<matplotlib.lines.Line2D object at 0x7f5d621a5340>]
Neural network classes / functions#
define the network model (simple MLP) and training loop
class FullyConnected(nn.Module):
    """Simple MLP design generator: spectrum in, normalized design parameters out.

    The network consists of an input layer, two hidden layers (all followed by
    a ReLU) and an output layer followed by a sigmoid, so every output value
    lies between 0 and 1.

    Args:
        hidden_dim: width of the hidden layers.
        in_dim: number of input features (spectral points). If ``None``
            (default), the length of the module-level wavenumber grid ``k0``
            is used, which reproduces the original behavior.
        out_dim: number of generated design parameters. The default of 6
            matches what ``nn_pred_to_mie_geometry`` consumes.
    """

    def __init__(self, hidden_dim=1024, in_dim=None, out_dim=6):
        super().__init__()
        if in_dim is None:
            # default: one input neuron per wavenumber of the global spectrum grid
            in_dim = len(k0)

        # attribute names and creation order are kept, so state-dict keys and
        # the random initialization sequence stay the same
        self.fc_in = nn.Linear(in_dim, hidden_dim)
        self.relu1 = nn.ReLU()
        self.fc_1 = nn.Linear(hidden_dim, hidden_dim)
        self.relu2 = nn.ReLU()
        self.fc_2 = nn.Linear(hidden_dim, hidden_dim)
        self.relu3 = nn.ReLU()
        self.fc_out = nn.Linear(hidden_dim, out_dim)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Map a batch of spectra ``x`` to sigmoid-normalized design parameters."""
        x = self.relu1(self.fc_in(x))
        x = self.relu2(self.fc_1(x))
        x = self.relu3(self.fc_2(x))
        # sigmoid keeps the outputs bounded; they are scaled to physical
        # values outside of the network
        return self.sigmoid(self.fc_out(x))
def nn_pred_to_mie_geometry(pred):
    """Convert raw network outputs into core-shell inputs for the Mie solver.

    The six columns of ``pred`` are read as: core radius, shell thickness,
    Re/Im of the core refractive index, Re/Im of the shell refractive index.
    Each column is multiplied by the upper bound of the matching user-defined
    limit (``lim_r``, ``lim_n_re``, ``lim_n_im``); the lower bounds are not
    applied.

    Args:
        pred: tensor indexed as ``pred[:, 0..5]``, i.e. one row of six values
            per particle.

    Returns:
        Tuple ``(r_c, r_s, eps_c, eps_s)``: core radius, outer (shell) radius,
        and the core / shell permittivities repeated along a second axis of
        length ``len(k0)``.
    """
    # implicit normalization: the upper limits act as scale factors
    r_scale = lim_r.max()
    n_re_scale = lim_n_re.max()
    n_im_scale = lim_n_im.max()

    core_part = pred[:, 0]
    shell_part = pred[:, 1]
    r_c = r_scale * core_part
    # the outer radius is built from core + shell thickness
    r_s = r_scale * (core_part + shell_part)

    n_c = n_re_scale * pred[:, 2] + n_im_scale * (1j * pred[:, 3])
    n_s = n_re_scale * pred[:, 4] + n_im_scale * (1j * pred[:, 5])

    # permittivity = n**2, broadcast to one value per wavenumber
    per_wavenumber = torch.ones_like(k0).unsqueeze(0)
    eps_c = per_wavenumber * n_c.unsqueeze(1) ** 2
    eps_s = per_wavenumber * n_s.unsqueeze(1) ** 2
    return r_c, r_s, eps_c, eps_s
def train_loop(dataloader, model, loss_fn, optimizer):
    """Train the generator for one epoch through the differentiable Mie solver.

    Every batch of target spectra is passed through ``model`` to obtain design
    parameters, which are converted to a core-shell geometry and evaluated
    with ``pmd.multishell.cross_sections``. The loss compares the resulting
    scattering spectrum with the target spectrum and is backpropagated through
    the Mie evaluation into the network.

    Args:
        dataloader: iterable over batches ``X`` of target spectra; must expose
            ``.dataset`` and ``len()``.
        model: generator network, called as ``model(X)``.
        loss_fn: callable ``loss_fn(q_sca_mie, X)`` returning a scalar tensor.
        optimizer: torch optimizer holding the parameters of ``model``.

    Returns:
        None. Progress and the current loss are shown in a tqdm progress bar.
    """
    size = len(dataloader.dataset)
    # Set the model to training mode - important for batch normalization and dropout layers
    # Unnecessary in this situation but added for best practices
    model.train()

    # len(dataloader) is the true number of batches: unlike
    # `size // batch_size` it also counts a final, partially filled batch
    prog_bar = tqdm(enumerate(dataloader), total=len(dataloader))
    current = 0  # number of samples processed so far
    for i_batch, X in prog_bar:
        # model prediction: generate core-shell particles
        pred = model(X)

        # evaluate Mie
        r_c, r_s, eps_c, eps_s = nn_pred_to_mie_geometry(pred)
        res_mie = pmd.multishell.cross_sections(
            k0,
            r_c=r_c,
            eps_c=eps_c,
            r_s=r_s,
            eps_s=eps_s,
            eps_env=eps_env,
            n_max=n_max,
        )
        q_sca_mie = res_mie["q_sca"].to(dtype=torch.float32)

        # calc. loss
        loss = loss_fn(q_sca_mie, X)

        # Backpropagation
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()

        # progress report; counting the samples directly avoids relying on
        # `dataloader.batch_size`, which is None for custom batch samplers
        current += len(X)
        loss = loss.item()
        prog_bar.set_description(f"loss: {loss:>7f} [{current:>5d}/{size:>5d}]")
training the Mie-informed network#
Here we use a simple, manually optimized training schedule.
# generator network; the same instance is trained through all stages below
model = FullyConnected().to(device)

# staged training schedule: batch size (bs), learning rate (lr) and number of
# epochs (n_ep) of every stage
confs = [
    {"bs": 32, "lr": 1e-4, "n_ep": 5},
    {"bs": 64, "lr": 1e-4, "n_ep": 5},
    {"bs": 128, "lr": 1e-4, "n_ep": 6},
    {"bs": 256, "lr": 1e-5, "n_ep": 6},
]

t_start = time.time()
for conf in confs:
    learning_rate, batch_size, epochs = conf["lr"], conf["bs"], conf["n_ep"]

    # stage banner
    print(
        "-------------------------------",
        f"LR={learning_rate}, batch_size={batch_size}",
        "-------------------------------",
        sep="\n",
    )

    # every stage gets a fresh loss, optimizer (new learning rate) and
    # data loader (new batch size)
    loss_fn = nn.MSELoss()
    optimizer = torch.optim.AdamW(model.parameters(), lr=learning_rate)
    train_dataloader = torch.utils.data.DataLoader(
        q_sca_target, batch_size=batch_size
    )

    for t in range(epochs):
        print(f"Epoch {t+1}, time={time.time()-t_start:.2f}s")
        train_loop(train_dataloader, model, loss_fn, optimizer)

print("Done!")
-------------------------------
LR=0.0001, batch_size=32
-------------------------------
Epoch 1, time=0.00s
0%| | 0/777 [00:00<?, ?it/s]
loss: 6.164481 [ 32/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 2.737760 [ 64/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 2.875986 [ 96/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 2.720785 [ 128/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 2.874957 [ 160/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 2.450446 [ 192/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 2.243979 [ 224/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 1.964936 [ 256/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 2.352512 [ 288/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 1.009158 [ 320/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 1.603234 [ 352/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 1.629543 [ 384/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 1.831257 [ 416/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 1.614799 [ 448/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 1.220146 [ 480/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.890595 [ 512/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.974652 [ 544/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 1.636398 [ 576/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 1.938516 [ 608/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 1.319782 [ 640/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 1.623832 [ 672/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 1.313473 [ 704/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 1.091473 [ 736/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 1.252733 [ 768/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 1.260256 [ 800/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 1.270293 [ 832/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 1.324409 [ 864/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 1.109571 [ 896/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 1.162570 [ 928/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.819867 [ 960/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 1.104579 [ 992/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 1.353768 [ 1024/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 1.453144 [ 1056/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.992753 [ 1088/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.958361 [ 1120/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 1.169763 [ 1152/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 1.124153 [ 1184/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 1.091988 [ 1216/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 1.108627 [ 1248/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 1.054809 [ 1280/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.978839 [ 1312/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.858665 [ 1344/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 1.057194 [ 1376/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 1.104313 [ 1408/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.858314 [ 1440/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 1.017307 [ 1472/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.961133 [ 1504/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 1.002170 [ 1536/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 1.022681 [ 1568/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 1.068828 [ 1600/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.915891 [ 1632/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 1.022553 [ 1664/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 1.121984 [ 1696/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.817024 [ 1728/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.849087 [ 1760/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.732544 [ 1792/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 1.140341 [ 1824/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 1.233776 [ 1856/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.670891 [ 1888/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.960571 [ 1920/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.897371 [ 1952/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.909648 [ 1984/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.818682 [ 2016/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.795169 [ 2048/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.679846 [ 2080/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.670422 [ 2112/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.963210 [ 2144/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.920963 [ 2176/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.765692 [ 2208/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.568434 [ 2240/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.954701 [ 2272/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 1.133265 [ 2304/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 1.247346 [ 2336/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.753909 [ 2368/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.709689 [ 2400/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.883656 [ 2432/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.999551 [ 2464/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.742481 [ 2496/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 1.023800 [ 2528/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.774954 [ 2560/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.763109 [ 2592/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.916699 [ 2624/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.746391 [ 2656/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.915866 [ 2688/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 1.024970 [ 2720/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.610243 [ 2752/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.741972 [ 2784/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.794500 [ 2816/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.667142 [ 2848/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.648914 [ 2880/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.652748 [ 2912/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.900908 [ 2944/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.967347 [ 2976/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.755115 [ 3008/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 1.019181 [ 3040/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.874483 [ 3072/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.882794 [ 3104/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.752526 [ 3136/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.758126 [ 3168/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.665612 [ 3200/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.589667 [ 3232/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.696020 [ 3264/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.894405 [ 3296/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.757941 [ 3328/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.878593 [ 3360/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.797711 [ 3392/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.715678 [ 3424/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.736084 [ 3456/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.708245 [ 3488/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.708949 [ 3520/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.720824 [ 3552/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.984631 [ 3584/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.786663 [ 3616/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.931745 [ 3648/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.868698 [ 3680/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.770071 [ 3712/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.616093 [ 3744/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.638998 [ 3776/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.513895 [ 3808/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.795441 [ 3840/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.810363 [ 3872/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.933426 [ 3904/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.918894 [ 3936/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.634410 [ 3968/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.714248 [ 4000/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.619162 [ 4032/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.742969 [ 4064/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.628853 [ 4096/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.680418 [ 4128/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.737965 [ 4160/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.613960 [ 4192/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.697630 [ 4224/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.745663 [ 4256/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.880557 [ 4288/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.617042 [ 4320/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.718834 [ 4352/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.763029 [ 4384/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.609054 [ 4416/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.792580 [ 4448/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.542121 [ 4480/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.532433 [ 4512/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.588701 [ 4544/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.702374 [ 4576/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.691495 [ 4608/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.728311 [ 4640/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.504064 [ 4672/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.487640 [ 4704/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.949446 [ 4736/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.791738 [ 4768/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.409219 [ 4800/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.632877 [ 4832/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.585334 [ 4864/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.568657 [ 4896/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.671711 [ 4928/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.601464 [ 4960/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.545707 [ 4992/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.774876 [ 5024/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.691815 [ 5056/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.612153 [ 5088/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.639408 [ 5120/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.535402 [ 5152/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.698626 [ 5184/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.855511 [ 5216/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.629847 [ 5248/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.685521 [ 5280/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.747082 [ 5312/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.873829 [ 5344/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.538996 [ 5376/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.603878 [ 5408/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.576532 [ 5440/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.569921 [ 5472/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.601796 [ 5504/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.664271 [ 5536/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.591459 [ 5568/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.568649 [ 5600/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.470834 [ 5632/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.614080 [ 5664/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.726296 [ 5696/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.596752 [ 5728/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.555599 [ 5760/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.477164 [ 5792/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.511473 [ 5824/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.721067 [ 5856/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.460814 [ 5888/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.587306 [ 5920/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.559756 [ 5952/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.735799 [ 5984/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.604562 [ 6016/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.759143 [ 6048/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.567662 [ 6080/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.514022 [ 6112/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.627488 [ 6144/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.646896 [ 6176/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.553794 [ 6208/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.461629 [ 6240/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.697395 [ 6272/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.553990 [ 6304/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.471745 [ 6336/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.520045 [ 6368/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.790232 [ 6400/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.546048 [ 6432/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.674127 [ 6464/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.518294 [ 6496/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.405453 [ 6528/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.448474 [ 6560/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.683180 [ 6592/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.421975 [ 6624/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.660372 [ 6656/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.573604 [ 6688/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.593175 [ 6720/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.816774 [ 6752/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.633772 [ 6784/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.687386 [ 6816/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.610047 [ 6848/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.601501 [ 6880/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.497183 [ 6912/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.605369 [ 6944/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.594093 [ 6976/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.729000 [ 7008/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.499710 [ 7040/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.764414 [ 7072/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.507794 [ 7104/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.532414 [ 7136/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.574363 [ 7168/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.601733 [ 7200/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.600877 [ 7232/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.740827 [ 7264/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.628635 [ 7296/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.533615 [ 7328/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.681888 [ 7360/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.467761 [ 7392/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.509412 [ 7424/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.543921 [ 7456/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.519471 [ 7488/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.562687 [ 7520/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.476441 [ 7552/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.499534 [ 7584/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.592533 [ 7616/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.630596 [ 7648/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.511322 [ 7680/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.675612 [ 7712/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.470358 [ 7744/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.426941 [ 7776/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.531452 [ 7808/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.602817 [ 7840/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.591064 [ 7872/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.464712 [ 7904/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.610520 [ 7936/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.471653 [ 7968/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.381827 [ 8000/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.545269 [ 8032/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.566303 [ 8064/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.447763 [ 8096/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.456645 [ 8128/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.557786 [ 8160/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.667689 [ 8192/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.570170 [ 8224/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.547727 [ 8256/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.524546 [ 8288/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.476321 [ 8320/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.494603 [ 8352/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.500116 [ 8384/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.384330 [ 8416/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.437409 [ 8448/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.660500 [ 8480/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.483352 [ 8512/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.342019 [ 8544/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.485371 [ 8576/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.539697 [ 8608/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.645562 [ 8640/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.551717 [ 8672/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.499212 [ 8704/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.481490 [ 8736/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.558849 [ 8768/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.499785 [ 8800/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.441693 [ 8832/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.528619 [ 8864/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.484029 [ 8896/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.336259 [ 8928/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.485258 [ 8960/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.446639 [ 8992/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.488524 [ 9024/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.446786 [ 9056/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.442423 [ 9088/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.557848 [ 9120/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.432377 [ 9152/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.547403 [ 9184/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.600596 [ 9216/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.350248 [ 9248/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.393484 [ 9280/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.520538 [ 9312/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.298799 [ 9344/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.404730 [ 9376/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.512682 [ 9408/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.533546 [ 9440/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.573727 [ 9472/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.463179 [ 9504/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.565591 [ 9536/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.611485 [ 9568/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.507239 [ 9600/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.553235 [ 9632/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.614904 [ 9664/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.500576 [ 9696/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.403709 [ 9728/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.578954 [ 9760/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.422608 [ 9792/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.490583 [ 9824/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.454019 [ 9856/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.550727 [ 9888/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.433561 [ 9920/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.483665 [ 9952/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.551014 [ 9984/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.471864 [10016/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.480280 [10048/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.591849 [10080/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.494830 [10112/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.534624 [10144/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.423455 [10176/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.377640 [10208/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.442957 [10240/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.565998 [10272/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.494577 [10304/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.541149 [10336/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.482146 [10368/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.492007 [10400/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.492258 [10432/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.321000 [10464/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.448232 [10496/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.491537 [10528/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.364877 [10560/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.495876 [10592/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.481563 [10624/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.479789 [10656/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.443664 [10688/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.502584 [10720/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.385685 [10752/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.363244 [10784/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.344523 [10816/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.361157 [10848/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.307963 [10880/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.479893 [10912/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.564132 [10944/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.255729 [10976/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.493022 [11008/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.359864 [11040/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.469917 [11072/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.486194 [11104/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.566673 [11136/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.513065 [11168/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.544822 [11200/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.527525 [11232/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.381965 [11264/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.343938 [11296/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.401624 [11328/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.432339 [11360/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.520971 [11392/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.505763 [11424/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.271518 [11456/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.405863 [11488/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.321346 [11520/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.419883 [11552/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.466337 [11584/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.383390 [11616/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.416236 [11648/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.387445 [11680/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.471862 [11712/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.531733 [11744/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.351787 [11776/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.333254 [11808/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.305919 [11840/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.387773 [11872/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.353196 [11904/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.480133 [11936/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.401466 [11968/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.417345 [12000/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.425895 [12032/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.478947 [12064/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.399976 [12096/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.451708 [12128/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.364525 [12160/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.439444 [12192/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.382794 [12224/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.431667 [12256/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.316090 [12288/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.400298 [12320/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.462672 [12352/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.342367 [12384/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.330446 [12416/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.476799 [12448/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.450942 [12480/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.336902 [12512/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.364853 [12544/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.431173 [12576/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.550652 [12608/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.403326 [12640/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.353696 [12672/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.300439 [12704/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.329600 [12736/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.402558 [12768/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.579772 [12800/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.608851 [12832/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.773836 [12864/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.404577 [12896/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.387139 [12928/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.575440 [12960/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.559383 [12992/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.410424 [13024/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.546545 [13056/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.308444 [13088/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.371378 [13120/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.360736 [13152/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.369335 [13184/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.496179 [13216/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.308689 [13248/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.499237 [13280/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.374767 [13312/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.451602 [13344/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.439136 [13376/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.370242 [13408/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.392848 [13440/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.410021 [13472/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.420327 [13504/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.457675 [13536/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.355162 [13568/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.474337 [13600/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.264589 [13632/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.457975 [13664/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.394797 [13696/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.262375 [13728/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.446653 [13760/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.428418 [13792/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.275490 [13824/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.492444 [13856/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.319403 [13888/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.327027 [13920/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.199375 [13952/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.426176 [13984/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.452906 [14016/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.455700 [14048/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.421430 [14080/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.457690 [14112/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.336038 [14144/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.315600 [14176/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.431287 [14208/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.413768 [14240/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.426661 [14272/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.474721 [14304/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.526632 [14336/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.470570 [14368/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.421071 [14400/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.447360 [14432/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.596888 [14464/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.347469 [14496/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.516718 [14528/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.592666 [14560/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.386945 [14592/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.458444 [14624/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.661369 [14656/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.504070 [14688/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.435507 [14720/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.468812 [14752/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.559835 [14784/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.558869 [14816/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.604500 [14848/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.476561 [14880/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.486849 [14912/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.374346 [14944/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.424815 [14976/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.486685 [15008/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.532743 [15040/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.376641 [15072/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.489207 [15104/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.283214 [15136/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.396738 [15168/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.306378 [15200/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.622631 [15232/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.449364 [15264/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.425564 [15296/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.467183 [15328/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.367500 [15360/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.479868 [15392/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.480306 [15424/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.419648 [15456/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.414952 [15488/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.314269 [15520/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.337400 [15552/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.349557 [15584/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.436922 [15616/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.452547 [15648/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.494932 [15680/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.457965 [15712/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.471573 [15744/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.378888 [15776/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.369957 [15808/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.453205 [15840/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.251479 [15872/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.290858 [15904/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.426465 [15936/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.525461 [15968/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.442222 [16000/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.505852 [16032/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.358162 [16064/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.301516 [16096/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.375555 [16128/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.524034 [16160/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.488457 [16192/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.428641 [16224/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.439230 [16256/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.438243 [16288/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.463229 [16320/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.503034 [16352/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.382813 [16384/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.419839 [16416/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.333028 [16448/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.446058 [16480/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.391882 [16512/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.317515 [16544/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.222456 [16576/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.323043 [16608/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.538504 [16640/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.354220 [16672/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.331572 [16704/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.501393 [16736/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.443438 [16768/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.346256 [16800/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.449919 [16832/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.355996 [16864/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.404949 [16896/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.389284 [16928/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.335724 [16960/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.545809 [16992/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.445820 [17024/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.407232 [17056/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.383329 [17088/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.437687 [17120/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.428839 [17152/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.401619 [17184/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.465694 [17216/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.408829 [17248/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.413443 [17280/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.416003 [17312/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.503340 [17344/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.566891 [17376/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.173165 [17408/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.434798 [17440/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.293052 [17472/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.394416 [17504/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.452694 [17536/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.366316 [17568/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.450925 [17600/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.445385 [17632/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.285724 [17664/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.430205 [17696/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.450709 [17728/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.488189 [17760/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.302865 [17792/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.324937 [17824/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.504436 [17856/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.444063 [17888/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.330172 [17920/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.344504 [17952/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.546266 [17984/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.327963 [18016/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.324779 [18048/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.358217 [18080/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.416842 [18112/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.294857 [18144/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.332755 [18176/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.457839 [18208/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.389204 [18240/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.394812 [18272/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.467936 [18304/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.473143 [18336/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.354342 [18368/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.318570 [18400/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.568328 [18432/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.418184 [18464/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.370156 [18496/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.444739 [18528/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.523204 [18560/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.311710 [18592/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.305019 [18624/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.335084 [18656/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.436217 [18688/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.396871 [18720/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.400851 [18752/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.479577 [18784/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.386131 [18816/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.454749 [18848/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.341027 [18880/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.304143 [18912/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.397492 [18944/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.550720 [18976/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.405478 [19008/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.310278 [19040/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.367927 [19072/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.503123 [19104/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.413198 [19136/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.334535 [19168/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.310399 [19200/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.382061 [19232/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.228281 [19264/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.483055 [19296/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.388800 [19328/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.372158 [19360/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.359320 [19392/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.240768 [19424/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.342772 [19456/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.386745 [19488/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.570611 [19520/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.396872 [19552/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.307266 [19584/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.395296 [19616/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.294635 [19648/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.197110 [19680/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.291161 [19712/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.292942 [19744/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.306654 [19776/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.407621 [19808/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.293375 [19840/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.349851 [19872/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.291374 [19904/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.392952 [19936/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.365261 [19968/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.401104 [20000/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.425249 [20032/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.355111 [20064/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.269007 [20096/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.459430 [20128/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.341830 [20160/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.299207 [20192/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.333568 [20224/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.322445 [20256/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.316737 [20288/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.343932 [20320/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.471067 [20352/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.318223 [20384/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.272798 [20416/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.359059 [20448/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.425376 [20480/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.340415 [20512/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.366516 [20544/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.298011 [20576/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.278228 [20608/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.280206 [20640/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.320808 [20672/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.295453 [20704/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.452652 [20736/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.209483 [20768/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.397573 [20800/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.416354 [20832/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.342583 [20864/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.415048 [20896/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.438201 [20928/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.368795 [20960/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.380330 [20992/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.423544 [21024/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.404173 [21056/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.272423 [21088/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.408709 [21120/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.416119 [21152/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.416682 [21184/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.362662 [21216/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.361368 [21248/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.516891 [21280/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.376424 [21312/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.318392 [21344/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.323624 [21376/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.381395 [21408/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.376439 [21440/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.347040 [21472/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.291771 [21504/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.268036 [21536/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.277370 [21568/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.321673 [21600/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.372983 [21632/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.432959 [21664/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.218108 [21696/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.291943 [21728/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.324434 [21760/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.231176 [21792/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.340811 [21824/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.269099 [21856/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.292310 [21888/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.411023 [21920/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.217042 [21952/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.434015 [21984/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.386317 [22016/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.330260 [22048/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.297162 [22080/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.336870 [22112/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.358483 [22144/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.302095 [22176/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.300444 [22208/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.392768 [22240/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.240118 [22272/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.282357 [22304/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.350900 [22336/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.362419 [22368/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.384580 [22400/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.412200 [22432/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.293520 [22464/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.347582 [22496/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.289150 [22528/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.411087 [22560/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.370978 [22592/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.374850 [22624/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.309391 [22656/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.389198 [22688/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.399159 [22720/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.339568 [22752/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.343452 [22784/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.391304 [22816/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.374068 [22848/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.343143 [22880/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.396483 [22912/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.397452 [22944/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.506754 [22976/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.335878 [23008/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.397195 [23040/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.268821 [23072/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.326466 [23104/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.292304 [23136/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.343123 [23168/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.407657 [23200/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.402419 [23232/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.401593 [23264/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.406507 [23296/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.402965 [23328/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.414556 [23360/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.332363 [23392/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.320690 [23424/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.257641 [23456/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.366760 [23488/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.328922 [23520/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.345113 [23552/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.256553 [23584/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.339989 [23616/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.314803 [23648/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.414970 [23680/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.315931 [23712/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.455096 [23744/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.222365 [23776/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.344899 [23808/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.376577 [23840/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.457034 [23872/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.376500 [23904/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.361700 [23936/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.401792 [23968/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.263831 [24000/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.409723 [24032/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.353604 [24064/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.403589 [24096/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.379990 [24128/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.310417 [24160/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.249213 [24192/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.327041 [24224/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.350532 [24256/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.357905 [24288/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.289430 [24320/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.269042 [24352/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.319808 [24384/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.311022 [24416/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.375248 [24448/24872]: 0%| | 0/777 [00:30<?, ?it/s]
loss: 0.375248 [24448/24872]: 98%|█████████▊| 764/777 [00:30<00:00, 25.46it/s]
loss: 0.350070 [24480/24872]: 98%|█████████▊| 764/777 [00:30<00:00, 25.46it/s]
loss: 0.268360 [24512/24872]: 98%|█████████▊| 764/777 [00:30<00:00, 25.46it/s]
loss: 0.315079 [24544/24872]: 98%|█████████▊| 764/777 [00:30<00:00, 25.46it/s]
loss: 0.262206 [24576/24872]: 98%|█████████▊| 764/777 [00:30<00:00, 25.46it/s]
loss: 0.436917 [24608/24872]: 98%|█████████▊| 764/777 [00:30<00:00, 25.46it/s]
loss: 0.294415 [24640/24872]: 98%|█████████▊| 764/777 [00:30<00:00, 25.46it/s]
loss: 0.242801 [24672/24872]: 98%|█████████▊| 764/777 [00:30<00:00, 25.46it/s]
loss: 0.342124 [24704/24872]: 98%|█████████▊| 764/777 [00:30<00:00, 25.46it/s]
loss: 0.479245 [24736/24872]: 98%|█████████▊| 764/777 [00:30<00:00, 25.46it/s]
loss: 0.214594 [24768/24872]: 98%|█████████▊| 764/777 [00:30<00:00, 25.46it/s]
loss: 0.261813 [24800/24872]: 98%|█████████▊| 764/777 [00:30<00:00, 25.46it/s]
loss: 0.429851 [24832/24872]: 98%|█████████▊| 764/777 [00:30<00:00, 25.46it/s]
loss: 0.491314 [24864/24872]: 98%|█████████▊| 764/777 [00:30<00:00, 25.46it/s]
loss: 0.479038 [24872/24872]: 98%|█████████▊| 764/777 [00:30<00:00, 25.46it/s]
loss: 0.479038 [24872/24872]: : 778it [00:30, 25.47it/s]
Epoch 2, time=30.54s
0%| | 0/777 [00:00<?, ?it/s]
loss: 0.333662 [ 32/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.349014 [ 64/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.360450 [ 96/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.276553 [ 128/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.290454 [ 160/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.302002 [ 192/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.314912 [ 224/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.277260 [ 256/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.400150 [ 288/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.258316 [ 320/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.418012 [ 352/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.295628 [ 384/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.358790 [ 416/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.321909 [ 448/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.337019 [ 480/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.173866 [ 512/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.311167 [ 544/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.272052 [ 576/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.329776 [ 608/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.290595 [ 640/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.267222 [ 672/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.382368 [ 704/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.192103 [ 736/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.386307 [ 768/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.294742 [ 800/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.310129 [ 832/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.360554 [ 864/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.353763 [ 896/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.502205 [ 928/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.292156 [ 960/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.357291 [ 992/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.473111 [ 1024/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.427209 [ 1056/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.288267 [ 1088/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.324332 [ 1120/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.331146 [ 1152/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.356051 [ 1184/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.314671 [ 1216/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.297612 [ 1248/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.217868 [ 1280/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.271724 [ 1312/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.286501 [ 1344/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.275884 [ 1376/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.373024 [ 1408/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.387251 [ 1440/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.358557 [ 1472/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.278860 [ 1504/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.424750 [ 1536/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.361930 [ 1568/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.347943 [ 1600/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.403776 [ 1632/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.311494 [ 1664/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.310218 [ 1696/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.381897 [ 1728/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.315516 [ 1760/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.267412 [ 1792/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.374829 [ 1824/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.370299 [ 1856/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.243289 [ 1888/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.324852 [ 1920/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.322733 [ 1952/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.339508 [ 1984/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.305992 [ 2016/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.291732 [ 2048/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.281408 [ 2080/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.225140 [ 2112/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.430692 [ 2144/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.319322 [ 2176/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.286176 [ 2208/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.224133 [ 2240/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.262513 [ 2272/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.583305 [ 2304/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.447943 [ 2336/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.334720 [ 2368/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.230506 [ 2400/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.306729 [ 2432/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.335475 [ 2464/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.365699 [ 2496/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.331867 [ 2528/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.305401 [ 2560/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.412135 [ 2592/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.316049 [ 2624/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.254667 [ 2656/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.358729 [ 2688/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.487864 [ 2720/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.215994 [ 2752/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.288173 [ 2784/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.288142 [ 2816/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.233857 [ 2848/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.277111 [ 2880/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.267570 [ 2912/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.273528 [ 2944/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.501639 [ 2976/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.328003 [ 3008/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.420068 [ 3040/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.326933 [ 3072/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.378846 [ 3104/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.314695 [ 3136/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.304697 [ 3168/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.219396 [ 3200/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.217817 [ 3232/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.258255 [ 3264/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.381601 [ 3296/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.308898 [ 3328/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.327224 [ 3360/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.368106 [ 3392/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.281741 [ 3424/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.322092 [ 3456/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.339191 [ 3488/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.312534 [ 3520/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.328836 [ 3552/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.391011 [ 3584/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.288623 [ 3616/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.443602 [ 3648/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.331256 [ 3680/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.303500 [ 3712/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.344091 [ 3744/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.277718 [ 3776/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.198588 [ 3808/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.358906 [ 3840/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.258335 [ 3872/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.390056 [ 3904/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.321377 [ 3936/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.323549 [ 3968/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.354413 [ 4000/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.259258 [ 4032/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.381091 [ 4064/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.221828 [ 4096/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.279956 [ 4128/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.264845 [ 4160/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.227555 [ 4192/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.312054 [ 4224/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.267738 [ 4256/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.353125 [ 4288/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.338191 [ 4320/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.271698 [ 4352/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.357593 [ 4384/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.308508 [ 4416/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.330600 [ 4448/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.256457 [ 4480/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.282604 [ 4512/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.250066 [ 4544/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.297412 [ 4576/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.276115 [ 4608/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.302953 [ 4640/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.255195 [ 4672/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.189585 [ 4704/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.373437 [ 4736/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.455258 [ 4768/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.185465 [ 4800/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.324415 [ 4832/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.296348 [ 4864/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.203471 [ 4896/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.320163 [ 4928/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.327058 [ 4960/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.259402 [ 4992/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.312399 [ 5024/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.300966 [ 5056/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.177044 [ 5088/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.243214 [ 5120/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.189319 [ 5152/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.380807 [ 5184/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.486743 [ 5216/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.276429 [ 5248/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.276338 [ 5280/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.421337 [ 5312/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.486388 [ 5344/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.310246 [ 5376/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.297424 [ 5408/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.329259 [ 5440/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.301472 [ 5472/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.284946 [ 5504/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.368204 [ 5536/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.263718 [ 5568/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.218967 [ 5600/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.295736 [ 5632/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.342689 [ 5664/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.361587 [ 5696/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.399150 [ 5728/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.296118 [ 5760/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.259203 [ 5792/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.315267 [ 5824/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.296735 [ 5856/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.283577 [ 5888/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.331517 [ 5920/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.293614 [ 5952/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.363191 [ 5984/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.283115 [ 6016/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.435685 [ 6048/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.297982 [ 6080/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.284114 [ 6112/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.355839 [ 6144/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.354456 [ 6176/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.299152 [ 6208/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.256803 [ 6240/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.413729 [ 6272/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.320670 [ 6304/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.266943 [ 6336/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.408054 [ 6368/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.411520 [ 6400/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.270302 [ 6432/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.372657 [ 6464/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.285970 [ 6496/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.213246 [ 6528/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.236605 [ 6560/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.429324 [ 6592/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.273269 [ 6624/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.334261 [ 6656/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.295368 [ 6688/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.364055 [ 6720/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.520895 [ 6752/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.340066 [ 6784/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.330878 [ 6816/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.335517 [ 6848/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.234673 [ 6880/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.229231 [ 6912/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.377571 [ 6944/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.283670 [ 6976/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.366444 [ 7008/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.250581 [ 7040/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.337642 [ 7072/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.265040 [ 7104/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.285162 [ 7136/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.397851 [ 7168/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.321533 [ 7200/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.317888 [ 7232/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.487886 [ 7264/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.367594 [ 7296/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.307303 [ 7328/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.282627 [ 7360/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.309760 [ 7392/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.374023 [ 7424/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.292146 [ 7456/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.263324 [ 7488/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.289356 [ 7520/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.329887 [ 7552/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.301179 [ 7584/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.325360 [ 7616/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.326158 [ 7648/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.265991 [ 7680/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.367283 [ 7712/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.285715 [ 7744/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.248153 [ 7776/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.356849 [ 7808/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.381140 [ 7840/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.301175 [ 7872/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.300012 [ 7904/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.319923 [ 7936/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.251021 [ 7968/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.190480 [ 8000/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.262860 [ 8032/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.342131 [ 8064/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.260703 [ 8096/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.275862 [ 8128/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.297545 [ 8160/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.356133 [ 8192/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.323678 [ 8224/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.273888 [ 8256/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.264468 [ 8288/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.286691 [ 8320/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.201698 [ 8352/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.239893 [ 8384/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.212030 [ 8416/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.217940 [ 8448/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.332626 [ 8480/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.250826 [ 8512/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.224092 [ 8544/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.294648 [ 8576/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.334474 [ 8608/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.244143 [ 8640/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.272304 [ 8672/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.248776 [ 8704/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.236801 [ 8736/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.351312 [ 8768/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.282992 [ 8800/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.284401 [ 8832/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.315879 [ 8864/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.257987 [ 8896/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.218293 [ 8928/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.283785 [ 8960/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.238112 [ 8992/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.270765 [ 9024/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.201032 [ 9056/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.256824 [ 9088/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.291213 [ 9120/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.250036 [ 9152/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.294276 [ 9184/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.371360 [ 9216/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.216326 [ 9248/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.254083 [ 9280/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.288402 [ 9312/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.213711 [ 9344/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.288061 [ 9376/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.306592 [ 9408/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.219546 [ 9440/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.406717 [ 9472/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.225004 [ 9504/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.246351 [ 9536/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.352601 [ 9568/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.329529 [ 9600/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.309755 [ 9632/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.340247 [ 9664/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.342147 [ 9696/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.205080 [ 9728/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.371367 [ 9760/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.211815 [ 9792/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.337978 [ 9824/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.246320 [ 9856/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.337757 [ 9888/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.221522 [ 9920/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.215361 [ 9952/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.347924 [ 9984/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.283904 [10016/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.309413 [10048/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.398091 [10080/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.325905 [10112/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.330359 [10144/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.223254 [10176/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.275808 [10208/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.264588 [10240/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.305086 [10272/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.328821 [10304/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.392488 [10336/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.344126 [10368/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.325513 [10400/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.309011 [10432/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.173308 [10464/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.299149 [10496/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.305011 [10528/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.293194 [10560/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.263004 [10592/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.261627 [10624/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.297477 [10656/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.217682 [10688/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.242494 [10720/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.277709 [10752/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.278736 [10784/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.227439 [10816/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.220715 [10848/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.219350 [10880/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.362961 [10912/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.364962 [10944/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.234692 [10976/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.313069 [11008/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.238241 [11040/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.312389 [11072/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.395267 [11104/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.383638 [11136/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.323298 [11168/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.384198 [11200/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.383910 [11232/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.260251 [11264/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.230734 [11296/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.264591 [11328/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.310192 [11360/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.329151 [11392/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.340499 [11424/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.183658 [11456/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.292619 [11488/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.219576 [11520/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.260324 [11552/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.343636 [11584/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.230621 [11616/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.336100 [11648/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.257930 [11680/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.325813 [11712/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.340691 [11744/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.249297 [11776/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.232648 [11808/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.154038 [11840/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.240171 [11872/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.276124 [11904/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.330718 [11936/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.281028 [11968/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.283600 [12000/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.270754 [12032/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.321137 [12064/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.250133 [12096/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.317639 [12128/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.218482 [12160/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.295666 [12192/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.229208 [12224/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.280715 [12256/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.191019 [12288/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.240327 [12320/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.318328 [12352/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.208415 [12384/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.238480 [12416/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.308743 [12448/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.325238 [12480/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.225321 [12512/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.322952 [12544/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.272918 [12576/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.415179 [12608/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.302019 [12640/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.272180 [12672/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.297415 [12704/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.290299 [12736/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.287946 [12768/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.453190 [12800/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.413962 [12832/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.568120 [12864/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.258321 [12896/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.303407 [12928/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.392291 [12960/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.241241 [12992/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.271148 [13024/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.428967 [13056/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.187378 [13088/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.317974 [13120/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.270949 [13152/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.221169 [13184/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.370894 [13216/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.214032 [13248/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.313517 [13280/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.308995 [13312/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.370509 [13344/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.285800 [13376/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.288886 [13408/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.301898 [13440/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.277526 [13472/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.331393 [13504/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.404142 [13536/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.217426 [13568/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.371635 [13600/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.199060 [13632/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.344587 [13664/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.225772 [13696/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.234668 [13728/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.297227 [13760/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.286245 [13792/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.224682 [13824/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.400495 [13856/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.241421 [13888/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.274594 [13920/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.162559 [13952/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.236334 [13984/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.236630 [14016/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.354420 [14048/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.281390 [14080/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.291800 [14112/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.250855 [14144/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.249632 [14176/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.229820 [14208/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.305142 [14240/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.372875 [14272/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.287656 [14304/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.331205 [14336/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.299539 [14368/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.276524 [14400/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.313443 [14432/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.456411 [14464/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.230812 [14496/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.282348 [14528/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.323397 [14560/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.287294 [14592/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.316359 [14624/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.473413 [14656/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.352708 [14688/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.297632 [14720/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.409644 [14752/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.347123 [14784/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.337190 [14816/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.407489 [14848/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.300910 [14880/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.257518 [14912/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.249652 [14944/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.261534 [14976/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.247700 [15008/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.360160 [15040/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.278006 [15072/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.369430 [15104/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.165772 [15136/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.265705 [15168/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.203560 [15200/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.337938 [15232/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.242409 [15264/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.324270 [15296/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.301081 [15328/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.213829 [15360/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.295116 [15392/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.329836 [15424/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.290851 [15456/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.216773 [15488/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.196852 [15520/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.276741 [15552/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.238869 [15584/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.272049 [15616/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.341711 [15648/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.274786 [15680/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.340176 [15712/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.306559 [15744/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.224221 [15776/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.236086 [15808/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.311208 [15840/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.173088 [15872/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.189793 [15904/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.299574 [15936/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.377981 [15968/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.340869 [16000/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.370267 [16032/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.287814 [16064/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.188634 [16096/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.258318 [16128/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.407266 [16160/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.321295 [16192/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.318085 [16224/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.280787 [16256/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.272650 [16288/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.313987 [16320/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.377614 [16352/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.323279 [16384/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.305994 [16416/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.202459 [16448/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.278045 [16480/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.289863 [16512/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.270830 [16544/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.148834 [16576/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.243450 [16608/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.351300 [16640/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.261401 [16672/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.177747 [16704/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.304993 [16736/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.326760 [16768/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.224582 [16800/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.249761 [16832/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.202732 [16864/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.245243 [16896/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.293654 [16928/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.221124 [16960/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.340165 [16992/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.365684 [17024/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.280074 [17056/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.287963 [17088/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.303696 [17120/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.300454 [17152/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.298348 [17184/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.336458 [17216/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.325204 [17248/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.369152 [17280/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.241293 [17312/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.275797 [17344/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.431501 [17376/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.125302 [17408/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.279439 [17440/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.154528 [17472/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.280597 [17504/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.278537 [17536/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.251537 [17568/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.346785 [17600/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.293841 [17632/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.174662 [17664/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.291145 [17696/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.318407 [17728/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.330506 [17760/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.268574 [17792/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.233980 [17824/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.396663 [17856/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.337663 [17888/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.237223 [17920/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.232802 [17952/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.405258 [17984/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.224653 [18016/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.219555 [18048/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.242796 [18080/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.304610 [18112/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.212652 [18144/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.236145 [18176/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.300465 [18208/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.292426 [18240/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.328310 [18272/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.289371 [18304/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.280913 [18336/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.256953 [18368/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.191231 [18400/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.333056 [18432/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.234347 [18464/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.271578 [18496/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.283541 [18528/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.328160 [18560/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.240418 [18592/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.232172 [18624/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.265513 [18656/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.277241 [18688/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.336689 [18720/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.322044 [18752/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.363585 [18784/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.237175 [18816/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.354819 [18848/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.245459 [18880/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.240582 [18912/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.293992 [18944/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.411892 [18976/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.293034 [19008/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.212780 [19040/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.328966 [19072/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.324225 [19104/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.324708 [19136/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.274728 [19168/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.222923 [19200/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.246438 [19232/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.163862 [19264/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.368915 [19296/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.277766 [19328/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.279296 [19360/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.235189 [19392/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.185122 [19424/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.279813 [19456/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.276873 [19488/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.312538 [19520/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.285531 [19552/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.220507 [19584/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.276709 [19616/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.208410 [19648/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.165127 [19680/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.264335 [19712/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.220955 [19744/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.202356 [19776/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.302022 [19808/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.230940 [19840/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.262317 [19872/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.252112 [19904/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.288713 [19936/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.292759 [19968/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.309881 [20000/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.318861 [20032/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.257853 [20064/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.234322 [20096/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.283935 [20128/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.285969 [20160/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.215502 [20192/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.256071 [20224/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.225006 [20256/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.241752 [20288/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.272036 [20320/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.334188 [20352/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.251534 [20384/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.190620 [20416/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.294281 [20448/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.311833 [20480/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.249426 [20512/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.264290 [20544/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.184965 [20576/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.193417 [20608/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.242133 [20640/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.224907 [20672/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.176888 [20704/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.310586 [20736/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.163753 [20768/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.321388 [20800/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.304044 [20832/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.200366 [20864/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.276464 [20896/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.347830 [20928/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.289133 [20960/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.218021 [20992/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.297522 [21024/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.277666 [21056/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.204479 [21088/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.250844 [21120/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.337471 [21152/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.270978 [21184/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.275385 [21216/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.246445 [21248/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.303283 [21280/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.303521 [21312/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.227798 [21344/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.224096 [21376/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.282651 [21408/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.296054 [21440/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.276915 [21472/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.199752 [21504/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.202250 [21536/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.227346 [21568/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.234010 [21600/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.297349 [21632/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.333100 [21664/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.168165 [21696/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.228204 [21728/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.269761 [21760/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.183146 [21792/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.280274 [21824/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.200641 [21856/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.205127 [21888/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.310102 [21920/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.137318 [21952/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.317195 [21984/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.357445 [22016/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.279956 [22048/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.222648 [22080/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.291737 [22112/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.251600 [22144/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.200738 [22176/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.235657 [22208/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.327505 [22240/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.199353 [22272/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.223175 [22304/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.298678 [22336/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.303335 [22368/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.332146 [22400/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.296107 [22432/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.233639 [22464/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.229368 [22496/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.247421 [22528/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.301864 [22560/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.271340 [22592/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.268579 [22624/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.223420 [22656/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.341874 [22688/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.312844 [22720/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.218590 [22752/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.282670 [22784/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.253623 [22816/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.252675 [22848/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.202941 [22880/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.335842 [22912/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.295719 [22944/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.343551 [22976/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.259059 [23008/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.300057 [23040/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.173613 [23072/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.277245 [23104/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.251806 [23136/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.259618 [23168/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.274300 [23200/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.371583 [23232/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.333385 [23264/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.280552 [23296/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.242706 [23328/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.381220 [23360/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.275629 [23392/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.219440 [23424/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.160105 [23456/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.270468 [23488/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.253815 [23520/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.217184 [23552/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.196547 [23584/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.221890 [23616/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.288683 [23648/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.311481 [23680/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.263913 [23712/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.338102 [23744/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.184229 [23776/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.244779 [23808/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.280916 [23840/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.350682 [23872/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.281124 [23904/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.288701 [23936/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.313398 [23968/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.217370 [24000/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.318364 [24032/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.231403 [24064/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.323322 [24096/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.273000 [24128/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.232374 [24160/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.154024 [24192/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.259830 [24224/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.286905 [24256/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.255403 [24288/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.196503 [24320/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.191573 [24352/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.256030 [24384/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.223394 [24416/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.285006 [24448/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.237743 [24480/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.198739 [24512/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.255882 [24544/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.206427 [24576/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.312141 [24608/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.239735 [24640/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.182137 [24672/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.286549 [24704/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.359794 [24736/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.166845 [24768/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.202555 [24800/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.285530 [24832/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.375678 [24864/24872]: 0%| | 0/777 [00:30<?, ?it/s]
loss: 0.375678 [24864/24872]: 100%|██████████| 777/777 [00:30<00:00, 25.88it/s]
loss: 0.362807 [24872/24872]: 100%|██████████| 777/777 [00:30<00:00, 25.88it/s]
loss: 0.362807 [24872/24872]: : 778it [00:30, 25.89it/s]
Epoch 3, time=60.59s
0%| | 0/777 [00:00<?, ?it/s]
loss: 0.301910 [ 32/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.313962 [ 64/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.283696 [ 96/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.226864 [ 128/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.260233 [ 160/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.238716 [ 192/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.268689 [ 224/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.209364 [ 256/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.317398 [ 288/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.229434 [ 320/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.305135 [ 352/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.227345 [ 384/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.274983 [ 416/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.240712 [ 448/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.293548 [ 480/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.140112 [ 512/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.254038 [ 544/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.217035 [ 576/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.258236 [ 608/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.214194 [ 640/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.238955 [ 672/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.243748 [ 704/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.142151 [ 736/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.263032 [ 768/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.215014 [ 800/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.267440 [ 832/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.312933 [ 864/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.257482 [ 896/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.384124 [ 928/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.250062 [ 960/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.283238 [ 992/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.371741 [ 1024/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.337240 [ 1056/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.244553 [ 1088/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.236774 [ 1120/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.290053 [ 1152/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.273364 [ 1184/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.225263 [ 1216/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.220715 [ 1248/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.191774 [ 1280/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.249773 [ 1312/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.238042 [ 1344/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.242152 [ 1376/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.305678 [ 1408/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.327578 [ 1440/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.294987 [ 1472/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.248376 [ 1504/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.307448 [ 1536/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.341088 [ 1568/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.299039 [ 1600/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.306835 [ 1632/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.270611 [ 1664/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.258807 [ 1696/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.302925 [ 1728/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.290093 [ 1760/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.244977 [ 1792/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.271597 [ 1824/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.297067 [ 1856/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.208064 [ 1888/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.278322 [ 1920/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.241084 [ 1952/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.281068 [ 1984/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.257750 [ 2016/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.229738 [ 2048/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.193299 [ 2080/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.195618 [ 2112/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.356297 [ 2144/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.225894 [ 2176/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.212059 [ 2208/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.198079 [ 2240/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.219898 [ 2272/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.440711 [ 2304/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.376870 [ 2336/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.290352 [ 2368/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.170111 [ 2400/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.258707 [ 2432/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.228305 [ 2464/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.387548 [ 2496/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.257630 [ 2528/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.243787 [ 2560/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.337912 [ 2592/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.314058 [ 2624/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.222000 [ 2656/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.297878 [ 2688/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.328900 [ 2720/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.177242 [ 2752/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.208040 [ 2784/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.202499 [ 2816/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.211641 [ 2848/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.199420 [ 2880/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.197849 [ 2912/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.264029 [ 2944/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.422879 [ 2976/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.262657 [ 3008/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.329962 [ 3040/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.279530 [ 3072/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.277060 [ 3104/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.287793 [ 3136/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.230497 [ 3168/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.188396 [ 3200/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.183942 [ 3232/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.223818 [ 3264/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.288230 [ 3296/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.245419 [ 3328/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.258755 [ 3360/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.308062 [ 3392/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.236467 [ 3424/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.267525 [ 3456/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.314715 [ 3488/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.237917 [ 3520/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.237371 [ 3552/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.302293 [ 3584/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.282977 [ 3616/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.342834 [ 3648/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.265543 [ 3680/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.221804 [ 3712/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.266307 [ 3744/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.288609 [ 3776/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.138358 [ 3808/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.275383 [ 3840/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.210882 [ 3872/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.285226 [ 3904/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.276308 [ 3936/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.210777 [ 3968/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.243043 [ 4000/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.210864 [ 4032/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.348689 [ 4064/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.176917 [ 4096/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.174577 [ 4128/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.246606 [ 4160/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.199439 [ 4192/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.261917 [ 4224/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.243772 [ 4256/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.309400 [ 4288/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.272913 [ 4320/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.203977 [ 4352/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.288314 [ 4384/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.236365 [ 4416/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.275992 [ 4448/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.248398 [ 4480/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.240124 [ 4512/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.243478 [ 4544/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.251113 [ 4576/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.245655 [ 4608/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.271129 [ 4640/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.180838 [ 4672/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.161625 [ 4704/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.337037 [ 4736/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.342851 [ 4768/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.161059 [ 4800/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.254270 [ 4832/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.244557 [ 4864/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.199285 [ 4896/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.332098 [ 4928/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.297924 [ 4960/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.255462 [ 4992/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.249527 [ 5024/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.231505 [ 5056/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.151399 [ 5088/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.214604 [ 5120/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.161945 [ 5152/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.338692 [ 5184/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.352626 [ 5216/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.242660 [ 5248/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.225555 [ 5280/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.365169 [ 5312/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.472600 [ 5344/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.233345 [ 5376/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.303757 [ 5408/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.306420 [ 5440/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.252787 [ 5472/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.235571 [ 5504/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.292786 [ 5536/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.213831 [ 5568/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.185646 [ 5600/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.251083 [ 5632/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.294134 [ 5664/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.325017 [ 5696/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.336204 [ 5728/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.254217 [ 5760/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.186509 [ 5792/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.249319 [ 5824/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.270312 [ 5856/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.187161 [ 5888/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.292120 [ 5920/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.205413 [ 5952/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.272958 [ 5984/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.243241 [ 6016/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.356655 [ 6048/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.288470 [ 6080/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.260425 [ 6112/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.280293 [ 6144/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.281970 [ 6176/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.215228 [ 6208/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.218215 [ 6240/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.317990 [ 6272/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.281792 [ 6304/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.228515 [ 6336/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.314905 [ 6368/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.305538 [ 6400/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.209933 [ 6432/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.253202 [ 6464/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.177065 [ 6496/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.189784 [ 6528/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.207827 [ 6560/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.296531 [ 6592/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.239628 [ 6624/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.319648 [ 6656/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.225112 [ 6688/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.255935 [ 6720/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.433411 [ 6752/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.268767 [ 6784/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.273885 [ 6816/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.224703 [ 6848/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.167955 [ 6880/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.237408 [ 6912/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.254635 [ 6944/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.208037 [ 6976/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.276204 [ 7008/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.181042 [ 7040/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.250726 [ 7072/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.235709 [ 7104/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.212916 [ 7136/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.320137 [ 7168/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.242241 [ 7200/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.291527 [ 7232/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.413801 [ 7264/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.338405 [ 7296/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.250475 [ 7328/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.307598 [ 7360/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.269574 [ 7392/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.287917 [ 7424/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.269668 [ 7456/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.220533 [ 7488/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.257430 [ 7520/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.352585 [ 7552/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.246265 [ 7584/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.240595 [ 7616/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.241092 [ 7648/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.227920 [ 7680/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.313589 [ 7712/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.238892 [ 7744/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.196129 [ 7776/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.272223 [ 7808/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.353057 [ 7840/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.232937 [ 7872/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.282419 [ 7904/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.275140 [ 7936/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.188730 [ 7968/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.185993 [ 8000/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.232909 [ 8032/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.289516 [ 8064/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.250848 [ 8096/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.215756 [ 8128/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.262548 [ 8160/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.311475 [ 8192/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.258694 [ 8224/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.246108 [ 8256/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.213613 [ 8288/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.235054 [ 8320/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.170213 [ 8352/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.215431 [ 8384/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.181026 [ 8416/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.169923 [ 8448/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.277081 [ 8480/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.213909 [ 8512/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.243607 [ 8544/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.272606 [ 8576/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.285220 [ 8608/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.233941 [ 8640/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.235929 [ 8672/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.206580 [ 8704/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.206495 [ 8736/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.275226 [ 8768/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.262485 [ 8800/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.264493 [ 8832/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.267451 [ 8864/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.222325 [ 8896/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.217934 [ 8928/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.256414 [ 8960/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.192818 [ 8992/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.240401 [ 9024/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.174072 [ 9056/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.223378 [ 9088/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.244322 [ 9120/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.225050 [ 9152/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.229409 [ 9184/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.323355 [ 9216/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.214379 [ 9248/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.228908 [ 9280/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.226594 [ 9312/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.191845 [ 9344/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.227000 [ 9376/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.255791 [ 9408/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.160984 [ 9440/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.355943 [ 9472/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.167052 [ 9504/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.187920 [ 9536/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.279190 [ 9568/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.254679 [ 9600/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.258303 [ 9632/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.296020 [ 9664/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.255552 [ 9696/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.168505 [ 9728/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.321480 [ 9760/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.174775 [ 9792/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.273067 [ 9824/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.212860 [ 9856/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.273704 [ 9888/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.162176 [ 9920/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.176305 [ 9952/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.287699 [ 9984/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.243308 [10016/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.256503 [10048/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.356106 [10080/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.285182 [10112/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.271486 [10144/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.200098 [10176/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.189283 [10208/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.212731 [10240/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.274795 [10272/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.283650 [10304/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.305143 [10336/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.259761 [10368/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.240048 [10400/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.246655 [10432/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.169732 [10464/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.270059 [10496/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.228240 [10528/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.205273 [10560/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.228488 [10592/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.265670 [10624/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.247114 [10656/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.173576 [10688/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.172895 [10720/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.232602 [10752/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.230778 [10784/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.176689 [10816/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.149563 [10848/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.180300 [10880/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.293703 [10912/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.303381 [10944/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.199910 [10976/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.257846 [11008/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.220654 [11040/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.235708 [11072/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.347853 [11104/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.332460 [11136/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.293428 [11168/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.316551 [11200/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.311902 [11232/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.220729 [11264/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.198240 [11296/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.223916 [11328/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.214434 [11360/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.260253 [11392/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.311610 [11424/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.149144 [11456/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.223067 [11488/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.171987 [11520/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.195429 [11552/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.295382 [11584/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.196839 [11616/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.286940 [11648/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.212678 [11680/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.287258 [11712/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.311789 [11744/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.229161 [11776/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.183573 [11808/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.143757 [11840/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.181259 [11872/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.228671 [11904/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.294048 [11936/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.223816 [11968/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.224439 [12000/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.232181 [12032/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.249296 [12064/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.201202 [12096/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.257312 [12128/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.195432 [12160/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.253858 [12192/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.185688 [12224/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.254766 [12256/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.170149 [12288/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.212318 [12320/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.238732 [12352/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.188680 [12384/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.201975 [12416/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.257705 [12448/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.296830 [12480/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.178220 [12512/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.257986 [12544/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.222077 [12576/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.334746 [12608/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.240826 [12640/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.228630 [12672/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.260499 [12704/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.215406 [12736/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.248871 [12768/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.422762 [12800/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.356642 [12832/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.494219 [12864/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.212405 [12896/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.247235 [12928/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.328760 [12960/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.211421 [12992/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.257588 [13024/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.395942 [13056/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.145503 [13088/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.240145 [13120/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.186323 [13152/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.175900 [13184/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.326283 [13216/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.179081 [13248/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.262746 [13280/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.240018 [13312/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.321233 [13344/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.230935 [13376/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.267431 [13408/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.271931 [13440/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.225209 [13472/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.264005 [13504/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.332301 [13536/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.199234 [13568/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.356736 [13600/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.174452 [13632/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.266638 [13664/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.185781 [13696/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.176604 [13728/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.256327 [13760/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.232242 [13792/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.180137 [13824/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.338359 [13856/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.206913 [13888/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.237314 [13920/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.133683 [13952/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.203639 [13984/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.198968 [14016/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.288243 [14048/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.236321 [14080/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.238360 [14112/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.223239 [14144/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.223496 [14176/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.187205 [14208/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.250556 [14240/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.333265 [14272/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.238916 [14304/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.283320 [14336/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.237794 [14368/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.242152 [14400/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.297060 [14432/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.371521 [14464/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.210712 [14496/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.224152 [14528/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.225118 [14560/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.228005 [14592/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.259377 [14624/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.358386 [14656/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.304979 [14688/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.236810 [14720/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.279255 [14752/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.255347 [14784/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.303069 [14816/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.318023 [14848/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.231958 [14880/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.212110 [14912/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.185337 [14944/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.200937 [14976/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.187840 [15008/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.329507 [15040/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.216195 [15072/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.308297 [15104/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.139604 [15136/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.209269 [15168/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.166479 [15200/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.278552 [15232/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.223651 [15264/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.266329 [15296/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.227818 [15328/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.183935 [15360/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.255053 [15392/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.277745 [15424/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.233422 [15456/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.188146 [15488/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.164358 [15520/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.189289 [15552/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.219426 [15584/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.221284 [15616/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.260466 [15648/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.235096 [15680/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.324431 [15712/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.270999 [15744/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.187291 [15776/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.222521 [15808/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.263766 [15840/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.136447 [15872/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.140715 [15904/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.313597 [15936/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.344802 [15968/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.320605 [16000/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.325173 [16032/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.242319 [16064/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.160293 [16096/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.263837 [16128/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.348049 [16160/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.269294 [16192/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.298533 [16224/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.261926 [16256/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.235646 [16288/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.265712 [16320/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.360068 [16352/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.234033 [16384/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.314285 [16416/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.181560 [16448/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.247490 [16480/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.284392 [16512/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.254220 [16544/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.141916 [16576/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.229268 [16608/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.291720 [16640/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.218562 [16672/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.148060 [16704/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.231174 [16736/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.234656 [16768/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.188378 [16800/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.196267 [16832/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.189547 [16864/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.209474 [16896/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.263125 [16928/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.179984 [16960/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.274732 [16992/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.314494 [17024/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.219570 [17056/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.252821 [17088/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.239978 [17120/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.251836 [17152/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.276641 [17184/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.328171 [17216/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.317398 [17248/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.342044 [17280/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.239640 [17312/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.266093 [17344/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.428239 [17376/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.123067 [17408/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.231667 [17440/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.138240 [17472/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.254473 [17504/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.235128 [17536/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.225698 [17568/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.314371 [17600/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.266042 [17632/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.150655 [17664/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.251981 [17696/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.267055 [17728/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.295185 [17760/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.210714 [17792/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.214349 [17824/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.311812 [17856/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.284953 [17888/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.190413 [17920/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.187390 [17952/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.352518 [17984/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.188077 [18016/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.192366 [18048/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.223197 [18080/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.227278 [18112/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.170285 [18144/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.187362 [18176/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.248960 [18208/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.263743 [18240/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.263074 [18272/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.217722 [18304/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.205207 [18336/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.209600 [18368/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.150148 [18400/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.243470 [18432/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.192187 [18464/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.223674 [18496/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.244353 [18528/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.294921 [18560/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.202265 [18592/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.201665 [18624/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.240857 [18656/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.205845 [18688/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.287564 [18720/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.287329 [18752/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.276849 [18784/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.208051 [18816/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.292954 [18848/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.198927 [18880/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.208287 [18912/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.286567 [18944/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.327330 [18976/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.263515 [19008/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.190764 [19040/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.257701 [19072/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.283536 [19104/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.284400 [19136/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.221426 [19168/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.190944 [19200/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.215923 [19232/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.137344 [19264/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.329905 [19296/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.280737 [19328/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.259836 [19360/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.201279 [19392/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.156552 [19424/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.250233 [19456/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.224488 [19488/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.254581 [19520/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.215338 [19552/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.199591 [19584/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.191058 [19616/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.172372 [19648/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.143303 [19680/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.182187 [19712/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.197094 [19744/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.171778 [19776/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.229789 [19808/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.181541 [19840/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.239245 [19872/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.183713 [19904/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.227468 [19936/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.236074 [19968/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.225614 [20000/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.274312 [20032/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.213254 [20064/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.204517 [20096/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.248855 [20128/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.238047 [20160/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.205067 [20192/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.234785 [20224/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.206679 [20256/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.221364 [20288/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.226912 [20320/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.290699 [20352/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.208501 [20384/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.170742 [20416/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.259268 [20448/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.261471 [20480/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.194176 [20512/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.240171 [20544/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.146028 [20576/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.171587 [20608/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.214173 [20640/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.194982 [20672/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.149702 [20704/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.272087 [20736/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.138415 [20768/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.281073 [20800/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.268337 [20832/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.176554 [20864/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.253742 [20896/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.303975 [20928/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.280542 [20960/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.218185 [20992/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.268315 [21024/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.253576 [21056/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.147113 [21088/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.212208 [21120/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.283800 [21152/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.227171 [21184/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.217273 [21216/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.214213 [21248/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.289587 [21280/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.259980 [21312/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.188675 [21344/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.203853 [21376/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.230620 [21408/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.218193 [21440/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.218717 [21472/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.185679 [21504/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.172080 [21536/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.164251 [21568/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.213762 [21600/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.276272 [21632/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.278473 [21664/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.147515 [21696/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.185333 [21728/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.237502 [21760/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.167356 [21792/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.261301 [21824/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.186724 [21856/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.181631 [21888/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.284781 [21920/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.117157 [21952/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.285796 [21984/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.344655 [22016/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.245013 [22048/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.200327 [22080/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.296920 [22112/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.204849 [22144/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.197224 [22176/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.227508 [22208/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.267246 [22240/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.179567 [22272/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.214630 [22304/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.247205 [22336/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.250130 [22368/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.288257 [22400/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.239920 [22432/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.218674 [22464/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.188656 [22496/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.219285 [22528/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.285543 [22560/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.248165 [22592/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.241530 [22624/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.207601 [22656/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.298300 [22688/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.280380 [22720/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.216990 [22752/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.265478 [22784/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.212102 [22816/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.226610 [22848/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.162499 [22880/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.291090 [22912/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.288131 [22944/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.292578 [22976/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.208391 [23008/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.244359 [23040/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.168244 [23072/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.242074 [23104/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.250815 [23136/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.295657 [23168/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.255730 [23200/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.298411 [23232/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.303410 [23264/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.273758 [23296/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.213465 [23328/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.351793 [23360/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.287603 [23392/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.201828 [23424/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.160718 [23456/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.273952 [23488/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.229384 [23520/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.185468 [23552/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.177431 [23584/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.186940 [23616/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.281135 [23648/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.279291 [23680/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.187950 [23712/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.272469 [23744/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.179874 [23776/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.178834 [23808/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.262398 [23840/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.299538 [23872/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.239439 [23904/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.218344 [23936/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.305019 [23968/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.185201 [24000/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.255327 [24032/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.251081 [24064/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.302270 [24096/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.226319 [24128/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.194088 [24160/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.165070 [24192/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.231640 [24224/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.250517 [24256/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.211954 [24288/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.183166 [24320/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.134150 [24352/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.221549 [24384/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.213591 [24416/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.260087 [24448/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.220361 [24480/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.230312 [24512/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.240253 [24544/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.201570 [24576/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.302171 [24608/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.220828 [24640/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.140052 [24672/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.269702 [24704/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.306668 [24736/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.152992 [24768/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.183702 [24800/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.251309 [24832/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.296726 [24864/24872]: 0%| | 0/777 [00:30<?, ?it/s]
loss: 0.296726 [24864/24872]: 100%|██████████| 777/777 [00:30<00:00, 25.87it/s]
loss: 0.290230 [24872/24872]: 100%|██████████| 777/777 [00:30<00:00, 25.87it/s]
loss: 0.290230 [24872/24872]: : 778it [00:30, 25.87it/s]
Epoch 4, time=90.66s
0%| | 0/777 [00:00<?, ?it/s]
loss: 0.264867 [ 32/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.282281 [ 64/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.271430 [ 96/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.211236 [ 128/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.241508 [ 160/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.198573 [ 192/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.292206 [ 224/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.211072 [ 256/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.282663 [ 288/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.235954 [ 320/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.265727 [ 352/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.183597 [ 384/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.287379 [ 416/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.219509 [ 448/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.286856 [ 480/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.136665 [ 512/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.222649 [ 544/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.198580 [ 576/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.222333 [ 608/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.207624 [ 640/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.210230 [ 672/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.208209 [ 704/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.121176 [ 736/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.226594 [ 768/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.191902 [ 800/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.235997 [ 832/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.264292 [ 864/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.244102 [ 896/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.355591 [ 928/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.231996 [ 960/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.253206 [ 992/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.336823 [ 1024/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.304797 [ 1056/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.240277 [ 1088/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.235878 [ 1120/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.290370 [ 1152/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.256683 [ 1184/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.199273 [ 1216/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.191770 [ 1248/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.177916 [ 1280/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.236676 [ 1312/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.228050 [ 1344/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.196793 [ 1376/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.273705 [ 1408/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.302998 [ 1440/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.249805 [ 1472/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.187926 [ 1504/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.250100 [ 1536/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.261728 [ 1568/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.260614 [ 1600/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.261501 [ 1632/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.219039 [ 1664/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.222840 [ 1696/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.249138 [ 1728/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.252344 [ 1760/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.209107 [ 1792/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.231804 [ 1824/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.239981 [ 1856/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.185196 [ 1888/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.246272 [ 1920/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.196535 [ 1952/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.253917 [ 1984/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.205874 [ 2016/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.197466 [ 2048/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.169948 [ 2080/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.187177 [ 2112/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.303497 [ 2144/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.189347 [ 2176/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.182467 [ 2208/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.172183 [ 2240/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.213558 [ 2272/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.375988 [ 2304/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.306059 [ 2336/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.275342 [ 2368/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.145614 [ 2400/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.223850 [ 2432/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.191394 [ 2464/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.340147 [ 2496/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.215298 [ 2528/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.231754 [ 2560/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.286213 [ 2592/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.267888 [ 2624/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.224047 [ 2656/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.273231 [ 2688/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.259427 [ 2720/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.177662 [ 2752/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.203200 [ 2784/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.172167 [ 2816/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.199427 [ 2848/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.167254 [ 2880/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.135075 [ 2912/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.202331 [ 2944/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.401421 [ 2976/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.226436 [ 3008/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.277694 [ 3040/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.225461 [ 3072/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.235317 [ 3104/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.265593 [ 3136/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.227834 [ 3168/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.169513 [ 3200/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.161557 [ 3232/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.207142 [ 3264/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.250912 [ 3296/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.172953 [ 3328/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.199794 [ 3360/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.292983 [ 3392/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.216733 [ 3424/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.232260 [ 3456/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.277948 [ 3488/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.194704 [ 3520/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.201714 [ 3552/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.253998 [ 3584/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.228257 [ 3616/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.364167 [ 3648/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.214752 [ 3680/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.198279 [ 3712/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.239576 [ 3744/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.254938 [ 3776/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.125892 [ 3808/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.236944 [ 3840/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.181396 [ 3872/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.228906 [ 3904/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.220730 [ 3936/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.176664 [ 3968/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.219890 [ 4000/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.190414 [ 4032/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.316527 [ 4064/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.180220 [ 4096/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.152316 [ 4128/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.217070 [ 4160/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.191194 [ 4192/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.232117 [ 4224/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.187148 [ 4256/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.222778 [ 4288/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.225321 [ 4320/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.168331 [ 4352/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.244673 [ 4384/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.202939 [ 4416/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.176249 [ 4448/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.227595 [ 4480/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.220862 [ 4512/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.173707 [ 4544/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.232183 [ 4576/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.224410 [ 4608/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.221138 [ 4640/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.147669 [ 4672/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.146966 [ 4704/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.269341 [ 4736/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.295527 [ 4768/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.139253 [ 4800/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.243111 [ 4832/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.241427 [ 4864/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.162058 [ 4896/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.282250 [ 4928/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.253505 [ 4960/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.208718 [ 4992/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.210461 [ 5024/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.159020 [ 5056/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.128258 [ 5088/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.190030 [ 5120/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.116470 [ 5152/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.298509 [ 5184/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.338323 [ 5216/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.194728 [ 5248/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.233177 [ 5280/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.337787 [ 5312/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.354185 [ 5344/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.210030 [ 5376/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.233140 [ 5408/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.227139 [ 5440/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.218468 [ 5472/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.207745 [ 5504/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.275716 [ 5536/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.204330 [ 5568/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.238616 [ 5600/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.240718 [ 5632/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.227909 [ 5664/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.234689 [ 5696/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.286128 [ 5728/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.263770 [ 5760/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.161378 [ 5792/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.215297 [ 5824/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.241974 [ 5856/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.161839 [ 5888/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.249036 [ 5920/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.194066 [ 5952/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.260100 [ 5984/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.223053 [ 6016/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.311615 [ 6048/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.244128 [ 6080/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.211518 [ 6112/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.246787 [ 6144/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.251685 [ 6176/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.175739 [ 6208/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.218761 [ 6240/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.264585 [ 6272/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.244694 [ 6304/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.199842 [ 6336/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.244134 [ 6368/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.298155 [ 6400/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.153363 [ 6432/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.173712 [ 6464/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.151709 [ 6496/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.147228 [ 6528/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.159403 [ 6560/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.244269 [ 6592/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.219900 [ 6624/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.261369 [ 6656/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.205868 [ 6688/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.204227 [ 6720/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.353782 [ 6752/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.260982 [ 6784/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.244435 [ 6816/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.199352 [ 6848/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.121549 [ 6880/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.184425 [ 6912/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.188263 [ 6944/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.184610 [ 6976/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.268457 [ 7008/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.151480 [ 7040/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.209954 [ 7072/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.176400 [ 7104/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.204111 [ 7136/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.242234 [ 7168/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.212029 [ 7200/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.246393 [ 7232/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.317591 [ 7264/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.301543 [ 7296/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.226469 [ 7328/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.210742 [ 7360/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.237918 [ 7392/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.230987 [ 7424/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.224458 [ 7456/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.181593 [ 7488/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.213853 [ 7520/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.278943 [ 7552/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.188540 [ 7584/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.238879 [ 7616/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.281964 [ 7648/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.191769 [ 7680/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.291951 [ 7712/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.195988 [ 7744/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.176434 [ 7776/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.223479 [ 7808/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.272543 [ 7840/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.217742 [ 7872/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.255374 [ 7904/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.242334 [ 7936/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.178684 [ 7968/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.183263 [ 8000/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.220113 [ 8032/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.284379 [ 8064/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.204019 [ 8096/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.191202 [ 8128/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.241588 [ 8160/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.284967 [ 8192/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.209370 [ 8224/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.245360 [ 8256/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.181522 [ 8288/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.199484 [ 8320/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.154039 [ 8352/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.187857 [ 8384/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.153767 [ 8416/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.118067 [ 8448/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.232452 [ 8480/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.189661 [ 8512/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.172798 [ 8544/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.239588 [ 8576/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.236781 [ 8608/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.187077 [ 8640/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.199643 [ 8672/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.196825 [ 8704/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.171835 [ 8736/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.257840 [ 8768/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.232559 [ 8800/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.204909 [ 8832/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.225699 [ 8864/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.200345 [ 8896/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.193944 [ 8928/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.213869 [ 8960/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.180458 [ 8992/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.222806 [ 9024/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.127197 [ 9056/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.196672 [ 9088/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.203337 [ 9120/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.173737 [ 9152/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.204008 [ 9184/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.282166 [ 9216/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.181143 [ 9248/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.174652 [ 9280/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.202544 [ 9312/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.171672 [ 9344/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.192609 [ 9376/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.239878 [ 9408/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.127143 [ 9440/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.320560 [ 9472/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.133529 [ 9504/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.156075 [ 9536/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.238453 [ 9568/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.233555 [ 9600/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.231955 [ 9632/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.267076 [ 9664/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.209442 [ 9696/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.134873 [ 9728/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.306516 [ 9760/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.155203 [ 9792/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.230636 [ 9824/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.187507 [ 9856/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.234707 [ 9888/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.128281 [ 9920/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.155868 [ 9952/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.254299 [ 9984/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.206535 [10016/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.243707 [10048/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.286424 [10080/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.234648 [10112/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.228431 [10144/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.160606 [10176/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.151905 [10208/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.186424 [10240/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.230375 [10272/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.258318 [10304/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.262306 [10336/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.218041 [10368/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.200809 [10400/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.209948 [10432/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.148398 [10464/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.242569 [10496/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.218293 [10528/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.186866 [10560/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.188585 [10592/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.212921 [10624/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.208478 [10656/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.152813 [10688/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.141816 [10720/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.205196 [10752/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.218389 [10784/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.168231 [10816/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.110039 [10848/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.163076 [10880/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.251806 [10912/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.265959 [10944/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.184171 [10976/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.229104 [11008/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.181653 [11040/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.205377 [11072/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.246512 [11104/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.267879 [11136/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.254523 [11168/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.247046 [11200/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.270357 [11232/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.195047 [11264/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.169174 [11296/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.200066 [11328/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.181320 [11360/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.212538 [11392/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.261860 [11424/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.122846 [11456/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.164711 [11488/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.158178 [11520/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.152550 [11552/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.283210 [11584/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.179708 [11616/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.261079 [11648/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.191266 [11680/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.235292 [11712/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.262973 [11744/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.182423 [11776/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.128955 [11808/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.092167 [11840/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.145830 [11872/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.214454 [11904/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.245148 [11936/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.195109 [11968/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.194647 [12000/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.172872 [12032/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.219252 [12064/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.163955 [12096/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.219632 [12128/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.153608 [12160/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.213226 [12192/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.180156 [12224/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.223231 [12256/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.146268 [12288/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.168493 [12320/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.195390 [12352/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.171258 [12384/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.179800 [12416/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.212911 [12448/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.231300 [12480/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.161828 [12512/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.220490 [12544/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.202506 [12576/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.270685 [12608/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.212117 [12640/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.185871 [12672/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.242859 [12704/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.194069 [12736/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.227132 [12768/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.342085 [12800/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.280306 [12832/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.401276 [12864/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.186940 [12896/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.212935 [12928/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.230878 [12960/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.155685 [12992/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.213876 [13024/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.319732 [13056/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.135774 [13088/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.193003 [13120/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.151161 [13152/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.169074 [13184/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.280675 [13216/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.161234 [13248/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.216908 [13280/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.186762 [13312/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.253871 [13344/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.181946 [13376/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.248214 [13408/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.222190 [13440/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.192256 [13472/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.202476 [13504/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.280576 [13536/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.157881 [13568/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.283625 [13600/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.153780 [13632/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.234194 [13664/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.156483 [13696/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.144899 [13728/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.224243 [13760/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.184766 [13792/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.147257 [13824/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.294443 [13856/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.180835 [13888/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.197188 [13920/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.105997 [13952/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.165484 [13984/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.162773 [14016/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.249189 [14048/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.183984 [14080/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.199061 [14112/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.186636 [14144/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.194245 [14176/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.140351 [14208/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.209292 [14240/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.304814 [14272/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.193772 [14304/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.256936 [14336/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.217682 [14368/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.199712 [14400/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.250751 [14432/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.318822 [14464/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.173860 [14496/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.196695 [14528/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.198272 [14560/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.193813 [14592/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.213889 [14624/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.314170 [14656/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.268324 [14688/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.218090 [14720/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.234893 [14752/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.222239 [14784/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.261305 [14816/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.265469 [14848/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.210634 [14880/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.186909 [14912/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.159344 [14944/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.198262 [14976/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.181301 [15008/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.332319 [15040/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.222935 [15072/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.234318 [15104/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.116228 [15136/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.225333 [15168/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.141094 [15200/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.232460 [15232/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.216747 [15264/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.252076 [15296/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.208744 [15328/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.187415 [15360/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.222217 [15392/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.240034 [15424/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.217300 [15456/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.177215 [15488/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.148374 [15520/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.161055 [15552/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.189037 [15584/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.216480 [15616/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.234981 [15648/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.211096 [15680/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.283539 [15712/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.241488 [15744/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.170057 [15776/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.187189 [15808/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.259353 [15840/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.127545 [15872/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.135296 [15904/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.284127 [15936/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.319810 [15968/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.284437 [16000/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.307225 [16032/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.196623 [16064/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.136025 [16096/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.210950 [16128/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.288210 [16160/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.231507 [16192/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.241663 [16224/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.231446 [16256/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.213321 [16288/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.241758 [16320/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.306314 [16352/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.193873 [16384/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.243626 [16416/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.153518 [16448/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.209447 [16480/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.237399 [16512/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.214239 [16544/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.125936 [16576/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.178163 [16608/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.219404 [16640/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.208489 [16672/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.141818 [16704/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.187045 [16736/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.181965 [16768/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.171211 [16800/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.152969 [16832/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.167943 [16864/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.182500 [16896/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.222576 [16928/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.150731 [16960/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.232634 [16992/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.267942 [17024/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.159385 [17056/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.251923 [17088/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.202630 [17120/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.203776 [17152/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.260811 [17184/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.303433 [17216/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.233368 [17248/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.284601 [17280/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.214552 [17312/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.204220 [17344/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.361509 [17376/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.112285 [17408/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.173251 [17440/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.118067 [17472/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.229454 [17504/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.224376 [17536/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.200839 [17568/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.277573 [17600/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.221989 [17632/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.110232 [17664/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.258178 [17696/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.212389 [17728/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.241102 [17760/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.200622 [17792/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.184511 [17824/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.278000 [17856/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.271146 [17888/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.171249 [17920/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.169129 [17952/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.324917 [17984/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.159745 [18016/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.184453 [18048/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.211045 [18080/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.205880 [18112/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.133638 [18144/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.153079 [18176/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.240558 [18208/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.228645 [18240/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.220977 [18272/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.183279 [18304/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.148425 [18336/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.181207 [18368/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.138910 [18400/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.193386 [18432/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.164624 [18464/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.172194 [18496/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.202705 [18528/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.269039 [18560/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.181856 [18592/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.163811 [18624/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.208691 [18656/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.162991 [18688/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.262225 [18720/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.299795 [18752/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.240350 [18784/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.184635 [18816/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.267426 [18848/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.180375 [18880/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.189588 [18912/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.236844 [18944/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.282286 [18976/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.236251 [19008/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.167157 [19040/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.233593 [19072/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.239979 [19104/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.258118 [19136/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.235755 [19168/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.170270 [19200/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.204328 [19232/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.128977 [19264/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.290490 [19296/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.255234 [19328/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.220416 [19360/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.190295 [19392/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.158468 [19424/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.233327 [19456/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.192344 [19488/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.243414 [19520/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.200777 [19552/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.190457 [19584/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.167073 [19616/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.156472 [19648/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.136836 [19680/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.158181 [19712/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.170255 [19744/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.161723 [19776/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.194064 [19808/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.176812 [19840/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.214021 [19872/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.168947 [19904/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.220028 [19936/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.219281 [19968/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.206938 [20000/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.243478 [20032/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.185906 [20064/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.190830 [20096/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.187443 [20128/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.224292 [20160/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.175533 [20192/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.213112 [20224/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.191557 [20256/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.191686 [20288/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.203791 [20320/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.266468 [20352/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.194398 [20384/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.152142 [20416/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.213954 [20448/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.245687 [20480/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.189010 [20512/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.246812 [20544/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.161145 [20576/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.147676 [20608/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.195963 [20640/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.172895 [20672/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.140902 [20704/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.257209 [20736/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.114133 [20768/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.244727 [20800/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.254849 [20832/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.169998 [20864/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.257006 [20896/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.283215 [20928/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.265447 [20960/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.213598 [20992/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.243172 [21024/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.249697 [21056/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.140662 [21088/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.184535 [21120/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.239762 [21152/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.241489 [21184/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.211486 [21216/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.196750 [21248/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.216596 [21280/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.247800 [21312/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.197456 [21344/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.173530 [21376/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.244761 [21408/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.218070 [21440/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.195646 [21472/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.173242 [21504/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.159986 [21536/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.122678 [21568/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.190187 [21600/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.249473 [21632/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.227190 [21664/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.138481 [21696/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.177256 [21728/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.214371 [21760/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.151108 [21792/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.227653 [21824/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.150266 [21856/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.164606 [21888/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.262391 [21920/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.110083 [21952/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.233941 [21984/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.334705 [22016/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.200756 [22048/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.179652 [22080/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.256766 [22112/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.171879 [22144/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.158616 [22176/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.213056 [22208/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.234892 [22240/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.134239 [22272/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.181695 [22304/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.216283 [22336/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.207085 [22368/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.237181 [22400/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.197962 [22432/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.175656 [22464/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.140862 [22496/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.188505 [22528/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.233443 [22560/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.219180 [22592/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.211590 [22624/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.165980 [22656/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.260562 [22688/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.248485 [22720/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.172839 [22752/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.228073 [22784/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.203187 [22816/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.205261 [22848/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.136391 [22880/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.268088 [22912/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.241450 [22944/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.260025 [22976/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.174223 [23008/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.223408 [23040/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.127609 [23072/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.213503 [23104/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.204819 [23136/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.208496 [23168/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.207235 [23200/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.293022 [23232/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.264567 [23264/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.194182 [23296/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.177790 [23328/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.281217 [23360/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.242773 [23392/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.143431 [23424/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.135044 [23456/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.203378 [23488/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.209989 [23520/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.173347 [23552/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.153648 [23584/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.194846 [23616/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.241736 [23648/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.224240 [23680/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.151076 [23712/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.266269 [23744/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.158830 [23776/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.160254 [23808/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.251524 [23840/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.271896 [23872/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.193396 [23904/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.199737 [23936/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.266809 [23968/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.154054 [24000/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.260841 [24032/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.197032 [24064/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.285915 [24096/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.224271 [24128/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.176113 [24160/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.135924 [24192/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.201747 [24224/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.232128 [24256/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.177111 [24288/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.172032 [24320/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.126001 [24352/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.201112 [24384/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.192696 [24416/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.230327 [24448/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.198134 [24480/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.178471 [24512/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.210241 [24544/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.172607 [24576/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.250248 [24608/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.196459 [24640/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.137137 [24672/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.227525 [24704/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.264550 [24736/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.158048 [24768/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.188190 [24800/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.229177 [24832/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.284411 [24864/24872]: 0%| | 0/777 [00:30<?, ?it/s]
loss: 0.284411 [24864/24872]: 100%|██████████| 777/777 [00:30<00:00, 25.88it/s]
loss: 0.271566 [24872/24872]: 100%|██████████| 777/777 [00:30<00:00, 25.88it/s]
loss: 0.271566 [24872/24872]: : 778it [00:30, 25.89it/s]
Epoch 5, time=120.71s
0%| | 0/777 [00:00<?, ?it/s]
loss: 0.233206 [ 32/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.258621 [ 64/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.276393 [ 96/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.194673 [ 128/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.201848 [ 160/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.217859 [ 192/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.227068 [ 224/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.199223 [ 256/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.268839 [ 288/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.221731 [ 320/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.248601 [ 352/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.182355 [ 384/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.250459 [ 416/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.190718 [ 448/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.276698 [ 480/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.124110 [ 512/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.189507 [ 544/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.160847 [ 576/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.212038 [ 608/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.181564 [ 640/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.198255 [ 672/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.259203 [ 704/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.111003 [ 736/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.224090 [ 768/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.177266 [ 800/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.190445 [ 832/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.242993 [ 864/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.210573 [ 896/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.344135 [ 928/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.187360 [ 960/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.228025 [ 992/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.327983 [ 1024/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.258682 [ 1056/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.251038 [ 1088/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.189198 [ 1120/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.295488 [ 1152/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.233714 [ 1184/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.189266 [ 1216/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.173415 [ 1248/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.169474 [ 1280/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.185476 [ 1312/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.184376 [ 1344/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.151111 [ 1376/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.247202 [ 1408/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.266858 [ 1440/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.265350 [ 1472/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.161626 [ 1504/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.227664 [ 1536/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.213821 [ 1568/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.232806 [ 1600/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.235293 [ 1632/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.187719 [ 1664/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.196653 [ 1696/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.223736 [ 1728/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.224148 [ 1760/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.191200 [ 1792/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.200568 [ 1824/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.201222 [ 1856/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.167681 [ 1888/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.212241 [ 1920/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.166586 [ 1952/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.231681 [ 1984/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.190466 [ 2016/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.164494 [ 2048/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.150845 [ 2080/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.157048 [ 2112/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.249790 [ 2144/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.169820 [ 2176/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.159738 [ 2208/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.145509 [ 2240/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.183594 [ 2272/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.325912 [ 2304/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.269687 [ 2336/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.234603 [ 2368/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.133870 [ 2400/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.194484 [ 2432/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.158917 [ 2464/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.307099 [ 2496/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.193230 [ 2528/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.193215 [ 2560/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.262857 [ 2592/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.240179 [ 2624/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.162476 [ 2656/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.242641 [ 2688/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.218907 [ 2720/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.155114 [ 2752/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.170679 [ 2784/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.137928 [ 2816/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.160171 [ 2848/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.138704 [ 2880/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.117383 [ 2912/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.184359 [ 2944/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.339533 [ 2976/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.203084 [ 3008/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.267434 [ 3040/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.189832 [ 3072/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.201068 [ 3104/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.249542 [ 3136/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.204398 [ 3168/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.167639 [ 3200/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.152711 [ 3232/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.170724 [ 3264/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.255646 [ 3296/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.151159 [ 3328/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.181063 [ 3360/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.248015 [ 3392/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.200523 [ 3424/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.222978 [ 3456/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.257025 [ 3488/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.185647 [ 3520/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.192816 [ 3552/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.209165 [ 3584/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.202939 [ 3616/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.292354 [ 3648/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.209780 [ 3680/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.184048 [ 3712/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.221101 [ 3744/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.262611 [ 3776/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.117544 [ 3808/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.243528 [ 3840/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.162204 [ 3872/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.195268 [ 3904/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.221093 [ 3936/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.160597 [ 3968/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.197847 [ 4000/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.182172 [ 4032/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.271458 [ 4064/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.138085 [ 4096/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.150794 [ 4128/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.197260 [ 4160/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.163665 [ 4192/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.218095 [ 4224/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.155924 [ 4256/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.191854 [ 4288/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.215113 [ 4320/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.155478 [ 4352/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.212244 [ 4384/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.172788 [ 4416/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.182679 [ 4448/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.190145 [ 4480/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.197678 [ 4512/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.175027 [ 4544/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.213471 [ 4576/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.208777 [ 4608/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.199208 [ 4640/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.151623 [ 4672/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.137749 [ 4704/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.272650 [ 4736/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.285363 [ 4768/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.147014 [ 4800/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.262820 [ 4832/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.252312 [ 4864/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.173777 [ 4896/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.326897 [ 4928/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.215404 [ 4960/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.211066 [ 4992/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.218804 [ 5024/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.165720 [ 5056/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.158217 [ 5088/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.185786 [ 5120/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.118296 [ 5152/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.309934 [ 5184/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.330247 [ 5216/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.175047 [ 5248/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.220401 [ 5280/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.291733 [ 5312/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.293006 [ 5344/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.200002 [ 5376/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.184022 [ 5408/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.204549 [ 5440/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.176546 [ 5472/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.160829 [ 5504/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.206214 [ 5536/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.191740 [ 5568/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.182832 [ 5600/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.207686 [ 5632/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.193928 [ 5664/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.196905 [ 5696/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.249710 [ 5728/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.224103 [ 5760/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.136488 [ 5792/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.161494 [ 5824/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.217620 [ 5856/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.139654 [ 5888/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.235763 [ 5920/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.186876 [ 5952/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.237890 [ 5984/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.189806 [ 6016/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.274201 [ 6048/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.201073 [ 6080/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.193498 [ 6112/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.215529 [ 6144/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.232927 [ 6176/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.149871 [ 6208/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.183634 [ 6240/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.255455 [ 6272/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.211341 [ 6304/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.183841 [ 6336/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.210324 [ 6368/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.248173 [ 6400/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.142897 [ 6432/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.185033 [ 6464/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.124059 [ 6496/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.148146 [ 6528/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.132022 [ 6560/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.219437 [ 6592/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.197647 [ 6624/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.235821 [ 6656/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.186220 [ 6688/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.161552 [ 6720/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.336767 [ 6752/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.245906 [ 6784/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.238579 [ 6816/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.188588 [ 6848/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.121327 [ 6880/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.137391 [ 6912/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.186201 [ 6944/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.162221 [ 6976/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.233811 [ 7008/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.140166 [ 7040/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.187746 [ 7072/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.183871 [ 7104/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.192281 [ 7136/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.261559 [ 7168/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.193578 [ 7200/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.242208 [ 7232/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.302912 [ 7264/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.277480 [ 7296/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.184862 [ 7328/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.199351 [ 7360/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.210117 [ 7392/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.216937 [ 7424/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.217074 [ 7456/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.162865 [ 7488/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.155257 [ 7520/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.269817 [ 7552/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.191959 [ 7584/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.234562 [ 7616/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.223948 [ 7648/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.196096 [ 7680/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.262502 [ 7712/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.198945 [ 7744/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.170978 [ 7776/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.222793 [ 7808/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.219147 [ 7840/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.190939 [ 7872/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.206654 [ 7904/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.200469 [ 7936/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.154249 [ 7968/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.130021 [ 8000/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.181075 [ 8032/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.244651 [ 8064/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.200634 [ 8096/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.158735 [ 8128/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.232448 [ 8160/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.248202 [ 8192/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.174943 [ 8224/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.205775 [ 8256/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.171924 [ 8288/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.160468 [ 8320/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.130678 [ 8352/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.180597 [ 8384/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.131127 [ 8416/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.106027 [ 8448/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.238019 [ 8480/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.168435 [ 8512/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.153042 [ 8544/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.234819 [ 8576/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.224902 [ 8608/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.144467 [ 8640/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.179814 [ 8672/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.193384 [ 8704/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.159370 [ 8736/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.234367 [ 8768/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.200845 [ 8800/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.168438 [ 8832/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.213478 [ 8864/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.168049 [ 8896/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.179186 [ 8928/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.192634 [ 8960/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.162110 [ 8992/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.197362 [ 9024/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.118652 [ 9056/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.169261 [ 9088/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.188014 [ 9120/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.159523 [ 9152/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.193624 [ 9184/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.271748 [ 9216/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.161237 [ 9248/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.156597 [ 9280/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.173999 [ 9312/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.145518 [ 9344/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.168037 [ 9376/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.201932 [ 9408/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.098486 [ 9440/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.301431 [ 9472/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.108632 [ 9504/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.157847 [ 9536/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.202878 [ 9568/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.224892 [ 9600/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.226033 [ 9632/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.256368 [ 9664/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.191608 [ 9696/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.132255 [ 9728/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.288751 [ 9760/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.139427 [ 9792/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.221301 [ 9824/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.182314 [ 9856/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.209548 [ 9888/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.123355 [ 9920/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.140832 [ 9952/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.234074 [ 9984/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.193815 [10016/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.239873 [10048/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.260571 [10080/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.221429 [10112/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.204015 [10144/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.168900 [10176/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.133377 [10208/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.177272 [10240/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.226287 [10272/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.226697 [10304/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.210892 [10336/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.191371 [10368/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.190663 [10400/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.203171 [10432/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.132466 [10464/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.237565 [10496/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.202346 [10528/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.179914 [10560/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.182099 [10592/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.178405 [10624/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.193487 [10656/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.132987 [10688/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.137511 [10720/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.179021 [10752/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.185262 [10784/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.172182 [10816/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.086019 [10848/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.150152 [10880/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.222288 [10912/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.234249 [10944/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.188362 [10976/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.211582 [11008/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.169553 [11040/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.188811 [11072/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.234151 [11104/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.244542 [11136/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.213653 [11168/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.212798 [11200/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.231179 [11232/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.183667 [11264/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.153425 [11296/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.177173 [11328/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.174315 [11360/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.185205 [11392/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.234958 [11424/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.110748 [11456/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.145334 [11488/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.129545 [11520/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.144108 [11552/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.267095 [11584/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.161932 [11616/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.247894 [11648/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.165382 [11680/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.245783 [11712/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.234879 [11744/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.166632 [11776/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.109464 [11808/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.081730 [11840/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.156624 [11872/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.199886 [11904/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.238378 [11936/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.195844 [11968/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.174939 [12000/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.155742 [12032/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.210753 [12064/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.157194 [12096/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.208251 [12128/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.145814 [12160/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.200806 [12192/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.166892 [12224/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.212506 [12256/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.137881 [12288/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.153235 [12320/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.169851 [12352/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.172650 [12384/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.151797 [12416/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.181804 [12448/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.214999 [12480/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.154592 [12512/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.201619 [12544/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.166199 [12576/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.242886 [12608/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.199201 [12640/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.159333 [12672/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.221088 [12704/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.180027 [12736/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.221953 [12768/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.302542 [12800/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.247981 [12832/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.385218 [12864/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.178091 [12896/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.188457 [12928/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.203068 [12960/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.124555 [12992/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.200916 [13024/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.299114 [13056/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.121975 [13088/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.184549 [13120/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.138529 [13152/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.154927 [13184/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.244248 [13216/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.147047 [13248/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.196898 [13280/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.165049 [13312/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.232817 [13344/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.155593 [13376/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.236873 [13408/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.201620 [13440/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.179941 [13472/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.174272 [13504/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.244948 [13536/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.149596 [13568/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.247597 [13600/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.133886 [13632/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.222305 [13664/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.133465 [13696/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.137654 [13728/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.224760 [13760/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.168710 [13792/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.131185 [13824/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.293236 [13856/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.172271 [13888/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.149675 [13920/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.102904 [13952/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.150100 [13984/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.165364 [14016/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.231364 [14048/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.186322 [14080/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.192337 [14112/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.184259 [14144/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.178602 [14176/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.119341 [14208/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.224350 [14240/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.298413 [14272/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.174250 [14304/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.255381 [14336/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.189549 [14368/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.167227 [14400/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.235507 [14432/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.318511 [14464/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.163982 [14496/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.193184 [14528/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.192138 [14560/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.192992 [14592/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.253502 [14624/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.270729 [14656/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.266743 [14688/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.203093 [14720/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.241453 [14752/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.228604 [14784/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.253045 [14816/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.259810 [14848/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.192195 [14880/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.151924 [14912/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.138547 [14944/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.174856 [14976/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.149429 [15008/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.283451 [15040/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.177484 [15072/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.229648 [15104/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.092944 [15136/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.174228 [15168/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.122322 [15200/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.226264 [15232/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.205465 [15264/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.208147 [15296/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.184908 [15328/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.164990 [15360/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.180725 [15392/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.207951 [15424/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.191658 [15456/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.150113 [15488/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.140736 [15520/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.143482 [15552/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.169229 [15584/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.216333 [15616/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.196053 [15648/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.195005 [15680/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.268216 [15712/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.235173 [15744/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.159525 [15776/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.175682 [15808/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.228649 [15840/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.116974 [15872/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.145758 [15904/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.241501 [15936/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.284646 [15968/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.305428 [16000/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.307109 [16032/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.221501 [16064/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.154970 [16096/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.204310 [16128/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.295478 [16160/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.210255 [16192/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.220052 [16224/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.207307 [16256/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.234152 [16288/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.251741 [16320/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.309285 [16352/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.213651 [16384/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.226302 [16416/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.155270 [16448/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.209091 [16480/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.272056 [16512/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.192990 [16544/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.134530 [16576/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.169888 [16608/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.205770 [16640/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.194505 [16672/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.137042 [16704/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.178051 [16736/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.182897 [16768/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.162212 [16800/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.145214 [16832/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.141534 [16864/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.179231 [16896/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.220092 [16928/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.127978 [16960/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.188809 [16992/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.257210 [17024/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.164662 [17056/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.230094 [17088/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.204426 [17120/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.216686 [17152/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.232734 [17184/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.303735 [17216/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.211262 [17248/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.269967 [17280/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.221569 [17312/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.180099 [17344/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.270592 [17376/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.118489 [17408/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.183192 [17440/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.084143 [17472/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.209818 [17504/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.188097 [17536/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.196062 [17568/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.258091 [17600/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.226039 [17632/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.133051 [17664/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.228244 [17696/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.214558 [17728/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.189740 [17760/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.181348 [17792/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.188545 [17824/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.272805 [17856/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.267767 [17888/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.184821 [17920/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.187446 [17952/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.284621 [17984/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.165399 [18016/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.205340 [18048/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.239733 [18080/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.168659 [18112/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.169819 [18144/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.199825 [18176/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.206665 [18208/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.214198 [18240/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.255078 [18272/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.206697 [18304/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.130244 [18336/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.198272 [18368/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.164746 [18400/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.200778 [18432/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.171386 [18464/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.183316 [18496/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.210208 [18528/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.251276 [18560/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.190771 [18592/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.174096 [18624/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.197835 [18656/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.148801 [18688/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.246425 [18720/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.257543 [18752/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.245152 [18784/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.164152 [18816/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.264096 [18848/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.165396 [18880/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.178622 [18912/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.224972 [18944/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.294432 [18976/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.208952 [19008/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.160500 [19040/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.239479 [19072/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.216942 [19104/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.257887 [19136/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.222595 [19168/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.162218 [19200/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.190914 [19232/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.116624 [19264/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.243147 [19296/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.246491 [19328/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.207272 [19360/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.166886 [19392/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.156365 [19424/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.227595 [19456/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.185213 [19488/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.230692 [19520/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.152432 [19552/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.202892 [19584/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.159317 [19616/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.148466 [19648/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.132041 [19680/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.168578 [19712/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.151555 [19744/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.158480 [19776/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.230343 [19808/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.154227 [19840/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.208698 [19872/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.194789 [19904/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.228631 [19936/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.217319 [19968/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.234594 [20000/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.227975 [20032/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.185581 [20064/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.193243 [20096/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.163276 [20128/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.215031 [20160/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.175012 [20192/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.227982 [20224/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.204107 [20256/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.178493 [20288/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.217438 [20320/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.273382 [20352/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.186332 [20384/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.164271 [20416/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.249008 [20448/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.215631 [20480/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.167602 [20512/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.227261 [20544/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.133664 [20576/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.138515 [20608/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.204408 [20640/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.166779 [20672/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.126932 [20704/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.228774 [20736/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.133187 [20768/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.239437 [20800/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.235246 [20832/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.161889 [20864/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.204874 [20896/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.250560 [20928/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.246691 [20960/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.158408 [20992/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.216442 [21024/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.207868 [21056/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.125384 [21088/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.166913 [21120/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.227889 [21152/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.187356 [21184/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.185138 [21216/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.173837 [21248/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.217427 [21280/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.208266 [21312/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.159975 [21344/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.162028 [21376/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.190348 [21408/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.191888 [21440/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.181129 [21472/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.163488 [21504/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.155848 [21536/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.117109 [21568/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.176323 [21600/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.248998 [21632/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.212577 [21664/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.112850 [21696/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.185293 [21728/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.188729 [21760/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.140533 [21792/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.205998 [21824/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.144775 [21856/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.153138 [21888/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.259005 [21920/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.101546 [21952/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.205449 [21984/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.280236 [22016/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.169710 [22048/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.177092 [22080/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.214072 [22112/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.155382 [22144/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.145285 [22176/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.195767 [22208/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.205023 [22240/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.117348 [22272/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.171441 [22304/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.222243 [22336/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.205768 [22368/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.215605 [22400/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.200102 [22432/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.166611 [22464/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.141134 [22496/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.190753 [22528/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.194594 [22560/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.235279 [22592/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.202312 [22624/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.148384 [22656/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.236691 [22688/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.256000 [22720/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.153216 [22752/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.240532 [22784/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.186745 [22816/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.168194 [22848/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.129337 [22880/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.281054 [22912/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.230663 [22944/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.267364 [22976/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.170600 [23008/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.212105 [23040/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.113639 [23072/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.214995 [23104/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.205297 [23136/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.176786 [23168/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.187279 [23200/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.248516 [23232/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.217622 [23264/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.152960 [23296/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.154062 [23328/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.274431 [23360/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.236455 [23392/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.134992 [23424/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.131515 [23456/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.230264 [23488/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.194307 [23520/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.186691 [23552/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.156687 [23584/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.194172 [23616/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.280358 [23648/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.206794 [23680/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.144651 [23712/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.252547 [23744/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.147897 [23776/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.151195 [23808/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.232495 [23840/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.294899 [23872/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.166862 [23904/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.214978 [23936/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.238441 [23968/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.124222 [24000/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.226068 [24032/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.182080 [24064/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.266394 [24096/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.218482 [24128/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.161441 [24160/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.106893 [24192/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.205305 [24224/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.226584 [24256/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.160835 [24288/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.143314 [24320/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.112701 [24352/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.185692 [24384/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.165173 [24416/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.210892 [24448/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.170234 [24480/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.152281 [24512/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.206369 [24544/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.159857 [24576/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.198994 [24608/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.169750 [24640/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.107735 [24672/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.182742 [24704/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.217402 [24736/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.131530 [24768/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.146410 [24800/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.196665 [24832/24872]: 0%| | 0/777 [00:30<?, ?it/s]
loss: 0.196665 [24832/24872]: 100%|█████████▉| 776/777 [00:30<00:00, 25.85it/s]
loss: 0.255172 [24864/24872]: 100%|█████████▉| 776/777 [00:30<00:00, 25.85it/s]
loss: 0.216973 [24872/24872]: 100%|█████████▉| 776/777 [00:30<00:00, 25.85it/s]
loss: 0.216973 [24872/24872]: : 778it [00:30, 25.86it/s]
-------------------------------
LR=0.0001, batch_size=64
-------------------------------
Epoch 1, time=150.80s
0%| | 0/388 [00:00<?, ?it/s]
loss: 0.224360 [ 64/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.323039 [ 128/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.190750 [ 192/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.225940 [ 256/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.279759 [ 320/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.207430 [ 384/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.219011 [ 448/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.189572 [ 512/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.198800 [ 576/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.212103 [ 640/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.199084 [ 704/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.143425 [ 768/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.202329 [ 832/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.201144 [ 896/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.278848 [ 960/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.287935 [ 1024/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.225810 [ 1088/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.189680 [ 1152/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.194416 [ 1216/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.174352 [ 1280/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.170757 [ 1344/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.184881 [ 1408/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.236444 [ 1472/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.177214 [ 1536/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.211490 [ 1600/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.190419 [ 1664/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.202995 [ 1728/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.191653 [ 1792/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.190382 [ 1856/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.190233 [ 1920/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.186161 [ 1984/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.166626 [ 2048/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.137072 [ 2112/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.190475 [ 2176/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.133974 [ 2240/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.248272 [ 2304/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.225604 [ 2368/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.143382 [ 2432/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.229768 [ 2496/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.170291 [ 2560/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.237875 [ 2624/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.186834 [ 2688/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.152934 [ 2752/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.140209 [ 2816/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.147356 [ 2880/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.139527 [ 2944/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.250241 [ 3008/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.216285 [ 3072/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.219142 [ 3136/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.166835 [ 3200/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.161717 [ 3264/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.170390 [ 3328/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.199549 [ 3392/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.175983 [ 3456/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.203124 [ 3520/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.183599 [ 3584/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.227468 [ 3648/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.177053 [ 3712/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.201257 [ 3776/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.162824 [ 3840/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.170623 [ 3904/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.177072 [ 3968/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.176337 [ 4032/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.177763 [ 4096/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.157542 [ 4160/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.174613 [ 4224/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.153965 [ 4288/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.168832 [ 4352/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.179650 [ 4416/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.144315 [ 4480/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.153421 [ 4544/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.195390 [ 4608/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.166887 [ 4672/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.193653 [ 4736/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.177502 [ 4800/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.191702 [ 4864/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.164308 [ 4928/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.164071 [ 4992/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.138598 [ 5056/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.130634 [ 5120/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.176894 [ 5184/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.213576 [ 5248/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.222879 [ 5312/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.238797 [ 5376/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.162734 [ 5440/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.136376 [ 5504/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.208482 [ 5568/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.153024 [ 5632/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.178774 [ 5696/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.231541 [ 5760/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.132914 [ 5824/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.156069 [ 5888/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.179936 [ 5952/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.176761 [ 6016/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.226102 [ 6080/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.178492 [ 6144/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.162314 [ 6208/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.192901 [ 6272/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.172536 [ 6336/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.206220 [ 6400/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.127297 [ 6464/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.115378 [ 6528/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.170496 [ 6592/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.186843 [ 6656/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.161332 [ 6720/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.232961 [ 6784/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.203176 [ 6848/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.113072 [ 6912/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.149510 [ 6976/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.143012 [ 7040/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.151033 [ 7104/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.179897 [ 7168/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.160731 [ 7232/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.226462 [ 7296/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.156292 [ 7360/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.172920 [ 7424/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.147676 [ 7488/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.170439 [ 7552/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.173801 [ 7616/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.161572 [ 7680/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.183345 [ 7744/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.183384 [ 7808/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.166144 [ 7872/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.191970 [ 7936/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.121947 [ 8000/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.194928 [ 8064/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.162115 [ 8128/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.204189 [ 8192/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.159439 [ 8256/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.152431 [ 8320/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.142132 [ 8384/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.099776 [ 8448/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.174275 [ 8512/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.192744 [ 8576/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.178259 [ 8640/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.165728 [ 8704/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.179925 [ 8768/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.164607 [ 8832/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.176960 [ 8896/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.168741 [ 8960/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.152540 [ 9024/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.114541 [ 9088/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.140188 [ 9152/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.221841 [ 9216/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.132101 [ 9280/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.132016 [ 9344/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.179839 [ 9408/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.187727 [ 9472/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.122218 [ 9536/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.206027 [ 9600/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.212121 [ 9664/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.132302 [ 9728/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.182682 [ 9792/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.179198 [ 9856/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.133305 [ 9920/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.162358 [ 9984/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.175327 [10048/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.204775 [10112/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.140491 [10176/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.132374 [10240/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.218994 [10304/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.188614 [10368/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.191047 [10432/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.156780 [10496/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.174080 [10560/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.157912 [10624/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.126018 [10688/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.122821 [10752/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.141505 [10816/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.112079 [10880/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.196478 [10944/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.147442 [11008/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.152328 [11072/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.248258 [11136/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.196522 [11200/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.175320 [11264/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.167407 [11328/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.225273 [11392/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.165330 [11456/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.123280 [11520/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.149554 [11584/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.198325 [11648/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.182304 [11712/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.181232 [11776/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.086646 [11840/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.154604 [11904/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.214817 [11968/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.147585 [12032/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.162705 [12096/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.161913 [12160/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.183116 [12224/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.163328 [12288/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.150311 [12352/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.150006 [12416/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.189431 [12480/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.137822 [12544/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.189380 [12608/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.139922 [12672/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.168554 [12736/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.256704 [12800/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.282787 [12864/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.154425 [12928/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.141428 [12992/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.191503 [13056/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.120371 [13120/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.130635 [13184/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.157577 [13248/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.154891 [13312/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.159433 [13376/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.197924 [13440/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.161894 [13504/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.175019 [13568/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.153268 [13632/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.135390 [13696/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.145209 [13760/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.138479 [13824/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.216883 [13888/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.125336 [13952/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.137018 [14016/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.185612 [14080/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.207235 [14144/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.141058 [14208/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.209394 [14272/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.183218 [14336/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.161921 [14400/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.228959 [14464/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.143901 [14528/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.176258 [14592/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.210122 [14656/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.176039 [14720/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.195808 [14784/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.213096 [14848/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.153247 [14912/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.141506 [14976/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.156503 [15040/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.202480 [15104/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.114792 [15168/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.136443 [15232/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.177266 [15296/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.152384 [15360/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.180410 [15424/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.160191 [15488/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.142647 [15552/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.160101 [15616/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.202719 [15680/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.221910 [15744/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.152951 [15808/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.184534 [15872/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.154752 [15936/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.257908 [16000/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.223201 [16064/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.164220 [16128/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.218975 [16192/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.233200 [16256/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.154870 [16320/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.203559 [16384/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.172401 [16448/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.188548 [16512/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.146820 [16576/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.160012 [16640/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.148431 [16704/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.149766 [16768/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.130879 [16832/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.146067 [16896/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.149214 [16960/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.214894 [17024/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.172393 [17088/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.153055 [17152/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.207805 [17216/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.204180 [17280/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.172904 [17344/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.164359 [17408/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.097903 [17472/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.174202 [17536/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.207797 [17600/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.135967 [17664/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.205379 [17728/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.174282 [17792/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.181347 [17856/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.186443 [17920/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.206723 [17984/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.142670 [18048/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.154824 [18112/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.112803 [18176/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.169060 [18240/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.171692 [18304/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.143413 [18368/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.133633 [18432/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.133082 [18496/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.194732 [18560/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.172540 [18624/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.152692 [18688/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.219232 [18752/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.199225 [18816/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.197486 [18880/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.191449 [18944/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.245302 [19008/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.175047 [19072/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.195897 [19136/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.155798 [19200/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.114815 [19264/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.215207 [19328/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.171827 [19392/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.170525 [19456/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.183622 [19520/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.151809 [19584/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.136131 [19648/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.130690 [19712/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.131401 [19776/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.133821 [19840/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.150408 [19904/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.173449 [19968/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.215200 [20032/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.159150 [20096/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.161196 [20160/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.144254 [20224/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.154086 [20288/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.196807 [20352/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.163817 [20416/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.209115 [20480/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.169166 [20544/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.122847 [20608/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.165517 [20672/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.184950 [20736/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.161155 [20800/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.158213 [20864/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.189552 [20928/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.169353 [20992/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.186530 [21056/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.137857 [21120/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.177191 [21184/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.188283 [21248/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.174154 [21312/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.130111 [21376/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.209610 [21440/24872]: 0%| | 0/388 [00:17<?, ?it/s]
loss: 0.144459 [21504/24872]: 0%| | 0/388 [00:17<?, ?it/s]
loss: 0.123016 [21568/24872]: 0%| | 0/388 [00:17<?, ?it/s]
loss: 0.210215 [21632/24872]: 0%| | 0/388 [00:17<?, ?it/s]
loss: 0.150610 [21696/24872]: 0%| | 0/388 [00:17<?, ?it/s]
loss: 0.161301 [21760/24872]: 0%| | 0/388 [00:17<?, ?it/s]
loss: 0.181237 [21824/24872]: 0%| | 0/388 [00:17<?, ?it/s]
loss: 0.132175 [21888/24872]: 0%| | 0/388 [00:17<?, ?it/s]
loss: 0.162137 [21952/24872]: 0%| | 0/388 [00:17<?, ?it/s]
loss: 0.190258 [22016/24872]: 0%| | 0/388 [00:17<?, ?it/s]
loss: 0.163881 [22080/24872]: 0%| | 0/388 [00:17<?, ?it/s]
loss: 0.171392 [22144/24872]: 0%| | 0/388 [00:17<?, ?it/s]
loss: 0.149636 [22208/24872]: 0%| | 0/388 [00:17<?, ?it/s]
loss: 0.130584 [22272/24872]: 0%| | 0/388 [00:17<?, ?it/s]
loss: 0.174209 [22336/24872]: 0%| | 0/388 [00:17<?, ?it/s]
loss: 0.182838 [22400/24872]: 0%| | 0/388 [00:17<?, ?it/s]
loss: 0.166563 [22464/24872]: 0%| | 0/388 [00:17<?, ?it/s]
loss: 0.151710 [22528/24872]: 0%| | 0/388 [00:17<?, ?it/s]
loss: 0.170099 [22592/24872]: 0%| | 0/388 [00:17<?, ?it/s]
loss: 0.148597 [22656/24872]: 0%| | 0/388 [00:18<?, ?it/s]
loss: 0.210671 [22720/24872]: 0%| | 0/388 [00:18<?, ?it/s]
loss: 0.156423 [22784/24872]: 0%| | 0/388 [00:18<?, ?it/s]
loss: 0.159811 [22848/24872]: 0%| | 0/388 [00:18<?, ?it/s]
loss: 0.173755 [22912/24872]: 0%| | 0/388 [00:18<?, ?it/s]
loss: 0.204628 [22976/24872]: 0%| | 0/388 [00:18<?, ?it/s]
loss: 0.167357 [23040/24872]: 0%| | 0/388 [00:18<?, ?it/s]
loss: 0.136300 [23104/24872]: 0%| | 0/388 [00:18<?, ?it/s]
loss: 0.177461 [23168/24872]: 0%| | 0/388 [00:18<?, ?it/s]
loss: 0.213785 [23232/24872]: 0%| | 0/388 [00:18<?, ?it/s]
loss: 0.177689 [23296/24872]: 0%| | 0/388 [00:18<?, ?it/s]
loss: 0.175117 [23360/24872]: 0%| | 0/388 [00:18<?, ?it/s]
loss: 0.188008 [23424/24872]: 0%| | 0/388 [00:18<?, ?it/s]
loss: 0.157386 [23488/24872]: 0%| | 0/388 [00:18<?, ?it/s]
loss: 0.177400 [23552/24872]: 0%| | 0/388 [00:18<?, ?it/s]
loss: 0.144238 [23616/24872]: 0%| | 0/388 [00:18<?, ?it/s]
loss: 0.174391 [23680/24872]: 0%| | 0/388 [00:18<?, ?it/s]
loss: 0.168405 [23744/24872]: 0%| | 0/388 [00:18<?, ?it/s]
loss: 0.144638 [23808/24872]: 0%| | 0/388 [00:18<?, ?it/s]
loss: 0.219659 [23872/24872]: 0%| | 0/388 [00:18<?, ?it/s]
loss: 0.155970 [23936/24872]: 0%| | 0/388 [00:19<?, ?it/s]
loss: 0.167313 [24000/24872]: 0%| | 0/388 [00:19<?, ?it/s]
loss: 0.174153 [24064/24872]: 0%| | 0/388 [00:19<?, ?it/s]
loss: 0.238840 [24128/24872]: 0%| | 0/388 [00:19<?, ?it/s]
loss: 0.122745 [24192/24872]: 0%| | 0/388 [00:19<?, ?it/s]
loss: 0.191032 [24256/24872]: 0%| | 0/388 [00:19<?, ?it/s]
loss: 0.154806 [24320/24872]: 0%| | 0/388 [00:19<?, ?it/s]
loss: 0.148528 [24384/24872]: 0%| | 0/388 [00:19<?, ?it/s]
loss: 0.189236 [24448/24872]: 0%| | 0/388 [00:19<?, ?it/s]
loss: 0.143329 [24512/24872]: 0%| | 0/388 [00:19<?, ?it/s]
loss: 0.152824 [24576/24872]: 0%| | 0/388 [00:19<?, ?it/s]
loss: 0.156312 [24640/24872]: 0%| | 0/388 [00:19<?, ?it/s]
loss: 0.136785 [24704/24872]: 0%| | 0/388 [00:19<?, ?it/s]
loss: 0.184824 [24768/24872]: 0%| | 0/388 [00:19<?, ?it/s]
loss: 0.153393 [24832/24872]: 0%| | 0/388 [00:19<?, ?it/s]
loss: 0.241909 [24872/24872]: 0%| | 0/388 [00:19<?, ?it/s]
loss: 0.241909 [24872/24872]: : 389it [00:19, 19.68it/s]
Epoch 2, time=170.57s
0%| | 0/388 [00:00<?, ?it/s]
loss: 0.226799 [ 64/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.187878 [ 128/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.144602 [ 192/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.148643 [ 256/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.200000 [ 320/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.154045 [ 384/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.170625 [ 448/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.154905 [ 512/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.135267 [ 576/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.143953 [ 640/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.142777 [ 704/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.106687 [ 768/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.161049 [ 832/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.173789 [ 896/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.218005 [ 960/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.220708 [ 1024/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.183077 [ 1088/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.180435 [ 1152/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.162046 [ 1216/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.134629 [ 1280/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.138817 [ 1344/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.173437 [ 1408/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.220060 [ 1472/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.164721 [ 1536/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.203289 [ 1600/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.165449 [ 1664/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.182053 [ 1728/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.164631 [ 1792/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.157082 [ 1856/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.168437 [ 1920/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.168236 [ 1984/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.144759 [ 2048/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.119594 [ 2112/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.181881 [ 2176/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.141777 [ 2240/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.206312 [ 2304/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.192824 [ 2368/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.135459 [ 2432/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.208792 [ 2496/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.150609 [ 2560/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.226457 [ 2624/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.164603 [ 2688/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.145639 [ 2752/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.125749 [ 2816/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.126718 [ 2880/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.120909 [ 2944/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.225633 [ 3008/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.201444 [ 3072/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.195915 [ 3136/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.159526 [ 3200/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.128119 [ 3264/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.166084 [ 3328/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.194094 [ 3392/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.165781 [ 3456/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.185343 [ 3520/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.180538 [ 3584/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.203624 [ 3648/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.176657 [ 3712/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.166355 [ 3776/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.148900 [ 3840/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.150467 [ 3904/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.162308 [ 3968/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.160817 [ 4032/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.162048 [ 4096/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.167945 [ 4160/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.161278 [ 4224/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.147031 [ 4288/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.159202 [ 4352/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.164027 [ 4416/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.142277 [ 4480/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.145253 [ 4544/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.195724 [ 4608/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.136390 [ 4672/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.177245 [ 4736/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.187914 [ 4800/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.177502 [ 4864/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.181309 [ 4928/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.154747 [ 4992/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.135610 [ 5056/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.127553 [ 5120/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.151507 [ 5184/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.197644 [ 5248/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.221854 [ 5312/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.202248 [ 5376/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.163179 [ 5440/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.122386 [ 5504/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.182350 [ 5568/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.141978 [ 5632/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.176550 [ 5696/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.216532 [ 5760/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.124710 [ 5824/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.141453 [ 5888/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.156820 [ 5952/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.170421 [ 6016/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.204202 [ 6080/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.155012 [ 6144/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.166308 [ 6208/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.173587 [ 6272/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.170378 [ 6336/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.186004 [ 6400/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.129353 [ 6464/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.106033 [ 6528/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.159678 [ 6592/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.193134 [ 6656/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.157285 [ 6720/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.247357 [ 6784/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.192265 [ 6848/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.125424 [ 6912/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.147326 [ 6976/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.144184 [ 7040/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.161939 [ 7104/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.188136 [ 7168/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.152200 [ 7232/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.215624 [ 7296/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.154433 [ 7360/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.177737 [ 7424/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.146678 [ 7488/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.154517 [ 7552/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.171650 [ 7616/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.154262 [ 7680/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.190648 [ 7744/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.176600 [ 7808/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.155068 [ 7872/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.169365 [ 7936/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.129715 [ 8000/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.174526 [ 8064/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.134637 [ 8128/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.191576 [ 8192/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.148169 [ 8256/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.149556 [ 8320/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.157931 [ 8384/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.093966 [ 8448/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.180294 [ 8512/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.184902 [ 8576/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.172712 [ 8640/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.163132 [ 8704/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.158690 [ 8768/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.160100 [ 8832/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.171292 [ 8896/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.157576 [ 8960/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.144583 [ 9024/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.108323 [ 9088/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.129249 [ 9152/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.215228 [ 9216/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.119184 [ 9280/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.132360 [ 9344/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.165002 [ 9408/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.169761 [ 9472/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.115117 [ 9536/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.198809 [ 9600/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.202534 [ 9664/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.123661 [ 9728/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.179837 [ 9792/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.162333 [ 9856/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.127505 [ 9920/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.143932 [ 9984/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.156314 [10048/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.189347 [10112/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.132438 [10176/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.125775 [10240/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.189900 [10304/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.166109 [10368/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.184930 [10432/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.148668 [10496/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.159025 [10560/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.150914 [10624/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.111424 [10688/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.119995 [10752/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.136176 [10816/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.100914 [10880/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.178061 [10944/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.143903 [11008/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.153204 [11072/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.236568 [11136/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.201985 [11200/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.181268 [11264/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.162267 [11328/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.202578 [11392/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.167023 [11456/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.120250 [11520/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.156881 [11584/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.180343 [11648/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.175531 [11712/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.187557 [11776/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.098569 [11840/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.153132 [11904/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.195393 [11968/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.148840 [12032/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.164808 [12096/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.161619 [12160/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.168844 [12224/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.151564 [12288/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.155774 [12352/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.142678 [12416/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.182943 [12480/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.149764 [12544/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.164439 [12608/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.145075 [12672/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.161759 [12736/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.251630 [12800/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.277185 [12864/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.135158 [12928/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.145865 [12992/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.185770 [13056/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.116555 [13120/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.117859 [13184/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.136398 [13248/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.154156 [13312/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.146510 [13376/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.178095 [13440/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.145922 [13504/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.163031 [13568/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.146102 [13632/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.155185 [13696/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.133546 [13760/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.121210 [13824/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.221343 [13888/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.116830 [13952/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.131677 [14016/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.175460 [14080/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.168574 [14144/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.133466 [14208/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.235749 [14272/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.169001 [14336/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.161939 [14400/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.228464 [14464/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.128721 [14528/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.167961 [14592/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.208481 [14656/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.198700 [14720/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.182050 [14784/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.206637 [14848/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.151062 [14912/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.139211 [14976/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.147080 [15040/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.164208 [15104/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.125194 [15168/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.120329 [15232/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.154805 [15296/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.144412 [15360/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.163867 [15424/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.153338 [15488/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.121562 [15552/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.146468 [15616/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.191778 [15680/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.207597 [15744/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.141808 [15808/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.165564 [15872/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.166773 [15936/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.239412 [16000/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.224928 [16064/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.144673 [16128/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.196748 [16192/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.204630 [16256/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.145992 [16320/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.168278 [16384/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.180100 [16448/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.145121 [16512/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.140215 [16576/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.139517 [16640/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.153253 [16704/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.159300 [16768/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.117411 [16832/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.136581 [16896/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.136242 [16960/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.205138 [17024/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.169928 [17088/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.151294 [17152/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.214047 [17216/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.207689 [17280/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.168499 [17344/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.156172 [17408/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.116773 [17472/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.165303 [17536/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.204161 [17600/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.133709 [17664/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.195396 [17728/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.157886 [17792/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.190109 [17856/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.182625 [17920/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.181604 [17984/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.126499 [18048/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.164775 [18112/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.101791 [18176/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.174155 [18240/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.162327 [18304/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.133615 [18368/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.123758 [18432/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.128145 [18496/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.185884 [18560/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.141657 [18624/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.146851 [18688/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.184281 [18752/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.169481 [18816/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.177592 [18880/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.173552 [18944/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.206248 [19008/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.145918 [19072/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.187751 [19136/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.136625 [19200/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.112054 [19264/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.200558 [19328/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.156100 [19392/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.159957 [19456/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.158014 [19520/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.136850 [19584/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.132921 [19648/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.116274 [19712/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.115922 [19776/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.125142 [19840/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.154143 [19904/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.147837 [19968/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.180672 [20032/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.130812 [20096/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.153992 [20160/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.127256 [20224/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.147266 [20288/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.178296 [20352/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.140929 [20416/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.199337 [20480/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.158471 [20544/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.103676 [20608/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.137047 [20672/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.133693 [20736/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.149399 [20800/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.152236 [20864/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.160356 [20928/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.149296 [20992/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.160830 [21056/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.123169 [21120/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.162612 [21184/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.148859 [21248/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.148455 [21312/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.119468 [21376/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.164758 [21440/24872]: 0%| | 0/388 [00:17<?, ?it/s]
loss: 0.134797 [21504/24872]: 0%| | 0/388 [00:17<?, ?it/s]
loss: 0.115303 [21568/24872]: 0%| | 0/388 [00:17<?, ?it/s]
loss: 0.176266 [21632/24872]: 0%| | 0/388 [00:17<?, ?it/s]
loss: 0.142413 [21696/24872]: 0%| | 0/388 [00:17<?, ?it/s]
loss: 0.126265 [21760/24872]: 0%| | 0/388 [00:17<?, ?it/s]
loss: 0.160067 [21824/24872]: 0%| | 0/388 [00:17<?, ?it/s]
loss: 0.120783 [21888/24872]: 0%| | 0/388 [00:17<?, ?it/s]
loss: 0.152982 [21952/24872]: 0%| | 0/388 [00:17<?, ?it/s]
loss: 0.200937 [22016/24872]: 0%| | 0/388 [00:17<?, ?it/s]
loss: 0.149153 [22080/24872]: 0%| | 0/388 [00:17<?, ?it/s]
loss: 0.164500 [22144/24872]: 0%| | 0/388 [00:17<?, ?it/s]
loss: 0.139551 [22208/24872]: 0%| | 0/388 [00:17<?, ?it/s]
loss: 0.118039 [22272/24872]: 0%| | 0/388 [00:17<?, ?it/s]
loss: 0.168935 [22336/24872]: 0%| | 0/388 [00:17<?, ?it/s]
loss: 0.172481 [22400/24872]: 0%| | 0/388 [00:17<?, ?it/s]
loss: 0.156437 [22464/24872]: 0%| | 0/388 [00:17<?, ?it/s]
loss: 0.144541 [22528/24872]: 0%| | 0/388 [00:17<?, ?it/s]
loss: 0.160987 [22592/24872]: 0%| | 0/388 [00:17<?, ?it/s]
loss: 0.147471 [22656/24872]: 0%| | 0/388 [00:18<?, ?it/s]
loss: 0.191361 [22720/24872]: 0%| | 0/388 [00:18<?, ?it/s]
loss: 0.155541 [22784/24872]: 0%| | 0/388 [00:18<?, ?it/s]
loss: 0.142159 [22848/24872]: 0%| | 0/388 [00:18<?, ?it/s]
loss: 0.167109 [22912/24872]: 0%| | 0/388 [00:18<?, ?it/s]
loss: 0.211038 [22976/24872]: 0%| | 0/388 [00:18<?, ?it/s]
loss: 0.163749 [23040/24872]: 0%| | 0/388 [00:18<?, ?it/s]
loss: 0.133903 [23104/24872]: 0%| | 0/388 [00:18<?, ?it/s]
loss: 0.166767 [23168/24872]: 0%| | 0/388 [00:18<?, ?it/s]
loss: 0.174875 [23232/24872]: 0%| | 0/388 [00:18<?, ?it/s]
loss: 0.187595 [23296/24872]: 0%| | 0/388 [00:18<?, ?it/s]
loss: 0.187225 [23360/24872]: 0%| | 0/388 [00:18<?, ?it/s]
loss: 0.195871 [23424/24872]: 0%| | 0/388 [00:18<?, ?it/s]
loss: 0.143733 [23488/24872]: 0%| | 0/388 [00:18<?, ?it/s]
loss: 0.164157 [23552/24872]: 0%| | 0/388 [00:18<?, ?it/s]
loss: 0.134785 [23616/24872]: 0%| | 0/388 [00:18<?, ?it/s]
loss: 0.171393 [23680/24872]: 0%| | 0/388 [00:18<?, ?it/s]
loss: 0.160308 [23744/24872]: 0%| | 0/388 [00:18<?, ?it/s]
loss: 0.128884 [23808/24872]: 0%| | 0/388 [00:18<?, ?it/s]
loss: 0.201262 [23872/24872]: 0%| | 0/388 [00:18<?, ?it/s]
loss: 0.169319 [23936/24872]: 0%| | 0/388 [00:19<?, ?it/s]
loss: 0.143179 [24000/24872]: 0%| | 0/388 [00:19<?, ?it/s]
loss: 0.165489 [24064/24872]: 0%| | 0/388 [00:19<?, ?it/s]
loss: 0.209429 [24128/24872]: 0%| | 0/388 [00:19<?, ?it/s]
loss: 0.112110 [24192/24872]: 0%| | 0/388 [00:19<?, ?it/s]
loss: 0.189885 [24256/24872]: 0%| | 0/388 [00:19<?, ?it/s]
loss: 0.132037 [24320/24872]: 0%| | 0/388 [00:19<?, ?it/s]
loss: 0.141500 [24384/24872]: 0%| | 0/388 [00:19<?, ?it/s]
loss: 0.174399 [24448/24872]: 0%| | 0/388 [00:19<?, ?it/s]
loss: 0.128585 [24512/24872]: 0%| | 0/388 [00:19<?, ?it/s]
loss: 0.149247 [24576/24872]: 0%| | 0/388 [00:19<?, ?it/s]
loss: 0.149666 [24640/24872]: 0%| | 0/388 [00:19<?, ?it/s]
loss: 0.118593 [24704/24872]: 0%| | 0/388 [00:19<?, ?it/s]
loss: 0.177663 [24768/24872]: 0%| | 0/388 [00:19<?, ?it/s]
loss: 0.145794 [24832/24872]: 0%| | 0/388 [00:19<?, ?it/s]
loss: 0.209816 [24872/24872]: 0%| | 0/388 [00:19<?, ?it/s]
loss: 0.209816 [24872/24872]: : 389it [00:19, 19.67it/s]
Epoch 3, time=190.35s
0%| | 0/388 [00:00<?, ?it/s]
loss: 0.206027 [ 64/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.180794 [ 128/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.140994 [ 192/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.131737 [ 256/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.179276 [ 320/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.156884 [ 384/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.162523 [ 448/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.153564 [ 512/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.116056 [ 576/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.133208 [ 640/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.131977 [ 704/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.106940 [ 768/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.154130 [ 832/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.163505 [ 896/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.195965 [ 960/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.198227 [ 1024/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.179498 [ 1088/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.182055 [ 1152/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.147770 [ 1216/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.144783 [ 1280/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.133786 [ 1344/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.170196 [ 1408/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.204068 [ 1472/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.144919 [ 1536/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.193744 [ 1600/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.183378 [ 1664/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.171403 [ 1728/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.158234 [ 1792/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.146673 [ 1856/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.165687 [ 1920/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.178710 [ 1984/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.136244 [ 2048/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.127917 [ 2112/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.169856 [ 2176/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.143453 [ 2240/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.227760 [ 2304/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.192959 [ 2368/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.123977 [ 2432/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.216582 [ 2496/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.151833 [ 2560/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.223845 [ 2624/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.157087 [ 2688/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.143332 [ 2752/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.124457 [ 2816/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.122905 [ 2880/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.116535 [ 2944/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.202473 [ 3008/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.173806 [ 3072/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.188288 [ 3136/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.173013 [ 3200/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.111817 [ 3264/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.169943 [ 3328/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.183341 [ 3392/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.167881 [ 3456/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.183239 [ 3520/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.161780 [ 3584/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.211802 [ 3648/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.161469 [ 3712/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.169090 [ 3776/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.147819 [ 3840/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.151974 [ 3904/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.161644 [ 3968/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.163032 [ 4032/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.143958 [ 4096/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.153267 [ 4160/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.150554 [ 4224/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.143473 [ 4288/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.151724 [ 4352/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.149971 [ 4416/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.116367 [ 4480/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.143895 [ 4544/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.176619 [ 4608/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.130117 [ 4672/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.160976 [ 4736/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.164846 [ 4800/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.165687 [ 4864/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.162040 [ 4928/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.128236 [ 4992/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.119026 [ 5056/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.114617 [ 5120/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.126053 [ 5184/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.191408 [ 5248/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.204731 [ 5312/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.195586 [ 5376/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.137357 [ 5440/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.124712 [ 5504/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.168411 [ 5568/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.130790 [ 5632/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.171782 [ 5696/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.202323 [ 5760/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.111576 [ 5824/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.134205 [ 5888/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.150850 [ 5952/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.180527 [ 6016/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.208757 [ 6080/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.151671 [ 6144/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.191146 [ 6208/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.166334 [ 6272/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.173861 [ 6336/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.187861 [ 6400/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.135059 [ 6464/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.107572 [ 6528/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.155338 [ 6592/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.209040 [ 6656/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.159607 [ 6720/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.211057 [ 6784/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.200175 [ 6848/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.102540 [ 6912/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.125394 [ 6976/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.123654 [ 7040/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.128174 [ 7104/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.180673 [ 7168/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.134100 [ 7232/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.187987 [ 7296/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.144358 [ 7360/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.151759 [ 7424/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.129266 [ 7488/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.145720 [ 7552/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.154671 [ 7616/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.146875 [ 7680/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.158419 [ 7744/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.149844 [ 7808/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.145972 [ 7872/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.158638 [ 7936/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.111827 [ 8000/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.176323 [ 8064/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.136535 [ 8128/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.182433 [ 8192/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.143652 [ 8256/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.130685 [ 8320/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.158764 [ 8384/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.087103 [ 8448/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.164728 [ 8512/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.171069 [ 8576/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.147237 [ 8640/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.142313 [ 8704/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.142536 [ 8768/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.141803 [ 8832/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.163896 [ 8896/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.147824 [ 8960/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.142305 [ 9024/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.105704 [ 9088/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.119391 [ 9152/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.204849 [ 9216/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.114505 [ 9280/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.119475 [ 9344/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.142476 [ 9408/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.164750 [ 9472/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.111940 [ 9536/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.212296 [ 9600/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.213849 [ 9664/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.123209 [ 9728/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.184795 [ 9792/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.158791 [ 9856/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.113903 [ 9920/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.135907 [ 9984/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.143716 [10048/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.189690 [10112/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.141338 [10176/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.116387 [10240/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.186321 [10304/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.162608 [10368/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.175968 [10432/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.146677 [10496/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.166662 [10560/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.149494 [10624/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.120758 [10688/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.107652 [10752/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.139317 [10816/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.085540 [10880/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.161919 [10944/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.142200 [11008/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.133960 [11072/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.229635 [11136/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.164184 [11200/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.154168 [11264/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.141060 [11328/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.140457 [11392/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.154578 [11456/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.106817 [11520/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.132699 [11584/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.165563 [11648/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.157725 [11712/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.153186 [11776/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.091269 [11840/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.147011 [11904/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.191594 [11968/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.138175 [12032/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.157523 [12096/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.156105 [12160/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.161984 [12224/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.160165 [12288/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.130389 [12352/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.151487 [12416/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.158234 [12480/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.128533 [12544/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.187926 [12608/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.120798 [12672/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.169885 [12736/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.227553 [12800/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.282292 [12864/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.142160 [12928/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.145571 [12992/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.155422 [13056/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.107414 [13120/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.121735 [13184/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.141349 [13248/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.141302 [13312/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.147496 [13376/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.172765 [13440/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.141679 [13504/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.146692 [13568/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.132887 [13632/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.135937 [13696/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.116835 [13760/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.120624 [13824/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.198283 [13888/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.132885 [13952/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.117381 [14016/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.180843 [14080/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.187623 [14144/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.125906 [14208/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.188394 [14272/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.154369 [14336/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.149475 [14400/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.203875 [14464/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.116340 [14528/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.157525 [14592/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.178755 [14656/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.166000 [14720/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.136212 [14784/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.202743 [14848/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.137605 [14912/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.133220 [14976/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.143586 [15040/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.195938 [15104/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.106739 [15168/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.114784 [15232/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.136906 [15296/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.130520 [15360/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.152103 [15424/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.127074 [15488/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.113690 [15552/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.147294 [15616/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.158773 [15680/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.194566 [15744/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.140268 [15808/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.149309 [15872/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.132594 [15936/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.207410 [16000/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.214523 [16064/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.128642 [16128/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.195911 [16192/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.201321 [16256/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.158008 [16320/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.204614 [16384/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.178725 [16448/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.169278 [16512/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.119658 [16576/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.178706 [16640/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.139916 [16704/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.172673 [16768/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.123599 [16832/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.138454 [16896/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.141773 [16960/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.202578 [17024/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.173637 [17088/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.141359 [17152/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.196235 [17216/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.183284 [17280/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.144667 [17344/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.205074 [17408/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.082330 [17472/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.165178 [17536/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.197176 [17600/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.123684 [17664/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.192654 [17728/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.140254 [17792/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.172342 [17856/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.184116 [17920/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.156495 [17984/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.141321 [18048/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.131972 [18112/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.089105 [18176/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.165237 [18240/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.168137 [18304/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.145080 [18368/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.117414 [18432/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.115390 [18496/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.175289 [18560/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.137256 [18624/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.128882 [18688/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.167518 [18752/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.161536 [18816/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.167744 [18880/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.187447 [18944/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.229147 [19008/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.138312 [19072/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.187203 [19136/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.136326 [19200/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.103065 [19264/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.209142 [19328/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.146267 [19392/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.193814 [19456/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.165298 [19520/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.170703 [19584/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.138932 [19648/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.115036 [19712/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.144117 [19776/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.116961 [19840/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.139123 [19904/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.177687 [19968/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.187834 [20032/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.154476 [20096/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.159234 [20160/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.126265 [20224/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.142430 [20288/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.183475 [20352/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.141796 [20416/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.228318 [20480/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.156066 [20544/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.103413 [20608/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.144426 [20672/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.137436 [20736/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.153311 [20800/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.144123 [20864/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.168285 [20928/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.159732 [20992/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.158850 [21056/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.118900 [21120/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.165070 [21184/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.154468 [21248/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.133760 [21312/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.121980 [21376/24872]: 0%| | 0/388 [00:17<?, ?it/s]
loss: 0.154454 [21440/24872]: 0%| | 0/388 [00:17<?, ?it/s]
loss: 0.129338 [21504/24872]: 0%| | 0/388 [00:17<?, ?it/s]
loss: 0.111628 [21568/24872]: 0%| | 0/388 [00:17<?, ?it/s]
loss: 0.160826 [21632/24872]: 0%| | 0/388 [00:17<?, ?it/s]
loss: 0.146593 [21696/24872]: 0%| | 0/388 [00:17<?, ?it/s]
loss: 0.149637 [21760/24872]: 0%| | 0/388 [00:17<?, ?it/s]
loss: 0.156634 [21824/24872]: 0%| | 0/388 [00:17<?, ?it/s]
loss: 0.109792 [21888/24872]: 0%| | 0/388 [00:17<?, ?it/s]
loss: 0.137293 [21952/24872]: 0%| | 0/388 [00:17<?, ?it/s]
loss: 0.186173 [22016/24872]: 0%| | 0/388 [00:17<?, ?it/s]
loss: 0.142718 [22080/24872]: 0%| | 0/388 [00:17<?, ?it/s]
loss: 0.143083 [22144/24872]: 0%| | 0/388 [00:17<?, ?it/s]
loss: 0.141821 [22208/24872]: 0%| | 0/388 [00:17<?, ?it/s]
loss: 0.113134 [22272/24872]: 0%| | 0/388 [00:17<?, ?it/s]
loss: 0.160680 [22336/24872]: 0%| | 0/388 [00:17<?, ?it/s]
loss: 0.163181 [22400/24872]: 0%| | 0/388 [00:17<?, ?it/s]
loss: 0.147314 [22464/24872]: 0%| | 0/388 [00:17<?, ?it/s]
loss: 0.180894 [22528/24872]: 0%| | 0/388 [00:17<?, ?it/s]
loss: 0.156230 [22592/24872]: 0%| | 0/388 [00:18<?, ?it/s]
loss: 0.147931 [22656/24872]: 0%| | 0/388 [00:18<?, ?it/s]
loss: 0.208664 [22720/24872]: 0%| | 0/388 [00:18<?, ?it/s]
loss: 0.148777 [22784/24872]: 0%| | 0/388 [00:18<?, ?it/s]
loss: 0.154222 [22848/24872]: 0%| | 0/388 [00:18<?, ?it/s]
loss: 0.160296 [22912/24872]: 0%| | 0/388 [00:18<?, ?it/s]
loss: 0.194357 [22976/24872]: 0%| | 0/388 [00:18<?, ?it/s]
loss: 0.157241 [23040/24872]: 0%| | 0/388 [00:18<?, ?it/s]
loss: 0.124670 [23104/24872]: 0%| | 0/388 [00:18<?, ?it/s]
loss: 0.145566 [23168/24872]: 0%| | 0/388 [00:18<?, ?it/s]
loss: 0.154838 [23232/24872]: 0%| | 0/388 [00:18<?, ?it/s]
loss: 0.153552 [23296/24872]: 0%| | 0/388 [00:18<?, ?it/s]
loss: 0.169921 [23360/24872]: 0%| | 0/388 [00:18<?, ?it/s]
loss: 0.165200 [23424/24872]: 0%| | 0/388 [00:18<?, ?it/s]
loss: 0.131358 [23488/24872]: 0%| | 0/388 [00:18<?, ?it/s]
loss: 0.171010 [23552/24872]: 0%| | 0/388 [00:18<?, ?it/s]
loss: 0.127431 [23616/24872]: 0%| | 0/388 [00:18<?, ?it/s]
loss: 0.152758 [23680/24872]: 0%| | 0/388 [00:18<?, ?it/s]
loss: 0.171831 [23744/24872]: 0%| | 0/388 [00:18<?, ?it/s]
loss: 0.133299 [23808/24872]: 0%| | 0/388 [00:18<?, ?it/s]
loss: 0.181328 [23872/24872]: 0%| | 0/388 [00:19<?, ?it/s]
loss: 0.134829 [23936/24872]: 0%| | 0/388 [00:19<?, ?it/s]
loss: 0.145522 [24000/24872]: 0%| | 0/388 [00:19<?, ?it/s]
loss: 0.152938 [24064/24872]: 0%| | 0/388 [00:19<?, ?it/s]
loss: 0.194631 [24128/24872]: 0%| | 0/388 [00:19<?, ?it/s]
loss: 0.108033 [24192/24872]: 0%| | 0/388 [00:19<?, ?it/s]
loss: 0.172032 [24256/24872]: 0%| | 0/388 [00:19<?, ?it/s]
loss: 0.116078 [24320/24872]: 0%| | 0/388 [00:19<?, ?it/s]
loss: 0.146246 [24384/24872]: 0%| | 0/388 [00:19<?, ?it/s]
loss: 0.152971 [24448/24872]: 0%| | 0/388 [00:19<?, ?it/s]
loss: 0.152410 [24512/24872]: 0%| | 0/388 [00:19<?, ?it/s]
loss: 0.136519 [24576/24872]: 0%| | 0/388 [00:19<?, ?it/s]
loss: 0.136574 [24640/24872]: 0%| | 0/388 [00:19<?, ?it/s]
loss: 0.110985 [24704/24872]: 0%| | 0/388 [00:19<?, ?it/s]
loss: 0.145889 [24768/24872]: 0%| | 0/388 [00:19<?, ?it/s]
loss: 0.143266 [24832/24872]: 0%| | 0/388 [00:19<?, ?it/s]
loss: 0.189865 [24872/24872]: 0%| | 0/388 [00:19<?, ?it/s]
loss: 0.189865 [24872/24872]: : 389it [00:19, 19.61it/s]
Epoch 4, time=210.19s
0%| | 0/388 [00:00<?, ?it/s]
loss: 0.177299 [ 64/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.162023 [ 128/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.139654 [ 192/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.128009 [ 256/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.145208 [ 320/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.121951 [ 384/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.153169 [ 448/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.135642 [ 512/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.104632 [ 576/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.116182 [ 640/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.126537 [ 704/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.110396 [ 768/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.143564 [ 832/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.148143 [ 896/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.177973 [ 960/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.172165 [ 1024/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.151403 [ 1088/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.138500 [ 1152/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.127265 [ 1216/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.115336 [ 1280/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.122609 [ 1344/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.121698 [ 1408/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.161800 [ 1472/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.123306 [ 1536/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.144031 [ 1600/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.128414 [ 1664/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.145805 [ 1728/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.117666 [ 1792/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.142098 [ 1856/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.159829 [ 1920/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.138032 [ 1984/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.135851 [ 2048/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.113937 [ 2112/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.159291 [ 2176/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.127265 [ 2240/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.179529 [ 2304/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.189891 [ 2368/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.122436 [ 2432/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.176811 [ 2496/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.148169 [ 2560/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.186356 [ 2624/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.146478 [ 2688/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.136515 [ 2752/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.121455 [ 2816/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.142430 [ 2880/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.093808 [ 2944/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.183796 [ 3008/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.168618 [ 3072/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.175940 [ 3136/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.170293 [ 3200/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.103574 [ 3264/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.153979 [ 3328/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.180057 [ 3392/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.149761 [ 3456/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.168483 [ 3520/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.164021 [ 3584/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.197261 [ 3648/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.146819 [ 3712/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.156877 [ 3776/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.147777 [ 3840/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.160353 [ 3904/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.167400 [ 3968/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.151770 [ 4032/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.134435 [ 4096/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.162149 [ 4160/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.159360 [ 4224/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.162378 [ 4288/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.134942 [ 4352/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.148636 [ 4416/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.139209 [ 4480/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.148910 [ 4544/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.169858 [ 4608/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.112086 [ 4672/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.145567 [ 4736/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.161064 [ 4800/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.173965 [ 4864/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.127340 [ 4928/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.121210 [ 4992/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.121202 [ 5056/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.108737 [ 5120/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.128442 [ 5184/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.181385 [ 5248/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.190934 [ 5312/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.192495 [ 5376/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.125518 [ 5440/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.117985 [ 5504/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.165682 [ 5568/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.138214 [ 5632/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.175239 [ 5696/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.207018 [ 5760/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.115644 [ 5824/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.133798 [ 5888/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.124284 [ 5952/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.158988 [ 6016/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.185467 [ 6080/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.142046 [ 6144/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.152819 [ 6208/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.147047 [ 6272/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.153249 [ 6336/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.176597 [ 6400/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.125866 [ 6464/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.094016 [ 6528/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.153964 [ 6592/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.178736 [ 6656/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.135971 [ 6720/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.206883 [ 6784/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.190519 [ 6848/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.095379 [ 6912/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.129533 [ 6976/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.117825 [ 7040/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.106099 [ 7104/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.165062 [ 7168/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.121313 [ 7232/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.171762 [ 7296/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.126966 [ 7360/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.131942 [ 7424/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.116674 [ 7488/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.125804 [ 7552/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.139603 [ 7616/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.134550 [ 7680/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.162063 [ 7744/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.140025 [ 7808/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.141293 [ 7872/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.153931 [ 7936/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.099857 [ 8000/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.190518 [ 8064/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.141292 [ 8128/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.174100 [ 8192/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.159306 [ 8256/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.127917 [ 8320/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.132598 [ 8384/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.076404 [ 8448/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.145792 [ 8512/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.163500 [ 8576/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.159618 [ 8640/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.143509 [ 8704/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.137205 [ 8768/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.132493 [ 8832/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.160607 [ 8896/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.132648 [ 8960/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.131583 [ 9024/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.099921 [ 9088/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.151193 [ 9152/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.185983 [ 9216/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.100245 [ 9280/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.110567 [ 9344/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.147777 [ 9408/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.130691 [ 9472/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.104585 [ 9536/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.178062 [ 9600/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.176148 [ 9664/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.116058 [ 9728/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.176540 [ 9792/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.151075 [ 9856/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.102779 [ 9920/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.138906 [ 9984/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.141794 [10048/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.145732 [10112/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.122851 [10176/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.120708 [10240/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.179251 [10304/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.143430 [10368/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.149747 [10432/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.142223 [10496/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.166895 [10560/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.141573 [10624/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.110141 [10688/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.113780 [10752/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.126925 [10816/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.075299 [10880/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.143669 [10944/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.132284 [11008/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.125784 [11072/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.197472 [11136/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.152608 [11200/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.140892 [11264/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.134888 [11328/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.218635 [11392/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.157796 [11456/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.114439 [11520/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.121690 [11584/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.156035 [11648/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.155359 [11712/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.170675 [11776/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.073990 [11840/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.137081 [11904/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.175860 [11968/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.162013 [12032/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.176588 [12096/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.140667 [12160/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.143249 [12224/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.145080 [12288/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.124442 [12352/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.149309 [12416/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.128653 [12480/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.117410 [12544/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.146448 [12608/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.094869 [12672/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.131225 [12736/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.193810 [12800/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.245492 [12864/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.117328 [12928/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.118042 [12992/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.140321 [13056/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.096745 [13120/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.098983 [13184/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.119897 [13248/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.127168 [13312/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.135184 [13376/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.147191 [13440/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.142662 [13504/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.150510 [13568/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.136996 [13632/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.140191 [13696/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.126101 [13760/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.124155 [13824/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.189432 [13888/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.112151 [13952/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.133400 [14016/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.177391 [14080/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.170908 [14144/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.117127 [14208/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.185089 [14272/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.140682 [14336/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.143892 [14400/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.185917 [14464/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.108732 [14528/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.161891 [14592/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.180632 [14656/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.187921 [14720/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.192390 [14784/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.168534 [14848/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.204392 [14912/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.157303 [14976/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.149065 [15040/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.212540 [15104/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.105687 [15168/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.131712 [15232/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.154317 [15296/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.141878 [15360/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.191183 [15424/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.128942 [15488/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.138422 [15552/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.146143 [15616/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.162987 [15680/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.199520 [15744/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.143661 [15808/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.144788 [15872/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.114211 [15936/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.204518 [16000/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.187532 [16064/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.135166 [16128/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.191965 [16192/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.195290 [16256/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.162878 [16320/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.201292 [16384/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.176708 [16448/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.176509 [16512/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.104806 [16576/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.166781 [16640/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.141046 [16704/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.134056 [16768/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.126946 [16832/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.136798 [16896/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.125046 [16960/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.186189 [17024/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.168679 [17088/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.135116 [17152/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.182558 [17216/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.165040 [17280/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.153160 [17344/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.177090 [17408/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.092486 [17472/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.170451 [17536/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.176600 [17600/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.131154 [17664/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.201800 [17728/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.144356 [17792/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.164538 [17856/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.174829 [17920/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.149893 [17984/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.129331 [18048/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.124154 [18112/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.084547 [18176/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.157864 [18240/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.166641 [18304/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.118579 [18368/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.133131 [18432/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.123489 [18496/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.158506 [18560/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.129134 [18624/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.125733 [18688/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.165390 [18752/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.165939 [18816/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.148228 [18880/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.141913 [18944/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.192929 [19008/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.136223 [19072/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.160853 [19136/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.135943 [19200/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.084872 [19264/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.204249 [19328/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.137319 [19392/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.141148 [19456/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.128018 [19520/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.129345 [19584/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.132703 [19648/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.101430 [19712/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.116338 [19776/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.105394 [19840/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.127191 [19904/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.137725 [19968/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.176369 [20032/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.129919 [20096/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.144246 [20160/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.105297 [20224/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.123855 [20288/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.156565 [20352/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.131045 [20416/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.197437 [20480/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.151689 [20544/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.096602 [20608/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.122231 [20672/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.174626 [20736/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.135441 [20800/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.140763 [20864/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.170470 [20928/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.152066 [20992/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.167322 [21056/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.110860 [21120/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.167490 [21184/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.195135 [21248/24872]: 0%| | 0/388 [00:17<?, ?it/s]
loss: 0.121414 [21312/24872]: 0%| | 0/388 [00:17<?, ?it/s]
loss: 0.156392 [21376/24872]: 0%| | 0/388 [00:17<?, ?it/s]
loss: 0.139542 [21440/24872]: 0%| | 0/388 [00:17<?, ?it/s]
loss: 0.150515 [21504/24872]: 0%| | 0/388 [00:17<?, ?it/s]
loss: 0.113901 [21568/24872]: 0%| | 0/388 [00:17<?, ?it/s]
loss: 0.162586 [21632/24872]: 0%| | 0/388 [00:17<?, ?it/s]
loss: 0.141547 [21696/24872]: 0%| | 0/388 [00:17<?, ?it/s]
loss: 0.149840 [21760/24872]: 0%| | 0/388 [00:17<?, ?it/s]
loss: 0.144932 [21824/24872]: 0%| | 0/388 [00:17<?, ?it/s]
loss: 0.106639 [21888/24872]: 0%| | 0/388 [00:17<?, ?it/s]
loss: 0.154551 [21952/24872]: 0%| | 0/388 [00:17<?, ?it/s]
loss: 0.161880 [22016/24872]: 0%| | 0/388 [00:17<?, ?it/s]
loss: 0.144256 [22080/24872]: 0%| | 0/388 [00:17<?, ?it/s]
loss: 0.157013 [22144/24872]: 0%| | 0/388 [00:17<?, ?it/s]
loss: 0.184201 [22208/24872]: 0%| | 0/388 [00:17<?, ?it/s]
loss: 0.114827 [22272/24872]: 0%| | 0/388 [00:17<?, ?it/s]
loss: 0.163948 [22336/24872]: 0%| | 0/388 [00:17<?, ?it/s]
loss: 0.154944 [22400/24872]: 0%| | 0/388 [00:17<?, ?it/s]
loss: 0.139739 [22464/24872]: 0%| | 0/388 [00:18<?, ?it/s]
loss: 0.138096 [22528/24872]: 0%| | 0/388 [00:18<?, ?it/s]
loss: 0.179234 [22592/24872]: 0%| | 0/388 [00:18<?, ?it/s]
loss: 0.157128 [22656/24872]: 0%| | 0/388 [00:18<?, ?it/s]
loss: 0.204103 [22720/24872]: 0%| | 0/388 [00:18<?, ?it/s]
loss: 0.158353 [22784/24872]: 0%| | 0/388 [00:18<?, ?it/s]
loss: 0.136966 [22848/24872]: 0%| | 0/388 [00:18<?, ?it/s]
loss: 0.168314 [22912/24872]: 0%| | 0/388 [00:18<?, ?it/s]
loss: 0.193080 [22976/24872]: 0%| | 0/388 [00:18<?, ?it/s]
loss: 0.150364 [23040/24872]: 0%| | 0/388 [00:18<?, ?it/s]
loss: 0.120249 [23104/24872]: 0%| | 0/388 [00:18<?, ?it/s]
loss: 0.144352 [23168/24872]: 0%| | 0/388 [00:18<?, ?it/s]
loss: 0.137695 [23232/24872]: 0%| | 0/388 [00:18<?, ?it/s]
loss: 0.181930 [23296/24872]: 0%| | 0/388 [00:18<?, ?it/s]
loss: 0.156533 [23360/24872]: 0%| | 0/388 [00:18<?, ?it/s]
loss: 0.163551 [23424/24872]: 0%| | 0/388 [00:18<?, ?it/s]
loss: 0.135220 [23488/24872]: 0%| | 0/388 [00:18<?, ?it/s]
loss: 0.143057 [23552/24872]: 0%| | 0/388 [00:18<?, ?it/s]
loss: 0.137721 [23616/24872]: 0%| | 0/388 [00:18<?, ?it/s]
loss: 0.166580 [23680/24872]: 0%| | 0/388 [00:19<?, ?it/s]
loss: 0.177266 [23744/24872]: 0%| | 0/388 [00:19<?, ?it/s]
loss: 0.124242 [23808/24872]: 0%| | 0/388 [00:19<?, ?it/s]
loss: 0.181511 [23872/24872]: 0%| | 0/388 [00:19<?, ?it/s]
loss: 0.110491 [23936/24872]: 0%| | 0/388 [00:19<?, ?it/s]
loss: 0.149192 [24000/24872]: 0%| | 0/388 [00:19<?, ?it/s]
loss: 0.150522 [24064/24872]: 0%| | 0/388 [00:19<?, ?it/s]
loss: 0.180686 [24128/24872]: 0%| | 0/388 [00:19<?, ?it/s]
loss: 0.111569 [24192/24872]: 0%| | 0/388 [00:19<?, ?it/s]
loss: 0.170487 [24256/24872]: 0%| | 0/388 [00:19<?, ?it/s]
loss: 0.109343 [24320/24872]: 0%| | 0/388 [00:19<?, ?it/s]
loss: 0.162729 [24384/24872]: 0%| | 0/388 [00:19<?, ?it/s]
loss: 0.148238 [24448/24872]: 0%| | 0/388 [00:19<?, ?it/s]
loss: 0.144041 [24512/24872]: 0%| | 0/388 [00:19<?, ?it/s]
loss: 0.138098 [24576/24872]: 0%| | 0/388 [00:19<?, ?it/s]
loss: 0.130320 [24640/24872]: 0%| | 0/388 [00:19<?, ?it/s]
loss: 0.108722 [24704/24872]: 0%| | 0/388 [00:19<?, ?it/s]
loss: 0.143479 [24768/24872]: 0%| | 0/388 [00:19<?, ?it/s]
loss: 0.135272 [24832/24872]: 0%| | 0/388 [00:19<?, ?it/s]
loss: 0.189853 [24872/24872]: 0%| | 0/388 [00:19<?, ?it/s]
loss: 0.189853 [24872/24872]: : 389it [00:19, 19.47it/s]
Epoch 5, time=230.17s
0%| | 0/388 [00:00<?, ?it/s]
loss: 0.177485 [ 64/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.149602 [ 128/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.115501 [ 192/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.116775 [ 256/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.140029 [ 320/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.120097 [ 384/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.146540 [ 448/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.122035 [ 512/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.105566 [ 576/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.123344 [ 640/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.120620 [ 704/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.107684 [ 768/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.139536 [ 832/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.141102 [ 896/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.167602 [ 960/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.158396 [ 1024/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.142042 [ 1088/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.144526 [ 1152/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.120582 [ 1216/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.116806 [ 1280/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.113832 [ 1344/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.115704 [ 1408/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.156035 [ 1472/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.116003 [ 1536/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.130709 [ 1600/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.137635 [ 1664/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.134907 [ 1728/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.116336 [ 1792/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.137964 [ 1856/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.144668 [ 1920/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.146552 [ 1984/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.123022 [ 2048/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.116838 [ 2112/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.138212 [ 2176/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.124763 [ 2240/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.184876 [ 2304/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.168490 [ 2368/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.116432 [ 2432/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.185520 [ 2496/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.133257 [ 2560/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.186824 [ 2624/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.144269 [ 2688/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.124173 [ 2752/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.127750 [ 2816/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.139414 [ 2880/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.089570 [ 2944/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.218108 [ 3008/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.151165 [ 3072/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.176663 [ 3136/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.167736 [ 3200/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.102054 [ 3264/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.148092 [ 3328/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.155234 [ 3392/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.144370 [ 3456/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.160977 [ 3520/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.137406 [ 3584/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.162100 [ 3648/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.142525 [ 3712/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.164498 [ 3776/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.129123 [ 3840/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.133868 [ 3904/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.151651 [ 3968/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.144261 [ 4032/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.141631 [ 4096/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.154271 [ 4160/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.160675 [ 4224/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.160316 [ 4288/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.129969 [ 4352/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.150563 [ 4416/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.145449 [ 4480/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.135243 [ 4544/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.164145 [ 4608/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.120086 [ 4672/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.145174 [ 4736/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.142293 [ 4800/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.153928 [ 4864/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.134873 [ 4928/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.118530 [ 4992/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.104136 [ 5056/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.131451 [ 5120/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.108505 [ 5184/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.170983 [ 5248/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.177194 [ 5312/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.169468 [ 5376/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.121640 [ 5440/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.115693 [ 5504/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.143049 [ 5568/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.152199 [ 5632/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.128748 [ 5696/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.163487 [ 5760/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.102332 [ 5824/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.122815 [ 5888/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.125974 [ 5952/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.145628 [ 6016/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.162280 [ 6080/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.120616 [ 6144/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.141869 [ 6208/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.139762 [ 6272/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.136413 [ 6336/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.159644 [ 6400/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.112926 [ 6464/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.087194 [ 6528/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.141739 [ 6592/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.165288 [ 6656/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.124295 [ 6720/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.185788 [ 6784/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.164500 [ 6848/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.096096 [ 6912/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.116397 [ 6976/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.117189 [ 7040/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.130602 [ 7104/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.164562 [ 7168/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.123016 [ 7232/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.169360 [ 7296/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.129772 [ 7360/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.151089 [ 7424/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.118317 [ 7488/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.113295 [ 7552/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.148934 [ 7616/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.130812 [ 7680/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.160479 [ 7744/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.137603 [ 7808/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.139541 [ 7872/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.168280 [ 7936/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.088367 [ 8000/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.178871 [ 8064/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.107825 [ 8128/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.171389 [ 8192/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.125142 [ 8256/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.129181 [ 8320/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.125578 [ 8384/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.066434 [ 8448/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.145197 [ 8512/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.145761 [ 8576/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.136805 [ 8640/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.139686 [ 8704/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.118999 [ 8768/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.115134 [ 8832/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.155564 [ 8896/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.132178 [ 8960/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.134031 [ 9024/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.101505 [ 9088/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.131330 [ 9152/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.186222 [ 9216/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.098242 [ 9280/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.107890 [ 9344/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.150943 [ 9408/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.132096 [ 9472/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.102766 [ 9536/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.173895 [ 9600/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.188731 [ 9664/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.104017 [ 9728/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.173249 [ 9792/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.150272 [ 9856/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.107212 [ 9920/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.135588 [ 9984/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.136045 [10048/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.132295 [10112/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.118077 [10176/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.115621 [10240/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.175215 [10304/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.137072 [10368/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.143266 [10432/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.134339 [10496/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.160069 [10560/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.132989 [10624/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.100144 [10688/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.104560 [10752/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.117686 [10816/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.063108 [10880/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.135284 [10944/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.125190 [11008/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.119317 [11072/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.186912 [11136/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.130046 [11200/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.130149 [11264/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.128084 [11328/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.169342 [11392/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.143665 [11456/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.099913 [11520/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.094732 [11584/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.153219 [11648/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.126589 [11712/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.161773 [11776/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.076064 [11840/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.128961 [11904/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.148341 [11968/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.120454 [12032/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.160798 [12096/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.140863 [12160/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.144470 [12224/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.140816 [12288/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.120329 [12352/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.151385 [12416/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.124923 [12480/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.114129 [12544/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.122244 [12608/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.099336 [12672/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.120947 [12736/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.198126 [12800/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.251330 [12864/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.112041 [12928/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.108491 [12992/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.137785 [13056/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.098380 [13120/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.105513 [13184/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.101118 [13248/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.115808 [13312/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.117352 [13376/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.141833 [13440/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.127056 [13504/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.146880 [13568/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.130229 [13632/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.121359 [13696/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.104484 [13760/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.098509 [13824/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.187268 [13888/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.118169 [13952/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.104265 [14016/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.192382 [14080/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.158828 [14144/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.109580 [14208/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.178792 [14272/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.135412 [14336/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.141752 [14400/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.177109 [14464/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.104398 [14528/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.149036 [14592/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.171407 [14656/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.135521 [14720/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.125751 [14784/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.174980 [14848/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.119613 [14912/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.150317 [14976/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.139216 [15040/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.161542 [15104/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.105089 [15168/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.107309 [15232/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.126156 [15296/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.108212 [15360/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.151373 [15424/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.125362 [15488/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.108866 [15552/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.129592 [15616/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.144757 [15680/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.174563 [15744/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.125868 [15808/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.131494 [15872/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.104487 [15936/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.195307 [16000/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.175671 [16064/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.120248 [16128/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.157572 [16192/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.192034 [16256/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.124153 [16320/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.154153 [16384/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.121668 [16448/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.139356 [16512/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.098131 [16576/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.128098 [16640/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.125438 [16704/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.115941 [16768/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.108689 [16832/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.114772 [16896/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.107985 [16960/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.149056 [17024/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.144572 [17088/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.129120 [17152/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.140863 [17216/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.152544 [17280/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.126813 [17344/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.157077 [17408/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.074770 [17472/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.138576 [17536/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.164184 [17600/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.118303 [17664/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.188256 [17728/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.134575 [17792/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.148065 [17856/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.166514 [17920/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.121286 [17984/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.110218 [18048/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.097677 [18112/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.076531 [18176/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.148063 [18240/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.151555 [18304/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.124267 [18368/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.117973 [18432/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.103297 [18496/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.143981 [18560/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.127650 [18624/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.112584 [18688/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.144691 [18752/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.148171 [18816/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.147251 [18880/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.129910 [18944/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.176634 [19008/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.121733 [19072/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.158529 [19136/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.121553 [19200/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.080773 [19264/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.181739 [19328/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.134962 [19392/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.145199 [19456/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.118184 [19520/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.125263 [19584/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.125148 [19648/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.103794 [19712/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.105180 [19776/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.092707 [19840/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.122945 [19904/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.120985 [19968/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.157593 [20032/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.121244 [20096/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.146671 [20160/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.107392 [20224/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.118260 [20288/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.167102 [20352/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.119456 [20416/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.193651 [20480/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.137345 [20544/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.086523 [20608/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.116783 [20672/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.163548 [20736/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.127326 [20800/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.136000 [20864/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.151175 [20928/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.138709 [20992/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.143822 [21056/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.114867 [21120/24872]: 0%| | 0/388 [00:17<?, ?it/s]
loss: 0.137578 [21184/24872]: 0%| | 0/388 [00:17<?, ?it/s]
loss: 0.147624 [21248/24872]: 0%| | 0/388 [00:17<?, ?it/s]
loss: 0.106638 [21312/24872]: 0%| | 0/388 [00:17<?, ?it/s]
loss: 0.115910 [21376/24872]: 0%| | 0/388 [00:17<?, ?it/s]
loss: 0.149040 [21440/24872]: 0%| | 0/388 [00:17<?, ?it/s]
loss: 0.139311 [21504/24872]: 0%| | 0/388 [00:17<?, ?it/s]
loss: 0.103814 [21568/24872]: 0%| | 0/388 [00:17<?, ?it/s]
loss: 0.143892 [21632/24872]: 0%| | 0/388 [00:17<?, ?it/s]
loss: 0.120008 [21696/24872]: 0%| | 0/388 [00:17<?, ?it/s]
loss: 0.121984 [21760/24872]: 0%| | 0/388 [00:17<?, ?it/s]
loss: 0.145318 [21824/24872]: 0%| | 0/388 [00:17<?, ?it/s]
loss: 0.108885 [21888/24872]: 0%| | 0/388 [00:17<?, ?it/s]
loss: 0.133800 [21952/24872]: 0%| | 0/388 [00:17<?, ?it/s]
loss: 0.150413 [22016/24872]: 0%| | 0/388 [00:17<?, ?it/s]
loss: 0.121849 [22080/24872]: 0%| | 0/388 [00:17<?, ?it/s]
loss: 0.136674 [22144/24872]: 0%| | 0/388 [00:17<?, ?it/s]
loss: 0.170491 [22208/24872]: 0%| | 0/388 [00:17<?, ?it/s]
loss: 0.099038 [22272/24872]: 0%| | 0/388 [00:17<?, ?it/s]
loss: 0.150310 [22336/24872]: 0%| | 0/388 [00:17<?, ?it/s]
loss: 0.131006 [22400/24872]: 0%| | 0/388 [00:18<?, ?it/s]
loss: 0.126651 [22464/24872]: 0%| | 0/388 [00:18<?, ?it/s]
loss: 0.120064 [22528/24872]: 0%| | 0/388 [00:18<?, ?it/s]
loss: 0.175028 [22592/24872]: 0%| | 0/388 [00:18<?, ?it/s]
loss: 0.139721 [22656/24872]: 0%| | 0/388 [00:18<?, ?it/s]
loss: 0.179270 [22720/24872]: 0%| | 0/388 [00:18<?, ?it/s]
loss: 0.129869 [22784/24872]: 0%| | 0/388 [00:18<?, ?it/s]
loss: 0.135388 [22848/24872]: 0%| | 0/388 [00:18<?, ?it/s]
loss: 0.149340 [22912/24872]: 0%| | 0/388 [00:18<?, ?it/s]
loss: 0.181397 [22976/24872]: 0%| | 0/388 [00:18<?, ?it/s]
loss: 0.141559 [23040/24872]: 0%| | 0/388 [00:18<?, ?it/s]
loss: 0.114655 [23104/24872]: 0%| | 0/388 [00:18<?, ?it/s]
loss: 0.138458 [23168/24872]: 0%| | 0/388 [00:18<?, ?it/s]
loss: 0.143826 [23232/24872]: 0%| | 0/388 [00:18<?, ?it/s]
loss: 0.157243 [23296/24872]: 0%| | 0/388 [00:18<?, ?it/s]
loss: 0.150166 [23360/24872]: 0%| | 0/388 [00:18<?, ?it/s]
loss: 0.134869 [23424/24872]: 0%| | 0/388 [00:18<?, ?it/s]
loss: 0.126725 [23488/24872]: 0%| | 0/388 [00:18<?, ?it/s]
loss: 0.132826 [23552/24872]: 0%| | 0/388 [00:18<?, ?it/s]
loss: 0.105133 [23616/24872]: 0%| | 0/388 [00:19<?, ?it/s]
loss: 0.145093 [23680/24872]: 0%| | 0/388 [00:19<?, ?it/s]
loss: 0.165173 [23744/24872]: 0%| | 0/388 [00:19<?, ?it/s]
loss: 0.116723 [23808/24872]: 0%| | 0/388 [00:19<?, ?it/s]
loss: 0.187418 [23872/24872]: 0%| | 0/388 [00:19<?, ?it/s]
loss: 0.105211 [23936/24872]: 0%| | 0/388 [00:19<?, ?it/s]
loss: 0.168236 [24000/24872]: 0%| | 0/388 [00:19<?, ?it/s]
loss: 0.163364 [24064/24872]: 0%| | 0/388 [00:19<?, ?it/s]
loss: 0.170498 [24128/24872]: 0%| | 0/388 [00:19<?, ?it/s]
loss: 0.128022 [24192/24872]: 0%| | 0/388 [00:19<?, ?it/s]
loss: 0.196865 [24256/24872]: 0%| | 0/388 [00:19<?, ?it/s]
loss: 0.105973 [24320/24872]: 0%| | 0/388 [00:19<?, ?it/s]
loss: 0.169872 [24384/24872]: 0%| | 0/388 [00:19<?, ?it/s]
loss: 0.156948 [24448/24872]: 0%| | 0/388 [00:19<?, ?it/s]
loss: 0.119890 [24512/24872]: 0%| | 0/388 [00:19<?, ?it/s]
loss: 0.139893 [24576/24872]: 0%| | 0/388 [00:19<?, ?it/s]
loss: 0.143686 [24640/24872]: 0%| | 0/388 [00:19<?, ?it/s]
loss: 0.105379 [24704/24872]: 0%| | 0/388 [00:19<?, ?it/s]
loss: 0.142047 [24768/24872]: 0%| | 0/388 [00:19<?, ?it/s]
loss: 0.153661 [24832/24872]: 0%| | 0/388 [00:19<?, ?it/s]
loss: 0.207047 [24872/24872]: 0%| | 0/388 [00:20<?, ?it/s]
loss: 0.207047 [24872/24872]: : 389it [00:20, 19.41it/s]
-------------------------------
LR=0.0001, batch_size=128
-------------------------------
Epoch 1, time=250.21s
0%| | 0/194 [00:00<?, ?it/s]
loss: 0.184328 [ 128/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.526290 [ 256/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.293090 [ 384/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.160216 [ 512/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.288923 [ 640/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.277782 [ 768/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.217089 [ 896/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.188940 [ 1024/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.200620 [ 1152/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.217390 [ 1280/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.228989 [ 1408/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.205655 [ 1536/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.155941 [ 1664/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.170909 [ 1792/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.212387 [ 1920/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.219201 [ 2048/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.158549 [ 2176/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.157084 [ 2304/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.157353 [ 2432/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.170801 [ 2560/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.209588 [ 2688/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.141036 [ 2816/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.121339 [ 2944/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.180418 [ 3072/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.170605 [ 3200/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.140936 [ 3328/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.161001 [ 3456/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.144684 [ 3584/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.161396 [ 3712/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.156037 [ 3840/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.161511 [ 3968/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.128355 [ 4096/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.132297 [ 4224/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.123834 [ 4352/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.132007 [ 4480/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.149202 [ 4608/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.131517 [ 4736/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.141937 [ 4864/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.110573 [ 4992/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.111668 [ 5120/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.137275 [ 5248/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.159918 [ 5376/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.107293 [ 5504/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.133541 [ 5632/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.161222 [ 5760/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.104533 [ 5888/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.128909 [ 6016/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.150190 [ 6144/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.125021 [ 6272/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.143403 [ 6400/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.091829 [ 6528/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.140035 [ 6656/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.149807 [ 6784/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.126763 [ 6912/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.098608 [ 7040/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.116976 [ 7168/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.120198 [ 7296/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.112341 [ 7424/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.101311 [ 7552/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.125130 [ 7680/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.120776 [ 7808/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.132845 [ 7936/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.123376 [ 8064/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.127714 [ 8192/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.113622 [ 8320/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.086480 [ 8448/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.142205 [ 8576/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.127368 [ 8704/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.104758 [ 8832/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.130787 [ 8960/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.102000 [ 9088/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.143185 [ 9216/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.093352 [ 9344/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.125487 [ 9472/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.116994 [ 9600/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.128324 [ 9728/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.139495 [ 9856/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.107277 [ 9984/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.128663 [10112/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.097902 [10240/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.134968 [10368/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.129804 [10496/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.139709 [10624/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.090190 [10752/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.085326 [10880/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.124745 [11008/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.146519 [11136/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.113917 [11264/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.145190 [11392/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.111326 [11520/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.112804 [11648/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.132981 [11776/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.090881 [11904/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.128084 [12032/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.146728 [12160/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.132097 [12288/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.110137 [12416/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.109831 [12544/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.110787 [12672/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.148514 [12800/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.167804 [12928/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.109851 [13056/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.092419 [13184/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.105834 [13312/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.113286 [13440/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.121145 [13568/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.123893 [13696/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.096706 [13824/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.139796 [13952/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.131218 [14080/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.111367 [14208/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.149313 [14336/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.149582 [14464/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.107028 [14592/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.127822 [14720/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.145724 [14848/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.111697 [14976/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.118215 [15104/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.093540 [15232/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.103486 [15360/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.130588 [15488/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.115152 [15616/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.136274 [15744/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.116740 [15872/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.137610 [16000/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.129587 [16128/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.152061 [16256/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.116933 [16384/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.115363 [16512/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.102824 [16640/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.104317 [16768/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.094644 [16896/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.119657 [17024/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.125260 [17152/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.141817 [17280/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.132921 [17408/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.094175 [17536/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.128579 [17664/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.150486 [17792/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.143521 [17920/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.104833 [18048/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.080561 [18176/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.141242 [18304/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.115273 [18432/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.107450 [18560/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.107057 [18688/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.129384 [18816/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.124957 [18944/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.135630 [19072/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.129601 [19200/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.118232 [19328/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.120622 [19456/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.124785 [19584/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.095270 [19712/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.086074 [19840/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.118045 [19968/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.121839 [20096/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.115244 [20224/24872]: 0%| | 0/194 [00:12<?, ?it/s]
loss: 0.124110 [20352/24872]: 0%| | 0/194 [00:12<?, ?it/s]
loss: 0.152150 [20480/24872]: 0%| | 0/194 [00:12<?, ?it/s]
loss: 0.097362 [20608/24872]: 0%| | 0/194 [00:12<?, ?it/s]
loss: 0.106724 [20736/24872]: 0%| | 0/194 [00:12<?, ?it/s]
loss: 0.115842 [20864/24872]: 0%| | 0/194 [00:12<?, ?it/s]
loss: 0.124433 [20992/24872]: 0%| | 0/194 [00:12<?, ?it/s]
loss: 0.107954 [21120/24872]: 0%| | 0/194 [00:12<?, ?it/s]
loss: 0.116226 [21248/24872]: 0%| | 0/194 [00:12<?, ?it/s]
loss: 0.096028 [21376/24872]: 0%| | 0/194 [00:12<?, ?it/s]
loss: 0.127474 [21504/24872]: 0%| | 0/194 [00:12<?, ?it/s]
loss: 0.107751 [21632/24872]: 0%| | 0/194 [00:12<?, ?it/s]
loss: 0.102969 [21760/24872]: 0%| | 0/194 [00:12<?, ?it/s]
loss: 0.105862 [21888/24872]: 0%| | 0/194 [00:13<?, ?it/s]
loss: 0.126078 [22016/24872]: 0%| | 0/194 [00:13<?, ?it/s]
loss: 0.109503 [22144/24872]: 0%| | 0/194 [00:13<?, ?it/s]
loss: 0.121975 [22272/24872]: 0%| | 0/194 [00:13<?, ?it/s]
loss: 0.126109 [22400/24872]: 0%| | 0/194 [00:13<?, ?it/s]
loss: 0.112858 [22528/24872]: 0%| | 0/194 [00:13<?, ?it/s]
loss: 0.126110 [22656/24872]: 0%| | 0/194 [00:13<?, ?it/s]
loss: 0.128932 [22784/24872]: 0%| | 0/194 [00:13<?, ?it/s]
loss: 0.125451 [22912/24872]: 0%| | 0/194 [00:13<?, ?it/s]
loss: 0.145104 [23040/24872]: 0%| | 0/194 [00:13<?, ?it/s]
loss: 0.134471 [23168/24872]: 0%| | 0/194 [00:13<?, ?it/s]
loss: 0.126398 [23296/24872]: 0%| | 0/194 [00:13<?, ?it/s]
loss: 0.128858 [23424/24872]: 0%| | 0/194 [00:13<?, ?it/s]
loss: 0.115076 [23552/24872]: 0%| | 0/194 [00:14<?, ?it/s]
loss: 0.109419 [23680/24872]: 0%| | 0/194 [00:14<?, ?it/s]
loss: 0.127643 [23808/24872]: 0%| | 0/194 [00:14<?, ?it/s]
loss: 0.125291 [23936/24872]: 0%| | 0/194 [00:14<?, ?it/s]
loss: 0.124710 [24064/24872]: 0%| | 0/194 [00:14<?, ?it/s]
loss: 0.124007 [24192/24872]: 0%| | 0/194 [00:14<?, ?it/s]
loss: 0.115649 [24320/24872]: 0%| | 0/194 [00:14<?, ?it/s]
loss: 0.128155 [24448/24872]: 0%| | 0/194 [00:14<?, ?it/s]
loss: 0.120446 [24576/24872]: 0%| | 0/194 [00:14<?, ?it/s]
loss: 0.103994 [24704/24872]: 0%| | 0/194 [00:14<?, ?it/s]
loss: 0.117835 [24832/24872]: 0%| | 0/194 [00:14<?, ?it/s]
loss: 0.182717 [24872/24872]: 0%| | 0/194 [00:14<?, ?it/s]
loss: 0.182717 [24872/24872]: : 195it [00:14, 13.16it/s]
Epoch 2, time=265.03s
0%| | 0/194 [00:00<?, ?it/s]
loss: 0.136163 [ 128/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.112555 [ 256/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.137353 [ 384/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.120693 [ 512/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.097423 [ 640/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.110444 [ 768/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.129257 [ 896/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.145044 [ 1024/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.118952 [ 1152/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.113159 [ 1280/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.115006 [ 1408/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.136447 [ 1536/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.114365 [ 1664/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.118033 [ 1792/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.127632 [ 1920/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.130054 [ 2048/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.106334 [ 2176/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.140446 [ 2304/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.125330 [ 2432/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.120026 [ 2560/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.150360 [ 2688/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.099229 [ 2816/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.101247 [ 2944/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.147836 [ 3072/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.136105 [ 3200/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.101510 [ 3328/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.133436 [ 3456/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.135128 [ 3584/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.132683 [ 3712/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.115318 [ 3840/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.122392 [ 3968/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.112420 [ 4096/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.111077 [ 4224/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.112322 [ 4352/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.107107 [ 4480/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.124303 [ 4608/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.116918 [ 4736/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.126085 [ 4864/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.099510 [ 4992/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.095964 [ 5120/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.121831 [ 5248/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.144887 [ 5376/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.103015 [ 5504/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.120375 [ 5632/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.140683 [ 5760/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.100997 [ 5888/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.130469 [ 6016/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.123246 [ 6144/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.114817 [ 6272/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.139198 [ 6400/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.089315 [ 6528/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.129926 [ 6656/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.124858 [ 6784/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.113709 [ 6912/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.092196 [ 7040/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.112229 [ 7168/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.111212 [ 7296/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.105184 [ 7424/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.122609 [ 7552/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.110713 [ 7680/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.115441 [ 7808/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.134860 [ 7936/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.124487 [ 8064/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.130489 [ 8192/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.109194 [ 8320/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.087392 [ 8448/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.146335 [ 8576/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.121273 [ 8704/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.102497 [ 8832/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.134545 [ 8960/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.099605 [ 9088/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.136842 [ 9216/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.087981 [ 9344/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.125117 [ 9472/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.108947 [ 9600/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.126079 [ 9728/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.128108 [ 9856/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.107217 [ 9984/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.114513 [10112/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.091748 [10240/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.128304 [10368/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.123770 [10496/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.134052 [10624/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.094585 [10752/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.085371 [10880/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.116383 [11008/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.141033 [11136/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.112327 [11264/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.143885 [11392/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.102809 [11520/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.117729 [11648/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.122909 [11776/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.086789 [11904/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.118460 [12032/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.134216 [12160/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.131411 [12288/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.101055 [12416/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.109550 [12544/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.095526 [12672/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.142429 [12800/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.159001 [12928/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.109367 [13056/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.091356 [13184/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.098813 [13312/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.106699 [13440/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.119954 [13568/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.111183 [13696/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.091128 [13824/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.130041 [13952/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.119126 [14080/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.108222 [14208/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.143585 [14336/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.139759 [14464/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.106139 [14592/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.121590 [14720/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.134290 [14848/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.110831 [14976/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.108424 [15104/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.086243 [15232/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.099846 [15360/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.119096 [15488/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.105658 [15616/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.124102 [15744/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.114215 [15872/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.131135 [16000/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.122757 [16128/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.141426 [16256/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.111394 [16384/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.107708 [16512/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.104176 [16640/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.106854 [16768/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.100631 [16896/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.115975 [17024/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.120129 [17152/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.130766 [17280/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.126888 [17408/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.083863 [17536/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.130192 [17664/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.145113 [17792/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.140870 [17920/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.102842 [18048/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.075324 [18176/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.138397 [18304/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.109406 [18432/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.101347 [18560/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.108291 [18688/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.127649 [18816/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.130288 [18944/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.133761 [19072/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.130268 [19200/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.118363 [19328/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.121316 [19456/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.130453 [19584/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.089407 [19712/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.094535 [19840/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.110746 [19968/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.148076 [20096/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.119864 [20224/24872]: 0%| | 0/194 [00:12<?, ?it/s]
loss: 0.138704 [20352/24872]: 0%| | 0/194 [00:12<?, ?it/s]
loss: 0.160213 [20480/24872]: 0%| | 0/194 [00:12<?, ?it/s]
loss: 0.097861 [20608/24872]: 0%| | 0/194 [00:12<?, ?it/s]
loss: 0.123726 [20736/24872]: 0%| | 0/194 [00:12<?, ?it/s]
loss: 0.135678 [20864/24872]: 0%| | 0/194 [00:12<?, ?it/s]
loss: 0.150027 [20992/24872]: 0%| | 0/194 [00:12<?, ?it/s]
loss: 0.101427 [21120/24872]: 0%| | 0/194 [00:12<?, ?it/s]
loss: 0.117258 [21248/24872]: 0%| | 0/194 [00:12<?, ?it/s]
loss: 0.106761 [21376/24872]: 0%| | 0/194 [00:12<?, ?it/s]
loss: 0.118527 [21504/24872]: 0%| | 0/194 [00:12<?, ?it/s]
loss: 0.117355 [21632/24872]: 0%| | 0/194 [00:12<?, ?it/s]
loss: 0.120786 [21760/24872]: 0%| | 0/194 [00:12<?, ?it/s]
loss: 0.123009 [21888/24872]: 0%| | 0/194 [00:13<?, ?it/s]
loss: 0.136268 [22016/24872]: 0%| | 0/194 [00:13<?, ?it/s]
loss: 0.104166 [22144/24872]: 0%| | 0/194 [00:13<?, ?it/s]
loss: 0.135294 [22272/24872]: 0%| | 0/194 [00:13<?, ?it/s]
loss: 0.124962 [22400/24872]: 0%| | 0/194 [00:13<?, ?it/s]
loss: 0.131824 [22528/24872]: 0%| | 0/194 [00:13<?, ?it/s]
loss: 0.131991 [22656/24872]: 0%| | 0/194 [00:13<?, ?it/s]
loss: 0.129898 [22784/24872]: 0%| | 0/194 [00:13<?, ?it/s]
loss: 0.134928 [22912/24872]: 0%| | 0/194 [00:13<?, ?it/s]
loss: 0.145737 [23040/24872]: 0%| | 0/194 [00:13<?, ?it/s]
loss: 0.135520 [23168/24872]: 0%| | 0/194 [00:13<?, ?it/s]
loss: 0.127886 [23296/24872]: 0%| | 0/194 [00:13<?, ?it/s]
loss: 0.131576 [23424/24872]: 0%| | 0/194 [00:13<?, ?it/s]
loss: 0.119867 [23552/24872]: 0%| | 0/194 [00:13<?, ?it/s]
loss: 0.103512 [23680/24872]: 0%| | 0/194 [00:14<?, ?it/s]
loss: 0.132680 [23808/24872]: 0%| | 0/194 [00:14<?, ?it/s]
loss: 0.119032 [23936/24872]: 0%| | 0/194 [00:14<?, ?it/s]
loss: 0.129379 [24064/24872]: 0%| | 0/194 [00:14<?, ?it/s]
loss: 0.120548 [24192/24872]: 0%| | 0/194 [00:14<?, ?it/s]
loss: 0.114199 [24320/24872]: 0%| | 0/194 [00:14<?, ?it/s]
loss: 0.133059 [24448/24872]: 0%| | 0/194 [00:14<?, ?it/s]
loss: 0.115061 [24576/24872]: 0%| | 0/194 [00:14<?, ?it/s]
loss: 0.100265 [24704/24872]: 0%| | 0/194 [00:14<?, ?it/s]
loss: 0.115475 [24832/24872]: 0%| | 0/194 [00:14<?, ?it/s]
loss: 0.187644 [24872/24872]: 0%| | 0/194 [00:14<?, ?it/s]
loss: 0.187644 [24872/24872]: : 195it [00:14, 13.18it/s]
Epoch 3, time=279.82s
0%| | 0/194 [00:00<?, ?it/s]
loss: 0.127454 [ 128/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.107924 [ 256/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.114203 [ 384/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.110405 [ 512/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.103475 [ 640/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.099587 [ 768/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.133570 [ 896/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.140484 [ 1024/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.121729 [ 1152/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.114575 [ 1280/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.099326 [ 1408/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.122761 [ 1536/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.117960 [ 1664/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.118854 [ 1792/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.147213 [ 1920/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.125292 [ 2048/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.131598 [ 2176/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.141197 [ 2304/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.129700 [ 2432/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.137518 [ 2560/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.142412 [ 2688/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.107809 [ 2816/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.098904 [ 2944/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.151156 [ 3072/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.153996 [ 3200/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.106629 [ 3328/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.132971 [ 3456/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.132825 [ 3584/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.142968 [ 3712/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.130023 [ 3840/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.121885 [ 3968/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.110758 [ 4096/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.114110 [ 4224/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.112933 [ 4352/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.104076 [ 4480/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.129900 [ 4608/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.119118 [ 4736/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.130072 [ 4864/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.096254 [ 4992/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.101905 [ 5120/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.137175 [ 5248/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.140795 [ 5376/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.098070 [ 5504/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.134338 [ 5632/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.132195 [ 5760/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.094833 [ 5888/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.118395 [ 6016/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.127960 [ 6144/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.107873 [ 6272/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.138746 [ 6400/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.082207 [ 6528/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.136927 [ 6656/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.128588 [ 6784/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.104015 [ 6912/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.097484 [ 7040/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.105629 [ 7168/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.107498 [ 7296/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.110320 [ 7424/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.089532 [ 7552/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.105729 [ 7680/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.111550 [ 7808/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.123767 [ 7936/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.122321 [ 8064/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.116285 [ 8192/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.112473 [ 8320/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.084700 [ 8448/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.128193 [ 8576/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.123579 [ 8704/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.101357 [ 8832/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.124029 [ 8960/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.089408 [ 9088/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.124369 [ 9216/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.085964 [ 9344/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.113511 [ 9472/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.103410 [ 9600/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.118705 [ 9728/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.127077 [ 9856/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.094056 [ 9984/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.112528 [10112/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.089075 [10240/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.136947 [10368/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.114284 [10496/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.126036 [10624/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.085711 [10752/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.082802 [10880/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.114153 [11008/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.134858 [11136/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.102602 [11264/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.119595 [11392/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.100625 [11520/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.103556 [11648/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.117981 [11776/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.084352 [11904/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.119906 [12032/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.120694 [12160/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.123186 [12288/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.097208 [12416/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.103291 [12544/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.090087 [12672/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.143706 [12800/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.156374 [12928/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.095012 [13056/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.080267 [13184/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.088939 [13312/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.098326 [13440/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.116886 [13568/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.102835 [13696/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.084601 [13824/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.131861 [13952/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.117380 [14080/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.092674 [14208/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.136264 [14336/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.137236 [14464/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.104778 [14592/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.120080 [14720/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.123572 [14848/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.111836 [14976/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.101390 [15104/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.084468 [15232/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.101551 [15360/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.119681 [15488/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.107952 [15616/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.122716 [15744/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.111638 [15872/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.132821 [16000/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.124991 [16128/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.138371 [16256/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.108789 [16384/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.105815 [16512/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.099153 [16640/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.103318 [16768/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.092949 [16896/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.108847 [17024/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.113051 [17152/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.127353 [17280/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.130167 [17408/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.088372 [17536/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.115943 [17664/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.141706 [17792/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.132049 [17920/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.092417 [18048/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.078811 [18176/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.133313 [18304/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.118456 [18432/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.091289 [18560/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.100692 [18688/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.122515 [18816/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.128696 [18944/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.123612 [19072/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.120009 [19200/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.115667 [19328/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.113877 [19456/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.111976 [19584/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.095416 [19712/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.093971 [19840/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.111484 [19968/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.134231 [20096/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.101494 [20224/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.123209 [20352/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.147810 [20480/24872]: 0%| | 0/194 [00:12<?, ?it/s]
loss: 0.099690 [20608/24872]: 0%| | 0/194 [00:12<?, ?it/s]
loss: 0.112766 [20736/24872]: 0%| | 0/194 [00:12<?, ?it/s]
loss: 0.134298 [20864/24872]: 0%| | 0/194 [00:12<?, ?it/s]
loss: 0.138117 [20992/24872]: 0%| | 0/194 [00:12<?, ?it/s]
loss: 0.096287 [21120/24872]: 0%| | 0/194 [00:12<?, ?it/s]
loss: 0.120182 [21248/24872]: 0%| | 0/194 [00:12<?, ?it/s]
loss: 0.091277 [21376/24872]: 0%| | 0/194 [00:12<?, ?it/s]
loss: 0.133292 [21504/24872]: 0%| | 0/194 [00:12<?, ?it/s]
loss: 0.105936 [21632/24872]: 0%| | 0/194 [00:12<?, ?it/s]
loss: 0.113643 [21760/24872]: 0%| | 0/194 [00:12<?, ?it/s]
loss: 0.108435 [21888/24872]: 0%| | 0/194 [00:12<?, ?it/s]
loss: 0.129035 [22016/24872]: 0%| | 0/194 [00:12<?, ?it/s]
loss: 0.110704 [22144/24872]: 0%| | 0/194 [00:13<?, ?it/s]
loss: 0.121451 [22272/24872]: 0%| | 0/194 [00:13<?, ?it/s]
loss: 0.131486 [22400/24872]: 0%| | 0/194 [00:13<?, ?it/s]
loss: 0.120357 [22528/24872]: 0%| | 0/194 [00:13<?, ?it/s]
loss: 0.122530 [22656/24872]: 0%| | 0/194 [00:13<?, ?it/s]
loss: 0.128165 [22784/24872]: 0%| | 0/194 [00:13<?, ?it/s]
loss: 0.122141 [22912/24872]: 0%| | 0/194 [00:13<?, ?it/s]
loss: 0.147010 [23040/24872]: 0%| | 0/194 [00:13<?, ?it/s]
loss: 0.134809 [23168/24872]: 0%| | 0/194 [00:13<?, ?it/s]
loss: 0.118553 [23296/24872]: 0%| | 0/194 [00:13<?, ?it/s]
loss: 0.122137 [23424/24872]: 0%| | 0/194 [00:13<?, ?it/s]
loss: 0.121055 [23552/24872]: 0%| | 0/194 [00:13<?, ?it/s]
loss: 0.103856 [23680/24872]: 0%| | 0/194 [00:13<?, ?it/s]
loss: 0.121974 [23808/24872]: 0%| | 0/194 [00:14<?, ?it/s]
loss: 0.120406 [23936/24872]: 0%| | 0/194 [00:14<?, ?it/s]
loss: 0.122261 [24064/24872]: 0%| | 0/194 [00:14<?, ?it/s]
loss: 0.120461 [24192/24872]: 0%| | 0/194 [00:14<?, ?it/s]
loss: 0.105582 [24320/24872]: 0%| | 0/194 [00:14<?, ?it/s]
loss: 0.122582 [24448/24872]: 0%| | 0/194 [00:14<?, ?it/s]
loss: 0.114443 [24576/24872]: 0%| | 0/194 [00:14<?, ?it/s]
loss: 0.094994 [24704/24872]: 0%| | 0/194 [00:14<?, ?it/s]
loss: 0.110063 [24832/24872]: 0%| | 0/194 [00:14<?, ?it/s]
loss: 0.162960 [24872/24872]: 0%| | 0/194 [00:14<?, ?it/s]
loss: 0.162960 [24872/24872]: : 195it [00:14, 13.30it/s]
Epoch 4, time=294.49s
0%| | 0/194 [00:00<?, ?it/s]
loss: 0.110463 [ 128/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.105584 [ 256/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.106876 [ 384/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.114686 [ 512/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.089062 [ 640/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.099185 [ 768/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.123730 [ 896/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.120897 [ 1024/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.111680 [ 1152/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.104473 [ 1280/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.100675 [ 1408/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.118769 [ 1536/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.128741 [ 1664/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.117827 [ 1792/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.144716 [ 1920/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.109520 [ 2048/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.142536 [ 2176/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.132424 [ 2304/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.131866 [ 2432/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.143968 [ 2560/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.141159 [ 2688/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.109336 [ 2816/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.088874 [ 2944/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.141539 [ 3072/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.143655 [ 3200/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.106126 [ 3328/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.123610 [ 3456/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.123152 [ 3584/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.132364 [ 3712/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.119172 [ 3840/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.117674 [ 3968/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.103461 [ 4096/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.104357 [ 4224/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.104625 [ 4352/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.097834 [ 4480/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.118302 [ 4608/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.113485 [ 4736/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.127713 [ 4864/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.094358 [ 4992/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.099007 [ 5120/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.120059 [ 5248/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.133769 [ 5376/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.092116 [ 5504/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.131918 [ 5632/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.126567 [ 5760/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.094062 [ 5888/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.113532 [ 6016/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.124410 [ 6144/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.102729 [ 6272/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.137252 [ 6400/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.074781 [ 6528/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.126861 [ 6656/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.118239 [ 6784/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.098533 [ 6912/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.090621 [ 7040/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.099352 [ 7168/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.106499 [ 7296/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.108671 [ 7424/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.088876 [ 7552/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.100461 [ 7680/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.111344 [ 7808/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.120274 [ 7936/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.115268 [ 8064/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.114822 [ 8192/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.106377 [ 8320/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.078742 [ 8448/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.123343 [ 8576/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.117451 [ 8704/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.093508 [ 8832/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.116343 [ 8960/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.085441 [ 9088/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.109142 [ 9216/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.083528 [ 9344/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.112582 [ 9472/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.098686 [ 9600/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.118618 [ 9728/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.123003 [ 9856/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.095925 [ 9984/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.102811 [10112/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.094450 [10240/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.126959 [10368/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.108237 [10496/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.115144 [10624/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.084729 [10752/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.080207 [10880/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.109690 [11008/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.132893 [11136/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.105939 [11264/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.119891 [11392/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.095730 [11520/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.106685 [11648/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.113159 [11776/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.082103 [11904/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.107763 [12032/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.111951 [12160/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.118214 [12288/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.096032 [12416/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.100530 [12544/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.086582 [12672/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.137470 [12800/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.151012 [12928/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.098132 [13056/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.078421 [13184/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.092991 [13312/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.093502 [13440/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.113357 [13568/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.104882 [13696/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.081029 [13824/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.131736 [13952/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.115639 [14080/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.092830 [14208/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.134754 [14336/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.137186 [14464/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.104057 [14592/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.117051 [14720/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.111876 [14848/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.105500 [14976/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.091757 [15104/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.077255 [15232/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.100180 [15360/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.118366 [15488/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.107475 [15616/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.128446 [15744/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.103645 [15872/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.135976 [16000/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.119591 [16128/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.129887 [16256/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.107658 [16384/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.105595 [16512/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.100349 [16640/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.106567 [16768/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.093365 [16896/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.106231 [17024/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.103960 [17152/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.132500 [17280/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.122338 [17408/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.086144 [17536/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.116522 [17664/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.130511 [17792/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.129782 [17920/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.093528 [18048/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.072569 [18176/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.133705 [18304/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.109532 [18432/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.090417 [18560/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.097679 [18688/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.117376 [18816/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.127633 [18944/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.129434 [19072/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.119783 [19200/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.107175 [19328/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.113759 [19456/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.114448 [19584/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.082249 [19712/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.077360 [19840/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.098578 [19968/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.121334 [20096/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.100322 [20224/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.116148 [20352/24872]: 0%| | 0/194 [00:12<?, ?it/s]
loss: 0.144950 [20480/24872]: 0%| | 0/194 [00:12<?, ?it/s]
loss: 0.090493 [20608/24872]: 0%| | 0/194 [00:12<?, ?it/s]
loss: 0.102522 [20736/24872]: 0%| | 0/194 [00:12<?, ?it/s]
loss: 0.121596 [20864/24872]: 0%| | 0/194 [00:12<?, ?it/s]
loss: 0.128786 [20992/24872]: 0%| | 0/194 [00:12<?, ?it/s]
loss: 0.093819 [21120/24872]: 0%| | 0/194 [00:12<?, ?it/s]
loss: 0.113522 [21248/24872]: 0%| | 0/194 [00:12<?, ?it/s]
loss: 0.080673 [21376/24872]: 0%| | 0/194 [00:12<?, ?it/s]
loss: 0.112314 [21504/24872]: 0%| | 0/194 [00:12<?, ?it/s]
loss: 0.097544 [21632/24872]: 0%| | 0/194 [00:12<?, ?it/s]
loss: 0.103232 [21760/24872]: 0%| | 0/194 [00:12<?, ?it/s]
loss: 0.097685 [21888/24872]: 0%| | 0/194 [00:12<?, ?it/s]
loss: 0.117581 [22016/24872]: 0%| | 0/194 [00:13<?, ?it/s]
loss: 0.094514 [22144/24872]: 0%| | 0/194 [00:13<?, ?it/s]
loss: 0.110663 [22272/24872]: 0%| | 0/194 [00:13<?, ?it/s]
loss: 0.121229 [22400/24872]: 0%| | 0/194 [00:13<?, ?it/s]
loss: 0.108201 [22528/24872]: 0%| | 0/194 [00:13<?, ?it/s]
loss: 0.113670 [22656/24872]: 0%| | 0/194 [00:13<?, ?it/s]
loss: 0.131064 [22784/24872]: 0%| | 0/194 [00:13<?, ?it/s]
loss: 0.115950 [22912/24872]: 0%| | 0/194 [00:13<?, ?it/s]
loss: 0.140320 [23040/24872]: 0%| | 0/194 [00:13<?, ?it/s]
loss: 0.111664 [23168/24872]: 0%| | 0/194 [00:13<?, ?it/s]
loss: 0.108110 [23296/24872]: 0%| | 0/194 [00:13<?, ?it/s]
loss: 0.118967 [23424/24872]: 0%| | 0/194 [00:13<?, ?it/s]
loss: 0.112195 [23552/24872]: 0%| | 0/194 [00:13<?, ?it/s]
loss: 0.096003 [23680/24872]: 0%| | 0/194 [00:14<?, ?it/s]
loss: 0.098652 [23808/24872]: 0%| | 0/194 [00:14<?, ?it/s]
loss: 0.106374 [23936/24872]: 0%| | 0/194 [00:14<?, ?it/s]
loss: 0.116248 [24064/24872]: 0%| | 0/194 [00:14<?, ?it/s]
loss: 0.112757 [24192/24872]: 0%| | 0/194 [00:14<?, ?it/s]
loss: 0.108822 [24320/24872]: 0%| | 0/194 [00:14<?, ?it/s]
loss: 0.110083 [24448/24872]: 0%| | 0/194 [00:14<?, ?it/s]
loss: 0.110138 [24576/24872]: 0%| | 0/194 [00:14<?, ?it/s]
loss: 0.086616 [24704/24872]: 0%| | 0/194 [00:14<?, ?it/s]
loss: 0.099276 [24832/24872]: 0%| | 0/194 [00:14<?, ?it/s]
loss: 0.144752 [24872/24872]: 0%| | 0/194 [00:14<?, ?it/s]
loss: 0.144752 [24872/24872]: : 195it [00:14, 13.21it/s]
Epoch 5, time=309.25s
0%| | 0/194 [00:00<?, ?it/s]
loss: 0.100315 [ 128/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.096467 [ 256/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.109522 [ 384/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.112700 [ 512/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.103652 [ 640/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.094186 [ 768/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.130961 [ 896/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.121548 [ 1024/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.113590 [ 1152/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.103299 [ 1280/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.107425 [ 1408/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.112877 [ 1536/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.109000 [ 1664/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.098623 [ 1792/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.126144 [ 1920/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.112432 [ 2048/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.112108 [ 2176/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.118718 [ 2304/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.116106 [ 2432/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.108697 [ 2560/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.125772 [ 2688/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.090284 [ 2816/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.099911 [ 2944/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.131131 [ 3072/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.129027 [ 3200/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.099083 [ 3328/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.106451 [ 3456/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.123740 [ 3584/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.119420 [ 3712/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.113288 [ 3840/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.111000 [ 3968/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.105202 [ 4096/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.103318 [ 4224/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.108243 [ 4352/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.096299 [ 4480/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.110521 [ 4608/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.114264 [ 4736/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.122136 [ 4864/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.085973 [ 4992/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.098732 [ 5120/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.114114 [ 5248/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.130468 [ 5376/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.087907 [ 5504/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.129612 [ 5632/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.127114 [ 5760/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.091291 [ 5888/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.121384 [ 6016/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.124195 [ 6144/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.113802 [ 6272/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.131538 [ 6400/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.078282 [ 6528/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.139483 [ 6656/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.129745 [ 6784/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.101469 [ 6912/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.098343 [ 7040/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.098039 [ 7168/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.120307 [ 7296/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.109832 [ 7424/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.091669 [ 7552/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.107290 [ 7680/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.120184 [ 7808/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.119349 [ 7936/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.115221 [ 8064/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.113156 [ 8192/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.120960 [ 8320/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.077828 [ 8448/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.126292 [ 8576/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.116415 [ 8704/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.095697 [ 8832/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.116635 [ 8960/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.082727 [ 9088/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.116676 [ 9216/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.083840 [ 9344/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.132168 [ 9472/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.096393 [ 9600/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.104687 [ 9728/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.129024 [ 9856/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.092253 [ 9984/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.106809 [10112/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.098347 [10240/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.126094 [10368/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.128139 [10496/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.114076 [10624/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.095817 [10752/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.084649 [10880/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.110300 [11008/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.149146 [11136/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.111408 [11264/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.105221 [11392/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.096561 [11520/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.097534 [11648/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.126763 [11776/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.093109 [11904/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.094747 [12032/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.104818 [12160/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.129912 [12288/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.088090 [12416/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.102102 [12544/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.090876 [12672/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.140841 [12800/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.161285 [12928/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.096170 [13056/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.077759 [13184/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.097774 [13312/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.101434 [13440/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.120291 [13568/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.098201 [13696/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.083828 [13824/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.123807 [13952/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.115502 [14080/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.096048 [14208/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.132311 [14336/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.134838 [14464/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.097808 [14592/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.108293 [14720/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.107050 [14848/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.109425 [14976/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.090481 [15104/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.078694 [15232/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.086082 [15360/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.105707 [15488/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.097771 [15616/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.111264 [15744/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.098232 [15872/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.122723 [16000/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.107318 [16128/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.115004 [16256/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.101583 [16384/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.108024 [16512/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.096805 [16640/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.109685 [16768/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.087417 [16896/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.097664 [17024/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.096735 [17152/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.120306 [17280/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.122256 [17408/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.075436 [17536/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.124992 [17664/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.121240 [17792/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.138519 [17920/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.093167 [18048/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.082824 [18176/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.136373 [18304/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.102956 [18432/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.112905 [18560/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.093533 [18688/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.134303 [18816/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.128121 [18944/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.144809 [19072/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.116675 [19200/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.130064 [19328/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.105289 [19456/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.118733 [19584/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.078792 [19712/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.084383 [19840/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.109351 [19968/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.115243 [20096/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.110845 [20224/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.115216 [20352/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.147312 [20480/24872]: 0%| | 0/194 [00:12<?, ?it/s]
loss: 0.083832 [20608/24872]: 0%| | 0/194 [00:12<?, ?it/s]
loss: 0.092310 [20736/24872]: 0%| | 0/194 [00:12<?, ?it/s]
loss: 0.110013 [20864/24872]: 0%| | 0/194 [00:12<?, ?it/s]
loss: 0.106021 [20992/24872]: 0%| | 0/194 [00:12<?, ?it/s]
loss: 0.087370 [21120/24872]: 0%| | 0/194 [00:12<?, ?it/s]
loss: 0.105873 [21248/24872]: 0%| | 0/194 [00:12<?, ?it/s]
loss: 0.076603 [21376/24872]: 0%| | 0/194 [00:12<?, ?it/s]
loss: 0.100853 [21504/24872]: 0%| | 0/194 [00:12<?, ?it/s]
loss: 0.092297 [21632/24872]: 0%| | 0/194 [00:12<?, ?it/s]
loss: 0.096177 [21760/24872]: 0%| | 0/194 [00:12<?, ?it/s]
loss: 0.099443 [21888/24872]: 0%| | 0/194 [00:12<?, ?it/s]
loss: 0.118122 [22016/24872]: 0%| | 0/194 [00:12<?, ?it/s]
loss: 0.094459 [22144/24872]: 0%| | 0/194 [00:13<?, ?it/s]
loss: 0.091663 [22272/24872]: 0%| | 0/194 [00:13<?, ?it/s]
loss: 0.112700 [22400/24872]: 0%| | 0/194 [00:13<?, ?it/s]
loss: 0.097481 [22528/24872]: 0%| | 0/194 [00:13<?, ?it/s]
loss: 0.116737 [22656/24872]: 0%| | 0/194 [00:13<?, ?it/s]
loss: 0.116924 [22784/24872]: 0%| | 0/194 [00:13<?, ?it/s]
loss: 0.111815 [22912/24872]: 0%| | 0/194 [00:13<?, ?it/s]
loss: 0.149304 [23040/24872]: 0%| | 0/194 [00:13<?, ?it/s]
loss: 0.100747 [23168/24872]: 0%| | 0/194 [00:13<?, ?it/s]
loss: 0.095638 [23296/24872]: 0%| | 0/194 [00:13<?, ?it/s]
loss: 0.116302 [23424/24872]: 0%| | 0/194 [00:13<?, ?it/s]
loss: 0.106189 [23552/24872]: 0%| | 0/194 [00:13<?, ?it/s]
loss: 0.088004 [23680/24872]: 0%| | 0/194 [00:13<?, ?it/s]
loss: 0.092248 [23808/24872]: 0%| | 0/194 [00:14<?, ?it/s]
loss: 0.107165 [23936/24872]: 0%| | 0/194 [00:14<?, ?it/s]
loss: 0.107942 [24064/24872]: 0%| | 0/194 [00:14<?, ?it/s]
loss: 0.107816 [24192/24872]: 0%| | 0/194 [00:14<?, ?it/s]
loss: 0.108913 [24320/24872]: 0%| | 0/194 [00:14<?, ?it/s]
loss: 0.107540 [24448/24872]: 0%| | 0/194 [00:14<?, ?it/s]
loss: 0.100484 [24576/24872]: 0%| | 0/194 [00:14<?, ?it/s]
loss: 0.083515 [24704/24872]: 0%| | 0/194 [00:14<?, ?it/s]
loss: 0.093126 [24832/24872]: 0%| | 0/194 [00:14<?, ?it/s]
loss: 0.124772 [24872/24872]: 0%| | 0/194 [00:14<?, ?it/s]
loss: 0.124772 [24872/24872]: : 195it [00:14, 13.33it/s]
Epoch 6, time=323.88s
0%| | 0/194 [00:00<?, ?it/s]
loss: 0.091822 [ 128/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.096668 [ 256/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.094848 [ 384/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.103363 [ 512/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.083706 [ 640/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.092442 [ 768/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.115004 [ 896/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.118062 [ 1024/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.109516 [ 1152/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.098408 [ 1280/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.099470 [ 1408/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.111615 [ 1536/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.104842 [ 1664/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.099875 [ 1792/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.137292 [ 1920/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.104875 [ 2048/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.118369 [ 2176/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.112156 [ 2304/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.125209 [ 2432/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.105115 [ 2560/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.136188 [ 2688/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.088082 [ 2816/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.107938 [ 2944/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.138275 [ 3072/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.112694 [ 3200/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.105477 [ 3328/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.100896 [ 3456/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.119105 [ 3584/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.106522 [ 3712/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.124355 [ 3840/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.110176 [ 3968/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.107890 [ 4096/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.095129 [ 4224/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.114029 [ 4352/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.099208 [ 4480/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.115744 [ 4608/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.111914 [ 4736/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.123174 [ 4864/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.091999 [ 4992/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.097734 [ 5120/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.113600 [ 5248/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.139459 [ 5376/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.092765 [ 5504/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.104677 [ 5632/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.109384 [ 5760/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.099762 [ 5888/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.110302 [ 6016/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.112601 [ 6144/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.109027 [ 6272/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.131335 [ 6400/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.075915 [ 6528/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.124084 [ 6656/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.118948 [ 6784/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.092663 [ 6912/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.079871 [ 7040/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.096844 [ 7168/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.098359 [ 7296/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.099518 [ 7424/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.087188 [ 7552/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.096747 [ 7680/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.099882 [ 7808/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.107265 [ 7936/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.116041 [ 8064/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.109740 [ 8192/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.118171 [ 8320/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.082516 [ 8448/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.119701 [ 8576/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.121472 [ 8704/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.103721 [ 8832/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.111701 [ 8960/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.087313 [ 9088/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.114269 [ 9216/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.090009 [ 9344/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.125898 [ 9472/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.099183 [ 9600/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.105139 [ 9728/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.118013 [ 9856/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.090765 [ 9984/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.108184 [10112/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.083962 [10240/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.111584 [10368/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.121515 [10496/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.109855 [10624/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.083633 [10752/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.079796 [10880/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.110608 [11008/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.133072 [11136/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.100136 [11264/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.120878 [11392/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.094991 [11520/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.100375 [11648/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.112485 [11776/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.081969 [11904/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.084770 [12032/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.095736 [12160/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.119570 [12288/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.089514 [12416/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.101947 [12544/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.084746 [12672/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.127046 [12800/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.136411 [12928/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.100458 [13056/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.069464 [13184/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.104177 [13312/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.092611 [13440/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.114082 [13568/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.086004 [13696/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.074878 [13824/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.116946 [13952/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.109377 [14080/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.100364 [14208/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.135354 [14336/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.120729 [14464/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.099814 [14592/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.104518 [14720/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.102323 [14848/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.104081 [14976/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.086855 [15104/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.068980 [15232/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.082469 [15360/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.092838 [15488/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.093016 [15616/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.117154 [15744/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.094054 [15872/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.120533 [16000/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.106119 [16128/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.116145 [16256/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.099848 [16384/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.112625 [16512/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.092130 [16640/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.104050 [16768/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.088098 [16896/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.095386 [17024/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.097598 [17152/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.121560 [17280/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.103962 [17408/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.076838 [17536/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.105788 [17664/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.117811 [17792/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.122153 [17920/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.084114 [18048/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.066695 [18176/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.125007 [18304/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.091585 [18432/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.093625 [18560/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.091357 [18688/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.112079 [18816/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.119787 [18944/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.121412 [19072/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.111179 [19200/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.098577 [19328/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.113987 [19456/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.091505 [19584/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.079279 [19712/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.073405 [19840/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.100941 [19968/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.106398 [20096/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.103247 [20224/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.128538 [20352/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.141076 [20480/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.084928 [20608/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.095284 [20736/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.111475 [20864/24872]: 0%| | 0/194 [00:12<?, ?it/s]
loss: 0.104325 [20992/24872]: 0%| | 0/194 [00:12<?, ?it/s]
loss: 0.091193 [21120/24872]: 0%| | 0/194 [00:12<?, ?it/s]
loss: 0.110282 [21248/24872]: 0%| | 0/194 [00:12<?, ?it/s]
loss: 0.075233 [21376/24872]: 0%| | 0/194 [00:12<?, ?it/s]
loss: 0.106533 [21504/24872]: 0%| | 0/194 [00:12<?, ?it/s]
loss: 0.095245 [21632/24872]: 0%| | 0/194 [00:12<?, ?it/s]
loss: 0.123990 [21760/24872]: 0%| | 0/194 [00:12<?, ?it/s]
loss: 0.096269 [21888/24872]: 0%| | 0/194 [00:12<?, ?it/s]
loss: 0.130149 [22016/24872]: 0%| | 0/194 [00:12<?, ?it/s]
loss: 0.103049 [22144/24872]: 0%| | 0/194 [00:12<?, ?it/s]
loss: 0.102684 [22272/24872]: 0%| | 0/194 [00:12<?, ?it/s]
loss: 0.110286 [22400/24872]: 0%| | 0/194 [00:12<?, ?it/s]
loss: 0.100130 [22528/24872]: 0%| | 0/194 [00:13<?, ?it/s]
loss: 0.119766 [22656/24872]: 0%| | 0/194 [00:13<?, ?it/s]
loss: 0.127693 [22784/24872]: 0%| | 0/194 [00:13<?, ?it/s]
loss: 0.119007 [22912/24872]: 0%| | 0/194 [00:13<?, ?it/s]
loss: 0.130231 [23040/24872]: 0%| | 0/194 [00:13<?, ?it/s]
loss: 0.112852 [23168/24872]: 0%| | 0/194 [00:13<?, ?it/s]
loss: 0.096060 [23296/24872]: 0%| | 0/194 [00:13<?, ?it/s]
loss: 0.139485 [23424/24872]: 0%| | 0/194 [00:13<?, ?it/s]
loss: 0.109489 [23552/24872]: 0%| | 0/194 [00:13<?, ?it/s]
loss: 0.095780 [23680/24872]: 0%| | 0/194 [00:13<?, ?it/s]
loss: 0.094318 [23808/24872]: 0%| | 0/194 [00:13<?, ?it/s]
loss: 0.108704 [23936/24872]: 0%| | 0/194 [00:13<?, ?it/s]
loss: 0.105757 [24064/24872]: 0%| | 0/194 [00:13<?, ?it/s]
loss: 0.110573 [24192/24872]: 0%| | 0/194 [00:13<?, ?it/s]
loss: 0.113355 [24320/24872]: 0%| | 0/194 [00:14<?, ?it/s]
loss: 0.111766 [24448/24872]: 0%| | 0/194 [00:14<?, ?it/s]
loss: 0.105013 [24576/24872]: 0%| | 0/194 [00:14<?, ?it/s]
loss: 0.081332 [24704/24872]: 0%| | 0/194 [00:14<?, ?it/s]
loss: 0.096849 [24832/24872]: 0%| | 0/194 [00:14<?, ?it/s]
loss: 0.139450 [24872/24872]: 0%| | 0/194 [00:14<?, ?it/s]
loss: 0.139450 [24872/24872]: : 195it [00:14, 13.54it/s]
-------------------------------
LR=1e-05, batch_size=256
-------------------------------
Epoch 1, time=338.28s
0%| | 0/97 [00:00<?, ?it/s]
loss: 0.093246 [ 256/24872]: 0%| | 0/97 [00:00<?, ?it/s]
loss: 0.095403 [ 512/24872]: 0%| | 0/97 [00:00<?, ?it/s]
loss: 0.077396 [ 768/24872]: 0%| | 0/97 [00:00<?, ?it/s]
loss: 0.110306 [ 1024/24872]: 0%| | 0/97 [00:00<?, ?it/s]
loss: 0.098825 [ 1280/24872]: 0%| | 0/97 [00:00<?, ?it/s]
loss: 0.089099 [ 1536/24872]: 0%| | 0/97 [00:00<?, ?it/s]
loss: 0.088410 [ 1792/24872]: 0%| | 0/97 [00:00<?, ?it/s]
loss: 0.102336 [ 2048/24872]: 0%| | 0/97 [00:00<?, ?it/s]
loss: 0.102642 [ 2304/24872]: 0%| | 0/97 [00:01<?, ?it/s]
loss: 0.099597 [ 2560/24872]: 0%| | 0/97 [00:01<?, ?it/s]
loss: 0.094959 [ 2816/24872]: 0%| | 0/97 [00:01<?, ?it/s]
loss: 0.100008 [ 3072/24872]: 0%| | 0/97 [00:01<?, ?it/s]
loss: 0.100530 [ 3328/24872]: 0%| | 0/97 [00:01<?, ?it/s]
loss: 0.100087 [ 3584/24872]: 0%| | 0/97 [00:01<?, ?it/s]
loss: 0.098867 [ 3840/24872]: 0%| | 0/97 [00:01<?, ?it/s]
loss: 0.094118 [ 4096/24872]: 0%| | 0/97 [00:01<?, ?it/s]
loss: 0.086679 [ 4352/24872]: 0%| | 0/97 [00:01<?, ?it/s]
loss: 0.093098 [ 4608/24872]: 0%| | 0/97 [00:02<?, ?it/s]
loss: 0.102306 [ 4864/24872]: 0%| | 0/97 [00:02<?, ?it/s]
loss: 0.081787 [ 5120/24872]: 0%| | 0/97 [00:02<?, ?it/s]
loss: 0.121646 [ 5376/24872]: 0%| | 0/97 [00:02<?, ?it/s]
loss: 0.095188 [ 5632/24872]: 0%| | 0/97 [00:02<?, ?it/s]
loss: 0.091741 [ 5888/24872]: 0%| | 0/97 [00:02<?, ?it/s]
loss: 0.098806 [ 6144/24872]: 0%| | 0/97 [00:02<?, ?it/s]
loss: 0.105582 [ 6400/24872]: 0%| | 0/97 [00:02<?, ?it/s]
loss: 0.087629 [ 6656/24872]: 0%| | 0/97 [00:02<?, ?it/s]
loss: 0.093413 [ 6912/24872]: 0%| | 0/97 [00:03<?, ?it/s]
loss: 0.079745 [ 7168/24872]: 0%| | 0/97 [00:03<?, ?it/s]
loss: 0.087189 [ 7424/24872]: 0%| | 0/97 [00:03<?, ?it/s]
loss: 0.083366 [ 7680/24872]: 0%| | 0/97 [00:03<?, ?it/s]
loss: 0.098129 [ 7936/24872]: 0%| | 0/97 [00:03<?, ?it/s]
loss: 0.098485 [ 8192/24872]: 0%| | 0/97 [00:03<?, ?it/s]
loss: 0.085324 [ 8448/24872]: 0%| | 0/97 [00:03<?, ?it/s]
loss: 0.104890 [ 8704/24872]: 0%| | 0/97 [00:03<?, ?it/s]
loss: 0.093260 [ 8960/24872]: 0%| | 0/97 [00:03<?, ?it/s]
loss: 0.087556 [ 9216/24872]: 0%| | 0/97 [00:04<?, ?it/s]
loss: 0.095259 [ 9472/24872]: 0%| | 0/97 [00:04<?, ?it/s]
loss: 0.088047 [ 9728/24872]: 0%| | 0/97 [00:04<?, ?it/s]
loss: 0.095193 [ 9984/24872]: 0%| | 0/97 [00:04<?, ?it/s]
loss: 0.081033 [10240/24872]: 0%| | 0/97 [00:04<?, ?it/s]
loss: 0.098194 [10496/24872]: 0%| | 0/97 [00:04<?, ?it/s]
loss: 0.086099 [10752/24872]: 0%| | 0/97 [00:04<?, ?it/s]
loss: 0.080787 [11008/24872]: 0%| | 0/97 [00:04<?, ?it/s]
loss: 0.108619 [11264/24872]: 0%| | 0/97 [00:04<?, ?it/s]
loss: 0.094372 [11520/24872]: 0%| | 0/97 [00:05<?, ?it/s]
loss: 0.094655 [11776/24872]: 0%| | 0/97 [00:05<?, ?it/s]
loss: 0.072060 [12032/24872]: 0%| | 0/97 [00:05<?, ?it/s]
loss: 0.099571 [12288/24872]: 0%| | 0/97 [00:05<?, ?it/s]
loss: 0.079788 [12544/24872]: 0%| | 0/97 [00:05<?, ?it/s]
loss: 0.094921 [12800/24872]: 0%| | 0/97 [00:05<?, ?it/s]
loss: 0.106443 [13056/24872]: 0%| | 0/97 [00:05<?, ?it/s]
loss: 0.069765 [13312/24872]: 0%| | 0/97 [00:05<?, ?it/s]
loss: 0.086552 [13568/24872]: 0%| | 0/97 [00:05<?, ?it/s]
loss: 0.071451 [13824/24872]: 0%| | 0/97 [00:06<?, ?it/s]
loss: 0.100383 [14080/24872]: 0%| | 0/97 [00:06<?, ?it/s]
loss: 0.101025 [14336/24872]: 0%| | 0/97 [00:06<?, ?it/s]
loss: 0.094490 [14592/24872]: 0%| | 0/97 [00:06<?, ?it/s]
loss: 0.090526 [14848/24872]: 0%| | 0/97 [00:06<?, ?it/s]
loss: 0.083783 [15104/24872]: 0%| | 0/97 [00:06<?, ?it/s]
loss: 0.068100 [15360/24872]: 0%| | 0/97 [00:06<?, ?it/s]
loss: 0.085527 [15616/24872]: 0%| | 0/97 [00:06<?, ?it/s]
loss: 0.096502 [15872/24872]: 0%| | 0/97 [00:06<?, ?it/s]
loss: 0.105394 [16128/24872]: 0%| | 0/97 [00:06<?, ?it/s]
loss: 0.092955 [16384/24872]: 0%| | 0/97 [00:07<?, ?it/s]
loss: 0.092187 [16640/24872]: 0%| | 0/97 [00:07<?, ?it/s]
loss: 0.083260 [16896/24872]: 0%| | 0/97 [00:07<?, ?it/s]
loss: 0.086154 [17152/24872]: 0%| | 0/97 [00:07<?, ?it/s]
loss: 0.104153 [17408/24872]: 0%| | 0/97 [00:07<?, ?it/s]
loss: 0.079289 [17664/24872]: 0%| | 0/97 [00:07<?, ?it/s]
loss: 0.111564 [17920/24872]: 0%| | 0/97 [00:07<?, ?it/s]
loss: 0.068870 [18176/24872]: 0%| | 0/97 [00:07<?, ?it/s]
loss: 0.097638 [18432/24872]: 0%| | 0/97 [00:07<?, ?it/s]
loss: 0.080310 [18688/24872]: 0%| | 0/97 [00:08<?, ?it/s]
loss: 0.104357 [18944/24872]: 0%| | 0/97 [00:08<?, ?it/s]
loss: 0.107523 [19200/24872]: 0%| | 0/97 [00:08<?, ?it/s]
loss: 0.094145 [19456/24872]: 0%| | 0/97 [00:08<?, ?it/s]
loss: 0.074020 [19712/24872]: 0%| | 0/97 [00:08<?, ?it/s]
loss: 0.072680 [19968/24872]: 0%| | 0/97 [00:08<?, ?it/s]
loss: 0.086833 [20224/24872]: 0%| | 0/97 [00:08<?, ?it/s]
loss: 0.116504 [20480/24872]: 0%| | 0/97 [00:08<?, ?it/s]
loss: 0.075384 [20736/24872]: 0%| | 0/97 [00:08<?, ?it/s]
loss: 0.096887 [20992/24872]: 0%| | 0/97 [00:09<?, ?it/s]
loss: 0.086657 [21248/24872]: 0%| | 0/97 [00:09<?, ?it/s]
loss: 0.076445 [21504/24872]: 0%| | 0/97 [00:09<?, ?it/s]
loss: 0.082927 [21760/24872]: 0%| | 0/97 [00:09<?, ?it/s]
loss: 0.092022 [22016/24872]: 0%| | 0/97 [00:09<?, ?it/s]
loss: 0.081189 [22272/24872]: 0%| | 0/97 [00:09<?, ?it/s]
loss: 0.092954 [22528/24872]: 0%| | 0/97 [00:09<?, ?it/s]
loss: 0.101148 [22784/24872]: 0%| | 0/97 [00:09<?, ?it/s]
loss: 0.110285 [23040/24872]: 0%| | 0/97 [00:09<?, ?it/s]
loss: 0.083493 [23296/24872]: 0%| | 0/97 [00:10<?, ?it/s]
loss: 0.101128 [23552/24872]: 0%| | 0/97 [00:10<?, ?it/s]
loss: 0.076803 [23808/24872]: 0%| | 0/97 [00:10<?, ?it/s]
loss: 0.092983 [24064/24872]: 0%| | 0/97 [00:10<?, ?it/s]
loss: 0.095587 [24320/24872]: 0%| | 0/97 [00:10<?, ?it/s]
loss: 0.093730 [24576/24872]: 0%| | 0/97 [00:10<?, ?it/s]
loss: 0.078168 [24832/24872]: 0%| | 0/97 [00:10<?, ?it/s]
loss: 0.111018 [24872/24872]: 0%| | 0/97 [00:10<?, ?it/s]
loss: 0.111018 [24872/24872]: : 98it [00:10, 9.10it/s]
Epoch 2, time=349.05s
0%| | 0/97 [00:00<?, ?it/s]
loss: 0.081773 [ 256/24872]: 0%| | 0/97 [00:00<?, ?it/s]
loss: 0.084935 [ 512/24872]: 0%| | 0/97 [00:00<?, ?it/s]
loss: 0.072697 [ 768/24872]: 0%| | 0/97 [00:00<?, ?it/s]
loss: 0.100892 [ 1024/24872]: 0%| | 0/97 [00:00<?, ?it/s]
loss: 0.088550 [ 1280/24872]: 0%| | 0/97 [00:00<?, ?it/s]
loss: 0.085162 [ 1536/24872]: 0%| | 0/97 [00:00<?, ?it/s]
loss: 0.082653 [ 1792/24872]: 0%| | 0/97 [00:00<?, ?it/s]
loss: 0.096416 [ 2048/24872]: 0%| | 0/97 [00:00<?, ?it/s]
loss: 0.096993 [ 2304/24872]: 0%| | 0/97 [00:00<?, ?it/s]
loss: 0.094264 [ 2560/24872]: 0%| | 0/97 [00:01<?, ?it/s]
loss: 0.091900 [ 2816/24872]: 0%| | 0/97 [00:01<?, ?it/s]
loss: 0.095201 [ 3072/24872]: 0%| | 0/97 [00:01<?, ?it/s]
loss: 0.099384 [ 3328/24872]: 0%| | 0/97 [00:01<?, ?it/s]
loss: 0.093551 [ 3584/24872]: 0%| | 0/97 [00:01<?, ?it/s]
loss: 0.096738 [ 3840/24872]: 0%| | 0/97 [00:01<?, ?it/s]
loss: 0.091124 [ 4096/24872]: 0%| | 0/97 [00:01<?, ?it/s]
loss: 0.083353 [ 4352/24872]: 0%| | 0/97 [00:01<?, ?it/s]
loss: 0.088897 [ 4608/24872]: 0%| | 0/97 [00:01<?, ?it/s]
loss: 0.099826 [ 4864/24872]: 0%| | 0/97 [00:02<?, ?it/s]
loss: 0.077990 [ 5120/24872]: 0%| | 0/97 [00:02<?, ?it/s]
loss: 0.109934 [ 5376/24872]: 0%| | 0/97 [00:02<?, ?it/s]
loss: 0.090507 [ 5632/24872]: 0%| | 0/97 [00:02<?, ?it/s]
loss: 0.090140 [ 5888/24872]: 0%| | 0/97 [00:02<?, ?it/s]
loss: 0.096968 [ 6144/24872]: 0%| | 0/97 [00:02<?, ?it/s]
loss: 0.102796 [ 6400/24872]: 0%| | 0/97 [00:02<?, ?it/s]
loss: 0.084254 [ 6656/24872]: 0%| | 0/97 [00:02<?, ?it/s]
loss: 0.089231 [ 6912/24872]: 0%| | 0/97 [00:02<?, ?it/s]
loss: 0.076609 [ 7168/24872]: 0%| | 0/97 [00:03<?, ?it/s]
loss: 0.085326 [ 7424/24872]: 0%| | 0/97 [00:03<?, ?it/s]
loss: 0.081692 [ 7680/24872]: 0%| | 0/97 [00:03<?, ?it/s]
loss: 0.093930 [ 7936/24872]: 0%| | 0/97 [00:03<?, ?it/s]
loss: 0.097064 [ 8192/24872]: 0%| | 0/97 [00:03<?, ?it/s]
loss: 0.088516 [ 8448/24872]: 0%| | 0/97 [00:03<?, ?it/s]
loss: 0.101595 [ 8704/24872]: 0%| | 0/97 [00:03<?, ?it/s]
loss: 0.090388 [ 8960/24872]: 0%| | 0/97 [00:03<?, ?it/s]
loss: 0.083882 [ 9216/24872]: 0%| | 0/97 [00:03<?, ?it/s]
loss: 0.092616 [ 9472/24872]: 0%| | 0/97 [00:04<?, ?it/s]
loss: 0.084721 [ 9728/24872]: 0%| | 0/97 [00:04<?, ?it/s]
loss: 0.091982 [ 9984/24872]: 0%| | 0/97 [00:04<?, ?it/s]
loss: 0.079919 [10240/24872]: 0%| | 0/97 [00:04<?, ?it/s]
loss: 0.094600 [10496/24872]: 0%| | 0/97 [00:04<?, ?it/s]
loss: 0.084941 [10752/24872]: 0%| | 0/97 [00:04<?, ?it/s]
loss: 0.078594 [11008/24872]: 0%| | 0/97 [00:04<?, ?it/s]
loss: 0.103668 [11264/24872]: 0%| | 0/97 [00:04<?, ?it/s]
loss: 0.090911 [11520/24872]: 0%| | 0/97 [00:04<?, ?it/s]
loss: 0.091761 [11776/24872]: 0%| | 0/97 [00:05<?, ?it/s]
loss: 0.073481 [12032/24872]: 0%| | 0/97 [00:05<?, ?it/s]
loss: 0.097066 [12288/24872]: 0%| | 0/97 [00:05<?, ?it/s]
loss: 0.079707 [12544/24872]: 0%| | 0/97 [00:05<?, ?it/s]
loss: 0.093041 [12800/24872]: 0%| | 0/97 [00:05<?, ?it/s]
loss: 0.103344 [13056/24872]: 0%| | 0/97 [00:05<?, ?it/s]
loss: 0.073561 [13312/24872]: 0%| | 0/97 [00:05<?, ?it/s]
loss: 0.083032 [13568/24872]: 0%| | 0/97 [00:05<?, ?it/s]
loss: 0.070500 [13824/24872]: 0%| | 0/97 [00:05<?, ?it/s]
loss: 0.097100 [14080/24872]: 0%| | 0/97 [00:06<?, ?it/s]
loss: 0.098167 [14336/24872]: 0%| | 0/97 [00:06<?, ?it/s]
loss: 0.092353 [14592/24872]: 0%| | 0/97 [00:06<?, ?it/s]
loss: 0.087861 [14848/24872]: 0%| | 0/97 [00:06<?, ?it/s]
loss: 0.084190 [15104/24872]: 0%| | 0/97 [00:06<?, ?it/s]
loss: 0.065841 [15360/24872]: 0%| | 0/97 [00:06<?, ?it/s]
loss: 0.085030 [15616/24872]: 0%| | 0/97 [00:06<?, ?it/s]
loss: 0.092771 [15872/24872]: 0%| | 0/97 [00:06<?, ?it/s]
loss: 0.105051 [16128/24872]: 0%| | 0/97 [00:06<?, ?it/s]
loss: 0.090103 [16384/24872]: 0%| | 0/97 [00:06<?, ?it/s]
loss: 0.092786 [16640/24872]: 0%| | 0/97 [00:07<?, ?it/s]
loss: 0.079388 [16896/24872]: 0%| | 0/97 [00:07<?, ?it/s]
loss: 0.084696 [17152/24872]: 0%| | 0/97 [00:07<?, ?it/s]
loss: 0.102136 [17408/24872]: 0%| | 0/97 [00:07<?, ?it/s]
loss: 0.078569 [17664/24872]: 0%| | 0/97 [00:07<?, ?it/s]
loss: 0.109616 [17920/24872]: 0%| | 0/97 [00:07<?, ?it/s]
loss: 0.067808 [18176/24872]: 0%| | 0/97 [00:07<?, ?it/s]
loss: 0.096736 [18432/24872]: 0%| | 0/97 [00:07<?, ?it/s]
loss: 0.079910 [18688/24872]: 0%| | 0/97 [00:07<?, ?it/s]
loss: 0.104034 [18944/24872]: 0%| | 0/97 [00:08<?, ?it/s]
loss: 0.105627 [19200/24872]: 0%| | 0/97 [00:08<?, ?it/s]
loss: 0.092250 [19456/24872]: 0%| | 0/97 [00:08<?, ?it/s]
loss: 0.072201 [19712/24872]: 0%| | 0/97 [00:08<?, ?it/s]
loss: 0.072766 [19968/24872]: 0%| | 0/97 [00:08<?, ?it/s]
loss: 0.084983 [20224/24872]: 0%| | 0/97 [00:08<?, ?it/s]
loss: 0.114553 [20480/24872]: 0%| | 0/97 [00:08<?, ?it/s]
loss: 0.073938 [20736/24872]: 0%| | 0/97 [00:08<?, ?it/s]
loss: 0.096590 [20992/24872]: 0%| | 0/97 [00:08<?, ?it/s]
loss: 0.085335 [21248/24872]: 0%| | 0/97 [00:09<?, ?it/s]
loss: 0.074211 [21504/24872]: 0%| | 0/97 [00:09<?, ?it/s]
loss: 0.081502 [21760/24872]: 0%| | 0/97 [00:09<?, ?it/s]
loss: 0.090958 [22016/24872]: 0%| | 0/97 [00:09<?, ?it/s]
loss: 0.080468 [22272/24872]: 0%| | 0/97 [00:09<?, ?it/s]
loss: 0.091595 [22528/24872]: 0%| | 0/97 [00:09<?, ?it/s]
loss: 0.099271 [22784/24872]: 0%| | 0/97 [00:09<?, ?it/s]
loss: 0.109512 [23040/24872]: 0%| | 0/97 [00:09<?, ?it/s]
loss: 0.082638 [23296/24872]: 0%| | 0/97 [00:09<?, ?it/s]
loss: 0.100232 [23552/24872]: 0%| | 0/97 [00:10<?, ?it/s]
loss: 0.074942 [23808/24872]: 0%| | 0/97 [00:10<?, ?it/s]
loss: 0.091660 [24064/24872]: 0%| | 0/97 [00:10<?, ?it/s]
loss: 0.094227 [24320/24872]: 0%| | 0/97 [00:10<?, ?it/s]
loss: 0.092979 [24576/24872]: 0%| | 0/97 [00:10<?, ?it/s]
loss: 0.076757 [24832/24872]: 0%| | 0/97 [00:10<?, ?it/s]
loss: 0.107665 [24872/24872]: 0%| | 0/97 [00:10<?, ?it/s]
loss: 0.107665 [24872/24872]: : 98it [00:10, 9.20it/s]
Epoch 3, time=359.70s
0%| | 0/97 [00:00<?, ?it/s]
loss: 0.081535 [ 256/24872]: 0%| | 0/97 [00:00<?, ?it/s]
loss: 0.083804 [ 512/24872]: 0%| | 0/97 [00:00<?, ?it/s]
loss: 0.071962 [ 768/24872]: 0%| | 0/97 [00:00<?, ?it/s]
loss: 0.100061 [ 1024/24872]: 0%| | 0/97 [00:00<?, ?it/s]
loss: 0.087641 [ 1280/24872]: 0%| | 0/97 [00:00<?, ?it/s]
loss: 0.084171 [ 1536/24872]: 0%| | 0/97 [00:00<?, ?it/s]
loss: 0.081502 [ 1792/24872]: 0%| | 0/97 [00:00<?, ?it/s]
loss: 0.095900 [ 2048/24872]: 0%| | 0/97 [00:00<?, ?it/s]
loss: 0.095609 [ 2304/24872]: 0%| | 0/97 [00:00<?, ?it/s]
loss: 0.092941 [ 2560/24872]: 0%| | 0/97 [00:01<?, ?it/s]
loss: 0.090780 [ 2816/24872]: 0%| | 0/97 [00:01<?, ?it/s]
loss: 0.092975 [ 3072/24872]: 0%| | 0/97 [00:01<?, ?it/s]
loss: 0.098579 [ 3328/24872]: 0%| | 0/97 [00:01<?, ?it/s]
loss: 0.092315 [ 3584/24872]: 0%| | 0/97 [00:01<?, ?it/s]
loss: 0.095029 [ 3840/24872]: 0%| | 0/97 [00:01<?, ?it/s]
loss: 0.089750 [ 4096/24872]: 0%| | 0/97 [00:01<?, ?it/s]
loss: 0.082477 [ 4352/24872]: 0%| | 0/97 [00:01<?, ?it/s]
loss: 0.087644 [ 4608/24872]: 0%| | 0/97 [00:01<?, ?it/s]
loss: 0.098522 [ 4864/24872]: 0%| | 0/97 [00:02<?, ?it/s]
loss: 0.077229 [ 5120/24872]: 0%| | 0/97 [00:02<?, ?it/s]
loss: 0.108390 [ 5376/24872]: 0%| | 0/97 [00:02<?, ?it/s]
loss: 0.089936 [ 5632/24872]: 0%| | 0/97 [00:02<?, ?it/s]
loss: 0.089186 [ 5888/24872]: 0%| | 0/97 [00:02<?, ?it/s]
loss: 0.096317 [ 6144/24872]: 0%| | 0/97 [00:02<?, ?it/s]
loss: 0.101679 [ 6400/24872]: 0%| | 0/97 [00:02<?, ?it/s]
loss: 0.082743 [ 6656/24872]: 0%| | 0/97 [00:02<?, ?it/s]
loss: 0.087111 [ 6912/24872]: 0%| | 0/97 [00:02<?, ?it/s]
loss: 0.075565 [ 7168/24872]: 0%| | 0/97 [00:03<?, ?it/s]
loss: 0.084020 [ 7424/24872]: 0%| | 0/97 [00:03<?, ?it/s]
loss: 0.080596 [ 7680/24872]: 0%| | 0/97 [00:03<?, ?it/s]
loss: 0.092142 [ 7936/24872]: 0%| | 0/97 [00:03<?, ?it/s]
loss: 0.095659 [ 8192/24872]: 0%| | 0/97 [00:03<?, ?it/s]
loss: 0.087555 [ 8448/24872]: 0%| | 0/97 [00:03<?, ?it/s]
loss: 0.100400 [ 8704/24872]: 0%| | 0/97 [00:03<?, ?it/s]
loss: 0.089562 [ 8960/24872]: 0%| | 0/97 [00:03<?, ?it/s]
loss: 0.082639 [ 9216/24872]: 0%| | 0/97 [00:03<?, ?it/s]
loss: 0.091041 [ 9472/24872]: 0%| | 0/97 [00:04<?, ?it/s]
loss: 0.083878 [ 9728/24872]: 0%| | 0/97 [00:04<?, ?it/s]
loss: 0.090270 [ 9984/24872]: 0%| | 0/97 [00:04<?, ?it/s]
loss: 0.079171 [10240/24872]: 0%| | 0/97 [00:04<?, ?it/s]
loss: 0.093683 [10496/24872]: 0%| | 0/97 [00:04<?, ?it/s]
loss: 0.084416 [10752/24872]: 0%| | 0/97 [00:04<?, ?it/s]
loss: 0.077772 [11008/24872]: 0%| | 0/97 [00:04<?, ?it/s]
loss: 0.102112 [11264/24872]: 0%| | 0/97 [00:04<?, ?it/s]
loss: 0.089163 [11520/24872]: 0%| | 0/97 [00:04<?, ?it/s]
loss: 0.090948 [11776/24872]: 0%| | 0/97 [00:05<?, ?it/s]
loss: 0.072801 [12032/24872]: 0%| | 0/97 [00:05<?, ?it/s]
loss: 0.096711 [12288/24872]: 0%| | 0/97 [00:05<?, ?it/s]
loss: 0.079108 [12544/24872]: 0%| | 0/97 [00:05<?, ?it/s]
loss: 0.092005 [12800/24872]: 0%| | 0/97 [00:05<?, ?it/s]
loss: 0.101917 [13056/24872]: 0%| | 0/97 [00:05<?, ?it/s]
loss: 0.073365 [13312/24872]: 0%| | 0/97 [00:05<?, ?it/s]
loss: 0.081628 [13568/24872]: 0%| | 0/97 [00:05<?, ?it/s]
loss: 0.069828 [13824/24872]: 0%| | 0/97 [00:05<?, ?it/s]
loss: 0.095954 [14080/24872]: 0%| | 0/97 [00:06<?, ?it/s]
loss: 0.097262 [14336/24872]: 0%| | 0/97 [00:06<?, ?it/s]
loss: 0.090736 [14592/24872]: 0%| | 0/97 [00:06<?, ?it/s]
loss: 0.086486 [14848/24872]: 0%| | 0/97 [00:06<?, ?it/s]
loss: 0.082993 [15104/24872]: 0%| | 0/97 [00:06<?, ?it/s]
loss: 0.064777 [15360/24872]: 0%| | 0/97 [00:06<?, ?it/s]
loss: 0.083627 [15616/24872]: 0%| | 0/97 [00:06<?, ?it/s]
loss: 0.091386 [15872/24872]: 0%| | 0/97 [00:06<?, ?it/s]
loss: 0.103803 [16128/24872]: 0%| | 0/97 [00:06<?, ?it/s]
loss: 0.088908 [16384/24872]: 0%| | 0/97 [00:07<?, ?it/s]
loss: 0.091903 [16640/24872]: 0%| | 0/97 [00:07<?, ?it/s]
loss: 0.078476 [16896/24872]: 0%| | 0/97 [00:07<?, ?it/s]
loss: 0.083701 [17152/24872]: 0%| | 0/97 [00:07<?, ?it/s]
loss: 0.101380 [17408/24872]: 0%| | 0/97 [00:07<?, ?it/s]
loss: 0.078278 [17664/24872]: 0%| | 0/97 [00:07<?, ?it/s]
loss: 0.108581 [17920/24872]: 0%| | 0/97 [00:07<?, ?it/s]
loss: 0.066948 [18176/24872]: 0%| | 0/97 [00:07<?, ?it/s]
loss: 0.095845 [18432/24872]: 0%| | 0/97 [00:07<?, ?it/s]
loss: 0.079232 [18688/24872]: 0%| | 0/97 [00:08<?, ?it/s]
loss: 0.102615 [18944/24872]: 0%| | 0/97 [00:08<?, ?it/s]
loss: 0.104397 [19200/24872]: 0%| | 0/97 [00:08<?, ?it/s]
loss: 0.091168 [19456/24872]: 0%| | 0/97 [00:08<?, ?it/s]
loss: 0.071242 [19712/24872]: 0%| | 0/97 [00:08<?, ?it/s]
loss: 0.072496 [19968/24872]: 0%| | 0/97 [00:08<?, ?it/s]
loss: 0.083991 [20224/24872]: 0%| | 0/97 [00:08<?, ?it/s]
loss: 0.113449 [20480/24872]: 0%| | 0/97 [00:08<?, ?it/s]
loss: 0.072797 [20736/24872]: 0%| | 0/97 [00:08<?, ?it/s]
loss: 0.095948 [20992/24872]: 0%| | 0/97 [00:09<?, ?it/s]
loss: 0.084366 [21248/24872]: 0%| | 0/97 [00:09<?, ?it/s]
loss: 0.072987 [21504/24872]: 0%| | 0/97 [00:09<?, ?it/s]
loss: 0.080919 [21760/24872]: 0%| | 0/97 [00:09<?, ?it/s]
loss: 0.090461 [22016/24872]: 0%| | 0/97 [00:09<?, ?it/s]
loss: 0.079811 [22272/24872]: 0%| | 0/97 [00:09<?, ?it/s]
loss: 0.090501 [22528/24872]: 0%| | 0/97 [00:09<?, ?it/s]
loss: 0.098395 [22784/24872]: 0%| | 0/97 [00:09<?, ?it/s]
loss: 0.108802 [23040/24872]: 0%| | 0/97 [00:09<?, ?it/s]
loss: 0.081808 [23296/24872]: 0%| | 0/97 [00:10<?, ?it/s]
loss: 0.099641 [23552/24872]: 0%| | 0/97 [00:10<?, ?it/s]
loss: 0.073628 [23808/24872]: 0%| | 0/97 [00:10<?, ?it/s]
loss: 0.090679 [24064/24872]: 0%| | 0/97 [00:10<?, ?it/s]
loss: 0.093272 [24320/24872]: 0%| | 0/97 [00:10<?, ?it/s]
loss: 0.092006 [24576/24872]: 0%| | 0/97 [00:10<?, ?it/s]
loss: 0.075784 [24832/24872]: 0%| | 0/97 [00:10<?, ?it/s]
loss: 0.105596 [24872/24872]: 0%| | 0/97 [00:10<?, ?it/s]
loss: 0.105596 [24872/24872]: : 98it [00:10, 9.16it/s]
Epoch 4, time=370.41s
0%| | 0/97 [00:00<?, ?it/s]
loss: 0.080823 [ 256/24872]: 0%| | 0/97 [00:00<?, ?it/s]
loss: 0.082912 [ 512/24872]: 0%| | 0/97 [00:00<?, ?it/s]
loss: 0.071619 [ 768/24872]: 0%| | 0/97 [00:00<?, ?it/s]
loss: 0.098795 [ 1024/24872]: 0%| | 0/97 [00:00<?, ?it/s]
loss: 0.086884 [ 1280/24872]: 0%| | 0/97 [00:00<?, ?it/s]
loss: 0.083496 [ 1536/24872]: 0%| | 0/97 [00:00<?, ?it/s]
loss: 0.080685 [ 1792/24872]: 0%| | 0/97 [00:00<?, ?it/s]
loss: 0.095119 [ 2048/24872]: 0%| | 0/97 [00:00<?, ?it/s]
loss: 0.094816 [ 2304/24872]: 0%| | 0/97 [00:00<?, ?it/s]
loss: 0.092065 [ 2560/24872]: 0%| | 0/97 [00:01<?, ?it/s]
loss: 0.089973 [ 2816/24872]: 0%| | 0/97 [00:01<?, ?it/s]
loss: 0.091509 [ 3072/24872]: 0%| | 0/97 [00:01<?, ?it/s]
loss: 0.097909 [ 3328/24872]: 0%| | 0/97 [00:01<?, ?it/s]
loss: 0.091316 [ 3584/24872]: 0%| | 0/97 [00:01<?, ?it/s]
loss: 0.093824 [ 3840/24872]: 0%| | 0/97 [00:01<?, ?it/s]
loss: 0.088761 [ 4096/24872]: 0%| | 0/97 [00:01<?, ?it/s]
loss: 0.081723 [ 4352/24872]: 0%| | 0/97 [00:01<?, ?it/s]
loss: 0.086894 [ 4608/24872]: 0%| | 0/97 [00:01<?, ?it/s]
loss: 0.097497 [ 4864/24872]: 0%| | 0/97 [00:02<?, ?it/s]
loss: 0.076550 [ 5120/24872]: 0%| | 0/97 [00:02<?, ?it/s]
loss: 0.107417 [ 5376/24872]: 0%| | 0/97 [00:02<?, ?it/s]
loss: 0.089476 [ 5632/24872]: 0%| | 0/97 [00:02<?, ?it/s]
loss: 0.088424 [ 5888/24872]: 0%| | 0/97 [00:02<?, ?it/s]
loss: 0.095688 [ 6144/24872]: 0%| | 0/97 [00:02<?, ?it/s]
loss: 0.100698 [ 6400/24872]: 0%| | 0/97 [00:02<?, ?it/s]
loss: 0.081640 [ 6656/24872]: 0%| | 0/97 [00:02<?, ?it/s]
loss: 0.085701 [ 6912/24872]: 0%| | 0/97 [00:02<?, ?it/s]
loss: 0.074831 [ 7168/24872]: 0%| | 0/97 [00:03<?, ?it/s]
loss: 0.082999 [ 7424/24872]: 0%| | 0/97 [00:03<?, ?it/s]
loss: 0.079965 [ 7680/24872]: 0%| | 0/97 [00:03<?, ?it/s]
loss: 0.090851 [ 7936/24872]: 0%| | 0/97 [00:03<?, ?it/s]
loss: 0.094454 [ 8192/24872]: 0%| | 0/97 [00:03<?, ?it/s]
loss: 0.086742 [ 8448/24872]: 0%| | 0/97 [00:03<?, ?it/s]
loss: 0.099308 [ 8704/24872]: 0%| | 0/97 [00:03<?, ?it/s]
loss: 0.088856 [ 8960/24872]: 0%| | 0/97 [00:03<?, ?it/s]
loss: 0.081708 [ 9216/24872]: 0%| | 0/97 [00:03<?, ?it/s]
loss: 0.089805 [ 9472/24872]: 0%| | 0/97 [00:04<?, ?it/s]
loss: 0.083062 [ 9728/24872]: 0%| | 0/97 [00:04<?, ?it/s]
loss: 0.089107 [ 9984/24872]: 0%| | 0/97 [00:04<?, ?it/s]
loss: 0.078437 [10240/24872]: 0%| | 0/97 [00:04<?, ?it/s]
loss: 0.092912 [10496/24872]: 0%| | 0/97 [00:04<?, ?it/s]
loss: 0.083921 [10752/24872]: 0%| | 0/97 [00:04<?, ?it/s]
loss: 0.077041 [11008/24872]: 0%| | 0/97 [00:04<?, ?it/s]
loss: 0.100980 [11264/24872]: 0%| | 0/97 [00:04<?, ?it/s]
loss: 0.087946 [11520/24872]: 0%| | 0/97 [00:04<?, ?it/s]
loss: 0.090290 [11776/24872]: 0%| | 0/97 [00:05<?, ?it/s]
loss: 0.072051 [12032/24872]: 0%| | 0/97 [00:05<?, ?it/s]
loss: 0.096400 [12288/24872]: 0%| | 0/97 [00:05<?, ?it/s]
loss: 0.078486 [12544/24872]: 0%| | 0/97 [00:05<?, ?it/s]
loss: 0.091209 [12800/24872]: 0%| | 0/97 [00:05<?, ?it/s]
loss: 0.100863 [13056/24872]: 0%| | 0/97 [00:05<?, ?it/s]
loss: 0.072851 [13312/24872]: 0%| | 0/97 [00:05<?, ?it/s]
loss: 0.080695 [13568/24872]: 0%| | 0/97 [00:05<?, ?it/s]
loss: 0.069269 [13824/24872]: 0%| | 0/97 [00:05<?, ?it/s]
loss: 0.095017 [14080/24872]: 0%| | 0/97 [00:06<?, ?it/s]
loss: 0.096559 [14336/24872]: 0%| | 0/97 [00:06<?, ?it/s]
loss: 0.089565 [14592/24872]: 0%| | 0/97 [00:06<?, ?it/s]
loss: 0.085410 [14848/24872]: 0%| | 0/97 [00:06<?, ?it/s]
loss: 0.082031 [15104/24872]: 0%| | 0/97 [00:06<?, ?it/s]
loss: 0.064035 [15360/24872]: 0%| | 0/97 [00:06<?, ?it/s]
loss: 0.082536 [15616/24872]: 0%| | 0/97 [00:06<?, ?it/s]
loss: 0.089980 [15872/24872]: 0%| | 0/97 [00:06<?, ?it/s]
loss: 0.102642 [16128/24872]: 0%| | 0/97 [00:06<?, ?it/s]
loss: 0.087982 [16384/24872]: 0%| | 0/97 [00:07<?, ?it/s]
loss: 0.091072 [16640/24872]: 0%| | 0/97 [00:07<?, ?it/s]
loss: 0.077777 [16896/24872]: 0%| | 0/97 [00:07<?, ?it/s]
loss: 0.082832 [17152/24872]: 0%| | 0/97 [00:07<?, ?it/s]
loss: 0.100728 [17408/24872]: 0%| | 0/97 [00:07<?, ?it/s]
loss: 0.077938 [17664/24872]: 0%| | 0/97 [00:07<?, ?it/s]
loss: 0.107698 [17920/24872]: 0%| | 0/97 [00:07<?, ?it/s]
loss: 0.066177 [18176/24872]: 0%| | 0/97 [00:07<?, ?it/s]
loss: 0.095027 [18432/24872]: 0%| | 0/97 [00:07<?, ?it/s]
loss: 0.078535 [18688/24872]: 0%| | 0/97 [00:07<?, ?it/s]
loss: 0.101366 [18944/24872]: 0%| | 0/97 [00:08<?, ?it/s]
loss: 0.103506 [19200/24872]: 0%| | 0/97 [00:08<?, ?it/s]
loss: 0.090394 [19456/24872]: 0%| | 0/97 [00:08<?, ?it/s]
loss: 0.070520 [19712/24872]: 0%| | 0/97 [00:08<?, ?it/s]
loss: 0.072179 [19968/24872]: 0%| | 0/97 [00:08<?, ?it/s]
loss: 0.083196 [20224/24872]: 0%| | 0/97 [00:08<?, ?it/s]
loss: 0.112464 [20480/24872]: 0%| | 0/97 [00:08<?, ?it/s]
loss: 0.071855 [20736/24872]: 0%| | 0/97 [00:08<?, ?it/s]
loss: 0.095327 [20992/24872]: 0%| | 0/97 [00:08<?, ?it/s]
loss: 0.083139 [21248/24872]: 0%| | 0/97 [00:09<?, ?it/s]
loss: 0.072245 [21504/24872]: 0%| | 0/97 [00:09<?, ?it/s]
loss: 0.080300 [21760/24872]: 0%| | 0/97 [00:09<?, ?it/s]
loss: 0.089943 [22016/24872]: 0%| | 0/97 [00:09<?, ?it/s]
loss: 0.079159 [22272/24872]: 0%| | 0/97 [00:09<?, ?it/s]
loss: 0.089572 [22528/24872]: 0%| | 0/97 [00:09<?, ?it/s]
loss: 0.097742 [22784/24872]: 0%| | 0/97 [00:09<?, ?it/s]
loss: 0.108024 [23040/24872]: 0%| | 0/97 [00:09<?, ?it/s]
loss: 0.081005 [23296/24872]: 0%| | 0/97 [00:09<?, ?it/s]
loss: 0.099073 [23552/24872]: 0%| | 0/97 [00:10<?, ?it/s]
loss: 0.072694 [23808/24872]: 0%| | 0/97 [00:10<?, ?it/s]
loss: 0.089809 [24064/24872]: 0%| | 0/97 [00:10<?, ?it/s]
loss: 0.092429 [24320/24872]: 0%| | 0/97 [00:10<?, ?it/s]
loss: 0.091219 [24576/24872]: 0%| | 0/97 [00:10<?, ?it/s]
loss: 0.074965 [24832/24872]: 0%| | 0/97 [00:10<?, ?it/s]
loss: 0.103916 [24872/24872]: 0%| | 0/97 [00:10<?, ?it/s]
loss: 0.103916 [24872/24872]: : 98it [00:10, 9.19it/s]
Epoch 5, time=381.07s
0%| | 0/97 [00:00<?, ?it/s]
loss: 0.080048 [ 256/24872]: 0%| | 0/97 [00:00<?, ?it/s]
loss: 0.081974 [ 512/24872]: 0%| | 0/97 [00:00<?, ?it/s]
loss: 0.071051 [ 768/24872]: 0%| | 0/97 [00:00<?, ?it/s]
loss: 0.097718 [ 1024/24872]: 0%| | 0/97 [00:00<?, ?it/s]
loss: 0.086225 [ 1280/24872]: 0%| | 0/97 [00:00<?, ?it/s]
loss: 0.082693 [ 1536/24872]: 0%| | 0/97 [00:00<?, ?it/s]
loss: 0.079635 [ 1792/24872]: 0%| | 0/97 [00:00<?, ?it/s]
loss: 0.094266 [ 2048/24872]: 0%| | 0/97 [00:00<?, ?it/s]
loss: 0.094154 [ 2304/24872]: 0%| | 0/97 [00:00<?, ?it/s]
loss: 0.091112 [ 2560/24872]: 0%| | 0/97 [00:01<?, ?it/s]
loss: 0.089120 [ 2816/24872]: 0%| | 0/97 [00:01<?, ?it/s]
loss: 0.090262 [ 3072/24872]: 0%| | 0/97 [00:01<?, ?it/s]
loss: 0.097440 [ 3328/24872]: 0%| | 0/97 [00:01<?, ?it/s]
loss: 0.090126 [ 3584/24872]: 0%| | 0/97 [00:01<?, ?it/s]
loss: 0.092622 [ 3840/24872]: 0%| | 0/97 [00:01<?, ?it/s]
loss: 0.087873 [ 4096/24872]: 0%| | 0/97 [00:01<?, ?it/s]
loss: 0.080819 [ 4352/24872]: 0%| | 0/97 [00:01<?, ?it/s]
loss: 0.085866 [ 4608/24872]: 0%| | 0/97 [00:01<?, ?it/s]
loss: 0.096633 [ 4864/24872]: 0%| | 0/97 [00:02<?, ?it/s]
loss: 0.075907 [ 5120/24872]: 0%| | 0/97 [00:02<?, ?it/s]
loss: 0.106556 [ 5376/24872]: 0%| | 0/97 [00:02<?, ?it/s]
loss: 0.088993 [ 5632/24872]: 0%| | 0/97 [00:02<?, ?it/s]
loss: 0.087647 [ 5888/24872]: 0%| | 0/97 [00:02<?, ?it/s]
loss: 0.095013 [ 6144/24872]: 0%| | 0/97 [00:02<?, ?it/s]
loss: 0.099803 [ 6400/24872]: 0%| | 0/97 [00:02<?, ?it/s]
loss: 0.080794 [ 6656/24872]: 0%| | 0/97 [00:02<?, ?it/s]
loss: 0.084640 [ 6912/24872]: 0%| | 0/97 [00:02<?, ?it/s]
loss: 0.074247 [ 7168/24872]: 0%| | 0/97 [00:03<?, ?it/s]
loss: 0.082201 [ 7424/24872]: 0%| | 0/97 [00:03<?, ?it/s]
loss: 0.079445 [ 7680/24872]: 0%| | 0/97 [00:03<?, ?it/s]
loss: 0.089848 [ 7936/24872]: 0%| | 0/97 [00:03<?, ?it/s]
loss: 0.093391 [ 8192/24872]: 0%| | 0/97 [00:03<?, ?it/s]
loss: 0.085995 [ 8448/24872]: 0%| | 0/97 [00:03<?, ?it/s]
loss: 0.098285 [ 8704/24872]: 0%| | 0/97 [00:03<?, ?it/s]
loss: 0.088277 [ 8960/24872]: 0%| | 0/97 [00:03<?, ?it/s]
loss: 0.080807 [ 9216/24872]: 0%| | 0/97 [00:03<?, ?it/s]
loss: 0.088740 [ 9472/24872]: 0%| | 0/97 [00:04<?, ?it/s]
loss: 0.082239 [ 9728/24872]: 0%| | 0/97 [00:04<?, ?it/s]
loss: 0.088140 [ 9984/24872]: 0%| | 0/97 [00:04<?, ?it/s]
loss: 0.077904 [10240/24872]: 0%| | 0/97 [00:04<?, ?it/s]
loss: 0.092231 [10496/24872]: 0%| | 0/97 [00:04<?, ?it/s]
loss: 0.083449 [10752/24872]: 0%| | 0/97 [00:04<?, ?it/s]
loss: 0.076414 [11008/24872]: 0%| | 0/97 [00:04<?, ?it/s]
loss: 0.099969 [11264/24872]: 0%| | 0/97 [00:04<?, ?it/s]
loss: 0.086906 [11520/24872]: 0%| | 0/97 [00:04<?, ?it/s]
loss: 0.089649 [11776/24872]: 0%| | 0/97 [00:05<?, ?it/s]
loss: 0.071506 [12032/24872]: 0%| | 0/97 [00:05<?, ?it/s]
loss: 0.096183 [12288/24872]: 0%| | 0/97 [00:05<?, ?it/s]
loss: 0.078016 [12544/24872]: 0%| | 0/97 [00:05<?, ?it/s]
loss: 0.090468 [12800/24872]: 0%| | 0/97 [00:05<?, ?it/s]
loss: 0.100036 [13056/24872]: 0%| | 0/97 [00:05<?, ?it/s]
loss: 0.072381 [13312/24872]: 0%| | 0/97 [00:05<?, ?it/s]
loss: 0.079947 [13568/24872]: 0%| | 0/97 [00:05<?, ?it/s]
loss: 0.068752 [13824/24872]: 0%| | 0/97 [00:05<?, ?it/s]
loss: 0.094263 [14080/24872]: 0%| | 0/97 [00:06<?, ?it/s]
loss: 0.095950 [14336/24872]: 0%| | 0/97 [00:06<?, ?it/s]
loss: 0.088448 [14592/24872]: 0%| | 0/97 [00:06<?, ?it/s]
loss: 0.084424 [14848/24872]: 0%| | 0/97 [00:06<?, ?it/s]
loss: 0.081295 [15104/24872]: 0%| | 0/97 [00:06<?, ?it/s]
loss: 0.063316 [15360/24872]: 0%| | 0/97 [00:06<?, ?it/s]
loss: 0.081676 [15616/24872]: 0%| | 0/97 [00:06<?, ?it/s]
loss: 0.088741 [15872/24872]: 0%| | 0/97 [00:06<?, ?it/s]
loss: 0.101596 [16128/24872]: 0%| | 0/97 [00:06<?, ?it/s]
loss: 0.087291 [16384/24872]: 0%| | 0/97 [00:06<?, ?it/s]
loss: 0.090336 [16640/24872]: 0%| | 0/97 [00:07<?, ?it/s]
loss: 0.077266 [16896/24872]: 0%| | 0/97 [00:07<?, ?it/s]
loss: 0.082098 [17152/24872]: 0%| | 0/97 [00:07<?, ?it/s]
loss: 0.100182 [17408/24872]: 0%| | 0/97 [00:07<?, ?it/s]
loss: 0.077597 [17664/24872]: 0%| | 0/97 [00:07<?, ?it/s]
loss: 0.107090 [17920/24872]: 0%| | 0/97 [00:07<?, ?it/s]
loss: 0.065672 [18176/24872]: 0%| | 0/97 [00:07<?, ?it/s]
loss: 0.094299 [18432/24872]: 0%| | 0/97 [00:07<?, ?it/s]
loss: 0.077956 [18688/24872]: 0%| | 0/97 [00:07<?, ?it/s]
loss: 0.100463 [18944/24872]: 0%| | 0/97 [00:08<?, ?it/s]
loss: 0.102748 [19200/24872]: 0%| | 0/97 [00:08<?, ?it/s]
loss: 0.089718 [19456/24872]: 0%| | 0/97 [00:08<?, ?it/s]
loss: 0.069943 [19712/24872]: 0%| | 0/97 [00:08<?, ?it/s]
loss: 0.071905 [19968/24872]: 0%| | 0/97 [00:08<?, ?it/s]
loss: 0.082536 [20224/24872]: 0%| | 0/97 [00:08<?, ?it/s]
loss: 0.111569 [20480/24872]: 0%| | 0/97 [00:08<?, ?it/s]
loss: 0.071138 [20736/24872]: 0%| | 0/97 [00:08<?, ?it/s]
loss: 0.094794 [20992/24872]: 0%| | 0/97 [00:08<?, ?it/s]
loss: 0.082571 [21248/24872]: 0%| | 0/97 [00:09<?, ?it/s]
loss: 0.071611 [21504/24872]: 0%| | 0/97 [00:09<?, ?it/s]
loss: 0.079793 [21760/24872]: 0%| | 0/97 [00:09<?, ?it/s]
loss: 0.089467 [22016/24872]: 0%| | 0/97 [00:09<?, ?it/s]
loss: 0.078618 [22272/24872]: 0%| | 0/97 [00:09<?, ?it/s]
loss: 0.088844 [22528/24872]: 0%| | 0/97 [00:09<?, ?it/s]
loss: 0.097197 [22784/24872]: 0%| | 0/97 [00:09<?, ?it/s]
loss: 0.107277 [23040/24872]: 0%| | 0/97 [00:09<?, ?it/s]
loss: 0.080328 [23296/24872]: 0%| | 0/97 [00:09<?, ?it/s]
loss: 0.098461 [23552/24872]: 0%| | 0/97 [00:10<?, ?it/s]
loss: 0.071920 [23808/24872]: 0%| | 0/97 [00:10<?, ?it/s]
loss: 0.089052 [24064/24872]: 0%| | 0/97 [00:10<?, ?it/s]
loss: 0.091577 [24320/24872]: 0%| | 0/97 [00:10<?, ?it/s]
loss: 0.090448 [24576/24872]: 0%| | 0/97 [00:10<?, ?it/s]
loss: 0.074290 [24832/24872]: 0%| | 0/97 [00:10<?, ?it/s]
loss: 0.102464 [24872/24872]: 0%| | 0/97 [00:10<?, ?it/s]
loss: 0.102464 [24872/24872]: : 98it [00:10, 9.20it/s]
Epoch 6, time=391.72s
0%| | 0/97 [00:00<?, ?it/s]
loss: 0.079305 [ 256/24872]: 0%| | 0/97 [00:00<?, ?it/s]
loss: 0.081145 [ 512/24872]: 0%| | 0/97 [00:00<?, ?it/s]
loss: 0.070559 [ 768/24872]: 0%| | 0/97 [00:00<?, ?it/s]
loss: 0.097067 [ 1024/24872]: 0%| | 0/97 [00:00<?, ?it/s]
loss: 0.085621 [ 1280/24872]: 0%| | 0/97 [00:00<?, ?it/s]
loss: 0.081999 [ 1536/24872]: 0%| | 0/97 [00:00<?, ?it/s]
loss: 0.078786 [ 1792/24872]: 0%| | 0/97 [00:00<?, ?it/s]
loss: 0.093513 [ 2048/24872]: 0%| | 0/97 [00:00<?, ?it/s]
loss: 0.093606 [ 2304/24872]: 0%| | 0/97 [00:00<?, ?it/s]
loss: 0.090366 [ 2560/24872]: 0%| | 0/97 [00:01<?, ?it/s]
loss: 0.088306 [ 2816/24872]: 0%| | 0/97 [00:01<?, ?it/s]
loss: 0.089146 [ 3072/24872]: 0%| | 0/97 [00:01<?, ?it/s]
loss: 0.097037 [ 3328/24872]: 0%| | 0/97 [00:01<?, ?it/s]
loss: 0.089082 [ 3584/24872]: 0%| | 0/97 [00:01<?, ?it/s]
loss: 0.091470 [ 3840/24872]: 0%| | 0/97 [00:01<?, ?it/s]
loss: 0.087060 [ 4096/24872]: 0%| | 0/97 [00:01<?, ?it/s]
loss: 0.080023 [ 4352/24872]: 0%| | 0/97 [00:01<?, ?it/s]
loss: 0.085012 [ 4608/24872]: 0%| | 0/97 [00:01<?, ?it/s]
loss: 0.095842 [ 4864/24872]: 0%| | 0/97 [00:02<?, ?it/s]
loss: 0.075338 [ 5120/24872]: 0%| | 0/97 [00:02<?, ?it/s]
loss: 0.105782 [ 5376/24872]: 0%| | 0/97 [00:02<?, ?it/s]
loss: 0.088537 [ 5632/24872]: 0%| | 0/97 [00:02<?, ?it/s]
loss: 0.086963 [ 5888/24872]: 0%| | 0/97 [00:02<?, ?it/s]
loss: 0.094393 [ 6144/24872]: 0%| | 0/97 [00:02<?, ?it/s]
loss: 0.098932 [ 6400/24872]: 0%| | 0/97 [00:02<?, ?it/s]
loss: 0.080049 [ 6656/24872]: 0%| | 0/97 [00:02<?, ?it/s]
loss: 0.083821 [ 6912/24872]: 0%| | 0/97 [00:02<?, ?it/s]
loss: 0.073752 [ 7168/24872]: 0%| | 0/97 [00:03<?, ?it/s]
loss: 0.081451 [ 7424/24872]: 0%| | 0/97 [00:03<?, ?it/s]
loss: 0.079013 [ 7680/24872]: 0%| | 0/97 [00:03<?, ?it/s]
loss: 0.089045 [ 7936/24872]: 0%| | 0/97 [00:03<?, ?it/s]
loss: 0.092371 [ 8192/24872]: 0%| | 0/97 [00:03<?, ?it/s]
loss: 0.085237 [ 8448/24872]: 0%| | 0/97 [00:03<?, ?it/s]
loss: 0.097353 [ 8704/24872]: 0%| | 0/97 [00:03<?, ?it/s]
loss: 0.087748 [ 8960/24872]: 0%| | 0/97 [00:03<?, ?it/s]
loss: 0.080081 [ 9216/24872]: 0%| | 0/97 [00:03<?, ?it/s]
loss: 0.087917 [ 9472/24872]: 0%| | 0/97 [00:04<?, ?it/s]
loss: 0.081514 [ 9728/24872]: 0%| | 0/97 [00:04<?, ?it/s]
loss: 0.087382 [ 9984/24872]: 0%| | 0/97 [00:04<?, ?it/s]
loss: 0.077421 [10240/24872]: 0%| | 0/97 [00:04<?, ?it/s]
loss: 0.091625 [10496/24872]: 0%| | 0/97 [00:04<?, ?it/s]
loss: 0.082925 [10752/24872]: 0%| | 0/97 [00:04<?, ?it/s]
loss: 0.075822 [11008/24872]: 0%| | 0/97 [00:04<?, ?it/s]
loss: 0.099131 [11264/24872]: 0%| | 0/97 [00:04<?, ?it/s]
loss: 0.086214 [11520/24872]: 0%| | 0/97 [00:04<?, ?it/s]
loss: 0.089061 [11776/24872]: 0%| | 0/97 [00:05<?, ?it/s]
loss: 0.070794 [12032/24872]: 0%| | 0/97 [00:05<?, ?it/s]
loss: 0.095763 [12288/24872]: 0%| | 0/97 [00:05<?, ?it/s]
loss: 0.077394 [12544/24872]: 0%| | 0/97 [00:05<?, ?it/s]
loss: 0.089741 [12800/24872]: 0%| | 0/97 [00:05<?, ?it/s]
loss: 0.099352 [13056/24872]: 0%| | 0/97 [00:05<?, ?it/s]
loss: 0.071813 [13312/24872]: 0%| | 0/97 [00:05<?, ?it/s]
loss: 0.079237 [13568/24872]: 0%| | 0/97 [00:05<?, ?it/s]
loss: 0.068202 [13824/24872]: 0%| | 0/97 [00:05<?, ?it/s]
loss: 0.093478 [14080/24872]: 0%| | 0/97 [00:06<?, ?it/s]
loss: 0.095339 [14336/24872]: 0%| | 0/97 [00:06<?, ?it/s]
loss: 0.087405 [14592/24872]: 0%| | 0/97 [00:06<?, ?it/s]
loss: 0.083576 [14848/24872]: 0%| | 0/97 [00:06<?, ?it/s]
loss: 0.080654 [15104/24872]: 0%| | 0/97 [00:06<?, ?it/s]
loss: 0.062654 [15360/24872]: 0%| | 0/97 [00:06<?, ?it/s]
loss: 0.080958 [15616/24872]: 0%| | 0/97 [00:06<?, ?it/s]
loss: 0.087790 [15872/24872]: 0%| | 0/97 [00:06<?, ?it/s]
loss: 0.100734 [16128/24872]: 0%| | 0/97 [00:06<?, ?it/s]
loss: 0.086683 [16384/24872]: 0%| | 0/97 [00:06<?, ?it/s]
loss: 0.089742 [16640/24872]: 0%| | 0/97 [00:07<?, ?it/s]
loss: 0.076707 [16896/24872]: 0%| | 0/97 [00:07<?, ?it/s]
loss: 0.081455 [17152/24872]: 0%| | 0/97 [00:07<?, ?it/s]
loss: 0.099535 [17408/24872]: 0%| | 0/97 [00:07<?, ?it/s]
loss: 0.077151 [17664/24872]: 0%| | 0/97 [00:07<?, ?it/s]
loss: 0.106418 [17920/24872]: 0%| | 0/97 [00:07<?, ?it/s]
loss: 0.065126 [18176/24872]: 0%| | 0/97 [00:07<?, ?it/s]
loss: 0.093560 [18432/24872]: 0%| | 0/97 [00:07<?, ?it/s]
loss: 0.077453 [18688/24872]: 0%| | 0/97 [00:07<?, ?it/s]
loss: 0.099754 [18944/24872]: 0%| | 0/97 [00:08<?, ?it/s]
loss: 0.102015 [19200/24872]: 0%| | 0/97 [00:08<?, ?it/s]
loss: 0.089064 [19456/24872]: 0%| | 0/97 [00:08<?, ?it/s]
loss: 0.069401 [19712/24872]: 0%| | 0/97 [00:08<?, ?it/s]
loss: 0.071635 [19968/24872]: 0%| | 0/97 [00:08<?, ?it/s]
loss: 0.081856 [20224/24872]: 0%| | 0/97 [00:08<?, ?it/s]
loss: 0.110678 [20480/24872]: 0%| | 0/97 [00:08<?, ?it/s]
loss: 0.070477 [20736/24872]: 0%| | 0/97 [00:08<?, ?it/s]
loss: 0.094229 [20992/24872]: 0%| | 0/97 [00:08<?, ?it/s]
loss: 0.080556 [21248/24872]: 0%| | 0/97 [00:09<?, ?it/s]
loss: 0.070920 [21504/24872]: 0%| | 0/97 [00:09<?, ?it/s]
loss: 0.079379 [21760/24872]: 0%| | 0/97 [00:09<?, ?it/s]
loss: 0.089015 [22016/24872]: 0%| | 0/97 [00:09<?, ?it/s]
loss: 0.078206 [22272/24872]: 0%| | 0/97 [00:09<?, ?it/s]
loss: 0.088392 [22528/24872]: 0%| | 0/97 [00:09<?, ?it/s]
loss: 0.096822 [22784/24872]: 0%| | 0/97 [00:09<?, ?it/s]
loss: 0.106387 [23040/24872]: 0%| | 0/97 [00:09<?, ?it/s]
loss: 0.079841 [23296/24872]: 0%| | 0/97 [00:09<?, ?it/s]
loss: 0.097949 [23552/24872]: 0%| | 0/97 [00:10<?, ?it/s]
loss: 0.071224 [23808/24872]: 0%| | 0/97 [00:10<?, ?it/s]
loss: 0.088309 [24064/24872]: 0%| | 0/97 [00:10<?, ?it/s]
loss: 0.090664 [24320/24872]: 0%| | 0/97 [00:10<?, ?it/s]
loss: 0.089607 [24576/24872]: 0%| | 0/97 [00:10<?, ?it/s]
loss: 0.073694 [24832/24872]: 0%| | 0/97 [00:10<?, ?it/s]
loss: 0.101283 [24872/24872]: 0%| | 0/97 [00:10<?, ?it/s]
loss: 0.101283 [24872/24872]: : 98it [00:10, 9.20it/s]
Done!
test the network#
Do some qualitative tests: let the trained network predict some particle geometries and compare their Mie spectra with the target spectra.
# Qualitative test: feed target spectra to the trained generator network,
# evaluate the suggested core-shell designs with Mie theory and compare the
# resulting scattering spectra against the targets.
# NOTE(review): the name `q_sca_target_test` suggests a held-out set. If these
# are in fact training samples, the test should be done on separate samples!
sca_test = q_sca_target_test

# Inference only: switch the model to evaluation mode (matters for layers such
# as dropout / batch norm, if the model has any) and disable autograd tracking
# so no computation graph is built for the forward pass and the Mie evaluation.
model.eval()
with torch.no_grad():
    pred = model(sca_test)

    # evaluate Mie
    r_c_test, r_s_test, eps_c_test, eps_s_test = nn_pred_to_mie_geometry(pred)
    res_mie = pmd.multishell.cross_sections(
        k0,
        r_c=r_c_test,
        eps_c=eps_c_test,
        r_s=r_s_test,
        eps_s=eps_s_test,
        eps_env=eps_env,
        n_max=n_max,
    )

# plot
# Draw (up to) 4 *distinct* random samples: sampling without replacement
# avoids showing the same spectrum in more than one panel.
n_plot = min(4, len(sca_test))
i_plot = np.random.choice(len(sca_test), size=n_plot, replace=False)

# the wavelength axis is identical for every panel: convert it only once
wl_np = wl0.detach().cpu().numpy()

plt.figure(figsize=(12, 10))
for i_n, i in enumerate(i_plot):
    plt.subplot(2, 2, i_n + 1)
    plt.plot(
        wl_np,
        sca_test[i].detach().cpu().numpy(),
        label="reference",
    )
    plt.plot(
        wl_np,
        res_mie["q_sca"][i].detach().cpu().numpy(),
        label="predicted particle",
    )
    plt.legend()
    plt.xlabel("wavelength (nm)")
    plt.ylabel("scat. efficiency")
plt.show()

Total running time of the script: (6 minutes 49.999 seconds)
Estimated memory usage: 675 MB