Note
Go to the end to download the full example code.
Mie-informed tandem neural network#
Here, we demonstrate how to train a design generator network capable of suggesting core-shell particles with a specific spectral response, using PyMieDiff as a differentiable forward evaluator. The training pipeline follows the “Tandem” model:
target spectrum –> generator NN –> design –> Mie –> real spectrum
training loss is: MSE(target spec., real spec.)
author: O. Jackson, P. Wiecha, 06/2025
imports#
import time
import numpy as np
from tqdm import tqdm
import matplotlib.pyplot as plt
import torch
from torch import nn
import pymiediff as pmd
setup optimization target#
We set up the main configuration here: the pymiediff backend, the torch device, the parameter limits and the wavelengths.
# --- compute setup: pymiediff backend and torch device ---
backend = "torch"
device = "cpu"

# --- general configuration ---
# number of random particles to generate as reference data
N_samples = 25000
# Mie expansion is truncated at a fixed maximum order for performance
n_max = 4

# permittivity of the surrounding medium
eps_env = torch.tensor(1.0, device=device)

# [lower, upper] limits for the sampled sizes and for the real and
# imaginary parts of the refractive indices
lim_r = torch.as_tensor([40, 100], device=device)
lim_n_re = torch.as_tensor([1.5, 4.0], device=device)
lim_n_im = torch.as_tensor([0.0, 0.1], device=device)

# evaluation wavelengths and the matching wavenumbers k0 = 2*pi / wl0
# (NOTE(review): 400-800 are presumably nanometres - confirm)
wl0 = torch.linspace(start=400, end=800, steps=40, device=device)
k0 = 2 * torch.pi / wl0
generate reference spectra#
We generate a large number of reference Mie spectra from randomly sampled particles; these spectra are used as design targets during training.
Note: this step could also be done without any physics knowledge, for example with artificial spectra (e.g. Lorentzians), or a scattering maximization loss.
# datagen: evaluate the spectra of randomly sampled core-shell particles
# (only the spectra are used for training, the geometries are not)
def _sample_uniform(shape, limits):
    # random values of the given shape, uniformly spread between
    # limits[0] and limits[1], created on the configured device
    return torch.rand(shape, device=device) * torch.diff(limits)[0] + limits[0]


# core radius and shell thickness share the same size limits
r_c = _sample_uniform(N_samples, lim_r)
d_s = _sample_uniform(N_samples, lim_r)
r_s = r_c + d_s

# complex refractive indices; column 0 is the core, column 1 the shell
n_re = _sample_uniform((N_samples, 2), lim_n_re)
n_im = _sample_uniform((N_samples, 2), lim_n_im)
n = n_re + 1j * n_im

# low-level API: permittivities are passed as spectra (for vectorization),
# so every constant value is repeated along the wavelength axis
_flat_spectrum = torch.ones_like(k0).unsqueeze(0)
eps_c = _flat_spectrum * n[:, 0].unsqueeze(1) ** 2
eps_s = _flat_spectrum * n[:, 1].unsqueeze(1) ** 2

all_particles = pmd.coreshell.cross_sections(
    k0,
    r_c=r_c,
    eps_c=eps_c,
    r_s=r_s,
    eps_s=eps_s,
    eps_env=eps_env,
    backend=backend,
    n_max=n_max,
)

# the first N_test spectra are held back for testing, the rest is for training
N_test = 128
_q_sca_all = all_particles["q_sca"]
q_sca_target = _q_sca_all[N_test:].to(dtype=torch.float32)
q_sca_target_test = _q_sca_all[:N_test].to(dtype=torch.float32)

# quick visual check: plot one spectrum of the training split
plt.plot(q_sca_target[30].detach().cpu().numpy())

[<matplotlib.lines.Line2D object at 0x7ff455aee1b0>]
Neural network classes / functions#
Define the network model (a simple MLP) and the training loop.
class FullyConnected(nn.Module):
    """Simple MLP that maps a target spectrum to normalized design parameters.

    The network has an input layer, two hidden layers (all followed by ReLU)
    and an output layer followed by a sigmoid, so every output lies in (0, 1).

    Args:
        hidden_dim: width of the hidden layers.
        in_dim: number of input features (spectral points). If ``None``
            (default), the length of the module-level ``k0`` is used, which
            is the original behavior.
        out_dim: number of output values. The default of 6 matches the six
            columns consumed by ``nn_pred_to_mie_geometry``.
    """

    def __init__(self, hidden_dim=1024, in_dim=None, out_dim=6):
        super().__init__()
        if in_dim is None:
            # fall back to the globally configured number of wavelengths
            in_dim = len(k0)

        # NOTE: attribute names and creation order are kept unchanged so that
        # state-dict keys and the random initialization order stay the same.
        self.fc_in = nn.Linear(in_dim, hidden_dim)
        self.relu1 = nn.ReLU()
        self.fc_1 = nn.Linear(hidden_dim, hidden_dim)
        self.relu2 = nn.ReLU()
        self.fc_2 = nn.Linear(hidden_dim, hidden_dim)
        self.relu3 = nn.ReLU()
        self.fc_out = nn.Linear(hidden_dim, out_dim)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Return the sigmoid-bounded prediction for the input batch ``x``."""
        x = self.relu1(self.fc_in(x))
        x = self.relu2(self.fc_1(x))
        x = self.relu3(self.fc_2(x))
        return self.sigmoid(self.fc_out(x))
def nn_pred_to_mie_geometry(pred):
    """Turn raw network outputs into the inputs of the core-shell Mie solver.

    The six columns of ``pred`` are scaled by the upper bounds of the
    user-defined limits (implicit normalization): columns 0 and 1 give the
    core radius and the additional shell thickness, columns 2/3 the real and
    imaginary part of the core index, columns 4/5 those of the shell index.

    Returns:
        Tuple ``(r_c, r_s, eps_c, eps_s)``: core radius, outer shell radius
        and the core / shell permittivities, the latter two repeated along
        a second axis with one entry per element of ``k0``.
    """
    # upper bounds of the configured parameter ranges
    r_upper = lim_r.max()
    n_re_upper = lim_n_re.max()
    n_im_upper = lim_n_im.max()

    # radii: the shell radius is the core radius plus a second scaled term
    core_fraction = pred[:, 0]
    r_c = r_upper * core_fraction
    r_s = r_upper * (core_fraction + pred[:, 1])

    # complex refractive indices of core and shell
    n_c = n_re_upper * pred[:, 2] + n_im_upper * (1j * pred[:, 3])
    n_s = n_re_upper * pred[:, 4] + n_im_upper * (1j * pred[:, 5])

    # permittivity = index squared, broadcast to one value per wavenumber
    flat_spectrum = torch.ones_like(k0).unsqueeze(0)
    eps_c = flat_spectrum * n_c.unsqueeze(1) ** 2
    eps_s = flat_spectrum * n_s.unsqueeze(1) ** 2

    return r_c, r_s, eps_c, eps_s
def train_loop(dataloader, model, loss_fn, optimizer):
    """Train ``model`` for one epoch in the tandem configuration.

    For every batch of target spectra the model proposes core-shell designs,
    the designs are evaluated with the differentiable Mie solver, and the
    loss between the resulting and the target scattering spectra is
    backpropagated through the solver into the model.

    Args:
        dataloader: yields batches ``X`` of target spectra.
        model: generator network mapping spectra to design parameters.
        loss_fn: callable ``loss_fn(predicted_spectra, target_spectra)``.
        optimizer: torch optimizer holding the model parameters.
    """
    size = len(dataloader.dataset)

    # Set the model to training mode - important for batch normalization and
    # dropout layers. Unnecessary in this situation but added for best practices.
    model.train()

    # len(dataloader) is the true number of batches; `size // batch_size`
    # misses the final partial batch when size is not a multiple of it
    prog_bar = tqdm(enumerate(dataloader), total=len(dataloader))

    # running count of processed samples (does not rely on a fixed batch size)
    current = 0
    for i_batch, X in prog_bar:
        # model prediction: generate core-shell particles
        pred = model(X)

        # evaluate Mie
        r_c, r_s, eps_c, eps_s = nn_pred_to_mie_geometry(pred)
        res_mie = pmd.coreshell.cross_sections(
            k0,
            r_c=r_c,
            eps_c=eps_c,
            r_s=r_s,
            eps_s=eps_s,
            eps_env=eps_env,
            backend=backend,
            n_max=n_max,
        )
        q_sca_mie = res_mie["q_sca"].to(dtype=torch.float32)

        # calc. loss: spectrum of the proposed design vs. the target spectrum
        loss = loss_fn(q_sca_mie, X)

        # Backpropagation
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()

        # progress report
        loss_value = loss.item()
        current += len(X)
        prog_bar.set_description(f"loss: {loss_value:>7f} [{current:>5d}/{size:>5d}]")
training the Mie-informed network#
Here we use a simple, manually optimized training schedule.
model = FullyConnected().to(device)

# staged training schedule: each entry gives the batch size ("bs"), the
# learning rate ("lr") and the number of epochs ("n_ep") of one stage
confs = [
    {"bs": 32, "lr": 1e-4, "n_ep": 5},
    {"bs": 64, "lr": 1e-4, "n_ep": 5},
    {"bs": 128, "lr": 1e-4, "n_ep": 6},
    {"bs": 256, "lr": 1e-5, "n_ep": 6},
]

t_start = time.time()
for conf in confs:
    batch_size, learning_rate, epochs = conf["bs"], conf["lr"], conf["n_ep"]

    # announce the settings of this stage
    print("-------------------------------")
    print(f"LR={learning_rate}, batch_size={batch_size}")
    print("-------------------------------")

    # loss, optimizer and data loader are created anew for every stage
    loss_fn = nn.MSELoss()
    optimizer = torch.optim.AdamW(params=model.parameters(), lr=learning_rate)
    train_dataloader = torch.utils.data.DataLoader(
        dataset=q_sca_target,
        batch_size=batch_size,
    )

    for t in range(epochs):
        print(f"Epoch {t+1}, time={time.time()-t_start:.2f}s")
        train_loop(train_dataloader, model, loss_fn, optimizer)

print("Done!")
-------------------------------
LR=0.0001, batch_size=32
-------------------------------
Epoch 1, time=0.00s
0%| | 0/777 [00:00<?, ?it/s]
loss: 6.182572 [ 32/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 3.334477 [ 64/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 3.108343 [ 96/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 2.594806 [ 128/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 2.844598 [ 160/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 2.672254 [ 192/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 1.921931 [ 224/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 1.853018 [ 256/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 1.622688 [ 288/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 1.216375 [ 320/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 1.715949 [ 352/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 1.650837 [ 384/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 1.526762 [ 416/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 1.614265 [ 448/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 1.356275 [ 480/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 1.488001 [ 512/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 1.352542 [ 544/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 1.450811 [ 576/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 1.607834 [ 608/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 1.281707 [ 640/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.873309 [ 672/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 1.139984 [ 704/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 1.418243 [ 736/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 1.244231 [ 768/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.997306 [ 800/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 1.153756 [ 832/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.859865 [ 864/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 1.801029 [ 896/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 1.482426 [ 928/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 1.537925 [ 960/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 1.206968 [ 992/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.808883 [ 1024/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 1.273452 [ 1056/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 1.159672 [ 1088/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 1.055174 [ 1120/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.956229 [ 1152/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 1.101278 [ 1184/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.929632 [ 1216/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 1.420024 [ 1248/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 1.146415 [ 1280/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.852853 [ 1312/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.998428 [ 1344/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.928108 [ 1376/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.794232 [ 1408/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 1.030994 [ 1440/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 1.257447 [ 1472/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.857558 [ 1504/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 1.045353 [ 1536/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 1.110978 [ 1568/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 1.056010 [ 1600/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.706728 [ 1632/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.913445 [ 1664/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 1.084007 [ 1696/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.784323 [ 1728/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 1.185635 [ 1760/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.762800 [ 1792/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.925333 [ 1824/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.823907 [ 1856/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.882497 [ 1888/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.755688 [ 1920/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.833456 [ 1952/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 1.070379 [ 1984/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.695112 [ 2016/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.852664 [ 2048/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.874298 [ 2080/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.781030 [ 2112/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.801805 [ 2144/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.926717 [ 2176/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 1.022391 [ 2208/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.681406 [ 2240/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.781619 [ 2272/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.838514 [ 2304/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.901780 [ 2336/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.690984 [ 2368/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.573145 [ 2400/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.812639 [ 2432/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.735887 [ 2464/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.910911 [ 2496/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.759571 [ 2528/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.857392 [ 2560/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 1.082136 [ 2592/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.853489 [ 2624/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.477143 [ 2656/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.925727 [ 2688/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.668492 [ 2720/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.837689 [ 2752/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 1.025828 [ 2784/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.877934 [ 2816/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.950735 [ 2848/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.715084 [ 2880/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.919077 [ 2912/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.927000 [ 2944/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.868034 [ 2976/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.859462 [ 3008/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.917695 [ 3040/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.645564 [ 3072/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.771818 [ 3104/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.754923 [ 3136/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 1.095286 [ 3168/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.665557 [ 3200/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 1.015794 [ 3232/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.869047 [ 3264/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.633646 [ 3296/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.861033 [ 3328/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.745661 [ 3360/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.544293 [ 3392/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.803379 [ 3424/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.735419 [ 3456/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.608767 [ 3488/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.768060 [ 3520/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.821219 [ 3552/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.733536 [ 3584/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.768524 [ 3616/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.720838 [ 3648/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.707083 [ 3680/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.932173 [ 3712/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.715108 [ 3744/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.877462 [ 3776/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.843866 [ 3808/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.549525 [ 3840/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.929485 [ 3872/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.725561 [ 3904/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.729177 [ 3936/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.634928 [ 3968/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.764124 [ 4000/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.940358 [ 4032/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.697993 [ 4064/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.934334 [ 4096/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.734423 [ 4128/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.829414 [ 4160/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.720620 [ 4192/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.781999 [ 4224/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.783253 [ 4256/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.766130 [ 4288/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.708845 [ 4320/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.507006 [ 4352/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.636664 [ 4384/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.837828 [ 4416/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.715154 [ 4448/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.686331 [ 4480/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.533245 [ 4512/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.560344 [ 4544/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.738786 [ 4576/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.624795 [ 4608/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.699484 [ 4640/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.821406 [ 4672/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.748804 [ 4704/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.697531 [ 4736/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.601929 [ 4768/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.534576 [ 4800/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.745200 [ 4832/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.695427 [ 4864/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.691930 [ 4896/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.505526 [ 4928/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.917637 [ 4960/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.518932 [ 4992/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.652021 [ 5024/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.604701 [ 5056/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.760159 [ 5088/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.706993 [ 5120/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.912276 [ 5152/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.853322 [ 5184/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.479915 [ 5216/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.642461 [ 5248/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.805953 [ 5280/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.758529 [ 5312/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.562470 [ 5344/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.651807 [ 5376/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.693378 [ 5408/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.477579 [ 5440/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.440845 [ 5472/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.755653 [ 5504/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.543157 [ 5536/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.525984 [ 5568/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.738688 [ 5600/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.505031 [ 5632/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.603095 [ 5664/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.495685 [ 5696/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.649035 [ 5728/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.626231 [ 5760/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.624574 [ 5792/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.539009 [ 5824/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.608602 [ 5856/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.517679 [ 5888/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.645958 [ 5920/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.633316 [ 5952/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.456997 [ 5984/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.474314 [ 6016/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.585139 [ 6048/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.695903 [ 6080/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.426125 [ 6112/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.650087 [ 6144/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.549055 [ 6176/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.719173 [ 6208/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.478238 [ 6240/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.570678 [ 6272/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.565205 [ 6304/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.443512 [ 6336/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.571824 [ 6368/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.499425 [ 6400/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.587146 [ 6432/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.484214 [ 6464/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.591078 [ 6496/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.529968 [ 6528/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.574853 [ 6560/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.558849 [ 6592/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.477197 [ 6624/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.366215 [ 6656/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.419809 [ 6688/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.556782 [ 6720/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.456732 [ 6752/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.501543 [ 6784/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.547164 [ 6816/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.443912 [ 6848/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.691703 [ 6880/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.586831 [ 6912/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.483213 [ 6944/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.596832 [ 6976/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.554465 [ 7008/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.521209 [ 7040/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.491609 [ 7072/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.387293 [ 7104/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.794813 [ 7136/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.425498 [ 7168/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.389268 [ 7200/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.420405 [ 7232/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.542045 [ 7264/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.487033 [ 7296/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.399472 [ 7328/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.448121 [ 7360/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.406630 [ 7392/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.591825 [ 7424/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.627374 [ 7456/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.447223 [ 7488/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.565641 [ 7520/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.624734 [ 7552/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.451864 [ 7584/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.415504 [ 7616/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.691492 [ 7648/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.648329 [ 7680/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.595378 [ 7712/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.359191 [ 7744/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.518852 [ 7776/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.610308 [ 7808/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.353026 [ 7840/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.380109 [ 7872/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.606776 [ 7904/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.590940 [ 7936/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.511027 [ 7968/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.414065 [ 8000/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.376860 [ 8032/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.442762 [ 8064/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.452412 [ 8096/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.528809 [ 8128/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.394380 [ 8160/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.465563 [ 8192/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.566898 [ 8224/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.474321 [ 8256/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.663122 [ 8288/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.516909 [ 8320/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.398049 [ 8352/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.659187 [ 8384/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.659491 [ 8416/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.484123 [ 8448/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.606162 [ 8480/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.388272 [ 8512/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.389152 [ 8544/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.465150 [ 8576/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.599031 [ 8608/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.521233 [ 8640/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.741320 [ 8672/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.648986 [ 8704/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.331249 [ 8736/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.425979 [ 8768/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.389425 [ 8800/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.479067 [ 8832/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.398535 [ 8864/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.400271 [ 8896/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.489922 [ 8928/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.517244 [ 8960/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.549319 [ 8992/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.408617 [ 9024/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.604844 [ 9056/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.494191 [ 9088/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.536059 [ 9120/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.328432 [ 9152/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.642036 [ 9184/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.454632 [ 9216/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.615949 [ 9248/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.544591 [ 9280/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.364256 [ 9312/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.426344 [ 9344/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.616912 [ 9376/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.385636 [ 9408/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.628750 [ 9440/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.631596 [ 9472/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.461516 [ 9504/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.524834 [ 9536/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.431266 [ 9568/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.571660 [ 9600/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.389676 [ 9632/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.347735 [ 9664/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.334361 [ 9696/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.485077 [ 9728/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.619223 [ 9760/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.404663 [ 9792/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.430423 [ 9824/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.418580 [ 9856/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.473991 [ 9888/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.610956 [ 9920/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.435558 [ 9952/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.416647 [ 9984/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.440566 [10016/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.552326 [10048/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.617116 [10080/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.520148 [10112/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.370968 [10144/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.439676 [10176/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.462503 [10208/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.547763 [10240/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.418268 [10272/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.683885 [10304/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.608562 [10336/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.545290 [10368/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.402975 [10400/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.532819 [10432/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.473393 [10464/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.501363 [10496/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.352207 [10528/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.472003 [10560/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.452334 [10592/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.455455 [10624/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.498290 [10656/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.496919 [10688/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.488055 [10720/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.425092 [10752/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.503697 [10784/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.459601 [10816/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.381308 [10848/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.382794 [10880/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.422137 [10912/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.550531 [10944/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.439775 [10976/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.459610 [11008/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.597789 [11040/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.508992 [11072/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.416530 [11104/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.432492 [11136/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.454946 [11168/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.449624 [11200/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.450659 [11232/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.475472 [11264/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.442137 [11296/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.378373 [11328/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.487064 [11360/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.410949 [11392/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.615205 [11424/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.378739 [11456/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.405390 [11488/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.537704 [11520/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.498317 [11552/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.377109 [11584/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.477670 [11616/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.531565 [11648/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.538401 [11680/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.481005 [11712/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.533812 [11744/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.233503 [11776/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.479941 [11808/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.481281 [11840/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.592981 [11872/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.298879 [11904/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.495679 [11936/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.400152 [11968/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.373099 [12000/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.439246 [12032/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.316510 [12064/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.425532 [12096/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.531142 [12128/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.411219 [12160/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.451959 [12192/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.470364 [12224/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.409591 [12256/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.453683 [12288/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.324992 [12320/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.323058 [12352/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.370793 [12384/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.467052 [12416/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.398762 [12448/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.482727 [12480/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.485286 [12512/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.405564 [12544/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.537323 [12576/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.521915 [12608/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.433399 [12640/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.660012 [12672/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.444537 [12704/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.560645 [12736/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.393242 [12768/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.416149 [12800/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.633953 [12832/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.616306 [12864/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.596975 [12896/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.580615 [12928/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.474852 [12960/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.343369 [12992/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.372915 [13024/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.592035 [13056/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.423999 [13088/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.493554 [13120/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.349277 [13152/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.446138 [13184/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.350674 [13216/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.377281 [13248/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.352433 [13280/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.343349 [13312/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.491824 [13344/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.537138 [13376/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.403118 [13408/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.531287 [13440/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.451567 [13472/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.418220 [13504/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.460424 [13536/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.449146 [13568/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.618755 [13600/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.376046 [13632/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.488001 [13664/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.400850 [13696/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.520549 [13728/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.358996 [13760/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.373340 [13792/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.368646 [13824/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.514603 [13856/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.517549 [13888/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.360675 [13920/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.488427 [13952/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.494517 [13984/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.410651 [14016/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.314336 [14048/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.293638 [14080/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.476437 [14112/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.555156 [14144/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.324506 [14176/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.388002 [14208/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.442456 [14240/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.350908 [14272/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.319808 [14304/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.424010 [14336/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.392068 [14368/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.398116 [14400/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.554329 [14432/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.561060 [14464/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.386620 [14496/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.380240 [14528/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.497517 [14560/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.597805 [14592/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.545099 [14624/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.374111 [14656/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.385943 [14688/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.525957 [14720/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.385689 [14752/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.407729 [14784/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.416215 [14816/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.424749 [14848/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.431131 [14880/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.320945 [14912/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.372937 [14944/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.404364 [14976/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.337524 [15008/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.444859 [15040/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.542595 [15072/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.256636 [15104/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.398489 [15136/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.374267 [15168/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.381496 [15200/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.469922 [15232/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.494171 [15264/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.445082 [15296/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.286583 [15328/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.427008 [15360/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.486539 [15392/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.363454 [15424/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.292049 [15456/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.426690 [15488/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.436988 [15520/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.390411 [15552/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.365706 [15584/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.217163 [15616/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.365467 [15648/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.548497 [15680/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.352967 [15712/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.418756 [15744/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.294217 [15776/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.427893 [15808/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.486844 [15840/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.396313 [15872/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.399575 [15904/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.269003 [15936/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.246287 [15968/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.422631 [16000/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.330260 [16032/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.392084 [16064/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.309957 [16096/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.345493 [16128/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.418565 [16160/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.424585 [16192/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.216848 [16224/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.354302 [16256/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.529730 [16288/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.302277 [16320/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.290925 [16352/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.404665 [16384/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.411469 [16416/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.332088 [16448/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.336992 [16480/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.344732 [16512/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.520639 [16544/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.346569 [16576/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.420527 [16608/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.436842 [16640/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.423338 [16672/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.489803 [16704/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.331524 [16736/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.381501 [16768/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.515839 [16800/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.460788 [16832/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.391189 [16864/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.296698 [16896/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.407435 [16928/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.320570 [16960/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.339994 [16992/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.354870 [17024/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.309354 [17056/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.421184 [17088/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.313710 [17120/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.349468 [17152/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.277739 [17184/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.504557 [17216/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.359512 [17248/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.341378 [17280/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.400877 [17312/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.346884 [17344/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.287972 [17376/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.347368 [17408/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.432697 [17440/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.451170 [17472/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.325171 [17504/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.312805 [17536/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.372722 [17568/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.291218 [17600/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.381447 [17632/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.358265 [17664/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.322823 [17696/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.230518 [17728/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.339897 [17760/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.525428 [17792/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.374745 [17824/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.456367 [17856/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.276648 [17888/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.280241 [17920/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.434213 [17952/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.316658 [17984/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.306616 [18016/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.380962 [18048/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.496836 [18080/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.322949 [18112/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.326471 [18144/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.351888 [18176/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.377811 [18208/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.300397 [18240/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.393547 [18272/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.357333 [18304/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.401228 [18336/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.403133 [18368/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.411685 [18400/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.319454 [18432/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.572745 [18464/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.417655 [18496/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.281128 [18528/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.334777 [18560/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.495568 [18592/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.450313 [18624/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.396474 [18656/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.445340 [18688/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.361275 [18720/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.401411 [18752/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.355343 [18784/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.354503 [18816/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.371070 [18848/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.295151 [18880/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.398222 [18912/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.322440 [18944/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.365946 [18976/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.237211 [19008/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.298748 [19040/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.347349 [19072/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.216835 [19104/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.410963 [19136/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.360430 [19168/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.224400 [19200/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.437457 [19232/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.383434 [19264/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.308794 [19296/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.396776 [19328/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.423565 [19360/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.434738 [19392/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.303214 [19424/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.259686 [19456/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.334746 [19488/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.457153 [19520/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.268551 [19552/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.385748 [19584/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.403775 [19616/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.422714 [19648/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.325159 [19680/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.467854 [19712/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.305899 [19744/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.263826 [19776/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.424393 [19808/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.316809 [19840/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.268500 [19872/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.419471 [19904/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.277837 [19936/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.380701 [19968/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.424425 [20000/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.320018 [20032/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.256738 [20064/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.269271 [20096/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.390987 [20128/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.321418 [20160/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.350246 [20192/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.337882 [20224/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.399227 [20256/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.312488 [20288/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.270803 [20320/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.430310 [20352/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.309064 [20384/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.220802 [20416/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.276694 [20448/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.430284 [20480/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.407599 [20512/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.482259 [20544/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.373769 [20576/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.371380 [20608/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.367435 [20640/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.454970 [20672/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.236993 [20704/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.308130 [20736/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.374640 [20768/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.427230 [20800/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.408032 [20832/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.392227 [20864/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.395829 [20896/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.602216 [20928/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.455932 [20960/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.363813 [20992/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.302878 [21024/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.488638 [21056/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.441445 [21088/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.371620 [21120/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.303022 [21152/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.393241 [21184/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.536671 [21216/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.405784 [21248/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.426318 [21280/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.436494 [21312/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.271370 [21344/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.394010 [21376/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.317655 [21408/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.221180 [21440/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.432571 [21472/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.273507 [21504/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.422910 [21536/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.408548 [21568/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.359962 [21600/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.372469 [21632/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.283168 [21664/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.341756 [21696/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.380907 [21728/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.341792 [21760/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.422473 [21792/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.383469 [21824/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.297440 [21856/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.254103 [21888/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.294036 [21920/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.274219 [21952/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.342979 [21984/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.323180 [22016/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.537353 [22048/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.356895 [22080/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.479990 [22112/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.397183 [22144/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.390467 [22176/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.324865 [22208/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.253007 [22240/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.343804 [22272/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.516181 [22304/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.288809 [22336/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.335244 [22368/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.301399 [22400/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.267387 [22432/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.403985 [22464/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.355732 [22496/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.440882 [22528/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.323637 [22560/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.306853 [22592/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.425291 [22624/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.286754 [22656/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.500194 [22688/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.339172 [22720/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.376058 [22752/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.375402 [22784/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.299163 [22816/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.362746 [22848/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.192124 [22880/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.277537 [22912/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.252056 [22944/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.369647 [22976/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.218066 [23008/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.327654 [23040/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.303217 [23072/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.358150 [23104/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.299988 [23136/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.375925 [23168/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.360307 [23200/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.347974 [23232/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.321921 [23264/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.332618 [23296/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.359337 [23328/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.302615 [23360/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.283038 [23392/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.519574 [23424/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.407474 [23456/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.382742 [23488/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.418022 [23520/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.421506 [23552/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.322997 [23584/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.347220 [23616/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.267300 [23648/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.547674 [23680/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.332545 [23712/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.406222 [23744/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.311692 [23776/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.499420 [23808/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.370106 [23840/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.438601 [23872/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.504836 [23904/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.304768 [23936/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.343738 [23968/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.290378 [24000/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.241026 [24032/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.297337 [24064/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.268155 [24096/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.275849 [24128/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.336545 [24160/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.308935 [24192/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.394887 [24224/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.367017 [24256/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.382758 [24288/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.541768 [24320/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.287573 [24352/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.343699 [24384/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.249040 [24416/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.345051 [24448/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.335610 [24480/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.259211 [24512/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.301738 [24544/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.290498 [24576/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.385153 [24608/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.362023 [24640/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.334413 [24672/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.233532 [24704/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.288486 [24736/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.343870 [24768/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.290819 [24800/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.299607 [24832/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.267837 [24864/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.451986 [24872/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.451986 [24872/24872]: : 778it [00:25, 30.20it/s]
Epoch 2, time=25.79s
0%| | 0/777 [00:00<?, ?it/s]
loss: 0.303133 [ 32/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.151487 [ 64/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.318765 [ 96/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.285219 [ 128/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.351937 [ 160/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.288636 [ 192/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.343785 [ 224/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.235235 [ 256/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.413842 [ 288/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.475065 [ 320/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.434514 [ 352/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.236320 [ 384/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.311150 [ 416/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.398568 [ 448/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.273421 [ 480/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.349302 [ 512/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.283959 [ 544/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.301722 [ 576/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.350492 [ 608/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.360828 [ 640/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.248867 [ 672/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.471385 [ 704/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.381764 [ 736/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.348401 [ 768/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.282063 [ 800/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.380425 [ 832/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.248235 [ 864/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.460929 [ 896/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.308660 [ 928/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.308168 [ 960/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.388460 [ 992/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.240443 [ 1024/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.307169 [ 1056/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.407995 [ 1088/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.260567 [ 1120/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.408227 [ 1152/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.359861 [ 1184/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.276127 [ 1216/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.378850 [ 1248/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.357901 [ 1280/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.358293 [ 1312/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.260461 [ 1344/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.417661 [ 1376/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.422087 [ 1408/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.400386 [ 1440/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.384299 [ 1472/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.236981 [ 1504/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.346517 [ 1536/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.372406 [ 1568/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.348665 [ 1600/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.286150 [ 1632/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.358694 [ 1664/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.352164 [ 1696/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.267228 [ 1728/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.363219 [ 1760/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.216982 [ 1792/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.447193 [ 1824/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.272310 [ 1856/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.272730 [ 1888/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.252365 [ 1920/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.259615 [ 1952/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.313676 [ 1984/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.254766 [ 2016/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.413763 [ 2048/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.326718 [ 2080/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.237399 [ 2112/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.257179 [ 2144/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.366702 [ 2176/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.322454 [ 2208/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.321178 [ 2240/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.271390 [ 2272/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.352137 [ 2304/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.368009 [ 2336/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.241145 [ 2368/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.200141 [ 2400/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.243110 [ 2432/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.207349 [ 2464/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.377745 [ 2496/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.321258 [ 2528/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.281937 [ 2560/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.319191 [ 2592/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.388786 [ 2624/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.152815 [ 2656/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.410009 [ 2688/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.276721 [ 2720/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.308788 [ 2752/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.313796 [ 2784/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.292488 [ 2816/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.382648 [ 2848/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.256953 [ 2880/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.377486 [ 2912/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.393062 [ 2944/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.378297 [ 2976/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.233271 [ 3008/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.416272 [ 3040/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.213773 [ 3072/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.294011 [ 3104/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.347402 [ 3136/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.326073 [ 3168/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.278834 [ 3200/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.494409 [ 3232/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.283643 [ 3264/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.263155 [ 3296/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.316357 [ 3328/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.344766 [ 3360/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.213819 [ 3392/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.262157 [ 3424/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.368405 [ 3456/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.357644 [ 3488/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.349842 [ 3520/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.296208 [ 3552/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.409783 [ 3584/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.386074 [ 3616/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.211272 [ 3648/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.357898 [ 3680/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.456691 [ 3712/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.248428 [ 3744/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.431181 [ 3776/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.336599 [ 3808/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.248759 [ 3840/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.305417 [ 3872/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.296353 [ 3904/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.396774 [ 3936/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.314589 [ 3968/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.362420 [ 4000/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.378754 [ 4032/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.234301 [ 4064/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.455254 [ 4096/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.249636 [ 4128/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.304295 [ 4160/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.365081 [ 4192/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.368285 [ 4224/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.310641 [ 4256/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.325222 [ 4288/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.268216 [ 4320/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.217904 [ 4352/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.195660 [ 4384/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.379354 [ 4416/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.297099 [ 4448/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.356829 [ 4480/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.247164 [ 4512/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.242046 [ 4544/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.355358 [ 4576/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.219030 [ 4608/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.375675 [ 4640/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.384247 [ 4672/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.306783 [ 4704/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.268594 [ 4736/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.296113 [ 4768/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.289031 [ 4800/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.382115 [ 4832/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.258346 [ 4864/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.311838 [ 4896/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.211418 [ 4928/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.480172 [ 4960/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.207919 [ 4992/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.310867 [ 5024/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.280997 [ 5056/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.320377 [ 5088/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.350414 [ 5120/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.497772 [ 5152/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.363241 [ 5184/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.210222 [ 5216/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.337163 [ 5248/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.439612 [ 5280/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.351622 [ 5312/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.352642 [ 5344/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.385271 [ 5376/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.313662 [ 5408/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.308999 [ 5440/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.254717 [ 5472/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.360613 [ 5504/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.339263 [ 5536/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.314112 [ 5568/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.372617 [ 5600/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.203795 [ 5632/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.295589 [ 5664/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.294730 [ 5696/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.418652 [ 5728/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.313686 [ 5760/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.332274 [ 5792/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.301104 [ 5824/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.322097 [ 5856/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.278526 [ 5888/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.427306 [ 5920/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.320515 [ 5952/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.232794 [ 5984/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.213078 [ 6016/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.196728 [ 6048/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.288600 [ 6080/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.244271 [ 6112/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.295850 [ 6144/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.295084 [ 6176/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.353055 [ 6208/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.256223 [ 6240/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.211577 [ 6272/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.381216 [ 6304/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.284269 [ 6336/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.290153 [ 6368/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.247505 [ 6400/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.327339 [ 6432/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.280928 [ 6464/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.336600 [ 6496/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.213314 [ 6528/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.323938 [ 6560/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.332015 [ 6592/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.249672 [ 6624/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.216879 [ 6656/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.296798 [ 6688/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.276672 [ 6720/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.248381 [ 6752/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.216865 [ 6784/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.356984 [ 6816/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.261532 [ 6848/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.470693 [ 6880/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.283144 [ 6912/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.254472 [ 6944/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.392836 [ 6976/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.287748 [ 7008/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.233608 [ 7040/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.269309 [ 7072/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.214611 [ 7104/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.598052 [ 7136/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.255252 [ 7168/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.231600 [ 7200/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.199889 [ 7232/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.401102 [ 7264/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.348018 [ 7296/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.181148 [ 7328/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.268579 [ 7360/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.246925 [ 7392/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.332897 [ 7424/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.389495 [ 7456/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.292720 [ 7488/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.374388 [ 7520/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.414756 [ 7552/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.249407 [ 7584/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.193524 [ 7616/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.363473 [ 7648/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.463107 [ 7680/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.313540 [ 7712/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.253451 [ 7744/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.334758 [ 7776/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.363350 [ 7808/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.212543 [ 7840/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.245239 [ 7872/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.462380 [ 7904/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.290131 [ 7936/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.324035 [ 7968/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.239098 [ 8000/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.225909 [ 8032/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.267676 [ 8064/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.281602 [ 8096/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.289612 [ 8128/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.292887 [ 8160/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.353696 [ 8192/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.356935 [ 8224/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.265488 [ 8256/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.362858 [ 8288/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.358608 [ 8320/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.241614 [ 8352/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.359301 [ 8384/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.439738 [ 8416/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.283827 [ 8448/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.347467 [ 8480/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.225025 [ 8512/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.225355 [ 8544/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.265922 [ 8576/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.397294 [ 8608/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.351067 [ 8640/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.412683 [ 8672/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.318631 [ 8704/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.174167 [ 8736/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.246119 [ 8768/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.212205 [ 8800/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.254155 [ 8832/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.236251 [ 8864/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.277684 [ 8896/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.328624 [ 8928/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.327932 [ 8960/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.335622 [ 8992/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.244634 [ 9024/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.336718 [ 9056/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.332665 [ 9088/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.328690 [ 9120/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.200213 [ 9152/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.446267 [ 9184/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.250528 [ 9216/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.333183 [ 9248/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.341000 [ 9280/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.166288 [ 9312/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.284169 [ 9344/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.329725 [ 9376/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.262552 [ 9408/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.362154 [ 9440/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.332740 [ 9472/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.275097 [ 9504/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.310746 [ 9536/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.260296 [ 9568/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.333383 [ 9600/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.216976 [ 9632/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.223088 [ 9664/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.183558 [ 9696/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.253209 [ 9728/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.308288 [ 9760/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.234160 [ 9792/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.256225 [ 9824/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.259891 [ 9856/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.278649 [ 9888/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.318951 [ 9920/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.250676 [ 9952/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.327347 [ 9984/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.240595 [10016/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.277525 [10048/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.389894 [10080/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.275317 [10112/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.229365 [10144/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.302348 [10176/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.223232 [10208/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.362307 [10240/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.289681 [10272/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.396656 [10304/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.267646 [10336/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.290277 [10368/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.253198 [10400/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.278985 [10432/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.273663 [10464/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.260337 [10496/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.232959 [10528/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.305088 [10560/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.286901 [10592/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.303053 [10624/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.313963 [10656/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.320910 [10688/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.290774 [10720/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.269046 [10752/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.262283 [10784/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.312536 [10816/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.231918 [10848/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.244936 [10880/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.270064 [10912/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.402147 [10944/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.326589 [10976/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.296078 [11008/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.371504 [11040/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.341900 [11072/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.273013 [11104/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.271616 [11136/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.307845 [11168/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.272735 [11200/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.306107 [11232/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.329905 [11264/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.259639 [11296/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.261085 [11328/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.293442 [11360/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.224815 [11392/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.384917 [11424/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.271664 [11456/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.286021 [11488/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.342144 [11520/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.276867 [11552/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.197430 [11584/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.372719 [11616/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.277789 [11648/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.350666 [11680/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.323493 [11712/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.284939 [11744/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.149889 [11776/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.291829 [11808/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.228500 [11840/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.426203 [11872/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.159113 [11904/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.251576 [11936/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.285275 [11968/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.215936 [12000/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.285926 [12032/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.241284 [12064/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.293944 [12096/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.307429 [12128/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.244891 [12160/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.390276 [12192/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.285725 [12224/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.255911 [12256/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.308401 [12288/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.199325 [12320/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.207913 [12352/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.202352 [12384/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.323255 [12416/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.272744 [12448/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.295949 [12480/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.390260 [12512/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.258522 [12544/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.404557 [12576/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.326143 [12608/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.274634 [12640/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.445921 [12672/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.280693 [12704/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.391015 [12736/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.202972 [12768/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.270509 [12800/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.440166 [12832/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.357475 [12864/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.387353 [12896/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.324554 [12928/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.324412 [12960/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.219365 [12992/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.275552 [13024/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.397977 [13056/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.289492 [13088/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.399457 [13120/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.211956 [13152/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.270755 [13184/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.238808 [13216/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.296015 [13248/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.236756 [13280/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.258413 [13312/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.347807 [13344/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.357918 [13376/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.269763 [13408/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.415229 [13440/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.284834 [13472/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.315424 [13504/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.320082 [13536/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.334028 [13568/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.426173 [13600/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.259601 [13632/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.293818 [13664/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.239248 [13696/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.391869 [13728/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.222464 [13760/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.237191 [13792/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.253453 [13824/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.314276 [13856/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.401094 [13888/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.242208 [13920/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.326807 [13952/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.323126 [13984/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.294869 [14016/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.222796 [14048/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.218435 [14080/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.358977 [14112/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.400003 [14144/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.188797 [14176/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.234490 [14208/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.347323 [14240/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.252103 [14272/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.207874 [14304/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.290380 [14336/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.263102 [14368/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.285539 [14400/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.332403 [14432/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.372083 [14464/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.250543 [14496/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.236166 [14528/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.292430 [14560/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.296883 [14592/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.343973 [14624/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.265554 [14656/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.205606 [14688/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.271599 [14720/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.256223 [14752/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.186307 [14784/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.228002 [14816/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.293524 [14848/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.264713 [14880/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.206909 [14912/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.204052 [14944/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.246196 [14976/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.230419 [15008/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.286018 [15040/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.359338 [15072/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.174416 [15104/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.300374 [15136/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.254701 [15168/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.259431 [15200/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.342193 [15232/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.382313 [15264/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.326361 [15296/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.240418 [15328/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.314700 [15360/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.366739 [15392/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.301770 [15424/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.210980 [15456/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.278877 [15488/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.344644 [15520/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.294641 [15552/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.275871 [15584/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.189176 [15616/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.317312 [15648/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.410210 [15680/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.230495 [15712/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.337459 [15744/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.190412 [15776/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.329188 [15808/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.373150 [15840/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.274105 [15872/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.276758 [15904/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.161294 [15936/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.192966 [15968/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.304727 [16000/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.237223 [16032/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.290962 [16064/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.214022 [16096/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.265724 [16128/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.300678 [16160/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.339999 [16192/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.152751 [16224/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.233744 [16256/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.429419 [16288/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.208122 [16320/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.209372 [16352/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.349976 [16384/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.296406 [16416/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.265580 [16448/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.346201 [16480/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.292108 [16512/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.406158 [16544/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.258977 [16576/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.367486 [16608/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.351955 [16640/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.347997 [16672/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.304599 [16704/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.217212 [16736/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.243842 [16768/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.386387 [16800/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.323660 [16832/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.283327 [16864/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.229813 [16896/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.317713 [16928/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.213633 [16960/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.252861 [16992/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.281653 [17024/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.211963 [17056/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.246053 [17088/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.225357 [17120/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.241846 [17152/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.191816 [17184/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.402124 [17216/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.257863 [17248/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.230556 [17280/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.297148 [17312/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.279942 [17344/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.224683 [17376/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.277290 [17408/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.349480 [17440/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.360346 [17472/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.235744 [17504/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.228038 [17536/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.283239 [17568/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.206871 [17600/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.277635 [17632/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.265183 [17664/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.247978 [17696/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.175265 [17728/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.254162 [17760/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.414983 [17792/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.292880 [17824/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.318824 [17856/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.201217 [17888/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.197148 [17920/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.282633 [17952/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.227035 [17984/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.201345 [18016/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.328046 [18048/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.263796 [18080/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.229658 [18112/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.236355 [18144/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.271864 [18176/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.272884 [18208/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.176701 [18240/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.296985 [18272/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.230988 [18304/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.309428 [18336/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.282127 [18368/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.304954 [18400/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.246903 [18432/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.358340 [18464/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.269290 [18496/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.195571 [18528/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.200206 [18560/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.246740 [18592/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.304646 [18624/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.258973 [18656/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.317295 [18688/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.252912 [18720/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.242716 [18752/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.217068 [18784/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.257769 [18816/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.284054 [18848/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.169886 [18880/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.243300 [18912/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.197740 [18944/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.290609 [18976/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.178745 [19008/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.198900 [19040/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.237155 [19072/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.152844 [19104/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.276023 [19136/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.291355 [19168/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.133390 [19200/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.359730 [19232/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.279950 [19264/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.237443 [19296/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.306100 [19328/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.328075 [19360/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.315068 [19392/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.215919 [19424/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.164590 [19456/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.285745 [19488/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.344997 [19520/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.197502 [19552/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.281358 [19584/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.311932 [19616/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.357053 [19648/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.248726 [19680/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.382287 [19712/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.218087 [19744/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.176243 [19776/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.334624 [19808/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.248178 [19840/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.179909 [19872/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.344811 [19904/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.161862 [19936/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.306887 [19968/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.311564 [20000/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.249135 [20032/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.201237 [20064/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.188803 [20096/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.275602 [20128/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.247099 [20160/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.258894 [20192/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.259421 [20224/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.320954 [20256/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.218188 [20288/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.238401 [20320/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.356239 [20352/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.231858 [20384/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.162773 [20416/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.185307 [20448/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.303748 [20480/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.364712 [20512/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.379899 [20544/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.288685 [20576/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.293558 [20608/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.258540 [20640/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.374775 [20672/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.185943 [20704/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.252642 [20736/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.265647 [20768/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.324871 [20800/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.295063 [20832/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.331432 [20864/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.316155 [20896/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.438002 [20928/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.277831 [20960/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.226185 [20992/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.247540 [21024/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.343280 [21056/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.348798 [21088/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.262779 [21120/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.232653 [21152/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.289281 [21184/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.390659 [21216/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.254055 [21248/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.305377 [21280/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.327732 [21312/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.169941 [21344/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.272807 [21376/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.250862 [21408/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.164074 [21440/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.334611 [21472/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.237527 [21504/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.277351 [21536/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.302444 [21568/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.304827 [21600/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.302240 [21632/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.246095 [21664/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.261681 [21696/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.289353 [21728/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.298168 [21760/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.359767 [21792/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.276318 [21824/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.218288 [21856/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.190514 [21888/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.236717 [21920/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.224375 [21952/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.286538 [21984/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.274712 [22016/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.382420 [22048/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.269880 [22080/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.370807 [22112/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.303565 [22144/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.282563 [22176/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.249236 [22208/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.202789 [22240/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.247546 [22272/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.379700 [22304/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.204498 [22336/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.277383 [22368/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.247470 [22400/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.212669 [22432/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.291487 [22464/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.304226 [22496/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.359521 [22528/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.238555 [22560/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.262983 [22592/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.394989 [22624/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.220864 [22656/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.386830 [22688/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.234881 [22720/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.305209 [22752/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.261686 [22784/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.244508 [22816/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.284644 [22848/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.149995 [22880/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.183516 [22912/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.176404 [22944/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.250626 [22976/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.169945 [23008/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.237774 [23040/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.243029 [23072/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.269865 [23104/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.240894 [23136/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.322879 [23168/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.291763 [23200/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.263074 [23232/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.262996 [23264/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.239295 [23296/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.275148 [23328/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.246467 [23360/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.216884 [23392/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.355339 [23424/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.328829 [23456/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.292813 [23488/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.357123 [23520/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.299480 [23552/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.241856 [23584/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.245799 [23616/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.212699 [23648/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.420962 [23680/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.268863 [23712/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.329218 [23744/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.231208 [23776/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.406248 [23808/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.282387 [23840/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.290265 [23872/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.359932 [23904/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.235806 [23936/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.267296 [23968/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.208566 [24000/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.174631 [24032/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.270326 [24064/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.197406 [24096/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.211010 [24128/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.252246 [24160/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.232171 [24192/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.278593 [24224/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.288373 [24256/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.290347 [24288/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.451115 [24320/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.236581 [24352/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.282577 [24384/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.206184 [24416/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.266583 [24448/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.225022 [24480/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.205182 [24512/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.245083 [24544/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.230072 [24576/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.326534 [24608/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.277316 [24640/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.265430 [24672/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.197444 [24704/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.236102 [24736/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.260714 [24768/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.236401 [24800/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.239560 [24832/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.232556 [24864/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.366332 [24872/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.366332 [24872/24872]: : 778it [00:25, 30.43it/s]
Epoch 3, time=51.36s
0%| | 0/777 [00:00<?, ?it/s]
loss: 0.259917 [ 32/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.133448 [ 64/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.257873 [ 96/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.226052 [ 128/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.339895 [ 160/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.231713 [ 192/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.298509 [ 224/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.198882 [ 256/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.343592 [ 288/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.368277 [ 320/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.376619 [ 352/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.174103 [ 384/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.261486 [ 416/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.316372 [ 448/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.202255 [ 480/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.243246 [ 512/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.181049 [ 544/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.239581 [ 576/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.242148 [ 608/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.296759 [ 640/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.216131 [ 672/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.370782 [ 704/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.319695 [ 736/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.281411 [ 768/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.217297 [ 800/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.308868 [ 832/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.192286 [ 864/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.370300 [ 896/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.248558 [ 928/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.233442 [ 960/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.310863 [ 992/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.178946 [ 1024/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.215138 [ 1056/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.319469 [ 1088/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.196824 [ 1120/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.292250 [ 1152/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.297258 [ 1184/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.223693 [ 1216/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.283594 [ 1248/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.230443 [ 1280/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.284839 [ 1312/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.189376 [ 1344/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.321194 [ 1376/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.349125 [ 1408/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.329215 [ 1440/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.300142 [ 1472/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.164003 [ 1504/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.300856 [ 1536/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.285098 [ 1568/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.253601 [ 1600/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.226194 [ 1632/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.276605 [ 1664/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.248555 [ 1696/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.215476 [ 1728/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.296749 [ 1760/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.171113 [ 1792/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.323289 [ 1824/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.215258 [ 1856/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.202022 [ 1888/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.190667 [ 1920/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.193275 [ 1952/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.278926 [ 1984/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.201555 [ 2016/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.343263 [ 2048/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.270567 [ 2080/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.171627 [ 2112/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.198513 [ 2144/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.284093 [ 2176/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.279404 [ 2208/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.231045 [ 2240/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.217605 [ 2272/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.319346 [ 2304/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.330966 [ 2336/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.169071 [ 2368/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.193549 [ 2400/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.209680 [ 2432/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.167133 [ 2464/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.292905 [ 2496/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.284207 [ 2528/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.217026 [ 2560/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.273270 [ 2592/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.283612 [ 2624/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.122817 [ 2656/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.345089 [ 2688/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.241586 [ 2720/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.234237 [ 2752/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.260582 [ 2784/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.197454 [ 2816/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.325249 [ 2848/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.231117 [ 2880/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.291106 [ 2912/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.306838 [ 2944/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.318577 [ 2976/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.184495 [ 3008/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.267041 [ 3040/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.177663 [ 3072/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.208256 [ 3104/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.292494 [ 3136/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.286138 [ 3168/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.207910 [ 3200/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.357816 [ 3232/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.235342 [ 3264/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.211518 [ 3296/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.254697 [ 3328/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.338541 [ 3360/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.185375 [ 3392/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.237856 [ 3424/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.284345 [ 3456/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.311840 [ 3488/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.231553 [ 3520/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.216513 [ 3552/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.225019 [ 3584/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.337229 [ 3616/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.181323 [ 3648/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.283847 [ 3680/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.395832 [ 3712/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.242331 [ 3744/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.401717 [ 3776/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.231947 [ 3808/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.245750 [ 3840/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.329342 [ 3872/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.263291 [ 3904/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.350760 [ 3936/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.269715 [ 3968/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.286917 [ 4000/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.291706 [ 4032/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.244544 [ 4064/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.381661 [ 4096/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.220869 [ 4128/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.283097 [ 4160/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.314917 [ 4192/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.316092 [ 4224/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.253730 [ 4256/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.262474 [ 4288/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.221142 [ 4320/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.167506 [ 4352/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.143155 [ 4384/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.319452 [ 4416/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.262687 [ 4448/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.276052 [ 4480/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.202040 [ 4512/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.208690 [ 4544/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.297428 [ 4576/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.171564 [ 4608/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.322281 [ 4640/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.322240 [ 4672/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.267004 [ 4704/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.186321 [ 4736/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.248065 [ 4768/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.239040 [ 4800/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.290792 [ 4832/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.196189 [ 4864/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.261187 [ 4896/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.157313 [ 4928/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.389824 [ 4960/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.146317 [ 4992/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.212021 [ 5024/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.230537 [ 5056/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.240115 [ 5088/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.268675 [ 5120/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.408089 [ 5152/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.295832 [ 5184/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.196929 [ 5216/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.289348 [ 5248/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.312176 [ 5280/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.287764 [ 5312/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.264762 [ 5344/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.323825 [ 5376/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.253335 [ 5408/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.241468 [ 5440/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.194344 [ 5472/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.277783 [ 5504/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.294577 [ 5536/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.223120 [ 5568/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.290405 [ 5600/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.164381 [ 5632/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.248161 [ 5664/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.183970 [ 5696/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.340037 [ 5728/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.270642 [ 5760/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.234937 [ 5792/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.226297 [ 5824/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.259604 [ 5856/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.211422 [ 5888/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.275512 [ 5920/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.274842 [ 5952/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.158804 [ 5984/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.165263 [ 6016/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.147210 [ 6048/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.218948 [ 6080/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.181547 [ 6112/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.258224 [ 6144/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.217771 [ 6176/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.311941 [ 6208/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.189189 [ 6240/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.161905 [ 6272/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.298004 [ 6304/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.232341 [ 6336/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.230152 [ 6368/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.185888 [ 6400/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.259177 [ 6432/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.249232 [ 6464/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.255151 [ 6496/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.167400 [ 6528/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.240027 [ 6560/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.299642 [ 6592/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.204676 [ 6624/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.195192 [ 6656/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.229543 [ 6688/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.215474 [ 6720/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.186669 [ 6752/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.166817 [ 6784/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.262543 [ 6816/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.221373 [ 6848/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.345002 [ 6880/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.234489 [ 6912/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.205097 [ 6944/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.296815 [ 6976/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.208081 [ 7008/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.183096 [ 7040/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.211310 [ 7072/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.141329 [ 7104/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.402029 [ 7136/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.195386 [ 7168/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.173314 [ 7200/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.147041 [ 7232/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.309373 [ 7264/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.279198 [ 7296/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.147959 [ 7328/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.235827 [ 7360/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.193601 [ 7392/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.287469 [ 7424/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.272506 [ 7456/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.267424 [ 7488/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.319756 [ 7520/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.393181 [ 7552/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.231678 [ 7584/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.161236 [ 7616/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.291546 [ 7648/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.407158 [ 7680/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.242340 [ 7712/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.212140 [ 7744/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.294089 [ 7776/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.310650 [ 7808/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.172313 [ 7840/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.229934 [ 7872/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.312317 [ 7904/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.238250 [ 7936/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.219365 [ 7968/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.221485 [ 8000/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.211503 [ 8032/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.205028 [ 8064/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.233227 [ 8096/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.280661 [ 8128/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.228702 [ 8160/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.215751 [ 8192/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.311365 [ 8224/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.233110 [ 8256/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.259886 [ 8288/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.346322 [ 8320/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.227183 [ 8352/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.276408 [ 8384/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.326453 [ 8416/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.264186 [ 8448/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.271820 [ 8480/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.171903 [ 8512/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.215494 [ 8544/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.214273 [ 8576/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.321551 [ 8608/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.322574 [ 8640/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.298144 [ 8672/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.280490 [ 8704/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.154354 [ 8736/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.226651 [ 8768/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.180264 [ 8800/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.211269 [ 8832/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.196281 [ 8864/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.217213 [ 8896/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.264608 [ 8928/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.278295 [ 8960/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.268731 [ 8992/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.221184 [ 9024/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.295999 [ 9056/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.260257 [ 9088/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.264174 [ 9120/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.152802 [ 9152/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.383222 [ 9184/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.202278 [ 9216/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.295135 [ 9248/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.263502 [ 9280/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.150187 [ 9312/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.237786 [ 9344/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.237437 [ 9376/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.232956 [ 9408/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.309592 [ 9440/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.277174 [ 9472/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.232796 [ 9504/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.231270 [ 9536/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.194771 [ 9568/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.297863 [ 9600/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.189612 [ 9632/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.173611 [ 9664/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.151066 [ 9696/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.186478 [ 9728/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.248958 [ 9760/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.187405 [ 9792/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.199996 [ 9824/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.210459 [ 9856/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.216282 [ 9888/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.250086 [ 9920/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.250735 [ 9952/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.242513 [ 9984/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.173583 [10016/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.223578 [10048/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.318513 [10080/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.223835 [10112/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.167080 [10144/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.258499 [10176/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.195231 [10208/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.310748 [10240/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.243302 [10272/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.313932 [10304/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.223290 [10336/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.234476 [10368/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.224577 [10400/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.242397 [10432/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.213297 [10464/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.219361 [10496/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.180115 [10528/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.259696 [10560/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.238192 [10592/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.267369 [10624/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.225783 [10656/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.275526 [10688/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.267887 [10720/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.238545 [10752/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.240198 [10784/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.259523 [10816/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.179656 [10848/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.203456 [10880/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.215303 [10912/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.300403 [10944/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.294362 [10976/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.226630 [11008/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.334805 [11040/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.274114 [11072/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.229412 [11104/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.225405 [11136/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.265916 [11168/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.246810 [11200/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.224748 [11232/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.253950 [11264/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.201075 [11296/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.210392 [11328/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.210528 [11360/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.194229 [11392/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.317965 [11424/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.234362 [11456/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.240912 [11488/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.277551 [11520/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.209548 [11552/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.160196 [11584/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.296273 [11616/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.215045 [11648/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.289761 [11680/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.270645 [11712/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.258560 [11744/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.128173 [11776/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.265594 [11808/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.194880 [11840/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.386499 [11872/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.167454 [11904/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.205936 [11936/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.262073 [11968/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.182021 [12000/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.244531 [12032/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.233178 [12064/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.264112 [12096/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.243710 [12128/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.214578 [12160/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.299682 [12192/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.248152 [12224/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.223311 [12256/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.270476 [12288/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.177286 [12320/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.170483 [12352/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.144388 [12384/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.281063 [12416/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.237007 [12448/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.232190 [12480/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.298494 [12512/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.211307 [12544/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.329322 [12576/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.245668 [12608/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.216092 [12640/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.351085 [12672/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.256483 [12704/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.352272 [12736/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.186267 [12768/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.223722 [12800/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.370025 [12832/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.287044 [12864/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.382696 [12896/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.228721 [12928/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.258223 [12960/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.199664 [12992/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.195529 [13024/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.294829 [13056/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.247827 [13088/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.319405 [13120/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.198425 [13152/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.198567 [13184/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.214266 [13216/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.258472 [13248/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.201444 [13280/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.237183 [13312/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.251081 [13344/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.298955 [13376/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.248409 [13408/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.371551 [13440/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.233283 [13472/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.305346 [13504/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.230876 [13536/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.282234 [13568/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.352999 [13600/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.259379 [13632/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.290953 [13664/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.227558 [13696/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.319647 [13728/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.185801 [13760/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.195016 [13792/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.200038 [13824/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.215244 [13856/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.343351 [13888/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.209499 [13920/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.279436 [13952/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.267820 [13984/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.261441 [14016/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.170516 [14048/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.195342 [14080/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.319262 [14112/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.294356 [14144/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.155751 [14176/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.183959 [14208/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.291507 [14240/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.188965 [14272/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.163963 [14304/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.246936 [14336/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.202078 [14368/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.246225 [14400/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.303233 [14432/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.290910 [14464/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.215511 [14496/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.200522 [14528/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.247468 [14560/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.261085 [14592/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.339841 [14624/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.193078 [14656/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.189173 [14688/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.235494 [14720/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.211398 [14752/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.148213 [14784/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.190912 [14816/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.243371 [14848/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.215669 [14880/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.169197 [14912/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.176123 [14944/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.194862 [14976/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.203661 [15008/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.238262 [15040/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.329295 [15072/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.145623 [15104/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.248604 [15136/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.211265 [15168/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.231651 [15200/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.282548 [15232/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.327522 [15264/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.250838 [15296/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.197841 [15328/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.245601 [15360/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.291849 [15392/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.275916 [15424/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.166287 [15456/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.212691 [15488/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.257553 [15520/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.242608 [15552/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.235472 [15584/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.156086 [15616/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.236467 [15648/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.295418 [15680/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.197350 [15712/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.256031 [15744/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.175184 [15776/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.315035 [15808/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.332682 [15840/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.264982 [15872/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.210997 [15904/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.142088 [15936/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.191162 [15968/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.256318 [16000/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.181382 [16032/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.245139 [16064/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.190683 [16096/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.216413 [16128/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.290746 [16160/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.282196 [16192/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.122098 [16224/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.234072 [16256/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.369487 [16288/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.180677 [16320/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.193113 [16352/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.337277 [16384/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.265208 [16416/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.262669 [16448/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.293449 [16480/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.248336 [16512/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.381682 [16544/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.210488 [16576/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.297353 [16608/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.307239 [16640/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.293780 [16672/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.262830 [16704/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.225258 [16736/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.209408 [16768/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.354732 [16800/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.289113 [16832/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.245865 [16864/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.245420 [16896/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.343679 [16928/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.185871 [16960/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.221855 [16992/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.213311 [17024/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.191249 [17056/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.206536 [17088/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.214319 [17120/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.222978 [17152/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.179849 [17184/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.391196 [17216/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.253232 [17248/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.236022 [17280/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.243289 [17312/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.244301 [17344/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.215551 [17376/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.290312 [17408/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.339300 [17440/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.359274 [17472/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.221125 [17504/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.203924 [17536/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.260224 [17568/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.178287 [17600/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.248800 [17632/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.247698 [17664/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.259031 [17696/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.172052 [17728/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.221501 [17760/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.400682 [17792/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.275554 [17824/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.352417 [17856/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.197196 [17888/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.212693 [17920/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.296924 [17952/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.193646 [17984/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.187362 [18016/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.336501 [18048/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.245470 [18080/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.228910 [18112/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.317726 [18144/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.215619 [18176/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.249990 [18208/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.170383 [18240/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.304470 [18272/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.204958 [18304/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.309301 [18336/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.267794 [18368/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.323445 [18400/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.244736 [18432/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.333562 [18464/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.290588 [18496/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.192296 [18528/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.225856 [18560/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.190842 [18592/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.265692 [18624/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.228016 [18656/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.279088 [18688/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.243506 [18720/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.249890 [18752/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.189667 [18784/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.246745 [18816/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.261651 [18848/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.163163 [18880/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.223198 [18912/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.171335 [18944/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.262808 [18976/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.167001 [19008/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.188874 [19040/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.219421 [19072/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.141352 [19104/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.315034 [19136/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.262178 [19168/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.112934 [19200/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.383430 [19232/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.225213 [19264/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.217929 [19296/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.296653 [19328/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.334721 [19360/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.284167 [19392/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.230429 [19424/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.197629 [19456/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.282298 [19488/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.323674 [19520/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.198270 [19552/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.309101 [19584/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.273271 [19616/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.383388 [19648/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.222627 [19680/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.343308 [19712/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.244312 [19744/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.193330 [19776/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.339656 [19808/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.211226 [19840/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.173756 [19872/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.334964 [19904/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.158177 [19936/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.229633 [19968/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.322492 [20000/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.256172 [20032/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.185431 [20064/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.143254 [20096/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.256250 [20128/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.222652 [20160/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.223486 [20192/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.252727 [20224/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.295576 [20256/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.196721 [20288/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.219200 [20320/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.379318 [20352/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.214188 [20384/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.138697 [20416/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.203189 [20448/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.289095 [20480/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.287460 [20512/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.410413 [20544/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.191641 [20576/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.207316 [20608/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.232563 [20640/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.340270 [20672/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.142700 [20704/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.220905 [20736/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.218943 [20768/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.256748 [20800/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.264992 [20832/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.279587 [20864/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.240057 [20896/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.396746 [20928/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.197722 [20960/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.177783 [20992/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.237491 [21024/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.290867 [21056/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.320859 [21088/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.245761 [21120/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.183968 [21152/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.240413 [21184/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.302792 [21216/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.270728 [21248/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.338218 [21280/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.257223 [21312/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.144631 [21344/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.251811 [21376/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.206455 [21408/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.146936 [21440/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.276798 [21472/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.226126 [21504/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.241575 [21536/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.268472 [21568/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.262064 [21600/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.279933 [21632/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.241662 [21664/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.221624 [21696/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.272946 [21728/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.206676 [21760/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.314812 [21792/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.258609 [21824/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.175822 [21856/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.180292 [21888/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.193939 [21920/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.198491 [21952/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.248054 [21984/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.237408 [22016/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.355932 [22048/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.243555 [22080/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.325389 [22112/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.265030 [22144/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.218707 [22176/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.228372 [22208/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.189167 [22240/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.238556 [22272/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.330373 [22304/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.190457 [22336/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.213339 [22368/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.201129 [22400/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.155077 [22432/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.205796 [22464/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.270510 [22496/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.316961 [22528/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.187409 [22560/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.248273 [22592/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.315164 [22624/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.196658 [22656/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.308614 [22688/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.182606 [22720/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.247947 [22752/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.248126 [22784/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.208982 [22816/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.243404 [22848/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.130744 [22880/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.154663 [22912/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.138735 [22944/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.224049 [22976/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.124504 [23008/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.213373 [23040/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.207826 [23072/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.249063 [23104/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.257725 [23136/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.301310 [23168/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.280369 [23200/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.252953 [23232/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.234502 [23264/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.216934 [23296/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.262053 [23328/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.212884 [23360/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.192764 [23392/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.341757 [23424/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.315590 [23456/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.236546 [23488/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.302758 [23520/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.270098 [23552/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.204905 [23584/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.230853 [23616/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.167697 [23648/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.362016 [23680/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.258344 [23712/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.231601 [23744/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.191444 [23776/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.395695 [23808/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.221760 [23840/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.228403 [23872/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.307847 [23904/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.210763 [23936/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.200769 [23968/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.177718 [24000/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.153095 [24032/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.238598 [24064/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.211421 [24096/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.198866 [24128/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.238747 [24160/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.196147 [24192/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.247746 [24224/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.225107 [24256/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.238882 [24288/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.417105 [24320/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.170018 [24352/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.248273 [24384/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.207109 [24416/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.242141 [24448/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.191656 [24480/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.181119 [24512/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.228254 [24544/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.210715 [24576/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.272530 [24608/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.253632 [24640/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.251849 [24672/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.154865 [24704/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.185394 [24736/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.204700 [24768/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.217671 [24800/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.198603 [24832/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.190060 [24864/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.324468 [24872/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.324468 [24872/24872]: : 778it [00:25, 30.21it/s]
Epoch 4, time=77.11s
0%| | 0/777 [00:00<?, ?it/s]
loss: 0.229137 [ 32/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.115430 [ 64/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.260233 [ 96/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.193371 [ 128/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.306043 [ 160/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.196072 [ 192/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.253413 [ 224/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.197863 [ 256/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.341282 [ 288/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.342348 [ 320/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.394056 [ 352/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.141439 [ 384/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.232247 [ 416/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.305493 [ 448/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.194196 [ 480/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.238496 [ 512/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.152531 [ 544/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.228968 [ 576/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.205876 [ 608/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.252938 [ 640/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.216376 [ 672/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.303535 [ 704/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.261250 [ 736/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.273585 [ 768/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.181899 [ 800/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.268625 [ 832/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.166549 [ 864/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.318496 [ 896/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.191807 [ 928/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.221348 [ 960/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.265097 [ 992/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.139490 [ 1024/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.202750 [ 1056/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.284964 [ 1088/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.147856 [ 1120/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.222365 [ 1152/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.290630 [ 1184/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.185340 [ 1216/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.266233 [ 1248/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.199120 [ 1280/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.248111 [ 1312/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.165602 [ 1344/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.271174 [ 1376/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.307323 [ 1408/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.307557 [ 1440/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.254487 [ 1472/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.174024 [ 1504/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.195439 [ 1536/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.248530 [ 1568/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.249487 [ 1600/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.222999 [ 1632/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.226646 [ 1664/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.183667 [ 1696/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.204611 [ 1728/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.238586 [ 1760/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.129869 [ 1792/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.296865 [ 1824/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.173689 [ 1856/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.142256 [ 1888/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.165062 [ 1920/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.179955 [ 1952/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.216331 [ 1984/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.171883 [ 2016/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.312439 [ 2048/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.230140 [ 2080/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.141145 [ 2112/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.172990 [ 2144/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.249892 [ 2176/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.233810 [ 2208/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.200136 [ 2240/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.190875 [ 2272/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.288086 [ 2304/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.272147 [ 2336/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.137088 [ 2368/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.149323 [ 2400/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.192358 [ 2432/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.137373 [ 2464/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.267044 [ 2496/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.242220 [ 2528/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.156356 [ 2560/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.231234 [ 2592/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.228126 [ 2624/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.115121 [ 2656/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.297800 [ 2688/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.212814 [ 2720/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.201503 [ 2752/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.242346 [ 2784/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.166593 [ 2816/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.259547 [ 2848/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.208023 [ 2880/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.248117 [ 2912/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.258057 [ 2944/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.292537 [ 2976/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.160387 [ 3008/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.224940 [ 3040/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.157378 [ 3072/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.175899 [ 3104/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.259803 [ 3136/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.238393 [ 3168/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.166572 [ 3200/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.339481 [ 3232/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.213730 [ 3264/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.194394 [ 3296/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.229719 [ 3328/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.318574 [ 3360/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.168573 [ 3392/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.222694 [ 3424/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.242664 [ 3456/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.299655 [ 3488/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.195822 [ 3520/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.183814 [ 3552/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.206970 [ 3584/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.304999 [ 3616/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.169169 [ 3648/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.260885 [ 3680/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.350206 [ 3712/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.237155 [ 3744/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.325396 [ 3776/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.206216 [ 3808/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.198491 [ 3840/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.265571 [ 3872/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.230072 [ 3904/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.309027 [ 3936/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.239606 [ 3968/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.260489 [ 4000/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.247163 [ 4032/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.191527 [ 4064/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.311670 [ 4096/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.200683 [ 4128/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.272754 [ 4160/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.280228 [ 4192/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.288332 [ 4224/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.222057 [ 4256/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.246583 [ 4288/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.178516 [ 4320/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.160641 [ 4352/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.127669 [ 4384/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.274056 [ 4416/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.232207 [ 4448/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.257553 [ 4480/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.185044 [ 4512/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.178394 [ 4544/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.265838 [ 4576/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.136488 [ 4608/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.309901 [ 4640/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.301284 [ 4672/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.225423 [ 4704/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.149709 [ 4736/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.221891 [ 4768/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.217960 [ 4800/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.266266 [ 4832/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.175798 [ 4864/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.222051 [ 4896/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.137811 [ 4928/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.330141 [ 4960/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.123000 [ 4992/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.182576 [ 5024/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.206478 [ 5056/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.217102 [ 5088/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.236084 [ 5120/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.387905 [ 5152/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.249394 [ 5184/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.172475 [ 5216/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.264106 [ 5248/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.274683 [ 5280/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.247053 [ 5312/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.265208 [ 5344/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.256910 [ 5376/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.232811 [ 5408/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.213998 [ 5440/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.179962 [ 5472/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.255561 [ 5504/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.214954 [ 5536/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.146029 [ 5568/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.260821 [ 5600/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.120278 [ 5632/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.213001 [ 5664/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.165778 [ 5696/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.250009 [ 5728/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.222928 [ 5760/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.236212 [ 5792/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.209924 [ 5824/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.248001 [ 5856/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.183770 [ 5888/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.236668 [ 5920/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.215126 [ 5952/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.136778 [ 5984/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.148797 [ 6016/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.126319 [ 6048/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.208600 [ 6080/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.164590 [ 6112/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.236569 [ 6144/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.199915 [ 6176/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.268137 [ 6208/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.159682 [ 6240/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.157083 [ 6272/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.302389 [ 6304/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.240395 [ 6336/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.221025 [ 6368/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.172067 [ 6400/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.247050 [ 6432/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.253472 [ 6464/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.226650 [ 6496/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.160569 [ 6528/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.218044 [ 6560/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.279959 [ 6592/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.183084 [ 6624/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.192506 [ 6656/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.178745 [ 6688/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.192153 [ 6720/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.164385 [ 6752/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.151949 [ 6784/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.234765 [ 6816/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.174078 [ 6848/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.296372 [ 6880/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.229505 [ 6912/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.196197 [ 6944/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.243328 [ 6976/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.177817 [ 7008/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.155138 [ 7040/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.189459 [ 7072/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.126992 [ 7104/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.280099 [ 7136/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.175875 [ 7168/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.158129 [ 7200/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.164702 [ 7232/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.277001 [ 7264/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.210509 [ 7296/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.123481 [ 7328/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.226150 [ 7360/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.154819 [ 7392/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.233503 [ 7424/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.220385 [ 7456/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.187288 [ 7488/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.276369 [ 7520/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.334941 [ 7552/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.197451 [ 7584/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.143598 [ 7616/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.245081 [ 7648/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.360295 [ 7680/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.203139 [ 7712/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.194223 [ 7744/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.270007 [ 7776/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.230305 [ 7808/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.152431 [ 7840/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.191935 [ 7872/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.249609 [ 7904/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.220676 [ 7936/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.206100 [ 7968/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.192009 [ 8000/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.183450 [ 8032/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.173722 [ 8064/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.200684 [ 8096/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.224084 [ 8128/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.214537 [ 8160/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.189935 [ 8192/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.245387 [ 8224/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.177492 [ 8256/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.241513 [ 8288/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.305691 [ 8320/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.211412 [ 8352/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.250193 [ 8384/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.295288 [ 8416/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.229402 [ 8448/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.244670 [ 8480/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.149557 [ 8512/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.183073 [ 8544/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.212229 [ 8576/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.288090 [ 8608/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.279171 [ 8640/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.266588 [ 8672/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.253061 [ 8704/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.138534 [ 8736/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.173121 [ 8768/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.132802 [ 8800/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.201729 [ 8832/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.172576 [ 8864/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.199443 [ 8896/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.217102 [ 8928/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.244003 [ 8960/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.279398 [ 8992/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.193641 [ 9024/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.268952 [ 9056/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.225186 [ 9088/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.245573 [ 9120/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.108579 [ 9152/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.314519 [ 9184/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.171780 [ 9216/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.273603 [ 9248/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.229656 [ 9280/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.138488 [ 9312/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.221423 [ 9344/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.232574 [ 9376/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.210066 [ 9408/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.297898 [ 9440/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.248950 [ 9472/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.202373 [ 9504/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.205766 [ 9536/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.181967 [ 9568/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.254840 [ 9600/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.186254 [ 9632/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.148409 [ 9664/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.130409 [ 9696/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.161464 [ 9728/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.208292 [ 9760/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.180636 [ 9792/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.183695 [ 9824/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.172057 [ 9856/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.204111 [ 9888/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.232008 [ 9920/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.232224 [ 9952/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.223755 [ 9984/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.149436 [10016/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.217344 [10048/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.316035 [10080/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.209150 [10112/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.154807 [10144/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.255307 [10176/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.172469 [10208/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.294119 [10240/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.200856 [10272/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.311626 [10304/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.211747 [10336/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.206505 [10368/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.211683 [10400/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.226631 [10432/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.200263 [10464/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.212335 [10496/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.181286 [10528/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.249905 [10560/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.216955 [10592/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.245049 [10624/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.196793 [10656/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.244641 [10688/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.208036 [10720/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.223739 [10752/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.220024 [10784/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.234750 [10816/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.150610 [10848/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.212684 [10880/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.202939 [10912/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.267466 [10944/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.273974 [10976/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.200842 [11008/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.306530 [11040/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.238354 [11072/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.253187 [11104/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.195061 [11136/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.242094 [11168/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.209779 [11200/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.202064 [11232/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.222127 [11264/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.188260 [11296/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.181366 [11328/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.200751 [11360/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.166633 [11392/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.274091 [11424/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.220228 [11456/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.190726 [11488/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.267203 [11520/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.192319 [11552/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.137862 [11584/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.240537 [11616/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.194731 [11648/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.263322 [11680/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.243187 [11712/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.245530 [11744/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.124146 [11776/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.263128 [11808/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.179014 [11840/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.347486 [11872/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.160716 [11904/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.183134 [11936/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.224966 [11968/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.175730 [12000/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.216601 [12032/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.204837 [12064/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.235793 [12096/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.190376 [12128/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.186091 [12160/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.288791 [12192/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.202720 [12224/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.217059 [12256/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.262589 [12288/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.165220 [12320/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.159530 [12352/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.145644 [12384/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.245007 [12416/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.201984 [12448/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.244171 [12480/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.271882 [12512/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.216339 [12544/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.324662 [12576/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.232038 [12608/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.241010 [12640/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.340556 [12672/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.256851 [12704/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.387833 [12736/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.183579 [12768/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.211246 [12800/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.378077 [12832/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.315023 [12864/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.319278 [12896/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.227820 [12928/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.295771 [12960/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.203950 [12992/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.194704 [13024/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.323154 [13056/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.238219 [13088/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.267464 [13120/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.201145 [13152/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.196906 [13184/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.203677 [13216/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.263554 [13248/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.193726 [13280/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.198828 [13312/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.238259 [13344/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.308062 [13376/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.256295 [13408/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.333683 [13440/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.252994 [13472/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.268362 [13504/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.227659 [13536/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.284095 [13568/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.371464 [13600/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.203985 [13632/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.242077 [13664/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.240195 [13696/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.267135 [13728/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.194992 [13760/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.206432 [13792/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.191620 [13824/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.201569 [13856/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.323337 [13888/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.196177 [13920/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.261937 [13952/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.251222 [13984/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.219835 [14016/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.136323 [14048/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.182986 [14080/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.288272 [14112/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.270266 [14144/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.142259 [14176/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.171108 [14208/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.236305 [14240/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.174552 [14272/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.126953 [14304/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.237397 [14336/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.173166 [14368/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.212297 [14400/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.243950 [14432/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.271612 [14464/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.204678 [14496/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.181931 [14528/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.204002 [14560/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.243872 [14592/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.238708 [14624/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.164005 [14656/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.178919 [14688/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.212821 [14720/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.184249 [14752/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.138164 [14784/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.192193 [14816/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.223106 [14848/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.194680 [14880/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.152574 [14912/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.149429 [14944/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.177374 [14976/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.195126 [15008/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.212739 [15040/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.307032 [15072/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.139463 [15104/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.230076 [15136/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.188955 [15168/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.203653 [15200/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.256737 [15232/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.297091 [15264/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.216233 [15296/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.178064 [15328/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.238862 [15360/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.264962 [15392/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.264734 [15424/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.158293 [15456/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.186746 [15488/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.230235 [15520/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.202321 [15552/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.203030 [15584/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.138795 [15616/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.215941 [15648/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.258951 [15680/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.182581 [15712/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.232647 [15744/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.140191 [15776/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.239489 [15808/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.319775 [15840/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.214280 [15872/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.185741 [15904/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.121622 [15936/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.168076 [15968/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.206719 [16000/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.167060 [16032/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.225868 [16064/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.159247 [16096/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.188460 [16128/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.246527 [16160/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.262945 [16192/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.104999 [16224/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.209233 [16256/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.308121 [16288/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.153163 [16320/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.174002 [16352/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.296604 [16384/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.249524 [16416/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.250250 [16448/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.260052 [16480/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.223199 [16512/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.316725 [16544/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.183175 [16576/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.272867 [16608/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.296211 [16640/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.265999 [16672/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.231721 [16704/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.195683 [16736/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.213650 [16768/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.286673 [16800/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.263476 [16832/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.234497 [16864/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.150373 [16896/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.302963 [16928/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.176755 [16960/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.214825 [16992/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.199393 [17024/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.147685 [17056/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.193601 [17088/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.200121 [17120/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.212253 [17152/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.185324 [17184/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.379382 [17216/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.210509 [17248/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.190974 [17280/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.221303 [17312/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.236319 [17344/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.160153 [17376/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.199367 [17408/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.360566 [17440/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.268990 [17472/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.211084 [17504/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.182210 [17536/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.219046 [17568/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.170215 [17600/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.206004 [17632/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.254731 [17664/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.200516 [17696/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.158702 [17728/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.194522 [17760/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.345973 [17792/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.266225 [17824/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.279849 [17856/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.163907 [17888/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.174108 [17920/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.227016 [17952/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.171964 [17984/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.187405 [18016/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.197508 [18048/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.191537 [18080/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.204899 [18112/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.173480 [18144/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.186809 [18176/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.221090 [18208/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.137874 [18240/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.249297 [18272/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.165768 [18304/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.244533 [18336/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.239737 [18368/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.241896 [18400/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.189370 [18432/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.250210 [18464/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.216707 [18496/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.155109 [18528/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.159431 [18560/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.162089 [18592/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.274022 [18624/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.185149 [18656/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.233813 [18688/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.217691 [18720/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.212723 [18752/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.159225 [18784/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.185577 [18816/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.241097 [18848/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.161478 [18880/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.156518 [18912/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.139688 [18944/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.232818 [18976/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.145945 [19008/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.189076 [19040/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.226806 [19072/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.127456 [19104/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.266221 [19136/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.218490 [19168/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.097862 [19200/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.289249 [19232/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.147940 [19264/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.234521 [19296/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.247782 [19328/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.267581 [19360/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.256745 [19392/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.207757 [19424/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.139942 [19456/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.217612 [19488/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.285373 [19520/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.142084 [19552/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.223353 [19584/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.250938 [19616/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.302070 [19648/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.146938 [19680/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.316811 [19712/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.169301 [19744/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.127351 [19776/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.267716 [19808/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.194572 [19840/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.149062 [19872/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.262702 [19904/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.128083 [19936/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.245276 [19968/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.267508 [20000/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.227068 [20032/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.169155 [20064/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.120882 [20096/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.215232 [20128/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.199171 [20160/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.179092 [20192/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.222220 [20224/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.267710 [20256/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.197963 [20288/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.165684 [20320/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.298664 [20352/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.235801 [20384/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.144519 [20416/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.154535 [20448/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.259273 [20480/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.276344 [20512/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.325690 [20544/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.183084 [20576/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.184644 [20608/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.167353 [20640/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.347487 [20672/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.121812 [20704/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.207398 [20736/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.214216 [20768/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.220034 [20800/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.185389 [20832/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.262640 [20864/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.251275 [20896/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.330999 [20928/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.186302 [20960/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.168784 [20992/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.208204 [21024/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.298147 [21056/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.304344 [21088/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.212848 [21120/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.170375 [21152/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.211850 [21184/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.297419 [21216/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.203983 [21248/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.235733 [21280/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.263904 [21312/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.129099 [21344/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.228037 [21376/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.218525 [21408/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.129618 [21440/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.305955 [21472/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.219280 [21504/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.247484 [21536/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.273206 [21568/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.224433 [21600/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.259896 [21632/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.244686 [21664/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.212818 [21696/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.244019 [21728/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.217285 [21760/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.275797 [21792/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.240123 [21824/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.174152 [21856/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.157241 [21888/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.190499 [21920/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.190707 [21952/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.233388 [21984/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.180262 [22016/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.317126 [22048/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.220327 [22080/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.272859 [22112/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.299664 [22144/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.229596 [22176/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.248504 [22208/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.162580 [22240/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.231606 [22272/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.303593 [22304/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.172868 [22336/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.210748 [22368/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.167460 [22400/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.147383 [22432/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.164735 [22464/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.249734 [22496/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.276678 [22528/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.178584 [22560/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.212889 [22592/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.272670 [22624/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.189566 [22656/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.316943 [22688/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.167052 [22720/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.231092 [22752/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.196497 [22784/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.303791 [22816/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.263763 [22848/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.134698 [22880/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.140981 [22912/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.163067 [22944/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.223385 [22976/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.130540 [23008/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.192235 [23040/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.200063 [23072/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.250664 [23104/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.211039 [23136/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.302456 [23168/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.227083 [23200/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.268214 [23232/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.273416 [23264/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.234224 [23296/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.255429 [23328/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.200053 [23360/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.173686 [23392/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.348766 [23424/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.307109 [23456/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.222225 [23488/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.295277 [23520/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.238522 [23552/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.199476 [23584/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.257934 [23616/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.207991 [23648/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.336085 [23680/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.256725 [23712/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.193944 [23744/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.201569 [23776/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.346324 [23808/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.215513 [23840/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.213656 [23872/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.292028 [23904/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.193876 [23936/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.204560 [23968/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.191703 [24000/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.162407 [24032/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.242294 [24064/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.161897 [24096/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.166683 [24128/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.217233 [24160/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.194068 [24192/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.266831 [24224/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.210604 [24256/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.243981 [24288/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.371541 [24320/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.171825 [24352/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.238861 [24384/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.247933 [24416/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.226811 [24448/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.181774 [24480/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.201145 [24512/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.239167 [24544/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.211622 [24576/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.291568 [24608/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.250099 [24640/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.252220 [24672/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.145238 [24704/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.201767 [24736/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.198486 [24768/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.250616 [24800/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.204755 [24832/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.185662 [24864/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.295531 [24872/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.295531 [24872/24872]: : 778it [00:26, 29.80it/s]
Epoch 5, time=103.22s
0%| | 0/777 [00:00<?, ?it/s]
loss: 0.247508 [ 32/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.107770 [ 64/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.191895 [ 96/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.185435 [ 128/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.273653 [ 160/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.197334 [ 192/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.206585 [ 224/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.168245 [ 256/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.354287 [ 288/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.315022 [ 320/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.350037 [ 352/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.152254 [ 384/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.213365 [ 416/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.248350 [ 448/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.203811 [ 480/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.196706 [ 512/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.143516 [ 544/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.249365 [ 576/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.188079 [ 608/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.263245 [ 640/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.200564 [ 672/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.317188 [ 704/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.249287 [ 736/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.241221 [ 768/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.186920 [ 800/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.233776 [ 832/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.150031 [ 864/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.287429 [ 896/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.190570 [ 928/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.220570 [ 960/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.288047 [ 992/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.179356 [ 1024/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.230236 [ 1056/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.276638 [ 1088/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.175973 [ 1120/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.214085 [ 1152/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.225440 [ 1184/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.222119 [ 1216/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.250723 [ 1248/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.223197 [ 1280/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.241433 [ 1312/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.177675 [ 1344/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.242223 [ 1376/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.317011 [ 1408/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.304138 [ 1440/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.245571 [ 1472/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.124739 [ 1504/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.175748 [ 1536/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.261992 [ 1568/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.203177 [ 1600/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.203423 [ 1632/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.226729 [ 1664/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.183304 [ 1696/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.174823 [ 1728/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.234233 [ 1760/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.130547 [ 1792/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.273105 [ 1824/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.162498 [ 1856/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.115217 [ 1888/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.159306 [ 1920/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.171839 [ 1952/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.202890 [ 1984/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.168796 [ 2016/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.287259 [ 2048/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.202491 [ 2080/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.126466 [ 2112/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.155766 [ 2144/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.246015 [ 2176/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.214302 [ 2208/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.180958 [ 2240/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.213087 [ 2272/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.268096 [ 2304/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.232260 [ 2336/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.134445 [ 2368/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.128096 [ 2400/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.169859 [ 2432/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.123063 [ 2464/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.301201 [ 2496/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.197454 [ 2528/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.121975 [ 2560/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.212761 [ 2592/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.225658 [ 2624/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.105566 [ 2656/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.233698 [ 2688/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.262342 [ 2720/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.192893 [ 2752/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.212394 [ 2784/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.158042 [ 2816/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.244831 [ 2848/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.192648 [ 2880/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.247019 [ 2912/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.255004 [ 2944/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.286649 [ 2976/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.157880 [ 3008/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.207636 [ 3040/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.169127 [ 3072/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.189640 [ 3104/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.257230 [ 3136/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.239012 [ 3168/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.144183 [ 3200/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.298920 [ 3232/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.208603 [ 3264/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.175511 [ 3296/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.203100 [ 3328/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.266310 [ 3360/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.144680 [ 3392/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.178288 [ 3424/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.225451 [ 3456/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.207763 [ 3488/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.189666 [ 3520/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.184755 [ 3552/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.174529 [ 3584/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.293467 [ 3616/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.155563 [ 3648/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.238449 [ 3680/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.322177 [ 3712/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.161467 [ 3744/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.267087 [ 3776/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.187396 [ 3808/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.176611 [ 3840/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.182544 [ 3872/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.193825 [ 3904/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.329341 [ 3936/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.202257 [ 3968/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.249214 [ 4000/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.238769 [ 4032/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.134370 [ 4064/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.238305 [ 4096/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.161641 [ 4128/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.239942 [ 4160/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.226567 [ 4192/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.264315 [ 4224/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.226197 [ 4256/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.230171 [ 4288/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.149618 [ 4320/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.141170 [ 4352/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.099565 [ 4384/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.261980 [ 4416/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.176185 [ 4448/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.229652 [ 4480/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.165650 [ 4512/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.145513 [ 4544/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.246008 [ 4576/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.122437 [ 4608/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.232757 [ 4640/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.270647 [ 4672/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.195059 [ 4704/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.138158 [ 4736/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.213085 [ 4768/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.290771 [ 4800/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.264968 [ 4832/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.171512 [ 4864/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.201805 [ 4896/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.126105 [ 4928/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.316081 [ 4960/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.137034 [ 4992/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.178077 [ 5024/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.200271 [ 5056/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.208012 [ 5088/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.215928 [ 5120/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.356353 [ 5152/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.250612 [ 5184/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.156720 [ 5216/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.255849 [ 5248/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.256303 [ 5280/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.232425 [ 5312/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.245643 [ 5344/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.242419 [ 5376/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.223038 [ 5408/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.209865 [ 5440/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.165906 [ 5472/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.225257 [ 5504/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.199320 [ 5536/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.135503 [ 5568/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.251455 [ 5600/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.115998 [ 5632/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.203420 [ 5664/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.148431 [ 5696/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.235961 [ 5728/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.206691 [ 5760/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.210224 [ 5792/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.183741 [ 5824/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.209713 [ 5856/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.173321 [ 5888/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.222689 [ 5920/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.201670 [ 5952/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.126265 [ 5984/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.134253 [ 6016/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.113076 [ 6048/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.190462 [ 6080/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.150892 [ 6112/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.226339 [ 6144/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.176824 [ 6176/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.235889 [ 6208/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.150273 [ 6240/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.143641 [ 6272/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.271235 [ 6304/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.190144 [ 6336/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.211342 [ 6368/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.167190 [ 6400/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.232733 [ 6432/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.223113 [ 6464/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.206491 [ 6496/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.154523 [ 6528/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.227253 [ 6560/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.262261 [ 6592/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.169113 [ 6624/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.165750 [ 6656/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.183133 [ 6688/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.178251 [ 6720/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.147637 [ 6752/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.152851 [ 6784/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.201335 [ 6816/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.153556 [ 6848/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.272862 [ 6880/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.242918 [ 6912/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.197100 [ 6944/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.217622 [ 6976/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.165381 [ 7008/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.139402 [ 7040/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.182072 [ 7072/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.114737 [ 7104/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.249631 [ 7136/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.170580 [ 7168/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.158931 [ 7200/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.110436 [ 7232/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.251782 [ 7264/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.185439 [ 7296/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.107795 [ 7328/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.217161 [ 7360/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.148459 [ 7392/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.210315 [ 7424/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.222556 [ 7456/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.172128 [ 7488/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.275211 [ 7520/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.331196 [ 7552/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.196378 [ 7584/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.137247 [ 7616/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.235294 [ 7648/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.362841 [ 7680/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.189168 [ 7712/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.181004 [ 7744/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.252733 [ 7776/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.213108 [ 7808/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.148658 [ 7840/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.190155 [ 7872/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.222807 [ 7904/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.202676 [ 7936/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.176224 [ 7968/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.172070 [ 8000/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.155767 [ 8032/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.165599 [ 8064/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.193346 [ 8096/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.185500 [ 8128/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.207330 [ 8160/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.177091 [ 8192/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.210450 [ 8224/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.162254 [ 8256/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.228665 [ 8288/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.292407 [ 8320/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.204844 [ 8352/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.229784 [ 8384/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.267839 [ 8416/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.225809 [ 8448/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.235837 [ 8480/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.137724 [ 8512/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.164164 [ 8544/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.196807 [ 8576/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.263497 [ 8608/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.270165 [ 8640/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.242788 [ 8672/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.216690 [ 8704/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.133095 [ 8736/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.165847 [ 8768/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.121086 [ 8800/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.186255 [ 8832/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.150729 [ 8864/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.193898 [ 8896/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.192652 [ 8928/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.219541 [ 8960/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.277546 [ 8992/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.169681 [ 9024/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.253011 [ 9056/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.177170 [ 9088/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.222629 [ 9120/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.108920 [ 9152/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.273092 [ 9184/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.154700 [ 9216/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.218985 [ 9248/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.220054 [ 9280/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.126910 [ 9312/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.210224 [ 9344/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.183356 [ 9376/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.199658 [ 9408/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.289492 [ 9440/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.254268 [ 9472/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.199278 [ 9504/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.233597 [ 9536/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.163353 [ 9568/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.249097 [ 9600/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.160562 [ 9632/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.140268 [ 9664/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.129684 [ 9696/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.156645 [ 9728/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.156258 [ 9760/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.159889 [ 9792/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.184656 [ 9824/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.148294 [ 9856/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.227532 [ 9888/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.233673 [ 9920/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.148888 [ 9952/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.209379 [ 9984/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.143813 [10016/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.186751 [10048/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.302387 [10080/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.202299 [10112/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.148842 [10144/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.236497 [10176/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.177302 [10208/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.299292 [10240/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.205802 [10272/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.371802 [10304/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.197504 [10336/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.210421 [10368/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.229371 [10400/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.240836 [10432/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.172733 [10464/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.227181 [10496/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.158819 [10528/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.215702 [10560/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.240556 [10592/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.220990 [10624/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.185463 [10656/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.315996 [10688/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.203269 [10720/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.210756 [10752/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.224285 [10784/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.230184 [10816/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.148410 [10848/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.184887 [10880/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.217372 [10912/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.270042 [10944/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.267989 [10976/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.211244 [11008/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.310693 [11040/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.235077 [11072/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.169195 [11104/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.190278 [11136/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.247734 [11168/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.196276 [11200/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.188102 [11232/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.221072 [11264/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.171037 [11296/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.184853 [11328/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.176064 [11360/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.173173 [11392/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.274672 [11424/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.212240 [11456/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.168762 [11488/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.249401 [11520/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.197696 [11552/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.157797 [11584/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.230841 [11616/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.178271 [11648/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.247963 [11680/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.244281 [11712/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.233837 [11744/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.114347 [11776/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.302478 [11808/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.157859 [11840/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.372856 [11872/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.149265 [11904/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.201164 [11936/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.182170 [11968/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.175074 [12000/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.200924 [12032/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.181783 [12064/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.206998 [12096/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.176619 [12128/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.185017 [12160/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.272565 [12192/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.212487 [12224/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.203197 [12256/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.244044 [12288/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.159910 [12320/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.160865 [12352/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.125429 [12384/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.198813 [12416/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.172970 [12448/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.213912 [12480/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.248921 [12512/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.176658 [12544/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.305529 [12576/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.219589 [12608/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.211535 [12640/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.365839 [12672/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.198785 [12704/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.296669 [12736/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.158382 [12768/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.178897 [12800/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.285128 [12832/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.267766 [12864/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.283559 [12896/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.188823 [12928/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.217540 [12960/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.172569 [12992/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.177102 [13024/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.242473 [13056/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.199408 [13088/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.255144 [13120/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.171487 [13152/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.185129 [13184/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.185782 [13216/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.236279 [13248/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.133608 [13280/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.202357 [13312/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.237526 [13344/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.261440 [13376/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.208343 [13408/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.280436 [13440/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.186130 [13472/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.207987 [13504/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.209745 [13536/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.251543 [13568/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.352739 [13600/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.167130 [13632/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.208868 [13664/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.205086 [13696/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.236874 [13728/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.160045 [13760/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.158523 [13792/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.169635 [13824/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.182511 [13856/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.287796 [13888/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.179064 [13920/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.253593 [13952/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.244165 [13984/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.197946 [14016/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.134705 [14048/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.171018 [14080/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.269732 [14112/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.262816 [14144/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.132004 [14176/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.157615 [14208/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.207860 [14240/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.152994 [14272/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.126422 [14304/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.221026 [14336/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.170114 [14368/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.196365 [14400/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.218124 [14432/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.254258 [14464/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.195850 [14496/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.176781 [14528/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.190524 [14560/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.244443 [14592/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.214383 [14624/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.149467 [14656/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.171658 [14688/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.196224 [14720/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.169640 [14752/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.129997 [14784/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.183199 [14816/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.213874 [14848/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.187771 [14880/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.146269 [14912/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.140021 [14944/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.160234 [14976/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.183723 [15008/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.204388 [15040/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.295938 [15072/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.130085 [15104/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.222190 [15136/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.179690 [15168/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.202846 [15200/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.212662 [15232/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.268404 [15264/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.206478 [15296/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.173849 [15328/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.217304 [15360/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.259610 [15392/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.217900 [15424/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.158644 [15456/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.168078 [15488/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.220365 [15520/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.184349 [15552/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.220990 [15584/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.148588 [15616/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.201541 [15648/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.247374 [15680/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.181644 [15712/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.203527 [15744/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.130473 [15776/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.223455 [15808/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.312939 [15840/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.183315 [15872/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.174071 [15904/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.103600 [15936/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.134437 [15968/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.181711 [16000/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.159613 [16032/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.220307 [16064/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.153396 [16096/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.192372 [16128/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.225377 [16160/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.244734 [16192/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.113850 [16224/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.188969 [16256/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.288497 [16288/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.141324 [16320/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.166621 [16352/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.249977 [16384/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.236134 [16416/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.184115 [16448/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.235798 [16480/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.204944 [16512/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.250042 [16544/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.184391 [16576/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.272976 [16608/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.271391 [16640/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.241098 [16672/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.214935 [16704/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.192067 [16736/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.183790 [16768/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.259282 [16800/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.242484 [16832/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.236282 [16864/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.141057 [16896/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.286503 [16928/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.157681 [16960/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.195680 [16992/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.186801 [17024/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.150122 [17056/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.175086 [17088/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.188172 [17120/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.197018 [17152/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.179202 [17184/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.351348 [17216/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.188835 [17248/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.172498 [17280/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.208911 [17312/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.212363 [17344/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.151236 [17376/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.182299 [17408/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.350897 [17440/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.250088 [17472/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.176898 [17504/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.171316 [17536/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.198807 [17568/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.169363 [17600/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.184010 [17632/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.226215 [17664/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.177780 [17696/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.133549 [17728/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.180349 [17760/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.316981 [17792/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.264614 [17824/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.279382 [17856/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.143900 [17888/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.160202 [17920/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.232302 [17952/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.160265 [17984/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.167604 [18016/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.177793 [18048/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.188289 [18080/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.198893 [18112/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.186278 [18144/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.172507 [18176/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.204215 [18208/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.130652 [18240/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.201629 [18272/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.147360 [18304/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.237929 [18336/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.250633 [18368/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.219960 [18400/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.177439 [18432/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.232267 [18464/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.202983 [18496/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.142400 [18528/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.138345 [18560/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.155356 [18592/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.243271 [18624/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.184351 [18656/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.212904 [18688/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.213517 [18720/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.194587 [18752/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.140571 [18784/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.161093 [18816/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.239187 [18848/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.155477 [18880/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.158432 [18912/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.129022 [18944/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.231216 [18976/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.133290 [19008/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.169377 [19040/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.206397 [19072/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.119073 [19104/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.262641 [19136/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.207476 [19168/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.102448 [19200/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.286786 [19232/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.128933 [19264/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.225220 [19296/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.247869 [19328/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.245628 [19360/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.250395 [19392/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.213789 [19424/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.142537 [19456/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.219244 [19488/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.265136 [19520/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.138637 [19552/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.214988 [19584/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.242010 [19616/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.307417 [19648/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.140510 [19680/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.312215 [19712/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.156168 [19744/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.123452 [19776/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.265549 [19808/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.189731 [19840/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.146729 [19872/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.243617 [19904/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.115797 [19936/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.238741 [19968/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.219978 [20000/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.225330 [20032/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.181529 [20064/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.114161 [20096/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.215235 [20128/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.226567 [20160/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.163598 [20192/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.226241 [20224/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.261433 [20256/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.176619 [20288/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.152696 [20320/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.279440 [20352/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.201909 [20384/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.123842 [20416/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.151668 [20448/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.269098 [20480/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.284852 [20512/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.297632 [20544/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.184770 [20576/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.181546 [20608/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.160485 [20640/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.316723 [20672/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.127855 [20704/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.186993 [20736/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.203413 [20768/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.217267 [20800/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.170487 [20832/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.257547 [20864/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.251880 [20896/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.314565 [20928/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.180826 [20960/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.183479 [20992/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.197496 [21024/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.258218 [21056/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.306564 [21088/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.177421 [21120/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.149210 [21152/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.203659 [21184/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.262192 [21216/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.191984 [21248/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.217407 [21280/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.218462 [21312/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.115107 [21344/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.206755 [21376/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.166911 [21408/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.115886 [21440/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.264001 [21472/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.189434 [21504/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.238090 [21536/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.264603 [21568/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.216812 [21600/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.236615 [21632/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.226862 [21664/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.191937 [21696/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.217208 [21728/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.183312 [21760/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.252311 [21792/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.212096 [21824/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.168643 [21856/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.129594 [21888/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.180189 [21920/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.176878 [21952/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.225498 [21984/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.177756 [22016/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.274748 [22048/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.207351 [22080/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.239114 [22112/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.302270 [22144/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.195189 [22176/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.190007 [22208/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.160698 [22240/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.224011 [22272/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.260529 [22304/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.144000 [22336/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.195128 [22368/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.157393 [22400/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.144305 [22432/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.148379 [22464/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.237341 [22496/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.252153 [22528/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.175248 [22560/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.213259 [22592/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.257127 [22624/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.172523 [22656/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.302998 [22688/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.147909 [22720/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.224967 [22752/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.184078 [22784/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.277167 [22816/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.246037 [22848/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.122506 [22880/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.127886 [22912/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.158597 [22944/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.190603 [22976/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.135003 [23008/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.161976 [23040/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.163299 [23072/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.199570 [23104/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.185233 [23136/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.261228 [23168/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.230479 [23200/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.244232 [23232/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.219994 [23264/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.198829 [23296/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.218773 [23328/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.163774 [23360/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.157422 [23392/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.290444 [23424/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.255668 [23456/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.190272 [23488/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.262361 [23520/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.244520 [23552/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.175543 [23584/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.164477 [23616/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.130075 [23648/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.311188 [23680/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.155061 [23712/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.193287 [23744/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.155188 [23776/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.321149 [23808/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.219336 [23840/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.193887 [23872/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.258977 [23904/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.163867 [23936/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.166623 [23968/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.145518 [24000/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.129750 [24032/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.209367 [24064/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.147102 [24096/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.148828 [24128/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.196962 [24160/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.175421 [24192/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.227778 [24224/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.164391 [24256/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.173856 [24288/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.340976 [24320/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.142517 [24352/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.222114 [24384/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.174540 [24416/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.205554 [24448/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.149958 [24480/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.138430 [24512/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.218126 [24544/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.158507 [24576/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.229717 [24608/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.187091 [24640/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.182804 [24672/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.124053 [24704/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.194424 [24736/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.161039 [24768/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.216243 [24800/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.163729 [24832/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.171278 [24864/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.279357 [24872/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.279357 [24872/24872]: : 778it [00:26, 29.45it/s]
-------------------------------
LR=0.0001, batch_size=64
-------------------------------
Epoch 1, time=129.64s
0%| | 0/388 [00:00<?, ?it/s]
loss: 0.133366 [ 64/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.326509 [ 128/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.325199 [ 192/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.355387 [ 256/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.293687 [ 320/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.308483 [ 384/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.308965 [ 448/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.203022 [ 512/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.214851 [ 576/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.294941 [ 640/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.307943 [ 704/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.259772 [ 768/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.191066 [ 832/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.210546 [ 896/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.212936 [ 960/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.203774 [ 1024/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.282132 [ 1088/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.177345 [ 1152/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.223716 [ 1216/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.252527 [ 1280/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.219695 [ 1344/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.262135 [ 1408/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.282792 [ 1472/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.141393 [ 1536/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.218497 [ 1600/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.240207 [ 1664/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.184582 [ 1728/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.176843 [ 1792/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.194721 [ 1856/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.131786 [ 1920/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.197942 [ 1984/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.208849 [ 2048/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.182575 [ 2112/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.206571 [ 2176/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.184141 [ 2240/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.214068 [ 2304/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.178484 [ 2368/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.135989 [ 2432/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.178147 [ 2496/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.148615 [ 2560/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.206434 [ 2624/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.163136 [ 2688/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.193882 [ 2752/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.155130 [ 2816/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.197887 [ 2880/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.214459 [ 2944/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.204185 [ 3008/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.164303 [ 3072/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.190950 [ 3136/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.173251 [ 3200/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.236935 [ 3264/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.168717 [ 3328/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.177948 [ 3392/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.184629 [ 3456/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.177435 [ 3520/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.151363 [ 3584/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.199391 [ 3648/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.255873 [ 3712/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.194871 [ 3776/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.169156 [ 3840/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.169487 [ 3904/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.240689 [ 3968/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.230403 [ 4032/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.159089 [ 4096/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.181625 [ 4160/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.221735 [ 4224/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.227257 [ 4288/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.131202 [ 4352/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.158625 [ 4416/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.190837 [ 4480/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.146930 [ 4544/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.163294 [ 4608/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.213105 [ 4672/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.150272 [ 4736/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.240600 [ 4800/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.201634 [ 4864/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.141715 [ 4928/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.180824 [ 4992/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.168242 [ 5056/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.187154 [ 5120/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.271973 [ 5184/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.193207 [ 5248/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.225593 [ 5312/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.213886 [ 5376/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.186173 [ 5440/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.179756 [ 5504/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.156615 [ 5568/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.170035 [ 5632/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.161685 [ 5696/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.193922 [ 5760/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.174961 [ 5824/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.175302 [ 5888/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.186055 [ 5952/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.121212 [ 6016/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.136798 [ 6080/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.166369 [ 6144/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.188559 [ 6208/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.127903 [ 6272/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.201999 [ 6336/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.178250 [ 6400/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.222933 [ 6464/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.165756 [ 6528/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.220571 [ 6592/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.151626 [ 6656/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.166558 [ 6720/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.123148 [ 6784/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.150391 [ 6848/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.214993 [ 6912/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.174884 [ 6976/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.132659 [ 7040/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.120999 [ 7104/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.177120 [ 7168/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.123543 [ 7232/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.200391 [ 7296/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.137669 [ 7360/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.141243 [ 7424/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.163485 [ 7488/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.273054 [ 7552/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.135979 [ 7616/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.276608 [ 7680/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.159490 [ 7744/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.209934 [ 7808/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.136751 [ 7872/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.162265 [ 7936/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.132551 [ 8000/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.134825 [ 8064/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.158195 [ 8128/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.164824 [ 8192/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.173572 [ 8256/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.225060 [ 8320/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.170527 [ 8384/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.218285 [ 8448/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.150272 [ 8512/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.133625 [ 8576/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.238535 [ 8640/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.189558 [ 8704/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.135930 [ 8768/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.137357 [ 8832/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.166591 [ 8896/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.175790 [ 8960/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.202972 [ 9024/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.178389 [ 9088/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.140743 [ 9152/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.174452 [ 9216/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.187299 [ 9280/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.155173 [ 9344/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.160811 [ 9408/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.210892 [ 9472/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.156258 [ 9536/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.174203 [ 9600/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.134615 [ 9664/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.126607 [ 9728/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.139438 [ 9792/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.144796 [ 9856/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.196952 [ 9920/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.152886 [ 9984/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.137573 [10048/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.186161 [10112/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.173481 [10176/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.182654 [10240/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.229356 [10304/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.175341 [10368/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.180665 [10432/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.142846 [10496/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.161497 [10560/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.189039 [10624/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.239473 [10688/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.184373 [10752/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.178806 [10816/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.137413 [10880/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.171836 [10944/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.180752 [11008/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.210429 [11072/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.148429 [11136/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.209502 [11200/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.194127 [11264/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.159846 [11328/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.149638 [11392/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.211487 [11456/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.219176 [11520/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.135006 [11584/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.193055 [11648/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.212627 [11712/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.150065 [11776/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.189432 [11840/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.179838 [11904/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.168510 [11968/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.173489 [12032/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.158893 [12096/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.157273 [12160/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.197268 [12224/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.180402 [12288/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.141144 [12352/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.142577 [12416/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.170030 [12480/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.186192 [12544/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.216878 [12608/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.230309 [12672/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.213298 [12736/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.140170 [12800/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.212651 [12864/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.199103 [12928/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.158151 [12992/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.187512 [13056/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.173922 [13120/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.156914 [13184/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.167743 [13248/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.134397 [13312/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.188661 [13376/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.216547 [13440/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.174825 [13504/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.196487 [13568/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.255848 [13632/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.186590 [13696/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.162368 [13760/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.156290 [13824/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.217423 [13888/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.200581 [13952/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.173493 [14016/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.133542 [14080/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.244333 [14144/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.111312 [14208/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.152576 [14272/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.142898 [14336/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.171330 [14400/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.202308 [14464/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.142568 [14528/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.166106 [14592/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.149810 [14656/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.175661 [14720/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.130899 [14784/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.190066 [14848/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.165681 [14912/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.149854 [14976/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.191838 [15040/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.163529 [15104/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.184118 [15168/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.179958 [15232/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.206280 [15296/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.157448 [15360/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.207729 [15424/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.137487 [15488/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.171686 [15552/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.187326 [15616/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.191365 [15680/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.146203 [15744/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.160742 [15808/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.207917 [15872/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.144751 [15936/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.147069 [16000/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.171630 [16064/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.149827 [16128/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.215683 [16192/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.139163 [16256/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.177713 [16320/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.177380 [16384/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.189262 [16448/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.190021 [16512/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.188711 [16576/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.237488 [16640/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.207170 [16704/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.169288 [16768/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.218069 [16832/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.169668 [16896/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.191287 [16960/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.160681 [17024/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.128080 [17088/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.170648 [17152/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.239830 [17216/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.169874 [17280/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.193673 [17344/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.143785 [17408/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.246633 [17472/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.159175 [17536/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.167990 [17600/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.153034 [17664/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.128885 [17728/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.188206 [17792/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.228901 [17856/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.136013 [17920/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.171578 [17984/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.135050 [18048/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.149606 [18112/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.140483 [18176/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.135811 [18240/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.161396 [18304/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.173972 [18368/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.166932 [18432/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.195368 [18496/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.113958 [18560/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.163612 [18624/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.179606 [18688/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.165775 [18752/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.129534 [18816/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.145984 [18880/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.115991 [18944/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.135087 [19008/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.149978 [19072/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.142073 [19136/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.129902 [19200/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.169214 [19264/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.184922 [19328/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.220730 [19392/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.117113 [19456/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.196686 [19520/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.128044 [19584/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.194744 [19648/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.175873 [19712/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.110328 [19776/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.165426 [19840/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.157409 [19904/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.153667 [19968/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.160936 [20032/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.108374 [20096/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.176724 [20160/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.141364 [20224/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.157007 [20288/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.175053 [20352/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.112780 [20416/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.166766 [20480/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.219687 [20544/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.140653 [20608/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.184016 [20672/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.130835 [20736/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.167980 [20800/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.189809 [20864/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.212056 [20928/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.117394 [20992/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.173080 [21056/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.186306 [21120/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.140258 [21184/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.190644 [21248/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.177605 [21312/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.121780 [21376/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.128702 [21440/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.180445 [21504/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.146468 [21568/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.188789 [21632/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.153426 [21696/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.184967 [21760/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.199600 [21824/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.126863 [21888/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.147453 [21952/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.160082 [22016/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.209494 [22080/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.221971 [22144/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.175902 [22208/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.160886 [22272/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.164972 [22336/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.146695 [22400/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.123840 [22464/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.189016 [22528/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.175538 [22592/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.170306 [22656/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.188685 [22720/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.157568 [22784/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.184678 [22848/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.136545 [22912/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.140964 [22976/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.125604 [23040/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.164640 [23104/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.189047 [23168/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.218105 [23232/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.185461 [23296/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.179036 [23360/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.190118 [23424/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.189230 [23488/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.208049 [23552/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.148944 [23616/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.195873 [23680/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.137984 [23744/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.189436 [23808/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.199210 [23872/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.218320 [23936/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.142636 [24000/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.174528 [24064/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.134548 [24128/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.157875 [24192/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.212446 [24256/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.256906 [24320/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.157928 [24384/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.209396 [24448/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.127353 [24512/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.171139 [24576/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.179462 [24640/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.141369 [24704/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.190760 [24768/24872]: 0%| | 0/388 [00:17<?, ?it/s]
loss: 0.182720 [24832/24872]: 0%| | 0/388 [00:17<?, ?it/s]
loss: 0.192798 [24872/24872]: 0%| | 0/388 [00:17<?, ?it/s]
loss: 0.192798 [24872/24872]: : 389it [00:17, 22.73it/s]
Epoch 2, time=146.75s
0%| | 0/388 [00:00<?, ?it/s]
loss: 0.153127 [ 64/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.139508 [ 128/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.175046 [ 192/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.161584 [ 256/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.247147 [ 320/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.211305 [ 384/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.198388 [ 448/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.154755 [ 512/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.180241 [ 576/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.172434 [ 640/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.187744 [ 704/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.198697 [ 768/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.164316 [ 832/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.172504 [ 896/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.168386 [ 960/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.149413 [ 1024/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.219847 [ 1088/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.144735 [ 1152/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.158594 [ 1216/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.190114 [ 1280/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.169058 [ 1344/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.219379 [ 1408/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.222583 [ 1472/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.120177 [ 1536/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.190531 [ 1600/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.188300 [ 1664/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.140966 [ 1728/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.162330 [ 1792/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.162021 [ 1856/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.110977 [ 1920/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.155462 [ 1984/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.176826 [ 2048/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.144904 [ 2112/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.189658 [ 2176/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.143212 [ 2240/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.178990 [ 2304/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.157189 [ 2368/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.111909 [ 2432/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.144637 [ 2496/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.125878 [ 2560/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.166497 [ 2624/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.140939 [ 2688/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.174989 [ 2752/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.121717 [ 2816/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.176149 [ 2880/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.163736 [ 2944/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.158127 [ 3008/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.121892 [ 3072/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.159129 [ 3136/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.158165 [ 3200/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.203688 [ 3264/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.137229 [ 3328/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.150478 [ 3392/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.171562 [ 3456/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.156247 [ 3520/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.120609 [ 3584/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.166549 [ 3648/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.216781 [ 3712/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.165652 [ 3776/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.167092 [ 3840/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.138901 [ 3904/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.204779 [ 3968/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.203013 [ 4032/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.134880 [ 4096/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.158663 [ 4160/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.208128 [ 4224/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.213989 [ 4288/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.117230 [ 4352/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.143975 [ 4416/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.173899 [ 4480/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.130009 [ 4544/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.139001 [ 4608/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.169583 [ 4672/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.133356 [ 4736/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.221562 [ 4800/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.158587 [ 4864/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.119094 [ 4928/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.146199 [ 4992/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.133218 [ 5056/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.158833 [ 5120/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.236753 [ 5184/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.177883 [ 5248/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.186966 [ 5312/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.200773 [ 5376/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.157111 [ 5440/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.174470 [ 5504/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.163077 [ 5568/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.164969 [ 5632/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.151903 [ 5696/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.167498 [ 5760/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.149744 [ 5824/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.164182 [ 5888/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.137516 [ 5952/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.119027 [ 6016/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.130808 [ 6080/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.142099 [ 6144/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.170916 [ 6208/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.124619 [ 6272/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.174905 [ 6336/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.171572 [ 6400/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.207616 [ 6464/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.157898 [ 6528/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.215106 [ 6592/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.130360 [ 6656/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.155118 [ 6720/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.116212 [ 6784/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.131683 [ 6848/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.186417 [ 6912/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.156351 [ 6976/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.123992 [ 7040/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.110771 [ 7104/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.167088 [ 7168/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.109063 [ 7232/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.179579 [ 7296/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.133720 [ 7360/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.127362 [ 7424/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.160118 [ 7488/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.232772 [ 7552/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.148088 [ 7616/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.243491 [ 7680/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.158968 [ 7744/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.190201 [ 7808/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.125625 [ 7872/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.147102 [ 7936/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.119913 [ 8000/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.111235 [ 8064/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.125626 [ 8128/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.163835 [ 8192/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.154150 [ 8256/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.222695 [ 8320/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.157577 [ 8384/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.184876 [ 8448/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.129264 [ 8512/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.124867 [ 8576/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.198023 [ 8640/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.165810 [ 8704/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.120222 [ 8768/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.135000 [ 8832/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.148297 [ 8896/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.162105 [ 8960/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.197082 [ 9024/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.159731 [ 9088/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.131613 [ 9152/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.156532 [ 9216/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.181093 [ 9280/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.147347 [ 9344/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.148276 [ 9408/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.213498 [ 9472/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.135637 [ 9536/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.157520 [ 9600/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.122931 [ 9664/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.118930 [ 9728/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.126739 [ 9792/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.127244 [ 9856/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.184343 [ 9920/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.133016 [ 9984/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.115161 [10048/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.173453 [10112/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.172230 [10176/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.169308 [10240/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.203515 [10304/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.152574 [10368/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.171364 [10432/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.150459 [10496/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.134896 [10560/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.166158 [10624/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.222076 [10688/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.161491 [10752/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.150307 [10816/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.110211 [10880/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.172832 [10944/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.147228 [11008/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.191407 [11072/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.132294 [11136/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.187994 [11200/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.182377 [11264/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.124875 [11328/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.128042 [11392/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.189690 [11456/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.176962 [11520/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.114304 [11584/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.154994 [11648/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.173673 [11712/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.131340 [11776/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.141511 [11840/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.155784 [11904/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.154201 [11968/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.128197 [12032/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.142880 [12096/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.121996 [12160/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.173509 [12224/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.157994 [12288/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.113938 [12352/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.127031 [12416/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.143425 [12480/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.169611 [12544/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.198466 [12608/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.207992 [12672/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.196928 [12736/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.129021 [12800/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.189860 [12864/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.187040 [12928/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.133553 [12992/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.174734 [13056/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.159039 [13120/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.142490 [13184/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.164245 [13248/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.107537 [13312/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.185548 [13376/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.188764 [13440/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.180918 [13504/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.177975 [13568/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.244117 [13632/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.191490 [13696/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.148172 [13760/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.151967 [13824/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.217882 [13888/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.182348 [13952/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.171058 [14016/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.127825 [14080/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.217544 [14144/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.114157 [14208/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.150593 [14272/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.134807 [14336/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.160778 [14400/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.203829 [14464/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.137679 [14528/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.150763 [14592/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.142343 [14656/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.180275 [14720/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.132265 [14784/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.185787 [14848/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.164606 [14912/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.136768 [14976/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.185903 [15040/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.153535 [15104/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.170276 [15168/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.166898 [15232/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.187526 [15296/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.148430 [15360/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.184593 [15424/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.118113 [15488/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.153959 [15552/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.169823 [15616/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.163167 [15680/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.127574 [15744/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.146627 [15808/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.198272 [15872/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.119022 [15936/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.130621 [16000/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.147472 [16064/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.135757 [16128/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.186232 [16192/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.119497 [16256/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.152141 [16320/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.166705 [16384/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.153089 [16448/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.169481 [16512/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.153984 [16576/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.207960 [16640/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.162292 [16704/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.143875 [16768/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.180700 [16832/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.163965 [16896/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.172932 [16960/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.144628 [17024/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.110916 [17088/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.155694 [17152/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.215729 [17216/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.139817 [17280/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.180204 [17344/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.131223 [17408/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.240944 [17472/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.137300 [17536/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.154541 [17600/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.138524 [17664/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.111773 [17728/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.168397 [17792/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.183923 [17856/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.114207 [17920/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.149790 [17984/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.124857 [18048/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.128253 [18112/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.120373 [18176/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.115717 [18240/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.139280 [18304/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.154039 [18368/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.148768 [18432/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.167685 [18496/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.099908 [18560/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.143165 [18624/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.148479 [18688/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.140793 [18752/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.112152 [18816/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.129147 [18880/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.098721 [18944/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.120619 [19008/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.125317 [19072/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.127776 [19136/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.113285 [19200/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.161967 [19264/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.154757 [19328/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.203927 [19392/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.107234 [19456/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.182854 [19520/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.117301 [19584/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.174213 [19648/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.166859 [19712/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.098775 [19776/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.142947 [19840/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.140251 [19904/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.141602 [19968/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.141291 [20032/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.101481 [20096/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.152086 [20160/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.126075 [20224/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.141580 [20288/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.157709 [20352/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.101134 [20416/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.137804 [20480/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.196426 [20544/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.130069 [20608/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.172093 [20672/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.115589 [20736/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.150067 [20800/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.179186 [20864/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.192129 [20928/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.105787 [20992/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.157286 [21056/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.167446 [21120/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.126381 [21184/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.178681 [21248/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.160904 [21312/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.106307 [21376/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.116328 [21440/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.170325 [21504/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.122338 [21568/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.175800 [21632/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.132517 [21696/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.166068 [21760/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.182935 [21824/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.119629 [21888/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.136303 [21952/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.151746 [22016/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.198420 [22080/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.203093 [22144/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.145727 [22208/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.148666 [22272/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.144187 [22336/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.132944 [22400/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.111073 [22464/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.167830 [22528/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.152749 [22592/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.163565 [22656/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.182216 [22720/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.132501 [22784/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.162875 [22848/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.109640 [22912/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.121576 [22976/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.106244 [23040/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.127562 [23104/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.175882 [23168/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.184688 [23232/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.169939 [23296/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.158899 [23360/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.162949 [23424/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.166768 [23488/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.171726 [23552/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.142870 [23616/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.172641 [23680/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.128357 [23744/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.179956 [23808/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.182724 [23872/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.201997 [23936/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.131861 [24000/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.174904 [24064/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.125611 [24128/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.150098 [24192/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.209089 [24256/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.223271 [24320/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.156775 [24384/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.191948 [24448/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.109328 [24512/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.155200 [24576/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.149295 [24640/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.116538 [24704/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.158871 [24768/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.152416 [24832/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.173006 [24872/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.173006 [24872/24872]: : 389it [00:16, 23.36it/s]
Epoch 3, time=163.41s
0%| | 0/388 [00:00<?, ?it/s]
loss: 0.128567 [ 64/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.130031 [ 128/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.169425 [ 192/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.134189 [ 256/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.214461 [ 320/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.162720 [ 384/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.164600 [ 448/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.143451 [ 512/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.148668 [ 576/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.153749 [ 640/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.172608 [ 704/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.167979 [ 768/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.142618 [ 832/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.153021 [ 896/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.153692 [ 960/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.130351 [ 1024/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.169204 [ 1088/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.130264 [ 1152/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.139379 [ 1216/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.156771 [ 1280/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.134147 [ 1344/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.208397 [ 1408/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.181793 [ 1472/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.105362 [ 1536/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.171905 [ 1600/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.172748 [ 1664/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.123863 [ 1728/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.149150 [ 1792/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.135181 [ 1856/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.106245 [ 1920/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.129181 [ 1984/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.159087 [ 2048/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.149532 [ 2112/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.156249 [ 2176/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.124402 [ 2240/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.169772 [ 2304/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.139311 [ 2368/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.101386 [ 2432/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.126165 [ 2496/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.112761 [ 2560/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.153434 [ 2624/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.129690 [ 2688/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.145254 [ 2752/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.114850 [ 2816/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.163931 [ 2880/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.155113 [ 2944/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.138496 [ 3008/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.119365 [ 3072/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.144327 [ 3136/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.134504 [ 3200/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.182582 [ 3264/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.122708 [ 3328/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.157618 [ 3392/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.157441 [ 3456/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.154547 [ 3520/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.113507 [ 3584/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.147706 [ 3648/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.205235 [ 3712/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.167800 [ 3776/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.160051 [ 3840/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.122474 [ 3904/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.177568 [ 3968/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.180463 [ 4032/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.132042 [ 4096/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.140848 [ 4160/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.189176 [ 4224/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.156976 [ 4288/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.099151 [ 4352/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.144056 [ 4416/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.164788 [ 4480/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.133504 [ 4544/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.137223 [ 4608/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.161772 [ 4672/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.120464 [ 4736/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.166105 [ 4800/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.126030 [ 4864/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.106341 [ 4928/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.129042 [ 4992/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.132961 [ 5056/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.146263 [ 5120/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.259194 [ 5184/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.149937 [ 5248/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.185214 [ 5312/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.168053 [ 5376/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.157313 [ 5440/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.142178 [ 5504/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.138589 [ 5568/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.154732 [ 5632/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.145531 [ 5696/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.166824 [ 5760/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.170775 [ 5824/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.186273 [ 5888/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.129159 [ 5952/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.111487 [ 6016/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.121206 [ 6080/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.136584 [ 6144/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.176544 [ 6208/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.141064 [ 6272/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.176542 [ 6336/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.145625 [ 6400/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.184241 [ 6464/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.149607 [ 6528/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.198038 [ 6592/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.127658 [ 6656/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.115881 [ 6720/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.118514 [ 6784/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.163754 [ 6848/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.176315 [ 6912/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.171017 [ 6976/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.108714 [ 7040/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.104299 [ 7104/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.167426 [ 7168/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.097813 [ 7232/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.168274 [ 7296/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.142704 [ 7360/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.125893 [ 7424/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.149907 [ 7488/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.246342 [ 7552/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.123352 [ 7616/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.202357 [ 7680/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.134500 [ 7744/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.178272 [ 7808/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.133571 [ 7872/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.135341 [ 7936/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.126822 [ 8000/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.103333 [ 8064/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.112139 [ 8128/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.145017 [ 8192/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.129808 [ 8256/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.207208 [ 8320/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.129251 [ 8384/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.152072 [ 8448/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.116166 [ 8512/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.111605 [ 8576/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.204963 [ 8640/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.138401 [ 8704/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.108321 [ 8768/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.111708 [ 8832/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.147429 [ 8896/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.141054 [ 8960/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.177374 [ 9024/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.144821 [ 9088/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.116917 [ 9152/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.139516 [ 9216/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.179714 [ 9280/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.129068 [ 9344/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.137191 [ 9408/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.187103 [ 9472/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.114260 [ 9536/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.162461 [ 9600/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.114314 [ 9664/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.113200 [ 9728/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.128327 [ 9792/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.112513 [ 9856/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.205296 [ 9920/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.126496 [ 9984/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.108267 [10048/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.207468 [10112/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.158521 [10176/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.167879 [10240/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.201560 [10304/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.194922 [10368/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.186186 [10432/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.134467 [10496/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.176274 [10560/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.175340 [10624/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.218573 [10688/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.212181 [10752/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.152056 [10816/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.139324 [10880/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.206736 [10944/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.169147 [11008/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.198010 [11072/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.158362 [11136/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.196037 [11200/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.217747 [11264/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.143332 [11328/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.144943 [11392/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.204412 [11456/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.172692 [11520/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.129608 [11584/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.155123 [11648/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.166769 [11712/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.156810 [11776/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.138762 [11840/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.147027 [11904/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.159088 [11968/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.118207 [12032/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.140527 [12096/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.121596 [12160/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.170926 [12224/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.160071 [12288/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.104422 [12352/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.114699 [12416/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.134730 [12480/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.159328 [12544/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.178308 [12608/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.200683 [12672/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.184814 [12736/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.123687 [12800/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.161822 [12864/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.159905 [12928/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.123795 [12992/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.156308 [13056/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.141329 [13120/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.140347 [13184/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.138539 [13248/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.094336 [13312/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.167651 [13376/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.176404 [13440/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.139656 [13504/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.164042 [13568/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.209585 [13632/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.142585 [13696/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.134354 [13760/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.114380 [13824/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.166664 [13888/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.164756 [13952/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.127618 [14016/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.121312 [14080/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.179100 [14144/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.100870 [14208/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.103787 [14272/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.118762 [14336/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.119588 [14400/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.161557 [14464/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.112899 [14528/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.153544 [14592/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.110306 [14656/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.151225 [14720/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.101581 [14784/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.154017 [14848/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.135661 [14912/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.104329 [14976/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.134446 [15040/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.130665 [15104/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.166650 [15168/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.142895 [15232/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.166311 [15296/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.127112 [15360/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.163366 [15424/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.106538 [15488/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.130023 [15552/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.148083 [15616/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.150037 [15680/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.136284 [15744/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.133891 [15808/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.168276 [15872/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.105799 [15936/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.118251 [16000/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.132523 [16064/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.118919 [16128/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.159558 [16192/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.105014 [16256/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.139289 [16320/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.151410 [16384/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.142872 [16448/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.159288 [16512/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.145891 [16576/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.201459 [16640/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.157354 [16704/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.135787 [16768/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.182502 [16832/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.149721 [16896/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.172993 [16960/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.130358 [17024/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.109810 [17088/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.156231 [17152/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.156829 [17216/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.142607 [17280/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.173903 [17344/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.129783 [17408/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.226474 [17472/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.114599 [17536/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.148505 [17600/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.126269 [17664/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.097337 [17728/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.158161 [17792/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.167917 [17856/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.102535 [17920/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.134460 [17984/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.113225 [18048/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.125332 [18112/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.123355 [18176/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.113760 [18240/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.120761 [18304/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.160967 [18368/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.135199 [18432/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.152228 [18496/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.096324 [18560/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.145969 [18624/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.140102 [18688/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.131109 [18752/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.119087 [18816/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.121416 [18880/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.094087 [18944/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.111511 [19008/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.127839 [19072/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.125457 [19136/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.105385 [19200/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.157277 [19264/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.144635 [19328/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.193414 [19392/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.109342 [19456/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.192913 [19520/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.116963 [19584/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.168445 [19648/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.146978 [19712/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.094890 [19776/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.136153 [19840/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.133880 [19904/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.140193 [19968/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.127459 [20032/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.109475 [20096/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.131493 [20160/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.118529 [20224/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.138263 [20288/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.163749 [20352/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.116009 [20416/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.117084 [20480/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.182817 [20544/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.136970 [20608/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.164798 [20672/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.112216 [20736/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.147578 [20800/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.175785 [20864/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.193150 [20928/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.097465 [20992/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.147134 [21056/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.155635 [21120/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.119246 [21184/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.165648 [21248/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.149052 [21312/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.097276 [21376/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.114593 [21440/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.168997 [21504/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.125680 [21568/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.163906 [21632/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.126723 [21696/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.146595 [21760/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.170508 [21824/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.120913 [21888/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.120121 [21952/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.141858 [22016/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.186105 [22080/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.178809 [22144/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.139238 [22208/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.143665 [22272/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.139205 [22336/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.127275 [22400/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.119806 [22464/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.161918 [22528/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.161664 [22592/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.155806 [22656/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.207917 [22720/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.139042 [22784/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.164632 [22848/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.094965 [22912/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.120561 [22976/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.110674 [23040/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.141644 [23104/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.178596 [23168/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.195233 [23232/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.171941 [23296/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.168871 [23360/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.171530 [23424/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.161155 [23488/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.166159 [23552/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.146863 [23616/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.167582 [23680/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.146454 [23744/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.144665 [23808/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.178004 [23872/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.143357 [23936/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.139506 [24000/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.156455 [24064/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.117177 [24128/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.176298 [24192/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.172884 [24256/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.199133 [24320/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.126438 [24384/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.176594 [24448/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.106799 [24512/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.151900 [24576/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.147147 [24640/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.108888 [24704/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.105874 [24768/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.118743 [24832/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.161859 [24872/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.161859 [24872/24872]: : 389it [00:16, 23.66it/s]
Epoch 4, time=179.85s
0%| | 0/388 [00:00<?, ?it/s]
loss: 0.112099 [ 64/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.128977 [ 128/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.159523 [ 192/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.113006 [ 256/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.236121 [ 320/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.150129 [ 384/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.154497 [ 448/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.129142 [ 512/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.131036 [ 576/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.151273 [ 640/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.167274 [ 704/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.151103 [ 768/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.134983 [ 832/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.146992 [ 896/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.140264 [ 960/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.126964 [ 1024/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.170745 [ 1088/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.130408 [ 1152/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.125425 [ 1216/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.137542 [ 1280/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.158778 [ 1344/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.167529 [ 1408/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.183268 [ 1472/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.102821 [ 1536/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.161315 [ 1600/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.209533 [ 1664/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.122508 [ 1728/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.143378 [ 1792/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.169759 [ 1856/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.100954 [ 1920/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.109417 [ 1984/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.157832 [ 2048/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.124035 [ 2112/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.145800 [ 2176/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.108372 [ 2240/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.161142 [ 2304/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.140393 [ 2368/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.099065 [ 2432/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.121081 [ 2496/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.105945 [ 2560/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.152750 [ 2624/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.117185 [ 2688/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.141732 [ 2752/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.110606 [ 2816/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.164693 [ 2880/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.143678 [ 2944/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.125217 [ 3008/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.133587 [ 3072/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.131701 [ 3136/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.124170 [ 3200/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.169109 [ 3264/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.112293 [ 3328/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.124431 [ 3392/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.141608 [ 3456/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.151892 [ 3520/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.106051 [ 3584/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.152158 [ 3648/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.180479 [ 3712/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.142411 [ 3776/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.175167 [ 3840/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.113615 [ 3904/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.165662 [ 3968/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.169391 [ 4032/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.126649 [ 4096/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.133866 [ 4160/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.153807 [ 4224/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.185225 [ 4288/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.105317 [ 4352/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.148316 [ 4416/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.174843 [ 4480/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.114830 [ 4544/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.154126 [ 4608/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.157464 [ 4672/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.122067 [ 4736/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.180931 [ 4800/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.158594 [ 4864/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.111485 [ 4928/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.147701 [ 4992/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.127033 [ 5056/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.153207 [ 5120/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.242842 [ 5184/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.135645 [ 5248/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.175444 [ 5312/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.152122 [ 5376/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.164153 [ 5440/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.146594 [ 5504/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.138545 [ 5568/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.154268 [ 5632/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.146511 [ 5696/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.152316 [ 5760/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.160523 [ 5824/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.154531 [ 5888/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.139504 [ 5952/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.104932 [ 6016/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.121044 [ 6080/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.137826 [ 6144/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.181790 [ 6208/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.106400 [ 6272/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.171867 [ 6336/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.144551 [ 6400/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.197561 [ 6464/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.144417 [ 6528/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.171255 [ 6592/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.126393 [ 6656/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.107634 [ 6720/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.091854 [ 6784/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.141274 [ 6848/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.158857 [ 6912/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.148564 [ 6976/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.109925 [ 7040/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.096317 [ 7104/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.150043 [ 7168/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.095592 [ 7232/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.154342 [ 7296/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.125231 [ 7360/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.114432 [ 7424/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.133424 [ 7488/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.231613 [ 7552/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.114824 [ 7616/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.214302 [ 7680/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.126044 [ 7744/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.159536 [ 7808/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.116602 [ 7872/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.123693 [ 7936/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.099781 [ 8000/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.099274 [ 8064/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.107231 [ 8128/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.139131 [ 8192/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.126285 [ 8256/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.196466 [ 8320/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.137710 [ 8384/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.156153 [ 8448/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.104082 [ 8512/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.104716 [ 8576/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.193374 [ 8640/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.142842 [ 8704/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.110824 [ 8768/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.107875 [ 8832/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.140657 [ 8896/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.154418 [ 8960/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.169031 [ 9024/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.121102 [ 9088/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.113712 [ 9152/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.143457 [ 9216/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.156581 [ 9280/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.125986 [ 9344/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.150345 [ 9408/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.173942 [ 9472/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.099818 [ 9536/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.148422 [ 9600/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.124318 [ 9664/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.110612 [ 9728/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.120353 [ 9792/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.117701 [ 9856/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.175580 [ 9920/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.132825 [ 9984/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.107819 [10048/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.155694 [10112/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.166528 [10176/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.147625 [10240/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.219336 [10304/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.146034 [10368/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.154301 [10432/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.184737 [10496/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.108577 [10560/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.176694 [10624/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.212975 [10688/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.145923 [10752/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.152206 [10816/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.102650 [10880/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.157126 [10944/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.161775 [11008/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.161159 [11072/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.168591 [11136/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.179705 [11200/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.168099 [11264/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.130930 [11328/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.113136 [11392/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.171600 [11456/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.174083 [11520/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.118414 [11584/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.151544 [11648/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.149602 [11712/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.130368 [11776/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.124676 [11840/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.159068 [11904/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.174589 [11968/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.146792 [12032/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.137735 [12096/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.119056 [12160/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.181018 [12224/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.174932 [12288/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.104281 [12352/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.134598 [12416/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.125559 [12480/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.167175 [12544/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.163215 [12608/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.179854 [12672/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.179322 [12736/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.126904 [12800/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.198524 [12864/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.160764 [12928/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.135596 [12992/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.152877 [13056/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.163846 [13120/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.143642 [13184/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.133454 [13248/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.100192 [13312/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.175209 [13376/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.179400 [13440/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.155157 [13504/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.154561 [13568/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.206355 [13632/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.142033 [13696/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.126693 [13760/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.113940 [13824/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.146349 [13888/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.162715 [13952/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.137574 [14016/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.108341 [14080/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.169479 [14144/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.091690 [14208/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.097335 [14272/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.132073 [14336/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.102472 [14400/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.176299 [14464/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.098871 [14528/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.112278 [14592/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.096675 [14656/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.142478 [14720/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.095429 [14784/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.140602 [14848/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.132929 [14912/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.098072 [14976/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.129309 [15040/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.116213 [15104/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.141323 [15168/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.126520 [15232/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.159679 [15296/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.123986 [15360/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.166042 [15424/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.108299 [15488/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.131244 [15552/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.121557 [15616/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.145037 [15680/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.112066 [15744/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.127260 [15808/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.144614 [15872/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.101241 [15936/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.116677 [16000/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.149729 [16064/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.109881 [16128/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.177565 [16192/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.110859 [16256/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.128052 [16320/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.159100 [16384/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.147679 [16448/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.170513 [16512/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.147950 [16576/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.180049 [16640/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.148188 [16704/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.128121 [16768/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.177893 [16832/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.137246 [16896/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.159044 [16960/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.127687 [17024/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.122637 [17088/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.176479 [17152/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.173991 [17216/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.140646 [17280/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.172350 [17344/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.117919 [17408/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.218729 [17472/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.113961 [17536/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.144978 [17600/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.130867 [17664/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.096718 [17728/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.163581 [17792/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.157457 [17856/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.102610 [17920/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.147181 [17984/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.115012 [18048/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.129042 [18112/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.128309 [18176/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.108573 [18240/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.113824 [18304/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.152912 [18368/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.118468 [18432/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.147363 [18496/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.092562 [18560/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.128287 [18624/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.125822 [18688/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.124344 [18752/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.109575 [18816/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.116436 [18880/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.094980 [18944/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.112625 [19008/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.097685 [19072/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.116600 [19136/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.094742 [19200/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.153409 [19264/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.138041 [19328/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.174703 [19392/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.095442 [19456/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.168263 [19520/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.113852 [19584/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.156028 [19648/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.147562 [19712/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.091588 [19776/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.137949 [19840/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.131395 [19904/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.131612 [19968/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.133610 [20032/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.090399 [20096/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.119987 [20160/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.108078 [20224/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.121230 [20288/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.147420 [20352/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.095908 [20416/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.107917 [20480/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.174144 [20544/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.130808 [20608/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.157405 [20672/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.110702 [20736/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.132857 [20800/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.164523 [20864/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.176328 [20928/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.093987 [20992/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.137734 [21056/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.144144 [21120/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.113320 [21184/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.161037 [21248/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.140941 [21312/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.081417 [21376/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.094490 [21440/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.139799 [21504/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.094226 [21568/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.154110 [21632/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.098780 [21696/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.116690 [21760/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.150228 [21824/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.098691 [21888/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.106219 [21952/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.127807 [22016/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.163302 [22080/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.162989 [22144/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.117685 [22208/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.120681 [22272/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.130448 [22336/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.102601 [22400/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.100944 [22464/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.145102 [22528/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.128819 [22592/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.136212 [22656/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.159549 [22720/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.117918 [22784/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.129408 [22848/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.081262 [22912/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.095195 [22976/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.084172 [23040/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.111864 [23104/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.158214 [23168/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.158965 [23232/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.155022 [23296/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.149405 [23360/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.153448 [23424/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.150478 [23488/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.146456 [23552/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.130772 [23616/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.155088 [23680/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.109409 [23744/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.154228 [23808/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.163112 [23872/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.162200 [23936/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.130795 [24000/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.142287 [24064/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.109636 [24128/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.153231 [24192/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.150080 [24256/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.195478 [24320/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.116763 [24384/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.159409 [24448/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.121127 [24512/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.124822 [24576/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.152258 [24640/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.119899 [24704/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.115407 [24768/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.124312 [24832/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.191032 [24872/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.191032 [24872/24872]: : 389it [00:16, 23.62it/s]
Epoch 5, time=196.32s
0%| | 0/388 [00:00<?, ?it/s]
loss: 0.106798 [ 64/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.137706 [ 128/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.166715 [ 192/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.113610 [ 256/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.211569 [ 320/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.149909 [ 384/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.148275 [ 448/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.119354 [ 512/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.124071 [ 576/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.126543 [ 640/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.158801 [ 704/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.146109 [ 768/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.126904 [ 832/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.144905 [ 896/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.138445 [ 960/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.115154 [ 1024/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.159452 [ 1088/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.140735 [ 1152/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.112319 [ 1216/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.108575 [ 1280/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.144928 [ 1344/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.180317 [ 1408/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.145077 [ 1472/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.099639 [ 1536/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.165182 [ 1600/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.167129 [ 1664/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.145723 [ 1728/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.118810 [ 1792/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.150760 [ 1856/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.089760 [ 1920/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.099737 [ 1984/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.138503 [ 2048/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.110397 [ 2112/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.123904 [ 2176/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.102552 [ 2240/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.143136 [ 2304/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.141443 [ 2368/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.091358 [ 2432/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.110392 [ 2496/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.095485 [ 2560/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.132434 [ 2624/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.104214 [ 2688/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.137010 [ 2752/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.105065 [ 2816/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.160837 [ 2880/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.130883 [ 2944/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.117348 [ 3008/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.122136 [ 3072/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.122333 [ 3136/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.131640 [ 3200/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.170559 [ 3264/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.107703 [ 3328/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.136015 [ 3392/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.135361 [ 3456/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.153442 [ 3520/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.114733 [ 3584/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.142890 [ 3648/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.183534 [ 3712/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.144513 [ 3776/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.169675 [ 3840/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.127837 [ 3904/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.141175 [ 3968/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.190530 [ 4032/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.131197 [ 4096/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.134427 [ 4160/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.179907 [ 4224/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.160833 [ 4288/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.119763 [ 4352/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.157364 [ 4416/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.146727 [ 4480/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.107556 [ 4544/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.125678 [ 4608/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.130333 [ 4672/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.131329 [ 4736/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.177210 [ 4800/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.122284 [ 4864/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.133630 [ 4928/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.133111 [ 4992/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.135136 [ 5056/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.170690 [ 5120/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.205092 [ 5184/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.125272 [ 5248/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.193143 [ 5312/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.151063 [ 5376/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.190092 [ 5440/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.158094 [ 5504/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.124334 [ 5568/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.182261 [ 5632/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.139165 [ 5696/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.163844 [ 5760/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.151817 [ 5824/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.138982 [ 5888/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.136609 [ 5952/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.105312 [ 6016/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.121490 [ 6080/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.117748 [ 6144/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.138689 [ 6208/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.096432 [ 6272/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.150178 [ 6336/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.142006 [ 6400/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.163744 [ 6464/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.119938 [ 6528/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.172283 [ 6592/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.112202 [ 6656/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.107823 [ 6720/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.084592 [ 6784/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.117513 [ 6848/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.146230 [ 6912/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.133363 [ 6976/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.092399 [ 7040/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.090051 [ 7104/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.149456 [ 7168/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.097840 [ 7232/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.148954 [ 7296/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.126386 [ 7360/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.109061 [ 7424/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.122518 [ 7488/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.218252 [ 7552/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.110672 [ 7616/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.190246 [ 7680/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.112463 [ 7744/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.151963 [ 7808/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.108481 [ 7872/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.114355 [ 7936/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.097038 [ 8000/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.093878 [ 8064/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.095400 [ 8128/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.131091 [ 8192/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.121960 [ 8256/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.176377 [ 8320/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.116515 [ 8384/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.131886 [ 8448/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.103876 [ 8512/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.095454 [ 8576/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.177126 [ 8640/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.119111 [ 8704/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.094153 [ 8768/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.102576 [ 8832/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.123544 [ 8896/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.141846 [ 8960/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.157633 [ 9024/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.118075 [ 9088/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.117021 [ 9152/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.147857 [ 9216/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.159304 [ 9280/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.118228 [ 9344/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.124896 [ 9408/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.162065 [ 9472/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.125312 [ 9536/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.140245 [ 9600/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.113340 [ 9664/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.115085 [ 9728/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.113595 [ 9792/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.126363 [ 9856/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.170213 [ 9920/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.123281 [ 9984/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.096273 [10048/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.166492 [10112/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.151892 [10176/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.196253 [10240/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.152971 [10304/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.161704 [10368/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.138273 [10432/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.117721 [10496/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.108402 [10560/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.124378 [10624/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.176952 [10688/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.122314 [10752/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.135301 [10816/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.092093 [10880/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.131034 [10944/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.133744 [11008/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.140734 [11072/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.104260 [11136/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.151803 [11200/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.144294 [11264/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.120235 [11328/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.106725 [11392/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.151315 [11456/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.165230 [11520/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.108967 [11584/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.142957 [11648/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.151200 [11712/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.120043 [11776/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.126550 [11840/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.140871 [11904/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.141049 [11968/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.126535 [12032/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.131851 [12096/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.116183 [12160/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.175873 [12224/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.155207 [12288/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.101250 [12352/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.137586 [12416/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.120356 [12480/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.145853 [12544/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.163197 [12608/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.188744 [12672/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.172705 [12736/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.122666 [12800/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.160706 [12864/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.153419 [12928/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.130605 [12992/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.147208 [13056/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.159561 [13120/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.137538 [13184/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.116673 [13248/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.082891 [13312/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.179675 [13376/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.181039 [13440/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.130992 [13504/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.146476 [13568/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.196037 [13632/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.128258 [13696/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.118208 [13760/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.101101 [13824/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.154225 [13888/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.167914 [13952/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.107207 [14016/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.105612 [14080/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.163359 [14144/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.078930 [14208/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.091284 [14272/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.107411 [14336/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.109154 [14400/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.161247 [14464/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.095574 [14528/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.144496 [14592/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.104981 [14656/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.148882 [14720/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.104403 [14784/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.139131 [14848/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.118965 [14912/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.108017 [14976/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.148062 [15040/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.121020 [15104/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.152680 [15168/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.132968 [15232/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.157071 [15296/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.118119 [15360/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.165903 [15424/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.090495 [15488/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.120574 [15552/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.123914 [15616/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.140807 [15680/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.112958 [15744/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.144036 [15808/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.134582 [15872/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.096468 [15936/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.115021 [16000/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.129513 [16064/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.112162 [16128/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.182993 [16192/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.106274 [16256/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.121145 [16320/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.145858 [16384/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.151259 [16448/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.157064 [16512/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.143562 [16576/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.183419 [16640/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.141245 [16704/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.125791 [16768/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.169506 [16832/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.135965 [16896/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.163002 [16960/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.134137 [17024/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.116979 [17088/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.175638 [17152/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.133016 [17216/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.148104 [17280/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.173661 [17344/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.128680 [17408/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.224841 [17472/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.111000 [17536/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.134954 [17600/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.110026 [17664/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.097082 [17728/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.173718 [17792/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.163522 [17856/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.091734 [17920/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.135291 [17984/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.108004 [18048/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.116084 [18112/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.127235 [18176/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.107304 [18240/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.120904 [18304/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.142978 [18368/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.117640 [18432/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.132521 [18496/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.086455 [18560/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.144536 [18624/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.115652 [18688/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.112206 [18752/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.102201 [18816/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.104853 [18880/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.081660 [18944/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.101374 [19008/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.119478 [19072/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.105833 [19136/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.091370 [19200/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.150770 [19264/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.123183 [19328/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.173173 [19392/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.107393 [19456/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.172872 [19520/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.107151 [19584/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.160615 [19648/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.143834 [19712/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.086864 [19776/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.144307 [19840/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.123104 [19904/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.117970 [19968/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.134663 [20032/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.104965 [20096/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.124940 [20160/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.115213 [20224/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.122191 [20288/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.130015 [20352/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.102495 [20416/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.113653 [20480/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.150182 [20544/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.120628 [20608/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.154880 [20672/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.094343 [20736/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.138240 [20800/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.152564 [20864/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.155426 [20928/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.082665 [20992/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.120307 [21056/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.124703 [21120/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.101850 [21184/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.142431 [21248/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.130125 [21312/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.083917 [21376/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.088299 [21440/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.142293 [21504/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.084126 [21568/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.163427 [21632/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.089910 [21696/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.100711 [21760/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.138599 [21824/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.092398 [21888/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.101683 [21952/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.124547 [22016/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.166795 [22080/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.165912 [22144/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.107025 [22208/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.137311 [22272/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.123390 [22336/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.101641 [22400/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.099970 [22464/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.148323 [22528/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.123451 [22592/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.154363 [22656/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.149519 [22720/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.110666 [22784/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.128374 [22848/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.085248 [22912/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.091474 [22976/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.090902 [23040/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.130448 [23104/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.172940 [23168/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.160047 [23232/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.149607 [23296/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.160536 [23360/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.155293 [23424/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.158200 [23488/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.179090 [23552/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.117256 [23616/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.180366 [23680/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.102616 [23744/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.139525 [23808/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.159403 [23872/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.138817 [23936/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.125802 [24000/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.130404 [24064/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.102359 [24128/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.154934 [24192/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.141856 [24256/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.208381 [24320/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.114953 [24384/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.154392 [24448/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.117111 [24512/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.173359 [24576/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.179067 [24640/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.104396 [24704/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.112175 [24768/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.137737 [24832/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.159709 [24872/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.159709 [24872/24872]: : 389it [00:16, 24.10it/s]
-------------------------------
LR=0.0001, batch_size=128
-------------------------------
Epoch 1, time=212.47s
0%| | 0/194 [00:00<?, ?it/s]
loss: 0.122156 [ 128/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.532525 [ 256/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.230023 [ 384/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.260358 [ 512/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.371063 [ 640/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.264400 [ 768/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.167511 [ 896/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.172567 [ 1024/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.258811 [ 1152/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.223883 [ 1280/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.181570 [ 1408/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.135147 [ 1536/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.193150 [ 1664/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.198653 [ 1792/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.155479 [ 1920/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.129085 [ 2048/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.117739 [ 2176/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.148112 [ 2304/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.158476 [ 2432/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.140710 [ 2560/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.130181 [ 2688/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.114181 [ 2816/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.147795 [ 2944/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.127318 [ 3072/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.137527 [ 3200/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.130001 [ 3328/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.130665 [ 3456/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.112193 [ 3584/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.162338 [ 3712/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.133151 [ 3840/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.116251 [ 3968/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.135087 [ 4096/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.132628 [ 4224/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.111989 [ 4352/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.132752 [ 4480/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.103715 [ 4608/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.112738 [ 4736/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.126775 [ 4864/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.095134 [ 4992/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.106934 [ 5120/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.153226 [ 5248/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.131216 [ 5376/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.124267 [ 5504/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.118697 [ 5632/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.125684 [ 5760/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.114984 [ 5888/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.092487 [ 6016/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.101726 [ 6144/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.121520 [ 6272/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.126651 [ 6400/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.148728 [ 6528/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.123171 [ 6656/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.081301 [ 6784/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.113527 [ 6912/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.098887 [ 7040/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.102354 [ 7168/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.099034 [ 7296/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.100529 [ 7424/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.159161 [ 7552/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.127866 [ 7680/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.124128 [ 7808/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.108941 [ 7936/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.086589 [ 8064/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.101496 [ 8192/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.136796 [ 8320/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.111602 [ 8448/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.093196 [ 8576/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.126907 [ 8704/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.085682 [ 8832/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.112823 [ 8960/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.123603 [ 9088/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.111171 [ 9216/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.116388 [ 9344/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.117584 [ 9472/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.102638 [ 9600/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.111112 [ 9728/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.084484 [ 9856/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.115327 [ 9984/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.107592 [10112/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.127313 [10240/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.128376 [10368/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.106718 [10496/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.095497 [10624/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.132650 [10752/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.088796 [10880/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.115453 [11008/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.097108 [11136/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.130016 [11264/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.084015 [11392/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.125042 [11520/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.096446 [11648/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.122071 [11776/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.102391 [11904/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.111101 [12032/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.105743 [12160/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.130560 [12288/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.081451 [12416/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.118350 [12544/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.156366 [12672/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.129613 [12800/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.126635 [12928/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.113681 [13056/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.129017 [13184/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.094417 [13312/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.152595 [13440/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.135923 [13568/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.142917 [13696/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.096708 [13824/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.145853 [13952/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.094536 [14080/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.098455 [14208/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.087935 [14336/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.111246 [14464/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.093643 [14592/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.100872 [14720/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.100851 [14848/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.093974 [14976/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.107065 [15104/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.121461 [15232/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.121991 [15360/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.103023 [15488/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.117235 [15616/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.102476 [15744/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.112138 [15872/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.087372 [16000/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.101009 [16128/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.117300 [16256/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.105270 [16384/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.127102 [16512/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.144113 [16640/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.106421 [16768/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.131738 [16896/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.118189 [17024/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.111562 [17152/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.122858 [17280/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.125352 [17408/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.142434 [17536/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.113171 [17664/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.102457 [17792/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.110022 [17920/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.107978 [18048/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.105614 [18176/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.102217 [18304/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.114702 [18432/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.098478 [18560/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.112646 [18688/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.091055 [18816/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.094796 [18944/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.091717 [19072/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.093144 [19200/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.122519 [19328/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.114869 [19456/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.122085 [19584/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.129574 [19712/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.093639 [19840/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.120236 [19968/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.095610 [20096/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.097554 [20224/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.121313 [20352/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.083390 [20480/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.131705 [20608/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.117952 [20736/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.137019 [20864/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.109513 [20992/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.120105 [21120/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.119260 [21248/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.097090 [21376/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.109732 [21504/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.104594 [21632/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.087010 [21760/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.105615 [21888/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.104891 [22016/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.144621 [22144/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.103614 [22272/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.105911 [22400/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.103315 [22528/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.126820 [22656/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.117499 [22784/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.096184 [22912/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.083021 [23040/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.124367 [23168/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.152178 [23296/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.132283 [23424/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.127272 [23552/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.119083 [23680/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.096152 [23808/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.112712 [23936/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.114094 [24064/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.102791 [24192/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.156620 [24320/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.124944 [24448/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.095747 [24576/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.098979 [24704/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.106164 [24832/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.147257 [24872/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.147257 [24872/24872]: : 195it [00:11, 16.28it/s]
Epoch 2, time=224.44s
0%| | 0/194 [00:00<?, ?it/s]
loss: 0.107305 [ 128/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.114779 [ 256/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.148364 [ 384/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.112922 [ 512/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.098234 [ 640/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.131202 [ 768/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.138700 [ 896/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.101445 [ 1024/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.136815 [ 1152/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.099743 [ 1280/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.125671 [ 1408/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.097025 [ 1536/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.129602 [ 1664/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.095975 [ 1792/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.089459 [ 1920/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.093792 [ 2048/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.102970 [ 2176/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.103681 [ 2304/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.092324 [ 2432/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.091613 [ 2560/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.118171 [ 2688/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.095688 [ 2816/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.125213 [ 2944/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.098872 [ 3072/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.107371 [ 3200/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.111503 [ 3328/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.119554 [ 3456/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.096953 [ 3584/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.137932 [ 3712/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.130063 [ 3840/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.114319 [ 3968/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.121181 [ 4096/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.127258 [ 4224/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.103830 [ 4352/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.130245 [ 4480/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.090845 [ 4608/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.104880 [ 4736/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.120434 [ 4864/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.093167 [ 4992/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.103372 [ 5120/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.132143 [ 5248/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.135441 [ 5376/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.131234 [ 5504/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.104222 [ 5632/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.119869 [ 5760/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.107856 [ 5888/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.090401 [ 6016/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.098265 [ 6144/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.116023 [ 6272/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.121026 [ 6400/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.142599 [ 6528/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.117303 [ 6656/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.074856 [ 6784/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.108502 [ 6912/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.095109 [ 7040/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.101717 [ 7168/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.093891 [ 7296/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.090138 [ 7424/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.143951 [ 7552/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.124596 [ 7680/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.122153 [ 7808/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.105557 [ 7936/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.085692 [ 8064/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.101006 [ 8192/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.129942 [ 8320/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.099649 [ 8448/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.090209 [ 8576/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.125162 [ 8704/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.082322 [ 8832/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.101510 [ 8960/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.110344 [ 9088/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.104484 [ 9216/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.112453 [ 9344/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.107816 [ 9472/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.103367 [ 9600/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.092806 [ 9728/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.082589 [ 9856/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.115746 [ 9984/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.106666 [10112/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.124718 [10240/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.127615 [10368/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.103030 [10496/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.086819 [10624/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.119901 [10752/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.098744 [10880/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.104625 [11008/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.093589 [11136/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.121132 [11264/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.081412 [11392/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.119990 [11520/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.093227 [11648/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.117389 [11776/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.099345 [11904/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.112254 [12032/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.100856 [12160/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.128793 [12288/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.075338 [12416/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.112519 [12544/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.147571 [12672/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.121459 [12800/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.131828 [12928/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.112557 [13056/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.125178 [13184/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.082635 [13312/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.146190 [13440/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.120961 [13568/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.132144 [13696/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.088938 [13824/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.132646 [13952/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.086337 [14080/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.091380 [14208/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.075749 [14336/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.095876 [14464/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.091470 [14592/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.096439 [14720/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.097224 [14848/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.087271 [14976/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.105551 [15104/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.120611 [15232/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.121883 [15360/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.100296 [15488/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.114565 [15616/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.110895 [15744/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.108937 [15872/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.082311 [16000/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.102282 [16128/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.110154 [16256/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.102684 [16384/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.125887 [16512/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.133543 [16640/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.110772 [16768/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.118487 [16896/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.111522 [17024/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.107627 [17152/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.113483 [17280/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.119772 [17408/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.135429 [17536/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.106568 [17664/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.099056 [17792/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.108056 [17920/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.105499 [18048/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.106079 [18176/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.094195 [18304/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.112190 [18432/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.097404 [18560/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.109099 [18688/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.090790 [18816/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.088754 [18944/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.085361 [19072/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.085985 [19200/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.119850 [19328/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.110984 [19456/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.116506 [19584/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.125185 [19712/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.086749 [19840/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.109954 [19968/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.107057 [20096/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.095951 [20224/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.122111 [20352/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.087552 [20480/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.140860 [20608/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.110987 [20736/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.135036 [20864/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.107754 [20992/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.121696 [21120/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.114767 [21248/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.102066 [21376/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.102520 [21504/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.102217 [21632/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.076849 [21760/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.099550 [21888/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.098687 [22016/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.124170 [22144/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.101147 [22272/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.096961 [22400/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.099713 [22528/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.117695 [22656/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.106618 [22784/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.088248 [22912/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.070960 [23040/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.119358 [23168/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.134639 [23296/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.126058 [23424/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.126277 [23552/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.114280 [23680/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.096110 [23808/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.111149 [23936/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.125303 [24064/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.100773 [24192/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.146991 [24320/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.115098 [24448/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.090025 [24576/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.096830 [24704/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.095789 [24832/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.146779 [24872/24872]: 0%| | 0/194 [00:12<?, ?it/s]
loss: 0.146779 [24872/24872]: : 195it [00:12, 16.24it/s]
Epoch 3, time=236.46s
0%| | 0/194 [00:00<?, ?it/s]
loss: 0.095773 [ 128/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.116752 [ 256/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.155022 [ 384/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.100028 [ 512/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.095155 [ 640/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.123847 [ 768/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.113446 [ 896/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.107440 [ 1024/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.128190 [ 1152/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.099410 [ 1280/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.113363 [ 1408/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.092703 [ 1536/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.129406 [ 1664/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.097643 [ 1792/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.081637 [ 1920/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.105902 [ 2048/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.100972 [ 2176/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.100209 [ 2304/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.091705 [ 2432/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.086306 [ 2560/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.108733 [ 2688/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.092021 [ 2816/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.114824 [ 2944/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.101971 [ 3072/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.100559 [ 3200/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.104831 [ 3328/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.111304 [ 3456/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.093903 [ 3584/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.128623 [ 3712/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.124233 [ 3840/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.103535 [ 3968/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.122377 [ 4096/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.115306 [ 4224/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.095817 [ 4352/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.120287 [ 4480/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.085213 [ 4608/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.100989 [ 4736/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.117907 [ 4864/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.094395 [ 4992/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.105650 [ 5120/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.127287 [ 5248/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.138750 [ 5376/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.123998 [ 5504/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.110407 [ 5632/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.113664 [ 5760/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.099429 [ 5888/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.098197 [ 6016/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.091191 [ 6144/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.105902 [ 6272/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.120664 [ 6400/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.141437 [ 6528/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.126964 [ 6656/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.076101 [ 6784/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.116484 [ 6912/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.094742 [ 7040/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.112411 [ 7168/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.089831 [ 7296/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.100991 [ 7424/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.155167 [ 7552/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.133941 [ 7680/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.119206 [ 7808/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.105543 [ 7936/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.101784 [ 8064/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.096085 [ 8192/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.133418 [ 8320/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.106210 [ 8448/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.093895 [ 8576/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.147552 [ 8704/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.087243 [ 8832/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.156310 [ 8960/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.121189 [ 9088/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.115764 [ 9216/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.124492 [ 9344/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.126963 [ 9472/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.124432 [ 9600/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.114537 [ 9728/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.108429 [ 9856/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.109656 [ 9984/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.120960 [10112/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.121768 [10240/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.129859 [10368/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.103853 [10496/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.094527 [10624/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.132651 [10752/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.083706 [10880/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.106801 [11008/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.090775 [11136/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.123774 [11264/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.082434 [11392/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.124011 [11520/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.092589 [11648/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.115287 [11776/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.100992 [11904/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.110120 [12032/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.102789 [12160/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.122817 [12288/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.078775 [12416/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.114761 [12544/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.154039 [12672/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.117400 [12800/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.125357 [12928/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.104627 [13056/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.117017 [13184/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.079760 [13312/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.135770 [13440/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.120972 [13568/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.126105 [13696/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.086944 [13824/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.134212 [13952/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.095726 [14080/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.096567 [14208/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.077348 [14336/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.089476 [14464/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.089030 [14592/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.094709 [14720/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.096561 [14848/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.088089 [14976/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.102665 [15104/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.117732 [15232/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.116001 [15360/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.092786 [15488/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.120790 [15616/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.097581 [15744/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.098279 [15872/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.081567 [16000/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.095743 [16128/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.110129 [16256/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.094847 [16384/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.127687 [16512/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.131729 [16640/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.108397 [16768/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.116477 [16896/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.106595 [17024/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.111949 [17152/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.117197 [17280/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.119948 [17408/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.123975 [17536/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.100788 [17664/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.096331 [17792/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.100552 [17920/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.101883 [18048/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.102852 [18176/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.089266 [18304/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.108225 [18432/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.092448 [18560/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.105987 [18688/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.085369 [18816/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.084743 [18944/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.087964 [19072/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.081903 [19200/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.121035 [19328/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.105449 [19456/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.109307 [19584/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.117960 [19712/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.084615 [19840/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.102304 [19968/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.082229 [20096/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.088304 [20224/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.111533 [20352/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.080760 [20480/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.115795 [20608/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.109970 [20736/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.137034 [20864/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.104878 [20992/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.131528 [21120/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.111493 [21248/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.100254 [21376/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.099310 [21504/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.098161 [21632/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.073454 [21760/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.097872 [21888/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.100735 [22016/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.126354 [22144/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.102897 [22272/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.087911 [22400/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.099909 [22528/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.109609 [22656/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.106766 [22784/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.083233 [22912/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.069284 [23040/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.113453 [23168/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.135038 [23296/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.122645 [23424/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.117060 [23552/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.108473 [23680/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.085530 [23808/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.108587 [23936/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.107182 [24064/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.098679 [24192/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.136856 [24320/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.127295 [24448/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.093150 [24576/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.111618 [24704/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.142508 [24832/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.132718 [24872/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.132718 [24872/24872]: : 195it [00:11, 16.54it/s]
Epoch 4, time=248.24s
0%| | 0/194 [00:00<?, ?it/s]
loss: 0.135374 [ 128/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.111067 [ 256/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.174731 [ 384/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.131826 [ 512/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.089872 [ 640/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.151521 [ 768/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.130673 [ 896/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.117038 [ 1024/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.143231 [ 1152/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.110184 [ 1280/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.121925 [ 1408/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.091830 [ 1536/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.140403 [ 1664/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.093813 [ 1792/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.099321 [ 1920/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.104722 [ 2048/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.120762 [ 2176/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.112117 [ 2304/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.092448 [ 2432/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.099880 [ 2560/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.115898 [ 2688/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.108887 [ 2816/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.135578 [ 2944/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.106474 [ 3072/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.127278 [ 3200/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.114720 [ 3328/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.112347 [ 3456/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.095900 [ 3584/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.143814 [ 3712/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.123865 [ 3840/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.114486 [ 3968/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.127136 [ 4096/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.123374 [ 4224/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.106330 [ 4352/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.129601 [ 4480/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.085254 [ 4608/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.102772 [ 4736/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.132967 [ 4864/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.098226 [ 4992/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.101491 [ 5120/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.127849 [ 5248/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.136067 [ 5376/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.130126 [ 5504/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.100074 [ 5632/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.109950 [ 5760/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.106406 [ 5888/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.087880 [ 6016/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.096539 [ 6144/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.101768 [ 6272/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.119012 [ 6400/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.141337 [ 6528/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.109615 [ 6656/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.075213 [ 6784/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.103281 [ 6912/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.097409 [ 7040/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.097977 [ 7168/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.088561 [ 7296/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.083947 [ 7424/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.139651 [ 7552/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.116696 [ 7680/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.107785 [ 7808/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.098728 [ 7936/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.081264 [ 8064/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.091918 [ 8192/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.134417 [ 8320/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.089920 [ 8448/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.085423 [ 8576/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.116639 [ 8704/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.075259 [ 8832/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.102642 [ 8960/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.109253 [ 9088/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.094938 [ 9216/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.106973 [ 9344/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.105862 [ 9472/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.090705 [ 9600/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.101729 [ 9728/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.076310 [ 9856/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.112538 [ 9984/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.095887 [10112/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.126436 [10240/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.113729 [10368/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.106162 [10496/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.092710 [10624/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.123580 [10752/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.092798 [10880/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.122788 [11008/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.103671 [11136/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.123786 [11264/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.080669 [11392/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.113593 [11520/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.094745 [11648/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.111181 [11776/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.102095 [11904/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.101364 [12032/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.103757 [12160/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.128469 [12288/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.077625 [12416/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.112598 [12544/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.145020 [12672/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.114227 [12800/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.120188 [12928/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.102672 [13056/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.114247 [13184/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.085894 [13312/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.136619 [13440/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.119337 [13568/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.119039 [13696/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.085528 [13824/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.128632 [13952/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.095237 [14080/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.097954 [14208/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.082459 [14336/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.090644 [14464/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.087860 [14592/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.101392 [14720/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.093513 [14848/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.096861 [14976/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.105424 [15104/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.113805 [15232/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.116030 [15360/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.089805 [15488/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.118213 [15616/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.097049 [15744/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.102782 [15872/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.077135 [16000/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.099780 [16128/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.106601 [16256/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.094622 [16384/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.121412 [16512/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.128271 [16640/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.098476 [16768/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.112262 [16896/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.098852 [17024/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.107311 [17152/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.107885 [17280/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.114229 [17408/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.116632 [17536/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.100863 [17664/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.100975 [17792/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.100443 [17920/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.111365 [18048/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.110719 [18176/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.085903 [18304/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.122988 [18432/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.097310 [18560/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.114838 [18688/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.079089 [18816/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.082534 [18944/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.096263 [19072/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.083646 [19200/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.120293 [19328/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.110200 [19456/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.115442 [19584/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.115414 [19712/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.081498 [19840/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.106288 [19968/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.097937 [20096/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.088837 [20224/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.108773 [20352/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.078210 [20480/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.113033 [20608/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.112928 [20736/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.127399 [20864/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.103494 [20992/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.109910 [21120/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.108070 [21248/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.098932 [21376/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.098786 [21504/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.100337 [21632/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.071494 [21760/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.096207 [21888/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.100615 [22016/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.133218 [22144/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.099954 [22272/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.085585 [22400/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.097673 [22528/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.110981 [22656/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.096133 [22784/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.086632 [22912/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.072652 [23040/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.127028 [23168/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.133073 [23296/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.121845 [23424/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.117922 [23552/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.103372 [23680/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.093321 [23808/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.112725 [23936/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.110604 [24064/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.092192 [24192/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.132547 [24320/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.099465 [24448/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.092470 [24576/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.097940 [24704/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.095011 [24832/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.125390 [24872/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.125390 [24872/24872]: : 195it [00:11, 16.51it/s]
Epoch 5, time=260.06s
0%| | 0/194 [00:00<?, ?it/s]
loss: 0.086734 [ 128/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.113330 [ 256/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.153846 [ 384/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.105647 [ 512/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.085745 [ 640/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.129264 [ 768/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.115729 [ 896/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.104496 [ 1024/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.113628 [ 1152/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.107210 [ 1280/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.106721 [ 1408/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.097848 [ 1536/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.140879 [ 1664/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.097755 [ 1792/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.085540 [ 1920/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.112676 [ 2048/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.099909 [ 2176/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.096779 [ 2304/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.090117 [ 2432/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.084252 [ 2560/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.112068 [ 2688/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.089418 [ 2816/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.117494 [ 2944/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.105551 [ 3072/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.101948 [ 3200/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.108484 [ 3328/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.108632 [ 3456/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.091723 [ 3584/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.121193 [ 3712/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.116417 [ 3840/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.103275 [ 3968/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.117264 [ 4096/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.115402 [ 4224/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.099520 [ 4352/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.117024 [ 4480/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.081660 [ 4608/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.094448 [ 4736/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.111009 [ 4864/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.087784 [ 4992/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.091584 [ 5120/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.114355 [ 5248/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.119125 [ 5376/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.115439 [ 5504/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.096546 [ 5632/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.109863 [ 5760/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.094782 [ 5888/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.085500 [ 6016/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.088183 [ 6144/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.089292 [ 6272/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.116973 [ 6400/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.126372 [ 6528/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.106114 [ 6656/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.069902 [ 6784/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.106315 [ 6912/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.095162 [ 7040/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.104339 [ 7168/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.086201 [ 7296/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.087326 [ 7424/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.137546 [ 7552/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.117452 [ 7680/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.106768 [ 7808/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.098493 [ 7936/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.089917 [ 8064/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.091400 [ 8192/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.125611 [ 8320/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.099362 [ 8448/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.083160 [ 8576/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.122269 [ 8704/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.079102 [ 8832/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.129776 [ 8960/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.117235 [ 9088/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.105427 [ 9216/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.109238 [ 9344/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.109366 [ 9472/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.102864 [ 9600/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.113972 [ 9728/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.088334 [ 9856/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.110209 [ 9984/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.116375 [10112/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.122775 [10240/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.143907 [10368/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.098149 [10496/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.105562 [10624/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.111171 [10752/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.110890 [10880/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.100376 [11008/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.112896 [11136/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.122415 [11264/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.080991 [11392/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.121167 [11520/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.090630 [11648/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.120199 [11776/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.095638 [11904/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.120215 [12032/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.097962 [12160/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.124119 [12288/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.077408 [12416/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.111671 [12544/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.150175 [12672/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.112540 [12800/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.131821 [12928/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.098987 [13056/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.117806 [13184/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.078095 [13312/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.136928 [13440/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.108011 [13568/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.121579 [13696/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.081153 [13824/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.127181 [13952/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.086385 [14080/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.087310 [14208/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.070868 [14336/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.081380 [14464/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.081112 [14592/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.088942 [14720/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.086706 [14848/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.083129 [14976/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.102596 [15104/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.109019 [15232/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.106676 [15360/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.086323 [15488/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.106982 [15616/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.093570 [15744/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.094113 [15872/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.088369 [16000/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.092959 [16128/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.115068 [16256/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.091986 [16384/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.121271 [16512/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.119999 [16640/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.100182 [16768/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.113229 [16896/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.095942 [17024/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.109780 [17152/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.095973 [17280/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.115897 [17408/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.112721 [17536/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.094265 [17664/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.095343 [17792/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.098880 [17920/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.104944 [18048/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.098531 [18176/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.080899 [18304/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.111875 [18432/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.088326 [18560/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.098957 [18688/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.076304 [18816/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.077784 [18944/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.090157 [19072/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.082571 [19200/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.111141 [19328/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.106464 [19456/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.097029 [19584/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.113249 [19712/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.079545 [19840/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.089927 [19968/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.091489 [20096/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.088602 [20224/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.104613 [20352/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.072484 [20480/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.103270 [20608/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.109971 [20736/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.129122 [20864/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.094443 [20992/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.115960 [21120/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.103204 [21248/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.094947 [21376/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.092436 [21504/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.089319 [21632/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.070861 [21760/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.089867 [21888/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.098957 [22016/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.118739 [22144/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.100127 [22272/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.087348 [22400/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.096856 [22528/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.109438 [22656/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.092895 [22784/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.092426 [22912/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.073941 [23040/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.108748 [23168/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.133846 [23296/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.123218 [23424/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.108719 [23552/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.100829 [23680/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.082881 [23808/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.114438 [23936/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.104261 [24064/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.091799 [24192/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.143965 [24320/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.092299 [24448/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.088934 [24576/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.084905 [24704/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.093388 [24832/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.119240 [24872/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.119240 [24872/24872]: : 195it [00:11, 16.58it/s]
Epoch 6, time=271.82s
0%| | 0/194 [00:00<?, ?it/s]
loss: 0.089978 [ 128/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.097510 [ 256/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.134250 [ 384/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.099042 [ 512/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.078824 [ 640/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.111546 [ 768/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.112137 [ 896/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.090104 [ 1024/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.112216 [ 1152/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.081722 [ 1280/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.100992 [ 1408/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.080644 [ 1536/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.118227 [ 1664/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.081838 [ 1792/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.080418 [ 1920/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.091068 [ 2048/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.105800 [ 2176/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.086459 [ 2304/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.087434 [ 2432/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.078494 [ 2560/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.102631 [ 2688/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.087619 [ 2816/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.108094 [ 2944/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.104099 [ 3072/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.094748 [ 3200/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.103652 [ 3328/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.114145 [ 3456/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.095169 [ 3584/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.121442 [ 3712/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.117018 [ 3840/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.096357 [ 3968/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.110104 [ 4096/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.105597 [ 4224/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.095251 [ 4352/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.108159 [ 4480/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.077132 [ 4608/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.091196 [ 4736/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.113955 [ 4864/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.089608 [ 4992/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.095208 [ 5120/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.111980 [ 5248/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.112791 [ 5376/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.117910 [ 5504/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.096388 [ 5632/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.099514 [ 5760/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.093931 [ 5888/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.083099 [ 6016/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.087119 [ 6144/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.088613 [ 6272/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.113439 [ 6400/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.113884 [ 6528/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.100652 [ 6656/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.066689 [ 6784/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.095393 [ 6912/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.087704 [ 7040/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.090307 [ 7168/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.081560 [ 7296/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.092488 [ 7424/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.123035 [ 7552/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.102320 [ 7680/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.106450 [ 7808/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.096580 [ 7936/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.084258 [ 8064/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.089448 [ 8192/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.127187 [ 8320/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.082507 [ 8448/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.081428 [ 8576/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.100322 [ 8704/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.073122 [ 8832/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.100338 [ 8960/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.105269 [ 9088/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.097959 [ 9216/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.096389 [ 9344/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.098683 [ 9472/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.094094 [ 9600/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.088014 [ 9728/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.071243 [ 9856/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.100963 [ 9984/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.100369 [10112/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.110423 [10240/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.105602 [10368/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.090433 [10496/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.076926 [10624/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.110837 [10752/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.078361 [10880/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.095650 [11008/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.087548 [11136/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.114357 [11264/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.074214 [11392/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.114810 [11520/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.084231 [11648/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.104605 [11776/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.092587 [11904/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.100420 [12032/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.092173 [12160/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.115180 [12288/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.067031 [12416/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.096756 [12544/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.132679 [12672/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.107433 [12800/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.110645 [12928/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.089449 [13056/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.113611 [13184/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.072472 [13312/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.129041 [13440/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.104773 [13568/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.115107 [13696/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.079521 [13824/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.128622 [13952/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.082759 [14080/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.087243 [14208/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.074627 [14336/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.093604 [14464/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.088274 [14592/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.089390 [14720/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.092463 [14848/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.088297 [14976/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.099423 [15104/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.116228 [15232/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.104428 [15360/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.100301 [15488/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.099587 [15616/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.095550 [15744/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.089489 [15872/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.074197 [16000/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.093647 [16128/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.103075 [16256/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.090960 [16384/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.119221 [16512/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.121708 [16640/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.097291 [16768/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.107872 [16896/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.097154 [17024/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.094398 [17152/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.096166 [17280/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.110396 [17408/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.114369 [17536/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.095184 [17664/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.092425 [17792/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.084882 [17920/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.103864 [18048/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.100141 [18176/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.082516 [18304/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.116720 [18432/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.090227 [18560/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.107126 [18688/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.070657 [18816/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.080097 [18944/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.096103 [19072/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.078128 [19200/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.111681 [19328/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.102159 [19456/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.100453 [19584/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.119626 [19712/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.078265 [19840/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.095018 [19968/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.089357 [20096/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.086828 [20224/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.121732 [20352/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.073293 [20480/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.116543 [20608/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.109598 [20736/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.126783 [20864/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.093351 [20992/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.109621 [21120/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.115976 [21248/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.103990 [21376/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.100838 [21504/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.105953 [21632/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.070661 [21760/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.092944 [21888/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.093128 [22016/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.110312 [22144/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.085797 [22272/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.083524 [22400/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.086886 [22528/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.100244 [22656/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.088413 [22784/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.083506 [22912/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.067924 [23040/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.122870 [23168/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.124282 [23296/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.113686 [23424/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.110884 [23552/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.096449 [23680/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.084182 [23808/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.100223 [23936/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.108465 [24064/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.092804 [24192/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.123724 [24320/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.100985 [24448/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.084884 [24576/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.084692 [24704/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.085637 [24832/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.112173 [24872/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.112173 [24872/24872]: : 195it [00:11, 16.73it/s]
-------------------------------
LR=1e-05, batch_size=256
-------------------------------
Epoch 1, time=283.48s
0%| | 0/97 [00:00<?, ?it/s]
loss: 0.094213 [ 256/24872]: 0%| | 0/97 [00:00<?, ?it/s]
loss: 0.116586 [ 512/24872]: 0%| | 0/97 [00:00<?, ?it/s]
loss: 0.090711 [ 768/24872]: 0%| | 0/97 [00:00<?, ?it/s]
loss: 0.098942 [ 1024/24872]: 0%| | 0/97 [00:00<?, ?it/s]
loss: 0.091897 [ 1280/24872]: 0%| | 0/97 [00:00<?, ?it/s]
loss: 0.087287 [ 1536/24872]: 0%| | 0/97 [00:00<?, ?it/s]
loss: 0.097673 [ 1792/24872]: 0%| | 0/97 [00:00<?, ?it/s]
loss: 0.083586 [ 2048/24872]: 0%| | 0/97 [00:00<?, ?it/s]
loss: 0.092909 [ 2304/24872]: 0%| | 0/97 [00:00<?, ?it/s]
loss: 0.079073 [ 2560/24872]: 0%| | 0/97 [00:00<?, ?it/s]
loss: 0.096799 [ 2816/24872]: 0%| | 0/97 [00:00<?, ?it/s]
loss: 0.084060 [ 3072/24872]: 0%| | 0/97 [00:01<?, ?it/s]
loss: 0.090736 [ 3328/24872]: 0%| | 0/97 [00:01<?, ?it/s]
loss: 0.086189 [ 3584/24872]: 0%| | 0/97 [00:01<?, ?it/s]
loss: 0.109929 [ 3840/24872]: 0%| | 0/97 [00:01<?, ?it/s]
loss: 0.099515 [ 4096/24872]: 0%| | 0/97 [00:01<?, ?it/s]
loss: 0.103230 [ 4352/24872]: 0%| | 0/97 [00:01<?, ?it/s]
loss: 0.085613 [ 4608/24872]: 0%| | 0/97 [00:01<?, ?it/s]
loss: 0.098835 [ 4864/24872]: 0%| | 0/97 [00:01<?, ?it/s]
loss: 0.078482 [ 5120/24872]: 0%| | 0/97 [00:01<?, ?it/s]
loss: 0.106432 [ 5376/24872]: 0%| | 0/97 [00:01<?, ?it/s]
loss: 0.093882 [ 5632/24872]: 0%| | 0/97 [00:01<?, ?it/s]
loss: 0.097403 [ 5888/24872]: 0%| | 0/97 [00:02<?, ?it/s]
loss: 0.076952 [ 6144/24872]: 0%| | 0/97 [00:02<?, ?it/s]
loss: 0.095374 [ 6400/24872]: 0%| | 0/97 [00:02<?, ?it/s]
loss: 0.091011 [ 6656/24872]: 0%| | 0/97 [00:02<?, ?it/s]
loss: 0.074658 [ 6912/24872]: 0%| | 0/97 [00:02<?, ?it/s]
loss: 0.084081 [ 7168/24872]: 0%| | 0/97 [00:02<?, ?it/s]
loss: 0.076612 [ 7424/24872]: 0%| | 0/97 [00:02<?, ?it/s]
loss: 0.108627 [ 7680/24872]: 0%| | 0/97 [00:02<?, ?it/s]
loss: 0.084431 [ 7936/24872]: 0%| | 0/97 [00:02<?, ?it/s]
loss: 0.076183 [ 8192/24872]: 0%| | 0/97 [00:02<?, ?it/s]
loss: 0.095250 [ 8448/24872]: 0%| | 0/97 [00:02<?, ?it/s]
loss: 0.085050 [ 8704/24872]: 0%| | 0/97 [00:02<?, ?it/s]
loss: 0.072636 [ 8960/24872]: 0%| | 0/97 [00:03<?, ?it/s]
loss: 0.088514 [ 9216/24872]: 0%| | 0/97 [00:03<?, ?it/s]
loss: 0.087395 [ 9472/24872]: 0%| | 0/97 [00:03<?, ?it/s]
loss: 0.074884 [ 9728/24872]: 0%| | 0/97 [00:03<?, ?it/s]
loss: 0.080439 [ 9984/24872]: 0%| | 0/97 [00:03<?, ?it/s]
loss: 0.087580 [10240/24872]: 0%| | 0/97 [00:03<?, ?it/s]
loss: 0.090822 [10496/24872]: 0%| | 0/97 [00:03<?, ?it/s]
loss: 0.084174 [10752/24872]: 0%| | 0/97 [00:03<?, ?it/s]
loss: 0.074893 [11008/24872]: 0%| | 0/97 [00:03<?, ?it/s]
loss: 0.089407 [11264/24872]: 0%| | 0/97 [00:03<?, ?it/s]
loss: 0.080884 [11520/24872]: 0%| | 0/97 [00:03<?, ?it/s]
loss: 0.080058 [11776/24872]: 0%| | 0/97 [00:04<?, ?it/s]
loss: 0.081448 [12032/24872]: 0%| | 0/97 [00:04<?, ?it/s]
loss: 0.093950 [12288/24872]: 0%| | 0/97 [00:04<?, ?it/s]
loss: 0.073966 [12544/24872]: 0%| | 0/97 [00:04<?, ?it/s]
loss: 0.108944 [12800/24872]: 0%| | 0/97 [00:04<?, ?it/s]
loss: 0.090052 [13056/24872]: 0%| | 0/97 [00:04<?, ?it/s]
loss: 0.081634 [13312/24872]: 0%| | 0/97 [00:04<?, ?it/s]
loss: 0.107067 [13568/24872]: 0%| | 0/97 [00:04<?, ?it/s]
loss: 0.086885 [13824/24872]: 0%| | 0/97 [00:04<?, ?it/s]
loss: 0.093123 [14080/24872]: 0%| | 0/97 [00:04<?, ?it/s]
loss: 0.069636 [14336/24872]: 0%| | 0/97 [00:04<?, ?it/s]
loss: 0.069092 [14592/24872]: 0%| | 0/97 [00:04<?, ?it/s]
loss: 0.078140 [14848/24872]: 0%| | 0/97 [00:05<?, ?it/s]
loss: 0.079016 [15104/24872]: 0%| | 0/97 [00:05<?, ?it/s]
loss: 0.098032 [15360/24872]: 0%| | 0/97 [00:05<?, ?it/s]
loss: 0.084350 [15616/24872]: 0%| | 0/97 [00:05<?, ?it/s]
loss: 0.084317 [15872/24872]: 0%| | 0/97 [00:05<?, ?it/s]
loss: 0.076037 [16128/24872]: 0%| | 0/97 [00:05<?, ?it/s]
loss: 0.088324 [16384/24872]: 0%| | 0/97 [00:05<?, ?it/s]
loss: 0.109893 [16640/24872]: 0%| | 0/97 [00:05<?, ?it/s]
loss: 0.093443 [16896/24872]: 0%| | 0/97 [00:05<?, ?it/s]
loss: 0.087997 [17152/24872]: 0%| | 0/97 [00:05<?, ?it/s]
loss: 0.097372 [17408/24872]: 0%| | 0/97 [00:05<?, ?it/s]
loss: 0.097424 [17664/24872]: 0%| | 0/97 [00:06<?, ?it/s]
loss: 0.083162 [17920/24872]: 0%| | 0/97 [00:06<?, ?it/s]
loss: 0.085789 [18176/24872]: 0%| | 0/97 [00:06<?, ?it/s]
loss: 0.085701 [18432/24872]: 0%| | 0/97 [00:06<?, ?it/s]
loss: 0.083752 [18688/24872]: 0%| | 0/97 [00:06<?, ?it/s]
loss: 0.070727 [18944/24872]: 0%| | 0/97 [00:06<?, ?it/s]
loss: 0.074059 [19200/24872]: 0%| | 0/97 [00:06<?, ?it/s]
loss: 0.097433 [19456/24872]: 0%| | 0/97 [00:06<?, ?it/s]
loss: 0.089015 [19712/24872]: 0%| | 0/97 [00:06<?, ?it/s]
loss: 0.074259 [19968/24872]: 0%| | 0/97 [00:06<?, ?it/s]
loss: 0.078170 [20224/24872]: 0%| | 0/97 [00:06<?, ?it/s]
loss: 0.077103 [20480/24872]: 0%| | 0/97 [00:06<?, ?it/s]
loss: 0.092410 [20736/24872]: 0%| | 0/97 [00:07<?, ?it/s]
loss: 0.096658 [20992/24872]: 0%| | 0/97 [00:07<?, ?it/s]
loss: 0.091319 [21248/24872]: 0%| | 0/97 [00:07<?, ?it/s]
loss: 0.085325 [21504/24872]: 0%| | 0/97 [00:07<?, ?it/s]
loss: 0.073085 [21760/24872]: 0%| | 0/97 [00:07<?, ?it/s]
loss: 0.082501 [22016/24872]: 0%| | 0/97 [00:07<?, ?it/s]
loss: 0.093628 [22272/24872]: 0%| | 0/97 [00:07<?, ?it/s]
loss: 0.079266 [22528/24872]: 0%| | 0/97 [00:07<?, ?it/s]
loss: 0.084848 [22784/24872]: 0%| | 0/97 [00:07<?, ?it/s]
loss: 0.063658 [23040/24872]: 0%| | 0/97 [00:07<?, ?it/s]
loss: 0.106770 [23296/24872]: 0%| | 0/97 [00:07<?, ?it/s]
loss: 0.104110 [23552/24872]: 0%| | 0/97 [00:08<?, ?it/s]
loss: 0.078660 [23808/24872]: 0%| | 0/97 [00:08<?, ?it/s]
loss: 0.092491 [24064/24872]: 0%| | 0/97 [00:08<?, ?it/s]
loss: 0.094260 [24320/24872]: 0%| | 0/97 [00:08<?, ?it/s]
loss: 0.080091 [24576/24872]: 0%| | 0/97 [00:08<?, ?it/s]
loss: 0.073125 [24832/24872]: 0%| | 0/97 [00:08<?, ?it/s]
loss: 0.106891 [24872/24872]: 0%| | 0/97 [00:08<?, ?it/s]
loss: 0.106891 [24872/24872]: : 98it [00:08, 11.51it/s]
Epoch 2, time=291.99s
0%| | 0/97 [00:00<?, ?it/s]
loss: 0.082805 [ 256/24872]: 0%| | 0/97 [00:00<?, ?it/s]
loss: 0.101750 [ 512/24872]: 0%| | 0/97 [00:00<?, ?it/s]
loss: 0.084893 [ 768/24872]: 0%| | 0/97 [00:00<?, ?it/s]
loss: 0.090809 [ 1024/24872]: 0%| | 0/97 [00:00<?, ?it/s]
loss: 0.086020 [ 1280/24872]: 0%| | 0/97 [00:00<?, ?it/s]
loss: 0.082086 [ 1536/24872]: 0%| | 0/97 [00:00<?, ?it/s]
loss: 0.089244 [ 1792/24872]: 0%| | 0/97 [00:00<?, ?it/s]
loss: 0.080428 [ 2048/24872]: 0%| | 0/97 [00:00<?, ?it/s]
loss: 0.088275 [ 2304/24872]: 0%| | 0/97 [00:00<?, ?it/s]
loss: 0.076363 [ 2560/24872]: 0%| | 0/97 [00:00<?, ?it/s]
loss: 0.093737 [ 2816/24872]: 0%| | 0/97 [00:00<?, ?it/s]
loss: 0.079251 [ 3072/24872]: 0%| | 0/97 [00:01<?, ?it/s]
loss: 0.088477 [ 3328/24872]: 0%| | 0/97 [00:01<?, ?it/s]
loss: 0.083839 [ 3584/24872]: 0%| | 0/97 [00:01<?, ?it/s]
loss: 0.105294 [ 3840/24872]: 0%| | 0/97 [00:01<?, ?it/s]
loss: 0.097829 [ 4096/24872]: 0%| | 0/97 [00:01<?, ?it/s]
loss: 0.099408 [ 4352/24872]: 0%| | 0/97 [00:01<?, ?it/s]
loss: 0.083745 [ 4608/24872]: 0%| | 0/97 [00:01<?, ?it/s]
loss: 0.092101 [ 4864/24872]: 0%| | 0/97 [00:01<?, ?it/s]
loss: 0.075847 [ 5120/24872]: 0%| | 0/97 [00:01<?, ?it/s]
loss: 0.105517 [ 5376/24872]: 0%| | 0/97 [00:01<?, ?it/s]
loss: 0.091249 [ 5632/24872]: 0%| | 0/97 [00:01<?, ?it/s]
loss: 0.093576 [ 5888/24872]: 0%| | 0/97 [00:02<?, ?it/s]
loss: 0.075257 [ 6144/24872]: 0%| | 0/97 [00:02<?, ?it/s]
loss: 0.092287 [ 6400/24872]: 0%| | 0/97 [00:02<?, ?it/s]
loss: 0.088650 [ 6656/24872]: 0%| | 0/97 [00:02<?, ?it/s]
loss: 0.072178 [ 6912/24872]: 0%| | 0/97 [00:02<?, ?it/s]
loss: 0.079838 [ 7168/24872]: 0%| | 0/97 [00:02<?, ?it/s]
loss: 0.073444 [ 7424/24872]: 0%| | 0/97 [00:02<?, ?it/s]
loss: 0.098744 [ 7680/24872]: 0%| | 0/97 [00:02<?, ?it/s]
loss: 0.082209 [ 7936/24872]: 0%| | 0/97 [00:02<?, ?it/s]
loss: 0.074963 [ 8192/24872]: 0%| | 0/97 [00:02<?, ?it/s]
loss: 0.091284 [ 8448/24872]: 0%| | 0/97 [00:02<?, ?it/s]
loss: 0.084904 [ 8704/24872]: 0%| | 0/97 [00:03<?, ?it/s]
loss: 0.074025 [ 8960/24872]: 0%| | 0/97 [00:03<?, ?it/s]
loss: 0.085706 [ 9216/24872]: 0%| | 0/97 [00:03<?, ?it/s]
loss: 0.086985 [ 9472/24872]: 0%| | 0/97 [00:03<?, ?it/s]
loss: 0.080180 [ 9728/24872]: 0%| | 0/97 [00:03<?, ?it/s]
loss: 0.078058 [ 9984/24872]: 0%| | 0/97 [00:03<?, ?it/s]
loss: 0.086914 [10240/24872]: 0%| | 0/97 [00:03<?, ?it/s]
loss: 0.091526 [10496/24872]: 0%| | 0/97 [00:03<?, ?it/s]
loss: 0.086341 [10752/24872]: 0%| | 0/97 [00:03<?, ?it/s]
loss: 0.072348 [11008/24872]: 0%| | 0/97 [00:03<?, ?it/s]
loss: 0.090982 [11264/24872]: 0%| | 0/97 [00:03<?, ?it/s]
loss: 0.080346 [11520/24872]: 0%| | 0/97 [00:03<?, ?it/s]
loss: 0.078043 [11776/24872]: 0%| | 0/97 [00:04<?, ?it/s]
loss: 0.080256 [12032/24872]: 0%| | 0/97 [00:04<?, ?it/s]
loss: 0.096111 [12288/24872]: 0%| | 0/97 [00:04<?, ?it/s]
loss: 0.072834 [12544/24872]: 0%| | 0/97 [00:04<?, ?it/s]
loss: 0.108325 [12800/24872]: 0%| | 0/97 [00:04<?, ?it/s]
loss: 0.088881 [13056/24872]: 0%| | 0/97 [00:04<?, ?it/s]
loss: 0.080755 [13312/24872]: 0%| | 0/97 [00:04<?, ?it/s]
loss: 0.111474 [13568/24872]: 0%| | 0/97 [00:04<?, ?it/s]
loss: 0.085013 [13824/24872]: 0%| | 0/97 [00:04<?, ?it/s]
loss: 0.092690 [14080/24872]: 0%| | 0/97 [00:04<?, ?it/s]
loss: 0.068797 [14336/24872]: 0%| | 0/97 [00:04<?, ?it/s]
loss: 0.066769 [14592/24872]: 0%| | 0/97 [00:05<?, ?it/s]
loss: 0.077365 [14848/24872]: 0%| | 0/97 [00:05<?, ?it/s]
loss: 0.077262 [15104/24872]: 0%| | 0/97 [00:05<?, ?it/s]
loss: 0.096980 [15360/24872]: 0%| | 0/97 [00:05<?, ?it/s]
loss: 0.081922 [15616/24872]: 0%| | 0/97 [00:05<?, ?it/s]
loss: 0.079205 [15872/24872]: 0%| | 0/97 [00:05<?, ?it/s]
loss: 0.075253 [16128/24872]: 0%| | 0/97 [00:05<?, ?it/s]
loss: 0.085663 [16384/24872]: 0%| | 0/97 [00:05<?, ?it/s]
loss: 0.105200 [16640/24872]: 0%| | 0/97 [00:05<?, ?it/s]
loss: 0.092458 [16896/24872]: 0%| | 0/97 [00:05<?, ?it/s]
loss: 0.083709 [17152/24872]: 0%| | 0/97 [00:05<?, ?it/s]
loss: 0.093082 [17408/24872]: 0%| | 0/97 [00:06<?, ?it/s]
loss: 0.092877 [17664/24872]: 0%| | 0/97 [00:06<?, ?it/s]
loss: 0.080572 [17920/24872]: 0%| | 0/97 [00:06<?, ?it/s]
loss: 0.084242 [18176/24872]: 0%| | 0/97 [00:06<?, ?it/s]
loss: 0.087011 [18432/24872]: 0%| | 0/97 [00:06<?, ?it/s]
loss: 0.082381 [18688/24872]: 0%| | 0/97 [00:06<?, ?it/s]
loss: 0.068107 [18944/24872]: 0%| | 0/97 [00:06<?, ?it/s]
loss: 0.071558 [19200/24872]: 0%| | 0/97 [00:06<?, ?it/s]
loss: 0.095729 [19456/24872]: 0%| | 0/97 [00:06<?, ?it/s]
loss: 0.087071 [19712/24872]: 0%| | 0/97 [00:06<?, ?it/s]
loss: 0.073448 [19968/24872]: 0%| | 0/97 [00:06<?, ?it/s]
loss: 0.078611 [20224/24872]: 0%| | 0/97 [00:06<?, ?it/s]
loss: 0.076077 [20480/24872]: 0%| | 0/97 [00:07<?, ?it/s]
loss: 0.091027 [20736/24872]: 0%| | 0/97 [00:07<?, ?it/s]
loss: 0.095250 [20992/24872]: 0%| | 0/97 [00:07<?, ?it/s]
loss: 0.090685 [21248/24872]: 0%| | 0/97 [00:07<?, ?it/s]
loss: 0.083847 [21504/24872]: 0%| | 0/97 [00:07<?, ?it/s]
loss: 0.071178 [21760/24872]: 0%| | 0/97 [00:07<?, ?it/s]
loss: 0.081360 [22016/24872]: 0%| | 0/97 [00:07<?, ?it/s]
loss: 0.091122 [22272/24872]: 0%| | 0/97 [00:07<?, ?it/s]
loss: 0.078179 [22528/24872]: 0%| | 0/97 [00:07<?, ?it/s]
loss: 0.083583 [22784/24872]: 0%| | 0/97 [00:07<?, ?it/s]
loss: 0.063274 [23040/24872]: 0%| | 0/97 [00:07<?, ?it/s]
loss: 0.105656 [23296/24872]: 0%| | 0/97 [00:08<?, ?it/s]
loss: 0.104004 [23552/24872]: 0%| | 0/97 [00:08<?, ?it/s]
loss: 0.077727 [23808/24872]: 0%| | 0/97 [00:08<?, ?it/s]
loss: 0.091738 [24064/24872]: 0%| | 0/97 [00:08<?, ?it/s]
loss: 0.092727 [24320/24872]: 0%| | 0/97 [00:08<?, ?it/s]
loss: 0.078961 [24576/24872]: 0%| | 0/97 [00:08<?, ?it/s]
loss: 0.071645 [24832/24872]: 0%| | 0/97 [00:08<?, ?it/s]
loss: 0.103781 [24872/24872]: 0%| | 0/97 [00:08<?, ?it/s]
loss: 0.103781 [24872/24872]: : 98it [00:08, 11.41it/s]
Epoch 3, time=300.59s
0%| | 0/97 [00:00<?, ?it/s]
loss: 0.080721 [ 256/24872]: 0%| | 0/97 [00:00<?, ?it/s]
loss: 0.100496 [ 512/24872]: 0%| | 0/97 [00:00<?, ?it/s]
loss: 0.083580 [ 768/24872]: 0%| | 0/97 [00:00<?, ?it/s]
loss: 0.090124 [ 1024/24872]: 0%| | 0/97 [00:00<?, ?it/s]
loss: 0.085342 [ 1280/24872]: 0%| | 0/97 [00:00<?, ?it/s]
loss: 0.080981 [ 1536/24872]: 0%| | 0/97 [00:00<?, ?it/s]
loss: 0.086950 [ 1792/24872]: 0%| | 0/97 [00:00<?, ?it/s]
loss: 0.079358 [ 2048/24872]: 0%| | 0/97 [00:00<?, ?it/s]
loss: 0.085183 [ 2304/24872]: 0%| | 0/97 [00:00<?, ?it/s]
loss: 0.074113 [ 2560/24872]: 0%| | 0/97 [00:00<?, ?it/s]
loss: 0.091179 [ 2816/24872]: 0%| | 0/97 [00:00<?, ?it/s]
loss: 0.076848 [ 3072/24872]: 0%| | 0/97 [00:01<?, ?it/s]
loss: 0.086388 [ 3328/24872]: 0%| | 0/97 [00:01<?, ?it/s]
loss: 0.082872 [ 3584/24872]: 0%| | 0/97 [00:01<?, ?it/s]
loss: 0.102404 [ 3840/24872]: 0%| | 0/97 [00:01<?, ?it/s]
loss: 0.096539 [ 4096/24872]: 0%| | 0/97 [00:01<?, ?it/s]
loss: 0.094900 [ 4352/24872]: 0%| | 0/97 [00:01<?, ?it/s]
loss: 0.082795 [ 4608/24872]: 0%| | 0/97 [00:01<?, ?it/s]
loss: 0.086604 [ 4864/24872]: 0%| | 0/97 [00:01<?, ?it/s]
loss: 0.075205 [ 5120/24872]: 0%| | 0/97 [00:01<?, ?it/s]
loss: 0.104727 [ 5376/24872]: 0%| | 0/97 [00:01<?, ?it/s]
loss: 0.089960 [ 5632/24872]: 0%| | 0/97 [00:01<?, ?it/s]
loss: 0.087702 [ 5888/24872]: 0%| | 0/97 [00:02<?, ?it/s]
loss: 0.074787 [ 6144/24872]: 0%| | 0/97 [00:02<?, ?it/s]
loss: 0.088691 [ 6400/24872]: 0%| | 0/97 [00:02<?, ?it/s]
loss: 0.087954 [ 6656/24872]: 0%| | 0/97 [00:02<?, ?it/s]
loss: 0.071115 [ 6912/24872]: 0%| | 0/97 [00:02<?, ?it/s]
loss: 0.078559 [ 7168/24872]: 0%| | 0/97 [00:02<?, ?it/s]
loss: 0.072509 [ 7424/24872]: 0%| | 0/97 [00:02<?, ?it/s]
loss: 0.099391 [ 7680/24872]: 0%| | 0/97 [00:02<?, ?it/s]
loss: 0.080933 [ 7936/24872]: 0%| | 0/97 [00:02<?, ?it/s]
loss: 0.074169 [ 8192/24872]: 0%| | 0/97 [00:02<?, ?it/s]
loss: 0.089439 [ 8448/24872]: 0%| | 0/97 [00:02<?, ?it/s]
loss: 0.082660 [ 8704/24872]: 0%| | 0/97 [00:03<?, ?it/s]
loss: 0.070691 [ 8960/24872]: 0%| | 0/97 [00:03<?, ?it/s]
loss: 0.084248 [ 9216/24872]: 0%| | 0/97 [00:03<?, ?it/s]
loss: 0.086013 [ 9472/24872]: 0%| | 0/97 [00:03<?, ?it/s]
loss: 0.080274 [ 9728/24872]: 0%| | 0/97 [00:03<?, ?it/s]
loss: 0.077178 [ 9984/24872]: 0%| | 0/97 [00:03<?, ?it/s]
loss: 0.086887 [10240/24872]: 0%| | 0/97 [00:03<?, ?it/s]
loss: 0.091201 [10496/24872]: 0%| | 0/97 [00:03<?, ?it/s]
loss: 0.090802 [10752/24872]: 0%| | 0/97 [00:03<?, ?it/s]
loss: 0.071140 [11008/24872]: 0%| | 0/97 [00:03<?, ?it/s]
loss: 0.087468 [11264/24872]: 0%| | 0/97 [00:03<?, ?it/s]
loss: 0.079865 [11520/24872]: 0%| | 0/97 [00:04<?, ?it/s]
loss: 0.076824 [11776/24872]: 0%| | 0/97 [00:04<?, ?it/s]
loss: 0.080708 [12032/24872]: 0%| | 0/97 [00:04<?, ?it/s]
loss: 0.095564 [12288/24872]: 0%| | 0/97 [00:04<?, ?it/s]
loss: 0.071748 [12544/24872]: 0%| | 0/97 [00:04<?, ?it/s]
loss: 0.109829 [12800/24872]: 0%| | 0/97 [00:04<?, ?it/s]
loss: 0.088006 [13056/24872]: 0%| | 0/97 [00:04<?, ?it/s]
loss: 0.080252 [13312/24872]: 0%| | 0/97 [00:04<?, ?it/s]
loss: 0.112197 [13568/24872]: 0%| | 0/97 [00:04<?, ?it/s]
loss: 0.084423 [13824/24872]: 0%| | 0/97 [00:04<?, ?it/s]
loss: 0.092777 [14080/24872]: 0%| | 0/97 [00:04<?, ?it/s]
loss: 0.068101 [14336/24872]: 0%| | 0/97 [00:05<?, ?it/s]
loss: 0.065700 [14592/24872]: 0%| | 0/97 [00:05<?, ?it/s]
loss: 0.076886 [14848/24872]: 0%| | 0/97 [00:05<?, ?it/s]
loss: 0.076262 [15104/24872]: 0%| | 0/97 [00:05<?, ?it/s]
loss: 0.097174 [15360/24872]: 0%| | 0/97 [00:05<?, ?it/s]
loss: 0.081189 [15616/24872]: 0%| | 0/97 [00:05<?, ?it/s]
loss: 0.077630 [15872/24872]: 0%| | 0/97 [00:05<?, ?it/s]
loss: 0.074660 [16128/24872]: 0%| | 0/97 [00:05<?, ?it/s]
loss: 0.084855 [16384/24872]: 0%| | 0/97 [00:05<?, ?it/s]
loss: 0.103580 [16640/24872]: 0%| | 0/97 [00:05<?, ?it/s]
loss: 0.092566 [16896/24872]: 0%| | 0/97 [00:05<?, ?it/s]
loss: 0.082691 [17152/24872]: 0%| | 0/97 [00:05<?, ?it/s]
loss: 0.092215 [17408/24872]: 0%| | 0/97 [00:06<?, ?it/s]
loss: 0.090832 [17664/24872]: 0%| | 0/97 [00:06<?, ?it/s]
loss: 0.079401 [17920/24872]: 0%| | 0/97 [00:06<?, ?it/s]
loss: 0.084631 [18176/24872]: 0%| | 0/97 [00:06<?, ?it/s]
loss: 0.085921 [18432/24872]: 0%| | 0/97 [00:06<?, ?it/s]
loss: 0.081711 [18688/24872]: 0%| | 0/97 [00:06<?, ?it/s]
loss: 0.066162 [18944/24872]: 0%| | 0/97 [00:06<?, ?it/s]
loss: 0.071086 [19200/24872]: 0%| | 0/97 [00:06<?, ?it/s]
loss: 0.094446 [19456/24872]: 0%| | 0/97 [00:06<?, ?it/s]
loss: 0.085940 [19712/24872]: 0%| | 0/97 [00:06<?, ?it/s]
loss: 0.073759 [19968/24872]: 0%| | 0/97 [00:06<?, ?it/s]
loss: 0.079514 [20224/24872]: 0%| | 0/97 [00:07<?, ?it/s]
loss: 0.075561 [20480/24872]: 0%| | 0/97 [00:07<?, ?it/s]
loss: 0.090293 [20736/24872]: 0%| | 0/97 [00:07<?, ?it/s]
loss: 0.094390 [20992/24872]: 0%| | 0/97 [00:07<?, ?it/s]
loss: 0.090522 [21248/24872]: 0%| | 0/97 [00:07<?, ?it/s]
loss: 0.083047 [21504/24872]: 0%| | 0/97 [00:07<?, ?it/s]
loss: 0.069979 [21760/24872]: 0%| | 0/97 [00:07<?, ?it/s]
loss: 0.080729 [22016/24872]: 0%| | 0/97 [00:07<?, ?it/s]
loss: 0.089907 [22272/24872]: 0%| | 0/97 [00:07<?, ?it/s]
loss: 0.077353 [22528/24872]: 0%| | 0/97 [00:07<?, ?it/s]
loss: 0.083033 [22784/24872]: 0%| | 0/97 [00:07<?, ?it/s]
loss: 0.064046 [23040/24872]: 0%| | 0/97 [00:08<?, ?it/s]
loss: 0.105281 [23296/24872]: 0%| | 0/97 [00:08<?, ?it/s]
loss: 0.102955 [23552/24872]: 0%| | 0/97 [00:08<?, ?it/s]
loss: 0.077064 [23808/24872]: 0%| | 0/97 [00:08<?, ?it/s]
loss: 0.091703 [24064/24872]: 0%| | 0/97 [00:08<?, ?it/s]
loss: 0.091981 [24320/24872]: 0%| | 0/97 [00:08<?, ?it/s]
loss: 0.077447 [24576/24872]: 0%| | 0/97 [00:08<?, ?it/s]
loss: 0.070873 [24832/24872]: 0%| | 0/97 [00:08<?, ?it/s]
loss: 0.101979 [24872/24872]: 0%| | 0/97 [00:08<?, ?it/s]
loss: 0.101979 [24872/24872]: : 98it [00:08, 11.26it/s]
Epoch 4, time=309.30s
0%| | 0/97 [00:00<?, ?it/s]
loss: 0.079267 [ 256/24872]: 0%| | 0/97 [00:00<?, ?it/s]
loss: 0.098893 [ 512/24872]: 0%| | 0/97 [00:00<?, ?it/s]
loss: 0.083140 [ 768/24872]: 0%| | 0/97 [00:00<?, ?it/s]
loss: 0.089850 [ 1024/24872]: 0%| | 0/97 [00:00<?, ?it/s]
loss: 0.086182 [ 1280/24872]: 0%| | 0/97 [00:00<?, ?it/s]
loss: 0.080758 [ 1536/24872]: 0%| | 0/97 [00:00<?, ?it/s]
loss: 0.084613 [ 1792/24872]: 0%| | 0/97 [00:00<?, ?it/s]
loss: 0.077202 [ 2048/24872]: 0%| | 0/97 [00:00<?, ?it/s]
loss: 0.081525 [ 2304/24872]: 0%| | 0/97 [00:00<?, ?it/s]
loss: 0.072634 [ 2560/24872]: 0%| | 0/97 [00:00<?, ?it/s]
loss: 0.086762 [ 2816/24872]: 0%| | 0/97 [00:00<?, ?it/s]
loss: 0.076436 [ 3072/24872]: 0%| | 0/97 [00:01<?, ?it/s]
loss: 0.085035 [ 3328/24872]: 0%| | 0/97 [00:01<?, ?it/s]
loss: 0.082315 [ 3584/24872]: 0%| | 0/97 [00:01<?, ?it/s]
loss: 0.101225 [ 3840/24872]: 0%| | 0/97 [00:01<?, ?it/s]
loss: 0.094473 [ 4096/24872]: 0%| | 0/97 [00:01<?, ?it/s]
loss: 0.092112 [ 4352/24872]: 0%| | 0/97 [00:01<?, ?it/s]
loss: 0.082011 [ 4608/24872]: 0%| | 0/97 [00:01<?, ?it/s]
loss: 0.085008 [ 4864/24872]: 0%| | 0/97 [00:01<?, ?it/s]
loss: 0.074412 [ 5120/24872]: 0%| | 0/97 [00:01<?, ?it/s]
loss: 0.099987 [ 5376/24872]: 0%| | 0/97 [00:01<?, ?it/s]
loss: 0.089791 [ 5632/24872]: 0%| | 0/97 [00:01<?, ?it/s]
loss: 0.085194 [ 5888/24872]: 0%| | 0/97 [00:02<?, ?it/s]
loss: 0.074496 [ 6144/24872]: 0%| | 0/97 [00:02<?, ?it/s]
loss: 0.086186 [ 6400/24872]: 0%| | 0/97 [00:02<?, ?it/s]
loss: 0.088266 [ 6656/24872]: 0%| | 0/97 [00:02<?, ?it/s]
loss: 0.070604 [ 6912/24872]: 0%| | 0/97 [00:02<?, ?it/s]
loss: 0.078278 [ 7168/24872]: 0%| | 0/97 [00:02<?, ?it/s]
loss: 0.072087 [ 7424/24872]: 0%| | 0/97 [00:02<?, ?it/s]
loss: 0.100721 [ 7680/24872]: 0%| | 0/97 [00:02<?, ?it/s]
loss: 0.080249 [ 7936/24872]: 0%| | 0/97 [00:02<?, ?it/s]
loss: 0.073933 [ 8192/24872]: 0%| | 0/97 [00:02<?, ?it/s]
loss: 0.088682 [ 8448/24872]: 0%| | 0/97 [00:02<?, ?it/s]
loss: 0.080909 [ 8704/24872]: 0%| | 0/97 [00:03<?, ?it/s]
loss: 0.068724 [ 8960/24872]: 0%| | 0/97 [00:03<?, ?it/s]
loss: 0.083760 [ 9216/24872]: 0%| | 0/97 [00:03<?, ?it/s]
loss: 0.085372 [ 9472/24872]: 0%| | 0/97 [00:03<?, ?it/s]
loss: 0.080210 [ 9728/24872]: 0%| | 0/97 [00:03<?, ?it/s]
loss: 0.076461 [ 9984/24872]: 0%| | 0/97 [00:03<?, ?it/s]
loss: 0.087224 [10240/24872]: 0%| | 0/97 [00:03<?, ?it/s]
loss: 0.090346 [10496/24872]: 0%| | 0/97 [00:03<?, ?it/s]
loss: 0.093405 [10752/24872]: 0%| | 0/97 [00:03<?, ?it/s]
loss: 0.070437 [11008/24872]: 0%| | 0/97 [00:03<?, ?it/s]
loss: 0.085374 [11264/24872]: 0%| | 0/97 [00:03<?, ?it/s]
loss: 0.079516 [11520/24872]: 0%| | 0/97 [00:04<?, ?it/s]
loss: 0.076098 [11776/24872]: 0%| | 0/97 [00:04<?, ?it/s]
loss: 0.081031 [12032/24872]: 0%| | 0/97 [00:04<?, ?it/s]
loss: 0.094233 [12288/24872]: 0%| | 0/97 [00:04<?, ?it/s]
loss: 0.071156 [12544/24872]: 0%| | 0/97 [00:04<?, ?it/s]
loss: 0.112222 [12800/24872]: 0%| | 0/97 [00:04<?, ?it/s]
loss: 0.087513 [13056/24872]: 0%| | 0/97 [00:04<?, ?it/s]
loss: 0.079966 [13312/24872]: 0%| | 0/97 [00:04<?, ?it/s]
loss: 0.108995 [13568/24872]: 0%| | 0/97 [00:04<?, ?it/s]
loss: 0.084605 [13824/24872]: 0%| | 0/97 [00:04<?, ?it/s]
loss: 0.093326 [14080/24872]: 0%| | 0/97 [00:04<?, ?it/s]
loss: 0.067628 [14336/24872]: 0%| | 0/97 [00:04<?, ?it/s]
loss: 0.065080 [14592/24872]: 0%| | 0/97 [00:05<?, ?it/s]
loss: 0.076385 [14848/24872]: 0%| | 0/97 [00:05<?, ?it/s]
loss: 0.075470 [15104/24872]: 0%| | 0/97 [00:05<?, ?it/s]
loss: 0.098633 [15360/24872]: 0%| | 0/97 [00:05<?, ?it/s]
loss: 0.080826 [15616/24872]: 0%| | 0/97 [00:05<?, ?it/s]
loss: 0.076340 [15872/24872]: 0%| | 0/97 [00:05<?, ?it/s]
loss: 0.074086 [16128/24872]: 0%| | 0/97 [00:05<?, ?it/s]
loss: 0.084416 [16384/24872]: 0%| | 0/97 [00:05<?, ?it/s]
loss: 0.102783 [16640/24872]: 0%| | 0/97 [00:05<?, ?it/s]
loss: 0.093899 [16896/24872]: 0%| | 0/97 [00:05<?, ?it/s]
loss: 0.082481 [17152/24872]: 0%| | 0/97 [00:05<?, ?it/s]
loss: 0.091577 [17408/24872]: 0%| | 0/97 [00:06<?, ?it/s]
loss: 0.089705 [17664/24872]: 0%| | 0/97 [00:06<?, ?it/s]
loss: 0.078465 [17920/24872]: 0%| | 0/97 [00:06<?, ?it/s]
loss: 0.085937 [18176/24872]: 0%| | 0/97 [00:06<?, ?it/s]
loss: 0.083810 [18432/24872]: 0%| | 0/97 [00:06<?, ?it/s]
loss: 0.081145 [18688/24872]: 0%| | 0/97 [00:06<?, ?it/s]
loss: 0.064432 [18944/24872]: 0%| | 0/97 [00:06<?, ?it/s]
loss: 0.071778 [19200/24872]: 0%| | 0/97 [00:06<?, ?it/s]
loss: 0.093398 [19456/24872]: 0%| | 0/97 [00:06<?, ?it/s]
loss: 0.085308 [19712/24872]: 0%| | 0/97 [00:06<?, ?it/s]
loss: 0.074306 [19968/24872]: 0%| | 0/97 [00:06<?, ?it/s]
loss: 0.079209 [20224/24872]: 0%| | 0/97 [00:07<?, ?it/s]
loss: 0.075086 [20480/24872]: 0%| | 0/97 [00:07<?, ?it/s]
loss: 0.089645 [20736/24872]: 0%| | 0/97 [00:07<?, ?it/s]
loss: 0.093787 [20992/24872]: 0%| | 0/97 [00:07<?, ?it/s]
loss: 0.090537 [21248/24872]: 0%| | 0/97 [00:07<?, ?it/s]
loss: 0.082255 [21504/24872]: 0%| | 0/97 [00:07<?, ?it/s]
loss: 0.068499 [21760/24872]: 0%| | 0/97 [00:07<?, ?it/s]
loss: 0.080005 [22016/24872]: 0%| | 0/97 [00:07<?, ?it/s]
loss: 0.088790 [22272/24872]: 0%| | 0/97 [00:07<?, ?it/s]
loss: 0.076506 [22528/24872]: 0%| | 0/97 [00:07<?, ?it/s]
loss: 0.082495 [22784/24872]: 0%| | 0/97 [00:07<?, ?it/s]
loss: 0.064621 [23040/24872]: 0%| | 0/97 [00:08<?, ?it/s]
loss: 0.104798 [23296/24872]: 0%| | 0/97 [00:08<?, ?it/s]
loss: 0.101409 [23552/24872]: 0%| | 0/97 [00:08<?, ?it/s]
loss: 0.076478 [23808/24872]: 0%| | 0/97 [00:08<?, ?it/s]
loss: 0.092327 [24064/24872]: 0%| | 0/97 [00:08<?, ?it/s]
loss: 0.092872 [24320/24872]: 0%| | 0/97 [00:08<?, ?it/s]
loss: 0.077058 [24576/24872]: 0%| | 0/97 [00:08<?, ?it/s]
loss: 0.070568 [24832/24872]: 0%| | 0/97 [00:08<?, ?it/s]
loss: 0.101072 [24872/24872]: 0%| | 0/97 [00:08<?, ?it/s]
loss: 0.101072 [24872/24872]: : 98it [00:08, 11.24it/s]
Epoch 5, time=318.02s
0%| | 0/97 [00:00<?, ?it/s]
loss: 0.078784 [ 256/24872]: 0%| | 0/97 [00:00<?, ?it/s]
loss: 0.098791 [ 512/24872]: 0%| | 0/97 [00:00<?, ?it/s]
loss: 0.081490 [ 768/24872]: 0%| | 0/97 [00:00<?, ?it/s]
loss: 0.088289 [ 1024/24872]: 0%| | 0/97 [00:00<?, ?it/s]
loss: 0.085420 [ 1280/24872]: 0%| | 0/97 [00:00<?, ?it/s]
loss: 0.079472 [ 1536/24872]: 0%| | 0/97 [00:00<?, ?it/s]
loss: 0.083717 [ 1792/24872]: 0%| | 0/97 [00:00<?, ?it/s]
loss: 0.075143 [ 2048/24872]: 0%| | 0/97 [00:00<?, ?it/s]
loss: 0.078780 [ 2304/24872]: 0%| | 0/97 [00:00<?, ?it/s]
loss: 0.070884 [ 2560/24872]: 0%| | 0/97 [00:00<?, ?it/s]
loss: 0.084611 [ 2816/24872]: 0%| | 0/97 [00:00<?, ?it/s]
loss: 0.075588 [ 3072/24872]: 0%| | 0/97 [00:01<?, ?it/s]
loss: 0.083793 [ 3328/24872]: 0%| | 0/97 [00:01<?, ?it/s]
loss: 0.081463 [ 3584/24872]: 0%| | 0/97 [00:01<?, ?it/s]
loss: 0.099244 [ 3840/24872]: 0%| | 0/97 [00:01<?, ?it/s]
loss: 0.092508 [ 4096/24872]: 0%| | 0/97 [00:01<?, ?it/s]
loss: 0.089386 [ 4352/24872]: 0%| | 0/97 [00:01<?, ?it/s]
loss: 0.080700 [ 4608/24872]: 0%| | 0/97 [00:01<?, ?it/s]
loss: 0.083627 [ 4864/24872]: 0%| | 0/97 [00:01<?, ?it/s]
loss: 0.073758 [ 5120/24872]: 0%| | 0/97 [00:01<?, ?it/s]
loss: 0.096869 [ 5376/24872]: 0%| | 0/97 [00:01<?, ?it/s]
loss: 0.088522 [ 5632/24872]: 0%| | 0/97 [00:01<?, ?it/s]
loss: 0.083411 [ 5888/24872]: 0%| | 0/97 [00:02<?, ?it/s]
loss: 0.073582 [ 6144/24872]: 0%| | 0/97 [00:02<?, ?it/s]
loss: 0.084750 [ 6400/24872]: 0%| | 0/97 [00:02<?, ?it/s]
loss: 0.087760 [ 6656/24872]: 0%| | 0/97 [00:02<?, ?it/s]
loss: 0.069685 [ 6912/24872]: 0%| | 0/97 [00:02<?, ?it/s]
loss: 0.077658 [ 7168/24872]: 0%| | 0/97 [00:02<?, ?it/s]
loss: 0.071114 [ 7424/24872]: 0%| | 0/97 [00:02<?, ?it/s]
loss: 0.100236 [ 7680/24872]: 0%| | 0/97 [00:02<?, ?it/s]
loss: 0.079620 [ 7936/24872]: 0%| | 0/97 [00:02<?, ?it/s]
loss: 0.073347 [ 8192/24872]: 0%| | 0/97 [00:02<?, ?it/s]
loss: 0.087920 [ 8448/24872]: 0%| | 0/97 [00:02<?, ?it/s]
loss: 0.079876 [ 8704/24872]: 0%| | 0/97 [00:03<?, ?it/s]
loss: 0.067395 [ 8960/24872]: 0%| | 0/97 [00:03<?, ?it/s]
loss: 0.083220 [ 9216/24872]: 0%| | 0/97 [00:03<?, ?it/s]
loss: 0.084438 [ 9472/24872]: 0%| | 0/97 [00:03<?, ?it/s]
loss: 0.079512 [ 9728/24872]: 0%| | 0/97 [00:03<?, ?it/s]
loss: 0.076189 [ 9984/24872]: 0%| | 0/97 [00:03<?, ?it/s]
loss: 0.086984 [10240/24872]: 0%| | 0/97 [00:03<?, ?it/s]
loss: 0.089412 [10496/24872]: 0%| | 0/97 [00:03<?, ?it/s]
loss: 0.093171 [10752/24872]: 0%| | 0/97 [00:03<?, ?it/s]
loss: 0.069701 [11008/24872]: 0%| | 0/97 [00:03<?, ?it/s]
loss: 0.084410 [11264/24872]: 0%| | 0/97 [00:03<?, ?it/s]
loss: 0.078874 [11520/24872]: 0%| | 0/97 [00:04<?, ?it/s]
loss: 0.075257 [11776/24872]: 0%| | 0/97 [00:04<?, ?it/s]
loss: 0.080646 [12032/24872]: 0%| | 0/97 [00:04<?, ?it/s]
loss: 0.093012 [12288/24872]: 0%| | 0/97 [00:04<?, ?it/s]
loss: 0.070566 [12544/24872]: 0%| | 0/97 [00:04<?, ?it/s]
loss: 0.111865 [12800/24872]: 0%| | 0/97 [00:04<?, ?it/s]
loss: 0.086537 [13056/24872]: 0%| | 0/97 [00:04<?, ?it/s]
loss: 0.079540 [13312/24872]: 0%| | 0/97 [00:04<?, ?it/s]
loss: 0.107472 [13568/24872]: 0%| | 0/97 [00:04<?, ?it/s]
loss: 0.083933 [13824/24872]: 0%| | 0/97 [00:04<?, ?it/s]
loss: 0.092945 [14080/24872]: 0%| | 0/97 [00:04<?, ?it/s]
loss: 0.067196 [14336/24872]: 0%| | 0/97 [00:04<?, ?it/s]
loss: 0.064374 [14592/24872]: 0%| | 0/97 [00:05<?, ?it/s]
loss: 0.075735 [14848/24872]: 0%| | 0/97 [00:05<?, ?it/s]
loss: 0.074984 [15104/24872]: 0%| | 0/97 [00:05<?, ?it/s]
loss: 0.098305 [15360/24872]: 0%| | 0/97 [00:05<?, ?it/s]
loss: 0.080056 [15616/24872]: 0%| | 0/97 [00:05<?, ?it/s]
loss: 0.075088 [15872/24872]: 0%| | 0/97 [00:05<?, ?it/s]
loss: 0.073740 [16128/24872]: 0%| | 0/97 [00:05<?, ?it/s]
loss: 0.083704 [16384/24872]: 0%| | 0/97 [00:05<?, ?it/s]
loss: 0.102047 [16640/24872]: 0%| | 0/97 [00:05<?, ?it/s]
loss: 0.093668 [16896/24872]: 0%| | 0/97 [00:05<?, ?it/s]
loss: 0.081837 [17152/24872]: 0%| | 0/97 [00:05<?, ?it/s]
loss: 0.091042 [17408/24872]: 0%| | 0/97 [00:06<?, ?it/s]
loss: 0.089022 [17664/24872]: 0%| | 0/97 [00:06<?, ?it/s]
loss: 0.077224 [17920/24872]: 0%| | 0/97 [00:06<?, ?it/s]
loss: 0.086085 [18176/24872]: 0%| | 0/97 [00:06<?, ?it/s]
loss: 0.082645 [18432/24872]: 0%| | 0/97 [00:06<?, ?it/s]
loss: 0.080524 [18688/24872]: 0%| | 0/97 [00:06<?, ?it/s]
loss: 0.063452 [18944/24872]: 0%| | 0/97 [00:06<?, ?it/s]
loss: 0.071610 [19200/24872]: 0%| | 0/97 [00:06<?, ?it/s]
loss: 0.092826 [19456/24872]: 0%| | 0/97 [00:06<?, ?it/s]
loss: 0.084497 [19712/24872]: 0%| | 0/97 [00:06<?, ?it/s]
loss: 0.073584 [19968/24872]: 0%| | 0/97 [00:06<?, ?it/s]
loss: 0.078745 [20224/24872]: 0%| | 0/97 [00:07<?, ?it/s]
loss: 0.074558 [20480/24872]: 0%| | 0/97 [00:07<?, ?it/s]
loss: 0.089126 [20736/24872]: 0%| | 0/97 [00:07<?, ?it/s]
loss: 0.093276 [20992/24872]: 0%| | 0/97 [00:07<?, ?it/s]
loss: 0.090214 [21248/24872]: 0%| | 0/97 [00:07<?, ?it/s]
loss: 0.081436 [21504/24872]: 0%| | 0/97 [00:07<?, ?it/s]
loss: 0.067355 [21760/24872]: 0%| | 0/97 [00:07<?, ?it/s]
loss: 0.079273 [22016/24872]: 0%| | 0/97 [00:07<?, ?it/s]
loss: 0.087659 [22272/24872]: 0%| | 0/97 [00:07<?, ?it/s]
loss: 0.075767 [22528/24872]: 0%| | 0/97 [00:07<?, ?it/s]
loss: 0.082192 [22784/24872]: 0%| | 0/97 [00:07<?, ?it/s]
loss: 0.064333 [23040/24872]: 0%| | 0/97 [00:07<?, ?it/s]
loss: 0.104009 [23296/24872]: 0%| | 0/97 [00:08<?, ?it/s]
loss: 0.100599 [23552/24872]: 0%| | 0/97 [00:08<?, ?it/s]
loss: 0.075846 [23808/24872]: 0%| | 0/97 [00:08<?, ?it/s]
loss: 0.092407 [24064/24872]: 0%| | 0/97 [00:08<?, ?it/s]
loss: 0.091440 [24320/24872]: 0%| | 0/97 [00:08<?, ?it/s]
loss: 0.075919 [24576/24872]: 0%| | 0/97 [00:08<?, ?it/s]
loss: 0.070175 [24832/24872]: 0%| | 0/97 [00:08<?, ?it/s]
loss: 0.100423 [24872/24872]: 0%| | 0/97 [00:08<?, ?it/s]
loss: 0.100423 [24872/24872]: : 98it [00:08, 11.33it/s]
Epoch 6, time=326.67s
0%| | 0/97 [00:00<?, ?it/s]
loss: 0.078181 [ 256/24872]: 0%| | 0/97 [00:00<?, ?it/s]
loss: 0.097793 [ 512/24872]: 0%| | 0/97 [00:00<?, ?it/s]
loss: 0.080580 [ 768/24872]: 0%| | 0/97 [00:00<?, ?it/s]
loss: 0.087432 [ 1024/24872]: 0%| | 0/97 [00:00<?, ?it/s]
loss: 0.084689 [ 1280/24872]: 0%| | 0/97 [00:00<?, ?it/s]
loss: 0.078568 [ 1536/24872]: 0%| | 0/97 [00:00<?, ?it/s]
loss: 0.082771 [ 1792/24872]: 0%| | 0/97 [00:00<?, ?it/s]
loss: 0.074167 [ 2048/24872]: 0%| | 0/97 [00:00<?, ?it/s]
loss: 0.077487 [ 2304/24872]: 0%| | 0/97 [00:00<?, ?it/s]
loss: 0.070069 [ 2560/24872]: 0%| | 0/97 [00:00<?, ?it/s]
loss: 0.083533 [ 2816/24872]: 0%| | 0/97 [00:01<?, ?it/s]
loss: 0.075839 [ 3072/24872]: 0%| | 0/97 [00:01<?, ?it/s]
loss: 0.083339 [ 3328/24872]: 0%| | 0/97 [00:01<?, ?it/s]
loss: 0.080968 [ 3584/24872]: 0%| | 0/97 [00:01<?, ?it/s]
loss: 0.099202 [ 3840/24872]: 0%| | 0/97 [00:01<?, ?it/s]
loss: 0.091588 [ 4096/24872]: 0%| | 0/97 [00:01<?, ?it/s]
loss: 0.089384 [ 4352/24872]: 0%| | 0/97 [00:01<?, ?it/s]
loss: 0.079473 [ 4608/24872]: 0%| | 0/97 [00:01<?, ?it/s]
loss: 0.083145 [ 4864/24872]: 0%| | 0/97 [00:01<?, ?it/s]
loss: 0.073589 [ 5120/24872]: 0%| | 0/97 [00:01<?, ?it/s]
loss: 0.094968 [ 5376/24872]: 0%| | 0/97 [00:01<?, ?it/s]
loss: 0.088257 [ 5632/24872]: 0%| | 0/97 [00:01<?, ?it/s]
loss: 0.082600 [ 5888/24872]: 0%| | 0/97 [00:02<?, ?it/s]
loss: 0.073406 [ 6144/24872]: 0%| | 0/97 [00:02<?, ?it/s]
loss: 0.084197 [ 6400/24872]: 0%| | 0/97 [00:02<?, ?it/s]
loss: 0.087514 [ 6656/24872]: 0%| | 0/97 [00:02<?, ?it/s]
loss: 0.069199 [ 6912/24872]: 0%| | 0/97 [00:02<?, ?it/s]
loss: 0.077134 [ 7168/24872]: 0%| | 0/97 [00:02<?, ?it/s]
loss: 0.071067 [ 7424/24872]: 0%| | 0/97 [00:02<?, ?it/s]
loss: 0.099126 [ 7680/24872]: 0%| | 0/97 [00:02<?, ?it/s]
loss: 0.079061 [ 7936/24872]: 0%| | 0/97 [00:02<?, ?it/s]
loss: 0.073193 [ 8192/24872]: 0%| | 0/97 [00:02<?, ?it/s]
loss: 0.087081 [ 8448/24872]: 0%| | 0/97 [00:02<?, ?it/s]
loss: 0.079429 [ 8704/24872]: 0%| | 0/97 [00:03<?, ?it/s]
loss: 0.066909 [ 8960/24872]: 0%| | 0/97 [00:03<?, ?it/s]
loss: 0.082242 [ 9216/24872]: 0%| | 0/97 [00:03<?, ?it/s]
loss: 0.083784 [ 9472/24872]: 0%| | 0/97 [00:03<?, ?it/s]
loss: 0.079159 [ 9728/24872]: 0%| | 0/97 [00:03<?, ?it/s]
loss: 0.075567 [ 9984/24872]: 0%| | 0/97 [00:03<?, ?it/s]
loss: 0.086876 [10240/24872]: 0%| | 0/97 [00:03<?, ?it/s]
loss: 0.088948 [10496/24872]: 0%| | 0/97 [00:03<?, ?it/s]
loss: 0.092778 [10752/24872]: 0%| | 0/97 [00:03<?, ?it/s]
loss: 0.069180 [11008/24872]: 0%| | 0/97 [00:03<?, ?it/s]
loss: 0.083891 [11264/24872]: 0%| | 0/97 [00:03<?, ?it/s]
loss: 0.078392 [11520/24872]: 0%| | 0/97 [00:04<?, ?it/s]
loss: 0.074676 [11776/24872]: 0%| | 0/97 [00:04<?, ?it/s]
loss: 0.080393 [12032/24872]: 0%| | 0/97 [00:04<?, ?it/s]
loss: 0.092405 [12288/24872]: 0%| | 0/97 [00:04<?, ?it/s]
loss: 0.069990 [12544/24872]: 0%| | 0/97 [00:04<?, ?it/s]
loss: 0.111550 [12800/24872]: 0%| | 0/97 [00:04<?, ?it/s]
loss: 0.085647 [13056/24872]: 0%| | 0/97 [00:04<?, ?it/s]
loss: 0.078889 [13312/24872]: 0%| | 0/97 [00:04<?, ?it/s]
loss: 0.106112 [13568/24872]: 0%| | 0/97 [00:04<?, ?it/s]
loss: 0.083363 [13824/24872]: 0%| | 0/97 [00:04<?, ?it/s]
loss: 0.092432 [14080/24872]: 0%| | 0/97 [00:04<?, ?it/s]
loss: 0.066444 [14336/24872]: 0%| | 0/97 [00:04<?, ?it/s]
loss: 0.063787 [14592/24872]: 0%| | 0/97 [00:05<?, ?it/s]
loss: 0.075055 [14848/24872]: 0%| | 0/97 [00:05<?, ?it/s]
loss: 0.074396 [15104/24872]: 0%| | 0/97 [00:05<?, ?it/s]
loss: 0.097767 [15360/24872]: 0%| | 0/97 [00:05<?, ?it/s]
loss: 0.079408 [15616/24872]: 0%| | 0/97 [00:05<?, ?it/s]
loss: 0.074024 [15872/24872]: 0%| | 0/97 [00:05<?, ?it/s]
loss: 0.073348 [16128/24872]: 0%| | 0/97 [00:05<?, ?it/s]
loss: 0.083116 [16384/24872]: 0%| | 0/97 [00:05<?, ?it/s]
loss: 0.101377 [16640/24872]: 0%| | 0/97 [00:05<?, ?it/s]
loss: 0.093296 [16896/24872]: 0%| | 0/97 [00:05<?, ?it/s]
loss: 0.081314 [17152/24872]: 0%| | 0/97 [00:05<?, ?it/s]
loss: 0.090512 [17408/24872]: 0%| | 0/97 [00:06<?, ?it/s]
loss: 0.088511 [17664/24872]: 0%| | 0/97 [00:06<?, ?it/s]
loss: 0.076117 [17920/24872]: 0%| | 0/97 [00:06<?, ?it/s]
loss: 0.086020 [18176/24872]: 0%| | 0/97 [00:06<?, ?it/s]
loss: 0.081778 [18432/24872]: 0%| | 0/97 [00:06<?, ?it/s]
loss: 0.079907 [18688/24872]: 0%| | 0/97 [00:06<?, ?it/s]
loss: 0.062807 [18944/24872]: 0%| | 0/97 [00:06<?, ?it/s]
loss: 0.071391 [19200/24872]: 0%| | 0/97 [00:06<?, ?it/s]
loss: 0.092179 [19456/24872]: 0%| | 0/97 [00:06<?, ?it/s]
loss: 0.083607 [19712/24872]: 0%| | 0/97 [00:06<?, ?it/s]
loss: 0.072706 [19968/24872]: 0%| | 0/97 [00:06<?, ?it/s]
loss: 0.078236 [20224/24872]: 0%| | 0/97 [00:07<?, ?it/s]
loss: 0.073965 [20480/24872]: 0%| | 0/97 [00:07<?, ?it/s]
loss: 0.088561 [20736/24872]: 0%| | 0/97 [00:07<?, ?it/s]
loss: 0.092750 [20992/24872]: 0%| | 0/97 [00:07<?, ?it/s]
loss: 0.089845 [21248/24872]: 0%| | 0/97 [00:07<?, ?it/s]
loss: 0.080725 [21504/24872]: 0%| | 0/97 [00:07<?, ?it/s]
loss: 0.066760 [21760/24872]: 0%| | 0/97 [00:07<?, ?it/s]
loss: 0.078606 [22016/24872]: 0%| | 0/97 [00:07<?, ?it/s]
loss: 0.086789 [22272/24872]: 0%| | 0/97 [00:07<?, ?it/s]
loss: 0.075057 [22528/24872]: 0%| | 0/97 [00:07<?, ?it/s]
loss: 0.081731 [22784/24872]: 0%| | 0/97 [00:07<?, ?it/s]
loss: 0.064044 [23040/24872]: 0%| | 0/97 [00:07<?, ?it/s]
loss: 0.103373 [23296/24872]: 0%| | 0/97 [00:08<?, ?it/s]
loss: 0.099864 [23552/24872]: 0%| | 0/97 [00:08<?, ?it/s]
loss: 0.075277 [23808/24872]: 0%| | 0/97 [00:08<?, ?it/s]
loss: 0.092325 [24064/24872]: 0%| | 0/97 [00:08<?, ?it/s]
loss: 0.089697 [24320/24872]: 0%| | 0/97 [00:08<?, ?it/s]
loss: 0.075146 [24576/24872]: 0%| | 0/97 [00:08<?, ?it/s]
loss: 0.070050 [24832/24872]: 0%| | 0/97 [00:08<?, ?it/s]
loss: 0.099753 [24872/24872]: 0%| | 0/97 [00:08<?, ?it/s]
loss: 0.099753 [24872/24872]: : 98it [00:08, 11.35it/s]
Done!
test the network#
Do some qualitative tests: Let the trained network predict some particle geometries and compare their Mie spectra with the target spectrum.
# Qualitative test: feed target spectra to the trained generator, re-simulate
# the suggested core-shell particles with Mie theory, and compare the spectra.
#
# NOTE(review): the name `q_sca_target_test` suggests a held-out test split
# rather than training samples -- confirm against the dataset split earlier
# in the file.
sca_test = q_sca_target_test

# Inference only: neither the network prediction nor the Mie re-evaluation
# needs an autograd graph here, so gradient tracking is disabled to save
# memory and time.
# NOTE(review): if the model contains dropout / batch-norm layers, switch it
# to evaluation mode (`model.eval()`) before this point -- confirm.
with torch.no_grad():
    pred = model(sca_test)

    # evaluate Mie for the geometries / materials proposed by the network
    r_c_test, r_s_test, eps_c_test, eps_s_test = nn_pred_to_mie_geometry(pred)
    res_mie = pmd.coreshell.cross_sections(
        k0,
        r_c=r_c_test,
        eps_c=eps_c_test,
        r_s=r_s_test,
        eps_s=eps_s_test,
        eps_env=eps_env,
        n_max=n_max,
    )

# plot: four randomly drawn samples (drawn with replacement, so an index may
# appear more than once)
i_plot = np.random.randint(len(sca_test), size=4)

# wavelength axis is identical for every subplot: convert it once
wl_np = wl0.detach().cpu().numpy()
q_sca_pred = res_mie["q_sca"]

plt.figure(figsize=(12, 10))
for i_n, i in enumerate(i_plot):
    plt.subplot(2, 2, i_n + 1)
    plt.plot(
        wl_np,
        sca_test[i].detach().cpu().numpy(),
        label="reference",
    )
    plt.plot(
        wl_np,
        q_sca_pred[i].detach().cpu().numpy(),
        label="predicted particle",
    )
    plt.legend()
    plt.xlabel("wavelength (nm)")
    plt.ylabel("scat. efficiency")
plt.show()

Total running time of the script: (5 minutes 41.364 seconds)
Estimated memory usage: 5356 MB