Note
Go to the end to download the full example code.
Mie-informed tandem neural network#
Here, we demonstrate how to train a design generator network capable of suggesting core-shell particles with a specific spectral response, using PyMieDiff as a differentiable forward evaluator. The training pipeline follows the “Tandem” model:
target spectrum –> generator NN –> design –> Mie –> real spectrum
training loss is: MSE(target spec., real spec.)
author: O. Jackson, P. Wiecha, 06/2025
imports#
import time
import numpy as np
from tqdm import tqdm
import matplotlib.pyplot as plt
import torch
from torch import nn
import pymiediff as pmd
Set up optimization target#
We set up the main configuration here: pymiediff backend, torch device, parameter limits and wavelengths.
# --- compute setup --------------------------------------------------------
backend = "torch"  # pymiediff backend used for every Mie evaluation
device = "cpu"  # torch device on which all tensors below are created

# --- dataset size and Mie truncation --------------------------------------
N_samples = 25000
n_max = 4  # the maximum Mie order is fixed for performance

# --- environment permittivity and parameter limits ------------------------
# The lim_* tensors hold two entries each; downstream code uses entry 0 as
# the offset, the difference of the entries as the span, and .max() as the
# upper bound.
eps_env = torch.tensor(1.0, device=device)
lim_r = torch.tensor([40, 100], device=device)
lim_n_re = torch.tensor([1.5, 4.0], device=device)
lim_n_im = torch.tensor([0.0, 0.1], device=device)

# --- spectral grid: 40 wavelengths and the matching wavenumbers -----------
wl0 = torch.linspace(start=400, end=800, steps=40, device=device)
k0 = (2 * torch.pi) / wl0
generate reference spectra#
We generate a large number of reference Mie spectra for existing particles, which will be used as design targets during training.
Note: this step could also be done without any physics knowledge, for example with artificial spectra (e.g. Lorentzians), or a scattering maximization loss.
# datagen: draw random core-shell particles and compute their spectra.
# Only the spectra are kept for training; the sampled geometries are not used.
r_span = torch.diff(lim_r)[0]
r_c = lim_r[0] + r_span * torch.rand(N_samples, device=device)  # core radius
d_s = lim_r[0] + r_span * torch.rand(N_samples, device=device)  # shell thickness
r_s = r_c + d_s  # outer (shell) radius

# complex refractive indices: column 0 is the core, column 1 is the shell
n_re = lim_n_re[0] + torch.diff(lim_n_re)[0] * torch.rand(
    (N_samples, 2), device=device
)
n_im = lim_n_im[0] + torch.diff(lim_n_im)[0] * torch.rand(
    (N_samples, 2), device=device
)
n = n_re + 1j * n_im

# low-level API: the permittivity has to be given as a spectrum (for
# vectorization), so each constant value is broadcast along the k0 axis
flat_spectrum = torch.ones_like(k0).unsqueeze(0)
eps_c = flat_spectrum * n[:, 0].unsqueeze(1) ** 2
eps_s = flat_spectrum * n[:, 1].unsqueeze(1) ** 2

all_particles = pmd.farfield.cross_sections(
    k0,
    r_c=r_c,
    eps_c=eps_c,
    r_s=r_s,
    eps_s=eps_s,
    eps_env=eps_env,
    backend=backend,
    n_max=n_max,
)

# hold back the first N_test spectra for testing, train on the rest
N_test = 128
q_sca_all = all_particles["q_sca"]
q_sca_target = q_sca_all[N_test:].to(dtype=torch.float32)
q_sca_target_test = q_sca_all[:N_test].to(dtype=torch.float32)

# quick visual check: plot one spectrum from the training targets
plt.plot(q_sca_target[30].detach().cpu().numpy())

[<matplotlib.lines.Line2D object at 0x7f2052fde850>]
Neural network classes / functions#
define the network model (simple MLP) and training loop
class FullyConnected(nn.Module):
    """Simple MLP that maps a target spectrum to six normalized design values.

    The network consists of three hidden ReLU layers of equal width followed
    by a linear layer with 6 outputs and a sigmoid, so every output lies in
    the interval (0, 1). The outputs are turned into physical quantities by
    ``nn_pred_to_mie_geometry``.

    Args:
        hidden_dim: width of each hidden layer.
        in_dim: number of spectral points in the input. If ``None``
            (default), the length of the module-level wavenumber grid ``k0``
            is used, which is the original behavior.
    """

    def __init__(self, hidden_dim=1024, in_dim=None):
        super().__init__()
        # Resolve the input width lazily so that the class can also be built
        # for spectra that do not live on the global ``k0`` grid.
        if in_dim is None:
            in_dim = len(k0)

        self.fc_in = nn.Linear(in_dim, hidden_dim)
        self.relu1 = nn.ReLU()
        self.fc_1 = nn.Linear(hidden_dim, hidden_dim)
        self.relu2 = nn.ReLU()
        self.fc_2 = nn.Linear(hidden_dim, hidden_dim)
        self.relu3 = nn.ReLU()
        self.fc_out = nn.Linear(hidden_dim, 6)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Return the six sigmoid-normalized design values for spectra ``x``."""
        x = self.relu1(self.fc_in(x))
        x = self.relu2(self.fc_1(x))
        x = self.relu3(self.fc_2(x))
        return self.sigmoid(self.fc_out(x))
def nn_pred_to_mie_geometry(pred):
    """Convert normalized network outputs into Mie input quantities.

    ``pred`` is indexed as ``pred[:, 0] ... pred[:, 5]``: column 0 scales the
    core radius, columns 0 + 1 the shell radius, columns 2/3 the real and
    imaginary part of the core index, and columns 4/5 those of the shell
    index. Each value is multiplied by the maximum of the matching
    user-defined limit (implicit normalization); the lower limits are not
    applied here.

    Returns:
        Tuple ``(r_c, r_s, eps_c, eps_s)``. The permittivities are the squared
        complex indices, broadcast along the ``k0`` axis.
    """
    r_max = lim_r.max()
    n_re_max = lim_n_re.max()
    n_im_max = lim_n_im.max()

    # radii: the shell radius is built from the sum of the first two outputs
    core_radius = r_max * (pred[:, 0])
    shell_radius = r_max * (pred[:, 0] + pred[:, 1])

    # complex refractive indices of core and shell
    core_index = n_re_max * pred[:, 2] + n_im_max * (1j * pred[:, 3])
    shell_index = n_re_max * pred[:, 4] + n_im_max * (1j * pred[:, 5])

    # constant permittivity repeated for every wavenumber
    flat_spectrum = torch.ones_like(k0).unsqueeze(0)
    core_eps = flat_spectrum * core_index.unsqueeze(1) ** 2
    shell_eps = flat_spectrum * shell_index.unsqueeze(1) ** 2

    return core_radius, shell_radius, core_eps, shell_eps
def train_loop(dataloader, model, loss_fn, optimizer):
    """Train ``model`` for one pass over ``dataloader`` (tandem scheme).

    Every batch ``X`` of target spectra is fed to the model, the predicted
    design is converted with ``nn_pred_to_mie_geometry`` and evaluated with
    the differentiable Mie solver, and the loss between the resulting
    scattering spectrum and ``X`` is back-propagated.

    Args:
        dataloader: iterable of batches of target spectra; must expose
            ``dataset`` and support ``len()``.
        model: network mapping spectra to normalized design values.
        loss_fn: callable ``loss_fn(prediction, target)`` returning a scalar.
        optimizer: torch optimizer holding the parameters of ``model``.
    """
    size = len(dataloader.dataset)

    # Set the model to training mode - important for batch normalization and
    # dropout layers. Unnecessary in this situation but kept as best practice.
    model.train()

    # len(dataloader) is the real number of batches: unlike
    # ``size // batch_size`` it includes a trailing partial batch and also
    # works when ``batch_size`` is None.
    prog_bar = tqdm(dataloader, total=len(dataloader))
    n_seen = 0
    for X in prog_bar:
        # model prediction: generate core-shell particles
        pred = model(X)

        # evaluate Mie
        r_c, r_s, eps_c, eps_s = nn_pred_to_mie_geometry(pred)
        res_mie = pmd.farfield.cross_sections(
            k0,
            r_c=r_c,
            eps_c=eps_c,
            r_s=r_s,
            eps_s=eps_s,
            eps_env=eps_env,
            backend=backend,
            n_max=n_max,
        )
        q_sca_mie = res_mie["q_sca"].to(dtype=torch.float32)

        # calc. loss
        loss = loss_fn(q_sca_mie, X)

        # Backpropagation
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()

        # Progress report. refresh=False leaves redrawing to tqdm's own rate
        # limiting instead of forcing an output line for every single batch.
        n_seen += len(X)
        prog_bar.set_description(
            f"loss: {loss.item():>7f} [{n_seen:>5d}/{size:>5d}]", refresh=False
        )
training the Mie-informed network#
Here we use a simple, manually optimized training schedule.
model = FullyConnected().to(device)

# Training schedule: the stages run one after another on the same model.
# Each stage sets the batch size, the learning rate and the number of epochs.
confs = [
    {"bs": 32, "lr": 1e-4, "n_ep": 5},
    {"bs": 64, "lr": 1e-4, "n_ep": 5},
    {"bs": 128, "lr": 1e-4, "n_ep": 6},
    {"bs": 256, "lr": 1e-5, "n_ep": 6},
]

t_start = time.time()
for conf in confs:
    learning_rate, batch_size, epochs = conf["lr"], conf["bs"], conf["n_ep"]

    # stage banner
    print("-------------------------------")
    print(f"LR={learning_rate}, batch_size={batch_size}")
    print("-------------------------------")

    # loss, optimizer and data loader are re-created for every stage, so each
    # stage starts with a fresh optimizer state and its own batch size
    loss_fn = nn.MSELoss()
    optimizer = torch.optim.AdamW(model.parameters(), lr=learning_rate)
    train_dataloader = torch.utils.data.DataLoader(
        q_sca_target, batch_size=batch_size
    )

    for t in range(epochs):
        elapsed = time.time() - t_start
        print(f"Epoch {t+1}, time={elapsed:.2f}s")
        train_loop(train_dataloader, model, loss_fn, optimizer)

print("Done!")
-------------------------------
LR=0.0001, batch_size=32
-------------------------------
Epoch 1, time=0.00s
0%| | 0/777 [00:00<?, ?it/s]
loss: 6.329720 [ 32/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 3.893605 [ 64/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 2.541288 [ 96/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 2.287702 [ 128/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 2.912999 [ 160/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 3.401494 [ 192/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 3.521006 [ 224/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 3.176595 [ 256/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 2.333042 [ 288/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 2.404341 [ 320/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 1.987450 [ 352/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 1.796337 [ 384/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 1.087246 [ 416/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 1.320459 [ 448/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 2.110011 [ 480/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 1.583368 [ 512/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 1.274186 [ 544/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 1.436142 [ 576/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 1.275631 [ 608/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 1.271340 [ 640/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 1.306494 [ 672/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 1.583768 [ 704/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 1.279917 [ 736/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 1.194271 [ 768/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 1.499229 [ 800/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 1.285443 [ 832/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 1.175751 [ 864/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 1.329294 [ 896/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 1.234848 [ 928/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 1.085934 [ 960/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 1.258007 [ 992/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 1.242399 [ 1024/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 1.236269 [ 1056/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 1.590491 [ 1088/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.943659 [ 1120/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.907389 [ 1152/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 1.193053 [ 1184/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.759891 [ 1216/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 1.179532 [ 1248/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 1.114766 [ 1280/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.923390 [ 1312/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.965239 [ 1344/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.950130 [ 1376/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.768834 [ 1408/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.819987 [ 1440/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.849760 [ 1472/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 1.004565 [ 1504/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.957278 [ 1536/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.868088 [ 1568/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 1.104992 [ 1600/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 1.021800 [ 1632/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 1.080884 [ 1664/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.796763 [ 1696/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.838064 [ 1728/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.895536 [ 1760/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.787959 [ 1792/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.837106 [ 1824/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.635757 [ 1856/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 1.013808 [ 1888/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.760683 [ 1920/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.968864 [ 1952/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.663567 [ 1984/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.915392 [ 2016/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.703101 [ 2048/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.944710 [ 2080/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.702104 [ 2112/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 1.008613 [ 2144/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.799965 [ 2176/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.963099 [ 2208/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 1.094274 [ 2240/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.837107 [ 2272/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.742020 [ 2304/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.873137 [ 2336/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.792163 [ 2368/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.934145 [ 2400/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 1.052865 [ 2432/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.885965 [ 2464/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.593445 [ 2496/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.713300 [ 2528/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.735547 [ 2560/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.793051 [ 2592/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.714031 [ 2624/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.805911 [ 2656/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.626959 [ 2688/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.709088 [ 2720/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.945150 [ 2752/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.794535 [ 2784/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.690588 [ 2816/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.728939 [ 2848/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.794471 [ 2880/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.616390 [ 2912/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.769166 [ 2944/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 1.009405 [ 2976/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.672475 [ 3008/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.712349 [ 3040/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.818138 [ 3072/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.834494 [ 3104/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.936695 [ 3136/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.947686 [ 3168/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.720525 [ 3200/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 1.275525 [ 3232/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.816448 [ 3264/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.640830 [ 3296/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.699512 [ 3328/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.724363 [ 3360/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.807510 [ 3392/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.525330 [ 3424/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.668358 [ 3456/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.835441 [ 3488/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 1.020612 [ 3520/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.741274 [ 3552/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.954894 [ 3584/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.719355 [ 3616/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.754294 [ 3648/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.712077 [ 3680/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.696096 [ 3712/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.764472 [ 3744/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.820821 [ 3776/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.784483 [ 3808/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.684618 [ 3840/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.542005 [ 3872/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.755062 [ 3904/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.638852 [ 3936/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.861135 [ 3968/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.738408 [ 4000/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.513977 [ 4032/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.579039 [ 4064/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.613166 [ 4096/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.450309 [ 4128/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.630916 [ 4160/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.688500 [ 4192/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.548975 [ 4224/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.571193 [ 4256/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.658275 [ 4288/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.666288 [ 4320/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.777157 [ 4352/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.600445 [ 4384/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.790007 [ 4416/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.661294 [ 4448/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.788263 [ 4480/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.487343 [ 4512/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.725966 [ 4544/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.510801 [ 4576/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.818338 [ 4608/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.464693 [ 4640/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.542450 [ 4672/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.615558 [ 4704/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.511763 [ 4736/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.630955 [ 4768/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.536544 [ 4800/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.706015 [ 4832/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.547444 [ 4864/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.594467 [ 4896/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.561115 [ 4928/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.624614 [ 4960/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.528944 [ 4992/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.616714 [ 5024/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.595726 [ 5056/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.498392 [ 5088/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.746957 [ 5120/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.494175 [ 5152/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.563299 [ 5184/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.452949 [ 5216/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.602027 [ 5248/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.498712 [ 5280/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.901854 [ 5312/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.510574 [ 5344/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.456499 [ 5376/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.721079 [ 5408/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.396174 [ 5440/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.570616 [ 5472/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.565692 [ 5504/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.535340 [ 5536/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.552577 [ 5568/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.628922 [ 5600/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.587739 [ 5632/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.589990 [ 5664/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.618312 [ 5696/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.625898 [ 5728/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.517803 [ 5760/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.554988 [ 5792/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.596735 [ 5824/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.665656 [ 5856/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.431821 [ 5888/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.441411 [ 5920/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.528411 [ 5952/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.579911 [ 5984/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.569648 [ 6016/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.685786 [ 6048/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.540844 [ 6080/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.629485 [ 6112/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.605282 [ 6144/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.464564 [ 6176/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.499036 [ 6208/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.536212 [ 6240/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.575692 [ 6272/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.728562 [ 6304/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.602117 [ 6336/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.495480 [ 6368/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.592051 [ 6400/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.643655 [ 6432/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.448587 [ 6464/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.568217 [ 6496/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.508891 [ 6528/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.675744 [ 6560/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.590426 [ 6592/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.528237 [ 6624/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.549971 [ 6656/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.634583 [ 6688/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.431949 [ 6720/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.514315 [ 6752/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.544474 [ 6784/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.494088 [ 6816/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.575113 [ 6848/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.501115 [ 6880/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.413808 [ 6912/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.610156 [ 6944/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.506997 [ 6976/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.608127 [ 7008/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.765208 [ 7040/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.610621 [ 7072/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.575672 [ 7104/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.366271 [ 7136/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.632053 [ 7168/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.567512 [ 7200/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.493878 [ 7232/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.455430 [ 7264/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.441612 [ 7296/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.478241 [ 7328/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.344425 [ 7360/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.503458 [ 7392/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.606787 [ 7424/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.529565 [ 7456/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.616328 [ 7488/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.436726 [ 7520/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.444392 [ 7552/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.585151 [ 7584/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.503471 [ 7616/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.531072 [ 7648/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.442733 [ 7680/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.559384 [ 7712/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.572397 [ 7744/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.564770 [ 7776/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.512597 [ 7808/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.631590 [ 7840/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.463754 [ 7872/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.568251 [ 7904/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.441372 [ 7936/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.631293 [ 7968/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.655692 [ 8000/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.423480 [ 8032/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.486350 [ 8064/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.386745 [ 8096/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.339537 [ 8128/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.446084 [ 8160/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.494765 [ 8192/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.443422 [ 8224/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.689093 [ 8256/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.655775 [ 8288/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.463297 [ 8320/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.491424 [ 8352/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.437737 [ 8384/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.404442 [ 8416/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.363106 [ 8448/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.429480 [ 8480/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.545790 [ 8512/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.522688 [ 8544/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.441147 [ 8576/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.399463 [ 8608/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.351862 [ 8640/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.559543 [ 8672/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.441236 [ 8704/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.514182 [ 8736/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.521758 [ 8768/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.466865 [ 8800/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.591162 [ 8832/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.386237 [ 8864/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.447706 [ 8896/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.587160 [ 8928/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.504463 [ 8960/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.415875 [ 8992/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.613529 [ 9024/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.461859 [ 9056/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.586805 [ 9088/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.412972 [ 9120/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.347374 [ 9152/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.530750 [ 9184/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.461045 [ 9216/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.386964 [ 9248/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.630727 [ 9280/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.501271 [ 9312/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.624975 [ 9344/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.385834 [ 9376/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.423949 [ 9408/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.454632 [ 9440/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.510700 [ 9472/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.278991 [ 9504/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.499247 [ 9536/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.365536 [ 9568/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.518824 [ 9600/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.419262 [ 9632/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.411808 [ 9664/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.484756 [ 9696/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.590918 [ 9728/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.618465 [ 9760/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.346579 [ 9792/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.482966 [ 9824/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.535122 [ 9856/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.317654 [ 9888/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.415425 [ 9920/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.364217 [ 9952/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.445557 [ 9984/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.753826 [10016/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.417078 [10048/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.528188 [10080/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.387783 [10112/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.457370 [10144/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.567906 [10176/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.387482 [10208/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.433058 [10240/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.460627 [10272/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.487534 [10304/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.463480 [10336/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.374397 [10368/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.486705 [10400/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.476468 [10432/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.362256 [10464/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.610280 [10496/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.616680 [10528/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.432170 [10560/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.514641 [10592/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.612449 [10624/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.351663 [10656/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.443617 [10688/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.458123 [10720/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.395775 [10752/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.579036 [10784/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.455268 [10816/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.455789 [10848/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.610950 [10880/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.361585 [10912/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.405387 [10944/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.436155 [10976/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.547301 [11008/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.427332 [11040/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.540416 [11072/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.443819 [11104/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.319156 [11136/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.417077 [11168/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.382108 [11200/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.349716 [11232/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.428245 [11264/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.408576 [11296/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.465712 [11328/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.441941 [11360/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.479455 [11392/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.502815 [11424/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.411159 [11456/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.395825 [11488/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.358852 [11520/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.456885 [11552/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.443839 [11584/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.463998 [11616/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.645580 [11648/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.348103 [11680/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.405294 [11712/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.392282 [11744/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.516236 [11776/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.479260 [11808/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.260089 [11840/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.420909 [11872/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.669966 [11904/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.417503 [11936/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.364877 [11968/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.379258 [12000/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.421287 [12032/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.362983 [12064/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.480573 [12096/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.513045 [12128/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.503120 [12160/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.519856 [12192/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.537149 [12224/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.308155 [12256/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.567256 [12288/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.505042 [12320/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.398751 [12352/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.587813 [12384/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.308555 [12416/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.636667 [12448/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.383471 [12480/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.479893 [12512/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.471464 [12544/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.407253 [12576/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.465827 [12608/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.311624 [12640/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.486688 [12672/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.473448 [12704/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.419422 [12736/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.466677 [12768/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.509296 [12800/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.472341 [12832/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.322334 [12864/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.352006 [12896/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.330168 [12928/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.455473 [12960/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.449922 [12992/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.456925 [13024/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.346309 [13056/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.412112 [13088/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.457046 [13120/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.464270 [13152/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.653894 [13184/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.343713 [13216/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.400381 [13248/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.496303 [13280/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.430535 [13312/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.360687 [13344/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.384525 [13376/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.481699 [13408/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.365675 [13440/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.369410 [13472/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.456542 [13504/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.409076 [13536/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.364806 [13568/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.527385 [13600/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.470875 [13632/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.415899 [13664/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.408331 [13696/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.377803 [13728/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.585184 [13760/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.386991 [13792/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.353043 [13824/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.410608 [13856/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.360129 [13888/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.252392 [13920/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.356516 [13952/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.349988 [13984/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.363187 [14016/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.427543 [14048/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.512881 [14080/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.463041 [14112/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.473239 [14144/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.385928 [14176/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.318924 [14208/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.455328 [14240/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.465207 [14272/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.375355 [14304/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.531881 [14336/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.398017 [14368/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.476002 [14400/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.306869 [14432/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.368717 [14464/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.271533 [14496/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.426079 [14528/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.389640 [14560/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.410404 [14592/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.601122 [14624/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.457782 [14656/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.481887 [14688/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.421771 [14720/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.484228 [14752/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.390803 [14784/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.393531 [14816/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.562275 [14848/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.370039 [14880/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.472965 [14912/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.518762 [14944/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.427457 [14976/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.403530 [15008/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.472418 [15040/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.366718 [15072/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.504167 [15104/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.363588 [15136/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.347451 [15168/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.378815 [15200/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.404185 [15232/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.339570 [15264/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.396061 [15296/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.401181 [15328/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.342418 [15360/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.474247 [15392/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.495032 [15424/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.512848 [15456/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.492040 [15488/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.367435 [15520/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.339502 [15552/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.506927 [15584/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.402065 [15616/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.259568 [15648/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.358387 [15680/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.415173 [15712/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.400764 [15744/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.424271 [15776/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.387451 [15808/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.433150 [15840/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.357338 [15872/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.313768 [15904/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.381364 [15936/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.467482 [15968/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.336002 [16000/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.333676 [16032/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.276012 [16064/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.461586 [16096/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.450690 [16128/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.342616 [16160/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.322037 [16192/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.298591 [16224/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.402644 [16256/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.449331 [16288/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.329494 [16320/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.339286 [16352/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.312135 [16384/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.414330 [16416/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.421167 [16448/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.452694 [16480/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.381357 [16512/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.414124 [16544/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.388635 [16576/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.433554 [16608/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.402536 [16640/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.341655 [16672/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.251612 [16704/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.381467 [16736/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.346403 [16768/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.541098 [16800/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.341047 [16832/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.336242 [16864/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.469228 [16896/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.299508 [16928/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.391402 [16960/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.519481 [16992/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.462513 [17024/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.338760 [17056/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.479337 [17088/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.340652 [17120/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.411665 [17152/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.344544 [17184/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.447766 [17216/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.432575 [17248/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.406267 [17280/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.337121 [17312/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.404347 [17344/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.702723 [17376/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.332910 [17408/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.445242 [17440/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.401157 [17472/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.284364 [17504/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.277158 [17536/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.468967 [17568/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.373510 [17600/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.324514 [17632/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.453248 [17664/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.401592 [17696/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.405795 [17728/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.332801 [17760/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.556869 [17792/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.420163 [17824/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.348372 [17856/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.435545 [17888/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.241980 [17920/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.394157 [17952/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.376585 [17984/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.339272 [18016/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.332349 [18048/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.500175 [18080/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.502744 [18112/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.493726 [18144/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.457253 [18176/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.237166 [18208/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.306373 [18240/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.385917 [18272/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.358460 [18304/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.393964 [18336/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.395847 [18368/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.283819 [18400/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.277966 [18432/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.378287 [18464/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.428370 [18496/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.383507 [18528/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.517246 [18560/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.290662 [18592/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.309014 [18624/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.488887 [18656/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.363337 [18688/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.444522 [18720/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.410293 [18752/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.530221 [18784/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.312412 [18816/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.365819 [18848/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.304132 [18880/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.357592 [18912/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.419300 [18944/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.436834 [18976/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.325914 [19008/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.412800 [19040/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.312843 [19072/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.240095 [19104/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.321073 [19136/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.334156 [19168/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.329189 [19200/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.383284 [19232/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.328358 [19264/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.374887 [19296/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.253884 [19328/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.339660 [19360/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.432111 [19392/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.447442 [19424/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.390074 [19456/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.375595 [19488/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.415793 [19520/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.309229 [19552/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.412419 [19584/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.370680 [19616/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.410280 [19648/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.312040 [19680/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.341263 [19712/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.282403 [19744/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.342683 [19776/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.310862 [19808/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.396040 [19840/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.345692 [19872/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.294340 [19904/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.335053 [19936/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.520679 [19968/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.383999 [20000/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.334889 [20032/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.387295 [20064/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.277892 [20096/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.303444 [20128/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.393313 [20160/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.400956 [20192/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.317335 [20224/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.421159 [20256/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.382120 [20288/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.358520 [20320/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.312756 [20352/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.269654 [20384/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.265919 [20416/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.408255 [20448/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.261578 [20480/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.301855 [20512/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.416620 [20544/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.472022 [20576/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.376215 [20608/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.540278 [20640/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.330407 [20672/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.416372 [20704/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.380810 [20736/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.369488 [20768/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.358772 [20800/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.389535 [20832/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.404725 [20864/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.233137 [20896/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.308009 [20928/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.477145 [20960/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.407479 [20992/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.379924 [21024/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.637139 [21056/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.365646 [21088/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.548720 [21120/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.349041 [21152/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.434521 [21184/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.421085 [21216/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.507729 [21248/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.376329 [21280/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.463043 [21312/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.383256 [21344/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.415021 [21376/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.467562 [21408/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.438886 [21440/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.367024 [21472/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.423843 [21504/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.405867 [21536/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.331084 [21568/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.401996 [21600/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.340189 [21632/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.314676 [21664/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.332276 [21696/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.367900 [21728/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.243867 [21760/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.303965 [21792/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.451398 [21824/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.379777 [21856/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.312852 [21888/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.432240 [21920/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.251165 [21952/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.391785 [21984/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.359098 [22016/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.414725 [22048/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.401304 [22080/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.407675 [22112/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.511489 [22144/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.246595 [22176/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.314853 [22208/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.306523 [22240/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.474073 [22272/24872]: 0%| | 0/777 [00:30<?, ?it/s]
loss: 0.474073 [22272/24872]: 90%|████████▉ | 696/777 [00:30<00:03, 23.17it/s]
loss: 0.318255 [22304/24872]: 90%|████████▉ | 696/777 [00:30<00:03, 23.17it/s]
loss: 0.385439 [22336/24872]: 90%|████████▉ | 696/777 [00:30<00:03, 23.17it/s]
loss: 0.304020 [22368/24872]: 90%|████████▉ | 696/777 [00:30<00:03, 23.17it/s]
loss: 0.300978 [22400/24872]: 90%|████████▉ | 696/777 [00:30<00:03, 23.17it/s]
loss: 0.367343 [22432/24872]: 90%|████████▉ | 696/777 [00:30<00:03, 23.17it/s]
loss: 0.524941 [22464/24872]: 90%|████████▉ | 696/777 [00:30<00:03, 23.17it/s]
loss: 0.399037 [22496/24872]: 90%|████████▉ | 696/777 [00:30<00:03, 23.17it/s]
loss: 0.329817 [22528/24872]: 90%|████████▉ | 696/777 [00:30<00:03, 23.17it/s]
loss: 0.291814 [22560/24872]: 90%|████████▉ | 696/777 [00:30<00:03, 23.17it/s]
loss: 0.294189 [22592/24872]: 90%|████████▉ | 696/777 [00:30<00:03, 23.17it/s]
loss: 0.207017 [22624/24872]: 90%|████████▉ | 696/777 [00:30<00:03, 23.17it/s]
loss: 0.284693 [22656/24872]: 90%|████████▉ | 696/777 [00:30<00:03, 23.17it/s]
loss: 0.336812 [22688/24872]: 90%|████████▉ | 696/777 [00:30<00:03, 23.17it/s]
loss: 0.347748 [22720/24872]: 90%|████████▉ | 696/777 [00:30<00:03, 23.17it/s]
loss: 0.369530 [22752/24872]: 90%|████████▉ | 696/777 [00:30<00:03, 23.17it/s]
loss: 0.270879 [22784/24872]: 90%|████████▉ | 696/777 [00:30<00:03, 23.17it/s]
loss: 0.312260 [22816/24872]: 90%|████████▉ | 696/777 [00:30<00:03, 23.17it/s]
loss: 0.375742 [22848/24872]: 90%|████████▉ | 696/777 [00:30<00:03, 23.17it/s]
loss: 0.274678 [22880/24872]: 90%|████████▉ | 696/777 [00:30<00:03, 23.17it/s]
loss: 0.302937 [22912/24872]: 90%|████████▉ | 696/777 [00:30<00:03, 23.17it/s]
loss: 0.423826 [22944/24872]: 90%|████████▉ | 696/777 [00:30<00:03, 23.17it/s]
loss: 0.434443 [22976/24872]: 90%|████████▉ | 696/777 [00:30<00:03, 23.17it/s]
loss: 0.357803 [23008/24872]: 90%|████████▉ | 696/777 [00:31<00:03, 23.17it/s]
loss: 0.372709 [23040/24872]: 90%|████████▉ | 696/777 [00:31<00:03, 23.17it/s]
loss: 0.450180 [23072/24872]: 90%|████████▉ | 696/777 [00:31<00:03, 23.17it/s]
loss: 0.323311 [23104/24872]: 90%|████████▉ | 696/777 [00:31<00:03, 23.17it/s]
loss: 0.455277 [23136/24872]: 90%|████████▉ | 696/777 [00:31<00:03, 23.17it/s]
loss: 0.404404 [23168/24872]: 90%|████████▉ | 696/777 [00:31<00:03, 23.17it/s]
loss: 0.327071 [23200/24872]: 90%|████████▉ | 696/777 [00:31<00:03, 23.17it/s]
loss: 0.396691 [23232/24872]: 90%|████████▉ | 696/777 [00:31<00:03, 23.17it/s]
loss: 0.211265 [23264/24872]: 90%|████████▉ | 696/777 [00:31<00:03, 23.17it/s]
loss: 0.295775 [23296/24872]: 90%|████████▉ | 696/777 [00:31<00:03, 23.17it/s]
loss: 0.313645 [23328/24872]: 90%|████████▉ | 696/777 [00:31<00:03, 23.17it/s]
loss: 0.264674 [23360/24872]: 90%|████████▉ | 696/777 [00:31<00:03, 23.17it/s]
loss: 0.341770 [23392/24872]: 90%|████████▉ | 696/777 [00:31<00:03, 23.17it/s]
loss: 0.391698 [23424/24872]: 90%|████████▉ | 696/777 [00:31<00:03, 23.17it/s]
loss: 0.317377 [23456/24872]: 90%|████████▉ | 696/777 [00:31<00:03, 23.17it/s]
loss: 0.352614 [23488/24872]: 90%|████████▉ | 696/777 [00:31<00:03, 23.17it/s]
loss: 0.318121 [23520/24872]: 90%|████████▉ | 696/777 [00:31<00:03, 23.17it/s]
loss: 0.246367 [23552/24872]: 90%|████████▉ | 696/777 [00:31<00:03, 23.17it/s]
loss: 0.320248 [23584/24872]: 90%|████████▉ | 696/777 [00:31<00:03, 23.17it/s]
loss: 0.355455 [23616/24872]: 90%|████████▉ | 696/777 [00:31<00:03, 23.17it/s]
loss: 0.499092 [23648/24872]: 90%|████████▉ | 696/777 [00:31<00:03, 23.17it/s]
loss: 0.293613 [23680/24872]: 90%|████████▉ | 696/777 [00:31<00:03, 23.17it/s]
loss: 0.312403 [23712/24872]: 90%|████████▉ | 696/777 [00:31<00:03, 23.17it/s]
loss: 0.411463 [23744/24872]: 90%|████████▉ | 696/777 [00:32<00:03, 23.17it/s]
loss: 0.325645 [23776/24872]: 90%|████████▉ | 696/777 [00:32<00:03, 23.17it/s]
loss: 0.416594 [23808/24872]: 90%|████████▉ | 696/777 [00:32<00:03, 23.17it/s]
loss: 0.277857 [23840/24872]: 90%|████████▉ | 696/777 [00:32<00:03, 23.17it/s]
loss: 0.333782 [23872/24872]: 90%|████████▉ | 696/777 [00:32<00:03, 23.17it/s]
loss: 0.331621 [23904/24872]: 90%|████████▉ | 696/777 [00:32<00:03, 23.17it/s]
loss: 0.424663 [23936/24872]: 90%|████████▉ | 696/777 [00:32<00:03, 23.17it/s]
loss: 0.345341 [23968/24872]: 90%|████████▉ | 696/777 [00:32<00:03, 23.17it/s]
loss: 0.338926 [24000/24872]: 90%|████████▉ | 696/777 [00:32<00:03, 23.17it/s]
loss: 0.262642 [24032/24872]: 90%|████████▉ | 696/777 [00:32<00:03, 23.17it/s]
loss: 0.348744 [24064/24872]: 90%|████████▉ | 696/777 [00:32<00:03, 23.17it/s]
loss: 0.293378 [24096/24872]: 90%|████████▉ | 696/777 [00:32<00:03, 23.17it/s]
loss: 0.339389 [24128/24872]: 90%|████████▉ | 696/777 [00:32<00:03, 23.17it/s]
loss: 0.303116 [24160/24872]: 90%|████████▉ | 696/777 [00:32<00:03, 23.17it/s]
loss: 0.211559 [24192/24872]: 90%|████████▉ | 696/777 [00:32<00:03, 23.17it/s]
loss: 0.156832 [24224/24872]: 90%|████████▉ | 696/777 [00:32<00:03, 23.17it/s]
loss: 0.462913 [24256/24872]: 90%|████████▉ | 696/777 [00:32<00:03, 23.17it/s]
loss: 0.207359 [24288/24872]: 90%|████████▉ | 696/777 [00:32<00:03, 23.17it/s]
loss: 0.268026 [24320/24872]: 90%|████████▉ | 696/777 [00:32<00:03, 23.17it/s]
loss: 0.343841 [24352/24872]: 90%|████████▉ | 696/777 [00:32<00:03, 23.17it/s]
loss: 0.285829 [24384/24872]: 90%|████████▉ | 696/777 [00:32<00:03, 23.17it/s]
loss: 0.383778 [24416/24872]: 90%|████████▉ | 696/777 [00:32<00:03, 23.17it/s]
loss: 0.326222 [24448/24872]: 90%|████████▉ | 696/777 [00:32<00:03, 23.17it/s]
loss: 0.301138 [24480/24872]: 90%|████████▉ | 696/777 [00:32<00:03, 23.17it/s]
loss: 0.229546 [24512/24872]: 90%|████████▉ | 696/777 [00:33<00:03, 23.17it/s]
loss: 0.294926 [24544/24872]: 90%|████████▉ | 696/777 [00:33<00:03, 23.17it/s]
loss: 0.275142 [24576/24872]: 90%|████████▉ | 696/777 [00:33<00:03, 23.17it/s]
loss: 0.294738 [24608/24872]: 90%|████████▉ | 696/777 [00:33<00:03, 23.17it/s]
loss: 0.353833 [24640/24872]: 90%|████████▉ | 696/777 [00:33<00:03, 23.17it/s]
loss: 0.305140 [24672/24872]: 90%|████████▉ | 696/777 [00:33<00:03, 23.17it/s]
loss: 0.352088 [24704/24872]: 90%|████████▉ | 696/777 [00:33<00:03, 23.17it/s]
loss: 0.254173 [24736/24872]: 90%|████████▉ | 696/777 [00:33<00:03, 23.17it/s]
loss: 0.389734 [24768/24872]: 90%|████████▉ | 696/777 [00:33<00:03, 23.17it/s]
loss: 0.332022 [24800/24872]: 90%|████████▉ | 696/777 [00:33<00:03, 23.17it/s]
loss: 0.321424 [24832/24872]: 90%|████████▉ | 696/777 [00:33<00:03, 23.17it/s]
loss: 0.315439 [24864/24872]: 90%|████████▉ | 696/777 [00:33<00:03, 23.17it/s]
loss: 0.178564 [24872/24872]: 90%|████████▉ | 696/777 [00:33<00:03, 23.17it/s]
loss: 0.178564 [24872/24872]: : 778it [00:33, 23.21it/s]
Epoch 2, time=33.54s
0%| | 0/777 [00:00<?, ?it/s]
loss: 0.404187 [ 32/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.222783 [ 64/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.416474 [ 96/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.366615 [ 128/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.366640 [ 160/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.281307 [ 192/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.447916 [ 224/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.468226 [ 256/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.294580 [ 288/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.309417 [ 320/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.337174 [ 352/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.352606 [ 384/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.393122 [ 416/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.294407 [ 448/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.449272 [ 480/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.448022 [ 512/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.439467 [ 544/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.459876 [ 576/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.409790 [ 608/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.359737 [ 640/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.468563 [ 672/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.300970 [ 704/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.426303 [ 736/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.417236 [ 768/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.358353 [ 800/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.372056 [ 832/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.537962 [ 864/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.519733 [ 896/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.370769 [ 928/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.393825 [ 960/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.297405 [ 992/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.243266 [ 1024/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.324053 [ 1056/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.435341 [ 1088/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.346470 [ 1120/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.311440 [ 1152/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.501615 [ 1184/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.355588 [ 1216/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.294676 [ 1248/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.286669 [ 1280/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.389193 [ 1312/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.276141 [ 1344/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.287590 [ 1376/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.314632 [ 1408/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.396786 [ 1440/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.332931 [ 1472/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.360740 [ 1504/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.280905 [ 1536/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.328996 [ 1568/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.365030 [ 1600/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.338332 [ 1632/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.336948 [ 1664/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.335865 [ 1696/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.275579 [ 1728/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.376058 [ 1760/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.177390 [ 1792/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.304054 [ 1824/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.149808 [ 1856/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.337018 [ 1888/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.305091 [ 1920/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.376227 [ 1952/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.373562 [ 1984/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.267463 [ 2016/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.288083 [ 2048/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.306503 [ 2080/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.267116 [ 2112/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.343400 [ 2144/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.311010 [ 2176/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.278639 [ 2208/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.452647 [ 2240/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.315521 [ 2272/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.316880 [ 2304/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.428423 [ 2336/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.368057 [ 2368/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.340791 [ 2400/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.494836 [ 2432/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.349736 [ 2464/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.330815 [ 2496/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.291714 [ 2528/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.289061 [ 2560/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.385267 [ 2592/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.237267 [ 2624/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.356911 [ 2656/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.304261 [ 2688/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.346562 [ 2720/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.353015 [ 2752/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.311497 [ 2784/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.430441 [ 2816/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.384459 [ 2848/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.297296 [ 2880/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.241826 [ 2912/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.390781 [ 2944/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.419642 [ 2976/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.292457 [ 3008/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.340277 [ 3040/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.368538 [ 3072/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.288387 [ 3104/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.349304 [ 3136/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.411761 [ 3168/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.285357 [ 3200/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.411088 [ 3232/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.282391 [ 3264/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.239853 [ 3296/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.242545 [ 3328/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.304132 [ 3360/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.345762 [ 3392/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.221415 [ 3424/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.243758 [ 3456/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.282584 [ 3488/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.434496 [ 3520/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.305887 [ 3552/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.447281 [ 3584/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.343345 [ 3616/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.389238 [ 3648/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.344813 [ 3680/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.393981 [ 3712/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.324465 [ 3744/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.295813 [ 3776/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.271267 [ 3808/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.349221 [ 3840/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.226667 [ 3872/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.228687 [ 3904/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.283285 [ 3936/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.365009 [ 3968/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.379714 [ 4000/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.234699 [ 4032/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.292937 [ 4064/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.308575 [ 4096/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.255450 [ 4128/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.345025 [ 4160/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.249680 [ 4192/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.296573 [ 4224/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.242584 [ 4256/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.267263 [ 4288/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.293558 [ 4320/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.246383 [ 4352/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.266444 [ 4384/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.370522 [ 4416/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.390460 [ 4448/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.420678 [ 4480/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.195339 [ 4512/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.326830 [ 4544/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.223725 [ 4576/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.321615 [ 4608/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.271304 [ 4640/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.232657 [ 4672/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.264665 [ 4704/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.255390 [ 4736/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.332312 [ 4768/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.312686 [ 4800/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.299886 [ 4832/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.307673 [ 4864/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.346623 [ 4896/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.366023 [ 4928/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.283363 [ 4960/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.289699 [ 4992/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.370162 [ 5024/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.348422 [ 5056/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.327698 [ 5088/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.363501 [ 5120/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.291138 [ 5152/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.353512 [ 5184/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.227888 [ 5216/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.247526 [ 5248/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.285905 [ 5280/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.495619 [ 5312/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.310965 [ 5344/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.267652 [ 5376/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.518278 [ 5408/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.188295 [ 5440/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.361686 [ 5472/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.338128 [ 5504/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.334654 [ 5536/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.362402 [ 5568/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.330193 [ 5600/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.440170 [ 5632/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.395013 [ 5664/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.432696 [ 5696/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.402553 [ 5728/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.293071 [ 5760/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.339086 [ 5792/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.336945 [ 5824/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.343448 [ 5856/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.243287 [ 5888/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.256809 [ 5920/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.332577 [ 5952/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.311804 [ 5984/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.298718 [ 6016/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.385464 [ 6048/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.285886 [ 6080/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.399443 [ 6112/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.372270 [ 6144/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.236679 [ 6176/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.290068 [ 6208/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.306590 [ 6240/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.352756 [ 6272/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.253629 [ 6304/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.374739 [ 6336/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.280361 [ 6368/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.469634 [ 6400/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.388553 [ 6432/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.239059 [ 6464/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.399801 [ 6496/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.317513 [ 6528/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.316906 [ 6560/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.306220 [ 6592/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.369219 [ 6624/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.280867 [ 6656/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.427372 [ 6688/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.216892 [ 6720/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.284760 [ 6752/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.336783 [ 6784/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.266012 [ 6816/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.329342 [ 6848/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.250642 [ 6880/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.298598 [ 6912/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.349032 [ 6944/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.306966 [ 6976/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.280143 [ 7008/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.458781 [ 7040/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.368771 [ 7072/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.387121 [ 7104/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.203997 [ 7136/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.353558 [ 7168/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.315715 [ 7200/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.311955 [ 7232/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.337573 [ 7264/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.262616 [ 7296/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.283238 [ 7328/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.209279 [ 7360/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.293008 [ 7392/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.298489 [ 7424/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.306633 [ 7456/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.319069 [ 7488/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.234782 [ 7520/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.256492 [ 7552/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.295178 [ 7584/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.323459 [ 7616/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.320010 [ 7648/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.241534 [ 7680/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.360000 [ 7712/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.426168 [ 7744/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.359310 [ 7776/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.248258 [ 7808/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.252643 [ 7840/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.270086 [ 7872/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.318997 [ 7904/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.285932 [ 7936/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.340727 [ 7968/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.359038 [ 8000/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.327194 [ 8032/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.321555 [ 8064/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.210491 [ 8096/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.224990 [ 8128/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.260065 [ 8160/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.324513 [ 8192/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.301487 [ 8224/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.497893 [ 8256/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.449199 [ 8288/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.348342 [ 8320/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.296160 [ 8352/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.328815 [ 8384/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.272400 [ 8416/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.296241 [ 8448/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.268350 [ 8480/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.349449 [ 8512/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.334523 [ 8544/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.285676 [ 8576/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.321084 [ 8608/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.252711 [ 8640/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.394883 [ 8672/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.265245 [ 8704/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.402638 [ 8736/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.392680 [ 8768/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.375532 [ 8800/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.389293 [ 8832/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.253674 [ 8864/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.319077 [ 8896/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.296862 [ 8928/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.398439 [ 8960/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.232143 [ 8992/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.437296 [ 9024/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.310883 [ 9056/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.402364 [ 9088/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.303923 [ 9120/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.317515 [ 9152/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.397684 [ 9184/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.318533 [ 9216/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.319588 [ 9248/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.411402 [ 9280/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.406419 [ 9312/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.410094 [ 9344/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.252225 [ 9376/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.242737 [ 9408/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.307084 [ 9440/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.388508 [ 9472/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.200347 [ 9504/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.350593 [ 9536/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.300989 [ 9568/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.383450 [ 9600/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.351921 [ 9632/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.336720 [ 9664/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.402645 [ 9696/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.412636 [ 9728/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.383352 [ 9760/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.280165 [ 9792/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.356883 [ 9824/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.309602 [ 9856/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.223391 [ 9888/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.227026 [ 9920/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.246219 [ 9952/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.261220 [ 9984/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.470905 [10016/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.330083 [10048/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.448709 [10080/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.220374 [10112/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.321000 [10144/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.407456 [10176/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.267270 [10208/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.343823 [10240/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.442397 [10272/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.434061 [10304/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.308188 [10336/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.252437 [10368/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.349576 [10400/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.272344 [10432/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.273428 [10464/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.310989 [10496/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.526844 [10528/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.277507 [10560/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.312454 [10592/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.358572 [10624/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.275493 [10656/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.311259 [10688/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.316188 [10720/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.240363 [10752/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.394201 [10784/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.292890 [10816/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.265709 [10848/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.340715 [10880/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.270533 [10912/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.281121 [10944/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.246585 [10976/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.399455 [11008/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.271340 [11040/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.305677 [11072/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.256788 [11104/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.202451 [11136/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.226117 [11168/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.211703 [11200/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.210075 [11232/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.323807 [11264/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.229664 [11296/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.295731 [11328/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.243962 [11360/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.304177 [11392/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.397897 [11424/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.295433 [11456/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.279442 [11488/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.204203 [11520/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.274268 [11552/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.320692 [11584/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.314593 [11616/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.419662 [11648/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.313130 [11680/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.219781 [11712/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.279706 [11744/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.359939 [11776/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.381696 [11808/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.172126 [11840/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.292853 [11872/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.516563 [11904/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.259997 [11936/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.292091 [11968/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.264924 [12000/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.254121 [12032/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.273956 [12064/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.338114 [12096/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.398801 [12128/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.331722 [12160/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.345047 [12192/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.464785 [12224/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.189037 [12256/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.348822 [12288/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.283465 [12320/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.289327 [12352/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.341995 [12384/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.206763 [12416/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.487000 [12448/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.296177 [12480/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.325304 [12512/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.268940 [12544/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.256029 [12576/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.301209 [12608/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.208601 [12640/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.363241 [12672/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.339706 [12704/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.243451 [12736/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.344279 [12768/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.363852 [12800/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.323082 [12832/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.252399 [12864/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.252165 [12896/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.219570 [12928/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.286515 [12960/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.326460 [12992/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.283498 [13024/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.306454 [13056/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.254491 [13088/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.388216 [13120/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.344106 [13152/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.412341 [13184/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.290031 [13216/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.290956 [13248/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.320097 [13280/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.307864 [13312/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.366827 [13344/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.351775 [13376/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.320801 [13408/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.243664 [13440/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.268974 [13472/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.360649 [13504/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.250265 [13536/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.275754 [13568/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.454216 [13600/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.357454 [13632/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.265860 [13664/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.385996 [13696/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.265286 [13728/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.416871 [13760/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.267191 [13792/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.243273 [13824/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.248959 [13856/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.314400 [13888/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.197834 [13920/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.262697 [13952/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.233506 [13984/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.222538 [14016/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.253393 [14048/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.372436 [14080/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.323163 [14112/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.307986 [14144/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.229529 [14176/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.271663 [14208/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.313225 [14240/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.432033 [14272/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.279519 [14304/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.318640 [14336/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.275702 [14368/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.338103 [14400/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.229650 [14432/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.210635 [14464/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.210573 [14496/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.283859 [14528/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.238172 [14560/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.295504 [14592/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.354808 [14624/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.391939 [14656/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.319352 [14688/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.226607 [14720/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.348103 [14752/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.238016 [14784/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.256586 [14816/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.395583 [14848/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.285351 [14880/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.329104 [14912/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.465744 [14944/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.320771 [14976/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.294489 [15008/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.345198 [15040/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.238203 [15072/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.349707 [15104/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.286099 [15136/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.257103 [15168/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.257189 [15200/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.257008 [15232/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.235838 [15264/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.293637 [15296/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.279381 [15328/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.261288 [15360/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.343668 [15392/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.268121 [15424/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.420003 [15456/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.325066 [15488/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.252595 [15520/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.208970 [15552/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.347247 [15584/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.256623 [15616/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.141329 [15648/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.209532 [15680/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.318564 [15712/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.287385 [15744/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.320325 [15776/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.263103 [15808/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.303626 [15840/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.282835 [15872/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.230926 [15904/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.272082 [15936/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.331788 [15968/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.237012 [16000/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.248460 [16032/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.238833 [16064/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.355090 [16096/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.352567 [16128/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.248712 [16160/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.254282 [16192/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.238556 [16224/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.327216 [16256/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.316663 [16288/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.283011 [16320/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.282302 [16352/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.216563 [16384/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.342446 [16416/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.395954 [16448/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.342284 [16480/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.312585 [16512/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.338483 [16544/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.307981 [16576/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.323758 [16608/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.319060 [16640/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.311441 [16672/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.230287 [16704/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.279660 [16736/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.274584 [16768/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.420618 [16800/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.253333 [16832/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.246271 [16864/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.335362 [16896/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.194994 [16928/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.280700 [16960/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.410838 [16992/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.345684 [17024/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.228678 [17056/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.292124 [17088/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.249538 [17120/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.254231 [17152/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.270003 [17184/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.357505 [17216/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.324518 [17248/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.241968 [17280/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.269314 [17312/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.349189 [17344/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.559540 [17376/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.229467 [17408/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.361576 [17440/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.332507 [17472/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.243955 [17504/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.212564 [17536/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.378786 [17568/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.316863 [17600/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.301648 [17632/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.387508 [17664/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.270123 [17696/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.294147 [17728/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.261480 [17760/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.359473 [17792/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.335499 [17824/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.274487 [17856/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.304894 [17888/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.177809 [17920/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.304081 [17952/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.253184 [17984/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.301571 [18016/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.272544 [18048/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.311051 [18080/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.339038 [18112/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.385356 [18144/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.373268 [18176/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.188310 [18208/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.203181 [18240/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.287964 [18272/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.300931 [18304/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.278614 [18336/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.364993 [18368/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.248773 [18400/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.199407 [18432/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.306955 [18464/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.359762 [18496/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.296046 [18528/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.380188 [18560/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.231846 [18592/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.244490 [18624/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.392271 [18656/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.283235 [18688/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.337450 [18720/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.375280 [18752/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.408402 [18784/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.241304 [18816/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.287370 [18848/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.258334 [18880/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.279727 [18912/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.294688 [18944/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.304290 [18976/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.258872 [19008/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.348694 [19040/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.219793 [19072/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.150871 [19104/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.292667 [19136/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.298407 [19168/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.277483 [19200/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.309332 [19232/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.267056 [19264/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.291566 [19296/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.194036 [19328/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.282356 [19360/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.354813 [19392/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.304988 [19424/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.350156 [19456/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.302077 [19488/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.332002 [19520/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.274609 [19552/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.292531 [19584/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.258912 [19616/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.361263 [19648/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.204122 [19680/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.272575 [19712/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.202510 [19744/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.267457 [19776/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.246829 [19808/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.271347 [19840/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.262415 [19872/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.233462 [19904/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.242443 [19936/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.324369 [19968/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.294034 [20000/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.229191 [20032/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.270525 [20064/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.208865 [20096/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.206740 [20128/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.299406 [20160/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.269340 [20192/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.260271 [20224/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.305995 [20256/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.268686 [20288/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.260224 [20320/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.244167 [20352/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.181409 [20384/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.203661 [20416/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.332365 [20448/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.205709 [20480/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.239966 [20512/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.288247 [20544/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.277318 [20576/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.294088 [20608/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.370477 [20640/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.266949 [20672/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.293571 [20704/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.275205 [20736/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.283663 [20768/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.244079 [20800/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.231219 [20832/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.320830 [20864/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.165628 [20896/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.216472 [20928/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.320410 [20960/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.236453 [20992/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.239322 [21024/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.361837 [21056/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.229757 [21088/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.365846 [21120/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.228806 [21152/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.325530 [21184/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.252340 [21216/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.374684 [21248/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.237147 [21280/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.304604 [21312/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.254577 [21344/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.282884 [21376/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.304489 [21408/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.340912 [21440/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.307930 [21472/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.300965 [21504/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.340691 [21536/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.220342 [21568/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.299362 [21600/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.252016 [21632/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.262662 [21664/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.285231 [21696/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.280752 [21728/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.182458 [21760/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.249164 [21792/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.360709 [21824/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.307535 [21856/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.195823 [21888/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.314979 [21920/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.168208 [21952/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.284077 [21984/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.269507 [22016/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.338146 [22048/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.280872 [22080/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.308379 [22112/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.296550 [22144/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.176737 [22176/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.253822 [22208/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.221412 [22240/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.304450 [22272/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.276507 [22304/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.327541 [22336/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.239820 [22368/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.249446 [22400/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.286531 [22432/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.401572 [22464/24872]: 0%| | 0/777 [00:30<?, ?it/s]
loss: 0.401572 [22464/24872]: 90%|█████████ | 702/777 [00:30<00:03, 23.39it/s]
loss: 0.375996 [22496/24872]: 90%|█████████ | 702/777 [00:30<00:03, 23.39it/s]
loss: 0.234310 [22528/24872]: 90%|█████████ | 702/777 [00:30<00:03, 23.39it/s]
loss: 0.218044 [22560/24872]: 90%|█████████ | 702/777 [00:30<00:03, 23.39it/s]
loss: 0.246361 [22592/24872]: 90%|█████████ | 702/777 [00:30<00:03, 23.39it/s]
loss: 0.155620 [22624/24872]: 90%|█████████ | 702/777 [00:30<00:03, 23.39it/s]
loss: 0.200772 [22656/24872]: 90%|█████████ | 702/777 [00:30<00:03, 23.39it/s]
loss: 0.231084 [22688/24872]: 90%|█████████ | 702/777 [00:30<00:03, 23.39it/s]
loss: 0.205220 [22720/24872]: 90%|█████████ | 702/777 [00:30<00:03, 23.39it/s]
loss: 0.242573 [22752/24872]: 90%|█████████ | 702/777 [00:30<00:03, 23.39it/s]
loss: 0.209850 [22784/24872]: 90%|█████████ | 702/777 [00:30<00:03, 23.39it/s]
loss: 0.236021 [22816/24872]: 90%|█████████ | 702/777 [00:30<00:03, 23.39it/s]
loss: 0.279787 [22848/24872]: 90%|█████████ | 702/777 [00:30<00:03, 23.39it/s]
loss: 0.147888 [22880/24872]: 90%|█████████ | 702/777 [00:30<00:03, 23.39it/s]
loss: 0.215168 [22912/24872]: 90%|█████████ | 702/777 [00:30<00:03, 23.39it/s]
loss: 0.298108 [22944/24872]: 90%|█████████ | 702/777 [00:30<00:03, 23.39it/s]
loss: 0.347877 [22976/24872]: 90%|█████████ | 702/777 [00:30<00:03, 23.39it/s]
loss: 0.267019 [23008/24872]: 90%|█████████ | 702/777 [00:30<00:03, 23.39it/s]
loss: 0.265988 [23040/24872]: 90%|█████████ | 702/777 [00:30<00:03, 23.39it/s]
loss: 0.355325 [23072/24872]: 90%|█████████ | 702/777 [00:30<00:03, 23.39it/s]
loss: 0.235236 [23104/24872]: 90%|█████████ | 702/777 [00:30<00:03, 23.39it/s]
loss: 0.363528 [23136/24872]: 90%|█████████ | 702/777 [00:30<00:03, 23.39it/s]
loss: 0.343803 [23168/24872]: 90%|█████████ | 702/777 [00:30<00:03, 23.39it/s]
loss: 0.248549 [23200/24872]: 90%|█████████ | 702/777 [00:31<00:03, 23.39it/s]
loss: 0.297807 [23232/24872]: 90%|█████████ | 702/777 [00:31<00:03, 23.39it/s]
loss: 0.152982 [23264/24872]: 90%|█████████ | 702/777 [00:31<00:03, 23.39it/s]
loss: 0.221969 [23296/24872]: 90%|█████████ | 702/777 [00:31<00:03, 23.39it/s]
loss: 0.253528 [23328/24872]: 90%|█████████ | 702/777 [00:31<00:03, 23.39it/s]
loss: 0.208997 [23360/24872]: 90%|█████████ | 702/777 [00:31<00:03, 23.39it/s]
loss: 0.269488 [23392/24872]: 90%|█████████ | 702/777 [00:31<00:03, 23.39it/s]
loss: 0.309640 [23424/24872]: 90%|█████████ | 702/777 [00:31<00:03, 23.39it/s]
loss: 0.223017 [23456/24872]: 90%|█████████ | 702/777 [00:31<00:03, 23.39it/s]
loss: 0.245581 [23488/24872]: 90%|█████████ | 702/777 [00:31<00:03, 23.39it/s]
loss: 0.251413 [23520/24872]: 90%|█████████ | 702/777 [00:31<00:03, 23.39it/s]
loss: 0.180492 [23552/24872]: 90%|█████████ | 702/777 [00:31<00:03, 23.39it/s]
loss: 0.295872 [23584/24872]: 90%|█████████ | 702/777 [00:31<00:03, 23.39it/s]
loss: 0.266918 [23616/24872]: 90%|█████████ | 702/777 [00:31<00:03, 23.39it/s]
loss: 0.421467 [23648/24872]: 90%|█████████ | 702/777 [00:31<00:03, 23.39it/s]
loss: 0.206171 [23680/24872]: 90%|█████████ | 702/777 [00:31<00:03, 23.39it/s]
loss: 0.247079 [23712/24872]: 90%|█████████ | 702/777 [00:31<00:03, 23.39it/s]
loss: 0.252082 [23744/24872]: 90%|█████████ | 702/777 [00:31<00:03, 23.39it/s]
loss: 0.272842 [23776/24872]: 90%|█████████ | 702/777 [00:31<00:03, 23.39it/s]
loss: 0.344202 [23808/24872]: 90%|█████████ | 702/777 [00:31<00:03, 23.39it/s]
loss: 0.240192 [23840/24872]: 90%|█████████ | 702/777 [00:31<00:03, 23.39it/s]
loss: 0.257380 [23872/24872]: 90%|█████████ | 702/777 [00:31<00:03, 23.39it/s]
loss: 0.264775 [23904/24872]: 90%|█████████ | 702/777 [00:31<00:03, 23.39it/s]
loss: 0.395548 [23936/24872]: 90%|█████████ | 702/777 [00:31<00:03, 23.39it/s]
loss: 0.270442 [23968/24872]: 90%|█████████ | 702/777 [00:32<00:03, 23.39it/s]
loss: 0.303282 [24000/24872]: 90%|█████████ | 702/777 [00:32<00:03, 23.39it/s]
loss: 0.292070 [24032/24872]: 90%|█████████ | 702/777 [00:32<00:03, 23.39it/s]
loss: 0.280198 [24064/24872]: 90%|█████████ | 702/777 [00:32<00:03, 23.39it/s]
loss: 0.213080 [24096/24872]: 90%|█████████ | 702/777 [00:32<00:03, 23.39it/s]
loss: 0.261187 [24128/24872]: 90%|█████████ | 702/777 [00:32<00:03, 23.39it/s]
loss: 0.197120 [24160/24872]: 90%|█████████ | 702/777 [00:32<00:03, 23.39it/s]
loss: 0.125185 [24192/24872]: 90%|█████████ | 702/777 [00:32<00:03, 23.39it/s]
loss: 0.133198 [24224/24872]: 90%|█████████ | 702/777 [00:32<00:03, 23.39it/s]
loss: 0.395609 [24256/24872]: 90%|█████████ | 702/777 [00:32<00:03, 23.39it/s]
loss: 0.165101 [24288/24872]: 90%|█████████ | 702/777 [00:32<00:03, 23.39it/s]
loss: 0.250410 [24320/24872]: 90%|█████████ | 702/777 [00:32<00:03, 23.39it/s]
loss: 0.318926 [24352/24872]: 90%|█████████ | 702/777 [00:32<00:03, 23.39it/s]
loss: 0.238304 [24384/24872]: 90%|█████████ | 702/777 [00:32<00:03, 23.39it/s]
loss: 0.305084 [24416/24872]: 90%|█████████ | 702/777 [00:32<00:03, 23.39it/s]
loss: 0.288512 [24448/24872]: 90%|█████████ | 702/777 [00:32<00:03, 23.39it/s]
loss: 0.244254 [24480/24872]: 90%|█████████ | 702/777 [00:32<00:03, 23.39it/s]
loss: 0.174634 [24512/24872]: 90%|█████████ | 702/777 [00:32<00:03, 23.39it/s]
loss: 0.219351 [24544/24872]: 90%|█████████ | 702/777 [00:32<00:03, 23.39it/s]
loss: 0.228837 [24576/24872]: 90%|█████████ | 702/777 [00:32<00:03, 23.39it/s]
loss: 0.224965 [24608/24872]: 90%|█████████ | 702/777 [00:32<00:03, 23.39it/s]
loss: 0.280613 [24640/24872]: 90%|█████████ | 702/777 [00:32<00:03, 23.39it/s]
loss: 0.266388 [24672/24872]: 90%|█████████ | 702/777 [00:32<00:03, 23.39it/s]
loss: 0.287963 [24704/24872]: 90%|█████████ | 702/777 [00:33<00:03, 23.39it/s]
loss: 0.196905 [24736/24872]: 90%|█████████ | 702/777 [00:33<00:03, 23.39it/s]
loss: 0.303016 [24768/24872]: 90%|█████████ | 702/777 [00:33<00:03, 23.39it/s]
loss: 0.263844 [24800/24872]: 90%|█████████ | 702/777 [00:33<00:03, 23.39it/s]
loss: 0.256359 [24832/24872]: 90%|█████████ | 702/777 [00:33<00:03, 23.39it/s]
loss: 0.271165 [24864/24872]: 90%|█████████ | 702/777 [00:33<00:03, 23.39it/s]
loss: 0.163905 [24872/24872]: 90%|█████████ | 702/777 [00:33<00:03, 23.39it/s]
loss: 0.163905 [24872/24872]: : 778it [00:33, 23.40it/s]
Epoch 3, time=66.79s
0%| | 0/777 [00:00<?, ?it/s]
loss: 0.273572 [ 32/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.188469 [ 64/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.407836 [ 96/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.331124 [ 128/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.331691 [ 160/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.191334 [ 192/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.318810 [ 224/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.354601 [ 256/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.255331 [ 288/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.217922 [ 320/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.271434 [ 352/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.265648 [ 384/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.237126 [ 416/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.208607 [ 448/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.317505 [ 480/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.318959 [ 512/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.192692 [ 544/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.277884 [ 576/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.291694 [ 608/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.253489 [ 640/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.273887 [ 672/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.261450 [ 704/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.361013 [ 736/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.283125 [ 768/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.249632 [ 800/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.250333 [ 832/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.398857 [ 864/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.397942 [ 896/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.240074 [ 928/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.324335 [ 960/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.226735 [ 992/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.206477 [ 1024/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.190896 [ 1056/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.283431 [ 1088/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.277476 [ 1120/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.243638 [ 1152/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.382407 [ 1184/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.264359 [ 1216/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.231116 [ 1248/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.197647 [ 1280/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.234303 [ 1312/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.184496 [ 1344/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.216267 [ 1376/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.261125 [ 1408/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.252114 [ 1440/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.293738 [ 1472/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.309655 [ 1504/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.206137 [ 1536/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.226137 [ 1568/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.236457 [ 1600/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.270115 [ 1632/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.211460 [ 1664/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.241190 [ 1696/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.234334 [ 1728/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.280741 [ 1760/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.096198 [ 1792/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.240458 [ 1824/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.105051 [ 1856/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.275285 [ 1888/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.227685 [ 1920/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.283113 [ 1952/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.283504 [ 1984/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.202874 [ 2016/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.222462 [ 2048/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.238822 [ 2080/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.208170 [ 2112/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.311170 [ 2144/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.249274 [ 2176/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.230219 [ 2208/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.327822 [ 2240/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.280102 [ 2272/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.252496 [ 2304/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.339852 [ 2336/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.248592 [ 2368/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.294132 [ 2400/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.388006 [ 2432/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.313610 [ 2464/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.221809 [ 2496/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.228648 [ 2528/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.214829 [ 2560/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.300429 [ 2592/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.197279 [ 2624/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.321347 [ 2656/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.242591 [ 2688/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.271701 [ 2720/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.256308 [ 2752/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.255880 [ 2784/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.314939 [ 2816/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.264994 [ 2848/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.261774 [ 2880/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.206347 [ 2912/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.291539 [ 2944/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.285086 [ 2976/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.225218 [ 3008/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.284172 [ 3040/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.309281 [ 3072/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.214900 [ 3104/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.265518 [ 3136/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.341211 [ 3168/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.232359 [ 3200/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.372777 [ 3232/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.251095 [ 3264/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.213037 [ 3296/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.206806 [ 3328/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.234919 [ 3360/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.278872 [ 3392/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.178378 [ 3424/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.223845 [ 3456/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.240281 [ 3488/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.342332 [ 3520/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.258487 [ 3552/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.330823 [ 3584/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.300710 [ 3616/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.283940 [ 3648/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.278292 [ 3680/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.350073 [ 3712/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.265235 [ 3744/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.254657 [ 3776/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.222700 [ 3808/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.342141 [ 3840/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.168587 [ 3872/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.176097 [ 3904/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.219769 [ 3936/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.301551 [ 3968/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.327521 [ 4000/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.193697 [ 4032/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.243738 [ 4064/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.287562 [ 4096/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.222106 [ 4128/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.255988 [ 4160/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.208690 [ 4192/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.249877 [ 4224/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.210911 [ 4256/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.212361 [ 4288/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.215257 [ 4320/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.195216 [ 4352/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.236663 [ 4384/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.284576 [ 4416/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.321733 [ 4448/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.329775 [ 4480/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.176091 [ 4512/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.276746 [ 4544/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.171182 [ 4576/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.260997 [ 4608/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.233042 [ 4640/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.195902 [ 4672/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.251148 [ 4704/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.210520 [ 4736/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.281737 [ 4768/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.259614 [ 4800/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.242503 [ 4832/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.261921 [ 4864/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.279716 [ 4896/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.352681 [ 4928/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.244579 [ 4960/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.223354 [ 4992/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.253857 [ 5024/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.321742 [ 5056/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.235050 [ 5088/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.356255 [ 5120/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.231523 [ 5152/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.268944 [ 5184/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.184871 [ 5216/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.246646 [ 5248/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.292379 [ 5280/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.312886 [ 5312/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.251336 [ 5344/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.209220 [ 5376/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.398969 [ 5408/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.137093 [ 5440/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.252252 [ 5472/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.247908 [ 5504/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.248529 [ 5536/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.301672 [ 5568/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.261204 [ 5600/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.294529 [ 5632/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.296271 [ 5664/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.319463 [ 5696/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.314465 [ 5728/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.277315 [ 5760/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.326544 [ 5792/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.283117 [ 5824/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.257629 [ 5856/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.207365 [ 5888/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.140372 [ 5920/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.260284 [ 5952/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.237991 [ 5984/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.233061 [ 6016/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.319560 [ 6048/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.197991 [ 6080/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.342851 [ 6112/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.339488 [ 6144/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.194494 [ 6176/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.264691 [ 6208/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.237663 [ 6240/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.298303 [ 6272/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.213383 [ 6304/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.269098 [ 6336/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.247150 [ 6368/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.395573 [ 6400/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.337905 [ 6432/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.198095 [ 6464/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.322550 [ 6496/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.271383 [ 6528/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.289044 [ 6560/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.254907 [ 6592/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.339413 [ 6624/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.223560 [ 6656/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.348204 [ 6688/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.206809 [ 6720/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.213100 [ 6752/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.217453 [ 6784/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.246996 [ 6816/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.304602 [ 6848/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.207277 [ 6880/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.199434 [ 6912/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.314940 [ 6944/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.295344 [ 6976/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.214038 [ 7008/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.468854 [ 7040/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.340363 [ 7072/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.314151 [ 7104/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.192753 [ 7136/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.304694 [ 7168/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.300866 [ 7200/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.284149 [ 7232/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.228430 [ 7264/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.189053 [ 7296/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.238512 [ 7328/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.197196 [ 7360/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.249134 [ 7392/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.228957 [ 7424/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.214979 [ 7456/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.293157 [ 7488/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.203890 [ 7520/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.210700 [ 7552/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.266445 [ 7584/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.243972 [ 7616/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.313819 [ 7648/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.219625 [ 7680/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.338556 [ 7712/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.397001 [ 7744/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.331723 [ 7776/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.199193 [ 7808/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.228757 [ 7840/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.234809 [ 7872/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.272131 [ 7904/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.234618 [ 7936/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.281973 [ 7968/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.290619 [ 8000/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.288735 [ 8032/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.279079 [ 8064/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.195521 [ 8096/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.211422 [ 8128/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.212893 [ 8160/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.270451 [ 8192/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.257488 [ 8224/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.379826 [ 8256/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.357024 [ 8288/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.321522 [ 8320/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.258560 [ 8352/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.323431 [ 8384/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.270983 [ 8416/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.276107 [ 8448/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.278522 [ 8480/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.297930 [ 8512/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.308241 [ 8544/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.237331 [ 8576/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.299183 [ 8608/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.170825 [ 8640/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.411495 [ 8672/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.220983 [ 8704/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.297298 [ 8736/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.410523 [ 8768/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.294442 [ 8800/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.417953 [ 8832/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.209633 [ 8864/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.244515 [ 8896/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.265909 [ 8928/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.364763 [ 8960/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.229473 [ 8992/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.386714 [ 9024/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.301022 [ 9056/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.348925 [ 9088/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.247059 [ 9120/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.254197 [ 9152/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.307902 [ 9184/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.232596 [ 9216/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.264698 [ 9248/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.404395 [ 9280/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.308714 [ 9312/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.372595 [ 9344/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.191940 [ 9376/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.205946 [ 9408/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.254846 [ 9440/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.313991 [ 9472/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.166507 [ 9504/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.328086 [ 9536/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.291517 [ 9568/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.305964 [ 9600/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.293166 [ 9632/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.282083 [ 9664/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.327012 [ 9696/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.323747 [ 9728/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.290056 [ 9760/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.219848 [ 9792/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.280581 [ 9824/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.262730 [ 9856/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.200933 [ 9888/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.171420 [ 9920/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.204607 [ 9952/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.217650 [ 9984/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.420284 [10016/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.283996 [10048/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.292194 [10080/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.175738 [10112/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.253308 [10144/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.292654 [10176/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.202705 [10208/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.268699 [10240/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.335022 [10272/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.382535 [10304/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.232110 [10336/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.194374 [10368/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.247935 [10400/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.218148 [10432/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.234051 [10464/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.238711 [10496/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.429198 [10528/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.221705 [10560/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.245252 [10592/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.301494 [10624/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.200727 [10656/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.247991 [10688/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.259605 [10720/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.216890 [10752/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.297114 [10784/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.274636 [10816/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.235944 [10848/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.226856 [10880/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.209618 [10912/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.217202 [10944/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.222413 [10976/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.319268 [11008/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.234925 [11040/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.254008 [11072/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.216786 [11104/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.181773 [11136/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.183522 [11168/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.162169 [11200/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.193865 [11232/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.289694 [11264/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.192370 [11296/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.251944 [11328/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.216735 [11360/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.265074 [11392/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.282301 [11424/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.276029 [11456/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.207320 [11488/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.166997 [11520/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.260799 [11552/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.263111 [11584/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.277320 [11616/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.357143 [11648/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.232957 [11680/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.179009 [11712/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.237953 [11744/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.265030 [11776/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.356368 [11808/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.115702 [11840/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.223302 [11872/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.433091 [11904/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.207464 [11936/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.210197 [11968/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.198586 [12000/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.209280 [12032/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.207801 [12064/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.257538 [12096/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.368126 [12128/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.243871 [12160/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.264564 [12192/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.412481 [12224/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.155290 [12256/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.290496 [12288/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.221186 [12320/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.250427 [12352/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.273407 [12384/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.163577 [12416/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.397405 [12448/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.245589 [12480/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.283979 [12512/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.223712 [12544/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.211909 [12576/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.265876 [12608/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.181232 [12640/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.306339 [12672/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.270606 [12704/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.186297 [12736/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.282237 [12768/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.277203 [12800/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.266264 [12832/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.213742 [12864/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.190214 [12896/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.180543 [12928/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.239913 [12960/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.271964 [12992/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.215860 [13024/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.278026 [13056/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.208133 [13088/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.330443 [13120/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.262283 [13152/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.336306 [13184/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.204003 [13216/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.235617 [13248/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.268859 [13280/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.276807 [13312/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.287144 [13344/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.272123 [13376/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.354345 [13408/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.207740 [13440/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.202647 [13472/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.271008 [13504/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.203191 [13536/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.246212 [13568/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.358294 [13600/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.286777 [13632/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.230721 [13664/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.275645 [13696/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.196699 [13728/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.377105 [13760/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.208485 [13792/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.186426 [13824/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.203730 [13856/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.255197 [13888/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.168354 [13920/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.202840 [13952/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.207112 [13984/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.185853 [14016/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.223778 [14048/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.308385 [14080/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.272809 [14112/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.239465 [14144/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.194898 [14176/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.230263 [14208/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.327892 [14240/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.318222 [14272/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.255795 [14304/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.256554 [14336/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.245357 [14368/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.305220 [14400/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.226644 [14432/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.198938 [14464/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.186971 [14496/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.235520 [14528/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.226847 [14560/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.230892 [14592/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.305864 [14624/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.382452 [14656/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.333190 [14688/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.156962 [14720/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.315994 [14752/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.221718 [14784/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.265897 [14816/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.334090 [14848/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.245418 [14880/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.253045 [14912/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.370567 [14944/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.281816 [14976/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.253148 [15008/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.313021 [15040/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.217073 [15072/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.310680 [15104/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.230105 [15136/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.178694 [15168/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.182067 [15200/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.214968 [15232/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.225763 [15264/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.245754 [15296/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.260592 [15328/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.206859 [15360/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.335706 [15392/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.224933 [15424/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.374921 [15456/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.282825 [15488/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.237988 [15520/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.177749 [15552/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.358171 [15584/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.196821 [15616/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.162709 [15648/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.230075 [15680/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.297156 [15712/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.296935 [15744/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.347301 [15776/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.280374 [15808/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.263005 [15840/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.263702 [15872/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.209557 [15904/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.247088 [15936/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.315711 [15968/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.218502 [16000/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.241669 [16032/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.160677 [16064/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.321759 [16096/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.277831 [16128/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.224448 [16160/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.264924 [16192/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.208819 [16224/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.265690 [16256/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.258005 [16288/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.227599 [16320/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.264774 [16352/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.195949 [16384/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.284016 [16416/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.284924 [16448/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.322165 [16480/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.268462 [16512/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.305934 [16544/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.226256 [16576/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.282880 [16608/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.274104 [16640/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.295960 [16672/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.226742 [16704/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.295503 [16736/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.287045 [16768/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.321874 [16800/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.243708 [16832/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.136978 [16864/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.246890 [16896/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.168094 [16928/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.264787 [16960/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.338366 [16992/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.289024 [17024/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.191126 [17056/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.243062 [17088/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.212391 [17120/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.222853 [17152/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.186117 [17184/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.315178 [17216/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.250277 [17248/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.206110 [17280/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.235824 [17312/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.284271 [17344/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.457578 [17376/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.152015 [17408/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.325584 [17440/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.238858 [17472/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.226003 [17504/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.188904 [17536/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.345940 [17568/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.275647 [17600/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.272080 [17632/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.358614 [17664/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.243053 [17696/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.258522 [17728/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.239918 [17760/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.297593 [17792/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.362883 [17824/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.245437 [17856/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.298437 [17888/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.137191 [17920/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.223296 [17952/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.241043 [17984/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.232097 [18016/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.244458 [18048/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.273599 [18080/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.287245 [18112/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.352970 [18144/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.299097 [18176/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.181722 [18208/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.188355 [18240/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.223779 [18272/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.263769 [18304/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.227070 [18336/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.302889 [18368/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.173315 [18400/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.164192 [18432/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.249948 [18464/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.283919 [18496/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.235769 [18528/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.309751 [18560/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.217014 [18592/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.196742 [18624/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.338664 [18656/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.228012 [18688/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.269401 [18720/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.279024 [18752/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.329810 [18784/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.191332 [18816/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.200069 [18848/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.221535 [18880/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.244399 [18912/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.229476 [18944/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.270048 [18976/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.210538 [19008/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.319310 [19040/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.202767 [19072/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.123468 [19104/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.264666 [19136/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.261626 [19168/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.221727 [19200/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.265961 [19232/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.198479 [19264/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.228473 [19296/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.144078 [19328/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.242426 [19360/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.318568 [19392/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.284166 [19424/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.253020 [19456/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.278254 [19488/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.257217 [19520/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.229707 [19552/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.248763 [19584/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.227853 [19616/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.330662 [19648/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.209017 [19680/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.281797 [19712/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.160933 [19744/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.229441 [19776/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.249412 [19808/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.202982 [19840/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.258848 [19872/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.202615 [19904/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.217955 [19936/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.276884 [19968/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.222065 [20000/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.188158 [20032/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.256165 [20064/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.190711 [20096/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.178788 [20128/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.250366 [20160/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.233391 [20192/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.233012 [20224/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.282628 [20256/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.269314 [20288/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.246030 [20320/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.232475 [20352/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.153235 [20384/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.174442 [20416/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.292643 [20448/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.145113 [20480/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.205352 [20512/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.266045 [20544/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.221143 [20576/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.276373 [20608/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.305564 [20640/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.233948 [20672/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.287249 [20704/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.259754 [20736/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.260929 [20768/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.222033 [20800/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.198817 [20832/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.328426 [20864/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.171759 [20896/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.212018 [20928/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.288888 [20960/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.193553 [20992/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.224788 [21024/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.274257 [21056/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.223164 [21088/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.342478 [21120/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.203069 [21152/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.246083 [21184/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.219072 [21216/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.351199 [21248/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.211810 [21280/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.240648 [21312/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.249949 [21344/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.221599 [21376/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.266394 [21408/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.305768 [21440/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.278002 [21472/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.302999 [21504/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.308683 [21536/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.212209 [21568/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.282320 [21600/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.180098 [21632/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.226466 [21664/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.255676 [21696/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.260165 [21728/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.180711 [21760/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.222950 [21792/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.386797 [21824/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.296744 [21856/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.192789 [21888/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.319842 [21920/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.195819 [21952/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.263659 [21984/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.264197 [22016/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.344655 [22048/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.275592 [22080/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.309751 [22112/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.286676 [22144/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.158863 [22176/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.221286 [22208/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.212161 [22240/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.267888 [22272/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.228520 [22304/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.312835 [22336/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.224962 [22368/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.223749 [22400/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.246352 [22432/24872]: 0%| | 0/777 [00:30<?, ?it/s]
loss: 0.246352 [22432/24872]: 90%|█████████ | 701/777 [00:30<00:03, 23.35it/s]
loss: 0.363382 [22464/24872]: 90%|█████████ | 701/777 [00:30<00:03, 23.35it/s]
loss: 0.329814 [22496/24872]: 90%|█████████ | 701/777 [00:30<00:03, 23.35it/s]
loss: 0.223499 [22528/24872]: 90%|█████████ | 701/777 [00:30<00:03, 23.35it/s]
loss: 0.214133 [22560/24872]: 90%|█████████ | 701/777 [00:30<00:03, 23.35it/s]
loss: 0.209479 [22592/24872]: 90%|█████████ | 701/777 [00:30<00:03, 23.35it/s]
loss: 0.143487 [22624/24872]: 90%|█████████ | 701/777 [00:30<00:03, 23.35it/s]
loss: 0.183908 [22656/24872]: 90%|█████████ | 701/777 [00:30<00:03, 23.35it/s]
loss: 0.201975 [22688/24872]: 90%|█████████ | 701/777 [00:30<00:03, 23.35it/s]
loss: 0.187572 [22720/24872]: 90%|█████████ | 701/777 [00:30<00:03, 23.35it/s]
loss: 0.228796 [22752/24872]: 90%|█████████ | 701/777 [00:30<00:03, 23.35it/s]
loss: 0.184587 [22784/24872]: 90%|█████████ | 701/777 [00:30<00:03, 23.35it/s]
loss: 0.217871 [22816/24872]: 90%|█████████ | 701/777 [00:30<00:03, 23.35it/s]
loss: 0.227331 [22848/24872]: 90%|█████████ | 701/777 [00:30<00:03, 23.35it/s]
loss: 0.137081 [22880/24872]: 90%|█████████ | 701/777 [00:30<00:03, 23.35it/s]
loss: 0.188964 [22912/24872]: 90%|█████████ | 701/777 [00:30<00:03, 23.35it/s]
loss: 0.246081 [22944/24872]: 90%|█████████ | 701/777 [00:30<00:03, 23.35it/s]
loss: 0.307684 [22976/24872]: 90%|█████████ | 701/777 [00:30<00:03, 23.35it/s]
loss: 0.241574 [23008/24872]: 90%|█████████ | 701/777 [00:30<00:03, 23.35it/s]
loss: 0.262530 [23040/24872]: 90%|█████████ | 701/777 [00:30<00:03, 23.35it/s]
loss: 0.332299 [23072/24872]: 90%|█████████ | 701/777 [00:30<00:03, 23.35it/s]
loss: 0.217284 [23104/24872]: 90%|█████████ | 701/777 [00:30<00:03, 23.35it/s]
loss: 0.327700 [23136/24872]: 90%|█████████ | 701/777 [00:30<00:03, 23.35it/s]
loss: 0.299627 [23168/24872]: 90%|█████████ | 701/777 [00:31<00:03, 23.35it/s]
loss: 0.213108 [23200/24872]: 90%|█████████ | 701/777 [00:31<00:03, 23.35it/s]
loss: 0.239554 [23232/24872]: 90%|█████████ | 701/777 [00:31<00:03, 23.35it/s]
loss: 0.139881 [23264/24872]: 90%|█████████ | 701/777 [00:31<00:03, 23.35it/s]
loss: 0.185709 [23296/24872]: 90%|█████████ | 701/777 [00:31<00:03, 23.35it/s]
loss: 0.227399 [23328/24872]: 90%|█████████ | 701/777 [00:31<00:03, 23.35it/s]
loss: 0.198884 [23360/24872]: 90%|█████████ | 701/777 [00:31<00:03, 23.35it/s]
loss: 0.237886 [23392/24872]: 90%|█████████ | 701/777 [00:31<00:03, 23.35it/s]
loss: 0.275088 [23424/24872]: 90%|█████████ | 701/777 [00:31<00:03, 23.35it/s]
loss: 0.206420 [23456/24872]: 90%|█████████ | 701/777 [00:31<00:03, 23.35it/s]
loss: 0.212096 [23488/24872]: 90%|█████████ | 701/777 [00:31<00:03, 23.35it/s]
loss: 0.213801 [23520/24872]: 90%|█████████ | 701/777 [00:31<00:03, 23.35it/s]
loss: 0.159702 [23552/24872]: 90%|█████████ | 701/777 [00:31<00:03, 23.35it/s]
loss: 0.258162 [23584/24872]: 90%|█████████ | 701/777 [00:31<00:03, 23.35it/s]
loss: 0.234310 [23616/24872]: 90%|█████████ | 701/777 [00:31<00:03, 23.35it/s]
loss: 0.387855 [23648/24872]: 90%|█████████ | 701/777 [00:31<00:03, 23.35it/s]
loss: 0.180971 [23680/24872]: 90%|█████████ | 701/777 [00:31<00:03, 23.35it/s]
loss: 0.208723 [23712/24872]: 90%|█████████ | 701/777 [00:31<00:03, 23.35it/s]
loss: 0.251756 [23744/24872]: 90%|█████████ | 701/777 [00:31<00:03, 23.35it/s]
loss: 0.246751 [23776/24872]: 90%|█████████ | 701/777 [00:31<00:03, 23.35it/s]
loss: 0.330614 [23808/24872]: 90%|█████████ | 701/777 [00:31<00:03, 23.35it/s]
loss: 0.204379 [23840/24872]: 90%|█████████ | 701/777 [00:31<00:03, 23.35it/s]
loss: 0.259910 [23872/24872]: 90%|█████████ | 701/777 [00:31<00:03, 23.35it/s]
loss: 0.243573 [23904/24872]: 90%|█████████ | 701/777 [00:32<00:03, 23.35it/s]
loss: 0.369093 [23936/24872]: 90%|█████████ | 701/777 [00:32<00:03, 23.35it/s]
loss: 0.245186 [23968/24872]: 90%|█████████ | 701/777 [00:32<00:03, 23.35it/s]
loss: 0.259659 [24000/24872]: 90%|█████████ | 701/777 [00:32<00:03, 23.35it/s]
loss: 0.225917 [24032/24872]: 90%|█████████ | 701/777 [00:32<00:03, 23.35it/s]
loss: 0.261260 [24064/24872]: 90%|█████████ | 701/777 [00:32<00:03, 23.35it/s]
loss: 0.181426 [24096/24872]: 90%|█████████ | 701/777 [00:32<00:03, 23.35it/s]
loss: 0.226778 [24128/24872]: 90%|█████████ | 701/777 [00:32<00:03, 23.35it/s]
loss: 0.186456 [24160/24872]: 90%|█████████ | 701/777 [00:32<00:03, 23.35it/s]
loss: 0.095960 [24192/24872]: 90%|█████████ | 701/777 [00:32<00:03, 23.35it/s]
loss: 0.117769 [24224/24872]: 90%|█████████ | 701/777 [00:32<00:03, 23.35it/s]
loss: 0.338371 [24256/24872]: 90%|█████████ | 701/777 [00:32<00:03, 23.35it/s]
loss: 0.160846 [24288/24872]: 90%|█████████ | 701/777 [00:32<00:03, 23.35it/s]
loss: 0.203330 [24320/24872]: 90%|█████████ | 701/777 [00:32<00:03, 23.35it/s]
loss: 0.296838 [24352/24872]: 90%|█████████ | 701/777 [00:32<00:03, 23.35it/s]
loss: 0.216351 [24384/24872]: 90%|█████████ | 701/777 [00:32<00:03, 23.35it/s]
loss: 0.282950 [24416/24872]: 90%|█████████ | 701/777 [00:32<00:03, 23.35it/s]
loss: 0.232048 [24448/24872]: 90%|█████████ | 701/777 [00:32<00:03, 23.35it/s]
loss: 0.258124 [24480/24872]: 90%|█████████ | 701/777 [00:32<00:03, 23.35it/s]
loss: 0.150086 [24512/24872]: 90%|█████████ | 701/777 [00:32<00:03, 23.35it/s]
loss: 0.200803 [24544/24872]: 90%|█████████ | 701/777 [00:32<00:03, 23.35it/s]
loss: 0.227350 [24576/24872]: 90%|█████████ | 701/777 [00:32<00:03, 23.35it/s]
loss: 0.245504 [24608/24872]: 90%|█████████ | 701/777 [00:32<00:03, 23.35it/s]
loss: 0.247224 [24640/24872]: 90%|█████████ | 701/777 [00:33<00:03, 23.35it/s]
loss: 0.200322 [24672/24872]: 90%|█████████ | 701/777 [00:33<00:03, 23.35it/s]
loss: 0.265170 [24704/24872]: 90%|█████████ | 701/777 [00:33<00:03, 23.35it/s]
loss: 0.164157 [24736/24872]: 90%|█████████ | 701/777 [00:33<00:03, 23.35it/s]
loss: 0.242595 [24768/24872]: 90%|█████████ | 701/777 [00:33<00:03, 23.35it/s]
loss: 0.221884 [24800/24872]: 90%|█████████ | 701/777 [00:33<00:03, 23.35it/s]
loss: 0.191837 [24832/24872]: 90%|█████████ | 701/777 [00:33<00:03, 23.35it/s]
loss: 0.215214 [24864/24872]: 90%|█████████ | 701/777 [00:33<00:03, 23.35it/s]
loss: 0.147651 [24872/24872]: 90%|█████████ | 701/777 [00:33<00:03, 23.35it/s]
loss: 0.147651 [24872/24872]: : 778it [00:33, 23.33it/s]
Epoch 4, time=100.14s
0%| | 0/777 [00:00<?, ?it/s]
loss: 0.225064 [ 32/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.136552 [ 64/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.333158 [ 96/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.278143 [ 128/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.253046 [ 160/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.172922 [ 192/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.302867 [ 224/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.320890 [ 256/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.236604 [ 288/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.176873 [ 320/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.235216 [ 352/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.231853 [ 384/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.195195 [ 416/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.210284 [ 448/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.265932 [ 480/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.276758 [ 512/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.162968 [ 544/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.249925 [ 576/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.241108 [ 608/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.234733 [ 640/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.238388 [ 672/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.214867 [ 704/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.323487 [ 736/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.243315 [ 768/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.216930 [ 800/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.217194 [ 832/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.337830 [ 864/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.299683 [ 896/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.209866 [ 928/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.273664 [ 960/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.205417 [ 992/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.182309 [ 1024/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.152186 [ 1056/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.258314 [ 1088/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.253187 [ 1120/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.224316 [ 1152/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.344423 [ 1184/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.218989 [ 1216/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.202610 [ 1248/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.168436 [ 1280/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.220571 [ 1312/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.175758 [ 1344/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.218386 [ 1376/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.237559 [ 1408/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.229563 [ 1440/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.267966 [ 1472/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.296402 [ 1504/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.188766 [ 1536/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.194769 [ 1568/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.226278 [ 1600/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.275120 [ 1632/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.183199 [ 1664/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.263105 [ 1696/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.217710 [ 1728/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.244229 [ 1760/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.091992 [ 1792/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.194243 [ 1824/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.087504 [ 1856/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.247673 [ 1888/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.201865 [ 1920/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.263492 [ 1952/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.242123 [ 1984/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.152112 [ 2016/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.216380 [ 2048/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.231974 [ 2080/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.175942 [ 2112/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.271504 [ 2144/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.221851 [ 2176/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.181080 [ 2208/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.270132 [ 2240/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.276284 [ 2272/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.225739 [ 2304/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.304345 [ 2336/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.222707 [ 2368/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.242858 [ 2400/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.380534 [ 2432/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.319709 [ 2464/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.172106 [ 2496/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.209302 [ 2528/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.197281 [ 2560/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.282368 [ 2592/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.191886 [ 2624/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.299582 [ 2656/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.231179 [ 2688/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.251856 [ 2720/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.213817 [ 2752/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.220032 [ 2784/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.331448 [ 2816/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.236974 [ 2848/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.246620 [ 2880/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.197573 [ 2912/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.272486 [ 2944/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.269950 [ 2976/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.213922 [ 3008/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.267964 [ 3040/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.262188 [ 3072/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.216033 [ 3104/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.221832 [ 3136/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.300242 [ 3168/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.228353 [ 3200/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.309151 [ 3232/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.212604 [ 3264/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.195725 [ 3296/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.179730 [ 3328/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.237885 [ 3360/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.235524 [ 3392/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.147889 [ 3424/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.172151 [ 3456/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.185193 [ 3488/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.320140 [ 3520/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.217085 [ 3552/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.296217 [ 3584/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.307890 [ 3616/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.252144 [ 3648/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.256154 [ 3680/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.325900 [ 3712/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.216590 [ 3744/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.213185 [ 3776/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.215844 [ 3808/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.251009 [ 3840/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.148652 [ 3872/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.164008 [ 3904/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.185335 [ 3936/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.259191 [ 3968/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.325066 [ 4000/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.166882 [ 4032/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.240109 [ 4064/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.258513 [ 4096/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.237257 [ 4128/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.231237 [ 4160/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.210958 [ 4192/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.212162 [ 4224/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.194567 [ 4256/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.189544 [ 4288/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.190763 [ 4320/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.175247 [ 4352/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.212584 [ 4384/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.266061 [ 4416/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.303229 [ 4448/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.295310 [ 4480/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.118712 [ 4512/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.251783 [ 4544/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.134287 [ 4576/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.238052 [ 4608/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.200040 [ 4640/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.185387 [ 4672/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.199387 [ 4704/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.179572 [ 4736/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.245088 [ 4768/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.210381 [ 4800/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.215327 [ 4832/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.193374 [ 4864/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.245005 [ 4896/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.295564 [ 4928/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.177954 [ 4960/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.180004 [ 4992/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.301845 [ 5024/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.268975 [ 5056/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.205195 [ 5088/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.289760 [ 5120/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.175796 [ 5152/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.243722 [ 5184/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.176407 [ 5216/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.210246 [ 5248/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.221471 [ 5280/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.283230 [ 5312/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.201185 [ 5344/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.189196 [ 5376/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.366072 [ 5408/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.116251 [ 5440/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.230446 [ 5472/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.219057 [ 5504/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.222889 [ 5536/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.286759 [ 5568/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.232828 [ 5600/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.229216 [ 5632/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.257760 [ 5664/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.295571 [ 5696/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.262012 [ 5728/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.252495 [ 5760/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.324385 [ 5792/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.240432 [ 5824/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.220969 [ 5856/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.209518 [ 5888/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.119607 [ 5920/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.222978 [ 5952/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.246463 [ 5984/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.204906 [ 6016/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.264401 [ 6048/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.178326 [ 6080/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.300517 [ 6112/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.290859 [ 6144/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.211712 [ 6176/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.212239 [ 6208/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.199765 [ 6240/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.274697 [ 6272/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.198698 [ 6304/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.202895 [ 6336/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.220357 [ 6368/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.363507 [ 6400/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.264777 [ 6432/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.166431 [ 6464/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.318165 [ 6496/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.240104 [ 6528/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.260504 [ 6560/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.222532 [ 6592/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.272825 [ 6624/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.166810 [ 6656/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.291242 [ 6688/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.170848 [ 6720/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.187830 [ 6752/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.188253 [ 6784/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.249050 [ 6816/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.261308 [ 6848/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.198117 [ 6880/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.172191 [ 6912/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.238810 [ 6944/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.250522 [ 6976/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.171375 [ 7008/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.355831 [ 7040/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.297607 [ 7072/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.260350 [ 7104/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.150260 [ 7136/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.250208 [ 7168/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.246276 [ 7200/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.230105 [ 7232/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.191060 [ 7264/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.175745 [ 7296/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.229865 [ 7328/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.161633 [ 7360/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.211885 [ 7392/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.194219 [ 7424/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.194183 [ 7456/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.275363 [ 7488/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.169492 [ 7520/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.184189 [ 7552/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.238641 [ 7584/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.211858 [ 7616/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.312037 [ 7648/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.165435 [ 7680/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.262826 [ 7712/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.337032 [ 7744/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.294836 [ 7776/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.176381 [ 7808/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.206169 [ 7840/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.205781 [ 7872/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.251526 [ 7904/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.201670 [ 7936/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.249894 [ 7968/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.257805 [ 8000/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.222730 [ 8032/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.223466 [ 8064/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.183956 [ 8096/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.175604 [ 8128/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.199685 [ 8160/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.241012 [ 8192/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.224001 [ 8224/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.308417 [ 8256/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.295836 [ 8288/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.227018 [ 8320/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.187582 [ 8352/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.235485 [ 8384/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.234580 [ 8416/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.194697 [ 8448/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.212474 [ 8480/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.273855 [ 8512/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.256954 [ 8544/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.173242 [ 8576/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.181074 [ 8608/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.145106 [ 8640/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.372006 [ 8672/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.177506 [ 8704/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.230710 [ 8736/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.325025 [ 8768/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.262012 [ 8800/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.371396 [ 8832/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.172176 [ 8864/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.175979 [ 8896/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.193922 [ 8928/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.305125 [ 8960/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.189572 [ 8992/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.331717 [ 9024/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.247526 [ 9056/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.289984 [ 9088/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.224058 [ 9120/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.202465 [ 9152/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.282182 [ 9184/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.192889 [ 9216/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.210926 [ 9248/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.356006 [ 9280/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.202445 [ 9312/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.330042 [ 9344/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.165300 [ 9376/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.185112 [ 9408/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.225576 [ 9440/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.229516 [ 9472/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.138306 [ 9504/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.252050 [ 9536/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.191591 [ 9568/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.281465 [ 9600/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.243169 [ 9632/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.216656 [ 9664/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.252793 [ 9696/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.290772 [ 9728/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.233957 [ 9760/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.181779 [ 9792/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.216354 [ 9824/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.261251 [ 9856/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.173068 [ 9888/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.146514 [ 9920/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.197001 [ 9952/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.275393 [ 9984/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.369247 [10016/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.248549 [10048/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.316464 [10080/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.199685 [10112/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.289417 [10144/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.280280 [10176/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.193319 [10208/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.242984 [10240/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.314446 [10272/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.310268 [10304/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.240397 [10336/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.172537 [10368/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.230959 [10400/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.256914 [10432/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.219239 [10464/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.239867 [10496/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.427339 [10528/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.242992 [10560/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.266945 [10592/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.298757 [10624/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.186330 [10656/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.248838 [10688/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.223893 [10720/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.212852 [10752/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.297877 [10784/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.256316 [10816/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.214236 [10848/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.206240 [10880/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.197754 [10912/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.193867 [10944/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.227642 [10976/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.251706 [11008/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.239931 [11040/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.254701 [11072/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.212363 [11104/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.151641 [11136/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.182575 [11168/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.146739 [11200/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.166056 [11232/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.241778 [11264/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.192370 [11296/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.252551 [11328/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.192188 [11360/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.240467 [11392/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.229124 [11424/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.251792 [11456/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.229289 [11488/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.148862 [11520/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.234332 [11552/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.278471 [11584/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.254570 [11616/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.355939 [11648/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.197729 [11680/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.179394 [11712/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.222581 [11744/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.235912 [11776/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.281726 [11808/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.087768 [11840/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.215861 [11872/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.454509 [11904/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.191095 [11936/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.211583 [11968/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.195771 [12000/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.208139 [12032/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.210211 [12064/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.237656 [12096/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.359629 [12128/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.224473 [12160/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.223404 [12192/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.348965 [12224/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.135595 [12256/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.268808 [12288/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.200096 [12320/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.212606 [12352/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.250187 [12384/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.143604 [12416/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.375093 [12448/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.240118 [12480/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.320097 [12512/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.204343 [12544/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.213317 [12576/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.234849 [12608/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.176068 [12640/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.242354 [12672/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.243560 [12704/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.165244 [12736/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.282723 [12768/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.263644 [12800/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.224844 [12832/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.206866 [12864/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.179887 [12896/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.190075 [12928/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.254655 [12960/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.278653 [12992/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.234420 [13024/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.241858 [13056/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.201370 [13088/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.274970 [13120/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.232508 [13152/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.312425 [13184/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.190932 [13216/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.223148 [13248/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.268312 [13280/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.274699 [13312/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.218555 [13344/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.210219 [13376/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.278317 [13408/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.172900 [13440/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.208903 [13472/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.267471 [13504/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.185238 [13536/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.249910 [13568/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.361337 [13600/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.263208 [13632/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.212039 [13664/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.229422 [13696/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.186608 [13728/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.350802 [13760/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.254277 [13792/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.167396 [13824/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.211655 [13856/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.213180 [13888/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.153507 [13920/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.182727 [13952/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.181972 [13984/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.193631 [14016/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.233035 [14048/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.270484 [14080/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.263787 [14112/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.275967 [14144/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.204472 [14176/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.244301 [14208/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.274065 [14240/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.291315 [14272/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.186465 [14304/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.244679 [14336/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.245749 [14368/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.302099 [14400/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.195844 [14432/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.176850 [14464/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.191056 [14496/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.200387 [14528/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.196298 [14560/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.226534 [14592/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.286171 [14624/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.343737 [14656/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.284630 [14688/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.181573 [14720/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.316051 [14752/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.196693 [14784/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.157947 [14816/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.313496 [14848/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.207265 [14880/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.221362 [14912/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.290522 [14944/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.254525 [14976/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.231578 [15008/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.282798 [15040/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.220819 [15072/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.268902 [15104/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.316846 [15136/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.152568 [15168/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.151438 [15200/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.177338 [15232/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.223574 [15264/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.241237 [15296/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.237284 [15328/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.181492 [15360/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.261991 [15392/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.180840 [15424/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.339537 [15456/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.240479 [15488/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.214973 [15520/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.150028 [15552/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.290509 [15584/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.202511 [15616/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.115604 [15648/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.206651 [15680/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.251419 [15712/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.228216 [15744/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.259691 [15776/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.167834 [15808/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.253599 [15840/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.258790 [15872/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.150286 [15904/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.283835 [15936/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.284697 [15968/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.191219 [16000/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.186934 [16032/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.146114 [16064/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.267557 [16096/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.269356 [16128/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.203740 [16160/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.211325 [16192/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.203750 [16224/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.242302 [16256/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.239374 [16288/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.231989 [16320/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.254803 [16352/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.174336 [16384/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.280364 [16416/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.268866 [16448/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.302574 [16480/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.249386 [16512/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.306974 [16544/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.209248 [16576/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.243772 [16608/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.275629 [16640/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.225044 [16672/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.170345 [16704/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.285288 [16736/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.244730 [16768/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.322454 [16800/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.198148 [16832/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.122061 [16864/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.237759 [16896/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.155540 [16928/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.285933 [16960/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.314901 [16992/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.257553 [17024/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.175577 [17056/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.217225 [17088/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.208601 [17120/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.218725 [17152/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.157402 [17184/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.265713 [17216/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.228527 [17248/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.197427 [17280/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.207951 [17312/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.247018 [17344/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.396110 [17376/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.143959 [17408/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.277292 [17440/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.239649 [17472/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.211773 [17504/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.156791 [17536/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.331445 [17568/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.236640 [17600/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.258211 [17632/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.352614 [17664/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.220766 [17696/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.220263 [17728/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.218054 [17760/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.281000 [17792/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.352699 [17824/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.191900 [17856/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.263003 [17888/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.122123 [17920/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.199862 [17952/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.227741 [17984/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.231192 [18016/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.226815 [18048/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.241602 [18080/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.255648 [18112/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.323675 [18144/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.273112 [18176/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.179402 [18208/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.163053 [18240/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.223045 [18272/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.245984 [18304/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.197027 [18336/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.262179 [18368/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.163248 [18400/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.158097 [18432/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.227085 [18464/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.267345 [18496/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.241007 [18528/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.262142 [18560/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.202661 [18592/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.191359 [18624/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.307808 [18656/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.217092 [18688/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.280635 [18720/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.270670 [18752/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.311000 [18784/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.218275 [18816/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.218988 [18848/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.209020 [18880/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.254079 [18912/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.240454 [18944/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.239569 [18976/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.222509 [19008/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.313369 [19040/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.194354 [19072/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.126941 [19104/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.272707 [19136/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.223891 [19168/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.211358 [19200/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.250156 [19232/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.195554 [19264/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.211350 [19296/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.149511 [19328/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.243401 [19360/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.287456 [19392/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.272917 [19424/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.236116 [19456/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.264273 [19488/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.239709 [19520/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.227804 [19552/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.221280 [19584/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.223345 [19616/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.283347 [19648/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.168507 [19680/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.230906 [19712/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.144096 [19744/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.200659 [19776/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.242786 [19808/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.180506 [19840/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.229905 [19872/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.177948 [19904/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.209555 [19936/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.251126 [19968/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.211062 [20000/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.180706 [20032/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.265352 [20064/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.166337 [20096/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.167199 [20128/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.248268 [20160/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.216469 [20192/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.222491 [20224/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.313198 [20256/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.269049 [20288/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.219019 [20320/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.247200 [20352/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.167697 [20384/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.142532 [20416/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.297232 [20448/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.149801 [20480/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.177135 [20512/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.231956 [20544/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.246206 [20576/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.252988 [20608/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.286760 [20640/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.224099 [20672/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.260810 [20704/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.235292 [20736/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.249934 [20768/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.182684 [20800/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.184644 [20832/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.267317 [20864/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.152782 [20896/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.187585 [20928/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.281423 [20960/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.158600 [20992/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.204567 [21024/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.244372 [21056/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.197369 [21088/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.311181 [21120/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.184714 [21152/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.225025 [21184/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.202497 [21216/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.317596 [21248/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.193022 [21280/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.232298 [21312/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.261989 [21344/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.206769 [21376/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.250790 [21408/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.289563 [21440/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.262204 [21472/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.265548 [21504/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.301907 [21536/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.202928 [21568/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.258937 [21600/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.183991 [21632/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.212428 [21664/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.202083 [21696/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.246002 [21728/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.163383 [21760/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.199054 [21792/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.325951 [21824/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.261402 [21856/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.167911 [21888/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.282795 [21920/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.140088 [21952/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.247733 [21984/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.244786 [22016/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.324357 [22048/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.250041 [22080/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.312410 [22112/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.244743 [22144/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.139498 [22176/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.192547 [22208/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.192119 [22240/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.245313 [22272/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.220430 [22304/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.291756 [22336/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.215050 [22368/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.219208 [22400/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.230280 [22432/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.336175 [22464/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.315494 [22496/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.227042 [22528/24872]: 0%| | 0/777 [00:30<?, ?it/s]
loss: 0.227042 [22528/24872]: 91%|█████████ | 704/777 [00:30<00:03, 23.43it/s]
loss: 0.206393 [22560/24872]: 91%|█████████ | 704/777 [00:30<00:03, 23.43it/s]
loss: 0.183595 [22592/24872]: 91%|█████████ | 704/777 [00:30<00:03, 23.43it/s]
loss: 0.124736 [22624/24872]: 91%|█████████ | 704/777 [00:30<00:03, 23.43it/s]
loss: 0.179131 [22656/24872]: 91%|█████████ | 704/777 [00:30<00:03, 23.43it/s]
loss: 0.194689 [22688/24872]: 91%|█████████ | 704/777 [00:30<00:03, 23.43it/s]
loss: 0.166881 [22720/24872]: 91%|█████████ | 704/777 [00:30<00:03, 23.43it/s]
loss: 0.233766 [22752/24872]: 91%|█████████ | 704/777 [00:30<00:03, 23.43it/s]
loss: 0.173770 [22784/24872]: 91%|█████████ | 704/777 [00:30<00:03, 23.43it/s]
loss: 0.225703 [22816/24872]: 91%|█████████ | 704/777 [00:30<00:03, 23.43it/s]
loss: 0.184712 [22848/24872]: 91%|█████████ | 704/777 [00:30<00:03, 23.43it/s]
loss: 0.126143 [22880/24872]: 91%|█████████ | 704/777 [00:30<00:03, 23.43it/s]
loss: 0.168810 [22912/24872]: 91%|█████████ | 704/777 [00:30<00:03, 23.43it/s]
loss: 0.227040 [22944/24872]: 91%|█████████ | 704/777 [00:30<00:03, 23.43it/s]
loss: 0.297863 [22976/24872]: 91%|█████████ | 704/777 [00:30<00:03, 23.43it/s]
loss: 0.218347 [23008/24872]: 91%|█████████ | 704/777 [00:30<00:03, 23.43it/s]
loss: 0.229356 [23040/24872]: 91%|█████████ | 704/777 [00:30<00:03, 23.43it/s]
loss: 0.265692 [23072/24872]: 91%|█████████ | 704/777 [00:30<00:03, 23.43it/s]
loss: 0.199016 [23104/24872]: 91%|█████████ | 704/777 [00:30<00:03, 23.43it/s]
loss: 0.313258 [23136/24872]: 91%|█████████ | 704/777 [00:30<00:03, 23.43it/s]
loss: 0.250108 [23168/24872]: 91%|█████████ | 704/777 [00:30<00:03, 23.43it/s]
loss: 0.204867 [23200/24872]: 91%|█████████ | 704/777 [00:30<00:03, 23.43it/s]
loss: 0.205961 [23232/24872]: 91%|█████████ | 704/777 [00:30<00:03, 23.43it/s]
loss: 0.109376 [23264/24872]: 91%|█████████ | 704/777 [00:31<00:03, 23.43it/s]
loss: 0.163363 [23296/24872]: 91%|█████████ | 704/777 [00:31<00:03, 23.43it/s]
loss: 0.202983 [23328/24872]: 91%|█████████ | 704/777 [00:31<00:03, 23.43it/s]
loss: 0.198397 [23360/24872]: 91%|█████████ | 704/777 [00:31<00:03, 23.43it/s]
loss: 0.229026 [23392/24872]: 91%|█████████ | 704/777 [00:31<00:03, 23.43it/s]
loss: 0.248102 [23424/24872]: 91%|█████████ | 704/777 [00:31<00:03, 23.43it/s]
loss: 0.191411 [23456/24872]: 91%|█████████ | 704/777 [00:31<00:03, 23.43it/s]
loss: 0.168108 [23488/24872]: 91%|█████████ | 704/777 [00:31<00:03, 23.43it/s]
loss: 0.205255 [23520/24872]: 91%|█████████ | 704/777 [00:31<00:03, 23.43it/s]
loss: 0.139436 [23552/24872]: 91%|█████████ | 704/777 [00:31<00:03, 23.43it/s]
loss: 0.224699 [23584/24872]: 91%|█████████ | 704/777 [00:31<00:03, 23.43it/s]
loss: 0.215581 [23616/24872]: 91%|█████████ | 704/777 [00:31<00:03, 23.43it/s]
loss: 0.336249 [23648/24872]: 91%|█████████ | 704/777 [00:31<00:03, 23.43it/s]
loss: 0.173522 [23680/24872]: 91%|█████████ | 704/777 [00:31<00:03, 23.43it/s]
loss: 0.183959 [23712/24872]: 91%|█████████ | 704/777 [00:31<00:03, 23.43it/s]
loss: 0.268512 [23744/24872]: 91%|█████████ | 704/777 [00:31<00:03, 23.43it/s]
loss: 0.238595 [23776/24872]: 91%|█████████ | 704/777 [00:31<00:03, 23.43it/s]
loss: 0.322949 [23808/24872]: 91%|█████████ | 704/777 [00:31<00:03, 23.43it/s]
loss: 0.183946 [23840/24872]: 91%|█████████ | 704/777 [00:31<00:03, 23.43it/s]
loss: 0.218420 [23872/24872]: 91%|█████████ | 704/777 [00:31<00:03, 23.43it/s]
loss: 0.231957 [23904/24872]: 91%|█████████ | 704/777 [00:31<00:03, 23.43it/s]
loss: 0.320227 [23936/24872]: 91%|█████████ | 704/777 [00:31<00:03, 23.43it/s]
loss: 0.234192 [23968/24872]: 91%|█████████ | 704/777 [00:31<00:03, 23.43it/s]
loss: 0.214343 [24000/24872]: 91%|█████████ | 704/777 [00:32<00:03, 23.43it/s]
loss: 0.203002 [24032/24872]: 91%|█████████ | 704/777 [00:32<00:03, 23.43it/s]
loss: 0.250765 [24064/24872]: 91%|█████████ | 704/777 [00:32<00:03, 23.43it/s]
loss: 0.184166 [24096/24872]: 91%|█████████ | 704/777 [00:32<00:03, 23.43it/s]
loss: 0.208170 [24128/24872]: 91%|█████████ | 704/777 [00:32<00:03, 23.43it/s]
loss: 0.150018 [24160/24872]: 91%|█████████ | 704/777 [00:32<00:03, 23.43it/s]
loss: 0.099855 [24192/24872]: 91%|█████████ | 704/777 [00:32<00:03, 23.43it/s]
loss: 0.110312 [24224/24872]: 91%|█████████ | 704/777 [00:32<00:03, 23.43it/s]
loss: 0.307379 [24256/24872]: 91%|█████████ | 704/777 [00:32<00:03, 23.43it/s]
loss: 0.154112 [24288/24872]: 91%|█████████ | 704/777 [00:32<00:03, 23.43it/s]
loss: 0.211317 [24320/24872]: 91%|█████████ | 704/777 [00:32<00:03, 23.43it/s]
loss: 0.259524 [24352/24872]: 91%|█████████ | 704/777 [00:32<00:03, 23.43it/s]
loss: 0.208296 [24384/24872]: 91%|█████████ | 704/777 [00:32<00:03, 23.43it/s]
loss: 0.282749 [24416/24872]: 91%|█████████ | 704/777 [00:32<00:03, 23.43it/s]
loss: 0.236840 [24448/24872]: 91%|█████████ | 704/777 [00:32<00:03, 23.43it/s]
loss: 0.272361 [24480/24872]: 91%|█████████ | 704/777 [00:32<00:03, 23.43it/s]
loss: 0.158533 [24512/24872]: 91%|█████████ | 704/777 [00:32<00:03, 23.43it/s]
loss: 0.247574 [24544/24872]: 91%|█████████ | 704/777 [00:32<00:03, 23.43it/s]
loss: 0.186421 [24576/24872]: 91%|█████████ | 704/777 [00:32<00:03, 23.43it/s]
loss: 0.184364 [24608/24872]: 91%|█████████ | 704/777 [00:32<00:03, 23.43it/s]
loss: 0.284857 [24640/24872]: 91%|█████████ | 704/777 [00:32<00:03, 23.43it/s]
loss: 0.202164 [24672/24872]: 91%|█████████ | 704/777 [00:32<00:03, 23.43it/s]
loss: 0.228512 [24704/24872]: 91%|█████████ | 704/777 [00:32<00:03, 23.43it/s]
loss: 0.202060 [24736/24872]: 91%|█████████ | 704/777 [00:32<00:03, 23.43it/s]
loss: 0.304767 [24768/24872]: 91%|█████████ | 704/777 [00:33<00:03, 23.43it/s]
loss: 0.210959 [24800/24872]: 91%|█████████ | 704/777 [00:33<00:03, 23.43it/s]
loss: 0.197831 [24832/24872]: 91%|█████████ | 704/777 [00:33<00:03, 23.43it/s]
loss: 0.235669 [24864/24872]: 91%|█████████ | 704/777 [00:33<00:03, 23.43it/s]
loss: 0.165323 [24872/24872]: 91%|█████████ | 704/777 [00:33<00:03, 23.43it/s]
loss: 0.165323 [24872/24872]: : 778it [00:33, 23.44it/s]
Epoch 5, time=133.34s
0%| | 0/777 [00:00<?, ?it/s]
loss: 0.215962 [ 32/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.181025 [ 64/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.356721 [ 96/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.294840 [ 128/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.267851 [ 160/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.236777 [ 192/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.255681 [ 224/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.368363 [ 256/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.218348 [ 288/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.178353 [ 320/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.232199 [ 352/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.247413 [ 384/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.192254 [ 416/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.197739 [ 448/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.297298 [ 480/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.308295 [ 512/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.181816 [ 544/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.253343 [ 576/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.278672 [ 608/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.235063 [ 640/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.222367 [ 672/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.241854 [ 704/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.301622 [ 736/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.207183 [ 768/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.237348 [ 800/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.260142 [ 832/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.318137 [ 864/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.312979 [ 896/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.212119 [ 928/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.262888 [ 960/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.197865 [ 992/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.188460 [ 1024/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.146935 [ 1056/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.248907 [ 1088/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.246469 [ 1120/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.164940 [ 1152/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.326878 [ 1184/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.202975 [ 1216/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.196983 [ 1248/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.151887 [ 1280/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.213923 [ 1312/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.167031 [ 1344/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.192473 [ 1376/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.212002 [ 1408/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.218445 [ 1440/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.239584 [ 1472/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.268312 [ 1504/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.168538 [ 1536/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.192694 [ 1568/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.211805 [ 1600/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.256871 [ 1632/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.170481 [ 1664/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.231860 [ 1696/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.189357 [ 1728/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.256134 [ 1760/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.086396 [ 1792/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.187405 [ 1824/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.096029 [ 1856/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.231108 [ 1888/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.183861 [ 1920/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.248100 [ 1952/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.243519 [ 1984/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.150933 [ 2016/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.214766 [ 2048/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.224143 [ 2080/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.158316 [ 2112/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.247253 [ 2144/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.199461 [ 2176/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.168934 [ 2208/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.244627 [ 2240/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.241449 [ 2272/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.209790 [ 2304/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.284796 [ 2336/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.212722 [ 2368/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.223148 [ 2400/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.320967 [ 2432/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.261504 [ 2464/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.148595 [ 2496/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.185230 [ 2528/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.169083 [ 2560/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.247116 [ 2592/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.156693 [ 2624/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.275082 [ 2656/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.210185 [ 2688/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.193891 [ 2720/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.179966 [ 2752/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.199770 [ 2784/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.285528 [ 2816/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.205632 [ 2848/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.224542 [ 2880/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.177005 [ 2912/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.244324 [ 2944/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.252665 [ 2976/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.189786 [ 3008/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.234965 [ 3040/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.203833 [ 3072/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.207197 [ 3104/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.192626 [ 3136/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.299414 [ 3168/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.211292 [ 3200/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.261442 [ 3232/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.194004 [ 3264/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.166943 [ 3296/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.161015 [ 3328/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.186879 [ 3360/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.206798 [ 3392/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.129879 [ 3424/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.162915 [ 3456/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.157637 [ 3488/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.267228 [ 3520/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.208340 [ 3552/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.255768 [ 3584/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.237510 [ 3616/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.216429 [ 3648/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.224508 [ 3680/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.320360 [ 3712/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.207841 [ 3744/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.199437 [ 3776/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.190387 [ 3808/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.226493 [ 3840/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.128884 [ 3872/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.143828 [ 3904/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.147235 [ 3936/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.230967 [ 3968/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.281604 [ 4000/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.162677 [ 4032/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.199543 [ 4064/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.237985 [ 4096/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.217102 [ 4128/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.177520 [ 4160/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.192549 [ 4192/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.191483 [ 4224/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.187687 [ 4256/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.175069 [ 4288/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.172920 [ 4320/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.151591 [ 4352/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.195770 [ 4384/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.239541 [ 4416/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.278662 [ 4448/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.271011 [ 4480/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.107489 [ 4512/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.249002 [ 4544/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.120460 [ 4576/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.220939 [ 4608/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.176157 [ 4640/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.170008 [ 4672/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.178885 [ 4704/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.174408 [ 4736/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.226473 [ 4768/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.187414 [ 4800/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.191332 [ 4832/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.174658 [ 4864/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.208260 [ 4896/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.280138 [ 4928/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.168676 [ 4960/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.153139 [ 4992/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.248721 [ 5024/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.247742 [ 5056/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.199580 [ 5088/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.279947 [ 5120/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.144268 [ 5152/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.226417 [ 5184/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.168429 [ 5216/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.205069 [ 5248/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.200517 [ 5280/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.257504 [ 5312/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.187593 [ 5344/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.186274 [ 5376/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.350888 [ 5408/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.111499 [ 5440/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.224220 [ 5472/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.211012 [ 5504/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.201021 [ 5536/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.267515 [ 5568/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.214169 [ 5600/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.214126 [ 5632/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.250257 [ 5664/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.282870 [ 5696/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.236324 [ 5728/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.247099 [ 5760/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.327622 [ 5792/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.223935 [ 5824/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.228436 [ 5856/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.204782 [ 5888/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.100193 [ 5920/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.231530 [ 5952/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.261360 [ 5984/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.197429 [ 6016/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.228075 [ 6048/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.166965 [ 6080/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.280025 [ 6112/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.263532 [ 6144/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.174938 [ 6176/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.200099 [ 6208/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.199101 [ 6240/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.272097 [ 6272/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.188395 [ 6304/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.198986 [ 6336/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.200679 [ 6368/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.325152 [ 6400/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.238906 [ 6432/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.178499 [ 6464/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.272805 [ 6496/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.212777 [ 6528/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.244678 [ 6560/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.207667 [ 6592/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.264171 [ 6624/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.156266 [ 6656/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.294802 [ 6688/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.169881 [ 6720/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.182922 [ 6752/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.153522 [ 6784/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.229359 [ 6816/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.242227 [ 6848/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.187748 [ 6880/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.160036 [ 6912/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.198663 [ 6944/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.187625 [ 6976/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.163559 [ 7008/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.316015 [ 7040/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.322563 [ 7072/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.256024 [ 7104/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.170435 [ 7136/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.222564 [ 7168/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.221267 [ 7200/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.200093 [ 7232/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.163029 [ 7264/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.187030 [ 7296/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.195076 [ 7328/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.154242 [ 7360/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.203852 [ 7392/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.194619 [ 7424/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.169556 [ 7456/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.269034 [ 7488/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.159256 [ 7520/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.176068 [ 7552/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.224596 [ 7584/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.205969 [ 7616/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.238413 [ 7648/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.143515 [ 7680/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.255574 [ 7712/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.303603 [ 7744/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.276561 [ 7776/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.205842 [ 7808/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.186051 [ 7840/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.189542 [ 7872/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.229085 [ 7904/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.183733 [ 7936/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.230332 [ 7968/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.223473 [ 8000/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.191017 [ 8032/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.220434 [ 8064/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.175796 [ 8096/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.189732 [ 8128/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.143082 [ 8160/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.232209 [ 8192/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.215203 [ 8224/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.262843 [ 8256/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.294491 [ 8288/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.196199 [ 8320/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.165925 [ 8352/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.218154 [ 8384/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.206039 [ 8416/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.195147 [ 8448/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.169634 [ 8480/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.249369 [ 8512/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.226135 [ 8544/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.159088 [ 8576/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.172688 [ 8608/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.133747 [ 8640/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.293467 [ 8672/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.158104 [ 8704/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.201479 [ 8736/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.279812 [ 8768/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.237360 [ 8800/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.347457 [ 8832/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.148175 [ 8864/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.166439 [ 8896/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.185748 [ 8928/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.276973 [ 8960/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.175227 [ 8992/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.309622 [ 9024/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.231327 [ 9056/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.267635 [ 9088/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.195569 [ 9120/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.162749 [ 9152/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.232958 [ 9184/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.176982 [ 9216/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.197249 [ 9248/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.316655 [ 9280/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.185667 [ 9312/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.280085 [ 9344/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.175219 [ 9376/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.179675 [ 9408/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.211954 [ 9440/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.208965 [ 9472/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.153314 [ 9504/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.230654 [ 9536/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.160090 [ 9568/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.247533 [ 9600/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.221331 [ 9632/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.193209 [ 9664/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.221482 [ 9696/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.270110 [ 9728/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.197794 [ 9760/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.150164 [ 9792/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.212460 [ 9824/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.242396 [ 9856/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.143665 [ 9888/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.146043 [ 9920/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.176859 [ 9952/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.208277 [ 9984/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.325269 [10016/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.230881 [10048/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.264469 [10080/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.185770 [10112/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.238098 [10144/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.233706 [10176/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.172749 [10208/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.231487 [10240/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.278571 [10272/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.260879 [10304/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.209669 [10336/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.155585 [10368/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.186773 [10400/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.217960 [10432/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.214407 [10464/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.203074 [10496/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.370068 [10528/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.200477 [10560/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.226893 [10592/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.273090 [10624/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.157040 [10656/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.231347 [10688/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.206187 [10720/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.185797 [10752/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.306362 [10784/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.222995 [10816/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.195010 [10848/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.202757 [10880/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.176474 [10912/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.208237 [10944/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.190543 [10976/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.233590 [11008/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.224886 [11040/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.226051 [11072/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.179940 [11104/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.198592 [11136/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.173285 [11168/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.147207 [11200/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.166801 [11232/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.226723 [11264/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.183065 [11296/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.251619 [11328/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.193544 [11360/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.208774 [11392/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.212962 [11424/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.232359 [11456/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.202618 [11488/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.135371 [11520/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.240221 [11552/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.237293 [11584/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.252126 [11616/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.343867 [11648/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.205522 [11680/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.177823 [11712/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.232268 [11744/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.253111 [11776/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.266863 [11808/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.092591 [11840/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.230477 [11872/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.426498 [11904/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.182091 [11936/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.190575 [11968/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.201733 [12000/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.224198 [12032/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.223559 [12064/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.236032 [12096/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.313222 [12128/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.248690 [12160/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.228542 [12192/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.328068 [12224/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.178522 [12256/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.238319 [12288/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.204765 [12320/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.213019 [12352/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.265311 [12384/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.135121 [12416/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.403498 [12448/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.233080 [12480/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.250566 [12512/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.215257 [12544/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.194655 [12576/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.215145 [12608/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.189201 [12640/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.214352 [12672/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.218361 [12704/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.169780 [12736/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.258674 [12768/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.241576 [12800/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.208135 [12832/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.195104 [12864/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.159011 [12896/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.157909 [12928/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.243725 [12960/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.266548 [12992/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.209956 [13024/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.229052 [13056/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.193214 [13088/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.245356 [13120/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.205079 [13152/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.299684 [13184/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.164837 [13216/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.206979 [13248/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.245810 [13280/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.226149 [13312/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.205419 [13344/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.220311 [13376/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.244809 [13408/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.148553 [13440/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.192339 [13472/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.236882 [13504/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.164591 [13536/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.255424 [13568/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.338008 [13600/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.237168 [13632/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.182205 [13664/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.213526 [13696/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.165888 [13728/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.282815 [13760/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.213016 [13792/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.148018 [13824/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.178340 [13856/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.205742 [13888/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.129562 [13920/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.155975 [13952/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.165469 [13984/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.223440 [14016/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.206937 [14048/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.229159 [14080/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.225689 [14112/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.203195 [14144/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.179022 [14176/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.227563 [14208/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.246815 [14240/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.263784 [14272/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.199501 [14304/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.186328 [14336/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.215941 [14368/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.329740 [14400/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.185012 [14432/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.181909 [14464/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.179730 [14496/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.181625 [14528/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.187967 [14560/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.214423 [14592/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.226985 [14624/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.324201 [14656/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.268321 [14688/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.135660 [14720/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.303257 [14752/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.160216 [14784/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.139426 [14816/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.281561 [14848/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.192813 [14880/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.176972 [14912/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.245060 [14944/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.234582 [14976/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.195679 [15008/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.241845 [15040/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.176026 [15072/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.239251 [15104/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.254083 [15136/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.158115 [15168/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.137316 [15200/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.156153 [15232/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.219628 [15264/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.195583 [15296/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.236158 [15328/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.165437 [15360/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.213545 [15392/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.158570 [15424/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.315187 [15456/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.203581 [15488/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.220020 [15520/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.128313 [15552/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.264935 [15584/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.189827 [15616/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.091312 [15648/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.173918 [15680/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.201651 [15712/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.211354 [15744/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.234586 [15776/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.147423 [15808/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.221853 [15840/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.241709 [15872/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.132513 [15904/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.264586 [15936/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.273397 [15968/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.180477 [16000/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.200498 [16032/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.127940 [16064/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.239798 [16096/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.277956 [16128/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.194145 [16160/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.248276 [16192/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.246966 [16224/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.227541 [16256/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.257113 [16288/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.227303 [16320/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.281393 [16352/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.185467 [16384/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.301276 [16416/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.267183 [16448/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.314866 [16480/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.216750 [16512/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.300972 [16544/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.210733 [16576/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.215267 [16608/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.252403 [16640/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.209285 [16672/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.171102 [16704/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.273486 [16736/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.248432 [16768/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.357491 [16800/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.222298 [16832/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.109463 [16864/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.239561 [16896/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.152241 [16928/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.269413 [16960/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.290491 [16992/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.250447 [17024/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.160690 [17056/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.186781 [17088/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.192514 [17120/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.187287 [17152/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.152121 [17184/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.223558 [17216/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.206994 [17248/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.176729 [17280/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.222679 [17312/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.226296 [17344/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.357538 [17376/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.123827 [17408/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.236466 [17440/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.192820 [17472/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.197101 [17504/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.135884 [17536/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.287584 [17568/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.188932 [17600/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.232409 [17632/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.259761 [17664/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.220272 [17696/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.201059 [17728/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.209227 [17760/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.247421 [17792/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.255972 [17824/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.197421 [17856/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.257303 [17888/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.112479 [17920/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.191409 [17952/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.151274 [17984/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.213042 [18016/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.251040 [18048/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.200591 [18080/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.240675 [18112/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.288123 [18144/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.253383 [18176/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.142197 [18208/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.167114 [18240/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.187088 [18272/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.228075 [18304/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.188373 [18336/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.246186 [18368/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.176973 [18400/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.151154 [18432/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.195201 [18464/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.273568 [18496/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.198805 [18528/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.228948 [18560/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.200383 [18592/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.165847 [18624/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.193924 [18656/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.175335 [18688/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.215149 [18720/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.190066 [18752/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.278645 [18784/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.136560 [18816/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.161417 [18848/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.200890 [18880/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.193459 [18912/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.153290 [18944/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.201215 [18976/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.172471 [19008/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.238663 [19040/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.168845 [19072/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.116669 [19104/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.245297 [19136/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.212202 [19168/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.173239 [19200/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.203303 [19232/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.169377 [19264/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.176766 [19296/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.124457 [19328/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.168653 [19360/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.270061 [19392/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.222892 [19424/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.181891 [19456/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.207197 [19488/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.166915 [19520/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.213938 [19552/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.183834 [19584/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.190496 [19616/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.262403 [19648/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.134143 [19680/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.227174 [19712/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.120829 [19744/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.164969 [19776/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.193118 [19808/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.165269 [19840/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.196786 [19872/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.181845 [19904/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.170833 [19936/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.213956 [19968/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.209582 [20000/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.161123 [20032/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.195003 [20064/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.133550 [20096/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.120775 [20128/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.189265 [20160/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.165089 [20192/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.191678 [20224/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.232535 [20256/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.209430 [20288/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.221583 [20320/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.185288 [20352/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.115240 [20384/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.115610 [20416/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.212768 [20448/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.108299 [20480/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.189118 [20512/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.159148 [20544/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.156078 [20576/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.271207 [20608/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.225071 [20640/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.214637 [20672/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.234361 [20704/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.209486 [20736/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.225043 [20768/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.207766 [20800/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.168573 [20832/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.235499 [20864/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.195630 [20896/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.169156 [20928/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.234921 [20960/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.158728 [20992/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.189451 [21024/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.188152 [21056/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.167643 [21088/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.297248 [21120/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.178461 [21152/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.190312 [21184/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.182262 [21216/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.311035 [21248/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.191354 [21280/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.196491 [21312/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.202553 [21344/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.176777 [21376/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.215598 [21408/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.198906 [21440/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.242312 [21472/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.190209 [21504/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.231886 [21536/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.185837 [21568/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.223620 [21600/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.164325 [21632/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.218866 [21664/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.189554 [21696/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.193158 [21728/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.173062 [21760/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.212538 [21792/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.295970 [21824/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.241928 [21856/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.135749 [21888/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.252492 [21920/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.119436 [21952/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.235623 [21984/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.197065 [22016/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.250227 [22048/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.225321 [22080/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.238068 [22112/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.196819 [22144/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.135339 [22176/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.182736 [22208/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.167340 [22240/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.240033 [22272/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.175500 [22304/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.258435 [22336/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.216147 [22368/24872]: 0%| | 0/777 [00:30<?, ?it/s]
loss: 0.216147 [22368/24872]: 90%|████████▉ | 699/777 [00:30<00:03, 23.30it/s]
loss: 0.231576 [22400/24872]: 90%|████████▉ | 699/777 [00:30<00:03, 23.30it/s]
loss: 0.276778 [22432/24872]: 90%|████████▉ | 699/777 [00:30<00:03, 23.30it/s]
loss: 0.338171 [22464/24872]: 90%|████████▉ | 699/777 [00:30<00:03, 23.30it/s]
loss: 0.276720 [22496/24872]: 90%|████████▉ | 699/777 [00:30<00:03, 23.30it/s]
loss: 0.238136 [22528/24872]: 90%|████████▉ | 699/777 [00:30<00:03, 23.30it/s]
loss: 0.208543 [22560/24872]: 90%|████████▉ | 699/777 [00:30<00:03, 23.30it/s]
loss: 0.142007 [22592/24872]: 90%|████████▉ | 699/777 [00:30<00:03, 23.30it/s]
loss: 0.122726 [22624/24872]: 90%|████████▉ | 699/777 [00:30<00:03, 23.30it/s]
loss: 0.213651 [22656/24872]: 90%|████████▉ | 699/777 [00:30<00:03, 23.30it/s]
loss: 0.205730 [22688/24872]: 90%|████████▉ | 699/777 [00:30<00:03, 23.30it/s]
loss: 0.159760 [22720/24872]: 90%|████████▉ | 699/777 [00:30<00:03, 23.30it/s]
loss: 0.199936 [22752/24872]: 90%|████████▉ | 699/777 [00:30<00:03, 23.30it/s]
loss: 0.172316 [22784/24872]: 90%|████████▉ | 699/777 [00:30<00:03, 23.30it/s]
loss: 0.206833 [22816/24872]: 90%|████████▉ | 699/777 [00:30<00:03, 23.30it/s]
loss: 0.180921 [22848/24872]: 90%|████████▉ | 699/777 [00:30<00:03, 23.30it/s]
loss: 0.123071 [22880/24872]: 90%|████████▉ | 699/777 [00:30<00:03, 23.30it/s]
loss: 0.151300 [22912/24872]: 90%|████████▉ | 699/777 [00:30<00:03, 23.30it/s]
loss: 0.228418 [22944/24872]: 90%|████████▉ | 699/777 [00:30<00:03, 23.30it/s]
loss: 0.301283 [22976/24872]: 90%|████████▉ | 699/777 [00:30<00:03, 23.30it/s]
loss: 0.187222 [23008/24872]: 90%|████████▉ | 699/777 [00:30<00:03, 23.30it/s]
loss: 0.188639 [23040/24872]: 90%|████████▉ | 699/777 [00:30<00:03, 23.30it/s]
loss: 0.262789 [23072/24872]: 90%|████████▉ | 699/777 [00:30<00:03, 23.30it/s]
loss: 0.204827 [23104/24872]: 90%|████████▉ | 699/777 [00:30<00:03, 23.30it/s]
loss: 0.257445 [23136/24872]: 90%|████████▉ | 699/777 [00:31<00:03, 23.30it/s]
loss: 0.206571 [23168/24872]: 90%|████████▉ | 699/777 [00:31<00:03, 23.30it/s]
loss: 0.169577 [23200/24872]: 90%|████████▉ | 699/777 [00:31<00:03, 23.30it/s]
loss: 0.183833 [23232/24872]: 90%|████████▉ | 699/777 [00:31<00:03, 23.30it/s]
loss: 0.090571 [23264/24872]: 90%|████████▉ | 699/777 [00:31<00:03, 23.30it/s]
loss: 0.154409 [23296/24872]: 90%|████████▉ | 699/777 [00:31<00:03, 23.30it/s]
loss: 0.167371 [23328/24872]: 90%|████████▉ | 699/777 [00:31<00:03, 23.30it/s]
loss: 0.211348 [23360/24872]: 90%|████████▉ | 699/777 [00:31<00:03, 23.30it/s]
loss: 0.203880 [23392/24872]: 90%|████████▉ | 699/777 [00:31<00:03, 23.30it/s]
loss: 0.223213 [23424/24872]: 90%|████████▉ | 699/777 [00:31<00:03, 23.30it/s]
loss: 0.193430 [23456/24872]: 90%|████████▉ | 699/777 [00:31<00:03, 23.30it/s]
loss: 0.137384 [23488/24872]: 90%|████████▉ | 699/777 [00:31<00:03, 23.30it/s]
loss: 0.174576 [23520/24872]: 90%|████████▉ | 699/777 [00:31<00:03, 23.30it/s]
loss: 0.118255 [23552/24872]: 90%|████████▉ | 699/777 [00:31<00:03, 23.30it/s]
loss: 0.232129 [23584/24872]: 90%|████████▉ | 699/777 [00:31<00:03, 23.30it/s]
loss: 0.186960 [23616/24872]: 90%|████████▉ | 699/777 [00:31<00:03, 23.30it/s]
loss: 0.287638 [23648/24872]: 90%|████████▉ | 699/777 [00:31<00:03, 23.30it/s]
loss: 0.151503 [23680/24872]: 90%|████████▉ | 699/777 [00:31<00:03, 23.30it/s]
loss: 0.133722 [23712/24872]: 90%|████████▉ | 699/777 [00:31<00:03, 23.30it/s]
loss: 0.219353 [23744/24872]: 90%|████████▉ | 699/777 [00:31<00:03, 23.30it/s]
loss: 0.194315 [23776/24872]: 90%|████████▉ | 699/777 [00:31<00:03, 23.30it/s]
loss: 0.272305 [23808/24872]: 90%|████████▉ | 699/777 [00:31<00:03, 23.30it/s]
loss: 0.130208 [23840/24872]: 90%|████████▉ | 699/777 [00:31<00:03, 23.30it/s]
loss: 0.147776 [23872/24872]: 90%|████████▉ | 699/777 [00:31<00:03, 23.30it/s]
loss: 0.184080 [23904/24872]: 90%|████████▉ | 699/777 [00:32<00:03, 23.30it/s]
loss: 0.280891 [23936/24872]: 90%|████████▉ | 699/777 [00:32<00:03, 23.30it/s]
loss: 0.210585 [23968/24872]: 90%|████████▉ | 699/777 [00:32<00:03, 23.30it/s]
loss: 0.170937 [24000/24872]: 90%|████████▉ | 699/777 [00:32<00:03, 23.30it/s]
loss: 0.176377 [24032/24872]: 90%|████████▉ | 699/777 [00:32<00:03, 23.30it/s]
loss: 0.199637 [24064/24872]: 90%|████████▉ | 699/777 [00:32<00:03, 23.30it/s]
loss: 0.132566 [24096/24872]: 90%|████████▉ | 699/777 [00:32<00:03, 23.30it/s]
loss: 0.149191 [24128/24872]: 90%|████████▉ | 699/777 [00:32<00:03, 23.30it/s]
loss: 0.135714 [24160/24872]: 90%|████████▉ | 699/777 [00:32<00:03, 23.30it/s]
loss: 0.067088 [24192/24872]: 90%|████████▉ | 699/777 [00:32<00:03, 23.30it/s]
loss: 0.080955 [24224/24872]: 90%|████████▉ | 699/777 [00:32<00:03, 23.30it/s]
loss: 0.223045 [24256/24872]: 90%|████████▉ | 699/777 [00:32<00:03, 23.30it/s]
loss: 0.131048 [24288/24872]: 90%|████████▉ | 699/777 [00:32<00:03, 23.30it/s]
loss: 0.157651 [24320/24872]: 90%|████████▉ | 699/777 [00:32<00:03, 23.30it/s]
loss: 0.189970 [24352/24872]: 90%|████████▉ | 699/777 [00:32<00:03, 23.30it/s]
loss: 0.157780 [24384/24872]: 90%|████████▉ | 699/777 [00:32<00:03, 23.30it/s]
loss: 0.201600 [24416/24872]: 90%|████████▉ | 699/777 [00:32<00:03, 23.30it/s]
loss: 0.212442 [24448/24872]: 90%|████████▉ | 699/777 [00:32<00:03, 23.30it/s]
loss: 0.215208 [24480/24872]: 90%|████████▉ | 699/777 [00:32<00:03, 23.30it/s]
loss: 0.104911 [24512/24872]: 90%|████████▉ | 699/777 [00:32<00:03, 23.30it/s]
loss: 0.161324 [24544/24872]: 90%|████████▉ | 699/777 [00:32<00:03, 23.30it/s]
loss: 0.150344 [24576/24872]: 90%|████████▉ | 699/777 [00:32<00:03, 23.30it/s]
loss: 0.147008 [24608/24872]: 90%|████████▉ | 699/777 [00:32<00:03, 23.30it/s]
loss: 0.201246 [24640/24872]: 90%|████████▉ | 699/777 [00:33<00:03, 23.30it/s]
loss: 0.145471 [24672/24872]: 90%|████████▉ | 699/777 [00:33<00:03, 23.30it/s]
loss: 0.214398 [24704/24872]: 90%|████████▉ | 699/777 [00:33<00:03, 23.30it/s]
loss: 0.164375 [24736/24872]: 90%|████████▉ | 699/777 [00:33<00:03, 23.30it/s]
loss: 0.225740 [24768/24872]: 90%|████████▉ | 699/777 [00:33<00:03, 23.30it/s]
loss: 0.174293 [24800/24872]: 90%|████████▉ | 699/777 [00:33<00:03, 23.30it/s]
loss: 0.143668 [24832/24872]: 90%|████████▉ | 699/777 [00:33<00:03, 23.30it/s]
loss: 0.186313 [24864/24872]: 90%|████████▉ | 699/777 [00:33<00:03, 23.30it/s]
loss: 0.134436 [24872/24872]: 90%|████████▉ | 699/777 [00:33<00:03, 23.30it/s]
loss: 0.134436 [24872/24872]: : 778it [00:33, 23.34it/s]
-------------------------------
LR=0.0001, batch_size=64
-------------------------------
Epoch 1, time=166.67s
0%| | 0/388 [00:00<?, ?it/s]
loss: 0.144158 [ 64/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.938114 [ 128/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.216651 [ 192/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.333802 [ 256/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.399317 [ 320/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.428313 [ 384/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.291134 [ 448/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.264903 [ 512/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.229243 [ 576/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.354436 [ 640/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.300824 [ 704/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.279724 [ 768/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.246788 [ 832/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.277786 [ 896/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.266368 [ 960/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.244360 [ 1024/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.242879 [ 1088/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.209085 [ 1152/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.264400 [ 1216/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.162860 [ 1280/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.173941 [ 1344/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.200744 [ 1408/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.227800 [ 1472/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.200097 [ 1536/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.188385 [ 1600/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.161728 [ 1664/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.214589 [ 1728/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.156758 [ 1792/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.121906 [ 1856/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.199353 [ 1920/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.220811 [ 1984/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.187175 [ 2048/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.196131 [ 2112/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.206815 [ 2176/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.184157 [ 2240/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.180445 [ 2304/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.234566 [ 2368/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.241754 [ 2432/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.181813 [ 2496/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.173994 [ 2560/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.178993 [ 2624/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.209888 [ 2688/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.169515 [ 2752/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.200038 [ 2816/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.189498 [ 2880/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.184609 [ 2944/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.176138 [ 3008/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.184421 [ 3072/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.158317 [ 3136/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.225818 [ 3200/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.172568 [ 3264/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.139140 [ 3328/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.163853 [ 3392/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.121372 [ 3456/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.174323 [ 3520/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.180451 [ 3584/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.179633 [ 3648/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.227829 [ 3712/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.181421 [ 3776/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.182915 [ 3840/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.111076 [ 3904/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.158244 [ 3968/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.200597 [ 4032/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.176800 [ 4096/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.158035 [ 4160/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.170064 [ 4224/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.157136 [ 4288/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.134802 [ 4352/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.188117 [ 4416/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.257061 [ 4480/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.150202 [ 4544/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.131180 [ 4608/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.136870 [ 4672/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.144282 [ 4736/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.177554 [ 4800/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.157625 [ 4864/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.195341 [ 4928/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.132661 [ 4992/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.200287 [ 5056/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.205789 [ 5120/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.144657 [ 5184/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.161607 [ 5248/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.195541 [ 5312/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.165166 [ 5376/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.168982 [ 5440/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.183404 [ 5504/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.211256 [ 5568/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.172738 [ 5632/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.214477 [ 5696/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.186167 [ 5760/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.236867 [ 5824/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.170520 [ 5888/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.134491 [ 5952/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.186401 [ 6016/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.152312 [ 6080/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.249595 [ 6144/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.156942 [ 6208/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.191051 [ 6272/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.169561 [ 6336/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.222326 [ 6400/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.172449 [ 6464/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.199601 [ 6528/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.192800 [ 6592/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.157470 [ 6656/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.191787 [ 6720/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.138800 [ 6784/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.186611 [ 6848/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.153492 [ 6912/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.163359 [ 6976/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.195769 [ 7040/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.242099 [ 7104/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.179939 [ 7168/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.181576 [ 7232/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.151813 [ 7296/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.144172 [ 7360/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.169324 [ 7424/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.188829 [ 7488/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.150889 [ 7552/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.195110 [ 7616/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.153907 [ 7680/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.263301 [ 7744/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.206343 [ 7808/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.141907 [ 7872/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.176950 [ 7936/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.184638 [ 8000/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.176437 [ 8064/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.151714 [ 8128/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.155199 [ 8192/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.185403 [ 8256/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.224320 [ 8320/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.152071 [ 8384/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.171942 [ 8448/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.179121 [ 8512/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.160633 [ 8576/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.136041 [ 8640/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.202637 [ 8704/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.183075 [ 8768/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.239430 [ 8832/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.139389 [ 8896/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.189289 [ 8960/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.190109 [ 9024/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.192792 [ 9088/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.146582 [ 9152/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.165873 [ 9216/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.216944 [ 9280/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.199044 [ 9344/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.150808 [ 9408/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.167700 [ 9472/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.167845 [ 9536/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.164878 [ 9600/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.181394 [ 9664/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.196384 [ 9728/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.139403 [ 9792/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.194606 [ 9856/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.136174 [ 9920/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.137648 [ 9984/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.235520 [10048/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.164732 [10112/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.161386 [10176/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.157338 [10240/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.215528 [10304/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.153288 [10368/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.133960 [10432/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.163721 [10496/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.244327 [10560/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.203101 [10624/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.155353 [10688/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.158202 [10752/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.218096 [10816/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.141387 [10880/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.160720 [10944/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.178667 [11008/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.171935 [11072/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.138038 [11136/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.123044 [11200/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.152991 [11264/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.176629 [11328/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.165948 [11392/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.181134 [11456/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.123079 [11520/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.200205 [11584/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.229329 [11648/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.137667 [11712/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.185984 [11776/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.148808 [11840/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.247056 [11904/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.153572 [11968/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.161747 [12032/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.145781 [12096/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.189694 [12160/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.211665 [12224/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.127834 [12288/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.163445 [12352/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.147238 [12416/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.251615 [12480/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.152941 [12544/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.147805 [12608/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.177388 [12672/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.135101 [12736/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.212210 [12800/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.160790 [12864/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.127197 [12928/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.211107 [12992/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.179172 [13056/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.166725 [13120/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.178327 [13184/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.144532 [13248/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.204152 [13312/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.166884 [13376/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.163914 [13440/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.174869 [13504/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.161635 [13568/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.234777 [13632/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.153057 [13696/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.163051 [13760/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.157300 [13824/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.162136 [13888/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.115503 [13952/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.153728 [14016/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.179210 [14080/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.170523 [14144/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.158819 [14208/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.187970 [14272/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.175608 [14336/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.223222 [14400/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.149458 [14464/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.152445 [14528/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.169683 [14592/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.235063 [14656/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.157225 [14720/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.212271 [14784/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.193205 [14848/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.150423 [14912/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.204599 [14976/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.196050 [15040/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.192309 [15104/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.192356 [15168/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.140494 [15232/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.181326 [15296/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.177352 [15360/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.183116 [15424/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.236355 [15488/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.142910 [15552/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.235558 [15616/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.136062 [15680/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.164414 [15744/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.159751 [15808/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.192582 [15872/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.150228 [15936/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.196307 [16000/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.102892 [16064/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.189261 [16128/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.159775 [16192/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.184205 [16256/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.179411 [16320/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.189648 [16384/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.226946 [16448/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.186242 [16512/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.212849 [16576/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.219846 [16640/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.150357 [16704/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.205292 [16768/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.194070 [16832/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.161726 [16896/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.162523 [16960/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.216178 [17024/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.141556 [17088/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.162644 [17152/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.148931 [17216/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.141381 [17280/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.181968 [17344/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.176305 [17408/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.175055 [17472/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.138035 [17536/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.200540 [17600/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.282810 [17664/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.160777 [17728/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.201345 [17792/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.171931 [17856/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.132491 [17920/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.149304 [17984/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.186585 [18048/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.175539 [18112/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.240344 [18176/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.102235 [18240/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.166466 [18304/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.177666 [18368/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.115077 [18432/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.172527 [18496/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.180815 [18560/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.143014 [18624/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.146115 [18688/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.171022 [18752/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.185284 [18816/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.150688 [18880/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.144963 [18944/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.169418 [19008/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.169687 [19072/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.150342 [19136/24872]: 0%| | 0/388 [00:17<?, ?it/s]
loss: 0.157838 [19200/24872]: 0%| | 0/388 [00:17<?, ?it/s]
loss: 0.172254 [19264/24872]: 0%| | 0/388 [00:17<?, ?it/s]
loss: 0.119745 [19328/24872]: 0%| | 0/388 [00:17<?, ?it/s]
loss: 0.184877 [19392/24872]: 0%| | 0/388 [00:17<?, ?it/s]
loss: 0.158548 [19456/24872]: 0%| | 0/388 [00:17<?, ?it/s]
loss: 0.160986 [19520/24872]: 0%| | 0/388 [00:17<?, ?it/s]
loss: 0.141557 [19584/24872]: 0%| | 0/388 [00:17<?, ?it/s]
loss: 0.194198 [19648/24872]: 0%| | 0/388 [00:17<?, ?it/s]
loss: 0.147157 [19712/24872]: 0%| | 0/388 [00:17<?, ?it/s]
loss: 0.120654 [19776/24872]: 0%| | 0/388 [00:17<?, ?it/s]
loss: 0.157699 [19840/24872]: 0%| | 0/388 [00:17<?, ?it/s]
loss: 0.162253 [19904/24872]: 0%| | 0/388 [00:17<?, ?it/s]
loss: 0.170691 [19968/24872]: 0%| | 0/388 [00:17<?, ?it/s]
loss: 0.172888 [20032/24872]: 0%| | 0/388 [00:17<?, ?it/s]
loss: 0.149126 [20096/24872]: 0%| | 0/388 [00:17<?, ?it/s]
loss: 0.138272 [20160/24872]: 0%| | 0/388 [00:17<?, ?it/s]
loss: 0.153478 [20224/24872]: 0%| | 0/388 [00:18<?, ?it/s]
loss: 0.194284 [20288/24872]: 0%| | 0/388 [00:18<?, ?it/s]
loss: 0.185828 [20352/24872]: 0%| | 0/388 [00:18<?, ?it/s]
loss: 0.102227 [20416/24872]: 0%| | 0/388 [00:18<?, ?it/s]
loss: 0.149702 [20480/24872]: 0%| | 0/388 [00:18<?, ?it/s]
loss: 0.131688 [20544/24872]: 0%| | 0/388 [00:18<?, ?it/s]
loss: 0.184380 [20608/24872]: 0%| | 0/388 [00:18<?, ?it/s]
loss: 0.208491 [20672/24872]: 0%| | 0/388 [00:18<?, ?it/s]
loss: 0.175052 [20736/24872]: 0%| | 0/388 [00:18<?, ?it/s]
loss: 0.155147 [20800/24872]: 0%| | 0/388 [00:18<?, ?it/s]
loss: 0.170095 [20864/24872]: 0%| | 0/388 [00:18<?, ?it/s]
loss: 0.143670 [20928/24872]: 0%| | 0/388 [00:18<?, ?it/s]
loss: 0.156000 [20992/24872]: 0%| | 0/388 [00:18<?, ?it/s]
loss: 0.158235 [21056/24872]: 0%| | 0/388 [00:18<?, ?it/s]
loss: 0.197649 [21120/24872]: 0%| | 0/388 [00:18<?, ?it/s]
loss: 0.166921 [21184/24872]: 0%| | 0/388 [00:18<?, ?it/s]
loss: 0.209182 [21248/24872]: 0%| | 0/388 [00:18<?, ?it/s]
loss: 0.179632 [21312/24872]: 0%| | 0/388 [00:18<?, ?it/s]
loss: 0.161195 [21376/24872]: 0%| | 0/388 [00:19<?, ?it/s]
loss: 0.165799 [21440/24872]: 0%| | 0/388 [00:19<?, ?it/s]
loss: 0.192891 [21504/24872]: 0%| | 0/388 [00:19<?, ?it/s]
loss: 0.137531 [21568/24872]: 0%| | 0/388 [00:19<?, ?it/s]
loss: 0.163331 [21632/24872]: 0%| | 0/388 [00:19<?, ?it/s]
loss: 0.177518 [21696/24872]: 0%| | 0/388 [00:19<?, ?it/s]
loss: 0.152814 [21760/24872]: 0%| | 0/388 [00:19<?, ?it/s]
loss: 0.213328 [21824/24872]: 0%| | 0/388 [00:19<?, ?it/s]
loss: 0.171854 [21888/24872]: 0%| | 0/388 [00:19<?, ?it/s]
loss: 0.147944 [21952/24872]: 0%| | 0/388 [00:19<?, ?it/s]
loss: 0.168628 [22016/24872]: 0%| | 0/388 [00:19<?, ?it/s]
loss: 0.191429 [22080/24872]: 0%| | 0/388 [00:19<?, ?it/s]
loss: 0.174203 [22144/24872]: 0%| | 0/388 [00:19<?, ?it/s]
loss: 0.117497 [22208/24872]: 0%| | 0/388 [00:19<?, ?it/s]
loss: 0.175690 [22272/24872]: 0%| | 0/388 [00:19<?, ?it/s]
loss: 0.181269 [22336/24872]: 0%| | 0/388 [00:19<?, ?it/s]
loss: 0.174409 [22400/24872]: 0%| | 0/388 [00:19<?, ?it/s]
loss: 0.243074 [22464/24872]: 0%| | 0/388 [00:19<?, ?it/s]
loss: 0.195544 [22528/24872]: 0%| | 0/388 [00:20<?, ?it/s]
loss: 0.129129 [22592/24872]: 0%| | 0/388 [00:20<?, ?it/s]
loss: 0.123174 [22656/24872]: 0%| | 0/388 [00:20<?, ?it/s]
loss: 0.140171 [22720/24872]: 0%| | 0/388 [00:20<?, ?it/s]
loss: 0.153418 [22784/24872]: 0%| | 0/388 [00:20<?, ?it/s]
loss: 0.142164 [22848/24872]: 0%| | 0/388 [00:20<?, ?it/s]
loss: 0.112045 [22912/24872]: 0%| | 0/388 [00:20<?, ?it/s]
loss: 0.233135 [22976/24872]: 0%| | 0/388 [00:20<?, ?it/s]
loss: 0.135737 [23040/24872]: 0%| | 0/388 [00:20<?, ?it/s]
loss: 0.213059 [23104/24872]: 0%| | 0/388 [00:20<?, ?it/s]
loss: 0.208916 [23168/24872]: 0%| | 0/388 [00:20<?, ?it/s]
loss: 0.145352 [23232/24872]: 0%| | 0/388 [00:20<?, ?it/s]
loss: 0.104388 [23296/24872]: 0%| | 0/388 [00:20<?, ?it/s]
loss: 0.159143 [23360/24872]: 0%| | 0/388 [00:20<?, ?it/s]
loss: 0.182331 [23424/24872]: 0%| | 0/388 [00:20<?, ?it/s]
loss: 0.140811 [23488/24872]: 0%| | 0/388 [00:20<?, ?it/s]
loss: 0.127064 [23552/24872]: 0%| | 0/388 [00:21<?, ?it/s]
loss: 0.208059 [23616/24872]: 0%| | 0/388 [00:21<?, ?it/s]
loss: 0.195159 [23680/24872]: 0%| | 0/388 [00:21<?, ?it/s]
loss: 0.195969 [23744/24872]: 0%| | 0/388 [00:21<?, ?it/s]
loss: 0.227035 [23808/24872]: 0%| | 0/388 [00:21<?, ?it/s]
loss: 0.126470 [23872/24872]: 0%| | 0/388 [00:21<?, ?it/s]
loss: 0.211717 [23936/24872]: 0%| | 0/388 [00:21<?, ?it/s]
loss: 0.180392 [24000/24872]: 0%| | 0/388 [00:21<?, ?it/s]
loss: 0.197398 [24064/24872]: 0%| | 0/388 [00:21<?, ?it/s]
loss: 0.115586 [24128/24872]: 0%| | 0/388 [00:21<?, ?it/s]
loss: 0.093552 [24192/24872]: 0%| | 0/388 [00:21<?, ?it/s]
loss: 0.142782 [24256/24872]: 0%| | 0/388 [00:21<?, ?it/s]
loss: 0.138936 [24320/24872]: 0%| | 0/388 [00:21<?, ?it/s]
loss: 0.157615 [24384/24872]: 0%| | 0/388 [00:21<?, ?it/s]
loss: 0.172042 [24448/24872]: 0%| | 0/388 [00:21<?, ?it/s]
loss: 0.145335 [24512/24872]: 0%| | 0/388 [00:21<?, ?it/s]
loss: 0.151493 [24576/24872]: 0%| | 0/388 [00:21<?, ?it/s]
loss: 0.163186 [24640/24872]: 0%| | 0/388 [00:21<?, ?it/s]
loss: 0.165942 [24704/24872]: 0%| | 0/388 [00:22<?, ?it/s]
loss: 0.193864 [24768/24872]: 0%| | 0/388 [00:22<?, ?it/s]
loss: 0.159247 [24832/24872]: 0%| | 0/388 [00:22<?, ?it/s]
loss: 0.148817 [24872/24872]: 0%| | 0/388 [00:22<?, ?it/s]
loss: 0.148817 [24872/24872]: : 389it [00:22, 17.52it/s]
Epoch 2, time=188.88s
0%| | 0/388 [00:00<?, ?it/s]
loss: 0.117926 [ 64/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.234955 [ 128/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.153669 [ 192/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.223500 [ 256/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.163820 [ 320/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.176037 [ 384/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.158497 [ 448/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.192161 [ 512/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.138778 [ 576/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.187142 [ 640/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.164111 [ 704/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.218250 [ 768/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.183511 [ 832/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.242875 [ 896/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.194694 [ 960/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.185493 [ 1024/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.155363 [ 1088/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.183778 [ 1152/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.206733 [ 1216/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.154360 [ 1280/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.140729 [ 1344/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.163866 [ 1408/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.183636 [ 1472/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.156602 [ 1536/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.164443 [ 1600/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.145207 [ 1664/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.180595 [ 1728/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.113295 [ 1792/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.108923 [ 1856/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.186074 [ 1920/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.207876 [ 1984/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.167292 [ 2048/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.163692 [ 2112/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.169755 [ 2176/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.161703 [ 2240/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.153230 [ 2304/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.227024 [ 2368/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.218592 [ 2432/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.168101 [ 2496/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.147298 [ 2560/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.161769 [ 2624/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.213295 [ 2688/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.153864 [ 2752/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.177859 [ 2816/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.169789 [ 2880/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.173673 [ 2944/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.163730 [ 3008/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.177804 [ 3072/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.142609 [ 3136/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.206260 [ 3200/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.164447 [ 3264/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.136610 [ 3328/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.164943 [ 3392/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.106837 [ 3456/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.159031 [ 3520/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.168550 [ 3584/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.166106 [ 3648/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.225630 [ 3712/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.177834 [ 3776/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.182298 [ 3840/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.112277 [ 3904/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.151748 [ 3968/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.197454 [ 4032/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.168454 [ 4096/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.167119 [ 4160/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.170057 [ 4224/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.140817 [ 4288/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.123754 [ 4352/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.185972 [ 4416/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.263131 [ 4480/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.135263 [ 4544/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.123029 [ 4608/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.132816 [ 4672/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.139309 [ 4736/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.167592 [ 4800/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.143989 [ 4864/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.185370 [ 4928/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.124501 [ 4992/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.162801 [ 5056/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.183547 [ 5120/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.137843 [ 5184/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.138788 [ 5248/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.190911 [ 5312/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.156730 [ 5376/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.153540 [ 5440/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.178675 [ 5504/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.217629 [ 5568/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.154752 [ 5632/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.192255 [ 5696/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.167748 [ 5760/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.208854 [ 5824/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.158388 [ 5888/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.130929 [ 5952/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.167483 [ 6016/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.151688 [ 6080/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.237416 [ 6144/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.146303 [ 6208/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.186888 [ 6272/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.159511 [ 6336/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.223734 [ 6400/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.188168 [ 6464/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.185903 [ 6528/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.200000 [ 6592/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.149109 [ 6656/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.189656 [ 6720/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.137202 [ 6784/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.186018 [ 6848/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.169898 [ 6912/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.160009 [ 6976/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.195117 [ 7040/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.265044 [ 7104/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.159485 [ 7168/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.180731 [ 7232/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.141231 [ 7296/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.129134 [ 7360/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.180921 [ 7424/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.211422 [ 7488/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.146721 [ 7552/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.227159 [ 7616/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.149692 [ 7680/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.228626 [ 7744/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.205912 [ 7808/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.153350 [ 7872/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.159306 [ 7936/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.195654 [ 8000/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.178341 [ 8064/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.142068 [ 8128/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.134292 [ 8192/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.180446 [ 8256/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.202860 [ 8320/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.144343 [ 8384/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.161386 [ 8448/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.161643 [ 8512/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.149965 [ 8576/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.134901 [ 8640/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.211319 [ 8704/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.181994 [ 8768/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.215396 [ 8832/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.147732 [ 8896/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.171225 [ 8960/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.202439 [ 9024/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.170740 [ 9088/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.140000 [ 9152/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.156233 [ 9216/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.211525 [ 9280/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.194288 [ 9344/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.146039 [ 9408/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.191195 [ 9472/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.168891 [ 9536/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.164829 [ 9600/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.180529 [ 9664/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.193526 [ 9728/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.138363 [ 9792/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.197064 [ 9856/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.135673 [ 9920/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.145665 [ 9984/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.238171 [10048/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.151467 [10112/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.182695 [10176/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.157721 [10240/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.203452 [10304/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.162549 [10368/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.118399 [10432/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.151662 [10496/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.233755 [10560/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.194311 [10624/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.163050 [10688/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.147847 [10752/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.207608 [10816/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.157740 [10880/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.170698 [10944/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.163651 [11008/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.201783 [11072/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.151039 [11136/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.115051 [11200/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.175117 [11264/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.183431 [11328/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.162003 [11392/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.192837 [11456/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.127385 [11520/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.185276 [11584/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.223745 [11648/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.141305 [11712/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.185309 [11776/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.154283 [11840/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.253513 [11904/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.148753 [11968/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.170915 [12032/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.136883 [12096/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.187341 [12160/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.224566 [12224/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.124854 [12288/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.154344 [12352/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.159527 [12416/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.238626 [12480/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.168095 [12544/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.159036 [12608/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.178164 [12672/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.165397 [12736/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.192700 [12800/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.171056 [12864/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.132837 [12928/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.208912 [12992/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.201228 [13056/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.162262 [13120/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.189236 [13184/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.145364 [13248/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.193576 [13312/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.162890 [13376/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.162567 [13440/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.171829 [13504/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.151062 [13568/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.212329 [13632/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.151473 [13696/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.159439 [13760/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.149195 [13824/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.157869 [13888/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.109933 [13952/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.147834 [14016/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.171004 [14080/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.168866 [14144/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.155187 [14208/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.183532 [14272/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.154542 [14336/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.221765 [14400/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.139958 [14464/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.145943 [14528/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.171465 [14592/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.220764 [14656/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.169625 [14720/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.183445 [14784/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.187302 [14848/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.126714 [14912/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.179709 [14976/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.178156 [15040/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.173891 [15104/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.174504 [15168/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.128815 [15232/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.162523 [15296/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.167028 [15360/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.168310 [15424/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.211386 [15488/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.134509 [15552/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.207198 [15616/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.108804 [15680/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.165399 [15744/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.143440 [15808/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.174060 [15872/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.144290 [15936/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.176065 [16000/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.099100 [16064/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.159420 [16128/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.150558 [16192/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.175694 [16256/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.152513 [16320/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.173713 [16384/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.196413 [16448/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.166315 [16512/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.214101 [16576/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.195898 [16640/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.152185 [16704/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.179469 [16768/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.184750 [16832/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.167192 [16896/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.157400 [16960/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.207009 [17024/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.136797 [17088/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.176737 [17152/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.152514 [17216/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.139917 [17280/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.168395 [17344/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.188654 [17408/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.165159 [17472/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.134860 [17536/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.219302 [17600/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.273981 [17664/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.169926 [17728/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.182532 [17792/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.172316 [17856/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.139722 [17920/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.141642 [17984/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.197293 [18048/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.187303 [18112/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.243670 [18176/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.108122 [18240/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.162092 [18304/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.166655 [18368/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.129833 [18432/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.165259 [18496/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.174067 [18560/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.145269 [18624/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.157426 [18688/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.161034 [18752/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.193480 [18816/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.145448 [18880/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.154642 [18944/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.153990 [19008/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.158169 [19072/24872]: 0%| | 0/388 [00:17<?, ?it/s]
loss: 0.144179 [19136/24872]: 0%| | 0/388 [00:17<?, ?it/s]
loss: 0.151214 [19200/24872]: 0%| | 0/388 [00:17<?, ?it/s]
loss: 0.161270 [19264/24872]: 0%| | 0/388 [00:17<?, ?it/s]
loss: 0.128903 [19328/24872]: 0%| | 0/388 [00:17<?, ?it/s]
loss: 0.189257 [19392/24872]: 0%| | 0/388 [00:17<?, ?it/s]
loss: 0.150511 [19456/24872]: 0%| | 0/388 [00:17<?, ?it/s]
loss: 0.142436 [19520/24872]: 0%| | 0/388 [00:17<?, ?it/s]
loss: 0.140509 [19584/24872]: 0%| | 0/388 [00:17<?, ?it/s]
loss: 0.180986 [19648/24872]: 0%| | 0/388 [00:17<?, ?it/s]
loss: 0.136254 [19712/24872]: 0%| | 0/388 [00:17<?, ?it/s]
loss: 0.115826 [19776/24872]: 0%| | 0/388 [00:17<?, ?it/s]
loss: 0.142784 [19840/24872]: 0%| | 0/388 [00:17<?, ?it/s]
loss: 0.164365 [19904/24872]: 0%| | 0/388 [00:17<?, ?it/s]
loss: 0.163634 [19968/24872]: 0%| | 0/388 [00:17<?, ?it/s]
loss: 0.174100 [20032/24872]: 0%| | 0/388 [00:17<?, ?it/s]
loss: 0.143330 [20096/24872]: 0%| | 0/388 [00:17<?, ?it/s]
loss: 0.144127 [20160/24872]: 0%| | 0/388 [00:17<?, ?it/s]
loss: 0.134422 [20224/24872]: 0%| | 0/388 [00:18<?, ?it/s]
loss: 0.180661 [20288/24872]: 0%| | 0/388 [00:18<?, ?it/s]
loss: 0.168721 [20352/24872]: 0%| | 0/388 [00:18<?, ?it/s]
loss: 0.106523 [20416/24872]: 0%| | 0/388 [00:18<?, ?it/s]
loss: 0.157981 [20480/24872]: 0%| | 0/388 [00:18<?, ?it/s]
loss: 0.126697 [20544/24872]: 0%| | 0/388 [00:18<?, ?it/s]
loss: 0.159450 [20608/24872]: 0%| | 0/388 [00:18<?, ?it/s]
loss: 0.180535 [20672/24872]: 0%| | 0/388 [00:18<?, ?it/s]
loss: 0.168344 [20736/24872]: 0%| | 0/388 [00:18<?, ?it/s]
loss: 0.158344 [20800/24872]: 0%| | 0/388 [00:18<?, ?it/s]
loss: 0.157213 [20864/24872]: 0%| | 0/388 [00:18<?, ?it/s]
loss: 0.141387 [20928/24872]: 0%| | 0/388 [00:18<?, ?it/s]
loss: 0.169833 [20992/24872]: 0%| | 0/388 [00:18<?, ?it/s]
loss: 0.144012 [21056/24872]: 0%| | 0/388 [00:18<?, ?it/s]
loss: 0.197937 [21120/24872]: 0%| | 0/388 [00:18<?, ?it/s]
loss: 0.150839 [21184/24872]: 0%| | 0/388 [00:18<?, ?it/s]
loss: 0.182124 [21248/24872]: 0%| | 0/388 [00:18<?, ?it/s]
loss: 0.153007 [21312/24872]: 0%| | 0/388 [00:19<?, ?it/s]
loss: 0.153817 [21376/24872]: 0%| | 0/388 [00:19<?, ?it/s]
loss: 0.167008 [21440/24872]: 0%| | 0/388 [00:19<?, ?it/s]
loss: 0.185802 [21504/24872]: 0%| | 0/388 [00:19<?, ?it/s]
loss: 0.120471 [21568/24872]: 0%| | 0/388 [00:19<?, ?it/s]
loss: 0.144688 [21632/24872]: 0%| | 0/388 [00:19<?, ?it/s]
loss: 0.172951 [21696/24872]: 0%| | 0/388 [00:19<?, ?it/s]
loss: 0.152907 [21760/24872]: 0%| | 0/388 [00:19<?, ?it/s]
loss: 0.190717 [21824/24872]: 0%| | 0/388 [00:19<?, ?it/s]
loss: 0.157107 [21888/24872]: 0%| | 0/388 [00:19<?, ?it/s]
loss: 0.148580 [21952/24872]: 0%| | 0/388 [00:19<?, ?it/s]
loss: 0.174680 [22016/24872]: 0%| | 0/388 [00:19<?, ?it/s]
loss: 0.177418 [22080/24872]: 0%| | 0/388 [00:19<?, ?it/s]
loss: 0.161256 [22144/24872]: 0%| | 0/388 [00:19<?, ?it/s]
loss: 0.121843 [22208/24872]: 0%| | 0/388 [00:19<?, ?it/s]
loss: 0.158382 [22272/24872]: 0%| | 0/388 [00:19<?, ?it/s]
loss: 0.182522 [22336/24872]: 0%| | 0/388 [00:19<?, ?it/s]
loss: 0.162353 [22400/24872]: 0%| | 0/388 [00:19<?, ?it/s]
loss: 0.212829 [22464/24872]: 0%| | 0/388 [00:20<?, ?it/s]
loss: 0.186551 [22528/24872]: 0%| | 0/388 [00:20<?, ?it/s]
loss: 0.133498 [22592/24872]: 0%| | 0/388 [00:20<?, ?it/s]
loss: 0.104712 [22656/24872]: 0%| | 0/388 [00:20<?, ?it/s]
loss: 0.138579 [22720/24872]: 0%| | 0/388 [00:20<?, ?it/s]
loss: 0.134104 [22784/24872]: 0%| | 0/388 [00:20<?, ?it/s]
loss: 0.128846 [22848/24872]: 0%| | 0/388 [00:20<?, ?it/s]
loss: 0.107163 [22912/24872]: 0%| | 0/388 [00:20<?, ?it/s]
loss: 0.200706 [22976/24872]: 0%| | 0/388 [00:20<?, ?it/s]
loss: 0.120847 [23040/24872]: 0%| | 0/388 [00:20<?, ?it/s]
loss: 0.166933 [23104/24872]: 0%| | 0/388 [00:20<?, ?it/s]
loss: 0.191825 [23168/24872]: 0%| | 0/388 [00:20<?, ?it/s]
loss: 0.134298 [23232/24872]: 0%| | 0/388 [00:20<?, ?it/s]
loss: 0.088867 [23296/24872]: 0%| | 0/388 [00:20<?, ?it/s]
loss: 0.165746 [23360/24872]: 0%| | 0/388 [00:20<?, ?it/s]
loss: 0.176469 [23424/24872]: 0%| | 0/388 [00:20<?, ?it/s]
loss: 0.134668 [23488/24872]: 0%| | 0/388 [00:20<?, ?it/s]
loss: 0.114653 [23552/24872]: 0%| | 0/388 [00:20<?, ?it/s]
loss: 0.168694 [23616/24872]: 0%| | 0/388 [00:21<?, ?it/s]
loss: 0.176212 [23680/24872]: 0%| | 0/388 [00:21<?, ?it/s]
loss: 0.163146 [23744/24872]: 0%| | 0/388 [00:21<?, ?it/s]
loss: 0.215519 [23808/24872]: 0%| | 0/388 [00:21<?, ?it/s]
loss: 0.121452 [23872/24872]: 0%| | 0/388 [00:21<?, ?it/s]
loss: 0.197088 [23936/24872]: 0%| | 0/388 [00:21<?, ?it/s]
loss: 0.175607 [24000/24872]: 0%| | 0/388 [00:21<?, ?it/s]
loss: 0.163482 [24064/24872]: 0%| | 0/388 [00:21<?, ?it/s]
loss: 0.124969 [24128/24872]: 0%| | 0/388 [00:21<?, ?it/s]
loss: 0.077306 [24192/24872]: 0%| | 0/388 [00:21<?, ?it/s]
loss: 0.134598 [24256/24872]: 0%| | 0/388 [00:21<?, ?it/s]
loss: 0.139288 [24320/24872]: 0%| | 0/388 [00:21<?, ?it/s]
loss: 0.149131 [24384/24872]: 0%| | 0/388 [00:21<?, ?it/s]
loss: 0.163334 [24448/24872]: 0%| | 0/388 [00:21<?, ?it/s]
loss: 0.162910 [24512/24872]: 0%| | 0/388 [00:21<?, ?it/s]
loss: 0.139080 [24576/24872]: 0%| | 0/388 [00:21<?, ?it/s]
loss: 0.159032 [24640/24872]: 0%| | 0/388 [00:21<?, ?it/s]
loss: 0.167365 [24704/24872]: 0%| | 0/388 [00:22<?, ?it/s]
loss: 0.174494 [24768/24872]: 0%| | 0/388 [00:22<?, ?it/s]
loss: 0.143378 [24832/24872]: 0%| | 0/388 [00:22<?, ?it/s]
loss: 0.124543 [24872/24872]: 0%| | 0/388 [00:22<?, ?it/s]
loss: 0.124543 [24872/24872]: : 389it [00:22, 17.54it/s]
Epoch 3, time=211.06s
0%| | 0/388 [00:00<?, ?it/s]
loss: 0.124900 [ 64/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.214100 [ 128/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.139726 [ 192/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.227676 [ 256/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.152446 [ 320/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.183770 [ 384/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.161304 [ 448/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.179754 [ 512/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.174716 [ 576/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.185183 [ 640/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.184583 [ 704/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.207570 [ 768/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.165869 [ 832/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.222844 [ 896/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.193872 [ 960/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.152354 [ 1024/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.152395 [ 1088/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.172868 [ 1152/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.222874 [ 1216/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.137915 [ 1280/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.122025 [ 1344/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.158903 [ 1408/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.167958 [ 1472/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.138219 [ 1536/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.158473 [ 1600/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.136222 [ 1664/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.137971 [ 1728/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.119170 [ 1792/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.102445 [ 1856/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.169657 [ 1920/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.165805 [ 1984/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.145666 [ 2048/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.152112 [ 2112/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.157124 [ 2176/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.156686 [ 2240/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.141525 [ 2304/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.190522 [ 2368/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.210955 [ 2432/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.159031 [ 2496/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.130493 [ 2560/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.143842 [ 2624/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.171943 [ 2688/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.138036 [ 2752/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.165401 [ 2816/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.172823 [ 2880/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.178502 [ 2944/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.156739 [ 3008/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.157225 [ 3072/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.122154 [ 3136/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.190614 [ 3200/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.160761 [ 3264/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.131699 [ 3328/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.153518 [ 3392/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.112356 [ 3456/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.162152 [ 3520/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.146801 [ 3584/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.170599 [ 3648/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.199060 [ 3712/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.175273 [ 3776/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.165154 [ 3840/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.106207 [ 3904/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.147021 [ 3968/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.186027 [ 4032/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.180476 [ 4096/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.127245 [ 4160/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.171076 [ 4224/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.157243 [ 4288/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.115991 [ 4352/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.202084 [ 4416/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.261960 [ 4480/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.153742 [ 4544/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.131615 [ 4608/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.146841 [ 4672/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.164975 [ 4736/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.186917 [ 4800/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.157638 [ 4864/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.204916 [ 4928/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.121606 [ 4992/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.166762 [ 5056/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.196366 [ 5120/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.145028 [ 5184/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.128730 [ 5248/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.181393 [ 5312/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.160510 [ 5376/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.164608 [ 5440/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.176851 [ 5504/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.204917 [ 5568/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.174222 [ 5632/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.212136 [ 5696/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.170341 [ 5760/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.199404 [ 5824/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.144972 [ 5888/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.139989 [ 5952/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.169605 [ 6016/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.143533 [ 6080/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.241150 [ 6144/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.147966 [ 6208/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.168314 [ 6272/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.160834 [ 6336/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.196668 [ 6400/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.145345 [ 6464/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.169654 [ 6528/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.173934 [ 6592/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.127858 [ 6656/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.184273 [ 6720/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.118855 [ 6784/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.182129 [ 6848/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.145201 [ 6912/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.150923 [ 6976/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.166928 [ 7040/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.223784 [ 7104/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.142540 [ 7168/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.163069 [ 7232/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.136191 [ 7296/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.106684 [ 7360/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.140235 [ 7424/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.168763 [ 7488/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.147174 [ 7552/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.180668 [ 7616/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.153486 [ 7680/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.223630 [ 7744/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.185728 [ 7808/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.137994 [ 7872/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.142670 [ 7936/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.154812 [ 8000/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.165743 [ 8064/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.122488 [ 8128/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.129502 [ 8192/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.157211 [ 8256/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.197588 [ 8320/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.122681 [ 8384/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.157716 [ 8448/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.152217 [ 8512/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.139310 [ 8576/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.126936 [ 8640/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.181947 [ 8704/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.165534 [ 8768/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.207197 [ 8832/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.113067 [ 8896/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.157420 [ 8960/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.169043 [ 9024/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.149610 [ 9088/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.121549 [ 9152/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.151094 [ 9216/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.197110 [ 9280/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.138587 [ 9344/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.142694 [ 9408/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.135203 [ 9472/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.143059 [ 9536/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.150111 [ 9600/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.159014 [ 9664/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.162708 [ 9728/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.121365 [ 9792/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.170066 [ 9856/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.125597 [ 9920/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.115546 [ 9984/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.196210 [10048/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.143270 [10112/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.135376 [10176/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.148555 [10240/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.180296 [10304/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.136703 [10368/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.118354 [10432/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.142386 [10496/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.181851 [10560/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.170290 [10624/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.160811 [10688/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.128919 [10752/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.173695 [10816/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.152715 [10880/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.139711 [10944/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.173562 [11008/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.158954 [11072/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.115622 [11136/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.119852 [11200/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.142791 [11264/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.166381 [11328/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.144196 [11392/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.167259 [11456/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.116308 [11520/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.183233 [11584/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.233820 [11648/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.114730 [11712/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.150283 [11776/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.139209 [11840/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.238146 [11904/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.155904 [11968/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.162236 [12032/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.167099 [12096/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.212398 [12160/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.209142 [12224/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.131664 [12288/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.152267 [12352/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.135477 [12416/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.218137 [12480/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.146640 [12544/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.148400 [12608/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.194506 [12672/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.145626 [12736/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.183855 [12800/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.146504 [12864/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.103844 [12928/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.173879 [12992/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.166499 [13056/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.154876 [13120/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.168030 [13184/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.133866 [13248/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.168297 [13312/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.149739 [13376/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.148231 [13440/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.156045 [13504/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.117115 [13568/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.217429 [13632/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.154071 [13696/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.161132 [13760/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.142485 [13824/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.173249 [13888/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.102156 [13952/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.135660 [14016/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.171816 [14080/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.144498 [14144/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.152022 [14208/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.189892 [14272/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.161427 [14336/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.217129 [14400/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.125989 [14464/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.162346 [14528/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.159954 [14592/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.209859 [14656/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.186748 [14720/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.166125 [14784/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.194181 [14848/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.140033 [14912/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.178257 [14976/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.172502 [15040/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.190614 [15104/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.129806 [15168/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.136243 [15232/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.166531 [15296/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.151407 [15360/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.192773 [15424/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.217420 [15488/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.128763 [15552/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.234533 [15616/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.105471 [15680/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.149415 [15744/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.184641 [15808/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.150110 [15872/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.134676 [15936/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.178952 [16000/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.101259 [16064/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.155181 [16128/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.145600 [16192/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.159169 [16256/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.157831 [16320/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.163580 [16384/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.186873 [16448/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.171035 [16512/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.200325 [16576/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.206104 [16640/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.136614 [16704/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.175628 [16768/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.180085 [16832/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.154351 [16896/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.144968 [16960/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.189390 [17024/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.138235 [17088/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.131430 [17152/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.145008 [17216/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.139827 [17280/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.160167 [17344/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.162467 [17408/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.160803 [17472/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.116373 [17536/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.192374 [17600/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.237447 [17664/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.148416 [17728/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.172559 [17792/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.147971 [17856/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.125012 [17920/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.135922 [17984/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.178037 [18048/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.151892 [18112/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.218452 [18176/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.094096 [18240/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.146560 [18304/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.155416 [18368/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.116783 [18432/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.158793 [18496/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.159928 [18560/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.134889 [18624/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.134652 [18688/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.156045 [18752/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.184320 [18816/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.139634 [18880/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.157482 [18944/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.150901 [19008/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.153759 [19072/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.135873 [19136/24872]: 0%| | 0/388 [00:17<?, ?it/s]
loss: 0.123177 [19200/24872]: 0%| | 0/388 [00:17<?, ?it/s]
loss: 0.147085 [19264/24872]: 0%| | 0/388 [00:17<?, ?it/s]
loss: 0.130375 [19328/24872]: 0%| | 0/388 [00:17<?, ?it/s]
loss: 0.160519 [19392/24872]: 0%| | 0/388 [00:17<?, ?it/s]
loss: 0.133296 [19456/24872]: 0%| | 0/388 [00:17<?, ?it/s]
loss: 0.149222 [19520/24872]: 0%| | 0/388 [00:17<?, ?it/s]
loss: 0.125637 [19584/24872]: 0%| | 0/388 [00:17<?, ?it/s]
loss: 0.161563 [19648/24872]: 0%| | 0/388 [00:17<?, ?it/s]
loss: 0.125890 [19712/24872]: 0%| | 0/388 [00:17<?, ?it/s]
loss: 0.136180 [19776/24872]: 0%| | 0/388 [00:17<?, ?it/s]
loss: 0.145247 [19840/24872]: 0%| | 0/388 [00:17<?, ?it/s]
loss: 0.156817 [19904/24872]: 0%| | 0/388 [00:17<?, ?it/s]
loss: 0.167574 [19968/24872]: 0%| | 0/388 [00:17<?, ?it/s]
loss: 0.157187 [20032/24872]: 0%| | 0/388 [00:17<?, ?it/s]
loss: 0.130796 [20096/24872]: 0%| | 0/388 [00:17<?, ?it/s]
loss: 0.132834 [20160/24872]: 0%| | 0/388 [00:17<?, ?it/s]
loss: 0.146856 [20224/24872]: 0%| | 0/388 [00:18<?, ?it/s]
loss: 0.183953 [20288/24872]: 0%| | 0/388 [00:18<?, ?it/s]
loss: 0.169330 [20352/24872]: 0%| | 0/388 [00:18<?, ?it/s]
loss: 0.091053 [20416/24872]: 0%| | 0/388 [00:18<?, ?it/s]
loss: 0.166084 [20480/24872]: 0%| | 0/388 [00:18<?, ?it/s]
loss: 0.123779 [20544/24872]: 0%| | 0/388 [00:18<?, ?it/s]
loss: 0.146187 [20608/24872]: 0%| | 0/388 [00:18<?, ?it/s]
loss: 0.183075 [20672/24872]: 0%| | 0/388 [00:18<?, ?it/s]
loss: 0.164371 [20736/24872]: 0%| | 0/388 [00:18<?, ?it/s]
loss: 0.158045 [20800/24872]: 0%| | 0/388 [00:18<?, ?it/s]
loss: 0.148529 [20864/24872]: 0%| | 0/388 [00:18<?, ?it/s]
loss: 0.142336 [20928/24872]: 0%| | 0/388 [00:18<?, ?it/s]
loss: 0.147760 [20992/24872]: 0%| | 0/388 [00:18<?, ?it/s]
loss: 0.146257 [21056/24872]: 0%| | 0/388 [00:18<?, ?it/s]
loss: 0.194032 [21120/24872]: 0%| | 0/388 [00:18<?, ?it/s]
loss: 0.154300 [21184/24872]: 0%| | 0/388 [00:18<?, ?it/s]
loss: 0.182000 [21248/24872]: 0%| | 0/388 [00:18<?, ?it/s]
loss: 0.141517 [21312/24872]: 0%| | 0/388 [00:18<?, ?it/s]
loss: 0.152644 [21376/24872]: 0%| | 0/388 [00:19<?, ?it/s]
loss: 0.158585 [21440/24872]: 0%| | 0/388 [00:19<?, ?it/s]
loss: 0.172213 [21504/24872]: 0%| | 0/388 [00:19<?, ?it/s]
loss: 0.131917 [21568/24872]: 0%| | 0/388 [00:19<?, ?it/s]
loss: 0.142056 [21632/24872]: 0%| | 0/388 [00:19<?, ?it/s]
loss: 0.173218 [21696/24872]: 0%| | 0/388 [00:19<?, ?it/s]
loss: 0.141044 [21760/24872]: 0%| | 0/388 [00:19<?, ?it/s]
loss: 0.184532 [21824/24872]: 0%| | 0/388 [00:19<?, ?it/s]
loss: 0.151090 [21888/24872]: 0%| | 0/388 [00:19<?, ?it/s]
loss: 0.160347 [21952/24872]: 0%| | 0/388 [00:19<?, ?it/s]
loss: 0.165973 [22016/24872]: 0%| | 0/388 [00:19<?, ?it/s]
loss: 0.169552 [22080/24872]: 0%| | 0/388 [00:19<?, ?it/s]
loss: 0.214675 [22144/24872]: 0%| | 0/388 [00:19<?, ?it/s]
loss: 0.113152 [22208/24872]: 0%| | 0/388 [00:19<?, ?it/s]
loss: 0.172663 [22272/24872]: 0%| | 0/388 [00:19<?, ?it/s]
loss: 0.201564 [22336/24872]: 0%| | 0/388 [00:19<?, ?it/s]
loss: 0.177380 [22400/24872]: 0%| | 0/388 [00:19<?, ?it/s]
loss: 0.256814 [22464/24872]: 0%| | 0/388 [00:20<?, ?it/s]
loss: 0.185814 [22528/24872]: 0%| | 0/388 [00:20<?, ?it/s]
loss: 0.124264 [22592/24872]: 0%| | 0/388 [00:20<?, ?it/s]
loss: 0.113708 [22656/24872]: 0%| | 0/388 [00:20<?, ?it/s]
loss: 0.124920 [22720/24872]: 0%| | 0/388 [00:20<?, ?it/s]
loss: 0.139220 [22784/24872]: 0%| | 0/388 [00:20<?, ?it/s]
loss: 0.149076 [22848/24872]: 0%| | 0/388 [00:20<?, ?it/s]
loss: 0.106773 [22912/24872]: 0%| | 0/388 [00:20<?, ?it/s]
loss: 0.222154 [22976/24872]: 0%| | 0/388 [00:20<?, ?it/s]
loss: 0.118973 [23040/24872]: 0%| | 0/388 [00:20<?, ?it/s]
loss: 0.169938 [23104/24872]: 0%| | 0/388 [00:20<?, ?it/s]
loss: 0.193204 [23168/24872]: 0%| | 0/388 [00:20<?, ?it/s]
loss: 0.124334 [23232/24872]: 0%| | 0/388 [00:20<?, ?it/s]
loss: 0.090532 [23296/24872]: 0%| | 0/388 [00:20<?, ?it/s]
loss: 0.146033 [23360/24872]: 0%| | 0/388 [00:20<?, ?it/s]
loss: 0.167404 [23424/24872]: 0%| | 0/388 [00:20<?, ?it/s]
loss: 0.129873 [23488/24872]: 0%| | 0/388 [00:20<?, ?it/s]
loss: 0.113111 [23552/24872]: 0%| | 0/388 [00:20<?, ?it/s]
loss: 0.185407 [23616/24872]: 0%| | 0/388 [00:21<?, ?it/s]
loss: 0.170561 [23680/24872]: 0%| | 0/388 [00:21<?, ?it/s]
loss: 0.161038 [23744/24872]: 0%| | 0/388 [00:21<?, ?it/s]
loss: 0.215967 [23808/24872]: 0%| | 0/388 [00:21<?, ?it/s]
loss: 0.115836 [23872/24872]: 0%| | 0/388 [00:21<?, ?it/s]
loss: 0.188124 [23936/24872]: 0%| | 0/388 [00:21<?, ?it/s]
loss: 0.150942 [24000/24872]: 0%| | 0/388 [00:21<?, ?it/s]
loss: 0.148598 [24064/24872]: 0%| | 0/388 [00:21<?, ?it/s]
loss: 0.107620 [24128/24872]: 0%| | 0/388 [00:21<?, ?it/s]
loss: 0.073511 [24192/24872]: 0%| | 0/388 [00:21<?, ?it/s]
loss: 0.126053 [24256/24872]: 0%| | 0/388 [00:21<?, ?it/s]
loss: 0.121869 [24320/24872]: 0%| | 0/388 [00:21<?, ?it/s]
loss: 0.129282 [24384/24872]: 0%| | 0/388 [00:21<?, ?it/s]
loss: 0.146354 [24448/24872]: 0%| | 0/388 [00:21<?, ?it/s]
loss: 0.145785 [24512/24872]: 0%| | 0/388 [00:21<?, ?it/s]
loss: 0.124202 [24576/24872]: 0%| | 0/388 [00:21<?, ?it/s]
loss: 0.141116 [24640/24872]: 0%| | 0/388 [00:21<?, ?it/s]
loss: 0.152554 [24704/24872]: 0%| | 0/388 [00:22<?, ?it/s]
loss: 0.144728 [24768/24872]: 0%| | 0/388 [00:22<?, ?it/s]
loss: 0.126532 [24832/24872]: 0%| | 0/388 [00:22<?, ?it/s]
loss: 0.116112 [24872/24872]: 0%| | 0/388 [00:22<?, ?it/s]
loss: 0.116112 [24872/24872]: : 389it [00:22, 17.55it/s]
Epoch 4, time=233.22s
0%| | 0/388 [00:00<?, ?it/s]
loss: 0.107082 [ 64/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.194477 [ 128/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.131928 [ 192/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.198677 [ 256/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.144585 [ 320/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.180696 [ 384/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.151230 [ 448/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.159870 [ 512/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.125566 [ 576/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.166842 [ 640/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.165718 [ 704/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.181032 [ 768/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.172251 [ 832/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.204335 [ 896/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.184223 [ 960/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.145556 [ 1024/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.149984 [ 1088/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.170313 [ 1152/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.205026 [ 1216/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.128146 [ 1280/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.113322 [ 1344/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.149434 [ 1408/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.153336 [ 1472/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.135850 [ 1536/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.150879 [ 1600/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.140838 [ 1664/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.116635 [ 1728/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.102077 [ 1792/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.089773 [ 1856/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.146177 [ 1920/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.165694 [ 1984/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.143275 [ 2048/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.152106 [ 2112/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.148342 [ 2176/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.145365 [ 2240/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.134940 [ 2304/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.189028 [ 2368/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.200011 [ 2432/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.146958 [ 2496/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.124294 [ 2560/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.136583 [ 2624/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.164971 [ 2688/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.127582 [ 2752/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.151352 [ 2816/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.169508 [ 2880/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.151977 [ 2944/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.140597 [ 3008/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.145150 [ 3072/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.111307 [ 3136/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.180620 [ 3200/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.128617 [ 3264/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.118168 [ 3328/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.149060 [ 3392/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.090695 [ 3456/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.134144 [ 3520/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.127518 [ 3584/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.148941 [ 3648/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.186796 [ 3712/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.151719 [ 3776/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.149508 [ 3840/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.087978 [ 3904/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.127929 [ 3968/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.154220 [ 4032/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.158187 [ 4096/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.113467 [ 4160/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.141792 [ 4224/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.129771 [ 4288/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.105212 [ 4352/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.165897 [ 4416/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.238123 [ 4480/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.134178 [ 4544/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.104781 [ 4608/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.139804 [ 4672/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.128005 [ 4736/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.167901 [ 4800/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.156060 [ 4864/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.154519 [ 4928/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.104371 [ 4992/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.146075 [ 5056/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.163061 [ 5120/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.114979 [ 5184/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.123202 [ 5248/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.151666 [ 5312/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.146643 [ 5376/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.156638 [ 5440/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.156287 [ 5504/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.199373 [ 5568/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.161905 [ 5632/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.167280 [ 5696/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.158486 [ 5760/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.168012 [ 5824/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.130086 [ 5888/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.124211 [ 5952/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.163503 [ 6016/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.127212 [ 6080/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.213102 [ 6144/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.132789 [ 6208/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.154327 [ 6272/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.131618 [ 6336/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.172333 [ 6400/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.132370 [ 6464/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.151272 [ 6528/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.162213 [ 6592/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.119339 [ 6656/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.154739 [ 6720/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.104882 [ 6784/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.167624 [ 6848/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.139067 [ 6912/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.129406 [ 6976/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.167611 [ 7040/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.196688 [ 7104/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.116095 [ 7168/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.143394 [ 7232/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.127464 [ 7296/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.097729 [ 7360/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.145572 [ 7424/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.148158 [ 7488/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.122486 [ 7552/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.175058 [ 7616/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.123460 [ 7680/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.218907 [ 7744/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.180184 [ 7808/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.127789 [ 7872/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.137048 [ 7936/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.154215 [ 8000/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.150500 [ 8064/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.125575 [ 8128/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.109770 [ 8192/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.154780 [ 8256/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.177165 [ 8320/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.125428 [ 8384/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.146376 [ 8448/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.147466 [ 8512/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.120369 [ 8576/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.120670 [ 8640/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.166982 [ 8704/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.155183 [ 8768/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.185883 [ 8832/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.123462 [ 8896/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.146906 [ 8960/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.148170 [ 9024/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.146364 [ 9088/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.128306 [ 9152/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.137953 [ 9216/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.187661 [ 9280/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.132771 [ 9344/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.137937 [ 9408/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.139506 [ 9472/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.120404 [ 9536/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.140437 [ 9600/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.144548 [ 9664/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.154249 [ 9728/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.122385 [ 9792/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.155511 [ 9856/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.110870 [ 9920/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.104919 [ 9984/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.197298 [10048/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.125862 [10112/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.119852 [10176/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.137007 [10240/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.169871 [10304/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.113207 [10368/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.093426 [10432/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.124918 [10496/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.177977 [10560/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.164597 [10624/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.139319 [10688/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.119830 [10752/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.170953 [10816/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.119030 [10880/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.132666 [10944/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.148936 [11008/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.147002 [11072/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.121365 [11136/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.101357 [11200/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.121215 [11264/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.147312 [11328/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.134137 [11392/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.148868 [11456/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.093681 [11520/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.148089 [11584/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.198452 [11648/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.121807 [11712/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.135509 [11776/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.118084 [11840/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.213452 [11904/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.130318 [11968/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.139570 [12032/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.128990 [12096/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.162935 [12160/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.169558 [12224/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.110307 [12288/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.132253 [12352/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.117335 [12416/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.183428 [12480/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.111076 [12544/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.128497 [12608/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.152616 [12672/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.110909 [12736/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.175511 [12800/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.142766 [12864/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.096789 [12928/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.195844 [12992/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.145734 [13056/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.119661 [13120/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.147358 [13184/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.122491 [13248/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.175575 [13312/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.131406 [13376/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.132926 [13440/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.150289 [13504/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.103695 [13568/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.193914 [13632/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.136577 [13696/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.145474 [13760/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.143509 [13824/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.133839 [13888/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.086995 [13952/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.116555 [14016/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.130225 [14080/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.148769 [14144/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.124641 [14208/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.201683 [14272/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.127800 [14336/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.202595 [14400/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.125786 [14464/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.128100 [14528/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.155736 [14592/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.205686 [14656/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.170634 [14720/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.158646 [14784/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.168640 [14848/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.116011 [14912/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.146930 [14976/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.149996 [15040/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.170439 [15104/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.147950 [15168/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.102131 [15232/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.153344 [15296/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.152896 [15360/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.151018 [15424/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.193934 [15488/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.114128 [15552/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.191935 [15616/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.097745 [15680/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.136150 [15744/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.131090 [15808/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.151834 [15872/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.120368 [15936/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.148701 [16000/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.092410 [16064/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.134121 [16128/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.118938 [16192/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.143266 [16256/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.136411 [16320/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.151766 [16384/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.155661 [16448/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.146858 [16512/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.169858 [16576/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.173613 [16640/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.120724 [16704/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.158102 [16768/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.170551 [16832/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.135693 [16896/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.136732 [16960/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.172324 [17024/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.118094 [17088/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.138001 [17152/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.132921 [17216/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.097794 [17280/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.152951 [17344/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.147030 [17408/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.157274 [17472/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.110317 [17536/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.215895 [17600/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.228540 [17664/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.140955 [17728/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.175278 [17792/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.141998 [17856/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.113594 [17920/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.129727 [17984/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.160539 [18048/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.153049 [18112/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.208649 [18176/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.084698 [18240/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.125727 [18304/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.167714 [18368/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.104801 [18432/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.150183 [18496/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.159275 [18560/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.123476 [18624/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.125070 [18688/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.140948 [18752/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.164559 [18816/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.128362 [18880/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.150855 [18944/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.132336 [19008/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.147487 [19072/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.128781 [19136/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.113570 [19200/24872]: 0%| | 0/388 [00:17<?, ?it/s]
loss: 0.147859 [19264/24872]: 0%| | 0/388 [00:17<?, ?it/s]
loss: 0.128679 [19328/24872]: 0%| | 0/388 [00:17<?, ?it/s]
loss: 0.144389 [19392/24872]: 0%| | 0/388 [00:17<?, ?it/s]
loss: 0.131376 [19456/24872]: 0%| | 0/388 [00:17<?, ?it/s]
loss: 0.120842 [19520/24872]: 0%| | 0/388 [00:17<?, ?it/s]
loss: 0.115021 [19584/24872]: 0%| | 0/388 [00:17<?, ?it/s]
loss: 0.149401 [19648/24872]: 0%| | 0/388 [00:17<?, ?it/s]
loss: 0.121035 [19712/24872]: 0%| | 0/388 [00:17<?, ?it/s]
loss: 0.107063 [19776/24872]: 0%| | 0/388 [00:17<?, ?it/s]
loss: 0.131160 [19840/24872]: 0%| | 0/388 [00:17<?, ?it/s]
loss: 0.138565 [19904/24872]: 0%| | 0/388 [00:17<?, ?it/s]
loss: 0.146662 [19968/24872]: 0%| | 0/388 [00:17<?, ?it/s]
loss: 0.157237 [20032/24872]: 0%| | 0/388 [00:17<?, ?it/s]
loss: 0.119103 [20096/24872]: 0%| | 0/388 [00:17<?, ?it/s]
loss: 0.126079 [20160/24872]: 0%| | 0/388 [00:17<?, ?it/s]
loss: 0.124916 [20224/24872]: 0%| | 0/388 [00:17<?, ?it/s]
loss: 0.172820 [20288/24872]: 0%| | 0/388 [00:18<?, ?it/s]
loss: 0.156403 [20352/24872]: 0%| | 0/388 [00:18<?, ?it/s]
loss: 0.087463 [20416/24872]: 0%| | 0/388 [00:18<?, ?it/s]
loss: 0.134604 [20480/24872]: 0%| | 0/388 [00:18<?, ?it/s]
loss: 0.129132 [20544/24872]: 0%| | 0/388 [00:18<?, ?it/s]
loss: 0.130168 [20608/24872]: 0%| | 0/388 [00:18<?, ?it/s]
loss: 0.170460 [20672/24872]: 0%| | 0/388 [00:18<?, ?it/s]
loss: 0.150874 [20736/24872]: 0%| | 0/388 [00:18<?, ?it/s]
loss: 0.136398 [20800/24872]: 0%| | 0/388 [00:18<?, ?it/s]
loss: 0.136133 [20864/24872]: 0%| | 0/388 [00:18<?, ?it/s]
loss: 0.149196 [20928/24872]: 0%| | 0/388 [00:18<?, ?it/s]
loss: 0.154820 [20992/24872]: 0%| | 0/388 [00:18<?, ?it/s]
loss: 0.126299 [21056/24872]: 0%| | 0/388 [00:18<?, ?it/s]
loss: 0.191567 [21120/24872]: 0%| | 0/388 [00:18<?, ?it/s]
loss: 0.147834 [21184/24872]: 0%| | 0/388 [00:18<?, ?it/s]
loss: 0.157230 [21248/24872]: 0%| | 0/388 [00:18<?, ?it/s]
loss: 0.158875 [21312/24872]: 0%| | 0/388 [00:18<?, ?it/s]
loss: 0.154182 [21376/24872]: 0%| | 0/388 [00:18<?, ?it/s]
loss: 0.161702 [21440/24872]: 0%| | 0/388 [00:19<?, ?it/s]
loss: 0.187319 [21504/24872]: 0%| | 0/388 [00:19<?, ?it/s]
loss: 0.120865 [21568/24872]: 0%| | 0/388 [00:19<?, ?it/s]
loss: 0.136849 [21632/24872]: 0%| | 0/388 [00:19<?, ?it/s]
loss: 0.169578 [21696/24872]: 0%| | 0/388 [00:19<?, ?it/s]
loss: 0.120489 [21760/24872]: 0%| | 0/388 [00:19<?, ?it/s]
loss: 0.179082 [21824/24872]: 0%| | 0/388 [00:19<?, ?it/s]
loss: 0.178180 [21888/24872]: 0%| | 0/388 [00:19<?, ?it/s]
loss: 0.143503 [21952/24872]: 0%| | 0/388 [00:19<?, ?it/s]
loss: 0.170535 [22016/24872]: 0%| | 0/388 [00:19<?, ?it/s]
loss: 0.170988 [22080/24872]: 0%| | 0/388 [00:19<?, ?it/s]
loss: 0.171099 [22144/24872]: 0%| | 0/388 [00:19<?, ?it/s]
loss: 0.122997 [22208/24872]: 0%| | 0/388 [00:19<?, ?it/s]
loss: 0.207125 [22272/24872]: 0%| | 0/388 [00:19<?, ?it/s]
loss: 0.183622 [22336/24872]: 0%| | 0/388 [00:19<?, ?it/s]
loss: 0.165982 [22400/24872]: 0%| | 0/388 [00:19<?, ?it/s]
loss: 0.178413 [22464/24872]: 0%| | 0/388 [00:19<?, ?it/s]
loss: 0.182471 [22528/24872]: 0%| | 0/388 [00:19<?, ?it/s]
loss: 0.118715 [22592/24872]: 0%| | 0/388 [00:20<?, ?it/s]
loss: 0.095764 [22656/24872]: 0%| | 0/388 [00:20<?, ?it/s]
loss: 0.149792 [22720/24872]: 0%| | 0/388 [00:20<?, ?it/s]
loss: 0.120192 [22784/24872]: 0%| | 0/388 [00:20<?, ?it/s]
loss: 0.120348 [22848/24872]: 0%| | 0/388 [00:20<?, ?it/s]
loss: 0.093164 [22912/24872]: 0%| | 0/388 [00:20<?, ?it/s]
loss: 0.192975 [22976/24872]: 0%| | 0/388 [00:20<?, ?it/s]
loss: 0.125128 [23040/24872]: 0%| | 0/388 [00:20<?, ?it/s]
loss: 0.165843 [23104/24872]: 0%| | 0/388 [00:20<?, ?it/s]
loss: 0.165466 [23168/24872]: 0%| | 0/388 [00:20<?, ?it/s]
loss: 0.110897 [23232/24872]: 0%| | 0/388 [00:20<?, ?it/s]
loss: 0.080241 [23296/24872]: 0%| | 0/388 [00:20<?, ?it/s]
loss: 0.152573 [23360/24872]: 0%| | 0/388 [00:20<?, ?it/s]
loss: 0.166362 [23424/24872]: 0%| | 0/388 [00:20<?, ?it/s]
loss: 0.134658 [23488/24872]: 0%| | 0/388 [00:20<?, ?it/s]
loss: 0.102671 [23552/24872]: 0%| | 0/388 [00:20<?, ?it/s]
loss: 0.166250 [23616/24872]: 0%| | 0/388 [00:20<?, ?it/s]
loss: 0.159775 [23680/24872]: 0%| | 0/388 [00:21<?, ?it/s]
loss: 0.129358 [23744/24872]: 0%| | 0/388 [00:21<?, ?it/s]
loss: 0.217326 [23808/24872]: 0%| | 0/388 [00:21<?, ?it/s]
loss: 0.107198 [23872/24872]: 0%| | 0/388 [00:21<?, ?it/s]
loss: 0.200769 [23936/24872]: 0%| | 0/388 [00:21<?, ?it/s]
loss: 0.144689 [24000/24872]: 0%| | 0/388 [00:21<?, ?it/s]
loss: 0.166595 [24064/24872]: 0%| | 0/388 [00:21<?, ?it/s]
loss: 0.117226 [24128/24872]: 0%| | 0/388 [00:21<?, ?it/s]
loss: 0.074414 [24192/24872]: 0%| | 0/388 [00:21<?, ?it/s]
loss: 0.125434 [24256/24872]: 0%| | 0/388 [00:21<?, ?it/s]
loss: 0.122700 [24320/24872]: 0%| | 0/388 [00:21<?, ?it/s]
loss: 0.121873 [24384/24872]: 0%| | 0/388 [00:21<?, ?it/s]
loss: 0.125663 [24448/24872]: 0%| | 0/388 [00:21<?, ?it/s]
loss: 0.120311 [24512/24872]: 0%| | 0/388 [00:21<?, ?it/s]
loss: 0.124289 [24576/24872]: 0%| | 0/388 [00:21<?, ?it/s]
loss: 0.132919 [24640/24872]: 0%| | 0/388 [00:21<?, ?it/s]
loss: 0.143914 [24704/24872]: 0%| | 0/388 [00:21<?, ?it/s]
loss: 0.134413 [24768/24872]: 0%| | 0/388 [00:21<?, ?it/s]
loss: 0.119440 [24832/24872]: 0%| | 0/388 [00:22<?, ?it/s]
loss: 0.111493 [24872/24872]: 0%| | 0/388 [00:22<?, ?it/s]
loss: 0.111493 [24872/24872]: : 389it [00:22, 17.62it/s]
Epoch 5, time=255.30s
0%| | 0/388 [00:00<?, ?it/s]
loss: 0.098102 [ 64/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.180392 [ 128/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.125928 [ 192/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.186247 [ 256/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.127555 [ 320/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.152701 [ 384/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.127903 [ 448/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.139308 [ 512/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.113619 [ 576/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.142107 [ 640/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.152003 [ 704/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.169609 [ 768/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.140716 [ 832/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.199444 [ 896/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.160111 [ 960/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.142251 [ 1024/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.127357 [ 1088/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.155315 [ 1152/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.182428 [ 1216/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.124333 [ 1280/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.118075 [ 1344/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.137705 [ 1408/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.150387 [ 1472/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.119437 [ 1536/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.149899 [ 1600/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.138557 [ 1664/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.110491 [ 1728/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.112131 [ 1792/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.104220 [ 1856/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.139924 [ 1920/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.179947 [ 1984/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.130095 [ 2048/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.150064 [ 2112/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.154794 [ 2176/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.137216 [ 2240/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.156749 [ 2304/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.185974 [ 2368/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.215562 [ 2432/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.149662 [ 2496/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.118080 [ 2560/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.147766 [ 2624/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.156988 [ 2688/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.125089 [ 2752/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.153081 [ 2816/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.156498 [ 2880/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.147844 [ 2944/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.130270 [ 3008/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.147040 [ 3072/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.111652 [ 3136/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.175882 [ 3200/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.129306 [ 3264/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.118030 [ 3328/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.158451 [ 3392/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.094680 [ 3456/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.132241 [ 3520/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.144141 [ 3584/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.150552 [ 3648/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.211626 [ 3712/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.164016 [ 3776/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.153108 [ 3840/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.106439 [ 3904/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.144457 [ 3968/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.169170 [ 4032/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.188502 [ 4096/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.114463 [ 4160/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.147539 [ 4224/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.154199 [ 4288/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.115478 [ 4352/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.175507 [ 4416/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.259461 [ 4480/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.134273 [ 4544/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.106933 [ 4608/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.160784 [ 4672/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.130127 [ 4736/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.175402 [ 4800/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.149628 [ 4864/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.142595 [ 4928/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.109013 [ 4992/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.159187 [ 5056/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.167112 [ 5120/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.128124 [ 5184/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.124097 [ 5248/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.139333 [ 5312/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.152548 [ 5376/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.176241 [ 5440/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.153904 [ 5504/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.201635 [ 5568/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.162991 [ 5632/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.155856 [ 5696/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.138303 [ 5760/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.168978 [ 5824/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.135092 [ 5888/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.125588 [ 5952/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.155856 [ 6016/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.122130 [ 6080/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.212107 [ 6144/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.144109 [ 6208/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.162103 [ 6272/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.137904 [ 6336/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.180876 [ 6400/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.146545 [ 6464/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.161971 [ 6528/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.156047 [ 6592/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.122798 [ 6656/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.140374 [ 6720/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.104654 [ 6784/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.174819 [ 6848/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.148028 [ 6912/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.129537 [ 6976/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.164692 [ 7040/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.189256 [ 7104/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.116347 [ 7168/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.147350 [ 7232/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.124100 [ 7296/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.102235 [ 7360/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.137811 [ 7424/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.149804 [ 7488/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.124840 [ 7552/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.170315 [ 7616/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.127958 [ 7680/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.222819 [ 7744/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.174738 [ 7808/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.115316 [ 7872/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.127210 [ 7936/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.139185 [ 8000/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.150846 [ 8064/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.119764 [ 8128/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.109217 [ 8192/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.133117 [ 8256/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.173570 [ 8320/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.114065 [ 8384/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.148327 [ 8448/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.146283 [ 8512/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.124391 [ 8576/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.112245 [ 8640/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.165685 [ 8704/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.147640 [ 8768/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.170551 [ 8832/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.124825 [ 8896/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.149796 [ 8960/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.132747 [ 9024/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.146158 [ 9088/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.128830 [ 9152/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.135245 [ 9216/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.187445 [ 9280/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.143422 [ 9344/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.134345 [ 9408/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.138691 [ 9472/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.116881 [ 9536/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.139965 [ 9600/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.144550 [ 9664/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.153000 [ 9728/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.118070 [ 9792/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.164534 [ 9856/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.111457 [ 9920/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.104161 [ 9984/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.180187 [10048/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.120076 [10112/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.129857 [10176/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.136175 [10240/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.158380 [10304/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.108859 [10368/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.092805 [10432/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.125097 [10496/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.172252 [10560/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.166099 [10624/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.130105 [10688/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.106803 [10752/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.141858 [10816/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.121790 [10880/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.135915 [10944/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.149072 [11008/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.145165 [11072/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.115883 [11136/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.106345 [11200/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.114631 [11264/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.156993 [11328/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.151433 [11392/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.146640 [11456/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.105420 [11520/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.143382 [11584/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.185902 [11648/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.117888 [11712/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.155780 [11776/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.128699 [11840/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.212024 [11904/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.135143 [11968/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.136652 [12032/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.112369 [12096/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.163142 [12160/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.189589 [12224/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.106751 [12288/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.133465 [12352/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.135687 [12416/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.193045 [12480/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.155383 [12544/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.134665 [12608/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.146123 [12672/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.135577 [12736/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.171010 [12800/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.142263 [12864/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.094888 [12928/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.194298 [12992/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.155320 [13056/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.127485 [13120/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.170081 [13184/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.122968 [13248/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.169300 [13312/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.137635 [13376/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.144973 [13440/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.163422 [13504/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.146145 [13568/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.180076 [13632/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.136053 [13696/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.145129 [13760/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.146275 [13824/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.143412 [13888/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.092544 [13952/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.122909 [14016/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.157728 [14080/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.154713 [14144/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.120920 [14208/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.176919 [14272/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.130362 [14336/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.182907 [14400/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.122979 [14464/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.135065 [14528/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.149334 [14592/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.217169 [14656/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.175707 [14720/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.179221 [14784/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.179208 [14848/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.128179 [14912/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.141945 [14976/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.149457 [15040/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.160106 [15104/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.132790 [15168/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.110248 [15232/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.152040 [15296/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.139796 [15360/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.150356 [15424/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.187059 [15488/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.116324 [15552/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.183576 [15616/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.084624 [15680/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.133077 [15744/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.101715 [15808/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.149576 [15872/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.121901 [15936/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.142296 [16000/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.090709 [16064/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.132394 [16128/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.121590 [16192/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.160245 [16256/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.130079 [16320/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.156307 [16384/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.163143 [16448/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.147061 [16512/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.178531 [16576/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.159962 [16640/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.117289 [16704/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.160437 [16768/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.187474 [16832/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.126785 [16896/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.138376 [16960/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.172381 [17024/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.117495 [17088/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.148940 [17152/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.129595 [17216/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.096055 [17280/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.147845 [17344/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.130613 [17408/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.155521 [17472/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.113488 [17536/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.186921 [17600/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.205879 [17664/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.129864 [17728/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.166077 [17792/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.129691 [17856/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.096001 [17920/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.129846 [17984/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.149906 [18048/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.159629 [18112/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.186547 [18176/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.074430 [18240/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.116064 [18304/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.145041 [18368/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.093771 [18432/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.138420 [18496/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.135894 [18560/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.118980 [18624/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.120647 [18688/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.127264 [18752/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.160304 [18816/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.109598 [18880/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.125036 [18944/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.131859 [19008/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.136705 [19072/24872]: 0%| | 0/388 [00:17<?, ?it/s]
loss: 0.116392 [19136/24872]: 0%| | 0/388 [00:17<?, ?it/s]
loss: 0.111399 [19200/24872]: 0%| | 0/388 [00:17<?, ?it/s]
loss: 0.131855 [19264/24872]: 0%| | 0/388 [00:17<?, ?it/s]
loss: 0.114267 [19328/24872]: 0%| | 0/388 [00:17<?, ?it/s]
loss: 0.133292 [19392/24872]: 0%| | 0/388 [00:17<?, ?it/s]
loss: 0.118496 [19456/24872]: 0%| | 0/388 [00:17<?, ?it/s]
loss: 0.110922 [19520/24872]: 0%| | 0/388 [00:17<?, ?it/s]
loss: 0.121373 [19584/24872]: 0%| | 0/388 [00:17<?, ?it/s]
loss: 0.147010 [19648/24872]: 0%| | 0/388 [00:17<?, ?it/s]
loss: 0.120240 [19712/24872]: 0%| | 0/388 [00:17<?, ?it/s]
loss: 0.093121 [19776/24872]: 0%| | 0/388 [00:17<?, ?it/s]
loss: 0.125561 [19840/24872]: 0%| | 0/388 [00:17<?, ?it/s]
loss: 0.137928 [19904/24872]: 0%| | 0/388 [00:17<?, ?it/s]
loss: 0.122265 [19968/24872]: 0%| | 0/388 [00:17<?, ?it/s]
loss: 0.150081 [20032/24872]: 0%| | 0/388 [00:17<?, ?it/s]
loss: 0.124118 [20096/24872]: 0%| | 0/388 [00:17<?, ?it/s]
loss: 0.109694 [20160/24872]: 0%| | 0/388 [00:18<?, ?it/s]
loss: 0.128954 [20224/24872]: 0%| | 0/388 [00:18<?, ?it/s]
loss: 0.164268 [20288/24872]: 0%| | 0/388 [00:18<?, ?it/s]
loss: 0.137215 [20352/24872]: 0%| | 0/388 [00:18<?, ?it/s]
loss: 0.099416 [20416/24872]: 0%| | 0/388 [00:18<?, ?it/s]
loss: 0.130792 [20480/24872]: 0%| | 0/388 [00:18<?, ?it/s]
loss: 0.121671 [20544/24872]: 0%| | 0/388 [00:18<?, ?it/s]
loss: 0.150076 [20608/24872]: 0%| | 0/388 [00:18<?, ?it/s]
loss: 0.170944 [20672/24872]: 0%| | 0/388 [00:18<?, ?it/s]
loss: 0.152894 [20736/24872]: 0%| | 0/388 [00:18<?, ?it/s]
loss: 0.141506 [20800/24872]: 0%| | 0/388 [00:18<?, ?it/s]
loss: 0.130039 [20864/24872]: 0%| | 0/388 [00:18<?, ?it/s]
loss: 0.135640 [20928/24872]: 0%| | 0/388 [00:18<?, ?it/s]
loss: 0.146021 [20992/24872]: 0%| | 0/388 [00:18<?, ?it/s]
loss: 0.131294 [21056/24872]: 0%| | 0/388 [00:18<?, ?it/s]
loss: 0.176596 [21120/24872]: 0%| | 0/388 [00:18<?, ?it/s]
loss: 0.153603 [21184/24872]: 0%| | 0/388 [00:18<?, ?it/s]
loss: 0.154447 [21248/24872]: 0%| | 0/388 [00:18<?, ?it/s]
loss: 0.135890 [21312/24872]: 0%| | 0/388 [00:19<?, ?it/s]
loss: 0.144715 [21376/24872]: 0%| | 0/388 [00:19<?, ?it/s]
loss: 0.138458 [21440/24872]: 0%| | 0/388 [00:19<?, ?it/s]
loss: 0.165144 [21504/24872]: 0%| | 0/388 [00:19<?, ?it/s]
loss: 0.099068 [21568/24872]: 0%| | 0/388 [00:19<?, ?it/s]
loss: 0.120525 [21632/24872]: 0%| | 0/388 [00:19<?, ?it/s]
loss: 0.156774 [21696/24872]: 0%| | 0/388 [00:19<?, ?it/s]
loss: 0.105649 [21760/24872]: 0%| | 0/388 [00:19<?, ?it/s]
loss: 0.162911 [21824/24872]: 0%| | 0/388 [00:19<?, ?it/s]
loss: 0.152032 [21888/24872]: 0%| | 0/388 [00:19<?, ?it/s]
loss: 0.126651 [21952/24872]: 0%| | 0/388 [00:19<?, ?it/s]
loss: 0.156207 [22016/24872]: 0%| | 0/388 [00:19<?, ?it/s]
loss: 0.157210 [22080/24872]: 0%| | 0/388 [00:19<?, ?it/s]
loss: 0.149191 [22144/24872]: 0%| | 0/388 [00:19<?, ?it/s]
loss: 0.127292 [22208/24872]: 0%| | 0/388 [00:19<?, ?it/s]
loss: 0.165573 [22272/24872]: 0%| | 0/388 [00:19<?, ?it/s]
loss: 0.145899 [22336/24872]: 0%| | 0/388 [00:19<?, ?it/s]
loss: 0.161747 [22400/24872]: 0%| | 0/388 [00:20<?, ?it/s]
loss: 0.161386 [22464/24872]: 0%| | 0/388 [00:20<?, ?it/s]
loss: 0.192491 [22528/24872]: 0%| | 0/388 [00:20<?, ?it/s]
loss: 0.102298 [22592/24872]: 0%| | 0/388 [00:20<?, ?it/s]
loss: 0.074863 [22656/24872]: 0%| | 0/388 [00:20<?, ?it/s]
loss: 0.114040 [22720/24872]: 0%| | 0/388 [00:20<?, ?it/s]
loss: 0.107189 [22784/24872]: 0%| | 0/388 [00:20<?, ?it/s]
loss: 0.117784 [22848/24872]: 0%| | 0/388 [00:20<?, ?it/s]
loss: 0.099382 [22912/24872]: 0%| | 0/388 [00:20<?, ?it/s]
loss: 0.181747 [22976/24872]: 0%| | 0/388 [00:20<?, ?it/s]
loss: 0.117398 [23040/24872]: 0%| | 0/388 [00:20<?, ?it/s]
loss: 0.151781 [23104/24872]: 0%| | 0/388 [00:20<?, ?it/s]
loss: 0.154723 [23168/24872]: 0%| | 0/388 [00:20<?, ?it/s]
loss: 0.106208 [23232/24872]: 0%| | 0/388 [00:20<?, ?it/s]
loss: 0.083356 [23296/24872]: 0%| | 0/388 [00:20<?, ?it/s]
loss: 0.122291 [23360/24872]: 0%| | 0/388 [00:20<?, ?it/s]
loss: 0.175734 [23424/24872]: 0%| | 0/388 [00:20<?, ?it/s]
loss: 0.116325 [23488/24872]: 0%| | 0/388 [00:20<?, ?it/s]
loss: 0.090157 [23552/24872]: 0%| | 0/388 [00:21<?, ?it/s]
loss: 0.146625 [23616/24872]: 0%| | 0/388 [00:21<?, ?it/s]
loss: 0.151039 [23680/24872]: 0%| | 0/388 [00:21<?, ?it/s]
loss: 0.139661 [23744/24872]: 0%| | 0/388 [00:21<?, ?it/s]
loss: 0.202819 [23808/24872]: 0%| | 0/388 [00:21<?, ?it/s]
loss: 0.119001 [23872/24872]: 0%| | 0/388 [00:21<?, ?it/s]
loss: 0.179631 [23936/24872]: 0%| | 0/388 [00:21<?, ?it/s]
loss: 0.128584 [24000/24872]: 0%| | 0/388 [00:21<?, ?it/s]
loss: 0.152361 [24064/24872]: 0%| | 0/388 [00:21<?, ?it/s]
loss: 0.099071 [24128/24872]: 0%| | 0/388 [00:21<?, ?it/s]
loss: 0.065351 [24192/24872]: 0%| | 0/388 [00:21<?, ?it/s]
loss: 0.126815 [24256/24872]: 0%| | 0/388 [00:21<?, ?it/s]
loss: 0.118329 [24320/24872]: 0%| | 0/388 [00:21<?, ?it/s]
loss: 0.120411 [24384/24872]: 0%| | 0/388 [00:21<?, ?it/s]
loss: 0.115055 [24448/24872]: 0%| | 0/388 [00:21<?, ?it/s]
loss: 0.113363 [24512/24872]: 0%| | 0/388 [00:21<?, ?it/s]
loss: 0.128628 [24576/24872]: 0%| | 0/388 [00:21<?, ?it/s]
loss: 0.116593 [24640/24872]: 0%| | 0/388 [00:22<?, ?it/s]
loss: 0.127630 [24704/24872]: 0%| | 0/388 [00:22<?, ?it/s]
loss: 0.129138 [24768/24872]: 0%| | 0/388 [00:22<?, ?it/s]
loss: 0.125214 [24832/24872]: 0%| | 0/388 [00:22<?, ?it/s]
loss: 0.101060 [24872/24872]: 0%| | 0/388 [00:22<?, ?it/s]
loss: 0.101060 [24872/24872]: : 389it [00:22, 17.49it/s]
-------------------------------
LR=0.0001, batch_size=128
-------------------------------
Epoch 1, time=277.54s
0%| | 0/194 [00:00<?, ?it/s]
loss: 0.135103 [ 128/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.321957 [ 256/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.229976 [ 384/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.187619 [ 512/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.154031 [ 640/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.206922 [ 768/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.181292 [ 896/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.150995 [ 1024/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.192712 [ 1152/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.200737 [ 1280/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.147492 [ 1408/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.145935 [ 1536/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.156060 [ 1664/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.107976 [ 1792/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.118166 [ 1920/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.154300 [ 2048/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.161464 [ 2176/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.129964 [ 2304/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.185968 [ 2432/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.121035 [ 2560/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.145747 [ 2688/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.129586 [ 2816/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.143524 [ 2944/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.129118 [ 3072/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.135740 [ 3200/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.112121 [ 3328/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.113046 [ 3456/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.128907 [ 3584/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.150142 [ 3712/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.132255 [ 3840/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.094338 [ 3968/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.143698 [ 4096/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.117093 [ 4224/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.099643 [ 4352/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.167877 [ 4480/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.103702 [ 4608/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.108158 [ 4736/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.134271 [ 4864/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.098747 [ 4992/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.142518 [ 5120/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.099173 [ 5248/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.127884 [ 5376/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.142005 [ 5504/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.149459 [ 5632/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.142788 [ 5760/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.134181 [ 5888/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.114715 [ 6016/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.147298 [ 6144/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.133585 [ 6272/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.132070 [ 6400/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.118629 [ 6528/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.127280 [ 6656/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.111195 [ 6784/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.143236 [ 6912/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.130231 [ 7040/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.140862 [ 7168/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.122563 [ 7296/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.098756 [ 7424/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.113051 [ 7552/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.131550 [ 7680/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.173952 [ 7808/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.114270 [ 7936/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.131563 [ 8064/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.103196 [ 8192/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.129438 [ 8320/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.119159 [ 8448/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.118657 [ 8576/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.122387 [ 8704/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.146886 [ 8832/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.120148 [ 8960/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.122219 [ 9088/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.117849 [ 9216/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.140454 [ 9344/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.120822 [ 9472/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.118732 [ 9600/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.126020 [ 9728/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.122008 [ 9856/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.097809 [ 9984/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.138266 [10112/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.115572 [10240/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.119649 [10368/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.099095 [10496/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.144325 [10624/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.110755 [10752/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.118294 [10880/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.125334 [11008/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.096729 [11136/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.096893 [11264/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.126278 [11392/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.108094 [11520/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.153788 [11648/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.107735 [11776/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.140412 [11904/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.112355 [12032/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.114622 [12160/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.117809 [12288/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.110851 [12416/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.129297 [12544/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.116626 [12672/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.121335 [12800/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.096234 [12928/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.151823 [13056/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.113965 [13184/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.124690 [13312/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.123452 [13440/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.122093 [13568/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.146955 [13696/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.118070 [13824/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.092420 [13952/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.113148 [14080/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.111540 [14208/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.141372 [14336/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.130369 [14464/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.112531 [14592/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.158012 [14720/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.154802 [14848/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.112596 [14976/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.134024 [15104/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.112954 [15232/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.141262 [15360/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.150711 [15488/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.140781 [15616/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.096757 [15744/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.129141 [15872/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.117516 [16000/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.095626 [16128/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.123065 [16256/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.130875 [16384/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.144697 [16512/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.152156 [16640/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.138795 [16768/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.138472 [16896/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.152871 [17024/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.117316 [17152/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.100221 [17280/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.133177 [17408/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.117454 [17536/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.171227 [17664/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.149270 [17792/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.104478 [17920/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.131374 [18048/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.145912 [18176/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.090308 [18304/24872]: 0%| | 0/194 [00:12<?, ?it/s]
loss: 0.109451 [18432/24872]: 0%| | 0/194 [00:12<?, ?it/s]
loss: 0.127162 [18560/24872]: 0%| | 0/194 [00:12<?, ?it/s]
loss: 0.106699 [18688/24872]: 0%| | 0/194 [00:12<?, ?it/s]
loss: 0.126348 [18816/24872]: 0%| | 0/194 [00:12<?, ?it/s]
loss: 0.114990 [18944/24872]: 0%| | 0/194 [00:12<?, ?it/s]
loss: 0.113924 [19072/24872]: 0%| | 0/194 [00:12<?, ?it/s]
loss: 0.092845 [19200/24872]: 0%| | 0/194 [00:12<?, ?it/s]
loss: 0.115676 [19328/24872]: 0%| | 0/194 [00:12<?, ?it/s]
loss: 0.111438 [19456/24872]: 0%| | 0/194 [00:12<?, ?it/s]
loss: 0.105450 [19584/24872]: 0%| | 0/194 [00:12<?, ?it/s]
loss: 0.115310 [19712/24872]: 0%| | 0/194 [00:13<?, ?it/s]
loss: 0.108173 [19840/24872]: 0%| | 0/194 [00:13<?, ?it/s]
loss: 0.121238 [19968/24872]: 0%| | 0/194 [00:13<?, ?it/s]
loss: 0.124288 [20096/24872]: 0%| | 0/194 [00:13<?, ?it/s]
loss: 0.101147 [20224/24872]: 0%| | 0/194 [00:13<?, ?it/s]
loss: 0.143865 [20352/24872]: 0%| | 0/194 [00:13<?, ?it/s]
loss: 0.096053 [20480/24872]: 0%| | 0/194 [00:13<?, ?it/s]
loss: 0.106864 [20608/24872]: 0%| | 0/194 [00:13<?, ?it/s]
loss: 0.145836 [20736/24872]: 0%| | 0/194 [00:13<?, ?it/s]
loss: 0.112516 [20864/24872]: 0%| | 0/194 [00:13<?, ?it/s]
loss: 0.118908 [20992/24872]: 0%| | 0/194 [00:13<?, ?it/s]
loss: 0.136474 [21120/24872]: 0%| | 0/194 [00:13<?, ?it/s]
loss: 0.127798 [21248/24872]: 0%| | 0/194 [00:14<?, ?it/s]
loss: 0.125084 [21376/24872]: 0%| | 0/194 [00:14<?, ?it/s]
loss: 0.134151 [21504/24872]: 0%| | 0/194 [00:14<?, ?it/s]
loss: 0.098046 [21632/24872]: 0%| | 0/194 [00:14<?, ?it/s]
loss: 0.119111 [21760/24872]: 0%| | 0/194 [00:14<?, ?it/s]
loss: 0.151327 [21888/24872]: 0%| | 0/194 [00:14<?, ?it/s]
loss: 0.140663 [22016/24872]: 0%| | 0/194 [00:14<?, ?it/s]
loss: 0.128907 [22144/24872]: 0%| | 0/194 [00:14<?, ?it/s]
loss: 0.126545 [22272/24872]: 0%| | 0/194 [00:14<?, ?it/s]
loss: 0.133641 [22400/24872]: 0%| | 0/194 [00:14<?, ?it/s]
loss: 0.169426 [22528/24872]: 0%| | 0/194 [00:14<?, ?it/s]
loss: 0.080970 [22656/24872]: 0%| | 0/194 [00:14<?, ?it/s]
loss: 0.096725 [22784/24872]: 0%| | 0/194 [00:15<?, ?it/s]
loss: 0.093444 [22912/24872]: 0%| | 0/194 [00:15<?, ?it/s]
loss: 0.133327 [23040/24872]: 0%| | 0/194 [00:15<?, ?it/s]
loss: 0.143793 [23168/24872]: 0%| | 0/194 [00:15<?, ?it/s]
loss: 0.079170 [23296/24872]: 0%| | 0/194 [00:15<?, ?it/s]
loss: 0.139908 [23424/24872]: 0%| | 0/194 [00:15<?, ?it/s]
loss: 0.098632 [23552/24872]: 0%| | 0/194 [00:15<?, ?it/s]
loss: 0.145354 [23680/24872]: 0%| | 0/194 [00:15<?, ?it/s]
loss: 0.147763 [23808/24872]: 0%| | 0/194 [00:15<?, ?it/s]
loss: 0.122602 [23936/24872]: 0%| | 0/194 [00:15<?, ?it/s]
loss: 0.130356 [24064/24872]: 0%| | 0/194 [00:15<?, ?it/s]
loss: 0.076087 [24192/24872]: 0%| | 0/194 [00:15<?, ?it/s]
loss: 0.108572 [24320/24872]: 0%| | 0/194 [00:16<?, ?it/s]
loss: 0.101167 [24448/24872]: 0%| | 0/194 [00:16<?, ?it/s]
loss: 0.105814 [24576/24872]: 0%| | 0/194 [00:16<?, ?it/s]
loss: 0.109110 [24704/24872]: 0%| | 0/194 [00:16<?, ?it/s]
loss: 0.119798 [24832/24872]: 0%| | 0/194 [00:16<?, ?it/s]
loss: 0.099133 [24872/24872]: 0%| | 0/194 [00:16<?, ?it/s]
loss: 0.099133 [24872/24872]: : 195it [00:16, 11.88it/s]
Epoch 2, time=293.96s
0%| | 0/194 [00:00<?, ?it/s]
loss: 0.127088 [ 128/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.141200 [ 256/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.137555 [ 384/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.124292 [ 512/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.124911 [ 640/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.139220 [ 768/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.163631 [ 896/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.140909 [ 1024/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.147022 [ 1152/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.136737 [ 1280/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.133885 [ 1408/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.131317 [ 1536/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.130436 [ 1664/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.100668 [ 1792/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.113499 [ 1920/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.137258 [ 2048/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.121962 [ 2176/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.115643 [ 2304/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.167726 [ 2432/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.114855 [ 2560/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.131904 [ 2688/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.120681 [ 2816/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.133159 [ 2944/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.124417 [ 3072/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.124081 [ 3200/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.104280 [ 3328/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.109182 [ 3456/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.114852 [ 3584/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.144865 [ 3712/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.128781 [ 3840/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.089445 [ 3968/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.141628 [ 4096/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.114987 [ 4224/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.092270 [ 4352/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.167206 [ 4480/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.104921 [ 4608/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.108292 [ 4736/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.129221 [ 4864/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.099222 [ 4992/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.142227 [ 5120/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.095173 [ 5248/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.121809 [ 5376/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.134011 [ 5504/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.146214 [ 5632/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.138819 [ 5760/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.132697 [ 5888/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.114466 [ 6016/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.144195 [ 6144/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.130503 [ 6272/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.133756 [ 6400/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.114070 [ 6528/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.121644 [ 6656/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.109906 [ 6784/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.147050 [ 6912/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.129700 [ 7040/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.129995 [ 7168/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.118778 [ 7296/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.096652 [ 7424/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.112036 [ 7552/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.128978 [ 7680/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.169228 [ 7808/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.112927 [ 7936/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.127807 [ 8064/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.101259 [ 8192/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.124433 [ 8320/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.121911 [ 8448/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.115631 [ 8576/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.121609 [ 8704/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.142697 [ 8832/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.121571 [ 8960/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.116054 [ 9088/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.117408 [ 9216/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.142025 [ 9344/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.117279 [ 9472/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.113854 [ 9600/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.121047 [ 9728/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.119925 [ 9856/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.089761 [ 9984/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.139789 [10112/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.109132 [10240/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.112898 [10368/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.093240 [10496/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.141808 [10624/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.107454 [10752/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.111442 [10880/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.126390 [11008/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.101753 [11136/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.096980 [11264/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.124053 [11392/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.106425 [11520/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.152050 [11648/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.105175 [11776/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.140913 [11904/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.111424 [12032/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.115847 [12160/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.115844 [12288/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.109555 [12416/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.114624 [12544/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.118502 [12672/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.113735 [12800/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.094622 [12928/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.143663 [13056/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.104032 [13184/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.122501 [13312/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.115273 [13440/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.112653 [13568/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.135294 [13696/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.112866 [13824/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.095614 [13952/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.109519 [14080/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.106222 [14208/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.133520 [14336/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.127567 [14464/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.110521 [14592/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.159017 [14720/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.151810 [14848/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.103161 [14976/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.120420 [15104/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.110892 [15232/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.125969 [15360/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.139185 [15488/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.125665 [15616/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.101883 [15744/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.124594 [15872/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.111045 [16000/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.091261 [16128/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.122283 [16256/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.127358 [16384/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.135315 [16512/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.146812 [16640/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.127322 [16768/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.137028 [16896/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.137683 [17024/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.118452 [17152/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.098645 [17280/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.129093 [17408/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.111675 [17536/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.169347 [17664/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.152431 [17792/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.105441 [17920/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.129147 [18048/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.149411 [18176/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.087413 [18304/24872]: 0%| | 0/194 [00:12<?, ?it/s]
loss: 0.107444 [18432/24872]: 0%| | 0/194 [00:12<?, ?it/s]
loss: 0.127498 [18560/24872]: 0%| | 0/194 [00:12<?, ?it/s]
loss: 0.112358 [18688/24872]: 0%| | 0/194 [00:12<?, ?it/s]
loss: 0.120889 [18816/24872]: 0%| | 0/194 [00:12<?, ?it/s]
loss: 0.126029 [18944/24872]: 0%| | 0/194 [00:12<?, ?it/s]
loss: 0.114754 [19072/24872]: 0%| | 0/194 [00:12<?, ?it/s]
loss: 0.095427 [19200/24872]: 0%| | 0/194 [00:12<?, ?it/s]
loss: 0.117954 [19328/24872]: 0%| | 0/194 [00:12<?, ?it/s]
loss: 0.110531 [19456/24872]: 0%| | 0/194 [00:12<?, ?it/s]
loss: 0.102071 [19584/24872]: 0%| | 0/194 [00:12<?, ?it/s]
loss: 0.119405 [19712/24872]: 0%| | 0/194 [00:12<?, ?it/s]
loss: 0.106887 [19840/24872]: 0%| | 0/194 [00:13<?, ?it/s]
loss: 0.123786 [19968/24872]: 0%| | 0/194 [00:13<?, ?it/s]
loss: 0.121922 [20096/24872]: 0%| | 0/194 [00:13<?, ?it/s]
loss: 0.096809 [20224/24872]: 0%| | 0/194 [00:13<?, ?it/s]
loss: 0.142259 [20352/24872]: 0%| | 0/194 [00:13<?, ?it/s]
loss: 0.095430 [20480/24872]: 0%| | 0/194 [00:13<?, ?it/s]
loss: 0.105153 [20608/24872]: 0%| | 0/194 [00:13<?, ?it/s]
loss: 0.143353 [20736/24872]: 0%| | 0/194 [00:13<?, ?it/s]
loss: 0.104027 [20864/24872]: 0%| | 0/194 [00:13<?, ?it/s]
loss: 0.110210 [20992/24872]: 0%| | 0/194 [00:13<?, ?it/s]
loss: 0.135857 [21120/24872]: 0%| | 0/194 [00:13<?, ?it/s]
loss: 0.124146 [21248/24872]: 0%| | 0/194 [00:13<?, ?it/s]
loss: 0.120879 [21376/24872]: 0%| | 0/194 [00:14<?, ?it/s]
loss: 0.133698 [21504/24872]: 0%| | 0/194 [00:14<?, ?it/s]
loss: 0.104613 [21632/24872]: 0%| | 0/194 [00:14<?, ?it/s]
loss: 0.116504 [21760/24872]: 0%| | 0/194 [00:14<?, ?it/s]
loss: 0.148450 [21888/24872]: 0%| | 0/194 [00:14<?, ?it/s]
loss: 0.131907 [22016/24872]: 0%| | 0/194 [00:14<?, ?it/s]
loss: 0.127916 [22144/24872]: 0%| | 0/194 [00:14<?, ?it/s]
loss: 0.117918 [22272/24872]: 0%| | 0/194 [00:14<?, ?it/s]
loss: 0.122209 [22400/24872]: 0%| | 0/194 [00:14<?, ?it/s]
loss: 0.158813 [22528/24872]: 0%| | 0/194 [00:14<?, ?it/s]
loss: 0.077221 [22656/24872]: 0%| | 0/194 [00:14<?, ?it/s]
loss: 0.097209 [22784/24872]: 0%| | 0/194 [00:14<?, ?it/s]
loss: 0.090701 [22912/24872]: 0%| | 0/194 [00:15<?, ?it/s]
loss: 0.132273 [23040/24872]: 0%| | 0/194 [00:15<?, ?it/s]
loss: 0.127990 [23168/24872]: 0%| | 0/194 [00:15<?, ?it/s]
loss: 0.077333 [23296/24872]: 0%| | 0/194 [00:15<?, ?it/s]
loss: 0.130936 [23424/24872]: 0%| | 0/194 [00:15<?, ?it/s]
loss: 0.091562 [23552/24872]: 0%| | 0/194 [00:15<?, ?it/s]
loss: 0.136439 [23680/24872]: 0%| | 0/194 [00:15<?, ?it/s]
loss: 0.143330 [23808/24872]: 0%| | 0/194 [00:15<?, ?it/s]
loss: 0.117498 [23936/24872]: 0%| | 0/194 [00:15<?, ?it/s]
loss: 0.127413 [24064/24872]: 0%| | 0/194 [00:15<?, ?it/s]
loss: 0.078787 [24192/24872]: 0%| | 0/194 [00:15<?, ?it/s]
loss: 0.108859 [24320/24872]: 0%| | 0/194 [00:15<?, ?it/s]
loss: 0.097398 [24448/24872]: 0%| | 0/194 [00:16<?, ?it/s]
loss: 0.108236 [24576/24872]: 0%| | 0/194 [00:16<?, ?it/s]
loss: 0.107005 [24704/24872]: 0%| | 0/194 [00:16<?, ?it/s]
loss: 0.110402 [24832/24872]: 0%| | 0/194 [00:16<?, ?it/s]
loss: 0.101464 [24872/24872]: 0%| | 0/194 [00:16<?, ?it/s]
loss: 0.101464 [24872/24872]: : 195it [00:16, 11.91it/s]
Epoch 3, time=310.34s
0%| | 0/194 [00:00<?, ?it/s]
loss: 0.119402 [ 128/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.131286 [ 256/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.131531 [ 384/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.116048 [ 512/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.109930 [ 640/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.128121 [ 768/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.142619 [ 896/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.131710 [ 1024/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.137871 [ 1152/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.129417 [ 1280/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.119745 [ 1408/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.115567 [ 1536/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.126239 [ 1664/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.088024 [ 1792/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.108964 [ 1920/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.119260 [ 2048/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.122249 [ 2176/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.106290 [ 2304/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.161427 [ 2432/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.105941 [ 2560/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.124007 [ 2688/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.115489 [ 2816/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.122746 [ 2944/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.115904 [ 3072/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.116148 [ 3200/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.104947 [ 3328/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.100740 [ 3456/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.102755 [ 3584/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.140414 [ 3712/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.119608 [ 3840/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.079331 [ 3968/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.131910 [ 4096/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.099608 [ 4224/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.084702 [ 4352/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.156537 [ 4480/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.097046 [ 4608/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.098765 [ 4736/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.120611 [ 4864/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.097527 [ 4992/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.140191 [ 5120/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.090891 [ 5248/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.117774 [ 5376/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.133018 [ 5504/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.137859 [ 5632/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.128812 [ 5760/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.126515 [ 5888/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.111136 [ 6016/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.140248 [ 6144/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.129658 [ 6272/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.119200 [ 6400/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.106938 [ 6528/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.118539 [ 6656/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.105835 [ 6784/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.142429 [ 6912/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.124147 [ 7040/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.125275 [ 7168/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.116629 [ 7296/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.090050 [ 7424/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.111292 [ 7552/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.132641 [ 7680/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.165942 [ 7808/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.108522 [ 7936/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.125122 [ 8064/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.099202 [ 8192/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.120634 [ 8320/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.120501 [ 8448/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.110470 [ 8576/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.120224 [ 8704/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.139168 [ 8832/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.120933 [ 8960/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.116996 [ 9088/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.124582 [ 9216/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.132996 [ 9344/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.116990 [ 9472/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.114004 [ 9600/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.119640 [ 9728/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.115269 [ 9856/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.097144 [ 9984/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.140464 [10112/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.117845 [10240/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.106558 [10368/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.095945 [10496/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.148242 [10624/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.121420 [10752/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.137261 [10880/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.142510 [11008/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.116779 [11136/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.116028 [11264/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.136984 [11392/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.103319 [11520/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.169156 [11648/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.105944 [11776/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.154509 [11904/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.107585 [12032/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.116278 [12160/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.118739 [12288/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.115331 [12416/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.125173 [12544/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.127902 [12672/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.129557 [12800/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.103445 [12928/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.155285 [13056/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.109323 [13184/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.134495 [13312/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.117870 [13440/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.132260 [13568/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.137041 [13696/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.113521 [13824/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.096010 [13952/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.108155 [14080/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.104059 [14208/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.132742 [14336/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.122762 [14464/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.106073 [14592/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.151552 [14720/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.146078 [14848/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.106745 [14976/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.115599 [15104/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.097168 [15232/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.119822 [15360/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.142026 [15488/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.122076 [15616/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.098601 [15744/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.127162 [15872/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.116880 [16000/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.090164 [16128/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.114801 [16256/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.128108 [16384/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.125145 [16512/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.138362 [16640/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.123330 [16768/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.131663 [16896/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.136164 [17024/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.117579 [17152/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.094894 [17280/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.121010 [17408/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.114749 [17536/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.157325 [17664/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.142598 [17792/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.091436 [17920/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.124785 [18048/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.139205 [18176/24872]: 0%| | 0/194 [00:12<?, ?it/s]
loss: 0.083500 [18304/24872]: 0%| | 0/194 [00:12<?, ?it/s]
loss: 0.106509 [18432/24872]: 0%| | 0/194 [00:12<?, ?it/s]
loss: 0.119171 [18560/24872]: 0%| | 0/194 [00:12<?, ?it/s]
loss: 0.100534 [18688/24872]: 0%| | 0/194 [00:12<?, ?it/s]
loss: 0.118109 [18816/24872]: 0%| | 0/194 [00:12<?, ?it/s]
loss: 0.114641 [18944/24872]: 0%| | 0/194 [00:12<?, ?it/s]
loss: 0.110090 [19072/24872]: 0%| | 0/194 [00:12<?, ?it/s]
loss: 0.097528 [19200/24872]: 0%| | 0/194 [00:12<?, ?it/s]
loss: 0.111155 [19328/24872]: 0%| | 0/194 [00:12<?, ?it/s]
loss: 0.108491 [19456/24872]: 0%| | 0/194 [00:12<?, ?it/s]
loss: 0.092854 [19584/24872]: 0%| | 0/194 [00:12<?, ?it/s]
loss: 0.114485 [19712/24872]: 0%| | 0/194 [00:13<?, ?it/s]
loss: 0.100731 [19840/24872]: 0%| | 0/194 [00:13<?, ?it/s]
loss: 0.117809 [19968/24872]: 0%| | 0/194 [00:13<?, ?it/s]
loss: 0.118460 [20096/24872]: 0%| | 0/194 [00:13<?, ?it/s]
loss: 0.098989 [20224/24872]: 0%| | 0/194 [00:13<?, ?it/s]
loss: 0.135839 [20352/24872]: 0%| | 0/194 [00:13<?, ?it/s]
loss: 0.088366 [20480/24872]: 0%| | 0/194 [00:13<?, ?it/s]
loss: 0.099137 [20608/24872]: 0%| | 0/194 [00:13<?, ?it/s]
loss: 0.141747 [20736/24872]: 0%| | 0/194 [00:13<?, ?it/s]
loss: 0.102643 [20864/24872]: 0%| | 0/194 [00:13<?, ?it/s]
loss: 0.104523 [20992/24872]: 0%| | 0/194 [00:13<?, ?it/s]
loss: 0.134112 [21120/24872]: 0%| | 0/194 [00:13<?, ?it/s]
loss: 0.121684 [21248/24872]: 0%| | 0/194 [00:14<?, ?it/s]
loss: 0.128377 [21376/24872]: 0%| | 0/194 [00:14<?, ?it/s]
loss: 0.126917 [21504/24872]: 0%| | 0/194 [00:14<?, ?it/s]
loss: 0.111346 [21632/24872]: 0%| | 0/194 [00:14<?, ?it/s]
loss: 0.113520 [21760/24872]: 0%| | 0/194 [00:14<?, ?it/s]
loss: 0.146568 [21888/24872]: 0%| | 0/194 [00:14<?, ?it/s]
loss: 0.129307 [22016/24872]: 0%| | 0/194 [00:14<?, ?it/s]
loss: 0.120659 [22144/24872]: 0%| | 0/194 [00:14<?, ?it/s]
loss: 0.115699 [22272/24872]: 0%| | 0/194 [00:14<?, ?it/s]
loss: 0.110425 [22400/24872]: 0%| | 0/194 [00:14<?, ?it/s]
loss: 0.160060 [22528/24872]: 0%| | 0/194 [00:14<?, ?it/s]
loss: 0.073516 [22656/24872]: 0%| | 0/194 [00:14<?, ?it/s]
loss: 0.089481 [22784/24872]: 0%| | 0/194 [00:15<?, ?it/s]
loss: 0.083037 [22912/24872]: 0%| | 0/194 [00:15<?, ?it/s]
loss: 0.127750 [23040/24872]: 0%| | 0/194 [00:15<?, ?it/s]
loss: 0.124128 [23168/24872]: 0%| | 0/194 [00:15<?, ?it/s]
loss: 0.075343 [23296/24872]: 0%| | 0/194 [00:15<?, ?it/s]
loss: 0.122463 [23424/24872]: 0%| | 0/194 [00:15<?, ?it/s]
loss: 0.093852 [23552/24872]: 0%| | 0/194 [00:15<?, ?it/s]
loss: 0.129486 [23680/24872]: 0%| | 0/194 [00:15<?, ?it/s]
loss: 0.141139 [23808/24872]: 0%| | 0/194 [00:15<?, ?it/s]
loss: 0.112702 [23936/24872]: 0%| | 0/194 [00:15<?, ?it/s]
loss: 0.121834 [24064/24872]: 0%| | 0/194 [00:15<?, ?it/s]
loss: 0.071237 [24192/24872]: 0%| | 0/194 [00:15<?, ?it/s]
loss: 0.102999 [24320/24872]: 0%| | 0/194 [00:16<?, ?it/s]
loss: 0.102082 [24448/24872]: 0%| | 0/194 [00:16<?, ?it/s]
loss: 0.104919 [24576/24872]: 0%| | 0/194 [00:16<?, ?it/s]
loss: 0.105393 [24704/24872]: 0%| | 0/194 [00:16<?, ?it/s]
loss: 0.122163 [24832/24872]: 0%| | 0/194 [00:16<?, ?it/s]
loss: 0.094382 [24872/24872]: 0%| | 0/194 [00:16<?, ?it/s]
loss: 0.094382 [24872/24872]: : 195it [00:16, 11.87it/s]
Epoch 4, time=326.77s
0%| | 0/194 [00:00<?, ?it/s]
loss: 0.115482 [ 128/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.144683 [ 256/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.125683 [ 384/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.108536 [ 512/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.113327 [ 640/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.130784 [ 768/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.139284 [ 896/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.125854 [ 1024/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.135503 [ 1152/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.143905 [ 1280/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.118188 [ 1408/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.127203 [ 1536/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.124014 [ 1664/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.093733 [ 1792/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.088874 [ 1920/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.134144 [ 2048/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.117296 [ 2176/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.104763 [ 2304/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.158449 [ 2432/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.110865 [ 2560/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.144001 [ 2688/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.115270 [ 2816/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.134674 [ 2944/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.115287 [ 3072/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.119714 [ 3200/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.104830 [ 3328/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.101238 [ 3456/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.118576 [ 3584/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.138360 [ 3712/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.138665 [ 3840/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.081910 [ 3968/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.142385 [ 4096/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.105375 [ 4224/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.089501 [ 4352/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.161819 [ 4480/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.094685 [ 4608/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.109611 [ 4736/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.124955 [ 4864/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.095641 [ 4992/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.135199 [ 5120/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.089601 [ 5248/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.115090 [ 5376/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.130950 [ 5504/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.134421 [ 5632/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.123954 [ 5760/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.119401 [ 5888/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.113794 [ 6016/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.139460 [ 6144/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.118391 [ 6272/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.121229 [ 6400/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.105131 [ 6528/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.122467 [ 6656/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.101605 [ 6784/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.144358 [ 6912/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.113774 [ 7040/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.115402 [ 7168/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.111304 [ 7296/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.084502 [ 7424/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.103583 [ 7552/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.121720 [ 7680/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.141434 [ 7808/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.103826 [ 7936/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.131780 [ 8064/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.081197 [ 8192/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.117298 [ 8320/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.121775 [ 8448/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.102202 [ 8576/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.123913 [ 8704/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.141926 [ 8832/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.118986 [ 8960/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.107718 [ 9088/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.108832 [ 9216/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.144605 [ 9344/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.102989 [ 9472/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.109706 [ 9600/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.106086 [ 9728/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.120786 [ 9856/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.086797 [ 9984/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.121720 [10112/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.107013 [10240/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.105406 [10368/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.092806 [10496/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.139725 [10624/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.103633 [10752/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.113153 [10880/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.118551 [11008/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.089305 [11136/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.088159 [11264/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.122389 [11392/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.103332 [11520/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.135404 [11648/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.094497 [11776/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.131116 [11904/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.104762 [12032/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.107577 [12160/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.108508 [12288/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.109574 [12416/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.107051 [12544/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.101733 [12672/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.102539 [12800/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.092039 [12928/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.123712 [13056/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.110517 [13184/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.107324 [13312/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.112747 [13440/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.102082 [13568/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.138933 [13696/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.112692 [13824/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.092728 [13952/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.105109 [14080/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.095822 [14208/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.133552 [14336/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.121105 [14464/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.107695 [14592/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.153457 [14720/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.138965 [14848/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.122834 [14976/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.129796 [15104/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.102767 [15232/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.119751 [15360/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.145643 [15488/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.128304 [15616/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.086833 [15744/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.138334 [15872/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.112518 [16000/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.102812 [16128/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.115104 [16256/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.133585 [16384/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.129380 [16512/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.144541 [16640/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.138490 [16768/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.123494 [16896/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.152053 [17024/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.113483 [17152/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.097124 [17280/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.112748 [17408/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.116563 [17536/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.156056 [17664/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.149255 [17792/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.091398 [17920/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.125281 [18048/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.144054 [18176/24872]: 0%| | 0/194 [00:12<?, ?it/s]
loss: 0.077866 [18304/24872]: 0%| | 0/194 [00:12<?, ?it/s]
loss: 0.109147 [18432/24872]: 0%| | 0/194 [00:12<?, ?it/s]
loss: 0.108301 [18560/24872]: 0%| | 0/194 [00:12<?, ?it/s]
loss: 0.104942 [18688/24872]: 0%| | 0/194 [00:12<?, ?it/s]
loss: 0.106434 [18816/24872]: 0%| | 0/194 [00:12<?, ?it/s]
loss: 0.110041 [18944/24872]: 0%| | 0/194 [00:12<?, ?it/s]
loss: 0.115125 [19072/24872]: 0%| | 0/194 [00:12<?, ?it/s]
loss: 0.090754 [19200/24872]: 0%| | 0/194 [00:12<?, ?it/s]
loss: 0.113472 [19328/24872]: 0%| | 0/194 [00:12<?, ?it/s]
loss: 0.105000 [19456/24872]: 0%| | 0/194 [00:12<?, ?it/s]
loss: 0.095629 [19584/24872]: 0%| | 0/194 [00:12<?, ?it/s]
loss: 0.102823 [19712/24872]: 0%| | 0/194 [00:13<?, ?it/s]
loss: 0.101037 [19840/24872]: 0%| | 0/194 [00:13<?, ?it/s]
loss: 0.111497 [19968/24872]: 0%| | 0/194 [00:13<?, ?it/s]
loss: 0.131481 [20096/24872]: 0%| | 0/194 [00:13<?, ?it/s]
loss: 0.091065 [20224/24872]: 0%| | 0/194 [00:13<?, ?it/s]
loss: 0.142334 [20352/24872]: 0%| | 0/194 [00:13<?, ?it/s]
loss: 0.113262 [20480/24872]: 0%| | 0/194 [00:13<?, ?it/s]
loss: 0.095543 [20608/24872]: 0%| | 0/194 [00:13<?, ?it/s]
loss: 0.151740 [20736/24872]: 0%| | 0/194 [00:13<?, ?it/s]
loss: 0.098334 [20864/24872]: 0%| | 0/194 [00:13<?, ?it/s]
loss: 0.115295 [20992/24872]: 0%| | 0/194 [00:13<?, ?it/s]
loss: 0.127461 [21120/24872]: 0%| | 0/194 [00:13<?, ?it/s]
loss: 0.125243 [21248/24872]: 0%| | 0/194 [00:14<?, ?it/s]
loss: 0.114011 [21376/24872]: 0%| | 0/194 [00:14<?, ?it/s]
loss: 0.130069 [21504/24872]: 0%| | 0/194 [00:14<?, ?it/s]
loss: 0.101189 [21632/24872]: 0%| | 0/194 [00:14<?, ?it/s]
loss: 0.113782 [21760/24872]: 0%| | 0/194 [00:14<?, ?it/s]
loss: 0.144200 [21888/24872]: 0%| | 0/194 [00:14<?, ?it/s]
loss: 0.115605 [22016/24872]: 0%| | 0/194 [00:14<?, ?it/s]
loss: 0.123985 [22144/24872]: 0%| | 0/194 [00:14<?, ?it/s]
loss: 0.113765 [22272/24872]: 0%| | 0/194 [00:14<?, ?it/s]
loss: 0.122737 [22400/24872]: 0%| | 0/194 [00:14<?, ?it/s]
loss: 0.150093 [22528/24872]: 0%| | 0/194 [00:14<?, ?it/s]
loss: 0.083130 [22656/24872]: 0%| | 0/194 [00:14<?, ?it/s]
loss: 0.090427 [22784/24872]: 0%| | 0/194 [00:15<?, ?it/s]
loss: 0.085570 [22912/24872]: 0%| | 0/194 [00:15<?, ?it/s]
loss: 0.140976 [23040/24872]: 0%| | 0/194 [00:15<?, ?it/s]
loss: 0.122604 [23168/24872]: 0%| | 0/194 [00:15<?, ?it/s]
loss: 0.092501 [23296/24872]: 0%| | 0/194 [00:15<?, ?it/s]
loss: 0.119329 [23424/24872]: 0%| | 0/194 [00:15<?, ?it/s]
loss: 0.094846 [23552/24872]: 0%| | 0/194 [00:15<?, ?it/s]
loss: 0.135145 [23680/24872]: 0%| | 0/194 [00:15<?, ?it/s]
loss: 0.154619 [23808/24872]: 0%| | 0/194 [00:15<?, ?it/s]
loss: 0.116604 [23936/24872]: 0%| | 0/194 [00:15<?, ?it/s]
loss: 0.122716 [24064/24872]: 0%| | 0/194 [00:15<?, ?it/s]
loss: 0.097331 [24192/24872]: 0%| | 0/194 [00:15<?, ?it/s]
loss: 0.106681 [24320/24872]: 0%| | 0/194 [00:16<?, ?it/s]
loss: 0.107646 [24448/24872]: 0%| | 0/194 [00:16<?, ?it/s]
loss: 0.102283 [24576/24872]: 0%| | 0/194 [00:16<?, ?it/s]
loss: 0.104617 [24704/24872]: 0%| | 0/194 [00:16<?, ?it/s]
loss: 0.125635 [24832/24872]: 0%| | 0/194 [00:16<?, ?it/s]
loss: 0.093095 [24872/24872]: 0%| | 0/194 [00:16<?, ?it/s]
loss: 0.093095 [24872/24872]: : 195it [00:16, 11.85it/s]
Epoch 5, time=343.23s
0%| | 0/194 [00:00<?, ?it/s]
loss: 0.135984 [ 128/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.128137 [ 256/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.131898 [ 384/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.120286 [ 512/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.099057 [ 640/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.126843 [ 768/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.135402 [ 896/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.131316 [ 1024/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.120182 [ 1152/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.130453 [ 1280/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.129970 [ 1408/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.112572 [ 1536/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.125295 [ 1664/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.089624 [ 1792/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.084192 [ 1920/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.124132 [ 2048/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.110510 [ 2176/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.104088 [ 2304/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.150071 [ 2432/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.110105 [ 2560/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.118032 [ 2688/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.114455 [ 2816/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.123708 [ 2944/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.114467 [ 3072/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.110160 [ 3200/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.094648 [ 3328/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.098920 [ 3456/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.099874 [ 3584/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.141364 [ 3712/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.114826 [ 3840/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.082215 [ 3968/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.130116 [ 4096/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.097985 [ 4224/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.088803 [ 4352/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.157218 [ 4480/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.089015 [ 4608/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.092691 [ 4736/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.108970 [ 4864/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.097380 [ 4992/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.130168 [ 5120/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.084251 [ 5248/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.112017 [ 5376/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.132685 [ 5504/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.127327 [ 5632/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.124640 [ 5760/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.113205 [ 5888/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.112144 [ 6016/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.133050 [ 6144/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.116337 [ 6272/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.114535 [ 6400/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.094502 [ 6528/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.116093 [ 6656/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.096158 [ 6784/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.138742 [ 6912/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.108635 [ 7040/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.113965 [ 7168/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.104538 [ 7296/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.079217 [ 7424/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.096691 [ 7552/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.115788 [ 7680/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.129772 [ 7808/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.097143 [ 7936/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.128147 [ 8064/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.074052 [ 8192/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.107239 [ 8320/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.111993 [ 8448/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.096041 [ 8576/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.112699 [ 8704/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.130263 [ 8832/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.110460 [ 8960/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.098393 [ 9088/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.104670 [ 9216/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.134739 [ 9344/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.095942 [ 9472/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.108405 [ 9600/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.101723 [ 9728/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.113841 [ 9856/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.085481 [ 9984/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.119706 [10112/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.098946 [10240/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.101602 [10368/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.090460 [10496/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.135039 [10624/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.099612 [10752/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.112503 [10880/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.113689 [11008/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.086888 [11136/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.084999 [11264/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.121243 [11392/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.103250 [11520/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.129619 [11648/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.096567 [11776/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.125914 [11904/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.102248 [12032/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.099967 [12160/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.111232 [12288/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.103727 [12416/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.100132 [12544/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.105163 [12672/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.093817 [12800/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.098030 [12928/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.131305 [13056/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.098686 [13184/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.119736 [13312/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.101004 [13440/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.112283 [13568/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.122194 [13696/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.126203 [13824/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.092567 [13952/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.097047 [14080/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.113086 [14208/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.129298 [14336/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.153042 [14464/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.098931 [14592/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.160450 [14720/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.145849 [14848/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.109042 [14976/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.130558 [15104/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.081714 [15232/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.123637 [15360/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.165067 [15488/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.123385 [15616/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.102661 [15744/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.105451 [15872/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.125136 [16000/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.086909 [16128/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.121628 [16256/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.127699 [16384/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.129342 [16512/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.148079 [16640/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.129475 [16768/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.125167 [16896/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.134648 [17024/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.110171 [17152/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.103254 [17280/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.120943 [17408/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.116608 [17536/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.148499 [17664/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.137325 [17792/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.088322 [17920/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.131541 [18048/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.142651 [18176/24872]: 0%| | 0/194 [00:12<?, ?it/s]
loss: 0.085015 [18304/24872]: 0%| | 0/194 [00:12<?, ?it/s]
loss: 0.090563 [18432/24872]: 0%| | 0/194 [00:12<?, ?it/s]
loss: 0.108414 [18560/24872]: 0%| | 0/194 [00:12<?, ?it/s]
loss: 0.096026 [18688/24872]: 0%| | 0/194 [00:12<?, ?it/s]
loss: 0.104778 [18816/24872]: 0%| | 0/194 [00:12<?, ?it/s]
loss: 0.104299 [18944/24872]: 0%| | 0/194 [00:12<?, ?it/s]
loss: 0.105430 [19072/24872]: 0%| | 0/194 [00:12<?, ?it/s]
loss: 0.093099 [19200/24872]: 0%| | 0/194 [00:12<?, ?it/s]
loss: 0.099475 [19328/24872]: 0%| | 0/194 [00:12<?, ?it/s]
loss: 0.121786 [19456/24872]: 0%| | 0/194 [00:12<?, ?it/s]
loss: 0.088534 [19584/24872]: 0%| | 0/194 [00:12<?, ?it/s]
loss: 0.112324 [19712/24872]: 0%| | 0/194 [00:13<?, ?it/s]
loss: 0.094913 [19840/24872]: 0%| | 0/194 [00:13<?, ?it/s]
loss: 0.116084 [19968/24872]: 0%| | 0/194 [00:13<?, ?it/s]
loss: 0.117050 [20096/24872]: 0%| | 0/194 [00:13<?, ?it/s]
loss: 0.093903 [20224/24872]: 0%| | 0/194 [00:13<?, ?it/s]
loss: 0.126809 [20352/24872]: 0%| | 0/194 [00:13<?, ?it/s]
loss: 0.090933 [20480/24872]: 0%| | 0/194 [00:13<?, ?it/s]
loss: 0.108169 [20608/24872]: 0%| | 0/194 [00:13<?, ?it/s]
loss: 0.137668 [20736/24872]: 0%| | 0/194 [00:13<?, ?it/s]
loss: 0.097962 [20864/24872]: 0%| | 0/194 [00:13<?, ?it/s]
loss: 0.101505 [20992/24872]: 0%| | 0/194 [00:13<?, ?it/s]
loss: 0.135520 [21120/24872]: 0%| | 0/194 [00:13<?, ?it/s]
loss: 0.131987 [21248/24872]: 0%| | 0/194 [00:14<?, ?it/s]
loss: 0.130989 [21376/24872]: 0%| | 0/194 [00:14<?, ?it/s]
loss: 0.132646 [21504/24872]: 0%| | 0/194 [00:14<?, ?it/s]
loss: 0.095964 [21632/24872]: 0%| | 0/194 [00:14<?, ?it/s]
loss: 0.115399 [21760/24872]: 0%| | 0/194 [00:14<?, ?it/s]
loss: 0.132165 [21888/24872]: 0%| | 0/194 [00:14<?, ?it/s]
loss: 0.116633 [22016/24872]: 0%| | 0/194 [00:14<?, ?it/s]
loss: 0.113660 [22144/24872]: 0%| | 0/194 [00:14<?, ?it/s]
loss: 0.108729 [22272/24872]: 0%| | 0/194 [00:14<?, ?it/s]
loss: 0.110341 [22400/24872]: 0%| | 0/194 [00:14<?, ?it/s]
loss: 0.150427 [22528/24872]: 0%| | 0/194 [00:14<?, ?it/s]
loss: 0.075423 [22656/24872]: 0%| | 0/194 [00:14<?, ?it/s]
loss: 0.084903 [22784/24872]: 0%| | 0/194 [00:15<?, ?it/s]
loss: 0.084419 [22912/24872]: 0%| | 0/194 [00:15<?, ?it/s]
loss: 0.123150 [23040/24872]: 0%| | 0/194 [00:15<?, ?it/s]
loss: 0.127589 [23168/24872]: 0%| | 0/194 [00:15<?, ?it/s]
loss: 0.080810 [23296/24872]: 0%| | 0/194 [00:15<?, ?it/s]
loss: 0.117638 [23424/24872]: 0%| | 0/194 [00:15<?, ?it/s]
loss: 0.092276 [23552/24872]: 0%| | 0/194 [00:15<?, ?it/s]
loss: 0.130594 [23680/24872]: 0%| | 0/194 [00:15<?, ?it/s]
loss: 0.133716 [23808/24872]: 0%| | 0/194 [00:15<?, ?it/s]
loss: 0.117592 [23936/24872]: 0%| | 0/194 [00:15<?, ?it/s]
loss: 0.113938 [24064/24872]: 0%| | 0/194 [00:15<?, ?it/s]
loss: 0.073992 [24192/24872]: 0%| | 0/194 [00:16<?, ?it/s]
loss: 0.109925 [24320/24872]: 0%| | 0/194 [00:16<?, ?it/s]
loss: 0.091012 [24448/24872]: 0%| | 0/194 [00:16<?, ?it/s]
loss: 0.110299 [24576/24872]: 0%| | 0/194 [00:16<?, ?it/s]
loss: 0.104093 [24704/24872]: 0%| | 0/194 [00:16<?, ?it/s]
loss: 0.122265 [24832/24872]: 0%| | 0/194 [00:16<?, ?it/s]
loss: 0.111512 [24872/24872]: 0%| | 0/194 [00:16<?, ?it/s]
loss: 0.111512 [24872/24872]: : 195it [00:16, 11.83it/s]
Epoch 6, time=359.71s
0%| | 0/194 [00:00<?, ?it/s]
loss: 0.112614 [ 128/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.151051 [ 256/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.123797 [ 384/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.116394 [ 512/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.101415 [ 640/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.121237 [ 768/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.131907 [ 896/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.129855 [ 1024/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.118213 [ 1152/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.119219 [ 1280/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.115996 [ 1408/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.106437 [ 1536/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.117904 [ 1664/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.097768 [ 1792/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.085648 [ 1920/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.127223 [ 2048/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.110564 [ 2176/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.097328 [ 2304/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.148503 [ 2432/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.106677 [ 2560/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.113639 [ 2688/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.118133 [ 2816/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.119934 [ 2944/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.113868 [ 3072/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.107213 [ 3200/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.102406 [ 3328/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.093730 [ 3456/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.096802 [ 3584/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.138280 [ 3712/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.112403 [ 3840/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.080992 [ 3968/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.133848 [ 4096/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.099750 [ 4224/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.082470 [ 4352/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.156670 [ 4480/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.089928 [ 4608/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.093648 [ 4736/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.103966 [ 4864/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.085654 [ 4992/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.121495 [ 5120/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.086325 [ 5248/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.118050 [ 5376/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.128985 [ 5504/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.129457 [ 5632/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.118591 [ 5760/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.113647 [ 5888/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.103646 [ 6016/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.136006 [ 6144/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.113741 [ 6272/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.107247 [ 6400/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.096015 [ 6528/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.116269 [ 6656/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.098891 [ 6784/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.127646 [ 6912/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.109426 [ 7040/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.110737 [ 7168/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.099353 [ 7296/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.077775 [ 7424/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.094458 [ 7552/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.115419 [ 7680/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.130187 [ 7808/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.094735 [ 7936/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.125467 [ 8064/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.079398 [ 8192/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.106829 [ 8320/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.113897 [ 8448/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.104033 [ 8576/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.107701 [ 8704/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.135369 [ 8832/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.108716 [ 8960/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.105990 [ 9088/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.111708 [ 9216/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.134618 [ 9344/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.098323 [ 9472/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.109131 [ 9600/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.101998 [ 9728/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.122315 [ 9856/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.080655 [ 9984/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.130575 [10112/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.104963 [10240/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.098897 [10368/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.083755 [10496/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.134214 [10624/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.103977 [10752/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.106544 [10880/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.108929 [11008/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.087149 [11136/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.088213 [11264/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.119119 [11392/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.087727 [11520/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.133344 [11648/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.085568 [11776/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.134618 [11904/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.110622 [12032/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.098544 [12160/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.101300 [12288/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.111231 [12416/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.097279 [12544/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.109546 [12672/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.093816 [12800/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.092787 [12928/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.148867 [13056/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.095717 [13184/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.122655 [13312/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.101683 [13440/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.114929 [13568/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.142226 [13696/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.129663 [13824/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.110762 [13952/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.099591 [14080/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.134331 [14208/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.130047 [14336/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.136396 [14464/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.119316 [14592/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.140500 [14720/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.162187 [14848/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.118233 [14976/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.133536 [15104/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.096246 [15232/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.115895 [15360/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.152743 [15488/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.121932 [15616/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.086575 [15744/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.121166 [15872/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.110939 [16000/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.088159 [16128/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.103482 [16256/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.112855 [16384/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.122954 [16512/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.140929 [16640/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.113824 [16768/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.126029 [16896/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.130452 [17024/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.110590 [17152/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.086723 [17280/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.109352 [17408/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.101764 [17536/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.138443 [17664/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.139048 [17792/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.084551 [17920/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.113246 [18048/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.153265 [18176/24872]: 0%| | 0/194 [00:12<?, ?it/s]
loss: 0.072396 [18304/24872]: 0%| | 0/194 [00:12<?, ?it/s]
loss: 0.099416 [18432/24872]: 0%| | 0/194 [00:12<?, ?it/s]
loss: 0.105226 [18560/24872]: 0%| | 0/194 [00:12<?, ?it/s]
loss: 0.088648 [18688/24872]: 0%| | 0/194 [00:12<?, ?it/s]
loss: 0.099920 [18816/24872]: 0%| | 0/194 [00:12<?, ?it/s]
loss: 0.103490 [18944/24872]: 0%| | 0/194 [00:12<?, ?it/s]
loss: 0.103307 [19072/24872]: 0%| | 0/194 [00:12<?, ?it/s]
loss: 0.077590 [19200/24872]: 0%| | 0/194 [00:12<?, ?it/s]
loss: 0.093775 [19328/24872]: 0%| | 0/194 [00:12<?, ?it/s]
loss: 0.091811 [19456/24872]: 0%| | 0/194 [00:12<?, ?it/s]
loss: 0.081388 [19584/24872]: 0%| | 0/194 [00:12<?, ?it/s]
loss: 0.103663 [19712/24872]: 0%| | 0/194 [00:13<?, ?it/s]
loss: 0.085748 [19840/24872]: 0%| | 0/194 [00:13<?, ?it/s]
loss: 0.105019 [19968/24872]: 0%| | 0/194 [00:13<?, ?it/s]
loss: 0.113398 [20096/24872]: 0%| | 0/194 [00:13<?, ?it/s]
loss: 0.079944 [20224/24872]: 0%| | 0/194 [00:13<?, ?it/s]
loss: 0.122621 [20352/24872]: 0%| | 0/194 [00:13<?, ?it/s]
loss: 0.084828 [20480/24872]: 0%| | 0/194 [00:13<?, ?it/s]
loss: 0.087333 [20608/24872]: 0%| | 0/194 [00:13<?, ?it/s]
loss: 0.128510 [20736/24872]: 0%| | 0/194 [00:13<?, ?it/s]
loss: 0.088029 [20864/24872]: 0%| | 0/194 [00:13<?, ?it/s]
loss: 0.088811 [20992/24872]: 0%| | 0/194 [00:13<?, ?it/s]
loss: 0.123448 [21120/24872]: 0%| | 0/194 [00:13<?, ?it/s]
loss: 0.110268 [21248/24872]: 0%| | 0/194 [00:14<?, ?it/s]
loss: 0.109451 [21376/24872]: 0%| | 0/194 [00:14<?, ?it/s]
loss: 0.119476 [21504/24872]: 0%| | 0/194 [00:14<?, ?it/s]
loss: 0.088493 [21632/24872]: 0%| | 0/194 [00:14<?, ?it/s]
loss: 0.107962 [21760/24872]: 0%| | 0/194 [00:14<?, ?it/s]
loss: 0.141233 [21888/24872]: 0%| | 0/194 [00:14<?, ?it/s]
loss: 0.105010 [22016/24872]: 0%| | 0/194 [00:14<?, ?it/s]
loss: 0.108928 [22144/24872]: 0%| | 0/194 [00:14<?, ?it/s]
loss: 0.114662 [22272/24872]: 0%| | 0/194 [00:14<?, ?it/s]
loss: 0.103149 [22400/24872]: 0%| | 0/194 [00:14<?, ?it/s]
loss: 0.138827 [22528/24872]: 0%| | 0/194 [00:14<?, ?it/s]
loss: 0.076665 [22656/24872]: 0%| | 0/194 [00:14<?, ?it/s]
loss: 0.086757 [22784/24872]: 0%| | 0/194 [00:15<?, ?it/s]
loss: 0.081258 [22912/24872]: 0%| | 0/194 [00:15<?, ?it/s]
loss: 0.120326 [23040/24872]: 0%| | 0/194 [00:15<?, ?it/s]
loss: 0.114432 [23168/24872]: 0%| | 0/194 [00:15<?, ?it/s]
loss: 0.073790 [23296/24872]: 0%| | 0/194 [00:15<?, ?it/s]
loss: 0.114386 [23424/24872]: 0%| | 0/194 [00:15<?, ?it/s]
loss: 0.084926 [23552/24872]: 0%| | 0/194 [00:15<?, ?it/s]
loss: 0.130714 [23680/24872]: 0%| | 0/194 [00:15<?, ?it/s]
loss: 0.131563 [23808/24872]: 0%| | 0/194 [00:15<?, ?it/s]
loss: 0.117874 [23936/24872]: 0%| | 0/194 [00:15<?, ?it/s]
loss: 0.111914 [24064/24872]: 0%| | 0/194 [00:15<?, ?it/s]
loss: 0.076596 [24192/24872]: 0%| | 0/194 [00:15<?, ?it/s]
loss: 0.113624 [24320/24872]: 0%| | 0/194 [00:16<?, ?it/s]
loss: 0.086416 [24448/24872]: 0%| | 0/194 [00:16<?, ?it/s]
loss: 0.115562 [24576/24872]: 0%| | 0/194 [00:16<?, ?it/s]
loss: 0.106703 [24704/24872]: 0%| | 0/194 [00:16<?, ?it/s]
loss: 0.110491 [24832/24872]: 0%| | 0/194 [00:16<?, ?it/s]
loss: 0.095379 [24872/24872]: 0%| | 0/194 [00:16<?, ?it/s]
loss: 0.095379 [24872/24872]: : 195it [00:16, 11.85it/s]
-------------------------------
LR=1e-05, batch_size=256
-------------------------------
Epoch 1, time=376.16s
0%| | 0/97 [00:00<?, ?it/s]
loss: 0.112657 [ 256/24872]: 0%| | 0/97 [00:00<?, ?it/s]
loss: 0.107971 [ 512/24872]: 0%| | 0/97 [00:00<?, ?it/s]
loss: 0.101506 [ 768/24872]: 0%| | 0/97 [00:00<?, ?it/s]
loss: 0.108027 [ 1024/24872]: 0%| | 0/97 [00:00<?, ?it/s]
loss: 0.114396 [ 1280/24872]: 0%| | 0/97 [00:00<?, ?it/s]
loss: 0.100568 [ 1536/24872]: 0%| | 0/97 [00:00<?, ?it/s]
loss: 0.096691 [ 1792/24872]: 0%| | 0/97 [00:00<?, ?it/s]
loss: 0.092114 [ 2048/24872]: 0%| | 0/97 [00:01<?, ?it/s]
loss: 0.093056 [ 2304/24872]: 0%| | 0/97 [00:01<?, ?it/s]
loss: 0.120716 [ 2560/24872]: 0%| | 0/97 [00:01<?, ?it/s]
loss: 0.103184 [ 2816/24872]: 0%| | 0/97 [00:01<?, ?it/s]
loss: 0.107511 [ 3072/24872]: 0%| | 0/97 [00:01<?, ?it/s]
loss: 0.095672 [ 3328/24872]: 0%| | 0/97 [00:01<?, ?it/s]
loss: 0.085505 [ 3584/24872]: 0%| | 0/97 [00:01<?, ?it/s]
loss: 0.112139 [ 3840/24872]: 0%| | 0/97 [00:01<?, ?it/s]
loss: 0.093192 [ 4096/24872]: 0%| | 0/97 [00:02<?, ?it/s]
loss: 0.078020 [ 4352/24872]: 0%| | 0/97 [00:02<?, ?it/s]
loss: 0.112541 [ 4608/24872]: 0%| | 0/97 [00:02<?, ?it/s]
loss: 0.095606 [ 4864/24872]: 0%| | 0/97 [00:02<?, ?it/s]
loss: 0.091945 [ 5120/24872]: 0%| | 0/97 [00:02<?, ?it/s]
loss: 0.090856 [ 5376/24872]: 0%| | 0/97 [00:02<?, ?it/s]
loss: 0.115465 [ 5632/24872]: 0%| | 0/97 [00:02<?, ?it/s]
loss: 0.109417 [ 5888/24872]: 0%| | 0/97 [00:02<?, ?it/s]
loss: 0.109315 [ 6144/24872]: 0%| | 0/97 [00:03<?, ?it/s]
loss: 0.101016 [ 6400/24872]: 0%| | 0/97 [00:03<?, ?it/s]
loss: 0.094011 [ 6656/24872]: 0%| | 0/97 [00:03<?, ?it/s]
loss: 0.105549 [ 6912/24872]: 0%| | 0/97 [00:03<?, ?it/s]
loss: 0.103654 [ 7168/24872]: 0%| | 0/97 [00:03<?, ?it/s]
loss: 0.081958 [ 7424/24872]: 0%| | 0/97 [00:03<?, ?it/s]
loss: 0.096639 [ 7680/24872]: 0%| | 0/97 [00:03<?, ?it/s]
loss: 0.106678 [ 7936/24872]: 0%| | 0/97 [00:03<?, ?it/s]
loss: 0.088477 [ 8192/24872]: 0%| | 0/97 [00:04<?, ?it/s]
loss: 0.097680 [ 8448/24872]: 0%| | 0/97 [00:04<?, ?it/s]
loss: 0.095653 [ 8704/24872]: 0%| | 0/97 [00:04<?, ?it/s]
loss: 0.119726 [ 8960/24872]: 0%| | 0/97 [00:04<?, ?it/s]
loss: 0.092152 [ 9216/24872]: 0%| | 0/97 [00:04<?, ?it/s]
loss: 0.104932 [ 9472/24872]: 0%| | 0/97 [00:04<?, ?it/s]
loss: 0.095256 [ 9728/24872]: 0%| | 0/97 [00:04<?, ?it/s]
loss: 0.088017 [ 9984/24872]: 0%| | 0/97 [00:04<?, ?it/s]
loss: 0.101753 [10240/24872]: 0%| | 0/97 [00:05<?, ?it/s]
loss: 0.087794 [10496/24872]: 0%| | 0/97 [00:05<?, ?it/s]
loss: 0.110123 [10752/24872]: 0%| | 0/97 [00:05<?, ?it/s]
loss: 0.096305 [11008/24872]: 0%| | 0/97 [00:05<?, ?it/s]
loss: 0.077289 [11264/24872]: 0%| | 0/97 [00:05<?, ?it/s]
loss: 0.094520 [11520/24872]: 0%| | 0/97 [00:05<?, ?it/s]
loss: 0.097824 [11776/24872]: 0%| | 0/97 [00:05<?, ?it/s]
loss: 0.109740 [12032/24872]: 0%| | 0/97 [00:05<?, ?it/s]
loss: 0.095577 [12288/24872]: 0%| | 0/97 [00:06<?, ?it/s]
loss: 0.091647 [12544/24872]: 0%| | 0/97 [00:06<?, ?it/s]
loss: 0.086681 [12800/24872]: 0%| | 0/97 [00:06<?, ?it/s]
loss: 0.096699 [13056/24872]: 0%| | 0/97 [00:06<?, ?it/s]
loss: 0.091751 [13312/24872]: 0%| | 0/97 [00:06<?, ?it/s]
loss: 0.084704 [13568/24872]: 0%| | 0/97 [00:06<?, ?it/s]
loss: 0.102956 [13824/24872]: 0%| | 0/97 [00:06<?, ?it/s]
loss: 0.080058 [14080/24872]: 0%| | 0/97 [00:06<?, ?it/s]
loss: 0.095639 [14336/24872]: 0%| | 0/97 [00:07<?, ?it/s]
loss: 0.098426 [14592/24872]: 0%| | 0/97 [00:07<?, ?it/s]
loss: 0.117832 [14848/24872]: 0%| | 0/97 [00:07<?, ?it/s]
loss: 0.097480 [15104/24872]: 0%| | 0/97 [00:07<?, ?it/s]
loss: 0.088544 [15360/24872]: 0%| | 0/97 [00:07<?, ?it/s]
loss: 0.113282 [15616/24872]: 0%| | 0/97 [00:07<?, ?it/s]
loss: 0.092652 [15872/24872]: 0%| | 0/97 [00:07<?, ?it/s]
loss: 0.084007 [16128/24872]: 0%| | 0/97 [00:07<?, ?it/s]
loss: 0.098087 [16384/24872]: 0%| | 0/97 [00:08<?, ?it/s]
loss: 0.113525 [16640/24872]: 0%| | 0/97 [00:08<?, ?it/s]
loss: 0.104181 [16896/24872]: 0%| | 0/97 [00:08<?, ?it/s]
loss: 0.110269 [17152/24872]: 0%| | 0/97 [00:08<?, ?it/s]
loss: 0.085347 [17408/24872]: 0%| | 0/97 [00:08<?, ?it/s]
loss: 0.109109 [17664/24872]: 0%| | 0/97 [00:08<?, ?it/s]
loss: 0.103992 [17920/24872]: 0%| | 0/97 [00:08<?, ?it/s]
loss: 0.121398 [18176/24872]: 0%| | 0/97 [00:08<?, ?it/s]
loss: 0.074626 [18432/24872]: 0%| | 0/97 [00:09<?, ?it/s]
loss: 0.089627 [18688/24872]: 0%| | 0/97 [00:09<?, ?it/s]
loss: 0.090340 [18944/24872]: 0%| | 0/97 [00:09<?, ?it/s]
loss: 0.081105 [19200/24872]: 0%| | 0/97 [00:09<?, ?it/s]
loss: 0.086651 [19456/24872]: 0%| | 0/97 [00:09<?, ?it/s]
loss: 0.085800 [19712/24872]: 0%| | 0/97 [00:09<?, ?it/s]
loss: 0.086774 [19968/24872]: 0%| | 0/97 [00:09<?, ?it/s]
loss: 0.088039 [20224/24872]: 0%| | 0/97 [00:09<?, ?it/s]
loss: 0.094978 [20480/24872]: 0%| | 0/97 [00:10<?, ?it/s]
loss: 0.104850 [20736/24872]: 0%| | 0/97 [00:10<?, ?it/s]
loss: 0.084735 [20992/24872]: 0%| | 0/97 [00:10<?, ?it/s]
loss: 0.113286 [21248/24872]: 0%| | 0/97 [00:10<?, ?it/s]
loss: 0.109647 [21504/24872]: 0%| | 0/97 [00:10<?, ?it/s]
loss: 0.088397 [21760/24872]: 0%| | 0/97 [00:10<?, ?it/s]
loss: 0.108713 [22016/24872]: 0%| | 0/97 [00:10<?, ?it/s]
loss: 0.100162 [22272/24872]: 0%| | 0/97 [00:10<?, ?it/s]
loss: 0.109163 [22528/24872]: 0%| | 0/97 [00:11<?, ?it/s]
loss: 0.068858 [22784/24872]: 0%| | 0/97 [00:11<?, ?it/s]
loss: 0.084093 [23040/24872]: 0%| | 0/97 [00:11<?, ?it/s]
loss: 0.082324 [23296/24872]: 0%| | 0/97 [00:11<?, ?it/s]
loss: 0.090278 [23552/24872]: 0%| | 0/97 [00:11<?, ?it/s]
loss: 0.117197 [23808/24872]: 0%| | 0/97 [00:11<?, ?it/s]
loss: 0.102644 [24064/24872]: 0%| | 0/97 [00:11<?, ?it/s]
loss: 0.075780 [24320/24872]: 0%| | 0/97 [00:11<?, ?it/s]
loss: 0.085093 [24576/24872]: 0%| | 0/97 [00:12<?, ?it/s]
loss: 0.089627 [24832/24872]: 0%| | 0/97 [00:12<?, ?it/s]
loss: 0.077942 [24872/24872]: 0%| | 0/97 [00:12<?, ?it/s]
loss: 0.077942 [24872/24872]: : 98it [00:12, 7.99it/s]
Epoch 2, time=388.43s
0%| | 0/97 [00:00<?, ?it/s]
loss: 0.098276 [ 256/24872]: 0%| | 0/97 [00:00<?, ?it/s]
loss: 0.098225 [ 512/24872]: 0%| | 0/97 [00:00<?, ?it/s]
loss: 0.095949 [ 768/24872]: 0%| | 0/97 [00:00<?, ?it/s]
loss: 0.101868 [ 1024/24872]: 0%| | 0/97 [00:00<?, ?it/s]
loss: 0.104763 [ 1280/24872]: 0%| | 0/97 [00:00<?, ?it/s]
loss: 0.093533 [ 1536/24872]: 0%| | 0/97 [00:00<?, ?it/s]
loss: 0.090559 [ 1792/24872]: 0%| | 0/97 [00:00<?, ?it/s]
loss: 0.086880 [ 2048/24872]: 0%| | 0/97 [00:01<?, ?it/s]
loss: 0.088917 [ 2304/24872]: 0%| | 0/97 [00:01<?, ?it/s]
loss: 0.118116 [ 2560/24872]: 0%| | 0/97 [00:01<?, ?it/s]
loss: 0.100200 [ 2816/24872]: 0%| | 0/97 [00:01<?, ?it/s]
loss: 0.105986 [ 3072/24872]: 0%| | 0/97 [00:01<?, ?it/s]
loss: 0.092538 [ 3328/24872]: 0%| | 0/97 [00:01<?, ?it/s]
loss: 0.082997 [ 3584/24872]: 0%| | 0/97 [00:01<?, ?it/s]
loss: 0.107080 [ 3840/24872]: 0%| | 0/97 [00:01<?, ?it/s]
loss: 0.090431 [ 4096/24872]: 0%| | 0/97 [00:02<?, ?it/s]
loss: 0.075836 [ 4352/24872]: 0%| | 0/97 [00:02<?, ?it/s]
loss: 0.110823 [ 4608/24872]: 0%| | 0/97 [00:02<?, ?it/s]
loss: 0.092404 [ 4864/24872]: 0%| | 0/97 [00:02<?, ?it/s]
loss: 0.089388 [ 5120/24872]: 0%| | 0/97 [00:02<?, ?it/s]
loss: 0.087525 [ 5376/24872]: 0%| | 0/97 [00:02<?, ?it/s]
loss: 0.113149 [ 5632/24872]: 0%| | 0/97 [00:02<?, ?it/s]
loss: 0.107447 [ 5888/24872]: 0%| | 0/97 [00:02<?, ?it/s]
loss: 0.106404 [ 6144/24872]: 0%| | 0/97 [00:03<?, ?it/s]
loss: 0.097540 [ 6400/24872]: 0%| | 0/97 [00:03<?, ?it/s]
loss: 0.089465 [ 6656/24872]: 0%| | 0/97 [00:03<?, ?it/s]
loss: 0.103651 [ 6912/24872]: 0%| | 0/97 [00:03<?, ?it/s]
loss: 0.099489 [ 7168/24872]: 0%| | 0/97 [00:03<?, ?it/s]
loss: 0.079627 [ 7424/24872]: 0%| | 0/97 [00:03<?, ?it/s]
loss: 0.094793 [ 7680/24872]: 0%| | 0/97 [00:03<?, ?it/s]
loss: 0.104957 [ 7936/24872]: 0%| | 0/97 [00:03<?, ?it/s]
loss: 0.086384 [ 8192/24872]: 0%| | 0/97 [00:04<?, ?it/s]
loss: 0.096332 [ 8448/24872]: 0%| | 0/97 [00:04<?, ?it/s]
loss: 0.094666 [ 8704/24872]: 0%| | 0/97 [00:04<?, ?it/s]
loss: 0.117849 [ 8960/24872]: 0%| | 0/97 [00:04<?, ?it/s]
loss: 0.090261 [ 9216/24872]: 0%| | 0/97 [00:04<?, ?it/s]
loss: 0.101845 [ 9472/24872]: 0%| | 0/97 [00:04<?, ?it/s]
loss: 0.093668 [ 9728/24872]: 0%| | 0/97 [00:04<?, ?it/s]
loss: 0.086401 [ 9984/24872]: 0%| | 0/97 [00:04<?, ?it/s]
loss: 0.100306 [10240/24872]: 0%| | 0/97 [00:05<?, ?it/s]
loss: 0.084925 [10496/24872]: 0%| | 0/97 [00:05<?, ?it/s]
loss: 0.108753 [10752/24872]: 0%| | 0/97 [00:05<?, ?it/s]
loss: 0.095120 [11008/24872]: 0%| | 0/97 [00:05<?, ?it/s]
loss: 0.076254 [11264/24872]: 0%| | 0/97 [00:05<?, ?it/s]
loss: 0.092893 [11520/24872]: 0%| | 0/97 [00:05<?, ?it/s]
loss: 0.096284 [11776/24872]: 0%| | 0/97 [00:05<?, ?it/s]
loss: 0.108196 [12032/24872]: 0%| | 0/97 [00:05<?, ?it/s]
loss: 0.092698 [12288/24872]: 0%| | 0/97 [00:06<?, ?it/s]
loss: 0.088842 [12544/24872]: 0%| | 0/97 [00:06<?, ?it/s]
loss: 0.084736 [12800/24872]: 0%| | 0/97 [00:06<?, ?it/s]
loss: 0.095886 [13056/24872]: 0%| | 0/97 [00:06<?, ?it/s]
loss: 0.089934 [13312/24872]: 0%| | 0/97 [00:06<?, ?it/s]
loss: 0.082736 [13568/24872]: 0%| | 0/97 [00:06<?, ?it/s]
loss: 0.101976 [13824/24872]: 0%| | 0/97 [00:06<?, ?it/s]
loss: 0.079801 [14080/24872]: 0%| | 0/97 [00:06<?, ?it/s]
loss: 0.094488 [14336/24872]: 0%| | 0/97 [00:07<?, ?it/s]
loss: 0.097156 [14592/24872]: 0%| | 0/97 [00:07<?, ?it/s]
loss: 0.115967 [14848/24872]: 0%| | 0/97 [00:07<?, ?it/s]
loss: 0.096360 [15104/24872]: 0%| | 0/97 [00:07<?, ?it/s]
loss: 0.087538 [15360/24872]: 0%| | 0/97 [00:07<?, ?it/s]
loss: 0.110757 [15616/24872]: 0%| | 0/97 [00:07<?, ?it/s]
loss: 0.091656 [15872/24872]: 0%| | 0/97 [00:07<?, ?it/s]
loss: 0.082977 [16128/24872]: 0%| | 0/97 [00:07<?, ?it/s]
loss: 0.097009 [16384/24872]: 0%| | 0/97 [00:08<?, ?it/s]
loss: 0.112168 [16640/24872]: 0%| | 0/97 [00:08<?, ?it/s]
loss: 0.102328 [16896/24872]: 0%| | 0/97 [00:08<?, ?it/s]
loss: 0.108078 [17152/24872]: 0%| | 0/97 [00:08<?, ?it/s]
loss: 0.084368 [17408/24872]: 0%| | 0/97 [00:08<?, ?it/s]
loss: 0.107993 [17664/24872]: 0%| | 0/97 [00:08<?, ?it/s]
loss: 0.102799 [17920/24872]: 0%| | 0/97 [00:08<?, ?it/s]
loss: 0.119084 [18176/24872]: 0%| | 0/97 [00:08<?, ?it/s]
loss: 0.073746 [18432/24872]: 0%| | 0/97 [00:09<?, ?it/s]
loss: 0.088996 [18688/24872]: 0%| | 0/97 [00:09<?, ?it/s]
loss: 0.089009 [18944/24872]: 0%| | 0/97 [00:09<?, ?it/s]
loss: 0.079954 [19200/24872]: 0%| | 0/97 [00:09<?, ?it/s]
loss: 0.085054 [19456/24872]: 0%| | 0/97 [00:09<?, ?it/s]
loss: 0.084674 [19712/24872]: 0%| | 0/97 [00:09<?, ?it/s]
loss: 0.086000 [19968/24872]: 0%| | 0/97 [00:09<?, ?it/s]
loss: 0.086574 [20224/24872]: 0%| | 0/97 [00:09<?, ?it/s]
loss: 0.094326 [20480/24872]: 0%| | 0/97 [00:10<?, ?it/s]
loss: 0.104387 [20736/24872]: 0%| | 0/97 [00:10<?, ?it/s]
loss: 0.083687 [20992/24872]: 0%| | 0/97 [00:10<?, ?it/s]
loss: 0.112401 [21248/24872]: 0%| | 0/97 [00:10<?, ?it/s]
loss: 0.108340 [21504/24872]: 0%| | 0/97 [00:10<?, ?it/s]
loss: 0.088148 [21760/24872]: 0%| | 0/97 [00:10<?, ?it/s]
loss: 0.105667 [22016/24872]: 0%| | 0/97 [00:10<?, ?it/s]
loss: 0.099235 [22272/24872]: 0%| | 0/97 [00:10<?, ?it/s]
loss: 0.108621 [22528/24872]: 0%| | 0/97 [00:11<?, ?it/s]
loss: 0.068308 [22784/24872]: 0%| | 0/97 [00:11<?, ?it/s]
loss: 0.083186 [23040/24872]: 0%| | 0/97 [00:11<?, ?it/s]
loss: 0.080116 [23296/24872]: 0%| | 0/97 [00:11<?, ?it/s]
loss: 0.089757 [23552/24872]: 0%| | 0/97 [00:11<?, ?it/s]
loss: 0.115488 [23808/24872]: 0%| | 0/97 [00:11<?, ?it/s]
loss: 0.099705 [24064/24872]: 0%| | 0/97 [00:11<?, ?it/s]
loss: 0.074788 [24320/24872]: 0%| | 0/97 [00:11<?, ?it/s]
loss: 0.084149 [24576/24872]: 0%| | 0/97 [00:12<?, ?it/s]
loss: 0.088968 [24832/24872]: 0%| | 0/97 [00:12<?, ?it/s]
loss: 0.076861 [24872/24872]: 0%| | 0/97 [00:12<?, ?it/s]
loss: 0.076861 [24872/24872]: : 98it [00:12, 8.04it/s]
Epoch 3, time=400.62s
0%| | 0/97 [00:00<?, ?it/s]
loss: 0.097483 [ 256/24872]: 0%| | 0/97 [00:00<?, ?it/s]
loss: 0.097280 [ 512/24872]: 0%| | 0/97 [00:00<?, ?it/s]
loss: 0.094773 [ 768/24872]: 0%| | 0/97 [00:00<?, ?it/s]
loss: 0.100796 [ 1024/24872]: 0%| | 0/97 [00:00<?, ?it/s]
loss: 0.103097 [ 1280/24872]: 0%| | 0/97 [00:00<?, ?it/s]
loss: 0.092651 [ 1536/24872]: 0%| | 0/97 [00:00<?, ?it/s]
loss: 0.089715 [ 1792/24872]: 0%| | 0/97 [00:00<?, ?it/s]
loss: 0.086125 [ 2048/24872]: 0%| | 0/97 [00:01<?, ?it/s]
loss: 0.087845 [ 2304/24872]: 0%| | 0/97 [00:01<?, ?it/s]
loss: 0.116692 [ 2560/24872]: 0%| | 0/97 [00:01<?, ?it/s]
loss: 0.099544 [ 2816/24872]: 0%| | 0/97 [00:01<?, ?it/s]
loss: 0.105142 [ 3072/24872]: 0%| | 0/97 [00:01<?, ?it/s]
loss: 0.091692 [ 3328/24872]: 0%| | 0/97 [00:01<?, ?it/s]
loss: 0.081978 [ 3584/24872]: 0%| | 0/97 [00:01<?, ?it/s]
loss: 0.105725 [ 3840/24872]: 0%| | 0/97 [00:01<?, ?it/s]
loss: 0.089465 [ 4096/24872]: 0%| | 0/97 [00:02<?, ?it/s]
loss: 0.075047 [ 4352/24872]: 0%| | 0/97 [00:02<?, ?it/s]
loss: 0.109899 [ 4608/24872]: 0%| | 0/97 [00:02<?, ?it/s]
loss: 0.090958 [ 4864/24872]: 0%| | 0/97 [00:02<?, ?it/s]
loss: 0.088561 [ 5120/24872]: 0%| | 0/97 [00:02<?, ?it/s]
loss: 0.086980 [ 5376/24872]: 0%| | 0/97 [00:02<?, ?it/s]
loss: 0.112047 [ 5632/24872]: 0%| | 0/97 [00:02<?, ?it/s]
loss: 0.106370 [ 5888/24872]: 0%| | 0/97 [00:02<?, ?it/s]
loss: 0.105152 [ 6144/24872]: 0%| | 0/97 [00:03<?, ?it/s]
loss: 0.096132 [ 6400/24872]: 0%| | 0/97 [00:03<?, ?it/s]
loss: 0.087978 [ 6656/24872]: 0%| | 0/97 [00:03<?, ?it/s]
loss: 0.102826 [ 6912/24872]: 0%| | 0/97 [00:03<?, ?it/s]
loss: 0.097877 [ 7168/24872]: 0%| | 0/97 [00:03<?, ?it/s]
loss: 0.078879 [ 7424/24872]: 0%| | 0/97 [00:03<?, ?it/s]
loss: 0.093693 [ 7680/24872]: 0%| | 0/97 [00:03<?, ?it/s]
loss: 0.103678 [ 7936/24872]: 0%| | 0/97 [00:03<?, ?it/s]
loss: 0.085361 [ 8192/24872]: 0%| | 0/97 [00:04<?, ?it/s]
loss: 0.095629 [ 8448/24872]: 0%| | 0/97 [00:04<?, ?it/s]
loss: 0.093584 [ 8704/24872]: 0%| | 0/97 [00:04<?, ?it/s]
loss: 0.117193 [ 8960/24872]: 0%| | 0/97 [00:04<?, ?it/s]
loss: 0.089304 [ 9216/24872]: 0%| | 0/97 [00:04<?, ?it/s]
loss: 0.100784 [ 9472/24872]: 0%| | 0/97 [00:04<?, ?it/s]
loss: 0.092569 [ 9728/24872]: 0%| | 0/97 [00:04<?, ?it/s]
loss: 0.085587 [ 9984/24872]: 0%| | 0/97 [00:04<?, ?it/s]
loss: 0.099684 [10240/24872]: 0%| | 0/97 [00:05<?, ?it/s]
loss: 0.083826 [10496/24872]: 0%| | 0/97 [00:05<?, ?it/s]
loss: 0.107976 [10752/24872]: 0%| | 0/97 [00:05<?, ?it/s]
loss: 0.094618 [11008/24872]: 0%| | 0/97 [00:05<?, ?it/s]
loss: 0.075624 [11264/24872]: 0%| | 0/97 [00:05<?, ?it/s]
loss: 0.092034 [11520/24872]: 0%| | 0/97 [00:05<?, ?it/s]
loss: 0.095470 [11776/24872]: 0%| | 0/97 [00:05<?, ?it/s]
loss: 0.106617 [12032/24872]: 0%| | 0/97 [00:05<?, ?it/s]
loss: 0.090684 [12288/24872]: 0%| | 0/97 [00:06<?, ?it/s]
loss: 0.087619 [12544/24872]: 0%| | 0/97 [00:06<?, ?it/s]
loss: 0.083120 [12800/24872]: 0%| | 0/97 [00:06<?, ?it/s]
loss: 0.095129 [13056/24872]: 0%| | 0/97 [00:06<?, ?it/s]
loss: 0.088622 [13312/24872]: 0%| | 0/97 [00:06<?, ?it/s]
loss: 0.081740 [13568/24872]: 0%| | 0/97 [00:06<?, ?it/s]
loss: 0.101517 [13824/24872]: 0%| | 0/97 [00:06<?, ?it/s]
loss: 0.079522 [14080/24872]: 0%| | 0/97 [00:06<?, ?it/s]
loss: 0.093595 [14336/24872]: 0%| | 0/97 [00:07<?, ?it/s]
loss: 0.096164 [14592/24872]: 0%| | 0/97 [00:07<?, ?it/s]
loss: 0.114855 [14848/24872]: 0%| | 0/97 [00:07<?, ?it/s]
loss: 0.095822 [15104/24872]: 0%| | 0/97 [00:07<?, ?it/s]
loss: 0.086622 [15360/24872]: 0%| | 0/97 [00:07<?, ?it/s]
loss: 0.109273 [15616/24872]: 0%| | 0/97 [00:07<?, ?it/s]
loss: 0.090797 [15872/24872]: 0%| | 0/97 [00:07<?, ?it/s]
loss: 0.082344 [16128/24872]: 0%| | 0/97 [00:07<?, ?it/s]
loss: 0.096245 [16384/24872]: 0%| | 0/97 [00:08<?, ?it/s]
loss: 0.111215 [16640/24872]: 0%| | 0/97 [00:08<?, ?it/s]
loss: 0.101128 [16896/24872]: 0%| | 0/97 [00:08<?, ?it/s]
loss: 0.106861 [17152/24872]: 0%| | 0/97 [00:08<?, ?it/s]
loss: 0.083748 [17408/24872]: 0%| | 0/97 [00:08<?, ?it/s]
loss: 0.107169 [17664/24872]: 0%| | 0/97 [00:08<?, ?it/s]
loss: 0.101865 [17920/24872]: 0%| | 0/97 [00:08<?, ?it/s]
loss: 0.117628 [18176/24872]: 0%| | 0/97 [00:08<?, ?it/s]
loss: 0.073186 [18432/24872]: 0%| | 0/97 [00:09<?, ?it/s]
loss: 0.088375 [18688/24872]: 0%| | 0/97 [00:09<?, ?it/s]
loss: 0.088009 [18944/24872]: 0%| | 0/97 [00:09<?, ?it/s]
loss: 0.079037 [19200/24872]: 0%| | 0/97 [00:09<?, ?it/s]
loss: 0.083812 [19456/24872]: 0%| | 0/97 [00:09<?, ?it/s]
loss: 0.082916 [19712/24872]: 0%| | 0/97 [00:09<?, ?it/s]
loss: 0.085393 [19968/24872]: 0%| | 0/97 [00:09<?, ?it/s]
loss: 0.085577 [20224/24872]: 0%| | 0/97 [00:09<?, ?it/s]
loss: 0.093884 [20480/24872]: 0%| | 0/97 [00:10<?, ?it/s]
loss: 0.103612 [20736/24872]: 0%| | 0/97 [00:10<?, ?it/s]
loss: 0.082612 [20992/24872]: 0%| | 0/97 [00:10<?, ?it/s]
loss: 0.111720 [21248/24872]: 0%| | 0/97 [00:10<?, ?it/s]
loss: 0.107456 [21504/24872]: 0%| | 0/97 [00:10<?, ?it/s]
loss: 0.087639 [21760/24872]: 0%| | 0/97 [00:10<?, ?it/s]
loss: 0.104481 [22016/24872]: 0%| | 0/97 [00:10<?, ?it/s]
loss: 0.098644 [22272/24872]: 0%| | 0/97 [00:10<?, ?it/s]
loss: 0.107964 [22528/24872]: 0%| | 0/97 [00:11<?, ?it/s]
loss: 0.067819 [22784/24872]: 0%| | 0/97 [00:11<?, ?it/s]
loss: 0.082536 [23040/24872]: 0%| | 0/97 [00:11<?, ?it/s]
loss: 0.078984 [23296/24872]: 0%| | 0/97 [00:11<?, ?it/s]
loss: 0.089259 [23552/24872]: 0%| | 0/97 [00:11<?, ?it/s]
loss: 0.114014 [23808/24872]: 0%| | 0/97 [00:11<?, ?it/s]
loss: 0.098009 [24064/24872]: 0%| | 0/97 [00:11<?, ?it/s]
loss: 0.074127 [24320/24872]: 0%| | 0/97 [00:11<?, ?it/s]
loss: 0.083437 [24576/24872]: 0%| | 0/97 [00:12<?, ?it/s]
loss: 0.087836 [24832/24872]: 0%| | 0/97 [00:12<?, ?it/s]
loss: 0.076112 [24872/24872]: 0%| | 0/97 [00:12<?, ?it/s]
loss: 0.076112 [24872/24872]: : 98it [00:12, 8.01it/s]
Epoch 4, time=412.86s
0%| | 0/97 [00:00<?, ?it/s]
loss: 0.096653 [ 256/24872]: 0%| | 0/97 [00:00<?, ?it/s]
loss: 0.096121 [ 512/24872]: 0%| | 0/97 [00:00<?, ?it/s]
loss: 0.093393 [ 768/24872]: 0%| | 0/97 [00:00<?, ?it/s]
loss: 0.099648 [ 1024/24872]: 0%| | 0/97 [00:00<?, ?it/s]
loss: 0.101898 [ 1280/24872]: 0%| | 0/97 [00:00<?, ?it/s]
loss: 0.091980 [ 1536/24872]: 0%| | 0/97 [00:00<?, ?it/s]
loss: 0.088971 [ 1792/24872]: 0%| | 0/97 [00:00<?, ?it/s]
loss: 0.086038 [ 2048/24872]: 0%| | 0/97 [00:01<?, ?it/s]
loss: 0.087124 [ 2304/24872]: 0%| | 0/97 [00:01<?, ?it/s]
loss: 0.115477 [ 2560/24872]: 0%| | 0/97 [00:01<?, ?it/s]
loss: 0.098776 [ 2816/24872]: 0%| | 0/97 [00:01<?, ?it/s]
loss: 0.104122 [ 3072/24872]: 0%| | 0/97 [00:01<?, ?it/s]
loss: 0.091036 [ 3328/24872]: 0%| | 0/97 [00:01<?, ?it/s]
loss: 0.081142 [ 3584/24872]: 0%| | 0/97 [00:01<?, ?it/s]
loss: 0.104706 [ 3840/24872]: 0%| | 0/97 [00:01<?, ?it/s]
loss: 0.088871 [ 4096/24872]: 0%| | 0/97 [00:02<?, ?it/s]
loss: 0.074585 [ 4352/24872]: 0%| | 0/97 [00:02<?, ?it/s]
loss: 0.109115 [ 4608/24872]: 0%| | 0/97 [00:02<?, ?it/s]
loss: 0.089982 [ 4864/24872]: 0%| | 0/97 [00:02<?, ?it/s]
loss: 0.088036 [ 5120/24872]: 0%| | 0/97 [00:02<?, ?it/s]
loss: 0.086438 [ 5376/24872]: 0%| | 0/97 [00:02<?, ?it/s]
loss: 0.111143 [ 5632/24872]: 0%| | 0/97 [00:02<?, ?it/s]
loss: 0.105399 [ 5888/24872]: 0%| | 0/97 [00:02<?, ?it/s]
loss: 0.104373 [ 6144/24872]: 0%| | 0/97 [00:03<?, ?it/s]
loss: 0.095160 [ 6400/24872]: 0%| | 0/97 [00:03<?, ?it/s]
loss: 0.086923 [ 6656/24872]: 0%| | 0/97 [00:03<?, ?it/s]
loss: 0.102228 [ 6912/24872]: 0%| | 0/97 [00:03<?, ?it/s]
loss: 0.096801 [ 7168/24872]: 0%| | 0/97 [00:03<?, ?it/s]
loss: 0.078208 [ 7424/24872]: 0%| | 0/97 [00:03<?, ?it/s]
loss: 0.092870 [ 7680/24872]: 0%| | 0/97 [00:03<?, ?it/s]
loss: 0.102461 [ 7936/24872]: 0%| | 0/97 [00:03<?, ?it/s]
loss: 0.084664 [ 8192/24872]: 0%| | 0/97 [00:04<?, ?it/s]
loss: 0.094850 [ 8448/24872]: 0%| | 0/97 [00:04<?, ?it/s]
loss: 0.092736 [ 8704/24872]: 0%| | 0/97 [00:04<?, ?it/s]
loss: 0.116458 [ 8960/24872]: 0%| | 0/97 [00:04<?, ?it/s]
loss: 0.088360 [ 9216/24872]: 0%| | 0/97 [00:04<?, ?it/s]
loss: 0.100137 [ 9472/24872]: 0%| | 0/97 [00:04<?, ?it/s]
loss: 0.091757 [ 9728/24872]: 0%| | 0/97 [00:04<?, ?it/s]
loss: 0.084914 [ 9984/24872]: 0%| | 0/97 [00:04<?, ?it/s]
loss: 0.099132 [10240/24872]: 0%| | 0/97 [00:05<?, ?it/s]
loss: 0.082867 [10496/24872]: 0%| | 0/97 [00:05<?, ?it/s]
loss: 0.107290 [10752/24872]: 0%| | 0/97 [00:05<?, ?it/s]
loss: 0.093656 [11008/24872]: 0%| | 0/97 [00:05<?, ?it/s]
loss: 0.075032 [11264/24872]: 0%| | 0/97 [00:05<?, ?it/s]
loss: 0.091386 [11520/24872]: 0%| | 0/97 [00:05<?, ?it/s]
loss: 0.094826 [11776/24872]: 0%| | 0/97 [00:05<?, ?it/s]
loss: 0.105723 [12032/24872]: 0%| | 0/97 [00:05<?, ?it/s]
loss: 0.089218 [12288/24872]: 0%| | 0/97 [00:06<?, ?it/s]
loss: 0.086796 [12544/24872]: 0%| | 0/97 [00:06<?, ?it/s]
loss: 0.082068 [12800/24872]: 0%| | 0/97 [00:06<?, ?it/s]
loss: 0.094279 [13056/24872]: 0%| | 0/97 [00:06<?, ?it/s]
loss: 0.087556 [13312/24872]: 0%| | 0/97 [00:06<?, ?it/s]
loss: 0.080939 [13568/24872]: 0%| | 0/97 [00:06<?, ?it/s]
loss: 0.101067 [13824/24872]: 0%| | 0/97 [00:06<?, ?it/s]
loss: 0.078938 [14080/24872]: 0%| | 0/97 [00:06<?, ?it/s]
loss: 0.092854 [14336/24872]: 0%| | 0/97 [00:07<?, ?it/s]
loss: 0.095329 [14592/24872]: 0%| | 0/97 [00:07<?, ?it/s]
loss: 0.114025 [14848/24872]: 0%| | 0/97 [00:07<?, ?it/s]
loss: 0.095378 [15104/24872]: 0%| | 0/97 [00:07<?, ?it/s]
loss: 0.085766 [15360/24872]: 0%| | 0/97 [00:07<?, ?it/s]
loss: 0.108255 [15616/24872]: 0%| | 0/97 [00:07<?, ?it/s]
loss: 0.090065 [15872/24872]: 0%| | 0/97 [00:07<?, ?it/s]
loss: 0.081734 [16128/24872]: 0%| | 0/97 [00:07<?, ?it/s]
loss: 0.095559 [16384/24872]: 0%| | 0/97 [00:08<?, ?it/s]
loss: 0.110365 [16640/24872]: 0%| | 0/97 [00:08<?, ?it/s]
loss: 0.100200 [16896/24872]: 0%| | 0/97 [00:08<?, ?it/s]
loss: 0.105961 [17152/24872]: 0%| | 0/97 [00:08<?, ?it/s]
loss: 0.083164 [17408/24872]: 0%| | 0/97 [00:08<?, ?it/s]
loss: 0.106527 [17664/24872]: 0%| | 0/97 [00:08<?, ?it/s]
loss: 0.101089 [17920/24872]: 0%| | 0/97 [00:08<?, ?it/s]
loss: 0.116352 [18176/24872]: 0%| | 0/97 [00:08<?, ?it/s]
loss: 0.072679 [18432/24872]: 0%| | 0/97 [00:09<?, ?it/s]
loss: 0.087764 [18688/24872]: 0%| | 0/97 [00:09<?, ?it/s]
loss: 0.087108 [18944/24872]: 0%| | 0/97 [00:09<?, ?it/s]
loss: 0.078309 [19200/24872]: 0%| | 0/97 [00:09<?, ?it/s]
loss: 0.082803 [19456/24872]: 0%| | 0/97 [00:09<?, ?it/s]
loss: 0.081565 [19712/24872]: 0%| | 0/97 [00:09<?, ?it/s]
loss: 0.084845 [19968/24872]: 0%| | 0/97 [00:09<?, ?it/s]
loss: 0.084885 [20224/24872]: 0%| | 0/97 [00:09<?, ?it/s]
loss: 0.093522 [20480/24872]: 0%| | 0/97 [00:10<?, ?it/s]
loss: 0.102858 [20736/24872]: 0%| | 0/97 [00:10<?, ?it/s]
loss: 0.081758 [20992/24872]: 0%| | 0/97 [00:10<?, ?it/s]
loss: 0.111109 [21248/24872]: 0%| | 0/97 [00:10<?, ?it/s]
loss: 0.106757 [21504/24872]: 0%| | 0/97 [00:10<?, ?it/s]
loss: 0.087106 [21760/24872]: 0%| | 0/97 [00:10<?, ?it/s]
loss: 0.103900 [22016/24872]: 0%| | 0/97 [00:10<?, ?it/s]
loss: 0.098233 [22272/24872]: 0%| | 0/97 [00:10<?, ?it/s]
loss: 0.106951 [22528/24872]: 0%| | 0/97 [00:11<?, ?it/s]
loss: 0.067425 [22784/24872]: 0%| | 0/97 [00:11<?, ?it/s]
loss: 0.081706 [23040/24872]: 0%| | 0/97 [00:11<?, ?it/s]
loss: 0.078498 [23296/24872]: 0%| | 0/97 [00:11<?, ?it/s]
loss: 0.088907 [23552/24872]: 0%| | 0/97 [00:11<?, ?it/s]
loss: 0.113081 [23808/24872]: 0%| | 0/97 [00:11<?, ?it/s]
loss: 0.096720 [24064/24872]: 0%| | 0/97 [00:11<?, ?it/s]
loss: 0.073777 [24320/24872]: 0%| | 0/97 [00:11<?, ?it/s]
loss: 0.082967 [24576/24872]: 0%| | 0/97 [00:12<?, ?it/s]
loss: 0.087265 [24832/24872]: 0%| | 0/97 [00:12<?, ?it/s]
loss: 0.075601 [24872/24872]: 0%| | 0/97 [00:12<?, ?it/s]
loss: 0.075601 [24872/24872]: : 98it [00:12, 8.05it/s]
Epoch 5, time=425.04s
0%| | 0/97 [00:00<?, ?it/s]
loss: 0.096313 [ 256/24872]: 0%| | 0/97 [00:00<?, ?it/s]
loss: 0.095463 [ 512/24872]: 0%| | 0/97 [00:00<?, ?it/s]
loss: 0.092881 [ 768/24872]: 0%| | 0/97 [00:00<?, ?it/s]
loss: 0.098686 [ 1024/24872]: 0%| | 0/97 [00:00<?, ?it/s]
loss: 0.100793 [ 1280/24872]: 0%| | 0/97 [00:00<?, ?it/s]
loss: 0.091576 [ 1536/24872]: 0%| | 0/97 [00:00<?, ?it/s]
loss: 0.088430 [ 1792/24872]: 0%| | 0/97 [00:00<?, ?it/s]
loss: 0.085338 [ 2048/24872]: 0%| | 0/97 [00:00<?, ?it/s]
loss: 0.086437 [ 2304/24872]: 0%| | 0/97 [00:01<?, ?it/s]
loss: 0.114603 [ 2560/24872]: 0%| | 0/97 [00:01<?, ?it/s]
loss: 0.098087 [ 2816/24872]: 0%| | 0/97 [00:01<?, ?it/s]
loss: 0.103576 [ 3072/24872]: 0%| | 0/97 [00:01<?, ?it/s]
loss: 0.090376 [ 3328/24872]: 0%| | 0/97 [00:01<?, ?it/s]
loss: 0.080568 [ 3584/24872]: 0%| | 0/97 [00:01<?, ?it/s]
loss: 0.103775 [ 3840/24872]: 0%| | 0/97 [00:01<?, ?it/s]
loss: 0.088422 [ 4096/24872]: 0%| | 0/97 [00:01<?, ?it/s]
loss: 0.074140 [ 4352/24872]: 0%| | 0/97 [00:02<?, ?it/s]
loss: 0.108590 [ 4608/24872]: 0%| | 0/97 [00:02<?, ?it/s]
loss: 0.089247 [ 4864/24872]: 0%| | 0/97 [00:02<?, ?it/s]
loss: 0.087350 [ 5120/24872]: 0%| | 0/97 [00:02<?, ?it/s]
loss: 0.085984 [ 5376/24872]: 0%| | 0/97 [00:02<?, ?it/s]
loss: 0.110297 [ 5632/24872]: 0%| | 0/97 [00:02<?, ?it/s]
loss: 0.104722 [ 5888/24872]: 0%| | 0/97 [00:02<?, ?it/s]
loss: 0.103548 [ 6144/24872]: 0%| | 0/97 [00:03<?, ?it/s]
loss: 0.094308 [ 6400/24872]: 0%| | 0/97 [00:03<?, ?it/s]
loss: 0.086079 [ 6656/24872]: 0%| | 0/97 [00:03<?, ?it/s]
loss: 0.101661 [ 6912/24872]: 0%| | 0/97 [00:03<?, ?it/s]
loss: 0.095651 [ 7168/24872]: 0%| | 0/97 [00:03<?, ?it/s]
loss: 0.077624 [ 7424/24872]: 0%| | 0/97 [00:03<?, ?it/s]
loss: 0.092191 [ 7680/24872]: 0%| | 0/97 [00:03<?, ?it/s]
loss: 0.101538 [ 7936/24872]: 0%| | 0/97 [00:03<?, ?it/s]
loss: 0.084113 [ 8192/24872]: 0%| | 0/97 [00:04<?, ?it/s]
loss: 0.094347 [ 8448/24872]: 0%| | 0/97 [00:04<?, ?it/s]
loss: 0.091998 [ 8704/24872]: 0%| | 0/97 [00:04<?, ?it/s]
loss: 0.115901 [ 8960/24872]: 0%| | 0/97 [00:04<?, ?it/s]
loss: 0.087513 [ 9216/24872]: 0%| | 0/97 [00:04<?, ?it/s]
loss: 0.099490 [ 9472/24872]: 0%| | 0/97 [00:04<?, ?it/s]
loss: 0.091131 [ 9728/24872]: 0%| | 0/97 [00:04<?, ?it/s]
loss: 0.084301 [ 9984/24872]: 0%| | 0/97 [00:04<?, ?it/s]
loss: 0.098665 [10240/24872]: 0%| | 0/97 [00:05<?, ?it/s]
loss: 0.082070 [10496/24872]: 0%| | 0/97 [00:05<?, ?it/s]
loss: 0.106833 [10752/24872]: 0%| | 0/97 [00:05<?, ?it/s]
loss: 0.092845 [11008/24872]: 0%| | 0/97 [00:05<?, ?it/s]
loss: 0.074593 [11264/24872]: 0%| | 0/97 [00:05<?, ?it/s]
loss: 0.090857 [11520/24872]: 0%| | 0/97 [00:05<?, ?it/s]
loss: 0.094257 [11776/24872]: 0%| | 0/97 [00:05<?, ?it/s]
loss: 0.104809 [12032/24872]: 0%| | 0/97 [00:05<?, ?it/s]
loss: 0.088154 [12288/24872]: 0%| | 0/97 [00:06<?, ?it/s]
loss: 0.086048 [12544/24872]: 0%| | 0/97 [00:06<?, ?it/s]
loss: 0.081251 [12800/24872]: 0%| | 0/97 [00:06<?, ?it/s]
loss: 0.093574 [13056/24872]: 0%| | 0/97 [00:06<?, ?it/s]
loss: 0.086624 [13312/24872]: 0%| | 0/97 [00:06<?, ?it/s]
loss: 0.080274 [13568/24872]: 0%| | 0/97 [00:06<?, ?it/s]
loss: 0.100592 [13824/24872]: 0%| | 0/97 [00:06<?, ?it/s]
loss: 0.078468 [14080/24872]: 0%| | 0/97 [00:06<?, ?it/s]
loss: 0.092211 [14336/24872]: 0%| | 0/97 [00:07<?, ?it/s]
loss: 0.094674 [14592/24872]: 0%| | 0/97 [00:07<?, ?it/s]
loss: 0.113292 [14848/24872]: 0%| | 0/97 [00:07<?, ?it/s]
loss: 0.094841 [15104/24872]: 0%| | 0/97 [00:07<?, ?it/s]
loss: 0.085031 [15360/24872]: 0%| | 0/97 [00:07<?, ?it/s]
loss: 0.107464 [15616/24872]: 0%| | 0/97 [00:07<?, ?it/s]
loss: 0.089532 [15872/24872]: 0%| | 0/97 [00:07<?, ?it/s]
loss: 0.081166 [16128/24872]: 0%| | 0/97 [00:07<?, ?it/s]
loss: 0.094906 [16384/24872]: 0%| | 0/97 [00:08<?, ?it/s]
loss: 0.109732 [16640/24872]: 0%| | 0/97 [00:08<?, ?it/s]
loss: 0.099434 [16896/24872]: 0%| | 0/97 [00:08<?, ?it/s]
loss: 0.105199 [17152/24872]: 0%| | 0/97 [00:08<?, ?it/s]
loss: 0.082578 [17408/24872]: 0%| | 0/97 [00:08<?, ?it/s]
loss: 0.105921 [17664/24872]: 0%| | 0/97 [00:08<?, ?it/s]
loss: 0.100521 [17920/24872]: 0%| | 0/97 [00:08<?, ?it/s]
loss: 0.115227 [18176/24872]: 0%| | 0/97 [00:08<?, ?it/s]
loss: 0.072234 [18432/24872]: 0%| | 0/97 [00:09<?, ?it/s]
loss: 0.087185 [18688/24872]: 0%| | 0/97 [00:09<?, ?it/s]
loss: 0.086305 [18944/24872]: 0%| | 0/97 [00:09<?, ?it/s]
loss: 0.077681 [19200/24872]: 0%| | 0/97 [00:09<?, ?it/s]
loss: 0.082042 [19456/24872]: 0%| | 0/97 [00:09<?, ?it/s]
loss: 0.080371 [19712/24872]: 0%| | 0/97 [00:09<?, ?it/s]
loss: 0.084334 [19968/24872]: 0%| | 0/97 [00:09<?, ?it/s]
loss: 0.084240 [20224/24872]: 0%| | 0/97 [00:09<?, ?it/s]
loss: 0.093140 [20480/24872]: 0%| | 0/97 [00:10<?, ?it/s]
loss: 0.102108 [20736/24872]: 0%| | 0/97 [00:10<?, ?it/s]
loss: 0.080938 [20992/24872]: 0%| | 0/97 [00:10<?, ?it/s]
loss: 0.110528 [21248/24872]: 0%| | 0/97 [00:10<?, ?it/s]
loss: 0.106072 [21504/24872]: 0%| | 0/97 [00:10<?, ?it/s]
loss: 0.086561 [21760/24872]: 0%| | 0/97 [00:10<?, ?it/s]
loss: 0.103313 [22016/24872]: 0%| | 0/97 [00:10<?, ?it/s]
loss: 0.097747 [22272/24872]: 0%| | 0/97 [00:10<?, ?it/s]
loss: 0.106198 [22528/24872]: 0%| | 0/97 [00:10<?, ?it/s]
loss: 0.066991 [22784/24872]: 0%| | 0/97 [00:11<?, ?it/s]
loss: 0.081127 [23040/24872]: 0%| | 0/97 [00:11<?, ?it/s]
loss: 0.077782 [23296/24872]: 0%| | 0/97 [00:11<?, ?it/s]
loss: 0.088407 [23552/24872]: 0%| | 0/97 [00:11<?, ?it/s]
loss: 0.112427 [23808/24872]: 0%| | 0/97 [00:11<?, ?it/s]
loss: 0.096003 [24064/24872]: 0%| | 0/97 [00:11<?, ?it/s]
loss: 0.073273 [24320/24872]: 0%| | 0/97 [00:11<?, ?it/s]
loss: 0.082522 [24576/24872]: 0%| | 0/97 [00:11<?, ?it/s]
loss: 0.086475 [24832/24872]: 0%| | 0/97 [00:12<?, ?it/s]
loss: 0.075057 [24872/24872]: 0%| | 0/97 [00:12<?, ?it/s]
loss: 0.075057 [24872/24872]: : 98it [00:12, 8.05it/s]
Epoch 6, time=437.21s
0%| | 0/97 [00:00<?, ?it/s]
loss: 0.095569 [ 256/24872]: 0%| | 0/97 [00:00<?, ?it/s]
loss: 0.094727 [ 512/24872]: 0%| | 0/97 [00:00<?, ?it/s]
loss: 0.092092 [ 768/24872]: 0%| | 0/97 [00:00<?, ?it/s]
loss: 0.097689 [ 1024/24872]: 0%| | 0/97 [00:00<?, ?it/s]
loss: 0.099825 [ 1280/24872]: 0%| | 0/97 [00:00<?, ?it/s]
loss: 0.091094 [ 1536/24872]: 0%| | 0/97 [00:00<?, ?it/s]
loss: 0.087878 [ 1792/24872]: 0%| | 0/97 [00:00<?, ?it/s]
loss: 0.084914 [ 2048/24872]: 0%| | 0/97 [00:01<?, ?it/s]
loss: 0.085781 [ 2304/24872]: 0%| | 0/97 [00:01<?, ?it/s]
loss: 0.113804 [ 2560/24872]: 0%| | 0/97 [00:01<?, ?it/s]
loss: 0.097401 [ 2816/24872]: 0%| | 0/97 [00:01<?, ?it/s]
loss: 0.102879 [ 3072/24872]: 0%| | 0/97 [00:01<?, ?it/s]
loss: 0.089698 [ 3328/24872]: 0%| | 0/97 [00:01<?, ?it/s]
loss: 0.080031 [ 3584/24872]: 0%| | 0/97 [00:01<?, ?it/s]
loss: 0.103045 [ 3840/24872]: 0%| | 0/97 [00:01<?, ?it/s]
loss: 0.088036 [ 4096/24872]: 0%| | 0/97 [00:02<?, ?it/s]
loss: 0.073696 [ 4352/24872]: 0%| | 0/97 [00:02<?, ?it/s]
loss: 0.108011 [ 4608/24872]: 0%| | 0/97 [00:02<?, ?it/s]
loss: 0.088716 [ 4864/24872]: 0%| | 0/97 [00:02<?, ?it/s]
loss: 0.086774 [ 5120/24872]: 0%| | 0/97 [00:02<?, ?it/s]
loss: 0.085482 [ 5376/24872]: 0%| | 0/97 [00:02<?, ?it/s]
loss: 0.109483 [ 5632/24872]: 0%| | 0/97 [00:02<?, ?it/s]
loss: 0.104104 [ 5888/24872]: 0%| | 0/97 [00:02<?, ?it/s]
loss: 0.102879 [ 6144/24872]: 0%| | 0/97 [00:03<?, ?it/s]
loss: 0.093512 [ 6400/24872]: 0%| | 0/97 [00:03<?, ?it/s]
loss: 0.085313 [ 6656/24872]: 0%| | 0/97 [00:03<?, ?it/s]
loss: 0.101163 [ 6912/24872]: 0%| | 0/97 [00:03<?, ?it/s]
loss: 0.094707 [ 7168/24872]: 0%| | 0/97 [00:03<?, ?it/s]
loss: 0.077069 [ 7424/24872]: 0%| | 0/97 [00:03<?, ?it/s]
loss: 0.091512 [ 7680/24872]: 0%| | 0/97 [00:03<?, ?it/s]
loss: 0.100691 [ 7936/24872]: 0%| | 0/97 [00:03<?, ?it/s]
loss: 0.083625 [ 8192/24872]: 0%| | 0/97 [00:04<?, ?it/s]
loss: 0.093819 [ 8448/24872]: 0%| | 0/97 [00:04<?, ?it/s]
loss: 0.091326 [ 8704/24872]: 0%| | 0/97 [00:04<?, ?it/s]
loss: 0.115424 [ 8960/24872]: 0%| | 0/97 [00:04<?, ?it/s]
loss: 0.086651 [ 9216/24872]: 0%| | 0/97 [00:04<?, ?it/s]
loss: 0.098917 [ 9472/24872]: 0%| | 0/97 [00:04<?, ?it/s]
loss: 0.090531 [ 9728/24872]: 0%| | 0/97 [00:04<?, ?it/s]
loss: 0.083863 [ 9984/24872]: 0%| | 0/97 [00:04<?, ?it/s]
loss: 0.098200 [10240/24872]: 0%| | 0/97 [00:05<?, ?it/s]
loss: 0.081394 [10496/24872]: 0%| | 0/97 [00:05<?, ?it/s]
loss: 0.106338 [10752/24872]: 0%| | 0/97 [00:05<?, ?it/s]
loss: 0.091983 [11008/24872]: 0%| | 0/97 [00:05<?, ?it/s]
loss: 0.074107 [11264/24872]: 0%| | 0/97 [00:05<?, ?it/s]
loss: 0.090322 [11520/24872]: 0%| | 0/97 [00:05<?, ?it/s]
loss: 0.093675 [11776/24872]: 0%| | 0/97 [00:05<?, ?it/s]
loss: 0.103947 [12032/24872]: 0%| | 0/97 [00:05<?, ?it/s]
loss: 0.087226 [12288/24872]: 0%| | 0/97 [00:06<?, ?it/s]
loss: 0.085329 [12544/24872]: 0%| | 0/97 [00:06<?, ?it/s]
loss: 0.080587 [12800/24872]: 0%| | 0/97 [00:06<?, ?it/s]
loss: 0.092962 [13056/24872]: 0%| | 0/97 [00:06<?, ?it/s]
loss: 0.085832 [13312/24872]: 0%| | 0/97 [00:06<?, ?it/s]
loss: 0.079716 [13568/24872]: 0%| | 0/97 [00:06<?, ?it/s]
loss: 0.100139 [13824/24872]: 0%| | 0/97 [00:06<?, ?it/s]
loss: 0.077909 [14080/24872]: 0%| | 0/97 [00:06<?, ?it/s]
loss: 0.091640 [14336/24872]: 0%| | 0/97 [00:06<?, ?it/s]
loss: 0.094070 [14592/24872]: 0%| | 0/97 [00:07<?, ?it/s]
loss: 0.112618 [14848/24872]: 0%| | 0/97 [00:07<?, ?it/s]
loss: 0.094387 [15104/24872]: 0%| | 0/97 [00:07<?, ?it/s]
loss: 0.084422 [15360/24872]: 0%| | 0/97 [00:07<?, ?it/s]
loss: 0.106820 [15616/24872]: 0%| | 0/97 [00:07<?, ?it/s]
loss: 0.088966 [15872/24872]: 0%| | 0/97 [00:07<?, ?it/s]
loss: 0.080629 [16128/24872]: 0%| | 0/97 [00:07<?, ?it/s]
loss: 0.094295 [16384/24872]: 0%| | 0/97 [00:07<?, ?it/s]
loss: 0.109097 [16640/24872]: 0%| | 0/97 [00:08<?, ?it/s]
loss: 0.098678 [16896/24872]: 0%| | 0/97 [00:08<?, ?it/s]
loss: 0.104534 [17152/24872]: 0%| | 0/97 [00:08<?, ?it/s]
loss: 0.082043 [17408/24872]: 0%| | 0/97 [00:08<?, ?it/s]
loss: 0.105249 [17664/24872]: 0%| | 0/97 [00:08<?, ?it/s]
loss: 0.099907 [17920/24872]: 0%| | 0/97 [00:08<?, ?it/s]
loss: 0.114352 [18176/24872]: 0%| | 0/97 [00:08<?, ?it/s]
loss: 0.071777 [18432/24872]: 0%| | 0/97 [00:08<?, ?it/s]
loss: 0.086570 [18688/24872]: 0%| | 0/97 [00:09<?, ?it/s]
loss: 0.085568 [18944/24872]: 0%| | 0/97 [00:09<?, ?it/s]
loss: 0.077078 [19200/24872]: 0%| | 0/97 [00:09<?, ?it/s]
loss: 0.081484 [19456/24872]: 0%| | 0/97 [00:09<?, ?it/s]
loss: 0.079337 [19712/24872]: 0%| | 0/97 [00:09<?, ?it/s]
loss: 0.083836 [19968/24872]: 0%| | 0/97 [00:09<?, ?it/s]
loss: 0.083705 [20224/24872]: 0%| | 0/97 [00:09<?, ?it/s]
loss: 0.092772 [20480/24872]: 0%| | 0/97 [00:09<?, ?it/s]
loss: 0.101411 [20736/24872]: 0%| | 0/97 [00:10<?, ?it/s]
loss: 0.080217 [20992/24872]: 0%| | 0/97 [00:10<?, ?it/s]
loss: 0.109956 [21248/24872]: 0%| | 0/97 [00:10<?, ?it/s]
loss: 0.105414 [21504/24872]: 0%| | 0/97 [00:10<?, ?it/s]
loss: 0.086074 [21760/24872]: 0%| | 0/97 [00:10<?, ?it/s]
loss: 0.102896 [22016/24872]: 0%| | 0/97 [00:10<?, ?it/s]
loss: 0.097427 [22272/24872]: 0%| | 0/97 [00:10<?, ?it/s]
loss: 0.105458 [22528/24872]: 0%| | 0/97 [00:10<?, ?it/s]
loss: 0.066590 [22784/24872]: 0%| | 0/97 [00:11<?, ?it/s]
loss: 0.080477 [23040/24872]: 0%| | 0/97 [00:11<?, ?it/s]
loss: 0.077406 [23296/24872]: 0%| | 0/97 [00:11<?, ?it/s]
loss: 0.087946 [23552/24872]: 0%| | 0/97 [00:11<?, ?it/s]
loss: 0.111579 [23808/24872]: 0%| | 0/97 [00:11<?, ?it/s]
loss: 0.095308 [24064/24872]: 0%| | 0/97 [00:11<?, ?it/s]
loss: 0.072902 [24320/24872]: 0%| | 0/97 [00:11<?, ?it/s]
loss: 0.082181 [24576/24872]: 0%| | 0/97 [00:11<?, ?it/s]
loss: 0.085867 [24832/24872]: 0%| | 0/97 [00:12<?, ?it/s]
loss: 0.074690 [24872/24872]: 0%| | 0/97 [00:12<?, ?it/s]
loss: 0.074690 [24872/24872]: : 98it [00:12, 8.09it/s]
Done!
test the network#
Do some qualitative tests: let the trained network predict some particle geometries and compare their Mie spectra with the target spectra.
# Qualitative check of the trained generator, using the spectra stored in
# `q_sca_target_test` (defined earlier in the script) as design targets.
# NOTE(review): the original comment described these as training samples,
# while the variable name suggests a separate test split -- confirm.
# Ideally, tests are done on samples not seen during training.
sca_test = q_sca_target_test
pred = model(sca_test)

# convert the network output to core/shell radii and permittivities,
# then evaluate Mie for the suggested particles
r_c_test, r_s_test, eps_c_test, eps_s_test = nn_pred_to_mie_geometry(pred)
res_mie = pmd.farfield.cross_sections(
    k0,
    r_c=r_c_test,
    eps_c=eps_c_test,
    r_s=r_s_test,
    eps_s=eps_s_test,
    eps_env=eps_env,
    n_max=n_max,
)

# plot: four randomly drawn samples (indices may repeat) on a 2x2 grid
i_plot = np.random.randint(len(sca_test), size=4)
wl_np = wl0.detach().cpu().numpy()  # x-axis is the same for every curve
q_sca_pred = res_mie["q_sca"]

plt.figure(figsize=(12, 10))
for panel, idx in enumerate(i_plot, start=1):
    plt.subplot(2, 2, panel)
    # target spectrum that was fed to the generator network
    plt.plot(
        wl_np,
        sca_test[idx].detach().cpu().numpy(),
        label="reference",
    )
    # Mie spectrum of the particle suggested by the network
    plt.plot(
        wl_np,
        q_sca_pred[idx].detach().cpu().numpy(),
        label="predicted particle",
    )
    plt.legend()
    plt.xlabel("wavelength (nm)")
    plt.ylabel("scat. efficiency")
plt.show()

Total running time of the script: (7 minutes 35.560 seconds)
Estimated memory usage: 3012 MB