Note
Go to the end to download the full example code.
Mie-informed tandem neural network#
Here, we demonstrate how to train a design generator network capable of suggesting core-shell particles with a specific spectral response, using PyMieDiff as a differentiable forward evaluator. The training pipeline follows the “Tandem” model:
target spectrum –> generator NN –> design –> Mie –> real spectrum
training loss is: MSE(target spec., real spec.)
author: O. Jackson, P. Wiecha, 06/2025
imports#
import time
import numpy as np
from tqdm import tqdm
import matplotlib.pyplot as plt
import torch
from torch import nn
import pymiediff as pmd
setup optimization target#
We set up the main configuration here: torch device, parameter limits and wavelengths
# compute device on which all tensors below are created
device = "cpu"

# --- dataset size and Mie truncation ---
N_samples = 25000  # number of random reference particles
n_max = 4  # maximum Mie order fixed for performance

# --- environment permittivity ---
eps_env = torch.tensor(1.0, device=device)

# --- parameter limits, each given as [lower, upper] ---
lim_r = torch.tensor([40, 100], device=device)  # radius / thickness limits
lim_n_re = torch.tensor([1.5, 4.0], device=device)  # real part of refractive index
lim_n_im = torch.tensor([0.0, 0.1], device=device)  # imaginary part of refractive index

# --- spectral grid: 40 wavelengths from 400 to 800 and matching wavenumbers ---
wl0 = torch.linspace(400, 800, steps=40, device=device)
k0 = (2 * torch.pi) / wl0
generate reference spectra#
We generate a large number of reference Mie spectra for existing particles, which will be used as design targets during training.
Note: this step could also be done without any physics knowledge, for example with artificial spectra (e.g. Lorentzians), or a scattering maximization loss.
# datagen: compute spectra of random core-shell particles.
# Only the spectra are used for training, the random geometries are not.
def _sample_uniform(shape, lim):
    """Return uniform random numbers of `shape`, scaled from [0, 1) to [lim[0], lim[1])."""
    return torch.rand(shape, device=device) * torch.diff(lim)[0] + lim[0]


# geometry: core radius plus a shell thickness drawn from the same limits
r_c = _sample_uniform(N_samples, lim_r)
d_s = _sample_uniform(N_samples, lim_r)
r_s = r_c + d_s

# complex refractive indices; column 0 is the core, column 1 the shell
n_re = _sample_uniform((N_samples, 2), lim_n_re)
n_im = _sample_uniform((N_samples, 2), lim_n_im)
n = n_re + 1j * n_im

# low-level API: permittivity required as spectra (for vectorization),
# so each constant index is repeated along the wavelength axis
_flat_spectrum = torch.ones_like(k0).unsqueeze(0)
eps_c = _flat_spectrum * n[:, 0].unsqueeze(1) ** 2
eps_s = _flat_spectrum * n[:, 1].unsqueeze(1) ** 2

all_particles = pmd.multishell.cross_sections(
    k0,
    r_c=r_c,
    eps_c=eps_c,
    r_s=r_s,
    eps_s=eps_s,
    eps_env=eps_env,
    n_max=n_max,
)

# hold out the first N_test spectra for testing, train on the remainder
N_test = 128
q_sca_target_test = all_particles["q_sca"][:N_test].to(dtype=torch.float32)
q_sca_target = all_particles["q_sca"][N_test:].to(dtype=torch.float32)

# plot one of the training target spectra as a quick sanity check
plt.plot(q_sca_target[30].detach().cpu().numpy())

[<matplotlib.lines.Line2D object at 0x7f06a0074590>]
Neural network classes / functions#
define the network model (simple MLP) and training loop
class FullyConnected(nn.Module):
    """Simple fully connected generator network (MLP).

    Three ReLU-activated hidden layers of equal width are followed by a
    sigmoid output layer, so every output value lies between 0 and 1.

    Args:
        hidden_dim: width of each hidden layer.
        in_dim: number of input features. If None (default), the length of
            the module-level wavenumber grid `k0` is used, i.e. one input
            per spectral point.
        out_dim: number of output values. The default of 6 matches the six
            columns read by `nn_pred_to_mie_geometry`.
    """

    def __init__(self, hidden_dim=1024, in_dim=None, out_dim=6):
        super().__init__()
        if in_dim is None:
            # resolved lazily so that an explicit `in_dim` needs no global `k0`
            in_dim = len(k0)

        # attribute names are kept stable: they define the state_dict keys
        self.fc_in = nn.Linear(in_dim, hidden_dim)
        self.relu1 = nn.ReLU()
        self.fc_1 = nn.Linear(hidden_dim, hidden_dim)
        self.relu2 = nn.ReLU()
        self.fc_2 = nn.Linear(hidden_dim, hidden_dim)
        self.relu3 = nn.ReLU()
        self.fc_out = nn.Linear(hidden_dim, out_dim)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Map input `x` (last dimension `in_dim`) to `out_dim` sigmoid outputs."""
        x = self.relu1(self.fc_in(x))
        x = self.relu2(self.fc_1(x))
        x = self.relu3(self.fc_2(x))
        return self.sigmoid(self.fc_out(x))
def nn_pred_to_mie_geometry(pred):
    """Convert raw network predictions into core-shell Mie input parameters.

    Columns of `pred` are read as: 0 core radius, 1 shell thickness,
    2/3 real/imaginary part of the core index, 4/5 real/imaginary part of
    the shell index. Each column is multiplied by the upper bound of the
    corresponding module-level limit (`lim_r`, `lim_n_re`, `lim_n_im`).
    Assumes `pred` values are normalized (e.g. sigmoid outputs) — confirm
    against the model used.

    Returns:
        Tuple `(r_c, r_s, eps_c, eps_s)`: core radius, outer shell radius,
        and core / shell permittivities repeated along the `k0` axis.
    """
    # implicit normalization: the upper limits act as scale factors
    r_scale = lim_r.max()
    n_re_scale = lim_n_re.max()
    n_im_scale = lim_n_im.max()

    core_frac = pred[:, 0]
    shell_frac = pred[:, 1]
    r_c = r_scale * core_frac
    # outer radius = core radius + shell thickness
    r_s = r_scale * (core_frac + shell_frac)

    # complex refractive indices of core and shell
    n_c = n_re_scale * pred[:, 2] + n_im_scale * (1j * pred[:, 3])
    n_s = n_re_scale * pred[:, 4] + n_im_scale * (1j * pred[:, 5])

    # permittivity = n^2, repeated for every wavenumber in k0
    flat_spectrum = torch.ones_like(k0).unsqueeze(0)
    eps_c = flat_spectrum * n_c.unsqueeze(1) ** 2
    eps_s = flat_spectrum * n_s.unsqueeze(1) ** 2

    return r_c, r_s, eps_c, eps_s
def train_loop(dataloader, model, loss_fn, optimizer):
    """Run one training epoch of the tandem (generator + Mie) pipeline.

    For every batch of target spectra the generator proposes designs, the
    designs are evaluated with `pmd.multishell.cross_sections`, and the
    loss between the resulting scattering spectra and the targets is
    backpropagated through the Mie evaluation into the generator.

    Args:
        dataloader: yields batches `X` of target spectra.
        model: generator network, called as `model(X)`.
        loss_fn: callable `loss_fn(prediction, target)` returning a scalar.
        optimizer: optimizer holding the parameters of `model`.
    """
    size = len(dataloader.dataset)

    # Set the model to training mode - important for batch normalization and dropout layers
    # Unnecessary in this situation but added for best practices
    model.train()

    # len(dataloader) also counts a final partial batch, and does not rely
    # on `dataloader.batch_size`, which can be None
    prog_bar = tqdm(dataloader, total=len(dataloader))
    n_seen = 0  # running number of samples processed in this epoch
    for X in prog_bar:
        # model prediction: generate core-shell particles
        pred = model(X)

        # evaluate Mie
        r_c, r_s, eps_c, eps_s = nn_pred_to_mie_geometry(pred)
        res_mie = pmd.multishell.cross_sections(
            k0,
            r_c=r_c,
            eps_c=eps_c,
            r_s=r_s,
            eps_s=eps_s,
            eps_env=eps_env,
            n_max=n_max,
        )
        q_sca_mie = res_mie["q_sca"].to(dtype=torch.float32)

        # calc. loss: spectrum of the generated design vs. requested spectrum
        loss = loss_fn(q_sca_mie, X)

        # Backpropagation
        loss.backward()
        optimizer.step()
        # clear gradients so they do not accumulate into the next batch
        optimizer.zero_grad()

        n_seen += len(X)
        loss, current = loss.item(), n_seen
        prog_bar.set_description(f"loss: {loss:>7f} [{current:>5d}/{size:>5d}]")
training the Mie-informed network#
Here we use a simple, manually optimized training schedule.
# generator network; the same instance is trained through all schedule stages
model = FullyConnected().to(device)

# training schedule, one dict per stage:
# bs = batch size, lr = learning rate, n_ep = number of epochs
confs = [
    dict(bs=32, lr=1e-4, n_ep=5),
    dict(bs=64, lr=1e-4, n_ep=5),
    dict(bs=128, lr=1e-4, n_ep=6),
    dict(bs=256, lr=1e-5, n_ep=6),
]

# the loss is the same for every stage: MSE between target and Mie spectra
loss_fn = nn.MSELoss()

t_start = time.time()
for conf in confs:
    learning_rate, batch_size, epochs = conf["lr"], conf["bs"], conf["n_ep"]

    print(
        "-------------------------------",
        f"LR={learning_rate}, batch_size={batch_size}",
        "-------------------------------",
        sep="\n",
    )

    # a fresh optimizer and dataloader are created for each stage
    optimizer = torch.optim.AdamW(model.parameters(), lr=learning_rate)
    train_dataloader = torch.utils.data.DataLoader(q_sca_target, batch_size=batch_size)

    for t in range(epochs):
        print(f"Epoch {t+1}, time={time.time()-t_start:.2f}s")
        train_loop(train_dataloader, model, loss_fn, optimizer)

print("Done!")
-------------------------------
LR=0.0001, batch_size=32
-------------------------------
Epoch 1, time=0.00s
0%| | 0/777 [00:00<?, ?it/s]
loss: 4.549628 [ 32/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 3.024712 [ 64/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 2.603119 [ 96/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 2.700456 [ 128/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 2.396730 [ 160/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 1.976609 [ 192/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 1.351362 [ 224/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 2.066803 [ 256/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 1.822288 [ 288/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 1.593468 [ 320/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 1.564855 [ 352/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 1.606593 [ 384/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 1.178725 [ 416/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 1.213516 [ 448/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 1.375950 [ 480/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 1.206874 [ 512/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 1.248866 [ 544/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 1.326798 [ 576/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 1.103084 [ 608/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 1.184518 [ 640/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.954216 [ 672/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 1.145559 [ 704/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 1.104081 [ 736/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.864190 [ 768/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 1.018764 [ 800/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 1.194699 [ 832/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 1.168010 [ 864/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.903035 [ 896/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 1.134447 [ 928/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 1.369251 [ 960/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.968039 [ 992/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 1.177012 [ 1024/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 1.044216 [ 1056/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.944270 [ 1088/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.990864 [ 1120/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 1.117964 [ 1152/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 1.297089 [ 1184/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.850444 [ 1216/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 1.030533 [ 1248/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.962755 [ 1280/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.870597 [ 1312/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 1.255670 [ 1344/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.842951 [ 1376/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 1.094264 [ 1408/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.956608 [ 1440/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.813043 [ 1472/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.981611 [ 1504/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 1.115376 [ 1536/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 1.215330 [ 1568/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.693462 [ 1600/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.941374 [ 1632/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.938547 [ 1664/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.744120 [ 1696/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.768484 [ 1728/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.966859 [ 1760/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 1.073991 [ 1792/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 1.004996 [ 1824/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.826588 [ 1856/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 1.100463 [ 1888/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.803903 [ 1920/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.837029 [ 1952/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 1.114654 [ 1984/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.692557 [ 2016/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.856841 [ 2048/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 1.027091 [ 2080/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 1.206690 [ 2112/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.947298 [ 2144/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.905120 [ 2176/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.988815 [ 2208/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.619839 [ 2240/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 1.049874 [ 2272/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 1.056211 [ 2304/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.707493 [ 2336/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 1.032326 [ 2368/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.869738 [ 2400/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.830498 [ 2432/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.681917 [ 2464/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.950300 [ 2496/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.983554 [ 2528/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.650350 [ 2560/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.830604 [ 2592/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.696787 [ 2624/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.605493 [ 2656/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.925351 [ 2688/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.797087 [ 2720/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.869690 [ 2752/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.724087 [ 2784/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.830089 [ 2816/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.694161 [ 2848/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.692300 [ 2880/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.717461 [ 2912/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.909331 [ 2944/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.699228 [ 2976/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.767702 [ 3008/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.966523 [ 3040/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.739716 [ 3072/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.708228 [ 3104/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.623273 [ 3136/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.765987 [ 3168/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.841981 [ 3200/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.614877 [ 3232/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.811593 [ 3264/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.759134 [ 3296/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.735067 [ 3328/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.648065 [ 3360/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.796157 [ 3392/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.541324 [ 3424/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 1.046082 [ 3456/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.913262 [ 3488/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.777712 [ 3520/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.786183 [ 3552/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.797318 [ 3584/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.685859 [ 3616/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.692348 [ 3648/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.766447 [ 3680/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.905225 [ 3712/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.781734 [ 3744/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.581652 [ 3776/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.897002 [ 3808/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.581007 [ 3840/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.775189 [ 3872/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.757537 [ 3904/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.743633 [ 3936/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.708249 [ 3968/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.686087 [ 4000/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.686662 [ 4032/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.737527 [ 4064/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.525106 [ 4096/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.630956 [ 4128/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.667601 [ 4160/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.627045 [ 4192/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.724834 [ 4224/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.637911 [ 4256/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.780280 [ 4288/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.569397 [ 4320/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.515206 [ 4352/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.584434 [ 4384/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.655563 [ 4416/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.665454 [ 4448/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.703536 [ 4480/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.522763 [ 4512/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.656305 [ 4544/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.536123 [ 4576/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.796360 [ 4608/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.682935 [ 4640/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.459931 [ 4672/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.657267 [ 4704/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.554008 [ 4736/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.594858 [ 4768/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.644254 [ 4800/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.710495 [ 4832/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.617788 [ 4864/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.614180 [ 4896/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.629451 [ 4928/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.533078 [ 4960/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.480037 [ 4992/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.601435 [ 5024/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.444080 [ 5056/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.672689 [ 5088/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.503242 [ 5120/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.686830 [ 5152/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.702820 [ 5184/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.578752 [ 5216/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.544487 [ 5248/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 1.196646 [ 5280/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.627913 [ 5312/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.527364 [ 5344/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.606205 [ 5376/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.505998 [ 5408/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.363671 [ 5440/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.634818 [ 5472/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.691325 [ 5504/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.613241 [ 5536/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.611454 [ 5568/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.545395 [ 5600/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.495668 [ 5632/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.478125 [ 5664/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.576021 [ 5696/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.613744 [ 5728/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.485815 [ 5760/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.514641 [ 5792/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.534588 [ 5824/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.736430 [ 5856/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.483989 [ 5888/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.675932 [ 5920/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.528801 [ 5952/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.557654 [ 5984/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.504843 [ 6016/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.609743 [ 6048/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.616528 [ 6080/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.535119 [ 6112/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.486834 [ 6144/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.841753 [ 6176/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.499647 [ 6208/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.508176 [ 6240/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.525578 [ 6272/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.554933 [ 6304/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.671742 [ 6336/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.530646 [ 6368/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.524304 [ 6400/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.488595 [ 6432/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.617125 [ 6464/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.440587 [ 6496/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.374608 [ 6528/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.643205 [ 6560/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.600344 [ 6592/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.572962 [ 6624/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.544347 [ 6656/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.531797 [ 6688/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.568195 [ 6720/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.478528 [ 6752/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.545627 [ 6784/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.566370 [ 6816/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.526727 [ 6848/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.550577 [ 6880/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.656246 [ 6912/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.557936 [ 6944/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.545366 [ 6976/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.419109 [ 7008/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.441224 [ 7040/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.650576 [ 7072/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.523005 [ 7104/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.477916 [ 7136/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.616433 [ 7168/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.380292 [ 7200/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.603326 [ 7232/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.653603 [ 7264/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.487595 [ 7296/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.571038 [ 7328/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.590345 [ 7360/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.517546 [ 7392/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.513353 [ 7424/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.474648 [ 7456/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.762551 [ 7488/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.415614 [ 7520/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.534361 [ 7552/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.488098 [ 7584/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.448855 [ 7616/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.598816 [ 7648/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.582722 [ 7680/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.662912 [ 7712/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.589332 [ 7744/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.689269 [ 7776/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.731560 [ 7808/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.614626 [ 7840/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.449892 [ 7872/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.428820 [ 7904/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.412033 [ 7936/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.426400 [ 7968/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.299821 [ 8000/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.564144 [ 8032/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.539655 [ 8064/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.437809 [ 8096/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.507565 [ 8128/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.693444 [ 8160/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.479798 [ 8192/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.336999 [ 8224/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.411812 [ 8256/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.529656 [ 8288/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.554582 [ 8320/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.503436 [ 8352/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.353426 [ 8384/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.405522 [ 8416/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.467683 [ 8448/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.445175 [ 8480/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.483850 [ 8512/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.404057 [ 8544/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.464224 [ 8576/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.520994 [ 8608/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.604560 [ 8640/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.719930 [ 8672/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.388810 [ 8704/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.408840 [ 8736/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.426316 [ 8768/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.308958 [ 8800/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.407226 [ 8832/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.303953 [ 8864/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.419643 [ 8896/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.516337 [ 8928/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.390661 [ 8960/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.539882 [ 8992/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.694293 [ 9024/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.522615 [ 9056/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.509678 [ 9088/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.385783 [ 9120/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.371328 [ 9152/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.447466 [ 9184/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.460427 [ 9216/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.397998 [ 9248/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.387884 [ 9280/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.553470 [ 9312/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.433575 [ 9344/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.392388 [ 9376/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.495469 [ 9408/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.590276 [ 9440/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.421869 [ 9472/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.412991 [ 9504/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.379875 [ 9536/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.396954 [ 9568/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.392329 [ 9600/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.388244 [ 9632/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.408485 [ 9664/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.512046 [ 9696/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.334044 [ 9728/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.454203 [ 9760/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.456139 [ 9792/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.351116 [ 9824/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.472090 [ 9856/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.622287 [ 9888/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.453783 [ 9920/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.698191 [ 9952/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.553079 [ 9984/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.389329 [10016/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.428926 [10048/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.524185 [10080/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.449711 [10112/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.411744 [10144/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.547091 [10176/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.419536 [10208/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.394449 [10240/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.344872 [10272/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.483099 [10304/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.526175 [10336/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.382574 [10368/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.352601 [10400/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.727609 [10432/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.467378 [10464/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.482972 [10496/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.463137 [10528/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.578260 [10560/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.374397 [10592/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.441601 [10624/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.494875 [10656/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.474518 [10688/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.336959 [10720/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.537207 [10752/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.398783 [10784/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.430813 [10816/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.441473 [10848/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.389757 [10880/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.321666 [10912/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.504341 [10944/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.315483 [10976/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.473437 [11008/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.454556 [11040/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.566269 [11072/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.587890 [11104/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.525940 [11136/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.363633 [11168/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.637930 [11200/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.439117 [11232/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.619214 [11264/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.411787 [11296/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.426952 [11328/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.401679 [11360/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.315194 [11392/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.461923 [11424/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.544348 [11456/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.430461 [11488/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.559629 [11520/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.529309 [11552/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.500499 [11584/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.459413 [11616/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.523774 [11648/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.476505 [11680/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.497862 [11712/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.455932 [11744/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.415073 [11776/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.399761 [11808/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.590684 [11840/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.546350 [11872/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.447200 [11904/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.495819 [11936/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.384262 [11968/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.449788 [12000/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.321127 [12032/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.558259 [12064/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.579662 [12096/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.504516 [12128/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.368363 [12160/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.624155 [12192/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.527096 [12224/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.501503 [12256/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.332513 [12288/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.530861 [12320/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.679022 [12352/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.396214 [12384/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.373015 [12416/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.435047 [12448/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.466307 [12480/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.610844 [12512/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.486569 [12544/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.554915 [12576/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.326728 [12608/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.384791 [12640/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.450395 [12672/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.416946 [12704/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.582915 [12736/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.541123 [12768/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.401172 [12800/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.488855 [12832/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.411621 [12864/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.438094 [12896/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.317467 [12928/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.329888 [12960/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.513414 [12992/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.301342 [13024/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.493120 [13056/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.373194 [13088/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.389756 [13120/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.376352 [13152/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.426434 [13184/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.337656 [13216/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.521722 [13248/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.370779 [13280/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.471989 [13312/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.410423 [13344/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.373401 [13376/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.357253 [13408/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.437061 [13440/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.506767 [13472/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.350172 [13504/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.495880 [13536/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.380744 [13568/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.391955 [13600/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.622314 [13632/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.290438 [13664/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.417524 [13696/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.331813 [13728/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.499983 [13760/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.534947 [13792/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.339916 [13824/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.356018 [13856/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.507206 [13888/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.381724 [13920/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.352162 [13952/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.521444 [13984/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.445944 [14016/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.330387 [14048/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.343746 [14080/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.388855 [14112/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.450348 [14144/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.346402 [14176/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.392995 [14208/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.391251 [14240/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.362365 [14272/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.580504 [14304/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.347771 [14336/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.445925 [14368/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.322515 [14400/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.319873 [14432/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.309469 [14464/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.442309 [14496/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.423842 [14528/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.293089 [14560/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.361317 [14592/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.338995 [14624/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.412417 [14656/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.508890 [14688/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.304614 [14720/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.402269 [14752/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.467089 [14784/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.328270 [14816/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.375803 [14848/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.472397 [14880/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.385040 [14912/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.463952 [14944/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.356906 [14976/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.383532 [15008/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.387146 [15040/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.419348 [15072/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.319023 [15104/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.323296 [15136/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.335487 [15168/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.361660 [15200/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.318061 [15232/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.362902 [15264/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.425487 [15296/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.299420 [15328/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.344759 [15360/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.320463 [15392/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.333913 [15424/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.358150 [15456/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.397917 [15488/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.263146 [15520/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.361221 [15552/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.494278 [15584/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.348267 [15616/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.387530 [15648/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.479746 [15680/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.408308 [15712/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.438234 [15744/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.362222 [15776/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.335714 [15808/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.443347 [15840/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.381987 [15872/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.384488 [15904/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.384167 [15936/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.436453 [15968/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.383691 [16000/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.340311 [16032/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.591009 [16064/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.353621 [16096/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.322694 [16128/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.523676 [16160/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.362979 [16192/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.509963 [16224/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.424580 [16256/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.246852 [16288/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.405392 [16320/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.480509 [16352/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.364842 [16384/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.500431 [16416/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.320942 [16448/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.418386 [16480/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.317397 [16512/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.577402 [16544/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.327841 [16576/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.383394 [16608/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.401530 [16640/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.480230 [16672/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.358451 [16704/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.418698 [16736/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.397019 [16768/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.468379 [16800/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.386432 [16832/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.350056 [16864/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.428424 [16896/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.341039 [16928/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.300668 [16960/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.315750 [16992/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.273489 [17024/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.322349 [17056/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.386824 [17088/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.487324 [17120/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.355576 [17152/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.330146 [17184/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.431542 [17216/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.407376 [17248/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.430952 [17280/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.319604 [17312/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.336517 [17344/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.333201 [17376/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.499510 [17408/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.476563 [17440/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.318845 [17472/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.489813 [17504/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.337632 [17536/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.467893 [17568/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.380177 [17600/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.474777 [17632/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.400263 [17664/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.377945 [17696/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.280561 [17728/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.412686 [17760/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.340615 [17792/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.414274 [17824/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.412331 [17856/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.534486 [17888/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.406893 [17920/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.251940 [17952/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.440566 [17984/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.322921 [18016/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.341320 [18048/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.388748 [18080/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.262315 [18112/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.535959 [18144/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.377091 [18176/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.425502 [18208/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.354572 [18240/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.390230 [18272/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.379879 [18304/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.368647 [18336/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.498882 [18368/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.334948 [18400/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.463478 [18432/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.308384 [18464/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.400624 [18496/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.380907 [18528/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.314939 [18560/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.486488 [18592/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.402198 [18624/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.387731 [18656/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.270144 [18688/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.317397 [18720/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.344869 [18752/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.325180 [18784/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.369843 [18816/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.489812 [18848/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.351519 [18880/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.312784 [18912/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.361861 [18944/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.378031 [18976/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.512727 [19008/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.273538 [19040/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.370248 [19072/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.492742 [19104/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.365299 [19136/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.325249 [19168/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.372361 [19200/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.358694 [19232/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.292300 [19264/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.447966 [19296/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.418573 [19328/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.313550 [19360/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.500564 [19392/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.259482 [19424/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.354409 [19456/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.359396 [19488/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.419542 [19520/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.432468 [19552/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.470844 [19584/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.461298 [19616/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.395918 [19648/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.401139 [19680/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.379364 [19712/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.306118 [19744/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.551871 [19776/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.351526 [19808/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.293987 [19840/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.392219 [19872/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.390839 [19904/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.271481 [19936/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.326211 [19968/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.348391 [20000/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.352165 [20032/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.418399 [20064/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.494591 [20096/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.474764 [20128/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.354494 [20160/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.271876 [20192/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.450272 [20224/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.451023 [20256/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.373034 [20288/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.555970 [20320/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.362142 [20352/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.515864 [20384/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.369445 [20416/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.508467 [20448/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.320921 [20480/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.455923 [20512/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.356555 [20544/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.262221 [20576/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.400646 [20608/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.505278 [20640/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.371812 [20672/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.301972 [20704/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.408529 [20736/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.249654 [20768/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.370492 [20800/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.543774 [20832/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.335540 [20864/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.326588 [20896/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.384606 [20928/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.309123 [20960/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.298672 [20992/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.363904 [21024/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.435579 [21056/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.266862 [21088/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.404633 [21120/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.301502 [21152/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.447788 [21184/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.267154 [21216/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.408455 [21248/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.339121 [21280/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.339151 [21312/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.464472 [21344/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.474612 [21376/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.509788 [21408/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.499553 [21440/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.333445 [21472/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.327020 [21504/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.389921 [21536/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.348973 [21568/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.257835 [21600/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.320440 [21632/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.296033 [21664/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.414327 [21696/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.272428 [21728/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.378775 [21760/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.286031 [21792/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.333130 [21824/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.303862 [21856/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.270307 [21888/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.292891 [21920/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.197499 [21952/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.240744 [21984/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.334758 [22016/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.340978 [22048/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.308360 [22080/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.321220 [22112/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.333909 [22144/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.388880 [22176/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.325752 [22208/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.365014 [22240/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.346643 [22272/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.410152 [22304/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.437560 [22336/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.353799 [22368/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.267648 [22400/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.275618 [22432/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.237514 [22464/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.355445 [22496/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.380280 [22528/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.306545 [22560/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.334275 [22592/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.431062 [22624/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.333115 [22656/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.321273 [22688/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.313332 [22720/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.303174 [22752/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.416200 [22784/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.323261 [22816/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.340338 [22848/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.170217 [22880/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.250049 [22912/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.291061 [22944/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.287828 [22976/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.265184 [23008/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.449629 [23040/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.293007 [23072/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.439108 [23104/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.293973 [23136/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.433186 [23168/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.270668 [23200/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.389370 [23232/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.348672 [23264/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.424665 [23296/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.306012 [23328/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.269034 [23360/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.334453 [23392/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.259544 [23424/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.424439 [23456/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.318240 [23488/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.372348 [23520/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.356491 [23552/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.358771 [23584/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.222680 [23616/24872]: 0%| | 0/777 [00:30<?, ?it/s]
loss: 0.222680 [23616/24872]: 95%|█████████▍| 738/777 [00:30<00:01, 24.57it/s]
loss: 0.346880 [23648/24872]: 95%|█████████▍| 738/777 [00:30<00:01, 24.57it/s]
loss: 0.344657 [23680/24872]: 95%|█████████▍| 738/777 [00:30<00:01, 24.57it/s]
loss: 0.335473 [23712/24872]: 95%|█████████▍| 738/777 [00:30<00:01, 24.57it/s]
loss: 0.355527 [23744/24872]: 95%|█████████▍| 738/777 [00:30<00:01, 24.57it/s]
loss: 0.236474 [23776/24872]: 95%|█████████▍| 738/777 [00:30<00:01, 24.57it/s]
loss: 0.371519 [23808/24872]: 95%|█████████▍| 738/777 [00:30<00:01, 24.57it/s]
loss: 0.296287 [23840/24872]: 95%|█████████▍| 738/777 [00:30<00:01, 24.57it/s]
loss: 0.409068 [23872/24872]: 95%|█████████▍| 738/777 [00:30<00:01, 24.57it/s]
loss: 0.300034 [23904/24872]: 95%|█████████▍| 738/777 [00:30<00:01, 24.57it/s]
loss: 0.475202 [23936/24872]: 95%|█████████▍| 738/777 [00:30<00:01, 24.57it/s]
loss: 0.275134 [23968/24872]: 95%|█████████▍| 738/777 [00:30<00:01, 24.57it/s]
loss: 0.267775 [24000/24872]: 95%|█████████▍| 738/777 [00:30<00:01, 24.57it/s]
loss: 0.255639 [24032/24872]: 95%|█████████▍| 738/777 [00:30<00:01, 24.57it/s]
loss: 0.307331 [24064/24872]: 95%|█████████▍| 738/777 [00:30<00:01, 24.57it/s]
loss: 0.263582 [24096/24872]: 95%|█████████▍| 738/777 [00:30<00:01, 24.57it/s]
loss: 0.423794 [24128/24872]: 95%|█████████▍| 738/777 [00:30<00:01, 24.57it/s]
loss: 0.355937 [24160/24872]: 95%|█████████▍| 738/777 [00:30<00:01, 24.57it/s]
loss: 0.299370 [24192/24872]: 95%|█████████▍| 738/777 [00:30<00:01, 24.57it/s]
loss: 0.329409 [24224/24872]: 95%|█████████▍| 738/777 [00:30<00:01, 24.57it/s]
loss: 0.259210 [24256/24872]: 95%|█████████▍| 738/777 [00:30<00:01, 24.57it/s]
loss: 0.311036 [24288/24872]: 95%|█████████▍| 738/777 [00:30<00:01, 24.57it/s]
loss: 0.360720 [24320/24872]: 95%|█████████▍| 738/777 [00:30<00:01, 24.57it/s]
loss: 0.308461 [24352/24872]: 95%|█████████▍| 738/777 [00:30<00:01, 24.57it/s]
loss: 0.201081 [24384/24872]: 95%|█████████▍| 738/777 [00:30<00:01, 24.57it/s]
loss: 0.433952 [24416/24872]: 95%|█████████▍| 738/777 [00:31<00:01, 24.57it/s]
loss: 0.278161 [24448/24872]: 95%|█████████▍| 738/777 [00:31<00:01, 24.57it/s]
loss: 0.335278 [24480/24872]: 95%|█████████▍| 738/777 [00:31<00:01, 24.57it/s]
loss: 0.338501 [24512/24872]: 95%|█████████▍| 738/777 [00:31<00:01, 24.57it/s]
loss: 0.292395 [24544/24872]: 95%|█████████▍| 738/777 [00:31<00:01, 24.57it/s]
loss: 0.299277 [24576/24872]: 95%|█████████▍| 738/777 [00:31<00:01, 24.57it/s]
loss: 0.290700 [24608/24872]: 95%|█████████▍| 738/777 [00:31<00:01, 24.57it/s]
loss: 0.435608 [24640/24872]: 95%|█████████▍| 738/777 [00:31<00:01, 24.57it/s]
loss: 0.245576 [24672/24872]: 95%|█████████▍| 738/777 [00:31<00:01, 24.57it/s]
loss: 0.245214 [24704/24872]: 95%|█████████▍| 738/777 [00:31<00:01, 24.57it/s]
loss: 0.423037 [24736/24872]: 95%|█████████▍| 738/777 [00:31<00:01, 24.57it/s]
loss: 0.309382 [24768/24872]: 95%|█████████▍| 738/777 [00:31<00:01, 24.57it/s]
loss: 0.285760 [24800/24872]: 95%|█████████▍| 738/777 [00:31<00:01, 24.57it/s]
loss: 0.316207 [24832/24872]: 95%|█████████▍| 738/777 [00:31<00:01, 24.57it/s]
loss: 0.251803 [24864/24872]: 95%|█████████▍| 738/777 [00:31<00:01, 24.57it/s]
loss: 0.266870 [24872/24872]: 95%|█████████▍| 738/777 [00:31<00:01, 24.57it/s]
loss: 0.266870 [24872/24872]: : 778it [00:31, 24.59it/s]
Epoch 2, time=31.64s
0%| | 0/777 [00:00<?, ?it/s]
loss: 0.283125 [ 32/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.411228 [ 64/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.269505 [ 96/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.350134 [ 128/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.335825 [ 160/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.346835 [ 192/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.380049 [ 224/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.389290 [ 256/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.464753 [ 288/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.328631 [ 320/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.484095 [ 352/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.378010 [ 384/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.236295 [ 416/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.287581 [ 448/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.311718 [ 480/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.438121 [ 512/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.279064 [ 544/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.290778 [ 576/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.393975 [ 608/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.332091 [ 640/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.384161 [ 672/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.291357 [ 704/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.280420 [ 736/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.230342 [ 768/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.322365 [ 800/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.313599 [ 832/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.536940 [ 864/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.221580 [ 896/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.400341 [ 928/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.350411 [ 960/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.250367 [ 992/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.366883 [ 1024/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.270114 [ 1056/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.330985 [ 1088/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.301963 [ 1120/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.299400 [ 1152/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.395208 [ 1184/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.233439 [ 1216/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.338958 [ 1248/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.193938 [ 1280/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.378336 [ 1312/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.310304 [ 1344/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.243454 [ 1376/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.398139 [ 1408/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.244661 [ 1440/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.392542 [ 1472/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.270657 [ 1504/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.369263 [ 1536/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.283508 [ 1568/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.254809 [ 1600/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.344699 [ 1632/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.224397 [ 1664/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.258598 [ 1696/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.253651 [ 1728/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.431209 [ 1760/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.390843 [ 1792/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.434032 [ 1824/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.277459 [ 1856/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.284102 [ 1888/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.257802 [ 1920/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.318801 [ 1952/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.259288 [ 1984/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.223426 [ 2016/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.322985 [ 2048/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.349482 [ 2080/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.439607 [ 2112/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.284697 [ 2144/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.307575 [ 2176/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.444187 [ 2208/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.233325 [ 2240/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.466372 [ 2272/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.271507 [ 2304/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.253530 [ 2336/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.398921 [ 2368/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.299402 [ 2400/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.373275 [ 2432/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.309798 [ 2464/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.504842 [ 2496/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.351272 [ 2528/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.302515 [ 2560/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.288677 [ 2592/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.226544 [ 2624/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.248727 [ 2656/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.385478 [ 2688/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.367891 [ 2720/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.315276 [ 2752/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.307414 [ 2784/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.321228 [ 2816/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.285854 [ 2848/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.273705 [ 2880/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.258222 [ 2912/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.348028 [ 2944/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.288998 [ 2976/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.201752 [ 3008/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.413191 [ 3040/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.289343 [ 3072/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.314572 [ 3104/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.292626 [ 3136/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.363701 [ 3168/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.449250 [ 3200/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.261443 [ 3232/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.335878 [ 3264/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.375275 [ 3296/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.308554 [ 3328/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.255685 [ 3360/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.259187 [ 3392/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.213246 [ 3424/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.400277 [ 3456/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.326957 [ 3488/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.473284 [ 3520/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.411850 [ 3552/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.344118 [ 3584/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.275703 [ 3616/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.399979 [ 3648/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.321730 [ 3680/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.259960 [ 3712/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.285948 [ 3744/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.269583 [ 3776/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.408090 [ 3808/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.206076 [ 3840/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.301462 [ 3872/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.334388 [ 3904/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.417729 [ 3936/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.266612 [ 3968/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.351207 [ 4000/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.272201 [ 4032/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.510018 [ 4064/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.251985 [ 4096/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.240293 [ 4128/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.315287 [ 4160/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.354413 [ 4192/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.357082 [ 4224/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.385048 [ 4256/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.375314 [ 4288/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.313760 [ 4320/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.307647 [ 4352/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.302591 [ 4384/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.390135 [ 4416/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.319135 [ 4448/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.288436 [ 4480/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.266418 [ 4512/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.347196 [ 4544/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.236476 [ 4576/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.379059 [ 4608/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.512721 [ 4640/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.325025 [ 4672/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.414932 [ 4704/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.319960 [ 4736/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.342416 [ 4768/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.304565 [ 4800/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.366954 [ 4832/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.336471 [ 4864/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.354645 [ 4896/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.324350 [ 4928/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.244702 [ 4960/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.230151 [ 4992/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.249894 [ 5024/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.251706 [ 5056/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.367758 [ 5088/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.342536 [ 5120/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.344872 [ 5152/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.362867 [ 5184/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.394302 [ 5216/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.288501 [ 5248/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.570035 [ 5280/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.296051 [ 5312/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.341469 [ 5344/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.321411 [ 5376/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.288694 [ 5408/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.207944 [ 5440/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.297923 [ 5472/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.381821 [ 5504/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.287498 [ 5536/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.341360 [ 5568/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.372690 [ 5600/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.225668 [ 5632/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.308513 [ 5664/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.353651 [ 5696/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.343213 [ 5728/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.225829 [ 5760/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.259179 [ 5792/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.268429 [ 5824/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.452516 [ 5856/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.271319 [ 5888/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.313505 [ 5920/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.306439 [ 5952/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.234428 [ 5984/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.279250 [ 6016/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.269530 [ 6048/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.359317 [ 6080/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.279197 [ 6112/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.273001 [ 6144/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.427209 [ 6176/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.228566 [ 6208/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.257173 [ 6240/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.265612 [ 6272/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.255955 [ 6304/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.389432 [ 6336/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.291519 [ 6368/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.300908 [ 6400/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.296905 [ 6432/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.271939 [ 6464/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.302886 [ 6496/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.212850 [ 6528/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.386339 [ 6560/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.316571 [ 6592/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.321973 [ 6624/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.278355 [ 6656/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.326488 [ 6688/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.319433 [ 6720/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.260900 [ 6752/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.315254 [ 6784/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.356268 [ 6816/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.320472 [ 6848/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.233872 [ 6880/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.353544 [ 6912/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.257432 [ 6944/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.332819 [ 6976/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.236242 [ 7008/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.241539 [ 7040/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.296871 [ 7072/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.267780 [ 7104/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.211294 [ 7136/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.385634 [ 7168/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.220884 [ 7200/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.306814 [ 7232/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.421682 [ 7264/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.329465 [ 7296/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.351548 [ 7328/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.262606 [ 7360/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.260161 [ 7392/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.316669 [ 7424/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.294544 [ 7456/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.432232 [ 7488/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.257959 [ 7520/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.284458 [ 7552/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.228584 [ 7584/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.235188 [ 7616/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.337516 [ 7648/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.329253 [ 7680/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.438873 [ 7712/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.434022 [ 7744/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.353854 [ 7776/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.414820 [ 7808/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.408301 [ 7840/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.313158 [ 7872/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.253104 [ 7904/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.240295 [ 7936/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.267382 [ 7968/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.219396 [ 8000/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.328134 [ 8032/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.350492 [ 8064/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.256752 [ 8096/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.354131 [ 8128/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.436695 [ 8160/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.235047 [ 8192/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.210712 [ 8224/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.254201 [ 8256/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.315851 [ 8288/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.360827 [ 8320/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.287297 [ 8352/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.200231 [ 8384/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.230466 [ 8416/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.238478 [ 8448/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.292137 [ 8480/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.328957 [ 8512/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.313162 [ 8544/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.250183 [ 8576/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.432714 [ 8608/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.355828 [ 8640/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.653207 [ 8672/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.262356 [ 8704/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.278370 [ 8736/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.274638 [ 8768/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.173467 [ 8800/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.243358 [ 8832/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.202503 [ 8864/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.254270 [ 8896/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.281973 [ 8928/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.238935 [ 8960/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.348737 [ 8992/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.455748 [ 9024/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.368188 [ 9056/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.336085 [ 9088/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.290611 [ 9120/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.252552 [ 9152/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.308165 [ 9184/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.341043 [ 9216/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.232957 [ 9248/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.284980 [ 9280/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.367221 [ 9312/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.289757 [ 9344/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.338949 [ 9376/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.315924 [ 9408/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.391519 [ 9440/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.309442 [ 9472/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.324944 [ 9504/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.286630 [ 9536/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.207541 [ 9568/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.268363 [ 9600/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.237888 [ 9632/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.240158 [ 9664/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.338208 [ 9696/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.256907 [ 9728/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.283721 [ 9760/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.326248 [ 9792/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.252465 [ 9824/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.328722 [ 9856/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.347461 [ 9888/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.295555 [ 9920/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.487708 [ 9952/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.450508 [ 9984/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.265394 [10016/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.254454 [10048/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.371183 [10080/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.280824 [10112/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.300214 [10144/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.342829 [10176/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.337153 [10208/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.220623 [10240/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.226935 [10272/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.347403 [10304/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.383249 [10336/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.267275 [10368/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.252889 [10400/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.494623 [10432/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.356170 [10464/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.397078 [10496/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.333082 [10528/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.378337 [10560/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.306573 [10592/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.376200 [10624/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.292871 [10656/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.368518 [10688/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.264547 [10720/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.412769 [10752/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.237073 [10784/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.310263 [10816/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.301891 [10848/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.250352 [10880/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.236736 [10912/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.334746 [10944/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.201470 [10976/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.308255 [11008/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.233776 [11040/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.372350 [11072/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.403429 [11104/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.382165 [11136/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.217276 [11168/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.340724 [11200/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.354264 [11232/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.403458 [11264/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.269134 [11296/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.327926 [11328/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.356637 [11360/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.204031 [11392/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.292600 [11424/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.421179 [11456/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.432080 [11488/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.361935 [11520/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.373924 [11552/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.382705 [11584/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.304896 [11616/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.293582 [11648/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.286836 [11680/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.361164 [11712/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.256195 [11744/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.324247 [11776/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.270204 [11808/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.447708 [11840/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.291288 [11872/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.268740 [11904/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.327126 [11936/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.242892 [11968/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.294443 [12000/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.240311 [12032/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.355534 [12064/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.402110 [12096/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.338846 [12128/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.250733 [12160/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.387843 [12192/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.324227 [12224/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.307363 [12256/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.273750 [12288/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.331122 [12320/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.382723 [12352/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.276902 [12384/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.274959 [12416/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.275893 [12448/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.307337 [12480/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.455947 [12512/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.298646 [12544/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.405907 [12576/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.222697 [12608/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.285629 [12640/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.274512 [12672/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.280437 [12704/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.419032 [12736/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.402568 [12768/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.302870 [12800/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.349057 [12832/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.253546 [12864/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.316808 [12896/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.211420 [12928/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.244912 [12960/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.340049 [12992/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.200072 [13024/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.365378 [13056/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.300885 [13088/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.286990 [13120/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.302635 [13152/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.280980 [13184/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.193788 [13216/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.343860 [13248/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.250760 [13280/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.295179 [13312/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.294683 [13344/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.195121 [13376/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.298824 [13408/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.256920 [13440/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.398392 [13472/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.232464 [13504/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.358769 [13536/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.252718 [13568/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.249935 [13600/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.444748 [13632/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.210771 [13664/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.333124 [13696/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.241222 [13728/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.328214 [13760/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.360327 [13792/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.276640 [13824/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.259483 [13856/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.364306 [13888/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.257225 [13920/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.228175 [13952/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.327080 [13984/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.319745 [14016/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.254768 [14048/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.225923 [14080/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.270361 [14112/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.287317 [14144/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.268560 [14176/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.268844 [14208/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.247540 [14240/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.262881 [14272/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.345443 [14304/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.292779 [14336/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.302470 [14368/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.278669 [14400/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.164424 [14432/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.213713 [14464/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.264976 [14496/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.290851 [14528/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.215406 [14560/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.242225 [14592/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.239785 [14624/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.313312 [14656/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.406784 [14688/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.219770 [14720/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.394031 [14752/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.307792 [14784/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.245003 [14816/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.332450 [14848/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.296359 [14880/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.258599 [14912/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.396527 [14944/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.278687 [14976/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.262632 [15008/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.325456 [15040/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.382625 [15072/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.197688 [15104/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.288563 [15136/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.282949 [15168/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.215819 [15200/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.239022 [15232/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.302304 [15264/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.270064 [15296/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.231045 [15328/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.253516 [15360/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.254414 [15392/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.259087 [15424/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.267647 [15456/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.337119 [15488/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.204591 [15520/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.302182 [15552/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.409223 [15584/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.292441 [15616/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.296205 [15648/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.424672 [15680/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.328123 [15712/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.344667 [15744/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.261303 [15776/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.246445 [15808/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.356770 [15840/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.308815 [15872/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.253945 [15904/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.283525 [15936/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.290659 [15968/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.340692 [16000/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.245996 [16032/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.447661 [16064/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.276570 [16096/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.267608 [16128/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.452782 [16160/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.250493 [16192/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.361832 [16224/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.341269 [16256/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.175367 [16288/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.345972 [16320/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.379525 [16352/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.325949 [16384/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.347248 [16416/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.233543 [16448/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.254702 [16480/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.228529 [16512/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.375041 [16544/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.226247 [16576/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.274755 [16608/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.293790 [16640/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.363546 [16672/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.200898 [16704/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.297025 [16736/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.288971 [16768/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.257371 [16800/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.266654 [16832/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.250066 [16864/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.294934 [16896/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.217626 [16928/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.234650 [16960/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.255740 [16992/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.190071 [17024/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.203783 [17056/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.299582 [17088/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.341935 [17120/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.265988 [17152/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.208472 [17184/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.307397 [17216/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.307391 [17248/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.302836 [17280/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.284320 [17312/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.298571 [17344/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.301814 [17376/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.360180 [17408/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.353715 [17440/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.241761 [17472/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.388926 [17504/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.292070 [17536/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.355464 [17568/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.277623 [17600/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.307966 [17632/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.300545 [17664/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.295483 [17696/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.207195 [17728/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.342486 [17760/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.254482 [17792/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.338374 [17824/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.321208 [17856/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.403231 [17888/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.318407 [17920/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.183099 [17952/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.332952 [17984/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.247948 [18016/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.272869 [18048/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.271882 [18080/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.195860 [18112/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.376769 [18144/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.256583 [18176/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.315612 [18208/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.277402 [18240/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.313100 [18272/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.245571 [18304/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.244779 [18336/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.335378 [18368/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.266667 [18400/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.289508 [18432/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.206267 [18464/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.283084 [18496/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.345027 [18528/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.236577 [18560/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.354842 [18592/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.330507 [18624/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.250222 [18656/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.247999 [18688/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.231640 [18720/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.237408 [18752/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.203409 [18784/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.241765 [18816/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.358400 [18848/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.252885 [18880/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.254202 [18912/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.258679 [18944/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.236516 [18976/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.354075 [19008/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.200970 [19040/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.227542 [19072/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.363360 [19104/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.281444 [19136/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.208310 [19168/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.262927 [19200/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.306742 [19232/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.166761 [19264/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.336331 [19296/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.309400 [19328/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.231713 [19360/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.379656 [19392/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.201803 [19424/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.246939 [19456/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.228569 [19488/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.330752 [19520/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.338145 [19552/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.363791 [19584/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.383342 [19616/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.262816 [19648/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.308388 [19680/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.306549 [19712/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.217581 [19744/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.433382 [19776/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.269089 [19808/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.223126 [19840/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.309200 [19872/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.356300 [19904/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.235808 [19936/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.233596 [19968/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.263861 [20000/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.235534 [20032/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.316401 [20064/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.340608 [20096/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.331362 [20128/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.253219 [20160/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.108887 [20192/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.269381 [20224/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.293828 [20256/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.216889 [20288/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.347065 [20320/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.207600 [20352/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.343189 [20384/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.222312 [20416/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.253830 [20448/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.182829 [20480/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.294978 [20512/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.247168 [20544/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.193570 [20576/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.336162 [20608/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.323476 [20640/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.261333 [20672/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.174719 [20704/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.301761 [20736/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.180892 [20768/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.282023 [20800/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.410580 [20832/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.252878 [20864/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.191051 [20896/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.248791 [20928/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.211562 [20960/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.228746 [20992/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.223679 [21024/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.350975 [21056/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.200432 [21088/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.317935 [21120/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.242440 [21152/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.357959 [21184/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.197261 [21216/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.349344 [21248/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.250858 [21280/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.265313 [21312/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.387489 [21344/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.370432 [21376/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.465018 [21408/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.347911 [21440/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.246725 [21472/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.255353 [21504/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.315152 [21536/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.280347 [21568/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.232140 [21600/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.250781 [21632/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.226980 [21664/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.273166 [21696/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.207491 [21728/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.244837 [21760/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.213336 [21792/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.279910 [21824/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.230611 [21856/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.197361 [21888/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.232272 [21920/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.146780 [21952/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.185711 [21984/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.235739 [22016/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.286285 [22048/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.245558 [22080/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.273690 [22112/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.223294 [22144/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.288927 [22176/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.256182 [22208/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.256402 [22240/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.277075 [22272/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.329837 [22304/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.297052 [22336/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.242542 [22368/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.179541 [22400/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.202728 [22432/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.169913 [22464/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.265438 [22496/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.273574 [22528/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.239752 [22560/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.268433 [22592/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.283642 [22624/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.262078 [22656/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.264919 [22688/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.239864 [22720/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.212984 [22752/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.342134 [22784/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.265762 [22816/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.268547 [22848/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.131165 [22880/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.234624 [22912/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.215642 [22944/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.214907 [22976/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.212157 [23008/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.379055 [23040/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.198605 [23072/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.373102 [23104/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.226813 [23136/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.342227 [23168/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.162817 [23200/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.285306 [23232/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.274991 [23264/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.355267 [23296/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.234675 [23328/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.208324 [23360/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.263041 [23392/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.255555 [23424/24872]: 0%| | 0/777 [00:30<?, ?it/s]
loss: 0.255555 [23424/24872]: 94%|█████████▍| 732/777 [00:30<00:01, 24.39it/s]
loss: 0.350746 [23456/24872]: 94%|█████████▍| 732/777 [00:30<00:01, 24.39it/s]
loss: 0.275276 [23488/24872]: 94%|█████████▍| 732/777 [00:30<00:01, 24.39it/s]
loss: 0.295389 [23520/24872]: 94%|█████████▍| 732/777 [00:30<00:01, 24.39it/s]
loss: 0.247013 [23552/24872]: 94%|█████████▍| 732/777 [00:30<00:01, 24.39it/s]
loss: 0.278580 [23584/24872]: 94%|█████████▍| 732/777 [00:30<00:01, 24.39it/s]
loss: 0.176218 [23616/24872]: 94%|█████████▍| 732/777 [00:30<00:01, 24.39it/s]
loss: 0.325020 [23648/24872]: 94%|█████████▍| 732/777 [00:30<00:01, 24.39it/s]
loss: 0.237580 [23680/24872]: 94%|█████████▍| 732/777 [00:30<00:01, 24.39it/s]
loss: 0.265217 [23712/24872]: 94%|█████████▍| 732/777 [00:30<00:01, 24.39it/s]
loss: 0.283457 [23744/24872]: 94%|█████████▍| 732/777 [00:30<00:01, 24.39it/s]
loss: 0.198589 [23776/24872]: 94%|█████████▍| 732/777 [00:30<00:01, 24.39it/s]
loss: 0.296756 [23808/24872]: 94%|█████████▍| 732/777 [00:30<00:01, 24.39it/s]
loss: 0.199334 [23840/24872]: 94%|█████████▍| 732/777 [00:30<00:01, 24.39it/s]
loss: 0.290444 [23872/24872]: 94%|█████████▍| 732/777 [00:30<00:01, 24.39it/s]
loss: 0.227193 [23904/24872]: 94%|█████████▍| 732/777 [00:30<00:01, 24.39it/s]
loss: 0.401452 [23936/24872]: 94%|█████████▍| 732/777 [00:30<00:01, 24.39it/s]
loss: 0.226327 [23968/24872]: 94%|█████████▍| 732/777 [00:30<00:01, 24.39it/s]
loss: 0.192956 [24000/24872]: 94%|█████████▍| 732/777 [00:30<00:01, 24.39it/s]
loss: 0.206366 [24032/24872]: 94%|█████████▍| 732/777 [00:30<00:01, 24.39it/s]
loss: 0.278068 [24064/24872]: 94%|█████████▍| 732/777 [00:30<00:01, 24.39it/s]
loss: 0.205089 [24096/24872]: 94%|█████████▍| 732/777 [00:30<00:01, 24.39it/s]
loss: 0.355511 [24128/24872]: 94%|█████████▍| 732/777 [00:30<00:01, 24.39it/s]
loss: 0.275013 [24160/24872]: 94%|█████████▍| 732/777 [00:30<00:01, 24.39it/s]
loss: 0.268794 [24192/24872]: 94%|█████████▍| 732/777 [00:30<00:01, 24.39it/s]
loss: 0.260093 [24224/24872]: 94%|█████████▍| 732/777 [00:31<00:01, 24.39it/s]
loss: 0.192018 [24256/24872]: 94%|█████████▍| 732/777 [00:31<00:01, 24.39it/s]
loss: 0.221487 [24288/24872]: 94%|█████████▍| 732/777 [00:31<00:01, 24.39it/s]
loss: 0.305547 [24320/24872]: 94%|█████████▍| 732/777 [00:31<00:01, 24.39it/s]
loss: 0.263421 [24352/24872]: 94%|█████████▍| 732/777 [00:31<00:01, 24.39it/s]
loss: 0.169625 [24384/24872]: 94%|█████████▍| 732/777 [00:31<00:01, 24.39it/s]
loss: 0.373240 [24416/24872]: 94%|█████████▍| 732/777 [00:31<00:01, 24.39it/s]
loss: 0.220283 [24448/24872]: 94%|█████████▍| 732/777 [00:31<00:01, 24.39it/s]
loss: 0.268430 [24480/24872]: 94%|█████████▍| 732/777 [00:31<00:01, 24.39it/s]
loss: 0.277551 [24512/24872]: 94%|█████████▍| 732/777 [00:31<00:01, 24.39it/s]
loss: 0.216989 [24544/24872]: 94%|█████████▍| 732/777 [00:31<00:01, 24.39it/s]
loss: 0.218353 [24576/24872]: 94%|█████████▍| 732/777 [00:31<00:01, 24.39it/s]
loss: 0.221749 [24608/24872]: 94%|█████████▍| 732/777 [00:31<00:01, 24.39it/s]
loss: 0.338623 [24640/24872]: 94%|█████████▍| 732/777 [00:31<00:01, 24.39it/s]
loss: 0.194554 [24672/24872]: 94%|█████████▍| 732/777 [00:31<00:01, 24.39it/s]
loss: 0.208256 [24704/24872]: 94%|█████████▍| 732/777 [00:31<00:01, 24.39it/s]
loss: 0.293702 [24736/24872]: 94%|█████████▍| 732/777 [00:31<00:01, 24.39it/s]
loss: 0.249164 [24768/24872]: 94%|█████████▍| 732/777 [00:31<00:01, 24.39it/s]
loss: 0.220105 [24800/24872]: 94%|█████████▍| 732/777 [00:31<00:01, 24.39it/s]
loss: 0.258191 [24832/24872]: 94%|█████████▍| 732/777 [00:31<00:01, 24.39it/s]
loss: 0.187857 [24864/24872]: 94%|█████████▍| 732/777 [00:31<00:01, 24.39it/s]
loss: 0.242622 [24872/24872]: 94%|█████████▍| 732/777 [00:31<00:01, 24.39it/s]
loss: 0.242622 [24872/24872]: : 778it [00:31, 24.41it/s]
Epoch 3, time=63.50s
0%| | 0/777 [00:00<?, ?it/s]
loss: 0.219644 [ 32/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.328128 [ 64/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.227656 [ 96/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.285419 [ 128/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.284449 [ 160/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.252201 [ 192/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.322977 [ 224/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.249440 [ 256/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.341812 [ 288/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.241536 [ 320/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.368137 [ 352/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.299879 [ 384/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.190878 [ 416/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.217680 [ 448/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.253465 [ 480/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.338251 [ 512/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.219741 [ 544/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.245202 [ 576/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.312780 [ 608/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.287664 [ 640/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.302664 [ 672/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.224215 [ 704/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.198540 [ 736/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.189870 [ 768/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.259084 [ 800/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.241543 [ 832/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.430814 [ 864/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.195703 [ 896/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.333608 [ 928/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.283829 [ 960/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.193870 [ 992/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.296555 [ 1024/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.212226 [ 1056/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.219256 [ 1088/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.218557 [ 1120/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.209060 [ 1152/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.307712 [ 1184/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.185443 [ 1216/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.260597 [ 1248/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.133019 [ 1280/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.320840 [ 1312/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.236119 [ 1344/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.193061 [ 1376/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.278651 [ 1408/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.182960 [ 1440/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.282757 [ 1472/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.226524 [ 1504/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.266534 [ 1536/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.253580 [ 1568/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.209213 [ 1600/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.287619 [ 1632/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.179154 [ 1664/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.193298 [ 1696/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.223150 [ 1728/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.329959 [ 1760/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.286555 [ 1792/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.345604 [ 1824/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.207188 [ 1856/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.210749 [ 1888/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.220632 [ 1920/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.253038 [ 1952/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.209667 [ 1984/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.167523 [ 2016/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.262714 [ 2048/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.272972 [ 2080/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.347449 [ 2112/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.235044 [ 2144/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.250170 [ 2176/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.333258 [ 2208/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.188699 [ 2240/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.387687 [ 2272/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.201442 [ 2304/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.198009 [ 2336/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.335516 [ 2368/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.227308 [ 2400/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.309478 [ 2432/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.270351 [ 2464/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.441436 [ 2496/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.253114 [ 2528/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.264736 [ 2560/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.245393 [ 2592/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.170810 [ 2624/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.167386 [ 2656/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.291336 [ 2688/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.279992 [ 2720/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.251140 [ 2752/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.270387 [ 2784/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.263132 [ 2816/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.237214 [ 2848/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.220850 [ 2880/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.220749 [ 2912/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.296729 [ 2944/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.236371 [ 2976/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.148607 [ 3008/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.356595 [ 3040/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.244326 [ 3072/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.257911 [ 3104/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.219209 [ 3136/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.294501 [ 3168/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.338753 [ 3200/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.190482 [ 3232/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.277905 [ 3264/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.279705 [ 3296/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.224811 [ 3328/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.202864 [ 3360/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.187796 [ 3392/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.175602 [ 3424/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.334948 [ 3456/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.263179 [ 3488/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.402449 [ 3520/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.327533 [ 3552/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.279472 [ 3584/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.210577 [ 3616/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.301551 [ 3648/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.252880 [ 3680/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.204654 [ 3712/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.220518 [ 3744/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.223525 [ 3776/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.317242 [ 3808/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.169324 [ 3840/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.207955 [ 3872/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.253972 [ 3904/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.322258 [ 3936/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.220815 [ 3968/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.302460 [ 4000/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.237450 [ 4032/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.394116 [ 4064/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.243188 [ 4096/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.212593 [ 4128/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.226118 [ 4160/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.263908 [ 4192/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.306094 [ 4224/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.333919 [ 4256/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.421927 [ 4288/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.146974 [ 4320/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.250101 [ 4352/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.283240 [ 4384/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.259443 [ 4416/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.241671 [ 4448/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.215254 [ 4480/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.191482 [ 4512/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.233835 [ 4544/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.185185 [ 4576/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.299944 [ 4608/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.347302 [ 4640/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.236797 [ 4672/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.337775 [ 4704/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.229700 [ 4736/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.238750 [ 4768/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.260113 [ 4800/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.298459 [ 4832/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.266374 [ 4864/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.286883 [ 4896/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.273307 [ 4928/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.199436 [ 4960/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.154560 [ 4992/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.194934 [ 5024/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.210042 [ 5056/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.263428 [ 5088/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.219958 [ 5120/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.271480 [ 5152/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.304205 [ 5184/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.304212 [ 5216/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.255634 [ 5248/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.495393 [ 5280/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.258433 [ 5312/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.263927 [ 5344/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.250971 [ 5376/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.199691 [ 5408/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.187781 [ 5440/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.247665 [ 5472/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.267799 [ 5504/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.224625 [ 5536/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.277905 [ 5568/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.270329 [ 5600/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.196176 [ 5632/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.243441 [ 5664/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.308874 [ 5696/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.280994 [ 5728/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.172752 [ 5760/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.184301 [ 5792/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.257909 [ 5824/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.407176 [ 5856/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.192155 [ 5888/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.272464 [ 5920/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.257723 [ 5952/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.193394 [ 5984/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.226249 [ 6016/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.205866 [ 6048/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.310797 [ 6080/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.253548 [ 6112/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.196611 [ 6144/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.352438 [ 6176/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.203755 [ 6208/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.211782 [ 6240/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.209322 [ 6272/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.201518 [ 6304/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.320265 [ 6336/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.203654 [ 6368/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.261709 [ 6400/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.245552 [ 6432/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.220605 [ 6464/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.224710 [ 6496/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.176409 [ 6528/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.349678 [ 6560/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.238305 [ 6592/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.260202 [ 6624/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.236581 [ 6656/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.239229 [ 6688/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.267024 [ 6720/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.198179 [ 6752/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.254251 [ 6784/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.304834 [ 6816/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.271144 [ 6848/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.188422 [ 6880/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.296805 [ 6912/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.220334 [ 6944/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.287911 [ 6976/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.193025 [ 7008/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.213215 [ 7040/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.258705 [ 7072/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.216135 [ 7104/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.170216 [ 7136/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.297482 [ 7168/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.193571 [ 7200/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.275106 [ 7232/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.328217 [ 7264/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.280563 [ 7296/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.295487 [ 7328/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.192555 [ 7360/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.232840 [ 7392/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.276891 [ 7424/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.233945 [ 7456/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.351537 [ 7488/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.223204 [ 7520/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.229558 [ 7552/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.173370 [ 7584/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.168646 [ 7616/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.294313 [ 7648/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.259510 [ 7680/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.335880 [ 7712/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.374870 [ 7744/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.296272 [ 7776/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.300534 [ 7808/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.356469 [ 7840/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.264221 [ 7872/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.206863 [ 7904/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.202252 [ 7936/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.236768 [ 7968/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.183628 [ 8000/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.262167 [ 8032/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.281463 [ 8064/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.201663 [ 8096/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.304637 [ 8128/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.331779 [ 8160/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.181127 [ 8192/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.167595 [ 8224/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.210007 [ 8256/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.252889 [ 8288/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.318501 [ 8320/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.229340 [ 8352/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.158894 [ 8384/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.191389 [ 8416/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.185720 [ 8448/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.262757 [ 8480/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.269285 [ 8512/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.249676 [ 8544/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.199533 [ 8576/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.381033 [ 8608/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.291238 [ 8640/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.556406 [ 8672/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.225179 [ 8704/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.232200 [ 8736/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.233786 [ 8768/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.149119 [ 8800/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.192060 [ 8832/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.157043 [ 8864/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.204735 [ 8896/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.221979 [ 8928/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.190035 [ 8960/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.267714 [ 8992/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.340151 [ 9024/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.260246 [ 9056/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.227412 [ 9088/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.230493 [ 9120/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.189893 [ 9152/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.218124 [ 9184/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.227727 [ 9216/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.177840 [ 9248/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.182214 [ 9280/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.251234 [ 9312/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.242455 [ 9344/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.269437 [ 9376/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.234500 [ 9408/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.319231 [ 9440/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.236138 [ 9472/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.221543 [ 9504/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.225747 [ 9536/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.167077 [ 9568/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.208223 [ 9600/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.153385 [ 9632/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.187888 [ 9664/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.263009 [ 9696/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.227978 [ 9728/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.217582 [ 9760/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.278217 [ 9792/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.212465 [ 9824/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.252160 [ 9856/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.294494 [ 9888/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.231133 [ 9920/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.398147 [ 9952/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.298098 [ 9984/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.194446 [10016/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.251984 [10048/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.305234 [10080/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.240804 [10112/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.264420 [10144/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.277710 [10176/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.302518 [10208/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.200164 [10240/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.174641 [10272/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.266947 [10304/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.285834 [10336/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.183706 [10368/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.184066 [10400/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.407249 [10432/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.291107 [10464/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.315510 [10496/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.274405 [10528/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.353530 [10560/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.268593 [10592/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.209162 [10624/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.222513 [10656/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.282850 [10688/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.201958 [10720/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.329189 [10752/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.184871 [10784/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.234980 [10816/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.262435 [10848/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.210693 [10880/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.212486 [10912/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.286202 [10944/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.172759 [10976/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.247454 [11008/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.205124 [11040/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.313775 [11072/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.349360 [11104/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.318243 [11136/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.173956 [11168/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.296160 [11200/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.280686 [11232/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.293035 [11264/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.207029 [11296/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.221651 [11328/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.263427 [11360/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.167695 [11392/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.236517 [11424/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.308934 [11456/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.309403 [11488/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.329610 [11520/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.299427 [11552/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.283955 [11584/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.240507 [11616/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.250928 [11648/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.233832 [11680/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.301147 [11712/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.206996 [11744/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.266721 [11776/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.223287 [11808/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.323111 [11840/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.237018 [11872/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.225063 [11904/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.285147 [11936/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.197901 [11968/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.227594 [12000/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.183639 [12032/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.306658 [12064/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.340780 [12096/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.279408 [12128/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.241289 [12160/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.324841 [12192/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.274397 [12224/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.252631 [12256/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.227473 [12288/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.287165 [12320/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.349448 [12352/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.235095 [12384/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.273399 [12416/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.275800 [12448/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.242092 [12480/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.421312 [12512/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.295039 [12544/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.329299 [12576/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.186200 [12608/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.240408 [12640/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.232166 [12672/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.237033 [12704/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.393726 [12736/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.351635 [12768/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.275019 [12800/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.307211 [12832/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.199403 [12864/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.266263 [12896/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.184867 [12928/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.239427 [12960/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.240185 [12992/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.187640 [13024/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.307710 [13056/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.257461 [13088/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.263651 [13120/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.229820 [13152/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.241979 [13184/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.164302 [13216/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.288061 [13248/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.225038 [13280/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.268909 [13312/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.245732 [13344/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.141810 [13376/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.277796 [13408/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.213535 [13440/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.347491 [13472/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.203940 [13504/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.307947 [13536/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.200396 [13568/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.217631 [13600/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.388996 [13632/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.175892 [13664/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.259899 [13696/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.215903 [13728/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.298325 [13760/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.325096 [13792/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.248282 [13824/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.213705 [13856/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.317264 [13888/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.226506 [13920/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.195550 [13952/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.290788 [13984/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.291869 [14016/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.234551 [14048/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.209613 [14080/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.247544 [14112/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.268358 [14144/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.220083 [14176/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.219826 [14208/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.198515 [14240/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.238050 [14272/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.305885 [14304/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.243456 [14336/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.262523 [14368/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.247951 [14400/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.142300 [14432/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.194179 [14464/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.217768 [14496/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.249563 [14528/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.190905 [14560/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.215144 [14592/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.206832 [14624/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.263767 [14656/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.346308 [14688/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.181433 [14720/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.342634 [14752/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.273505 [14784/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.226524 [14816/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.278530 [14848/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.264909 [14880/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.249661 [14912/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.319869 [14944/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.250757 [14976/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.292042 [15008/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.258884 [15040/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.294429 [15072/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.206702 [15104/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.260134 [15136/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.212335 [15168/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.206722 [15200/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.232551 [15232/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.286552 [15264/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.240452 [15296/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.249125 [15328/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.239717 [15360/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.218056 [15392/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.249263 [15424/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.255267 [15456/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.287795 [15488/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.180289 [15520/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.255527 [15552/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.350789 [15584/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.248626 [15616/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.274726 [15648/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.402955 [15680/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.272388 [15712/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.302713 [15744/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.223652 [15776/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.228942 [15808/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.302137 [15840/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.277698 [15872/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.199613 [15904/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.239813 [15936/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.228075 [15968/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.310361 [16000/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.232915 [16032/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.353055 [16064/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.247902 [16096/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.216836 [16128/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.341128 [16160/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.223126 [16192/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.309240 [16224/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.271195 [16256/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.144442 [16288/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.306802 [16320/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.323198 [16352/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.265516 [16384/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.344272 [16416/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.208250 [16448/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.182723 [16480/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.180815 [16512/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.334779 [16544/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.169427 [16576/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.213519 [16608/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.273321 [16640/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.330060 [16672/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.153784 [16704/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.251251 [16736/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.257673 [16768/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.205314 [16800/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.267310 [16832/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.223395 [16864/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.240747 [16896/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.177008 [16928/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.206198 [16960/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.225041 [16992/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.165508 [17024/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.158881 [17056/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.249028 [17088/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.301023 [17120/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.210799 [17152/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.165595 [17184/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.252411 [17216/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.216263 [17248/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.253395 [17280/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.221270 [17312/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.250899 [17344/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.264430 [17376/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.309987 [17408/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.269731 [17440/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.189261 [17472/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.352788 [17504/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.270197 [17536/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.289355 [17568/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.263497 [17600/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.265845 [17632/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.257111 [17664/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.267664 [17696/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.175832 [17728/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.321690 [17760/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.221381 [17792/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.269859 [17824/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.278091 [17856/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.360298 [17888/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.266004 [17920/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.164916 [17952/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.294573 [17984/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.212724 [18016/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.252062 [18048/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.235575 [18080/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.164370 [18112/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.357549 [18144/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.236984 [18176/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.294025 [18208/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.240797 [18240/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.286620 [18272/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.224286 [18304/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.205093 [18336/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.252879 [18368/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.257170 [18400/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.305663 [18432/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.161855 [18464/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.259745 [18496/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.329260 [18528/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.180992 [18560/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.337421 [18592/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.315489 [18624/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.222501 [18656/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.182215 [18688/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.212677 [18720/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.193676 [18752/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.192810 [18784/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.181897 [18816/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.278642 [18848/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.217405 [18880/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.228425 [18912/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.187452 [18944/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.216195 [18976/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.313190 [19008/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.172105 [19040/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.190405 [19072/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.290407 [19104/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.248300 [19136/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.188988 [19168/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.212936 [19200/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.286495 [19232/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.150107 [19264/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.331972 [19296/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.232368 [19328/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.172374 [19360/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.355163 [19392/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.167607 [19424/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.183842 [19456/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.184638 [19488/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.290336 [19520/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.306515 [19552/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.306828 [19584/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.360615 [19616/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.220905 [19648/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.303081 [19680/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.263791 [19712/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.189543 [19744/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.356794 [19776/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.232681 [19808/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.181840 [19840/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.291256 [19872/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.273473 [19904/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.193189 [19936/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.203109 [19968/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.226796 [20000/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.200095 [20032/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.261006 [20064/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.286587 [20096/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.292828 [20128/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.220321 [20160/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.109919 [20192/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.260754 [20224/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.251850 [20256/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.181957 [20288/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.308342 [20320/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.187274 [20352/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.298057 [20384/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.194629 [20416/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.214207 [20448/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.155369 [20480/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.230134 [20512/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.219616 [20544/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.158264 [20576/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.297839 [20608/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.292524 [20640/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.217662 [20672/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.155571 [20704/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.294752 [20736/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.161366 [20768/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.278711 [20800/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.378722 [20832/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.228468 [20864/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.144932 [20896/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.210516 [20928/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.185379 [20960/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.206058 [20992/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.187220 [21024/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.299715 [21056/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.189724 [21088/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.298247 [21120/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.215558 [21152/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.297780 [21184/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.184660 [21216/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.301554 [21248/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.204830 [21280/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.230119 [21312/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.316877 [21344/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.342335 [21376/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.410574 [21408/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.299405 [21440/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.232090 [21472/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.221928 [21504/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.273348 [21536/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.259750 [21568/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.215221 [21600/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.232325 [21632/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.200895 [21664/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.243785 [21696/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.186628 [21728/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.225161 [21760/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.215951 [21792/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.245577 [21824/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.203683 [21856/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.191279 [21888/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.212547 [21920/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.134899 [21952/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.175179 [21984/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.222288 [22016/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.270764 [22048/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.232338 [22080/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.241867 [22112/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.180241 [22144/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.294908 [22176/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.216660 [22208/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.231606 [22240/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.245150 [22272/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.295014 [22304/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.267560 [22336/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.210962 [22368/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.148716 [22400/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.181478 [22432/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.146233 [22464/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.234669 [22496/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.219350 [22528/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.207407 [22560/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.228558 [22592/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.227355 [22624/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.225238 [22656/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.218112 [22688/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.218202 [22720/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.177299 [22752/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.293570 [22784/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.237814 [22816/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.246724 [22848/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.108730 [22880/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.188866 [22912/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.176134 [22944/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.192262 [22976/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.192921 [23008/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.322374 [23040/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.174806 [23072/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.319148 [23104/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.197924 [23136/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.325776 [23168/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.142494 [23200/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.260466 [23232/24872]: 0%| | 0/777 [00:30<?, ?it/s]
loss: 0.260466 [23232/24872]: 93%|█████████▎| 726/777 [00:30<00:02, 24.18it/s]
loss: 0.244684 [23264/24872]: 93%|█████████▎| 726/777 [00:30<00:02, 24.18it/s]
loss: 0.342234 [23296/24872]: 93%|█████████▎| 726/777 [00:30<00:02, 24.18it/s]
loss: 0.212300 [23328/24872]: 93%|█████████▎| 726/777 [00:30<00:02, 24.18it/s]
loss: 0.181741 [23360/24872]: 93%|█████████▎| 726/777 [00:30<00:02, 24.18it/s]
loss: 0.230813 [23392/24872]: 93%|█████████▎| 726/777 [00:30<00:02, 24.18it/s]
loss: 0.236332 [23424/24872]: 93%|█████████▎| 726/777 [00:30<00:02, 24.18it/s]
loss: 0.329861 [23456/24872]: 93%|█████████▎| 726/777 [00:30<00:02, 24.18it/s]
loss: 0.241722 [23488/24872]: 93%|█████████▎| 726/777 [00:30<00:02, 24.18it/s]
loss: 0.276854 [23520/24872]: 93%|█████████▎| 726/777 [00:30<00:02, 24.18it/s]
loss: 0.217380 [23552/24872]: 93%|█████████▎| 726/777 [00:30<00:02, 24.18it/s]
loss: 0.245094 [23584/24872]: 93%|█████████▎| 726/777 [00:30<00:02, 24.18it/s]
loss: 0.156858 [23616/24872]: 93%|█████████▎| 726/777 [00:30<00:02, 24.18it/s]
loss: 0.316753 [23648/24872]: 93%|█████████▎| 726/777 [00:30<00:02, 24.18it/s]
loss: 0.214877 [23680/24872]: 93%|█████████▎| 726/777 [00:30<00:02, 24.18it/s]
loss: 0.249560 [23712/24872]: 93%|█████████▎| 726/777 [00:30<00:02, 24.18it/s]
loss: 0.249726 [23744/24872]: 93%|█████████▎| 726/777 [00:30<00:02, 24.18it/s]
loss: 0.173725 [23776/24872]: 93%|█████████▎| 726/777 [00:30<00:02, 24.18it/s]
loss: 0.278541 [23808/24872]: 93%|█████████▎| 726/777 [00:30<00:02, 24.18it/s]
loss: 0.195228 [23840/24872]: 93%|█████████▎| 726/777 [00:30<00:02, 24.18it/s]
loss: 0.249049 [23872/24872]: 93%|█████████▎| 726/777 [00:30<00:02, 24.18it/s]
loss: 0.206104 [23904/24872]: 93%|█████████▎| 726/777 [00:30<00:02, 24.18it/s]
loss: 0.363155 [23936/24872]: 93%|█████████▎| 726/777 [00:30<00:02, 24.18it/s]
loss: 0.188020 [23968/24872]: 93%|█████████▎| 726/777 [00:30<00:02, 24.18it/s]
loss: 0.174132 [24000/24872]: 93%|█████████▎| 726/777 [00:31<00:02, 24.18it/s]
loss: 0.172741 [24032/24872]: 93%|█████████▎| 726/777 [00:31<00:02, 24.18it/s]
loss: 0.256323 [24064/24872]: 93%|█████████▎| 726/777 [00:31<00:02, 24.18it/s]
loss: 0.166860 [24096/24872]: 93%|█████████▎| 726/777 [00:31<00:02, 24.18it/s]
loss: 0.320422 [24128/24872]: 93%|█████████▎| 726/777 [00:31<00:02, 24.18it/s]
loss: 0.242151 [24160/24872]: 93%|█████████▎| 726/777 [00:31<00:02, 24.18it/s]
loss: 0.226131 [24192/24872]: 93%|█████████▎| 726/777 [00:31<00:02, 24.18it/s]
loss: 0.234046 [24224/24872]: 93%|█████████▎| 726/777 [00:31<00:02, 24.18it/s]
loss: 0.168483 [24256/24872]: 93%|█████████▎| 726/777 [00:31<00:02, 24.18it/s]
loss: 0.179039 [24288/24872]: 93%|█████████▎| 726/777 [00:31<00:02, 24.18it/s]
loss: 0.278832 [24320/24872]: 93%|█████████▎| 726/777 [00:31<00:02, 24.18it/s]
loss: 0.248030 [24352/24872]: 93%|█████████▎| 726/777 [00:31<00:02, 24.18it/s]
loss: 0.154076 [24384/24872]: 93%|█████████▎| 726/777 [00:31<00:02, 24.18it/s]
loss: 0.341630 [24416/24872]: 93%|█████████▎| 726/777 [00:31<00:02, 24.18it/s]
loss: 0.198793 [24448/24872]: 93%|█████████▎| 726/777 [00:31<00:02, 24.18it/s]
loss: 0.254220 [24480/24872]: 93%|█████████▎| 726/777 [00:31<00:02, 24.18it/s]
loss: 0.240209 [24512/24872]: 93%|█████████▎| 726/777 [00:31<00:02, 24.18it/s]
loss: 0.208710 [24544/24872]: 93%|█████████▎| 726/777 [00:31<00:02, 24.18it/s]
loss: 0.198859 [24576/24872]: 93%|█████████▎| 726/777 [00:31<00:02, 24.18it/s]
loss: 0.199115 [24608/24872]: 93%|█████████▎| 726/777 [00:31<00:02, 24.18it/s]
loss: 0.303159 [24640/24872]: 93%|█████████▎| 726/777 [00:31<00:02, 24.18it/s]
loss: 0.171682 [24672/24872]: 93%|█████████▎| 726/777 [00:31<00:02, 24.18it/s]
loss: 0.189002 [24704/24872]: 93%|█████████▎| 726/777 [00:31<00:02, 24.18it/s]
loss: 0.275190 [24736/24872]: 93%|█████████▎| 726/777 [00:31<00:02, 24.18it/s]
loss: 0.234884 [24768/24872]: 93%|█████████▎| 726/777 [00:31<00:02, 24.18it/s]
loss: 0.189010 [24800/24872]: 93%|█████████▎| 726/777 [00:32<00:02, 24.18it/s]
loss: 0.254258 [24832/24872]: 93%|█████████▎| 726/777 [00:32<00:02, 24.18it/s]
loss: 0.170808 [24864/24872]: 93%|█████████▎| 726/777 [00:32<00:02, 24.18it/s]
loss: 0.207310 [24872/24872]: 93%|█████████▎| 726/777 [00:32<00:02, 24.18it/s]
loss: 0.207310 [24872/24872]: : 778it [00:32, 24.22it/s]
Epoch 4, time=95.63s
0%| | 0/777 [00:00<?, ?it/s]
loss: 0.190080 [ 32/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.301387 [ 64/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.203102 [ 96/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.254241 [ 128/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.244961 [ 160/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.249300 [ 192/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.276830 [ 224/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.199123 [ 256/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.341630 [ 288/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.210239 [ 320/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.316476 [ 352/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.282204 [ 384/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.165451 [ 416/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.212533 [ 448/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.233706 [ 480/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.318023 [ 512/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.218333 [ 544/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.217420 [ 576/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.296859 [ 608/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.252815 [ 640/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.280344 [ 672/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.197031 [ 704/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.187240 [ 736/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.172691 [ 768/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.228611 [ 800/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.227169 [ 832/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.392719 [ 864/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.184004 [ 896/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.331351 [ 928/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.266399 [ 960/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.182308 [ 992/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.276334 [ 1024/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.195540 [ 1056/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.198070 [ 1088/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.217743 [ 1120/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.186887 [ 1152/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.270635 [ 1184/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.176710 [ 1216/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.225471 [ 1248/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.129584 [ 1280/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.303552 [ 1312/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.256096 [ 1344/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.182209 [ 1376/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.261631 [ 1408/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.180003 [ 1440/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.254820 [ 1472/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.214903 [ 1504/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.250638 [ 1536/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.229622 [ 1568/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.194191 [ 1600/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.269187 [ 1632/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.163699 [ 1664/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.176856 [ 1696/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.220197 [ 1728/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.295724 [ 1760/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.248367 [ 1792/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.304797 [ 1824/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.176635 [ 1856/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.182034 [ 1888/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.201943 [ 1920/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.214503 [ 1952/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.170670 [ 1984/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.149021 [ 2016/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.246953 [ 2048/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.244774 [ 2080/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.297486 [ 2112/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.225273 [ 2144/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.229919 [ 2176/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.275570 [ 2208/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.179063 [ 2240/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.325462 [ 2272/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.169168 [ 2304/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.175192 [ 2336/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.314853 [ 2368/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.206381 [ 2400/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.276510 [ 2432/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.238534 [ 2464/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.398650 [ 2496/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.204620 [ 2528/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.249807 [ 2560/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.218929 [ 2592/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.158823 [ 2624/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.139573 [ 2656/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.254816 [ 2688/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.259329 [ 2720/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.189586 [ 2752/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.232047 [ 2784/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.242140 [ 2816/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.205357 [ 2848/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.183056 [ 2880/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.192042 [ 2912/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.255571 [ 2944/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.217249 [ 2976/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.137355 [ 3008/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.313204 [ 3040/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.225783 [ 3072/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.219741 [ 3104/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.189852 [ 3136/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.231407 [ 3168/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.287167 [ 3200/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.158671 [ 3232/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.239089 [ 3264/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.237780 [ 3296/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.191808 [ 3328/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.186321 [ 3360/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.159179 [ 3392/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.153776 [ 3424/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.285393 [ 3456/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.230827 [ 3488/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.347835 [ 3520/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.284850 [ 3552/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.237400 [ 3584/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.186115 [ 3616/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.263193 [ 3648/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.236696 [ 3680/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.159856 [ 3712/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.194236 [ 3744/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.207028 [ 3776/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.285995 [ 3808/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.165248 [ 3840/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.184529 [ 3872/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.230760 [ 3904/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.290154 [ 3936/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.214472 [ 3968/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.266113 [ 4000/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.188519 [ 4032/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.378324 [ 4064/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.225192 [ 4096/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.194578 [ 4128/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.189853 [ 4160/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.243498 [ 4192/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.293876 [ 4224/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.327112 [ 4256/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.323878 [ 4288/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.129700 [ 4320/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.220681 [ 4352/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.208879 [ 4384/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.289250 [ 4416/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.210308 [ 4448/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.191352 [ 4480/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.173851 [ 4512/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.223070 [ 4544/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.166935 [ 4576/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.269083 [ 4608/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.296928 [ 4640/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.212171 [ 4672/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.315206 [ 4704/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.215578 [ 4736/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.228496 [ 4768/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.244535 [ 4800/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.269723 [ 4832/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.231058 [ 4864/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.293236 [ 4896/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.255197 [ 4928/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.176997 [ 4960/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.141809 [ 4992/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.183467 [ 5024/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.193743 [ 5056/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.251657 [ 5088/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.194235 [ 5120/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.253542 [ 5152/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.281459 [ 5184/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.274859 [ 5216/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.224625 [ 5248/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.427793 [ 5280/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.251655 [ 5312/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.245225 [ 5344/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.212337 [ 5376/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.196467 [ 5408/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.178912 [ 5440/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.214206 [ 5472/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.256455 [ 5504/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.217632 [ 5536/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.253575 [ 5568/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.236200 [ 5600/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.181710 [ 5632/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.222043 [ 5664/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.254605 [ 5696/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.259590 [ 5728/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.161534 [ 5760/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.178688 [ 5792/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.230491 [ 5824/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.390864 [ 5856/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.142612 [ 5888/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.271075 [ 5920/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.229739 [ 5952/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.169574 [ 5984/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.206146 [ 6016/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.186535 [ 6048/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.281382 [ 6080/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.235333 [ 6112/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.176501 [ 6144/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.317434 [ 6176/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.187748 [ 6208/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.172405 [ 6240/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.165760 [ 6272/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.176170 [ 6304/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.278212 [ 6336/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.203146 [ 6368/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.229675 [ 6400/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.210721 [ 6432/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.188858 [ 6464/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.213393 [ 6496/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.163210 [ 6528/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.286229 [ 6560/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.218088 [ 6592/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.235212 [ 6624/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.232540 [ 6656/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.225314 [ 6688/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.265417 [ 6720/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.178536 [ 6752/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.248203 [ 6784/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.291760 [ 6816/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.261929 [ 6848/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.164656 [ 6880/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.260793 [ 6912/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.200820 [ 6944/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.265668 [ 6976/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.170366 [ 7008/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.193448 [ 7040/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.235461 [ 7072/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.204775 [ 7104/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.148720 [ 7136/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.261378 [ 7168/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.162363 [ 7200/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.266121 [ 7232/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.322526 [ 7264/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.273058 [ 7296/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.255782 [ 7328/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.186913 [ 7360/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.231592 [ 7392/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.275175 [ 7424/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.198697 [ 7456/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.343224 [ 7488/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.203617 [ 7520/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.210044 [ 7552/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.159326 [ 7584/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.157360 [ 7616/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.267409 [ 7648/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.237860 [ 7680/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.310999 [ 7712/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.361820 [ 7744/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.282218 [ 7776/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.281281 [ 7808/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.328137 [ 7840/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.257787 [ 7872/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.197587 [ 7904/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.198933 [ 7936/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.221198 [ 7968/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.177459 [ 8000/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.227992 [ 8032/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.242623 [ 8064/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.177472 [ 8096/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.275805 [ 8128/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.281497 [ 8160/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.162189 [ 8192/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.151129 [ 8224/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.189163 [ 8256/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.219845 [ 8288/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.271524 [ 8320/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.214962 [ 8352/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.155640 [ 8384/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.191097 [ 8416/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.169606 [ 8448/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.246302 [ 8480/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.223645 [ 8512/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.267050 [ 8544/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.187675 [ 8576/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.363297 [ 8608/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.280201 [ 8640/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.501747 [ 8672/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.205733 [ 8704/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.232415 [ 8736/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.233803 [ 8768/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.151997 [ 8800/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.170933 [ 8832/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.144006 [ 8864/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.198224 [ 8896/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.194043 [ 8928/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.170959 [ 8960/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.236665 [ 8992/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.370803 [ 9024/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.230694 [ 9056/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.220040 [ 9088/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.173747 [ 9120/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.182107 [ 9152/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.207925 [ 9184/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.214337 [ 9216/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.171321 [ 9248/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.178680 [ 9280/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.244063 [ 9312/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.231880 [ 9344/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.270143 [ 9376/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.209298 [ 9408/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.291940 [ 9440/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.214666 [ 9472/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.205402 [ 9504/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.202189 [ 9536/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.169625 [ 9568/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.187721 [ 9600/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.170970 [ 9632/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.173284 [ 9664/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.242622 [ 9696/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.231012 [ 9728/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.207080 [ 9760/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.231562 [ 9792/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.183713 [ 9824/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.238219 [ 9856/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.279004 [ 9888/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.238795 [ 9920/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.363090 [ 9952/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.255188 [ 9984/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.173244 [10016/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.201303 [10048/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.296097 [10080/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.207584 [10112/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.244841 [10144/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.264408 [10176/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.275789 [10208/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.182544 [10240/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.164556 [10272/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.255078 [10304/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.255917 [10336/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.169508 [10368/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.163254 [10400/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.398363 [10432/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.283183 [10464/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.291335 [10496/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.251927 [10528/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.329747 [10560/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.252815 [10592/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.191730 [10624/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.207473 [10656/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.288728 [10688/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.193670 [10720/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.309780 [10752/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.158905 [10784/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.224689 [10816/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.241453 [10848/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.194509 [10880/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.184011 [10912/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.277185 [10944/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.151486 [10976/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.211477 [11008/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.166175 [11040/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.283933 [11072/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.327457 [11104/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.302701 [11136/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.163049 [11168/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.255621 [11200/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.264876 [11232/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.263796 [11264/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.196371 [11296/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.200901 [11328/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.238821 [11360/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.142725 [11392/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.208912 [11424/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.275796 [11456/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.289367 [11488/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.281339 [11520/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.274592 [11552/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.263906 [11584/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.221131 [11616/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.225812 [11648/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.188858 [11680/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.281414 [11712/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.184972 [11744/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.210486 [11776/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.195625 [11808/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.287675 [11840/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.237115 [11872/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.224644 [11904/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.277680 [11936/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.190389 [11968/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.202245 [12000/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.171066 [12032/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.288278 [12064/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.297484 [12096/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.274684 [12128/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.200135 [12160/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.282745 [12192/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.255370 [12224/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.216711 [12256/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.213316 [12288/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.254961 [12320/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.270241 [12352/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.225629 [12384/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.227501 [12416/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.240441 [12448/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.211346 [12480/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.342546 [12512/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.257347 [12544/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.294554 [12576/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.160704 [12608/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.210013 [12640/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.203554 [12672/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.207844 [12704/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.354136 [12736/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.317982 [12768/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.257058 [12800/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.287973 [12832/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.168374 [12864/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.240295 [12896/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.181685 [12928/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.170789 [12960/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.209566 [12992/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.178840 [13024/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.266345 [13056/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.252033 [13088/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.233276 [13120/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.205793 [13152/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.239803 [13184/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.151477 [13216/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.270320 [13248/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.216547 [13280/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.218766 [13312/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.218660 [13344/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.126222 [13376/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.257157 [13408/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.198463 [13440/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.312367 [13472/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.186784 [13504/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.260183 [13536/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.165805 [13568/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.203299 [13600/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.387337 [13632/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.160982 [13664/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.237319 [13696/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.216329 [13728/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.279432 [13760/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.305871 [13792/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.240264 [13824/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.198654 [13856/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.291549 [13888/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.198745 [13920/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.177232 [13952/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.247323 [13984/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.260933 [14016/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.237592 [14048/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.189026 [14080/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.205103 [14112/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.247486 [14144/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.198471 [14176/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.198809 [14208/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.185461 [14240/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.211802 [14272/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.270914 [14304/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.220554 [14336/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.258038 [14368/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.223366 [14400/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.137141 [14432/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.186535 [14464/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.187439 [14496/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.233578 [14528/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.204905 [14560/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.201238 [14592/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.182727 [14624/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.235013 [14656/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.326347 [14688/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.172750 [14720/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.301196 [14752/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.248235 [14784/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.202684 [14816/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.230764 [14848/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.258443 [14880/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.221862 [14912/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.251565 [14944/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.242550 [14976/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.232674 [15008/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.226030 [15040/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.265746 [15072/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.166094 [15104/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.231024 [15136/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.195077 [15168/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.183605 [15200/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.212789 [15232/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.222327 [15264/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.227931 [15296/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.215729 [15328/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.215260 [15360/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.202409 [15392/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.235710 [15424/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.226977 [15456/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.266925 [15488/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.168541 [15520/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.250570 [15552/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.314220 [15584/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.193345 [15616/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.262140 [15648/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.327166 [15680/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.219048 [15712/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.283041 [15744/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.189547 [15776/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.198086 [15808/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.300327 [15840/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.251962 [15872/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.215221 [15904/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.220968 [15936/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.227535 [15968/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.265413 [16000/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.194197 [16032/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.286229 [16064/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.224032 [16096/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.170210 [16128/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.328671 [16160/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.194582 [16192/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.273256 [16224/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.328911 [16256/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.132040 [16288/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.347475 [16320/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.297849 [16352/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.240490 [16384/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.288676 [16416/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.206121 [16448/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.179049 [16480/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.178115 [16512/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.300009 [16544/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.147072 [16576/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.233040 [16608/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.270829 [16640/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.297284 [16672/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.149285 [16704/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.279406 [16736/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.231918 [16768/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.182886 [16800/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.230514 [16832/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.205498 [16864/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.214849 [16896/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.155262 [16928/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.217948 [16960/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.202629 [16992/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.163073 [17024/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.145377 [17056/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.235567 [17088/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.276143 [17120/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.197458 [17152/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.162876 [17184/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.246031 [17216/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.213977 [17248/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.233668 [17280/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.203687 [17312/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.246341 [17344/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.244295 [17376/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.284871 [17408/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.234649 [17440/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.183281 [17472/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.311950 [17504/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.271464 [17536/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.292274 [17568/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.244865 [17600/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.265468 [17632/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.254332 [17664/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.305537 [17696/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.166632 [17728/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.307052 [17760/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.178404 [17792/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.247684 [17824/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.219778 [17856/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.344027 [17888/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.249295 [17920/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.180807 [17952/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.290438 [17984/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.199424 [18016/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.252731 [18048/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.228502 [18080/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.158037 [18112/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.292850 [18144/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.231292 [18176/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.276007 [18208/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.208405 [18240/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.241874 [18272/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.221780 [18304/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.181693 [18336/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.240556 [18368/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.214279 [18400/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.263452 [18432/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.141439 [18464/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.223954 [18496/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.302876 [18528/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.167155 [18560/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.304705 [18592/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.304343 [18624/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.204974 [18656/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.170875 [18688/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.216363 [18720/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.187717 [18752/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.179163 [18784/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.171353 [18816/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.275506 [18848/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.202587 [18880/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.223976 [18912/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.184109 [18944/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.181905 [18976/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.279718 [19008/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.159604 [19040/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.197042 [19072/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.252109 [19104/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.211105 [19136/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.186713 [19168/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.196711 [19200/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.243253 [19232/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.137044 [19264/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.283992 [19296/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.205381 [19328/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.153217 [19360/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.308686 [19392/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.153729 [19424/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.174174 [19456/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.159215 [19488/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.258382 [19520/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.288724 [19552/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.286249 [19584/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.333474 [19616/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.214481 [19648/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.273245 [19680/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.207961 [19712/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.175073 [19744/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.335151 [19776/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.211228 [19808/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.163130 [19840/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.291289 [19872/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.252679 [19904/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.185128 [19936/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.188494 [19968/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.181014 [20000/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.169496 [20032/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.243250 [20064/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.247319 [20096/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.272650 [20128/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.197908 [20160/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.099666 [20192/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.229242 [20224/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.211123 [20256/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.162186 [20288/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.302013 [20320/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.164569 [20352/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.265502 [20384/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.175000 [20416/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.182259 [20448/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.147319 [20480/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.200092 [20512/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.201428 [20544/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.151182 [20576/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.243694 [20608/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.249467 [20640/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.187040 [20672/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.132657 [20704/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.245599 [20736/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.150113 [20768/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.249957 [20800/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.319454 [20832/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.185782 [20864/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.132298 [20896/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.182195 [20928/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.160231 [20960/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.193834 [20992/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.169843 [21024/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.267396 [21056/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.198245 [21088/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.282943 [21120/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.200786 [21152/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.273568 [21184/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.174639 [21216/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.267693 [21248/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.187718 [21280/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.208589 [21312/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.284181 [21344/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.314038 [21376/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.397660 [21408/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.275944 [21440/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.222124 [21472/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.192027 [21504/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.272951 [21536/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.236063 [21568/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.192968 [21600/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.241313 [21632/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.186410 [21664/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.249590 [21696/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.223477 [21728/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.216919 [21760/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.199807 [21792/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.239471 [21824/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.204861 [21856/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.174544 [21888/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.209838 [21920/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.128947 [21952/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.166584 [21984/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.261183 [22016/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.268770 [22048/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.201490 [22080/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.219246 [22112/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.205388 [22144/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.311159 [22176/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.207269 [22208/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.256144 [22240/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.232049 [22272/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.291443 [22304/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.294765 [22336/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.231425 [22368/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.189118 [22400/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.190046 [22432/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.158145 [22464/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.241266 [22496/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.208627 [22528/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.202678 [22560/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.201407 [22592/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.223594 [22624/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.237786 [22656/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.202593 [22688/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.209869 [22720/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.169872 [22752/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.287376 [22784/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.229877 [22816/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.229068 [22848/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.104239 [22880/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.169200 [22912/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.158002 [22944/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.174990 [22976/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.179601 [23008/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.278446 [23040/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.162705 [23072/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.292155 [23104/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.187773 [23136/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.285316 [23168/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.138999 [23200/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.221685 [23232/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.208961 [23264/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.310506 [23296/24872]: 0%| | 0/777 [00:30<?, ?it/s]
loss: 0.310506 [23296/24872]: 94%|█████████▎| 728/777 [00:30<00:02, 24.24it/s]
loss: 0.188899 [23328/24872]: 94%|█████████▎| 728/777 [00:30<00:02, 24.24it/s]
loss: 0.165502 [23360/24872]: 94%|█████████▎| 728/777 [00:30<00:02, 24.24it/s]
loss: 0.210971 [23392/24872]: 94%|█████████▎| 728/777 [00:30<00:02, 24.24it/s]
loss: 0.196140 [23424/24872]: 94%|█████████▎| 728/777 [00:30<00:02, 24.24it/s]
loss: 0.287855 [23456/24872]: 94%|█████████▎| 728/777 [00:30<00:02, 24.24it/s]
loss: 0.215361 [23488/24872]: 94%|█████████▎| 728/777 [00:30<00:02, 24.24it/s]
loss: 0.239329 [23520/24872]: 94%|█████████▎| 728/777 [00:30<00:02, 24.24it/s]
loss: 0.196779 [23552/24872]: 94%|█████████▎| 728/777 [00:30<00:02, 24.24it/s]
loss: 0.237473 [23584/24872]: 94%|█████████▎| 728/777 [00:30<00:02, 24.24it/s]
loss: 0.149353 [23616/24872]: 94%|█████████▎| 728/777 [00:30<00:02, 24.24it/s]
loss: 0.282091 [23648/24872]: 94%|█████████▎| 728/777 [00:30<00:02, 24.24it/s]
loss: 0.201593 [23680/24872]: 94%|█████████▎| 728/777 [00:30<00:02, 24.24it/s]
loss: 0.224037 [23712/24872]: 94%|█████████▎| 728/777 [00:30<00:02, 24.24it/s]
loss: 0.244668 [23744/24872]: 94%|█████████▎| 728/777 [00:30<00:02, 24.24it/s]
loss: 0.174341 [23776/24872]: 94%|█████████▎| 728/777 [00:30<00:02, 24.24it/s]
loss: 0.223451 [23808/24872]: 94%|█████████▎| 728/777 [00:30<00:02, 24.24it/s]
loss: 0.160269 [23840/24872]: 94%|█████████▎| 728/777 [00:30<00:02, 24.24it/s]
loss: 0.208183 [23872/24872]: 94%|█████████▎| 728/777 [00:30<00:02, 24.24it/s]
loss: 0.195317 [23904/24872]: 94%|█████████▎| 728/777 [00:30<00:02, 24.24it/s]
loss: 0.344156 [23936/24872]: 94%|█████████▎| 728/777 [00:30<00:02, 24.24it/s]
loss: 0.175776 [23968/24872]: 94%|█████████▎| 728/777 [00:30<00:02, 24.24it/s]
loss: 0.159474 [24000/24872]: 94%|█████████▎| 728/777 [00:30<00:02, 24.24it/s]
loss: 0.144877 [24032/24872]: 94%|█████████▎| 728/777 [00:30<00:02, 24.24it/s]
loss: 0.231458 [24064/24872]: 94%|█████████▎| 728/777 [00:31<00:02, 24.24it/s]
loss: 0.153034 [24096/24872]: 94%|█████████▎| 728/777 [00:31<00:02, 24.24it/s]
loss: 0.287441 [24128/24872]: 94%|█████████▎| 728/777 [00:31<00:02, 24.24it/s]
loss: 0.243038 [24160/24872]: 94%|█████████▎| 728/777 [00:31<00:02, 24.24it/s]
loss: 0.172962 [24192/24872]: 94%|█████████▎| 728/777 [00:31<00:02, 24.24it/s]
loss: 0.211541 [24224/24872]: 94%|█████████▎| 728/777 [00:31<00:02, 24.24it/s]
loss: 0.166176 [24256/24872]: 94%|█████████▎| 728/777 [00:31<00:02, 24.24it/s]
loss: 0.164050 [24288/24872]: 94%|█████████▎| 728/777 [00:31<00:02, 24.24it/s]
loss: 0.258868 [24320/24872]: 94%|█████████▎| 728/777 [00:31<00:02, 24.24it/s]
loss: 0.240365 [24352/24872]: 94%|█████████▎| 728/777 [00:31<00:02, 24.24it/s]
loss: 0.149504 [24384/24872]: 94%|█████████▎| 728/777 [00:31<00:02, 24.24it/s]
loss: 0.317467 [24416/24872]: 94%|█████████▎| 728/777 [00:31<00:02, 24.24it/s]
loss: 0.180037 [24448/24872]: 94%|█████████▎| 728/777 [00:31<00:02, 24.24it/s]
loss: 0.244772 [24480/24872]: 94%|█████████▎| 728/777 [00:31<00:02, 24.24it/s]
loss: 0.215650 [24512/24872]: 94%|█████████▎| 728/777 [00:31<00:02, 24.24it/s]
loss: 0.212149 [24544/24872]: 94%|█████████▎| 728/777 [00:31<00:02, 24.24it/s]
loss: 0.181679 [24576/24872]: 94%|█████████▎| 728/777 [00:31<00:02, 24.24it/s]
loss: 0.174152 [24608/24872]: 94%|█████████▎| 728/777 [00:31<00:02, 24.24it/s]
loss: 0.267714 [24640/24872]: 94%|█████████▎| 728/777 [00:31<00:02, 24.24it/s]
loss: 0.160238 [24672/24872]: 94%|█████████▎| 728/777 [00:31<00:02, 24.24it/s]
loss: 0.171379 [24704/24872]: 94%|█████████▎| 728/777 [00:31<00:02, 24.24it/s]
loss: 0.263279 [24736/24872]: 94%|█████████▎| 728/777 [00:31<00:02, 24.24it/s]
loss: 0.213443 [24768/24872]: 94%|█████████▎| 728/777 [00:31<00:02, 24.24it/s]
loss: 0.174841 [24800/24872]: 94%|█████████▎| 728/777 [00:31<00:02, 24.24it/s]
loss: 0.252641 [24832/24872]: 94%|█████████▎| 728/777 [00:32<00:02, 24.24it/s]
loss: 0.149507 [24864/24872]: 94%|█████████▎| 728/777 [00:32<00:02, 24.24it/s]
loss: 0.217583 [24872/24872]: 94%|█████████▎| 728/777 [00:32<00:02, 24.24it/s]
loss: 0.217583 [24872/24872]: : 778it [00:32, 24.24it/s]
Epoch 5, time=127.73s
0%| | 0/777 [00:00<?, ?it/s]
loss: 0.184487 [ 32/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.268028 [ 64/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.184546 [ 96/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.238333 [ 128/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.238619 [ 160/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.241502 [ 192/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.248014 [ 224/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.187908 [ 256/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.308608 [ 288/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.189654 [ 320/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.299007 [ 352/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.274413 [ 384/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.155499 [ 416/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.185493 [ 448/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.227523 [ 480/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.325771 [ 512/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.210656 [ 544/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.204029 [ 576/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.278616 [ 608/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.245539 [ 640/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.265067 [ 672/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.195817 [ 704/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.171693 [ 736/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.156800 [ 768/24872]: 0%| | 0/777 [00:00<?, ?it/s]
loss: 0.210632 [ 800/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.223507 [ 832/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.359056 [ 864/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.171858 [ 896/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.316232 [ 928/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.250520 [ 960/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.176117 [ 992/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.279893 [ 1024/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.191590 [ 1056/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.185357 [ 1088/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.207776 [ 1120/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.180038 [ 1152/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.238349 [ 1184/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.169577 [ 1216/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.222956 [ 1248/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.139792 [ 1280/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.298308 [ 1312/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.257465 [ 1344/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.180492 [ 1376/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.247313 [ 1408/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.173595 [ 1440/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.226574 [ 1472/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.198266 [ 1504/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.222018 [ 1536/24872]: 0%| | 0/777 [00:01<?, ?it/s]
loss: 0.213598 [ 1568/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.188487 [ 1600/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.254637 [ 1632/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.175812 [ 1664/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.162722 [ 1696/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.206478 [ 1728/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.277153 [ 1760/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.225374 [ 1792/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.277928 [ 1824/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.166010 [ 1856/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.163041 [ 1888/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.194185 [ 1920/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.185212 [ 1952/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.141050 [ 1984/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.140585 [ 2016/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.207821 [ 2048/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.223443 [ 2080/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.280901 [ 2112/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.199359 [ 2144/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.201745 [ 2176/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.242774 [ 2208/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.176942 [ 2240/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.301763 [ 2272/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.156402 [ 2304/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.156821 [ 2336/24872]: 0%| | 0/777 [00:02<?, ?it/s]
loss: 0.295274 [ 2368/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.175190 [ 2400/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.266213 [ 2432/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.209021 [ 2464/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.386048 [ 2496/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.183170 [ 2528/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.226629 [ 2560/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.193772 [ 2592/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.158141 [ 2624/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.131498 [ 2656/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.232991 [ 2688/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.235987 [ 2720/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.192181 [ 2752/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.214179 [ 2784/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.219568 [ 2816/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.177956 [ 2848/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.154159 [ 2880/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.168615 [ 2912/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.218368 [ 2944/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.209492 [ 2976/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.124455 [ 3008/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.294902 [ 3040/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.205312 [ 3072/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.190666 [ 3104/24872]: 0%| | 0/777 [00:03<?, ?it/s]
loss: 0.170737 [ 3136/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.216147 [ 3168/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.281551 [ 3200/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.139902 [ 3232/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.217451 [ 3264/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.217823 [ 3296/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.175555 [ 3328/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.167431 [ 3360/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.132396 [ 3392/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.145414 [ 3424/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.257855 [ 3456/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.220469 [ 3488/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.321512 [ 3520/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.257178 [ 3552/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.222912 [ 3584/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.168306 [ 3616/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.239199 [ 3648/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.210007 [ 3680/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.140636 [ 3712/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.182428 [ 3744/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.185261 [ 3776/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.233902 [ 3808/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.159175 [ 3840/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.187733 [ 3872/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.225390 [ 3904/24872]: 0%| | 0/777 [00:04<?, ?it/s]
loss: 0.266089 [ 3936/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.194197 [ 3968/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.231465 [ 4000/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.185335 [ 4032/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.341022 [ 4064/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.165785 [ 4096/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.167842 [ 4128/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.176901 [ 4160/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.195296 [ 4192/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.270394 [ 4224/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.314879 [ 4256/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.237131 [ 4288/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.122665 [ 4320/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.189103 [ 4352/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.173810 [ 4384/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.223680 [ 4416/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.185419 [ 4448/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.177030 [ 4480/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.168867 [ 4512/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.202805 [ 4544/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.151421 [ 4576/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.249384 [ 4608/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.263369 [ 4640/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.190023 [ 4672/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.291236 [ 4704/24872]: 0%| | 0/777 [00:05<?, ?it/s]
loss: 0.179663 [ 4736/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.206802 [ 4768/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.222480 [ 4800/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.233041 [ 4832/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.209469 [ 4864/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.276064 [ 4896/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.231135 [ 4928/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.153929 [ 4960/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.124786 [ 4992/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.160978 [ 5024/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.169353 [ 5056/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.216516 [ 5088/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.184550 [ 5120/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.221687 [ 5152/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.246030 [ 5184/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.245780 [ 5216/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.200677 [ 5248/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.394160 [ 5280/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.225466 [ 5312/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.226198 [ 5344/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.187757 [ 5376/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.170377 [ 5408/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.162241 [ 5440/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.184210 [ 5472/24872]: 0%| | 0/777 [00:06<?, ?it/s]
loss: 0.249458 [ 5504/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.197046 [ 5536/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.240921 [ 5568/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.214630 [ 5600/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.171587 [ 5632/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.201181 [ 5664/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.219871 [ 5696/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.246956 [ 5728/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.162874 [ 5760/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.169593 [ 5792/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.198671 [ 5824/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.389442 [ 5856/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.127707 [ 5888/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.248250 [ 5920/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.214452 [ 5952/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.153689 [ 5984/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.189549 [ 6016/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.176826 [ 6048/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.260629 [ 6080/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.219094 [ 6112/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.160869 [ 6144/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.279108 [ 6176/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.160330 [ 6208/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.165577 [ 6240/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.158131 [ 6272/24872]: 0%| | 0/777 [00:07<?, ?it/s]
loss: 0.170436 [ 6304/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.248961 [ 6336/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.206759 [ 6368/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.227307 [ 6400/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.193735 [ 6432/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.172946 [ 6464/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.198560 [ 6496/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.157590 [ 6528/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.275918 [ 6560/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.188258 [ 6592/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.213988 [ 6624/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.220092 [ 6656/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.180888 [ 6688/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.248899 [ 6720/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.176714 [ 6752/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.239144 [ 6784/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.273687 [ 6816/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.224503 [ 6848/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.143452 [ 6880/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.247562 [ 6912/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.193948 [ 6944/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.248761 [ 6976/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.151374 [ 7008/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.170274 [ 7040/24872]: 0%| | 0/777 [00:08<?, ?it/s]
loss: 0.224763 [ 7072/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.201737 [ 7104/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.154792 [ 7136/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.244247 [ 7168/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.154688 [ 7200/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.226519 [ 7232/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.293908 [ 7264/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.264042 [ 7296/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.261079 [ 7328/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.184937 [ 7360/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.249922 [ 7392/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.233020 [ 7424/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.184837 [ 7456/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.301918 [ 7488/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.221730 [ 7520/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.190455 [ 7552/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.142915 [ 7584/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.152851 [ 7616/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.251529 [ 7648/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.216069 [ 7680/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.273279 [ 7712/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.321129 [ 7744/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.260381 [ 7776/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.272932 [ 7808/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.310966 [ 7840/24872]: 0%| | 0/777 [00:09<?, ?it/s]
loss: 0.210424 [ 7872/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.208786 [ 7904/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.176979 [ 7936/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.189728 [ 7968/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.165676 [ 8000/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.221412 [ 8032/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.222315 [ 8064/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.162752 [ 8096/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.249454 [ 8128/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.280687 [ 8160/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.144309 [ 8192/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.139452 [ 8224/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.194847 [ 8256/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.208179 [ 8288/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.226479 [ 8320/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.203353 [ 8352/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.158960 [ 8384/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.188171 [ 8416/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.182359 [ 8448/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.218488 [ 8480/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.206799 [ 8512/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.258811 [ 8544/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.181351 [ 8576/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.329634 [ 8608/24872]: 0%| | 0/777 [00:10<?, ?it/s]
loss: 0.253220 [ 8640/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.358961 [ 8672/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.178169 [ 8704/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.229813 [ 8736/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.249194 [ 8768/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.132241 [ 8800/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.162429 [ 8832/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.147840 [ 8864/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.194007 [ 8896/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.181347 [ 8928/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.161351 [ 8960/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.228689 [ 8992/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.319909 [ 9024/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.203834 [ 9056/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.201564 [ 9088/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.167393 [ 9120/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.155417 [ 9152/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.163907 [ 9184/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.177658 [ 9216/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.170634 [ 9248/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.148079 [ 9280/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.209062 [ 9312/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.216462 [ 9344/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.230856 [ 9376/24872]: 0%| | 0/777 [00:11<?, ?it/s]
loss: 0.211876 [ 9408/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.300563 [ 9440/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.220088 [ 9472/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.183788 [ 9504/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.173661 [ 9536/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.172475 [ 9568/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.176068 [ 9600/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.163555 [ 9632/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.184684 [ 9664/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.253230 [ 9696/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.234412 [ 9728/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.269971 [ 9760/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.190410 [ 9792/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.183432 [ 9824/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.264421 [ 9856/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.299855 [ 9888/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.250576 [ 9920/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.326980 [ 9952/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.325761 [ 9984/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.173842 [10016/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.222986 [10048/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.289973 [10080/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.201933 [10112/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.267757 [10144/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.240842 [10176/24872]: 0%| | 0/777 [00:12<?, ?it/s]
loss: 0.275648 [10208/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.170299 [10240/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.184602 [10272/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.255797 [10304/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.263590 [10336/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.181506 [10368/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.198269 [10400/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.394338 [10432/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.318124 [10464/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.360026 [10496/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.257232 [10528/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.402894 [10560/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.245394 [10592/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.184805 [10624/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.222991 [10656/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.325542 [10688/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.199724 [10720/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.351470 [10752/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.162471 [10784/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.278717 [10816/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.240311 [10848/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.218861 [10880/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.218323 [10912/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.345283 [10944/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.156457 [10976/24872]: 0%| | 0/777 [00:13<?, ?it/s]
loss: 0.266801 [11008/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.191330 [11040/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.281041 [11072/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.307290 [11104/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.317760 [11136/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.179871 [11168/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.248855 [11200/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.288953 [11232/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.278400 [11264/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.199702 [11296/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.215189 [11328/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.250183 [11360/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.136757 [11392/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.195541 [11424/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.258006 [11456/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.307404 [11488/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.270499 [11520/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.267315 [11552/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.263414 [11584/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.202039 [11616/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.211774 [11648/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.174018 [11680/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.275765 [11712/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.171159 [11744/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.199638 [11776/24872]: 0%| | 0/777 [00:14<?, ?it/s]
loss: 0.175453 [11808/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.281067 [11840/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.228057 [11872/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.205150 [11904/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.266104 [11936/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.175079 [11968/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.175769 [12000/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.158471 [12032/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.265265 [12064/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.276859 [12096/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.246964 [12128/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.175443 [12160/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.280843 [12192/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.226496 [12224/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.191900 [12256/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.187271 [12288/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.246101 [12320/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.239643 [12352/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.190593 [12384/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.188295 [12416/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.223619 [12448/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.197898 [12480/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.316466 [12512/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.236978 [12544/24872]: 0%| | 0/777 [00:15<?, ?it/s]
loss: 0.277822 [12576/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.146687 [12608/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.187377 [12640/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.176804 [12672/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.188567 [12704/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.279751 [12736/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.320720 [12768/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.245011 [12800/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.260546 [12832/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.160632 [12864/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.219612 [12896/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.152440 [12928/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.161621 [12960/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.188991 [12992/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.129737 [13024/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.236509 [13056/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.217441 [13088/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.193203 [13120/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.180964 [13152/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.218673 [13184/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.113852 [13216/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.228320 [13248/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.185991 [13280/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.191892 [13312/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.186313 [13344/24872]: 0%| | 0/777 [00:16<?, ?it/s]
loss: 0.118403 [13376/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.226494 [13408/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.188894 [13440/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.282231 [13472/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.173238 [13504/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.232437 [13536/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.154373 [13568/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.209089 [13600/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.339260 [13632/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.167654 [13664/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.217741 [13696/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.173460 [13728/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.307790 [13760/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.285600 [13792/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.198876 [13824/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.193789 [13856/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.263346 [13888/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.180522 [13920/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.174520 [13952/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.228566 [13984/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.218705 [14016/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.219347 [14048/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.172117 [14080/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.200195 [14112/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.248676 [14144/24872]: 0%| | 0/777 [00:17<?, ?it/s]
loss: 0.190908 [14176/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.196968 [14208/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.178270 [14240/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.191559 [14272/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.243417 [14304/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.211964 [14336/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.247274 [14368/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.188830 [14400/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.147929 [14432/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.180822 [14464/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.164672 [14496/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.225546 [14528/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.202903 [14560/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.181402 [14592/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.168620 [14624/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.230261 [14656/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.294027 [14688/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.158400 [14720/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.264110 [14752/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.200962 [14784/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.193726 [14816/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.201205 [14848/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.238540 [14880/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.208971 [14912/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.280177 [14944/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.214193 [14976/24872]: 0%| | 0/777 [00:18<?, ?it/s]
loss: 0.213784 [15008/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.217948 [15040/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.223132 [15072/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.143530 [15104/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.223080 [15136/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.173652 [15168/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.160746 [15200/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.193044 [15232/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.219789 [15264/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.199969 [15296/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.196262 [15328/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.196708 [15360/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.185539 [15392/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.215282 [15424/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.188438 [15456/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.235755 [15488/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.154073 [15520/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.213893 [15552/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.287137 [15584/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.166068 [15616/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.197423 [15648/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.266139 [15680/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.186469 [15712/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.223197 [15744/24872]: 0%| | 0/777 [00:19<?, ?it/s]
loss: 0.158668 [15776/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.161818 [15808/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.236076 [15840/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.221046 [15872/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.191332 [15904/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.179100 [15936/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.177516 [15968/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.242800 [16000/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.157098 [16032/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.244844 [16064/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.179355 [16096/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.157621 [16128/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.280550 [16160/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.160210 [16192/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.250372 [16224/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.248580 [16256/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.112442 [16288/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.281110 [16320/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.250437 [16352/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.198963 [16384/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.266970 [16416/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.183987 [16448/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.155860 [16480/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.143509 [16512/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.285870 [16544/24872]: 0%| | 0/777 [00:20<?, ?it/s]
loss: 0.114743 [16576/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.185451 [16608/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.235392 [16640/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.266339 [16672/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.125498 [16704/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.234702 [16736/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.178062 [16768/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.148218 [16800/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.197787 [16832/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.182083 [16864/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.184380 [16896/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.145602 [16928/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.184404 [16960/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.180320 [16992/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.150624 [17024/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.147330 [17056/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.217215 [17088/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.249063 [17120/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.169716 [17152/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.120778 [17184/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.201157 [17216/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.162079 [17248/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.180297 [17280/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.189659 [17312/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.228647 [17344/24872]: 0%| | 0/777 [00:21<?, ?it/s]
loss: 0.201874 [17376/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.276566 [17408/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.226167 [17440/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.196564 [17472/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.264162 [17504/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.248460 [17536/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.268874 [17568/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.196166 [17600/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.215904 [17632/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.230269 [17664/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.251043 [17696/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.162418 [17728/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.248222 [17760/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.153611 [17792/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.227349 [17824/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.230989 [17856/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.321469 [17888/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.206423 [17920/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.132472 [17952/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.238983 [17984/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.178915 [18016/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.224837 [18048/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.204553 [18080/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.148392 [18112/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.305528 [18144/24872]: 0%| | 0/777 [00:22<?, ?it/s]
loss: 0.182119 [18176/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.243129 [18208/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.196100 [18240/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.230752 [18272/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.213495 [18304/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.158886 [18336/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.235075 [18368/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.197038 [18400/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.213581 [18432/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.111312 [18464/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.204669 [18496/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.270054 [18528/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.170217 [18560/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.300832 [18592/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.238461 [18624/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.191592 [18656/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.166080 [18688/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.218148 [18720/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.184979 [18752/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.172978 [18784/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.175530 [18816/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.263918 [18848/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.179543 [18880/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.225042 [18912/24872]: 0%| | 0/777 [00:23<?, ?it/s]
loss: 0.223385 [18944/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.178276 [18976/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.262281 [19008/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.156209 [19040/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.188742 [19072/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.220405 [19104/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.167597 [19136/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.153090 [19168/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.178007 [19200/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.230411 [19232/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.126273 [19264/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.259132 [19296/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.187442 [19328/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.162142 [19360/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.312344 [19392/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.124134 [19424/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.179196 [19456/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.139016 [19488/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.236216 [19520/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.272284 [19552/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.277810 [19584/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.316352 [19616/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.199405 [19648/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.261601 [19680/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.216947 [19712/24872]: 0%| | 0/777 [00:24<?, ?it/s]
loss: 0.162551 [19744/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.318559 [19776/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.201454 [19808/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.174720 [19840/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.275013 [19872/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.207977 [19904/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.158325 [19936/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.191578 [19968/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.218957 [20000/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.188816 [20032/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.231126 [20064/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.236942 [20096/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.233877 [20128/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.182716 [20160/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.112936 [20192/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.210450 [20224/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.203484 [20256/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.170230 [20288/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.257277 [20320/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.160853 [20352/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.264457 [20384/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.202295 [20416/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.186025 [20448/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.174534 [20480/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.231911 [20512/24872]: 0%| | 0/777 [00:25<?, ?it/s]
loss: 0.180832 [20544/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.162922 [20576/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.323971 [20608/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.278555 [20640/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.187062 [20672/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.169742 [20704/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.280803 [20736/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.142973 [20768/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.226349 [20800/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.314026 [20832/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.201804 [20864/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.127747 [20896/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.180813 [20928/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.169859 [20960/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.191793 [20992/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.168820 [21024/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.320643 [21056/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.195283 [21088/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.263558 [21120/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.203475 [21152/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.328041 [21184/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.170692 [21216/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.280063 [21248/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.222936 [21280/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.232620 [21312/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.222466 [21344/24872]: 0%| | 0/777 [00:26<?, ?it/s]
loss: 0.313402 [21376/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.393149 [21408/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.269758 [21440/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.224764 [21472/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.187728 [21504/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.243637 [21536/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.218983 [21568/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.173330 [21600/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.213436 [21632/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.173946 [21664/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.212529 [21696/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.143591 [21728/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.159568 [21760/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.156704 [21792/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.191097 [21824/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.183755 [21856/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.146881 [21888/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.176003 [21920/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.132528 [21952/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.138967 [21984/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.202791 [22016/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.244094 [22048/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.203790 [22080/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.191983 [22112/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.153552 [22144/24872]: 0%| | 0/777 [00:27<?, ?it/s]
loss: 0.207769 [22176/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.176555 [22208/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.231793 [22240/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.236986 [22272/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.317226 [22304/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.219293 [22336/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.214391 [22368/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.155736 [22400/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.176400 [22432/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.113130 [22464/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.201848 [22496/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.189903 [22528/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.156784 [22560/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.182320 [22592/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.222461 [22624/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.217577 [22656/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.184329 [22688/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.179371 [22720/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.160131 [22752/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.252535 [22784/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.211395 [22816/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.215233 [22848/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.107393 [22880/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.159843 [22912/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.128523 [22944/24872]: 0%| | 0/777 [00:28<?, ?it/s]
loss: 0.128255 [22976/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.162524 [23008/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.253909 [23040/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.141670 [23072/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.262513 [23104/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.155989 [23136/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.266513 [23168/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.118596 [23200/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.200894 [23232/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.192943 [23264/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.308918 [23296/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.153642 [23328/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.156911 [23360/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.200682 [23392/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.173011 [23424/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.275910 [23456/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.216240 [23488/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.228651 [23520/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.203906 [23552/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.211696 [23584/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.131109 [23616/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.257107 [23648/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.192307 [23680/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.189760 [23712/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.198439 [23744/24872]: 0%| | 0/777 [00:29<?, ?it/s]
loss: 0.142595 [23776/24872]: 0%| | 0/777 [00:30<?, ?it/s]
loss: 0.142595 [23776/24872]: 96%|█████████▌| 743/777 [00:30<00:01, 24.75it/s]
loss: 0.202703 [23808/24872]: 96%|█████████▌| 743/777 [00:30<00:01, 24.75it/s]
loss: 0.137616 [23840/24872]: 96%|█████████▌| 743/777 [00:30<00:01, 24.75it/s]
loss: 0.181918 [23872/24872]: 96%|█████████▌| 743/777 [00:30<00:01, 24.75it/s]
loss: 0.173316 [23904/24872]: 96%|█████████▌| 743/777 [00:30<00:01, 24.75it/s]
loss: 0.307944 [23936/24872]: 96%|█████████▌| 743/777 [00:30<00:01, 24.75it/s]
loss: 0.133633 [23968/24872]: 96%|█████████▌| 743/777 [00:30<00:01, 24.75it/s]
loss: 0.140645 [24000/24872]: 96%|█████████▌| 743/777 [00:30<00:01, 24.75it/s]
loss: 0.131274 [24032/24872]: 96%|█████████▌| 743/777 [00:30<00:01, 24.75it/s]
loss: 0.218077 [24064/24872]: 96%|█████████▌| 743/777 [00:30<00:01, 24.75it/s]
loss: 0.125636 [24096/24872]: 96%|█████████▌| 743/777 [00:30<00:01, 24.75it/s]
loss: 0.274108 [24128/24872]: 96%|█████████▌| 743/777 [00:30<00:01, 24.75it/s]
loss: 0.231587 [24160/24872]: 96%|█████████▌| 743/777 [00:30<00:01, 24.75it/s]
loss: 0.139581 [24192/24872]: 96%|█████████▌| 743/777 [00:30<00:01, 24.75it/s]
loss: 0.161746 [24224/24872]: 96%|█████████▌| 743/777 [00:30<00:01, 24.75it/s]
loss: 0.106568 [24256/24872]: 96%|█████████▌| 743/777 [00:30<00:01, 24.75it/s]
loss: 0.140085 [24288/24872]: 96%|█████████▌| 743/777 [00:30<00:01, 24.75it/s]
loss: 0.221657 [24320/24872]: 96%|█████████▌| 743/777 [00:30<00:01, 24.75it/s]
loss: 0.190212 [24352/24872]: 96%|█████████▌| 743/777 [00:30<00:01, 24.75it/s]
loss: 0.131423 [24384/24872]: 96%|█████████▌| 743/777 [00:30<00:01, 24.75it/s]
loss: 0.244949 [24416/24872]: 96%|█████████▌| 743/777 [00:30<00:01, 24.75it/s]
loss: 0.144375 [24448/24872]: 96%|█████████▌| 743/777 [00:30<00:01, 24.75it/s]
loss: 0.196630 [24480/24872]: 96%|█████████▌| 743/777 [00:30<00:01, 24.75it/s]
loss: 0.184308 [24512/24872]: 96%|█████████▌| 743/777 [00:30<00:01, 24.75it/s]
loss: 0.177361 [24544/24872]: 96%|█████████▌| 743/777 [00:30<00:01, 24.75it/s]
loss: 0.173422 [24576/24872]: 96%|█████████▌| 743/777 [00:31<00:01, 24.75it/s]
loss: 0.150845 [24608/24872]: 96%|█████████▌| 743/777 [00:31<00:01, 24.75it/s]
loss: 0.233674 [24640/24872]: 96%|█████████▌| 743/777 [00:31<00:01, 24.75it/s]
loss: 0.140672 [24672/24872]: 96%|█████████▌| 743/777 [00:31<00:01, 24.75it/s]
loss: 0.139329 [24704/24872]: 96%|█████████▌| 743/777 [00:31<00:01, 24.75it/s]
loss: 0.237641 [24736/24872]: 96%|█████████▌| 743/777 [00:31<00:01, 24.75it/s]
loss: 0.189219 [24768/24872]: 96%|█████████▌| 743/777 [00:31<00:01, 24.75it/s]
loss: 0.156036 [24800/24872]: 96%|█████████▌| 743/777 [00:31<00:01, 24.75it/s]
loss: 0.237722 [24832/24872]: 96%|█████████▌| 743/777 [00:31<00:01, 24.75it/s]
loss: 0.141117 [24864/24872]: 96%|█████████▌| 743/777 [00:31<00:01, 24.75it/s]
loss: 0.199975 [24872/24872]: 96%|█████████▌| 743/777 [00:31<00:01, 24.75it/s]
loss: 0.199975 [24872/24872]: : 778it [00:31, 24.77it/s]
-------------------------------
LR=0.0001, batch_size=64
-------------------------------
Epoch 1, time=159.13s
0%| | 0/388 [00:00<?, ?it/s]
loss: 0.191362 [ 64/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.307791 [ 128/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.224230 [ 192/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.279302 [ 256/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.277204 [ 320/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.258955 [ 384/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.157431 [ 448/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.249664 [ 512/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.232416 [ 576/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.284636 [ 640/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.215462 [ 704/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.169824 [ 768/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.201766 [ 832/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.251067 [ 896/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.235157 [ 960/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.200749 [ 1024/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.158294 [ 1088/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.160163 [ 1152/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.203251 [ 1216/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.153660 [ 1280/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.236096 [ 1344/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.188776 [ 1408/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.176138 [ 1472/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.174874 [ 1536/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.211362 [ 1600/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.183136 [ 1664/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.153854 [ 1728/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.236833 [ 1792/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.207110 [ 1856/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.141218 [ 1920/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.142771 [ 1984/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.165459 [ 2048/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.201413 [ 2112/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.191127 [ 2176/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.184677 [ 2240/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.192441 [ 2304/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.208922 [ 2368/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.180912 [ 2432/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.266760 [ 2496/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.173942 [ 2560/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.154204 [ 2624/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.153277 [ 2688/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.163186 [ 2752/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.183419 [ 2816/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.144250 [ 2880/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.149126 [ 2944/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.161401 [ 3008/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.228985 [ 3072/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.154530 [ 3136/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.198548 [ 3200/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.153870 [ 3264/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.160367 [ 3328/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.114213 [ 3392/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.165266 [ 3456/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.216061 [ 3520/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.166851 [ 3584/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.174313 [ 3648/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.132399 [ 3712/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.156650 [ 3776/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.171712 [ 3840/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.162389 [ 3904/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.185253 [ 3968/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.188641 [ 4032/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.217297 [ 4096/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.147008 [ 4160/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.170709 [ 4224/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.224025 [ 4288/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.122374 [ 4352/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.193254 [ 4416/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.156426 [ 4480/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.154314 [ 4544/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.171296 [ 4608/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.185334 [ 4672/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.202489 [ 4736/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.177009 [ 4800/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.171319 [ 4864/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.224285 [ 4928/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.122154 [ 4992/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.143881 [ 5056/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.160525 [ 5120/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.194334 [ 5184/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.201236 [ 5248/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.248181 [ 5312/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.187355 [ 5376/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.139467 [ 5440/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.187508 [ 5504/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.192309 [ 5568/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.177676 [ 5632/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.184939 [ 5696/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.171288 [ 5760/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.144461 [ 5824/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.235083 [ 5888/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.205166 [ 5952/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.145589 [ 6016/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.191503 [ 6080/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.164601 [ 6144/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.196013 [ 6208/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.136793 [ 6272/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.181422 [ 6336/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.194735 [ 6400/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.153929 [ 6464/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.164904 [ 6528/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.197826 [ 6592/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.188723 [ 6656/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.189161 [ 6720/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.153877 [ 6784/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.195797 [ 6848/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.151477 [ 6912/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.198056 [ 6976/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.143211 [ 7040/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.174628 [ 7104/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.150134 [ 7168/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.157433 [ 7232/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.222027 [ 7296/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.209115 [ 7360/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.209856 [ 7424/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.198880 [ 7488/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.175044 [ 7552/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.124525 [ 7616/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.211628 [ 7680/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.227821 [ 7744/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.231248 [ 7808/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.231708 [ 7872/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.146223 [ 7936/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.153288 [ 8000/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.194328 [ 8064/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.171856 [ 8128/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.194196 [ 8192/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.148274 [ 8256/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.182268 [ 8320/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.153135 [ 8384/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.138534 [ 8448/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.188656 [ 8512/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.209938 [ 8576/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.231830 [ 8640/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.239458 [ 8704/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.191799 [ 8768/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.118470 [ 8832/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.132665 [ 8896/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.142667 [ 8960/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.226490 [ 9024/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.162547 [ 9088/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.129348 [ 9152/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.144468 [ 9216/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.132564 [ 9280/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.186043 [ 9344/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.187937 [ 9408/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.225720 [ 9472/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.142412 [ 9536/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.134751 [ 9600/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.110381 [ 9664/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.207834 [ 9728/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.159677 [ 9792/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.188222 [ 9856/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.197355 [ 9920/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.190176 [ 9984/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.146310 [10048/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.176556 [10112/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.189737 [10176/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.169496 [10240/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.157873 [10304/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.166252 [10368/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.224744 [10432/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.216103 [10496/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.210091 [10560/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.155900 [10624/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.173766 [10688/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.166703 [10752/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.152078 [10816/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.165315 [10880/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.179400 [10944/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.145410 [11008/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.170469 [11072/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.245824 [11136/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.170745 [11200/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.188804 [11264/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.163741 [11328/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.145611 [11392/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.198746 [11456/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.227862 [11520/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.241638 [11584/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.174385 [11648/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.180816 [11712/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.162147 [11776/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.188008 [11840/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.187769 [11904/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.192126 [11968/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.141225 [12032/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.236379 [12096/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.192910 [12160/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.210445 [12224/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.142417 [12288/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.214021 [12352/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.155134 [12416/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.173745 [12480/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.236087 [12544/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.167062 [12608/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.150535 [12672/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.194657 [12736/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.237225 [12800/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.182546 [12864/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.151600 [12928/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.125304 [12992/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.145920 [13056/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.176961 [13120/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.166390 [13184/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.137884 [13248/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.161340 [13312/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.122917 [13376/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.174683 [13440/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.189548 [13504/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.168348 [13568/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.232053 [13632/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.165732 [13696/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.181615 [13760/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.186899 [13824/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.195955 [13888/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.141730 [13952/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.197233 [14016/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.164106 [14080/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.191446 [14144/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.163587 [14208/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.151089 [14272/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.182244 [14336/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.162573 [14400/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.120099 [14464/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.178337 [14528/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.162420 [14592/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.185498 [14656/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.170237 [14720/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.195925 [14784/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.181056 [14848/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.183677 [14912/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.183207 [14976/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.166272 [15040/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.144007 [15104/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.157270 [15168/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.146148 [15232/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.149305 [15296/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.142038 [15360/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.161974 [15424/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.180417 [15488/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.165704 [15552/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.182878 [15616/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.189827 [15680/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.171934 [15744/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.118874 [15808/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.196679 [15872/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.150053 [15936/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.193279 [16000/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.169311 [16064/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.140757 [16128/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.181083 [16192/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.221789 [16256/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.168815 [16320/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.196727 [16384/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.200696 [16448/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.123924 [16512/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.168527 [16576/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.181823 [16640/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.167213 [16704/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.193390 [16768/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.142375 [16832/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.173038 [16896/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.153437 [16960/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.152288 [17024/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.162343 [17088/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.187951 [17152/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.148487 [17216/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.161406 [17280/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.174881 [17344/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.206882 [17408/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.163933 [17472/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.228457 [17536/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.207315 [17600/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.193357 [17664/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.172890 [17728/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.159039 [17792/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.196978 [17856/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.219481 [17920/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.157065 [17984/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.139856 [18048/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.157789 [18112/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.182541 [18176/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.183185 [18240/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.178027 [18304/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.170979 [18368/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.177213 [18432/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.128326 [18496/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.188371 [18560/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.218649 [18624/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.135990 [18688/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.157174 [18752/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.142531 [18816/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.158168 [18880/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.160452 [18944/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.179417 [19008/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.133338 [19072/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.148712 [19136/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.143772 [19200/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.153172 [19264/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.183945 [19328/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.177424 [19392/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.118514 [19456/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.140955 [19520/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.218406 [19584/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.206742 [19648/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.212327 [19712/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.196401 [19776/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.156396 [19840/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.219986 [19904/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.135914 [19968/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.178394 [20032/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.191993 [20096/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.178183 [20160/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.114442 [20224/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.148953 [20288/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.179676 [20352/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.176485 [20416/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.162694 [20480/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.158898 [20544/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.178236 [20608/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.164135 [20672/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.153740 [20736/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.156097 [20800/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.194022 [20864/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.116789 [20928/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.144730 [20992/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.176914 [21056/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.183792 [21120/24872]: 0%| | 0/388 [00:17<?, ?it/s]
loss: 0.204086 [21184/24872]: 0%| | 0/388 [00:17<?, ?it/s]
loss: 0.184484 [21248/24872]: 0%| | 0/388 [00:17<?, ?it/s]
loss: 0.166381 [21312/24872]: 0%| | 0/388 [00:17<?, ?it/s]
loss: 0.261444 [21376/24872]: 0%| | 0/388 [00:17<?, ?it/s]
loss: 0.275528 [21440/24872]: 0%| | 0/388 [00:17<?, ?it/s]
loss: 0.183229 [21504/24872]: 0%| | 0/388 [00:17<?, ?it/s]
loss: 0.178120 [21568/24872]: 0%| | 0/388 [00:17<?, ?it/s]
loss: 0.180821 [21632/24872]: 0%| | 0/388 [00:17<?, ?it/s]
loss: 0.158650 [21696/24872]: 0%| | 0/388 [00:17<?, ?it/s]
loss: 0.115606 [21760/24872]: 0%| | 0/388 [00:17<?, ?it/s]
loss: 0.150495 [21824/24872]: 0%| | 0/388 [00:17<?, ?it/s]
loss: 0.138664 [21888/24872]: 0%| | 0/388 [00:17<?, ?it/s]
loss: 0.118069 [21952/24872]: 0%| | 0/388 [00:17<?, ?it/s]
loss: 0.124951 [22016/24872]: 0%| | 0/388 [00:17<?, ?it/s]
loss: 0.200384 [22080/24872]: 0%| | 0/388 [00:17<?, ?it/s]
loss: 0.132858 [22144/24872]: 0%| | 0/388 [00:17<?, ?it/s]
loss: 0.169561 [22208/24872]: 0%| | 0/388 [00:17<?, ?it/s]
loss: 0.184719 [22272/24872]: 0%| | 0/388 [00:17<?, ?it/s]
loss: 0.229883 [22336/24872]: 0%| | 0/388 [00:18<?, ?it/s]
loss: 0.133548 [22400/24872]: 0%| | 0/388 [00:18<?, ?it/s]
loss: 0.138497 [22464/24872]: 0%| | 0/388 [00:18<?, ?it/s]
loss: 0.149706 [22528/24872]: 0%| | 0/388 [00:18<?, ?it/s]
loss: 0.143221 [22592/24872]: 0%| | 0/388 [00:18<?, ?it/s]
loss: 0.189214 [22656/24872]: 0%| | 0/388 [00:18<?, ?it/s]
loss: 0.161408 [22720/24872]: 0%| | 0/388 [00:18<?, ?it/s]
loss: 0.171697 [22784/24872]: 0%| | 0/388 [00:18<?, ?it/s]
loss: 0.164743 [22848/24872]: 0%| | 0/388 [00:18<?, ?it/s]
loss: 0.111293 [22912/24872]: 0%| | 0/388 [00:18<?, ?it/s]
loss: 0.120728 [22976/24872]: 0%| | 0/388 [00:18<?, ?it/s]
loss: 0.182607 [23040/24872]: 0%| | 0/388 [00:18<?, ?it/s]
loss: 0.177272 [23104/24872]: 0%| | 0/388 [00:18<?, ?it/s]
loss: 0.184914 [23168/24872]: 0%| | 0/388 [00:18<?, ?it/s]
loss: 0.138318 [23232/24872]: 0%| | 0/388 [00:18<?, ?it/s]
loss: 0.219400 [23296/24872]: 0%| | 0/388 [00:18<?, ?it/s]
loss: 0.128814 [23360/24872]: 0%| | 0/388 [00:18<?, ?it/s]
loss: 0.160478 [23424/24872]: 0%| | 0/388 [00:18<?, ?it/s]
loss: 0.219212 [23488/24872]: 0%| | 0/388 [00:18<?, ?it/s]
loss: 0.167286 [23552/24872]: 0%| | 0/388 [00:18<?, ?it/s]
loss: 0.160536 [23616/24872]: 0%| | 0/388 [00:19<?, ?it/s]
loss: 0.185509 [23680/24872]: 0%| | 0/388 [00:19<?, ?it/s]
loss: 0.163449 [23744/24872]: 0%| | 0/388 [00:19<?, ?it/s]
loss: 0.143578 [23808/24872]: 0%| | 0/388 [00:19<?, ?it/s]
loss: 0.130626 [23872/24872]: 0%| | 0/388 [00:19<?, ?it/s]
loss: 0.219252 [23936/24872]: 0%| | 0/388 [00:19<?, ?it/s]
loss: 0.123358 [24000/24872]: 0%| | 0/388 [00:19<?, ?it/s]
loss: 0.154104 [24064/24872]: 0%| | 0/388 [00:19<?, ?it/s]
loss: 0.176036 [24128/24872]: 0%| | 0/388 [00:19<?, ?it/s]
loss: 0.165314 [24192/24872]: 0%| | 0/388 [00:19<?, ?it/s]
loss: 0.122759 [24256/24872]: 0%| | 0/388 [00:19<?, ?it/s]
loss: 0.157528 [24320/24872]: 0%| | 0/388 [00:19<?, ?it/s]
loss: 0.141191 [24384/24872]: 0%| | 0/388 [00:19<?, ?it/s]
loss: 0.169432 [24448/24872]: 0%| | 0/388 [00:19<?, ?it/s]
loss: 0.169002 [24512/24872]: 0%| | 0/388 [00:19<?, ?it/s]
loss: 0.161568 [24576/24872]: 0%| | 0/388 [00:19<?, ?it/s]
loss: 0.162856 [24640/24872]: 0%| | 0/388 [00:19<?, ?it/s]
loss: 0.127481 [24704/24872]: 0%| | 0/388 [00:19<?, ?it/s]
loss: 0.193814 [24768/24872]: 0%| | 0/388 [00:19<?, ?it/s]
loss: 0.180551 [24832/24872]: 0%| | 0/388 [00:19<?, ?it/s]
loss: 0.136429 [24872/24872]: 0%| | 0/388 [00:20<?, ?it/s]
loss: 0.136429 [24872/24872]: : 389it [00:20, 19.44it/s]
Epoch 2, time=179.15s
0%| | 0/388 [00:00<?, ?it/s]
loss: 0.170052 [ 64/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.168175 [ 128/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.165809 [ 192/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.187426 [ 256/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.211198 [ 320/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.224520 [ 384/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.133040 [ 448/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.224583 [ 512/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.150611 [ 576/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.220894 [ 640/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.168977 [ 704/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.144770 [ 768/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.197849 [ 832/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.235137 [ 896/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.209035 [ 960/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.178416 [ 1024/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.152840 [ 1088/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.133815 [ 1152/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.172062 [ 1216/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.151535 [ 1280/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.218506 [ 1344/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.178149 [ 1408/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.156520 [ 1472/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.156033 [ 1536/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.178810 [ 1600/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.154950 [ 1664/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.155652 [ 1728/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.205672 [ 1792/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.177482 [ 1856/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.133026 [ 1920/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.145595 [ 1984/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.164178 [ 2048/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.187324 [ 2112/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.176176 [ 2176/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.171510 [ 2240/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.183103 [ 2304/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.201595 [ 2368/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.156182 [ 2432/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.240845 [ 2496/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.174082 [ 2560/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.157034 [ 2624/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.152619 [ 2688/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.151360 [ 2752/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.174931 [ 2816/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.138166 [ 2880/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.143772 [ 2944/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.151869 [ 3008/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.216714 [ 3072/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.143085 [ 3136/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.194244 [ 3200/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.139414 [ 3264/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.140370 [ 3328/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.106944 [ 3392/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.161106 [ 3456/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.206932 [ 3520/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.161825 [ 3584/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.171725 [ 3648/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.131483 [ 3712/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.155166 [ 3776/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.164893 [ 3840/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.156705 [ 3904/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.182922 [ 3968/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.171821 [ 4032/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.209078 [ 4096/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.137142 [ 4160/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.167682 [ 4224/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.217210 [ 4288/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.121861 [ 4352/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.176868 [ 4416/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.159164 [ 4480/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.145604 [ 4544/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.152646 [ 4608/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.186996 [ 4672/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.177955 [ 4736/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.175740 [ 4800/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.169580 [ 4864/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.200430 [ 4928/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.125417 [ 4992/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.122235 [ 5056/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.152676 [ 5120/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.195575 [ 5184/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.175050 [ 5248/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.241311 [ 5312/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.176986 [ 5376/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.126720 [ 5440/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.188578 [ 5504/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.194336 [ 5568/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.165975 [ 5632/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.184777 [ 5696/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.166682 [ 5760/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.145588 [ 5824/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.231029 [ 5888/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.183418 [ 5952/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.145688 [ 6016/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.190171 [ 6080/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.159911 [ 6144/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.202276 [ 6208/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.134719 [ 6272/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.171652 [ 6336/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.210542 [ 6400/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.151844 [ 6464/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.153744 [ 6528/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.200651 [ 6592/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.180578 [ 6656/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.166956 [ 6720/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.159444 [ 6784/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.170257 [ 6848/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.149395 [ 6912/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.180961 [ 6976/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.134384 [ 7040/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.164881 [ 7104/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.140950 [ 7168/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.143671 [ 7232/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.204450 [ 7296/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.182027 [ 7360/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.187341 [ 7424/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.182169 [ 7488/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.151966 [ 7552/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.123638 [ 7616/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.194875 [ 7680/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.224297 [ 7744/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.226170 [ 7808/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.199554 [ 7872/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.141987 [ 7936/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.129833 [ 8000/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.189086 [ 8064/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.157104 [ 8128/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.181631 [ 8192/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.136255 [ 8256/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.166937 [ 8320/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.146766 [ 8384/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.128571 [ 8448/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.168690 [ 8512/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.197059 [ 8576/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.224616 [ 8640/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.224178 [ 8704/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.183657 [ 8768/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.106022 [ 8832/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.133392 [ 8896/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.137348 [ 8960/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.205048 [ 9024/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.135906 [ 9088/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.114033 [ 9152/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.127665 [ 9216/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.120592 [ 9280/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.167380 [ 9344/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.181157 [ 9408/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.203741 [ 9472/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.132580 [ 9536/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.122624 [ 9600/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.104601 [ 9664/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.169848 [ 9728/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.140748 [ 9792/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.160128 [ 9856/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.179851 [ 9920/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.178858 [ 9984/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.135719 [10048/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.164486 [10112/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.181971 [10176/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.164530 [10240/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.131379 [10304/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.147915 [10368/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.214490 [10432/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.203550 [10496/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.186777 [10560/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.138492 [10624/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.150386 [10688/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.165811 [10752/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.146862 [10816/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.155020 [10880/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.153619 [10944/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.129364 [11008/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.166914 [11072/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.222011 [11136/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.165381 [11200/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.166159 [11264/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.150184 [11328/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.118094 [11392/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.178785 [11456/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.212047 [11520/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.214918 [11584/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.165354 [11648/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.175704 [11712/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.143549 [11776/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.172394 [11840/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.180687 [11904/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.171690 [11968/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.117028 [12032/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.219895 [12096/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.203852 [12160/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.189370 [12224/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.162627 [12288/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.203864 [12352/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.143180 [12416/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.165833 [12480/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.220197 [12544/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.152467 [12608/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.140125 [12672/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.177746 [12736/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.217694 [12800/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.174271 [12864/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.144362 [12928/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.120473 [12992/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.128719 [13056/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.169882 [13120/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.149341 [13184/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.113284 [13248/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.148102 [13312/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.117310 [13376/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.159185 [13440/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.160033 [13504/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.155057 [13568/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.220219 [13632/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.150115 [13696/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.180039 [13760/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.184351 [13824/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.192152 [13888/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.134817 [13952/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.186468 [14016/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.150692 [14080/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.198766 [14144/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.158535 [14208/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.154945 [14272/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.176282 [14336/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.152334 [14400/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.115972 [14464/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.166663 [14528/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.154438 [14592/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.172513 [14656/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.174686 [14720/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.181484 [14784/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.174989 [14848/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.183225 [14912/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.191169 [14976/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.157216 [15040/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.137436 [15104/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.145794 [15168/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.139934 [15232/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.146065 [15296/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.140603 [15360/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.148951 [15424/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.169523 [15488/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.161564 [15552/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.167119 [15616/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.190392 [15680/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.157061 [15744/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.105261 [15808/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.197775 [15872/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.144101 [15936/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.172483 [16000/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.145005 [16064/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.128079 [16128/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.169581 [16192/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.222009 [16256/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.168476 [16320/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.192406 [16384/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.184000 [16448/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.135158 [16512/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.154544 [16576/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.173558 [16640/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.162761 [16704/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.174537 [16768/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.138099 [16832/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.159209 [16896/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.153856 [16960/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.136083 [17024/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.152836 [17088/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.163442 [17152/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.136629 [17216/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.144283 [17280/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.168253 [17344/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.196364 [17408/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.155871 [17472/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.202834 [17536/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.185006 [17600/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.186778 [17664/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.177182 [17728/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.160956 [17792/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.181685 [17856/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.207789 [17920/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.150254 [17984/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.127478 [18048/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.142558 [18112/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.194352 [18176/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.170768 [18240/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.169964 [18304/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.163954 [18368/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.181635 [18432/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.114251 [18496/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.173884 [18560/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.188726 [18624/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.129852 [18688/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.142317 [18752/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.131761 [18816/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.141166 [18880/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.150909 [18944/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.169045 [19008/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.117163 [19072/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.164841 [19136/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.125989 [19200/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.139848 [19264/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.190103 [19328/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.180654 [19392/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.114231 [19456/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.127797 [19520/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.224407 [19584/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.215293 [19648/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.160693 [19712/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.185470 [19776/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.153616 [19840/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.222224 [19904/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.127030 [19968/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.203481 [20032/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.176807 [20096/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.174259 [20160/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.120902 [20224/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.130891 [20288/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.168846 [20352/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.176322 [20416/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.140243 [20480/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.166548 [20544/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.155077 [20608/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.150706 [20672/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.151372 [20736/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.131160 [20800/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.171643 [20864/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.102480 [20928/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.121561 [20992/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.154328 [21056/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.176691 [21120/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.157944 [21184/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.173912 [21248/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.161189 [21312/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.218886 [21376/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.229566 [21440/24872]: 0%| | 0/388 [00:17<?, ?it/s]
loss: 0.143052 [21504/24872]: 0%| | 0/388 [00:17<?, ?it/s]
loss: 0.178934 [21568/24872]: 0%| | 0/388 [00:17<?, ?it/s]
loss: 0.157477 [21632/24872]: 0%| | 0/388 [00:17<?, ?it/s]
loss: 0.164984 [21696/24872]: 0%| | 0/388 [00:17<?, ?it/s]
loss: 0.125620 [21760/24872]: 0%| | 0/388 [00:17<?, ?it/s]
loss: 0.138995 [21824/24872]: 0%| | 0/388 [00:17<?, ?it/s]
loss: 0.136486 [21888/24872]: 0%| | 0/388 [00:17<?, ?it/s]
loss: 0.111081 [21952/24872]: 0%| | 0/388 [00:17<?, ?it/s]
loss: 0.111101 [22016/24872]: 0%| | 0/388 [00:17<?, ?it/s]
loss: 0.197640 [22080/24872]: 0%| | 0/388 [00:17<?, ?it/s]
loss: 0.111320 [22144/24872]: 0%| | 0/388 [00:17<?, ?it/s]
loss: 0.142223 [22208/24872]: 0%| | 0/388 [00:17<?, ?it/s]
loss: 0.186543 [22272/24872]: 0%| | 0/388 [00:17<?, ?it/s]
loss: 0.205143 [22336/24872]: 0%| | 0/388 [00:17<?, ?it/s]
loss: 0.122507 [22400/24872]: 0%| | 0/388 [00:17<?, ?it/s]
loss: 0.111421 [22464/24872]: 0%| | 0/388 [00:17<?, ?it/s]
loss: 0.146392 [22528/24872]: 0%| | 0/388 [00:17<?, ?it/s]
loss: 0.125647 [22592/24872]: 0%| | 0/388 [00:17<?, ?it/s]
loss: 0.168502 [22656/24872]: 0%| | 0/388 [00:17<?, ?it/s]
loss: 0.153622 [22720/24872]: 0%| | 0/388 [00:18<?, ?it/s]
loss: 0.158848 [22784/24872]: 0%| | 0/388 [00:18<?, ?it/s]
loss: 0.153974 [22848/24872]: 0%| | 0/388 [00:18<?, ?it/s]
loss: 0.098766 [22912/24872]: 0%| | 0/388 [00:18<?, ?it/s]
loss: 0.104514 [22976/24872]: 0%| | 0/388 [00:18<?, ?it/s]
loss: 0.158981 [23040/24872]: 0%| | 0/388 [00:18<?, ?it/s]
loss: 0.160547 [23104/24872]: 0%| | 0/388 [00:18<?, ?it/s]
loss: 0.167569 [23168/24872]: 0%| | 0/388 [00:18<?, ?it/s]
loss: 0.131260 [23232/24872]: 0%| | 0/388 [00:18<?, ?it/s]
loss: 0.204798 [23296/24872]: 0%| | 0/388 [00:18<?, ?it/s]
loss: 0.132713 [23360/24872]: 0%| | 0/388 [00:18<?, ?it/s]
loss: 0.152785 [23424/24872]: 0%| | 0/388 [00:18<?, ?it/s]
loss: 0.208184 [23488/24872]: 0%| | 0/388 [00:18<?, ?it/s]
loss: 0.145479 [23552/24872]: 0%| | 0/388 [00:18<?, ?it/s]
loss: 0.147457 [23616/24872]: 0%| | 0/388 [00:18<?, ?it/s]
loss: 0.176215 [23680/24872]: 0%| | 0/388 [00:18<?, ?it/s]
loss: 0.158778 [23744/24872]: 0%| | 0/388 [00:18<?, ?it/s]
loss: 0.137886 [23808/24872]: 0%| | 0/388 [00:18<?, ?it/s]
loss: 0.116825 [23872/24872]: 0%| | 0/388 [00:18<?, ?it/s]
loss: 0.206726 [23936/24872]: 0%| | 0/388 [00:18<?, ?it/s]
loss: 0.114050 [24000/24872]: 0%| | 0/388 [00:19<?, ?it/s]
loss: 0.150502 [24064/24872]: 0%| | 0/388 [00:19<?, ?it/s]
loss: 0.166932 [24128/24872]: 0%| | 0/388 [00:19<?, ?it/s]
loss: 0.166165 [24192/24872]: 0%| | 0/388 [00:19<?, ?it/s]
loss: 0.107743 [24256/24872]: 0%| | 0/388 [00:19<?, ?it/s]
loss: 0.143552 [24320/24872]: 0%| | 0/388 [00:19<?, ?it/s]
loss: 0.128938 [24384/24872]: 0%| | 0/388 [00:19<?, ?it/s]
loss: 0.166512 [24448/24872]: 0%| | 0/388 [00:19<?, ?it/s]
loss: 0.152860 [24512/24872]: 0%| | 0/388 [00:19<?, ?it/s]
loss: 0.145089 [24576/24872]: 0%| | 0/388 [00:19<?, ?it/s]
loss: 0.155415 [24640/24872]: 0%| | 0/388 [00:19<?, ?it/s]
loss: 0.119741 [24704/24872]: 0%| | 0/388 [00:19<?, ?it/s]
loss: 0.184134 [24768/24872]: 0%| | 0/388 [00:19<?, ?it/s]
loss: 0.158453 [24832/24872]: 0%| | 0/388 [00:19<?, ?it/s]
loss: 0.110952 [24872/24872]: 0%| | 0/388 [00:19<?, ?it/s]
loss: 0.110952 [24872/24872]: : 389it [00:19, 19.71it/s]
Epoch 3, time=198.88s
0%| | 0/388 [00:00<?, ?it/s]
loss: 0.164502 [ 64/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.155964 [ 128/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.147437 [ 192/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.174289 [ 256/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.170255 [ 320/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.185828 [ 384/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.095009 [ 448/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.201174 [ 512/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.149209 [ 576/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.192433 [ 640/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.153900 [ 704/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.137268 [ 768/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.160290 [ 832/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.218222 [ 896/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.181089 [ 960/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.164969 [ 1024/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.122184 [ 1088/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.131601 [ 1152/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.149327 [ 1216/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.127515 [ 1280/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.184833 [ 1344/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.160685 [ 1408/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.155060 [ 1472/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.137869 [ 1536/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.148442 [ 1600/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.138698 [ 1664/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.117168 [ 1728/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.169553 [ 1792/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.158298 [ 1856/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.101623 [ 1920/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.120771 [ 1984/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.131587 [ 2048/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.168886 [ 2112/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.153889 [ 2176/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.159363 [ 2240/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.152299 [ 2304/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.163431 [ 2368/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.132586 [ 2432/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.209446 [ 2496/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.151507 [ 2560/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.133130 [ 2624/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.110953 [ 2688/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.134228 [ 2752/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.162318 [ 2816/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.129602 [ 2880/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.122298 [ 2944/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.139105 [ 3008/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.199985 [ 3072/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.130011 [ 3136/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.174577 [ 3200/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.119905 [ 3264/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.133213 [ 3328/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.109346 [ 3392/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.152598 [ 3456/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.176661 [ 3520/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.140769 [ 3584/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.166126 [ 3648/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.124129 [ 3712/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.134036 [ 3776/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.145304 [ 3840/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.146001 [ 3904/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.154589 [ 3968/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.166673 [ 4032/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.192803 [ 4096/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.118532 [ 4160/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.156283 [ 4224/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.210616 [ 4288/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.109315 [ 4352/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.151979 [ 4416/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.135559 [ 4480/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.134820 [ 4544/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.140668 [ 4608/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.174282 [ 4672/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.159357 [ 4736/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.161062 [ 4800/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.152843 [ 4864/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.206172 [ 4928/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.110045 [ 4992/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.112005 [ 5056/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.149929 [ 5120/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.172539 [ 5184/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.174244 [ 5248/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.228650 [ 5312/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.170096 [ 5376/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.126967 [ 5440/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.164415 [ 5504/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.181343 [ 5568/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.155876 [ 5632/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.167020 [ 5696/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.165455 [ 5760/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.132630 [ 5824/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.217711 [ 5888/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.161824 [ 5952/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.110908 [ 6016/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.165739 [ 6080/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.144785 [ 6144/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.158055 [ 6208/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.119669 [ 6272/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.143893 [ 6336/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.178977 [ 6400/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.139454 [ 6464/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.134342 [ 6528/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.172785 [ 6592/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.177497 [ 6656/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.155229 [ 6720/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.141153 [ 6784/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.157880 [ 6848/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.134262 [ 6912/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.172991 [ 6976/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.132823 [ 7040/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.149464 [ 7104/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.116448 [ 7168/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.143783 [ 7232/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.181661 [ 7296/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.186394 [ 7360/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.191051 [ 7424/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.173390 [ 7488/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.139520 [ 7552/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.117890 [ 7616/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.180808 [ 7680/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.201919 [ 7744/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.205312 [ 7808/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.192693 [ 7872/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.139238 [ 7936/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.120350 [ 8000/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.183305 [ 8064/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.148779 [ 8128/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.162149 [ 8192/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.135903 [ 8256/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.159089 [ 8320/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.145246 [ 8384/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.134535 [ 8448/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.137371 [ 8512/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.204103 [ 8576/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.234834 [ 8640/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.222958 [ 8704/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.174677 [ 8768/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.107667 [ 8832/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.131573 [ 8896/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.129653 [ 8960/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.190307 [ 9024/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.141658 [ 9088/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.131636 [ 9152/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.129327 [ 9216/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.135023 [ 9280/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.178022 [ 9344/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.167884 [ 9408/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.203631 [ 9472/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.128054 [ 9536/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.113460 [ 9600/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.098710 [ 9664/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.180721 [ 9728/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.154470 [ 9792/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.161385 [ 9856/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.176709 [ 9920/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.164065 [ 9984/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.127906 [10048/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.159565 [10112/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.180387 [10176/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.141611 [10240/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.137167 [10304/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.140074 [10368/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.219419 [10432/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.175699 [10496/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.191268 [10560/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.155926 [10624/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.160648 [10688/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.144244 [10752/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.141596 [10816/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.151481 [10880/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.145222 [10944/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.130607 [11008/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.164581 [11072/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.213883 [11136/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.156916 [11200/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.166515 [11264/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.143788 [11328/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.115474 [11392/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.165563 [11456/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.193008 [11520/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.214653 [11584/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.165093 [11648/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.165012 [11712/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.126490 [11776/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.156861 [11840/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.156851 [11904/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.154807 [11968/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.105382 [12032/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.206804 [12096/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.183578 [12160/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.182939 [12224/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.125473 [12288/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.198857 [12352/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.133217 [12416/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.160578 [12480/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.207351 [12544/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.143380 [12608/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.133659 [12672/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.167793 [12736/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.210906 [12800/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.164279 [12864/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.127706 [12928/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.111994 [12992/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.131052 [13056/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.174048 [13120/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.149329 [13184/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.115214 [13248/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.140427 [13312/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.110157 [13376/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.146184 [13440/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.146775 [13504/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.146223 [13568/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.218962 [13632/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.141791 [13696/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.175910 [13760/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.184140 [13824/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.176148 [13888/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.117613 [13952/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.157896 [14016/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.137874 [14080/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.173362 [14144/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.162423 [14208/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.140347 [14272/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.166022 [14336/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.148215 [14400/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.109613 [14464/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.158723 [14528/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.148294 [14592/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.186317 [14656/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.153450 [14720/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.186909 [14784/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.164811 [14848/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.186613 [14912/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.157107 [14976/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.151480 [15040/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.134722 [15104/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.144317 [15168/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.146747 [15232/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.116280 [15296/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.126336 [15360/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.139974 [15424/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.170079 [15488/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.160964 [15552/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.150388 [15616/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.184518 [15680/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.159552 [15744/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.101086 [15808/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.199864 [15872/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.128630 [15936/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.167939 [16000/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.163638 [16064/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.133376 [16128/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.191071 [16192/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.233187 [16256/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.177199 [16320/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.218369 [16384/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.185835 [16448/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.137264 [16512/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.169783 [16576/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.169657 [16640/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.186419 [16704/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.196142 [16768/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.159390 [16832/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.182568 [16896/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.145045 [16960/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.167564 [17024/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.157725 [17088/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.159709 [17152/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.154052 [17216/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.159632 [17280/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.156433 [17344/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.198264 [17408/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.147586 [17472/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.213050 [17536/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.203857 [17600/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.186002 [17664/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.155869 [17728/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.155291 [17792/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.172677 [17856/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.192150 [17920/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.136239 [17984/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.135480 [18048/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.147135 [18112/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.167650 [18176/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.170520 [18240/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.164241 [18304/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.160157 [18368/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.170550 [18432/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.125493 [18496/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.164629 [18560/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.177480 [18624/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.114313 [18688/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.142279 [18752/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.127930 [18816/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.124684 [18880/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.136512 [18944/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.160568 [19008/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.119044 [19072/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.134895 [19136/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.120170 [19200/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.131099 [19264/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.161124 [19328/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.161320 [19392/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.094753 [19456/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.122078 [19520/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.211619 [19584/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.185051 [19648/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.207301 [19712/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.180324 [19776/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.139152 [19840/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.208914 [19904/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.123090 [19968/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.160679 [20032/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.184447 [20096/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.156666 [20160/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.120395 [20224/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.126769 [20288/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.152683 [20352/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.178398 [20416/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.131487 [20480/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.139501 [20544/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.134863 [20608/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.128111 [20672/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.131519 [20736/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.116805 [20800/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.154499 [20864/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.104885 [20928/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.120432 [20992/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.146283 [21056/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.166522 [21120/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.158257 [21184/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.161768 [21248/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.141410 [21312/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.247408 [21376/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.260909 [21440/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.171944 [21504/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.168457 [21568/24872]: 0%| | 0/388 [00:17<?, ?it/s]
loss: 0.154547 [21632/24872]: 0%| | 0/388 [00:17<?, ?it/s]
loss: 0.187575 [21696/24872]: 0%| | 0/388 [00:17<?, ?it/s]
loss: 0.135745 [21760/24872]: 0%| | 0/388 [00:17<?, ?it/s]
loss: 0.130110 [21824/24872]: 0%| | 0/388 [00:17<?, ?it/s]
loss: 0.139478 [21888/24872]: 0%| | 0/388 [00:17<?, ?it/s]
loss: 0.107971 [21952/24872]: 0%| | 0/388 [00:17<?, ?it/s]
loss: 0.108595 [22016/24872]: 0%| | 0/388 [00:17<?, ?it/s]
loss: 0.195299 [22080/24872]: 0%| | 0/388 [00:17<?, ?it/s]
loss: 0.151728 [22144/24872]: 0%| | 0/388 [00:17<?, ?it/s]
loss: 0.146344 [22208/24872]: 0%| | 0/388 [00:17<?, ?it/s]
loss: 0.230636 [22272/24872]: 0%| | 0/388 [00:17<?, ?it/s]
loss: 0.203026 [22336/24872]: 0%| | 0/388 [00:17<?, ?it/s]
loss: 0.138262 [22400/24872]: 0%| | 0/388 [00:17<?, ?it/s]
loss: 0.144604 [22464/24872]: 0%| | 0/388 [00:17<?, ?it/s]
loss: 0.147974 [22528/24872]: 0%| | 0/388 [00:17<?, ?it/s]
loss: 0.116733 [22592/24872]: 0%| | 0/388 [00:17<?, ?it/s]
loss: 0.209666 [22656/24872]: 0%| | 0/388 [00:17<?, ?it/s]
loss: 0.157019 [22720/24872]: 0%| | 0/388 [00:17<?, ?it/s]
loss: 0.170354 [22784/24872]: 0%| | 0/388 [00:17<?, ?it/s]
loss: 0.181074 [22848/24872]: 0%| | 0/388 [00:18<?, ?it/s]
loss: 0.103406 [22912/24872]: 0%| | 0/388 [00:18<?, ?it/s]
loss: 0.104526 [22976/24872]: 0%| | 0/388 [00:18<?, ?it/s]
loss: 0.186670 [23040/24872]: 0%| | 0/388 [00:18<?, ?it/s]
loss: 0.179545 [23104/24872]: 0%| | 0/388 [00:18<?, ?it/s]
loss: 0.172914 [23168/24872]: 0%| | 0/388 [00:18<?, ?it/s]
loss: 0.141027 [23232/24872]: 0%| | 0/388 [00:18<?, ?it/s]
loss: 0.192844 [23296/24872]: 0%| | 0/388 [00:18<?, ?it/s]
loss: 0.117575 [23360/24872]: 0%| | 0/388 [00:18<?, ?it/s]
loss: 0.151837 [23424/24872]: 0%| | 0/388 [00:18<?, ?it/s]
loss: 0.196320 [23488/24872]: 0%| | 0/388 [00:18<?, ?it/s]
loss: 0.149488 [23552/24872]: 0%| | 0/388 [00:18<?, ?it/s]
loss: 0.162849 [23616/24872]: 0%| | 0/388 [00:18<?, ?it/s]
loss: 0.166823 [23680/24872]: 0%| | 0/388 [00:18<?, ?it/s]
loss: 0.159816 [23744/24872]: 0%| | 0/388 [00:18<?, ?it/s]
loss: 0.134926 [23808/24872]: 0%| | 0/388 [00:18<?, ?it/s]
loss: 0.115035 [23872/24872]: 0%| | 0/388 [00:18<?, ?it/s]
loss: 0.208341 [23936/24872]: 0%| | 0/388 [00:18<?, ?it/s]
loss: 0.115880 [24000/24872]: 0%| | 0/388 [00:18<?, ?it/s]
loss: 0.150401 [24064/24872]: 0%| | 0/388 [00:19<?, ?it/s]
loss: 0.160453 [24128/24872]: 0%| | 0/388 [00:19<?, ?it/s]
loss: 0.160490 [24192/24872]: 0%| | 0/388 [00:19<?, ?it/s]
loss: 0.101335 [24256/24872]: 0%| | 0/388 [00:19<?, ?it/s]
loss: 0.146997 [24320/24872]: 0%| | 0/388 [00:19<?, ?it/s]
loss: 0.121959 [24384/24872]: 0%| | 0/388 [00:19<?, ?it/s]
loss: 0.163458 [24448/24872]: 0%| | 0/388 [00:19<?, ?it/s]
loss: 0.145880 [24512/24872]: 0%| | 0/388 [00:19<?, ?it/s]
loss: 0.149887 [24576/24872]: 0%| | 0/388 [00:19<?, ?it/s]
loss: 0.158731 [24640/24872]: 0%| | 0/388 [00:19<?, ?it/s]
loss: 0.115718 [24704/24872]: 0%| | 0/388 [00:19<?, ?it/s]
loss: 0.167446 [24768/24872]: 0%| | 0/388 [00:19<?, ?it/s]
loss: 0.153452 [24832/24872]: 0%| | 0/388 [00:19<?, ?it/s]
loss: 0.114273 [24872/24872]: 0%| | 0/388 [00:19<?, ?it/s]
loss: 0.114273 [24872/24872]: : 389it [00:19, 19.77it/s]
Epoch 4, time=218.56s
0%| | 0/388 [00:00<?, ?it/s]
loss: 0.163906 [ 64/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.157845 [ 128/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.156643 [ 192/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.158009 [ 256/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.181489 [ 320/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.202169 [ 384/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.113293 [ 448/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.229643 [ 512/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.137166 [ 576/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.214431 [ 640/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.185228 [ 704/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.133230 [ 768/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.168967 [ 832/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.234095 [ 896/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.197803 [ 960/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.171834 [ 1024/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.166122 [ 1088/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.153474 [ 1152/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.149272 [ 1216/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.175406 [ 1280/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.185740 [ 1344/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.164930 [ 1408/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.180795 [ 1472/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.130450 [ 1536/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.151757 [ 1600/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.164020 [ 1664/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.114799 [ 1728/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.168585 [ 1792/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.164045 [ 1856/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.102429 [ 1920/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.112570 [ 1984/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.136152 [ 2048/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.170438 [ 2112/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.137809 [ 2176/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.166270 [ 2240/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.151395 [ 2304/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.163473 [ 2368/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.143605 [ 2432/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.217687 [ 2496/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.149967 [ 2560/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.152003 [ 2624/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.131352 [ 2688/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.131207 [ 2752/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.176059 [ 2816/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.133798 [ 2880/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.119779 [ 2944/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.132711 [ 3008/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.228813 [ 3072/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.129140 [ 3136/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.200003 [ 3200/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.136717 [ 3264/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.122133 [ 3328/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.098111 [ 3392/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.157205 [ 3456/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.170429 [ 3520/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.136883 [ 3584/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.133469 [ 3648/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.113742 [ 3712/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.137613 [ 3776/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.140853 [ 3840/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.138468 [ 3904/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.147913 [ 3968/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.154373 [ 4032/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.187143 [ 4096/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.120754 [ 4160/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.134879 [ 4224/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.189065 [ 4288/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.110887 [ 4352/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.150669 [ 4416/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.120876 [ 4480/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.135463 [ 4544/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.130694 [ 4608/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.160335 [ 4672/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.128666 [ 4736/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.144207 [ 4800/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.145292 [ 4864/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.178028 [ 4928/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.094050 [ 4992/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.109334 [ 5056/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.127037 [ 5120/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.158005 [ 5184/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.154729 [ 5248/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.226753 [ 5312/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.151384 [ 5376/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.113010 [ 5440/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.152197 [ 5504/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.168344 [ 5568/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.135448 [ 5632/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.150642 [ 5696/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.157878 [ 5760/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.109419 [ 5824/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.189272 [ 5888/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.140913 [ 5952/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.101387 [ 6016/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.159709 [ 6080/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.144221 [ 6144/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.162464 [ 6208/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.108663 [ 6272/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.141046 [ 6336/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.175151 [ 6400/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.130860 [ 6464/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.127759 [ 6528/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.174551 [ 6592/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.163876 [ 6656/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.158489 [ 6720/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.131051 [ 6784/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.140807 [ 6848/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.151853 [ 6912/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.166845 [ 6976/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.130920 [ 7040/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.153479 [ 7104/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.114246 [ 7168/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.130506 [ 7232/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.171837 [ 7296/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.153359 [ 7360/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.171767 [ 7424/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.174262 [ 7488/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.130054 [ 7552/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.113995 [ 7616/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.195102 [ 7680/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.178630 [ 7744/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.195620 [ 7808/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.183737 [ 7872/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.116574 [ 7936/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.104681 [ 8000/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.168957 [ 8064/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.142068 [ 8128/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.149040 [ 8192/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.117451 [ 8256/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.142035 [ 8320/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.137716 [ 8384/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.118906 [ 8448/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.126731 [ 8512/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.189207 [ 8576/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.200744 [ 8640/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.197137 [ 8704/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.151700 [ 8768/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.094535 [ 8832/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.114869 [ 8896/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.125248 [ 8960/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.205786 [ 9024/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.123029 [ 9088/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.105696 [ 9152/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.116622 [ 9216/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.105978 [ 9280/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.158612 [ 9344/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.157183 [ 9408/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.181260 [ 9472/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.124842 [ 9536/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.105175 [ 9600/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.104327 [ 9664/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.173022 [ 9728/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.127620 [ 9792/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.159325 [ 9856/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.156649 [ 9920/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.135679 [ 9984/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.119467 [10048/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.132788 [10112/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.169585 [10176/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.145479 [10240/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.123372 [10304/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.123913 [10368/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.192986 [10432/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.162409 [10496/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.184932 [10560/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.138203 [10624/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.151020 [10688/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.134723 [10752/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.131025 [10816/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.138923 [10880/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.147975 [10944/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.114675 [11008/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.163536 [11072/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.214837 [11136/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.148452 [11200/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.160538 [11264/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.156956 [11328/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.114210 [11392/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.183248 [11456/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.206463 [11520/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.181110 [11584/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.165261 [11648/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.179060 [11712/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.121733 [11776/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.161299 [11840/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.154814 [11904/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.145632 [11968/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.115328 [12032/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.210300 [12096/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.169283 [12160/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.171520 [12224/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.132904 [12288/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.175768 [12352/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.145136 [12416/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.153064 [12480/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.195653 [12544/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.136685 [12608/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.118317 [12672/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.172029 [12736/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.194361 [12800/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.154096 [12864/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.119591 [12928/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.087358 [12992/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.114039 [13056/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.157121 [13120/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.142725 [13184/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.096812 [13248/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.124414 [13312/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.100810 [13376/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.136495 [13440/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.134077 [13504/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.139516 [13568/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.216983 [13632/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.128291 [13696/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.158075 [13760/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.154240 [13824/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.156091 [13888/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.104057 [13952/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.158211 [14016/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.115249 [14080/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.150895 [14144/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.140490 [14208/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.132920 [14272/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.152967 [14336/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.146465 [14400/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.098268 [14464/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.142783 [14528/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.133942 [14592/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.188619 [14656/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.137153 [14720/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.163076 [14784/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.144574 [14848/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.161244 [14912/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.142131 [14976/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.143942 [15040/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.125117 [15104/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.134449 [15168/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.128022 [15232/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.121916 [15296/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.120927 [15360/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.137439 [15424/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.148087 [15488/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.132679 [15552/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.150805 [15616/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.149999 [15680/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.142499 [15744/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.092210 [15808/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.178685 [15872/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.116271 [15936/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.152487 [16000/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.124623 [16064/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.118076 [16128/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.137356 [16192/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.206423 [16256/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.158031 [16320/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.168833 [16384/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.162999 [16448/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.108912 [16512/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.138920 [16576/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.138406 [16640/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.132938 [16704/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.161264 [16768/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.119162 [16832/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.142195 [16896/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.133528 [16960/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.104709 [17024/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.140016 [17088/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.135437 [17152/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.128950 [17216/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.130748 [17280/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.133626 [17344/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.188511 [17408/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.126197 [17472/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.203206 [17536/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.180749 [17600/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.169994 [17664/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.147477 [17728/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.141788 [17792/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.154999 [17856/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.162185 [17920/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.124416 [17984/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.102904 [18048/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.135410 [18112/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.153859 [18176/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.151566 [18240/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.161762 [18304/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.148868 [18368/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.156088 [18432/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.115660 [18496/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.144533 [18560/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.168512 [18624/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.103439 [18688/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.129575 [18752/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.107232 [18816/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.110094 [18880/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.137672 [18944/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.151116 [19008/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.109504 [19072/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.113635 [19136/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.129629 [19200/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.125650 [19264/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.152757 [19328/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.149783 [19392/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.087074 [19456/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.116658 [19520/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.176758 [19584/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.177509 [19648/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.190384 [19712/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.156691 [19776/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.127193 [19840/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.183041 [19904/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.115785 [19968/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.164848 [20032/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.164221 [20096/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.149306 [20160/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.115265 [20224/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.134275 [20288/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.137729 [20352/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.163283 [20416/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.115095 [20480/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.141497 [20544/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.132172 [20608/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.121662 [20672/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.129293 [20736/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.109935 [20800/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.145707 [20864/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.107943 [20928/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.109441 [20992/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.129516 [21056/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.160503 [21120/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.127246 [21184/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.166259 [21248/24872]: 0%| | 0/388 [00:17<?, ?it/s]
loss: 0.131143 [21312/24872]: 0%| | 0/388 [00:17<?, ?it/s]
loss: 0.188685 [21376/24872]: 0%| | 0/388 [00:17<?, ?it/s]
loss: 0.251485 [21440/24872]: 0%| | 0/388 [00:17<?, ?it/s]
loss: 0.120733 [21504/24872]: 0%| | 0/388 [00:17<?, ?it/s]
loss: 0.160609 [21568/24872]: 0%| | 0/388 [00:17<?, ?it/s]
loss: 0.135510 [21632/24872]: 0%| | 0/388 [00:17<?, ?it/s]
loss: 0.143903 [21696/24872]: 0%| | 0/388 [00:17<?, ?it/s]
loss: 0.122559 [21760/24872]: 0%| | 0/388 [00:17<?, ?it/s]
loss: 0.121776 [21824/24872]: 0%| | 0/388 [00:17<?, ?it/s]
loss: 0.122878 [21888/24872]: 0%| | 0/388 [00:17<?, ?it/s]
loss: 0.101503 [21952/24872]: 0%| | 0/388 [00:17<?, ?it/s]
loss: 0.093052 [22016/24872]: 0%| | 0/388 [00:17<?, ?it/s]
loss: 0.167860 [22080/24872]: 0%| | 0/388 [00:17<?, ?it/s]
loss: 0.121097 [22144/24872]: 0%| | 0/388 [00:17<?, ?it/s]
loss: 0.120841 [22208/24872]: 0%| | 0/388 [00:17<?, ?it/s]
loss: 0.183161 [22272/24872]: 0%| | 0/388 [00:17<?, ?it/s]
loss: 0.180401 [22336/24872]: 0%| | 0/388 [00:17<?, ?it/s]
loss: 0.110714 [22400/24872]: 0%| | 0/388 [00:17<?, ?it/s]
loss: 0.106681 [22464/24872]: 0%| | 0/388 [00:17<?, ?it/s]
loss: 0.123199 [22528/24872]: 0%| | 0/388 [00:18<?, ?it/s]
loss: 0.105127 [22592/24872]: 0%| | 0/388 [00:18<?, ?it/s]
loss: 0.162528 [22656/24872]: 0%| | 0/388 [00:18<?, ?it/s]
loss: 0.140588 [22720/24872]: 0%| | 0/388 [00:18<?, ?it/s]
loss: 0.159138 [22784/24872]: 0%| | 0/388 [00:18<?, ?it/s]
loss: 0.153671 [22848/24872]: 0%| | 0/388 [00:18<?, ?it/s]
loss: 0.104100 [22912/24872]: 0%| | 0/388 [00:18<?, ?it/s]
loss: 0.108770 [22976/24872]: 0%| | 0/388 [00:18<?, ?it/s]
loss: 0.156796 [23040/24872]: 0%| | 0/388 [00:18<?, ?it/s]
loss: 0.156916 [23104/24872]: 0%| | 0/388 [00:18<?, ?it/s]
loss: 0.140305 [23168/24872]: 0%| | 0/388 [00:18<?, ?it/s]
loss: 0.112665 [23232/24872]: 0%| | 0/388 [00:18<?, ?it/s]
loss: 0.203345 [23296/24872]: 0%| | 0/388 [00:18<?, ?it/s]
loss: 0.122076 [23360/24872]: 0%| | 0/388 [00:18<?, ?it/s]
loss: 0.147217 [23424/24872]: 0%| | 0/388 [00:18<?, ?it/s]
loss: 0.175025 [23488/24872]: 0%| | 0/388 [00:18<?, ?it/s]
loss: 0.152526 [23552/24872]: 0%| | 0/388 [00:18<?, ?it/s]
loss: 0.146315 [23616/24872]: 0%| | 0/388 [00:18<?, ?it/s]
loss: 0.143831 [23680/24872]: 0%| | 0/388 [00:18<?, ?it/s]
loss: 0.131240 [23744/24872]: 0%| | 0/388 [00:19<?, ?it/s]
loss: 0.125723 [23808/24872]: 0%| | 0/388 [00:19<?, ?it/s]
loss: 0.111189 [23872/24872]: 0%| | 0/388 [00:19<?, ?it/s]
loss: 0.192106 [23936/24872]: 0%| | 0/388 [00:19<?, ?it/s]
loss: 0.103306 [24000/24872]: 0%| | 0/388 [00:19<?, ?it/s]
loss: 0.137442 [24064/24872]: 0%| | 0/388 [00:19<?, ?it/s]
loss: 0.144793 [24128/24872]: 0%| | 0/388 [00:19<?, ?it/s]
loss: 0.153901 [24192/24872]: 0%| | 0/388 [00:19<?, ?it/s]
loss: 0.100311 [24256/24872]: 0%| | 0/388 [00:19<?, ?it/s]
loss: 0.129913 [24320/24872]: 0%| | 0/388 [00:19<?, ?it/s]
loss: 0.126040 [24384/24872]: 0%| | 0/388 [00:19<?, ?it/s]
loss: 0.176261 [24448/24872]: 0%| | 0/388 [00:19<?, ?it/s]
loss: 0.140712 [24512/24872]: 0%| | 0/388 [00:19<?, ?it/s]
loss: 0.143256 [24576/24872]: 0%| | 0/388 [00:19<?, ?it/s]
loss: 0.155171 [24640/24872]: 0%| | 0/388 [00:19<?, ?it/s]
loss: 0.113284 [24704/24872]: 0%| | 0/388 [00:19<?, ?it/s]
loss: 0.172543 [24768/24872]: 0%| | 0/388 [00:19<?, ?it/s]
loss: 0.152373 [24832/24872]: 0%| | 0/388 [00:19<?, ?it/s]
loss: 0.114209 [24872/24872]: 0%| | 0/388 [00:19<?, ?it/s]
loss: 0.114209 [24872/24872]: : 389it [00:19, 19.53it/s]
Epoch 5, time=238.48s
0%| | 0/388 [00:00<?, ?it/s]
loss: 0.157690 [ 64/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.155700 [ 128/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.139376 [ 192/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.156539 [ 256/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.166700 [ 320/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.169294 [ 384/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.093200 [ 448/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.182094 [ 512/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.120945 [ 576/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.189477 [ 640/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.140213 [ 704/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.124483 [ 768/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.159179 [ 832/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.190067 [ 896/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.169327 [ 960/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.157011 [ 1024/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.109676 [ 1088/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.106707 [ 1152/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.142970 [ 1216/24872]: 0%| | 0/388 [00:00<?, ?it/s]
loss: 0.129350 [ 1280/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.158433 [ 1344/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.151423 [ 1408/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.135320 [ 1472/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.123936 [ 1536/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.135317 [ 1600/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.120682 [ 1664/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.106133 [ 1728/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.163803 [ 1792/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.136886 [ 1856/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.110736 [ 1920/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.114762 [ 1984/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.131193 [ 2048/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.161880 [ 2112/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.142376 [ 2176/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.152509 [ 2240/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.136976 [ 2304/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.151216 [ 2368/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.121362 [ 2432/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.203750 [ 2496/24872]: 0%| | 0/388 [00:01<?, ?it/s]
loss: 0.137440 [ 2560/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.124216 [ 2624/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.110766 [ 2688/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.124112 [ 2752/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.142506 [ 2816/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.111559 [ 2880/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.113908 [ 2944/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.121165 [ 3008/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.185791 [ 3072/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.119279 [ 3136/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.161951 [ 3200/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.097334 [ 3264/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.126753 [ 3328/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.098806 [ 3392/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.153131 [ 3456/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.162717 [ 3520/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.136167 [ 3584/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.136076 [ 3648/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.117374 [ 3712/24872]: 0%| | 0/388 [00:02<?, ?it/s]
loss: 0.125989 [ 3776/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.140331 [ 3840/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.124097 [ 3904/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.146732 [ 3968/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.158128 [ 4032/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.189545 [ 4096/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.132539 [ 4160/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.131018 [ 4224/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.189901 [ 4288/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.120885 [ 4352/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.147780 [ 4416/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.120391 [ 4480/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.133805 [ 4544/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.123583 [ 4608/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.144233 [ 4672/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.132717 [ 4736/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.144281 [ 4800/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.137193 [ 4864/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.186479 [ 4928/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.097479 [ 4992/24872]: 0%| | 0/388 [00:03<?, ?it/s]
loss: 0.115992 [ 5056/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.116999 [ 5120/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.159975 [ 5184/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.148388 [ 5248/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.214266 [ 5312/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.149073 [ 5376/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.106551 [ 5440/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.145720 [ 5504/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.161114 [ 5568/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.142186 [ 5632/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.137076 [ 5696/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.147749 [ 5760/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.117297 [ 5824/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.176977 [ 5888/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.144759 [ 5952/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.096725 [ 6016/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.170025 [ 6080/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.147391 [ 6144/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.129466 [ 6208/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.110968 [ 6272/24872]: 0%| | 0/388 [00:04<?, ?it/s]
loss: 0.129109 [ 6336/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.165717 [ 6400/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.123625 [ 6464/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.116273 [ 6528/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.154503 [ 6592/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.155006 [ 6656/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.137187 [ 6720/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.123860 [ 6784/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.150084 [ 6848/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.124739 [ 6912/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.159300 [ 6976/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.128578 [ 7040/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.126880 [ 7104/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.108590 [ 7168/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.130074 [ 7232/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.165846 [ 7296/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.156612 [ 7360/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.183318 [ 7424/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.177524 [ 7488/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.120986 [ 7552/24872]: 0%| | 0/388 [00:05<?, ?it/s]
loss: 0.106870 [ 7616/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.182586 [ 7680/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.173173 [ 7744/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.199449 [ 7808/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.176761 [ 7872/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.117905 [ 7936/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.112316 [ 8000/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.158481 [ 8064/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.142775 [ 8128/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.148690 [ 8192/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.105868 [ 8256/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.145579 [ 8320/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.131567 [ 8384/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.110591 [ 8448/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.137514 [ 8512/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.169618 [ 8576/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.196025 [ 8640/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.182950 [ 8704/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.161891 [ 8768/24872]: 0%| | 0/388 [00:06<?, ?it/s]
loss: 0.095758 [ 8832/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.109868 [ 8896/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.117452 [ 8960/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.186199 [ 9024/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.122369 [ 9088/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.103455 [ 9152/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.123062 [ 9216/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.099746 [ 9280/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.158957 [ 9344/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.157307 [ 9408/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.169877 [ 9472/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.123995 [ 9536/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.109233 [ 9600/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.102715 [ 9664/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.177888 [ 9728/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.130280 [ 9792/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.147955 [ 9856/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.176279 [ 9920/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.141229 [ 9984/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.120502 [10048/24872]: 0%| | 0/388 [00:07<?, ?it/s]
loss: 0.136394 [10112/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.164097 [10176/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.128463 [10240/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.130279 [10304/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.118010 [10368/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.181392 [10432/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.147848 [10496/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.182807 [10560/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.126278 [10624/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.134791 [10688/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.127164 [10752/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.129557 [10816/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.132342 [10880/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.122256 [10944/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.107230 [11008/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.143851 [11072/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.204010 [11136/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.143530 [11200/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.146152 [11264/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.141914 [11328/24872]: 0%| | 0/388 [00:08<?, ?it/s]
loss: 0.101739 [11392/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.164095 [11456/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.176845 [11520/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.166638 [11584/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.145821 [11648/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.165703 [11712/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.119736 [11776/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.149170 [11840/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.139845 [11904/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.143941 [11968/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.104277 [12032/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.199128 [12096/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.163068 [12160/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.154280 [12224/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.120143 [12288/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.166309 [12352/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.133227 [12416/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.141009 [12480/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.180822 [12544/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.111994 [12608/24872]: 0%| | 0/388 [00:09<?, ?it/s]
loss: 0.121407 [12672/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.175519 [12736/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.180304 [12800/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.142916 [12864/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.103890 [12928/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.083670 [12992/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.106376 [13056/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.138947 [13120/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.136995 [13184/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.093202 [13248/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.120291 [13312/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.094172 [13376/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.147104 [13440/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.125525 [13504/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.133866 [13568/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.189888 [13632/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.135265 [13696/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.154062 [13760/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.151848 [13824/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.179931 [13888/24872]: 0%| | 0/388 [00:10<?, ?it/s]
loss: 0.112546 [13952/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.164613 [14016/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.136450 [14080/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.142437 [14144/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.146542 [14208/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.132734 [14272/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.156485 [14336/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.146196 [14400/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.099733 [14464/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.149261 [14528/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.133193 [14592/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.174064 [14656/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.142341 [14720/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.176402 [14784/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.144498 [14848/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.180208 [14912/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.154315 [14976/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.137402 [15040/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.114836 [15104/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.135437 [15168/24872]: 0%| | 0/388 [00:11<?, ?it/s]
loss: 0.118008 [15232/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.106971 [15296/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.114195 [15360/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.124987 [15424/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.133874 [15488/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.127643 [15552/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.139272 [15616/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.145890 [15680/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.124609 [15744/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.102669 [15808/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.185536 [15872/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.106214 [15936/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.152988 [16000/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.125000 [16064/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.111004 [16128/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.113200 [16192/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.203400 [16256/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.152531 [16320/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.164330 [16384/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.164855 [16448/24872]: 0%| | 0/388 [00:12<?, ?it/s]
loss: 0.092235 [16512/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.131351 [16576/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.129981 [16640/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.150377 [16704/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.157210 [16768/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.110965 [16832/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.127223 [16896/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.115556 [16960/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.102594 [17024/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.130648 [17088/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.139175 [17152/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.120541 [17216/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.123920 [17280/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.120499 [17344/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.171945 [17408/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.131730 [17472/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.180337 [17536/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.161718 [17600/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.149497 [17664/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.159292 [17728/24872]: 0%| | 0/388 [00:13<?, ?it/s]
loss: 0.134463 [17792/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.135665 [17856/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.139370 [17920/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.133246 [17984/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.109077 [18048/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.129571 [18112/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.151928 [18176/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.140067 [18240/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.155883 [18304/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.142929 [18368/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.150098 [18432/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.107290 [18496/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.130730 [18560/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.151092 [18624/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.105519 [18688/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.126802 [18752/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.099010 [18816/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.110620 [18880/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.128372 [18944/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.143060 [19008/24872]: 0%| | 0/388 [00:14<?, ?it/s]
loss: 0.100515 [19072/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.111327 [19136/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.123999 [19200/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.122930 [19264/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.153534 [19328/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.147008 [19392/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.092781 [19456/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.116898 [19520/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.182186 [19584/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.172362 [19648/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.179713 [19712/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.150564 [19776/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.112351 [19840/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.163104 [19904/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.112830 [19968/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.133487 [20032/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.156377 [20096/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.138216 [20160/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.103090 [20224/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.118833 [20288/24872]: 0%| | 0/388 [00:15<?, ?it/s]
loss: 0.151998 [20352/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.141158 [20416/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.098200 [20480/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.123014 [20544/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.117387 [20608/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.125663 [20672/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.119959 [20736/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.104232 [20800/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.133940 [20864/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.103398 [20928/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.105736 [20992/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.120056 [21056/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.161626 [21120/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.122978 [21184/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.144330 [21248/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.122847 [21312/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.207689 [21376/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.228017 [21440/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.121795 [21504/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.141580 [21568/24872]: 0%| | 0/388 [00:16<?, ?it/s]
loss: 0.131513 [21632/24872]: 0%| | 0/388 [00:17<?, ?it/s]
loss: 0.146737 [21696/24872]: 0%| | 0/388 [00:17<?, ?it/s]
loss: 0.108097 [21760/24872]: 0%| | 0/388 [00:17<?, ?it/s]
loss: 0.115661 [21824/24872]: 0%| | 0/388 [00:17<?, ?it/s]
loss: 0.112336 [21888/24872]: 0%| | 0/388 [00:17<?, ?it/s]
loss: 0.086015 [21952/24872]: 0%| | 0/388 [00:17<?, ?it/s]
loss: 0.087985 [22016/24872]: 0%| | 0/388 [00:17<?, ?it/s]
loss: 0.161084 [22080/24872]: 0%| | 0/388 [00:17<?, ?it/s]
loss: 0.113683 [22144/24872]: 0%| | 0/388 [00:17<?, ?it/s]
loss: 0.115813 [22208/24872]: 0%| | 0/388 [00:17<?, ?it/s]
loss: 0.152712 [22272/24872]: 0%| | 0/388 [00:17<?, ?it/s]
loss: 0.186569 [22336/24872]: 0%| | 0/388 [00:17<?, ?it/s]
loss: 0.098770 [22400/24872]: 0%| | 0/388 [00:17<?, ?it/s]
loss: 0.113191 [22464/24872]: 0%| | 0/388 [00:17<?, ?it/s]
loss: 0.128276 [22528/24872]: 0%| | 0/388 [00:17<?, ?it/s]
loss: 0.090402 [22592/24872]: 0%| | 0/388 [00:17<?, ?it/s]
loss: 0.186313 [22656/24872]: 0%| | 0/388 [00:17<?, ?it/s]
loss: 0.138341 [22720/24872]: 0%| | 0/388 [00:17<?, ?it/s]
loss: 0.145689 [22784/24872]: 0%| | 0/388 [00:17<?, ?it/s]
loss: 0.165147 [22848/24872]: 0%| | 0/388 [00:18<?, ?it/s]
loss: 0.097239 [22912/24872]: 0%| | 0/388 [00:18<?, ?it/s]
loss: 0.094309 [22976/24872]: 0%| | 0/388 [00:18<?, ?it/s]
loss: 0.160694 [23040/24872]: 0%| | 0/388 [00:18<?, ?it/s]
loss: 0.163976 [23104/24872]: 0%| | 0/388 [00:18<?, ?it/s]
loss: 0.146698 [23168/24872]: 0%| | 0/388 [00:18<?, ?it/s]
loss: 0.101932 [23232/24872]: 0%| | 0/388 [00:18<?, ?it/s]
loss: 0.193247 [23296/24872]: 0%| | 0/388 [00:18<?, ?it/s]
loss: 0.104029 [23360/24872]: 0%| | 0/388 [00:18<?, ?it/s]
loss: 0.136115 [23424/24872]: 0%| | 0/388 [00:18<?, ?it/s]
loss: 0.155174 [23488/24872]: 0%| | 0/388 [00:18<?, ?it/s]
loss: 0.149022 [23552/24872]: 0%| | 0/388 [00:18<?, ?it/s]
loss: 0.134683 [23616/24872]: 0%| | 0/388 [00:18<?, ?it/s]
loss: 0.141567 [23680/24872]: 0%| | 0/388 [00:18<?, ?it/s]
loss: 0.136841 [23744/24872]: 0%| | 0/388 [00:18<?, ?it/s]
loss: 0.122603 [23808/24872]: 0%| | 0/388 [00:18<?, ?it/s]
loss: 0.100385 [23872/24872]: 0%| | 0/388 [00:18<?, ?it/s]
loss: 0.182681 [23936/24872]: 0%| | 0/388 [00:18<?, ?it/s]
loss: 0.096504 [24000/24872]: 0%| | 0/388 [00:18<?, ?it/s]
loss: 0.125284 [24064/24872]: 0%| | 0/388 [00:18<?, ?it/s]
loss: 0.132583 [24128/24872]: 0%| | 0/388 [00:18<?, ?it/s]
loss: 0.138479 [24192/24872]: 0%| | 0/388 [00:19<?, ?it/s]
loss: 0.087493 [24256/24872]: 0%| | 0/388 [00:19<?, ?it/s]
loss: 0.114234 [24320/24872]: 0%| | 0/388 [00:19<?, ?it/s]
loss: 0.120471 [24384/24872]: 0%| | 0/388 [00:19<?, ?it/s]
loss: 0.158643 [24448/24872]: 0%| | 0/388 [00:19<?, ?it/s]
loss: 0.129835 [24512/24872]: 0%| | 0/388 [00:19<?, ?it/s]
loss: 0.128388 [24576/24872]: 0%| | 0/388 [00:19<?, ?it/s]
loss: 0.129500 [24640/24872]: 0%| | 0/388 [00:19<?, ?it/s]
loss: 0.098307 [24704/24872]: 0%| | 0/388 [00:19<?, ?it/s]
loss: 0.165975 [24768/24872]: 0%| | 0/388 [00:19<?, ?it/s]
loss: 0.141528 [24832/24872]: 0%| | 0/388 [00:19<?, ?it/s]
loss: 0.095943 [24872/24872]: 0%| | 0/388 [00:19<?, ?it/s]
loss: 0.095943 [24872/24872]: : 389it [00:19, 19.86it/s]
-------------------------------
LR=0.0001, batch_size=128
-------------------------------
Epoch 1, time=258.07s
0%| | 0/194 [00:00<?, ?it/s]
loss: 0.142783 [ 128/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.318197 [ 256/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.272213 [ 384/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.183752 [ 512/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.180756 [ 640/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.211264 [ 768/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.254027 [ 896/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.205519 [ 1024/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.148999 [ 1152/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.160338 [ 1280/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.173786 [ 1408/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.149375 [ 1536/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.154195 [ 1664/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.161542 [ 1792/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.143039 [ 1920/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.137011 [ 2048/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.163445 [ 2176/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.138007 [ 2304/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.144709 [ 2432/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.176787 [ 2560/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.124077 [ 2688/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.138912 [ 2816/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.117473 [ 2944/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.155616 [ 3072/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.145639 [ 3200/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.110078 [ 3328/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.114347 [ 3456/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.143942 [ 3584/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.109119 [ 3712/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.120746 [ 3840/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.121587 [ 3968/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.161608 [ 4096/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.117964 [ 4224/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.141754 [ 4352/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.131479 [ 4480/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.121943 [ 4608/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.136237 [ 4736/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.124513 [ 4864/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.130321 [ 4992/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.103479 [ 5120/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.141081 [ 5248/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.169484 [ 5376/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.113561 [ 5504/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.134486 [ 5632/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.143212 [ 5760/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.131322 [ 5888/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.107292 [ 6016/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.145813 [ 6144/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.113650 [ 6272/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.135935 [ 6400/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.114826 [ 6528/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.143539 [ 6656/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.119433 [ 6784/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.122040 [ 6912/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.129901 [ 7040/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.104806 [ 7168/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.125230 [ 7296/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.142775 [ 7424/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.129610 [ 7552/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.131659 [ 7680/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.152315 [ 7808/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.129623 [ 7936/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.122329 [ 8064/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.124513 [ 8192/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.106749 [ 8320/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.112249 [ 8448/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.139957 [ 8576/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.179237 [ 8704/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.121034 [ 8832/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.098906 [ 8960/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.129472 [ 9088/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.096442 [ 9216/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.119094 [ 9344/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.143925 [ 9472/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.103307 [ 9600/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.116584 [ 9728/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.116228 [ 9856/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.131462 [ 9984/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.112036 [10112/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.139025 [10240/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.108428 [10368/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.155180 [10496/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.135141 [10624/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.114785 [10752/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.124782 [10880/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.109052 [11008/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.159262 [11136/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.141523 [11264/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.107452 [11392/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.144349 [11520/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.140401 [11648/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.123751 [11776/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.132009 [11904/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.109795 [12032/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.168952 [12160/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.132988 [12288/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.127884 [12416/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.148005 [12544/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.109067 [12672/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.158999 [12800/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.113808 [12928/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.091711 [13056/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.138068 [13184/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.108629 [13312/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.105467 [13440/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.125233 [13568/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.128647 [13696/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.142498 [13824/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.113421 [13952/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.120031 [14080/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.123283 [14208/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.125616 [14336/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.103725 [14464/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.126271 [14592/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.145357 [14720/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.140381 [14848/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.148719 [14976/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.115973 [15104/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.117101 [15232/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.099240 [15360/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.125595 [15488/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.115461 [15616/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.123114 [15744/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.119111 [15872/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.114060 [16000/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.107653 [16128/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.151406 [16256/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.141402 [16384/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.117543 [16512/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.126884 [16640/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.135094 [16768/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.108448 [16896/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.110245 [17024/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.120682 [17152/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.100950 [17280/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.134823 [17408/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.146753 [17536/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.151681 [17664/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.126841 [17792/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.131457 [17920/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.099320 [18048/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.134232 [18176/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.144548 [18304/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.140356 [18432/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.108015 [18560/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.116562 [18688/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.105685 [18816/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.102667 [18944/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.115381 [19072/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.105418 [19200/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.126483 [19328/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.107248 [19456/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.126438 [19584/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.151572 [19712/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.116784 [19840/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.116939 [19968/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.126402 [20096/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.109497 [20224/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.129648 [20352/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.117655 [20480/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.115244 [20608/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.112004 [20736/24872]: 0%| | 0/194 [00:12<?, ?it/s]
loss: 0.111157 [20864/24872]: 0%| | 0/194 [00:12<?, ?it/s]
loss: 0.099139 [20992/24872]: 0%| | 0/194 [00:12<?, ?it/s]
loss: 0.129789 [21120/24872]: 0%| | 0/194 [00:12<?, ?it/s]
loss: 0.131012 [21248/24872]: 0%| | 0/194 [00:12<?, ?it/s]
loss: 0.131856 [21376/24872]: 0%| | 0/194 [00:12<?, ?it/s]
loss: 0.163520 [21504/24872]: 0%| | 0/194 [00:12<?, ?it/s]
loss: 0.125261 [21632/24872]: 0%| | 0/194 [00:12<?, ?it/s]
loss: 0.108464 [21760/24872]: 0%| | 0/194 [00:12<?, ?it/s]
loss: 0.108581 [21888/24872]: 0%| | 0/194 [00:12<?, ?it/s]
loss: 0.078611 [22016/24872]: 0%| | 0/194 [00:12<?, ?it/s]
loss: 0.121967 [22144/24872]: 0%| | 0/194 [00:12<?, ?it/s]
loss: 0.126070 [22272/24872]: 0%| | 0/194 [00:12<?, ?it/s]
loss: 0.128368 [22400/24872]: 0%| | 0/194 [00:12<?, ?it/s]
loss: 0.096561 [22528/24872]: 0%| | 0/194 [00:13<?, ?it/s]
loss: 0.115507 [22656/24872]: 0%| | 0/194 [00:13<?, ?it/s]
loss: 0.122155 [22784/24872]: 0%| | 0/194 [00:13<?, ?it/s]
loss: 0.110513 [22912/24872]: 0%| | 0/194 [00:13<?, ?it/s]
loss: 0.100221 [23040/24872]: 0%| | 0/194 [00:13<?, ?it/s]
loss: 0.136212 [23168/24872]: 0%| | 0/194 [00:13<?, ?it/s]
loss: 0.126761 [23296/24872]: 0%| | 0/194 [00:13<?, ?it/s]
loss: 0.110471 [23424/24872]: 0%| | 0/194 [00:13<?, ?it/s]
loss: 0.148485 [23552/24872]: 0%| | 0/194 [00:13<?, ?it/s]
loss: 0.128029 [23680/24872]: 0%| | 0/194 [00:13<?, ?it/s]
loss: 0.116628 [23808/24872]: 0%| | 0/194 [00:13<?, ?it/s]
loss: 0.135596 [23936/24872]: 0%| | 0/194 [00:13<?, ?it/s]
loss: 0.107224 [24064/24872]: 0%| | 0/194 [00:13<?, ?it/s]
loss: 0.131007 [24192/24872]: 0%| | 0/194 [00:13<?, ?it/s]
loss: 0.099302 [24320/24872]: 0%| | 0/194 [00:14<?, ?it/s]
loss: 0.129039 [24448/24872]: 0%| | 0/194 [00:14<?, ?it/s]
loss: 0.131201 [24576/24872]: 0%| | 0/194 [00:14<?, ?it/s]
loss: 0.116311 [24704/24872]: 0%| | 0/194 [00:14<?, ?it/s]
loss: 0.153669 [24832/24872]: 0%| | 0/194 [00:14<?, ?it/s]
loss: 0.086235 [24872/24872]: 0%| | 0/194 [00:14<?, ?it/s]
loss: 0.086235 [24872/24872]: : 195it [00:14, 13.53it/s]
Epoch 2, time=272.48s
0%| | 0/194 [00:00<?, ?it/s]
loss: 0.134623 [ 128/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.119232 [ 256/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.136745 [ 384/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.114870 [ 512/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.134568 [ 640/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.115862 [ 768/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.155697 [ 896/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.136399 [ 1024/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.102779 [ 1152/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.118638 [ 1280/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.137590 [ 1408/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.127191 [ 1536/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.117703 [ 1664/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.114808 [ 1792/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.111397 [ 1920/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.117870 [ 2048/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.137275 [ 2176/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.128198 [ 2304/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.123082 [ 2432/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.151196 [ 2560/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.106674 [ 2688/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.127233 [ 2816/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.098807 [ 2944/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.142984 [ 3072/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.127330 [ 3200/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.099469 [ 3328/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.106055 [ 3456/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.136757 [ 3584/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.100580 [ 3712/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.116262 [ 3840/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.116563 [ 3968/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.144449 [ 4096/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.111656 [ 4224/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.136463 [ 4352/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.122267 [ 4480/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.112985 [ 4608/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.122177 [ 4736/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.117762 [ 4864/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.119056 [ 4992/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.100033 [ 5120/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.130728 [ 5248/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.154210 [ 5376/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.108162 [ 5504/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.128323 [ 5632/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.131798 [ 5760/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.118803 [ 5888/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.099345 [ 6016/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.129726 [ 6144/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.110366 [ 6272/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.129185 [ 6400/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.104669 [ 6528/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.139017 [ 6656/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.116219 [ 6784/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.116885 [ 6912/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.128986 [ 7040/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.105740 [ 7168/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.122081 [ 7296/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.138013 [ 7424/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.115484 [ 7552/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.133302 [ 7680/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.146749 [ 7808/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.129267 [ 7936/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.119135 [ 8064/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.117060 [ 8192/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.102598 [ 8320/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.107959 [ 8448/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.134262 [ 8576/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.177738 [ 8704/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.113840 [ 8832/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.093147 [ 8960/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.121403 [ 9088/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.097801 [ 9216/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.113830 [ 9344/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.145325 [ 9472/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.102440 [ 9600/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.115192 [ 9728/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.111840 [ 9856/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.126908 [ 9984/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.111235 [10112/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.134948 [10240/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.099732 [10368/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.144566 [10496/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.128858 [10624/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.107292 [10752/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.116218 [10880/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.106663 [11008/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.160715 [11136/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.133773 [11264/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.101235 [11392/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.134379 [11520/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.132942 [11648/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.115131 [11776/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.125841 [11904/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.106869 [12032/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.160978 [12160/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.125563 [12288/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.124402 [12416/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.152891 [12544/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.104275 [12672/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.161321 [12800/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.111567 [12928/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.090528 [13056/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.134615 [13184/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.099569 [13312/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.120183 [13440/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.112915 [13568/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.129986 [13696/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.139516 [13824/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.112888 [13952/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.119465 [14080/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.122788 [14208/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.125716 [14336/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.105962 [14464/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.130771 [14592/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.144068 [14720/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.147262 [14848/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.142891 [14976/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.119377 [15104/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.115934 [15232/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.099897 [15360/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.119468 [15488/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.119953 [15616/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.118484 [15744/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.120725 [15872/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.113261 [16000/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.107098 [16128/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.153171 [16256/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.142657 [16384/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.117940 [16512/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.123757 [16640/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.129992 [16768/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.110345 [16896/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.106863 [17024/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.111898 [17152/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.099189 [17280/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.134159 [17408/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.141361 [17536/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.145049 [17664/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.121648 [17792/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.118776 [17920/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.094608 [18048/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.132361 [18176/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.137304 [18304/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.137545 [18432/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.106975 [18560/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.113879 [18688/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.107258 [18816/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.093988 [18944/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.113488 [19072/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.104167 [19200/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.128724 [19328/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.103781 [19456/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.122168 [19584/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.161501 [19712/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.113619 [19840/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.119271 [19968/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.122325 [20096/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.110657 [20224/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.121937 [20352/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.120951 [20480/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.113933 [20608/24872]: 0%| | 0/194 [00:12<?, ?it/s]
loss: 0.105454 [20736/24872]: 0%| | 0/194 [00:12<?, ?it/s]
loss: 0.113970 [20864/24872]: 0%| | 0/194 [00:12<?, ?it/s]
loss: 0.096267 [20992/24872]: 0%| | 0/194 [00:12<?, ?it/s]
loss: 0.125225 [21120/24872]: 0%| | 0/194 [00:12<?, ?it/s]
loss: 0.120753 [21248/24872]: 0%| | 0/194 [00:12<?, ?it/s]
loss: 0.123184 [21376/24872]: 0%| | 0/194 [00:12<?, ?it/s]
loss: 0.157893 [21504/24872]: 0%| | 0/194 [00:12<?, ?it/s]
loss: 0.122317 [21632/24872]: 0%| | 0/194 [00:12<?, ?it/s]
loss: 0.100133 [21760/24872]: 0%| | 0/194 [00:12<?, ?it/s]
loss: 0.108847 [21888/24872]: 0%| | 0/194 [00:12<?, ?it/s]
loss: 0.072674 [22016/24872]: 0%| | 0/194 [00:12<?, ?it/s]
loss: 0.119451 [22144/24872]: 0%| | 0/194 [00:12<?, ?it/s]
loss: 0.118693 [22272/24872]: 0%| | 0/194 [00:12<?, ?it/s]
loss: 0.125782 [22400/24872]: 0%| | 0/194 [00:13<?, ?it/s]
loss: 0.096010 [22528/24872]: 0%| | 0/194 [00:13<?, ?it/s]
loss: 0.109022 [22656/24872]: 0%| | 0/194 [00:13<?, ?it/s]
loss: 0.121198 [22784/24872]: 0%| | 0/194 [00:13<?, ?it/s]
loss: 0.108757 [22912/24872]: 0%| | 0/194 [00:13<?, ?it/s]
loss: 0.095744 [23040/24872]: 0%| | 0/194 [00:13<?, ?it/s]
loss: 0.125610 [23168/24872]: 0%| | 0/194 [00:13<?, ?it/s]
loss: 0.125804 [23296/24872]: 0%| | 0/194 [00:13<?, ?it/s]
loss: 0.109478 [23424/24872]: 0%| | 0/194 [00:13<?, ?it/s]
loss: 0.143524 [23552/24872]: 0%| | 0/194 [00:13<?, ?it/s]
loss: 0.122485 [23680/24872]: 0%| | 0/194 [00:13<?, ?it/s]
loss: 0.111418 [23808/24872]: 0%| | 0/194 [00:13<?, ?it/s]
loss: 0.130316 [23936/24872]: 0%| | 0/194 [00:13<?, ?it/s]
loss: 0.096271 [24064/24872]: 0%| | 0/194 [00:14<?, ?it/s]
loss: 0.128617 [24192/24872]: 0%| | 0/194 [00:14<?, ?it/s]
loss: 0.092377 [24320/24872]: 0%| | 0/194 [00:14<?, ?it/s]
loss: 0.127228 [24448/24872]: 0%| | 0/194 [00:14<?, ?it/s]
loss: 0.124644 [24576/24872]: 0%| | 0/194 [00:14<?, ?it/s]
loss: 0.124227 [24704/24872]: 0%| | 0/194 [00:14<?, ?it/s]
loss: 0.143582 [24832/24872]: 0%| | 0/194 [00:14<?, ?it/s]
loss: 0.082755 [24872/24872]: 0%| | 0/194 [00:14<?, ?it/s]
loss: 0.082755 [24872/24872]: : 195it [00:14, 13.41it/s]
Epoch 3, time=287.02s
0%| | 0/194 [00:00<?, ?it/s]
loss: 0.131183 [ 128/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.115411 [ 256/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.135472 [ 384/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.111713 [ 512/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.131303 [ 640/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.112330 [ 768/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.146282 [ 896/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.135175 [ 1024/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.092074 [ 1152/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.124216 [ 1280/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.130108 [ 1408/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.124492 [ 1536/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.114300 [ 1664/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.120357 [ 1792/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.110993 [ 1920/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.114858 [ 2048/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.132353 [ 2176/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.121585 [ 2304/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.116004 [ 2432/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.140288 [ 2560/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.100123 [ 2688/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.121718 [ 2816/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.093496 [ 2944/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.139320 [ 3072/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.118435 [ 3200/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.091724 [ 3328/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.102656 [ 3456/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.133786 [ 3584/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.094763 [ 3712/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.113444 [ 3840/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.111846 [ 3968/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.138560 [ 4096/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.102418 [ 4224/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.132393 [ 4352/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.117541 [ 4480/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.109601 [ 4608/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.117767 [ 4736/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.109326 [ 4864/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.118783 [ 4992/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.092367 [ 5120/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.126557 [ 5248/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.146719 [ 5376/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.101276 [ 5504/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.120897 [ 5632/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.127130 [ 5760/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.112517 [ 5888/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.095301 [ 6016/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.126319 [ 6144/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.105550 [ 6272/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.121382 [ 6400/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.099205 [ 6528/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.132966 [ 6656/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.116213 [ 6784/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.116450 [ 6912/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.125870 [ 7040/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.100209 [ 7168/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.113273 [ 7296/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.136517 [ 7424/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.113304 [ 7552/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.123326 [ 7680/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.140467 [ 7808/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.125016 [ 7936/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.112649 [ 8064/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.109381 [ 8192/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.099857 [ 8320/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.104996 [ 8448/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.128408 [ 8576/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.164930 [ 8704/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.109327 [ 8832/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.092242 [ 8960/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.123328 [ 9088/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.094966 [ 9216/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.107485 [ 9344/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.146740 [ 9472/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.096786 [ 9600/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.111345 [ 9728/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.109797 [ 9856/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.126275 [ 9984/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.113937 [10112/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.132377 [10240/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.094756 [10368/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.138999 [10496/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.123384 [10624/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.108331 [10752/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.120514 [10880/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.109300 [11008/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.155128 [11136/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.134474 [11264/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.110377 [11392/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.133934 [11520/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.136011 [11648/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.114347 [11776/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.136989 [11904/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.111065 [12032/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.156486 [12160/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.123032 [12288/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.126556 [12416/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.140693 [12544/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.104426 [12672/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.151503 [12800/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.105915 [12928/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.085069 [13056/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.129436 [13184/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.093734 [13312/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.109588 [13440/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.105945 [13568/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.127582 [13696/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.138494 [13824/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.119249 [13952/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.114235 [14080/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.132059 [14208/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.122163 [14336/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.121669 [14464/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.127640 [14592/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.169965 [14720/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.148645 [14848/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.174580 [14976/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.113422 [15104/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.127839 [15232/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.107144 [15360/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.122315 [15488/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.112438 [15616/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.118583 [15744/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.120122 [15872/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.109015 [16000/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.111203 [16128/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.143609 [16256/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.134874 [16384/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.120774 [16512/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.117680 [16640/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.125879 [16768/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.106628 [16896/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.100233 [17024/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.111217 [17152/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.110894 [17280/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.136958 [17408/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.147019 [17536/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.146062 [17664/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.126774 [17792/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.114958 [17920/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.095776 [18048/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.131184 [18176/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.132970 [18304/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.140649 [18432/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.102592 [18560/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.116196 [18688/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.103257 [18816/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.101324 [18944/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.110814 [19072/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.102128 [19200/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.121497 [19328/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.102310 [19456/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.121376 [19584/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.156073 [19712/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.118237 [19840/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.117037 [19968/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.127026 [20096/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.104937 [20224/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.124237 [20352/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.111089 [20480/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.115952 [20608/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.108748 [20736/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.114660 [20864/24872]: 0%| | 0/194 [00:12<?, ?it/s]
loss: 0.097005 [20992/24872]: 0%| | 0/194 [00:12<?, ?it/s]
loss: 0.124526 [21120/24872]: 0%| | 0/194 [00:12<?, ?it/s]
loss: 0.119477 [21248/24872]: 0%| | 0/194 [00:12<?, ?it/s]
loss: 0.124000 [21376/24872]: 0%| | 0/194 [00:12<?, ?it/s]
loss: 0.155048 [21504/24872]: 0%| | 0/194 [00:12<?, ?it/s]
loss: 0.118285 [21632/24872]: 0%| | 0/194 [00:12<?, ?it/s]
loss: 0.102102 [21760/24872]: 0%| | 0/194 [00:12<?, ?it/s]
loss: 0.102854 [21888/24872]: 0%| | 0/194 [00:12<?, ?it/s]
loss: 0.070230 [22016/24872]: 0%| | 0/194 [00:12<?, ?it/s]
loss: 0.112320 [22144/24872]: 0%| | 0/194 [00:12<?, ?it/s]
loss: 0.114210 [22272/24872]: 0%| | 0/194 [00:12<?, ?it/s]
loss: 0.123069 [22400/24872]: 0%| | 0/194 [00:12<?, ?it/s]
loss: 0.096218 [22528/24872]: 0%| | 0/194 [00:12<?, ?it/s]
loss: 0.111220 [22656/24872]: 0%| | 0/194 [00:13<?, ?it/s]
loss: 0.134113 [22784/24872]: 0%| | 0/194 [00:13<?, ?it/s]
loss: 0.112398 [22912/24872]: 0%| | 0/194 [00:13<?, ?it/s]
loss: 0.094525 [23040/24872]: 0%| | 0/194 [00:13<?, ?it/s]
loss: 0.124890 [23168/24872]: 0%| | 0/194 [00:13<?, ?it/s]
loss: 0.125654 [23296/24872]: 0%| | 0/194 [00:13<?, ?it/s]
loss: 0.104904 [23424/24872]: 0%| | 0/194 [00:13<?, ?it/s]
loss: 0.143613 [23552/24872]: 0%| | 0/194 [00:13<?, ?it/s]
loss: 0.123411 [23680/24872]: 0%| | 0/194 [00:13<?, ?it/s]
loss: 0.105253 [23808/24872]: 0%| | 0/194 [00:13<?, ?it/s]
loss: 0.126238 [23936/24872]: 0%| | 0/194 [00:13<?, ?it/s]
loss: 0.095436 [24064/24872]: 0%| | 0/194 [00:13<?, ?it/s]
loss: 0.125939 [24192/24872]: 0%| | 0/194 [00:13<?, ?it/s]
loss: 0.087280 [24320/24872]: 0%| | 0/194 [00:13<?, ?it/s]
loss: 0.120026 [24448/24872]: 0%| | 0/194 [00:14<?, ?it/s]
loss: 0.115811 [24576/24872]: 0%| | 0/194 [00:14<?, ?it/s]
loss: 0.108638 [24704/24872]: 0%| | 0/194 [00:14<?, ?it/s]
loss: 0.148531 [24832/24872]: 0%| | 0/194 [00:14<?, ?it/s]
loss: 0.079670 [24872/24872]: 0%| | 0/194 [00:14<?, ?it/s]
loss: 0.079670 [24872/24872]: : 195it [00:14, 13.62it/s]
Epoch 4, time=301.34s
0%| | 0/194 [00:00<?, ?it/s]
loss: 0.131819 [ 128/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.110132 [ 256/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.136175 [ 384/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.112239 [ 512/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.134733 [ 640/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.108992 [ 768/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.146473 [ 896/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.131443 [ 1024/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.099937 [ 1152/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.115626 [ 1280/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.133235 [ 1408/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.128370 [ 1536/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.113402 [ 1664/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.115748 [ 1792/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.107494 [ 1920/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.110735 [ 2048/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.134484 [ 2176/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.116377 [ 2304/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.117496 [ 2432/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.141698 [ 2560/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.100171 [ 2688/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.124069 [ 2816/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.093274 [ 2944/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.137288 [ 3072/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.114658 [ 3200/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.100126 [ 3328/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.111982 [ 3456/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.129430 [ 3584/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.097874 [ 3712/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.116814 [ 3840/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.110053 [ 3968/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.138603 [ 4096/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.110583 [ 4224/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.135824 [ 4352/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.116050 [ 4480/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.116418 [ 4608/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.131558 [ 4736/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.109786 [ 4864/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.121636 [ 4992/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.097637 [ 5120/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.124613 [ 5248/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.150021 [ 5376/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.095697 [ 5504/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.124753 [ 5632/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.129429 [ 5760/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.121563 [ 5888/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.092486 [ 6016/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.119901 [ 6144/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.106892 [ 6272/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.119599 [ 6400/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.102217 [ 6528/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.130353 [ 6656/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.111529 [ 6784/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.117489 [ 6912/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.122810 [ 7040/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.105058 [ 7168/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.107461 [ 7296/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.141280 [ 7424/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.107477 [ 7552/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.120831 [ 7680/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.145936 [ 7808/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.123732 [ 7936/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.110937 [ 8064/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.115023 [ 8192/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.100991 [ 8320/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.105866 [ 8448/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.125093 [ 8576/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.155260 [ 8704/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.107787 [ 8832/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.086298 [ 8960/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.107986 [ 9088/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.092380 [ 9216/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.104810 [ 9344/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.139197 [ 9472/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.098919 [ 9600/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.108215 [ 9728/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.108893 [ 9856/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.122436 [ 9984/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.106942 [10112/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.131091 [10240/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.088060 [10368/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.132457 [10496/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.118488 [10624/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.107550 [10752/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.115106 [10880/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.097515 [11008/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.151900 [11136/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.131238 [11264/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.107524 [11392/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.115849 [11520/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.132889 [11648/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.113571 [11776/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.125540 [11904/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.110698 [12032/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.154756 [12160/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.127722 [12288/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.115718 [12416/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.153086 [12544/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.104798 [12672/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.158382 [12800/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.105768 [12928/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.082447 [13056/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.133555 [13184/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.090361 [13312/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.107109 [13440/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.111904 [13568/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.129294 [13696/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.134384 [13824/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.107849 [13952/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.115574 [14080/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.120240 [14208/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.121741 [14336/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.104873 [14464/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.123284 [14592/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.136953 [14720/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.148041 [14848/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.149127 [14976/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.109158 [15104/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.110266 [15232/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.089882 [15360/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.107385 [15488/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.102670 [15616/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.106940 [15744/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.115440 [15872/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.099042 [16000/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.097060 [16128/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.141495 [16256/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.126009 [16384/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.114234 [16512/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.110865 [16640/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.123773 [16768/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.098922 [16896/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.099696 [17024/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.104422 [17152/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.101988 [17280/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.128203 [17408/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.142216 [17536/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.141351 [17664/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.119595 [17792/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.111129 [17920/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.088943 [18048/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.123242 [18176/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.124783 [18304/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.137231 [18432/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.091335 [18560/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.106250 [18688/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.092939 [18816/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.092253 [18944/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.101571 [19072/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.095096 [19200/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.117975 [19328/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.094658 [19456/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.110679 [19584/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.147018 [19712/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.105733 [19840/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.106206 [19968/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.112905 [20096/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.096843 [20224/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.113894 [20352/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.108496 [20480/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.104927 [20608/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.100431 [20736/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.104987 [20864/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.090293 [20992/24872]: 0%| | 0/194 [00:12<?, ?it/s]
loss: 0.114833 [21120/24872]: 0%| | 0/194 [00:12<?, ?it/s]
loss: 0.110074 [21248/24872]: 0%| | 0/194 [00:12<?, ?it/s]
loss: 0.119553 [21376/24872]: 0%| | 0/194 [00:12<?, ?it/s]
loss: 0.142086 [21504/24872]: 0%| | 0/194 [00:12<?, ?it/s]
loss: 0.114770 [21632/24872]: 0%| | 0/194 [00:12<?, ?it/s]
loss: 0.087989 [21760/24872]: 0%| | 0/194 [00:12<?, ?it/s]
loss: 0.096026 [21888/24872]: 0%| | 0/194 [00:12<?, ?it/s]
loss: 0.064134 [22016/24872]: 0%| | 0/194 [00:12<?, ?it/s]
loss: 0.109899 [22144/24872]: 0%| | 0/194 [00:12<?, ?it/s]
loss: 0.105860 [22272/24872]: 0%| | 0/194 [00:12<?, ?it/s]
loss: 0.116549 [22400/24872]: 0%| | 0/194 [00:12<?, ?it/s]
loss: 0.088718 [22528/24872]: 0%| | 0/194 [00:12<?, ?it/s]
loss: 0.102508 [22656/24872]: 0%| | 0/194 [00:12<?, ?it/s]
loss: 0.116250 [22784/24872]: 0%| | 0/194 [00:13<?, ?it/s]
loss: 0.103099 [22912/24872]: 0%| | 0/194 [00:13<?, ?it/s]
loss: 0.090342 [23040/24872]: 0%| | 0/194 [00:13<?, ?it/s]
loss: 0.118584 [23168/24872]: 0%| | 0/194 [00:13<?, ?it/s]
loss: 0.113713 [23296/24872]: 0%| | 0/194 [00:13<?, ?it/s]
loss: 0.098214 [23424/24872]: 0%| | 0/194 [00:13<?, ?it/s]
loss: 0.134687 [23552/24872]: 0%| | 0/194 [00:13<?, ?it/s]
loss: 0.116145 [23680/24872]: 0%| | 0/194 [00:13<?, ?it/s]
loss: 0.104334 [23808/24872]: 0%| | 0/194 [00:13<?, ?it/s]
loss: 0.122931 [23936/24872]: 0%| | 0/194 [00:13<?, ?it/s]
loss: 0.088084 [24064/24872]: 0%| | 0/194 [00:13<?, ?it/s]
loss: 0.124307 [24192/24872]: 0%| | 0/194 [00:13<?, ?it/s]
loss: 0.085473 [24320/24872]: 0%| | 0/194 [00:13<?, ?it/s]
loss: 0.123072 [24448/24872]: 0%| | 0/194 [00:14<?, ?it/s]
loss: 0.109664 [24576/24872]: 0%| | 0/194 [00:14<?, ?it/s]
loss: 0.119419 [24704/24872]: 0%| | 0/194 [00:14<?, ?it/s]
loss: 0.134288 [24832/24872]: 0%| | 0/194 [00:14<?, ?it/s]
loss: 0.093022 [24872/24872]: 0%| | 0/194 [00:14<?, ?it/s]
loss: 0.093022 [24872/24872]: : 195it [00:14, 13.65it/s]
Epoch 5, time=315.63s
0%| | 0/194 [00:00<?, ?it/s]
loss: 0.137590 [ 128/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.119256 [ 256/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.137168 [ 384/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.114021 [ 512/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.134218 [ 640/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.123077 [ 768/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.144491 [ 896/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.148835 [ 1024/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.093064 [ 1152/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.129541 [ 1280/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.131895 [ 1408/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.125880 [ 1536/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.119533 [ 1664/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.113792 [ 1792/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.120089 [ 1920/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.105405 [ 2048/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.148101 [ 2176/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.108919 [ 2304/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.123825 [ 2432/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.138459 [ 2560/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.105741 [ 2688/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.127338 [ 2816/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.089622 [ 2944/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.137716 [ 3072/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.118430 [ 3200/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.091397 [ 3328/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.105139 [ 3456/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.125403 [ 3584/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.099156 [ 3712/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.110578 [ 3840/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.114320 [ 3968/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.131220 [ 4096/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.113018 [ 4224/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.130038 [ 4352/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.120542 [ 4480/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.103373 [ 4608/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.115529 [ 4736/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.109406 [ 4864/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.120603 [ 4992/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.087242 [ 5120/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.132065 [ 5248/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.147542 [ 5376/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.096116 [ 5504/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.118912 [ 5632/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.127992 [ 5760/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.122630 [ 5888/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.080941 [ 6016/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.131834 [ 6144/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.096346 [ 6272/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.115440 [ 6400/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.099619 [ 6528/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.127709 [ 6656/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.124440 [ 6784/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.113790 [ 6912/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.120603 [ 7040/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.092718 [ 7168/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.113159 [ 7296/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.132649 [ 7424/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.116827 [ 7552/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.108842 [ 7680/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.134836 [ 7808/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.124693 [ 7936/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.103361 [ 8064/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.143659 [ 8192/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.101109 [ 8320/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.110589 [ 8448/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.134380 [ 8576/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.168423 [ 8704/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.122220 [ 8832/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.086950 [ 8960/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.124604 [ 9088/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.099307 [ 9216/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.106536 [ 9344/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.138139 [ 9472/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.095416 [ 9600/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.109421 [ 9728/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.116491 [ 9856/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.134610 [ 9984/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.129069 [10112/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.135629 [10240/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.106665 [10368/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.139741 [10496/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.130227 [10624/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.106398 [10752/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.127370 [10880/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.116084 [11008/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.157410 [11136/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.151888 [11264/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.113793 [11392/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.161509 [11520/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.136447 [11648/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.150747 [11776/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.118696 [11904/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.129229 [12032/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.177383 [12160/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.128321 [12288/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.161420 [12416/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.146295 [12544/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.149435 [12672/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.166109 [12800/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.112277 [12928/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.131646 [13056/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.129287 [13184/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.119797 [13312/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.112212 [13440/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.134179 [13568/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.140154 [13696/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.141359 [13824/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.117427 [13952/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.113662 [14080/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.120130 [14208/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.124672 [14336/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.103514 [14464/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.125431 [14592/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.141355 [14720/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.137648 [14848/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.151379 [14976/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.109059 [15104/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.111422 [15232/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.094234 [15360/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.118125 [15488/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.101204 [15616/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.110946 [15744/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.105649 [15872/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.104658 [16000/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.093996 [16128/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.139824 [16256/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.140329 [16384/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.110740 [16512/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.128654 [16640/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.113116 [16768/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.112224 [16896/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.095240 [17024/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.106570 [17152/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.100255 [17280/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.139633 [17408/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.132981 [17536/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.132696 [17664/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.126255 [17792/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.122774 [17920/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.088238 [18048/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.126808 [18176/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.124274 [18304/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.133747 [18432/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.090333 [18560/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.108478 [18688/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.096918 [18816/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.089130 [18944/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.111404 [19072/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.093377 [19200/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.119005 [19328/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.097407 [19456/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.115518 [19584/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.151030 [19712/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.117003 [19840/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.109576 [19968/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.114894 [20096/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.099277 [20224/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.116169 [20352/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.108952 [20480/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.108094 [20608/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.104682 [20736/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.103601 [20864/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.089387 [20992/24872]: 0%| | 0/194 [00:12<?, ?it/s]
loss: 0.121680 [21120/24872]: 0%| | 0/194 [00:12<?, ?it/s]
loss: 0.112756 [21248/24872]: 0%| | 0/194 [00:12<?, ?it/s]
loss: 0.131191 [21376/24872]: 0%| | 0/194 [00:12<?, ?it/s]
loss: 0.146375 [21504/24872]: 0%| | 0/194 [00:12<?, ?it/s]
loss: 0.116694 [21632/24872]: 0%| | 0/194 [00:12<?, ?it/s]
loss: 0.114014 [21760/24872]: 0%| | 0/194 [00:12<?, ?it/s]
loss: 0.098410 [21888/24872]: 0%| | 0/194 [00:12<?, ?it/s]
loss: 0.073564 [22016/24872]: 0%| | 0/194 [00:12<?, ?it/s]
loss: 0.117909 [22144/24872]: 0%| | 0/194 [00:12<?, ?it/s]
loss: 0.112806 [22272/24872]: 0%| | 0/194 [00:12<?, ?it/s]
loss: 0.118973 [22400/24872]: 0%| | 0/194 [00:12<?, ?it/s]
loss: 0.091619 [22528/24872]: 0%| | 0/194 [00:12<?, ?it/s]
loss: 0.108238 [22656/24872]: 0%| | 0/194 [00:12<?, ?it/s]
loss: 0.120878 [22784/24872]: 0%| | 0/194 [00:13<?, ?it/s]
loss: 0.105933 [22912/24872]: 0%| | 0/194 [00:13<?, ?it/s]
loss: 0.093504 [23040/24872]: 0%| | 0/194 [00:13<?, ?it/s]
loss: 0.123811 [23168/24872]: 0%| | 0/194 [00:13<?, ?it/s]
loss: 0.114011 [23296/24872]: 0%| | 0/194 [00:13<?, ?it/s]
loss: 0.103896 [23424/24872]: 0%| | 0/194 [00:13<?, ?it/s]
loss: 0.135402 [23552/24872]: 0%| | 0/194 [00:13<?, ?it/s]
loss: 0.120973 [23680/24872]: 0%| | 0/194 [00:13<?, ?it/s]
loss: 0.110659 [23808/24872]: 0%| | 0/194 [00:13<?, ?it/s]
loss: 0.129478 [23936/24872]: 0%| | 0/194 [00:13<?, ?it/s]
loss: 0.095601 [24064/24872]: 0%| | 0/194 [00:13<?, ?it/s]
loss: 0.121487 [24192/24872]: 0%| | 0/194 [00:13<?, ?it/s]
loss: 0.101734 [24320/24872]: 0%| | 0/194 [00:13<?, ?it/s]
loss: 0.116658 [24448/24872]: 0%| | 0/194 [00:13<?, ?it/s]
loss: 0.121540 [24576/24872]: 0%| | 0/194 [00:14<?, ?it/s]
loss: 0.110265 [24704/24872]: 0%| | 0/194 [00:14<?, ?it/s]
loss: 0.149018 [24832/24872]: 0%| | 0/194 [00:14<?, ?it/s]
loss: 0.085656 [24872/24872]: 0%| | 0/194 [00:14<?, ?it/s]
loss: 0.085656 [24872/24872]: : 195it [00:14, 13.69it/s]
Epoch 6, time=329.87s
0%| | 0/194 [00:00<?, ?it/s]
loss: 0.134137 [ 128/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.110612 [ 256/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.139018 [ 384/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.115517 [ 512/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.119020 [ 640/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.113061 [ 768/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.138482 [ 896/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.128189 [ 1024/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.092866 [ 1152/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.113975 [ 1280/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.127168 [ 1408/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.114337 [ 1536/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.108250 [ 1664/24872]: 0%| | 0/194 [00:00<?, ?it/s]
loss: 0.103087 [ 1792/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.098561 [ 1920/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.101804 [ 2048/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.124222 [ 2176/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.109715 [ 2304/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.113548 [ 2432/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.130200 [ 2560/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.092233 [ 2688/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.116961 [ 2816/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.087786 [ 2944/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.129353 [ 3072/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.114649 [ 3200/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.082553 [ 3328/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.104462 [ 3456/24872]: 0%| | 0/194 [00:01<?, ?it/s]
loss: 0.125414 [ 3584/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.091200 [ 3712/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.117250 [ 3840/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.099045 [ 3968/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.135341 [ 4096/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.098832 [ 4224/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.139267 [ 4352/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.115358 [ 4480/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.108932 [ 4608/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.125217 [ 4736/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.114053 [ 4864/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.125561 [ 4992/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.093612 [ 5120/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.129126 [ 5248/24872]: 0%| | 0/194 [00:02<?, ?it/s]
loss: 0.149738 [ 5376/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.093303 [ 5504/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.132159 [ 5632/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.124177 [ 5760/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.126998 [ 5888/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.084849 [ 6016/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.122916 [ 6144/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.095847 [ 6272/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.108029 [ 6400/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.095377 [ 6528/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.129020 [ 6656/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.109150 [ 6784/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.114417 [ 6912/24872]: 0%| | 0/194 [00:03<?, ?it/s]
loss: 0.128099 [ 7040/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.093441 [ 7168/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.108748 [ 7296/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.125193 [ 7424/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.113423 [ 7552/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.099983 [ 7680/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.134876 [ 7808/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.119904 [ 7936/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.101767 [ 8064/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.122994 [ 8192/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.109261 [ 8320/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.102913 [ 8448/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.126033 [ 8576/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.140794 [ 8704/24872]: 0%| | 0/194 [00:04<?, ?it/s]
loss: 0.114285 [ 8832/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.093641 [ 8960/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.107880 [ 9088/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.094826 [ 9216/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.100763 [ 9344/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.139321 [ 9472/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.093949 [ 9600/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.100279 [ 9728/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.109016 [ 9856/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.113281 [ 9984/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.113341 [10112/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.128662 [10240/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.085905 [10368/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.132086 [10496/24872]: 0%| | 0/194 [00:05<?, ?it/s]
loss: 0.110434 [10624/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.103635 [10752/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.110090 [10880/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.099561 [11008/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.140769 [11136/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.133724 [11264/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.094479 [11392/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.120784 [11520/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.121219 [11648/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.098241 [11776/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.115778 [11904/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.099495 [12032/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.153667 [12160/24872]: 0%| | 0/194 [00:06<?, ?it/s]
loss: 0.113109 [12288/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.113176 [12416/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.131324 [12544/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.100558 [12672/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.156365 [12800/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.099964 [12928/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.084029 [13056/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.116693 [13184/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.092485 [13312/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.094180 [13440/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.110004 [13568/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.119312 [13696/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.146285 [13824/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.115738 [13952/24872]: 0%| | 0/194 [00:07<?, ?it/s]
loss: 0.102809 [14080/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.116945 [14208/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.117585 [14336/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.100475 [14464/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.125208 [14592/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.139306 [14720/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.138471 [14848/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.138715 [14976/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.103248 [15104/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.108418 [15232/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.092233 [15360/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.117818 [15488/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.100584 [15616/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.107023 [15744/24872]: 0%| | 0/194 [00:08<?, ?it/s]
loss: 0.114250 [15872/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.093594 [16000/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.086161 [16128/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.136947 [16256/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.130386 [16384/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.111395 [16512/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.111068 [16640/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.104959 [16768/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.106656 [16896/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.090164 [17024/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.103537 [17152/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.091773 [17280/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.118975 [17408/24872]: 0%| | 0/194 [00:09<?, ?it/s]
loss: 0.134241 [17536/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.141536 [17664/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.138072 [17792/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.100548 [17920/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.088636 [18048/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.128640 [18176/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.122943 [18304/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.123983 [18432/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.088342 [18560/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.112204 [18688/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.087098 [18816/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.090309 [18944/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.110186 [19072/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.088190 [19200/24872]: 0%| | 0/194 [00:10<?, ?it/s]
loss: 0.115488 [19328/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.093798 [19456/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.117234 [19584/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.125436 [19712/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.110252 [19840/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.102482 [19968/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.110100 [20096/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.096619 [20224/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.113607 [20352/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.105015 [20480/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.101021 [20608/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.095846 [20736/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.101375 [20864/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.092782 [20992/24872]: 0%| | 0/194 [00:11<?, ?it/s]
loss: 0.119663 [21120/24872]: 0%| | 0/194 [00:12<?, ?it/s]
loss: 0.106763 [21248/24872]: 0%| | 0/194 [00:12<?, ?it/s]
loss: 0.125835 [21376/24872]: 0%| | 0/194 [00:12<?, ?it/s]
loss: 0.133247 [21504/24872]: 0%| | 0/194 [00:12<?, ?it/s]
loss: 0.113288 [21632/24872]: 0%| | 0/194 [00:12<?, ?it/s]
loss: 0.091302 [21760/24872]: 0%| | 0/194 [00:12<?, ?it/s]
loss: 0.093961 [21888/24872]: 0%| | 0/194 [00:12<?, ?it/s]
loss: 0.066603 [22016/24872]: 0%| | 0/194 [00:12<?, ?it/s]
loss: 0.109814 [22144/24872]: 0%| | 0/194 [00:12<?, ?it/s]
loss: 0.099878 [22272/24872]: 0%| | 0/194 [00:12<?, ?it/s]
loss: 0.121629 [22400/24872]: 0%| | 0/194 [00:12<?, ?it/s]
loss: 0.086501 [22528/24872]: 0%| | 0/194 [00:12<?, ?it/s]
loss: 0.100002 [22656/24872]: 0%| | 0/194 [00:12<?, ?it/s]
loss: 0.113477 [22784/24872]: 0%| | 0/194 [00:12<?, ?it/s]
loss: 0.102238 [22912/24872]: 0%| | 0/194 [00:13<?, ?it/s]
loss: 0.085228 [23040/24872]: 0%| | 0/194 [00:13<?, ?it/s]
loss: 0.115822 [23168/24872]: 0%| | 0/194 [00:13<?, ?it/s]
loss: 0.104329 [23296/24872]: 0%| | 0/194 [00:13<?, ?it/s]
loss: 0.092152 [23424/24872]: 0%| | 0/194 [00:13<?, ?it/s]
loss: 0.109461 [23552/24872]: 0%| | 0/194 [00:13<?, ?it/s]
loss: 0.117103 [23680/24872]: 0%| | 0/194 [00:13<?, ?it/s]
loss: 0.108090 [23808/24872]: 0%| | 0/194 [00:13<?, ?it/s]
loss: 0.121995 [23936/24872]: 0%| | 0/194 [00:13<?, ?it/s]
loss: 0.089504 [24064/24872]: 0%| | 0/194 [00:13<?, ?it/s]
loss: 0.123766 [24192/24872]: 0%| | 0/194 [00:13<?, ?it/s]
loss: 0.096302 [24320/24872]: 0%| | 0/194 [00:13<?, ?it/s]
loss: 0.106290 [24448/24872]: 0%| | 0/194 [00:13<?, ?it/s]
loss: 0.101002 [24576/24872]: 0%| | 0/194 [00:14<?, ?it/s]
loss: 0.111388 [24704/24872]: 0%| | 0/194 [00:14<?, ?it/s]
loss: 0.118918 [24832/24872]: 0%| | 0/194 [00:14<?, ?it/s]
loss: 0.089259 [24872/24872]: 0%| | 0/194 [00:14<?, ?it/s]
loss: 0.089259 [24872/24872]: : 195it [00:14, 13.73it/s]
-------------------------------
LR=1e-05, batch_size=256
-------------------------------
Epoch 1, time=344.07s
0%| | 0/97 [00:00<?, ?it/s]
loss: 0.108217 [ 256/24872]: 0%| | 0/97 [00:00<?, ?it/s]
loss: 0.110070 [ 512/24872]: 0%| | 0/97 [00:00<?, ?it/s]
loss: 0.096328 [ 768/24872]: 0%| | 0/97 [00:00<?, ?it/s]
loss: 0.125296 [ 1024/24872]: 0%| | 0/97 [00:00<?, ?it/s]
loss: 0.090557 [ 1280/24872]: 0%| | 0/97 [00:00<?, ?it/s]
loss: 0.115912 [ 1536/24872]: 0%| | 0/97 [00:00<?, ?it/s]
loss: 0.092941 [ 1792/24872]: 0%| | 0/97 [00:00<?, ?it/s]
loss: 0.092029 [ 2048/24872]: 0%| | 0/97 [00:00<?, ?it/s]
loss: 0.109067 [ 2304/24872]: 0%| | 0/97 [00:01<?, ?it/s]
loss: 0.100311 [ 2560/24872]: 0%| | 0/97 [00:01<?, ?it/s]
loss: 0.097294 [ 2816/24872]: 0%| | 0/97 [00:01<?, ?it/s]
loss: 0.101872 [ 3072/24872]: 0%| | 0/97 [00:01<?, ?it/s]
loss: 0.096516 [ 3328/24872]: 0%| | 0/97 [00:01<?, ?it/s]
loss: 0.101936 [ 3584/24872]: 0%| | 0/97 [00:01<?, ?it/s]
loss: 0.088370 [ 3840/24872]: 0%| | 0/97 [00:01<?, ?it/s]
loss: 0.114843 [ 4096/24872]: 0%| | 0/97 [00:01<?, ?it/s]
loss: 0.109611 [ 4352/24872]: 0%| | 0/97 [00:01<?, ?it/s]
loss: 0.097164 [ 4608/24872]: 0%| | 0/97 [00:02<?, ?it/s]
loss: 0.093797 [ 4864/24872]: 0%| | 0/97 [00:02<?, ?it/s]
loss: 0.093911 [ 5120/24872]: 0%| | 0/97 [00:02<?, ?it/s]
loss: 0.129108 [ 5376/24872]: 0%| | 0/97 [00:02<?, ?it/s]
loss: 0.096821 [ 5632/24872]: 0%| | 0/97 [00:02<?, ?it/s]
loss: 0.101076 [ 5888/24872]: 0%| | 0/97 [00:02<?, ?it/s]
loss: 0.088607 [ 6144/24872]: 0%| | 0/97 [00:02<?, ?it/s]
loss: 0.091945 [ 6400/24872]: 0%| | 0/97 [00:02<?, ?it/s]
loss: 0.102855 [ 6656/24872]: 0%| | 0/97 [00:02<?, ?it/s]
loss: 0.101746 [ 6912/24872]: 0%| | 0/97 [00:03<?, ?it/s]
loss: 0.091180 [ 7168/24872]: 0%| | 0/97 [00:03<?, ?it/s]
loss: 0.094524 [ 7424/24872]: 0%| | 0/97 [00:03<?, ?it/s]
loss: 0.098454 [ 7680/24872]: 0%| | 0/97 [00:03<?, ?it/s]
loss: 0.112366 [ 7936/24872]: 0%| | 0/97 [00:03<?, ?it/s]
loss: 0.115194 [ 8192/24872]: 0%| | 0/97 [00:03<?, ?it/s]
loss: 0.092214 [ 8448/24872]: 0%| | 0/97 [00:03<?, ?it/s]
loss: 0.111684 [ 8704/24872]: 0%| | 0/97 [00:03<?, ?it/s]
loss: 0.089302 [ 8960/24872]: 0%| | 0/97 [00:03<?, ?it/s]
loss: 0.084426 [ 9216/24872]: 0%| | 0/97 [00:03<?, ?it/s]
loss: 0.107188 [ 9472/24872]: 0%| | 0/97 [00:04<?, ?it/s]
loss: 0.085098 [ 9728/24872]: 0%| | 0/97 [00:04<?, ?it/s]
loss: 0.099955 [ 9984/24872]: 0%| | 0/97 [00:04<?, ?it/s]
loss: 0.106329 [10240/24872]: 0%| | 0/97 [00:04<?, ?it/s]
loss: 0.095469 [10496/24872]: 0%| | 0/97 [00:04<?, ?it/s]
loss: 0.098792 [10752/24872]: 0%| | 0/97 [00:04<?, ?it/s]
loss: 0.094446 [11008/24872]: 0%| | 0/97 [00:04<?, ?it/s]
loss: 0.122694 [11264/24872]: 0%| | 0/97 [00:04<?, ?it/s]
loss: 0.092548 [11520/24872]: 0%| | 0/97 [00:04<?, ?it/s]
loss: 0.103076 [11776/24872]: 0%| | 0/97 [00:05<?, ?it/s]
loss: 0.095084 [12032/24872]: 0%| | 0/97 [00:05<?, ?it/s]
loss: 0.117496 [12288/24872]: 0%| | 0/97 [00:05<?, ?it/s]
loss: 0.112452 [12544/24872]: 0%| | 0/97 [00:05<?, ?it/s]
loss: 0.113610 [12800/24872]: 0%| | 0/97 [00:05<?, ?it/s]
loss: 0.080341 [13056/24872]: 0%| | 0/97 [00:05<?, ?it/s]
loss: 0.090865 [13312/24872]: 0%| | 0/97 [00:05<?, ?it/s]
loss: 0.091854 [13568/24872]: 0%| | 0/97 [00:05<?, ?it/s]
loss: 0.101819 [13824/24872]: 0%| | 0/97 [00:05<?, ?it/s]
loss: 0.092054 [14080/24872]: 0%| | 0/97 [00:06<?, ?it/s]
loss: 0.106429 [14336/24872]: 0%| | 0/97 [00:06<?, ?it/s]
loss: 0.096450 [14592/24872]: 0%| | 0/97 [00:06<?, ?it/s]
loss: 0.116510 [14848/24872]: 0%| | 0/97 [00:06<?, ?it/s]
loss: 0.107026 [15104/24872]: 0%| | 0/97 [00:06<?, ?it/s]
loss: 0.087298 [15360/24872]: 0%| | 0/97 [00:06<?, ?it/s]
loss: 0.091552 [15616/24872]: 0%| | 0/97 [00:06<?, ?it/s]
loss: 0.098521 [15872/24872]: 0%| | 0/97 [00:06<?, ?it/s]
loss: 0.082908 [16128/24872]: 0%| | 0/97 [00:06<?, ?it/s]
loss: 0.121091 [16384/24872]: 0%| | 0/97 [00:07<?, ?it/s]
loss: 0.099262 [16640/24872]: 0%| | 0/97 [00:07<?, ?it/s]
loss: 0.097785 [16896/24872]: 0%| | 0/97 [00:07<?, ?it/s]
loss: 0.088848 [17152/24872]: 0%| | 0/97 [00:07<?, ?it/s]
loss: 0.091694 [17408/24872]: 0%| | 0/97 [00:07<?, ?it/s]
loss: 0.130108 [17664/24872]: 0%| | 0/97 [00:07<?, ?it/s]
loss: 0.095430 [17920/24872]: 0%| | 0/97 [00:07<?, ?it/s]
loss: 0.093790 [18176/24872]: 0%| | 0/97 [00:07<?, ?it/s]
loss: 0.113521 [18432/24872]: 0%| | 0/97 [00:07<?, ?it/s]
loss: 0.083267 [18688/24872]: 0%| | 0/97 [00:08<?, ?it/s]
loss: 0.083952 [18944/24872]: 0%| | 0/97 [00:08<?, ?it/s]
loss: 0.088877 [19200/24872]: 0%| | 0/97 [00:08<?, ?it/s]
loss: 0.094408 [19456/24872]: 0%| | 0/97 [00:08<?, ?it/s]
loss: 0.105448 [19712/24872]: 0%| | 0/97 [00:08<?, ?it/s]
loss: 0.098053 [19968/24872]: 0%| | 0/97 [00:08<?, ?it/s]
loss: 0.094364 [20224/24872]: 0%| | 0/97 [00:08<?, ?it/s]
loss: 0.102520 [20480/24872]: 0%| | 0/97 [00:08<?, ?it/s]
loss: 0.089437 [20736/24872]: 0%| | 0/97 [00:08<?, ?it/s]
loss: 0.089354 [20992/24872]: 0%| | 0/97 [00:09<?, ?it/s]
loss: 0.101166 [21248/24872]: 0%| | 0/97 [00:09<?, ?it/s]
loss: 0.111342 [21504/24872]: 0%| | 0/97 [00:09<?, ?it/s]
loss: 0.092791 [21760/24872]: 0%| | 0/97 [00:09<?, ?it/s]
loss: 0.071775 [22016/24872]: 0%| | 0/97 [00:09<?, ?it/s]
loss: 0.096323 [22272/24872]: 0%| | 0/97 [00:09<?, ?it/s]
loss: 0.097537 [22528/24872]: 0%| | 0/97 [00:09<?, ?it/s]
loss: 0.093437 [22784/24872]: 0%| | 0/97 [00:09<?, ?it/s]
loss: 0.086087 [23040/24872]: 0%| | 0/97 [00:09<?, ?it/s]
loss: 0.099953 [23296/24872]: 0%| | 0/97 [00:10<?, ?it/s]
loss: 0.089185 [23552/24872]: 0%| | 0/97 [00:10<?, ?it/s]
loss: 0.102087 [23808/24872]: 0%| | 0/97 [00:10<?, ?it/s]
loss: 0.095477 [24064/24872]: 0%| | 0/97 [00:10<?, ?it/s]
loss: 0.100683 [24320/24872]: 0%| | 0/97 [00:10<?, ?it/s]
loss: 0.095420 [24576/24872]: 0%| | 0/97 [00:10<?, ?it/s]
loss: 0.104947 [24832/24872]: 0%| | 0/97 [00:10<?, ?it/s]
loss: 0.080749 [24872/24872]: 0%| | 0/97 [00:10<?, ?it/s]
loss: 0.080749 [24872/24872]: : 98it [00:10, 9.13it/s]
Epoch 2, time=354.81s
0%| | 0/97 [00:00<?, ?it/s]
loss: 0.099365 [ 256/24872]: 0%| | 0/97 [00:00<?, ?it/s]
loss: 0.102729 [ 512/24872]: 0%| | 0/97 [00:00<?, ?it/s]
loss: 0.091872 [ 768/24872]: 0%| | 0/97 [00:00<?, ?it/s]
loss: 0.119069 [ 1024/24872]: 0%| | 0/97 [00:00<?, ?it/s]
loss: 0.085731 [ 1280/24872]: 0%| | 0/97 [00:00<?, ?it/s]
loss: 0.109031 [ 1536/24872]: 0%| | 0/97 [00:00<?, ?it/s]
loss: 0.089242 [ 1792/24872]: 0%| | 0/97 [00:00<?, ?it/s]
loss: 0.090156 [ 2048/24872]: 0%| | 0/97 [00:00<?, ?it/s]
loss: 0.105308 [ 2304/24872]: 0%| | 0/97 [00:00<?, ?it/s]
loss: 0.096462 [ 2560/24872]: 0%| | 0/97 [00:01<?, ?it/s]
loss: 0.095333 [ 2816/24872]: 0%| | 0/97 [00:01<?, ?it/s]
loss: 0.098911 [ 3072/24872]: 0%| | 0/97 [00:01<?, ?it/s]
loss: 0.091997 [ 3328/24872]: 0%| | 0/97 [00:01<?, ?it/s]
loss: 0.099408 [ 3584/24872]: 0%| | 0/97 [00:01<?, ?it/s]
loss: 0.085358 [ 3840/24872]: 0%| | 0/97 [00:01<?, ?it/s]
loss: 0.108740 [ 4096/24872]: 0%| | 0/97 [00:01<?, ?it/s]
loss: 0.105579 [ 4352/24872]: 0%| | 0/97 [00:01<?, ?it/s]
loss: 0.094042 [ 4608/24872]: 0%| | 0/97 [00:01<?, ?it/s]
loss: 0.091163 [ 4864/24872]: 0%| | 0/97 [00:02<?, ?it/s]
loss: 0.089644 [ 5120/24872]: 0%| | 0/97 [00:02<?, ?it/s]
loss: 0.124077 [ 5376/24872]: 0%| | 0/97 [00:02<?, ?it/s]
loss: 0.093196 [ 5632/24872]: 0%| | 0/97 [00:02<?, ?it/s]
loss: 0.099393 [ 5888/24872]: 0%| | 0/97 [00:02<?, ?it/s]
loss: 0.086619 [ 6144/24872]: 0%| | 0/97 [00:02<?, ?it/s]
loss: 0.088522 [ 6400/24872]: 0%| | 0/97 [00:02<?, ?it/s]
loss: 0.100332 [ 6656/24872]: 0%| | 0/97 [00:02<?, ?it/s]
loss: 0.098775 [ 6912/24872]: 0%| | 0/97 [00:02<?, ?it/s]
loss: 0.088652 [ 7168/24872]: 0%| | 0/97 [00:03<?, ?it/s]
loss: 0.091481 [ 7424/24872]: 0%| | 0/97 [00:03<?, ?it/s]
loss: 0.094267 [ 7680/24872]: 0%| | 0/97 [00:03<?, ?it/s]
loss: 0.108011 [ 7936/24872]: 0%| | 0/97 [00:03<?, ?it/s]
loss: 0.109504 [ 8192/24872]: 0%| | 0/97 [00:03<?, ?it/s]
loss: 0.090827 [ 8448/24872]: 0%| | 0/97 [00:03<?, ?it/s]
loss: 0.108515 [ 8704/24872]: 0%| | 0/97 [00:03<?, ?it/s]
loss: 0.087284 [ 8960/24872]: 0%| | 0/97 [00:03<?, ?it/s]
loss: 0.082089 [ 9216/24872]: 0%| | 0/97 [00:03<?, ?it/s]
loss: 0.105112 [ 9472/24872]: 0%| | 0/97 [00:04<?, ?it/s]
loss: 0.084911 [ 9728/24872]: 0%| | 0/97 [00:04<?, ?it/s]
loss: 0.099163 [ 9984/24872]: 0%| | 0/97 [00:04<?, ?it/s]
loss: 0.103898 [10240/24872]: 0%| | 0/97 [00:04<?, ?it/s]
loss: 0.093089 [10496/24872]: 0%| | 0/97 [00:04<?, ?it/s]
loss: 0.097299 [10752/24872]: 0%| | 0/97 [00:04<?, ?it/s]
loss: 0.091984 [11008/24872]: 0%| | 0/97 [00:04<?, ?it/s]
loss: 0.119832 [11264/24872]: 0%| | 0/97 [00:04<?, ?it/s]
loss: 0.089681 [11520/24872]: 0%| | 0/97 [00:04<?, ?it/s]
loss: 0.101378 [11776/24872]: 0%| | 0/97 [00:05<?, ?it/s]
loss: 0.094047 [12032/24872]: 0%| | 0/97 [00:05<?, ?it/s]
loss: 0.115870 [12288/24872]: 0%| | 0/97 [00:05<?, ?it/s]
loss: 0.109771 [12544/24872]: 0%| | 0/97 [00:05<?, ?it/s]
loss: 0.111430 [12800/24872]: 0%| | 0/97 [00:05<?, ?it/s]
loss: 0.078878 [13056/24872]: 0%| | 0/97 [00:05<?, ?it/s]
loss: 0.087858 [13312/24872]: 0%| | 0/97 [00:05<?, ?it/s]
loss: 0.090621 [13568/24872]: 0%| | 0/97 [00:05<?, ?it/s]
loss: 0.100615 [13824/24872]: 0%| | 0/97 [00:05<?, ?it/s]
loss: 0.090473 [14080/24872]: 0%| | 0/97 [00:05<?, ?it/s]
loss: 0.104374 [14336/24872]: 0%| | 0/97 [00:06<?, ?it/s]
loss: 0.095820 [14592/24872]: 0%| | 0/97 [00:06<?, ?it/s]
loss: 0.114463 [14848/24872]: 0%| | 0/97 [00:06<?, ?it/s]
loss: 0.104053 [15104/24872]: 0%| | 0/97 [00:06<?, ?it/s]
loss: 0.086163 [15360/24872]: 0%| | 0/97 [00:06<?, ?it/s]
loss: 0.090467 [15616/24872]: 0%| | 0/97 [00:06<?, ?it/s]
loss: 0.095957 [15872/24872]: 0%| | 0/97 [00:06<?, ?it/s]
loss: 0.081628 [16128/24872]: 0%| | 0/97 [00:06<?, ?it/s]
loss: 0.119585 [16384/24872]: 0%| | 0/97 [00:06<?, ?it/s]
loss: 0.099024 [16640/24872]: 0%| | 0/97 [00:07<?, ?it/s]
loss: 0.097244 [16896/24872]: 0%| | 0/97 [00:07<?, ?it/s]
loss: 0.086191 [17152/24872]: 0%| | 0/97 [00:07<?, ?it/s]
loss: 0.090041 [17408/24872]: 0%| | 0/97 [00:07<?, ?it/s]
loss: 0.128586 [17664/24872]: 0%| | 0/97 [00:07<?, ?it/s]
loss: 0.094702 [17920/24872]: 0%| | 0/97 [00:07<?, ?it/s]
loss: 0.092230 [18176/24872]: 0%| | 0/97 [00:07<?, ?it/s]
loss: 0.111931 [18432/24872]: 0%| | 0/97 [00:07<?, ?it/s]
loss: 0.082786 [18688/24872]: 0%| | 0/97 [00:07<?, ?it/s]
loss: 0.083355 [18944/24872]: 0%| | 0/97 [00:08<?, ?it/s]
loss: 0.087008 [19200/24872]: 0%| | 0/97 [00:08<?, ?it/s]
loss: 0.092766 [19456/24872]: 0%| | 0/97 [00:08<?, ?it/s]
loss: 0.104114 [19712/24872]: 0%| | 0/97 [00:08<?, ?it/s]
loss: 0.097143 [19968/24872]: 0%| | 0/97 [00:08<?, ?it/s]
loss: 0.092933 [20224/24872]: 0%| | 0/97 [00:08<?, ?it/s]
loss: 0.101929 [20480/24872]: 0%| | 0/97 [00:08<?, ?it/s]
loss: 0.088154 [20736/24872]: 0%| | 0/97 [00:08<?, ?it/s]
loss: 0.088293 [20992/24872]: 0%| | 0/97 [00:08<?, ?it/s]
loss: 0.099140 [21248/24872]: 0%| | 0/97 [00:09<?, ?it/s]
loss: 0.108494 [21504/24872]: 0%| | 0/97 [00:09<?, ?it/s]
loss: 0.092092 [21760/24872]: 0%| | 0/97 [00:09<?, ?it/s]
loss: 0.070918 [22016/24872]: 0%| | 0/97 [00:09<?, ?it/s]
loss: 0.093733 [22272/24872]: 0%| | 0/97 [00:09<?, ?it/s]
loss: 0.097884 [22528/24872]: 0%| | 0/97 [00:09<?, ?it/s]
loss: 0.092840 [22784/24872]: 0%| | 0/97 [00:09<?, ?it/s]
loss: 0.084954 [23040/24872]: 0%| | 0/97 [00:09<?, ?it/s]
loss: 0.099314 [23296/24872]: 0%| | 0/97 [00:09<?, ?it/s]
loss: 0.086647 [23552/24872]: 0%| | 0/97 [00:10<?, ?it/s]
loss: 0.100788 [23808/24872]: 0%| | 0/97 [00:10<?, ?it/s]
loss: 0.094540 [24064/24872]: 0%| | 0/97 [00:10<?, ?it/s]
loss: 0.099071 [24320/24872]: 0%| | 0/97 [00:10<?, ?it/s]
loss: 0.094659 [24576/24872]: 0%| | 0/97 [00:10<?, ?it/s]
loss: 0.103678 [24832/24872]: 0%| | 0/97 [00:10<?, ?it/s]
loss: 0.076256 [24872/24872]: 0%| | 0/97 [00:10<?, ?it/s]
loss: 0.076256 [24872/24872]: : 98it [00:10, 9.24it/s]
Epoch 3, time=365.42s
0%| | 0/97 [00:00<?, ?it/s]
loss: 0.097903 [ 256/24872]: 0%| | 0/97 [00:00<?, ?it/s]
loss: 0.100975 [ 512/24872]: 0%| | 0/97 [00:00<?, ?it/s]
loss: 0.091634 [ 768/24872]: 0%| | 0/97 [00:00<?, ?it/s]
loss: 0.118348 [ 1024/24872]: 0%| | 0/97 [00:00<?, ?it/s]
loss: 0.085420 [ 1280/24872]: 0%| | 0/97 [00:00<?, ?it/s]
loss: 0.108674 [ 1536/24872]: 0%| | 0/97 [00:00<?, ?it/s]
loss: 0.088207 [ 1792/24872]: 0%| | 0/97 [00:00<?, ?it/s]
loss: 0.089395 [ 2048/24872]: 0%| | 0/97 [00:00<?, ?it/s]
loss: 0.103751 [ 2304/24872]: 0%| | 0/97 [00:00<?, ?it/s]
loss: 0.095408 [ 2560/24872]: 0%| | 0/97 [00:01<?, ?it/s]
loss: 0.094225 [ 2816/24872]: 0%| | 0/97 [00:01<?, ?it/s]
loss: 0.098036 [ 3072/24872]: 0%| | 0/97 [00:01<?, ?it/s]
loss: 0.090465 [ 3328/24872]: 0%| | 0/97 [00:01<?, ?it/s]
loss: 0.098950 [ 3584/24872]: 0%| | 0/97 [00:01<?, ?it/s]
loss: 0.084106 [ 3840/24872]: 0%| | 0/97 [00:01<?, ?it/s]
loss: 0.107932 [ 4096/24872]: 0%| | 0/97 [00:01<?, ?it/s]
loss: 0.104078 [ 4352/24872]: 0%| | 0/97 [00:01<?, ?it/s]
loss: 0.092926 [ 4608/24872]: 0%| | 0/97 [00:01<?, ?it/s]
loss: 0.089826 [ 4864/24872]: 0%| | 0/97 [00:02<?, ?it/s]
loss: 0.087559 [ 5120/24872]: 0%| | 0/97 [00:02<?, ?it/s]
loss: 0.119803 [ 5376/24872]: 0%| | 0/97 [00:02<?, ?it/s]
loss: 0.092072 [ 5632/24872]: 0%| | 0/97 [00:02<?, ?it/s]
loss: 0.098110 [ 5888/24872]: 0%| | 0/97 [00:02<?, ?it/s]
loss: 0.086777 [ 6144/24872]: 0%| | 0/97 [00:02<?, ?it/s]
loss: 0.087275 [ 6400/24872]: 0%| | 0/97 [00:02<?, ?it/s]
loss: 0.099715 [ 6656/24872]: 0%| | 0/97 [00:02<?, ?it/s]
loss: 0.097095 [ 6912/24872]: 0%| | 0/97 [00:02<?, ?it/s]
loss: 0.087362 [ 7168/24872]: 0%| | 0/97 [00:03<?, ?it/s]
loss: 0.090250 [ 7424/24872]: 0%| | 0/97 [00:03<?, ?it/s]
loss: 0.092651 [ 7680/24872]: 0%| | 0/97 [00:03<?, ?it/s]
loss: 0.106994 [ 7936/24872]: 0%| | 0/97 [00:03<?, ?it/s]
loss: 0.105575 [ 8192/24872]: 0%| | 0/97 [00:03<?, ?it/s]
loss: 0.090338 [ 8448/24872]: 0%| | 0/97 [00:03<?, ?it/s]
loss: 0.106818 [ 8704/24872]: 0%| | 0/97 [00:03<?, ?it/s]
loss: 0.086032 [ 8960/24872]: 0%| | 0/97 [00:03<?, ?it/s]
loss: 0.080827 [ 9216/24872]: 0%| | 0/97 [00:03<?, ?it/s]
loss: 0.104259 [ 9472/24872]: 0%| | 0/97 [00:04<?, ?it/s]
loss: 0.084417 [ 9728/24872]: 0%| | 0/97 [00:04<?, ?it/s]
loss: 0.099634 [ 9984/24872]: 0%| | 0/97 [00:04<?, ?it/s]
loss: 0.103203 [10240/24872]: 0%| | 0/97 [00:04<?, ?it/s]
loss: 0.091669 [10496/24872]: 0%| | 0/97 [00:04<?, ?it/s]
loss: 0.096017 [10752/24872]: 0%| | 0/97 [00:04<?, ?it/s]
loss: 0.090259 [11008/24872]: 0%| | 0/97 [00:04<?, ?it/s]
loss: 0.118311 [11264/24872]: 0%| | 0/97 [00:04<?, ?it/s]
loss: 0.088468 [11520/24872]: 0%| | 0/97 [00:04<?, ?it/s]
loss: 0.100874 [11776/24872]: 0%| | 0/97 [00:05<?, ?it/s]
loss: 0.093695 [12032/24872]: 0%| | 0/97 [00:05<?, ?it/s]
loss: 0.114312 [12288/24872]: 0%| | 0/97 [00:05<?, ?it/s]
loss: 0.108672 [12544/24872]: 0%| | 0/97 [00:05<?, ?it/s]
loss: 0.110520 [12800/24872]: 0%| | 0/97 [00:05<?, ?it/s]
loss: 0.078232 [13056/24872]: 0%| | 0/97 [00:05<?, ?it/s]
loss: 0.086505 [13312/24872]: 0%| | 0/97 [00:05<?, ?it/s]
loss: 0.089885 [13568/24872]: 0%| | 0/97 [00:05<?, ?it/s]
loss: 0.098923 [13824/24872]: 0%| | 0/97 [00:05<?, ?it/s]
loss: 0.089586 [14080/24872]: 0%| | 0/97 [00:05<?, ?it/s]
loss: 0.103009 [14336/24872]: 0%| | 0/97 [00:06<?, ?it/s]
loss: 0.095002 [14592/24872]: 0%| | 0/97 [00:06<?, ?it/s]
loss: 0.113360 [14848/24872]: 0%| | 0/97 [00:06<?, ?it/s]
loss: 0.103081 [15104/24872]: 0%| | 0/97 [00:06<?, ?it/s]
loss: 0.085742 [15360/24872]: 0%| | 0/97 [00:06<?, ?it/s]
loss: 0.089554 [15616/24872]: 0%| | 0/97 [00:06<?, ?it/s]
loss: 0.094092 [15872/24872]: 0%| | 0/97 [00:06<?, ?it/s]
loss: 0.080923 [16128/24872]: 0%| | 0/97 [00:06<?, ?it/s]
loss: 0.118585 [16384/24872]: 0%| | 0/97 [00:06<?, ?it/s]
loss: 0.098411 [16640/24872]: 0%| | 0/97 [00:07<?, ?it/s]
loss: 0.096008 [16896/24872]: 0%| | 0/97 [00:07<?, ?it/s]
loss: 0.084820 [17152/24872]: 0%| | 0/97 [00:07<?, ?it/s]
loss: 0.089128 [17408/24872]: 0%| | 0/97 [00:07<?, ?it/s]
loss: 0.127455 [17664/24872]: 0%| | 0/97 [00:07<?, ?it/s]
loss: 0.093837 [17920/24872]: 0%| | 0/97 [00:07<?, ?it/s]
loss: 0.091139 [18176/24872]: 0%| | 0/97 [00:07<?, ?it/s]
loss: 0.111029 [18432/24872]: 0%| | 0/97 [00:07<?, ?it/s]
loss: 0.082238 [18688/24872]: 0%| | 0/97 [00:07<?, ?it/s]
loss: 0.082443 [18944/24872]: 0%| | 0/97 [00:08<?, ?it/s]
loss: 0.086037 [19200/24872]: 0%| | 0/97 [00:08<?, ?it/s]
loss: 0.091983 [19456/24872]: 0%| | 0/97 [00:08<?, ?it/s]
loss: 0.103495 [19712/24872]: 0%| | 0/97 [00:08<?, ?it/s]
loss: 0.096391 [19968/24872]: 0%| | 0/97 [00:08<?, ?it/s]
loss: 0.091825 [20224/24872]: 0%| | 0/97 [00:08<?, ?it/s]
loss: 0.101543 [20480/24872]: 0%| | 0/97 [00:08<?, ?it/s]
loss: 0.087350 [20736/24872]: 0%| | 0/97 [00:08<?, ?it/s]
loss: 0.087586 [20992/24872]: 0%| | 0/97 [00:08<?, ?it/s]
loss: 0.098138 [21248/24872]: 0%| | 0/97 [00:09<?, ?it/s]
loss: 0.107019 [21504/24872]: 0%| | 0/97 [00:09<?, ?it/s]
loss: 0.091729 [21760/24872]: 0%| | 0/97 [00:09<?, ?it/s]
loss: 0.070401 [22016/24872]: 0%| | 0/97 [00:09<?, ?it/s]
loss: 0.092185 [22272/24872]: 0%| | 0/97 [00:09<?, ?it/s]
loss: 0.097738 [22528/24872]: 0%| | 0/97 [00:09<?, ?it/s]
loss: 0.092322 [22784/24872]: 0%| | 0/97 [00:09<?, ?it/s]
loss: 0.084367 [23040/24872]: 0%| | 0/97 [00:09<?, ?it/s]
loss: 0.098724 [23296/24872]: 0%| | 0/97 [00:09<?, ?it/s]
loss: 0.085788 [23552/24872]: 0%| | 0/97 [00:10<?, ?it/s]
loss: 0.099621 [23808/24872]: 0%| | 0/97 [00:10<?, ?it/s]
loss: 0.093984 [24064/24872]: 0%| | 0/97 [00:10<?, ?it/s]
loss: 0.098130 [24320/24872]: 0%| | 0/97 [00:10<?, ?it/s]
loss: 0.094082 [24576/24872]: 0%| | 0/97 [00:10<?, ?it/s]
loss: 0.102679 [24832/24872]: 0%| | 0/97 [00:10<?, ?it/s]
loss: 0.073965 [24872/24872]: 0%| | 0/97 [00:10<?, ?it/s]
loss: 0.073965 [24872/24872]: : 98it [00:10, 9.24it/s]
Epoch 4, time=376.03s
0%| | 0/97 [00:00<?, ?it/s]
loss: 0.096944 [ 256/24872]: 0%| | 0/97 [00:00<?, ?it/s]
loss: 0.100035 [ 512/24872]: 0%| | 0/97 [00:00<?, ?it/s]
loss: 0.091391 [ 768/24872]: 0%| | 0/97 [00:00<?, ?it/s]
loss: 0.117738 [ 1024/24872]: 0%| | 0/97 [00:00<?, ?it/s]
loss: 0.085046 [ 1280/24872]: 0%| | 0/97 [00:00<?, ?it/s]
loss: 0.107875 [ 1536/24872]: 0%| | 0/97 [00:00<?, ?it/s]
loss: 0.087481 [ 1792/24872]: 0%| | 0/97 [00:00<?, ?it/s]
loss: 0.088621 [ 2048/24872]: 0%| | 0/97 [00:00<?, ?it/s]
loss: 0.102622 [ 2304/24872]: 0%| | 0/97 [00:00<?, ?it/s]
loss: 0.094618 [ 2560/24872]: 0%| | 0/97 [00:01<?, ?it/s]
loss: 0.093594 [ 2816/24872]: 0%| | 0/97 [00:01<?, ?it/s]
loss: 0.097431 [ 3072/24872]: 0%| | 0/97 [00:01<?, ?it/s]
loss: 0.089465 [ 3328/24872]: 0%| | 0/97 [00:01<?, ?it/s]
loss: 0.098550 [ 3584/24872]: 0%| | 0/97 [00:01<?, ?it/s]
loss: 0.083253 [ 3840/24872]: 0%| | 0/97 [00:01<?, ?it/s]
loss: 0.107129 [ 4096/24872]: 0%| | 0/97 [00:01<?, ?it/s]
loss: 0.103200 [ 4352/24872]: 0%| | 0/97 [00:01<?, ?it/s]
loss: 0.092341 [ 4608/24872]: 0%| | 0/97 [00:01<?, ?it/s]
loss: 0.088627 [ 4864/24872]: 0%| | 0/97 [00:02<?, ?it/s]
loss: 0.086424 [ 5120/24872]: 0%| | 0/97 [00:02<?, ?it/s]
loss: 0.117105 [ 5376/24872]: 0%| | 0/97 [00:02<?, ?it/s]
loss: 0.091068 [ 5632/24872]: 0%| | 0/97 [00:02<?, ?it/s]
loss: 0.097228 [ 5888/24872]: 0%| | 0/97 [00:02<?, ?it/s]
loss: 0.086750 [ 6144/24872]: 0%| | 0/97 [00:02<?, ?it/s]
loss: 0.086498 [ 6400/24872]: 0%| | 0/97 [00:02<?, ?it/s]
loss: 0.099034 [ 6656/24872]: 0%| | 0/97 [00:02<?, ?it/s]
loss: 0.096116 [ 6912/24872]: 0%| | 0/97 [00:02<?, ?it/s]
loss: 0.086519 [ 7168/24872]: 0%| | 0/97 [00:03<?, ?it/s]
loss: 0.089315 [ 7424/24872]: 0%| | 0/97 [00:03<?, ?it/s]
loss: 0.091577 [ 7680/24872]: 0%| | 0/97 [00:03<?, ?it/s]
loss: 0.106267 [ 7936/24872]: 0%| | 0/97 [00:03<?, ?it/s]
loss: 0.103288 [ 8192/24872]: 0%| | 0/97 [00:03<?, ?it/s]
loss: 0.089913 [ 8448/24872]: 0%| | 0/97 [00:03<?, ?it/s]
loss: 0.105429 [ 8704/24872]: 0%| | 0/97 [00:03<?, ?it/s]
loss: 0.085231 [ 8960/24872]: 0%| | 0/97 [00:03<?, ?it/s]
loss: 0.080073 [ 9216/24872]: 0%| | 0/97 [00:03<?, ?it/s]
loss: 0.103210 [ 9472/24872]: 0%| | 0/97 [00:04<?, ?it/s]
loss: 0.083904 [ 9728/24872]: 0%| | 0/97 [00:04<?, ?it/s]
loss: 0.098909 [ 9984/24872]: 0%| | 0/97 [00:04<?, ?it/s]
loss: 0.102616 [10240/24872]: 0%| | 0/97 [00:04<?, ?it/s]
loss: 0.090466 [10496/24872]: 0%| | 0/97 [00:04<?, ?it/s]
loss: 0.094952 [10752/24872]: 0%| | 0/97 [00:04<?, ?it/s]
loss: 0.089243 [11008/24872]: 0%| | 0/97 [00:04<?, ?it/s]
loss: 0.116736 [11264/24872]: 0%| | 0/97 [00:04<?, ?it/s]
loss: 0.087621 [11520/24872]: 0%| | 0/97 [00:04<?, ?it/s]
loss: 0.100220 [11776/24872]: 0%| | 0/97 [00:04<?, ?it/s]
loss: 0.093355 [12032/24872]: 0%| | 0/97 [00:05<?, ?it/s]
loss: 0.113054 [12288/24872]: 0%| | 0/97 [00:05<?, ?it/s]
loss: 0.107750 [12544/24872]: 0%| | 0/97 [00:05<?, ?it/s]
loss: 0.109552 [12800/24872]: 0%| | 0/97 [00:05<?, ?it/s]
loss: 0.077617 [13056/24872]: 0%| | 0/97 [00:05<?, ?it/s]
loss: 0.085676 [13312/24872]: 0%| | 0/97 [00:05<?, ?it/s]
loss: 0.089203 [13568/24872]: 0%| | 0/97 [00:05<?, ?it/s]
loss: 0.097441 [13824/24872]: 0%| | 0/97 [00:05<?, ?it/s]
loss: 0.088834 [14080/24872]: 0%| | 0/97 [00:05<?, ?it/s]
loss: 0.101996 [14336/24872]: 0%| | 0/97 [00:06<?, ?it/s]
loss: 0.094255 [14592/24872]: 0%| | 0/97 [00:06<?, ?it/s]
loss: 0.112549 [14848/24872]: 0%| | 0/97 [00:06<?, ?it/s]
loss: 0.102225 [15104/24872]: 0%| | 0/97 [00:06<?, ?it/s]
loss: 0.085369 [15360/24872]: 0%| | 0/97 [00:06<?, ?it/s]
loss: 0.088696 [15616/24872]: 0%| | 0/97 [00:06<?, ?it/s]
loss: 0.092888 [15872/24872]: 0%| | 0/97 [00:06<?, ?it/s]
loss: 0.080331 [16128/24872]: 0%| | 0/97 [00:06<?, ?it/s]
loss: 0.117677 [16384/24872]: 0%| | 0/97 [00:06<?, ?it/s]
loss: 0.097649 [16640/24872]: 0%| | 0/97 [00:07<?, ?it/s]
loss: 0.094941 [16896/24872]: 0%| | 0/97 [00:07<?, ?it/s]
loss: 0.083989 [17152/24872]: 0%| | 0/97 [00:07<?, ?it/s]
loss: 0.088446 [17408/24872]: 0%| | 0/97 [00:07<?, ?it/s]
loss: 0.126576 [17664/24872]: 0%| | 0/97 [00:07<?, ?it/s]
loss: 0.093110 [17920/24872]: 0%| | 0/97 [00:07<?, ?it/s]
loss: 0.090247 [18176/24872]: 0%| | 0/97 [00:07<?, ?it/s]
loss: 0.110163 [18432/24872]: 0%| | 0/97 [00:07<?, ?it/s]
loss: 0.081742 [18688/24872]: 0%| | 0/97 [00:07<?, ?it/s]
loss: 0.081740 [18944/24872]: 0%| | 0/97 [00:08<?, ?it/s]
loss: 0.085282 [19200/24872]: 0%| | 0/97 [00:08<?, ?it/s]
loss: 0.091344 [19456/24872]: 0%| | 0/97 [00:08<?, ?it/s]
loss: 0.103042 [19712/24872]: 0%| | 0/97 [00:08<?, ?it/s]
loss: 0.095586 [19968/24872]: 0%| | 0/97 [00:08<?, ?it/s]
loss: 0.090978 [20224/24872]: 0%| | 0/97 [00:08<?, ?it/s]
loss: 0.101211 [20480/24872]: 0%| | 0/97 [00:08<?, ?it/s]
loss: 0.086608 [20736/24872]: 0%| | 0/97 [00:08<?, ?it/s]
loss: 0.086958 [20992/24872]: 0%| | 0/97 [00:08<?, ?it/s]
loss: 0.097337 [21248/24872]: 0%| | 0/97 [00:09<?, ?it/s]
loss: 0.105897 [21504/24872]: 0%| | 0/97 [00:09<?, ?it/s]
loss: 0.091322 [21760/24872]: 0%| | 0/97 [00:09<?, ?it/s]
loss: 0.070003 [22016/24872]: 0%| | 0/97 [00:09<?, ?it/s]
loss: 0.091042 [22272/24872]: 0%| | 0/97 [00:09<?, ?it/s]
loss: 0.097342 [22528/24872]: 0%| | 0/97 [00:09<?, ?it/s]
loss: 0.091823 [22784/24872]: 0%| | 0/97 [00:09<?, ?it/s]
loss: 0.083667 [23040/24872]: 0%| | 0/97 [00:09<?, ?it/s]
loss: 0.097962 [23296/24872]: 0%| | 0/97 [00:09<?, ?it/s]
loss: 0.085032 [23552/24872]: 0%| | 0/97 [00:09<?, ?it/s]
loss: 0.098688 [23808/24872]: 0%| | 0/97 [00:10<?, ?it/s]
loss: 0.093460 [24064/24872]: 0%| | 0/97 [00:10<?, ?it/s]
loss: 0.097352 [24320/24872]: 0%| | 0/97 [00:10<?, ?it/s]
loss: 0.093560 [24576/24872]: 0%| | 0/97 [00:10<?, ?it/s]
loss: 0.101821 [24832/24872]: 0%| | 0/97 [00:10<?, ?it/s]
loss: 0.071918 [24872/24872]: 0%| | 0/97 [00:10<?, ?it/s]
loss: 0.071918 [24872/24872]: : 98it [00:10, 9.27it/s]
Epoch 5, time=386.60s
0%| | 0/97 [00:00<?, ?it/s]
loss: 0.096075 [ 256/24872]: 0%| | 0/97 [00:00<?, ?it/s]
loss: 0.099436 [ 512/24872]: 0%| | 0/97 [00:00<?, ?it/s]
loss: 0.091104 [ 768/24872]: 0%| | 0/97 [00:00<?, ?it/s]
loss: 0.117138 [ 1024/24872]: 0%| | 0/97 [00:00<?, ?it/s]
loss: 0.084944 [ 1280/24872]: 0%| | 0/97 [00:00<?, ?it/s]
loss: 0.107370 [ 1536/24872]: 0%| | 0/97 [00:00<?, ?it/s]
loss: 0.086825 [ 1792/24872]: 0%| | 0/97 [00:00<?, ?it/s]
loss: 0.087701 [ 2048/24872]: 0%| | 0/97 [00:00<?, ?it/s]
loss: 0.101554 [ 2304/24872]: 0%| | 0/97 [00:00<?, ?it/s]
loss: 0.093927 [ 2560/24872]: 0%| | 0/97 [00:01<?, ?it/s]
loss: 0.093098 [ 2816/24872]: 0%| | 0/97 [00:01<?, ?it/s]
loss: 0.096849 [ 3072/24872]: 0%| | 0/97 [00:01<?, ?it/s]
loss: 0.088273 [ 3328/24872]: 0%| | 0/97 [00:01<?, ?it/s]
loss: 0.098090 [ 3584/24872]: 0%| | 0/97 [00:01<?, ?it/s]
loss: 0.082350 [ 3840/24872]: 0%| | 0/97 [00:01<?, ?it/s]
loss: 0.106082 [ 4096/24872]: 0%| | 0/97 [00:01<?, ?it/s]
loss: 0.102607 [ 4352/24872]: 0%| | 0/97 [00:01<?, ?it/s]
loss: 0.091970 [ 4608/24872]: 0%| | 0/97 [00:01<?, ?it/s]
loss: 0.087776 [ 4864/24872]: 0%| | 0/97 [00:02<?, ?it/s]
loss: 0.085331 [ 5120/24872]: 0%| | 0/97 [00:02<?, ?it/s]
loss: 0.114442 [ 5376/24872]: 0%| | 0/97 [00:02<?, ?it/s]
loss: 0.090099 [ 5632/24872]: 0%| | 0/97 [00:02<?, ?it/s]
loss: 0.096319 [ 5888/24872]: 0%| | 0/97 [00:02<?, ?it/s]
loss: 0.087087 [ 6144/24872]: 0%| | 0/97 [00:02<?, ?it/s]
loss: 0.085837 [ 6400/24872]: 0%| | 0/97 [00:02<?, ?it/s]
loss: 0.098321 [ 6656/24872]: 0%| | 0/97 [00:02<?, ?it/s]
loss: 0.095241 [ 6912/24872]: 0%| | 0/97 [00:02<?, ?it/s]
loss: 0.085733 [ 7168/24872]: 0%| | 0/97 [00:03<?, ?it/s]
loss: 0.088486 [ 7424/24872]: 0%| | 0/97 [00:03<?, ?it/s]
loss: 0.090431 [ 7680/24872]: 0%| | 0/97 [00:03<?, ?it/s]
loss: 0.105673 [ 7936/24872]: 0%| | 0/97 [00:03<?, ?it/s]
loss: 0.102312 [ 8192/24872]: 0%| | 0/97 [00:03<?, ?it/s]
loss: 0.089527 [ 8448/24872]: 0%| | 0/97 [00:03<?, ?it/s]
loss: 0.104702 [ 8704/24872]: 0%| | 0/97 [00:03<?, ?it/s]
loss: 0.084354 [ 8960/24872]: 0%| | 0/97 [00:03<?, ?it/s]
loss: 0.079498 [ 9216/24872]: 0%| | 0/97 [00:03<?, ?it/s]
loss: 0.102426 [ 9472/24872]: 0%| | 0/97 [00:04<?, ?it/s]
loss: 0.083563 [ 9728/24872]: 0%| | 0/97 [00:04<?, ?it/s]
loss: 0.099138 [ 9984/24872]: 0%| | 0/97 [00:04<?, ?it/s]
loss: 0.102357 [10240/24872]: 0%| | 0/97 [00:04<?, ?it/s]
loss: 0.089408 [10496/24872]: 0%| | 0/97 [00:04<?, ?it/s]
loss: 0.093885 [10752/24872]: 0%| | 0/97 [00:04<?, ?it/s]
loss: 0.088347 [11008/24872]: 0%| | 0/97 [00:04<?, ?it/s]
loss: 0.115643 [11264/24872]: 0%| | 0/97 [00:04<?, ?it/s]
loss: 0.086998 [11520/24872]: 0%| | 0/97 [00:04<?, ?it/s]
loss: 0.099831 [11776/24872]: 0%| | 0/97 [00:04<?, ?it/s]
loss: 0.092865 [12032/24872]: 0%| | 0/97 [00:05<?, ?it/s]
loss: 0.111811 [12288/24872]: 0%| | 0/97 [00:05<?, ?it/s]
loss: 0.106983 [12544/24872]: 0%| | 0/97 [00:05<?, ?it/s]
loss: 0.108721 [12800/24872]: 0%| | 0/97 [00:05<?, ?it/s]
loss: 0.077117 [13056/24872]: 0%| | 0/97 [00:05<?, ?it/s]
loss: 0.084928 [13312/24872]: 0%| | 0/97 [00:05<?, ?it/s]
loss: 0.088745 [13568/24872]: 0%| | 0/97 [00:05<?, ?it/s]
loss: 0.096323 [13824/24872]: 0%| | 0/97 [00:05<?, ?it/s]
loss: 0.088213 [14080/24872]: 0%| | 0/97 [00:05<?, ?it/s]
loss: 0.100953 [14336/24872]: 0%| | 0/97 [00:06<?, ?it/s]
loss: 0.093628 [14592/24872]: 0%| | 0/97 [00:06<?, ?it/s]
loss: 0.111750 [14848/24872]: 0%| | 0/97 [00:06<?, ?it/s]
loss: 0.101891 [15104/24872]: 0%| | 0/97 [00:06<?, ?it/s]
loss: 0.085024 [15360/24872]: 0%| | 0/97 [00:06<?, ?it/s]
loss: 0.088056 [15616/24872]: 0%| | 0/97 [00:06<?, ?it/s]
loss: 0.091668 [15872/24872]: 0%| | 0/97 [00:06<?, ?it/s]
loss: 0.079752 [16128/24872]: 0%| | 0/97 [00:06<?, ?it/s]
loss: 0.116878 [16384/24872]: 0%| | 0/97 [00:06<?, ?it/s]
loss: 0.096961 [16640/24872]: 0%| | 0/97 [00:07<?, ?it/s]
loss: 0.093744 [16896/24872]: 0%| | 0/97 [00:07<?, ?it/s]
loss: 0.083289 [17152/24872]: 0%| | 0/97 [00:07<?, ?it/s]
loss: 0.087681 [17408/24872]: 0%| | 0/97 [00:07<?, ?it/s]
loss: 0.125328 [17664/24872]: 0%| | 0/97 [00:07<?, ?it/s]
loss: 0.092389 [17920/24872]: 0%| | 0/97 [00:07<?, ?it/s]
loss: 0.089361 [18176/24872]: 0%| | 0/97 [00:07<?, ?it/s]
loss: 0.109311 [18432/24872]: 0%| | 0/97 [00:07<?, ?it/s]
loss: 0.081184 [18688/24872]: 0%| | 0/97 [00:07<?, ?it/s]
loss: 0.080959 [18944/24872]: 0%| | 0/97 [00:08<?, ?it/s]
loss: 0.084505 [19200/24872]: 0%| | 0/97 [00:08<?, ?it/s]
loss: 0.090808 [19456/24872]: 0%| | 0/97 [00:08<?, ?it/s]
loss: 0.102603 [19712/24872]: 0%| | 0/97 [00:08<?, ?it/s]
loss: 0.094908 [19968/24872]: 0%| | 0/97 [00:08<?, ?it/s]
loss: 0.090159 [20224/24872]: 0%| | 0/97 [00:08<?, ?it/s]
loss: 0.100965 [20480/24872]: 0%| | 0/97 [00:08<?, ?it/s]
loss: 0.085992 [20736/24872]: 0%| | 0/97 [00:08<?, ?it/s]
loss: 0.086364 [20992/24872]: 0%| | 0/97 [00:08<?, ?it/s]
loss: 0.096794 [21248/24872]: 0%| | 0/97 [00:08<?, ?it/s]
loss: 0.105008 [21504/24872]: 0%| | 0/97 [00:09<?, ?it/s]
loss: 0.090827 [21760/24872]: 0%| | 0/97 [00:09<?, ?it/s]
loss: 0.069599 [22016/24872]: 0%| | 0/97 [00:09<?, ?it/s]
loss: 0.090073 [22272/24872]: 0%| | 0/97 [00:09<?, ?it/s]
loss: 0.096966 [22528/24872]: 0%| | 0/97 [00:09<?, ?it/s]
loss: 0.091365 [22784/24872]: 0%| | 0/97 [00:09<?, ?it/s]
loss: 0.083004 [23040/24872]: 0%| | 0/97 [00:09<?, ?it/s]
loss: 0.097206 [23296/24872]: 0%| | 0/97 [00:09<?, ?it/s]
loss: 0.084298 [23552/24872]: 0%| | 0/97 [00:09<?, ?it/s]
loss: 0.097826 [23808/24872]: 0%| | 0/97 [00:10<?, ?it/s]
loss: 0.092910 [24064/24872]: 0%| | 0/97 [00:10<?, ?it/s]
loss: 0.096586 [24320/24872]: 0%| | 0/97 [00:10<?, ?it/s]
loss: 0.093064 [24576/24872]: 0%| | 0/97 [00:10<?, ?it/s]
loss: 0.101091 [24832/24872]: 0%| | 0/97 [00:10<?, ?it/s]
loss: 0.070741 [24872/24872]: 0%| | 0/97 [00:10<?, ?it/s]
loss: 0.070741 [24872/24872]: : 98it [00:10, 9.29it/s]
Epoch 6, time=397.15s
0%| | 0/97 [00:00<?, ?it/s]
loss: 0.095327 [ 256/24872]: 0%| | 0/97 [00:00<?, ?it/s]
loss: 0.098963 [ 512/24872]: 0%| | 0/97 [00:00<?, ?it/s]
loss: 0.090715 [ 768/24872]: 0%| | 0/97 [00:00<?, ?it/s]
loss: 0.116529 [ 1024/24872]: 0%| | 0/97 [00:00<?, ?it/s]
loss: 0.084623 [ 1280/24872]: 0%| | 0/97 [00:00<?, ?it/s]
loss: 0.106807 [ 1536/24872]: 0%| | 0/97 [00:00<?, ?it/s]
loss: 0.086247 [ 1792/24872]: 0%| | 0/97 [00:00<?, ?it/s]
loss: 0.086931 [ 2048/24872]: 0%| | 0/97 [00:00<?, ?it/s]
loss: 0.100634 [ 2304/24872]: 0%| | 0/97 [00:00<?, ?it/s]
loss: 0.093270 [ 2560/24872]: 0%| | 0/97 [00:01<?, ?it/s]
loss: 0.092625 [ 2816/24872]: 0%| | 0/97 [00:01<?, ?it/s]
loss: 0.096370 [ 3072/24872]: 0%| | 0/97 [00:01<?, ?it/s]
loss: 0.087511 [ 3328/24872]: 0%| | 0/97 [00:01<?, ?it/s]
loss: 0.097617 [ 3584/24872]: 0%| | 0/97 [00:01<?, ?it/s]
loss: 0.081669 [ 3840/24872]: 0%| | 0/97 [00:01<?, ?it/s]
loss: 0.105266 [ 4096/24872]: 0%| | 0/97 [00:01<?, ?it/s]
loss: 0.102177 [ 4352/24872]: 0%| | 0/97 [00:01<?, ?it/s]
loss: 0.091567 [ 4608/24872]: 0%| | 0/97 [00:01<?, ?it/s]
loss: 0.087249 [ 4864/24872]: 0%| | 0/97 [00:02<?, ?it/s]
loss: 0.084559 [ 5120/24872]: 0%| | 0/97 [00:02<?, ?it/s]
loss: 0.112894 [ 5376/24872]: 0%| | 0/97 [00:02<?, ?it/s]
loss: 0.089299 [ 5632/24872]: 0%| | 0/97 [00:02<?, ?it/s]
loss: 0.095508 [ 5888/24872]: 0%| | 0/97 [00:02<?, ?it/s]
loss: 0.086823 [ 6144/24872]: 0%| | 0/97 [00:02<?, ?it/s]
loss: 0.085252 [ 6400/24872]: 0%| | 0/97 [00:02<?, ?it/s]
loss: 0.097734 [ 6656/24872]: 0%| | 0/97 [00:02<?, ?it/s]
loss: 0.094712 [ 6912/24872]: 0%| | 0/97 [00:02<?, ?it/s]
loss: 0.085097 [ 7168/24872]: 0%| | 0/97 [00:03<?, ?it/s]
loss: 0.087822 [ 7424/24872]: 0%| | 0/97 [00:03<?, ?it/s]
loss: 0.089554 [ 7680/24872]: 0%| | 0/97 [00:03<?, ?it/s]
loss: 0.105078 [ 7936/24872]: 0%| | 0/97 [00:03<?, ?it/s]
loss: 0.101668 [ 8192/24872]: 0%| | 0/97 [00:03<?, ?it/s]
loss: 0.089224 [ 8448/24872]: 0%| | 0/97 [00:03<?, ?it/s]
loss: 0.103895 [ 8704/24872]: 0%| | 0/97 [00:03<?, ?it/s]
loss: 0.083969 [ 8960/24872]: 0%| | 0/97 [00:03<?, ?it/s]
loss: 0.078987 [ 9216/24872]: 0%| | 0/97 [00:03<?, ?it/s]
loss: 0.101514 [ 9472/24872]: 0%| | 0/97 [00:04<?, ?it/s]
loss: 0.083216 [ 9728/24872]: 0%| | 0/97 [00:04<?, ?it/s]
loss: 0.098878 [ 9984/24872]: 0%| | 0/97 [00:04<?, ?it/s]
loss: 0.102005 [10240/24872]: 0%| | 0/97 [00:04<?, ?it/s]
loss: 0.088577 [10496/24872]: 0%| | 0/97 [00:04<?, ?it/s]
loss: 0.092822 [10752/24872]: 0%| | 0/97 [00:04<?, ?it/s]
loss: 0.087657 [11008/24872]: 0%| | 0/97 [00:04<?, ?it/s]
loss: 0.114440 [11264/24872]: 0%| | 0/97 [00:04<?, ?it/s]
loss: 0.086451 [11520/24872]: 0%| | 0/97 [00:04<?, ?it/s]
loss: 0.099225 [11776/24872]: 0%| | 0/97 [00:05<?, ?it/s]
loss: 0.092356 [12032/24872]: 0%| | 0/97 [00:05<?, ?it/s]
loss: 0.110790 [12288/24872]: 0%| | 0/97 [00:05<?, ?it/s]
loss: 0.106115 [12544/24872]: 0%| | 0/97 [00:05<?, ?it/s]
loss: 0.107864 [12800/24872]: 0%| | 0/97 [00:05<?, ?it/s]
loss: 0.076630 [13056/24872]: 0%| | 0/97 [00:05<?, ?it/s]
loss: 0.084215 [13312/24872]: 0%| | 0/97 [00:05<?, ?it/s]
loss: 0.088278 [13568/24872]: 0%| | 0/97 [00:05<?, ?it/s]
loss: 0.095494 [13824/24872]: 0%| | 0/97 [00:05<?, ?it/s]
loss: 0.087687 [14080/24872]: 0%| | 0/97 [00:05<?, ?it/s]
loss: 0.100112 [14336/24872]: 0%| | 0/97 [00:06<?, ?it/s]
loss: 0.093042 [14592/24872]: 0%| | 0/97 [00:06<?, ?it/s]
loss: 0.111093 [14848/24872]: 0%| | 0/97 [00:06<?, ?it/s]
loss: 0.101323 [15104/24872]: 0%| | 0/97 [00:06<?, ?it/s]
loss: 0.084599 [15360/24872]: 0%| | 0/97 [00:06<?, ?it/s]
loss: 0.087428 [15616/24872]: 0%| | 0/97 [00:06<?, ?it/s]
loss: 0.090848 [15872/24872]: 0%| | 0/97 [00:06<?, ?it/s]
loss: 0.079151 [16128/24872]: 0%| | 0/97 [00:06<?, ?it/s]
loss: 0.116214 [16384/24872]: 0%| | 0/97 [00:06<?, ?it/s]
loss: 0.096346 [16640/24872]: 0%| | 0/97 [00:07<?, ?it/s]
loss: 0.092896 [16896/24872]: 0%| | 0/97 [00:07<?, ?it/s]
loss: 0.082781 [17152/24872]: 0%| | 0/97 [00:07<?, ?it/s]
loss: 0.087126 [17408/24872]: 0%| | 0/97 [00:07<?, ?it/s]
loss: 0.124403 [17664/24872]: 0%| | 0/97 [00:07<?, ?it/s]
loss: 0.091715 [17920/24872]: 0%| | 0/97 [00:07<?, ?it/s]
loss: 0.088609 [18176/24872]: 0%| | 0/97 [00:07<?, ?it/s]
loss: 0.108369 [18432/24872]: 0%| | 0/97 [00:07<?, ?it/s]
loss: 0.080703 [18688/24872]: 0%| | 0/97 [00:07<?, ?it/s]
loss: 0.080300 [18944/24872]: 0%| | 0/97 [00:08<?, ?it/s]
loss: 0.083706 [19200/24872]: 0%| | 0/97 [00:08<?, ?it/s]
loss: 0.090303 [19456/24872]: 0%| | 0/97 [00:08<?, ?it/s]
loss: 0.102226 [19712/24872]: 0%| | 0/97 [00:08<?, ?it/s]
loss: 0.094262 [19968/24872]: 0%| | 0/97 [00:08<?, ?it/s]
loss: 0.089361 [20224/24872]: 0%| | 0/97 [00:08<?, ?it/s]
loss: 0.100687 [20480/24872]: 0%| | 0/97 [00:08<?, ?it/s]
loss: 0.085418 [20736/24872]: 0%| | 0/97 [00:08<?, ?it/s]
loss: 0.085752 [20992/24872]: 0%| | 0/97 [00:08<?, ?it/s]
loss: 0.096263 [21248/24872]: 0%| | 0/97 [00:09<?, ?it/s]
loss: 0.104317 [21504/24872]: 0%| | 0/97 [00:09<?, ?it/s]
loss: 0.090328 [21760/24872]: 0%| | 0/97 [00:09<?, ?it/s]
loss: 0.069265 [22016/24872]: 0%| | 0/97 [00:09<?, ?it/s]
loss: 0.089251 [22272/24872]: 0%| | 0/97 [00:09<?, ?it/s]
loss: 0.096639 [22528/24872]: 0%| | 0/97 [00:09<?, ?it/s]
loss: 0.090888 [22784/24872]: 0%| | 0/97 [00:09<?, ?it/s]
loss: 0.082190 [23040/24872]: 0%| | 0/97 [00:09<?, ?it/s]
loss: 0.096483 [23296/24872]: 0%| | 0/97 [00:09<?, ?it/s]
loss: 0.083633 [23552/24872]: 0%| | 0/97 [00:10<?, ?it/s]
loss: 0.097105 [23808/24872]: 0%| | 0/97 [00:10<?, ?it/s]
loss: 0.092342 [24064/24872]: 0%| | 0/97 [00:10<?, ?it/s]
loss: 0.095951 [24320/24872]: 0%| | 0/97 [00:10<?, ?it/s]
loss: 0.092618 [24576/24872]: 0%| | 0/97 [00:10<?, ?it/s]
loss: 0.100383 [24832/24872]: 0%| | 0/97 [00:10<?, ?it/s]
loss: 0.069638 [24872/24872]: 0%| | 0/97 [00:10<?, ?it/s]
loss: 0.069638 [24872/24872]: : 98it [00:10, 9.25it/s]
Done!
test the network#
Do some qualitative tests: let the trained network predict some particle geometries and compare their Mie spectra with the target spectrum.
# Qualitative check: feed target spectra to the trained generator network
# and re-evaluate the suggested core-shell designs with Mie theory.
# Note: Ideally tests should be done on separate samples!
sca_test = q_sca_target_test

# Inference only: the results are just plotted, so skip building the
# autograd graph for the network and the Mie evaluation.
with torch.no_grad():
    pred = model(sca_test)

    # evaluate Mie
    r_c_test, r_s_test, eps_c_test, eps_s_test = nn_pred_to_mie_geometry(pred)
    res_mie = pmd.multishell.cross_sections(
        k0,
        r_c=r_c_test,
        eps_c=eps_c_test,
        r_s=r_s_test,
        eps_s=eps_s_test,
        eps_env=eps_env,
        n_max=n_max,
    )

# plot
# Draw distinct random samples so no panel of the 2x2 grid is a duplicate;
# cap the count in case fewer than four samples are available.
n_plot = min(4, len(sca_test))
i_plot = np.random.choice(len(sca_test), size=n_plot, replace=False)

# The wavelength axis is identical for every panel: convert it only once.
wl_np = wl0.detach().cpu().numpy()

plt.figure(figsize=(12, 10))
for i_n, i in enumerate(i_plot):
    plt.subplot(2, 2, i_n + 1)
    plt.plot(
        wl_np,
        sca_test[i].detach().cpu().numpy(),
        label="reference",
    )
    plt.plot(
        wl_np,
        res_mie["q_sca"][i].detach().cpu().numpy(),
        label="predicted particle",
    )
    plt.legend()
    plt.xlabel("wavelength (nm)")
    plt.ylabel("scat. efficiency")
plt.show()

Total running time of the script: (6 minutes 55.181 seconds)
Estimated memory usage: 873 MB