-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathtrain_xnli.log
5613 lines (5613 loc) · 446 KB
/
train_xnli.log
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
| load pretrained model state sucessfully.
Namespace(adam_epsilon=1e-08, batch_size=32, data_path='./data', device='gpu', epochs=5, init_from_ckpt=None, learning_rate=1.3e-05, max_seq_length=256, save_dir='outputs/xnli', seed=2333, warmup_proportion=0.1, weight_decay=0.001)
global step 10, epoch: 1, batch: 10, loss: 1.12490, accu: 0.34687, speed: 1.33 step/s
global step 20, epoch: 1, batch: 20, loss: 1.14057, accu: 0.35313, speed: 1.31 step/s
global step 30, epoch: 1, batch: 30, loss: 1.09969, accu: 0.36458, speed: 1.41 step/s
global step 40, epoch: 1, batch: 40, loss: 1.13844, accu: 0.36172, speed: 1.42 step/s
global step 50, epoch: 1, batch: 50, loss: 1.11730, accu: 0.36188, speed: 1.54 step/s
global step 60, epoch: 1, batch: 60, loss: 1.10247, accu: 0.36094, speed: 1.42 step/s
global step 70, epoch: 1, batch: 70, loss: 1.10941, accu: 0.35848, speed: 1.52 step/s
global step 80, epoch: 1, batch: 80, loss: 1.21211, accu: 0.35508, speed: 1.69 step/s
global step 90, epoch: 1, batch: 90, loss: 1.10595, accu: 0.35278, speed: 1.52 step/s
global step 100, epoch: 1, batch: 100, loss: 1.14724, accu: 0.35906, speed: 1.41 step/s
eval loss: 1.12818, accu: 0.33333
eval loss: 1.12815, accu: 0.33333
global step 110, epoch: 1, batch: 110, loss: 1.14886, accu: 0.30312, speed: 0.22 step/s
global step 120, epoch: 1, batch: 120, loss: 1.14197, accu: 0.33437, speed: 1.46 step/s
global step 130, epoch: 1, batch: 130, loss: 1.12746, accu: 0.33333, speed: 1.33 step/s
global step 140, epoch: 1, batch: 140, loss: 1.04260, accu: 0.34141, speed: 1.59 step/s
global step 150, epoch: 1, batch: 150, loss: 1.10918, accu: 0.35687, speed: 1.46 step/s
global step 160, epoch: 1, batch: 160, loss: 1.11827, accu: 0.36094, speed: 1.54 step/s
global step 170, epoch: 1, batch: 170, loss: 1.04369, accu: 0.36071, speed: 1.40 step/s
global step 180, epoch: 1, batch: 180, loss: 1.09878, accu: 0.36367, speed: 1.33 step/s
global step 190, epoch: 1, batch: 190, loss: 1.11679, accu: 0.36146, speed: 1.21 step/s
global step 200, epoch: 1, batch: 200, loss: 1.09300, accu: 0.36531, speed: 1.39 step/s
eval loss: 1.11698, accu: 0.33333
eval loss: 1.11694, accu: 0.33333
global step 210, epoch: 1, batch: 210, loss: 1.16840, accu: 0.35625, speed: 0.23 step/s
global step 220, epoch: 1, batch: 220, loss: 1.10032, accu: 0.35938, speed: 1.57 step/s
global step 230, epoch: 1, batch: 230, loss: 1.17784, accu: 0.35417, speed: 1.49 step/s
global step 240, epoch: 1, batch: 240, loss: 1.08445, accu: 0.37031, speed: 1.45 step/s
global step 250, epoch: 1, batch: 250, loss: 1.09002, accu: 0.36750, speed: 1.33 step/s
global step 260, epoch: 1, batch: 260, loss: 1.09584, accu: 0.36094, speed: 1.44 step/s
global step 270, epoch: 1, batch: 270, loss: 1.08854, accu: 0.36250, speed: 1.32 step/s
global step 280, epoch: 1, batch: 280, loss: 1.12974, accu: 0.35859, speed: 1.34 step/s
global step 290, epoch: 1, batch: 290, loss: 1.10306, accu: 0.35868, speed: 1.54 step/s
global step 300, epoch: 1, batch: 300, loss: 1.09177, accu: 0.35531, speed: 1.37 step/s
eval loss: 1.10675, accu: 0.33373
eval loss: 1.10665, accu: 0.33353
global step 310, epoch: 1, batch: 310, loss: 1.08862, accu: 0.35625, speed: 0.23 step/s
global step 320, epoch: 1, batch: 320, loss: 1.11010, accu: 0.34219, speed: 1.09 step/s
global step 330, epoch: 1, batch: 330, loss: 1.11816, accu: 0.34375, speed: 1.24 step/s
global step 340, epoch: 1, batch: 340, loss: 1.12353, accu: 0.35156, speed: 1.38 step/s
global step 350, epoch: 1, batch: 350, loss: 1.06815, accu: 0.35437, speed: 1.55 step/s
global step 360, epoch: 1, batch: 360, loss: 1.10407, accu: 0.35313, speed: 1.50 step/s
global step 370, epoch: 1, batch: 370, loss: 1.11063, accu: 0.35625, speed: 1.22 step/s
global step 380, epoch: 1, batch: 380, loss: 1.10244, accu: 0.35781, speed: 1.43 step/s
global step 390, epoch: 1, batch: 390, loss: 1.08906, accu: 0.35590, speed: 1.54 step/s
global step 400, epoch: 1, batch: 400, loss: 1.13605, accu: 0.35344, speed: 1.33 step/s
eval loss: 1.10766, accu: 0.33333
eval loss: 1.10758, accu: 0.33333
global step 410, epoch: 1, batch: 410, loss: 1.08432, accu: 0.35313, speed: 0.22 step/s
global step 420, epoch: 1, batch: 420, loss: 1.10415, accu: 0.35938, speed: 1.52 step/s
global step 430, epoch: 1, batch: 430, loss: 1.11253, accu: 0.37083, speed: 1.27 step/s
global step 440, epoch: 1, batch: 440, loss: 1.10544, accu: 0.37500, speed: 1.46 step/s
global step 450, epoch: 1, batch: 450, loss: 1.07930, accu: 0.36938, speed: 1.33 step/s
global step 460, epoch: 1, batch: 460, loss: 1.09270, accu: 0.36562, speed: 1.32 step/s
global step 470, epoch: 1, batch: 470, loss: 1.07992, accu: 0.36652, speed: 1.50 step/s
global step 480, epoch: 1, batch: 480, loss: 1.10945, accu: 0.36367, speed: 1.51 step/s
global step 490, epoch: 1, batch: 490, loss: 1.09436, accu: 0.36562, speed: 1.68 step/s
global step 500, epoch: 1, batch: 500, loss: 1.07939, accu: 0.37219, speed: 1.32 step/s
eval loss: 1.10256, accu: 0.36386
eval loss: 1.10238, accu: 0.36567
global step 510, epoch: 1, batch: 510, loss: 1.09887, accu: 0.35938, speed: 0.23 step/s
global step 520, epoch: 1, batch: 520, loss: 1.08340, accu: 0.37812, speed: 1.54 step/s
global step 530, epoch: 1, batch: 530, loss: 1.07578, accu: 0.37604, speed: 1.43 step/s
global step 540, epoch: 1, batch: 540, loss: 1.08468, accu: 0.37812, speed: 1.59 step/s
global step 550, epoch: 1, batch: 550, loss: 1.08123, accu: 0.38688, speed: 1.48 step/s
global step 560, epoch: 1, batch: 560, loss: 1.08864, accu: 0.39323, speed: 1.35 step/s
global step 570, epoch: 1, batch: 570, loss: 1.08411, accu: 0.39464, speed: 1.56 step/s
global step 580, epoch: 1, batch: 580, loss: 1.08059, accu: 0.39883, speed: 1.37 step/s
global step 590, epoch: 1, batch: 590, loss: 1.02967, accu: 0.40694, speed: 1.40 step/s
global step 600, epoch: 1, batch: 600, loss: 1.08429, accu: 0.41500, speed: 1.37 step/s
eval loss: 1.09648, accu: 0.43052
eval loss: 1.09614, accu: 0.43653
global step 610, epoch: 1, batch: 610, loss: 1.01148, accu: 0.50000, speed: 0.23 step/s
global step 620, epoch: 1, batch: 620, loss: 0.97754, accu: 0.51719, speed: 1.55 step/s
global step 630, epoch: 1, batch: 630, loss: 1.00071, accu: 0.52187, speed: 1.40 step/s
global step 640, epoch: 1, batch: 640, loss: 1.04459, accu: 0.52109, speed: 1.42 step/s
global step 650, epoch: 1, batch: 650, loss: 0.94670, accu: 0.52125, speed: 1.50 step/s
global step 660, epoch: 1, batch: 660, loss: 1.04749, accu: 0.53125, speed: 1.63 step/s
global step 670, epoch: 1, batch: 670, loss: 0.95771, accu: 0.53393, speed: 1.61 step/s
global step 680, epoch: 1, batch: 680, loss: 1.02432, accu: 0.53398, speed: 1.38 step/s
global step 690, epoch: 1, batch: 690, loss: 0.91321, accu: 0.54097, speed: 1.20 step/s
global step 700, epoch: 1, batch: 700, loss: 1.01674, accu: 0.54781, speed: 1.42 step/s
eval loss: 1.02873, accu: 0.50281
eval loss: 1.02723, accu: 0.51377
global step 710, epoch: 1, batch: 710, loss: 0.77218, accu: 0.60625, speed: 0.23 step/s
global step 720, epoch: 1, batch: 720, loss: 0.87504, accu: 0.62969, speed: 1.25 step/s
global step 730, epoch: 1, batch: 730, loss: 1.00399, accu: 0.62187, speed: 1.33 step/s
global step 740, epoch: 1, batch: 740, loss: 0.86978, accu: 0.63047, speed: 1.23 step/s
global step 750, epoch: 1, batch: 750, loss: 0.74207, accu: 0.63375, speed: 1.54 step/s
global step 760, epoch: 1, batch: 760, loss: 0.93283, accu: 0.63073, speed: 1.50 step/s
global step 770, epoch: 1, batch: 770, loss: 0.67613, accu: 0.64196, speed: 1.37 step/s
global step 780, epoch: 1, batch: 780, loss: 0.85630, accu: 0.63555, speed: 1.37 step/s
global step 790, epoch: 1, batch: 790, loss: 0.66040, accu: 0.63889, speed: 1.34 step/s
global step 800, epoch: 1, batch: 800, loss: 0.69336, accu: 0.64062, speed: 1.35 step/s
eval loss: 0.89645, accu: 0.60361
eval loss: 0.89363, accu: 0.61018
global step 810, epoch: 1, batch: 810, loss: 1.00067, accu: 0.67188, speed: 0.23 step/s
global step 820, epoch: 1, batch: 820, loss: 0.79295, accu: 0.66250, speed: 1.41 step/s
global step 830, epoch: 1, batch: 830, loss: 0.78580, accu: 0.66250, speed: 1.31 step/s
global step 840, epoch: 1, batch: 840, loss: 0.67515, accu: 0.66953, speed: 1.23 step/s
global step 850, epoch: 1, batch: 850, loss: 0.78591, accu: 0.66063, speed: 1.17 step/s
global step 860, epoch: 1, batch: 860, loss: 0.78989, accu: 0.65938, speed: 1.42 step/s
global step 870, epoch: 1, batch: 870, loss: 0.96540, accu: 0.66473, speed: 1.38 step/s
global step 880, epoch: 1, batch: 880, loss: 0.91342, accu: 0.66445, speed: 1.46 step/s
global step 890, epoch: 1, batch: 890, loss: 1.04807, accu: 0.66944, speed: 1.46 step/s
global step 900, epoch: 1, batch: 900, loss: 0.79948, accu: 0.67219, speed: 1.39 step/s
eval loss: 0.82297, accu: 0.65582
eval loss: 0.82702, accu: 0.64551
global step 910, epoch: 1, batch: 910, loss: 0.62711, accu: 0.69063, speed: 0.23 step/s
global step 920, epoch: 1, batch: 920, loss: 0.76998, accu: 0.67344, speed: 1.51 step/s
global step 930, epoch: 1, batch: 930, loss: 0.79566, accu: 0.68437, speed: 1.48 step/s
global step 940, epoch: 1, batch: 940, loss: 0.79468, accu: 0.68516, speed: 1.30 step/s
global step 950, epoch: 1, batch: 950, loss: 0.68966, accu: 0.69188, speed: 1.47 step/s
global step 960, epoch: 1, batch: 960, loss: 0.89984, accu: 0.69531, speed: 1.22 step/s
global step 970, epoch: 1, batch: 970, loss: 0.51888, accu: 0.69598, speed: 1.36 step/s
global step 980, epoch: 1, batch: 980, loss: 0.70546, accu: 0.69609, speed: 1.34 step/s
global step 990, epoch: 1, batch: 990, loss: 0.67239, accu: 0.69201, speed: 1.40 step/s
global step 1000, epoch: 1, batch: 1000, loss: 0.78272, accu: 0.68937, speed: 1.38 step/s
eval loss: 0.76524, accu: 0.67791
eval loss: 0.76881, accu: 0.66727
global step 1010, epoch: 1, batch: 1010, loss: 0.76976, accu: 0.71250, speed: 0.22 step/s
global step 1020, epoch: 1, batch: 1020, loss: 0.88769, accu: 0.69063, speed: 1.33 step/s
global step 1030, epoch: 1, batch: 1030, loss: 0.64597, accu: 0.70208, speed: 1.43 step/s
global step 1040, epoch: 1, batch: 1040, loss: 0.60114, accu: 0.70469, speed: 1.34 step/s
global step 1050, epoch: 1, batch: 1050, loss: 0.73541, accu: 0.70688, speed: 1.23 step/s
global step 1060, epoch: 1, batch: 1060, loss: 0.64679, accu: 0.71354, speed: 1.42 step/s
global step 1070, epoch: 1, batch: 1070, loss: 0.61133, accu: 0.71607, speed: 1.48 step/s
global step 1080, epoch: 1, batch: 1080, loss: 0.67303, accu: 0.71562, speed: 1.48 step/s
global step 1090, epoch: 1, batch: 1090, loss: 0.67402, accu: 0.71215, speed: 1.43 step/s
global step 1100, epoch: 1, batch: 1100, loss: 0.83447, accu: 0.71156, speed: 1.35 step/s
eval loss: 0.75705, accu: 0.66988
eval loss: 0.75860, accu: 0.66707
global step 1110, epoch: 1, batch: 1110, loss: 0.84944, accu: 0.74687, speed: 0.22 step/s
global step 1120, epoch: 1, batch: 1120, loss: 0.77252, accu: 0.71719, speed: 1.39 step/s
global step 1130, epoch: 1, batch: 1130, loss: 0.65281, accu: 0.71458, speed: 1.36 step/s
global step 1140, epoch: 1, batch: 1140, loss: 0.88268, accu: 0.70469, speed: 1.58 step/s
global step 1150, epoch: 1, batch: 1150, loss: 0.64647, accu: 0.70937, speed: 1.41 step/s
global step 1160, epoch: 1, batch: 1160, loss: 0.65900, accu: 0.70990, speed: 1.43 step/s
global step 1170, epoch: 1, batch: 1170, loss: 0.89572, accu: 0.71741, speed: 1.50 step/s
global step 1180, epoch: 1, batch: 1180, loss: 0.63291, accu: 0.71641, speed: 1.54 step/s
global step 1190, epoch: 1, batch: 1190, loss: 0.66181, accu: 0.72188, speed: 1.42 step/s
global step 1200, epoch: 1, batch: 1200, loss: 0.54719, accu: 0.72469, speed: 1.37 step/s
eval loss: 0.69512, accu: 0.70522
eval loss: 0.69350, accu: 0.70339
global step 1210, epoch: 1, batch: 1210, loss: 0.62684, accu: 0.73750, speed: 0.22 step/s
global step 1220, epoch: 1, batch: 1220, loss: 0.72632, accu: 0.71094, speed: 1.41 step/s
global step 1230, epoch: 1, batch: 1230, loss: 0.60307, accu: 0.71354, speed: 1.29 step/s
global step 1240, epoch: 1, batch: 1240, loss: 0.75204, accu: 0.70937, speed: 1.45 step/s
global step 1250, epoch: 1, batch: 1250, loss: 0.95195, accu: 0.71125, speed: 1.52 step/s
global step 1260, epoch: 1, batch: 1260, loss: 0.52446, accu: 0.71094, speed: 1.40 step/s
global step 1270, epoch: 1, batch: 1270, loss: 0.80726, accu: 0.70804, speed: 1.22 step/s
global step 1280, epoch: 1, batch: 1280, loss: 0.83975, accu: 0.70625, speed: 1.42 step/s
global step 1290, epoch: 1, batch: 1290, loss: 0.45627, accu: 0.71111, speed: 1.40 step/s
global step 1300, epoch: 1, batch: 1300, loss: 0.55392, accu: 0.71344, speed: 1.25 step/s
eval loss: 0.69536, accu: 0.70803
eval loss: 0.69502, accu: 0.70559
global step 1310, epoch: 1, batch: 1310, loss: 0.62238, accu: 0.69688, speed: 0.22 step/s
global step 1320, epoch: 1, batch: 1320, loss: 0.91776, accu: 0.70937, speed: 1.47 step/s
global step 1330, epoch: 1, batch: 1330, loss: 0.88990, accu: 0.71042, speed: 1.50 step/s
global step 1340, epoch: 1, batch: 1340, loss: 0.71551, accu: 0.72344, speed: 1.22 step/s
global step 1350, epoch: 1, batch: 1350, loss: 0.76740, accu: 0.72437, speed: 1.12 step/s
global step 1360, epoch: 1, batch: 1360, loss: 0.57732, accu: 0.72604, speed: 1.36 step/s
global step 1370, epoch: 1, batch: 1370, loss: 0.73060, accu: 0.72366, speed: 1.35 step/s
global step 1380, epoch: 1, batch: 1380, loss: 0.72608, accu: 0.72695, speed: 1.32 step/s
global step 1390, epoch: 1, batch: 1390, loss: 0.68720, accu: 0.72326, speed: 1.30 step/s
global step 1400, epoch: 1, batch: 1400, loss: 0.69454, accu: 0.71906, speed: 1.34 step/s
eval loss: 0.70807, accu: 0.68916
eval loss: 0.71539, accu: 0.68962
global step 1410, epoch: 1, batch: 1410, loss: 0.68726, accu: 0.70000, speed: 0.23 step/s
global step 1420, epoch: 1, batch: 1420, loss: 0.67999, accu: 0.70625, speed: 1.38 step/s
global step 1430, epoch: 1, batch: 1430, loss: 0.40443, accu: 0.72813, speed: 1.30 step/s
global step 1440, epoch: 1, batch: 1440, loss: 0.80389, accu: 0.72891, speed: 1.44 step/s
global step 1450, epoch: 1, batch: 1450, loss: 0.79473, accu: 0.73000, speed: 1.48 step/s
global step 1460, epoch: 1, batch: 1460, loss: 0.63667, accu: 0.73021, speed: 1.36 step/s
global step 1470, epoch: 1, batch: 1470, loss: 0.48847, accu: 0.72946, speed: 1.43 step/s
global step 1480, epoch: 1, batch: 1480, loss: 0.65317, accu: 0.72617, speed: 1.50 step/s
global step 1490, epoch: 1, batch: 1490, loss: 0.87097, accu: 0.71736, speed: 1.35 step/s
global step 1500, epoch: 1, batch: 1500, loss: 0.63862, accu: 0.71625, speed: 1.51 step/s
eval loss: 0.67964, accu: 0.72289
eval loss: 0.68833, accu: 0.71417
global step 1510, epoch: 1, batch: 1510, loss: 0.72937, accu: 0.73125, speed: 0.21 step/s
global step 1520, epoch: 1, batch: 1520, loss: 0.42832, accu: 0.73125, speed: 1.47 step/s
global step 1530, epoch: 1, batch: 1530, loss: 0.86435, accu: 0.72188, speed: 1.37 step/s
global step 1540, epoch: 1, batch: 1540, loss: 0.73680, accu: 0.72656, speed: 1.36 step/s
global step 1550, epoch: 1, batch: 1550, loss: 0.39240, accu: 0.72250, speed: 1.30 step/s
global step 1560, epoch: 1, batch: 1560, loss: 0.46045, accu: 0.71875, speed: 1.51 step/s
global step 1570, epoch: 1, batch: 1570, loss: 0.62907, accu: 0.71562, speed: 1.56 step/s
global step 1580, epoch: 1, batch: 1580, loss: 0.54271, accu: 0.71562, speed: 1.61 step/s
global step 1590, epoch: 1, batch: 1590, loss: 0.64951, accu: 0.71528, speed: 1.53 step/s
global step 1600, epoch: 1, batch: 1600, loss: 0.80722, accu: 0.71719, speed: 1.43 step/s
eval loss: 0.65871, accu: 0.73333
eval loss: 0.66325, accu: 0.71737
global step 1610, epoch: 1, batch: 1610, loss: 0.41683, accu: 0.78750, speed: 0.23 step/s
global step 1620, epoch: 1, batch: 1620, loss: 0.92630, accu: 0.75000, speed: 1.42 step/s
global step 1630, epoch: 1, batch: 1630, loss: 0.71765, accu: 0.75000, speed: 1.53 step/s
global step 1640, epoch: 1, batch: 1640, loss: 0.70108, accu: 0.74062, speed: 1.54 step/s
global step 1650, epoch: 1, batch: 1650, loss: 0.74963, accu: 0.74250, speed: 1.32 step/s
global step 1660, epoch: 1, batch: 1660, loss: 0.63454, accu: 0.73646, speed: 1.23 step/s
global step 1670, epoch: 1, batch: 1670, loss: 0.73615, accu: 0.72813, speed: 1.34 step/s
global step 1680, epoch: 1, batch: 1680, loss: 0.53732, accu: 0.72891, speed: 1.30 step/s
global step 1690, epoch: 1, batch: 1690, loss: 0.58567, accu: 0.73403, speed: 1.36 step/s
global step 1700, epoch: 1, batch: 1700, loss: 0.65009, accu: 0.73375, speed: 1.19 step/s
eval loss: 0.69135, accu: 0.72008
eval loss: 0.69404, accu: 0.70958
global step 1710, epoch: 1, batch: 1710, loss: 0.74477, accu: 0.74375, speed: 0.23 step/s
global step 1720, epoch: 1, batch: 1720, loss: 0.84217, accu: 0.74531, speed: 1.47 step/s
global step 1730, epoch: 1, batch: 1730, loss: 0.55750, accu: 0.75417, speed: 1.47 step/s
global step 1740, epoch: 1, batch: 1740, loss: 0.61614, accu: 0.75938, speed: 1.41 step/s
global step 1750, epoch: 1, batch: 1750, loss: 0.70674, accu: 0.75687, speed: 1.26 step/s
global step 1760, epoch: 1, batch: 1760, loss: 0.39629, accu: 0.75417, speed: 1.36 step/s
global step 1770, epoch: 1, batch: 1770, loss: 0.56009, accu: 0.75357, speed: 1.48 step/s
global step 1780, epoch: 1, batch: 1780, loss: 0.88191, accu: 0.75156, speed: 1.34 step/s
global step 1790, epoch: 1, batch: 1790, loss: 0.90446, accu: 0.74826, speed: 1.38 step/s
global step 1800, epoch: 1, batch: 1800, loss: 0.54602, accu: 0.75031, speed: 1.16 step/s
eval loss: 0.62166, accu: 0.74819
eval loss: 0.62228, accu: 0.74052
global step 1810, epoch: 1, batch: 1810, loss: 0.62781, accu: 0.71875, speed: 0.22 step/s
global step 1820, epoch: 1, batch: 1820, loss: 0.71395, accu: 0.72656, speed: 1.48 step/s
global step 1830, epoch: 1, batch: 1830, loss: 0.90535, accu: 0.73125, speed: 1.48 step/s
global step 1840, epoch: 1, batch: 1840, loss: 0.65806, accu: 0.73125, speed: 1.59 step/s
global step 1850, epoch: 1, batch: 1850, loss: 0.45598, accu: 0.73938, speed: 1.39 step/s
global step 1860, epoch: 1, batch: 1860, loss: 0.53504, accu: 0.73750, speed: 1.15 step/s
global step 1870, epoch: 1, batch: 1870, loss: 0.75516, accu: 0.72991, speed: 1.26 step/s
global step 1880, epoch: 1, batch: 1880, loss: 0.72585, accu: 0.73438, speed: 1.48 step/s
global step 1890, epoch: 1, batch: 1890, loss: 0.49745, accu: 0.73264, speed: 1.61 step/s
global step 1900, epoch: 1, batch: 1900, loss: 0.66796, accu: 0.73750, speed: 1.42 step/s
eval loss: 0.63124, accu: 0.74538
eval loss: 0.62682, accu: 0.73892
global step 1910, epoch: 1, batch: 1910, loss: 0.58380, accu: 0.74375, speed: 0.22 step/s
global step 1920, epoch: 1, batch: 1920, loss: 0.59554, accu: 0.75781, speed: 1.43 step/s
global step 1930, epoch: 1, batch: 1930, loss: 0.70244, accu: 0.75625, speed: 1.44 step/s
global step 1940, epoch: 1, batch: 1940, loss: 0.51621, accu: 0.75625, speed: 1.53 step/s
global step 1950, epoch: 1, batch: 1950, loss: 0.51477, accu: 0.75500, speed: 1.37 step/s
global step 1960, epoch: 1, batch: 1960, loss: 0.87731, accu: 0.75521, speed: 1.59 step/s
global step 1970, epoch: 1, batch: 1970, loss: 0.53372, accu: 0.75134, speed: 1.57 step/s
global step 1980, epoch: 1, batch: 1980, loss: 0.82666, accu: 0.75352, speed: 1.25 step/s
global step 1990, epoch: 1, batch: 1990, loss: 0.70996, accu: 0.75000, speed: 1.47 step/s
global step 2000, epoch: 1, batch: 2000, loss: 0.67931, accu: 0.74719, speed: 1.31 step/s
eval loss: 0.61300, accu: 0.76908
eval loss: 0.61011, accu: 0.74451
global step 2010, epoch: 1, batch: 2010, loss: 0.56447, accu: 0.72500, speed: 0.23 step/s
global step 2020, epoch: 1, batch: 2020, loss: 0.74016, accu: 0.71250, speed: 1.45 step/s
global step 2030, epoch: 1, batch: 2030, loss: 0.55931, accu: 0.72188, speed: 1.46 step/s
global step 2040, epoch: 1, batch: 2040, loss: 0.41831, accu: 0.72891, speed: 1.41 step/s
global step 2050, epoch: 1, batch: 2050, loss: 0.52365, accu: 0.72688, speed: 1.39 step/s
global step 2060, epoch: 1, batch: 2060, loss: 0.51341, accu: 0.73281, speed: 1.41 step/s
global step 2070, epoch: 1, batch: 2070, loss: 0.78886, accu: 0.73036, speed: 1.25 step/s
global step 2080, epoch: 1, batch: 2080, loss: 0.62814, accu: 0.73242, speed: 1.43 step/s
global step 2090, epoch: 1, batch: 2090, loss: 0.51113, accu: 0.72882, speed: 1.42 step/s
global step 2100, epoch: 1, batch: 2100, loss: 0.44741, accu: 0.73344, speed: 1.31 step/s
eval loss: 0.58294, accu: 0.77550
eval loss: 0.58325, accu: 0.76287
global step 2110, epoch: 1, batch: 2110, loss: 0.52975, accu: 0.76562, speed: 0.23 step/s
global step 2120, epoch: 1, batch: 2120, loss: 0.70574, accu: 0.75000, speed: 1.26 step/s
global step 2130, epoch: 1, batch: 2130, loss: 0.55078, accu: 0.75833, speed: 1.39 step/s
global step 2140, epoch: 1, batch: 2140, loss: 0.77501, accu: 0.74687, speed: 1.38 step/s
global step 2150, epoch: 1, batch: 2150, loss: 0.78090, accu: 0.74813, speed: 1.53 step/s
global step 2160, epoch: 1, batch: 2160, loss: 0.36416, accu: 0.75156, speed: 1.37 step/s
global step 2170, epoch: 1, batch: 2170, loss: 0.53391, accu: 0.75446, speed: 1.32 step/s
global step 2180, epoch: 1, batch: 2180, loss: 0.66036, accu: 0.75547, speed: 1.44 step/s
global step 2190, epoch: 1, batch: 2190, loss: 0.53146, accu: 0.75694, speed: 1.48 step/s
global step 2200, epoch: 1, batch: 2200, loss: 0.77384, accu: 0.75500, speed: 1.47 step/s
eval loss: 0.62510, accu: 0.73373
eval loss: 0.62315, accu: 0.73234
global step 2210, epoch: 1, batch: 2210, loss: 0.60881, accu: 0.73750, speed: 0.22 step/s
global step 2220, epoch: 1, batch: 2220, loss: 0.78095, accu: 0.71875, speed: 1.18 step/s
global step 2230, epoch: 1, batch: 2230, loss: 0.65106, accu: 0.73542, speed: 1.39 step/s
global step 2240, epoch: 1, batch: 2240, loss: 0.46534, accu: 0.73750, speed: 1.43 step/s
global step 2250, epoch: 1, batch: 2250, loss: 0.59856, accu: 0.73938, speed: 1.42 step/s
global step 2260, epoch: 1, batch: 2260, loss: 0.73848, accu: 0.73906, speed: 1.38 step/s
global step 2270, epoch: 1, batch: 2270, loss: 0.55510, accu: 0.73973, speed: 1.53 step/s
global step 2280, epoch: 1, batch: 2280, loss: 0.59557, accu: 0.74219, speed: 1.22 step/s
global step 2290, epoch: 1, batch: 2290, loss: 0.74745, accu: 0.74514, speed: 1.42 step/s
global step 2300, epoch: 1, batch: 2300, loss: 0.40103, accu: 0.74500, speed: 1.35 step/s
eval loss: 0.62197, accu: 0.75663
eval loss: 0.61512, accu: 0.75130
global step 2310, epoch: 1, batch: 2310, loss: 0.56758, accu: 0.74375, speed: 0.22 step/s
global step 2320, epoch: 1, batch: 2320, loss: 0.83843, accu: 0.73750, speed: 1.32 step/s
global step 2330, epoch: 1, batch: 2330, loss: 0.65889, accu: 0.74479, speed: 1.44 step/s
global step 2340, epoch: 1, batch: 2340, loss: 0.51842, accu: 0.75000, speed: 1.43 step/s
global step 2350, epoch: 1, batch: 2350, loss: 0.74740, accu: 0.75313, speed: 1.31 step/s
global step 2360, epoch: 1, batch: 2360, loss: 0.66130, accu: 0.75781, speed: 1.28 step/s
global step 2370, epoch: 1, batch: 2370, loss: 0.62878, accu: 0.75670, speed: 1.18 step/s
global step 2380, epoch: 1, batch: 2380, loss: 0.63356, accu: 0.75117, speed: 1.59 step/s
global step 2390, epoch: 1, batch: 2390, loss: 0.66178, accu: 0.75382, speed: 1.26 step/s
global step 2400, epoch: 1, batch: 2400, loss: 0.45839, accu: 0.75281, speed: 1.52 step/s
eval loss: 0.56737, accu: 0.77711
eval loss: 0.56801, accu: 0.77725
global step 2410, epoch: 1, batch: 2410, loss: 0.61094, accu: 0.73438, speed: 0.23 step/s
global step 2420, epoch: 1, batch: 2420, loss: 0.61760, accu: 0.72969, speed: 1.41 step/s
global step 2430, epoch: 1, batch: 2430, loss: 0.53419, accu: 0.74062, speed: 1.58 step/s
global step 2440, epoch: 1, batch: 2440, loss: 0.45917, accu: 0.73984, speed: 1.25 step/s
global step 2450, epoch: 1, batch: 2450, loss: 0.76828, accu: 0.74000, speed: 1.28 step/s
global step 2460, epoch: 1, batch: 2460, loss: 0.55927, accu: 0.74531, speed: 1.51 step/s
global step 2470, epoch: 1, batch: 2470, loss: 0.48401, accu: 0.75268, speed: 1.68 step/s
global step 2480, epoch: 1, batch: 2480, loss: 0.77665, accu: 0.75742, speed: 1.59 step/s
global step 2490, epoch: 1, batch: 2490, loss: 0.54847, accu: 0.75625, speed: 1.35 step/s
global step 2500, epoch: 1, batch: 2500, loss: 0.56466, accu: 0.75719, speed: 1.49 step/s
eval loss: 0.57903, accu: 0.76867
eval loss: 0.58314, accu: 0.76687
global step 2510, epoch: 1, batch: 2510, loss: 0.83989, accu: 0.76562, speed: 0.22 step/s
global step 2520, epoch: 1, batch: 2520, loss: 0.32823, accu: 0.76406, speed: 1.49 step/s
global step 2530, epoch: 1, batch: 2530, loss: 0.67307, accu: 0.76250, speed: 1.42 step/s
global step 2540, epoch: 1, batch: 2540, loss: 0.58046, accu: 0.76719, speed: 1.44 step/s
global step 2550, epoch: 1, batch: 2550, loss: 0.39509, accu: 0.76313, speed: 1.21 step/s
global step 2560, epoch: 1, batch: 2560, loss: 0.41375, accu: 0.76771, speed: 1.25 step/s
global step 2570, epoch: 1, batch: 2570, loss: 0.55248, accu: 0.77009, speed: 1.46 step/s
global step 2580, epoch: 1, batch: 2580, loss: 0.58758, accu: 0.76445, speed: 1.50 step/s
global step 2590, epoch: 1, batch: 2590, loss: 0.64397, accu: 0.76146, speed: 1.36 step/s
global step 2600, epoch: 1, batch: 2600, loss: 0.65019, accu: 0.76156, speed: 1.26 step/s
eval loss: 0.58263, accu: 0.77269
eval loss: 0.57824, accu: 0.76627
global step 2610, epoch: 1, batch: 2610, loss: 0.60261, accu: 0.74687, speed: 0.22 step/s
global step 2620, epoch: 1, batch: 2620, loss: 0.68283, accu: 0.73750, speed: 1.37 step/s
global step 2630, epoch: 1, batch: 2630, loss: 0.54313, accu: 0.75833, speed: 1.41 step/s
global step 2640, epoch: 1, batch: 2640, loss: 0.57436, accu: 0.74609, speed: 1.52 step/s
global step 2650, epoch: 1, batch: 2650, loss: 0.71620, accu: 0.74813, speed: 1.36 step/s
global step 2660, epoch: 1, batch: 2660, loss: 0.49277, accu: 0.75469, speed: 1.43 step/s
global step 2670, epoch: 1, batch: 2670, loss: 0.66495, accu: 0.75536, speed: 1.44 step/s
global step 2680, epoch: 1, batch: 2680, loss: 0.44510, accu: 0.75508, speed: 1.48 step/s
global step 2690, epoch: 1, batch: 2690, loss: 0.64216, accu: 0.75694, speed: 1.21 step/s
global step 2700, epoch: 1, batch: 2700, loss: 0.48646, accu: 0.75531, speed: 1.35 step/s
eval loss: 0.55912, accu: 0.77711
eval loss: 0.56455, accu: 0.77804
global step 2710, epoch: 1, batch: 2710, loss: 0.58029, accu: 0.76250, speed: 0.22 step/s
global step 2720, epoch: 1, batch: 2720, loss: 0.66071, accu: 0.76250, speed: 1.33 step/s
global step 2730, epoch: 1, batch: 2730, loss: 0.58205, accu: 0.75313, speed: 1.53 step/s
global step 2740, epoch: 1, batch: 2740, loss: 0.55360, accu: 0.75625, speed: 1.35 step/s
global step 2750, epoch: 1, batch: 2750, loss: 0.55186, accu: 0.74875, speed: 1.44 step/s
global step 2760, epoch: 1, batch: 2760, loss: 0.90224, accu: 0.74531, speed: 1.33 step/s
global step 2770, epoch: 1, batch: 2770, loss: 0.70096, accu: 0.74687, speed: 1.40 step/s
global step 2780, epoch: 1, batch: 2780, loss: 0.46447, accu: 0.74414, speed: 1.54 step/s
global step 2790, epoch: 1, batch: 2790, loss: 0.93602, accu: 0.74618, speed: 1.20 step/s
global step 2800, epoch: 1, batch: 2800, loss: 0.67449, accu: 0.74687, speed: 1.43 step/s
eval loss: 0.56068, accu: 0.77671
eval loss: 0.55946, accu: 0.77725
global step 2810, epoch: 1, batch: 2810, loss: 0.62505, accu: 0.76250, speed: 0.22 step/s
global step 2820, epoch: 1, batch: 2820, loss: 0.64258, accu: 0.76406, speed: 1.32 step/s
global step 2830, epoch: 1, batch: 2830, loss: 0.56067, accu: 0.75313, speed: 1.07 step/s
global step 2840, epoch: 1, batch: 2840, loss: 0.37780, accu: 0.76172, speed: 1.24 step/s
global step 2850, epoch: 1, batch: 2850, loss: 0.59337, accu: 0.76687, speed: 1.28 step/s
global step 2860, epoch: 1, batch: 2860, loss: 0.31042, accu: 0.76302, speed: 1.36 step/s
global step 2870, epoch: 1, batch: 2870, loss: 0.62872, accu: 0.76295, speed: 1.60 step/s
global step 2880, epoch: 1, batch: 2880, loss: 0.47021, accu: 0.76172, speed: 1.38 step/s
global step 2890, epoch: 1, batch: 2890, loss: 0.45872, accu: 0.76111, speed: 1.31 step/s
global step 2900, epoch: 1, batch: 2900, loss: 0.57430, accu: 0.75594, speed: 1.58 step/s
eval loss: 0.57711, accu: 0.75984
eval loss: 0.57865, accu: 0.76607
global step 2910, epoch: 1, batch: 2910, loss: 0.74698, accu: 0.78438, speed: 0.23 step/s
global step 2920, epoch: 1, batch: 2920, loss: 0.59629, accu: 0.79844, speed: 1.51 step/s
global step 2930, epoch: 1, batch: 2930, loss: 0.43808, accu: 0.77812, speed: 1.37 step/s
global step 2940, epoch: 1, batch: 2940, loss: 0.82839, accu: 0.77578, speed: 1.51 step/s
global step 2950, epoch: 1, batch: 2950, loss: 0.63075, accu: 0.77250, speed: 1.20 step/s
global step 2960, epoch: 1, batch: 2960, loss: 0.81318, accu: 0.76562, speed: 1.25 step/s
global step 2970, epoch: 1, batch: 2970, loss: 0.62353, accu: 0.76607, speed: 1.25 step/s
global step 2980, epoch: 1, batch: 2980, loss: 0.56522, accu: 0.75781, speed: 1.39 step/s
global step 2990, epoch: 1, batch: 2990, loss: 0.64261, accu: 0.75868, speed: 1.55 step/s
global step 3000, epoch: 1, batch: 3000, loss: 0.58861, accu: 0.75813, speed: 1.46 step/s
eval loss: 0.61460, accu: 0.74538
eval loss: 0.61603, accu: 0.74012
global step 3010, epoch: 1, batch: 3010, loss: 0.51227, accu: 0.75625, speed: 0.23 step/s
global step 3020, epoch: 1, batch: 3020, loss: 0.76739, accu: 0.75938, speed: 1.38 step/s
global step 3030, epoch: 1, batch: 3030, loss: 0.50490, accu: 0.76771, speed: 1.31 step/s
global step 3040, epoch: 1, batch: 3040, loss: 0.61490, accu: 0.76016, speed: 1.55 step/s
global step 3050, epoch: 1, batch: 3050, loss: 0.71796, accu: 0.75313, speed: 1.37 step/s
global step 3060, epoch: 1, batch: 3060, loss: 0.62751, accu: 0.75104, speed: 1.35 step/s
global step 3070, epoch: 1, batch: 3070, loss: 0.49878, accu: 0.75714, speed: 1.53 step/s
global step 3080, epoch: 1, batch: 3080, loss: 0.63425, accu: 0.75898, speed: 1.48 step/s
global step 3090, epoch: 1, batch: 3090, loss: 0.79640, accu: 0.76146, speed: 1.39 step/s
global step 3100, epoch: 1, batch: 3100, loss: 0.34132, accu: 0.76438, speed: 1.38 step/s
eval loss: 0.57155, accu: 0.76948
eval loss: 0.57475, accu: 0.77186
global step 3110, epoch: 1, batch: 3110, loss: 0.58648, accu: 0.78750, speed: 0.22 step/s
global step 3120, epoch: 1, batch: 3120, loss: 0.54495, accu: 0.77969, speed: 1.38 step/s
global step 3130, epoch: 1, batch: 3130, loss: 0.70693, accu: 0.76146, speed: 1.70 step/s
global step 3140, epoch: 1, batch: 3140, loss: 0.88232, accu: 0.75469, speed: 1.44 step/s
global step 3150, epoch: 1, batch: 3150, loss: 0.70680, accu: 0.74438, speed: 1.47 step/s
global step 3160, epoch: 1, batch: 3160, loss: 0.64448, accu: 0.73958, speed: 1.49 step/s
global step 3170, epoch: 1, batch: 3170, loss: 0.77147, accu: 0.74018, speed: 1.41 step/s
global step 3180, epoch: 1, batch: 3180, loss: 0.81749, accu: 0.74297, speed: 1.36 step/s
global step 3190, epoch: 1, batch: 3190, loss: 0.54138, accu: 0.74687, speed: 1.68 step/s
global step 3200, epoch: 1, batch: 3200, loss: 0.97783, accu: 0.74844, speed: 1.45 step/s
eval loss: 0.59766, accu: 0.76546
eval loss: 0.60141, accu: 0.75429
global step 3210, epoch: 1, batch: 3210, loss: 0.56208, accu: 0.76562, speed: 0.22 step/s
global step 3220, epoch: 1, batch: 3220, loss: 0.67379, accu: 0.75781, speed: 1.49 step/s
global step 3230, epoch: 1, batch: 3230, loss: 0.44517, accu: 0.76250, speed: 1.35 step/s
global step 3240, epoch: 1, batch: 3240, loss: 0.68212, accu: 0.76875, speed: 1.31 step/s
global step 3250, epoch: 1, batch: 3250, loss: 0.55999, accu: 0.76562, speed: 1.38 step/s
global step 3260, epoch: 1, batch: 3260, loss: 0.38217, accu: 0.76302, speed: 1.51 step/s
global step 3270, epoch: 1, batch: 3270, loss: 0.50724, accu: 0.75848, speed: 1.63 step/s
global step 3280, epoch: 1, batch: 3280, loss: 0.76884, accu: 0.75664, speed: 1.57 step/s
global step 3290, epoch: 1, batch: 3290, loss: 0.49573, accu: 0.76007, speed: 1.56 step/s
global step 3300, epoch: 1, batch: 3300, loss: 0.41119, accu: 0.76062, speed: 1.27 step/s
eval loss: 0.57892, accu: 0.76546
eval loss: 0.58600, accu: 0.75689
global step 3310, epoch: 1, batch: 3310, loss: 1.04214, accu: 0.76562, speed: 0.23 step/s
global step 3320, epoch: 1, batch: 3320, loss: 0.61604, accu: 0.78438, speed: 1.23 step/s
global step 3330, epoch: 1, batch: 3330, loss: 0.66517, accu: 0.76562, speed: 1.32 step/s
global step 3340, epoch: 1, batch: 3340, loss: 0.71997, accu: 0.75625, speed: 1.36 step/s
global step 3350, epoch: 1, batch: 3350, loss: 0.79614, accu: 0.74562, speed: 1.13 step/s
global step 3360, epoch: 1, batch: 3360, loss: 0.57947, accu: 0.74740, speed: 1.37 step/s
global step 3370, epoch: 1, batch: 3370, loss: 0.48515, accu: 0.74955, speed: 1.50 step/s
global step 3380, epoch: 1, batch: 3380, loss: 0.71071, accu: 0.74766, speed: 1.54 step/s
global step 3390, epoch: 1, batch: 3390, loss: 0.57605, accu: 0.74757, speed: 1.31 step/s
global step 3400, epoch: 1, batch: 3400, loss: 0.50465, accu: 0.74938, speed: 1.63 step/s
eval loss: 0.59506, accu: 0.76867
eval loss: 0.59436, accu: 0.76228
global step 3410, epoch: 1, batch: 3410, loss: 0.66473, accu: 0.78438, speed: 0.23 step/s
global step 3420, epoch: 1, batch: 3420, loss: 0.46671, accu: 0.76562, speed: 1.59 step/s
global step 3430, epoch: 1, batch: 3430, loss: 0.55217, accu: 0.77083, speed: 1.51 step/s
global step 3440, epoch: 1, batch: 3440, loss: 0.61373, accu: 0.77266, speed: 1.31 step/s
global step 3450, epoch: 1, batch: 3450, loss: 0.52614, accu: 0.76438, speed: 1.48 step/s
global step 3460, epoch: 1, batch: 3460, loss: 0.64574, accu: 0.76719, speed: 1.23 step/s
global step 3470, epoch: 1, batch: 3470, loss: 0.61786, accu: 0.76250, speed: 1.28 step/s
global step 3480, epoch: 1, batch: 3480, loss: 0.66831, accu: 0.76484, speed: 1.13 step/s
global step 3490, epoch: 1, batch: 3490, loss: 0.40890, accu: 0.76215, speed: 1.23 step/s
global step 3500, epoch: 1, batch: 3500, loss: 0.66647, accu: 0.76000, speed: 1.43 step/s
eval loss: 0.56621, accu: 0.77671
eval loss: 0.56476, accu: 0.77665
global step 3510, epoch: 1, batch: 3510, loss: 0.73741, accu: 0.71875, speed: 0.22 step/s
global step 3520, epoch: 1, batch: 3520, loss: 0.76745, accu: 0.72656, speed: 1.35 step/s
global step 3530, epoch: 1, batch: 3530, loss: 0.47708, accu: 0.74271, speed: 1.46 step/s
global step 3540, epoch: 1, batch: 3540, loss: 0.50658, accu: 0.74609, speed: 1.32 step/s
global step 3550, epoch: 1, batch: 3550, loss: 0.46316, accu: 0.75000, speed: 1.36 step/s
global step 3560, epoch: 1, batch: 3560, loss: 0.64057, accu: 0.74896, speed: 1.42 step/s
global step 3570, epoch: 1, batch: 3570, loss: 0.46351, accu: 0.74375, speed: 1.47 step/s
global step 3580, epoch: 1, batch: 3580, loss: 0.53492, accu: 0.74180, speed: 1.57 step/s
global step 3590, epoch: 1, batch: 3590, loss: 0.54071, accu: 0.74410, speed: 1.28 step/s
global step 3600, epoch: 1, batch: 3600, loss: 0.48528, accu: 0.74313, speed: 1.19 step/s
eval loss: 0.55427, accu: 0.78675
eval loss: 0.55256, accu: 0.77964
global step 3610, epoch: 1, batch: 3610, loss: 0.50552, accu: 0.80312, speed: 0.23 step/s
global step 3620, epoch: 1, batch: 3620, loss: 0.60497, accu: 0.77500, speed: 1.31 step/s
global step 3630, epoch: 1, batch: 3630, loss: 0.57389, accu: 0.76667, speed: 1.53 step/s
global step 3640, epoch: 1, batch: 3640, loss: 0.80673, accu: 0.76562, speed: 1.39 step/s
global step 3650, epoch: 1, batch: 3650, loss: 0.65663, accu: 0.76313, speed: 1.35 step/s
global step 3660, epoch: 1, batch: 3660, loss: 0.78934, accu: 0.75625, speed: 1.40 step/s
global step 3670, epoch: 1, batch: 3670, loss: 0.61689, accu: 0.75491, speed: 1.49 step/s
global step 3680, epoch: 1, batch: 3680, loss: 0.42302, accu: 0.75508, speed: 1.51 step/s
global step 3690, epoch: 1, batch: 3690, loss: 0.84522, accu: 0.75208, speed: 1.49 step/s
global step 3700, epoch: 1, batch: 3700, loss: 0.67774, accu: 0.75219, speed: 1.36 step/s
eval loss: 0.57872, accu: 0.77269
eval loss: 0.58638, accu: 0.77026
global step 3710, epoch: 1, batch: 3710, loss: 0.67076, accu: 0.76250, speed: 0.23 step/s
global step 3720, epoch: 1, batch: 3720, loss: 0.81531, accu: 0.78125, speed: 1.50 step/s
global step 3730, epoch: 1, batch: 3730, loss: 0.73125, accu: 0.76458, speed: 1.37 step/s
global step 3740, epoch: 1, batch: 3740, loss: 0.72811, accu: 0.76016, speed: 1.27 step/s
global step 3750, epoch: 1, batch: 3750, loss: 0.32958, accu: 0.75813, speed: 1.56 step/s
global step 3760, epoch: 1, batch: 3760, loss: 0.49373, accu: 0.75625, speed: 1.36 step/s
global step 3770, epoch: 1, batch: 3770, loss: 0.48209, accu: 0.75402, speed: 1.50 step/s
global step 3780, epoch: 1, batch: 3780, loss: 0.61103, accu: 0.75078, speed: 1.29 step/s
global step 3790, epoch: 1, batch: 3790, loss: 0.49618, accu: 0.75625, speed: 1.17 step/s
global step 3800, epoch: 1, batch: 3800, loss: 0.68718, accu: 0.75813, speed: 1.24 step/s
eval loss: 0.55754, accu: 0.78193
eval loss: 0.56938, accu: 0.78164
global step 3810, epoch: 1, batch: 3810, loss: 0.48994, accu: 0.76250, speed: 0.23 step/s
global step 3820, epoch: 1, batch: 3820, loss: 0.78233, accu: 0.74062, speed: 1.54 step/s
global step 3830, epoch: 1, batch: 3830, loss: 0.66828, accu: 0.75313, speed: 1.17 step/s
global step 3840, epoch: 1, batch: 3840, loss: 0.62108, accu: 0.75156, speed: 1.27 step/s
global step 3850, epoch: 1, batch: 3850, loss: 0.48433, accu: 0.75000, speed: 1.41 step/s
global step 3860, epoch: 1, batch: 3860, loss: 0.66746, accu: 0.75260, speed: 1.40 step/s
global step 3870, epoch: 1, batch: 3870, loss: 0.70173, accu: 0.75446, speed: 1.50 step/s
global step 3880, epoch: 1, batch: 3880, loss: 0.45177, accu: 0.75742, speed: 1.42 step/s
global step 3890, epoch: 1, batch: 3890, loss: 0.55708, accu: 0.76181, speed: 1.49 step/s
global step 3900, epoch: 1, batch: 3900, loss: 0.65360, accu: 0.76500, speed: 1.33 step/s
eval loss: 0.58563, accu: 0.75863
eval loss: 0.60156, accu: 0.75629
global step 3910, epoch: 1, batch: 3910, loss: 0.58348, accu: 0.76562, speed: 0.22 step/s
global step 3920, epoch: 1, batch: 3920, loss: 0.70342, accu: 0.74219, speed: 1.31 step/s
global step 3930, epoch: 1, batch: 3930, loss: 0.58638, accu: 0.72292, speed: 1.45 step/s
global step 3940, epoch: 1, batch: 3940, loss: 0.63208, accu: 0.73125, speed: 1.59 step/s
global step 3950, epoch: 1, batch: 3950, loss: 0.46083, accu: 0.74250, speed: 1.34 step/s
global step 3960, epoch: 1, batch: 3960, loss: 0.61584, accu: 0.75625, speed: 1.46 step/s
global step 3970, epoch: 1, batch: 3970, loss: 0.65245, accu: 0.75536, speed: 1.28 step/s
global step 3980, epoch: 1, batch: 3980, loss: 0.63735, accu: 0.75586, speed: 1.50 step/s
global step 3990, epoch: 1, batch: 3990, loss: 0.62930, accu: 0.75243, speed: 1.42 step/s
global step 4000, epoch: 1, batch: 4000, loss: 0.66088, accu: 0.75687, speed: 1.29 step/s
eval loss: 0.53662, accu: 0.78996
eval loss: 0.54803, accu: 0.79022
global step 4010, epoch: 1, batch: 4010, loss: 0.61966, accu: 0.77812, speed: 0.23 step/s
global step 4020, epoch: 1, batch: 4020, loss: 0.65362, accu: 0.77187, speed: 1.47 step/s
global step 4030, epoch: 1, batch: 4030, loss: 0.53003, accu: 0.76979, speed: 1.36 step/s
global step 4040, epoch: 1, batch: 4040, loss: 0.53730, accu: 0.75938, speed: 1.43 step/s
global step 4050, epoch: 1, batch: 4050, loss: 0.44748, accu: 0.75875, speed: 1.42 step/s
global step 4060, epoch: 1, batch: 4060, loss: 0.54022, accu: 0.76146, speed: 1.42 step/s
global step 4070, epoch: 1, batch: 4070, loss: 0.46774, accu: 0.75759, speed: 1.26 step/s
global step 4080, epoch: 1, batch: 4080, loss: 0.46201, accu: 0.75313, speed: 1.39 step/s
global step 4090, epoch: 1, batch: 4090, loss: 0.56662, accu: 0.75660, speed: 1.45 step/s
global step 4100, epoch: 1, batch: 4100, loss: 0.34250, accu: 0.75781, speed: 1.42 step/s
eval loss: 0.53927, accu: 0.78675
eval loss: 0.55090, accu: 0.78423
global step 4110, epoch: 1, batch: 4110, loss: 0.68731, accu: 0.75000, speed: 0.23 step/s
global step 4120, epoch: 1, batch: 4120, loss: 0.71587, accu: 0.74375, speed: 1.32 step/s
global step 4130, epoch: 1, batch: 4130, loss: 0.50741, accu: 0.73646, speed: 1.60 step/s
global step 4140, epoch: 1, batch: 4140, loss: 0.69717, accu: 0.74453, speed: 1.15 step/s
global step 4150, epoch: 1, batch: 4150, loss: 0.80336, accu: 0.74875, speed: 1.42 step/s
global step 4160, epoch: 1, batch: 4160, loss: 0.43028, accu: 0.75781, speed: 1.52 step/s
global step 4170, epoch: 1, batch: 4170, loss: 0.49267, accu: 0.76473, speed: 1.43 step/s
global step 4180, epoch: 1, batch: 4180, loss: 0.56655, accu: 0.76289, speed: 1.51 step/s
global step 4190, epoch: 1, batch: 4190, loss: 0.43997, accu: 0.76285, speed: 1.35 step/s
global step 4200, epoch: 1, batch: 4200, loss: 0.46915, accu: 0.76062, speed: 1.46 step/s
eval loss: 0.57899, accu: 0.76466
eval loss: 0.58985, accu: 0.76148
global step 4210, epoch: 1, batch: 4210, loss: 0.69708, accu: 0.73438, speed: 0.23 step/s
global step 4220, epoch: 1, batch: 4220, loss: 0.47916, accu: 0.74844, speed: 1.51 step/s
global step 4230, epoch: 1, batch: 4230, loss: 0.46213, accu: 0.75417, speed: 1.30 step/s
global step 4240, epoch: 1, batch: 4240, loss: 0.46420, accu: 0.74609, speed: 1.47 step/s
global step 4250, epoch: 1, batch: 4250, loss: 0.57369, accu: 0.75875, speed: 1.52 step/s
global step 4260, epoch: 1, batch: 4260, loss: 0.84525, accu: 0.76406, speed: 1.54 step/s
global step 4270, epoch: 1, batch: 4270, loss: 0.66980, accu: 0.76920, speed: 1.34 step/s
global step 4280, epoch: 1, batch: 4280, loss: 0.55550, accu: 0.76875, speed: 1.42 step/s
global step 4290, epoch: 1, batch: 4290, loss: 0.44228, accu: 0.76979, speed: 1.33 step/s
global step 4300, epoch: 1, batch: 4300, loss: 0.66676, accu: 0.76844, speed: 1.28 step/s
eval loss: 0.55405, accu: 0.77470
eval loss: 0.55968, accu: 0.77964
global step 4310, epoch: 1, batch: 4310, loss: 0.84031, accu: 0.69063, speed: 0.22 step/s
global step 4320, epoch: 1, batch: 4320, loss: 0.72569, accu: 0.73438, speed: 1.39 step/s
global step 4330, epoch: 1, batch: 4330, loss: 0.53863, accu: 0.74479, speed: 1.47 step/s
global step 4340, epoch: 1, batch: 4340, loss: 0.59577, accu: 0.75469, speed: 1.38 step/s
global step 4350, epoch: 1, batch: 4350, loss: 0.54295, accu: 0.74875, speed: 1.35 step/s
global step 4360, epoch: 1, batch: 4360, loss: 0.61946, accu: 0.74687, speed: 1.29 step/s
global step 4370, epoch: 1, batch: 4370, loss: 0.56640, accu: 0.75536, speed: 1.39 step/s
global step 4380, epoch: 1, batch: 4380, loss: 0.97652, accu: 0.75195, speed: 1.31 step/s
global step 4390, epoch: 1, batch: 4390, loss: 0.76873, accu: 0.75313, speed: 1.54 step/s
global step 4400, epoch: 1, batch: 4400, loss: 0.41041, accu: 0.75281, speed: 1.42 step/s
eval loss: 0.53840, accu: 0.79036
eval loss: 0.54586, accu: 0.78762
global step 4410, epoch: 1, batch: 4410, loss: 0.53790, accu: 0.77812, speed: 0.23 step/s
global step 4420, epoch: 1, batch: 4420, loss: 0.57745, accu: 0.76406, speed: 1.37 step/s
global step 4430, epoch: 1, batch: 4430, loss: 0.60251, accu: 0.76042, speed: 1.21 step/s
global step 4440, epoch: 1, batch: 4440, loss: 0.44503, accu: 0.75547, speed: 1.34 step/s
global step 4450, epoch: 1, batch: 4450, loss: 0.48476, accu: 0.75813, speed: 1.33 step/s
global step 4460, epoch: 1, batch: 4460, loss: 0.62884, accu: 0.75677, speed: 1.34 step/s
global step 4470, epoch: 1, batch: 4470, loss: 0.56576, accu: 0.76339, speed: 1.34 step/s
global step 4480, epoch: 1, batch: 4480, loss: 0.61129, accu: 0.76445, speed: 1.44 step/s
global step 4490, epoch: 1, batch: 4490, loss: 0.53414, accu: 0.76181, speed: 1.30 step/s
global step 4500, epoch: 1, batch: 4500, loss: 0.56762, accu: 0.76156, speed: 1.35 step/s
eval loss: 0.56148, accu: 0.78153
eval loss: 0.57224, accu: 0.77006
global step 4510, epoch: 1, batch: 4510, loss: 0.90874, accu: 0.73438, speed: 0.23 step/s
global step 4520, epoch: 1, batch: 4520, loss: 0.54894, accu: 0.74844, speed: 1.49 step/s
global step 4530, epoch: 1, batch: 4530, loss: 0.58839, accu: 0.74583, speed: 1.34 step/s
global step 4540, epoch: 1, batch: 4540, loss: 0.55403, accu: 0.75703, speed: 1.42 step/s
global step 4550, epoch: 1, batch: 4550, loss: 0.45914, accu: 0.75625, speed: 1.43 step/s
global step 4560, epoch: 1, batch: 4560, loss: 0.43382, accu: 0.76354, speed: 1.42 step/s
global step 4570, epoch: 1, batch: 4570, loss: 0.36407, accu: 0.76339, speed: 1.46 step/s
global step 4580, epoch: 1, batch: 4580, loss: 0.69715, accu: 0.76055, speed: 1.19 step/s
global step 4590, epoch: 1, batch: 4590, loss: 0.68686, accu: 0.76076, speed: 1.49 step/s
global step 4600, epoch: 1, batch: 4600, loss: 0.82279, accu: 0.75687, speed: 1.27 step/s
eval loss: 0.55806, accu: 0.77390
eval loss: 0.57170, accu: 0.77305
global step 4610, epoch: 1, batch: 4610, loss: 0.46163, accu: 0.79375, speed: 0.23 step/s
global step 4620, epoch: 1, batch: 4620, loss: 0.52933, accu: 0.79063, speed: 1.28 step/s
global step 4630, epoch: 1, batch: 4630, loss: 0.56656, accu: 0.78646, speed: 1.38 step/s
global step 4640, epoch: 1, batch: 4640, loss: 0.74132, accu: 0.78516, speed: 1.41 step/s
global step 4650, epoch: 1, batch: 4650, loss: 0.59668, accu: 0.77187, speed: 1.35 step/s
global step 4660, epoch: 1, batch: 4660, loss: 0.33732, accu: 0.76979, speed: 1.45 step/s
global step 4670, epoch: 1, batch: 4670, loss: 0.39673, accu: 0.76518, speed: 1.50 step/s
global step 4680, epoch: 1, batch: 4680, loss: 0.84216, accu: 0.76016, speed: 1.29 step/s
global step 4690, epoch: 1, batch: 4690, loss: 0.46547, accu: 0.76146, speed: 1.39 step/s
global step 4700, epoch: 1, batch: 4700, loss: 0.75955, accu: 0.76250, speed: 1.50 step/s
eval loss: 0.52069, accu: 0.79116
eval loss: 0.54192, accu: 0.79202
global step 4710, epoch: 1, batch: 4710, loss: 0.48795, accu: 0.80000, speed: 0.22 step/s
global step 4720, epoch: 1, batch: 4720, loss: 0.74069, accu: 0.77969, speed: 1.54 step/s
global step 4730, epoch: 1, batch: 4730, loss: 0.51675, accu: 0.78438, speed: 1.45 step/s
global step 4740, epoch: 1, batch: 4740, loss: 0.48009, accu: 0.77578, speed: 1.24 step/s
global step 4750, epoch: 1, batch: 4750, loss: 0.59309, accu: 0.76625, speed: 1.55 step/s
global step 4760, epoch: 1, batch: 4760, loss: 0.52762, accu: 0.76771, speed: 1.25 step/s
global step 4770, epoch: 1, batch: 4770, loss: 0.39570, accu: 0.76920, speed: 1.49 step/s
global step 4780, epoch: 1, batch: 4780, loss: 0.67574, accu: 0.76445, speed: 1.41 step/s
global step 4790, epoch: 1, batch: 4790, loss: 0.63920, accu: 0.76285, speed: 1.50 step/s
global step 4800, epoch: 1, batch: 4800, loss: 0.44595, accu: 0.76125, speed: 1.48 step/s
eval loss: 0.52725, accu: 0.79197
eval loss: 0.54582, accu: 0.78443
global step 4810, epoch: 1, batch: 4810, loss: 0.94366, accu: 0.75000, speed: 0.22 step/s
global step 4820, epoch: 1, batch: 4820, loss: 0.76879, accu: 0.73125, speed: 1.25 step/s
global step 4830, epoch: 1, batch: 4830, loss: 0.56927, accu: 0.75000, speed: 1.49 step/s
global step 4840, epoch: 1, batch: 4840, loss: 0.89193, accu: 0.75547, speed: 1.27 step/s
global step 4850, epoch: 1, batch: 4850, loss: 0.52804, accu: 0.76875, speed: 1.11 step/s
global step 4860, epoch: 1, batch: 4860, loss: 0.55160, accu: 0.77083, speed: 1.44 step/s
global step 4870, epoch: 1, batch: 4870, loss: 0.57298, accu: 0.76741, speed: 1.12 step/s
global step 4880, epoch: 1, batch: 4880, loss: 0.75913, accu: 0.76562, speed: 1.18 step/s
global step 4890, epoch: 1, batch: 4890, loss: 0.55554, accu: 0.76250, speed: 1.52 step/s
global step 4900, epoch: 1, batch: 4900, loss: 0.44549, accu: 0.75906, speed: 1.47 step/s
eval loss: 0.52795, accu: 0.78835
eval loss: 0.53458, accu: 0.79042
global step 4910, epoch: 1, batch: 4910, loss: 0.61598, accu: 0.79688, speed: 0.22 step/s
global step 4920, epoch: 1, batch: 4920, loss: 0.52949, accu: 0.77656, speed: 1.49 step/s
global step 4930, epoch: 1, batch: 4930, loss: 0.71844, accu: 0.77187, speed: 1.36 step/s
global step 4940, epoch: 1, batch: 4940, loss: 0.77614, accu: 0.76172, speed: 1.43 step/s
global step 4950, epoch: 1, batch: 4950, loss: 0.53925, accu: 0.75875, speed: 1.45 step/s
global step 4960, epoch: 1, batch: 4960, loss: 0.76066, accu: 0.75052, speed: 1.45 step/s
global step 4970, epoch: 1, batch: 4970, loss: 0.61408, accu: 0.75179, speed: 1.35 step/s
global step 4980, epoch: 1, batch: 4980, loss: 0.34986, accu: 0.75352, speed: 1.55 step/s
global step 4990, epoch: 1, batch: 4990, loss: 0.63921, accu: 0.75347, speed: 1.34 step/s
global step 5000, epoch: 1, batch: 5000, loss: 0.72044, accu: 0.75062, speed: 1.39 step/s
eval loss: 0.53888, accu: 0.78193
eval loss: 0.55626, accu: 0.78004
global step 5010, epoch: 1, batch: 5010, loss: 0.46175, accu: 0.74375, speed: 0.22 step/s
global step 5020, epoch: 1, batch: 5020, loss: 0.63139, accu: 0.74375, speed: 1.39 step/s
global step 5030, epoch: 1, batch: 5030, loss: 0.48877, accu: 0.75000, speed: 1.34 step/s
global step 5040, epoch: 1, batch: 5040, loss: 0.38734, accu: 0.74922, speed: 1.45 step/s
global step 5050, epoch: 1, batch: 5050, loss: 0.44535, accu: 0.75187, speed: 1.33 step/s
global step 5060, epoch: 1, batch: 5060, loss: 0.39971, accu: 0.75208, speed: 1.38 step/s
global step 5070, epoch: 1, batch: 5070, loss: 0.31449, accu: 0.75357, speed: 1.26 step/s
global step 5080, epoch: 1, batch: 5080, loss: 0.49995, accu: 0.75430, speed: 1.51 step/s
global step 5090, epoch: 1, batch: 5090, loss: 0.52665, accu: 0.75347, speed: 1.48 step/s
global step 5100, epoch: 1, batch: 5100, loss: 0.34733, accu: 0.75562, speed: 1.44 step/s
eval loss: 0.55251, accu: 0.78594
eval loss: 0.55433, accu: 0.78423
global step 5110, epoch: 1, batch: 5110, loss: 0.45387, accu: 0.78750, speed: 0.22 step/s
global step 5120, epoch: 1, batch: 5120, loss: 0.46386, accu: 0.76562, speed: 1.40 step/s
global step 5130, epoch: 1, batch: 5130, loss: 0.57614, accu: 0.75104, speed: 1.38 step/s
global step 5140, epoch: 1, batch: 5140, loss: 0.77822, accu: 0.74766, speed: 1.22 step/s
global step 5150, epoch: 1, batch: 5150, loss: 0.44133, accu: 0.75062, speed: 1.42 step/s
global step 5160, epoch: 1, batch: 5160, loss: 0.58051, accu: 0.75052, speed: 1.61 step/s
global step 5170, epoch: 1, batch: 5170, loss: 0.60983, accu: 0.74821, speed: 1.50 step/s
global step 5180, epoch: 1, batch: 5180, loss: 0.31847, accu: 0.75000, speed: 1.35 step/s
global step 5190, epoch: 1, batch: 5190, loss: 0.38619, accu: 0.75035, speed: 1.46 step/s
global step 5200, epoch: 1, batch: 5200, loss: 0.79125, accu: 0.74813, speed: 1.39 step/s
eval loss: 0.52966, accu: 0.79317
eval loss: 0.53010, accu: 0.78723
global step 5210, epoch: 1, batch: 5210, loss: 0.61588, accu: 0.76250, speed: 0.23 step/s
global step 5220, epoch: 1, batch: 5220, loss: 0.43425, accu: 0.74844, speed: 1.43 step/s
global step 5230, epoch: 1, batch: 5230, loss: 0.75051, accu: 0.74271, speed: 1.76 step/s
global step 5240, epoch: 1, batch: 5240, loss: 0.84163, accu: 0.74375, speed: 1.64 step/s
global step 5250, epoch: 1, batch: 5250, loss: 0.62490, accu: 0.73188, speed: 1.45 step/s
global step 5260, epoch: 1, batch: 5260, loss: 0.72173, accu: 0.73125, speed: 1.40 step/s
global step 5270, epoch: 1, batch: 5270, loss: 0.35297, accu: 0.73571, speed: 1.31 step/s
global step 5280, epoch: 1, batch: 5280, loss: 0.50264, accu: 0.74453, speed: 1.45 step/s
global step 5290, epoch: 1, batch: 5290, loss: 0.51758, accu: 0.74931, speed: 1.49 step/s
global step 5300, epoch: 1, batch: 5300, loss: 0.35833, accu: 0.75062, speed: 1.52 step/s
eval loss: 0.59459, accu: 0.75141
eval loss: 0.59677, accu: 0.75210
global step 5310, epoch: 1, batch: 5310, loss: 0.58186, accu: 0.77187, speed: 0.22 step/s
global step 5320, epoch: 1, batch: 5320, loss: 0.42788, accu: 0.77969, speed: 1.52 step/s
global step 5330, epoch: 1, batch: 5330, loss: 0.50635, accu: 0.76562, speed: 1.33 step/s
global step 5340, epoch: 1, batch: 5340, loss: 0.50541, accu: 0.76016, speed: 1.55 step/s
global step 5350, epoch: 1, batch: 5350, loss: 0.47167, accu: 0.75375, speed: 1.44 step/s
global step 5360, epoch: 1, batch: 5360, loss: 0.60664, accu: 0.75833, speed: 1.46 step/s
global step 5370, epoch: 1, batch: 5370, loss: 0.61775, accu: 0.75893, speed: 1.19 step/s
global step 5380, epoch: 1, batch: 5380, loss: 0.58382, accu: 0.76250, speed: 1.55 step/s
global step 5390, epoch: 1, batch: 5390, loss: 0.49942, accu: 0.76111, speed: 1.48 step/s
global step 5400, epoch: 1, batch: 5400, loss: 0.63909, accu: 0.76344, speed: 1.43 step/s
eval loss: 0.60798, accu: 0.74538
eval loss: 0.60690, accu: 0.74950
global step 5410, epoch: 1, batch: 5410, loss: 0.63792, accu: 0.75000, speed: 0.22 step/s
global step 5420, epoch: 1, batch: 5420, loss: 0.84449, accu: 0.74687, speed: 1.55 step/s
global step 5430, epoch: 1, batch: 5430, loss: 0.85157, accu: 0.74479, speed: 1.18 step/s
global step 5440, epoch: 1, batch: 5440, loss: 0.75105, accu: 0.75156, speed: 1.30 step/s
global step 5450, epoch: 1, batch: 5450, loss: 0.52756, accu: 0.75062, speed: 1.45 step/s
global step 5460, epoch: 1, batch: 5460, loss: 0.77242, accu: 0.75208, speed: 1.20 step/s
global step 5470, epoch: 1, batch: 5470, loss: 0.39225, accu: 0.75179, speed: 1.29 step/s
global step 5480, epoch: 1, batch: 5480, loss: 0.62889, accu: 0.75586, speed: 1.37 step/s
global step 5490, epoch: 1, batch: 5490, loss: 0.74864, accu: 0.75486, speed: 1.53 step/s
global step 5500, epoch: 1, batch: 5500, loss: 0.43532, accu: 0.75594, speed: 1.36 step/s
eval loss: 0.61034, accu: 0.74257
eval loss: 0.61946, accu: 0.73952
global step 5510, epoch: 1, batch: 5510, loss: 0.69623, accu: 0.73750, speed: 0.22 step/s
global step 5520, epoch: 1, batch: 5520, loss: 0.59455, accu: 0.75000, speed: 1.20 step/s
global step 5530, epoch: 1, batch: 5530, loss: 0.56226, accu: 0.76042, speed: 1.46 step/s
global step 5540, epoch: 1, batch: 5540, loss: 0.49130, accu: 0.75547, speed: 1.39 step/s
global step 5550, epoch: 1, batch: 5550, loss: 0.41015, accu: 0.75687, speed: 1.33 step/s
global step 5560, epoch: 1, batch: 5560, loss: 0.53559, accu: 0.75625, speed: 1.39 step/s
global step 5570, epoch: 1, batch: 5570, loss: 0.71649, accu: 0.75446, speed: 1.33 step/s
global step 5580, epoch: 1, batch: 5580, loss: 0.56185, accu: 0.75625, speed: 1.34 step/s
global step 5590, epoch: 1, batch: 5590, loss: 0.51748, accu: 0.75903, speed: 1.37 step/s
global step 5600, epoch: 1, batch: 5600, loss: 0.53928, accu: 0.75531, speed: 1.28 step/s
eval loss: 0.55997, accu: 0.78112
eval loss: 0.57614, accu: 0.77066
global step 5610, epoch: 1, batch: 5610, loss: 0.79724, accu: 0.75938, speed: 0.23 step/s
global step 5620, epoch: 1, batch: 5620, loss: 0.60009, accu: 0.76250, speed: 1.34 step/s
global step 5630, epoch: 1, batch: 5630, loss: 0.40403, accu: 0.76250, speed: 1.40 step/s
global step 5640, epoch: 1, batch: 5640, loss: 0.30945, accu: 0.76484, speed: 1.32 step/s
global step 5650, epoch: 1, batch: 5650, loss: 0.40899, accu: 0.76250, speed: 1.28 step/s
global step 5660, epoch: 1, batch: 5660, loss: 0.50596, accu: 0.75885, speed: 1.39 step/s
global step 5670, epoch: 1, batch: 5670, loss: 0.57326, accu: 0.76116, speed: 1.23 step/s
global step 5680, epoch: 1, batch: 5680, loss: 0.58268, accu: 0.75703, speed: 1.39 step/s
global step 5690, epoch: 1, batch: 5690, loss: 0.61443, accu: 0.75729, speed: 1.51 step/s
global step 5700, epoch: 1, batch: 5700, loss: 0.68637, accu: 0.76094, speed: 1.47 step/s
eval loss: 0.53329, accu: 0.79598
eval loss: 0.55425, accu: 0.78463
global step 5710, epoch: 1, batch: 5710, loss: 0.79362, accu: 0.75625, speed: 0.23 step/s
global step 5720, epoch: 1, batch: 5720, loss: 0.59028, accu: 0.75469, speed: 1.29 step/s
global step 5730, epoch: 1, batch: 5730, loss: 0.60505, accu: 0.74687, speed: 1.36 step/s
global step 5740, epoch: 1, batch: 5740, loss: 0.70179, accu: 0.74844, speed: 1.31 step/s
global step 5750, epoch: 1, batch: 5750, loss: 0.38547, accu: 0.75125, speed: 1.55 step/s
global step 5760, epoch: 1, batch: 5760, loss: 0.70996, accu: 0.75417, speed: 1.52 step/s
global step 5770, epoch: 1, batch: 5770, loss: 0.52844, accu: 0.75982, speed: 1.54 step/s
global step 5780, epoch: 1, batch: 5780, loss: 0.71025, accu: 0.76367, speed: 1.49 step/s
global step 5790, epoch: 1, batch: 5790, loss: 0.53323, accu: 0.76215, speed: 1.66 step/s
global step 5800, epoch: 1, batch: 5800, loss: 0.46207, accu: 0.76187, speed: 1.44 step/s
eval loss: 0.56820, accu: 0.76747
eval loss: 0.57032, accu: 0.77625
global step 5810, epoch: 1, batch: 5810, loss: 0.31635, accu: 0.77500, speed: 0.23 step/s
global step 5820, epoch: 1, batch: 5820, loss: 0.75470, accu: 0.75938, speed: 1.27 step/s
global step 5830, epoch: 1, batch: 5830, loss: 0.44446, accu: 0.77396, speed: 1.49 step/s
global step 5840, epoch: 1, batch: 5840, loss: 0.47781, accu: 0.77578, speed: 1.50 step/s
global step 5850, epoch: 1, batch: 5850, loss: 0.56611, accu: 0.77563, speed: 1.33 step/s
global step 5860, epoch: 1, batch: 5860, loss: 0.58244, accu: 0.77344, speed: 1.35 step/s
global step 5870, epoch: 1, batch: 5870, loss: 0.50111, accu: 0.77768, speed: 1.45 step/s
global step 5880, epoch: 1, batch: 5880, loss: 0.51921, accu: 0.77734, speed: 1.40 step/s
global step 5890, epoch: 1, batch: 5890, loss: 0.41145, accu: 0.77708, speed: 1.50 step/s
global step 5900, epoch: 1, batch: 5900, loss: 0.44478, accu: 0.77625, speed: 1.29 step/s
eval loss: 0.59590, accu: 0.76305
eval loss: 0.59052, accu: 0.76647
global step 5910, epoch: 1, batch: 5910, loss: 0.50739, accu: 0.78750, speed: 0.22 step/s
global step 5920, epoch: 1, batch: 5920, loss: 0.30415, accu: 0.78906, speed: 1.31 step/s
global step 5930, epoch: 1, batch: 5930, loss: 0.59762, accu: 0.77812, speed: 1.31 step/s
global step 5940, epoch: 1, batch: 5940, loss: 0.66362, accu: 0.76953, speed: 1.37 step/s
global step 5950, epoch: 1, batch: 5950, loss: 0.69537, accu: 0.75750, speed: 1.45 step/s
global step 5960, epoch: 1, batch: 5960, loss: 0.56540, accu: 0.75365, speed: 1.48 step/s
global step 5970, epoch: 1, batch: 5970, loss: 0.33771, accu: 0.76161, speed: 1.40 step/s
global step 5980, epoch: 1, batch: 5980, loss: 0.55583, accu: 0.75703, speed: 1.40 step/s
global step 5990, epoch: 1, batch: 5990, loss: 0.62422, accu: 0.75903, speed: 1.25 step/s
global step 6000, epoch: 1, batch: 6000, loss: 0.53865, accu: 0.76156, speed: 1.40 step/s
eval loss: 0.55215, accu: 0.78715
eval loss: 0.55900, accu: 0.78004
global step 6010, epoch: 1, batch: 6010, loss: 0.60368, accu: 0.75938, speed: 0.22 step/s
global step 6020, epoch: 1, batch: 6020, loss: 0.69785, accu: 0.75313, speed: 1.44 step/s
global step 6030, epoch: 1, batch: 6030, loss: 0.62777, accu: 0.75417, speed: 1.41 step/s
global step 6040, epoch: 1, batch: 6040, loss: 0.45563, accu: 0.75391, speed: 1.36 step/s
global step 6050, epoch: 1, batch: 6050, loss: 0.72919, accu: 0.75000, speed: 1.46 step/s
global step 6060, epoch: 1, batch: 6060, loss: 0.69503, accu: 0.75156, speed: 1.31 step/s
global step 6070, epoch: 1, batch: 6070, loss: 0.37082, accu: 0.75223, speed: 1.37 step/s
global step 6080, epoch: 1, batch: 6080, loss: 0.58207, accu: 0.76016, speed: 1.50 step/s
global step 6090, epoch: 1, batch: 6090, loss: 0.84523, accu: 0.76250, speed: 1.48 step/s
global step 6100, epoch: 1, batch: 6100, loss: 0.67082, accu: 0.76156, speed: 1.40 step/s
eval loss: 0.54068, accu: 0.79478
eval loss: 0.53405, accu: 0.78782
global step 6110, epoch: 1, batch: 6110, loss: 0.75067, accu: 0.71875, speed: 0.22 step/s
global step 6120, epoch: 1, batch: 6120, loss: 0.71369, accu: 0.74219, speed: 1.44 step/s
global step 6130, epoch: 1, batch: 6130, loss: 0.58847, accu: 0.73646, speed: 1.38 step/s
global step 6140, epoch: 1, batch: 6140, loss: 0.80117, accu: 0.74219, speed: 1.36 step/s
global step 6150, epoch: 1, batch: 6150, loss: 0.53465, accu: 0.74500, speed: 1.22 step/s
global step 6160, epoch: 1, batch: 6160, loss: 0.51231, accu: 0.74323, speed: 1.13 step/s
global step 6170, epoch: 1, batch: 6170, loss: 0.50284, accu: 0.74464, speed: 1.42 step/s
global step 6180, epoch: 1, batch: 6180, loss: 0.41433, accu: 0.74727, speed: 1.60 step/s
global step 6190, epoch: 1, batch: 6190, loss: 0.71705, accu: 0.74861, speed: 1.50 step/s
global step 6200, epoch: 1, batch: 6200, loss: 0.51148, accu: 0.75187, speed: 1.52 step/s
eval loss: 0.52941, accu: 0.78876
eval loss: 0.54022, accu: 0.78902
global step 6210, epoch: 1, batch: 6210, loss: 1.05040, accu: 0.75313, speed: 0.23 step/s
global step 6220, epoch: 1, batch: 6220, loss: 0.63483, accu: 0.73594, speed: 1.38 step/s
global step 6230, epoch: 1, batch: 6230, loss: 0.48655, accu: 0.74375, speed: 1.29 step/s
global step 6240, epoch: 1, batch: 6240, loss: 0.82070, accu: 0.74922, speed: 1.30 step/s
global step 6250, epoch: 1, batch: 6250, loss: 0.50715, accu: 0.75562, speed: 1.13 step/s
global step 6260, epoch: 1, batch: 6260, loss: 0.77954, accu: 0.75938, speed: 1.35 step/s
global step 6270, epoch: 1, batch: 6270, loss: 0.70664, accu: 0.76607, speed: 1.41 step/s
global step 6280, epoch: 1, batch: 6280, loss: 0.47785, accu: 0.76953, speed: 1.46 step/s
global step 6290, epoch: 1, batch: 6290, loss: 0.53345, accu: 0.77222, speed: 1.32 step/s
global step 6300, epoch: 1, batch: 6300, loss: 0.27010, accu: 0.77312, speed: 1.48 step/s
eval loss: 0.57880, accu: 0.76466
eval loss: 0.58649, accu: 0.76407
global step 6310, epoch: 1, batch: 6310, loss: 0.64868, accu: 0.75625, speed: 0.22 step/s
global step 6320, epoch: 1, batch: 6320, loss: 0.55280, accu: 0.75781, speed: 1.36 step/s
global step 6330, epoch: 1, batch: 6330, loss: 0.48979, accu: 0.77708, speed: 1.44 step/s
global step 6340, epoch: 1, batch: 6340, loss: 0.45246, accu: 0.77812, speed: 1.39 step/s
global step 6350, epoch: 1, batch: 6350, loss: 0.73725, accu: 0.77312, speed: 1.28 step/s
global step 6360, epoch: 1, batch: 6360, loss: 0.54833, accu: 0.77448, speed: 1.53 step/s
global step 6370, epoch: 1, batch: 6370, loss: 0.68111, accu: 0.77143, speed: 1.39 step/s
global step 6380, epoch: 1, batch: 6380, loss: 0.52762, accu: 0.76797, speed: 1.36 step/s
global step 6390, epoch: 1, batch: 6390, loss: 0.44422, accu: 0.76632, speed: 1.45 step/s
global step 6400, epoch: 1, batch: 6400, loss: 0.47829, accu: 0.76469, speed: 1.44 step/s
eval loss: 0.57387, accu: 0.76988
eval loss: 0.56820, accu: 0.76766
global step 6410, epoch: 1, batch: 6410, loss: 0.51753, accu: 0.71875, speed: 0.23 step/s
global step 6420, epoch: 1, batch: 6420, loss: 0.78615, accu: 0.72500, speed: 1.47 step/s
global step 6430, epoch: 1, batch: 6430, loss: 0.50558, accu: 0.73958, speed: 1.58 step/s
global step 6440, epoch: 1, batch: 6440, loss: 0.75340, accu: 0.73828, speed: 1.59 step/s
global step 6450, epoch: 1, batch: 6450, loss: 0.41007, accu: 0.73875, speed: 1.37 step/s
global step 6460, epoch: 1, batch: 6460, loss: 0.55632, accu: 0.73854, speed: 1.34 step/s
global step 6470, epoch: 1, batch: 6470, loss: 0.77018, accu: 0.74420, speed: 1.31 step/s
global step 6480, epoch: 1, batch: 6480, loss: 0.57809, accu: 0.74570, speed: 1.34 step/s
global step 6490, epoch: 1, batch: 6490, loss: 0.69135, accu: 0.74792, speed: 1.37 step/s
global step 6500, epoch: 1, batch: 6500, loss: 0.61173, accu: 0.74656, speed: 1.34 step/s
eval loss: 0.55312, accu: 0.77269
eval loss: 0.55464, accu: 0.77246
global step 6510, epoch: 1, batch: 6510, loss: 0.69130, accu: 0.76250, speed: 0.23 step/s
global step 6520, epoch: 1, batch: 6520, loss: 0.39913, accu: 0.78125, speed: 1.51 step/s
global step 6530, epoch: 1, batch: 6530, loss: 0.28267, accu: 0.78021, speed: 1.27 step/s
global step 6540, epoch: 1, batch: 6540, loss: 0.42121, accu: 0.78281, speed: 1.50 step/s
global step 6550, epoch: 1, batch: 6550, loss: 0.55617, accu: 0.78250, speed: 1.41 step/s
global step 6560, epoch: 1, batch: 6560, loss: 0.72933, accu: 0.77552, speed: 1.53 step/s
global step 6570, epoch: 1, batch: 6570, loss: 0.51397, accu: 0.77500, speed: 1.53 step/s
global step 6580, epoch: 1, batch: 6580, loss: 0.43440, accu: 0.77227, speed: 1.37 step/s
global step 6590, epoch: 1, batch: 6590, loss: 0.65762, accu: 0.77083, speed: 1.41 step/s
global step 6600, epoch: 1, batch: 6600, loss: 0.53157, accu: 0.77000, speed: 1.17 step/s
eval loss: 0.52717, accu: 0.79237
eval loss: 0.53888, accu: 0.78902
global step 6610, epoch: 1, batch: 6610, loss: 0.37700, accu: 0.77812, speed: 0.23 step/s
global step 6620, epoch: 1, batch: 6620, loss: 0.41108, accu: 0.78125, speed: 1.60 step/s
global step 6630, epoch: 1, batch: 6630, loss: 0.47078, accu: 0.78333, speed: 1.27 step/s
global step 6640, epoch: 1, batch: 6640, loss: 0.58523, accu: 0.77266, speed: 1.54 step/s
global step 6650, epoch: 1, batch: 6650, loss: 0.61326, accu: 0.77000, speed: 1.22 step/s
global step 6660, epoch: 1, batch: 6660, loss: 0.70219, accu: 0.76302, speed: 1.40 step/s
global step 6670, epoch: 1, batch: 6670, loss: 0.72012, accu: 0.75759, speed: 1.38 step/s
global step 6680, epoch: 1, batch: 6680, loss: 0.64028, accu: 0.76055, speed: 1.35 step/s
global step 6690, epoch: 1, batch: 6690, loss: 0.70822, accu: 0.75764, speed: 1.24 step/s
global step 6700, epoch: 1, batch: 6700, loss: 0.57899, accu: 0.75594, speed: 1.30 step/s
eval loss: 0.56933, accu: 0.77269
eval loss: 0.58041, accu: 0.76786
global step 6710, epoch: 1, batch: 6710, loss: 0.42713, accu: 0.78125, speed: 0.22 step/s
global step 6720, epoch: 1, batch: 6720, loss: 0.76250, accu: 0.76562, speed: 1.34 step/s
global step 6730, epoch: 1, batch: 6730, loss: 0.74472, accu: 0.75208, speed: 1.18 step/s
global step 6740, epoch: 1, batch: 6740, loss: 0.57563, accu: 0.75625, speed: 1.30 step/s
global step 6750, epoch: 1, batch: 6750, loss: 0.51634, accu: 0.75813, speed: 1.27 step/s
global step 6760, epoch: 1, batch: 6760, loss: 1.00990, accu: 0.75625, speed: 1.42 step/s
global step 6770, epoch: 1, batch: 6770, loss: 0.66407, accu: 0.75357, speed: 1.38 step/s
global step 6780, epoch: 1, batch: 6780, loss: 0.49438, accu: 0.75469, speed: 1.47 step/s
global step 6790, epoch: 1, batch: 6790, loss: 0.79393, accu: 0.75625, speed: 1.44 step/s
global step 6800, epoch: 1, batch: 6800, loss: 0.53555, accu: 0.75531, speed: 1.25 step/s
eval loss: 0.53671, accu: 0.78554
eval loss: 0.54321, accu: 0.78942
global step 6810, epoch: 1, batch: 6810, loss: 0.47950, accu: 0.80000, speed: 0.23 step/s
global step 6820, epoch: 1, batch: 6820, loss: 0.84350, accu: 0.78594, speed: 1.26 step/s
global step 6830, epoch: 1, batch: 6830, loss: 0.86043, accu: 0.77604, speed: 1.27 step/s
global step 6840, epoch: 1, batch: 6840, loss: 0.73434, accu: 0.77812, speed: 1.37 step/s
global step 6850, epoch: 1, batch: 6850, loss: 0.73660, accu: 0.77438, speed: 1.43 step/s
global step 6860, epoch: 1, batch: 6860, loss: 0.83860, accu: 0.76667, speed: 1.22 step/s
global step 6870, epoch: 1, batch: 6870, loss: 0.46546, accu: 0.76473, speed: 1.45 step/s
global step 6880, epoch: 1, batch: 6880, loss: 0.58219, accu: 0.76094, speed: 1.45 step/s
global step 6890, epoch: 1, batch: 6890, loss: 0.75063, accu: 0.75938, speed: 1.27 step/s
global step 6900, epoch: 1, batch: 6900, loss: 0.64019, accu: 0.75906, speed: 1.33 step/s
eval loss: 0.52776, accu: 0.79839
eval loss: 0.53880, accu: 0.79481
global step 6910, epoch: 1, batch: 6910, loss: 0.63116, accu: 0.76562, speed: 0.22 step/s
global step 6920, epoch: 1, batch: 6920, loss: 0.40563, accu: 0.77187, speed: 1.57 step/s
global step 6930, epoch: 1, batch: 6930, loss: 0.57205, accu: 0.77917, speed: 1.32 step/s
global step 6940, epoch: 1, batch: 6940, loss: 0.54958, accu: 0.77500, speed: 1.19 step/s
global step 6950, epoch: 1, batch: 6950, loss: 0.52398, accu: 0.77125, speed: 1.35 step/s
global step 6960, epoch: 1, batch: 6960, loss: 0.62277, accu: 0.77656, speed: 1.40 step/s
global step 6970, epoch: 1, batch: 6970, loss: 0.51309, accu: 0.77723, speed: 1.40 step/s
global step 6980, epoch: 1, batch: 6980, loss: 0.61452, accu: 0.77812, speed: 1.56 step/s
global step 6990, epoch: 1, batch: 6990, loss: 0.63430, accu: 0.77500, speed: 1.44 step/s
global step 7000, epoch: 1, batch: 7000, loss: 0.53646, accu: 0.77750, speed: 1.50 step/s
eval loss: 0.60061, accu: 0.74859
eval loss: 0.60547, accu: 0.75150
global step 7010, epoch: 1, batch: 7010, loss: 0.40012, accu: 0.74687, speed: 0.23 step/s
global step 7020, epoch: 1, batch: 7020, loss: 0.68144, accu: 0.74375, speed: 1.31 step/s
global step 7030, epoch: 1, batch: 7030, loss: 0.33793, accu: 0.74896, speed: 1.39 step/s
global step 7040, epoch: 1, batch: 7040, loss: 0.45783, accu: 0.75313, speed: 1.41 step/s
global step 7050, epoch: 1, batch: 7050, loss: 0.69710, accu: 0.75750, speed: 1.29 step/s
global step 7060, epoch: 1, batch: 7060, loss: 0.44781, accu: 0.76250, speed: 1.45 step/s
global step 7070, epoch: 1, batch: 7070, loss: 0.54228, accu: 0.76339, speed: 1.34 step/s
global step 7080, epoch: 1, batch: 7080, loss: 0.68983, accu: 0.76797, speed: 1.28 step/s
global step 7090, epoch: 1, batch: 7090, loss: 0.43057, accu: 0.76875, speed: 1.28 step/s
global step 7100, epoch: 1, batch: 7100, loss: 0.65081, accu: 0.77156, speed: 1.34 step/s
eval loss: 0.53402, accu: 0.79317
eval loss: 0.53260, accu: 0.79621
global step 7110, epoch: 1, batch: 7110, loss: 0.47501, accu: 0.75625, speed: 0.23 step/s
global step 7120, epoch: 1, batch: 7120, loss: 0.75244, accu: 0.75000, speed: 1.49 step/s
global step 7130, epoch: 1, batch: 7130, loss: 0.47086, accu: 0.75938, speed: 1.50 step/s
global step 7140, epoch: 1, batch: 7140, loss: 0.29185, accu: 0.76484, speed: 1.47 step/s
global step 7150, epoch: 1, batch: 7150, loss: 0.84425, accu: 0.76062, speed: 1.23 step/s
global step 7160, epoch: 1, batch: 7160, loss: 0.58487, accu: 0.76562, speed: 1.25 step/s
global step 7170, epoch: 1, batch: 7170, loss: 0.71750, accu: 0.76920, speed: 1.40 step/s
global step 7180, epoch: 1, batch: 7180, loss: 0.62787, accu: 0.76875, speed: 1.31 step/s
global step 7190, epoch: 1, batch: 7190, loss: 0.52742, accu: 0.77014, speed: 1.46 step/s
global step 7200, epoch: 1, batch: 7200, loss: 0.43397, accu: 0.77063, speed: 1.32 step/s
eval loss: 0.52746, accu: 0.80080
eval loss: 0.52697, accu: 0.79641
global step 7210, epoch: 1, batch: 7210, loss: 0.49203, accu: 0.77812, speed: 0.22 step/s
global step 7220, epoch: 1, batch: 7220, loss: 0.52796, accu: 0.77812, speed: 1.37 step/s
global step 7230, epoch: 1, batch: 7230, loss: 0.62150, accu: 0.77187, speed: 1.24 step/s
global step 7240, epoch: 1, batch: 7240, loss: 0.52662, accu: 0.76719, speed: 1.37 step/s
global step 7250, epoch: 1, batch: 7250, loss: 0.50873, accu: 0.76687, speed: 1.39 step/s
global step 7260, epoch: 1, batch: 7260, loss: 0.41263, accu: 0.76927, speed: 1.21 step/s
global step 7270, epoch: 1, batch: 7270, loss: 0.74734, accu: 0.76786, speed: 1.37 step/s
global step 7280, epoch: 1, batch: 7280, loss: 0.54417, accu: 0.76719, speed: 1.10 step/s
global step 7290, epoch: 1, batch: 7290, loss: 0.42926, accu: 0.76840, speed: 1.37 step/s
global step 7300, epoch: 1, batch: 7300, loss: 0.59496, accu: 0.77094, speed: 1.55 step/s
eval loss: 0.52407, accu: 0.79558
eval loss: 0.52764, accu: 0.79860
global step 7310, epoch: 1, batch: 7310, loss: 0.77541, accu: 0.76562, speed: 0.23 step/s
global step 7320, epoch: 1, batch: 7320, loss: 0.34769, accu: 0.76875, speed: 1.56 step/s
global step 7330, epoch: 1, batch: 7330, loss: 0.80828, accu: 0.76562, speed: 1.33 step/s
global step 7340, epoch: 1, batch: 7340, loss: 0.61773, accu: 0.76250, speed: 1.39 step/s
global step 7350, epoch: 1, batch: 7350, loss: 0.69431, accu: 0.76313, speed: 1.39 step/s
global step 7360, epoch: 1, batch: 7360, loss: 0.51777, accu: 0.76615, speed: 1.49 step/s
global step 7370, epoch: 1, batch: 7370, loss: 0.48138, accu: 0.76875, speed: 1.52 step/s
global step 7380, epoch: 1, batch: 7380, loss: 0.45830, accu: 0.77031, speed: 1.58 step/s
global step 7390, epoch: 1, batch: 7390, loss: 0.65023, accu: 0.77049, speed: 1.42 step/s
global step 7400, epoch: 1, batch: 7400, loss: 0.54347, accu: 0.77156, speed: 1.29 step/s
eval loss: 0.60324, accu: 0.74900
eval loss: 0.60635, accu: 0.75010
global step 7410, epoch: 1, batch: 7410, loss: 0.27904, accu: 0.77812, speed: 0.22 step/s
global step 7420, epoch: 1, batch: 7420, loss: 0.42006, accu: 0.76562, speed: 1.32 step/s
global step 7430, epoch: 1, batch: 7430, loss: 0.61436, accu: 0.76354, speed: 1.26 step/s
global step 7440, epoch: 1, batch: 7440, loss: 0.65813, accu: 0.76562, speed: 1.47 step/s
global step 7450, epoch: 1, batch: 7450, loss: 0.77260, accu: 0.76313, speed: 1.46 step/s
global step 7460, epoch: 1, batch: 7460, loss: 0.74033, accu: 0.75885, speed: 1.46 step/s
global step 7470, epoch: 1, batch: 7470, loss: 0.75364, accu: 0.75848, speed: 1.53 step/s
global step 7480, epoch: 1, batch: 7480, loss: 0.43592, accu: 0.75938, speed: 1.22 step/s
global step 7490, epoch: 1, batch: 7490, loss: 0.78206, accu: 0.76007, speed: 1.38 step/s
global step 7500, epoch: 1, batch: 7500, loss: 0.60156, accu: 0.76125, speed: 1.27 step/s
eval loss: 0.53627, accu: 0.79036
eval loss: 0.54806, accu: 0.77864
global step 7510, epoch: 1, batch: 7510, loss: 0.67224, accu: 0.75625, speed: 0.23 step/s
global step 7520, epoch: 1, batch: 7520, loss: 0.46406, accu: 0.78125, speed: 1.48 step/s
global step 7530, epoch: 1, batch: 7530, loss: 0.46796, accu: 0.78021, speed: 1.22 step/s
global step 7540, epoch: 1, batch: 7540, loss: 0.65508, accu: 0.77187, speed: 1.48 step/s
global step 7550, epoch: 1, batch: 7550, loss: 0.40250, accu: 0.78250, speed: 1.23 step/s
global step 7560, epoch: 1, batch: 7560, loss: 0.45507, accu: 0.78177, speed: 1.30 step/s
global step 7570, epoch: 1, batch: 7570, loss: 0.81296, accu: 0.78036, speed: 1.36 step/s
global step 7580, epoch: 1, batch: 7580, loss: 0.64029, accu: 0.78242, speed: 1.38 step/s
global step 7590, epoch: 1, batch: 7590, loss: 0.38475, accu: 0.78194, speed: 1.31 step/s
global step 7600, epoch: 1, batch: 7600, loss: 0.60091, accu: 0.78563, speed: 1.63 step/s
eval loss: 0.56575, accu: 0.77631
eval loss: 0.57738, accu: 0.77725
global step 7610, epoch: 1, batch: 7610, loss: 0.52714, accu: 0.77187, speed: 0.22 step/s
global step 7620, epoch: 1, batch: 7620, loss: 0.64544, accu: 0.77812, speed: 1.56 step/s
global step 7630, epoch: 1, batch: 7630, loss: 0.90627, accu: 0.78021, speed: 1.53 step/s
global step 7640, epoch: 1, batch: 7640, loss: 0.76887, accu: 0.78047, speed: 1.33 step/s
global step 7650, epoch: 1, batch: 7650, loss: 0.75769, accu: 0.77812, speed: 1.52 step/s
global step 7660, epoch: 1, batch: 7660, loss: 0.51555, accu: 0.76302, speed: 1.44 step/s
global step 7670, epoch: 1, batch: 7670, loss: 0.38784, accu: 0.76607, speed: 1.35 step/s
global step 7680, epoch: 1, batch: 7680, loss: 0.48341, accu: 0.76914, speed: 1.41 step/s
global step 7690, epoch: 1, batch: 7690, loss: 0.51258, accu: 0.76979, speed: 1.62 step/s
global step 7700, epoch: 1, batch: 7700, loss: 0.39164, accu: 0.76875, speed: 1.49 step/s
eval loss: 0.50711, accu: 0.79759
eval loss: 0.52169, accu: 0.79082
global step 7710, epoch: 1, batch: 7710, loss: 0.45022, accu: 0.75625, speed: 0.23 step/s
global step 7720, epoch: 1, batch: 7720, loss: 0.47992, accu: 0.77969, speed: 1.50 step/s
global step 7730, epoch: 1, batch: 7730, loss: 0.38505, accu: 0.76771, speed: 1.34 step/s
global step 7740, epoch: 1, batch: 7740, loss: 0.37452, accu: 0.77422, speed: 1.62 step/s
global step 7750, epoch: 1, batch: 7750, loss: 0.53042, accu: 0.76938, speed: 1.39 step/s
global step 7760, epoch: 1, batch: 7760, loss: 0.57104, accu: 0.77083, speed: 1.59 step/s
global step 7770, epoch: 1, batch: 7770, loss: 0.53219, accu: 0.76875, speed: 1.33 step/s
global step 7780, epoch: 1, batch: 7780, loss: 0.57351, accu: 0.76953, speed: 1.37 step/s
global step 7790, epoch: 1, batch: 7790, loss: 0.35338, accu: 0.76701, speed: 1.31 step/s
global step 7800, epoch: 1, batch: 7800, loss: 0.87537, accu: 0.76406, speed: 1.27 step/s
eval loss: 0.52973, accu: 0.78594
eval loss: 0.54351, accu: 0.78303
global step 7810, epoch: 1, batch: 7810, loss: 0.62023, accu: 0.79063, speed: 0.22 step/s
global step 7820, epoch: 1, batch: 7820, loss: 0.47470, accu: 0.80469, speed: 1.45 step/s
global step 7830, epoch: 1, batch: 7830, loss: 0.82839, accu: 0.77187, speed: 1.28 step/s
global step 7840, epoch: 1, batch: 7840, loss: 0.51306, accu: 0.76797, speed: 1.34 step/s
global step 7850, epoch: 1, batch: 7850, loss: 0.63193, accu: 0.77000, speed: 1.33 step/s
global step 7860, epoch: 1, batch: 7860, loss: 0.52696, accu: 0.77292, speed: 1.43 step/s
global step 7870, epoch: 1, batch: 7870, loss: 0.57542, accu: 0.77366, speed: 1.40 step/s
global step 7880, epoch: 1, batch: 7880, loss: 0.58745, accu: 0.77383, speed: 1.64 step/s
global step 7890, epoch: 1, batch: 7890, loss: 0.37323, accu: 0.77500, speed: 1.29 step/s
global step 7900, epoch: 1, batch: 7900, loss: 0.53835, accu: 0.77469, speed: 1.27 step/s
eval loss: 0.51426, accu: 0.79880
eval loss: 0.53399, accu: 0.79222
global step 7910, epoch: 1, batch: 7910, loss: 0.58248, accu: 0.79375, speed: 0.22 step/s
global step 7920, epoch: 1, batch: 7920, loss: 0.60932, accu: 0.78438, speed: 1.41 step/s
global step 7930, epoch: 1, batch: 7930, loss: 0.61977, accu: 0.77500, speed: 1.42 step/s
global step 7940, epoch: 1, batch: 7940, loss: 0.61118, accu: 0.78125, speed: 1.61 step/s
global step 7950, epoch: 1, batch: 7950, loss: 0.25541, accu: 0.78438, speed: 1.45 step/s
global step 7960, epoch: 1, batch: 7960, loss: 0.41645, accu: 0.78281, speed: 1.45 step/s
global step 7970, epoch: 1, batch: 7970, loss: 0.74350, accu: 0.78036, speed: 1.40 step/s
global step 7980, epoch: 1, batch: 7980, loss: 0.57337, accu: 0.77734, speed: 1.35 step/s
global step 7990, epoch: 1, batch: 7990, loss: 0.53350, accu: 0.78056, speed: 1.36 step/s
global step 8000, epoch: 1, batch: 8000, loss: 0.49239, accu: 0.78469, speed: 1.53 step/s
eval loss: 0.54085, accu: 0.79558
eval loss: 0.56510, accu: 0.78563
global step 8010, epoch: 1, batch: 8010, loss: 0.71016, accu: 0.74375, speed: 0.22 step/s
global step 8020, epoch: 1, batch: 8020, loss: 0.53881, accu: 0.73750, speed: 1.51 step/s
global step 8030, epoch: 1, batch: 8030, loss: 0.69013, accu: 0.74583, speed: 1.37 step/s
global step 8040, epoch: 1, batch: 8040, loss: 0.51034, accu: 0.75469, speed: 1.35 step/s
global step 8050, epoch: 1, batch: 8050, loss: 0.56046, accu: 0.75500, speed: 1.30 step/s
global step 8060, epoch: 1, batch: 8060, loss: 0.74340, accu: 0.76198, speed: 1.49 step/s
global step 8070, epoch: 1, batch: 8070, loss: 0.47915, accu: 0.76384, speed: 1.36 step/s
global step 8080, epoch: 1, batch: 8080, loss: 0.76557, accu: 0.76914, speed: 1.29 step/s
global step 8090, epoch: 1, batch: 8090, loss: 0.36809, accu: 0.76840, speed: 1.60 step/s
global step 8100, epoch: 1, batch: 8100, loss: 0.65918, accu: 0.77406, speed: 1.35 step/s
eval loss: 0.53563, accu: 0.78474
eval loss: 0.54344, accu: 0.78204
global step 8110, epoch: 1, batch: 8110, loss: 0.68084, accu: 0.76250, speed: 0.22 step/s
global step 8120, epoch: 1, batch: 8120, loss: 0.34103, accu: 0.76875, speed: 1.25 step/s
global step 8130, epoch: 1, batch: 8130, loss: 0.52833, accu: 0.77812, speed: 1.40 step/s
global step 8140, epoch: 1, batch: 8140, loss: 0.62292, accu: 0.77266, speed: 1.02 step/s
global step 8150, epoch: 1, batch: 8150, loss: 0.55615, accu: 0.76313, speed: 1.39 step/s
global step 8160, epoch: 1, batch: 8160, loss: 0.66920, accu: 0.75781, speed: 1.41 step/s
global step 8170, epoch: 1, batch: 8170, loss: 0.77905, accu: 0.75580, speed: 1.37 step/s
global step 8180, epoch: 1, batch: 8180, loss: 0.52485, accu: 0.75625, speed: 1.57 step/s
global step 8190, epoch: 1, batch: 8190, loss: 0.49885, accu: 0.76111, speed: 1.28 step/s
global step 8200, epoch: 1, batch: 8200, loss: 0.53773, accu: 0.76281, speed: 1.36 step/s
eval loss: 0.62899, accu: 0.75462
eval loss: 0.62092, accu: 0.75808
global step 8210, epoch: 1, batch: 8210, loss: 0.42499, accu: 0.76875, speed: 0.22 step/s
global step 8220, epoch: 1, batch: 8220, loss: 0.63117, accu: 0.75000, speed: 1.25 step/s
global step 8230, epoch: 1, batch: 8230, loss: 0.84638, accu: 0.74583, speed: 1.27 step/s
global step 8240, epoch: 1, batch: 8240, loss: 0.67587, accu: 0.76406, speed: 1.31 step/s
global step 8250, epoch: 1, batch: 8250, loss: 0.30246, accu: 0.77063, speed: 1.52 step/s
global step 8260, epoch: 1, batch: 8260, loss: 0.30824, accu: 0.77344, speed: 1.52 step/s
global step 8270, epoch: 1, batch: 8270, loss: 0.36208, accu: 0.78080, speed: 1.47 step/s
global step 8280, epoch: 1, batch: 8280, loss: 0.32258, accu: 0.77891, speed: 1.25 step/s
global step 8290, epoch: 1, batch: 8290, loss: 0.56274, accu: 0.78090, speed: 1.55 step/s
global step 8300, epoch: 1, batch: 8300, loss: 0.45254, accu: 0.77938, speed: 1.36 step/s
eval loss: 0.53478, accu: 0.77871
eval loss: 0.54478, accu: 0.77285
global step 8310, epoch: 1, batch: 8310, loss: 0.51946, accu: 0.78750, speed: 0.22 step/s
global step 8320, epoch: 1, batch: 8320, loss: 0.78112, accu: 0.78594, speed: 1.31 step/s