From 386b544c8f69696342d9358b270642b55ef764d3 Mon Sep 17 00:00:00 2001 From: Cosmo Bobak <56003038+cosmobobak@users.noreply.github.com> Date: Sat, 12 Oct 2024 11:23:49 +0100 Subject: [PATCH] New network, ID `voyager`. (#205) * smol Bench: 5209506 * meow Bench: 5154092 * permutation * cargo fmt Bench: 5154092 * update nethist Bench: 5154092 --- networkhistory.txt | 102 ++++++++++++++++++++++++------------- src/nnue/network.rs | 73 +++++++++++++++++++++----- src/nnue/network/layers.rs | 2 +- 3 files changed, 129 insertions(+), 48 deletions(-) diff --git a/networkhistory.txt b/networkhistory.txt index 85c4dea0..2dc72d1e 100644 --- a/networkhistory.txt +++ b/networkhistory.txt @@ -182,19 +182,19 @@ | | vs viri56. ------------|------------------------------------------|----------------------------------------------- hugenet | viri58 with 1024 neurons. | ELO | 20.20 +- 9.42 (95%) - | | SPRT | 8.0+0.08s Threads=1 Hash=16MB + 59 | | SPRT | 8.0+0.08s Threads=1 Hash=16MB | | LLR | 2.97 (-2.94, 2.94) [0.00, 5.00] | | GAMES | N: 2600 W: 722 L: 571 D: 1307 | | vs viri58. ------------|------------------------------------------|----------------------------------------------- luminary | hugenet with 768 neurons, trained with | ELO | 1.12 +- 3.82 (95%) - | ~110M extra viri58 FENs, and ~270M extra | SPRT | 8.0+0.08s Threads=1 Hash=16MB + 60 | ~110M extra viri58 FENs, and ~270M extra | SPRT | 8.0+0.08s Threads=1 Hash=16MB | hugenet FENs. | LLR | 0.01 (-2.94, 2.94) [0.00, 3.00] | | GAMES | N: 15488 W: 3808 L: 3758 D: 7922 | | vs hugenet. (did not merge) ------------|------------------------------------------|----------------------------------------------- gemstone | luminary with the dataset pruned down to | STC: - | 963M positions from only non-bugged | ELO | 6.14 +- 3.85 (95%) + 61 | 963M positions from only non-bugged | ELO | 6.14 +- 3.85 (95%) | datagen runs. | SPRT | 8.0+0.08s Threads=1 Hash=16MB | no lichess, first net with DFRC data. | LLR | 2.95 (-2.94, 2.94) [0.00, 3.00] | | GAMES | N: 15896 W: 4181 L: 3900 D: 7815 @@ -211,7 +211,7 @@ | | vs hugenet. ------------|------------------------------------------|----------------------------------------------- gemini | gemstone with ~250M extra DFRC FENs | STC: - | generated with gemstone. | ELO | -5.11 +- 4.22 (95%) + 62 | generated with gemstone. | ELO | -5.11 +- 4.22 (95%) | | SPRT | 8.0+0.08s Threads=1 Hash=16MB | | LLR | -3.01 (-2.94, 2.94) [0.00, 3.00] | | GAMES | N: 12912 W: 3111 L: 3301 D: 6500 @@ -223,26 +223,26 @@ | | vs gemstone. ------------|------------------------------------------|----------------------------------------------- grimoire | gemini with 444M extra classical chess | STC: - | FENs. | ELO | -0.37 +- 2.90 (95%) + 63 | FENs. | ELO | -0.37 +- 2.90 (95%) | | SPRT | 8.0+0.08s Threads=1 Hash=16MB | | LLR | -1.51 (-2.94, 2.94) [0.00, 3.00] | | GAMES | N: 27224 W: 6717 L: 6746 D: 13761 | | vs gemstone. ------------|------------------------------------------|----------------------------------------------- echelon | Proof-of-concept HalfKA network. Uses a | est. -80 Elo vs gemstone. - | small 64x2 feature transformer, CReLU | + 64 | small 64x2 feature transformer, CReLU | | activation, batch size 16384, and 35 | | epochs. Trained on the gemini dataset to | | maximise variety of king positioning. | ------------|------------------------------------------|----------------------------------------------- excalibur | echelon with 384x2 feature transformer, | ELO | -4.6 +- 4.9 (95%) - | and using SCReLU activation. | SPRT | 8.0+0.08s Threads=1 Hash=16MB + 65 | and using SCReLU activation. | SPRT | 8.0+0.08s Threads=1 Hash=16MB | | LLR | -2.95 (-2.94, 2.94) [0.00, 3.00] | | GAMES | N: 10414 W: 2719 L: 2857 D: 4838 | | vs gemstone. ------------|------------------------------------------|----------------------------------------------- zhudun | excalibur with 768x2 feature transformer | STC: - | with 134M extra FENs generated using | ELO | -0.49 +- 2.23 (95%) + 66 | with 134M extra FENs generated using | ELO | -0.49 +- 2.23 (95%) | excalibur. (using excalibur over | SPRT | 8.0+0.08s Threads=1 Hash=16MB | gemstone for the extra FENs because | LLR | -2.97 (-2.94, 2.94) [0.00, 3.00] | i want to get selfplay to reveal any | GAMES | N: 48992 W: 12875 L: 12944 D: 23173 @@ -254,14 +254,14 @@ | | vs gemstone. ------------|------------------------------------------|----------------------------------------------- arcanum | zhudun retrained with exponentially | STC: - | decreasing LR (10 epochs) | ELO | -15.84 +- 7.13 (95%) + 67 | decreasing LR (10 epochs) | ELO | -15.84 +- 7.13 (95%) | | SPRT | 8.0+0.08s Threads=1 Hash=16MB | | LLR | -2.96 (-2.94, 2.94) [0.00, 3.00] | | GAMES | N: 4720 W: 1113 L: 1328 D: 2279 | | vs zhudun ------------|------------------------------------------|----------------------------------------------- astralite | gemstone-arch, with 386M extra zhudun | v2 STC: - | FENs. 20 epochs with lr 0.001, wdl 0.3, | ELO | 5.32 +- 3.55 (95%) + 68 | FENs. 20 epochs with lr 0.001, wdl 0.3, | ELO | 5.32 +- 3.55 (95%) | then three different continuations: | SPRT | 8.0+0.08s Threads=1 Hash=16MB | v1: 5 epochs lr 0.0001, wdl 0.3 | LLR | 2.95 (-2.94, 2.94) [0.00, 3.00] | v2: 5 epochs lr 0.0001, wdl 0.4 | GAMES | N: 19464 W: 5304 L: 5006 D: 9154 @@ -286,42 +286,42 @@ | | vs astralite-v2. ------------|------------------------------------------|----------------------------------------------- alchemist | astralite but trained with wdl 0.4 the | STC: - | whole time. | ELO | -0.06 +- 1.87 (95%) + 69 | whole time. | ELO | -0.06 +- 1.87 (95%) | | SPRT | 8.0+0.08s Threads=1 Hash=16MB | | LLR | -2.95 (-2.94, 2.94) [0.00, 3.00] | | GAMES | N: 66792 W: 16861 L: 16872 D: 33059 | | vs astralite-v2. ------------|------------------------------------------|----------------------------------------------- majesty | HalfKA + 768nn + 0.4 WDL + 15 epochs, | STC: - | lr-drop 10, dataset of only 766M FRC | ELO | -106.16 +- 19.05 (95%) + 70 | lr-drop 10, dataset of only 766M FRC | ELO | -106.16 +- 19.05 (95%) | positions generated using zhudun. | SPRT | 8.0+0.08s Threads=1 Hash=16MB | | LLR | -2.96 (-2.94, 2.94) [0.00, 3.00] | Either dataset was too small or might | GAMES | N: 776 W: 109 L: 339 D: 328 | have been shuffled wrong. | vs zhudun. ------------|------------------------------------------|----------------------------------------------- xi | alchemist using marlinflow's cuda boards | STC: - | | ELO | 0.00 +- 0.00 (95%) + 71 | | ELO | 0.00 +- 0.00 (95%) | | SPRT | 8.0+0.08s Threads=1 Hash=16MB | | LLR | 0.00 (-2.94, 2.94) [0.00, 3.00] | makes me think that it's broken. | GAMES | N: 112 W: 0 L: 112 D: 0 | | vs astralite-v2. ------------|------------------------------------------|----------------------------------------------- marlineater | alchemist with WDL 1.0. | STC: - | | ELO | -35.43 +- 10.52 (95%) + 72 | | ELO | -35.43 +- 10.52 (95%) | | SPRT | 8.0+0.08s Threads=1 Hash=16MB | | LLR | -2.98 (-2.94, 2.94) [0.00, 3.00] | | GAMES | N: 2224 W: 474 L: 700 D: 1050 | | vs astralite-v2. ------------|------------------------------------------|----------------------------------------------- neutron | another uninspired alchemist variation | STC: - | | ELO | -1.17 +- 2.63 (95%) + 73 | | ELO | -1.17 +- 2.63 (95%) | | SPRT | 8.0+0.08s Threads=1 Hash=16MB | | LLR | -2.95 (-2.94, 2.94) [0.00, 3.00] | | GAMES | N: 34544 W: 8869 L: 8985 D: 16690 | | vs astralite-v2. ------------|------------------------------------------|----------------------------------------------- piledriver | alchemist with a PSQT subnet! | - test 1 was -196 elo - | | - test 2 was -295 elo + 74 | | - test 2 was -295 elo | | discovered that I had mis-implemented the | | memory layout of the subnet weights. | | STC: @@ -332,7 +332,7 @@ marlineater | alchemist with WDL 1.0. | STC: | but this is promising either way. | vs zhudun. ------------|------------------------------------------|----------------------------------------------- cyc | 1024x2 relative network, lr 0.001 | STC (epoch 5) - | drop every 4 epochs, 15 epochs total, | ELO | -9.60 +- 6.46 (95%) + 75 | drop every 4 epochs, 15 epochs total, | ELO | -9.60 +- 6.46 (95%) | lr-drop-gamma of 0.3, wdl 0.4. | SPRT | 8.0+0.08s Threads=1 Hash=16MB | | LLR | -2.29 (-2.25, 2.89) [0.00, 3.00] | | GAMES | N: 5792 W: 1433 L: 1593 D: 2766 @@ -358,7 +358,7 @@ marlineater | alchemist with WDL 1.0. | STC: | | vs cyc-epoch11. ------------|------------------------------------------|----------------------------------------------- omega | 1536x2, otherwise identical to cyc. | STC (epoch 14) - | | ELO | -2.51 +- 3.72 (95%) + 76 | | ELO | -2.51 +- 3.72 (95%) | | SPRT | 8.0+0.08s Threads=1 Hash=16MB | | LLR | -2.25 (-2.25, 2.89) [0.00, 3.00] | | GAMES | N: 16624 W: 4083 L: 4203 D: 8338 @@ -371,32 +371,32 @@ marlineater | alchemist with WDL 1.0. | STC: | | vs cyc (epoch 15). ------------|------------------------------------------|----------------------------------------------- vanguard | replication of omega using JW's bullet | STC regression - | trainer. | ELO | 0.2 +/- 2.2 (95%) + 77 | trainer. | ELO | 0.2 +/- 2.2 (95%) | | SPRT | 8.0+0.08s Threads=1 Hash=16MB | | LLR | -2.95 (-2.94, 2.94) [0.00, 3.00] | | GAMES | N: 48792 W: 12720 L: 12687 D: 23385 | | vs omega. ------------|------------------------------------------|----------------------------------------------- elua | 1024x2 HalfKA network trained using JW's | Issues with the feature factoriser - | bullet trainer, with vanguard's | resulted in bugs that are unclear to me. + 78 | bullet trainer, with vanguard's | resulted in bugs that are unclear to me. | hyperparameters. | ------------|------------------------------------------|----------------------------------------------- callosum | 1024x2 HalfKA network trained using JW's | STC (epoch 15) - | bullet trainer, with vanguard's | ELO | -35.56 +- 12.24 (95%) + 79 | bullet trainer, with vanguard's | ELO | -35.56 +- 12.24 (95%) | hyperparameters, but without a feature | SPRT | 8.0+0.08s Threads=1 Hash=16MB | factoriser. | LLR | -2.26 (-2.25, 2.89) [0.00, 3.00] | | GAMES | N: 1696 W: 375 L: 548 D: 773 | | vs omega. ------------|------------------------------------------|----------------------------------------------- artemis | same arch as omega / vanguard, with 497M | STC: - | additional positions generated using | ELO | 4.27 +- 3.04 (95%) + 80 | additional positions generated using | ELO | 4.27 +- 3.04 (95%) | omega. | SPRT | 8.0+0.08s Threads=1 Hash=16MB | | LLR | 2.91 (-2.25, 2.89) [0.00, 3.00] | | GAMES | N: 24752 W: 6288 L: 5984 D: 12480 | | vs omega. ------------|------------------------------------------|----------------------------------------------- lilith | 1024x2 net with four buckets, trained | fixed-nodes: - | using JW's bullet trainer, with | ELO | -14.37 +- 8.14 (95%) + 81 | using JW's bullet trainer, with | ELO | -14.37 +- 8.14 (95%) | artemis's hyperparameters. | SPRT | N=25000 Threads=1 Hash=16MB | | LLR | -2.29 (-2.25, 2.89) [0.00, 3.00] | STC test interrupted early, as f-nodes | GAMES | N: 4208 W: 1177 L: 1351 D: 1680 @@ -409,14 +409,14 @@ marlineater | alchemist with WDL 1.0. | STC: | | vs artemis. ------------|------------------------------------------|----------------------------------------------- jupiter | same as artemis, but starting LR was 10x | fixed-nodes: - | higher. (0.01) | ELO | -180.47 +- 27.29 (95%) + 82 | higher. (0.01) | ELO | -180.47 +- 27.29 (95%) | | SPRT | N=25000 Threads=1 Hash=16MB | | LLR | -3.09 (-2.25, 2.89) [0.00, 3.00] | | GAMES | N: 528 W: 58 L: 310 D: 160 | | vs artemis. ------------|------------------------------------------|----------------------------------------------- newcomb | artemis with new lr schedule: | fixed-nodes, epoch 25: - | initial LR : 0.01 | ELO | -170.74 +- 31.69 (95%) + 83 | initial LR : 0.01 | ELO | -170.74 +- 31.69 (95%) | lr gamma : 0.1 | SPRT | N=25000 Threads=1 Hash=16MB | lr step : every 9 epochs | LLR | -2.31 (-2.25, 2.89) [0.00, 3.00] | epochs : 25 | GAMES | N: 448 W: 70 L: 274 D: 104 @@ -430,14 +430,14 @@ marlineater | alchemist with WDL 1.0. | STC: | what a load of garbage. | vs artemis. ------------|------------------------------------------|----------------------------------------------- * qa-181 | not a new network, but a re-quantisation | STC: - | of artemis so that more optimal SIMD | ELO | 16.76 +- 6.68 (95%) + 80 | of artemis so that more optimal SIMD | ELO | 16.76 +- 6.68 (95%) | can be used for inference. | SPRT | 8.0+0.08s Threads=1 Hash=16MB | | LLR | 2.92 (-2.25, 2.89) [0.00, 3.00] | | GAMES | N: 4896 W: 1271 L: 1035 D: 2590 | | vs artemis-qa-255. ------------|------------------------------------------|----------------------------------------------- signalis | four-buckets horizontally mirrored | LTC: - | 1536x2 network, trained with artemis's | ELO | 5.47 +- 3.57 (95%) + 84 | 1536x2 network, trained with artemis's | ELO | 5.47 +- 3.57 (95%) | hyperparameters. | SPRT | 40.0+0.40s Threads=1 Hash=128MB | | LLR | 2.89 (-2.25, 2.89) [0.00, 3.00] | initial run had a bug, so this is ID'd | GAMES | N: 17672 W: 4452 L: 4174 D: 9046 @@ -447,7 +447,7 @@ marlineater | alchemist with WDL 1.0. | STC: | network release. | ------------|------------------------------------------|----------------------------------------------- gestalt | nine-buckets horizontally mirrored | fixed-nodes: - | 1536x2 network, with larger dataset. | Elo | 34.12 +- 9.08 (95%) + 85 | 1536x2 network, with larger dataset. | Elo | 34.12 +- 9.08 (95%) | | SPRT | N=25000 Threads=1 Hash=16MB | | LLR | 3.07 (-2.94, 2.94) [0.00, 3.00] | | Games | N: 2564 W: 880 L: 629 D: 1055 @@ -465,7 +465,7 @@ marlineater | alchemist with WDL 1.0. | STC: | | vs. signalis ------------|------------------------------------------|----------------------------------------------- semiotic | gestalt with bigger dataset and eight | fixed-nodes: - | material-count output buckets. | Elo | 19.76 +- 6.80 (95%) + 86 | material-count output buckets. | Elo | 19.76 +- 6.80 (95%) | | SPRT | N=25000 Threads=1 Hash=16MB | | LLR | 3.06 (-2.94, 2.94) [0.00, 3.00] | | Games | N: 4348 W: 1365 L: 1118 D: 1865 @@ -483,7 +483,7 @@ marlineater | alchemist with WDL 1.0. | STC: | | vs. gestalt ------------|------------------------------------------|----------------------------------------------- skirmish | gestalt with the tweaks enumerated at | fixed-nodes: - | end of the second ANNUEP blogpost | Elo | 14.81 +- 5.85 (95%) + 87 | end of the second ANNUEP blogpost | Elo | 14.81 +- 5.85 (95%) | | SPRT | N=25000 Threads=1 Hash=16MB | | LLR | 2.97 (-2.94, 2.94) [0.00, 3.00] | | Games | N: 5492 W: 1663 L: 1429 D: 2400 @@ -512,7 +512,7 @@ marlineater | alchemist with WDL 1.0. | STC: | | cosmo.tardis.ac/2024/07/15/nnue-research-02/ ------------|------------------------------------------|----------------------------------------------- compact | semiotic with smaller dataset, much | v1 (60sb, 5b fens) fixed-nodes: - | shorter training run, and cosine lr. | Elo | -107.30 +- 15.74 (95%) + 88 | shorter training run, and cosine lr. | Elo | -107.30 +- 15.74 (95%) | | SPRT | N=25000 Threads=1 Hash=16MB | | LLR | -3.11 (-2.94, 2.94) [0.00, 3.00] | | Games | N: 952 W: 165 L: 450 D: 337 @@ -548,7 +548,7 @@ marlineater | alchemist with WDL 1.0. | STC: | | vs. semiotic ------------|------------------------------------------|----------------------------------------------- eternity | 13-buckets 2048x2 net, trained for 200 | fixed-nodes: - | superbatches with cosine lr. | Elo | -1.97 +- 2.72 (95%) + 89 | superbatches with cosine lr. | Elo | -1.97 +- 2.72 (95%) | | SPRT | N=25000 Threads=1 Hash=16MB | performance was sufficiently close to | LLR | -2.96 (-2.94, 2.94) [0.00, 3.00] | semiotic to make me think it was | Games | N: 26234 W: 7280 L: 7429 D: 11525 @@ -557,7 +557,7 @@ marlineater | alchemist with WDL 1.0. | STC: | | vs. semiotic ------------|------------------------------------------|----------------------------------------------- everedge | semiotic but trained with WDL warmup to | fixed-nodes: - | 0.5 over the course of 160 superbatches. | Elo | -5.78 +- 4.10 (95%) + 90 | 0.5 over the course of 160 superbatches. | Elo | -5.78 +- 4.10 (95%) | | SPRT | N=25000 Threads=1 Hash=16MB | | LLR | -2.98 (-2.94, 2.94) [0.00, 3.00] | | Games | N: 11490 W: 3123 L: 3314 D: 5053 @@ -566,7 +566,7 @@ marlineater | alchemist with WDL 1.0. | STC: | | vs. semiotic ------------|------------------------------------------|----------------------------------------------- pendragon | 2048x2 net with an extra billion | fixed-nodes v1: - | fens from semiotic injected. | Elo | 5.93 +- 3.53 (95%) + 91 | fens from semiotic injected. | Elo | 5.93 +- 3.53 (95%) | | SPRT | N=25000 Threads=1 Hash=16MB | first version had 13 buckets and trained | LLR | 2.99 (-2.94, 2.94) [0.00, 3.00] | for 400 superbatches. | Games | N: 16004 W: 4664 L: 4391 D: 6949 @@ -602,7 +602,7 @@ marlineater | alchemist with WDL 1.0. | STC: | | vs. semiotic ------------|------------------------------------------|----------------------------------------------- hyperstition| 2048x2 -pairwise> 16 -> 32 -> 1 net. | fixed-nodes: - | crelu/screlu/screlu activations. | Elo | 42.88 +- 10.08 (95%) + 92 | crelu/screlu/screlu activations. | Elo | 42.88 +- 10.08 (95%) | trained under identical conditions to | SPRT | N=25000 Threads=1 Hash=16MB | pendragon. | LLR | 3.09 (-2.94, 2.94) [0.00, 3.00] | | Games | N: 2036 W: 718 L: 468 D: 850 @@ -618,4 +618,36 @@ hyperstition| 2048x2 -pairwise> 16 -> 32 -> 1 net. | fixed-nodes: | | Penta | [14, 760, 1772, 957, 11] | | https://chess.swehosting.se/test/7788/ | | vs. pendragon +------------|------------------------------------------|----------------------------------------------- + lila | the final hyperstition weight file, | fixed-nodes: + 93 | trained for 100 more superbatches with a | Elo | 2.42 +- 1.89 (95%) + | 0.0005 -> 0.0005 * 0.3^3 lr cosine lr | SPRT | N=25000 Threads=1 Hash=16MB + | schedule, and including couple hundred | LLR | 3.00 (-2.94, 2.94) [0.00, 3.00] + | million datapoints generated using | Games | N: 52732 W: 14703 L: 14336 D: 23693 + | hyperstition. No other changes to the | Penta | [1019, 6368, 11343, 6499, 1137] + | training setup. | https://chess.swehosting.se/test/8011/ + | | + | | LTC: + | | Elo | 4.04 +- 2.52 (95%) + | | SPRT | 40.0+0.40s Threads=1 Hash=128MB + | | LLR | 2.95 (-2.94, 2.94) [0.00, 3.00] + | | Games | N: 17712 W: 3992 L: 3786 D: 9934 + | | Penta | [17, 1966, 4693, 2154, 26] + | | https://chess.swehosting.se/test/8056/ +------------|------------------------------------------|----------------------------------------------- + voyager | the hyperstition arch, trained on the | fixed-nodes: + 94 | new bullet version with extra fens | Elo | 9.80 +- 4.68 (95%) + | generated using hyperstition & lila. | SPRT | N=25000 Threads=1 Hash=16MB + | | LLR | 2.97 (-2.94, 2.94) [0.00, 3.00] + | | Games | N: 8652 W: 2578 L: 2334 D: 3740 + | | Penta | [166, 956, 1869, 1138, 197] + | | https://chess.swehosting.se/test/8717/ + | | + | | LTC: + | | Elo | 8.73 +- 4.00 (95%) + | | SPRT | 40.0+0.40s Threads=1 Hash=128MB + | | LLR | 2.95 (-2.94, 2.94) [0.00, 3.00] + | | Games | N: 7724 W: 1854 L: 1660 D: 4210 + | | Penta | [26, 833, 1947, 1033, 23] + | | https://chess.swehosting.se/test/8718/ ------------|------------------------------------------|----------------------------------------------- \ No newline at end of file diff --git a/src/nnue/network.rs b/src/nnue/network.rs index 07828697..67cf570b 100644 --- a/src/nnue/network.rs +++ b/src/nnue/network.rs @@ -130,17 +130,63 @@ pub struct NNUEParams { pub l3_bias: [f32; OUTPUT_BUCKETS], } -const REPERMUTE_INDICES: [usize; L1_SIZE / 2] = { - let mut indices = [0; L1_SIZE / 2]; - let mut i = 0; - while i < L1_SIZE / 2 { - indices[i] = i; - i += 1; - } - indices -}; - -// const REPERMUTE_INDICES: [usize; L1_SIZE / 2] = [840, 838, 168, 364, 27, 147, 350, 469, 825, 343, 279, 759, 480, 78, 284, 483, 153, 80, 685, 872, 623, 436, 844, 1020, 824, 478, 694, 774, 686, 987, 337, 87, 495, 597, 487, 524, 120, 88, 360, 456, 702, 766, 744, 789, 1, 239, 803, 417, 333, 608, 368, 204, 301, 426, 385, 963, 843, 262, 526, 970, 883, 109, 401, 915, 36, 708, 948, 375, 118, 528, 905, 16, 717, 444, 243, 218, 180, 197, 43, 1001, 636, 199, 299, 796, 395, 21, 442, 511, 681, 158, 679, 598, 895, 520, 585, 1018, 509, 25, 131, 400, 277, 125, 182, 484, 688, 91, 479, 128, 216, 170, 581, 918, 268, 431, 863, 746, 238, 938, 105, 228, 293, 34, 414, 376, 576, 157, 741, 852, 492, 797, 622, 54, 297, 129, 728, 595, 537, 571, 854, 256, 315, 943, 831, 543, 639, 610, 745, 920, 485, 633, 267, 501, 737, 917, 514, 410, 990, 669, 453, 331, 196, 880, 813, 47, 723, 641, 248, 278, 121, 529, 516, 566, 448, 866, 348, 184, 772, 306, 402, 758, 981, 302, 835, 452, 15, 629, 229, 127, 496, 504, 270, 468, 86, 853, 934, 491, 734, 553, 942, 290, 369, 670, 664, 202, 213, 22, 579, 482, 236, 193, 477, 253, 380, 510, 906, 856, 49, 650, 443, 409, 931, 682, 321, 457, 503, 951, 1008, 276, 313, 163, 921, 995, 668, 827, 455, 308, 40, 877, 94, 933, 396, 894, 422, 223, 660, 538, 865, 704, 411, 142, 188, 465, 955, 830, 433, 140, 51, 397, 563, 66, 527, 839, 116, 782, 698, 439, 980, 882, 617, 750, 548, 275, 793, 381, 1016, 490, 474, 881, 99, 257, 135, 365, 837, 265, 644, 817, 513, 535, 8, 359, 982, 507, 338, 713, 10, 189, 435, 727, 370, 440, 146, 190, 62, 176, 523, 152, 155, 144, 540, 240, 795, 602, 65, 926, 74, 319, 773, 940, 45, 967, 873, 280, 994, 33, 407, 493, 604, 263, 783, 584, 663, 710, 525, 977, 35, 20, 464, 192, 826, 48, 1003, 821, 899, 577, 221, 73, 106, 373, 71, 209, 935, 649, 269, 859, 232, 536, 292, 769, 434, 72, 324, 778, 570, 361, 371, 619, 247, 757, 771, 342, 412, 60, 421, 711, 946, 736, 960, 953, 845, 654, 897, 547, 868, 884, 291, 901, 386, 219, 612, 645, 986, 947, 472, 964, 64, 388, 791, 675, 420, 601, 954, 600, 200, 707, 134, 310, 808, 258, 1015, 150, 89, 790, 181, 984, 779, 183, 855, 377, 505, 389, 683, 716, 862, 458, 731, 763, 286, 266, 676, 374, 497, 167, 929, 672, 642, 599, 285, 578, 560, 353, 210, 517, 283, 327, 287, 893, 356, 902, 973, 334, 903, 220, 760, 691, 542, 288, 703, 810, 767, 50, 557, 848, 857, 591, 822, 462, 701, 620, 515, 61, 384, 936, 624, 486, 635, 768, 889, 841, 860, 237, 349, 1017, 809, 44, 816, 419, 637, 178, 244, 326, 447, 110, 177, 316, 264, 989, 101, 508, 222, 621, 460, 561, 476, 945, 721, 30, 58, 646, 574, 205, 83, 273, 325, 345, 787, 961, 607, 531, 38, 37, 31, 587, 339, 415, 53, 613, 466, 569, 697, 939, 274, 871, 956, 42, 594, 32, 394, 628, 282, 770, 850, 1011, 425, 117, 993, 56, 14, 661, 296, 449, 226, 2, 648, 133, 7, 186, 798, 5, 161, 534, 784, 1012, 700, 39, 678, 530, 558, 786, 590, 46, 634, 846, 818, 781, 807, 556, 705, 937, 154, 544, 445, 861, 801, 693, 108, 502, 379, 59, 550, 405, 564, 347, 765, 609, 988, 966, 156, 847, 991, 596, 743, 169, 68, 1002, 870, 90, 565, 195, 294, 473, 217, 235, 305, 4, 304, 114, 317, 1000, 864, 699, 588, 241, 811, 834, 115, 191, 362, 552, 910, 932, 272, 726, 307, 67, 792, 533, 488, 13, 950, 254, 923, 311, 975, 430, 999, 885, 573, 748, 733, 165, 589, 665, 692, 398, 126, 927, 521, 996, 390, 626, 112, 81, 271, 76, 833, 175, 974, 11, 355, 555, 896, 351, 888, 618, 876, 832, 467, 408, 225, 605, 1009, 892, 77, 687, 667, 187, 0, 689, 559, 79, 215, 95, 814, 69, 742, 997, 489, 1019, 998, 102, 233, 211, 340, 17, 706, 928, 869, 652, 546, 715, 298, 113, 98, 84, 806, 393, 958, 3, 122, 52, 751, 250, 673, 625, 729, 214, 320, 968, 829, 657, 57, 104, 674, 851, 777, 886, 512, 70, 423, 799, 722, 451, 735, 1023, 461, 925, 231, 965, 738, 842, 913, 446, 143, 432, 363, 309, 914, 725, 185, 658, 93, 1022, 891, 261, 194, 922, 75, 138, 336, 404, 100, 399, 413, 494, 139, 224, 162, 712, 709, 611, 651, 874, 898, 383, 638, 323, 985, 575, 295, 976, 97, 437, 145, 630, 819, 500, 230, 992, 788, 655, 441, 580, 438, 959, 592, 983, 459, 136, 322, 366, 506, 92, 762, 85, 1004, 631, 541, 690, 780, 568, 603, 632, 662, 907, 732, 329, 828, 804, 406, 151, 206, 242, 812, 227, 159, 761, 656, 29, 18, 119, 344, 160, 260, 303, 251, 234, 593, 972, 312, 522, 1005, 137, 289, 328, 367, 900, 941, 532, 198, 815, 103, 203, 63, 26, 427, 392, 696, 201, 754, 372, 952, 858, 666, 640, 719, 281, 107, 149, 382, 416, 912, 130, 908, 23, 671, 919, 358, 179, 1021, 714, 470, 957, 429, 909, 55, 148, 518, 428, 615, 314, 387, 357, 28, 172, 208, 677, 582, 653, 539, 164, 1007, 24, 207, 606, 499, 684, 911, 823, 391, 740, 171, 904, 124, 756, 680, 794, 752, 519, 878, 971, 916, 424, 551, 300, 627, 747, 330, 481, 41, 249, 567, 805, 471, 836, 879, 875, 775, 764, 616, 659, 820, 586, 1014, 332, 132, 785, 96, 9, 141, 111, 867, 463, 890, 724, 583, 255, 695, 498, 245, 647, 730, 979, 403, 944, 166, 978, 352, 318, 718, 418, 6, 123, 450, 720, 572, 949, 1010, 174, 212, 962, 802, 454, 887, 749, 82, 354, 562, 346, 755, 614, 753, 545, 549, 969, 378, 475, 259, 12, 776, 800, 924, 930, 341, 246, 1013, 643, 335, 1006, 252, 849, 554, 739, 19, 173]; +// const REPERMUTE_INDICES: [usize; L1_SIZE / 2] = { +// let mut indices = [0; L1_SIZE / 2]; +// let mut i = 0; +// while i < L1_SIZE / 2 { +// indices[i] = i; +// i += 1; +// } +// indices +// }; + +const REPERMUTE_INDICES: [usize; L1_SIZE / 2] = [ + 948, 151, 580, 671, 346, 625, 832, 44, 798, 783, 551, 355, 1009, 499, 896, 16, 805, 323, 555, 735, 767, 844, 808, + 351, 255, 905, 161, 440, 308, 178, 674, 158, 285, 276, 594, 278, 809, 531, 658, 926, 96, 732, 589, 129, 406, 708, + 205, 693, 1007, 812, 15, 543, 388, 889, 296, 742, 979, 274, 11, 51, 294, 787, 847, 888, 772, 599, 517, 442, 57, 68, + 893, 788, 918, 137, 414, 235, 7, 250, 1003, 712, 412, 77, 132, 365, 554, 174, 113, 93, 62, 607, 47, 950, 697, 971, + 1011, 76, 496, 786, 746, 737, 332, 128, 806, 247, 958, 819, 145, 343, 524, 14, 731, 491, 978, 24, 101, 845, 325, + 723, 387, 861, 364, 3, 633, 635, 916, 192, 781, 204, 408, 380, 260, 972, 218, 797, 45, 486, 147, 863, 532, 748, 84, + 227, 448, 1004, 656, 960, 272, 430, 683, 302, 774, 244, 289, 738, 65, 817, 615, 155, 144, 638, 238, 964, 490, 722, + 804, 800, 359, 705, 528, 209, 747, 78, 9, 493, 617, 225, 403, 959, 191, 397, 461, 264, 659, 415, 628, 30, 377, 450, + 901, 181, 67, 150, 887, 327, 336, 987, 868, 715, 837, 417, 760, 915, 156, 287, 546, 741, 880, 385, 386, 822, 153, + 770, 938, 391, 165, 866, 200, 483, 443, 670, 573, 394, 229, 962, 526, 82, 383, 999, 38, 312, 506, 159, 500, 98, + 333, 422, 775, 949, 133, 557, 578, 529, 719, 460, 586, 424, 848, 197, 273, 116, 610, 821, 95, 99, 756, 825, 378, + 299, 505, 842, 538, 718, 927, 237, 329, 480, 838, 776, 269, 851, 347, 799, 242, 189, 74, 190, 36, 503, 49, 1021, + 655, 631, 425, 186, 795, 375, 562, 876, 390, 859, 32, 634, 42, 124, 10, 41, 463, 645, 574, 253, 300, 982, 881, 72, + 231, 230, 803, 587, 914, 970, 988, 257, 4, 429, 407, 955, 677, 432, 713, 536, 447, 481, 71, 286, 765, 66, 810, 358, + 163, 243, 689, 251, 894, 878, 35, 241, 413, 758, 103, 966, 676, 233, 785, 434, 665, 492, 539, 427, 796, 125, 792, + 865, 662, 768, 202, 618, 495, 348, 470, 942, 444, 27, 80, 855, 466, 476, 692, 530, 616, 641, 8, 1017, 730, 571, + 445, 862, 657, 519, 26, 198, 703, 455, 922, 304, 477, 395, 326, 22, 897, 1006, 836, 597, 870, 270, 419, 508, 572, + 1019, 750, 620, 567, 680, 221, 283, 931, 468, 627, 475, 816, 941, 762, 219, 591, 522, 1008, 85, 983, 707, 488, 664, + 973, 515, 552, 18, 112, 83, 991, 162, 1001, 714, 733, 410, 282, 755, 884, 376, 248, 70, 214, 903, 612, 828, 256, + 471, 910, 789, 340, 640, 702, 423, 489, 222, 605, 967, 857, 535, 171, 104, 940, 501, 341, 575, 990, 590, 449, 458, + 169, 140, 63, 60, 928, 389, 514, 354, 537, 899, 545, 920, 217, 384, 280, 989, 744, 265, 814, 79, 534, 12, 224, 494, + 548, 92, 134, 293, 87, 34, 542, 420, 652, 453, 995, 5, 729, 175, 934, 811, 860, 498, 523, 311, 773, 435, 157, 761, + 533, 342, 711, 215, 108, 328, 912, 827, 284, 611, 303, 69, 757, 570, 885, 947, 114, 102, 956, 513, 433, 701, 864, + 588, 261, 924, 187, 975, 188, 148, 933, 220, 405, 17, 726, 646, 1, 858, 484, 382, 600, 310, 569, 778, 917, 997, + 815, 1018, 479, 841, 109, 563, 195, 642, 663, 891, 911, 974, 622, 840, 516, 472, 436, 485, 601, 199, 525, 1000, + 698, 469, 309, 875, 268, 710, 1002, 142, 784, 751, 951, 240, 980, 139, 780, 301, 613, 606, 54, 576, 194, 565, 929, + 20, 630, 152, 306, 650, 462, 428, 849, 431, 644, 141, 474, 402, 1020, 138, 986, 371, 977, 43, 882, 143, 170, 207, + 521, 88, 593, 307, 392, 298, 86, 540, 820, 558, 833, 963, 369, 361, 900, 709, 399, 624, 97, 824, 123, 892, 368, + 596, 177, 585, 210, 830, 53, 879, 118, 146, 1005, 497, 985, 122, 324, 263, 216, 598, 381, 614, 668, 954, 21, 908, + 232, 75, 73, 418, 507, 944, 749, 473, 725, 367, 727, 675, 577, 647, 943, 766, 603, 362, 19, 40, 404, 1013, 935, + 745, 59, 106, 874, 314, 932, 105, 28, 639, 366, 734, 592, 288, 629, 211, 164, 520, 437, 196, 994, 291, 409, 94, + 441, 252, 313, 791, 172, 925, 131, 678, 854, 236, 185, 452, 684, 239, 136, 322, 316, 360, 130, 930, 794, 451, 512, + 653, 206, 886, 64, 266, 271, 39, 898, 50, 695, 992, 834, 961, 179, 632, 258, 37, 890, 249, 2, 464, 566, 511, 454, + 793, 318, 769, 81, 752, 319, 753, 945, 984, 334, 184, 439, 823, 350, 740, 691, 167, 416, 764, 337, 779, 720, 1016, + 180, 651, 1012, 1010, 895, 281, 1023, 23, 58, 953, 913, 835, 246, 338, 679, 1022, 704, 672, 541, 374, 717, 396, + 560, 518, 154, 623, 349, 564, 1015, 208, 801, 699, 119, 969, 13, 482, 621, 739, 681, 379, 0, 182, 509, 52, 687, + 550, 846, 649, 790, 110, 976, 400, 465, 315, 111, 353, 277, 547, 909, 923, 33, 31, 121, 690, 149, 487, 317, 724, 6, + 826, 90, 335, 648, 544, 608, 581, 813, 100, 673, 331, 582, 871, 279, 583, 609, 339, 993, 716, 902, 173, 320, 743, + 852, 636, 457, 127, 906, 877, 579, 968, 626, 981, 29, 921, 295, 549, 654, 856, 637, 321, 759, 754, 213, 669, 667, + 643, 120, 46, 700, 345, 559, 115, 363, 504, 706, 176, 946, 456, 688, 682, 467, 193, 721, 919, 292, 763, 619, 459, + 438, 939, 201, 904, 356, 561, 839, 160, 510, 135, 883, 344, 426, 595, 685, 952, 254, 998, 853, 234, 398, 869, 183, + 259, 411, 55, 936, 305, 728, 25, 203, 604, 275, 212, 850, 290, 166, 873, 736, 370, 226, 352, 61, 297, 782, 502, 56, + 996, 1014, 357, 228, 245, 117, 957, 527, 907, 168, 584, 330, 771, 556, 373, 831, 262, 937, 660, 401, 818, 568, 777, + 421, 872, 372, 867, 126, 91, 48, 807, 107, 829, 694, 446, 89, 267, 802, 393, 661, 696, 666, 686, 478, 553, 843, + 223, 602, 965, +]; impl UnquantisedNetwork { /// Convert a parameter file generated by bullet into a quantised parameter set, @@ -357,7 +403,10 @@ impl QuantisedNetwork { } } -fn repermute_l1_weights(sorted: &mut [[[i8; 16]; 8]], l1_weights: &[[[i8; 16]; 8]; 2048]) { +fn repermute_l1_weights( + sorted: &mut [[[i8; L2_SIZE]; OUTPUT_BUCKETS]], + l1_weights: &[[[i8; L2_SIZE]; OUTPUT_BUCKETS]; L1_SIZE], +) { for (tgt_index, src_index) in REPERMUTE_INDICES.iter().copied().enumerate() { sorted[tgt_index] = l1_weights[src_index]; } diff --git a/src/nnue/network/layers.rs b/src/nnue/network/layers.rs index b0b3b8e0..8a4bf7d0 100644 --- a/src/nnue/network/layers.rs +++ b/src/nnue/network/layers.rs @@ -328,7 +328,7 @@ mod x86simd { #[allow(clippy::similar_names)] fn propagate_l1( - ft_outputs: &Align64<[MaybeUninit; 2048]>, + ft_outputs: &Align64<[MaybeUninit; L1_SIZE]>, nnz_slice: &[u16], weights: &Align64<[i8; L1_SIZE * L2_SIZE]>, biases: &Align64<[f32; 16]>,