Skip to content

Commit

Permalink
Use Plantard arithmetic for NTT_769 in Dilithium
Browse files Browse the repository at this point in the history
  • Loading branch information
JunhaoHuang committed Mar 27, 2024
1 parent 4f5b5ce commit dfabd15
Show file tree
Hide file tree
Showing 4 changed files with 819 additions and 46 deletions.
1 change: 0 additions & 1 deletion crypto_sign/dilithium3/m4f/macros_fnt.i

This file was deleted.

98 changes: 98 additions & 0 deletions crypto_sign/dilithium3/m4f/macros_smallntt.i
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
/**
* Copyright (c) 2023 Junhao Huang ([email protected])
*
* Licensed under the Apache License, Version 2.0(the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MACROS_SMALLNTT_I
#define MACROS_SMALLNTT_I

// general macros
// Load four 32-bit words from memory into \a0..\a3.
//   a           base address register
//   a0..a3      destination registers, loaded in this order
//   mem0..mem3  offsets, used verbatim as the second address operand of
//               each ldr.w
// Plain offset addressing is used (no write-back), so the load instructions
// themselves do not update \a.
.macro load a, a0, a1, a2, a3, mem0, mem1, mem2, mem3
ldr.w \a0, [\a, \mem0]
ldr.w \a1, [\a, \mem1]
ldr.w \a2, [\a, \mem2]
ldr.w \a3, [\a, \mem3]
.endm

// Store the four 32-bit registers \a0..\a3 to memory; counterpart of `load`.
//   a           base address register
//   a0..a3      source registers, stored in this order
//   mem0..mem3  offsets, used verbatim as the second address operand of
//               each str.w
// Plain offset addressing is used (no write-back), so \a is left unchanged.
.macro store a, a0, a1, a2, a3, mem0, mem1, mem2, mem3
str.w \a0, [\a, \mem0]
str.w \a1, [\a, \mem1]
str.w \a2, [\a, \mem2]
str.w \a3, [\a, \mem3]
.endm

// Multiply both signed 16-bit halves of \a by the 32-bit constant
// \plantconst and reduce each product; the two 16-bit results are left
// packed in \a (Plantard-style reduction, as the macro name suggests).
//   a           in/out: two packed signed 16-bit values
//   tmp         scratch register, clobbered
//   q           modulus operand; only its TOP halfword is read (smlabt)
//   qa          32-bit addend of the multiply-accumulate step
//               (presumably the Plantard rounding term -- confirm at the
//               call site)
//   plantconst  32-bit multiplier
.macro doubleplant a, tmp, q, qa, plantconst
// tmp = (plantconst * a.lo16) >> 16
smulwb \tmp, \plantconst, \a
// a   = (plantconst * a.hi16) >> 16
smulwt \a, \plantconst, \a
// tmp = tmp.lo16 * q.hi16 + qa   (result of interest ends up in tmp.hi16)
smlabt \tmp, \tmp, \q, \qa
// a   = a.lo16 * q.hi16 + qa     (result of interest ends up in a.hi16)
smlabt \a, \a, \q, \qa
// repack: keep a.hi16 on top, move tmp.hi16 into the bottom halfword
pkhtb \a, \a, \tmp, asr#16
.endm

// Barrett-style reduction of both signed 16-bit halves of \a: for each half
// a multiple of q is estimated with a multiply and a 26-bit shift, then
// subtracted from that half.
//   a             in/out: two packed signed 16-bit values
//   tmp, tmp2     scratch registers, clobbered
//   q             modulus operand; only its BOTTOM halfword is read here
//                 (smulbb) -- note the plant macros read the TOP halfword
//   barrettconst  multiplier; only its bottom halfword is read
.macro doublebarrett a, tmp, tmp2, q, barrettconst
// tmp  = a.lo16 * barrettconst.lo16
smulbb \tmp, \a, \barrettconst
// tmp2 = a.hi16 * barrettconst.lo16
smultb \tmp2, \a, \barrettconst
// quotient estimates: arithmetic shift right by 26
asr \tmp, \tmp, #26
asr \tmp2, \tmp2, #26
// multiples of q to remove from each half
smulbb \tmp, \tmp, \q
smulbb \tmp2, \tmp2, \q
// pack the two 16-bit multiples: tmp.lo16 below, tmp2.lo16 on top
pkhbt \tmp, \tmp, \tmp2, lsl#16
// halfword-wise subtraction
usub16 \a, \a, \tmp
.endm

// Reduce the 32-bit value in \tmp; the 16-bit result is left in the TOP
// halfword of \tmp.
//   q     modulus operand; q is located in the top half of the register
//   qa    32-bit addend of the multiply-accumulate step
//   qinv  32-bit multiplier applied first (low 32 bits of the product kept)
//   tmp   in/out: value to reduce, overwritten with the result
.macro plant_red q, qa, qinv, tmp
mul \tmp, \tmp, \qinv
// tmp*qinv mod 2^(2n) / 2^n: the quantity of interest is now in the high half
smlatt \tmp, \tmp, \q, \qa
// tmp = tmp.hi16 * q.hi16 + qa; the result is in the high half
.endm

// Multiply both signed 16-bit halves of \a by the 32-bit \twiddle and reduce
// each product; results are left packed in \a.
// The instruction sequence is the same as in `doubleplant`, with \twiddle
// taking the role of the constant multiplier.
//   a        in/out: two packed signed 16-bit values
//   twiddle  32-bit multiplier
//   tmp      scratch register, clobbered
//   q        modulus operand; only its TOP halfword is read
//   qa       32-bit addend of the multiply-accumulate step
.macro mul_twiddle_plant a, twiddle, tmp, q, qa
// tmp = (twiddle * a.lo16) >> 16
smulwb \tmp, \twiddle, \a
// a   = (twiddle * a.hi16) >> 16
smulwt \a, \twiddle, \a
// tmp = tmp.lo16 * q.hi16 + qa
smlabt \tmp, \tmp, \q, \qa
// a   = a.lo16 * q.hi16 + qa
smlabt \a, \a, \q, \qa
// repack: a.hi16 stays on top, tmp.hi16 goes to the bottom halfword
pkhtb \a, \a, \tmp, asr#16
.endm

// Two butterflies at once on packed 16-bit lanes:
//   t  = reduce(twiddle * a1)   (per halfword)
//   a1 = a0 - t
//   a0 = a0 + t
//   a0, a1   in/out: registers holding two packed 16-bit values each
//   twiddle  32-bit multiplier applied to both halves of \a1
//   tmp      scratch register, clobbered (holds t)
//   q        modulus operand; only its TOP halfword is read
//   qa       32-bit addend of the multiply-accumulate step
// The sum and difference are NOT reduced here.
.macro doublebutterfly_plant a0, a1, twiddle, tmp, q, qa
// tmp = (twiddle * a1.lo16) >> 16
smulwb \tmp, \twiddle, \a1
// a1  = (twiddle * a1.hi16) >> 16
smulwt \a1, \twiddle, \a1
// tmp = tmp.lo16 * q.hi16 + qa
smlabt \tmp, \tmp, \q, \qa
// a1  = a1.lo16 * q.hi16 + qa
smlabt \a1, \a1, \q, \qa
// t: a1.hi16 on top, tmp.hi16 in the bottom halfword
pkhtb \tmp, \a1, \tmp, asr#16
// the difference must be formed first: it reads the old \a0
usub16 \a1, \a0, \tmp
uadd16 \a0, \a0, \tmp
.endm

// Apply `doublebutterfly_plant` to two independent register pairs:
// (\a0, \a1) with \twiddle0, then (\a2, \a3) with \twiddle1.
// \tmp is reused as scratch by both invocations; \q and \qa are shared.
.macro two_doublebutterfly_plant a0, a1, a2, a3, twiddle0, twiddle1, tmp, q, qa
doublebutterfly_plant \a0, \a1, \twiddle0, \tmp, \q, \qa
doublebutterfly_plant \a2, \a3, \twiddle1, \tmp, \q, \qa
.endm

// For q = 3329.
// Apply `doubleplant` to the eight registers \a0..\a7 (16 packed 16-bit
// values in total) using a fixed multiplier.
//   a0..a7      in/out: registers holding two packed 16-bit values each
//   tmp         scratch register, clobbered
//   q, qa       forwarded unchanged to `doubleplant`
//   plantconst  clobbered: loaded here with 0x0013AFB8 (= 19 << 16 | 44984)
// NOTE(review): the constant is tied to q = 3329, while the surrounding
// change targets the modulus 769 -- confirm this macro is never used on the
// 769 code path.
.macro fullplant a0, a1, a2, a3, a4, a5, a6, a7, tmp, q, qa, plantconst
movw \plantconst, #44984
movt \plantconst, #19
doubleplant \a0, \tmp, \q, \qa, \plantconst
doubleplant \a1, \tmp, \q, \qa, \plantconst
doubleplant \a2, \tmp, \q, \qa, \plantconst
doubleplant \a3, \tmp, \q, \qa, \plantconst
doubleplant \a4, \tmp, \q, \qa, \plantconst
doubleplant \a5, \tmp, \q, \qa, \plantconst
doubleplant \a6, \tmp, \q, \qa, \plantconst
doubleplant \a7, \tmp, \q, \qa, \plantconst
.endm

#endif
85 changes: 40 additions & 45 deletions crypto_sign/dilithium3/m4f/smallntt.h
Original file line number Diff line number Diff line change
@@ -1,53 +1,48 @@
/**
* Copyright (c) 2023 Junhao Huang ([email protected])
*
* Licensed under the Apache License, Version 2.0(the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef SMALLNTT_H
#define SMALLNTT_H

#include <stdint.h>
#include "params.h"

/*
 * Twiddle table of 64 signed 16-bit constants; this is the table the
 * small_point_mul() wrapper hands to small_pointmul_asm().
 * Laid out eight entries per row; the index of the first entry of each row
 * is given on the right.
 */
static const int16_t zetas[64] = {
     -23,  112, -151, -134,  -52, -148,  227,  232,   /*  0 */
     -71,  212,  236,   21,  341,  379, -202, -220,   /*  8 */
     352,  292,  238,  145,  194, -276,   70, -274,   /* 16 */
     117,  333,   66,  247, -237,  -83, -252, -244,   /* 24 */
     331, -241,  167,  357, -355,  291, -358,  105,   /* 32 */
    -115, -209,   14,   99, -260,   29,  366, -378,   /* 40 */
    -318,  278,  353,  354, -184,  127,  330, -303,   /* 48 */
     222,  -78, -348,  -44,  201,  158,  350,  168    /* 56 */
};

/*
 * Twiddle table (128 signed 16-bit entries) that the small_ntt() wrapper
 * hands to small_ntt_asm().
 *
 * Layout as written: one row of 16 entries (the first one is 0), followed by
 * 16 rows of 7 entries. The last four entries of every 7-entry row repeat
 * zetas[] four at a time (row 1 ends with zetas[0..3], row 2 with
 * zetas[4..7], and so on).
 *
 * The entries are presumably consumed sequentially by the assembly routine,
 * so keep the order intact -- confirm against the assembly before editing.
 */
static const int16_t zetas_asm[128] = {
0, -164, -81, 361, 186, -3, -250, -120, -308, 129, -16, -223, -362, -143, 131, -337,
-76, 147, -114, -23, 112, -151, -134,
-98, -272, 54, -52, -148, 227, 232,
36, -2, -124, -71, 212, 236, 21,
-75, -80, -346, 341, 379, -202, -220,
-339, 86, -51, 352, 292, 238, 145,
-255, 364, 267, 194, -276, 70, -274,
282, 161, -15, 117, 333, 66, 247,
-203, 288, 169, -237, -83, -252, -244,
-34, 191, 307, 331, -241, 167, 357,
199, -50, -24, -355, 291, -358, 105,
178, -170, 226, -115, -209, 14, 99,
270, 121, -188, -260, 29, 366, -378,
-10, -380, 279, -318, 278, 353, 354,
149, 180, -375, -184, 127, 330, -303,
369, -157, 263, 222, -78, -348, -44,
-192, -128, -246, 201, 158, 350, 168
};

/*
 * Twiddle table (declared with 256 signed 16-bit entries) that the
 * small_invntt_tomont() wrapper hands to small_invntt_tomont_asm().
 *
 * The "CT" in the name presumably refers to an inverse NTT built from
 * Cooley-Tukey butterflies -- confirm against the assembly. The entries are
 * presumably consumed in the order written, so do not reorder or reflow them
 * without checking the consumer.
 */
static const int16_t zetas_inv_CT_asm[256] = {
0, 171, 171, 164, 171, -361, 164, 81, 171, 120, -361, 3, 164, 250, 81, -186,
171, 164, 171, -361, 164, 81, -257, 49, -141, -18, -215, 38, 283, 347, 337, 192, -369, 246, -263, 128, 157, 239, -264, 179, 301, -207, 219, -332, -206, 120, 337, -131, 192, -149, -369, 10, 62, 57, 40, 136, 1, 311, -173, 27, 223, 203, -282, -169, 15, -288, -161, 74, -56, 271, -309, 26, -373, 116, -67, -361, 120, 250, 337, 143, -131, 362, -383, 82, 125, -344, -93, 299, -60, -204, 143, -270, -178, 188, -226, -121, 170, 39, -175, 174, 284, -111, 84, -22, 79, 3, 223, 16, 203, 255, -282, 339, 245, 64, -90, -306, 190, -123, 197, -253, -129, 75, -36, 346, 124, 80, 2, 218, 126, -33, -266, 326, -122, -261, 343, 164, -361, 81, 120, 3, 250, -186, 285, 200, -89, 5, 17, -96, 135, -310, -131, -149, 10, 375, -279, -180, 380, -280, -183, -7, 130, -327, -189, -335, -370, 250, 143, 362, -270, -199, -178, 34, -359, -144, -182, 304, -43, -300, -251, 377, 16, 255, 339, -267, 51, -364, -86, -106, 101, -118, 214, -349, -110, -374, -195, 81, 3, -186, 223, -129, 16, 308, 320, 319, 8, 181, 154, 216, 273, 313, 362, -199, 34, 24, -307, 50, -191, -139, -165, 208, 92, 159, 233, 177, -321, -186, -129, 308, 75, 98, -36, 76, 231, 324, 25, 85, 289, -94, -12, 113, 308, 98, 76, -54, 114, 272, -147, -146, -35, -119, -97, -176, -137, -312, -138,
};


/* Modulus of the small NTT. */
#define SMALL_Q 769

/*
 * Small-NTT entry points operating on int16_t coefficient arrays.
 * Only the prototypes are visible here; the definitions presumably live in
 * the accompanying assembly sources (note the _asm suffix) -- confirm
 * against the build. N comes from params.h.
 *
 *   small_ntt_asm            a[] is the only writable argument, so the
 *                            result is returned in place; zetas is the
 *                            twiddle table to use.
 *   small_invntt_tomont_asm  same calling shape as small_ntt_asm.
 *   small_pointmul_asm       writes out[]; reads in[] and the table zetas.
 *   small_asymmetric_mul_asm writes c[]; reads a[], b[] and b_prime[].
 *                            Note the array bounds are spelled 256 here
 *                            rather than N.
 */
void small_ntt_asm(int16_t a[N], const int16_t * zetas);
void small_invntt_tomont_asm(int16_t a[N], const int16_t * zetas);
void small_pointmul_asm(int16_t out[N], const int16_t in[N], const int16_t *zetas);
void small_asymmetric_mul_asm(int16_t c[256], const int16_t a[256], const int16_t b[256], const int16_t b_prime[256]);

/*
 * Convenience wrappers binding the small-NTT routines to their twiddle
 * tables (zetas_asm, zetas_inv_CT_asm, zetas).
 *
 * None of the expansions ends in a semicolon: each wrapper expands to a
 * plain call expression and the caller supplies the ';'. This keeps the
 * wrappers usable as the body of an unbraced if/else.
 */
#define small_ntt(a) small_ntt_asm(a, zetas_asm)
#define small_invntt_tomont(a) small_invntt_tomont_asm(a, zetas_inv_CT_asm)
#define small_point_mul(out, in) small_pointmul_asm(out, in, zetas)
#define small_asymmetric_mul(c, a, b, b_prime) small_asymmetric_mul_asm(c, a, b, b_prime)
/* Modulus of the small NTT used for computing cs0 and cs1. */
#define SMALL_Q 769

/*
 * 32-bit twiddle constants for the modulus 769; this is the table the
 * small_point_mul() wrapper hands to small_pointmul_asm_769().
 *
 * Every entry is written as an in-range int32_t value. Constants whose
 * 32-bit pattern has the top bit set are therefore spelled as negative
 * numbers (pattern - 2^32), which yields the same bits in two's complement
 * without relying on an out-of-range, implementation-defined conversion in
 * the initializer.
 */
static const int32_t zetas_769[64] = {
    -1156122536,  1334846793,   999738812,  1854264165,
     1681125041,  1150537404, -1474475118, -1223144132,
      726067294,  2066499220, -1022079343,  1055590142,
      -39095931,  1871019564, -1563837246,  1826338500,
      513832239,  1792827701,  -921546949, -1301335994,
     1161707670,  -988568545,  -776333490,  -888036150,
     1586177780,  -441225508,  -977398279,  -469151174,
      971813147,   122872927,   217820188,   619949766,
     -541757903,   770748358,  -195479655,   765163225,
     -664630829,  1742561504,  -815429421,   982983413,
    -1485645384, -1915700627,   703726762,   681386230,
     -184309389,  1457719720,  1217559000, -1820753366,
     1195218468,  1089100940,   564098436,   614364633,
     -659045696,  2088839752,  -592024100,  1949211426,
    -1725806104,   374203913,  -312767449,  2083254619,
     1513571050,  -647875430,   413299844,  -145213458
};

/*
 * 32-bit twiddle table (128 entries, the last one is 0) that the small_ntt()
 * wrapper hands to small_ntt_asm_769().
 *
 * The entries are presumably consumed in the order written by the assembly
 * routine -- do not reorder without checking the consumer.
 *
 * NOTE(review): several entries are exactly one less than the matching
 * zetas_769 constant (e.g. 999738811 here versus 999738812 there), while
 * others are equal (e.g. 1334846793). Presumably intentional -- confirm
 * with whatever generated these tables.
 */
static const int32_t zetas_asm_769[128] = {
346278248, 223405321, 966228013, 759578091, -150798592, 318352582, -1736976371, 1697880440, -2105595150, -804259156, 1675539907, -1016494210, 1401868389, -2005062756, 240160720, 474736307, -1200803600, -1435379187, -1156122536, 1334846793, 999738811, 1854264164, -631120032, -787503756, -1580592646, 1681125040, 1150537403, -1474475119, -1223144132, 1809583100, -100532394, -1938041160, 726067293, 2066499219, -1022079344, 1055590142, 525002504, 273671518, -212235055, -39095931, 1871019563, -1563837247, 1826338499, 139628326, 27925665, 1731391238, 513832238, 1792827701, -921546949, -1301335995, 67021596, 1117026605, 536172770, 1161707669, -988568545, -776333490, -888036151, 1290165729, -497076839, -753992958, 1586177779, -441225509, -977398279, -469151174, -1614103444, 1591762912, -94947261, 971813146, 122872927, 217820188, 619949766, -1709050706, 1010909077, -1748146637, -541757903, 770748357, -195479656, 765163224, 1413038655, 1781657435, -1206388733, -664630830, 1742561504, -815429422, 982983412, 357448514, 44681064, -1524741316, -1485645385, -1915700627, 703726761, 681386229, 686971362, 1787242568, -860110486, -184309390, 1457719719, 1217558999, -1820753366, -502661972, -1921285760, 1139367137, 1195218467, 1089100940, 564098435, 614364633, -1100271206, 457980908, -1669954774, -659045697, 2088839751, -592024101, 1949211426, 1368357591, 698141628, 335107981, -1725806105, 374203913, -312767449, 2083254618, -1061175275, -2139105948, 519417371, 1513571050, -647875431, 413299844, -145213459, 0};

// INTT with CT butterfly.
// 32-bit twiddle table (declared with 256 entries, the last one written is 0)
// that the small_invntt_tomont() wrapper hands to small_invntt_asm_769().
// The entries are presumably consumed in the order written by the assembly
// routine -- do not reorder or reflow without checking the consumer.
static const int32_t zetas_inv_asm_769[256] = {
5585134, 5585134, -346278248, 5585134, -966228013, -346278248, -223405321, 5585134, 1736976371, -966228013, 150798592, -346278248, -318352582, -223405321, -759578091,
// removed first "2285" + LAYER 3+2+1 - 1 - butterfly
// NOTE(review): "2285" does not occur anywhere in this table; the remark
// above looks inherited from another parameter set -- confirm or reword.
5585134, -346278248, 5585134, -966228013, -346278248, -223405321, 636705165, 446810642, 1519156183, 11170266, -821014555, -1932456027, 301597183, -692556495, -240160720, 1061175275, -1368357591, -519417371, -335107981, 2139105948, -698141628, -625534899, -1267825197, 843355087, 290426917, 128458060, 1295750862, -748407825, -826599688, 1736976371, -240160720, 2005062756, 1061175275, 1100271206, -1368357591, 502661972, 915961816, 1396283256, 452395775, -1038834743, -955057747, -670215963, 2016233022, -16755399, -1675539907, 1614103444, -1290165729, 94947261, 753992958, -1591762912, 497076839, -1954796559, 1943626293, -1122611738, -1239899531, 938302348, -245745853, 882451018, -435640376, -966228013, 1736976371, -318352582, -240160720, -1401868389, 2005062756, 1016494210, 714897027, -1005323944, 876865885, 2122350549, -1373942724, -2094424884, 1468889985, 1558252114, -1401868389, -686971362, -357448514, 860110486, 1524741316, -1787242568, -44681064, 1407453522, -368618780, 1323676527, -653460564, -1362772458, 1379527857, -463566041, 1859849297, 150798592, -1675539907, 804259156, 1614103444, -67021596, -1290165729, -139628326, -2060914086, -994153678, 55851330, 189894523, -1072345541, 1507985917, 832184821, 1111441472, 2105595150, -525002504, -1809583100, 212235055, 1938041160, -273671518, 100532394, -2044158687, -78191862, 1452134586, 642290298, -2111180283, 552928169, 161968858, -1167292802, -346278248, -966228013, -223405321, 1736976371, 150798592, -318352582, -759578091, -1608518311, -2032988421, -899206417, -480321440, 943887481, 1491230518, -83776995, -284841784, 2005062756, 1100271206, 502661972, 1669954774, -1139367137, -457980908, 1921285760, 1128196871, -1318091394, -1904530361, 396544445, -1228729265, 117287794, 2116765416, 1184048201, -318352582, -1401868389, 1016494210, -686971362, -1413038655, -357448514, 1709050706, -731652426, 89362128, 2021818155, 1720220972, -1882189829, -1245484665, -798674023, 720482160, 804259156, -67021596, -139628326, -536172770, 
-1731391238, -1117026605, -27925665, -1843093898, -1971551958, 1027664477, 1776072302, -1692295306, 1977137091, 709311894, 1552666981, -223405321, 150798592, -759578091, -1675539907, 2105595150, 804259156, -1697880440, -675801096, 279256651, 949472614, -1066760408, -1050005009, -134043193, 1262240064, 1714635839, 1016494210, -1413038655, 1709050706, 1206388733, 1748146637, -1781657435, -1010909077, -390959312, -1329261660, -1083515807, -1965966825, -1530326449, 809844289, -1541496715, 1630858843, -759578091, 2105595150, -1697880440, -525002504, 631120032, -1809583100, -474736307, -1575007513, -201064789, 1893360095, 424470110, -1133782004, -418884977, -1424208921, -547343036, -1697880440, 631120032, -474736307, 1580592646, 1435379187, 787503756, 1200803600, 1999477623, -932717215, 1982722224, -1848679031, 586438968, 1993892490, 1625273710, -1346017059, 0};

// Q1=769
// Small-NTT entry points for the modulus 769. Coefficients are int16_t
// arrays of N entries (N comes from params.h); twiddle tables are int32_t.
// Only the prototypes are visible here; the definitions presumably live in
// the accompanying assembly sources (note the _asm suffix) -- confirm
// against the build.
//   small_ntt_asm_769            a[] is the only writable argument, so the
//                                result is returned in place.
//   small_invntt_asm_769         same calling shape as small_ntt_asm_769.
//   small_pointmul_asm_769       writes out[]; reads in[] and the table zetas.
//   small_asymmetric_mul_asm_769 writes c[]; reads a[], b[] and b_prime[].
void small_ntt_asm_769(int16_t a[N], const int32_t *zetas);
void small_invntt_asm_769(int16_t a[N], const int32_t *zetas);
void small_pointmul_asm_769(int16_t out[N], const int16_t in[N], const int32_t *zetas);
void small_asymmetric_mul_asm_769(int16_t c[N], const int16_t a[N], const int16_t b[N], const int16_t b_prime[N]);

// Small NTT for computing cs0 and cs1; 769 is used as the modulus by default.
// Each wrapper binds a *_769 routine to its twiddle table (zetas_asm_769,
// zetas_inv_asm_769, zetas_769).
// None of the expansions ends in a semicolon: each wrapper expands to a plain
// call expression and the caller supplies the ';'. This keeps the wrappers
// usable as the body of an unbraced if/else.
#define small_ntt(a) small_ntt_asm_769(a, zetas_asm_769)
#define small_invntt_tomont(a) small_invntt_asm_769(a, zetas_inv_asm_769)
#define small_point_mul(out, in) small_pointmul_asm_769(out, in, zetas_769)
#define small_asymmetric_mul(c, a, b, b_prime) small_asymmetric_mul_asm_769(c, a, b, b_prime)

#endif
Loading

0 comments on commit dfabd15

Please sign in to comment.