Skip to content

Commit

Permalink
Revisiting Keccak and Dilithium Implementations on ARMv7-M (#338)
Browse files Browse the repository at this point in the history
* Use Plantard arithmetic for NTT_769 in Dilithium

* rm old smallntt.S

* update benchmarks

---------

Co-authored-by: Matthias J. Kannwischer <[email protected]>
  • Loading branch information
JunhaoHuang and mkannwischer authored Apr 15, 2024
1 parent 2c48508 commit 9c2bc41
Show file tree
Hide file tree
Showing 7 changed files with 829 additions and 893 deletions.
10 changes: 5 additions & 5 deletions benchmarks.csv
Original file line number Diff line number Diff line change
Expand Up @@ -44,8 +44,8 @@ cross-sha3-r-sdpg-3-fast (10 executions),ref,627948,625525,637639,43573841,43565
cross-sha3-r-sdpg-5-fast (10 executions),ref,1146280,1142409,1153794,93557878,93547167,93566329,59948216,59857434,60043852
dilithium2 (90 executions),clean,1873447,1838554,1903845,7846622,3321671,28761609,2062804,2062332,2063181
dilithium2 (100 executions),m4f,1427684,1390524,1466437,4219137,1813668,12587382,1417706,1417251,1418128
dilithium3 (90 executions),clean,3205542,3204354,3206592,12108503,5097440,50759276,3377010,3376729,3377395
dilithium3 (100 executions),m4f,2515970,2514894,2516922,5896583,2935265,23718896,2411234,2410948,2411551
dilithium3 (1000 executions),clean,3205551,3204090,3207411,12696585,5097364,74392293,3376992,3376581,3377393
dilithium3 (1000 executions),m4f,2515969,2514498,2517634,5884832,2917322,25268693,2411257,2410858,2411717
dilithium5 (90 executions),clean,5346066,5287239,5395626,15205929,7953360,49173429,5609664,5609137,5610119
dilithium5 (100 executions),m4f,4273211,4210308,4329697,8062110,4882708,18398575,4185407,4184878,4185954
falcon-1024 (10 executions),m4-ct,354880005,284902033,635131652,87741288,87506676,87922628,991320,982548,997219
Expand Down Expand Up @@ -341,8 +341,8 @@ cross-sha3-r-sdpg-3-fast,ref,71.7,68.2,68.7,,,,,,
cross-sha3-r-sdpg-5-fast,ref,71.1,66.1,66.8,,,,,,
dilithium2,clean,60.9,30.2,52.9,,,,,,
dilithium2,m4f,79.9,62.2,76.8,,,,,,
dilithium3,clean,64.7,33.8,56.8,,,,,,
dilithium3,m4f,82.3,57.9,79.4,,,,,,
dilithium3,clean,64.7,31.3,56.8,,,,,,
dilithium3,m4f,82.3,60.3,79.4,,,,,,
dilithium5,clean,67.0,38.4,61.1,,,,,,
dilithium5,m4f,83.4,63.5,81.7,,,,,,
falcon-1024,clean,6.5,0.3,23.7,,,,,,
Expand Down Expand Up @@ -491,7 +491,7 @@ cross-sha3-r-sdpg-5-fast,ref,18593,0,208,18801,,,,,
dilithium2,clean,8064,0,0,8064,,,,,
dilithium2,m4f,18596,0,0,18596,,,,,
dilithium3,clean,7580,0,0,7580,,,,,
dilithium3,m4f,20108,0,0,20108,,,,,
dilithium3,m4f,18588,0,0,18588,,,,,
dilithium5,clean,7808,0,0,7808,,,,,
dilithium5,m4f,18468,0,0,18468,,,,,
falcon-1024,clean,82647,0,0,82647,,,,,
Expand Down
10 changes: 5 additions & 5 deletions benchmarks.md
Original file line number Diff line number Diff line change
Expand Up @@ -46,8 +46,8 @@
| cross-sha3-r-sdpg-5-fast (10 executions) | ref | AVG: 1,146,280 <br /> MIN: 1,142,409 <br /> MAX: 1,153,794 | AVG: 93,557,878 <br /> MIN: 93,547,167 <br /> MAX: 93,566,329 | AVG: 59,948,216 <br /> MIN: 59,857,434 <br /> MAX: 60,043,852 |
| dilithium2 (90 executions) | clean | AVG: 1,873,447 <br /> MIN: 1,838,554 <br /> MAX: 1,903,845 | AVG: 7,846,622 <br /> MIN: 3,321,671 <br /> MAX: 28,761,609 | AVG: 2,062,804 <br /> MIN: 2,062,332 <br /> MAX: 2,063,181 |
| dilithium2 (100 executions) | m4f | AVG: 1,427,684 <br /> MIN: 1,390,524 <br /> MAX: 1,466,437 | AVG: 4,219,137 <br /> MIN: 1,813,668 <br /> MAX: 12,587,382 | AVG: 1,417,706 <br /> MIN: 1,417,251 <br /> MAX: 1,418,128 |
| dilithium3 (90 executions) | clean | AVG: 3,205,542 <br /> MIN: 3,204,354 <br /> MAX: 3,206,592 | AVG: 12,108,503 <br /> MIN: 5,097,440 <br /> MAX: 50,759,276 | AVG: 3,377,010 <br /> MIN: 3,376,729 <br /> MAX: 3,377,395 |
| dilithium3 (100 executions) | m4f | AVG: 2,515,970 <br /> MIN: 2,514,894 <br /> MAX: 2,516,922 | AVG: 5,896,583 <br /> MIN: 2,935,265 <br /> MAX: 23,718,896 | AVG: 2,411,234 <br /> MIN: 2,410,948 <br /> MAX: 2,411,551 |
| dilithium3 (1000 executions) | clean | AVG: 3,205,551 <br /> MIN: 3,204,090 <br /> MAX: 3,207,411 | AVG: 12,696,585 <br /> MIN: 5,097,364 <br /> MAX: 74,392,293 | AVG: 3,376,992 <br /> MIN: 3,376,581 <br /> MAX: 3,377,393 |
| dilithium3 (1000 executions) | m4f | AVG: 2,515,969 <br /> MIN: 2,514,498 <br /> MAX: 2,517,634 | AVG: 5,884,832 <br /> MIN: 2,917,322 <br /> MAX: 25,268,693 | AVG: 2,411,257 <br /> MIN: 2,410,858 <br /> MAX: 2,411,717 |
| dilithium5 (90 executions) | clean | AVG: 5,346,066 <br /> MIN: 5,287,239 <br /> MAX: 5,395,626 | AVG: 15,205,929 <br /> MIN: 7,953,360 <br /> MAX: 49,173,429 | AVG: 5,609,664 <br /> MIN: 5,609,137 <br /> MAX: 5,610,119 |
| dilithium5 (100 executions) | m4f | AVG: 4,273,211 <br /> MIN: 4,210,308 <br /> MAX: 4,329,697 | AVG: 8,062,110 <br /> MIN: 4,882,708 <br /> MAX: 18,398,575 | AVG: 4,185,407 <br /> MIN: 4,184,878 <br /> MAX: 4,185,954 |
| falcon-1024 (10 executions) | m4-ct | AVG: 354,880,005 <br /> MIN: 284,902,033 <br /> MAX: 635,131,652 | AVG: 87,741,288 <br /> MIN: 87,506,676 <br /> MAX: 87,922,628 | AVG: 991,320 <br /> MIN: 982,548 <br /> MAX: 997,219 |
Expand Down Expand Up @@ -347,8 +347,8 @@
| cross-sha3-r-sdpg-5-fast | ref | 71.1% | 66.1% | 66.8% |
| dilithium2 | clean | 60.9% | 30.2% | 52.9% |
| dilithium2 | m4f | 79.9% | 62.2% | 76.8% |
| dilithium3 | clean | 64.7% | 33.8% | 56.8% |
| dilithium3 | m4f | 82.3% | 57.9% | 79.4% |
| dilithium3 | clean | 64.7% | 31.3% | 56.8% |
| dilithium3 | m4f | 82.3% | 60.3% | 79.4% |
| dilithium5 | clean | 67.0% | 38.4% | 61.1% |
| dilithium5 | m4f | 83.4% | 63.5% | 81.7% |
| falcon-1024 | clean | 6.5% | 0.3% | 23.7% |
Expand Down Expand Up @@ -499,7 +499,7 @@
| dilithium2 | clean | 8,064 | 0 | 0 | 8,064 |
| dilithium2 | m4f | 18,596 | 0 | 0 | 18,596 |
| dilithium3 | clean | 7,580 | 0 | 0 | 7,580 |
| dilithium3 | m4f | 20,108 | 0 | 0 | 20,108 |
| dilithium3 | m4f | 18,588 | 0 | 0 | 18,588 |
| dilithium5 | clean | 7,808 | 0 | 0 | 7,808 |
| dilithium5 | m4f | 18,468 | 0 | 0 | 18,468 |
| falcon-1024 | clean | 82,647 | 0 | 0 | 82,647 |
Expand Down
1 change: 0 additions & 1 deletion crypto_sign/dilithium3/m4f/macros_fnt.i

This file was deleted.

98 changes: 98 additions & 0 deletions crypto_sign/dilithium3/m4f/macros_smallntt.i
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
/**
* Copyright (c) 2023 Junhao Huang ([email protected])
*
* Licensed under the Apache License, Version 2.0(the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MACROS_SMALLNTT_I
#define MACROS_SMALLNTT_I

// general macros
// load: read four 32-bit words from memory into a0..a3.
//   a           base address register
//   a0..a3      destination registers, filled in that order
//   mem0..mem3  offset operand of each ldr.w, used as supplied by the caller
// The base register is never written back by this macro.
// NOTE(review): presumably each word packs two 16-bit coefficients, matching
// the halfword arithmetic of the other macros in this file - confirm.
.macro load a, a0, a1, a2, a3, mem0, mem1, mem2, mem3
ldr.w \a0, [\a, \mem0]
ldr.w \a1, [\a, \mem1]
ldr.w \a2, [\a, \mem2]
ldr.w \a3, [\a, \mem3]
.endm

// store: write the four 32-bit registers a0..a3 to memory.
//   a           base address register
//   a0..a3      source registers, stored in that order
//   mem0..mem3  offset operand of each str.w, used as supplied by the caller
// Counterpart of the load macro; the base register is never written back.
.macro store a, a0, a1, a2, a3, mem0, mem1, mem2, mem3
str.w \a0, [\a, \mem0]
str.w \a1, [\a, \mem1]
str.w \a2, [\a, \mem2]
str.w \a3, [\a, \mem3]
.endm

// doubleplant: multiply both 16-bit halves of a by the 32-bit constant
// plantconst and reduce each product, leaving the two 16-bit results packed
// back into a.
//   a           in/out: two packed 16-bit values (bottom and top halfword)
//   tmp         scratch, clobbered
//   q           modulus, read from the TOP halfword of this register
//   qa          32-bit addend accumulated by both smlabt instructions
//               (NOTE(review): presumably q * 2^alpha as in Plantard
//               reduction - confirm against the caller)
//   plantconst  32-bit multiplier
.macro doubleplant a, tmp, q, qa, plantconst
// tmp = bits [47:16] of plantconst * bottom halfword of a
smulwb \tmp, \plantconst, \a
// a = bits [47:16] of plantconst * top halfword of a (a is overwritten only
// after its bottom half has been consumed above)
smulwt \a, \plantconst, \a
// each lane: bottom halfword of the product * top halfword of q, plus qa
smlabt \tmp, \tmp, \q, \qa
smlabt \a, \a, \q, \qa
// keep the top halfword of a, and move the top halfword of tmp into the
// bottom halfword of a
pkhtb \a, \a, \tmp, asr#16
.endm

// doublebarrett: subtract a multiple of q from each 16-bit half of a.
//   a             in/out: two packed 16-bit values
//   tmp, tmp2     scratch, clobbered
//   q             modulus, read from the BOTTOM halfword of this register
//                 (unlike the Plantard macros, which read the top halfword)
//   barrettconst  multiplier, read from the bottom halfword
//                 (NOTE(review): presumably round(2^26 / q) - confirm)
.macro doublebarrett a, tmp, tmp2, q, barrettconst
// per-lane signed products with the constant: bottom lane, then top lane
smulbb \tmp, \a, \barrettconst
smultb \tmp2, \a, \barrettconst
// quotient estimate for each lane: arithmetic shift right by 26
asr \tmp, \tmp, #26
asr \tmp2, \tmp2, #26
// quotient estimate times q for each lane
smulbb \tmp, \tmp, \q
smulbb \tmp2, \tmp2, \q
// pack the low 16 bits of both products into one register
pkhbt \tmp, \tmp, \tmp2, lsl#16
// halfword-wise subtraction: a = a - quotient * q in each lane
usub16 \a, \a, \tmp
.endm

// plant_red: reduce the single 32-bit value held in tmp.
//   q     modulus, located in the TOP halfword of this register
//   qa    32-bit addend accumulated by smlatt
//         (NOTE(review): presumably q * 2^alpha - confirm against the caller)
//   qinv  32-bit multiplier
//   tmp   in/out: input value on entry; on exit the reduced result sits in
//         the TOP halfword (the bottom halfword is not meaningful)
.macro plant_red q, qa, qinv, tmp
// tmp = (tmp * qinv) mod 2^32; only its top halfword is used below
mul \tmp, \tmp, \qinv
// tmp = top halfword of tmp * top halfword of q + qa
smlatt \tmp, \tmp, \q, \qa
// the result is in the top halfword of tmp
.endm

// mul_twiddle_plant: multiply both 16-bit halves of a by the 32-bit twiddle
// factor and reduce each product, leaving the packed pair in a.
//   a        in/out: two packed 16-bit values
//   twiddle  32-bit multiplier
//   tmp      scratch, clobbered
//   q        modulus, read from the top halfword of this register
//   qa       32-bit addend used by the reduction
// This is the same instruction sequence as doubleplant with the twiddle used
// as the multiplier, so it simply forwards to that macro.
.macro mul_twiddle_plant a, twiddle, tmp, q, qa
doubleplant \a, \tmp, \q, \qa, \twiddle
.endm

// doublebutterfly_plant: butterfly on two packed pairs of 16-bit values.
// With t = reduced (twiddle * a1) computed per halfword, the macro produces
//   a1 = a0 - t
//   a0 = a0 + t
// using halfword-wise subtraction and addition.
//   a0, a1   in/out: packed 16-bit pairs
//   twiddle  32-bit multiplier
//   tmp      scratch; holds the packed product t on exit
//   q        modulus, read from the top halfword of this register
//   qa       32-bit addend accumulated by both smlabt instructions
.macro doublebutterfly_plant a0, a1, twiddle, tmp, q, qa
// multiply each half of a1 by the twiddle, keeping bits [47:16]
smulwb \tmp, \twiddle, \a1
smulwt \a1, \twiddle, \a1
// each lane: bottom halfword of the product * top halfword of q, plus qa
smlabt \tmp, \tmp, \q, \qa
smlabt \a1, \a1, \q, \qa
// pack the top halfwords of both lanes into tmp
pkhtb \tmp, \a1, \tmp, asr#16
// the subtraction must come first: uadd16 overwrites a0
usub16 \a1, \a0, \tmp
uadd16 \a0, \a0, \tmp
.endm

// two_doublebutterfly_plant: two back-to-back doublebutterfly_plant steps.
//   (a0, a1) are combined using twiddle0, then (a2, a3) using twiddle1.
//   tmp is shared scratch for both steps; on exit it holds the packed
//   product from the second butterfly.
//   q and qa are passed through unchanged to doublebutterfly_plant.
.macro two_doublebutterfly_plant a0, a1, a2, a3, twiddle0, twiddle1, tmp, q, qa
doublebutterfly_plant \a0, \a1, \twiddle0, \tmp, \q, \qa
doublebutterfly_plant \a2, \a3, \twiddle1, \tmp, \q, \qa
.endm

// fullplant: apply doubleplant to each of the eight registers a0..a7, in
// that order, after loading the multiplier constant.
//   a0..a7      in/out: packed 16-bit pairs
//   tmp         scratch, clobbered
//   q, qa       passed through unchanged to doubleplant
//   plantconst  clobbered: set to (19 << 16) | 44984 = 1290168
// The constant is the one for modulus 3329.
// NOTE(review): confirm this macro is only used where the modulus is 3329.
.macro fullplant a0, a1, a2, a3, a4, a5, a6, a7, tmp, q, qa, plantconst
movw \plantconst, #44984
movt \plantconst, #19
.irp coeffs, \a0, \a1, \a2, \a3, \a4, \a5, \a6, \a7
doubleplant \coeffs, \tmp, \q, \qa, \plantconst
.endr
.endm

#endif
Loading

0 comments on commit 9c2bc41

Please sign in to comment.