-
Notifications
You must be signed in to change notification settings - Fork 54
/
Copy pathdec64.asm
1461 lines (1132 loc) · 54.7 KB
/
dec64.asm
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
title dec64.asm for x64.
; dec64.com
; 2024-01-27
; Public Domain
; No warranty expressed or implied. Use at your own risk. You have been warned.
; This file implements the elementary arithmetic operations for DEC64, a
; decimal floating point type. DEC64 uses 64 bits to represent a number. The
; low order 8 bits contain an exponent that ranges from -127 to 127. The
; exponent is not biased. The exponent -128 is reserved for nan (not a number).
; The remaining 56 bits, including the sign bit, are the coefficient in the
; range -36_028_797_018_963_968 thru 36_028_797_018_963_967. The exponent and
; the coefficient are both two's complement signed numbers.
;
; The value of any non-nan DEC64 number is coefficient * (10 ** exponent).
;
; Rounding is to the nearest value. Ties are rounded away from zero. Integer
; division is floored. The result of modulo has the sign of the divisor.
; There is no negative zero.
;
; These operations produce a result of nan:
;
; dec64_abs(nan)
; dec64_ceiling(nan)
; dec64_floor(nan)
; dec64_neg(nan)
; dec64_normal(nan)
; dec64_signum(nan)
;
; These operations produce a result of zero for all values of n,
; even if n is nan:
;
; dec64_divide(0, n)
; dec64_integer_divide(0, n)
; dec64_modulo(0, n)
; dec64_multiply(0, n)
; dec64_multiply(n, 0)
;
; These operations produce a result of nan for all values of n
; except zero:
;
; dec64_divide(n, 0)
; dec64_divide(n, nan)
; dec64_integer_divide(n, 0)
; dec64_integer_divide(n, nan)
; dec64_modulo(n, 0)
; dec64_modulo(n, nan)
; dec64_multiply(n, nan)
; dec64_multiply(nan, n)
;
; These operations produce a result of nan for all values of n:
;
; dec64_add(n, nan)
; dec64_add(nan, n)
; dec64_divide(nan, n)
; dec64_integer_divide(nan, n)
; dec64_modulo(nan, n)
; dec64_round(nan, n)
; dec64_subtract(n, nan)
; dec64_subtract(nan, n)
;
; This file can be processed with Microsoft's ML64.exe. There might be other
; assemblers that can process this file, but that has not been tested.
;
; You know what goes great with those defective pentium chips?
; Defective pentium salsa! (David Letterman)
; All public symbols have a dec64_ prefix. All other symbols are private.
; There are 72057594037927936 possible nan values. Three are reserved:
;
; DEC64_NULL
; DEC64_TRUE
; DEC64_FALSE
;
; The other 72057594037927933 nan values can be used for any purpose,
; including object pointers.
; When these functions return nan, they always return DEC64_NULL,
; the normal nan.
; The comparison functions return DEC64_TRUE or DEC64_FALSE.
null equ 0000000000000080h
true equ 0000000000000380h
false equ 0000000000000280h
; Multiplication by a scaled reciprocal can be faster than division.
eight_over_ten equ -3689348814741910323
public dec64_abs;(number: dec64)
; returns absolution: dec64
public dec64_add;(augend: dec64, addend: dec64)
; returns sum: dec64
public dec64_ceiling;(number: dec64)
; returns integer: dec64
public dec64_coefficient;(number: dec64)
; returns coefficient: int64
public dec64_divide;(dividend: dec64, divisor: dec64)
; returns quotient: dec64
public dec64_exponent;(number: dec64)
; returns exponent: int64
public dec64_floor;(number: dec64)
; returns integer: dec64
public dec64_integer_divide;(dividend: dec64, divisor: dec64)
; returns quotient: dec64
public dec64_is_equal;(comparahend: dec64, comparator: dec64)
; returns comparison: dec64
public dec64_is_false;(boolean: dec64)
; returns comparison: dec64
public dec64_is_integer;(number: dec64)
; returns comparison: dec64
public dec64_is_less;(comparahend: dec64, comparator: dec64)
; returns comparison: dec64
public dec64_is_nan;(number: dec64)
; returns comparison: dec64
public dec64_is_zero;(number: dec64)
; returns comparison: dec64
public dec64_modulo;(dividend: dec64, divisor: dec64)
; returns modulus: dec64
public dec64_multiply;(multiplicand: dec64, multiplier: dec64)
; returns product: dec64
public dec64_neg;(number: dec64)
; returns negation: dec64
public dec64_new;(coefficient: int64, exponent: int64)
; returns number: dec64
public dec64_normal;(number: dec64)
; returns normalization: dec64
public dec64_round;(number: dec64, place: dec64)
; returns quantization: dec64
public dec64_signum;(number: dec64)
; returns signature: dec64
public dec64_subtract;(minuend: dec64, subtrahend: dec64)
; returns difference: dec64
; -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- --
; Repair the register names. Over the long and twisted evolution of x86, the
; register names have picked up some weird, inconsistent conventions. We can
; simplify, naming them r0 thru r15, with a _b suffix to indicate the low
; order byte, an _h suffix to indicate the so-called high byte and a _w suffix
; to indicate the lower 16 bits. We would use _d to indicate the lower 32 bits,
; but that was not needed here.
r0 equ rax
r1 equ rcx
r2 equ rdx
r6 equ rsi
r7 equ rdi
r0_b equ al
r1_b equ cl
r2_b equ dl
r8_b equ r8b
r10_b equ r10b
r0_h equ ah
r1_h equ ch
r2_h equ dh
r0_w equ ax
r1_w equ cx
; All of the public functions in this file accept up to two arguments, which
; are passed in registers (either r1, r2 or r7, r6), returning a result in r0.
; Registers r1, r2, r8, r9, r10, and r11 are clobbered. Register r0 is the
; return value. The other registers are not disturbed.
; There is painfully inadequate standardization around x64 calling conventions.
; On Win64, the first two arguments are passed in r1 and r2. On Unix, the first
; two arguments are passed in r7 and r6. We try to hide this behind macros. The
; two systems also have different conventions about which registers may be
; clobbered and which must be preserved. This code lives in the intersection.
UNIX equ 0 ; calling convention: 0 for Windows, 1 for Unix
function_with_one_parameter macro
if UNIX
mov r1, r7 ;; UNIX
endif
endm
function_with_two_parameters macro
if UNIX
mov r1, r7 ;; UNIX
mov r2, r6 ;; UNIX
endif
endm
call_with_one_parameter macro function
if UNIX
mov r7, r1 ;; UNIX
endif
call function
endm
call_with_two_parameters macro function
if UNIX
mov r7, r1 ;; UNIX
mov r6, r2 ;; UNIX
endif
call function
endm
tail_with_one_parameter macro function
if UNIX
mov r7, r1 ;; UNIX
endif
jmp function
endm
tail_with_two_parameters macro function
if UNIX
mov r7, r1 ;; UNIX
mov r6, r2 ;; UNIX
endif
jmp function
endm
; There may be a performance benefit in padding programs so that most jump
; destinations are aligned on 16 byte boundaries.
pad macro
align 16
endm
; -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- --
dec64_data segment para read
power: ; the powers of 10
qword 1 ; 0
qword 10 ; 1
qword 100 ; 2
qword 1000 ; 3
qword 10000 ; 4
qword 100000 ; 5
qword 1000000 ; 6
qword 10000000 ; 7
qword 100000000 ; 8
qword 1000000000 ; 9
qword 10000000000 ; 10
qword 100000000000 ; 11
qword 1000000000000 ; 12
qword 10000000000000 ; 13
qword 100000000000000 ; 14
qword 1000000000000000 ; 15
qword 10000000000000000 ; 16
qword 100000000000000000 ; 17
qword 1000000000000000000 ; 18
qword 10000000000000000000 ; 19
dec64_data ends
; -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- --
dec64_code segment para execute
dec64_coefficient: function_with_one_parameter
;(number: dec64) returns coefficient: int64
; Return the coefficient part from a dec64 number.
mov r0, r1
sar r0, 8
ret
pad; -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- --
dec64_exponent: function_with_one_parameter
;(number: dec64) returns exponent: int64
; Return the exponent part, sign extended to 64 bits, from a dec64 number.
; dec64_exponent(nan) returns -128.
movsx r0, r1_b ; r0 is the exponent
ret
pad; -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- --
dec64_new: function_with_two_parameters
;(coefficient: int64, exponent: int64) returns number: dec64
; Construct a new dec64 number with a coefficient and an exponent.
mov r0, r1 ; r0 is the coefficient
test r0, r0 ; is the coefficient zero?
jz return_zero
mov r8, r2 ; r8 is the exponent
; Fall into pack.
pad; -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- --
pack:
; The pack function combines the coefficient and exponent into a dec64.
; Numbers that are too huge to be contained in this format become nan.
; Numbers that are too tiny to be contained in this format become zero.
; The coefficient is in r0.
; The exponent is in r8.
; If the exponent is greater than 127, then the number is too big.
; But it might still be possible to salvage a value.
cmp r8, 127 ; compare exponent with 127
jg pack_decrease ; it might be possible to decrease it
; If the exponent is too small, or if the coefficient is too large, then some
; division is necessary. The absolute value of the coefficient is off by one
; for the negative because
; negative_extreme_coefficent = -(extreme_coefficent + 1)
mov r10, r0 ; r10 is the coefficient
mov r1, 3602879701896396800 ; the ultimate coefficient * 100
not r10 ; r10 is -coefficient
xor r11, r11 ; r11 is zero
test r10, r10 ; look at the sign bit
cmovs r10, r0 ; r10 is the absolute value of coefficient
cmp r10, r1 ; compare with the actual coefficient
jae pack_large ; deal with the very large coefficient
mov r1, 36028797018963967 ; the ultimate coefficient - 1
mov r9, -127 ; r9 is the ultimate exponent
cmp r1, r10 ; compare with the actual coefficient
adc r11, 0 ; add 1 to r11 if 1 digit too big
mov r1, 360287970189639679 ; the ultimate coefficient * 10 - 1
sub r9, r8 ; r9 is the difference from actual exponent
cmp r1, r10 ; compare with the actual coefficient
adc r11, 0 ; add 1 to r11 if 2 digits too big
cmp r9, r11 ; which excess is larger?
cmovl r9, r11 ; take the max
test r9, r9 ; if neither was zero
jnz pack_increase ; then increase the exponent by the excess
shl r0, 8 ; shift the exponent into position
cmovz r8, r0 ; if coefficient is zero, also zero the exp
movzx r8, r8_b ; zero out all but the bottom 8 bits of exp
or r0, r8 ; mix in the exponent
ret ; whew
pad
pack_large:
mov r1, r0 ; r1 is the coefficient
sar r1, 63 ; r1 is -1 if negative, or 0 if positive
mov r11, eight_over_ten ; magic number
mov r9, r1 ; r9 is -1 or 0
xor r0, r1 ; complement the coefficient if negative
and r9, 1 ; r9 is 1 or 0
add r0, r9 ; r0 is absolute value of coefficient
add r8, 1 ; add 1 to the exponent
mul r11 ; multiply abs(coefficient) by magic number
mov r0, r2 ; r0 is the product shift 64 bits
shr r0, 3 ; r0 is divided by 8: abs(coefficient) / 10
xor r0, r1 ; complement coefficient if it was negative
add r0, r9 ; coefficient's sign is restored
jmp pack ; start over
pad
pack_increase:
mov r10, power
mov r10, [r10][r9*8] ; r10 is 10^r9
cmp r9, 20 ; is the difference more than 20?
jae return_zero ; if so, the result is zero (rare)
mov r11, r10 ; r11 is the power of ten
neg r11 ; r11 is the negation of the power of ten
test r0, r0 ; examine the sign of the coefficient
cmovns r11, r10 ; r11 has the sign of the coefficient
sar r11, 1 ; r11 is half the signed power of ten
add r0, r11 ; r0 is coefficient plus the rounding fudge
cqo ; sign extend r0 into r2
idiv r10 ; divide by the power of ten
add r8, r9 ; increase the exponent
jmp pack ; start over
pad
pack_decrease:
; The exponent is too big. We can attempt to reduce it by scaling back.
; This can salvage values in a small set of cases.
imul r0, 10 ; try multiplying the coefficient by 10
jo return_null ; if it overflows, we failed to salvage
sub r8, 1 ; decrement the exponent
test r8, -128 ; is the exponent still over 127?
jnz pack_decrease ; until the exponent is less than 127
mov r9, r0 ; r9 is the coefficient
sar r9, 56 ; r9 is top 8 bits of the coefficient
adc r9, 0 ; add the ninth bit
jnz return_null ; the number is still too large
shl r0, 8 ; shift the exponent into position
cmovz r8, r0 ; if coefficient is zero, also zero exponent
movzx r8, r8_b ; zero out all but bottom 8 bits of exponent
or r0, r8 ; mix in the exponent
ret
pad; -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- --
dec64_round: function_with_two_parameters
;(number: dec64, place: dec64) returns quantization: dec64
; The place argument indicates at what decimal place to round.
; -2 nearest cent
; 0 nearest integer
; 3 nearest thousand
; 6 nearest million
; 9 nearest billion
; The place should be between -16 and 16.
pad
round_begin:
cmp r1_b, 128 ; is the number nan?
jz return_null
test r2_b, r2_b ; is places already an integer?
jnz round_places ; nope
mov r10, r1 ; r10 is the number
mov r11, r2 ; r11 is the places
movsx r8, r10_b ; r8 is the current exponent
mov r0, r10 ; r0 is the coefficient
sar r11, 8 ; r11 is place: int64
sar r0, 8 ; r0 is the coefficient of the number
jz return_zero ; if the coefficient is 0, the result is 0
cmp r8, r11 ; compare the exponents
jge pack ; no rounding required
mov r1, r0
mov r9, eight_over_ten ; magic
neg r1 ; r1 is -coefficient
cmovns r0, r1 ; r0 is abs(coefficient)
pad
round_loop:
; Increment the exponent and divide the coefficient by 10 until the target
; exponent is reached. The division is accomplished by multiplying with a
; scaled reciprocal.
mul r9 ; r2 is the coefficient * 8 / 10
mov r0, r2 ; r0 is the coefficient * 8 / 10
sar r0, 3 ; r0 is the coefficient / 10
add r8, 1 ; increment the exponent
cmp r8, r11 ; compare the exponent with the target
jne round_loop ; loop if the exponent is not at the target
; Round if necessary and return the result.
shr r2, 2 ; isolate the carry bit
and r2, 1 ; r2 is 1 if rounding is needed
add r0, r2 ; r0 is rounded
mov r1, r0
neg r1
test r10, r10 ; was the original number negative?
cmovs r0, r1
jmp pack ; pack it up
pad
round_places:
; Places does not seem to be an integer. If it is nan, then default to zero.
cmp r2_b, 128
jne round_normal
xor r2, r2
jmp round_begin
pad
round_normal:
mov r10, r1 ; save the number
mov r1, r2 ; pass the place
call_with_one_parameter dec64_normal
test r0_b, r0_b ; is the number of places a normal integer?
jnz return_null
mov r1, r10
mov r2, r0
jmp round_begin
pad; -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- --
dec64_add: function_with_two_parameters
;(augend: dec64, addend: dec64) returns sum: dec64
; Add two dec64 numbers together.
; If the two exponents are both zero (which is usually the case for integers)
; we can take the fast path. Since the exponents are both zero, we can simply
; add the numbers together and check for overflow.
pad
add_begin:
mov r0, r1 ; r0 is the first number
or r1_b, r2_b ; r1_b is the two exponents or'd together
jnz add_slow ; if either exponent is not zero, slow path
add r0, r2 ; add the shifted coefficients together
jo add_overflow ; if there was no overflow, we are done
ret ; no need to pack
pad
add_overflow:
; If there was an overflow (extremely unlikely) then we must make it fit.
; pack knows how to do that.
rcr r0, 1 ; divide the sum by 2 and repair its sign
movsx r8, r1_b ; r8 is the exponent
sar r0, 7 ; r0 is the coefficient of the sum
jmp pack ; pack it up
pad
add_slow:
; The slow path is taken if the two operands do not both have zero exponents.
mov r1, r0 ; restore r1
cmp r0_b, 128 ; is the first operand nan?
je return_null ; if nan, get out
; Are the two exponents the same? This happens often, especially with
; money values.
cmp r1_b, r2_b ; compare the two exponents
jne add_slower ; if not equal, take the slower path
; The exponents match so we may add now. Zero out the exponents so there
; is no carry into the coefficients when the coefficients are added.
; If the result is zero, then return the normal zero.
and r0, -256 ; remove the exponent
and r2, -256 ; remove the other exponent
add r0, r2 ; add the shifted coefficients
jo add_overflow ; if it overflows, it must be repaired
cmovz r1, r0 ; if coefficient is zero, exponent is zero
movzx r1, r1_b ; mask the exponent
or r0, r1 ; mix in the exponent
ret ; no need to pack
pad
add_slower:
; The slower path is taken when neither operand is nan, and their
; exponents are different. Before addition can take place, the exponents
; must be made to match. Swap the numbers if the second exponent is greater
; than the first.
cmp r2_b, 128 ; Is the second operand nan?
je return_null
cmp r1_b, r2_b ; compare the exponents
mov r0, r1 ; r0 is the first number
cmovl r1, r2 ; r1 is the number with the larger exponent
cmovl r2, r0 ; r2 is the number with the smaller exponent
; Shift the coefficients of r1 and r2 into r10 and r11 and unpack the exponents.
mov r10, r1 ; r10 is the first number
mov r11, r2 ; r11 is the second number
movsx r8, r1_b ; r8 is the first exponent
movsx r9, r2_b ; r9 is the second exponent
sar r10, 8 ; r10 is the first coefficient
sar r11, 8 ; r11 is the second coefficient
mov r0, r10 ; r0 is the first coefficient
pad
add_slower_decrease:
; The coefficients are not the same. Before we can add, they must be the same.
; We try to decrease the first exponent. When we decrease the exponent
; by 1, we must also multiply the coefficient by 10. We can do this as long as
; there is no overflow. We have 8 extra bits to work with, so we can do this
; at least twice, possibly more.
imul r0, 10 ; before decrementing the exponent, multiply
jo add_slower_increase
sub r8, 1 ; decrease the first exponent
mov r10, r0 ; r10 is the enlarged first coefficient
cmp r8, r9 ; are the exponents equal yet?
jg add_slower_decrease
mov r0, r11 ; r0 is the second coefficient
; The exponents are now equal, so the coefficients may be added.
add r0, r10 ; add the two coefficients
jmp pack ; pack it up
pad
add_slower_increase:
; We cannot decrease the first exponent any more, so we must instead try to
; increase the second exponent, which results in a loss of significance.
; That is the heartbreak of floating point.
; Determine how many places need to be shifted. If it is more than 17, there is
; nothing more to add.
mov r2, r8 ; r2 is the first exponent
sub r2, r9 ; r2 is the remaining exponent difference
mov r0, r11 ; r0 is the second coefficient
cmp r2, 17 ; 17 is the max digits in a packed coefficient
ja return_r1 ; too small to matter
mov r9, power
mov r9, [r9][r2*8] ; r9 is the power of ten
cqo ; sign extend r0 into r2
idiv r9 ; divide second coefficient by power of 10
test r0, r0 ; examine the scaled coefficient
jz return_r1 ; too insignificant to add?
; The exponents are now equal, so the coefficients may be added.
add r0, r10 ; add the two coefficients
jmp pack
pad
return_r1:
mov r0, r1 ; r0 is the original number
ret
pad; -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- --
dec64_ceiling: function_with_one_parameter
;(number: dec64) returns integer: dec64
; Produce the smallest integer that is greater than or equal to the number. In
; the result, the exponent will be greater than or equal to zero unless it is
; nan. Numbers with positive exponents are not modified, even if the
; numbers are outside of the safe integer range.
; Preserved: r11.
mov r9, 1 ; r9 is the round up flag
jmp floor_begin
pad; -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- --
dec64_floor: function_with_one_parameter
;(number: dec64) returns integer: dec64
; Produce the largest integer that is less than or equal to the number. This
; is sometimes called the entier function. In the result, the exponent will be
; greater than or equal to zero unless it is nan. Numbers with positive
; exponents are not modified, even if the numbers are outside of the safe
; integer range.
; Preserved: r11.
mov r9, -1 ; r9 is the round down flag
pad
floor_begin:
cmp r1_b, 128
je return_null
mov r0, r1 ; r0 is the number
movsx r8, r1_b ; r8 is the exponent
sar r0, 8 ; r0 is the coefficient
cmovz r1, r0 ; if coefficient is zero, the number is zero
neg r8 ; r8 is the negated exponent
test r1_b, r1_b ; examine the exponent
jns return_r1 ; nothing to do unless exponent was negative
cmp r8, 17 ; is the exponent is too extreme?
jae floor_micro ; deal with a micro number
mov r10, power
mov r10, [r10][r8*8] ; r10 is the power of ten
cqo ; sign extend r0 into r2
idiv r10 ; divide r2:r0 by 10
test r2, r2 ; examine the remainder
jnz floor_remains ; deal with the remainder
shl r0, 8 ; pack the coefficient
ret
pad
floor_micro:
mov r2, r0 ; r2 is the coefficient
xor r0, r0 ; r0 is zero
pad
floor_remains:
; If the remainder is negative and the rounding flag is negative, then we need
; to decrement r0. But if the remainder and the rounding flag are both
; positive, then we need to increment r0.
xor r10, r10 ; r10 is zero
xor r2, r9 ; xor the remainder and the rounding
cmovs r9, r10 ; if different signs, clear the rounding
add r0, r9 ; add the rounding to the coefficient
shl r0, 8 ; pack the coefficient
ret
pad; -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- --
dec64_subtract: function_with_two_parameters
;(minuend: dec64, subtrahend: dec64) returns difference: dec64
; Subtract the dec64 number in r2 from the dec64 number in r1.
; The result is in r0.
; This is the same as dec64_add, except that the operand in r2 has its
; coefficient complemented first.
xor r2, -256 ; not the coefficient only
add r2, 256 ; two's complement increment of coefficient
jno add_begin ; if there is no overflow, begin the beguine
; The subtrahend coefficient is -36028797018963968. This value cannot easily be
; complemented, so take the slower path. This should be extremely rare.
cmp r1_b, 128 ; is the first operand nan
sete r0_b
cmp r2_b, 128 ; is the second operand nan?
sete r0_h
or r0_b, r0_h ; is either nan?
jnz return_null
mov r10, r1 ; r10 is the first coefficient
movsx r8, r1_b ; r8 is the first exponent
sar r10, 8
movsx r9, r2_b ; r9 is the second exponent
mov r11, 80000000000000H ; r11 is 36028797018963968
mov r0, r10 ; r0 is the first coefficient
cmp r8, r9 ; if the second exponent is larger, swap
jge subtract_slower_decrease_compare
mov r0, r11 ; r0 is the second coefficient
xchg r8, r9 ; swap the exponents
xchg r10, r11 ; swap the coefficients
jmp subtract_slower_decrease_compare
pad
subtract_slower_decrease:
; The coefficients are not the same. Before we can add, they must be the same.
; We try to decrease the first exponent. When we decrease the exponent
; by 1, we must also multiply the coefficient by 10. We can do this as long as
; there is no overflow. We have 8 extra bits to work with, so we can do this
; at least twice, possibly more.
imul r0, 10 ; before decrementing the exponent, multiply
jo subtract_slower_increase
sub r8, 1 ; decrease the first exponent
mov r10, r0 ; r10 is the enlarged first coefficient
pad
subtract_slower_decrease_compare:
cmp r8, r9 ; are the exponents equal yet?
jg subtract_slower_decrease
mov r0, r11 ; r0 is the second coefficient
; The exponents are now equal, so the coefficients may be added.
add r0, r10 ; add the two coefficients
jmp pack ; pack it up
pad
subtract_slower_increase:
; We cannot decrease the first exponent any more, so we must instead try to
; increase the second exponent, which results in a loss of significance.
; That is the heartbreak of floating point.
; Determine how many places need to be shifted. If it is more than 17, there is
; nothing more to add.
mov r2, r8 ; r2 is the first exponent
sub r2, r9 ; r2 is the remaining exponent difference
mov r0, r11 ; r0 is the second coefficient
cmp r2, 17 ; 17 is the max digits in packed coefficient
ja subtract_underflow ; too small to matter
mov r9, power
mov r9, [r9][r2*8] ; r9 is the power of ten
cqo ; sign extend r0 into r2
idiv r9 ; divide second coefficient by power of 10
; The exponents are now equal, so the coefficients may be added.
add r0, r10 ; add the two coefficients
jmp pack
pad
subtract_underflow:
mov r0, r10 ; r0 is the first coefficient
jmp pack
pad; -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- --
dec64_multiply: function_with_two_parameters
;(multiplicand: dec64, multiplier: dec64) returns product: dec64
; Multiply two dec64 numbers together.
; Unpack the exponents into r8 and r9.
movsx r8, r1_b ; r8 is the first exponent
movsx r9, r2_b ; r9 is the second exponent
; Set flags in r0 if either operand is nan.
cmp r1_b, 128 ; is the first operand nan?
sete r0_b ; r0_b is 1 if the first operand is nan
cmp r2_b, 128 ; is the second operand nan?
sete r0_h ; r0_h is 1 if the second operand is nan
; Unpack the coefficients. Set flags in r1 if either is not zero.
sar r1, 8 ; r1 is the first coefficient
mov r10, r1 ; r10 is the first coefficient
setnz r1_b ; r1_b is 1 if first coefficient is not zero
sar r2, 8 ; r2 is the second coefficient
setnz r1_h ; r1_h is 1 if second coefficient is not 0
; The result is nan if one or both of the operands is nan and neither of the
; operands is zero.
or r1_w, r0_w ; is either coefficient zero and not nan?
xchg r1_b, r1_h
test r0_w, r1_w
jnz return_null
mov r0, r10 ; r0 is the first coefficient
add r8, r9 ; r8 is the product exponent
imul r2 ; r2:r0 is r1 * r2
jno pack ; if product fits in 64 bits, start packing
; There was overflow.
; Make the 110 bit coefficient in r2:r0Er8 all fit. Estimate the number of
; digits of excess, and increase the exponent by that many digits.
; We use 77/256 to convert log2 to log10.
mov r9, r2 ; r9 is the excess significance
xor r1, r1 ; r1 is zero anticipating bsr
neg r9
cmovs r9, r2 ; r9 is abs of excess significance
bsr r1, r9 ; find the position of most significant bit
imul r1, 77 ; multiply the bit number by 77/256 to
shr r1, 8 ; convert a bit number to a digit number
add r1, 2 ; add two extra digits to the scale
add r8, r1 ; increase the exponent
mov r9, power
idiv qword ptr [r9][r1*8] ; divide by the power of ten
jmp pack
pad; -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- --
dec64_divide: function_with_two_parameters
;(dividend: dec64, divisor: dec64) returns quotient: dec64
; Divide a dec64 number by another.
; Begin unpacking the components.
cmp r2, 200h
jz divide_two
movsx r8, r1_b ; r8 is the first exponent
movsx r9, r2_b ; r9 is the second exponent
mov r10, r1 ; r10 is the first number
mov r11, r2 ; r11 is the second number
; Set nan flags in r0.
cmp r1_b, 128 ; is the first operand nan?
sete r0_b ; r0_b is 1 if the first operand is nan
cmp r2_b, 128 ; is the second operand nan?
sete r0_h ; r0_h is 1 if the second operand is nan
sar r10, 8 ; r10 is the dividend coefficient
setnz r1_b ; r1_b is 1 if dividend coefficient is not 0
or r1_b, r0_b ; r0_b is 1 if either is nan or divisor is 0
jz return_zero ; if the dividend is zero, quotient is zero
sar r11, 8 ; r11 is the divisor coefficient
setz r1_b ; r1_h is 1 if dividing by zero
or r0_h, r0_b ; r0_h is 1 if either is nan
or r1_b, r0_h ; r1_b is zero if dividend is zero & not nan
jnz return_null
sub r8, r9 ; r8 is the quotient exponent
pad
divide_measure:
; We want to get as many bits into the quotient as possible in order to capture
; enough significance. But if the quotient has more than 64 bits, then there
; may be a hardware fault. To avoid that, we compare the magnitudes of the
; dividend coefficient and divisor coefficient, and use that to scale the
; dividend to give us a good quotient.
mov r0, r10 ; r0 is the first coefficient
mov r1, r11 ; r1 is the second coefficient
neg r0 ; r0 is negated
cmovs r0, r10 ; r0 is abs of dividend coefficient
neg r1 ; r1 is negated
cmovs r1, r11 ; r1 is abs of divisor coefficient
bsr r0, r0 ; r0 is the dividend most significant bit
bsr r1, r1 ; r1 is the divisor most significant bit
; Scale up the dividend to be approximately 58 bits longer than the divisor.
; Scaling uses factors of 10, so we must convert from a bit count to a digit
; count by multiplication by 77/256 (approximately LN2/LN10).
add r1, 58 ; we want approx 58 bits in raw quotient
sub r1, r0 ; r1 is the number of bits to add to the dividend
imul r1, 77 ; multiply by 77/256 to convert bits|digits
shr r1, 8 ; r1 is number of digits to scale dividend
; The largest power of 10 that can be held in an int64 is 1e18.
cmp r1, 18 ; prescale the dividend if 10**r1 won't fit
jg divide_prescale
; Multiply the dividend by the scale factor, and divide that 128 bit result by
; the divisor. Because of the scaling, the quotient is guaranteed to use most
; of the 64 bits in r0, and never more. Reduce the final exponent by the number
; of digits scaled.
mov r0, r10 ; r0 is the dividend coefficient
mov r9, power
imul qword ptr [r9][r1*8] ; r2:r0 is the dividend coefficient * 10**r1
idiv r11 ; r0 is the quotient
sub r8, r1 ; r8 is the exponent
jmp pack ; pack it up
pad
divide_prescale:
; If the number of scaling digits is larger than 18, then we have to
; scale in two steps: first prescaling the dividend to fill a register, and
; then repeating to fill a second register. This happens when the divisor
; coefficient is much larger than the dividend coefficient.
mov r1, 58 ; we want 58 bits or so in the dividend
sub r1, r0 ; r1 is the number of additional bits needed
imul r1, 77 ; convert bits to digits
shr r1, 8 ; shift 8 is cheaper than div 256
mov r9, power
imul r10, qword ptr [r9][r1*8] ; multiply the dividend by power of ten
sub r8, r1 ; reduce the exponent
jmp divide_measure ; try again
divide_two:
; Divide a dec64 number by two.
cmp r1_b, 128
je return_null
test r1_h, 1
jz divide_half
; Unpack the components into r8 and r1, multiply by 5 and divide by 10.
movsx r8, r1_b ; r8 is the exponent
sar r1, 8 ; r1 is the coefficient
sub r8, 1 ; bump down the exponent
lea r0, [r1+r1*4] ; r0 is the coefficient * 5
jmp pack
pad
divide_half:
; If the least significant bit of the coefficient is 0, then we can do this
; the fast way. Shift the coefficient by 1 bit and restore the exponent. If
; the shift produces zero, even easier.
mov r0, -256 ; r0 is the coefficient mask
and r0, r1 ; r0 is the coefficient shifted 8 bits
jz return
sar r0, 1 ; r0 is divided by 2
movzx r1, r1_b ; zero out r1 except lowest 8 bits
or r0, r1 ; mix in the exponent
ret