-
Notifications
You must be signed in to change notification settings - Fork 6
/
strcountset64.asm
175 lines (150 loc) · 5.58 KB
/
strcountset64.asm
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
;************************* strcountinset64.asm *********************************
; Author: Agner Fog
; Date created: 2011-07-20
; Last modified: 2011-07-20
; Description:
; size_t strCountInSet(const char * str, const char * set);
;
; Counts how many characters in str belong to the set defined by set.
; Both strings are zero-terminated ASCII strings.
;
; Note that this function may read up to 15 bytes beyond the end of the strings.
; This is rarely a problem but it can in principle generate a protection violation
; if a string is placed at the end of the data segment.
;
; Position-independent code is generated if POSITIONINDEPENDENT is defined.
;
; CPU dispatching included for 386 and SSE4.2 instruction sets.
;
; Copyright (c) 2011 GNU General Public License www.gnu.org/licenses
;******************************************************************************
; Make memory operands without an explicit base register RIP-relative by default.
default rel
; Public entry point; jumps through the strCountInSetDispatch pointer.
global strCountInSet: function
; Direct entries to CPU-specific versions
global strCountInSetGeneric: function
global strCountInSetSSE42: function
; Imported from instrset64.asm:
extern InstructionSet ; Instruction set for CPU dispatcher
section .text
;******************************************************************************
; strCountInSet function
;******************************************************************************
; par1 / par2 name the registers that carry the first and second integer
; arguments (str, set) under the selected calling convention. They are used by
; the CPU dispatcher to preserve the arguments across its call to InstructionSet.
%ifdef WINDOWS
; Windows x64 convention: arguments arrive in rcx, rdx
%define par1 rcx
%define par2 rdx
%else
; Otherwise (System V AMD64 convention): arguments arrive in rdi, rsi
%define par1 rdi
%define par2 rsi
%endif
; size_t strCountInSet(const char * str, const char * set)
; Public entry point. Does no work itself: it tail-jumps through the
; strCountInSetDispatch pointer with both argument registers untouched.
strCountInSet:
        jmp     qword [rel strCountInSetDispatch]   ; indirect jump to the selected version
align 16
;------------------------------------------------------------------------------
; size_t strCountInSetSSE42(const char * str, const char * set)
;
; SSE4.2 version. Processes str in 16-byte blocks; every block is compared
; against the set 16 bytes at a time with PCMPISTRM and the resulting match
; bits are counted with POPCNT.
;
; Registers inside the loop:
;   rdi = current 16-byte block of str      rsi = current 16-byte block of set
;   r8  = start of set (to rewind rsi)      rax = running match count
;   ecx = match mask for the current str block
;   edx = match mask contributed by a further block of set
;   xmm2 = str block, xmm1 = set block, xmm0 = mask returned by PCMPISTRM
;
; Flags produced by PCMPISTRM that drive the control flow:
;   SF = 1  the set block (xmm1) contains the terminating zero
;   ZF = 1  the str block (xmm2) contains the terminating zero
; MOVD, MOV and the conditional jumps leave these flags intact; OR, ADD and
; POPCNT do not, so every flag test is placed before any of those.
;
; The unaligned 16-byte loads may read up to 15 bytes past either terminator.
;------------------------------------------------------------------------------
%define STRCNT_EQUAL_ANY_MASK 00000000b         ; imm8: find in set, bit mask result in xmm0
strCountInSetSSE42:
%ifdef WINDOWS
        push    rsi                             ; rsi and rdi are saved and restored in the Windows build
        push    rdi
        mov     rdi, rcx                        ; str
        mov     rsi, rdx                        ; set
%endif
        mov     r8, rsi                         ; remember where the set starts
        xor     eax, eax                        ; match counter = 0

.next_str_block:
        movdqu  xmm2, [rdi]                     ; 16 bytes of str
        movdqu  xmm1, [rsi]                     ; first 16 bytes of set
        pcmpistrm xmm1, xmm2, STRCNT_EQUAL_ANY_MASK
        movd    ecx, xmm0                       ; ecx = matches against first set block
        js      .short_set_done                 ; set ends inside its first 16 bytes

        ; The set is longer than 16 bytes: walk through the rest of it.
.next_set_block:
        add     rsi, 16
        movdqu  xmm1, [rsi]                     ; next part of set
        pcmpistrm xmm1, xmm2, STRCNT_EQUAL_ANY_MASK
        movd    edx, xmm0                       ; edx = matches against this set block
        js      .long_set_done                  ; reached the end of the set
        or      ecx, edx                        ; accumulate matches
        jmp     .next_set_block

.long_set_done:
        jz      .merge_and_finish               ; str ended in this block as well
        mov     rsi, r8                         ; rewind set for the next str block
        or      ecx, edx                        ; accumulate matches
        jmp     .count_block

.short_set_done:
        jz      .finish                         ; str ended in this block
.count_block:
        popcnt  ecx, ecx                        ; number of matching characters in block
        add     rax, rcx
        add     rdi, 16                         ; a terminating zero would never match
        jmp     .next_str_block

.merge_and_finish:
        or      ecx, edx                        ; accumulate matches
.finish:
        popcnt  ecx, ecx                        ; count the final (partial) block
        add     rax, rcx
%ifdef WINDOWS
        pop     rdi
        pop     rsi
%endif
        ret
;strCountInSetSSE42 end
;******************************************************************************
; strCountInSet function generic
;******************************************************************************
align 8
;------------------------------------------------------------------------------
; size_t strCountInSetGeneric(const char * str, const char * set)
;
; Generic version without SIMD instructions. For every character of str the
; set is scanned from its first character until a match or the set's
; terminating zero is found.
;
; In:    str and set in the first two argument registers of the calling
;        convention (moved to rdi / rsi in the Windows build)
; Out:   rax = number of characters of str that occur in set
; Uses:  rcx = current character of str, rdx = current character of set,
;        rsi = scan position in set, r8 = start of set, flags
;
; The set pointer is rewound to r8 before every character of str. Without
; that, a match would leave rsi in the middle of the set and the following
; character would only be compared against the remainder of the set
; (e.g. str = "aa", set = "ba" would be counted as 1 instead of 2).
;------------------------------------------------------------------------------
strCountInSetGeneric:
%ifdef WINDOWS
        push    rsi                             ; rsi and rdi are saved and restored in the Windows build
        push    rdi
        mov     rdi, rcx                        ; str
        mov     rsi, rdx                        ; set
%endif
        mov     r8, rsi                         ; r8 = start of set
        xor     eax, eax                        ; match counter = 0

.next_char:
        movzx   ecx, byte [rdi]                 ; ecx = next character of str
        test    ecx, ecx
        jz      .done                           ; end of str
        inc     rdi
        mov     rsi, r8                         ; always scan the set from its start

.scan_set:
        movzx   edx, byte [rsi]                 ; edx = next character of set
        test    edx, edx
        jz      .next_char                      ; end of set, no match for this character
        inc     rsi
        cmp     ecx, edx
        jne     .scan_set
        inc     rax                             ; character is in the set: count it
        jmp     .next_char

.done:                                          ; end of str, count is in rax
%ifdef WINDOWS
        pop     rdi
        pop     rsi
%endif
        ret
;strCountInSetGeneric end
; ********************************************************************************
; CPU dispatching for strCountInSet. This is executed only once
; ********************************************************************************
; Initial target of the strCountInSetDispatch pointer. Asks InstructionSet for
; the supported instruction set level, stores the address of the matching
; implementation in strCountInSetDispatch so that later calls through
; strCountInSet go straight to it, and then continues in that implementation
; with the original arguments.
;
; In:    par1 = str, par2 = set (preserved across the call to InstructionSet)
; Uses:  rax (return value of InstructionSet), r8, flags
;
; Stack: this code is reached by a jmp from strCountInSet, so rsp % 16 == 8 on
; entry. The two pushes leave it at 8; subtracting 40 makes rsp 16-byte aligned
; at the call and also provides the 32 bytes of shadow space that the Windows
; x64 convention requires (harmless extra space in the System V build).
strCountInSetCPUDispatch:
        ; get supported instruction set
        push    par1
        push    par2
        sub     rsp, 40                         ; 32 bytes shadow space + 8 bytes alignment
        call    InstructionSet
        add     rsp, 40
        pop     par2
        pop     par1
        ; Point to generic version of strCountInSet
        lea     r8, [strCountInSetGeneric]
        cmp     eax, 10                         ; check SSE4.2
        jb      .store
        ; SSE4.2 supported
        ; Point to SSE4.2 version of strCountInSet
        lea     r8, [strCountInSetSSE42]
.store: mov     [strCountInSetDispatch], r8     ; remember the choice for later calls
        ; Continue in appropriate version
        jmp     r8
SECTION .data
; Pointer to appropriate versions. Initially point to dispatcher
strCountInSetDispatch DQ strCountInSetCPUDispatch
SECTION .bss
; 16 bytes of uninitialised space at the end of the data.
; NOTE(review): presumably padding so that the 16-byte string reads described
; in the file header (up to 15 bytes past a terminator) stay inside mapped
; memory when a string is the last item in the data - confirm against the
; link map. Reserved with resq because .bss cannot hold initialised data.
        resq    2