-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcrc64_amd64.s
155 lines (148 loc) · 3.42 KB
/
crc64_amd64.s
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
// Copyright (c) 2025 Minio Inc. All rights reserved.
// Use of this source code is governed by a license that can be
// found in the LICENSE file.
#include "textflag.h"
// FOLD128(acc, off) advances one 16-byte accumulator by a 128-byte stride
// and absorbs the next 16 input bytes for that lane:
//
//	acc = clmul(acc.lo, X8.lo) ^ clmul(acc.hi, X9.hi) ^ mem[SI+off]
//
// The memory operand of PXOR must be 16-byte aligned. X10 is scratch.
#define FOLD128(acc, off) \
	MOVOA     acc, X10; \
	PCLMULQDQ $0x00, X8, X10; \
	PCLMULQDQ $0x11, X9, acc; \
	PXOR      X10, acc; \
	PXOR      off(SI), acc

// FOLD_TAIL(klo, khi, acc, prev) multiplies the two halves of acc by the
// lane-specific constants klo/khi and chains the result onto prev:
//
//	acc = clmul(acc.lo, klo) ^ clmul(acc.hi, khi) ^ prev
//
// AX, X11 and X12 are scratch. Only the high qword of X12 is written and
// only the high qword is consumed (selector $0x11).
#define FOLD_TAIL(klo, khi, acc, prev) \
	MOVQ      $klo, AX; \
	MOVQ      AX, X11; \
	PCLMULQDQ $0x00, acc, X11; \
	MOVQ      $khi, AX; \
	PINSRQ    $0x1, AX, X12; \
	PCLMULQDQ $0x11, X12, acc; \
	PXOR      X11, acc; \
	PXOR      prev, acc

// updateAsm folds the whole 128-byte blocks of a byte slice into a 64-bit
// checksum using carry-less multiplication.
//
// Go prototype implied by the frame layout (the declaration lives in a Go
// file that is not part of this listing):
//
//	func updateAsm(crc uint64, p []byte) (checksum uint64)
//
// Contract, as established by the code below:
//   - Only the first (len(p) &^ 127) bytes are read; any trailing
//     len(p) % 128 bytes are ignored and must be handled by the caller.
//   - If len(p) < 128 the input crc is returned unchanged.
//   - p's base address must be 16-byte aligned: MOVOA and PXOR with a
//     memory operand fault on misaligned addresses.
//   - Needs SSE2, SSE4.1 (PINSRQ/PEXTRQ) and PCLMULQDQ. No AVX is required.
//     NOTE(review): feature detection is assumed to happen in the caller.
//   - Clobbers AX, BX, CX, SI, X0-X12 and flags.
//
// NOTE(review): the multipliers look like the fold/Barrett constants of the
// reflected CRC-64/NVME polynomial; confirm against the constant generator.
TEXT ·updateAsm(SB), NOSPLIT, $0-40
	MOVQ crc+0(FP), AX    // running checksum
	MOVQ p_base+8(FP), SI // read cursor
	MOVQ p_len+16(FP), CX // byte length, converted to a block count below
	NOTQ AX               // work on the inverted state; undone at skip128

	// CX = number of whole 128-byte blocks. After the shift CX is always
	// non-negative, so the signed conditional jumps below are safe.
	SHRQ $7, CX
	CMPQ CX, $1
	JLT  skip128          // no full block: return crc unchanged

	// Load the first block into eight 16-byte accumulators.
	MOVOA 0x00(SI), X0
	MOVOA 0x10(SI), X1
	MOVOA 0x20(SI), X2
	MOVOA 0x30(SI), X3
	MOVOA 0x40(SI), X4
	MOVOA 0x50(SI), X5
	MOVOA 0x60(SI), X6
	MOVOA 0x70(SI), X7

	// Mix the incoming state into the low 8 bytes of the first lane
	// (MOVQ to an X register clears the high qword).
	MOVQ AX, X8
	PXOR X8, X0

	CMPQ CX, $1
	JE   tail128          // exactly one block: nothing to fold across

	// Multipliers for the 128-byte stride: low qword of X8, high qword of X9.
	MOVQ   $0xa1ca681e733f9c40, AX
	MOVQ   AX, X8
	MOVQ   $0x5f852fb61e8d92dc, AX
	PINSRQ $0x1, AX, X9

loop128:
	// Step to the next block, then fold it into every lane.
	ADDQ $128, SI
	SUBQ $1, CX
	FOLD128(X0, 0x00)
	FOLD128(X1, 0x10)
	FOLD128(X2, 0x20)
	FOLD128(X3, 0x30)
	FOLD128(X4, 0x40)
	FOLD128(X5, 0x50)
	FOLD128(X6, 0x60)
	FOLD128(X7, 0x70)
	CMPQ CX, $1
	JGT  loop128          // CX counts the blocks still to absorb, plus one

tail128:
	// Collapse the eight lanes into one. Each of X0..X6 is multiplied by its
	// own pair of constants and XORed onto the previous link of the chain,
	// which starts at X7 (taken as is). The combined 128-bit value ends up
	// in X6.
	FOLD_TAIL(0xd083dd594d96319d, 0x946588403d4adcbc, X0, X7)
	FOLD_TAIL(0x3c255f5ebc414423, 0x34f5a24e22d66e90, X1, X0)
	FOLD_TAIL(0x7b0ab10dd0f809fe, 0x03363823e6e791e5, X2, X1)
	FOLD_TAIL(0x0c32cdb31e18a84a, 0x62242240ace5045a, X3, X2)
	FOLD_TAIL(0xbdd7ac0ee1a4a0f0, 0xa3ffdc1fe8e82a8b, X4, X3)
	FOLD_TAIL(0xb0bc2e589204f500, 0xe1e0bb9d45d7a44c, X5, X4)
	FOLD_TAIL(0xeadc41fd2ba3d420, 0x21e9761e252621ac, X6, X5)

	// 128 -> 64 bit fold: multiply the low qword of X6 by the same constant
	// used for X6's high half above (reloaded explicitly rather than relying
	// on whatever AX was left holding) and XOR it onto the high qword.
	MOVQ      $0x21e9761e252621ac, AX
	MOVQ      AX, X5
	PCLMULQDQ $0x00, X6, X5
	PSHUFD    $0xee, X6, X6 // broadcast the high qword of X6 into both halves
	PXOR      X5, X6

	// Final reduction to 64 bits with two more carry-less multiplies.
	MOVQ      $0x27ecfa329aef9f77, AX
	MOVQ      AX, X4
	PCLMULQDQ $0x00, X4, X6
	PEXTRQ    $0, X6, BX    // keep the low qword of the first product
	MOVQ      $0x34d926535897936b, AX
	MOVQ      AX, X4
	PCLMULQDQ $0x00, X4, X6
	PXOR      X5, X6
	PEXTRQ    $1, X6, AX
	XORQ      BX, AX

skip128:
	NOTQ AX                   // undo the inversion applied on entry
	MOVQ AX, checksum+32(FP)
	RET