-
Notifications
You must be signed in to change notification settings - Fork 15
/
Copy pathstrlen.S
91 lines (84 loc) · 1.74 KB
/
strlen.S
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
#if 0
/*
 * C-intrinsics form of the fault-only-first strlen loop (LMUL = 8).
 *
 * src: pointer to a zero-terminated byte string.
 * Returns the number of bytes that precede the first zero byte.
 *
 * Each iteration requests VLMAX bytes with a fault-only-first load; the
 * load writes back into vl how many bytes were actually read, and the
 * cursor advances by exactly that amount.
 */
size_t strlen_rvv(char *src) {
	const size_t vlmax = __riscv_vsetvlmax_e8m8();
	char *cursor = src;
	size_t vl;
	long nul_idx;
	do {
		vuint8m8_t chunk = __riscv_vle8ff_v_u8m8((uint8_t*)cursor, &vl, vlmax);
		/* Index of the first zero byte in this chunk, or -1 when there is none. */
		nul_idx = __riscv_vfirst_m_b1(__riscv_vmseq_vx_u8m8_b1(chunk, 0, vl), vl);
		cursor += vl;
	} while (nul_idx < 0);
	/* cursor sits one chunk past the chunk holding the zero byte: step back onto it. */
	cursor -= vl - nul_idx;
	return (size_t)(cursor - src);
}
#define PAGE_SIZE 4096
/*
 * C-intrinsics strlen that uses plain (non fault-only-first) vector loads.
 *
 * src: pointer to a zero-terminated byte string.
 * Returns the number of bytes that precede the first zero byte.
 *
 * Phase 1 scans from src to the end of the page containing src, with vl
 * capped by the bytes left in that page, so no load crosses the page end.
 * Phase 2 continues from the page boundary with vl = VLMAX.
 *
 * PAGE_SIZE must be a power of two.
 * NOTE(review): phase 2 stays inside one page per load only when VLMAX
 * bytes is no larger than PAGE_SIZE -- confirm for the targeted VLEN.
 */
size_t strlen_rvv_page_aligned_(char *src) {
	char *p = src;
	long first = 0;
	/* Bytes from src to the end of its page, in [1, PAGE_SIZE]; the mask is
	 * derived from PAGE_SIZE instead of repeating the literal 4096. */
	size_t n = 0 - ((uintptr_t)src | -(uintptr_t)PAGE_SIZE);
	size_t vl;
	/* Phase 1: up to the page boundary. n > 0 on entry, so vl is always set. */
	for (; n > 0; n -= vl) {
		vl = __riscv_vsetvl_e8m8(n);
		vuint8m8_t v = __riscv_vle8_v_u8m8((uint8_t*)p, vl);
		first = __riscv_vfirst_m_b1(__riscv_vmseq_vx_u8m8_b1(v, 0, vl), vl);
		p += vl;
		if (first >= 0) {
			goto end;
		}
	}
	/* Phase 2: p is now page aligned; scan full-width chunks. */
	vl = __riscv_vsetvlmax_e8m8();
	do {
		vuint8m8_t v = __riscv_vle8_v_u8m8((uint8_t*)p, vl);
		first = __riscv_vfirst_m_b1(__riscv_vmseq_vx_u8m8_b1(v, 0, vl), vl);
		p += vl;
	} while (first < 0);
end:
	/* p is one chunk past the chunk holding the zero byte: step back onto it. */
	p -= vl - first;
	return (size_t)(p - src);
}
#endif
#ifdef MX
/*
 * MX(strlen_rvv_): vectorised strlen built on fault-only-first loads.
 * Same algorithm as the C function strlen_rvv in the "#if 0" block above.
 *
 * In:   a0 = pointer to a zero-terminated byte string
 * Out:  a0 = number of bytes before the first zero byte
 * Uses: a1, a2, a3, v0, the register group starting at v8, vl/vtype
 *
 * MX is a macro defined outside this block; it wraps the exported symbol
 * name and, invoked with no argument, supplies the LMUL operand of
 * vsetvli (presumably m1/m2/m4/m8 -- confirm against the including file).
 */
.global MX(strlen_rvv_)
MX(strlen_rvv_):
mv a3, a0 # a3 = read cursor (end); a0 keeps the start
1:
vsetvli a1, x0, e8, MX(), ta, ma # rs1 = x0 with rd != x0: request vl = VLMAX, 8-bit elements
vle8ff.v v8, (a3) # fault-only-first load; may leave vl smaller than requested
csrr a1, vl # a1 = number of bytes actually loaded
vmseq.vi v0, v8, 0 # v0 = mask of bytes equal to zero
vfirst.m a2, v0 # a2 = index of first zero byte, or -1 if none
add a3, a3, a1 # end += vl
bltz a2, 1b # no zero byte in this chunk: load the next one
add a0, a0, a1 # start += vl (cancels the last end += vl)
add a3, a3, a2 # end += idx
sub a0, a3, a0 # end - start = string length
ret
/*
 * MX(strlen_rvv_page_aligned_): vectorised strlen using plain vle8.v loads.
 * Same algorithm as the C function strlen_rvv_page_aligned_ in the
 * "#if 0" block above.
 *
 * In:   a0 = pointer to a zero-terminated byte string
 * Out:  a0 = number of bytes before the first zero byte
 * Uses: a1, a2, a3, a4, register groups starting at v8 and v16, vl/vtype
 *
 * Loop 1 scans up to the end of the 4 KiB page containing the string start,
 * with vl capped by the bytes left in that page. Loop 2 continues from the
 * page boundary with vl = VLMAX.
 * NOTE(review): loop 2 loads stay inside a single page only if VLMAX bytes
 * is no larger than 4096 -- confirm for the targeted VLEN and LMUL.
 */
.global MX(strlen_rvv_page_aligned_) # generated by clang
MX(strlen_rvv_page_aligned_):
lui a1, 1048575 # a1 = 0xFFFFF << 12 = -4096
or a1, a1, a0 # a1 = src | -4096
neg a4, a1 # a4 = bytes from src to the end of its 4 KiB page (1..4096)
mv a1, a0 # a1 = read cursor; a0 keeps the start
1: # loop 1: scan up to the page boundary
vsetvli a2, a4, e8, MX(), ta, ma # a2 = vl, never more than the bytes left in the page
vle8.v v8, (a1)
vmseq.vi v16, v8, 0 # v16 = mask of bytes equal to zero
vfirst.m a3, v16 # a3 = index of first zero byte, or -1 if none
add a1, a1, a2 # cursor += vl
bgez a3, 1f # zero byte found: go compute the length
sub a4, a4, a2 # remaining bytes in page -= vl
bnez a4, 1b
vsetvli a2, zero, e8, MX(), ta, ma # page boundary reached: switch to vl = VLMAX
2: # loop 2: full-width chunks from the page boundary on
vle8.v v8, (a1)
vmseq.vi v16, v8, 0
vfirst.m a3, v16
add a1, a1, a2 # cursor += vl
bltz a3, 2b # no zero byte in this chunk: load the next one
1: # common exit: a3 = idx, a2 = vl of the last chunk
sub a1, a1, a2 # a1 = start of the chunk holding the zero byte
sub a0, a3, a0 # a0 = idx - start
add a0, a0, a1 # a0 = chunk start + idx - start = string length
ret
#endif