Skip to content

Commit 8bd6bea

Browse files
committed
[wip] experiment with reversing alignment and size validation order
gherrit-pr-id: G74bf0db1b7c30669171b5985ce24849a264746e7
1 parent a08a5c9 commit 8bd6bea

59 files changed

Lines changed: 1182 additions & 1211 deletions

File tree

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.
Lines changed: 9 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,18 +1,12 @@
11
bench_ref_from_bytes_dynamic_padding:
2-
test dil, 3
3-
je .LBB5_2
4-
xor eax, eax
5-
mov rdx, rsi
6-
ret
7-
.LBB5_2:
82
movabs rax, 9223372036854775804
93
and rax, rsi
104
cmp rax, 9
11-
jae .LBB5_4
12-
xor eax, eax
13-
mov rdx, rsi
14-
ret
15-
.LBB5_4:
5+
setb cl
6+
test dil, 3
7+
setne dl
8+
or dl, cl
9+
jne .LBB5_1
1610
add rax, -9
1711
movabs rcx, -6148914691236517205
1812
mul rcx
@@ -26,3 +20,7 @@ bench_ref_from_bytes_dynamic_padding:
2620
cmove rax, rdi
2721
mov rdx, rsi
2822
ret
23+
.LBB5_1:
24+
xor eax, eax
25+
mov rdx, rsi
26+
ret
Lines changed: 35 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,12 @@
11
Iterations: 100
2-
Instructions: 2500
3-
Total Cycles: 849
4-
Total uOps: 2800
2+
Instructions: 2400
3+
Total Cycles: 1406
4+
Total uOps: 2700
55

66
Dispatch Width: 4
7-
uOps Per Cycle: 3.30
8-
IPC: 2.94
9-
Block RThroughput: 7.0
7+
uOps Per Cycle: 1.92
8+
IPC: 1.71
9+
Block RThroughput: 6.8
1010

1111

1212
Instruction Info:
@@ -18,18 +18,14 @@ Instruction Info:
1818
[6]: HasSideEffects (U)
1919

2020
[1] [2] [3] [4] [5] [6] Instructions:
21-
1 1 0.33 test dil, 3
22-
1 1 1.00 je .LBB5_2
23-
1 0 0.25 xor eax, eax
24-
1 1 0.33 mov rdx, rsi
25-
1 1 1.00 U ret
2621
1 1 0.33 movabs rax, 9223372036854775804
2722
1 1 0.33 and rax, rsi
2823
1 1 0.33 cmp rax, 9
29-
1 1 1.00 jae .LBB5_4
30-
1 0 0.25 xor eax, eax
31-
1 1 0.33 mov rdx, rsi
32-
1 1 1.00 U ret
24+
1 1 0.50 setb cl
25+
1 1 0.33 test dil, 3
26+
1 1 0.50 setne dl
27+
1 1 0.33 or dl, cl
28+
1 1 1.00 jne .LBB5_1
3329
1 1 0.33 add rax, -9
3430
1 1 0.33 movabs rcx, -6148914691236517205
3531
2 4 1.00 mul rcx
@@ -43,6 +39,9 @@ Instruction Info:
4339
2 2 0.67 cmove rax, rdi
4440
1 1 0.33 mov rdx, rsi
4541
1 1 1.00 U ret
42+
1 0 0.25 xor eax, eax
43+
1 1 0.33 mov rdx, rsi
44+
1 1 1.00 U ret
4645

4746

4847
Resources:
@@ -58,32 +57,31 @@ Resources:
5857

5958
Resource pressure per iteration:
6059
[0] [1] [2] [3] [4] [5] [6.0] [6.1]
61-
- - 8.33 8.32 - 8.35 - -
60+
- - 8.02 8.02 - 8.96 - -
6261

6362
Resource pressure by instruction:
6463
[0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions:
65-
- - 0.35 0.33 - 0.32 - - test dil, 3
66-
- - - - - 1.00 - - je .LBB5_2
67-
- - - - - - - - xor eax, eax
68-
- - 0.92 0.04 - 0.04 - - mov rdx, rsi
69-
- - - - - 1.00 - - ret
70-
- - 0.32 0.15 - 0.53 - - movabs rax, 9223372036854775804
71-
- - 0.03 0.06 - 0.91 - - and rax, rsi
72-
- - 0.05 0.93 - 0.02 - - cmp rax, 9
73-
- - - - - 1.00 - - jae .LBB5_4
64+
- - 0.02 0.03 - 0.95 - - movabs rax, 9223372036854775804
65+
- - 0.94 0.02 - 0.04 - - and rax, rsi
66+
- - 0.01 0.04 - 0.95 - - cmp rax, 9
67+
- - 0.97 - - 0.03 - - setb cl
68+
- - 0.98 - - 0.02 - - test dil, 3
69+
- - 0.04 - - 0.96 - - setne dl
70+
- - 0.01 0.03 - 0.96 - - or dl, cl
71+
- - - - - 1.00 - - jne .LBB5_1
72+
- - 0.04 0.95 - 0.01 - - add rax, -9
73+
- - 0.02 0.98 - - - - movabs rcx, -6148914691236517205
74+
- - 1.00 1.00 - - - - mul rcx
75+
- - 0.98 - - 0.02 - - shr rdx
76+
- - 0.01 0.99 - - - - lea rcx, [rdx + 2*rdx]
77+
- - 0.02 0.02 - 0.96 - - or rcx, 3
78+
- - 0.02 0.95 - 0.03 - - add rcx, 9
7479
- - - - - - - - xor eax, eax
75-
- - 0.93 0.04 - 0.03 - - mov rdx, rsi
80+
- - 0.95 0.04 - 0.01 - - cmp rsi, rcx
81+
- - 0.98 0.99 - 0.03 - - cmove rsi, rdx
82+
- - 0.98 0.99 - 0.03 - - cmove rax, rdi
83+
- - 0.01 0.04 - 0.95 - - mov rdx, rsi
7684
- - - - - 1.00 - - ret
77-
- - 0.37 0.33 - 0.30 - - add rax, -9
78-
- - 0.61 0.09 - 0.30 - - movabs rcx, -6148914691236517205
79-
- - 1.00 1.00 - - - - mul rcx
80-
- - 0.67 - - 0.33 - - shr rdx
81-
- - 0.33 0.67 - - - - lea rcx, [rdx + 2*rdx]
82-
- - 0.34 0.61 - 0.05 - - or rcx, 3
83-
- - 0.36 0.61 - 0.03 - - add rcx, 9
8485
- - - - - - - - xor eax, eax
85-
- - 0.04 0.63 - 0.33 - - cmp rsi, rcx
86-
- - 0.98 0.97 - 0.05 - - cmove rsi, rdx
87-
- - 0.98 0.94 - 0.08 - - cmove rax, rdi
88-
- - 0.05 0.92 - 0.03 - - mov rdx, rsi
86+
- - 0.04 0.95 - 0.01 - - mov rdx, rsi
8987
- - - - - 1.00 - - ret

benches/ref_from_bytes_dynamic_size.x86-64

Lines changed: 12 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -2,19 +2,20 @@ bench_ref_from_bytes_dynamic_size:
22
mov rdx, rsi
33
cmp rsi, 4
44
setb al
5-
or al, dil
6-
test al, 1
7-
je .LBB5_2
8-
xor eax, eax
9-
ret
10-
.LBB5_2:
5+
mov ecx, edi
6+
or cl, al
7+
test cl, 1
8+
jne .LBB5_1
119
lea rcx, [rdx - 4]
1210
mov rsi, rcx
13-
shr rsi
14-
and rcx, -2
15-
add rcx, 4
11+
and rsi, -2
12+
add rsi, 4
13+
shr rcx
1614
xor eax, eax
17-
cmp rdx, rcx
18-
cmove rdx, rsi
15+
cmp rdx, rsi
16+
cmove rdx, rcx
1917
cmove rax, rdi
2018
ret
19+
.LBB5_1:
20+
xor eax, eax
21+
ret
Lines changed: 33 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,12 @@
11
Iterations: 100
2-
Instructions: 1800
3-
Total Cycles: 606
4-
Total uOps: 2000
2+
Instructions: 1900
3+
Total Cycles: 668
4+
Total uOps: 2100
55

66
Dispatch Width: 4
7-
uOps Per Cycle: 3.30
8-
IPC: 2.97
9-
Block RThroughput: 5.0
7+
uOps Per Cycle: 3.14
8+
IPC: 2.84
9+
Block RThroughput: 5.3
1010

1111

1212
Instruction Info:
@@ -21,21 +21,22 @@ Instruction Info:
2121
1 1 0.33 mov rdx, rsi
2222
1 1 0.33 cmp rsi, 4
2323
1 1 0.50 setb al
24-
1 1 0.33 or al, dil
25-
1 1 0.33 test al, 1
26-
1 1 1.00 je .LBB5_2
27-
1 0 0.25 xor eax, eax
28-
1 1 1.00 U ret
24+
1 1 0.33 mov ecx, edi
25+
1 1 0.33 or cl, al
26+
1 1 0.33 test cl, 1
27+
1 1 1.00 jne .LBB5_1
2928
1 1 0.50 lea rcx, [rdx - 4]
3029
1 1 0.33 mov rsi, rcx
31-
1 1 0.50 shr rsi
32-
1 1 0.33 and rcx, -2
33-
1 1 0.33 add rcx, 4
30+
1 1 0.33 and rsi, -2
31+
1 1 0.33 add rsi, 4
32+
1 1 0.50 shr rcx
3433
1 0 0.25 xor eax, eax
35-
1 1 0.33 cmp rdx, rcx
36-
2 2 0.67 cmove rdx, rsi
34+
1 1 0.33 cmp rdx, rsi
35+
2 2 0.67 cmove rdx, rcx
3736
2 2 0.67 cmove rax, rdi
3837
1 1 1.00 U ret
38+
1 0 0.25 xor eax, eax
39+
1 1 1.00 U ret
3940

4041

4142
Resources:
@@ -51,25 +52,26 @@ Resources:
5152

5253
Resource pressure per iteration:
5354
[0] [1] [2] [3] [4] [5] [6.0] [6.1]
54-
- - 6.00 6.00 - 6.00 - -
55+
- - 6.19 6.40 - 6.41 - -
5556

5657
Resource pressure by instruction:
5758
[0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions:
58-
- - - 0.99 - 0.01 - - mov rdx, rsi
59-
- - 0.99 0.01 - - - - cmp rsi, 4
60-
- - 1.00 - - - - - setb al
61-
- - 0.99 0.01 - - - - or al, dil
62-
- - - 0.99 - 0.01 - - test al, 1
63-
- - - - - 1.00 - - je .LBB5_2
59+
- - 0.41 0.21 - 0.38 - - mov rdx, rsi
60+
- - - 0.21 - 0.79 - - cmp rsi, 4
61+
- - 0.39 - - 0.61 - - setb al
62+
- - 0.98 0.02 - - - - mov ecx, edi
63+
- - 0.02 0.38 - 0.60 - - or cl, al
64+
- - - 1.00 - - - - test cl, 1
65+
- - - - - 1.00 - - jne .LBB5_1
66+
- - 0.62 0.38 - - - - lea rcx, [rdx - 4]
67+
- - 0.19 0.79 - 0.02 - - mov rsi, rcx
68+
- - 0.20 0.60 - 0.20 - - and rsi, -2
69+
- - 0.20 0.78 - 0.02 - - add rsi, 4
70+
- - 0.61 - - 0.39 - - shr rcx
6471
- - - - - - - - xor eax, eax
72+
- - 0.58 0.41 - 0.01 - - cmp rdx, rsi
73+
- - 1.00 0.81 - 0.19 - - cmove rdx, rcx
74+
- - 0.99 0.81 - 0.20 - - cmove rax, rdi
6575
- - - - - 1.00 - - ret
66-
- - 1.00 - - - - - lea rcx, [rdx - 4]
67-
- - - 1.00 - - - - mov rsi, rcx
68-
- - 1.00 - - - - - shr rsi
69-
- - 1.00 - - - - - and rcx, -2
70-
- - - 1.00 - - - - add rcx, 4
7176
- - - - - - - - xor eax, eax
72-
- - - - - 1.00 - - cmp rdx, rcx
73-
- - 0.01 1.00 - 0.99 - - cmove rdx, rsi
74-
- - 0.01 1.00 - 0.99 - - cmove rax, rdi
7577
- - - - - 1.00 - - ret
Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,8 @@
11
bench_ref_from_bytes_static_size:
2+
mov ecx, edi
3+
and ecx, 1
4+
xor rsi, 6
25
xor eax, eax
3-
cmp rsi, 6
4-
mov rcx, rdi
5-
cmovne rcx, rax
6-
cmovb rcx, rax
7-
test dil, 1
8-
cmove rax, rcx
6+
or rsi, rcx
7+
cmove rax, rdi
98
ret
Lines changed: 17 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,12 @@
11
Iterations: 100
2-
Instructions: 800
3-
Total Cycles: 341
4-
Total uOps: 1100
2+
Instructions: 700
3+
Total Cycles: 240
4+
Total uOps: 800
55

66
Dispatch Width: 4
7-
uOps Per Cycle: 3.23
8-
IPC: 2.35
9-
Block RThroughput: 2.8
7+
uOps Per Cycle: 3.33
8+
IPC: 2.92
9+
Block RThroughput: 2.0
1010

1111

1212
Instruction Info:
@@ -18,13 +18,12 @@ Instruction Info:
1818
[6]: HasSideEffects (U)
1919

2020
[1] [2] [3] [4] [5] [6] Instructions:
21+
1 1 0.33 mov ecx, edi
22+
1 1 0.33 and ecx, 1
23+
1 1 0.33 xor rsi, 6
2124
1 0 0.25 xor eax, eax
22-
1 1 0.33 cmp rsi, 6
23-
1 1 0.33 mov rcx, rdi
24-
2 2 0.67 cmovne rcx, rax
25-
2 2 0.67 cmovb rcx, rax
26-
1 1 0.33 test dil, 1
27-
2 2 0.67 cmove rax, rcx
25+
1 1 0.33 or rsi, rcx
26+
2 2 0.67 cmove rax, rdi
2827
1 1 1.00 U ret
2928

3029

@@ -41,15 +40,14 @@ Resources:
4140

4241
Resource pressure per iteration:
4342
[0] [1] [2] [3] [4] [5] [6.0] [6.1]
44-
- - 3.32 3.34 - 3.34 - -
43+
- - 2.33 2.33 - 2.34 - -
4544

4645
Resource pressure by instruction:
4746
[0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions:
47+
- - 0.01 0.98 - 0.01 - - mov ecx, edi
48+
- - 0.02 0.66 - 0.32 - - and ecx, 1
49+
- - 0.33 0.66 - 0.01 - - xor rsi, 6
4850
- - - - - - - - xor eax, eax
49-
- - - 0.17 - 0.83 - - cmp rsi, 6
50-
- - 0.17 0.18 - 0.65 - - mov rcx, rdi
51-
- - 1.00 0.98 - 0.02 - - cmovne rcx, rax
52-
- - 1.00 1.00 - - - - cmovb rcx, rax
53-
- - 0.16 0.02 - 0.82 - - test dil, 1
54-
- - 0.99 0.99 - 0.02 - - cmove rax, rcx
51+
- - 0.98 0.02 - - - - or rsi, rcx
52+
- - 0.99 0.01 - 1.00 - - cmove rax, rdi
5553
- - - - - 1.00 - - ret
Lines changed: 7 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,19 +1,17 @@
11
bench_ref_from_bytes_with_elems_dynamic_padding:
2-
movabs rax, 3074457345618258598
3-
cmp rdx, rax
4-
seta cl
2+
movabs rcx, 3074457345618258598
3+
cmp rdx, rcx
4+
ja .LBB5_3
55
mov rax, rdi
66
test al, 3
7-
setne dil
8-
or dil, cl
9-
jne .LBB5_2
7+
jne .LBB5_3
108
lea rcx, [rdx + 2*rdx]
119
or rcx, 3
1210
add rcx, 9
1311
cmp rsi, rcx
14-
je .LBB5_3
15-
.LBB5_2:
12+
jne .LBB5_3
13+
ret
14+
.LBB5_3:
1615
xor eax, eax
1716
mov rdx, rsi
18-
.LBB5_3:
1917
ret

0 commit comments

Comments
 (0)