2 * Copyright (C) 2019 Collabora, Ltd.
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
24 /* Midgard has some accelerated support for perspective projection on the
25 * load/store pipes. So the first perspective projection pass looks for
26 * lowered/open-coded perspective projection of the form "fmul (A.xyz,
27 * frcp(A.w))" or "fmul (A.xy, frcp(A.z))" and rewrites it with a native
28 * perspective division opcode (on the load/store pipe).
30 * Caveats apply: the frcp should be used only once to make this optimization
37 midgard_opt_combine_projection(compiler_context
*ctx
, midgard_block
*block
)
39 bool progress
= false;
41 mir_foreach_instr_in_block_safe(block
, ins
) {
42 /* First search for fmul */
43 if (ins
->type
!= TAG_ALU_4
) continue;
44 if (ins
->alu
.op
!= midgard_alu_op_fmul
) continue;
48 /* Check the swizzles */
50 midgard_vector_alu_src src1
=
51 vector_alu_from_unsigned(ins
->alu
.src1
);
53 midgard_vector_alu_src src2
=
54 vector_alu_from_unsigned(ins
->alu
.src2
);
56 if (!mir_is_simple_swizzle(src1
.swizzle
, ins
->mask
)) continue;
57 if (src2
.swizzle
!= SWIZZLE_XXXX
) continue;
59 /* Awesome, we're the right form. Now check where src2 is from */
60 unsigned frcp
= ins
->ssa_args
.src1
;
61 unsigned to
= ins
->ssa_args
.dest
;
63 if (frcp
>= ctx
->func
->impl
->ssa_alloc
) continue;
64 if (to
>= ctx
->func
->impl
->ssa_alloc
) continue;
66 bool frcp_found
= false;
67 unsigned frcp_component
= 0;
68 unsigned frcp_from
= 0;
70 mir_foreach_instr_in_block_safe(block
, sub
) {
71 if (sub
->ssa_args
.dest
!= frcp
) continue;
73 midgard_vector_alu_src s
=
74 vector_alu_from_unsigned(sub
->alu
.src1
);
76 frcp_component
= s
.swizzle
& 3;
77 frcp_from
= sub
->ssa_args
.src0
;
80 (sub
->type
== TAG_ALU_4
) &&
81 (sub
->alu
.op
== midgard_alu_op_frcp
);
85 if (!frcp_found
) continue;
86 if (frcp_component
!= COMPONENT_W
&& frcp_component
!= COMPONENT_Z
) continue;
87 if (!mir_single_use(ctx
, frcp
)) continue;
89 /* Nice, we got the form spot on. Let's convert! */
91 midgard_instruction accel
= {
92 .type
= TAG_LOAD_STORE_4
,
100 .op
= frcp_component
== COMPONENT_W
?
101 midgard_op_ldst_perspective_division_w
:
102 midgard_op_ldst_perspective_division_z
,
103 .swizzle
= SWIZZLE_XYZW
,
108 mir_insert_instruction_before(ins
, accel
);
109 mir_remove_instruction(ins
);