b90357061318336bd4372a09a8832f47f5dcc813
[mesa.git] / src / freedreno / ir3 / ir3_cf.c
1 /*
2 * Copyright (C) 2019 Google.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 * SOFTWARE.
22 */
23
24 #include "ir3.h"
25
26 static bool
27 is_fp16_conv(struct ir3_instruction *instr)
28 {
29 if (instr->opc != OPC_MOV)
30 return false;
31
32 struct ir3_register *dst = instr->regs[0];
33 struct ir3_register *src = instr->regs[1];
34
35 /* disallow conversions that cannot be folded into
36 * alu instructions:
37 */
38 if (dst->flags & (IR3_REG_EVEN | IR3_REG_POS_INF))
39 return false;
40
41 if (dst->flags & (IR3_REG_RELATIV | IR3_REG_ARRAY))
42 return false;
43 if (src->flags & (IR3_REG_RELATIV | IR3_REG_ARRAY))
44 return false;
45
46 if (instr->cat1.src_type == TYPE_F32 &&
47 instr->cat1.dst_type == TYPE_F16)
48 return true;
49
50 if (instr->cat1.src_type == TYPE_F16 &&
51 instr->cat1.dst_type == TYPE_F32)
52 return true;
53
54 return false;
55 }
56
57 static bool
58 all_uses_fp16_conv(struct ir3 *ir, struct ir3_instruction *conv_src)
59 {
60 foreach_block (block, &ir->block_list) {
61 foreach_instr (instr, &block->instr_list) {
62 struct ir3_instruction *src;
63 foreach_ssa_src (src, instr) {
64 if (src == conv_src && !is_fp16_conv(instr))
65 return false;
66 }
67 }
68 }
69
70 return true;
71 }
72
73 static void
74 rewrite_uses(struct ir3 *ir, struct ir3_instruction *conv,
75 struct ir3_instruction *replace)
76 {
77 foreach_block (block, &ir->block_list) {
78 foreach_instr (instr, &block->instr_list) {
79 struct ir3_instruction *src;
80 foreach_ssa_src_n (src, n, instr) {
81 if (src == conv)
82 instr->regs[n]->instr = replace;
83 }
84 }
85 }
86 }
87
88 static void
89 try_conversion_folding(struct ir3 *ir, struct ir3_instruction *conv)
90 {
91 struct ir3_instruction *src;
92
93 if (!is_fp16_conv(conv))
94 return;
95
96 src = ssa(conv->regs[1]);
97 if (!is_alu(src))
98 return;
99
100 /* avoid folding f2f32(f2f16) together, in cases where this is legal to
101 * do (glsl) nir should have handled that for us already:
102 */
103 if (is_fp16_conv(src))
104 return;
105
106 switch (src->opc) {
107 case OPC_SEL_B32:
108 case OPC_SEL_B16:
109 case OPC_MAX_F:
110 case OPC_MIN_F:
111 case OPC_SIGN_F:
112 case OPC_ABSNEG_F:
113 return;
114 default:
115 break;
116 }
117
118 if (!all_uses_fp16_conv(ir, src))
119 return;
120
121 if (src->opc == OPC_MOV) {
122 if (src->cat1.dst_type == src->cat1.src_type) {
123 /* If we're folding a conversion into a bitwise move, we need to
124 * change the dst type to F32 to get the right behavior, since we
125 * could be moving a float with a u32.u32 move.
126 */
127 src->cat1.dst_type = conv->cat1.dst_type;
128 src->cat1.src_type = conv->cat1.src_type;
129 } else {
130 /* Otherwise, for typechanging movs, we can just change the dst
131 * type to F16 to collaps the two conversions. For example
132 * cov.s32f32 follwed by cov.f32f16 becomes cov.s32f16.
133 */
134 src->cat1.dst_type = conv->cat1.dst_type;
135 }
136 }
137
138 if (conv->regs[0]->flags & IR3_REG_HALF) {
139 src->regs[0]->flags |= IR3_REG_HALF;
140 } else {
141 src->regs[0]->flags &= ~IR3_REG_HALF;
142 }
143
144 rewrite_uses(ir, conv, src);
145 }
146
147 void
148 ir3_cf(struct ir3 *ir)
149 {
150 foreach_block_safe (block, &ir->block_list) {
151 foreach_instr_safe (instr, &block->instr_list) {
152 try_conversion_folding(ir, instr);
153 }
154 }
155 }