freedreno/ir3: Add new ir3 pass to fold out fp16 conversions
[mesa.git] / src / freedreno / ir3 / ir3_cf.c
1 /*
2 * Copyright (C) 2019 Google.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 * SOFTWARE.
22 */
23
24 #include "ir3.h"
25
26 static bool
27 is_fp16_conv(struct ir3_instruction *instr)
28 {
29 if (instr->opc == OPC_MOV &&
30 instr->cat1.src_type == TYPE_F32 &&
31 instr->cat1.dst_type == TYPE_F16)
32 return true;
33
34 return false;
35 }
36
37 static bool
38 all_uses_fp16_conv(struct ir3 *ir, struct ir3_instruction *conv_src)
39 {
40 foreach_block (block, &ir->block_list) {
41 foreach_instr (instr, &block->instr_list) {
42 struct ir3_instruction *src;
43 foreach_ssa_src (src, instr) {
44 if (src == conv_src && !is_fp16_conv(instr))
45 return false;
46 }
47 }
48 }
49
50 return true;
51 }
52
53 static void
54 rewrite_uses(struct ir3 *ir, struct ir3_instruction *conv,
55 struct ir3_instruction *replace)
56 {
57 foreach_block (block, &ir->block_list) {
58 foreach_instr (instr, &block->instr_list) {
59 struct ir3_instruction *src;
60 foreach_ssa_src_n (src, n, instr) {
61 if (src == conv)
62 instr->regs[n]->instr = replace;
63 }
64 }
65 }
66 }
67
68 static void
69 try_conversion_folding(struct ir3 *ir, struct ir3_instruction *conv)
70 {
71 struct ir3_instruction *src;
72
73 if (!is_fp16_conv(conv))
74 return;
75
76 src = ssa(conv->regs[1]);
77 if (!is_alu(src))
78 return;
79
80 switch (src->opc) {
81 case OPC_SEL_B32:
82 case OPC_MAX_F:
83 case OPC_MIN_F:
84 case OPC_ABSNEG_F:
85 return;
86 default:
87 break;
88 }
89
90 if (!all_uses_fp16_conv(ir, src))
91 return;
92
93 if (src->opc == OPC_MOV) {
94 if (src->cat1.dst_type == src->cat1.src_type) {
95 /* If we're folding a conversion into a bitwise move, we need to
96 * change the dst type to F32 to get the right behavior, since we
97 * could be moving a float with a u32.u32 move.
98 */
99 src->cat1.dst_type = TYPE_F16;
100 src->cat1.src_type = TYPE_F32;
101 } else {
102 /* Otherwise, for typechanging movs, we can just change the dst
103 * type to F16 to collaps the two conversions. For example
104 * cov.s32f32 follwed by cov.f32f16 becomes cov.s32f16.
105 */
106 src->cat1.dst_type = TYPE_F16;
107 }
108 }
109
110 src->regs[0]->flags |= IR3_REG_HALF;
111
112 rewrite_uses(ir, conv, src);
113 }
114
115 void
116 ir3_cf(struct ir3 *ir)
117 {
118 foreach_block_safe (block, &ir->block_list) {
119 foreach_instr_safe (instr, &block->instr_list) {
120 try_conversion_folding(ir, instr);
121 }
122 }
123 }