panfrost: Add XML for attribute buffers
[mesa.git] / src / panfrost / bifrost / bi_schedule.c
1 /*
2 * Copyright (C) 2020 Collabora Ltd.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 * SOFTWARE.
22 *
23 * Authors (Collabora):
24 * Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
25 */
26
27 #include "compiler.h"
28
/* Finds the clause type required by an instruction, or returns none */
30
31 static bool
32 bi_is_fragz(bi_instruction *ins)
33 {
34 if (!(ins->src[0] & BIR_INDEX_CONSTANT))
35 return false;
36
37 return (ins->constant.u32 == BIFROST_FRAGZ);
38 }
39
40 static enum bifrost_clause_type
41 bi_clause_type_for_ins(bi_instruction *ins)
42 {
43 unsigned T = ins->type;
44
45 /* Only high latency ops impose clause types */
46 if (!(bi_class_props[T] & BI_SCHED_HI_LATENCY))
47 return BIFROST_CLAUSE_NONE;
48
49 switch (T) {
50 case BI_BRANCH:
51 case BI_DISCARD:
52 return BIFROST_CLAUSE_NONE;
53
54 case BI_LOAD_VAR:
55 if (bi_is_fragz(ins))
56 return BIFROST_CLAUSE_FRAGZ;
57
58 return BIFROST_CLAUSE_LOAD_VARY;
59
60 case BI_LOAD_UNIFORM:
61 case BI_LOAD_ATTR:
62 case BI_LOAD_VAR_ADDRESS:
63 return BIFROST_CLAUSE_UBO;
64
65 case BI_TEX:
66 return BIFROST_CLAUSE_TEX;
67
68 case BI_LOAD:
69 return BIFROST_CLAUSE_SSBO_LOAD;
70
71 case BI_STORE:
72 case BI_STORE_VAR:
73 return BIFROST_CLAUSE_SSBO_STORE;
74
75 case BI_BLEND:
76 return BIFROST_CLAUSE_BLEND;
77
78 case BI_ATEST:
79 return BIFROST_CLAUSE_ATEST;
80
81 default:
82 unreachable("Invalid high-latency class");
83 }
84 }
85
86 /* There is an encoding restriction against FMA fp16 add/min/max
87 * having both sources with abs(..) with a duplicated source. This is
88 * due to the packing being order-sensitive, so the ports must end up distinct
89 * to handle both having abs(..). The swizzle doesn't matter here. Note
90 * BIR_INDEX_REGISTER generally should not be used pre-schedule (TODO: enforce
91 * this).
92 */
93
94 static bool
95 bi_ambiguous_abs(bi_instruction *ins)
96 {
97 bool classy = bi_class_props[ins->type] & BI_NO_ABS_ABS_FP16_FMA;
98 bool typey = ins->dest_type == nir_type_float16;
99 bool absy = ins->src_abs[0] && ins->src_abs[1];
100
101 return classy && typey && absy;
102 }
103
104 /* New Bifrost (which?) don't seem to have ICMP on FMA */
105 static bool
106 bi_icmp(bi_instruction *ins)
107 {
108 bool ic = nir_alu_type_get_base_type(ins->src_types[0]) != nir_type_float;
109 return ic && (ins->type == BI_CMP);
110 }
111
112 /* No 8/16-bit IADD/ISUB on FMA */
113 static bool
114 bi_imath_small(bi_instruction *ins)
115 {
116 bool sz = nir_alu_type_get_type_size(ins->src_types[0]) < 32;
117 return sz && (ins->type == BI_IMATH);
118 }
119
120 /* Lowers FMOV to ADD #0, since FMOV doesn't exist on the h/w and this is the
121 * latest time it's sane to lower (it's useful to distinguish before, but we'll
122 * need this handle during scheduling to ensure the ports get modeled
123 * correctly with respect to the new zero source) */
124
125 static void
126 bi_lower_fmov(bi_instruction *ins)
127 {
128 if (ins->type != BI_FMOV)
129 return;
130
131 ins->type = BI_ADD;
132 ins->src[1] = BIR_INDEX_ZERO;
133 ins->src_types[1] = ins->src_types[0];
134 }
135
136 /* To work out the back-to-back flag, we need to detect branches and
137 * "fallthrough" branches, implied in the last clause of a block that falls
138 * through to another block with *multiple predecessors*. */
139
140 static bool
141 bi_back_to_back(bi_block *block)
142 {
143 /* Last block of a program */
144 if (!block->base.successors[0]) {
145 assert(!block->base.successors[1]);
146 return false;
147 }
148
149 /* Multiple successors? We're branching */
150 if (block->base.successors[1])
151 return false;
152
153 struct pan_block *succ = block->base.successors[0];
154 assert(succ->predecessors);
155 unsigned count = succ->predecessors->entries;
156
157 /* Back to back only if the successor has only a single predecessor */
158 return (count == 1);
159 }
160
161 /* Eventually, we'll need a proper scheduling, grouping instructions
162 * into clauses and ordering/assigning grouped instructions to the
163 * appropriate FMA/ADD slots. Right now we do the dumbest possible
164 * thing just to have the scheduler stubbed out so we can focus on
165 * codegen */
166
167 void
168 bi_schedule(bi_context *ctx)
169 {
170 unsigned ids = 0;
171 unsigned last_id = 0;
172 bool is_first = true;
173
174 bi_foreach_block(ctx, block) {
175 bi_block *bblock = (bi_block *) block;
176
177 list_inithead(&bblock->clauses);
178
179 bi_foreach_instr_in_block(bblock, ins) {
180 /* Convenient time to lower */
181 bi_lower_fmov(ins);
182
183 unsigned props = bi_class_props[ins->type];
184
185 bi_clause *u = rzalloc(ctx, bi_clause);
186 u->bundle_count = 1;
187
188 /* Check for scheduling restrictions */
189
190 bool can_fma = props & BI_SCHED_FMA;
191 bool can_add = props & BI_SCHED_ADD;
192
193 can_fma &= !bi_ambiguous_abs(ins);
194 can_fma &= !bi_icmp(ins);
195 can_fma &= !bi_imath_small(ins);
196
197 assert(can_fma || can_add);
198
199 if (can_fma)
200 u->bundles[0].fma = ins;
201 else
202 u->bundles[0].add = ins;
203
204 u->scoreboard_id = ids++;
205
206 if (is_first)
207 is_first = false;
208 else {
209 /* Rule: first instructions cannot have write barriers */
210 u->dependencies |= (1 << last_id);
211 u->data_register_write_barrier = true;
212 }
213
214 if (ins->type == BI_ATEST)
215 u->dependencies |= (1 << 6);
216
217 if (ins->type == BI_BLEND)
218 u->dependencies |= (1 << 6) | (1 << 7);
219
220 ids = ids & 1;
221 last_id = u->scoreboard_id;
222
223 /* Let's be optimistic, we'll fix up later */
224 u->back_to_back = true;
225
226 u->constant_count = 1;
227 u->constants[0] = ins->constant.u64;
228
229 /* No indirect jumps yet */
230 if (ins->type == BI_BRANCH) {
231 u->branch_constant = true;
232 u->branch_conditional =
233 (ins->cond != BI_COND_ALWAYS);
234 }
235
236 u->clause_type = bi_clause_type_for_ins(ins);
237 u->block = (struct bi_block *) block;
238
239 list_addtail(&u->link, &bblock->clauses);
240 }
241
242 /* Back-to-back bit affects only the last clause of a block,
243 * the rest are implicitly true */
244 bi_clause *last_clause = list_last_entry(&bblock->clauses, bi_clause, link);
245
246 if (last_clause)
247 last_clause->back_to_back = bi_back_to_back(bblock);
248
249 bblock->scheduled = true;
250 }
251 }