2 * Copyright (C) 2020 Collabora Ltd.
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23 * Authors (Collabora):
24 * Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
29 /* Finds the clause type required by an instruction, or returns none */
/* bi_is_fragz: the visible return reports whether ins->constant.u32 equals
 * BIFROST_FRAGZ.
 *
 * NOTE(review): the return type line and the statement controlled by the
 * `if` below are missing from this copy (the embedded numbering skips 33
 * and 35-36). Presumably the guard bails out with false -- confirm against
 * the upstream file before relying on this. */
32 bi_is_fragz(bi_instruction
*ins
)
/* Guard: taken when source 0 does not have the BIR_INDEX_CONSTANT bit set */
34 if (!(ins
->src
[0] & BIR_INDEX_CONSTANT
))
/* Compare the 32-bit view of the instruction constant with BIFROST_FRAGZ */
37 return (ins
->constant
.u32
== BIFROST_FRAGZ
);
/* bi_clause_type_for_ins: returns the bifrost_clause_type associated with
 * the class of `ins` (ins->type).
 *
 * Classes whose bi_class_props entry lacks BI_SCHED_HI_LATENCY always yield
 * BIFROST_CLAUSE_NONE. The remaining returns below are the arms of a
 * dispatch on T.
 *
 * NOTE(review): the `switch` line, most `case` labels and the braces are
 * missing from this copy, so which class maps to which clause type cannot
 * be read off here except where a label is visible. */
40 static enum bifrost_clause_type
41 bi_clause_type_for_ins(bi_instruction
*ins
)
/* T is the instruction class, used to index bi_class_props */
43 unsigned T
= ins
->type
;
45 /* Only high latency ops impose clause types */
46 if (!(bi_class_props
[T
] & BI_SCHED_HI_LATENCY
))
47 return BIFROST_CLAUSE_NONE
;
/* NOTE(review): label(s) for this arm are not visible; these are
 * high-latency classes that still impose no clause type */
52 return BIFROST_CLAUSE_NONE
;
/* NOTE(review): label and condition for the next two returns are not
 * visible; presumably the FRAGZ result is selected via bi_is_fragz() and
 * LOAD_VARY is the fallback -- confirm */
56 return BIFROST_CLAUSE_FRAGZ
;
58 return BIFROST_CLAUSE_LOAD_VARY
;
/* BI_LOAD_VAR_ADDRESS is the only label visible in this copy; it reaches
 * the UBO return below. Other labels may share this arm (lines 60-61 of
 * the embedded numbering are missing). */
62 case BI_LOAD_VAR_ADDRESS
:
63 return BIFROST_CLAUSE_UBO
;
/* NOTE(review): labels for the remaining arms are not visible */
66 return BIFROST_CLAUSE_TEX
;
69 return BIFROST_CLAUSE_SSBO_LOAD
;
73 return BIFROST_CLAUSE_SSBO_STORE
;
76 return BIFROST_CLAUSE_BLEND
;
79 return BIFROST_CLAUSE_ATEST
;
/* Any high-latency class without an arm above is treated as a bug */
82 unreachable("Invalid high-latency class");
86 /* There is an encoding restriction against FMA fp16 add/min/max
87 * having both sources with abs(..) with a duplicated source. This is
88 * due to the packing being order-sensitive, so the ports must end up distinct
89 * to handle both having abs(..). The swizzle doesn't matter here. Note
90 * BIR_INDEX_REGISTER generally should not be used pre-schedule (TODO: enforce this). */
/* bi_ambiguous_abs: returns true only when all three conditions hold for
 * `ins`:
 *   - its class has the BI_NO_ABS_ABS_FP16_FMA property,
 *   - its destination type is nir_type_float16,
 *   - both src_abs[0] and src_abs[1] are set.
 *
 * NOTE(review): the visible code does not compare src[0] with src[1], so
 * the result does not depend on whether the two sources are actually
 * duplicates. The return type line is missing from this copy. */
95 bi_ambiguous_abs(bi_instruction
*ins
)
/* Class-level property: does this class have the abs/abs fp16 FMA flag? */
97 bool classy
= bi_class_props
[ins
->type
] & BI_NO_ABS_ABS_FP16_FMA
;
/* Only fp16 destinations are affected */
98 bool typey
= ins
->dest_type
== nir_type_float16
;
/* Both of the first two sources carry an abs modifier */
99 bool absy
= ins
->src_abs
[0] && ins
->src_abs
[1];
101 return classy
&& typey
&& absy
;
104 /* New Bifrost (which?) don't seem to have ICMP on FMA */
/* bi_icmp: returns true when `ins` is of class BI_CMP and the base type of
 * its first source type is anything other than nir_type_float.
 *
 * Note the test is "not float" rather than "is integer", so every
 * non-float base type counts. The return type line is missing from this
 * copy. */
106 bi_icmp(bi_instruction
*ins
)
/* ic: first source is not float-typed (only src_types[0] is inspected) */
108 bool ic
= nir_alu_type_get_base_type(ins
->src_types
[0]) != nir_type_float
;
109 return ic
&& (ins
->type
== BI_CMP
);
112 /* No 8/16-bit IADD/ISUB on FMA */
/* bi_imath_small: returns true when `ins` is of class BI_IMATH and the type
 * size of its first source type is below 32 (i.e. the 8/16-bit cases named
 * in the comment preceding this function).
 *
 * Only src_types[0] is inspected. The return type line is missing from this
 * copy. */
114 bi_imath_small(bi_instruction
*ins
)
/* sz: first source is narrower than 32 */
116 bool sz
= nir_alu_type_get_type_size(ins
->src_types
[0]) < 32;
117 return sz
&& (ins
->type
== BI_IMATH
);
120 /* Lowers FMOV to ADD #0, since FMOV doesn't exist on the h/w and this is the
121 * latest time it's sane to lower (it's useful to distinguish before, but we'll
122 * need this handled during scheduling to ensure the ports get modeled
123 * correctly with respect to the new zero source) */
/* bi_lower_fmov: rewrites `ins` in place. The visible statements set the
 * second source to BIR_INDEX_ZERO and copy the first source's type onto
 * the second source.
 *
 * NOTE(review): the statement controlled by the `if` below (presumably an
 * early return for non-FMOV instructions) and any assignment that changes
 * ins->type are missing from this copy (the embedded numbering skips
 * 129-131) -- confirm against the upstream file. */
126 bi_lower_fmov(bi_instruction
*ins
)
/* Guard on the instruction class: taken for anything that is not BI_FMOV */
128 if (ins
->type
!= BI_FMOV
)
/* Second operand becomes the zero source */
132 ins
->src
[1] = BIR_INDEX_ZERO
;
/* The new zero operand takes the same type as the original operand */
133 ins
->src_types
[1] = ins
->src_types
[0];
136 /* To work out the back-to-back flag, we need to detect branches and
137 * "fallthrough" branches, implied in the last clause of a block that falls
138 * through to another block with *multiple predecessors*. */
/* bi_back_to_back: computes the back-to-back flag for the last clause of
 * `block` from its successor edges (block->base.successors[0..1]).
 *
 * Three cases are distinguished by the visible code: no successors, two
 * successors, and exactly one successor (where the successor's predecessor
 * count is read).
 *
 * NOTE(review): every `return` statement, the return type and the closing
 * braces are missing from this copy, so the value produced in each case
 * cannot be confirmed from here. The last comment in this block indicates
 * the single-successor result depends on `count` being 1. */
141 bi_back_to_back(bi_block
*block
)
143 /* Last block of a program */
144 if (!block
->base
.successors
[0]) {
/* Successor slot 1 must be empty whenever slot 0 is */
145 assert(!block
->base
.successors
[1]);
149 /* Multiple successors? We're branching */
150 if (block
->base
.successors
[1])
/* Exactly one successor from here on */
153 struct pan_block
*succ
= block
->base
.successors
[0];
/* The successor is expected to have a predecessor set */
154 assert(succ
->predecessors
);
/* Number of entries in the successor's predecessor set */
155 unsigned count
= succ
->predecessors
->entries
;
157 /* Back to back only if the successor has only a single predecessor */
161 /* Eventually, we'll need a proper scheduling, grouping instructions
162 * into clauses and ordering/assigning grouped instructions to the
163 * appropriate FMA/ADD slots. Right now we do the dumbest possible
164 * thing just to have the scheduler stubbed out so we can focus on codegen. */
/* bi_schedule: walks every block of `ctx` and every instruction in each
 * block, allocating one bi_clause per instruction with rzalloc and
 * appending it to the block's `clauses` list. For each clause the visible
 * code fills in the FMA or ADD slot of bundle 0, scoreboard id and
 * dependencies, the constant, branch flags, the clause type and the owning
 * block. After each block's instructions, the last clause's back_to_back
 * flag is recomputed and the block is marked scheduled.
 *
 * NOTE(review): this copy is missing a number of original lines (return
 * type, the declaration of `ids`, several `if`/`else` conditions and
 * closing braces). Comments below flag where control flow cannot be read
 * from what is visible. */
168 bi_schedule(bi_context
*ctx
)
/* Scoreboard id of the previous clause that was assigned one */
171 unsigned last_id
= 0;
/* NOTE(review): the code that reads and clears is_first is not visible */
172 bool is_first
= true;
174 bi_foreach_block(ctx
, block
) {
175 bi_block
*bblock
= (bi_block
*) block
;
/* Start each block with an empty clause list */
177 list_inithead(&bblock
->clauses
);
179 bi_foreach_instr_in_block(bblock
, ins
) {
180 /* Convenient time to lower */
/* NOTE(review): the lowering call itself (embedded line 181) is not
 * visible in this copy */
/* Scheduling properties of this instruction's class */
183 unsigned props
= bi_class_props
[ins
->type
];
/* One zero-initialised clause per instruction, allocated against ctx */
185 bi_clause
*u
= rzalloc(ctx
, bi_clause
);
188 /* Check for scheduling restrictions */
190 bool can_fma
= props
& BI_SCHED_FMA
;
191 bool can_add
= props
& BI_SCHED_ADD
;
/* FMA is ruled out by any of the three restriction predicates */
193 can_fma
&= !bi_ambiguous_abs(ins
);
194 can_fma
&= !bi_icmp(ins
);
195 can_fma
&= !bi_imath_small(ins
);
/* Every instruction must be placeable in at least one of the two slots */
197 assert(can_fma
|| can_add
);
/* NOTE(review): the condition choosing between the two assignments below
 * is not visible; presumably FMA when can_fma, otherwise ADD -- confirm */
200 u
->bundles
[0].fma
= ins
;
202 u
->bundles
[0].add
= ins
;
/* NOTE(review): the declaration of `ids`, any bound/wrap applied to it,
 * and the condition under which an id is assigned are not visible here */
204 u
->scoreboard_id
= ids
++;
209 /* Rule: first instructions cannot have write barriers */
/* Depend on the slot used by the previous id-carrying clause.
 * NOTE(review): the guard implementing the rule above is not visible */
210 u
->dependencies
|= (1 << last_id
);
211 u
->data_register_write_barrier
= true;
/* ATEST additionally sets dependency bit 6; BLEND sets bits 6 and 7.
 * NOTE(review): the meaning of slots 6/7 is not established in this file */
214 if (ins
->type
== BI_ATEST
)
215 u
->dependencies
|= (1 << 6);
217 if (ins
->type
== BI_BLEND
)
218 u
->dependencies
|= (1 << 6) | (1 << 7);
/* Remember this clause's id for the next clause's dependency */
221 last_id
= u
->scoreboard_id
;
223 /* Let's be optimistic, we'll fix up later */
224 u
->back_to_back
= true;
/* Each clause carries exactly one constant: the instruction's 64-bit one */
226 u
->constant_count
= 1;
227 u
->constants
[0] = ins
->constant
.u64
;
229 /* No indirect jumps yet */
230 if (ins
->type
== BI_BRANCH
) {
231 u
->branch_constant
= true;
/* Conditional unless the branch condition is BI_COND_ALWAYS */
232 u
->branch_conditional
=
233 (ins
->cond
!= BI_COND_ALWAYS
);
236 u
->clause_type
= bi_clause_type_for_ins(ins
);
237 u
->block
= (struct bi_block
*) block
;
/* Append so clauses stay in instruction order */
239 list_addtail(&u
->link
, &bblock
->clauses
);
242 /* Back-to-back bit affects only the last clause of a block,
243 * the rest are implicitly true */
244 bi_clause
*last_clause
= list_last_entry(&bblock
->clauses
, bi_clause
, link
);
/* NOTE(review): any guard for a block with no clauses (embedded lines
 * 245-246) is not visible in this copy */
247 last_clause
->back_to_back
= bi_back_to_back(bblock
);
249 bblock
->scheduled
= true;