/*
 * Copyright (c) 2019 Zodiac Inflight Innovations
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sub license,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 *
 * Authors:
 *    Jonathan Marek <jonathan@marek.ca>
 */
#include "etnaviv_asm.h"
#include "etnaviv_context.h"

#include "compiler/nir/nir.h"
#include "compiler/nir/nir_builder.h"
#include "compiler/nir/nir_worklist.h"
#include "util/register_allocate.h"
struct emit_options {
   unsigned max_temps; /* max # of vec4 registers */
   unsigned max_consts; /* max # of vec4 consts */
   unsigned id_reg; /* register with vertex/instance id */
   bool single_const_src : 1; /* limited to 1 vec4 const src */
   bool etna_new_transcendentals : 1;
   void *user; /* opaque pointer passed through to the etna_emit_* callbacks */
   uint64_t *consts; /* vec4 constant pool, filled by const_src() */
};
#define ALU_SWIZ(s) INST_SWIZ((s)->swizzle[0], (s)->swizzle[1], (s)->swizzle[2], (s)->swizzle[3])

#define SRC_DISABLE ((hw_src){})
#define SRC_CONST(idx, s) ((hw_src){.use=1, .rgroup = INST_RGROUP_UNIFORM_0, .reg=idx, .swiz=s})
#define SRC_REG(idx, s) ((hw_src){.use=1, .rgroup = INST_RGROUP_TEMP, .reg=idx, .swiz=s})

#define option(name) (state->options->name)
#define emit(type, args...) etna_emit_##type(state->options->user, args)
typedef struct etna_inst_dst hw_dst;
typedef struct etna_inst_src hw_src;
/* pass_flags bits used to mark instructions whose value is produced or
 * consumed through another instruction; any two distinct non-zero bits work,
 * since the code only ever tests them individually
 */
enum {
   BYPASS_DST = 1,
   BYPASS_SRC = 2,
};

struct state {
   const struct emit_options *options;
   unsigned const_count;

   nir_shader *shader;
   nir_function_impl *impl;

   /* register allocation state, filled in by ra_assign() */
   struct ra_graph *g;
   struct ra_regs *regs;
   unsigned *live_map;
   unsigned num_nodes;
};
static inline hw_src
src_swizzle(hw_src src, unsigned swizzle)
{
   src.swiz = inst_swiz_compose(src.swiz, swizzle);
   return src;
}
static inline bool is_sysval(nir_instr *instr)
{
   if (instr->type != nir_instr_type_intrinsic)
      return false;

   nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
   return intr->intrinsic == nir_intrinsic_load_front_face ||
          intr->intrinsic == nir_intrinsic_load_frag_coord;
}
/* constants are represented as 64-bit ints
 * 32-bit for the value and 32-bit for the type (imm, uniform, etc)
 */

#define CONST_VAL(a, b) (nir_const_value) {.u64 = (uint64_t)(a) << 32 | (uint64_t)(b)}
#define CONST(x) CONST_VAL(ETNA_IMMEDIATE_CONSTANT, x)
#define UNIFORM(x) CONST_VAL(ETNA_IMMEDIATE_UNIFORM, x)
#define UNIFORM_BASE(x) CONST_VAL(ETNA_IMMEDIATE_UBO0_ADDR, x)
#define TEXSCALE(x, i) CONST_VAL(ETNA_IMMEDIATE_TEXRECT_SCALE_X + (i), x)
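/* Worked example (illustrative): CONST(0) expands to a nir_const_value with
 * .u64 = (uint64_t)ETNA_IMMEDIATE_CONSTANT << 32 | 0 - the kind tag in the
 * high half, the raw 32-bit value in the low half. const_add() below compares
 * whole 64-bit entries, so an immediate and a uniform that happen to share
 * the same 32-bit value still occupy separate components.
 */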
static int
const_add(uint64_t *c, uint64_t value)
{
   for (unsigned i = 0; i < 4; i++) {
      if (c[i] == value || !c[i]) {
         c[i] = value;
         return i;
      }
   }
   return -1;
}
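/* Example (illustrative): with c = {0,0,0,0}, const_add(c, A) returns 0,
 * a second const_add(c, A) returns 0 again (deduplicated), const_add(c, B)
 * returns 1, and once all four components hold distinct values any new value
 * yields -1, telling the caller to move on to the next vec4 slot. A zero u64
 * doubles as "free component" here, which assumes the ETNA_IMMEDIATE_* tags
 * in the high 32 bits are non-zero.
 */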
static hw_src
const_src(struct state *state, nir_const_value *value, unsigned num_components)
{
   unsigned i;
   int swiz = -1;

   /* find/create a vec4 constant slot with room for all components */
   for (i = 0; swiz < 0; i++) {
      uint64_t *a = &option(consts)[i*4];
      uint64_t save[4];

      memcpy(save, a, sizeof(save));
      swiz = 0;
      for (unsigned j = 0; j < num_components; j++) {
         int c = const_add(a, value[j].u64);
         if (c < 0) {
            /* no room in this slot: restore it and try the next one */
            memcpy(a, save, sizeof(save));
            swiz = -1;
            break;
         }
         swiz |= c << j * 2;
      }
   }

   assert(i <= option(max_consts));
   state->const_count = MAX2(state->const_count, i);

   return SRC_CONST(i - 1, swiz);
}
/* Swizzles and write masks can be used to layer virtual non-interfering
 * registers on top of the real VEC4 registers. For example, the virtual
 * VEC3_XYZ register and the virtual SCALAR_W register that use the same
 * physical VEC4 base register do not interfere.
 */
enum {
   REG_CLASS_VIRT_SCALAR,
   REG_CLASS_VIRT_VEC2,
   REG_CLASS_VIRT_VEC3,
   REG_CLASS_VEC4,
   /* special vec2 class for fast transcendentals, limited to XY or ZW */
   REG_CLASS_VIRT_VEC2T,
   /* special classes for LOAD - contiguous components */
   REG_CLASS_VIRT_VEC2C,
   REG_CLASS_VIRT_VEC3C,
   NUM_REG_CLASSES,
};
enum {
   REG_TYPE_VEC4,
   REG_TYPE_VIRT_VEC3_XYZ,
   REG_TYPE_VIRT_VEC3_XYW,
   REG_TYPE_VIRT_VEC3_XZW,
   REG_TYPE_VIRT_VEC3_YZW,
   REG_TYPE_VIRT_VEC2_XY,
   REG_TYPE_VIRT_VEC2_XZ,
   REG_TYPE_VIRT_VEC2_XW,
   REG_TYPE_VIRT_VEC2_YZ,
   REG_TYPE_VIRT_VEC2_YW,
   REG_TYPE_VIRT_VEC2_ZW,
   REG_TYPE_VIRT_SCALAR_X,
   REG_TYPE_VIRT_SCALAR_Y,
   REG_TYPE_VIRT_SCALAR_Z,
   REG_TYPE_VIRT_SCALAR_W,
   REG_TYPE_VIRT_VEC2T_XY,
   REG_TYPE_VIRT_VEC2T_ZW,
   REG_TYPE_VIRT_VEC2C_XY,
   REG_TYPE_VIRT_VEC2C_YZ,
   REG_TYPE_VIRT_VEC2C_ZW,
   REG_TYPE_VIRT_VEC3C_XYZ,
   REG_TYPE_VIRT_VEC3C_YZW,
   NUM_REG_TYPES,
};
/* writemask when used as dest */
static const uint8_t
reg_writemask[NUM_REG_TYPES] = {
   [REG_TYPE_VEC4] = 0xf,
   [REG_TYPE_VIRT_SCALAR_X] = 0x1,
   [REG_TYPE_VIRT_SCALAR_Y] = 0x2,
   [REG_TYPE_VIRT_VEC2_XY] = 0x3,
   [REG_TYPE_VIRT_VEC2T_XY] = 0x3,
   [REG_TYPE_VIRT_VEC2C_XY] = 0x3,
   [REG_TYPE_VIRT_SCALAR_Z] = 0x4,
   [REG_TYPE_VIRT_VEC2_XZ] = 0x5,
   [REG_TYPE_VIRT_VEC2_YZ] = 0x6,
   [REG_TYPE_VIRT_VEC2C_YZ] = 0x6,
   [REG_TYPE_VIRT_VEC3_XYZ] = 0x7,
   [REG_TYPE_VIRT_VEC3C_XYZ] = 0x7,
   [REG_TYPE_VIRT_SCALAR_W] = 0x8,
   [REG_TYPE_VIRT_VEC2_XW] = 0x9,
   [REG_TYPE_VIRT_VEC2_YW] = 0xa,
   [REG_TYPE_VIRT_VEC3_XYW] = 0xb,
   [REG_TYPE_VIRT_VEC2_ZW] = 0xc,
   [REG_TYPE_VIRT_VEC2T_ZW] = 0xc,
   [REG_TYPE_VIRT_VEC2C_ZW] = 0xc,
   [REG_TYPE_VIRT_VEC3_XZW] = 0xd,
   [REG_TYPE_VIRT_VEC3_YZW] = 0xe,
   [REG_TYPE_VIRT_VEC3C_YZW] = 0xe,
};
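/* Illustration of the layering described above: VIRT_VEC3_XYZ (0x7) and
 * VIRT_SCALAR_W (0x8) have disjoint writemasks, so ra_assign() adds no
 * conflict between them on the same base register and both values can share
 * one physical vec4. VIRT_VEC3_XYZ and VIRT_VEC2_ZW (0x7 vs 0xc) overlap in
 * Z and therefore do conflict.
 */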
/* how to swizzle when used as a src */
static const uint8_t
reg_swiz[NUM_REG_TYPES] = {
   [REG_TYPE_VEC4] = INST_SWIZ_IDENTITY,
   [REG_TYPE_VIRT_SCALAR_X] = INST_SWIZ_IDENTITY,
   [REG_TYPE_VIRT_SCALAR_Y] = SWIZZLE(Y, Y, Y, Y),
   [REG_TYPE_VIRT_VEC2_XY] = INST_SWIZ_IDENTITY,
   [REG_TYPE_VIRT_VEC2T_XY] = INST_SWIZ_IDENTITY,
   [REG_TYPE_VIRT_VEC2C_XY] = INST_SWIZ_IDENTITY,
   [REG_TYPE_VIRT_SCALAR_Z] = SWIZZLE(Z, Z, Z, Z),
   [REG_TYPE_VIRT_VEC2_XZ] = SWIZZLE(X, Z, X, Z),
   [REG_TYPE_VIRT_VEC2_YZ] = SWIZZLE(Y, Z, Y, Z),
   [REG_TYPE_VIRT_VEC2C_YZ] = SWIZZLE(Y, Z, Y, Z),
   [REG_TYPE_VIRT_VEC3_XYZ] = INST_SWIZ_IDENTITY,
   [REG_TYPE_VIRT_VEC3C_XYZ] = INST_SWIZ_IDENTITY,
   [REG_TYPE_VIRT_SCALAR_W] = SWIZZLE(W, W, W, W),
   [REG_TYPE_VIRT_VEC2_XW] = SWIZZLE(X, W, X, W),
   [REG_TYPE_VIRT_VEC2_YW] = SWIZZLE(Y, W, Y, W),
   [REG_TYPE_VIRT_VEC3_XYW] = SWIZZLE(X, Y, W, X),
   [REG_TYPE_VIRT_VEC2_ZW] = SWIZZLE(Z, W, Z, W),
   [REG_TYPE_VIRT_VEC2T_ZW] = SWIZZLE(Z, W, Z, W),
   [REG_TYPE_VIRT_VEC2C_ZW] = SWIZZLE(Z, W, Z, W),
   [REG_TYPE_VIRT_VEC3_XZW] = SWIZZLE(X, Z, W, X),
   [REG_TYPE_VIRT_VEC3_YZW] = SWIZZLE(Y, Z, W, X),
   [REG_TYPE_VIRT_VEC3C_YZW] = SWIZZLE(Y, Z, W, X),
};
/* how to swizzle when used as a dest */
static const uint8_t
reg_dst_swiz[NUM_REG_TYPES] = {
   [REG_TYPE_VEC4] = INST_SWIZ_IDENTITY,
   [REG_TYPE_VIRT_SCALAR_X] = INST_SWIZ_IDENTITY,
   [REG_TYPE_VIRT_SCALAR_Y] = SWIZZLE(X, X, X, X),
   [REG_TYPE_VIRT_VEC2_XY] = INST_SWIZ_IDENTITY,
   [REG_TYPE_VIRT_VEC2T_XY] = INST_SWIZ_IDENTITY,
   [REG_TYPE_VIRT_VEC2C_XY] = INST_SWIZ_IDENTITY,
   [REG_TYPE_VIRT_SCALAR_Z] = SWIZZLE(X, X, X, X),
   [REG_TYPE_VIRT_VEC2_XZ] = SWIZZLE(X, X, Y, Y),
   [REG_TYPE_VIRT_VEC2_YZ] = SWIZZLE(X, X, Y, Y),
   [REG_TYPE_VIRT_VEC2C_YZ] = SWIZZLE(X, X, Y, Y),
   [REG_TYPE_VIRT_VEC3_XYZ] = INST_SWIZ_IDENTITY,
   [REG_TYPE_VIRT_VEC3C_XYZ] = INST_SWIZ_IDENTITY,
   [REG_TYPE_VIRT_SCALAR_W] = SWIZZLE(X, X, X, X),
   [REG_TYPE_VIRT_VEC2_XW] = SWIZZLE(X, X, Y, Y),
   [REG_TYPE_VIRT_VEC2_YW] = SWIZZLE(X, X, Y, Y),
   [REG_TYPE_VIRT_VEC3_XYW] = SWIZZLE(X, Y, Z, Z),
   [REG_TYPE_VIRT_VEC2_ZW] = SWIZZLE(X, X, X, Y),
   [REG_TYPE_VIRT_VEC2T_ZW] = SWIZZLE(X, X, X, Y),
   [REG_TYPE_VIRT_VEC2C_ZW] = SWIZZLE(X, X, X, Y),
   [REG_TYPE_VIRT_VEC3_XZW] = SWIZZLE(X, Y, Y, Z),
   [REG_TYPE_VIRT_VEC3_YZW] = SWIZZLE(X, X, Y, Z),
   [REG_TYPE_VIRT_VEC3C_YZW] = SWIZZLE(X, X, Y, Z),
};
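/* Worked example (illustrative): take a 2-component value allocated to
 * REG_TYPE_VIRT_VEC2_ZW. As a dest, reg_dst_swiz[] gives (X, X, X, Y) and
 * reg_writemask[] gives 0xc, so the instruction's X/Y results are steered
 * into the Z/W channels of the base register. As a src, reg_swiz[] gives
 * (Z, W, Z, W), reading those channels back as components 0 and 1.
 */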
static inline int reg_get_type(int virt_reg)
{
   return virt_reg % NUM_REG_TYPES;
}
static inline int reg_get_base(struct state *state, int virt_reg)
{
   /* offset by 1 to avoid reserved position register */
   if (state->shader->info.stage == MESA_SHADER_FRAGMENT)
      return virt_reg / NUM_REG_TYPES + 1;
   return virt_reg / NUM_REG_TYPES;
}
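/* Note on the encoding: an allocated node register packs base and type as
 * virt_reg = base * NUM_REG_TYPES + reg_type, and the two helpers above are
 * its projections. E.g. (illustrative) 2 * NUM_REG_TYPES +
 * REG_TYPE_VIRT_SCALAR_W names the W channel of temp t2 - or t3 in fragment
 * shaders, where t0 is the reserved position register.
 */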
static inline int reg_get_class(int virt_reg)
{
   switch (reg_get_type(virt_reg)) {
   case REG_TYPE_VEC4:
      return REG_CLASS_VEC4;
   case REG_TYPE_VIRT_VEC3_XYZ:
   case REG_TYPE_VIRT_VEC3_XYW:
   case REG_TYPE_VIRT_VEC3_XZW:
   case REG_TYPE_VIRT_VEC3_YZW:
      return REG_CLASS_VIRT_VEC3;
   case REG_TYPE_VIRT_VEC2_XY:
   case REG_TYPE_VIRT_VEC2_XZ:
   case REG_TYPE_VIRT_VEC2_XW:
   case REG_TYPE_VIRT_VEC2_YZ:
   case REG_TYPE_VIRT_VEC2_YW:
   case REG_TYPE_VIRT_VEC2_ZW:
      return REG_CLASS_VIRT_VEC2;
   case REG_TYPE_VIRT_SCALAR_X:
   case REG_TYPE_VIRT_SCALAR_Y:
   case REG_TYPE_VIRT_SCALAR_Z:
   case REG_TYPE_VIRT_SCALAR_W:
      return REG_CLASS_VIRT_SCALAR;
   case REG_TYPE_VIRT_VEC2T_XY:
   case REG_TYPE_VIRT_VEC2T_ZW:
      return REG_CLASS_VIRT_VEC2T;
   case REG_TYPE_VIRT_VEC2C_XY:
   case REG_TYPE_VIRT_VEC2C_YZ:
   case REG_TYPE_VIRT_VEC2C_ZW:
      return REG_CLASS_VIRT_VEC2C;
   case REG_TYPE_VIRT_VEC3C_XYZ:
   case REG_TYPE_VIRT_VEC3C_YZW:
      return REG_CLASS_VIRT_VEC3C;
   }

   assert(false);
   return 0;
}
/* get unique ssa/reg index for nir_src */
static unsigned
src_index(nir_function_impl *impl, nir_src *src)
{
   return src->is_ssa ? src->ssa->index : (src->reg.reg->index + impl->ssa_alloc);
}
/* get unique ssa/reg index for nir_dest */
static unsigned
dest_index(nir_function_impl *impl, nir_dest *dest)
{
   return dest->is_ssa ? dest->ssa.index : (dest->reg.reg->index + impl->ssa_alloc);
}
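/* The two index helpers above place SSA defs and NIR registers in a single
 * flat index space: SSA defs keep their own index in [0, ssa_alloc) and
 * registers are offset by impl->ssa_alloc, which is why ra_assign() sizes
 * live_map with ssa_alloc + reg_alloc entries.
 */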
/* nir_src to allocated register */
static hw_src
ra_src(struct state *state, nir_src *src)
{
   unsigned reg = ra_get_node_reg(state->g, state->live_map[src_index(state->impl, src)]);
   return SRC_REG(reg_get_base(state, reg), reg_swiz[reg_get_type(reg)]);
}
static hw_src
get_src(struct state *state, nir_src *src)
{
   if (!src->is_ssa)
      return ra_src(state, src);

   nir_instr *instr = src->ssa->parent_instr;

   if (instr->pass_flags & BYPASS_SRC) {
      assert(instr->type == nir_instr_type_alu);
      nir_alu_instr *alu = nir_instr_as_alu(instr);
      assert(alu->op == nir_op_mov);
      return src_swizzle(get_src(state, &alu->src[0].src), ALU_SWIZ(&alu->src[0]));
   }

   switch (instr->type) {
   case nir_instr_type_load_const:
      return const_src(state, nir_instr_as_load_const(instr)->value, src->ssa->num_components);
   case nir_instr_type_intrinsic: {
      nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
      switch (intr->intrinsic) {
      case nir_intrinsic_load_input:
      case nir_intrinsic_load_instance_id:
      case nir_intrinsic_load_uniform:
         return ra_src(state, src);
      case nir_intrinsic_load_front_face:
         return (hw_src) { .use = 1, .rgroup = INST_RGROUP_INTERNAL };
      case nir_intrinsic_load_frag_coord:
         return SRC_REG(0, INST_SWIZ_IDENTITY);
      default:
         assert(0);
         break;
      }
   } break;
   case nir_instr_type_alu:
   case nir_instr_type_tex:
      return ra_src(state, src);
   case nir_instr_type_ssa_undef: {
      /* return zero to deal with broken Blur demo */
      nir_const_value value = CONST(0);
      return src_swizzle(const_src(state, &value, 1), SWIZZLE(X,X,X,X));
   }
   default:
      assert(0);
      break;
   }

   return SRC_DISABLE;
}
static void
update_swiz_mask(nir_alu_instr *alu, nir_dest *dest, unsigned *swiz, unsigned *mask)
{
   if (!swiz)
      return;

   bool is_vec = dest != NULL;
   unsigned swizzle = 0, write_mask = 0;
   for (unsigned i = 0; i < 4; i++) {
      /* channel not written */
      if (!(alu->dest.write_mask & (1 << i)))
         continue;
      /* src is different (only check for vecN) */
      if (is_vec && alu->src[i].src.ssa != &dest->ssa)
         continue;

      unsigned src_swiz = is_vec ? alu->src[i].swizzle[0] : alu->src[0].swizzle[i];
      swizzle |= (*swiz >> src_swiz * 2 & 3) << i * 2;
      /* this channel isn't written through this chain */
      if (*mask & (1 << src_swiz))
         write_mask |= 1 << i;
   }
   *swiz = swizzle;
   *mask = write_mask;
}
static bool
vec_dest_has_swizzle(nir_alu_instr *vec, nir_ssa_def *ssa)
{
   for (unsigned i = 0; i < 4; i++) {
      if (!(vec->dest.write_mask & (1 << i)) || vec->src[i].src.ssa != ssa)
         continue;

      if (vec->src[i].swizzle[0] != i)
         return true;
   }

   /* don't deal with possible bypassed vec/mov chain */
   nir_foreach_use(use_src, ssa) {
      nir_instr *instr = use_src->parent_instr;
      if (instr->type != nir_instr_type_alu)
         continue;

      nir_alu_instr *alu = nir_instr_as_alu(instr);

      switch (alu->op) {
      case nir_op_mov:
      case nir_op_vec2:
      case nir_op_vec3:
      case nir_op_vec4:
         return true;
      default:
         break;
      }
   }
   return false;
}
static nir_dest *
real_dest(nir_dest *dest, unsigned *swiz, unsigned *mask)
{
   if (!dest || !dest->is_ssa)
      return dest;

   bool can_bypass_src = !list_length(&dest->ssa.if_uses);
   nir_instr *p_instr = dest->ssa.parent_instr;

   /* if used by a vecN, the "real" destination becomes the vecN destination
    * lower_alu guarantees that values used by a vecN are only used by that vecN
    * we can apply the same logic to movs in some cases too
    */
   nir_foreach_use(use_src, &dest->ssa) {
      nir_instr *instr = use_src->parent_instr;

      /* src bypass check: for now only deal with tex src mov case
       * note: for alu don't bypass mov for multiple uniform sources
       */
      switch (instr->type) {
      case nir_instr_type_tex:
         if (p_instr->type == nir_instr_type_alu &&
             nir_instr_as_alu(p_instr)->op == nir_op_mov) {
            break;
         }
         /* fallthrough */
      default:
         can_bypass_src = false;
         break;
      }

      if (instr->type != nir_instr_type_alu)
         continue;

      nir_alu_instr *alu = nir_instr_as_alu(instr);

      switch (alu->op) {
      case nir_op_vec2:
      case nir_op_vec3:
      case nir_op_vec4:
         assert(list_length(&dest->ssa.if_uses) == 0);
         nir_foreach_use(use_src, &dest->ssa)
            assert(use_src->parent_instr == instr);

         update_swiz_mask(alu, dest, swiz, mask);
         break;
      case nir_op_mov: {
         switch (dest->ssa.parent_instr->type) {
         case nir_instr_type_alu:
         case nir_instr_type_tex:
            break;
         default:
            continue;
         }
         if (list_length(&dest->ssa.if_uses) || list_length(&dest->ssa.uses) > 1)
            continue;

         update_swiz_mask(alu, NULL, swiz, mask);
         break;
      }
      default:
         continue;
      }

      assert(!(instr->pass_flags & BYPASS_SRC));
      instr->pass_flags |= BYPASS_DST;
      return real_dest(&alu->dest.dest, swiz, mask);
   }

   if (can_bypass_src && !(p_instr->pass_flags & BYPASS_DST)) {
      p_instr->pass_flags |= BYPASS_SRC;
      return NULL;
   }

   return dest;
}
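/* Example of the bypass marking (illustrative):
 *    ssa_1 = fmul ...
 *    ssa_2 = vec4 ssa_1.x, ssa_1.x, ssa_1.x, ssa_1.x
 * real_dest() on the fmul walks to the vec4, tags it BYPASS_DST (emit_alu()
 * skips instructions with pass_flags set) and returns the vec4's dest, so
 * the fmul writes the vec4's register directly with the composed
 * swizzle/mask. In the tex-coordinate case the mov itself is instead tagged
 * BYPASS_SRC and get_src() reads straight through it.
 */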
/* get allocated dest register for nir_dest
 * *p_swiz tells how the components need to be placed into register
 */
static hw_dst
ra_dest(struct state *state, nir_dest *dest, unsigned *p_swiz)
{
   unsigned swiz = INST_SWIZ_IDENTITY, mask = 0xf;
   dest = real_dest(dest, &swiz, &mask);

   unsigned r = ra_get_node_reg(state->g, state->live_map[dest_index(state->impl, dest)]);
   unsigned t = reg_get_type(r);

   *p_swiz = inst_swiz_compose(swiz, reg_dst_swiz[t]);

   return (hw_dst) {
      .use = 1,
      .reg = reg_get_base(state, r),
      .write_mask = inst_write_mask_compose(mask, reg_writemask[t]),
   };
}
553 dest_for_instr(nir_instr
*instr
)
555 nir_dest
*dest
= NULL
;
557 switch (instr
->type
) {
558 case nir_instr_type_alu
:
559 dest
= &nir_instr_as_alu(instr
)->dest
.dest
;
561 case nir_instr_type_tex
:
562 dest
=&nir_instr_as_tex(instr
)->dest
;
564 case nir_instr_type_intrinsic
: {
565 nir_intrinsic_instr
*intr
= nir_instr_as_intrinsic(instr
);
566 if (intr
->intrinsic
== nir_intrinsic_load_uniform
||
567 intr
->intrinsic
== nir_intrinsic_load_input
||
568 intr
->intrinsic
== nir_intrinsic_load_instance_id
)
574 return real_dest(dest
, NULL
, NULL
);
struct live_def {
   nir_instr *instr;
   nir_dest *dest; /* cached dest_for_instr */
   unsigned live_start, live_end; /* live range */
};
static inline void
range_include(struct live_def *def, unsigned index)
{
   if (def->live_start > index)
      def->live_start = index;
   if (def->live_end < index)
      def->live_end = index;
}
struct live_defs_state {
   unsigned num_defs;
   unsigned bitset_words;

   nir_function_impl *impl;
   nir_block *block; /* current block pointer */
   unsigned index; /* current live index */

   struct live_def *defs;
   unsigned *live_map; /* to map ssa/reg index into defs array */

   nir_block_worklist worklist;
};
static void
init_liveness_block(nir_block *block,
                    struct live_defs_state *state)
{
   block->live_in = reralloc(block, block->live_in, BITSET_WORD,
                             state->bitset_words);
   memset(block->live_in, 0, state->bitset_words * sizeof(BITSET_WORD));

   block->live_out = reralloc(block, block->live_out, BITSET_WORD,
                              state->bitset_words);
   memset(block->live_out, 0, state->bitset_words * sizeof(BITSET_WORD));

   nir_block_worklist_push_head(&state->worklist, block);
}
static bool
set_src_live(nir_src *src, void *void_state)
{
   struct live_defs_state *state = void_state;

   if (src->is_ssa) {
      nir_instr *instr = src->ssa->parent_instr;

      if (is_sysval(instr))
         return true;

      switch (instr->type) {
      case nir_instr_type_load_const:
      case nir_instr_type_ssa_undef:
         return true;
      case nir_instr_type_alu: {
         /* alu op bypass */
         nir_alu_instr *alu = nir_instr_as_alu(instr);
         if (instr->pass_flags & BYPASS_SRC) {
            for (unsigned i = 0; i < nir_op_infos[alu->op].num_inputs; i++)
               set_src_live(&alu->src[i].src, state);
            return true;
         }
      } break;
      default:
         break;
      }
   }

   unsigned i = state->live_map[src_index(state->impl, src)];
   assert(i != ~0u);

   BITSET_SET(state->block->live_in, i);
   range_include(&state->defs[i], state->index);

   return true;
}
static bool
propagate_across_edge(nir_block *pred, nir_block *succ,
                      struct live_defs_state *state)
{
   BITSET_WORD progress = 0;
   for (unsigned i = 0; i < state->bitset_words; ++i) {
      progress |= succ->live_in[i] & ~pred->live_out[i];
      pred->live_out[i] |= succ->live_in[i];
   }
   return progress != 0;
}
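/* This is the standard backward dataflow step: for each CFG edge
 * pred -> succ, live_out(pred) |= live_in(succ), with the return value
 * reporting whether any new bits appeared so live_defs() knows to requeue
 * pred. Iterating until no edge makes progress reaches the liveness
 * fixpoint.
 */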
static unsigned
live_defs(nir_function_impl *impl, struct live_def *defs, unsigned *live_map)
{
   struct live_defs_state state;
   unsigned block_live_index[impl->num_blocks + 1];

   state.impl = impl;
   state.defs = defs;
   state.live_map = live_map;

   state.num_defs = 0;
   nir_foreach_block(block, impl) {
      block_live_index[block->index] = state.num_defs;
      nir_foreach_instr(instr, block) {
         nir_dest *dest = dest_for_instr(instr);
         if (!dest)
            continue;

         unsigned idx = dest_index(impl, dest);
         /* register is already in defs */
         if (live_map[idx] != ~0u)
            continue;

         defs[state.num_defs] = (struct live_def) {instr, dest, state.num_defs, 0};

         /* input live from the start */
         if (instr->type == nir_instr_type_intrinsic) {
            nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
            if (intr->intrinsic == nir_intrinsic_load_input ||
                intr->intrinsic == nir_intrinsic_load_instance_id)
               defs[state.num_defs].live_start = 0;
         }

         live_map[idx] = state.num_defs;
         state.num_defs++;
      }
   }

   block_live_index[impl->num_blocks] = state.num_defs;

   nir_block_worklist_init(&state.worklist, impl->num_blocks, NULL);

   /* We now know how many unique ssa definitions we have and we can go
    * ahead and allocate live_in and live_out sets and add all of the
    * blocks to the worklist.
    */
   state.bitset_words = BITSET_WORDS(state.num_defs);
   nir_foreach_block(block, impl) {
      init_liveness_block(block, &state);
   }

   /* We're now ready to work through the worklist and update the liveness
    * sets of each of the blocks. By the time we get to this point, every
    * block in the function implementation has been pushed onto the
    * worklist in reverse order. As long as we keep the worklist
    * up-to-date as we go, everything will get covered.
    */
   while (!nir_block_worklist_is_empty(&state.worklist)) {
      /* We pop them off in the reverse order we pushed them on. This way
       * the first walk of the instructions is backwards so we only walk
       * once in the case of no control flow.
       */
      nir_block *block = nir_block_worklist_pop_head(&state.worklist);
      state.block = block;

      memcpy(block->live_in, block->live_out,
             state.bitset_words * sizeof(BITSET_WORD));

      state.index = block_live_index[block->index + 1];

      nir_if *following_if = nir_block_get_following_if(block);
      if (following_if)
         set_src_live(&following_if->condition, &state);

      nir_foreach_instr_reverse(instr, block) {
         /* when we come across the next "live" instruction, decrement index */
         if (state.index && instr == defs[state.index - 1].instr) {
            state.index--;
            /* the only source of writes to registers is phis:
             * we don't expect any partial write_mask alus
             * so clearing live_in here is OK
             */
            BITSET_CLEAR(block->live_in, state.index);
         }

         /* don't set_src_live for not-emitted instructions */
         if (instr->pass_flags)
            continue;

         unsigned index = state.index;

         /* output live till the end */
         if (instr->type == nir_instr_type_intrinsic) {
            nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
            if (intr->intrinsic == nir_intrinsic_store_output)
               state.index = ~0u;
         }

         nir_foreach_src(instr, set_src_live, &state);

         state.index = index;
      }
      assert(state.index == block_live_index[block->index]);

      /* Walk over all of the predecessors of the current block updating
       * their live in with the live out of this one. If anything has
       * changed, add the predecessor to the work list so that we ensure
       * that the new information is used.
       */
      set_foreach(block->predecessors, entry) {
         nir_block *pred = (nir_block *)entry->key;
         if (propagate_across_edge(pred, block, &state))
            nir_block_worklist_push_tail(&state.worklist, pred);
      }
   }

   nir_block_worklist_fini(&state.worklist);

   /* apply live_in/live_out to ranges */
   nir_foreach_block(block, impl) {
      BITSET_WORD tmp;
      int i;

      BITSET_FOREACH_SET(i, tmp, block->live_in, state.num_defs)
         range_include(&state.defs[i], block_live_index[block->index]);

      BITSET_FOREACH_SET(i, tmp, block->live_out, state.num_defs)
         range_include(&state.defs[i], block_live_index[block->index + 1]);
   }

   return state.num_defs;
}
/* precomputed by register_allocate */
static unsigned int *q_values[] = {
   (unsigned int[]) {1, 2, 3, 4, 2, 2, 3},
   (unsigned int[]) {3, 5, 6, 6, 5, 5, 6},
   (unsigned int[]) {3, 4, 4, 4, 4, 4, 4},
   (unsigned int[]) {1, 1, 1, 1, 1, 1, 1},
   (unsigned int[]) {1, 2, 2, 2, 1, 2, 2},
   (unsigned int[]) {2, 3, 3, 3, 2, 3, 3},
   (unsigned int[]) {2, 2, 2, 2, 2, 2, 2},
};
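/* q_values[B][C] is register_allocate's q(B, C): the maximum number of
 * class-B registers a single class-C register can conflict with (indices
 * follow the reg_class enum). E.g. one VEC4 overlaps up to four SCALARs of
 * the same base (q_values[SCALAR][VEC4] = 4), while any register overlaps
 * at most one VEC4 (the VEC4 row is all 1s). Passing the precomputed matrix
 * spares ra_set_finalize() from deriving it out of the conflict lists.
 */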
static void
ra_assign(struct state *state, nir_shader *shader)
{
   struct ra_regs *regs = ra_alloc_reg_set(NULL, option(max_temps) *
                  NUM_REG_TYPES, false);

   /* classes are always created from index 0, so the class index is equal
    * to the reg_class enum value, which represents a register with (c+1)
    * components
    */
   for (int c = 0; c < NUM_REG_CLASSES; c++)
      ra_alloc_reg_class(regs);
   /* add each register of each class */
   for (int r = 0; r < NUM_REG_TYPES * option(max_temps); r++)
      ra_class_add_reg(regs, reg_get_class(r), r);
   /* register types with overlapping writemasks on the same base conflict */
   for (int r = 0; r < option(max_temps); r++) {
      for (int i = 0; i < NUM_REG_TYPES; i++) {
         for (int j = 0; j < i; j++) {
            if (reg_writemask[i] & reg_writemask[j]) {
               ra_add_reg_conflict(regs, NUM_REG_TYPES * r + i,
                                         NUM_REG_TYPES * r + j);
            }
         }
      }
   }
   ra_set_finalize(regs, q_values);

   nir_function_impl *impl = nir_shader_get_entrypoint(shader);

   /* liveness and interference */

   nir_index_blocks(impl);
   nir_index_ssa_defs(impl);
   nir_foreach_block(block, impl) {
      nir_foreach_instr(instr, block)
         instr->pass_flags = 0;
   }

   /* this gives an approximation/upper limit on how many nodes are needed
    * (some ssa values do not represent an allocated register)
    */
   unsigned max_nodes = impl->ssa_alloc + impl->reg_alloc;
   unsigned *live_map = ralloc_array(NULL, unsigned, max_nodes);
   memset(live_map, 0xff, sizeof(unsigned) * max_nodes);
   struct live_def *defs = rzalloc_array(NULL, struct live_def, max_nodes);

   unsigned num_nodes = live_defs(impl, defs, live_map);
   struct ra_graph *g = ra_alloc_interference_graph(regs, num_nodes);

   /* set classes from num_components */
   for (unsigned i = 0; i < num_nodes; i++) {
      nir_instr *instr = defs[i].instr;
      nir_dest *dest = defs[i].dest;
      unsigned c = nir_dest_num_components(*dest) - 1;

      if (instr->type == nir_instr_type_alu &&
          option(etna_new_transcendentals)) {
         switch (nir_instr_as_alu(instr)->op) {
         case nir_op_fdiv:
         case nir_op_flog2:
         case nir_op_fsin:
         case nir_op_fcos:
            assert(dest->is_ssa);
            c = REG_CLASS_VIRT_VEC2T;
            break;
         default:
            break;
         }
      }

      if (instr->type == nir_instr_type_intrinsic) {
         nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
         if (intr->intrinsic == nir_intrinsic_load_uniform) {
            /* make sure there isn't any reswizzling */
            assert(dest == &intr->dest);
            if (dest->ssa.num_components == 2)
               c = REG_CLASS_VIRT_VEC2C;
            if (dest->ssa.num_components == 3)
               c = REG_CLASS_VIRT_VEC3C;
         }
      }

      ra_set_node_class(g, i, c);
   }

   nir_foreach_block(block, impl) {
      nir_foreach_instr(instr, block) {
         if (instr->type != nir_instr_type_intrinsic)
            continue;

         nir_dest *dest = dest_for_instr(instr);
         nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
         unsigned reg;

         switch (intr->intrinsic) {
         case nir_intrinsic_store_output: {
            /* don't want output to be swizzled
             * TODO: better would be to set the type to X/XY/XYZ/XYZW
             */
            ra_set_node_class(g, live_map[src_index(impl, &intr->src[0])], REG_CLASS_VEC4);
         } continue;
         case nir_intrinsic_load_input:
            reg = nir_intrinsic_base(intr) * NUM_REG_TYPES + (unsigned[]) {
               REG_TYPE_VIRT_SCALAR_X,
               REG_TYPE_VIRT_VEC2_XY,
               REG_TYPE_VIRT_VEC3_XYZ,
               REG_TYPE_VEC4,
            }[nir_dest_num_components(*dest) - 1];
            break;
         case nir_intrinsic_load_instance_id:
            reg = option(id_reg) * NUM_REG_TYPES + REG_TYPE_VIRT_SCALAR_Y;
            break;
         default:
            continue;
         }

         ra_set_node_reg(g, live_map[dest_index(impl, dest)], reg);
      }
   }

   /* add interference for intersecting live ranges */
   for (unsigned i = 0; i < num_nodes; i++) {
      assert(defs[i].live_start < defs[i].live_end);
      for (unsigned j = 0; j < i; j++) {
         if (defs[i].live_start >= defs[j].live_end ||
             defs[j].live_start >= defs[i].live_end)
            continue;
         ra_add_node_interference(g, i, j);
      }
   }

   ralloc_free(defs);

   /* Allocate registers */
   ASSERTED bool ok = ra_allocate(g);
   assert(ok);

   state->g = g;
   state->regs = regs;
   state->live_map = live_map;
   state->num_nodes = num_nodes;
}
static unsigned
ra_finish(struct state *state)
{
   /* TODO: better way to get number of registers used? */
   unsigned j = 0;
   for (unsigned i = 0; i < state->num_nodes; i++) {
      j = MAX2(j, reg_get_base(state, ra_get_node_reg(state->g, i)) + 1);
   }

   ralloc_free(state->g);
   ralloc_free(state->regs);
   ralloc_free(state->live_map);

   return j;
}
static void
emit_alu(struct state *state, nir_alu_instr * alu)
{
   const nir_op_info *info = &nir_op_infos[alu->op];

   /* marked as dead instruction (vecN and other bypassed instr) */
   if (alu->instr.pass_flags)
      return;

   assert(!(alu->op >= nir_op_vec2 && alu->op <= nir_op_vec4));

   unsigned dst_swiz;
   hw_dst dst = ra_dest(state, &alu->dest.dest, &dst_swiz);

   /* compose alu write_mask with RA write mask */
   if (!alu->dest.dest.is_ssa)
      dst.write_mask = inst_write_mask_compose(alu->dest.write_mask, dst.write_mask);

   switch (alu->op) {
   case nir_op_fdot2:
   case nir_op_fdot3:
   case nir_op_fdot4:
      /* not per-component - don't compose dst_swiz */
      dst_swiz = INST_SWIZ_IDENTITY;
      break;
   default:
      break;
   }

   hw_src srcs[3];

   for (int i = 0; i < info->num_inputs; i++) {
      nir_alu_src *asrc = &alu->src[i];
      hw_src src;

      src = src_swizzle(get_src(state, &asrc->src), ALU_SWIZ(asrc));
      src = src_swizzle(src, dst_swiz);

      if (src.rgroup != INST_RGROUP_IMMEDIATE) {
         src.neg = asrc->negate || (alu->op == nir_op_fneg);
         src.abs = asrc->abs || (alu->op == nir_op_fabs);
      } else {
         assert(!asrc->negate && alu->op != nir_op_fneg);
         assert(!asrc->abs && alu->op != nir_op_fabs);
      }

      srcs[i] = src;
   }

   emit(alu, alu->op, dst, srcs, alu->dest.saturate || (alu->op == nir_op_fsat));
}
static void
emit_tex(struct state *state, nir_tex_instr * tex)
{
   unsigned dst_swiz;
   hw_dst dst = ra_dest(state, &tex->dest, &dst_swiz);
   nir_src *coord = NULL;
   nir_src *lod_bias = NULL;

   for (unsigned i = 0; i < tex->num_srcs; i++) {
      switch (tex->src[i].src_type) {
      case nir_tex_src_coord:
         coord = &tex->src[i].src;
         break;
      case nir_tex_src_bias:
      case nir_tex_src_lod:
         assert(!lod_bias);
         lod_bias = &tex->src[i].src;
         break;
      default:
         assert(0);
         break;
      }
   }

   emit(tex, tex->op, tex->sampler_index, dst_swiz, dst, get_src(state, coord),
        lod_bias ? get_src(state, lod_bias) : SRC_DISABLE);
}
static void
emit_intrinsic(struct state *state, nir_intrinsic_instr * intr)
{
   switch (intr->intrinsic) {
   case nir_intrinsic_store_output:
      emit(output, nir_intrinsic_base(intr), get_src(state, &intr->src[0]));
      break;
   case nir_intrinsic_discard_if:
      emit(discard, get_src(state, &intr->src[0]));
      break;
   case nir_intrinsic_discard:
      emit(discard, SRC_DISABLE);
      break;
   case nir_intrinsic_load_uniform: {
      unsigned dst_swiz;
      hw_dst dst = ra_dest(state, &intr->dest, &dst_swiz);
      /* TODO: might have a problem with dst_swiz .. */
      emit(load_ubo, dst, get_src(state, &intr->src[0]),
           const_src(state, &UNIFORM_BASE(nir_intrinsic_base(intr) * 16), 1));
   } break;
   case nir_intrinsic_load_front_face:
   case nir_intrinsic_load_frag_coord:
      assert(intr->dest.is_ssa); /* TODO - lower phis could cause this */
      break;
   case nir_intrinsic_load_input:
   case nir_intrinsic_load_instance_id:
      break;
   default:
      assert(0);
   }
}
static void
emit_instr(struct state *state, nir_instr * instr)
{
   switch (instr->type) {
   case nir_instr_type_alu:
      emit_alu(state, nir_instr_as_alu(instr));
      break;
   case nir_instr_type_tex:
      emit_tex(state, nir_instr_as_tex(instr));
      break;
   case nir_instr_type_intrinsic:
      emit_intrinsic(state, nir_instr_as_intrinsic(instr));
      break;
   case nir_instr_type_jump:
      assert(nir_instr_is_last(instr));
   case nir_instr_type_load_const:
   case nir_instr_type_ssa_undef:
      break;
   default:
      assert(0);
      break;
   }
}
static void
emit_block(struct state *state, nir_block * block)
{
   emit(block_start, block->index);

   nir_foreach_instr(instr, block)
      emit_instr(state, instr);

   /* succs->index < block->index is for the loop case */
   nir_block *succs = block->successors[0];
   if (nir_block_ends_in_jump(block) || succs->index < block->index)
      emit(jump, succs->index, SRC_DISABLE);
}
static void
emit_cf_list(struct state *state, struct exec_list *list);
static void
emit_if(struct state *state, nir_if * nif)
{
   emit(jump, nir_if_first_else_block(nif)->index, get_src(state, &nif->condition));
   emit_cf_list(state, &nif->then_list);

   /* jump at end of then_list to skip else_list
    * not needed if then_list already ends with a jump or else_list is empty
    */
   if (!nir_block_ends_in_jump(nir_if_last_then_block(nif)) &&
       !nir_cf_list_is_empty_block(&nif->else_list))
      emit(jump, nir_if_last_else_block(nif)->successors[0]->index, SRC_DISABLE);

   emit_cf_list(state, &nif->else_list);
}
static void
emit_cf_list(struct state *state, struct exec_list *list)
{
   foreach_list_typed(nir_cf_node, node, node, list) {
      switch (node->type) {
      case nir_cf_node_block:
         emit_block(state, nir_cf_node_as_block(node));
         break;
      case nir_cf_node_if:
         emit_if(state, nir_cf_node_as_if(node));
         break;
      case nir_cf_node_loop:
         emit_cf_list(state, &nir_cf_node_as_loop(node)->body);
         break;
      default:
         assert(0);
         break;
      }
   }
}
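/* Control-flow lowering sketch (illustrative): there is no explicit loop
 * handling here because emit_block() already emits the back edge whenever
 * succs->index < block->index, and emit_if() reduces an if to a conditional
 * jump keyed on the condition plus an optional jump over the else list, so
 * the whole CFG becomes jumps between numbered blocks.
 */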
/* based on nir_lower_vec_to_movs */
static unsigned
insert_vec_mov(nir_alu_instr *vec, unsigned start_idx, nir_shader *shader)
{
   assert(start_idx < nir_op_infos[vec->op].num_inputs);
   unsigned write_mask = (1u << start_idx);

   nir_alu_instr *mov = nir_alu_instr_create(shader, nir_op_mov);
   nir_alu_src_copy(&mov->src[0], &vec->src[start_idx], mov);

   mov->src[0].swizzle[0] = vec->src[start_idx].swizzle[0];
   mov->src[0].negate = vec->src[start_idx].negate;
   mov->src[0].abs = vec->src[start_idx].abs;

   unsigned num_components = 1;

   for (unsigned i = start_idx + 1; i < 4; i++) {
      if (!(vec->dest.write_mask & (1 << i)))
         continue;

      if (nir_srcs_equal(vec->src[i].src, vec->src[start_idx].src) &&
          vec->src[i].negate == vec->src[start_idx].negate &&
          vec->src[i].abs == vec->src[start_idx].abs) {
         write_mask |= (1 << i);
         mov->src[0].swizzle[num_components] = vec->src[i].swizzle[0];
         num_components++;
      }
   }

   mov->dest.write_mask = (1 << num_components) - 1;
   nir_ssa_dest_init(&mov->instr, &mov->dest.dest, num_components, 32, NULL);

   /* replace vec srcs with inserted mov */
   for (unsigned i = 0, j = 0; i < 4; i++) {
      if (!(write_mask & (1 << i)))
         continue;

      nir_instr_rewrite_src(&vec->instr, &vec->src[i].src, nir_src_for_ssa(&mov->dest.dest.ssa));
      vec->src[i].swizzle[0] = j++;
   }

   nir_instr_insert_before(&vec->instr, &mov->instr);

   return write_mask;
}
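/* Worked example (illustrative): for
 *    ssa_2 = vec4 ssa_1.x, ssa_1.z, ssa_0.y, ssa_0.y
 * insert_vec_mov(vec, 0, shader) emits mov ssa_3.xy, ssa_1.xz (the start
 * component plus every later component with matching src/negate/abs),
 * rewrites the vec4 into vec4 ssa_3.x, ssa_3.y, ssa_0.y, ssa_0.y and
 * returns write_mask 0x3 so lower_alu() skips those channels afterwards.
 */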
/*
 * for vecN instructions:
 * -merge constant sources into a single src
 * -insert movs (nir_lower_vec_to_movs equivalent)
 * for non-vecN instructions:
 * -try to merge constants as single constant
 * -insert movs for multiple constants (pre-HALTI5)
 */
static void
lower_alu(struct state *state, nir_alu_instr *alu)
{
   const nir_op_info *info = &nir_op_infos[alu->op];

   nir_builder b;
   nir_builder_init(&b, state->impl);
   b.cursor = nir_before_instr(&alu->instr);

   switch (alu->op) {
   case nir_op_vec2:
   case nir_op_vec3:
   case nir_op_vec4: {
      /* merge the constant sources of the vecN into a single load_const */
      nir_const_value value[4];
      unsigned num_components = 0;

      for (unsigned i = 0; i < info->num_inputs; i++) {
         nir_const_value *cv = nir_src_as_const_value(alu->src[i].src);
         if (cv)
            value[num_components++] = cv[alu->src[i].swizzle[0]];
      }

      if (num_components <= 1) /* nothing to do */
         break;

      nir_ssa_def *def = nir_build_imm(&b, num_components, 32, value);

      if (num_components == info->num_inputs) {
         nir_ssa_def_rewrite_uses(&alu->dest.dest.ssa, nir_src_for_ssa(def));
         nir_instr_remove(&alu->instr);
         return;
      }

      for (unsigned i = 0, j = 0; i < info->num_inputs; i++) {
         nir_const_value *cv = nir_src_as_const_value(alu->src[i].src);
         if (!cv)
            continue;

         nir_instr_rewrite_src(&alu->instr, &alu->src[i].src, nir_src_for_ssa(def));
         alu->src[i].swizzle[0] = j++;
      }
   } break;
   default: {
      if (!option(single_const_src))
         return;

      /* pre-GC7000L can only have 1 uniform src per instruction */
      nir_const_value value[4] = {};
      uint8_t swizzle[4][4] = {};
      unsigned swiz_max = 0, num_const = 0;

      for (unsigned i = 0; i < info->num_inputs; i++) {
         nir_const_value *cv = nir_src_as_const_value(alu->src[i].src);
         if (!cv)
            continue;

         unsigned num_components = info->input_sizes[i] ?: alu->dest.dest.ssa.num_components;
         for (unsigned j = 0; j < num_components; j++) {
            int idx = const_add(&value[0].u64, cv[alu->src[i].swizzle[j]].u64);
            swizzle[i][j] = idx;
            swiz_max = MAX2(swiz_max, (unsigned) idx);
         }
         num_const++;
      }

      /* nothing to do */
      if (num_const <= 1)
         return;

      /* resolve with single combined const src */
      if (swiz_max < 4) {
         nir_ssa_def *def = nir_build_imm(&b, swiz_max + 1, 32, value);

         for (unsigned i = 0; i < info->num_inputs; i++) {
            nir_const_value *cv = nir_src_as_const_value(alu->src[i].src);
            if (!cv)
               continue;

            nir_instr_rewrite_src(&alu->instr, &alu->src[i].src, nir_src_for_ssa(def));

            for (unsigned j = 0; j < 4; j++)
               alu->src[i].swizzle[j] = swizzle[i][j];
         }
         return;
      }

      /* resolve with movs (the first const src can stay) */
      num_const = 0;
      for (unsigned i = 0; i < info->num_inputs; i++) {
         nir_const_value *cv = nir_src_as_const_value(alu->src[i].src);
         if (!cv)
            continue;

         num_const++;
         if (num_const == 1)
            continue;

         nir_ssa_def *mov = nir_mov(&b, alu->src[i].src.ssa);
         nir_instr_rewrite_src(&alu->instr, &alu->src[i].src, nir_src_for_ssa(mov));
      }
      return;
   }
   }

   /* for vecN, insert movs where needed (nir_lower_vec_to_movs equivalent) */
   unsigned finished_write_mask = 0;
   for (unsigned i = 0; i < 4; i++) {
      if (!(alu->dest.write_mask & (1 << i)))
         continue;

      nir_ssa_def *ssa = alu->src[i].src.ssa;

      /* check that vecN instruction is only user of this */
      bool need_mov = list_length(&ssa->if_uses) != 0;
      nir_foreach_use(use_src, ssa) {
         if (use_src->parent_instr != &alu->instr)
            need_mov = true;
      }

      nir_instr *instr = ssa->parent_instr;
      switch (instr->type) {
      case nir_instr_type_alu:
      case nir_instr_type_tex:
         break;
      case nir_instr_type_intrinsic:
         if (nir_instr_as_intrinsic(instr)->intrinsic == nir_intrinsic_load_input) {
            need_mov = vec_dest_has_swizzle(alu, &nir_instr_as_intrinsic(instr)->dest.ssa);
            break;
         }
         /* fallthrough */
      default:
         need_mov = true;
      }

      if (need_mov && !(finished_write_mask & (1 << i)))
         finished_write_mask |= insert_vec_mov(alu, i, state->shader);
   }
}
static void
emit_shader(nir_shader *shader, const struct emit_options *options,
            unsigned *num_temps, unsigned *num_consts)
{
   struct state state = {
      .options = options,
      .shader = shader,
      .impl = nir_shader_get_entrypoint(shader),
   };

   nir_builder b;
   nir_builder_init(&b, state.impl);

   /* convert non-dynamic uniform loads to constants, etc */
   nir_foreach_block(block, state.impl) {
      nir_foreach_instr_safe(instr, block) {
         switch(instr->type) {
         case nir_instr_type_alu:
            /* deals with vecN and const srcs */
            lower_alu(&state, nir_instr_as_alu(instr));
            break;
         case nir_instr_type_load_const: {
            nir_load_const_instr *load_const = nir_instr_as_load_const(instr);
            for (unsigned i = 0; i < load_const->def.num_components; i++)
               load_const->value[i] = CONST(load_const->value[i].u32);
         } break;
         case nir_instr_type_intrinsic: {
            nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
            if (intr->intrinsic != nir_intrinsic_load_uniform)
               break;
            nir_const_value *off = nir_src_as_const_value(intr->src[0]);
            if (!off || off[0].u64 >> 32 != ETNA_IMMEDIATE_CONSTANT)
               break;

            unsigned base = nir_intrinsic_base(intr) + off[0].u32 / 16;
            nir_const_value value[4];

            for (unsigned i = 0; i < intr->dest.ssa.num_components; i++) {
               if (nir_intrinsic_base(intr) < 0)
                  value[i] = TEXSCALE(~nir_intrinsic_base(intr), i);
               else
                  value[i] = UNIFORM(base * 4 + i);
            }

            b.cursor = nir_after_instr(instr);
            nir_ssa_def *def = nir_build_imm(&b, intr->dest.ssa.num_components, 32, value);

            nir_ssa_def_rewrite_uses(&intr->dest.ssa, nir_src_for_ssa(def));
            nir_instr_remove(instr);
         } break;
         default:
            break;
         }
      }
   }

   /* add mov for any store output using sysval/const */
   nir_foreach_block(block, state.impl) {
      nir_foreach_instr_safe(instr, block) {
         if (instr->type != nir_instr_type_intrinsic)
            continue;

         nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);

         switch (intr->intrinsic) {
         case nir_intrinsic_store_output: {
            nir_src *src = &intr->src[0];
            if (nir_src_is_const(*src) || is_sysval(src->ssa->parent_instr)) {
               b.cursor = nir_before_instr(instr);
               nir_instr_rewrite_src(instr, src, nir_src_for_ssa(nir_mov(&b, src->ssa)));
            }
         } break;
         default:
            break;
         }
      }
   }

   /* call directly to avoid validation (load_const doesn't pass validation at this point) */
   nir_convert_from_ssa(shader, true);
   nir_opt_dce(shader);

   ra_assign(&state, shader);

   emit_cf_list(&state, &nir_shader_get_entrypoint(shader)->body);

   *num_temps = ra_finish(&state);
   *num_consts = state.const_count;
}