/*
 * Copyright (c) 2019 Zodiac Inflight Innovations
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sub license,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 *
 * Authors:
 *    Jonathan Marek <jonathan@marek.ca>
 */
#include "etnaviv_asm.h"
#include "etnaviv_context.h"
#include "etnaviv_compiler_nir.h"

#include "compiler/nir/nir.h"
#include "compiler/nir/nir_builder.h"
#define ALU_SWIZ(s) INST_SWIZ((s)->swizzle[0], (s)->swizzle[1], (s)->swizzle[2], (s)->swizzle[3])
#define SRC_DISABLE ((hw_src){})
#define SRC_CONST(idx, s) ((hw_src){.use=1, .rgroup = INST_RGROUP_UNIFORM_0, .reg=idx, .swiz=s})
#define SRC_REG(idx, s) ((hw_src){.use=1, .rgroup = INST_RGROUP_TEMP, .reg=idx, .swiz=s})
typedef struct etna_inst_dst hw_dst;
typedef struct etna_inst_src hw_src;

static inline hw_src
src_swizzle(hw_src src, unsigned swizzle)
{
   if (src.rgroup != INST_RGROUP_IMMEDIATE)
      src.swiz = inst_swiz_compose(src.swiz, swizzle);

   return src;
}
/* constants are represented as 64-bit ints
 * 32-bit for the value and 32-bit for the type (imm, uniform, etc)
 */

#define CONST_VAL(a, b) (nir_const_value) {.u64 = (uint64_t)(a) << 32 | (uint64_t)(b)}
#define CONST(x) CONST_VAL(ETNA_IMMEDIATE_CONSTANT, x)
#define UNIFORM(x) CONST_VAL(ETNA_IMMEDIATE_UNIFORM, x)
#define TEXSCALE(x, i) CONST_VAL(ETNA_IMMEDIATE_TEXRECT_SCALE_X + (i), x)
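
/*
 * Illustrative note (mine, not from the original source): CONST(0x3f800000)
 * expands to .u64 = (uint64_t)ETNA_IMMEDIATE_CONSTANT << 32 | 0x3f800000,
 * so the high 32 bits tag how the low 32 bits are consumed later, e.g. as a
 * literal value, a uniform index, or a texture-rect scale factor.
 */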
static int
const_add(uint64_t *c, uint64_t value)
{
   for (unsigned i = 0; i < 4; i++) {
      if (c[i] == value || !c[i]) {
         c[i] = value;
         return i;
      }
   }
   return -1;
}
static hw_src
const_src(struct etna_compile *c, nir_const_value *value, unsigned num_components)
{
   /* use inline immediates if possible */
   if (c->specs->halti >= 2 && num_components == 1 &&
       value[0].u64 >> 32 == ETNA_IMMEDIATE_CONSTANT) {
      uint32_t bits = value[0].u32;

      /* "float" - shifted by 12 */
      if ((bits & 0xfff) == 0)
         return etna_immediate_src(0, bits >> 12);

      /* "unsigned" - raw 20 bit value */
      if (bits < (1 << 20))
         return etna_immediate_src(2, bits);

      /* "signed" - sign extended 20-bit (sign included) value */
      if (bits >= 0xfff80000)
         return etna_immediate_src(1, bits);
   }

   unsigned i;
   int swiz = -1;
   for (i = 0; swiz < 0; i++) {
      uint64_t *a = &c->consts[i * 4];
      uint64_t save[4];
      memcpy(save, a, sizeof(save));
      swiz = 0;
      for (unsigned j = 0; j < num_components; j++) {
         int c = const_add(a, value[j].u64);
         if (c < 0) {
            memcpy(a, save, sizeof(save));
            swiz = -1;
            break;
         }
         swiz |= c << j * 2;
      }
   }

   assert(i <= ETNA_MAX_IMM / 4);
   c->const_count = MAX2(c->const_count, i);

   return SRC_CONST(i - 1, swiz);
}
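
/*
 * Sketch of the packing above (my reading): each vec4 slot in c->consts holds
 * four tagged 64-bit values, const_add() dedups within a slot, and the 2-bit
 * component indices it returns are packed into a source swizzle, e.g. indices
 * 2 then 0 yield swiz 0b0010, selecting .z for the first component and .x for
 * the second.
 */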
/* Swizzles and write masks can be used to layer virtual non-interfering
 * registers on top of the real VEC4 registers. For example, the virtual
 * VEC3_XYZ register and the virtual SCALAR_W register that use the same
 * physical VEC4 base register do not interfere.
 */
enum reg_class {
   REG_CLASS_VEC4,
   /* vec3 and smaller */
   REG_CLASS_VIRT_VEC3,
   REG_CLASS_VIRT_VEC2,
   REG_CLASS_VIRT_SCALAR,
   /* special vec2 class for fast transcendentals, limited to XY or ZW */
   REG_CLASS_VIRT_VEC2T,
   /* special classes for LOAD - contiguous components */
   REG_CLASS_VIRT_VEC2C,
   REG_CLASS_VIRT_VEC3C,
   NUM_REG_CLASSES,
};

enum reg_type {
   REG_TYPE_VEC4,
   REG_TYPE_VIRT_VEC3_XYZ,
   REG_TYPE_VIRT_VEC3_XYW,
   REG_TYPE_VIRT_VEC3_XZW,
   REG_TYPE_VIRT_VEC3_YZW,
   REG_TYPE_VIRT_VEC2_XY,
   REG_TYPE_VIRT_VEC2_XZ,
   REG_TYPE_VIRT_VEC2_XW,
   REG_TYPE_VIRT_VEC2_YZ,
   REG_TYPE_VIRT_VEC2_YW,
   REG_TYPE_VIRT_VEC2_ZW,
   REG_TYPE_VIRT_SCALAR_X,
   REG_TYPE_VIRT_SCALAR_Y,
   REG_TYPE_VIRT_SCALAR_Z,
   REG_TYPE_VIRT_SCALAR_W,
   REG_TYPE_VIRT_VEC2T_XY,
   REG_TYPE_VIRT_VEC2T_ZW,
   REG_TYPE_VIRT_VEC2C_XY,
   REG_TYPE_VIRT_VEC2C_YZ,
   REG_TYPE_VIRT_VEC2C_ZW,
   REG_TYPE_VIRT_VEC3C_XYZ,
   REG_TYPE_VIRT_VEC3C_YZW,
   NUM_REG_TYPES,
};
/* writemask when used as dest */
static const uint8_t
reg_writemask[NUM_REG_TYPES] = {
   [REG_TYPE_VEC4] = 0xf,
   [REG_TYPE_VIRT_SCALAR_X] = 0x1,
   [REG_TYPE_VIRT_SCALAR_Y] = 0x2,
   [REG_TYPE_VIRT_VEC2_XY] = 0x3,
   [REG_TYPE_VIRT_VEC2T_XY] = 0x3,
   [REG_TYPE_VIRT_VEC2C_XY] = 0x3,
   [REG_TYPE_VIRT_SCALAR_Z] = 0x4,
   [REG_TYPE_VIRT_VEC2_XZ] = 0x5,
   [REG_TYPE_VIRT_VEC2_YZ] = 0x6,
   [REG_TYPE_VIRT_VEC2C_YZ] = 0x6,
   [REG_TYPE_VIRT_VEC3_XYZ] = 0x7,
   [REG_TYPE_VIRT_VEC3C_XYZ] = 0x7,
   [REG_TYPE_VIRT_SCALAR_W] = 0x8,
   [REG_TYPE_VIRT_VEC2_XW] = 0x9,
   [REG_TYPE_VIRT_VEC2_YW] = 0xa,
   [REG_TYPE_VIRT_VEC3_XYW] = 0xb,
   [REG_TYPE_VIRT_VEC2_ZW] = 0xc,
   [REG_TYPE_VIRT_VEC2T_ZW] = 0xc,
   [REG_TYPE_VIRT_VEC2C_ZW] = 0xc,
   [REG_TYPE_VIRT_VEC3_XZW] = 0xd,
   [REG_TYPE_VIRT_VEC3_YZW] = 0xe,
   [REG_TYPE_VIRT_VEC3C_YZW] = 0xe,
};
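
/*
 * Illustrative gloss: two virtual registers layered on the same base VEC4
 * conflict exactly when their writemasks overlap, e.g. VEC3_XYZ (0x7) and
 * SCALAR_W (0x8) share no bits and can coexist, while VEC3_XYZ and VEC2_XY
 * (0x3) cannot. ra_assign() below builds its conflict list from this table.
 */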
/* how to swizzle when used as a src */
static const uint8_t
reg_swiz[NUM_REG_TYPES] = {
   [REG_TYPE_VEC4] = INST_SWIZ_IDENTITY,
   [REG_TYPE_VIRT_SCALAR_X] = INST_SWIZ_IDENTITY,
   [REG_TYPE_VIRT_SCALAR_Y] = SWIZZLE(Y, Y, Y, Y),
   [REG_TYPE_VIRT_VEC2_XY] = INST_SWIZ_IDENTITY,
   [REG_TYPE_VIRT_VEC2T_XY] = INST_SWIZ_IDENTITY,
   [REG_TYPE_VIRT_VEC2C_XY] = INST_SWIZ_IDENTITY,
   [REG_TYPE_VIRT_SCALAR_Z] = SWIZZLE(Z, Z, Z, Z),
   [REG_TYPE_VIRT_VEC2_XZ] = SWIZZLE(X, Z, X, Z),
   [REG_TYPE_VIRT_VEC2_YZ] = SWIZZLE(Y, Z, Y, Z),
   [REG_TYPE_VIRT_VEC2C_YZ] = SWIZZLE(Y, Z, Y, Z),
   [REG_TYPE_VIRT_VEC3_XYZ] = INST_SWIZ_IDENTITY,
   [REG_TYPE_VIRT_VEC3C_XYZ] = INST_SWIZ_IDENTITY,
   [REG_TYPE_VIRT_SCALAR_W] = SWIZZLE(W, W, W, W),
   [REG_TYPE_VIRT_VEC2_XW] = SWIZZLE(X, W, X, W),
   [REG_TYPE_VIRT_VEC2_YW] = SWIZZLE(Y, W, Y, W),
   [REG_TYPE_VIRT_VEC3_XYW] = SWIZZLE(X, Y, W, X),
   [REG_TYPE_VIRT_VEC2_ZW] = SWIZZLE(Z, W, Z, W),
   [REG_TYPE_VIRT_VEC2T_ZW] = SWIZZLE(Z, W, Z, W),
   [REG_TYPE_VIRT_VEC2C_ZW] = SWIZZLE(Z, W, Z, W),
   [REG_TYPE_VIRT_VEC3_XZW] = SWIZZLE(X, Z, W, X),
   [REG_TYPE_VIRT_VEC3_YZW] = SWIZZLE(Y, Z, W, X),
   [REG_TYPE_VIRT_VEC3C_YZW] = SWIZZLE(Y, Z, W, X),
};
/* how to swizzle when used as a dest */
static const uint8_t
reg_dst_swiz[NUM_REG_TYPES] = {
   [REG_TYPE_VEC4] = INST_SWIZ_IDENTITY,
   [REG_TYPE_VIRT_SCALAR_X] = INST_SWIZ_IDENTITY,
   [REG_TYPE_VIRT_SCALAR_Y] = SWIZZLE(X, X, X, X),
   [REG_TYPE_VIRT_VEC2_XY] = INST_SWIZ_IDENTITY,
   [REG_TYPE_VIRT_VEC2T_XY] = INST_SWIZ_IDENTITY,
   [REG_TYPE_VIRT_VEC2C_XY] = INST_SWIZ_IDENTITY,
   [REG_TYPE_VIRT_SCALAR_Z] = SWIZZLE(X, X, X, X),
   [REG_TYPE_VIRT_VEC2_XZ] = SWIZZLE(X, X, Y, Y),
   [REG_TYPE_VIRT_VEC2_YZ] = SWIZZLE(X, X, Y, Y),
   [REG_TYPE_VIRT_VEC2C_YZ] = SWIZZLE(X, X, Y, Y),
   [REG_TYPE_VIRT_VEC3_XYZ] = INST_SWIZ_IDENTITY,
   [REG_TYPE_VIRT_VEC3C_XYZ] = INST_SWIZ_IDENTITY,
   [REG_TYPE_VIRT_SCALAR_W] = SWIZZLE(X, X, X, X),
   [REG_TYPE_VIRT_VEC2_XW] = SWIZZLE(X, X, Y, Y),
   [REG_TYPE_VIRT_VEC2_YW] = SWIZZLE(X, X, Y, Y),
   [REG_TYPE_VIRT_VEC3_XYW] = SWIZZLE(X, Y, Z, Z),
   [REG_TYPE_VIRT_VEC2_ZW] = SWIZZLE(X, X, X, Y),
   [REG_TYPE_VIRT_VEC2T_ZW] = SWIZZLE(X, X, X, Y),
   [REG_TYPE_VIRT_VEC2C_ZW] = SWIZZLE(X, X, X, Y),
   [REG_TYPE_VIRT_VEC3_XZW] = SWIZZLE(X, Y, Y, Z),
   [REG_TYPE_VIRT_VEC3_YZW] = SWIZZLE(X, X, Y, Z),
   [REG_TYPE_VIRT_VEC3C_YZW] = SWIZZLE(X, X, Y, Z),
};
static inline int reg_get_type(int virt_reg)
{
   return virt_reg % NUM_REG_TYPES;
}
static inline int reg_get_base(struct etna_compile *c, int virt_reg)
{
   /* offset by 1 to avoid reserved position register */
   if (c->nir->info.stage == MESA_SHADER_FRAGMENT)
      return (virt_reg / NUM_REG_TYPES + 1) % ETNA_MAX_TEMPS;
   return virt_reg / NUM_REG_TYPES;
}

/* use "r63.z" for depth reg, it will wrap around to r0.z by reg_get_base
 * (fs registers are offset by 1 to avoid reserving r0)
 */
#define REG_FRAG_DEPTH ((ETNA_MAX_TEMPS - 1) * NUM_REG_TYPES + REG_TYPE_VIRT_SCALAR_Z)
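
/*
 * Worked example (assuming ETNA_MAX_TEMPS is 64, as the r63 comment implies):
 * REG_FRAG_DEPTH has base register 63, and in a fragment shader
 * reg_get_base() maps it to (63 + 1) % 64 == 0, i.e. r0.z, while every other
 * node is shifted up by one to keep the reserved position register free.
 */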
static inline int reg_get_class(int virt_reg)
{
   switch (reg_get_type(virt_reg)) {
   case REG_TYPE_VEC4:
      return REG_CLASS_VEC4;
   case REG_TYPE_VIRT_VEC3_XYZ:
   case REG_TYPE_VIRT_VEC3_XYW:
   case REG_TYPE_VIRT_VEC3_XZW:
   case REG_TYPE_VIRT_VEC3_YZW:
      return REG_CLASS_VIRT_VEC3;
   case REG_TYPE_VIRT_VEC2_XY:
   case REG_TYPE_VIRT_VEC2_XZ:
   case REG_TYPE_VIRT_VEC2_XW:
   case REG_TYPE_VIRT_VEC2_YZ:
   case REG_TYPE_VIRT_VEC2_YW:
   case REG_TYPE_VIRT_VEC2_ZW:
      return REG_CLASS_VIRT_VEC2;
   case REG_TYPE_VIRT_SCALAR_X:
   case REG_TYPE_VIRT_SCALAR_Y:
   case REG_TYPE_VIRT_SCALAR_Z:
   case REG_TYPE_VIRT_SCALAR_W:
      return REG_CLASS_VIRT_SCALAR;
   case REG_TYPE_VIRT_VEC2T_XY:
   case REG_TYPE_VIRT_VEC2T_ZW:
      return REG_CLASS_VIRT_VEC2T;
   case REG_TYPE_VIRT_VEC2C_XY:
   case REG_TYPE_VIRT_VEC2C_YZ:
   case REG_TYPE_VIRT_VEC2C_ZW:
      return REG_CLASS_VIRT_VEC2C;
   case REG_TYPE_VIRT_VEC3C_XYZ:
   case REG_TYPE_VIRT_VEC3C_YZW:
      return REG_CLASS_VIRT_VEC3C;
   }

   assert(false);
   return 0;
}
/* nir_src to allocated register */
static hw_src
ra_src(struct etna_compile *c, nir_src *src)
{
   unsigned reg = ra_get_node_reg(c->g, c->live_map[src_index(c->impl, src)]);
   return SRC_REG(reg_get_base(c, reg), reg_swiz[reg_get_type(reg)]);
}
static hw_src
get_src(struct etna_compile *c, nir_src *src)
{
   if (!src->is_ssa)
      return ra_src(c, src);

   nir_instr *instr = src->ssa->parent_instr;

   if (instr->pass_flags & BYPASS_SRC) {
      assert(instr->type == nir_instr_type_alu);
      nir_alu_instr *alu = nir_instr_as_alu(instr);
      assert(alu->op == nir_op_mov);
      return src_swizzle(get_src(c, &alu->src[0].src), ALU_SWIZ(&alu->src[0]));
   }

   switch (instr->type) {
   case nir_instr_type_load_const:
      return const_src(c, nir_instr_as_load_const(instr)->value, src->ssa->num_components);
   case nir_instr_type_intrinsic: {
      nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
      switch (intr->intrinsic) {
      case nir_intrinsic_load_input:
      case nir_intrinsic_load_instance_id:
      case nir_intrinsic_load_uniform:
      case nir_intrinsic_load_ubo:
         return ra_src(c, src);
      case nir_intrinsic_load_front_face:
         return (hw_src) { .use = 1, .rgroup = INST_RGROUP_INTERNAL };
      case nir_intrinsic_load_frag_coord:
         return SRC_REG(0, INST_SWIZ_IDENTITY);
      default:
         compile_error(c, "Unhandled NIR intrinsic type: %s\n",
                       nir_intrinsic_infos[intr->intrinsic].name);
         break;
      }
   } break;
   case nir_instr_type_alu:
   case nir_instr_type_tex:
      return ra_src(c, src);
   case nir_instr_type_ssa_undef: {
      /* return zero to deal with broken Blur demo */
      nir_const_value value = CONST(0);
      return src_swizzle(const_src(c, &value, 1), SWIZZLE(X, X, X, X));
   }
   default:
      compile_error(c, "Unhandled NIR instruction type: %d\n", instr->type);
      break;
   }

   return SRC_DISABLE;
}
static bool
vec_dest_has_swizzle(nir_alu_instr *vec, nir_ssa_def *ssa)
{
   for (unsigned i = 0; i < 4; i++) {
      if (!(vec->dest.write_mask & (1 << i)) || vec->src[i].src.ssa != ssa)
         continue;

      if (vec->src[i].swizzle[0] != i)
         return true;
   }

   /* don't deal with possible bypassed vec/mov chain */
   nir_foreach_use(use_src, ssa) {
      nir_instr *instr = use_src->parent_instr;
      if (instr->type != nir_instr_type_alu)
         return true;

      nir_alu_instr *alu = nir_instr_as_alu(instr);

      switch (alu->op) {
      case nir_op_mov:
      case nir_op_vec2:
      case nir_op_vec3:
      case nir_op_vec4:
         break;
      default:
         return true;
      }
   }

   return false;
}
/* get allocated dest register for nir_dest
 * *p_swiz tells how the components need to be placed into register
 */
static hw_dst
ra_dest(struct etna_compile *c, nir_dest *dest, unsigned *p_swiz)
{
   unsigned swiz = INST_SWIZ_IDENTITY, mask = 0xf;
   dest = real_dest(dest, &swiz, &mask);

   unsigned r = ra_get_node_reg(c->g, c->live_map[dest_index(c->impl, dest)]);
   unsigned t = reg_get_type(r);

   *p_swiz = inst_swiz_compose(swiz, reg_dst_swiz[t]);

   return (hw_dst) {
      .use = 1,
      .reg = reg_get_base(c, r),
      .write_mask = inst_write_mask_compose(mask, reg_writemask[t]),
   };
}
/* precomputed by register_allocate */
static unsigned int *q_values[] = {
   (unsigned int[]) {1, 2, 3, 4, 2, 2, 3},
   (unsigned int[]) {3, 5, 6, 6, 5, 5, 6},
   (unsigned int[]) {3, 4, 4, 4, 4, 4, 4},
   (unsigned int[]) {1, 1, 1, 1, 1, 1, 1},
   (unsigned int[]) {1, 2, 2, 2, 1, 2, 2},
   (unsigned int[]) {2, 3, 3, 3, 2, 3, 3},
   (unsigned int[]) {2, 2, 2, 2, 2, 2, 2},
};
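
/*
 * My understanding of these tables: q_values[B][C] is the maximum number of
 * class-B registers a single class-C register can conflict with, which lets
 * ra_set_finalize() skip computing them from the conflict graph; rows and
 * columns follow the reg_class enum order.
 */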
static void
ra_assign(struct etna_compile *c, nir_shader *shader)
{
   struct ra_regs *regs = ra_alloc_reg_set(NULL, ETNA_MAX_TEMPS *
                                           NUM_REG_TYPES, false);

   /* classes always be created from index 0, so equal to the class enum
    * which represents a register with (c+1) components
    */
   for (int c = 0; c < NUM_REG_CLASSES; c++)
      ra_alloc_reg_class(regs);
   /* add each register of each class */
   for (int r = 0; r < NUM_REG_TYPES * ETNA_MAX_TEMPS; r++)
      ra_class_add_reg(regs, reg_get_class(r), r);
   /* add conflicts */
   for (int r = 0; r < ETNA_MAX_TEMPS; r++) {
      for (int i = 0; i < NUM_REG_TYPES; i++) {
         for (int j = 0; j < i; j++) {
            if (reg_writemask[i] & reg_writemask[j]) {
               ra_add_reg_conflict(regs, NUM_REG_TYPES * r + i,
                                         NUM_REG_TYPES * r + j);
            }
         }
      }
   }

   ra_set_finalize(regs, q_values);

   nir_function_impl *impl = nir_shader_get_entrypoint(shader);

   /* liveness and interference */

   nir_index_blocks(impl);
   nir_index_ssa_defs(impl);
   nir_foreach_block(block, impl) {
      nir_foreach_instr(instr, block)
         instr->pass_flags = 0;
   }

   /* this gives an approximation/upper limit on how many nodes are needed
    * (some ssa values do not represent an allocated register)
    */
   unsigned max_nodes = impl->ssa_alloc + impl->reg_alloc;
   unsigned *live_map = ralloc_array(NULL, unsigned, max_nodes);
   memset(live_map, 0xff, sizeof(unsigned) * max_nodes);
   struct live_def *defs = rzalloc_array(NULL, struct live_def, max_nodes);

   unsigned num_nodes = etna_live_defs(impl, defs, live_map);
   struct ra_graph *g = ra_alloc_interference_graph(regs, num_nodes);

   /* set classes from num_components */
   for (unsigned i = 0; i < num_nodes; i++) {
      nir_instr *instr = defs[i].instr;
      nir_dest *dest = defs[i].dest;
      unsigned comp = nir_dest_num_components(*dest) - 1;

      if (instr->type == nir_instr_type_alu &&
          c->specs->has_new_transcendentals) {
         switch (nir_instr_as_alu(instr)->op) {
         case nir_op_fdiv:
         case nir_op_flog2:
         case nir_op_fsin:
         case nir_op_fcos:
            assert(dest->is_ssa);
            comp = REG_CLASS_VIRT_VEC2T;
            break;
         default:
            break;
         }
      }

      if (instr->type == nir_instr_type_intrinsic) {
         nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
         /* can't have dst swizzle or sparse writemask on UBO loads */
         if (intr->intrinsic == nir_intrinsic_load_ubo) {
            assert(dest == &intr->dest);
            if (dest->ssa.num_components == 2)
               comp = REG_CLASS_VIRT_VEC2C;
            if (dest->ssa.num_components == 3)
               comp = REG_CLASS_VIRT_VEC3C;
         }
      }

      ra_set_node_class(g, i, comp);
   }

   nir_foreach_block(block, impl) {
      nir_foreach_instr(instr, block) {
         if (instr->type != nir_instr_type_intrinsic)
            continue;

         nir_dest *dest = dest_for_instr(instr);
         nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
         unsigned reg;

         switch (intr->intrinsic) {
         case nir_intrinsic_store_deref: {
            /* don't want outputs to be swizzled
             * TODO: better would be to set the type to X/XY/XYZ/XYZW
             * TODO: what if fragcoord.z is read after writing fragdepth?
             */
            nir_deref_instr *deref = nir_src_as_deref(intr->src[0]);
            unsigned index = live_map[src_index(impl, &intr->src[1])];

            if (shader->info.stage == MESA_SHADER_FRAGMENT &&
                deref->var->data.location == FRAG_RESULT_DEPTH) {
               ra_set_node_reg(g, index, REG_FRAG_DEPTH);
            } else {
               ra_set_node_class(g, index, REG_CLASS_VEC4);
            }
         } continue;
         case nir_intrinsic_load_input:
            reg = nir_intrinsic_base(intr) * NUM_REG_TYPES + (unsigned[]) {
               REG_TYPE_VIRT_SCALAR_X,
               REG_TYPE_VIRT_VEC2_XY,
               REG_TYPE_VIRT_VEC3_XYZ,
               REG_TYPE_VEC4,
            }[nir_dest_num_components(*dest) - 1];
            break;
         case nir_intrinsic_load_instance_id:
            reg = c->variant->infile.num_reg * NUM_REG_TYPES + REG_TYPE_VIRT_SCALAR_Y;
            break;
         default:
            continue;
         }

         ra_set_node_reg(g, live_map[dest_index(impl, dest)], reg);
      }
   }

   /* add interference for intersecting live ranges */
   for (unsigned i = 0; i < num_nodes; i++) {
      assert(defs[i].live_start < defs[i].live_end);
      for (unsigned j = 0; j < i; j++) {
         if (defs[i].live_start >= defs[j].live_end || defs[j].live_start >= defs[i].live_end)
            continue;
         ra_add_node_interference(g, i, j);
      }
   }

   ralloc_free(defs);

   /* Allocate registers */
   ASSERTED bool ok = ra_allocate(g);
   assert(ok);

   c->g = g;
   c->regs = regs;
   c->live_map = live_map;
   c->num_nodes = num_nodes;
}
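
/*
 * Note (my gloss): building interference from live-range overlap above is
 * O(num_nodes^2), which stays cheap at typical etnaviv shader sizes and is
 * simpler than per-instruction liveness tracking.
 */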
static unsigned
ra_finish(struct etna_compile *c)
{
   /* TODO: better way to get number of registers used? */
   unsigned j = 0;
   for (unsigned i = 0; i < c->num_nodes; i++) {
      j = MAX2(j, reg_get_base(c, ra_get_node_reg(c->g, i)) + 1);
   }

   ralloc_free(c->regs);
   ralloc_free(c->live_map);

   return j;
}
static void
emit_alu(struct etna_compile *c, nir_alu_instr * alu)
{
   const nir_op_info *info = &nir_op_infos[alu->op];

   /* marked as dead instruction (vecN and other bypassed instr) */
   if (alu->instr.pass_flags)
      return;

   assert(!(alu->op >= nir_op_vec2 && alu->op <= nir_op_vec4));

   unsigned dst_swiz;
   hw_dst dst = ra_dest(c, &alu->dest.dest, &dst_swiz);

   /* compose alu write_mask with RA write mask */
   if (!alu->dest.dest.is_ssa)
      dst.write_mask = inst_write_mask_compose(alu->dest.write_mask, dst.write_mask);

   switch (alu->op) {
   case nir_op_fdot2:
   case nir_op_fdot3:
   case nir_op_fdot4:
      /* not per-component - don't compose dst_swiz */
      dst_swiz = INST_SWIZ_IDENTITY;
      break;
   default:
      break;
   }

   hw_src srcs[3];

   for (int i = 0; i < info->num_inputs; i++) {
      nir_alu_src *asrc = &alu->src[i];
      hw_src src;

      src = src_swizzle(get_src(c, &asrc->src), ALU_SWIZ(asrc));
      src = src_swizzle(src, dst_swiz);

      if (src.rgroup != INST_RGROUP_IMMEDIATE) {
         src.neg = asrc->negate || (alu->op == nir_op_fneg);
         src.abs = asrc->abs || (alu->op == nir_op_fabs);
      } else {
         assert(!asrc->negate && alu->op != nir_op_fneg);
         assert(!asrc->abs && alu->op != nir_op_fabs);
      }

      srcs[i] = src;
   }

   etna_emit_alu(c, alu->op, dst, srcs, alu->dest.saturate || (alu->op == nir_op_fsat));
}
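
/*
 * Note (my reading of emit_alu): for per-component ops the RA dest swizzle is
 * composed into every source, so a value placed in .zw reads its operands
 * pre-swizzled into .zw as well; dot products skip this because they reduce
 * across components into a single result.
 */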
static void
emit_tex(struct etna_compile *c, nir_tex_instr * tex)
{
   unsigned dst_swiz;
   hw_dst dst = ra_dest(c, &tex->dest, &dst_swiz);
   nir_src *coord = NULL, *lod_bias = NULL, *compare = NULL;

   for (unsigned i = 0; i < tex->num_srcs; i++) {
      switch (tex->src[i].src_type) {
      case nir_tex_src_coord:
         coord = &tex->src[i].src;
         break;
      case nir_tex_src_bias:
      case nir_tex_src_lod:
         assert(!lod_bias);
         lod_bias = &tex->src[i].src;
         break;
      case nir_tex_src_comparator:
         compare = &tex->src[i].src;
         break;
      default:
         compile_error(c, "Unhandled NIR tex src type: %d\n",
                       tex->src[i].src_type);
         break;
      }
   }

   etna_emit_tex(c, tex->op, tex->sampler_index, dst_swiz, dst, get_src(c, coord),
                 lod_bias ? get_src(c, lod_bias) : SRC_DISABLE,
                 compare ? get_src(c, compare) : SRC_DISABLE);
}
static void
emit_intrinsic(struct etna_compile *c, nir_intrinsic_instr * intr)
{
   switch (intr->intrinsic) {
   case nir_intrinsic_store_deref:
      etna_emit_output(c, nir_src_as_deref(intr->src[0])->var, get_src(c, &intr->src[1]));
      break;
   case nir_intrinsic_discard_if:
      etna_emit_discard(c, get_src(c, &intr->src[0]));
      break;
   case nir_intrinsic_discard:
      etna_emit_discard(c, SRC_DISABLE);
      break;
   case nir_intrinsic_load_uniform: {
      unsigned dst_swiz;
      struct etna_inst_dst dst = ra_dest(c, &intr->dest, &dst_swiz);

      /* TODO: rework so extra MOV isn't required, load up to 4 addresses at once */
      emit_inst(c, &(struct etna_inst) {
         .opcode = INST_OPCODE_MOVAR,
         .dst.write_mask = 0x1,
         .src[2] = get_src(c, &intr->src[0]),
      });
      emit_inst(c, &(struct etna_inst) {
         .opcode = INST_OPCODE_MOV,
         .dst = dst,
         .src[2] = {
            .use = 1,
            .rgroup = INST_RGROUP_UNIFORM_0,
            .reg = nir_intrinsic_base(intr),
            .swiz = dst_swiz,
            .amode = INST_AMODE_ADD_A_X,
         },
      });
   } break;
   case nir_intrinsic_load_ubo: {
      /* TODO: if offset is of the form (x + C) then add C to the base instead */
      unsigned idx = nir_src_as_const_value(intr->src[0])[0].u32;
      unsigned dst_swiz;
      emit_inst(c, &(struct etna_inst) {
         .opcode = INST_OPCODE_LOAD,
         .type = INST_TYPE_U32,
         .dst = ra_dest(c, &intr->dest, &dst_swiz),
         .src[0] = get_src(c, &intr->src[1]),
         .src[1] = const_src(c, &CONST_VAL(ETNA_IMMEDIATE_UBO0_ADDR + idx, 0), 1),
      });
   } break;
   case nir_intrinsic_load_front_face:
   case nir_intrinsic_load_frag_coord:
      assert(intr->dest.is_ssa); /* TODO - lower phis could cause this */
      break;
   case nir_intrinsic_load_input:
   case nir_intrinsic_load_instance_id:
      break;
   default:
      compile_error(c, "Unhandled NIR intrinsic type: %s\n",
                    nir_intrinsic_infos[intr->intrinsic].name);
   }
}
static void
emit_instr(struct etna_compile *c, nir_instr * instr)
{
   switch (instr->type) {
   case nir_instr_type_alu:
      emit_alu(c, nir_instr_as_alu(instr));
      break;
   case nir_instr_type_tex:
      emit_tex(c, nir_instr_as_tex(instr));
      break;
   case nir_instr_type_intrinsic:
      emit_intrinsic(c, nir_instr_as_intrinsic(instr));
      break;
   case nir_instr_type_jump:
      assert(nir_instr_is_last(instr));
      /* fallthrough */
   case nir_instr_type_load_const:
   case nir_instr_type_ssa_undef:
   case nir_instr_type_deref:
      break;
   default:
      compile_error(c, "Unhandled NIR instruction type: %d\n", instr->type);
      break;
   }
}
static void
emit_block(struct etna_compile *c, nir_block * block)
{
   etna_emit_block_start(c, block->index);

   nir_foreach_instr(instr, block)
      emit_instr(c, instr);

   /* succs->index < block->index is for the loop case */
   nir_block *succs = block->successors[0];
   if (nir_block_ends_in_jump(block) || succs->index < block->index)
      etna_emit_jump(c, succs->index, SRC_DISABLE);
}

static void
emit_cf_list(struct etna_compile *c, struct exec_list *list);
static void
emit_if(struct etna_compile *c, nir_if * nif)
{
   etna_emit_jump(c, nir_if_first_else_block(nif)->index, get_src(c, &nif->condition));
   emit_cf_list(c, &nif->then_list);

   /* jump at end of then_list to skip else_list
    * not needed if then_list already ends with a jump or else_list is empty
    */
   if (!nir_block_ends_in_jump(nir_if_last_then_block(nif)) &&
       !nir_cf_list_is_empty_block(&nif->else_list))
      etna_emit_jump(c, nir_if_last_else_block(nif)->successors[0]->index, SRC_DISABLE);

   emit_cf_list(c, &nif->else_list);
}
static void
emit_cf_list(struct etna_compile *c, struct exec_list *list)
{
   foreach_list_typed(nir_cf_node, node, node, list) {
      switch (node->type) {
      case nir_cf_node_block:
         emit_block(c, nir_cf_node_as_block(node));
         break;
      case nir_cf_node_if:
         emit_if(c, nir_cf_node_as_if(node));
         break;
      case nir_cf_node_loop:
         emit_cf_list(c, &nir_cf_node_as_loop(node)->body);
         break;
      default:
         compile_error(c, "Unknown NIR node type\n");
         break;
      }
   }
}
/* based on nir_lower_vec_to_movs */
static unsigned
insert_vec_mov(nir_alu_instr *vec, unsigned start_idx, nir_shader *shader)
{
   assert(start_idx < nir_op_infos[vec->op].num_inputs);
   unsigned write_mask = (1u << start_idx);

   nir_alu_instr *mov = nir_alu_instr_create(shader, nir_op_mov);
   nir_alu_src_copy(&mov->src[0], &vec->src[start_idx], mov);

   mov->src[0].swizzle[0] = vec->src[start_idx].swizzle[0];
   mov->src[0].negate = vec->src[start_idx].negate;
   mov->src[0].abs = vec->src[start_idx].abs;

   unsigned num_components = 1;

   for (unsigned i = start_idx + 1; i < 4; i++) {
      if (!(vec->dest.write_mask & (1 << i)))
         continue;

      if (nir_srcs_equal(vec->src[i].src, vec->src[start_idx].src) &&
          vec->src[i].negate == vec->src[start_idx].negate &&
          vec->src[i].abs == vec->src[start_idx].abs) {
         write_mask |= (1 << i);
         mov->src[0].swizzle[num_components] = vec->src[i].swizzle[0];
         num_components++;
      }
   }

   mov->dest.write_mask = (1 << num_components) - 1;
   nir_ssa_dest_init(&mov->instr, &mov->dest.dest, num_components, 32, NULL);

   /* replace vec srcs with inserted mov */
   for (unsigned i = 0, j = 0; i < 4; i++) {
      if (!(write_mask & (1 << i)))
         continue;

      nir_instr_rewrite_src(&vec->instr, &vec->src[i].src, nir_src_for_ssa(&mov->dest.dest.ssa));
      vec->src[i].swizzle[0] = j++;
   }

   nir_instr_insert_before(&vec->instr, &mov->instr);

   return write_mask;
}
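
/*
 * Example of this lowering (hypothetical input, my illustration): for
 * vec4(a.x, a.y, b.x, b.x), insert_vec_mov(vec, 0, ...) emits
 * "mov tmp.xy, a.xy", rewrites the first two vec sources to tmp and returns
 * write_mask 0x3; a second call with start_idx 2 handles both b.x components
 * with one more mov.
 */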
/*
 * for vecN instructions:
 * -merge constant sources into a single src
 * -insert movs (nir_lower_vec_to_movs equivalent)
 * for non-vecN instructions:
 * -try to merge constants as single constant
 * -insert movs for multiple constants (pre-HALTI5)
 */
static void
lower_alu(struct etna_compile *c, nir_alu_instr *alu)
{
   const nir_op_info *info = &nir_op_infos[alu->op];

   nir_builder b;
   nir_builder_init(&b, c->impl);
   b.cursor = nir_before_instr(&alu->instr);

   switch (alu->op) {
   case nir_op_vec2:
   case nir_op_vec3:
   case nir_op_vec4:
      break;
   default:
      /* pre-GC7000L can only have 1 uniform src per instruction */
      if (c->specs->halti >= 5)
         return;

      nir_const_value value[4] = {};
      uint8_t swizzle[4][4] = {};
      unsigned swiz_max = 0, num_const = 0;

      for (unsigned i = 0; i < info->num_inputs; i++) {
         nir_const_value *cv = nir_src_as_const_value(alu->src[i].src);
         if (!cv)
            continue;

         unsigned num_components = info->input_sizes[i] ?: alu->dest.dest.ssa.num_components;
         for (unsigned j = 0; j < num_components; j++) {
            int idx = const_add(&value[0].u64, cv[alu->src[i].swizzle[j]].u64);
            swizzle[i][j] = idx;
            swiz_max = MAX2(swiz_max, (unsigned) idx);
         }
         num_const++;
      }

      /* nothing to do */
      if (num_const <= 1)
         return;

      /* resolve with single combined const src */
      if (swiz_max < 4) {
         nir_ssa_def *def = nir_build_imm(&b, swiz_max + 1, 32, value);

         for (unsigned i = 0; i < info->num_inputs; i++) {
            nir_const_value *cv = nir_src_as_const_value(alu->src[i].src);
            if (!cv)
               continue;

            nir_instr_rewrite_src(&alu->instr, &alu->src[i].src, nir_src_for_ssa(def));

            for (unsigned j = 0; j < 4; j++)
               alu->src[i].swizzle[j] = swizzle[i][j];
         }
         return;
      }

      /* resolve with movs */
      num_const = 0;
      for (unsigned i = 0; i < info->num_inputs; i++) {
         nir_const_value *cv = nir_src_as_const_value(alu->src[i].src);
         if (!cv)
            continue;

         num_const++;
         if (num_const == 1)
            continue;

         nir_ssa_def *mov = nir_mov(&b, alu->src[i].src.ssa);
         nir_instr_rewrite_src(&alu->instr, &alu->src[i].src, nir_src_for_ssa(mov));
      }
      return;
   }

   nir_const_value value[4];
   unsigned num_components = 0;

   for (unsigned i = 0; i < info->num_inputs; i++) {
      nir_const_value *cv = nir_src_as_const_value(alu->src[i].src);
      if (cv)
         value[num_components++] = cv[alu->src[i].swizzle[0]];
   }

   /* if there is more than one constant source to the vecN, combine them
    * into a single load_const (removing the vecN completely if all components
    * are constant)
    */
   if (num_components > 1) {
      nir_ssa_def *def = nir_build_imm(&b, num_components, 32, value);

      if (num_components == info->num_inputs) {
         nir_ssa_def_rewrite_uses(&alu->dest.dest.ssa, nir_src_for_ssa(def));
         nir_instr_remove(&alu->instr);
         return;
      }

      for (unsigned i = 0, j = 0; i < info->num_inputs; i++) {
         nir_const_value *cv = nir_src_as_const_value(alu->src[i].src);
         if (!cv)
            continue;

         nir_instr_rewrite_src(&alu->instr, &alu->src[i].src, nir_src_for_ssa(def));
         alu->src[i].swizzle[0] = j++;
      }
   }

   unsigned finished_write_mask = 0;
   for (unsigned i = 0; i < 4; i++) {
      if (!(alu->dest.write_mask & (1 << i)))
         continue;

      nir_ssa_def *ssa = alu->src[i].src.ssa;

      /* check that vecN instruction is only user of this */
      bool need_mov = list_length(&ssa->if_uses) != 0;
      nir_foreach_use(use_src, ssa) {
         if (use_src->parent_instr != &alu->instr)
            need_mov = true;
      }

      nir_instr *instr = ssa->parent_instr;
      switch (instr->type) {
      case nir_instr_type_alu:
      case nir_instr_type_tex:
         break;
      case nir_instr_type_intrinsic:
         if (nir_instr_as_intrinsic(instr)->intrinsic == nir_intrinsic_load_input) {
            need_mov = vec_dest_has_swizzle(alu, &nir_instr_as_intrinsic(instr)->dest.ssa);
            break;
         }
         /* fallthrough */
      default:
         need_mov = true;
      }

      if (need_mov && !(finished_write_mask & (1 << i)))
         finished_write_mask |= insert_vec_mov(alu, i, c->nir);
   }
}
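
/*
 * Illustrative behavior (hypothetical shader, my gloss): with halti < 5,
 * "fadd dest, 1.0, 2.0" has two constant sources; both values land in one
 * vec4 slot via const_add() and the sources are rewritten to a single
 * combined immediate with adjusted swizzles, so the hardware instruction
 * only ever sees one uniform operand.
 */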
static void
emit_shader(struct etna_compile *c, unsigned *num_temps, unsigned *num_consts)
{
   nir_shader *shader = c->nir;
   c->impl = nir_shader_get_entrypoint(shader);

   bool have_indirect_uniform = false;
   unsigned indirect_max = 0;

   nir_builder b;
   nir_builder_init(&b, c->impl);

   /* convert non-dynamic uniform loads to constants, etc */
   nir_foreach_block(block, c->impl) {
      nir_foreach_instr_safe(instr, block) {
         switch(instr->type) {
         case nir_instr_type_alu:
            /* deals with vecN and const srcs */
            lower_alu(c, nir_instr_as_alu(instr));
            break;
         case nir_instr_type_load_const: {
            nir_load_const_instr *load_const = nir_instr_as_load_const(instr);
            for (unsigned i = 0; i < load_const->def.num_components; i++)
               load_const->value[i] = CONST(load_const->value[i].u32);
         } break;
         case nir_instr_type_intrinsic: {
            nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
            /* TODO: load_ubo can also become a constant in some cases
             * (at the moment it can end up emitting a LOAD with two
             * uniform sources, which could be a problem on HALTI2)
             */
            if (intr->intrinsic != nir_intrinsic_load_uniform)
               break;
            nir_const_value *off = nir_src_as_const_value(intr->src[0]);
            if (!off || off[0].u64 >> 32 != ETNA_IMMEDIATE_CONSTANT) {
               have_indirect_uniform = true;
               indirect_max = nir_intrinsic_base(intr) + nir_intrinsic_range(intr);
               break;
            }

            unsigned base = nir_intrinsic_base(intr);
            /* pre halti2 uniform offset will be float */
            if (c->specs->halti < 2)
               base += (unsigned) off[0].f32;
            else
               base += off[0].u32;
            nir_const_value value[4];

            for (unsigned i = 0; i < intr->dest.ssa.num_components; i++) {
               if (nir_intrinsic_base(intr) < 0)
                  value[i] = TEXSCALE(~nir_intrinsic_base(intr), i);
               else
                  value[i] = UNIFORM(base * 4 + i);
            }

            b.cursor = nir_after_instr(instr);
            nir_ssa_def *def = nir_build_imm(&b, intr->dest.ssa.num_components, 32, value);

            nir_ssa_def_rewrite_uses(&intr->dest.ssa, nir_src_for_ssa(def));
            nir_instr_remove(instr);
         } break;
         default:
            break;
         }
      }
   }

   /* TODO: only emit required indirect uniform ranges */
   if (have_indirect_uniform) {
      for (unsigned i = 0; i < indirect_max * 4; i++)
         c->consts[i] = UNIFORM(i).u64;
      c->const_count = indirect_max;
   }

   /* add mov for any store output using sysval/const */
   nir_foreach_block(block, c->impl) {
      nir_foreach_instr_safe(instr, block) {
         if (instr->type != nir_instr_type_intrinsic)
            continue;

         nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);

         switch (intr->intrinsic) {
         case nir_intrinsic_store_deref: {
            nir_src *src = &intr->src[1];
            if (nir_src_is_const(*src) || is_sysval(src->ssa->parent_instr)) {
               b.cursor = nir_before_instr(instr);
               nir_instr_rewrite_src(instr, src, nir_src_for_ssa(nir_mov(&b, src->ssa)));
            }
         } break;
         default:
            break;
         }
      }
   }

   /* call directly to avoid validation (load_const don't pass validation at this point) */
   nir_convert_from_ssa(shader, true);
   nir_opt_dce(shader);

   ra_assign(c, shader);

   emit_cf_list(c, &nir_shader_get_entrypoint(shader)->body);

   *num_temps = ra_finish(c);
   *num_consts = c->const_count;
}