2 * Copyright © 2014-2015 Broadcom
3 * Copyright (C) 2014 Rob Clark <robclark@freedesktop.org>
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9 * and/or sell copies of the Software, and to permit persons to whom the
10 * Software is furnished to do so, subject to the following conditions:
12 * The above copyright notice and this permission notice (including the next
13 * paragraph) shall be included in all copies or substantial portions of the
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
25 #include "util/ralloc.h"
26 #include "glsl/nir/nir.h"
27 #include "glsl/nir/nir_builder.h"
28 #include "glsl/list.h"
29 #include "glsl/shader_enums.h"
31 #include "nir/tgsi_to_nir.h"
32 #include "tgsi/tgsi_parse.h"
33 #include "tgsi/tgsi_dump.h"
34 #include "tgsi/tgsi_info.h"
35 #include "tgsi/tgsi_scan.h"
37 #define SWIZ(X, Y, Z, W) (unsigned[4]){ \
45 /** nir register containing this TGSI index. */
48 /** Offset (in vec4s) from the start of var for this TGSI index. */
53 union tgsi_full_token
*token
;
56 struct tgsi_shader_info
*scan
;
58 struct ttn_reg_info
*output_regs
;
59 struct ttn_reg_info
*temp_regs
;
60 nir_ssa_def
**imm_defs
;
62 nir_register
*addr_reg
;
65 * Stack of cf_node_lists where instructions should be pushed as we pop
66 * back out of the control flow stack.
68 * For each IF/ELSE/ENDIF block, if_stack[if_stack_pos] has where the else
69 * instructions should be placed, and if_stack[if_stack_pos - 1] has where
70 * the next instructions outside of the if/then/else block go.
72 struct exec_list
**if_stack
;
73 unsigned if_stack_pos
;
76 * Stack of cf_node_lists where instructions should be pushed as we pop
77 * back out of the control flow stack.
79 * loop_stack[loop_stack_pos - 1] contains the cf_node_list for the outside
82 struct exec_list
**loop_stack
;
83 unsigned loop_stack_pos
;
85 /* How many TGSI_FILE_IMMEDIATE vec4s have been parsed so far. */
89 #define ttn_swizzle(b, src, x, y, z, w) \
90 nir_swizzle(b, src, SWIZ(x, y, z, w), 4, false)
91 #define ttn_channel(b, src, swiz) \
92 nir_swizzle(b, src, SWIZ(swiz, swiz, swiz, swiz), 1, false)
95 ttn_src_for_dest(nir_builder
*b
, nir_alu_dest
*dest
)
98 memset(&src
, 0, sizeof(src
));
100 if (dest
->dest
.is_ssa
)
101 src
.src
= nir_src_for_ssa(&dest
->dest
.ssa
);
103 assert(!dest
->dest
.reg
.indirect
);
104 src
.src
= nir_src_for_reg(dest
->dest
.reg
.reg
);
105 src
.src
.reg
.base_offset
= dest
->dest
.reg
.base_offset
;
108 for (int i
= 0; i
< 4; i
++)
111 return nir_fmov_alu(b
, src
, 4);
115 ttn_emit_declaration(struct ttn_compile
*c
)
117 nir_builder
*b
= &c
->build
;
118 struct tgsi_full_declaration
*decl
= &c
->token
->FullDeclaration
;
119 unsigned array_size
= decl
->Range
.Last
- decl
->Range
.First
+ 1;
120 unsigned file
= decl
->Declaration
.File
;
123 if (file
== TGSI_FILE_TEMPORARY
) {
124 if (decl
->Declaration
.Array
) {
125 /* for arrays, we create variables instead of registers: */
126 nir_variable
*var
= rzalloc(b
->shader
, nir_variable
);
128 var
->type
= glsl_array_type(glsl_vec4_type(), array_size
);
129 var
->data
.mode
= nir_var_global
;
130 var
->name
= ralloc_asprintf(var
, "arr_%d", decl
->Array
.ArrayID
);
132 exec_list_push_tail(&b
->shader
->globals
, &var
->node
);
134 for (i
= 0; i
< array_size
; i
++) {
135 /* point all the matching slots to the same var,
136 * with appropriate offset set, mostly just so
137 * we know what to do when tgsi does a non-indirect
140 c
->temp_regs
[decl
->Range
.First
+ i
].reg
= NULL
;
141 c
->temp_regs
[decl
->Range
.First
+ i
].var
= var
;
142 c
->temp_regs
[decl
->Range
.First
+ i
].offset
= i
;
145 for (i
= 0; i
< array_size
; i
++) {
146 nir_register
*reg
= nir_local_reg_create(b
->impl
);
147 reg
->num_components
= 4;
148 c
->temp_regs
[decl
->Range
.First
+ i
].reg
= reg
;
149 c
->temp_regs
[decl
->Range
.First
+ i
].var
= NULL
;
150 c
->temp_regs
[decl
->Range
.First
+ i
].offset
= 0;
153 } else if (file
== TGSI_FILE_ADDRESS
) {
154 c
->addr_reg
= nir_local_reg_create(b
->impl
);
155 c
->addr_reg
->num_components
= 4;
156 } else if (file
== TGSI_FILE_SYSTEM_VALUE
) {
157 /* Nothing to record for system values. */
158 } else if (file
== TGSI_FILE_SAMPLER
) {
159 /* Nothing to record for samplers. */
162 assert(file
== TGSI_FILE_INPUT
||
163 file
== TGSI_FILE_OUTPUT
||
164 file
== TGSI_FILE_CONSTANT
);
166 var
= rzalloc(b
->shader
, nir_variable
);
167 var
->data
.driver_location
= decl
->Range
.First
;
169 var
->type
= glsl_vec4_type();
171 var
->type
= glsl_array_type(var
->type
, array_size
);
174 case TGSI_FILE_INPUT
:
175 var
->data
.read_only
= true;
176 var
->data
.mode
= nir_var_shader_in
;
177 var
->name
= ralloc_asprintf(var
, "in_%d", decl
->Range
.First
);
179 /* We should probably translate to a VERT_ATTRIB_* or VARYING_SLOT_*
180 * instead, but nothing in NIR core is looking at the value
181 * currently, and this is less change to drivers.
183 var
->data
.location
= decl
->Semantic
.Name
;
184 var
->data
.index
= decl
->Semantic
.Index
;
186 /* We definitely need to translate the interpolation field, because
187 * nir_print will decode it.
189 switch (decl
->Interp
.Interpolate
) {
190 case TGSI_INTERPOLATE_CONSTANT
:
191 var
->data
.interpolation
= INTERP_QUALIFIER_FLAT
;
193 case TGSI_INTERPOLATE_LINEAR
:
194 var
->data
.interpolation
= INTERP_QUALIFIER_NOPERSPECTIVE
;
196 case TGSI_INTERPOLATE_PERSPECTIVE
:
197 var
->data
.interpolation
= INTERP_QUALIFIER_SMOOTH
;
201 exec_list_push_tail(&b
->shader
->inputs
, &var
->node
);
203 case TGSI_FILE_OUTPUT
: {
204 /* Since we can't load from outputs in the IR, we make temporaries
205 * for the outputs and emit stores to the real outputs at the end of
208 nir_register
*reg
= nir_local_reg_create(b
->impl
);
209 reg
->num_components
= 4;
211 reg
->num_array_elems
= array_size
;
213 var
->data
.mode
= nir_var_shader_out
;
214 var
->name
= ralloc_asprintf(var
, "out_%d", decl
->Range
.First
);
216 var
->data
.location
= decl
->Semantic
.Name
;
217 var
->data
.index
= decl
->Semantic
.Index
;
219 for (i
= 0; i
< array_size
; i
++) {
220 c
->output_regs
[decl
->Range
.First
+ i
].offset
= i
;
221 c
->output_regs
[decl
->Range
.First
+ i
].reg
= reg
;
224 exec_list_push_tail(&b
->shader
->outputs
, &var
->node
);
227 case TGSI_FILE_CONSTANT
:
228 var
->data
.mode
= nir_var_uniform
;
229 var
->name
= ralloc_asprintf(var
, "uniform_%d", decl
->Range
.First
);
231 exec_list_push_tail(&b
->shader
->uniforms
, &var
->node
);
234 unreachable("bad declaration file");
242 ttn_emit_immediate(struct ttn_compile
*c
)
244 nir_builder
*b
= &c
->build
;
245 struct tgsi_full_immediate
*tgsi_imm
= &c
->token
->FullImmediate
;
246 nir_load_const_instr
*load_const
;
249 load_const
= nir_load_const_instr_create(b
->shader
, 4);
250 c
->imm_defs
[c
->next_imm
] = &load_const
->def
;
253 for (i
= 0; i
< 4; i
++)
254 load_const
->value
.u
[i
] = tgsi_imm
->u
[i
].Uint
;
256 nir_instr_insert_after_cf_list(b
->cf_node_list
, &load_const
->instr
);
260 ttn_src_for_indirect(struct ttn_compile
*c
, struct tgsi_ind_register
*indirect
);
262 /* generate either a constant or indirect deref chain for accessing an
265 static nir_deref_var
*
266 ttn_array_deref(struct ttn_compile
*c
, nir_intrinsic_instr
*instr
,
267 nir_variable
*var
, unsigned offset
,
268 struct tgsi_ind_register
*indirect
)
270 nir_deref_var
*deref
= nir_deref_var_create(instr
, var
);
271 nir_deref_array
*arr
= nir_deref_array_create(deref
);
273 arr
->base_offset
= offset
;
274 arr
->deref
.type
= glsl_get_array_element(var
->type
);
277 arr
->deref_array_type
= nir_deref_array_type_indirect
;
278 arr
->indirect
= *ttn_src_for_indirect(c
, indirect
);
280 arr
->deref_array_type
= nir_deref_array_type_direct
;
283 deref
->deref
.child
= &arr
->deref
;
289 ttn_src_for_file_and_index(struct ttn_compile
*c
, unsigned file
, unsigned index
,
290 struct tgsi_ind_register
*indirect
)
292 nir_builder
*b
= &c
->build
;
295 memset(&src
, 0, sizeof(src
));
298 case TGSI_FILE_TEMPORARY
:
299 if (c
->temp_regs
[index
].var
) {
300 unsigned offset
= c
->temp_regs
[index
].offset
;
301 nir_variable
*var
= c
->temp_regs
[index
].var
;
302 nir_intrinsic_instr
*load
;
304 load
= nir_intrinsic_instr_create(b
->shader
,
305 nir_intrinsic_load_var
);
306 load
->num_components
= 4;
307 load
->variables
[0] = ttn_array_deref(c
, load
, var
, offset
, indirect
);
309 nir_ssa_dest_init(&load
->instr
, &load
->dest
, 4, NULL
);
310 nir_instr_insert_after_cf_list(b
->cf_node_list
, &load
->instr
);
312 src
= nir_src_for_ssa(&load
->dest
.ssa
);
316 src
.reg
.reg
= c
->temp_regs
[index
].reg
;
320 case TGSI_FILE_ADDRESS
:
321 src
.reg
.reg
= c
->addr_reg
;
324 case TGSI_FILE_IMMEDIATE
:
325 src
= nir_src_for_ssa(c
->imm_defs
[index
]);
329 case TGSI_FILE_SYSTEM_VALUE
: {
330 nir_intrinsic_instr
*load
;
334 switch (c
->scan
->system_value_semantic_name
[index
]) {
335 case TGSI_SEMANTIC_VERTEXID_NOBASE
:
336 op
= nir_intrinsic_load_vertex_id_zero_base
;
338 case TGSI_SEMANTIC_VERTEXID
:
339 op
= nir_intrinsic_load_vertex_id
;
341 case TGSI_SEMANTIC_BASEVERTEX
:
342 op
= nir_intrinsic_load_base_vertex
;
344 case TGSI_SEMANTIC_INSTANCEID
:
345 op
= nir_intrinsic_load_instance_id
;
348 unreachable("bad system value");
351 load
= nir_intrinsic_instr_create(b
->shader
, op
);
352 load
->num_components
= ncomp
;
354 nir_ssa_dest_init(&load
->instr
, &load
->dest
, ncomp
, NULL
);
355 nir_instr_insert_after_cf_list(b
->cf_node_list
, &load
->instr
);
357 src
= nir_src_for_ssa(&load
->dest
.ssa
);
361 case TGSI_FILE_INPUT
:
362 case TGSI_FILE_CONSTANT
: {
363 nir_intrinsic_instr
*load
;
366 case TGSI_FILE_INPUT
:
367 load
= nir_intrinsic_instr_create(b
->shader
,
369 nir_intrinsic_load_input_indirect
:
370 nir_intrinsic_load_input
);
372 case TGSI_FILE_CONSTANT
:
373 load
= nir_intrinsic_instr_create(b
->shader
,
375 nir_intrinsic_load_uniform_indirect
:
376 nir_intrinsic_load_uniform
);
379 unreachable("No other load files supported");
383 load
->num_components
= 4;
384 load
->const_index
[0] = index
;
385 load
->const_index
[1] = 1;
387 nir_alu_src indirect_address
;
388 memset(&indirect_address
, 0, sizeof(indirect_address
));
389 indirect_address
.src
= nir_src_for_reg(c
->addr_reg
);
390 for (int i
= 0; i
< 4; i
++)
391 indirect_address
.swizzle
[i
] = indirect
->Swizzle
;
392 load
->src
[0] = nir_src_for_ssa(nir_imov_alu(b
, indirect_address
, 1));
394 nir_ssa_dest_init(&load
->instr
, &load
->dest
, 4, NULL
);
395 nir_instr_insert_after_cf_list(b
->cf_node_list
, &load
->instr
);
397 src
= nir_src_for_ssa(&load
->dest
.ssa
);
402 unreachable("bad src file");
410 ttn_src_for_indirect(struct ttn_compile
*c
, struct tgsi_ind_register
*indirect
)
412 nir_builder
*b
= &c
->build
;
414 memset(&src
, 0, sizeof(src
));
415 for (int i
= 0; i
< 4; i
++)
416 src
.swizzle
[i
] = indirect
->Swizzle
;
417 src
.src
= ttn_src_for_file_and_index(c
,
419 indirect
->Index
, NULL
);
420 nir_src
*result
= ralloc(b
->shader
, nir_src
);
421 *result
= nir_src_for_ssa(nir_imov_alu(b
, src
, 1));
426 ttn_get_dest(struct ttn_compile
*c
, struct tgsi_full_dst_register
*tgsi_fdst
)
428 struct tgsi_dst_register
*tgsi_dst
= &tgsi_fdst
->Register
;
430 unsigned index
= tgsi_dst
->Index
;
432 memset(&dest
, 0, sizeof(dest
));
434 if (tgsi_dst
->File
== TGSI_FILE_TEMPORARY
) {
435 if (c
->temp_regs
[index
].var
) {
436 nir_builder
*b
= &c
->build
;
437 nir_intrinsic_instr
*load
;
438 struct tgsi_ind_register
*indirect
=
439 tgsi_dst
->Indirect
? &tgsi_fdst
->Indirect
: NULL
;
442 /* this works, because TGSI will give us a base offset
443 * (in case of indirect index) that points back into
444 * the array. Access can be direct or indirect, we
445 * don't really care. Just create a one-shot dst reg
446 * that will get store_var'd back into the array var
447 * at the end of ttn_emit_instruction()
449 reg
= nir_local_reg_create(c
->build
.impl
);
450 reg
->num_components
= 4;
451 dest
.dest
.reg
.reg
= reg
;
452 dest
.dest
.reg
.base_offset
= 0;
454 /* since the alu op might not write to all components
455 * of the temporary, we must first do a load_var to
456 * get the previous array elements into the register.
457 * This is one area that NIR could use a bit of
458 * improvement (or opt pass to clean up the mess
459 * once things are scalarized)
462 load
= nir_intrinsic_instr_create(c
->build
.shader
,
463 nir_intrinsic_load_var
);
464 load
->num_components
= 4;
466 ttn_array_deref(c
, load
, c
->temp_regs
[index
].var
,
467 c
->temp_regs
[index
].offset
,
470 load
->dest
= nir_dest_for_reg(reg
);
472 nir_instr_insert_after_cf_list(b
->cf_node_list
, &load
->instr
);
474 assert(!tgsi_dst
->Indirect
);
475 dest
.dest
.reg
.reg
= c
->temp_regs
[index
].reg
;
476 dest
.dest
.reg
.base_offset
= c
->temp_regs
[index
].offset
;
478 } else if (tgsi_dst
->File
== TGSI_FILE_OUTPUT
) {
479 dest
.dest
.reg
.reg
= c
->output_regs
[index
].reg
;
480 dest
.dest
.reg
.base_offset
= c
->output_regs
[index
].offset
;
481 } else if (tgsi_dst
->File
== TGSI_FILE_ADDRESS
) {
483 dest
.dest
.reg
.reg
= c
->addr_reg
;
486 dest
.write_mask
= tgsi_dst
->WriteMask
;
487 dest
.saturate
= false;
489 if (tgsi_dst
->Indirect
&& (tgsi_dst
->File
!= TGSI_FILE_TEMPORARY
))
490 dest
.dest
.reg
.indirect
= ttn_src_for_indirect(c
, &tgsi_fdst
->Indirect
);
495 static nir_variable
*
496 ttn_get_var(struct ttn_compile
*c
, struct tgsi_full_dst_register
*tgsi_fdst
)
498 struct tgsi_dst_register
*tgsi_dst
= &tgsi_fdst
->Register
;
499 unsigned index
= tgsi_dst
->Index
;
501 if (tgsi_dst
->File
== TGSI_FILE_TEMPORARY
) {
502 /* we should not have an indirect when there is no var! */
503 if (!c
->temp_regs
[index
].var
)
504 assert(!tgsi_dst
->Indirect
);
505 return c
->temp_regs
[index
].var
;
512 ttn_get_src(struct ttn_compile
*c
, struct tgsi_full_src_register
*tgsi_fsrc
)
514 nir_builder
*b
= &c
->build
;
515 struct tgsi_src_register
*tgsi_src
= &tgsi_fsrc
->Register
;
516 unsigned tgsi_opcode
= c
->token
->FullInstruction
.Instruction
.Opcode
;
517 unsigned tgsi_src_type
= tgsi_opcode_infer_src_type(tgsi_opcode
);
518 bool src_is_float
= !(tgsi_src_type
== TGSI_TYPE_SIGNED
||
519 tgsi_src_type
== TGSI_TYPE_UNSIGNED
);
522 memset(&src
, 0, sizeof(src
));
524 if (tgsi_src
->File
== TGSI_FILE_NULL
) {
525 return nir_imm_float(b
, 0.0);
526 } else if (tgsi_src
->File
== TGSI_FILE_SAMPLER
) {
527 /* Only the index of the sampler gets used in texturing, and it will
528 * handle looking that up on its own instead of using the nir_alu_src.
530 assert(!tgsi_src
->Indirect
);
533 src
.src
= ttn_src_for_file_and_index(c
,
536 (tgsi_src
->Indirect
?
537 &tgsi_fsrc
->Indirect
: NULL
));
540 src
.swizzle
[0] = tgsi_src
->SwizzleX
;
541 src
.swizzle
[1] = tgsi_src
->SwizzleY
;
542 src
.swizzle
[2] = tgsi_src
->SwizzleZ
;
543 src
.swizzle
[3] = tgsi_src
->SwizzleW
;
545 nir_ssa_def
*def
= nir_fmov_alu(b
, src
, 4);
547 if (tgsi_src
->Absolute
) {
549 def
= nir_fabs(b
, def
);
551 def
= nir_iabs(b
, def
);
554 if (tgsi_src
->Negate
) {
556 def
= nir_fneg(b
, def
);
558 def
= nir_ineg(b
, def
);
565 ttn_alu(nir_builder
*b
, nir_op op
, nir_alu_dest dest
, nir_ssa_def
**src
)
567 unsigned num_srcs
= nir_op_infos
[op
].num_inputs
;
568 nir_alu_instr
*instr
= nir_alu_instr_create(b
->shader
, op
);
571 for (i
= 0; i
< num_srcs
; i
++)
572 instr
->src
[i
].src
= nir_src_for_ssa(src
[i
]);
575 nir_instr_insert_after_cf_list(b
->cf_node_list
, &instr
->instr
);
579 ttn_move_dest_masked(nir_builder
*b
, nir_alu_dest dest
,
580 nir_ssa_def
*def
, unsigned write_mask
)
582 if (!(dest
.write_mask
& write_mask
))
585 nir_alu_instr
*mov
= nir_alu_instr_create(b
->shader
, nir_op_imov
);
587 mov
->dest
.write_mask
&= write_mask
;
588 mov
->src
[0].src
= nir_src_for_ssa(def
);
589 for (unsigned i
= def
->num_components
; i
< 4; i
++)
590 mov
->src
[0].swizzle
[i
] = def
->num_components
- 1;
591 nir_instr_insert_after_cf_list(b
->cf_node_list
, &mov
->instr
);
595 ttn_move_dest(nir_builder
*b
, nir_alu_dest dest
, nir_ssa_def
*def
)
597 ttn_move_dest_masked(b
, dest
, def
, TGSI_WRITEMASK_XYZW
);
601 ttn_arl(nir_builder
*b
, nir_op op
, nir_alu_dest dest
, nir_ssa_def
**src
)
603 ttn_move_dest(b
, dest
, nir_f2i(b
, nir_ffloor(b
, src
[0])));
606 /* EXP - Approximate Exponential Base 2
607 * dst.x = 2^{\lfloor src.x\rfloor}
608 * dst.y = src.x - \lfloor src.x\rfloor
613 ttn_exp(nir_builder
*b
, nir_op op
, nir_alu_dest dest
, nir_ssa_def
**src
)
615 nir_ssa_def
*srcx
= ttn_channel(b
, src
[0], X
);
617 ttn_move_dest_masked(b
, dest
, nir_fexp2(b
, nir_ffloor(b
, srcx
)),
619 ttn_move_dest_masked(b
, dest
, nir_fsub(b
, srcx
, nir_ffloor(b
, srcx
)),
621 ttn_move_dest_masked(b
, dest
, nir_fexp2(b
, srcx
), TGSI_WRITEMASK_Z
);
622 ttn_move_dest_masked(b
, dest
, nir_imm_float(b
, 1.0), TGSI_WRITEMASK_W
);
625 /* LOG - Approximate Logarithm Base 2
626 * dst.x = \lfloor\log_2{|src.x|}\rfloor
627 * dst.y = \frac{|src.x|}{2^{\lfloor\log_2{|src.x|}\rfloor}}
628 * dst.z = \log_2{|src.x|}
632 ttn_log(nir_builder
*b
, nir_op op
, nir_alu_dest dest
, nir_ssa_def
**src
)
634 nir_ssa_def
*abs_srcx
= nir_fabs(b
, ttn_channel(b
, src
[0], X
));
635 nir_ssa_def
*log2
= nir_flog2(b
, abs_srcx
);
637 ttn_move_dest_masked(b
, dest
, nir_ffloor(b
, log2
), TGSI_WRITEMASK_X
);
638 ttn_move_dest_masked(b
, dest
,
639 nir_fdiv(b
, abs_srcx
, nir_fexp2(b
, nir_ffloor(b
, log2
))),
641 ttn_move_dest_masked(b
, dest
, nir_flog2(b
, abs_srcx
), TGSI_WRITEMASK_Z
);
642 ttn_move_dest_masked(b
, dest
, nir_imm_float(b
, 1.0), TGSI_WRITEMASK_W
);
645 /* DST - Distance Vector
647 * dst.y = src0.y \times src1.y
652 ttn_dst(nir_builder
*b
, nir_op op
, nir_alu_dest dest
, nir_ssa_def
**src
)
654 ttn_move_dest_masked(b
, dest
, nir_imm_float(b
, 1.0), TGSI_WRITEMASK_X
);
655 ttn_move_dest_masked(b
, dest
, nir_fmul(b
, src
[0], src
[1]), TGSI_WRITEMASK_Y
);
656 ttn_move_dest_masked(b
, dest
, nir_fmov(b
, src
[0]), TGSI_WRITEMASK_Z
);
657 ttn_move_dest_masked(b
, dest
, nir_fmov(b
, src
[1]), TGSI_WRITEMASK_W
);
660 /* LIT - Light Coefficients
662 * dst.y = max(src.x, 0.0)
663 * dst.z = (src.x > 0.0) ? max(src.y, 0.0)^{clamp(src.w, -128.0, 128.0))} : 0
667 ttn_lit(nir_builder
*b
, nir_op op
, nir_alu_dest dest
, nir_ssa_def
**src
)
669 ttn_move_dest_masked(b
, dest
, nir_imm_float(b
, 1.0), TGSI_WRITEMASK_XW
);
671 ttn_move_dest_masked(b
, dest
, nir_fmax(b
, ttn_channel(b
, src
[0], X
),
672 nir_imm_float(b
, 0.0)), TGSI_WRITEMASK_Y
);
674 if (dest
.write_mask
& TGSI_WRITEMASK_Z
) {
675 nir_ssa_def
*src0_y
= ttn_channel(b
, src
[0], Y
);
676 nir_ssa_def
*wclamp
= nir_fmax(b
, nir_fmin(b
, ttn_channel(b
, src
[0], W
),
677 nir_imm_float(b
, 128.0)),
678 nir_imm_float(b
, -128.0));
679 nir_ssa_def
*pow
= nir_fpow(b
, nir_fmax(b
, src0_y
, nir_imm_float(b
, 0.0)),
682 ttn_move_dest_masked(b
, dest
,
685 nir_imm_float(b
, 0.0),
686 ttn_channel(b
, src
[0], X
)),
687 nir_imm_float(b
, 0.0),
694 * dst.x = \cos{src.x}
695 * dst.y = \sin{src.x}
700 ttn_scs(nir_builder
*b
, nir_op op
, nir_alu_dest dest
, nir_ssa_def
**src
)
702 ttn_move_dest_masked(b
, dest
, nir_fcos(b
, ttn_channel(b
, src
[0], X
)),
704 ttn_move_dest_masked(b
, dest
, nir_fsin(b
, ttn_channel(b
, src
[0], X
)),
706 ttn_move_dest_masked(b
, dest
, nir_imm_float(b
, 0.0), TGSI_WRITEMASK_Z
);
707 ttn_move_dest_masked(b
, dest
, nir_imm_float(b
, 1.0), TGSI_WRITEMASK_W
);
711 ttn_sle(nir_builder
*b
, nir_op op
, nir_alu_dest dest
, nir_ssa_def
**src
)
713 ttn_move_dest(b
, dest
, nir_sge(b
, src
[1], src
[0]));
717 ttn_sgt(nir_builder
*b
, nir_op op
, nir_alu_dest dest
, nir_ssa_def
**src
)
719 ttn_move_dest(b
, dest
, nir_slt(b
, src
[1], src
[0]));
723 ttn_clamp(nir_builder
*b
, nir_op op
, nir_alu_dest dest
, nir_ssa_def
**src
)
725 ttn_move_dest(b
, dest
, nir_fmin(b
, nir_fmax(b
, src
[0], src
[1]), src
[2]));
729 ttn_xpd(nir_builder
*b
, nir_op op
, nir_alu_dest dest
, nir_ssa_def
**src
)
731 ttn_move_dest_masked(b
, dest
,
734 ttn_swizzle(b
, src
[0], Y
, Z
, X
, X
),
735 ttn_swizzle(b
, src
[1], Z
, X
, Y
, X
)),
737 ttn_swizzle(b
, src
[1], Y
, Z
, X
, X
),
738 ttn_swizzle(b
, src
[0], Z
, X
, Y
, X
))),
740 ttn_move_dest_masked(b
, dest
, nir_imm_float(b
, 1.0), TGSI_WRITEMASK_W
);
744 ttn_dp2a(nir_builder
*b
, nir_op op
, nir_alu_dest dest
, nir_ssa_def
**src
)
746 ttn_move_dest(b
, dest
,
747 ttn_channel(b
, nir_fadd(b
, nir_fdot2(b
, src
[0], src
[1]),
753 ttn_dp2(nir_builder
*b
, nir_op op
, nir_alu_dest dest
, nir_ssa_def
**src
)
755 ttn_move_dest(b
, dest
, nir_fdot2(b
, src
[0], src
[1]));
759 ttn_dp3(nir_builder
*b
, nir_op op
, nir_alu_dest dest
, nir_ssa_def
**src
)
761 ttn_move_dest(b
, dest
, nir_fdot3(b
, src
[0], src
[1]));
765 ttn_dp4(nir_builder
*b
, nir_op op
, nir_alu_dest dest
, nir_ssa_def
**src
)
767 ttn_move_dest(b
, dest
, nir_fdot4(b
, src
[0], src
[1]));
771 ttn_dph(nir_builder
*b
, nir_op op
, nir_alu_dest dest
, nir_ssa_def
**src
)
773 ttn_move_dest(b
, dest
, nir_fadd(b
, nir_fdot3(b
, src
[0], src
[1]),
774 ttn_channel(b
, src
[1], W
)));
778 ttn_umad(nir_builder
*b
, nir_op op
, nir_alu_dest dest
, nir_ssa_def
**src
)
780 ttn_move_dest(b
, dest
, nir_iadd(b
, nir_imul(b
, src
[0], src
[1]), src
[2]));
784 ttn_arr(nir_builder
*b
, nir_op op
, nir_alu_dest dest
, nir_ssa_def
**src
)
786 ttn_move_dest(b
, dest
, nir_ffloor(b
, nir_fadd(b
, src
[0], nir_imm_float(b
, 0.5))));
790 ttn_cmp(nir_builder
*b
, nir_op op
, nir_alu_dest dest
, nir_ssa_def
**src
)
792 ttn_move_dest(b
, dest
, nir_bcsel(b
,
793 nir_flt(b
, src
[0], nir_imm_float(b
, 0.0)),
798 ttn_ucmp(nir_builder
*b
, nir_op op
, nir_alu_dest dest
, nir_ssa_def
**src
)
800 ttn_move_dest(b
, dest
, nir_bcsel(b
,
801 nir_ine(b
, src
[0], nir_imm_int(b
, 0)),
806 ttn_kill(nir_builder
*b
, nir_op op
, nir_alu_dest dest
, nir_ssa_def
**src
)
808 nir_intrinsic_instr
*discard
=
809 nir_intrinsic_instr_create(b
->shader
, nir_intrinsic_discard
);
810 nir_instr_insert_after_cf_list(b
->cf_node_list
, &discard
->instr
);
814 ttn_kill_if(nir_builder
*b
, nir_op op
, nir_alu_dest dest
, nir_ssa_def
**src
)
816 nir_ssa_def
*cmp
= nir_bany4(b
, nir_flt(b
, src
[0], nir_imm_float(b
, 0.0)));
817 nir_intrinsic_instr
*discard
=
818 nir_intrinsic_instr_create(b
->shader
, nir_intrinsic_discard_if
);
819 discard
->src
[0] = nir_src_for_ssa(cmp
);
820 nir_instr_insert_after_cf_list(b
->cf_node_list
, &discard
->instr
);
824 ttn_if(struct ttn_compile
*c
, nir_ssa_def
*src
, bool is_uint
)
826 nir_builder
*b
= &c
->build
;
828 /* Save the outside-of-the-if-statement node list. */
829 c
->if_stack
[c
->if_stack_pos
] = b
->cf_node_list
;
832 src
= ttn_channel(b
, src
, X
);
834 nir_if
*if_stmt
= nir_if_create(b
->shader
);
836 if_stmt
->condition
= nir_src_for_ssa(nir_ine(b
, src
, nir_imm_int(b
, 0)));
838 if_stmt
->condition
= nir_src_for_ssa(nir_fne(b
, src
, nir_imm_int(b
, 0)));
840 nir_cf_node_insert_end(b
->cf_node_list
, &if_stmt
->cf_node
);
842 nir_builder_insert_after_cf_list(b
, &if_stmt
->then_list
);
844 c
->if_stack
[c
->if_stack_pos
] = &if_stmt
->else_list
;
849 ttn_else(struct ttn_compile
*c
)
851 nir_builder
*b
= &c
->build
;
853 nir_builder_insert_after_cf_list(b
, c
->if_stack
[c
->if_stack_pos
- 1]);
857 ttn_endif(struct ttn_compile
*c
)
859 nir_builder
*b
= &c
->build
;
861 c
->if_stack_pos
-= 2;
862 nir_builder_insert_after_cf_list(b
, c
->if_stack
[c
->if_stack_pos
]);
866 ttn_bgnloop(struct ttn_compile
*c
)
868 nir_builder
*b
= &c
->build
;
870 /* Save the outside-of-the-loop node list. */
871 c
->loop_stack
[c
->loop_stack_pos
] = b
->cf_node_list
;
874 nir_loop
*loop
= nir_loop_create(b
->shader
);
875 nir_cf_node_insert_end(b
->cf_node_list
, &loop
->cf_node
);
877 nir_builder_insert_after_cf_list(b
, &loop
->body
);
881 ttn_cont(nir_builder
*b
)
883 nir_jump_instr
*instr
= nir_jump_instr_create(b
->shader
, nir_jump_continue
);
884 nir_instr_insert_after_cf_list(b
->cf_node_list
, &instr
->instr
);
888 ttn_brk(nir_builder
*b
)
890 nir_jump_instr
*instr
= nir_jump_instr_create(b
->shader
, nir_jump_break
);
891 nir_instr_insert_after_cf_list(b
->cf_node_list
, &instr
->instr
);
895 ttn_endloop(struct ttn_compile
*c
)
897 nir_builder
*b
= &c
->build
;
900 nir_builder_insert_after_cf_list(b
, c
->loop_stack
[c
->loop_stack_pos
]);
904 setup_texture_info(nir_tex_instr
*instr
, unsigned texture
)
907 case TGSI_TEXTURE_1D
:
908 instr
->sampler_dim
= GLSL_SAMPLER_DIM_1D
;
910 case TGSI_TEXTURE_1D_ARRAY
:
911 instr
->sampler_dim
= GLSL_SAMPLER_DIM_1D
;
912 instr
->is_array
= true;
914 case TGSI_TEXTURE_SHADOW1D
:
915 instr
->sampler_dim
= GLSL_SAMPLER_DIM_1D
;
916 instr
->is_shadow
= true;
918 case TGSI_TEXTURE_SHADOW1D_ARRAY
:
919 instr
->sampler_dim
= GLSL_SAMPLER_DIM_1D
;
920 instr
->is_shadow
= true;
921 instr
->is_array
= true;
923 case TGSI_TEXTURE_2D
:
924 instr
->sampler_dim
= GLSL_SAMPLER_DIM_2D
;
926 case TGSI_TEXTURE_2D_ARRAY
:
927 instr
->sampler_dim
= GLSL_SAMPLER_DIM_2D
;
928 instr
->is_array
= true;
930 case TGSI_TEXTURE_2D_MSAA
:
931 instr
->sampler_dim
= GLSL_SAMPLER_DIM_MS
;
933 case TGSI_TEXTURE_2D_ARRAY_MSAA
:
934 instr
->sampler_dim
= GLSL_SAMPLER_DIM_MS
;
935 instr
->is_array
= true;
937 case TGSI_TEXTURE_SHADOW2D
:
938 instr
->sampler_dim
= GLSL_SAMPLER_DIM_2D
;
939 instr
->is_shadow
= true;
941 case TGSI_TEXTURE_SHADOW2D_ARRAY
:
942 instr
->sampler_dim
= GLSL_SAMPLER_DIM_2D
;
943 instr
->is_shadow
= true;
944 instr
->is_array
= true;
946 case TGSI_TEXTURE_3D
:
947 instr
->sampler_dim
= GLSL_SAMPLER_DIM_3D
;
949 case TGSI_TEXTURE_CUBE
:
950 instr
->sampler_dim
= GLSL_SAMPLER_DIM_CUBE
;
952 case TGSI_TEXTURE_CUBE_ARRAY
:
953 instr
->sampler_dim
= GLSL_SAMPLER_DIM_CUBE
;
954 instr
->is_array
= true;
956 case TGSI_TEXTURE_SHADOWCUBE
:
957 instr
->sampler_dim
= GLSL_SAMPLER_DIM_CUBE
;
958 instr
->is_shadow
= true;
960 case TGSI_TEXTURE_SHADOWCUBE_ARRAY
:
961 instr
->sampler_dim
= GLSL_SAMPLER_DIM_CUBE
;
962 instr
->is_shadow
= true;
963 instr
->is_array
= true;
965 case TGSI_TEXTURE_RECT
:
966 instr
->sampler_dim
= GLSL_SAMPLER_DIM_RECT
;
968 case TGSI_TEXTURE_SHADOWRECT
:
969 instr
->sampler_dim
= GLSL_SAMPLER_DIM_RECT
;
970 instr
->is_shadow
= true;
973 fprintf(stderr
, "Unknown TGSI texture target %d\n", texture
);
979 ttn_tex(struct ttn_compile
*c
, nir_alu_dest dest
, nir_ssa_def
**src
)
981 nir_builder
*b
= &c
->build
;
982 struct tgsi_full_instruction
*tgsi_inst
= &c
->token
->FullInstruction
;
983 nir_tex_instr
*instr
;
985 unsigned num_srcs
, samp
= 1;
987 switch (tgsi_inst
->Instruction
.Opcode
) {
988 case TGSI_OPCODE_TEX
:
992 case TGSI_OPCODE_TXP
:
996 case TGSI_OPCODE_TXB
:
1000 case TGSI_OPCODE_TXL
:
1004 case TGSI_OPCODE_TXF
:
1008 case TGSI_OPCODE_TXD
:
1015 fprintf(stderr
, "unknown TGSI tex op %d\n", tgsi_inst
->Instruction
.Opcode
);
1019 if (tgsi_inst
->Texture
.Texture
== TGSI_TEXTURE_SHADOW1D
||
1020 tgsi_inst
->Texture
.Texture
== TGSI_TEXTURE_SHADOW1D_ARRAY
||
1021 tgsi_inst
->Texture
.Texture
== TGSI_TEXTURE_SHADOW2D
||
1022 tgsi_inst
->Texture
.Texture
== TGSI_TEXTURE_SHADOW2D_ARRAY
||
1023 tgsi_inst
->Texture
.Texture
== TGSI_TEXTURE_SHADOWRECT
||
1024 tgsi_inst
->Texture
.Texture
== TGSI_TEXTURE_SHADOWCUBE
||
1025 tgsi_inst
->Texture
.Texture
== TGSI_TEXTURE_SHADOWCUBE_ARRAY
) {
1029 instr
= nir_tex_instr_create(b
->shader
, num_srcs
);
1032 setup_texture_info(instr
, tgsi_inst
->Texture
.Texture
);
1034 switch (instr
->sampler_dim
) {
1035 case GLSL_SAMPLER_DIM_1D
:
1036 case GLSL_SAMPLER_DIM_BUF
:
1037 instr
->coord_components
= 1;
1039 case GLSL_SAMPLER_DIM_2D
:
1040 case GLSL_SAMPLER_DIM_RECT
:
1041 case GLSL_SAMPLER_DIM_EXTERNAL
:
1042 case GLSL_SAMPLER_DIM_MS
:
1043 instr
->coord_components
= 2;
1045 case GLSL_SAMPLER_DIM_3D
:
1046 case GLSL_SAMPLER_DIM_CUBE
:
1047 instr
->coord_components
= 3;
1051 if (instr
->is_array
)
1052 instr
->coord_components
++;
1054 assert(tgsi_inst
->Src
[samp
].Register
.File
== TGSI_FILE_SAMPLER
);
1055 instr
->sampler_index
= tgsi_inst
->Src
[samp
].Register
.Index
;
1057 unsigned src_number
= 0;
1059 instr
->src
[src_number
].src
=
1060 nir_src_for_ssa(nir_swizzle(b
, src
[0], SWIZ(X
, Y
, Z
, W
),
1061 instr
->coord_components
, false));
1062 instr
->src
[src_number
].src_type
= nir_tex_src_coord
;
1065 if (tgsi_inst
->Instruction
.Opcode
== TGSI_OPCODE_TXP
) {
1066 instr
->src
[src_number
].src
= nir_src_for_ssa(ttn_channel(b
, src
[0], W
));
1067 instr
->src
[src_number
].src_type
= nir_tex_src_projector
;
1071 if (tgsi_inst
->Instruction
.Opcode
== TGSI_OPCODE_TXB
) {
1072 instr
->src
[src_number
].src
= nir_src_for_ssa(ttn_channel(b
, src
[0], W
));
1073 instr
->src
[src_number
].src_type
= nir_tex_src_bias
;
1077 if (tgsi_inst
->Instruction
.Opcode
== TGSI_OPCODE_TXL
) {
1078 instr
->src
[src_number
].src
= nir_src_for_ssa(ttn_channel(b
, src
[0], W
));
1079 instr
->src
[src_number
].src_type
= nir_tex_src_lod
;
1083 if (tgsi_inst
->Instruction
.Opcode
== TGSI_OPCODE_TXD
) {
1084 instr
->src
[src_number
].src
=
1085 nir_src_for_ssa(nir_swizzle(b
, src
[1], SWIZ(X
, Y
, Z
, W
),
1086 instr
->coord_components
, false));
1087 instr
->src
[src_number
].src_type
= nir_tex_src_ddx
;
1089 instr
->src
[src_number
].src
=
1090 nir_src_for_ssa(nir_swizzle(b
, src
[2], SWIZ(X
, Y
, Z
, W
),
1091 instr
->coord_components
, false));
1092 instr
->src
[src_number
].src_type
= nir_tex_src_ddy
;
1096 if (instr
->is_shadow
) {
1097 if (instr
->coord_components
< 3)
1098 instr
->src
[src_number
].src
= nir_src_for_ssa(ttn_channel(b
, src
[0], Z
));
1100 instr
->src
[src_number
].src
= nir_src_for_ssa(ttn_channel(b
, src
[0], W
));
1102 instr
->src
[src_number
].src_type
= nir_tex_src_comparitor
;
1106 assert(src_number
== num_srcs
);
1108 nir_ssa_dest_init(&instr
->instr
, &instr
->dest
, 4, NULL
);
1109 nir_instr_insert_after_cf_list(b
->cf_node_list
, &instr
->instr
);
1111 /* Resolve the writemask on the texture op. */
1112 ttn_move_dest(b
, dest
, &instr
->dest
.ssa
);
1115 /* TGSI_OPCODE_TXQ is actually two distinct operations:
1117 * dst.x = texture\_width(unit, lod)
1118 * dst.y = texture\_height(unit, lod)
1119 * dst.z = texture\_depth(unit, lod)
1120 * dst.w = texture\_levels(unit)
1122 * dst.xyz map to NIR txs opcode, and dst.w maps to query_levels
1125 ttn_txq(struct ttn_compile
*c
, nir_alu_dest dest
, nir_ssa_def
**src
)
1127 nir_builder
*b
= &c
->build
;
1128 struct tgsi_full_instruction
*tgsi_inst
= &c
->token
->FullInstruction
;
1129 nir_tex_instr
*txs
, *qlv
;
1131 txs
= nir_tex_instr_create(b
->shader
, 1);
1132 txs
->op
= nir_texop_txs
;
1133 setup_texture_info(txs
, tgsi_inst
->Texture
.Texture
);
1135 qlv
= nir_tex_instr_create(b
->shader
, 0);
1136 qlv
->op
= nir_texop_query_levels
;
1137 setup_texture_info(qlv
, tgsi_inst
->Texture
.Texture
);
1139 assert(tgsi_inst
->Src
[1].Register
.File
== TGSI_FILE_SAMPLER
);
1140 txs
->sampler_index
= tgsi_inst
->Src
[1].Register
.Index
;
1141 qlv
->sampler_index
= tgsi_inst
->Src
[1].Register
.Index
;
1143 /* only single src, the lod: */
1144 txs
->src
[0].src
= nir_src_for_ssa(ttn_channel(b
, src
[0], X
));
1145 txs
->src
[0].src_type
= nir_tex_src_lod
;
1147 nir_ssa_dest_init(&txs
->instr
, &txs
->dest
, 3, NULL
);
1148 nir_instr_insert_after_cf_list(b
->cf_node_list
, &txs
->instr
);
1150 nir_ssa_dest_init(&qlv
->instr
, &qlv
->dest
, 1, NULL
);
1151 nir_instr_insert_after_cf_list(b
->cf_node_list
, &qlv
->instr
);
1153 ttn_move_dest_masked(b
, dest
, &txs
->dest
.ssa
, TGSI_WRITEMASK_XYZ
);
1154 ttn_move_dest_masked(b
, dest
, &qlv
->dest
.ssa
, TGSI_WRITEMASK_W
);
1157 static const nir_op op_trans
[TGSI_OPCODE_LAST
] = {
1158 [TGSI_OPCODE_ARL
] = 0,
1159 [TGSI_OPCODE_MOV
] = nir_op_fmov
,
1160 [TGSI_OPCODE_LIT
] = 0,
1161 [TGSI_OPCODE_RCP
] = nir_op_frcp
,
1162 [TGSI_OPCODE_RSQ
] = nir_op_frsq
,
1163 [TGSI_OPCODE_EXP
] = 0,
1164 [TGSI_OPCODE_LOG
] = 0,
1165 [TGSI_OPCODE_MUL
] = nir_op_fmul
,
1166 [TGSI_OPCODE_ADD
] = nir_op_fadd
,
1167 [TGSI_OPCODE_DP3
] = 0,
1168 [TGSI_OPCODE_DP4
] = 0,
1169 [TGSI_OPCODE_DST
] = 0,
1170 [TGSI_OPCODE_MIN
] = nir_op_fmin
,
1171 [TGSI_OPCODE_MAX
] = nir_op_fmax
,
1172 [TGSI_OPCODE_SLT
] = nir_op_slt
,
1173 [TGSI_OPCODE_SGE
] = nir_op_sge
,
1174 [TGSI_OPCODE_MAD
] = nir_op_ffma
,
1175 [TGSI_OPCODE_SUB
] = nir_op_fsub
,
1176 [TGSI_OPCODE_LRP
] = 0,
1177 [TGSI_OPCODE_SQRT
] = nir_op_fsqrt
,
1178 [TGSI_OPCODE_DP2A
] = 0,
1179 [TGSI_OPCODE_FRC
] = nir_op_ffract
,
1180 [TGSI_OPCODE_CLAMP
] = 0,
1181 [TGSI_OPCODE_FLR
] = nir_op_ffloor
,
1182 [TGSI_OPCODE_ROUND
] = nir_op_fround_even
,
1183 [TGSI_OPCODE_EX2
] = nir_op_fexp2
,
1184 [TGSI_OPCODE_LG2
] = nir_op_flog2
,
1185 [TGSI_OPCODE_POW
] = nir_op_fpow
,
1186 [TGSI_OPCODE_XPD
] = 0,
1187 [TGSI_OPCODE_ABS
] = nir_op_fabs
,
1188 [TGSI_OPCODE_DPH
] = 0,
1189 [TGSI_OPCODE_COS
] = nir_op_fcos
,
1190 [TGSI_OPCODE_DDX
] = nir_op_fddx
,
1191 [TGSI_OPCODE_DDY
] = nir_op_fddy
,
1192 [TGSI_OPCODE_KILL
] = 0,
1193 [TGSI_OPCODE_PK2H
] = 0, /* XXX */
1194 [TGSI_OPCODE_PK2US
] = 0, /* XXX */
1195 [TGSI_OPCODE_PK4B
] = 0, /* XXX */
1196 [TGSI_OPCODE_PK4UB
] = 0, /* XXX */
1197 [TGSI_OPCODE_SEQ
] = nir_op_seq
,
1198 [TGSI_OPCODE_SGT
] = 0,
1199 [TGSI_OPCODE_SIN
] = nir_op_fsin
,
1200 [TGSI_OPCODE_SLE
] = 0,
1201 [TGSI_OPCODE_TEX
] = 0,
1202 [TGSI_OPCODE_TXD
] = 0,
1203 [TGSI_OPCODE_TXP
] = 0,
1204 [TGSI_OPCODE_UP2H
] = 0, /* XXX */
1205 [TGSI_OPCODE_UP2US
] = 0, /* XXX */
1206 [TGSI_OPCODE_UP4B
] = 0, /* XXX */
1207 [TGSI_OPCODE_UP4UB
] = 0, /* XXX */
1208 [TGSI_OPCODE_ARR
] = 0,
1210 /* No function calls, yet. */
1211 [TGSI_OPCODE_CAL
] = 0, /* XXX */
1212 [TGSI_OPCODE_RET
] = 0, /* XXX */
1214 [TGSI_OPCODE_SSG
] = nir_op_fsign
,
1215 [TGSI_OPCODE_CMP
] = 0,
1216 [TGSI_OPCODE_SCS
] = 0,
1217 [TGSI_OPCODE_TXB
] = 0,
1218 [TGSI_OPCODE_DIV
] = nir_op_fdiv
,
1219 [TGSI_OPCODE_DP2
] = 0,
1220 [TGSI_OPCODE_DP2A
] = 0,
1221 [TGSI_OPCODE_TXL
] = 0,
1223 [TGSI_OPCODE_BRK
] = 0,
1224 [TGSI_OPCODE_IF
] = 0,
1225 [TGSI_OPCODE_UIF
] = 0,
1226 [TGSI_OPCODE_ELSE
] = 0,
1227 [TGSI_OPCODE_ENDIF
] = 0,
1229 [TGSI_OPCODE_DDX_FINE
] = nir_op_fddx_fine
,
1230 [TGSI_OPCODE_DDY_FINE
] = nir_op_fddy_fine
,
1232 [TGSI_OPCODE_PUSHA
] = 0, /* XXX */
1233 [TGSI_OPCODE_POPA
] = 0, /* XXX */
1235 [TGSI_OPCODE_CEIL
] = nir_op_fceil
,
1236 [TGSI_OPCODE_I2F
] = nir_op_i2f
,
1237 [TGSI_OPCODE_NOT
] = nir_op_inot
,
1238 [TGSI_OPCODE_TRUNC
] = nir_op_ftrunc
,
1239 [TGSI_OPCODE_SHL
] = nir_op_ishl
,
1240 [TGSI_OPCODE_AND
] = nir_op_iand
,
1241 [TGSI_OPCODE_OR
] = nir_op_ior
,
1242 [TGSI_OPCODE_MOD
] = nir_op_umod
,
1243 [TGSI_OPCODE_XOR
] = nir_op_ixor
,
1244 [TGSI_OPCODE_SAD
] = 0, /* XXX */
1245 [TGSI_OPCODE_TXF
] = 0,
1246 [TGSI_OPCODE_TXQ
] = 0,
1248 [TGSI_OPCODE_CONT
] = 0,
1250 [TGSI_OPCODE_EMIT
] = 0, /* XXX */
1251 [TGSI_OPCODE_ENDPRIM
] = 0, /* XXX */
1253 [TGSI_OPCODE_BGNLOOP
] = 0,
1254 [TGSI_OPCODE_BGNSUB
] = 0, /* XXX: no function calls */
1255 [TGSI_OPCODE_ENDLOOP
] = 0,
1256 [TGSI_OPCODE_ENDSUB
] = 0, /* XXX: no function calls */
1258 [TGSI_OPCODE_TXQ_LZ
] = 0,
1259 [TGSI_OPCODE_NOP
] = 0,
1260 [TGSI_OPCODE_FSEQ
] = nir_op_feq
,
1261 [TGSI_OPCODE_FSGE
] = nir_op_fge
,
1262 [TGSI_OPCODE_FSLT
] = nir_op_flt
,
1263 [TGSI_OPCODE_FSNE
] = nir_op_fne
,
1265 /* No control flow yet */
1266 [TGSI_OPCODE_CALLNZ
] = 0, /* XXX */
1267 [TGSI_OPCODE_BREAKC
] = 0, /* not emitted by glsl_to_tgsi.cpp */
1269 [TGSI_OPCODE_KILL_IF
] = 0,
1271 [TGSI_OPCODE_END
] = 0,
1273 [TGSI_OPCODE_F2I
] = nir_op_f2i
,
1274 [TGSI_OPCODE_IDIV
] = nir_op_idiv
,
1275 [TGSI_OPCODE_IMAX
] = nir_op_imax
,
1276 [TGSI_OPCODE_IMIN
] = nir_op_imin
,
1277 [TGSI_OPCODE_INEG
] = nir_op_ineg
,
1278 [TGSI_OPCODE_ISGE
] = nir_op_ige
,
1279 [TGSI_OPCODE_ISHR
] = nir_op_ishr
,
1280 [TGSI_OPCODE_ISLT
] = nir_op_ilt
,
1281 [TGSI_OPCODE_F2U
] = nir_op_f2u
,
1282 [TGSI_OPCODE_U2F
] = nir_op_u2f
,
1283 [TGSI_OPCODE_UADD
] = nir_op_iadd
,
1284 [TGSI_OPCODE_UDIV
] = nir_op_udiv
,
1285 [TGSI_OPCODE_UMAD
] = 0,
1286 [TGSI_OPCODE_UMAX
] = nir_op_umax
,
1287 [TGSI_OPCODE_UMIN
] = nir_op_umin
,
1288 [TGSI_OPCODE_UMOD
] = nir_op_umod
,
1289 [TGSI_OPCODE_UMUL
] = nir_op_imul
,
1290 [TGSI_OPCODE_USEQ
] = nir_op_ieq
,
1291 [TGSI_OPCODE_USGE
] = nir_op_uge
,
1292 [TGSI_OPCODE_USHR
] = nir_op_ushr
,
1293 [TGSI_OPCODE_USLT
] = nir_op_ult
,
1294 [TGSI_OPCODE_USNE
] = nir_op_ine
,
1296 [TGSI_OPCODE_SWITCH
] = 0, /* not emitted by glsl_to_tgsi.cpp */
1297 [TGSI_OPCODE_CASE
] = 0, /* not emitted by glsl_to_tgsi.cpp */
1298 [TGSI_OPCODE_DEFAULT
] = 0, /* not emitted by glsl_to_tgsi.cpp */
1299 [TGSI_OPCODE_ENDSWITCH
] = 0, /* not emitted by glsl_to_tgsi.cpp */
1301 /* XXX: SAMPLE opcodes */
1303 [TGSI_OPCODE_UARL
] = nir_op_imov
,
1304 [TGSI_OPCODE_UCMP
] = 0,
1305 [TGSI_OPCODE_IABS
] = nir_op_iabs
,
1306 [TGSI_OPCODE_ISSG
] = nir_op_isign
,
1310 [TGSI_OPCODE_TEX2
] = 0,
1311 [TGSI_OPCODE_TXB2
] = 0,
1312 [TGSI_OPCODE_TXL2
] = 0,
1314 [TGSI_OPCODE_IMUL_HI
] = nir_op_imul_high
,
1315 [TGSI_OPCODE_UMUL_HI
] = nir_op_umul_high
,
1317 [TGSI_OPCODE_TG4
] = 0,
1318 [TGSI_OPCODE_LODQ
] = 0, /* XXX */
1320 [TGSI_OPCODE_IBFE
] = nir_op_ibitfield_extract
,
1321 [TGSI_OPCODE_UBFE
] = nir_op_ubitfield_extract
,
1322 [TGSI_OPCODE_BFI
] = nir_op_bitfield_insert
,
1323 [TGSI_OPCODE_BREV
] = nir_op_bitfield_reverse
,
1324 [TGSI_OPCODE_POPC
] = nir_op_bit_count
,
1325 [TGSI_OPCODE_LSB
] = nir_op_find_lsb
,
1326 [TGSI_OPCODE_IMSB
] = nir_op_ifind_msb
,
1327 [TGSI_OPCODE_UMSB
] = nir_op_ifind_msb
, /* XXX: signed vs unsigned */
1329 [TGSI_OPCODE_INTERP_CENTROID
] = 0, /* XXX */
1330 [TGSI_OPCODE_INTERP_SAMPLE
] = 0, /* XXX */
1331 [TGSI_OPCODE_INTERP_OFFSET
] = 0, /* XXX */
1335 ttn_emit_instruction(struct ttn_compile
*c
)
1337 nir_builder
*b
= &c
->build
;
1338 struct tgsi_full_instruction
*tgsi_inst
= &c
->token
->FullInstruction
;
1340 unsigned tgsi_op
= tgsi_inst
->Instruction
.Opcode
;
1341 struct tgsi_full_dst_register
*tgsi_dst
= &tgsi_inst
->Dst
[0];
1343 if (tgsi_op
== TGSI_OPCODE_END
)
1346 nir_ssa_def
*src
[TGSI_FULL_MAX_SRC_REGISTERS
];
1347 for (i
= 0; i
< TGSI_FULL_MAX_SRC_REGISTERS
; i
++) {
1348 src
[i
] = ttn_get_src(c
, &tgsi_inst
->Src
[i
]);
1350 nir_alu_dest dest
= ttn_get_dest(c
, tgsi_dst
);
1353 case TGSI_OPCODE_RSQ
:
1354 ttn_move_dest(b
, dest
, nir_frsq(b
, ttn_channel(b
, src
[0], X
)));
1357 case TGSI_OPCODE_SQRT
:
1358 ttn_move_dest(b
, dest
, nir_fsqrt(b
, ttn_channel(b
, src
[0], X
)));
1361 case TGSI_OPCODE_RCP
:
1362 ttn_move_dest(b
, dest
, nir_frcp(b
, ttn_channel(b
, src
[0], X
)));
1365 case TGSI_OPCODE_EX2
:
1366 ttn_move_dest(b
, dest
, nir_fexp2(b
, ttn_channel(b
, src
[0], X
)));
1369 case TGSI_OPCODE_LG2
:
1370 ttn_move_dest(b
, dest
, nir_flog2(b
, ttn_channel(b
, src
[0], X
)));
1373 case TGSI_OPCODE_POW
:
1374 ttn_move_dest(b
, dest
, nir_fpow(b
,
1375 ttn_channel(b
, src
[0], X
),
1376 ttn_channel(b
, src
[1], X
)));
1379 case TGSI_OPCODE_COS
:
1380 ttn_move_dest(b
, dest
, nir_fcos(b
, ttn_channel(b
, src
[0], X
)));
1383 case TGSI_OPCODE_SIN
:
1384 ttn_move_dest(b
, dest
, nir_fsin(b
, ttn_channel(b
, src
[0], X
)));
1387 case TGSI_OPCODE_ARL
:
1388 ttn_arl(b
, op_trans
[tgsi_op
], dest
, src
);
1391 case TGSI_OPCODE_EXP
:
1392 ttn_exp(b
, op_trans
[tgsi_op
], dest
, src
);
1395 case TGSI_OPCODE_LOG
:
1396 ttn_log(b
, op_trans
[tgsi_op
], dest
, src
);
1399 case TGSI_OPCODE_DST
:
1400 ttn_dst(b
, op_trans
[tgsi_op
], dest
, src
);
1403 case TGSI_OPCODE_LIT
:
1404 ttn_lit(b
, op_trans
[tgsi_op
], dest
, src
);
1407 case TGSI_OPCODE_CLAMP
:
1408 ttn_clamp(b
, op_trans
[tgsi_op
], dest
, src
);
1411 case TGSI_OPCODE_XPD
:
1412 ttn_xpd(b
, op_trans
[tgsi_op
], dest
, src
);
1415 case TGSI_OPCODE_DP2
:
1416 ttn_dp2(b
, op_trans
[tgsi_op
], dest
, src
);
1419 case TGSI_OPCODE_DP3
:
1420 ttn_dp3(b
, op_trans
[tgsi_op
], dest
, src
);
1423 case TGSI_OPCODE_DP4
:
1424 ttn_dp4(b
, op_trans
[tgsi_op
], dest
, src
);
1427 case TGSI_OPCODE_DP2A
:
1428 ttn_dp2a(b
, op_trans
[tgsi_op
], dest
, src
);
1431 case TGSI_OPCODE_DPH
:
1432 ttn_dph(b
, op_trans
[tgsi_op
], dest
, src
);
1435 case TGSI_OPCODE_UMAD
:
1436 ttn_umad(b
, op_trans
[tgsi_op
], dest
, src
);
1439 case TGSI_OPCODE_LRP
:
1440 ttn_move_dest(b
, dest
, nir_flrp(b
, src
[2], src
[1], src
[0]));
1443 case TGSI_OPCODE_KILL
:
1444 ttn_kill(b
, op_trans
[tgsi_op
], dest
, src
);
1447 case TGSI_OPCODE_ARR
:
1448 ttn_arr(b
, op_trans
[tgsi_op
], dest
, src
);
1451 case TGSI_OPCODE_CMP
:
1452 ttn_cmp(b
, op_trans
[tgsi_op
], dest
, src
);
1455 case TGSI_OPCODE_UCMP
:
1456 ttn_ucmp(b
, op_trans
[tgsi_op
], dest
, src
);
1459 case TGSI_OPCODE_SCS
:
1460 ttn_scs(b
, op_trans
[tgsi_op
], dest
, src
);
1463 case TGSI_OPCODE_SGT
:
1464 ttn_sgt(b
, op_trans
[tgsi_op
], dest
, src
);
1467 case TGSI_OPCODE_SLE
:
1468 ttn_sle(b
, op_trans
[tgsi_op
], dest
, src
);
1471 case TGSI_OPCODE_KILL_IF
:
1472 ttn_kill_if(b
, op_trans
[tgsi_op
], dest
, src
);
1475 case TGSI_OPCODE_TEX
:
1476 case TGSI_OPCODE_TXP
:
1477 case TGSI_OPCODE_TXL
:
1478 case TGSI_OPCODE_TXB
:
1479 case TGSI_OPCODE_TXD
:
1480 case TGSI_OPCODE_TXL2
:
1481 case TGSI_OPCODE_TXB2
:
1482 case TGSI_OPCODE_TXQ_LZ
:
1483 case TGSI_OPCODE_TXF
:
1484 case TGSI_OPCODE_TG4
:
1485 ttn_tex(c
, dest
, src
);
1488 case TGSI_OPCODE_TXQ
:
1489 ttn_txq(c
, dest
, src
);
1492 case TGSI_OPCODE_NOP
:
1495 case TGSI_OPCODE_IF
:
1496 ttn_if(c
, src
[0], false);
1499 case TGSI_OPCODE_UIF
:
1500 ttn_if(c
, src
[0], true);
1503 case TGSI_OPCODE_ELSE
:
1507 case TGSI_OPCODE_ENDIF
:
1511 case TGSI_OPCODE_BGNLOOP
:
1515 case TGSI_OPCODE_BRK
:
1519 case TGSI_OPCODE_CONT
:
1523 case TGSI_OPCODE_ENDLOOP
:
1528 if (op_trans
[tgsi_op
] != 0 || tgsi_op
== TGSI_OPCODE_MOV
) {
1529 ttn_alu(b
, op_trans
[tgsi_op
], dest
, src
);
1531 fprintf(stderr
, "unknown TGSI opcode: %s\n",
1532 tgsi_get_opcode_name(tgsi_op
));
1538 if (tgsi_inst
->Instruction
.Saturate
) {
1539 assert(tgsi_inst
->Instruction
.Saturate
== TGSI_SAT_ZERO_ONE
);
1540 assert(!dest
.dest
.is_ssa
);
1541 ttn_move_dest(b
, dest
, nir_fsat(b
, ttn_src_for_dest(b
, &dest
)));
1544 /* if the dst has a matching var, append store_global to move
1545 * output from reg to var
1547 nir_variable
*var
= ttn_get_var(c
, tgsi_dst
);
1549 unsigned index
= tgsi_dst
->Register
.Index
;
1550 unsigned offset
= c
->temp_regs
[index
].offset
;
1551 nir_intrinsic_instr
*store
=
1552 nir_intrinsic_instr_create(b
->shader
, nir_intrinsic_store_var
);
1553 struct tgsi_ind_register
*indirect
= tgsi_dst
->Register
.Indirect
?
1554 &tgsi_dst
->Indirect
: NULL
;
1556 store
->num_components
= 4;
1557 store
->variables
[0] = ttn_array_deref(c
, store
, var
, offset
, indirect
);
1558 store
->src
[0] = nir_src_for_reg(dest
.dest
.reg
.reg
);
1560 nir_instr_insert_after_cf_list(b
->cf_node_list
, &store
->instr
);
1565 * Puts a NIR intrinsic to store of each TGSI_FILE_OUTPUT value to the output
1566 * variables at the end of the shader.
1568 * We don't generate these incrementally as the TGSI_FILE_OUTPUT values are
1569 * written, because there's no output load intrinsic, which means we couldn't
1570 * handle writemasks.
1573 ttn_add_output_stores(struct ttn_compile
*c
)
1575 nir_builder
*b
= &c
->build
;
1577 foreach_list_typed(nir_variable
, var
, node
, &b
->shader
->outputs
) {
1578 unsigned array_len
= MAX2(glsl_get_length(var
->type
), 1);
1581 for (i
= 0; i
< array_len
; i
++) {
1582 nir_intrinsic_instr
*store
=
1583 nir_intrinsic_instr_create(b
->shader
, nir_intrinsic_store_output
);
1584 store
->num_components
= 4;
1585 store
->const_index
[0] = var
->data
.driver_location
+ i
;
1586 store
->const_index
[1] = 1;
1587 store
->src
[0].reg
.reg
= c
->output_regs
[var
->data
.driver_location
].reg
;
1588 nir_instr_insert_after_cf_list(b
->cf_node_list
, &store
->instr
);
1594 tgsi_to_nir(const void *tgsi_tokens
,
1595 const nir_shader_compiler_options
*options
)
1597 struct tgsi_parse_context parser
;
1598 struct tgsi_shader_info scan
;
1599 struct ttn_compile
*c
;
1600 struct nir_shader
*s
;
1603 c
= rzalloc(NULL
, struct ttn_compile
);
1604 s
= nir_shader_create(NULL
, options
);
1606 nir_function
*func
= nir_function_create(s
, "main");
1607 nir_function_overload
*overload
= nir_function_overload_create(func
);
1608 nir_function_impl
*impl
= nir_function_impl_create(overload
);
1610 nir_builder_init(&c
->build
, impl
);
1611 nir_builder_insert_after_cf_list(&c
->build
, &impl
->body
);
1613 tgsi_scan_shader(tgsi_tokens
, &scan
);
1616 s
->num_inputs
= scan
.file_max
[TGSI_FILE_INPUT
] + 1;
1617 s
->num_uniforms
= scan
.file_max
[TGSI_FILE_CONSTANT
] + 1;
1618 s
->num_outputs
= scan
.file_max
[TGSI_FILE_OUTPUT
] + 1;
1620 c
->output_regs
= rzalloc_array(c
, struct ttn_reg_info
,
1621 scan
.file_max
[TGSI_FILE_OUTPUT
] + 1);
1622 c
->temp_regs
= rzalloc_array(c
, struct ttn_reg_info
,
1623 scan
.file_max
[TGSI_FILE_TEMPORARY
] + 1);
1624 c
->imm_defs
= rzalloc_array(c
, nir_ssa_def
*,
1625 scan
.file_max
[TGSI_FILE_IMMEDIATE
] + 1);
1627 c
->if_stack
= rzalloc_array(c
, struct exec_list
*,
1628 (scan
.opcode_count
[TGSI_OPCODE_IF
] +
1629 scan
.opcode_count
[TGSI_OPCODE_UIF
]) * 2);
1630 c
->loop_stack
= rzalloc_array(c
, struct exec_list
*,
1631 scan
.opcode_count
[TGSI_OPCODE_BGNLOOP
]);
1633 ret
= tgsi_parse_init(&parser
, tgsi_tokens
);
1634 assert(ret
== TGSI_PARSE_OK
);
1636 while (!tgsi_parse_end_of_tokens(&parser
)) {
1637 tgsi_parse_token(&parser
);
1638 c
->token
= &parser
.FullToken
;
1640 switch (parser
.FullToken
.Token
.Type
) {
1641 case TGSI_TOKEN_TYPE_DECLARATION
:
1642 ttn_emit_declaration(c
);
1645 case TGSI_TOKEN_TYPE_INSTRUCTION
:
1646 ttn_emit_instruction(c
);
1649 case TGSI_TOKEN_TYPE_IMMEDIATE
:
1650 ttn_emit_immediate(c
);
1655 tgsi_parse_free(&parser
);
1657 ttn_add_output_stores(c
);