2 * Copyright © 2014-2015 Broadcom
3 * Copyright (C) 2014 Rob Clark <robclark@freedesktop.org>
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9 * and/or sell copies of the Software, and to permit persons to whom the
10 * Software is furnished to do so, subject to the following conditions:
12 * The above copyright notice and this permission notice (including the next
13 * paragraph) shall be included in all copies or substantial portions of the
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
25 #include "util/ralloc.h"
26 #include "compiler/nir/nir.h"
27 #include "compiler/nir/nir_control_flow.h"
28 #include "compiler/nir/nir_builder.h"
29 #include "compiler/glsl/list.h"
30 #include "compiler/shader_enums.h"
32 #include "tgsi_to_nir.h"
33 #include "tgsi/tgsi_parse.h"
34 #include "tgsi/tgsi_dump.h"
35 #include "tgsi/tgsi_info.h"
36 #include "tgsi/tgsi_scan.h"
38 #define SWIZ(X, Y, Z, W) (unsigned[4]){ \
46 /** nir register containing this TGSI index. */
49 /** Offset (in vec4s) from the start of var for this TGSI index. */
54 union tgsi_full_token
*token
;
56 struct tgsi_shader_info
*scan
;
58 struct ttn_reg_info
*output_regs
;
59 struct ttn_reg_info
*temp_regs
;
60 nir_ssa_def
**imm_defs
;
62 unsigned num_samp_types
;
63 nir_alu_type
*samp_types
;
65 nir_register
*addr_reg
;
68 * Stack of nir_cursors where instructions should be pushed as we pop
69 * back out of the control flow stack.
71 * For each IF/ELSE/ENDIF block, if_stack[if_stack_pos] has where the else
72 * instructions should be placed, and if_stack[if_stack_pos - 1] has where
73 * the next instructions outside of the if/then/else block go.
76 unsigned if_stack_pos
;
79 * Stack of nir_cursors where instructions should be pushed as we pop
80 * back out of the control flow stack.
82 * loop_stack[loop_stack_pos - 1] contains the cf_node_list for the outside
85 nir_cursor
*loop_stack
;
86 unsigned loop_stack_pos
;
88 /* How many TGSI_FILE_IMMEDIATE vec4s have been parsed so far. */
92 #define ttn_swizzle(b, src, x, y, z, w) \
93 nir_swizzle(b, src, SWIZ(x, y, z, w), 4, false)
94 #define ttn_channel(b, src, swiz) \
95 nir_swizzle(b, src, SWIZ(swiz, swiz, swiz, swiz), 1, false)
97 static gl_varying_slot
98 tgsi_varying_semantic_to_slot(unsigned semantic
, unsigned index
)
101 case TGSI_SEMANTIC_POSITION
:
102 return VARYING_SLOT_POS
;
103 case TGSI_SEMANTIC_COLOR
:
105 return VARYING_SLOT_COL0
;
107 return VARYING_SLOT_COL1
;
108 case TGSI_SEMANTIC_BCOLOR
:
110 return VARYING_SLOT_BFC0
;
112 return VARYING_SLOT_BFC1
;
113 case TGSI_SEMANTIC_FOG
:
114 return VARYING_SLOT_FOGC
;
115 case TGSI_SEMANTIC_PSIZE
:
116 return VARYING_SLOT_PSIZ
;
117 case TGSI_SEMANTIC_GENERIC
:
118 return VARYING_SLOT_VAR0
+ index
;
119 case TGSI_SEMANTIC_FACE
:
120 return VARYING_SLOT_FACE
;
121 case TGSI_SEMANTIC_EDGEFLAG
:
122 return VARYING_SLOT_EDGE
;
123 case TGSI_SEMANTIC_PRIMID
:
124 return VARYING_SLOT_PRIMITIVE_ID
;
125 case TGSI_SEMANTIC_CLIPDIST
:
127 return VARYING_SLOT_CLIP_DIST0
;
129 return VARYING_SLOT_CLIP_DIST1
;
130 case TGSI_SEMANTIC_CLIPVERTEX
:
131 return VARYING_SLOT_CLIP_VERTEX
;
132 case TGSI_SEMANTIC_TEXCOORD
:
133 return VARYING_SLOT_TEX0
+ index
;
134 case TGSI_SEMANTIC_PCOORD
:
135 return VARYING_SLOT_PNTC
;
136 case TGSI_SEMANTIC_VIEWPORT_INDEX
:
137 return VARYING_SLOT_VIEWPORT
;
138 case TGSI_SEMANTIC_LAYER
:
139 return VARYING_SLOT_LAYER
;
141 fprintf(stderr
, "Bad TGSI semantic: %d/%d\n", semantic
, index
);
146 /* Temporary helper to remap back to TGSI style semantic name/index
147 * values, for use in drivers that haven't been converted to using
151 varying_slot_to_tgsi_semantic(gl_varying_slot slot
,
152 unsigned *semantic_name
, unsigned *semantic_index
)
154 static const unsigned map
[][2] = {
155 [VARYING_SLOT_POS
] = { TGSI_SEMANTIC_POSITION
, 0 },
156 [VARYING_SLOT_COL0
] = { TGSI_SEMANTIC_COLOR
, 0 },
157 [VARYING_SLOT_COL1
] = { TGSI_SEMANTIC_COLOR
, 1 },
158 [VARYING_SLOT_BFC0
] = { TGSI_SEMANTIC_BCOLOR
, 0 },
159 [VARYING_SLOT_BFC1
] = { TGSI_SEMANTIC_BCOLOR
, 1 },
160 [VARYING_SLOT_FOGC
] = { TGSI_SEMANTIC_FOG
, 0 },
161 [VARYING_SLOT_PSIZ
] = { TGSI_SEMANTIC_PSIZE
, 0 },
162 [VARYING_SLOT_FACE
] = { TGSI_SEMANTIC_FACE
, 0 },
163 [VARYING_SLOT_EDGE
] = { TGSI_SEMANTIC_EDGEFLAG
, 0 },
164 [VARYING_SLOT_PRIMITIVE_ID
] = { TGSI_SEMANTIC_PRIMID
, 0 },
165 [VARYING_SLOT_CLIP_DIST0
] = { TGSI_SEMANTIC_CLIPDIST
, 0 },
166 [VARYING_SLOT_CLIP_DIST1
] = { TGSI_SEMANTIC_CLIPDIST
, 1 },
167 [VARYING_SLOT_CLIP_VERTEX
] = { TGSI_SEMANTIC_CLIPVERTEX
, 0 },
168 [VARYING_SLOT_PNTC
] = { TGSI_SEMANTIC_PCOORD
, 0 },
169 [VARYING_SLOT_VIEWPORT
] = { TGSI_SEMANTIC_VIEWPORT_INDEX
, 0 },
170 [VARYING_SLOT_LAYER
] = { TGSI_SEMANTIC_LAYER
, 0 },
173 if (slot
>= VARYING_SLOT_VAR0
) {
174 *semantic_name
= TGSI_SEMANTIC_GENERIC
;
175 *semantic_index
= slot
- VARYING_SLOT_VAR0
;
179 if (slot
>= VARYING_SLOT_TEX0
&& slot
<= VARYING_SLOT_TEX7
) {
180 *semantic_name
= TGSI_SEMANTIC_TEXCOORD
;
181 *semantic_index
= slot
- VARYING_SLOT_TEX0
;
185 if (slot
>= ARRAY_SIZE(map
)) {
186 fprintf(stderr
, "Unknown varying slot %d\n", slot
);
190 *semantic_name
= map
[slot
][0];
191 *semantic_index
= map
[slot
][1];
194 /* Temporary helper to remap back to TGSI style semantic name/index
195 * values, for use in drivers that haven't been converted to using
199 frag_result_to_tgsi_semantic(gl_frag_result slot
,
200 unsigned *semantic_name
, unsigned *semantic_index
)
202 static const unsigned map
[][2] = {
203 [FRAG_RESULT_DEPTH
] = { TGSI_SEMANTIC_POSITION
, 0 },
204 [FRAG_RESULT_COLOR
] = { TGSI_SEMANTIC_COLOR
, -1 },
205 [FRAG_RESULT_DATA0
+ 0] = { TGSI_SEMANTIC_COLOR
, 0 },
206 [FRAG_RESULT_DATA0
+ 1] = { TGSI_SEMANTIC_COLOR
, 1 },
207 [FRAG_RESULT_DATA0
+ 2] = { TGSI_SEMANTIC_COLOR
, 2 },
208 [FRAG_RESULT_DATA0
+ 3] = { TGSI_SEMANTIC_COLOR
, 3 },
209 [FRAG_RESULT_DATA0
+ 4] = { TGSI_SEMANTIC_COLOR
, 4 },
210 [FRAG_RESULT_DATA0
+ 5] = { TGSI_SEMANTIC_COLOR
, 5 },
211 [FRAG_RESULT_DATA0
+ 6] = { TGSI_SEMANTIC_COLOR
, 6 },
212 [FRAG_RESULT_DATA0
+ 7] = { TGSI_SEMANTIC_COLOR
, 7 },
215 *semantic_name
= map
[slot
][0];
216 *semantic_index
= map
[slot
][1];
220 ttn_src_for_dest(nir_builder
*b
, nir_alu_dest
*dest
)
223 memset(&src
, 0, sizeof(src
));
225 if (dest
->dest
.is_ssa
)
226 src
.src
= nir_src_for_ssa(&dest
->dest
.ssa
);
228 assert(!dest
->dest
.reg
.indirect
);
229 src
.src
= nir_src_for_reg(dest
->dest
.reg
.reg
);
230 src
.src
.reg
.base_offset
= dest
->dest
.reg
.base_offset
;
233 for (int i
= 0; i
< 4; i
++)
236 return nir_fmov_alu(b
, src
, 4);
240 ttn_emit_declaration(struct ttn_compile
*c
)
242 nir_builder
*b
= &c
->build
;
243 struct tgsi_full_declaration
*decl
= &c
->token
->FullDeclaration
;
244 unsigned array_size
= decl
->Range
.Last
- decl
->Range
.First
+ 1;
245 unsigned file
= decl
->Declaration
.File
;
248 if (file
== TGSI_FILE_TEMPORARY
) {
249 if (decl
->Declaration
.Array
) {
250 /* for arrays, we create variables instead of registers: */
251 nir_variable
*var
= rzalloc(b
->shader
, nir_variable
);
253 var
->type
= glsl_array_type(glsl_vec4_type(), array_size
);
254 var
->data
.mode
= nir_var_global
;
255 var
->name
= ralloc_asprintf(var
, "arr_%d", decl
->Array
.ArrayID
);
257 exec_list_push_tail(&b
->shader
->globals
, &var
->node
);
259 for (i
= 0; i
< array_size
; i
++) {
260 /* point all the matching slots to the same var,
261 * with appropriate offset set, mostly just so
262 * we know what to do when tgsi does a non-indirect
265 c
->temp_regs
[decl
->Range
.First
+ i
].reg
= NULL
;
266 c
->temp_regs
[decl
->Range
.First
+ i
].var
= var
;
267 c
->temp_regs
[decl
->Range
.First
+ i
].offset
= i
;
270 for (i
= 0; i
< array_size
; i
++) {
271 nir_register
*reg
= nir_local_reg_create(b
->impl
);
272 reg
->num_components
= 4;
273 c
->temp_regs
[decl
->Range
.First
+ i
].reg
= reg
;
274 c
->temp_regs
[decl
->Range
.First
+ i
].var
= NULL
;
275 c
->temp_regs
[decl
->Range
.First
+ i
].offset
= 0;
278 } else if (file
== TGSI_FILE_ADDRESS
) {
279 c
->addr_reg
= nir_local_reg_create(b
->impl
);
280 c
->addr_reg
->num_components
= 4;
281 } else if (file
== TGSI_FILE_SYSTEM_VALUE
) {
282 /* Nothing to record for system values. */
283 } else if (file
== TGSI_FILE_SAMPLER
) {
284 /* Nothing to record for samplers. */
285 } else if (file
== TGSI_FILE_SAMPLER_VIEW
) {
286 struct tgsi_declaration_sampler_view
*sview
= &decl
->SamplerView
;
289 assert((sview
->ReturnTypeX
== sview
->ReturnTypeY
) &&
290 (sview
->ReturnTypeX
== sview
->ReturnTypeZ
) &&
291 (sview
->ReturnTypeX
== sview
->ReturnTypeW
));
293 switch (sview
->ReturnTypeX
) {
294 case TGSI_RETURN_TYPE_SINT
:
297 case TGSI_RETURN_TYPE_UINT
:
298 type
= nir_type_uint
;
300 case TGSI_RETURN_TYPE_FLOAT
:
302 type
= nir_type_float
;
306 for (i
= 0; i
< array_size
; i
++) {
307 c
->samp_types
[decl
->Range
.First
+ i
] = type
;
310 bool is_array
= (array_size
> 1);
312 assert(file
== TGSI_FILE_INPUT
||
313 file
== TGSI_FILE_OUTPUT
||
314 file
== TGSI_FILE_CONSTANT
);
316 /* nothing to do for UBOs: */
317 if ((file
== TGSI_FILE_CONSTANT
) && decl
->Declaration
.Dimension
)
320 if ((file
== TGSI_FILE_INPUT
) || (file
== TGSI_FILE_OUTPUT
)) {
321 is_array
= (is_array
&& decl
->Declaration
.Array
&&
322 (decl
->Array
.ArrayID
!= 0));
325 for (i
= 0; i
< array_size
; i
++) {
326 unsigned idx
= decl
->Range
.First
+ i
;
327 nir_variable
*var
= rzalloc(b
->shader
, nir_variable
);
329 var
->data
.driver_location
= idx
;
331 var
->type
= glsl_vec4_type();
333 var
->type
= glsl_array_type(var
->type
, array_size
);
336 case TGSI_FILE_INPUT
:
337 var
->data
.read_only
= true;
338 var
->data
.mode
= nir_var_shader_in
;
339 var
->name
= ralloc_asprintf(var
, "in_%d", idx
);
341 if (c
->scan
->processor
== PIPE_SHADER_FRAGMENT
) {
342 if (decl
->Semantic
.Name
== TGSI_SEMANTIC_FACE
) {
343 var
->data
.location
= SYSTEM_VALUE_FRONT_FACE
;
344 var
->data
.mode
= nir_var_system_value
;
347 tgsi_varying_semantic_to_slot(decl
->Semantic
.Name
,
348 decl
->Semantic
.Index
);
351 assert(!decl
->Declaration
.Semantic
);
352 var
->data
.location
= VERT_ATTRIB_GENERIC0
+ idx
;
356 /* We definitely need to translate the interpolation field, because
357 * nir_print will decode it.
359 switch (decl
->Interp
.Interpolate
) {
360 case TGSI_INTERPOLATE_CONSTANT
:
361 var
->data
.interpolation
= INTERP_MODE_FLAT
;
363 case TGSI_INTERPOLATE_LINEAR
:
364 var
->data
.interpolation
= INTERP_MODE_NOPERSPECTIVE
;
366 case TGSI_INTERPOLATE_PERSPECTIVE
:
367 var
->data
.interpolation
= INTERP_MODE_SMOOTH
;
371 exec_list_push_tail(&b
->shader
->inputs
, &var
->node
);
373 for (int i
= 0; i
< array_size
; i
++)
374 b
->shader
->info
->inputs_read
|= 1 << (var
->data
.location
+ i
);
377 case TGSI_FILE_OUTPUT
: {
378 int semantic_name
= decl
->Semantic
.Name
;
379 int semantic_index
= decl
->Semantic
.Index
;
380 /* Since we can't load from outputs in the IR, we make temporaries
381 * for the outputs and emit stores to the real outputs at the end of
384 nir_register
*reg
= nir_local_reg_create(b
->impl
);
385 reg
->num_components
= 4;
387 reg
->num_array_elems
= array_size
;
389 var
->data
.mode
= nir_var_shader_out
;
390 var
->name
= ralloc_asprintf(var
, "out_%d", idx
);
393 if (c
->scan
->processor
== PIPE_SHADER_FRAGMENT
) {
394 switch (semantic_name
) {
395 case TGSI_SEMANTIC_COLOR
: {
396 /* TODO tgsi loses some information, so we cannot
397 * actually differentiate here between DSB and MRT
398 * at this point. But so far no drivers using tgsi-
399 * to-nir support dual source blend:
401 bool dual_src_blend
= false;
402 if (dual_src_blend
&& (semantic_index
== 1)) {
403 var
->data
.location
= FRAG_RESULT_DATA0
;
406 if (c
->scan
->properties
[TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS
])
407 var
->data
.location
= FRAG_RESULT_COLOR
;
409 var
->data
.location
= FRAG_RESULT_DATA0
+ semantic_index
;
413 case TGSI_SEMANTIC_POSITION
:
414 var
->data
.location
= FRAG_RESULT_DEPTH
;
417 fprintf(stderr
, "Bad TGSI semantic: %d/%d\n",
418 decl
->Semantic
.Name
, decl
->Semantic
.Index
);
423 tgsi_varying_semantic_to_slot(semantic_name
, semantic_index
);
428 for (j
= 0; j
< array_size
; j
++) {
429 c
->output_regs
[idx
+ j
].offset
= i
+ j
;
430 c
->output_regs
[idx
+ j
].reg
= reg
;
433 c
->output_regs
[idx
].offset
= i
;
434 c
->output_regs
[idx
].reg
= reg
;
437 exec_list_push_tail(&b
->shader
->outputs
, &var
->node
);
439 for (int i
= 0; i
< array_size
; i
++)
440 b
->shader
->info
->outputs_written
|= 1 << (var
->data
.location
+ i
);
443 case TGSI_FILE_CONSTANT
:
444 var
->data
.mode
= nir_var_uniform
;
445 var
->name
= ralloc_asprintf(var
, "uniform_%d", idx
);
447 exec_list_push_tail(&b
->shader
->uniforms
, &var
->node
);
450 unreachable("bad declaration file");
462 ttn_emit_immediate(struct ttn_compile
*c
)
464 nir_builder
*b
= &c
->build
;
465 struct tgsi_full_immediate
*tgsi_imm
= &c
->token
->FullImmediate
;
466 nir_load_const_instr
*load_const
;
469 load_const
= nir_load_const_instr_create(b
->shader
, 4, 32);
470 c
->imm_defs
[c
->next_imm
] = &load_const
->def
;
473 for (i
= 0; i
< 4; i
++)
474 load_const
->value
.u32
[i
] = tgsi_imm
->u
[i
].Uint
;
476 nir_builder_instr_insert(b
, &load_const
->instr
);
480 ttn_src_for_indirect(struct ttn_compile
*c
, struct tgsi_ind_register
*indirect
);
482 /* generate either a constant or indirect deref chain for accessing an
485 static nir_deref_var
*
486 ttn_array_deref(struct ttn_compile
*c
, nir_intrinsic_instr
*instr
,
487 nir_variable
*var
, unsigned offset
,
488 struct tgsi_ind_register
*indirect
)
490 nir_deref_var
*deref
= nir_deref_var_create(instr
, var
);
491 nir_deref_array
*arr
= nir_deref_array_create(deref
);
493 arr
->base_offset
= offset
;
494 arr
->deref
.type
= glsl_get_array_element(var
->type
);
497 arr
->deref_array_type
= nir_deref_array_type_indirect
;
498 arr
->indirect
= nir_src_for_ssa(ttn_src_for_indirect(c
, indirect
));
500 arr
->deref_array_type
= nir_deref_array_type_direct
;
503 deref
->deref
.child
= &arr
->deref
;
509 ttn_src_for_file_and_index(struct ttn_compile
*c
, unsigned file
, unsigned index
,
510 struct tgsi_ind_register
*indirect
,
511 struct tgsi_dimension
*dim
,
512 struct tgsi_ind_register
*dimind
)
514 nir_builder
*b
= &c
->build
;
517 memset(&src
, 0, sizeof(src
));
520 case TGSI_FILE_TEMPORARY
:
521 if (c
->temp_regs
[index
].var
) {
522 unsigned offset
= c
->temp_regs
[index
].offset
;
523 nir_variable
*var
= c
->temp_regs
[index
].var
;
524 nir_intrinsic_instr
*load
;
526 load
= nir_intrinsic_instr_create(b
->shader
,
527 nir_intrinsic_load_var
);
528 load
->num_components
= 4;
529 load
->variables
[0] = ttn_array_deref(c
, load
, var
, offset
, indirect
);
530 nir_ssa_dest_init(&load
->instr
, &load
->dest
,
532 nir_builder_instr_insert(b
, &load
->instr
);
534 src
= nir_src_for_ssa(&load
->dest
.ssa
);
538 src
.reg
.reg
= c
->temp_regs
[index
].reg
;
543 case TGSI_FILE_ADDRESS
:
544 src
.reg
.reg
= c
->addr_reg
;
548 case TGSI_FILE_IMMEDIATE
:
549 src
= nir_src_for_ssa(c
->imm_defs
[index
]);
554 case TGSI_FILE_SYSTEM_VALUE
: {
555 nir_intrinsic_instr
*load
;
562 switch (c
->scan
->system_value_semantic_name
[index
]) {
563 case TGSI_SEMANTIC_VERTEXID_NOBASE
:
564 op
= nir_intrinsic_load_vertex_id_zero_base
;
566 case TGSI_SEMANTIC_VERTEXID
:
567 op
= nir_intrinsic_load_vertex_id
;
569 case TGSI_SEMANTIC_BASEVERTEX
:
570 op
= nir_intrinsic_load_base_vertex
;
572 case TGSI_SEMANTIC_INSTANCEID
:
573 op
= nir_intrinsic_load_instance_id
;
576 unreachable("bad system value");
579 load
= nir_intrinsic_instr_create(b
->shader
, op
);
580 load
->num_components
= ncomp
;
582 nir_ssa_dest_init(&load
->instr
, &load
->dest
, ncomp
, 32, NULL
);
583 nir_builder_instr_insert(b
, &load
->instr
);
585 src
= nir_src_for_ssa(&load
->dest
.ssa
);
587 b
->shader
->info
->system_values_read
|=
588 (1 << nir_system_value_from_intrinsic(op
));
593 case TGSI_FILE_INPUT
:
594 case TGSI_FILE_CONSTANT
: {
595 nir_intrinsic_instr
*load
;
600 case TGSI_FILE_INPUT
:
601 /* Special case: Turn the frontface varying into a load of the
602 * frontface intrinsic plus math, and appending the silly floats.
604 if (c
->scan
->processor
== PIPE_SHADER_FRAGMENT
&&
605 c
->scan
->input_semantic_name
[index
] == TGSI_SEMANTIC_FACE
) {
606 nir_ssa_def
*tgsi_frontface
[4] = {
608 nir_load_system_value(&c
->build
,
609 nir_intrinsic_load_front_face
, 0),
610 nir_imm_float(&c
->build
, 1.0),
611 nir_imm_float(&c
->build
, -1.0)),
612 nir_imm_float(&c
->build
, 0.0),
613 nir_imm_float(&c
->build
, 0.0),
614 nir_imm_float(&c
->build
, 1.0),
617 return nir_src_for_ssa(nir_vec(&c
->build
, tgsi_frontface
, 4));
620 op
= nir_intrinsic_load_input
;
623 case TGSI_FILE_CONSTANT
:
625 op
= nir_intrinsic_load_ubo
;
627 op
= nir_intrinsic_load_uniform
;
631 unreachable("No other load files supported");
635 load
= nir_intrinsic_instr_create(b
->shader
, op
);
637 load
->num_components
= 4;
641 ttn_src_for_file_and_index(c
, dimind
->File
, dimind
->Index
,
644 /* UBOs start at index 1 in TGSI: */
646 nir_src_for_ssa(nir_imm_int(b
, dim
->Index
- 1));
652 if (op
== nir_intrinsic_load_ubo
) {
653 /* UBO loads don't have a base offset. */
654 offset
= nir_imm_int(b
, index
);
656 offset
= nir_iadd(b
, offset
, ttn_src_for_indirect(c
, indirect
));
658 /* UBO offsets are in bytes, but TGSI gives them to us in vec4's */
659 offset
= nir_ishl(b
, offset
, nir_imm_int(b
, 4));
661 nir_intrinsic_set_base(load
, index
);
663 offset
= ttn_src_for_indirect(c
, indirect
);
665 offset
= nir_imm_int(b
, 0);
668 load
->src
[srcn
++] = nir_src_for_ssa(offset
);
670 nir_ssa_dest_init(&load
->instr
, &load
->dest
, 4, 32, NULL
);
671 nir_builder_instr_insert(b
, &load
->instr
);
673 src
= nir_src_for_ssa(&load
->dest
.ssa
);
678 unreachable("bad src file");
686 ttn_src_for_indirect(struct ttn_compile
*c
, struct tgsi_ind_register
*indirect
)
688 nir_builder
*b
= &c
->build
;
690 memset(&src
, 0, sizeof(src
));
691 for (int i
= 0; i
< 4; i
++)
692 src
.swizzle
[i
] = indirect
->Swizzle
;
693 src
.src
= ttn_src_for_file_and_index(c
,
697 return nir_imov_alu(b
, src
, 1);
701 ttn_get_dest(struct ttn_compile
*c
, struct tgsi_full_dst_register
*tgsi_fdst
)
703 struct tgsi_dst_register
*tgsi_dst
= &tgsi_fdst
->Register
;
705 unsigned index
= tgsi_dst
->Index
;
707 memset(&dest
, 0, sizeof(dest
));
709 if (tgsi_dst
->File
== TGSI_FILE_TEMPORARY
) {
710 if (c
->temp_regs
[index
].var
) {
713 /* this works, because TGSI will give us a base offset
714 * (in case of indirect index) that points back into
715 * the array. Access can be direct or indirect, we
716 * don't really care. Just create a one-shot dst reg
717 * that will get store_var'd back into the array var
718 * at the end of ttn_emit_instruction()
720 reg
= nir_local_reg_create(c
->build
.impl
);
721 reg
->num_components
= 4;
722 dest
.dest
.reg
.reg
= reg
;
723 dest
.dest
.reg
.base_offset
= 0;
725 assert(!tgsi_dst
->Indirect
);
726 dest
.dest
.reg
.reg
= c
->temp_regs
[index
].reg
;
727 dest
.dest
.reg
.base_offset
= c
->temp_regs
[index
].offset
;
729 } else if (tgsi_dst
->File
== TGSI_FILE_OUTPUT
) {
730 dest
.dest
.reg
.reg
= c
->output_regs
[index
].reg
;
731 dest
.dest
.reg
.base_offset
= c
->output_regs
[index
].offset
;
732 } else if (tgsi_dst
->File
== TGSI_FILE_ADDRESS
) {
734 dest
.dest
.reg
.reg
= c
->addr_reg
;
737 dest
.write_mask
= tgsi_dst
->WriteMask
;
738 dest
.saturate
= false;
740 if (tgsi_dst
->Indirect
&& (tgsi_dst
->File
!= TGSI_FILE_TEMPORARY
)) {
741 nir_src
*indirect
= ralloc(c
->build
.shader
, nir_src
);
742 *indirect
= nir_src_for_ssa(ttn_src_for_indirect(c
, &tgsi_fdst
->Indirect
));
743 dest
.dest
.reg
.indirect
= indirect
;
749 static nir_variable
*
750 ttn_get_var(struct ttn_compile
*c
, struct tgsi_full_dst_register
*tgsi_fdst
)
752 struct tgsi_dst_register
*tgsi_dst
= &tgsi_fdst
->Register
;
753 unsigned index
= tgsi_dst
->Index
;
755 if (tgsi_dst
->File
== TGSI_FILE_TEMPORARY
) {
756 /* we should not have an indirect when there is no var! */
757 if (!c
->temp_regs
[index
].var
)
758 assert(!tgsi_dst
->Indirect
);
759 return c
->temp_regs
[index
].var
;
766 ttn_get_src(struct ttn_compile
*c
, struct tgsi_full_src_register
*tgsi_fsrc
)
768 nir_builder
*b
= &c
->build
;
769 struct tgsi_src_register
*tgsi_src
= &tgsi_fsrc
->Register
;
770 unsigned tgsi_opcode
= c
->token
->FullInstruction
.Instruction
.Opcode
;
771 unsigned tgsi_src_type
= tgsi_opcode_infer_src_type(tgsi_opcode
);
772 bool src_is_float
= !(tgsi_src_type
== TGSI_TYPE_SIGNED
||
773 tgsi_src_type
== TGSI_TYPE_UNSIGNED
);
776 memset(&src
, 0, sizeof(src
));
778 if (tgsi_src
->File
== TGSI_FILE_NULL
) {
779 return nir_imm_float(b
, 0.0);
780 } else if (tgsi_src
->File
== TGSI_FILE_SAMPLER
) {
781 /* Only the index of the sampler gets used in texturing, and it will
782 * handle looking that up on its own instead of using the nir_alu_src.
784 assert(!tgsi_src
->Indirect
);
787 struct tgsi_ind_register
*ind
= NULL
;
788 struct tgsi_dimension
*dim
= NULL
;
789 struct tgsi_ind_register
*dimind
= NULL
;
790 if (tgsi_src
->Indirect
)
791 ind
= &tgsi_fsrc
->Indirect
;
792 if (tgsi_src
->Dimension
) {
793 dim
= &tgsi_fsrc
->Dimension
;
795 dimind
= &tgsi_fsrc
->DimIndirect
;
797 src
.src
= ttn_src_for_file_and_index(c
,
803 src
.swizzle
[0] = tgsi_src
->SwizzleX
;
804 src
.swizzle
[1] = tgsi_src
->SwizzleY
;
805 src
.swizzle
[2] = tgsi_src
->SwizzleZ
;
806 src
.swizzle
[3] = tgsi_src
->SwizzleW
;
808 nir_ssa_def
*def
= nir_fmov_alu(b
, src
, 4);
810 if (tgsi_src
->Absolute
) {
812 def
= nir_fabs(b
, def
);
814 def
= nir_iabs(b
, def
);
817 if (tgsi_src
->Negate
) {
819 def
= nir_fneg(b
, def
);
821 def
= nir_ineg(b
, def
);
828 ttn_alu(nir_builder
*b
, nir_op op
, nir_alu_dest dest
, nir_ssa_def
**src
)
830 unsigned num_srcs
= nir_op_infos
[op
].num_inputs
;
831 nir_alu_instr
*instr
= nir_alu_instr_create(b
->shader
, op
);
834 for (i
= 0; i
< num_srcs
; i
++)
835 instr
->src
[i
].src
= nir_src_for_ssa(src
[i
]);
838 nir_builder_instr_insert(b
, &instr
->instr
);
842 ttn_move_dest_masked(nir_builder
*b
, nir_alu_dest dest
,
843 nir_ssa_def
*def
, unsigned write_mask
)
845 if (!(dest
.write_mask
& write_mask
))
848 nir_alu_instr
*mov
= nir_alu_instr_create(b
->shader
, nir_op_imov
);
850 mov
->dest
.write_mask
&= write_mask
;
851 mov
->src
[0].src
= nir_src_for_ssa(def
);
852 for (unsigned i
= def
->num_components
; i
< 4; i
++)
853 mov
->src
[0].swizzle
[i
] = def
->num_components
- 1;
854 nir_builder_instr_insert(b
, &mov
->instr
);
858 ttn_move_dest(nir_builder
*b
, nir_alu_dest dest
, nir_ssa_def
*def
)
860 ttn_move_dest_masked(b
, dest
, def
, TGSI_WRITEMASK_XYZW
);
864 ttn_arl(nir_builder
*b
, nir_op op
, nir_alu_dest dest
, nir_ssa_def
**src
)
866 ttn_move_dest(b
, dest
, nir_f2i(b
, nir_ffloor(b
, src
[0])));
869 /* EXP - Approximate Exponential Base 2
870 * dst.x = 2^{\lfloor src.x\rfloor}
871 * dst.y = src.x - \lfloor src.x\rfloor
876 ttn_exp(nir_builder
*b
, nir_op op
, nir_alu_dest dest
, nir_ssa_def
**src
)
878 nir_ssa_def
*srcx
= ttn_channel(b
, src
[0], X
);
880 ttn_move_dest_masked(b
, dest
, nir_fexp2(b
, nir_ffloor(b
, srcx
)),
882 ttn_move_dest_masked(b
, dest
, nir_fsub(b
, srcx
, nir_ffloor(b
, srcx
)),
884 ttn_move_dest_masked(b
, dest
, nir_fexp2(b
, srcx
), TGSI_WRITEMASK_Z
);
885 ttn_move_dest_masked(b
, dest
, nir_imm_float(b
, 1.0), TGSI_WRITEMASK_W
);
888 /* LOG - Approximate Logarithm Base 2
889 * dst.x = \lfloor\log_2{|src.x|}\rfloor
890 * dst.y = \frac{|src.x|}{2^{\lfloor\log_2{|src.x|}\rfloor}}
891 * dst.z = \log_2{|src.x|}
895 ttn_log(nir_builder
*b
, nir_op op
, nir_alu_dest dest
, nir_ssa_def
**src
)
897 nir_ssa_def
*abs_srcx
= nir_fabs(b
, ttn_channel(b
, src
[0], X
));
898 nir_ssa_def
*log2
= nir_flog2(b
, abs_srcx
);
900 ttn_move_dest_masked(b
, dest
, nir_ffloor(b
, log2
), TGSI_WRITEMASK_X
);
901 ttn_move_dest_masked(b
, dest
,
902 nir_fdiv(b
, abs_srcx
, nir_fexp2(b
, nir_ffloor(b
, log2
))),
904 ttn_move_dest_masked(b
, dest
, nir_flog2(b
, abs_srcx
), TGSI_WRITEMASK_Z
);
905 ttn_move_dest_masked(b
, dest
, nir_imm_float(b
, 1.0), TGSI_WRITEMASK_W
);
908 /* DST - Distance Vector
910 * dst.y = src0.y \times src1.y
915 ttn_dst(nir_builder
*b
, nir_op op
, nir_alu_dest dest
, nir_ssa_def
**src
)
917 ttn_move_dest_masked(b
, dest
, nir_imm_float(b
, 1.0), TGSI_WRITEMASK_X
);
918 ttn_move_dest_masked(b
, dest
, nir_fmul(b
, src
[0], src
[1]), TGSI_WRITEMASK_Y
);
919 ttn_move_dest_masked(b
, dest
, nir_fmov(b
, src
[0]), TGSI_WRITEMASK_Z
);
920 ttn_move_dest_masked(b
, dest
, nir_fmov(b
, src
[1]), TGSI_WRITEMASK_W
);
923 /* LIT - Light Coefficients
925 * dst.y = max(src.x, 0.0)
926 * dst.z = (src.x > 0.0) ? max(src.y, 0.0)^{clamp(src.w, -128.0, 128.0))} : 0
930 ttn_lit(nir_builder
*b
, nir_op op
, nir_alu_dest dest
, nir_ssa_def
**src
)
932 ttn_move_dest_masked(b
, dest
, nir_imm_float(b
, 1.0), TGSI_WRITEMASK_XW
);
934 ttn_move_dest_masked(b
, dest
, nir_fmax(b
, ttn_channel(b
, src
[0], X
),
935 nir_imm_float(b
, 0.0)), TGSI_WRITEMASK_Y
);
937 if (dest
.write_mask
& TGSI_WRITEMASK_Z
) {
938 nir_ssa_def
*src0_y
= ttn_channel(b
, src
[0], Y
);
939 nir_ssa_def
*wclamp
= nir_fmax(b
, nir_fmin(b
, ttn_channel(b
, src
[0], W
),
940 nir_imm_float(b
, 128.0)),
941 nir_imm_float(b
, -128.0));
942 nir_ssa_def
*pow
= nir_fpow(b
, nir_fmax(b
, src0_y
, nir_imm_float(b
, 0.0)),
945 ttn_move_dest_masked(b
, dest
,
948 nir_imm_float(b
, 0.0),
949 ttn_channel(b
, src
[0], X
)),
950 nir_imm_float(b
, 0.0),
957 * dst.x = \cos{src.x}
958 * dst.y = \sin{src.x}
963 ttn_scs(nir_builder
*b
, nir_op op
, nir_alu_dest dest
, nir_ssa_def
**src
)
965 ttn_move_dest_masked(b
, dest
, nir_fcos(b
, ttn_channel(b
, src
[0], X
)),
967 ttn_move_dest_masked(b
, dest
, nir_fsin(b
, ttn_channel(b
, src
[0], X
)),
969 ttn_move_dest_masked(b
, dest
, nir_imm_float(b
, 0.0), TGSI_WRITEMASK_Z
);
970 ttn_move_dest_masked(b
, dest
, nir_imm_float(b
, 1.0), TGSI_WRITEMASK_W
);
974 ttn_sle(nir_builder
*b
, nir_op op
, nir_alu_dest dest
, nir_ssa_def
**src
)
976 ttn_move_dest(b
, dest
, nir_sge(b
, src
[1], src
[0]));
980 ttn_sgt(nir_builder
*b
, nir_op op
, nir_alu_dest dest
, nir_ssa_def
**src
)
982 ttn_move_dest(b
, dest
, nir_slt(b
, src
[1], src
[0]));
986 ttn_clamp(nir_builder
*b
, nir_op op
, nir_alu_dest dest
, nir_ssa_def
**src
)
988 ttn_move_dest(b
, dest
, nir_fmin(b
, nir_fmax(b
, src
[0], src
[1]), src
[2]));
992 ttn_xpd(nir_builder
*b
, nir_op op
, nir_alu_dest dest
, nir_ssa_def
**src
)
994 ttn_move_dest_masked(b
, dest
,
997 ttn_swizzle(b
, src
[0], Y
, Z
, X
, X
),
998 ttn_swizzle(b
, src
[1], Z
, X
, Y
, X
)),
1000 ttn_swizzle(b
, src
[1], Y
, Z
, X
, X
),
1001 ttn_swizzle(b
, src
[0], Z
, X
, Y
, X
))),
1002 TGSI_WRITEMASK_XYZ
);
1003 ttn_move_dest_masked(b
, dest
, nir_imm_float(b
, 1.0), TGSI_WRITEMASK_W
);
1007 ttn_dp2a(nir_builder
*b
, nir_op op
, nir_alu_dest dest
, nir_ssa_def
**src
)
1009 ttn_move_dest(b
, dest
,
1010 ttn_channel(b
, nir_fadd(b
, nir_fdot2(b
, src
[0], src
[1]),
1016 ttn_dp2(nir_builder
*b
, nir_op op
, nir_alu_dest dest
, nir_ssa_def
**src
)
1018 ttn_move_dest(b
, dest
, nir_fdot2(b
, src
[0], src
[1]));
1022 ttn_dp3(nir_builder
*b
, nir_op op
, nir_alu_dest dest
, nir_ssa_def
**src
)
1024 ttn_move_dest(b
, dest
, nir_fdot3(b
, src
[0], src
[1]));
1028 ttn_dp4(nir_builder
*b
, nir_op op
, nir_alu_dest dest
, nir_ssa_def
**src
)
1030 ttn_move_dest(b
, dest
, nir_fdot4(b
, src
[0], src
[1]));
1034 ttn_dph(nir_builder
*b
, nir_op op
, nir_alu_dest dest
, nir_ssa_def
**src
)
1036 ttn_move_dest(b
, dest
, nir_fadd(b
, nir_fdot3(b
, src
[0], src
[1]),
1037 ttn_channel(b
, src
[1], W
)));
1041 ttn_umad(nir_builder
*b
, nir_op op
, nir_alu_dest dest
, nir_ssa_def
**src
)
1043 ttn_move_dest(b
, dest
, nir_iadd(b
, nir_imul(b
, src
[0], src
[1]), src
[2]));
1047 ttn_arr(nir_builder
*b
, nir_op op
, nir_alu_dest dest
, nir_ssa_def
**src
)
1049 ttn_move_dest(b
, dest
, nir_ffloor(b
, nir_fadd(b
, src
[0], nir_imm_float(b
, 0.5))));
1053 ttn_cmp(nir_builder
*b
, nir_op op
, nir_alu_dest dest
, nir_ssa_def
**src
)
1055 ttn_move_dest(b
, dest
, nir_bcsel(b
,
1056 nir_flt(b
, src
[0], nir_imm_float(b
, 0.0)),
1061 ttn_ucmp(nir_builder
*b
, nir_op op
, nir_alu_dest dest
, nir_ssa_def
**src
)
1063 ttn_move_dest(b
, dest
, nir_bcsel(b
,
1064 nir_ine(b
, src
[0], nir_imm_int(b
, 0)),
1069 ttn_kill(nir_builder
*b
, nir_op op
, nir_alu_dest dest
, nir_ssa_def
**src
)
1071 nir_intrinsic_instr
*discard
=
1072 nir_intrinsic_instr_create(b
->shader
, nir_intrinsic_discard
);
1073 nir_builder_instr_insert(b
, &discard
->instr
);
1074 b
->shader
->info
->fs
.uses_discard
= true;
1078 ttn_kill_if(nir_builder
*b
, nir_op op
, nir_alu_dest dest
, nir_ssa_def
**src
)
1080 nir_ssa_def
*cmp
= nir_bany_inequal4(b
, nir_flt(b
, src
[0],
1081 nir_imm_float(b
, 0.0)),
1083 nir_intrinsic_instr
*discard
=
1084 nir_intrinsic_instr_create(b
->shader
, nir_intrinsic_discard_if
);
1085 discard
->src
[0] = nir_src_for_ssa(cmp
);
1086 nir_builder_instr_insert(b
, &discard
->instr
);
1087 b
->shader
->info
->fs
.uses_discard
= true;
1091 ttn_if(struct ttn_compile
*c
, nir_ssa_def
*src
, bool is_uint
)
1093 nir_builder
*b
= &c
->build
;
1095 src
= ttn_channel(b
, src
, X
);
1097 nir_if
*if_stmt
= nir_if_create(b
->shader
);
1099 if_stmt
->condition
= nir_src_for_ssa(nir_ine(b
, src
, nir_imm_int(b
, 0)));
1101 if_stmt
->condition
= nir_src_for_ssa(nir_fne(b
, src
, nir_imm_int(b
, 0)));
1103 nir_builder_cf_insert(b
, &if_stmt
->cf_node
);
1105 c
->if_stack
[c
->if_stack_pos
] = nir_after_cf_node(&if_stmt
->cf_node
);
1108 b
->cursor
= nir_after_cf_list(&if_stmt
->then_list
);
1110 c
->if_stack
[c
->if_stack_pos
] = nir_after_cf_list(&if_stmt
->else_list
);
1115 ttn_else(struct ttn_compile
*c
)
1117 nir_builder
*b
= &c
->build
;
1119 b
->cursor
= c
->if_stack
[c
->if_stack_pos
- 1];
1123 ttn_endif(struct ttn_compile
*c
)
1125 nir_builder
*b
= &c
->build
;
1127 c
->if_stack_pos
-= 2;
1128 b
->cursor
= c
->if_stack
[c
->if_stack_pos
];
1132 ttn_bgnloop(struct ttn_compile
*c
)
1134 nir_builder
*b
= &c
->build
;
1136 nir_loop
*loop
= nir_loop_create(b
->shader
);
1137 nir_builder_cf_insert(b
, &loop
->cf_node
);
1139 c
->loop_stack
[c
->loop_stack_pos
] = nir_after_cf_node(&loop
->cf_node
);
1140 c
->loop_stack_pos
++;
1142 b
->cursor
= nir_after_cf_list(&loop
->body
);
1146 ttn_cont(nir_builder
*b
)
1148 nir_jump_instr
*instr
= nir_jump_instr_create(b
->shader
, nir_jump_continue
);
1149 nir_builder_instr_insert(b
, &instr
->instr
);
1153 ttn_brk(nir_builder
*b
)
1155 nir_jump_instr
*instr
= nir_jump_instr_create(b
->shader
, nir_jump_break
);
1156 nir_builder_instr_insert(b
, &instr
->instr
);
1160 ttn_endloop(struct ttn_compile
*c
)
1162 nir_builder
*b
= &c
->build
;
1164 c
->loop_stack_pos
--;
1165 b
->cursor
= c
->loop_stack
[c
->loop_stack_pos
];
/* Translate a TGSI texture target into the NIR texture-instruction fields
 * that describe it: sampler_dim, plus the is_array / is_shadow flags.
 * Unknown targets are reported on stderr and abort the translation.
 */
static void
setup_texture_info(nir_tex_instr *instr, unsigned texture)
{
   switch (texture) {
   case TGSI_TEXTURE_BUFFER:
      instr->sampler_dim = GLSL_SAMPLER_DIM_BUF;
      break;
   case TGSI_TEXTURE_1D:
      instr->sampler_dim = GLSL_SAMPLER_DIM_1D;
      break;
   case TGSI_TEXTURE_1D_ARRAY:
      instr->sampler_dim = GLSL_SAMPLER_DIM_1D;
      instr->is_array = true;
      break;
   case TGSI_TEXTURE_SHADOW1D:
      instr->sampler_dim = GLSL_SAMPLER_DIM_1D;
      instr->is_shadow = true;
      break;
   case TGSI_TEXTURE_SHADOW1D_ARRAY:
      instr->sampler_dim = GLSL_SAMPLER_DIM_1D;
      instr->is_shadow = true;
      instr->is_array = true;
      break;
   case TGSI_TEXTURE_2D:
      instr->sampler_dim = GLSL_SAMPLER_DIM_2D;
      break;
   case TGSI_TEXTURE_2D_ARRAY:
      instr->sampler_dim = GLSL_SAMPLER_DIM_2D;
      instr->is_array = true;
      break;
   case TGSI_TEXTURE_2D_MSAA:
      instr->sampler_dim = GLSL_SAMPLER_DIM_MS;
      break;
   case TGSI_TEXTURE_2D_ARRAY_MSAA:
      instr->sampler_dim = GLSL_SAMPLER_DIM_MS;
      instr->is_array = true;
      break;
   case TGSI_TEXTURE_SHADOW2D:
      instr->sampler_dim = GLSL_SAMPLER_DIM_2D;
      instr->is_shadow = true;
      break;
   case TGSI_TEXTURE_SHADOW2D_ARRAY:
      instr->sampler_dim = GLSL_SAMPLER_DIM_2D;
      instr->is_shadow = true;
      instr->is_array = true;
      break;
   case TGSI_TEXTURE_3D:
      instr->sampler_dim = GLSL_SAMPLER_DIM_3D;
      break;
   case TGSI_TEXTURE_CUBE:
      instr->sampler_dim = GLSL_SAMPLER_DIM_CUBE;
      break;
   case TGSI_TEXTURE_CUBE_ARRAY:
      instr->sampler_dim = GLSL_SAMPLER_DIM_CUBE;
      instr->is_array = true;
      break;
   case TGSI_TEXTURE_SHADOWCUBE:
      instr->sampler_dim = GLSL_SAMPLER_DIM_CUBE;
      instr->is_shadow = true;
      break;
   case TGSI_TEXTURE_SHADOWCUBE_ARRAY:
      instr->sampler_dim = GLSL_SAMPLER_DIM_CUBE;
      instr->is_shadow = true;
      instr->is_array = true;
      break;
   case TGSI_TEXTURE_RECT:
      instr->sampler_dim = GLSL_SAMPLER_DIM_RECT;
      break;
   case TGSI_TEXTURE_SHADOWRECT:
      instr->sampler_dim = GLSL_SAMPLER_DIM_RECT;
      instr->is_shadow = true;
      break;
   default:
      fprintf(stderr, "Unknown TGSI texture target %d\n", texture);
      abort();
   }
}
/* Translate one TGSI texture-sampling instruction (TEX/TXP/TXB/TXL/TXF/TXD
 * and their *2 variants, plus LODQ) into a nir_tex_instr.
 *
 * The TGSI sources are packed into a fixed number of vec4 registers, so the
 * individual NIR tex sources (coord, projector, bias, lod, comparator,
 * offsets, ...) are pulled out channel by channel.  num_srcs is computed up
 * front and the assert at the end checks that every slot was filled.
 */
static void
ttn_tex(struct ttn_compile *c, nir_alu_dest dest, nir_ssa_def **src)
{
   nir_builder *b = &c->build;
   struct tgsi_full_instruction *tgsi_inst = &c->token->FullInstruction;
   nir_tex_instr *instr;
   nir_texop op;
   /* samp is the TGSI source slot holding the sampler register; opcodes
    * with two source vec4s (TEX2/TXB2/TXL2, TXD) carry it later.
    */
   unsigned num_srcs, samp = 1, sview, i;

   switch (tgsi_inst->Instruction.Opcode) {
   case TGSI_OPCODE_TEX:
      op = nir_texop_tex;
      num_srcs = 1;
      break;
   case TGSI_OPCODE_TEX2:
      op = nir_texop_tex;
      num_srcs = 1;
      samp = 2;
      break;
   case TGSI_OPCODE_TXP:
      op = nir_texop_tex;
      num_srcs = 2;
      break;
   case TGSI_OPCODE_TXB:
      op = nir_texop_txb;
      num_srcs = 2;
      break;
   case TGSI_OPCODE_TXB2:
      op = nir_texop_txb;
      num_srcs = 2;
      samp = 2;
      break;
   case TGSI_OPCODE_TXL:
      op = nir_texop_txl;
      num_srcs = 2;
      break;
   case TGSI_OPCODE_TXL2:
      op = nir_texop_txl;
      num_srcs = 2;
      samp = 2;
      break;
   case TGSI_OPCODE_TXF:
      /* Fetch from an MSAA surface becomes txf_ms (sample index instead of
       * lod); plain fetches become txf.
       */
      if (tgsi_inst->Texture.Texture == TGSI_TEXTURE_2D_MSAA ||
          tgsi_inst->Texture.Texture == TGSI_TEXTURE_2D_ARRAY_MSAA) {
         op = nir_texop_txf_ms;
      } else {
         op = nir_texop_txf;
      }
      num_srcs = 2;
      break;
   case TGSI_OPCODE_TXD:
      op = nir_texop_txd;
      num_srcs = 3;
      samp = 3;
      break;
   case TGSI_OPCODE_LODQ:
      op = nir_texop_lod;
      num_srcs = 1;
      break;

   default:
      fprintf(stderr, "unknown TGSI tex op %d\n", tgsi_inst->Instruction.Opcode);
      abort();
   }

   /* Shadow targets carry an extra comparator source. */
   if (tgsi_inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D ||
       tgsi_inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D_ARRAY ||
       tgsi_inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D ||
       tgsi_inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D_ARRAY ||
       tgsi_inst->Texture.Texture == TGSI_TEXTURE_SHADOWRECT ||
       tgsi_inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE ||
       tgsi_inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE_ARRAY) {
      num_srcs++;
   }

   num_srcs += tgsi_inst->Texture.NumOffsets;

   instr = nir_tex_instr_create(b->shader, num_srcs);
   instr->op = op;

   setup_texture_info(instr, tgsi_inst->Texture.Texture);

   /* Base coordinate size for the sampler dimensionality; is_array adds
    * one more component below.
    */
   switch (instr->sampler_dim) {
   case GLSL_SAMPLER_DIM_1D:
   case GLSL_SAMPLER_DIM_BUF:
      instr->coord_components = 1;
      break;
   case GLSL_SAMPLER_DIM_2D:
   case GLSL_SAMPLER_DIM_RECT:
   case GLSL_SAMPLER_DIM_EXTERNAL:
   case GLSL_SAMPLER_DIM_MS:
      instr->coord_components = 2;
      break;
   case GLSL_SAMPLER_DIM_3D:
   case GLSL_SAMPLER_DIM_CUBE:
      instr->coord_components = 3;
      break;
   case GLSL_SAMPLER_DIM_SUBPASS:
      unreachable("invalid sampler_dim");
   }

   if (instr->is_array)
      instr->coord_components++;

   assert(tgsi_inst->Src[samp].Register.File == TGSI_FILE_SAMPLER);
   instr->texture_index = tgsi_inst->Src[samp].Register.Index;
   instr->sampler_index = tgsi_inst->Src[samp].Register.Index;

   /* TODO if we supported any opc's which take an explicit SVIEW
    * src, we would use that here instead.  But for the "legacy"
    * texture opc's the SVIEW index is same as SAMP index:
    */
   sview = instr->texture_index;

   if (op == nir_texop_lod) {
      instr->dest_type = nir_type_float;
   } else if (sview < c->num_samp_types) {
      instr->dest_type = c->samp_types[sview];
   } else {
      /* No sampler-view declaration seen; default to float. */
      instr->dest_type = nir_type_float;
   }

   unsigned src_number = 0;

   /* Coordinate always comes first, trimmed to coord_components. */
   instr->src[src_number].src =
      nir_src_for_ssa(nir_swizzle(b, src[0], SWIZ(X, Y, Z, W),
                                  instr->coord_components, false));
   instr->src[src_number].src_type = nir_tex_src_coord;
   src_number++;

   if (tgsi_inst->Instruction.Opcode == TGSI_OPCODE_TXP) {
      instr->src[src_number].src = nir_src_for_ssa(ttn_channel(b, src[0], W));
      instr->src[src_number].src_type = nir_tex_src_projector;
      src_number++;
   }

   if (tgsi_inst->Instruction.Opcode == TGSI_OPCODE_TXB) {
      instr->src[src_number].src = nir_src_for_ssa(ttn_channel(b, src[0], W));
      instr->src[src_number].src_type = nir_tex_src_bias;
      src_number++;
   }

   if (tgsi_inst->Instruction.Opcode == TGSI_OPCODE_TXB2) {
      instr->src[src_number].src = nir_src_for_ssa(ttn_channel(b, src[1], X));
      instr->src[src_number].src_type = nir_tex_src_bias;
      src_number++;
   }

   if (tgsi_inst->Instruction.Opcode == TGSI_OPCODE_TXL) {
      instr->src[src_number].src = nir_src_for_ssa(ttn_channel(b, src[0], W));
      instr->src[src_number].src_type = nir_tex_src_lod;
      src_number++;
   }

   if (tgsi_inst->Instruction.Opcode == TGSI_OPCODE_TXL2) {
      instr->src[src_number].src = nir_src_for_ssa(ttn_channel(b, src[1], X));
      instr->src[src_number].src_type = nir_tex_src_lod;
      src_number++;
   }

   if (tgsi_inst->Instruction.Opcode == TGSI_OPCODE_TXF) {
      instr->src[src_number].src = nir_src_for_ssa(ttn_channel(b, src[0], W));
      if (op == nir_texop_txf_ms)
         instr->src[src_number].src_type = nir_tex_src_ms_index;
      else
         instr->src[src_number].src_type = nir_tex_src_lod;
      src_number++;
   }

   if (tgsi_inst->Instruction.Opcode == TGSI_OPCODE_TXD) {
      instr->src[src_number].src =
         nir_src_for_ssa(nir_swizzle(b, src[1], SWIZ(X, Y, Z, W),
                                     instr->coord_components, false));
      instr->src[src_number].src_type = nir_tex_src_ddx;
      src_number++;
      instr->src[src_number].src =
         nir_src_for_ssa(nir_swizzle(b, src[2], SWIZ(X, Y, Z, W),
                                     instr->coord_components, false));
      instr->src[src_number].src_type = nir_tex_src_ddy;
      src_number++;
   }

   if (instr->is_shadow) {
      /* The comparator lives wherever the coordinate left room: second
       * vec4 .x when the coord fills all four channels, else .w or .z of
       * the first vec4.
       */
      if (instr->coord_components == 4)
         instr->src[src_number].src = nir_src_for_ssa(ttn_channel(b, src[1], X));
      else if (instr->coord_components == 3)
         instr->src[src_number].src = nir_src_for_ssa(ttn_channel(b, src[0], W));
      else
         instr->src[src_number].src = nir_src_for_ssa(ttn_channel(b, src[0], Z));

      instr->src[src_number].src_type = nir_tex_src_comparator;
      src_number++;
   }

   for (i = 0; i < tgsi_inst->Texture.NumOffsets; i++) {
      struct tgsi_texture_offset *tex_offset = &tgsi_inst->TexOffsets[i];
      /* since TexOffset ins't using tgsi_full_src_register we get to
       * do some extra gymnastics:
       */
      nir_alu_src src;

      memset(&src, 0, sizeof(src));

      src.src = ttn_src_for_file_and_index(c,
                                           tex_offset->File,
                                           tex_offset->Index,
                                           NULL, NULL, NULL);

      src.swizzle[0] = tex_offset->SwizzleX;
      src.swizzle[1] = tex_offset->SwizzleY;
      src.swizzle[2] = tex_offset->SwizzleZ;
      src.swizzle[3] = TGSI_SWIZZLE_W;

      instr->src[src_number].src_type = nir_tex_src_offset;
      instr->src[src_number].src = nir_src_for_ssa(
         nir_fmov_alu(b, src, nir_tex_instr_src_size(instr, src_number)));
      src_number++;
   }

   assert(src_number == num_srcs);

   nir_ssa_dest_init(&instr->instr, &instr->dest, 4, 32, NULL);
   nir_builder_instr_insert(b, &instr->instr);

   /* Resolve the writemask on the texture op. */
   ttn_move_dest(b, dest, &instr->dest.ssa);
}
/* TGSI_OPCODE_TXQ is actually two distinct operations:
 *
 *   dst.x = texture\_width(unit, lod)
 *   dst.y = texture\_height(unit, lod)
 *   dst.z = texture\_depth(unit, lod)
 *   dst.w = texture\_levels(unit)
 *
 * dst.xyz map to NIR txs opcode, and dst.w maps to query_levels
 */
static void
ttn_txq(struct ttn_compile *c, nir_alu_dest dest, nir_ssa_def **src)
{
   nir_builder *b = &c->build;
   struct tgsi_full_instruction *tgsi_inst = &c->token->FullInstruction;
   nir_tex_instr *txs, *qlv;

   /* txs returns the size at the requested lod (dst.xyz). */
   txs = nir_tex_instr_create(b->shader, 1);
   txs->op = nir_texop_txs;
   setup_texture_info(txs, tgsi_inst->Texture.Texture);

   /* query_levels takes no sources and returns the level count (dst.w). */
   qlv = nir_tex_instr_create(b->shader, 0);
   qlv->op = nir_texop_query_levels;
   setup_texture_info(qlv, tgsi_inst->Texture.Texture);

   assert(tgsi_inst->Src[1].Register.File == TGSI_FILE_SAMPLER);
   txs->texture_index = tgsi_inst->Src[1].Register.Index;
   qlv->texture_index = tgsi_inst->Src[1].Register.Index;

   /* only single src, the lod: */
   txs->src[0].src = nir_src_for_ssa(ttn_channel(b, src[0], X));
   txs->src[0].src_type = nir_tex_src_lod;

   nir_ssa_dest_init(&txs->instr, &txs->dest, 3, 32, NULL);
   nir_builder_instr_insert(b, &txs->instr);

   nir_ssa_dest_init(&qlv->instr, &qlv->dest, 1, 32, NULL);
   nir_builder_instr_insert(b, &qlv->instr);

   /* Merge the two results into the single TGSI destination. */
   ttn_move_dest_masked(b, dest, &txs->dest.ssa, TGSI_WRITEMASK_XYZ);
   ttn_move_dest_masked(b, dest, &qlv->dest.ssa, TGSI_WRITEMASK_W);
}
/* Direct TGSI-opcode -> NIR-ALU-op translation table, used by the default
 * case of ttn_emit_instruction for simple per-channel ALU instructions.
 * A zero entry means "no direct mapping": the opcode is either handled by
 * a dedicated ttn_* helper, unsupported (XXX), or never emitted.  Since 0
 * is also a valid nir_op (and MOV maps to it), ttn_emit_instruction
 * special-cases TGSI_OPCODE_MOV when testing for a zero entry.
 */
static const nir_op op_trans[TGSI_OPCODE_LAST] = {
   [TGSI_OPCODE_ARL] = 0,
   [TGSI_OPCODE_MOV] = nir_op_fmov,
   [TGSI_OPCODE_LIT] = 0,
   [TGSI_OPCODE_RCP] = nir_op_frcp,
   [TGSI_OPCODE_RSQ] = nir_op_frsq,
   [TGSI_OPCODE_EXP] = 0,
   [TGSI_OPCODE_LOG] = 0,
   [TGSI_OPCODE_MUL] = nir_op_fmul,
   [TGSI_OPCODE_ADD] = nir_op_fadd,
   [TGSI_OPCODE_DP3] = 0,
   [TGSI_OPCODE_DP4] = 0,
   [TGSI_OPCODE_DST] = 0,
   [TGSI_OPCODE_MIN] = nir_op_fmin,
   [TGSI_OPCODE_MAX] = nir_op_fmax,
   [TGSI_OPCODE_SLT] = nir_op_slt,
   [TGSI_OPCODE_SGE] = nir_op_sge,
   [TGSI_OPCODE_MAD] = nir_op_ffma,
   [TGSI_OPCODE_SUB] = nir_op_fsub,
   [TGSI_OPCODE_LRP] = 0,
   [TGSI_OPCODE_SQRT] = nir_op_fsqrt,
   [TGSI_OPCODE_DP2A] = 0,
   [TGSI_OPCODE_FRC] = nir_op_ffract,
   [TGSI_OPCODE_CLAMP] = 0,
   [TGSI_OPCODE_FLR] = nir_op_ffloor,
   [TGSI_OPCODE_ROUND] = nir_op_fround_even,
   [TGSI_OPCODE_EX2] = nir_op_fexp2,
   [TGSI_OPCODE_LG2] = nir_op_flog2,
   [TGSI_OPCODE_POW] = nir_op_fpow,
   [TGSI_OPCODE_XPD] = 0,
   [TGSI_OPCODE_ABS] = nir_op_fabs,
   [TGSI_OPCODE_DPH] = 0,
   [TGSI_OPCODE_COS] = nir_op_fcos,
   [TGSI_OPCODE_DDX] = nir_op_fddx,
   [TGSI_OPCODE_DDY] = nir_op_fddy,
   [TGSI_OPCODE_KILL] = 0,
   [TGSI_OPCODE_PK2H] = 0, /* XXX */
   [TGSI_OPCODE_PK2US] = 0, /* XXX */
   [TGSI_OPCODE_PK4B] = 0, /* XXX */
   [TGSI_OPCODE_PK4UB] = 0, /* XXX */
   [TGSI_OPCODE_SEQ] = nir_op_seq,
   [TGSI_OPCODE_SGT] = 0,
   [TGSI_OPCODE_SIN] = nir_op_fsin,
   [TGSI_OPCODE_SNE] = nir_op_sne,
   [TGSI_OPCODE_SLE] = 0,
   [TGSI_OPCODE_TEX] = 0,
   [TGSI_OPCODE_TXD] = 0,
   [TGSI_OPCODE_TXP] = 0,
   [TGSI_OPCODE_UP2H] = 0, /* XXX */
   [TGSI_OPCODE_UP2US] = 0, /* XXX */
   [TGSI_OPCODE_UP4B] = 0, /* XXX */
   [TGSI_OPCODE_UP4UB] = 0, /* XXX */
   [TGSI_OPCODE_ARR] = 0,

   /* No function calls, yet. */
   [TGSI_OPCODE_CAL] = 0, /* XXX */
   [TGSI_OPCODE_RET] = 0, /* XXX */

   [TGSI_OPCODE_SSG] = nir_op_fsign,
   [TGSI_OPCODE_CMP] = 0,
   [TGSI_OPCODE_SCS] = 0,
   [TGSI_OPCODE_TXB] = 0,
   [TGSI_OPCODE_DIV] = nir_op_fdiv,
   [TGSI_OPCODE_DP2] = 0,
   [TGSI_OPCODE_TXL] = 0,

   [TGSI_OPCODE_BRK] = 0,
   [TGSI_OPCODE_IF] = 0,
   [TGSI_OPCODE_UIF] = 0,
   [TGSI_OPCODE_ELSE] = 0,
   [TGSI_OPCODE_ENDIF] = 0,

   [TGSI_OPCODE_DDX_FINE] = nir_op_fddx_fine,
   [TGSI_OPCODE_DDY_FINE] = nir_op_fddy_fine,

   [TGSI_OPCODE_PUSHA] = 0, /* XXX */
   [TGSI_OPCODE_POPA] = 0, /* XXX */

   [TGSI_OPCODE_CEIL] = nir_op_fceil,
   [TGSI_OPCODE_I2F] = nir_op_i2f,
   [TGSI_OPCODE_NOT] = nir_op_inot,
   [TGSI_OPCODE_TRUNC] = nir_op_ftrunc,
   [TGSI_OPCODE_SHL] = nir_op_ishl,
   [TGSI_OPCODE_AND] = nir_op_iand,
   [TGSI_OPCODE_OR] = nir_op_ior,
   [TGSI_OPCODE_MOD] = nir_op_umod,
   [TGSI_OPCODE_XOR] = nir_op_ixor,
   [TGSI_OPCODE_SAD] = 0, /* XXX */
   [TGSI_OPCODE_TXF] = 0,
   [TGSI_OPCODE_TXQ] = 0,

   [TGSI_OPCODE_CONT] = 0,

   [TGSI_OPCODE_EMIT] = 0, /* XXX */
   [TGSI_OPCODE_ENDPRIM] = 0, /* XXX */

   [TGSI_OPCODE_BGNLOOP] = 0,
   [TGSI_OPCODE_BGNSUB] = 0, /* XXX: no function calls */
   [TGSI_OPCODE_ENDLOOP] = 0,
   [TGSI_OPCODE_ENDSUB] = 0, /* XXX: no function calls */

   [TGSI_OPCODE_TXQ_LZ] = 0,
   [TGSI_OPCODE_NOP] = 0,
   [TGSI_OPCODE_FSEQ] = nir_op_feq,
   [TGSI_OPCODE_FSGE] = nir_op_fge,
   [TGSI_OPCODE_FSLT] = nir_op_flt,
   [TGSI_OPCODE_FSNE] = nir_op_fne,

   /* No control flow yet */
   [TGSI_OPCODE_CALLNZ] = 0, /* XXX */
   [TGSI_OPCODE_BREAKC] = 0, /* not emitted by glsl_to_tgsi.cpp */

   [TGSI_OPCODE_KILL_IF] = 0,

   [TGSI_OPCODE_END] = 0,

   [TGSI_OPCODE_F2I] = nir_op_f2i,
   [TGSI_OPCODE_IDIV] = nir_op_idiv,
   [TGSI_OPCODE_IMAX] = nir_op_imax,
   [TGSI_OPCODE_IMIN] = nir_op_imin,
   [TGSI_OPCODE_INEG] = nir_op_ineg,
   [TGSI_OPCODE_ISGE] = nir_op_ige,
   [TGSI_OPCODE_ISHR] = nir_op_ishr,
   [TGSI_OPCODE_ISLT] = nir_op_ilt,
   [TGSI_OPCODE_F2U] = nir_op_f2u,
   [TGSI_OPCODE_U2F] = nir_op_u2f,
   [TGSI_OPCODE_UADD] = nir_op_iadd,
   [TGSI_OPCODE_UDIV] = nir_op_udiv,
   [TGSI_OPCODE_UMAD] = 0,
   [TGSI_OPCODE_UMAX] = nir_op_umax,
   [TGSI_OPCODE_UMIN] = nir_op_umin,
   [TGSI_OPCODE_UMOD] = nir_op_umod,
   [TGSI_OPCODE_UMUL] = nir_op_imul,
   [TGSI_OPCODE_USEQ] = nir_op_ieq,
   [TGSI_OPCODE_USGE] = nir_op_uge,
   [TGSI_OPCODE_USHR] = nir_op_ushr,
   [TGSI_OPCODE_USLT] = nir_op_ult,
   [TGSI_OPCODE_USNE] = nir_op_ine,

   [TGSI_OPCODE_SWITCH] = 0, /* not emitted by glsl_to_tgsi.cpp */
   [TGSI_OPCODE_CASE] = 0, /* not emitted by glsl_to_tgsi.cpp */
   [TGSI_OPCODE_DEFAULT] = 0, /* not emitted by glsl_to_tgsi.cpp */
   [TGSI_OPCODE_ENDSWITCH] = 0, /* not emitted by glsl_to_tgsi.cpp */

   /* XXX: SAMPLE opcodes */

   [TGSI_OPCODE_UARL] = nir_op_imov,
   [TGSI_OPCODE_UCMP] = 0,
   [TGSI_OPCODE_IABS] = nir_op_iabs,
   [TGSI_OPCODE_ISSG] = nir_op_isign,

   [TGSI_OPCODE_TEX2] = 0,
   [TGSI_OPCODE_TXB2] = 0,
   [TGSI_OPCODE_TXL2] = 0,

   [TGSI_OPCODE_IMUL_HI] = nir_op_imul_high,
   [TGSI_OPCODE_UMUL_HI] = nir_op_umul_high,

   [TGSI_OPCODE_TG4] = 0,
   [TGSI_OPCODE_LODQ] = 0,

   [TGSI_OPCODE_IBFE] = nir_op_ibitfield_extract,
   [TGSI_OPCODE_UBFE] = nir_op_ubitfield_extract,
   [TGSI_OPCODE_BFI] = nir_op_bitfield_insert,
   [TGSI_OPCODE_BREV] = nir_op_bitfield_reverse,
   [TGSI_OPCODE_POPC] = nir_op_bit_count,
   [TGSI_OPCODE_LSB] = nir_op_find_lsb,
   [TGSI_OPCODE_IMSB] = nir_op_ifind_msb,
   [TGSI_OPCODE_UMSB] = nir_op_ufind_msb,

   [TGSI_OPCODE_INTERP_CENTROID] = 0, /* XXX */
   [TGSI_OPCODE_INTERP_SAMPLE] = 0, /* XXX */
   [TGSI_OPCODE_INTERP_OFFSET] = 0, /* XXX */
};
/* Translate the current TGSI instruction (c->token) into NIR.
 *
 * Sources are fetched first, then the opcode is dispatched: scalar
 * transcendentals get an explicit per-channel lowering, structured opcodes
 * go to dedicated ttn_* helpers, control flow manipulates the builder
 * cursor, and everything else falls through to the op_trans table.
 * Afterwards saturate is applied and, for register-backed destinations with
 * a matching variable, a store_var is appended.
 */
static void
ttn_emit_instruction(struct ttn_compile *c)
{
   nir_builder *b = &c->build;
   struct tgsi_full_instruction *tgsi_inst = &c->token->FullInstruction;
   unsigned i;
   unsigned tgsi_op = tgsi_inst->Instruction.Opcode;
   struct tgsi_full_dst_register *tgsi_dst = &tgsi_inst->Dst[0];

   if (tgsi_op == TGSI_OPCODE_END)
      return;

   nir_ssa_def *src[TGSI_FULL_MAX_SRC_REGISTERS];
   for (i = 0; i < tgsi_inst->Instruction.NumSrcRegs; i++) {
      src[i] = ttn_get_src(c, &tgsi_inst->Src[i]);
   }
   nir_alu_dest dest = ttn_get_dest(c, tgsi_dst);

   switch (tgsi_op) {
   /* TGSI scalar transcendentals operate on .x and broadcast the result;
    * ttn_move_dest handles the broadcast via the writemask.
    */
   case TGSI_OPCODE_RSQ:
      ttn_move_dest(b, dest, nir_frsq(b, ttn_channel(b, src[0], X)));
      break;

   case TGSI_OPCODE_SQRT:
      ttn_move_dest(b, dest, nir_fsqrt(b, ttn_channel(b, src[0], X)));
      break;

   case TGSI_OPCODE_RCP:
      ttn_move_dest(b, dest, nir_frcp(b, ttn_channel(b, src[0], X)));
      break;

   case TGSI_OPCODE_EX2:
      ttn_move_dest(b, dest, nir_fexp2(b, ttn_channel(b, src[0], X)));
      break;

   case TGSI_OPCODE_LG2:
      ttn_move_dest(b, dest, nir_flog2(b, ttn_channel(b, src[0], X)));
      break;

   case TGSI_OPCODE_POW:
      ttn_move_dest(b, dest, nir_fpow(b,
                                      ttn_channel(b, src[0], X),
                                      ttn_channel(b, src[1], X)));
      break;

   case TGSI_OPCODE_COS:
      ttn_move_dest(b, dest, nir_fcos(b, ttn_channel(b, src[0], X)));
      break;

   case TGSI_OPCODE_SIN:
      ttn_move_dest(b, dest, nir_fsin(b, ttn_channel(b, src[0], X)));
      break;

   case TGSI_OPCODE_ARL:
      ttn_arl(b, op_trans[tgsi_op], dest, src);
      break;

   case TGSI_OPCODE_EXP:
      ttn_exp(b, op_trans[tgsi_op], dest, src);
      break;

   case TGSI_OPCODE_LOG:
      ttn_log(b, op_trans[tgsi_op], dest, src);
      break;

   case TGSI_OPCODE_DST:
      ttn_dst(b, op_trans[tgsi_op], dest, src);
      break;

   case TGSI_OPCODE_LIT:
      ttn_lit(b, op_trans[tgsi_op], dest, src);
      break;

   case TGSI_OPCODE_CLAMP:
      ttn_clamp(b, op_trans[tgsi_op], dest, src);
      break;

   case TGSI_OPCODE_XPD:
      ttn_xpd(b, op_trans[tgsi_op], dest, src);
      break;

   case TGSI_OPCODE_DP2:
      ttn_dp2(b, op_trans[tgsi_op], dest, src);
      break;

   case TGSI_OPCODE_DP3:
      ttn_dp3(b, op_trans[tgsi_op], dest, src);
      break;

   case TGSI_OPCODE_DP4:
      ttn_dp4(b, op_trans[tgsi_op], dest, src);
      break;

   case TGSI_OPCODE_DP2A:
      ttn_dp2a(b, op_trans[tgsi_op], dest, src);
      break;

   case TGSI_OPCODE_DPH:
      ttn_dph(b, op_trans[tgsi_op], dest, src);
      break;

   case TGSI_OPCODE_UMAD:
      ttn_umad(b, op_trans[tgsi_op], dest, src);
      break;

   case TGSI_OPCODE_LRP:
      /* Note the TGSI argument order maps to nir_flrp(x, y, a) reversed. */
      ttn_move_dest(b, dest, nir_flrp(b, src[2], src[1], src[0]));
      break;

   case TGSI_OPCODE_KILL:
      ttn_kill(b, op_trans[tgsi_op], dest, src);
      break;

   case TGSI_OPCODE_ARR:
      ttn_arr(b, op_trans[tgsi_op], dest, src);
      break;

   case TGSI_OPCODE_CMP:
      ttn_cmp(b, op_trans[tgsi_op], dest, src);
      break;

   case TGSI_OPCODE_UCMP:
      ttn_ucmp(b, op_trans[tgsi_op], dest, src);
      break;

   case TGSI_OPCODE_SCS:
      ttn_scs(b, op_trans[tgsi_op], dest, src);
      break;

   case TGSI_OPCODE_SGT:
      ttn_sgt(b, op_trans[tgsi_op], dest, src);
      break;

   case TGSI_OPCODE_SLE:
      ttn_sle(b, op_trans[tgsi_op], dest, src);
      break;

   case TGSI_OPCODE_KILL_IF:
      ttn_kill_if(b, op_trans[tgsi_op], dest, src);
      break;

   case TGSI_OPCODE_TEX:
   case TGSI_OPCODE_TXP:
   case TGSI_OPCODE_TXL:
   case TGSI_OPCODE_TXB:
   case TGSI_OPCODE_TXD:
   case TGSI_OPCODE_TEX2:
   case TGSI_OPCODE_TXL2:
   case TGSI_OPCODE_TXB2:
   case TGSI_OPCODE_TXQ_LZ:
   case TGSI_OPCODE_TXF:
   case TGSI_OPCODE_TG4:
   case TGSI_OPCODE_LODQ:
      ttn_tex(c, dest, src);
      break;

   case TGSI_OPCODE_TXQ:
      ttn_txq(c, dest, src);
      break;

   case TGSI_OPCODE_NOP:
      break;

   case TGSI_OPCODE_IF:
      ttn_if(c, src[0], false);
      break;

   case TGSI_OPCODE_UIF:
      ttn_if(c, src[0], true);
      break;

   case TGSI_OPCODE_ELSE:
      ttn_else(c);
      break;

   case TGSI_OPCODE_ENDIF:
      ttn_endif(c);
      break;

   case TGSI_OPCODE_BGNLOOP:
      ttn_bgnloop(c);
      break;

   case TGSI_OPCODE_BRK:
      ttn_brk(b);
      break;

   case TGSI_OPCODE_CONT:
      ttn_cont(b);
      break;

   case TGSI_OPCODE_ENDLOOP:
      ttn_endloop(c);
      break;

   default:
      /* MOV is special-cased because its table entry (nir_op_fmov == 0)
       * is indistinguishable from "no mapping".
       */
      if (op_trans[tgsi_op] != 0 || tgsi_op == TGSI_OPCODE_MOV) {
         ttn_alu(b, op_trans[tgsi_op], dest, src);
      } else {
         fprintf(stderr, "unknown TGSI opcode: %s\n",
                 tgsi_get_opcode_name(tgsi_op));
         abort();
      }
      break;
   }

   if (tgsi_inst->Instruction.Saturate) {
      assert(!dest.dest.is_ssa);
      ttn_move_dest(b, dest, nir_fsat(b, ttn_src_for_dest(b, &dest)));
   }

   /* if the dst has a matching var, append store_var to move
    * output from reg to var
    */
   nir_variable *var = ttn_get_var(c, tgsi_dst);
   if (var) {
      unsigned index = tgsi_dst->Register.Index;
      unsigned offset = c->temp_regs[index].offset;
      nir_intrinsic_instr *store =
         nir_intrinsic_instr_create(b->shader, nir_intrinsic_store_var);
      struct tgsi_ind_register *indirect = tgsi_dst->Register.Indirect ?
                                           &tgsi_dst->Indirect : NULL;

      store->num_components = 4;
      nir_intrinsic_set_write_mask(store, dest.write_mask);
      store->variables[0] = ttn_array_deref(c, store, var, offset, indirect);
      store->src[0] = nir_src_for_reg(dest.dest.reg.reg);

      nir_builder_instr_insert(b, &store->instr);
   }
}
/**
 * Puts a NIR intrinsic to store of each TGSI_FILE_OUTPUT value to the output
 * variables at the end of the shader.
 *
 * We don't generate these incrementally as the TGSI_FILE_OUTPUT values are
 * written, because there's no output load intrinsic, which means we couldn't
 * handle writemasks.
 */
static void
ttn_add_output_stores(struct ttn_compile *c)
{
   nir_builder *b = &c->build;

   foreach_list_typed(nir_variable, var, node, &b->shader->outputs) {
      /* Scalar/vector outputs count as a one-element "array" here. */
      unsigned array_len = MAX2(glsl_get_length(var->type), 1);
      unsigned i;

      for (i = 0; i < array_len; i++) {
         nir_intrinsic_instr *store =
            nir_intrinsic_instr_create(b->shader, nir_intrinsic_store_output);
         unsigned loc = var->data.driver_location + i;

         nir_src src = nir_src_for_reg(c->output_regs[loc].reg);
         src.reg.base_offset = c->output_regs[loc].offset;

         if (c->build.shader->stage == MESA_SHADER_FRAGMENT &&
             var->data.location == FRAG_RESULT_DEPTH) {
            /* TGSI uses TGSI_SEMANTIC_POSITION.z for the depth output, while
             * NIR uses a single float FRAG_RESULT_DEPTH.
             */
            src = nir_src_for_ssa(nir_channel(b, nir_ssa_for_src(b, src, 4), 2));
            store->num_components = 1;
         } else {
            store->num_components = 4;
         }
         store->src[0] = src;

         nir_intrinsic_set_base(store, loc);
         nir_intrinsic_set_write_mask(store, 0xf);
         /* src[1] is the (constant zero) indirect offset. */
         store->src[1] = nir_src_for_ssa(nir_imm_int(b, 0));
         nir_builder_instr_insert(b, &store->instr);
      }
   }
}
1970 static gl_shader_stage
1971 tgsi_processor_to_shader_stage(unsigned processor
)
1973 switch (processor
) {
1974 case PIPE_SHADER_FRAGMENT
: return MESA_SHADER_FRAGMENT
;
1975 case PIPE_SHADER_VERTEX
: return MESA_SHADER_VERTEX
;
1976 case PIPE_SHADER_GEOMETRY
: return MESA_SHADER_GEOMETRY
;
1977 case PIPE_SHADER_TESS_CTRL
: return MESA_SHADER_TESS_CTRL
;
1978 case PIPE_SHADER_TESS_EVAL
: return MESA_SHADER_TESS_EVAL
;
1979 case PIPE_SHADER_COMPUTE
: return MESA_SHADER_COMPUTE
;
1981 unreachable("invalid TGSI processor");
1986 tgsi_to_nir(const void *tgsi_tokens
,
1987 const nir_shader_compiler_options
*options
)
1989 struct tgsi_parse_context parser
;
1990 struct tgsi_shader_info scan
;
1991 struct ttn_compile
*c
;
1992 struct nir_shader
*s
;
1995 c
= rzalloc(NULL
, struct ttn_compile
);
1997 tgsi_scan_shader(tgsi_tokens
, &scan
);
2000 nir_builder_init_simple_shader(&c
->build
, NULL
,
2001 tgsi_processor_to_shader_stage(scan
.processor
),
2003 s
= c
->build
.shader
;
2005 s
->num_inputs
= scan
.file_max
[TGSI_FILE_INPUT
] + 1;
2006 s
->num_uniforms
= scan
.const_file_max
[0] + 1;
2007 s
->num_outputs
= scan
.file_max
[TGSI_FILE_OUTPUT
] + 1;
2009 c
->output_regs
= rzalloc_array(c
, struct ttn_reg_info
,
2010 scan
.file_max
[TGSI_FILE_OUTPUT
] + 1);
2011 c
->temp_regs
= rzalloc_array(c
, struct ttn_reg_info
,
2012 scan
.file_max
[TGSI_FILE_TEMPORARY
] + 1);
2013 c
->imm_defs
= rzalloc_array(c
, nir_ssa_def
*,
2014 scan
.file_max
[TGSI_FILE_IMMEDIATE
] + 1);
2016 c
->num_samp_types
= scan
.file_max
[TGSI_FILE_SAMPLER_VIEW
] + 1;
2017 c
->samp_types
= rzalloc_array(c
, nir_alu_type
, c
->num_samp_types
);
2019 c
->if_stack
= rzalloc_array(c
, nir_cursor
,
2020 (scan
.opcode_count
[TGSI_OPCODE_IF
] +
2021 scan
.opcode_count
[TGSI_OPCODE_UIF
]) * 2);
2022 c
->loop_stack
= rzalloc_array(c
, nir_cursor
,
2023 scan
.opcode_count
[TGSI_OPCODE_BGNLOOP
]);
2025 ret
= tgsi_parse_init(&parser
, tgsi_tokens
);
2026 assert(ret
== TGSI_PARSE_OK
);
2028 while (!tgsi_parse_end_of_tokens(&parser
)) {
2029 tgsi_parse_token(&parser
);
2030 c
->token
= &parser
.FullToken
;
2032 switch (parser
.FullToken
.Token
.Type
) {
2033 case TGSI_TOKEN_TYPE_DECLARATION
:
2034 ttn_emit_declaration(c
);
2037 case TGSI_TOKEN_TYPE_INSTRUCTION
:
2038 ttn_emit_instruction(c
);
2041 case TGSI_TOKEN_TYPE_IMMEDIATE
:
2042 ttn_emit_immediate(c
);
2047 tgsi_parse_free(&parser
);
2049 ttn_add_output_stores(c
);