tgsi_to_nir: Improve interpolation modes.
[mesa.git] / src / gallium / auxiliary / nir / tgsi_to_nir.c
1 /*
2 * Copyright © 2014-2015 Broadcom
3 * Copyright (C) 2014 Rob Clark <robclark@freedesktop.org>
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9 * and/or sell copies of the Software, and to permit persons to whom the
10 * Software is furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice (including the next
13 * paragraph) shall be included in all copies or substantial portions of the
14 * Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
22 * IN THE SOFTWARE.
23 */
24
25 #include "util/ralloc.h"
26 #include "pipe/p_screen.h"
27
28 #include "compiler/nir/nir.h"
29 #include "compiler/nir/nir_control_flow.h"
30 #include "compiler/nir/nir_builder.h"
31 #include "compiler/glsl/gl_nir.h"
32 #include "compiler/glsl/list.h"
33 #include "compiler/shader_enums.h"
34
35 #include "tgsi_to_nir.h"
36 #include "tgsi/tgsi_parse.h"
37 #include "tgsi/tgsi_dump.h"
38 #include "tgsi/tgsi_info.h"
39 #include "tgsi/tgsi_scan.h"
40 #include "tgsi/tgsi_from_mesa.h"
41
42 #define SWIZ(X, Y, Z, W) (unsigned[4]){ \
43 TGSI_SWIZZLE_##X, \
44 TGSI_SWIZZLE_##Y, \
45 TGSI_SWIZZLE_##Z, \
46 TGSI_SWIZZLE_##W, \
47 }
48
49 struct ttn_reg_info {
50 /** nir register containing this TGSI index. */
51 nir_register *reg;
52 nir_variable *var;
53 /** Offset (in vec4s) from the start of var for this TGSI index. */
54 int offset;
55 };
56
57 struct ttn_compile {
58 union tgsi_full_token *token;
59 nir_builder build;
60 struct tgsi_shader_info *scan;
61
62 struct ttn_reg_info *output_regs;
63 struct ttn_reg_info *temp_regs;
64 nir_ssa_def **imm_defs;
65
66 unsigned num_samp_types;
67 nir_alu_type *samp_types;
68
69 nir_register *addr_reg;
70
71 nir_variable **inputs;
72 nir_variable **outputs;
73 nir_variable *samplers[PIPE_MAX_SAMPLERS];
74
75 nir_variable *input_var_face;
76 nir_variable *input_var_position;
77
78 /**
79 * Stack of nir_cursors where instructions should be pushed as we pop
80 * back out of the control flow stack.
81 *
82 * For each IF/ELSE/ENDIF block, if_stack[if_stack_pos] has where the else
83 * instructions should be placed, and if_stack[if_stack_pos - 1] has where
84 * the next instructions outside of the if/then/else block go.
85 */
86 nir_cursor *if_stack;
87 unsigned if_stack_pos;
88
89 /**
90 * Stack of nir_cursors where instructions should be pushed as we pop
91 * back out of the control flow stack.
92 *
93 * loop_stack[loop_stack_pos - 1] contains the cf_node_list for the outside
94 * of the loop.
95 */
96 nir_cursor *loop_stack;
97 unsigned loop_stack_pos;
98
99 /* How many TGSI_FILE_IMMEDIATE vec4s have been parsed so far. */
100 unsigned next_imm;
101
102 bool cap_scalar;
103 bool cap_face_is_sysval;
104 bool cap_position_is_sysval;
105 bool cap_packed_uniforms;
106 bool cap_samplers_as_deref;
107 };
108
109 #define ttn_swizzle(b, src, x, y, z, w) \
110 nir_swizzle(b, src, SWIZ(x, y, z, w), 4, false)
111 #define ttn_channel(b, src, swiz) \
112 nir_swizzle(b, src, SWIZ(swiz, swiz, swiz, swiz), 1, false)
113
114 static gl_varying_slot
115 tgsi_varying_semantic_to_slot(unsigned semantic, unsigned index)
116 {
117 switch (semantic) {
118 case TGSI_SEMANTIC_POSITION:
119 return VARYING_SLOT_POS;
120 case TGSI_SEMANTIC_COLOR:
121 if (index == 0)
122 return VARYING_SLOT_COL0;
123 else
124 return VARYING_SLOT_COL1;
125 case TGSI_SEMANTIC_BCOLOR:
126 if (index == 0)
127 return VARYING_SLOT_BFC0;
128 else
129 return VARYING_SLOT_BFC1;
130 case TGSI_SEMANTIC_FOG:
131 return VARYING_SLOT_FOGC;
132 case TGSI_SEMANTIC_PSIZE:
133 return VARYING_SLOT_PSIZ;
134 case TGSI_SEMANTIC_GENERIC:
135 return VARYING_SLOT_VAR0 + index;
136 case TGSI_SEMANTIC_FACE:
137 return VARYING_SLOT_FACE;
138 case TGSI_SEMANTIC_EDGEFLAG:
139 return VARYING_SLOT_EDGE;
140 case TGSI_SEMANTIC_PRIMID:
141 return VARYING_SLOT_PRIMITIVE_ID;
142 case TGSI_SEMANTIC_CLIPDIST:
143 if (index == 0)
144 return VARYING_SLOT_CLIP_DIST0;
145 else
146 return VARYING_SLOT_CLIP_DIST1;
147 case TGSI_SEMANTIC_CLIPVERTEX:
148 return VARYING_SLOT_CLIP_VERTEX;
149 case TGSI_SEMANTIC_TEXCOORD:
150 return VARYING_SLOT_TEX0 + index;
151 case TGSI_SEMANTIC_PCOORD:
152 return VARYING_SLOT_PNTC;
153 case TGSI_SEMANTIC_VIEWPORT_INDEX:
154 return VARYING_SLOT_VIEWPORT;
155 case TGSI_SEMANTIC_LAYER:
156 return VARYING_SLOT_LAYER;
157 default:
158 fprintf(stderr, "Bad TGSI semantic: %d/%d\n", semantic, index);
159 abort();
160 }
161 }
162
/* Wrap an ALU destination back up as a 4-component source with an identity
 * swizzle, so the value just written can be re-read (e.g. for saturate
 * handling or writing outputs back at the end of the shader).
 */
static nir_ssa_def *
ttn_src_for_dest(nir_builder *b, nir_alu_dest *dest)
{
   nir_alu_src src;
   memset(&src, 0, sizeof(src));

   if (dest->dest.is_ssa)
      src.src = nir_src_for_ssa(&dest->dest.ssa);
   else {
      /* an indirect dest would need an indirect source here too; TTN never
       * creates one for the dests this is used with
       */
      assert(!dest->dest.reg.indirect);
      src.src = nir_src_for_reg(dest->dest.reg.reg);
      src.src.reg.base_offset = dest->dest.reg.base_offset;
   }

   /* identity swizzle */
   for (int i = 0; i < 4; i++)
      src.swizzle[i] = i;

   return nir_fmov_alu(b, src, 4);
}
182
/* Translate a TGSI interpolation qualifier to the NIR/GLSL equivalent.
 *
 * NOTE(review): TGSI_INTERPOLATE_COLOR means "interpolate like a color"
 * (flat when flat-shading is enabled, smooth otherwise); mapping it
 * unconditionally to INTERP_MODE_SMOOTH drops that state dependency —
 * confirm that drivers using TTN handle flat-shading elsewhere.
 */
static enum glsl_interp_mode
ttn_translate_interp_mode(unsigned tgsi_interp)
{
   switch (tgsi_interp) {
   case TGSI_INTERPOLATE_CONSTANT:
      return INTERP_MODE_FLAT;
   case TGSI_INTERPOLATE_LINEAR:
      return INTERP_MODE_NOPERSPECTIVE;
   case TGSI_INTERPOLATE_PERSPECTIVE:
      return INTERP_MODE_SMOOTH;
   case TGSI_INTERPOLATE_COLOR:
      return INTERP_MODE_SMOOTH;
   default:
      unreachable("bad TGSI interpolation mode");
   }
}
199
200 static void
201 ttn_emit_declaration(struct ttn_compile *c)
202 {
203 nir_builder *b = &c->build;
204 struct tgsi_full_declaration *decl = &c->token->FullDeclaration;
205 unsigned array_size = decl->Range.Last - decl->Range.First + 1;
206 unsigned file = decl->Declaration.File;
207 unsigned i;
208
209 if (file == TGSI_FILE_TEMPORARY) {
210 if (decl->Declaration.Array) {
211 /* for arrays, we create variables instead of registers: */
212 nir_variable *var = rzalloc(b->shader, nir_variable);
213
214 var->type = glsl_array_type(glsl_vec4_type(), array_size, 0);
215 var->data.mode = nir_var_shader_temp;
216 var->name = ralloc_asprintf(var, "arr_%d", decl->Array.ArrayID);
217
218 exec_list_push_tail(&b->shader->globals, &var->node);
219
220 for (i = 0; i < array_size; i++) {
221 /* point all the matching slots to the same var,
222 * with appropriate offset set, mostly just so
223 * we know what to do when tgsi does a non-indirect
224 * access
225 */
226 c->temp_regs[decl->Range.First + i].reg = NULL;
227 c->temp_regs[decl->Range.First + i].var = var;
228 c->temp_regs[decl->Range.First + i].offset = i;
229 }
230 } else {
231 for (i = 0; i < array_size; i++) {
232 nir_register *reg = nir_local_reg_create(b->impl);
233 reg->num_components = 4;
234 c->temp_regs[decl->Range.First + i].reg = reg;
235 c->temp_regs[decl->Range.First + i].var = NULL;
236 c->temp_regs[decl->Range.First + i].offset = 0;
237 }
238 }
239 } else if (file == TGSI_FILE_ADDRESS) {
240 c->addr_reg = nir_local_reg_create(b->impl);
241 c->addr_reg->num_components = 4;
242 } else if (file == TGSI_FILE_SYSTEM_VALUE) {
243 /* Nothing to record for system values. */
244 } else if (file == TGSI_FILE_SAMPLER) {
245 /* Nothing to record for samplers. */
246 } else if (file == TGSI_FILE_SAMPLER_VIEW) {
247 struct tgsi_declaration_sampler_view *sview = &decl->SamplerView;
248 nir_alu_type type;
249
250 assert((sview->ReturnTypeX == sview->ReturnTypeY) &&
251 (sview->ReturnTypeX == sview->ReturnTypeZ) &&
252 (sview->ReturnTypeX == sview->ReturnTypeW));
253
254 switch (sview->ReturnTypeX) {
255 case TGSI_RETURN_TYPE_SINT:
256 type = nir_type_int;
257 break;
258 case TGSI_RETURN_TYPE_UINT:
259 type = nir_type_uint;
260 break;
261 case TGSI_RETURN_TYPE_FLOAT:
262 default:
263 type = nir_type_float;
264 break;
265 }
266
267 for (i = 0; i < array_size; i++) {
268 c->samp_types[decl->Range.First + i] = type;
269 }
270 } else {
271 bool is_array = (array_size > 1);
272
273 assert(file == TGSI_FILE_INPUT ||
274 file == TGSI_FILE_OUTPUT ||
275 file == TGSI_FILE_CONSTANT);
276
277 /* nothing to do for UBOs: */
278 if ((file == TGSI_FILE_CONSTANT) && decl->Declaration.Dimension &&
279 decl->Dim.Index2D != 0) {
280 b->shader->info.num_ubos =
281 MAX2(b->shader->info.num_ubos, decl->Dim.Index2D);
282 return;
283 }
284
285 if ((file == TGSI_FILE_INPUT) || (file == TGSI_FILE_OUTPUT)) {
286 is_array = (is_array && decl->Declaration.Array &&
287 (decl->Array.ArrayID != 0));
288 }
289
290 for (i = 0; i < array_size; i++) {
291 unsigned idx = decl->Range.First + i;
292 nir_variable *var = rzalloc(b->shader, nir_variable);
293
294 var->data.driver_location = idx;
295
296 var->type = glsl_vec4_type();
297 if (is_array)
298 var->type = glsl_array_type(var->type, array_size, 0);
299
300 switch (file) {
301 case TGSI_FILE_INPUT:
302 var->data.read_only = true;
303 var->data.mode = nir_var_shader_in;
304 var->name = ralloc_asprintf(var, "in_%d", idx);
305
306 if (c->scan->processor == PIPE_SHADER_FRAGMENT) {
307 if (decl->Semantic.Name == TGSI_SEMANTIC_FACE) {
308 var->type = glsl_bool_type();
309 if (c->cap_face_is_sysval) {
310 var->data.mode = nir_var_system_value;
311 var->data.location = SYSTEM_VALUE_FRONT_FACE;
312 } else {
313 var->data.location = VARYING_SLOT_FACE;
314 }
315 c->input_var_face = var;
316 } else if (decl->Semantic.Name == TGSI_SEMANTIC_POSITION) {
317 if (c->cap_position_is_sysval) {
318 var->data.mode = nir_var_system_value;
319 var->data.location = SYSTEM_VALUE_FRAG_COORD;
320 } else {
321 var->data.location = VARYING_SLOT_POS;
322 }
323 c->input_var_position = var;
324 } else {
325 var->data.location =
326 tgsi_varying_semantic_to_slot(decl->Semantic.Name,
327 decl->Semantic.Index);
328 }
329 } else {
330 assert(!decl->Declaration.Semantic);
331 var->data.location = VERT_ATTRIB_GENERIC0 + idx;
332 }
333 var->data.index = 0;
334 var->data.interpolation =
335 ttn_translate_interp_mode(decl->Interp.Interpolate);
336
337 exec_list_push_tail(&b->shader->inputs, &var->node);
338 c->inputs[idx] = var;
339
340 for (int i = 0; i < array_size; i++)
341 b->shader->info.inputs_read |= 1 << (var->data.location + i);
342
343 break;
344 case TGSI_FILE_OUTPUT: {
345 int semantic_name = decl->Semantic.Name;
346 int semantic_index = decl->Semantic.Index;
347 /* Since we can't load from outputs in the IR, we make temporaries
348 * for the outputs and emit stores to the real outputs at the end of
349 * the shader.
350 */
351 nir_register *reg = nir_local_reg_create(b->impl);
352 reg->num_components = 4;
353 if (is_array)
354 reg->num_array_elems = array_size;
355
356 var->data.mode = nir_var_shader_out;
357 var->name = ralloc_asprintf(var, "out_%d", idx);
358 var->data.index = 0;
359 var->data.interpolation =
360 ttn_translate_interp_mode(decl->Interp.Interpolate);
361
362 if (c->scan->processor == PIPE_SHADER_FRAGMENT) {
363 switch (semantic_name) {
364 case TGSI_SEMANTIC_COLOR: {
365 /* TODO tgsi loses some information, so we cannot
366 * actually differentiate here between DSB and MRT
367 * at this point. But so far no drivers using tgsi-
368 * to-nir support dual source blend:
369 */
370 bool dual_src_blend = false;
371 if (dual_src_blend && (semantic_index == 1)) {
372 var->data.location = FRAG_RESULT_DATA0;
373 var->data.index = 1;
374 } else {
375 if (c->scan->properties[TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS])
376 var->data.location = FRAG_RESULT_COLOR;
377 else
378 var->data.location = FRAG_RESULT_DATA0 + semantic_index;
379 }
380 break;
381 }
382 case TGSI_SEMANTIC_POSITION:
383 var->data.location = FRAG_RESULT_DEPTH;
384 var->type = glsl_float_type();
385 break;
386 default:
387 fprintf(stderr, "Bad TGSI semantic: %d/%d\n",
388 decl->Semantic.Name, decl->Semantic.Index);
389 abort();
390 }
391 } else {
392 var->data.location =
393 tgsi_varying_semantic_to_slot(semantic_name, semantic_index);
394 }
395
396 if (is_array) {
397 unsigned j;
398 for (j = 0; j < array_size; j++) {
399 c->output_regs[idx + j].offset = i + j;
400 c->output_regs[idx + j].reg = reg;
401 }
402 } else {
403 c->output_regs[idx].offset = i;
404 c->output_regs[idx].reg = reg;
405 }
406
407 exec_list_push_tail(&b->shader->outputs, &var->node);
408 c->outputs[idx] = var;
409
410 for (int i = 0; i < array_size; i++)
411 b->shader->info.outputs_written |= 1ull << (var->data.location + i);
412 }
413 break;
414 case TGSI_FILE_CONSTANT:
415 var->data.mode = nir_var_uniform;
416 var->name = ralloc_asprintf(var, "uniform_%d", idx);
417
418 exec_list_push_tail(&b->shader->uniforms, &var->node);
419 break;
420 default:
421 unreachable("bad declaration file");
422 return;
423 }
424
425 if (is_array)
426 break;
427 }
428
429 }
430 }
431
432 static void
433 ttn_emit_immediate(struct ttn_compile *c)
434 {
435 nir_builder *b = &c->build;
436 struct tgsi_full_immediate *tgsi_imm = &c->token->FullImmediate;
437 nir_load_const_instr *load_const;
438 int i;
439
440 load_const = nir_load_const_instr_create(b->shader, 4, 32);
441 c->imm_defs[c->next_imm] = &load_const->def;
442 c->next_imm++;
443
444 for (i = 0; i < 4; i++)
445 load_const->value.u32[i] = tgsi_imm->u[i].Uint;
446
447 nir_builder_instr_insert(b, &load_const->instr);
448 }
449
450 static nir_ssa_def *
451 ttn_src_for_indirect(struct ttn_compile *c, struct tgsi_ind_register *indirect);
452
453 /* generate either a constant or indirect deref chain for accessing an
454 * array variable.
455 */
456 static nir_deref_instr *
457 ttn_array_deref(struct ttn_compile *c, nir_variable *var, unsigned offset,
458 struct tgsi_ind_register *indirect)
459 {
460 nir_deref_instr *deref = nir_build_deref_var(&c->build, var);
461 nir_ssa_def *index = nir_imm_int(&c->build, offset);
462 if (indirect)
463 index = nir_iadd(&c->build, index, ttn_src_for_indirect(c, indirect));
464 return nir_build_deref_array(&c->build, deref, index);
465 }
466
467 /* Special case: Turn the frontface varying into a load of the
468 * frontface variable, and create the vector as required by TGSI.
469 */
470 static nir_ssa_def *
471 ttn_emulate_tgsi_front_face(struct ttn_compile *c)
472 {
473 nir_ssa_def *tgsi_frontface[4];
474
475 if (c->cap_face_is_sysval) {
476 /* When it's a system value, it should be an integer vector: (F, 0, 0, 1)
477 * F is 0xffffffff if front-facing, 0 if not.
478 */
479
480 nir_ssa_def *frontface = nir_load_front_face(&c->build, 1);
481
482 tgsi_frontface[0] = nir_bcsel(&c->build,
483 frontface,
484 nir_imm_int(&c->build, 0xffffffff),
485 nir_imm_int(&c->build, 0));
486 tgsi_frontface[1] = nir_imm_int(&c->build, 0);
487 tgsi_frontface[2] = nir_imm_int(&c->build, 0);
488 tgsi_frontface[3] = nir_imm_int(&c->build, 1);
489 } else {
490 /* When it's an input, it should be a float vector: (F, 0.0, 0.0, 1.0)
491 * F is positive if front-facing, negative if not.
492 */
493
494 assert(c->input_var_face);
495 nir_ssa_def *frontface = nir_load_var(&c->build, c->input_var_face);
496
497 tgsi_frontface[0] = nir_bcsel(&c->build,
498 frontface,
499 nir_imm_float(&c->build, 1.0),
500 nir_imm_float(&c->build, -1.0));
501 tgsi_frontface[1] = nir_imm_float(&c->build, 0.0);
502 tgsi_frontface[2] = nir_imm_float(&c->build, 0.0);
503 tgsi_frontface[3] = nir_imm_float(&c->build, 1.0);
504 }
505
506 return nir_vec(&c->build, tgsi_frontface, 4);
507 }
508
/* Build a nir_src for a TGSI register-file access, handling the
 * per-file addressing rules:
 *
 *  - TEMPORARY: either a plain register, or a deref load from the backing
 *    array variable (for TGSI arrays, with optional indirect index).
 *  - ADDRESS / IMMEDIATE: direct register / cached load_const def.
 *  - SYSTEM_VALUE: emits the matching system-value intrinsic and records it
 *    in system_values_read.
 *  - INPUT: loads the input variable (front-face and frag-coord get
 *    special-cased emulation).
 *  - CONSTANT: becomes load_uniform, or load_ubo when a 2D dimension
 *    selects a UBO (TGSI constant buffers >= 1).
 *
 * indirect/dim/dimind may be NULL when the operand has no such part.
 */
static nir_src
ttn_src_for_file_and_index(struct ttn_compile *c, unsigned file, unsigned index,
                           struct tgsi_ind_register *indirect,
                           struct tgsi_dimension *dim,
                           struct tgsi_ind_register *dimind)
{
   nir_builder *b = &c->build;
   nir_src src;

   memset(&src, 0, sizeof(src));

   switch (file) {
   case TGSI_FILE_TEMPORARY:
      if (c->temp_regs[index].var) {
         unsigned offset = c->temp_regs[index].offset;
         nir_variable *var = c->temp_regs[index].var;
         nir_ssa_def *load = nir_load_deref(&c->build,
               ttn_array_deref(c, var, offset, indirect));

         src = nir_src_for_ssa(load);
      } else {
         assert(!indirect);
         src.reg.reg = c->temp_regs[index].reg;
      }
      assert(!dim);
      break;

   case TGSI_FILE_ADDRESS:
      src.reg.reg = c->addr_reg;
      assert(!dim);
      break;

   case TGSI_FILE_IMMEDIATE:
      src = nir_src_for_ssa(c->imm_defs[index]);
      assert(!indirect);
      assert(!dim);
      break;

   case TGSI_FILE_SYSTEM_VALUE: {
      nir_intrinsic_op op;
      nir_ssa_def *load;

      assert(!indirect);
      assert(!dim);

      switch (c->scan->system_value_semantic_name[index]) {
      case TGSI_SEMANTIC_VERTEXID_NOBASE:
         op = nir_intrinsic_load_vertex_id_zero_base;
         load = nir_load_vertex_id_zero_base(b);
         break;
      case TGSI_SEMANTIC_VERTEXID:
         op = nir_intrinsic_load_vertex_id;
         load = nir_load_vertex_id(b);
         break;
      case TGSI_SEMANTIC_BASEVERTEX:
         op = nir_intrinsic_load_base_vertex;
         load = nir_load_base_vertex(b);
         break;
      case TGSI_SEMANTIC_INSTANCEID:
         op = nir_intrinsic_load_instance_id;
         load = nir_load_instance_id(b);
         break;
      case TGSI_SEMANTIC_FACE:
         assert(c->cap_face_is_sysval);
         op = nir_intrinsic_load_front_face;
         load = ttn_emulate_tgsi_front_face(c);
         break;
      case TGSI_SEMANTIC_POSITION:
         assert(c->cap_position_is_sysval);
         op = nir_intrinsic_load_frag_coord;
         load = nir_load_frag_coord(b);
         break;
      default:
         unreachable("bad system value");
      }

      src = nir_src_for_ssa(load);
      b->shader->info.system_values_read |=
         (1 << nir_system_value_from_intrinsic(op));

      break;
   }

   case TGSI_FILE_INPUT:
      if (c->scan->processor == PIPE_SHADER_FRAGMENT &&
          c->scan->input_semantic_name[index] == TGSI_SEMANTIC_FACE) {
         assert(!c->cap_face_is_sysval && c->input_var_face);
         return nir_src_for_ssa(ttn_emulate_tgsi_front_face(c));
      } else if (c->scan->processor == PIPE_SHADER_FRAGMENT &&
                 c->scan->input_semantic_name[index] == TGSI_SEMANTIC_POSITION) {
         assert(!c->cap_position_is_sysval && c->input_var_position);
         return nir_src_for_ssa(nir_load_var(&c->build, c->input_var_position));
      } else {
         /* Indirection on input arrays isn't supported by TTN. */
         assert(!dim);
         nir_deref_instr *deref = nir_build_deref_var(&c->build,
                                                      c->inputs[index]);
         return nir_src_for_ssa(nir_load_deref(&c->build, deref));
      }
      break;

   case TGSI_FILE_CONSTANT: {
      nir_intrinsic_instr *load;
      nir_intrinsic_op op;
      unsigned srcn = 0;

      if (dim && (dim->Index > 0 || dim->Indirect)) {
         op = nir_intrinsic_load_ubo;
      } else {
         op = nir_intrinsic_load_uniform;
      }

      load = nir_intrinsic_instr_create(b->shader, op);

      load->num_components = 4;
      if (dim && (dim->Index > 0 || dim->Indirect)) {
         /* first src of load_ubo is the buffer index */
         if (dimind) {
            load->src[srcn] =
               ttn_src_for_file_and_index(c, dimind->File, dimind->Index,
                                          NULL, NULL, NULL);
         } else {
            /* UBOs start at index 1 in TGSI: */
            load->src[srcn] =
               nir_src_for_ssa(nir_imm_int(b, dim->Index - 1));
         }
         srcn++;
      }

      nir_ssa_def *offset;
      if (op == nir_intrinsic_load_ubo) {
         /* UBO loads don't have a base offset. */
         offset = nir_imm_int(b, index);
         if (indirect) {
            offset = nir_iadd(b, offset, ttn_src_for_indirect(c, indirect));
         }
         /* UBO offsets are in bytes, but TGSI gives them to us in vec4's */
         offset = nir_ishl(b, offset, nir_imm_int(b, 4));
      } else {
         nir_intrinsic_set_base(load, index);
         if (indirect) {
            offset = ttn_src_for_indirect(c, indirect);
         } else {
            offset = nir_imm_int(b, 0);
         }
      }
      load->src[srcn++] = nir_src_for_ssa(offset);

      nir_ssa_dest_init(&load->instr, &load->dest, 4, 32, NULL);
      nir_builder_instr_insert(b, &load->instr);

      src = nir_src_for_ssa(&load->dest.ssa);
      break;
   }

   default:
      unreachable("bad src file");
   }


   return src;
}
670
671 static nir_ssa_def *
672 ttn_src_for_indirect(struct ttn_compile *c, struct tgsi_ind_register *indirect)
673 {
674 nir_builder *b = &c->build;
675 nir_alu_src src;
676 memset(&src, 0, sizeof(src));
677 for (int i = 0; i < 4; i++)
678 src.swizzle[i] = indirect->Swizzle;
679 src.src = ttn_src_for_file_and_index(c,
680 indirect->File,
681 indirect->Index,
682 NULL, NULL, NULL);
683 return nir_imov_alu(b, src, 1);
684 }
685
/* Translate a TGSI dst register into a nir_alu_dest pointing at the
 * corresponding TTN register (temp, output shadow register, or address
 * register).  For array temporaries a fresh one-shot register is returned;
 * the caller is responsible for store_var'ing it back into the array
 * variable afterwards.
 */
static nir_alu_dest
ttn_get_dest(struct ttn_compile *c, struct tgsi_full_dst_register *tgsi_fdst)
{
   struct tgsi_dst_register *tgsi_dst = &tgsi_fdst->Register;
   nir_alu_dest dest;
   unsigned index = tgsi_dst->Index;

   memset(&dest, 0, sizeof(dest));

   if (tgsi_dst->File == TGSI_FILE_TEMPORARY) {
      if (c->temp_regs[index].var) {
         nir_register *reg;

         /* this works, because TGSI will give us a base offset
          * (in case of indirect index) that points back into
          * the array.  Access can be direct or indirect, we
          * don't really care.  Just create a one-shot dst reg
          * that will get store_var'd back into the array var
          * at the end of ttn_emit_instruction()
          */
         reg = nir_local_reg_create(c->build.impl);
         reg->num_components = 4;
         dest.dest.reg.reg = reg;
         dest.dest.reg.base_offset = 0;
      } else {
         assert(!tgsi_dst->Indirect);
         dest.dest.reg.reg = c->temp_regs[index].reg;
         dest.dest.reg.base_offset = c->temp_regs[index].offset;
      }
   } else if (tgsi_dst->File == TGSI_FILE_OUTPUT) {
      dest.dest.reg.reg = c->output_regs[index].reg;
      dest.dest.reg.base_offset = c->output_regs[index].offset;
   } else if (tgsi_dst->File == TGSI_FILE_ADDRESS) {
      assert(index == 0);
      dest.dest.reg.reg = c->addr_reg;
   }

   dest.write_mask = tgsi_dst->WriteMask;
   dest.saturate = false;

   /* Indirect temp writes were handled through the array var above; other
    * files attach the resolved indirect directly to the register dest.
    */
   if (tgsi_dst->Indirect && (tgsi_dst->File != TGSI_FILE_TEMPORARY)) {
      nir_src *indirect = ralloc(c->build.shader, nir_src);
      *indirect = nir_src_for_ssa(ttn_src_for_indirect(c, &tgsi_fdst->Indirect));
      dest.dest.reg.indirect = indirect;
   }

   return dest;
}
734
735 static nir_variable *
736 ttn_get_var(struct ttn_compile *c, struct tgsi_full_dst_register *tgsi_fdst)
737 {
738 struct tgsi_dst_register *tgsi_dst = &tgsi_fdst->Register;
739 unsigned index = tgsi_dst->Index;
740
741 if (tgsi_dst->File == TGSI_FILE_TEMPORARY) {
742 /* we should not have an indirect when there is no var! */
743 if (!c->temp_regs[index].var)
744 assert(!tgsi_dst->Indirect);
745 return c->temp_regs[index].var;
746 }
747
748 return NULL;
749 }
750
/* Fetch a TGSI source operand as a 4-component SSA value, applying the
 * operand's swizzle and then its Absolute/Negate modifiers.
 *
 * src_idx is the operand's position within the instruction; together with
 * the opcode it determines whether the modifiers use float or integer ops.
 * Returns NULL for sampler operands (texturing handles those itself).
 */
static nir_ssa_def *
ttn_get_src(struct ttn_compile *c, struct tgsi_full_src_register *tgsi_fsrc,
            int src_idx)
{
   nir_builder *b = &c->build;
   struct tgsi_src_register *tgsi_src = &tgsi_fsrc->Register;
   enum tgsi_opcode opcode = c->token->FullInstruction.Instruction.Opcode;
   unsigned tgsi_src_type = tgsi_opcode_infer_src_type(opcode, src_idx);
   bool src_is_float = !(tgsi_src_type == TGSI_TYPE_SIGNED ||
                         tgsi_src_type == TGSI_TYPE_UNSIGNED);
   nir_alu_src src;

   memset(&src, 0, sizeof(src));

   if (tgsi_src->File == TGSI_FILE_NULL) {
      return nir_imm_float(b, 0.0);
   } else if (tgsi_src->File == TGSI_FILE_SAMPLER) {
      /* Only the index of the sampler gets used in texturing, and it will
       * handle looking that up on its own instead of using the nir_alu_src.
       */
      assert(!tgsi_src->Indirect);
      return NULL;
   } else {
      struct tgsi_ind_register *ind = NULL;
      struct tgsi_dimension *dim = NULL;
      struct tgsi_ind_register *dimind = NULL;
      if (tgsi_src->Indirect)
         ind = &tgsi_fsrc->Indirect;
      if (tgsi_src->Dimension) {
         dim = &tgsi_fsrc->Dimension;
         if (dim->Indirect)
            dimind = &tgsi_fsrc->DimIndirect;
      }
      src.src = ttn_src_for_file_and_index(c,
                                           tgsi_src->File,
                                           tgsi_src->Index,
                                           ind, dim, dimind);
   }

   src.swizzle[0] = tgsi_src->SwizzleX;
   src.swizzle[1] = tgsi_src->SwizzleY;
   src.swizzle[2] = tgsi_src->SwizzleZ;
   src.swizzle[3] = tgsi_src->SwizzleW;

   /* materialize the swizzled value, then apply modifiers on top */
   nir_ssa_def *def = nir_fmov_alu(b, src, 4);

   if (tgsi_src->Absolute) {
      if (src_is_float)
         def = nir_fabs(b, def);
      else
         def = nir_iabs(b, def);
   }

   if (tgsi_src->Negate) {
      if (src_is_float)
         def = nir_fneg(b, def);
      else
         def = nir_ineg(b, def);
   }

   return def;
}
813
814 static void
815 ttn_alu(nir_builder *b, nir_op op, nir_alu_dest dest, nir_ssa_def **src)
816 {
817 unsigned num_srcs = nir_op_infos[op].num_inputs;
818 nir_alu_instr *instr = nir_alu_instr_create(b->shader, op);
819 unsigned i;
820
821 for (i = 0; i < num_srcs; i++)
822 instr->src[i].src = nir_src_for_ssa(src[i]);
823
824 instr->dest = dest;
825 nir_builder_instr_insert(b, &instr->instr);
826 }
827
/* Move def into dest, writing only the channels present in both dest's
 * writemask and write_mask (no-op if the intersection is empty).  If def
 * has fewer than 4 components, the missing swizzle slots are pointed at
 * def's last component so the source stays valid for a 4-wide dest.
 */
static void
ttn_move_dest_masked(nir_builder *b, nir_alu_dest dest,
                     nir_ssa_def *def, unsigned write_mask)
{
   if (!(dest.write_mask & write_mask))
      return;

   nir_alu_instr *mov = nir_alu_instr_create(b->shader, nir_op_imov);
   mov->dest = dest;
   mov->dest.write_mask &= write_mask;
   mov->src[0].src = nir_src_for_ssa(def);
   /* pad the swizzle with the last valid component */
   for (unsigned i = def->num_components; i < 4; i++)
      mov->src[0].swizzle[i] = def->num_components - 1;
   nir_builder_instr_insert(b, &mov->instr);
}
843
844 static void
845 ttn_move_dest(nir_builder *b, nir_alu_dest dest, nir_ssa_def *def)
846 {
847 ttn_move_dest_masked(b, dest, def, TGSI_WRITEMASK_XYZW);
848 }
849
850 static void
851 ttn_arl(nir_builder *b, nir_op op, nir_alu_dest dest, nir_ssa_def **src)
852 {
853 ttn_move_dest(b, dest, nir_f2i32(b, nir_ffloor(b, src[0])));
854 }
855
856 /* EXP - Approximate Exponential Base 2
857 * dst.x = 2^{\lfloor src.x\rfloor}
858 * dst.y = src.x - \lfloor src.x\rfloor
859 * dst.z = 2^{src.x}
860 * dst.w = 1.0
861 */
862 static void
863 ttn_exp(nir_builder *b, nir_op op, nir_alu_dest dest, nir_ssa_def **src)
864 {
865 nir_ssa_def *srcx = ttn_channel(b, src[0], X);
866
867 ttn_move_dest_masked(b, dest, nir_fexp2(b, nir_ffloor(b, srcx)),
868 TGSI_WRITEMASK_X);
869 ttn_move_dest_masked(b, dest, nir_fsub(b, srcx, nir_ffloor(b, srcx)),
870 TGSI_WRITEMASK_Y);
871 ttn_move_dest_masked(b, dest, nir_fexp2(b, srcx), TGSI_WRITEMASK_Z);
872 ttn_move_dest_masked(b, dest, nir_imm_float(b, 1.0), TGSI_WRITEMASK_W);
873 }
874
875 /* LOG - Approximate Logarithm Base 2
876 * dst.x = \lfloor\log_2{|src.x|}\rfloor
877 * dst.y = \frac{|src.x|}{2^{\lfloor\log_2{|src.x|}\rfloor}}
878 * dst.z = \log_2{|src.x|}
879 * dst.w = 1.0
880 */
881 static void
882 ttn_log(nir_builder *b, nir_op op, nir_alu_dest dest, nir_ssa_def **src)
883 {
884 nir_ssa_def *abs_srcx = nir_fabs(b, ttn_channel(b, src[0], X));
885 nir_ssa_def *log2 = nir_flog2(b, abs_srcx);
886
887 ttn_move_dest_masked(b, dest, nir_ffloor(b, log2), TGSI_WRITEMASK_X);
888 ttn_move_dest_masked(b, dest,
889 nir_fdiv(b, abs_srcx, nir_fexp2(b, nir_ffloor(b, log2))),
890 TGSI_WRITEMASK_Y);
891 ttn_move_dest_masked(b, dest, nir_flog2(b, abs_srcx), TGSI_WRITEMASK_Z);
892 ttn_move_dest_masked(b, dest, nir_imm_float(b, 1.0), TGSI_WRITEMASK_W);
893 }
894
895 /* DST - Distance Vector
896 * dst.x = 1.0
897 * dst.y = src0.y \times src1.y
898 * dst.z = src0.z
899 * dst.w = src1.w
900 */
901 static void
902 ttn_dst(nir_builder *b, nir_op op, nir_alu_dest dest, nir_ssa_def **src)
903 {
904 ttn_move_dest_masked(b, dest, nir_imm_float(b, 1.0), TGSI_WRITEMASK_X);
905 ttn_move_dest_masked(b, dest, nir_fmul(b, src[0], src[1]), TGSI_WRITEMASK_Y);
906 ttn_move_dest_masked(b, dest, nir_fmov(b, src[0]), TGSI_WRITEMASK_Z);
907 ttn_move_dest_masked(b, dest, nir_fmov(b, src[1]), TGSI_WRITEMASK_W);
908 }
909
/* LIT - Light Coefficients
 *  dst.x = 1.0
 *  dst.y = max(src.x, 0.0)
 *  dst.z = (src.x > 0.0) ? max(src.y, 0.0)^{clamp(src.w, -128.0, 128.0))} : 0
 *  dst.w = 1.0
 */
static void
ttn_lit(nir_builder *b, nir_op op, nir_alu_dest dest, nir_ssa_def **src)
{
   ttn_move_dest_masked(b, dest, nir_imm_float(b, 1.0), TGSI_WRITEMASK_XW);

   ttn_move_dest_masked(b, dest, nir_fmax(b, ttn_channel(b, src[0], X),
                                          nir_imm_float(b, 0.0)), TGSI_WRITEMASK_Y);

   /* only build the expensive pow chain when .z is actually written */
   if (dest.write_mask & TGSI_WRITEMASK_Z) {
      nir_ssa_def *src0_y = ttn_channel(b, src[0], Y);
      nir_ssa_def *wclamp = nir_fmax(b, nir_fmin(b, ttn_channel(b, src[0], W),
                                                 nir_imm_float(b, 128.0)),
                                     nir_imm_float(b, -128.0));
      nir_ssa_def *pow = nir_fpow(b, nir_fmax(b, src0_y, nir_imm_float(b, 0.0)),
                                  wclamp);

      /* NOTE(review): the select tests (src.x < 0) rather than !(src.x > 0),
       * so src.x == 0.0 yields pow instead of 0 — presumably acceptable for
       * LIT's approximate contract; confirm against the TGSI spec if exact
       * edge behavior matters.
       */
      ttn_move_dest_masked(b, dest,
                           nir_bcsel(b,
                                     nir_flt(b,
                                             ttn_channel(b, src[0], X),
                                             nir_imm_float(b, 0.0)),
                                     nir_imm_float(b, 0.0),
                                     pow),
                           TGSI_WRITEMASK_Z);
   }
}
942
943 static void
944 ttn_sle(nir_builder *b, nir_op op, nir_alu_dest dest, nir_ssa_def **src)
945 {
946 ttn_move_dest(b, dest, nir_sge(b, src[1], src[0]));
947 }
948
949 static void
950 ttn_sgt(nir_builder *b, nir_op op, nir_alu_dest dest, nir_ssa_def **src)
951 {
952 ttn_move_dest(b, dest, nir_slt(b, src[1], src[0]));
953 }
954
955 static void
956 ttn_dp2(nir_builder *b, nir_op op, nir_alu_dest dest, nir_ssa_def **src)
957 {
958 ttn_move_dest(b, dest, nir_fdot2(b, src[0], src[1]));
959 }
960
961 static void
962 ttn_dp3(nir_builder *b, nir_op op, nir_alu_dest dest, nir_ssa_def **src)
963 {
964 ttn_move_dest(b, dest, nir_fdot3(b, src[0], src[1]));
965 }
966
967 static void
968 ttn_dp4(nir_builder *b, nir_op op, nir_alu_dest dest, nir_ssa_def **src)
969 {
970 ttn_move_dest(b, dest, nir_fdot4(b, src[0], src[1]));
971 }
972
973 static void
974 ttn_umad(nir_builder *b, nir_op op, nir_alu_dest dest, nir_ssa_def **src)
975 {
976 ttn_move_dest(b, dest, nir_iadd(b, nir_imul(b, src[0], src[1]), src[2]));
977 }
978
979 static void
980 ttn_arr(nir_builder *b, nir_op op, nir_alu_dest dest, nir_ssa_def **src)
981 {
982 ttn_move_dest(b, dest, nir_f2i32(b, nir_fround_even(b, src[0])));
983 }
984
985 static void
986 ttn_cmp(nir_builder *b, nir_op op, nir_alu_dest dest, nir_ssa_def **src)
987 {
988 ttn_move_dest(b, dest, nir_bcsel(b,
989 nir_flt(b, src[0], nir_imm_float(b, 0.0)),
990 src[1], src[2]));
991 }
992
993 static void
994 ttn_ucmp(nir_builder *b, nir_op op, nir_alu_dest dest, nir_ssa_def **src)
995 {
996 ttn_move_dest(b, dest, nir_bcsel(b,
997 nir_ine(b, src[0], nir_imm_int(b, 0)),
998 src[1], src[2]));
999 }
1000
1001 static void
1002 ttn_kill(nir_builder *b, nir_op op, nir_alu_dest dest, nir_ssa_def **src)
1003 {
1004 nir_intrinsic_instr *discard =
1005 nir_intrinsic_instr_create(b->shader, nir_intrinsic_discard);
1006 nir_builder_instr_insert(b, &discard->instr);
1007 b->shader->info.fs.uses_discard = true;
1008 }
1009
1010 static void
1011 ttn_kill_if(nir_builder *b, nir_op op, nir_alu_dest dest, nir_ssa_def **src)
1012 {
1013 nir_ssa_def *cmp = nir_bany(b, nir_flt(b, src[0], nir_imm_float(b, 0.0)));
1014 nir_intrinsic_instr *discard =
1015 nir_intrinsic_instr_create(b->shader, nir_intrinsic_discard_if);
1016 discard->src[0] = nir_src_for_ssa(cmp);
1017 nir_builder_instr_insert(b, &discard->instr);
1018 b->shader->info.fs.uses_discard = true;
1019 }
1020
/* IF/UIF - open a NIR if statement conditioned on src.x.  UIF (is_uint)
 * tests the bits as an integer (src.x != 0); IF tests as a float
 * (src.x != 0.0).
 *
 * Two cursors are pushed onto if_stack: first the point just after the
 * whole if (popped by ttn_endif), then the start of the else list (used
 * by ttn_else).  Building continues inside the then list.
 */
static void
ttn_if(struct ttn_compile *c, nir_ssa_def *src, bool is_uint)
{
   nir_builder *b = &c->build;
   nir_ssa_def *src_x = ttn_channel(b, src, X);

   nir_if *if_stmt = nir_if_create(b->shader);
   if (is_uint) {
      /* equivalent to TGSI UIF, src is interpreted as integer */
      if_stmt->condition = nir_src_for_ssa(nir_ine(b, src_x, nir_imm_int(b, 0)));
   } else {
      /* equivalent to TGSI IF, src is interpreted as float */
      if_stmt->condition = nir_src_for_ssa(nir_fne(b, src_x, nir_imm_float(b, 0.0)));
   }
   nir_builder_cf_insert(b, &if_stmt->cf_node);

   c->if_stack[c->if_stack_pos] = nir_after_cf_node(&if_stmt->cf_node);
   c->if_stack_pos++;

   b->cursor = nir_after_cf_list(&if_stmt->then_list);

   c->if_stack[c->if_stack_pos] = nir_after_cf_list(&if_stmt->else_list);
   c->if_stack_pos++;
}
1045
1046 static void
1047 ttn_else(struct ttn_compile *c)
1048 {
1049 nir_builder *b = &c->build;
1050
1051 b->cursor = c->if_stack[c->if_stack_pos - 1];
1052 }
1053
1054 static void
1055 ttn_endif(struct ttn_compile *c)
1056 {
1057 nir_builder *b = &c->build;
1058
1059 c->if_stack_pos -= 2;
1060 b->cursor = c->if_stack[c->if_stack_pos];
1061 }
1062
1063 static void
1064 ttn_bgnloop(struct ttn_compile *c)
1065 {
1066 nir_builder *b = &c->build;
1067
1068 nir_loop *loop = nir_loop_create(b->shader);
1069 nir_builder_cf_insert(b, &loop->cf_node);
1070
1071 c->loop_stack[c->loop_stack_pos] = nir_after_cf_node(&loop->cf_node);
1072 c->loop_stack_pos++;
1073
1074 b->cursor = nir_after_cf_list(&loop->body);
1075 }
1076
1077 static void
1078 ttn_cont(nir_builder *b)
1079 {
1080 nir_jump_instr *instr = nir_jump_instr_create(b->shader, nir_jump_continue);
1081 nir_builder_instr_insert(b, &instr->instr);
1082 }
1083
1084 static void
1085 ttn_brk(nir_builder *b)
1086 {
1087 nir_jump_instr *instr = nir_jump_instr_create(b->shader, nir_jump_break);
1088 nir_builder_instr_insert(b, &instr->instr);
1089 }
1090
1091 static void
1092 ttn_endloop(struct ttn_compile *c)
1093 {
1094 nir_builder *b = &c->build;
1095
1096 c->loop_stack_pos--;
1097 b->cursor = c->loop_stack[c->loop_stack_pos];
1098 }
1099
/* Translates a TGSI texture target into the NIR sampler_dim /
 * is_shadow / is_array triple on the given texture instruction.
 * Aborts on targets this translator does not handle.
 */
static void
setup_texture_info(nir_tex_instr *instr, unsigned texture)
{
   switch (texture) {
   case TGSI_TEXTURE_BUFFER:
      instr->sampler_dim = GLSL_SAMPLER_DIM_BUF;
      break;
   case TGSI_TEXTURE_1D:
      instr->sampler_dim = GLSL_SAMPLER_DIM_1D;
      break;
   case TGSI_TEXTURE_1D_ARRAY:
      instr->sampler_dim = GLSL_SAMPLER_DIM_1D;
      instr->is_array = true;
      break;
   case TGSI_TEXTURE_SHADOW1D:
      instr->sampler_dim = GLSL_SAMPLER_DIM_1D;
      instr->is_shadow = true;
      break;
   case TGSI_TEXTURE_SHADOW1D_ARRAY:
      instr->sampler_dim = GLSL_SAMPLER_DIM_1D;
      instr->is_shadow = true;
      instr->is_array = true;
      break;
   case TGSI_TEXTURE_2D:
      instr->sampler_dim = GLSL_SAMPLER_DIM_2D;
      break;
   case TGSI_TEXTURE_2D_ARRAY:
      instr->sampler_dim = GLSL_SAMPLER_DIM_2D;
      instr->is_array = true;
      break;
   case TGSI_TEXTURE_2D_MSAA:
      instr->sampler_dim = GLSL_SAMPLER_DIM_MS;
      break;
   case TGSI_TEXTURE_2D_ARRAY_MSAA:
      instr->sampler_dim = GLSL_SAMPLER_DIM_MS;
      instr->is_array = true;
      break;
   case TGSI_TEXTURE_SHADOW2D:
      instr->sampler_dim = GLSL_SAMPLER_DIM_2D;
      instr->is_shadow = true;
      break;
   case TGSI_TEXTURE_SHADOW2D_ARRAY:
      instr->sampler_dim = GLSL_SAMPLER_DIM_2D;
      instr->is_shadow = true;
      instr->is_array = true;
      break;
   case TGSI_TEXTURE_3D:
      instr->sampler_dim = GLSL_SAMPLER_DIM_3D;
      break;
   case TGSI_TEXTURE_CUBE:
      instr->sampler_dim = GLSL_SAMPLER_DIM_CUBE;
      break;
   case TGSI_TEXTURE_CUBE_ARRAY:
      instr->sampler_dim = GLSL_SAMPLER_DIM_CUBE;
      instr->is_array = true;
      break;
   case TGSI_TEXTURE_SHADOWCUBE:
      instr->sampler_dim = GLSL_SAMPLER_DIM_CUBE;
      instr->is_shadow = true;
      break;
   case TGSI_TEXTURE_SHADOWCUBE_ARRAY:
      instr->sampler_dim = GLSL_SAMPLER_DIM_CUBE;
      instr->is_shadow = true;
      instr->is_array = true;
      break;
   case TGSI_TEXTURE_RECT:
      instr->sampler_dim = GLSL_SAMPLER_DIM_RECT;
      break;
   case TGSI_TEXTURE_SHADOWRECT:
      instr->sampler_dim = GLSL_SAMPLER_DIM_RECT;
      instr->is_shadow = true;
      break;
   default:
      fprintf(stderr, "Unknown TGSI texture target %d\n", texture);
      abort();
   }
}
1177
1178 static enum glsl_base_type
1179 base_type_for_alu_type(nir_alu_type type)
1180 {
1181 type = nir_alu_type_get_base_type(type);
1182
1183 switch (type) {
1184 case nir_type_float:
1185 return GLSL_TYPE_FLOAT;
1186 case nir_type_int:
1187 return GLSL_TYPE_INT;
1188 case nir_type_uint:
1189 return GLSL_TYPE_UINT;
1190 default:
1191 unreachable("invalid type");
1192 }
1193 }
1194
1195 static nir_variable *
1196 get_sampler_var(struct ttn_compile *c, int binding,
1197 enum glsl_sampler_dim dim,
1198 bool is_shadow,
1199 bool is_array,
1200 enum glsl_base_type base_type)
1201 {
1202 nir_variable *var = c->samplers[binding];
1203 if (!var) {
1204 const struct glsl_type *type =
1205 glsl_sampler_type(dim, is_shadow, is_array, base_type);
1206 var = nir_variable_create(c->build.shader, nir_var_uniform, type,
1207 "sampler");
1208 var->data.binding = binding;
1209 var->data.explicit_binding = true;
1210 c->samplers[binding] = var;
1211 }
1212
1213 return var;
1214 }
1215
/* Translates the TGSI sampling opcodes (TEX/TXP/TXB/TXL/TXD/TXF/TG4/LODQ and
 * the two-source variants) into a single NIR texture instruction.  The TGSI
 * source registers pack coordinate, projector, bias/lod and comparator into
 * fixed channels; this function unpacks them into typed NIR tex sources.
 */
static void
ttn_tex(struct ttn_compile *c, nir_alu_dest dest, nir_ssa_def **src)
{
   nir_builder *b = &c->build;
   struct tgsi_full_instruction *tgsi_inst = &c->token->FullInstruction;
   nir_tex_instr *instr;
   nir_texop op;
   /* samp is the TGSI source-register index that holds the sampler;
    * num_srcs counts the NIR tex sources we will emit.
    */
   unsigned num_srcs, samp = 1, sview, i;

   switch (tgsi_inst->Instruction.Opcode) {
   case TGSI_OPCODE_TEX:
      op = nir_texop_tex;
      num_srcs = 1;
      break;
   case TGSI_OPCODE_TEX2:
      op = nir_texop_tex;
      num_srcs = 1;
      samp = 2;
      break;
   case TGSI_OPCODE_TXP:
      op = nir_texop_tex;
      num_srcs = 2;
      break;
   case TGSI_OPCODE_TXB:
      op = nir_texop_txb;
      num_srcs = 2;
      break;
   case TGSI_OPCODE_TXB2:
      op = nir_texop_txb;
      num_srcs = 2;
      samp = 2;
      break;
   case TGSI_OPCODE_TXL:
      op = nir_texop_txl;
      num_srcs = 2;
      break;
   case TGSI_OPCODE_TXL2:
      op = nir_texop_txl;
      num_srcs = 2;
      samp = 2;
      break;
   case TGSI_OPCODE_TXF:
      if (tgsi_inst->Texture.Texture == TGSI_TEXTURE_2D_MSAA ||
          tgsi_inst->Texture.Texture == TGSI_TEXTURE_2D_ARRAY_MSAA) {
         op = nir_texop_txf_ms;
      } else {
         op = nir_texop_txf;
      }
      num_srcs = 2;
      break;
   case TGSI_OPCODE_TXD:
      op = nir_texop_txd;
      num_srcs = 3;
      samp = 3;
      break;
   case TGSI_OPCODE_LODQ:
      op = nir_texop_lod;
      num_srcs = 1;
      break;

   default:
      fprintf(stderr, "unknown TGSI tex op %d\n", tgsi_inst->Instruction.Opcode);
      abort();
   }

   /* Shadow targets need one extra source for the comparator. */
   if (tgsi_inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D ||
       tgsi_inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D_ARRAY ||
       tgsi_inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D ||
       tgsi_inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D_ARRAY ||
       tgsi_inst->Texture.Texture == TGSI_TEXTURE_SHADOWRECT ||
       tgsi_inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE ||
       tgsi_inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE_ARRAY) {
      num_srcs++;
   }

   /* Deref sources */
   num_srcs += 2;

   num_srcs += tgsi_inst->Texture.NumOffsets;

   instr = nir_tex_instr_create(b->shader, num_srcs);
   instr->op = op;

   setup_texture_info(instr, tgsi_inst->Texture.Texture);

   switch (instr->sampler_dim) {
   case GLSL_SAMPLER_DIM_1D:
   case GLSL_SAMPLER_DIM_BUF:
      instr->coord_components = 1;
      break;
   case GLSL_SAMPLER_DIM_2D:
   case GLSL_SAMPLER_DIM_RECT:
   case GLSL_SAMPLER_DIM_EXTERNAL:
   case GLSL_SAMPLER_DIM_MS:
      instr->coord_components = 2;
      break;
   case GLSL_SAMPLER_DIM_3D:
   case GLSL_SAMPLER_DIM_CUBE:
      instr->coord_components = 3;
      break;
   case GLSL_SAMPLER_DIM_SUBPASS:
   case GLSL_SAMPLER_DIM_SUBPASS_MS:
      unreachable("invalid sampler_dim");
   }

   /* Array targets carry the layer index as an extra coordinate channel. */
   if (instr->is_array)
      instr->coord_components++;

   assert(tgsi_inst->Src[samp].Register.File == TGSI_FILE_SAMPLER);

   /* TODO if we supported any opc's which take an explicit SVIEW
    * src, we would use that here instead.  But for the "legacy"
    * texture opc's the SVIEW index is same as SAMP index:
    */
   sview = tgsi_inst->Src[samp].Register.Index;

   if (op == nir_texop_lod) {
      instr->dest_type = nir_type_float;
   } else if (sview < c->num_samp_types) {
      instr->dest_type = c->samp_types[sview];
   } else {
      /* No declared sampler-view type for this index; default to float. */
      instr->dest_type = nir_type_float;
   }

   nir_variable *var =
      get_sampler_var(c, sview, instr->sampler_dim,
                      instr->is_shadow,
                      instr->is_array,
                      base_type_for_alu_type(instr->dest_type));

   nir_deref_instr *deref = nir_build_deref_var(b, var);

   unsigned src_number = 0;

   /* Same deref is used for both texture and sampler (see SVIEW note above). */
   instr->src[src_number].src = nir_src_for_ssa(&deref->dest.ssa);
   instr->src[src_number].src_type = nir_tex_src_texture_deref;
   src_number++;
   instr->src[src_number].src = nir_src_for_ssa(&deref->dest.ssa);
   instr->src[src_number].src_type = nir_tex_src_sampler_deref;
   src_number++;

   instr->src[src_number].src =
      nir_src_for_ssa(nir_swizzle(b, src[0], SWIZ(X, Y, Z, W),
                                  instr->coord_components, false));
   instr->src[src_number].src_type = nir_tex_src_coord;
   src_number++;

   if (tgsi_inst->Instruction.Opcode == TGSI_OPCODE_TXP) {
      instr->src[src_number].src = nir_src_for_ssa(ttn_channel(b, src[0], W));
      instr->src[src_number].src_type = nir_tex_src_projector;
      src_number++;
   }

   /* TXB/TXL pack bias/lod in src0.w; the *2 variants use src1.x instead. */
   if (tgsi_inst->Instruction.Opcode == TGSI_OPCODE_TXB) {
      instr->src[src_number].src = nir_src_for_ssa(ttn_channel(b, src[0], W));
      instr->src[src_number].src_type = nir_tex_src_bias;
      src_number++;
   }

   if (tgsi_inst->Instruction.Opcode == TGSI_OPCODE_TXB2) {
      instr->src[src_number].src = nir_src_for_ssa(ttn_channel(b, src[1], X));
      instr->src[src_number].src_type = nir_tex_src_bias;
      src_number++;
   }

   if (tgsi_inst->Instruction.Opcode == TGSI_OPCODE_TXL) {
      instr->src[src_number].src = nir_src_for_ssa(ttn_channel(b, src[0], W));
      instr->src[src_number].src_type = nir_tex_src_lod;
      src_number++;
   }

   if (tgsi_inst->Instruction.Opcode == TGSI_OPCODE_TXL2) {
      instr->src[src_number].src = nir_src_for_ssa(ttn_channel(b, src[1], X));
      instr->src[src_number].src_type = nir_tex_src_lod;
      src_number++;
   }

   if (tgsi_inst->Instruction.Opcode == TGSI_OPCODE_TXF) {
      instr->src[src_number].src = nir_src_for_ssa(ttn_channel(b, src[0], W));
      if (op == nir_texop_txf_ms)
         instr->src[src_number].src_type = nir_tex_src_ms_index;
      else
         instr->src[src_number].src_type = nir_tex_src_lod;
      src_number++;
   }

   if (tgsi_inst->Instruction.Opcode == TGSI_OPCODE_TXD) {
      instr->src[src_number].src_type = nir_tex_src_ddx;
      instr->src[src_number].src =
         nir_src_for_ssa(nir_swizzle(b, src[1], SWIZ(X, Y, Z, W),
				     nir_tex_instr_src_size(instr, src_number),
				     false));
      src_number++;
      instr->src[src_number].src_type = nir_tex_src_ddy;
      instr->src[src_number].src =
         nir_src_for_ssa(nir_swizzle(b, src[2], SWIZ(X, Y, Z, W),
				     nir_tex_instr_src_size(instr, src_number),
				     false));
      src_number++;
   }

   if (instr->is_shadow) {
      /* The comparator lives after the coordinate channels: src1.x when the
       * coordinate already fills all four channels, otherwise src0.w or
       * src0.z.
       */
      if (instr->coord_components == 4)
         instr->src[src_number].src = nir_src_for_ssa(ttn_channel(b, src[1], X));
      else if (instr->coord_components == 3)
         instr->src[src_number].src = nir_src_for_ssa(ttn_channel(b, src[0], W));
      else
         instr->src[src_number].src = nir_src_for_ssa(ttn_channel(b, src[0], Z));

      instr->src[src_number].src_type = nir_tex_src_comparator;
      src_number++;
   }

   for (i = 0; i < tgsi_inst->Texture.NumOffsets; i++) {
      struct tgsi_texture_offset *tex_offset = &tgsi_inst->TexOffsets[i];
      /* since TexOffset isn't using tgsi_full_src_register we get to
       * do some extra gymnastics:
       */
      nir_alu_src src;

      memset(&src, 0, sizeof(src));

      src.src = ttn_src_for_file_and_index(c,
                                           tex_offset->File,
                                           tex_offset->Index,
                                           NULL, NULL, NULL);

      src.swizzle[0] = tex_offset->SwizzleX;
      src.swizzle[1] = tex_offset->SwizzleY;
      src.swizzle[2] = tex_offset->SwizzleZ;
      src.swizzle[3] = TGSI_SWIZZLE_W;

      instr->src[src_number].src_type = nir_tex_src_offset;
      instr->src[src_number].src = nir_src_for_ssa(
         nir_fmov_alu(b, src, nir_tex_instr_src_size(instr, src_number)));
      src_number++;
   }

   assert(src_number == num_srcs);
   assert(src_number == instr->num_srcs);

   nir_ssa_dest_init(&instr->instr, &instr->dest,
		     nir_tex_instr_dest_size(instr),
		     32, NULL);
   nir_builder_instr_insert(b, &instr->instr);

   /* Resolve the writemask on the texture op. */
   ttn_move_dest(b, dest, &instr->dest.ssa);
}
1465
1466 /* TGSI_OPCODE_TXQ is actually two distinct operations:
1467 *
 * dst.x = texture_width(unit, lod)
 * dst.y = texture_height(unit, lod)
 * dst.z = texture_depth(unit, lod)
 * dst.w = texture_levels(unit)
1472 *
1473 * dst.xyz map to NIR txs opcode, and dst.w maps to query_levels
1474 */
1475 static void
1476 ttn_txq(struct ttn_compile *c, nir_alu_dest dest, nir_ssa_def **src)
1477 {
1478 nir_builder *b = &c->build;
1479 struct tgsi_full_instruction *tgsi_inst = &c->token->FullInstruction;
1480 nir_tex_instr *txs, *qlv;
1481
1482 txs = nir_tex_instr_create(b->shader, 2);
1483 txs->op = nir_texop_txs;
1484 setup_texture_info(txs, tgsi_inst->Texture.Texture);
1485
1486 qlv = nir_tex_instr_create(b->shader, 1);
1487 qlv->op = nir_texop_query_levels;
1488 setup_texture_info(qlv, tgsi_inst->Texture.Texture);
1489
1490 assert(tgsi_inst->Src[1].Register.File == TGSI_FILE_SAMPLER);
1491 int tex_index = tgsi_inst->Src[1].Register.Index;
1492
1493 nir_variable *var =
1494 get_sampler_var(c, tex_index, txs->sampler_dim,
1495 txs->is_shadow,
1496 txs->is_array,
1497 base_type_for_alu_type(txs->dest_type));
1498
1499 nir_deref_instr *deref = nir_build_deref_var(b, var);
1500
1501 txs->src[0].src = nir_src_for_ssa(&deref->dest.ssa);
1502 txs->src[0].src_type = nir_tex_src_texture_deref;
1503
1504 qlv->src[0].src = nir_src_for_ssa(&deref->dest.ssa);
1505 qlv->src[0].src_type = nir_tex_src_texture_deref;
1506
1507 /* lod: */
1508 txs->src[1].src = nir_src_for_ssa(ttn_channel(b, src[0], X));
1509 txs->src[1].src_type = nir_tex_src_lod;
1510
1511 nir_ssa_dest_init(&txs->instr, &txs->dest,
1512 nir_tex_instr_dest_size(txs), 32, NULL);
1513 nir_builder_instr_insert(b, &txs->instr);
1514
1515 nir_ssa_dest_init(&qlv->instr, &qlv->dest, 1, 32, NULL);
1516 nir_builder_instr_insert(b, &qlv->instr);
1517
1518 ttn_move_dest_masked(b, dest, &txs->dest.ssa, TGSI_WRITEMASK_XYZ);
1519 ttn_move_dest_masked(b, dest, &qlv->dest.ssa, TGSI_WRITEMASK_W);
1520 }
1521
/* Direct TGSI-opcode -> NIR-ALU-op translation table, used by the default
 * case of ttn_emit_instruction().  A zero entry means the opcode either has
 * dedicated handling in ttn_emit_instruction() (e.g. flow control, texture
 * ops, DP*/LIT/EXP/LOG) or is not supported here (the /* XXX */ entries).
 */
static const nir_op op_trans[TGSI_OPCODE_LAST] = {
   [TGSI_OPCODE_ARL] = 0,
   [TGSI_OPCODE_MOV] = nir_op_fmov,
   [TGSI_OPCODE_LIT] = 0,
   [TGSI_OPCODE_RCP] = nir_op_frcp,
   [TGSI_OPCODE_RSQ] = nir_op_frsq,
   [TGSI_OPCODE_EXP] = 0,
   [TGSI_OPCODE_LOG] = 0,
   [TGSI_OPCODE_MUL] = nir_op_fmul,
   [TGSI_OPCODE_ADD] = nir_op_fadd,
   [TGSI_OPCODE_DP3] = 0,
   [TGSI_OPCODE_DP4] = 0,
   [TGSI_OPCODE_DST] = 0,
   [TGSI_OPCODE_MIN] = nir_op_fmin,
   [TGSI_OPCODE_MAX] = nir_op_fmax,
   [TGSI_OPCODE_SLT] = nir_op_slt,
   [TGSI_OPCODE_SGE] = nir_op_sge,
   [TGSI_OPCODE_MAD] = nir_op_ffma,
   [TGSI_OPCODE_LRP] = 0,
   [TGSI_OPCODE_SQRT] = nir_op_fsqrt,
   [TGSI_OPCODE_FRC] = nir_op_ffract,
   [TGSI_OPCODE_FLR] = nir_op_ffloor,
   [TGSI_OPCODE_ROUND] = nir_op_fround_even,
   [TGSI_OPCODE_EX2] = nir_op_fexp2,
   [TGSI_OPCODE_LG2] = nir_op_flog2,
   [TGSI_OPCODE_POW] = nir_op_fpow,
   [TGSI_OPCODE_COS] = nir_op_fcos,
   [TGSI_OPCODE_DDX] = nir_op_fddx,
   [TGSI_OPCODE_DDY] = nir_op_fddy,
   [TGSI_OPCODE_KILL] = 0,
   [TGSI_OPCODE_PK2H] = 0, /* XXX */
   [TGSI_OPCODE_PK2US] = 0, /* XXX */
   [TGSI_OPCODE_PK4B] = 0, /* XXX */
   [TGSI_OPCODE_PK4UB] = 0, /* XXX */
   [TGSI_OPCODE_SEQ] = nir_op_seq,
   [TGSI_OPCODE_SGT] = 0,
   [TGSI_OPCODE_SIN] = nir_op_fsin,
   [TGSI_OPCODE_SNE] = nir_op_sne,
   [TGSI_OPCODE_SLE] = 0,
   [TGSI_OPCODE_TEX] = 0,
   [TGSI_OPCODE_TXD] = 0,
   [TGSI_OPCODE_TXP] = 0,
   [TGSI_OPCODE_UP2H] = 0, /* XXX */
   [TGSI_OPCODE_UP2US] = 0, /* XXX */
   [TGSI_OPCODE_UP4B] = 0, /* XXX */
   [TGSI_OPCODE_UP4UB] = 0, /* XXX */
   [TGSI_OPCODE_ARR] = 0,

   /* No function calls, yet. */
   [TGSI_OPCODE_CAL] = 0, /* XXX */
   [TGSI_OPCODE_RET] = 0, /* XXX */

   [TGSI_OPCODE_SSG] = nir_op_fsign,
   [TGSI_OPCODE_CMP] = 0,
   [TGSI_OPCODE_TXB] = 0,
   [TGSI_OPCODE_DIV] = nir_op_fdiv,
   [TGSI_OPCODE_DP2] = 0,
   [TGSI_OPCODE_TXL] = 0,

   [TGSI_OPCODE_BRK] = 0,
   [TGSI_OPCODE_IF] = 0,
   [TGSI_OPCODE_UIF] = 0,
   [TGSI_OPCODE_ELSE] = 0,
   [TGSI_OPCODE_ENDIF] = 0,

   [TGSI_OPCODE_DDX_FINE] = nir_op_fddx_fine,
   [TGSI_OPCODE_DDY_FINE] = nir_op_fddy_fine,

   [TGSI_OPCODE_CEIL] = nir_op_fceil,
   [TGSI_OPCODE_I2F] = nir_op_i2f32,
   [TGSI_OPCODE_NOT] = nir_op_inot,
   [TGSI_OPCODE_TRUNC] = nir_op_ftrunc,
   [TGSI_OPCODE_SHL] = nir_op_ishl,
   [TGSI_OPCODE_AND] = nir_op_iand,
   [TGSI_OPCODE_OR] = nir_op_ior,
   [TGSI_OPCODE_MOD] = nir_op_umod,
   [TGSI_OPCODE_XOR] = nir_op_ixor,
   [TGSI_OPCODE_TXF] = 0,
   [TGSI_OPCODE_TXQ] = 0,

   [TGSI_OPCODE_CONT] = 0,

   [TGSI_OPCODE_EMIT] = 0, /* XXX */
   [TGSI_OPCODE_ENDPRIM] = 0, /* XXX */

   [TGSI_OPCODE_BGNLOOP] = 0,
   [TGSI_OPCODE_BGNSUB] = 0, /* XXX: no function calls */
   [TGSI_OPCODE_ENDLOOP] = 0,
   [TGSI_OPCODE_ENDSUB] = 0, /* XXX: no function calls */

   [TGSI_OPCODE_NOP] = 0,
   [TGSI_OPCODE_FSEQ] = nir_op_feq32,
   [TGSI_OPCODE_FSGE] = nir_op_fge32,
   [TGSI_OPCODE_FSLT] = nir_op_flt32,
   [TGSI_OPCODE_FSNE] = nir_op_fne32,

   [TGSI_OPCODE_KILL_IF] = 0,

   [TGSI_OPCODE_END] = 0,

   [TGSI_OPCODE_F2I] = nir_op_f2i32,
   [TGSI_OPCODE_IDIV] = nir_op_idiv,
   [TGSI_OPCODE_IMAX] = nir_op_imax,
   [TGSI_OPCODE_IMIN] = nir_op_imin,
   [TGSI_OPCODE_INEG] = nir_op_ineg,
   [TGSI_OPCODE_ISGE] = nir_op_ige32,
   [TGSI_OPCODE_ISHR] = nir_op_ishr,
   [TGSI_OPCODE_ISLT] = nir_op_ilt32,
   [TGSI_OPCODE_F2U] = nir_op_f2u32,
   [TGSI_OPCODE_U2F] = nir_op_u2f32,
   [TGSI_OPCODE_UADD] = nir_op_iadd,
   [TGSI_OPCODE_UDIV] = nir_op_udiv,
   [TGSI_OPCODE_UMAD] = 0,
   [TGSI_OPCODE_UMAX] = nir_op_umax,
   [TGSI_OPCODE_UMIN] = nir_op_umin,
   [TGSI_OPCODE_UMOD] = nir_op_umod,
   [TGSI_OPCODE_UMUL] = nir_op_imul,
   [TGSI_OPCODE_USEQ] = nir_op_ieq32,
   [TGSI_OPCODE_USGE] = nir_op_uge32,
   [TGSI_OPCODE_USHR] = nir_op_ushr,
   [TGSI_OPCODE_USLT] = nir_op_ult32,
   [TGSI_OPCODE_USNE] = nir_op_ine32,

   [TGSI_OPCODE_SWITCH] = 0, /* not emitted by glsl_to_tgsi.cpp */
   [TGSI_OPCODE_CASE] = 0, /* not emitted by glsl_to_tgsi.cpp */
   [TGSI_OPCODE_DEFAULT] = 0, /* not emitted by glsl_to_tgsi.cpp */
   [TGSI_OPCODE_ENDSWITCH] = 0, /* not emitted by glsl_to_tgsi.cpp */

   /* XXX: SAMPLE opcodes */

   [TGSI_OPCODE_UARL] = nir_op_imov,
   [TGSI_OPCODE_UCMP] = 0,
   [TGSI_OPCODE_IABS] = nir_op_iabs,
   [TGSI_OPCODE_ISSG] = nir_op_isign,

   /* XXX: atomics */

   [TGSI_OPCODE_TEX2] = 0,
   [TGSI_OPCODE_TXB2] = 0,
   [TGSI_OPCODE_TXL2] = 0,

   [TGSI_OPCODE_IMUL_HI] = nir_op_imul_high,
   [TGSI_OPCODE_UMUL_HI] = nir_op_umul_high,

   [TGSI_OPCODE_TG4] = 0,
   [TGSI_OPCODE_LODQ] = 0,

   [TGSI_OPCODE_IBFE] = nir_op_ibitfield_extract,
   [TGSI_OPCODE_UBFE] = nir_op_ubitfield_extract,
   [TGSI_OPCODE_BFI] = nir_op_bitfield_insert,
   [TGSI_OPCODE_BREV] = nir_op_bitfield_reverse,
   [TGSI_OPCODE_POPC] = nir_op_bit_count,
   [TGSI_OPCODE_LSB] = nir_op_find_lsb,
   [TGSI_OPCODE_IMSB] = nir_op_ifind_msb,
   [TGSI_OPCODE_UMSB] = nir_op_ufind_msb,

   [TGSI_OPCODE_INTERP_CENTROID] = 0, /* XXX */
   [TGSI_OPCODE_INTERP_SAMPLE] = 0, /* XXX */
   [TGSI_OPCODE_INTERP_OFFSET] = 0, /* XXX */
};
1682
/* Translates one TGSI instruction token into NIR.  Opcodes with dedicated
 * semantics get an explicit case; everything else falls through to the
 * op_trans[] table and ttn_alu().  Also applies the TGSI saturate modifier
 * and, when the destination is backed by a variable (e.g. an indirectly
 * addressed temp array), stores the written register back to that variable.
 */
static void
ttn_emit_instruction(struct ttn_compile *c)
{
   nir_builder *b = &c->build;
   struct tgsi_full_instruction *tgsi_inst = &c->token->FullInstruction;
   unsigned i;
   unsigned tgsi_op = tgsi_inst->Instruction.Opcode;
   struct tgsi_full_dst_register *tgsi_dst = &tgsi_inst->Dst[0];

   if (tgsi_op == TGSI_OPCODE_END)
      return;

   nir_ssa_def *src[TGSI_FULL_MAX_SRC_REGISTERS];
   for (i = 0; i < tgsi_inst->Instruction.NumSrcRegs; i++) {
      src[i] = ttn_get_src(c, &tgsi_inst->Src[i], i);
   }
   nir_alu_dest dest = ttn_get_dest(c, tgsi_dst);

   switch (tgsi_op) {
   /* Scalar transcendentals operate on the .x channel only. */
   case TGSI_OPCODE_RSQ:
      ttn_move_dest(b, dest, nir_frsq(b, ttn_channel(b, src[0], X)));
      break;

   case TGSI_OPCODE_SQRT:
      ttn_move_dest(b, dest, nir_fsqrt(b, ttn_channel(b, src[0], X)));
      break;

   case TGSI_OPCODE_RCP:
      ttn_move_dest(b, dest, nir_frcp(b, ttn_channel(b, src[0], X)));
      break;

   case TGSI_OPCODE_EX2:
      ttn_move_dest(b, dest, nir_fexp2(b, ttn_channel(b, src[0], X)));
      break;

   case TGSI_OPCODE_LG2:
      ttn_move_dest(b, dest, nir_flog2(b, ttn_channel(b, src[0], X)));
      break;

   case TGSI_OPCODE_POW:
      ttn_move_dest(b, dest, nir_fpow(b,
                                      ttn_channel(b, src[0], X),
                                      ttn_channel(b, src[1], X)));
      break;

   case TGSI_OPCODE_COS:
      ttn_move_dest(b, dest, nir_fcos(b, ttn_channel(b, src[0], X)));
      break;

   case TGSI_OPCODE_SIN:
      ttn_move_dest(b, dest, nir_fsin(b, ttn_channel(b, src[0], X)));
      break;

   case TGSI_OPCODE_ARL:
      ttn_arl(b, op_trans[tgsi_op], dest, src);
      break;

   case TGSI_OPCODE_EXP:
      ttn_exp(b, op_trans[tgsi_op], dest, src);
      break;

   case TGSI_OPCODE_LOG:
      ttn_log(b, op_trans[tgsi_op], dest, src);
      break;

   case TGSI_OPCODE_DST:
      ttn_dst(b, op_trans[tgsi_op], dest, src);
      break;

   case TGSI_OPCODE_LIT:
      ttn_lit(b, op_trans[tgsi_op], dest, src);
      break;

   case TGSI_OPCODE_DP2:
      ttn_dp2(b, op_trans[tgsi_op], dest, src);
      break;

   case TGSI_OPCODE_DP3:
      ttn_dp3(b, op_trans[tgsi_op], dest, src);
      break;

   case TGSI_OPCODE_DP4:
      ttn_dp4(b, op_trans[tgsi_op], dest, src);
      break;

   case TGSI_OPCODE_UMAD:
      ttn_umad(b, op_trans[tgsi_op], dest, src);
      break;

   case TGSI_OPCODE_LRP:
      /* nir_flrp(a, b, c) = a*(1-c) + b*c, so TGSI's
       * src0*src1 + (1-src0)*src2 is flrp(src2, src1, src0).
       */
      ttn_move_dest(b, dest, nir_flrp(b, src[2], src[1], src[0]));
      break;

   case TGSI_OPCODE_KILL:
      ttn_kill(b, op_trans[tgsi_op], dest, src);
      break;

   case TGSI_OPCODE_ARR:
      ttn_arr(b, op_trans[tgsi_op], dest, src);
      break;

   case TGSI_OPCODE_CMP:
      ttn_cmp(b, op_trans[tgsi_op], dest, src);
      break;

   case TGSI_OPCODE_UCMP:
      ttn_ucmp(b, op_trans[tgsi_op], dest, src);
      break;

   case TGSI_OPCODE_SGT:
      ttn_sgt(b, op_trans[tgsi_op], dest, src);
      break;

   case TGSI_OPCODE_SLE:
      ttn_sle(b, op_trans[tgsi_op], dest, src);
      break;

   case TGSI_OPCODE_KILL_IF:
      ttn_kill_if(b, op_trans[tgsi_op], dest, src);
      break;

   case TGSI_OPCODE_TEX:
   case TGSI_OPCODE_TXP:
   case TGSI_OPCODE_TXL:
   case TGSI_OPCODE_TXB:
   case TGSI_OPCODE_TXD:
   case TGSI_OPCODE_TEX2:
   case TGSI_OPCODE_TXL2:
   case TGSI_OPCODE_TXB2:
   case TGSI_OPCODE_TXF:
   case TGSI_OPCODE_TG4:
   case TGSI_OPCODE_LODQ:
      ttn_tex(c, dest, src);
      break;

   case TGSI_OPCODE_TXQ:
      ttn_txq(c, dest, src);
      break;

   case TGSI_OPCODE_NOP:
      break;

   case TGSI_OPCODE_IF:
      ttn_if(c, src[0], false);
      break;

   case TGSI_OPCODE_UIF:
      ttn_if(c, src[0], true);
      break;

   case TGSI_OPCODE_ELSE:
      ttn_else(c);
      break;

   case TGSI_OPCODE_ENDIF:
      ttn_endif(c);
      break;

   case TGSI_OPCODE_BGNLOOP:
      ttn_bgnloop(c);
      break;

   case TGSI_OPCODE_BRK:
      ttn_brk(b);
      break;

   case TGSI_OPCODE_CONT:
      ttn_cont(b);
      break;

   case TGSI_OPCODE_ENDLOOP:
      ttn_endloop(c);
      break;

   default:
      /* MOV is special-cased in case nir_op_fmov happens to be entry 0. */
      if (op_trans[tgsi_op] != 0 || tgsi_op == TGSI_OPCODE_MOV) {
         ttn_alu(b, op_trans[tgsi_op], dest, src);
      } else {
         fprintf(stderr, "unknown TGSI opcode: %s\n",
                 tgsi_get_opcode_name(tgsi_op));
         abort();
      }
      break;
   }

   if (tgsi_inst->Instruction.Saturate) {
      assert(!dest.dest.is_ssa);
      ttn_move_dest(b, dest, nir_fsat(b, ttn_src_for_dest(b, &dest)));
   }

   /* if the dst has a matching var, append store_var to move
    * output from reg to var
    */
   nir_variable *var = ttn_get_var(c, tgsi_dst);
   if (var) {
      unsigned index = tgsi_dst->Register.Index;
      unsigned offset = c->temp_regs[index].offset;
      struct tgsi_ind_register *indirect = tgsi_dst->Register.Indirect ?
                                           &tgsi_dst->Indirect : NULL;
      nir_src val = nir_src_for_reg(dest.dest.reg.reg);
      nir_store_deref(b, ttn_array_deref(c, var, offset, indirect),
                      nir_ssa_for_src(b, val, 4), dest.write_mask);
   }
}
1887
1888 /**
1889 * Puts a NIR intrinsic to store of each TGSI_FILE_OUTPUT value to the output
1890 * variables at the end of the shader.
1891 *
1892 * We don't generate these incrementally as the TGSI_FILE_OUTPUT values are
1893 * written, because there's no output load intrinsic, which means we couldn't
1894 * handle writemasks.
1895 */
static void
ttn_add_output_stores(struct ttn_compile *c)
{
   nir_builder *b = &c->build;

   for (int i = 0; i < c->build.shader->num_outputs; i++) {
      nir_variable *var = c->outputs[i];
      /* Outputs the shader never declared/wrote have no variable. */
      if (!var)
         continue;

      /* Read back the register that accumulated the output's value. */
      nir_src src = nir_src_for_reg(c->output_regs[i].reg);
      src.reg.base_offset = c->output_regs[i].offset;

      nir_ssa_def *store_value = nir_ssa_for_src(b, src, 4);
      if (c->build.shader->info.stage == MESA_SHADER_FRAGMENT &&
          var->data.location == FRAG_RESULT_DEPTH) {
         /* TGSI uses TGSI_SEMANTIC_POSITION.z for the depth output, while
          * NIR uses a single float FRAG_RESULT_DEPTH.
          */
         store_value = nir_channel(b, store_value, 2);
      }

      nir_store_deref(b, nir_build_deref_var(b, var), store_value,
                      (1 << store_value->num_components) - 1);
   }
}
1922
1923 /**
1924 * Parses the given TGSI tokens.
1925 */
static void
ttn_parse_tgsi(struct ttn_compile *c, const void *tgsi_tokens)
{
   struct tgsi_parse_context parser;
   /* Only consumed by the assert below; unused in NDEBUG builds. */
   int ret;

   ret = tgsi_parse_init(&parser, tgsi_tokens);
   assert(ret == TGSI_PARSE_OK);

   /* Walk the token stream, dispatching each full token to the matching
    * emitter; c->token makes the current token visible to the emitters.
    */
   while (!tgsi_parse_end_of_tokens(&parser)) {
      tgsi_parse_token(&parser);
      c->token = &parser.FullToken;

      switch (parser.FullToken.Token.Type) {
      case TGSI_TOKEN_TYPE_DECLARATION:
         ttn_emit_declaration(c);
         break;

      case TGSI_TOKEN_TYPE_INSTRUCTION:
         ttn_emit_instruction(c);
         break;

      case TGSI_TOKEN_TYPE_IMMEDIATE:
         ttn_emit_immediate(c);
         break;
      }
   }

   tgsi_parse_free(&parser);
}
1956
1957 static void
1958 ttn_read_pipe_caps(struct ttn_compile *c,
1959 struct pipe_screen *screen)
1960 {
1961 c->cap_scalar = screen->get_shader_param(screen, c->scan->processor, PIPE_SHADER_CAP_SCALAR_ISA);
1962 c->cap_packed_uniforms = screen->get_param(screen, PIPE_CAP_PACKED_UNIFORMS);
1963 c->cap_samplers_as_deref = screen->get_param(screen, PIPE_CAP_NIR_SAMPLERS_AS_DEREF);
1964 c->cap_face_is_sysval = screen->get_param(screen, PIPE_CAP_TGSI_FS_FACE_IS_INTEGER_SYSVAL);
1965 c->cap_position_is_sysval = screen->get_param(screen, PIPE_CAP_TGSI_FS_POSITION_IS_SYSVAL);
1966 }
1967
1968 /**
1969 * Initializes a TGSI-to-NIR compiler.
1970 */
static struct ttn_compile *
ttn_compile_init(const void *tgsi_tokens,
                 const nir_shader_compiler_options *options,
                 struct pipe_screen *screen)
{
   struct ttn_compile *c;
   struct nir_shader *s;
   struct tgsi_shader_info scan;

   /* Need at least one source of compiler options. */
   assert(options || screen);
   c = rzalloc(NULL, struct ttn_compile);

   /* Pre-scan the tokens for file sizes, opcode counts and properties. */
   tgsi_scan_shader(tgsi_tokens, &scan);
   /* NOTE(review): scan is a stack local, so c->scan dangles once this
    * function returns — looks like it is only consumed during parsing
    * below; confirm no later use.
    */
   c->scan = &scan;

   if (!options) {
      options =
         screen->get_compiler_options(screen, PIPE_SHADER_IR_NIR, scan.processor);
   }

   nir_builder_init_simple_shader(&c->build, NULL,
                                  tgsi_processor_to_shader_stage(scan.processor),
                                  options);

   s = c->build.shader;

   if (screen) {
      ttn_read_pipe_caps(c, screen);
   } else {
      /* TTN used to be hard coded to always make FACE a sysval,
       * so it makes sense to preserve that behavior so users don't break. */
      c->cap_face_is_sysval = true;
   }

   if (s->info.stage == MESA_SHADER_FRAGMENT)
      s->info.fs.untyped_color_outputs = true;

   /* File maxima are highest used index, hence the +1 for counts. */
   s->num_inputs = scan.file_max[TGSI_FILE_INPUT] + 1;
   s->num_uniforms = scan.const_file_max[0] + 1;
   s->num_outputs = scan.file_max[TGSI_FILE_OUTPUT] + 1;

   s->info.vs.window_space_position = scan.properties[TGSI_PROPERTY_VS_WINDOW_SPACE_POSITION];

   c->inputs = rzalloc_array(c, struct nir_variable *, s->num_inputs);
   c->outputs = rzalloc_array(c, struct nir_variable *, s->num_outputs);

   c->output_regs = rzalloc_array(c, struct ttn_reg_info,
                                  scan.file_max[TGSI_FILE_OUTPUT] + 1);
   c->temp_regs = rzalloc_array(c, struct ttn_reg_info,
                                scan.file_max[TGSI_FILE_TEMPORARY] + 1);
   c->imm_defs = rzalloc_array(c, nir_ssa_def *,
                               scan.file_max[TGSI_FILE_IMMEDIATE] + 1);

   c->num_samp_types = scan.file_max[TGSI_FILE_SAMPLER_VIEW] + 1;
   c->samp_types = rzalloc_array(c, nir_alu_type, c->num_samp_types);

   /* ttn_if() pushes two cursors per IF/UIF, hence the *2. */
   c->if_stack = rzalloc_array(c, nir_cursor,
                               (scan.opcode_count[TGSI_OPCODE_IF] +
                                scan.opcode_count[TGSI_OPCODE_UIF]) * 2);
   c->loop_stack = rzalloc_array(c, nir_cursor,
                                 scan.opcode_count[TGSI_OPCODE_BGNLOOP]);


   ttn_parse_tgsi(c, tgsi_tokens);
   ttn_add_output_stores(c);

   nir_validate_shader(c->build.shader, "TTN: after parsing TGSI and creating the NIR shader");

   return c;
}
2041
/* Runs the standard NIR optimization loop (copy-prop, DCE, CSE, algebraic,
 * etc.) to a fixed point, scalarizing first when the backend is scalar.
 */
static void
ttn_optimize_nir(nir_shader *nir, bool scalar)
{
   bool progress;
   do {
      progress = false;

      NIR_PASS_V(nir, nir_lower_vars_to_ssa);

      if (scalar) {
         NIR_PASS_V(nir, nir_lower_alu_to_scalar);
         NIR_PASS_V(nir, nir_lower_phis_to_scalar);
      }

      NIR_PASS_V(nir, nir_lower_alu);
      NIR_PASS_V(nir, nir_lower_pack);
      NIR_PASS(progress, nir, nir_copy_prop);
      NIR_PASS(progress, nir, nir_opt_remove_phis);
      NIR_PASS(progress, nir, nir_opt_dce);

      /* Trivial-continue removal opens up further cleanup opportunities. */
      if (nir_opt_trivial_continues(nir)) {
         progress = true;
         NIR_PASS(progress, nir, nir_copy_prop);
         NIR_PASS(progress, nir, nir_opt_dce);
      }

      NIR_PASS(progress, nir, nir_opt_if);
      NIR_PASS(progress, nir, nir_opt_dead_cf);
      NIR_PASS(progress, nir, nir_opt_cse);
      NIR_PASS(progress, nir, nir_opt_peephole_select, 8, true, true);

      NIR_PASS(progress, nir, nir_opt_algebraic);
      NIR_PASS(progress, nir, nir_opt_constant_folding);

      NIR_PASS(progress, nir, nir_opt_undef);
      NIR_PASS(progress, nir, nir_opt_conditional_discard);

      if (nir->options->max_unroll_iterations) {
         NIR_PASS(progress, nir, nir_opt_loop_unroll, (nir_variable_mode)0);
      }

   } while (progress);

}
2086
2087 /**
2088 * Finalizes the NIR in a similar way as st_glsl_to_nir does.
2089 *
2090 * Drivers expect that these passes are already performed,
2091 * so we have to do it here too.
2092 */
static void
ttn_finalize_nir(struct ttn_compile *c)
{
   struct nir_shader *nir = c->build.shader;

   NIR_PASS_V(nir, nir_lower_vars_to_ssa);
   NIR_PASS_V(nir, nir_lower_regs_to_ssa);

   NIR_PASS_V(nir, nir_lower_global_vars_to_local);
   NIR_PASS_V(nir, nir_split_var_copies);
   NIR_PASS_V(nir, nir_lower_var_copies);
   NIR_PASS_V(nir, nir_lower_system_values);

   if (c->cap_packed_uniforms)
      NIR_PASS_V(nir, nir_lower_uniforms_to_ubo, 16);

   /* Sampler lowering flavor depends on the driver's preference (cap). */
   if (c->cap_samplers_as_deref)
      NIR_PASS_V(nir, gl_nir_lower_samplers_as_deref, NULL);
   else
      NIR_PASS_V(nir, gl_nir_lower_samplers, NULL);

   ttn_optimize_nir(nir, c->cap_scalar);
   nir_shader_gather_info(nir, c->build.impl);
   nir_validate_shader(nir, "TTN: after all optimizations");
}
2118
2119 struct nir_shader *
2120 tgsi_to_nir(const void *tgsi_tokens,
2121 struct pipe_screen *screen)
2122 {
2123 struct ttn_compile *c;
2124 struct nir_shader *s;
2125
2126 c = ttn_compile_init(tgsi_tokens, NULL, screen);
2127 s = c->build.shader;
2128 ttn_finalize_nir(c);
2129 ralloc_free(c);
2130
2131 return s;
2132 }
2133
2134 struct nir_shader *
2135 tgsi_to_nir_noscreen(const void *tgsi_tokens,
2136 const nir_shader_compiler_options *options)
2137 {
2138 struct ttn_compile *c;
2139 struct nir_shader *s;
2140
2141 c = ttn_compile_init(tgsi_tokens, options, NULL);
2142 s = c->build.shader;
2143 ralloc_free(c);
2144
2145 return s;
2146 }
2147