gallium/cso_context: move non-vbuf vertex buffer and element code into helpers
[mesa.git] / src / gallium / auxiliary / nir / tgsi_to_nir.c
1 /*
2 * Copyright © 2014-2015 Broadcom
3 * Copyright (C) 2014 Rob Clark <robclark@freedesktop.org>
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9 * and/or sell copies of the Software, and to permit persons to whom the
10 * Software is furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice (including the next
13 * paragraph) shall be included in all copies or substantial portions of the
14 * Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
22 * IN THE SOFTWARE.
23 */
24
25 #include "util/ralloc.h"
26 #include "pipe/p_screen.h"
27
28 #include "compiler/nir/nir.h"
29 #include "compiler/nir/nir_control_flow.h"
30 #include "compiler/nir/nir_builder.h"
31 #include "compiler/shader_enums.h"
32
33 #include "tgsi_to_nir.h"
34 #include "tgsi/tgsi_parse.h"
35 #include "tgsi/tgsi_dump.h"
36 #include "tgsi/tgsi_info.h"
37 #include "tgsi/tgsi_scan.h"
38 #include "tgsi/tgsi_from_mesa.h"
39
/* Builds an unnamed unsigned[4] compound literal holding a TGSI swizzle,
 * from four single-letter component names (X/Y/Z/W).
 */
#define SWIZ(X, Y, Z, W) (unsigned[4]){ \
      TGSI_SWIZZLE_##X, \
      TGSI_SWIZZLE_##Y, \
      TGSI_SWIZZLE_##Z, \
      TGSI_SWIZZLE_##W, \
   }
46
/* Per-TGSI-index record of where a value lives on the NIR side: either a
 * nir_register (reg != NULL) or a slot of an array variable (var != NULL).
 */
struct ttn_reg_info {
   /** nir register containing this TGSI index. */
   nir_register *reg;
   nir_variable *var;
   /** Offset (in vec4s) from the start of var for this TGSI index. */
   int offset;
};
54
/* State carried through a single TGSI -> NIR translation. */
struct ttn_compile {
   /* The TGSI token currently being emitted. */
   union tgsi_full_token *token;
   nir_builder build;
   /* TGSI scan results; consulted for processor type and the
    * input/system-value semantic tables.
    */
   struct tgsi_shader_info *scan;

   /* Per-TGSI-index mappings to NIR registers/variables. */
   struct ttn_reg_info *output_regs;
   struct ttn_reg_info *temp_regs;
   /* SSA defs for each TGSI_FILE_IMMEDIATE vec4, in declaration order. */
   nir_ssa_def **imm_defs;

   unsigned num_samp_types;
   /* Return type recorded from each SAMPLER_VIEW declaration. */
   nir_alu_type *samp_types;

   /* The single TGSI_FILE_ADDRESS register. */
   nir_register *addr_reg;

   nir_variable **inputs;
   nir_variable **outputs;
   nir_variable *samplers[PIPE_MAX_SAMPLERS];
   nir_variable *images[PIPE_MAX_SHADER_IMAGES];
   nir_variable *ssbo[PIPE_MAX_SHADER_BUFFERS];

   /* Special fragment inputs that may need sysval/varying emulation. */
   nir_variable *input_var_face;
   nir_variable *input_var_position;
   nir_variable *input_var_point;

   /**
    * Stack of nir_cursors where instructions should be pushed as we pop
    * back out of the control flow stack.
    *
    * For each IF/ELSE/ENDIF block, if_stack[if_stack_pos] has where the else
    * instructions should be placed, and if_stack[if_stack_pos - 1] has where
    * the next instructions outside of the if/then/else block go.
    */
   nir_cursor *if_stack;
   unsigned if_stack_pos;

   /**
    * Stack of nir_cursors where instructions should be pushed as we pop
    * back out of the control flow stack.
    *
    * loop_stack[loop_stack_pos - 1] contains the cf_node_list for the outside
    * of the loop.
    */
   nir_cursor *loop_stack;
   unsigned loop_stack_pos;

   /* How many TGSI_FILE_IMMEDIATE vec4s have been parsed so far. */
   unsigned next_imm;

   /* Driver capability flags controlling how special inputs are lowered. */
   bool cap_face_is_sysval;
   bool cap_position_is_sysval;
   bool cap_point_is_sysval;
   bool cap_packed_uniforms;
   bool cap_samplers_as_deref;
};
109
/* Swizzle a vec4 src by four component letters (X/Y/Z/W). */
#define ttn_swizzle(b, src, x, y, z, w) \
   nir_swizzle(b, src, SWIZ(x, y, z, w), 4)
/* Extract a single named component of src. */
#define ttn_channel(b, src, swiz) \
   nir_channel(b, src, TGSI_SWIZZLE_##swiz)
114
115 static gl_varying_slot
116 tgsi_varying_semantic_to_slot(unsigned semantic, unsigned index)
117 {
118 switch (semantic) {
119 case TGSI_SEMANTIC_POSITION:
120 return VARYING_SLOT_POS;
121 case TGSI_SEMANTIC_COLOR:
122 if (index == 0)
123 return VARYING_SLOT_COL0;
124 else
125 return VARYING_SLOT_COL1;
126 case TGSI_SEMANTIC_BCOLOR:
127 if (index == 0)
128 return VARYING_SLOT_BFC0;
129 else
130 return VARYING_SLOT_BFC1;
131 case TGSI_SEMANTIC_FOG:
132 return VARYING_SLOT_FOGC;
133 case TGSI_SEMANTIC_PSIZE:
134 return VARYING_SLOT_PSIZ;
135 case TGSI_SEMANTIC_GENERIC:
136 assert(index < 32);
137 return VARYING_SLOT_VAR0 + index;
138 case TGSI_SEMANTIC_FACE:
139 return VARYING_SLOT_FACE;
140 case TGSI_SEMANTIC_EDGEFLAG:
141 return VARYING_SLOT_EDGE;
142 case TGSI_SEMANTIC_PRIMID:
143 return VARYING_SLOT_PRIMITIVE_ID;
144 case TGSI_SEMANTIC_CLIPDIST:
145 if (index == 0)
146 return VARYING_SLOT_CLIP_DIST0;
147 else
148 return VARYING_SLOT_CLIP_DIST1;
149 case TGSI_SEMANTIC_CLIPVERTEX:
150 return VARYING_SLOT_CLIP_VERTEX;
151 case TGSI_SEMANTIC_TEXCOORD:
152 assert(index < 8);
153 return VARYING_SLOT_TEX0 + index;
154 case TGSI_SEMANTIC_PCOORD:
155 return VARYING_SLOT_PNTC;
156 case TGSI_SEMANTIC_VIEWPORT_INDEX:
157 return VARYING_SLOT_VIEWPORT;
158 case TGSI_SEMANTIC_LAYER:
159 return VARYING_SLOT_LAYER;
160 case TGSI_SEMANTIC_TESSINNER:
161 return VARYING_SLOT_TESS_LEVEL_INNER;
162 case TGSI_SEMANTIC_TESSOUTER:
163 return VARYING_SLOT_TESS_LEVEL_OUTER;
164 default:
165 fprintf(stderr, "Bad TGSI semantic: %d/%d\n", semantic, index);
166 abort();
167 }
168 }
169
170 static enum gl_frag_depth_layout
171 ttn_get_depth_layout(unsigned tgsi_fs_depth_layout)
172 {
173 switch (tgsi_fs_depth_layout) {
174 case TGSI_FS_DEPTH_LAYOUT_NONE:
175 return FRAG_DEPTH_LAYOUT_NONE;
176 case TGSI_FS_DEPTH_LAYOUT_ANY:
177 return FRAG_DEPTH_LAYOUT_ANY;
178 case TGSI_FS_DEPTH_LAYOUT_GREATER:
179 return FRAG_DEPTH_LAYOUT_GREATER;
180 case TGSI_FS_DEPTH_LAYOUT_LESS:
181 return FRAG_DEPTH_LAYOUT_LESS;
182 case TGSI_FS_DEPTH_LAYOUT_UNCHANGED:
183 return FRAG_DEPTH_LAYOUT_UNCHANGED;
184 default:
185 unreachable("bad TGSI FS depth layout");
186 }
187 }
188
/* Reads back the value just written through an ALU dest, as a fresh SSA def
 * (identity-swizzled vec4 mov).  Used to re-source a destination after an
 * instruction has been emitted.
 */
static nir_ssa_def *
ttn_src_for_dest(nir_builder *b, nir_alu_dest *dest)
{
   nir_alu_src src;
   memset(&src, 0, sizeof(src));

   if (dest->dest.is_ssa)
      src.src = nir_src_for_ssa(&dest->dest.ssa);
   else {
      /* An indirect store can't be read back as a simple register src. */
      assert(!dest->dest.reg.indirect);
      src.src = nir_src_for_reg(dest->dest.reg.reg);
      src.src.reg.base_offset = dest->dest.reg.base_offset;
   }

   /* Identity swizzle: pass all four channels through unchanged. */
   for (int i = 0; i < 4; i++)
      src.swizzle[i] = i;

   return nir_mov_alu(b, src, 4);
}
208
209 static enum glsl_interp_mode
210 ttn_translate_interp_mode(unsigned tgsi_interp)
211 {
212 switch (tgsi_interp) {
213 case TGSI_INTERPOLATE_CONSTANT:
214 return INTERP_MODE_FLAT;
215 case TGSI_INTERPOLATE_LINEAR:
216 return INTERP_MODE_NOPERSPECTIVE;
217 case TGSI_INTERPOLATE_PERSPECTIVE:
218 return INTERP_MODE_SMOOTH;
219 case TGSI_INTERPOLATE_COLOR:
220 return INTERP_MODE_NONE;
221 default:
222 unreachable("bad TGSI interpolation mode");
223 }
224 }
225
226 static void
227 ttn_emit_declaration(struct ttn_compile *c)
228 {
229 nir_builder *b = &c->build;
230 struct tgsi_full_declaration *decl = &c->token->FullDeclaration;
231 unsigned array_size = decl->Range.Last - decl->Range.First + 1;
232 unsigned file = decl->Declaration.File;
233 unsigned i;
234
235 if (file == TGSI_FILE_TEMPORARY) {
236 if (decl->Declaration.Array) {
237 /* for arrays, we create variables instead of registers: */
238 nir_variable *var = rzalloc(b->shader, nir_variable);
239
240 var->type = glsl_array_type(glsl_vec4_type(), array_size, 0);
241 var->data.mode = nir_var_shader_temp;
242 var->name = ralloc_asprintf(var, "arr_%d", decl->Array.ArrayID);
243
244 exec_list_push_tail(&b->shader->globals, &var->node);
245
246 for (i = 0; i < array_size; i++) {
247 /* point all the matching slots to the same var,
248 * with appropriate offset set, mostly just so
249 * we know what to do when tgsi does a non-indirect
250 * access
251 */
252 c->temp_regs[decl->Range.First + i].reg = NULL;
253 c->temp_regs[decl->Range.First + i].var = var;
254 c->temp_regs[decl->Range.First + i].offset = i;
255 }
256 } else {
257 for (i = 0; i < array_size; i++) {
258 nir_register *reg = nir_local_reg_create(b->impl);
259 reg->num_components = 4;
260 c->temp_regs[decl->Range.First + i].reg = reg;
261 c->temp_regs[decl->Range.First + i].var = NULL;
262 c->temp_regs[decl->Range.First + i].offset = 0;
263 }
264 }
265 } else if (file == TGSI_FILE_ADDRESS) {
266 c->addr_reg = nir_local_reg_create(b->impl);
267 c->addr_reg->num_components = 4;
268 } else if (file == TGSI_FILE_SYSTEM_VALUE) {
269 /* Nothing to record for system values. */
270 } else if (file == TGSI_FILE_BUFFER) {
271 /* Nothing to record for buffers. */
272 } else if (file == TGSI_FILE_IMAGE) {
273 /* Nothing to record for images. */
274 } else if (file == TGSI_FILE_SAMPLER) {
275 /* Nothing to record for samplers. */
276 } else if (file == TGSI_FILE_SAMPLER_VIEW) {
277 struct tgsi_declaration_sampler_view *sview = &decl->SamplerView;
278 nir_alu_type type;
279
280 assert((sview->ReturnTypeX == sview->ReturnTypeY) &&
281 (sview->ReturnTypeX == sview->ReturnTypeZ) &&
282 (sview->ReturnTypeX == sview->ReturnTypeW));
283
284 switch (sview->ReturnTypeX) {
285 case TGSI_RETURN_TYPE_SINT:
286 type = nir_type_int;
287 break;
288 case TGSI_RETURN_TYPE_UINT:
289 type = nir_type_uint;
290 break;
291 case TGSI_RETURN_TYPE_FLOAT:
292 default:
293 type = nir_type_float;
294 break;
295 }
296
297 for (i = 0; i < array_size; i++) {
298 c->samp_types[decl->Range.First + i] = type;
299 }
300 } else {
301 bool is_array = (array_size > 1);
302
303 assert(file == TGSI_FILE_INPUT ||
304 file == TGSI_FILE_OUTPUT ||
305 file == TGSI_FILE_CONSTANT);
306
307 /* nothing to do for UBOs: */
308 if ((file == TGSI_FILE_CONSTANT) && decl->Declaration.Dimension &&
309 decl->Dim.Index2D != 0) {
310 b->shader->info.num_ubos =
311 MAX2(b->shader->info.num_ubos, decl->Dim.Index2D);
312 return;
313 }
314
315 if ((file == TGSI_FILE_INPUT) || (file == TGSI_FILE_OUTPUT)) {
316 is_array = (is_array && decl->Declaration.Array &&
317 (decl->Array.ArrayID != 0));
318 }
319
320 for (i = 0; i < array_size; i++) {
321 unsigned idx = decl->Range.First + i;
322 nir_variable *var = rzalloc(b->shader, nir_variable);
323
324 var->data.driver_location = idx;
325
326 var->type = glsl_vec4_type();
327 if (is_array)
328 var->type = glsl_array_type(var->type, array_size, 0);
329
330 switch (file) {
331 case TGSI_FILE_INPUT:
332 var->data.read_only = true;
333 var->data.mode = nir_var_shader_in;
334 var->name = ralloc_asprintf(var, "in_%d", idx);
335
336 if (c->scan->processor == PIPE_SHADER_FRAGMENT) {
337 if (decl->Semantic.Name == TGSI_SEMANTIC_FACE) {
338 var->type = glsl_bool_type();
339 if (c->cap_face_is_sysval) {
340 var->data.mode = nir_var_system_value;
341 var->data.location = SYSTEM_VALUE_FRONT_FACE;
342 } else {
343 var->data.location = VARYING_SLOT_FACE;
344 }
345 c->input_var_face = var;
346 } else if (decl->Semantic.Name == TGSI_SEMANTIC_POSITION) {
347 if (c->cap_position_is_sysval) {
348 var->data.mode = nir_var_system_value;
349 var->data.location = SYSTEM_VALUE_FRAG_COORD;
350 } else {
351 var->data.location = VARYING_SLOT_POS;
352 }
353 c->input_var_position = var;
354 } else if (decl->Semantic.Name == TGSI_SEMANTIC_PCOORD) {
355 if (c->cap_point_is_sysval) {
356 var->data.mode = nir_var_system_value;
357 var->data.location = SYSTEM_VALUE_POINT_COORD;
358 } else {
359 var->data.location = VARYING_SLOT_PNTC;
360 }
361 c->input_var_point = var;
362 } else {
363 var->data.location =
364 tgsi_varying_semantic_to_slot(decl->Semantic.Name,
365 decl->Semantic.Index);
366 }
367 } else {
368 assert(!decl->Declaration.Semantic);
369 var->data.location = VERT_ATTRIB_GENERIC0 + idx;
370 }
371 var->data.index = 0;
372 var->data.interpolation =
373 ttn_translate_interp_mode(decl->Interp.Interpolate);
374
375 exec_list_push_tail(&b->shader->inputs, &var->node);
376 c->inputs[idx] = var;
377
378 for (int i = 0; i < array_size; i++)
379 b->shader->info.inputs_read |= 1 << (var->data.location + i);
380
381 break;
382 case TGSI_FILE_OUTPUT: {
383 int semantic_name = decl->Semantic.Name;
384 int semantic_index = decl->Semantic.Index;
385 /* Since we can't load from outputs in the IR, we make temporaries
386 * for the outputs and emit stores to the real outputs at the end of
387 * the shader.
388 */
389 nir_register *reg = nir_local_reg_create(b->impl);
390 reg->num_components = 4;
391 if (is_array)
392 reg->num_array_elems = array_size;
393
394 var->data.mode = nir_var_shader_out;
395 var->name = ralloc_asprintf(var, "out_%d", idx);
396 var->data.index = 0;
397 var->data.interpolation =
398 ttn_translate_interp_mode(decl->Interp.Interpolate);
399 var->data.patch = semantic_name == TGSI_SEMANTIC_TESSINNER ||
400 semantic_name == TGSI_SEMANTIC_TESSOUTER ||
401 semantic_name == TGSI_SEMANTIC_PATCH;
402
403 if (c->scan->processor == PIPE_SHADER_FRAGMENT) {
404 switch (semantic_name) {
405 case TGSI_SEMANTIC_COLOR: {
406 /* TODO tgsi loses some information, so we cannot
407 * actually differentiate here between DSB and MRT
408 * at this point. But so far no drivers using tgsi-
409 * to-nir support dual source blend:
410 */
411 bool dual_src_blend = false;
412 if (dual_src_blend && (semantic_index == 1)) {
413 var->data.location = FRAG_RESULT_DATA0;
414 var->data.index = 1;
415 } else {
416 if (c->scan->properties[TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS])
417 var->data.location = FRAG_RESULT_COLOR;
418 else
419 var->data.location = FRAG_RESULT_DATA0 + semantic_index;
420 }
421 break;
422 }
423 case TGSI_SEMANTIC_POSITION:
424 var->data.location = FRAG_RESULT_DEPTH;
425 var->type = glsl_float_type();
426 break;
427 case TGSI_SEMANTIC_STENCIL:
428 var->data.location = FRAG_RESULT_STENCIL;
429 var->type = glsl_int_type();
430 break;
431 default:
432 fprintf(stderr, "Bad TGSI semantic: %d/%d\n",
433 decl->Semantic.Name, decl->Semantic.Index);
434 abort();
435 }
436 } else {
437 var->data.location =
438 tgsi_varying_semantic_to_slot(semantic_name, semantic_index);
439 if (var->data.location == VARYING_SLOT_FOGC ||
440 var->data.location == VARYING_SLOT_PSIZ) {
441 var->type = glsl_float_type();
442 }
443 }
444
445 if (is_array) {
446 unsigned j;
447 for (j = 0; j < array_size; j++) {
448 c->output_regs[idx + j].offset = i + j;
449 c->output_regs[idx + j].reg = reg;
450 }
451 } else {
452 c->output_regs[idx].offset = i;
453 c->output_regs[idx].reg = reg;
454 }
455
456 exec_list_push_tail(&b->shader->outputs, &var->node);
457 c->outputs[idx] = var;
458
459 for (int i = 0; i < array_size; i++)
460 b->shader->info.outputs_written |= 1ull << (var->data.location + i);
461 }
462 break;
463 case TGSI_FILE_CONSTANT:
464 var->data.mode = nir_var_uniform;
465 var->name = ralloc_asprintf(var, "uniform_%d", idx);
466 var->data.location = idx;
467
468 exec_list_push_tail(&b->shader->uniforms, &var->node);
469 break;
470 default:
471 unreachable("bad declaration file");
472 return;
473 }
474
475 if (is_array)
476 break;
477 }
478
479 }
480 }
481
482 static void
483 ttn_emit_immediate(struct ttn_compile *c)
484 {
485 nir_builder *b = &c->build;
486 struct tgsi_full_immediate *tgsi_imm = &c->token->FullImmediate;
487 nir_load_const_instr *load_const;
488 int i;
489
490 load_const = nir_load_const_instr_create(b->shader, 4, 32);
491 c->imm_defs[c->next_imm] = &load_const->def;
492 c->next_imm++;
493
494 for (i = 0; i < load_const->def.num_components; i++)
495 load_const->value[i].u32 = tgsi_imm->u[i].Uint;
496
497 nir_builder_instr_insert(b, &load_const->instr);
498 }
499
500 static nir_ssa_def *
501 ttn_src_for_indirect(struct ttn_compile *c, struct tgsi_ind_register *indirect);
502
503 /* generate either a constant or indirect deref chain for accessing an
504 * array variable.
505 */
506 static nir_deref_instr *
507 ttn_array_deref(struct ttn_compile *c, nir_variable *var, unsigned offset,
508 struct tgsi_ind_register *indirect)
509 {
510 nir_deref_instr *deref = nir_build_deref_var(&c->build, var);
511 nir_ssa_def *index = nir_imm_int(&c->build, offset);
512 if (indirect)
513 index = nir_iadd(&c->build, index, ttn_src_for_indirect(c, indirect));
514 return nir_build_deref_array(&c->build, deref, index);
515 }
516
517 /* Special case: Turn the frontface varying into a load of the
518 * frontface variable, and create the vector as required by TGSI.
519 */
520 static nir_ssa_def *
521 ttn_emulate_tgsi_front_face(struct ttn_compile *c)
522 {
523 nir_ssa_def *tgsi_frontface[4];
524
525 if (c->cap_face_is_sysval) {
526 /* When it's a system value, it should be an integer vector: (F, 0, 0, 1)
527 * F is 0xffffffff if front-facing, 0 if not.
528 */
529
530 nir_ssa_def *frontface = nir_load_front_face(&c->build, 1);
531
532 tgsi_frontface[0] = nir_bcsel(&c->build,
533 frontface,
534 nir_imm_int(&c->build, 0xffffffff),
535 nir_imm_int(&c->build, 0));
536 tgsi_frontface[1] = nir_imm_int(&c->build, 0);
537 tgsi_frontface[2] = nir_imm_int(&c->build, 0);
538 tgsi_frontface[3] = nir_imm_int(&c->build, 1);
539 } else {
540 /* When it's an input, it should be a float vector: (F, 0.0, 0.0, 1.0)
541 * F is positive if front-facing, negative if not.
542 */
543
544 assert(c->input_var_face);
545 nir_ssa_def *frontface = nir_load_var(&c->build, c->input_var_face);
546
547 tgsi_frontface[0] = nir_bcsel(&c->build,
548 frontface,
549 nir_imm_float(&c->build, 1.0),
550 nir_imm_float(&c->build, -1.0));
551 tgsi_frontface[1] = nir_imm_float(&c->build, 0.0);
552 tgsi_frontface[2] = nir_imm_float(&c->build, 0.0);
553 tgsi_frontface[3] = nir_imm_float(&c->build, 1.0);
554 }
555
556 return nir_vec(&c->build, tgsi_frontface, 4);
557 }
558
/* Builds a nir_src reading TGSI register file @file at @index.
 *
 * @indirect: optional indirect addressing term on the register index.
 * @dim/@dimind: optional 2D dimension (selects a UBO for CONSTANT reads),
 *               with @dimind carrying an indirect UBO index.
 * @src_is_float: selects float vs. int typing for uniform loads.
 */
static nir_src
ttn_src_for_file_and_index(struct ttn_compile *c, unsigned file, unsigned index,
                           struct tgsi_ind_register *indirect,
                           struct tgsi_dimension *dim,
                           struct tgsi_ind_register *dimind,
                           bool src_is_float)
{
   nir_builder *b = &c->build;
   nir_src src;

   memset(&src, 0, sizeof(src));

   switch (file) {
   case TGSI_FILE_TEMPORARY:
      if (c->temp_regs[index].var) {
         /* Array temporary: load through a (possibly indirect) deref. */
         unsigned offset = c->temp_regs[index].offset;
         nir_variable *var = c->temp_regs[index].var;
         nir_ssa_def *load = nir_load_deref(&c->build,
               ttn_array_deref(c, var, offset, indirect));

         src = nir_src_for_ssa(load);
      } else {
         /* Plain temporary: a direct register read only. */
         assert(!indirect);
         src.reg.reg = c->temp_regs[index].reg;
      }
      assert(!dim);
      break;

   case TGSI_FILE_ADDRESS:
      src.reg.reg = c->addr_reg;
      assert(!dim);
      break;

   case TGSI_FILE_IMMEDIATE:
      /* Immediates were materialized as load_const defs up front. */
      src = nir_src_for_ssa(c->imm_defs[index]);
      assert(!indirect);
      assert(!dim);
      break;

   case TGSI_FILE_SYSTEM_VALUE: {
      nir_intrinsic_op op;
      nir_ssa_def *load;

      assert(!indirect);
      assert(!dim);

      /* Map the scanned semantic to the matching load intrinsic; op is
       * kept alongside load so system_values_read can be updated below.
       */
      switch (c->scan->system_value_semantic_name[index]) {
      case TGSI_SEMANTIC_VERTEXID_NOBASE:
         op = nir_intrinsic_load_vertex_id_zero_base;
         load = nir_load_vertex_id_zero_base(b);
         break;
      case TGSI_SEMANTIC_VERTEXID:
         op = nir_intrinsic_load_vertex_id;
         load = nir_load_vertex_id(b);
         break;
      case TGSI_SEMANTIC_BASEVERTEX:
         op = nir_intrinsic_load_base_vertex;
         load = nir_load_base_vertex(b);
         break;
      case TGSI_SEMANTIC_INSTANCEID:
         op = nir_intrinsic_load_instance_id;
         load = nir_load_instance_id(b);
         break;
      case TGSI_SEMANTIC_FACE:
         assert(c->cap_face_is_sysval);
         op = nir_intrinsic_load_front_face;
         load = ttn_emulate_tgsi_front_face(c);
         break;
      case TGSI_SEMANTIC_POSITION:
         assert(c->cap_position_is_sysval);
         op = nir_intrinsic_load_frag_coord;
         load = nir_load_frag_coord(b);
         break;
      case TGSI_SEMANTIC_PCOORD:
         assert(c->cap_point_is_sysval);
         op = nir_intrinsic_load_point_coord;
         load = nir_load_point_coord(b);
         break;
      case TGSI_SEMANTIC_THREAD_ID:
         op = nir_intrinsic_load_local_invocation_id;
         load = nir_load_local_invocation_id(b);
         break;
      case TGSI_SEMANTIC_BLOCK_ID:
         op = nir_intrinsic_load_work_group_id;
         load = nir_load_work_group_id(b);
         break;
      case TGSI_SEMANTIC_CS_USER_DATA_AMD:
         op = nir_intrinsic_load_user_data_amd;
         load = nir_load_user_data_amd(b);
         break;
      case TGSI_SEMANTIC_TESS_DEFAULT_INNER_LEVEL:
         op = nir_intrinsic_load_tess_level_inner_default;
         load = nir_load_tess_level_inner_default(b);
         break;
      case TGSI_SEMANTIC_TESS_DEFAULT_OUTER_LEVEL:
         op = nir_intrinsic_load_tess_level_outer_default;
         load = nir_load_tess_level_outer_default(b);
         break;
      default:
         unreachable("bad system value");
      }

      /* Widen narrow sysvals to vec4 by repeating the last component,
       * since TGSI sources are always vec4.
       */
      if (load->num_components == 2)
         load = nir_swizzle(b, load, SWIZ(X, Y, Y, Y), 4);
      else if (load->num_components == 3)
         load = nir_swizzle(b, load, SWIZ(X, Y, Z, Z), 4);

      src = nir_src_for_ssa(load);
      b->shader->info.system_values_read |=
         (1ull << nir_system_value_from_intrinsic(op));

      break;
   }

   case TGSI_FILE_INPUT:
      /* Fragment FACE/POSITION/PCOORD inputs may need sysval emulation;
       * everything else is a plain variable load.
       */
      if (c->scan->processor == PIPE_SHADER_FRAGMENT &&
          c->scan->input_semantic_name[index] == TGSI_SEMANTIC_FACE) {
         assert(!c->cap_face_is_sysval && c->input_var_face);
         return nir_src_for_ssa(ttn_emulate_tgsi_front_face(c));
      } else if (c->scan->processor == PIPE_SHADER_FRAGMENT &&
          c->scan->input_semantic_name[index] == TGSI_SEMANTIC_POSITION) {
         assert(!c->cap_position_is_sysval && c->input_var_position);
         return nir_src_for_ssa(nir_load_var(&c->build, c->input_var_position));
      } else if (c->scan->processor == PIPE_SHADER_FRAGMENT &&
          c->scan->input_semantic_name[index] == TGSI_SEMANTIC_PCOORD) {
         assert(!c->cap_point_is_sysval && c->input_var_point);
         return nir_src_for_ssa(nir_load_var(&c->build, c->input_var_point));
      } else {
         /* Indirection on input arrays isn't supported by TTN. */
         assert(!dim);
         nir_deref_instr *deref = nir_build_deref_var(&c->build,
                                                      c->inputs[index]);
         return nir_src_for_ssa(nir_load_deref(&c->build, deref));
      }
      break;

   case TGSI_FILE_CONSTANT: {
      nir_intrinsic_instr *load;
      nir_intrinsic_op op;
      unsigned srcn = 0;

      /* A non-zero (or indirect) 2D index means a real UBO; otherwise the
       * default constant buffer is accessed as load_uniform.
       */
      if (dim && (dim->Index > 0 || dim->Indirect)) {
         op = nir_intrinsic_load_ubo;
      } else {
         op = nir_intrinsic_load_uniform;
      }

      load = nir_intrinsic_instr_create(b->shader, op);
      if (op == nir_intrinsic_load_uniform) {
         nir_intrinsic_set_type(load, src_is_float ? nir_type_float :
                                                     nir_type_int);
      }

      load->num_components = 4;
      if (dim && (dim->Index > 0 || dim->Indirect)) {
         /* First source of load_ubo is the buffer index. */
         if (dimind) {
            load->src[srcn] =
               ttn_src_for_file_and_index(c, dimind->File, dimind->Index,
                                          NULL, NULL, NULL, false);
         } else {
            /* UBOs start at index 1 in TGSI: */
            load->src[srcn] =
               nir_src_for_ssa(nir_imm_int(b, dim->Index - 1));
         }
         srcn++;
      }

      nir_ssa_def *offset;
      if (op == nir_intrinsic_load_ubo) {
         /* UBO loads don't have a base offset. */
         offset = nir_imm_int(b, index);
         if (indirect) {
            offset = nir_iadd(b, offset, ttn_src_for_indirect(c, indirect));
         }
         /* UBO offsets are in bytes, but TGSI gives them to us in vec4's */
         offset = nir_ishl(b, offset, nir_imm_int(b, 4));
      } else {
         nir_intrinsic_set_base(load, index);
         if (indirect) {
            offset = ttn_src_for_indirect(c, indirect);
         } else {
            offset = nir_imm_int(b, 0);
         }
      }
      load->src[srcn++] = nir_src_for_ssa(offset);

      nir_ssa_dest_init(&load->instr, &load->dest, 4, 32, NULL);
      nir_builder_instr_insert(b, &load->instr);

      src = nir_src_for_ssa(&load->dest.ssa);
      break;
   }

   default:
      unreachable("bad src file");
   }


   return src;
}
759
760 static nir_ssa_def *
761 ttn_src_for_indirect(struct ttn_compile *c, struct tgsi_ind_register *indirect)
762 {
763 nir_builder *b = &c->build;
764 nir_alu_src src;
765 memset(&src, 0, sizeof(src));
766 for (int i = 0; i < 4; i++)
767 src.swizzle[i] = indirect->Swizzle;
768 src.src = ttn_src_for_file_and_index(c,
769 indirect->File,
770 indirect->Index,
771 NULL, NULL, NULL,
772 false);
773 return nir_mov_alu(b, src, 1);
774 }
775
/* Builds the nir_alu_dest for a TGSI destination register, mapping
 * TEMPORARY/OUTPUT/ADDRESS files onto the NIR registers recorded at
 * declaration time.
 */
static nir_alu_dest
ttn_get_dest(struct ttn_compile *c, struct tgsi_full_dst_register *tgsi_fdst)
{
   struct tgsi_dst_register *tgsi_dst = &tgsi_fdst->Register;
   nir_alu_dest dest;
   unsigned index = tgsi_dst->Index;

   memset(&dest, 0, sizeof(dest));

   if (tgsi_dst->File == TGSI_FILE_TEMPORARY) {
      if (c->temp_regs[index].var) {
         nir_register *reg;

         /* this works, because TGSI will give us a base offset
          * (in case of indirect index) that points back into
          * the array.  Access can be direct or indirect, we
          * don't really care.  Just create a one-shot dst reg
          * that will get store_var'd back into the array var
          * at the end of ttn_emit_instruction()
          */
         reg = nir_local_reg_create(c->build.impl);
         reg->num_components = 4;
         dest.dest.reg.reg = reg;
         dest.dest.reg.base_offset = 0;
      } else {
         /* Plain temporaries only support direct addressing. */
         assert(!tgsi_dst->Indirect);
         dest.dest.reg.reg = c->temp_regs[index].reg;
         dest.dest.reg.base_offset = c->temp_regs[index].offset;
      }
   } else if (tgsi_dst->File == TGSI_FILE_OUTPUT) {
      dest.dest.reg.reg = c->output_regs[index].reg;
      dest.dest.reg.base_offset = c->output_regs[index].offset;
   } else if (tgsi_dst->File == TGSI_FILE_ADDRESS) {
      /* There is only one address register. */
      assert(index == 0);
      dest.dest.reg.reg = c->addr_reg;
   }

   dest.write_mask = tgsi_dst->WriteMask;
   dest.saturate = false;

   /* Temporary-file indirection was already folded into the one-shot
    * register above; other files carry the indirect on the dest itself.
    */
   if (tgsi_dst->Indirect && (tgsi_dst->File != TGSI_FILE_TEMPORARY)) {
      nir_src *indirect = ralloc(c->build.shader, nir_src);
      *indirect = nir_src_for_ssa(ttn_src_for_indirect(c, &tgsi_fdst->Indirect));
      dest.dest.reg.indirect = indirect;
   }

   return dest;
}
824
825 static nir_variable *
826 ttn_get_var(struct ttn_compile *c, struct tgsi_full_dst_register *tgsi_fdst)
827 {
828 struct tgsi_dst_register *tgsi_dst = &tgsi_fdst->Register;
829 unsigned index = tgsi_dst->Index;
830
831 if (tgsi_dst->File == TGSI_FILE_TEMPORARY) {
832 /* we should not have an indirect when there is no var! */
833 if (!c->temp_regs[index].var)
834 assert(!tgsi_dst->Indirect);
835 return c->temp_regs[index].var;
836 }
837
838 return NULL;
839 }
840
/* Builds the SSA def for TGSI source operand @src_idx of the current
 * instruction, applying its swizzle and Absolute/Negate modifiers.  The
 * operand's float/int typing is inferred from the opcode.  Returns NULL for
 * resource files (sampler/image/buffer), which are looked up by index at
 * the use site instead.
 */
static nir_ssa_def *
ttn_get_src(struct ttn_compile *c, struct tgsi_full_src_register *tgsi_fsrc,
            int src_idx)
{
   nir_builder *b = &c->build;
   struct tgsi_src_register *tgsi_src = &tgsi_fsrc->Register;
   enum tgsi_opcode opcode = c->token->FullInstruction.Instruction.Opcode;
   unsigned tgsi_src_type = tgsi_opcode_infer_src_type(opcode, src_idx);
   bool src_is_float = (tgsi_src_type == TGSI_TYPE_FLOAT ||
                        tgsi_src_type == TGSI_TYPE_DOUBLE ||
                        tgsi_src_type == TGSI_TYPE_UNTYPED);
   nir_alu_src src;

   memset(&src, 0, sizeof(src));

   if (tgsi_src->File == TGSI_FILE_NULL) {
      return nir_imm_float(b, 0.0);
   } else if (tgsi_src->File == TGSI_FILE_SAMPLER ||
              tgsi_src->File == TGSI_FILE_IMAGE ||
              tgsi_src->File == TGSI_FILE_BUFFER) {
      /* Only the index of the resource gets used in texturing, and it will
       * handle looking that up on its own instead of using the nir_alu_src.
       */
      assert(!tgsi_src->Indirect);
      return NULL;
   } else {
      /* Gather the optional indirect/dimension terms before resolving. */
      struct tgsi_ind_register *ind = NULL;
      struct tgsi_dimension *dim = NULL;
      struct tgsi_ind_register *dimind = NULL;
      if (tgsi_src->Indirect)
         ind = &tgsi_fsrc->Indirect;
      if (tgsi_src->Dimension) {
         dim = &tgsi_fsrc->Dimension;
         if (dim->Indirect)
            dimind = &tgsi_fsrc->DimIndirect;
      }
      src.src = ttn_src_for_file_and_index(c,
                                           tgsi_src->File,
                                           tgsi_src->Index,
                                           ind, dim, dimind,
                                           src_is_float);
   }

   src.swizzle[0] = tgsi_src->SwizzleX;
   src.swizzle[1] = tgsi_src->SwizzleY;
   src.swizzle[2] = tgsi_src->SwizzleZ;
   src.swizzle[3] = tgsi_src->SwizzleW;

   nir_ssa_def *def = nir_mov_alu(b, src, 4);

   /* 64-bit operands arrive as vec4 of 32-bit halves; reinterpret before
    * applying modifiers so abs/neg act on the 64-bit values.
    */
   if (tgsi_type_is_64bit(tgsi_src_type))
      def = nir_bitcast_vector(b, def, 64);

   if (tgsi_src->Absolute) {
      if (src_is_float)
         def = nir_fabs(b, def);
      else
         def = nir_iabs(b, def);
   }

   if (tgsi_src->Negate) {
      if (src_is_float)
         def = nir_fneg(b, def);
      else
         def = nir_ineg(b, def);
   }

   return def;
}
910
/* Moves @def into @dest restricted to the channels in @write_mask (further
 * ANDed with dest's own write mask).  Emits nothing when the intersection
 * is empty.
 */
static void
ttn_move_dest_masked(nir_builder *b, nir_alu_dest dest,
                     nir_ssa_def *def, unsigned write_mask)
{
   if (!(dest.write_mask & write_mask))
      return;

   nir_alu_instr *mov = nir_alu_instr_create(b->shader, nir_op_mov);
   mov->dest = dest;
   mov->dest.write_mask &= write_mask;
   mov->src[0].src = nir_src_for_ssa(def);
   /* Pad the swizzle by repeating def's last component so a def narrower
    * than vec4 can legally feed the masked vec4 write.
    */
   for (unsigned i = def->num_components; i < 4; i++)
      mov->src[0].swizzle[i] = def->num_components - 1;
   nir_builder_instr_insert(b, &mov->instr);
}
926
927 static void
928 ttn_move_dest(nir_builder *b, nir_alu_dest dest, nir_ssa_def *def)
929 {
930 ttn_move_dest_masked(b, dest, def, TGSI_WRITEMASK_XYZW);
931 }
932
/* Emits a generic ALU op and stores the result, normalizing the result to
 * the TGSI destination representation: 1-bit booleans become 0/~0 integers
 * of @dest_bitsize, and 64-bit results are re-expressed as 32-bit vec4s.
 */
static void
ttn_alu(nir_builder *b, nir_op op, nir_alu_dest dest, unsigned dest_bitsize,
        nir_ssa_def **src)
{
   nir_ssa_def *def = nir_build_alu_src_arr(b, op, src);
   /* TGSI booleans are all-ones integers, so negate the 0/1 from b2i. */
   if (def->bit_size == 1)
      def = nir_ineg(b, nir_b2i(b, def, dest_bitsize));
   assert(def->bit_size == dest_bitsize);
   if (dest_bitsize == 64) {
      if (def->num_components > 2) {
         /* 32 -> 64 bit conversion ops are supposed to only convert the first
          * two components, and we need to truncate here to avoid creating a
          * vec8 after bitcasting the destination.
          */
         def = nir_channels(b, def, 0x3);
      }
      /* Store 64-bit results as pairs of 32-bit register channels. */
      def = nir_bitcast_vector(b, def, 32);
   }
   ttn_move_dest(b, dest, def);
}
953
954 static void
955 ttn_arl(nir_builder *b, nir_op op, nir_alu_dest dest, nir_ssa_def **src)
956 {
957 ttn_move_dest(b, dest, nir_f2i32(b, nir_ffloor(b, src[0])));
958 }
959
960 /* EXP - Approximate Exponential Base 2
961 * dst.x = 2^{\lfloor src.x\rfloor}
962 * dst.y = src.x - \lfloor src.x\rfloor
963 * dst.z = 2^{src.x}
964 * dst.w = 1.0
965 */
966 static void
967 ttn_exp(nir_builder *b, nir_op op, nir_alu_dest dest, nir_ssa_def **src)
968 {
969 nir_ssa_def *srcx = ttn_channel(b, src[0], X);
970
971 ttn_move_dest_masked(b, dest, nir_fexp2(b, nir_ffloor(b, srcx)),
972 TGSI_WRITEMASK_X);
973 ttn_move_dest_masked(b, dest, nir_fsub(b, srcx, nir_ffloor(b, srcx)),
974 TGSI_WRITEMASK_Y);
975 ttn_move_dest_masked(b, dest, nir_fexp2(b, srcx), TGSI_WRITEMASK_Z);
976 ttn_move_dest_masked(b, dest, nir_imm_float(b, 1.0), TGSI_WRITEMASK_W);
977 }
978
979 /* LOG - Approximate Logarithm Base 2
980 * dst.x = \lfloor\log_2{|src.x|}\rfloor
981 * dst.y = \frac{|src.x|}{2^{\lfloor\log_2{|src.x|}\rfloor}}
982 * dst.z = \log_2{|src.x|}
983 * dst.w = 1.0
984 */
985 static void
986 ttn_log(nir_builder *b, nir_op op, nir_alu_dest dest, nir_ssa_def **src)
987 {
988 nir_ssa_def *abs_srcx = nir_fabs(b, ttn_channel(b, src[0], X));
989 nir_ssa_def *log2 = nir_flog2(b, abs_srcx);
990
991 ttn_move_dest_masked(b, dest, nir_ffloor(b, log2), TGSI_WRITEMASK_X);
992 ttn_move_dest_masked(b, dest,
993 nir_fdiv(b, abs_srcx, nir_fexp2(b, nir_ffloor(b, log2))),
994 TGSI_WRITEMASK_Y);
995 ttn_move_dest_masked(b, dest, nir_flog2(b, abs_srcx), TGSI_WRITEMASK_Z);
996 ttn_move_dest_masked(b, dest, nir_imm_float(b, 1.0), TGSI_WRITEMASK_W);
997 }
998
999 /* DST - Distance Vector
1000 * dst.x = 1.0
1001 * dst.y = src0.y \times src1.y
1002 * dst.z = src0.z
1003 * dst.w = src1.w
1004 */
1005 static void
1006 ttn_dst(nir_builder *b, nir_op op, nir_alu_dest dest, nir_ssa_def **src)
1007 {
1008 ttn_move_dest_masked(b, dest, nir_imm_float(b, 1.0), TGSI_WRITEMASK_X);
1009 ttn_move_dest_masked(b, dest, nir_fmul(b, src[0], src[1]), TGSI_WRITEMASK_Y);
1010 ttn_move_dest_masked(b, dest, nir_mov(b, src[0]), TGSI_WRITEMASK_Z);
1011 ttn_move_dest_masked(b, dest, nir_mov(b, src[1]), TGSI_WRITEMASK_W);
1012 }
1013
1014 /* LIT - Light Coefficients
1015 * dst.x = 1.0
1016 * dst.y = max(src.x, 0.0)
 * dst.z = (src.x > 0.0) ? max(src.y, 0.0)^{clamp(src.w, -128.0, 128.0)} : 0
1018 * dst.w = 1.0
1019 */
static void
ttn_lit(nir_builder *b, nir_op op, nir_alu_dest dest, nir_ssa_def **src)
{
   /* x and w are both constant 1.0. */
   ttn_move_dest_masked(b, dest, nir_imm_float(b, 1.0), TGSI_WRITEMASK_XW);

   /* y = max(src.x, 0.0) — the diffuse term. */
   ttn_move_dest_masked(b, dest, nir_fmax(b, ttn_channel(b, src[0], X),
                                          nir_imm_float(b, 0.0)), TGSI_WRITEMASK_Y);

   if (dest.write_mask & TGSI_WRITEMASK_Z) {
      /* Specular term: max(src.y, 0)^clamp(src.w, -128, 128), forced to 0
       * when the diffuse term is negative.  NOTE(review): the select
       * condition is src.x < 0.0, so src.x == 0 takes the pow path, while
       * the header comment says "src.x > 0.0" — the difference only
       * matters at exactly zero.
       */
      nir_ssa_def *src0_y = ttn_channel(b, src[0], Y);
      nir_ssa_def *wclamp = nir_fmax(b, nir_fmin(b, ttn_channel(b, src[0], W),
                                                 nir_imm_float(b, 128.0)),
                                     nir_imm_float(b, -128.0));
      nir_ssa_def *pow = nir_fpow(b, nir_fmax(b, src0_y, nir_imm_float(b, 0.0)),
                                  wclamp);

      ttn_move_dest_masked(b, dest,
                           nir_bcsel(b,
                                     nir_flt(b,
                                             ttn_channel(b, src[0], X),
                                             nir_imm_float(b, 0.0)),
                                     nir_imm_float(b, 0.0),
                                     pow),
                           TGSI_WRITEMASK_Z);
   }
}
1046
1047 static void
1048 ttn_sle(nir_builder *b, nir_op op, nir_alu_dest dest, nir_ssa_def **src)
1049 {
1050 ttn_move_dest(b, dest, nir_sge(b, src[1], src[0]));
1051 }
1052
1053 static void
1054 ttn_sgt(nir_builder *b, nir_op op, nir_alu_dest dest, nir_ssa_def **src)
1055 {
1056 ttn_move_dest(b, dest, nir_slt(b, src[1], src[0]));
1057 }
1058
1059 static void
1060 ttn_dp2(nir_builder *b, nir_op op, nir_alu_dest dest, nir_ssa_def **src)
1061 {
1062 ttn_move_dest(b, dest, nir_fdot2(b, src[0], src[1]));
1063 }
1064
1065 static void
1066 ttn_dp3(nir_builder *b, nir_op op, nir_alu_dest dest, nir_ssa_def **src)
1067 {
1068 ttn_move_dest(b, dest, nir_fdot3(b, src[0], src[1]));
1069 }
1070
1071 static void
1072 ttn_dp4(nir_builder *b, nir_op op, nir_alu_dest dest, nir_ssa_def **src)
1073 {
1074 ttn_move_dest(b, dest, nir_fdot4(b, src[0], src[1]));
1075 }
1076
1077 static void
1078 ttn_umad(nir_builder *b, nir_op op, nir_alu_dest dest, nir_ssa_def **src)
1079 {
1080 ttn_move_dest(b, dest, nir_iadd(b, nir_imul(b, src[0], src[1]), src[2]));
1081 }
1082
1083 static void
1084 ttn_arr(nir_builder *b, nir_op op, nir_alu_dest dest, nir_ssa_def **src)
1085 {
1086 ttn_move_dest(b, dest, nir_f2i32(b, nir_fround_even(b, src[0])));
1087 }
1088
1089 static void
1090 ttn_cmp(nir_builder *b, nir_op op, nir_alu_dest dest, nir_ssa_def **src)
1091 {
1092 ttn_move_dest(b, dest, nir_bcsel(b,
1093 nir_flt(b, src[0], nir_imm_float(b, 0.0)),
1094 src[1], src[2]));
1095 }
1096
1097 static void
1098 ttn_ucmp(nir_builder *b, nir_op op, nir_alu_dest dest, nir_ssa_def **src)
1099 {
1100 ttn_move_dest(b, dest, nir_bcsel(b,
1101 nir_ine(b, src[0], nir_imm_int(b, 0)),
1102 src[1], src[2]));
1103 }
1104
1105 static void
1106 ttn_kill(nir_builder *b, nir_op op, nir_alu_dest dest, nir_ssa_def **src)
1107 {
1108 nir_intrinsic_instr *discard =
1109 nir_intrinsic_instr_create(b->shader, nir_intrinsic_discard);
1110 nir_builder_instr_insert(b, &discard->instr);
1111 b->shader->info.fs.uses_discard = true;
1112 }
1113
static void
ttn_kill_if(nir_builder *b, nir_op op, nir_alu_dest dest, nir_ssa_def **src)
{
   /* Discard the fragment if any channel of src is negative. */
   /* flt must be exact, because NaN shouldn't discard. (apps rely on this) */
   b->exact = true;
   nir_ssa_def *cmp = nir_bany(b, nir_flt(b, src[0], nir_imm_float(b, 0.0)));
   /* NOTE(review): restores exact to false unconditionally — assumes the
    * builder's exact flag was false on entry; confirm against callers.
    */
   b->exact = false;

   nir_intrinsic_instr *discard =
      nir_intrinsic_instr_create(b->shader, nir_intrinsic_discard_if);
   discard->src[0] = nir_src_for_ssa(cmp);
   nir_builder_instr_insert(b, &discard->instr);
   b->shader->info.fs.uses_discard = true;
}
1128
/* Open a NIR if-statement for TGSI IF/UIF and start emitting into its
 * then-branch.  Pushes two cursors onto c->if_stack: first the point after
 * the whole nir_if (consumed by ttn_endif), then the start of the else list
 * (consumed by ttn_else).
 */
static void
ttn_if(struct ttn_compile *c, nir_ssa_def *src, bool is_uint)
{
   nir_builder *b = &c->build;
   /* TGSI conditions only look at the .x channel. */
   nir_ssa_def *src_x = ttn_channel(b, src, X);

   nir_if *if_stmt = nir_if_create(b->shader);
   if (is_uint) {
      /* equivalent to TGSI UIF, src is interpreted as integer */
      if_stmt->condition = nir_src_for_ssa(nir_ine(b, src_x, nir_imm_int(b, 0)));
   } else {
      /* equivalent to TGSI IF, src is interpreted as float */
      if_stmt->condition = nir_src_for_ssa(nir_fne(b, src_x, nir_imm_float(b, 0.0)));
   }
   nir_builder_cf_insert(b, &if_stmt->cf_node);

   /* Cursor for ENDIF: resume after the whole if. */
   c->if_stack[c->if_stack_pos] = nir_after_cf_node(&if_stmt->cf_node);
   c->if_stack_pos++;

   /* Keep emitting into the then-branch until ELSE/ENDIF shows up. */
   b->cursor = nir_after_cf_list(&if_stmt->then_list);

   /* Cursor for ELSE: switch emission to the else-branch. */
   c->if_stack[c->if_stack_pos] = nir_after_cf_list(&if_stmt->else_list);
   c->if_stack_pos++;
}
1153
static void
ttn_else(struct ttn_compile *c)
{
   nir_builder *b = &c->build;

   /* The top of the if-stack is the else-list cursor pushed by ttn_if(). */
   b->cursor = c->if_stack[c->if_stack_pos - 1];
}
1161
static void
ttn_endif(struct ttn_compile *c)
{
   nir_builder *b = &c->build;

   /* Pop both entries pushed by ttn_if() and resume after the nir_if. */
   c->if_stack_pos -= 2;
   b->cursor = c->if_stack[c->if_stack_pos];
}
1170
/* Open a NIR loop for TGSI BGNLOOP and start emitting into its body.
 * Pushes the after-loop cursor onto c->loop_stack for ttn_endloop().
 */
static void
ttn_bgnloop(struct ttn_compile *c)
{
   nir_builder *b = &c->build;

   nir_loop *loop = nir_loop_create(b->shader);
   nir_builder_cf_insert(b, &loop->cf_node);

   /* Remember where to resume once ENDLOOP pops this loop. */
   c->loop_stack[c->loop_stack_pos] = nir_after_cf_node(&loop->cf_node);
   c->loop_stack_pos++;

   b->cursor = nir_after_cf_list(&loop->body);
}
1184
1185 static void
1186 ttn_cont(nir_builder *b)
1187 {
1188 nir_jump_instr *instr = nir_jump_instr_create(b->shader, nir_jump_continue);
1189 nir_builder_instr_insert(b, &instr->instr);
1190 }
1191
1192 static void
1193 ttn_brk(nir_builder *b)
1194 {
1195 nir_jump_instr *instr = nir_jump_instr_create(b->shader, nir_jump_break);
1196 nir_builder_instr_insert(b, &instr->instr);
1197 }
1198
1199 static void
1200 ttn_endloop(struct ttn_compile *c)
1201 {
1202 nir_builder *b = &c->build;
1203
1204 c->loop_stack_pos--;
1205 b->cursor = c->loop_stack[c->loop_stack_pos];
1206 }
1207
1208 static void
1209 get_texture_info(unsigned texture,
1210 enum glsl_sampler_dim *dim,
1211 bool *is_shadow,
1212 bool *is_array)
1213 {
1214 assert(is_array);
1215 *is_array = false;
1216
1217 if (is_shadow)
1218 *is_shadow = false;
1219
1220 switch (texture) {
1221 case TGSI_TEXTURE_BUFFER:
1222 *dim = GLSL_SAMPLER_DIM_BUF;
1223 break;
1224 case TGSI_TEXTURE_1D:
1225 *dim = GLSL_SAMPLER_DIM_1D;
1226 break;
1227 case TGSI_TEXTURE_1D_ARRAY:
1228 *dim = GLSL_SAMPLER_DIM_1D;
1229 *is_array = true;
1230 break;
1231 case TGSI_TEXTURE_SHADOW1D:
1232 *dim = GLSL_SAMPLER_DIM_1D;
1233 *is_shadow = true;
1234 break;
1235 case TGSI_TEXTURE_SHADOW1D_ARRAY:
1236 *dim = GLSL_SAMPLER_DIM_1D;
1237 *is_shadow = true;
1238 *is_array = true;
1239 break;
1240 case TGSI_TEXTURE_2D:
1241 *dim = GLSL_SAMPLER_DIM_2D;
1242 break;
1243 case TGSI_TEXTURE_2D_ARRAY:
1244 *dim = GLSL_SAMPLER_DIM_2D;
1245 *is_array = true;
1246 break;
1247 case TGSI_TEXTURE_2D_MSAA:
1248 *dim = GLSL_SAMPLER_DIM_MS;
1249 break;
1250 case TGSI_TEXTURE_2D_ARRAY_MSAA:
1251 *dim = GLSL_SAMPLER_DIM_MS;
1252 *is_array = true;
1253 break;
1254 case TGSI_TEXTURE_SHADOW2D:
1255 *dim = GLSL_SAMPLER_DIM_2D;
1256 *is_shadow = true;
1257 break;
1258 case TGSI_TEXTURE_SHADOW2D_ARRAY:
1259 *dim = GLSL_SAMPLER_DIM_2D;
1260 *is_shadow = true;
1261 *is_array = true;
1262 break;
1263 case TGSI_TEXTURE_3D:
1264 *dim = GLSL_SAMPLER_DIM_3D;
1265 break;
1266 case TGSI_TEXTURE_CUBE:
1267 *dim = GLSL_SAMPLER_DIM_CUBE;
1268 break;
1269 case TGSI_TEXTURE_CUBE_ARRAY:
1270 *dim = GLSL_SAMPLER_DIM_CUBE;
1271 *is_array = true;
1272 break;
1273 case TGSI_TEXTURE_SHADOWCUBE:
1274 *dim = GLSL_SAMPLER_DIM_CUBE;
1275 *is_shadow = true;
1276 break;
1277 case TGSI_TEXTURE_SHADOWCUBE_ARRAY:
1278 *dim = GLSL_SAMPLER_DIM_CUBE;
1279 *is_shadow = true;
1280 *is_array = true;
1281 break;
1282 case TGSI_TEXTURE_RECT:
1283 *dim = GLSL_SAMPLER_DIM_RECT;
1284 break;
1285 case TGSI_TEXTURE_SHADOWRECT:
1286 *dim = GLSL_SAMPLER_DIM_RECT;
1287 *is_shadow = true;
1288 break;
1289 default:
1290 fprintf(stderr, "Unknown TGSI texture target %d\n", texture);
1291 abort();
1292 }
1293 }
1294
1295 static enum glsl_base_type
1296 base_type_for_alu_type(nir_alu_type type)
1297 {
1298 type = nir_alu_type_get_base_type(type);
1299
1300 switch (type) {
1301 case nir_type_float:
1302 return GLSL_TYPE_FLOAT;
1303 case nir_type_int:
1304 return GLSL_TYPE_INT;
1305 case nir_type_uint:
1306 return GLSL_TYPE_UINT;
1307 default:
1308 unreachable("invalid type");
1309 }
1310 }
1311
/* Look up (or lazily create) the sampler uniform variable for a binding.
 * The sampler's GLSL type is fixed the first time the binding is seen;
 * later lookups return the cached variable regardless of the
 * dim/shadow/array/base_type arguments passed in.
 */
static nir_variable *
get_sampler_var(struct ttn_compile *c, int binding,
                enum glsl_sampler_dim dim,
                bool is_shadow,
                bool is_array,
                enum glsl_base_type base_type,
                nir_texop op)
{
   nir_variable *var = c->samplers[binding];
   if (!var) {
      const struct glsl_type *type =
         glsl_sampler_type(dim, is_shadow, is_array, base_type);
      var = nir_variable_create(c->build.shader, nir_var_uniform, type,
                                "sampler");
      var->data.binding = binding;
      var->data.explicit_binding = true;
      c->samplers[binding] = var;

      /* Record textures used — texel-fetch ops get their own bitmask. */
      unsigned mask = 1 << binding;
      c->build.shader->info.textures_used |= mask;
      if (op == nir_texop_txf ||
          op == nir_texop_txf_ms ||
          op == nir_texop_txf_ms_mcs)
         c->build.shader->info.textures_used_by_txf |= mask;
   }

   return var;
}
1341
1342 static nir_variable *
1343 get_image_var(struct ttn_compile *c, int binding,
1344 enum glsl_sampler_dim dim,
1345 bool is_array,
1346 enum glsl_base_type base_type,
1347 enum gl_access_qualifier access,
1348 GLenum format)
1349 {
1350 nir_variable *var = c->images[binding];
1351
1352 if (!var) {
1353 const struct glsl_type *type = glsl_image_type(dim, is_array, base_type);
1354
1355 var = nir_variable_create(c->build.shader, nir_var_uniform, type, "image");
1356 var->data.binding = binding;
1357 var->data.explicit_binding = true;
1358 var->data.access = access;
1359 var->data.image.format = format;
1360 c->images[binding] = var;
1361 }
1362
1363 return var;
1364 }
1365
1366 static void
1367 add_ssbo_var(struct ttn_compile *c, int binding)
1368 {
1369 nir_variable *var = c->ssbo[binding];
1370
1371 if (!var) {
1372 /* A length of 0 is used to denote unsized arrays */
1373 const struct glsl_type *type = glsl_array_type(glsl_uint_type(), 0, 0);
1374
1375 struct glsl_struct_field field = {
1376 .type = type,
1377 .name = "data",
1378 .location = -1,
1379 };
1380
1381 var = nir_variable_create(c->build.shader, nir_var_mem_ssbo, type, "ssbo");
1382 var->data.binding = binding;
1383 var->interface_type =
1384 glsl_interface_type(&field, 1, GLSL_INTERFACE_PACKING_STD430,
1385 false, "data");
1386 c->ssbo[binding] = var;
1387 }
1388 }
1389
/* Translate a TGSI texturing instruction into a nir_tex_instr.  Works out
 * the nir_texop, counts the nir_tex_src slots up front, then fills them in
 * the same order.
 */
static void
ttn_tex(struct ttn_compile *c, nir_alu_dest dest, nir_ssa_def **src)
{
   nir_builder *b = &c->build;
   struct tgsi_full_instruction *tgsi_inst = &c->token->FullInstruction;
   nir_tex_instr *instr;
   nir_texop op;
   unsigned num_srcs, samp = 1, sview, i;

   /* Pick the nir_texop, the base number of nir_tex_src slots, and which
    * TGSI source register holds the sampler (samp).
    */
   switch (tgsi_inst->Instruction.Opcode) {
   case TGSI_OPCODE_TEX:
      op = nir_texop_tex;
      num_srcs = 1;
      break;
   case TGSI_OPCODE_TEX2:
      op = nir_texop_tex;
      num_srcs = 1;
      samp = 2;
      break;
   case TGSI_OPCODE_TXP:
      op = nir_texop_tex;
      num_srcs = 2;
      break;
   case TGSI_OPCODE_TXB:
      op = nir_texop_txb;
      num_srcs = 2;
      break;
   case TGSI_OPCODE_TXB2:
      op = nir_texop_txb;
      num_srcs = 2;
      samp = 2;
      break;
   case TGSI_OPCODE_TXL:
   case TGSI_OPCODE_TEX_LZ:
      op = nir_texop_txl;
      num_srcs = 2;
      break;
   case TGSI_OPCODE_TXL2:
      op = nir_texop_txl;
      num_srcs = 2;
      samp = 2;
      break;
   case TGSI_OPCODE_TXF:
   case TGSI_OPCODE_TXF_LZ:
      /* MSAA targets fetch with a sample index instead of an LOD. */
      if (tgsi_inst->Texture.Texture == TGSI_TEXTURE_2D_MSAA ||
          tgsi_inst->Texture.Texture == TGSI_TEXTURE_2D_ARRAY_MSAA) {
         op = nir_texop_txf_ms;
      } else {
         op = nir_texop_txf;
      }
      num_srcs = 2;
      break;
   case TGSI_OPCODE_TXD:
      op = nir_texop_txd;
      num_srcs = 3;
      samp = 3;
      break;
   case TGSI_OPCODE_LODQ:
      op = nir_texop_lod;
      num_srcs = 1;
      break;

   default:
      fprintf(stderr, "unknown TGSI tex op %d\n", tgsi_inst->Instruction.Opcode);
      abort();
   }

   /* Shadow targets add a comparator source. */
   if (tgsi_inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D ||
       tgsi_inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D_ARRAY ||
       tgsi_inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D ||
       tgsi_inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D_ARRAY ||
       tgsi_inst->Texture.Texture == TGSI_TEXTURE_SHADOWRECT ||
       tgsi_inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE ||
       tgsi_inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE_ARRAY) {
      num_srcs++;
   }

   /* Deref sources */
   num_srcs += 2;

   num_srcs += tgsi_inst->Texture.NumOffsets;

   instr = nir_tex_instr_create(b->shader, num_srcs);
   instr->op = op;

   get_texture_info(tgsi_inst->Texture.Texture,
                    &instr->sampler_dim, &instr->is_shadow, &instr->is_array);

   /* Coordinate component count follows the sampler dimensionality. */
   switch (instr->sampler_dim) {
   case GLSL_SAMPLER_DIM_1D:
   case GLSL_SAMPLER_DIM_BUF:
      instr->coord_components = 1;
      break;
   case GLSL_SAMPLER_DIM_2D:
   case GLSL_SAMPLER_DIM_RECT:
   case GLSL_SAMPLER_DIM_EXTERNAL:
   case GLSL_SAMPLER_DIM_MS:
      instr->coord_components = 2;
      break;
   case GLSL_SAMPLER_DIM_3D:
   case GLSL_SAMPLER_DIM_CUBE:
      instr->coord_components = 3;
      break;
   case GLSL_SAMPLER_DIM_SUBPASS:
   case GLSL_SAMPLER_DIM_SUBPASS_MS:
      unreachable("invalid sampler_dim");
   }

   /* Array targets carry the layer as an extra coordinate component. */
   if (instr->is_array)
      instr->coord_components++;

   assert(tgsi_inst->Src[samp].Register.File == TGSI_FILE_SAMPLER);

   /* TODO if we supported any opc's which take an explicit SVIEW
    * src, we would use that here instead.  But for the "legacy"
    * texture opc's the SVIEW index is same as SAMP index:
    */
   sview = tgsi_inst->Src[samp].Register.Index;

   /* Destination type comes from the sampler-view declaration if one was
    * recorded; LOD queries and unknown views default to float.
    */
   if (op == nir_texop_lod) {
      instr->dest_type = nir_type_float;
   } else if (sview < c->num_samp_types) {
      instr->dest_type = c->samp_types[sview];
   } else {
      instr->dest_type = nir_type_float;
   }

   nir_variable *var =
      get_sampler_var(c, sview, instr->sampler_dim,
                      instr->is_shadow,
                      instr->is_array,
                      base_type_for_alu_type(instr->dest_type),
                      op);

   nir_deref_instr *deref = nir_build_deref_var(b, var);

   unsigned src_number = 0;

   /* Same deref serves as both texture and sampler. */
   instr->src[src_number].src = nir_src_for_ssa(&deref->dest.ssa);
   instr->src[src_number].src_type = nir_tex_src_texture_deref;
   src_number++;
   instr->src[src_number].src = nir_src_for_ssa(&deref->dest.ssa);
   instr->src[src_number].src_type = nir_tex_src_sampler_deref;
   src_number++;

   instr->src[src_number].src =
      nir_src_for_ssa(nir_swizzle(b, src[0], SWIZ(X, Y, Z, W),
                                  instr->coord_components));
   instr->src[src_number].src_type = nir_tex_src_coord;
   src_number++;

   /* Projector, bias, and LOD all live in the .w channel (or in the second
    * TGSI source's .x for the two-source variants).
    */
   if (tgsi_inst->Instruction.Opcode == TGSI_OPCODE_TXP) {
      instr->src[src_number].src = nir_src_for_ssa(ttn_channel(b, src[0], W));
      instr->src[src_number].src_type = nir_tex_src_projector;
      src_number++;
   }

   if (tgsi_inst->Instruction.Opcode == TGSI_OPCODE_TXB) {
      instr->src[src_number].src = nir_src_for_ssa(ttn_channel(b, src[0], W));
      instr->src[src_number].src_type = nir_tex_src_bias;
      src_number++;
   }

   if (tgsi_inst->Instruction.Opcode == TGSI_OPCODE_TXB2) {
      instr->src[src_number].src = nir_src_for_ssa(ttn_channel(b, src[1], X));
      instr->src[src_number].src_type = nir_tex_src_bias;
      src_number++;
   }

   if (tgsi_inst->Instruction.Opcode == TGSI_OPCODE_TXL ||
       tgsi_inst->Instruction.Opcode == TGSI_OPCODE_TEX_LZ) {
      if (tgsi_inst->Instruction.Opcode == TGSI_OPCODE_TEX_LZ)
         instr->src[src_number].src = nir_src_for_ssa(nir_imm_int(b, 0));
      else
         instr->src[src_number].src = nir_src_for_ssa(ttn_channel(b, src[0], W));
      instr->src[src_number].src_type = nir_tex_src_lod;
      src_number++;
   }

   if (tgsi_inst->Instruction.Opcode == TGSI_OPCODE_TXL2) {
      instr->src[src_number].src = nir_src_for_ssa(ttn_channel(b, src[1], X));
      instr->src[src_number].src_type = nir_tex_src_lod;
      src_number++;
   }

   if (tgsi_inst->Instruction.Opcode == TGSI_OPCODE_TXF ||
       tgsi_inst->Instruction.Opcode == TGSI_OPCODE_TXF_LZ) {
      if (op == nir_texop_txf_ms) {
         instr->src[src_number].src = nir_src_for_ssa(ttn_channel(b, src[0], W));
         instr->src[src_number].src_type = nir_tex_src_ms_index;
      } else {
         if (tgsi_inst->Instruction.Opcode == TGSI_OPCODE_TXF_LZ)
            instr->src[src_number].src = nir_src_for_ssa(nir_imm_int(b, 0));
         else
            instr->src[src_number].src = nir_src_for_ssa(ttn_channel(b, src[0], W));
         instr->src[src_number].src_type = nir_tex_src_lod;
      }
      src_number++;
   }

   if (tgsi_inst->Instruction.Opcode == TGSI_OPCODE_TXD) {
      instr->src[src_number].src_type = nir_tex_src_ddx;
      instr->src[src_number].src =
         nir_src_for_ssa(nir_swizzle(b, src[1], SWIZ(X, Y, Z, W),
				     nir_tex_instr_src_size(instr, src_number)));
      src_number++;
      instr->src[src_number].src_type = nir_tex_src_ddy;
      instr->src[src_number].src =
         nir_src_for_ssa(nir_swizzle(b, src[2], SWIZ(X, Y, Z, W),
				     nir_tex_instr_src_size(instr, src_number)));
      src_number++;
   }

   /* The shadow comparator sits in the first channel not used by the
    * coordinate (or in src[1].x once the coordinate fills all four).
    */
   if (instr->is_shadow) {
      if (instr->coord_components == 4)
         instr->src[src_number].src = nir_src_for_ssa(ttn_channel(b, src[1], X));
      else if (instr->coord_components == 3)
         instr->src[src_number].src = nir_src_for_ssa(ttn_channel(b, src[0], W));
      else
         instr->src[src_number].src = nir_src_for_ssa(ttn_channel(b, src[0], Z));

      instr->src[src_number].src_type = nir_tex_src_comparator;
      src_number++;
   }

   for (i = 0; i < tgsi_inst->Texture.NumOffsets; i++) {
      struct tgsi_texture_offset *tex_offset = &tgsi_inst->TexOffsets[i];
      /* since TexOffset ins't using tgsi_full_src_register we get to
       * do some extra gymnastics:
       */
      nir_alu_src src;

      memset(&src, 0, sizeof(src));

      src.src = ttn_src_for_file_and_index(c,
                                           tex_offset->File,
                                           tex_offset->Index,
                                           NULL, NULL, NULL,
                                           true);

      src.swizzle[0] = tex_offset->SwizzleX;
      src.swizzle[1] = tex_offset->SwizzleY;
      src.swizzle[2] = tex_offset->SwizzleZ;
      src.swizzle[3] = TGSI_SWIZZLE_W;

      instr->src[src_number].src_type = nir_tex_src_offset;
      instr->src[src_number].src = nir_src_for_ssa(
         nir_mov_alu(b, src, nir_tex_instr_src_size(instr, src_number)));
      src_number++;
   }

   assert(src_number == num_srcs);
   assert(src_number == instr->num_srcs);

   nir_ssa_dest_init(&instr->instr, &instr->dest,
		     nir_tex_instr_dest_size(instr),
		     32, NULL);
   nir_builder_instr_insert(b, &instr->instr);

   /* Resolve the writemask on the texture op. */
   ttn_move_dest(b, dest, &instr->dest.ssa);
}
1652
1653 /* TGSI_OPCODE_TXQ is actually two distinct operations:
1654 *
1655 * dst.x = texture\_width(unit, lod)
1656 * dst.y = texture\_height(unit, lod)
1657 * dst.z = texture\_depth(unit, lod)
1658 * dst.w = texture\_levels(unit)
1659 *
1660 * dst.xyz map to NIR txs opcode, and dst.w maps to query_levels
1661 */
static void
ttn_txq(struct ttn_compile *c, nir_alu_dest dest, nir_ssa_def **src)
{
   nir_builder *b = &c->build;
   struct tgsi_full_instruction *tgsi_inst = &c->token->FullInstruction;
   nir_tex_instr *txs, *qlv;

   /* txs (texture size) takes two srcs: texture deref and lod. */
   txs = nir_tex_instr_create(b->shader, 2);
   txs->op = nir_texop_txs;
   get_texture_info(tgsi_inst->Texture.Texture,
                    &txs->sampler_dim, &txs->is_shadow, &txs->is_array);

   /* query_levels only needs the texture deref. */
   qlv = nir_tex_instr_create(b->shader, 1);
   qlv->op = nir_texop_query_levels;
   get_texture_info(tgsi_inst->Texture.Texture,
                    &qlv->sampler_dim, &qlv->is_shadow, &qlv->is_array);

   assert(tgsi_inst->Src[1].Register.File == TGSI_FILE_SAMPLER);
   int tex_index = tgsi_inst->Src[1].Register.Index;

   /* NOTE(review): txs->dest_type is read here before ever being assigned
    * in this function — this relies on nir_tex_instr_create() initializing
    * it; confirm the resulting sampler base type is the intended one.
    */
   nir_variable *var =
      get_sampler_var(c, tex_index, txs->sampler_dim,
                      txs->is_shadow,
                      txs->is_array,
                      base_type_for_alu_type(txs->dest_type),
                      nir_texop_txs);

   nir_deref_instr *deref = nir_build_deref_var(b, var);

   txs->src[0].src = nir_src_for_ssa(&deref->dest.ssa);
   txs->src[0].src_type = nir_tex_src_texture_deref;

   qlv->src[0].src = nir_src_for_ssa(&deref->dest.ssa);
   qlv->src[0].src_type = nir_tex_src_texture_deref;

   /* lod: */
   txs->src[1].src = nir_src_for_ssa(ttn_channel(b, src[0], X));
   txs->src[1].src_type = nir_tex_src_lod;

   nir_ssa_dest_init(&txs->instr, &txs->dest,
		     nir_tex_instr_dest_size(txs), 32, NULL);
   nir_builder_instr_insert(b, &txs->instr);

   nir_ssa_dest_init(&qlv->instr, &qlv->dest, 1, 32, NULL);
   nir_builder_instr_insert(b, &qlv->instr);

   /* xyz come from txs, w from query_levels. */
   ttn_move_dest_masked(b, dest, &txs->dest.ssa, TGSI_WRITEMASK_XYZ);
   ttn_move_dest_masked(b, dest, &qlv->dest.ssa, TGSI_WRITEMASK_W);
}
1711
1712 static enum glsl_base_type
1713 get_image_base_type(struct tgsi_full_instruction *tgsi_inst)
1714 {
1715 const struct util_format_description *desc =
1716 util_format_description(tgsi_inst->Memory.Format);
1717
1718 if (desc->channel[0].pure_integer) {
1719 if (desc->channel[0].type == UTIL_FORMAT_TYPE_SIGNED)
1720 return GLSL_TYPE_INT;
1721 else
1722 return GLSL_TYPE_UINT;
1723 }
1724 return GLSL_TYPE_FLOAT;
1725 }
1726
1727 static enum gl_access_qualifier
1728 get_mem_qualifier(struct tgsi_full_instruction *tgsi_inst)
1729 {
1730 enum gl_access_qualifier access = 0;
1731
1732 if (tgsi_inst->Memory.Qualifier & TGSI_MEMORY_COHERENT)
1733 access |= ACCESS_COHERENT;
1734 if (tgsi_inst->Memory.Qualifier & TGSI_MEMORY_RESTRICT)
1735 access |= ACCESS_RESTRICT;
1736 if (tgsi_inst->Memory.Qualifier & TGSI_MEMORY_VOLATILE)
1737 access |= ACCESS_VOLATILE;
1738 if (tgsi_inst->Memory.Qualifier & TGSI_MEMORY_STREAM_CACHE_POLICY)
1739 access |= ACCESS_STREAM_CACHE_POLICY;
1740
1741 return access;
1742 }
1743
/* Map a pipe_format declared on a TGSI image onto the corresponding GL
 * image-format enum (stored in nir_variable::data.image.format).  Only
 * formats that appear as GL image formats are handled; anything else is a
 * front-end bug and hits the unreachable.
 */
static GLenum
get_image_format(struct tgsi_full_instruction *tgsi_inst)
{
   switch (tgsi_inst->Memory.Format) {
   case PIPE_FORMAT_NONE:
      return GL_NONE;

   /* Unsigned normalized */
   case PIPE_FORMAT_R8_UNORM:
      return GL_R8;
   case PIPE_FORMAT_R8G8_UNORM:
      return GL_RG8;
   case PIPE_FORMAT_R8G8B8A8_UNORM:
      return GL_RGBA8;
   case PIPE_FORMAT_R16_UNORM:
      return GL_R16;
   case PIPE_FORMAT_R16G16_UNORM:
      return GL_RG16;
   case PIPE_FORMAT_R16G16B16A16_UNORM:
      return GL_RGBA16;

   /* Signed normalized */
   case PIPE_FORMAT_R8_SNORM:
      return GL_R8_SNORM;
   case PIPE_FORMAT_R8G8_SNORM:
      return GL_RG8_SNORM;
   case PIPE_FORMAT_R8G8B8A8_SNORM:
      return GL_RGBA8_SNORM;
   case PIPE_FORMAT_R16_SNORM:
      return GL_R16_SNORM;
   case PIPE_FORMAT_R16G16_SNORM:
      return GL_RG16_SNORM;
   case PIPE_FORMAT_R16G16B16A16_SNORM:
      return GL_RGBA16_SNORM;

   /* Unsigned integer */
   case PIPE_FORMAT_R8_UINT:
      return GL_R8UI;
   case PIPE_FORMAT_R8G8_UINT:
      return GL_RG8UI;
   case PIPE_FORMAT_R8G8B8A8_UINT:
      return GL_RGBA8UI;
   case PIPE_FORMAT_R16_UINT:
      return GL_R16UI;
   case PIPE_FORMAT_R16G16_UINT:
      return GL_RG16UI;
   case PIPE_FORMAT_R16G16B16A16_UINT:
      return GL_RGBA16UI;
   case PIPE_FORMAT_R32_UINT:
      return GL_R32UI;
   case PIPE_FORMAT_R32G32_UINT:
      return GL_RG32UI;
   case PIPE_FORMAT_R32G32B32A32_UINT:
      return GL_RGBA32UI;

   /* Signed integer */
   case PIPE_FORMAT_R8_SINT:
      return GL_R8I;
   case PIPE_FORMAT_R8G8_SINT:
      return GL_RG8I;
   case PIPE_FORMAT_R8G8B8A8_SINT:
      return GL_RGBA8I;
   case PIPE_FORMAT_R16_SINT:
      return GL_R16I;
   case PIPE_FORMAT_R16G16_SINT:
      return GL_RG16I;
   case PIPE_FORMAT_R16G16B16A16_SINT:
      return GL_RGBA16I;
   case PIPE_FORMAT_R32_SINT:
      return GL_R32I;
   case PIPE_FORMAT_R32G32_SINT:
      return GL_RG32I;
   case PIPE_FORMAT_R32G32B32A32_SINT:
      return GL_RGBA32I;

   /* Float */
   case PIPE_FORMAT_R16_FLOAT:
      return GL_R16F;
   case PIPE_FORMAT_R16G16_FLOAT:
      return GL_RG16F;
   case PIPE_FORMAT_R16G16B16A16_FLOAT:
      return GL_RGBA16F;
   case PIPE_FORMAT_R32_FLOAT:
      return GL_R32F;
   case PIPE_FORMAT_R32G32_FLOAT:
      return GL_RG32F;
   case PIPE_FORMAT_R32G32B32A32_FLOAT:
      return GL_RGBA32F;

   /* Packed */
   case PIPE_FORMAT_R11G11B10_FLOAT:
      return GL_R11F_G11F_B10F;
   case PIPE_FORMAT_R10G10B10A2_UINT:
      return GL_RGB10_A2UI;
   case PIPE_FORMAT_R10G10B10A2_UNORM:
      return GL_RGB10_A2;

   default:
      unreachable("unhandled image format");
   }
}
1839
1840 static void
1841 ttn_mem(struct ttn_compile *c, nir_alu_dest dest, nir_ssa_def **src)
1842 {
1843 nir_builder *b = &c->build;
1844 struct tgsi_full_instruction *tgsi_inst = &c->token->FullInstruction;
1845 nir_intrinsic_instr *instr = NULL;
1846 unsigned resource_index, addr_src_index, file;
1847
1848 switch (tgsi_inst->Instruction.Opcode) {
1849 case TGSI_OPCODE_LOAD:
1850 assert(!tgsi_inst->Src[0].Register.Indirect);
1851 resource_index = tgsi_inst->Src[0].Register.Index;
1852 file = tgsi_inst->Src[0].Register.File;
1853 addr_src_index = 1;
1854 break;
1855 case TGSI_OPCODE_STORE:
1856 assert(!tgsi_inst->Dst[0].Register.Indirect);
1857 resource_index = tgsi_inst->Dst[0].Register.Index;
1858 file = tgsi_inst->Dst[0].Register.File;
1859 addr_src_index = 0;
1860 break;
1861 default:
1862 unreachable("unexpected memory opcode");
1863 }
1864
1865 if (file == TGSI_FILE_BUFFER) {
1866 nir_intrinsic_op op;
1867
1868 switch (tgsi_inst->Instruction.Opcode) {
1869 case TGSI_OPCODE_LOAD:
1870 op = nir_intrinsic_load_ssbo;
1871 break;
1872 case TGSI_OPCODE_STORE:
1873 op = nir_intrinsic_store_ssbo;
1874 break;
1875 }
1876
1877 add_ssbo_var(c, resource_index);
1878
1879 instr = nir_intrinsic_instr_create(b->shader, op);
1880 instr->num_components = util_last_bit(tgsi_inst->Dst[0].Register.WriteMask);
1881 nir_intrinsic_set_access(instr, get_mem_qualifier(tgsi_inst));
1882 nir_intrinsic_set_align(instr, 4, 0);
1883
1884 unsigned i = 0;
1885 if (tgsi_inst->Instruction.Opcode == TGSI_OPCODE_STORE)
1886 instr->src[i++] = nir_src_for_ssa(nir_swizzle(b, src[1], SWIZ(X, Y, Z, W),
1887 instr->num_components));
1888 instr->src[i++] = nir_src_for_ssa(nir_imm_int(b, resource_index));
1889 instr->src[i++] = nir_src_for_ssa(ttn_channel(b, src[addr_src_index], X));
1890
1891 if (tgsi_inst->Instruction.Opcode == TGSI_OPCODE_STORE)
1892 nir_intrinsic_set_write_mask(instr, tgsi_inst->Dst[0].Register.WriteMask);
1893
1894 } else if (file == TGSI_FILE_IMAGE) {
1895 nir_intrinsic_op op;
1896
1897 switch (tgsi_inst->Instruction.Opcode) {
1898 case TGSI_OPCODE_LOAD:
1899 op = nir_intrinsic_image_deref_load;
1900 break;
1901 case TGSI_OPCODE_STORE:
1902 op = nir_intrinsic_image_deref_store;
1903 break;
1904 }
1905
1906 instr = nir_intrinsic_instr_create(b->shader, op);
1907
1908 /* Set the image variable dereference. */
1909 enum glsl_sampler_dim dim;
1910 bool is_array;
1911 get_texture_info(tgsi_inst->Memory.Texture, &dim, NULL, &is_array);
1912
1913 enum glsl_base_type base_type = get_image_base_type(tgsi_inst);
1914 enum gl_access_qualifier access = get_mem_qualifier(tgsi_inst);
1915 GLenum format = get_image_format(tgsi_inst);
1916
1917 nir_variable *image =
1918 get_image_var(c, resource_index,
1919 dim, is_array, base_type, access, format);
1920 nir_deref_instr *image_deref = nir_build_deref_var(b, image);
1921 const struct glsl_type *type = image_deref->type;
1922
1923 nir_intrinsic_set_access(instr, image_deref->var->data.access);
1924
1925 instr->src[0] = nir_src_for_ssa(&image_deref->dest.ssa);
1926 instr->src[1] = nir_src_for_ssa(src[addr_src_index]);
1927
1928 /* Set the sample argument, which is undefined for single-sample images. */
1929 if (glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_MS) {
1930 instr->src[2] = nir_src_for_ssa(ttn_channel(b, src[addr_src_index], W));
1931 } else {
1932 instr->src[2] = nir_src_for_ssa(nir_ssa_undef(b, 1, 32));
1933 }
1934
1935 if (tgsi_inst->Instruction.Opcode == TGSI_OPCODE_STORE) {
1936 instr->src[3] = nir_src_for_ssa(nir_swizzle(b, src[1], SWIZ(X, Y, Z, W), 4));
1937 }
1938
1939 instr->num_components = 4;
1940 } else {
1941 unreachable("unexpected file");
1942 }
1943
1944
1945 if (tgsi_inst->Instruction.Opcode == TGSI_OPCODE_LOAD) {
1946 nir_ssa_dest_init(&instr->instr, &instr->dest, instr->num_components,
1947 32, NULL);
1948 nir_builder_instr_insert(b, &instr->instr);
1949 ttn_move_dest(b, dest, &instr->dest.ssa);
1950 } else {
1951 nir_builder_instr_insert(b, &instr->instr);
1952 }
1953 }
1954
1955 static const nir_op op_trans[TGSI_OPCODE_LAST] = {
1956 [TGSI_OPCODE_ARL] = 0,
1957 [TGSI_OPCODE_MOV] = nir_op_mov,
1958 [TGSI_OPCODE_LIT] = 0,
1959 [TGSI_OPCODE_RCP] = nir_op_frcp,
1960 [TGSI_OPCODE_RSQ] = nir_op_frsq,
1961 [TGSI_OPCODE_EXP] = 0,
1962 [TGSI_OPCODE_LOG] = 0,
1963 [TGSI_OPCODE_MUL] = nir_op_fmul,
1964 [TGSI_OPCODE_ADD] = nir_op_fadd,
1965 [TGSI_OPCODE_DP3] = 0,
1966 [TGSI_OPCODE_DP4] = 0,
1967 [TGSI_OPCODE_DST] = 0,
1968 [TGSI_OPCODE_MIN] = nir_op_fmin,
1969 [TGSI_OPCODE_MAX] = nir_op_fmax,
1970 [TGSI_OPCODE_SLT] = nir_op_slt,
1971 [TGSI_OPCODE_SGE] = nir_op_sge,
1972 [TGSI_OPCODE_MAD] = nir_op_ffma,
1973 [TGSI_OPCODE_TEX_LZ] = 0,
1974 [TGSI_OPCODE_LRP] = 0,
1975 [TGSI_OPCODE_SQRT] = nir_op_fsqrt,
1976 [TGSI_OPCODE_FRC] = nir_op_ffract,
1977 [TGSI_OPCODE_TXF_LZ] = 0,
1978 [TGSI_OPCODE_FLR] = nir_op_ffloor,
1979 [TGSI_OPCODE_ROUND] = nir_op_fround_even,
1980 [TGSI_OPCODE_EX2] = nir_op_fexp2,
1981 [TGSI_OPCODE_LG2] = nir_op_flog2,
1982 [TGSI_OPCODE_POW] = nir_op_fpow,
1983 [TGSI_OPCODE_COS] = nir_op_fcos,
1984 [TGSI_OPCODE_DDX] = nir_op_fddx,
1985 [TGSI_OPCODE_DDY] = nir_op_fddy,
1986 [TGSI_OPCODE_KILL] = 0,
1987 [TGSI_OPCODE_PK2H] = 0, /* XXX */
1988 [TGSI_OPCODE_PK2US] = 0, /* XXX */
1989 [TGSI_OPCODE_PK4B] = 0, /* XXX */
1990 [TGSI_OPCODE_PK4UB] = 0, /* XXX */
1991 [TGSI_OPCODE_SEQ] = nir_op_seq,
1992 [TGSI_OPCODE_SGT] = 0,
1993 [TGSI_OPCODE_SIN] = nir_op_fsin,
1994 [TGSI_OPCODE_SNE] = nir_op_sne,
1995 [TGSI_OPCODE_SLE] = 0,
1996 [TGSI_OPCODE_TEX] = 0,
1997 [TGSI_OPCODE_TXD] = 0,
1998 [TGSI_OPCODE_TXP] = 0,
1999 [TGSI_OPCODE_UP2H] = 0, /* XXX */
2000 [TGSI_OPCODE_UP2US] = 0, /* XXX */
2001 [TGSI_OPCODE_UP4B] = 0, /* XXX */
2002 [TGSI_OPCODE_UP4UB] = 0, /* XXX */
2003 [TGSI_OPCODE_ARR] = 0,
2004
2005 /* No function calls, yet. */
2006 [TGSI_OPCODE_CAL] = 0, /* XXX */
2007 [TGSI_OPCODE_RET] = 0, /* XXX */
2008
2009 [TGSI_OPCODE_SSG] = nir_op_fsign,
2010 [TGSI_OPCODE_CMP] = 0,
2011 [TGSI_OPCODE_TXB] = 0,
2012 [TGSI_OPCODE_DIV] = nir_op_fdiv,
2013 [TGSI_OPCODE_DP2] = 0,
2014 [TGSI_OPCODE_TXL] = 0,
2015
2016 [TGSI_OPCODE_BRK] = 0,
2017 [TGSI_OPCODE_IF] = 0,
2018 [TGSI_OPCODE_UIF] = 0,
2019 [TGSI_OPCODE_ELSE] = 0,
2020 [TGSI_OPCODE_ENDIF] = 0,
2021
2022 [TGSI_OPCODE_DDX_FINE] = nir_op_fddx_fine,
2023 [TGSI_OPCODE_DDY_FINE] = nir_op_fddy_fine,
2024
2025 [TGSI_OPCODE_CEIL] = nir_op_fceil,
2026 [TGSI_OPCODE_I2F] = nir_op_i2f32,
2027 [TGSI_OPCODE_NOT] = nir_op_inot,
2028 [TGSI_OPCODE_TRUNC] = nir_op_ftrunc,
2029 [TGSI_OPCODE_SHL] = nir_op_ishl,
2030 [TGSI_OPCODE_AND] = nir_op_iand,
2031 [TGSI_OPCODE_OR] = nir_op_ior,
2032 [TGSI_OPCODE_MOD] = nir_op_umod,
2033 [TGSI_OPCODE_XOR] = nir_op_ixor,
2034 [TGSI_OPCODE_TXF] = 0,
2035 [TGSI_OPCODE_TXQ] = 0,
2036
2037 [TGSI_OPCODE_CONT] = 0,
2038
2039 [TGSI_OPCODE_EMIT] = 0, /* XXX */
2040 [TGSI_OPCODE_ENDPRIM] = 0, /* XXX */
2041
2042 [TGSI_OPCODE_BGNLOOP] = 0,
2043 [TGSI_OPCODE_BGNSUB] = 0, /* XXX: no function calls */
2044 [TGSI_OPCODE_ENDLOOP] = 0,
2045 [TGSI_OPCODE_ENDSUB] = 0, /* XXX: no function calls */
2046
2047 [TGSI_OPCODE_NOP] = 0,
2048 [TGSI_OPCODE_FSEQ] = nir_op_feq,
2049 [TGSI_OPCODE_FSGE] = nir_op_fge,
2050 [TGSI_OPCODE_FSLT] = nir_op_flt,
2051 [TGSI_OPCODE_FSNE] = nir_op_fne,
2052
2053 [TGSI_OPCODE_KILL_IF] = 0,
2054
2055 [TGSI_OPCODE_END] = 0,
2056
2057 [TGSI_OPCODE_F2I] = nir_op_f2i32,
2058 [TGSI_OPCODE_IDIV] = nir_op_idiv,
2059 [TGSI_OPCODE_IMAX] = nir_op_imax,
2060 [TGSI_OPCODE_IMIN] = nir_op_imin,
2061 [TGSI_OPCODE_INEG] = nir_op_ineg,
2062 [TGSI_OPCODE_ISGE] = nir_op_ige,
2063 [TGSI_OPCODE_ISHR] = nir_op_ishr,
2064 [TGSI_OPCODE_ISLT] = nir_op_ilt,
2065 [TGSI_OPCODE_F2U] = nir_op_f2u32,
2066 [TGSI_OPCODE_U2F] = nir_op_u2f32,
2067 [TGSI_OPCODE_UADD] = nir_op_iadd,
2068 [TGSI_OPCODE_UDIV] = nir_op_udiv,
2069 [TGSI_OPCODE_UMAD] = 0,
2070 [TGSI_OPCODE_UMAX] = nir_op_umax,
2071 [TGSI_OPCODE_UMIN] = nir_op_umin,
2072 [TGSI_OPCODE_UMOD] = nir_op_umod,
2073 [TGSI_OPCODE_UMUL] = nir_op_imul,
2074 [TGSI_OPCODE_USEQ] = nir_op_ieq,
2075 [TGSI_OPCODE_USGE] = nir_op_uge,
2076 [TGSI_OPCODE_USHR] = nir_op_ushr,
2077 [TGSI_OPCODE_USLT] = nir_op_ult,
2078 [TGSI_OPCODE_USNE] = nir_op_ine,
2079
2080 [TGSI_OPCODE_SWITCH] = 0, /* not emitted by glsl_to_tgsi.cpp */
2081 [TGSI_OPCODE_CASE] = 0, /* not emitted by glsl_to_tgsi.cpp */
2082 [TGSI_OPCODE_DEFAULT] = 0, /* not emitted by glsl_to_tgsi.cpp */
2083 [TGSI_OPCODE_ENDSWITCH] = 0, /* not emitted by glsl_to_tgsi.cpp */
2084
2085 /* XXX: SAMPLE opcodes */
2086
2087 [TGSI_OPCODE_UARL] = nir_op_mov,
2088 [TGSI_OPCODE_UCMP] = 0,
2089 [TGSI_OPCODE_IABS] = nir_op_iabs,
2090 [TGSI_OPCODE_ISSG] = nir_op_isign,
2091
2092 [TGSI_OPCODE_LOAD] = 0,
2093 [TGSI_OPCODE_STORE] = 0,
2094
2095 /* XXX: atomics */
2096
2097 [TGSI_OPCODE_TEX2] = 0,
2098 [TGSI_OPCODE_TXB2] = 0,
2099 [TGSI_OPCODE_TXL2] = 0,
2100
2101 [TGSI_OPCODE_IMUL_HI] = nir_op_imul_high,
2102 [TGSI_OPCODE_UMUL_HI] = nir_op_umul_high,
2103
2104 [TGSI_OPCODE_TG4] = 0,
2105 [TGSI_OPCODE_LODQ] = 0,
2106
2107 [TGSI_OPCODE_IBFE] = nir_op_ibitfield_extract,
2108 [TGSI_OPCODE_UBFE] = nir_op_ubitfield_extract,
2109 [TGSI_OPCODE_BFI] = nir_op_bitfield_insert,
2110 [TGSI_OPCODE_BREV] = nir_op_bitfield_reverse,
2111 [TGSI_OPCODE_POPC] = nir_op_bit_count,
2112 [TGSI_OPCODE_LSB] = nir_op_find_lsb,
2113 [TGSI_OPCODE_IMSB] = nir_op_ifind_msb,
2114 [TGSI_OPCODE_UMSB] = nir_op_ufind_msb,
2115
2116 [TGSI_OPCODE_INTERP_CENTROID] = 0, /* XXX */
2117 [TGSI_OPCODE_INTERP_SAMPLE] = 0, /* XXX */
2118 [TGSI_OPCODE_INTERP_OFFSET] = 0, /* XXX */
2119
2120 [TGSI_OPCODE_F2D] = nir_op_f2f64,
2121 [TGSI_OPCODE_D2F] = nir_op_f2f32,
2122 [TGSI_OPCODE_DMUL] = nir_op_fmul,
2123 [TGSI_OPCODE_D2U] = nir_op_f2u32,
2124 [TGSI_OPCODE_U2D] = nir_op_u2f64,
2125
2126 [TGSI_OPCODE_U64ADD] = nir_op_iadd,
2127 [TGSI_OPCODE_U64MUL] = nir_op_imul,
2128 [TGSI_OPCODE_U64DIV] = nir_op_udiv,
2129 [TGSI_OPCODE_U64SNE] = nir_op_ine,
2130 };
2131
/**
 * Translates the current TGSI instruction token (c->token) into NIR,
 * emitting into the builder at c->build.
 *
 * Sources are fetched up front, the destination is resolved to a
 * nir_alu_dest, and then the opcode is dispatched: special cases are
 * handled explicitly in the switch, everything else falls through to the
 * op_trans[] table via ttn_alu().
 */
static void
ttn_emit_instruction(struct ttn_compile *c)
{
   nir_builder *b = &c->build;
   struct tgsi_full_instruction *tgsi_inst = &c->token->FullInstruction;
   unsigned i;
   unsigned tgsi_op = tgsi_inst->Instruction.Opcode;
   struct tgsi_full_dst_register *tgsi_dst = &tgsi_inst->Dst[0];

   /* END terminates the shader; there is nothing to emit for it. */
   if (tgsi_op == TGSI_OPCODE_END)
      return;

   nir_ssa_def *src[TGSI_FULL_MAX_SRC_REGISTERS];
   for (i = 0; i < tgsi_inst->Instruction.NumSrcRegs; i++) {
      src[i] = ttn_get_src(c, &tgsi_inst->Src[i], i);
   }
   nir_alu_dest dest = ttn_get_dest(c, tgsi_dst);

   unsigned tgsi_dst_type = tgsi_opcode_infer_dst_type(tgsi_op, 0);

   /* The destination bitsize of the NIR opcode (not TGSI, where it's always
    * 32 bits). This needs to be passed into ttn_alu() because it can't be
    * inferred for comparison opcodes.
    */
   unsigned dst_bitsize = tgsi_type_is_64bit(tgsi_dst_type) ? 64 : 32;

   switch (tgsi_op) {
   /* TGSI's scalar transcendentals read only the .x channel of their
    * source; the ttn_move_dest() wrapper replicates the scalar result to
    * the written channels.
    */
   case TGSI_OPCODE_RSQ:
      ttn_move_dest(b, dest, nir_frsq(b, ttn_channel(b, src[0], X)));
      break;

   case TGSI_OPCODE_SQRT:
      ttn_move_dest(b, dest, nir_fsqrt(b, ttn_channel(b, src[0], X)));
      break;

   case TGSI_OPCODE_RCP:
      ttn_move_dest(b, dest, nir_frcp(b, ttn_channel(b, src[0], X)));
      break;

   case TGSI_OPCODE_EX2:
      ttn_move_dest(b, dest, nir_fexp2(b, ttn_channel(b, src[0], X)));
      break;

   case TGSI_OPCODE_LG2:
      ttn_move_dest(b, dest, nir_flog2(b, ttn_channel(b, src[0], X)));
      break;

   case TGSI_OPCODE_POW:
      ttn_move_dest(b, dest, nir_fpow(b,
                                      ttn_channel(b, src[0], X),
                                      ttn_channel(b, src[1], X)));
      break;

   case TGSI_OPCODE_COS:
      ttn_move_dest(b, dest, nir_fcos(b, ttn_channel(b, src[0], X)));
      break;

   case TGSI_OPCODE_SIN:
      ttn_move_dest(b, dest, nir_fsin(b, ttn_channel(b, src[0], X)));
      break;

   /* Opcodes with dedicated expansion helpers; op_trans[tgsi_op] is passed
    * through even where the helper ignores it.
    */
   case TGSI_OPCODE_ARL:
      ttn_arl(b, op_trans[tgsi_op], dest, src);
      break;

   case TGSI_OPCODE_EXP:
      ttn_exp(b, op_trans[tgsi_op], dest, src);
      break;

   case TGSI_OPCODE_LOG:
      ttn_log(b, op_trans[tgsi_op], dest, src);
      break;

   case TGSI_OPCODE_DST:
      ttn_dst(b, op_trans[tgsi_op], dest, src);
      break;

   case TGSI_OPCODE_LIT:
      ttn_lit(b, op_trans[tgsi_op], dest, src);
      break;

   case TGSI_OPCODE_DP2:
      ttn_dp2(b, op_trans[tgsi_op], dest, src);
      break;

   case TGSI_OPCODE_DP3:
      ttn_dp3(b, op_trans[tgsi_op], dest, src);
      break;

   case TGSI_OPCODE_DP4:
      ttn_dp4(b, op_trans[tgsi_op], dest, src);
      break;

   case TGSI_OPCODE_UMAD:
      ttn_umad(b, op_trans[tgsi_op], dest, src);
      break;

   case TGSI_OPCODE_LRP:
      /* Note the operand order: nir_flrp(x, y, a) = x*(1-a) + y*a, while
       * TGSI LRP dst = src0*src1 + (1-src0)*src2, hence the reversal.
       */
      ttn_move_dest(b, dest, nir_flrp(b, src[2], src[1], src[0]));
      break;

   case TGSI_OPCODE_KILL:
      ttn_kill(b, op_trans[tgsi_op], dest, src);
      break;

   case TGSI_OPCODE_ARR:
      ttn_arr(b, op_trans[tgsi_op], dest, src);
      break;

   case TGSI_OPCODE_CMP:
      ttn_cmp(b, op_trans[tgsi_op], dest, src);
      break;

   case TGSI_OPCODE_UCMP:
      ttn_ucmp(b, op_trans[tgsi_op], dest, src);
      break;

   case TGSI_OPCODE_SGT:
      ttn_sgt(b, op_trans[tgsi_op], dest, src);
      break;

   case TGSI_OPCODE_SLE:
      ttn_sle(b, op_trans[tgsi_op], dest, src);
      break;

   case TGSI_OPCODE_KILL_IF:
      ttn_kill_if(b, op_trans[tgsi_op], dest, src);
      break;

   /* All texture sampling opcodes share one translation path. */
   case TGSI_OPCODE_TEX:
   case TGSI_OPCODE_TEX_LZ:
   case TGSI_OPCODE_TXP:
   case TGSI_OPCODE_TXL:
   case TGSI_OPCODE_TXB:
   case TGSI_OPCODE_TXD:
   case TGSI_OPCODE_TEX2:
   case TGSI_OPCODE_TXL2:
   case TGSI_OPCODE_TXB2:
   case TGSI_OPCODE_TXF:
   case TGSI_OPCODE_TXF_LZ:
   case TGSI_OPCODE_TG4:
   case TGSI_OPCODE_LODQ:
      ttn_tex(c, dest, src);
      break;

   case TGSI_OPCODE_TXQ:
      ttn_txq(c, dest, src);
      break;

   /* SSBO/image memory access. */
   case TGSI_OPCODE_LOAD:
   case TGSI_OPCODE_STORE:
      ttn_mem(c, dest, src);
      break;

   case TGSI_OPCODE_NOP:
      break;

   /* Structured control flow: these manipulate the builder's cursor and
    * the if/loop stacks in the ttn_compile, so they go through 'c'.
    */
   case TGSI_OPCODE_IF:
      ttn_if(c, src[0], false);
      break;

   case TGSI_OPCODE_UIF:
      ttn_if(c, src[0], true);
      break;

   case TGSI_OPCODE_ELSE:
      ttn_else(c);
      break;

   case TGSI_OPCODE_ENDIF:
      ttn_endif(c);
      break;

   case TGSI_OPCODE_BGNLOOP:
      ttn_bgnloop(c);
      break;

   case TGSI_OPCODE_BRK:
      ttn_brk(b);
      break;

   case TGSI_OPCODE_CONT:
      ttn_cont(b);
      break;

   case TGSI_OPCODE_ENDLOOP:
      ttn_endloop(c);
      break;

   default:
      /* Everything else is a straight ALU translation from op_trans[].
       * MOV is special-cased because nir_op_mov happens to be 0, which is
       * also the table's "untranslatable" marker.
       */
      if (op_trans[tgsi_op] != 0 || tgsi_op == TGSI_OPCODE_MOV) {
         ttn_alu(b, op_trans[tgsi_op], dest, dst_bitsize, src);
      } else {
         fprintf(stderr, "unknown TGSI opcode: %s\n",
                 tgsi_get_opcode_name(tgsi_op));
         abort();
      }
      break;
   }

   /* Saturate by re-reading the destination we just wrote and clamping it;
    * this only works because the dest is a register, never SSA.
    */
   if (tgsi_inst->Instruction.Saturate) {
      assert(!dest.dest.is_ssa);
      ttn_move_dest(b, dest, nir_fsat(b, ttn_src_for_dest(b, &dest)));
   }

   /* if the dst has a matching var, append store_var to move
    * output from reg to var
    */
   nir_variable *var = ttn_get_var(c, tgsi_dst);
   if (var) {
      unsigned index = tgsi_dst->Register.Index;
      unsigned offset = c->temp_regs[index].offset;
      struct tgsi_ind_register *indirect = tgsi_dst->Register.Indirect ?
                                           &tgsi_dst->Indirect : NULL;
      nir_src val = nir_src_for_reg(dest.dest.reg.reg);
      nir_store_deref(b, ttn_array_deref(c, var, offset, indirect),
                      nir_ssa_for_src(b, val, 4), dest.write_mask);
   }
}
2351
2352 /**
2353 * Puts a NIR intrinsic to store of each TGSI_FILE_OUTPUT value to the output
2354 * variables at the end of the shader.
2355 *
2356 * We don't generate these incrementally as the TGSI_FILE_OUTPUT values are
2357 * written, because there's no output load intrinsic, which means we couldn't
2358 * handle writemasks.
2359 */
2360 static void
2361 ttn_add_output_stores(struct ttn_compile *c)
2362 {
2363 nir_builder *b = &c->build;
2364
2365 for (int i = 0; i < c->build.shader->num_outputs; i++) {
2366 nir_variable *var = c->outputs[i];
2367 if (!var)
2368 continue;
2369
2370 nir_src src = nir_src_for_reg(c->output_regs[i].reg);
2371 src.reg.base_offset = c->output_regs[i].offset;
2372
2373 nir_ssa_def *store_value = nir_ssa_for_src(b, src, 4);
2374 if (c->build.shader->info.stage == MESA_SHADER_FRAGMENT) {
2375 /* TGSI uses TGSI_SEMANTIC_POSITION.z for the depth output
2376 * and TGSI_SEMANTIC_STENCIL.y for the stencil output,
2377 * while NIR uses a single-component output.
2378 */
2379 if (var->data.location == FRAG_RESULT_DEPTH)
2380 store_value = nir_channel(b, store_value, 2);
2381 else if (var->data.location == FRAG_RESULT_STENCIL)
2382 store_value = nir_channel(b, store_value, 1);
2383 } else {
2384 /* FOGC and PSIZ are scalar values */
2385 if (var->data.location == VARYING_SLOT_FOGC ||
2386 var->data.location == VARYING_SLOT_PSIZ) {
2387 store_value = nir_channel(b, store_value, 0);
2388 }
2389 }
2390
2391 nir_store_deref(b, nir_build_deref_var(b, var), store_value,
2392 (1 << store_value->num_components) - 1);
2393 }
2394 }
2395
2396 /**
2397 * Parses the given TGSI tokens.
2398 */
2399 static void
2400 ttn_parse_tgsi(struct ttn_compile *c, const void *tgsi_tokens)
2401 {
2402 struct tgsi_parse_context parser;
2403 int ret;
2404
2405 ret = tgsi_parse_init(&parser, tgsi_tokens);
2406 assert(ret == TGSI_PARSE_OK);
2407
2408 while (!tgsi_parse_end_of_tokens(&parser)) {
2409 tgsi_parse_token(&parser);
2410 c->token = &parser.FullToken;
2411
2412 switch (parser.FullToken.Token.Type) {
2413 case TGSI_TOKEN_TYPE_DECLARATION:
2414 ttn_emit_declaration(c);
2415 break;
2416
2417 case TGSI_TOKEN_TYPE_INSTRUCTION:
2418 ttn_emit_instruction(c);
2419 break;
2420
2421 case TGSI_TOKEN_TYPE_IMMEDIATE:
2422 ttn_emit_immediate(c);
2423 break;
2424 }
2425 }
2426
2427 tgsi_parse_free(&parser);
2428 }
2429
2430 static void
2431 ttn_read_pipe_caps(struct ttn_compile *c,
2432 struct pipe_screen *screen)
2433 {
2434 c->cap_packed_uniforms = screen->get_param(screen, PIPE_CAP_PACKED_UNIFORMS);
2435 c->cap_samplers_as_deref = screen->get_param(screen, PIPE_CAP_NIR_SAMPLERS_AS_DEREF);
2436 c->cap_face_is_sysval = screen->get_param(screen, PIPE_CAP_TGSI_FS_FACE_IS_INTEGER_SYSVAL);
2437 c->cap_position_is_sysval = screen->get_param(screen, PIPE_CAP_TGSI_FS_POSITION_IS_SYSVAL);
2438 c->cap_point_is_sysval = screen->get_param(screen, PIPE_CAP_TGSI_FS_POINT_IS_SYSVAL);
2439 }
2440
/**
 * Initializes a TGSI-to-NIR compiler: scans the tokens, sets up the NIR
 * shader and per-file register arrays, applies TGSI properties to the
 * shader info, then parses the tokens and emits the final output stores.
 *
 * Exactly one of 'options' or 'screen' may be NULL; when 'options' is NULL
 * the compiler options are queried from the screen.
 */
static struct ttn_compile *
ttn_compile_init(const void *tgsi_tokens,
                 const nir_shader_compiler_options *options,
                 struct pipe_screen *screen)
{
   struct ttn_compile *c;
   struct nir_shader *s;
   struct tgsi_shader_info scan;

   assert(options || screen);
   c = rzalloc(NULL, struct ttn_compile);

   tgsi_scan_shader(tgsi_tokens, &scan);
   /* NOTE(review): 'scan' is a stack local, so c->scan is only valid for
    * the duration of this function (it is consumed during parsing below);
    * it dangles once we return — confirm nothing reads it afterwards.
    */
   c->scan = &scan;

   if (!options) {
      options =
         screen->get_compiler_options(screen, PIPE_SHADER_IR_NIR, scan.processor);
   }

   nir_builder_init_simple_shader(&c->build, NULL,
                                  tgsi_processor_to_shader_stage(scan.processor),
                                  options);

   s = c->build.shader;

   if (screen) {
      ttn_read_pipe_caps(c, screen);
   } else {
      /* TTN used to be hard coded to always make FACE a sysval,
       * so it makes sense to preserve that behavior so users don't break. */
      c->cap_face_is_sysval = true;
   }

   if (s->info.stage == MESA_SHADER_FRAGMENT)
      s->info.fs.untyped_color_outputs = true;

   /* Counts come from the scan pass: file_max is the highest declared
    * index (-1 when unused), and the *_declared fields are bitmasks.
    */
   s->num_inputs = scan.file_max[TGSI_FILE_INPUT] + 1;
   s->num_uniforms = scan.const_file_max[0] + 1;
   s->num_outputs = scan.file_max[TGSI_FILE_OUTPUT] + 1;
   s->info.num_ssbos = util_last_bit(scan.shader_buffers_declared);
   /* Constant buffer 0 is the default uniforms, not a UBO, hence the shift. */
   s->info.num_ubos = util_last_bit(scan.const_buffers_declared >> 1);
   s->info.num_images = util_last_bit(scan.images_declared);
   s->info.num_textures = util_last_bit(scan.samplers_declared);

   /* Translate TGSI shader properties into nir_shader_info fields. */
   for (unsigned i = 0; i < TGSI_PROPERTY_COUNT; i++) {
      unsigned value = scan.properties[i];

      switch (i) {
      case TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS:
         break; /* handled in ttn_emit_declaration */
      case TGSI_PROPERTY_FS_COORD_ORIGIN:
         if (s->info.stage == MESA_SHADER_FRAGMENT)
            s->info.fs.origin_upper_left = value == TGSI_FS_COORD_ORIGIN_UPPER_LEFT;
         break;
      case TGSI_PROPERTY_FS_COORD_PIXEL_CENTER:
         if (s->info.stage == MESA_SHADER_FRAGMENT)
            s->info.fs.pixel_center_integer = value == TGSI_FS_COORD_PIXEL_CENTER_INTEGER;
         break;
      case TGSI_PROPERTY_FS_DEPTH_LAYOUT:
         if (s->info.stage == MESA_SHADER_FRAGMENT)
            s->info.fs.depth_layout = ttn_get_depth_layout(value);
         break;
      case TGSI_PROPERTY_VS_WINDOW_SPACE_POSITION:
         if (s->info.stage == MESA_SHADER_VERTEX)
            s->info.vs.window_space_position = value;
         break;
      case TGSI_PROPERTY_NEXT_SHADER:
         s->info.next_stage = tgsi_processor_to_shader_stage(value);
         break;
      case TGSI_PROPERTY_VS_BLIT_SGPRS_AMD:
         if (s->info.stage == MESA_SHADER_VERTEX)
            s->info.vs.blit_sgprs_amd = value;
         break;
      case TGSI_PROPERTY_CS_FIXED_BLOCK_WIDTH:
         if (s->info.stage == MESA_SHADER_COMPUTE)
            s->info.cs.local_size[0] = value;
         break;
      case TGSI_PROPERTY_CS_FIXED_BLOCK_HEIGHT:
         if (s->info.stage == MESA_SHADER_COMPUTE)
            s->info.cs.local_size[1] = value;
         break;
      case TGSI_PROPERTY_CS_FIXED_BLOCK_DEPTH:
         if (s->info.stage == MESA_SHADER_COMPUTE)
            s->info.cs.local_size[2] = value;
         break;
      case TGSI_PROPERTY_CS_USER_DATA_COMPONENTS_AMD:
         if (s->info.stage == MESA_SHADER_COMPUTE)
            s->info.cs.user_data_components_amd = value;
         break;
      default:
         if (value) {
            fprintf(stderr, "tgsi_to_nir: unhandled TGSI property %u = %u\n",
                    i, value);
            unreachable("unhandled TGSI property");
         }
      }
   }

   /* A compute shader with any zero fixed block dimension has a variable
    * local size.
    */
   if (s->info.stage == MESA_SHADER_COMPUTE &&
       (!s->info.cs.local_size[0] ||
        !s->info.cs.local_size[1] ||
        !s->info.cs.local_size[2]))
      s->info.cs.local_size_variable = true;

   /* Per-register-file bookkeeping arrays, sized from the scan results. */
   c->inputs = rzalloc_array(c, struct nir_variable *, s->num_inputs);
   c->outputs = rzalloc_array(c, struct nir_variable *, s->num_outputs);

   c->output_regs = rzalloc_array(c, struct ttn_reg_info,
                                  scan.file_max[TGSI_FILE_OUTPUT] + 1);
   c->temp_regs = rzalloc_array(c, struct ttn_reg_info,
                                scan.file_max[TGSI_FILE_TEMPORARY] + 1);
   c->imm_defs = rzalloc_array(c, nir_ssa_def *,
                               scan.file_max[TGSI_FILE_IMMEDIATE] + 1);

   c->num_samp_types = scan.file_max[TGSI_FILE_SAMPLER_VIEW] + 1;
   c->samp_types = rzalloc_array(c, nir_alu_type, c->num_samp_types);

   /* Control-flow stacks sized from the opcode counts; each if needs two
    * cursor slots (then/else).
    */
   c->if_stack = rzalloc_array(c, nir_cursor,
                               (scan.opcode_count[TGSI_OPCODE_IF] +
                                scan.opcode_count[TGSI_OPCODE_UIF]) * 2);
   c->loop_stack = rzalloc_array(c, nir_cursor,
                                 scan.opcode_count[TGSI_OPCODE_BGNLOOP]);


   ttn_parse_tgsi(c, tgsi_tokens);
   ttn_add_output_stores(c);

   nir_validate_shader(c->build.shader, "TTN: after parsing TGSI and creating the NIR shader");

   return c;
}
2576
/* Runs the standard NIR optimization loop over the freshly-translated
 * shader until no pass makes further progress. Pass order matters here:
 * lowering passes (vars-to-ssa, alu, pack) run unconditionally each
 * iteration, while the opt passes accumulate into 'progress'.
 */
static void
ttn_optimize_nir(nir_shader *nir)
{
   bool progress;
   do {
      progress = false;

      NIR_PASS_V(nir, nir_lower_vars_to_ssa);

      /* Scalarize ALU and phis first on scalar backends so the scalar
       * forms are what the opt passes below see.
       */
      if (nir->options->lower_to_scalar) {
         NIR_PASS_V(nir, nir_lower_alu_to_scalar, NULL, NULL);
         NIR_PASS_V(nir, nir_lower_phis_to_scalar);
      }

      NIR_PASS_V(nir, nir_lower_alu);
      NIR_PASS_V(nir, nir_lower_pack);
      NIR_PASS(progress, nir, nir_copy_prop);
      NIR_PASS(progress, nir, nir_opt_remove_phis);
      NIR_PASS(progress, nir, nir_opt_dce);

      /* Removing trivial continues opens up more copy-prop/DCE
       * opportunities, so re-run those immediately.
       */
      if (nir_opt_trivial_continues(nir)) {
         progress = true;
         NIR_PASS(progress, nir, nir_copy_prop);
         NIR_PASS(progress, nir, nir_opt_dce);
      }

      NIR_PASS(progress, nir, nir_opt_if, false);
      NIR_PASS(progress, nir, nir_opt_dead_cf);
      NIR_PASS(progress, nir, nir_opt_cse);
      NIR_PASS(progress, nir, nir_opt_peephole_select, 8, true, true);

      NIR_PASS(progress, nir, nir_opt_algebraic);
      NIR_PASS(progress, nir, nir_opt_constant_folding);

      NIR_PASS(progress, nir, nir_opt_undef);
      NIR_PASS(progress, nir, nir_opt_conditional_discard);

      if (nir->options->max_unroll_iterations) {
         NIR_PASS(progress, nir, nir_opt_loop_unroll, (nir_variable_mode)0);
      }

   } while (progress);

}
2621
/**
 * Finalizes the NIR in a similar way as st_glsl_to_nir does.
 *
 * Drivers expect that these passes are already performed,
 * so we have to do it here too.
 */
static void
ttn_finalize_nir(struct ttn_compile *c, struct pipe_screen *screen)
{
   struct nir_shader *nir = c->build.shader;

   /* Get everything into SSA form first; later passes require it. */
   NIR_PASS_V(nir, nir_lower_vars_to_ssa);
   NIR_PASS_V(nir, nir_lower_regs_to_ssa);

   NIR_PASS_V(nir, nir_lower_global_vars_to_local);
   NIR_PASS_V(nir, nir_split_var_copies);
   NIR_PASS_V(nir, nir_lower_var_copies);
   NIR_PASS_V(nir, nir_lower_system_values);

   /* '16' is the multiplier passed to nir_lower_uniforms_to_ubo —
    * presumably the vec4 (16-byte) slot size; confirm against the pass's
    * documentation.
    */
   if (c->cap_packed_uniforms)
      NIR_PASS_V(nir, nir_lower_uniforms_to_ubo, 16);

   if (!c->cap_samplers_as_deref)
      NIR_PASS_V(nir, nir_lower_samplers);

   /* A screen-provided finalize hook takes over optimization entirely;
    * otherwise run our own opt loop and regather shader info.
    */
   if (screen->finalize_nir) {
      screen->finalize_nir(screen, nir, true);
   } else {
      ttn_optimize_nir(nir);
      nir_shader_gather_info(nir, c->build.impl);
   }

   nir_validate_shader(nir, "TTN: after all optimizations");
}
2656
2657 struct nir_shader *
2658 tgsi_to_nir(const void *tgsi_tokens,
2659 struct pipe_screen *screen)
2660 {
2661 struct ttn_compile *c;
2662 struct nir_shader *s;
2663
2664 c = ttn_compile_init(tgsi_tokens, NULL, screen);
2665 s = c->build.shader;
2666 ttn_finalize_nir(c, screen);
2667 ralloc_free(c);
2668
2669 return s;
2670 }
2671
2672 struct nir_shader *
2673 tgsi_to_nir_noscreen(const void *tgsi_tokens,
2674 const nir_shader_compiler_options *options)
2675 {
2676 struct ttn_compile *c;
2677 struct nir_shader *s;
2678
2679 c = ttn_compile_init(tgsi_tokens, options, NULL);
2680 s = c->build.shader;
2681 ralloc_free(c);
2682
2683 return s;
2684 }
2685