a23ed4d685fccd76003174c875565d8d7ccf180d
[mesa.git] / src / gallium / auxiliary / nir / tgsi_to_nir.c
1 /*
2 * Copyright © 2014-2015 Broadcom
3 * Copyright (C) 2014 Rob Clark <robclark@freedesktop.org>
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9 * and/or sell copies of the Software, and to permit persons to whom the
10 * Software is furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice (including the next
13 * paragraph) shall be included in all copies or substantial portions of the
14 * Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
22 * IN THE SOFTWARE.
23 */
24
25 #include "util/ralloc.h"
26 #include "pipe/p_screen.h"
27
28 #include "compiler/nir/nir.h"
29 #include "compiler/nir/nir_control_flow.h"
30 #include "compiler/nir/nir_builder.h"
31 #include "compiler/glsl/gl_nir.h"
32 #include "compiler/glsl/list.h"
33 #include "compiler/shader_enums.h"
34
35 #include "tgsi_to_nir.h"
36 #include "tgsi/tgsi_parse.h"
37 #include "tgsi/tgsi_dump.h"
38 #include "tgsi/tgsi_info.h"
39 #include "tgsi/tgsi_scan.h"
40 #include "tgsi/tgsi_from_mesa.h"
41
42 #define SWIZ(X, Y, Z, W) (unsigned[4]){ \
43 TGSI_SWIZZLE_##X, \
44 TGSI_SWIZZLE_##Y, \
45 TGSI_SWIZZLE_##Z, \
46 TGSI_SWIZZLE_##W, \
47 }
48
/* Per-TGSI-index bookkeeping: each TGSI temporary/output index maps either
 * to a NIR local register or to (a slice of) an array variable.
 */
struct ttn_reg_info {
   /** nir register containing this TGSI index. */
   nir_register *reg;
   /* Array variable backing this index (NULL when `reg` is used instead). */
   nir_variable *var;
   /** Offset (in vec4s) from the start of var for this TGSI index. */
   int offset;
};
56
/* Translation context threaded through the whole TGSI -> NIR conversion. */
struct ttn_compile {
   /* Current TGSI token being translated (declaration/immediate/instruction). */
   union tgsi_full_token *token;
   nir_builder build;
   /* Result of tgsi_scan_shader(); consulted for semantics and processor type. */
   struct tgsi_shader_info *scan;

   struct ttn_reg_info *output_regs;
   struct ttn_reg_info *temp_regs;
   /* SSA defs for TGSI_FILE_IMMEDIATE, indexed by immediate number. */
   nir_ssa_def **imm_defs;

   unsigned num_samp_types;
   /* Per-sampler-view return type recorded from SAMPLER_VIEW declarations. */
   nir_alu_type *samp_types;

   /* The single TGSI_FILE_ADDRESS register (ARL destination). */
   nir_register *addr_reg;

   nir_variable **inputs;
   nir_variable **outputs;
   nir_variable *samplers[PIPE_MAX_SAMPLERS];
   nir_variable *images[PIPE_MAX_SHADER_IMAGES];
   nir_variable *ssbo[PIPE_MAX_SHADER_BUFFERS];

   /* Special fragment inputs, remembered so loads can be intercepted. */
   nir_variable *input_var_face;
   nir_variable *input_var_position;
   nir_variable *input_var_point;

   /**
    * Stack of nir_cursors where instructions should be pushed as we pop
    * back out of the control flow stack.
    *
    * For each IF/ELSE/ENDIF block, if_stack[if_stack_pos] has where the else
    * instructions should be placed, and if_stack[if_stack_pos - 1] has where
    * the next instructions outside of the if/then/else block go.
    */
   nir_cursor *if_stack;
   unsigned if_stack_pos;

   /**
    * Stack of nir_cursors for BGNLOOP/ENDLOOP handling.
    *
    * loop_stack[loop_stack_pos - 1] contains the cf_node_list for the outside
    * of the loop.
    */
   nir_cursor *loop_stack;
   unsigned loop_stack_pos;

   /* How many TGSI_FILE_IMMEDIATE vec4s have been parsed so far. */
   unsigned next_imm;

   /* Driver capabilities that steer the translation (see ttn_compile init,
    * outside this chunk, for where these are set).
    */
   bool cap_scalar;
   bool cap_face_is_sysval;
   bool cap_position_is_sysval;
   bool cap_point_is_sysval;
   bool cap_packed_uniforms;
   bool cap_samplers_as_deref;
};
112
/* Swizzle a vec4 source with named X/Y/Z/W selectors. */
#define ttn_swizzle(b, src, x, y, z, w) \
   nir_swizzle(b, src, SWIZ(x, y, z, w), 4)
/* Extract one named channel from a vec4 source. */
#define ttn_channel(b, src, swiz) \
   nir_channel(b, src, TGSI_SWIZZLE_##swiz)
117
118 static gl_varying_slot
119 tgsi_varying_semantic_to_slot(unsigned semantic, unsigned index)
120 {
121 switch (semantic) {
122 case TGSI_SEMANTIC_POSITION:
123 return VARYING_SLOT_POS;
124 case TGSI_SEMANTIC_COLOR:
125 if (index == 0)
126 return VARYING_SLOT_COL0;
127 else
128 return VARYING_SLOT_COL1;
129 case TGSI_SEMANTIC_BCOLOR:
130 if (index == 0)
131 return VARYING_SLOT_BFC0;
132 else
133 return VARYING_SLOT_BFC1;
134 case TGSI_SEMANTIC_FOG:
135 return VARYING_SLOT_FOGC;
136 case TGSI_SEMANTIC_PSIZE:
137 return VARYING_SLOT_PSIZ;
138 case TGSI_SEMANTIC_GENERIC:
139 return VARYING_SLOT_VAR0 + index;
140 case TGSI_SEMANTIC_FACE:
141 return VARYING_SLOT_FACE;
142 case TGSI_SEMANTIC_EDGEFLAG:
143 return VARYING_SLOT_EDGE;
144 case TGSI_SEMANTIC_PRIMID:
145 return VARYING_SLOT_PRIMITIVE_ID;
146 case TGSI_SEMANTIC_CLIPDIST:
147 if (index == 0)
148 return VARYING_SLOT_CLIP_DIST0;
149 else
150 return VARYING_SLOT_CLIP_DIST1;
151 case TGSI_SEMANTIC_CLIPVERTEX:
152 return VARYING_SLOT_CLIP_VERTEX;
153 case TGSI_SEMANTIC_TEXCOORD:
154 return VARYING_SLOT_TEX0 + index;
155 case TGSI_SEMANTIC_PCOORD:
156 return VARYING_SLOT_PNTC;
157 case TGSI_SEMANTIC_VIEWPORT_INDEX:
158 return VARYING_SLOT_VIEWPORT;
159 case TGSI_SEMANTIC_LAYER:
160 return VARYING_SLOT_LAYER;
161 default:
162 fprintf(stderr, "Bad TGSI semantic: %d/%d\n", semantic, index);
163 abort();
164 }
165 }
166
/* Convert a TGSI_PROPERTY_FS_DEPTH_LAYOUT value to the gl_frag_depth_layout
 * enum stored in shader_info.
 */
static enum gl_frag_depth_layout
ttn_get_depth_layout(unsigned tgsi_fs_depth_layout)
{
   switch (tgsi_fs_depth_layout) {
   case TGSI_FS_DEPTH_LAYOUT_NONE:
      return FRAG_DEPTH_LAYOUT_NONE;
   case TGSI_FS_DEPTH_LAYOUT_ANY:
      return FRAG_DEPTH_LAYOUT_ANY;
   case TGSI_FS_DEPTH_LAYOUT_GREATER:
      return FRAG_DEPTH_LAYOUT_GREATER;
   case TGSI_FS_DEPTH_LAYOUT_LESS:
      return FRAG_DEPTH_LAYOUT_LESS;
   case TGSI_FS_DEPTH_LAYOUT_UNCHANGED:
      return FRAG_DEPTH_LAYOUT_UNCHANGED;
   default:
      unreachable("bad TGSI FS depth layout");
   }
}
185
/* Re-read the value just written to an ALU destination, as a fresh SSA def.
 * SSA dests are used directly; register dests are read back at the same base
 * offset (indirect dests are not supported here).  The result is an
 * identity-swizzled vec4 mov.
 */
static nir_ssa_def *
ttn_src_for_dest(nir_builder *b, nir_alu_dest *dest)
{
   nir_alu_src src;
   memset(&src, 0, sizeof(src));

   if (dest->dest.is_ssa)
      src.src = nir_src_for_ssa(&dest->dest.ssa);
   else {
      assert(!dest->dest.reg.indirect);
      src.src = nir_src_for_reg(dest->dest.reg.reg);
      src.src.reg.base_offset = dest->dest.reg.base_offset;
   }

   /* identity swizzle: read components straight through */
   for (int i = 0; i < 4; i++)
      src.swizzle[i] = i;

   return nir_mov_alu(b, src, 4);
}
205
206 static enum glsl_interp_mode
207 ttn_translate_interp_mode(unsigned tgsi_interp)
208 {
209 switch (tgsi_interp) {
210 case TGSI_INTERPOLATE_CONSTANT:
211 return INTERP_MODE_FLAT;
212 case TGSI_INTERPOLATE_LINEAR:
213 return INTERP_MODE_NOPERSPECTIVE;
214 case TGSI_INTERPOLATE_PERSPECTIVE:
215 return INTERP_MODE_SMOOTH;
216 case TGSI_INTERPOLATE_COLOR:
217 return INTERP_MODE_SMOOTH;
218 default:
219 unreachable("bad TGSI interpolation mode");
220 }
221 }
222
223 static void
224 ttn_emit_declaration(struct ttn_compile *c)
225 {
226 nir_builder *b = &c->build;
227 struct tgsi_full_declaration *decl = &c->token->FullDeclaration;
228 unsigned array_size = decl->Range.Last - decl->Range.First + 1;
229 unsigned file = decl->Declaration.File;
230 unsigned i;
231
232 if (file == TGSI_FILE_TEMPORARY) {
233 if (decl->Declaration.Array) {
234 /* for arrays, we create variables instead of registers: */
235 nir_variable *var = rzalloc(b->shader, nir_variable);
236
237 var->type = glsl_array_type(glsl_vec4_type(), array_size, 0);
238 var->data.mode = nir_var_shader_temp;
239 var->name = ralloc_asprintf(var, "arr_%d", decl->Array.ArrayID);
240
241 exec_list_push_tail(&b->shader->globals, &var->node);
242
243 for (i = 0; i < array_size; i++) {
244 /* point all the matching slots to the same var,
245 * with appropriate offset set, mostly just so
246 * we know what to do when tgsi does a non-indirect
247 * access
248 */
249 c->temp_regs[decl->Range.First + i].reg = NULL;
250 c->temp_regs[decl->Range.First + i].var = var;
251 c->temp_regs[decl->Range.First + i].offset = i;
252 }
253 } else {
254 for (i = 0; i < array_size; i++) {
255 nir_register *reg = nir_local_reg_create(b->impl);
256 reg->num_components = 4;
257 c->temp_regs[decl->Range.First + i].reg = reg;
258 c->temp_regs[decl->Range.First + i].var = NULL;
259 c->temp_regs[decl->Range.First + i].offset = 0;
260 }
261 }
262 } else if (file == TGSI_FILE_ADDRESS) {
263 c->addr_reg = nir_local_reg_create(b->impl);
264 c->addr_reg->num_components = 4;
265 } else if (file == TGSI_FILE_SYSTEM_VALUE) {
266 /* Nothing to record for system values. */
267 } else if (file == TGSI_FILE_BUFFER) {
268 /* Nothing to record for buffers. */
269 } else if (file == TGSI_FILE_IMAGE) {
270 /* Nothing to record for images. */
271 } else if (file == TGSI_FILE_SAMPLER) {
272 /* Nothing to record for samplers. */
273 } else if (file == TGSI_FILE_SAMPLER_VIEW) {
274 struct tgsi_declaration_sampler_view *sview = &decl->SamplerView;
275 nir_alu_type type;
276
277 assert((sview->ReturnTypeX == sview->ReturnTypeY) &&
278 (sview->ReturnTypeX == sview->ReturnTypeZ) &&
279 (sview->ReturnTypeX == sview->ReturnTypeW));
280
281 switch (sview->ReturnTypeX) {
282 case TGSI_RETURN_TYPE_SINT:
283 type = nir_type_int;
284 break;
285 case TGSI_RETURN_TYPE_UINT:
286 type = nir_type_uint;
287 break;
288 case TGSI_RETURN_TYPE_FLOAT:
289 default:
290 type = nir_type_float;
291 break;
292 }
293
294 for (i = 0; i < array_size; i++) {
295 c->samp_types[decl->Range.First + i] = type;
296 }
297 } else {
298 bool is_array = (array_size > 1);
299
300 assert(file == TGSI_FILE_INPUT ||
301 file == TGSI_FILE_OUTPUT ||
302 file == TGSI_FILE_CONSTANT);
303
304 /* nothing to do for UBOs: */
305 if ((file == TGSI_FILE_CONSTANT) && decl->Declaration.Dimension &&
306 decl->Dim.Index2D != 0) {
307 b->shader->info.num_ubos =
308 MAX2(b->shader->info.num_ubos, decl->Dim.Index2D);
309 return;
310 }
311
312 if ((file == TGSI_FILE_INPUT) || (file == TGSI_FILE_OUTPUT)) {
313 is_array = (is_array && decl->Declaration.Array &&
314 (decl->Array.ArrayID != 0));
315 }
316
317 for (i = 0; i < array_size; i++) {
318 unsigned idx = decl->Range.First + i;
319 nir_variable *var = rzalloc(b->shader, nir_variable);
320
321 var->data.driver_location = idx;
322
323 var->type = glsl_vec4_type();
324 if (is_array)
325 var->type = glsl_array_type(var->type, array_size, 0);
326
327 switch (file) {
328 case TGSI_FILE_INPUT:
329 var->data.read_only = true;
330 var->data.mode = nir_var_shader_in;
331 var->name = ralloc_asprintf(var, "in_%d", idx);
332
333 if (c->scan->processor == PIPE_SHADER_FRAGMENT) {
334 if (decl->Semantic.Name == TGSI_SEMANTIC_FACE) {
335 var->type = glsl_bool_type();
336 if (c->cap_face_is_sysval) {
337 var->data.mode = nir_var_system_value;
338 var->data.location = SYSTEM_VALUE_FRONT_FACE;
339 } else {
340 var->data.location = VARYING_SLOT_FACE;
341 }
342 c->input_var_face = var;
343 } else if (decl->Semantic.Name == TGSI_SEMANTIC_POSITION) {
344 if (c->cap_position_is_sysval) {
345 var->data.mode = nir_var_system_value;
346 var->data.location = SYSTEM_VALUE_FRAG_COORD;
347 } else {
348 var->data.location = VARYING_SLOT_POS;
349 }
350 c->input_var_position = var;
351 } else if (decl->Semantic.Name == TGSI_SEMANTIC_PCOORD) {
352 if (c->cap_point_is_sysval) {
353 var->data.mode = nir_var_system_value;
354 var->data.location = SYSTEM_VALUE_POINT_COORD;
355 } else {
356 var->data.location = VARYING_SLOT_PNTC;
357 }
358 c->input_var_point = var;
359 } else {
360 var->data.location =
361 tgsi_varying_semantic_to_slot(decl->Semantic.Name,
362 decl->Semantic.Index);
363 }
364 } else {
365 assert(!decl->Declaration.Semantic);
366 var->data.location = VERT_ATTRIB_GENERIC0 + idx;
367 }
368 var->data.index = 0;
369 var->data.interpolation =
370 ttn_translate_interp_mode(decl->Interp.Interpolate);
371
372 exec_list_push_tail(&b->shader->inputs, &var->node);
373 c->inputs[idx] = var;
374
375 for (int i = 0; i < array_size; i++)
376 b->shader->info.inputs_read |= 1 << (var->data.location + i);
377
378 break;
379 case TGSI_FILE_OUTPUT: {
380 int semantic_name = decl->Semantic.Name;
381 int semantic_index = decl->Semantic.Index;
382 /* Since we can't load from outputs in the IR, we make temporaries
383 * for the outputs and emit stores to the real outputs at the end of
384 * the shader.
385 */
386 nir_register *reg = nir_local_reg_create(b->impl);
387 reg->num_components = 4;
388 if (is_array)
389 reg->num_array_elems = array_size;
390
391 var->data.mode = nir_var_shader_out;
392 var->name = ralloc_asprintf(var, "out_%d", idx);
393 var->data.index = 0;
394 var->data.interpolation =
395 ttn_translate_interp_mode(decl->Interp.Interpolate);
396
397 if (c->scan->processor == PIPE_SHADER_FRAGMENT) {
398 switch (semantic_name) {
399 case TGSI_SEMANTIC_COLOR: {
400 /* TODO tgsi loses some information, so we cannot
401 * actually differentiate here between DSB and MRT
402 * at this point. But so far no drivers using tgsi-
403 * to-nir support dual source blend:
404 */
405 bool dual_src_blend = false;
406 if (dual_src_blend && (semantic_index == 1)) {
407 var->data.location = FRAG_RESULT_DATA0;
408 var->data.index = 1;
409 } else {
410 if (c->scan->properties[TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS])
411 var->data.location = FRAG_RESULT_COLOR;
412 else
413 var->data.location = FRAG_RESULT_DATA0 + semantic_index;
414 }
415 break;
416 }
417 case TGSI_SEMANTIC_POSITION:
418 var->data.location = FRAG_RESULT_DEPTH;
419 var->type = glsl_float_type();
420 break;
421 default:
422 fprintf(stderr, "Bad TGSI semantic: %d/%d\n",
423 decl->Semantic.Name, decl->Semantic.Index);
424 abort();
425 }
426 } else {
427 var->data.location =
428 tgsi_varying_semantic_to_slot(semantic_name, semantic_index);
429 }
430
431 if (is_array) {
432 unsigned j;
433 for (j = 0; j < array_size; j++) {
434 c->output_regs[idx + j].offset = i + j;
435 c->output_regs[idx + j].reg = reg;
436 }
437 } else {
438 c->output_regs[idx].offset = i;
439 c->output_regs[idx].reg = reg;
440 }
441
442 exec_list_push_tail(&b->shader->outputs, &var->node);
443 c->outputs[idx] = var;
444
445 for (int i = 0; i < array_size; i++)
446 b->shader->info.outputs_written |= 1ull << (var->data.location + i);
447 }
448 break;
449 case TGSI_FILE_CONSTANT:
450 var->data.mode = nir_var_uniform;
451 var->name = ralloc_asprintf(var, "uniform_%d", idx);
452 var->data.location = idx;
453
454 exec_list_push_tail(&b->shader->uniforms, &var->node);
455 break;
456 default:
457 unreachable("bad declaration file");
458 return;
459 }
460
461 if (is_array)
462 break;
463 }
464
465 }
466 }
467
468 static void
469 ttn_emit_immediate(struct ttn_compile *c)
470 {
471 nir_builder *b = &c->build;
472 struct tgsi_full_immediate *tgsi_imm = &c->token->FullImmediate;
473 nir_load_const_instr *load_const;
474 int i;
475
476 load_const = nir_load_const_instr_create(b->shader, 4, 32);
477 c->imm_defs[c->next_imm] = &load_const->def;
478 c->next_imm++;
479
480 for (i = 0; i < load_const->def.num_components; i++)
481 load_const->value[i].u32 = tgsi_imm->u[i].Uint;
482
483 nir_builder_instr_insert(b, &load_const->instr);
484 }
485
486 static nir_ssa_def *
487 ttn_src_for_indirect(struct ttn_compile *c, struct tgsi_ind_register *indirect);
488
489 /* generate either a constant or indirect deref chain for accessing an
490 * array variable.
491 */
492 static nir_deref_instr *
493 ttn_array_deref(struct ttn_compile *c, nir_variable *var, unsigned offset,
494 struct tgsi_ind_register *indirect)
495 {
496 nir_deref_instr *deref = nir_build_deref_var(&c->build, var);
497 nir_ssa_def *index = nir_imm_int(&c->build, offset);
498 if (indirect)
499 index = nir_iadd(&c->build, index, ttn_src_for_indirect(c, indirect));
500 return nir_build_deref_array(&c->build, deref, index);
501 }
502
503 /* Special case: Turn the frontface varying into a load of the
504 * frontface variable, and create the vector as required by TGSI.
505 */
506 static nir_ssa_def *
507 ttn_emulate_tgsi_front_face(struct ttn_compile *c)
508 {
509 nir_ssa_def *tgsi_frontface[4];
510
511 if (c->cap_face_is_sysval) {
512 /* When it's a system value, it should be an integer vector: (F, 0, 0, 1)
513 * F is 0xffffffff if front-facing, 0 if not.
514 */
515
516 nir_ssa_def *frontface = nir_load_front_face(&c->build, 1);
517
518 tgsi_frontface[0] = nir_bcsel(&c->build,
519 frontface,
520 nir_imm_int(&c->build, 0xffffffff),
521 nir_imm_int(&c->build, 0));
522 tgsi_frontface[1] = nir_imm_int(&c->build, 0);
523 tgsi_frontface[2] = nir_imm_int(&c->build, 0);
524 tgsi_frontface[3] = nir_imm_int(&c->build, 1);
525 } else {
526 /* When it's an input, it should be a float vector: (F, 0.0, 0.0, 1.0)
527 * F is positive if front-facing, negative if not.
528 */
529
530 assert(c->input_var_face);
531 nir_ssa_def *frontface = nir_load_var(&c->build, c->input_var_face);
532
533 tgsi_frontface[0] = nir_bcsel(&c->build,
534 frontface,
535 nir_imm_float(&c->build, 1.0),
536 nir_imm_float(&c->build, -1.0));
537 tgsi_frontface[1] = nir_imm_float(&c->build, 0.0);
538 tgsi_frontface[2] = nir_imm_float(&c->build, 0.0);
539 tgsi_frontface[3] = nir_imm_float(&c->build, 1.0);
540 }
541
542 return nir_vec(&c->build, tgsi_frontface, 4);
543 }
544
/* Build a nir_src for a TGSI register file + index, handling the per-file
 * storage choices made in ttn_emit_declaration: temp arrays load through a
 * deref, plain temps and the address register read NIR registers, immediates
 * use the recorded SSA defs, system values and special FS inputs emit the
 * corresponding intrinsics, and constants become load_uniform/load_ubo.
 */
static nir_src
ttn_src_for_file_and_index(struct ttn_compile *c, unsigned file, unsigned index,
                           struct tgsi_ind_register *indirect,
                           struct tgsi_dimension *dim,
                           struct tgsi_ind_register *dimind,
                           bool src_is_float)
{
   nir_builder *b = &c->build;
   nir_src src;

   memset(&src, 0, sizeof(src));

   switch (file) {
   case TGSI_FILE_TEMPORARY:
      if (c->temp_regs[index].var) {
         /* array temp: load through a (possibly indirect) array deref */
         unsigned offset = c->temp_regs[index].offset;
         nir_variable *var = c->temp_regs[index].var;
         nir_ssa_def *load = nir_load_deref(&c->build,
               ttn_array_deref(c, var, offset, indirect));

         src = nir_src_for_ssa(load);
      } else {
         assert(!indirect);
         src.reg.reg = c->temp_regs[index].reg;
      }
      assert(!dim);
      break;

   case TGSI_FILE_ADDRESS:
      src.reg.reg = c->addr_reg;
      assert(!dim);
      break;

   case TGSI_FILE_IMMEDIATE:
      src = nir_src_for_ssa(c->imm_defs[index]);
      assert(!indirect);
      assert(!dim);
      break;

   case TGSI_FILE_SYSTEM_VALUE: {
      nir_intrinsic_op op;
      nir_ssa_def *load;

      assert(!indirect);
      assert(!dim);

      /* `op` is only used to mark system_values_read below; the load itself
       * comes from the matching builder helper.
       */
      switch (c->scan->system_value_semantic_name[index]) {
      case TGSI_SEMANTIC_VERTEXID_NOBASE:
         op = nir_intrinsic_load_vertex_id_zero_base;
         load = nir_load_vertex_id_zero_base(b);
         break;
      case TGSI_SEMANTIC_VERTEXID:
         op = nir_intrinsic_load_vertex_id;
         load = nir_load_vertex_id(b);
         break;
      case TGSI_SEMANTIC_BASEVERTEX:
         op = nir_intrinsic_load_base_vertex;
         load = nir_load_base_vertex(b);
         break;
      case TGSI_SEMANTIC_INSTANCEID:
         op = nir_intrinsic_load_instance_id;
         load = nir_load_instance_id(b);
         break;
      case TGSI_SEMANTIC_FACE:
         assert(c->cap_face_is_sysval);
         op = nir_intrinsic_load_front_face;
         load = ttn_emulate_tgsi_front_face(c);
         break;
      case TGSI_SEMANTIC_POSITION:
         assert(c->cap_position_is_sysval);
         op = nir_intrinsic_load_frag_coord;
         load = nir_load_frag_coord(b);
         break;
      case TGSI_SEMANTIC_PCOORD:
         assert(c->cap_point_is_sysval);
         op = nir_intrinsic_load_point_coord;
         load = nir_load_point_coord(b);
         break;
      case TGSI_SEMANTIC_THREAD_ID:
         op = nir_intrinsic_load_local_invocation_id;
         load = nir_load_local_invocation_id(b);
         break;
      case TGSI_SEMANTIC_BLOCK_ID:
         op = nir_intrinsic_load_work_group_id;
         load = nir_load_work_group_id(b);
         break;
      default:
         unreachable("bad system value");
      }

      src = nir_src_for_ssa(load);
      b->shader->info.system_values_read |=
         (1 << nir_system_value_from_intrinsic(op));

      break;
   }

   case TGSI_FILE_INPUT:
      /* The three special fragment inputs get emulated/intercepted loads;
       * everything else reads its input variable through a deref.
       */
      if (c->scan->processor == PIPE_SHADER_FRAGMENT &&
          c->scan->input_semantic_name[index] == TGSI_SEMANTIC_FACE) {
         assert(!c->cap_face_is_sysval && c->input_var_face);
         return nir_src_for_ssa(ttn_emulate_tgsi_front_face(c));
      } else if (c->scan->processor == PIPE_SHADER_FRAGMENT &&
          c->scan->input_semantic_name[index] == TGSI_SEMANTIC_POSITION) {
         assert(!c->cap_position_is_sysval && c->input_var_position);
         return nir_src_for_ssa(nir_load_var(&c->build, c->input_var_position));
      } else if (c->scan->processor == PIPE_SHADER_FRAGMENT &&
          c->scan->input_semantic_name[index] == TGSI_SEMANTIC_PCOORD) {
         assert(!c->cap_point_is_sysval && c->input_var_point);
         return nir_src_for_ssa(nir_load_var(&c->build, c->input_var_point));
      } else {
         /* Indirection on input arrays isn't supported by TTN. */
         assert(!dim);
         nir_deref_instr *deref = nir_build_deref_var(&c->build,
                                                      c->inputs[index]);
         return nir_src_for_ssa(nir_load_deref(&c->build, deref));
      }
      break;

   case TGSI_FILE_CONSTANT: {
      nir_intrinsic_instr *load;
      nir_intrinsic_op op;
      unsigned srcn = 0;

      /* constbuf 0 is load_uniform; higher (or indirect) 2D indices are UBOs */
      if (dim && (dim->Index > 0 || dim->Indirect)) {
         op = nir_intrinsic_load_ubo;
      } else {
         op = nir_intrinsic_load_uniform;
      }

      load = nir_intrinsic_instr_create(b->shader, op);
      if (op == nir_intrinsic_load_uniform) {
         nir_intrinsic_set_type(load, src_is_float ? nir_type_float :
                                                     nir_type_int);
      }

      load->num_components = 4;
      if (dim && (dim->Index > 0 || dim->Indirect)) {
         /* first src of load_ubo is the buffer index */
         if (dimind) {
            load->src[srcn] =
               ttn_src_for_file_and_index(c, dimind->File, dimind->Index,
                                          NULL, NULL, NULL, false);
         } else {
            /* UBOs start at index 1 in TGSI: */
            load->src[srcn] =
               nir_src_for_ssa(nir_imm_int(b, dim->Index - 1));
         }
         srcn++;
      }

      nir_ssa_def *offset;
      if (op == nir_intrinsic_load_ubo) {
         /* UBO loads don't have a base offset. */
         offset = nir_imm_int(b, index);
         if (indirect) {
            offset = nir_iadd(b, offset, ttn_src_for_indirect(c, indirect));
         }
         /* UBO offsets are in bytes, but TGSI gives them to us in vec4's */
         offset = nir_ishl(b, offset, nir_imm_int(b, 4));
      } else {
         nir_intrinsic_set_base(load, index);
         if (indirect) {
            offset = ttn_src_for_indirect(c, indirect);
         } else {
            offset = nir_imm_int(b, 0);
         }
      }
      load->src[srcn++] = nir_src_for_ssa(offset);

      nir_ssa_dest_init(&load->instr, &load->dest, 4, 32, NULL);
      nir_builder_instr_insert(b, &load->instr);

      src = nir_src_for_ssa(&load->dest.ssa);
      break;
   }

   default:
      unreachable("bad src file");
   }


   return src;
}
728
729 static nir_ssa_def *
730 ttn_src_for_indirect(struct ttn_compile *c, struct tgsi_ind_register *indirect)
731 {
732 nir_builder *b = &c->build;
733 nir_alu_src src;
734 memset(&src, 0, sizeof(src));
735 for (int i = 0; i < 4; i++)
736 src.swizzle[i] = indirect->Swizzle;
737 src.src = ttn_src_for_file_and_index(c,
738 indirect->File,
739 indirect->Index,
740 NULL, NULL, NULL,
741 false);
742 return nir_mov_alu(b, src, 1);
743 }
744
/* Build the nir_alu_dest for a TGSI destination register.  Temp-array dests
 * get a throwaway register that is stored back into the array variable at
 * the end of the instruction; plain temps, outputs, and the address register
 * write their backing NIR registers directly.
 */
static nir_alu_dest
ttn_get_dest(struct ttn_compile *c, struct tgsi_full_dst_register *tgsi_fdst)
{
   struct tgsi_dst_register *tgsi_dst = &tgsi_fdst->Register;
   nir_alu_dest dest;
   unsigned index = tgsi_dst->Index;

   memset(&dest, 0, sizeof(dest));

   if (tgsi_dst->File == TGSI_FILE_TEMPORARY) {
      if (c->temp_regs[index].var) {
         nir_register *reg;

         /* this works, because TGSI will give us a base offset
          * (in case of indirect index) that points back into
          * the array.  Access can be direct or indirect, we
          * don't really care.  Just create a one-shot dst reg
          * that will get store_var'd back into the array var
          * at the end of ttn_emit_instruction()
          */
         reg = nir_local_reg_create(c->build.impl);
         reg->num_components = 4;
         dest.dest.reg.reg = reg;
         dest.dest.reg.base_offset = 0;
      } else {
         assert(!tgsi_dst->Indirect);
         dest.dest.reg.reg = c->temp_regs[index].reg;
         dest.dest.reg.base_offset = c->temp_regs[index].offset;
      }
   } else if (tgsi_dst->File == TGSI_FILE_OUTPUT) {
      dest.dest.reg.reg = c->output_regs[index].reg;
      dest.dest.reg.base_offset = c->output_regs[index].offset;
   } else if (tgsi_dst->File == TGSI_FILE_ADDRESS) {
      assert(index == 0);
      dest.dest.reg.reg = c->addr_reg;
   }

   dest.write_mask = tgsi_dst->WriteMask;
   dest.saturate = false;

   /* temp-array indirects were already folded into the deref above; any
    * other indirect dest becomes a register indirect.
    */
   if (tgsi_dst->Indirect && (tgsi_dst->File != TGSI_FILE_TEMPORARY)) {
      nir_src *indirect = ralloc(c->build.shader, nir_src);
      *indirect = nir_src_for_ssa(ttn_src_for_indirect(c, &tgsi_fdst->Indirect));
      dest.dest.reg.indirect = indirect;
   }

   return dest;
}
793
794 static nir_variable *
795 ttn_get_var(struct ttn_compile *c, struct tgsi_full_dst_register *tgsi_fdst)
796 {
797 struct tgsi_dst_register *tgsi_dst = &tgsi_fdst->Register;
798 unsigned index = tgsi_dst->Index;
799
800 if (tgsi_dst->File == TGSI_FILE_TEMPORARY) {
801 /* we should not have an indirect when there is no var! */
802 if (!c->temp_regs[index].var)
803 assert(!tgsi_dst->Indirect);
804 return c->temp_regs[index].var;
805 }
806
807 return NULL;
808 }
809
/* Fetch TGSI source `src_idx` of the current instruction as an SSA def,
 * applying swizzle, 64-bit bitcast, absolute value, and negate modifiers.
 * Returns NULL for sampler/image/buffer files (texturing looks those up by
 * index itself) and 0.0 for TGSI_FILE_NULL.
 */
static nir_ssa_def *
ttn_get_src(struct ttn_compile *c, struct tgsi_full_src_register *tgsi_fsrc,
            int src_idx)
{
   nir_builder *b = &c->build;
   struct tgsi_src_register *tgsi_src = &tgsi_fsrc->Register;
   enum tgsi_opcode opcode = c->token->FullInstruction.Instruction.Opcode;
   /* the inferred type decides float vs integer abs/neg below */
   unsigned tgsi_src_type = tgsi_opcode_infer_src_type(opcode, src_idx);
   bool src_is_float = (tgsi_src_type == TGSI_TYPE_FLOAT ||
                        tgsi_src_type == TGSI_TYPE_DOUBLE ||
                        tgsi_src_type == TGSI_TYPE_UNTYPED);
   nir_alu_src src;

   memset(&src, 0, sizeof(src));

   if (tgsi_src->File == TGSI_FILE_NULL) {
      return nir_imm_float(b, 0.0);
   } else if (tgsi_src->File == TGSI_FILE_SAMPLER ||
              tgsi_src->File == TGSI_FILE_IMAGE ||
              tgsi_src->File == TGSI_FILE_BUFFER) {
      /* Only the index of the resource gets used in texturing, and it will
       * handle looking that up on its own instead of using the nir_alu_src.
       */
      assert(!tgsi_src->Indirect);
      return NULL;
   } else {
      struct tgsi_ind_register *ind = NULL;
      struct tgsi_dimension *dim = NULL;
      struct tgsi_ind_register *dimind = NULL;
      if (tgsi_src->Indirect)
         ind = &tgsi_fsrc->Indirect;
      if (tgsi_src->Dimension) {
         dim = &tgsi_fsrc->Dimension;
         if (dim->Indirect)
            dimind = &tgsi_fsrc->DimIndirect;
      }
      src.src = ttn_src_for_file_and_index(c,
                                           tgsi_src->File,
                                           tgsi_src->Index,
                                           ind, dim, dimind,
                                           src_is_float);
   }

   src.swizzle[0] = tgsi_src->SwizzleX;
   src.swizzle[1] = tgsi_src->SwizzleY;
   src.swizzle[2] = tgsi_src->SwizzleZ;
   src.swizzle[3] = tgsi_src->SwizzleW;

   nir_ssa_def *def = nir_mov_alu(b, src, 4);

   /* 64-bit operands arrive as pairs of 32-bit channels */
   if (tgsi_type_is_64bit(tgsi_src_type))
      def = nir_bitcast_vector(b, def, 64);

   if (tgsi_src->Absolute) {
      if (src_is_float)
         def = nir_fabs(b, def);
      else
         def = nir_iabs(b, def);
   }

   if (tgsi_src->Negate) {
      if (src_is_float)
         def = nir_fneg(b, def);
      else
         def = nir_ineg(b, def);
   }

   return def;
}
879
/* Write `def` into `dest` restricted to write_mask, as a masked mov.
 * Skips the write entirely when the masks don't overlap.
 */
static void
ttn_move_dest_masked(nir_builder *b, nir_alu_dest dest,
                     nir_ssa_def *def, unsigned write_mask)
{
   if (!(dest.write_mask & write_mask))
      return;

   nir_alu_instr *mov = nir_alu_instr_create(b->shader, nir_op_mov);
   mov->dest = dest;
   mov->dest.write_mask &= write_mask;
   mov->src[0].src = nir_src_for_ssa(def);
   /* pad a narrower def up to the vec4 mov by repeating its last channel */
   for (unsigned i = def->num_components; i < 4; i++)
      mov->src[0].swizzle[i] = def->num_components - 1;
   nir_builder_instr_insert(b, &mov->instr);
}
895
/* Write `def` to all channels enabled in the instruction's write mask. */
static void
ttn_move_dest(nir_builder *b, nir_alu_dest dest, nir_ssa_def *def)
{
   ttn_move_dest_masked(b, dest, def, TGSI_WRITEMASK_XYZW);
}
901
/* Emit a plain NIR ALU op for a TGSI instruction and store the result.
 * 1-bit boolean results are lowered to TGSI's 0 / ~0 integer convention,
 * and 64-bit results are repacked as pairs of 32-bit channels.
 */
static void
ttn_alu(nir_builder *b, nir_op op, nir_alu_dest dest, unsigned dest_bitsize,
        nir_ssa_def **src)
{
   nir_ssa_def *def = nir_build_alu_src_arr(b, op, src);
   /* booleans -> 0 / -1 (all bits set), matching TGSI comparison results */
   if (def->bit_size == 1)
      def = nir_ineg(b, nir_b2i(b, def, dest_bitsize));
   assert(def->bit_size == dest_bitsize);
   if (dest_bitsize == 64) {
      if (def->num_components > 2) {
         /* 32 -> 64 bit conversion ops are supposed to only convert the first
          * two components, and we need to truncate here to avoid creating a
          * vec8 after bitcasting the destination.
          */
         def = nir_channels(b, def, 0x3);
      }
      def = nir_bitcast_vector(b, def, 32);
   }
   ttn_move_dest(b, dest, def);
}
922
/* ARL - Address Register Load: floor the float source and convert to int. */
static void
ttn_arl(nir_builder *b, nir_op op, nir_alu_dest dest, nir_ssa_def **src)
{
   ttn_move_dest(b, dest, nir_f2i32(b, nir_ffloor(b, src[0])));
}
928
/* EXP - Approximate Exponential Base 2
 *  dst.x = 2^{\lfloor src.x\rfloor}
 *  dst.y = src.x - \lfloor src.x\rfloor
 *  dst.z = 2^{src.x}
 *  dst.w = 1.0
 * Each component is written with its own masked mov so unwritten channels
 * are left untouched.
 */
static void
ttn_exp(nir_builder *b, nir_op op, nir_alu_dest dest, nir_ssa_def **src)
{
   nir_ssa_def *srcx = ttn_channel(b, src[0], X);

   ttn_move_dest_masked(b, dest, nir_fexp2(b, nir_ffloor(b, srcx)),
                        TGSI_WRITEMASK_X);
   ttn_move_dest_masked(b, dest, nir_fsub(b, srcx, nir_ffloor(b, srcx)),
                        TGSI_WRITEMASK_Y);
   ttn_move_dest_masked(b, dest, nir_fexp2(b, srcx), TGSI_WRITEMASK_Z);
   ttn_move_dest_masked(b, dest, nir_imm_float(b, 1.0), TGSI_WRITEMASK_W);
}
947
/* LOG - Approximate Logarithm Base 2
 *  dst.x = \lfloor\log_2{|src.x|}\rfloor
 *  dst.y = \frac{|src.x|}{2^{\lfloor\log_2{|src.x|}\rfloor}}
 *  dst.z = \log_2{|src.x|}
 *  dst.w = 1.0
 */
static void
ttn_log(nir_builder *b, nir_op op, nir_alu_dest dest, nir_ssa_def **src)
{
   nir_ssa_def *abs_srcx = nir_fabs(b, ttn_channel(b, src[0], X));
   nir_ssa_def *log2 = nir_flog2(b, abs_srcx);

   ttn_move_dest_masked(b, dest, nir_ffloor(b, log2), TGSI_WRITEMASK_X);
   ttn_move_dest_masked(b, dest,
                        nir_fdiv(b, abs_srcx, nir_fexp2(b, nir_ffloor(b, log2))),
                        TGSI_WRITEMASK_Y);
   ttn_move_dest_masked(b, dest, nir_flog2(b, abs_srcx), TGSI_WRITEMASK_Z);
   ttn_move_dest_masked(b, dest, nir_imm_float(b, 1.0), TGSI_WRITEMASK_W);
}
967
/* DST - Distance Vector
 *  dst.x = 1.0
 *  dst.y = src0.y \times src1.y
 *  dst.z = src0.z
 *  dst.w = src1.w
 * Full-vector ops are emitted and the write masks pick out the wanted lane.
 */
static void
ttn_dst(nir_builder *b, nir_op op, nir_alu_dest dest, nir_ssa_def **src)
{
   ttn_move_dest_masked(b, dest, nir_imm_float(b, 1.0), TGSI_WRITEMASK_X);
   ttn_move_dest_masked(b, dest, nir_fmul(b, src[0], src[1]), TGSI_WRITEMASK_Y);
   ttn_move_dest_masked(b, dest, nir_mov(b, src[0]), TGSI_WRITEMASK_Z);
   ttn_move_dest_masked(b, dest, nir_mov(b, src[1]), TGSI_WRITEMASK_W);
}
982
/* LIT - Light Coefficients
 *  dst.x = 1.0
 *  dst.y = max(src.x, 0.0)
 *  dst.z = (src.x > 0.0) ? max(src.y, 0.0)^{clamp(src.w, -128.0, 128.0))} : 0
 *  dst.w = 1.0
 * The expensive pow for .z is only emitted when Z is actually written.
 */
static void
ttn_lit(nir_builder *b, nir_op op, nir_alu_dest dest, nir_ssa_def **src)
{
   ttn_move_dest_masked(b, dest, nir_imm_float(b, 1.0), TGSI_WRITEMASK_XW);

   ttn_move_dest_masked(b, dest, nir_fmax(b, ttn_channel(b, src[0], X),
                                          nir_imm_float(b, 0.0)), TGSI_WRITEMASK_Y);

   if (dest.write_mask & TGSI_WRITEMASK_Z) {
      nir_ssa_def *src0_y = ttn_channel(b, src[0], Y);
      /* clamp the exponent to [-128, 128] per the TGSI spec */
      nir_ssa_def *wclamp = nir_fmax(b, nir_fmin(b, ttn_channel(b, src[0], W),
                                                 nir_imm_float(b, 128.0)),
                                     nir_imm_float(b, -128.0));
      nir_ssa_def *pow = nir_fpow(b, nir_fmax(b, src0_y, nir_imm_float(b, 0.0)),
                                  wclamp);

      /* select 0 when src.x < 0, otherwise the computed pow */
      ttn_move_dest_masked(b, dest,
                           nir_bcsel(b,
                                     nir_flt(b,
                                             ttn_channel(b, src[0], X),
                                             nir_imm_float(b, 0.0)),
                                     nir_imm_float(b, 0.0),
                                     pow),
                           TGSI_WRITEMASK_Z);
   }
}
1015
/* SLE - Set on Less-or-Equal: implemented as sge with swapped operands. */
static void
ttn_sle(nir_builder *b, nir_op op, nir_alu_dest dest, nir_ssa_def **src)
{
   ttn_move_dest(b, dest, nir_sge(b, src[1], src[0]));
}
1021
/* SGT - Set on Greater-Than: implemented as slt with swapped operands. */
static void
ttn_sgt(nir_builder *b, nir_op op, nir_alu_dest dest, nir_ssa_def **src)
{
   ttn_move_dest(b, dest, nir_slt(b, src[1], src[0]));
}
1027
1028 static void
1029 ttn_dp2(nir_builder *b, nir_op op, nir_alu_dest dest, nir_ssa_def **src)
1030 {
1031 ttn_move_dest(b, dest, nir_fdot2(b, src[0], src[1]));
1032 }
1033
1034 static void
1035 ttn_dp3(nir_builder *b, nir_op op, nir_alu_dest dest, nir_ssa_def **src)
1036 {
1037 ttn_move_dest(b, dest, nir_fdot3(b, src[0], src[1]));
1038 }
1039
1040 static void
1041 ttn_dp4(nir_builder *b, nir_op op, nir_alu_dest dest, nir_ssa_def **src)
1042 {
1043 ttn_move_dest(b, dest, nir_fdot4(b, src[0], src[1]));
1044 }
1045
1046 static void
1047 ttn_umad(nir_builder *b, nir_op op, nir_alu_dest dest, nir_ssa_def **src)
1048 {
1049 ttn_move_dest(b, dest, nir_iadd(b, nir_imul(b, src[0], src[1]), src[2]));
1050 }
1051
1052 static void
1053 ttn_arr(nir_builder *b, nir_op op, nir_alu_dest dest, nir_ssa_def **src)
1054 {
1055 ttn_move_dest(b, dest, nir_f2i32(b, nir_fround_even(b, src[0])));
1056 }
1057
1058 static void
1059 ttn_cmp(nir_builder *b, nir_op op, nir_alu_dest dest, nir_ssa_def **src)
1060 {
1061 ttn_move_dest(b, dest, nir_bcsel(b,
1062 nir_flt(b, src[0], nir_imm_float(b, 0.0)),
1063 src[1], src[2]));
1064 }
1065
1066 static void
1067 ttn_ucmp(nir_builder *b, nir_op op, nir_alu_dest dest, nir_ssa_def **src)
1068 {
1069 ttn_move_dest(b, dest, nir_bcsel(b,
1070 nir_ine(b, src[0], nir_imm_int(b, 0)),
1071 src[1], src[2]));
1072 }
1073
1074 static void
1075 ttn_kill(nir_builder *b, nir_op op, nir_alu_dest dest, nir_ssa_def **src)
1076 {
1077 nir_intrinsic_instr *discard =
1078 nir_intrinsic_instr_create(b->shader, nir_intrinsic_discard);
1079 nir_builder_instr_insert(b, &discard->instr);
1080 b->shader->info.fs.uses_discard = true;
1081 }
1082
1083 static void
1084 ttn_kill_if(nir_builder *b, nir_op op, nir_alu_dest dest, nir_ssa_def **src)
1085 {
1086 nir_ssa_def *cmp = nir_bany(b, nir_flt(b, src[0], nir_imm_float(b, 0.0)));
1087 nir_intrinsic_instr *discard =
1088 nir_intrinsic_instr_create(b->shader, nir_intrinsic_discard_if);
1089 discard->src[0] = nir_src_for_ssa(cmp);
1090 nir_builder_instr_insert(b, &discard->instr);
1091 b->shader->info.fs.uses_discard = true;
1092 }
1093
/* Open a TGSI IF/UIF.  Pushes two cursors on the if-stack: the point
 * after the whole if (popped by ttn_endif()) and the start of the else
 * list (popped by ttn_else()), then leaves the builder inside the then
 * list.
 */
static void
ttn_if(struct ttn_compile *c, nir_ssa_def *src, bool is_uint)
{
   nir_builder *b = &c->build;
   /* TGSI conditions only look at the .x channel. */
   nir_ssa_def *src_x = ttn_channel(b, src, X);

   nir_if *if_stmt = nir_if_create(b->shader);
   if (is_uint) {
      /* equivalent to TGSI UIF, src is interpreted as integer */
      if_stmt->condition = nir_src_for_ssa(nir_ine(b, src_x, nir_imm_int(b, 0)));
   } else {
      /* equivalent to TGSI IF, src is interpreted as float */
      if_stmt->condition = nir_src_for_ssa(nir_fne(b, src_x, nir_imm_float(b, 0.0)));
   }
   nir_builder_cf_insert(b, &if_stmt->cf_node);

   /* First push: resume point after the if, for ttn_endif(). */
   c->if_stack[c->if_stack_pos] = nir_after_cf_node(&if_stmt->cf_node);
   c->if_stack_pos++;

   b->cursor = nir_after_cf_list(&if_stmt->then_list);

   /* Second push: start of the else block, for ttn_else(). */
   c->if_stack[c->if_stack_pos] = nir_after_cf_list(&if_stmt->else_list);
   c->if_stack_pos++;
}
1118
1119 static void
1120 ttn_else(struct ttn_compile *c)
1121 {
1122 nir_builder *b = &c->build;
1123
1124 b->cursor = c->if_stack[c->if_stack_pos - 1];
1125 }
1126
1127 static void
1128 ttn_endif(struct ttn_compile *c)
1129 {
1130 nir_builder *b = &c->build;
1131
1132 c->if_stack_pos -= 2;
1133 b->cursor = c->if_stack[c->if_stack_pos];
1134 }
1135
1136 static void
1137 ttn_bgnloop(struct ttn_compile *c)
1138 {
1139 nir_builder *b = &c->build;
1140
1141 nir_loop *loop = nir_loop_create(b->shader);
1142 nir_builder_cf_insert(b, &loop->cf_node);
1143
1144 c->loop_stack[c->loop_stack_pos] = nir_after_cf_node(&loop->cf_node);
1145 c->loop_stack_pos++;
1146
1147 b->cursor = nir_after_cf_list(&loop->body);
1148 }
1149
1150 static void
1151 ttn_cont(nir_builder *b)
1152 {
1153 nir_jump_instr *instr = nir_jump_instr_create(b->shader, nir_jump_continue);
1154 nir_builder_instr_insert(b, &instr->instr);
1155 }
1156
1157 static void
1158 ttn_brk(nir_builder *b)
1159 {
1160 nir_jump_instr *instr = nir_jump_instr_create(b->shader, nir_jump_break);
1161 nir_builder_instr_insert(b, &instr->instr);
1162 }
1163
1164 static void
1165 ttn_endloop(struct ttn_compile *c)
1166 {
1167 nir_builder *b = &c->build;
1168
1169 c->loop_stack_pos--;
1170 b->cursor = c->loop_stack[c->loop_stack_pos];
1171 }
1172
1173 static void
1174 get_texture_info(unsigned texture,
1175 enum glsl_sampler_dim *dim,
1176 bool *is_shadow,
1177 bool *is_array)
1178 {
1179 assert(is_array);
1180 *is_array = false;
1181
1182 if (is_shadow)
1183 *is_shadow = false;
1184
1185 switch (texture) {
1186 case TGSI_TEXTURE_BUFFER:
1187 *dim = GLSL_SAMPLER_DIM_BUF;
1188 break;
1189 case TGSI_TEXTURE_1D:
1190 *dim = GLSL_SAMPLER_DIM_1D;
1191 break;
1192 case TGSI_TEXTURE_1D_ARRAY:
1193 *dim = GLSL_SAMPLER_DIM_1D;
1194 *is_array = true;
1195 break;
1196 case TGSI_TEXTURE_SHADOW1D:
1197 *dim = GLSL_SAMPLER_DIM_1D;
1198 *is_shadow = true;
1199 break;
1200 case TGSI_TEXTURE_SHADOW1D_ARRAY:
1201 *dim = GLSL_SAMPLER_DIM_1D;
1202 *is_shadow = true;
1203 *is_array = true;
1204 break;
1205 case TGSI_TEXTURE_2D:
1206 *dim = GLSL_SAMPLER_DIM_2D;
1207 break;
1208 case TGSI_TEXTURE_2D_ARRAY:
1209 *dim = GLSL_SAMPLER_DIM_2D;
1210 *is_array = true;
1211 break;
1212 case TGSI_TEXTURE_2D_MSAA:
1213 *dim = GLSL_SAMPLER_DIM_MS;
1214 break;
1215 case TGSI_TEXTURE_2D_ARRAY_MSAA:
1216 *dim = GLSL_SAMPLER_DIM_MS;
1217 *is_array = true;
1218 break;
1219 case TGSI_TEXTURE_SHADOW2D:
1220 *dim = GLSL_SAMPLER_DIM_2D;
1221 *is_shadow = true;
1222 break;
1223 case TGSI_TEXTURE_SHADOW2D_ARRAY:
1224 *dim = GLSL_SAMPLER_DIM_2D;
1225 *is_shadow = true;
1226 *is_array = true;
1227 break;
1228 case TGSI_TEXTURE_3D:
1229 *dim = GLSL_SAMPLER_DIM_3D;
1230 break;
1231 case TGSI_TEXTURE_CUBE:
1232 *dim = GLSL_SAMPLER_DIM_CUBE;
1233 break;
1234 case TGSI_TEXTURE_CUBE_ARRAY:
1235 *dim = GLSL_SAMPLER_DIM_CUBE;
1236 *is_array = true;
1237 break;
1238 case TGSI_TEXTURE_SHADOWCUBE:
1239 *dim = GLSL_SAMPLER_DIM_CUBE;
1240 *is_shadow = true;
1241 break;
1242 case TGSI_TEXTURE_SHADOWCUBE_ARRAY:
1243 *dim = GLSL_SAMPLER_DIM_CUBE;
1244 *is_shadow = true;
1245 *is_array = true;
1246 break;
1247 case TGSI_TEXTURE_RECT:
1248 *dim = GLSL_SAMPLER_DIM_RECT;
1249 break;
1250 case TGSI_TEXTURE_SHADOWRECT:
1251 *dim = GLSL_SAMPLER_DIM_RECT;
1252 *is_shadow = true;
1253 break;
1254 default:
1255 fprintf(stderr, "Unknown TGSI texture target %d\n", texture);
1256 abort();
1257 }
1258 }
1259
1260 static enum glsl_base_type
1261 base_type_for_alu_type(nir_alu_type type)
1262 {
1263 type = nir_alu_type_get_base_type(type);
1264
1265 switch (type) {
1266 case nir_type_float:
1267 return GLSL_TYPE_FLOAT;
1268 case nir_type_int:
1269 return GLSL_TYPE_INT;
1270 case nir_type_uint:
1271 return GLSL_TYPE_UINT;
1272 default:
1273 unreachable("invalid type");
1274 }
1275 }
1276
1277 static nir_variable *
1278 get_sampler_var(struct ttn_compile *c, int binding,
1279 enum glsl_sampler_dim dim,
1280 bool is_shadow,
1281 bool is_array,
1282 enum glsl_base_type base_type)
1283 {
1284 nir_variable *var = c->samplers[binding];
1285 if (!var) {
1286 const struct glsl_type *type =
1287 glsl_sampler_type(dim, is_shadow, is_array, base_type);
1288 var = nir_variable_create(c->build.shader, nir_var_uniform, type,
1289 "sampler");
1290 var->data.binding = binding;
1291 var->data.explicit_binding = true;
1292 c->samplers[binding] = var;
1293 }
1294
1295 return var;
1296 }
1297
1298 static nir_variable *
1299 get_image_var(struct ttn_compile *c, int binding,
1300 enum glsl_sampler_dim dim,
1301 bool is_array,
1302 enum glsl_base_type base_type,
1303 enum gl_access_qualifier access,
1304 GLenum format)
1305 {
1306 nir_variable *var = c->images[binding];
1307
1308 if (!var) {
1309 const struct glsl_type *type = glsl_image_type(dim, is_array, base_type);
1310
1311 var = nir_variable_create(c->build.shader, nir_var_uniform, type, "image");
1312 var->data.binding = binding;
1313 var->data.explicit_binding = true;
1314 var->data.image.access = access;
1315 var->data.image.format = format;
1316 c->images[binding] = var;
1317 }
1318
1319 return var;
1320 }
1321
1322 static void
1323 add_ssbo_var(struct ttn_compile *c, int binding)
1324 {
1325 nir_variable *var = c->ssbo[binding];
1326
1327 if (!var) {
1328 /* A length of 0 is used to denote unsized arrays */
1329 const struct glsl_type *type = glsl_array_type(glsl_uint_type(), 0, 0);
1330
1331 struct glsl_struct_field field = {
1332 .type = type,
1333 .name = "data",
1334 .location = -1,
1335 };
1336
1337 var = nir_variable_create(c->build.shader, nir_var_mem_ssbo, type, "ssbo");
1338 var->data.binding = binding;
1339 var->interface_type =
1340 glsl_interface_type(&field, 1, GLSL_INTERFACE_PACKING_STD430,
1341 false, "data");
1342 c->ssbo[binding] = var;
1343 }
1344 }
1345
/* Translate the TGSI texturing opcodes (TEX/TXP/TXB/TXL/TXF/TXD/LODQ and
 * their *2 variants) into a single nir_tex_instr.  The sources are
 * gathered in the fixed order NIR expects: texture deref, sampler deref,
 * coord, then the opcode-specific extras (projector/bias/lod/ms_index/
 * gradients/comparator/offsets).
 */
static void
ttn_tex(struct ttn_compile *c, nir_alu_dest dest, nir_ssa_def **src)
{
   nir_builder *b = &c->build;
   struct tgsi_full_instruction *tgsi_inst = &c->token->FullInstruction;
   nir_tex_instr *instr;
   nir_texop op;
   /* samp is the index of the TGSI source operand holding the sampler;
    * the *2 opcodes move it past their extra argument register.
    */
   unsigned num_srcs, samp = 1, sview, i;

   switch (tgsi_inst->Instruction.Opcode) {
   case TGSI_OPCODE_TEX:
      op = nir_texop_tex;
      num_srcs = 1;
      break;
   case TGSI_OPCODE_TEX2:
      op = nir_texop_tex;
      num_srcs = 1;
      samp = 2;
      break;
   case TGSI_OPCODE_TXP:
      op = nir_texop_tex;
      num_srcs = 2;
      break;
   case TGSI_OPCODE_TXB:
      op = nir_texop_txb;
      num_srcs = 2;
      break;
   case TGSI_OPCODE_TXB2:
      op = nir_texop_txb;
      num_srcs = 2;
      samp = 2;
      break;
   case TGSI_OPCODE_TXL:
      op = nir_texop_txl;
      num_srcs = 2;
      break;
   case TGSI_OPCODE_TXL2:
      op = nir_texop_txl;
      num_srcs = 2;
      samp = 2;
      break;
   case TGSI_OPCODE_TXF:
   case TGSI_OPCODE_TXF_LZ:
      /* MSAA targets fetch with a sample index instead of a lod. */
      if (tgsi_inst->Texture.Texture == TGSI_TEXTURE_2D_MSAA ||
          tgsi_inst->Texture.Texture == TGSI_TEXTURE_2D_ARRAY_MSAA) {
         op = nir_texop_txf_ms;
      } else {
         op = nir_texop_txf;
      }
      num_srcs = 2;
      break;
   case TGSI_OPCODE_TXD:
      op = nir_texop_txd;
      num_srcs = 3;
      samp = 3;
      break;
   case TGSI_OPCODE_LODQ:
      op = nir_texop_lod;
      num_srcs = 1;
      break;

   default:
      fprintf(stderr, "unknown TGSI tex op %d\n", tgsi_inst->Instruction.Opcode);
      abort();
   }

   /* Shadow targets carry one extra source: the comparator. */
   if (tgsi_inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D ||
       tgsi_inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D_ARRAY ||
       tgsi_inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D ||
       tgsi_inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D_ARRAY ||
       tgsi_inst->Texture.Texture == TGSI_TEXTURE_SHADOWRECT ||
       tgsi_inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE ||
       tgsi_inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE_ARRAY) {
      num_srcs++;
   }

   /* Deref sources */
   num_srcs += 2;

   num_srcs += tgsi_inst->Texture.NumOffsets;

   instr = nir_tex_instr_create(b->shader, num_srcs);
   instr->op = op;

   get_texture_info(tgsi_inst->Texture.Texture,
                    &instr->sampler_dim, &instr->is_shadow, &instr->is_array);

   switch (instr->sampler_dim) {
   case GLSL_SAMPLER_DIM_1D:
   case GLSL_SAMPLER_DIM_BUF:
      instr->coord_components = 1;
      break;
   case GLSL_SAMPLER_DIM_2D:
   case GLSL_SAMPLER_DIM_RECT:
   case GLSL_SAMPLER_DIM_EXTERNAL:
   case GLSL_SAMPLER_DIM_MS:
      instr->coord_components = 2;
      break;
   case GLSL_SAMPLER_DIM_3D:
   case GLSL_SAMPLER_DIM_CUBE:
      instr->coord_components = 3;
      break;
   case GLSL_SAMPLER_DIM_SUBPASS:
   case GLSL_SAMPLER_DIM_SUBPASS_MS:
      unreachable("invalid sampler_dim");
   }

   /* Array targets append the layer index to the coordinate. */
   if (instr->is_array)
      instr->coord_components++;

   assert(tgsi_inst->Src[samp].Register.File == TGSI_FILE_SAMPLER);

   /* TODO if we supported any opc's which take an explicit SVIEW
    * src, we would use that here instead.  But for the "legacy"
    * texture opc's the SVIEW index is same as SAMP index:
    */
   sview = tgsi_inst->Src[samp].Register.Index;

   if (op == nir_texop_lod) {
      instr->dest_type = nir_type_float;
   } else if (sview < c->num_samp_types) {
      instr->dest_type = c->samp_types[sview];
   } else {
      /* No declared sampler-view type: default to float. */
      instr->dest_type = nir_type_float;
   }

   nir_variable *var =
      get_sampler_var(c, sview, instr->sampler_dim,
                      instr->is_shadow,
                      instr->is_array,
                      base_type_for_alu_type(instr->dest_type));

   nir_deref_instr *deref = nir_build_deref_var(b, var);

   unsigned src_number = 0;

   /* The same deref serves as both texture and sampler source. */
   instr->src[src_number].src = nir_src_for_ssa(&deref->dest.ssa);
   instr->src[src_number].src_type = nir_tex_src_texture_deref;
   src_number++;
   instr->src[src_number].src = nir_src_for_ssa(&deref->dest.ssa);
   instr->src[src_number].src_type = nir_tex_src_sampler_deref;
   src_number++;

   instr->src[src_number].src =
      nir_src_for_ssa(nir_swizzle(b, src[0], SWIZ(X, Y, Z, W),
                                  instr->coord_components));
   instr->src[src_number].src_type = nir_tex_src_coord;
   src_number++;

   if (tgsi_inst->Instruction.Opcode == TGSI_OPCODE_TXP) {
      instr->src[src_number].src = nir_src_for_ssa(ttn_channel(b, src[0], W));
      instr->src[src_number].src_type = nir_tex_src_projector;
      src_number++;
   }

   if (tgsi_inst->Instruction.Opcode == TGSI_OPCODE_TXB) {
      instr->src[src_number].src = nir_src_for_ssa(ttn_channel(b, src[0], W));
      instr->src[src_number].src_type = nir_tex_src_bias;
      src_number++;
   }

   /* The *2 variants put the extra argument in src1.x instead of src0.w. */
   if (tgsi_inst->Instruction.Opcode == TGSI_OPCODE_TXB2) {
      instr->src[src_number].src = nir_src_for_ssa(ttn_channel(b, src[1], X));
      instr->src[src_number].src_type = nir_tex_src_bias;
      src_number++;
   }

   if (tgsi_inst->Instruction.Opcode == TGSI_OPCODE_TXL) {
      instr->src[src_number].src = nir_src_for_ssa(ttn_channel(b, src[0], W));
      instr->src[src_number].src_type = nir_tex_src_lod;
      src_number++;
   }

   if (tgsi_inst->Instruction.Opcode == TGSI_OPCODE_TXL2) {
      instr->src[src_number].src = nir_src_for_ssa(ttn_channel(b, src[1], X));
      instr->src[src_number].src_type = nir_tex_src_lod;
      src_number++;
   }

   if (tgsi_inst->Instruction.Opcode == TGSI_OPCODE_TXF ||
       tgsi_inst->Instruction.Opcode == TGSI_OPCODE_TXF_LZ) {
      if (op == nir_texop_txf_ms) {
         instr->src[src_number].src = nir_src_for_ssa(ttn_channel(b, src[0], W));
         instr->src[src_number].src_type = nir_tex_src_ms_index;
      } else {
         /* TXF_LZ always fetches lod 0. */
         if (tgsi_inst->Instruction.Opcode == TGSI_OPCODE_TXF_LZ)
            instr->src[src_number].src = nir_src_for_ssa(nir_imm_int(b, 0));
         else
            instr->src[src_number].src = nir_src_for_ssa(ttn_channel(b, src[0], W));
         instr->src[src_number].src_type = nir_tex_src_lod;
      }
      src_number++;
   }

   if (tgsi_inst->Instruction.Opcode == TGSI_OPCODE_TXD) {
      instr->src[src_number].src_type = nir_tex_src_ddx;
      instr->src[src_number].src =
         nir_src_for_ssa(nir_swizzle(b, src[1], SWIZ(X, Y, Z, W),
				     nir_tex_instr_src_size(instr, src_number)));
      src_number++;
      instr->src[src_number].src_type = nir_tex_src_ddy;
      instr->src[src_number].src =
         nir_src_for_ssa(nir_swizzle(b, src[2], SWIZ(X, Y, Z, W),
				     nir_tex_instr_src_size(instr, src_number)));
      src_number++;
   }

   if (instr->is_shadow) {
      /* The comparator lives wherever the coordinate left room: src1.x
       * when the coordinate fills all four channels, else src0.w or
       * src0.z.
       */
      if (instr->coord_components == 4)
         instr->src[src_number].src = nir_src_for_ssa(ttn_channel(b, src[1], X));
      else if (instr->coord_components == 3)
         instr->src[src_number].src = nir_src_for_ssa(ttn_channel(b, src[0], W));
      else
         instr->src[src_number].src = nir_src_for_ssa(ttn_channel(b, src[0], Z));

      instr->src[src_number].src_type = nir_tex_src_comparator;
      src_number++;
   }

   for (i = 0; i < tgsi_inst->Texture.NumOffsets; i++) {
      struct tgsi_texture_offset *tex_offset = &tgsi_inst->TexOffsets[i];
      /* since TexOffset ins't using tgsi_full_src_register we get to
       * do some extra gymnastics:
       */
      nir_alu_src src;

      memset(&src, 0, sizeof(src));

      src.src = ttn_src_for_file_and_index(c,
                                           tex_offset->File,
                                           tex_offset->Index,
                                           NULL, NULL, NULL,
                                           true);

      src.swizzle[0] = tex_offset->SwizzleX;
      src.swizzle[1] = tex_offset->SwizzleY;
      src.swizzle[2] = tex_offset->SwizzleZ;
      src.swizzle[3] = TGSI_SWIZZLE_W;

      instr->src[src_number].src_type = nir_tex_src_offset;
      instr->src[src_number].src = nir_src_for_ssa(
         nir_mov_alu(b, src, nir_tex_instr_src_size(instr, src_number)));
      src_number++;
   }

   assert(src_number == num_srcs);
   assert(src_number == instr->num_srcs);

   nir_ssa_dest_init(&instr->instr, &instr->dest,
                     nir_tex_instr_dest_size(instr),
                     32, NULL);
   nir_builder_instr_insert(b, &instr->instr);

   /* Resolve the writemask on the texture op. */
   ttn_move_dest(b, dest, &instr->dest.ssa);
}
1602
/* TGSI_OPCODE_TXQ is actually two distinct operations:
 *
 *    dst.x = texture_width(unit, lod)
 *    dst.y = texture_height(unit, lod)
 *    dst.z = texture_depth(unit, lod)
 *    dst.w = texture_levels(unit)
 *
 * dst.xyz map to NIR txs opcode, and dst.w maps to query_levels
 */
static void
ttn_txq(struct ttn_compile *c, nir_alu_dest dest, nir_ssa_def **src)
{
   nir_builder *b = &c->build;
   struct tgsi_full_instruction *tgsi_inst = &c->token->FullInstruction;
   nir_tex_instr *txs, *qlv;

   /* txs takes two sources: the texture deref and the lod. */
   txs = nir_tex_instr_create(b->shader, 2);
   txs->op = nir_texop_txs;
   get_texture_info(tgsi_inst->Texture.Texture,
                    &txs->sampler_dim, &txs->is_shadow, &txs->is_array);

   /* query_levels only needs the texture deref. */
   qlv = nir_tex_instr_create(b->shader, 1);
   qlv->op = nir_texop_query_levels;
   get_texture_info(tgsi_inst->Texture.Texture,
                    &qlv->sampler_dim, &qlv->is_shadow, &qlv->is_array);

   assert(tgsi_inst->Src[1].Register.File == TGSI_FILE_SAMPLER);
   int tex_index = tgsi_inst->Src[1].Register.Index;

   /* NOTE(review): txs->dest_type is not assigned between
    * nir_tex_instr_create() and this call, unlike ttn_tex() which sets it
    * first — confirm the base type passed here is the intended one.
    */
   nir_variable *var =
      get_sampler_var(c, tex_index, txs->sampler_dim,
                      txs->is_shadow,
                      txs->is_array,
                      base_type_for_alu_type(txs->dest_type));

   nir_deref_instr *deref = nir_build_deref_var(b, var);

   /* Both queries read from the same texture deref. */
   txs->src[0].src = nir_src_for_ssa(&deref->dest.ssa);
   txs->src[0].src_type = nir_tex_src_texture_deref;

   qlv->src[0].src = nir_src_for_ssa(&deref->dest.ssa);
   qlv->src[0].src_type = nir_tex_src_texture_deref;

   /* lod: */
   txs->src[1].src = nir_src_for_ssa(ttn_channel(b, src[0], X));
   txs->src[1].src_type = nir_tex_src_lod;

   nir_ssa_dest_init(&txs->instr, &txs->dest,
                     nir_tex_instr_dest_size(txs), 32, NULL);
   nir_builder_instr_insert(b, &txs->instr);

   nir_ssa_dest_init(&qlv->instr, &qlv->dest, 1, 32, NULL);
   nir_builder_instr_insert(b, &qlv->instr);

   /* Merge the two results into the destination per the writemask. */
   ttn_move_dest_masked(b, dest, &txs->dest.ssa, TGSI_WRITEMASK_XYZ);
   ttn_move_dest_masked(b, dest, &qlv->dest.ssa, TGSI_WRITEMASK_W);
}
1660
1661 static enum glsl_base_type
1662 get_image_base_type(struct tgsi_full_instruction *tgsi_inst)
1663 {
1664 const struct util_format_description *desc =
1665 util_format_description(tgsi_inst->Memory.Format);
1666
1667 if (desc->channel[0].pure_integer) {
1668 if (desc->channel[0].type == UTIL_FORMAT_TYPE_SIGNED)
1669 return GLSL_TYPE_INT;
1670 else
1671 return GLSL_TYPE_UINT;
1672 }
1673 return GLSL_TYPE_FLOAT;
1674 }
1675
1676 static enum gl_access_qualifier
1677 get_mem_qualifier(struct tgsi_full_instruction *tgsi_inst)
1678 {
1679 enum gl_access_qualifier access = 0;
1680
1681 if (tgsi_inst->Memory.Qualifier & TGSI_MEMORY_COHERENT)
1682 access |= ACCESS_COHERENT;
1683 if (tgsi_inst->Memory.Qualifier & TGSI_MEMORY_RESTRICT)
1684 access |= ACCESS_RESTRICT;
1685 if (tgsi_inst->Memory.Qualifier & TGSI_MEMORY_VOLATILE)
1686 access |= ACCESS_VOLATILE;
1687 if (tgsi_inst->Memory.Qualifier & TGSI_MEMORY_STREAM_CACHE_POLICY)
1688 access |= ACCESS_STREAM_CACHE_POLICY;
1689
1690 return access;
1691 }
1692
/* Map the pipe format declared on a TGSI memory instruction to the GL
 * image-format enum recorded on the NIR image variable.  Aborts (via
 * unreachable) on formats with no GL image-format equivalent.
 */
static GLenum
get_image_format(struct tgsi_full_instruction *tgsi_inst)
{
   switch (tgsi_inst->Memory.Format) {
   case PIPE_FORMAT_R8_UNORM:
      return GL_R8;
   case PIPE_FORMAT_R8G8_UNORM:
      return GL_RG8;
   case PIPE_FORMAT_R8G8B8A8_UNORM:
      return GL_RGBA8;
   case PIPE_FORMAT_R16_UNORM:
      return GL_R16;
   case PIPE_FORMAT_R16G16_UNORM:
      return GL_RG16;
   case PIPE_FORMAT_R16G16B16A16_UNORM:
      return GL_RGBA16;

   case PIPE_FORMAT_R8_SNORM:
      return GL_R8_SNORM;
   case PIPE_FORMAT_R8G8_SNORM:
      return GL_RG8_SNORM;
   case PIPE_FORMAT_R8G8B8A8_SNORM:
      return GL_RGBA8_SNORM;
   case PIPE_FORMAT_R16_SNORM:
      return GL_R16_SNORM;
   case PIPE_FORMAT_R16G16_SNORM:
      return GL_RG16_SNORM;
   case PIPE_FORMAT_R16G16B16A16_SNORM:
      return GL_RGBA16_SNORM;

   case PIPE_FORMAT_R8_UINT:
      return GL_R8UI;
   case PIPE_FORMAT_R8G8_UINT:
      return GL_RG8UI;
   case PIPE_FORMAT_R8G8B8A8_UINT:
      return GL_RGBA8UI;
   case PIPE_FORMAT_R16_UINT:
      return GL_R16UI;
   case PIPE_FORMAT_R16G16_UINT:
      return GL_RG16UI;
   case PIPE_FORMAT_R16G16B16A16_UINT:
      return GL_RGBA16UI;
   case PIPE_FORMAT_R32_UINT:
      return GL_R32UI;
   case PIPE_FORMAT_R32G32_UINT:
      return GL_RG32UI;
   case PIPE_FORMAT_R32G32B32A32_UINT:
      return GL_RGBA32UI;

   case PIPE_FORMAT_R8_SINT:
      return GL_R8I;
   case PIPE_FORMAT_R8G8_SINT:
      return GL_RG8I;
   case PIPE_FORMAT_R8G8B8A8_SINT:
      return GL_RGBA8I;
   case PIPE_FORMAT_R16_SINT:
      return GL_R16I;
   case PIPE_FORMAT_R16G16_SINT:
      return GL_RG16I;
   case PIPE_FORMAT_R16G16B16A16_SINT:
      return GL_RGBA16I;
   case PIPE_FORMAT_R32_SINT:
      return GL_R32I;
   case PIPE_FORMAT_R32G32_SINT:
      return GL_RG32I;
   case PIPE_FORMAT_R32G32B32A32_SINT:
      return GL_RGBA32I;

   case PIPE_FORMAT_R16_FLOAT:
      return GL_R16F;
   case PIPE_FORMAT_R16G16_FLOAT:
      return GL_RG16F;
   case PIPE_FORMAT_R16G16B16A16_FLOAT:
      return GL_RGBA16F;
   case PIPE_FORMAT_R32_FLOAT:
      return GL_R32F;
   case PIPE_FORMAT_R32G32_FLOAT:
      return GL_RG32F;
   case PIPE_FORMAT_R32G32B32A32_FLOAT:
      return GL_RGBA32F;

   case PIPE_FORMAT_R11G11B10_FLOAT:
      return GL_R11F_G11F_B10F;
   case PIPE_FORMAT_R10G10B10A2_UINT:
      return GL_RGB10_A2UI;
   case PIPE_FORMAT_R10G10B10A2_UNORM:
      return GL_RGB10_A2;

   default:
      unreachable("unhandled image format");
   }
}
1785
1786 static void
1787 ttn_mem(struct ttn_compile *c, nir_alu_dest dest, nir_ssa_def **src)
1788 {
1789 nir_builder *b = &c->build;
1790 struct tgsi_full_instruction *tgsi_inst = &c->token->FullInstruction;
1791 nir_intrinsic_instr *instr = NULL;
1792 unsigned resource_index, addr_src_index, file;
1793
1794 switch (tgsi_inst->Instruction.Opcode) {
1795 case TGSI_OPCODE_LOAD:
1796 assert(!tgsi_inst->Src[0].Register.Indirect);
1797 resource_index = tgsi_inst->Src[0].Register.Index;
1798 file = tgsi_inst->Src[0].Register.File;
1799 addr_src_index = 1;
1800 break;
1801 case TGSI_OPCODE_STORE:
1802 assert(!tgsi_inst->Dst[0].Register.Indirect);
1803 resource_index = tgsi_inst->Dst[0].Register.Index;
1804 file = tgsi_inst->Dst[0].Register.File;
1805 addr_src_index = 0;
1806 break;
1807 default:
1808 unreachable("unexpected memory opcode");
1809 }
1810
1811 if (file == TGSI_FILE_BUFFER) {
1812 nir_intrinsic_op op;
1813
1814 switch (tgsi_inst->Instruction.Opcode) {
1815 case TGSI_OPCODE_LOAD:
1816 op = nir_intrinsic_load_ssbo;
1817 break;
1818 case TGSI_OPCODE_STORE:
1819 op = nir_intrinsic_store_ssbo;
1820 break;
1821 }
1822
1823 add_ssbo_var(c, resource_index);
1824
1825 instr = nir_intrinsic_instr_create(b->shader, op);
1826 instr->num_components = util_last_bit(tgsi_inst->Dst[0].Register.WriteMask);
1827 nir_intrinsic_set_access(instr, get_mem_qualifier(tgsi_inst));
1828 nir_intrinsic_set_align(instr, 4, 0);
1829
1830 unsigned i = 0;
1831 if (tgsi_inst->Instruction.Opcode == TGSI_OPCODE_STORE)
1832 instr->src[i++] = nir_src_for_ssa(nir_swizzle(b, src[1], SWIZ(X, Y, Z, W),
1833 instr->num_components));
1834 instr->src[i++] = nir_src_for_ssa(nir_imm_int(b, resource_index));
1835 instr->src[i++] = nir_src_for_ssa(ttn_channel(b, src[addr_src_index], X));
1836
1837 if (tgsi_inst->Instruction.Opcode == TGSI_OPCODE_STORE)
1838 nir_intrinsic_set_write_mask(instr, tgsi_inst->Dst[0].Register.WriteMask);
1839
1840 } else if (file == TGSI_FILE_IMAGE) {
1841 nir_intrinsic_op op;
1842
1843 switch (tgsi_inst->Instruction.Opcode) {
1844 case TGSI_OPCODE_LOAD:
1845 op = nir_intrinsic_image_deref_load;
1846 break;
1847 case TGSI_OPCODE_STORE:
1848 op = nir_intrinsic_image_deref_store;
1849 break;
1850 }
1851
1852 instr = nir_intrinsic_instr_create(b->shader, op);
1853
1854 /* Set the image variable dereference. */
1855 enum glsl_sampler_dim dim;
1856 bool is_array;
1857 get_texture_info(tgsi_inst->Memory.Texture, &dim, NULL, &is_array);
1858
1859 enum glsl_base_type base_type = get_image_base_type(tgsi_inst);
1860 enum gl_access_qualifier access = get_mem_qualifier(tgsi_inst);
1861 GLenum format = get_image_format(tgsi_inst);
1862
1863 nir_variable *image =
1864 get_image_var(c, resource_index,
1865 dim, is_array, base_type, access, format);
1866 nir_deref_instr *image_deref = nir_build_deref_var(b, image);
1867 const struct glsl_type *type = image_deref->type;
1868 unsigned coord_components = glsl_get_sampler_coordinate_components(type);
1869
1870 nir_intrinsic_set_access(instr, image_deref->var->data.image.access);
1871
1872 instr->src[0] = nir_src_for_ssa(&image_deref->dest.ssa);
1873 instr->src[1] = nir_src_for_ssa(nir_swizzle(b, src[addr_src_index],
1874 SWIZ(X, Y, Z, W),
1875 coord_components));
1876
1877 /* Set the sample argument, which is undefined for single-sample images. */
1878 if (glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_MS) {
1879 instr->src[2] = nir_src_for_ssa(ttn_channel(b, src[addr_src_index], W));
1880 } else {
1881 instr->src[2] = nir_src_for_ssa(nir_ssa_undef(b, 1, 32));
1882 }
1883
1884 if (tgsi_inst->Instruction.Opcode == TGSI_OPCODE_STORE) {
1885 instr->src[3] = nir_src_for_ssa(nir_swizzle(b, src[1], SWIZ(X, Y, Z, W), 4));
1886 }
1887
1888 instr->num_components = 4;
1889 } else {
1890 unreachable("unexpected file");
1891 }
1892
1893
1894 if (tgsi_inst->Instruction.Opcode == TGSI_OPCODE_LOAD) {
1895 nir_ssa_dest_init(&instr->instr, &instr->dest,
1896 util_last_bit(tgsi_inst->Dst[0].Register.WriteMask),
1897 32, NULL);
1898 nir_builder_instr_insert(b, &instr->instr);
1899 ttn_move_dest(b, dest, &instr->dest.ssa);
1900 } else {
1901 nir_builder_instr_insert(b, &instr->instr);
1902 }
1903 }
1904
/* Table mapping TGSI opcodes to equivalent single NIR ALU ops.  A zero
 * entry means "no direct ALU mapping": either the opcode is handled
 * specially in the emit path (texturing, control flow, kills, memory,
 * multi-instruction lowerings like LIT/DST) or it is unsupported (the
 * XXX entries).  Note nir_op_mov is a nonzero enum value, so the
 * MOV/UARL entries are real mappings, not placeholders.
 */
static const nir_op op_trans[TGSI_OPCODE_LAST] = {
   [TGSI_OPCODE_ARL] = 0,
   [TGSI_OPCODE_MOV] = nir_op_mov,
   [TGSI_OPCODE_LIT] = 0,
   [TGSI_OPCODE_RCP] = nir_op_frcp,
   [TGSI_OPCODE_RSQ] = nir_op_frsq,
   [TGSI_OPCODE_EXP] = 0,
   [TGSI_OPCODE_LOG] = 0,
   [TGSI_OPCODE_MUL] = nir_op_fmul,
   [TGSI_OPCODE_ADD] = nir_op_fadd,
   [TGSI_OPCODE_DP3] = 0,
   [TGSI_OPCODE_DP4] = 0,
   [TGSI_OPCODE_DST] = 0,
   [TGSI_OPCODE_MIN] = nir_op_fmin,
   [TGSI_OPCODE_MAX] = nir_op_fmax,
   [TGSI_OPCODE_SLT] = nir_op_slt,
   [TGSI_OPCODE_SGE] = nir_op_sge,
   [TGSI_OPCODE_MAD] = nir_op_ffma,
   [TGSI_OPCODE_LRP] = 0,
   [TGSI_OPCODE_SQRT] = nir_op_fsqrt,
   [TGSI_OPCODE_FRC] = nir_op_ffract,
   [TGSI_OPCODE_TXF_LZ] = 0,
   [TGSI_OPCODE_FLR] = nir_op_ffloor,
   [TGSI_OPCODE_ROUND] = nir_op_fround_even,
   [TGSI_OPCODE_EX2] = nir_op_fexp2,
   [TGSI_OPCODE_LG2] = nir_op_flog2,
   [TGSI_OPCODE_POW] = nir_op_fpow,
   [TGSI_OPCODE_COS] = nir_op_fcos,
   [TGSI_OPCODE_DDX] = nir_op_fddx,
   [TGSI_OPCODE_DDY] = nir_op_fddy,
   [TGSI_OPCODE_KILL] = 0,
   [TGSI_OPCODE_PK2H] = 0, /* XXX */
   [TGSI_OPCODE_PK2US] = 0, /* XXX */
   [TGSI_OPCODE_PK4B] = 0, /* XXX */
   [TGSI_OPCODE_PK4UB] = 0, /* XXX */
   [TGSI_OPCODE_SEQ] = nir_op_seq,
   [TGSI_OPCODE_SGT] = 0,
   [TGSI_OPCODE_SIN] = nir_op_fsin,
   [TGSI_OPCODE_SNE] = nir_op_sne,
   [TGSI_OPCODE_SLE] = 0,
   [TGSI_OPCODE_TEX] = 0,
   [TGSI_OPCODE_TXD] = 0,
   [TGSI_OPCODE_TXP] = 0,
   [TGSI_OPCODE_UP2H] = 0, /* XXX */
   [TGSI_OPCODE_UP2US] = 0, /* XXX */
   [TGSI_OPCODE_UP4B] = 0, /* XXX */
   [TGSI_OPCODE_UP4UB] = 0, /* XXX */
   [TGSI_OPCODE_ARR] = 0,

   /* No function calls, yet. */
   [TGSI_OPCODE_CAL] = 0, /* XXX */
   [TGSI_OPCODE_RET] = 0, /* XXX */

   [TGSI_OPCODE_SSG] = nir_op_fsign,
   [TGSI_OPCODE_CMP] = 0,
   [TGSI_OPCODE_TXB] = 0,
   [TGSI_OPCODE_DIV] = nir_op_fdiv,
   [TGSI_OPCODE_DP2] = 0,
   [TGSI_OPCODE_TXL] = 0,

   [TGSI_OPCODE_BRK] = 0,
   [TGSI_OPCODE_IF] = 0,
   [TGSI_OPCODE_UIF] = 0,
   [TGSI_OPCODE_ELSE] = 0,
   [TGSI_OPCODE_ENDIF] = 0,

   [TGSI_OPCODE_DDX_FINE] = nir_op_fddx_fine,
   [TGSI_OPCODE_DDY_FINE] = nir_op_fddy_fine,

   [TGSI_OPCODE_CEIL] = nir_op_fceil,
   [TGSI_OPCODE_I2F] = nir_op_i2f32,
   [TGSI_OPCODE_NOT] = nir_op_inot,
   [TGSI_OPCODE_TRUNC] = nir_op_ftrunc,
   [TGSI_OPCODE_SHL] = nir_op_ishl,
   [TGSI_OPCODE_AND] = nir_op_iand,
   [TGSI_OPCODE_OR] = nir_op_ior,
   [TGSI_OPCODE_MOD] = nir_op_umod,
   [TGSI_OPCODE_XOR] = nir_op_ixor,
   [TGSI_OPCODE_TXF] = 0,
   [TGSI_OPCODE_TXQ] = 0,

   [TGSI_OPCODE_CONT] = 0,

   [TGSI_OPCODE_EMIT] = 0, /* XXX */
   [TGSI_OPCODE_ENDPRIM] = 0, /* XXX */

   [TGSI_OPCODE_BGNLOOP] = 0,
   [TGSI_OPCODE_BGNSUB] = 0, /* XXX: no function calls */
   [TGSI_OPCODE_ENDLOOP] = 0,
   [TGSI_OPCODE_ENDSUB] = 0, /* XXX: no function calls */

   [TGSI_OPCODE_NOP] = 0,
   [TGSI_OPCODE_FSEQ] = nir_op_feq,
   [TGSI_OPCODE_FSGE] = nir_op_fge,
   [TGSI_OPCODE_FSLT] = nir_op_flt,
   [TGSI_OPCODE_FSNE] = nir_op_fne,

   [TGSI_OPCODE_KILL_IF] = 0,

   [TGSI_OPCODE_END] = 0,

   [TGSI_OPCODE_F2I] = nir_op_f2i32,
   [TGSI_OPCODE_IDIV] = nir_op_idiv,
   [TGSI_OPCODE_IMAX] = nir_op_imax,
   [TGSI_OPCODE_IMIN] = nir_op_imin,
   [TGSI_OPCODE_INEG] = nir_op_ineg,
   [TGSI_OPCODE_ISGE] = nir_op_ige,
   [TGSI_OPCODE_ISHR] = nir_op_ishr,
   [TGSI_OPCODE_ISLT] = nir_op_ilt,
   [TGSI_OPCODE_F2U] = nir_op_f2u32,
   [TGSI_OPCODE_U2F] = nir_op_u2f32,
   [TGSI_OPCODE_UADD] = nir_op_iadd,
   [TGSI_OPCODE_UDIV] = nir_op_udiv,
   [TGSI_OPCODE_UMAD] = 0,
   [TGSI_OPCODE_UMAX] = nir_op_umax,
   [TGSI_OPCODE_UMIN] = nir_op_umin,
   [TGSI_OPCODE_UMOD] = nir_op_umod,
   [TGSI_OPCODE_UMUL] = nir_op_imul,
   [TGSI_OPCODE_USEQ] = nir_op_ieq,
   [TGSI_OPCODE_USGE] = nir_op_uge,
   [TGSI_OPCODE_USHR] = nir_op_ushr,
   [TGSI_OPCODE_USLT] = nir_op_ult,
   [TGSI_OPCODE_USNE] = nir_op_ine,

   [TGSI_OPCODE_SWITCH] = 0, /* not emitted by glsl_to_tgsi.cpp */
   [TGSI_OPCODE_CASE] = 0, /* not emitted by glsl_to_tgsi.cpp */
   [TGSI_OPCODE_DEFAULT] = 0, /* not emitted by glsl_to_tgsi.cpp */
   [TGSI_OPCODE_ENDSWITCH] = 0, /* not emitted by glsl_to_tgsi.cpp */

   /* XXX: SAMPLE opcodes */

   [TGSI_OPCODE_UARL] = nir_op_mov,
   [TGSI_OPCODE_UCMP] = 0,
   [TGSI_OPCODE_IABS] = nir_op_iabs,
   [TGSI_OPCODE_ISSG] = nir_op_isign,

   [TGSI_OPCODE_LOAD] = 0,
   [TGSI_OPCODE_STORE] = 0,

   /* XXX: atomics */

   [TGSI_OPCODE_TEX2] = 0,
   [TGSI_OPCODE_TXB2] = 0,
   [TGSI_OPCODE_TXL2] = 0,

   [TGSI_OPCODE_IMUL_HI] = nir_op_imul_high,
   [TGSI_OPCODE_UMUL_HI] = nir_op_umul_high,

   [TGSI_OPCODE_TG4] = 0,
   [TGSI_OPCODE_LODQ] = 0,

   [TGSI_OPCODE_IBFE] = nir_op_ibitfield_extract,
   [TGSI_OPCODE_UBFE] = nir_op_ubitfield_extract,
   [TGSI_OPCODE_BFI] = nir_op_bitfield_insert,
   [TGSI_OPCODE_BREV] = nir_op_bitfield_reverse,
   [TGSI_OPCODE_POPC] = nir_op_bit_count,
   [TGSI_OPCODE_LSB] = nir_op_find_lsb,
   [TGSI_OPCODE_IMSB] = nir_op_ifind_msb,
   [TGSI_OPCODE_UMSB] = nir_op_ufind_msb,

   [TGSI_OPCODE_INTERP_CENTROID] = 0, /* XXX */
   [TGSI_OPCODE_INTERP_SAMPLE] = 0, /* XXX */
   [TGSI_OPCODE_INTERP_OFFSET] = 0, /* XXX */

   [TGSI_OPCODE_F2D] = nir_op_f2f64,
   [TGSI_OPCODE_D2F] = nir_op_f2f32,
   [TGSI_OPCODE_DMUL] = nir_op_fmul,
   [TGSI_OPCODE_D2U] = nir_op_f2u32,
   [TGSI_OPCODE_U2D] = nir_op_u2f64,

   [TGSI_OPCODE_U64ADD] = nir_op_iadd,
   [TGSI_OPCODE_U64MUL] = nir_op_imul,
   [TGSI_OPCODE_U64DIV] = nir_op_udiv,
   [TGSI_OPCODE_U64SNE] = nir_op_ine,
};
2080
/* Translates the current TGSI instruction token (c->token) into NIR.
 *
 * Simple ALU opcodes fall through to the default case, which looks up the
 * NIR opcode in the op_trans[] table; opcodes that need special lowering
 * (scalar transcendentals, texturing, memory, control flow, ...) each get
 * a dedicated ttn_*() helper.
 */
static void
ttn_emit_instruction(struct ttn_compile *c)
{
   nir_builder *b = &c->build;
   struct tgsi_full_instruction *tgsi_inst = &c->token->FullInstruction;
   unsigned i;
   unsigned tgsi_op = tgsi_inst->Instruction.Opcode;
   struct tgsi_full_dst_register *tgsi_dst = &tgsi_inst->Dst[0];

   /* END terminates the shader; there is nothing to emit for it. */
   if (tgsi_op == TGSI_OPCODE_END)
      return;

   /* Fetch all source operands up front so each case below can just index
    * src[].
    */
   nir_ssa_def *src[TGSI_FULL_MAX_SRC_REGISTERS];
   for (i = 0; i < tgsi_inst->Instruction.NumSrcRegs; i++) {
      src[i] = ttn_get_src(c, &tgsi_inst->Src[i], i);
   }
   nir_alu_dest dest = ttn_get_dest(c, tgsi_dst);

   unsigned tgsi_dst_type = tgsi_opcode_infer_dst_type(tgsi_op, 0);

   /* The destination bitsize of the NIR opcode (not TGSI, where it's always
    * 32 bits). This needs to be passed into ttn_alu() because it can't be
    * inferred for comparison opcodes.
    */
   unsigned dst_bitsize = tgsi_type_is_64bit(tgsi_dst_type) ? 64 : 32;

   switch (tgsi_op) {
   /* Scalar transcendentals: TGSI reads only the .x channel of the source
    * and broadcasts the result through the writemask.
    */
   case TGSI_OPCODE_RSQ:
      ttn_move_dest(b, dest, nir_frsq(b, ttn_channel(b, src[0], X)));
      break;

   case TGSI_OPCODE_SQRT:
      ttn_move_dest(b, dest, nir_fsqrt(b, ttn_channel(b, src[0], X)));
      break;

   case TGSI_OPCODE_RCP:
      ttn_move_dest(b, dest, nir_frcp(b, ttn_channel(b, src[0], X)));
      break;

   case TGSI_OPCODE_EX2:
      ttn_move_dest(b, dest, nir_fexp2(b, ttn_channel(b, src[0], X)));
      break;

   case TGSI_OPCODE_LG2:
      ttn_move_dest(b, dest, nir_flog2(b, ttn_channel(b, src[0], X)));
      break;

   case TGSI_OPCODE_POW:
      ttn_move_dest(b, dest, nir_fpow(b,
                                      ttn_channel(b, src[0], X),
                                      ttn_channel(b, src[1], X)));
      break;

   case TGSI_OPCODE_COS:
      ttn_move_dest(b, dest, nir_fcos(b, ttn_channel(b, src[0], X)));
      break;

   case TGSI_OPCODE_SIN:
      ttn_move_dest(b, dest, nir_fsin(b, ttn_channel(b, src[0], X)));
      break;

   /* Opcodes with no single NIR equivalent, lowered by helpers. */
   case TGSI_OPCODE_ARL:
      ttn_arl(b, op_trans[tgsi_op], dest, src);
      break;

   case TGSI_OPCODE_EXP:
      ttn_exp(b, op_trans[tgsi_op], dest, src);
      break;

   case TGSI_OPCODE_LOG:
      ttn_log(b, op_trans[tgsi_op], dest, src);
      break;

   case TGSI_OPCODE_DST:
      ttn_dst(b, op_trans[tgsi_op], dest, src);
      break;

   case TGSI_OPCODE_LIT:
      ttn_lit(b, op_trans[tgsi_op], dest, src);
      break;

   case TGSI_OPCODE_DP2:
      ttn_dp2(b, op_trans[tgsi_op], dest, src);
      break;

   case TGSI_OPCODE_DP3:
      ttn_dp3(b, op_trans[tgsi_op], dest, src);
      break;

   case TGSI_OPCODE_DP4:
      ttn_dp4(b, op_trans[tgsi_op], dest, src);
      break;

   case TGSI_OPCODE_UMAD:
      ttn_umad(b, op_trans[tgsi_op], dest, src);
      break;

   case TGSI_OPCODE_LRP:
      /* nir_flrp(a, b, c) = a * (1 - c) + b * c, so TGSI's
       * LRP dst = src0 * src1 + (1 - src0) * src2 maps to the operands in
       * reversed order.
       */
      ttn_move_dest(b, dest, nir_flrp(b, src[2], src[1], src[0]));
      break;

   case TGSI_OPCODE_KILL:
      ttn_kill(b, op_trans[tgsi_op], dest, src);
      break;

   case TGSI_OPCODE_ARR:
      ttn_arr(b, op_trans[tgsi_op], dest, src);
      break;

   case TGSI_OPCODE_CMP:
      ttn_cmp(b, op_trans[tgsi_op], dest, src);
      break;

   case TGSI_OPCODE_UCMP:
      ttn_ucmp(b, op_trans[tgsi_op], dest, src);
      break;

   case TGSI_OPCODE_SGT:
      ttn_sgt(b, op_trans[tgsi_op], dest, src);
      break;

   case TGSI_OPCODE_SLE:
      ttn_sle(b, op_trans[tgsi_op], dest, src);
      break;

   case TGSI_OPCODE_KILL_IF:
      ttn_kill_if(b, op_trans[tgsi_op], dest, src);
      break;

   /* All texture sampling variants share one lowering path. */
   case TGSI_OPCODE_TEX:
   case TGSI_OPCODE_TXP:
   case TGSI_OPCODE_TXL:
   case TGSI_OPCODE_TXB:
   case TGSI_OPCODE_TXD:
   case TGSI_OPCODE_TEX2:
   case TGSI_OPCODE_TXL2:
   case TGSI_OPCODE_TXB2:
   case TGSI_OPCODE_TXF:
   case TGSI_OPCODE_TXF_LZ:
   case TGSI_OPCODE_TG4:
   case TGSI_OPCODE_LODQ:
      ttn_tex(c, dest, src);
      break;

   case TGSI_OPCODE_TXQ:
      ttn_txq(c, dest, src);
      break;

   case TGSI_OPCODE_LOAD:
   case TGSI_OPCODE_STORE:
      ttn_mem(c, dest, src);
      break;

   case TGSI_OPCODE_NOP:
      break;

   /* Structured control flow: the ttn_* helpers maintain c's if/loop
    * stacks and move the builder cursor.
    */
   case TGSI_OPCODE_IF:
      ttn_if(c, src[0], false);
      break;

   case TGSI_OPCODE_UIF:
      ttn_if(c, src[0], true);
      break;

   case TGSI_OPCODE_ELSE:
      ttn_else(c);
      break;

   case TGSI_OPCODE_ENDIF:
      ttn_endif(c);
      break;

   case TGSI_OPCODE_BGNLOOP:
      ttn_bgnloop(c);
      break;

   case TGSI_OPCODE_BRK:
      ttn_brk(b);
      break;

   case TGSI_OPCODE_CONT:
      ttn_cont(b);
      break;

   case TGSI_OPCODE_ENDLOOP:
      ttn_endloop(c);
      break;

   default:
      /* MOV translates to nir_op_mov == 0, so it must be special-cased in
       * the "is this opcode in the table?" check.
       */
      if (op_trans[tgsi_op] != 0 || tgsi_op == TGSI_OPCODE_MOV) {
         ttn_alu(b, op_trans[tgsi_op], dest, dst_bitsize, src);
      } else {
         fprintf(stderr, "unknown TGSI opcode: %s\n",
                 tgsi_get_opcode_name(tgsi_op));
         abort();
      }
      break;
   }

   if (tgsi_inst->Instruction.Saturate) {
      /* Re-read the value just written to the register dest and clamp it to
       * [0, 1].  Only works for register (non-SSA) destinations.
       */
      assert(!dest.dest.is_ssa);
      ttn_move_dest(b, dest, nir_fsat(b, ttn_src_for_dest(b, &dest)));
   }

   /* if the dst has a matching var, append store_var to move
    * output from reg to var
    */
   nir_variable *var = ttn_get_var(c, tgsi_dst);
   if (var) {
      unsigned index = tgsi_dst->Register.Index;
      unsigned offset = c->temp_regs[index].offset;
      struct tgsi_ind_register *indirect = tgsi_dst->Register.Indirect ?
                                           &tgsi_dst->Indirect : NULL;
      nir_src val = nir_src_for_reg(dest.dest.reg.reg);
      nir_store_deref(b, ttn_array_deref(c, var, offset, indirect),
                      nir_ssa_for_src(b, val, 4), dest.write_mask);
   }
}
2299
2300 /**
2301 * Puts a NIR intrinsic to store of each TGSI_FILE_OUTPUT value to the output
2302 * variables at the end of the shader.
2303 *
2304 * We don't generate these incrementally as the TGSI_FILE_OUTPUT values are
2305 * written, because there's no output load intrinsic, which means we couldn't
2306 * handle writemasks.
2307 */
2308 static void
2309 ttn_add_output_stores(struct ttn_compile *c)
2310 {
2311 nir_builder *b = &c->build;
2312
2313 for (int i = 0; i < c->build.shader->num_outputs; i++) {
2314 nir_variable *var = c->outputs[i];
2315 if (!var)
2316 continue;
2317
2318 nir_src src = nir_src_for_reg(c->output_regs[i].reg);
2319 src.reg.base_offset = c->output_regs[i].offset;
2320
2321 nir_ssa_def *store_value = nir_ssa_for_src(b, src, 4);
2322 if (c->build.shader->info.stage == MESA_SHADER_FRAGMENT &&
2323 var->data.location == FRAG_RESULT_DEPTH) {
2324 /* TGSI uses TGSI_SEMANTIC_POSITION.z for the depth output, while
2325 * NIR uses a single float FRAG_RESULT_DEPTH.
2326 */
2327 store_value = nir_channel(b, store_value, 2);
2328 }
2329
2330 nir_store_deref(b, nir_build_deref_var(b, var), store_value,
2331 (1 << store_value->num_components) - 1);
2332 }
2333 }
2334
2335 /**
2336 * Parses the given TGSI tokens.
2337 */
2338 static void
2339 ttn_parse_tgsi(struct ttn_compile *c, const void *tgsi_tokens)
2340 {
2341 struct tgsi_parse_context parser;
2342 int ret;
2343
2344 ret = tgsi_parse_init(&parser, tgsi_tokens);
2345 assert(ret == TGSI_PARSE_OK);
2346
2347 while (!tgsi_parse_end_of_tokens(&parser)) {
2348 tgsi_parse_token(&parser);
2349 c->token = &parser.FullToken;
2350
2351 switch (parser.FullToken.Token.Type) {
2352 case TGSI_TOKEN_TYPE_DECLARATION:
2353 ttn_emit_declaration(c);
2354 break;
2355
2356 case TGSI_TOKEN_TYPE_INSTRUCTION:
2357 ttn_emit_instruction(c);
2358 break;
2359
2360 case TGSI_TOKEN_TYPE_IMMEDIATE:
2361 ttn_emit_immediate(c);
2362 break;
2363 }
2364 }
2365
2366 tgsi_parse_free(&parser);
2367 }
2368
2369 static void
2370 ttn_read_pipe_caps(struct ttn_compile *c,
2371 struct pipe_screen *screen)
2372 {
2373 c->cap_scalar = screen->get_shader_param(screen, c->scan->processor, PIPE_SHADER_CAP_SCALAR_ISA);
2374 c->cap_packed_uniforms = screen->get_param(screen, PIPE_CAP_PACKED_UNIFORMS);
2375 c->cap_samplers_as_deref = screen->get_param(screen, PIPE_CAP_NIR_SAMPLERS_AS_DEREF);
2376 c->cap_face_is_sysval = screen->get_param(screen, PIPE_CAP_TGSI_FS_FACE_IS_INTEGER_SYSVAL);
2377 c->cap_position_is_sysval = screen->get_param(screen, PIPE_CAP_TGSI_FS_POSITION_IS_SYSVAL);
2378 c->cap_point_is_sysval = screen->get_param(screen, PIPE_CAP_TGSI_FS_POINT_IS_SYSVAL);
2379 }
2380
/**
 * Initializes a TGSI-to-NIR compiler.
 *
 * Scans the tokens, sets up the nir_builder and per-file register arrays,
 * then parses the tokens into NIR and appends the final output stores.
 * Either @options or @screen must be non-NULL.  The caller owns the
 * returned context and must ralloc_free() it.
 */
static struct ttn_compile *
ttn_compile_init(const void *tgsi_tokens,
                 const nir_shader_compiler_options *options,
                 struct pipe_screen *screen)
{
   struct ttn_compile *c;
   struct nir_shader *s;
   struct tgsi_shader_info scan;

   assert(options || screen);
   c = rzalloc(NULL, struct ttn_compile);

   tgsi_scan_shader(tgsi_tokens, &scan);
   /* NOTE(review): c->scan points at the stack-local `scan` above, so it is
    * only valid while this function runs (ttn_read_pipe_caps and the parse
    * below use it).  Do not dereference c->scan after ttn_compile_init
    * returns.
    */
   c->scan = &scan;

   if (!options) {
      options =
         screen->get_compiler_options(screen, PIPE_SHADER_IR_NIR, scan.processor);
   }

   nir_builder_init_simple_shader(&c->build, NULL,
                                  tgsi_processor_to_shader_stage(scan.processor),
                                  options);

   s = c->build.shader;

   if (screen) {
      ttn_read_pipe_caps(c, screen);
   } else {
      /* TTN used to be hard coded to always make FACE a sysval,
       * so it makes sense to preserve that behavior so users don't break. */
      c->cap_face_is_sysval = true;
   }

   if (s->info.stage == MESA_SHADER_FRAGMENT)
      s->info.fs.untyped_color_outputs = true;

   /* file_max[] holds the highest register index used, so counts are +1. */
   s->num_inputs = scan.file_max[TGSI_FILE_INPUT] + 1;
   s->num_uniforms = scan.const_file_max[0] + 1;
   s->num_outputs = scan.file_max[TGSI_FILE_OUTPUT] + 1;

   /* Translate TGSI shader properties into nir_shader_info fields. */
   for (unsigned i = 0; i < TGSI_PROPERTY_COUNT; i++) {
      unsigned value = scan.properties[i];

      switch (i) {
      case TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS:
         break; /* handled in ttn_emit_declaration */
      case TGSI_PROPERTY_FS_COORD_ORIGIN:
         s->info.fs.origin_upper_left = value == TGSI_FS_COORD_ORIGIN_UPPER_LEFT;
         break;
      case TGSI_PROPERTY_FS_COORD_PIXEL_CENTER:
         s->info.fs.pixel_center_integer = value == TGSI_FS_COORD_PIXEL_CENTER_INTEGER;
         break;
      case TGSI_PROPERTY_FS_DEPTH_LAYOUT:
         s->info.fs.depth_layout = ttn_get_depth_layout(value);
         break;
      case TGSI_PROPERTY_VS_WINDOW_SPACE_POSITION:
         s->info.vs.window_space_position = value;
         break;
      case TGSI_PROPERTY_NEXT_SHADER:
         s->info.next_stage = tgsi_processor_to_shader_stage(value);
         break;
      case TGSI_PROPERTY_VS_BLIT_SGPRS_AMD:
         s->info.vs.blit_sgprs_amd = value;
         break;
      case TGSI_PROPERTY_CS_FIXED_BLOCK_WIDTH:
         s->info.cs.local_size[0] = value;
         break;
      case TGSI_PROPERTY_CS_FIXED_BLOCK_HEIGHT:
         s->info.cs.local_size[1] = value;
         break;
      case TGSI_PROPERTY_CS_FIXED_BLOCK_DEPTH:
         s->info.cs.local_size[2] = value;
         break;
      case TGSI_PROPERTY_CS_USER_DATA_COMPONENTS_AMD:
         s->info.cs.user_data_components_amd = value;
         break;
      default:
         /* Only properties with the default value 0 may go unhandled. */
         if (value) {
            fprintf(stderr, "tgsi_to_nir: unhandled TGSI property %u = %u\n",
                    i, value);
            unreachable("unhandled TGSI property");
         }
      }
   }

   /* A compute shader with any zero block dimension has a variable
    * (runtime-specified) local size.
    */
   if (s->info.stage == MESA_SHADER_COMPUTE &&
       (!s->info.cs.local_size[0] ||
        !s->info.cs.local_size[1] ||
        !s->info.cs.local_size[2]))
      s->info.cs.local_size_variable = true;

   c->inputs = rzalloc_array(c, struct nir_variable *, s->num_inputs);
   c->outputs = rzalloc_array(c, struct nir_variable *, s->num_outputs);

   c->output_regs = rzalloc_array(c, struct ttn_reg_info,
                                  scan.file_max[TGSI_FILE_OUTPUT] + 1);
   c->temp_regs = rzalloc_array(c, struct ttn_reg_info,
                                scan.file_max[TGSI_FILE_TEMPORARY] + 1);
   c->imm_defs = rzalloc_array(c, nir_ssa_def *,
                               scan.file_max[TGSI_FILE_IMMEDIATE] + 1);

   c->num_samp_types = scan.file_max[TGSI_FILE_SAMPLER_VIEW] + 1;
   c->samp_types = rzalloc_array(c, nir_alu_type, c->num_samp_types);

   /* Size the control-flow stacks from the scan's opcode counts so pushes
    * during parsing can never overflow.
    */
   c->if_stack = rzalloc_array(c, nir_cursor,
                               (scan.opcode_count[TGSI_OPCODE_IF] +
                                scan.opcode_count[TGSI_OPCODE_UIF]) * 2);
   c->loop_stack = rzalloc_array(c, nir_cursor,
                                 scan.opcode_count[TGSI_OPCODE_BGNLOOP]);


   ttn_parse_tgsi(c, tgsi_tokens);
   ttn_add_output_stores(c);

   nir_validate_shader(c->build.shader, "TTN: after parsing TGSI and creating the NIR shader");

   return c;
}
2503
/* Runs the standard NIR optimization loop to a fixpoint; `scalar` selects
 * whether ALU/phi ops are first scalarized for scalar-ISA backends.
 */
static void
ttn_optimize_nir(nir_shader *nir, bool scalar)
{
   bool progress;
   do {
      progress = false;

      NIR_PASS_V(nir, nir_lower_vars_to_ssa);

      if (scalar) {
         NIR_PASS_V(nir, nir_lower_alu_to_scalar, NULL);
         NIR_PASS_V(nir, nir_lower_phis_to_scalar);
      }

      /* Lowering passes run unconditionally (NIR_PASS_V); the optimization
       * passes below accumulate into `progress` to drive the fixpoint loop.
       */
      NIR_PASS_V(nir, nir_lower_alu);
      NIR_PASS_V(nir, nir_lower_pack);
      NIR_PASS(progress, nir, nir_copy_prop);
      NIR_PASS(progress, nir, nir_opt_remove_phis);
      NIR_PASS(progress, nir, nir_opt_dce);

      if (nir_opt_trivial_continues(nir)) {
         progress = true;
         /* Clean up immediately so later passes see the simplified CFG. */
         NIR_PASS(progress, nir, nir_copy_prop);
         NIR_PASS(progress, nir, nir_opt_dce);
      }

      NIR_PASS(progress, nir, nir_opt_if, false);
      NIR_PASS(progress, nir, nir_opt_dead_cf);
      NIR_PASS(progress, nir, nir_opt_cse);
      NIR_PASS(progress, nir, nir_opt_peephole_select, 8, true, true);

      NIR_PASS(progress, nir, nir_opt_algebraic);
      NIR_PASS(progress, nir, nir_opt_constant_folding);

      NIR_PASS(progress, nir, nir_opt_undef);
      NIR_PASS(progress, nir, nir_opt_conditional_discard);

      /* Only attempt unrolling when the backend opted in via its
       * compiler options.
       */
      if (nir->options->max_unroll_iterations) {
         NIR_PASS(progress, nir, nir_opt_loop_unroll, (nir_variable_mode)0);
      }

   } while (progress);

}
2548
/**
 * Finalizes the NIR in a similar way as st_glsl_to_nir does.
 *
 * Drivers expect that these passes are already performed,
 * so we have to do it here too.
 */
static void
ttn_finalize_nir(struct ttn_compile *c)
{
   struct nir_shader *nir = c->build.shader;

   /* Get everything into SSA form first. */
   NIR_PASS_V(nir, nir_lower_vars_to_ssa);
   NIR_PASS_V(nir, nir_lower_regs_to_ssa);

   NIR_PASS_V(nir, nir_lower_global_vars_to_local);
   NIR_PASS_V(nir, nir_split_var_copies);
   NIR_PASS_V(nir, nir_lower_var_copies);
   NIR_PASS_V(nir, nir_lower_system_values);

   /* Apply the lowering strategies the screen asked for in
    * ttn_read_pipe_caps().
    */
   if (c->cap_packed_uniforms)
      NIR_PASS_V(nir, nir_lower_uniforms_to_ubo, 16);

   if (c->cap_samplers_as_deref)
      NIR_PASS_V(nir, gl_nir_lower_samplers_as_deref, NULL);
   else
      NIR_PASS_V(nir, gl_nir_lower_samplers, NULL);

   ttn_optimize_nir(nir, c->cap_scalar);
   nir_shader_gather_info(nir, c->build.impl);
   nir_validate_shader(nir, "TTN: after all optimizations");
}
2580
2581 struct nir_shader *
2582 tgsi_to_nir(const void *tgsi_tokens,
2583 struct pipe_screen *screen)
2584 {
2585 struct ttn_compile *c;
2586 struct nir_shader *s;
2587
2588 c = ttn_compile_init(tgsi_tokens, NULL, screen);
2589 s = c->build.shader;
2590 ttn_finalize_nir(c);
2591 ralloc_free(c);
2592
2593 return s;
2594 }
2595
2596 struct nir_shader *
2597 tgsi_to_nir_noscreen(const void *tgsi_tokens,
2598 const nir_shader_compiler_options *options)
2599 {
2600 struct ttn_compile *c;
2601 struct nir_shader *s;
2602
2603 c = ttn_compile_init(tgsi_tokens, options, NULL);
2604 s = c->build.shader;
2605 ralloc_free(c);
2606
2607 return s;
2608 }
2609