tgsi_to_nir: implement a few needed 64-bit integer opcodes
[mesa.git] src/gallium/auxiliary/nir/tgsi_to_nir.c
1 /*
2 * Copyright © 2014-2015 Broadcom
3 * Copyright (C) 2014 Rob Clark <robclark@freedesktop.org>
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9 * and/or sell copies of the Software, and to permit persons to whom the
10 * Software is furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice (including the next
13 * paragraph) shall be included in all copies or substantial portions of the
14 * Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
22 * IN THE SOFTWARE.
23 */
24
25 #include "util/ralloc.h"
26 #include "pipe/p_screen.h"
27
28 #include "compiler/nir/nir.h"
29 #include "compiler/nir/nir_control_flow.h"
30 #include "compiler/nir/nir_builder.h"
31 #include "compiler/glsl/gl_nir.h"
32 #include "compiler/glsl/list.h"
33 #include "compiler/shader_enums.h"
34
35 #include "tgsi_to_nir.h"
36 #include "tgsi/tgsi_parse.h"
37 #include "tgsi/tgsi_dump.h"
38 #include "tgsi/tgsi_info.h"
39 #include "tgsi/tgsi_scan.h"
40 #include "tgsi/tgsi_from_mesa.h"
41
42 #define SWIZ(X, Y, Z, W) (unsigned[4]){ \
43 TGSI_SWIZZLE_##X, \
44 TGSI_SWIZZLE_##Y, \
45 TGSI_SWIZZLE_##Z, \
46 TGSI_SWIZZLE_##W, \
47 }
48
49 struct ttn_reg_info {
50 /** nir register containing this TGSI index. */
51 nir_register *reg;
52 nir_variable *var;
53 /** Offset (in vec4s) from the start of var for this TGSI index. */
54 int offset;
55 };
56
57 struct ttn_compile {
58 union tgsi_full_token *token;
59 nir_builder build;
60 struct tgsi_shader_info *scan;
61
62 struct ttn_reg_info *output_regs;
63 struct ttn_reg_info *temp_regs;
64 nir_ssa_def **imm_defs;
65
66 unsigned num_samp_types;
67 nir_alu_type *samp_types;
68
69 nir_register *addr_reg;
70
71 nir_variable **inputs;
72 nir_variable **outputs;
73 nir_variable *samplers[PIPE_MAX_SAMPLERS];
74
75 nir_variable *input_var_face;
76 nir_variable *input_var_position;
77 nir_variable *input_var_point;
78
79 /**
80 * Stack of nir_cursors where instructions should be pushed as we pop
81 * back out of the control flow stack.
82 *
83 * For each IF/ELSE/ENDIF block, if_stack[if_stack_pos - 1] holds the cursor
84 * where the ELSE instructions should be placed, and if_stack[if_stack_pos - 2]
85 * holds the cursor where the instructions following the ENDIF go.
86 */
87 nir_cursor *if_stack;
88 unsigned if_stack_pos;
89
90 /**
91 * Stack of nir_cursors where instructions should be pushed as we pop
92 * back out of the control flow stack.
93 *
94 * loop_stack[loop_stack_pos - 1] contains the cf_node_list for the outside
95 * of the loop.
96 */
97 nir_cursor *loop_stack;
98 unsigned loop_stack_pos;
99
100 /* How many TGSI_FILE_IMMEDIATE vec4s have been parsed so far. */
101 unsigned next_imm;
102
103 bool cap_scalar;
104 bool cap_face_is_sysval;
105 bool cap_position_is_sysval;
106 bool cap_point_is_sysval;
107 bool cap_packed_uniforms;
108 bool cap_samplers_as_deref;
109 };
110
111 #define ttn_swizzle(b, src, x, y, z, w) \
112 nir_swizzle(b, src, SWIZ(x, y, z, w), 4)
113 #define ttn_channel(b, src, swiz) \
114 nir_channel(b, src, TGSI_SWIZZLE_##swiz)
115
116 static gl_varying_slot
117 tgsi_varying_semantic_to_slot(unsigned semantic, unsigned index)
118 {
119 switch (semantic) {
120 case TGSI_SEMANTIC_POSITION:
121 return VARYING_SLOT_POS;
122 case TGSI_SEMANTIC_COLOR:
123 if (index == 0)
124 return VARYING_SLOT_COL0;
125 else
126 return VARYING_SLOT_COL1;
127 case TGSI_SEMANTIC_BCOLOR:
128 if (index == 0)
129 return VARYING_SLOT_BFC0;
130 else
131 return VARYING_SLOT_BFC1;
132 case TGSI_SEMANTIC_FOG:
133 return VARYING_SLOT_FOGC;
134 case TGSI_SEMANTIC_PSIZE:
135 return VARYING_SLOT_PSIZ;
136 case TGSI_SEMANTIC_GENERIC:
137 return VARYING_SLOT_VAR0 + index;
138 case TGSI_SEMANTIC_FACE:
139 return VARYING_SLOT_FACE;
140 case TGSI_SEMANTIC_EDGEFLAG:
141 return VARYING_SLOT_EDGE;
142 case TGSI_SEMANTIC_PRIMID:
143 return VARYING_SLOT_PRIMITIVE_ID;
144 case TGSI_SEMANTIC_CLIPDIST:
145 if (index == 0)
146 return VARYING_SLOT_CLIP_DIST0;
147 else
148 return VARYING_SLOT_CLIP_DIST1;
149 case TGSI_SEMANTIC_CLIPVERTEX:
150 return VARYING_SLOT_CLIP_VERTEX;
151 case TGSI_SEMANTIC_TEXCOORD:
152 return VARYING_SLOT_TEX0 + index;
153 case TGSI_SEMANTIC_PCOORD:
154 return VARYING_SLOT_PNTC;
155 case TGSI_SEMANTIC_VIEWPORT_INDEX:
156 return VARYING_SLOT_VIEWPORT;
157 case TGSI_SEMANTIC_LAYER:
158 return VARYING_SLOT_LAYER;
159 default:
160 fprintf(stderr, "Bad TGSI semantic: %d/%d\n", semantic, index);
161 abort();
162 }
163 }
164
165 static nir_ssa_def *
166 ttn_src_for_dest(nir_builder *b, nir_alu_dest *dest)
167 {
168 nir_alu_src src;
169 memset(&src, 0, sizeof(src));
170
171 if (dest->dest.is_ssa)
172 src.src = nir_src_for_ssa(&dest->dest.ssa);
173 else {
174 assert(!dest->dest.reg.indirect);
175 src.src = nir_src_for_reg(dest->dest.reg.reg);
176 src.src.reg.base_offset = dest->dest.reg.base_offset;
177 }
178
179 for (int i = 0; i < 4; i++)
180 src.swizzle[i] = i;
181
182 return nir_mov_alu(b, src, 4);
183 }
184
185 static enum glsl_interp_mode
186 ttn_translate_interp_mode(unsigned tgsi_interp)
187 {
188 switch (tgsi_interp) {
189 case TGSI_INTERPOLATE_CONSTANT:
190 return INTERP_MODE_FLAT;
191 case TGSI_INTERPOLATE_LINEAR:
192 return INTERP_MODE_NOPERSPECTIVE;
193 case TGSI_INTERPOLATE_PERSPECTIVE:
194 return INTERP_MODE_SMOOTH;
195 case TGSI_INTERPOLATE_COLOR:
196 return INTERP_MODE_SMOOTH;
197 default:
198 unreachable("bad TGSI interpolation mode");
199 }
200 }
201
202 static void
203 ttn_emit_declaration(struct ttn_compile *c)
204 {
205 nir_builder *b = &c->build;
206 struct tgsi_full_declaration *decl = &c->token->FullDeclaration;
207 unsigned array_size = decl->Range.Last - decl->Range.First + 1;
208 unsigned file = decl->Declaration.File;
209 unsigned i;
210
211 if (file == TGSI_FILE_TEMPORARY) {
212 if (decl->Declaration.Array) {
213 /* for arrays, we create variables instead of registers: */
214 nir_variable *var = rzalloc(b->shader, nir_variable);
215
216 var->type = glsl_array_type(glsl_vec4_type(), array_size, 0);
217 var->data.mode = nir_var_shader_temp;
218 var->name = ralloc_asprintf(var, "arr_%d", decl->Array.ArrayID);
219
220 exec_list_push_tail(&b->shader->globals, &var->node);
221
222 for (i = 0; i < array_size; i++) {
223 /* point all the matching slots to the same var,
224 * with appropriate offset set, mostly just so
225 * we know what to do when tgsi does a non-indirect
226 * access
227 */
228 c->temp_regs[decl->Range.First + i].reg = NULL;
229 c->temp_regs[decl->Range.First + i].var = var;
230 c->temp_regs[decl->Range.First + i].offset = i;
231 }
232 } else {
233 for (i = 0; i < array_size; i++) {
234 nir_register *reg = nir_local_reg_create(b->impl);
235 reg->num_components = 4;
236 c->temp_regs[decl->Range.First + i].reg = reg;
237 c->temp_regs[decl->Range.First + i].var = NULL;
238 c->temp_regs[decl->Range.First + i].offset = 0;
239 }
240 }
241 } else if (file == TGSI_FILE_ADDRESS) {
242 c->addr_reg = nir_local_reg_create(b->impl);
243 c->addr_reg->num_components = 4;
244 } else if (file == TGSI_FILE_SYSTEM_VALUE) {
245 /* Nothing to record for system values. */
246 } else if (file == TGSI_FILE_SAMPLER) {
247 /* Nothing to record for samplers. */
248 } else if (file == TGSI_FILE_SAMPLER_VIEW) {
249 struct tgsi_declaration_sampler_view *sview = &decl->SamplerView;
250 nir_alu_type type;
251
252 assert((sview->ReturnTypeX == sview->ReturnTypeY) &&
253 (sview->ReturnTypeX == sview->ReturnTypeZ) &&
254 (sview->ReturnTypeX == sview->ReturnTypeW));
255
256 switch (sview->ReturnTypeX) {
257 case TGSI_RETURN_TYPE_SINT:
258 type = nir_type_int;
259 break;
260 case TGSI_RETURN_TYPE_UINT:
261 type = nir_type_uint;
262 break;
263 case TGSI_RETURN_TYPE_FLOAT:
264 default:
265 type = nir_type_float;
266 break;
267 }
268
269 for (i = 0; i < array_size; i++) {
270 c->samp_types[decl->Range.First + i] = type;
271 }
272 } else {
273 bool is_array = (array_size > 1);
274
275 assert(file == TGSI_FILE_INPUT ||
276 file == TGSI_FILE_OUTPUT ||
277 file == TGSI_FILE_CONSTANT);
278
279 /* nothing to do for UBOs: */
280 if ((file == TGSI_FILE_CONSTANT) && decl->Declaration.Dimension &&
281 decl->Dim.Index2D != 0) {
282 b->shader->info.num_ubos =
283 MAX2(b->shader->info.num_ubos, decl->Dim.Index2D);
284 return;
285 }
286
287 if ((file == TGSI_FILE_INPUT) || (file == TGSI_FILE_OUTPUT)) {
288 is_array = (is_array && decl->Declaration.Array &&
289 (decl->Array.ArrayID != 0));
290 }
291
292 for (i = 0; i < array_size; i++) {
293 unsigned idx = decl->Range.First + i;
294 nir_variable *var = rzalloc(b->shader, nir_variable);
295
296 var->data.driver_location = idx;
297
298 var->type = glsl_vec4_type();
299 if (is_array)
300 var->type = glsl_array_type(var->type, array_size, 0);
301
302 switch (file) {
303 case TGSI_FILE_INPUT:
304 var->data.read_only = true;
305 var->data.mode = nir_var_shader_in;
306 var->name = ralloc_asprintf(var, "in_%d", idx);
307
308 if (c->scan->processor == PIPE_SHADER_FRAGMENT) {
309 if (decl->Semantic.Name == TGSI_SEMANTIC_FACE) {
310 var->type = glsl_bool_type();
311 if (c->cap_face_is_sysval) {
312 var->data.mode = nir_var_system_value;
313 var->data.location = SYSTEM_VALUE_FRONT_FACE;
314 } else {
315 var->data.location = VARYING_SLOT_FACE;
316 }
317 c->input_var_face = var;
318 } else if (decl->Semantic.Name == TGSI_SEMANTIC_POSITION) {
319 if (c->cap_position_is_sysval) {
320 var->data.mode = nir_var_system_value;
321 var->data.location = SYSTEM_VALUE_FRAG_COORD;
322 } else {
323 var->data.location = VARYING_SLOT_POS;
324 }
325 c->input_var_position = var;
326 } else if (decl->Semantic.Name == TGSI_SEMANTIC_PCOORD) {
327 if (c->cap_point_is_sysval) {
328 var->data.mode = nir_var_system_value;
329 var->data.location = SYSTEM_VALUE_POINT_COORD;
330 } else {
331 var->data.location = VARYING_SLOT_PNTC;
332 }
333 c->input_var_point = var;
334 } else {
335 var->data.location =
336 tgsi_varying_semantic_to_slot(decl->Semantic.Name,
337 decl->Semantic.Index);
338 }
339 } else {
340 assert(!decl->Declaration.Semantic);
341 var->data.location = VERT_ATTRIB_GENERIC0 + idx;
342 }
343 var->data.index = 0;
344 var->data.interpolation =
345 ttn_translate_interp_mode(decl->Interp.Interpolate);
346
347 exec_list_push_tail(&b->shader->inputs, &var->node);
348 c->inputs[idx] = var;
349
350 for (int i = 0; i < array_size; i++)
351 b->shader->info.inputs_read |= 1 << (var->data.location + i);
352
353 break;
354 case TGSI_FILE_OUTPUT: {
355 int semantic_name = decl->Semantic.Name;
356 int semantic_index = decl->Semantic.Index;
357 /* Since we can't load from outputs in the IR, we make temporaries
358 * for the outputs and emit stores to the real outputs at the end of
359 * the shader.
360 */
361 nir_register *reg = nir_local_reg_create(b->impl);
362 reg->num_components = 4;
363 if (is_array)
364 reg->num_array_elems = array_size;
365
366 var->data.mode = nir_var_shader_out;
367 var->name = ralloc_asprintf(var, "out_%d", idx);
368 var->data.index = 0;
369 var->data.interpolation =
370 ttn_translate_interp_mode(decl->Interp.Interpolate);
371
372 if (c->scan->processor == PIPE_SHADER_FRAGMENT) {
373 switch (semantic_name) {
374 case TGSI_SEMANTIC_COLOR: {
375 /* TODO tgsi loses some information, so we cannot
376 * actually differentiate here between DSB and MRT
377 * at this point. But so far no drivers using tgsi-
378 * to-nir support dual source blend:
379 */
380 bool dual_src_blend = false;
381 if (dual_src_blend && (semantic_index == 1)) {
382 var->data.location = FRAG_RESULT_DATA0;
383 var->data.index = 1;
384 } else {
385 if (c->scan->properties[TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS])
386 var->data.location = FRAG_RESULT_COLOR;
387 else
388 var->data.location = FRAG_RESULT_DATA0 + semantic_index;
389 }
390 break;
391 }
392 case TGSI_SEMANTIC_POSITION:
393 var->data.location = FRAG_RESULT_DEPTH;
394 var->type = glsl_float_type();
395 break;
396 default:
397 fprintf(stderr, "Bad TGSI semantic: %d/%d\n",
398 decl->Semantic.Name, decl->Semantic.Index);
399 abort();
400 }
401 } else {
402 var->data.location =
403 tgsi_varying_semantic_to_slot(semantic_name, semantic_index);
404 }
405
406 if (is_array) {
407 unsigned j;
408 for (j = 0; j < array_size; j++) {
409 c->output_regs[idx + j].offset = i + j;
410 c->output_regs[idx + j].reg = reg;
411 }
412 } else {
413 c->output_regs[idx].offset = i;
414 c->output_regs[idx].reg = reg;
415 }
416
417 exec_list_push_tail(&b->shader->outputs, &var->node);
418 c->outputs[idx] = var;
419
420 for (int i = 0; i < array_size; i++)
421 b->shader->info.outputs_written |= 1ull << (var->data.location + i);
422 }
423 break;
424 case TGSI_FILE_CONSTANT:
425 var->data.mode = nir_var_uniform;
426 var->name = ralloc_asprintf(var, "uniform_%d", idx);
427 var->data.location = idx;
428
429 exec_list_push_tail(&b->shader->uniforms, &var->node);
430 break;
431 default:
432 unreachable("bad declaration file");
433 return;
434 }
435
436 if (is_array)
437 break;
438 }
439
440 }
441 }
442
443 static void
444 ttn_emit_immediate(struct ttn_compile *c)
445 {
446 nir_builder *b = &c->build;
447 struct tgsi_full_immediate *tgsi_imm = &c->token->FullImmediate;
448 nir_load_const_instr *load_const;
449 int i;
450
451 load_const = nir_load_const_instr_create(b->shader, 4, 32);
452 c->imm_defs[c->next_imm] = &load_const->def;
453 c->next_imm++;
454
455 for (i = 0; i < load_const->def.num_components; i++)
456 load_const->value[i].u32 = tgsi_imm->u[i].Uint;
457
458 nir_builder_instr_insert(b, &load_const->instr);
459 }
460
461 static nir_ssa_def *
462 ttn_src_for_indirect(struct ttn_compile *c, struct tgsi_ind_register *indirect);
463
464 /* generate either a constant or indirect deref chain for accessing an
465 * array variable.
466 */
467 static nir_deref_instr *
468 ttn_array_deref(struct ttn_compile *c, nir_variable *var, unsigned offset,
469 struct tgsi_ind_register *indirect)
470 {
471 nir_deref_instr *deref = nir_build_deref_var(&c->build, var);
472 nir_ssa_def *index = nir_imm_int(&c->build, offset);
473 if (indirect)
474 index = nir_iadd(&c->build, index, ttn_src_for_indirect(c, indirect));
475 return nir_build_deref_array(&c->build, deref, index);
476 }
477
478 /* Special case: Turn the frontface varying into a load of the
479 * frontface variable, and create the vector as required by TGSI.
480 */
481 static nir_ssa_def *
482 ttn_emulate_tgsi_front_face(struct ttn_compile *c)
483 {
484 nir_ssa_def *tgsi_frontface[4];
485
486 if (c->cap_face_is_sysval) {
487 /* When it's a system value, it should be an integer vector: (F, 0, 0, 1)
488 * F is 0xffffffff if front-facing, 0 if not.
489 */
490
491 nir_ssa_def *frontface = nir_load_front_face(&c->build, 1);
492
493 tgsi_frontface[0] = nir_bcsel(&c->build,
494 frontface,
495 nir_imm_int(&c->build, 0xffffffff),
496 nir_imm_int(&c->build, 0));
497 tgsi_frontface[1] = nir_imm_int(&c->build, 0);
498 tgsi_frontface[2] = nir_imm_int(&c->build, 0);
499 tgsi_frontface[3] = nir_imm_int(&c->build, 1);
500 } else {
501 /* When it's an input, it should be a float vector: (F, 0.0, 0.0, 1.0)
502 * F is positive if front-facing, negative if not.
503 */
504
505 assert(c->input_var_face);
506 nir_ssa_def *frontface = nir_load_var(&c->build, c->input_var_face);
507
508 tgsi_frontface[0] = nir_bcsel(&c->build,
509 frontface,
510 nir_imm_float(&c->build, 1.0),
511 nir_imm_float(&c->build, -1.0));
512 tgsi_frontface[1] = nir_imm_float(&c->build, 0.0);
513 tgsi_frontface[2] = nir_imm_float(&c->build, 0.0);
514 tgsi_frontface[3] = nir_imm_float(&c->build, 1.0);
515 }
516
517 return nir_vec(&c->build, tgsi_frontface, 4);
518 }
519
520 static nir_src
521 ttn_src_for_file_and_index(struct ttn_compile *c, unsigned file, unsigned index,
522 struct tgsi_ind_register *indirect,
523 struct tgsi_dimension *dim,
524 struct tgsi_ind_register *dimind,
525 bool src_is_float)
526 {
527 nir_builder *b = &c->build;
528 nir_src src;
529
530 memset(&src, 0, sizeof(src));
531
532 switch (file) {
533 case TGSI_FILE_TEMPORARY:
534 if (c->temp_regs[index].var) {
535 unsigned offset = c->temp_regs[index].offset;
536 nir_variable *var = c->temp_regs[index].var;
537 nir_ssa_def *load = nir_load_deref(&c->build,
538 ttn_array_deref(c, var, offset, indirect));
539
540 src = nir_src_for_ssa(load);
541 } else {
542 assert(!indirect);
543 src.reg.reg = c->temp_regs[index].reg;
544 }
545 assert(!dim);
546 break;
547
548 case TGSI_FILE_ADDRESS:
549 src.reg.reg = c->addr_reg;
550 assert(!dim);
551 break;
552
553 case TGSI_FILE_IMMEDIATE:
554 src = nir_src_for_ssa(c->imm_defs[index]);
555 assert(!indirect);
556 assert(!dim);
557 break;
558
559 case TGSI_FILE_SYSTEM_VALUE: {
560 nir_intrinsic_op op;
561 nir_ssa_def *load;
562
563 assert(!indirect);
564 assert(!dim);
565
566 switch (c->scan->system_value_semantic_name[index]) {
567 case TGSI_SEMANTIC_VERTEXID_NOBASE:
568 op = nir_intrinsic_load_vertex_id_zero_base;
569 load = nir_load_vertex_id_zero_base(b);
570 break;
571 case TGSI_SEMANTIC_VERTEXID:
572 op = nir_intrinsic_load_vertex_id;
573 load = nir_load_vertex_id(b);
574 break;
575 case TGSI_SEMANTIC_BASEVERTEX:
576 op = nir_intrinsic_load_base_vertex;
577 load = nir_load_base_vertex(b);
578 break;
579 case TGSI_SEMANTIC_INSTANCEID:
580 op = nir_intrinsic_load_instance_id;
581 load = nir_load_instance_id(b);
582 break;
583 case TGSI_SEMANTIC_FACE:
584 assert(c->cap_face_is_sysval);
585 op = nir_intrinsic_load_front_face;
586 load = ttn_emulate_tgsi_front_face(c);
587 break;
588 case TGSI_SEMANTIC_POSITION:
589 assert(c->cap_position_is_sysval);
590 op = nir_intrinsic_load_frag_coord;
591 load = nir_load_frag_coord(b);
592 break;
593 case TGSI_SEMANTIC_PCOORD:
594 assert(c->cap_point_is_sysval);
595 op = nir_intrinsic_load_point_coord;
596 load = nir_load_point_coord(b);
597 break;
598 default:
599 unreachable("bad system value");
600 }
601
602 src = nir_src_for_ssa(load);
603 b->shader->info.system_values_read |=
604 (1 << nir_system_value_from_intrinsic(op));
605
606 break;
607 }
608
609 case TGSI_FILE_INPUT:
610 if (c->scan->processor == PIPE_SHADER_FRAGMENT &&
611 c->scan->input_semantic_name[index] == TGSI_SEMANTIC_FACE) {
612 assert(!c->cap_face_is_sysval && c->input_var_face);
613 return nir_src_for_ssa(ttn_emulate_tgsi_front_face(c));
614 } else if (c->scan->processor == PIPE_SHADER_FRAGMENT &&
615 c->scan->input_semantic_name[index] == TGSI_SEMANTIC_POSITION) {
616 assert(!c->cap_position_is_sysval && c->input_var_position);
617 return nir_src_for_ssa(nir_load_var(&c->build, c->input_var_position));
618 } else if (c->scan->processor == PIPE_SHADER_FRAGMENT &&
619 c->scan->input_semantic_name[index] == TGSI_SEMANTIC_PCOORD) {
620 assert(!c->cap_point_is_sysval && c->input_var_point);
621 return nir_src_for_ssa(nir_load_var(&c->build, c->input_var_point));
622 } else {
623 /* Indirection on input arrays isn't supported by TTN. */
624 assert(!dim);
625 nir_deref_instr *deref = nir_build_deref_var(&c->build,
626 c->inputs[index]);
627 return nir_src_for_ssa(nir_load_deref(&c->build, deref));
628 }
629 break;
630
631 case TGSI_FILE_CONSTANT: {
632 nir_intrinsic_instr *load;
633 nir_intrinsic_op op;
634 unsigned srcn = 0;
635
636 if (dim && (dim->Index > 0 || dim->Indirect)) {
637 op = nir_intrinsic_load_ubo;
638 } else {
639 op = nir_intrinsic_load_uniform;
640 }
641
642 load = nir_intrinsic_instr_create(b->shader, op);
643 if (op == nir_intrinsic_load_uniform) {
644 nir_intrinsic_set_type(load, src_is_float ? nir_type_float :
645 nir_type_int);
646 }
647
648 load->num_components = 4;
649 if (dim && (dim->Index > 0 || dim->Indirect)) {
650 if (dimind) {
651 load->src[srcn] =
652 ttn_src_for_file_and_index(c, dimind->File, dimind->Index,
653 NULL, NULL, NULL, false);
654 } else {
655 /* UBOs start at index 1 in TGSI: */
656 load->src[srcn] =
657 nir_src_for_ssa(nir_imm_int(b, dim->Index - 1));
658 }
659 srcn++;
660 }
661
662 nir_ssa_def *offset;
663 if (op == nir_intrinsic_load_ubo) {
664 /* UBO loads don't have a base offset. */
665 offset = nir_imm_int(b, index);
666 if (indirect) {
667 offset = nir_iadd(b, offset, ttn_src_for_indirect(c, indirect));
668 }
669 /* UBO offsets are in bytes, but TGSI gives them to us in vec4's */
670 offset = nir_ishl(b, offset, nir_imm_int(b, 4));
671 } else {
672 nir_intrinsic_set_base(load, index);
673 if (indirect) {
674 offset = ttn_src_for_indirect(c, indirect);
675 } else {
676 offset = nir_imm_int(b, 0);
677 }
678 }
679 load->src[srcn++] = nir_src_for_ssa(offset);
680
681 nir_ssa_dest_init(&load->instr, &load->dest, 4, 32, NULL);
682 nir_builder_instr_insert(b, &load->instr);
683
684 src = nir_src_for_ssa(&load->dest.ssa);
685 break;
686 }
687
688 default:
689 unreachable("bad src file");
690 }
691
692
693 return src;
694 }
695
696 static nir_ssa_def *
697 ttn_src_for_indirect(struct ttn_compile *c, struct tgsi_ind_register *indirect)
698 {
699 nir_builder *b = &c->build;
700 nir_alu_src src;
701 memset(&src, 0, sizeof(src));
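/* The indirect register selects a single channel via its Swizzle field;
 * replicate it across the swizzle so the one-component mov below picks it up.
 */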
702 for (int i = 0; i < 4; i++)
703 src.swizzle[i] = indirect->Swizzle;
704 src.src = ttn_src_for_file_and_index(c,
705 indirect->File,
706 indirect->Index,
707 NULL, NULL, NULL,
708 false);
709 return nir_mov_alu(b, src, 1);
710 }
711
712 static nir_alu_dest
713 ttn_get_dest(struct ttn_compile *c, struct tgsi_full_dst_register *tgsi_fdst)
714 {
715 struct tgsi_dst_register *tgsi_dst = &tgsi_fdst->Register;
716 nir_alu_dest dest;
717 unsigned index = tgsi_dst->Index;
718
719 memset(&dest, 0, sizeof(dest));
720
721 if (tgsi_dst->File == TGSI_FILE_TEMPORARY) {
722 if (c->temp_regs[index].var) {
723 nir_register *reg;
724
725 /* this works because TGSI will give us a base offset
726 * (in case of indirect index) that points back into
727 * the array. Access can be direct or indirect, we
728 * don't really care. Just create a one-shot dst reg
729 * that will get store_var'd back into the array var
730 * at the end of ttn_emit_instruction()
731 */
732 reg = nir_local_reg_create(c->build.impl);
733 reg->num_components = 4;
734 dest.dest.reg.reg = reg;
735 dest.dest.reg.base_offset = 0;
736 } else {
737 assert(!tgsi_dst->Indirect);
738 dest.dest.reg.reg = c->temp_regs[index].reg;
739 dest.dest.reg.base_offset = c->temp_regs[index].offset;
740 }
741 } else if (tgsi_dst->File == TGSI_FILE_OUTPUT) {
742 dest.dest.reg.reg = c->output_regs[index].reg;
743 dest.dest.reg.base_offset = c->output_regs[index].offset;
744 } else if (tgsi_dst->File == TGSI_FILE_ADDRESS) {
745 assert(index == 0);
746 dest.dest.reg.reg = c->addr_reg;
747 }
748
749 dest.write_mask = tgsi_dst->WriteMask;
750 dest.saturate = false;
751
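/* Indirectly-addressed temporaries were already handled above: they get a
 * one-shot register that is stored back to the array variable at the end of
 * ttn_emit_instruction(), so only attach an indirect for the other files.
 */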
752 if (tgsi_dst->Indirect && (tgsi_dst->File != TGSI_FILE_TEMPORARY)) {
753 nir_src *indirect = ralloc(c->build.shader, nir_src);
754 *indirect = nir_src_for_ssa(ttn_src_for_indirect(c, &tgsi_fdst->Indirect));
755 dest.dest.reg.indirect = indirect;
756 }
757
758 return dest;
759 }
760
761 static nir_variable *
762 ttn_get_var(struct ttn_compile *c, struct tgsi_full_dst_register *tgsi_fdst)
763 {
764 struct tgsi_dst_register *tgsi_dst = &tgsi_fdst->Register;
765 unsigned index = tgsi_dst->Index;
766
767 if (tgsi_dst->File == TGSI_FILE_TEMPORARY) {
768 /* we should not have an indirect when there is no var! */
769 if (!c->temp_regs[index].var)
770 assert(!tgsi_dst->Indirect);
771 return c->temp_regs[index].var;
772 }
773
774 return NULL;
775 }
776
777 static nir_ssa_def *
778 ttn_get_src(struct ttn_compile *c, struct tgsi_full_src_register *tgsi_fsrc,
779 int src_idx)
780 {
781 nir_builder *b = &c->build;
782 struct tgsi_src_register *tgsi_src = &tgsi_fsrc->Register;
783 enum tgsi_opcode opcode = c->token->FullInstruction.Instruction.Opcode;
784 unsigned tgsi_src_type = tgsi_opcode_infer_src_type(opcode, src_idx);
785 bool src_is_float = (tgsi_src_type == TGSI_TYPE_FLOAT ||
786 tgsi_src_type == TGSI_TYPE_DOUBLE ||
787 tgsi_src_type == TGSI_TYPE_UNTYPED);
788 nir_alu_src src;
789
790 memset(&src, 0, sizeof(src));
791
792 if (tgsi_src->File == TGSI_FILE_NULL) {
793 return nir_imm_float(b, 0.0);
794 } else if (tgsi_src->File == TGSI_FILE_SAMPLER) {
795 /* Only the index of the sampler gets used in texturing, and it will
796 * handle looking that up on its own instead of using the nir_alu_src.
797 */
798 assert(!tgsi_src->Indirect);
799 return NULL;
800 } else {
801 struct tgsi_ind_register *ind = NULL;
802 struct tgsi_dimension *dim = NULL;
803 struct tgsi_ind_register *dimind = NULL;
804 if (tgsi_src->Indirect)
805 ind = &tgsi_fsrc->Indirect;
806 if (tgsi_src->Dimension) {
807 dim = &tgsi_fsrc->Dimension;
808 if (dim->Indirect)
809 dimind = &tgsi_fsrc->DimIndirect;
810 }
811 src.src = ttn_src_for_file_and_index(c,
812 tgsi_src->File,
813 tgsi_src->Index,
814 ind, dim, dimind,
815 src_is_float);
816 }
817
818 src.swizzle[0] = tgsi_src->SwizzleX;
819 src.swizzle[1] = tgsi_src->SwizzleY;
820 src.swizzle[2] = tgsi_src->SwizzleZ;
821 src.swizzle[3] = tgsi_src->SwizzleW;
822
823 nir_ssa_def *def = nir_mov_alu(b, src, 4);
824
825 if (tgsi_type_is_64bit(tgsi_src_type))
826 def = nir_bitcast_vector(b, def, 64);
827
828 if (tgsi_src->Absolute) {
829 if (src_is_float)
830 def = nir_fabs(b, def);
831 else
832 def = nir_iabs(b, def);
833 }
834
835 if (tgsi_src->Negate) {
836 if (src_is_float)
837 def = nir_fneg(b, def);
838 else
839 def = nir_ineg(b, def);
840 }
841
842 return def;
843 }
844
845 static void
846 ttn_move_dest_masked(nir_builder *b, nir_alu_dest dest,
847 nir_ssa_def *def, unsigned write_mask)
848 {
849 if (!(dest.write_mask & write_mask))
850 return;
851
852 nir_alu_instr *mov = nir_alu_instr_create(b->shader, nir_op_mov);
853 mov->dest = dest;
854 mov->dest.write_mask &= write_mask;
855 mov->src[0].src = nir_src_for_ssa(def);
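/* def may have fewer than 4 components; replicate its last component into
 * the remaining swizzle slots so every enabled writemask channel still
 * reads a valid source component.
 */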
856 for (unsigned i = def->num_components; i < 4; i++)
857 mov->src[0].swizzle[i] = def->num_components - 1;
858 nir_builder_instr_insert(b, &mov->instr);
859 }
860
861 static void
862 ttn_move_dest(nir_builder *b, nir_alu_dest dest, nir_ssa_def *def)
863 {
864 ttn_move_dest_masked(b, dest, def, TGSI_WRITEMASK_XYZW);
865 }
866
867 static void
868 ttn_alu(nir_builder *b, nir_op op, nir_alu_dest dest, unsigned dest_bitsize,
869 nir_ssa_def **src)
870 {
871 nir_ssa_def *def = nir_build_alu_src_arr(b, op, src);
872 if (def->bit_size == 1)
873 def = nir_ineg(b, nir_b2i(b, def, dest_bitsize));
874 assert(def->bit_size == dest_bitsize);
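/* TGSI registers are vec4s of 32-bit channels: 64-bit sources were bitcast
 * up to 64-bit vec2s in ttn_get_src(), so a 64-bit result has to be bitcast
 * back down to a 32-bit vec4 before it is written to the destination.
 */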
875 if (dest_bitsize == 64) {
876 if (def->num_components > 2) {
877 /* 32 -> 64 bit conversion ops are supposed to only convert the first
878 * two components, and we need to truncate here to avoid creating a
879 * vec8 after bitcasting the destination.
880 */
881 def = nir_channels(b, def, 0x3);
882 }
883 def = nir_bitcast_vector(b, def, 32);
884 }
885 ttn_move_dest(b, dest, def);
886 }
887
888 static void
889 ttn_arl(nir_builder *b, nir_op op, nir_alu_dest dest, nir_ssa_def **src)
890 {
891 ttn_move_dest(b, dest, nir_f2i32(b, nir_ffloor(b, src[0])));
892 }
893
894 /* EXP - Approximate Exponential Base 2
895 * dst.x = 2^{\lfloor src.x\rfloor}
896 * dst.y = src.x - \lfloor src.x\rfloor
897 * dst.z = 2^{src.x}
898 * dst.w = 1.0
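*
*  e.g. src.x = 2.5 yields dst = (4.0, 0.5, ~5.66, 1.0)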
899 */
900 static void
901 ttn_exp(nir_builder *b, nir_op op, nir_alu_dest dest, nir_ssa_def **src)
902 {
903 nir_ssa_def *srcx = ttn_channel(b, src[0], X);
904
905 ttn_move_dest_masked(b, dest, nir_fexp2(b, nir_ffloor(b, srcx)),
906 TGSI_WRITEMASK_X);
907 ttn_move_dest_masked(b, dest, nir_fsub(b, srcx, nir_ffloor(b, srcx)),
908 TGSI_WRITEMASK_Y);
909 ttn_move_dest_masked(b, dest, nir_fexp2(b, srcx), TGSI_WRITEMASK_Z);
910 ttn_move_dest_masked(b, dest, nir_imm_float(b, 1.0), TGSI_WRITEMASK_W);
911 }
912
913 /* LOG - Approximate Logarithm Base 2
914 * dst.x = \lfloor\log_2{|src.x|}\rfloor
915 * dst.y = \frac{|src.x|}{2^{\lfloor\log_2{|src.x|}\rfloor}}
916 * dst.z = \log_2{|src.x|}
917 * dst.w = 1.0
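*
*  e.g. src.x = 10.0 yields dst = (3.0, 1.25, ~3.32, 1.0)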
918 */
919 static void
920 ttn_log(nir_builder *b, nir_op op, nir_alu_dest dest, nir_ssa_def **src)
921 {
922 nir_ssa_def *abs_srcx = nir_fabs(b, ttn_channel(b, src[0], X));
923 nir_ssa_def *log2 = nir_flog2(b, abs_srcx);
924
925 ttn_move_dest_masked(b, dest, nir_ffloor(b, log2), TGSI_WRITEMASK_X);
926 ttn_move_dest_masked(b, dest,
927 nir_fdiv(b, abs_srcx, nir_fexp2(b, nir_ffloor(b, log2))),
928 TGSI_WRITEMASK_Y);
929 ttn_move_dest_masked(b, dest, nir_flog2(b, abs_srcx), TGSI_WRITEMASK_Z);
930 ttn_move_dest_masked(b, dest, nir_imm_float(b, 1.0), TGSI_WRITEMASK_W);
931 }
932
933 /* DST - Distance Vector
934 * dst.x = 1.0
935 * dst.y = src0.y \times src1.y
936 * dst.z = src0.z
937 * dst.w = src1.w
938 */
939 static void
940 ttn_dst(nir_builder *b, nir_op op, nir_alu_dest dest, nir_ssa_def **src)
941 {
942 ttn_move_dest_masked(b, dest, nir_imm_float(b, 1.0), TGSI_WRITEMASK_X);
943 ttn_move_dest_masked(b, dest, nir_fmul(b, src[0], src[1]), TGSI_WRITEMASK_Y);
944 ttn_move_dest_masked(b, dest, nir_mov(b, src[0]), TGSI_WRITEMASK_Z);
945 ttn_move_dest_masked(b, dest, nir_mov(b, src[1]), TGSI_WRITEMASK_W);
946 }
947
948 /* LIT - Light Coefficients
949 * dst.x = 1.0
950 * dst.y = max(src.x, 0.0)
951 * dst.z = (src.x > 0.0) ? max(src.y, 0.0)^{clamp(src.w, -128.0, 128.0)} : 0
952 * dst.w = 1.0
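*
*  e.g. src = (0.5, 0.25, -, 2.0) yields dst = (1.0, 0.5, 0.0625, 1.0)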
953 */
954 static void
955 ttn_lit(nir_builder *b, nir_op op, nir_alu_dest dest, nir_ssa_def **src)
956 {
957 ttn_move_dest_masked(b, dest, nir_imm_float(b, 1.0), TGSI_WRITEMASK_XW);
958
959 ttn_move_dest_masked(b, dest, nir_fmax(b, ttn_channel(b, src[0], X),
960 nir_imm_float(b, 0.0)), TGSI_WRITEMASK_Y);
961
962 if (dest.write_mask & TGSI_WRITEMASK_Z) {
963 nir_ssa_def *src0_y = ttn_channel(b, src[0], Y);
964 nir_ssa_def *wclamp = nir_fmax(b, nir_fmin(b, ttn_channel(b, src[0], W),
965 nir_imm_float(b, 128.0)),
966 nir_imm_float(b, -128.0));
967 nir_ssa_def *pow = nir_fpow(b, nir_fmax(b, src0_y, nir_imm_float(b, 0.0)),
968 wclamp);
969
970 ttn_move_dest_masked(b, dest,
971 nir_bcsel(b,
972 nir_flt(b,
973 ttn_channel(b, src[0], X),
974 nir_imm_float(b, 0.0)),
975 nir_imm_float(b, 0.0),
976 pow),
977 TGSI_WRITEMASK_Z);
978 }
979 }
980
981 static void
982 ttn_sle(nir_builder *b, nir_op op, nir_alu_dest dest, nir_ssa_def **src)
983 {
984 ttn_move_dest(b, dest, nir_sge(b, src[1], src[0]));
985 }
986
987 static void
988 ttn_sgt(nir_builder *b, nir_op op, nir_alu_dest dest, nir_ssa_def **src)
989 {
990 ttn_move_dest(b, dest, nir_slt(b, src[1], src[0]));
991 }
992
993 static void
994 ttn_dp2(nir_builder *b, nir_op op, nir_alu_dest dest, nir_ssa_def **src)
995 {
996 ttn_move_dest(b, dest, nir_fdot2(b, src[0], src[1]));
997 }
998
999 static void
1000 ttn_dp3(nir_builder *b, nir_op op, nir_alu_dest dest, nir_ssa_def **src)
1001 {
1002 ttn_move_dest(b, dest, nir_fdot3(b, src[0], src[1]));
1003 }
1004
1005 static void
1006 ttn_dp4(nir_builder *b, nir_op op, nir_alu_dest dest, nir_ssa_def **src)
1007 {
1008 ttn_move_dest(b, dest, nir_fdot4(b, src[0], src[1]));
1009 }
1010
1011 static void
1012 ttn_umad(nir_builder *b, nir_op op, nir_alu_dest dest, nir_ssa_def **src)
1013 {
1014 ttn_move_dest(b, dest, nir_iadd(b, nir_imul(b, src[0], src[1]), src[2]));
1015 }
1016
1017 static void
1018 ttn_arr(nir_builder *b, nir_op op, nir_alu_dest dest, nir_ssa_def **src)
1019 {
1020 ttn_move_dest(b, dest, nir_f2i32(b, nir_fround_even(b, src[0])));
1021 }
1022
1023 static void
1024 ttn_cmp(nir_builder *b, nir_op op, nir_alu_dest dest, nir_ssa_def **src)
1025 {
1026 ttn_move_dest(b, dest, nir_bcsel(b,
1027 nir_flt(b, src[0], nir_imm_float(b, 0.0)),
1028 src[1], src[2]));
1029 }
1030
1031 static void
1032 ttn_ucmp(nir_builder *b, nir_op op, nir_alu_dest dest, nir_ssa_def **src)
1033 {
1034 ttn_move_dest(b, dest, nir_bcsel(b,
1035 nir_ine(b, src[0], nir_imm_int(b, 0)),
1036 src[1], src[2]));
1037 }
1038
1039 static void
1040 ttn_kill(nir_builder *b, nir_op op, nir_alu_dest dest, nir_ssa_def **src)
1041 {
1042 nir_intrinsic_instr *discard =
1043 nir_intrinsic_instr_create(b->shader, nir_intrinsic_discard);
1044 nir_builder_instr_insert(b, &discard->instr);
1045 b->shader->info.fs.uses_discard = true;
1046 }
1047
1048 static void
1049 ttn_kill_if(nir_builder *b, nir_op op, nir_alu_dest dest, nir_ssa_def **src)
1050 {
1051 nir_ssa_def *cmp = nir_bany(b, nir_flt(b, src[0], nir_imm_float(b, 0.0)));
1052 nir_intrinsic_instr *discard =
1053 nir_intrinsic_instr_create(b->shader, nir_intrinsic_discard_if);
1054 discard->src[0] = nir_src_for_ssa(cmp);
1055 nir_builder_instr_insert(b, &discard->instr);
1056 b->shader->info.fs.uses_discard = true;
1057 }
1058
1059 static void
1060 ttn_if(struct ttn_compile *c, nir_ssa_def *src, bool is_uint)
1061 {
1062 nir_builder *b = &c->build;
1063 nir_ssa_def *src_x = ttn_channel(b, src, X);
1064
1065 nir_if *if_stmt = nir_if_create(b->shader);
1066 if (is_uint) {
1067 /* equivalent to TGSI UIF, src is interpreted as integer */
1068 if_stmt->condition = nir_src_for_ssa(nir_ine(b, src_x, nir_imm_int(b, 0)));
1069 } else {
1070 /* equivalent to TGSI IF, src is interpreted as float */
1071 if_stmt->condition = nir_src_for_ssa(nir_fne(b, src_x, nir_imm_float(b, 0.0)));
1072 }
1073 nir_builder_cf_insert(b, &if_stmt->cf_node);
1074
1075 c->if_stack[c->if_stack_pos] = nir_after_cf_node(&if_stmt->cf_node);
1076 c->if_stack_pos++;
1077
1078 b->cursor = nir_after_cf_list(&if_stmt->then_list);
1079
1080 c->if_stack[c->if_stack_pos] = nir_after_cf_list(&if_stmt->else_list);
1081 c->if_stack_pos++;
1082 }
1083
1084 static void
1085 ttn_else(struct ttn_compile *c)
1086 {
1087 nir_builder *b = &c->build;
1088
1089 b->cursor = c->if_stack[c->if_stack_pos - 1];
1090 }
1091
1092 static void
1093 ttn_endif(struct ttn_compile *c)
1094 {
1095 nir_builder *b = &c->build;
1096
1097 c->if_stack_pos -= 2;
1098 b->cursor = c->if_stack[c->if_stack_pos];
1099 }
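/* Summary of the if_stack bookkeeping implemented by ttn_if()/ttn_else()/
 * ttn_endif() above, with n being if_stack_pos before the IF:
 *
 *   IF:    if_stack[n]   <- cursor just after the new nir_if
 *          if_stack[n+1] <- cursor at the end of its else_list
 *          builder cursor moves to the end of the then_list
 *   ELSE:  builder cursor moves to if_stack[n+1] (the else_list)
 *   ENDIF: builder cursor moves to if_stack[n]; if_stack_pos pops back to n
 */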
1100
1101 static void
1102 ttn_bgnloop(struct ttn_compile *c)
1103 {
1104 nir_builder *b = &c->build;
1105
1106 nir_loop *loop = nir_loop_create(b->shader);
1107 nir_builder_cf_insert(b, &loop->cf_node);
1108
1109 c->loop_stack[c->loop_stack_pos] = nir_after_cf_node(&loop->cf_node);
1110 c->loop_stack_pos++;
1111
1112 b->cursor = nir_after_cf_list(&loop->body);
1113 }
1114
1115 static void
1116 ttn_cont(nir_builder *b)
1117 {
1118 nir_jump_instr *instr = nir_jump_instr_create(b->shader, nir_jump_continue);
1119 nir_builder_instr_insert(b, &instr->instr);
1120 }
1121
1122 static void
1123 ttn_brk(nir_builder *b)
1124 {
1125 nir_jump_instr *instr = nir_jump_instr_create(b->shader, nir_jump_break);
1126 nir_builder_instr_insert(b, &instr->instr);
1127 }
1128
1129 static void
1130 ttn_endloop(struct ttn_compile *c)
1131 {
1132 nir_builder *b = &c->build;
1133
1134 c->loop_stack_pos--;
1135 b->cursor = c->loop_stack[c->loop_stack_pos];
1136 }
1137
1138 static void
1139 setup_texture_info(nir_tex_instr *instr, unsigned texture)
1140 {
1141 switch (texture) {
1142 case TGSI_TEXTURE_BUFFER:
1143 instr->sampler_dim = GLSL_SAMPLER_DIM_BUF;
1144 break;
1145 case TGSI_TEXTURE_1D:
1146 instr->sampler_dim = GLSL_SAMPLER_DIM_1D;
1147 break;
1148 case TGSI_TEXTURE_1D_ARRAY:
1149 instr->sampler_dim = GLSL_SAMPLER_DIM_1D;
1150 instr->is_array = true;
1151 break;
1152 case TGSI_TEXTURE_SHADOW1D:
1153 instr->sampler_dim = GLSL_SAMPLER_DIM_1D;
1154 instr->is_shadow = true;
1155 break;
1156 case TGSI_TEXTURE_SHADOW1D_ARRAY:
1157 instr->sampler_dim = GLSL_SAMPLER_DIM_1D;
1158 instr->is_shadow = true;
1159 instr->is_array = true;
1160 break;
1161 case TGSI_TEXTURE_2D:
1162 instr->sampler_dim = GLSL_SAMPLER_DIM_2D;
1163 break;
1164 case TGSI_TEXTURE_2D_ARRAY:
1165 instr->sampler_dim = GLSL_SAMPLER_DIM_2D;
1166 instr->is_array = true;
1167 break;
1168 case TGSI_TEXTURE_2D_MSAA:
1169 instr->sampler_dim = GLSL_SAMPLER_DIM_MS;
1170 break;
1171 case TGSI_TEXTURE_2D_ARRAY_MSAA:
1172 instr->sampler_dim = GLSL_SAMPLER_DIM_MS;
1173 instr->is_array = true;
1174 break;
1175 case TGSI_TEXTURE_SHADOW2D:
1176 instr->sampler_dim = GLSL_SAMPLER_DIM_2D;
1177 instr->is_shadow = true;
1178 break;
1179 case TGSI_TEXTURE_SHADOW2D_ARRAY:
1180 instr->sampler_dim = GLSL_SAMPLER_DIM_2D;
1181 instr->is_shadow = true;
1182 instr->is_array = true;
1183 break;
1184 case TGSI_TEXTURE_3D:
1185 instr->sampler_dim = GLSL_SAMPLER_DIM_3D;
1186 break;
1187 case TGSI_TEXTURE_CUBE:
1188 instr->sampler_dim = GLSL_SAMPLER_DIM_CUBE;
1189 break;
1190 case TGSI_TEXTURE_CUBE_ARRAY:
1191 instr->sampler_dim = GLSL_SAMPLER_DIM_CUBE;
1192 instr->is_array = true;
1193 break;
1194 case TGSI_TEXTURE_SHADOWCUBE:
1195 instr->sampler_dim = GLSL_SAMPLER_DIM_CUBE;
1196 instr->is_shadow = true;
1197 break;
1198 case TGSI_TEXTURE_SHADOWCUBE_ARRAY:
1199 instr->sampler_dim = GLSL_SAMPLER_DIM_CUBE;
1200 instr->is_shadow = true;
1201 instr->is_array = true;
1202 break;
1203 case TGSI_TEXTURE_RECT:
1204 instr->sampler_dim = GLSL_SAMPLER_DIM_RECT;
1205 break;
1206 case TGSI_TEXTURE_SHADOWRECT:
1207 instr->sampler_dim = GLSL_SAMPLER_DIM_RECT;
1208 instr->is_shadow = true;
1209 break;
1210 default:
1211 fprintf(stderr, "Unknown TGSI texture target %d\n", texture);
1212 abort();
1213 }
1214 }
1215
1216 static enum glsl_base_type
1217 base_type_for_alu_type(nir_alu_type type)
1218 {
1219 type = nir_alu_type_get_base_type(type);
1220
1221 switch (type) {
1222 case nir_type_float:
1223 return GLSL_TYPE_FLOAT;
1224 case nir_type_int:
1225 return GLSL_TYPE_INT;
1226 case nir_type_uint:
1227 return GLSL_TYPE_UINT;
1228 default:
1229 unreachable("invalid type");
1230 }
1231 }
1232
1233 static nir_variable *
1234 get_sampler_var(struct ttn_compile *c, int binding,
1235 enum glsl_sampler_dim dim,
1236 bool is_shadow,
1237 bool is_array,
1238 enum glsl_base_type base_type)
1239 {
1240 nir_variable *var = c->samplers[binding];
1241 if (!var) {
1242 const struct glsl_type *type =
1243 glsl_sampler_type(dim, is_shadow, is_array, base_type);
1244 var = nir_variable_create(c->build.shader, nir_var_uniform, type,
1245 "sampler");
1246 var->data.binding = binding;
1247 var->data.explicit_binding = true;
1248 c->samplers[binding] = var;
1249 }
1250
1251 return var;
1252 }
1253
1254 static void
1255 ttn_tex(struct ttn_compile *c, nir_alu_dest dest, nir_ssa_def **src)
1256 {
1257 nir_builder *b = &c->build;
1258 struct tgsi_full_instruction *tgsi_inst = &c->token->FullInstruction;
1259 nir_tex_instr *instr;
1260 nir_texop op;
1261 unsigned num_srcs, samp = 1, sview, i;
1262
1263 switch (tgsi_inst->Instruction.Opcode) {
1264 case TGSI_OPCODE_TEX:
1265 op = nir_texop_tex;
1266 num_srcs = 1;
1267 break;
1268 case TGSI_OPCODE_TEX2:
1269 op = nir_texop_tex;
1270 num_srcs = 1;
1271 samp = 2;
1272 break;
1273 case TGSI_OPCODE_TXP:
1274 op = nir_texop_tex;
1275 num_srcs = 2;
1276 break;
1277 case TGSI_OPCODE_TXB:
1278 op = nir_texop_txb;
1279 num_srcs = 2;
1280 break;
1281 case TGSI_OPCODE_TXB2:
1282 op = nir_texop_txb;
1283 num_srcs = 2;
1284 samp = 2;
1285 break;
1286 case TGSI_OPCODE_TXL:
1287 op = nir_texop_txl;
1288 num_srcs = 2;
1289 break;
1290 case TGSI_OPCODE_TXL2:
1291 op = nir_texop_txl;
1292 num_srcs = 2;
1293 samp = 2;
1294 break;
1295 case TGSI_OPCODE_TXF:
1296 if (tgsi_inst->Texture.Texture == TGSI_TEXTURE_2D_MSAA ||
1297 tgsi_inst->Texture.Texture == TGSI_TEXTURE_2D_ARRAY_MSAA) {
1298 op = nir_texop_txf_ms;
1299 } else {
1300 op = nir_texop_txf;
1301 }
1302 num_srcs = 2;
1303 break;
1304 case TGSI_OPCODE_TXD:
1305 op = nir_texop_txd;
1306 num_srcs = 3;
1307 samp = 3;
1308 break;
1309 case TGSI_OPCODE_LODQ:
1310 op = nir_texop_lod;
1311 num_srcs = 1;
1312 break;
1313
1314 default:
1315 fprintf(stderr, "unknown TGSI tex op %d\n", tgsi_inst->Instruction.Opcode);
1316 abort();
1317 }
1318
1319 if (tgsi_inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D ||
1320 tgsi_inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D_ARRAY ||
1321 tgsi_inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D ||
1322 tgsi_inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D_ARRAY ||
1323 tgsi_inst->Texture.Texture == TGSI_TEXTURE_SHADOWRECT ||
1324 tgsi_inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE ||
1325 tgsi_inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE_ARRAY) {
1326 num_srcs++;
1327 }
1328
1329 /* Two deref sources: one for the texture and one for the sampler. */
1330 num_srcs += 2;
1331
1332 num_srcs += tgsi_inst->Texture.NumOffsets;
1333
1334 instr = nir_tex_instr_create(b->shader, num_srcs);
1335 instr->op = op;
1336
1337 setup_texture_info(instr, tgsi_inst->Texture.Texture);
1338
1339 switch (instr->sampler_dim) {
1340 case GLSL_SAMPLER_DIM_1D:
1341 case GLSL_SAMPLER_DIM_BUF:
1342 instr->coord_components = 1;
1343 break;
1344 case GLSL_SAMPLER_DIM_2D:
1345 case GLSL_SAMPLER_DIM_RECT:
1346 case GLSL_SAMPLER_DIM_EXTERNAL:
1347 case GLSL_SAMPLER_DIM_MS:
1348 instr->coord_components = 2;
1349 break;
1350 case GLSL_SAMPLER_DIM_3D:
1351 case GLSL_SAMPLER_DIM_CUBE:
1352 instr->coord_components = 3;
1353 break;
1354 case GLSL_SAMPLER_DIM_SUBPASS:
1355 case GLSL_SAMPLER_DIM_SUBPASS_MS:
1356 unreachable("invalid sampler_dim");
1357 }
1358
1359 if (instr->is_array)
1360 instr->coord_components++;
1361
1362 assert(tgsi_inst->Src[samp].Register.File == TGSI_FILE_SAMPLER);
1363
1364 /* TODO if we supported any opc's which take an explicit SVIEW
1365 * src, we would use that here instead. But for the "legacy"
1366 * texture opc's the SVIEW index is the same as the SAMP index:
1367 */
1368 sview = tgsi_inst->Src[samp].Register.Index;
1369
1370 if (op == nir_texop_lod) {
1371 instr->dest_type = nir_type_float;
1372 } else if (sview < c->num_samp_types) {
1373 instr->dest_type = c->samp_types[sview];
1374 } else {
1375 instr->dest_type = nir_type_float;
1376 }
1377
1378 nir_variable *var =
1379 get_sampler_var(c, sview, instr->sampler_dim,
1380 instr->is_shadow,
1381 instr->is_array,
1382 base_type_for_alu_type(instr->dest_type));
1383
1384 nir_deref_instr *deref = nir_build_deref_var(b, var);
1385
1386 unsigned src_number = 0;
1387
1388 instr->src[src_number].src = nir_src_for_ssa(&deref->dest.ssa);
1389 instr->src[src_number].src_type = nir_tex_src_texture_deref;
1390 src_number++;
1391 instr->src[src_number].src = nir_src_for_ssa(&deref->dest.ssa);
1392 instr->src[src_number].src_type = nir_tex_src_sampler_deref;
1393 src_number++;
1394
1395 instr->src[src_number].src =
1396 nir_src_for_ssa(nir_swizzle(b, src[0], SWIZ(X, Y, Z, W),
1397 instr->coord_components));
1398 instr->src[src_number].src_type = nir_tex_src_coord;
1399 src_number++;
1400
1401 if (tgsi_inst->Instruction.Opcode == TGSI_OPCODE_TXP) {
1402 instr->src[src_number].src = nir_src_for_ssa(ttn_channel(b, src[0], W));
1403 instr->src[src_number].src_type = nir_tex_src_projector;
1404 src_number++;
1405 }
1406
1407 if (tgsi_inst->Instruction.Opcode == TGSI_OPCODE_TXB) {
1408 instr->src[src_number].src = nir_src_for_ssa(ttn_channel(b, src[0], W));
1409 instr->src[src_number].src_type = nir_tex_src_bias;
1410 src_number++;
1411 }
1412
1413 if (tgsi_inst->Instruction.Opcode == TGSI_OPCODE_TXB2) {
1414 instr->src[src_number].src = nir_src_for_ssa(ttn_channel(b, src[1], X));
1415 instr->src[src_number].src_type = nir_tex_src_bias;
1416 src_number++;
1417 }
1418
1419 if (tgsi_inst->Instruction.Opcode == TGSI_OPCODE_TXL) {
1420 instr->src[src_number].src = nir_src_for_ssa(ttn_channel(b, src[0], W));
1421 instr->src[src_number].src_type = nir_tex_src_lod;
1422 src_number++;
1423 }
1424
1425 if (tgsi_inst->Instruction.Opcode == TGSI_OPCODE_TXL2) {
1426 instr->src[src_number].src = nir_src_for_ssa(ttn_channel(b, src[1], X));
1427 instr->src[src_number].src_type = nir_tex_src_lod;
1428 src_number++;
1429 }
1430
1431 if (tgsi_inst->Instruction.Opcode == TGSI_OPCODE_TXF) {
1432 instr->src[src_number].src = nir_src_for_ssa(ttn_channel(b, src[0], W));
1433 if (op == nir_texop_txf_ms)
1434 instr->src[src_number].src_type = nir_tex_src_ms_index;
1435 else
1436 instr->src[src_number].src_type = nir_tex_src_lod;
1437 src_number++;
1438 }
1439
1440 if (tgsi_inst->Instruction.Opcode == TGSI_OPCODE_TXD) {
1441 instr->src[src_number].src_type = nir_tex_src_ddx;
1442 instr->src[src_number].src =
1443 nir_src_for_ssa(nir_swizzle(b, src[1], SWIZ(X, Y, Z, W),
1444 nir_tex_instr_src_size(instr, src_number)));
1445 src_number++;
1446 instr->src[src_number].src_type = nir_tex_src_ddy;
1447 instr->src[src_number].src =
1448 nir_src_for_ssa(nir_swizzle(b, src[2], SWIZ(X, Y, Z, W),
1449 nir_tex_instr_src_size(instr, src_number)));
1450 src_number++;
1451 }
1452
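/* TGSI packs the shadow comparison value into the coordinate register
 * (.z or .w depending on the target), except for shadow cube arrays where
 * it comes from the second source register.
 */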
1453 if (instr->is_shadow) {
1454 if (instr->coord_components == 4)
1455 instr->src[src_number].src = nir_src_for_ssa(ttn_channel(b, src[1], X));
1456 else if (instr->coord_components == 3)
1457 instr->src[src_number].src = nir_src_for_ssa(ttn_channel(b, src[0], W));
1458 else
1459 instr->src[src_number].src = nir_src_for_ssa(ttn_channel(b, src[0], Z));
1460
1461 instr->src[src_number].src_type = nir_tex_src_comparator;
1462 src_number++;
1463 }
1464
1465 for (i = 0; i < tgsi_inst->Texture.NumOffsets; i++) {
1466 struct tgsi_texture_offset *tex_offset = &tgsi_inst->TexOffsets[i];
1467 * since TexOffset isn't using tgsi_full_src_register we get to
1468 * do some extra gymnastics:
1469 */
1470 nir_alu_src src;
1471
1472 memset(&src, 0, sizeof(src));
1473
1474 src.src = ttn_src_for_file_and_index(c,
1475 tex_offset->File,
1476 tex_offset->Index,
1477 NULL, NULL, NULL,
1478 true);
1479
1480 src.swizzle[0] = tex_offset->SwizzleX;
1481 src.swizzle[1] = tex_offset->SwizzleY;
1482 src.swizzle[2] = tex_offset->SwizzleZ;
1483 src.swizzle[3] = TGSI_SWIZZLE_W;
1484
1485 instr->src[src_number].src_type = nir_tex_src_offset;
1486 instr->src[src_number].src = nir_src_for_ssa(
1487 nir_mov_alu(b, src, nir_tex_instr_src_size(instr, src_number)));
1488 src_number++;
1489 }
1490
1491 assert(src_number == num_srcs);
1492 assert(src_number == instr->num_srcs);
1493
1494 nir_ssa_dest_init(&instr->instr, &instr->dest,
1495 nir_tex_instr_dest_size(instr),
1496 32, NULL);
1497 nir_builder_instr_insert(b, &instr->instr);
1498
1499 /* Resolve the writemask on the texture op. */
1500 ttn_move_dest(b, dest, &instr->dest.ssa);
1501 }
1502
1503 /* TGSI_OPCODE_TXQ is actually two distinct operations:
1504 *
1505 * dst.x = texture\_width(unit, lod)
1506 * dst.y = texture\_height(unit, lod)
1507 * dst.z = texture\_depth(unit, lod)
1508 * dst.w = texture\_levels(unit)
1509 *
1510 * dst.xyz map to NIR txs opcode, and dst.w maps to query_levels
1511 */
1512 static void
1513 ttn_txq(struct ttn_compile *c, nir_alu_dest dest, nir_ssa_def **src)
1514 {
1515 nir_builder *b = &c->build;
1516 struct tgsi_full_instruction *tgsi_inst = &c->token->FullInstruction;
1517 nir_tex_instr *txs, *qlv;
1518
1519 txs = nir_tex_instr_create(b->shader, 2);
1520 txs->op = nir_texop_txs;
1521 setup_texture_info(txs, tgsi_inst->Texture.Texture);
1522
1523 qlv = nir_tex_instr_create(b->shader, 1);
1524 qlv->op = nir_texop_query_levels;
1525 setup_texture_info(qlv, tgsi_inst->Texture.Texture);
1526
1527 assert(tgsi_inst->Src[1].Register.File == TGSI_FILE_SAMPLER);
1528 int tex_index = tgsi_inst->Src[1].Register.Index;
1529
1530 nir_variable *var =
1531 get_sampler_var(c, tex_index, txs->sampler_dim,
1532 txs->is_shadow,
1533 txs->is_array,
1534 base_type_for_alu_type(txs->dest_type));
1535
1536 nir_deref_instr *deref = nir_build_deref_var(b, var);
1537
1538 txs->src[0].src = nir_src_for_ssa(&deref->dest.ssa);
1539 txs->src[0].src_type = nir_tex_src_texture_deref;
1540
1541 qlv->src[0].src = nir_src_for_ssa(&deref->dest.ssa);
1542 qlv->src[0].src_type = nir_tex_src_texture_deref;
1543
1544 /* lod: */
1545 txs->src[1].src = nir_src_for_ssa(ttn_channel(b, src[0], X));
1546 txs->src[1].src_type = nir_tex_src_lod;
1547
1548 nir_ssa_dest_init(&txs->instr, &txs->dest,
1549 nir_tex_instr_dest_size(txs), 32, NULL);
1550 nir_builder_instr_insert(b, &txs->instr);
1551
1552 nir_ssa_dest_init(&qlv->instr, &qlv->dest, 1, 32, NULL);
1553 nir_builder_instr_insert(b, &qlv->instr);
1554
1555 ttn_move_dest_masked(b, dest, &txs->dest.ssa, TGSI_WRITEMASK_XYZ);
1556 ttn_move_dest_masked(b, dest, &qlv->dest.ssa, TGSI_WRITEMASK_W);
1557 }
1558
1559 static const nir_op op_trans[TGSI_OPCODE_LAST] = {
1560 [TGSI_OPCODE_ARL] = 0,
1561 [TGSI_OPCODE_MOV] = nir_op_mov,
1562 [TGSI_OPCODE_LIT] = 0,
1563 [TGSI_OPCODE_RCP] = nir_op_frcp,
1564 [TGSI_OPCODE_RSQ] = nir_op_frsq,
1565 [TGSI_OPCODE_EXP] = 0,
1566 [TGSI_OPCODE_LOG] = 0,
1567 [TGSI_OPCODE_MUL] = nir_op_fmul,
1568 [TGSI_OPCODE_ADD] = nir_op_fadd,
1569 [TGSI_OPCODE_DP3] = 0,
1570 [TGSI_OPCODE_DP4] = 0,
1571 [TGSI_OPCODE_DST] = 0,
1572 [TGSI_OPCODE_MIN] = nir_op_fmin,
1573 [TGSI_OPCODE_MAX] = nir_op_fmax,
1574 [TGSI_OPCODE_SLT] = nir_op_slt,
1575 [TGSI_OPCODE_SGE] = nir_op_sge,
1576 [TGSI_OPCODE_MAD] = nir_op_ffma,
1577 [TGSI_OPCODE_LRP] = 0,
1578 [TGSI_OPCODE_SQRT] = nir_op_fsqrt,
1579 [TGSI_OPCODE_FRC] = nir_op_ffract,
1580 [TGSI_OPCODE_FLR] = nir_op_ffloor,
1581 [TGSI_OPCODE_ROUND] = nir_op_fround_even,
1582 [TGSI_OPCODE_EX2] = nir_op_fexp2,
1583 [TGSI_OPCODE_LG2] = nir_op_flog2,
1584 [TGSI_OPCODE_POW] = nir_op_fpow,
1585 [TGSI_OPCODE_COS] = nir_op_fcos,
1586 [TGSI_OPCODE_DDX] = nir_op_fddx,
1587 [TGSI_OPCODE_DDY] = nir_op_fddy,
1588 [TGSI_OPCODE_KILL] = 0,
1589 [TGSI_OPCODE_PK2H] = 0, /* XXX */
1590 [TGSI_OPCODE_PK2US] = 0, /* XXX */
1591 [TGSI_OPCODE_PK4B] = 0, /* XXX */
1592 [TGSI_OPCODE_PK4UB] = 0, /* XXX */
1593 [TGSI_OPCODE_SEQ] = nir_op_seq,
1594 [TGSI_OPCODE_SGT] = 0,
1595 [TGSI_OPCODE_SIN] = nir_op_fsin,
1596 [TGSI_OPCODE_SNE] = nir_op_sne,
1597 [TGSI_OPCODE_SLE] = 0,
1598 [TGSI_OPCODE_TEX] = 0,
1599 [TGSI_OPCODE_TXD] = 0,
1600 [TGSI_OPCODE_TXP] = 0,
1601 [TGSI_OPCODE_UP2H] = 0, /* XXX */
1602 [TGSI_OPCODE_UP2US] = 0, /* XXX */
1603 [TGSI_OPCODE_UP4B] = 0, /* XXX */
1604 [TGSI_OPCODE_UP4UB] = 0, /* XXX */
1605 [TGSI_OPCODE_ARR] = 0,
1606
1607 /* No function calls, yet. */
1608 [TGSI_OPCODE_CAL] = 0, /* XXX */
1609 [TGSI_OPCODE_RET] = 0, /* XXX */
1610
1611 [TGSI_OPCODE_SSG] = nir_op_fsign,
1612 [TGSI_OPCODE_CMP] = 0,
1613 [TGSI_OPCODE_TXB] = 0,
1614 [TGSI_OPCODE_DIV] = nir_op_fdiv,
1615 [TGSI_OPCODE_DP2] = 0,
1616 [TGSI_OPCODE_TXL] = 0,
1617
1618 [TGSI_OPCODE_BRK] = 0,
1619 [TGSI_OPCODE_IF] = 0,
1620 [TGSI_OPCODE_UIF] = 0,
1621 [TGSI_OPCODE_ELSE] = 0,
1622 [TGSI_OPCODE_ENDIF] = 0,
1623
1624 [TGSI_OPCODE_DDX_FINE] = nir_op_fddx_fine,
1625 [TGSI_OPCODE_DDY_FINE] = nir_op_fddy_fine,
1626
1627 [TGSI_OPCODE_CEIL] = nir_op_fceil,
1628 [TGSI_OPCODE_I2F] = nir_op_i2f32,
1629 [TGSI_OPCODE_NOT] = nir_op_inot,
1630 [TGSI_OPCODE_TRUNC] = nir_op_ftrunc,
1631 [TGSI_OPCODE_SHL] = nir_op_ishl,
1632 [TGSI_OPCODE_AND] = nir_op_iand,
1633 [TGSI_OPCODE_OR] = nir_op_ior,
1634 [TGSI_OPCODE_MOD] = nir_op_umod,
1635 [TGSI_OPCODE_XOR] = nir_op_ixor,
1636 [TGSI_OPCODE_TXF] = 0,
1637 [TGSI_OPCODE_TXQ] = 0,
1638
1639 [TGSI_OPCODE_CONT] = 0,
1640
1641 [TGSI_OPCODE_EMIT] = 0, /* XXX */
1642 [TGSI_OPCODE_ENDPRIM] = 0, /* XXX */
1643
1644 [TGSI_OPCODE_BGNLOOP] = 0,
1645 [TGSI_OPCODE_BGNSUB] = 0, /* XXX: no function calls */
1646 [TGSI_OPCODE_ENDLOOP] = 0,
1647 [TGSI_OPCODE_ENDSUB] = 0, /* XXX: no function calls */
1648
1649 [TGSI_OPCODE_NOP] = 0,
1650 [TGSI_OPCODE_FSEQ] = nir_op_feq,
1651 [TGSI_OPCODE_FSGE] = nir_op_fge,
1652 [TGSI_OPCODE_FSLT] = nir_op_flt,
1653 [TGSI_OPCODE_FSNE] = nir_op_fne,
1654
1655 [TGSI_OPCODE_KILL_IF] = 0,
1656
1657 [TGSI_OPCODE_END] = 0,
1658
1659 [TGSI_OPCODE_F2I] = nir_op_f2i32,
1660 [TGSI_OPCODE_IDIV] = nir_op_idiv,
1661 [TGSI_OPCODE_IMAX] = nir_op_imax,
1662 [TGSI_OPCODE_IMIN] = nir_op_imin,
1663 [TGSI_OPCODE_INEG] = nir_op_ineg,
1664 [TGSI_OPCODE_ISGE] = nir_op_ige,
1665 [TGSI_OPCODE_ISHR] = nir_op_ishr,
1666 [TGSI_OPCODE_ISLT] = nir_op_ilt,
1667 [TGSI_OPCODE_F2U] = nir_op_f2u32,
1668 [TGSI_OPCODE_U2F] = nir_op_u2f32,
1669 [TGSI_OPCODE_UADD] = nir_op_iadd,
1670 [TGSI_OPCODE_UDIV] = nir_op_udiv,
1671 [TGSI_OPCODE_UMAD] = 0,
1672 [TGSI_OPCODE_UMAX] = nir_op_umax,
1673 [TGSI_OPCODE_UMIN] = nir_op_umin,
1674 [TGSI_OPCODE_UMOD] = nir_op_umod,
1675 [TGSI_OPCODE_UMUL] = nir_op_imul,
1676 [TGSI_OPCODE_USEQ] = nir_op_ieq,
1677 [TGSI_OPCODE_USGE] = nir_op_uge,
1678 [TGSI_OPCODE_USHR] = nir_op_ushr,
1679 [TGSI_OPCODE_USLT] = nir_op_ult,
1680 [TGSI_OPCODE_USNE] = nir_op_ine,
1681
1682 [TGSI_OPCODE_SWITCH] = 0, /* not emitted by glsl_to_tgsi.cpp */
1683 [TGSI_OPCODE_CASE] = 0, /* not emitted by glsl_to_tgsi.cpp */
1684 [TGSI_OPCODE_DEFAULT] = 0, /* not emitted by glsl_to_tgsi.cpp */
1685 [TGSI_OPCODE_ENDSWITCH] = 0, /* not emitted by glsl_to_tgsi.cpp */
1686
1687 /* XXX: SAMPLE opcodes */
1688
1689 [TGSI_OPCODE_UARL] = nir_op_mov,
1690 [TGSI_OPCODE_UCMP] = 0,
1691 [TGSI_OPCODE_IABS] = nir_op_iabs,
1692 [TGSI_OPCODE_ISSG] = nir_op_isign,
1693
1694 /* XXX: atomics */
1695
1696 [TGSI_OPCODE_TEX2] = 0,
1697 [TGSI_OPCODE_TXB2] = 0,
1698 [TGSI_OPCODE_TXL2] = 0,
1699
1700 [TGSI_OPCODE_IMUL_HI] = nir_op_imul_high,
1701 [TGSI_OPCODE_UMUL_HI] = nir_op_umul_high,
1702
1703 [TGSI_OPCODE_TG4] = 0,
1704 [TGSI_OPCODE_LODQ] = 0,
1705
1706 [TGSI_OPCODE_IBFE] = nir_op_ibitfield_extract,
1707 [TGSI_OPCODE_UBFE] = nir_op_ubitfield_extract,
1708 [TGSI_OPCODE_BFI] = nir_op_bitfield_insert,
1709 [TGSI_OPCODE_BREV] = nir_op_bitfield_reverse,
1710 [TGSI_OPCODE_POPC] = nir_op_bit_count,
1711 [TGSI_OPCODE_LSB] = nir_op_find_lsb,
1712 [TGSI_OPCODE_IMSB] = nir_op_ifind_msb,
1713 [TGSI_OPCODE_UMSB] = nir_op_ufind_msb,
1714
1715 [TGSI_OPCODE_INTERP_CENTROID] = 0, /* XXX */
1716 [TGSI_OPCODE_INTERP_SAMPLE] = 0, /* XXX */
1717 [TGSI_OPCODE_INTERP_OFFSET] = 0, /* XXX */
1718
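/* The unsigned 64-bit opcodes map onto the generic NIR integer ops:
 * ttn_get_src() bitcasts their sources up to 64-bit vec2s based on the
 * inferred TGSI source type, and ttn_alu() bitcasts the 64-bit result back
 * to a 32-bit vec4.
 */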
1719 [TGSI_OPCODE_U64ADD] = nir_op_iadd,
1720 [TGSI_OPCODE_U64MUL] = nir_op_imul,
1721 [TGSI_OPCODE_U64DIV] = nir_op_udiv,
1722 [TGSI_OPCODE_U64SNE] = nir_op_ine,
1723 };
1724
1725 static void
1726 ttn_emit_instruction(struct ttn_compile *c)
1727 {
1728 nir_builder *b = &c->build;
1729 struct tgsi_full_instruction *tgsi_inst = &c->token->FullInstruction;
1730 unsigned i;
1731 unsigned tgsi_op = tgsi_inst->Instruction.Opcode;
1732 struct tgsi_full_dst_register *tgsi_dst = &tgsi_inst->Dst[0];
1733
1734 if (tgsi_op == TGSI_OPCODE_END)
1735 return;
1736
1737 nir_ssa_def *src[TGSI_FULL_MAX_SRC_REGISTERS];
1738 for (i = 0; i < tgsi_inst->Instruction.NumSrcRegs; i++) {
1739 src[i] = ttn_get_src(c, &tgsi_inst->Src[i], i);
1740 }
1741 nir_alu_dest dest = ttn_get_dest(c, tgsi_dst);
1742
1743 unsigned tgsi_dst_type = tgsi_opcode_infer_dst_type(tgsi_op, 0);
1744
1745 /* The destination bitsize of the NIR opcode (not TGSI, where it's always
1746 * 32 bits). This needs to be passed into ttn_alu() because it can't be
1747 * inferred for comparison opcodes.
1748 */
1749 unsigned dst_bitsize = tgsi_type_is_64bit(tgsi_dst_type) ? 64 : 32;
1750
1751 switch (tgsi_op) {
1752 case TGSI_OPCODE_RSQ:
1753 ttn_move_dest(b, dest, nir_frsq(b, ttn_channel(b, src[0], X)));
1754 break;
1755
1756 case TGSI_OPCODE_SQRT:
1757 ttn_move_dest(b, dest, nir_fsqrt(b, ttn_channel(b, src[0], X)));
1758 break;
1759
1760 case TGSI_OPCODE_RCP:
1761 ttn_move_dest(b, dest, nir_frcp(b, ttn_channel(b, src[0], X)));
1762 break;
1763
1764 case TGSI_OPCODE_EX2:
1765 ttn_move_dest(b, dest, nir_fexp2(b, ttn_channel(b, src[0], X)));
1766 break;
1767
1768 case TGSI_OPCODE_LG2:
1769 ttn_move_dest(b, dest, nir_flog2(b, ttn_channel(b, src[0], X)));
1770 break;
1771
1772 case TGSI_OPCODE_POW:
1773 ttn_move_dest(b, dest, nir_fpow(b,
1774 ttn_channel(b, src[0], X),
1775 ttn_channel(b, src[1], X)));
1776 break;
1777
1778 case TGSI_OPCODE_COS:
1779 ttn_move_dest(b, dest, nir_fcos(b, ttn_channel(b, src[0], X)));
1780 break;
1781
1782 case TGSI_OPCODE_SIN:
1783 ttn_move_dest(b, dest, nir_fsin(b, ttn_channel(b, src[0], X)));
1784 break;
1785
1786 case TGSI_OPCODE_ARL:
1787 ttn_arl(b, op_trans[tgsi_op], dest, src);
1788 break;
1789
1790 case TGSI_OPCODE_EXP:
1791 ttn_exp(b, op_trans[tgsi_op], dest, src);
1792 break;
1793
1794 case TGSI_OPCODE_LOG:
1795 ttn_log(b, op_trans[tgsi_op], dest, src);
1796 break;
1797
1798 case TGSI_OPCODE_DST:
1799 ttn_dst(b, op_trans[tgsi_op], dest, src);
1800 break;
1801
1802 case TGSI_OPCODE_LIT:
1803 ttn_lit(b, op_trans[tgsi_op], dest, src);
1804 break;
1805
1806 case TGSI_OPCODE_DP2:
1807 ttn_dp2(b, op_trans[tgsi_op], dest, src);
1808 break;
1809
1810 case TGSI_OPCODE_DP3:
1811 ttn_dp3(b, op_trans[tgsi_op], dest, src);
1812 break;
1813
1814 case TGSI_OPCODE_DP4:
1815 ttn_dp4(b, op_trans[tgsi_op], dest, src);
1816 break;
1817
1818 case TGSI_OPCODE_UMAD:
1819 ttn_umad(b, op_trans[tgsi_op], dest, src);
1820 break;
1821
1822 case TGSI_OPCODE_LRP:
1823 ttn_move_dest(b, dest, nir_flrp(b, src[2], src[1], src[0]));
1824 break;
1825
1826 case TGSI_OPCODE_KILL:
1827 ttn_kill(b, op_trans[tgsi_op], dest, src);
1828 break;
1829
1830 case TGSI_OPCODE_ARR:
1831 ttn_arr(b, op_trans[tgsi_op], dest, src);
1832 break;
1833
1834 case TGSI_OPCODE_CMP:
1835 ttn_cmp(b, op_trans[tgsi_op], dest, src);
1836 break;
1837
1838 case TGSI_OPCODE_UCMP:
1839 ttn_ucmp(b, op_trans[tgsi_op], dest, src);
1840 break;
1841
1842 case TGSI_OPCODE_SGT:
1843 ttn_sgt(b, op_trans[tgsi_op], dest, src);
1844 break;
1845
1846 case TGSI_OPCODE_SLE:
1847 ttn_sle(b, op_trans[tgsi_op], dest, src);
1848 break;
1849
1850 case TGSI_OPCODE_KILL_IF:
1851 ttn_kill_if(b, op_trans[tgsi_op], dest, src);
1852 break;
1853
1854 case TGSI_OPCODE_TEX:
1855 case TGSI_OPCODE_TXP:
1856 case TGSI_OPCODE_TXL:
1857 case TGSI_OPCODE_TXB:
1858 case TGSI_OPCODE_TXD:
1859 case TGSI_OPCODE_TEX2:
1860 case TGSI_OPCODE_TXL2:
1861 case TGSI_OPCODE_TXB2:
1862 case TGSI_OPCODE_TXF:
1863 case TGSI_OPCODE_TG4:
1864 case TGSI_OPCODE_LODQ:
1865 ttn_tex(c, dest, src);
1866 break;
1867
1868 case TGSI_OPCODE_TXQ:
1869 ttn_txq(c, dest, src);
1870 break;
1871
1872 case TGSI_OPCODE_NOP:
1873 break;
1874
1875 case TGSI_OPCODE_IF:
1876 ttn_if(c, src[0], false);
1877 break;
1878
1879 case TGSI_OPCODE_UIF:
1880 ttn_if(c, src[0], true);
1881 break;
1882
1883 case TGSI_OPCODE_ELSE:
1884 ttn_else(c);
1885 break;
1886
1887 case TGSI_OPCODE_ENDIF:
1888 ttn_endif(c);
1889 break;
1890
1891 case TGSI_OPCODE_BGNLOOP:
1892 ttn_bgnloop(c);
1893 break;
1894
1895 case TGSI_OPCODE_BRK:
1896 ttn_brk(b);
1897 break;
1898
1899 case TGSI_OPCODE_CONT:
1900 ttn_cont(b);
1901 break;
1902
1903 case TGSI_OPCODE_ENDLOOP:
1904 ttn_endloop(c);
1905 break;
1906
1907 default:
1908 if (op_trans[tgsi_op] != 0 || tgsi_op == TGSI_OPCODE_MOV) {
1909 ttn_alu(b, op_trans[tgsi_op], dest, dst_bitsize, src);
1910 } else {
1911 fprintf(stderr, "unknown TGSI opcode: %s\n",
1912 tgsi_get_opcode_name(tgsi_op));
1913 abort();
1914 }
1915 break;
1916 }
1917
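/* Added note: when the TGSI instruction requests saturation, clamp the result
 * to [0.0, 1.0].  The destination is a register rather than an SSA value, so
 * the result can be read back and overwritten in place.
 */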
1918 if (tgsi_inst->Instruction.Saturate) {
1919 assert(!dest.dest.is_ssa);
1920 ttn_move_dest(b, dest, nir_fsat(b, ttn_src_for_dest(b, &dest)));
1921 }
1922
1923 /* If the destination has a matching variable, append a store_deref to
1924 * copy the value from the register into the variable.
1925 */
1926 nir_variable *var = ttn_get_var(c, tgsi_dst);
1927 if (var) {
1928 unsigned index = tgsi_dst->Register.Index;
1929 unsigned offset = c->temp_regs[index].offset;
1930 struct tgsi_ind_register *indirect = tgsi_dst->Register.Indirect ?
1931 &tgsi_dst->Indirect : NULL;
1932 nir_src val = nir_src_for_reg(dest.dest.reg.reg);
1933 nir_store_deref(b, ttn_array_deref(c, var, offset, indirect),
1934 nir_ssa_for_src(b, val, 4), dest.write_mask);
1935 }
1936 }
1937
1938 /**
1939 * Emits a NIR store intrinsic for each TGSI_FILE_OUTPUT value into the
1940 * corresponding output variable at the end of the shader.
1941 *
1942 * We don't generate these stores incrementally as the TGSI_FILE_OUTPUT values
1943 * are written, because there is no output load intrinsic, which means we
1944 * couldn't handle partial writemasks.
1945 */
1946 static void
1947 ttn_add_output_stores(struct ttn_compile *c)
1948 {
1949 nir_builder *b = &c->build;
1950
1951 for (int i = 0; i < c->build.shader->num_outputs; i++) {
1952 nir_variable *var = c->outputs[i];
1953 if (!var)
1954 continue;
1955
1956 nir_src src = nir_src_for_reg(c->output_regs[i].reg);
1957 src.reg.base_offset = c->output_regs[i].offset;
1958
1959 nir_ssa_def *store_value = nir_ssa_for_src(b, src, 4);
1960 if (c->build.shader->info.stage == MESA_SHADER_FRAGMENT &&
1961 var->data.location == FRAG_RESULT_DEPTH) {
1962 /* TGSI uses TGSI_SEMANTIC_POSITION.z for the depth output, while
1963 * NIR uses a single float FRAG_RESULT_DEPTH.
1964 */
1965 store_value = nir_channel(b, store_value, 2);
1966 }
1967
1968 nir_store_deref(b, nir_build_deref_var(b, var), store_value,
1969 (1 << store_value->num_components) - 1);
1970 }
1971 }
1972
1973 /**
1974 * Parses the given TGSI tokens.
1975 */
1976 static void
1977 ttn_parse_tgsi(struct ttn_compile *c, const void *tgsi_tokens)
1978 {
1979 struct tgsi_parse_context parser;
1980 int ret;
1981
1982 ret = tgsi_parse_init(&parser, tgsi_tokens);
1983 assert(ret == TGSI_PARSE_OK);
1984
1985 while (!tgsi_parse_end_of_tokens(&parser)) {
1986 tgsi_parse_token(&parser);
1987 c->token = &parser.FullToken;
1988
1989 switch (parser.FullToken.Token.Type) {
1990 case TGSI_TOKEN_TYPE_DECLARATION:
1991 ttn_emit_declaration(c);
1992 break;
1993
1994 case TGSI_TOKEN_TYPE_INSTRUCTION:
1995 ttn_emit_instruction(c);
1996 break;
1997
1998 case TGSI_TOKEN_TYPE_IMMEDIATE:
1999 ttn_emit_immediate(c);
2000 break;
2001 }
2002 }
2003
2004 tgsi_parse_free(&parser);
2005 }
2006
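/* Added note: queries the pipe_screen capabilities that influence how the NIR
 * is built: scalar vs. vector ISA, packed uniforms, sampler deref lowering,
 * and whether the FACE/POSITION/POINT fragment inputs are system values.
 */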
2007 static void
2008 ttn_read_pipe_caps(struct ttn_compile *c,
2009 struct pipe_screen *screen)
2010 {
2011 c->cap_scalar = screen->get_shader_param(screen, c->scan->processor, PIPE_SHADER_CAP_SCALAR_ISA);
2012 c->cap_packed_uniforms = screen->get_param(screen, PIPE_CAP_PACKED_UNIFORMS);
2013 c->cap_samplers_as_deref = screen->get_param(screen, PIPE_CAP_NIR_SAMPLERS_AS_DEREF);
2014 c->cap_face_is_sysval = screen->get_param(screen, PIPE_CAP_TGSI_FS_FACE_IS_INTEGER_SYSVAL);
2015 c->cap_position_is_sysval = screen->get_param(screen, PIPE_CAP_TGSI_FS_POSITION_IS_SYSVAL);
2016 c->cap_point_is_sysval = screen->get_param(screen, PIPE_CAP_TGSI_FS_POINT_IS_SYSVAL);
2017 }
2018
2019 /**
2020 * Initializes a TGSI-to-NIR compiler.
2021 */
2022 static struct ttn_compile *
2023 ttn_compile_init(const void *tgsi_tokens,
2024 const nir_shader_compiler_options *options,
2025 struct pipe_screen *screen)
2026 {
2027 struct ttn_compile *c;
2028 struct nir_shader *s;
2029 struct tgsi_shader_info scan;
2030
2031 assert(options || screen);
2032 c = rzalloc(NULL, struct ttn_compile);
2033
2034 tgsi_scan_shader(tgsi_tokens, &scan);
2035 c->scan = &scan;
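/* Added note: the scan results (file sizes and opcode counts) are used below
 * to size the register, immediate, and control-flow stack allocations.
 */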
2036
2037 if (!options) {
2038 options =
2039 screen->get_compiler_options(screen, PIPE_SHADER_IR_NIR, scan.processor);
2040 }
2041
2042 nir_builder_init_simple_shader(&c->build, NULL,
2043 tgsi_processor_to_shader_stage(scan.processor),
2044 options);
2045
2046 s = c->build.shader;
2047
2048 if (screen) {
2049 ttn_read_pipe_caps(c, screen);
2050 } else {
2051 /* TTN used to be hard-coded to always make FACE a sysval, so preserve
2052 * that behavior here to avoid breaking existing users. */
2053 c->cap_face_is_sysval = true;
2054 }
2055
2056 if (s->info.stage == MESA_SHADER_FRAGMENT)
2057 s->info.fs.untyped_color_outputs = true;
2058
2059 s->num_inputs = scan.file_max[TGSI_FILE_INPUT] + 1;
2060 s->num_uniforms = scan.const_file_max[0] + 1;
2061 s->num_outputs = scan.file_max[TGSI_FILE_OUTPUT] + 1;
2062
2063 s->info.vs.window_space_position = scan.properties[TGSI_PROPERTY_VS_WINDOW_SPACE_POSITION];
2064
2065 c->inputs = rzalloc_array(c, struct nir_variable *, s->num_inputs);
2066 c->outputs = rzalloc_array(c, struct nir_variable *, s->num_outputs);
2067
2068 c->output_regs = rzalloc_array(c, struct ttn_reg_info,
2069 scan.file_max[TGSI_FILE_OUTPUT] + 1);
2070 c->temp_regs = rzalloc_array(c, struct ttn_reg_info,
2071 scan.file_max[TGSI_FILE_TEMPORARY] + 1);
2072 c->imm_defs = rzalloc_array(c, nir_ssa_def *,
2073 scan.file_max[TGSI_FILE_IMMEDIATE] + 1);
2074
2075 c->num_samp_types = scan.file_max[TGSI_FILE_SAMPLER_VIEW] + 1;
2076 c->samp_types = rzalloc_array(c, nir_alu_type, c->num_samp_types);
2077
2078 c->if_stack = rzalloc_array(c, nir_cursor,
2079 (scan.opcode_count[TGSI_OPCODE_IF] +
2080 scan.opcode_count[TGSI_OPCODE_UIF]) * 2);
2081 c->loop_stack = rzalloc_array(c, nir_cursor,
2082 scan.opcode_count[TGSI_OPCODE_BGNLOOP]);
2083
2084
2085 ttn_parse_tgsi(c, tgsi_tokens);
2086 ttn_add_output_stores(c);
2087
2088 nir_validate_shader(c->build.shader, "TTN: after parsing TGSI and creating the NIR shader");
2089
2090 return c;
2091 }
2092
2093 static void
2094 ttn_optimize_nir(nir_shader *nir, bool scalar)
2095 {
2096 bool progress;
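/* Added note: run the lowering and optimization passes repeatedly until a
 * full pass over the shader stops making progress.
 */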
2097 do {
2098 progress = false;
2099
2100 NIR_PASS_V(nir, nir_lower_vars_to_ssa);
2101
2102 if (scalar) {
2103 NIR_PASS_V(nir, nir_lower_alu_to_scalar, NULL);
2104 NIR_PASS_V(nir, nir_lower_phis_to_scalar);
2105 }
2106
2107 NIR_PASS_V(nir, nir_lower_alu);
2108 NIR_PASS_V(nir, nir_lower_pack);
2109 NIR_PASS(progress, nir, nir_copy_prop);
2110 NIR_PASS(progress, nir, nir_opt_remove_phis);
2111 NIR_PASS(progress, nir, nir_opt_dce);
2112
2113 if (nir_opt_trivial_continues(nir)) {
2114 progress = true;
2115 NIR_PASS(progress, nir, nir_copy_prop);
2116 NIR_PASS(progress, nir, nir_opt_dce);
2117 }
2118
2119 NIR_PASS(progress, nir, nir_opt_if, false);
2120 NIR_PASS(progress, nir, nir_opt_dead_cf);
2121 NIR_PASS(progress, nir, nir_opt_cse);
2122 NIR_PASS(progress, nir, nir_opt_peephole_select, 8, true, true);
2123
2124 NIR_PASS(progress, nir, nir_opt_algebraic);
2125 NIR_PASS(progress, nir, nir_opt_constant_folding);
2126
2127 NIR_PASS(progress, nir, nir_opt_undef);
2128 NIR_PASS(progress, nir, nir_opt_conditional_discard);
2129
2130 if (nir->options->max_unroll_iterations) {
2131 NIR_PASS(progress, nir, nir_opt_loop_unroll, (nir_variable_mode)0);
2132 }
2133
2134 } while (progress);
2135
2136 }
2137
2138 /**
2139 * Finalizes the NIR in a similar way to what st_glsl_to_nir does.
2140 *
2141 * Drivers expect these passes to have already been run,
2142 * so we have to run them here as well.
2143 */
2144 static void
2145 ttn_finalize_nir(struct ttn_compile *c)
2146 {
2147 struct nir_shader *nir = c->build.shader;
2148
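/* Added note: convert TTN's registers and variables into SSA form before
 * running the remaining lowering passes.
 */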
2149 NIR_PASS_V(nir, nir_lower_vars_to_ssa);
2150 NIR_PASS_V(nir, nir_lower_regs_to_ssa);
2151
2152 NIR_PASS_V(nir, nir_lower_global_vars_to_local);
2153 NIR_PASS_V(nir, nir_split_var_copies);
2154 NIR_PASS_V(nir, nir_lower_var_copies);
2155 NIR_PASS_V(nir, nir_lower_system_values);
2156
2157 if (c->cap_packed_uniforms)
2158 NIR_PASS_V(nir, nir_lower_uniforms_to_ubo, 16);
2159
2160 if (c->cap_samplers_as_deref)
2161 NIR_PASS_V(nir, gl_nir_lower_samplers_as_deref, NULL);
2162 else
2163 NIR_PASS_V(nir, gl_nir_lower_samplers, NULL);
2164
2165 ttn_optimize_nir(nir, c->cap_scalar);
2166 nir_shader_gather_info(nir, c->build.impl);
2167 nir_validate_shader(nir, "TTN: after all optimizations");
2168 }
2169
2170 struct nir_shader *
2171 tgsi_to_nir(const void *tgsi_tokens,
2172 struct pipe_screen *screen)
2173 {
2174 struct ttn_compile *c;
2175 struct nir_shader *s;
2176
2177 c = ttn_compile_init(tgsi_tokens, NULL, screen);
2178 s = c->build.shader;
2179 ttn_finalize_nir(c);
2180 ralloc_free(c);
2181
2182 return s;
2183 }
2184
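/* Added note: variant of tgsi_to_nir() for callers without a pipe_screen.
 * Unlike tgsi_to_nir(), this path does not run ttn_finalize_nir(), so
 * presumably the caller is responsible for any further lowering and
 * optimization of the returned shader.
 */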
2185 struct nir_shader *
2186 tgsi_to_nir_noscreen(const void *tgsi_tokens,
2187 const nir_shader_compiler_options *options)
2188 {
2189 struct ttn_compile *c;
2190 struct nir_shader *s;
2191
2192 c = ttn_compile_init(tgsi_tokens, options, NULL);
2193 s = c->build.shader;
2194 ralloc_free(c);
2195
2196 return s;
2197 }
2198