c55e8b84a41140148b38ef3112ab9836c6b34440
[mesa.git] / src / gallium / auxiliary / nir / tgsi_to_nir.c
1 /*
2 * Copyright © 2014-2015 Broadcom
3 * Copyright (C) 2014 Rob Clark <robclark@freedesktop.org>
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9 * and/or sell copies of the Software, and to permit persons to whom the
10 * Software is furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice (including the next
13 * paragraph) shall be included in all copies or substantial portions of the
14 * Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
22 * IN THE SOFTWARE.
23 */
24
25 #include "util/ralloc.h"
26 #include "pipe/p_screen.h"
27
28 #include "compiler/nir/nir.h"
29 #include "compiler/nir/nir_control_flow.h"
30 #include "compiler/nir/nir_builder.h"
31 #include "compiler/glsl/gl_nir.h"
32 #include "compiler/glsl/list.h"
33 #include "compiler/shader_enums.h"
34
35 #include "tgsi_to_nir.h"
36 #include "tgsi/tgsi_parse.h"
37 #include "tgsi/tgsi_dump.h"
38 #include "tgsi/tgsi_info.h"
39 #include "tgsi/tgsi_scan.h"
40 #include "tgsi/tgsi_from_mesa.h"
41
/* Build an anonymous unsigned[4] holding the four TGSI swizzle selectors
 * named by the channel letters X, Y, Z and W passed as arguments.
 */
#define SWIZ(X, Y, Z, W)                                  \
   (unsigned[4]){ TGSI_SWIZZLE_##X, TGSI_SWIZZLE_##Y,     \
                  TGSI_SWIZZLE_##Z, TGSI_SWIZZLE_##W }
48
49 struct ttn_reg_info {
50 /** nir register containing this TGSI index. */
51 nir_register *reg;
52 nir_variable *var;
53 /** Offset (in vec4s) from the start of var for this TGSI index. */
54 int offset;
55 };
56
57 struct ttn_compile {
58 union tgsi_full_token *token;
59 nir_builder build;
60 struct tgsi_shader_info *scan;
61
62 struct ttn_reg_info *output_regs;
63 struct ttn_reg_info *temp_regs;
64 nir_ssa_def **imm_defs;
65
66 unsigned num_samp_types;
67 nir_alu_type *samp_types;
68
69 nir_register *addr_reg;
70
71 nir_variable **inputs;
72 nir_variable **outputs;
73 nir_variable *samplers[PIPE_MAX_SAMPLERS];
74
75 nir_variable *input_var_face;
76 nir_variable *input_var_position;
77
78 /**
79 * Stack of nir_cursors where instructions should be pushed as we pop
80 * back out of the control flow stack.
81 *
82 * For each IF/ELSE/ENDIF block, if_stack[if_stack_pos] has where the else
83 * instructions should be placed, and if_stack[if_stack_pos - 1] has where
84 * the next instructions outside of the if/then/else block go.
85 */
86 nir_cursor *if_stack;
87 unsigned if_stack_pos;
88
89 /**
90 * Stack of nir_cursors where instructions should be pushed as we pop
91 * back out of the control flow stack.
92 *
93 * loop_stack[loop_stack_pos - 1] contains the cf_node_list for the outside
94 * of the loop.
95 */
96 nir_cursor *loop_stack;
97 unsigned loop_stack_pos;
98
99 /* How many TGSI_FILE_IMMEDIATE vec4s have been parsed so far. */
100 unsigned next_imm;
101
102 bool cap_scalar;
103 bool cap_face_is_sysval;
104 bool cap_position_is_sysval;
105 bool cap_packed_uniforms;
106 bool cap_samplers_as_deref;
107 };
108
/* Rearrange src into a 4-component value using channels (x, y, z, w). */
#define ttn_swizzle(b, src, x, y, z, w)                   \
   nir_swizzle(b, src, SWIZ(x, y, z, w), 4, false)

/* Pick the single channel `swiz` out of src as a 1-component value. */
#define ttn_channel(b, src, swiz)                         \
   nir_swizzle(b, src, SWIZ(swiz, swiz, swiz, swiz), 1, false)
113
114 static gl_varying_slot
115 tgsi_varying_semantic_to_slot(unsigned semantic, unsigned index)
116 {
117 switch (semantic) {
118 case TGSI_SEMANTIC_POSITION:
119 return VARYING_SLOT_POS;
120 case TGSI_SEMANTIC_COLOR:
121 if (index == 0)
122 return VARYING_SLOT_COL0;
123 else
124 return VARYING_SLOT_COL1;
125 case TGSI_SEMANTIC_BCOLOR:
126 if (index == 0)
127 return VARYING_SLOT_BFC0;
128 else
129 return VARYING_SLOT_BFC1;
130 case TGSI_SEMANTIC_FOG:
131 return VARYING_SLOT_FOGC;
132 case TGSI_SEMANTIC_PSIZE:
133 return VARYING_SLOT_PSIZ;
134 case TGSI_SEMANTIC_GENERIC:
135 return VARYING_SLOT_VAR0 + index;
136 case TGSI_SEMANTIC_FACE:
137 return VARYING_SLOT_FACE;
138 case TGSI_SEMANTIC_EDGEFLAG:
139 return VARYING_SLOT_EDGE;
140 case TGSI_SEMANTIC_PRIMID:
141 return VARYING_SLOT_PRIMITIVE_ID;
142 case TGSI_SEMANTIC_CLIPDIST:
143 if (index == 0)
144 return VARYING_SLOT_CLIP_DIST0;
145 else
146 return VARYING_SLOT_CLIP_DIST1;
147 case TGSI_SEMANTIC_CLIPVERTEX:
148 return VARYING_SLOT_CLIP_VERTEX;
149 case TGSI_SEMANTIC_TEXCOORD:
150 return VARYING_SLOT_TEX0 + index;
151 case TGSI_SEMANTIC_PCOORD:
152 return VARYING_SLOT_PNTC;
153 case TGSI_SEMANTIC_VIEWPORT_INDEX:
154 return VARYING_SLOT_VIEWPORT;
155 case TGSI_SEMANTIC_LAYER:
156 return VARYING_SLOT_LAYER;
157 default:
158 fprintf(stderr, "Bad TGSI semantic: %d/%d\n", semantic, index);
159 abort();
160 }
161 }
162
163 static nir_ssa_def *
164 ttn_src_for_dest(nir_builder *b, nir_alu_dest *dest)
165 {
166 nir_alu_src src;
167 memset(&src, 0, sizeof(src));
168
169 if (dest->dest.is_ssa)
170 src.src = nir_src_for_ssa(&dest->dest.ssa);
171 else {
172 assert(!dest->dest.reg.indirect);
173 src.src = nir_src_for_reg(dest->dest.reg.reg);
174 src.src.reg.base_offset = dest->dest.reg.base_offset;
175 }
176
177 for (int i = 0; i < 4; i++)
178 src.swizzle[i] = i;
179
180 return nir_fmov_alu(b, src, 4);
181 }
182
183 static enum glsl_interp_mode
184 ttn_translate_interp_mode(unsigned tgsi_interp)
185 {
186 switch (tgsi_interp) {
187 case TGSI_INTERPOLATE_CONSTANT:
188 return INTERP_MODE_FLAT;
189 case TGSI_INTERPOLATE_LINEAR:
190 return INTERP_MODE_NOPERSPECTIVE;
191 case TGSI_INTERPOLATE_PERSPECTIVE:
192 return INTERP_MODE_SMOOTH;
193 case TGSI_INTERPOLATE_COLOR:
194 return INTERP_MODE_SMOOTH;
195 default:
196 unreachable("bad TGSI interpolation mode");
197 }
198 }
199
200 static void
201 ttn_emit_declaration(struct ttn_compile *c)
202 {
203 nir_builder *b = &c->build;
204 struct tgsi_full_declaration *decl = &c->token->FullDeclaration;
205 unsigned array_size = decl->Range.Last - decl->Range.First + 1;
206 unsigned file = decl->Declaration.File;
207 unsigned i;
208
209 if (file == TGSI_FILE_TEMPORARY) {
210 if (decl->Declaration.Array) {
211 /* for arrays, we create variables instead of registers: */
212 nir_variable *var = rzalloc(b->shader, nir_variable);
213
214 var->type = glsl_array_type(glsl_vec4_type(), array_size, 0);
215 var->data.mode = nir_var_shader_temp;
216 var->name = ralloc_asprintf(var, "arr_%d", decl->Array.ArrayID);
217
218 exec_list_push_tail(&b->shader->globals, &var->node);
219
220 for (i = 0; i < array_size; i++) {
221 /* point all the matching slots to the same var,
222 * with appropriate offset set, mostly just so
223 * we know what to do when tgsi does a non-indirect
224 * access
225 */
226 c->temp_regs[decl->Range.First + i].reg = NULL;
227 c->temp_regs[decl->Range.First + i].var = var;
228 c->temp_regs[decl->Range.First + i].offset = i;
229 }
230 } else {
231 for (i = 0; i < array_size; i++) {
232 nir_register *reg = nir_local_reg_create(b->impl);
233 reg->num_components = 4;
234 c->temp_regs[decl->Range.First + i].reg = reg;
235 c->temp_regs[decl->Range.First + i].var = NULL;
236 c->temp_regs[decl->Range.First + i].offset = 0;
237 }
238 }
239 } else if (file == TGSI_FILE_ADDRESS) {
240 c->addr_reg = nir_local_reg_create(b->impl);
241 c->addr_reg->num_components = 4;
242 } else if (file == TGSI_FILE_SYSTEM_VALUE) {
243 /* Nothing to record for system values. */
244 } else if (file == TGSI_FILE_SAMPLER) {
245 /* Nothing to record for samplers. */
246 } else if (file == TGSI_FILE_SAMPLER_VIEW) {
247 struct tgsi_declaration_sampler_view *sview = &decl->SamplerView;
248 nir_alu_type type;
249
250 assert((sview->ReturnTypeX == sview->ReturnTypeY) &&
251 (sview->ReturnTypeX == sview->ReturnTypeZ) &&
252 (sview->ReturnTypeX == sview->ReturnTypeW));
253
254 switch (sview->ReturnTypeX) {
255 case TGSI_RETURN_TYPE_SINT:
256 type = nir_type_int;
257 break;
258 case TGSI_RETURN_TYPE_UINT:
259 type = nir_type_uint;
260 break;
261 case TGSI_RETURN_TYPE_FLOAT:
262 default:
263 type = nir_type_float;
264 break;
265 }
266
267 for (i = 0; i < array_size; i++) {
268 c->samp_types[decl->Range.First + i] = type;
269 }
270 } else {
271 bool is_array = (array_size > 1);
272
273 assert(file == TGSI_FILE_INPUT ||
274 file == TGSI_FILE_OUTPUT ||
275 file == TGSI_FILE_CONSTANT);
276
277 /* nothing to do for UBOs: */
278 if ((file == TGSI_FILE_CONSTANT) && decl->Declaration.Dimension &&
279 decl->Dim.Index2D != 0) {
280 b->shader->info.num_ubos =
281 MAX2(b->shader->info.num_ubos, decl->Dim.Index2D);
282 return;
283 }
284
285 if ((file == TGSI_FILE_INPUT) || (file == TGSI_FILE_OUTPUT)) {
286 is_array = (is_array && decl->Declaration.Array &&
287 (decl->Array.ArrayID != 0));
288 }
289
290 for (i = 0; i < array_size; i++) {
291 unsigned idx = decl->Range.First + i;
292 nir_variable *var = rzalloc(b->shader, nir_variable);
293
294 var->data.driver_location = idx;
295
296 var->type = glsl_vec4_type();
297 if (is_array)
298 var->type = glsl_array_type(var->type, array_size, 0);
299
300 switch (file) {
301 case TGSI_FILE_INPUT:
302 var->data.read_only = true;
303 var->data.mode = nir_var_shader_in;
304 var->name = ralloc_asprintf(var, "in_%d", idx);
305
306 if (c->scan->processor == PIPE_SHADER_FRAGMENT) {
307 if (decl->Semantic.Name == TGSI_SEMANTIC_FACE) {
308 var->type = glsl_bool_type();
309 if (c->cap_face_is_sysval) {
310 var->data.mode = nir_var_system_value;
311 var->data.location = SYSTEM_VALUE_FRONT_FACE;
312 } else {
313 var->data.location = VARYING_SLOT_FACE;
314 }
315 c->input_var_face = var;
316 } else if (decl->Semantic.Name == TGSI_SEMANTIC_POSITION) {
317 if (c->cap_position_is_sysval) {
318 var->data.mode = nir_var_system_value;
319 var->data.location = SYSTEM_VALUE_FRAG_COORD;
320 } else {
321 var->data.location = VARYING_SLOT_POS;
322 }
323 c->input_var_position = var;
324 } else {
325 var->data.location =
326 tgsi_varying_semantic_to_slot(decl->Semantic.Name,
327 decl->Semantic.Index);
328 }
329 } else {
330 assert(!decl->Declaration.Semantic);
331 var->data.location = VERT_ATTRIB_GENERIC0 + idx;
332 }
333 var->data.index = 0;
334 var->data.interpolation =
335 ttn_translate_interp_mode(decl->Interp.Interpolate);
336
337 exec_list_push_tail(&b->shader->inputs, &var->node);
338 c->inputs[idx] = var;
339
340 for (int i = 0; i < array_size; i++)
341 b->shader->info.inputs_read |= 1 << (var->data.location + i);
342
343 break;
344 case TGSI_FILE_OUTPUT: {
345 int semantic_name = decl->Semantic.Name;
346 int semantic_index = decl->Semantic.Index;
347 /* Since we can't load from outputs in the IR, we make temporaries
348 * for the outputs and emit stores to the real outputs at the end of
349 * the shader.
350 */
351 nir_register *reg = nir_local_reg_create(b->impl);
352 reg->num_components = 4;
353 if (is_array)
354 reg->num_array_elems = array_size;
355
356 var->data.mode = nir_var_shader_out;
357 var->name = ralloc_asprintf(var, "out_%d", idx);
358 var->data.index = 0;
359 var->data.interpolation =
360 ttn_translate_interp_mode(decl->Interp.Interpolate);
361
362 if (c->scan->processor == PIPE_SHADER_FRAGMENT) {
363 switch (semantic_name) {
364 case TGSI_SEMANTIC_COLOR: {
365 /* TODO tgsi loses some information, so we cannot
366 * actually differentiate here between DSB and MRT
367 * at this point. But so far no drivers using tgsi-
368 * to-nir support dual source blend:
369 */
370 bool dual_src_blend = false;
371 if (dual_src_blend && (semantic_index == 1)) {
372 var->data.location = FRAG_RESULT_DATA0;
373 var->data.index = 1;
374 } else {
375 if (c->scan->properties[TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS])
376 var->data.location = FRAG_RESULT_COLOR;
377 else
378 var->data.location = FRAG_RESULT_DATA0 + semantic_index;
379 }
380 break;
381 }
382 case TGSI_SEMANTIC_POSITION:
383 var->data.location = FRAG_RESULT_DEPTH;
384 var->type = glsl_float_type();
385 break;
386 default:
387 fprintf(stderr, "Bad TGSI semantic: %d/%d\n",
388 decl->Semantic.Name, decl->Semantic.Index);
389 abort();
390 }
391 } else {
392 var->data.location =
393 tgsi_varying_semantic_to_slot(semantic_name, semantic_index);
394 }
395
396 if (is_array) {
397 unsigned j;
398 for (j = 0; j < array_size; j++) {
399 c->output_regs[idx + j].offset = i + j;
400 c->output_regs[idx + j].reg = reg;
401 }
402 } else {
403 c->output_regs[idx].offset = i;
404 c->output_regs[idx].reg = reg;
405 }
406
407 exec_list_push_tail(&b->shader->outputs, &var->node);
408 c->outputs[idx] = var;
409
410 for (int i = 0; i < array_size; i++)
411 b->shader->info.outputs_written |= 1ull << (var->data.location + i);
412 }
413 break;
414 case TGSI_FILE_CONSTANT:
415 var->data.mode = nir_var_uniform;
416 var->name = ralloc_asprintf(var, "uniform_%d", idx);
417 var->data.location = idx;
418
419 exec_list_push_tail(&b->shader->uniforms, &var->node);
420 break;
421 default:
422 unreachable("bad declaration file");
423 return;
424 }
425
426 if (is_array)
427 break;
428 }
429
430 }
431 }
432
433 static void
434 ttn_emit_immediate(struct ttn_compile *c)
435 {
436 nir_builder *b = &c->build;
437 struct tgsi_full_immediate *tgsi_imm = &c->token->FullImmediate;
438 nir_load_const_instr *load_const;
439 int i;
440
441 load_const = nir_load_const_instr_create(b->shader, 4, 32);
442 c->imm_defs[c->next_imm] = &load_const->def;
443 c->next_imm++;
444
445 for (i = 0; i < load_const->def.num_components; i++)
446 load_const->value[i].u32 = tgsi_imm->u[i].Uint;
447
448 nir_builder_instr_insert(b, &load_const->instr);
449 }
450
451 static nir_ssa_def *
452 ttn_src_for_indirect(struct ttn_compile *c, struct tgsi_ind_register *indirect);
453
454 /* generate either a constant or indirect deref chain for accessing an
455 * array variable.
456 */
457 static nir_deref_instr *
458 ttn_array_deref(struct ttn_compile *c, nir_variable *var, unsigned offset,
459 struct tgsi_ind_register *indirect)
460 {
461 nir_deref_instr *deref = nir_build_deref_var(&c->build, var);
462 nir_ssa_def *index = nir_imm_int(&c->build, offset);
463 if (indirect)
464 index = nir_iadd(&c->build, index, ttn_src_for_indirect(c, indirect));
465 return nir_build_deref_array(&c->build, deref, index);
466 }
467
468 /* Special case: Turn the frontface varying into a load of the
469 * frontface variable, and create the vector as required by TGSI.
470 */
471 static nir_ssa_def *
472 ttn_emulate_tgsi_front_face(struct ttn_compile *c)
473 {
474 nir_ssa_def *tgsi_frontface[4];
475
476 if (c->cap_face_is_sysval) {
477 /* When it's a system value, it should be an integer vector: (F, 0, 0, 1)
478 * F is 0xffffffff if front-facing, 0 if not.
479 */
480
481 nir_ssa_def *frontface = nir_load_front_face(&c->build, 1);
482
483 tgsi_frontface[0] = nir_bcsel(&c->build,
484 frontface,
485 nir_imm_int(&c->build, 0xffffffff),
486 nir_imm_int(&c->build, 0));
487 tgsi_frontface[1] = nir_imm_int(&c->build, 0);
488 tgsi_frontface[2] = nir_imm_int(&c->build, 0);
489 tgsi_frontface[3] = nir_imm_int(&c->build, 1);
490 } else {
491 /* When it's an input, it should be a float vector: (F, 0.0, 0.0, 1.0)
492 * F is positive if front-facing, negative if not.
493 */
494
495 assert(c->input_var_face);
496 nir_ssa_def *frontface = nir_load_var(&c->build, c->input_var_face);
497
498 tgsi_frontface[0] = nir_bcsel(&c->build,
499 frontface,
500 nir_imm_float(&c->build, 1.0),
501 nir_imm_float(&c->build, -1.0));
502 tgsi_frontface[1] = nir_imm_float(&c->build, 0.0);
503 tgsi_frontface[2] = nir_imm_float(&c->build, 0.0);
504 tgsi_frontface[3] = nir_imm_float(&c->build, 1.0);
505 }
506
507 return nir_vec(&c->build, tgsi_frontface, 4);
508 }
509
510 static nir_src
511 ttn_src_for_file_and_index(struct ttn_compile *c, unsigned file, unsigned index,
512 struct tgsi_ind_register *indirect,
513 struct tgsi_dimension *dim,
514 struct tgsi_ind_register *dimind)
515 {
516 nir_builder *b = &c->build;
517 nir_src src;
518
519 memset(&src, 0, sizeof(src));
520
521 switch (file) {
522 case TGSI_FILE_TEMPORARY:
523 if (c->temp_regs[index].var) {
524 unsigned offset = c->temp_regs[index].offset;
525 nir_variable *var = c->temp_regs[index].var;
526 nir_ssa_def *load = nir_load_deref(&c->build,
527 ttn_array_deref(c, var, offset, indirect));
528
529 src = nir_src_for_ssa(load);
530 } else {
531 assert(!indirect);
532 src.reg.reg = c->temp_regs[index].reg;
533 }
534 assert(!dim);
535 break;
536
537 case TGSI_FILE_ADDRESS:
538 src.reg.reg = c->addr_reg;
539 assert(!dim);
540 break;
541
542 case TGSI_FILE_IMMEDIATE:
543 src = nir_src_for_ssa(c->imm_defs[index]);
544 assert(!indirect);
545 assert(!dim);
546 break;
547
548 case TGSI_FILE_SYSTEM_VALUE: {
549 nir_intrinsic_op op;
550 nir_ssa_def *load;
551
552 assert(!indirect);
553 assert(!dim);
554
555 switch (c->scan->system_value_semantic_name[index]) {
556 case TGSI_SEMANTIC_VERTEXID_NOBASE:
557 op = nir_intrinsic_load_vertex_id_zero_base;
558 load = nir_load_vertex_id_zero_base(b);
559 break;
560 case TGSI_SEMANTIC_VERTEXID:
561 op = nir_intrinsic_load_vertex_id;
562 load = nir_load_vertex_id(b);
563 break;
564 case TGSI_SEMANTIC_BASEVERTEX:
565 op = nir_intrinsic_load_base_vertex;
566 load = nir_load_base_vertex(b);
567 break;
568 case TGSI_SEMANTIC_INSTANCEID:
569 op = nir_intrinsic_load_instance_id;
570 load = nir_load_instance_id(b);
571 break;
572 case TGSI_SEMANTIC_FACE:
573 assert(c->cap_face_is_sysval);
574 op = nir_intrinsic_load_front_face;
575 load = ttn_emulate_tgsi_front_face(c);
576 break;
577 case TGSI_SEMANTIC_POSITION:
578 assert(c->cap_position_is_sysval);
579 op = nir_intrinsic_load_frag_coord;
580 load = nir_load_frag_coord(b);
581 break;
582 default:
583 unreachable("bad system value");
584 }
585
586 src = nir_src_for_ssa(load);
587 b->shader->info.system_values_read |=
588 (1 << nir_system_value_from_intrinsic(op));
589
590 break;
591 }
592
593 case TGSI_FILE_INPUT:
594 if (c->scan->processor == PIPE_SHADER_FRAGMENT &&
595 c->scan->input_semantic_name[index] == TGSI_SEMANTIC_FACE) {
596 assert(!c->cap_face_is_sysval && c->input_var_face);
597 return nir_src_for_ssa(ttn_emulate_tgsi_front_face(c));
598 } else if (c->scan->processor == PIPE_SHADER_FRAGMENT &&
599 c->scan->input_semantic_name[index] == TGSI_SEMANTIC_POSITION) {
600 assert(!c->cap_position_is_sysval && c->input_var_position);
601 return nir_src_for_ssa(nir_load_var(&c->build, c->input_var_position));
602 } else {
603 /* Indirection on input arrays isn't supported by TTN. */
604 assert(!dim);
605 nir_deref_instr *deref = nir_build_deref_var(&c->build,
606 c->inputs[index]);
607 return nir_src_for_ssa(nir_load_deref(&c->build, deref));
608 }
609 break;
610
611 case TGSI_FILE_CONSTANT: {
612 nir_intrinsic_instr *load;
613 nir_intrinsic_op op;
614 unsigned srcn = 0;
615
616 if (dim && (dim->Index > 0 || dim->Indirect)) {
617 op = nir_intrinsic_load_ubo;
618 } else {
619 op = nir_intrinsic_load_uniform;
620 }
621
622 load = nir_intrinsic_instr_create(b->shader, op);
623
624 load->num_components = 4;
625 if (dim && (dim->Index > 0 || dim->Indirect)) {
626 if (dimind) {
627 load->src[srcn] =
628 ttn_src_for_file_and_index(c, dimind->File, dimind->Index,
629 NULL, NULL, NULL);
630 } else {
631 /* UBOs start at index 1 in TGSI: */
632 load->src[srcn] =
633 nir_src_for_ssa(nir_imm_int(b, dim->Index - 1));
634 }
635 srcn++;
636 }
637
638 nir_ssa_def *offset;
639 if (op == nir_intrinsic_load_ubo) {
640 /* UBO loads don't have a base offset. */
641 offset = nir_imm_int(b, index);
642 if (indirect) {
643 offset = nir_iadd(b, offset, ttn_src_for_indirect(c, indirect));
644 }
645 /* UBO offsets are in bytes, but TGSI gives them to us in vec4's */
646 offset = nir_ishl(b, offset, nir_imm_int(b, 4));
647 } else {
648 nir_intrinsic_set_base(load, index);
649 if (indirect) {
650 offset = ttn_src_for_indirect(c, indirect);
651 } else {
652 offset = nir_imm_int(b, 0);
653 }
654 }
655 load->src[srcn++] = nir_src_for_ssa(offset);
656
657 nir_ssa_dest_init(&load->instr, &load->dest, 4, 32, NULL);
658 nir_builder_instr_insert(b, &load->instr);
659
660 src = nir_src_for_ssa(&load->dest.ssa);
661 break;
662 }
663
664 default:
665 unreachable("bad src file");
666 }
667
668
669 return src;
670 }
671
672 static nir_ssa_def *
673 ttn_src_for_indirect(struct ttn_compile *c, struct tgsi_ind_register *indirect)
674 {
675 nir_builder *b = &c->build;
676 nir_alu_src src;
677 memset(&src, 0, sizeof(src));
678 for (int i = 0; i < 4; i++)
679 src.swizzle[i] = indirect->Swizzle;
680 src.src = ttn_src_for_file_and_index(c,
681 indirect->File,
682 indirect->Index,
683 NULL, NULL, NULL);
684 return nir_imov_alu(b, src, 1);
685 }
686
687 static nir_alu_dest
688 ttn_get_dest(struct ttn_compile *c, struct tgsi_full_dst_register *tgsi_fdst)
689 {
690 struct tgsi_dst_register *tgsi_dst = &tgsi_fdst->Register;
691 nir_alu_dest dest;
692 unsigned index = tgsi_dst->Index;
693
694 memset(&dest, 0, sizeof(dest));
695
696 if (tgsi_dst->File == TGSI_FILE_TEMPORARY) {
697 if (c->temp_regs[index].var) {
698 nir_register *reg;
699
700 /* this works, because TGSI will give us a base offset
701 * (in case of indirect index) that points back into
702 * the array. Access can be direct or indirect, we
703 * don't really care. Just create a one-shot dst reg
704 * that will get store_var'd back into the array var
705 * at the end of ttn_emit_instruction()
706 */
707 reg = nir_local_reg_create(c->build.impl);
708 reg->num_components = 4;
709 dest.dest.reg.reg = reg;
710 dest.dest.reg.base_offset = 0;
711 } else {
712 assert(!tgsi_dst->Indirect);
713 dest.dest.reg.reg = c->temp_regs[index].reg;
714 dest.dest.reg.base_offset = c->temp_regs[index].offset;
715 }
716 } else if (tgsi_dst->File == TGSI_FILE_OUTPUT) {
717 dest.dest.reg.reg = c->output_regs[index].reg;
718 dest.dest.reg.base_offset = c->output_regs[index].offset;
719 } else if (tgsi_dst->File == TGSI_FILE_ADDRESS) {
720 assert(index == 0);
721 dest.dest.reg.reg = c->addr_reg;
722 }
723
724 dest.write_mask = tgsi_dst->WriteMask;
725 dest.saturate = false;
726
727 if (tgsi_dst->Indirect && (tgsi_dst->File != TGSI_FILE_TEMPORARY)) {
728 nir_src *indirect = ralloc(c->build.shader, nir_src);
729 *indirect = nir_src_for_ssa(ttn_src_for_indirect(c, &tgsi_fdst->Indirect));
730 dest.dest.reg.indirect = indirect;
731 }
732
733 return dest;
734 }
735
736 static nir_variable *
737 ttn_get_var(struct ttn_compile *c, struct tgsi_full_dst_register *tgsi_fdst)
738 {
739 struct tgsi_dst_register *tgsi_dst = &tgsi_fdst->Register;
740 unsigned index = tgsi_dst->Index;
741
742 if (tgsi_dst->File == TGSI_FILE_TEMPORARY) {
743 /* we should not have an indirect when there is no var! */
744 if (!c->temp_regs[index].var)
745 assert(!tgsi_dst->Indirect);
746 return c->temp_regs[index].var;
747 }
748
749 return NULL;
750 }
751
752 static nir_ssa_def *
753 ttn_get_src(struct ttn_compile *c, struct tgsi_full_src_register *tgsi_fsrc,
754 int src_idx)
755 {
756 nir_builder *b = &c->build;
757 struct tgsi_src_register *tgsi_src = &tgsi_fsrc->Register;
758 enum tgsi_opcode opcode = c->token->FullInstruction.Instruction.Opcode;
759 unsigned tgsi_src_type = tgsi_opcode_infer_src_type(opcode, src_idx);
760 bool src_is_float = !(tgsi_src_type == TGSI_TYPE_SIGNED ||
761 tgsi_src_type == TGSI_TYPE_UNSIGNED);
762 nir_alu_src src;
763
764 memset(&src, 0, sizeof(src));
765
766 if (tgsi_src->File == TGSI_FILE_NULL) {
767 return nir_imm_float(b, 0.0);
768 } else if (tgsi_src->File == TGSI_FILE_SAMPLER) {
769 /* Only the index of the sampler gets used in texturing, and it will
770 * handle looking that up on its own instead of using the nir_alu_src.
771 */
772 assert(!tgsi_src->Indirect);
773 return NULL;
774 } else {
775 struct tgsi_ind_register *ind = NULL;
776 struct tgsi_dimension *dim = NULL;
777 struct tgsi_ind_register *dimind = NULL;
778 if (tgsi_src->Indirect)
779 ind = &tgsi_fsrc->Indirect;
780 if (tgsi_src->Dimension) {
781 dim = &tgsi_fsrc->Dimension;
782 if (dim->Indirect)
783 dimind = &tgsi_fsrc->DimIndirect;
784 }
785 src.src = ttn_src_for_file_and_index(c,
786 tgsi_src->File,
787 tgsi_src->Index,
788 ind, dim, dimind);
789 }
790
791 src.swizzle[0] = tgsi_src->SwizzleX;
792 src.swizzle[1] = tgsi_src->SwizzleY;
793 src.swizzle[2] = tgsi_src->SwizzleZ;
794 src.swizzle[3] = tgsi_src->SwizzleW;
795
796 nir_ssa_def *def = nir_fmov_alu(b, src, 4);
797
798 if (tgsi_src->Absolute) {
799 if (src_is_float)
800 def = nir_fabs(b, def);
801 else
802 def = nir_iabs(b, def);
803 }
804
805 if (tgsi_src->Negate) {
806 if (src_is_float)
807 def = nir_fneg(b, def);
808 else
809 def = nir_ineg(b, def);
810 }
811
812 return def;
813 }
814
815 static void
816 ttn_alu(nir_builder *b, nir_op op, nir_alu_dest dest, nir_ssa_def **src)
817 {
818 unsigned num_srcs = nir_op_infos[op].num_inputs;
819 nir_alu_instr *instr = nir_alu_instr_create(b->shader, op);
820 unsigned i;
821
822 for (i = 0; i < num_srcs; i++)
823 instr->src[i].src = nir_src_for_ssa(src[i]);
824
825 instr->dest = dest;
826 nir_builder_instr_insert(b, &instr->instr);
827 }
828
829 static void
830 ttn_move_dest_masked(nir_builder *b, nir_alu_dest dest,
831 nir_ssa_def *def, unsigned write_mask)
832 {
833 if (!(dest.write_mask & write_mask))
834 return;
835
836 nir_alu_instr *mov = nir_alu_instr_create(b->shader, nir_op_imov);
837 mov->dest = dest;
838 mov->dest.write_mask &= write_mask;
839 mov->src[0].src = nir_src_for_ssa(def);
840 for (unsigned i = def->num_components; i < 4; i++)
841 mov->src[0].swizzle[i] = def->num_components - 1;
842 nir_builder_instr_insert(b, &mov->instr);
843 }
844
845 static void
846 ttn_move_dest(nir_builder *b, nir_alu_dest dest, nir_ssa_def *def)
847 {
848 ttn_move_dest_masked(b, dest, def, TGSI_WRITEMASK_XYZW);
849 }
850
851 static void
852 ttn_arl(nir_builder *b, nir_op op, nir_alu_dest dest, nir_ssa_def **src)
853 {
854 ttn_move_dest(b, dest, nir_f2i32(b, nir_ffloor(b, src[0])));
855 }
856
857 /* EXP - Approximate Exponential Base 2
858 * dst.x = 2^{\lfloor src.x\rfloor}
859 * dst.y = src.x - \lfloor src.x\rfloor
860 * dst.z = 2^{src.x}
861 * dst.w = 1.0
862 */
863 static void
864 ttn_exp(nir_builder *b, nir_op op, nir_alu_dest dest, nir_ssa_def **src)
865 {
866 nir_ssa_def *srcx = ttn_channel(b, src[0], X);
867
868 ttn_move_dest_masked(b, dest, nir_fexp2(b, nir_ffloor(b, srcx)),
869 TGSI_WRITEMASK_X);
870 ttn_move_dest_masked(b, dest, nir_fsub(b, srcx, nir_ffloor(b, srcx)),
871 TGSI_WRITEMASK_Y);
872 ttn_move_dest_masked(b, dest, nir_fexp2(b, srcx), TGSI_WRITEMASK_Z);
873 ttn_move_dest_masked(b, dest, nir_imm_float(b, 1.0), TGSI_WRITEMASK_W);
874 }
875
876 /* LOG - Approximate Logarithm Base 2
877 * dst.x = \lfloor\log_2{|src.x|}\rfloor
878 * dst.y = \frac{|src.x|}{2^{\lfloor\log_2{|src.x|}\rfloor}}
879 * dst.z = \log_2{|src.x|}
880 * dst.w = 1.0
881 */
882 static void
883 ttn_log(nir_builder *b, nir_op op, nir_alu_dest dest, nir_ssa_def **src)
884 {
885 nir_ssa_def *abs_srcx = nir_fabs(b, ttn_channel(b, src[0], X));
886 nir_ssa_def *log2 = nir_flog2(b, abs_srcx);
887
888 ttn_move_dest_masked(b, dest, nir_ffloor(b, log2), TGSI_WRITEMASK_X);
889 ttn_move_dest_masked(b, dest,
890 nir_fdiv(b, abs_srcx, nir_fexp2(b, nir_ffloor(b, log2))),
891 TGSI_WRITEMASK_Y);
892 ttn_move_dest_masked(b, dest, nir_flog2(b, abs_srcx), TGSI_WRITEMASK_Z);
893 ttn_move_dest_masked(b, dest, nir_imm_float(b, 1.0), TGSI_WRITEMASK_W);
894 }
895
896 /* DST - Distance Vector
897 * dst.x = 1.0
898 * dst.y = src0.y \times src1.y
899 * dst.z = src0.z
900 * dst.w = src1.w
901 */
902 static void
903 ttn_dst(nir_builder *b, nir_op op, nir_alu_dest dest, nir_ssa_def **src)
904 {
905 ttn_move_dest_masked(b, dest, nir_imm_float(b, 1.0), TGSI_WRITEMASK_X);
906 ttn_move_dest_masked(b, dest, nir_fmul(b, src[0], src[1]), TGSI_WRITEMASK_Y);
907 ttn_move_dest_masked(b, dest, nir_fmov(b, src[0]), TGSI_WRITEMASK_Z);
908 ttn_move_dest_masked(b, dest, nir_fmov(b, src[1]), TGSI_WRITEMASK_W);
909 }
910
911 /* LIT - Light Coefficients
912 * dst.x = 1.0
913 * dst.y = max(src.x, 0.0)
914 * dst.z = (src.x > 0.0) ? max(src.y, 0.0)^{clamp(src.w, -128.0, 128.0))} : 0
915 * dst.w = 1.0
916 */
917 static void
918 ttn_lit(nir_builder *b, nir_op op, nir_alu_dest dest, nir_ssa_def **src)
919 {
920 ttn_move_dest_masked(b, dest, nir_imm_float(b, 1.0), TGSI_WRITEMASK_XW);
921
922 ttn_move_dest_masked(b, dest, nir_fmax(b, ttn_channel(b, src[0], X),
923 nir_imm_float(b, 0.0)), TGSI_WRITEMASK_Y);
924
925 if (dest.write_mask & TGSI_WRITEMASK_Z) {
926 nir_ssa_def *src0_y = ttn_channel(b, src[0], Y);
927 nir_ssa_def *wclamp = nir_fmax(b, nir_fmin(b, ttn_channel(b, src[0], W),
928 nir_imm_float(b, 128.0)),
929 nir_imm_float(b, -128.0));
930 nir_ssa_def *pow = nir_fpow(b, nir_fmax(b, src0_y, nir_imm_float(b, 0.0)),
931 wclamp);
932
933 ttn_move_dest_masked(b, dest,
934 nir_bcsel(b,
935 nir_flt(b,
936 ttn_channel(b, src[0], X),
937 nir_imm_float(b, 0.0)),
938 nir_imm_float(b, 0.0),
939 pow),
940 TGSI_WRITEMASK_Z);
941 }
942 }
943
944 static void
945 ttn_sle(nir_builder *b, nir_op op, nir_alu_dest dest, nir_ssa_def **src)
946 {
947 ttn_move_dest(b, dest, nir_sge(b, src[1], src[0]));
948 }
949
950 static void
951 ttn_sgt(nir_builder *b, nir_op op, nir_alu_dest dest, nir_ssa_def **src)
952 {
953 ttn_move_dest(b, dest, nir_slt(b, src[1], src[0]));
954 }
955
956 static void
957 ttn_dp2(nir_builder *b, nir_op op, nir_alu_dest dest, nir_ssa_def **src)
958 {
959 ttn_move_dest(b, dest, nir_fdot2(b, src[0], src[1]));
960 }
961
962 static void
963 ttn_dp3(nir_builder *b, nir_op op, nir_alu_dest dest, nir_ssa_def **src)
964 {
965 ttn_move_dest(b, dest, nir_fdot3(b, src[0], src[1]));
966 }
967
968 static void
969 ttn_dp4(nir_builder *b, nir_op op, nir_alu_dest dest, nir_ssa_def **src)
970 {
971 ttn_move_dest(b, dest, nir_fdot4(b, src[0], src[1]));
972 }
973
974 static void
975 ttn_umad(nir_builder *b, nir_op op, nir_alu_dest dest, nir_ssa_def **src)
976 {
977 ttn_move_dest(b, dest, nir_iadd(b, nir_imul(b, src[0], src[1]), src[2]));
978 }
979
980 static void
981 ttn_arr(nir_builder *b, nir_op op, nir_alu_dest dest, nir_ssa_def **src)
982 {
983 ttn_move_dest(b, dest, nir_f2i32(b, nir_fround_even(b, src[0])));
984 }
985
986 static void
987 ttn_cmp(nir_builder *b, nir_op op, nir_alu_dest dest, nir_ssa_def **src)
988 {
989 ttn_move_dest(b, dest, nir_bcsel(b,
990 nir_flt(b, src[0], nir_imm_float(b, 0.0)),
991 src[1], src[2]));
992 }
993
994 static void
995 ttn_ucmp(nir_builder *b, nir_op op, nir_alu_dest dest, nir_ssa_def **src)
996 {
997 ttn_move_dest(b, dest, nir_bcsel(b,
998 nir_ine(b, src[0], nir_imm_int(b, 0)),
999 src[1], src[2]));
1000 }
1001
1002 static void
1003 ttn_kill(nir_builder *b, nir_op op, nir_alu_dest dest, nir_ssa_def **src)
1004 {
1005 nir_intrinsic_instr *discard =
1006 nir_intrinsic_instr_create(b->shader, nir_intrinsic_discard);
1007 nir_builder_instr_insert(b, &discard->instr);
1008 b->shader->info.fs.uses_discard = true;
1009 }
1010
1011 static void
1012 ttn_kill_if(nir_builder *b, nir_op op, nir_alu_dest dest, nir_ssa_def **src)
1013 {
1014 nir_ssa_def *cmp = nir_bany(b, nir_flt(b, src[0], nir_imm_float(b, 0.0)));
1015 nir_intrinsic_instr *discard =
1016 nir_intrinsic_instr_create(b->shader, nir_intrinsic_discard_if);
1017 discard->src[0] = nir_src_for_ssa(cmp);
1018 nir_builder_instr_insert(b, &discard->instr);
1019 b->shader->info.fs.uses_discard = true;
1020 }
1021
1022 static void
1023 ttn_if(struct ttn_compile *c, nir_ssa_def *src, bool is_uint)
1024 {
1025 nir_builder *b = &c->build;
1026 nir_ssa_def *src_x = ttn_channel(b, src, X);
1027
1028 nir_if *if_stmt = nir_if_create(b->shader);
1029 if (is_uint) {
1030 /* equivalent to TGSI UIF, src is interpreted as integer */
1031 if_stmt->condition = nir_src_for_ssa(nir_ine(b, src_x, nir_imm_int(b, 0)));
1032 } else {
1033 /* equivalent to TGSI IF, src is interpreted as float */
1034 if_stmt->condition = nir_src_for_ssa(nir_fne(b, src_x, nir_imm_float(b, 0.0)));
1035 }
1036 nir_builder_cf_insert(b, &if_stmt->cf_node);
1037
1038 c->if_stack[c->if_stack_pos] = nir_after_cf_node(&if_stmt->cf_node);
1039 c->if_stack_pos++;
1040
1041 b->cursor = nir_after_cf_list(&if_stmt->then_list);
1042
1043 c->if_stack[c->if_stack_pos] = nir_after_cf_list(&if_stmt->else_list);
1044 c->if_stack_pos++;
1045 }
1046
1047 static void
1048 ttn_else(struct ttn_compile *c)
1049 {
1050 nir_builder *b = &c->build;
1051
1052 b->cursor = c->if_stack[c->if_stack_pos - 1];
1053 }
1054
1055 static void
1056 ttn_endif(struct ttn_compile *c)
1057 {
1058 nir_builder *b = &c->build;
1059
1060 c->if_stack_pos -= 2;
1061 b->cursor = c->if_stack[c->if_stack_pos];
1062 }
1063
1064 static void
1065 ttn_bgnloop(struct ttn_compile *c)
1066 {
1067 nir_builder *b = &c->build;
1068
1069 nir_loop *loop = nir_loop_create(b->shader);
1070 nir_builder_cf_insert(b, &loop->cf_node);
1071
1072 c->loop_stack[c->loop_stack_pos] = nir_after_cf_node(&loop->cf_node);
1073 c->loop_stack_pos++;
1074
1075 b->cursor = nir_after_cf_list(&loop->body);
1076 }
1077
1078 static void
1079 ttn_cont(nir_builder *b)
1080 {
1081 nir_jump_instr *instr = nir_jump_instr_create(b->shader, nir_jump_continue);
1082 nir_builder_instr_insert(b, &instr->instr);
1083 }
1084
1085 static void
1086 ttn_brk(nir_builder *b)
1087 {
1088 nir_jump_instr *instr = nir_jump_instr_create(b->shader, nir_jump_break);
1089 nir_builder_instr_insert(b, &instr->instr);
1090 }
1091
1092 static void
1093 ttn_endloop(struct ttn_compile *c)
1094 {
1095 nir_builder *b = &c->build;
1096
1097 c->loop_stack_pos--;
1098 b->cursor = c->loop_stack[c->loop_stack_pos];
1099 }
1100
1101 static void
1102 setup_texture_info(nir_tex_instr *instr, unsigned texture)
1103 {
1104 switch (texture) {
1105 case TGSI_TEXTURE_BUFFER:
1106 instr->sampler_dim = GLSL_SAMPLER_DIM_BUF;
1107 break;
1108 case TGSI_TEXTURE_1D:
1109 instr->sampler_dim = GLSL_SAMPLER_DIM_1D;
1110 break;
1111 case TGSI_TEXTURE_1D_ARRAY:
1112 instr->sampler_dim = GLSL_SAMPLER_DIM_1D;
1113 instr->is_array = true;
1114 break;
1115 case TGSI_TEXTURE_SHADOW1D:
1116 instr->sampler_dim = GLSL_SAMPLER_DIM_1D;
1117 instr->is_shadow = true;
1118 break;
1119 case TGSI_TEXTURE_SHADOW1D_ARRAY:
1120 instr->sampler_dim = GLSL_SAMPLER_DIM_1D;
1121 instr->is_shadow = true;
1122 instr->is_array = true;
1123 break;
1124 case TGSI_TEXTURE_2D:
1125 instr->sampler_dim = GLSL_SAMPLER_DIM_2D;
1126 break;
1127 case TGSI_TEXTURE_2D_ARRAY:
1128 instr->sampler_dim = GLSL_SAMPLER_DIM_2D;
1129 instr->is_array = true;
1130 break;
1131 case TGSI_TEXTURE_2D_MSAA:
1132 instr->sampler_dim = GLSL_SAMPLER_DIM_MS;
1133 break;
1134 case TGSI_TEXTURE_2D_ARRAY_MSAA:
1135 instr->sampler_dim = GLSL_SAMPLER_DIM_MS;
1136 instr->is_array = true;
1137 break;
1138 case TGSI_TEXTURE_SHADOW2D:
1139 instr->sampler_dim = GLSL_SAMPLER_DIM_2D;
1140 instr->is_shadow = true;
1141 break;
1142 case TGSI_TEXTURE_SHADOW2D_ARRAY:
1143 instr->sampler_dim = GLSL_SAMPLER_DIM_2D;
1144 instr->is_shadow = true;
1145 instr->is_array = true;
1146 break;
1147 case TGSI_TEXTURE_3D:
1148 instr->sampler_dim = GLSL_SAMPLER_DIM_3D;
1149 break;
1150 case TGSI_TEXTURE_CUBE:
1151 instr->sampler_dim = GLSL_SAMPLER_DIM_CUBE;
1152 break;
1153 case TGSI_TEXTURE_CUBE_ARRAY:
1154 instr->sampler_dim = GLSL_SAMPLER_DIM_CUBE;
1155 instr->is_array = true;
1156 break;
1157 case TGSI_TEXTURE_SHADOWCUBE:
1158 instr->sampler_dim = GLSL_SAMPLER_DIM_CUBE;
1159 instr->is_shadow = true;
1160 break;
1161 case TGSI_TEXTURE_SHADOWCUBE_ARRAY:
1162 instr->sampler_dim = GLSL_SAMPLER_DIM_CUBE;
1163 instr->is_shadow = true;
1164 instr->is_array = true;
1165 break;
1166 case TGSI_TEXTURE_RECT:
1167 instr->sampler_dim = GLSL_SAMPLER_DIM_RECT;
1168 break;
1169 case TGSI_TEXTURE_SHADOWRECT:
1170 instr->sampler_dim = GLSL_SAMPLER_DIM_RECT;
1171 instr->is_shadow = true;
1172 break;
1173 default:
1174 fprintf(stderr, "Unknown TGSI texture target %d\n", texture);
1175 abort();
1176 }
1177 }
1178
1179 static enum glsl_base_type
1180 base_type_for_alu_type(nir_alu_type type)
1181 {
1182 type = nir_alu_type_get_base_type(type);
1183
1184 switch (type) {
1185 case nir_type_float:
1186 return GLSL_TYPE_FLOAT;
1187 case nir_type_int:
1188 return GLSL_TYPE_INT;
1189 case nir_type_uint:
1190 return GLSL_TYPE_UINT;
1191 default:
1192 unreachable("invalid type");
1193 }
1194 }
1195
1196 static nir_variable *
1197 get_sampler_var(struct ttn_compile *c, int binding,
1198 enum glsl_sampler_dim dim,
1199 bool is_shadow,
1200 bool is_array,
1201 enum glsl_base_type base_type)
1202 {
1203 nir_variable *var = c->samplers[binding];
1204 if (!var) {
1205 const struct glsl_type *type =
1206 glsl_sampler_type(dim, is_shadow, is_array, base_type);
1207 var = nir_variable_create(c->build.shader, nir_var_uniform, type,
1208 "sampler");
1209 var->data.binding = binding;
1210 var->data.explicit_binding = true;
1211 c->samplers[binding] = var;
1212 }
1213
1214 return var;
1215 }
1216
1217 static void
1218 ttn_tex(struct ttn_compile *c, nir_alu_dest dest, nir_ssa_def **src)
1219 {
1220 nir_builder *b = &c->build;
1221 struct tgsi_full_instruction *tgsi_inst = &c->token->FullInstruction;
1222 nir_tex_instr *instr;
1223 nir_texop op;
1224 unsigned num_srcs, samp = 1, sview, i;
1225
1226 switch (tgsi_inst->Instruction.Opcode) {
1227 case TGSI_OPCODE_TEX:
1228 op = nir_texop_tex;
1229 num_srcs = 1;
1230 break;
1231 case TGSI_OPCODE_TEX2:
1232 op = nir_texop_tex;
1233 num_srcs = 1;
1234 samp = 2;
1235 break;
1236 case TGSI_OPCODE_TXP:
1237 op = nir_texop_tex;
1238 num_srcs = 2;
1239 break;
1240 case TGSI_OPCODE_TXB:
1241 op = nir_texop_txb;
1242 num_srcs = 2;
1243 break;
1244 case TGSI_OPCODE_TXB2:
1245 op = nir_texop_txb;
1246 num_srcs = 2;
1247 samp = 2;
1248 break;
1249 case TGSI_OPCODE_TXL:
1250 op = nir_texop_txl;
1251 num_srcs = 2;
1252 break;
1253 case TGSI_OPCODE_TXL2:
1254 op = nir_texop_txl;
1255 num_srcs = 2;
1256 samp = 2;
1257 break;
1258 case TGSI_OPCODE_TXF:
1259 if (tgsi_inst->Texture.Texture == TGSI_TEXTURE_2D_MSAA ||
1260 tgsi_inst->Texture.Texture == TGSI_TEXTURE_2D_ARRAY_MSAA) {
1261 op = nir_texop_txf_ms;
1262 } else {
1263 op = nir_texop_txf;
1264 }
1265 num_srcs = 2;
1266 break;
1267 case TGSI_OPCODE_TXD:
1268 op = nir_texop_txd;
1269 num_srcs = 3;
1270 samp = 3;
1271 break;
1272 case TGSI_OPCODE_LODQ:
1273 op = nir_texop_lod;
1274 num_srcs = 1;
1275 break;
1276
1277 default:
1278 fprintf(stderr, "unknown TGSI tex op %d\n", tgsi_inst->Instruction.Opcode);
1279 abort();
1280 }
1281
1282 if (tgsi_inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D ||
1283 tgsi_inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D_ARRAY ||
1284 tgsi_inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D ||
1285 tgsi_inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D_ARRAY ||
1286 tgsi_inst->Texture.Texture == TGSI_TEXTURE_SHADOWRECT ||
1287 tgsi_inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE ||
1288 tgsi_inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE_ARRAY) {
1289 num_srcs++;
1290 }
1291
1292 /* Deref sources */
1293 num_srcs += 2;
1294
1295 num_srcs += tgsi_inst->Texture.NumOffsets;
1296
1297 instr = nir_tex_instr_create(b->shader, num_srcs);
1298 instr->op = op;
1299
1300 setup_texture_info(instr, tgsi_inst->Texture.Texture);
1301
1302 switch (instr->sampler_dim) {
1303 case GLSL_SAMPLER_DIM_1D:
1304 case GLSL_SAMPLER_DIM_BUF:
1305 instr->coord_components = 1;
1306 break;
1307 case GLSL_SAMPLER_DIM_2D:
1308 case GLSL_SAMPLER_DIM_RECT:
1309 case GLSL_SAMPLER_DIM_EXTERNAL:
1310 case GLSL_SAMPLER_DIM_MS:
1311 instr->coord_components = 2;
1312 break;
1313 case GLSL_SAMPLER_DIM_3D:
1314 case GLSL_SAMPLER_DIM_CUBE:
1315 instr->coord_components = 3;
1316 break;
1317 case GLSL_SAMPLER_DIM_SUBPASS:
1318 case GLSL_SAMPLER_DIM_SUBPASS_MS:
1319 unreachable("invalid sampler_dim");
1320 }
1321
1322 if (instr->is_array)
1323 instr->coord_components++;
1324
1325 assert(tgsi_inst->Src[samp].Register.File == TGSI_FILE_SAMPLER);
1326
1327 /* TODO if we supported any opc's which take an explicit SVIEW
1328 * src, we would use that here instead. But for the "legacy"
1329 * texture opc's the SVIEW index is same as SAMP index:
1330 */
1331 sview = tgsi_inst->Src[samp].Register.Index;
1332
1333 if (op == nir_texop_lod) {
1334 instr->dest_type = nir_type_float;
1335 } else if (sview < c->num_samp_types) {
1336 instr->dest_type = c->samp_types[sview];
1337 } else {
1338 instr->dest_type = nir_type_float;
1339 }
1340
1341 nir_variable *var =
1342 get_sampler_var(c, sview, instr->sampler_dim,
1343 instr->is_shadow,
1344 instr->is_array,
1345 base_type_for_alu_type(instr->dest_type));
1346
1347 nir_deref_instr *deref = nir_build_deref_var(b, var);
1348
1349 unsigned src_number = 0;
1350
1351 instr->src[src_number].src = nir_src_for_ssa(&deref->dest.ssa);
1352 instr->src[src_number].src_type = nir_tex_src_texture_deref;
1353 src_number++;
1354 instr->src[src_number].src = nir_src_for_ssa(&deref->dest.ssa);
1355 instr->src[src_number].src_type = nir_tex_src_sampler_deref;
1356 src_number++;
1357
1358 instr->src[src_number].src =
1359 nir_src_for_ssa(nir_swizzle(b, src[0], SWIZ(X, Y, Z, W),
1360 instr->coord_components, false));
1361 instr->src[src_number].src_type = nir_tex_src_coord;
1362 src_number++;
1363
1364 if (tgsi_inst->Instruction.Opcode == TGSI_OPCODE_TXP) {
1365 instr->src[src_number].src = nir_src_for_ssa(ttn_channel(b, src[0], W));
1366 instr->src[src_number].src_type = nir_tex_src_projector;
1367 src_number++;
1368 }
1369
1370 if (tgsi_inst->Instruction.Opcode == TGSI_OPCODE_TXB) {
1371 instr->src[src_number].src = nir_src_for_ssa(ttn_channel(b, src[0], W));
1372 instr->src[src_number].src_type = nir_tex_src_bias;
1373 src_number++;
1374 }
1375
1376 if (tgsi_inst->Instruction.Opcode == TGSI_OPCODE_TXB2) {
1377 instr->src[src_number].src = nir_src_for_ssa(ttn_channel(b, src[1], X));
1378 instr->src[src_number].src_type = nir_tex_src_bias;
1379 src_number++;
1380 }
1381
1382 if (tgsi_inst->Instruction.Opcode == TGSI_OPCODE_TXL) {
1383 instr->src[src_number].src = nir_src_for_ssa(ttn_channel(b, src[0], W));
1384 instr->src[src_number].src_type = nir_tex_src_lod;
1385 src_number++;
1386 }
1387
1388 if (tgsi_inst->Instruction.Opcode == TGSI_OPCODE_TXL2) {
1389 instr->src[src_number].src = nir_src_for_ssa(ttn_channel(b, src[1], X));
1390 instr->src[src_number].src_type = nir_tex_src_lod;
1391 src_number++;
1392 }
1393
1394 if (tgsi_inst->Instruction.Opcode == TGSI_OPCODE_TXF) {
1395 instr->src[src_number].src = nir_src_for_ssa(ttn_channel(b, src[0], W));
1396 if (op == nir_texop_txf_ms)
1397 instr->src[src_number].src_type = nir_tex_src_ms_index;
1398 else
1399 instr->src[src_number].src_type = nir_tex_src_lod;
1400 src_number++;
1401 }
1402
1403 if (tgsi_inst->Instruction.Opcode == TGSI_OPCODE_TXD) {
1404 instr->src[src_number].src_type = nir_tex_src_ddx;
1405 instr->src[src_number].src =
1406 nir_src_for_ssa(nir_swizzle(b, src[1], SWIZ(X, Y, Z, W),
1407 nir_tex_instr_src_size(instr, src_number),
1408 false));
1409 src_number++;
1410 instr->src[src_number].src_type = nir_tex_src_ddy;
1411 instr->src[src_number].src =
1412 nir_src_for_ssa(nir_swizzle(b, src[2], SWIZ(X, Y, Z, W),
1413 nir_tex_instr_src_size(instr, src_number),
1414 false));
1415 src_number++;
1416 }
1417
1418 if (instr->is_shadow) {
1419 if (instr->coord_components == 4)
1420 instr->src[src_number].src = nir_src_for_ssa(ttn_channel(b, src[1], X));
1421 else if (instr->coord_components == 3)
1422 instr->src[src_number].src = nir_src_for_ssa(ttn_channel(b, src[0], W));
1423 else
1424 instr->src[src_number].src = nir_src_for_ssa(ttn_channel(b, src[0], Z));
1425
1426 instr->src[src_number].src_type = nir_tex_src_comparator;
1427 src_number++;
1428 }
1429
1430 for (i = 0; i < tgsi_inst->Texture.NumOffsets; i++) {
1431 struct tgsi_texture_offset *tex_offset = &tgsi_inst->TexOffsets[i];
1432 /* since TexOffset ins't using tgsi_full_src_register we get to
1433 * do some extra gymnastics:
1434 */
1435 nir_alu_src src;
1436
1437 memset(&src, 0, sizeof(src));
1438
1439 src.src = ttn_src_for_file_and_index(c,
1440 tex_offset->File,
1441 tex_offset->Index,
1442 NULL, NULL, NULL);
1443
1444 src.swizzle[0] = tex_offset->SwizzleX;
1445 src.swizzle[1] = tex_offset->SwizzleY;
1446 src.swizzle[2] = tex_offset->SwizzleZ;
1447 src.swizzle[3] = TGSI_SWIZZLE_W;
1448
1449 instr->src[src_number].src_type = nir_tex_src_offset;
1450 instr->src[src_number].src = nir_src_for_ssa(
1451 nir_fmov_alu(b, src, nir_tex_instr_src_size(instr, src_number)));
1452 src_number++;
1453 }
1454
1455 assert(src_number == num_srcs);
1456 assert(src_number == instr->num_srcs);
1457
1458 nir_ssa_dest_init(&instr->instr, &instr->dest,
1459 nir_tex_instr_dest_size(instr),
1460 32, NULL);
1461 nir_builder_instr_insert(b, &instr->instr);
1462
1463 /* Resolve the writemask on the texture op. */
1464 ttn_move_dest(b, dest, &instr->dest.ssa);
1465 }
1466
1467 /* TGSI_OPCODE_TXQ is actually two distinct operations:
1468 *
1469 * dst.x = texture\_width(unit, lod)
1470 * dst.y = texture\_height(unit, lod)
1471 * dst.z = texture\_depth(unit, lod)
1472 * dst.w = texture\_levels(unit)
1473 *
1474 * dst.xyz map to NIR txs opcode, and dst.w maps to query_levels
1475 */
1476 static void
1477 ttn_txq(struct ttn_compile *c, nir_alu_dest dest, nir_ssa_def **src)
1478 {
1479 nir_builder *b = &c->build;
1480 struct tgsi_full_instruction *tgsi_inst = &c->token->FullInstruction;
1481 nir_tex_instr *txs, *qlv;
1482
1483 txs = nir_tex_instr_create(b->shader, 2);
1484 txs->op = nir_texop_txs;
1485 setup_texture_info(txs, tgsi_inst->Texture.Texture);
1486
1487 qlv = nir_tex_instr_create(b->shader, 1);
1488 qlv->op = nir_texop_query_levels;
1489 setup_texture_info(qlv, tgsi_inst->Texture.Texture);
1490
1491 assert(tgsi_inst->Src[1].Register.File == TGSI_FILE_SAMPLER);
1492 int tex_index = tgsi_inst->Src[1].Register.Index;
1493
1494 nir_variable *var =
1495 get_sampler_var(c, tex_index, txs->sampler_dim,
1496 txs->is_shadow,
1497 txs->is_array,
1498 base_type_for_alu_type(txs->dest_type));
1499
1500 nir_deref_instr *deref = nir_build_deref_var(b, var);
1501
1502 txs->src[0].src = nir_src_for_ssa(&deref->dest.ssa);
1503 txs->src[0].src_type = nir_tex_src_texture_deref;
1504
1505 qlv->src[0].src = nir_src_for_ssa(&deref->dest.ssa);
1506 qlv->src[0].src_type = nir_tex_src_texture_deref;
1507
1508 /* lod: */
1509 txs->src[1].src = nir_src_for_ssa(ttn_channel(b, src[0], X));
1510 txs->src[1].src_type = nir_tex_src_lod;
1511
1512 nir_ssa_dest_init(&txs->instr, &txs->dest,
1513 nir_tex_instr_dest_size(txs), 32, NULL);
1514 nir_builder_instr_insert(b, &txs->instr);
1515
1516 nir_ssa_dest_init(&qlv->instr, &qlv->dest, 1, 32, NULL);
1517 nir_builder_instr_insert(b, &qlv->instr);
1518
1519 ttn_move_dest_masked(b, dest, &txs->dest.ssa, TGSI_WRITEMASK_XYZ);
1520 ttn_move_dest_masked(b, dest, &qlv->dest.ssa, TGSI_WRITEMASK_W);
1521 }
1522
/* Per-opcode translation table from TGSI opcodes to NIR ALU opcodes.
 *
 * ttn_emit_instruction() consults this table in its default case: a
 * non-zero entry (or TGSI_OPCODE_MOV) is emitted through ttn_alu(), while
 * a zero entry there is reported as an unknown opcode.  Opcodes that have
 * their own case in ttn_emit_instruction() are listed with 0 here, as are
 * the ones marked XXX.
 */
static const nir_op op_trans[TGSI_OPCODE_LAST] = {
   [TGSI_OPCODE_ARL] = 0,
   [TGSI_OPCODE_MOV] = nir_op_fmov,
   [TGSI_OPCODE_LIT] = 0,
   [TGSI_OPCODE_RCP] = nir_op_frcp,
   [TGSI_OPCODE_RSQ] = nir_op_frsq,
   [TGSI_OPCODE_EXP] = 0,
   [TGSI_OPCODE_LOG] = 0,
   [TGSI_OPCODE_MUL] = nir_op_fmul,
   [TGSI_OPCODE_ADD] = nir_op_fadd,
   [TGSI_OPCODE_DP3] = 0,
   [TGSI_OPCODE_DP4] = 0,
   [TGSI_OPCODE_DST] = 0,
   [TGSI_OPCODE_MIN] = nir_op_fmin,
   [TGSI_OPCODE_MAX] = nir_op_fmax,
   [TGSI_OPCODE_SLT] = nir_op_slt,
   [TGSI_OPCODE_SGE] = nir_op_sge,
   [TGSI_OPCODE_MAD] = nir_op_ffma,
   [TGSI_OPCODE_LRP] = 0,
   [TGSI_OPCODE_SQRT] = nir_op_fsqrt,
   [TGSI_OPCODE_FRC] = nir_op_ffract,
   [TGSI_OPCODE_FLR] = nir_op_ffloor,
   [TGSI_OPCODE_ROUND] = nir_op_fround_even,
   [TGSI_OPCODE_EX2] = nir_op_fexp2,
   [TGSI_OPCODE_LG2] = nir_op_flog2,
   [TGSI_OPCODE_POW] = nir_op_fpow,
   [TGSI_OPCODE_COS] = nir_op_fcos,
   [TGSI_OPCODE_DDX] = nir_op_fddx,
   [TGSI_OPCODE_DDY] = nir_op_fddy,
   [TGSI_OPCODE_KILL] = 0,
   [TGSI_OPCODE_PK2H] = 0, /* XXX */
   [TGSI_OPCODE_PK2US] = 0, /* XXX */
   [TGSI_OPCODE_PK4B] = 0, /* XXX */
   [TGSI_OPCODE_PK4UB] = 0, /* XXX */
   [TGSI_OPCODE_SEQ] = nir_op_seq,
   [TGSI_OPCODE_SGT] = 0,
   [TGSI_OPCODE_SIN] = nir_op_fsin,
   [TGSI_OPCODE_SNE] = nir_op_sne,
   [TGSI_OPCODE_SLE] = 0,
   [TGSI_OPCODE_TEX] = 0,
   [TGSI_OPCODE_TXD] = 0,
   [TGSI_OPCODE_TXP] = 0,
   [TGSI_OPCODE_UP2H] = 0, /* XXX */
   [TGSI_OPCODE_UP2US] = 0, /* XXX */
   [TGSI_OPCODE_UP4B] = 0, /* XXX */
   [TGSI_OPCODE_UP4UB] = 0, /* XXX */
   [TGSI_OPCODE_ARR] = 0,

   /* No function calls, yet. */
   [TGSI_OPCODE_CAL] = 0, /* XXX */
   [TGSI_OPCODE_RET] = 0, /* XXX */

   [TGSI_OPCODE_SSG] = nir_op_fsign,
   [TGSI_OPCODE_CMP] = 0,
   [TGSI_OPCODE_TXB] = 0,
   [TGSI_OPCODE_DIV] = nir_op_fdiv,
   [TGSI_OPCODE_DP2] = 0,
   [TGSI_OPCODE_TXL] = 0,

   [TGSI_OPCODE_BRK] = 0,
   [TGSI_OPCODE_IF] = 0,
   [TGSI_OPCODE_UIF] = 0,
   [TGSI_OPCODE_ELSE] = 0,
   [TGSI_OPCODE_ENDIF] = 0,

   [TGSI_OPCODE_DDX_FINE] = nir_op_fddx_fine,
   [TGSI_OPCODE_DDY_FINE] = nir_op_fddy_fine,

   [TGSI_OPCODE_CEIL] = nir_op_fceil,
   [TGSI_OPCODE_I2F] = nir_op_i2f32,
   [TGSI_OPCODE_NOT] = nir_op_inot,
   [TGSI_OPCODE_TRUNC] = nir_op_ftrunc,
   [TGSI_OPCODE_SHL] = nir_op_ishl,
   [TGSI_OPCODE_AND] = nir_op_iand,
   [TGSI_OPCODE_OR] = nir_op_ior,
   [TGSI_OPCODE_MOD] = nir_op_umod,
   [TGSI_OPCODE_XOR] = nir_op_ixor,
   [TGSI_OPCODE_TXF] = 0,
   [TGSI_OPCODE_TXQ] = 0,

   [TGSI_OPCODE_CONT] = 0,

   [TGSI_OPCODE_EMIT] = 0, /* XXX */
   [TGSI_OPCODE_ENDPRIM] = 0, /* XXX */

   [TGSI_OPCODE_BGNLOOP] = 0,
   [TGSI_OPCODE_BGNSUB] = 0, /* XXX: no function calls */
   [TGSI_OPCODE_ENDLOOP] = 0,
   [TGSI_OPCODE_ENDSUB] = 0, /* XXX: no function calls */

   [TGSI_OPCODE_NOP] = 0,
   [TGSI_OPCODE_FSEQ] = nir_op_feq32,
   [TGSI_OPCODE_FSGE] = nir_op_fge32,
   [TGSI_OPCODE_FSLT] = nir_op_flt32,
   [TGSI_OPCODE_FSNE] = nir_op_fne32,

   [TGSI_OPCODE_KILL_IF] = 0,

   [TGSI_OPCODE_END] = 0,

   [TGSI_OPCODE_F2I] = nir_op_f2i32,
   [TGSI_OPCODE_IDIV] = nir_op_idiv,
   [TGSI_OPCODE_IMAX] = nir_op_imax,
   [TGSI_OPCODE_IMIN] = nir_op_imin,
   [TGSI_OPCODE_INEG] = nir_op_ineg,
   [TGSI_OPCODE_ISGE] = nir_op_ige32,
   [TGSI_OPCODE_ISHR] = nir_op_ishr,
   [TGSI_OPCODE_ISLT] = nir_op_ilt32,
   [TGSI_OPCODE_F2U] = nir_op_f2u32,
   [TGSI_OPCODE_U2F] = nir_op_u2f32,
   [TGSI_OPCODE_UADD] = nir_op_iadd,
   [TGSI_OPCODE_UDIV] = nir_op_udiv,
   [TGSI_OPCODE_UMAD] = 0,
   [TGSI_OPCODE_UMAX] = nir_op_umax,
   [TGSI_OPCODE_UMIN] = nir_op_umin,
   [TGSI_OPCODE_UMOD] = nir_op_umod,
   [TGSI_OPCODE_UMUL] = nir_op_imul,
   [TGSI_OPCODE_USEQ] = nir_op_ieq32,
   [TGSI_OPCODE_USGE] = nir_op_uge32,
   [TGSI_OPCODE_USHR] = nir_op_ushr,
   [TGSI_OPCODE_USLT] = nir_op_ult32,
   [TGSI_OPCODE_USNE] = nir_op_ine32,

   [TGSI_OPCODE_SWITCH] = 0, /* not emitted by glsl_to_tgsi.cpp */
   [TGSI_OPCODE_CASE] = 0, /* not emitted by glsl_to_tgsi.cpp */
   [TGSI_OPCODE_DEFAULT] = 0, /* not emitted by glsl_to_tgsi.cpp */
   [TGSI_OPCODE_ENDSWITCH] = 0, /* not emitted by glsl_to_tgsi.cpp */

   /* XXX: SAMPLE opcodes */

   [TGSI_OPCODE_UARL] = nir_op_imov,
   [TGSI_OPCODE_UCMP] = 0,
   [TGSI_OPCODE_IABS] = nir_op_iabs,
   [TGSI_OPCODE_ISSG] = nir_op_isign,

   /* XXX: atomics */

   [TGSI_OPCODE_TEX2] = 0,
   [TGSI_OPCODE_TXB2] = 0,
   [TGSI_OPCODE_TXL2] = 0,

   [TGSI_OPCODE_IMUL_HI] = nir_op_imul_high,
   [TGSI_OPCODE_UMUL_HI] = nir_op_umul_high,

   [TGSI_OPCODE_TG4] = 0,
   [TGSI_OPCODE_LODQ] = 0,

   [TGSI_OPCODE_IBFE] = nir_op_ibitfield_extract,
   [TGSI_OPCODE_UBFE] = nir_op_ubitfield_extract,
   [TGSI_OPCODE_BFI] = nir_op_bitfield_insert,
   [TGSI_OPCODE_BREV] = nir_op_bitfield_reverse,
   [TGSI_OPCODE_POPC] = nir_op_bit_count,
   [TGSI_OPCODE_LSB] = nir_op_find_lsb,
   [TGSI_OPCODE_IMSB] = nir_op_ifind_msb,
   [TGSI_OPCODE_UMSB] = nir_op_ufind_msb,

   [TGSI_OPCODE_INTERP_CENTROID] = 0, /* XXX */
   [TGSI_OPCODE_INTERP_SAMPLE] = 0, /* XXX */
   [TGSI_OPCODE_INTERP_OFFSET] = 0, /* XXX */
};
1683
/* Translates the TGSI instruction currently held in c->token into NIR.
 *
 * All sources are fetched up front, the opcode is dispatched to its
 * handler, saturation is applied afterwards if requested, and finally the
 * result is copied into a variable when the destination has one.
 * TGSI_OPCODE_END produces nothing.  An opcode with neither a dedicated
 * case nor an op_trans entry prints a message and aborts.
 */
static void
ttn_emit_instruction(struct ttn_compile *c)
{
   nir_builder *b = &c->build;
   struct tgsi_full_instruction *tgsi_inst = &c->token->FullInstruction;
   unsigned i;
   unsigned tgsi_op = tgsi_inst->Instruction.Opcode;
   struct tgsi_full_dst_register *tgsi_dst = &tgsi_inst->Dst[0];

   if (tgsi_op == TGSI_OPCODE_END)
      return;

   /* Only the first NumSrcRegs entries are filled in; the remaining
    * entries are left uninitialized.
    */
   nir_ssa_def *src[TGSI_FULL_MAX_SRC_REGISTERS];
   for (i = 0; i < tgsi_inst->Instruction.NumSrcRegs; i++) {
      src[i] = ttn_get_src(c, &tgsi_inst->Src[i], i);
   }
   nir_alu_dest dest = ttn_get_dest(c, tgsi_dst);

   switch (tgsi_op) {
   /* The following opcodes operate on the X channel of their source(s)
    * only.
    */
   case TGSI_OPCODE_RSQ:
      ttn_move_dest(b, dest, nir_frsq(b, ttn_channel(b, src[0], X)));
      break;

   case TGSI_OPCODE_SQRT:
      ttn_move_dest(b, dest, nir_fsqrt(b, ttn_channel(b, src[0], X)));
      break;

   case TGSI_OPCODE_RCP:
      ttn_move_dest(b, dest, nir_frcp(b, ttn_channel(b, src[0], X)));
      break;

   case TGSI_OPCODE_EX2:
      ttn_move_dest(b, dest, nir_fexp2(b, ttn_channel(b, src[0], X)));
      break;

   case TGSI_OPCODE_LG2:
      ttn_move_dest(b, dest, nir_flog2(b, ttn_channel(b, src[0], X)));
      break;

   case TGSI_OPCODE_POW:
      ttn_move_dest(b, dest, nir_fpow(b,
                                      ttn_channel(b, src[0], X),
                                      ttn_channel(b, src[1], X)));
      break;

   case TGSI_OPCODE_COS:
      ttn_move_dest(b, dest, nir_fcos(b, ttn_channel(b, src[0], X)));
      break;

   case TGSI_OPCODE_SIN:
      ttn_move_dest(b, dest, nir_fsin(b, ttn_channel(b, src[0], X)));
      break;

   /* Opcodes with a dedicated helper; their op_trans entries are 0. */
   case TGSI_OPCODE_ARL:
      ttn_arl(b, op_trans[tgsi_op], dest, src);
      break;

   case TGSI_OPCODE_EXP:
      ttn_exp(b, op_trans[tgsi_op], dest, src);
      break;

   case TGSI_OPCODE_LOG:
      ttn_log(b, op_trans[tgsi_op], dest, src);
      break;

   case TGSI_OPCODE_DST:
      ttn_dst(b, op_trans[tgsi_op], dest, src);
      break;

   case TGSI_OPCODE_LIT:
      ttn_lit(b, op_trans[tgsi_op], dest, src);
      break;

   case TGSI_OPCODE_DP2:
      ttn_dp2(b, op_trans[tgsi_op], dest, src);
      break;

   case TGSI_OPCODE_DP3:
      ttn_dp3(b, op_trans[tgsi_op], dest, src);
      break;

   case TGSI_OPCODE_DP4:
      ttn_dp4(b, op_trans[tgsi_op], dest, src);
      break;

   case TGSI_OPCODE_UMAD:
      ttn_umad(b, op_trans[tgsi_op], dest, src);
      break;

   case TGSI_OPCODE_LRP:
      /* The TGSI operands are passed to nir_flrp in reverse order. */
      ttn_move_dest(b, dest, nir_flrp(b, src[2], src[1], src[0]));
      break;

   case TGSI_OPCODE_KILL:
      ttn_kill(b, op_trans[tgsi_op], dest, src);
      break;

   case TGSI_OPCODE_ARR:
      ttn_arr(b, op_trans[tgsi_op], dest, src);
      break;

   case TGSI_OPCODE_CMP:
      ttn_cmp(b, op_trans[tgsi_op], dest, src);
      break;

   case TGSI_OPCODE_UCMP:
      ttn_ucmp(b, op_trans[tgsi_op], dest, src);
      break;

   case TGSI_OPCODE_SGT:
      ttn_sgt(b, op_trans[tgsi_op], dest, src);
      break;

   case TGSI_OPCODE_SLE:
      ttn_sle(b, op_trans[tgsi_op], dest, src);
      break;

   case TGSI_OPCODE_KILL_IF:
      ttn_kill_if(b, op_trans[tgsi_op], dest, src);
      break;

   /* Texture sampling and fetch opcodes share one translator. */
   case TGSI_OPCODE_TEX:
   case TGSI_OPCODE_TXP:
   case TGSI_OPCODE_TXL:
   case TGSI_OPCODE_TXB:
   case TGSI_OPCODE_TXD:
   case TGSI_OPCODE_TEX2:
   case TGSI_OPCODE_TXL2:
   case TGSI_OPCODE_TXB2:
   case TGSI_OPCODE_TXF:
   case TGSI_OPCODE_TG4:
   case TGSI_OPCODE_LODQ:
      ttn_tex(c, dest, src);
      break;

   case TGSI_OPCODE_TXQ:
      ttn_txq(c, dest, src);
      break;

   case TGSI_OPCODE_NOP:
      break;

   /* Structured control flow. */
   case TGSI_OPCODE_IF:
      ttn_if(c, src[0], false);
      break;

   case TGSI_OPCODE_UIF:
      ttn_if(c, src[0], true);
      break;

   case TGSI_OPCODE_ELSE:
      ttn_else(c);
      break;

   case TGSI_OPCODE_ENDIF:
      ttn_endif(c);
      break;

   case TGSI_OPCODE_BGNLOOP:
      ttn_bgnloop(c);
      break;

   case TGSI_OPCODE_BRK:
      ttn_brk(b);
      break;

   case TGSI_OPCODE_CONT:
      ttn_cont(b);
      break;

   case TGSI_OPCODE_ENDLOOP:
      ttn_endloop(c);
      break;

   default:
      /* Everything else goes through the op_trans table.  MOV is accepted
       * explicitly in addition to the non-zero entries.
       */
      if (op_trans[tgsi_op] != 0 || tgsi_op == TGSI_OPCODE_MOV) {
         ttn_alu(b, op_trans[tgsi_op], dest, src);
      } else {
         fprintf(stderr, "unknown TGSI opcode: %s\n",
                 tgsi_get_opcode_name(tgsi_op));
         abort();
      }
      break;
   }

   if (tgsi_inst->Instruction.Saturate) {
      /* Saturation reads the just-written destination back and rewrites
       * it, so the destination must not be an SSA value.
       */
      assert(!dest.dest.is_ssa);
      ttn_move_dest(b, dest, nir_fsat(b, ttn_src_for_dest(b, &dest)));
   }

   /* if the dst has a matching var, append store_var to move
    * output from reg to var
    */
   nir_variable *var = ttn_get_var(c, tgsi_dst);
   if (var) {
      unsigned index = tgsi_dst->Register.Index;
      unsigned offset = c->temp_regs[index].offset;
      struct tgsi_ind_register *indirect = tgsi_dst->Register.Indirect ?
                                           &tgsi_dst->Indirect : NULL;
      nir_src val = nir_src_for_reg(dest.dest.reg.reg);
      nir_store_deref(b, ttn_array_deref(c, var, offset, indirect),
                      nir_ssa_for_src(b, val, 4), dest.write_mask);
   }
}
1888
1889 /**
1890 * Puts a NIR intrinsic to store of each TGSI_FILE_OUTPUT value to the output
1891 * variables at the end of the shader.
1892 *
1893 * We don't generate these incrementally as the TGSI_FILE_OUTPUT values are
1894 * written, because there's no output load intrinsic, which means we couldn't
1895 * handle writemasks.
1896 */
1897 static void
1898 ttn_add_output_stores(struct ttn_compile *c)
1899 {
1900 nir_builder *b = &c->build;
1901
1902 for (int i = 0; i < c->build.shader->num_outputs; i++) {
1903 nir_variable *var = c->outputs[i];
1904 if (!var)
1905 continue;
1906
1907 nir_src src = nir_src_for_reg(c->output_regs[i].reg);
1908 src.reg.base_offset = c->output_regs[i].offset;
1909
1910 nir_ssa_def *store_value = nir_ssa_for_src(b, src, 4);
1911 if (c->build.shader->info.stage == MESA_SHADER_FRAGMENT &&
1912 var->data.location == FRAG_RESULT_DEPTH) {
1913 /* TGSI uses TGSI_SEMANTIC_POSITION.z for the depth output, while
1914 * NIR uses a single float FRAG_RESULT_DEPTH.
1915 */
1916 store_value = nir_channel(b, store_value, 2);
1917 }
1918
1919 nir_store_deref(b, nir_build_deref_var(b, var), store_value,
1920 (1 << store_value->num_components) - 1);
1921 }
1922 }
1923
1924 /**
1925 * Parses the given TGSI tokens.
1926 */
1927 static void
1928 ttn_parse_tgsi(struct ttn_compile *c, const void *tgsi_tokens)
1929 {
1930 struct tgsi_parse_context parser;
1931 int ret;
1932
1933 ret = tgsi_parse_init(&parser, tgsi_tokens);
1934 assert(ret == TGSI_PARSE_OK);
1935
1936 while (!tgsi_parse_end_of_tokens(&parser)) {
1937 tgsi_parse_token(&parser);
1938 c->token = &parser.FullToken;
1939
1940 switch (parser.FullToken.Token.Type) {
1941 case TGSI_TOKEN_TYPE_DECLARATION:
1942 ttn_emit_declaration(c);
1943 break;
1944
1945 case TGSI_TOKEN_TYPE_INSTRUCTION:
1946 ttn_emit_instruction(c);
1947 break;
1948
1949 case TGSI_TOKEN_TYPE_IMMEDIATE:
1950 ttn_emit_immediate(c);
1951 break;
1952 }
1953 }
1954
1955 tgsi_parse_free(&parser);
1956 }
1957
1958 static void
1959 ttn_read_pipe_caps(struct ttn_compile *c,
1960 struct pipe_screen *screen)
1961 {
1962 c->cap_scalar = screen->get_shader_param(screen, c->scan->processor, PIPE_SHADER_CAP_SCALAR_ISA);
1963 c->cap_packed_uniforms = screen->get_param(screen, PIPE_CAP_PACKED_UNIFORMS);
1964 c->cap_samplers_as_deref = screen->get_param(screen, PIPE_CAP_NIR_SAMPLERS_AS_DEREF);
1965 c->cap_face_is_sysval = screen->get_param(screen, PIPE_CAP_TGSI_FS_FACE_IS_INTEGER_SYSVAL);
1966 c->cap_position_is_sysval = screen->get_param(screen, PIPE_CAP_TGSI_FS_POSITION_IS_SYSVAL);
1967 }
1968
1969 /**
1970 * Initializes a TGSI-to-NIR compiler.
1971 */
1972 static struct ttn_compile *
1973 ttn_compile_init(const void *tgsi_tokens,
1974 const nir_shader_compiler_options *options,
1975 struct pipe_screen *screen)
1976 {
1977 struct ttn_compile *c;
1978 struct nir_shader *s;
1979 struct tgsi_shader_info scan;
1980
1981 assert(options || screen);
1982 c = rzalloc(NULL, struct ttn_compile);
1983
1984 tgsi_scan_shader(tgsi_tokens, &scan);
1985 c->scan = &scan;
1986
1987 if (!options) {
1988 options =
1989 screen->get_compiler_options(screen, PIPE_SHADER_IR_NIR, scan.processor);
1990 }
1991
1992 nir_builder_init_simple_shader(&c->build, NULL,
1993 tgsi_processor_to_shader_stage(scan.processor),
1994 options);
1995
1996 s = c->build.shader;
1997
1998 if (screen) {
1999 ttn_read_pipe_caps(c, screen);
2000 } else {
2001 /* TTN used to be hard coded to always make FACE a sysval,
2002 * so it makes sense to preserve that behavior so users don't break. */
2003 c->cap_face_is_sysval = true;
2004 }
2005
2006 if (s->info.stage == MESA_SHADER_FRAGMENT)
2007 s->info.fs.untyped_color_outputs = true;
2008
2009 s->num_inputs = scan.file_max[TGSI_FILE_INPUT] + 1;
2010 s->num_uniforms = scan.const_file_max[0] + 1;
2011 s->num_outputs = scan.file_max[TGSI_FILE_OUTPUT] + 1;
2012
2013 s->info.vs.window_space_position = scan.properties[TGSI_PROPERTY_VS_WINDOW_SPACE_POSITION];
2014
2015 c->inputs = rzalloc_array(c, struct nir_variable *, s->num_inputs);
2016 c->outputs = rzalloc_array(c, struct nir_variable *, s->num_outputs);
2017
2018 c->output_regs = rzalloc_array(c, struct ttn_reg_info,
2019 scan.file_max[TGSI_FILE_OUTPUT] + 1);
2020 c->temp_regs = rzalloc_array(c, struct ttn_reg_info,
2021 scan.file_max[TGSI_FILE_TEMPORARY] + 1);
2022 c->imm_defs = rzalloc_array(c, nir_ssa_def *,
2023 scan.file_max[TGSI_FILE_IMMEDIATE] + 1);
2024
2025 c->num_samp_types = scan.file_max[TGSI_FILE_SAMPLER_VIEW] + 1;
2026 c->samp_types = rzalloc_array(c, nir_alu_type, c->num_samp_types);
2027
2028 c->if_stack = rzalloc_array(c, nir_cursor,
2029 (scan.opcode_count[TGSI_OPCODE_IF] +
2030 scan.opcode_count[TGSI_OPCODE_UIF]) * 2);
2031 c->loop_stack = rzalloc_array(c, nir_cursor,
2032 scan.opcode_count[TGSI_OPCODE_BGNLOOP]);
2033
2034
2035 ttn_parse_tgsi(c, tgsi_tokens);
2036 ttn_add_output_stores(c);
2037
2038 nir_validate_shader(c->build.shader, "TTN: after parsing TGSI and creating the NIR shader");
2039
2040 return c;
2041 }
2042
2043 static void
2044 ttn_optimize_nir(nir_shader *nir, bool scalar)
2045 {
2046 bool progress;
2047 do {
2048 progress = false;
2049
2050 NIR_PASS_V(nir, nir_lower_vars_to_ssa);
2051
2052 if (scalar) {
2053 NIR_PASS_V(nir, nir_lower_alu_to_scalar);
2054 NIR_PASS_V(nir, nir_lower_phis_to_scalar);
2055 }
2056
2057 NIR_PASS_V(nir, nir_lower_alu);
2058 NIR_PASS_V(nir, nir_lower_pack);
2059 NIR_PASS(progress, nir, nir_copy_prop);
2060 NIR_PASS(progress, nir, nir_opt_remove_phis);
2061 NIR_PASS(progress, nir, nir_opt_dce);
2062
2063 if (nir_opt_trivial_continues(nir)) {
2064 progress = true;
2065 NIR_PASS(progress, nir, nir_copy_prop);
2066 NIR_PASS(progress, nir, nir_opt_dce);
2067 }
2068
2069 NIR_PASS(progress, nir, nir_opt_if, false);
2070 NIR_PASS(progress, nir, nir_opt_dead_cf);
2071 NIR_PASS(progress, nir, nir_opt_cse);
2072 NIR_PASS(progress, nir, nir_opt_peephole_select, 8, true, true);
2073
2074 NIR_PASS(progress, nir, nir_opt_algebraic);
2075 NIR_PASS(progress, nir, nir_opt_constant_folding);
2076
2077 NIR_PASS(progress, nir, nir_opt_undef);
2078 NIR_PASS(progress, nir, nir_opt_conditional_discard);
2079
2080 if (nir->options->max_unroll_iterations) {
2081 NIR_PASS(progress, nir, nir_opt_loop_unroll, (nir_variable_mode)0);
2082 }
2083
2084 } while (progress);
2085
2086 }
2087
2088 /**
2089 * Finalizes the NIR in a similar way as st_glsl_to_nir does.
2090 *
2091 * Drivers expect that these passes are already performed,
2092 * so we have to do it here too.
2093 */
2094 static void
2095 ttn_finalize_nir(struct ttn_compile *c)
2096 {
2097 struct nir_shader *nir = c->build.shader;
2098
2099 NIR_PASS_V(nir, nir_lower_vars_to_ssa);
2100 NIR_PASS_V(nir, nir_lower_regs_to_ssa);
2101
2102 NIR_PASS_V(nir, nir_lower_global_vars_to_local);
2103 NIR_PASS_V(nir, nir_split_var_copies);
2104 NIR_PASS_V(nir, nir_lower_var_copies);
2105 NIR_PASS_V(nir, nir_lower_system_values);
2106
2107 if (c->cap_packed_uniforms)
2108 NIR_PASS_V(nir, nir_lower_uniforms_to_ubo, 16);
2109
2110 if (c->cap_samplers_as_deref)
2111 NIR_PASS_V(nir, gl_nir_lower_samplers_as_deref, NULL);
2112 else
2113 NIR_PASS_V(nir, gl_nir_lower_samplers, NULL);
2114
2115 ttn_optimize_nir(nir, c->cap_scalar);
2116 nir_shader_gather_info(nir, c->build.impl);
2117 nir_validate_shader(nir, "TTN: after all optimizations");
2118 }
2119
2120 struct nir_shader *
2121 tgsi_to_nir(const void *tgsi_tokens,
2122 struct pipe_screen *screen)
2123 {
2124 struct ttn_compile *c;
2125 struct nir_shader *s;
2126
2127 c = ttn_compile_init(tgsi_tokens, NULL, screen);
2128 s = c->build.shader;
2129 ttn_finalize_nir(c);
2130 ralloc_free(c);
2131
2132 return s;
2133 }
2134
2135 struct nir_shader *
2136 tgsi_to_nir_noscreen(const void *tgsi_tokens,
2137 const nir_shader_compiler_options *options)
2138 {
2139 struct ttn_compile *c;
2140 struct nir_shader *s;
2141
2142 c = ttn_compile_init(tgsi_tokens, options, NULL);
2143 s = c->build.shader;
2144 ralloc_free(c);
2145
2146 return s;
2147 }
2148