2 * Copyright © 2014-2015 Broadcom
3 * Copyright (C) 2014 Rob Clark <robclark@freedesktop.org>
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9 * and/or sell copies of the Software, and to permit persons to whom the
10 * Software is furnished to do so, subject to the following conditions:
12 * The above copyright notice and this permission notice (including the next
13 * paragraph) shall be included in all copies or substantial portions of the
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
25 #include "util/ralloc.h"
26 #include "glsl/nir/nir.h"
27 #include "glsl/nir/nir_builder.h"
28 #include "glsl/list.h"
29 #include "glsl/shader_enums.h"
31 #include "nir/tgsi_to_nir.h"
32 #include "tgsi/tgsi_parse.h"
33 #include "tgsi/tgsi_dump.h"
34 #include "tgsi/tgsi_info.h"
35 #include "tgsi/tgsi_scan.h"
37 #define SWIZ(X, Y, Z, W) (unsigned[4]){ \
45 /** nir register containing this TGSI index. */
47 /** Offset (in vec4s) from the start of var for this TGSI index. */
52 union tgsi_full_token
*token
;
55 struct tgsi_shader_info
*scan
;
57 struct ttn_reg_info
*output_regs
;
58 struct ttn_reg_info
*temp_regs
;
59 nir_ssa_def
**imm_defs
;
61 nir_register
*addr_reg
;
64 * Stack of cf_node_lists where instructions should be pushed as we pop
65 * back out of the control flow stack.
67 * For each IF/ELSE/ENDIF block, if_stack[if_stack_pos] has where the else
68 * instructions should be placed, and if_stack[if_stack_pos - 1] has where
69 * the next instructions outside of the if/then/else block go.
71 struct exec_list
**if_stack
;
72 unsigned if_stack_pos
;
75 * Stack of cf_node_lists where instructions should be pushed as we pop
76 * back out of the control flow stack.
78 * loop_stack[loop_stack_pos - 1] contains the cf_node_list for the outside
81 struct exec_list
**loop_stack
;
82 unsigned loop_stack_pos
;
84 /* How many TGSI_FILE_IMMEDIATE vec4s have been parsed so far. */
88 #define ttn_swizzle(b, src, x, y, z, w) \
89 nir_swizzle(b, src, SWIZ(x, y, z, w), 4, false)
90 #define ttn_channel(b, src, swiz) \
91 nir_swizzle(b, src, SWIZ(swiz, swiz, swiz, swiz), 1, false)
94 ttn_src_for_dest(nir_builder
*b
, nir_alu_dest
*dest
)
97 memset(&src
, 0, sizeof(src
));
99 if (dest
->dest
.is_ssa
)
100 src
.src
= nir_src_for_ssa(&dest
->dest
.ssa
);
102 assert(!dest
->dest
.reg
.indirect
);
103 src
.src
= nir_src_for_reg(dest
->dest
.reg
.reg
);
104 src
.src
.reg
.base_offset
= dest
->dest
.reg
.base_offset
;
107 for (int i
= 0; i
< 4; i
++)
110 return nir_fmov_alu(b
, src
, 4);
114 ttn_emit_declaration(struct ttn_compile
*c
)
116 nir_builder
*b
= &c
->build
;
117 struct tgsi_full_declaration
*decl
= &c
->token
->FullDeclaration
;
118 unsigned array_size
= decl
->Range
.Last
- decl
->Range
.First
+ 1;
119 unsigned file
= decl
->Declaration
.File
;
122 if (file
== TGSI_FILE_TEMPORARY
) {
124 if (c
->scan
->indirect_files
& (1 << file
)) {
125 reg
= nir_local_reg_create(b
->impl
);
126 reg
->num_components
= 4;
127 reg
->num_array_elems
= array_size
;
129 for (i
= 0; i
< array_size
; i
++) {
130 c
->temp_regs
[decl
->Range
.First
+ i
].reg
= reg
;
131 c
->temp_regs
[decl
->Range
.First
+ i
].offset
= i
;
134 for (i
= 0; i
< array_size
; i
++) {
135 reg
= nir_local_reg_create(b
->impl
);
136 reg
->num_components
= 4;
137 c
->temp_regs
[decl
->Range
.First
+ i
].reg
= reg
;
138 c
->temp_regs
[decl
->Range
.First
+ i
].offset
= 0;
141 } else if (file
== TGSI_FILE_ADDRESS
) {
142 c
->addr_reg
= nir_local_reg_create(b
->impl
);
143 c
->addr_reg
->num_components
= 4;
144 } else if (file
== TGSI_FILE_SAMPLER
) {
145 /* Nothing to record for samplers. */
148 assert(file
== TGSI_FILE_INPUT
||
149 file
== TGSI_FILE_OUTPUT
||
150 file
== TGSI_FILE_CONSTANT
);
152 var
= rzalloc(b
->shader
, nir_variable
);
153 var
->data
.driver_location
= decl
->Range
.First
;
155 var
->type
= glsl_vec4_type();
157 var
->type
= glsl_array_type(var
->type
, array_size
);
160 case TGSI_FILE_INPUT
:
161 var
->data
.read_only
= true;
162 var
->data
.mode
= nir_var_shader_in
;
163 var
->name
= ralloc_asprintf(var
, "in_%d", decl
->Range
.First
);
165 /* We should probably translate to a VERT_ATTRIB_* or VARYING_SLOT_*
166 * instead, but nothing in NIR core is looking at the value
167 * currently, and this is less change to drivers.
169 var
->data
.location
= decl
->Semantic
.Name
;
170 var
->data
.index
= decl
->Semantic
.Index
;
172 /* We definitely need to translate the interpolation field, because
173 * nir_print will decode it.
175 switch (decl
->Interp
.Interpolate
) {
176 case TGSI_INTERPOLATE_CONSTANT
:
177 var
->data
.interpolation
= INTERP_QUALIFIER_FLAT
;
179 case TGSI_INTERPOLATE_LINEAR
:
180 var
->data
.interpolation
= INTERP_QUALIFIER_NOPERSPECTIVE
;
182 case TGSI_INTERPOLATE_PERSPECTIVE
:
183 var
->data
.interpolation
= INTERP_QUALIFIER_SMOOTH
;
187 exec_list_push_tail(&b
->shader
->inputs
, &var
->node
);
189 case TGSI_FILE_OUTPUT
: {
190 /* Since we can't load from outputs in the IR, we make temporaries
191 * for the outputs and emit stores to the real outputs at the end of
194 nir_register
*reg
= nir_local_reg_create(b
->impl
);
195 reg
->num_components
= 4;
197 reg
->num_array_elems
= array_size
;
199 var
->data
.mode
= nir_var_shader_out
;
200 var
->name
= ralloc_asprintf(var
, "out_%d", decl
->Range
.First
);
202 var
->data
.location
= decl
->Semantic
.Name
;
203 var
->data
.index
= decl
->Semantic
.Index
;
205 for (i
= 0; i
< array_size
; i
++) {
206 c
->output_regs
[decl
->Range
.First
+ i
].offset
= i
;
207 c
->output_regs
[decl
->Range
.First
+ i
].reg
= reg
;
210 exec_list_push_tail(&b
->shader
->outputs
, &var
->node
);
213 case TGSI_FILE_CONSTANT
:
214 var
->data
.mode
= nir_var_uniform
;
215 var
->name
= ralloc_asprintf(var
, "uniform_%d", decl
->Range
.First
);
217 exec_list_push_tail(&b
->shader
->uniforms
, &var
->node
);
220 unreachable("bad declaration file");
228 ttn_emit_immediate(struct ttn_compile
*c
)
230 nir_builder
*b
= &c
->build
;
231 struct tgsi_full_immediate
*tgsi_imm
= &c
->token
->FullImmediate
;
232 nir_load_const_instr
*load_const
;
235 load_const
= nir_load_const_instr_create(b
->shader
, 4);
236 c
->imm_defs
[c
->next_imm
] = &load_const
->def
;
239 for (i
= 0; i
< 4; i
++)
240 load_const
->value
.u
[i
] = tgsi_imm
->u
[i
].Uint
;
242 nir_instr_insert_after_cf_list(b
->cf_node_list
, &load_const
->instr
);
246 ttn_src_for_indirect(struct ttn_compile
*c
, struct tgsi_ind_register
*indirect
);
249 ttn_src_for_file_and_index(struct ttn_compile
*c
, unsigned file
, unsigned index
,
250 struct tgsi_ind_register
*indirect
)
252 nir_builder
*b
= &c
->build
;
255 memset(&src
, 0, sizeof(src
));
258 case TGSI_FILE_TEMPORARY
:
259 src
.reg
.reg
= c
->temp_regs
[index
].reg
;
260 src
.reg
.base_offset
= c
->temp_regs
[index
].offset
;
262 src
.reg
.indirect
= ttn_src_for_indirect(c
, indirect
);
265 case TGSI_FILE_ADDRESS
:
266 src
.reg
.reg
= c
->addr_reg
;
269 case TGSI_FILE_IMMEDIATE
:
270 src
= nir_src_for_ssa(c
->imm_defs
[index
]);
274 case TGSI_FILE_INPUT
:
275 case TGSI_FILE_CONSTANT
: {
276 nir_intrinsic_instr
*load
;
279 case TGSI_FILE_INPUT
:
280 load
= nir_intrinsic_instr_create(b
->shader
,
282 nir_intrinsic_load_input_indirect
:
283 nir_intrinsic_load_input
);
285 case TGSI_FILE_CONSTANT
:
286 load
= nir_intrinsic_instr_create(b
->shader
,
288 nir_intrinsic_load_uniform_indirect
:
289 nir_intrinsic_load_uniform
);
292 unreachable("No other load files supported");
296 load
->num_components
= 4;
297 load
->const_index
[0] = index
;
298 load
->const_index
[1] = 1;
300 nir_alu_src indirect_address
;
301 memset(&indirect_address
, 0, sizeof(indirect_address
));
302 indirect_address
.src
= nir_src_for_reg(c
->addr_reg
);
303 for (int i
= 0; i
< 4; i
++)
304 indirect_address
.swizzle
[i
] = indirect
->Swizzle
;
305 load
->src
[0] = nir_src_for_ssa(nir_imov_alu(b
, indirect_address
, 1));
307 nir_ssa_dest_init(&load
->instr
, &load
->dest
, 4, NULL
);
308 nir_instr_insert_after_cf_list(b
->cf_node_list
, &load
->instr
);
310 src
= nir_src_for_ssa(&load
->dest
.ssa
);
315 unreachable("bad src file");
323 ttn_src_for_indirect(struct ttn_compile
*c
, struct tgsi_ind_register
*indirect
)
325 nir_builder
*b
= &c
->build
;
327 memset(&src
, 0, sizeof(src
));
328 for (int i
= 0; i
< 4; i
++)
329 src
.swizzle
[i
] = indirect
->Swizzle
;
330 src
.src
= ttn_src_for_file_and_index(c
,
332 indirect
->Index
, NULL
);
333 nir_src
*result
= ralloc(b
->shader
, nir_src
);
334 *result
= nir_src_for_ssa(nir_imov_alu(b
, src
, 1));
339 ttn_get_dest(struct ttn_compile
*c
, struct tgsi_full_dst_register
*tgsi_fdst
)
341 struct tgsi_dst_register
*tgsi_dst
= &tgsi_fdst
->Register
;
344 memset(&dest
, 0, sizeof(dest
));
346 if (tgsi_dst
->File
== TGSI_FILE_TEMPORARY
) {
347 dest
.dest
.reg
.reg
= c
->temp_regs
[tgsi_dst
->Index
].reg
;
348 dest
.dest
.reg
.base_offset
= c
->temp_regs
[tgsi_dst
->Index
].offset
;
349 } else if (tgsi_dst
->File
== TGSI_FILE_OUTPUT
) {
350 dest
.dest
.reg
.reg
= c
->output_regs
[tgsi_dst
->Index
].reg
;
351 dest
.dest
.reg
.base_offset
= c
->output_regs
[tgsi_dst
->Index
].offset
;
352 } else if (tgsi_dst
->File
== TGSI_FILE_ADDRESS
) {
353 assert(tgsi_dst
->Index
== 0);
354 dest
.dest
.reg
.reg
= c
->addr_reg
;
357 dest
.write_mask
= tgsi_dst
->WriteMask
;
358 dest
.saturate
= false;
360 if (tgsi_dst
->Indirect
)
361 dest
.dest
.reg
.indirect
= ttn_src_for_indirect(c
, &tgsi_fdst
->Indirect
);
367 ttn_get_src(struct ttn_compile
*c
, struct tgsi_full_src_register
*tgsi_fsrc
)
369 nir_builder
*b
= &c
->build
;
370 struct tgsi_src_register
*tgsi_src
= &tgsi_fsrc
->Register
;
371 unsigned tgsi_opcode
= c
->token
->FullInstruction
.Instruction
.Opcode
;
372 unsigned tgsi_src_type
= tgsi_opcode_infer_src_type(tgsi_opcode
);
373 bool src_is_float
= !(tgsi_src_type
== TGSI_TYPE_SIGNED
||
374 tgsi_src_type
== TGSI_TYPE_UNSIGNED
);
377 memset(&src
, 0, sizeof(src
));
379 if (tgsi_src
->File
== TGSI_FILE_NULL
) {
380 return nir_imm_float(b
, 0.0);
381 } else if (tgsi_src
->File
== TGSI_FILE_SAMPLER
) {
382 /* Only the index of the sampler gets used in texturing, and it will
383 * handle looking that up on its own instead of using the nir_alu_src.
385 assert(!tgsi_src
->Indirect
);
388 src
.src
= ttn_src_for_file_and_index(c
,
391 (tgsi_src
->Indirect
?
392 &tgsi_fsrc
->Indirect
: NULL
));
395 src
.swizzle
[0] = tgsi_src
->SwizzleX
;
396 src
.swizzle
[1] = tgsi_src
->SwizzleY
;
397 src
.swizzle
[2] = tgsi_src
->SwizzleZ
;
398 src
.swizzle
[3] = tgsi_src
->SwizzleW
;
400 nir_ssa_def
*def
= nir_fmov_alu(b
, src
, 4);
402 if (tgsi_src
->Absolute
) {
404 def
= nir_fabs(b
, def
);
406 def
= nir_iabs(b
, def
);
409 if (tgsi_src
->Negate
) {
411 def
= nir_fneg(b
, def
);
413 def
= nir_ineg(b
, def
);
420 ttn_alu(nir_builder
*b
, nir_op op
, nir_alu_dest dest
, nir_ssa_def
**src
)
422 unsigned num_srcs
= nir_op_infos
[op
].num_inputs
;
423 nir_alu_instr
*instr
= nir_alu_instr_create(b
->shader
, op
);
426 for (i
= 0; i
< num_srcs
; i
++)
427 instr
->src
[i
].src
= nir_src_for_ssa(src
[i
]);
430 nir_instr_insert_after_cf_list(b
->cf_node_list
, &instr
->instr
);
434 ttn_move_dest_masked(nir_builder
*b
, nir_alu_dest dest
,
435 nir_ssa_def
*def
, unsigned write_mask
)
437 if (!(dest
.write_mask
& write_mask
))
440 nir_alu_instr
*mov
= nir_alu_instr_create(b
->shader
, nir_op_imov
);
442 mov
->dest
.write_mask
&= write_mask
;
443 mov
->src
[0].src
= nir_src_for_ssa(def
);
444 for (unsigned i
= def
->num_components
; i
< 4; i
++)
445 mov
->src
[0].swizzle
[i
] = def
->num_components
- 1;
446 nir_instr_insert_after_cf_list(b
->cf_node_list
, &mov
->instr
);
450 ttn_move_dest(nir_builder
*b
, nir_alu_dest dest
, nir_ssa_def
*def
)
452 ttn_move_dest_masked(b
, dest
, def
, TGSI_WRITEMASK_XYZW
);
456 ttn_arl(nir_builder
*b
, nir_op op
, nir_alu_dest dest
, nir_ssa_def
**src
)
458 ttn_move_dest(b
, dest
, nir_f2i(b
, nir_ffloor(b
, src
[0])));
461 /* EXP - Approximate Exponential Base 2
462 * dst.x = 2^{\lfloor src.x\rfloor}
463 * dst.y = src.x - \lfloor src.x\rfloor
468 ttn_exp(nir_builder
*b
, nir_op op
, nir_alu_dest dest
, nir_ssa_def
**src
)
470 nir_ssa_def
*srcx
= ttn_channel(b
, src
[0], X
);
472 ttn_move_dest_masked(b
, dest
, nir_fexp2(b
, nir_ffloor(b
, srcx
)),
474 ttn_move_dest_masked(b
, dest
, nir_fsub(b
, srcx
, nir_ffloor(b
, srcx
)),
476 ttn_move_dest_masked(b
, dest
, nir_fexp2(b
, srcx
), TGSI_WRITEMASK_Z
);
477 ttn_move_dest_masked(b
, dest
, nir_imm_float(b
, 1.0), TGSI_WRITEMASK_W
);
480 /* LOG - Approximate Logarithm Base 2
481 * dst.x = \lfloor\log_2{|src.x|}\rfloor
482 * dst.y = \frac{|src.x|}{2^{\lfloor\log_2{|src.x|}\rfloor}}
483 * dst.z = \log_2{|src.x|}
487 ttn_log(nir_builder
*b
, nir_op op
, nir_alu_dest dest
, nir_ssa_def
**src
)
489 nir_ssa_def
*abs_srcx
= nir_fabs(b
, ttn_channel(b
, src
[0], X
));
490 nir_ssa_def
*log2
= nir_flog2(b
, abs_srcx
);
492 ttn_move_dest_masked(b
, dest
, nir_ffloor(b
, log2
), TGSI_WRITEMASK_X
);
493 ttn_move_dest_masked(b
, dest
,
494 nir_fdiv(b
, abs_srcx
, nir_fexp2(b
, nir_ffloor(b
, log2
))),
496 ttn_move_dest_masked(b
, dest
, nir_flog2(b
, abs_srcx
), TGSI_WRITEMASK_Z
);
497 ttn_move_dest_masked(b
, dest
, nir_imm_float(b
, 1.0), TGSI_WRITEMASK_W
);
500 /* DST - Distance Vector
502 * dst.y = src0.y \times src1.y
507 ttn_dst(nir_builder
*b
, nir_op op
, nir_alu_dest dest
, nir_ssa_def
**src
)
509 ttn_move_dest_masked(b
, dest
, nir_imm_float(b
, 1.0), TGSI_WRITEMASK_X
);
510 ttn_move_dest_masked(b
, dest
, nir_fmul(b
, src
[0], src
[1]), TGSI_WRITEMASK_Y
);
511 ttn_move_dest_masked(b
, dest
, nir_fmov(b
, src
[0]), TGSI_WRITEMASK_Z
);
512 ttn_move_dest_masked(b
, dest
, nir_fmov(b
, src
[1]), TGSI_WRITEMASK_W
);
515 /* LIT - Light Coefficients
517 * dst.y = max(src.x, 0.0)
518 * dst.z = (src.x > 0.0) ? max(src.y, 0.0)^{clamp(src.w, -128.0, 128.0))} : 0
522 ttn_lit(nir_builder
*b
, nir_op op
, nir_alu_dest dest
, nir_ssa_def
**src
)
524 ttn_move_dest_masked(b
, dest
, nir_imm_float(b
, 1.0), TGSI_WRITEMASK_XW
);
526 ttn_move_dest_masked(b
, dest
, nir_fmax(b
, ttn_channel(b
, src
[0], X
),
527 nir_imm_float(b
, 0.0)), TGSI_WRITEMASK_Y
);
529 if (dest
.write_mask
& TGSI_WRITEMASK_Z
) {
530 nir_ssa_def
*src0_y
= ttn_channel(b
, src
[0], Y
);
531 nir_ssa_def
*wclamp
= nir_fmax(b
, nir_fmin(b
, ttn_channel(b
, src
[0], W
),
532 nir_imm_float(b
, 128.0)),
533 nir_imm_float(b
, -128.0));
534 nir_ssa_def
*pow
= nir_fpow(b
, nir_fmax(b
, src0_y
, nir_imm_float(b
, 0.0)),
537 ttn_move_dest_masked(b
, dest
,
540 nir_imm_float(b
, 0.0),
541 ttn_channel(b
, src
[0], X
)),
542 nir_imm_float(b
, 0.0),
549 * dst.x = \cos{src.x}
550 * dst.y = \sin{src.x}
555 ttn_scs(nir_builder
*b
, nir_op op
, nir_alu_dest dest
, nir_ssa_def
**src
)
557 ttn_move_dest_masked(b
, dest
, nir_fcos(b
, ttn_channel(b
, src
[0], X
)),
559 ttn_move_dest_masked(b
, dest
, nir_fsin(b
, ttn_channel(b
, src
[0], X
)),
561 ttn_move_dest_masked(b
, dest
, nir_imm_float(b
, 0.0), TGSI_WRITEMASK_Z
);
562 ttn_move_dest_masked(b
, dest
, nir_imm_float(b
, 1.0), TGSI_WRITEMASK_W
);
566 ttn_sle(nir_builder
*b
, nir_op op
, nir_alu_dest dest
, nir_ssa_def
**src
)
568 ttn_move_dest(b
, dest
, nir_sge(b
, src
[1], src
[0]));
572 ttn_sgt(nir_builder
*b
, nir_op op
, nir_alu_dest dest
, nir_ssa_def
**src
)
574 ttn_move_dest(b
, dest
, nir_slt(b
, src
[1], src
[0]));
578 ttn_clamp(nir_builder
*b
, nir_op op
, nir_alu_dest dest
, nir_ssa_def
**src
)
580 ttn_move_dest(b
, dest
, nir_fmin(b
, nir_fmax(b
, src
[0], src
[1]), src
[2]));
584 ttn_xpd(nir_builder
*b
, nir_op op
, nir_alu_dest dest
, nir_ssa_def
**src
)
586 ttn_move_dest_masked(b
, dest
,
589 ttn_swizzle(b
, src
[0], Y
, Z
, X
, X
),
590 ttn_swizzle(b
, src
[1], Z
, X
, Y
, X
)),
592 ttn_swizzle(b
, src
[1], Y
, Z
, X
, X
),
593 ttn_swizzle(b
, src
[0], Z
, X
, Y
, X
))),
595 ttn_move_dest_masked(b
, dest
, nir_imm_float(b
, 1.0), TGSI_WRITEMASK_W
);
599 ttn_dp2a(nir_builder
*b
, nir_op op
, nir_alu_dest dest
, nir_ssa_def
**src
)
601 ttn_move_dest(b
, dest
,
602 ttn_channel(b
, nir_fadd(b
, nir_fdot2(b
, src
[0], src
[1]),
608 ttn_dp2(nir_builder
*b
, nir_op op
, nir_alu_dest dest
, nir_ssa_def
**src
)
610 ttn_move_dest(b
, dest
, nir_fdot2(b
, src
[0], src
[1]));
614 ttn_dp3(nir_builder
*b
, nir_op op
, nir_alu_dest dest
, nir_ssa_def
**src
)
616 ttn_move_dest(b
, dest
, nir_fdot3(b
, src
[0], src
[1]));
620 ttn_dp4(nir_builder
*b
, nir_op op
, nir_alu_dest dest
, nir_ssa_def
**src
)
622 ttn_move_dest(b
, dest
, nir_fdot4(b
, src
[0], src
[1]));
626 ttn_dph(nir_builder
*b
, nir_op op
, nir_alu_dest dest
, nir_ssa_def
**src
)
628 ttn_move_dest(b
, dest
, nir_fadd(b
, nir_fdot3(b
, src
[0], src
[1]),
629 ttn_channel(b
, src
[1], W
)));
633 ttn_umad(nir_builder
*b
, nir_op op
, nir_alu_dest dest
, nir_ssa_def
**src
)
635 ttn_move_dest(b
, dest
, nir_iadd(b
, nir_imul(b
, src
[0], src
[1]), src
[2]));
639 ttn_arr(nir_builder
*b
, nir_op op
, nir_alu_dest dest
, nir_ssa_def
**src
)
641 ttn_move_dest(b
, dest
, nir_ffloor(b
, nir_fadd(b
, src
[0], nir_imm_float(b
, 0.5))));
645 ttn_cmp(nir_builder
*b
, nir_op op
, nir_alu_dest dest
, nir_ssa_def
**src
)
647 ttn_move_dest(b
, dest
, nir_bcsel(b
,
648 nir_flt(b
, src
[0], nir_imm_float(b
, 0.0)),
653 ttn_ucmp(nir_builder
*b
, nir_op op
, nir_alu_dest dest
, nir_ssa_def
**src
)
655 ttn_move_dest(b
, dest
, nir_bcsel(b
,
656 nir_ine(b
, src
[0], nir_imm_int(b
, 0)),
661 ttn_kill(nir_builder
*b
, nir_op op
, nir_alu_dest dest
, nir_ssa_def
**src
)
663 nir_intrinsic_instr
*discard
=
664 nir_intrinsic_instr_create(b
->shader
, nir_intrinsic_discard
);
665 nir_instr_insert_after_cf_list(b
->cf_node_list
, &discard
->instr
);
669 ttn_kill_if(nir_builder
*b
, nir_op op
, nir_alu_dest dest
, nir_ssa_def
**src
)
671 nir_ssa_def
*cmp
= nir_bany4(b
, nir_flt(b
, src
[0], nir_imm_float(b
, 0.0)));
672 nir_intrinsic_instr
*discard
=
673 nir_intrinsic_instr_create(b
->shader
, nir_intrinsic_discard_if
);
674 discard
->src
[0] = nir_src_for_ssa(cmp
);
675 nir_instr_insert_after_cf_list(b
->cf_node_list
, &discard
->instr
);
679 ttn_if(struct ttn_compile
*c
, nir_ssa_def
*src
, bool is_uint
)
681 nir_builder
*b
= &c
->build
;
683 /* Save the outside-of-the-if-statement node list. */
684 c
->if_stack
[c
->if_stack_pos
] = b
->cf_node_list
;
687 src
= ttn_channel(b
, src
, X
);
689 nir_if
*if_stmt
= nir_if_create(b
->shader
);
691 if_stmt
->condition
= nir_src_for_ssa(nir_ine(b
, src
, nir_imm_int(b
, 0)));
693 if_stmt
->condition
= nir_src_for_ssa(nir_fne(b
, src
, nir_imm_int(b
, 0)));
695 nir_cf_node_insert_end(b
->cf_node_list
, &if_stmt
->cf_node
);
697 nir_builder_insert_after_cf_list(b
, &if_stmt
->then_list
);
699 c
->if_stack
[c
->if_stack_pos
] = &if_stmt
->else_list
;
704 ttn_else(struct ttn_compile
*c
)
706 nir_builder
*b
= &c
->build
;
708 nir_builder_insert_after_cf_list(b
, c
->if_stack
[c
->if_stack_pos
- 1]);
712 ttn_endif(struct ttn_compile
*c
)
714 nir_builder
*b
= &c
->build
;
716 c
->if_stack_pos
-= 2;
717 nir_builder_insert_after_cf_list(b
, c
->if_stack
[c
->if_stack_pos
]);
721 ttn_bgnloop(struct ttn_compile
*c
)
723 nir_builder
*b
= &c
->build
;
725 /* Save the outside-of-the-loop node list. */
726 c
->loop_stack
[c
->loop_stack_pos
] = b
->cf_node_list
;
729 nir_loop
*loop
= nir_loop_create(b
->shader
);
730 nir_cf_node_insert_end(b
->cf_node_list
, &loop
->cf_node
);
732 nir_builder_insert_after_cf_list(b
, &loop
->body
);
736 ttn_cont(nir_builder
*b
)
738 nir_jump_instr
*instr
= nir_jump_instr_create(b
->shader
, nir_jump_continue
);
739 nir_instr_insert_after_cf_list(b
->cf_node_list
, &instr
->instr
);
743 ttn_brk(nir_builder
*b
)
745 nir_jump_instr
*instr
= nir_jump_instr_create(b
->shader
, nir_jump_break
);
746 nir_instr_insert_after_cf_list(b
->cf_node_list
, &instr
->instr
);
750 ttn_endloop(struct ttn_compile
*c
)
752 nir_builder
*b
= &c
->build
;
755 nir_builder_insert_after_cf_list(b
, c
->loop_stack
[c
->loop_stack_pos
]);
759 ttn_tex(struct ttn_compile
*c
, nir_alu_dest dest
, nir_ssa_def
**src
)
761 nir_builder
*b
= &c
->build
;
762 struct tgsi_full_instruction
*tgsi_inst
= &c
->token
->FullInstruction
;
763 nir_tex_instr
*instr
;
767 switch (tgsi_inst
->Instruction
.Opcode
) {
768 case TGSI_OPCODE_TEX
:
772 case TGSI_OPCODE_TXP
:
776 case TGSI_OPCODE_TXB
:
780 case TGSI_OPCODE_TXL
:
784 case TGSI_OPCODE_TXF
:
788 case TGSI_OPCODE_TXD
:
793 fprintf(stderr
, "unknown TGSI tex op %d\n", tgsi_inst
->Instruction
.Opcode
);
797 if (tgsi_inst
->Texture
.Texture
== TGSI_TEXTURE_SHADOW1D
||
798 tgsi_inst
->Texture
.Texture
== TGSI_TEXTURE_SHADOW1D_ARRAY
||
799 tgsi_inst
->Texture
.Texture
== TGSI_TEXTURE_SHADOW2D
||
800 tgsi_inst
->Texture
.Texture
== TGSI_TEXTURE_SHADOW2D_ARRAY
||
801 tgsi_inst
->Texture
.Texture
== TGSI_TEXTURE_SHADOWRECT
||
802 tgsi_inst
->Texture
.Texture
== TGSI_TEXTURE_SHADOWCUBE
||
803 tgsi_inst
->Texture
.Texture
== TGSI_TEXTURE_SHADOWCUBE_ARRAY
) {
807 instr
= nir_tex_instr_create(b
->shader
, num_srcs
);
810 switch (tgsi_inst
->Texture
.Texture
) {
811 case TGSI_TEXTURE_1D
:
812 instr
->sampler_dim
= GLSL_SAMPLER_DIM_1D
;
814 case TGSI_TEXTURE_1D_ARRAY
:
815 instr
->sampler_dim
= GLSL_SAMPLER_DIM_1D
;
816 instr
->is_array
= true;
818 case TGSI_TEXTURE_SHADOW1D
:
819 instr
->sampler_dim
= GLSL_SAMPLER_DIM_1D
;
820 instr
->is_shadow
= true;
822 case TGSI_TEXTURE_SHADOW1D_ARRAY
:
823 instr
->sampler_dim
= GLSL_SAMPLER_DIM_1D
;
824 instr
->is_shadow
= true;
825 instr
->is_array
= true;
827 case TGSI_TEXTURE_2D
:
828 instr
->sampler_dim
= GLSL_SAMPLER_DIM_2D
;
830 case TGSI_TEXTURE_2D_ARRAY
:
831 instr
->sampler_dim
= GLSL_SAMPLER_DIM_2D
;
832 instr
->is_array
= true;
834 case TGSI_TEXTURE_2D_MSAA
:
835 instr
->sampler_dim
= GLSL_SAMPLER_DIM_MS
;
837 case TGSI_TEXTURE_2D_ARRAY_MSAA
:
838 instr
->sampler_dim
= GLSL_SAMPLER_DIM_MS
;
839 instr
->is_array
= true;
841 case TGSI_TEXTURE_SHADOW2D
:
842 instr
->sampler_dim
= GLSL_SAMPLER_DIM_2D
;
843 instr
->is_shadow
= true;
845 case TGSI_TEXTURE_SHADOW2D_ARRAY
:
846 instr
->sampler_dim
= GLSL_SAMPLER_DIM_2D
;
847 instr
->is_shadow
= true;
848 instr
->is_array
= true;
850 case TGSI_TEXTURE_3D
:
851 instr
->sampler_dim
= GLSL_SAMPLER_DIM_3D
;
853 case TGSI_TEXTURE_CUBE
:
854 instr
->sampler_dim
= GLSL_SAMPLER_DIM_CUBE
;
856 case TGSI_TEXTURE_CUBE_ARRAY
:
857 instr
->sampler_dim
= GLSL_SAMPLER_DIM_CUBE
;
858 instr
->is_array
= true;
860 case TGSI_TEXTURE_SHADOWCUBE
:
861 instr
->sampler_dim
= GLSL_SAMPLER_DIM_CUBE
;
862 instr
->is_shadow
= true;
864 case TGSI_TEXTURE_SHADOWCUBE_ARRAY
:
865 instr
->sampler_dim
= GLSL_SAMPLER_DIM_CUBE
;
866 instr
->is_shadow
= true;
867 instr
->is_array
= true;
869 case TGSI_TEXTURE_RECT
:
870 instr
->sampler_dim
= GLSL_SAMPLER_DIM_RECT
;
872 case TGSI_TEXTURE_SHADOWRECT
:
873 instr
->sampler_dim
= GLSL_SAMPLER_DIM_RECT
;
874 instr
->is_shadow
= true;
877 fprintf(stderr
, "Unknown TGSI texture target %d\n",
878 tgsi_inst
->Texture
.Texture
);
882 switch (instr
->sampler_dim
) {
883 case GLSL_SAMPLER_DIM_1D
:
884 case GLSL_SAMPLER_DIM_BUF
:
885 instr
->coord_components
= 1;
887 case GLSL_SAMPLER_DIM_2D
:
888 case GLSL_SAMPLER_DIM_RECT
:
889 case GLSL_SAMPLER_DIM_EXTERNAL
:
890 case GLSL_SAMPLER_DIM_MS
:
891 instr
->coord_components
= 2;
893 case GLSL_SAMPLER_DIM_3D
:
894 case GLSL_SAMPLER_DIM_CUBE
:
895 instr
->coord_components
= 3;
900 instr
->coord_components
++;
902 assert(tgsi_inst
->Src
[1].Register
.File
== TGSI_FILE_SAMPLER
);
903 instr
->sampler_index
= tgsi_inst
->Src
[1].Register
.Index
;
905 unsigned src_number
= 0;
907 if (tgsi_inst
->Instruction
.Opcode
!= TGSI_OPCODE_TXQ
) {
908 instr
->src
[src_number
].src
=
909 nir_src_for_ssa(nir_swizzle(b
, src
[0], SWIZ(X
, Y
, Z
, W
),
910 instr
->coord_components
, false));
911 instr
->src
[src_number
].src_type
= nir_tex_src_coord
;
915 if (tgsi_inst
->Instruction
.Opcode
== TGSI_OPCODE_TXP
) {
916 instr
->src
[src_number
].src
= nir_src_for_ssa(ttn_channel(b
, src
[0], W
));
917 instr
->src
[src_number
].src_type
= nir_tex_src_projector
;
921 if (tgsi_inst
->Instruction
.Opcode
== TGSI_OPCODE_TXB
) {
922 instr
->src
[src_number
].src
= nir_src_for_ssa(ttn_channel(b
, src
[0], W
));
923 instr
->src
[src_number
].src_type
= nir_tex_src_bias
;
927 if (tgsi_inst
->Instruction
.Opcode
== TGSI_OPCODE_TXL
) {
928 instr
->src
[src_number
].src
= nir_src_for_ssa(ttn_channel(b
, src
[0], W
));
929 instr
->src
[src_number
].src_type
= nir_tex_src_lod
;
933 if (instr
->is_shadow
) {
934 if (instr
->coord_components
< 3)
935 instr
->src
[src_number
].src
= nir_src_for_ssa(ttn_channel(b
, src
[0], Z
));
937 instr
->src
[src_number
].src
= nir_src_for_ssa(ttn_channel(b
, src
[0], W
));
939 instr
->src
[src_number
].src_type
= nir_tex_src_comparitor
;
943 assert(src_number
== num_srcs
);
945 nir_ssa_dest_init(&instr
->instr
, &instr
->dest
, 4, NULL
);
946 nir_instr_insert_after_cf_list(b
->cf_node_list
, &instr
->instr
);
948 /* Resolve the writemask on the texture op. */
949 ttn_move_dest(b
, dest
, &instr
->dest
.ssa
);
952 static const nir_op op_trans
[TGSI_OPCODE_LAST
] = {
953 [TGSI_OPCODE_ARL
] = 0,
954 [TGSI_OPCODE_MOV
] = nir_op_fmov
,
955 [TGSI_OPCODE_LIT
] = 0,
956 [TGSI_OPCODE_RCP
] = nir_op_frcp
,
957 [TGSI_OPCODE_RSQ
] = nir_op_frsq
,
958 [TGSI_OPCODE_EXP
] = 0,
959 [TGSI_OPCODE_LOG
] = 0,
960 [TGSI_OPCODE_MUL
] = nir_op_fmul
,
961 [TGSI_OPCODE_ADD
] = nir_op_fadd
,
962 [TGSI_OPCODE_DP3
] = 0,
963 [TGSI_OPCODE_DP4
] = 0,
964 [TGSI_OPCODE_DST
] = 0,
965 [TGSI_OPCODE_MIN
] = nir_op_fmin
,
966 [TGSI_OPCODE_MAX
] = nir_op_fmax
,
967 [TGSI_OPCODE_SLT
] = nir_op_slt
,
968 [TGSI_OPCODE_SGE
] = nir_op_sge
,
969 [TGSI_OPCODE_MAD
] = nir_op_ffma
,
970 [TGSI_OPCODE_SUB
] = nir_op_fsub
,
971 [TGSI_OPCODE_LRP
] = 0,
972 [TGSI_OPCODE_SQRT
] = nir_op_fsqrt
,
973 [TGSI_OPCODE_DP2A
] = 0,
974 [TGSI_OPCODE_FRC
] = nir_op_ffract
,
975 [TGSI_OPCODE_CLAMP
] = 0,
976 [TGSI_OPCODE_FLR
] = nir_op_ffloor
,
977 [TGSI_OPCODE_ROUND
] = nir_op_fround_even
,
978 [TGSI_OPCODE_EX2
] = nir_op_fexp2
,
979 [TGSI_OPCODE_LG2
] = nir_op_flog2
,
980 [TGSI_OPCODE_POW
] = nir_op_fpow
,
981 [TGSI_OPCODE_XPD
] = 0,
982 [TGSI_OPCODE_ABS
] = nir_op_fabs
,
983 [TGSI_OPCODE_DPH
] = 0,
984 [TGSI_OPCODE_COS
] = nir_op_fcos
,
985 [TGSI_OPCODE_DDX
] = nir_op_fddx
,
986 [TGSI_OPCODE_DDY
] = nir_op_fddy
,
987 [TGSI_OPCODE_KILL
] = 0,
988 [TGSI_OPCODE_PK2H
] = 0, /* XXX */
989 [TGSI_OPCODE_PK2US
] = 0, /* XXX */
990 [TGSI_OPCODE_PK4B
] = 0, /* XXX */
991 [TGSI_OPCODE_PK4UB
] = 0, /* XXX */
992 [TGSI_OPCODE_SEQ
] = nir_op_seq
,
993 [TGSI_OPCODE_SGT
] = 0,
994 [TGSI_OPCODE_SIN
] = nir_op_fsin
,
995 [TGSI_OPCODE_SLE
] = 0,
996 [TGSI_OPCODE_TEX
] = 0,
997 [TGSI_OPCODE_TXD
] = 0,
998 [TGSI_OPCODE_TXP
] = 0,
999 [TGSI_OPCODE_UP2H
] = 0, /* XXX */
1000 [TGSI_OPCODE_UP2US
] = 0, /* XXX */
1001 [TGSI_OPCODE_UP4B
] = 0, /* XXX */
1002 [TGSI_OPCODE_UP4UB
] = 0, /* XXX */
1003 [TGSI_OPCODE_ARR
] = 0,
1005 /* No function calls, yet. */
1006 [TGSI_OPCODE_CAL
] = 0, /* XXX */
1007 [TGSI_OPCODE_RET
] = 0, /* XXX */
1009 [TGSI_OPCODE_SSG
] = nir_op_fsign
,
1010 [TGSI_OPCODE_CMP
] = 0,
1011 [TGSI_OPCODE_SCS
] = 0,
1012 [TGSI_OPCODE_TXB
] = 0,
1013 [TGSI_OPCODE_DIV
] = nir_op_fdiv
,
1014 [TGSI_OPCODE_DP2
] = 0,
1015 [TGSI_OPCODE_DP2A
] = 0,
1016 [TGSI_OPCODE_TXL
] = 0,
1018 [TGSI_OPCODE_BRK
] = 0,
1019 [TGSI_OPCODE_IF
] = 0,
1020 [TGSI_OPCODE_UIF
] = 0,
1021 [TGSI_OPCODE_ELSE
] = 0,
1022 [TGSI_OPCODE_ENDIF
] = 0,
1024 [TGSI_OPCODE_DDX_FINE
] = nir_op_fddx_fine
,
1025 [TGSI_OPCODE_DDY_FINE
] = nir_op_fddy_fine
,
1027 [TGSI_OPCODE_PUSHA
] = 0, /* XXX */
1028 [TGSI_OPCODE_POPA
] = 0, /* XXX */
1030 [TGSI_OPCODE_CEIL
] = nir_op_fceil
,
1031 [TGSI_OPCODE_I2F
] = nir_op_i2f
,
1032 [TGSI_OPCODE_NOT
] = nir_op_inot
,
1033 [TGSI_OPCODE_TRUNC
] = nir_op_ftrunc
,
1034 [TGSI_OPCODE_SHL
] = nir_op_ishl
,
1035 [TGSI_OPCODE_AND
] = nir_op_iand
,
1036 [TGSI_OPCODE_OR
] = nir_op_ior
,
1037 [TGSI_OPCODE_MOD
] = nir_op_umod
,
1038 [TGSI_OPCODE_XOR
] = nir_op_ixor
,
1039 [TGSI_OPCODE_SAD
] = 0, /* XXX */
1040 [TGSI_OPCODE_TXF
] = 0,
1041 [TGSI_OPCODE_TXQ
] = 0,
1043 [TGSI_OPCODE_CONT
] = 0,
1045 [TGSI_OPCODE_EMIT
] = 0, /* XXX */
1046 [TGSI_OPCODE_ENDPRIM
] = 0, /* XXX */
1048 [TGSI_OPCODE_BGNLOOP
] = 0,
1049 [TGSI_OPCODE_BGNSUB
] = 0, /* XXX: no function calls */
1050 [TGSI_OPCODE_ENDLOOP
] = 0,
1051 [TGSI_OPCODE_ENDSUB
] = 0, /* XXX: no function calls */
1053 [TGSI_OPCODE_TXQ_LZ
] = 0,
1054 [TGSI_OPCODE_NOP
] = 0,
1055 [TGSI_OPCODE_FSEQ
] = nir_op_feq
,
1056 [TGSI_OPCODE_FSGE
] = nir_op_fge
,
1057 [TGSI_OPCODE_FSLT
] = nir_op_flt
,
1058 [TGSI_OPCODE_FSNE
] = nir_op_fne
,
1060 /* No control flow yet */
1061 [TGSI_OPCODE_CALLNZ
] = 0, /* XXX */
1062 [TGSI_OPCODE_BREAKC
] = 0, /* not emitted by glsl_to_tgsi.cpp */
1064 [TGSI_OPCODE_KILL_IF
] = 0,
1066 [TGSI_OPCODE_END
] = 0,
1068 [TGSI_OPCODE_F2I
] = nir_op_f2i
,
1069 [TGSI_OPCODE_IDIV
] = nir_op_idiv
,
1070 [TGSI_OPCODE_IMAX
] = nir_op_imax
,
1071 [TGSI_OPCODE_IMIN
] = nir_op_imin
,
1072 [TGSI_OPCODE_INEG
] = nir_op_ineg
,
1073 [TGSI_OPCODE_ISGE
] = nir_op_ige
,
1074 [TGSI_OPCODE_ISHR
] = nir_op_ishr
,
1075 [TGSI_OPCODE_ISLT
] = nir_op_ilt
,
1076 [TGSI_OPCODE_F2U
] = nir_op_f2u
,
1077 [TGSI_OPCODE_U2F
] = nir_op_u2f
,
1078 [TGSI_OPCODE_UADD
] = nir_op_iadd
,
1079 [TGSI_OPCODE_UDIV
] = nir_op_udiv
,
1080 [TGSI_OPCODE_UMAD
] = 0,
1081 [TGSI_OPCODE_UMAX
] = nir_op_umax
,
1082 [TGSI_OPCODE_UMIN
] = nir_op_umin
,
1083 [TGSI_OPCODE_UMOD
] = nir_op_umod
,
1084 [TGSI_OPCODE_UMUL
] = nir_op_imul
,
1085 [TGSI_OPCODE_USEQ
] = nir_op_ieq
,
1086 [TGSI_OPCODE_USGE
] = nir_op_uge
,
1087 [TGSI_OPCODE_USHR
] = nir_op_ushr
,
1088 [TGSI_OPCODE_USLT
] = nir_op_ult
,
1089 [TGSI_OPCODE_USNE
] = nir_op_ine
,
1091 [TGSI_OPCODE_SWITCH
] = 0, /* not emitted by glsl_to_tgsi.cpp */
1092 [TGSI_OPCODE_CASE
] = 0, /* not emitted by glsl_to_tgsi.cpp */
1093 [TGSI_OPCODE_DEFAULT
] = 0, /* not emitted by glsl_to_tgsi.cpp */
1094 [TGSI_OPCODE_ENDSWITCH
] = 0, /* not emitted by glsl_to_tgsi.cpp */
1096 /* XXX: SAMPLE opcodes */
1098 [TGSI_OPCODE_UARL
] = nir_op_imov
,
1099 [TGSI_OPCODE_UCMP
] = 0,
1100 [TGSI_OPCODE_IABS
] = nir_op_iabs
,
1101 [TGSI_OPCODE_ISSG
] = nir_op_isign
,
1105 [TGSI_OPCODE_TEX2
] = 0,
1106 [TGSI_OPCODE_TXB2
] = 0,
1107 [TGSI_OPCODE_TXL2
] = 0,
1109 [TGSI_OPCODE_IMUL_HI
] = nir_op_imul_high
,
1110 [TGSI_OPCODE_UMUL_HI
] = nir_op_umul_high
,
1112 [TGSI_OPCODE_TG4
] = 0,
1113 [TGSI_OPCODE_LODQ
] = 0, /* XXX */
1115 [TGSI_OPCODE_IBFE
] = nir_op_ibitfield_extract
,
1116 [TGSI_OPCODE_UBFE
] = nir_op_ubitfield_extract
,
1117 [TGSI_OPCODE_BFI
] = nir_op_bitfield_insert
,
1118 [TGSI_OPCODE_BREV
] = nir_op_bitfield_reverse
,
1119 [TGSI_OPCODE_POPC
] = nir_op_bit_count
,
1120 [TGSI_OPCODE_LSB
] = nir_op_find_lsb
,
1121 [TGSI_OPCODE_IMSB
] = nir_op_ifind_msb
,
1122 [TGSI_OPCODE_UMSB
] = nir_op_ifind_msb
, /* XXX: signed vs unsigned */
1124 [TGSI_OPCODE_INTERP_CENTROID
] = 0, /* XXX */
1125 [TGSI_OPCODE_INTERP_SAMPLE
] = 0, /* XXX */
1126 [TGSI_OPCODE_INTERP_OFFSET
] = 0, /* XXX */
/*
 * ttn_emit_instruction: translate the TGSI instruction held in
 * c->token->FullInstruction into NIR through the builder c->build.
 *
 * Every source slot (up to TGSI_FULL_MAX_SRC_REGISTERS) is fetched with
 * ttn_get_src() and destination 0 with ttn_get_dest() before the opcode is
 * dispatched.  After the opcode-specific emission, a saturate modifier on the
 * instruction is applied to the destination.
 *
 * NOTE(review): this listing appears to have lost lines (no return type or
 * braces, no `switch` header, no `break` statements, no declaration of `i`,
 * and no bodies for several cases) -- confirm against the complete file
 * before relying on the control flow shown here.
 */
1130 ttn_emit_instruction(struct ttn_compile
*c
)
1132 nir_builder
*b
= &c
->build
;
1133 struct tgsi_full_instruction
*tgsi_inst
= &c
->token
->FullInstruction
;
1135 unsigned tgsi_op
= tgsi_inst
->Instruction
.Opcode
;
/* TGSI_OPCODE_END is checked before any source/destination is fetched.
 * NOTE(review): the statement guarded by this check is not visible here;
 * presumably an early return -- confirm. */
1137 if (tgsi_op
== TGSI_OPCODE_END
)
/* Fetch all source slots unconditionally, regardless of how many the opcode
 * actually uses. */
1140 nir_ssa_def
*src
[TGSI_FULL_MAX_SRC_REGISTERS
];
1141 for (i
= 0; i
< TGSI_FULL_MAX_SRC_REGISTERS
; i
++) {
1142 src
[i
] = ttn_get_src(c
, &tgsi_inst
->Src
[i
]);
/* Only destination register 0 is translated. */
1144 nir_alu_dest dest
= ttn_get_dest(c
, &tgsi_inst
->Dst
[0]);
/* Scalar math opcodes: the NIR op is applied to the X channel of the
 * source(s) only, and the result is handed to ttn_move_dest(). */
1147 case TGSI_OPCODE_RSQ
:
1148 ttn_move_dest(b
, dest
, nir_frsq(b
, ttn_channel(b
, src
[0], X
)));
1151 case TGSI_OPCODE_SQRT
:
1152 ttn_move_dest(b
, dest
, nir_fsqrt(b
, ttn_channel(b
, src
[0], X
)));
1155 case TGSI_OPCODE_RCP
:
1156 ttn_move_dest(b
, dest
, nir_frcp(b
, ttn_channel(b
, src
[0], X
)));
1159 case TGSI_OPCODE_EX2
:
1160 ttn_move_dest(b
, dest
, nir_fexp2(b
, ttn_channel(b
, src
[0], X
)));
1163 case TGSI_OPCODE_LG2
:
1164 ttn_move_dest(b
, dest
, nir_flog2(b
, ttn_channel(b
, src
[0], X
)));
1167 case TGSI_OPCODE_POW
:
1168 ttn_move_dest(b
, dest
, nir_fpow(b
,
1169 ttn_channel(b
, src
[0], X
),
1170 ttn_channel(b
, src
[1], X
)));
1173 case TGSI_OPCODE_COS
:
1174 ttn_move_dest(b
, dest
, nir_fcos(b
, ttn_channel(b
, src
[0], X
)));
1177 case TGSI_OPCODE_SIN
:
1178 ttn_move_dest(b
, dest
, nir_fsin(b
, ttn_channel(b
, src
[0], X
)));
/* The opcodes below each have a dedicated helper, which receives the
 * op_trans[] entry for the opcode, the destination and the whole source
 * array. */
1181 case TGSI_OPCODE_ARL
:
1182 ttn_arl(b
, op_trans
[tgsi_op
], dest
, src
);
1185 case TGSI_OPCODE_EXP
:
1186 ttn_exp(b
, op_trans
[tgsi_op
], dest
, src
);
1189 case TGSI_OPCODE_LOG
:
1190 ttn_log(b
, op_trans
[tgsi_op
], dest
, src
);
1193 case TGSI_OPCODE_DST
:
1194 ttn_dst(b
, op_trans
[tgsi_op
], dest
, src
);
1197 case TGSI_OPCODE_LIT
:
1198 ttn_lit(b
, op_trans
[tgsi_op
], dest
, src
);
1201 case TGSI_OPCODE_CLAMP
:
1202 ttn_clamp(b
, op_trans
[tgsi_op
], dest
, src
);
1205 case TGSI_OPCODE_XPD
:
1206 ttn_xpd(b
, op_trans
[tgsi_op
], dest
, src
);
1209 case TGSI_OPCODE_DP2
:
1210 ttn_dp2(b
, op_trans
[tgsi_op
], dest
, src
);
1213 case TGSI_OPCODE_DP3
:
1214 ttn_dp3(b
, op_trans
[tgsi_op
], dest
, src
);
1217 case TGSI_OPCODE_DP4
:
1218 ttn_dp4(b
, op_trans
[tgsi_op
], dest
, src
);
1221 case TGSI_OPCODE_DP2A
:
1222 ttn_dp2a(b
, op_trans
[tgsi_op
], dest
, src
);
1225 case TGSI_OPCODE_DPH
:
1226 ttn_dph(b
, op_trans
[tgsi_op
], dest
, src
);
1229 case TGSI_OPCODE_UMAD
:
1230 ttn_umad(b
, op_trans
[tgsi_op
], dest
, src
);
/* LRP passes its three sources to nir_flrp() in reverse order
 * (src[2], src[1], src[0]). */
1233 case TGSI_OPCODE_LRP
:
1234 ttn_move_dest(b
, dest
, nir_flrp(b
, src
[2], src
[1], src
[0]));
1237 case TGSI_OPCODE_KILL
:
1238 ttn_kill(b
, op_trans
[tgsi_op
], dest
, src
);
1241 case TGSI_OPCODE_ARR
:
1242 ttn_arr(b
, op_trans
[tgsi_op
], dest
, src
);
1245 case TGSI_OPCODE_CMP
:
1246 ttn_cmp(b
, op_trans
[tgsi_op
], dest
, src
);
1249 case TGSI_OPCODE_UCMP
:
1250 ttn_ucmp(b
, op_trans
[tgsi_op
], dest
, src
);
1253 case TGSI_OPCODE_SCS
:
1254 ttn_scs(b
, op_trans
[tgsi_op
], dest
, src
);
1257 case TGSI_OPCODE_SGT
:
1258 ttn_sgt(b
, op_trans
[tgsi_op
], dest
, src
);
1261 case TGSI_OPCODE_SLE
:
1262 ttn_sle(b
, op_trans
[tgsi_op
], dest
, src
);
1265 case TGSI_OPCODE_KILL_IF
:
1266 ttn_kill_if(b
, op_trans
[tgsi_op
], dest
, src
);
/* All texture opcodes share one handler, which takes the compile context
 * rather than the builder and an op_trans[] entry. */
1269 case TGSI_OPCODE_TEX
:
1270 case TGSI_OPCODE_TXP
:
1271 case TGSI_OPCODE_TXL
:
1272 case TGSI_OPCODE_TXB
:
1273 case TGSI_OPCODE_TXD
:
1274 case TGSI_OPCODE_TXQ
:
1275 case TGSI_OPCODE_TXL2
:
1276 case TGSI_OPCODE_TXB2
:
1277 case TGSI_OPCODE_TXQ_LZ
:
1278 case TGSI_OPCODE_TXF
:
1279 case TGSI_OPCODE_TG4
:
1280 ttn_tex(c
, dest
, src
);
1283 case TGSI_OPCODE_NOP
:
/* IF and UIF differ only in the boolean passed to ttn_if().
 * NOTE(review): presumably the flag selects an integer (UIF) versus float
 * (IF) condition -- confirm against ttn_if(). */
1286 case TGSI_OPCODE_IF
:
1287 ttn_if(c
, src
[0], false);
1290 case TGSI_OPCODE_UIF
:
1291 ttn_if(c
, src
[0], true);
/* NOTE(review): no handler calls are visible in this listing for the
 * remaining control-flow opcodes below -- confirm against the full file. */
1294 case TGSI_OPCODE_ELSE
:
1298 case TGSI_OPCODE_ENDIF
:
1302 case TGSI_OPCODE_BGNLOOP
:
1306 case TGSI_OPCODE_BRK
:
1310 case TGSI_OPCODE_CONT
:
1314 case TGSI_OPCODE_ENDLOOP
:
/* Generic path: an opcode with a non-zero op_trans[] entry, or MOV (accepted
 * explicitly regardless of its table value), is emitted through ttn_alu().
 * Otherwise the opcode name is reported on stderr as unknown. */
1319 if (op_trans
[tgsi_op
] != 0 || tgsi_op
== TGSI_OPCODE_MOV
) {
1320 ttn_alu(b
, op_trans
[tgsi_op
], dest
, src
);
1322 fprintf(stderr
, "unknown TGSI opcode: %s\n",
1323 tgsi_get_opcode_name(tgsi_op
));
/* Saturate handling: only TGSI_SAT_ZERO_ONE is accepted and the destination
 * must not be SSA.  The destination is read back through ttn_src_for_dest(),
 * passed through nir_fsat(), and written again with ttn_move_dest(). */
1329 if (tgsi_inst
->Instruction
.Saturate
) {
1330 assert(tgsi_inst
->Instruction
.Saturate
== TGSI_SAT_ZERO_ONE
);
1331 assert(!dest
.dest
.is_ssa
);
1332 ttn_move_dest(b
, dest
, nir_fsat(b
, ttn_src_for_dest(b
, &dest
)));
1337 * Puts a NIR intrinsic to store of each TGSI_FILE_OUTPUT value to the output
1338 * variables at the end of the shader.
1340 * We don't generate these incrementally as the TGSI_FILE_OUTPUT values are
1341 * written, because there's no output load intrinsic, which means we couldn't
1342 * handle writemasks.
1345 ttn_add_output_stores(struct ttn_compile
*c
)
1347 nir_builder
*b
= &c
->build
;
1349 foreach_list_typed(nir_variable
, var
, node
, &b
->shader
->outputs
) {
1350 unsigned array_len
= MAX2(glsl_get_length(var
->type
), 1);
1353 for (i
= 0; i
< array_len
; i
++) {
1354 nir_intrinsic_instr
*store
=
1355 nir_intrinsic_instr_create(b
->shader
, nir_intrinsic_store_output
);
1356 store
->num_components
= 4;
1357 store
->const_index
[0] = var
->data
.driver_location
+ i
;
1358 store
->const_index
[1] = 1;
1359 store
->src
[0].reg
.reg
= c
->output_regs
[var
->data
.driver_location
].reg
;
1360 nir_instr_insert_after_cf_list(b
->cf_node_list
, &store
->instr
);
1366 tgsi_to_nir(const void *tgsi_tokens
,
1367 const nir_shader_compiler_options
*options
)
1369 struct tgsi_parse_context parser
;
1370 struct tgsi_shader_info scan
;
1371 struct ttn_compile
*c
;
1372 struct nir_shader
*s
;
1375 c
= rzalloc(NULL
, struct ttn_compile
);
1376 s
= nir_shader_create(NULL
, options
);
1378 nir_function
*func
= nir_function_create(s
, "main");
1379 nir_function_overload
*overload
= nir_function_overload_create(func
);
1380 nir_function_impl
*impl
= nir_function_impl_create(overload
);
1382 nir_builder_init(&c
->build
, impl
);
1383 nir_builder_insert_after_cf_list(&c
->build
, &impl
->body
);
1385 tgsi_scan_shader(tgsi_tokens
, &scan
);
1388 s
->num_inputs
= scan
.file_max
[TGSI_FILE_INPUT
] + 1;
1389 s
->num_uniforms
= scan
.file_max
[TGSI_FILE_CONSTANT
] + 1;
1390 s
->num_outputs
= scan
.file_max
[TGSI_FILE_OUTPUT
] + 1;
1392 c
->output_regs
= rzalloc_array(c
, struct ttn_reg_info
,
1393 scan
.file_max
[TGSI_FILE_OUTPUT
] + 1);
1394 c
->temp_regs
= rzalloc_array(c
, struct ttn_reg_info
,
1395 scan
.file_max
[TGSI_FILE_TEMPORARY
] + 1);
1396 c
->imm_defs
= rzalloc_array(c
, nir_ssa_def
*,
1397 scan
.file_max
[TGSI_FILE_IMMEDIATE
] + 1);
1399 c
->if_stack
= rzalloc_array(c
, struct exec_list
*,
1400 (scan
.opcode_count
[TGSI_OPCODE_IF
] +
1401 scan
.opcode_count
[TGSI_OPCODE_UIF
]) * 2);
1402 c
->loop_stack
= rzalloc_array(c
, struct exec_list
*,
1403 scan
.opcode_count
[TGSI_OPCODE_BGNLOOP
]);
1405 ret
= tgsi_parse_init(&parser
, tgsi_tokens
);
1406 assert(ret
== TGSI_PARSE_OK
);
1408 while (!tgsi_parse_end_of_tokens(&parser
)) {
1409 tgsi_parse_token(&parser
);
1410 c
->token
= &parser
.FullToken
;
1412 switch (parser
.FullToken
.Token
.Type
) {
1413 case TGSI_TOKEN_TYPE_DECLARATION
:
1414 ttn_emit_declaration(c
);
1417 case TGSI_TOKEN_TYPE_INSTRUCTION
:
1418 ttn_emit_instruction(c
);
1421 case TGSI_TOKEN_TYPE_IMMEDIATE
:
1422 ttn_emit_immediate(c
);
1427 tgsi_parse_free(&parser
);
1429 ttn_add_output_stores(c
);