2 * Copyright © 2015 Intel Corporation
3 * Copyright © 2014-2015 Broadcom
4 * Copyright (C) 2014 Rob Clark <robclark@freedesktop.org>
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
13 * The above copyright notice and this permission notice (including the next
14 * paragraph) shall be included in all copies or substantial portions of the
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
22 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
27 #include "nir/nir_builder.h"
28 #include "glsl/list.h"
29 #include "main/imports.h"
30 #include "util/ralloc.h"
32 #include "prog_to_nir.h"
33 #include "prog_instruction.h"
34 #include "prog_parameter.h"
35 #include "prog_print.h"
40 * A translator from Mesa IR (prog_instruction.h) to NIR. This is primarily
41 * intended to support ARB_vertex_program, ARB_fragment_program, and fixed-function
42 * vertex processing. Full GLSL support should use glsl_to_nir instead.
46 struct gl_program
*prog
;
50 nir_variable
*input_vars
[VARYING_SLOT_MAX
];
51 nir_variable
*output_vars
[VARYING_SLOT_MAX
];
52 nir_register
**output_regs
;
53 nir_register
**temp_regs
;
55 nir_register
*addr_reg
;
/* Build a compile-time unsigned[4] swizzle array out of SWIZZLE_* component
 * names (X/Y/Z/W/ZERO/ONE), suitable for passing to nir_swizzle().
 */
#define SWIZ(X, Y, Z, W) \
   (unsigned[4]){ SWIZZLE_##X, SWIZZLE_##Y, SWIZZLE_##Z, SWIZZLE_##W }
/* Emit a 4-component swizzle of src. */
#define ptn_swizzle(b, src, x, y, z, w) nir_swizzle(b, src, SWIZ(x, y, z, w), 4, true)
/* Extract a single channel of src as a 1-component value. */
#define ptn_channel(b, src, ch) nir_swizzle(b, src, SWIZ(ch, ch, ch, ch), 1, true)
64 ptn_src_for_dest(struct ptn_compile
*c
, nir_alu_dest
*dest
)
66 nir_builder
*b
= &c
->build
;
69 memset(&src
, 0, sizeof(src
));
71 if (dest
->dest
.is_ssa
)
72 src
.src
= nir_src_for_ssa(&dest
->dest
.ssa
);
74 assert(!dest
->dest
.reg
.indirect
);
75 src
.src
= nir_src_for_reg(dest
->dest
.reg
.reg
);
76 src
.src
.reg
.base_offset
= dest
->dest
.reg
.base_offset
;
79 for (int i
= 0; i
< 4; i
++)
82 return nir_fmov_alu(b
, src
, 4);
86 ptn_get_dest(struct ptn_compile
*c
, const struct prog_dst_register
*prog_dst
)
90 memset(&dest
, 0, sizeof(dest
));
92 switch (prog_dst
->File
) {
93 case PROGRAM_TEMPORARY
:
94 dest
.dest
.reg
.reg
= c
->temp_regs
[prog_dst
->Index
];
97 dest
.dest
.reg
.reg
= c
->output_regs
[prog_dst
->Index
];
100 assert(prog_dst
->Index
== 0);
101 dest
.dest
.reg
.reg
= c
->addr_reg
;
103 case PROGRAM_UNDEFINED
:
107 dest
.write_mask
= prog_dst
->WriteMask
;
108 dest
.saturate
= false;
110 assert(!prog_dst
->RelAddr
);
116 * Multiply the contents of the ADDR register by 4 to convert from the number
117 * of vec4s to the number of floating point components.
120 ptn_addr_reg_value(struct ptn_compile
*c
)
122 nir_builder
*b
= &c
->build
;
124 memset(&src
, 0, sizeof(src
));
125 src
.src
= nir_src_for_reg(c
->addr_reg
);
127 return nir_imul(b
, nir_fmov_alu(b
, src
, 1), nir_imm_int(b
, 4));
131 ptn_get_src(struct ptn_compile
*c
, const struct prog_src_register
*prog_src
)
133 nir_builder
*b
= &c
->build
;
136 memset(&src
, 0, sizeof(src
));
138 switch (prog_src
->File
) {
139 case PROGRAM_UNDEFINED
:
140 return nir_imm_float(b
, 0.0);
141 case PROGRAM_TEMPORARY
:
142 assert(!prog_src
->RelAddr
&& prog_src
->Index
>= 0);
143 src
.src
.reg
.reg
= c
->temp_regs
[prog_src
->Index
];
145 case PROGRAM_INPUT
: {
146 /* ARB_vertex_program doesn't allow relative addressing on vertex
147 * attributes; ARB_fragment_program has no relative addressing at all.
149 assert(!prog_src
->RelAddr
);
151 assert(prog_src
->Index
>= 0 && prog_src
->Index
< VARYING_SLOT_MAX
);
153 nir_intrinsic_instr
*load
=
154 nir_intrinsic_instr_create(b
->shader
, nir_intrinsic_load_var
);
155 load
->num_components
= 4;
157 nir_deref_var_create(b
->shader
, c
->input_vars
[prog_src
->Index
]);
159 nir_ssa_dest_init(&load
->instr
, &load
->dest
, 4, NULL
);
160 nir_instr_insert_after_cf_list(b
->cf_node_list
, &load
->instr
);
162 src
.src
= nir_src_for_ssa(&load
->dest
.ssa
);
165 case PROGRAM_STATE_VAR
:
166 case PROGRAM_CONSTANT
: {
167 /* We actually want to look at the type in the Parameters list for this,
168 * because it lets us upload constant builtin uniforms as actual
171 struct gl_program_parameter_list
*plist
= c
->prog
->Parameters
;
172 gl_register_file file
= prog_src
->RelAddr
? prog_src
->File
:
173 plist
->Parameters
[prog_src
->Index
].Type
;
176 case PROGRAM_CONSTANT
:
177 if ((c
->prog
->IndirectRegisterFiles
& (1 << PROGRAM_CONSTANT
)) == 0) {
178 float *v
= (float *) plist
->ParameterValues
[prog_src
->Index
];
179 src
.src
= nir_src_for_ssa(nir_imm_vec4(b
, v
[0], v
[1], v
[2], v
[3]));
183 case PROGRAM_STATE_VAR
: {
184 nir_intrinsic_op load_op
=
185 prog_src
->RelAddr
? nir_intrinsic_load_uniform_indirect
:
186 nir_intrinsic_load_uniform
;
187 nir_intrinsic_instr
*load
= nir_intrinsic_instr_create(b
->shader
, load_op
);
188 nir_ssa_dest_init(&load
->instr
, &load
->dest
, 4, NULL
);
189 load
->num_components
= 4;
191 /* Multiply src->Index by 4 to scale from # of vec4s to components. */
192 load
->const_index
[0] = 4 * prog_src
->Index
;
193 load
->const_index
[1] = 1;
195 if (prog_src
->RelAddr
) {
196 nir_ssa_def
*reladdr
= ptn_addr_reg_value(c
);
197 if (prog_src
->Index
< 0) {
198 /* This is a negative offset which should be added to the address
201 reladdr
= nir_iadd(b
, reladdr
, nir_imm_int(b
, load
->const_index
[0]));
202 load
->const_index
[0] = 0;
204 load
->src
[0] = nir_src_for_ssa(reladdr
);
207 nir_instr_insert_after_cf_list(b
->cf_node_list
, &load
->instr
);
209 src
.src
= nir_src_for_ssa(&load
->dest
.ssa
);
213 fprintf(stderr
, "bad uniform src register file: %s (%d)\n",
214 _mesa_register_file_name(file
), file
);
220 fprintf(stderr
, "unknown src register file: %s (%d)\n",
221 _mesa_register_file_name(prog_src
->File
), prog_src
->File
);
226 if (!HAS_EXTENDED_SWIZZLE(prog_src
->Swizzle
)) {
227 for (int i
= 0; i
< 4; i
++)
228 src
.swizzle
[i
] = GET_SWZ(prog_src
->Swizzle
, i
);
230 def
= nir_fmov_alu(b
, src
, 4);
232 nir_ssa_def
*chans
[4];
233 for (int i
= 0; i
< 4; i
++) {
234 int swizzle
= GET_SWZ(prog_src
->Swizzle
, i
);
235 if (swizzle
== SWIZZLE_ZERO
) {
236 chans
[i
] = nir_imm_float(b
, 0.0);
237 } else if (swizzle
== SWIZZLE_ONE
) {
238 chans
[i
] = nir_imm_float(b
, 1.0);
240 assert(swizzle
!= SWIZZLE_NIL
);
241 nir_alu_instr
*mov
= nir_alu_instr_create(b
->shader
, nir_op_fmov
);
242 nir_ssa_dest_init(&mov
->instr
, &mov
->dest
.dest
, 1, NULL
);
243 mov
->dest
.write_mask
= 0x1;
245 mov
->src
[0].swizzle
[0] = swizzle
;
246 nir_instr_insert_after_cf_list(b
->cf_node_list
, &mov
->instr
);
248 chans
[i
] = &mov
->dest
.dest
.ssa
;
251 def
= nir_vec4(b
, chans
[0], chans
[1], chans
[2], chans
[3]);
255 def
= nir_fabs(b
, def
);
257 if (prog_src
->Negate
)
258 def
= nir_fneg(b
, def
);
264 ptn_alu(nir_builder
*b
, nir_op op
, nir_alu_dest dest
, nir_ssa_def
**src
)
266 unsigned num_srcs
= nir_op_infos
[op
].num_inputs
;
267 nir_alu_instr
*instr
= nir_alu_instr_create(b
->shader
, op
);
270 for (i
= 0; i
< num_srcs
; i
++)
271 instr
->src
[i
].src
= nir_src_for_ssa(src
[i
]);
274 nir_instr_insert_after_cf_list(b
->cf_node_list
, &instr
->instr
);
278 ptn_move_dest_masked(nir_builder
*b
, nir_alu_dest dest
,
279 nir_ssa_def
*def
, unsigned write_mask
)
281 if (!(dest
.write_mask
& write_mask
))
284 nir_alu_instr
*mov
= nir_alu_instr_create(b
->shader
, nir_op_fmov
);
289 mov
->dest
.write_mask
&= write_mask
;
290 mov
->src
[0].src
= nir_src_for_ssa(def
);
291 for (unsigned i
= def
->num_components
; i
< 4; i
++)
292 mov
->src
[0].swizzle
[i
] = def
->num_components
- 1;
293 nir_instr_insert_after_cf_list(b
->cf_node_list
, &mov
->instr
);
297 ptn_move_dest(nir_builder
*b
, nir_alu_dest dest
, nir_ssa_def
*def
)
299 ptn_move_dest_masked(b
, dest
, def
, WRITEMASK_XYZW
);
303 ptn_arl(nir_builder
*b
, nir_alu_dest dest
, nir_ssa_def
**src
)
305 ptn_move_dest(b
, dest
, nir_f2i(b
, nir_ffloor(b
, src
[0])));
308 /* EXP - Approximate Exponential Base 2
309 * dst.x = 2^{\lfloor src.x\rfloor}
310 * dst.y = src.x - \lfloor src.x\rfloor
315 ptn_exp(nir_builder
*b
, nir_alu_dest dest
, nir_ssa_def
**src
)
317 nir_ssa_def
*srcx
= ptn_channel(b
, src
[0], X
);
319 ptn_move_dest_masked(b
, dest
, nir_fexp2(b
, nir_ffloor(b
, srcx
)), WRITEMASK_X
);
320 ptn_move_dest_masked(b
, dest
, nir_fsub(b
, srcx
, nir_ffloor(b
, srcx
)), WRITEMASK_Y
);
321 ptn_move_dest_masked(b
, dest
, nir_fexp2(b
, srcx
), WRITEMASK_Z
);
322 ptn_move_dest_masked(b
, dest
, nir_imm_float(b
, 1.0), WRITEMASK_W
);
325 /* LOG - Approximate Logarithm Base 2
326 * dst.x = \lfloor\log_2{|src.x|}\rfloor
327 * dst.y = |src.x| * 2^{-\lfloor\log_2{|src.x|}\rfloor}}
328 * dst.z = \log_2{|src.x|}
332 ptn_log(nir_builder
*b
, nir_alu_dest dest
, nir_ssa_def
**src
)
334 nir_ssa_def
*abs_srcx
= nir_fabs(b
, ptn_channel(b
, src
[0], X
));
335 nir_ssa_def
*log2
= nir_flog2(b
, abs_srcx
);
336 nir_ssa_def
*floor_log2
= nir_ffloor(b
, log2
);
338 ptn_move_dest_masked(b
, dest
, floor_log2
, WRITEMASK_X
);
339 ptn_move_dest_masked(b
, dest
,
340 nir_fmul(b
, abs_srcx
,
341 nir_fexp2(b
, nir_fneg(b
, floor_log2
))),
343 ptn_move_dest_masked(b
, dest
, log2
, WRITEMASK_Z
);
344 ptn_move_dest_masked(b
, dest
, nir_imm_float(b
, 1.0), WRITEMASK_W
);
347 /* DST - Distance Vector
349 * dst.y = src0.y \times src1.y
354 ptn_dst(nir_builder
*b
, nir_alu_dest dest
, nir_ssa_def
**src
)
356 ptn_move_dest_masked(b
, dest
, nir_imm_float(b
, 1.0), WRITEMASK_X
);
357 ptn_move_dest_masked(b
, dest
, nir_fmul(b
, src
[0], src
[1]), WRITEMASK_Y
);
358 ptn_move_dest_masked(b
, dest
, nir_fmov(b
, src
[0]), WRITEMASK_Z
);
359 ptn_move_dest_masked(b
, dest
, nir_fmov(b
, src
[1]), WRITEMASK_W
);
362 /* LIT - Light Coefficients
364 * dst.y = max(src.x, 0.0)
365 * dst.z = (src.x > 0.0) ? max(src.y, 0.0)^{clamp(src.w, -128.0, 128.0))} : 0
369 ptn_lit(nir_builder
*b
, nir_alu_dest dest
, nir_ssa_def
**src
)
371 ptn_move_dest_masked(b
, dest
, nir_imm_float(b
, 1.0), WRITEMASK_XW
);
373 ptn_move_dest_masked(b
, dest
, nir_fmax(b
, ptn_channel(b
, src
[0], X
),
374 nir_imm_float(b
, 0.0)), WRITEMASK_Y
);
376 if (dest
.write_mask
& WRITEMASK_Z
) {
377 nir_ssa_def
*src0_y
= ptn_channel(b
, src
[0], Y
);
378 nir_ssa_def
*wclamp
= nir_fmax(b
, nir_fmin(b
, ptn_channel(b
, src
[0], W
),
379 nir_imm_float(b
, 128.0)),
380 nir_imm_float(b
, -128.0));
381 nir_ssa_def
*pow
= nir_fpow(b
, nir_fmax(b
, src0_y
, nir_imm_float(b
, 0.0)),
385 if (b
->shader
->options
->native_integers
) {
387 nir_fge(b
, nir_imm_float(b
, 0.0), ptn_channel(b
, src
[0], X
)),
388 nir_imm_float(b
, 0.0),
392 nir_sge(b
, nir_imm_float(b
, 0.0), ptn_channel(b
, src
[0], X
)),
393 nir_imm_float(b
, 0.0),
397 ptn_move_dest_masked(b
, dest
, z
, WRITEMASK_Z
);
402 * dst.x = \cos{src.x}
403 * dst.y = \sin{src.x}
408 ptn_scs(nir_builder
*b
, nir_alu_dest dest
, nir_ssa_def
**src
)
410 ptn_move_dest_masked(b
, dest
, nir_fcos(b
, ptn_channel(b
, src
[0], X
)),
412 ptn_move_dest_masked(b
, dest
, nir_fsin(b
, ptn_channel(b
, src
[0], X
)),
414 ptn_move_dest_masked(b
, dest
, nir_imm_float(b
, 0.0), WRITEMASK_Z
);
415 ptn_move_dest_masked(b
, dest
, nir_imm_float(b
, 1.0), WRITEMASK_W
);
419 * Emit SLT. For platforms with integers, prefer b2f(flt(...)).
422 ptn_slt(nir_builder
*b
, nir_alu_dest dest
, nir_ssa_def
**src
)
424 if (b
->shader
->options
->native_integers
) {
425 ptn_move_dest(b
, dest
, nir_b2f(b
, nir_flt(b
, src
[0], src
[1])));
427 ptn_move_dest(b
, dest
, nir_slt(b
, src
[0], src
[1]));
432 * Emit SGE. For platforms with integers, prefer b2f(fge(...)).
435 ptn_sge(nir_builder
*b
, nir_alu_dest dest
, nir_ssa_def
**src
)
437 if (b
->shader
->options
->native_integers
) {
438 ptn_move_dest(b
, dest
, nir_b2f(b
, nir_fge(b
, src
[0], src
[1])));
440 ptn_move_dest(b
, dest
, nir_sge(b
, src
[0], src
[1]));
445 ptn_sle(nir_builder
*b
, nir_alu_dest dest
, nir_ssa_def
**src
)
447 nir_ssa_def
*commuted
[] = { src
[1], src
[0] };
448 ptn_sge(b
, dest
, commuted
);
452 ptn_sgt(nir_builder
*b
, nir_alu_dest dest
, nir_ssa_def
**src
)
454 nir_ssa_def
*commuted
[] = { src
[1], src
[0] };
455 ptn_slt(b
, dest
, commuted
);
459 * Emit SEQ. For platforms with integers, prefer b2f(feq(...)).
462 ptn_seq(nir_builder
*b
, nir_alu_dest dest
, nir_ssa_def
**src
)
464 if (b
->shader
->options
->native_integers
) {
465 ptn_move_dest(b
, dest
, nir_b2f(b
, nir_feq(b
, src
[0], src
[1])));
467 ptn_move_dest(b
, dest
, nir_seq(b
, src
[0], src
[1]));
472 * Emit SNE. For platforms with integers, prefer b2f(fne(...)).
475 ptn_sne(nir_builder
*b
, nir_alu_dest dest
, nir_ssa_def
**src
)
477 if (b
->shader
->options
->native_integers
) {
478 ptn_move_dest(b
, dest
, nir_b2f(b
, nir_fne(b
, src
[0], src
[1])));
480 ptn_move_dest(b
, dest
, nir_sne(b
, src
[0], src
[1]));
485 ptn_xpd(nir_builder
*b
, nir_alu_dest dest
, nir_ssa_def
**src
)
487 ptn_move_dest_masked(b
, dest
,
490 ptn_swizzle(b
, src
[0], Y
, Z
, X
, X
),
491 ptn_swizzle(b
, src
[1], Z
, X
, Y
, X
)),
493 ptn_swizzle(b
, src
[1], Y
, Z
, X
, X
),
494 ptn_swizzle(b
, src
[0], Z
, X
, Y
, X
))),
496 ptn_move_dest_masked(b
, dest
, nir_imm_float(b
, 1.0), WRITEMASK_W
);
500 ptn_dp2(nir_builder
*b
, nir_alu_dest dest
, nir_ssa_def
**src
)
502 ptn_move_dest(b
, dest
, nir_fdot2(b
, src
[0], src
[1]));
506 ptn_dp3(nir_builder
*b
, nir_alu_dest dest
, nir_ssa_def
**src
)
508 ptn_move_dest(b
, dest
, nir_fdot3(b
, src
[0], src
[1]));
512 ptn_dp4(nir_builder
*b
, nir_alu_dest dest
, nir_ssa_def
**src
)
514 ptn_move_dest(b
, dest
, nir_fdot4(b
, src
[0], src
[1]));
518 ptn_dph(nir_builder
*b
, nir_alu_dest dest
, nir_ssa_def
**src
)
520 nir_ssa_def
*dp3
= nir_fdot3(b
, src
[0], src
[1]);
521 ptn_move_dest(b
, dest
, nir_fadd(b
, dp3
, ptn_channel(b
, src
[1], W
)));
525 ptn_cmp(nir_builder
*b
, nir_alu_dest dest
, nir_ssa_def
**src
)
527 if (b
->shader
->options
->native_integers
) {
528 ptn_move_dest(b
, dest
, nir_bcsel(b
,
529 nir_flt(b
, src
[0], nir_imm_float(b
, 0.0)),
532 ptn_move_dest(b
, dest
, nir_fcsel(b
,
533 nir_slt(b
, src
[0], nir_imm_float(b
, 0.0)),
539 ptn_lrp(nir_builder
*b
, nir_alu_dest dest
, nir_ssa_def
**src
)
541 ptn_move_dest(b
, dest
, nir_flrp(b
, src
[2], src
[1], src
[0]));
545 ptn_kil(nir_builder
*b
, nir_alu_dest dest
, nir_ssa_def
**src
)
547 nir_ssa_def
*cmp
= b
->shader
->options
->native_integers
?
548 nir_bany4(b
, nir_flt(b
, src
[0], nir_imm_float(b
, 0.0))) :
549 nir_fany4(b
, nir_slt(b
, src
[0], nir_imm_float(b
, 0.0)));
551 nir_intrinsic_instr
*discard
=
552 nir_intrinsic_instr_create(b
->shader
, nir_intrinsic_discard_if
);
553 discard
->src
[0] = nir_src_for_ssa(cmp
);
554 nir_instr_insert_after_cf_list(b
->cf_node_list
, &discard
->instr
);
558 ptn_tex(nir_builder
*b
, nir_alu_dest dest
, nir_ssa_def
**src
,
559 struct prog_instruction
*prog_inst
)
561 nir_tex_instr
*instr
;
565 switch (prog_inst
->Opcode
) {
587 assert(!"not handled");
592 fprintf(stderr
, "unknown tex op %d\n", prog_inst
->Opcode
);
596 if (prog_inst
->TexShadow
)
599 instr
= nir_tex_instr_create(b
->shader
, num_srcs
);
601 instr
->dest_type
= nir_type_float
;
602 instr
->is_shadow
= prog_inst
->TexShadow
;
603 instr
->sampler_index
= prog_inst
->TexSrcUnit
;
605 switch (prog_inst
->TexSrcTarget
) {
606 case TEXTURE_1D_INDEX
:
607 instr
->sampler_dim
= GLSL_SAMPLER_DIM_1D
;
609 case TEXTURE_2D_INDEX
:
610 instr
->sampler_dim
= GLSL_SAMPLER_DIM_2D
;
612 case TEXTURE_3D_INDEX
:
613 instr
->sampler_dim
= GLSL_SAMPLER_DIM_3D
;
615 case TEXTURE_CUBE_INDEX
:
616 instr
->sampler_dim
= GLSL_SAMPLER_DIM_CUBE
;
618 case TEXTURE_RECT_INDEX
:
619 instr
->sampler_dim
= GLSL_SAMPLER_DIM_RECT
;
622 fprintf(stderr
, "Unknown texture target %d\n", prog_inst
->TexSrcTarget
);
626 switch (instr
->sampler_dim
) {
627 case GLSL_SAMPLER_DIM_1D
:
628 case GLSL_SAMPLER_DIM_BUF
:
629 instr
->coord_components
= 1;
631 case GLSL_SAMPLER_DIM_2D
:
632 case GLSL_SAMPLER_DIM_RECT
:
633 case GLSL_SAMPLER_DIM_EXTERNAL
:
634 case GLSL_SAMPLER_DIM_MS
:
635 instr
->coord_components
= 2;
637 case GLSL_SAMPLER_DIM_3D
:
638 case GLSL_SAMPLER_DIM_CUBE
:
639 instr
->coord_components
= 3;
643 unsigned src_number
= 0;
645 instr
->src
[src_number
].src
=
646 nir_src_for_ssa(ptn_swizzle(b
, src
[0], X
, Y
, Z
, W
));
647 instr
->src
[src_number
].src_type
= nir_tex_src_coord
;
650 if (prog_inst
->Opcode
== OPCODE_TXP
) {
651 instr
->src
[src_number
].src
= nir_src_for_ssa(ptn_channel(b
, src
[0], W
));
652 instr
->src
[src_number
].src_type
= nir_tex_src_projector
;
656 if (prog_inst
->Opcode
== OPCODE_TXB
) {
657 instr
->src
[src_number
].src
= nir_src_for_ssa(ptn_channel(b
, src
[0], W
));
658 instr
->src
[src_number
].src_type
= nir_tex_src_bias
;
662 if (prog_inst
->Opcode
== OPCODE_TXL
) {
663 instr
->src
[src_number
].src
= nir_src_for_ssa(ptn_channel(b
, src
[0], W
));
664 instr
->src
[src_number
].src_type
= nir_tex_src_lod
;
668 if (instr
->is_shadow
) {
669 if (instr
->coord_components
< 3)
670 instr
->src
[src_number
].src
= nir_src_for_ssa(ptn_channel(b
, src
[0], Z
));
672 instr
->src
[src_number
].src
= nir_src_for_ssa(ptn_channel(b
, src
[0], W
));
674 instr
->src
[src_number
].src_type
= nir_tex_src_comparitor
;
678 assert(src_number
== num_srcs
);
680 nir_ssa_dest_init(&instr
->instr
, &instr
->dest
, 4, NULL
);
681 nir_instr_insert_after_cf_list(b
->cf_node_list
, &instr
->instr
);
683 /* Resolve the writemask on the texture op. */
684 ptn_move_dest(b
, dest
, &instr
->dest
.ssa
);
687 static const nir_op op_trans
[MAX_OPCODE
] = {
689 [OPCODE_ABS
] = nir_op_fabs
,
690 [OPCODE_ADD
] = nir_op_fadd
,
693 [OPCODE_COS
] = nir_op_fcos
,
694 [OPCODE_DDX
] = nir_op_fddx
,
695 [OPCODE_DDY
] = nir_op_fddy
,
702 [OPCODE_EX2
] = nir_op_fexp2
,
703 [OPCODE_EXP
] = nir_op_fexp
,
704 [OPCODE_FLR
] = nir_op_ffloor
,
705 [OPCODE_FRC
] = nir_op_ffract
,
706 [OPCODE_LG2
] = nir_op_flog2
,
710 [OPCODE_MAD
] = nir_op_ffma
,
711 [OPCODE_MAX
] = nir_op_fmax
,
712 [OPCODE_MIN
] = nir_op_fmin
,
713 [OPCODE_MOV
] = nir_op_fmov
,
714 [OPCODE_MUL
] = nir_op_fmul
,
715 [OPCODE_POW
] = nir_op_fpow
,
716 [OPCODE_RCP
] = nir_op_frcp
,
718 [OPCODE_RSQ
] = nir_op_frsq
,
723 [OPCODE_SIN
] = nir_op_fsin
,
727 [OPCODE_SSG
] = nir_op_fsign
,
728 [OPCODE_SUB
] = nir_op_fsub
,
731 [OPCODE_TRUNC
] = nir_op_ftrunc
,
741 ptn_emit_instruction(struct ptn_compile
*c
, struct prog_instruction
*prog_inst
)
743 nir_builder
*b
= &c
->build
;
745 const unsigned op
= prog_inst
->Opcode
;
747 if (op
== OPCODE_END
)
751 for (i
= 0; i
< 3; i
++) {
752 src
[i
] = ptn_get_src(c
, &prog_inst
->SrcReg
[i
]);
754 nir_alu_dest dest
= ptn_get_dest(c
, &prog_inst
->DstReg
);
760 ptn_move_dest(b
, dest
, nir_frsq(b
, ptn_channel(b
, src
[0], X
)));
764 ptn_move_dest(b
, dest
, nir_frcp(b
, ptn_channel(b
, src
[0], X
)));
768 ptn_move_dest(b
, dest
, nir_fexp2(b
, ptn_channel(b
, src
[0], X
)));
772 ptn_move_dest(b
, dest
, nir_flog2(b
, ptn_channel(b
, src
[0], X
)));
776 ptn_move_dest(b
, dest
, nir_fpow(b
,
777 ptn_channel(b
, src
[0], X
),
778 ptn_channel(b
, src
[1], X
)));
782 ptn_move_dest(b
, dest
, nir_fcos(b
, ptn_channel(b
, src
[0], X
)));
786 ptn_move_dest(b
, dest
, nir_fsin(b
, ptn_channel(b
, src
[0], X
)));
790 ptn_arl(b
, dest
, src
);
794 ptn_exp(b
, dest
, src
);
798 ptn_log(b
, dest
, src
);
802 ptn_lrp(b
, dest
, src
);
806 ptn_dst(b
, dest
, src
);
810 ptn_lit(b
, dest
, src
);
814 ptn_xpd(b
, dest
, src
);
818 ptn_dp2(b
, dest
, src
);
822 ptn_dp3(b
, dest
, src
);
826 ptn_dp4(b
, dest
, src
);
830 ptn_dph(b
, dest
, src
);
834 ptn_kil(b
, dest
, src
);
838 ptn_cmp(b
, dest
, src
);
842 ptn_scs(b
, dest
, src
);
846 ptn_slt(b
, dest
, src
);
850 ptn_sgt(b
, dest
, src
);
854 ptn_sle(b
, dest
, src
);
858 ptn_sge(b
, dest
, src
);
862 ptn_seq(b
, dest
, src
);
866 ptn_sne(b
, dest
, src
);
875 ptn_tex(b
, dest
, src
, prog_inst
);
879 /* Extended swizzles were already handled in ptn_get_src(). */
880 ptn_alu(b
, nir_op_fmov
, dest
, src
);
887 if (op_trans
[op
] != 0 || op
== OPCODE_MOV
) {
888 ptn_alu(b
, op_trans
[op
], dest
, src
);
890 fprintf(stderr
, "unknown opcode: %s\n", _mesa_opcode_string(op
));
896 if (prog_inst
->SaturateMode
) {
897 assert(prog_inst
->SaturateMode
== SATURATE_ZERO_ONE
);
898 assert(!dest
.dest
.is_ssa
);
899 ptn_move_dest(b
, dest
, nir_fsat(b
, ptn_src_for_dest(c
, &dest
)));
904 * Puts a NIR intrinsic to store of each PROGRAM_OUTPUT value to the output
905 * variables at the end of the shader.
907 * We don't generate these incrementally as the PROGRAM_OUTPUT values are
908 * written, because there's no output load intrinsic, which means we couldn't
912 ptn_add_output_stores(struct ptn_compile
*c
)
914 nir_builder
*b
= &c
->build
;
916 foreach_list_typed(nir_variable
, var
, node
, &b
->shader
->outputs
) {
917 nir_intrinsic_instr
*store
=
918 nir_intrinsic_instr_create(b
->shader
, nir_intrinsic_store_var
);
919 store
->num_components
= 4;
920 store
->variables
[0] =
921 nir_deref_var_create(b
->shader
, c
->output_vars
[var
->data
.location
]);
922 store
->src
[0].reg
.reg
= c
->output_regs
[var
->data
.location
];
923 nir_instr_insert_after_cf_list(c
->build
.cf_node_list
, &store
->instr
);
928 setup_registers_and_variables(struct ptn_compile
*c
)
930 nir_builder
*b
= &c
->build
;
931 struct nir_shader
*shader
= b
->shader
;
933 /* Create input variables. */
934 const int num_inputs
= _mesa_flsll(c
->prog
->InputsRead
);
935 for (int i
= 0; i
< num_inputs
; i
++) {
936 if (!(c
->prog
->InputsRead
& BITFIELD64_BIT(i
)))
938 nir_variable
*var
= rzalloc(shader
, nir_variable
);
939 var
->type
= glsl_vec4_type();
940 var
->data
.read_only
= true;
941 var
->data
.mode
= nir_var_shader_in
;
942 var
->name
= ralloc_asprintf(var
, "in_%d", i
);
943 var
->data
.location
= i
;
946 if (c
->prog
->Target
== GL_FRAGMENT_PROGRAM_ARB
) {
947 struct gl_fragment_program
*fp
=
948 (struct gl_fragment_program
*) c
->prog
;
950 var
->data
.interpolation
= fp
->InterpQualifier
[i
];
952 if (i
== VARYING_SLOT_POS
) {
953 var
->data
.origin_upper_left
= fp
->OriginUpperLeft
;
954 var
->data
.pixel_center_integer
= fp
->PixelCenterInteger
;
955 } else if (i
== VARYING_SLOT_FOGC
) {
956 /* fogcoord is defined as <f, 0.0, 0.0, 1.0>. Make the actual
957 * input variable a float, and create a local containing the
960 var
->type
= glsl_float_type();
962 nir_intrinsic_instr
*load_x
=
963 nir_intrinsic_instr_create(shader
, nir_intrinsic_load_var
);
964 load_x
->num_components
= 1;
965 load_x
->variables
[0] = nir_deref_var_create(shader
, var
);
966 nir_ssa_dest_init(&load_x
->instr
, &load_x
->dest
, 1, NULL
);
967 nir_instr_insert_after_cf_list(b
->cf_node_list
, &load_x
->instr
);
969 nir_ssa_def
*f001
= nir_vec4(b
, &load_x
->dest
.ssa
, nir_imm_float(b
, 0.0),
970 nir_imm_float(b
, 0.0), nir_imm_float(b
, 1.0));
972 nir_variable
*fullvar
= rzalloc(shader
, nir_variable
);
973 fullvar
->type
= glsl_vec4_type();
974 fullvar
->data
.mode
= nir_var_local
;
975 fullvar
->name
= "fogcoord_tmp";
976 exec_list_push_tail(&b
->impl
->locals
, &fullvar
->node
);
978 nir_intrinsic_instr
*store
=
979 nir_intrinsic_instr_create(shader
, nir_intrinsic_store_var
);
980 store
->num_components
= 4;
981 store
->variables
[0] = nir_deref_var_create(shader
, fullvar
);
982 store
->src
[0] = nir_src_for_ssa(f001
);
983 nir_instr_insert_after_cf_list(b
->cf_node_list
, &store
->instr
);
985 /* Insert the real input into the list so the driver has real
986 * inputs, but set c->input_vars[i] to the temporary so we use
987 * the splatted value.
989 exec_list_push_tail(&shader
->inputs
, &var
->node
);
990 c
->input_vars
[i
] = fullvar
;
995 exec_list_push_tail(&shader
->inputs
, &var
->node
);
996 c
->input_vars
[i
] = var
;
999 /* Create output registers and variables. */
1000 int max_outputs
= _mesa_fls(c
->prog
->OutputsWritten
);
1001 c
->output_regs
= rzalloc_array(c
, nir_register
*, max_outputs
);
1003 for (int i
= 0; i
< max_outputs
; i
++) {
1004 if (!(c
->prog
->OutputsWritten
& BITFIELD64_BIT(i
)))
1007 /* Since we can't load from outputs in the IR, we make temporaries
1008 * for the outputs and emit stores to the real outputs at the end of
1011 nir_register
*reg
= nir_local_reg_create(b
->impl
);
1012 reg
->num_components
= 4;
1014 nir_variable
*var
= rzalloc(shader
, nir_variable
);
1015 var
->type
= glsl_vec4_type();
1016 var
->data
.mode
= nir_var_shader_out
;
1017 var
->name
= ralloc_asprintf(var
, "out_%d", i
);
1019 var
->data
.location
= i
;
1020 var
->data
.index
= 0;
1022 c
->output_regs
[i
] = reg
;
1024 exec_list_push_tail(&shader
->outputs
, &var
->node
);
1025 c
->output_vars
[i
] = var
;
1028 /* Create temporary registers. */
1029 c
->temp_regs
= rzalloc_array(c
, nir_register
*, c
->prog
->NumTemporaries
);
1032 for (int i
= 0; i
< c
->prog
->NumTemporaries
; i
++) {
1033 reg
= nir_local_reg_create(b
->impl
);
1038 reg
->num_components
= 4;
1039 c
->temp_regs
[i
] = reg
;
1042 /* Create the address register (for ARB_vertex_program). */
1043 reg
= nir_local_reg_create(b
->impl
);
1048 reg
->num_components
= 1;
1051 /* Set the number of uniforms */
1052 shader
->num_uniforms
= 4 * c
->prog
->Parameters
->NumParameters
;
1056 prog_to_nir(struct gl_program
*prog
, const nir_shader_compiler_options
*options
)
1058 struct ptn_compile
*c
;
1059 struct nir_shader
*s
;
1061 c
= rzalloc(NULL
, struct ptn_compile
);
1064 s
= nir_shader_create(NULL
, options
);
1069 nir_function
*func
= nir_function_create(s
, "main");
1070 nir_function_overload
*overload
= nir_function_overload_create(func
);
1071 nir_function_impl
*impl
= nir_function_impl_create(overload
);
1073 c
->build
.shader
= s
;
1074 c
->build
.impl
= impl
;
1075 c
->build
.cf_node_list
= &impl
->body
;
1077 setup_registers_and_variables(c
);
1078 if (unlikely(c
->error
))
1081 for (unsigned int i
= 0; i
< prog
->NumInstructions
; i
++) {
1082 ptn_emit_instruction(c
, &prog
->Instructions
[i
]);
1084 if (unlikely(c
->error
))
1088 ptn_add_output_stores(c
);