/*
 * Copyright © 2015 Intel Corporation
 * Copyright © 2014-2015 Broadcom
 * Copyright (C) 2014 Rob Clark <robclark@freedesktop.org>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */
27 #include "nir/nir_builder.h"
28 #include "glsl/list.h"
29 #include "main/imports.h"
30 #include "util/ralloc.h"
32 #include "prog_to_nir.h"
33 #include "prog_instruction.h"
34 #include "prog_parameter.h"
35 #include "prog_print.h"
/**
 * A translator from Mesa IR (prog_instruction.h) to NIR.  This is primarily
 * intended to support ARB_vertex_program, ARB_fragment_program, and
 * fixed-function vertex processing.  Full GLSL support should use
 * glsl_to_nir instead.
 */
47 const struct gl_program
*prog
;
51 nir_variable
*parameters
;
52 nir_variable
*input_vars
[VARYING_SLOT_MAX
];
53 nir_variable
*output_vars
[VARYING_SLOT_MAX
];
54 nir_register
**output_regs
;
55 nir_register
**temp_regs
;
57 nir_register
*addr_reg
;
/* Builds an anonymous unsigned[4] swizzle array from four channel suffixes
 * (each suffix is pasted onto SWIZZLE_, e.g. X -> SWIZZLE_X).
 */
#define SWIZ(X, Y, Z, W) \
   (unsigned[4]){ SWIZZLE_##X, SWIZZLE_##Y, SWIZZLE_##Z, SWIZZLE_##W }

/* Four-component swizzle of src using the given channel suffixes. */
#define ptn_swizzle(b, src, x, y, z, w) \
   nir_swizzle(b, src, SWIZ(x, y, z, w), 4, true)

/* Single-component extraction of channel ch from src. */
#define ptn_channel(b, src, ch) \
   nir_swizzle(b, src, SWIZ(ch, ch, ch, ch), 1, true)
66 ptn_src_for_dest(struct ptn_compile
*c
, nir_alu_dest
*dest
)
68 nir_builder
*b
= &c
->build
;
71 memset(&src
, 0, sizeof(src
));
73 if (dest
->dest
.is_ssa
)
74 src
.src
= nir_src_for_ssa(&dest
->dest
.ssa
);
76 assert(!dest
->dest
.reg
.indirect
);
77 src
.src
= nir_src_for_reg(dest
->dest
.reg
.reg
);
78 src
.src
.reg
.base_offset
= dest
->dest
.reg
.base_offset
;
81 for (int i
= 0; i
< 4; i
++)
84 return nir_fmov_alu(b
, src
, 4);
88 ptn_get_dest(struct ptn_compile
*c
, const struct prog_dst_register
*prog_dst
)
92 memset(&dest
, 0, sizeof(dest
));
94 switch (prog_dst
->File
) {
95 case PROGRAM_TEMPORARY
:
96 dest
.dest
.reg
.reg
= c
->temp_regs
[prog_dst
->Index
];
99 dest
.dest
.reg
.reg
= c
->output_regs
[prog_dst
->Index
];
101 case PROGRAM_ADDRESS
:
102 assert(prog_dst
->Index
== 0);
103 dest
.dest
.reg
.reg
= c
->addr_reg
;
105 case PROGRAM_UNDEFINED
:
109 dest
.write_mask
= prog_dst
->WriteMask
;
110 dest
.saturate
= false;
112 assert(!prog_dst
->RelAddr
);
118 ptn_get_src(struct ptn_compile
*c
, const struct prog_src_register
*prog_src
)
120 nir_builder
*b
= &c
->build
;
123 memset(&src
, 0, sizeof(src
));
125 switch (prog_src
->File
) {
126 case PROGRAM_UNDEFINED
:
127 return nir_imm_float(b
, 0.0);
128 case PROGRAM_TEMPORARY
:
129 assert(!prog_src
->RelAddr
&& prog_src
->Index
>= 0);
130 src
.src
.reg
.reg
= c
->temp_regs
[prog_src
->Index
];
132 case PROGRAM_INPUT
: {
133 /* ARB_vertex_program doesn't allow relative addressing on vertex
134 * attributes; ARB_fragment_program has no relative addressing at all.
136 assert(!prog_src
->RelAddr
);
138 assert(prog_src
->Index
>= 0 && prog_src
->Index
< VARYING_SLOT_MAX
);
140 nir_intrinsic_instr
*load
=
141 nir_intrinsic_instr_create(b
->shader
, nir_intrinsic_load_var
);
142 load
->num_components
= 4;
143 load
->variables
[0] = nir_deref_var_create(load
, c
->input_vars
[prog_src
->Index
]);
145 nir_ssa_dest_init(&load
->instr
, &load
->dest
, 4, NULL
);
146 nir_builder_instr_insert(b
, &load
->instr
);
148 src
.src
= nir_src_for_ssa(&load
->dest
.ssa
);
151 case PROGRAM_STATE_VAR
:
152 case PROGRAM_CONSTANT
: {
153 /* We actually want to look at the type in the Parameters list for this,
154 * because it lets us upload constant builtin uniforms as actual
157 struct gl_program_parameter_list
*plist
= c
->prog
->Parameters
;
158 gl_register_file file
= prog_src
->RelAddr
? prog_src
->File
:
159 plist
->Parameters
[prog_src
->Index
].Type
;
162 case PROGRAM_CONSTANT
:
163 if ((c
->prog
->IndirectRegisterFiles
& (1 << PROGRAM_CONSTANT
)) == 0) {
164 float *v
= (float *) plist
->ParameterValues
[prog_src
->Index
];
165 src
.src
= nir_src_for_ssa(nir_imm_vec4(b
, v
[0], v
[1], v
[2], v
[3]));
169 case PROGRAM_STATE_VAR
: {
170 assert(c
->parameters
!= NULL
);
172 nir_intrinsic_instr
*load
=
173 nir_intrinsic_instr_create(b
->shader
, nir_intrinsic_load_var
);
174 nir_ssa_dest_init(&load
->instr
, &load
->dest
, 4, NULL
);
175 load
->num_components
= 4;
177 load
->variables
[0] = nir_deref_var_create(load
, c
->parameters
);
178 nir_deref_array
*deref_arr
=
179 nir_deref_array_create(load
->variables
[0]);
180 deref_arr
->deref
.type
= glsl_vec4_type();
181 load
->variables
[0]->deref
.child
= &deref_arr
->deref
;
183 if (prog_src
->RelAddr
) {
184 deref_arr
->deref_array_type
= nir_deref_array_type_indirect
;
186 nir_alu_src addr_src
= { NIR_SRC_INIT
};
187 addr_src
.src
= nir_src_for_reg(c
->addr_reg
);
188 nir_ssa_def
*reladdr
= nir_imov_alu(b
, addr_src
, 1);
190 if (prog_src
->Index
< 0) {
191 /* This is a negative offset which should be added to the address
194 reladdr
= nir_iadd(b
, reladdr
, nir_imm_int(b
, prog_src
->Index
));
196 deref_arr
->base_offset
= 0;
198 deref_arr
->base_offset
= prog_src
->Index
;
200 deref_arr
->indirect
= nir_src_for_ssa(reladdr
);
202 deref_arr
->deref_array_type
= nir_deref_array_type_direct
;
203 deref_arr
->base_offset
= prog_src
->Index
;
206 nir_builder_instr_insert(b
, &load
->instr
);
208 src
.src
= nir_src_for_ssa(&load
->dest
.ssa
);
212 fprintf(stderr
, "bad uniform src register file: %s (%d)\n",
213 _mesa_register_file_name(file
), file
);
219 fprintf(stderr
, "unknown src register file: %s (%d)\n",
220 _mesa_register_file_name(prog_src
->File
), prog_src
->File
);
225 if (!HAS_EXTENDED_SWIZZLE(prog_src
->Swizzle
) &&
226 (prog_src
->Negate
== NEGATE_NONE
|| prog_src
->Negate
== NEGATE_XYZW
)) {
227 /* The simple non-SWZ case. */
228 for (int i
= 0; i
< 4; i
++)
229 src
.swizzle
[i
] = GET_SWZ(prog_src
->Swizzle
, i
);
231 def
= nir_fmov_alu(b
, src
, 4);
234 def
= nir_fabs(b
, def
);
236 if (prog_src
->Negate
)
237 def
= nir_fneg(b
, def
);
239 /* The SWZ instruction allows per-component zero/one swizzles, and also
240 * per-component negation.
242 nir_ssa_def
*chans
[4];
243 for (int i
= 0; i
< 4; i
++) {
244 int swizzle
= GET_SWZ(prog_src
->Swizzle
, i
);
245 if (swizzle
== SWIZZLE_ZERO
) {
246 chans
[i
] = nir_imm_float(b
, 0.0);
247 } else if (swizzle
== SWIZZLE_ONE
) {
248 chans
[i
] = nir_imm_float(b
, 1.0);
250 assert(swizzle
!= SWIZZLE_NIL
);
251 nir_alu_instr
*mov
= nir_alu_instr_create(b
->shader
, nir_op_fmov
);
252 nir_ssa_dest_init(&mov
->instr
, &mov
->dest
.dest
, 1, NULL
);
253 mov
->dest
.write_mask
= 0x1;
255 mov
->src
[0].swizzle
[0] = swizzle
;
256 nir_builder_instr_insert(b
, &mov
->instr
);
258 chans
[i
] = &mov
->dest
.dest
.ssa
;
262 chans
[i
] = nir_fabs(b
, chans
[i
]);
264 if (prog_src
->Negate
& (1 << i
))
265 chans
[i
] = nir_fneg(b
, chans
[i
]);
267 def
= nir_vec4(b
, chans
[0], chans
[1], chans
[2], chans
[3]);
274 ptn_alu(nir_builder
*b
, nir_op op
, nir_alu_dest dest
, nir_ssa_def
**src
)
276 unsigned num_srcs
= nir_op_infos
[op
].num_inputs
;
277 nir_alu_instr
*instr
= nir_alu_instr_create(b
->shader
, op
);
280 for (i
= 0; i
< num_srcs
; i
++)
281 instr
->src
[i
].src
= nir_src_for_ssa(src
[i
]);
284 nir_builder_instr_insert(b
, &instr
->instr
);
288 ptn_move_dest_masked(nir_builder
*b
, nir_alu_dest dest
,
289 nir_ssa_def
*def
, unsigned write_mask
)
291 if (!(dest
.write_mask
& write_mask
))
294 nir_alu_instr
*mov
= nir_alu_instr_create(b
->shader
, nir_op_fmov
);
299 mov
->dest
.write_mask
&= write_mask
;
300 mov
->src
[0].src
= nir_src_for_ssa(def
);
301 for (unsigned i
= def
->num_components
; i
< 4; i
++)
302 mov
->src
[0].swizzle
[i
] = def
->num_components
- 1;
303 nir_builder_instr_insert(b
, &mov
->instr
);
307 ptn_move_dest(nir_builder
*b
, nir_alu_dest dest
, nir_ssa_def
*def
)
309 ptn_move_dest_masked(b
, dest
, def
, WRITEMASK_XYZW
);
313 ptn_arl(nir_builder
*b
, nir_alu_dest dest
, nir_ssa_def
**src
)
315 ptn_move_dest(b
, dest
, nir_f2i(b
, nir_ffloor(b
, src
[0])));
318 /* EXP - Approximate Exponential Base 2
319 * dst.x = 2^{\lfloor src.x\rfloor}
320 * dst.y = src.x - \lfloor src.x\rfloor
325 ptn_exp(nir_builder
*b
, nir_alu_dest dest
, nir_ssa_def
**src
)
327 nir_ssa_def
*srcx
= ptn_channel(b
, src
[0], X
);
329 ptn_move_dest_masked(b
, dest
, nir_fexp2(b
, nir_ffloor(b
, srcx
)), WRITEMASK_X
);
330 ptn_move_dest_masked(b
, dest
, nir_fsub(b
, srcx
, nir_ffloor(b
, srcx
)), WRITEMASK_Y
);
331 ptn_move_dest_masked(b
, dest
, nir_fexp2(b
, srcx
), WRITEMASK_Z
);
332 ptn_move_dest_masked(b
, dest
, nir_imm_float(b
, 1.0), WRITEMASK_W
);
335 /* LOG - Approximate Logarithm Base 2
336 * dst.x = \lfloor\log_2{|src.x|}\rfloor
337 * dst.y = |src.x| * 2^{-\lfloor\log_2{|src.x|}\rfloor}}
338 * dst.z = \log_2{|src.x|}
342 ptn_log(nir_builder
*b
, nir_alu_dest dest
, nir_ssa_def
**src
)
344 nir_ssa_def
*abs_srcx
= nir_fabs(b
, ptn_channel(b
, src
[0], X
));
345 nir_ssa_def
*log2
= nir_flog2(b
, abs_srcx
);
346 nir_ssa_def
*floor_log2
= nir_ffloor(b
, log2
);
348 ptn_move_dest_masked(b
, dest
, floor_log2
, WRITEMASK_X
);
349 ptn_move_dest_masked(b
, dest
,
350 nir_fmul(b
, abs_srcx
,
351 nir_fexp2(b
, nir_fneg(b
, floor_log2
))),
353 ptn_move_dest_masked(b
, dest
, log2
, WRITEMASK_Z
);
354 ptn_move_dest_masked(b
, dest
, nir_imm_float(b
, 1.0), WRITEMASK_W
);
357 /* DST - Distance Vector
359 * dst.y = src0.y \times src1.y
364 ptn_dst(nir_builder
*b
, nir_alu_dest dest
, nir_ssa_def
**src
)
366 ptn_move_dest_masked(b
, dest
, nir_imm_float(b
, 1.0), WRITEMASK_X
);
367 ptn_move_dest_masked(b
, dest
, nir_fmul(b
, src
[0], src
[1]), WRITEMASK_Y
);
368 ptn_move_dest_masked(b
, dest
, nir_fmov(b
, src
[0]), WRITEMASK_Z
);
369 ptn_move_dest_masked(b
, dest
, nir_fmov(b
, src
[1]), WRITEMASK_W
);
372 /* LIT - Light Coefficients
374 * dst.y = max(src.x, 0.0)
375 * dst.z = (src.x > 0.0) ? max(src.y, 0.0)^{clamp(src.w, -128.0, 128.0))} : 0
379 ptn_lit(nir_builder
*b
, nir_alu_dest dest
, nir_ssa_def
**src
)
381 ptn_move_dest_masked(b
, dest
, nir_imm_float(b
, 1.0), WRITEMASK_XW
);
383 ptn_move_dest_masked(b
, dest
, nir_fmax(b
, ptn_channel(b
, src
[0], X
),
384 nir_imm_float(b
, 0.0)), WRITEMASK_Y
);
386 if (dest
.write_mask
& WRITEMASK_Z
) {
387 nir_ssa_def
*src0_y
= ptn_channel(b
, src
[0], Y
);
388 nir_ssa_def
*wclamp
= nir_fmax(b
, nir_fmin(b
, ptn_channel(b
, src
[0], W
),
389 nir_imm_float(b
, 128.0)),
390 nir_imm_float(b
, -128.0));
391 nir_ssa_def
*pow
= nir_fpow(b
, nir_fmax(b
, src0_y
, nir_imm_float(b
, 0.0)),
395 if (b
->shader
->options
->native_integers
) {
397 nir_fge(b
, nir_imm_float(b
, 0.0), ptn_channel(b
, src
[0], X
)),
398 nir_imm_float(b
, 0.0),
402 nir_sge(b
, nir_imm_float(b
, 0.0), ptn_channel(b
, src
[0], X
)),
403 nir_imm_float(b
, 0.0),
407 ptn_move_dest_masked(b
, dest
, z
, WRITEMASK_Z
);
412 * dst.x = \cos{src.x}
413 * dst.y = \sin{src.x}
418 ptn_scs(nir_builder
*b
, nir_alu_dest dest
, nir_ssa_def
**src
)
420 ptn_move_dest_masked(b
, dest
, nir_fcos(b
, ptn_channel(b
, src
[0], X
)),
422 ptn_move_dest_masked(b
, dest
, nir_fsin(b
, ptn_channel(b
, src
[0], X
)),
424 ptn_move_dest_masked(b
, dest
, nir_imm_float(b
, 0.0), WRITEMASK_Z
);
425 ptn_move_dest_masked(b
, dest
, nir_imm_float(b
, 1.0), WRITEMASK_W
);
429 * Emit SLT. For platforms with integers, prefer b2f(flt(...)).
432 ptn_slt(nir_builder
*b
, nir_alu_dest dest
, nir_ssa_def
**src
)
434 if (b
->shader
->options
->native_integers
) {
435 ptn_move_dest(b
, dest
, nir_b2f(b
, nir_flt(b
, src
[0], src
[1])));
437 ptn_move_dest(b
, dest
, nir_slt(b
, src
[0], src
[1]));
442 * Emit SGE. For platforms with integers, prefer b2f(fge(...)).
445 ptn_sge(nir_builder
*b
, nir_alu_dest dest
, nir_ssa_def
**src
)
447 if (b
->shader
->options
->native_integers
) {
448 ptn_move_dest(b
, dest
, nir_b2f(b
, nir_fge(b
, src
[0], src
[1])));
450 ptn_move_dest(b
, dest
, nir_sge(b
, src
[0], src
[1]));
455 ptn_sle(nir_builder
*b
, nir_alu_dest dest
, nir_ssa_def
**src
)
457 nir_ssa_def
*commuted
[] = { src
[1], src
[0] };
458 ptn_sge(b
, dest
, commuted
);
462 ptn_sgt(nir_builder
*b
, nir_alu_dest dest
, nir_ssa_def
**src
)
464 nir_ssa_def
*commuted
[] = { src
[1], src
[0] };
465 ptn_slt(b
, dest
, commuted
);
469 * Emit SEQ. For platforms with integers, prefer b2f(feq(...)).
472 ptn_seq(nir_builder
*b
, nir_alu_dest dest
, nir_ssa_def
**src
)
474 if (b
->shader
->options
->native_integers
) {
475 ptn_move_dest(b
, dest
, nir_b2f(b
, nir_feq(b
, src
[0], src
[1])));
477 ptn_move_dest(b
, dest
, nir_seq(b
, src
[0], src
[1]));
482 * Emit SNE. For platforms with integers, prefer b2f(fne(...)).
485 ptn_sne(nir_builder
*b
, nir_alu_dest dest
, nir_ssa_def
**src
)
487 if (b
->shader
->options
->native_integers
) {
488 ptn_move_dest(b
, dest
, nir_b2f(b
, nir_fne(b
, src
[0], src
[1])));
490 ptn_move_dest(b
, dest
, nir_sne(b
, src
[0], src
[1]));
495 ptn_xpd(nir_builder
*b
, nir_alu_dest dest
, nir_ssa_def
**src
)
497 ptn_move_dest_masked(b
, dest
,
500 ptn_swizzle(b
, src
[0], Y
, Z
, X
, X
),
501 ptn_swizzle(b
, src
[1], Z
, X
, Y
, X
)),
503 ptn_swizzle(b
, src
[1], Y
, Z
, X
, X
),
504 ptn_swizzle(b
, src
[0], Z
, X
, Y
, X
))),
506 ptn_move_dest_masked(b
, dest
, nir_imm_float(b
, 1.0), WRITEMASK_W
);
510 ptn_dp2(nir_builder
*b
, nir_alu_dest dest
, nir_ssa_def
**src
)
512 ptn_move_dest(b
, dest
, nir_fdot2(b
, src
[0], src
[1]));
516 ptn_dp3(nir_builder
*b
, nir_alu_dest dest
, nir_ssa_def
**src
)
518 ptn_move_dest(b
, dest
, nir_fdot3(b
, src
[0], src
[1]));
522 ptn_dp4(nir_builder
*b
, nir_alu_dest dest
, nir_ssa_def
**src
)
524 ptn_move_dest(b
, dest
, nir_fdot4(b
, src
[0], src
[1]));
528 ptn_dph(nir_builder
*b
, nir_alu_dest dest
, nir_ssa_def
**src
)
530 ptn_move_dest(b
, dest
, nir_fdph(b
, src
[0], src
[1]));
534 ptn_cmp(nir_builder
*b
, nir_alu_dest dest
, nir_ssa_def
**src
)
536 if (b
->shader
->options
->native_integers
) {
537 ptn_move_dest(b
, dest
, nir_bcsel(b
,
538 nir_flt(b
, src
[0], nir_imm_float(b
, 0.0)),
541 ptn_move_dest(b
, dest
, nir_fcsel(b
,
542 nir_slt(b
, src
[0], nir_imm_float(b
, 0.0)),
548 ptn_lrp(nir_builder
*b
, nir_alu_dest dest
, nir_ssa_def
**src
)
550 ptn_move_dest(b
, dest
, nir_flrp(b
, src
[2], src
[1], src
[0]));
554 ptn_kil(nir_builder
*b
, nir_alu_dest dest
, nir_ssa_def
**src
)
556 nir_ssa_def
*cmp
= b
->shader
->options
->native_integers
?
557 nir_bany_inequal4(b
, nir_flt(b
, src
[0], nir_imm_float(b
, 0.0)), nir_imm_int(b
, 0)) :
558 nir_fany_nequal4(b
, nir_slt(b
, src
[0], nir_imm_float(b
, 0.0)), nir_imm_float(b
, 0.0));
560 nir_intrinsic_instr
*discard
=
561 nir_intrinsic_instr_create(b
->shader
, nir_intrinsic_discard_if
);
562 discard
->src
[0] = nir_src_for_ssa(cmp
);
563 nir_builder_instr_insert(b
, &discard
->instr
);
567 ptn_tex(nir_builder
*b
, nir_alu_dest dest
, nir_ssa_def
**src
,
568 struct prog_instruction
*prog_inst
)
570 nir_tex_instr
*instr
;
574 switch (prog_inst
->Opcode
) {
596 assert(!"not handled");
601 fprintf(stderr
, "unknown tex op %d\n", prog_inst
->Opcode
);
605 if (prog_inst
->TexShadow
)
608 instr
= nir_tex_instr_create(b
->shader
, num_srcs
);
610 instr
->dest_type
= nir_type_float
;
611 instr
->is_shadow
= prog_inst
->TexShadow
;
612 instr
->texture_index
= prog_inst
->TexSrcUnit
;
613 instr
->sampler_index
= prog_inst
->TexSrcUnit
;
615 switch (prog_inst
->TexSrcTarget
) {
616 case TEXTURE_1D_INDEX
:
617 instr
->sampler_dim
= GLSL_SAMPLER_DIM_1D
;
619 case TEXTURE_2D_INDEX
:
620 instr
->sampler_dim
= GLSL_SAMPLER_DIM_2D
;
622 case TEXTURE_3D_INDEX
:
623 instr
->sampler_dim
= GLSL_SAMPLER_DIM_3D
;
625 case TEXTURE_CUBE_INDEX
:
626 instr
->sampler_dim
= GLSL_SAMPLER_DIM_CUBE
;
628 case TEXTURE_RECT_INDEX
:
629 instr
->sampler_dim
= GLSL_SAMPLER_DIM_RECT
;
632 fprintf(stderr
, "Unknown texture target %d\n", prog_inst
->TexSrcTarget
);
636 switch (instr
->sampler_dim
) {
637 case GLSL_SAMPLER_DIM_1D
:
638 case GLSL_SAMPLER_DIM_BUF
:
639 instr
->coord_components
= 1;
641 case GLSL_SAMPLER_DIM_2D
:
642 case GLSL_SAMPLER_DIM_RECT
:
643 case GLSL_SAMPLER_DIM_EXTERNAL
:
644 case GLSL_SAMPLER_DIM_MS
:
645 instr
->coord_components
= 2;
647 case GLSL_SAMPLER_DIM_3D
:
648 case GLSL_SAMPLER_DIM_CUBE
:
649 instr
->coord_components
= 3;
653 unsigned src_number
= 0;
655 instr
->src
[src_number
].src
=
656 nir_src_for_ssa(ptn_swizzle(b
, src
[0], X
, Y
, Z
, W
));
657 instr
->src
[src_number
].src_type
= nir_tex_src_coord
;
660 if (prog_inst
->Opcode
== OPCODE_TXP
) {
661 instr
->src
[src_number
].src
= nir_src_for_ssa(ptn_channel(b
, src
[0], W
));
662 instr
->src
[src_number
].src_type
= nir_tex_src_projector
;
666 if (prog_inst
->Opcode
== OPCODE_TXB
) {
667 instr
->src
[src_number
].src
= nir_src_for_ssa(ptn_channel(b
, src
[0], W
));
668 instr
->src
[src_number
].src_type
= nir_tex_src_bias
;
672 if (prog_inst
->Opcode
== OPCODE_TXL
) {
673 instr
->src
[src_number
].src
= nir_src_for_ssa(ptn_channel(b
, src
[0], W
));
674 instr
->src
[src_number
].src_type
= nir_tex_src_lod
;
678 if (instr
->is_shadow
) {
679 if (instr
->coord_components
< 3)
680 instr
->src
[src_number
].src
= nir_src_for_ssa(ptn_channel(b
, src
[0], Z
));
682 instr
->src
[src_number
].src
= nir_src_for_ssa(ptn_channel(b
, src
[0], W
));
684 instr
->src
[src_number
].src_type
= nir_tex_src_comparitor
;
688 assert(src_number
== num_srcs
);
690 nir_ssa_dest_init(&instr
->instr
, &instr
->dest
, 4, NULL
);
691 nir_builder_instr_insert(b
, &instr
->instr
);
693 /* Resolve the writemask on the texture op. */
694 ptn_move_dest(b
, dest
, &instr
->dest
.ssa
);
697 static const nir_op op_trans
[MAX_OPCODE
] = {
699 [OPCODE_ABS
] = nir_op_fabs
,
700 [OPCODE_ADD
] = nir_op_fadd
,
704 [OPCODE_DDX
] = nir_op_fddx
,
705 [OPCODE_DDY
] = nir_op_fddy
,
714 [OPCODE_FLR
] = nir_op_ffloor
,
715 [OPCODE_FRC
] = nir_op_ffract
,
720 [OPCODE_MAD
] = nir_op_ffma
,
721 [OPCODE_MAX
] = nir_op_fmax
,
722 [OPCODE_MIN
] = nir_op_fmin
,
723 [OPCODE_MOV
] = nir_op_fmov
,
724 [OPCODE_MUL
] = nir_op_fmul
,
737 [OPCODE_SSG
] = nir_op_fsign
,
738 [OPCODE_SUB
] = nir_op_fsub
,
741 [OPCODE_TRUNC
] = nir_op_ftrunc
,
751 ptn_emit_instruction(struct ptn_compile
*c
, struct prog_instruction
*prog_inst
)
753 nir_builder
*b
= &c
->build
;
755 const unsigned op
= prog_inst
->Opcode
;
757 if (op
== OPCODE_END
)
761 for (i
= 0; i
< 3; i
++) {
762 src
[i
] = ptn_get_src(c
, &prog_inst
->SrcReg
[i
]);
764 nir_alu_dest dest
= ptn_get_dest(c
, &prog_inst
->DstReg
);
770 ptn_move_dest(b
, dest
,
771 nir_frsq(b
, nir_fabs(b
, ptn_channel(b
, src
[0], X
))));
775 ptn_move_dest(b
, dest
, nir_frcp(b
, ptn_channel(b
, src
[0], X
)));
779 ptn_move_dest(b
, dest
, nir_fexp2(b
, ptn_channel(b
, src
[0], X
)));
783 ptn_move_dest(b
, dest
, nir_flog2(b
, ptn_channel(b
, src
[0], X
)));
787 ptn_move_dest(b
, dest
, nir_fpow(b
,
788 ptn_channel(b
, src
[0], X
),
789 ptn_channel(b
, src
[1], X
)));
793 ptn_move_dest(b
, dest
, nir_fcos(b
, ptn_channel(b
, src
[0], X
)));
797 ptn_move_dest(b
, dest
, nir_fsin(b
, ptn_channel(b
, src
[0], X
)));
801 ptn_arl(b
, dest
, src
);
805 ptn_exp(b
, dest
, src
);
809 ptn_log(b
, dest
, src
);
813 ptn_lrp(b
, dest
, src
);
817 ptn_dst(b
, dest
, src
);
821 ptn_lit(b
, dest
, src
);
825 ptn_xpd(b
, dest
, src
);
829 ptn_dp2(b
, dest
, src
);
833 ptn_dp3(b
, dest
, src
);
837 ptn_dp4(b
, dest
, src
);
841 ptn_dph(b
, dest
, src
);
845 ptn_kil(b
, dest
, src
);
849 ptn_cmp(b
, dest
, src
);
853 ptn_scs(b
, dest
, src
);
857 ptn_slt(b
, dest
, src
);
861 ptn_sgt(b
, dest
, src
);
865 ptn_sle(b
, dest
, src
);
869 ptn_sge(b
, dest
, src
);
873 ptn_seq(b
, dest
, src
);
877 ptn_sne(b
, dest
, src
);
886 ptn_tex(b
, dest
, src
, prog_inst
);
890 /* Extended swizzles were already handled in ptn_get_src(). */
891 ptn_alu(b
, nir_op_fmov
, dest
, src
);
898 if (op_trans
[op
] != 0) {
899 ptn_alu(b
, op_trans
[op
], dest
, src
);
901 fprintf(stderr
, "unknown opcode: %s\n", _mesa_opcode_string(op
));
907 if (prog_inst
->Saturate
) {
908 assert(prog_inst
->Saturate
);
909 assert(!dest
.dest
.is_ssa
);
910 ptn_move_dest(b
, dest
, nir_fsat(b
, ptn_src_for_dest(c
, &dest
)));
915 * Puts a NIR intrinsic to store of each PROGRAM_OUTPUT value to the output
916 * variables at the end of the shader.
918 * We don't generate these incrementally as the PROGRAM_OUTPUT values are
919 * written, because there's no output load intrinsic, which means we couldn't
923 ptn_add_output_stores(struct ptn_compile
*c
)
925 nir_builder
*b
= &c
->build
;
927 nir_foreach_variable(var
, &b
->shader
->outputs
) {
928 nir_intrinsic_instr
*store
=
929 nir_intrinsic_instr_create(b
->shader
, nir_intrinsic_store_var
);
930 store
->num_components
= glsl_get_vector_elements(var
->type
);
931 store
->const_index
[0] = (1 << store
->num_components
) - 1;
932 store
->variables
[0] =
933 nir_deref_var_create(store
, c
->output_vars
[var
->data
.location
]);
935 if (c
->prog
->Target
== GL_FRAGMENT_PROGRAM_ARB
&&
936 var
->data
.location
== FRAG_RESULT_DEPTH
) {
937 /* result.depth has this strange convention of being the .z component of
938 * a vec4 with undefined .xyw components. We resolve it to a scalar, to
939 * match GLSL's gl_FragDepth and the expectations of most backends.
941 nir_alu_src alu_src
= { NIR_SRC_INIT
};
942 alu_src
.src
= nir_src_for_reg(c
->output_regs
[FRAG_RESULT_DEPTH
]);
943 alu_src
.swizzle
[0] = SWIZZLE_Z
;
944 store
->src
[0] = nir_src_for_ssa(nir_fmov_alu(b
, alu_src
, 1));
946 store
->src
[0].reg
.reg
= c
->output_regs
[var
->data
.location
];
948 nir_builder_instr_insert(b
, &store
->instr
);
953 setup_registers_and_variables(struct ptn_compile
*c
)
955 nir_builder
*b
= &c
->build
;
956 struct nir_shader
*shader
= b
->shader
;
958 /* Create input variables. */
959 const int num_inputs
= _mesa_flsll(c
->prog
->InputsRead
);
960 for (int i
= 0; i
< num_inputs
; i
++) {
961 if (!(c
->prog
->InputsRead
& BITFIELD64_BIT(i
)))
965 nir_variable_create(shader
, nir_var_shader_in
, glsl_vec4_type(),
966 ralloc_asprintf(shader
, "in_%d", i
));
967 var
->data
.location
= i
;
970 if (c
->prog
->Target
== GL_FRAGMENT_PROGRAM_ARB
) {
971 struct gl_fragment_program
*fp
=
972 (struct gl_fragment_program
*) c
->prog
;
974 var
->data
.interpolation
= fp
->InterpQualifier
[i
];
976 if (i
== VARYING_SLOT_POS
) {
977 var
->data
.origin_upper_left
= fp
->OriginUpperLeft
;
978 var
->data
.pixel_center_integer
= fp
->PixelCenterInteger
;
979 } else if (i
== VARYING_SLOT_FOGC
) {
980 /* fogcoord is defined as <f, 0.0, 0.0, 1.0>. Make the actual
981 * input variable a float, and create a local containing the
984 var
->type
= glsl_float_type();
986 nir_intrinsic_instr
*load_x
=
987 nir_intrinsic_instr_create(shader
, nir_intrinsic_load_var
);
988 load_x
->num_components
= 1;
989 load_x
->variables
[0] = nir_deref_var_create(load_x
, var
);
990 nir_ssa_dest_init(&load_x
->instr
, &load_x
->dest
, 1, NULL
);
991 nir_builder_instr_insert(b
, &load_x
->instr
);
993 nir_ssa_def
*f001
= nir_vec4(b
, &load_x
->dest
.ssa
, nir_imm_float(b
, 0.0),
994 nir_imm_float(b
, 0.0), nir_imm_float(b
, 1.0));
996 nir_variable
*fullvar
=
997 nir_local_variable_create(b
->impl
, glsl_vec4_type(),
999 nir_intrinsic_instr
*store
=
1000 nir_intrinsic_instr_create(shader
, nir_intrinsic_store_var
);
1001 store
->num_components
= 4;
1002 store
->const_index
[0] = WRITEMASK_XYZW
;
1003 store
->variables
[0] = nir_deref_var_create(store
, fullvar
);
1004 store
->src
[0] = nir_src_for_ssa(f001
);
1005 nir_builder_instr_insert(b
, &store
->instr
);
1007 /* We inserted the real input into the list so the driver has real
1008 * inputs, but we set c->input_vars[i] to the temporary so we use
1009 * the splatted value.
1011 c
->input_vars
[i
] = fullvar
;
1016 c
->input_vars
[i
] = var
;
1019 /* Create output registers and variables. */
1020 int max_outputs
= _mesa_fls(c
->prog
->OutputsWritten
);
1021 c
->output_regs
= rzalloc_array(c
, nir_register
*, max_outputs
);
1023 for (int i
= 0; i
< max_outputs
; i
++) {
1024 if (!(c
->prog
->OutputsWritten
& BITFIELD64_BIT(i
)))
1027 /* Since we can't load from outputs in the IR, we make temporaries
1028 * for the outputs and emit stores to the real outputs at the end of
1031 nir_register
*reg
= nir_local_reg_create(b
->impl
);
1032 reg
->num_components
= 4;
1034 nir_variable
*var
= rzalloc(shader
, nir_variable
);
1035 if (c
->prog
->Target
== GL_FRAGMENT_PROGRAM_ARB
&& i
== FRAG_RESULT_DEPTH
)
1036 var
->type
= glsl_float_type();
1038 var
->type
= glsl_vec4_type();
1039 var
->data
.mode
= nir_var_shader_out
;
1040 var
->name
= ralloc_asprintf(var
, "out_%d", i
);
1042 var
->data
.location
= i
;
1043 var
->data
.index
= 0;
1045 c
->output_regs
[i
] = reg
;
1047 exec_list_push_tail(&shader
->outputs
, &var
->node
);
1048 c
->output_vars
[i
] = var
;
1051 /* Create temporary registers. */
1052 c
->temp_regs
= rzalloc_array(c
, nir_register
*, c
->prog
->NumTemporaries
);
1055 for (unsigned i
= 0; i
< c
->prog
->NumTemporaries
; i
++) {
1056 reg
= nir_local_reg_create(b
->impl
);
1061 reg
->num_components
= 4;
1062 c
->temp_regs
[i
] = reg
;
1065 /* Create the address register (for ARB_vertex_program). */
1066 reg
= nir_local_reg_create(b
->impl
);
1071 reg
->num_components
= 1;
1076 prog_to_nir(const struct gl_program
*prog
,
1077 const nir_shader_compiler_options
*options
)
1079 struct ptn_compile
*c
;
1080 struct nir_shader
*s
;
1081 gl_shader_stage stage
= _mesa_program_enum_to_shader_stage(prog
->Target
);
1083 c
= rzalloc(NULL
, struct ptn_compile
);
1088 nir_builder_init_simple_shader(&c
->build
, NULL
, stage
, options
);
1089 s
= c
->build
.shader
;
1091 if (prog
->Parameters
->NumParameters
> 0) {
1092 c
->parameters
= rzalloc(s
, nir_variable
);
1093 c
->parameters
->type
=
1094 glsl_array_type(glsl_vec4_type(), prog
->Parameters
->NumParameters
);
1095 c
->parameters
->name
= "parameters";
1096 c
->parameters
->data
.read_only
= true;
1097 c
->parameters
->data
.mode
= nir_var_uniform
;
1098 exec_list_push_tail(&s
->uniforms
, &c
->parameters
->node
);
1101 setup_registers_and_variables(c
);
1102 if (unlikely(c
->error
))
1105 for (unsigned int i
= 0; i
< prog
->NumInstructions
; i
++) {
1106 ptn_emit_instruction(c
, &prog
->Instructions
[i
]);
1108 if (unlikely(c
->error
))
1112 ptn_add_output_stores(c
);
1114 s
->info
.name
= ralloc_asprintf(s
, "ARB%d", prog
->Id
);
1115 s
->info
.num_textures
= _mesa_fls(prog
->SamplersUsed
);
1116 s
->info
.num_ubos
= 0;
1117 s
->info
.num_abos
= 0;
1118 s
->info
.num_ssbos
= 0;
1119 s
->info
.num_images
= 0;
1120 s
->info
.inputs_read
= prog
->InputsRead
;
1121 s
->info
.outputs_written
= prog
->OutputsWritten
;
1122 s
->info
.system_values_read
= prog
->SystemValuesRead
;
1123 s
->info
.uses_texture_gather
= false;
1124 s
->info
.uses_clip_distance_out
= false;
1125 s
->info
.separate_shader
= false;
1127 if (stage
== MESA_SHADER_FRAGMENT
) {
1128 struct gl_fragment_program
*fp
= (struct gl_fragment_program
*)prog
;
1130 s
->info
.fs
.uses_discard
= fp
->UsesKill
;