2 * Copyright © 2015 Intel Corporation
3 * Copyright © 2014-2015 Broadcom
4 * Copyright (C) 2014 Rob Clark <robclark@freedesktop.org>
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
13 * The above copyright notice and this permission notice (including the next
14 * paragraph) shall be included in all copies or substantial portions of the
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
22 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
26 #include "compiler/nir/nir.h"
27 #include "compiler/nir/nir_builder.h"
28 #include "compiler/glsl/list.h"
30 #include "main/mtypes.h"
31 #include "util/ralloc.h"
33 #include "prog_to_nir.h"
34 #include "prog_instruction.h"
35 #include "prog_parameter.h"
36 #include "prog_print.h"
/**
 * A translator from Mesa IR (prog_instruction.h) to NIR. This is primarily
 * intended to support ARB_vertex_program, ARB_fragment_program, and
 * fixed-function vertex processing. Full GLSL support should use
 * glsl_to_nir instead.
 */
48 const struct gl_program
*prog
;
52 nir_variable
*parameters
;
53 nir_variable
*input_vars
[VARYING_SLOT_MAX
];
54 nir_variable
*output_vars
[VARYING_SLOT_MAX
];
55 nir_variable
*sysval_vars
[SYSTEM_VALUE_MAX
];
56 nir_variable
*sampler_vars
[32]; /* matches number of bits in TexSrcUnit */
57 nir_register
**output_regs
;
58 nir_register
**temp_regs
;
60 nir_register
*addr_reg
;
/* Builds an unsigned[4] compound-literal swizzle from channel names,
 * e.g. SWIZ(Y, Z, X, W).
 */
#define SWIZ(X, Y, Z, W) \
   (unsigned[4]){ SWIZZLE_##X, SWIZZLE_##Y, SWIZZLE_##Z, SWIZZLE_##W }

/* Extracts a single named channel (X/Y/Z/W) of a NIR SSA def. */
#define ptn_channel(b, src, ch) nir_channel(b, src, SWIZZLE_##ch)
68 ptn_src_for_dest(struct ptn_compile
*c
, nir_alu_dest
*dest
)
70 nir_builder
*b
= &c
->build
;
73 memset(&src
, 0, sizeof(src
));
75 if (dest
->dest
.is_ssa
)
76 src
.src
= nir_src_for_ssa(&dest
->dest
.ssa
);
78 assert(!dest
->dest
.reg
.indirect
);
79 src
.src
= nir_src_for_reg(dest
->dest
.reg
.reg
);
80 src
.src
.reg
.base_offset
= dest
->dest
.reg
.base_offset
;
83 for (int i
= 0; i
< 4; i
++)
86 return nir_mov_alu(b
, src
, 4);
90 ptn_get_dest(struct ptn_compile
*c
, const struct prog_dst_register
*prog_dst
)
94 memset(&dest
, 0, sizeof(dest
));
96 switch (prog_dst
->File
) {
97 case PROGRAM_TEMPORARY
:
98 dest
.dest
.reg
.reg
= c
->temp_regs
[prog_dst
->Index
];
101 dest
.dest
.reg
.reg
= c
->output_regs
[prog_dst
->Index
];
103 case PROGRAM_ADDRESS
:
104 assert(prog_dst
->Index
== 0);
105 dest
.dest
.reg
.reg
= c
->addr_reg
;
107 case PROGRAM_UNDEFINED
:
111 dest
.write_mask
= prog_dst
->WriteMask
;
112 dest
.saturate
= false;
114 assert(!prog_dst
->RelAddr
);
120 ptn_get_src(struct ptn_compile
*c
, const struct prog_src_register
*prog_src
)
122 nir_builder
*b
= &c
->build
;
125 memset(&src
, 0, sizeof(src
));
127 switch (prog_src
->File
) {
128 case PROGRAM_UNDEFINED
:
129 return nir_imm_float(b
, 0.0);
130 case PROGRAM_TEMPORARY
:
131 assert(!prog_src
->RelAddr
&& prog_src
->Index
>= 0);
132 src
.src
.reg
.reg
= c
->temp_regs
[prog_src
->Index
];
134 case PROGRAM_INPUT
: {
135 /* ARB_vertex_program doesn't allow relative addressing on vertex
136 * attributes; ARB_fragment_program has no relative addressing at all.
138 assert(!prog_src
->RelAddr
);
140 assert(prog_src
->Index
>= 0 && prog_src
->Index
< VARYING_SLOT_MAX
);
142 nir_variable
*var
= c
->input_vars
[prog_src
->Index
];
143 src
.src
= nir_src_for_ssa(nir_load_var(b
, var
));
146 case PROGRAM_SYSTEM_VALUE
: {
147 assert(!prog_src
->RelAddr
);
149 assert(prog_src
->Index
>= 0 && prog_src
->Index
< SYSTEM_VALUE_MAX
);
151 nir_variable
*var
= c
->sysval_vars
[prog_src
->Index
];
152 src
.src
= nir_src_for_ssa(nir_load_var(b
, var
));
155 case PROGRAM_STATE_VAR
:
156 case PROGRAM_CONSTANT
: {
157 /* We actually want to look at the type in the Parameters list for this,
158 * because it lets us upload constant builtin uniforms as actual
161 struct gl_program_parameter_list
*plist
= c
->prog
->Parameters
;
162 gl_register_file file
= prog_src
->RelAddr
? prog_src
->File
:
163 plist
->Parameters
[prog_src
->Index
].Type
;
166 case PROGRAM_CONSTANT
:
167 if ((c
->prog
->arb
.IndirectRegisterFiles
&
168 (1 << PROGRAM_CONSTANT
)) == 0) {
169 unsigned pvo
= plist
->ParameterValueOffset
[prog_src
->Index
];
170 float *v
= (float *) plist
->ParameterValues
+ pvo
;
171 src
.src
= nir_src_for_ssa(nir_imm_vec4(b
, v
[0], v
[1], v
[2], v
[3]));
175 case PROGRAM_STATE_VAR
: {
176 assert(c
->parameters
!= NULL
);
178 nir_deref_instr
*deref
= nir_build_deref_var(b
, c
->parameters
);
180 nir_ssa_def
*index
= nir_imm_int(b
, prog_src
->Index
);
181 if (prog_src
->RelAddr
)
182 index
= nir_iadd(b
, index
, nir_load_reg(b
, c
->addr_reg
));
183 deref
= nir_build_deref_array(b
, deref
, nir_channel(b
, index
, 0));
185 src
.src
= nir_src_for_ssa(nir_load_deref(b
, deref
));
189 fprintf(stderr
, "bad uniform src register file: %s (%d)\n",
190 _mesa_register_file_name(file
), file
);
196 fprintf(stderr
, "unknown src register file: %s (%d)\n",
197 _mesa_register_file_name(prog_src
->File
), prog_src
->File
);
202 if (!HAS_EXTENDED_SWIZZLE(prog_src
->Swizzle
) &&
203 (prog_src
->Negate
== NEGATE_NONE
|| prog_src
->Negate
== NEGATE_XYZW
)) {
204 /* The simple non-SWZ case. */
205 for (int i
= 0; i
< 4; i
++)
206 src
.swizzle
[i
] = GET_SWZ(prog_src
->Swizzle
, i
);
208 def
= nir_mov_alu(b
, src
, 4);
210 if (prog_src
->Negate
)
211 def
= nir_fneg(b
, def
);
213 /* The SWZ instruction allows per-component zero/one swizzles, and also
214 * per-component negation.
216 nir_ssa_def
*chans
[4];
217 for (int i
= 0; i
< 4; i
++) {
218 int swizzle
= GET_SWZ(prog_src
->Swizzle
, i
);
219 if (swizzle
== SWIZZLE_ZERO
) {
220 chans
[i
] = nir_imm_float(b
, 0.0);
221 } else if (swizzle
== SWIZZLE_ONE
) {
222 chans
[i
] = nir_imm_float(b
, 1.0);
224 assert(swizzle
!= SWIZZLE_NIL
);
225 nir_alu_instr
*mov
= nir_alu_instr_create(b
->shader
, nir_op_mov
);
226 nir_ssa_dest_init(&mov
->instr
, &mov
->dest
.dest
, 1, 32, NULL
);
227 mov
->dest
.write_mask
= 0x1;
229 mov
->src
[0].swizzle
[0] = swizzle
;
230 nir_builder_instr_insert(b
, &mov
->instr
);
232 chans
[i
] = &mov
->dest
.dest
.ssa
;
235 if (prog_src
->Negate
& (1 << i
))
236 chans
[i
] = nir_fneg(b
, chans
[i
]);
238 def
= nir_vec4(b
, chans
[0], chans
[1], chans
[2], chans
[3]);
245 ptn_alu(nir_builder
*b
, nir_op op
, nir_alu_dest dest
, nir_ssa_def
**src
)
247 unsigned num_srcs
= nir_op_infos
[op
].num_inputs
;
248 nir_alu_instr
*instr
= nir_alu_instr_create(b
->shader
, op
);
251 for (i
= 0; i
< num_srcs
; i
++)
252 instr
->src
[i
].src
= nir_src_for_ssa(src
[i
]);
255 nir_builder_instr_insert(b
, &instr
->instr
);
259 ptn_move_dest_masked(nir_builder
*b
, nir_alu_dest dest
,
260 nir_ssa_def
*def
, unsigned write_mask
)
262 if (!(dest
.write_mask
& write_mask
))
265 nir_alu_instr
*mov
= nir_alu_instr_create(b
->shader
, nir_op_mov
);
270 mov
->dest
.write_mask
&= write_mask
;
271 mov
->src
[0].src
= nir_src_for_ssa(def
);
272 for (unsigned i
= def
->num_components
; i
< 4; i
++)
273 mov
->src
[0].swizzle
[i
] = def
->num_components
- 1;
274 nir_builder_instr_insert(b
, &mov
->instr
);
278 ptn_move_dest(nir_builder
*b
, nir_alu_dest dest
, nir_ssa_def
*def
)
280 ptn_move_dest_masked(b
, dest
, def
, WRITEMASK_XYZW
);
284 ptn_arl(nir_builder
*b
, nir_alu_dest dest
, nir_ssa_def
**src
)
286 ptn_move_dest(b
, dest
, nir_f2i32(b
, nir_ffloor(b
, src
[0])));
289 /* EXP - Approximate Exponential Base 2
290 * dst.x = 2^{\lfloor src.x\rfloor}
291 * dst.y = src.x - \lfloor src.x\rfloor
296 ptn_exp(nir_builder
*b
, nir_alu_dest dest
, nir_ssa_def
**src
)
298 nir_ssa_def
*srcx
= ptn_channel(b
, src
[0], X
);
300 ptn_move_dest_masked(b
, dest
, nir_fexp2(b
, nir_ffloor(b
, srcx
)), WRITEMASK_X
);
301 ptn_move_dest_masked(b
, dest
, nir_fsub(b
, srcx
, nir_ffloor(b
, srcx
)), WRITEMASK_Y
);
302 ptn_move_dest_masked(b
, dest
, nir_fexp2(b
, srcx
), WRITEMASK_Z
);
303 ptn_move_dest_masked(b
, dest
, nir_imm_float(b
, 1.0), WRITEMASK_W
);
306 /* LOG - Approximate Logarithm Base 2
307 * dst.x = \lfloor\log_2{|src.x|}\rfloor
308 * dst.y = |src.x| * 2^{-\lfloor\log_2{|src.x|}\rfloor}}
309 * dst.z = \log_2{|src.x|}
313 ptn_log(nir_builder
*b
, nir_alu_dest dest
, nir_ssa_def
**src
)
315 nir_ssa_def
*abs_srcx
= nir_fabs(b
, ptn_channel(b
, src
[0], X
));
316 nir_ssa_def
*log2
= nir_flog2(b
, abs_srcx
);
317 nir_ssa_def
*floor_log2
= nir_ffloor(b
, log2
);
319 ptn_move_dest_masked(b
, dest
, floor_log2
, WRITEMASK_X
);
320 ptn_move_dest_masked(b
, dest
,
321 nir_fmul(b
, abs_srcx
,
322 nir_fexp2(b
, nir_fneg(b
, floor_log2
))),
324 ptn_move_dest_masked(b
, dest
, log2
, WRITEMASK_Z
);
325 ptn_move_dest_masked(b
, dest
, nir_imm_float(b
, 1.0), WRITEMASK_W
);
328 /* DST - Distance Vector
330 * dst.y = src0.y \times src1.y
335 ptn_dst(nir_builder
*b
, nir_alu_dest dest
, nir_ssa_def
**src
)
337 ptn_move_dest_masked(b
, dest
, nir_imm_float(b
, 1.0), WRITEMASK_X
);
338 ptn_move_dest_masked(b
, dest
, nir_fmul(b
, src
[0], src
[1]), WRITEMASK_Y
);
339 ptn_move_dest_masked(b
, dest
, nir_mov(b
, src
[0]), WRITEMASK_Z
);
340 ptn_move_dest_masked(b
, dest
, nir_mov(b
, src
[1]), WRITEMASK_W
);
343 /* LIT - Light Coefficients
345 * dst.y = max(src.x, 0.0)
346 * dst.z = (src.x > 0.0) ? max(src.y, 0.0)^{clamp(src.w, -128.0, 128.0))} : 0
350 ptn_lit(nir_builder
*b
, nir_alu_dest dest
, nir_ssa_def
**src
)
352 ptn_move_dest_masked(b
, dest
, nir_imm_float(b
, 1.0), WRITEMASK_XW
);
354 ptn_move_dest_masked(b
, dest
, nir_fmax(b
, ptn_channel(b
, src
[0], X
),
355 nir_imm_float(b
, 0.0)), WRITEMASK_Y
);
357 if (dest
.write_mask
& WRITEMASK_Z
) {
358 nir_ssa_def
*src0_y
= ptn_channel(b
, src
[0], Y
);
359 nir_ssa_def
*wclamp
= nir_fmax(b
, nir_fmin(b
, ptn_channel(b
, src
[0], W
),
360 nir_imm_float(b
, 128.0)),
361 nir_imm_float(b
, -128.0));
362 nir_ssa_def
*pow
= nir_fpow(b
, nir_fmax(b
, src0_y
, nir_imm_float(b
, 0.0)),
365 nir_ssa_def
*z
= nir_bcsel(b
,
366 nir_fge(b
, nir_imm_float(b
, 0.0), ptn_channel(b
, src
[0], X
)),
367 nir_imm_float(b
, 0.0),
370 ptn_move_dest_masked(b
, dest
, z
, WRITEMASK_Z
);
375 * dst.x = \cos{src.x}
376 * dst.y = \sin{src.x}
381 ptn_scs(nir_builder
*b
, nir_alu_dest dest
, nir_ssa_def
**src
)
383 ptn_move_dest_masked(b
, dest
, nir_fcos(b
, ptn_channel(b
, src
[0], X
)),
385 ptn_move_dest_masked(b
, dest
, nir_fsin(b
, ptn_channel(b
, src
[0], X
)),
387 ptn_move_dest_masked(b
, dest
, nir_imm_float(b
, 0.0), WRITEMASK_Z
);
388 ptn_move_dest_masked(b
, dest
, nir_imm_float(b
, 1.0), WRITEMASK_W
);
392 ptn_slt(nir_builder
*b
, nir_alu_dest dest
, nir_ssa_def
**src
)
394 ptn_move_dest(b
, dest
, nir_slt(b
, src
[0], src
[1]));
398 ptn_sge(nir_builder
*b
, nir_alu_dest dest
, nir_ssa_def
**src
)
400 ptn_move_dest(b
, dest
, nir_sge(b
, src
[0], src
[1]));
404 ptn_xpd(nir_builder
*b
, nir_alu_dest dest
, nir_ssa_def
**src
)
406 ptn_move_dest_masked(b
, dest
,
409 nir_swizzle(b
, src
[0], SWIZ(Y
, Z
, X
, W
), 3),
410 nir_swizzle(b
, src
[1], SWIZ(Z
, X
, Y
, W
), 3)),
412 nir_swizzle(b
, src
[1], SWIZ(Y
, Z
, X
, W
), 3),
413 nir_swizzle(b
, src
[0], SWIZ(Z
, X
, Y
, W
), 3))),
415 ptn_move_dest_masked(b
, dest
, nir_imm_float(b
, 1.0), WRITEMASK_W
);
419 ptn_dp2(nir_builder
*b
, nir_alu_dest dest
, nir_ssa_def
**src
)
421 ptn_move_dest(b
, dest
, nir_fdot2(b
, src
[0], src
[1]));
425 ptn_dp3(nir_builder
*b
, nir_alu_dest dest
, nir_ssa_def
**src
)
427 ptn_move_dest(b
, dest
, nir_fdot3(b
, src
[0], src
[1]));
431 ptn_dp4(nir_builder
*b
, nir_alu_dest dest
, nir_ssa_def
**src
)
433 ptn_move_dest(b
, dest
, nir_fdot4(b
, src
[0], src
[1]));
437 ptn_dph(nir_builder
*b
, nir_alu_dest dest
, nir_ssa_def
**src
)
439 ptn_move_dest(b
, dest
, nir_fdph(b
, src
[0], src
[1]));
443 ptn_cmp(nir_builder
*b
, nir_alu_dest dest
, nir_ssa_def
**src
)
445 ptn_move_dest(b
, dest
, nir_bcsel(b
,
446 nir_flt(b
, src
[0], nir_imm_float(b
, 0.0)),
451 ptn_lrp(nir_builder
*b
, nir_alu_dest dest
, nir_ssa_def
**src
)
453 ptn_move_dest(b
, dest
, nir_flrp(b
, src
[2], src
[1], src
[0]));
457 ptn_kil(nir_builder
*b
, nir_ssa_def
**src
)
459 /* flt must be exact, because NaN shouldn't discard. (apps rely on this) */
461 nir_ssa_def
*cmp
= nir_bany(b
, nir_flt(b
, src
[0], nir_imm_float(b
, 0.0)));
464 nir_intrinsic_instr
*discard
=
465 nir_intrinsic_instr_create(b
->shader
, nir_intrinsic_discard_if
);
466 discard
->src
[0] = nir_src_for_ssa(cmp
);
467 nir_builder_instr_insert(b
, &discard
->instr
);
471 ptn_tex(struct ptn_compile
*c
, nir_alu_dest dest
, nir_ssa_def
**src
,
472 struct prog_instruction
*prog_inst
)
474 nir_builder
*b
= &c
->build
;
475 nir_tex_instr
*instr
;
479 switch (prog_inst
->Opcode
) {
501 fprintf(stderr
, "unknown tex op %d\n", prog_inst
->Opcode
);
508 if (prog_inst
->TexShadow
)
511 instr
= nir_tex_instr_create(b
->shader
, num_srcs
);
513 instr
->dest_type
= nir_type_float
;
514 instr
->is_shadow
= prog_inst
->TexShadow
;
516 switch (prog_inst
->TexSrcTarget
) {
517 case TEXTURE_1D_INDEX
:
518 instr
->sampler_dim
= GLSL_SAMPLER_DIM_1D
;
520 case TEXTURE_2D_INDEX
:
521 instr
->sampler_dim
= GLSL_SAMPLER_DIM_2D
;
523 case TEXTURE_3D_INDEX
:
524 instr
->sampler_dim
= GLSL_SAMPLER_DIM_3D
;
526 case TEXTURE_CUBE_INDEX
:
527 instr
->sampler_dim
= GLSL_SAMPLER_DIM_CUBE
;
529 case TEXTURE_RECT_INDEX
:
530 instr
->sampler_dim
= GLSL_SAMPLER_DIM_RECT
;
533 fprintf(stderr
, "Unknown texture target %d\n", prog_inst
->TexSrcTarget
);
537 instr
->coord_components
=
538 glsl_get_sampler_dim_coordinate_components(instr
->sampler_dim
);
540 nir_variable
*var
= c
->sampler_vars
[prog_inst
->TexSrcUnit
];
542 const struct glsl_type
*type
=
543 glsl_sampler_type(instr
->sampler_dim
, instr
->is_shadow
, false, GLSL_TYPE_FLOAT
);
544 char samplerName
[20];
545 snprintf(samplerName
, sizeof(samplerName
), "sampler_%d", prog_inst
->TexSrcUnit
);
546 var
= nir_variable_create(b
->shader
, nir_var_uniform
, type
, samplerName
);
547 var
->data
.binding
= prog_inst
->TexSrcUnit
;
548 var
->data
.explicit_binding
= true;
549 c
->sampler_vars
[prog_inst
->TexSrcUnit
] = var
;
552 nir_deref_instr
*deref
= nir_build_deref_var(b
, var
);
554 unsigned src_number
= 0;
556 instr
->src
[src_number
].src
= nir_src_for_ssa(&deref
->dest
.ssa
);
557 instr
->src
[src_number
].src_type
= nir_tex_src_texture_deref
;
559 instr
->src
[src_number
].src
= nir_src_for_ssa(&deref
->dest
.ssa
);
560 instr
->src
[src_number
].src_type
= nir_tex_src_sampler_deref
;
563 instr
->src
[src_number
].src
=
564 nir_src_for_ssa(nir_swizzle(b
, src
[0], SWIZ(X
, Y
, Z
, W
),
565 instr
->coord_components
));
566 instr
->src
[src_number
].src_type
= nir_tex_src_coord
;
569 if (prog_inst
->Opcode
== OPCODE_TXP
) {
570 instr
->src
[src_number
].src
= nir_src_for_ssa(ptn_channel(b
, src
[0], W
));
571 instr
->src
[src_number
].src_type
= nir_tex_src_projector
;
575 if (prog_inst
->Opcode
== OPCODE_TXB
) {
576 instr
->src
[src_number
].src
= nir_src_for_ssa(ptn_channel(b
, src
[0], W
));
577 instr
->src
[src_number
].src_type
= nir_tex_src_bias
;
581 if (prog_inst
->Opcode
== OPCODE_TXL
) {
582 instr
->src
[src_number
].src
= nir_src_for_ssa(ptn_channel(b
, src
[0], W
));
583 instr
->src
[src_number
].src_type
= nir_tex_src_lod
;
587 if (instr
->is_shadow
) {
588 if (instr
->coord_components
< 3)
589 instr
->src
[src_number
].src
= nir_src_for_ssa(ptn_channel(b
, src
[0], Z
));
591 instr
->src
[src_number
].src
= nir_src_for_ssa(ptn_channel(b
, src
[0], W
));
593 instr
->src
[src_number
].src_type
= nir_tex_src_comparator
;
597 assert(src_number
== num_srcs
);
599 nir_ssa_dest_init(&instr
->instr
, &instr
->dest
, 4, 32, NULL
);
600 nir_builder_instr_insert(b
, &instr
->instr
);
602 /* Resolve the writemask on the texture op. */
603 ptn_move_dest(b
, dest
, &instr
->dest
.ssa
);
606 static const nir_op op_trans
[MAX_OPCODE
] = {
608 [OPCODE_ABS
] = nir_op_fabs
,
609 [OPCODE_ADD
] = nir_op_fadd
,
613 [OPCODE_DDX
] = nir_op_fddx
,
614 [OPCODE_DDY
] = nir_op_fddy
,
623 [OPCODE_FLR
] = nir_op_ffloor
,
624 [OPCODE_FRC
] = nir_op_ffract
,
630 [OPCODE_MAX
] = nir_op_fmax
,
631 [OPCODE_MIN
] = nir_op_fmin
,
632 [OPCODE_MOV
] = nir_op_mov
,
633 [OPCODE_MUL
] = nir_op_fmul
,
642 [OPCODE_SSG
] = nir_op_fsign
,
643 [OPCODE_SUB
] = nir_op_fsub
,
646 [OPCODE_TRUNC
] = nir_op_ftrunc
,
655 ptn_emit_instruction(struct ptn_compile
*c
, struct prog_instruction
*prog_inst
)
657 nir_builder
*b
= &c
->build
;
659 const unsigned op
= prog_inst
->Opcode
;
661 if (op
== OPCODE_END
)
665 for (i
= 0; i
< 3; i
++) {
666 src
[i
] = ptn_get_src(c
, &prog_inst
->SrcReg
[i
]);
668 nir_alu_dest dest
= ptn_get_dest(c
, &prog_inst
->DstReg
);
674 ptn_move_dest(b
, dest
,
675 nir_frsq(b
, nir_fabs(b
, ptn_channel(b
, src
[0], X
))));
679 ptn_move_dest(b
, dest
, nir_frcp(b
, ptn_channel(b
, src
[0], X
)));
683 ptn_move_dest(b
, dest
, nir_fexp2(b
, ptn_channel(b
, src
[0], X
)));
687 ptn_move_dest(b
, dest
, nir_flog2(b
, ptn_channel(b
, src
[0], X
)));
691 ptn_move_dest(b
, dest
, nir_fpow(b
,
692 ptn_channel(b
, src
[0], X
),
693 ptn_channel(b
, src
[1], X
)));
697 ptn_move_dest(b
, dest
, nir_fcos(b
, ptn_channel(b
, src
[0], X
)));
701 ptn_move_dest(b
, dest
, nir_fsin(b
, ptn_channel(b
, src
[0], X
)));
705 ptn_arl(b
, dest
, src
);
709 ptn_exp(b
, dest
, src
);
713 ptn_log(b
, dest
, src
);
717 ptn_lrp(b
, dest
, src
);
721 ptn_move_dest(b
, dest
, nir_fadd(b
, nir_fmul(b
, src
[0], src
[1]), src
[2]));
725 ptn_dst(b
, dest
, src
);
729 ptn_lit(b
, dest
, src
);
733 ptn_xpd(b
, dest
, src
);
737 ptn_dp2(b
, dest
, src
);
741 ptn_dp3(b
, dest
, src
);
745 ptn_dp4(b
, dest
, src
);
749 ptn_dph(b
, dest
, src
);
757 ptn_cmp(b
, dest
, src
);
761 ptn_scs(b
, dest
, src
);
765 ptn_slt(b
, dest
, src
);
769 ptn_sge(b
, dest
, src
);
777 ptn_tex(c
, dest
, src
, prog_inst
);
781 /* Extended swizzles were already handled in ptn_get_src(). */
782 ptn_alu(b
, nir_op_mov
, dest
, src
);
789 if (op_trans
[op
] != 0) {
790 ptn_alu(b
, op_trans
[op
], dest
, src
);
792 fprintf(stderr
, "unknown opcode: %s\n", _mesa_opcode_string(op
));
798 if (prog_inst
->Saturate
) {
799 assert(prog_inst
->Saturate
);
800 assert(!dest
.dest
.is_ssa
);
801 ptn_move_dest(b
, dest
, nir_fsat(b
, ptn_src_for_dest(c
, &dest
)));
806 * Puts a NIR intrinsic to store of each PROGRAM_OUTPUT value to the output
807 * variables at the end of the shader.
809 * We don't generate these incrementally as the PROGRAM_OUTPUT values are
810 * written, because there's no output load intrinsic, which means we couldn't
814 ptn_add_output_stores(struct ptn_compile
*c
)
816 nir_builder
*b
= &c
->build
;
818 nir_foreach_shader_out_variable(var
, b
->shader
) {
819 nir_ssa_def
*src
= nir_load_reg(b
, c
->output_regs
[var
->data
.location
]);
820 if (c
->prog
->Target
== GL_FRAGMENT_PROGRAM_ARB
&&
821 var
->data
.location
== FRAG_RESULT_DEPTH
) {
822 /* result.depth has this strange convention of being the .z component of
823 * a vec4 with undefined .xyw components. We resolve it to a scalar, to
824 * match GLSL's gl_FragDepth and the expectations of most backends.
826 src
= nir_channel(b
, src
, 2);
828 if (c
->prog
->Target
== GL_VERTEX_PROGRAM_ARB
&&
829 (var
->data
.location
== VARYING_SLOT_FOGC
||
830 var
->data
.location
== VARYING_SLOT_PSIZ
)) {
831 /* result.{fogcoord,psiz} is a single component value */
832 src
= nir_channel(b
, src
, 0);
834 unsigned num_components
= glsl_get_vector_elements(var
->type
);
835 nir_store_var(b
, var
, src
, (1 << num_components
) - 1);
840 setup_registers_and_variables(struct ptn_compile
*c
)
842 nir_builder
*b
= &c
->build
;
843 struct nir_shader
*shader
= b
->shader
;
845 /* Create input variables. */
846 uint64_t inputs_read
= c
->prog
->info
.inputs_read
;
847 while (inputs_read
) {
848 const int i
= u_bit_scan64(&inputs_read
);
851 nir_variable_create(shader
, nir_var_shader_in
, glsl_vec4_type(),
852 ralloc_asprintf(shader
, "in_%d", i
));
853 var
->data
.location
= i
;
856 if (c
->prog
->Target
== GL_FRAGMENT_PROGRAM_ARB
) {
857 if (i
== VARYING_SLOT_FOGC
) {
858 /* fogcoord is defined as <f, 0.0, 0.0, 1.0>. Make the actual
859 * input variable a float, and create a local containing the
862 var
->type
= glsl_float_type();
864 nir_variable
*fullvar
=
865 nir_local_variable_create(b
->impl
, glsl_vec4_type(),
868 nir_store_var(b
, fullvar
,
869 nir_vec4(b
, nir_load_var(b
, var
),
870 nir_imm_float(b
, 0.0),
871 nir_imm_float(b
, 0.0),
872 nir_imm_float(b
, 1.0)),
875 /* We inserted the real input into the list so the driver has real
876 * inputs, but we set c->input_vars[i] to the temporary so we use
877 * the splatted value.
879 c
->input_vars
[i
] = fullvar
;
884 c
->input_vars
[i
] = var
;
887 /* Create system value variables */
888 uint64_t system_values_read
= c
->prog
->info
.system_values_read
;
889 while (system_values_read
) {
890 const int i
= u_bit_scan64(&system_values_read
);
893 nir_variable_create(shader
, nir_var_system_value
, glsl_vec4_type(),
894 ralloc_asprintf(shader
, "sv_%d", i
));
895 var
->data
.location
= i
;
898 c
->sysval_vars
[i
] = var
;
901 /* Create output registers and variables. */
902 int max_outputs
= util_last_bit(c
->prog
->info
.outputs_written
);
903 c
->output_regs
= rzalloc_array(c
, nir_register
*, max_outputs
);
905 uint64_t outputs_written
= c
->prog
->info
.outputs_written
;
906 while (outputs_written
) {
907 const int i
= u_bit_scan64(&outputs_written
);
909 /* Since we can't load from outputs in the IR, we make temporaries
910 * for the outputs and emit stores to the real outputs at the end of
913 nir_register
*reg
= nir_local_reg_create(b
->impl
);
914 reg
->num_components
= 4;
916 const struct glsl_type
*type
;
917 if ((c
->prog
->Target
== GL_FRAGMENT_PROGRAM_ARB
&& i
== FRAG_RESULT_DEPTH
) ||
918 (c
->prog
->Target
== GL_VERTEX_PROGRAM_ARB
&& i
== VARYING_SLOT_FOGC
) ||
919 (c
->prog
->Target
== GL_VERTEX_PROGRAM_ARB
&& i
== VARYING_SLOT_PSIZ
))
920 type
= glsl_float_type();
922 type
= glsl_vec4_type();
925 nir_variable_create(shader
, nir_var_shader_out
, type
,
926 ralloc_asprintf(shader
, "out_%d", i
));
927 var
->data
.location
= i
;
930 c
->output_regs
[i
] = reg
;
931 c
->output_vars
[i
] = var
;
934 /* Create temporary registers. */
935 c
->temp_regs
= rzalloc_array(c
, nir_register
*,
936 c
->prog
->arb
.NumTemporaries
);
939 for (unsigned i
= 0; i
< c
->prog
->arb
.NumTemporaries
; i
++) {
940 reg
= nir_local_reg_create(b
->impl
);
945 reg
->num_components
= 4;
946 c
->temp_regs
[i
] = reg
;
949 /* Create the address register (for ARB_vertex_program). */
950 reg
= nir_local_reg_create(b
->impl
);
955 reg
->num_components
= 1;
960 prog_to_nir(const struct gl_program
*prog
,
961 const nir_shader_compiler_options
*options
)
963 struct ptn_compile
*c
;
964 struct nir_shader
*s
;
965 gl_shader_stage stage
= _mesa_program_enum_to_shader_stage(prog
->Target
);
967 c
= rzalloc(NULL
, struct ptn_compile
);
972 nir_builder_init_simple_shader(&c
->build
, NULL
, stage
, options
);
974 /* Copy the shader_info from the gl_program */
975 c
->build
.shader
->info
= prog
->info
;
979 if (prog
->Parameters
->NumParameters
> 0) {
980 const struct glsl_type
*type
=
981 glsl_array_type(glsl_vec4_type(), prog
->Parameters
->NumParameters
, 0);
983 nir_variable_create(s
, nir_var_uniform
, type
,
984 prog
->Parameters
->Parameters
[0].Name
);
987 setup_registers_and_variables(c
);
988 if (unlikely(c
->error
))
991 for (unsigned int i
= 0; i
< prog
->arb
.NumInstructions
; i
++) {
992 ptn_emit_instruction(c
, &prog
->arb
.Instructions
[i
]);
994 if (unlikely(c
->error
))
998 ptn_add_output_stores(c
);
1000 s
->info
.name
= ralloc_asprintf(s
, "ARB%d", prog
->Id
);
1001 s
->info
.num_textures
= util_last_bit(prog
->SamplersUsed
);
1002 s
->info
.num_ubos
= 0;
1003 s
->info
.num_abos
= 0;
1004 s
->info
.num_ssbos
= 0;
1005 s
->info
.num_images
= 0;
1006 s
->info
.uses_texture_gather
= false;
1007 s
->info
.clip_distance_array_size
= 0;
1008 s
->info
.cull_distance_array_size
= 0;
1009 s
->info
.separate_shader
= false;