/*
 * Copyright © 2012 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */
/** @file brw_fs_fp.cpp
 *
 * Implementation of the compiler for GL_ARB_fragment_program shaders on top
 * of the GLSL compiler backend.
 */
#include "brw_context.h"
34 fs_visitor::emit_fp_alu1(enum opcode opcode
,
35 const struct prog_instruction
*fpi
,
36 fs_reg dst
, fs_reg src
)
38 for (int i
= 0; i
< 4; i
++) {
39 if (fpi
->DstReg
.WriteMask
& (1 << i
))
40 emit(opcode
, offset(dst
, i
), offset(src
, i
));
45 fs_visitor::emit_fp_alu2(enum opcode opcode
,
46 const struct prog_instruction
*fpi
,
47 fs_reg dst
, fs_reg src0
, fs_reg src1
)
49 for (int i
= 0; i
< 4; i
++) {
50 if (fpi
->DstReg
.WriteMask
& (1 << i
))
51 emit(opcode
, offset(dst
, i
),
52 offset(src0
, i
), offset(src1
, i
));
57 fs_visitor::emit_fp_minmax(const prog_instruction
*fpi
,
58 fs_reg dst
, fs_reg src0
, fs_reg src1
)
60 enum brw_conditional_mod conditionalmod
;
61 if (fpi
->Opcode
== OPCODE_MIN
)
62 conditionalmod
= BRW_CONDITIONAL_L
;
64 conditionalmod
= BRW_CONDITIONAL_GE
;
66 for (int i
= 0; i
< 4; i
++) {
67 if (fpi
->DstReg
.WriteMask
& (1 << i
)) {
68 emit_minmax(conditionalmod
, offset(dst
, i
),
69 offset(src0
, i
), offset(src1
, i
));
75 fs_visitor::emit_fp_sop(enum brw_conditional_mod conditional_mod
,
76 const struct prog_instruction
*fpi
,
77 fs_reg dst
, fs_reg src0
, fs_reg src1
,
80 for (int i
= 0; i
< 4; i
++) {
81 if (fpi
->DstReg
.WriteMask
& (1 << i
)) {
84 emit(CMP(reg_null_d
, offset(src0
, i
), offset(src1
, i
),
87 inst
= emit(BRW_OPCODE_SEL
, offset(dst
, i
), one
, fs_reg(0.0f
));
88 inst
->predicate
= BRW_PREDICATE_NORMAL
;
94 fs_visitor::emit_fp_scalar_write(const struct prog_instruction
*fpi
,
95 fs_reg dst
, fs_reg src
)
97 for (int i
= 0; i
< 4; i
++) {
98 if (fpi
->DstReg
.WriteMask
& (1 << i
))
99 emit(MOV(offset(dst
, i
), src
));
104 fs_visitor::emit_fp_scalar_math(enum opcode opcode
,
105 const struct prog_instruction
*fpi
,
106 fs_reg dst
, fs_reg src
)
108 fs_reg temp
= fs_reg(this, glsl_type::float_type
);
109 emit_math(opcode
, temp
, src
);
110 emit_fp_scalar_write(fpi
, dst
, temp
);
114 fs_visitor::emit_fragment_program_code()
118 /* Keep a reg with 1.0 around, for reuse by emit_fp_sop so that it can just
128 fs_reg one
= fs_reg(this, glsl_type::float_type
);
129 emit(MOV(one
, fs_reg(1.0f
)));
131 for (unsigned int insn
= 0; insn
< prog
->NumInstructions
; insn
++) {
132 const struct prog_instruction
*fpi
= &prog
->Instructions
[insn
];
138 /* We always emit into a temporary destination register to avoid
141 dst
= fs_reg(this, glsl_type::vec4_type
);
143 for (int i
= 0; i
< 3; i
++)
144 src
[i
] = get_fp_src_reg(&fpi
->SrcReg
[i
]);
146 switch (fpi
->Opcode
) {
149 src
[0].negate
= false;
150 emit_fp_alu1(BRW_OPCODE_MOV
, fpi
, dst
, src
[0]);
154 emit_fp_alu2(BRW_OPCODE_ADD
, fpi
, dst
, src
[0], src
[1]);
158 for (int i
= 0; i
< 4; i
++) {
159 if (fpi
->DstReg
.WriteMask
& (1 << i
)) {
162 emit(CMP(reg_null_f
, offset(src
[0], i
), fs_reg(0.0f
),
165 inst
= emit(BRW_OPCODE_SEL
, offset(dst
, i
),
166 offset(src
[1], i
), offset(src
[2], i
));
167 inst
->predicate
= BRW_PREDICATE_NORMAL
;
173 emit_fp_scalar_math(SHADER_OPCODE_COS
, fpi
, dst
, src
[0]);
180 fs_reg mul
= fs_reg(this, glsl_type::float_type
);
181 fs_reg acc
= fs_reg(this, glsl_type::float_type
);
184 switch (fpi
->Opcode
) {
185 case OPCODE_DP2
: count
= 2; break;
186 case OPCODE_DP3
: count
= 3; break;
187 case OPCODE_DP4
: count
= 4; break;
188 case OPCODE_DPH
: count
= 3; break;
189 default: unreachable("not reached");
192 emit(MUL(acc
, offset(src
[0], 0), offset(src
[1], 0)));
193 for (int i
= 1; i
< count
; i
++) {
194 emit(MUL(mul
, offset(src
[0], i
), offset(src
[1], i
)));
195 emit(ADD(acc
, acc
, mul
));
198 if (fpi
->Opcode
== OPCODE_DPH
)
199 emit(ADD(acc
, acc
, offset(src
[1], 3)));
201 emit_fp_scalar_write(fpi
, dst
, acc
);
206 if (fpi
->DstReg
.WriteMask
& WRITEMASK_X
)
207 emit(MOV(dst
, fs_reg(1.0f
)));
208 if (fpi
->DstReg
.WriteMask
& WRITEMASK_Y
) {
209 emit(MUL(offset(dst
, 1),
210 offset(src
[0], 1), offset(src
[1], 1)));
212 if (fpi
->DstReg
.WriteMask
& WRITEMASK_Z
)
213 emit(MOV(offset(dst
, 2), offset(src
[0], 2)));
214 if (fpi
->DstReg
.WriteMask
& WRITEMASK_W
)
215 emit(MOV(offset(dst
, 3), offset(src
[1], 3)));
219 emit_fp_scalar_math(SHADER_OPCODE_EXP2
, fpi
, dst
, src
[0]);
223 emit_fp_alu1(BRW_OPCODE_RNDD
, fpi
, dst
, src
[0]);
227 emit_fp_alu1(BRW_OPCODE_FRC
, fpi
, dst
, src
[0]);
231 for (int i
= 0; i
< 4; i
++) {
232 /* In most cases the argument to a KIL will be something like
233 * TEMP[0].wwww, so there's no point in checking whether .w is < 0
237 GET_SWZ(fpi
->SrcReg
[0].Swizzle
, i
) ==
238 GET_SWZ(fpi
->SrcReg
[0].Swizzle
, i
- 1) &&
239 ((fpi
->SrcReg
[0].Negate
>> i
) & 1) ==
240 ((fpi
->SrcReg
[0].Negate
>> (i
- 1)) & 1)) {
245 /* Emit an instruction that's predicated on the current
246 * undiscarded pixels, and updates just those pixels to be
249 fs_inst
*cmp
= emit(CMP(reg_null_f
, offset(src
[0], i
),
250 fs_reg(0.0f
), BRW_CONDITIONAL_GE
));
251 cmp
->predicate
= BRW_PREDICATE_NORMAL
;
252 cmp
->flag_subreg
= 1;
258 emit_fp_scalar_math(SHADER_OPCODE_LOG2
, fpi
, dst
, src
[0]);
262 /* From the ARB_fragment_program spec:
264 * tmp = VectorLoad(op0);
265 * if (tmp.x < 0) tmp.x = 0;
266 * if (tmp.y < 0) tmp.y = 0;
267 * if (tmp.w < -(128.0-epsilon)) tmp.w = -(128.0-epsilon);
268 * else if (tmp.w > 128-epsilon) tmp.w = 128-epsilon;
271 * result.z = (tmp.x > 0) ? RoughApproxPower(tmp.y, tmp.w) : 0.0;
274 * Note that we don't do the clamping to +/- 128. We didn't in
275 * brw_wm_emit.c either.
277 if (fpi
->DstReg
.WriteMask
& WRITEMASK_X
)
278 emit(MOV(offset(dst
, 0), fs_reg(1.0f
)));
280 if (fpi
->DstReg
.WriteMask
& WRITEMASK_YZ
) {
282 emit(CMP(reg_null_f
, offset(src
[0], 0), fs_reg(0.0f
),
283 BRW_CONDITIONAL_LE
));
285 if (fpi
->DstReg
.WriteMask
& WRITEMASK_Y
) {
286 emit(MOV(offset(dst
, 1), offset(src
[0], 0)));
287 inst
= emit(MOV(offset(dst
, 1), fs_reg(0.0f
)));
288 inst
->predicate
= BRW_PREDICATE_NORMAL
;
291 if (fpi
->DstReg
.WriteMask
& WRITEMASK_Z
) {
292 emit_math(SHADER_OPCODE_POW
, offset(dst
, 2),
293 offset(src
[0], 1), offset(src
[0], 3));
295 inst
= emit(MOV(offset(dst
, 2), fs_reg(0.0f
)));
296 inst
->predicate
= BRW_PREDICATE_NORMAL
;
300 if (fpi
->DstReg
.WriteMask
& WRITEMASK_W
)
301 emit(MOV(offset(dst
, 3), fs_reg(1.0f
)));
306 for (int i
= 0; i
< 4; i
++) {
307 if (fpi
->DstReg
.WriteMask
& (1 << i
)) {
308 fs_reg a
= offset(src
[0], i
);
309 fs_reg y
= offset(src
[1], i
);
310 fs_reg x
= offset(src
[2], i
);
311 emit_lrp(offset(dst
, i
), x
, y
, a
);
317 for (int i
= 0; i
< 4; i
++) {
318 if (fpi
->DstReg
.WriteMask
& (1 << i
)) {
319 fs_reg temp
= fs_reg(this, glsl_type::float_type
);
320 emit(MUL(temp
, offset(src
[0], i
), offset(src
[1], i
)));
321 emit(ADD(offset(dst
, i
), temp
, offset(src
[2], i
)));
327 emit_fp_minmax(fpi
, dst
, src
[0], src
[1]);
331 emit_fp_alu1(BRW_OPCODE_MOV
, fpi
, dst
, src
[0]);
335 emit_fp_minmax(fpi
, dst
, src
[0], src
[1]);
339 emit_fp_alu2(BRW_OPCODE_MUL
, fpi
, dst
, src
[0], src
[1]);
343 fs_reg temp
= fs_reg(this, glsl_type::float_type
);
344 emit_math(SHADER_OPCODE_POW
, temp
, src
[0], src
[1]);
345 emit_fp_scalar_write(fpi
, dst
, temp
);
350 emit_fp_scalar_math(SHADER_OPCODE_RCP
, fpi
, dst
, src
[0]);
354 emit_fp_scalar_math(SHADER_OPCODE_RSQ
, fpi
, dst
, src
[0]);
358 if (fpi
->DstReg
.WriteMask
& WRITEMASK_X
) {
359 emit_math(SHADER_OPCODE_COS
, offset(dst
, 0),
363 if (fpi
->DstReg
.WriteMask
& WRITEMASK_Y
) {
364 emit_math(SHADER_OPCODE_SIN
, offset(dst
, 1),
370 emit_fp_sop(BRW_CONDITIONAL_GE
, fpi
, dst
, src
[0], src
[1], one
);
374 emit_fp_scalar_math(SHADER_OPCODE_SIN
, fpi
, dst
, src
[0]);
378 emit_fp_sop(BRW_CONDITIONAL_L
, fpi
, dst
, src
[0], src
[1], one
);
382 fs_reg neg_src1
= src
[1];
383 neg_src1
.negate
= !src
[1].negate
;
385 emit_fp_alu2(BRW_OPCODE_ADD
, fpi
, dst
, src
[0], neg_src1
);
392 ir_texture_opcode op
;
395 fs_reg coordinate
= src
[0];
398 fs_reg texel_offset
; /* No offsets; leave as BAD_FILE. */
400 switch (fpi
->Opcode
) {
407 coordinate
= fs_reg(this, glsl_type::vec3_type
);
408 fs_reg invproj
= fs_reg(this, glsl_type::float_type
);
409 emit_math(SHADER_OPCODE_RCP
, invproj
, offset(src
[0], 3));
410 for (int i
= 0; i
< 3; i
++) {
411 emit(MUL(offset(coordinate
, i
),
412 offset(src
[0], i
), invproj
));
418 lod
= offset(src
[0], 3);
421 unreachable("not reached");
424 int coord_components
;
425 switch (fpi
->TexSrcTarget
) {
426 case TEXTURE_1D_INDEX
:
427 coord_components
= 1;
430 case TEXTURE_2D_INDEX
:
431 case TEXTURE_1D_ARRAY_INDEX
:
432 case TEXTURE_RECT_INDEX
:
433 case TEXTURE_EXTERNAL_INDEX
:
434 coord_components
= 2;
437 case TEXTURE_3D_INDEX
:
438 case TEXTURE_2D_ARRAY_INDEX
:
439 coord_components
= 3;
442 case TEXTURE_CUBE_INDEX
: {
443 coord_components
= 4;
445 fs_reg temp
= fs_reg(this, glsl_type::float_type
);
446 fs_reg cubecoord
= fs_reg(this, glsl_type::vec3_type
);
447 fs_reg abscoord
= coordinate
;
448 abscoord
.negate
= false;
450 emit_minmax(BRW_CONDITIONAL_GE
, temp
,
451 offset(abscoord
, 0), offset(abscoord
, 1));
452 emit_minmax(BRW_CONDITIONAL_GE
, temp
,
453 temp
, offset(abscoord
, 2));
454 emit_math(SHADER_OPCODE_RCP
, temp
, temp
);
455 for (int i
= 0; i
< 3; i
++) {
456 emit(MUL(offset(cubecoord
, i
),
457 offset(coordinate
, i
), temp
));
460 coordinate
= cubecoord
;
465 unreachable("not reached");
469 shadow_c
= offset(coordinate
, 2);
471 emit_texture(op
, glsl_type::vec4_type
, coordinate
, coord_components
,
472 shadow_c
, lod
, dpdy
, 0, sample_index
,
473 reg_undef
, 0, /* offset, components */
475 0, /* gather component */
476 false, /* is cube array */
477 fpi
->TexSrcTarget
== TEXTURE_RECT_INDEX
,
478 fpi
->TexSrcUnit
, fs_reg(fpi
->TexSrcUnit
),
486 /* Note that SWZ's extended swizzles are handled in the general
487 * get_src_reg() code.
489 emit_fp_alu1(BRW_OPCODE_MOV
, fpi
, dst
, src
[0]);
493 for (int i
= 0; i
< 3; i
++) {
494 if (fpi
->DstReg
.WriteMask
& (1 << i
)) {
495 int i1
= (i
+ 1) % 3;
496 int i2
= (i
+ 2) % 3;
498 fs_reg temp
= fs_reg(this, glsl_type::float_type
);
499 fs_reg neg_src1_1
= offset(src
[1], i1
);
500 neg_src1_1
.negate
= !neg_src1_1
.negate
;
501 emit(MUL(temp
, offset(src
[0], i2
), neg_src1_1
));
502 emit(MUL(offset(dst
, i
),
503 offset(src
[0], i1
), offset(src
[1], i2
)));
504 emit(ADD(offset(dst
, i
), offset(dst
, i
), temp
));
513 _mesa_problem(ctx
, "Unsupported opcode %s in fragment program\n",
514 _mesa_opcode_string(fpi
->Opcode
));
517 /* To handle saturates, we emit a MOV with a saturate bit, which
518 * optimization should fold into the preceding instructions when safe.
520 if (fpi
->Opcode
!= OPCODE_END
) {
521 fs_reg real_dst
= get_fp_dst_reg(&fpi
->DstReg
);
523 for (int i
= 0; i
< 4; i
++) {
524 if (fpi
->DstReg
.WriteMask
& (1 << i
)) {
525 fs_inst
*inst
= emit(MOV(offset(real_dst
, i
),
527 inst
->saturate
= fpi
->SaturateMode
;
535 * Fragment depth has this strange convention of being the .z component of
536 * a vec4. emit_fb_write() wants to see a float value, instead.
538 this->current_annotation
= "result.depth write";
539 if (frag_depth
.file
!= BAD_FILE
) {
540 fs_reg temp
= fs_reg(this, glsl_type::float_type
);
541 emit(MOV(temp
, offset(frag_depth
, 2)));
547 fs_visitor::setup_fp_regs()
549 /* PROGRAM_TEMPORARY */
550 int num_temp
= prog
->NumTemporaries
;
551 fp_temp_regs
= rzalloc_array(mem_ctx
, fs_reg
, num_temp
);
552 for (int i
= 0; i
< num_temp
; i
++)
553 fp_temp_regs
[i
] = fs_reg(this, glsl_type::vec4_type
);
555 /* PROGRAM_STATE_VAR etc. */
556 if (dispatch_width
== 8) {
558 p
< prog
->Parameters
->NumParameters
; p
++) {
559 for (unsigned int i
= 0; i
< 4; i
++) {
560 stage_prog_data
->param
[uniforms
++] =
561 &prog
->Parameters
->ParameterValues
[p
][i
];
566 fp_input_regs
= rzalloc_array(mem_ctx
, fs_reg
, VARYING_SLOT_MAX
);
567 for (int i
= 0; i
< VARYING_SLOT_MAX
; i
++) {
568 if (prog
->InputsRead
& BITFIELD64_BIT(i
)) {
569 this->current_annotation
= ralloc_asprintf(ctx
, "interpolate input %d",
573 case VARYING_SLOT_POS
:
575 assert(stage
== MESA_SHADER_FRAGMENT
);
576 gl_fragment_program
*fp
= (gl_fragment_program
*) prog
;
578 *emit_fragcoord_interpolation(fp
->PixelCenterInteger
,
579 fp
->OriginUpperLeft
);
582 case VARYING_SLOT_FACE
:
583 fp_input_regs
[i
] = *emit_frontfacing_interpolation();
586 fp_input_regs
[i
] = fs_reg(this, glsl_type::vec4_type
);
587 emit_general_interpolation(fp_input_regs
[i
], "fp_input",
588 glsl_type::vec4_type
,
589 INTERP_QUALIFIER_NONE
,
592 if (i
== VARYING_SLOT_FOGC
) {
593 emit(MOV(offset(fp_input_regs
[i
], 1), fs_reg(0.0f
)));
594 emit(MOV(offset(fp_input_regs
[i
], 2), fs_reg(0.0f
)));
595 emit(MOV(offset(fp_input_regs
[i
], 3), fs_reg(1.0f
)));
601 this->current_annotation
= NULL
;
607 fs_visitor::get_fp_dst_reg(const prog_dst_register
*dst
)
609 assert(stage
== MESA_SHADER_FRAGMENT
);
610 brw_wm_prog_key
*key
= (brw_wm_prog_key
*) this->key
;
613 case PROGRAM_TEMPORARY
:
614 return fp_temp_regs
[dst
->Index
];
617 if (dst
->Index
== FRAG_RESULT_DEPTH
) {
618 if (frag_depth
.file
== BAD_FILE
)
619 frag_depth
= fs_reg(this, glsl_type::vec4_type
);
621 } else if (dst
->Index
== FRAG_RESULT_COLOR
) {
622 if (outputs
[0].file
== BAD_FILE
) {
623 outputs
[0] = fs_reg(this, glsl_type::vec4_type
);
624 output_components
[0] = 4;
626 /* Tell emit_fb_writes() to smear fragment.color across all the
629 for (int i
= 1; i
< key
->nr_color_regions
; i
++) {
630 outputs
[i
] = outputs
[0];
631 output_components
[i
] = output_components
[0];
636 int output_index
= dst
->Index
- FRAG_RESULT_DATA0
;
637 if (outputs
[output_index
].file
== BAD_FILE
) {
638 outputs
[output_index
] = fs_reg(this, glsl_type::vec4_type
);
640 output_components
[output_index
] = 4;
641 return outputs
[output_index
];
644 case PROGRAM_UNDEFINED
:
648 _mesa_problem(ctx
, "bad dst register file: %s\n",
649 _mesa_register_file_name((gl_register_file
)dst
->File
));
650 return fs_reg(this, glsl_type::vec4_type
);
655 fs_visitor::get_fp_src_reg(const prog_src_register
*src
)
657 struct gl_program_parameter_list
*plist
= prog
->Parameters
;
664 case PROGRAM_UNDEFINED
:
666 case PROGRAM_TEMPORARY
:
667 result
= fp_temp_regs
[src
->Index
];
671 result
= fp_input_regs
[src
->Index
];
674 case PROGRAM_STATE_VAR
:
675 case PROGRAM_UNIFORM
:
676 case PROGRAM_CONSTANT
:
677 /* We actually want to look at the type in the Parameters list for this,
678 * because this lets us upload constant builtin uniforms, as actual
681 switch (plist
->Parameters
[src
->Index
].Type
) {
682 case PROGRAM_CONSTANT
: {
683 result
= fs_reg(this, glsl_type::vec4_type
);
685 for (int i
= 0; i
< 4; i
++) {
686 emit(MOV(offset(result
, i
),
687 fs_reg(plist
->ParameterValues
[src
->Index
][i
].f
)));
692 case PROGRAM_STATE_VAR
:
693 case PROGRAM_UNIFORM
:
694 result
= fs_reg(UNIFORM
, src
->Index
* 4);
698 _mesa_problem(ctx
, "bad uniform src register file: %s\n",
699 _mesa_register_file_name((gl_register_file
)src
->File
));
700 return fs_reg(this, glsl_type::vec4_type
);
705 _mesa_problem(ctx
, "bad src register file: %s\n",
706 _mesa_register_file_name((gl_register_file
)src
->File
));
707 return fs_reg(this, glsl_type::vec4_type
);
710 if (src
->Swizzle
!= SWIZZLE_NOOP
|| src
->Negate
) {
711 fs_reg unswizzled
= result
;
712 result
= fs_reg(this, glsl_type::vec4_type
);
713 for (int i
= 0; i
< 4; i
++) {
714 bool negate
= src
->Negate
& (1 << i
);
715 /* The ZERO, ONE, and Negate options are only used for OPCODE_SWZ,
716 * but it costs us nothing to support it.
718 int src_swiz
= GET_SWZ(src
->Swizzle
, i
);
719 if (src_swiz
== SWIZZLE_ZERO
) {
720 emit(MOV(offset(result
, i
), fs_reg(0.0f
)));
721 } else if (src_swiz
== SWIZZLE_ONE
) {
722 emit(MOV(offset(result
, i
),
723 negate
? fs_reg(-1.0f
) : fs_reg(1.0f
)));
725 fs_reg src
= offset(unswizzled
, src_swiz
);
727 src
.negate
= !src
.negate
;
728 emit(MOV(offset(result
, i
), src
));