/*
 * Copyright © 2012 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */
/** @file brw_fs_fp.cpp
 *
 * Implementation of the compiler for GL_ARB_fragment_program shaders on top
 * of the GLSL compiler backend.
 */
30 #include "brw_context.h"
// regoffset(reg, i): helper taking an fs_reg by value and an integer index.
// Callers in this file pass a component index (0-3) and use the result
// directly as an instruction operand.
// NOTE(review): the body is not present in this extract, so how `i` is
// applied to `reg` cannot be confirmed from here.
34 regoffset(fs_reg reg
, int i
)
41 fs_visitor::emit_fp_alu1(enum opcode opcode
,
42 const struct prog_instruction
*fpi
,
43 fs_reg dst
, fs_reg src
)
45 for (int i
= 0; i
< 4; i
++) {
46 if (fpi
->DstReg
.WriteMask
& (1 << i
))
47 emit(opcode
, regoffset(dst
, i
), regoffset(src
, i
));
52 fs_visitor::emit_fp_alu2(enum opcode opcode
,
53 const struct prog_instruction
*fpi
,
54 fs_reg dst
, fs_reg src0
, fs_reg src1
)
56 for (int i
= 0; i
< 4; i
++) {
57 if (fpi
->DstReg
.WriteMask
& (1 << i
))
58 emit(opcode
, regoffset(dst
, i
),
59 regoffset(src0
, i
), regoffset(src1
, i
));
64 fs_visitor::emit_fp_minmax(const prog_instruction
*fpi
,
65 fs_reg dst
, fs_reg src0
, fs_reg src1
)
67 uint32_t conditionalmod
;
68 if (fpi
->Opcode
== OPCODE_MIN
)
69 conditionalmod
= BRW_CONDITIONAL_L
;
71 conditionalmod
= BRW_CONDITIONAL_GE
;
73 for (int i
= 0; i
< 4; i
++) {
74 if (fpi
->DstReg
.WriteMask
& (1 << i
)) {
75 emit_minmax(conditionalmod
, regoffset(dst
, i
),
76 regoffset(src0
, i
), regoffset(src1
, i
));
82 fs_visitor::emit_fp_sop(uint32_t conditional_mod
,
83 const struct prog_instruction
*fpi
,
84 fs_reg dst
, fs_reg src0
, fs_reg src1
,
87 for (int i
= 0; i
< 4; i
++) {
88 if (fpi
->DstReg
.WriteMask
& (1 << i
)) {
91 emit(CMP(reg_null_d
, regoffset(src0
, i
), regoffset(src1
, i
),
94 inst
= emit(BRW_OPCODE_SEL
, regoffset(dst
, i
), one
, fs_reg(0.0f
));
95 inst
->predicate
= BRW_PREDICATE_NORMAL
;
101 fs_visitor::emit_fp_scalar_write(const struct prog_instruction
*fpi
,
102 fs_reg dst
, fs_reg src
)
104 for (int i
= 0; i
< 4; i
++) {
105 if (fpi
->DstReg
.WriteMask
& (1 << i
))
106 emit(MOV(regoffset(dst
, i
), src
));
111 fs_visitor::emit_fp_scalar_math(enum opcode opcode
,
112 const struct prog_instruction
*fpi
,
113 fs_reg dst
, fs_reg src
)
115 fs_reg temp
= fs_reg(this, glsl_type::float_type
);
116 emit_math(opcode
, temp
, src
);
117 emit_fp_scalar_write(fpi
, dst
, temp
);
// fs_visitor::emit_fragment_program_code
//
// Walks fp->Base.Instructions and emits backend instructions for each
// prog_instruction. Per instruction:
//   1. a fresh vec4 temporary is allocated as `dst`;
//   2. the three source slots are resolved through get_fp_src_reg();
//   3. a switch on fpi->Opcode emits the operation into `dst`;
//   4. unless the opcode is OPCODE_END, a MOV is emitted into each channel
//      of get_fp_dst_reg(&fpi->DstReg) enabled in DstReg.WriteMask, and that
//      MOV's saturate field is set from fpi->SaturateMode.
// After the loop, if frag_depth has been allocated (file != BAD_FILE), its
// component 2 is copied into a float temporary.
//
// NOTE(review): this listing is an incomplete extract. Brace-only lines,
// most `case` labels, `break` statements and several declarations/operands
// are not present, so the arm notes below describe only what the visible
// statements do.
121 fs_visitor::emit_fragment_program_code()
// `null` is used below as the destination of flag-setting CMP instructions.
125 fs_reg null
= fs_reg(brw_null_reg());
127 /* Keep a reg with 1.0 around, for reuse by emit_fp_sop so that it can just
// `one` is loaded with the immediate 1.0f and later passed to emit_fp_sop().
137 fs_reg one
= fs_reg(this, glsl_type::float_type
);
138 emit(MOV(one
, fs_reg(1.0f
)));
140 for (unsigned int insn
= 0; insn
< fp
->Base
.NumInstructions
; insn
++) {
141 const struct prog_instruction
*fpi
= &fp
->Base
.Instructions
[insn
];
144 //_mesa_print_instruction(fpi);
149 /* We always emit into a temporary destination register to avoid
152 dst
= fs_reg(this, glsl_type::vec4_type
);
// All three source slots are resolved for every instruction.
154 for (int i
= 0; i
< 3; i
++)
155 src
[i
] = get_fp_src_reg(&fpi
->SrcReg
[i
]);
157 switch (fpi
->Opcode
) {
// Arm: clears src[0].negate, then a per-channel MOV of src[0].
160 src
[0].negate
= false;
161 emit_fp_alu1(BRW_OPCODE_MOV
, fpi
, dst
, src
[0]);
// Arm: component-wise ADD of src[0] and src[1].
165 emit_fp_alu2(BRW_OPCODE_ADD
, fpi
, dst
, src
[0], src
[1]);
// Arm: per enabled channel, CMP of src[0] against 0.0f followed by a
// predicated SEL between src[1] and src[2].
169 for (int i
= 0; i
< 4; i
++) {
170 if (fpi
->DstReg
.WriteMask
& (1 << i
)) {
173 emit(CMP(null
, regoffset(src
[0], i
), fs_reg(0.0f
),
176 inst
= emit(BRW_OPCODE_SEL
, regoffset(dst
, i
),
177 regoffset(src
[1], i
), regoffset(src
[2], i
));
178 inst
->predicate
= BRW_PREDICATE_NORMAL
;
184 emit_fp_scalar_math(SHADER_OPCODE_COS
, fpi
, dst
, src
[0]);
// Dot-product arm (DP2/DP3/DP4/DPH): `acc` starts as the channel-0 product,
// then the products of channels 1..count-1 are added. For OPCODE_DPH,
// component 3 of src[1] is added as well. The scalar result is written to
// every enabled channel.
191 fs_reg mul
= fs_reg(this, glsl_type::float_type
);
192 fs_reg acc
= fs_reg(this, glsl_type::float_type
);
195 switch (fpi
->Opcode
) {
196 case OPCODE_DP2
: count
= 2; break;
197 case OPCODE_DP3
: count
= 3; break;
198 case OPCODE_DP4
: count
= 4; break;
199 case OPCODE_DPH
: count
= 3; break;
200 default: assert(!"not reached"); count
= 0; break;
203 emit(MUL(acc
, regoffset(src
[0], 0), regoffset(src
[1], 0)));
204 for (int i
= 1; i
< count
; i
++) {
205 emit(MUL(mul
, regoffset(src
[0], i
), regoffset(src
[1], i
)));
206 emit(ADD(acc
, acc
, mul
));
209 if (fpi
->Opcode
== OPCODE_DPH
)
210 emit(ADD(acc
, acc
, regoffset(src
[1], 3)));
212 emit_fp_scalar_write(fpi
, dst
, acc
);
// Arm: x = 1.0f, y = src[0].y * src[1].y, z = src[0].z, w = src[1].w,
// each gated by its own write-mask bit.
217 if (fpi
->DstReg
.WriteMask
& WRITEMASK_X
)
218 emit(MOV(dst
, fs_reg(1.0f
)));
219 if (fpi
->DstReg
.WriteMask
& WRITEMASK_Y
) {
220 emit(MUL(regoffset(dst
, 1),
221 regoffset(src
[0], 1), regoffset(src
[1], 1)));
223 if (fpi
->DstReg
.WriteMask
& WRITEMASK_Z
)
224 emit(MOV(regoffset(dst
, 2), regoffset(src
[0], 2)));
225 if (fpi
->DstReg
.WriteMask
& WRITEMASK_W
)
226 emit(MOV(regoffset(dst
, 3), regoffset(src
[1], 3)));
230 emit_fp_scalar_math(SHADER_OPCODE_EXP2
, fpi
, dst
, src
[0]);
234 emit_fp_alu1(BRW_OPCODE_RNDD
, fpi
, dst
, src
[0]);
238 emit_fp_alu1(BRW_OPCODE_FRC
, fpi
, dst
, src
[0]);
// Arm (KIL, per the comment below): for each channel a predicated CMP of
// src[0] against 0.0f with BRW_CONDITIONAL_GE is emitted with flag_subreg 1.
// The condition at listing lines 248-251 compares this channel's swizzle
// and negate bit with the previous channel's; what is done when they match
// is not visible in this extract.
242 for (int i
= 0; i
< 4; i
++) {
243 /* In most cases the argument to a KIL will be something like
244 * TEMP[0].wwww, so there's no point in checking whether .w is < 0
248 GET_SWZ(fpi
->SrcReg
[0].Swizzle
, i
) ==
249 GET_SWZ(fpi
->SrcReg
[0].Swizzle
, i
- 1) &&
250 ((fpi
->SrcReg
[0].Negate
>> i
) & 1) ==
251 ((fpi
->SrcReg
[0].Negate
>> (i
- 1)) & 1)) {
256 /* Emit an instruction that's predicated on the current
257 * undiscarded pixels, and updates just those pixels to be
260 fs_inst
*cmp
= emit(CMP(null
, regoffset(src
[0], i
), fs_reg(0.0f
),
261 BRW_CONDITIONAL_GE
));
262 cmp
->predicate
= BRW_PREDICATE_NORMAL
;
263 cmp
->flag_subreg
= 1;
269 emit_fp_scalar_math(SHADER_OPCODE_LOG2
, fpi
, dst
, src
[0]);
273 /* From the ARB_fragment_program spec:
275 * tmp = VectorLoad(op0);
276 * if (tmp.x < 0) tmp.x = 0;
277 * if (tmp.y < 0) tmp.y = 0;
278 * if (tmp.w < -(128.0-epsilon)) tmp.w = -(128.0-epsilon);
279 * else if (tmp.w > 128-epsilon) tmp.w = 128-epsilon;
282 * result.z = (tmp.x > 0) ? RoughApproxPower(tmp.y, tmp.w) : 0.0;
285 * Note that we don't do the clamping to +/- 128. We didn't in
286 * brw_wm_emit.c either.
// Lighting arm as emitted here: x and w are set to 1.0f. For y/z, a CMP of
// src[0].x against 0.0f with BRW_CONDITIONAL_LE is emitted; y gets src[0].x
// and z gets POW(src[0].y, src[0].w), each followed by a predicated MOV of
// 0.0f.
288 if (fpi
->DstReg
.WriteMask
& WRITEMASK_X
)
289 emit(MOV(regoffset(dst
, 0), fs_reg(1.0f
)));
291 if (fpi
->DstReg
.WriteMask
& WRITEMASK_YZ
) {
293 emit(CMP(null
, regoffset(src
[0], 0), fs_reg(0.0f
),
294 BRW_CONDITIONAL_LE
));
296 if (fpi
->DstReg
.WriteMask
& WRITEMASK_Y
) {
297 emit(MOV(regoffset(dst
, 1), regoffset(src
[0], 0)));
298 inst
= emit(MOV(regoffset(dst
, 1), fs_reg(0.0f
)));
299 inst
->predicate
= BRW_PREDICATE_NORMAL
;
302 if (fpi
->DstReg
.WriteMask
& WRITEMASK_Z
) {
303 emit_math(SHADER_OPCODE_POW
, regoffset(dst
, 2),
304 regoffset(src
[0], 1), regoffset(src
[0], 3));
306 inst
= emit(MOV(regoffset(dst
, 2), fs_reg(0.0f
)));
307 inst
->predicate
= BRW_PREDICATE_NORMAL
;
311 if (fpi
->DstReg
.WriteMask
& WRITEMASK_W
)
312 emit(MOV(regoffset(dst
, 3), fs_reg(1.0f
)));
// Arm: per enabled channel, emit_lrp(dst, x, y, a) with a = src[0],
// y = src[1], x = src[2].
317 for (int i
= 0; i
< 4; i
++) {
318 if (fpi
->DstReg
.WriteMask
& (1 << i
)) {
319 fs_reg a
= regoffset(src
[0], i
);
320 fs_reg y
= regoffset(src
[1], i
);
321 fs_reg x
= regoffset(src
[2], i
);
322 emit_lrp(regoffset(dst
, i
), x
, y
, a
);
// Arm: per enabled channel, dst = src[0] * src[1] + src[2], using a float
// temporary for the product.
328 for (int i
= 0; i
< 4; i
++) {
329 if (fpi
->DstReg
.WriteMask
& (1 << i
)) {
330 fs_reg temp
= fs_reg(this, glsl_type::float_type
);
331 emit(MUL(temp
, regoffset(src
[0], i
), regoffset(src
[1], i
)));
332 emit(ADD(regoffset(dst
, i
), temp
, regoffset(src
[2], i
)));
338 emit_fp_minmax(fpi
, dst
, src
[0], src
[1]);
342 emit_fp_alu1(BRW_OPCODE_MOV
, fpi
, dst
, src
[0]);
346 emit_fp_minmax(fpi
, dst
, src
[0], src
[1]);
350 emit_fp_alu2(BRW_OPCODE_MUL
, fpi
, dst
, src
[0], src
[1]);
// Arm: two-operand POW computed once into a float temporary, then written
// to every enabled channel.
354 fs_reg temp
= fs_reg(this, glsl_type::float_type
);
355 emit_math(SHADER_OPCODE_POW
, temp
, src
[0], src
[1]);
356 emit_fp_scalar_write(fpi
, dst
, temp
);
361 emit_fp_scalar_math(SHADER_OPCODE_RCP
, fpi
, dst
, src
[0]);
365 emit_fp_scalar_math(SHADER_OPCODE_RSQ
, fpi
, dst
, src
[0]);
// Arm: COS of src[0].x into dst.x and SIN of src[0].y into dst.y, each
// gated by its write-mask bit.
369 if (fpi
->DstReg
.WriteMask
& WRITEMASK_X
) {
370 emit_math(SHADER_OPCODE_COS
, regoffset(dst
, 0),
371 regoffset(src
[0], 0));
374 if (fpi
->DstReg
.WriteMask
& WRITEMASK_Y
) {
375 emit_math(SHADER_OPCODE_SIN
, regoffset(dst
, 1),
376 regoffset(src
[0], 1));
381 emit_fp_sop(BRW_CONDITIONAL_GE
, fpi
, dst
, src
[0], src
[1], one
);
385 emit_fp_scalar_math(SHADER_OPCODE_SIN
, fpi
, dst
, src
[0]);
389 emit_fp_sop(BRW_CONDITIONAL_L
, fpi
, dst
, src
[0], src
[1], one
);
// Arm: subtraction expressed as an ADD of src[0] and a copy of src[1] whose
// negate flag is inverted.
393 fs_reg neg_src1
= src
[1];
394 neg_src1
.negate
= !src
[1].negate
;
396 emit_fp_alu2(BRW_OPCODE_ADD
, fpi
, dst
, src
[0], neg_src1
);
// Texture arm: builds a placeholder ir_texture so the GLSL texture emission
// helpers can be reused. One sub-case divides the first three coordinate
// components by src[0].w; the ir_txb sub-case takes `lod` from src[0].w.
403 /* We piggy-back on the GLSL IR support for texture setup. To do so,
404 * we have to cook up an ir_texture that has the coordinate field
405 * with appropriate type, and shadow_comparitor set or not. All the
406 * other properties of ir_texture are passed in as arguments to the
407 * emit_texture_gen* function.
409 ir_texture
*ir
= NULL
;
413 fs_reg coordinate
= src
[0];
417 switch (fpi
->Opcode
) {
419 ir
= new(mem_ctx
) ir_texture(ir_tex
);
422 ir
= new(mem_ctx
) ir_texture(ir_tex
);
424 coordinate
= fs_reg(this, glsl_type::vec3_type
);
425 fs_reg invproj
= fs_reg(this, glsl_type::float_type
);
426 emit_math(SHADER_OPCODE_RCP
, invproj
, regoffset(src
[0], 3));
427 for (int i
= 0; i
< 3; i
++) {
428 emit(MUL(regoffset(coordinate
, i
),
429 regoffset(src
[0], i
), invproj
));
434 ir
= new(mem_ctx
) ir_texture(ir_txb
);
435 lod
= regoffset(src
[0], 3);
438 assert(!"not reached");
442 ir
->type
= glsl_type::vec4_type
;
// The coordinate type is chosen from the texture target: float for 1D,
// vec2 for 2D / 1D-array / rect / external, vec3 for 3D / 2D-array / cube.
444 const glsl_type
*coordinate_type
;
445 switch (fpi
->TexSrcTarget
) {
446 case TEXTURE_1D_INDEX
:
447 coordinate_type
= glsl_type::float_type
;
450 case TEXTURE_2D_INDEX
:
451 case TEXTURE_1D_ARRAY_INDEX
:
452 case TEXTURE_RECT_INDEX
:
453 case TEXTURE_EXTERNAL_INDEX
:
454 coordinate_type
= glsl_type::vec2_type
;
457 case TEXTURE_3D_INDEX
:
458 case TEXTURE_2D_ARRAY_INDEX
:
459 coordinate_type
= glsl_type::vec3_type
;
// Cube maps: the coordinate is multiplied by the reciprocal of the largest
// of its three components, taken from a copy with the negate flag cleared.
462 case TEXTURE_CUBE_INDEX
: {
463 coordinate_type
= glsl_type::vec3_type
;
465 fs_reg temp
= fs_reg(this, glsl_type::float_type
);
466 fs_reg cubecoord
= fs_reg(this, glsl_type::vec3_type
);
467 fs_reg abscoord
= coordinate
;
468 abscoord
.negate
= false;
470 emit_minmax(BRW_CONDITIONAL_GE
, temp
,
471 regoffset(abscoord
, 0), regoffset(abscoord
, 1));
472 emit_minmax(BRW_CONDITIONAL_GE
, temp
,
473 temp
, regoffset(abscoord
, 2));
474 emit_math(SHADER_OPCODE_RCP
, temp
, temp
);
475 for (int i
= 0; i
< 3; i
++) {
476 emit(MUL(regoffset(cubecoord
, i
),
477 regoffset(coordinate
, i
), temp
));
480 coordinate
= cubecoord
;
485 assert(!"not reached");
486 coordinate_type
= glsl_type::vec2_type
;
// junk_data is passed uninitialised: the ir_constant is created only to
// give ir->coordinate the chosen type.
490 ir_constant_data junk_data
;
491 ir
->coordinate
= new(mem_ctx
) ir_constant(coordinate_type
, &junk_data
);
493 coordinate
= rescale_texcoord(ir
, coordinate
,
494 fpi
->TexSrcTarget
== TEXTURE_RECT_INDEX
,
495 fpi
->TexSrcUnit
, fpi
->TexSrcUnit
);
// For shadow lookups the comparison value is component 2 of the coordinate.
497 if (fpi
->TexShadow
) {
498 shadow_c
= regoffset(coordinate
, 2);
499 ir
->shadow_comparitor
= new(mem_ctx
) ir_constant(0.0f
);
// One of the gen7 / gen5 / gen4 texture emitters is called; the visible
// test selects gen5 when brw->gen >= 5.
504 inst
= emit_texture_gen7(ir
, dst
, coordinate
, shadow_c
, lod
, dpdy
, sample_index
);
505 } else if (brw
->gen
>= 5) {
506 inst
= emit_texture_gen5(ir
, dst
, coordinate
, shadow_c
, lod
, dpdy
, sample_index
);
508 inst
= emit_texture_gen4(ir
, dst
, coordinate
, shadow_c
, lod
, dpdy
);
511 inst
->sampler
= fpi
->TexSrcUnit
;
512 inst
->shadow_compare
= fpi
->TexShadow
;
514 /* Reuse the GLSL swizzle_result() handler. */
515 swizzle_result(ir
, dst
, fpi
->TexSrcUnit
);
522 /* Note that SWZ's extended swizzles are handled in the general
523 * get_src_reg() code.
525 emit_fp_alu1(BRW_OPCODE_MOV
, fpi
, dst
, src
[0]);
// Arm: cross product over channels 0..2. With i1 = (i+1)%3 and
// i2 = (i+2)%3: dst[i] = src[0][i1] * src[1][i2] - src[0][i2] * src[1][i1],
// the subtraction being done by negating the src[1] operand.
529 for (int i
= 0; i
< 3; i
++) {
530 if (fpi
->DstReg
.WriteMask
& (1 << i
)) {
531 int i1
= (i
+ 1) % 3;
532 int i2
= (i
+ 2) % 3;
534 fs_reg temp
= fs_reg(this, glsl_type::float_type
);
535 fs_reg neg_src1_1
= regoffset(src
[1], i1
);
536 neg_src1_1
.negate
= !neg_src1_1
.negate
;
537 emit(MUL(temp
, regoffset(src
[0], i2
), neg_src1_1
));
538 emit(MUL(regoffset(dst
, i
),
539 regoffset(src
[0], i1
), regoffset(src
[1], i2
)));
540 emit(ADD(regoffset(dst
, i
), regoffset(dst
, i
), temp
));
// Unsupported opcodes are reported through _mesa_problem().
549 _mesa_problem(ctx
, "Unsupported opcode %s in fragment program\n",
550 _mesa_opcode_string(fpi
->Opcode
));
553 /* To handle saturates, we emit a MOV with a saturate bit, which
554 * optimization should fold into the preceding instructions when safe.
// Write-back into the real destination register, one MOV per enabled
// channel.
556 if (fpi
->Opcode
!= OPCODE_END
) {
557 fs_reg real_dst
= get_fp_dst_reg(&fpi
->DstReg
);
559 for (int i
= 0; i
< 4; i
++) {
560 if (fpi
->DstReg
.WriteMask
& (1 << i
)) {
561 fs_inst
*inst
= emit(MOV(regoffset(real_dst
, i
),
563 inst
->saturate
= fpi
->SaturateMode
;
571 * Fragment depth has this strange convention of being the .z component of
572 * a vec4. emit_fb_write() wants to see a float value, instead.
574 this->current_annotation
= "result.depth write";
575 if (frag_depth
.file
!= BAD_FILE
) {
576 fs_reg temp
= fs_reg(this, glsl_type::float_type
);
577 emit(MOV(temp
, regoffset(frag_depth
, 2)));
// fs_visitor::setup_fp_regs
//
// Builds the register tables used when translating a fragment program:
//   - fp_temp_regs: an array of fp->Base.NumTemporaries entries, each set
//     to a fresh vec4 register;
//   - when dispatch_width == 8, a pointer to the float of each of the four
//     components of every program parameter is appended to
//     c->prog_data.param (advancing c->prog_data.nr_params);
//   - fp_input_regs: VARYING_SLOT_MAX entries; for every slot whose bit is
//     set in fp->Base.InputsRead an ir_variable is created and the slot is
//     filled from emit_fragcoord_interpolation(),
//     emit_frontfacing_interpolation() or emit_general_interpolation().
//     For VARYING_SLOT_FOGC, components 1, 2 and 3 are then overwritten
//     with 0.0f, 0.0f and 1.0f.
// current_annotation is reset to NULL at the end.
//
// NOTE(review): the parameter loop header, the ir_variable constructor
// arguments and the switch on the slot index are missing from this extract.
583 fs_visitor::setup_fp_regs()
585 /* PROGRAM_TEMPORARY */
586 int num_temp
= fp
->Base
.NumTemporaries
;
587 fp_temp_regs
= rzalloc_array(mem_ctx
, fs_reg
, num_temp
);
588 for (int i
= 0; i
< num_temp
; i
++)
589 fp_temp_regs
[i
] = fs_reg(this, glsl_type::vec4_type
);
591 /* PROGRAM_STATE_VAR etc. */
// Parameter pointers are only appended in the dispatch_width == 8 case.
592 if (dispatch_width
== 8) {
594 p
< fp
->Base
.Parameters
->NumParameters
; p
++) {
595 for (unsigned int i
= 0; i
< 4; i
++) {
596 c
->prog_data
.param
[c
->prog_data
.nr_params
++] =
597 &fp
->Base
.Parameters
->ParameterValues
[p
][i
].f
;
602 fp_input_regs
= rzalloc_array(mem_ctx
, fs_reg
, VARYING_SLOT_MAX
);
603 for (int i
= 0; i
< VARYING_SLOT_MAX
; i
++) {
604 if (fp
->Base
.InputsRead
& BITFIELD64_BIT(i
)) {
605 /* Make up a dummy instruction to reuse code for emitting
608 ir_variable
*ir
= new(mem_ctx
) ir_variable(glsl_type::vec4_type
,
613 this->current_annotation
= ralloc_asprintf(ctx
, "interpolate input %d",
// Position input: the program's pixel-center and origin settings are copied
// onto the variable before interpolation.
617 case VARYING_SLOT_POS
:
618 ir
->pixel_center_integer
= fp
->PixelCenterInteger
;
619 ir
->origin_upper_left
= fp
->OriginUpperLeft
;
620 fp_input_regs
[i
] = *emit_fragcoord_interpolation(ir
);
622 case VARYING_SLOT_FACE
:
623 fp_input_regs
[i
] = *emit_frontfacing_interpolation(ir
);
626 fp_input_regs
[i
] = *emit_general_interpolation(ir
);
// Fog coordinate: components 1..3 are forced to (0.0, 0.0, 1.0).
628 if (i
== VARYING_SLOT_FOGC
) {
629 emit(MOV(regoffset(fp_input_regs
[i
], 1), fs_reg(0.0f
)));
630 emit(MOV(regoffset(fp_input_regs
[i
], 2), fs_reg(0.0f
)));
631 emit(MOV(regoffset(fp_input_regs
[i
], 3), fs_reg(1.0f
)));
637 this->current_annotation
= NULL
;
// fs_visitor::get_fp_dst_reg
//
// Maps a prog_dst_register to the fs_reg that backs it.
//   - PROGRAM_TEMPORARY: returns fp_temp_regs[dst->Index].
//   - Index == FRAG_RESULT_DEPTH: frag_depth is allocated as a vec4 on first
//     use (while its file is BAD_FILE).
//   - Index == FRAG_RESULT_COLOR: outputs[0] is allocated as a vec4 on first
//     use with output_components[0] = 4, and outputs[1] up to
//     c->key.nr_color_regions - 1 are set to the same register and
//     component count.
//   - otherwise the index is taken relative to FRAG_RESULT_DATA0; that
//     output is allocated as a vec4 on first use, its component count is
//     set to 4, and it is returned.
//   - a register file with no matching case is reported through
//     _mesa_problem() and a fresh vec4 register is returned.
//
// NOTE(review): the switch header, the case label introducing the
// FRAG_RESULT_* branches, the body of the PROGRAM_UNDEFINED case and the
// returns of the depth and color branches are missing from this extract.
643 fs_visitor::get_fp_dst_reg(const prog_dst_register
*dst
)
646 case PROGRAM_TEMPORARY
:
647 return fp_temp_regs
[dst
->Index
];
650 if (dst
->Index
== FRAG_RESULT_DEPTH
) {
651 if (frag_depth
.file
== BAD_FILE
)
652 frag_depth
= fs_reg(this, glsl_type::vec4_type
);
654 } else if (dst
->Index
== FRAG_RESULT_COLOR
) {
655 if (outputs
[0].file
== BAD_FILE
) {
656 outputs
[0] = fs_reg(this, glsl_type::vec4_type
);
657 output_components
[0] = 4;
659 /* Tell emit_fb_writes() to smear fragment.color across all the
// Every additional color region aliases outputs[0].
662 for (int i
= 1; i
< c
->key
.nr_color_regions
; i
++) {
663 outputs
[i
] = outputs
[0];
664 output_components
[i
] = output_components
[0];
// Remaining outputs are indexed relative to FRAG_RESULT_DATA0.
669 int output_index
= dst
->Index
- FRAG_RESULT_DATA0
;
670 if (outputs
[output_index
].file
== BAD_FILE
) {
671 outputs
[output_index
] = fs_reg(this, glsl_type::vec4_type
);
673 output_components
[output_index
] = 4;
674 return outputs
[output_index
];
677 case PROGRAM_UNDEFINED
:
// Fallback for an unrecognised register file: report it and hand back a
// fresh vec4 register.
681 _mesa_problem(ctx
, "bad dst register file: %s\n",
682 _mesa_register_file_name((gl_register_file
)dst
->File
));
683 return fs_reg(this, glsl_type::vec4_type
);
688 fs_visitor::get_fp_src_reg(const prog_src_register
*src
)
690 struct gl_program_parameter_list
*plist
= fp
->Base
.Parameters
;
697 case PROGRAM_UNDEFINED
:
699 case PROGRAM_TEMPORARY
:
700 result
= fp_temp_regs
[src
->Index
];
704 result
= fp_input_regs
[src
->Index
];
707 case PROGRAM_STATE_VAR
:
708 case PROGRAM_UNIFORM
:
709 case PROGRAM_CONSTANT
:
710 /* We actually want to look at the type in the Parameters list for this,
711 * because this lets us upload constant builtin uniforms, as actual
714 switch (plist
->Parameters
[src
->Index
].Type
) {
715 case PROGRAM_CONSTANT
: {
716 result
= fs_reg(this, glsl_type::vec4_type
);
718 for (int i
= 0; i
< 4; i
++) {
719 emit(MOV(regoffset(result
, i
),
720 fs_reg(plist
->ParameterValues
[src
->Index
][i
].f
)));
725 case PROGRAM_STATE_VAR
:
726 case PROGRAM_UNIFORM
:
727 result
= fs_reg(UNIFORM
, src
->Index
* 4);
731 _mesa_problem(ctx
, "bad uniform src register file: %s\n",
732 _mesa_register_file_name((gl_register_file
)src
->File
));
733 return fs_reg(this, glsl_type::vec4_type
);
738 _mesa_problem(ctx
, "bad src register file: %s\n",
739 _mesa_register_file_name((gl_register_file
)src
->File
));
740 return fs_reg(this, glsl_type::vec4_type
);
743 if (src
->Swizzle
!= SWIZZLE_NOOP
|| src
->Negate
) {
744 fs_reg unswizzled
= result
;
745 result
= fs_reg(this, glsl_type::vec4_type
);
746 for (int i
= 0; i
< 4; i
++) {
747 bool negate
= src
->Negate
& (1 << i
);
748 /* The ZERO, ONE, and Negate options are only used for OPCODE_SWZ,
749 * but it costs us nothing to support it.
751 int src_swiz
= GET_SWZ(src
->Swizzle
, i
);
752 if (src_swiz
== SWIZZLE_ZERO
) {
753 emit(MOV(regoffset(result
, i
), fs_reg(0.0f
)));
754 } else if (src_swiz
== SWIZZLE_ONE
) {
755 emit(MOV(regoffset(result
, i
),
756 negate
? fs_reg(-1.0f
) : fs_reg(1.0f
)));
758 fs_reg src
= regoffset(unswizzled
, src_swiz
);
760 src
.negate
= !src
.negate
;
761 emit(MOV(regoffset(result
, i
), src
));