/*
 * Copyright © 2012 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

/** @file brw_fs_fp.cpp
 *
 * Implementation of the compiler for GL_ARB_fragment_program shaders on top
 * of the GLSL compiler backend.
 */

30 #include "brw_context.h"
34 fs_visitor::emit_fp_alu1(enum opcode opcode
,
35 const struct prog_instruction
*fpi
,
36 fs_reg dst
, fs_reg src
)
38 for (int i
= 0; i
< 4; i
++) {
39 if (fpi
->DstReg
.WriteMask
& (1 << i
))
40 emit(opcode
, offset(dst
, i
), offset(src
, i
));
45 fs_visitor::emit_fp_alu2(enum opcode opcode
,
46 const struct prog_instruction
*fpi
,
47 fs_reg dst
, fs_reg src0
, fs_reg src1
)
49 for (int i
= 0; i
< 4; i
++) {
50 if (fpi
->DstReg
.WriteMask
& (1 << i
))
51 emit(opcode
, offset(dst
, i
),
52 offset(src0
, i
), offset(src1
, i
));
57 fs_visitor::emit_fp_minmax(const prog_instruction
*fpi
,
58 fs_reg dst
, fs_reg src0
, fs_reg src1
)
60 uint32_t conditionalmod
;
61 if (fpi
->Opcode
== OPCODE_MIN
)
62 conditionalmod
= BRW_CONDITIONAL_L
;
64 conditionalmod
= BRW_CONDITIONAL_GE
;
66 for (int i
= 0; i
< 4; i
++) {
67 if (fpi
->DstReg
.WriteMask
& (1 << i
)) {
68 emit_minmax(conditionalmod
, offset(dst
, i
),
69 offset(src0
, i
), offset(src1
, i
));
75 fs_visitor::emit_fp_sop(uint32_t conditional_mod
,
76 const struct prog_instruction
*fpi
,
77 fs_reg dst
, fs_reg src0
, fs_reg src1
,
80 for (int i
= 0; i
< 4; i
++) {
81 if (fpi
->DstReg
.WriteMask
& (1 << i
)) {
84 emit(CMP(reg_null_d
, offset(src0
, i
), offset(src1
, i
),
87 inst
= emit(BRW_OPCODE_SEL
, offset(dst
, i
), one
, fs_reg(0.0f
));
88 inst
->predicate
= BRW_PREDICATE_NORMAL
;
94 fs_visitor::emit_fp_scalar_write(const struct prog_instruction
*fpi
,
95 fs_reg dst
, fs_reg src
)
97 for (int i
= 0; i
< 4; i
++) {
98 if (fpi
->DstReg
.WriteMask
& (1 << i
))
99 emit(MOV(offset(dst
, i
), src
));
104 fs_visitor::emit_fp_scalar_math(enum opcode opcode
,
105 const struct prog_instruction
*fpi
,
106 fs_reg dst
, fs_reg src
)
108 fs_reg temp
= fs_reg(this, glsl_type::float_type
);
109 emit_math(opcode
, temp
, src
);
110 emit_fp_scalar_write(fpi
, dst
, temp
);
114 fs_visitor::emit_fragment_program_code()
118 fs_reg null
= fs_reg(brw_null_reg());
120 /* Keep a reg with 1.0 around, for reuse by emit_fp_sop so that it can just
130 fs_reg one
= fs_reg(this, glsl_type::float_type
);
131 emit(MOV(one
, fs_reg(1.0f
)));
133 for (unsigned int insn
= 0; insn
< prog
->NumInstructions
; insn
++) {
134 const struct prog_instruction
*fpi
= &prog
->Instructions
[insn
];
137 //_mesa_print_instruction(fpi);
142 /* We always emit into a temporary destination register to avoid
145 dst
= fs_reg(this, glsl_type::vec4_type
);
147 for (int i
= 0; i
< 3; i
++)
148 src
[i
] = get_fp_src_reg(&fpi
->SrcReg
[i
]);
150 switch (fpi
->Opcode
) {
153 src
[0].negate
= false;
154 emit_fp_alu1(BRW_OPCODE_MOV
, fpi
, dst
, src
[0]);
158 emit_fp_alu2(BRW_OPCODE_ADD
, fpi
, dst
, src
[0], src
[1]);
162 for (int i
= 0; i
< 4; i
++) {
163 if (fpi
->DstReg
.WriteMask
& (1 << i
)) {
166 emit(CMP(null
, offset(src
[0], i
), fs_reg(0.0f
),
169 inst
= emit(BRW_OPCODE_SEL
, offset(dst
, i
),
170 offset(src
[1], i
), offset(src
[2], i
));
171 inst
->predicate
= BRW_PREDICATE_NORMAL
;
177 emit_fp_scalar_math(SHADER_OPCODE_COS
, fpi
, dst
, src
[0]);
184 fs_reg mul
= fs_reg(this, glsl_type::float_type
);
185 fs_reg acc
= fs_reg(this, glsl_type::float_type
);
188 switch (fpi
->Opcode
) {
189 case OPCODE_DP2
: count
= 2; break;
190 case OPCODE_DP3
: count
= 3; break;
191 case OPCODE_DP4
: count
= 4; break;
192 case OPCODE_DPH
: count
= 3; break;
193 default: assert(!"not reached"); count
= 0; break;
196 emit(MUL(acc
, offset(src
[0], 0), offset(src
[1], 0)));
197 for (int i
= 1; i
< count
; i
++) {
198 emit(MUL(mul
, offset(src
[0], i
), offset(src
[1], i
)));
199 emit(ADD(acc
, acc
, mul
));
202 if (fpi
->Opcode
== OPCODE_DPH
)
203 emit(ADD(acc
, acc
, offset(src
[1], 3)));
205 emit_fp_scalar_write(fpi
, dst
, acc
);
210 if (fpi
->DstReg
.WriteMask
& WRITEMASK_X
)
211 emit(MOV(dst
, fs_reg(1.0f
)));
212 if (fpi
->DstReg
.WriteMask
& WRITEMASK_Y
) {
213 emit(MUL(offset(dst
, 1),
214 offset(src
[0], 1), offset(src
[1], 1)));
216 if (fpi
->DstReg
.WriteMask
& WRITEMASK_Z
)
217 emit(MOV(offset(dst
, 2), offset(src
[0], 2)));
218 if (fpi
->DstReg
.WriteMask
& WRITEMASK_W
)
219 emit(MOV(offset(dst
, 3), offset(src
[1], 3)));
223 emit_fp_scalar_math(SHADER_OPCODE_EXP2
, fpi
, dst
, src
[0]);
227 emit_fp_alu1(BRW_OPCODE_RNDD
, fpi
, dst
, src
[0]);
231 emit_fp_alu1(BRW_OPCODE_FRC
, fpi
, dst
, src
[0]);
235 for (int i
= 0; i
< 4; i
++) {
236 /* In most cases the argument to a KIL will be something like
237 * TEMP[0].wwww, so there's no point in checking whether .w is < 0
241 GET_SWZ(fpi
->SrcReg
[0].Swizzle
, i
) ==
242 GET_SWZ(fpi
->SrcReg
[0].Swizzle
, i
- 1) &&
243 ((fpi
->SrcReg
[0].Negate
>> i
) & 1) ==
244 ((fpi
->SrcReg
[0].Negate
>> (i
- 1)) & 1)) {
249 /* Emit an instruction that's predicated on the current
250 * undiscarded pixels, and updates just those pixels to be
253 fs_inst
*cmp
= emit(CMP(null
, offset(src
[0], i
), fs_reg(0.0f
),
254 BRW_CONDITIONAL_GE
));
255 cmp
->predicate
= BRW_PREDICATE_NORMAL
;
256 cmp
->flag_subreg
= 1;
262 emit_fp_scalar_math(SHADER_OPCODE_LOG2
, fpi
, dst
, src
[0]);
266 /* From the ARB_fragment_program spec:
268 * tmp = VectorLoad(op0);
269 * if (tmp.x < 0) tmp.x = 0;
270 * if (tmp.y < 0) tmp.y = 0;
271 * if (tmp.w < -(128.0-epsilon)) tmp.w = -(128.0-epsilon);
272 * else if (tmp.w > 128-epsilon) tmp.w = 128-epsilon;
275 * result.z = (tmp.x > 0) ? RoughApproxPower(tmp.y, tmp.w) : 0.0;
278 * Note that we don't do the clamping to +/- 128. We didn't in
279 * brw_wm_emit.c either.
281 if (fpi
->DstReg
.WriteMask
& WRITEMASK_X
)
282 emit(MOV(offset(dst
, 0), fs_reg(1.0f
)));
284 if (fpi
->DstReg
.WriteMask
& WRITEMASK_YZ
) {
286 emit(CMP(null
, offset(src
[0], 0), fs_reg(0.0f
),
287 BRW_CONDITIONAL_LE
));
289 if (fpi
->DstReg
.WriteMask
& WRITEMASK_Y
) {
290 emit(MOV(offset(dst
, 1), offset(src
[0], 0)));
291 inst
= emit(MOV(offset(dst
, 1), fs_reg(0.0f
)));
292 inst
->predicate
= BRW_PREDICATE_NORMAL
;
295 if (fpi
->DstReg
.WriteMask
& WRITEMASK_Z
) {
296 emit_math(SHADER_OPCODE_POW
, offset(dst
, 2),
297 offset(src
[0], 1), offset(src
[0], 3));
299 inst
= emit(MOV(offset(dst
, 2), fs_reg(0.0f
)));
300 inst
->predicate
= BRW_PREDICATE_NORMAL
;
304 if (fpi
->DstReg
.WriteMask
& WRITEMASK_W
)
305 emit(MOV(offset(dst
, 3), fs_reg(1.0f
)));
310 for (int i
= 0; i
< 4; i
++) {
311 if (fpi
->DstReg
.WriteMask
& (1 << i
)) {
312 fs_reg a
= offset(src
[0], i
);
313 fs_reg y
= offset(src
[1], i
);
314 fs_reg x
= offset(src
[2], i
);
315 emit_lrp(offset(dst
, i
), x
, y
, a
);
321 for (int i
= 0; i
< 4; i
++) {
322 if (fpi
->DstReg
.WriteMask
& (1 << i
)) {
323 fs_reg temp
= fs_reg(this, glsl_type::float_type
);
324 emit(MUL(temp
, offset(src
[0], i
), offset(src
[1], i
)));
325 emit(ADD(offset(dst
, i
), temp
, offset(src
[2], i
)));
331 emit_fp_minmax(fpi
, dst
, src
[0], src
[1]);
335 emit_fp_alu1(BRW_OPCODE_MOV
, fpi
, dst
, src
[0]);
339 emit_fp_minmax(fpi
, dst
, src
[0], src
[1]);
343 emit_fp_alu2(BRW_OPCODE_MUL
, fpi
, dst
, src
[0], src
[1]);
347 fs_reg temp
= fs_reg(this, glsl_type::float_type
);
348 emit_math(SHADER_OPCODE_POW
, temp
, src
[0], src
[1]);
349 emit_fp_scalar_write(fpi
, dst
, temp
);
354 emit_fp_scalar_math(SHADER_OPCODE_RCP
, fpi
, dst
, src
[0]);
358 emit_fp_scalar_math(SHADER_OPCODE_RSQ
, fpi
, dst
, src
[0]);
362 if (fpi
->DstReg
.WriteMask
& WRITEMASK_X
) {
363 emit_math(SHADER_OPCODE_COS
, offset(dst
, 0),
367 if (fpi
->DstReg
.WriteMask
& WRITEMASK_Y
) {
368 emit_math(SHADER_OPCODE_SIN
, offset(dst
, 1),
374 emit_fp_sop(BRW_CONDITIONAL_GE
, fpi
, dst
, src
[0], src
[1], one
);
378 emit_fp_scalar_math(SHADER_OPCODE_SIN
, fpi
, dst
, src
[0]);
382 emit_fp_sop(BRW_CONDITIONAL_L
, fpi
, dst
, src
[0], src
[1], one
);
386 fs_reg neg_src1
= src
[1];
387 neg_src1
.negate
= !src
[1].negate
;
389 emit_fp_alu2(BRW_OPCODE_ADD
, fpi
, dst
, src
[0], neg_src1
);
396 /* We piggy-back on the GLSL IR support for texture setup. To do so,
397 * we have to cook up an ir_texture that has the coordinate field
398 * with appropriate type, and shadow_comparitor set or not. All the
399 * other properties of ir_texture are passed in as arguments to the
400 * emit_texture_gen* function.
402 ir_texture
*ir
= NULL
;
406 fs_reg coordinate
= src
[0];
410 switch (fpi
->Opcode
) {
412 ir
= new(mem_ctx
) ir_texture(ir_tex
);
415 ir
= new(mem_ctx
) ir_texture(ir_tex
);
417 coordinate
= fs_reg(this, glsl_type::vec3_type
);
418 fs_reg invproj
= fs_reg(this, glsl_type::float_type
);
419 emit_math(SHADER_OPCODE_RCP
, invproj
, offset(src
[0], 3));
420 for (int i
= 0; i
< 3; i
++) {
421 emit(MUL(offset(coordinate
, i
),
422 offset(src
[0], i
), invproj
));
427 ir
= new(mem_ctx
) ir_texture(ir_txb
);
428 lod
= offset(src
[0], 3);
431 assert(!"not reached");
435 ir
->type
= glsl_type::vec4_type
;
437 const glsl_type
*coordinate_type
;
438 switch (fpi
->TexSrcTarget
) {
439 case TEXTURE_1D_INDEX
:
440 coordinate_type
= glsl_type::float_type
;
443 case TEXTURE_2D_INDEX
:
444 case TEXTURE_1D_ARRAY_INDEX
:
445 case TEXTURE_RECT_INDEX
:
446 case TEXTURE_EXTERNAL_INDEX
:
447 coordinate_type
= glsl_type::vec2_type
;
450 case TEXTURE_3D_INDEX
:
451 case TEXTURE_2D_ARRAY_INDEX
:
452 coordinate_type
= glsl_type::vec3_type
;
455 case TEXTURE_CUBE_INDEX
: {
456 coordinate_type
= glsl_type::vec3_type
;
458 fs_reg temp
= fs_reg(this, glsl_type::float_type
);
459 fs_reg cubecoord
= fs_reg(this, glsl_type::vec3_type
);
460 fs_reg abscoord
= coordinate
;
461 abscoord
.negate
= false;
463 emit_minmax(BRW_CONDITIONAL_GE
, temp
,
464 offset(abscoord
, 0), offset(abscoord
, 1));
465 emit_minmax(BRW_CONDITIONAL_GE
, temp
,
466 temp
, offset(abscoord
, 2));
467 emit_math(SHADER_OPCODE_RCP
, temp
, temp
);
468 for (int i
= 0; i
< 3; i
++) {
469 emit(MUL(offset(cubecoord
, i
),
470 offset(coordinate
, i
), temp
));
473 coordinate
= cubecoord
;
478 assert(!"not reached");
479 coordinate_type
= glsl_type::vec2_type
;
483 ir_constant_data junk_data
;
484 ir
->coordinate
= new(mem_ctx
) ir_constant(coordinate_type
, &junk_data
);
486 if (fpi
->TexShadow
) {
487 shadow_c
= offset(coordinate
, 2);
488 ir
->shadow_comparitor
= new(mem_ctx
) ir_constant(0.0f
);
491 coordinate
= rescale_texcoord(ir
, coordinate
,
492 fpi
->TexSrcTarget
== TEXTURE_RECT_INDEX
,
493 fpi
->TexSrcUnit
, fpi
->TexSrcUnit
);
497 inst
= emit_texture_gen7(ir
, dst
, coordinate
, shadow_c
, lod
, dpdy
, sample_index
, fs_reg(0u), fpi
->TexSrcUnit
);
498 } else if (brw
->gen
>= 5) {
499 inst
= emit_texture_gen5(ir
, dst
, coordinate
, shadow_c
, lod
, dpdy
, sample_index
);
501 inst
= emit_texture_gen4(ir
, dst
, coordinate
, shadow_c
, lod
, dpdy
);
504 inst
->sampler
= fpi
->TexSrcUnit
;
505 inst
->shadow_compare
= fpi
->TexShadow
;
507 /* Reuse the GLSL swizzle_result() handler. */
508 swizzle_result(ir
, dst
, fpi
->TexSrcUnit
);
515 /* Note that SWZ's extended swizzles are handled in the general
516 * get_src_reg() code.
518 emit_fp_alu1(BRW_OPCODE_MOV
, fpi
, dst
, src
[0]);
522 for (int i
= 0; i
< 3; i
++) {
523 if (fpi
->DstReg
.WriteMask
& (1 << i
)) {
524 int i1
= (i
+ 1) % 3;
525 int i2
= (i
+ 2) % 3;
527 fs_reg temp
= fs_reg(this, glsl_type::float_type
);
528 fs_reg neg_src1_1
= offset(src
[1], i1
);
529 neg_src1_1
.negate
= !neg_src1_1
.negate
;
530 emit(MUL(temp
, offset(src
[0], i2
), neg_src1_1
));
531 emit(MUL(offset(dst
, i
),
532 offset(src
[0], i1
), offset(src
[1], i2
)));
533 emit(ADD(offset(dst
, i
), offset(dst
, i
), temp
));
542 _mesa_problem(ctx
, "Unsupported opcode %s in fragment program\n",
543 _mesa_opcode_string(fpi
->Opcode
));
546 /* To handle saturates, we emit a MOV with a saturate bit, which
547 * optimization should fold into the preceding instructions when safe.
549 if (fpi
->Opcode
!= OPCODE_END
) {
550 fs_reg real_dst
= get_fp_dst_reg(&fpi
->DstReg
);
552 for (int i
= 0; i
< 4; i
++) {
553 if (fpi
->DstReg
.WriteMask
& (1 << i
)) {
554 fs_inst
*inst
= emit(MOV(offset(real_dst
, i
),
556 inst
->saturate
= fpi
->SaturateMode
;
564 * Fragment depth has this strange convention of being the .z component of
565 * a vec4. emit_fb_write() wants to see a float value, instead.
567 this->current_annotation
= "result.depth write";
568 if (frag_depth
.file
!= BAD_FILE
) {
569 fs_reg temp
= fs_reg(this, glsl_type::float_type
);
570 emit(MOV(temp
, offset(frag_depth
, 2)));
576 fs_visitor::setup_fp_regs()
578 /* PROGRAM_TEMPORARY */
579 int num_temp
= prog
->NumTemporaries
;
580 fp_temp_regs
= rzalloc_array(mem_ctx
, fs_reg
, num_temp
);
581 for (int i
= 0; i
< num_temp
; i
++)
582 fp_temp_regs
[i
] = fs_reg(this, glsl_type::vec4_type
);
584 /* PROGRAM_STATE_VAR etc. */
585 if (dispatch_width
== 8) {
587 p
< prog
->Parameters
->NumParameters
; p
++) {
588 for (unsigned int i
= 0; i
< 4; i
++) {
589 stage_prog_data
->param
[uniforms
++] =
590 &prog
->Parameters
->ParameterValues
[p
][i
].f
;
595 fp_input_regs
= rzalloc_array(mem_ctx
, fs_reg
, VARYING_SLOT_MAX
);
596 for (int i
= 0; i
< VARYING_SLOT_MAX
; i
++) {
597 if (prog
->InputsRead
& BITFIELD64_BIT(i
)) {
598 /* Make up a dummy instruction to reuse code for emitting
601 ir_variable
*ir
= new(mem_ctx
) ir_variable(glsl_type::vec4_type
,
604 ir
->data
.location
= i
;
606 this->current_annotation
= ralloc_asprintf(ctx
, "interpolate input %d",
610 case VARYING_SLOT_POS
:
611 ir
->data
.pixel_center_integer
= fp
->PixelCenterInteger
;
612 ir
->data
.origin_upper_left
= fp
->OriginUpperLeft
;
613 fp_input_regs
[i
] = *emit_fragcoord_interpolation(ir
);
615 case VARYING_SLOT_FACE
:
616 fp_input_regs
[i
] = *emit_frontfacing_interpolation(ir
);
619 fp_input_regs
[i
] = *emit_general_interpolation(ir
);
621 if (i
== VARYING_SLOT_FOGC
) {
622 emit(MOV(offset(fp_input_regs
[i
], 1), fs_reg(0.0f
)));
623 emit(MOV(offset(fp_input_regs
[i
], 2), fs_reg(0.0f
)));
624 emit(MOV(offset(fp_input_regs
[i
], 3), fs_reg(1.0f
)));
630 this->current_annotation
= NULL
;
636 fs_visitor::get_fp_dst_reg(const prog_dst_register
*dst
)
639 case PROGRAM_TEMPORARY
:
640 return fp_temp_regs
[dst
->Index
];
643 if (dst
->Index
== FRAG_RESULT_DEPTH
) {
644 if (frag_depth
.file
== BAD_FILE
)
645 frag_depth
= fs_reg(this, glsl_type::vec4_type
);
647 } else if (dst
->Index
== FRAG_RESULT_COLOR
) {
648 if (outputs
[0].file
== BAD_FILE
) {
649 outputs
[0] = fs_reg(this, glsl_type::vec4_type
);
650 output_components
[0] = 4;
652 /* Tell emit_fb_writes() to smear fragment.color across all the
655 for (int i
= 1; i
< c
->key
.nr_color_regions
; i
++) {
656 outputs
[i
] = outputs
[0];
657 output_components
[i
] = output_components
[0];
662 int output_index
= dst
->Index
- FRAG_RESULT_DATA0
;
663 if (outputs
[output_index
].file
== BAD_FILE
) {
664 outputs
[output_index
] = fs_reg(this, glsl_type::vec4_type
);
666 output_components
[output_index
] = 4;
667 return outputs
[output_index
];
670 case PROGRAM_UNDEFINED
:
674 _mesa_problem(ctx
, "bad dst register file: %s\n",
675 _mesa_register_file_name((gl_register_file
)dst
->File
));
676 return fs_reg(this, glsl_type::vec4_type
);
681 fs_visitor::get_fp_src_reg(const prog_src_register
*src
)
683 struct gl_program_parameter_list
*plist
= prog
->Parameters
;
690 case PROGRAM_UNDEFINED
:
692 case PROGRAM_TEMPORARY
:
693 result
= fp_temp_regs
[src
->Index
];
697 result
= fp_input_regs
[src
->Index
];
700 case PROGRAM_STATE_VAR
:
701 case PROGRAM_UNIFORM
:
702 case PROGRAM_CONSTANT
:
703 /* We actually want to look at the type in the Parameters list for this,
704 * because this lets us upload constant builtin uniforms, as actual
707 switch (plist
->Parameters
[src
->Index
].Type
) {
708 case PROGRAM_CONSTANT
: {
709 result
= fs_reg(this, glsl_type::vec4_type
);
711 for (int i
= 0; i
< 4; i
++) {
712 emit(MOV(offset(result
, i
),
713 fs_reg(plist
->ParameterValues
[src
->Index
][i
].f
)));
718 case PROGRAM_STATE_VAR
:
719 case PROGRAM_UNIFORM
:
720 result
= fs_reg(UNIFORM
, src
->Index
* 4);
724 _mesa_problem(ctx
, "bad uniform src register file: %s\n",
725 _mesa_register_file_name((gl_register_file
)src
->File
));
726 return fs_reg(this, glsl_type::vec4_type
);
731 _mesa_problem(ctx
, "bad src register file: %s\n",
732 _mesa_register_file_name((gl_register_file
)src
->File
));
733 return fs_reg(this, glsl_type::vec4_type
);
736 if (src
->Swizzle
!= SWIZZLE_NOOP
|| src
->Negate
) {
737 fs_reg unswizzled
= result
;
738 result
= fs_reg(this, glsl_type::vec4_type
);
739 for (int i
= 0; i
< 4; i
++) {
740 bool negate
= src
->Negate
& (1 << i
);
741 /* The ZERO, ONE, and Negate options are only used for OPCODE_SWZ,
742 * but it costs us nothing to support it.
744 int src_swiz
= GET_SWZ(src
->Swizzle
, i
);
745 if (src_swiz
== SWIZZLE_ZERO
) {
746 emit(MOV(offset(result
, i
), fs_reg(0.0f
)));
747 } else if (src_swiz
== SWIZZLE_ONE
) {
748 emit(MOV(offset(result
, i
),
749 negate
? fs_reg(-1.0f
) : fs_reg(1.0f
)));
751 fs_reg src
= offset(unswizzled
, src_swiz
);
753 src
.negate
= !src
.negate
;
754 emit(MOV(offset(result
, i
), src
));