/*
 * Copyright © 2012 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */
/** @file brw_fs_fp.cpp
 *
 * Implementation of the compiler for GL_ARB_fragment_program shaders on top
 * of the GLSL compiler backend.
 */
30 #include "brw_context.h"
34 fs_visitor::emit_fp_alu1(enum opcode opcode
,
35 const struct prog_instruction
*fpi
,
36 fs_reg dst
, fs_reg src
)
38 for (int i
= 0; i
< 4; i
++) {
39 if (fpi
->DstReg
.WriteMask
& (1 << i
))
40 emit(opcode
, offset(dst
, i
), offset(src
, i
));
45 fs_visitor::emit_fp_alu2(enum opcode opcode
,
46 const struct prog_instruction
*fpi
,
47 fs_reg dst
, fs_reg src0
, fs_reg src1
)
49 for (int i
= 0; i
< 4; i
++) {
50 if (fpi
->DstReg
.WriteMask
& (1 << i
))
51 emit(opcode
, offset(dst
, i
),
52 offset(src0
, i
), offset(src1
, i
));
57 fs_visitor::emit_fp_minmax(const prog_instruction
*fpi
,
58 fs_reg dst
, fs_reg src0
, fs_reg src1
)
60 enum brw_conditional_mod conditionalmod
;
61 if (fpi
->Opcode
== OPCODE_MIN
)
62 conditionalmod
= BRW_CONDITIONAL_L
;
64 conditionalmod
= BRW_CONDITIONAL_GE
;
66 for (int i
= 0; i
< 4; i
++) {
67 if (fpi
->DstReg
.WriteMask
& (1 << i
)) {
68 emit_minmax(conditionalmod
, offset(dst
, i
),
69 offset(src0
, i
), offset(src1
, i
));
75 fs_visitor::emit_fp_sop(enum brw_conditional_mod conditional_mod
,
76 const struct prog_instruction
*fpi
,
77 fs_reg dst
, fs_reg src0
, fs_reg src1
,
80 for (int i
= 0; i
< 4; i
++) {
81 if (fpi
->DstReg
.WriteMask
& (1 << i
)) {
84 emit(CMP(reg_null_d
, offset(src0
, i
), offset(src1
, i
),
87 inst
= emit(BRW_OPCODE_SEL
, offset(dst
, i
), one
, fs_reg(0.0f
));
88 inst
->predicate
= BRW_PREDICATE_NORMAL
;
94 fs_visitor::emit_fp_scalar_write(const struct prog_instruction
*fpi
,
95 fs_reg dst
, fs_reg src
)
97 for (int i
= 0; i
< 4; i
++) {
98 if (fpi
->DstReg
.WriteMask
& (1 << i
))
99 emit(MOV(offset(dst
, i
), src
));
104 fs_visitor::emit_fp_scalar_math(enum opcode opcode
,
105 const struct prog_instruction
*fpi
,
106 fs_reg dst
, fs_reg src
)
108 fs_reg temp
= fs_reg(this, glsl_type::float_type
);
109 emit_math(opcode
, temp
, src
);
110 emit_fp_scalar_write(fpi
, dst
, temp
);
114 fs_visitor::emit_fragment_program_code()
118 fs_reg null
= fs_reg(brw_null_reg());
120 /* Keep a reg with 1.0 around, for reuse by emit_fp_sop so that it can just
130 fs_reg one
= fs_reg(this, glsl_type::float_type
);
131 emit(MOV(one
, fs_reg(1.0f
)));
133 for (unsigned int insn
= 0; insn
< prog
->NumInstructions
; insn
++) {
134 const struct prog_instruction
*fpi
= &prog
->Instructions
[insn
];
137 //_mesa_print_instruction(fpi);
142 /* We always emit into a temporary destination register to avoid
145 dst
= fs_reg(this, glsl_type::vec4_type
);
147 for (int i
= 0; i
< 3; i
++)
148 src
[i
] = get_fp_src_reg(&fpi
->SrcReg
[i
]);
150 switch (fpi
->Opcode
) {
153 src
[0].negate
= false;
154 emit_fp_alu1(BRW_OPCODE_MOV
, fpi
, dst
, src
[0]);
158 emit_fp_alu2(BRW_OPCODE_ADD
, fpi
, dst
, src
[0], src
[1]);
162 for (int i
= 0; i
< 4; i
++) {
163 if (fpi
->DstReg
.WriteMask
& (1 << i
)) {
166 emit(CMP(null
, offset(src
[0], i
), fs_reg(0.0f
),
169 inst
= emit(BRW_OPCODE_SEL
, offset(dst
, i
),
170 offset(src
[1], i
), offset(src
[2], i
));
171 inst
->predicate
= BRW_PREDICATE_NORMAL
;
177 emit_fp_scalar_math(SHADER_OPCODE_COS
, fpi
, dst
, src
[0]);
184 fs_reg mul
= fs_reg(this, glsl_type::float_type
);
185 fs_reg acc
= fs_reg(this, glsl_type::float_type
);
188 switch (fpi
->Opcode
) {
189 case OPCODE_DP2
: count
= 2; break;
190 case OPCODE_DP3
: count
= 3; break;
191 case OPCODE_DP4
: count
= 4; break;
192 case OPCODE_DPH
: count
= 3; break;
193 default: unreachable("not reached");
196 emit(MUL(acc
, offset(src
[0], 0), offset(src
[1], 0)));
197 for (int i
= 1; i
< count
; i
++) {
198 emit(MUL(mul
, offset(src
[0], i
), offset(src
[1], i
)));
199 emit(ADD(acc
, acc
, mul
));
202 if (fpi
->Opcode
== OPCODE_DPH
)
203 emit(ADD(acc
, acc
, offset(src
[1], 3)));
205 emit_fp_scalar_write(fpi
, dst
, acc
);
210 if (fpi
->DstReg
.WriteMask
& WRITEMASK_X
)
211 emit(MOV(dst
, fs_reg(1.0f
)));
212 if (fpi
->DstReg
.WriteMask
& WRITEMASK_Y
) {
213 emit(MUL(offset(dst
, 1),
214 offset(src
[0], 1), offset(src
[1], 1)));
216 if (fpi
->DstReg
.WriteMask
& WRITEMASK_Z
)
217 emit(MOV(offset(dst
, 2), offset(src
[0], 2)));
218 if (fpi
->DstReg
.WriteMask
& WRITEMASK_W
)
219 emit(MOV(offset(dst
, 3), offset(src
[1], 3)));
223 emit_fp_scalar_math(SHADER_OPCODE_EXP2
, fpi
, dst
, src
[0]);
227 emit_fp_alu1(BRW_OPCODE_RNDD
, fpi
, dst
, src
[0]);
231 emit_fp_alu1(BRW_OPCODE_FRC
, fpi
, dst
, src
[0]);
235 for (int i
= 0; i
< 4; i
++) {
236 /* In most cases the argument to a KIL will be something like
237 * TEMP[0].wwww, so there's no point in checking whether .w is < 0
241 GET_SWZ(fpi
->SrcReg
[0].Swizzle
, i
) ==
242 GET_SWZ(fpi
->SrcReg
[0].Swizzle
, i
- 1) &&
243 ((fpi
->SrcReg
[0].Negate
>> i
) & 1) ==
244 ((fpi
->SrcReg
[0].Negate
>> (i
- 1)) & 1)) {
249 /* Emit an instruction that's predicated on the current
250 * undiscarded pixels, and updates just those pixels to be
253 fs_inst
*cmp
= emit(CMP(null
, offset(src
[0], i
), fs_reg(0.0f
),
254 BRW_CONDITIONAL_GE
));
255 cmp
->predicate
= BRW_PREDICATE_NORMAL
;
256 cmp
->flag_subreg
= 1;
262 emit_fp_scalar_math(SHADER_OPCODE_LOG2
, fpi
, dst
, src
[0]);
266 /* From the ARB_fragment_program spec:
268 * tmp = VectorLoad(op0);
269 * if (tmp.x < 0) tmp.x = 0;
270 * if (tmp.y < 0) tmp.y = 0;
271 * if (tmp.w < -(128.0-epsilon)) tmp.w = -(128.0-epsilon);
272 * else if (tmp.w > 128-epsilon) tmp.w = 128-epsilon;
275 * result.z = (tmp.x > 0) ? RoughApproxPower(tmp.y, tmp.w) : 0.0;
278 * Note that we don't do the clamping to +/- 128. We didn't in
279 * brw_wm_emit.c either.
281 if (fpi
->DstReg
.WriteMask
& WRITEMASK_X
)
282 emit(MOV(offset(dst
, 0), fs_reg(1.0f
)));
284 if (fpi
->DstReg
.WriteMask
& WRITEMASK_YZ
) {
286 emit(CMP(null
, offset(src
[0], 0), fs_reg(0.0f
),
287 BRW_CONDITIONAL_LE
));
289 if (fpi
->DstReg
.WriteMask
& WRITEMASK_Y
) {
290 emit(MOV(offset(dst
, 1), offset(src
[0], 0)));
291 inst
= emit(MOV(offset(dst
, 1), fs_reg(0.0f
)));
292 inst
->predicate
= BRW_PREDICATE_NORMAL
;
295 if (fpi
->DstReg
.WriteMask
& WRITEMASK_Z
) {
296 emit_math(SHADER_OPCODE_POW
, offset(dst
, 2),
297 offset(src
[0], 1), offset(src
[0], 3));
299 inst
= emit(MOV(offset(dst
, 2), fs_reg(0.0f
)));
300 inst
->predicate
= BRW_PREDICATE_NORMAL
;
304 if (fpi
->DstReg
.WriteMask
& WRITEMASK_W
)
305 emit(MOV(offset(dst
, 3), fs_reg(1.0f
)));
310 for (int i
= 0; i
< 4; i
++) {
311 if (fpi
->DstReg
.WriteMask
& (1 << i
)) {
312 fs_reg a
= offset(src
[0], i
);
313 fs_reg y
= offset(src
[1], i
);
314 fs_reg x
= offset(src
[2], i
);
315 emit_lrp(offset(dst
, i
), x
, y
, a
);
321 for (int i
= 0; i
< 4; i
++) {
322 if (fpi
->DstReg
.WriteMask
& (1 << i
)) {
323 fs_reg temp
= fs_reg(this, glsl_type::float_type
);
324 emit(MUL(temp
, offset(src
[0], i
), offset(src
[1], i
)));
325 emit(ADD(offset(dst
, i
), temp
, offset(src
[2], i
)));
331 emit_fp_minmax(fpi
, dst
, src
[0], src
[1]);
335 emit_fp_alu1(BRW_OPCODE_MOV
, fpi
, dst
, src
[0]);
339 emit_fp_minmax(fpi
, dst
, src
[0], src
[1]);
343 emit_fp_alu2(BRW_OPCODE_MUL
, fpi
, dst
, src
[0], src
[1]);
347 fs_reg temp
= fs_reg(this, glsl_type::float_type
);
348 emit_math(SHADER_OPCODE_POW
, temp
, src
[0], src
[1]);
349 emit_fp_scalar_write(fpi
, dst
, temp
);
354 emit_fp_scalar_math(SHADER_OPCODE_RCP
, fpi
, dst
, src
[0]);
358 emit_fp_scalar_math(SHADER_OPCODE_RSQ
, fpi
, dst
, src
[0]);
362 if (fpi
->DstReg
.WriteMask
& WRITEMASK_X
) {
363 emit_math(SHADER_OPCODE_COS
, offset(dst
, 0),
367 if (fpi
->DstReg
.WriteMask
& WRITEMASK_Y
) {
368 emit_math(SHADER_OPCODE_SIN
, offset(dst
, 1),
374 emit_fp_sop(BRW_CONDITIONAL_GE
, fpi
, dst
, src
[0], src
[1], one
);
378 emit_fp_scalar_math(SHADER_OPCODE_SIN
, fpi
, dst
, src
[0]);
382 emit_fp_sop(BRW_CONDITIONAL_L
, fpi
, dst
, src
[0], src
[1], one
);
386 fs_reg neg_src1
= src
[1];
387 neg_src1
.negate
= !src
[1].negate
;
389 emit_fp_alu2(BRW_OPCODE_ADD
, fpi
, dst
, src
[0], neg_src1
);
396 /* We piggy-back on the GLSL IR support for texture setup. To do so,
397 * we have to cook up an ir_texture that has the coordinate field
398 * with appropriate type, and shadow_comparitor set or not. All the
399 * other properties of ir_texture are passed in as arguments to the
400 * emit_texture_gen* function.
402 ir_texture
*ir
= NULL
;
406 fs_reg coordinate
= src
[0];
410 switch (fpi
->Opcode
) {
412 ir
= new(mem_ctx
) ir_texture(ir_tex
);
415 ir
= new(mem_ctx
) ir_texture(ir_tex
);
417 coordinate
= fs_reg(this, glsl_type::vec3_type
);
418 fs_reg invproj
= fs_reg(this, glsl_type::float_type
);
419 emit_math(SHADER_OPCODE_RCP
, invproj
, offset(src
[0], 3));
420 for (int i
= 0; i
< 3; i
++) {
421 emit(MUL(offset(coordinate
, i
),
422 offset(src
[0], i
), invproj
));
427 ir
= new(mem_ctx
) ir_texture(ir_txb
);
428 lod
= offset(src
[0], 3);
431 unreachable("not reached");
434 ir
->type
= glsl_type::vec4_type
;
436 const glsl_type
*coordinate_type
;
437 switch (fpi
->TexSrcTarget
) {
438 case TEXTURE_1D_INDEX
:
439 coordinate_type
= glsl_type::float_type
;
442 case TEXTURE_2D_INDEX
:
443 case TEXTURE_1D_ARRAY_INDEX
:
444 case TEXTURE_RECT_INDEX
:
445 case TEXTURE_EXTERNAL_INDEX
:
446 coordinate_type
= glsl_type::vec2_type
;
449 case TEXTURE_3D_INDEX
:
450 case TEXTURE_2D_ARRAY_INDEX
:
451 coordinate_type
= glsl_type::vec3_type
;
454 case TEXTURE_CUBE_INDEX
: {
455 coordinate_type
= glsl_type::vec3_type
;
457 fs_reg temp
= fs_reg(this, glsl_type::float_type
);
458 fs_reg cubecoord
= fs_reg(this, glsl_type::vec3_type
);
459 fs_reg abscoord
= coordinate
;
460 abscoord
.negate
= false;
462 emit_minmax(BRW_CONDITIONAL_GE
, temp
,
463 offset(abscoord
, 0), offset(abscoord
, 1));
464 emit_minmax(BRW_CONDITIONAL_GE
, temp
,
465 temp
, offset(abscoord
, 2));
466 emit_math(SHADER_OPCODE_RCP
, temp
, temp
);
467 for (int i
= 0; i
< 3; i
++) {
468 emit(MUL(offset(cubecoord
, i
),
469 offset(coordinate
, i
), temp
));
472 coordinate
= cubecoord
;
477 unreachable("not reached");
480 ir_constant_data junk_data
;
481 ir
->coordinate
= new(mem_ctx
) ir_constant(coordinate_type
, &junk_data
);
483 if (fpi
->TexShadow
) {
484 shadow_c
= offset(coordinate
, 2);
485 ir
->shadow_comparitor
= new(mem_ctx
) ir_constant(0.0f
);
488 coordinate
= rescale_texcoord(ir
, coordinate
,
489 fpi
->TexSrcTarget
== TEXTURE_RECT_INDEX
,
490 fpi
->TexSrcUnit
, fpi
->TexSrcUnit
);
494 inst
= emit_texture_gen7(ir
, dst
, coordinate
, shadow_c
, lod
, dpdy
, sample_index
, fs_reg(0u), fpi
->TexSrcUnit
);
495 } else if (brw
->gen
>= 5) {
496 inst
= emit_texture_gen5(ir
, dst
, coordinate
, shadow_c
, lod
, dpdy
, sample_index
, fpi
->TexSrcUnit
);
498 inst
= emit_texture_gen4(ir
, dst
, coordinate
, shadow_c
, lod
, dpdy
, fpi
->TexSrcUnit
);
501 inst
->shadow_compare
= fpi
->TexShadow
;
503 /* Reuse the GLSL swizzle_result() handler. */
504 swizzle_result(ir
, dst
, fpi
->TexSrcUnit
);
511 /* Note that SWZ's extended swizzles are handled in the general
512 * get_src_reg() code.
514 emit_fp_alu1(BRW_OPCODE_MOV
, fpi
, dst
, src
[0]);
518 for (int i
= 0; i
< 3; i
++) {
519 if (fpi
->DstReg
.WriteMask
& (1 << i
)) {
520 int i1
= (i
+ 1) % 3;
521 int i2
= (i
+ 2) % 3;
523 fs_reg temp
= fs_reg(this, glsl_type::float_type
);
524 fs_reg neg_src1_1
= offset(src
[1], i1
);
525 neg_src1_1
.negate
= !neg_src1_1
.negate
;
526 emit(MUL(temp
, offset(src
[0], i2
), neg_src1_1
));
527 emit(MUL(offset(dst
, i
),
528 offset(src
[0], i1
), offset(src
[1], i2
)));
529 emit(ADD(offset(dst
, i
), offset(dst
, i
), temp
));
538 _mesa_problem(ctx
, "Unsupported opcode %s in fragment program\n",
539 _mesa_opcode_string(fpi
->Opcode
));
542 /* To handle saturates, we emit a MOV with a saturate bit, which
543 * optimization should fold into the preceding instructions when safe.
545 if (fpi
->Opcode
!= OPCODE_END
) {
546 fs_reg real_dst
= get_fp_dst_reg(&fpi
->DstReg
);
548 for (int i
= 0; i
< 4; i
++) {
549 if (fpi
->DstReg
.WriteMask
& (1 << i
)) {
550 fs_inst
*inst
= emit(MOV(offset(real_dst
, i
),
552 inst
->saturate
= fpi
->SaturateMode
;
560 * Fragment depth has this strange convention of being the .z component of
561 * a vec4. emit_fb_write() wants to see a float value, instead.
563 this->current_annotation
= "result.depth write";
564 if (frag_depth
.file
!= BAD_FILE
) {
565 fs_reg temp
= fs_reg(this, glsl_type::float_type
);
566 emit(MOV(temp
, offset(frag_depth
, 2)));
572 fs_visitor::setup_fp_regs()
574 /* PROGRAM_TEMPORARY */
575 int num_temp
= prog
->NumTemporaries
;
576 fp_temp_regs
= rzalloc_array(mem_ctx
, fs_reg
, num_temp
);
577 for (int i
= 0; i
< num_temp
; i
++)
578 fp_temp_regs
[i
] = fs_reg(this, glsl_type::vec4_type
);
580 /* PROGRAM_STATE_VAR etc. */
581 if (dispatch_width
== 8) {
583 p
< prog
->Parameters
->NumParameters
; p
++) {
584 for (unsigned int i
= 0; i
< 4; i
++) {
585 stage_prog_data
->param
[uniforms
++] =
586 &prog
->Parameters
->ParameterValues
[p
][i
].f
;
591 fp_input_regs
= rzalloc_array(mem_ctx
, fs_reg
, VARYING_SLOT_MAX
);
592 for (int i
= 0; i
< VARYING_SLOT_MAX
; i
++) {
593 if (prog
->InputsRead
& BITFIELD64_BIT(i
)) {
594 /* Make up a dummy instruction to reuse code for emitting
597 ir_variable
*ir
= new(mem_ctx
) ir_variable(glsl_type::vec4_type
,
600 ir
->data
.location
= i
;
602 this->current_annotation
= ralloc_asprintf(ctx
, "interpolate input %d",
606 case VARYING_SLOT_POS
:
607 ir
->data
.pixel_center_integer
= fp
->PixelCenterInteger
;
608 ir
->data
.origin_upper_left
= fp
->OriginUpperLeft
;
609 fp_input_regs
[i
] = *emit_fragcoord_interpolation(ir
);
611 case VARYING_SLOT_FACE
:
612 fp_input_regs
[i
] = *emit_frontfacing_interpolation(ir
);
615 fp_input_regs
[i
] = *emit_general_interpolation(ir
);
617 if (i
== VARYING_SLOT_FOGC
) {
618 emit(MOV(offset(fp_input_regs
[i
], 1), fs_reg(0.0f
)));
619 emit(MOV(offset(fp_input_regs
[i
], 2), fs_reg(0.0f
)));
620 emit(MOV(offset(fp_input_regs
[i
], 3), fs_reg(1.0f
)));
626 this->current_annotation
= NULL
;
632 fs_visitor::get_fp_dst_reg(const prog_dst_register
*dst
)
635 case PROGRAM_TEMPORARY
:
636 return fp_temp_regs
[dst
->Index
];
639 if (dst
->Index
== FRAG_RESULT_DEPTH
) {
640 if (frag_depth
.file
== BAD_FILE
)
641 frag_depth
= fs_reg(this, glsl_type::vec4_type
);
643 } else if (dst
->Index
== FRAG_RESULT_COLOR
) {
644 if (outputs
[0].file
== BAD_FILE
) {
645 outputs
[0] = fs_reg(this, glsl_type::vec4_type
);
646 output_components
[0] = 4;
648 /* Tell emit_fb_writes() to smear fragment.color across all the
651 for (int i
= 1; i
< key
->nr_color_regions
; i
++) {
652 outputs
[i
] = outputs
[0];
653 output_components
[i
] = output_components
[0];
658 int output_index
= dst
->Index
- FRAG_RESULT_DATA0
;
659 if (outputs
[output_index
].file
== BAD_FILE
) {
660 outputs
[output_index
] = fs_reg(this, glsl_type::vec4_type
);
662 output_components
[output_index
] = 4;
663 return outputs
[output_index
];
666 case PROGRAM_UNDEFINED
:
670 _mesa_problem(ctx
, "bad dst register file: %s\n",
671 _mesa_register_file_name((gl_register_file
)dst
->File
));
672 return fs_reg(this, glsl_type::vec4_type
);
677 fs_visitor::get_fp_src_reg(const prog_src_register
*src
)
679 struct gl_program_parameter_list
*plist
= prog
->Parameters
;
686 case PROGRAM_UNDEFINED
:
688 case PROGRAM_TEMPORARY
:
689 result
= fp_temp_regs
[src
->Index
];
693 result
= fp_input_regs
[src
->Index
];
696 case PROGRAM_STATE_VAR
:
697 case PROGRAM_UNIFORM
:
698 case PROGRAM_CONSTANT
:
699 /* We actually want to look at the type in the Parameters list for this,
700 * because this lets us upload constant builtin uniforms, as actual
703 switch (plist
->Parameters
[src
->Index
].Type
) {
704 case PROGRAM_CONSTANT
: {
705 result
= fs_reg(this, glsl_type::vec4_type
);
707 for (int i
= 0; i
< 4; i
++) {
708 emit(MOV(offset(result
, i
),
709 fs_reg(plist
->ParameterValues
[src
->Index
][i
].f
)));
714 case PROGRAM_STATE_VAR
:
715 case PROGRAM_UNIFORM
:
716 result
= fs_reg(UNIFORM
, src
->Index
* 4);
720 _mesa_problem(ctx
, "bad uniform src register file: %s\n",
721 _mesa_register_file_name((gl_register_file
)src
->File
));
722 return fs_reg(this, glsl_type::vec4_type
);
727 _mesa_problem(ctx
, "bad src register file: %s\n",
728 _mesa_register_file_name((gl_register_file
)src
->File
));
729 return fs_reg(this, glsl_type::vec4_type
);
732 if (src
->Swizzle
!= SWIZZLE_NOOP
|| src
->Negate
) {
733 fs_reg unswizzled
= result
;
734 result
= fs_reg(this, glsl_type::vec4_type
);
735 for (int i
= 0; i
< 4; i
++) {
736 bool negate
= src
->Negate
& (1 << i
);
737 /* The ZERO, ONE, and Negate options are only used for OPCODE_SWZ,
738 * but it costs us nothing to support it.
740 int src_swiz
= GET_SWZ(src
->Swizzle
, i
);
741 if (src_swiz
== SWIZZLE_ZERO
) {
742 emit(MOV(offset(result
, i
), fs_reg(0.0f
)));
743 } else if (src_swiz
== SWIZZLE_ONE
) {
744 emit(MOV(offset(result
, i
),
745 negate
? fs_reg(-1.0f
) : fs_reg(1.0f
)));
747 fs_reg src
= offset(unswizzled
, src_swiz
);
749 src
.negate
= !src
.negate
;
750 emit(MOV(offset(result
, i
), src
));