2 * Copyright © 2012 Intel Corporation
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
24 /** @file brw_fs_fp.cpp
26 * Implementation of the compiler for GL_ARB_fragment_program shaders on top
27 * of the GLSL compiler backend.
30 #include "brw_context.h"
// Register-offset helper: takes a register and an integer component index.
// NOTE(review): the body is not visible in this view. Callers in this file
// pass the result wherever component i of a multi-component register is
// needed -- confirm against the full definition.
34 regoffset(fs_reg reg
, int i
)
// Emits a one-source ALU instruction once per enabled destination channel.
//
// opcode: instruction to emit.
// fpi:    program instruction whose DstReg.WriteMask selects the channels.
// dst:    destination register; channel i is addressed as regoffset(dst, i).
// src:    source register; channel i is addressed as regoffset(src, i).
41 fs_visitor::emit_fp_alu1(enum opcode opcode
,
42 const struct prog_instruction
*fpi
,
43 fs_reg dst
, fs_reg src
)
// Visit the four channels; only those with their write-mask bit set emit.
45 for (int i
= 0; i
< 4; i
++) {
46 if (fpi
->DstReg
.WriteMask
& (1 << i
))
47 emit(opcode
, regoffset(dst
, i
), regoffset(src
, i
));
// Emits a two-source ALU instruction once per enabled destination channel.
//
// opcode:     instruction to emit.
// fpi:        program instruction whose DstReg.WriteMask selects the channels.
// dst:        destination register; channel i is regoffset(dst, i).
// src0, src1: source registers; channel i of each is used for channel i of dst.
52 fs_visitor::emit_fp_alu2(enum opcode opcode
,
53 const struct prog_instruction
*fpi
,
54 fs_reg dst
, fs_reg src0
, fs_reg src1
)
// Visit the four channels; only those with their write-mask bit set emit.
56 for (int i
= 0; i
< 4; i
++) {
57 if (fpi
->DstReg
.WriteMask
& (1 << i
))
58 emit(opcode
, regoffset(dst
, i
),
59 regoffset(src0
, i
), regoffset(src1
, i
));
// Emits a per-channel min or max through emit_minmax.
//
// fpi:        program instruction; its Opcode picks the conditional modifier
//             and its DstReg.WriteMask selects the channels.
// dst:        destination register; channel i is regoffset(dst, i).
// src0, src1: operands; channel i of each feeds channel i of dst.
64 fs_visitor::emit_fp_minmax(const prog_instruction
*fpi
,
65 fs_reg dst
, fs_reg src0
, fs_reg src1
)
// OPCODE_MIN uses BRW_CONDITIONAL_L. The BRW_CONDITIONAL_GE assignment that
// follows is presumably the else branch -- NOTE(review): the else line is not
// visible in this view, confirm.
67 uint32_t conditionalmod
;
68 if (fpi
->Opcode
== OPCODE_MIN
)
69 conditionalmod
= BRW_CONDITIONAL_L
;
71 conditionalmod
= BRW_CONDITIONAL_GE
;
// One emit_minmax per channel enabled in the write mask.
73 for (int i
= 0; i
< 4; i
++) {
74 if (fpi
->DstReg
.WriteMask
& (1 << i
)) {
75 emit_minmax(conditionalmod
, regoffset(dst
, i
),
76 regoffset(src0
, i
), regoffset(src1
, i
));
// Emits a per-channel set-on-comparison: a CMP of src0 against src1 followed
// by a predicated SEL between the value one and 0.0f.
//
// conditional_mod: condition stored on the CMP instruction.
// fpi:             program instruction whose DstReg.WriteMask selects channels.
// dst:             destination register; channel i is regoffset(dst, i).
// src0, src1:      operands compared channel by channel.
// NOTE(review): the SEL below reads a register named one, and callers pass a
// sixth argument; the declaration of that last parameter is not visible in
// this view.
82 fs_visitor::emit_fp_sop(uint32_t conditional_mod
,
83 const struct prog_instruction
*fpi
,
84 fs_reg dst
, fs_reg src0
, fs_reg src1
,
87 for (int i
= 0; i
< 4; i
++) {
88 if (fpi
->DstReg
.WriteMask
& (1 << i
)) {
// The compare writes to the null register; only its condition is kept.
// NOTE(review): the declaration of inst is not visible in this view.
91 inst
= emit(BRW_OPCODE_CMP
, fs_reg(brw_null_reg()),
92 regoffset(src0
, i
), regoffset(src1
, i
));
93 inst
->conditional_mod
= conditional_mod
;
// Predicated select between one and 0.0f into the destination channel.
95 inst
= emit(BRW_OPCODE_SEL
, regoffset(dst
, i
), one
, fs_reg(0.0f
));
96 inst
->predicated
= true;
// Copies one source register into every enabled destination channel.
//
// fpi: program instruction whose DstReg.WriteMask selects the channels.
// dst: destination register; channel i is regoffset(dst, i).
// src: source register, passed to each MOV as-is (no per-channel offset).
102 fs_visitor::emit_fp_scalar_write(const struct prog_instruction
*fpi
,
103 fs_reg dst
, fs_reg src
)
105 for (int i
= 0; i
< 4; i
++) {
106 if (fpi
->DstReg
.WriteMask
& (1 << i
))
107 emit(BRW_OPCODE_MOV
, regoffset(dst
, i
), src
);
// Runs a one-operand math opcode into a float temporary and then writes that
// temporary to the enabled destination channels via emit_fp_scalar_write.
//
// opcode: math opcode handed to emit_math.
// fpi:    program instruction forwarded to emit_fp_scalar_write.
// dst:    destination register.
// src:    operand handed to emit_math.
112 fs_visitor::emit_fp_scalar_math(enum opcode opcode
,
113 const struct prog_instruction
*fpi
,
114 fs_reg dst
, fs_reg src
)
116 fs_reg temp
= fs_reg(this, glsl_type::float_type
);
117 emit_math(opcode
, temp
, src
);
118 emit_fp_scalar_write(fpi
, dst
, temp
);
// Walks fp->Base.Instructions in order and emits backend instructions for
// each one, switching on fpi->Opcode.
//
// Every instruction computes into a fresh vec4 temporary named dst. For any
// opcode other than OPCODE_END, a MOV per enabled channel then targets the
// register returned by get_fp_dst_reg, with saturate taken from
// fpi->SaturateMode. After the loop, a written frag_depth has its component 2
// copied into a float temporary.
//
// NOTE(review): the case labels of the opcode switch, several declarations
// (dst, src, inst, count, lod, dpdy, shadow_c) and all closing braces are not
// visible in this view. Opcode names in the notes below are inferred from the
// emitted operations and must be confirmed against the full file.
122 fs_visitor::emit_fragment_program_code()
126 fs_reg null
= fs_reg(brw_null_reg());
128 /* Keep a reg with 1.0 around, for reuse by emit_fp_sop so that it can just
138 fs_reg one
= fs_reg(this, glsl_type::float_type
);
139 emit(BRW_OPCODE_MOV
, one
, fs_reg(1.0f
));
// Translate each program instruction in order.
141 for (unsigned int insn
= 0; insn
< fp
->Base
.NumInstructions
; insn
++) {
142 const struct prog_instruction
*fpi
= &fp
->Base
.Instructions
[insn
];
145 //_mesa_print_instruction(fpi);
150 /* We always emit into a temporary destination register to avoid
153 dst
= fs_reg(this, glsl_type::vec4_type
);
// Resolve all three source operand slots through get_fp_src_reg.
155 for (int i
= 0; i
< 3; i
++)
156 src
[i
] = get_fp_src_reg(&fpi
->SrcReg
[i
]);
158 switch (fpi
->Opcode
) {
161 src
[0].negate
= false;
162 emit_fp_alu1(BRW_OPCODE_MOV
, fpi
, dst
, src
[0]);
166 emit_fp_alu2(BRW_OPCODE_ADD
, fpi
, dst
, src
[0], src
[1]);
// Per enabled channel: compare src[0] against 0.0 with the L condition, then
// a predicated SEL between src[1] and src[2].
170 for (int i
= 0; i
< 4; i
++) {
171 if (fpi
->DstReg
.WriteMask
& (1 << i
)) {
174 inst
= emit(BRW_OPCODE_CMP
, null
,
175 regoffset(src
[0], i
), fs_reg(0.0f
));
176 inst
->conditional_mod
= BRW_CONDITIONAL_L
;
178 inst
= emit(BRW_OPCODE_SEL
, regoffset(dst
, i
),
179 regoffset(src
[1], i
), regoffset(src
[2], i
));
180 inst
->predicated
= true;
186 emit_fp_scalar_math(SHADER_OPCODE_COS
, fpi
, dst
, src
[0]);
// Dot products: count is the number of component pairs multiplied and summed
// into acc. For OPCODE_DPH, component 3 of src[1] is added afterwards. The
// scalar result is then written to the enabled channels.
193 fs_reg mul
= fs_reg(this, glsl_type::float_type
);
194 fs_reg acc
= fs_reg(this, glsl_type::float_type
);
197 switch (fpi
->Opcode
) {
198 case OPCODE_DP2
: count
= 2; break;
199 case OPCODE_DP3
: count
= 3; break;
200 case OPCODE_DP4
: count
= 4; break;
201 case OPCODE_DPH
: count
= 3; break;
202 default: assert(!"not reached"); count
= 0; break;
205 emit(BRW_OPCODE_MUL
, acc
,
206 regoffset(src
[0], 0), regoffset(src
[1], 0));
207 for (int i
= 1; i
< count
; i
++) {
208 emit(BRW_OPCODE_MUL
, mul
,
209 regoffset(src
[0], i
), regoffset(src
[1], i
));
210 emit(BRW_OPCODE_ADD
, acc
, acc
, mul
);
213 if (fpi
->Opcode
== OPCODE_DPH
)
214 emit(BRW_OPCODE_ADD
, acc
, acc
, regoffset(src
[1], 3));
216 emit_fp_scalar_write(fpi
, dst
, acc
);
// Channel-specific writes: x = 1.0, y = src[0].y * src[1].y, z = src[0].z,
// w = src[1].w, each guarded by its write-mask bit.
221 if (fpi
->DstReg
.WriteMask
& WRITEMASK_X
)
222 emit(BRW_OPCODE_MOV
, dst
, fs_reg(1.0f
));
223 if (fpi
->DstReg
.WriteMask
& WRITEMASK_Y
) {
224 emit(BRW_OPCODE_MUL
, regoffset(dst
, 1),
225 regoffset(src
[0], 1), regoffset(src
[1], 1));
227 if (fpi
->DstReg
.WriteMask
& WRITEMASK_Z
)
228 emit(BRW_OPCODE_MOV
, regoffset(dst
, 2), regoffset(src
[0], 2));
229 if (fpi
->DstReg
.WriteMask
& WRITEMASK_W
)
230 emit(BRW_OPCODE_MOV
, regoffset(dst
, 3), regoffset(src
[1], 3));
234 emit_fp_scalar_math(SHADER_OPCODE_EXP2
, fpi
, dst
, src
[0]);
238 emit_fp_alu1(BRW_OPCODE_RNDD
, fpi
, dst
, src
[0]);
242 emit_fp_alu1(BRW_OPCODE_FRC
, fpi
, dst
, src
[0]);
// Per channel: compare src[0] against 0.0 with the L condition and wrap a
// DISCARD in a predicated IF / ENDIF.
// NOTE(review): the visible condition compares the swizzle and negate bit of
// channel i with channel i - 1, presumably to skip duplicate tests; the
// opening of that condition and the skip statement are not visible here.
246 for (int i
= 0; i
< 4; i
++) {
247 /* In most cases the argument to a KIL will be something like
248 * TEMP[0].wwww, so there's no point in checking whether .w is < 0
252 GET_SWZ(fpi
->SrcReg
[0].Swizzle
, i
) ==
253 GET_SWZ(fpi
->SrcReg
[0].Swizzle
, i
- 1) &&
254 ((fpi
->SrcReg
[0].Negate
>> i
) & 1) ==
255 ((fpi
->SrcReg
[0].Negate
>> (i
- 1)) & 1)) {
259 fs_inst
*inst
= emit(BRW_OPCODE_CMP
, null
,
260 regoffset(src
[0], i
), 0.0f
);
261 inst
->conditional_mod
= BRW_CONDITIONAL_L
;
263 inst
= emit(BRW_OPCODE_IF
);
264 inst
->predicated
= true;
265 emit(FS_OPCODE_DISCARD
);
266 emit(BRW_OPCODE_ENDIF
);
272 emit_fp_scalar_math(SHADER_OPCODE_LOG2
, fpi
, dst
, src
[0]);
276 /* From the ARB_fragment_program spec:
278 * tmp = VectorLoad(op0);
279 * if (tmp.x < 0) tmp.x = 0;
280 * if (tmp.y < 0) tmp.y = 0;
281 * if (tmp.w < -(128.0-epsilon)) tmp.w = -(128.0-epsilon);
282 * else if (tmp.w > 128-epsilon) tmp.w = 128-epsilon;
285 * result.z = (tmp.x > 0) ? RoughApproxPower(tmp.y, tmp.w) : 0.0;
288 * Note that we don't do the clamping to +/- 128. We didn't in
289 * brw_wm_emit.c either.
291 if (fpi
->DstReg
.WriteMask
& WRITEMASK_X
)
292 emit(BRW_OPCODE_MOV
, regoffset(dst
, 0), fs_reg(1.0f
));
// One compare of src[0].x against 0.0 (LE condition) is issued here; the
// predicated MOVs of 0.0 in the y and z paths below follow it.
294 if (fpi
->DstReg
.WriteMask
& WRITEMASK_YZ
) {
296 inst
= emit(BRW_OPCODE_CMP
, null
,
297 regoffset(src
[0], 0), fs_reg(0.0f
));
298 inst
->conditional_mod
= BRW_CONDITIONAL_LE
;
300 if (fpi
->DstReg
.WriteMask
& WRITEMASK_Y
) {
301 emit(BRW_OPCODE_MOV
, regoffset(dst
, 1), regoffset(src
[0], 0));
302 inst
= emit(BRW_OPCODE_MOV
, regoffset(dst
, 1), fs_reg(0.0f
));
303 inst
->predicated
= true;
306 if (fpi
->DstReg
.WriteMask
& WRITEMASK_Z
) {
307 emit_math(SHADER_OPCODE_POW
, regoffset(dst
, 2),
308 regoffset(src
[0], 1), regoffset(src
[0], 3));
310 inst
= emit(BRW_OPCODE_MOV
, regoffset(dst
, 2), fs_reg(0.0f
));
311 inst
->predicated
= true;
315 if (fpi
->DstReg
.WriteMask
& WRITEMASK_W
)
316 emit(BRW_OPCODE_MOV
, regoffset(dst
, 3), fs_reg(1.0f
));
// Per enabled channel: dst = (1.0 - src[0]) * src[2] + src[0] * src[1],
// built from a negated copy of src[0] and two float temporaries.
321 for (int i
= 0; i
< 4; i
++) {
322 if (fpi
->DstReg
.WriteMask
& (1 << i
)) {
323 fs_reg neg_src0
= regoffset(src
[0], i
);
324 neg_src0
.negate
= !neg_src0
.negate
;
325 fs_reg temp
= fs_reg(this, glsl_type::float_type
);
326 fs_reg temp2
= fs_reg(this, glsl_type::float_type
);
327 emit(BRW_OPCODE_ADD
, temp
, neg_src0
, fs_reg(1.0f
));
328 emit(BRW_OPCODE_MUL
, temp
, temp
, regoffset(src
[2], i
));
329 emit(BRW_OPCODE_MUL
, temp2
,
330 regoffset(src
[0], i
), regoffset(src
[1], i
));
331 emit(BRW_OPCODE_ADD
, regoffset(dst
, i
), temp
, temp2
);
// Per enabled channel: dst = src[0] * src[1] + src[2], as a MUL into a float
// temporary followed by an ADD.
337 for (int i
= 0; i
< 4; i
++) {
338 if (fpi
->DstReg
.WriteMask
& (1 << i
)) {
339 fs_reg temp
= fs_reg(this, glsl_type::float_type
);
340 emit(BRW_OPCODE_MUL
, temp
,
341 regoffset(src
[0], i
), regoffset(src
[1], i
));
342 emit(BRW_OPCODE_ADD
, regoffset(dst
, i
),
343 temp
, regoffset(src
[2], i
));
349 emit_fp_minmax(fpi
, dst
, src
[0], src
[1]);
353 emit_fp_alu1(BRW_OPCODE_MOV
, fpi
, dst
, src
[0]);
357 emit_fp_minmax(fpi
, dst
, src
[0], src
[1]);
361 emit_fp_alu2(BRW_OPCODE_MUL
, fpi
, dst
, src
[0], src
[1]);
365 fs_reg temp
= fs_reg(this, glsl_type::float_type
);
366 emit_math(SHADER_OPCODE_POW
, temp
, src
[0], src
[1]);
367 emit_fp_scalar_write(fpi
, dst
, temp
);
372 emit_fp_scalar_math(SHADER_OPCODE_RCP
, fpi
, dst
, src
[0]);
376 emit_fp_scalar_math(SHADER_OPCODE_RSQ
, fpi
, dst
, src
[0]);
// Cosine of src[0] component 0 goes to dst.x and sine of src[0] component 1
// goes to dst.y, each guarded by its write-mask bit.
380 if (fpi
->DstReg
.WriteMask
& WRITEMASK_X
) {
381 emit_math(SHADER_OPCODE_COS
, regoffset(dst
, 0),
382 regoffset(src
[0], 0));
385 if (fpi
->DstReg
.WriteMask
& WRITEMASK_Y
) {
386 emit_math(SHADER_OPCODE_SIN
, regoffset(dst
, 1),
387 regoffset(src
[0], 1));
392 emit_fp_sop(BRW_CONDITIONAL_GE
, fpi
, dst
, src
[0], src
[1], one
);
396 emit_fp_scalar_math(SHADER_OPCODE_SIN
, fpi
, dst
, src
[0]);
400 emit_fp_sop(BRW_CONDITIONAL_L
, fpi
, dst
, src
[0], src
[1], one
);
// Subtraction is emitted as an ADD of src[0] and a copy of src[1] with its
// negate flag flipped.
404 fs_reg neg_src1
= src
[1];
405 neg_src1
.negate
= !src
[1].negate
;
407 emit_fp_alu2(BRW_OPCODE_ADD
, fpi
, dst
, src
[0], neg_src1
);
414 /* We piggy-back on the GLSL IR support for texture setup. To do so,
415 * we have to cook up an ir_texture that has the coordinate field
416 * with appropriate type, and shadow_comparitor set or not. All the
417 * other properties of ir_texture are passed in as arguments to the
418 * emit_texture_gen* function.
420 ir_texture
*ir
= NULL
;
424 fs_reg coordinate
= src
[0];
427 switch (fpi
->Opcode
) {
429 ir
= new(mem_ctx
) ir_texture(ir_tex
);
432 ir
= new(mem_ctx
) ir_texture(ir_tex
);
// Projective path: the first three components of src[0] are multiplied by
// the reciprocal of component 3 into a fresh vec3 coordinate.
434 coordinate
= fs_reg(this, glsl_type::vec3_type
);
435 fs_reg invproj
= fs_reg(this, glsl_type::float_type
);
436 emit_math(SHADER_OPCODE_RCP
, invproj
, regoffset(src
[0], 3));
437 for (int i
= 0; i
< 3; i
++) {
438 emit(BRW_OPCODE_MUL
, regoffset(coordinate
, i
),
439 regoffset(src
[0], i
), invproj
);
// Bias path: an ir_txb texture op with lod read from component 3 of src[0].
444 ir
= new(mem_ctx
) ir_texture(ir_txb
);
445 lod
= regoffset(src
[0], 3);
448 assert(!"not reached");
// Pick the GLSL coordinate type from the texture target.
452 const glsl_type
*coordinate_type
;
453 switch (fpi
->TexSrcTarget
) {
454 case TEXTURE_1D_INDEX
:
455 coordinate_type
= glsl_type::float_type
;
458 case TEXTURE_2D_INDEX
:
459 case TEXTURE_1D_ARRAY_INDEX
:
460 case TEXTURE_RECT_INDEX
:
461 case TEXTURE_EXTERNAL_INDEX
:
462 coordinate_type
= glsl_type::vec2_type
;
465 case TEXTURE_3D_INDEX
:
466 case TEXTURE_2D_ARRAY_INDEX
:
467 coordinate_type
= glsl_type::vec3_type
;
470 case TEXTURE_CUBE_INDEX
: {
471 coordinate_type
= glsl_type::vec3_type
;
// Cube maps: temp becomes the GE-minmax (maximum) over the three abscoord
// components, is replaced by its reciprocal, and then scales each
// coordinate component into cubecoord.
// NOTE(review): only the negate flag of abscoord is cleared in the visible
// lines; any line that sets an absolute-value flag is not visible here.
473 fs_reg temp
= fs_reg(this, glsl_type::float_type
);
474 fs_reg cubecoord
= fs_reg(this, glsl_type::vec3_type
);
475 fs_reg abscoord
= coordinate
;
476 abscoord
.negate
= false;
478 emit_minmax(BRW_CONDITIONAL_GE
, temp
,
479 regoffset(abscoord
, 0), regoffset(abscoord
, 1));
480 emit_minmax(BRW_CONDITIONAL_GE
, temp
,
481 temp
, regoffset(abscoord
, 2));
482 emit_math(SHADER_OPCODE_RCP
, temp
, temp
);
483 for (int i
= 0; i
< 3; i
++) {
484 emit(BRW_OPCODE_MUL
, regoffset(cubecoord
, i
),
485 regoffset(coordinate
, i
), temp
);
488 coordinate
= cubecoord
;
493 assert(!"not reached");
494 coordinate_type
= glsl_type::vec2_type
;
// The ir_constant built from junk_data only gives ir->coordinate the chosen
// type; junk_data is not assigned any value in the visible lines.
498 ir_constant_data junk_data
;
499 ir
->coordinate
= new(mem_ctx
) ir_constant(coordinate_type
, &junk_data
);
501 coordinate
= rescale_texcoord(ir
, coordinate
,
502 fpi
->TexSrcTarget
== TEXTURE_RECT_INDEX
,
503 fpi
->TexSrcUnit
, fpi
->TexSrcUnit
);
// Shadow lookups take the comparison value from component 2 of coordinate.
505 if (fpi
->TexShadow
) {
506 shadow_c
= regoffset(coordinate
, 2);
507 ir
->shadow_comparitor
= new(mem_ctx
) ir_constant(0.0f
);
// Dispatch to the texture emitter matching the hardware generation.
511 if (intel
->gen
>= 7) {
512 inst
= emit_texture_gen7(ir
, dst
, coordinate
, shadow_c
, lod
, dpdy
);
513 } else if (intel
->gen
>= 5) {
514 inst
= emit_texture_gen5(ir
, dst
, coordinate
, shadow_c
, lod
, dpdy
);
516 inst
= emit_texture_gen4(ir
, dst
, coordinate
, shadow_c
, lod
, dpdy
);
519 inst
->sampler
= fpi
->TexSrcUnit
;
520 inst
->shadow_compare
= fpi
->TexShadow
;
522 /* Reuse the GLSL swizzle_result() handler. */
523 swizzle_result(ir
, dst
, fpi
->TexSrcUnit
);
530 /* Note that SWZ's extended swizzles are handled in the general
531 * get_src_reg() code.
533 emit_fp_alu1(BRW_OPCODE_MOV
, fpi
, dst
, src
[0]);
// Cross product over the first three channels, with i1 = (i + 1) % 3 and
// i2 = (i + 2) % 3: dst[i] = src[0][i1] * src[1][i2] - src[0][i2] * src[1][i1].
// The subtraction is done by negating the src[1][i1] operand.
537 for (int i
= 0; i
< 3; i
++) {
538 if (fpi
->DstReg
.WriteMask
& (1 << i
)) {
539 int i1
= (i
+ 1) % 3;
540 int i2
= (i
+ 2) % 3;
542 fs_reg temp
= fs_reg(this, glsl_type::float_type
);
543 fs_reg neg_src1_1
= regoffset(src
[1], i1
);
544 neg_src1_1
.negate
= !neg_src1_1
.negate
;
545 emit(BRW_OPCODE_MUL
, temp
,
546 regoffset(src
[0], i2
), neg_src1_1
);
547 emit(BRW_OPCODE_MUL
, regoffset(dst
, i
),
548 regoffset(src
[0], i1
), regoffset(src
[1], i2
));
549 emit(BRW_OPCODE_ADD
, regoffset(dst
, i
),
550 regoffset(dst
, i
), temp
);
559 _mesa_problem(ctx
, "Unsupported opcode %s in fragment program\n",
560 _mesa_opcode_string(fpi
->Opcode
));
563 /* To handle saturates, we emit a MOV with a saturate bit, which
564 * optimization should fold into the preceding instructions when safe.
// Copy-out step: one MOV per enabled channel into the real destination, with
// the saturate flag taken from the instruction.
// NOTE(review): the source operand of this MOV is on a line that is not
// visible here; presumably it is the matching channel of dst -- confirm.
566 if (fpi
->Opcode
!= OPCODE_END
) {
567 fs_reg real_dst
= get_fp_dst_reg(&fpi
->DstReg
);
569 for (int i
= 0; i
< 4; i
++) {
570 if (fpi
->DstReg
.WriteMask
& (1 << i
)) {
571 fs_inst
*inst
= emit(BRW_OPCODE_MOV
,
572 regoffset(real_dst
, i
),
574 inst
->saturate
= fpi
->SaturateMode
;
582 * Fragment depth has this strange convention of being the .z component of
583 * a vec4. emit_fb_write() wants to see a float value, instead.
585 this->current_annotation
= "result.depth write";
// Only runs when frag_depth was allocated: component 2 is moved into a float
// temporary. NOTE(review): what happens to temp afterwards is not visible.
586 if (frag_depth
.file
!= BAD_FILE
) {
587 fs_reg temp
= fs_reg(this, glsl_type::float_type
);
588 emit(BRW_OPCODE_MOV
, temp
, regoffset(frag_depth
, 2));
// Prepares the register tables used while translating a fragment program:
//  - fp_temp_regs: one vec4 register per program temporary
//    (fp->Base.NumTemporaries entries).
//  - param_index / param_offset: when c->dispatch_width is 8, one entry per
//    component (four per parameter), advancing c->prog_data.nr_params.
//  - fp_input_regs: FRAG_ATTRIB_MAX entries, filled for every attribute whose
//    bit is set in fp->Base.InputsRead by running the matching interpolation
//    helper on a freshly built ir_variable.
// NOTE(review): several lines (the parameter loop header, the ir_variable
// arguments, the attribute switch header and the emit opcodes of the FOGC
// fix-up) are not visible in this view.
594 fs_visitor::setup_fp_regs()
596 /* PROGRAM_TEMPORARY */
597 int num_temp
= fp
->Base
.NumTemporaries
;
598 fp_temp_regs
= rzalloc_array(mem_ctx
, fs_reg
, num_temp
);
599 for (int i
= 0; i
< num_temp
; i
++)
600 fp_temp_regs
[i
] = fs_reg(this, glsl_type::vec4_type
);
602 /* PROGRAM_STATE_VAR etc. */
// Parameter bookkeeping is done only when the dispatch width is 8.
603 if (c
->dispatch_width
== 8) {
605 p
< c
->fp
->program
.Base
.Parameters
->NumParameters
; p
++) {
606 for (unsigned int i
= 0; i
< 4; i
++) {
607 this->param_index
[c
->prog_data
.nr_params
] = p
;
608 this->param_offset
[c
->prog_data
.nr_params
] = i
;
609 c
->prog_data
.nr_params
++;
614 fp_input_regs
= rzalloc_array(mem_ctx
, fs_reg
, FRAG_ATTRIB_MAX
);
615 for (int i
= 0; i
< FRAG_ATTRIB_MAX
; i
++) {
616 if (fp
->Base
.InputsRead
& BITFIELD64_BIT(i
)) {
617 /* Make up a dummy instruction to reuse code for emitting
620 ir_variable
*ir
= new(mem_ctx
) ir_variable(glsl_type::vec4_type
,
625 this->current_annotation
= ralloc_asprintf(ctx
, "interpolate input %d",
// WPOS copies the pixel-center and origin conventions from the program before
// interpolating; FACE uses the front-facing helper; the remaining path uses
// the general interpolation helper.
629 case FRAG_ATTRIB_WPOS
:
630 ir
->pixel_center_integer
= fp
->PixelCenterInteger
;
631 ir
->origin_upper_left
= fp
->OriginUpperLeft
;
632 fp_input_regs
[i
] = *emit_fragcoord_interpolation(ir
);
634 case FRAG_ATTRIB_FACE
:
635 fp_input_regs
[i
] = *emit_frontfacing_interpolation(ir
);
638 fp_input_regs
[i
] = *emit_general_interpolation(ir
);
// Fog coordinate fix-up: components 1, 2 and 3 are paired with the constants
// 0.0, 0.0 and 1.0. NOTE(review): the opcode of these emits is on lines that
// are not visible here; presumably they are MOVs -- confirm.
640 if (i
== FRAG_ATTRIB_FOGC
) {
642 regoffset(fp_input_regs
[i
], 1), fs_reg(0.0f
));
644 regoffset(fp_input_regs
[i
], 2), fs_reg(0.0f
));
646 regoffset(fp_input_regs
[i
], 3), fs_reg(1.0f
));
652 this->current_annotation
= NULL
;
// Maps a program destination register to the backend register that holds it.
//
// dst: program destination register; its Index selects the slot.
//
// Visible behaviour:
//  - PROGRAM_TEMPORARY returns fp_temp_regs[dst->Index].
//  - FRAG_RESULT_DEPTH allocates frag_depth as a vec4 on first use.
//  - FRAG_RESULT_COLOR allocates outputs[0] on first use, records 4
//    components, and copies that entry to outputs[1 .. nr_color_regions - 1].
//  - Otherwise the output slot is dst->Index - FRAG_RESULT_DATA0, allocated
//    on first use, recorded as 4 components and returned.
//  - An unhandled file reports through _mesa_problem and returns a fresh vec4.
// NOTE(review): the switch header, the output-file case label and the return
// statements of the depth and color paths are not visible in this view.
658 fs_visitor::get_fp_dst_reg(const prog_dst_register
*dst
)
661 case PROGRAM_TEMPORARY
:
662 return fp_temp_regs
[dst
->Index
];
// BAD_FILE marks a register that has not been allocated yet.
665 if (dst
->Index
== FRAG_RESULT_DEPTH
) {
666 if (frag_depth
.file
== BAD_FILE
)
667 frag_depth
= fs_reg(this, glsl_type::vec4_type
);
669 } else if (dst
->Index
== FRAG_RESULT_COLOR
) {
670 if (outputs
[0].file
== BAD_FILE
) {
671 outputs
[0] = fs_reg(this, glsl_type::vec4_type
);
672 output_components
[0] = 4;
674 /* Tell emit_fb_writes() to smear fragment.color across all the
677 for (int i
= 1; i
< c
->key
.nr_color_regions
; i
++) {
678 outputs
[i
] = outputs
[0];
679 output_components
[i
] = output_components
[0];
// Remaining outputs are indexed relative to FRAG_RESULT_DATA0.
684 int output_index
= dst
->Index
- FRAG_RESULT_DATA0
;
685 if (outputs
[output_index
].file
== BAD_FILE
) {
686 outputs
[output_index
] = fs_reg(this, glsl_type::vec4_type
);
688 output_components
[output_index
] = 4;
689 return outputs
[output_index
];
692 case PROGRAM_UNDEFINED
:
696 _mesa_problem(ctx
, "bad dst register file: %s\n",
697 _mesa_register_file_name((gl_register_file
)dst
->File
));
698 return fs_reg(this, glsl_type::vec4_type
);
// Maps a program source register to a backend register, then applies the
// swizzle and negate of the operand when they are not the identity.
//
// src: program source register (File, Index, Swizzle, Negate are read).
//
// Visible behaviour:
//  - PROGRAM_TEMPORARY reads fp_temp_regs[src->Index]; another path reads
//    fp_input_regs[src->Index].
//  - For state vars, uniforms and constants the type is looked up in the
//    parameter list: constants are MOVed component by component into a fresh
//    vec4, state vars and uniforms become fs_reg(UNIFORM, src->Index * 4).
//  - Unhandled files report through _mesa_problem and return a fresh vec4.
// NOTE(review): the declaration of result, the outer switch header, some case
// labels and the end of the function are not visible in this view.
703 fs_visitor::get_fp_src_reg(const prog_src_register
*src
)
705 struct gl_program_parameter_list
*plist
= c
->fp
->program
.Base
.Parameters
;
712 case PROGRAM_UNDEFINED
:
714 case PROGRAM_TEMPORARY
:
715 result
= fp_temp_regs
[src
->Index
];
719 result
= fp_input_regs
[src
->Index
];
722 case PROGRAM_STATE_VAR
:
723 case PROGRAM_UNIFORM
:
724 case PROGRAM_CONSTANT
:
725 /* We actually want to look at the type in the Parameters list for this,
726 * because this lets us upload constant builtin uniforms, as actual
729 switch (plist
->Parameters
[src
->Index
].Type
) {
730 case PROGRAM_CONSTANT
: {
731 result
= fs_reg(this, glsl_type::vec4_type
);
733 for (int i
= 0; i
< 4; i
++) {
734 emit(BRW_OPCODE_MOV
, regoffset(result
, i
),
735 fs_reg(plist
->ParameterValues
[src
->Index
][i
].f
));
740 case PROGRAM_STATE_VAR
:
741 case PROGRAM_UNIFORM
:
742 result
= fs_reg(UNIFORM
, src
->Index
* 4);
746 _mesa_problem(ctx
, "bad uniform src register file: %s\n",
747 _mesa_register_file_name((gl_register_file
)src
->File
));
748 return fs_reg(this, glsl_type::vec4_type
);
753 _mesa_problem(ctx
, "bad src register file: %s\n",
754 _mesa_register_file_name((gl_register_file
)src
->File
));
755 return fs_reg(this, glsl_type::vec4_type
);
// Swizzle / negate resolution: build a new vec4 channel by channel from the
// unswizzled register. SWIZZLE_ZERO yields 0.0, SWIZZLE_ONE yields 1.0 or
// -1.0 depending on the negate bit, and any other selector copies component
// src_swiz of the unswizzled register.
758 if (src
->Swizzle
!= SWIZZLE_NOOP
|| src
->Negate
) {
759 fs_reg unswizzled
= result
;
760 result
= fs_reg(this, glsl_type::vec4_type
);
761 for (int i
= 0; i
< 4; i
++) {
762 bool negate
= src
->Negate
& (1 << i
);
763 /* The ZERO, ONE, and Negate options are only used for OPCODE_SWZ,
764 * but it costs us nothing to support it.
766 int src_swiz
= GET_SWZ(src
->Swizzle
, i
);
767 if (src_swiz
== SWIZZLE_ZERO
) {
768 emit(BRW_OPCODE_MOV
, regoffset(result
, i
), fs_reg(0.0f
));
769 } else if (src_swiz
== SWIZZLE_ONE
) {
770 emit(BRW_OPCODE_MOV
, regoffset(result
, i
),
771 negate
? fs_reg(-1.0f
) : fs_reg(1.0f
));
// Note that this local named src shadows the function parameter.
// NOTE(review): the negate flip below is presumably guarded by the per-channel
// negate flag; the guarding line is not visible in this view.
773 fs_reg src
= regoffset(unswizzled
, src_swiz
);
775 src
.negate
= !src
.negate
;
776 emit(BRW_OPCODE_MOV
, regoffset(result
, i
), src
);