i965: bump MAX_DEPTH_TEXTURE_SAMPLES to 4/8
[mesa.git] / src / mesa / drivers / dri / i965 / brw_vec4_vp.cpp
1 /*
2 * Copyright © 2012 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 /** @file brw_vec4_vp.cpp
25 *
26 * A translator from Mesa IR to the i965 driver's Vec4 IR, used to implement
27 * ARB_vertex_program and fixed-function vertex processing.
28 */
29
30 #include "brw_context.h"
31 #include "brw_vec4.h"
32 extern "C" {
33 #include "program/prog_parameter.h"
34 #include "program/prog_print.h"
35 }
36 using namespace brw;
37
38 void
39 vec4_visitor::emit_vp_sop(uint32_t conditional_mod,
40 dst_reg dst, src_reg src0, src_reg src1,
41 src_reg one)
42 {
43 vec4_instruction *inst;
44
45 inst = emit(BRW_OPCODE_CMP, dst_null_d(), src0, src1);
46 inst->conditional_mod = conditional_mod;
47
48 inst = emit(BRW_OPCODE_SEL, dst, one, src_reg(0.0f));
49 inst->predicate = BRW_PREDICATE_NORMAL;
50 }
51
52 /**
53 * Reswizzle a given source register.
54 * \sa brw_swizzle().
55 */
56 static inline src_reg
57 reswizzle(src_reg orig, unsigned x, unsigned y, unsigned z, unsigned w)
58 {
59 src_reg t = orig;
60 t.swizzle = BRW_SWIZZLE4(BRW_GET_SWZ(orig.swizzle, x),
61 BRW_GET_SWZ(orig.swizzle, y),
62 BRW_GET_SWZ(orig.swizzle, z),
63 BRW_GET_SWZ(orig.swizzle, w));
64 return t;
65 }
66
/**
 * Translate the Mesa IR vertex program in \c vp into Vec4 IR.
 *
 * After setup_vp_regs() has allocated the registers, every instruction in
 * vp->Base.Instructions is handled in order: its sources are fetched with
 * get_vp_src_reg(), the result is computed into a fresh vec4 temporary, and
 * that temporary is then copied with a MOV into the register returned by
 * get_vp_dst_reg().
 *
 * If need_all_constants_in_pull_buffer ended up set while fetching sources,
 * every component of every program parameter is finally registered in
 * c->prog_data.pull_param.
 */
void
vec4_visitor::emit_vertex_program_code()
{
   this->need_all_constants_in_pull_buffer = false;

   setup_vp_regs();

   /* Keep a reg with 1.0 around, for reuse by emit_vp_sop so that it can just
    * be:
    *
    * sel.f0 dst 1.0 0.0
    *
    * instead of
    *
    * mov    dst 0.0
    * mov.f0 dst 1.0
    */
   src_reg one = src_reg(this, glsl_type::float_type);
   emit(MOV(dst_reg(one), src_reg(1.0f)));

   for (unsigned int insn = 0; insn < vp->Base.NumInstructions; insn++) {
      const struct prog_instruction *vpi = &vp->Base.Instructions[insn];
      base_ir = vpi;

      dst_reg dst;
      src_reg src[3];

      /* We always emit into a temporary destination register to avoid
       * aliasing issues.
       */
      dst = dst_reg(this, glsl_type::vec4_type);

      /* All three source slots are fetched unconditionally, regardless of
       * how many operands the opcode actually uses.
       */
      for (int i = 0; i < 3; i++)
         src[i] = get_vp_src_reg(vpi->SrcReg[i]);

      switch (vpi->Opcode) {
      case OPCODE_ABS:
         /* Force the absolute-value modifier and drop any negation. */
         src[0].abs = true;
         src[0].negate = false;
         emit(MOV(dst, src[0]));
         break;

      case OPCODE_ADD:
         emit(ADD(dst, src[0], src[1]));
         break;

      case OPCODE_ARL:
         if (intel->gen >= 6) {
            /* Round down through a float-typed copy of the temporary
             * (X channel only), then MOV that into the temporary.
             */
            dst.writemask = WRITEMASK_X;
            dst_reg dst_f = dst;
            dst_f.type = BRW_REGISTER_TYPE_F;

            emit(RNDD(dst_f, src[0]));
            emit(MOV(dst, src_reg(dst_f)));
         } else {
            emit(RNDD(dst, src[0]));
         }
         break;

      case OPCODE_DP3:
         emit(DP3(dst, src[0], src[1]));
         break;
      case OPCODE_DP4:
         emit(DP4(dst, src[0], src[1]));
         break;
      case OPCODE_DPH:
         emit(DPH(dst, src[0], src[1]));
         break;

      case OPCODE_DST: {
         /* One instruction per enabled destination channel:
          * x = 1.0, y = src0 * src1, z = src0, w = src1.
          */
         dst_reg t = dst;
         if (vpi->DstReg.WriteMask & WRITEMASK_X) {
            t.writemask = WRITEMASK_X;
            emit(MOV(t, src_reg(1.0f)));
         }
         if (vpi->DstReg.WriteMask & WRITEMASK_Y) {
            t.writemask = WRITEMASK_Y;
            emit(MUL(t, src[0], src[1]));
         }
         if (vpi->DstReg.WriteMask & WRITEMASK_Z) {
            t.writemask = WRITEMASK_Z;
            emit(MOV(t, src[0]));
         }
         if (vpi->DstReg.WriteMask & WRITEMASK_W) {
            t.writemask = WRITEMASK_W;
            emit(MOV(t, src[1]));
         }
         break;
      }

      case OPCODE_EXP: {
         dst_reg result = dst;
         if (vpi->DstReg.WriteMask & WRITEMASK_X) {
            /* tmp_d = floor(src[0].x) */
            src_reg tmp_d = src_reg(this, glsl_type::ivec4_type);
            assert(tmp_d.type == BRW_REGISTER_TYPE_D);
            emit(RNDD(dst_reg(tmp_d), reswizzle(src[0], 0, 0, 0, 0)));

            /* result[0] = 2.0 ^ tmp */
            /* Adjust exponent for floating point: exp += 127 */
            dst_reg tmp_d_x(GRF, tmp_d.reg, glsl_type::int_type, WRITEMASK_X);
            emit(ADD(tmp_d_x, tmp_d, src_reg(127)));

            /* Install exponent and sign.  Excess drops off the edge: */
            dst_reg res_d_x(GRF, result.reg, glsl_type::int_type, WRITEMASK_X);
            emit(BRW_OPCODE_SHL, res_d_x, tmp_d, src_reg(23));
         }
         if (vpi->DstReg.WriteMask & WRITEMASK_Y) {
            result.writemask = WRITEMASK_Y;
            emit(FRC(result, src[0]));
         }
         if (vpi->DstReg.WriteMask & WRITEMASK_Z) {
            result.writemask = WRITEMASK_Z;
            emit_math(SHADER_OPCODE_EXP2, result, src[0]);
         }
         if (vpi->DstReg.WriteMask & WRITEMASK_W) {
            result.writemask = WRITEMASK_W;
            emit(MOV(result, src_reg(1.0f)));
         }
         break;
      }

      case OPCODE_EX2:
         emit_math(SHADER_OPCODE_EXP2, dst, src[0]);
         break;

      case OPCODE_FLR:
         emit(RNDD(dst, src[0]));
         break;

      case OPCODE_FRC:
         emit(FRC(dst, src[0]));
         break;

      case OPCODE_LG2:
         emit_math(SHADER_OPCODE_LOG2, dst, src[0]);
         break;

      case OPCODE_LIT: {
         dst_reg result = dst;
         /* From the ARB_vertex_program spec:
          *
          *      tmp = VectorLoad(op0);
          *      if (tmp.x < 0) tmp.x = 0;
          *      if (tmp.y < 0) tmp.y = 0;
          *      if (tmp.w < -(128.0-epsilon)) tmp.w = -(128.0-epsilon);
          *      else if (tmp.w > 128-epsilon) tmp.w = 128-epsilon;
          *      result.x = 1.0;
          *      result.y = tmp.x;
          *      result.z = (tmp.x > 0) ? RoughApproxPower(tmp.y, tmp.w) : 0.0;
          *      result.w = 1.0;
          *
          * Note that we don't do the clamping to +/- 128.  We didn't in
          * brw_vs_emit.c either.
          */
         if (vpi->DstReg.WriteMask & WRITEMASK_XW) {
            result.writemask = WRITEMASK_XW;
            emit(MOV(result, src_reg(1.0f)));
         }
         if (vpi->DstReg.WriteMask & WRITEMASK_YZ) {
            /* Default Y and Z to 0.0; they are only overwritten inside the
             * IF below, i.e. when tmp.x > 0.
             */
            result.writemask = WRITEMASK_YZ;
            emit(MOV(result, src_reg(0.0f)));

            src_reg tmp_x = reswizzle(src[0], 0, 0, 0, 0);

            emit(CMP(dst_null_d(), tmp_x, src_reg(0.0f), BRW_CONDITIONAL_G));
            emit(IF(BRW_PREDICATE_NORMAL));

            if (vpi->DstReg.WriteMask & WRITEMASK_Y) {
               result.writemask = WRITEMASK_Y;
               emit(MOV(result, tmp_x));
            }

            if (vpi->DstReg.WriteMask & WRITEMASK_Z) {
               /* if (tmp.y < 0) tmp.y = 0; */
               src_reg tmp_y = reswizzle(src[0], 1, 1, 1, 1);
               result.writemask = WRITEMASK_Z;
               emit_minmax(BRW_CONDITIONAL_G, result, tmp_y, src_reg(0.0f));

               /* The clamped value lives in result.z; read it back from
                * there as the base of the POW.
                */
               src_reg clamped_y(result);
               clamped_y.swizzle = BRW_SWIZZLE_ZZZZ;

               src_reg tmp_w = reswizzle(src[0], 3, 3, 3, 3);

               emit_math(SHADER_OPCODE_POW, result, clamped_y, tmp_w);
            }
            emit(BRW_OPCODE_ENDIF);
         }
         break;
      }

      case OPCODE_LOG: {
         dst_reg result = dst;
         result.type = BRW_REGISTER_TYPE_UD;
         /* Snapshot of the temporary as a UD-typed source, taken before the
          * writemask/type of "result" are changed below.
          */
         src_reg result_src = src_reg(result);

         src_reg arg0_ud = reswizzle(src[0], 0, 0, 0, 0);
         arg0_ud.type = BRW_REGISTER_TYPE_UD;

         /* Perform mant = frexpf(fabsf(x), &exp), adjust exp and mnt
          * according to spec:
          *
          * These almost look likely they could be joined up, but not really
          * practical:
          *
          * result[0].f = (x.i & ((1<<31)-1) >> 23) - 127
          * result[1].i = (x.i & ((1<<23)-1)        + (127<<23)
          */
         if (vpi->DstReg.WriteMask & WRITEMASK_XZ) {
            /* X is also computed when only Z is requested, because the Z
             * computation below reads it.
             */
            result.writemask = WRITEMASK_X;
            emit(AND(result, arg0_ud, src_reg((1u << 31) - 1)));
            emit(BRW_OPCODE_SHR, result, result_src, src_reg(23u));
            src_reg result_d(result_src);
            result_d.type = BRW_REGISTER_TYPE_D; /* does it matter? */
            result.type = BRW_REGISTER_TYPE_F;
            emit(ADD(result, result_d, src_reg(-127)));
         }

         if (vpi->DstReg.WriteMask & WRITEMASK_YZ) {
            /* Likewise Y is computed for a Z-only write. */
            result.writemask = WRITEMASK_Y;
            result.type = BRW_REGISTER_TYPE_UD;
            emit(AND(result, arg0_ud, src_reg((1u << 23) - 1)));
            emit(OR(result, result_src, src_reg(127u << 23)));
         }

         if (vpi->DstReg.WriteMask & WRITEMASK_Z) {
            /* result[2] = result[0] + LOG2(result[1]); */

            /* Why bother?  The above is just a hint how to do this with a
             * taylor series.  Maybe we *should* use a taylor series as by
             * the time all the above has been done it's almost certainly
             * quicker than calling the mathbox, even with low precision.
             *
             * Options are:
             *    - result[0] + mathbox.LOG2(result[1])
             *    - mathbox.LOG2(arg0.x)
             *    - result[0] + inline_taylor_approx(result[1])
             */
            result.type = BRW_REGISTER_TYPE_F;
            result.writemask = WRITEMASK_Z;
            src_reg result_x(result), result_y(result), result_z(result);
            result_x.swizzle = BRW_SWIZZLE_XXXX;
            result_y.swizzle = BRW_SWIZZLE_YYYY;
            result_z.swizzle = BRW_SWIZZLE_ZZZZ;
            emit_math(SHADER_OPCODE_LOG2, result, result_y);
            emit(ADD(result, result_z, result_x));
         }

         if (vpi->DstReg.WriteMask & WRITEMASK_W) {
            result.type = BRW_REGISTER_TYPE_F;
            result.writemask = WRITEMASK_W;
            emit(MOV(result, src_reg(1.0f)));
         }
         break;
      }

      case OPCODE_MAD: {
         /* Emitted as a separate MUL into a temporary followed by an ADD. */
         src_reg temp = src_reg(this, glsl_type::vec4_type);
         emit(MUL(dst_reg(temp), src[0], src[1]));
         emit(ADD(dst, temp, src[2]));
         break;
      }

      case OPCODE_MAX:
         emit_minmax(BRW_CONDITIONAL_G, dst, src[0], src[1]);
         break;

      case OPCODE_MIN:
         emit_minmax(BRW_CONDITIONAL_L, dst, src[0], src[1]);
         break;

      case OPCODE_MOV:
         emit(MOV(dst, src[0]));
         break;

      case OPCODE_MUL:
         emit(MUL(dst, src[0], src[1]));
         break;

      case OPCODE_POW:
         emit_math(SHADER_OPCODE_POW, dst, src[0], src[1]);
         break;

      case OPCODE_RCP:
         emit_math(SHADER_OPCODE_RCP, dst, src[0]);
         break;

      case OPCODE_RSQ:
         emit_math(SHADER_OPCODE_RSQ, dst, src[0]);
         break;

      case OPCODE_SGE:
         emit_vp_sop(BRW_CONDITIONAL_GE, dst, src[0], src[1], one);
         break;

      case OPCODE_SLT:
         emit_vp_sop(BRW_CONDITIONAL_L, dst, src[0], src[1], one);
         break;

      case OPCODE_SUB: {
         /* Subtraction is an ADD with the negate flag of src1 flipped. */
         src_reg neg_src1 = src[1];
         neg_src1.negate = !src[1].negate;
         emit(ADD(dst, src[0], neg_src1));
         break;
      }

      case OPCODE_SWZ:
         /* Note that SWZ's extended swizzles are handled in the general
          * get_vp_src_reg() code.
          */
         emit(MOV(dst, src[0]));
         break;

      case OPCODE_XPD: {
         /* Cross product: t1 = src0.yzx * src1.zxy, t2 = src0.zxy * src1.yzx,
          * dst = t1 - t2.
          */
         src_reg t1 = src_reg(this, glsl_type::vec4_type);
         src_reg t2 = src_reg(this, glsl_type::vec4_type);

         emit(MUL(dst_reg(t1),
                  reswizzle(src[0], 1, 2, 0, 3),
                  reswizzle(src[1], 2, 0, 1, 3)));
         emit(MUL(dst_reg(t2),
                  reswizzle(src[0], 2, 0, 1, 3),
                  reswizzle(src[1], 1, 2, 0, 3)));
         t2.negate = true;
         emit(ADD(dst, t1, t2));
         break;
      }

      case OPCODE_END:
         break;

      default:
         /* Only reports the problem; the copy-back MOV below is still
          * emitted for the unsupported instruction.
          */
         _mesa_problem(ctx, "Unsupported opcode %s in vertex program\n",
                       _mesa_opcode_string(vpi->Opcode));
      }

      /* Copy the temporary back into the actual destination register. */
      if (vpi->Opcode != OPCODE_END) {
         emit(MOV(get_vp_dst_reg(vpi->DstReg), src_reg(dst)));
      }
   }

   /* If we used relative addressing, we need to upload all constants as
    * pull constants.  Do that now.
    */
   if (this->need_all_constants_in_pull_buffer) {
      const struct gl_program_parameter_list *params = c->vp->program.Base.Parameters;
      unsigned i;
      /* One pull_param entry per float component, four per parameter. */
      for (i = 0; i < params->NumParameters * 4; i++) {
         c->prog_data.pull_param[i] = &params->ParameterValues[i / 4][i % 4].f;
      }
      c->prog_data.nr_pull_params = i;
   }
}
421
422 void
423 vec4_visitor::setup_vp_regs()
424 {
425 /* PROGRAM_TEMPORARY */
426 int num_temp = vp->Base.NumTemporaries;
427 vp_temp_regs = rzalloc_array(mem_ctx, src_reg, num_temp);
428 for (int i = 0; i < num_temp; i++)
429 vp_temp_regs[i] = src_reg(this, glsl_type::vec4_type);
430
431 /* PROGRAM_STATE_VAR etc. */
432 struct gl_program_parameter_list *plist = c->vp->program.Base.Parameters;
433 for (unsigned p = 0; p < plist->NumParameters; p++) {
434 unsigned components = plist->Parameters[p].Size;
435
436 /* Parameters should be either vec4 uniforms or single component
437 * constants; matrices and other larger types should have been broken
438 * down earlier.
439 */
440 assert(components <= 4);
441
442 this->uniform_size[this->uniforms] = 1; /* 1 vec4 */
443 this->uniform_vector_size[this->uniforms] = components;
444 for (unsigned i = 0; i < 4; i++) {
445 c->prog_data.param[this->uniforms * 4 + i] = i >= components ? 0 :
446 &plist->ParameterValues[p][i].f;
447 }
448 this->uniforms++; /* counted in vec4 units */
449 }
450
451 /* PROGRAM_OUTPUT */
452 for (int slot = 0; slot < c->prog_data.vue_map.num_slots; slot++) {
453 int vert_result = c->prog_data.vue_map.slot_to_vert_result[slot];
454 if (vert_result == VARYING_SLOT_PSIZ)
455 output_reg[vert_result] = dst_reg(this, glsl_type::float_type);
456 else
457 output_reg[vert_result] = dst_reg(this, glsl_type::vec4_type);
458 assert(output_reg[vert_result].type == BRW_REGISTER_TYPE_F);
459 }
460
461 /* PROGRAM_ADDRESS */
462 this->vp_addr_reg = src_reg(this, glsl_type::int_type);
463 assert(this->vp_addr_reg.type == BRW_REGISTER_TYPE_D);
464 }
465
466 dst_reg
467 vec4_visitor::get_vp_dst_reg(const prog_dst_register &dst)
468 {
469 dst_reg result;
470
471 assert(!dst.RelAddr);
472
473 switch (dst.File) {
474 case PROGRAM_TEMPORARY:
475 result = dst_reg(vp_temp_regs[dst.Index]);
476 break;
477
478 case PROGRAM_OUTPUT:
479 result = output_reg[dst.Index];
480 break;
481
482 case PROGRAM_ADDRESS: {
483 assert(dst.Index == 0);
484 result = dst_reg(this->vp_addr_reg);
485 break;
486 }
487
488 case PROGRAM_UNDEFINED:
489 return dst_null_f();
490
491 default:
492 assert("vec4_vp: bad destination register file");
493 return dst_reg(this, glsl_type::vec4_type);
494 }
495
496 result.writemask = dst.WriteMask;
497 return result;
498 }
499
500 src_reg
501 vec4_visitor::get_vp_src_reg(const prog_src_register &src)
502 {
503 struct gl_program_parameter_list *plist = c->vp->program.Base.Parameters;
504
505 src_reg result;
506
507 assert(!src.Abs);
508
509 switch (src.File) {
510 case PROGRAM_UNDEFINED:
511 return src_reg(brw_null_reg());
512
513 case PROGRAM_TEMPORARY:
514 result = vp_temp_regs[src.Index];
515 break;
516
517 case PROGRAM_INPUT:
518 result = src_reg(ATTR, src.Index, glsl_type::vec4_type);
519 result.type = BRW_REGISTER_TYPE_F;
520 break;
521
522 case PROGRAM_ADDRESS: {
523 assert(src.Index == 0);
524 result = this->vp_addr_reg;
525 break;
526 }
527
528 case PROGRAM_STATE_VAR:
529 case PROGRAM_CONSTANT:
530 /* From the ARB_vertex_program specification:
531 * "Relative addressing can only be used for accessing program
532 * parameter arrays."
533 */
534 if (src.RelAddr) {
535 /* Since we have no idea what the base of the array is, we need to
536 * upload ALL constants as push constants.
537 */
538 this->need_all_constants_in_pull_buffer = true;
539
540 /* Add the small constant index to the address register */
541 src_reg reladdr = src_reg(this, glsl_type::int_type);
542 dst_reg dst_reladdr = dst_reg(reladdr);
543 dst_reladdr.writemask = WRITEMASK_X;
544 emit(ADD(dst_reladdr, this->vp_addr_reg, src_reg(src.Index)));
545
546 if (intel->gen < 6)
547 emit(MUL(dst_reladdr, reladdr, src_reg(16)));
548
549 #if 0
550 assert(src.Index < this->uniforms);
551 result = src_reg(dst_reg(UNIFORM, 0));
552 result.type = BRW_REGISTER_TYPE_F;
553 result.reladdr = new(mem_ctx) src_reg();
554 memcpy(result.reladdr, &reladdr, sizeof(src_reg));
555 #endif
556
557 result = src_reg(this, glsl_type::vec4_type);
558 src_reg surf_index = src_reg(unsigned(SURF_INDEX_VERT_CONST_BUFFER));
559 vec4_instruction *load =
560 new(mem_ctx) vec4_instruction(this, VS_OPCODE_PULL_CONSTANT_LOAD,
561 dst_reg(result), surf_index, reladdr);
562 load->base_mrf = 14;
563 load->mlen = 1;
564 emit(load);
565 break;
566 }
567
568 /* We actually want to look at the type in the Parameters list for this,
569 * because this lets us upload constant builtin uniforms as actual
570 * constants.
571 */
572 switch (plist->Parameters[src.Index].Type) {
573 case PROGRAM_CONSTANT:
574 result = src_reg(this, glsl_type::vec4_type);
575 for (int i = 0; i < 4; i++) {
576 dst_reg t = dst_reg(result);
577 t.writemask = 1 << i;
578 emit(MOV(t, src_reg(plist->ParameterValues[src.Index][i].f)));
579 }
580 break;
581
582 case PROGRAM_STATE_VAR:
583 assert(src.Index < this->uniforms);
584 result = src_reg(dst_reg(UNIFORM, src.Index));
585 result.type = BRW_REGISTER_TYPE_F;
586 break;
587
588 default:
589 _mesa_problem(ctx, "bad uniform src register file: %s\n",
590 _mesa_register_file_name((gl_register_file)src.File));
591 return src_reg(this, glsl_type::vec4_type);
592 }
593 break;
594
595 default:
596 _mesa_problem(ctx, "bad src register file: %s\n",
597 _mesa_register_file_name((gl_register_file)src.File));
598 return src_reg(this, glsl_type::vec4_type);
599 }
600
601 if (src.Swizzle != SWIZZLE_NOOP || src.Negate) {
602 unsigned short zeros_mask = 0;
603 unsigned short ones_mask = 0;
604 unsigned short src_mask = 0;
605 unsigned short src_swiz[4];
606
607 for (int i = 0; i < 4; i++) {
608 src_swiz[i] = 0; /* initialize for safety */
609
610 /* The ZERO, ONE, and Negate options are only used for OPCODE_SWZ,
611 * but it's simplest to handle it here.
612 */
613 int s = GET_SWZ(src.Swizzle, i);
614 switch (s) {
615 case SWIZZLE_X:
616 case SWIZZLE_Y:
617 case SWIZZLE_Z:
618 case SWIZZLE_W:
619 src_mask |= 1 << i;
620 src_swiz[i] = s;
621 break;
622 case SWIZZLE_ZERO:
623 zeros_mask |= 1 << i;
624 break;
625 case SWIZZLE_ONE:
626 ones_mask |= 1 << i;
627 break;
628 }
629 }
630
631 result.swizzle =
632 BRW_SWIZZLE4(src_swiz[0], src_swiz[1], src_swiz[2], src_swiz[3]);
633
634 /* The hardware doesn't natively handle the SWZ instruction's zero/one
635 * swizzles or per-component negation, so we need to use a temporary.
636 */
637 if (zeros_mask || ones_mask || src.Negate) {
638 src_reg temp_src(this, glsl_type::vec4_type);
639 dst_reg temp(temp_src);
640
641 if (src_mask) {
642 temp.writemask = src_mask;
643 emit(MOV(temp, result));
644 }
645
646 if (zeros_mask) {
647 temp.writemask = zeros_mask;
648 emit(MOV(temp, src_reg(0.0f)));
649 }
650
651 if (ones_mask) {
652 temp.writemask = ones_mask;
653 emit(MOV(temp, src_reg(1.0f)));
654 }
655
656 if (src.Negate) {
657 temp.writemask = src.Negate;
658 src_reg neg(temp_src);
659 neg.negate = true;
660 emit(MOV(temp, neg));
661 }
662 result = temp_src;
663 }
664 }
665
666 return result;
667 }