d2dc2536be281f5d9b330cb265edaa18996c9be7
[mesa.git] / src / mesa / drivers / dri / i965 / brw_vec4_vp.cpp
1 /*
2 * Copyright © 2012 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 /** @file brw_vec4_vp.cpp
25 *
26 * A translator from Mesa IR to the i965 driver's Vec4 IR, used to implement
27 * ARB_vertex_program and fixed-function vertex processing.
28 */
29
30 #include "brw_context.h"
31 #include "brw_vec4.h"
32 #include "brw_vs.h"
33 extern "C" {
34 #include "program/prog_parameter.h"
35 #include "program/prog_print.h"
36 }
37 using namespace brw;
38
39 void
40 vec4_visitor::emit_vp_sop(uint32_t conditional_mod,
41 dst_reg dst, src_reg src0, src_reg src1,
42 src_reg one)
43 {
44 vec4_instruction *inst;
45
46 inst = emit(BRW_OPCODE_CMP, dst_null_d(), src0, src1);
47 inst->conditional_mod = conditional_mod;
48
49 inst = emit(BRW_OPCODE_SEL, dst, one, src_reg(0.0f));
50 inst->predicate = BRW_PREDICATE_NORMAL;
51 }
52
53 /**
54 * Reswizzle a given source register.
55 * \sa brw_swizzle().
56 */
57 static inline src_reg
58 reswizzle(src_reg orig, unsigned x, unsigned y, unsigned z, unsigned w)
59 {
60 src_reg t = orig;
61 t.swizzle = BRW_SWIZZLE4(BRW_GET_SWZ(orig.swizzle, x),
62 BRW_GET_SWZ(orig.swizzle, y),
63 BRW_GET_SWZ(orig.swizzle, z),
64 BRW_GET_SWZ(orig.swizzle, w));
65 return t;
66 }
67
68 void
69 vec4_vs_visitor::emit_program_code()
70 {
71 this->need_all_constants_in_pull_buffer = false;
72
73 setup_vp_regs();
74
75 /* Keep a reg with 1.0 around, for reuse by emit_vs_sop so that it can just
76 * be:
77 *
78 * sel.f0 dst 1.0 0.0
79 *
80 * instead of
81 *
82 * mov dst 0.0
83 * mov.f0 dst 1.0
84 */
85 src_reg one = src_reg(this, glsl_type::float_type);
86 emit(MOV(dst_reg(one), src_reg(1.0f)));
87
88 for (unsigned int insn = 0; insn < prog->NumInstructions; insn++) {
89 const struct prog_instruction *vpi = &prog->Instructions[insn];
90 base_ir = vpi;
91
92 dst_reg dst;
93 src_reg src[3];
94
95 /* We always emit into a temporary destination register to avoid
96 * aliasing issues.
97 */
98 dst = dst_reg(this, glsl_type::vec4_type);
99
100 for (int i = 0; i < 3; i++)
101 src[i] = get_vp_src_reg(vpi->SrcReg[i]);
102
103 switch (vpi->Opcode) {
104 case OPCODE_ABS:
105 src[0].abs = true;
106 src[0].negate = false;
107 emit(MOV(dst, src[0]));
108 break;
109
110 case OPCODE_ADD:
111 emit(ADD(dst, src[0], src[1]));
112 break;
113
114 case OPCODE_ARL:
115 if (brw->gen >= 6) {
116 dst.writemask = WRITEMASK_X;
117 dst_reg dst_f = dst;
118 dst_f.type = BRW_REGISTER_TYPE_F;
119
120 emit(RNDD(dst_f, src[0]));
121 emit(MOV(dst, src_reg(dst_f)));
122 } else {
123 emit(RNDD(dst, src[0]));
124 }
125 break;
126
127 case OPCODE_DP3:
128 emit(DP3(dst, src[0], src[1]));
129 break;
130 case OPCODE_DP4:
131 emit(DP4(dst, src[0], src[1]));
132 break;
133 case OPCODE_DPH:
134 emit(DPH(dst, src[0], src[1]));
135 break;
136
137 case OPCODE_DST: {
138 dst_reg t = dst;
139 if (vpi->DstReg.WriteMask & WRITEMASK_X) {
140 t.writemask = WRITEMASK_X;
141 emit(MOV(t, src_reg(1.0f)));
142 }
143 if (vpi->DstReg.WriteMask & WRITEMASK_Y) {
144 t.writemask = WRITEMASK_Y;
145 emit(MUL(t, src[0], src[1]));
146 }
147 if (vpi->DstReg.WriteMask & WRITEMASK_Z) {
148 t.writemask = WRITEMASK_Z;
149 emit(MOV(t, src[0]));
150 }
151 if (vpi->DstReg.WriteMask & WRITEMASK_W) {
152 t.writemask = WRITEMASK_W;
153 emit(MOV(t, src[1]));
154 }
155 break;
156 }
157
158 case OPCODE_EXP: {
159 dst_reg result = dst;
160 if (vpi->DstReg.WriteMask & WRITEMASK_X) {
161 /* tmp_d = floor(src[0].x) */
162 src_reg tmp_d = src_reg(this, glsl_type::ivec4_type);
163 assert(tmp_d.type == BRW_REGISTER_TYPE_D);
164 emit(RNDD(dst_reg(tmp_d), reswizzle(src[0], 0, 0, 0, 0)));
165
166 /* result[0] = 2.0 ^ tmp */
167 /* Adjust exponent for floating point: exp += 127 */
168 dst_reg tmp_d_x(GRF, tmp_d.reg, glsl_type::int_type, WRITEMASK_X);
169 emit(ADD(tmp_d_x, tmp_d, src_reg(127)));
170
171 /* Install exponent and sign. Excess drops off the edge: */
172 dst_reg res_d_x(GRF, result.reg, glsl_type::int_type, WRITEMASK_X);
173 emit(BRW_OPCODE_SHL, res_d_x, tmp_d, src_reg(23));
174 }
175 if (vpi->DstReg.WriteMask & WRITEMASK_Y) {
176 result.writemask = WRITEMASK_Y;
177 emit(FRC(result, src[0]));
178 }
179 if (vpi->DstReg.WriteMask & WRITEMASK_Z) {
180 result.writemask = WRITEMASK_Z;
181 emit_math(SHADER_OPCODE_EXP2, result, src[0]);
182 }
183 if (vpi->DstReg.WriteMask & WRITEMASK_W) {
184 result.writemask = WRITEMASK_W;
185 emit(MOV(result, src_reg(1.0f)));
186 }
187 break;
188 }
189
190 case OPCODE_EX2:
191 emit_math(SHADER_OPCODE_EXP2, dst, src[0]);
192 break;
193
194 case OPCODE_FLR:
195 emit(RNDD(dst, src[0]));
196 break;
197
198 case OPCODE_FRC:
199 emit(FRC(dst, src[0]));
200 break;
201
202 case OPCODE_LG2:
203 emit_math(SHADER_OPCODE_LOG2, dst, src[0]);
204 break;
205
206 case OPCODE_LIT: {
207 dst_reg result = dst;
208 /* From the ARB_vertex_program spec:
209 *
210 * tmp = VectorLoad(op0);
211 * if (tmp.x < 0) tmp.x = 0;
212 * if (tmp.y < 0) tmp.y = 0;
213 * if (tmp.w < -(128.0-epsilon)) tmp.w = -(128.0-epsilon);
214 * else if (tmp.w > 128-epsilon) tmp.w = 128-epsilon;
215 * result.x = 1.0;
216 * result.y = tmp.x;
217 * result.z = (tmp.x > 0) ? RoughApproxPower(tmp.y, tmp.w) : 0.0;
218 * result.w = 1.0;
219 *
220 * Note that we don't do the clamping to +/- 128. We didn't in
221 * brw_vs_emit.c either.
222 */
223 if (vpi->DstReg.WriteMask & WRITEMASK_XW) {
224 result.writemask = WRITEMASK_XW;
225 emit(MOV(result, src_reg(1.0f)));
226 }
227 if (vpi->DstReg.WriteMask & WRITEMASK_YZ) {
228 result.writemask = WRITEMASK_YZ;
229 emit(MOV(result, src_reg(0.0f)));
230
231 src_reg tmp_x = reswizzle(src[0], 0, 0, 0, 0);
232
233 emit(CMP(dst_null_d(), tmp_x, src_reg(0.0f), BRW_CONDITIONAL_G));
234 emit(IF(BRW_PREDICATE_NORMAL));
235
236 if (vpi->DstReg.WriteMask & WRITEMASK_Y) {
237 result.writemask = WRITEMASK_Y;
238 emit(MOV(result, tmp_x));
239 }
240
241 if (vpi->DstReg.WriteMask & WRITEMASK_Z) {
242 /* if (tmp.y < 0) tmp.y = 0; */
243 src_reg tmp_y = reswizzle(src[0], 1, 1, 1, 1);
244 result.writemask = WRITEMASK_Z;
245 emit_minmax(BRW_CONDITIONAL_G, result, tmp_y, src_reg(0.0f));
246
247 src_reg clamped_y(result);
248 clamped_y.swizzle = BRW_SWIZZLE_ZZZZ;
249
250 src_reg tmp_w = reswizzle(src[0], 3, 3, 3, 3);
251
252 emit_math(SHADER_OPCODE_POW, result, clamped_y, tmp_w);
253 }
254 emit(BRW_OPCODE_ENDIF);
255 }
256 break;
257 }
258
259 case OPCODE_LOG: {
260 dst_reg result = dst;
261 result.type = BRW_REGISTER_TYPE_UD;
262 src_reg result_src = src_reg(result);
263
264 src_reg arg0_ud = reswizzle(src[0], 0, 0, 0, 0);
265 arg0_ud.type = BRW_REGISTER_TYPE_UD;
266
267 /* Perform mant = frexpf(fabsf(x), &exp), adjust exp and mnt
268 * according to spec:
269 *
270 * These almost look likey they could be joined up, but not really
271 * practical:
272 *
273 * result[0].f = (x.i & ((1<<31)-1) >> 23) - 127
274 * result[1].i = (x.i & ((1<<23)-1) + (127<<23)
275 */
276 if (vpi->DstReg.WriteMask & WRITEMASK_XZ) {
277 result.writemask = WRITEMASK_X;
278 emit(AND(result, arg0_ud, src_reg((1u << 31) - 1)));
279 emit(BRW_OPCODE_SHR, result, result_src, src_reg(23u));
280 src_reg result_d(result_src);
281 result_d.type = BRW_REGISTER_TYPE_D; /* does it matter? */
282 result.type = BRW_REGISTER_TYPE_F;
283 emit(ADD(result, result_d, src_reg(-127)));
284 }
285
286 if (vpi->DstReg.WriteMask & WRITEMASK_YZ) {
287 result.writemask = WRITEMASK_Y;
288 result.type = BRW_REGISTER_TYPE_UD;
289 emit(AND(result, arg0_ud, src_reg((1u << 23) - 1)));
290 emit(OR(result, result_src, src_reg(127u << 23)));
291 }
292
293 if (vpi->DstReg.WriteMask & WRITEMASK_Z) {
294 /* result[2] = result[0] + LOG2(result[1]); */
295
296 /* Why bother? The above is just a hint how to do this with a
297 * taylor series. Maybe we *should* use a taylor series as by
298 * the time all the above has been done it's almost certainly
299 * quicker than calling the mathbox, even with low precision.
300 *
301 * Options are:
302 * - result[0] + mathbox.LOG2(result[1])
303 * - mathbox.LOG2(arg0.x)
304 * - result[0] + inline_taylor_approx(result[1])
305 */
306 result.type = BRW_REGISTER_TYPE_F;
307 result.writemask = WRITEMASK_Z;
308 src_reg result_x(result), result_y(result), result_z(result);
309 result_x.swizzle = BRW_SWIZZLE_XXXX;
310 result_y.swizzle = BRW_SWIZZLE_YYYY;
311 result_z.swizzle = BRW_SWIZZLE_ZZZZ;
312 emit_math(SHADER_OPCODE_LOG2, result, result_y);
313 emit(ADD(result, result_z, result_x));
314 }
315
316 if (vpi->DstReg.WriteMask & WRITEMASK_W) {
317 result.type = BRW_REGISTER_TYPE_F;
318 result.writemask = WRITEMASK_W;
319 emit(MOV(result, src_reg(1.0f)));
320 }
321 break;
322 }
323
324 case OPCODE_MAD: {
325 src_reg temp = src_reg(this, glsl_type::vec4_type);
326 emit(MUL(dst_reg(temp), src[0], src[1]));
327 emit(ADD(dst, temp, src[2]));
328 break;
329 }
330
331 case OPCODE_MAX:
332 emit_minmax(BRW_CONDITIONAL_G, dst, src[0], src[1]);
333 break;
334
335 case OPCODE_MIN:
336 emit_minmax(BRW_CONDITIONAL_L, dst, src[0], src[1]);
337 break;
338
339 case OPCODE_MOV:
340 emit(MOV(dst, src[0]));
341 break;
342
343 case OPCODE_MUL:
344 emit(MUL(dst, src[0], src[1]));
345 break;
346
347 case OPCODE_POW:
348 emit_math(SHADER_OPCODE_POW, dst, src[0], src[1]);
349 break;
350
351 case OPCODE_RCP:
352 emit_math(SHADER_OPCODE_RCP, dst, src[0]);
353 break;
354
355 case OPCODE_RSQ:
356 emit_math(SHADER_OPCODE_RSQ, dst, src[0]);
357 break;
358
359 case OPCODE_SGE:
360 emit_vp_sop(BRW_CONDITIONAL_GE, dst, src[0], src[1], one);
361 break;
362
363 case OPCODE_SLT:
364 emit_vp_sop(BRW_CONDITIONAL_L, dst, src[0], src[1], one);
365 break;
366
367 case OPCODE_SUB: {
368 src_reg neg_src1 = src[1];
369 neg_src1.negate = !src[1].negate;
370 emit(ADD(dst, src[0], neg_src1));
371 break;
372 }
373
374 case OPCODE_SWZ:
375 /* Note that SWZ's extended swizzles are handled in the general
376 * get_src_reg() code.
377 */
378 emit(MOV(dst, src[0]));
379 break;
380
381 case OPCODE_XPD: {
382 src_reg t1 = src_reg(this, glsl_type::vec4_type);
383 src_reg t2 = src_reg(this, glsl_type::vec4_type);
384
385 emit(MUL(dst_reg(t1),
386 reswizzle(src[0], 1, 2, 0, 3),
387 reswizzle(src[1], 2, 0, 1, 3)));
388 emit(MUL(dst_reg(t2),
389 reswizzle(src[0], 2, 0, 1, 3),
390 reswizzle(src[1], 1, 2, 0, 3)));
391 t2.negate = true;
392 emit(ADD(dst, t1, t2));
393 break;
394 }
395
396 case OPCODE_END:
397 break;
398
399 default:
400 _mesa_problem(ctx, "Unsupported opcode %s in vertex program\n",
401 _mesa_opcode_string(vpi->Opcode));
402 }
403
404 /* Copy the temporary back into the actual destination register. */
405 if (vpi->Opcode != OPCODE_END) {
406 emit(MOV(get_vp_dst_reg(vpi->DstReg), src_reg(dst)));
407 }
408 }
409
410 /* If we used relative addressing, we need to upload all constants as
411 * pull constants. Do that now.
412 */
413 if (this->need_all_constants_in_pull_buffer) {
414 const struct gl_program_parameter_list *params =
415 vs_compile->vp->program.Base.Parameters;
416 unsigned i;
417 for (i = 0; i < params->NumParameters * 4; i++) {
418 prog_data->pull_param[i] =
419 &params->ParameterValues[i / 4][i % 4].f;
420 }
421 prog_data->nr_pull_params = i;
422 }
423 }
424
425 void
426 vec4_vs_visitor::setup_vp_regs()
427 {
428 /* PROGRAM_TEMPORARY */
429 int num_temp = prog->NumTemporaries;
430 vp_temp_regs = rzalloc_array(mem_ctx, src_reg, num_temp);
431 for (int i = 0; i < num_temp; i++)
432 vp_temp_regs[i] = src_reg(this, glsl_type::vec4_type);
433
434 /* PROGRAM_STATE_VAR etc. */
435 struct gl_program_parameter_list *plist =
436 vs_compile->vp->program.Base.Parameters;
437 for (unsigned p = 0; p < plist->NumParameters; p++) {
438 unsigned components = plist->Parameters[p].Size;
439
440 /* Parameters should be either vec4 uniforms or single component
441 * constants; matrices and other larger types should have been broken
442 * down earlier.
443 */
444 assert(components <= 4);
445
446 this->uniform_size[this->uniforms] = 1; /* 1 vec4 */
447 this->uniform_vector_size[this->uniforms] = components;
448 for (unsigned i = 0; i < 4; i++) {
449 prog_data->param[this->uniforms * 4 + i] = i >= components
450 ? 0 : &plist->ParameterValues[p][i].f;
451 }
452 this->uniforms++; /* counted in vec4 units */
453 }
454
455 /* PROGRAM_OUTPUT */
456 for (int slot = 0; slot < prog_data->vue_map.num_slots; slot++) {
457 int varying = prog_data->vue_map.slot_to_varying[slot];
458 if (varying == VARYING_SLOT_PSIZ)
459 output_reg[varying] = dst_reg(this, glsl_type::float_type);
460 else
461 output_reg[varying] = dst_reg(this, glsl_type::vec4_type);
462 assert(output_reg[varying].type == BRW_REGISTER_TYPE_F);
463 }
464
465 /* PROGRAM_ADDRESS */
466 this->vp_addr_reg = src_reg(this, glsl_type::int_type);
467 assert(this->vp_addr_reg.type == BRW_REGISTER_TYPE_D);
468 }
469
470 dst_reg
471 vec4_vs_visitor::get_vp_dst_reg(const prog_dst_register &dst)
472 {
473 dst_reg result;
474
475 assert(!dst.RelAddr);
476
477 switch (dst.File) {
478 case PROGRAM_TEMPORARY:
479 result = dst_reg(vp_temp_regs[dst.Index]);
480 break;
481
482 case PROGRAM_OUTPUT:
483 result = output_reg[dst.Index];
484 break;
485
486 case PROGRAM_ADDRESS: {
487 assert(dst.Index == 0);
488 result = dst_reg(this->vp_addr_reg);
489 break;
490 }
491
492 case PROGRAM_UNDEFINED:
493 return dst_null_f();
494
495 default:
496 assert("vec4_vp: bad destination register file");
497 return dst_reg(this, glsl_type::vec4_type);
498 }
499
500 result.writemask = dst.WriteMask;
501 return result;
502 }
503
504 src_reg
505 vec4_vs_visitor::get_vp_src_reg(const prog_src_register &src)
506 {
507 struct gl_program_parameter_list *plist =
508 vs_compile->vp->program.Base.Parameters;
509
510 src_reg result;
511
512 assert(!src.Abs);
513
514 switch (src.File) {
515 case PROGRAM_UNDEFINED:
516 return src_reg(brw_null_reg());
517
518 case PROGRAM_TEMPORARY:
519 result = vp_temp_regs[src.Index];
520 break;
521
522 case PROGRAM_INPUT:
523 result = src_reg(ATTR, src.Index, glsl_type::vec4_type);
524 result.type = BRW_REGISTER_TYPE_F;
525 break;
526
527 case PROGRAM_ADDRESS: {
528 assert(src.Index == 0);
529 result = this->vp_addr_reg;
530 break;
531 }
532
533 case PROGRAM_STATE_VAR:
534 case PROGRAM_CONSTANT:
535 /* From the ARB_vertex_program specification:
536 * "Relative addressing can only be used for accessing program
537 * parameter arrays."
538 */
539 if (src.RelAddr) {
540 /* Since we have no idea what the base of the array is, we need to
541 * upload ALL constants as push constants.
542 */
543 this->need_all_constants_in_pull_buffer = true;
544
545 /* Add the small constant index to the address register */
546 src_reg reladdr = src_reg(this, glsl_type::int_type);
547 dst_reg dst_reladdr = dst_reg(reladdr);
548 dst_reladdr.writemask = WRITEMASK_X;
549 emit(ADD(dst_reladdr, this->vp_addr_reg, src_reg(src.Index)));
550
551 if (brw->gen < 6)
552 emit(MUL(dst_reladdr, reladdr, src_reg(16)));
553
554 #if 0
555 assert(src.Index < this->uniforms);
556 result = src_reg(dst_reg(UNIFORM, 0));
557 result.type = BRW_REGISTER_TYPE_F;
558 result.reladdr = new(mem_ctx) src_reg();
559 memcpy(result.reladdr, &reladdr, sizeof(src_reg));
560 #endif
561
562 result = src_reg(this, glsl_type::vec4_type);
563 src_reg surf_index = src_reg(unsigned(SURF_INDEX_VEC4_CONST_BUFFER));
564 vec4_instruction *load =
565 new(mem_ctx) vec4_instruction(this, VS_OPCODE_PULL_CONSTANT_LOAD,
566 dst_reg(result), surf_index, reladdr);
567 load->base_mrf = 14;
568 load->mlen = 1;
569 emit(load);
570 break;
571 }
572
573 /* We actually want to look at the type in the Parameters list for this,
574 * because this lets us upload constant builtin uniforms as actual
575 * constants.
576 */
577 switch (plist->Parameters[src.Index].Type) {
578 case PROGRAM_CONSTANT:
579 result = src_reg(this, glsl_type::vec4_type);
580 for (int i = 0; i < 4; i++) {
581 dst_reg t = dst_reg(result);
582 t.writemask = 1 << i;
583 emit(MOV(t, src_reg(plist->ParameterValues[src.Index][i].f)));
584 }
585 break;
586
587 case PROGRAM_STATE_VAR:
588 assert(src.Index < this->uniforms);
589 result = src_reg(dst_reg(UNIFORM, src.Index));
590 result.type = BRW_REGISTER_TYPE_F;
591 break;
592
593 default:
594 _mesa_problem(ctx, "bad uniform src register file: %s\n",
595 _mesa_register_file_name((gl_register_file)src.File));
596 return src_reg(this, glsl_type::vec4_type);
597 }
598 break;
599
600 default:
601 _mesa_problem(ctx, "bad src register file: %s\n",
602 _mesa_register_file_name((gl_register_file)src.File));
603 return src_reg(this, glsl_type::vec4_type);
604 }
605
606 if (src.Swizzle != SWIZZLE_NOOP || src.Negate) {
607 unsigned short zeros_mask = 0;
608 unsigned short ones_mask = 0;
609 unsigned short src_mask = 0;
610 unsigned short src_swiz[4];
611
612 for (int i = 0; i < 4; i++) {
613 src_swiz[i] = 0; /* initialize for safety */
614
615 /* The ZERO, ONE, and Negate options are only used for OPCODE_SWZ,
616 * but it's simplest to handle it here.
617 */
618 int s = GET_SWZ(src.Swizzle, i);
619 switch (s) {
620 case SWIZZLE_X:
621 case SWIZZLE_Y:
622 case SWIZZLE_Z:
623 case SWIZZLE_W:
624 src_mask |= 1 << i;
625 src_swiz[i] = s;
626 break;
627 case SWIZZLE_ZERO:
628 zeros_mask |= 1 << i;
629 break;
630 case SWIZZLE_ONE:
631 ones_mask |= 1 << i;
632 break;
633 }
634 }
635
636 result.swizzle =
637 BRW_SWIZZLE4(src_swiz[0], src_swiz[1], src_swiz[2], src_swiz[3]);
638
639 /* The hardware doesn't natively handle the SWZ instruction's zero/one
640 * swizzles or per-component negation, so we need to use a temporary.
641 */
642 if (zeros_mask || ones_mask || src.Negate) {
643 src_reg temp_src(this, glsl_type::vec4_type);
644 dst_reg temp(temp_src);
645
646 if (src_mask) {
647 temp.writemask = src_mask;
648 emit(MOV(temp, result));
649 }
650
651 if (zeros_mask) {
652 temp.writemask = zeros_mask;
653 emit(MOV(temp, src_reg(0.0f)));
654 }
655
656 if (ones_mask) {
657 temp.writemask = ones_mask;
658 emit(MOV(temp, src_reg(1.0f)));
659 }
660
661 if (src.Negate) {
662 temp.writemask = src.Negate;
663 src_reg neg(temp_src);
664 neg.negate = true;
665 emit(MOV(temp, neg));
666 }
667 result = temp_src;
668 }
669 }
670
671 return result;
672 }