i965/vec4: Simplify opt_reduce_swizzle() using the swizzle utils.
[mesa.git] / src / mesa / drivers / dri / i965 / brw_vec4_vp.cpp
1 /*
2 * Copyright © 2012 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 /** @file brw_vec4_vp.cpp
25 *
26 * A translator from Mesa IR to the i965 driver's Vec4 IR, used to implement
27 * ARB_vertex_program and fixed-function vertex processing.
28 */
29
30 #include "brw_context.h"
31 #include "brw_vec4.h"
32 #include "brw_vs.h"
33 extern "C" {
34 #include "program/prog_parameter.h"
35 #include "program/prog_print.h"
36 }
37 using namespace brw;
38
39 void
40 vec4_visitor::emit_vp_sop(enum brw_conditional_mod conditional_mod,
41 dst_reg dst, src_reg src0, src_reg src1,
42 src_reg one)
43 {
44 vec4_instruction *inst;
45
46 inst = emit(CMP(dst_null_f(), src0, src1, conditional_mod));
47
48 inst = emit(BRW_OPCODE_SEL, dst, one, src_reg(0.0f));
49 inst->predicate = BRW_PREDICATE_NORMAL;
50 }
51
/**
 * Translate every Mesa IR instruction of the vertex program into vec4 IR.
 *
 * For each entry of prog->Instructions the sources are fetched with
 * get_vp_src_reg(), the opcode is lowered into a freshly allocated vec4
 * temporary, and (when the opcode has a destination) the temporary is then
 * MOVed into the register returned by get_vp_dst_reg().
 *
 * After the loop, if any source lookup set need_all_constants_in_pull_buffer,
 * every component of every program parameter is registered in
 * stage_prog_data->pull_param.
 */
52 void
53 vec4_vs_visitor::emit_program_code()
54 {
55 this->need_all_constants_in_pull_buffer = false;
56
57 setup_vp_regs();
58
59 /* Keep a reg with 1.0 around, for reuse by emit_vp_sop so that it can just
60 * be:
61 *
62 * sel.f0 dst 1.0 0.0
63 *
64 * instead of
65 *
66 * mov dst 0.0
67 * mov.f0 dst 1.0
68 */
69 src_reg one = src_reg(this, glsl_type::float_type);
70 emit(MOV(dst_reg(one), src_reg(1.0f)));
71
72 for (unsigned int insn = 0; insn < prog->NumInstructions; insn++) {
73 const struct prog_instruction *vpi = &prog->Instructions[insn];
74 base_ir = vpi;
75
76 dst_reg dst;
77 src_reg src[3];
78
79 /* We always emit into a temporary destination register to avoid
80 * aliasing issues.
81 */
82 dst = dst_reg(this, glsl_type::vec4_type);
83
/* All three source slots are fetched regardless of how many the opcode
 * actually uses.
 */
84 for (int i = 0; i < 3; i++)
85 src[i] = get_vp_src_reg(vpi->SrcReg[i]);
86
87 switch (vpi->Opcode) {
88 case OPCODE_ABS:
89 src[0].abs = true;
90 src[0].negate = false;
91 emit(MOV(dst, src[0]));
92 break;
93
94 case OPCODE_ADD:
95 emit(ADD(dst, src[0], src[1]));
96 break;
97
98 case OPCODE_ARL:
/* On gen6+ the round-down goes through a float-typed alias of dst and
 * is then MOVed into dst itself; older gens emit RNDD straight into dst.
 * NOTE(review): presumably the MOV performs the float-to-int conversion
 * that gen6+ RNDD cannot do directly - confirm.
 */
99 if (brw->gen >= 6) {
100 dst.writemask = WRITEMASK_X;
101 dst_reg dst_f = dst;
102 dst_f.type = BRW_REGISTER_TYPE_F;
103
104 emit(RNDD(dst_f, src[0]));
105 emit(MOV(dst, src_reg(dst_f)));
106 } else {
107 emit(RNDD(dst, src[0]));
108 }
109 break;
110
111 case OPCODE_DP3:
112 emit(DP3(dst, src[0], src[1]));
113 break;
114 case OPCODE_DP4:
115 emit(DP4(dst, src[0], src[1]));
116 break;
117 case OPCODE_DPH:
118 emit(DPH(dst, src[0], src[1]));
119 break;
120
121 case OPCODE_DST: {
/* One instruction per enabled channel: x = 1.0, y = src0 * src1,
 * z = src0, w = src1.
 */
122 dst_reg t = dst;
123 if (vpi->DstReg.WriteMask & WRITEMASK_X) {
124 t.writemask = WRITEMASK_X;
125 emit(MOV(t, src_reg(1.0f)));
126 }
127 if (vpi->DstReg.WriteMask & WRITEMASK_Y) {
128 t.writemask = WRITEMASK_Y;
129 emit(MUL(t, src[0], src[1]));
130 }
131 if (vpi->DstReg.WriteMask & WRITEMASK_Z) {
132 t.writemask = WRITEMASK_Z;
133 emit(MOV(t, src[0]));
134 }
135 if (vpi->DstReg.WriteMask & WRITEMASK_W) {
136 t.writemask = WRITEMASK_W;
137 emit(MOV(t, src[1]));
138 }
139 break;
140 }
141
142 case OPCODE_EXP: {
143 dst_reg result = dst;
144 if (vpi->DstReg.WriteMask & WRITEMASK_X) {
145 /* tmp_d = floor(src[0].x) */
146 src_reg tmp_d = src_reg(this, glsl_type::ivec4_type);
147 assert(tmp_d.type == BRW_REGISTER_TYPE_D);
148 emit(RNDD(dst_reg(tmp_d), swizzle(src[0], BRW_SWIZZLE_XXXX)));
149
150 /* result[0] = 2.0 ^ tmp */
151 /* Adjust exponent for floating point: exp += 127 */
152 dst_reg tmp_d_x(GRF, tmp_d.reg, glsl_type::int_type, WRITEMASK_X);
153 emit(ADD(tmp_d_x, tmp_d, src_reg(127)));
154
155 /* Install exponent and sign. Excess drops off the edge: */
156 dst_reg res_d_x(GRF, result.reg, glsl_type::int_type, WRITEMASK_X);
157 emit(BRW_OPCODE_SHL, res_d_x, tmp_d, src_reg(23));
158 }
159 if (vpi->DstReg.WriteMask & WRITEMASK_Y) {
160 result.writemask = WRITEMASK_Y;
161 emit(FRC(result, src[0]));
162 }
163 if (vpi->DstReg.WriteMask & WRITEMASK_Z) {
164 result.writemask = WRITEMASK_Z;
165 emit_math(SHADER_OPCODE_EXP2, result, src[0]);
166 }
167 if (vpi->DstReg.WriteMask & WRITEMASK_W) {
168 result.writemask = WRITEMASK_W;
169 emit(MOV(result, src_reg(1.0f)));
170 }
171 break;
172 }
173
174 case OPCODE_EX2:
175 emit_math(SHADER_OPCODE_EXP2, dst, src[0]);
176 break;
177
178 case OPCODE_FLR:
179 emit(RNDD(dst, src[0]));
180 break;
181
182 case OPCODE_FRC:
183 emit(FRC(dst, src[0]));
184 break;
185
186 case OPCODE_LG2:
187 emit_math(SHADER_OPCODE_LOG2, dst, src[0]);
188 break;
189
190 case OPCODE_LIT: {
191 dst_reg result = dst;
192 /* From the ARB_vertex_program spec:
193 *
194 * tmp = VectorLoad(op0);
195 * if (tmp.x < 0) tmp.x = 0;
196 * if (tmp.y < 0) tmp.y = 0;
197 * if (tmp.w < -(128.0-epsilon)) tmp.w = -(128.0-epsilon);
198 * else if (tmp.w > 128-epsilon) tmp.w = 128-epsilon;
199 * result.x = 1.0;
200 * result.y = tmp.x;
201 * result.z = (tmp.x > 0) ? RoughApproxPower(tmp.y, tmp.w) : 0.0;
202 * result.w = 1.0;
203 *
204 * Note that we don't do the clamping to +/- 128. We didn't in
205 * brw_vs_emit.c either.
206 */
207 if (vpi->DstReg.WriteMask & WRITEMASK_XW) {
208 result.writemask = WRITEMASK_XW;
209 emit(MOV(result, src_reg(1.0f)));
210 }
211 if (vpi->DstReg.WriteMask & WRITEMASK_YZ) {
/* y and z default to 0.0 and are only overwritten inside the
 * IF block guarded by tmp.x > 0.
 */
212 result.writemask = WRITEMASK_YZ;
213 emit(MOV(result, src_reg(0.0f)));
214
215 src_reg tmp_x = swizzle(src[0], BRW_SWIZZLE_XXXX);
216
217 emit(CMP(dst_null_d(), tmp_x, src_reg(0.0f), BRW_CONDITIONAL_G));
218 emit(IF(BRW_PREDICATE_NORMAL));
219
220 if (vpi->DstReg.WriteMask & WRITEMASK_Y) {
221 result.writemask = WRITEMASK_Y;
222 emit(MOV(result, tmp_x));
223 }
224
225 if (vpi->DstReg.WriteMask & WRITEMASK_Z) {
226 /* if (tmp.y < 0) tmp.y = 0; */
227 src_reg tmp_y = swizzle(src[0], BRW_SWIZZLE_YYYY);
228 result.writemask = WRITEMASK_Z;
229 emit_minmax(BRW_CONDITIONAL_GE, result, tmp_y, src_reg(0.0f));
230
/* The clamped y was just written to result.z; read it back
 * from there as the base of the POW.
 */
231 src_reg clamped_y(result);
232 clamped_y.swizzle = BRW_SWIZZLE_ZZZZ;
233
234 src_reg tmp_w = swizzle(src[0], BRW_SWIZZLE_WWWW);
235
236 emit_math(SHADER_OPCODE_POW, result, clamped_y, tmp_w);
237 }
238 emit(BRW_OPCODE_ENDIF);
239 }
240 break;
241 }
242
243 case OPCODE_LOG: {
244 dst_reg result = dst;
245 result.type = BRW_REGISTER_TYPE_UD;
246 src_reg result_src = src_reg(result);
247
248 src_reg arg0_ud = swizzle(src[0], BRW_SWIZZLE_XXXX);
249 arg0_ud.type = BRW_REGISTER_TYPE_UD;
250
251 /* Perform mant = frexpf(fabsf(x), &exp), adjust exp and mant
252 * according to spec:
253 *
254 * These almost look like they could be joined up, but not really
255 * practical:
256 *
257 * result[0].f = (x.i & ((1<<31)-1) >> 23) - 127
258 * result[1].i = (x.i & ((1<<23)-1) + (127<<23)
259 */
/* x is also computed when only z is requested, because the z path
 * below reads result.x.
 */
260 if (vpi->DstReg.WriteMask & WRITEMASK_XZ) {
261 result.writemask = WRITEMASK_X;
262 emit(AND(result, arg0_ud, src_reg((1u << 31) - 1)));
263 emit(BRW_OPCODE_SHR, result, result_src, src_reg(23u));
264 src_reg result_d(result_src);
265 result_d.type = BRW_REGISTER_TYPE_D; /* does it matter? */
266 result.type = BRW_REGISTER_TYPE_F;
267 emit(ADD(result, result_d, src_reg(-127)));
268 }
269
/* Likewise y is computed when only z is requested, because the z path
 * reads result.y.
 */
270 if (vpi->DstReg.WriteMask & WRITEMASK_YZ) {
271 result.writemask = WRITEMASK_Y;
272 result.type = BRW_REGISTER_TYPE_UD;
273 emit(AND(result, arg0_ud, src_reg((1u << 23) - 1)));
274 emit(OR(result, result_src, src_reg(127u << 23)));
275 }
276
277 if (vpi->DstReg.WriteMask & WRITEMASK_Z) {
278 /* result[2] = result[0] + LOG2(result[1]); */
279
280 /* Why bother? The above is just a hint how to do this with a
281 * taylor series. Maybe we *should* use a taylor series as by
282 * the time all the above has been done it's almost certainly
283 * quicker than calling the mathbox, even with low precision.
284 *
285 * Options are:
286 * - result[0] + mathbox.LOG2(result[1])
287 * - mathbox.LOG2(arg0.x)
288 * - result[0] + inline_taylor_approx(result[1])
289 */
290 result.type = BRW_REGISTER_TYPE_F;
291 result.writemask = WRITEMASK_Z;
292 src_reg result_x(result), result_y(result), result_z(result);
293 result_x.swizzle = BRW_SWIZZLE_XXXX;
294 result_y.swizzle = BRW_SWIZZLE_YYYY;
295 result_z.swizzle = BRW_SWIZZLE_ZZZZ;
296 emit_math(SHADER_OPCODE_LOG2, result, result_y);
297 emit(ADD(result, result_z, result_x));
298 }
299
300 if (vpi->DstReg.WriteMask & WRITEMASK_W) {
301 result.type = BRW_REGISTER_TYPE_F;
302 result.writemask = WRITEMASK_W;
303 emit(MOV(result, src_reg(1.0f)));
304 }
305 break;
306 }
307
308 case OPCODE_MAD: {
/* Emitted as a separate MUL into a temporary followed by an ADD. */
309 src_reg temp = src_reg(this, glsl_type::vec4_type);
310 emit(MUL(dst_reg(temp), src[0], src[1]));
311 emit(ADD(dst, temp, src[2]));
312 break;
313 }
314
315 case OPCODE_MAX:
316 emit_minmax(BRW_CONDITIONAL_GE, dst, src[0], src[1]);
317 break;
318
319 case OPCODE_MIN:
320 emit_minmax(BRW_CONDITIONAL_L, dst, src[0], src[1]);
321 break;
322
323 case OPCODE_MOV:
324 emit(MOV(dst, src[0]));
325 break;
326
327 case OPCODE_MUL:
328 emit(MUL(dst, src[0], src[1]));
329 break;
330
331 case OPCODE_POW:
332 emit_math(SHADER_OPCODE_POW, dst, src[0], src[1]);
333 break;
334
335 case OPCODE_RCP:
336 emit_math(SHADER_OPCODE_RCP, dst, src[0]);
337 break;
338
339 case OPCODE_RSQ:
340 emit_math(SHADER_OPCODE_RSQ, dst, src[0]);
341 break;
342
343 case OPCODE_SGE:
344 emit_vp_sop(BRW_CONDITIONAL_GE, dst, src[0], src[1], one);
345 break;
346
347 case OPCODE_SLT:
348 emit_vp_sop(BRW_CONDITIONAL_L, dst, src[0], src[1], one);
349 break;
350
351 case OPCODE_SUB: {
/* dst = src0 + (-src1): the negate flag is flipped on a copy of src1
 * so that an already-negated operand becomes positive again.
 */
352 src_reg neg_src1 = src[1];
353 neg_src1.negate = !src[1].negate;
354 emit(ADD(dst, src[0], neg_src1));
355 break;
356 }
357
358 case OPCODE_SWZ:
359 /* Note that SWZ's extended swizzles are handled in the general
360 * get_vp_src_reg() code.
361 */
362 emit(MOV(dst, src[0]));
363 break;
364
365 case OPCODE_XPD: {
/* t1 = src0.yzxw * src1.zxyw
 * t2 = src0.zxyw * src1.yzxw
 * dst = t1 + (-t2)
 */
366 src_reg t1 = src_reg(this, glsl_type::vec4_type);
367 src_reg t2 = src_reg(this, glsl_type::vec4_type);
368
369 emit(MUL(dst_reg(t1),
370 swizzle(src[0], BRW_SWIZZLE_YZXW),
371 swizzle(src[1], BRW_SWIZZLE_ZXYW)));
372 emit(MUL(dst_reg(t2),
373 swizzle(src[0], BRW_SWIZZLE_ZXYW),
374 swizzle(src[1], BRW_SWIZZLE_YZXW)));
375 t2.negate = true;
376 emit(ADD(dst, t1, t2));
377 break;
378 }
379
380 case OPCODE_END:
381 break;
382
383 default:
384 _mesa_problem(ctx, "Unsupported opcode %s in vertex program\n",
385 _mesa_opcode_string(vpi->Opcode));
386 }
387
388 /* Copy the temporary back into the actual destination register. */
389 if (_mesa_num_inst_dst_regs(vpi->Opcode) != 0) {
390 emit(MOV(get_vp_dst_reg(vpi->DstReg), src_reg(dst)));
391 }
392 }
393
394 /* If we used relative addressing, we need to upload all constants as
395 * pull constants. Do that now.
396 */
397 if (this->need_all_constants_in_pull_buffer) {
398 const struct gl_program_parameter_list *params =
399 vs_compile->vp->program.Base.Parameters;
400 unsigned i;
/* i walks individual components: parameter i / 4, channel i % 4. */
401 for (i = 0; i < params->NumParameters * 4; i++) {
402 stage_prog_data->pull_param[i] =
403 &params->ParameterValues[i / 4][i % 4];
404 }
405 stage_prog_data->nr_pull_params = i;
406 }
407 }
408
409 void
410 vec4_vs_visitor::setup_vp_regs()
411 {
412 /* PROGRAM_TEMPORARY */
413 int num_temp = prog->NumTemporaries;
414 vp_temp_regs = rzalloc_array(mem_ctx, src_reg, num_temp);
415 for (int i = 0; i < num_temp; i++)
416 vp_temp_regs[i] = src_reg(this, glsl_type::vec4_type);
417
418 /* PROGRAM_STATE_VAR etc. */
419 struct gl_program_parameter_list *plist =
420 vs_compile->vp->program.Base.Parameters;
421 for (unsigned p = 0; p < plist->NumParameters; p++) {
422 unsigned components = plist->Parameters[p].Size;
423
424 /* Parameters should be either vec4 uniforms or single component
425 * constants; matrices and other larger types should have been broken
426 * down earlier.
427 */
428 assert(components <= 4);
429
430 this->uniform_size[this->uniforms] = 1; /* 1 vec4 */
431 this->uniform_vector_size[this->uniforms] = components;
432 for (unsigned i = 0; i < 4; i++) {
433 stage_prog_data->param[this->uniforms * 4 + i] = i >= components
434 ? 0 : &plist->ParameterValues[p][i];
435 }
436 this->uniforms++; /* counted in vec4 units */
437 }
438
439 /* PROGRAM_OUTPUT */
440 for (int slot = 0; slot < prog_data->vue_map.num_slots; slot++) {
441 int varying = prog_data->vue_map.slot_to_varying[slot];
442 if (varying == VARYING_SLOT_PSIZ)
443 output_reg[varying] = dst_reg(this, glsl_type::float_type);
444 else
445 output_reg[varying] = dst_reg(this, glsl_type::vec4_type);
446 assert(output_reg[varying].type == BRW_REGISTER_TYPE_F);
447 }
448
449 /* PROGRAM_ADDRESS */
450 this->vp_addr_reg = src_reg(this, glsl_type::int_type);
451 assert(this->vp_addr_reg.type == BRW_REGISTER_TYPE_D);
452 }
453
454 dst_reg
455 vec4_vs_visitor::get_vp_dst_reg(const prog_dst_register &dst)
456 {
457 dst_reg result;
458
459 assert(!dst.RelAddr);
460
461 switch (dst.File) {
462 case PROGRAM_TEMPORARY:
463 result = dst_reg(vp_temp_regs[dst.Index]);
464 break;
465
466 case PROGRAM_OUTPUT:
467 result = output_reg[dst.Index];
468 break;
469
470 case PROGRAM_ADDRESS: {
471 assert(dst.Index == 0);
472 result = dst_reg(this->vp_addr_reg);
473 break;
474 }
475
476 case PROGRAM_UNDEFINED:
477 return dst_null_f();
478
479 default:
480 unreachable("vec4_vp: bad destination register file");
481 }
482
483 result.writemask = dst.WriteMask;
484 return result;
485 }
486
/**
 * Build the vec4 IR source register for a Mesa IR source operand.
 *
 * Temporaries, inputs and the address register map directly onto existing
 * registers. For PROGRAM_STATE_VAR / PROGRAM_CONSTANT operands:
 *  - with relative addressing, a pull-constant load is emitted and
 *    need_all_constants_in_pull_buffer is set;
 *  - otherwise the parameter list's own type decides between MOVing the
 *    four immediate values into a temporary (constants) and referencing the
 *    UNIFORM file (state vars).
 *
 * Finally the operand's swizzle and per-channel negate are applied; ZERO/ONE
 * swizzle selectors and negation are materialized through an extra
 * temporary, which is then returned in place of the original register.
 *
 * Note that this function may emit instructions as a side effect. Unknown
 * register files are reported via _mesa_problem() and a fresh vec4
 * temporary is returned.
 */
487 src_reg
488 vec4_vs_visitor::get_vp_src_reg(const prog_src_register &src)
489 {
490 struct gl_program_parameter_list *plist =
491 vs_compile->vp->program.Base.Parameters;
492
493 src_reg result;
494
495 assert(!src.Abs);
496
497 switch (src.File) {
498 case PROGRAM_UNDEFINED:
499 return src_reg(brw_null_reg());
500
501 case PROGRAM_TEMPORARY:
502 result = vp_temp_regs[src.Index];
503 break;
504
505 case PROGRAM_INPUT:
506 result = src_reg(ATTR, src.Index, glsl_type::vec4_type);
507 result.type = BRW_REGISTER_TYPE_F;
508 break;
509
510 case PROGRAM_ADDRESS: {
511 assert(src.Index == 0);
512 result = this->vp_addr_reg;
513 break;
514 }
515
516 case PROGRAM_STATE_VAR:
517 case PROGRAM_CONSTANT:
518 /* From the ARB_vertex_program specification:
519 * "Relative addressing can only be used for accessing program
520 * parameter arrays."
521 */
522 if (src.RelAddr) {
523 /* Since we have no idea what the base of the array is, we need to
524 * upload ALL constants as pull constants.
525 */
526 this->need_all_constants_in_pull_buffer = true;
527
528 /* Add the small constant index to the address register */
529 src_reg reladdr = src_reg(this, glsl_type::int_type);
530
531 /* We have to use a message header on Skylake to get SIMD4x2 mode.
532 * Reserve space for the register.
533 */
534 if (brw->gen >= 9) {
535 reladdr.reg_offset++;
536 alloc.sizes[reladdr.reg] = 2;
537 }
538
539 dst_reg dst_reladdr = dst_reg(reladdr);
540 dst_reladdr.writemask = WRITEMASK_X;
541 emit(ADD(dst_reladdr, this->vp_addr_reg, src_reg(src.Index)));
542
/* NOTE(review): the multiply by 16 presumably converts a vec4 index
 * into a byte offset for pre-gen6 pull loads - confirm.
 */
543 if (brw->gen < 6)
544 emit(MUL(dst_reladdr, reladdr, src_reg(16)));
545
/* Disabled alternative: address the UNIFORM file through a reladdr
 * instead of emitting a pull-constant load.
 */
546 #if 0
547 assert(src.Index < this->uniforms);
548 result = src_reg(dst_reg(UNIFORM, 0));
549 result.type = BRW_REGISTER_TYPE_F;
550 result.reladdr = new(mem_ctx) src_reg();
551 memcpy(result.reladdr, &reladdr, sizeof(src_reg));
552 #endif
553
/* Load the addressed vec4 from the pull-constant surface into a fresh
 * temporary; the opcode and message setup differ on gen7+.
 */
554 result = src_reg(this, glsl_type::vec4_type);
555 src_reg surf_index = src_reg(unsigned(prog_data->base.binding_table.pull_constants_start));
556 vec4_instruction *load;
557 if (brw->gen >= 7) {
558 load = new(mem_ctx)
559 vec4_instruction(VS_OPCODE_PULL_CONSTANT_LOAD_GEN7,
560 dst_reg(result), surf_index, reladdr);
561 load->mlen = 1;
562 } else {
563 load = new(mem_ctx)
564 vec4_instruction(VS_OPCODE_PULL_CONSTANT_LOAD,
565 dst_reg(result), surf_index, reladdr);
566 load->base_mrf = 14;
567 load->mlen = 1;
568 }
569 emit(load);
570 break;
571 }
572
573 /* We actually want to look at the type in the Parameters list for this,
574 * because this lets us upload constant builtin uniforms as actual
575 * constants.
576 */
577 switch (plist->Parameters[src.Index].Type) {
578 case PROGRAM_CONSTANT:
/* Materialize the constant with one immediate MOV per channel. */
579 result = src_reg(this, glsl_type::vec4_type);
580 for (int i = 0; i < 4; i++) {
581 dst_reg t = dst_reg(result);
582 t.writemask = 1 << i;
583 emit(MOV(t, src_reg(plist->ParameterValues[src.Index][i].f)));
584 }
585 break;
586
587 case PROGRAM_STATE_VAR:
588 assert(src.Index < this->uniforms);
589 result = src_reg(dst_reg(UNIFORM, src.Index));
590 result.type = BRW_REGISTER_TYPE_F;
591 break;
592
593 default:
594 _mesa_problem(ctx, "bad uniform src register file: %s\n",
595 _mesa_register_file_name((gl_register_file)src.File));
596 return src_reg(this, glsl_type::vec4_type);
597 }
598 break;
599
600 default:
601 _mesa_problem(ctx, "bad src register file: %s\n",
602 _mesa_register_file_name((gl_register_file)src.File));
603 return src_reg(this, glsl_type::vec4_type);
604 }
605
606 if (src.Swizzle != SWIZZLE_NOOP || src.Negate) {
/* Per-channel bitmasks recording which destination channels take a
 * constant zero, a constant one, or a real source component.
 */
607 unsigned short zeros_mask = 0;
608 unsigned short ones_mask = 0;
609 unsigned short src_mask = 0;
610 unsigned short src_swiz[4];
611
612 for (int i = 0; i < 4; i++) {
613 src_swiz[i] = 0; /* initialize for safety */
614
615 /* The ZERO, ONE, and Negate options are only used for OPCODE_SWZ,
616 * but it's simplest to handle it here.
617 */
618 int s = GET_SWZ(src.Swizzle, i);
619 switch (s) {
620 case SWIZZLE_X:
621 case SWIZZLE_Y:
622 case SWIZZLE_Z:
623 case SWIZZLE_W:
624 src_mask |= 1 << i;
625 src_swiz[i] = s;
626 break;
627 case SWIZZLE_ZERO:
628 zeros_mask |= 1 << i;
629 break;
630 case SWIZZLE_ONE:
631 ones_mask |= 1 << i;
632 break;
633 }
634 }
635
636 result.swizzle =
637 BRW_SWIZZLE4(src_swiz[0], src_swiz[1], src_swiz[2], src_swiz[3]);
638
639 /* The hardware doesn't natively handle the SWZ instruction's zero/one
640 * swizzles or per-component negation, so we need to use a temporary.
641 */
642 if (zeros_mask || ones_mask || src.Negate) {
643 src_reg temp_src(this, glsl_type::vec4_type);
644 dst_reg temp(temp_src);
645
646 if (src_mask) {
647 temp.writemask = src_mask;
648 emit(MOV(temp, result));
649 }
650
651 if (zeros_mask) {
652 temp.writemask = zeros_mask;
653 emit(MOV(temp, src_reg(0.0f)));
654 }
655
656 if (ones_mask) {
657 temp.writemask = ones_mask;
658 emit(MOV(temp, src_reg(1.0f)));
659 }
660
/* src.Negate is used directly as the writemask, so the negating MOV
 * rewrites exactly the channels flagged for negation, in place, after
 * the channel values above have been written.
 */
661 if (src.Negate) {
662 temp.writemask = src.Negate;
663 src_reg neg(temp_src);
664 neg.negate = true;
665 emit(MOV(temp, neg));
666 }
667 result = temp_src;
668 }
669 }
670
671 return result;
672 }