i965/vp: Use the sampler for pull constant loads on Gen7/7.5.
[mesa.git] / src / mesa / drivers / dri / i965 / brw_vec4_vp.cpp
1 /*
2 * Copyright © 2012 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 /** @file brw_vec4_vp.cpp
25 *
26 * A translator from Mesa IR to the i965 driver's Vec4 IR, used to implement
27 * ARB_vertex_program and fixed-function vertex processing.
28 */
29
30 #include "brw_context.h"
31 #include "brw_vec4.h"
32 #include "brw_vs.h"
33 extern "C" {
34 #include "program/prog_parameter.h"
35 #include "program/prog_print.h"
36 }
37 using namespace brw;
38
39 void
40 vec4_visitor::emit_vp_sop(enum brw_conditional_mod conditional_mod,
41 dst_reg dst, src_reg src0, src_reg src1,
42 src_reg one)
43 {
44 vec4_instruction *inst;
45
46 inst = emit(BRW_OPCODE_CMP, dst_null_d(), src0, src1);
47 inst->conditional_mod = conditional_mod;
48
49 inst = emit(BRW_OPCODE_SEL, dst, one, src_reg(0.0f));
50 inst->predicate = BRW_PREDICATE_NORMAL;
51 }
52
53 void
54 vec4_vs_visitor::emit_program_code()
55 {
56 this->need_all_constants_in_pull_buffer = false;
57
58 setup_vp_regs();
59
60 /* Keep a reg with 1.0 around, for reuse by emit_vs_sop so that it can just
61 * be:
62 *
63 * sel.f0 dst 1.0 0.0
64 *
65 * instead of
66 *
67 * mov dst 0.0
68 * mov.f0 dst 1.0
69 */
70 src_reg one = src_reg(this, glsl_type::float_type);
71 emit(MOV(dst_reg(one), src_reg(1.0f)));
72
73 for (unsigned int insn = 0; insn < prog->NumInstructions; insn++) {
74 const struct prog_instruction *vpi = &prog->Instructions[insn];
75 base_ir = vpi;
76
77 dst_reg dst;
78 src_reg src[3];
79
80 /* We always emit into a temporary destination register to avoid
81 * aliasing issues.
82 */
83 dst = dst_reg(this, glsl_type::vec4_type);
84
85 for (int i = 0; i < 3; i++)
86 src[i] = get_vp_src_reg(vpi->SrcReg[i]);
87
88 switch (vpi->Opcode) {
89 case OPCODE_ABS:
90 src[0].abs = true;
91 src[0].negate = false;
92 emit(MOV(dst, src[0]));
93 break;
94
95 case OPCODE_ADD:
96 emit(ADD(dst, src[0], src[1]));
97 break;
98
99 case OPCODE_ARL:
100 if (brw->gen >= 6) {
101 dst.writemask = WRITEMASK_X;
102 dst_reg dst_f = dst;
103 dst_f.type = BRW_REGISTER_TYPE_F;
104
105 emit(RNDD(dst_f, src[0]));
106 emit(MOV(dst, src_reg(dst_f)));
107 } else {
108 emit(RNDD(dst, src[0]));
109 }
110 break;
111
112 case OPCODE_DP3:
113 emit(DP3(dst, src[0], src[1]));
114 break;
115 case OPCODE_DP4:
116 emit(DP4(dst, src[0], src[1]));
117 break;
118 case OPCODE_DPH:
119 emit(DPH(dst, src[0], src[1]));
120 break;
121
122 case OPCODE_DST: {
123 dst_reg t = dst;
124 if (vpi->DstReg.WriteMask & WRITEMASK_X) {
125 t.writemask = WRITEMASK_X;
126 emit(MOV(t, src_reg(1.0f)));
127 }
128 if (vpi->DstReg.WriteMask & WRITEMASK_Y) {
129 t.writemask = WRITEMASK_Y;
130 emit(MUL(t, src[0], src[1]));
131 }
132 if (vpi->DstReg.WriteMask & WRITEMASK_Z) {
133 t.writemask = WRITEMASK_Z;
134 emit(MOV(t, src[0]));
135 }
136 if (vpi->DstReg.WriteMask & WRITEMASK_W) {
137 t.writemask = WRITEMASK_W;
138 emit(MOV(t, src[1]));
139 }
140 break;
141 }
142
143 case OPCODE_EXP: {
144 dst_reg result = dst;
145 if (vpi->DstReg.WriteMask & WRITEMASK_X) {
146 /* tmp_d = floor(src[0].x) */
147 src_reg tmp_d = src_reg(this, glsl_type::ivec4_type);
148 assert(tmp_d.type == BRW_REGISTER_TYPE_D);
149 emit(RNDD(dst_reg(tmp_d), swizzle(src[0], BRW_SWIZZLE_XXXX)));
150
151 /* result[0] = 2.0 ^ tmp */
152 /* Adjust exponent for floating point: exp += 127 */
153 dst_reg tmp_d_x(GRF, tmp_d.reg, glsl_type::int_type, WRITEMASK_X);
154 emit(ADD(tmp_d_x, tmp_d, src_reg(127)));
155
156 /* Install exponent and sign. Excess drops off the edge: */
157 dst_reg res_d_x(GRF, result.reg, glsl_type::int_type, WRITEMASK_X);
158 emit(BRW_OPCODE_SHL, res_d_x, tmp_d, src_reg(23));
159 }
160 if (vpi->DstReg.WriteMask & WRITEMASK_Y) {
161 result.writemask = WRITEMASK_Y;
162 emit(FRC(result, src[0]));
163 }
164 if (vpi->DstReg.WriteMask & WRITEMASK_Z) {
165 result.writemask = WRITEMASK_Z;
166 emit_math(SHADER_OPCODE_EXP2, result, src[0]);
167 }
168 if (vpi->DstReg.WriteMask & WRITEMASK_W) {
169 result.writemask = WRITEMASK_W;
170 emit(MOV(result, src_reg(1.0f)));
171 }
172 break;
173 }
174
175 case OPCODE_EX2:
176 emit_math(SHADER_OPCODE_EXP2, dst, src[0]);
177 break;
178
179 case OPCODE_FLR:
180 emit(RNDD(dst, src[0]));
181 break;
182
183 case OPCODE_FRC:
184 emit(FRC(dst, src[0]));
185 break;
186
187 case OPCODE_LG2:
188 emit_math(SHADER_OPCODE_LOG2, dst, src[0]);
189 break;
190
191 case OPCODE_LIT: {
192 dst_reg result = dst;
193 /* From the ARB_vertex_program spec:
194 *
195 * tmp = VectorLoad(op0);
196 * if (tmp.x < 0) tmp.x = 0;
197 * if (tmp.y < 0) tmp.y = 0;
198 * if (tmp.w < -(128.0-epsilon)) tmp.w = -(128.0-epsilon);
199 * else if (tmp.w > 128-epsilon) tmp.w = 128-epsilon;
200 * result.x = 1.0;
201 * result.y = tmp.x;
202 * result.z = (tmp.x > 0) ? RoughApproxPower(tmp.y, tmp.w) : 0.0;
203 * result.w = 1.0;
204 *
205 * Note that we don't do the clamping to +/- 128. We didn't in
206 * brw_vs_emit.c either.
207 */
208 if (vpi->DstReg.WriteMask & WRITEMASK_XW) {
209 result.writemask = WRITEMASK_XW;
210 emit(MOV(result, src_reg(1.0f)));
211 }
212 if (vpi->DstReg.WriteMask & WRITEMASK_YZ) {
213 result.writemask = WRITEMASK_YZ;
214 emit(MOV(result, src_reg(0.0f)));
215
216 src_reg tmp_x = swizzle(src[0], BRW_SWIZZLE_XXXX);
217
218 emit(CMP(dst_null_d(), tmp_x, src_reg(0.0f), BRW_CONDITIONAL_G));
219 emit(IF(BRW_PREDICATE_NORMAL));
220
221 if (vpi->DstReg.WriteMask & WRITEMASK_Y) {
222 result.writemask = WRITEMASK_Y;
223 emit(MOV(result, tmp_x));
224 }
225
226 if (vpi->DstReg.WriteMask & WRITEMASK_Z) {
227 /* if (tmp.y < 0) tmp.y = 0; */
228 src_reg tmp_y = swizzle(src[0], BRW_SWIZZLE_YYYY);
229 result.writemask = WRITEMASK_Z;
230 emit_minmax(BRW_CONDITIONAL_G, result, tmp_y, src_reg(0.0f));
231
232 src_reg clamped_y(result);
233 clamped_y.swizzle = BRW_SWIZZLE_ZZZZ;
234
235 src_reg tmp_w = swizzle(src[0], BRW_SWIZZLE_WWWW);
236
237 emit_math(SHADER_OPCODE_POW, result, clamped_y, tmp_w);
238 }
239 emit(BRW_OPCODE_ENDIF);
240 }
241 break;
242 }
243
244 case OPCODE_LOG: {
245 dst_reg result = dst;
246 result.type = BRW_REGISTER_TYPE_UD;
247 src_reg result_src = src_reg(result);
248
249 src_reg arg0_ud = swizzle(src[0], BRW_SWIZZLE_XXXX);
250 arg0_ud.type = BRW_REGISTER_TYPE_UD;
251
252 /* Perform mant = frexpf(fabsf(x), &exp), adjust exp and mnt
253 * according to spec:
254 *
255 * These almost look likey they could be joined up, but not really
256 * practical:
257 *
258 * result[0].f = (x.i & ((1<<31)-1) >> 23) - 127
259 * result[1].i = (x.i & ((1<<23)-1) + (127<<23)
260 */
261 if (vpi->DstReg.WriteMask & WRITEMASK_XZ) {
262 result.writemask = WRITEMASK_X;
263 emit(AND(result, arg0_ud, src_reg((1u << 31) - 1)));
264 emit(BRW_OPCODE_SHR, result, result_src, src_reg(23u));
265 src_reg result_d(result_src);
266 result_d.type = BRW_REGISTER_TYPE_D; /* does it matter? */
267 result.type = BRW_REGISTER_TYPE_F;
268 emit(ADD(result, result_d, src_reg(-127)));
269 }
270
271 if (vpi->DstReg.WriteMask & WRITEMASK_YZ) {
272 result.writemask = WRITEMASK_Y;
273 result.type = BRW_REGISTER_TYPE_UD;
274 emit(AND(result, arg0_ud, src_reg((1u << 23) - 1)));
275 emit(OR(result, result_src, src_reg(127u << 23)));
276 }
277
278 if (vpi->DstReg.WriteMask & WRITEMASK_Z) {
279 /* result[2] = result[0] + LOG2(result[1]); */
280
281 /* Why bother? The above is just a hint how to do this with a
282 * taylor series. Maybe we *should* use a taylor series as by
283 * the time all the above has been done it's almost certainly
284 * quicker than calling the mathbox, even with low precision.
285 *
286 * Options are:
287 * - result[0] + mathbox.LOG2(result[1])
288 * - mathbox.LOG2(arg0.x)
289 * - result[0] + inline_taylor_approx(result[1])
290 */
291 result.type = BRW_REGISTER_TYPE_F;
292 result.writemask = WRITEMASK_Z;
293 src_reg result_x(result), result_y(result), result_z(result);
294 result_x.swizzle = BRW_SWIZZLE_XXXX;
295 result_y.swizzle = BRW_SWIZZLE_YYYY;
296 result_z.swizzle = BRW_SWIZZLE_ZZZZ;
297 emit_math(SHADER_OPCODE_LOG2, result, result_y);
298 emit(ADD(result, result_z, result_x));
299 }
300
301 if (vpi->DstReg.WriteMask & WRITEMASK_W) {
302 result.type = BRW_REGISTER_TYPE_F;
303 result.writemask = WRITEMASK_W;
304 emit(MOV(result, src_reg(1.0f)));
305 }
306 break;
307 }
308
309 case OPCODE_MAD: {
310 src_reg temp = src_reg(this, glsl_type::vec4_type);
311 emit(MUL(dst_reg(temp), src[0], src[1]));
312 emit(ADD(dst, temp, src[2]));
313 break;
314 }
315
316 case OPCODE_MAX:
317 emit_minmax(BRW_CONDITIONAL_G, dst, src[0], src[1]);
318 break;
319
320 case OPCODE_MIN:
321 emit_minmax(BRW_CONDITIONAL_L, dst, src[0], src[1]);
322 break;
323
324 case OPCODE_MOV:
325 emit(MOV(dst, src[0]));
326 break;
327
328 case OPCODE_MUL:
329 emit(MUL(dst, src[0], src[1]));
330 break;
331
332 case OPCODE_POW:
333 emit_math(SHADER_OPCODE_POW, dst, src[0], src[1]);
334 break;
335
336 case OPCODE_RCP:
337 emit_math(SHADER_OPCODE_RCP, dst, src[0]);
338 break;
339
340 case OPCODE_RSQ:
341 emit_math(SHADER_OPCODE_RSQ, dst, src[0]);
342 break;
343
344 case OPCODE_SGE:
345 emit_vp_sop(BRW_CONDITIONAL_GE, dst, src[0], src[1], one);
346 break;
347
348 case OPCODE_SLT:
349 emit_vp_sop(BRW_CONDITIONAL_L, dst, src[0], src[1], one);
350 break;
351
352 case OPCODE_SUB: {
353 src_reg neg_src1 = src[1];
354 neg_src1.negate = !src[1].negate;
355 emit(ADD(dst, src[0], neg_src1));
356 break;
357 }
358
359 case OPCODE_SWZ:
360 /* Note that SWZ's extended swizzles are handled in the general
361 * get_src_reg() code.
362 */
363 emit(MOV(dst, src[0]));
364 break;
365
366 case OPCODE_XPD: {
367 src_reg t1 = src_reg(this, glsl_type::vec4_type);
368 src_reg t2 = src_reg(this, glsl_type::vec4_type);
369
370 emit(MUL(dst_reg(t1),
371 swizzle(src[0], BRW_SWIZZLE_YZXW),
372 swizzle(src[1], BRW_SWIZZLE_ZXYW)));
373 emit(MUL(dst_reg(t2),
374 swizzle(src[0], BRW_SWIZZLE_ZXYW),
375 swizzle(src[1], BRW_SWIZZLE_YZXW)));
376 t2.negate = true;
377 emit(ADD(dst, t1, t2));
378 break;
379 }
380
381 case OPCODE_END:
382 break;
383
384 default:
385 _mesa_problem(ctx, "Unsupported opcode %s in vertex program\n",
386 _mesa_opcode_string(vpi->Opcode));
387 }
388
389 /* Copy the temporary back into the actual destination register. */
390 if (vpi->Opcode != OPCODE_END) {
391 emit(MOV(get_vp_dst_reg(vpi->DstReg), src_reg(dst)));
392 }
393 }
394
395 /* If we used relative addressing, we need to upload all constants as
396 * pull constants. Do that now.
397 */
398 if (this->need_all_constants_in_pull_buffer) {
399 const struct gl_program_parameter_list *params =
400 vs_compile->vp->program.Base.Parameters;
401 unsigned i;
402 for (i = 0; i < params->NumParameters * 4; i++) {
403 stage_prog_data->pull_param[i] =
404 &params->ParameterValues[i / 4][i % 4].f;
405 }
406 stage_prog_data->nr_pull_params = i;
407 }
408 }
409
410 void
411 vec4_vs_visitor::setup_vp_regs()
412 {
413 /* PROGRAM_TEMPORARY */
414 int num_temp = prog->NumTemporaries;
415 vp_temp_regs = rzalloc_array(mem_ctx, src_reg, num_temp);
416 for (int i = 0; i < num_temp; i++)
417 vp_temp_regs[i] = src_reg(this, glsl_type::vec4_type);
418
419 /* PROGRAM_STATE_VAR etc. */
420 struct gl_program_parameter_list *plist =
421 vs_compile->vp->program.Base.Parameters;
422 for (unsigned p = 0; p < plist->NumParameters; p++) {
423 unsigned components = plist->Parameters[p].Size;
424
425 /* Parameters should be either vec4 uniforms or single component
426 * constants; matrices and other larger types should have been broken
427 * down earlier.
428 */
429 assert(components <= 4);
430
431 this->uniform_size[this->uniforms] = 1; /* 1 vec4 */
432 this->uniform_vector_size[this->uniforms] = components;
433 for (unsigned i = 0; i < 4; i++) {
434 stage_prog_data->param[this->uniforms * 4 + i] = i >= components
435 ? 0 : &plist->ParameterValues[p][i].f;
436 }
437 this->uniforms++; /* counted in vec4 units */
438 }
439
440 /* PROGRAM_OUTPUT */
441 for (int slot = 0; slot < prog_data->vue_map.num_slots; slot++) {
442 int varying = prog_data->vue_map.slot_to_varying[slot];
443 if (varying == VARYING_SLOT_PSIZ)
444 output_reg[varying] = dst_reg(this, glsl_type::float_type);
445 else
446 output_reg[varying] = dst_reg(this, glsl_type::vec4_type);
447 assert(output_reg[varying].type == BRW_REGISTER_TYPE_F);
448 }
449
450 /* PROGRAM_ADDRESS */
451 this->vp_addr_reg = src_reg(this, glsl_type::int_type);
452 assert(this->vp_addr_reg.type == BRW_REGISTER_TYPE_D);
453 }
454
455 dst_reg
456 vec4_vs_visitor::get_vp_dst_reg(const prog_dst_register &dst)
457 {
458 dst_reg result;
459
460 assert(!dst.RelAddr);
461
462 switch (dst.File) {
463 case PROGRAM_TEMPORARY:
464 result = dst_reg(vp_temp_regs[dst.Index]);
465 break;
466
467 case PROGRAM_OUTPUT:
468 result = output_reg[dst.Index];
469 break;
470
471 case PROGRAM_ADDRESS: {
472 assert(dst.Index == 0);
473 result = dst_reg(this->vp_addr_reg);
474 break;
475 }
476
477 case PROGRAM_UNDEFINED:
478 return dst_null_f();
479
480 default:
481 unreachable("vec4_vp: bad destination register file");
482 }
483
484 result.writemask = dst.WriteMask;
485 return result;
486 }
487
488 src_reg
489 vec4_vs_visitor::get_vp_src_reg(const prog_src_register &src)
490 {
491 struct gl_program_parameter_list *plist =
492 vs_compile->vp->program.Base.Parameters;
493
494 src_reg result;
495
496 assert(!src.Abs);
497
498 switch (src.File) {
499 case PROGRAM_UNDEFINED:
500 return src_reg(brw_null_reg());
501
502 case PROGRAM_TEMPORARY:
503 result = vp_temp_regs[src.Index];
504 break;
505
506 case PROGRAM_INPUT:
507 result = src_reg(ATTR, src.Index, glsl_type::vec4_type);
508 result.type = BRW_REGISTER_TYPE_F;
509 break;
510
511 case PROGRAM_ADDRESS: {
512 assert(src.Index == 0);
513 result = this->vp_addr_reg;
514 break;
515 }
516
517 case PROGRAM_STATE_VAR:
518 case PROGRAM_CONSTANT:
519 /* From the ARB_vertex_program specification:
520 * "Relative addressing can only be used for accessing program
521 * parameter arrays."
522 */
523 if (src.RelAddr) {
524 /* Since we have no idea what the base of the array is, we need to
525 * upload ALL constants as push constants.
526 */
527 this->need_all_constants_in_pull_buffer = true;
528
529 /* Add the small constant index to the address register */
530 src_reg reladdr = src_reg(this, glsl_type::int_type);
531 dst_reg dst_reladdr = dst_reg(reladdr);
532 dst_reladdr.writemask = WRITEMASK_X;
533 emit(ADD(dst_reladdr, this->vp_addr_reg, src_reg(src.Index)));
534
535 if (brw->gen < 6)
536 emit(MUL(dst_reladdr, reladdr, src_reg(16)));
537
538 #if 0
539 assert(src.Index < this->uniforms);
540 result = src_reg(dst_reg(UNIFORM, 0));
541 result.type = BRW_REGISTER_TYPE_F;
542 result.reladdr = new(mem_ctx) src_reg();
543 memcpy(result.reladdr, &reladdr, sizeof(src_reg));
544 #endif
545
546 result = src_reg(this, glsl_type::vec4_type);
547 src_reg surf_index = src_reg(unsigned(prog_data->base.binding_table.pull_constants_start));
548 vec4_instruction *load;
549 if (brw->gen >= 7) {
550 load = new(mem_ctx)
551 vec4_instruction(this, VS_OPCODE_PULL_CONSTANT_LOAD_GEN7,
552 dst_reg(result), surf_index, reladdr);
553 } else {
554 load = new(mem_ctx)
555 vec4_instruction(this, VS_OPCODE_PULL_CONSTANT_LOAD,
556 dst_reg(result), surf_index, reladdr);
557 load->base_mrf = 14;
558 load->mlen = 1;
559 }
560 emit(load);
561 break;
562 }
563
564 /* We actually want to look at the type in the Parameters list for this,
565 * because this lets us upload constant builtin uniforms as actual
566 * constants.
567 */
568 switch (plist->Parameters[src.Index].Type) {
569 case PROGRAM_CONSTANT:
570 result = src_reg(this, glsl_type::vec4_type);
571 for (int i = 0; i < 4; i++) {
572 dst_reg t = dst_reg(result);
573 t.writemask = 1 << i;
574 emit(MOV(t, src_reg(plist->ParameterValues[src.Index][i].f)));
575 }
576 break;
577
578 case PROGRAM_STATE_VAR:
579 assert(src.Index < this->uniforms);
580 result = src_reg(dst_reg(UNIFORM, src.Index));
581 result.type = BRW_REGISTER_TYPE_F;
582 break;
583
584 default:
585 _mesa_problem(ctx, "bad uniform src register file: %s\n",
586 _mesa_register_file_name((gl_register_file)src.File));
587 return src_reg(this, glsl_type::vec4_type);
588 }
589 break;
590
591 default:
592 _mesa_problem(ctx, "bad src register file: %s\n",
593 _mesa_register_file_name((gl_register_file)src.File));
594 return src_reg(this, glsl_type::vec4_type);
595 }
596
597 if (src.Swizzle != SWIZZLE_NOOP || src.Negate) {
598 unsigned short zeros_mask = 0;
599 unsigned short ones_mask = 0;
600 unsigned short src_mask = 0;
601 unsigned short src_swiz[4];
602
603 for (int i = 0; i < 4; i++) {
604 src_swiz[i] = 0; /* initialize for safety */
605
606 /* The ZERO, ONE, and Negate options are only used for OPCODE_SWZ,
607 * but it's simplest to handle it here.
608 */
609 int s = GET_SWZ(src.Swizzle, i);
610 switch (s) {
611 case SWIZZLE_X:
612 case SWIZZLE_Y:
613 case SWIZZLE_Z:
614 case SWIZZLE_W:
615 src_mask |= 1 << i;
616 src_swiz[i] = s;
617 break;
618 case SWIZZLE_ZERO:
619 zeros_mask |= 1 << i;
620 break;
621 case SWIZZLE_ONE:
622 ones_mask |= 1 << i;
623 break;
624 }
625 }
626
627 result.swizzle =
628 BRW_SWIZZLE4(src_swiz[0], src_swiz[1], src_swiz[2], src_swiz[3]);
629
630 /* The hardware doesn't natively handle the SWZ instruction's zero/one
631 * swizzles or per-component negation, so we need to use a temporary.
632 */
633 if (zeros_mask || ones_mask || src.Negate) {
634 src_reg temp_src(this, glsl_type::vec4_type);
635 dst_reg temp(temp_src);
636
637 if (src_mask) {
638 temp.writemask = src_mask;
639 emit(MOV(temp, result));
640 }
641
642 if (zeros_mask) {
643 temp.writemask = zeros_mask;
644 emit(MOV(temp, src_reg(0.0f)));
645 }
646
647 if (ones_mask) {
648 temp.writemask = ones_mask;
649 emit(MOV(temp, src_reg(1.0f)));
650 }
651
652 if (src.Negate) {
653 temp.writemask = src.Negate;
654 src_reg neg(temp_src);
655 neg.negate = true;
656 emit(MOV(temp, neg));
657 }
658 result = temp_src;
659 }
660 }
661
662 return result;
663 }