i965/vec4: Turn some _mesa_problem calls into asserts
[mesa.git] / src / mesa / drivers / dri / i965 / brw_vec4_vp.cpp
1 /*
2 * Copyright © 2012 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 /** @file brw_vec4_vp.cpp
25 *
26 * A translator from Mesa IR to the i965 driver's Vec4 IR, used to implement
27 * ARB_vertex_program and fixed-function vertex processing.
28 */
29
30 #include "brw_context.h"
31 #include "brw_vec4.h"
32 #include "brw_vs.h"
33 extern "C" {
34 #include "program/prog_parameter.h"
35 #include "program/prog_print.h"
36 }
37 using namespace brw;
38
39 void
40 vec4_visitor::emit_vp_sop(enum brw_conditional_mod conditional_mod,
41 dst_reg dst, src_reg src0, src_reg src1,
42 src_reg one)
43 {
44 vec4_instruction *inst;
45
46 inst = emit(CMP(dst_null_f(), src0, src1, conditional_mod));
47
48 inst = emit(BRW_OPCODE_SEL, dst, one, src_reg(0.0f));
49 inst->predicate = BRW_PREDICATE_NORMAL;
50 }
51
52 void
53 vec4_vs_visitor::emit_program_code()
54 {
55 this->need_all_constants_in_pull_buffer = false;
56
57 setup_vp_regs();
58
59 /* Keep a reg with 1.0 around, for reuse by emit_vs_sop so that it can just
60 * be:
61 *
62 * sel.f0 dst 1.0 0.0
63 *
64 * instead of
65 *
66 * mov dst 0.0
67 * mov.f0 dst 1.0
68 */
69 src_reg one = src_reg(this, glsl_type::float_type);
70 emit(MOV(dst_reg(one), src_reg(1.0f)));
71
72 for (unsigned int insn = 0; insn < prog->NumInstructions; insn++) {
73 const struct prog_instruction *vpi = &prog->Instructions[insn];
74 base_ir = vpi;
75
76 dst_reg dst;
77 src_reg src[3];
78
79 /* We always emit into a temporary destination register to avoid
80 * aliasing issues.
81 */
82 dst = dst_reg(this, glsl_type::vec4_type);
83
84 for (int i = 0; i < 3; i++)
85 src[i] = get_vp_src_reg(vpi->SrcReg[i]);
86
87 switch (vpi->Opcode) {
88 case OPCODE_ABS:
89 src[0].abs = true;
90 src[0].negate = false;
91 emit(MOV(dst, src[0]));
92 break;
93
94 case OPCODE_ADD:
95 emit(ADD(dst, src[0], src[1]));
96 break;
97
98 case OPCODE_ARL:
99 if (devinfo->gen >= 6) {
100 dst.writemask = WRITEMASK_X;
101 dst_reg dst_f = dst;
102 dst_f.type = BRW_REGISTER_TYPE_F;
103
104 emit(RNDD(dst_f, src[0]));
105 emit(MOV(dst, src_reg(dst_f)));
106 } else {
107 emit(RNDD(dst, src[0]));
108 }
109 break;
110
111 case OPCODE_DP3:
112 emit(DP3(dst, src[0], src[1]));
113 break;
114 case OPCODE_DP4:
115 emit(DP4(dst, src[0], src[1]));
116 break;
117 case OPCODE_DPH:
118 emit(DPH(dst, src[0], src[1]));
119 break;
120
121 case OPCODE_DST: {
122 dst_reg t = dst;
123 if (vpi->DstReg.WriteMask & WRITEMASK_X) {
124 t.writemask = WRITEMASK_X;
125 emit(MOV(t, src_reg(1.0f)));
126 }
127 if (vpi->DstReg.WriteMask & WRITEMASK_Y) {
128 t.writemask = WRITEMASK_Y;
129 emit(MUL(t, src[0], src[1]));
130 }
131 if (vpi->DstReg.WriteMask & WRITEMASK_Z) {
132 t.writemask = WRITEMASK_Z;
133 emit(MOV(t, src[0]));
134 }
135 if (vpi->DstReg.WriteMask & WRITEMASK_W) {
136 t.writemask = WRITEMASK_W;
137 emit(MOV(t, src[1]));
138 }
139 break;
140 }
141
142 case OPCODE_EXP: {
143 dst_reg result = dst;
144 if (vpi->DstReg.WriteMask & WRITEMASK_X) {
145 /* tmp_d = floor(src[0].x) */
146 src_reg tmp_d = src_reg(this, glsl_type::ivec4_type);
147 assert(tmp_d.type == BRW_REGISTER_TYPE_D);
148 emit(RNDD(dst_reg(tmp_d), swizzle(src[0], BRW_SWIZZLE_XXXX)));
149
150 /* result[0] = 2.0 ^ tmp */
151 /* Adjust exponent for floating point: exp += 127 */
152 dst_reg tmp_d_x(GRF, tmp_d.reg, glsl_type::int_type, WRITEMASK_X);
153 emit(ADD(tmp_d_x, tmp_d, src_reg(127)));
154
155 /* Install exponent and sign. Excess drops off the edge: */
156 dst_reg res_d_x(GRF, result.reg, glsl_type::int_type, WRITEMASK_X);
157 emit(BRW_OPCODE_SHL, res_d_x, tmp_d, src_reg(23));
158 }
159 if (vpi->DstReg.WriteMask & WRITEMASK_Y) {
160 result.writemask = WRITEMASK_Y;
161 emit(FRC(result, src[0]));
162 }
163 if (vpi->DstReg.WriteMask & WRITEMASK_Z) {
164 result.writemask = WRITEMASK_Z;
165 emit_math(SHADER_OPCODE_EXP2, result, src[0]);
166 }
167 if (vpi->DstReg.WriteMask & WRITEMASK_W) {
168 result.writemask = WRITEMASK_W;
169 emit(MOV(result, src_reg(1.0f)));
170 }
171 break;
172 }
173
174 case OPCODE_EX2:
175 emit_math(SHADER_OPCODE_EXP2, dst, src[0]);
176 break;
177
178 case OPCODE_FLR:
179 emit(RNDD(dst, src[0]));
180 break;
181
182 case OPCODE_FRC:
183 emit(FRC(dst, src[0]));
184 break;
185
186 case OPCODE_LG2:
187 emit_math(SHADER_OPCODE_LOG2, dst, src[0]);
188 break;
189
190 case OPCODE_LIT: {
191 dst_reg result = dst;
192 /* From the ARB_vertex_program spec:
193 *
194 * tmp = VectorLoad(op0);
195 * if (tmp.x < 0) tmp.x = 0;
196 * if (tmp.y < 0) tmp.y = 0;
197 * if (tmp.w < -(128.0-epsilon)) tmp.w = -(128.0-epsilon);
198 * else if (tmp.w > 128-epsilon) tmp.w = 128-epsilon;
199 * result.x = 1.0;
200 * result.y = tmp.x;
201 * result.z = (tmp.x > 0) ? RoughApproxPower(tmp.y, tmp.w) : 0.0;
202 * result.w = 1.0;
203 *
204 * Note that we don't do the clamping to +/- 128. We didn't in
205 * brw_vs_emit.c either.
206 */
207 if (vpi->DstReg.WriteMask & WRITEMASK_XW) {
208 result.writemask = WRITEMASK_XW;
209 emit(MOV(result, src_reg(1.0f)));
210 }
211 if (vpi->DstReg.WriteMask & WRITEMASK_YZ) {
212 result.writemask = WRITEMASK_YZ;
213 emit(MOV(result, src_reg(0.0f)));
214
215 src_reg tmp_x = swizzle(src[0], BRW_SWIZZLE_XXXX);
216
217 emit(CMP(dst_null_d(), tmp_x, src_reg(0.0f), BRW_CONDITIONAL_G));
218 emit(IF(BRW_PREDICATE_NORMAL));
219
220 if (vpi->DstReg.WriteMask & WRITEMASK_Y) {
221 result.writemask = WRITEMASK_Y;
222 emit(MOV(result, tmp_x));
223 }
224
225 if (vpi->DstReg.WriteMask & WRITEMASK_Z) {
226 /* if (tmp.y < 0) tmp.y = 0; */
227 src_reg tmp_y = swizzle(src[0], BRW_SWIZZLE_YYYY);
228 result.writemask = WRITEMASK_Z;
229 emit_minmax(BRW_CONDITIONAL_GE, result, tmp_y, src_reg(0.0f));
230
231 src_reg clamped_y(result);
232 clamped_y.swizzle = BRW_SWIZZLE_ZZZZ;
233
234 src_reg tmp_w = swizzle(src[0], BRW_SWIZZLE_WWWW);
235
236 emit_math(SHADER_OPCODE_POW, result, clamped_y, tmp_w);
237 }
238 emit(BRW_OPCODE_ENDIF);
239 }
240 break;
241 }
242
243 case OPCODE_LOG: {
244 dst_reg result = dst;
245 result.type = BRW_REGISTER_TYPE_UD;
246 src_reg result_src = src_reg(result);
247
248 src_reg arg0_ud = swizzle(src[0], BRW_SWIZZLE_XXXX);
249 arg0_ud.type = BRW_REGISTER_TYPE_UD;
250
251 /* Perform mant = frexpf(fabsf(x), &exp), adjust exp and mnt
252 * according to spec:
253 *
254 * These almost look likey they could be joined up, but not really
255 * practical:
256 *
257 * result[0].f = (x.i & ((1<<31)-1) >> 23) - 127
258 * result[1].i = (x.i & ((1<<23)-1) + (127<<23)
259 */
260 if (vpi->DstReg.WriteMask & WRITEMASK_XZ) {
261 result.writemask = WRITEMASK_X;
262 emit(AND(result, arg0_ud, src_reg((1u << 31) - 1)));
263 emit(BRW_OPCODE_SHR, result, result_src, src_reg(23u));
264 src_reg result_d(result_src);
265 result_d.type = BRW_REGISTER_TYPE_D; /* does it matter? */
266 result.type = BRW_REGISTER_TYPE_F;
267 emit(ADD(result, result_d, src_reg(-127)));
268 }
269
270 if (vpi->DstReg.WriteMask & WRITEMASK_YZ) {
271 result.writemask = WRITEMASK_Y;
272 result.type = BRW_REGISTER_TYPE_UD;
273 emit(AND(result, arg0_ud, src_reg((1u << 23) - 1)));
274 emit(OR(result, result_src, src_reg(127u << 23)));
275 }
276
277 if (vpi->DstReg.WriteMask & WRITEMASK_Z) {
278 /* result[2] = result[0] + LOG2(result[1]); */
279
280 /* Why bother? The above is just a hint how to do this with a
281 * taylor series. Maybe we *should* use a taylor series as by
282 * the time all the above has been done it's almost certainly
283 * quicker than calling the mathbox, even with low precision.
284 *
285 * Options are:
286 * - result[0] + mathbox.LOG2(result[1])
287 * - mathbox.LOG2(arg0.x)
288 * - result[0] + inline_taylor_approx(result[1])
289 */
290 result.type = BRW_REGISTER_TYPE_F;
291 result.writemask = WRITEMASK_Z;
292 src_reg result_x(result), result_y(result), result_z(result);
293 result_x.swizzle = BRW_SWIZZLE_XXXX;
294 result_y.swizzle = BRW_SWIZZLE_YYYY;
295 result_z.swizzle = BRW_SWIZZLE_ZZZZ;
296 emit_math(SHADER_OPCODE_LOG2, result, result_y);
297 emit(ADD(result, result_z, result_x));
298 }
299
300 if (vpi->DstReg.WriteMask & WRITEMASK_W) {
301 result.type = BRW_REGISTER_TYPE_F;
302 result.writemask = WRITEMASK_W;
303 emit(MOV(result, src_reg(1.0f)));
304 }
305 break;
306 }
307
308 case OPCODE_MAD: {
309 src_reg temp = src_reg(this, glsl_type::vec4_type);
310 emit(MUL(dst_reg(temp), src[0], src[1]));
311 emit(ADD(dst, temp, src[2]));
312 break;
313 }
314
315 case OPCODE_MAX:
316 emit_minmax(BRW_CONDITIONAL_GE, dst, src[0], src[1]);
317 break;
318
319 case OPCODE_MIN:
320 emit_minmax(BRW_CONDITIONAL_L, dst, src[0], src[1]);
321 break;
322
323 case OPCODE_MOV:
324 emit(MOV(dst, src[0]));
325 break;
326
327 case OPCODE_MUL:
328 emit(MUL(dst, src[0], src[1]));
329 break;
330
331 case OPCODE_POW:
332 emit_math(SHADER_OPCODE_POW, dst, src[0], src[1]);
333 break;
334
335 case OPCODE_RCP:
336 emit_math(SHADER_OPCODE_RCP, dst, src[0]);
337 break;
338
339 case OPCODE_RSQ:
340 emit_math(SHADER_OPCODE_RSQ, dst, src[0]);
341 break;
342
343 case OPCODE_SGE:
344 emit_vp_sop(BRW_CONDITIONAL_GE, dst, src[0], src[1], one);
345 break;
346
347 case OPCODE_SLT:
348 emit_vp_sop(BRW_CONDITIONAL_L, dst, src[0], src[1], one);
349 break;
350
351 case OPCODE_SUB: {
352 src_reg neg_src1 = src[1];
353 neg_src1.negate = !src[1].negate;
354 emit(ADD(dst, src[0], neg_src1));
355 break;
356 }
357
358 case OPCODE_SWZ:
359 /* Note that SWZ's extended swizzles are handled in the general
360 * get_src_reg() code.
361 */
362 emit(MOV(dst, src[0]));
363 break;
364
365 case OPCODE_XPD: {
366 src_reg t1 = src_reg(this, glsl_type::vec4_type);
367 src_reg t2 = src_reg(this, glsl_type::vec4_type);
368
369 emit(MUL(dst_reg(t1),
370 swizzle(src[0], BRW_SWIZZLE_YZXW),
371 swizzle(src[1], BRW_SWIZZLE_ZXYW)));
372 emit(MUL(dst_reg(t2),
373 swizzle(src[0], BRW_SWIZZLE_ZXYW),
374 swizzle(src[1], BRW_SWIZZLE_YZXW)));
375 t2.negate = true;
376 emit(ADD(dst, t1, t2));
377 break;
378 }
379
380 case OPCODE_END:
381 break;
382
383 default:
384 assert(!"Unsupported opcode in vertex program");
385 }
386
387 /* Copy the temporary back into the actual destination register. */
388 if (_mesa_num_inst_dst_regs(vpi->Opcode) != 0) {
389 emit(MOV(get_vp_dst_reg(vpi->DstReg), src_reg(dst)));
390 }
391 }
392
393 /* If we used relative addressing, we need to upload all constants as
394 * pull constants. Do that now.
395 */
396 if (this->need_all_constants_in_pull_buffer) {
397 const struct gl_program_parameter_list *params =
398 vs_compile->vp->program.Base.Parameters;
399 unsigned i;
400 for (i = 0; i < params->NumParameters * 4; i++) {
401 stage_prog_data->pull_param[i] =
402 &params->ParameterValues[i / 4][i % 4];
403 }
404 stage_prog_data->nr_pull_params = i;
405 }
406 }
407
408 void
409 vec4_vs_visitor::setup_vp_regs()
410 {
411 /* PROGRAM_TEMPORARY */
412 int num_temp = prog->NumTemporaries;
413 vp_temp_regs = rzalloc_array(mem_ctx, src_reg, num_temp);
414 for (int i = 0; i < num_temp; i++)
415 vp_temp_regs[i] = src_reg(this, glsl_type::vec4_type);
416
417 /* PROGRAM_STATE_VAR etc. */
418 struct gl_program_parameter_list *plist =
419 vs_compile->vp->program.Base.Parameters;
420 for (unsigned p = 0; p < plist->NumParameters; p++) {
421 unsigned components = plist->Parameters[p].Size;
422
423 /* Parameters should be either vec4 uniforms or single component
424 * constants; matrices and other larger types should have been broken
425 * down earlier.
426 */
427 assert(components <= 4);
428
429 this->uniform_size[this->uniforms] = 1; /* 1 vec4 */
430 this->uniform_vector_size[this->uniforms] = components;
431 for (unsigned i = 0; i < 4; i++) {
432 stage_prog_data->param[this->uniforms * 4 + i] = i >= components
433 ? 0 : &plist->ParameterValues[p][i];
434 }
435 this->uniforms++; /* counted in vec4 units */
436 }
437
438 /* PROGRAM_OUTPUT */
439 for (int slot = 0; slot < prog_data->vue_map.num_slots; slot++) {
440 int varying = prog_data->vue_map.slot_to_varying[slot];
441 if (varying == VARYING_SLOT_PSIZ)
442 output_reg[varying] = dst_reg(this, glsl_type::float_type);
443 else
444 output_reg[varying] = dst_reg(this, glsl_type::vec4_type);
445 assert(output_reg[varying].type == BRW_REGISTER_TYPE_F);
446 }
447
448 /* PROGRAM_ADDRESS */
449 this->vp_addr_reg = src_reg(this, glsl_type::int_type);
450 assert(this->vp_addr_reg.type == BRW_REGISTER_TYPE_D);
451 }
452
453 dst_reg
454 vec4_vs_visitor::get_vp_dst_reg(const prog_dst_register &dst)
455 {
456 dst_reg result;
457
458 assert(!dst.RelAddr);
459
460 switch (dst.File) {
461 case PROGRAM_TEMPORARY:
462 result = dst_reg(vp_temp_regs[dst.Index]);
463 break;
464
465 case PROGRAM_OUTPUT:
466 result = output_reg[dst.Index];
467 break;
468
469 case PROGRAM_ADDRESS: {
470 assert(dst.Index == 0);
471 result = dst_reg(this->vp_addr_reg);
472 break;
473 }
474
475 case PROGRAM_UNDEFINED:
476 return dst_null_f();
477
478 default:
479 unreachable("vec4_vp: bad destination register file");
480 }
481
482 result.writemask = dst.WriteMask;
483 return result;
484 }
485
486 src_reg
487 vec4_vs_visitor::get_vp_src_reg(const prog_src_register &src)
488 {
489 struct gl_program_parameter_list *plist =
490 vs_compile->vp->program.Base.Parameters;
491
492 src_reg result;
493
494 assert(!src.Abs);
495
496 switch (src.File) {
497 case PROGRAM_UNDEFINED:
498 return src_reg(brw_null_reg());
499
500 case PROGRAM_TEMPORARY:
501 result = vp_temp_regs[src.Index];
502 break;
503
504 case PROGRAM_INPUT:
505 result = src_reg(ATTR, src.Index, glsl_type::vec4_type);
506 result.type = BRW_REGISTER_TYPE_F;
507 break;
508
509 case PROGRAM_ADDRESS: {
510 assert(src.Index == 0);
511 result = this->vp_addr_reg;
512 break;
513 }
514
515 case PROGRAM_STATE_VAR:
516 case PROGRAM_CONSTANT:
517 /* From the ARB_vertex_program specification:
518 * "Relative addressing can only be used for accessing program
519 * parameter arrays."
520 */
521 if (src.RelAddr) {
522 /* Since we have no idea what the base of the array is, we need to
523 * upload ALL constants as push constants.
524 */
525 this->need_all_constants_in_pull_buffer = true;
526
527 /* Add the small constant index to the address register */
528 src_reg reladdr = src_reg(this, glsl_type::int_type);
529
530 dst_reg dst_reladdr = dst_reg(reladdr);
531 dst_reladdr.writemask = WRITEMASK_X;
532 emit(ADD(dst_reladdr, this->vp_addr_reg, src_reg(src.Index)));
533
534 if (devinfo->gen < 6)
535 emit(MUL(dst_reladdr, reladdr, src_reg(16)));
536
537 #if 0
538 assert(src.Index < this->uniforms);
539 result = src_reg(dst_reg(UNIFORM, 0));
540 result.type = BRW_REGISTER_TYPE_F;
541 result.reladdr = new(mem_ctx) src_reg();
542 memcpy(result.reladdr, &reladdr, sizeof(src_reg));
543 #endif
544
545 result = src_reg(this, glsl_type::vec4_type);
546 src_reg surf_index = src_reg(unsigned(prog_data->base.binding_table.pull_constants_start));
547
548 emit_pull_constant_load_reg(dst_reg(result),
549 surf_index,
550 reladdr,
551 NULL, NULL /* before_block/inst */);
552 break;
553 }
554
555 /* We actually want to look at the type in the Parameters list for this,
556 * because this lets us upload constant builtin uniforms as actual
557 * constants.
558 */
559 switch (plist->Parameters[src.Index].Type) {
560 case PROGRAM_CONSTANT:
561 result = src_reg(this, glsl_type::vec4_type);
562 for (int i = 0; i < 4; i++) {
563 dst_reg t = dst_reg(result);
564 t.writemask = 1 << i;
565 emit(MOV(t, src_reg(plist->ParameterValues[src.Index][i].f)));
566 }
567 break;
568
569 case PROGRAM_STATE_VAR:
570 assert(src.Index < this->uniforms);
571 result = src_reg(dst_reg(UNIFORM, src.Index));
572 result.type = BRW_REGISTER_TYPE_F;
573 break;
574
575 default:
576 assert(!"Bad uniform in src register file");
577 return src_reg(this, glsl_type::vec4_type);
578 }
579 break;
580
581 default:
582 assert(!"Bad src register file");
583 return src_reg(this, glsl_type::vec4_type);
584 }
585
586 if (src.Swizzle != SWIZZLE_NOOP || src.Negate) {
587 unsigned short zeros_mask = 0;
588 unsigned short ones_mask = 0;
589 unsigned short src_mask = 0;
590 unsigned short src_swiz[4];
591
592 for (int i = 0; i < 4; i++) {
593 src_swiz[i] = 0; /* initialize for safety */
594
595 /* The ZERO, ONE, and Negate options are only used for OPCODE_SWZ,
596 * but it's simplest to handle it here.
597 */
598 int s = GET_SWZ(src.Swizzle, i);
599 switch (s) {
600 case SWIZZLE_X:
601 case SWIZZLE_Y:
602 case SWIZZLE_Z:
603 case SWIZZLE_W:
604 src_mask |= 1 << i;
605 src_swiz[i] = s;
606 break;
607 case SWIZZLE_ZERO:
608 zeros_mask |= 1 << i;
609 break;
610 case SWIZZLE_ONE:
611 ones_mask |= 1 << i;
612 break;
613 }
614 }
615
616 result.swizzle =
617 BRW_SWIZZLE4(src_swiz[0], src_swiz[1], src_swiz[2], src_swiz[3]);
618
619 /* The hardware doesn't natively handle the SWZ instruction's zero/one
620 * swizzles or per-component negation, so we need to use a temporary.
621 */
622 if (zeros_mask || ones_mask || src.Negate) {
623 src_reg temp_src(this, glsl_type::vec4_type);
624 dst_reg temp(temp_src);
625
626 if (src_mask) {
627 temp.writemask = src_mask;
628 emit(MOV(temp, result));
629 }
630
631 if (zeros_mask) {
632 temp.writemask = zeros_mask;
633 emit(MOV(temp, src_reg(0.0f)));
634 }
635
636 if (ones_mask) {
637 temp.writemask = ones_mask;
638 emit(MOV(temp, src_reg(1.0f)));
639 }
640
641 if (src.Negate) {
642 temp.writemask = src.Negate;
643 src_reg neg(temp_src);
644 neg.negate = true;
645 emit(MOV(temp, neg));
646 }
647 result = temp_src;
648 }
649 }
650
651 return result;
652 }