i965/vs: Split final assembly code generation out of vec4_visitor.
mesa.git: src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
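
This commit moves final assembly generation out of vec4_visitor and into the vec4_generator class defined in this file. As a rough, illustrative sketch of the caller side (the actual call site lives outside this file, so the visitor variable `v` and the surrounding setup are assumptions, not part of the patch), the optimized instruction list would be handed off roughly like this:

   /* Hypothetical caller sketch -- assumes a vec4_visitor `v` whose
    * `instructions` list has already been built and optimized.
    */
   vec4_generator g(brw, c, prog, mem_ctx);
   unsigned assembly_size;
   const unsigned *assembly = g.generate_assembly(&v.instructions,
                                                  &assembly_size);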
/* Copyright © 2011 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "brw_vec4.h"

extern "C" {
#include "brw_eu.h"
#include "main/macros.h"
#include "program/prog_print.h"
#include "program/prog_parameter.h"
};

namespace brw {

struct brw_reg
vec4_instruction::get_dst(void)
{
   struct brw_reg brw_reg;

   switch (dst.file) {
   case GRF:
      brw_reg = brw_vec8_grf(dst.reg + dst.reg_offset, 0);
      brw_reg = retype(brw_reg, dst.type);
      brw_reg.dw1.bits.writemask = dst.writemask;
      break;

   case MRF:
      brw_reg = brw_message_reg(dst.reg + dst.reg_offset);
      brw_reg = retype(brw_reg, dst.type);
      brw_reg.dw1.bits.writemask = dst.writemask;
      break;

   case HW_REG:
      brw_reg = dst.fixed_hw_reg;
      break;

   case BAD_FILE:
      brw_reg = brw_null_reg();
      break;

   default:
      assert(!"not reached");
      brw_reg = brw_null_reg();
      break;
   }
   return brw_reg;
}

struct brw_reg
vec4_instruction::get_src(int i)
{
   struct brw_reg brw_reg;

   switch (src[i].file) {
   case GRF:
      brw_reg = brw_vec8_grf(src[i].reg + src[i].reg_offset, 0);
      brw_reg = retype(brw_reg, src[i].type);
      brw_reg.dw1.bits.swizzle = src[i].swizzle;
      if (src[i].abs)
         brw_reg = brw_abs(brw_reg);
      if (src[i].negate)
         brw_reg = negate(brw_reg);
      break;

   case IMM:
      switch (src[i].type) {
      case BRW_REGISTER_TYPE_F:
         brw_reg = brw_imm_f(src[i].imm.f);
         break;
      case BRW_REGISTER_TYPE_D:
         brw_reg = brw_imm_d(src[i].imm.i);
         break;
      case BRW_REGISTER_TYPE_UD:
         brw_reg = brw_imm_ud(src[i].imm.u);
         break;
      default:
         assert(!"not reached");
         brw_reg = brw_null_reg();
         break;
      }
      break;

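   /* Uniforms are packed two vec4s per GRF in the push constant registers,
    * starting at g1: uniform index 3, for example, ends up as g2.4<0,4,1>
    * below (1 + 3/2 = 2, (3 % 2) * 4 = 4).
    */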
   case UNIFORM:
      brw_reg = stride(brw_vec4_grf(1 + (src[i].reg + src[i].reg_offset) / 2,
                                    ((src[i].reg + src[i].reg_offset) % 2) * 4),
                       0, 4, 1);
      brw_reg = retype(brw_reg, src[i].type);
      brw_reg.dw1.bits.swizzle = src[i].swizzle;
      if (src[i].abs)
         brw_reg = brw_abs(brw_reg);
      if (src[i].negate)
         brw_reg = negate(brw_reg);

      /* This should have been moved to pull constants. */
      assert(!src[i].reladdr);
      break;

   case HW_REG:
      brw_reg = src[i].fixed_hw_reg;
      break;

   case BAD_FILE:
      /* Probably unused. */
      brw_reg = brw_null_reg();
      break;
   case ATTR:
   default:
      assert(!"not reached");
      brw_reg = brw_null_reg();
      break;
   }

   return brw_reg;
}

vec4_generator::vec4_generator(struct brw_context *brw,
                               struct brw_vs_compile *c,
                               struct gl_shader_program *prog,
                               void *mem_ctx)
   : brw(brw), c(c), prog(prog), mem_ctx(mem_ctx)
{
   intel = &brw->intel;
   ctx = &intel->ctx;
   vp = &c->vp->program;
   /* generate_code() below reads `ctx` and `shader`, so initialize them here;
    * prog is NULL for fixed-function and ARB_vertex_program compiles.
    */
   shader = prog ? prog->_LinkedShaders[MESA_SHADER_VERTEX] : NULL;
   p = &c->func;
}

vec4_generator::~vec4_generator()
{
}

void
vec4_generator::generate_math1_gen4(vec4_instruction *inst,
                                    struct brw_reg dst,
                                    struct brw_reg src)
{
   brw_math(p,
            dst,
            brw_math_function(inst->opcode),
            inst->base_mrf,
            src,
            BRW_MATH_DATA_VECTOR,
            BRW_MATH_PRECISION_FULL);
}

static void
check_gen6_math_src_arg(struct brw_reg src)
{
   /* Source swizzles are ignored. */
   assert(!src.abs);
   assert(!src.negate);
   assert(src.dw1.bits.swizzle == BRW_SWIZZLE_XYZW);
}

void
vec4_generator::generate_math1_gen6(vec4_instruction *inst,
                                    struct brw_reg dst,
                                    struct brw_reg src)
{
   /* Can't do writemask because math can't be align16. */
   assert(dst.dw1.bits.writemask == WRITEMASK_XYZW);
   check_gen6_math_src_arg(src);

   brw_set_access_mode(p, BRW_ALIGN_1);
   brw_math(p,
            dst,
            brw_math_function(inst->opcode),
            inst->base_mrf,
            src,
            BRW_MATH_DATA_SCALAR,
            BRW_MATH_PRECISION_FULL);
   brw_set_access_mode(p, BRW_ALIGN_16);
}

void
vec4_generator::generate_math2_gen7(vec4_instruction *inst,
                                    struct brw_reg dst,
                                    struct brw_reg src0,
                                    struct brw_reg src1)
{
   brw_math2(p,
             dst,
             brw_math_function(inst->opcode),
             src0, src1);
}

void
vec4_generator::generate_math2_gen6(vec4_instruction *inst,
                                    struct brw_reg dst,
                                    struct brw_reg src0,
                                    struct brw_reg src1)
{
   /* Can't do writemask because math can't be align16. */
   assert(dst.dw1.bits.writemask == WRITEMASK_XYZW);
   /* Source swizzles are ignored. */
   check_gen6_math_src_arg(src0);
   check_gen6_math_src_arg(src1);

   brw_set_access_mode(p, BRW_ALIGN_1);
   brw_math2(p,
             dst,
             brw_math_function(inst->opcode),
             src0, src1);
   brw_set_access_mode(p, BRW_ALIGN_16);
}

void
vec4_generator::generate_math2_gen4(vec4_instruction *inst,
                                    struct brw_reg dst,
                                    struct brw_reg src0,
                                    struct brw_reg src1)
{
   /* From the Ironlake PRM, Volume 4, Part 1, Section 6.1.13
    * "Message Payload":
    *
    * "Operand0[7]. For the INT DIV functions, this operand is the
    *  denominator."
    * ...
    * "Operand1[7]. For the INT DIV functions, this operand is the
    *  numerator."
    */
   bool is_int_div = inst->opcode != SHADER_OPCODE_POW;
   struct brw_reg &op0 = is_int_div ? src1 : src0;
   struct brw_reg &op1 = is_int_div ? src0 : src1;

   brw_push_insn_state(p);
   brw_set_saturate(p, false);
   brw_set_predicate_control(p, BRW_PREDICATE_NONE);
   brw_MOV(p, retype(brw_message_reg(inst->base_mrf + 1), op1.type), op1);
   brw_pop_insn_state(p);

   brw_math(p,
            dst,
            brw_math_function(inst->opcode),
            inst->base_mrf,
            op0,
            BRW_MATH_DATA_VECTOR,
            BRW_MATH_PRECISION_FULL);
}

void
vec4_generator::generate_tex(vec4_instruction *inst,
                             struct brw_reg dst,
                             struct brw_reg src)
{
   int msg_type = -1;

   if (intel->gen >= 5) {
      switch (inst->opcode) {
      case SHADER_OPCODE_TEX:
      case SHADER_OPCODE_TXL:
         if (inst->shadow_compare) {
            msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LOD_COMPARE;
         } else {
            msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LOD;
         }
         break;
      case SHADER_OPCODE_TXD:
         /* There is no sample_d_c message; comparisons are done manually. */
         msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_DERIVS;
         break;
      case SHADER_OPCODE_TXF:
         msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LD;
         break;
      case SHADER_OPCODE_TXS:
         msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_RESINFO;
         break;
      default:
         assert(!"should not get here: invalid VS texture opcode");
         break;
      }
   } else {
      switch (inst->opcode) {
      case SHADER_OPCODE_TEX:
      case SHADER_OPCODE_TXL:
         if (inst->shadow_compare) {
            msg_type = BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_LOD_COMPARE;
            assert(inst->mlen == 3);
         } else {
            msg_type = BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_LOD;
            assert(inst->mlen == 2);
         }
         break;
      case SHADER_OPCODE_TXD:
         /* There is no sample_d_c message; comparisons are done manually. */
         msg_type = BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_GRADIENTS;
         assert(inst->mlen == 4);
         break;
      case SHADER_OPCODE_TXF:
         msg_type = BRW_SAMPLER_MESSAGE_SIMD4X2_LD;
         assert(inst->mlen == 2);
         break;
      case SHADER_OPCODE_TXS:
         msg_type = BRW_SAMPLER_MESSAGE_SIMD4X2_RESINFO;
         assert(inst->mlen == 2);
         break;
      default:
         assert(!"should not get here: invalid VS texture opcode");
         break;
      }
   }

   assert(msg_type != -1);

   /* Load the message header if present. If there's a texture offset, we need
    * to set it up explicitly and load the offset bitfield. Otherwise, we can
    * use an implied move from g0 to the first message register.
    */
   if (inst->texture_offset) {
      /* Explicitly set up the message header by copying g0 to the MRF. */
      brw_MOV(p, retype(brw_message_reg(inst->base_mrf), BRW_REGISTER_TYPE_UD),
              retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));

      /* Then set the offset bits in DWord 2. */
      brw_set_access_mode(p, BRW_ALIGN_1);
      brw_MOV(p,
              retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, inst->base_mrf, 2),
                     BRW_REGISTER_TYPE_UD),
              brw_imm_uw(inst->texture_offset));
      brw_set_access_mode(p, BRW_ALIGN_16);
   } else if (inst->header_present) {
      /* Set up an implied move from g0 to the MRF. */
      src = brw_vec8_grf(0, 0);
   }

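   /* The sampler returns floats unless the destination wants integer data,
    * in which case we ask for the SINT32/UINT32 return format.
    */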
   uint32_t return_format;

   switch (dst.type) {
   case BRW_REGISTER_TYPE_D:
      return_format = BRW_SAMPLER_RETURN_FORMAT_SINT32;
      break;
   case BRW_REGISTER_TYPE_UD:
      return_format = BRW_SAMPLER_RETURN_FORMAT_UINT32;
      break;
   default:
      return_format = BRW_SAMPLER_RETURN_FORMAT_FLOAT32;
      break;
   }

   brw_SAMPLE(p,
              dst,
              inst->base_mrf,
              src,
              SURF_INDEX_VS_TEXTURE(inst->sampler),
              inst->sampler,
              WRITEMASK_XYZW,
              msg_type,
              1, /* response length */
              inst->mlen,
              inst->header_present,
              BRW_SAMPLER_SIMD_MODE_SIMD4X2,
              return_format);
}

void
vec4_generator::generate_urb_write(vec4_instruction *inst)
{
   brw_urb_WRITE(p,
                 brw_null_reg(), /* dest */
                 inst->base_mrf, /* starting mrf reg nr */
                 brw_vec8_grf(0, 0), /* src */
                 false,          /* allocate */
                 true,           /* used */
                 inst->mlen,
                 0,              /* response len */
                 inst->eot,      /* eot */
                 inst->eot,      /* writes complete */
                 inst->offset,   /* urb destination offset */
                 BRW_URB_SWIZZLE_INTERLEAVE);
}

void
vec4_generator::generate_oword_dual_block_offsets(struct brw_reg m1,
                                                  struct brw_reg index)
{
   int second_vertex_offset;

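   /* The two interleaved vertices' data are one oword (16 bytes) apart in
    * scratch. Gen6+ expresses the block offsets in oword units, while older
    * generations use byte offsets, hence 1 vs. 16.
    */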
   if (intel->gen >= 6)
      second_vertex_offset = 1;
   else
      second_vertex_offset = 16;

   m1 = retype(m1, BRW_REGISTER_TYPE_D);

   /* Set up M1 (message payload). Only the block offsets in M1.0 and
    * M1.4 are used, and the rest are ignored.
    */
   struct brw_reg m1_0 = suboffset(vec1(m1), 0);
   struct brw_reg m1_4 = suboffset(vec1(m1), 4);
   struct brw_reg index_0 = suboffset(vec1(index), 0);
   struct brw_reg index_4 = suboffset(vec1(index), 4);

   brw_push_insn_state(p);
   brw_set_mask_control(p, BRW_MASK_DISABLE);
   brw_set_access_mode(p, BRW_ALIGN_1);

   brw_MOV(p, m1_0, index_0);

   if (index.file == BRW_IMMEDIATE_VALUE) {
      index_4.dw1.ud += second_vertex_offset;
      brw_MOV(p, m1_4, index_4);
   } else {
      brw_ADD(p, m1_4, index_4, brw_imm_d(second_vertex_offset));
   }

   brw_pop_insn_state(p);
}

void
vec4_generator::generate_scratch_read(vec4_instruction *inst,
                                      struct brw_reg dst,
                                      struct brw_reg index)
{
   struct brw_reg header = brw_vec8_grf(0, 0);

   gen6_resolve_implied_move(p, &header, inst->base_mrf);

   generate_oword_dual_block_offsets(brw_message_reg(inst->base_mrf + 1),
                                     index);

   uint32_t msg_type;

   if (intel->gen >= 6)
      msg_type = GEN6_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ;
   else if (intel->gen == 5 || intel->is_g4x)
      msg_type = G45_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ;
   else
      msg_type = BRW_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ;

   /* Each of the 8 channel enables is considered for whether each
    * dword is written.
    */
   struct brw_instruction *send = brw_next_insn(p, BRW_OPCODE_SEND);
   brw_set_dest(p, send, dst);
   brw_set_src0(p, send, header);
   if (intel->gen < 6)
      send->header.destreg__conditionalmod = inst->base_mrf;
   brw_set_dp_read_message(p, send,
                           255, /* binding table index: stateless access */
                           BRW_DATAPORT_OWORD_DUAL_BLOCK_1OWORD,
                           msg_type,
                           BRW_DATAPORT_READ_TARGET_RENDER_CACHE,
                           2, /* mlen */
                           1 /* rlen */);
}

void
vec4_generator::generate_scratch_write(vec4_instruction *inst,
                                       struct brw_reg dst,
                                       struct brw_reg src,
                                       struct brw_reg index)
{
   struct brw_reg header = brw_vec8_grf(0, 0);
   bool write_commit;

   /* If the instruction is predicated, we'll predicate the send, not
    * the header setup.
    */
   brw_set_predicate_control(p, BRW_PREDICATE_NONE);

   gen6_resolve_implied_move(p, &header, inst->base_mrf);

   generate_oword_dual_block_offsets(brw_message_reg(inst->base_mrf + 1),
                                     index);

   brw_MOV(p,
           retype(brw_message_reg(inst->base_mrf + 2), BRW_REGISTER_TYPE_D),
           retype(src, BRW_REGISTER_TYPE_D));

   uint32_t msg_type;

   if (intel->gen >= 7)
      msg_type = GEN7_DATAPORT_WRITE_MESSAGE_OWORD_DUAL_BLOCK_WRITE;
   else if (intel->gen == 6)
      msg_type = GEN6_DATAPORT_WRITE_MESSAGE_OWORD_DUAL_BLOCK_WRITE;
   else
      msg_type = BRW_DATAPORT_WRITE_MESSAGE_OWORD_DUAL_BLOCK_WRITE;

   brw_set_predicate_control(p, inst->predicate);

   /* Pre-gen6, we have to specify write commits to ensure ordering
    * between reads and writes within a thread. Afterwards, that's
    * guaranteed and write commits only matter for inter-thread
    * synchronization.
    */
   if (intel->gen >= 6) {
      write_commit = false;
   } else {
      /* The visitor set up our destination register to be g0. This
       * means that when the next read comes along, we will end up
       * reading from g0 and causing a block on the write commit. For
       * write-after-read, we are relying on the value of the previous
       * read being used (and thus blocking on completion) before our
       * write is executed. This means we have to be careful in
       * instruction scheduling to not violate this assumption.
       */
      write_commit = true;
   }

   /* Each of the 8 channel enables is considered for whether each
    * dword is written.
    */
   struct brw_instruction *send = brw_next_insn(p, BRW_OPCODE_SEND);
   brw_set_dest(p, send, dst);
   brw_set_src0(p, send, header);
   if (intel->gen < 6)
      send->header.destreg__conditionalmod = inst->base_mrf;
   brw_set_dp_write_message(p, send,
                            255, /* binding table index: stateless access */
                            BRW_DATAPORT_OWORD_DUAL_BLOCK_1OWORD,
                            msg_type,
                            3, /* mlen */
                            true, /* header present */
                            false, /* not a render target write */
                            write_commit, /* rlen */
                            false, /* eot */
                            write_commit);
}

void
vec4_generator::generate_pull_constant_load(vec4_instruction *inst,
                                            struct brw_reg dst,
                                            struct brw_reg index,
                                            struct brw_reg offset)
{
   assert(index.file == BRW_IMMEDIATE_VALUE &&
          index.type == BRW_REGISTER_TYPE_UD);
   uint32_t surf_index = index.dw1.ud;

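   /* Gen7 can fetch the constant with a headerless sampler LD message;
    * older generations use the data port oword dual block read set up below.
    */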
   if (intel->gen == 7) {
      gen6_resolve_implied_move(p, &offset, inst->base_mrf);
      brw_instruction *insn = brw_next_insn(p, BRW_OPCODE_SEND);
      brw_set_dest(p, insn, dst);
      brw_set_src0(p, insn, offset);
      brw_set_sampler_message(p, insn,
                              surf_index,
                              0, /* LD message ignores sampler unit */
                              GEN5_SAMPLER_MESSAGE_SAMPLE_LD,
                              1, /* rlen */
                              1, /* mlen */
                              false, /* no header */
                              BRW_SAMPLER_SIMD_MODE_SIMD4X2,
                              0);
      return;
   }

   struct brw_reg header = brw_vec8_grf(0, 0);

   gen6_resolve_implied_move(p, &header, inst->base_mrf);

   brw_MOV(p, retype(brw_message_reg(inst->base_mrf + 1), BRW_REGISTER_TYPE_D),
           offset);

   uint32_t msg_type;

   if (intel->gen >= 6)
      msg_type = GEN6_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ;
   else if (intel->gen == 5 || intel->is_g4x)
      msg_type = G45_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ;
   else
      msg_type = BRW_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ;

   /* Each of the 8 channel enables is considered for whether each
    * dword is written.
    */
   struct brw_instruction *send = brw_next_insn(p, BRW_OPCODE_SEND);
   brw_set_dest(p, send, dst);
   brw_set_src0(p, send, header);
   if (intel->gen < 6)
      send->header.destreg__conditionalmod = inst->base_mrf;
   brw_set_dp_read_message(p, send,
                           surf_index,
                           BRW_DATAPORT_OWORD_DUAL_BLOCK_1OWORD,
                           msg_type,
                           BRW_DATAPORT_READ_TARGET_DATA_CACHE,
                           2, /* mlen */
                           1 /* rlen */);
}

void
vec4_generator::generate_vs_instruction(vec4_instruction *instruction,
                                        struct brw_reg dst,
                                        struct brw_reg *src)
{
   vec4_instruction *inst = (vec4_instruction *)instruction;

   switch (inst->opcode) {
   case SHADER_OPCODE_RCP:
   case SHADER_OPCODE_RSQ:
   case SHADER_OPCODE_SQRT:
   case SHADER_OPCODE_EXP2:
   case SHADER_OPCODE_LOG2:
   case SHADER_OPCODE_SIN:
   case SHADER_OPCODE_COS:
      if (intel->gen == 6) {
         generate_math1_gen6(inst, dst, src[0]);
      } else {
         /* Also works for Gen7. */
         generate_math1_gen4(inst, dst, src[0]);
      }
      break;

   case SHADER_OPCODE_POW:
   case SHADER_OPCODE_INT_QUOTIENT:
   case SHADER_OPCODE_INT_REMAINDER:
      if (intel->gen >= 7) {
         generate_math2_gen7(inst, dst, src[0], src[1]);
      } else if (intel->gen == 6) {
         generate_math2_gen6(inst, dst, src[0], src[1]);
      } else {
         generate_math2_gen4(inst, dst, src[0], src[1]);
      }
      break;

   case SHADER_OPCODE_TEX:
   case SHADER_OPCODE_TXD:
   case SHADER_OPCODE_TXF:
   case SHADER_OPCODE_TXL:
   case SHADER_OPCODE_TXS:
      generate_tex(inst, dst, src[0]);
      break;

   case VS_OPCODE_URB_WRITE:
      generate_urb_write(inst);
      break;

   case VS_OPCODE_SCRATCH_READ:
      generate_scratch_read(inst, dst, src[0]);
      break;

   case VS_OPCODE_SCRATCH_WRITE:
      generate_scratch_write(inst, dst, src[0], src[1]);
      break;

   case VS_OPCODE_PULL_CONSTANT_LOAD:
      generate_pull_constant_load(inst, dst, src[0], src[1]);
      break;

   default:
      if (inst->opcode < (int) ARRAY_SIZE(opcode_descs)) {
         _mesa_problem(ctx, "Unsupported opcode in `%s' in VS\n",
                       opcode_descs[inst->opcode].name);
      } else {
         _mesa_problem(ctx, "Unsupported opcode %d in VS", inst->opcode);
      }
      abort();
   }
}

void
vec4_generator::generate_code(exec_list *instructions)
{
   int last_native_insn_offset = 0;
   const char *last_annotation_string = NULL;
   const void *last_annotation_ir = NULL;

   if (unlikely(INTEL_DEBUG & DEBUG_VS)) {
      if (shader) {
         printf("Native code for vertex shader %d:\n", prog->Name);
      } else {
         printf("Native code for vertex program %d:\n", c->vp->program.Base.Id);
      }
   }

   foreach_list(node, instructions) {
      vec4_instruction *inst = (vec4_instruction *)node;
      struct brw_reg src[3], dst;

      if (unlikely(INTEL_DEBUG & DEBUG_VS)) {
         if (last_annotation_ir != inst->ir) {
            last_annotation_ir = inst->ir;
            if (last_annotation_ir) {
               printf("   ");
               if (shader) {
                  ((ir_instruction *) last_annotation_ir)->print();
               } else {
                  const prog_instruction *vpi;
                  vpi = (const prog_instruction *) inst->ir;
                  printf("%d: ", (int)(vpi - vp->Base.Instructions));
                  _mesa_fprint_instruction_opt(stdout, vpi, 0,
                                               PROG_PRINT_DEBUG, NULL);
               }
               printf("\n");
            }
         }
         if (last_annotation_string != inst->annotation) {
            last_annotation_string = inst->annotation;
            if (last_annotation_string)
               printf("   %s\n", last_annotation_string);
         }
      }

      for (unsigned int i = 0; i < 3; i++) {
         src[i] = inst->get_src(i);
      }
      dst = inst->get_dst();

      brw_set_conditionalmod(p, inst->conditional_mod);
      brw_set_predicate_control(p, inst->predicate);
      brw_set_predicate_inverse(p, inst->predicate_inverse);
      brw_set_saturate(p, inst->saturate);

      switch (inst->opcode) {
      case BRW_OPCODE_MOV:
         brw_MOV(p, dst, src[0]);
         break;
      case BRW_OPCODE_ADD:
         brw_ADD(p, dst, src[0], src[1]);
         break;
      case BRW_OPCODE_MUL:
         brw_MUL(p, dst, src[0], src[1]);
         break;
      case BRW_OPCODE_MACH:
         brw_set_acc_write_control(p, 1);
         brw_MACH(p, dst, src[0], src[1]);
         brw_set_acc_write_control(p, 0);
         break;

      case BRW_OPCODE_FRC:
         brw_FRC(p, dst, src[0]);
         break;
      case BRW_OPCODE_RNDD:
         brw_RNDD(p, dst, src[0]);
         break;
      case BRW_OPCODE_RNDE:
         brw_RNDE(p, dst, src[0]);
         break;
      case BRW_OPCODE_RNDZ:
         brw_RNDZ(p, dst, src[0]);
         break;

      case BRW_OPCODE_AND:
         brw_AND(p, dst, src[0], src[1]);
         break;
      case BRW_OPCODE_OR:
         brw_OR(p, dst, src[0], src[1]);
         break;
      case BRW_OPCODE_XOR:
         brw_XOR(p, dst, src[0], src[1]);
         break;
      case BRW_OPCODE_NOT:
         brw_NOT(p, dst, src[0]);
         break;
      case BRW_OPCODE_ASR:
         brw_ASR(p, dst, src[0], src[1]);
         break;
      case BRW_OPCODE_SHR:
         brw_SHR(p, dst, src[0], src[1]);
         break;
      case BRW_OPCODE_SHL:
         brw_SHL(p, dst, src[0], src[1]);
         break;

      case BRW_OPCODE_CMP:
         brw_CMP(p, dst, inst->conditional_mod, src[0], src[1]);
         break;
      case BRW_OPCODE_SEL:
         brw_SEL(p, dst, src[0], src[1]);
         break;

      case BRW_OPCODE_DPH:
         brw_DPH(p, dst, src[0], src[1]);
         break;

      case BRW_OPCODE_DP4:
         brw_DP4(p, dst, src[0], src[1]);
         break;

      case BRW_OPCODE_DP3:
         brw_DP3(p, dst, src[0], src[1]);
         break;

      case BRW_OPCODE_DP2:
         brw_DP2(p, dst, src[0], src[1]);
         break;

      case BRW_OPCODE_IF:
         if (inst->src[0].file != BAD_FILE) {
            /* The instruction has an embedded compare (only allowed on gen6) */
            assert(intel->gen == 6);
            gen6_IF(p, inst->conditional_mod, src[0], src[1]);
         } else {
            struct brw_instruction *brw_inst = brw_IF(p, BRW_EXECUTE_8);
            brw_inst->header.predicate_control = inst->predicate;
         }
         break;

      case BRW_OPCODE_ELSE:
         brw_ELSE(p);
         break;
      case BRW_OPCODE_ENDIF:
         brw_ENDIF(p);
         break;

      case BRW_OPCODE_DO:
         brw_DO(p, BRW_EXECUTE_8);
         break;

      case BRW_OPCODE_BREAK:
         brw_BREAK(p);
         brw_set_predicate_control(p, BRW_PREDICATE_NONE);
         break;
      case BRW_OPCODE_CONTINUE:
         /* FINISHME: We need to write the loop instruction support still. */
         if (intel->gen >= 6)
            gen6_CONT(p);
         else
            brw_CONT(p);
         brw_set_predicate_control(p, BRW_PREDICATE_NONE);
         break;

      case BRW_OPCODE_WHILE:
         brw_WHILE(p);
         break;

      default:
         generate_vs_instruction(inst, dst, src);
         break;
      }

      if (unlikely(INTEL_DEBUG & DEBUG_VS)) {
         brw_dump_compile(p, stdout,
                          last_native_insn_offset, p->next_insn_offset);
      }

      last_native_insn_offset = p->next_insn_offset;
   }

   if (unlikely(INTEL_DEBUG & DEBUG_VS)) {
      printf("\n");
   }

   brw_set_uip_jip(p);

   /* OK, while the INTEL_DEBUG=vs above is very nice for debugging VS
    * emit issues, it doesn't get the jump distances into the output,
    * which is often something we want to debug. So this is here in
    * case you're doing that.
    */
   if (0 && unlikely(INTEL_DEBUG & DEBUG_VS)) {
      brw_dump_compile(p, stdout, 0, p->next_insn_offset);
   }
}

const unsigned *
vec4_generator::generate_assembly(exec_list *instructions,
                                  unsigned *assembly_size)
{
   brw_set_access_mode(p, BRW_ALIGN_16);
   generate_code(instructions);
   return brw_get_program(p, assembly_size);
}

} /* namespace brw */