i965/vs: Fix access of attribute arrays.
[mesa.git] / src / mesa / drivers / dri / i965 / brw_vec4_emit.cpp
/* Copyright © 2011 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "brw_vec4.h"
#include "../glsl/ir_print_visitor.h"

extern "C" {
#include "brw_eu.h"
};

using namespace brw;

namespace brw {

int
vec4_visitor::setup_attributes(int payload_reg)
{
   int nr_attributes;
   int attribute_map[VERT_ATTRIB_MAX];

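   /* Build a compacted map from each vertex attribute the program reads
    * to the payload GRF it will arrive in.  Used attributes are assigned
    * consecutive registers starting at payload_reg.
    */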
   nr_attributes = 0;
   for (int i = 0; i < VERT_ATTRIB_MAX; i++) {
      if (prog_data->inputs_read & BITFIELD64_BIT(i)) {
         attribute_map[i] = payload_reg + nr_attributes;
         nr_attributes++;
      }
   }

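   /* Rewrite every ATTR source to point directly at its payload GRF,
    * carrying the original swizzle along on the hardware register.
    */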
   foreach_iter(exec_list_iterator, iter, this->instructions) {
      vec4_instruction *inst = (vec4_instruction *)iter.get();

      for (int i = 0; i < 3; i++) {
         if (inst->src[i].file != ATTR)
            continue;

         int grf = attribute_map[inst->src[i].reg + inst->src[i].reg_offset];
         inst->src[i].file = HW_REG;
         inst->src[i].fixed_hw_reg = brw_vec8_grf(grf, 0);
         inst->src[i].fixed_hw_reg.dw1.bits.swizzle = inst->src[i].swizzle;
      }
   }

   /* The BSpec says we always have to read at least one thing from
    * the VF, and it appears that the hardware wedges otherwise.
    */
   if (nr_attributes == 0)
      nr_attributes = 1;

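   /* The URB read length is in 256-bit units, each of which covers two
    * vec4 attributes, so round the attribute count up to a pair.
    */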
   prog_data->urb_read_length = (nr_attributes + 1) / 2;

   return payload_reg + nr_attributes;
}

int
vec4_visitor::setup_uniforms(int reg)
{
   /* User clip planes from curbe:
    */
   if (c->key.nr_userclip) {
      if (intel->gen >= 6) {
         for (int i = 0; i < c->key.nr_userclip; i++) {
            c->userplane[i] = stride(brw_vec4_grf(reg + i / 2,
                                                  (i % 2) * 4), 0, 4, 1);
         }
         reg += ALIGN(c->key.nr_userclip, 2) / 2;
      } else {
         for (int i = 0; i < c->key.nr_userclip; i++) {
            c->userplane[i] = stride(brw_vec4_grf(reg + (6 + i) / 2,
                                                  (i % 2) * 4), 0, 4, 1);
         }
         reg += (ALIGN(6 + c->key.nr_userclip, 4) / 4) * 2;
      }
   }

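   /* Worked example of the gen6 math above: with nr_userclip == 3,
    * planes 0 and 1 occupy the two vec4 halves of g(reg), plane 2 the
    * low half of g(reg + 1), and reg advances by ALIGN(3, 2) / 2 == 2,
    * since two vec4 clip planes are packed per GRF.
    */
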
   /* The pre-gen6 VS requires that some push constants get loaded no
    * matter what, or the GPU would hang.
    */
   if (intel->gen < 6 && this->uniforms == 0) {
      this->uniform_size[this->uniforms] = 1;

      for (unsigned int i = 0; i < 4; i++) {
         unsigned int slot = this->uniforms * 4 + i;

         c->prog_data.param[slot] = NULL;
         c->prog_data.param_convert[slot] = PARAM_CONVERT_ZERO;
      }

      this->uniforms++;
      reg++;
   } else {
      reg += ALIGN(uniforms, 2) / 2;
   }
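
   /* Like the clip planes, push constants are packed two vec4s per GRF,
    * which is why reg advances by half the rounded-up uniform count above.
    */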

   /* for now, we are not doing any elimination of unused slots, nor
    * are we packing our uniforms.
    */
   c->prog_data.nr_params = this->uniforms * 4;

   c->prog_data.curb_read_length = reg - 1;
   c->prog_data.uses_new_param_layout = true;

   return reg;
}

void
vec4_visitor::setup_payload(void)
{
   int reg = 0;

   /* The payload always contains important data in g0, which contains
    * the URB handles that are passed on to the URB write at the end
    * of the thread.  So, we always start push constants at g1.
    */
   reg++;

   reg = setup_uniforms(reg);

   reg = setup_attributes(reg);

   this->first_non_payload_grf = reg;
}
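
/* A sketch of the resulting payload layout, assuming a gen6 shader with
 * two uniform vec4s and three vertex attributes: g0 holds the thread
 * header and URB handles, g1 holds both uniforms packed together,
 * g2-g4 hold the attributes, and first_non_payload_grf ends up as 5.
 */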

struct brw_reg
vec4_instruction::get_dst(void)
{
   struct brw_reg brw_reg;

   switch (dst.file) {
   case GRF:
      brw_reg = brw_vec8_grf(dst.reg + dst.reg_offset, 0);
      brw_reg = retype(brw_reg, dst.type);
      brw_reg.dw1.bits.writemask = dst.writemask;
      break;

   case HW_REG:
      brw_reg = dst.fixed_hw_reg;
      break;

   case BAD_FILE:
      brw_reg = brw_null_reg();
      break;

   default:
      assert(!"not reached");
      brw_reg = brw_null_reg();
      break;
   }
   return brw_reg;
}

struct brw_reg
vec4_instruction::get_src(int i)
{
   struct brw_reg brw_reg;

   switch (src[i].file) {
   case GRF:
      brw_reg = brw_vec8_grf(src[i].reg + src[i].reg_offset, 0);
      brw_reg = retype(brw_reg, src[i].type);
      brw_reg.dw1.bits.swizzle = src[i].swizzle;
      if (src[i].abs)
         brw_reg = brw_abs(brw_reg);
      if (src[i].negate)
         brw_reg = negate(brw_reg);
      break;

   case IMM:
      switch (src[i].type) {
      case BRW_REGISTER_TYPE_F:
         brw_reg = brw_imm_f(src[i].imm.f);
         break;
      case BRW_REGISTER_TYPE_D:
         brw_reg = brw_imm_d(src[i].imm.i);
         break;
      case BRW_REGISTER_TYPE_UD:
         brw_reg = brw_imm_ud(src[i].imm.u);
         break;
      default:
         assert(!"not reached");
         brw_reg = brw_null_reg();
         break;
      }
      break;

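   /* Uniforms live in the push constant space starting at g1; two vec4s
    * are packed per GRF, and stride(..., 0, 4, 1) replicates the chosen
    * vec4 across both halves of the register.
    */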
   case UNIFORM:
      brw_reg = stride(brw_vec4_grf(1 + (src[i].reg + src[i].reg_offset) / 2,
                                    ((src[i].reg + src[i].reg_offset) % 2) * 4),
                       0, 4, 1);
      brw_reg = retype(brw_reg, src[i].type);
      brw_reg.dw1.bits.swizzle = src[i].swizzle;
      if (src[i].abs)
         brw_reg = brw_abs(brw_reg);
      if (src[i].negate)
         brw_reg = negate(brw_reg);
      break;

   case HW_REG:
      brw_reg = src[i].fixed_hw_reg;
      break;

   case BAD_FILE:
      /* Probably unused. */
      brw_reg = brw_null_reg();
      break;
   case ATTR:
   default:
      assert(!"not reached");
      brw_reg = brw_null_reg();
      break;
   }

   return brw_reg;
}

void
vec4_visitor::generate_math1_gen4(vec4_instruction *inst,
                                  struct brw_reg dst,
                                  struct brw_reg src)
{
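   /* On gen4/5, math is a message to the shared math unit rather than a
    * regular ALU instruction, so the operand is staged through the MRF
    * starting at base_mrf.
    */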
   brw_math(p,
            dst,
            brw_math_function(inst->opcode),
            BRW_MATH_SATURATE_NONE,
            inst->base_mrf,
            src,
            BRW_MATH_DATA_SCALAR,
            BRW_MATH_PRECISION_FULL);
}

static void
check_gen6_math_src_arg(struct brw_reg src)
{
   /* Source swizzles are ignored. */
   assert(!src.abs);
   assert(!src.negate);
   assert(src.dw1.bits.swizzle == BRW_SWIZZLE_XYZW);
}

void
vec4_visitor::generate_math1_gen6(vec4_instruction *inst,
                                  struct brw_reg dst,
                                  struct brw_reg src)
{
   /* Can't do writemask because math can't be align16. */
   assert(dst.dw1.bits.writemask == WRITEMASK_XYZW);
   check_gen6_math_src_arg(src);

   brw_set_access_mode(p, BRW_ALIGN_1);
   brw_math(p,
            dst,
            brw_math_function(inst->opcode),
            BRW_MATH_SATURATE_NONE,
            inst->base_mrf,
            src,
            BRW_MATH_DATA_SCALAR,
            BRW_MATH_PRECISION_FULL);
   brw_set_access_mode(p, BRW_ALIGN_16);
}

void
vec4_visitor::generate_math2_gen6(vec4_instruction *inst,
                                  struct brw_reg dst,
                                  struct brw_reg src0,
                                  struct brw_reg src1)
{
   /* Can't do writemask because math can't be align16. */
   assert(dst.dw1.bits.writemask == WRITEMASK_XYZW);
   /* Source swizzles are ignored. */
   check_gen6_math_src_arg(src0);
   check_gen6_math_src_arg(src1);

   brw_set_access_mode(p, BRW_ALIGN_1);
   brw_math2(p,
             dst,
             brw_math_function(inst->opcode),
             src0, src1);
   brw_set_access_mode(p, BRW_ALIGN_16);
}

void
vec4_visitor::generate_math2_gen4(vec4_instruction *inst,
                                  struct brw_reg dst,
                                  struct brw_reg src0,
                                  struct brw_reg src1)
{
   /* Can't do writemask because math can't be align16. */
   assert(dst.dw1.bits.writemask == WRITEMASK_XYZW);

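   /* The gen4 math message only takes one operand register directly, so
    * the second operand is passed by loading it into the following MRF.
    */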
   brw_MOV(p, brw_message_reg(inst->base_mrf + 1), src1);

   brw_set_access_mode(p, BRW_ALIGN_1);
   brw_math(p,
            dst,
            brw_math_function(inst->opcode),
            BRW_MATH_SATURATE_NONE,
            inst->base_mrf,
            src0,
            BRW_MATH_DATA_VECTOR,
            BRW_MATH_PRECISION_FULL);
   brw_set_access_mode(p, BRW_ALIGN_16);
}

void
vec4_visitor::generate_urb_write(vec4_instruction *inst)
{
   brw_urb_WRITE(p,
                 brw_null_reg(), /* dest */
                 inst->base_mrf, /* starting mrf reg nr */
                 brw_vec8_grf(0, 0), /* src */
                 false,         /* allocate */
                 true,          /* used */
                 inst->mlen,
                 0,             /* response len */
                 inst->eot,     /* eot */
                 inst->eot,     /* writes complete */
                 inst->offset,  /* urb destination offset */
                 BRW_URB_SWIZZLE_INTERLEAVE);
}

void
vec4_visitor::generate_oword_dual_block_offsets(struct brw_reg m1,
                                                struct brw_reg index)
{
   int second_vertex_offset;

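   /* The oword dual block messages appear to take their offsets in
    * owords on gen6 and later but in bytes on earlier generations, so
    * the second interleaved vertex's data is one oword (16 bytes) away
    * either way.
    */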
   if (intel->gen >= 6)
      second_vertex_offset = 1;
   else
      second_vertex_offset = 16;

   m1 = retype(m1, BRW_REGISTER_TYPE_D);

   /* Set up M1 (message payload).  Only the block offsets in M1.0 and
    * M1.4 are used, and the rest are ignored.
    */
   struct brw_reg m1_0 = suboffset(vec1(m1), 0);
   struct brw_reg m1_4 = suboffset(vec1(m1), 4);
   struct brw_reg index_0 = suboffset(vec1(index), 0);
   struct brw_reg index_4 = suboffset(vec1(index), 4);

   brw_push_insn_state(p);
   brw_set_mask_control(p, BRW_MASK_DISABLE);
   brw_set_access_mode(p, BRW_ALIGN_1);

   brw_MOV(p, m1_0, index_0);

   if (index.file == BRW_IMMEDIATE_VALUE) {
      index_4.dw1.ud += second_vertex_offset;
      brw_MOV(p, m1_4, index_4);
   } else {
      brw_ADD(p, m1_4, index_4, brw_imm_d(second_vertex_offset));
   }

   brw_pop_insn_state(p);
}

void
vec4_visitor::generate_scratch_read(vec4_instruction *inst,
                                    struct brw_reg dst,
                                    struct brw_reg index)
{
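   /* On gen6, the read message wants a header based on g0 in its first
    * message register; m(base_mrf + 1) then carries the two per-vertex
    * block offsets, for an mlen of 2 and a single register of reply.
    */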
   if (intel->gen >= 6) {
      brw_push_insn_state(p);
      brw_set_mask_control(p, BRW_MASK_DISABLE);
      brw_MOV(p,
              retype(brw_message_reg(inst->base_mrf), BRW_REGISTER_TYPE_D),
              retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_D));
      brw_pop_insn_state(p);
   }

   generate_oword_dual_block_offsets(brw_message_reg(inst->base_mrf + 1),
                                     index);

   uint32_t msg_type;

   if (intel->gen >= 6)
      msg_type = GEN6_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ;
   else if (intel->gen == 5 || intel->is_g4x)
      msg_type = G45_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ;
   else
      msg_type = BRW_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ;

   /* Each of the 8 channel enables is considered for whether each
    * dword is written.
    */
   struct brw_instruction *send = brw_next_insn(p, BRW_OPCODE_SEND);
   brw_set_dest(p, send, dst);
   brw_set_src0(p, send, brw_message_reg(inst->base_mrf));
   brw_set_dp_read_message(p, send,
                           255, /* binding table index: stateless access */
                           BRW_DATAPORT_OWORD_DUAL_BLOCK_1OWORD,
                           msg_type,
                           BRW_DATAPORT_READ_TARGET_RENDER_CACHE,
                           2, /* mlen */
                           1 /* rlen */);
}

void
vec4_visitor::generate_scratch_write(vec4_instruction *inst,
                                     struct brw_reg dst,
                                     struct brw_reg src,
                                     struct brw_reg index)
{
   /* If the instruction is predicated, we'll predicate the send, not
    * the header setup.
    */
   brw_set_predicate_control(p, BRW_PREDICATE_NONE);

   if (intel->gen >= 6) {
      brw_push_insn_state(p);
      brw_set_mask_control(p, BRW_MASK_DISABLE);
      brw_MOV(p,
              retype(brw_message_reg(inst->base_mrf), BRW_REGISTER_TYPE_D),
              retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_D));
      brw_pop_insn_state(p);
   }
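
   /* Message layout: m(base_mrf) is the header, m(base_mrf + 1) the two
    * block offsets, and m(base_mrf + 2) the data, giving the mlen of 3
    * below.
    */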
   generate_oword_dual_block_offsets(brw_message_reg(inst->base_mrf + 1),
                                     index);

   brw_MOV(p,
           retype(brw_message_reg(inst->base_mrf + 2), BRW_REGISTER_TYPE_D),
           retype(src, BRW_REGISTER_TYPE_D));

   uint32_t msg_type;

   if (intel->gen >= 6)
      msg_type = GEN6_DATAPORT_WRITE_MESSAGE_OWORD_DUAL_BLOCK_WRITE;
   else
      msg_type = BRW_DATAPORT_WRITE_MESSAGE_OWORD_DUAL_BLOCK_WRITE;

   brw_set_predicate_control(p, inst->predicate);

   /* Each of the 8 channel enables is considered for whether each
    * dword is written.
    */
   struct brw_instruction *send = brw_next_insn(p, BRW_OPCODE_SEND);
   brw_set_dest(p, send, dst);
   brw_set_src0(p, send, brw_message_reg(inst->base_mrf));
   brw_set_dp_write_message(p, send,
                            255, /* binding table index: stateless access */
                            BRW_DATAPORT_OWORD_DUAL_BLOCK_1OWORD,
                            msg_type,
                            3, /* mlen */
                            true, /* header present */
                            false, /* pixel scoreboard */
                            0, /* rlen */
                            false, /* eot */
                            false /* commit */);
}

void
vec4_visitor::generate_vs_instruction(vec4_instruction *instruction,
                                      struct brw_reg dst,
                                      struct brw_reg *src)
{
   vec4_instruction *inst = (vec4_instruction *)instruction;

   switch (inst->opcode) {
   case SHADER_OPCODE_RCP:
   case SHADER_OPCODE_RSQ:
   case SHADER_OPCODE_SQRT:
   case SHADER_OPCODE_EXP2:
   case SHADER_OPCODE_LOG2:
   case SHADER_OPCODE_SIN:
   case SHADER_OPCODE_COS:
      if (intel->gen >= 6) {
         generate_math1_gen6(inst, dst, src[0]);
      } else {
         generate_math1_gen4(inst, dst, src[0]);
      }
      break;

   case SHADER_OPCODE_POW:
      if (intel->gen >= 6) {
         generate_math2_gen6(inst, dst, src[0], src[1]);
      } else {
         generate_math2_gen4(inst, dst, src[0], src[1]);
      }
      break;

   case VS_OPCODE_URB_WRITE:
      generate_urb_write(inst);
      break;

   case VS_OPCODE_SCRATCH_READ:
      generate_scratch_read(inst, dst, src[0]);
      break;

   case VS_OPCODE_SCRATCH_WRITE:
      generate_scratch_write(inst, dst, src[0], src[1]);
      break;

   default:
      if (inst->opcode < (int)ARRAY_SIZE(brw_opcodes)) {
         fail("unsupported opcode in `%s' in VS\n",
              brw_opcodes[inst->opcode].name);
      } else {
         fail("unsupported opcode %d in VS\n", inst->opcode);
      }
   }
}

bool
vec4_visitor::run()
{
   /* Generate VS IR for main().  (the visitor only descends into
    * functions called "main").
    */
   foreach_iter(exec_list_iterator, iter, *shader->ir) {
      ir_instruction *ir = (ir_instruction *)iter.get();
      base_ir = ir;
      ir->accept(this);
   }

   emit_urb_writes();

   /* Before any optimization, push array accesses out to scratch
    * space where we need them to be.  This pass may allocate new
    * virtual GRFs, so we want to do it early.  It also makes sure
    * that we have reladdr computations available for CSE, since we'll
    * often do repeated subexpressions for those.
    */
   move_grf_array_access_to_scratch();

   if (failed)
      return false;

   setup_payload();
   reg_allocate();

   brw_set_access_mode(p, BRW_ALIGN_16);

   generate_code();

   return !failed;
}

void
vec4_visitor::generate_code()
{
   int last_native_inst = p->nr_insn;
   const char *last_annotation_string = NULL;
   ir_instruction *last_annotation_ir = NULL;

   int loop_stack_array_size = 16;
   int loop_stack_depth = 0;
   brw_instruction **loop_stack =
      rzalloc_array(this->mem_ctx, brw_instruction *, loop_stack_array_size);
   int *if_depth_in_loop =
      rzalloc_array(this->mem_ctx, int, loop_stack_array_size);

   if (unlikely(INTEL_DEBUG & DEBUG_VS)) {
      printf("Native code for vertex shader %d:\n", prog->Name);
   }

   foreach_list(node, &this->instructions) {
      vec4_instruction *inst = (vec4_instruction *)node;
      struct brw_reg src[3], dst;

      if (unlikely(INTEL_DEBUG & DEBUG_VS)) {
         if (last_annotation_ir != inst->ir) {
            last_annotation_ir = inst->ir;
            if (last_annotation_ir) {
               printf("   ");
               last_annotation_ir->print();
               printf("\n");
            }
         }
         if (last_annotation_string != inst->annotation) {
            last_annotation_string = inst->annotation;
            if (last_annotation_string)
               printf("   %s\n", last_annotation_string);
         }
      }

      for (unsigned int i = 0; i < 3; i++) {
         src[i] = inst->get_src(i);
      }
      dst = inst->get_dst();

      brw_set_conditionalmod(p, inst->conditional_mod);
      brw_set_predicate_control(p, inst->predicate);
      brw_set_predicate_inverse(p, inst->predicate_inverse);
      brw_set_saturate(p, inst->saturate);

      switch (inst->opcode) {
      case BRW_OPCODE_MOV:
         brw_MOV(p, dst, src[0]);
         break;
      case BRW_OPCODE_ADD:
         brw_ADD(p, dst, src[0], src[1]);
         break;
      case BRW_OPCODE_MUL:
         brw_MUL(p, dst, src[0], src[1]);
         break;

      case BRW_OPCODE_FRC:
         brw_FRC(p, dst, src[0]);
         break;
      case BRW_OPCODE_RNDD:
         brw_RNDD(p, dst, src[0]);
         break;
      case BRW_OPCODE_RNDE:
         brw_RNDE(p, dst, src[0]);
         break;
      case BRW_OPCODE_RNDZ:
         brw_RNDZ(p, dst, src[0]);
         break;

      case BRW_OPCODE_AND:
         brw_AND(p, dst, src[0], src[1]);
         break;
      case BRW_OPCODE_OR:
         brw_OR(p, dst, src[0], src[1]);
         break;
      case BRW_OPCODE_XOR:
         brw_XOR(p, dst, src[0], src[1]);
         break;
      case BRW_OPCODE_NOT:
         brw_NOT(p, dst, src[0]);
         break;
      case BRW_OPCODE_ASR:
         brw_ASR(p, dst, src[0], src[1]);
         break;
      case BRW_OPCODE_SHR:
         brw_SHR(p, dst, src[0], src[1]);
         break;
      case BRW_OPCODE_SHL:
         brw_SHL(p, dst, src[0], src[1]);
         break;

      case BRW_OPCODE_CMP:
         brw_CMP(p, dst, inst->conditional_mod, src[0], src[1]);
         break;
      case BRW_OPCODE_SEL:
         brw_SEL(p, dst, src[0], src[1]);
         break;

      case BRW_OPCODE_DP4:
         brw_DP4(p, dst, src[0], src[1]);
         break;

      case BRW_OPCODE_DP3:
         brw_DP3(p, dst, src[0], src[1]);
         break;

      case BRW_OPCODE_DP2:
         brw_DP2(p, dst, src[0], src[1]);
         break;

      case BRW_OPCODE_IF:
         if (inst->src[0].file != BAD_FILE) {
            /* The instruction has an embedded compare (only allowed on gen6) */
            assert(intel->gen == 6);
            gen6_IF(p, inst->conditional_mod, src[0], src[1]);
         } else {
            struct brw_instruction *brw_inst = brw_IF(p, BRW_EXECUTE_8);
            brw_inst->header.predicate_control = inst->predicate;
         }
         if_depth_in_loop[loop_stack_depth]++;
         break;

      case BRW_OPCODE_ELSE:
         brw_ELSE(p);
         break;
      case BRW_OPCODE_ENDIF:
         brw_ENDIF(p);
         if_depth_in_loop[loop_stack_depth]--;
         break;

      case BRW_OPCODE_DO:
         loop_stack[loop_stack_depth++] = brw_DO(p, BRW_EXECUTE_8);
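         /* Grow both loop-tracking arrays in lockstep if the nesting
          * depth exceeds the current allocation.
          */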
         if (loop_stack_array_size <= loop_stack_depth) {
            loop_stack_array_size *= 2;
            loop_stack = reralloc(this->mem_ctx, loop_stack, brw_instruction *,
                                  loop_stack_array_size);
            if_depth_in_loop = reralloc(this->mem_ctx, if_depth_in_loop, int,
                                        loop_stack_array_size);
         }
         if_depth_in_loop[loop_stack_depth] = 0;
         break;

      case BRW_OPCODE_BREAK:
         brw_BREAK(p, if_depth_in_loop[loop_stack_depth]);
         brw_set_predicate_control(p, BRW_PREDICATE_NONE);
         break;
      case BRW_OPCODE_CONTINUE:
         /* FINISHME: We need to write the loop instruction support still. */
         if (intel->gen >= 6)
            gen6_CONT(p, loop_stack[loop_stack_depth - 1]);
         else
            brw_CONT(p, if_depth_in_loop[loop_stack_depth]);
         brw_set_predicate_control(p, BRW_PREDICATE_NONE);
         break;

      case BRW_OPCODE_WHILE: {
         struct brw_instruction *inst0, *inst1;
         GLuint br = 1;

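         /* Gen5 and later appear to count jump distances in units of
          * half an instruction (64 bits), so the branch scale doubles.
          */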
         if (intel->gen >= 5)
            br = 2;

         assert(loop_stack_depth > 0);
         loop_stack_depth--;
         inst0 = inst1 = brw_WHILE(p, loop_stack[loop_stack_depth]);
         if (intel->gen < 6) {
            /* patch all the BREAK/CONT instructions from last BGNLOOP */
            while (inst0 > loop_stack[loop_stack_depth]) {
               inst0--;
               if (inst0->header.opcode == BRW_OPCODE_BREAK &&
                   inst0->bits3.if_else.jump_count == 0) {
                  inst0->bits3.if_else.jump_count = br * (inst1 - inst0 + 1);
               }
               else if (inst0->header.opcode == BRW_OPCODE_CONTINUE &&
                        inst0->bits3.if_else.jump_count == 0) {
                  inst0->bits3.if_else.jump_count = br * (inst1 - inst0);
               }
            }
         }
      }
         break;

      default:
         generate_vs_instruction(inst, dst, src);
         break;
      }

      if (unlikely(INTEL_DEBUG & DEBUG_VS)) {
         for (unsigned int i = last_native_inst; i < p->nr_insn; i++) {
            if (0) {
               printf("0x%08x 0x%08x 0x%08x 0x%08x ",
                      ((uint32_t *)&p->store[i])[3],
                      ((uint32_t *)&p->store[i])[2],
                      ((uint32_t *)&p->store[i])[1],
                      ((uint32_t *)&p->store[i])[0]);
            }
            brw_disasm(stdout, &p->store[i], intel->gen);
         }
      }

      last_native_inst = p->nr_insn;
   }

   if (unlikely(INTEL_DEBUG & DEBUG_VS)) {
      printf("\n");
   }

   ralloc_free(loop_stack);
   ralloc_free(if_depth_in_loop);

   brw_set_uip_jip(p);

   /* OK, while the INTEL_DEBUG=vs above is very nice for debugging VS
    * emit issues, it doesn't get the jump distances into the output,
    * which is often something we want to debug.  So this is here in
    * case you're doing that.
    */
   if (0) {
      if (unlikely(INTEL_DEBUG & DEBUG_VS)) {
         for (unsigned int i = 0; i < p->nr_insn; i++) {
            printf("0x%08x 0x%08x 0x%08x 0x%08x ",
                   ((uint32_t *)&p->store[i])[3],
                   ((uint32_t *)&p->store[i])[2],
                   ((uint32_t *)&p->store[i])[1],
                   ((uint32_t *)&p->store[i])[0]);
            brw_disasm(stdout, &p->store[i], intel->gen);
         }
      }
   }
}

extern "C" {

bool
brw_vs_emit(struct brw_vs_compile *c)
{
   struct brw_compile *p = &c->func;
   struct brw_context *brw = p->brw;
   struct intel_context *intel = &brw->intel;
   struct gl_context *ctx = &intel->ctx;
   struct gl_shader_program *prog = ctx->Shader.CurrentVertexProgram;

   if (!prog)
      return false;

   struct brw_shader *shader =
      (brw_shader *) prog->_LinkedShaders[MESA_SHADER_VERTEX];
   if (!shader)
      return false;

   if (unlikely(INTEL_DEBUG & DEBUG_VS)) {
      printf("GLSL IR for native vertex shader %d:\n", prog->Name);
      _mesa_print_ir(shader->ir, NULL);
      printf("\n\n");
   }

   vec4_visitor v(c, prog, shader);
   if (!v.run()) {
      /* FINISHME: Cleanly fail, test at link time, etc. */
      assert(!"not reached");
      return false;
   }

   return true;
}

} /* extern "C" */

} /* namespace brw */