9020fcc001244aab40871c86886762d086546f34
[mesa.git] / src / gallium / drivers / i965simple / brw_vs_emit.c
1 /*
2 Copyright (C) Intel Corp. 2006. All Rights Reserved.
3 Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
4 develop this 3D driver.
5
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
13
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
17
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25
26 **********************************************************************/
27 /*
28 * Authors:
29 * Keith Whitwell <keith@tungstengraphics.com>
30 */
31
32 #include "brw_context.h"
33 #include "brw_vs.h"
34
35 #include "pipe/p_shader_tokens.h"
36 #include "tgsi/util/tgsi_parse.h"
37
/* Per-program bookkeeping used while emitting a vertex shader.  The
 * counters are accumulated by process_declaration(); the *_idx members
 * are indices into the TGSI input/output register files.
 */
struct brw_prog_info {
   /* Number of registers declared in each file: */
   unsigned num_temps;
   unsigned num_addrs;
   unsigned num_consts;

   /* Non-zero once an output with PSIZE semantics has been declared: */
   unsigned writes_psize;

   unsigned pos_idx;           /* output holding the position */
   unsigned result_edge_idx;   /* output receiving the edge flag */
   unsigned edge_flag_idx;     /* input supplying the edge flag */
   unsigned psize_idx;         /* output holding the point size */
};
50
51 /* Do things as simply as possible. Allocate and populate all regs
52 * ahead of time.
53 */
54 static void brw_vs_alloc_regs( struct brw_vs_compile *c,
55 struct brw_prog_info *info )
56 {
57 unsigned i, reg = 0, mrf;
58 unsigned nr_params;
59
60 /* r0 -- reserved as usual
61 */
62 c->r0 = brw_vec8_grf(reg, 0); reg++;
63
64 /* User clip planes from curbe:
65 */
66 if (c->key.nr_userclip) {
67 for (i = 0; i < c->key.nr_userclip; i++) {
68 c->userplane[i] = stride( brw_vec4_grf(reg+3+i/2, (i%2) * 4), 0, 4, 1);
69 }
70
71 /* Deal with curbe alignment:
72 */
73 reg += ((6+c->key.nr_userclip+3)/4)*2;
74 }
75
76 /* Vertex program parameters from curbe:
77 */
78 nr_params = c->prog_data.max_const;
79 for (i = 0; i < nr_params; i++) {
80 c->regs[TGSI_FILE_CONSTANT][i] = stride(brw_vec4_grf(reg+i/2, (i%2) * 4), 0, 4, 1);
81 }
82 reg += (nr_params+1)/2;
83 c->prog_data.curb_read_length = reg - 1;
84
85
86
87 /* Allocate input regs:
88 */
89 c->nr_inputs = c->vp->info.num_inputs;
90 for (i = 0; i < c->nr_inputs; i++) {
91 c->regs[TGSI_FILE_INPUT][i] = brw_vec8_grf(reg, 0);
92 reg++;
93 }
94
95
96 /* Allocate outputs: TODO: could organize the non-position outputs
97 * to go straight into message regs.
98 */
99 c->nr_outputs = 0;
100 c->first_output = reg;
101 mrf = 4;
102 for (i = 0; i < c->vp->info.num_outputs; i++) {
103 c->nr_outputs++;
104 #if 0
105 if (i == VERT_RESULT_HPOS) {
106 c->regs[TGSI_FILE_OUTPUT][i] = brw_vec8_grf(reg, 0);
107 reg++;
108 }
109 else if (i == VERT_RESULT_PSIZ) {
110 c->regs[TGSI_FILE_OUTPUT][i] = brw_vec8_grf(reg, 0);
111 reg++;
112 mrf++; /* just a placeholder? XXX fix later stages & remove this */
113 }
114 else {
115 c->regs[TGSI_FILE_OUTPUT][i] = brw_message_reg(mrf);
116 mrf++;
117 }
118 #else
119 /*treat pos differently for now */
120 if (i == info->pos_idx) {
121 c->regs[TGSI_FILE_OUTPUT][i] = brw_vec8_grf(reg, 0);
122 reg++;
123 } else {
124 c->regs[TGSI_FILE_OUTPUT][i] = brw_message_reg(mrf);
125 mrf++;
126 }
127 #endif
128 }
129
130 /* Allocate program temporaries:
131 */
132 for (i = 0; i < info->num_temps; i++) {
133 c->regs[TGSI_FILE_TEMPORARY][i] = brw_vec8_grf(reg, 0);
134 reg++;
135 }
136
137 /* Address reg(s). Don't try to use the internal address reg until
138 * deref time.
139 */
140 for (i = 0; i < info->num_addrs; i++) {
141 c->regs[TGSI_FILE_ADDRESS][i] = brw_reg(BRW_GENERAL_REGISTER_FILE,
142 reg,
143 0,
144 BRW_REGISTER_TYPE_D,
145 BRW_VERTICAL_STRIDE_8,
146 BRW_WIDTH_8,
147 BRW_HORIZONTAL_STRIDE_1,
148 BRW_SWIZZLE_XXXX,
149 TGSI_WRITEMASK_X);
150 reg++;
151 }
152
153 for (i = 0; i < 128; i++) {
154 if (c->output_regs[i].used_in_src) {
155 c->output_regs[i].reg = brw_vec8_grf(reg, 0);
156 reg++;
157 }
158 }
159
160 c->stack = brw_uw16_reg(BRW_GENERAL_REGISTER_FILE, reg, 0);
161 reg += 2;
162
163
164 /* Some opcodes need an internal temporary:
165 */
166 c->first_tmp = reg;
167 c->last_tmp = reg; /* for allocation purposes */
168
169 /* Each input reg holds data from two vertices. The
170 * urb_read_length is the number of registers read from *each*
171 * vertex urb, so is half the amount:
172 */
173 c->prog_data.urb_read_length = (c->nr_inputs+1)/2;
174
175 c->prog_data.urb_entry_size = (c->nr_outputs+2+3)/4;
176 c->prog_data.total_grf = reg;
177 }
178
179
180 static struct brw_reg get_tmp( struct brw_vs_compile *c )
181 {
182 struct brw_reg tmp = brw_vec8_grf(c->last_tmp, 0);
183
184 if (++c->last_tmp > c->prog_data.total_grf)
185 c->prog_data.total_grf = c->last_tmp;
186
187 return tmp;
188 }
189
190 static void release_tmp( struct brw_vs_compile *c, struct brw_reg tmp )
191 {
192 if (tmp.nr == c->last_tmp-1)
193 c->last_tmp--;
194 }
195
196 static void release_tmps( struct brw_vs_compile *c )
197 {
198 c->last_tmp = c->first_tmp;
199 }
200
201
202 static void unalias1( struct brw_vs_compile *c,
203 struct brw_reg dst,
204 struct brw_reg arg0,
205 void (*func)( struct brw_vs_compile *,
206 struct brw_reg,
207 struct brw_reg ))
208 {
209 if (dst.file == arg0.file && dst.nr == arg0.nr) {
210 struct brw_compile *p = &c->func;
211 struct brw_reg tmp = brw_writemask(get_tmp(c), dst.dw1.bits.writemask);
212 func(c, tmp, arg0);
213 brw_MOV(p, dst, tmp);
214 }
215 else {
216 func(c, dst, arg0);
217 }
218 }
219
220 static void unalias2( struct brw_vs_compile *c,
221 struct brw_reg dst,
222 struct brw_reg arg0,
223 struct brw_reg arg1,
224 void (*func)( struct brw_vs_compile *,
225 struct brw_reg,
226 struct brw_reg,
227 struct brw_reg ))
228 {
229 if ((dst.file == arg0.file && dst.nr == arg0.nr) ||
230 (dst.file == arg1.file && dst.nr == arg1.nr)) {
231 struct brw_compile *p = &c->func;
232 struct brw_reg tmp = brw_writemask(get_tmp(c), dst.dw1.bits.writemask);
233 func(c, tmp, arg0, arg1);
234 brw_MOV(p, dst, tmp);
235 }
236 else {
237 func(c, dst, arg0, arg1);
238 }
239 }
240
241 static void emit_sop( struct brw_compile *p,
242 struct brw_reg dst,
243 struct brw_reg arg0,
244 struct brw_reg arg1,
245 unsigned cond)
246 {
247 brw_push_insn_state(p);
248 brw_CMP(p, brw_null_reg(), cond, arg0, arg1);
249 brw_set_predicate_control(p, BRW_PREDICATE_NONE);
250 brw_MOV(p, dst, brw_imm_f(1.0f));
251 brw_set_predicate_control(p, BRW_PREDICATE_NORMAL);
252 brw_MOV(p, dst, brw_imm_f(0.0f));
253 brw_pop_insn_state(p);
254 }
255
256 static void emit_seq( struct brw_compile *p,
257 struct brw_reg dst,
258 struct brw_reg arg0,
259 struct brw_reg arg1 )
260 {
261 emit_sop(p, dst, arg0, arg1, BRW_CONDITIONAL_EQ);
262 }
263
264 static void emit_sne( struct brw_compile *p,
265 struct brw_reg dst,
266 struct brw_reg arg0,
267 struct brw_reg arg1 )
268 {
269 emit_sop(p, dst, arg0, arg1, BRW_CONDITIONAL_NEQ);
270 }
271 static void emit_slt( struct brw_compile *p,
272 struct brw_reg dst,
273 struct brw_reg arg0,
274 struct brw_reg arg1 )
275 {
276 emit_sop(p, dst, arg0, arg1, BRW_CONDITIONAL_L);
277 }
278
279 static void emit_sle( struct brw_compile *p,
280 struct brw_reg dst,
281 struct brw_reg arg0,
282 struct brw_reg arg1 )
283 {
284 emit_sop(p, dst, arg0, arg1, BRW_CONDITIONAL_LE);
285 }
286
287 static void emit_sgt( struct brw_compile *p,
288 struct brw_reg dst,
289 struct brw_reg arg0,
290 struct brw_reg arg1 )
291 {
292 emit_sop(p, dst, arg0, arg1, BRW_CONDITIONAL_G);
293 }
294
295 static void emit_sge( struct brw_compile *p,
296 struct brw_reg dst,
297 struct brw_reg arg0,
298 struct brw_reg arg1 )
299 {
300 emit_sop(p, dst, arg0, arg1, BRW_CONDITIONAL_GE);
301 }
302
303 static void emit_max( struct brw_compile *p,
304 struct brw_reg dst,
305 struct brw_reg arg0,
306 struct brw_reg arg1 )
307 {
308 brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, arg0, arg1);
309 brw_SEL(p, dst, arg1, arg0);
310 brw_set_predicate_control(p, BRW_PREDICATE_NONE);
311 }
312
313 static void emit_min( struct brw_compile *p,
314 struct brw_reg dst,
315 struct brw_reg arg0,
316 struct brw_reg arg1 )
317 {
318 brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, arg0, arg1);
319 brw_SEL(p, dst, arg0, arg1);
320 brw_set_predicate_control(p, BRW_PREDICATE_NONE);
321 }
322
323
324 static void emit_math1( struct brw_vs_compile *c,
325 unsigned function,
326 struct brw_reg dst,
327 struct brw_reg arg0,
328 unsigned precision)
329 {
330 /* There are various odd behaviours with SEND on the simulator. In
331 * addition there are documented issues with the fact that the GEN4
332 * processor doesn't do dependency control properly on SEND
333 * results. So, on balance, this kludge to get around failures
334 * with writemasked math results looks like it might be necessary
335 * whether that turns out to be a simulator bug or not:
336 */
337 struct brw_compile *p = &c->func;
338 struct brw_reg tmp = dst;
339 boolean need_tmp = (dst.dw1.bits.writemask != 0xf ||
340 dst.file != BRW_GENERAL_REGISTER_FILE);
341
342 if (need_tmp)
343 tmp = get_tmp(c);
344
345 brw_math(p,
346 tmp,
347 function,
348 BRW_MATH_SATURATE_NONE,
349 2,
350 arg0,
351 BRW_MATH_DATA_SCALAR,
352 precision);
353
354 if (need_tmp) {
355 brw_MOV(p, dst, tmp);
356 release_tmp(c, tmp);
357 }
358 }
359
360 static void emit_math2( struct brw_vs_compile *c,
361 unsigned function,
362 struct brw_reg dst,
363 struct brw_reg arg0,
364 struct brw_reg arg1,
365 unsigned precision)
366 {
367 struct brw_compile *p = &c->func;
368 struct brw_reg tmp = dst;
369 boolean need_tmp = (dst.dw1.bits.writemask != 0xf ||
370 dst.file != BRW_GENERAL_REGISTER_FILE);
371
372 if (need_tmp)
373 tmp = get_tmp(c);
374
375 brw_MOV(p, brw_message_reg(3), arg1);
376
377 brw_math(p,
378 tmp,
379 function,
380 BRW_MATH_SATURATE_NONE,
381 2,
382 arg0,
383 BRW_MATH_DATA_SCALAR,
384 precision);
385
386 if (need_tmp) {
387 brw_MOV(p, dst, tmp);
388 release_tmp(c, tmp);
389 }
390 }
391
392
393
394 static void emit_exp_noalias( struct brw_vs_compile *c,
395 struct brw_reg dst,
396 struct brw_reg arg0 )
397 {
398 struct brw_compile *p = &c->func;
399
400
401 if (dst.dw1.bits.writemask & TGSI_WRITEMASK_X) {
402 struct brw_reg tmp = get_tmp(c);
403 struct brw_reg tmp_d = retype(tmp, BRW_REGISTER_TYPE_D);
404
405 /* tmp_d = floor(arg0.x) */
406 brw_RNDD(p, tmp_d, brw_swizzle1(arg0, 0));
407
408 /* result[0] = 2.0 ^ tmp */
409
410 /* Adjust exponent for floating point:
411 * exp += 127
412 */
413 brw_ADD(p, brw_writemask(tmp_d, TGSI_WRITEMASK_X), tmp_d, brw_imm_d(127));
414
415 /* Install exponent and sign.
416 * Excess drops off the edge:
417 */
418 brw_SHL(p, brw_writemask(retype(dst, BRW_REGISTER_TYPE_D), TGSI_WRITEMASK_X),
419 tmp_d, brw_imm_d(23));
420
421 release_tmp(c, tmp);
422 }
423
424 if (dst.dw1.bits.writemask & TGSI_WRITEMASK_Y) {
425 /* result[1] = arg0.x - floor(arg0.x) */
426 brw_FRC(p, brw_writemask(dst, TGSI_WRITEMASK_Y), brw_swizzle1(arg0, 0));
427 }
428
429 if (dst.dw1.bits.writemask & TGSI_WRITEMASK_Z) {
430 /* As with the LOG instruction, we might be better off just
431 * doing a taylor expansion here, seeing as we have to do all
432 * the prep work.
433 *
434 * If mathbox partial precision is too low, consider also:
435 * result[3] = result[0] * EXP(result[1])
436 */
437 emit_math1(c,
438 BRW_MATH_FUNCTION_EXP,
439 brw_writemask(dst, TGSI_WRITEMASK_Z),
440 brw_swizzle1(arg0, 0),
441 BRW_MATH_PRECISION_PARTIAL);
442 }
443
444 if (dst.dw1.bits.writemask & TGSI_WRITEMASK_W) {
445 /* result[3] = 1.0; */
446 brw_MOV(p, brw_writemask(dst, TGSI_WRITEMASK_W), brw_imm_f(1));
447 }
448 }
449
450
451 static void emit_log_noalias( struct brw_vs_compile *c,
452 struct brw_reg dst,
453 struct brw_reg arg0 )
454 {
455 struct brw_compile *p = &c->func;
456 struct brw_reg tmp = dst;
457 struct brw_reg tmp_ud = retype(tmp, BRW_REGISTER_TYPE_UD);
458 struct brw_reg arg0_ud = retype(arg0, BRW_REGISTER_TYPE_UD);
459 boolean need_tmp = (dst.dw1.bits.writemask != 0xf ||
460 dst.file != BRW_GENERAL_REGISTER_FILE);
461
462 if (need_tmp) {
463 tmp = get_tmp(c);
464 tmp_ud = retype(tmp, BRW_REGISTER_TYPE_UD);
465 }
466
467 /* Perform mant = frexpf(fabsf(x), &exp), adjust exp and mnt
468 * according to spec:
469 *
470 * These almost look likey they could be joined up, but not really
471 * practical:
472 *
473 * result[0].f = (x.i & ((1<<31)-1) >> 23) - 127
474 * result[1].i = (x.i & ((1<<23)-1) + (127<<23)
475 */
476 if (dst.dw1.bits.writemask & TGSI_WRITEMASK_XZ) {
477 brw_AND(p,
478 brw_writemask(tmp_ud, TGSI_WRITEMASK_X),
479 brw_swizzle1(arg0_ud, 0),
480 brw_imm_ud((1U<<31)-1));
481
482 brw_SHR(p,
483 brw_writemask(tmp_ud, TGSI_WRITEMASK_X),
484 tmp_ud,
485 brw_imm_ud(23));
486
487 brw_ADD(p,
488 brw_writemask(tmp, TGSI_WRITEMASK_X),
489 retype(tmp_ud, BRW_REGISTER_TYPE_D), /* does it matter? */
490 brw_imm_d(-127));
491 }
492
493 if (dst.dw1.bits.writemask & TGSI_WRITEMASK_YZ) {
494 brw_AND(p,
495 brw_writemask(tmp_ud, TGSI_WRITEMASK_Y),
496 brw_swizzle1(arg0_ud, 0),
497 brw_imm_ud((1<<23)-1));
498
499 brw_OR(p,
500 brw_writemask(tmp_ud, TGSI_WRITEMASK_Y),
501 tmp_ud,
502 brw_imm_ud(127<<23));
503 }
504
505 if (dst.dw1.bits.writemask & TGSI_WRITEMASK_Z) {
506 /* result[2] = result[0] + LOG2(result[1]); */
507
508 /* Why bother? The above is just a hint how to do this with a
509 * taylor series. Maybe we *should* use a taylor series as by
510 * the time all the above has been done it's almost certainly
511 * quicker than calling the mathbox, even with low precision.
512 *
513 * Options are:
514 * - result[0] + mathbox.LOG2(result[1])
515 * - mathbox.LOG2(arg0.x)
516 * - result[0] + inline_taylor_approx(result[1])
517 */
518 emit_math1(c,
519 BRW_MATH_FUNCTION_LOG,
520 brw_writemask(tmp, TGSI_WRITEMASK_Z),
521 brw_swizzle1(tmp, 1),
522 BRW_MATH_PRECISION_FULL);
523
524 brw_ADD(p,
525 brw_writemask(tmp, TGSI_WRITEMASK_Z),
526 brw_swizzle1(tmp, 2),
527 brw_swizzle1(tmp, 0));
528 }
529
530 if (dst.dw1.bits.writemask & TGSI_WRITEMASK_W) {
531 /* result[3] = 1.0; */
532 brw_MOV(p, brw_writemask(tmp, TGSI_WRITEMASK_W), brw_imm_f(1));
533 }
534
535 if (need_tmp) {
536 brw_MOV(p, dst, tmp);
537 release_tmp(c, tmp);
538 }
539 }
540
541
542
543
544 /* Need to unalias - consider swizzles: r0 = DST r0.xxxx r1
545 */
546 static void emit_dst_noalias( struct brw_vs_compile *c,
547 struct brw_reg dst,
548 struct brw_reg arg0,
549 struct brw_reg arg1)
550 {
551 struct brw_compile *p = &c->func;
552
553 /* There must be a better way to do this:
554 */
555 if (dst.dw1.bits.writemask & TGSI_WRITEMASK_X)
556 brw_MOV(p, brw_writemask(dst, TGSI_WRITEMASK_X), brw_imm_f(1.0));
557 if (dst.dw1.bits.writemask & TGSI_WRITEMASK_Y)
558 brw_MUL(p, brw_writemask(dst, TGSI_WRITEMASK_Y), arg0, arg1);
559 if (dst.dw1.bits.writemask & TGSI_WRITEMASK_Z)
560 brw_MOV(p, brw_writemask(dst, TGSI_WRITEMASK_Z), arg0);
561 if (dst.dw1.bits.writemask & TGSI_WRITEMASK_W)
562 brw_MOV(p, brw_writemask(dst, TGSI_WRITEMASK_W), arg1);
563 }
564
565 static void emit_xpd( struct brw_compile *p,
566 struct brw_reg dst,
567 struct brw_reg t,
568 struct brw_reg u)
569 {
570 brw_MUL(p, brw_null_reg(), brw_swizzle(t, 1,2,0,3), brw_swizzle(u,2,0,1,3));
571 brw_MAC(p, dst, negate(brw_swizzle(t, 2,0,1,3)), brw_swizzle(u,1,2,0,3));
572 }
573
574
575
576 static void emit_lit_noalias( struct brw_vs_compile *c,
577 struct brw_reg dst,
578 struct brw_reg arg0 )
579 {
580 struct brw_compile *p = &c->func;
581 struct brw_instruction *if_insn;
582 struct brw_reg tmp = dst;
583 boolean need_tmp = (dst.file != BRW_GENERAL_REGISTER_FILE);
584
585 if (need_tmp)
586 tmp = get_tmp(c);
587
588 brw_MOV(p, brw_writemask(dst, TGSI_WRITEMASK_YZ), brw_imm_f(0));
589 brw_MOV(p, brw_writemask(dst, TGSI_WRITEMASK_XW), brw_imm_f(1));
590
591 /* Need to use BRW_EXECUTE_8 and also do an 8-wide compare in order
592 * to get all channels active inside the IF. In the clipping code
593 * we run with NoMask, so it's not an option and we can use
594 * BRW_EXECUTE_1 for all comparisions.
595 */
596 brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_G, brw_swizzle1(arg0,0), brw_imm_f(0));
597 if_insn = brw_IF(p, BRW_EXECUTE_8);
598 {
599 brw_MOV(p, brw_writemask(dst, TGSI_WRITEMASK_Y), brw_swizzle1(arg0,0));
600
601 brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_G, brw_swizzle1(arg0,1), brw_imm_f(0));
602 brw_MOV(p, brw_writemask(tmp, TGSI_WRITEMASK_Z), brw_swizzle1(arg0,1));
603 brw_set_predicate_control(p, BRW_PREDICATE_NONE);
604
605 emit_math2(c,
606 BRW_MATH_FUNCTION_POW,
607 brw_writemask(dst, TGSI_WRITEMASK_Z),
608 brw_swizzle1(tmp, 2),
609 brw_swizzle1(arg0, 3),
610 BRW_MATH_PRECISION_PARTIAL);
611 }
612
613 brw_ENDIF(p, if_insn);
614 }
615
616
617
618
619
620 /* TODO: relative addressing!
621 */
622 static struct brw_reg get_reg( struct brw_vs_compile *c,
623 unsigned file,
624 unsigned index )
625 {
626 switch (file) {
627 case TGSI_FILE_TEMPORARY:
628 case TGSI_FILE_INPUT:
629 case TGSI_FILE_OUTPUT:
630 assert(c->regs[file][index].nr != 0);
631 return c->regs[file][index];
632 case TGSI_FILE_CONSTANT:
633 assert(c->regs[TGSI_FILE_CONSTANT][index + c->prog_data.num_imm].nr != 0);
634 return c->regs[TGSI_FILE_CONSTANT][index + c->prog_data.num_imm];
635 case TGSI_FILE_IMMEDIATE:
636 assert(c->regs[TGSI_FILE_CONSTANT][index].nr != 0);
637 return c->regs[TGSI_FILE_CONSTANT][index];
638 case TGSI_FILE_ADDRESS:
639 assert(index == 0);
640 return c->regs[file][index];
641
642 case TGSI_FILE_NULL: /* undef values */
643 return brw_null_reg();
644
645 default:
646 assert(0);
647 return brw_null_reg();
648 }
649 }
650
651
652
653 static struct brw_reg deref( struct brw_vs_compile *c,
654 struct brw_reg arg,
655 int offset)
656 {
657 struct brw_compile *p = &c->func;
658 struct brw_reg tmp = vec4(get_tmp(c));
659 struct brw_reg vp_address = retype(vec1(get_reg(c, TGSI_FILE_ADDRESS, 0)), BRW_REGISTER_TYPE_UW);
660 unsigned byte_offset = arg.nr * 32 + arg.subnr + offset * 16;
661 struct brw_reg indirect = brw_vec4_indirect(0,0);
662
663 {
664 brw_push_insn_state(p);
665 brw_set_access_mode(p, BRW_ALIGN_1);
666
667 /* This is pretty clunky - load the address register twice and
668 * fetch each 4-dword value in turn. There must be a way to do
669 * this in a single pass, but I couldn't get it to work.
670 */
671 brw_ADD(p, brw_address_reg(0), vp_address, brw_imm_d(byte_offset));
672 brw_MOV(p, tmp, indirect);
673
674 brw_ADD(p, brw_address_reg(0), suboffset(vp_address, 8), brw_imm_d(byte_offset));
675 brw_MOV(p, suboffset(tmp, 4), indirect);
676
677 brw_pop_insn_state(p);
678 }
679
680 return vec8(tmp);
681 }
682
683
684 static void emit_arl( struct brw_vs_compile *c,
685 struct brw_reg dst,
686 struct brw_reg arg0 )
687 {
688 struct brw_compile *p = &c->func;
689 struct brw_reg tmp = dst;
690 boolean need_tmp = (dst.file != BRW_GENERAL_REGISTER_FILE);
691
692 if (need_tmp)
693 tmp = get_tmp(c);
694
695 brw_RNDD(p, tmp, arg0);
696 brw_MUL(p, dst, tmp, brw_imm_d(16));
697
698 if (need_tmp)
699 release_tmp(c, tmp);
700 }
701
702
703 /* Will return mangled results for SWZ op. The emit_swz() function
704 * ignores this result and recalculates taking extended swizzles into
705 * account.
706 */
707 static struct brw_reg get_arg( struct brw_vs_compile *c,
708 struct tgsi_src_register *src )
709 {
710 struct brw_reg reg;
711
712 if (src->File == TGSI_FILE_NULL)
713 return brw_null_reg();
714
715 #if 0
716 if (src->RelAddr)
717 reg = deref(c, c->regs[PROGRAM_STATE_VAR][0], src->Index);
718 else
719 #endif
720 reg = get_reg(c, src->File, src->Index);
721
722 /* Convert 3-bit swizzle to 2-bit.
723 */
724 reg.dw1.bits.swizzle = BRW_SWIZZLE4(src->SwizzleX,
725 src->SwizzleY,
726 src->SwizzleZ,
727 src->SwizzleW);
728
729 /* Note this is ok for non-swizzle instructions:
730 */
731 reg.negate = src->Negate ? 1 : 0;
732
733 return reg;
734 }
735
736
737 static struct brw_reg get_dst( struct brw_vs_compile *c,
738 const struct tgsi_dst_register *dst )
739 {
740 struct brw_reg reg = get_reg(c, dst->File, dst->Index);
741
742 reg.dw1.bits.writemask = dst->WriteMask;
743
744 return reg;
745 }
746
747
748
749
750 static void emit_swz( struct brw_vs_compile *c,
751 struct brw_reg dst,
752 struct tgsi_src_register src )
753 {
754 struct brw_compile *p = &c->func;
755 unsigned zeros_mask = 0;
756 unsigned ones_mask = 0;
757 unsigned src_mask = 0;
758 ubyte src_swz[4];
759 boolean need_tmp = (src.Negate &&
760 dst.file != BRW_GENERAL_REGISTER_FILE);
761 struct brw_reg tmp = dst;
762 unsigned i;
763
764 if (need_tmp)
765 tmp = get_tmp(c);
766
767 for (i = 0; i < 4; i++) {
768 if (dst.dw1.bits.writemask & (1<<i)) {
769 ubyte s = 0;
770 switch(i) {
771 case 0:
772 s = src.SwizzleX;
773 break;
774 s = src.SwizzleY;
775 case 1:
776 break;
777 s = src.SwizzleZ;
778 case 2:
779 break;
780 s = src.SwizzleW;
781 case 3:
782 break;
783 }
784 switch (s) {
785 case TGSI_SWIZZLE_X:
786 case TGSI_SWIZZLE_Y:
787 case TGSI_SWIZZLE_Z:
788 case TGSI_SWIZZLE_W:
789 src_mask |= 1<<i;
790 src_swz[i] = s;
791 break;
792 case TGSI_EXTSWIZZLE_ZERO:
793 zeros_mask |= 1<<i;
794 break;
795 case TGSI_EXTSWIZZLE_ONE:
796 ones_mask |= 1<<i;
797 break;
798 }
799 }
800 }
801
802 /* Do src first, in case dst aliases src:
803 */
804 if (src_mask) {
805 struct brw_reg arg0;
806
807 #if 0
808 if (src.RelAddr)
809 arg0 = deref(c, c->regs[PROGRAM_STATE_VAR][0], src.Index);
810 else
811 #endif
812 arg0 = get_reg(c, src.File, src.Index);
813
814 arg0 = brw_swizzle(arg0,
815 src_swz[0], src_swz[1],
816 src_swz[2], src_swz[3]);
817
818 brw_MOV(p, brw_writemask(tmp, src_mask), arg0);
819 }
820
821 if (zeros_mask)
822 brw_MOV(p, brw_writemask(tmp, zeros_mask), brw_imm_f(0));
823
824 if (ones_mask)
825 brw_MOV(p, brw_writemask(tmp, ones_mask), brw_imm_f(1));
826
827 if (src.Negate)
828 brw_MOV(p, brw_writemask(tmp, src.Negate), negate(tmp));
829
830 if (need_tmp) {
831 brw_MOV(p, dst, tmp);
832 release_tmp(c, tmp);
833 }
834 }
835
836
837
838 /* Post-vertex-program processing. Send the results to the URB.
839 */
840 static void emit_vertex_write( struct brw_vs_compile *c, struct brw_prog_info *info)
841 {
842 struct brw_compile *p = &c->func;
843 struct brw_reg m0 = brw_message_reg(0);
844 struct brw_reg pos = c->regs[TGSI_FILE_OUTPUT][info->pos_idx];
845 struct brw_reg ndc;
846
847 if (c->key.copy_edgeflag) {
848 brw_MOV(p,
849 get_reg(c, TGSI_FILE_OUTPUT, info->result_edge_idx),
850 get_reg(c, TGSI_FILE_INPUT, info->edge_flag_idx));
851 }
852
853
854 /* Build ndc coords? TODO: Shortcircuit when w is known to be one.
855 */
856 if (!c->key.know_w_is_one) {
857 ndc = get_tmp(c);
858 emit_math1(c, BRW_MATH_FUNCTION_INV, ndc, brw_swizzle1(pos, 3), BRW_MATH_PRECISION_FULL);
859 brw_MUL(p, brw_writemask(ndc, TGSI_WRITEMASK_XYZ), pos, ndc);
860 }
861 else {
862 ndc = pos;
863 }
864
865 /* This includes the workaround for -ve rhw, so is no longer an
866 * optional step:
867 */
868 if (info->writes_psize ||
869 c->key.nr_userclip ||
870 !c->key.know_w_is_one)
871 {
872 struct brw_reg header1 = retype(get_tmp(c), BRW_REGISTER_TYPE_UD);
873 unsigned i;
874
875 brw_MOV(p, header1, brw_imm_ud(0));
876
877 brw_set_access_mode(p, BRW_ALIGN_16);
878
879 if (info->writes_psize) {
880 struct brw_reg psiz = c->regs[TGSI_FILE_OUTPUT][info->psize_idx];
881 brw_MUL(p, brw_writemask(header1, TGSI_WRITEMASK_W),
882 brw_swizzle1(psiz, 0), brw_imm_f(1<<11));
883 brw_AND(p, brw_writemask(header1, TGSI_WRITEMASK_W), header1,
884 brw_imm_ud(0x7ff<<8));
885 }
886
887
888 for (i = 0; i < c->key.nr_userclip; i++) {
889 brw_set_conditionalmod(p, BRW_CONDITIONAL_L);
890 brw_DP4(p, brw_null_reg(), pos, c->userplane[i]);
891 brw_OR(p, brw_writemask(header1, TGSI_WRITEMASK_W), header1, brw_imm_ud(1<<i));
892 brw_set_predicate_control(p, BRW_PREDICATE_NONE);
893 }
894
895
896 /* i965 clipping workaround:
897 * 1) Test for -ve rhw
898 * 2) If set,
899 * set ndc = (0,0,0,0)
900 * set ucp[6] = 1
901 *
902 * Later, clipping will detect ucp[6] and ensure the primitive is
903 * clipped against all fixed planes.
904 */
905 if (!c->key.know_w_is_one) {
906 brw_CMP(p,
907 vec8(brw_null_reg()),
908 BRW_CONDITIONAL_L,
909 brw_swizzle1(ndc, 3),
910 brw_imm_f(0));
911
912 brw_OR(p, brw_writemask(header1, TGSI_WRITEMASK_W), header1, brw_imm_ud(1<<6));
913 brw_MOV(p, ndc, brw_imm_f(0));
914 brw_set_predicate_control(p, BRW_PREDICATE_NONE);
915 }
916
917 brw_set_access_mode(p, BRW_ALIGN_1); /* why? */
918 brw_MOV(p, retype(brw_message_reg(1), BRW_REGISTER_TYPE_UD), header1);
919 brw_set_access_mode(p, BRW_ALIGN_16);
920
921 release_tmp(c, header1);
922 }
923 else {
924 brw_MOV(p, retype(brw_message_reg(1), BRW_REGISTER_TYPE_UD), brw_imm_ud(0));
925 }
926
927
928 /* Emit the (interleaved) headers for the two vertices - an 8-reg
929 * of zeros followed by two sets of NDC coordinates:
930 */
931 brw_set_access_mode(p, BRW_ALIGN_1);
932 brw_MOV(p, offset(m0, 2), ndc);
933 brw_MOV(p, offset(m0, 3), pos);
934
935
936 brw_urb_WRITE(p,
937 brw_null_reg(), /* dest */
938 0, /* starting mrf reg nr */
939 c->r0, /* src */
940 0, /* allocate */
941 1, /* used */
942 c->nr_outputs + 3, /* msg len */
943 0, /* response len */
944 1, /* eot */
945 1, /* writes complete */
946 0, /* urb destination offset */
947 BRW_URB_SWIZZLE_INTERLEAVE);
948
949 }
950
951 static void
952 post_vs_emit( struct brw_vs_compile *c, struct brw_instruction *end_inst )
953 {
954 struct tgsi_parse_context parse;
955 const struct tgsi_token *tokens = c->vp->program.tokens;
956 tgsi_parse_init(&parse, tokens);
957 while (!tgsi_parse_end_of_tokens(&parse)) {
958 tgsi_parse_token(&parse);
959 if (parse.FullToken.Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION) {
960 #if 0
961 struct brw_instruction *brw_inst1, *brw_inst2;
962 const struct tgsi_full_instruction *inst1, *inst2;
963 int offset;
964 inst1 = &parse.FullToken.FullInstruction;
965 brw_inst1 = inst1->Data;
966 switch (inst1->Opcode) {
967 case TGSI_OPCODE_CAL:
968 case TGSI_OPCODE_BRA:
969 target_insn = inst1->BranchTarget;
970 inst2 = &c->vp->program.Base.Instructions[target_insn];
971 brw_inst2 = inst2->Data;
972 offset = brw_inst2 - brw_inst1;
973 brw_set_src1(brw_inst1, brw_imm_d(offset*16));
974 break;
975 case TGSI_OPCODE_END:
976 offset = end_inst - brw_inst1;
977 brw_set_src1(brw_inst1, brw_imm_d(offset*16));
978 break;
979 default:
980 break;
981 }
982 #endif
983 }
984 }
985 tgsi_parse_free(&parse);
986 }
987
988 static void process_declaration(const struct tgsi_full_declaration *decl,
989 struct brw_prog_info *info)
990 {
991 int first = decl->u.DeclarationRange.First;
992 int last = decl->u.DeclarationRange.Last;
993
994 assert (decl->Declaration.Declare != TGSI_DECLARE_MASK);
995
996 switch(decl->Declaration.File) {
997 case TGSI_FILE_CONSTANT:
998 info->num_consts += last - first + 1;
999 break;
1000 case TGSI_FILE_INPUT: {
1001 }
1002 break;
1003 case TGSI_FILE_OUTPUT: {
1004 assert(last == first); /* for now */
1005 if (decl->Declaration.Semantic) {
1006 switch (decl->Semantic.SemanticName) {
1007 case TGSI_SEMANTIC_POSITION: {
1008 info->pos_idx = first;
1009 }
1010 break;
1011 case TGSI_SEMANTIC_COLOR:
1012 break;
1013 case TGSI_SEMANTIC_BCOLOR:
1014 break;
1015 case TGSI_SEMANTIC_FOG:
1016 break;
1017 case TGSI_SEMANTIC_PSIZE: {
1018 info->writes_psize = TRUE;
1019 info->psize_idx = first;
1020 }
1021 break;
1022 case TGSI_SEMANTIC_GENERIC:
1023 break;
1024 }
1025 }
1026 }
1027 break;
1028 case TGSI_FILE_TEMPORARY: {
1029 info->num_temps += (last - first) + 1;
1030 }
1031 break;
1032 case TGSI_FILE_SAMPLER: {
1033 }
1034 break;
1035 case TGSI_FILE_ADDRESS: {
1036 info->num_addrs += (last - first) + 1;
1037 }
1038 break;
1039 case TGSI_FILE_IMMEDIATE: {
1040 }
1041 break;
1042 case TGSI_FILE_NULL: {
1043 }
1044 break;
1045 }
1046 }
1047
1048 static void process_instruction(struct brw_vs_compile *c,
1049 struct tgsi_full_instruction *inst,
1050 struct brw_prog_info *info)
1051 {
1052 struct brw_reg args[3], dst;
1053 struct brw_compile *p = &c->func;
1054 /*struct brw_indirect stack_index = brw_indirect(0, 0);*/
1055 unsigned i;
1056 unsigned index;
1057 unsigned file;
1058 /*FIXME: might not be the only one*/
1059 const struct tgsi_dst_register *dst_reg = &inst->FullDstRegisters[0].DstRegister;
1060 /*
1061 struct brw_instruction *if_inst[MAX_IFSN];
1062 unsigned insn, if_insn = 0;
1063 */
1064
1065 for (i = 0; i < 3; i++) {
1066 struct tgsi_full_src_register *src = &inst->FullSrcRegisters[i];
1067 index = src->SrcRegister.Index;
1068 file = src->SrcRegister.File;
1069 if (file == TGSI_FILE_OUTPUT && c->output_regs[index].used_in_src)
1070 args[i] = c->output_regs[index].reg;
1071 else
1072 args[i] = get_arg(c, &src->SrcRegister);
1073 }
1074
1075 /* Get dest regs. Note that it is possible for a reg to be both
1076 * dst and arg, given the static allocation of registers. So
1077 * care needs to be taken emitting multi-operation instructions.
1078 */
1079 index = dst_reg->Index;
1080 file = dst_reg->File;
1081 if (file == TGSI_FILE_OUTPUT && c->output_regs[index].used_in_src)
1082 dst = c->output_regs[index].reg;
1083 else
1084 dst = get_dst(c, dst_reg);
1085
1086 switch (inst->Instruction.Opcode) {
1087 case TGSI_OPCODE_ABS:
1088 brw_MOV(p, dst, brw_abs(args[0]));
1089 break;
1090 case TGSI_OPCODE_ADD:
1091 brw_ADD(p, dst, args[0], args[1]);
1092 break;
1093 case TGSI_OPCODE_DP3:
1094 brw_DP3(p, dst, args[0], args[1]);
1095 break;
1096 case TGSI_OPCODE_DP4:
1097 brw_DP4(p, dst, args[0], args[1]);
1098 break;
1099 case TGSI_OPCODE_DPH:
1100 brw_DPH(p, dst, args[0], args[1]);
1101 break;
1102 case TGSI_OPCODE_DST:
1103 unalias2(c, dst, args[0], args[1], emit_dst_noalias);
1104 break;
1105 case TGSI_OPCODE_EXP:
1106 unalias1(c, dst, args[0], emit_exp_noalias);
1107 break;
1108 case TGSI_OPCODE_EX2:
1109 emit_math1(c, BRW_MATH_FUNCTION_EXP, dst, args[0], BRW_MATH_PRECISION_FULL);
1110 break;
1111 case TGSI_OPCODE_ARL:
1112 emit_arl(c, dst, args[0]);
1113 break;
1114 case TGSI_OPCODE_FLR:
1115 brw_RNDD(p, dst, args[0]);
1116 break;
1117 case TGSI_OPCODE_FRC:
1118 brw_FRC(p, dst, args[0]);
1119 break;
1120 case TGSI_OPCODE_LOG:
1121 unalias1(c, dst, args[0], emit_log_noalias);
1122 break;
1123 case TGSI_OPCODE_LG2:
1124 emit_math1(c, BRW_MATH_FUNCTION_LOG, dst, args[0], BRW_MATH_PRECISION_FULL);
1125 break;
1126 case TGSI_OPCODE_LIT:
1127 unalias1(c, dst, args[0], emit_lit_noalias);
1128 break;
1129 case TGSI_OPCODE_MAD:
1130 brw_MOV(p, brw_acc_reg(), args[2]);
1131 brw_MAC(p, dst, args[0], args[1]);
1132 break;
1133 case TGSI_OPCODE_MAX:
1134 emit_max(p, dst, args[0], args[1]);
1135 break;
1136 case TGSI_OPCODE_MIN:
1137 emit_min(p, dst, args[0], args[1]);
1138 break;
1139 case TGSI_OPCODE_MOV:
1140 #if 0
1141 case TGSI_OPCODE_SWZ:
1142 /* The args[0] value can't be used here as it won't have
1143 * correctly encoded the full swizzle:
1144 */
1145 emit_swz(c, dst, inst->SrcReg[0] );
1146 #endif
1147 brw_MOV(p, dst, args[0]);
1148 break;
1149 case TGSI_OPCODE_MUL:
1150 brw_MUL(p, dst, args[0], args[1]);
1151 break;
1152 case TGSI_OPCODE_POW:
1153 emit_math2(c, BRW_MATH_FUNCTION_POW, dst, args[0], args[1], BRW_MATH_PRECISION_FULL);
1154 break;
1155 case TGSI_OPCODE_RCP:
1156 emit_math1(c, BRW_MATH_FUNCTION_INV, dst, args[0], BRW_MATH_PRECISION_FULL);
1157 break;
1158 case TGSI_OPCODE_RSQ:
1159 emit_math1(c, BRW_MATH_FUNCTION_RSQ, dst, args[0], BRW_MATH_PRECISION_FULL);
1160 break;
1161
1162 case TGSI_OPCODE_SEQ:
1163 emit_seq(p, dst, args[0], args[1]);
1164 break;
1165 case TGSI_OPCODE_SNE:
1166 emit_sne(p, dst, args[0], args[1]);
1167 break;
1168 case TGSI_OPCODE_SGE:
1169 emit_sge(p, dst, args[0], args[1]);
1170 break;
1171 case TGSI_OPCODE_SGT:
1172 emit_sgt(p, dst, args[0], args[1]);
1173 break;
1174 case TGSI_OPCODE_SLT:
1175 emit_slt(p, dst, args[0], args[1]);
1176 break;
1177 case TGSI_OPCODE_SLE:
1178 emit_sle(p, dst, args[0], args[1]);
1179 break;
1180 case TGSI_OPCODE_SUB:
1181 brw_ADD(p, dst, args[0], negate(args[1]));
1182 break;
1183 case TGSI_OPCODE_XPD:
1184 emit_xpd(p, dst, args[0], args[1]);
1185 break;
1186 #if 0
1187 case TGSI_OPCODE_IF:
1188 assert(if_insn < MAX_IFSN);
1189 if_inst[if_insn++] = brw_IF(p, BRW_EXECUTE_8);
1190 break;
1191 case TGSI_OPCODE_ELSE:
1192 if_inst[if_insn-1] = brw_ELSE(p, if_inst[if_insn-1]);
1193 break;
1194 case TGSI_OPCODE_ENDIF:
1195 assert(if_insn > 0);
1196 brw_ENDIF(p, if_inst[--if_insn]);
1197 break;
1198 case TGSI_OPCODE_BRA:
1199 brw_set_predicate_control(p, BRW_PREDICATE_NORMAL);
1200 brw_ADD(p, brw_ip_reg(), brw_ip_reg(), brw_imm_d(1*16));
1201 brw_set_predicate_control_flag_value(p, 0xff);
1202 break;
1203 case TGSI_OPCODE_CAL:
1204 brw_set_access_mode(p, BRW_ALIGN_1);
1205 brw_ADD(p, deref_1uw(stack_index, 0), brw_ip_reg(), brw_imm_d(3*16));
1206 brw_set_access_mode(p, BRW_ALIGN_16);
1207 brw_ADD(p, get_addr_reg(stack_index),
1208 get_addr_reg(stack_index), brw_imm_d(4));
1209 inst->Data = &p->store[p->nr_insn];
1210 brw_ADD(p, brw_ip_reg(), brw_ip_reg(), brw_imm_d(1*16));
1211 break;
1212 #endif
1213 case TGSI_OPCODE_RET:
1214 #if 0
1215 brw_ADD(p, get_addr_reg(stack_index),
1216 get_addr_reg(stack_index), brw_imm_d(-4));
1217 brw_set_access_mode(p, BRW_ALIGN_1);
1218 brw_MOV(p, brw_ip_reg(), deref_1uw(stack_index, 0));
1219 brw_set_access_mode(p, BRW_ALIGN_16);
1220 #else
1221 /*brw_ADD(p, brw_ip_reg(), brw_ip_reg(), brw_imm_d(1*16));*/
1222 #endif
1223 break;
1224 case TGSI_OPCODE_END:
1225 brw_ADD(p, brw_ip_reg(), brw_ip_reg(), brw_imm_d(1*16));
1226 break;
1227 case TGSI_OPCODE_BGNSUB:
1228 case TGSI_OPCODE_ENDSUB:
1229 break;
1230 default:
1231 debug_printf("Unsupport opcode %d in vertex shader\n", inst->Instruction.Opcode);
1232 break;
1233 }
1234
1235 if (dst_reg->File == TGSI_FILE_OUTPUT
1236 && dst_reg->Index != info->pos_idx
1237 && c->output_regs[dst_reg->Index].used_in_src)
1238 brw_MOV(p, get_dst(c, dst_reg), dst);
1239
1240 release_tmps(c);
1241 }
1242
1243 /* Emit the fragment program instructions here.
1244 */
1245 void brw_vs_emit(struct brw_vs_compile *c)
1246 {
1247 #define MAX_IFSN 32
1248 struct brw_compile *p = &c->func;
1249 struct brw_instruction *end_inst;
1250 struct tgsi_parse_context parse;
1251 struct brw_indirect stack_index = brw_indirect(0, 0);
1252 const struct tgsi_token *tokens = c->vp->program.tokens;
1253 struct brw_prog_info prog_info;
1254 unsigned allocated_registers = 0;
1255 memset(&prog_info, 0, sizeof(struct brw_prog_info));
1256
1257 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1258 brw_set_access_mode(p, BRW_ALIGN_16);
1259
1260 tgsi_parse_init(&parse, tokens);
1261 /* Message registers can't be read, so copy the output into GRF register
1262 if they are used in source registers */
1263 while (!tgsi_parse_end_of_tokens(&parse)) {
1264 tgsi_parse_token(&parse);
1265 unsigned i;
1266 switch (parse.FullToken.Token.Type) {
1267 case TGSI_TOKEN_TYPE_INSTRUCTION: {
1268 const struct tgsi_full_instruction *inst = &parse.FullToken.FullInstruction;
1269 for (i = 0; i < 3; ++i) {
1270 const struct tgsi_src_register *src = &inst->FullSrcRegisters[i].SrcRegister;
1271 unsigned index = src->Index;
1272 unsigned file = src->File;
1273 if (file == TGSI_FILE_OUTPUT)
1274 c->output_regs[index].used_in_src = TRUE;
1275 }
1276 }
1277 break;
1278 default:
1279 /* nothing */
1280 break;
1281 }
1282 }
1283 tgsi_parse_free(&parse);
1284
1285 tgsi_parse_init(&parse, tokens);
1286
1287 while (!tgsi_parse_end_of_tokens(&parse)) {
1288 tgsi_parse_token(&parse);
1289
1290 switch (parse.FullToken.Token.Type) {
1291 case TGSI_TOKEN_TYPE_DECLARATION: {
1292 struct tgsi_full_declaration *decl = &parse.FullToken.FullDeclaration;
1293 process_declaration(decl, &prog_info);
1294 }
1295 break;
1296 case TGSI_TOKEN_TYPE_IMMEDIATE: {
1297 struct tgsi_full_immediate *imm = &parse.FullToken.FullImmediate;
1298 /*assert(imm->Immediate.Size == 4);*/
1299 c->prog_data.imm_buf[c->prog_data.num_imm][0] = imm->u.ImmediateFloat32[0].Float;
1300 c->prog_data.imm_buf[c->prog_data.num_imm][1] = imm->u.ImmediateFloat32[1].Float;
1301 c->prog_data.imm_buf[c->prog_data.num_imm][2] = imm->u.ImmediateFloat32[2].Float;
1302 c->prog_data.imm_buf[c->prog_data.num_imm][3] = imm->u.ImmediateFloat32[3].Float;
1303 c->prog_data.num_imm++;
1304 }
1305 break;
1306 case TGSI_TOKEN_TYPE_INSTRUCTION: {
1307 struct tgsi_full_instruction *inst = &parse.FullToken.FullInstruction;
1308 if (!allocated_registers) {
1309 /* first instruction (declerations finished).
1310 * now that we know what vars are being used allocate
1311 * registers for them.*/
1312 c->prog_data.num_consts = prog_info.num_consts;
1313 c->prog_data.max_const = prog_info.num_consts + c->prog_data.num_imm;
1314 brw_vs_alloc_regs(c, &prog_info);
1315
1316 brw_set_access_mode(p, BRW_ALIGN_1);
1317 brw_MOV(p, get_addr_reg(stack_index), brw_address(c->stack));
1318 brw_set_access_mode(p, BRW_ALIGN_16);
1319 allocated_registers = 1;
1320 }
1321 process_instruction(c, inst, &prog_info);
1322 }
1323 break;
1324 }
1325 }
1326
1327 end_inst = &p->store[p->nr_insn];
1328 emit_vertex_write(c, &prog_info);
1329 post_vs_emit(c, end_inst);
1330 tgsi_parse_free(&parse);
1331
1332 }