Merge commit 'origin/gallium-master-merge'
[mesa.git] / src / gallium / drivers / i965simple / brw_vs_emit.c
1 /*
2 Copyright (C) Intel Corp. 2006. All Rights Reserved.
3 Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
4 develop this 3D driver.
5
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
13
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
17
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25
26 **********************************************************************/
27 /*
28 * Authors:
29 * Keith Whitwell <keith@tungstengraphics.com>
30 */
31
32 #include "brw_context.h"
33 #include "brw_vs.h"
34
35 #include "pipe/p_shader_tokens.h"
36 #include "tgsi/tgsi_parse.h"
37
/* Per-program bookkeeping collected from the TGSI declarations and
 * consumed by register allocation and the final vertex write.
 */
struct brw_prog_info {
   unsigned num_temps;        /* number of declared temporaries */
   unsigned num_addrs;        /* number of declared address registers */
   unsigned num_consts;       /* number of declared constants */

   unsigned writes_psize;     /* set when a PSIZE output is declared */

   unsigned pos_idx;          /* output index holding POSITION */
   unsigned result_edge_idx;  /* output index receiving the edge-flag copy */
   unsigned edge_flag_idx;    /* input index the edge flag is copied from */
   unsigned psize_idx;        /* output index holding PSIZE */
};
50
51 /* Do things as simply as possible. Allocate and populate all regs
52 * ahead of time.
53 */
54 static void brw_vs_alloc_regs( struct brw_vs_compile *c,
55 struct brw_prog_info *info )
56 {
57 unsigned i, reg = 0, mrf;
58 unsigned nr_params;
59
60 /* r0 -- reserved as usual
61 */
62 c->r0 = brw_vec8_grf(reg, 0); reg++;
63
64 /* User clip planes from curbe:
65 */
66 if (c->key.nr_userclip) {
67 for (i = 0; i < c->key.nr_userclip; i++) {
68 c->userplane[i] = stride( brw_vec4_grf(reg+3+i/2, (i%2) * 4), 0, 4, 1);
69 }
70
71 /* Deal with curbe alignment:
72 */
73 reg += ((6+c->key.nr_userclip+3)/4)*2;
74 }
75
76 /* Vertex program parameters from curbe:
77 */
78 nr_params = c->prog_data.max_const;
79 for (i = 0; i < nr_params; i++) {
80 c->regs[TGSI_FILE_CONSTANT][i] = stride(brw_vec4_grf(reg+i/2, (i%2) * 4), 0, 4, 1);
81 }
82 reg += (nr_params+1)/2;
83 c->prog_data.curb_read_length = reg - 1;
84
85
86
87 /* Allocate input regs:
88 */
89 c->nr_inputs = c->vp->info.num_inputs;
90 for (i = 0; i < c->nr_inputs; i++) {
91 c->regs[TGSI_FILE_INPUT][i] = brw_vec8_grf(reg, 0);
92 reg++;
93 }
94
95
96 /* Allocate outputs: TODO: could organize the non-position outputs
97 * to go straight into message regs.
98 */
99 c->nr_outputs = 0;
100 c->first_output = reg;
101 mrf = 4;
102 for (i = 0; i < c->vp->info.num_outputs; i++) {
103 c->nr_outputs++;
104 #if 0
105 if (i == VERT_RESULT_HPOS) {
106 c->regs[TGSI_FILE_OUTPUT][i] = brw_vec8_grf(reg, 0);
107 reg++;
108 }
109 else if (i == VERT_RESULT_PSIZ) {
110 c->regs[TGSI_FILE_OUTPUT][i] = brw_vec8_grf(reg, 0);
111 reg++;
112 mrf++; /* just a placeholder? XXX fix later stages & remove this */
113 }
114 else {
115 c->regs[TGSI_FILE_OUTPUT][i] = brw_message_reg(mrf);
116 mrf++;
117 }
118 #else
119 /*treat pos differently for now */
120 if (i == info->pos_idx) {
121 c->regs[TGSI_FILE_OUTPUT][i] = brw_vec8_grf(reg, 0);
122 reg++;
123 } else {
124 c->regs[TGSI_FILE_OUTPUT][i] = brw_message_reg(mrf);
125 mrf++;
126 }
127 #endif
128 }
129
130 /* Allocate program temporaries:
131 */
132 for (i = 0; i < info->num_temps; i++) {
133 c->regs[TGSI_FILE_TEMPORARY][i] = brw_vec8_grf(reg, 0);
134 reg++;
135 }
136
137 /* Address reg(s). Don't try to use the internal address reg until
138 * deref time.
139 */
140 for (i = 0; i < info->num_addrs; i++) {
141 c->regs[TGSI_FILE_ADDRESS][i] = brw_reg(BRW_GENERAL_REGISTER_FILE,
142 reg,
143 0,
144 BRW_REGISTER_TYPE_D,
145 BRW_VERTICAL_STRIDE_8,
146 BRW_WIDTH_8,
147 BRW_HORIZONTAL_STRIDE_1,
148 BRW_SWIZZLE_XXXX,
149 TGSI_WRITEMASK_X);
150 reg++;
151 }
152
153 for (i = 0; i < 128; i++) {
154 if (c->output_regs[i].used_in_src) {
155 c->output_regs[i].reg = brw_vec8_grf(reg, 0);
156 reg++;
157 }
158 }
159
160 c->stack = brw_uw16_reg(BRW_GENERAL_REGISTER_FILE, reg, 0);
161 reg += 2;
162
163
164 /* Some opcodes need an internal temporary:
165 */
166 c->first_tmp = reg;
167 c->last_tmp = reg; /* for allocation purposes */
168
169 /* Each input reg holds data from two vertices. The
170 * urb_read_length is the number of registers read from *each*
171 * vertex urb, so is half the amount:
172 */
173 c->prog_data.urb_read_length = (c->nr_inputs+1)/2;
174
175 c->prog_data.urb_entry_size = (c->nr_outputs+2+3)/4;
176 c->prog_data.total_grf = reg;
177 }
178
179
180 static struct brw_reg get_tmp( struct brw_vs_compile *c )
181 {
182 struct brw_reg tmp = brw_vec8_grf(c->last_tmp, 0);
183
184 if (++c->last_tmp > c->prog_data.total_grf)
185 c->prog_data.total_grf = c->last_tmp;
186
187 return tmp;
188 }
189
190 static void release_tmp( struct brw_vs_compile *c, struct brw_reg tmp )
191 {
192 if (tmp.nr == c->last_tmp-1)
193 c->last_tmp--;
194 }
195
196 static void release_tmps( struct brw_vs_compile *c )
197 {
198 c->last_tmp = c->first_tmp;
199 }
200
201
202 static void unalias1( struct brw_vs_compile *c,
203 struct brw_reg dst,
204 struct brw_reg arg0,
205 void (*func)( struct brw_vs_compile *,
206 struct brw_reg,
207 struct brw_reg ))
208 {
209 if (dst.file == arg0.file && dst.nr == arg0.nr) {
210 struct brw_compile *p = &c->func;
211 struct brw_reg tmp = brw_writemask(get_tmp(c), dst.dw1.bits.writemask);
212 func(c, tmp, arg0);
213 brw_MOV(p, dst, tmp);
214 }
215 else {
216 func(c, dst, arg0);
217 }
218 }
219
220 static void unalias2( struct brw_vs_compile *c,
221 struct brw_reg dst,
222 struct brw_reg arg0,
223 struct brw_reg arg1,
224 void (*func)( struct brw_vs_compile *,
225 struct brw_reg,
226 struct brw_reg,
227 struct brw_reg ))
228 {
229 if ((dst.file == arg0.file && dst.nr == arg0.nr) ||
230 (dst.file == arg1.file && dst.nr == arg1.nr)) {
231 struct brw_compile *p = &c->func;
232 struct brw_reg tmp = brw_writemask(get_tmp(c), dst.dw1.bits.writemask);
233 func(c, tmp, arg0, arg1);
234 brw_MOV(p, dst, tmp);
235 }
236 else {
237 func(c, dst, arg0, arg1);
238 }
239 }
240
241 static void emit_sop( struct brw_compile *p,
242 struct brw_reg dst,
243 struct brw_reg arg0,
244 struct brw_reg arg1,
245 unsigned cond)
246 {
247 brw_push_insn_state(p);
248 brw_CMP(p, brw_null_reg(), cond, arg0, arg1);
249 brw_set_predicate_control(p, BRW_PREDICATE_NONE);
250 brw_MOV(p, dst, brw_imm_f(1.0f));
251 brw_set_predicate_control(p, BRW_PREDICATE_NORMAL);
252 brw_MOV(p, dst, brw_imm_f(0.0f));
253 brw_pop_insn_state(p);
254 }
255
256 static void emit_seq( struct brw_compile *p,
257 struct brw_reg dst,
258 struct brw_reg arg0,
259 struct brw_reg arg1 )
260 {
261 emit_sop(p, dst, arg0, arg1, BRW_CONDITIONAL_EQ);
262 }
263
264 static void emit_sne( struct brw_compile *p,
265 struct brw_reg dst,
266 struct brw_reg arg0,
267 struct brw_reg arg1 )
268 {
269 emit_sop(p, dst, arg0, arg1, BRW_CONDITIONAL_NEQ);
270 }
271 static void emit_slt( struct brw_compile *p,
272 struct brw_reg dst,
273 struct brw_reg arg0,
274 struct brw_reg arg1 )
275 {
276 emit_sop(p, dst, arg0, arg1, BRW_CONDITIONAL_L);
277 }
278
279 static void emit_sle( struct brw_compile *p,
280 struct brw_reg dst,
281 struct brw_reg arg0,
282 struct brw_reg arg1 )
283 {
284 emit_sop(p, dst, arg0, arg1, BRW_CONDITIONAL_LE);
285 }
286
287 static void emit_sgt( struct brw_compile *p,
288 struct brw_reg dst,
289 struct brw_reg arg0,
290 struct brw_reg arg1 )
291 {
292 emit_sop(p, dst, arg0, arg1, BRW_CONDITIONAL_G);
293 }
294
295 static void emit_sge( struct brw_compile *p,
296 struct brw_reg dst,
297 struct brw_reg arg0,
298 struct brw_reg arg1 )
299 {
300 emit_sop(p, dst, arg0, arg1, BRW_CONDITIONAL_GE);
301 }
302
303 static void emit_max( struct brw_compile *p,
304 struct brw_reg dst,
305 struct brw_reg arg0,
306 struct brw_reg arg1 )
307 {
308 brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, arg0, arg1);
309 brw_SEL(p, dst, arg1, arg0);
310 brw_set_predicate_control(p, BRW_PREDICATE_NONE);
311 }
312
313 static void emit_min( struct brw_compile *p,
314 struct brw_reg dst,
315 struct brw_reg arg0,
316 struct brw_reg arg1 )
317 {
318 brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, arg0, arg1);
319 brw_SEL(p, dst, arg0, arg1);
320 brw_set_predicate_control(p, BRW_PREDICATE_NONE);
321 }
322
323
324 static void emit_math1( struct brw_vs_compile *c,
325 unsigned function,
326 struct brw_reg dst,
327 struct brw_reg arg0,
328 unsigned precision)
329 {
330 /* There are various odd behaviours with SEND on the simulator. In
331 * addition there are documented issues with the fact that the GEN4
332 * processor doesn't do dependency control properly on SEND
333 * results. So, on balance, this kludge to get around failures
334 * with writemasked math results looks like it might be necessary
335 * whether that turns out to be a simulator bug or not:
336 */
337 struct brw_compile *p = &c->func;
338 struct brw_reg tmp = dst;
339 boolean need_tmp = (dst.dw1.bits.writemask != 0xf ||
340 dst.file != BRW_GENERAL_REGISTER_FILE);
341
342 if (need_tmp)
343 tmp = get_tmp(c);
344
345 brw_math(p,
346 tmp,
347 function,
348 BRW_MATH_SATURATE_NONE,
349 2,
350 arg0,
351 BRW_MATH_DATA_SCALAR,
352 precision);
353
354 if (need_tmp) {
355 brw_MOV(p, dst, tmp);
356 release_tmp(c, tmp);
357 }
358 }
359
360 static void emit_math2( struct brw_vs_compile *c,
361 unsigned function,
362 struct brw_reg dst,
363 struct brw_reg arg0,
364 struct brw_reg arg1,
365 unsigned precision)
366 {
367 struct brw_compile *p = &c->func;
368 struct brw_reg tmp = dst;
369 boolean need_tmp = (dst.dw1.bits.writemask != 0xf ||
370 dst.file != BRW_GENERAL_REGISTER_FILE);
371
372 if (need_tmp)
373 tmp = get_tmp(c);
374
375 brw_MOV(p, brw_message_reg(3), arg1);
376
377 brw_math(p,
378 tmp,
379 function,
380 BRW_MATH_SATURATE_NONE,
381 2,
382 arg0,
383 BRW_MATH_DATA_SCALAR,
384 precision);
385
386 if (need_tmp) {
387 brw_MOV(p, dst, tmp);
388 release_tmp(c, tmp);
389 }
390 }
391
392
393
394 static void emit_exp_noalias( struct brw_vs_compile *c,
395 struct brw_reg dst,
396 struct brw_reg arg0 )
397 {
398 struct brw_compile *p = &c->func;
399
400
401 if (dst.dw1.bits.writemask & TGSI_WRITEMASK_X) {
402 struct brw_reg tmp = get_tmp(c);
403 struct brw_reg tmp_d = retype(tmp, BRW_REGISTER_TYPE_D);
404
405 /* tmp_d = floor(arg0.x) */
406 brw_RNDD(p, tmp_d, brw_swizzle1(arg0, 0));
407
408 /* result[0] = 2.0 ^ tmp */
409
410 /* Adjust exponent for floating point:
411 * exp += 127
412 */
413 brw_ADD(p, brw_writemask(tmp_d, TGSI_WRITEMASK_X), tmp_d, brw_imm_d(127));
414
415 /* Install exponent and sign.
416 * Excess drops off the edge:
417 */
418 brw_SHL(p, brw_writemask(retype(dst, BRW_REGISTER_TYPE_D), TGSI_WRITEMASK_X),
419 tmp_d, brw_imm_d(23));
420
421 release_tmp(c, tmp);
422 }
423
424 if (dst.dw1.bits.writemask & TGSI_WRITEMASK_Y) {
425 /* result[1] = arg0.x - floor(arg0.x) */
426 brw_FRC(p, brw_writemask(dst, TGSI_WRITEMASK_Y), brw_swizzle1(arg0, 0));
427 }
428
429 if (dst.dw1.bits.writemask & TGSI_WRITEMASK_Z) {
430 /* As with the LOG instruction, we might be better off just
431 * doing a taylor expansion here, seeing as we have to do all
432 * the prep work.
433 *
434 * If mathbox partial precision is too low, consider also:
435 * result[3] = result[0] * EXP(result[1])
436 */
437 emit_math1(c,
438 BRW_MATH_FUNCTION_EXP,
439 brw_writemask(dst, TGSI_WRITEMASK_Z),
440 brw_swizzle1(arg0, 0),
441 BRW_MATH_PRECISION_PARTIAL);
442 }
443
444 if (dst.dw1.bits.writemask & TGSI_WRITEMASK_W) {
445 /* result[3] = 1.0; */
446 brw_MOV(p, brw_writemask(dst, TGSI_WRITEMASK_W), brw_imm_f(1));
447 }
448 }
449
450
451 static void emit_log_noalias( struct brw_vs_compile *c,
452 struct brw_reg dst,
453 struct brw_reg arg0 )
454 {
455 struct brw_compile *p = &c->func;
456 struct brw_reg tmp = dst;
457 struct brw_reg tmp_ud = retype(tmp, BRW_REGISTER_TYPE_UD);
458 struct brw_reg arg0_ud = retype(arg0, BRW_REGISTER_TYPE_UD);
459 boolean need_tmp = (dst.dw1.bits.writemask != 0xf ||
460 dst.file != BRW_GENERAL_REGISTER_FILE);
461
462 if (need_tmp) {
463 tmp = get_tmp(c);
464 tmp_ud = retype(tmp, BRW_REGISTER_TYPE_UD);
465 }
466
467 /* Perform mant = frexpf(fabsf(x), &exp), adjust exp and mnt
468 * according to spec:
469 *
470 * These almost look likey they could be joined up, but not really
471 * practical:
472 *
473 * result[0].f = (x.i & ((1<<31)-1) >> 23) - 127
474 * result[1].i = (x.i & ((1<<23)-1) + (127<<23)
475 */
476 if (dst.dw1.bits.writemask & TGSI_WRITEMASK_XZ) {
477 brw_AND(p,
478 brw_writemask(tmp_ud, TGSI_WRITEMASK_X),
479 brw_swizzle1(arg0_ud, 0),
480 brw_imm_ud((1U<<31)-1));
481
482 brw_SHR(p,
483 brw_writemask(tmp_ud, TGSI_WRITEMASK_X),
484 tmp_ud,
485 brw_imm_ud(23));
486
487 brw_ADD(p,
488 brw_writemask(tmp, TGSI_WRITEMASK_X),
489 retype(tmp_ud, BRW_REGISTER_TYPE_D), /* does it matter? */
490 brw_imm_d(-127));
491 }
492
493 if (dst.dw1.bits.writemask & TGSI_WRITEMASK_YZ) {
494 brw_AND(p,
495 brw_writemask(tmp_ud, TGSI_WRITEMASK_Y),
496 brw_swizzle1(arg0_ud, 0),
497 brw_imm_ud((1<<23)-1));
498
499 brw_OR(p,
500 brw_writemask(tmp_ud, TGSI_WRITEMASK_Y),
501 tmp_ud,
502 brw_imm_ud(127<<23));
503 }
504
505 if (dst.dw1.bits.writemask & TGSI_WRITEMASK_Z) {
506 /* result[2] = result[0] + LOG2(result[1]); */
507
508 /* Why bother? The above is just a hint how to do this with a
509 * taylor series. Maybe we *should* use a taylor series as by
510 * the time all the above has been done it's almost certainly
511 * quicker than calling the mathbox, even with low precision.
512 *
513 * Options are:
514 * - result[0] + mathbox.LOG2(result[1])
515 * - mathbox.LOG2(arg0.x)
516 * - result[0] + inline_taylor_approx(result[1])
517 */
518 emit_math1(c,
519 BRW_MATH_FUNCTION_LOG,
520 brw_writemask(tmp, TGSI_WRITEMASK_Z),
521 brw_swizzle1(tmp, 1),
522 BRW_MATH_PRECISION_FULL);
523
524 brw_ADD(p,
525 brw_writemask(tmp, TGSI_WRITEMASK_Z),
526 brw_swizzle1(tmp, 2),
527 brw_swizzle1(tmp, 0));
528 }
529
530 if (dst.dw1.bits.writemask & TGSI_WRITEMASK_W) {
531 /* result[3] = 1.0; */
532 brw_MOV(p, brw_writemask(tmp, TGSI_WRITEMASK_W), brw_imm_f(1));
533 }
534
535 if (need_tmp) {
536 brw_MOV(p, dst, tmp);
537 release_tmp(c, tmp);
538 }
539 }
540
541
542
543
544 /* Need to unalias - consider swizzles: r0 = DST r0.xxxx r1
545 */
546 static void emit_dst_noalias( struct brw_vs_compile *c,
547 struct brw_reg dst,
548 struct brw_reg arg0,
549 struct brw_reg arg1)
550 {
551 struct brw_compile *p = &c->func;
552
553 /* There must be a better way to do this:
554 */
555 if (dst.dw1.bits.writemask & TGSI_WRITEMASK_X)
556 brw_MOV(p, brw_writemask(dst, TGSI_WRITEMASK_X), brw_imm_f(1.0));
557 if (dst.dw1.bits.writemask & TGSI_WRITEMASK_Y)
558 brw_MUL(p, brw_writemask(dst, TGSI_WRITEMASK_Y), arg0, arg1);
559 if (dst.dw1.bits.writemask & TGSI_WRITEMASK_Z)
560 brw_MOV(p, brw_writemask(dst, TGSI_WRITEMASK_Z), arg0);
561 if (dst.dw1.bits.writemask & TGSI_WRITEMASK_W)
562 brw_MOV(p, brw_writemask(dst, TGSI_WRITEMASK_W), arg1);
563 }
564
565 static void emit_xpd( struct brw_compile *p,
566 struct brw_reg dst,
567 struct brw_reg t,
568 struct brw_reg u)
569 {
570 brw_MUL(p, brw_null_reg(), brw_swizzle(t, 1,2,0,3), brw_swizzle(u,2,0,1,3));
571 brw_MAC(p, dst, negate(brw_swizzle(t, 2,0,1,3)), brw_swizzle(u,1,2,0,3));
572 }
573
574
575
576 static void emit_lit_noalias( struct brw_vs_compile *c,
577 struct brw_reg dst,
578 struct brw_reg arg0 )
579 {
580 struct brw_compile *p = &c->func;
581 struct brw_instruction *if_insn;
582 struct brw_reg tmp = dst;
583 boolean need_tmp = (dst.file != BRW_GENERAL_REGISTER_FILE);
584
585 if (need_tmp)
586 tmp = get_tmp(c);
587
588 brw_MOV(p, brw_writemask(dst, TGSI_WRITEMASK_YZ), brw_imm_f(0));
589 brw_MOV(p, brw_writemask(dst, TGSI_WRITEMASK_XW), brw_imm_f(1));
590
591 /* Need to use BRW_EXECUTE_8 and also do an 8-wide compare in order
592 * to get all channels active inside the IF. In the clipping code
593 * we run with NoMask, so it's not an option and we can use
594 * BRW_EXECUTE_1 for all comparisions.
595 */
596 brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_G, brw_swizzle1(arg0,0), brw_imm_f(0));
597 if_insn = brw_IF(p, BRW_EXECUTE_8);
598 {
599 brw_MOV(p, brw_writemask(dst, TGSI_WRITEMASK_Y), brw_swizzle1(arg0,0));
600
601 brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_G, brw_swizzle1(arg0,1), brw_imm_f(0));
602 brw_MOV(p, brw_writemask(tmp, TGSI_WRITEMASK_Z), brw_swizzle1(arg0,1));
603 brw_set_predicate_control(p, BRW_PREDICATE_NONE);
604
605 emit_math2(c,
606 BRW_MATH_FUNCTION_POW,
607 brw_writemask(dst, TGSI_WRITEMASK_Z),
608 brw_swizzle1(tmp, 2),
609 brw_swizzle1(arg0, 3),
610 BRW_MATH_PRECISION_PARTIAL);
611 }
612
613 brw_ENDIF(p, if_insn);
614 }
615
616
617
618
619
620 /* TODO: relative addressing!
621 */
622 static struct brw_reg get_reg( struct brw_vs_compile *c,
623 unsigned file,
624 unsigned index )
625 {
626 switch (file) {
627 case TGSI_FILE_TEMPORARY:
628 case TGSI_FILE_INPUT:
629 case TGSI_FILE_OUTPUT:
630 assert(c->regs[file][index].nr != 0);
631 return c->regs[file][index];
632 case TGSI_FILE_CONSTANT:
633 assert(c->regs[TGSI_FILE_CONSTANT][index + c->prog_data.num_imm].nr != 0);
634 return c->regs[TGSI_FILE_CONSTANT][index + c->prog_data.num_imm];
635 case TGSI_FILE_IMMEDIATE:
636 assert(c->regs[TGSI_FILE_CONSTANT][index].nr != 0);
637 return c->regs[TGSI_FILE_CONSTANT][index];
638 case TGSI_FILE_ADDRESS:
639 assert(index == 0);
640 return c->regs[file][index];
641
642 case TGSI_FILE_NULL: /* undef values */
643 return brw_null_reg();
644
645 default:
646 assert(0);
647 return brw_null_reg();
648 }
649 }
650
651
652
653 static struct brw_reg deref( struct brw_vs_compile *c,
654 struct brw_reg arg,
655 int offset)
656 {
657 struct brw_compile *p = &c->func;
658 struct brw_reg tmp = vec4(get_tmp(c));
659 struct brw_reg vp_address = retype(vec1(get_reg(c, TGSI_FILE_ADDRESS, 0)), BRW_REGISTER_TYPE_UW);
660 unsigned byte_offset = arg.nr * 32 + arg.subnr + offset * 16;
661 struct brw_reg indirect = brw_vec4_indirect(0,0);
662
663 {
664 brw_push_insn_state(p);
665 brw_set_access_mode(p, BRW_ALIGN_1);
666
667 /* This is pretty clunky - load the address register twice and
668 * fetch each 4-dword value in turn. There must be a way to do
669 * this in a single pass, but I couldn't get it to work.
670 */
671 brw_ADD(p, brw_address_reg(0), vp_address, brw_imm_d(byte_offset));
672 brw_MOV(p, tmp, indirect);
673
674 brw_ADD(p, brw_address_reg(0), suboffset(vp_address, 8), brw_imm_d(byte_offset));
675 brw_MOV(p, suboffset(tmp, 4), indirect);
676
677 brw_pop_insn_state(p);
678 }
679
680 return vec8(tmp);
681 }
682
683
684 static void emit_arl( struct brw_vs_compile *c,
685 struct brw_reg dst,
686 struct brw_reg arg0 )
687 {
688 struct brw_compile *p = &c->func;
689 struct brw_reg tmp = dst;
690 boolean need_tmp = (dst.file != BRW_GENERAL_REGISTER_FILE);
691
692 if (need_tmp)
693 tmp = get_tmp(c);
694
695 brw_RNDD(p, tmp, arg0);
696 brw_MUL(p, dst, tmp, brw_imm_d(16));
697
698 if (need_tmp)
699 release_tmp(c, tmp);
700 }
701
702
703 /* Will return mangled results for SWZ op. The emit_swz() function
704 * ignores this result and recalculates taking extended swizzles into
705 * account.
706 */
707 static struct brw_reg get_arg( struct brw_vs_compile *c,
708 struct tgsi_src_register *src )
709 {
710 struct brw_reg reg;
711
712 if (src->File == TGSI_FILE_NULL)
713 return brw_null_reg();
714
715 #if 0
716 if (src->RelAddr)
717 reg = deref(c, c->regs[PROGRAM_STATE_VAR][0], src->Index);
718 else
719 #endif
720 reg = get_reg(c, src->File, src->Index);
721
722 /* Convert 3-bit swizzle to 2-bit.
723 */
724 reg.dw1.bits.swizzle = BRW_SWIZZLE4(src->SwizzleX,
725 src->SwizzleY,
726 src->SwizzleZ,
727 src->SwizzleW);
728
729 /* Note this is ok for non-swizzle instructions:
730 */
731 reg.negate = src->Negate ? 1 : 0;
732
733 return reg;
734 }
735
736
737 static struct brw_reg get_dst( struct brw_vs_compile *c,
738 const struct tgsi_dst_register *dst )
739 {
740 struct brw_reg reg = get_reg(c, dst->File, dst->Index);
741
742 reg.dw1.bits.writemask = dst->WriteMask;
743
744 return reg;
745 }
746
747
748
749
750 static void emit_swz( struct brw_vs_compile *c,
751 struct brw_reg dst,
752 struct tgsi_src_register src )
753 {
754 struct brw_compile *p = &c->func;
755 unsigned zeros_mask = 0;
756 unsigned ones_mask = 0;
757 unsigned src_mask = 0;
758 ubyte src_swz[4];
759 boolean need_tmp = (src.Negate &&
760 dst.file != BRW_GENERAL_REGISTER_FILE);
761 struct brw_reg tmp = dst;
762 unsigned i;
763
764 if (need_tmp)
765 tmp = get_tmp(c);
766
767 for (i = 0; i < 4; i++) {
768 if (dst.dw1.bits.writemask & (1<<i)) {
769 ubyte s = 0;
770 switch(i) {
771 case 0:
772 s = src.SwizzleX;
773 break;
774 s = src.SwizzleY;
775 case 1:
776 break;
777 s = src.SwizzleZ;
778 case 2:
779 break;
780 s = src.SwizzleW;
781 case 3:
782 break;
783 }
784 switch (s) {
785 case TGSI_SWIZZLE_X:
786 case TGSI_SWIZZLE_Y:
787 case TGSI_SWIZZLE_Z:
788 case TGSI_SWIZZLE_W:
789 src_mask |= 1<<i;
790 src_swz[i] = s;
791 break;
792 case TGSI_EXTSWIZZLE_ZERO:
793 zeros_mask |= 1<<i;
794 break;
795 case TGSI_EXTSWIZZLE_ONE:
796 ones_mask |= 1<<i;
797 break;
798 }
799 }
800 }
801
802 /* Do src first, in case dst aliases src:
803 */
804 if (src_mask) {
805 struct brw_reg arg0;
806
807 #if 0
808 if (src.RelAddr)
809 arg0 = deref(c, c->regs[PROGRAM_STATE_VAR][0], src.Index);
810 else
811 #endif
812 arg0 = get_reg(c, src.File, src.Index);
813
814 arg0 = brw_swizzle(arg0,
815 src_swz[0], src_swz[1],
816 src_swz[2], src_swz[3]);
817
818 brw_MOV(p, brw_writemask(tmp, src_mask), arg0);
819 }
820
821 if (zeros_mask)
822 brw_MOV(p, brw_writemask(tmp, zeros_mask), brw_imm_f(0));
823
824 if (ones_mask)
825 brw_MOV(p, brw_writemask(tmp, ones_mask), brw_imm_f(1));
826
827 if (src.Negate)
828 brw_MOV(p, brw_writemask(tmp, src.Negate), negate(tmp));
829
830 if (need_tmp) {
831 brw_MOV(p, dst, tmp);
832 release_tmp(c, tmp);
833 }
834 }
835
836
837
838 /* Post-vertex-program processing. Send the results to the URB.
839 */
840 static void emit_vertex_write( struct brw_vs_compile *c, struct brw_prog_info *info)
841 {
842 struct brw_compile *p = &c->func;
843 struct brw_reg m0 = brw_message_reg(0);
844 struct brw_reg pos = c->regs[TGSI_FILE_OUTPUT][info->pos_idx];
845 struct brw_reg ndc;
846
847 if (c->key.copy_edgeflag) {
848 brw_MOV(p,
849 get_reg(c, TGSI_FILE_OUTPUT, info->result_edge_idx),
850 get_reg(c, TGSI_FILE_INPUT, info->edge_flag_idx));
851 }
852
853
854 /* Build ndc coords? TODO: Shortcircuit when w is known to be one.
855 */
856 if (!c->key.know_w_is_one) {
857 ndc = get_tmp(c);
858 emit_math1(c, BRW_MATH_FUNCTION_INV, ndc, brw_swizzle1(pos, 3), BRW_MATH_PRECISION_FULL);
859 brw_MUL(p, brw_writemask(ndc, TGSI_WRITEMASK_XYZ), pos, ndc);
860 }
861 else {
862 ndc = pos;
863 }
864
865 /* This includes the workaround for -ve rhw, so is no longer an
866 * optional step:
867 */
868 if (info->writes_psize ||
869 c->key.nr_userclip ||
870 !c->key.know_w_is_one)
871 {
872 struct brw_reg header1 = retype(get_tmp(c), BRW_REGISTER_TYPE_UD);
873 unsigned i;
874
875 brw_MOV(p, header1, brw_imm_ud(0));
876
877 brw_set_access_mode(p, BRW_ALIGN_16);
878
879 if (info->writes_psize) {
880 struct brw_reg psiz = c->regs[TGSI_FILE_OUTPUT][info->psize_idx];
881 brw_MUL(p, brw_writemask(header1, TGSI_WRITEMASK_W),
882 brw_swizzle1(psiz, 0), brw_imm_f(1<<11));
883 brw_AND(p, brw_writemask(header1, TGSI_WRITEMASK_W), header1,
884 brw_imm_ud(0x7ff<<8));
885 }
886
887
888 for (i = 0; i < c->key.nr_userclip; i++) {
889 brw_set_conditionalmod(p, BRW_CONDITIONAL_L);
890 brw_DP4(p, brw_null_reg(), pos, c->userplane[i]);
891 brw_OR(p, brw_writemask(header1, TGSI_WRITEMASK_W), header1, brw_imm_ud(1<<i));
892 brw_set_predicate_control(p, BRW_PREDICATE_NONE);
893 }
894
895
896 /* i965 clipping workaround:
897 * 1) Test for -ve rhw
898 * 2) If set,
899 * set ndc = (0,0,0,0)
900 * set ucp[6] = 1
901 *
902 * Later, clipping will detect ucp[6] and ensure the primitive is
903 * clipped against all fixed planes.
904 */
905 if (!c->key.know_w_is_one) {
906 brw_CMP(p,
907 vec8(brw_null_reg()),
908 BRW_CONDITIONAL_L,
909 brw_swizzle1(ndc, 3),
910 brw_imm_f(0));
911
912 brw_OR(p, brw_writemask(header1, TGSI_WRITEMASK_W), header1, brw_imm_ud(1<<6));
913 brw_MOV(p, ndc, brw_imm_f(0));
914 brw_set_predicate_control(p, BRW_PREDICATE_NONE);
915 }
916
917 brw_set_access_mode(p, BRW_ALIGN_1); /* why? */
918 brw_MOV(p, retype(brw_message_reg(1), BRW_REGISTER_TYPE_UD), header1);
919 brw_set_access_mode(p, BRW_ALIGN_16);
920
921 release_tmp(c, header1);
922 }
923 else {
924 brw_MOV(p, retype(brw_message_reg(1), BRW_REGISTER_TYPE_UD), brw_imm_ud(0));
925 }
926
927
928 /* Emit the (interleaved) headers for the two vertices - an 8-reg
929 * of zeros followed by two sets of NDC coordinates:
930 */
931 brw_set_access_mode(p, BRW_ALIGN_1);
932 brw_MOV(p, offset(m0, 2), ndc);
933 brw_MOV(p, offset(m0, 3), pos);
934
935
936 brw_urb_WRITE(p,
937 brw_null_reg(), /* dest */
938 0, /* starting mrf reg nr */
939 c->r0, /* src */
940 0, /* allocate */
941 1, /* used */
942 c->nr_outputs + 3, /* msg len */
943 0, /* response len */
944 1, /* eot */
945 1, /* writes complete */
946 0, /* urb destination offset */
947 BRW_URB_SWIZZLE_INTERLEAVE);
948
949 }
950
951 static void
952 post_vs_emit( struct brw_vs_compile *c, struct brw_instruction *end_inst )
953 {
954 struct tgsi_parse_context parse;
955 const struct tgsi_token *tokens = c->vp->program.tokens;
956 tgsi_parse_init(&parse, tokens);
957 while (!tgsi_parse_end_of_tokens(&parse)) {
958 tgsi_parse_token(&parse);
959 if (parse.FullToken.Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION) {
960 #if 0
961 struct brw_instruction *brw_inst1, *brw_inst2;
962 const struct tgsi_full_instruction *inst1, *inst2;
963 int offset;
964 inst1 = &parse.FullToken.FullInstruction;
965 brw_inst1 = inst1->Data;
966 switch (inst1->Opcode) {
967 case TGSI_OPCODE_CAL:
968 case TGSI_OPCODE_BRA:
969 target_insn = inst1->BranchTarget;
970 inst2 = &c->vp->program.Base.Instructions[target_insn];
971 brw_inst2 = inst2->Data;
972 offset = brw_inst2 - brw_inst1;
973 brw_set_src1(brw_inst1, brw_imm_d(offset*16));
974 break;
975 case TGSI_OPCODE_END:
976 offset = end_inst - brw_inst1;
977 brw_set_src1(brw_inst1, brw_imm_d(offset*16));
978 break;
979 default:
980 break;
981 }
982 #endif
983 }
984 }
985 tgsi_parse_free(&parse);
986 }
987
988 static void process_declaration(const struct tgsi_full_declaration *decl,
989 struct brw_prog_info *info)
990 {
991 int first = decl->DeclarationRange.First;
992 int last = decl->DeclarationRange.Last;
993
994 switch(decl->Declaration.File) {
995 case TGSI_FILE_CONSTANT:
996 info->num_consts += last - first + 1;
997 break;
998 case TGSI_FILE_INPUT: {
999 }
1000 break;
1001 case TGSI_FILE_OUTPUT: {
1002 assert(last == first); /* for now */
1003 if (decl->Declaration.Semantic) {
1004 switch (decl->Semantic.SemanticName) {
1005 case TGSI_SEMANTIC_POSITION: {
1006 info->pos_idx = first;
1007 }
1008 break;
1009 case TGSI_SEMANTIC_COLOR:
1010 break;
1011 case TGSI_SEMANTIC_BCOLOR:
1012 break;
1013 case TGSI_SEMANTIC_FOG:
1014 break;
1015 case TGSI_SEMANTIC_PSIZE: {
1016 info->writes_psize = TRUE;
1017 info->psize_idx = first;
1018 }
1019 break;
1020 case TGSI_SEMANTIC_GENERIC:
1021 break;
1022 }
1023 }
1024 }
1025 break;
1026 case TGSI_FILE_TEMPORARY: {
1027 info->num_temps += (last - first) + 1;
1028 }
1029 break;
1030 case TGSI_FILE_SAMPLER: {
1031 }
1032 break;
1033 case TGSI_FILE_ADDRESS: {
1034 info->num_addrs += (last - first) + 1;
1035 }
1036 break;
1037 case TGSI_FILE_IMMEDIATE: {
1038 }
1039 break;
1040 case TGSI_FILE_NULL: {
1041 }
1042 break;
1043 }
1044 }
1045
1046 static void process_instruction(struct brw_vs_compile *c,
1047 struct tgsi_full_instruction *inst,
1048 struct brw_prog_info *info)
1049 {
1050 struct brw_reg args[3], dst;
1051 struct brw_compile *p = &c->func;
1052 /*struct brw_indirect stack_index = brw_indirect(0, 0);*/
1053 unsigned i;
1054 unsigned index;
1055 unsigned file;
1056 /*FIXME: might not be the only one*/
1057 const struct tgsi_dst_register *dst_reg = &inst->FullDstRegisters[0].DstRegister;
1058 /*
1059 struct brw_instruction *if_inst[MAX_IFSN];
1060 unsigned insn, if_insn = 0;
1061 */
1062
1063 for (i = 0; i < 3; i++) {
1064 struct tgsi_full_src_register *src = &inst->FullSrcRegisters[i];
1065 index = src->SrcRegister.Index;
1066 file = src->SrcRegister.File;
1067 if (file == TGSI_FILE_OUTPUT && c->output_regs[index].used_in_src)
1068 args[i] = c->output_regs[index].reg;
1069 else
1070 args[i] = get_arg(c, &src->SrcRegister);
1071 }
1072
1073 /* Get dest regs. Note that it is possible for a reg to be both
1074 * dst and arg, given the static allocation of registers. So
1075 * care needs to be taken emitting multi-operation instructions.
1076 */
1077 index = dst_reg->Index;
1078 file = dst_reg->File;
1079 if (file == TGSI_FILE_OUTPUT && c->output_regs[index].used_in_src)
1080 dst = c->output_regs[index].reg;
1081 else
1082 dst = get_dst(c, dst_reg);
1083
1084 switch (inst->Instruction.Opcode) {
1085 case TGSI_OPCODE_ABS:
1086 brw_MOV(p, dst, brw_abs(args[0]));
1087 break;
1088 case TGSI_OPCODE_ADD:
1089 brw_ADD(p, dst, args[0], args[1]);
1090 break;
1091 case TGSI_OPCODE_DP3:
1092 brw_DP3(p, dst, args[0], args[1]);
1093 break;
1094 case TGSI_OPCODE_DP4:
1095 brw_DP4(p, dst, args[0], args[1]);
1096 break;
1097 case TGSI_OPCODE_DPH:
1098 brw_DPH(p, dst, args[0], args[1]);
1099 break;
1100 case TGSI_OPCODE_DST:
1101 unalias2(c, dst, args[0], args[1], emit_dst_noalias);
1102 break;
1103 case TGSI_OPCODE_EXP:
1104 unalias1(c, dst, args[0], emit_exp_noalias);
1105 break;
1106 case TGSI_OPCODE_EX2:
1107 emit_math1(c, BRW_MATH_FUNCTION_EXP, dst, args[0], BRW_MATH_PRECISION_FULL);
1108 break;
1109 case TGSI_OPCODE_ARL:
1110 emit_arl(c, dst, args[0]);
1111 break;
1112 case TGSI_OPCODE_FLR:
1113 brw_RNDD(p, dst, args[0]);
1114 break;
1115 case TGSI_OPCODE_FRC:
1116 brw_FRC(p, dst, args[0]);
1117 break;
1118 case TGSI_OPCODE_LOG:
1119 unalias1(c, dst, args[0], emit_log_noalias);
1120 break;
1121 case TGSI_OPCODE_LG2:
1122 emit_math1(c, BRW_MATH_FUNCTION_LOG, dst, args[0], BRW_MATH_PRECISION_FULL);
1123 break;
1124 case TGSI_OPCODE_LIT:
1125 unalias1(c, dst, args[0], emit_lit_noalias);
1126 break;
1127 case TGSI_OPCODE_MAD:
1128 brw_MOV(p, brw_acc_reg(), args[2]);
1129 brw_MAC(p, dst, args[0], args[1]);
1130 break;
1131 case TGSI_OPCODE_MAX:
1132 emit_max(p, dst, args[0], args[1]);
1133 break;
1134 case TGSI_OPCODE_MIN:
1135 emit_min(p, dst, args[0], args[1]);
1136 break;
1137 case TGSI_OPCODE_MOV:
1138 case TGSI_OPCODE_SWZ:
1139 #if 0
1140 /* The args[0] value can't be used here as it won't have
1141 * correctly encoded the full swizzle:
1142 */
1143 emit_swz(c, dst, inst->SrcReg[0] );
1144 #endif
1145 brw_MOV(p, dst, args[0]);
1146 break;
1147 case TGSI_OPCODE_MUL:
1148 brw_MUL(p, dst, args[0], args[1]);
1149 break;
1150 case TGSI_OPCODE_POW:
1151 emit_math2(c, BRW_MATH_FUNCTION_POW, dst, args[0], args[1], BRW_MATH_PRECISION_FULL);
1152 break;
1153 case TGSI_OPCODE_RCP:
1154 emit_math1(c, BRW_MATH_FUNCTION_INV, dst, args[0], BRW_MATH_PRECISION_FULL);
1155 break;
1156 case TGSI_OPCODE_RSQ:
1157 emit_math1(c, BRW_MATH_FUNCTION_RSQ, dst, args[0], BRW_MATH_PRECISION_FULL);
1158 break;
1159
1160 case TGSI_OPCODE_SEQ:
1161 emit_seq(p, dst, args[0], args[1]);
1162 break;
1163 case TGSI_OPCODE_SNE:
1164 emit_sne(p, dst, args[0], args[1]);
1165 break;
1166 case TGSI_OPCODE_SGE:
1167 emit_sge(p, dst, args[0], args[1]);
1168 break;
1169 case TGSI_OPCODE_SGT:
1170 emit_sgt(p, dst, args[0], args[1]);
1171 break;
1172 case TGSI_OPCODE_SLT:
1173 emit_slt(p, dst, args[0], args[1]);
1174 break;
1175 case TGSI_OPCODE_SLE:
1176 emit_sle(p, dst, args[0], args[1]);
1177 break;
1178 case TGSI_OPCODE_SUB:
1179 brw_ADD(p, dst, args[0], negate(args[1]));
1180 break;
1181 case TGSI_OPCODE_XPD:
1182 emit_xpd(p, dst, args[0], args[1]);
1183 break;
1184 #if 0
1185 case TGSI_OPCODE_IF:
1186 assert(if_insn < MAX_IFSN);
1187 if_inst[if_insn++] = brw_IF(p, BRW_EXECUTE_8);
1188 break;
1189 case TGSI_OPCODE_ELSE:
1190 if_inst[if_insn-1] = brw_ELSE(p, if_inst[if_insn-1]);
1191 break;
1192 case TGSI_OPCODE_ENDIF:
1193 assert(if_insn > 0);
1194 brw_ENDIF(p, if_inst[--if_insn]);
1195 break;
1196 case TGSI_OPCODE_BRA:
1197 brw_set_predicate_control(p, BRW_PREDICATE_NORMAL);
1198 brw_ADD(p, brw_ip_reg(), brw_ip_reg(), brw_imm_d(1*16));
1199 brw_set_predicate_control_flag_value(p, 0xff);
1200 break;
1201 case TGSI_OPCODE_CAL:
1202 brw_set_access_mode(p, BRW_ALIGN_1);
1203 brw_ADD(p, deref_1uw(stack_index, 0), brw_ip_reg(), brw_imm_d(3*16));
1204 brw_set_access_mode(p, BRW_ALIGN_16);
1205 brw_ADD(p, get_addr_reg(stack_index),
1206 get_addr_reg(stack_index), brw_imm_d(4));
1207 inst->Data = &p->store[p->nr_insn];
1208 brw_ADD(p, brw_ip_reg(), brw_ip_reg(), brw_imm_d(1*16));
1209 break;
1210 #endif
1211 case TGSI_OPCODE_RET:
1212 #if 0
1213 brw_ADD(p, get_addr_reg(stack_index),
1214 get_addr_reg(stack_index), brw_imm_d(-4));
1215 brw_set_access_mode(p, BRW_ALIGN_1);
1216 brw_MOV(p, brw_ip_reg(), deref_1uw(stack_index, 0));
1217 brw_set_access_mode(p, BRW_ALIGN_16);
1218 #else
1219 /*brw_ADD(p, brw_ip_reg(), brw_ip_reg(), brw_imm_d(1*16));*/
1220 #endif
1221 break;
1222 case TGSI_OPCODE_END:
1223 brw_ADD(p, brw_ip_reg(), brw_ip_reg(), brw_imm_d(1*16));
1224 break;
1225 case TGSI_OPCODE_BGNSUB:
1226 case TGSI_OPCODE_ENDSUB:
1227 break;
1228 default:
1229 debug_printf("Unsupport opcode %d in vertex shader\n", inst->Instruction.Opcode);
1230 break;
1231 }
1232
1233 if (dst_reg->File == TGSI_FILE_OUTPUT
1234 && dst_reg->Index != info->pos_idx
1235 && c->output_regs[dst_reg->Index].used_in_src)
1236 brw_MOV(p, get_dst(c, dst_reg), dst);
1237
1238 release_tmps(c);
1239 }
1240
1241 /* Emit the fragment program instructions here.
1242 */
1243 void brw_vs_emit(struct brw_vs_compile *c)
1244 {
1245 #define MAX_IFSN 32
1246 struct brw_compile *p = &c->func;
1247 struct brw_instruction *end_inst;
1248 struct tgsi_parse_context parse;
1249 struct brw_indirect stack_index = brw_indirect(0, 0);
1250 const struct tgsi_token *tokens = c->vp->program.tokens;
1251 struct brw_prog_info prog_info;
1252 unsigned allocated_registers = 0;
1253 memset(&prog_info, 0, sizeof(struct brw_prog_info));
1254
1255 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1256 brw_set_access_mode(p, BRW_ALIGN_16);
1257
1258 tgsi_parse_init(&parse, tokens);
1259 /* Message registers can't be read, so copy the output into GRF register
1260 if they are used in source registers */
1261 while (!tgsi_parse_end_of_tokens(&parse)) {
1262 tgsi_parse_token(&parse);
1263 unsigned i;
1264 switch (parse.FullToken.Token.Type) {
1265 case TGSI_TOKEN_TYPE_INSTRUCTION: {
1266 const struct tgsi_full_instruction *inst = &parse.FullToken.FullInstruction;
1267 for (i = 0; i < 3; ++i) {
1268 const struct tgsi_src_register *src = &inst->FullSrcRegisters[i].SrcRegister;
1269 unsigned index = src->Index;
1270 unsigned file = src->File;
1271 if (file == TGSI_FILE_OUTPUT)
1272 c->output_regs[index].used_in_src = TRUE;
1273 }
1274 }
1275 break;
1276 default:
1277 /* nothing */
1278 break;
1279 }
1280 }
1281 tgsi_parse_free(&parse);
1282
1283 tgsi_parse_init(&parse, tokens);
1284
1285 while (!tgsi_parse_end_of_tokens(&parse)) {
1286 tgsi_parse_token(&parse);
1287
1288 switch (parse.FullToken.Token.Type) {
1289 case TGSI_TOKEN_TYPE_DECLARATION: {
1290 struct tgsi_full_declaration *decl = &parse.FullToken.FullDeclaration;
1291 process_declaration(decl, &prog_info);
1292 }
1293 break;
1294 case TGSI_TOKEN_TYPE_IMMEDIATE: {
1295 struct tgsi_full_immediate *imm = &parse.FullToken.FullImmediate;
1296 assert(imm->Immediate.NrTokens == 4 + 1);
1297 c->prog_data.imm_buf[c->prog_data.num_imm][0] = imm->u.ImmediateFloat32[0].Float;
1298 c->prog_data.imm_buf[c->prog_data.num_imm][1] = imm->u.ImmediateFloat32[1].Float;
1299 c->prog_data.imm_buf[c->prog_data.num_imm][2] = imm->u.ImmediateFloat32[2].Float;
1300 c->prog_data.imm_buf[c->prog_data.num_imm][3] = imm->u.ImmediateFloat32[3].Float;
1301 c->prog_data.num_imm++;
1302 }
1303 break;
1304 case TGSI_TOKEN_TYPE_INSTRUCTION: {
1305 struct tgsi_full_instruction *inst = &parse.FullToken.FullInstruction;
1306 if (!allocated_registers) {
1307 /* first instruction (declerations finished).
1308 * now that we know what vars are being used allocate
1309 * registers for them.*/
1310 c->prog_data.num_consts = prog_info.num_consts;
1311 c->prog_data.max_const = prog_info.num_consts + c->prog_data.num_imm;
1312 brw_vs_alloc_regs(c, &prog_info);
1313
1314 brw_set_access_mode(p, BRW_ALIGN_1);
1315 brw_MOV(p, get_addr_reg(stack_index), brw_address(c->stack));
1316 brw_set_access_mode(p, BRW_ALIGN_16);
1317 allocated_registers = 1;
1318 }
1319 process_instruction(c, inst, &prog_info);
1320 }
1321 break;
1322 }
1323 }
1324
1325 end_inst = &p->store[p->nr_insn];
1326 emit_vertex_write(c, &prog_info);
1327 post_vs_emit(c, end_inst);
1328 tgsi_parse_free(&parse);
1329
1330 }