178ca7635dc9f71fcd43eb997e3d6496eda565fc
[mesa.git] / src / mesa / drivers / dri / r300 / r300_vertexprog.c
1 /**************************************************************************
2
3 Copyright (C) 2005 Aapo Tahkola.
4
5 All Rights Reserved.
6
7 Permission is hereby granted, free of charge, to any person obtaining a
8 copy of this software and associated documentation files (the "Software"),
9 to deal in the Software without restriction, including without limitation
10 on the rights to use, copy, modify, merge, publish, distribute, sub
11 license, and/or sell copies of the Software, and to permit persons to whom
12 the Software is furnished to do so, subject to the following conditions:
13
14 The above copyright notice and this permission notice (including the next
15 paragraph) shall be included in all copies or substantial portions of the
16 Software.
17
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
21 ATI, VA LINUX SYSTEMS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
22 DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
23 OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
24 USE OR OTHER DEALINGS IN THE SOFTWARE.
25
26 **************************************************************************/
27
28 /*
29 * Authors:
30 * Aapo Tahkola <aet@rasterburn.org>
31 */
32 #include "glheader.h"
33 #include "macros.h"
34 #include "enums.h"
35
36 #include "program.h"
37 #include "r300_context.h"
38 #include "nvvertprog.h"
39
40 #define SCALAR_FLAG (1<<31)
41 #define FLAG_MASK (1<<31)
42 #define OP_MASK (0xf) /* we are unlikely to have more than 15 */
43 #define OPN(operator, ip, op) {#operator, VP_OPCODE_##operator, ip, op}
44
45 static struct{
46 char *name;
47 int opcode;
48 unsigned long ip; /* number of input operands and flags */
49 unsigned long op;
50 }op_names[]={
51 OPN(ABS, 1, 1),
52 OPN(ADD, 2, 1),
53 OPN(ARL, 1, 1|SCALAR_FLAG),
54 OPN(DP3, 2, 3|SCALAR_FLAG),
55 OPN(DP4, 2, 3|SCALAR_FLAG),
56 OPN(DPH, 2, 3|SCALAR_FLAG),
57 OPN(DST, 2, 1),
58 OPN(EX2, 1|SCALAR_FLAG, 4|SCALAR_FLAG),
59 OPN(EXP, 1|SCALAR_FLAG, 1),
60 OPN(FLR, 1, 1),
61 OPN(FRC, 1, 1),
62 OPN(LG2, 1|SCALAR_FLAG, 4|SCALAR_FLAG),
63 OPN(LIT, 1, 1),
64 OPN(LOG, 1|SCALAR_FLAG, 1),
65 OPN(MAD, 3, 1),
66 OPN(MAX, 2, 1),
67 OPN(MIN, 2, 1),
68 OPN(MOV, 1, 1),
69 OPN(MUL, 2, 1),
70 OPN(POW, 2|SCALAR_FLAG, 4|SCALAR_FLAG),
71 OPN(RCP, 1|SCALAR_FLAG, 4|SCALAR_FLAG),
72 OPN(RSQ, 1|SCALAR_FLAG, 4|SCALAR_FLAG),
73 OPN(SGE, 2, 1),
74 OPN(SLT, 2, 1),
75 OPN(SUB, 2, 1),
76 OPN(SWZ, 1, 1),
77 OPN(XPD, 2, 1),
78 OPN(RCC, 0, 0), //extra
79 OPN(PRINT, 0, 0),
80 OPN(END, 0, 0),
81 };
82 #undef OPN
83 #define OPN(rf) {#rf, PROGRAM_##rf}
84
85 static struct{
86 char *name;
87 int id;
88 }register_file_names[]={
89 OPN(TEMPORARY),
90 OPN(INPUT),
91 OPN(OUTPUT),
92 OPN(LOCAL_PARAM),
93 OPN(ENV_PARAM),
94 OPN(NAMED_PARAM),
95 OPN(STATE_VAR),
96 OPN(WRITE_ONLY),
97 OPN(ADDRESS),
98 };
99
/* Component letters indexed by component number (0..3). */
static const char *dst_mask_names[4]={
	"X",
	"Y",
	"Z",
	"W",
};
101
102 /* from vertex program spec:
103 Instruction Inputs Output Description
104 ----------- ------ ------ --------------------------------
105 ABS v v absolute value
106 ADD v,v v add
107 ARL v a address register load
108 DP3 v,v ssss 3-component dot product
109 DP4 v,v ssss 4-component dot product
110 DPH v,v ssss homogeneous dot product
111 DST v,v v distance vector
112 EX2 s ssss exponential base 2
113 EXP s v exponential base 2 (approximate)
114 FLR v v floor
115 FRC v v fraction
116 LG2 s ssss logarithm base 2
117 LIT v v compute light coefficients
118 LOG s v logarithm base 2 (approximate)
119 MAD v,v,v v multiply and add
120 MAX v,v v maximum
121 MIN v,v v minimum
122 MOV v v move
123 MUL v,v v multiply
124 POW s,s ssss exponentiate
125 RCP s ssss reciprocal
126 RSQ s ssss reciprocal square root
127 SGE v,v v set on greater than or equal
128 SLT v,v v set on less than
129 SUB v,v v subtract
130 SWZ v v extended swizzle
131 XPD v,v v cross product
132 */
133
134 void dump_program_params(GLcontext *ctx, struct vertex_program *vp)
135 {
136 int i;
137 int pi;
138
139 fprintf(stderr, "NumInstructions=%d\n", vp->Base.NumInstructions);
140 fprintf(stderr, "NumTemporaries=%d\n", vp->Base.NumTemporaries);
141 fprintf(stderr, "NumParameters=%d\n", vp->Base.NumParameters);
142 fprintf(stderr, "NumAttributes=%d\n", vp->Base.NumAttributes);
143 fprintf(stderr, "NumAddressRegs=%d\n", vp->Base.NumAddressRegs);
144
145 _mesa_load_state_parameters(ctx, vp->Parameters);
146
147 #if 0
148 for(pi=0; pi < vp->Base.NumParameters; pi++){
149 fprintf(stderr, "{ ");
150 for(i=0; i < 4; i++)
151 fprintf(stderr, "%f ", vp->Base.LocalParams[pi][i]);
152 fprintf(stderr, "}\n");
153 }
154 #endif
155 for(pi=0; pi < vp->Parameters->NumParameters; pi++){
156 fprintf(stderr, "param %02d:", pi);
157
158 switch(vp->Parameters->Parameters[pi].Type){
159
160 case NAMED_PARAMETER:
161 fprintf(stderr, "%s", vp->Parameters->Parameters[pi].Name);
162 fprintf(stderr, "(NAMED_PARAMETER)");
163 break;
164
165 case CONSTANT:
166 fprintf(stderr, "(CONSTANT)");
167 break;
168
169 case STATE:
170 fprintf(stderr, "(STATE)\n");
171 break;
172
173 }
174
175 fprintf(stderr, "{ ");
176 for(i=0; i < 4; i++)
177 fprintf(stderr, "%f ", vp->Parameters->ParameterValues[pi][i]);
178 fprintf(stderr, "}\n");
179
180 }
181 }
182
183 void debug_vp(GLcontext *ctx, struct vertex_program *vp)
184 {
185 struct vp_instruction *vpi;
186 int i, operand_index;
187 int operator_index;
188
189 dump_program_params(ctx, vp);
190
191 vpi=vp->Instructions;
192
193 for(;; vpi++){
194 if(vpi->Opcode == VP_OPCODE_END)
195 break;
196
197 for(i=0; i < sizeof(op_names) / sizeof(*op_names); i++){
198 if(vpi->Opcode == op_names[i].opcode){
199 fprintf(stderr, "%s ", op_names[i].name);
200 break;
201 }
202 }
203 operator_index=i;
204
205 for(i=0; i < sizeof(register_file_names) / sizeof(*register_file_names); i++){
206 if(vpi->DstReg.File == register_file_names[i].id){
207 fprintf(stderr, "%s ", register_file_names[i].name);
208 break;
209 }
210 }
211
212 fprintf(stderr, "%d.", vpi->DstReg.Index);
213
214 for(i=0; i < 4; i++)
215 if(vpi->DstReg.WriteMask & (1<<i))
216 fprintf(stderr, "%s", dst_mask_names[i]);
217 fprintf(stderr, " ");
218
219 for(operand_index=0; operand_index < (op_names[operator_index].ip & (~FLAG_MASK));
220 operand_index++){
221
222 if(vpi->SrcReg[operand_index].Negate)
223 fprintf(stderr, "-");
224
225 for(i=0; i < sizeof(register_file_names) / sizeof(*register_file_names); i++){
226 if(vpi->SrcReg[operand_index].File == register_file_names[i].id){
227 fprintf(stderr, "%s ", register_file_names[i].name);
228 break;
229 }
230 }
231 fprintf(stderr, "%d.", vpi->SrcReg[operand_index].Index);
232
233 for(i=0; i < 4; i++)
234 fprintf(stderr, "%s", dst_mask_names[GET_SWZ(vpi->SrcReg[operand_index].Swizzle, i)]);
235
236 if(operand_index+1 < (op_names[operator_index].ip & (~FLAG_MASK)) )
237 fprintf(stderr, ",");
238 }
239 fprintf(stderr, "\n");
240 }
241
242 }
243
244 void r300VertexProgUpdateParams(GLcontext *ctx, struct r300_vertex_program *vp)
245 {
246 int pi;
247 struct vertex_program *mesa_vp=(void *)vp;
248 int dst_index;
249
250 _mesa_load_state_parameters(ctx, mesa_vp->Parameters);
251
252 //debug_vp(ctx, mesa_vp);
253 if(mesa_vp->Parameters->NumParameters * 4 > VSF_MAX_FRAGMENT_LENGTH){
254 fprintf(stderr, "%s:Params exhausted\n", __FUNCTION__);
255 exit(-1);
256 }
257 dst_index=0;
258 for(pi=0; pi < mesa_vp->Parameters->NumParameters; pi++){
259 switch(mesa_vp->Parameters->Parameters[pi].Type){
260
261 case STATE:
262 case NAMED_PARAMETER:
263 //fprintf(stderr, "%s", vp->Parameters->Parameters[pi].Name);
264 case CONSTANT:
265 vp->params.body.f[dst_index++]=mesa_vp->Parameters->ParameterValues[pi][0];
266 vp->params.body.f[dst_index++]=mesa_vp->Parameters->ParameterValues[pi][1];
267 vp->params.body.f[dst_index++]=mesa_vp->Parameters->ParameterValues[pi][2];
268 vp->params.body.f[dst_index++]=mesa_vp->Parameters->ParameterValues[pi][3];
269 break;
270
271 default: _mesa_problem(NULL, "Bad param type in %s", __FUNCTION__);
272 }
273
274 }
275
276 vp->params.length=dst_index;
277 }
278
279 static unsigned long t_dst_mask(GLuint mask)
280 {
281 unsigned long flags=0;
282
283 if(mask & WRITEMASK_X) flags |= VSF_FLAG_X;
284 if(mask & WRITEMASK_Y) flags |= VSF_FLAG_Y;
285 if(mask & WRITEMASK_Z) flags |= VSF_FLAG_Z;
286 if(mask & WRITEMASK_W) flags |= VSF_FLAG_W;
287
288 return flags;
289 }
290
291 static unsigned long t_dst_class(enum register_file file)
292 {
293
294 switch(file){
295 case PROGRAM_TEMPORARY:
296 return VSF_OUT_CLASS_TMP;
297 case PROGRAM_OUTPUT:
298 return VSF_OUT_CLASS_RESULT;
299 /*
300 case PROGRAM_INPUT:
301 case PROGRAM_LOCAL_PARAM:
302 case PROGRAM_ENV_PARAM:
303 case PROGRAM_NAMED_PARAM:
304 case PROGRAM_STATE_VAR:
305 case PROGRAM_WRITE_ONLY:
306 case PROGRAM_ADDRESS:
307 */
308 default:
309 fprintf(stderr, "problem in %s", __FUNCTION__);
310 exit(0);
311 }
312 }
313
314 static unsigned long t_dst_index(struct r300_vertex_program *vp, struct vp_dst_register *dst)
315 {
316 if(dst->File == PROGRAM_OUTPUT) {
317 if (vp->outputs[dst->Index] != -1)
318 return vp->outputs[dst->Index];
319 else {
320 WARN_ONCE("Unknown output %d\n", dst->Index);
321 return 10;
322 }
323 }
324 return dst->Index;
325 }
326
327 static unsigned long t_src_class(enum register_file file)
328 {
329
330 switch(file){
331 case PROGRAM_TEMPORARY:
332 return VSF_IN_CLASS_TMP;
333
334 case PROGRAM_INPUT:
335 return VSF_IN_CLASS_ATTR;
336
337 case PROGRAM_LOCAL_PARAM:
338 case PROGRAM_ENV_PARAM:
339 case PROGRAM_NAMED_PARAM:
340 case PROGRAM_STATE_VAR:
341 return VSF_IN_CLASS_PARAM;
342 /*
343 case PROGRAM_OUTPUT:
344 case PROGRAM_WRITE_ONLY:
345 case PROGRAM_ADDRESS:
346 */
347 default:
348 fprintf(stderr, "problem in %s", __FUNCTION__);
349 exit(0);
350 }
351 }
352
353 static unsigned long t_swizzle(GLubyte swizzle)
354 {
355 switch(swizzle){
356 case SWIZZLE_X: return VSF_IN_COMPONENT_X;
357 case SWIZZLE_Y: return VSF_IN_COMPONENT_Y;
358 case SWIZZLE_Z: return VSF_IN_COMPONENT_Z;
359 case SWIZZLE_W: return VSF_IN_COMPONENT_W;
360 case SWIZZLE_ZERO: return VSF_IN_COMPONENT_ZERO;
361 case SWIZZLE_ONE: return VSF_IN_COMPONENT_ONE;
362 default:
363 fprintf(stderr, "problem in %s", __FUNCTION__);
364 exit(0);
365 }
366 }
367
368 void vp_dump_inputs(struct r300_vertex_program *vp, char *caller)
369 {
370 int i;
371
372 if(vp == NULL){
373 fprintf(stderr, "vp null in call to %s from %s\n", __FUNCTION__, caller);
374 return ;
375 }
376
377 fprintf(stderr, "%s:<", caller);
378 for(i=0; i < VERT_ATTRIB_MAX; i++)
379 fprintf(stderr, "%d ", vp->inputs[i]);
380 fprintf(stderr, ">\n");
381
382 }
383
384 static unsigned long t_src_index(struct r300_vertex_program *vp, struct vp_src_register *src)
385 {
386 int i;
387 int max_reg=-1;
388
389 if(src->File == PROGRAM_INPUT){
390 if(vp->inputs[src->Index] != -1)
391 return vp->inputs[src->Index];
392
393 for(i=0; i < VERT_ATTRIB_MAX; i++)
394 if(vp->inputs[i] > max_reg)
395 max_reg=vp->inputs[i];
396
397 vp->inputs[src->Index]=max_reg+1;
398
399 //vp_dump_inputs(vp, __FUNCTION__);
400
401 return vp->inputs[src->Index];
402 }else{
403 return src->Index;
404 }
405 }
406
407 static unsigned long t_src(struct r300_vertex_program *vp, struct vp_src_register *src)
408 {
409
410 return MAKE_VSF_SOURCE(t_src_index(vp, src),
411 t_swizzle(GET_SWZ(src->Swizzle, 0)),
412 t_swizzle(GET_SWZ(src->Swizzle, 1)),
413 t_swizzle(GET_SWZ(src->Swizzle, 2)),
414 t_swizzle(GET_SWZ(src->Swizzle, 3)),
415 t_src_class(src->File),
416 src->Negate ? VSF_FLAG_ALL : VSF_FLAG_NONE);
417 }
418
419 static unsigned long t_src_scalar(struct r300_vertex_program *vp, struct vp_src_register *src)
420 {
421
422 return MAKE_VSF_SOURCE(t_src_index(vp, src),
423 t_swizzle(GET_SWZ(src->Swizzle, 0)),
424 t_swizzle(GET_SWZ(src->Swizzle, 0)),
425 t_swizzle(GET_SWZ(src->Swizzle, 0)),
426 t_swizzle(GET_SWZ(src->Swizzle, 0)),
427 t_src_class(src->File),
428 src->Negate ? VSF_FLAG_ALL : VSF_FLAG_NONE);
429 }
430
431 static unsigned long t_opcode(enum vp_opcode opcode)
432 {
433
434 switch(opcode){
435 case VP_OPCODE_DST: return R300_VPI_OUT_OP_DST;
436 case VP_OPCODE_EX2: return R300_VPI_OUT_OP_EX2;
437 case VP_OPCODE_EXP: return R300_VPI_OUT_OP_EXP;
438 case VP_OPCODE_FRC: return R300_VPI_OUT_OP_FRC;
439 case VP_OPCODE_LG2: return R300_VPI_OUT_OP_LG2;
440 case VP_OPCODE_LOG: return R300_VPI_OUT_OP_LOG;
441 case VP_OPCODE_MAX: return R300_VPI_OUT_OP_MAX;
442 case VP_OPCODE_MIN: return R300_VPI_OUT_OP_MIN;
443 case VP_OPCODE_MUL: return R300_VPI_OUT_OP_MUL;
444 case VP_OPCODE_POW: return R300_VPI_OUT_OP_POW;
445 case VP_OPCODE_RCP: return R300_VPI_OUT_OP_RCP;
446 case VP_OPCODE_RSQ: return R300_VPI_OUT_OP_RSQ;
447 case VP_OPCODE_SGE: return R300_VPI_OUT_OP_SGE;
448 case VP_OPCODE_SLT: return R300_VPI_OUT_OP_SLT;
449 case VP_OPCODE_DP4: return R300_VPI_OUT_OP_DOT;
450
451 default:
452 fprintf(stderr, "%s: Should not be called with opcode %d!", __FUNCTION__, opcode);
453 }
454 exit(-1);
455 return 0;
456 }
457
458 static unsigned long op_operands(enum vp_opcode opcode)
459 {
460 int i;
461
462 /* Can we trust mesas opcodes to be in order ? */
463 for(i=0; i < sizeof(op_names) / sizeof(*op_names); i++)
464 if(op_names[i].opcode == opcode)
465 return op_names[i].ip;
466
467 fprintf(stderr, "op %d not found in op_names\n", opcode);
468 exit(-1);
469 return 0;
470 }
471
/* TODO: Get rid of t_src_class call */
/* True when source registers a and b have different indices and are either
   both of class PARAM or both of class ATTR.  The translator copies one of
   the two into a temporary when this holds.
   NOTE(review): presumably the hardware cannot read two distinct registers
   of those classes in one instruction - confirm. */
#define CMP_SRCS(a, b) ((a).Index != (b).Index && \
	((t_src_class((a).File) == VSF_IN_CLASS_PARAM && \
	  t_src_class((b).File) == VSF_IN_CLASS_PARAM) || \
	 (t_src_class((a).File) == VSF_IN_CLASS_ATTR && \
	  t_src_class((b).File) == VSF_IN_CLASS_ATTR)))

/* When defined, translate_vertex_shader() works on a private copy of each
   instruction's source registers so it can redirect them. */
#define SRCS_WRITABLE 1
480 void translate_vertex_shader(struct r300_vertex_program *vp)
481 {
482 struct vertex_program *mesa_vp=(void *)vp;
483 struct vp_instruction *vpi;
484 int i, cur_reg=0;
485 VERTEX_SHADER_INSTRUCTION *o_inst;
486 unsigned long operands;
487 int are_srcs_scalar;
488 unsigned long hw_op;
489 /* Initial value should be last tmp reg that hw supports.
490 Strangely enough r300 doesnt mind even though these would be out of range.
491 Smart enough to realize that it doesnt need it? */
492 int u_temp_i=VSF_MAX_FRAGMENT_TEMPS-1;
493 #ifdef SRCS_WRITABLE
494 struct vp_src_register src[3];
495 #else
496 #define src vpi->SrcReg
497 #endif
498 vp->pos_end=0; /* Not supported yet */
499 vp->program.length=0;
500 vp->num_temporaries=mesa_vp->Base.NumTemporaries;
501
502 for(i=0; i < VERT_ATTRIB_MAX; i++)
503 vp->inputs[i] = -1;
504
505 for(i=0; i < VERT_RESULT_MAX; i++)
506 vp->outputs[i] = -1;
507
508 assert(mesa_vp->OutputsWritten & (1 << VERT_RESULT_HPOS));
509 assert(mesa_vp->OutputsWritten & (1 << VERT_RESULT_COL0));
510
511 /* Assign outputs */
512 if(mesa_vp->OutputsWritten & (1 << VERT_RESULT_HPOS))
513 vp->outputs[VERT_RESULT_HPOS] = cur_reg++;
514
515 if(mesa_vp->OutputsWritten & (1 << VERT_RESULT_PSIZ))
516 vp->outputs[VERT_RESULT_PSIZ] = cur_reg++;
517
518 if(mesa_vp->OutputsWritten & (1 << VERT_RESULT_COL0))
519 vp->outputs[VERT_RESULT_COL0] = cur_reg++;
520
521 #if 0 /* Not supported yet */
522 if(mesa_vp->OutputsWritten & (1 << VERT_RESULT_BFC0))
523 vp->outputs[VERT_RESULT_BFC0] = cur_reg++;
524
525 if(mesa_vp->OutputsWritten & (1 << VERT_RESULT_COL1))
526 vp->outputs[VERT_RESULT_COL1] = cur_reg++;
527
528 if(mesa_vp->OutputsWritten & (1 << VERT_RESULT_BFC1))
529 vp->outputs[VERT_RESULT_BFC1] = cur_reg++;
530
531 if(mesa_vp->OutputsWritten & (1 << VERT_RESULT_FOGC))
532 vp->outputs[VERT_RESULT_FOGC] = cur_reg++;
533 #endif
534
535 for(i=VERT_RESULT_TEX0; i <= VERT_RESULT_TEX7; i++)
536 if(mesa_vp->OutputsWritten & (1 << i))
537 vp->outputs[i] = cur_reg++;
538
539 o_inst=vp->program.body.i;
540 for(vpi=mesa_vp->Instructions; vpi->Opcode != VP_OPCODE_END; vpi++, o_inst++){
541
542 operands=op_operands(vpi->Opcode);
543 are_srcs_scalar=operands & SCALAR_FLAG;
544 operands &= OP_MASK;
545
546 for(i=0; i < operands; i++)
547 src[i]=vpi->SrcReg[i];
548 #if 1
549 if(operands == 3){ /* TODO: scalars */
550 if( CMP_SRCS(src[1], src[2]) || CMP_SRCS(src[0], src[2]) ){
551 o_inst->op=MAKE_VSF_OP(R300_VPI_OUT_OP_ADD, u_temp_i,
552 VSF_FLAG_ALL, VSF_OUT_CLASS_TMP);
553
554 o_inst->src1=MAKE_VSF_SOURCE(t_src_index(vp, &src[2]),
555 SWIZZLE_X, SWIZZLE_Y,
556 SWIZZLE_Z, SWIZZLE_W,
557 t_src_class(src[2].File), VSF_FLAG_NONE);
558
559 o_inst->src2=MAKE_VSF_SOURCE(t_src_index(vp, &src[2]),
560 SWIZZLE_ZERO, SWIZZLE_ZERO,
561 SWIZZLE_ZERO, SWIZZLE_ZERO,
562 t_src_class(src[2].File), VSF_FLAG_NONE);
563 o_inst->src3=0;
564 o_inst++;
565
566 src[2].File=PROGRAM_TEMPORARY;
567 src[2].Index=u_temp_i;
568 u_temp_i--;
569 }
570
571 }
572 if(operands >= 2){
573 if( CMP_SRCS(src[1], src[0]) ){
574 o_inst->op=MAKE_VSF_OP(R300_VPI_OUT_OP_ADD, u_temp_i,
575 VSF_FLAG_ALL, VSF_OUT_CLASS_TMP);
576
577 o_inst->src1=MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
578 SWIZZLE_X, SWIZZLE_Y,
579 SWIZZLE_Z, SWIZZLE_W,
580 t_src_class(src[0].File), VSF_FLAG_NONE);
581
582 o_inst->src2=MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
583 SWIZZLE_ZERO, SWIZZLE_ZERO,
584 SWIZZLE_ZERO, SWIZZLE_ZERO,
585 t_src_class(src[0].File), VSF_FLAG_NONE);
586 o_inst->src3=0;
587 o_inst++;
588
589 src[0].File=PROGRAM_TEMPORARY;
590 src[0].Index=u_temp_i;
591 u_temp_i--;
592 }
593 }
594 #endif
595 /* these ops need special handling.
596 Ops that need temp vars should probably be given reg indexes starting at the end of tmp area. */
597 switch(vpi->Opcode){
598 case VP_OPCODE_MOV://ADD RESULT 1.X Y Z W PARAM 0{} {X Y Z W} PARAM 0{} {ZERO ZERO ZERO ZERO}
599 o_inst->op=MAKE_VSF_OP(R300_VPI_OUT_OP_ADD, t_dst_index(vp, &vpi->DstReg),
600 t_dst_mask(vpi->DstReg.WriteMask), t_dst_class(vpi->DstReg.File));
601 o_inst->src1=t_src(vp, &src[0]);
602 o_inst->src2=MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
603 SWIZZLE_ZERO, SWIZZLE_ZERO,
604 SWIZZLE_ZERO, SWIZZLE_ZERO,
605 t_src_class(src[0].File), VSF_FLAG_NONE);
606
607 o_inst->src3=0;
608
609 goto next;
610
611 case VP_OPCODE_ADD:
612 hw_op=(src[0].File == PROGRAM_TEMPORARY &&
613 src[1].File == PROGRAM_TEMPORARY) ? R300_VPI_OUT_OP_MAD_2 : R300_VPI_OUT_OP_MAD;
614
615 o_inst->op=MAKE_VSF_OP(hw_op, t_dst_index(vp, &vpi->DstReg),
616 t_dst_mask(vpi->DstReg.WriteMask), t_dst_class(vpi->DstReg.File));
617 o_inst->src1=t_src(vp, &src[0]);
618 o_inst->src2=MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
619 SWIZZLE_ONE, SWIZZLE_ONE,
620 SWIZZLE_ONE, SWIZZLE_ONE,
621 t_src_class(src[0].File), VSF_FLAG_NONE);
622 o_inst->src3=t_src(vp, &src[1]);
623 goto next;
624
625 case VP_OPCODE_MAD:
626 hw_op=(src[0].File == PROGRAM_TEMPORARY &&
627 src[1].File == PROGRAM_TEMPORARY &&
628 src[2].File == PROGRAM_TEMPORARY) ? R300_VPI_OUT_OP_MAD_2 : R300_VPI_OUT_OP_MAD;
629
630 o_inst->op=MAKE_VSF_OP(hw_op, t_dst_index(vp, &vpi->DstReg),
631 t_dst_mask(vpi->DstReg.WriteMask), t_dst_class(vpi->DstReg.File));
632 o_inst->src1=t_src(vp, &src[0]);
633 o_inst->src2=t_src(vp, &src[1]);
634 o_inst->src3=t_src(vp, &src[2]);
635 goto next;
636
637 case VP_OPCODE_MUL: /* HW mul can take third arg but appears to have some other limitations. */
638 hw_op=(src[0].File == PROGRAM_TEMPORARY &&
639 src[1].File == PROGRAM_TEMPORARY) ? R300_VPI_OUT_OP_MAD_2 : R300_VPI_OUT_OP_MAD;
640
641 o_inst->op=MAKE_VSF_OP(hw_op, t_dst_index(vp, &vpi->DstReg),
642 t_dst_mask(vpi->DstReg.WriteMask), t_dst_class(vpi->DstReg.File));
643 o_inst->src1=t_src(vp, &src[0]);
644 o_inst->src2=t_src(vp, &src[1]);
645
646 o_inst->src3=MAKE_VSF_SOURCE(t_src_index(vp, &src[1]),
647 SWIZZLE_ZERO, SWIZZLE_ZERO,
648 SWIZZLE_ZERO, SWIZZLE_ZERO,
649 t_src_class(src[1].File), VSF_FLAG_NONE);
650 goto next;
651
652 case VP_OPCODE_DP3://DOT RESULT 1.X Y Z W PARAM 0{} {X Y Z ZERO} PARAM 0{} {X Y Z ZERO}
653 o_inst->op=MAKE_VSF_OP(R300_VPI_OUT_OP_DOT, t_dst_index(vp, &vpi->DstReg),
654 t_dst_mask(vpi->DstReg.WriteMask), t_dst_class(vpi->DstReg.File));
655
656 o_inst->src1=MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
657 t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
658 t_swizzle(GET_SWZ(src[0].Swizzle, 1)),
659 t_swizzle(GET_SWZ(src[0].Swizzle, 2)),
660 SWIZZLE_ZERO,
661 t_src_class(src[0].File),
662 src[0].Negate ? VSF_FLAG_XYZ : VSF_FLAG_NONE);
663
664 o_inst->src2=MAKE_VSF_SOURCE(t_src_index(vp, &src[1]),
665 t_swizzle(GET_SWZ(src[1].Swizzle, 0)),
666 t_swizzle(GET_SWZ(src[1].Swizzle, 1)),
667 t_swizzle(GET_SWZ(src[1].Swizzle, 2)),
668 SWIZZLE_ZERO,
669 t_src_class(src[1].File),
670 src[1].Negate ? VSF_FLAG_XYZ : VSF_FLAG_NONE);
671
672 o_inst->src3=0;
673 goto next;
674
675 case VP_OPCODE_SUB://ADD RESULT 1.X Y Z W TMP 0{} {X Y Z W} PARAM 1{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W
676 #if 1
677 hw_op=(src[0].File == PROGRAM_TEMPORARY &&
678 src[1].File == PROGRAM_TEMPORARY) ? R300_VPI_OUT_OP_MAD_2 : R300_VPI_OUT_OP_MAD;
679
680 o_inst->op=MAKE_VSF_OP(hw_op, t_dst_index(vp, &vpi->DstReg),
681 t_dst_mask(vpi->DstReg.WriteMask), t_dst_class(vpi->DstReg.File));
682 o_inst->src1=t_src(vp, &src[0]);
683 o_inst->src2=MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
684 SWIZZLE_ONE, SWIZZLE_ONE,
685 SWIZZLE_ONE, SWIZZLE_ONE,
686 t_src_class(src[0].File), VSF_FLAG_NONE);
687 o_inst->src3=MAKE_VSF_SOURCE(t_src_index(vp, &src[1]),
688 t_swizzle(GET_SWZ(src[1].Swizzle, 0)),
689 t_swizzle(GET_SWZ(src[1].Swizzle, 1)),
690 t_swizzle(GET_SWZ(src[1].Swizzle, 2)),
691 t_swizzle(GET_SWZ(src[1].Swizzle, 3)),
692 t_src_class(src[1].File),
693 (!src[1].Negate) ? VSF_FLAG_ALL : VSF_FLAG_NONE);
694 #else
695 o_inst->op=MAKE_VSF_OP(R300_VPI_OUT_OP_ADD, t_dst_index(vp, &vpi->DstReg),
696 t_dst_mask(vpi->DstReg.WriteMask), t_dst_class(vpi->DstReg.File));
697
698 o_inst->src1=t_src(vp, &src[0]);
699 o_inst->src2=MAKE_VSF_SOURCE(t_src_index(vp, &src[1]),
700 t_swizzle(GET_SWZ(src[1].Swizzle, 0)),
701 t_swizzle(GET_SWZ(src[1].Swizzle, 1)),
702 t_swizzle(GET_SWZ(src[1].Swizzle, 2)),
703 t_swizzle(GET_SWZ(src[1].Swizzle, 3)),
704 t_src_class(src[1].File),
705 (!src[1].Negate) ? VSF_FLAG_ALL : VSF_FLAG_NONE);
706 o_inst->src3=0;
707 #endif
708 goto next;
709
710 case VP_OPCODE_ABS://MAX RESULT 1.X Y Z W PARAM 0{} {X Y Z W} PARAM 0{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W
711 o_inst->op=MAKE_VSF_OP(R300_VPI_OUT_OP_MAX, t_dst_index(vp, &vpi->DstReg),
712 t_dst_mask(vpi->DstReg.WriteMask), t_dst_class(vpi->DstReg.File));
713
714 o_inst->src1=t_src(vp, &src[0]);
715 o_inst->src2=MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
716 t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
717 t_swizzle(GET_SWZ(src[0].Swizzle, 1)),
718 t_swizzle(GET_SWZ(src[0].Swizzle, 2)),
719 t_swizzle(GET_SWZ(src[0].Swizzle, 3)),
720 t_src_class(src[0].File),
721 (!src[0].Negate) ? VSF_FLAG_ALL : VSF_FLAG_NONE);
722 o_inst->src3=0;
723 goto next;
724
725 case VP_OPCODE_FLR:
726 /* FRC TMP 0.X Y Z W PARAM 0{} {X Y Z W}
727 ADD RESULT 1.X Y Z W PARAM 0{} {X Y Z W} TMP 0{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W */
728
729 o_inst->op=MAKE_VSF_OP(R300_VPI_OUT_OP_FRC, u_temp_i,
730 t_dst_mask(vpi->DstReg.WriteMask), VSF_OUT_CLASS_TMP);
731
732 o_inst->src1=t_src(vp, &src[0]);
733 o_inst->src2=0;
734 o_inst->src3=0;
735 o_inst++;
736
737 o_inst->op=MAKE_VSF_OP(R300_VPI_OUT_OP_ADD, t_dst_index(vp, &vpi->DstReg),
738 t_dst_mask(vpi->DstReg.WriteMask), t_dst_class(vpi->DstReg.File));
739
740 o_inst->src1=t_src(vp, &src[0]);
741 o_inst->src2=MAKE_VSF_SOURCE(u_temp_i,
742 VSF_IN_COMPONENT_X,
743 VSF_IN_COMPONENT_Y,
744 VSF_IN_COMPONENT_Z,
745 VSF_IN_COMPONENT_W,
746 VSF_IN_CLASS_TMP,
747 /* Not 100% sure about this */
748 (!src[1].Negate) ? VSF_FLAG_ALL : VSF_FLAG_NONE/*VSF_FLAG_ALL*/);
749
750 o_inst->src3=0;
751 u_temp_i--;
752 goto next;
753
754 case VP_OPCODE_LG2:// LG2 RESULT 1.X Y Z W PARAM 0{} {X X X X}
755 o_inst->op=MAKE_VSF_OP(R300_VPI_OUT_OP_LG2, t_dst_index(vp, &vpi->DstReg),
756 t_dst_mask(vpi->DstReg.WriteMask), t_dst_class(vpi->DstReg.File));
757
758 o_inst->src1=MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
759 t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
760 t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
761 t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
762 t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
763 t_src_class(src[0].File),
764 src[0].Negate ? VSF_FLAG_ALL : VSF_FLAG_NONE);
765 o_inst->src2=0;
766 o_inst->src3=0;
767 goto next;
768
769 case VP_OPCODE_LIT://LIT TMP 1.Y Z TMP 1{} {X W Z Y} TMP 1{} {Y W Z X} TMP 1{} {Y X Z W}
770 o_inst->op=MAKE_VSF_OP(R300_VPI_OUT_OP_LIT, t_dst_index(vp, &vpi->DstReg),
771 t_dst_mask(vpi->DstReg.WriteMask), t_dst_class(vpi->DstReg.File));
772 /* NOTE: Users swizzling might not work. */
773 o_inst->src1=MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
774 t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // x
775 t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // w
776 t_swizzle(GET_SWZ(src[0].Swizzle, 2)), // z
777 t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // y
778 t_src_class(src[0].File),
779 src[0].Negate ? VSF_FLAG_ALL : VSF_FLAG_NONE);
780 o_inst->src2=MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
781 t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // y
782 t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // w
783 t_swizzle(GET_SWZ(src[0].Swizzle, 2)), // z
784 t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // x
785 t_src_class(src[0].File),
786 src[0].Negate ? VSF_FLAG_ALL : VSF_FLAG_NONE);
787 o_inst->src3=MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
788 t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // y
789 t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // x
790 t_swizzle(GET_SWZ(src[0].Swizzle, 2)), // z
791 t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // w
792 t_src_class(src[0].File),
793 src[0].Negate ? VSF_FLAG_ALL : VSF_FLAG_NONE);
794 goto next;
795
796 case VP_OPCODE_DPH://DOT RESULT 1.X Y Z W PARAM 0{} {X Y Z ONE} PARAM 0{} {X Y Z W}
797 o_inst->op=MAKE_VSF_OP(R300_VPI_OUT_OP_DOT, t_dst_index(vp, &vpi->DstReg),
798 t_dst_mask(vpi->DstReg.WriteMask), t_dst_class(vpi->DstReg.File));
799
800 o_inst->src1=MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
801 t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
802 t_swizzle(GET_SWZ(src[0].Swizzle, 1)),
803 t_swizzle(GET_SWZ(src[0].Swizzle, 2)),
804 VSF_IN_COMPONENT_ONE,
805 t_src_class(src[0].File),
806 src[0].Negate ? VSF_FLAG_XYZ : VSF_FLAG_NONE);
807 o_inst->src2=t_src(vp, &src[1]);
808 o_inst->src3=0;
809 goto next;
810
811 case VP_OPCODE_XPD:
812 /* mul r0, r1.yzxw, r2.zxyw
813 mad r0, -r2.yzxw, r1.zxyw, r0
814 NOTE: might need MAD_2
815 */
816
817 o_inst->op=MAKE_VSF_OP(R300_VPI_OUT_OP_MAD, u_temp_i,
818 t_dst_mask(vpi->DstReg.WriteMask), VSF_OUT_CLASS_TMP);
819
820 o_inst->src1=MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
821 t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // y
822 t_swizzle(GET_SWZ(src[0].Swizzle, 2)), // z
823 t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // x
824 t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // w
825 t_src_class(src[0].File),
826 src[0].Negate ? VSF_FLAG_ALL : VSF_FLAG_NONE);
827
828 o_inst->src2=MAKE_VSF_SOURCE(t_src_index(vp, &src[1]),
829 t_swizzle(GET_SWZ(src[1].Swizzle, 2)), // z
830 t_swizzle(GET_SWZ(src[1].Swizzle, 0)), // x
831 t_swizzle(GET_SWZ(src[1].Swizzle, 1)), // y
832 t_swizzle(GET_SWZ(src[1].Swizzle, 3)), // w
833 t_src_class(src[1].File),
834 src[1].Negate ? VSF_FLAG_ALL : VSF_FLAG_NONE);
835
836 o_inst->src3=MAKE_VSF_SOURCE(t_src_index(vp, &src[1]),
837 SWIZZLE_ZERO, SWIZZLE_ZERO,
838 SWIZZLE_ZERO, SWIZZLE_ZERO,
839 t_src_class(src[1].File),
840 VSF_FLAG_NONE);
841 o_inst++;
842 u_temp_i--;
843
844 o_inst->op=MAKE_VSF_OP(R300_VPI_OUT_OP_MAD, t_dst_index(vp, &vpi->DstReg),
845 t_dst_mask(vpi->DstReg.WriteMask), t_dst_class(vpi->DstReg.File));
846
847 o_inst->src1=MAKE_VSF_SOURCE(t_src_index(vp, &src[1]),
848 t_swizzle(GET_SWZ(src[1].Swizzle, 1)), // y
849 t_swizzle(GET_SWZ(src[1].Swizzle, 2)), // z
850 t_swizzle(GET_SWZ(src[1].Swizzle, 0)), // x
851 t_swizzle(GET_SWZ(src[1].Swizzle, 3)), // w
852 t_src_class(src[1].File),
853 (!src[1].Negate) ? VSF_FLAG_ALL : VSF_FLAG_NONE);
854
855 o_inst->src2=MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
856 t_swizzle(GET_SWZ(src[0].Swizzle, 2)), // z
857 t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // x
858 t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // y
859 t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // w
860 t_src_class(src[0].File),
861 src[0].Negate ? VSF_FLAG_ALL : VSF_FLAG_NONE);
862
863 o_inst->src3=MAKE_VSF_SOURCE(u_temp_i+1,
864 VSF_IN_COMPONENT_X,
865 VSF_IN_COMPONENT_Y,
866 VSF_IN_COMPONENT_Z,
867 VSF_IN_COMPONENT_W,
868 VSF_IN_CLASS_TMP,
869 VSF_FLAG_NONE);
870
871 goto next;
872
873 case VP_OPCODE_ARL:
874 case VP_OPCODE_SWZ:
875 case VP_OPCODE_RCC:
876 case VP_OPCODE_PRINT:
877 //vp->num_temporaries++;
878 fprintf(stderr, "Dont know how to handle op %d yet\n", vpi->Opcode);
879 exit(-1);
880 break;
881 case VP_OPCODE_END:
882 break;
883 default:
884 break;
885 }
886
887 o_inst->op=MAKE_VSF_OP(t_opcode(vpi->Opcode), t_dst_index(vp, &vpi->DstReg),
888 t_dst_mask(vpi->DstReg.WriteMask), t_dst_class(vpi->DstReg.File));
889
890 if(are_srcs_scalar){
891 switch(operands){
892 case 1:
893 o_inst->src1=t_src_scalar(vp, &src[0]);
894 o_inst->src2=0;
895 o_inst->src3=0;
896 break;
897
898 case 2:
899 o_inst->src1=t_src_scalar(vp, &src[0]);
900 o_inst->src2=t_src_scalar(vp, &src[1]);
901 o_inst->src3=0;
902 break;
903
904 case 3:
905 o_inst->src1=t_src_scalar(vp, &src[0]);
906 o_inst->src2=t_src_scalar(vp, &src[1]);
907 o_inst->src3=t_src_scalar(vp, &src[2]);
908 break;
909
910 default:
911 fprintf(stderr, "scalars and op RCC not handled yet");
912 exit(-1);
913 break;
914 }
915 }else{
916 switch(operands){
917 case 1:
918 o_inst->src1=t_src(vp, &src[0]);
919 o_inst->src2=0;
920 o_inst->src3=0;
921 break;
922
923 case 2:
924 o_inst->src1=t_src(vp, &src[0]);
925 o_inst->src2=t_src(vp, &src[1]);
926 o_inst->src3=0;
927 break;
928
929 case 3:
930 o_inst->src1=t_src(vp, &src[0]);
931 o_inst->src2=t_src(vp, &src[1]);
932 o_inst->src3=t_src(vp, &src[2]);
933 break;
934
935 default:
936 fprintf(stderr, "scalars and op RCC not handled yet");
937 exit(-1);
938 break;
939 }
940 }
941 next: ;
942 }
943
944 vp->program.length=(o_inst - vp->program.body.i) * 4;
945
946 if(u_temp_i < vp->num_temporaries)
947 vp->translated=GL_FALSE; /* temps exhausted - program cannot be run */
948 else
949 vp->translated=GL_TRUE;
950 }
951