fbSwapBuffers needs to return a status
[mesa.git] / src / mesa / drivers / dri / r300 / r300_vertexprog.c
1 /**************************************************************************
2
3 Copyright (C) 2005 Aapo Tahkola.
4
5 All Rights Reserved.
6
7 Permission is hereby granted, free of charge, to any person obtaining a
8 copy of this software and associated documentation files (the "Software"),
9 to deal in the Software without restriction, including without limitation
10 on the rights to use, copy, modify, merge, publish, distribute, sub
11 license, and/or sell copies of the Software, and to permit persons to whom
12 the Software is furnished to do so, subject to the following conditions:
13
14 The above copyright notice and this permission notice (including the next
15 paragraph) shall be included in all copies or substantial portions of the
16 Software.
17
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
21 ATI, VA LINUX SYSTEMS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
22 DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
23 OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
24 USE OR OTHER DEALINGS IN THE SOFTWARE.
25
26 **************************************************************************/
27
28 /*
29 * Authors:
30 * Aapo Tahkola <aet@rasterburn.org>
31 */
32 #include "glheader.h"
33 #include "macros.h"
34 #include "enums.h"
35
36 #include "program.h"
37 #include "r300_context.h"
38 #include "nvvertprog.h"
39
/*
 * Flag bit OR-ed into the operand counts stored in op_names[] to mark
 * operands that are scalars.  The constants are unsigned so that shifting
 * into bit 31 is well defined (1<<31 overflows a signed int).
 */
#define SCALAR_FLAG (1u<<31)
#define FLAG_MASK (1u<<31)
#define OP_MASK (0xf) /* we are unlikely to have more than 15 */
/* Builds one op_names[] entry: mnemonic string, Mesa opcode, input and output descriptors. */
#define OPN(operator, ip, op) {#operator, VP_OPCODE_##operator, ip, op}
44
45 static struct{
46 char *name;
47 int opcode;
48 unsigned long ip; /* number of input operands and flags */
49 unsigned long op;
50 }op_names[]={
51 OPN(ABS, 1, 1),
52 OPN(ADD, 2, 1),
53 OPN(ARL, 1, 1|SCALAR_FLAG),
54 OPN(DP3, 2, 3|SCALAR_FLAG),
55 OPN(DP4, 2, 3|SCALAR_FLAG),
56 OPN(DPH, 2, 3|SCALAR_FLAG),
57 OPN(DST, 2, 1),
58 OPN(EX2, 1|SCALAR_FLAG, 4|SCALAR_FLAG),
59 OPN(EXP, 1|SCALAR_FLAG, 1),
60 OPN(FLR, 1, 1),
61 OPN(FRC, 1, 1),
62 OPN(LG2, 1|SCALAR_FLAG, 4|SCALAR_FLAG),
63 OPN(LIT, 1, 1),
64 OPN(LOG, 1|SCALAR_FLAG, 1),
65 OPN(MAD, 3, 1),
66 OPN(MAX, 2, 1),
67 OPN(MIN, 2, 1),
68 OPN(MOV, 1, 1),
69 OPN(MUL, 2, 1),
70 OPN(POW, 2|SCALAR_FLAG, 4|SCALAR_FLAG),
71 OPN(RCP, 1|SCALAR_FLAG, 4|SCALAR_FLAG),
72 OPN(RSQ, 1|SCALAR_FLAG, 4|SCALAR_FLAG),
73 OPN(SGE, 2, 1),
74 OPN(SLT, 2, 1),
75 OPN(SUB, 2, 1),
76 OPN(SWZ, 1, 1),
77 OPN(XPD, 2, 1),
78 OPN(RCC, 0, 0), //extra
79 OPN(PRINT, 0, 0),
80 OPN(END, 0, 0),
81 };
82 #undef OPN
83 #define OPN(rf) {#rf, PROGRAM_##rf}
84
85 static struct{
86 char *name;
87 int id;
88 }register_file_names[]={
89 OPN(TEMPORARY),
90 OPN(INPUT),
91 OPN(OUTPUT),
92 OPN(LOCAL_PARAM),
93 OPN(ENV_PARAM),
94 OPN(NAMED_PARAM),
95 OPN(STATE_VAR),
96 OPN(WRITE_ONLY),
97 OPN(ADDRESS),
98 };
99
/* Component letters indexed by component number 0..3; read-only. */
static const char *const dst_mask_names[4]={ "X", "Y", "Z", "W" };
101
102 /* from vertex program spec:
103 Instruction Inputs Output Description
104 ----------- ------ ------ --------------------------------
105 ABS v v absolute value
106 ADD v,v v add
107 ARL v a address register load
108 DP3 v,v ssss 3-component dot product
109 DP4 v,v ssss 4-component dot product
110 DPH v,v ssss homogeneous dot product
111 DST v,v v distance vector
112 EX2 s ssss exponential base 2
113 EXP s v exponential base 2 (approximate)
114 FLR v v floor
115 FRC v v fraction
116 LG2 s ssss logarithm base 2
117 LIT v v compute light coefficients
118 LOG s v logarithm base 2 (approximate)
119 MAD v,v,v v multiply and add
120 MAX v,v v maximum
121 MIN v,v v minimum
122 MOV v v move
123 MUL v,v v multiply
124 POW s,s ssss exponentiate
125 RCP s ssss reciprocal
126 RSQ s ssss reciprocal square root
127 SGE v,v v set on greater than or equal
128 SLT v,v v set on less than
129 SUB v,v v subtract
130 SWZ v v extended swizzle
131 XPD v,v v cross product
132 */
133
134 void dump_program_params(GLcontext *ctx, struct vertex_program *vp)
135 {
136 int i;
137 int pi;
138
139 fprintf(stderr, "NumInstructions=%d\n", vp->Base.NumInstructions);
140 fprintf(stderr, "NumTemporaries=%d\n", vp->Base.NumTemporaries);
141 fprintf(stderr, "NumParameters=%d\n", vp->Base.NumParameters);
142 fprintf(stderr, "NumAttributes=%d\n", vp->Base.NumAttributes);
143 fprintf(stderr, "NumAddressRegs=%d\n", vp->Base.NumAddressRegs);
144
145 _mesa_load_state_parameters(ctx, vp->Parameters);
146
147 #if 0
148 for(pi=0; pi < vp->Base.NumParameters; pi++){
149 fprintf(stderr, "{ ");
150 for(i=0; i < 4; i++)
151 fprintf(stderr, "%f ", vp->Base.LocalParams[pi][i]);
152 fprintf(stderr, "}\n");
153 }
154 #endif
155 for(pi=0; pi < vp->Parameters->NumParameters; pi++){
156 fprintf(stderr, "param %02d:", pi);
157
158 switch(vp->Parameters->Parameters[pi].Type){
159
160 case NAMED_PARAMETER:
161 fprintf(stderr, "%s", vp->Parameters->Parameters[pi].Name);
162 fprintf(stderr, "(NAMED_PARAMETER)");
163 break;
164
165 case CONSTANT:
166 fprintf(stderr, "(CONSTANT)");
167 break;
168
169 case STATE:
170 fprintf(stderr, "(STATE)\n");
171 break;
172
173 }
174
175 fprintf(stderr, "{ ");
176 for(i=0; i < 4; i++)
177 fprintf(stderr, "%f ", vp->Parameters->ParameterValues[pi][i]);
178 fprintf(stderr, "}\n");
179
180 }
181 }
182
183 void debug_vp(GLcontext *ctx, struct vertex_program *vp)
184 {
185 struct vp_instruction *vpi;
186 int i, operand_index;
187 int operator_index;
188
189 dump_program_params(ctx, vp);
190
191 vpi=vp->Instructions;
192
193 for(;; vpi++){
194 if(vpi->Opcode == VP_OPCODE_END)
195 break;
196
197 for(i=0; i < sizeof(op_names) / sizeof(*op_names); i++){
198 if(vpi->Opcode == op_names[i].opcode){
199 fprintf(stderr, "%s ", op_names[i].name);
200 break;
201 }
202 }
203 operator_index=i;
204
205 for(i=0; i < sizeof(register_file_names) / sizeof(*register_file_names); i++){
206 if(vpi->DstReg.File == register_file_names[i].id){
207 fprintf(stderr, "%s ", register_file_names[i].name);
208 break;
209 }
210 }
211
212 fprintf(stderr, "%d.", vpi->DstReg.Index);
213
214 for(i=0; i < 4; i++)
215 if(vpi->DstReg.WriteMask & (1<<i))
216 fprintf(stderr, "%s", dst_mask_names[i]);
217 fprintf(stderr, " ");
218
219 for(operand_index=0; operand_index < (op_names[operator_index].ip & (~FLAG_MASK));
220 operand_index++){
221
222 if(vpi->SrcReg[operand_index].Negate)
223 fprintf(stderr, "-");
224
225 for(i=0; i < sizeof(register_file_names) / sizeof(*register_file_names); i++){
226 if(vpi->SrcReg[operand_index].File == register_file_names[i].id){
227 fprintf(stderr, "%s ", register_file_names[i].name);
228 break;
229 }
230 }
231 fprintf(stderr, "%d.", vpi->SrcReg[operand_index].Index);
232
233 for(i=0; i < 4; i++)
234 fprintf(stderr, "%s", dst_mask_names[GET_SWZ(vpi->SrcReg[operand_index].Swizzle, i)]);
235
236 if(operand_index+1 < (op_names[operator_index].ip & (~FLAG_MASK)) )
237 fprintf(stderr, ",");
238 }
239 fprintf(stderr, "\n");
240 }
241
242 }
243
244 void r300VertexProgUpdateParams(GLcontext *ctx, struct r300_vertex_program *vp)
245 {
246 int pi;
247 struct vertex_program *mesa_vp=(void *)vp;
248 int dst_index;
249
250 _mesa_load_state_parameters(ctx, mesa_vp->Parameters);
251
252 //debug_vp(ctx, mesa_vp);
253 if(mesa_vp->Parameters->NumParameters * 4 > VSF_MAX_FRAGMENT_LENGTH){
254 fprintf(stderr, "%s:Params exhausted\n", __FUNCTION__);
255 exit(-1);
256 }
257 dst_index=0;
258 for(pi=0; pi < mesa_vp->Parameters->NumParameters; pi++){
259 switch(mesa_vp->Parameters->Parameters[pi].Type){
260
261 case STATE:
262 case NAMED_PARAMETER:
263 //fprintf(stderr, "%s", vp->Parameters->Parameters[pi].Name);
264 case CONSTANT:
265 vp->params.body.f[dst_index++]=mesa_vp->Parameters->ParameterValues[pi][0];
266 vp->params.body.f[dst_index++]=mesa_vp->Parameters->ParameterValues[pi][1];
267 vp->params.body.f[dst_index++]=mesa_vp->Parameters->ParameterValues[pi][2];
268 vp->params.body.f[dst_index++]=mesa_vp->Parameters->ParameterValues[pi][3];
269 break;
270
271 default: _mesa_problem(NULL, "Bad param type in %s", __FUNCTION__);
272 }
273
274 }
275
276 vp->params.length=dst_index;
277 }
278
279 static unsigned long t_dst_mask(GLuint mask)
280 {
281 unsigned long flags=0;
282
283 if(mask & WRITEMASK_X) flags |= VSF_FLAG_X;
284 if(mask & WRITEMASK_Y) flags |= VSF_FLAG_Y;
285 if(mask & WRITEMASK_Z) flags |= VSF_FLAG_Z;
286 if(mask & WRITEMASK_W) flags |= VSF_FLAG_W;
287
288 return flags;
289 }
290
291 static unsigned long t_dst_class(enum register_file file)
292 {
293
294 switch(file){
295 case PROGRAM_TEMPORARY:
296 return VSF_OUT_CLASS_TMP;
297 case PROGRAM_OUTPUT:
298 return VSF_OUT_CLASS_RESULT;
299 /*
300 case PROGRAM_INPUT:
301 case PROGRAM_LOCAL_PARAM:
302 case PROGRAM_ENV_PARAM:
303 case PROGRAM_NAMED_PARAM:
304 case PROGRAM_STATE_VAR:
305 case PROGRAM_WRITE_ONLY:
306 case PROGRAM_ADDRESS:
307 */
308 default:
309 fprintf(stderr, "problem in %s", __FUNCTION__);
310 exit(0);
311 }
312 }
313
314 static unsigned long t_dst_index(struct r300_vertex_program *vp, struct vp_dst_register *dst)
315 {
316 if(dst->File == PROGRAM_OUTPUT) {
317 if (vp->outputs[dst->Index] != -1)
318 return vp->outputs[dst->Index];
319 else {
320 WARN_ONCE("Unknown output %d\n", dst->Index);
321 return 10;
322 }
323 }
324 return dst->Index;
325 }
326
327 static unsigned long t_src_class(enum register_file file)
328 {
329
330 switch(file){
331 case PROGRAM_TEMPORARY:
332 return VSF_IN_CLASS_TMP;
333
334 case PROGRAM_INPUT:
335 return VSF_IN_CLASS_ATTR;
336
337 case PROGRAM_LOCAL_PARAM:
338 case PROGRAM_ENV_PARAM:
339 case PROGRAM_NAMED_PARAM:
340 case PROGRAM_STATE_VAR:
341 return VSF_IN_CLASS_PARAM;
342 /*
343 case PROGRAM_OUTPUT:
344 case PROGRAM_WRITE_ONLY:
345 case PROGRAM_ADDRESS:
346 */
347 default:
348 fprintf(stderr, "problem in %s", __FUNCTION__);
349 exit(0);
350 }
351 }
352
353 static unsigned long t_swizzle(GLubyte swizzle)
354 {
355 switch(swizzle){
356 case SWIZZLE_X: return VSF_IN_COMPONENT_X;
357 case SWIZZLE_Y: return VSF_IN_COMPONENT_Y;
358 case SWIZZLE_Z: return VSF_IN_COMPONENT_Z;
359 case SWIZZLE_W: return VSF_IN_COMPONENT_W;
360 case SWIZZLE_ZERO: return VSF_IN_COMPONENT_ZERO;
361 case SWIZZLE_ONE: return VSF_IN_COMPONENT_ONE;
362 default:
363 fprintf(stderr, "problem in %s", __FUNCTION__);
364 exit(0);
365 }
366 }
367
368 void vp_dump_inputs(struct r300_vertex_program *vp, char *caller)
369 {
370 int i;
371
372 if(vp == NULL){
373 fprintf(stderr, "vp null in call to %s from %s\n", __FUNCTION__, caller);
374 return ;
375 }
376
377 fprintf(stderr, "%s:<", caller);
378 for(i=0; i < VERT_ATTRIB_MAX; i++)
379 fprintf(stderr, "%d ", vp->inputs[i]);
380 fprintf(stderr, ">\n");
381
382 }
383
384 static unsigned long t_src_index(struct r300_vertex_program *vp, struct vp_src_register *src)
385 {
386 int i;
387 int max_reg=-1;
388
389 if(src->File == PROGRAM_INPUT){
390 if(vp->inputs[src->Index] != -1)
391 return vp->inputs[src->Index];
392
393 for(i=0; i < VERT_ATTRIB_MAX; i++)
394 if(vp->inputs[i] > max_reg)
395 max_reg=vp->inputs[i];
396
397 vp->inputs[src->Index]=max_reg+1;
398
399 //vp_dump_inputs(vp, __FUNCTION__);
400
401 return vp->inputs[src->Index];
402 }else{
403 return src->Index;
404 }
405 }
406
407 static unsigned long t_src(struct r300_vertex_program *vp, struct vp_src_register *src)
408 {
409
410 return MAKE_VSF_SOURCE(t_src_index(vp, src),
411 t_swizzle(GET_SWZ(src->Swizzle, 0)),
412 t_swizzle(GET_SWZ(src->Swizzle, 1)),
413 t_swizzle(GET_SWZ(src->Swizzle, 2)),
414 t_swizzle(GET_SWZ(src->Swizzle, 3)),
415 t_src_class(src->File),
416 src->Negate ? VSF_FLAG_ALL : VSF_FLAG_NONE);
417 }
418
419 static unsigned long t_src_scalar(struct r300_vertex_program *vp, struct vp_src_register *src)
420 {
421
422 return MAKE_VSF_SOURCE(t_src_index(vp, src),
423 t_swizzle(GET_SWZ(src->Swizzle, 0)),
424 t_swizzle(GET_SWZ(src->Swizzle, 0)),
425 t_swizzle(GET_SWZ(src->Swizzle, 0)),
426 t_swizzle(GET_SWZ(src->Swizzle, 0)),
427 t_src_class(src->File),
428 src->Negate ? VSF_FLAG_ALL : VSF_FLAG_NONE);
429 }
430
431 static unsigned long t_opcode(enum vp_opcode opcode)
432 {
433
434 switch(opcode){
435 case VP_OPCODE_DST: return R300_VPI_OUT_OP_DST;
436 case VP_OPCODE_EX2: return R300_VPI_OUT_OP_EX2;
437 case VP_OPCODE_EXP: return R300_VPI_OUT_OP_EXP;
438 case VP_OPCODE_FRC: return R300_VPI_OUT_OP_FRC;
439 case VP_OPCODE_LG2: return R300_VPI_OUT_OP_LG2;
440 case VP_OPCODE_LOG: return R300_VPI_OUT_OP_LOG;
441 case VP_OPCODE_MAX: return R300_VPI_OUT_OP_MAX;
442 case VP_OPCODE_MIN: return R300_VPI_OUT_OP_MIN;
443 case VP_OPCODE_MUL: return R300_VPI_OUT_OP_MUL;
444 case VP_OPCODE_POW: return R300_VPI_OUT_OP_POW;
445 case VP_OPCODE_RCP: return R300_VPI_OUT_OP_RCP;
446 case VP_OPCODE_RSQ: return R300_VPI_OUT_OP_RSQ;
447 case VP_OPCODE_SGE: return R300_VPI_OUT_OP_SGE;
448 case VP_OPCODE_SLT: return R300_VPI_OUT_OP_SLT;
449 case VP_OPCODE_DP4: return R300_VPI_OUT_OP_DOT;
450
451 default:
452 fprintf(stderr, "%s: Should not be called with opcode %d!", __FUNCTION__, opcode);
453 }
454 exit(-1);
455 return 0;
456 }
457
458 static unsigned long op_operands(enum vp_opcode opcode)
459 {
460 int i;
461
462 /* Can we trust mesas opcodes to be in order ? */
463 for(i=0; i < sizeof(op_names) / sizeof(*op_names); i++)
464 if(op_names[i].opcode == opcode)
465 return op_names[i].ip;
466
467 fprintf(stderr, "op %d not found in op_names\n", opcode);
468 exit(-1);
469 return 0;
470 }
471
/*
 * CMP_SRCS(a, b) is true when source registers a and b have different
 * indices and are either both of the PARAM class or both of the ATTR class.
 * translate_vertex_shader() copies one of the two into a temporary when
 * this holds.
 * NOTE(review): presumably the hardware can only fetch one distinct
 * parameter/attribute register per instruction - confirm.
 *
 * The arguments are parenthesized so that any lvalue expression can be
 * passed, not just simple names.
 *
 * TODO: Get rid of t_src_class call
 */
#define CMP_SRCS(a, b) ((a).Index != (b).Index && \
	((t_src_class((a).File) == VSF_IN_CLASS_PARAM && \
	  t_src_class((b).File) == VSF_IN_CLASS_PARAM) || \
	 (t_src_class((a).File) == VSF_IN_CLASS_ATTR && \
	  t_src_class((b).File) == VSF_IN_CLASS_ATTR)))

/* When defined, translate_vertex_shader() works on a private copy of each
   instruction's source registers instead of on the Mesa program itself. */
#define SRCS_WRITABLE 1
480 void translate_vertex_shader(struct r300_vertex_program *vp)
481 {
482 struct vertex_program *mesa_vp=(void *)vp;
483 struct vp_instruction *vpi;
484 int i, cur_reg=0;
485 VERTEX_SHADER_INSTRUCTION *o_inst;
486 unsigned long operands;
487 int are_srcs_scalar;
488 unsigned long hw_op;
489 /* Initial value should be last tmp reg that hw supports.
490 Strangely enough r300 doesnt mind even though these would be out of range.
491 Smart enough to realize that it doesnt need it? */
492 int u_temp_i=VSF_MAX_FRAGMENT_TEMPS-1;
493 #ifdef SRCS_WRITABLE
494 struct vp_src_register src[3];
495 #else
496 #define src vpi->SrcReg
497 #endif
498 vp->pos_end=0; /* Not supported yet */
499 vp->program.length=0;
500 vp->num_temporaries=mesa_vp->Base.NumTemporaries;
501
502 for(i=0; i < VERT_ATTRIB_MAX; i++)
503 vp->inputs[i] = -1;
504
505 for(i=0; i < VERT_RESULT_MAX; i++)
506 vp->outputs[i] = -1;
507
508 assert(mesa_vp->OutputsWritten & (1 << VERT_RESULT_HPOS));
509 assert(mesa_vp->OutputsWritten & (1 << VERT_RESULT_COL0));
510
511 /* Assign outputs */
512 if(mesa_vp->OutputsWritten & (1 << VERT_RESULT_HPOS))
513 vp->outputs[VERT_RESULT_HPOS] = cur_reg++;
514
515 if(mesa_vp->OutputsWritten & (1 << VERT_RESULT_PSIZ))
516 vp->outputs[VERT_RESULT_PSIZ] = cur_reg++;
517
518 if(mesa_vp->OutputsWritten & (1 << VERT_RESULT_COL0))
519 vp->outputs[VERT_RESULT_COL0] = cur_reg++;
520
521 #if 0 /* Not supported yet */
522 if(mesa_vp->OutputsWritten & (1 << VERT_RESULT_BFC0))
523 vp->outputs[VERT_RESULT_BFC0] = cur_reg++;
524
525 if(mesa_vp->OutputsWritten & (1 << VERT_RESULT_COL1))
526 vp->outputs[VERT_RESULT_COL1] = cur_reg++;
527
528 if(mesa_vp->OutputsWritten & (1 << VERT_RESULT_BFC1))
529 vp->outputs[VERT_RESULT_BFC1] = cur_reg++;
530
531 if(mesa_vp->OutputsWritten & (1 << VERT_RESULT_FOGC))
532 vp->outputs[VERT_RESULT_FOGC] = cur_reg++;
533 #endif
534
535 for(i=VERT_RESULT_TEX0; i <= VERT_RESULT_TEX7; i++)
536 if(mesa_vp->OutputsWritten & (1 << i))
537 vp->outputs[i] = cur_reg++;
538
539 o_inst=vp->program.body.i;
540 for(vpi=mesa_vp->Instructions; vpi->Opcode != VP_OPCODE_END; vpi++, o_inst++){
541
542 operands=op_operands(vpi->Opcode);
543 are_srcs_scalar=operands & SCALAR_FLAG;
544 operands &= OP_MASK;
545
546 for(i=0; i < operands; i++)
547 src[i]=vpi->SrcReg[i];
548 #if 1
549 if(operands == 3){ /* TODO: scalars */
550 if( CMP_SRCS(src[1], src[2]) || CMP_SRCS(src[0], src[2]) ){
551 o_inst->op=MAKE_VSF_OP(R300_VPI_OUT_OP_ADD, u_temp_i,
552 VSF_FLAG_ALL, VSF_OUT_CLASS_TMP);
553
554 o_inst->src1=MAKE_VSF_SOURCE(t_src_index(vp, &src[2]),
555 SWIZZLE_X, SWIZZLE_Y,
556 SWIZZLE_Z, SWIZZLE_W,
557 t_src_class(src[2].File), VSF_FLAG_NONE);
558
559 o_inst->src2=MAKE_VSF_SOURCE(t_src_index(vp, &src[2]),
560 SWIZZLE_ZERO, SWIZZLE_ZERO,
561 SWIZZLE_ZERO, SWIZZLE_ZERO,
562 t_src_class(src[2].File), VSF_FLAG_NONE);
563 o_inst->src3=0;
564 o_inst++;
565
566 src[2].File=PROGRAM_TEMPORARY;
567 src[2].Index=u_temp_i;
568 u_temp_i--;
569 }
570
571 }
572 if(operands >= 2){
573 if( CMP_SRCS(src[1], src[0]) ){
574 o_inst->op=MAKE_VSF_OP(R300_VPI_OUT_OP_ADD, u_temp_i,
575 VSF_FLAG_ALL, VSF_OUT_CLASS_TMP);
576
577 o_inst->src1=MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
578 SWIZZLE_X, SWIZZLE_Y,
579 SWIZZLE_Z, SWIZZLE_W,
580 t_src_class(src[0].File), VSF_FLAG_NONE);
581
582 o_inst->src2=MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
583 SWIZZLE_ZERO, SWIZZLE_ZERO,
584 SWIZZLE_ZERO, SWIZZLE_ZERO,
585 t_src_class(src[0].File), VSF_FLAG_NONE);
586 o_inst->src3=0;
587 o_inst++;
588
589 src[0].File=PROGRAM_TEMPORARY;
590 src[0].Index=u_temp_i;
591 u_temp_i--;
592 }
593 }
594 #endif
595 /* these ops need special handling.
596 Ops that need temp vars should probably be given reg indexes starting at the end of tmp area. */
597 switch(vpi->Opcode){
598 case VP_OPCODE_MOV://ADD RESULT 1.X Y Z W PARAM 0{} {X Y Z W} PARAM 0{} {ZERO ZERO ZERO ZERO}
599 o_inst->op=MAKE_VSF_OP(R300_VPI_OUT_OP_ADD, t_dst_index(vp, &vpi->DstReg),
600 t_dst_mask(vpi->DstReg.WriteMask), t_dst_class(vpi->DstReg.File));
601 o_inst->src1=t_src(vp, &src[0]);
602 o_inst->src2=MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
603 SWIZZLE_ZERO, SWIZZLE_ZERO,
604 SWIZZLE_ZERO, SWIZZLE_ZERO,
605 t_src_class(src[0].File), VSF_FLAG_NONE);
606
607 o_inst->src3=0;
608
609 goto next;
610
611 case VP_OPCODE_ADD:
612 hw_op=(src[0].File == PROGRAM_TEMPORARY &&
613 src[1].File == PROGRAM_TEMPORARY) ? R300_VPI_OUT_OP_MAD_2 : R300_VPI_OUT_OP_MAD;
614
615 o_inst->op=MAKE_VSF_OP(hw_op, t_dst_index(vp, &vpi->DstReg),
616 t_dst_mask(vpi->DstReg.WriteMask), t_dst_class(vpi->DstReg.File));
617 o_inst->src1=t_src(vp, &src[0]);
618 o_inst->src2=MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
619 SWIZZLE_ONE, SWIZZLE_ONE,
620 SWIZZLE_ONE, SWIZZLE_ONE,
621 t_src_class(src[0].File), VSF_FLAG_NONE);
622 o_inst->src3=t_src(vp, &src[1]);
623 goto next;
624
625 case VP_OPCODE_MAD:
626 hw_op=(src[0].File == PROGRAM_TEMPORARY &&
627 src[1].File == PROGRAM_TEMPORARY &&
628 src[2].File == PROGRAM_TEMPORARY) ? R300_VPI_OUT_OP_MAD_2 : R300_VPI_OUT_OP_MAD;
629
630 o_inst->op=MAKE_VSF_OP(hw_op, t_dst_index(vp, &vpi->DstReg),
631 t_dst_mask(vpi->DstReg.WriteMask), t_dst_class(vpi->DstReg.File));
632 o_inst->src1=t_src(vp, &src[0]);
633 o_inst->src2=t_src(vp, &src[1]);
634 o_inst->src3=t_src(vp, &src[2]);
635 goto next;
636
637 case VP_OPCODE_MUL: /* HW mul can take third arg but appears to have some other limitations. */
638 hw_op=(src[0].File == PROGRAM_TEMPORARY &&
639 src[1].File == PROGRAM_TEMPORARY) ? R300_VPI_OUT_OP_MAD_2 : R300_VPI_OUT_OP_MAD;
640
641 o_inst->op=MAKE_VSF_OP(hw_op, t_dst_index(vp, &vpi->DstReg),
642 t_dst_mask(vpi->DstReg.WriteMask), t_dst_class(vpi->DstReg.File));
643 o_inst->src1=t_src(vp, &src[0]);
644 o_inst->src2=t_src(vp, &src[1]);
645
646 o_inst->src3=MAKE_VSF_SOURCE(t_src_index(vp, &src[1]),
647 SWIZZLE_ZERO, SWIZZLE_ZERO,
648 SWIZZLE_ZERO, SWIZZLE_ZERO,
649 t_src_class(src[1].File), VSF_FLAG_NONE);
650 goto next;
651
652 case VP_OPCODE_DP3://DOT RESULT 1.X Y Z W PARAM 0{} {X Y Z ZERO} PARAM 0{} {X Y Z ZERO}
653 o_inst->op=MAKE_VSF_OP(R300_VPI_OUT_OP_DOT, t_dst_index(vp, &vpi->DstReg),
654 t_dst_mask(vpi->DstReg.WriteMask), t_dst_class(vpi->DstReg.File));
655
656 o_inst->src1=MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
657 t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
658 t_swizzle(GET_SWZ(src[0].Swizzle, 1)),
659 t_swizzle(GET_SWZ(src[0].Swizzle, 2)),
660 SWIZZLE_ZERO,
661 t_src_class(src[0].File),
662 src[0].Negate ? VSF_FLAG_XYZ : VSF_FLAG_NONE);
663
664 o_inst->src2=MAKE_VSF_SOURCE(t_src_index(vp, &src[1]),
665 t_swizzle(GET_SWZ(src[1].Swizzle, 0)),
666 t_swizzle(GET_SWZ(src[1].Swizzle, 1)),
667 t_swizzle(GET_SWZ(src[1].Swizzle, 2)),
668 SWIZZLE_ZERO,
669 t_src_class(src[1].File),
670 src[1].Negate ? VSF_FLAG_XYZ : VSF_FLAG_NONE);
671
672 o_inst->src3=0;
673 goto next;
674
675 case VP_OPCODE_SUB://ADD RESULT 1.X Y Z W TMP 0{} {X Y Z W} PARAM 1{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W
676 o_inst->op=MAKE_VSF_OP(R300_VPI_OUT_OP_ADD, t_dst_index(vp, &vpi->DstReg),
677 t_dst_mask(vpi->DstReg.WriteMask), t_dst_class(vpi->DstReg.File));
678
679 o_inst->src1=t_src(vp, &src[0]);
680 o_inst->src2=MAKE_VSF_SOURCE(t_src_index(vp, &src[1]),
681 t_swizzle(GET_SWZ(src[1].Swizzle, 0)),
682 t_swizzle(GET_SWZ(src[1].Swizzle, 1)),
683 t_swizzle(GET_SWZ(src[1].Swizzle, 2)),
684 t_swizzle(GET_SWZ(src[1].Swizzle, 3)),
685 t_src_class(src[1].File),
686 (!src[1].Negate) ? VSF_FLAG_ALL : VSF_FLAG_NONE);
687 o_inst->src3=0;
688 goto next;
689
690 case VP_OPCODE_ABS://MAX RESULT 1.X Y Z W PARAM 0{} {X Y Z W} PARAM 0{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W
691 o_inst->op=MAKE_VSF_OP(R300_VPI_OUT_OP_MAX, t_dst_index(vp, &vpi->DstReg),
692 t_dst_mask(vpi->DstReg.WriteMask), t_dst_class(vpi->DstReg.File));
693
694 o_inst->src1=t_src(vp, &src[0]);
695 o_inst->src2=MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
696 t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
697 t_swizzle(GET_SWZ(src[0].Swizzle, 1)),
698 t_swizzle(GET_SWZ(src[0].Swizzle, 2)),
699 t_swizzle(GET_SWZ(src[0].Swizzle, 3)),
700 t_src_class(src[0].File),
701 (!src[0].Negate) ? VSF_FLAG_ALL : VSF_FLAG_NONE);
702 o_inst->src3=0;
703 goto next;
704
705 case VP_OPCODE_FLR:
706 /* FRC TMP 0.X Y Z W PARAM 0{} {X Y Z W}
707 ADD RESULT 1.X Y Z W PARAM 0{} {X Y Z W} TMP 0{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W */
708
709 o_inst->op=MAKE_VSF_OP(R300_VPI_OUT_OP_FRC, u_temp_i,
710 t_dst_mask(vpi->DstReg.WriteMask), VSF_OUT_CLASS_TMP);
711
712 o_inst->src1=t_src(vp, &src[0]);
713 o_inst->src2=0;
714 o_inst->src3=0;
715 o_inst++;
716
717 o_inst->op=MAKE_VSF_OP(R300_VPI_OUT_OP_ADD, t_dst_index(vp, &vpi->DstReg),
718 t_dst_mask(vpi->DstReg.WriteMask), t_dst_class(vpi->DstReg.File));
719
720 o_inst->src1=t_src(vp, &src[0]);
721 o_inst->src2=MAKE_VSF_SOURCE(u_temp_i,
722 VSF_IN_COMPONENT_X,
723 VSF_IN_COMPONENT_Y,
724 VSF_IN_COMPONENT_Z,
725 VSF_IN_COMPONENT_W,
726 VSF_IN_CLASS_TMP,
727 /* Not 100% sure about this */
728 (!src[1].Negate) ? VSF_FLAG_ALL : VSF_FLAG_NONE/*VSF_FLAG_ALL*/);
729
730 o_inst->src3=0;
731 u_temp_i--;
732 goto next;
733
734 case VP_OPCODE_LG2:// LG2 RESULT 1.X Y Z W PARAM 0{} {X X X X}
735 o_inst->op=MAKE_VSF_OP(R300_VPI_OUT_OP_LG2, t_dst_index(vp, &vpi->DstReg),
736 t_dst_mask(vpi->DstReg.WriteMask), t_dst_class(vpi->DstReg.File));
737
738 o_inst->src1=MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
739 t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
740 t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
741 t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
742 t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
743 t_src_class(src[0].File),
744 src[0].Negate ? VSF_FLAG_ALL : VSF_FLAG_NONE);
745 o_inst->src2=0;
746 o_inst->src3=0;
747 goto next;
748
749 case VP_OPCODE_LIT://LIT TMP 1.Y Z TMP 1{} {X W Z Y} TMP 1{} {Y W Z X} TMP 1{} {Y X Z W}
750 o_inst->op=MAKE_VSF_OP(R300_VPI_OUT_OP_LIT, t_dst_index(vp, &vpi->DstReg),
751 t_dst_mask(vpi->DstReg.WriteMask), t_dst_class(vpi->DstReg.File));
752 /* NOTE: Users swizzling might not work. */
753 o_inst->src1=MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
754 t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // x
755 t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // w
756 t_swizzle(GET_SWZ(src[0].Swizzle, 2)), // z
757 t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // y
758 t_src_class(src[0].File),
759 src[0].Negate ? VSF_FLAG_ALL : VSF_FLAG_NONE);
760 o_inst->src2=MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
761 t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // y
762 t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // w
763 t_swizzle(GET_SWZ(src[0].Swizzle, 2)), // z
764 t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // x
765 t_src_class(src[0].File),
766 src[0].Negate ? VSF_FLAG_ALL : VSF_FLAG_NONE);
767 o_inst->src3=MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
768 t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // y
769 t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // x
770 t_swizzle(GET_SWZ(src[0].Swizzle, 2)), // z
771 t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // w
772 t_src_class(src[0].File),
773 src[0].Negate ? VSF_FLAG_ALL : VSF_FLAG_NONE);
774 goto next;
775
776 case VP_OPCODE_DPH://DOT RESULT 1.X Y Z W PARAM 0{} {X Y Z ONE} PARAM 0{} {X Y Z W}
777 o_inst->op=MAKE_VSF_OP(R300_VPI_OUT_OP_DOT, t_dst_index(vp, &vpi->DstReg),
778 t_dst_mask(vpi->DstReg.WriteMask), t_dst_class(vpi->DstReg.File));
779
780 o_inst->src1=MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
781 t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
782 t_swizzle(GET_SWZ(src[0].Swizzle, 1)),
783 t_swizzle(GET_SWZ(src[0].Swizzle, 2)),
784 VSF_IN_COMPONENT_ONE,
785 t_src_class(src[0].File),
786 src[0].Negate ? VSF_FLAG_XYZ : VSF_FLAG_NONE);
787 o_inst->src2=t_src(vp, &src[1]);
788 o_inst->src3=0;
789 goto next;
790
791 case VP_OPCODE_XPD:
792 /* mul r0, r1.yzxw, r2.zxyw
793 mad r0, -r2.yzxw, r1.zxyw, r0
794 NOTE: might need MAD_2
795 */
796
797 o_inst->op=MAKE_VSF_OP(R300_VPI_OUT_OP_MAD, u_temp_i,
798 t_dst_mask(vpi->DstReg.WriteMask), VSF_OUT_CLASS_TMP);
799
800 o_inst->src1=MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
801 t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // y
802 t_swizzle(GET_SWZ(src[0].Swizzle, 2)), // z
803 t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // x
804 t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // w
805 t_src_class(src[0].File),
806 src[0].Negate ? VSF_FLAG_ALL : VSF_FLAG_NONE);
807
808 o_inst->src2=MAKE_VSF_SOURCE(t_src_index(vp, &src[1]),
809 t_swizzle(GET_SWZ(src[1].Swizzle, 2)), // z
810 t_swizzle(GET_SWZ(src[1].Swizzle, 0)), // x
811 t_swizzle(GET_SWZ(src[1].Swizzle, 1)), // y
812 t_swizzle(GET_SWZ(src[1].Swizzle, 3)), // w
813 t_src_class(src[1].File),
814 src[1].Negate ? VSF_FLAG_ALL : VSF_FLAG_NONE);
815
816 o_inst->src3=MAKE_VSF_SOURCE(t_src_index(vp, &src[1]),
817 SWIZZLE_ZERO, SWIZZLE_ZERO,
818 SWIZZLE_ZERO, SWIZZLE_ZERO,
819 t_src_class(src[1].File),
820 VSF_FLAG_NONE);
821 o_inst++;
822 u_temp_i--;
823
824 o_inst->op=MAKE_VSF_OP(R300_VPI_OUT_OP_MAD, t_dst_index(vp, &vpi->DstReg),
825 t_dst_mask(vpi->DstReg.WriteMask), t_dst_class(vpi->DstReg.File));
826
827 o_inst->src1=MAKE_VSF_SOURCE(t_src_index(vp, &src[1]),
828 t_swizzle(GET_SWZ(src[1].Swizzle, 1)), // y
829 t_swizzle(GET_SWZ(src[1].Swizzle, 2)), // z
830 t_swizzle(GET_SWZ(src[1].Swizzle, 0)), // x
831 t_swizzle(GET_SWZ(src[1].Swizzle, 3)), // w
832 t_src_class(src[1].File),
833 (!src[1].Negate) ? VSF_FLAG_ALL : VSF_FLAG_NONE);
834
835 o_inst->src2=MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
836 t_swizzle(GET_SWZ(src[0].Swizzle, 2)), // z
837 t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // x
838 t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // y
839 t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // w
840 t_src_class(src[0].File),
841 src[0].Negate ? VSF_FLAG_ALL : VSF_FLAG_NONE);
842
843 o_inst->src3=MAKE_VSF_SOURCE(u_temp_i+1,
844 VSF_IN_COMPONENT_X,
845 VSF_IN_COMPONENT_Y,
846 VSF_IN_COMPONENT_Z,
847 VSF_IN_COMPONENT_W,
848 VSF_IN_CLASS_TMP,
849 VSF_FLAG_NONE);
850
851 goto next;
852
853 case VP_OPCODE_ARL:
854 case VP_OPCODE_SWZ:
855 case VP_OPCODE_RCC:
856 case VP_OPCODE_PRINT:
857 //vp->num_temporaries++;
858 fprintf(stderr, "Dont know how to handle op %d yet\n", vpi->Opcode);
859 exit(-1);
860 break;
861 case VP_OPCODE_END:
862 break;
863 default:
864 break;
865 }
866
867 o_inst->op=MAKE_VSF_OP(t_opcode(vpi->Opcode), t_dst_index(vp, &vpi->DstReg),
868 t_dst_mask(vpi->DstReg.WriteMask), t_dst_class(vpi->DstReg.File));
869
870 if(are_srcs_scalar){
871 switch(operands){
872 case 1:
873 o_inst->src1=t_src_scalar(vp, &src[0]);
874 o_inst->src2=0;
875 o_inst->src3=0;
876 break;
877
878 case 2:
879 o_inst->src1=t_src_scalar(vp, &src[0]);
880 o_inst->src2=t_src_scalar(vp, &src[1]);
881 o_inst->src3=0;
882 break;
883
884 case 3:
885 o_inst->src1=t_src_scalar(vp, &src[0]);
886 o_inst->src2=t_src_scalar(vp, &src[1]);
887 o_inst->src3=t_src_scalar(vp, &src[2]);
888 break;
889
890 default:
891 fprintf(stderr, "scalars and op RCC not handled yet");
892 exit(-1);
893 break;
894 }
895 }else{
896 switch(operands){
897 case 1:
898 o_inst->src1=t_src(vp, &src[0]);
899 o_inst->src2=0;
900 o_inst->src3=0;
901 break;
902
903 case 2:
904 o_inst->src1=t_src(vp, &src[0]);
905 o_inst->src2=t_src(vp, &src[1]);
906 o_inst->src3=0;
907 break;
908
909 case 3:
910 o_inst->src1=t_src(vp, &src[0]);
911 o_inst->src2=t_src(vp, &src[1]);
912 o_inst->src3=t_src(vp, &src[2]);
913 break;
914
915 default:
916 fprintf(stderr, "scalars and op RCC not handled yet");
917 exit(-1);
918 break;
919 }
920 }
921 next: ;
922 }
923
924 vp->program.length=(o_inst - vp->program.body.i) * 4;
925
926 if(u_temp_i < vp->num_temporaries)
927 vp->translated=GL_FALSE; /* temps exhausted - program cannot be run */
928 else
929 vp->translated=GL_TRUE;
930 }
931