cd008df7aab609ee1d8e2893350f6cd0019c3c9b
[mesa.git] / src / mesa / drivers / dri / r200 / r200_vertprog.c
1 /**************************************************************************
2
3 Copyright (C) 2005 Aapo Tahkola.
4
5 All Rights Reserved.
6
7 Permission is hereby granted, free of charge, to any person obtaining a
8 copy of this software and associated documentation files (the "Software"),
9 to deal in the Software without restriction, including without limitation
10 on the rights to use, copy, modify, merge, publish, distribute, sub
11 license, and/or sell copies of the Software, and to permit persons to whom
12 the Software is furnished to do so, subject to the following conditions:
13
14 The above copyright notice and this permission notice (including the next
15 paragraph) shall be included in all copies or substantial portions of the
16 Software.
17
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
21 THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
22 DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
23 OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
24 USE OR OTHER DEALINGS IN THE SOFTWARE.
25
26 **************************************************************************/
27
28 /*
29 * Authors:
30 * Aapo Tahkola <aet@rasterburn.org>
31 */
32 #include "glheader.h"
33 #include "macros.h"
34 #include "enums.h"
35 #include "program.h"
36
37 #include "r200_context.h"
38 #include "r200_vertprog.h"
39 #include "r200_ioctl.h"
40 #include "r200_tcl.h"
41 #include "program_instruction.h"
42 #include "tnl/tnl.h"
43
44 #define SCALAR_FLAG (1<<31)
45 #define FLAG_MASK (1<<31)
46 #define OP_MASK (0xf) /* we are unlikely to have more than 15 */
47 #define OPN(operator, ip, op) {#operator, OPCODE_##operator, ip, op}
48
49 static struct{
50 char *name;
51 int opcode;
52 unsigned long ip; /* number of input operands and flags */
53 unsigned long op;
54 }op_names[]={
55 OPN(ABS, 1, 1),
56 OPN(ADD, 2, 1),
57 OPN(ARL, 1, 1|SCALAR_FLAG),
58 OPN(DP3, 2, 3|SCALAR_FLAG),
59 OPN(DP4, 2, 3|SCALAR_FLAG),
60 OPN(DPH, 2, 3|SCALAR_FLAG),
61 OPN(DST, 2, 1),
62 OPN(EX2, 1|SCALAR_FLAG, 4|SCALAR_FLAG),
63 OPN(EXP, 1|SCALAR_FLAG, 1),
64 OPN(FLR, 1, 1),
65 OPN(FRC, 1, 1),
66 OPN(LG2, 1|SCALAR_FLAG, 4|SCALAR_FLAG),
67 OPN(LIT, 1, 1),
68 OPN(LOG, 1|SCALAR_FLAG, 1),
69 OPN(MAD, 3, 1),
70 OPN(MAX, 2, 1),
71 OPN(MIN, 2, 1),
72 OPN(MOV, 1, 1),
73 OPN(MUL, 2, 1),
74 OPN(POW, 2|SCALAR_FLAG, 4|SCALAR_FLAG),
75 OPN(RCP, 1|SCALAR_FLAG, 4|SCALAR_FLAG),
76 OPN(RSQ, 1|SCALAR_FLAG, 4|SCALAR_FLAG),
77 OPN(SGE, 2, 1),
78 OPN(SLT, 2, 1),
79 OPN(SUB, 2, 1),
80 OPN(SWZ, 1, 1),
81 OPN(XPD, 2, 1),
82 OPN(RCC, 0, 0), //extra
83 OPN(PRINT, 0, 0),
84 OPN(END, 0, 0),
85 };
86 #undef OPN
87
88 static GLboolean r200VertexProgUpdateParams(GLcontext *ctx, struct r200_vertex_program *vp)
89 {
90 r200ContextPtr rmesa = R200_CONTEXT( ctx );
91 GLfloat *fcmd = (GLfloat *)&rmesa->hw.vpp[0].cmd[VPP_CMD_0 + 1];
92 int pi;
93 struct vertex_program *mesa_vp = (void *)vp;
94 struct program_parameter_list *paramList;
95 drm_radeon_cmd_header_t tmp;
96
97 R200_STATECHANGE( rmesa, vpp[0] );
98 R200_STATECHANGE( rmesa, vpp[1] );
99 assert(mesa_vp->Base.Parameters);
100 _mesa_load_state_parameters(ctx, mesa_vp->Base.Parameters);
101 paramList = mesa_vp->Base.Parameters;
102
103 if(paramList->NumParameters > R200_VSF_MAX_PARAM){
104 fprintf(stderr, "%s:Params exhausted\n", __FUNCTION__);
105 return GL_FALSE;
106 }
107
108 for(pi = 0; pi < paramList->NumParameters; pi++) {
109 switch(paramList->Parameters[pi].Type) {
110 case PROGRAM_STATE_VAR:
111 case PROGRAM_NAMED_PARAM:
112 //fprintf(stderr, "%s", vp->Parameters->Parameters[pi].Name);
113 case PROGRAM_CONSTANT:
114 *fcmd++ = paramList->ParameterValues[pi][0];
115 *fcmd++ = paramList->ParameterValues[pi][1];
116 *fcmd++ = paramList->ParameterValues[pi][2];
117 *fcmd++ = paramList->ParameterValues[pi][3];
118 break;
119 default:
120 _mesa_problem(NULL, "Bad param type in %s", __FUNCTION__);
121 break;
122 }
123 if (pi == 95) {
124 fcmd = (GLfloat *)rmesa->hw.vpp[1].cmd[VPP_CMD_0 + 1];
125 }
126 }
127 /* hack up the cmd_size so not the whole state atom is emitted always. */
128 rmesa->hw.vpp[0].cmd_size =
129 1 + 4 * ((paramList->NumParameters > 96) ? 96 : paramList->NumParameters);
130 tmp.i = rmesa->hw.vpp[0].cmd[VPP_CMD_0];
131 tmp.veclinear.count = (paramList->NumParameters > 96) ? 96 : paramList->NumParameters;
132 rmesa->hw.vpp[0].cmd[VPP_CMD_0] = tmp.i;
133 if (paramList->NumParameters > 96) {
134 rmesa->hw.vpp[1].cmd_size = 1 + 4 * (paramList->NumParameters - 96);
135 tmp.i = rmesa->hw.vpp[1].cmd[VPP_CMD_0];
136 tmp.veclinear.count = paramList->NumParameters - 96;
137 rmesa->hw.vpp[1].cmd[VPP_CMD_0] = tmp.i;
138 }
139 return GL_TRUE;
140 }
141
142 static unsigned long t_dst_mask(GLuint mask)
143 {
144 unsigned long flags = 0;
145
146 if(mask & WRITEMASK_X) flags |= VSF_FLAG_X;
147 if(mask & WRITEMASK_Y) flags |= VSF_FLAG_Y;
148 if(mask & WRITEMASK_Z) flags |= VSF_FLAG_Z;
149 if(mask & WRITEMASK_W) flags |= VSF_FLAG_W;
150
151 return flags;
152 }
153
154 static unsigned long t_dst(struct prog_dst_register *dst)
155 {
156 switch(dst->File) {
157 case PROGRAM_TEMPORARY:
158 return ((dst->Index << R200_VPI_OUT_REG_INDEX_SHIFT)
159 | R200_VSF_OUT_CLASS_TMP);
160 case PROGRAM_OUTPUT:
161 switch (dst->Index) {
162 case VERT_RESULT_HPOS:
163 return R200_VSF_OUT_CLASS_RESULT_POS;
164 case VERT_RESULT_COL0:
165 return R200_VSF_OUT_CLASS_RESULT_COLOR;
166 case VERT_RESULT_COL1:
167 return ((1 << R200_VPI_OUT_REG_INDEX_SHIFT)
168 | R200_VSF_OUT_CLASS_RESULT_COLOR);
169 case VERT_RESULT_FOGC:
170 return R200_VSF_OUT_CLASS_RESULT_FOGC;
171 case VERT_RESULT_TEX0:
172 case VERT_RESULT_TEX1:
173 case VERT_RESULT_TEX2:
174 case VERT_RESULT_TEX3:
175 case VERT_RESULT_TEX4:
176 case VERT_RESULT_TEX5:
177 return (((dst->Index - VERT_RESULT_TEX0) << R200_VPI_OUT_REG_INDEX_SHIFT)
178 | R200_VSF_OUT_CLASS_RESULT_TEXC);
179 case VERT_RESULT_PSIZ:
180 return R200_VSF_OUT_CLASS_RESULT_POINTSIZE;
181 default:
182 fprintf(stderr, "problem in %s, unknown dst output reg %d\n", __FUNCTION__, dst->Index);
183 exit(0);
184 return 0;
185 }
186 case PROGRAM_ADDRESS:
187 assert (dst->Index == 0);
188 return R200_VSF_OUT_CLASS_ADDR;
189 default:
190 fprintf(stderr, "problem in %s, unknown register type %d\n", __FUNCTION__, dst->File);
191 exit(0);
192 return 0;
193 }
194 }
195
196 static unsigned long t_src_class(enum register_file file)
197 {
198
199 switch(file){
200 case PROGRAM_TEMPORARY:
201 return VSF_IN_CLASS_TMP;
202
203 case PROGRAM_INPUT:
204 return VSF_IN_CLASS_ATTR;
205
206 case PROGRAM_LOCAL_PARAM:
207 case PROGRAM_ENV_PARAM:
208 case PROGRAM_NAMED_PARAM:
209 case PROGRAM_STATE_VAR:
210 return VSF_IN_CLASS_PARAM;
211 /*
212 case PROGRAM_OUTPUT:
213 case PROGRAM_WRITE_ONLY:
214 case PROGRAM_ADDRESS:
215 */
216 default:
217 fprintf(stderr, "problem in %s", __FUNCTION__);
218 exit(0);
219 }
220 }
221
222 static __inline unsigned long t_swizzle(GLubyte swizzle)
223 {
224 /* this is in fact a NOP as the Mesa SWIZZLE_* are all identical to VSF_IN_COMPONENT_* */
225 /*
226 switch(swizzle){
227 case SWIZZLE_X: return VSF_IN_COMPONENT_X;
228 case SWIZZLE_Y: return VSF_IN_COMPONENT_Y;
229 case SWIZZLE_Z: return VSF_IN_COMPONENT_Z;
230 case SWIZZLE_W: return VSF_IN_COMPONENT_W;
231 case SWIZZLE_ZERO: return VSF_IN_COMPONENT_ZERO;
232 case SWIZZLE_ONE: return VSF_IN_COMPONENT_ONE;
233 default:
234 fprintf(stderr, "problem in %s", __FUNCTION__);
235 exit(0);
236 }
237 */
238 return swizzle;
239 }
240
#if 0
/* Debug helper (compiled out): print the whole vp->inputs[] table to
   stderr, prefixed with the name of the calling function. */
static void vp_dump_inputs(struct r200_vertex_program *vp, char *caller)
{
   int attr = 0;

   if (!vp) {
      fprintf(stderr, "vp null in call to %s from %s\n", __FUNCTION__, caller);
      return;
   }

   fprintf(stderr, "%s:<", caller);
   while (attr < VERT_ATTRIB_MAX) {
      fprintf(stderr, "%d ", vp->inputs[attr]);
      attr++;
   }
   fprintf(stderr, ">\n");
}
#endif
258
259 static unsigned long t_src_index(struct r200_vertex_program *vp, struct prog_src_register *src)
260 {
261 /*
262 int i;
263 int max_reg = -1;
264 */
265 if(src->File == PROGRAM_INPUT){
266 /* if(vp->inputs[src->Index] != -1)
267 return vp->inputs[src->Index];
268
269 for(i=0; i < VERT_ATTRIB_MAX; i++)
270 if(vp->inputs[i] > max_reg)
271 max_reg = vp->inputs[i];
272
273 vp->inputs[src->Index] = max_reg+1;*/
274
275 //vp_dump_inputs(vp, __FUNCTION__);
276 assert(vp->inputs[src->Index] != -1);
277 return vp->inputs[src->Index];
278 } else {
279 if (src->Index < 0) {
280 fprintf(stderr, "WARNING negative offsets for indirect addressing do not work\n");
281 return 0;
282 }
283 return src->Index;
284 }
285 }
286
287 static unsigned long t_src(struct r200_vertex_program *vp, struct prog_src_register *src)
288 {
289
290 return MAKE_VSF_SOURCE(t_src_index(vp, src),
291 t_swizzle(GET_SWZ(src->Swizzle, 0)),
292 t_swizzle(GET_SWZ(src->Swizzle, 1)),
293 t_swizzle(GET_SWZ(src->Swizzle, 2)),
294 t_swizzle(GET_SWZ(src->Swizzle, 3)),
295 t_src_class(src->File),
296 src->NegateBase) | (src->RelAddr << 4);
297 }
298
299 static unsigned long t_src_scalar(struct r200_vertex_program *vp, struct prog_src_register *src)
300 {
301
302 return MAKE_VSF_SOURCE(t_src_index(vp, src),
303 t_swizzle(GET_SWZ(src->Swizzle, 0)),
304 t_swizzle(GET_SWZ(src->Swizzle, 0)),
305 t_swizzle(GET_SWZ(src->Swizzle, 0)),
306 t_swizzle(GET_SWZ(src->Swizzle, 0)),
307 t_src_class(src->File),
308 src->NegateBase ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src->RelAddr << 4);
309 }
310
311 static unsigned long t_opcode(enum prog_opcode opcode)
312 {
313
314 switch(opcode){
315 case OPCODE_DST: return R200_VPI_OUT_OP_DST;
316 case OPCODE_EX2: return R200_VPI_OUT_OP_EX2;
317 case OPCODE_EXP: return R200_VPI_OUT_OP_EXP;
318 case OPCODE_FRC: return R200_VPI_OUT_OP_FRC;
319 case OPCODE_LG2: return R200_VPI_OUT_OP_LG2;
320 case OPCODE_LOG: return R200_VPI_OUT_OP_LOG;
321 case OPCODE_MAX: return R200_VPI_OUT_OP_MAX;
322 case OPCODE_MIN: return R200_VPI_OUT_OP_MIN;
323 case OPCODE_MUL: return R200_VPI_OUT_OP_MUL;
324 case OPCODE_RCP: return R200_VPI_OUT_OP_RCP;
325 case OPCODE_RSQ: return R200_VPI_OUT_OP_RSQ;
326 case OPCODE_SGE: return R200_VPI_OUT_OP_SGE;
327 case OPCODE_SLT: return R200_VPI_OUT_OP_SLT;
328 case OPCODE_DP4: return R200_VPI_OUT_OP_DOT;
329 case OPCODE_ADD: return R200_VPI_OUT_OP_ADD;
330
331 default:
332 fprintf(stderr, "%s: Should not be called with opcode %d!", __FUNCTION__, opcode);
333 }
334 exit(-1);
335 return 0;
336 }
337
338 static unsigned long op_operands(enum prog_opcode opcode)
339 {
340 int i;
341
342 /* Can we trust mesas opcodes to be in order ? */
343 for(i=0; i < sizeof(op_names) / sizeof(*op_names); i++)
344 if(op_names[i].opcode == opcode)
345 return op_names[i].ip;
346
347 fprintf(stderr, "op %d not found in op_names\n", opcode);
348 exit(-1);
349 return 0;
350 }
351
/* TODO: Get rid of t_src_class call */
/* Non-zero when source registers a and b differ in RelAddr, or when they
   have different indices and are either both of class PARAM or both of
   class ATTR.  The arguments are parenthesised so that any lvalue
   expression (e.g. *ptr) can be passed.  The evaluation order of the
   t_src_class() calls is significant (t_src_class() terminates the process
   for unknown register files), so the short-circuit structure must be
   kept as is. */
#define CMP_SRCS(a, b) \
   (((a).RelAddr != (b).RelAddr) || \
    (((a).Index != (b).Index) && \
     (((t_src_class((a).File) == VSF_IN_CLASS_PARAM) && \
       (t_src_class((b).File) == VSF_IN_CLASS_PARAM)) || \
      ((t_src_class((a).File) == VSF_IN_CLASS_ATTR) && \
       (t_src_class((b).File) == VSF_IN_CLASS_ATTR)))))
/* How fglrx on rv250 encodes sources an instruction does not really use:
   - a source that is unused but necessary repeats the previous source with
     all four components zeroed out;
   - an unnecessary source repeats the previous source with
     VSF_IN_CLASS_NONE set.
   Example: an ADD (2 args) used as a MOV has its 2nd arg zeroed out and its
   3rd arg set to VSF_IN_CLASS_NONE.  Not sure if strictly necessary. */

/* The definitions below are the simple variants: they derive the new source
   word from a source of the current instruction (o_inst), so that source
   must already be set up.  They are NOT semantically equivalent to the r300
   ones; porting code between the two requires changes. */

/* all four component selectors set to "zero" */
#define R200_VSF_ALL_ZERO_SWIZZLE \
   ((R200_VPI_IN_SELECT_ZERO << R200_VPI_IN_X_SHIFT) | \
    (R200_VPI_IN_SELECT_ZERO << R200_VPI_IN_Y_SHIFT) | \
    (R200_VPI_IN_SELECT_ZERO << R200_VPI_IN_Z_SHIFT) | \
    (R200_VPI_IN_SELECT_ZERO << R200_VPI_IN_W_SHIFT))

/* reg with its 12 swizzle bits replaced by the all-zero swizzle */
#define R200_VSF_ZEROED_SRC(reg) \
   (((reg) & ~(0xfff << R200_VPI_IN_X_SHIFT)) | R200_VSF_ALL_ZERO_SWIZZLE)

/* reg with its low four bits replaced by 9
   (presumably VSF_IN_CLASS_NONE, see above - confirm against the header) */
#define R200_VSF_UNUSED_SRC(reg) (((reg) & ~0xf) | 0x9)

#define ZERO_SRC_0 R200_VSF_ZEROED_SRC(o_inst->src0)
#define ZERO_SRC_1 R200_VSF_ZEROED_SRC(o_inst->src1)
#define ZERO_SRC_2 R200_VSF_ZEROED_SRC(o_inst->src2)

#define UNUSED_SRC_0 R200_VSF_UNUSED_SRC(o_inst->src0)
#define UNUSED_SRC_1 R200_VSF_UNUSED_SRC(o_inst->src1)
#define UNUSED_SRC_2 R200_VSF_UNUSED_SRC(o_inst->src2)


/* The DP4 variant seems to trigger some hw peculiarity; fglrx nevertheless
   uses it on r200. */
#define PREFER_DP4
394
395 static GLboolean r200_translate_vertex_program(struct r200_vertex_program *vp)
396 {
397 struct vertex_program *mesa_vp = (void *)vp;
398 struct prog_instruction *vpi;
399 int i;
400 VERTEX_SHADER_INSTRUCTION *o_inst;
401 unsigned long operands;
402 int are_srcs_scalar;
403 unsigned long hw_op;
404
405 vp->native = GL_FALSE;
406
407 if ((mesa_vp->Base.InputsRead &
408 ~(VERT_BIT_POS | VERT_BIT_NORMAL | VERT_BIT_COLOR0 | VERT_BIT_COLOR1 |
409 VERT_BIT_FOG | VERT_BIT_TEX0 | VERT_BIT_TEX1 | VERT_BIT_TEX2 |
410 VERT_BIT_TEX3 | VERT_BIT_TEX4 | VERT_BIT_TEX5)) != 0) {
411 if (R200_DEBUG & DEBUG_FALLBACKS) {
412 fprintf(stderr, "can't handle vert prog inputs 0x%x\n",
413 mesa_vp->Base.InputsRead);
414 }
415 return GL_FALSE;
416 }
417
418 /* Initial value should be last tmp reg that hw supports.
419 Strangely enough r300 doesnt mind even though these would be out of range.
420 Smart enough to realize that it doesnt need it? */
421 int u_temp_i = R200_VSF_MAX_TEMPS - 1;
422 struct prog_src_register src[3];
423
424 /* if (getenv("R300_VP_SAFETY")) {
425 WARN_ONCE("R300_VP_SAFETY enabled.\n");
426
427 vpi = malloc((mesa_vp->Base.NumInstructions + VSF_MAX_FRAGMENT_TEMPS) * sizeof(struct prog_instruction));
428 memset(vpi, 0, VSF_MAX_FRAGMENT_TEMPS * sizeof(struct prog_instruction));
429
430 for (i=0; i < VSF_MAX_FRAGMENT_TEMPS; i++) {
431 vpi[i].Opcode = OPCODE_MOV;
432 vpi[i].StringPos = 0;
433 vpi[i].Data = 0;
434
435 vpi[i].DstReg.File = PROGRAM_TEMPORARY;
436 vpi[i].DstReg.Index = i;
437 vpi[i].DstReg.WriteMask = WRITEMASK_XYZW;
438 vpi[i].DstReg.CondMask = COND_TR;
439
440 vpi[i].SrcReg[0].File = PROGRAM_STATE_VAR;
441 vpi[i].SrcReg[0].Index = 0;
442 vpi[i].SrcReg[0].Swizzle = MAKE_SWIZZLE4(SWIZZLE_ONE, SWIZZLE_ONE, SWIZZLE_ONE, SWIZZLE_ONE);
443 }
444
445 memcpy(&vpi[i], mesa_vp->Base.Instructions, mesa_vp->Base.NumInstructions * sizeof(struct prog_instruction));
446
447 free(mesa_vp->Base.Instructions);
448
449 mesa_vp->Base.Instructions = vpi;
450
451 mesa_vp->Base.NumInstructions += VSF_MAX_FRAGMENT_TEMPS;
452 vpi = &mesa_vp->Base.Instructions[mesa_vp->Base.NumInstructions-1];
453
454 assert(vpi->Opcode == OPCODE_END);
455 }*/
456 /* FIXME: is changing the prog safe to do here? */
457 if (mesa_vp->IsPositionInvariant) {
458 struct program_parameter_list *paramList;
459 GLint tokens[6] = { STATE_MATRIX, STATE_MVP, 0, 0, 0, STATE_MATRIX };
460
461 #ifdef PREFER_DP4
462 tokens[5] = STATE_MATRIX;
463 #else
464 tokens[5] = STATE_MATRIX_TRANSPOSE;
465 #endif
466 paramList = mesa_vp->Base.Parameters;
467
468 vpi = malloc((mesa_vp->Base.NumInstructions + 4) * sizeof(struct prog_instruction));
469 memset(vpi, 0, 4 * sizeof(struct prog_instruction));
470
471 for (i=0; i < 4; i++) {
472 GLint idx;
473 tokens[3] = tokens[4] = i;
474 idx = _mesa_add_state_reference(paramList, tokens);
475 #ifdef PREFER_DP4
476 vpi[i].Opcode = OPCODE_DP4;
477 vpi[i].StringPos = 0;
478 vpi[i].Data = 0;
479
480 vpi[i].DstReg.File = PROGRAM_OUTPUT;
481 vpi[i].DstReg.Index = VERT_RESULT_HPOS;
482 vpi[i].DstReg.WriteMask = 1 << i;
483 vpi[i].DstReg.CondMask = COND_TR;
484
485 vpi[i].SrcReg[0].File = PROGRAM_STATE_VAR;
486 vpi[i].SrcReg[0].Index = idx;
487 vpi[i].SrcReg[0].Swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_W);
488
489 vpi[i].SrcReg[1].File = PROGRAM_INPUT;
490 vpi[i].SrcReg[1].Index = VERT_ATTRIB_POS;
491 vpi[i].SrcReg[1].Swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_W);
492 #else
493 if (i == 0)
494 vpi[i].Opcode = OPCODE_MUL;
495 else
496 vpi[i].Opcode = OPCODE_MAD;
497
498 vpi[i].StringPos = 0;
499 vpi[i].Data = 0;
500
501 if (i == 3)
502 vpi[i].DstReg.File = PROGRAM_OUTPUT;
503 else
504 vpi[i].DstReg.File = PROGRAM_TEMPORARY;
505 vpi[i].DstReg.Index = 0;
506 vpi[i].DstReg.WriteMask = 0xf;
507 vpi[i].DstReg.CondMask = COND_TR;
508
509 vpi[i].SrcReg[0].File = PROGRAM_STATE_VAR;
510 vpi[i].SrcReg[0].Index = idx;
511 vpi[i].SrcReg[0].Swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_W);
512
513 vpi[i].SrcReg[1].File = PROGRAM_INPUT;
514 vpi[i].SrcReg[1].Index = VERT_ATTRIB_POS;
515 vpi[i].SrcReg[1].Swizzle = MAKE_SWIZZLE4(i, i, i, i);
516
517 if (i > 0) {
518 vpi[i].SrcReg[2].File = PROGRAM_TEMPORARY;
519 vpi[i].SrcReg[2].Index = 0;
520 vpi[i].SrcReg[2].Swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_W);
521 }
522 #endif
523 }
524
525 memcpy(&vpi[i], mesa_vp->Base.Instructions, mesa_vp->Base.NumInstructions * sizeof(struct prog_instruction));
526
527 free(mesa_vp->Base.Instructions);
528
529 mesa_vp->Base.Instructions = vpi;
530
531 mesa_vp->Base.NumInstructions += 4;
532 vpi = &mesa_vp->Base.Instructions[mesa_vp->Base.NumInstructions-1];
533
534 assert(vpi->Opcode == OPCODE_END);
535
536 mesa_vp->Base.InputsRead |= (1 << VERT_ATTRIB_POS);
537 mesa_vp->Base.OutputsWritten |= (1 << VERT_RESULT_HPOS);
538
539 //fprintf(stderr, "IsPositionInvariant is set!\n");
540 //_mesa_print_program(&mesa_vp->Base);
541 }
542
543 vp->pos_end = 0;
544 mesa_vp->Base.NumNativeInstructions = 0;
545 mesa_vp->Base.NumNativeParameters = mesa_vp->Base.Parameters->NumParameters;
546
547 for(i=0; i < VERT_ATTRIB_MAX; i++)
548 vp->inputs[i] = -1;
549 /* fglrx uses fixed inputs as follows for conventional attribs.
550 generic attribs use non-fixed assignment, fglrx will always use the lowest attrib values available.
551 There are 12 generic attribs possible, corresponding to attrib 0, 2-11 and 13 in a hw vertex prog.
552 attr 1 and 12 are not available for generic attribs as those cannot be made vec4 (correspond to
553 vertex normal/weight)
554 attr 0 is pos, R200_VTX_XY1|R200_VTX_Z1|R200_VTX_W1 in R200_SE_VTX_FMT_0
555 attr 2-5 use colors 0-3 (R200_VTX_FP_RGBA << R200_VTX_COLOR_0/1/2/3_SHIFT in R200_SE_VTX_FMT_0)
556 attr 6-11 use tex 0-5 (4 << R200_VTX_TEX0/1/2/3/4/5_COMP_CNT_SHIFT in R200_SE_VTX_FMT_1)
557 attr 13 uses vtx1 pos (R200_VTX_XY1|R200_VTX_Z1|R200_VTX_W1 in R200_SE_VTX_FMT_0)
558 generic attribs would require some more work (dma regions, renaming). */
559
560 /* may look different when using idx buf / input_route instead of se_vtx_fmt? */
561 vp->inputs[VERT_ATTRIB_POS] = 0;
562 vp->inputs[VERT_ATTRIB_WEIGHT] = 12;
563 vp->inputs[VERT_ATTRIB_NORMAL] = 1;
564 vp->inputs[VERT_ATTRIB_COLOR0] = 2;
565 vp->inputs[VERT_ATTRIB_COLOR1] = 3;
566 vp->inputs[VERT_ATTRIB_FOG] = 15;
567 vp->inputs[VERT_ATTRIB_TEX0] = 6;
568 vp->inputs[VERT_ATTRIB_TEX1] = 7;
569 vp->inputs[VERT_ATTRIB_TEX2] = 8;
570 vp->inputs[VERT_ATTRIB_TEX3] = 9;
571 vp->inputs[VERT_ATTRIB_TEX4] = 10;
572 vp->inputs[VERT_ATTRIB_TEX5] = 11;
573 /* attr 4,5 and 13 are only used with generic attribs.
574 Haven't seen attr 14 used, maybe that's for the hw pointsize vec1 (which is
575 not possibe to use with vertex progs as it is lacking in vert prog specification) */
576
577 assert(mesa_vp->Base.OutputsWritten & (1 << VERT_RESULT_HPOS));
578
579 vp->translated = GL_TRUE;
580
581 o_inst = vp->instr;
582 for(vpi = mesa_vp->Base.Instructions; vpi->Opcode != OPCODE_END; vpi++, o_inst++){
583 if (u_temp_i < mesa_vp->Base.NumTemporaries) {
584 if (R200_DEBUG & DEBUG_FALLBACKS) {
585 fprintf(stderr, "Ran out of temps, num temps %d, us %d\n", mesa_vp->Base.NumTemporaries, u_temp_i);
586 }
587 return GL_FALSE;
588 }
589 u_temp_i = R200_VSF_MAX_TEMPS - 1;
590 if(o_inst - vp->instr >= R200_VSF_MAX_INST) {
591 mesa_vp->Base.NumNativeInstructions = 129;
592 if (R200_DEBUG & DEBUG_FALLBACKS) {
593 fprintf(stderr, "more than 128 native instructions\n");
594 }
595 return GL_FALSE;
596 }
597
598 operands = op_operands(vpi->Opcode);
599 are_srcs_scalar = operands & SCALAR_FLAG;
600 operands &= OP_MASK;
601
602 for(i=0; i < operands; i++)
603 src[i] = vpi->SrcReg[i];
604
605 if(operands == 3){ /* TODO: scalars */
606 if( CMP_SRCS(src[1], src[2]) || CMP_SRCS(src[0], src[2]) ){
607 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_ADD,
608 (u_temp_i << R200_VPI_OUT_REG_INDEX_SHIFT) | R200_VSF_OUT_CLASS_TMP,
609 VSF_FLAG_ALL);
610
611 o_inst->src0 = MAKE_VSF_SOURCE(t_src_index(vp, &src[2]),
612 SWIZZLE_X, SWIZZLE_Y,
613 SWIZZLE_Z, SWIZZLE_W,
614 t_src_class(src[2].File), VSF_FLAG_NONE) | (src[2].RelAddr << 4);
615
616 o_inst->src1 = ZERO_SRC_0;
617 o_inst->src2 = UNUSED_SRC_1;
618 o_inst++;
619
620 src[2].File = PROGRAM_TEMPORARY;
621 src[2].Index = u_temp_i;
622 src[2].RelAddr = 0;
623 u_temp_i--;
624 }
625 }
626
627 if(operands >= 2){
628 if( CMP_SRCS(src[1], src[0]) ){
629 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_ADD,
630 (u_temp_i << R200_VPI_OUT_REG_INDEX_SHIFT) | R200_VSF_OUT_CLASS_TMP,
631 VSF_FLAG_ALL);
632
633 o_inst->src0 = MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
634 SWIZZLE_X, SWIZZLE_Y,
635 SWIZZLE_Z, SWIZZLE_W,
636 t_src_class(src[0].File), VSF_FLAG_NONE) | (src[0].RelAddr << 4);
637
638 o_inst->src1 = ZERO_SRC_0;
639 o_inst->src2 = UNUSED_SRC_1;
640 o_inst++;
641
642 src[0].File = PROGRAM_TEMPORARY;
643 src[0].Index = u_temp_i;
644 src[0].RelAddr = 0;
645 u_temp_i--;
646 }
647 }
648
649 /* These ops need special handling. */
650 switch(vpi->Opcode){
651 /* FIXME: ARL works fine, but negative offsets won't work - fglrx just sems to ignore neg offsets
652 which isn't quite correct... */
653 case OPCODE_ARL:
654 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_ARL, t_dst(&vpi->DstReg),
655 t_dst_mask(vpi->DstReg.WriteMask));
656 o_inst->src0 = t_src_scalar(vp, &src[0]);
657 o_inst->src1 = UNUSED_SRC_0;
658 o_inst->src2 = UNUSED_SRC_1;
659 goto next;
660
661 case OPCODE_POW:
662 /* pow takes only one argument, first scalar is in slot x, 2nd in slot z (other slots don't matter).
663 So may need to insert additional instruction */
664 /* this appears to be different to r300 */
665 if ((src[0].File == src[1].File) &&
666 (src[0].Index == src[1].Index)) {
667 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_POW, t_dst(&vpi->DstReg),
668 t_dst_mask(vpi->DstReg.WriteMask));
669 o_inst->src0 = MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
670 t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
671 SWIZZLE_ZERO,
672 t_swizzle(GET_SWZ(src[1].Swizzle, 0)),
673 SWIZZLE_ZERO,
674 t_src_class(src[0].File),
675 src[0].NegateBase) | (src[0].RelAddr << 4);
676 o_inst->src1 = UNUSED_SRC_0;
677 o_inst->src2 = UNUSED_SRC_0;
678 }
679 else {
680 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_ADD,
681 (u_temp_i << R200_VPI_OUT_REG_INDEX_SHIFT) | R200_VSF_OUT_CLASS_TMP,
682 VSF_FLAG_ALL);
683 o_inst->src0 = MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
684 t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
685 SWIZZLE_ZERO, SWIZZLE_ZERO, SWIZZLE_ZERO,
686 t_src_class(src[0].File),
687 src[0].NegateBase ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[0].RelAddr << 4);
688 o_inst->src1 = MAKE_VSF_SOURCE(t_src_index(vp, &src[1]),
689 SWIZZLE_ZERO, SWIZZLE_ZERO,
690 t_swizzle(GET_SWZ(src[1].Swizzle, 0)), SWIZZLE_ZERO,
691 t_src_class(src[1].File),
692 src[1].NegateBase ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[1].RelAddr << 4);
693 o_inst->src2 = UNUSED_SRC_1;
694 o_inst++;
695
696 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_POW, t_dst(&vpi->DstReg),
697 t_dst_mask(vpi->DstReg.WriteMask));
698 o_inst->src0 = MAKE_VSF_SOURCE(u_temp_i,
699 VSF_IN_COMPONENT_X,
700 VSF_IN_COMPONENT_Y,
701 VSF_IN_COMPONENT_Z,
702 VSF_IN_COMPONENT_W,
703 VSF_IN_CLASS_TMP,
704 VSF_FLAG_NONE);
705 o_inst->src1 = UNUSED_SRC_0;
706 o_inst->src2 = UNUSED_SRC_0;
707 u_temp_i--;
708 }
709 goto next;
710
711 case OPCODE_MOV://ADD RESULT 1.X Y Z W PARAM 0{} {X Y Z W} PARAM 0{} {ZERO ZERO ZERO ZERO}
712 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_ADD, t_dst(&vpi->DstReg),
713 t_dst_mask(vpi->DstReg.WriteMask));
714 o_inst->src0 = t_src(vp, &src[0]);
715 o_inst->src1 = ZERO_SRC_0;
716 o_inst->src2 = UNUSED_SRC_1;
717 goto next;
718 case OPCODE_MAD:
719 hw_op=(src[0].File == PROGRAM_TEMPORARY &&
720 src[1].File == PROGRAM_TEMPORARY &&
721 src[2].File == PROGRAM_TEMPORARY) ? R200_VPI_OUT_OP_MAD_2 : R200_VPI_OUT_OP_MAD;
722
723 o_inst->op = MAKE_VSF_OP(hw_op, t_dst(&vpi->DstReg),
724 t_dst_mask(vpi->DstReg.WriteMask));
725 o_inst->src0 = t_src(vp, &src[0]);
726 #if 0
727 if ((o_inst - vp->instr) == 31) {
728 /* fix up the broken vertex program of quake4 demo... */
729 o_inst->src1 = MAKE_VSF_SOURCE(t_src_index(vp, &src[1]),
730 SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X,
731 t_src_class(src[1].File),
732 src[1].NegateBase) | (src[1].RelAddr << 4);
733 o_inst->src2 = MAKE_VSF_SOURCE(t_src_index(vp, &src[1]),
734 SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y,
735 t_src_class(src[1].File),
736 src[1].NegateBase) | (src[1].RelAddr << 4);
737 }
738 else {
739 o_inst->src1 = t_src(vp, &src[1]);
740 o_inst->src2 = t_src(vp, &src[2]);
741 }
742 #else
743 o_inst->src1 = t_src(vp, &src[1]);
744 o_inst->src2 = t_src(vp, &src[2]);
745 #endif
746 goto next;
747
748 case OPCODE_DP3://DOT RESULT 1.X Y Z W PARAM 0{} {X Y Z ZERO} PARAM 0{} {X Y Z ZERO}
749 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_DOT, t_dst(&vpi->DstReg),
750 t_dst_mask(vpi->DstReg.WriteMask));
751
752 o_inst->src0 = MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
753 t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
754 t_swizzle(GET_SWZ(src[0].Swizzle, 1)),
755 t_swizzle(GET_SWZ(src[0].Swizzle, 2)),
756 SWIZZLE_ZERO,
757 t_src_class(src[0].File),
758 src[0].NegateBase) | (src[0].RelAddr << 4);
759
760 o_inst->src1 = MAKE_VSF_SOURCE(t_src_index(vp, &src[1]),
761 t_swizzle(GET_SWZ(src[1].Swizzle, 0)),
762 t_swizzle(GET_SWZ(src[1].Swizzle, 1)),
763 t_swizzle(GET_SWZ(src[1].Swizzle, 2)),
764 SWIZZLE_ZERO,
765 t_src_class(src[1].File),
766 src[1].NegateBase) | (src[1].RelAddr << 4);
767
768 o_inst->src2 = UNUSED_SRC_1;
769 goto next;
770
771 case OPCODE_SUB://ADD RESULT 1.X Y Z W TMP 0{} {X Y Z W} PARAM 1{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W
772 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_ADD, t_dst(&vpi->DstReg),
773 t_dst_mask(vpi->DstReg.WriteMask));
774
775 o_inst->src0 = t_src(vp, &src[0]);
776 o_inst->src1 = MAKE_VSF_SOURCE(t_src_index(vp, &src[1]),
777 t_swizzle(GET_SWZ(src[1].Swizzle, 0)),
778 t_swizzle(GET_SWZ(src[1].Swizzle, 1)),
779 t_swizzle(GET_SWZ(src[1].Swizzle, 2)),
780 t_swizzle(GET_SWZ(src[1].Swizzle, 3)),
781 t_src_class(src[1].File),
782 (!src[1].NegateBase) ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[1].RelAddr << 4);
783 o_inst->src2 = UNUSED_SRC_1;
784 goto next;
785
786 case OPCODE_ABS://MAX RESULT 1.X Y Z W PARAM 0{} {X Y Z W} PARAM 0{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W
787 o_inst->op=MAKE_VSF_OP(R200_VPI_OUT_OP_MAX, t_dst(&vpi->DstReg),
788 t_dst_mask(vpi->DstReg.WriteMask));
789
790 o_inst->src0=t_src(vp, &src[0]);
791 o_inst->src1=MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
792 t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
793 t_swizzle(GET_SWZ(src[0].Swizzle, 1)),
794 t_swizzle(GET_SWZ(src[0].Swizzle, 2)),
795 t_swizzle(GET_SWZ(src[0].Swizzle, 3)),
796 t_src_class(src[0].File),
797 (!src[0].NegateBase) ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[0].RelAddr << 4);
798 o_inst->src2 = UNUSED_SRC_1;
799 goto next;
800
801 case OPCODE_FLR:
802 /* FRC TMP 0.X Y Z W PARAM 0{} {X Y Z W}
803 ADD RESULT 1.X Y Z W PARAM 0{} {X Y Z W} TMP 0{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W */
804
805 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_FRC,
806 (u_temp_i << R200_VPI_OUT_REG_INDEX_SHIFT) | R200_VSF_OUT_CLASS_TMP,
807 t_dst_mask(vpi->DstReg.WriteMask));
808
809 o_inst->src0 = t_src(vp, &src[0]);
810 o_inst->src1 = UNUSED_SRC_0;
811 o_inst->src2 = UNUSED_SRC_1;
812 o_inst++;
813
814 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_ADD, t_dst(&vpi->DstReg),
815 t_dst_mask(vpi->DstReg.WriteMask));
816
817 o_inst->src0 = t_src(vp, &src[0]);
818 o_inst->src1 = MAKE_VSF_SOURCE(u_temp_i,
819 VSF_IN_COMPONENT_X,
820 VSF_IN_COMPONENT_Y,
821 VSF_IN_COMPONENT_Z,
822 VSF_IN_COMPONENT_W,
823 VSF_IN_CLASS_TMP,
824 /* Not 100% sure about this */
825 (!src[0].NegateBase) ? VSF_FLAG_ALL : VSF_FLAG_NONE/*VSF_FLAG_ALL*/);
826
827 o_inst->src2 = UNUSED_SRC_0;
828 u_temp_i--;
829 goto next;
830
831 case OPCODE_LG2:// LG2 RESULT 1.X Y Z W PARAM 0{} {X X X X}
832 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_LG2, t_dst(&vpi->DstReg),
833 t_dst_mask(vpi->DstReg.WriteMask));
834
835 o_inst->src0 = MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
836 t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
837 t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
838 t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
839 t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
840 t_src_class(src[0].File),
841 src[0].NegateBase ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[0].RelAddr << 4);
842 o_inst->src1 = UNUSED_SRC_0;
843 o_inst->src2 = UNUSED_SRC_0;
844 goto next;
845
846 case OPCODE_LIT://LIT TMP 1.Y Z TMP 1{} {X W Z Y} TMP 1{} {Y W Z X} TMP 1{} {Y X Z W}
847 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_LIT, t_dst(&vpi->DstReg),
848 t_dst_mask(vpi->DstReg.WriteMask));
849 /* r200 in contrast to r300 does not seem to need any complicated setup,
850 its LIT instruction is "more native" */
851 o_inst->src0 = t_src(vp, &src[0]);
852 o_inst->src1 = UNUSED_SRC_0;
853 o_inst->src2 = UNUSED_SRC_0;
854 goto next;
855
856 case OPCODE_DPH://DOT RESULT 1.X Y Z W PARAM 0{} {X Y Z ONE} PARAM 0{} {X Y Z W}
857 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_DOT, t_dst(&vpi->DstReg),
858 t_dst_mask(vpi->DstReg.WriteMask));
859
860 o_inst->src0 = MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
861 t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
862 t_swizzle(GET_SWZ(src[0].Swizzle, 1)),
863 t_swizzle(GET_SWZ(src[0].Swizzle, 2)),
864 VSF_IN_COMPONENT_ONE,
865 t_src_class(src[0].File),
866 src[0].NegateBase) | (src[0].RelAddr << 4);
867 o_inst->src1 = t_src(vp, &src[1]);
868 o_inst->src2 = UNUSED_SRC_1;
869 goto next;
870
871 case OPCODE_XPD:
872 /* mul r0, r1.yzxw, r2.zxyw
873 mad r0, -r2.yzxw, r1.zxyw, r0
874 NOTE: might need MAD_2
875 */
876
877 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_MUL,
878 (u_temp_i << R200_VPI_OUT_REG_INDEX_SHIFT) | R200_VSF_OUT_CLASS_TMP,
879 t_dst_mask(vpi->DstReg.WriteMask));
880
881 o_inst->src0 = MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
882 t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // y
883 t_swizzle(GET_SWZ(src[0].Swizzle, 2)), // z
884 t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // x
885 t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // w
886 t_src_class(src[0].File),
887 src[0].NegateBase) | (src[0].RelAddr << 4);
888
889 o_inst->src1 = MAKE_VSF_SOURCE(t_src_index(vp, &src[1]),
890 t_swizzle(GET_SWZ(src[1].Swizzle, 2)), // z
891 t_swizzle(GET_SWZ(src[1].Swizzle, 0)), // x
892 t_swizzle(GET_SWZ(src[1].Swizzle, 1)), // y
893 t_swizzle(GET_SWZ(src[1].Swizzle, 3)), // w
894 t_src_class(src[1].File),
895 src[1].NegateBase) | (src[1].RelAddr << 4);
896
897 o_inst->src2 = UNUSED_SRC_1;
898 o_inst++;
899 u_temp_i--;
900
901 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_MAD, t_dst(&vpi->DstReg),
902 t_dst_mask(vpi->DstReg.WriteMask));
903
904 o_inst->src0 = MAKE_VSF_SOURCE(t_src_index(vp, &src[1]),
905 t_swizzle(GET_SWZ(src[1].Swizzle, 1)), // y
906 t_swizzle(GET_SWZ(src[1].Swizzle, 2)), // z
907 t_swizzle(GET_SWZ(src[1].Swizzle, 0)), // x
908 t_swizzle(GET_SWZ(src[1].Swizzle, 3)), // w
909 t_src_class(src[1].File),
910 (!src[1].NegateBase) ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[1].RelAddr << 4);
911
912 o_inst->src1 = MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
913 t_swizzle(GET_SWZ(src[0].Swizzle, 2)), // z
914 t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // x
915 t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // y
916 t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // w
917 t_src_class(src[0].File),
918 src[0].NegateBase) | (src[0].RelAddr << 4);
919
920 o_inst->src2 = MAKE_VSF_SOURCE(u_temp_i+1,
921 VSF_IN_COMPONENT_X,
922 VSF_IN_COMPONENT_Y,
923 VSF_IN_COMPONENT_Z,
924 VSF_IN_COMPONENT_W,
925 VSF_IN_CLASS_TMP,
926 VSF_FLAG_NONE);
927
928 goto next;
929
930 case OPCODE_SWZ:
931 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_ADD, t_dst(&vpi->DstReg),
932 t_dst_mask(vpi->DstReg.WriteMask));
933 o_inst->src0 = t_src(vp, &src[0]);
934 o_inst->src1 = ZERO_SRC_0;
935 o_inst->src2 = UNUSED_SRC_1;
936 goto next;
937
938 case OPCODE_RCC:
939 if (R200_DEBUG & DEBUG_FALLBACKS) {
940 fprintf(stderr, "Don't know how to handle op %d yet\n", vpi->Opcode);
941 }
942 return GL_FALSE;
943 break;
944 case OPCODE_END:
945 break;
946 default:
947 break;
948 }
949
950 o_inst->op = MAKE_VSF_OP(t_opcode(vpi->Opcode), t_dst(&vpi->DstReg),
951 t_dst_mask(vpi->DstReg.WriteMask));
952
953 if(are_srcs_scalar){
954 switch(operands){
955 case 1:
956 o_inst->src0 = t_src_scalar(vp, &src[0]);
957 o_inst->src1 = UNUSED_SRC_0;
958 o_inst->src2 = UNUSED_SRC_1;
959 break;
960
961 case 2:
962 o_inst->src0 = t_src_scalar(vp, &src[0]);
963 o_inst->src1 = t_src_scalar(vp, &src[1]);
964 o_inst->src2 = UNUSED_SRC_1;
965 break;
966
967 case 3:
968 o_inst->src0 = t_src_scalar(vp, &src[0]);
969 o_inst->src1 = t_src_scalar(vp, &src[1]);
970 o_inst->src2 = t_src_scalar(vp, &src[2]);
971 break;
972
973 default:
974 fprintf(stderr, "illegal number of operands %lu\n", operands);
975 exit(-1);
976 break;
977 }
978 } else {
979 switch(operands){
980 case 1:
981 o_inst->src0 = t_src(vp, &src[0]);
982 o_inst->src1 = UNUSED_SRC_0;
983 o_inst->src2 = UNUSED_SRC_1;
984 break;
985
986 case 2:
987 o_inst->src0 = t_src(vp, &src[0]);
988 o_inst->src1 = t_src(vp, &src[1]);
989 o_inst->src2 = UNUSED_SRC_1;
990 break;
991
992 case 3:
993 o_inst->src0 = t_src(vp, &src[0]);
994 o_inst->src1 = t_src(vp, &src[1]);
995 o_inst->src2 = t_src(vp, &src[2]);
996 break;
997
998 default:
999 fprintf(stderr, "illegal number of operands %lu\n", operands);
1000 exit(-1);
1001 break;
1002 }
1003 }
1004 next:
1005 if ((o_inst->op & R200_VSF_OUT_CLASS_MASK) == R200_VSF_OUT_CLASS_RESULT_POS) {
1006 vp->pos_end = (o_inst - vp->instr);
1007 }
1008 }
1009
1010 /* need to test again since some instructions require more than one (up to 3) native inst */
1011 if(o_inst - vp->instr > R200_VSF_MAX_INST) {
1012 mesa_vp->Base.NumNativeInstructions = 129;
1013 if (R200_DEBUG & DEBUG_FALLBACKS) {
1014 fprintf(stderr, "more than 128 native instructions\n");
1015 }
1016 return GL_FALSE;
1017 }
1018 vp->native = GL_TRUE;
1019 mesa_vp->Base.NumNativeInstructions = (o_inst - vp->instr);
1020 #if 0
1021 fprintf(stderr, "hw program:\n");
1022 for(i=0; i < vp->program.length; i++)
1023 fprintf(stderr, "%08x\n", vp->instr[i]);
1024 #endif
1025 return GL_TRUE;
1026 }
1027
1028 void r200SetupVertexProg( GLcontext *ctx ) {
1029 r200ContextPtr rmesa = R200_CONTEXT(ctx);
1030 struct r200_vertex_program *vp = (struct r200_vertex_program *)ctx->VertexProgram.Current;
1031 GLboolean fallback;
1032 GLint i;
1033
1034 if (!vp->translated) {
1035 rmesa->curr_vp_hw = NULL;
1036 r200_translate_vertex_program(vp);
1037 }
1038 /* could optimize setting up vertex progs away for non-tcl hw */
1039 fallback = !(vp->native && r200VertexProgUpdateParams(ctx, vp) &&
1040 rmesa->r200Screen->drmSupportsVertexProgram);
1041 TCL_FALLBACK(ctx, R200_TCL_FALLBACK_VERTEX_PROGRAM, fallback);
1042 if (fallback) return;
1043
1044 R200_STATECHANGE( rmesa, pvs );
1045
1046 rmesa->hw.pvs.cmd[PVS_CNTL_1] = (0 << R200_PVS_CNTL_1_PROGRAM_START_SHIFT) |
1047 ((vp->mesa_program.Base.NumNativeInstructions - 1) << R200_PVS_CNTL_1_PROGRAM_END_SHIFT) |
1048 (vp->pos_end << R200_PVS_CNTL_1_POS_END_SHIFT);
1049 rmesa->hw.pvs.cmd[PVS_CNTL_2] = (0 << R200_PVS_CNTL_2_PARAM_OFFSET_SHIFT) |
1050 (vp->mesa_program.Base.NumNativeParameters << R200_PVS_CNTL_2_PARAM_COUNT_SHIFT);
1051
1052 /* maybe user clip planes just work with vertex progs... untested */
1053 if (ctx->Transform.ClipPlanesEnabled) {
1054 R200_STATECHANGE( rmesa, tcl );
1055 if (vp->mesa_program.IsPositionInvariant) {
1056 rmesa->hw.tcl.cmd[TCL_UCP_VERT_BLEND_CTL] |= (ctx->Transform.ClipPlanesEnabled << 2);
1057 }
1058 else {
1059 rmesa->hw.tcl.cmd[TCL_UCP_VERT_BLEND_CTL] &= ~(0xfc);
1060 }
1061 }
1062
1063 if (vp != rmesa->curr_vp_hw) {
1064 GLuint count = vp->mesa_program.Base.NumNativeInstructions;
1065 drm_radeon_cmd_header_t tmp;
1066
1067 R200_STATECHANGE( rmesa, vpi[0] );
1068 R200_STATECHANGE( rmesa, vpi[1] );
1069
1070 /* FIXME: what about using a memcopy... */
1071 for (i = 0; (i < 64) && i < count; i++) {
1072 rmesa->hw.vpi[0].cmd[VPI_OPDST_0 + 4 * i] = vp->instr[i].op;
1073 rmesa->hw.vpi[0].cmd[VPI_SRC0_0 + 4 * i] = vp->instr[i].src0;
1074 rmesa->hw.vpi[0].cmd[VPI_SRC1_0 + 4 * i] = vp->instr[i].src1;
1075 rmesa->hw.vpi[0].cmd[VPI_SRC2_0 + 4 * i] = vp->instr[i].src2;
1076 }
1077 /* hack up the cmd_size so not the whole state atom is emitted always.
1078 This may require some more thought, we may emit half progs on lost state, but
1079 hopefully it won't matter?
1080 WARNING: must not use R200_DB_STATECHANGE, this will produce bogus (and rejected)
1081 packet emits (due to the mismatched cmd_size and count in cmd/last_cmd) */
1082 rmesa->hw.vpi[0].cmd_size = 1 + 4 * ((count > 64) ? 64 : count);
1083 tmp.i = rmesa->hw.vpi[0].cmd[VPI_CMD_0];
1084 tmp.veclinear.count = (count > 64) ? 64 : count;
1085 rmesa->hw.vpi[0].cmd[VPI_CMD_0] = tmp.i;
1086 if (count > 64) {
1087 for (i = 0; i < (count - 64); i++) {
1088 rmesa->hw.vpi[1].cmd[VPI_OPDST_0 + 4 * i] = vp->instr[i + 64].op;
1089 rmesa->hw.vpi[1].cmd[VPI_SRC0_0 + 4 * i] = vp->instr[i + 64].src0;
1090 rmesa->hw.vpi[1].cmd[VPI_SRC1_0 + 4 * i] = vp->instr[i + 64].src1;
1091 rmesa->hw.vpi[1].cmd[VPI_SRC2_0 + 4 * i] = vp->instr[i + 64].src2;
1092 }
1093 rmesa->hw.vpi[1].cmd_size = 1 + 4 * (count - 64);
1094 tmp.i = rmesa->hw.vpi[1].cmd[VPI_CMD_0];
1095 tmp.veclinear.count = count - 64;
1096 rmesa->hw.vpi[1].cmd[VPI_CMD_0] = tmp.i;
1097 }
1098 rmesa->curr_vp_hw = vp;
1099 }
1100 }
1101
1102
1103 static void r200BindProgram(GLcontext *ctx, GLenum target, struct program *prog)
1104 {
1105 r200ContextPtr rmesa = R200_CONTEXT(ctx);
1106
1107 switch(target){
1108 case GL_VERTEX_PROGRAM_ARB:
1109 rmesa->curr_vp_hw = NULL;
1110 break;
1111 default:
1112 _mesa_problem(ctx, "Target not supported yet!");
1113 break;
1114 }
1115 }
1116
1117 static struct program *r200NewProgram(GLcontext *ctx, GLenum target, GLuint id)
1118 {
1119 struct r200_vertex_program *vp;
1120
1121 switch(target){
1122 case GL_VERTEX_PROGRAM_ARB:
1123 vp = CALLOC_STRUCT(r200_vertex_program);
1124 return _mesa_init_vertex_program(ctx, &vp->mesa_program, target, id);
1125 case GL_FRAGMENT_PROGRAM_ARB:
1126 case GL_FRAGMENT_PROGRAM_NV:
1127 return _mesa_init_fragment_program( ctx, CALLOC_STRUCT(fragment_program), target, id );
1128 default:
1129 _mesa_problem(ctx, "Bad target in r200NewProgram");
1130 }
1131 return NULL;
1132 }
1133
1134
1135 static void r200DeleteProgram(GLcontext *ctx, struct program *prog)
1136 {
1137 _mesa_delete_program(ctx, prog);
1138 }
1139
1140 static void r200ProgramStringNotify(GLcontext *ctx, GLenum target, struct program *prog)
1141 {
1142 struct r200_vertex_program *vp = (void *)prog;
1143
1144 switch(target) {
1145 case GL_VERTEX_PROGRAM_ARB:
1146 vp->translated = GL_FALSE;
1147 memset(&vp->translated, 0, sizeof(struct r200_vertex_program) - sizeof(struct vertex_program));
1148 /*r200_translate_vertex_shader(vp);*/
1149 break;
1150 }
1151 /* need this for tcl fallbacks */
1152 _tnl_program_string(ctx, target, prog);
1153 }
1154
1155 static GLboolean r200IsProgramNative(GLcontext *ctx, GLenum target, struct program *prog)
1156 {
1157 struct r200_vertex_program *vp = (void *)prog;
1158
1159 switch(target){
1160 case GL_VERTEX_STATE_PROGRAM_NV:
1161 case GL_VERTEX_PROGRAM_ARB:
1162 if (!vp->translated) {
1163 r200_translate_vertex_program(vp);
1164 }
1165 /* does not take parameters etc. into account */
1166 return vp->native;
1167 default:
1168 _mesa_problem(ctx, "Bad target in r200NewProgram");
1169 }
1170 return 0;
1171 }
1172
1173 void r200InitShaderFuncs(struct dd_function_table *functions)
1174 {
1175 functions->NewProgram = r200NewProgram;
1176 functions->BindProgram = r200BindProgram;
1177 functions->DeleteProgram = r200DeleteProgram;
1178 functions->ProgramStringNotify = r200ProgramStringNotify;
1179 functions->IsProgramNative = r200IsProgramNative;
1180 }