1 /**************************************************************************
3 Copyright (C) 2005 Aapo Tahkola.
7 Permission is hereby granted, free of charge, to any person obtaining a
8 copy of this software and associated documentation files (the "Software"),
9 to deal in the Software without restriction, including without limitation
10 on the rights to use, copy, modify, merge, publish, distribute, sub
11 license, and/or sell copies of the Software, and to permit persons to whom
12 the Software is furnished to do so, subject to the following conditions:
14 The above copyright notice and this permission notice (including the next
15 paragraph) shall be included in all copies or substantial portions of the
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
21 THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
22 DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
23 OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
24 USE OR OTHER DEALINGS IN THE SOFTWARE.
26 **************************************************************************/
30 * Aapo Tahkola <aet@rasterburn.org>
31 * Roland Scheidegger <rscheidegger_lists@hispeed.ch>
33 #include "main/glheader.h"
34 #include "main/macros.h"
35 #include "main/enums.h"
36 #include "shader/program.h"
37 #include "shader/prog_instruction.h"
38 #include "shader/prog_parameter.h"
39 #include "shader/prog_statevars.h"
40 #include "shader/programopt.h"
43 #include "r200_context.h"
44 #include "r200_vertprog.h"
45 #include "r200_ioctl.h"
48 #if SWIZZLE_X != VSF_IN_COMPONENT_X || \
49 SWIZZLE_Y != VSF_IN_COMPONENT_Y || \
50 SWIZZLE_Z != VSF_IN_COMPONENT_Z || \
51 SWIZZLE_W != VSF_IN_COMPONENT_W || \
52 SWIZZLE_ZERO != VSF_IN_COMPONENT_ZERO || \
53 SWIZZLE_ONE != VSF_IN_COMPONENT_ONE || \
54 WRITEMASK_X != VSF_FLAG_X || \
55 WRITEMASK_Y != VSF_FLAG_Y || \
56 WRITEMASK_Z != VSF_FLAG_Z || \
57 WRITEMASK_W != VSF_FLAG_W
58 #error Cannot change these!
61 #define SCALAR_FLAG (1<<31)
62 #define FLAG_MASK (1<<31)
63 #define OP_MASK (0xf) /* we are unlikely to have more than 15 */
64 #define OPN(operator, ip) {#operator, OPCODE_##operator, ip}
69 unsigned long ip
; /* number of input operands and flags */
73 OPN(ARL
, 1|SCALAR_FLAG
),
78 OPN(EX2
, 1|SCALAR_FLAG
),
79 OPN(EXP
, 1|SCALAR_FLAG
),
82 OPN(LG2
, 1|SCALAR_FLAG
),
84 OPN(LOG
, 1|SCALAR_FLAG
),
90 OPN(POW
, 2|SCALAR_FLAG
),
91 OPN(RCP
, 1|SCALAR_FLAG
),
92 OPN(RSQ
, 1|SCALAR_FLAG
),
/*
 * Copy the current values of a vertex program's parameters into the payload
 * of the vpp[0] / vpp[1] hardware state atoms.
 *
 * ctx - GL context; passed to _mesa_load_state_parameters().
 * vp  - driver vertex program; its mesa_program.Base.Parameters list is read
 *       (and asserted to be non-NULL).
 *
 * What the visible code does:
 *  - invokes R200_STATECHANGE on vpp[0] and vpp[1];
 *  - reloads the parameter list through _mesa_load_state_parameters();
 *  - prints "Params exhausted" when the list holds more than
 *    R200_VSF_MAX_PARAM entries;
 *  - writes the four floats of every PROGRAM_STATE_VAR, PROGRAM_NAMED_PARAM
 *    and PROGRAM_CONSTANT parameter through fcmd;
 *  - sets cmd_size to 1 + 4 * count and stores the same count in the
 *    veclinear header of each atom, where the first 96 parameters are
 *    counted for vpp[0] and the remainder for vpp[1].
 *
 * NOTE(review): the return statements are not visible in this view;
 * presumably GL_FALSE is returned on the error paths and GL_TRUE otherwise -
 * confirm against the full file.
 */
103 static GLboolean
r200VertexProgUpdateParams(GLcontext
*ctx
, struct r200_vertex_program
*vp
)
105 r200ContextPtr rmesa
= R200_CONTEXT( ctx
);
106 GLfloat
*fcmd
= (GLfloat
*)&rmesa
->hw
.vpp
[0].cmd
[VPP_CMD_0
+ 1];
108 struct gl_vertex_program
*mesa_vp
= &vp
->mesa_program
;
109 struct gl_program_parameter_list
*paramList
;
110 drm_radeon_cmd_header_t tmp
;
/* R200_STATECHANGE is invoked for both parameter atoms before their payload is rewritten. */
112 R200_STATECHANGE( rmesa
, vpp
[0] );
113 R200_STATECHANGE( rmesa
, vpp
[1] );
114 assert(mesa_vp
->Base
.Parameters
);
115 _mesa_load_state_parameters(ctx
, mesa_vp
->Base
.Parameters
);
116 paramList
= mesa_vp
->Base
.Parameters
;
/* Too many parameters: report it (the rest of this branch is not visible in this view). */
118 if(paramList
->NumParameters
> R200_VSF_MAX_PARAM
){
119 fprintf(stderr
, "%s:Params exhausted\n", __FUNCTION__
);
/* Write the four components of each supported parameter through fcmd. */
123 for(pi
= 0; pi
< paramList
->NumParameters
; pi
++) {
124 switch(paramList
->Parameters
[pi
].Type
) {
125 case PROGRAM_STATE_VAR
:
126 case PROGRAM_NAMED_PARAM
:
127 //fprintf(stderr, "%s", vp->Parameters->Parameters[pi].Name);
128 case PROGRAM_CONSTANT
:
129 *fcmd
++ = paramList
->ParameterValues
[pi
][0];
130 *fcmd
++ = paramList
->ParameterValues
[pi
][1];
131 *fcmd
++ = paramList
->ParameterValues
[pi
][2];
132 *fcmd
++ = paramList
->ParameterValues
[pi
][3];
/* Reported for other parameter types (the case label is not visible in this view). */
135 _mesa_problem(NULL
, "Bad param type in %s", __FUNCTION__
);
/* Redirect the write pointer to the payload of vpp[1].
   NOTE(review): the condition guarding this switch is not visible here - confirm. */
139 fcmd
= (GLfloat
*)&rmesa
->hw
.vpp
[1].cmd
[VPP_CMD_0
+ 1];
142 /* Shrink cmd_size so that the whole state atom is not always emitted. */
143 rmesa
->hw
.vpp
[0].cmd_size
=
144 1 + 4 * ((paramList
->NumParameters
> 96) ? 96 : paramList
->NumParameters
);
145 tmp
.i
= rmesa
->hw
.vpp
[0].cmd
[VPP_CMD_0
];
146 tmp
.veclinear
.count
= (paramList
->NumParameters
> 96) ? 96 : paramList
->NumParameters
;
147 rmesa
->hw
.vpp
[0].cmd
[VPP_CMD_0
] = tmp
.i
;
/* Parameters beyond the first 96 are counted for vpp[1]. */
148 if (paramList
->NumParameters
> 96) {
149 rmesa
->hw
.vpp
[1].cmd_size
= 1 + 4 * (paramList
->NumParameters
- 96);
150 tmp
.i
= rmesa
->hw
.vpp
[1].cmd
[VPP_CMD_0
];
151 tmp
.veclinear
.count
= paramList
->NumParameters
- 96;
152 rmesa
->hw
.vpp
[1].cmd
[VPP_CMD_0
] = tmp
.i
;
/*
 * Translate a Mesa write mask into the hardware write-mask flags.
 * The mask is passed through unchanged apart from being limited to the bits
 * in VSF_FLAG_ALL.
 */
157 static __inline
unsigned long t_dst_mask(GLuint mask
)
159 /* WRITEMASK_* is equivalent to VSF_FLAG_* */
160 return mask
& VSF_FLAG_ALL
;
/*
 * Translate a Mesa destination register into the hardware output-register
 * encoding: an output class, combined where needed with the register index
 * shifted by R200_VPI_OUT_REG_INDEX_SHIFT.
 *
 * Visible mappings:
 *  - PROGRAM_TEMPORARY: dst->Index with class TMP;
 *  - result registers selected by dst->Index: HPOS -> RESULT_POS,
 *    COL0 -> RESULT_COLOR, COL1 -> RESULT_COLOR with index 1,
 *    FOGC -> RESULT_FOGC, TEX0..TEX5 -> RESULT_TEXC with index
 *    (dst->Index - VERT_RESULT_TEX0), PSIZ -> RESULT_POINTSIZE;
 *  - PROGRAM_ADDRESS: class ADDR, dst->Index asserted to be 0.
 * A diagnostic is printed to stderr for an unknown output register or an
 * unknown register type.
 *
 * NOTE(review): the outer switch header, the label introducing the
 * result-register switch and the statements following the diagnostics are
 * not visible in this view.
 */
163 static unsigned long t_dst(struct prog_dst_register
*dst
)
166 case PROGRAM_TEMPORARY
:
167 return ((dst
->Index
<< R200_VPI_OUT_REG_INDEX_SHIFT
)
168 | R200_VSF_OUT_CLASS_TMP
);
170 switch (dst
->Index
) {
171 case VERT_RESULT_HPOS
:
172 return R200_VSF_OUT_CLASS_RESULT_POS
;
173 case VERT_RESULT_COL0
:
174 return R200_VSF_OUT_CLASS_RESULT_COLOR
;
175 case VERT_RESULT_COL1
:
176 return ((1 << R200_VPI_OUT_REG_INDEX_SHIFT
)
177 | R200_VSF_OUT_CLASS_RESULT_COLOR
);
178 case VERT_RESULT_FOGC
:
179 return R200_VSF_OUT_CLASS_RESULT_FOGC
;
180 case VERT_RESULT_TEX0
:
181 case VERT_RESULT_TEX1
:
182 case VERT_RESULT_TEX2
:
183 case VERT_RESULT_TEX3
:
184 case VERT_RESULT_TEX4
:
185 case VERT_RESULT_TEX5
:
186 return (((dst
->Index
- VERT_RESULT_TEX0
) << R200_VPI_OUT_REG_INDEX_SHIFT
)
187 | R200_VSF_OUT_CLASS_RESULT_TEXC
);
188 case VERT_RESULT_PSIZ
:
189 return R200_VSF_OUT_CLASS_RESULT_POINTSIZE
;
191 fprintf(stderr
, "problem in %s, unknown dst output reg %d\n", __FUNCTION__
, dst
->Index
);
195 case PROGRAM_ADDRESS
:
196 assert (dst
->Index
== 0);
197 return R200_VSF_OUT_CLASS_ADDR
;
199 fprintf(stderr
, "problem in %s, unknown register type %d\n", __FUNCTION__
, dst
->File
);
/*
 * Map a Mesa register file to the hardware source-operand class.
 *
 * Visible mappings: PROGRAM_TEMPORARY -> VSF_IN_CLASS_TMP;
 * PROGRAM_LOCAL_PARAM, PROGRAM_ENV_PARAM, PROGRAM_NAMED_PARAM and
 * PROGRAM_STATE_VAR -> VSF_IN_CLASS_PARAM. VSF_IN_CLASS_ATTR is also
 * returned, but its case label is not visible in this view (presumably the
 * vertex-input file - confirm). A "problem in" diagnostic is printed on the
 * remaining path.
 *
 * NOTE(review): the switch header, several labels and the statements after
 * the diagnostic are not visible in this view.
 */
205 static unsigned long t_src_class(enum register_file file
)
209 case PROGRAM_TEMPORARY
:
210 return VSF_IN_CLASS_TMP
;
213 return VSF_IN_CLASS_ATTR
;
215 case PROGRAM_LOCAL_PARAM
:
216 case PROGRAM_ENV_PARAM
:
217 case PROGRAM_NAMED_PARAM
:
218 case PROGRAM_STATE_VAR
:
219 return VSF_IN_CLASS_PARAM
;
222 case PROGRAM_WRITE_ONLY:
223 case PROGRAM_ADDRESS:
226 fprintf(stderr
, "problem in %s", __FUNCTION__
);
/*
 * Convert a Mesa swizzle selector into the hardware component selector.
 * NOTE(review): the return statement is not visible in this view; according
 * to the comment below the value is passed through unchanged - confirm.
 */
231 static __inline
unsigned long t_swizzle(GLubyte swizzle
)
233 /* this is in fact a NOP as the Mesa SWIZZLE_* are all identical to VSF_IN_COMPONENT_* */
/*
 * Debug helper: print the vp->inputs[] table to stderr.
 *
 * vp     - program whose input mapping is dumped.
 * caller - name printed in front of the dump (and in the "vp null" message).
 *
 * Output has the form "<caller>:<v0 v1 ... >" with one entry for each of the
 * VERT_ATTRIB_MAX slots.
 *
 * NOTE(review): the NULL test that guards the "vp null" message is not
 * visible in this view.
 */
238 static void vp_dump_inputs(struct r200_vertex_program
*vp
, char *caller
)
243 fprintf(stderr
, "vp null in call to %s from %s\n", __FUNCTION__
, caller
);
247 fprintf(stderr
, "%s:<", caller
);
248 for(i
=0; i
< VERT_ATTRIB_MAX
; i
++)
249 fprintf(stderr
, "%d ", vp
->inputs
[i
]);
250 fprintf(stderr
, ">\n");
/*
 * Return the hardware register index for a Mesa source register.
 *
 * For PROGRAM_INPUT sources the index is looked up in vp->inputs[], which is
 * asserted to hold an assigned slot (!= -1). On the other path a warning is
 * printed when src->Index is negative.
 *
 * NOTE(review): the return statement for non-input sources is not visible
 * in this view.
 */
255 static unsigned long t_src_index(struct r200_vertex_program
*vp
, struct prog_src_register
*src
)
261 if(src
->File
== PROGRAM_INPUT
){
262 /* if(vp->inputs[src->Index] != -1)
263 return vp->inputs[src->Index];
265 for(i=0; i < VERT_ATTRIB_MAX; i++)
266 if(vp->inputs[i] > max_reg)
267 max_reg = vp->inputs[i];
269 vp->inputs[src->Index] = max_reg+1;*/
271 //vp_dump_inputs(vp, __FUNCTION__);
272 assert(vp
->inputs
[src
->Index
] != -1);
273 return vp
->inputs
[src
->Index
];
275 if (src
->Index
< 0) {
276 fprintf(stderr
, "WARNING negative offsets for indirect addressing do not work\n");
/*
 * Build the hardware encoding of a vector source operand.
 *
 * MAKE_VSF_SOURCE is given the hardware index from t_src_index(), the four
 * swizzle selectors of src->Swizzle (components 0..3), the source class from
 * t_src_class() and src->NegateBase; src->RelAddr shifted left by 4 is ORed
 * into the result.
 */
283 static unsigned long t_src(struct r200_vertex_program
*vp
, struct prog_src_register
*src
)
286 return MAKE_VSF_SOURCE(t_src_index(vp
, src
),
287 t_swizzle(GET_SWZ(src
->Swizzle
, 0)),
288 t_swizzle(GET_SWZ(src
->Swizzle
, 1)),
289 t_swizzle(GET_SWZ(src
->Swizzle
, 2)),
290 t_swizzle(GET_SWZ(src
->Swizzle
, 3)),
291 t_src_class(src
->File
),
292 src
->NegateBase
) | (src
->RelAddr
<< 4);
/*
 * Build the hardware encoding of a scalar source operand.
 *
 * Same construction as t_src(), except that swizzle component 0 is used for
 * all four selectors and negation is all-or-nothing: VSF_FLAG_ALL when
 * src->NegateBase is non-zero, VSF_FLAG_NONE otherwise. src->RelAddr shifted
 * left by 4 is ORed into the result.
 */
295 static unsigned long t_src_scalar(struct r200_vertex_program
*vp
, struct prog_src_register
*src
)
298 return MAKE_VSF_SOURCE(t_src_index(vp
, src
),
299 t_swizzle(GET_SWZ(src
->Swizzle
, 0)),
300 t_swizzle(GET_SWZ(src
->Swizzle
, 0)),
301 t_swizzle(GET_SWZ(src
->Swizzle
, 0)),
302 t_swizzle(GET_SWZ(src
->Swizzle
, 0)),
303 t_src_class(src
->File
),
304 src
->NegateBase
? VSF_FLAG_ALL
: VSF_FLAG_NONE
) | (src
->RelAddr
<< 4);
/*
 * Map a Mesa opcode that has a direct hardware equivalent to the
 * corresponding R200_VPI_OUT_OP_* value.
 *
 * Visible mappings: ADD, ARL, DP4 (-> DOT), DST, EX2, EXP, FRC, LG2, LIT,
 * LOG, MAX, MIN, MUL, RCP, RSQ, SGE and SLT. On the remaining path a
 * "Should not be called with opcode" diagnostic is printed to stderr.
 *
 * NOTE(review): the switch header, the label in front of the diagnostic and
 * the terminator of the FIXME comment below are not visible in this view.
 */
307 static unsigned long t_opcode(enum prog_opcode opcode
)
311 case OPCODE_ADD
: return R200_VPI_OUT_OP_ADD
;
312 /* FIXME: ARL works fine, but negative offsets won't work - fglrx just
313 * seems to ignore neg offsets which isn't quite correct...
315 case OPCODE_ARL
: return R200_VPI_OUT_OP_ARL
;
316 case OPCODE_DP4
: return R200_VPI_OUT_OP_DOT
;
317 case OPCODE_DST
: return R200_VPI_OUT_OP_DST
;
318 case OPCODE_EX2
: return R200_VPI_OUT_OP_EX2
;
319 case OPCODE_EXP
: return R200_VPI_OUT_OP_EXP
;
320 case OPCODE_FRC
: return R200_VPI_OUT_OP_FRC
;
321 case OPCODE_LG2
: return R200_VPI_OUT_OP_LG2
;
322 case OPCODE_LIT
: return R200_VPI_OUT_OP_LIT
;
323 case OPCODE_LOG
: return R200_VPI_OUT_OP_LOG
;
324 case OPCODE_MAX
: return R200_VPI_OUT_OP_MAX
;
325 case OPCODE_MIN
: return R200_VPI_OUT_OP_MIN
;
326 case OPCODE_MUL
: return R200_VPI_OUT_OP_MUL
;
327 case OPCODE_RCP
: return R200_VPI_OUT_OP_RCP
;
328 case OPCODE_RSQ
: return R200_VPI_OUT_OP_RSQ
;
329 case OPCODE_SGE
: return R200_VPI_OUT_OP_SGE
;
330 case OPCODE_SLT
: return R200_VPI_OUT_OP_SLT
;
333 fprintf(stderr
, "%s: Should not be called with opcode %d!", __FUNCTION__
, opcode
);
/*
 * Look up the `ip` field (number of input operands plus flags) of an opcode.
 *
 * op_names[] is searched linearly for an entry whose opcode matches; its ip
 * value is returned. When no entry matches, a diagnostic is printed to
 * stderr.
 *
 * NOTE(review): what happens after the diagnostic is not visible in this
 * view.
 */
339 static unsigned long op_operands(enum prog_opcode opcode
)
343 /* Can we trust Mesa's opcodes to be in order? */
344 for(i
=0; i
< sizeof(op_names
) / sizeof(*op_names
); i
++)
345 if(op_names
[i
].opcode
== opcode
)
346 return op_names
[i
].ip
;
348 fprintf(stderr
, "op %d not found in op_names\n", opcode
);
353 /* TODO: Get rid of t_src_class call */
354 #define CMP_SRCS(a, b) (((a.RelAddr != b.RelAddr) || (a.Index != b.Index)) && \
355 ((t_src_class(a.File) == VSF_IN_CLASS_PARAM && \
356 t_src_class(b.File) == VSF_IN_CLASS_PARAM) || \
357 (t_src_class(a.File) == VSF_IN_CLASS_ATTR && \
358 t_src_class(b.File) == VSF_IN_CLASS_ATTR))) \
360 /* fglrx on rv250 codes up unused sources as follows:
361 unused but necessary sources are same as previous source, zero-ed out.
362 unnecessary sources are same as previous source but with VSF_IN_CLASS_NONE set.
363 i.e. an add (2 args) has its 2nd arg (if you use it as mov) zero-ed out, and 3rd arg
364 set to VSF_IN_CLASS_NONE. Not sure if strictly necessary. */
366 /* Use these simpler definitions. They must not be used before the source register they read has been set up.
367 They are NOT semantically equivalent to the r300 ones; code written for those requires changes. */
368 #define ZERO_SRC_0 (((o_inst->src0 & ~(0xfff << R200_VPI_IN_X_SHIFT)) \
369 | ((R200_VPI_IN_SELECT_ZERO << R200_VPI_IN_X_SHIFT) \
370 | (R200_VPI_IN_SELECT_ZERO << R200_VPI_IN_Y_SHIFT) \
371 | (R200_VPI_IN_SELECT_ZERO << R200_VPI_IN_Z_SHIFT) \
372 | (R200_VPI_IN_SELECT_ZERO << R200_VPI_IN_W_SHIFT))))
374 #define ZERO_SRC_1 (((o_inst->src1 & ~(0xfff << R200_VPI_IN_X_SHIFT)) \
375 | ((R200_VPI_IN_SELECT_ZERO << R200_VPI_IN_X_SHIFT) \
376 | (R200_VPI_IN_SELECT_ZERO << R200_VPI_IN_Y_SHIFT) \
377 | (R200_VPI_IN_SELECT_ZERO << R200_VPI_IN_Z_SHIFT) \
378 | (R200_VPI_IN_SELECT_ZERO << R200_VPI_IN_W_SHIFT))))
380 #define ZERO_SRC_2 (((o_inst->src2 & ~(0xfff << R200_VPI_IN_X_SHIFT)) \
381 | ((R200_VPI_IN_SELECT_ZERO << R200_VPI_IN_X_SHIFT) \
382 | (R200_VPI_IN_SELECT_ZERO << R200_VPI_IN_Y_SHIFT) \
383 | (R200_VPI_IN_SELECT_ZERO << R200_VPI_IN_Z_SHIFT) \
384 | (R200_VPI_IN_SELECT_ZERO << R200_VPI_IN_W_SHIFT))))
386 #define UNUSED_SRC_0 ((o_inst->src0 & ~15) | 9)
388 #define UNUSED_SRC_1 ((o_inst->src1 & ~15) | 9)
390 #define UNUSED_SRC_2 ((o_inst->src2 & ~15) | 9)
394 * Generate an R200 vertex program from Mesa's internal representation.
396 * \return GL_TRUE for success, GL_FALSE for failure.
398 static GLboolean
r200_translate_vertex_program(GLcontext
*ctx
, struct r200_vertex_program
*vp
)
400 struct gl_vertex_program
*mesa_vp
= &vp
->mesa_program
;
401 struct prog_instruction
*vpi
;
403 VERTEX_SHADER_INSTRUCTION
*o_inst
;
404 unsigned long operands
;
412 vp
->native
= GL_FALSE
;
413 vp
->translated
= GL_TRUE
;
414 vp
->fogmode
= ctx
->Fog
.Mode
;
416 if (mesa_vp
->Base
.NumInstructions
== 0)
420 if ((mesa_vp
->Base
.InputsRead
&
421 ~(VERT_BIT_POS
| VERT_BIT_NORMAL
| VERT_BIT_COLOR0
| VERT_BIT_COLOR1
|
422 VERT_BIT_FOG
| VERT_BIT_TEX0
| VERT_BIT_TEX1
| VERT_BIT_TEX2
|
423 VERT_BIT_TEX3
| VERT_BIT_TEX4
| VERT_BIT_TEX5
)) != 0) {
424 if (R200_DEBUG
& DEBUG_FALLBACKS
) {
425 fprintf(stderr
, "can't handle vert prog inputs 0x%x\n",
426 mesa_vp
->Base
.InputsRead
);
432 if ((mesa_vp
->Base
.OutputsWritten
&
433 ~((1 << VERT_RESULT_HPOS
) | (1 << VERT_RESULT_COL0
) | (1 << VERT_RESULT_COL1
) |
434 (1 << VERT_RESULT_FOGC
) | (1 << VERT_RESULT_TEX0
) | (1 << VERT_RESULT_TEX1
) |
435 (1 << VERT_RESULT_TEX2
) | (1 << VERT_RESULT_TEX3
) | (1 << VERT_RESULT_TEX4
) |
436 (1 << VERT_RESULT_TEX5
) | (1 << VERT_RESULT_PSIZ
))) != 0) {
437 if (R200_DEBUG
& DEBUG_FALLBACKS
) {
438 fprintf(stderr
, "can't handle vert prog outputs 0x%x\n",
439 mesa_vp
->Base
.OutputsWritten
);
444 if (mesa_vp
->IsNVProgram
) {
445 /* subtle differences in spec like guaranteed initialized regs could cause
446 headaches. Might want to remove the driconf option to enable it completely */
449 /* Initial value should be last tmp reg that hw supports.
450 Strangely enough r300 doesn't mind even though these would be out of range.
451 Smart enough to realize that it doesn't need it? */
452 int u_temp_i
= R200_VSF_MAX_TEMPS
- 1;
453 struct prog_src_register src
[3];
454 struct prog_dst_register dst
;
456 /* FIXME: is changing the prog safe to do here? */
457 if (mesa_vp
->IsPositionInvariant
&&
458 /* make sure we only do this once */
459 !(mesa_vp
->Base
.OutputsWritten
& (1 << VERT_RESULT_HPOS
))) {
460 _mesa_insert_mvp_code(ctx
, mesa_vp
);
463 /* for fogc, can't change mesa_vp, as it would hose swtnl, and exp with
464 base e isn't directly available either. */
465 if ((mesa_vp
->Base
.OutputsWritten
& (1 << VERT_RESULT_FOGC
)) && !vp
->fogpidx
) {
466 struct gl_program_parameter_list
*paramList
;
467 gl_state_index tokens
[STATE_LENGTH
] = { STATE_FOG_PARAMS
, 0, 0, 0, 0 };
468 paramList
= mesa_vp
->Base
.Parameters
;
469 vp
->fogpidx
= _mesa_add_state_reference(paramList
, tokens
);
473 mesa_vp
->Base
.NumNativeInstructions
= 0;
474 if (mesa_vp
->Base
.Parameters
)
475 mesa_vp
->Base
.NumNativeParameters
= mesa_vp
->Base
.Parameters
->NumParameters
;
477 mesa_vp
->Base
.NumNativeParameters
= 0;
479 for(i
= 0; i
< VERT_ATTRIB_MAX
; i
++)
481 for(i
= 0; i
< 15; i
++)
482 vp
->inputmap_rev
[i
] = 255;
483 free_inputs
= 0x2ffd;
485 /* fglrx uses fixed inputs as follows for conventional attribs.
486 generic attribs use non-fixed assignment, fglrx will always use the
487 lowest attrib values available. We'll just do the same.
488 There are 12 generic attribs possible, corresponding to attrib 0, 2-11
489 and 13 in a hw vertex prog.
490 attr 1 and 12 aren't used for generic attribs as those cannot be made vec4
491 (correspond to vertex normal/weight - maybe weight actually could be made vec4).
492 Additionally, not more than 12 arrays in total are possible I think.
493 attr 0 is pos, R200_VTX_XY1|R200_VTX_Z1|R200_VTX_W1 in R200_SE_VTX_FMT_0
494 attr 2-5 use colors 0-3 (R200_VTX_FP_RGBA << R200_VTX_COLOR_0/1/2/3_SHIFT in R200_SE_VTX_FMT_0)
495 attr 6-11 use tex 0-5 (4 << R200_VTX_TEX0/1/2/3/4/5_COMP_CNT_SHIFT in R200_SE_VTX_FMT_1)
496 attr 13 uses vtx1 pos (R200_VTX_XY1|R200_VTX_Z1|R200_VTX_W1 in R200_SE_VTX_FMT_0)
499 /* attr 4,5 and 13 are only used with generic attribs.
500 Haven't seen attr 14 used, maybe that's for the hw pointsize vec1 (which is
501 not possible to use with vertex progs as it is missing from the vertex program specification) */
502 /* may look different when using idx buf / input_route instead of se_vtx_fmt? */
503 if (mesa_vp
->Base
.InputsRead
& VERT_BIT_POS
) {
504 vp
->inputs
[VERT_ATTRIB_POS
] = 0;
505 vp
->inputmap_rev
[0] = VERT_ATTRIB_POS
;
506 free_inputs
&= ~(1 << 0);
509 if (mesa_vp
->Base
.InputsRead
& VERT_BIT_WEIGHT
) {
510 vp
->inputs
[VERT_ATTRIB_WEIGHT
] = 12;
511 vp
->inputmap_rev
[1] = VERT_ATTRIB_WEIGHT
;
514 if (mesa_vp
->Base
.InputsRead
& VERT_BIT_NORMAL
) {
515 vp
->inputs
[VERT_ATTRIB_NORMAL
] = 1;
516 vp
->inputmap_rev
[2] = VERT_ATTRIB_NORMAL
;
519 if (mesa_vp
->Base
.InputsRead
& VERT_BIT_COLOR0
) {
520 vp
->inputs
[VERT_ATTRIB_COLOR0
] = 2;
521 vp
->inputmap_rev
[4] = VERT_ATTRIB_COLOR0
;
522 free_inputs
&= ~(1 << 2);
525 if (mesa_vp
->Base
.InputsRead
& VERT_BIT_COLOR1
) {
526 vp
->inputs
[VERT_ATTRIB_COLOR1
] = 3;
527 vp
->inputmap_rev
[5] = VERT_ATTRIB_COLOR1
;
528 free_inputs
&= ~(1 << 3);
531 if (mesa_vp
->Base
.InputsRead
& VERT_BIT_FOG
) {
532 vp
->inputs
[VERT_ATTRIB_FOG
] = 15; array_count
++;
533 vp
->inputmap_rev
[3] = VERT_ATTRIB_FOG
;
536 for (i
= VERT_ATTRIB_TEX0
; i
<= VERT_ATTRIB_TEX5
; i
++) {
537 if (mesa_vp
->Base
.InputsRead
& (1 << i
)) {
538 vp
->inputs
[i
] = i
- VERT_ATTRIB_TEX0
+ 6;
539 vp
->inputmap_rev
[8 + i
- VERT_ATTRIB_TEX0
] = i
;
540 free_inputs
&= ~(1 << (i
- VERT_ATTRIB_TEX0
+ 6));
544 /* using VERT_ATTRIB_TEX6/7 would be illegal */
545 /* completely ignore aliasing? */
546 for (i
= VERT_ATTRIB_GENERIC0
; i
< VERT_ATTRIB_MAX
; i
++) {
548 /* completely ignore aliasing? */
549 if (mesa_vp
->Base
.InputsRead
& (1 << i
)) {
551 if (array_count
> 12) {
552 if (R200_DEBUG
& DEBUG_FALLBACKS
) {
553 fprintf(stderr
, "more than 12 attribs used in vert prog\n");
557 for (j
= 0; j
< 14; j
++) {
558 /* will always find one due to limited array_count */
559 if (free_inputs
& (1 << j
)) {
560 free_inputs
&= ~(1 << j
);
562 if (j
== 0) vp
->inputmap_rev
[j
] = i
; /* mapped to pos */
563 else if (j
< 12) vp
->inputmap_rev
[j
+ 2] = i
; /* mapped to col/tex */
564 else vp
->inputmap_rev
[j
+ 1] = i
; /* mapped to pos1 */
571 if (!(mesa_vp
->Base
.OutputsWritten
& (1 << VERT_RESULT_HPOS
))) {
572 if (R200_DEBUG
& DEBUG_FALLBACKS
) {
573 fprintf(stderr
, "can't handle vert prog without position output\n");
577 if (free_inputs
& 1) {
578 if (R200_DEBUG
& DEBUG_FALLBACKS
) {
579 fprintf(stderr
, "can't handle vert prog without position input\n");
585 for (vpi
= mesa_vp
->Base
.Instructions
; vpi
->Opcode
!= OPCODE_END
; vpi
++, o_inst
++){
586 operands
= op_operands(vpi
->Opcode
);
587 are_srcs_scalar
= operands
& SCALAR_FLAG
;
590 for(i
= 0; i
< operands
; i
++) {
591 src
[i
] = vpi
->SrcReg
[i
];
592 /* hack up default attrib values as per spec as swizzling.
593 normal, fog, secondary color. Crazy?
594 May need more if we don't submit vec4 elements? */
595 if (src
[i
].File
== PROGRAM_INPUT
) {
596 if (src
[i
].Index
== VERT_ATTRIB_NORMAL
) {
598 for (j
= 0; j
< 4; j
++) {
599 if (GET_SWZ(src
[i
].Swizzle
, j
) == SWIZZLE_W
) {
600 src
[i
].Swizzle
&= ~(SWIZZLE_W
<< (j
*3));
601 src
[i
].Swizzle
|= SWIZZLE_ONE
<< (j
*3);
605 else if (src
[i
].Index
== VERT_ATTRIB_COLOR1
) {
607 for (j
= 0; j
< 4; j
++) {
608 if (GET_SWZ(src
[i
].Swizzle
, j
) == SWIZZLE_W
) {
609 src
[i
].Swizzle
&= ~(SWIZZLE_W
<< (j
*3));
610 src
[i
].Swizzle
|= SWIZZLE_ZERO
<< (j
*3);
614 else if (src
[i
].Index
== VERT_ATTRIB_FOG
) {
616 for (j
= 0; j
< 4; j
++) {
617 if (GET_SWZ(src
[i
].Swizzle
, j
) == SWIZZLE_W
) {
618 src
[i
].Swizzle
&= ~(SWIZZLE_W
<< (j
*3));
619 src
[i
].Swizzle
|= SWIZZLE_ONE
<< (j
*3);
621 else if ((GET_SWZ(src
[i
].Swizzle
, j
) == SWIZZLE_Y
) ||
622 GET_SWZ(src
[i
].Swizzle
, j
) == SWIZZLE_Z
) {
623 src
[i
].Swizzle
&= ~(SWIZZLE_W
<< (j
*3));
624 src
[i
].Swizzle
|= SWIZZLE_ZERO
<< (j
*3);
632 if( CMP_SRCS(src
[1], src
[2]) || CMP_SRCS(src
[0], src
[2]) ){
633 o_inst
->op
= MAKE_VSF_OP(R200_VPI_OUT_OP_ADD
,
634 (u_temp_i
<< R200_VPI_OUT_REG_INDEX_SHIFT
) | R200_VSF_OUT_CLASS_TMP
,
637 o_inst
->src0
= MAKE_VSF_SOURCE(t_src_index(vp
, &src
[2]),
638 SWIZZLE_X
, SWIZZLE_Y
,
639 SWIZZLE_Z
, SWIZZLE_W
,
640 t_src_class(src
[2].File
), VSF_FLAG_NONE
) | (src
[2].RelAddr
<< 4);
642 o_inst
->src1
= ZERO_SRC_0
;
643 o_inst
->src2
= UNUSED_SRC_1
;
646 src
[2].File
= PROGRAM_TEMPORARY
;
647 src
[2].Index
= u_temp_i
;
654 if( CMP_SRCS(src
[1], src
[0]) ){
655 o_inst
->op
= MAKE_VSF_OP(R200_VPI_OUT_OP_ADD
,
656 (u_temp_i
<< R200_VPI_OUT_REG_INDEX_SHIFT
) | R200_VSF_OUT_CLASS_TMP
,
659 o_inst
->src0
= MAKE_VSF_SOURCE(t_src_index(vp
, &src
[0]),
660 SWIZZLE_X
, SWIZZLE_Y
,
661 SWIZZLE_Z
, SWIZZLE_W
,
662 t_src_class(src
[0].File
), VSF_FLAG_NONE
) | (src
[0].RelAddr
<< 4);
664 o_inst
->src1
= ZERO_SRC_0
;
665 o_inst
->src2
= UNUSED_SRC_1
;
668 src
[0].File
= PROGRAM_TEMPORARY
;
669 src
[0].Index
= u_temp_i
;
676 if (dst
.File
== PROGRAM_OUTPUT
&&
677 dst
.Index
== VERT_RESULT_FOGC
&&
678 dst
.WriteMask
& WRITEMASK_X
) {
679 fog_temp_i
= u_temp_i
;
680 dst
.File
= PROGRAM_TEMPORARY
;
681 dst
.Index
= fog_temp_i
;
686 /* These ops need special handling. */
689 /* pow takes only one argument, first scalar is in slot x, 2nd in slot z (other slots don't matter).
690 So may need to insert additional instruction */
691 if ((src
[0].File
== src
[1].File
) &&
692 (src
[0].Index
== src
[1].Index
)) {
693 o_inst
->op
= MAKE_VSF_OP(R200_VPI_OUT_OP_POW
, t_dst(&dst
),
694 t_dst_mask(dst
.WriteMask
));
695 o_inst
->src0
= MAKE_VSF_SOURCE(t_src_index(vp
, &src
[0]),
696 t_swizzle(GET_SWZ(src
[0].Swizzle
, 0)),
698 t_swizzle(GET_SWZ(src
[1].Swizzle
, 0)),
700 t_src_class(src
[0].File
),
701 src
[0].NegateBase
) | (src
[0].RelAddr
<< 4);
702 o_inst
->src1
= UNUSED_SRC_0
;
703 o_inst
->src2
= UNUSED_SRC_0
;
706 o_inst
->op
= MAKE_VSF_OP(R200_VPI_OUT_OP_ADD
,
707 (u_temp_i
<< R200_VPI_OUT_REG_INDEX_SHIFT
) | R200_VSF_OUT_CLASS_TMP
,
709 o_inst
->src0
= MAKE_VSF_SOURCE(t_src_index(vp
, &src
[0]),
710 t_swizzle(GET_SWZ(src
[0].Swizzle
, 0)),
711 SWIZZLE_ZERO
, SWIZZLE_ZERO
, SWIZZLE_ZERO
,
712 t_src_class(src
[0].File
),
713 src
[0].NegateBase
? VSF_FLAG_ALL
: VSF_FLAG_NONE
) | (src
[0].RelAddr
<< 4);
714 o_inst
->src1
= MAKE_VSF_SOURCE(t_src_index(vp
, &src
[1]),
715 SWIZZLE_ZERO
, SWIZZLE_ZERO
,
716 t_swizzle(GET_SWZ(src
[1].Swizzle
, 0)), SWIZZLE_ZERO
,
717 t_src_class(src
[1].File
),
718 src
[1].NegateBase
? VSF_FLAG_ALL
: VSF_FLAG_NONE
) | (src
[1].RelAddr
<< 4);
719 o_inst
->src2
= UNUSED_SRC_1
;
722 o_inst
->op
= MAKE_VSF_OP(R200_VPI_OUT_OP_POW
, t_dst(&dst
),
723 t_dst_mask(dst
.WriteMask
));
724 o_inst
->src0
= MAKE_VSF_SOURCE(u_temp_i
,
731 o_inst
->src1
= UNUSED_SRC_0
;
732 o_inst
->src2
= UNUSED_SRC_0
;
737 case OPCODE_MOV
://ADD RESULT 1.X Y Z W PARAM 0{} {X Y Z W} PARAM 0{} {ZERO ZERO ZERO ZERO}
739 o_inst
->op
= MAKE_VSF_OP(R200_VPI_OUT_OP_ADD
, t_dst(&dst
),
740 t_dst_mask(dst
.WriteMask
));
741 o_inst
->src0
= t_src(vp
, &src
[0]);
742 o_inst
->src1
= ZERO_SRC_0
;
743 o_inst
->src2
= UNUSED_SRC_1
;
747 hw_op
=(src
[0].File
== PROGRAM_TEMPORARY
&&
748 src
[1].File
== PROGRAM_TEMPORARY
&&
749 src
[2].File
== PROGRAM_TEMPORARY
) ? R200_VPI_OUT_OP_MAD_2
: R200_VPI_OUT_OP_MAD
;
751 o_inst
->op
= MAKE_VSF_OP(hw_op
, t_dst(&dst
),
752 t_dst_mask(dst
.WriteMask
));
753 o_inst
->src0
= t_src(vp
, &src
[0]);
755 if ((o_inst
- vp
->instr
) == 31) {
756 /* fix up the broken vertex program of quake4 demo... */
757 o_inst
->src1
= MAKE_VSF_SOURCE(t_src_index(vp
, &src
[1]),
758 SWIZZLE_X
, SWIZZLE_X
, SWIZZLE_X
, SWIZZLE_X
,
759 t_src_class(src
[1].File
),
760 src
[1].NegateBase
) | (src
[1].RelAddr
<< 4);
761 o_inst
->src2
= MAKE_VSF_SOURCE(t_src_index(vp
, &src
[1]),
762 SWIZZLE_Y
, SWIZZLE_Y
, SWIZZLE_Y
, SWIZZLE_Y
,
763 t_src_class(src
[1].File
),
764 src
[1].NegateBase
) | (src
[1].RelAddr
<< 4);
767 o_inst
->src1
= t_src(vp
, &src
[1]);
768 o_inst
->src2
= t_src(vp
, &src
[2]);
771 o_inst
->src1
= t_src(vp
, &src
[1]);
772 o_inst
->src2
= t_src(vp
, &src
[2]);
776 case OPCODE_DP3
://DOT RESULT 1.X Y Z W PARAM 0{} {X Y Z ZERO} PARAM 0{} {X Y Z ZERO}
777 o_inst
->op
= MAKE_VSF_OP(R200_VPI_OUT_OP_DOT
, t_dst(&dst
),
778 t_dst_mask(dst
.WriteMask
));
780 o_inst
->src0
= MAKE_VSF_SOURCE(t_src_index(vp
, &src
[0]),
781 t_swizzle(GET_SWZ(src
[0].Swizzle
, 0)),
782 t_swizzle(GET_SWZ(src
[0].Swizzle
, 1)),
783 t_swizzle(GET_SWZ(src
[0].Swizzle
, 2)),
785 t_src_class(src
[0].File
),
786 src
[0].NegateBase
) | (src
[0].RelAddr
<< 4);
788 o_inst
->src1
= MAKE_VSF_SOURCE(t_src_index(vp
, &src
[1]),
789 t_swizzle(GET_SWZ(src
[1].Swizzle
, 0)),
790 t_swizzle(GET_SWZ(src
[1].Swizzle
, 1)),
791 t_swizzle(GET_SWZ(src
[1].Swizzle
, 2)),
793 t_src_class(src
[1].File
),
794 src
[1].NegateBase
) | (src
[1].RelAddr
<< 4);
796 o_inst
->src2
= UNUSED_SRC_1
;
799 case OPCODE_DPH
://DOT RESULT 1.X Y Z W PARAM 0{} {X Y Z ONE} PARAM 0{} {X Y Z W}
800 o_inst
->op
= MAKE_VSF_OP(R200_VPI_OUT_OP_DOT
, t_dst(&dst
),
801 t_dst_mask(dst
.WriteMask
));
803 o_inst
->src0
= MAKE_VSF_SOURCE(t_src_index(vp
, &src
[0]),
804 t_swizzle(GET_SWZ(src
[0].Swizzle
, 0)),
805 t_swizzle(GET_SWZ(src
[0].Swizzle
, 1)),
806 t_swizzle(GET_SWZ(src
[0].Swizzle
, 2)),
807 VSF_IN_COMPONENT_ONE
,
808 t_src_class(src
[0].File
),
809 src
[0].NegateBase
) | (src
[0].RelAddr
<< 4);
810 o_inst
->src1
= t_src(vp
, &src
[1]);
811 o_inst
->src2
= UNUSED_SRC_1
;
814 case OPCODE_SUB
://ADD RESULT 1.X Y Z W TMP 0{} {X Y Z W} PARAM 1{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W
815 o_inst
->op
= MAKE_VSF_OP(R200_VPI_OUT_OP_ADD
, t_dst(&dst
),
816 t_dst_mask(dst
.WriteMask
));
818 o_inst
->src0
= t_src(vp
, &src
[0]);
819 o_inst
->src1
= MAKE_VSF_SOURCE(t_src_index(vp
, &src
[1]),
820 t_swizzle(GET_SWZ(src
[1].Swizzle
, 0)),
821 t_swizzle(GET_SWZ(src
[1].Swizzle
, 1)),
822 t_swizzle(GET_SWZ(src
[1].Swizzle
, 2)),
823 t_swizzle(GET_SWZ(src
[1].Swizzle
, 3)),
824 t_src_class(src
[1].File
),
825 (!src
[1].NegateBase
) ? VSF_FLAG_ALL
: VSF_FLAG_NONE
) | (src
[1].RelAddr
<< 4);
826 o_inst
->src2
= UNUSED_SRC_1
;
829 case OPCODE_ABS
://MAX RESULT 1.X Y Z W PARAM 0{} {X Y Z W} PARAM 0{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W
830 o_inst
->op
=MAKE_VSF_OP(R200_VPI_OUT_OP_MAX
, t_dst(&dst
),
831 t_dst_mask(dst
.WriteMask
));
833 o_inst
->src0
=t_src(vp
, &src
[0]);
834 o_inst
->src1
=MAKE_VSF_SOURCE(t_src_index(vp
, &src
[0]),
835 t_swizzle(GET_SWZ(src
[0].Swizzle
, 0)),
836 t_swizzle(GET_SWZ(src
[0].Swizzle
, 1)),
837 t_swizzle(GET_SWZ(src
[0].Swizzle
, 2)),
838 t_swizzle(GET_SWZ(src
[0].Swizzle
, 3)),
839 t_src_class(src
[0].File
),
840 (!src
[0].NegateBase
) ? VSF_FLAG_ALL
: VSF_FLAG_NONE
) | (src
[0].RelAddr
<< 4);
841 o_inst
->src2
= UNUSED_SRC_1
;
845 /* FRC TMP 0.X Y Z W PARAM 0{} {X Y Z W}
846 ADD RESULT 1.X Y Z W PARAM 0{} {X Y Z W} TMP 0{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W */
848 o_inst
->op
= MAKE_VSF_OP(R200_VPI_OUT_OP_FRC
,
849 (u_temp_i
<< R200_VPI_OUT_REG_INDEX_SHIFT
) | R200_VSF_OUT_CLASS_TMP
,
850 t_dst_mask(dst
.WriteMask
));
852 o_inst
->src0
= t_src(vp
, &src
[0]);
853 o_inst
->src1
= UNUSED_SRC_0
;
854 o_inst
->src2
= UNUSED_SRC_1
;
857 o_inst
->op
= MAKE_VSF_OP(R200_VPI_OUT_OP_ADD
, t_dst(&dst
),
858 t_dst_mask(dst
.WriteMask
));
860 o_inst
->src0
= t_src(vp
, &src
[0]);
861 o_inst
->src1
= MAKE_VSF_SOURCE(u_temp_i
,
867 /* Not 100% sure about this */
868 (!src
[0].NegateBase
) ? VSF_FLAG_ALL
: VSF_FLAG_NONE
/*VSF_FLAG_ALL*/);
870 o_inst
->src2
= UNUSED_SRC_0
;
875 /* mul r0, r1.yzxw, r2.zxyw
876 mad r0, -r2.yzxw, r1.zxyw, r0
877 NOTE: might need MAD_2
880 o_inst
->op
= MAKE_VSF_OP(R200_VPI_OUT_OP_MUL
,
881 (u_temp_i
<< R200_VPI_OUT_REG_INDEX_SHIFT
) | R200_VSF_OUT_CLASS_TMP
,
882 t_dst_mask(dst
.WriteMask
));
884 o_inst
->src0
= MAKE_VSF_SOURCE(t_src_index(vp
, &src
[0]),
885 t_swizzle(GET_SWZ(src
[0].Swizzle
, 1)), // y
886 t_swizzle(GET_SWZ(src
[0].Swizzle
, 2)), // z
887 t_swizzle(GET_SWZ(src
[0].Swizzle
, 0)), // x
888 t_swizzle(GET_SWZ(src
[0].Swizzle
, 3)), // w
889 t_src_class(src
[0].File
),
890 src
[0].NegateBase
) | (src
[0].RelAddr
<< 4);
892 o_inst
->src1
= MAKE_VSF_SOURCE(t_src_index(vp
, &src
[1]),
893 t_swizzle(GET_SWZ(src
[1].Swizzle
, 2)), // z
894 t_swizzle(GET_SWZ(src
[1].Swizzle
, 0)), // x
895 t_swizzle(GET_SWZ(src
[1].Swizzle
, 1)), // y
896 t_swizzle(GET_SWZ(src
[1].Swizzle
, 3)), // w
897 t_src_class(src
[1].File
),
898 src
[1].NegateBase
) | (src
[1].RelAddr
<< 4);
900 o_inst
->src2
= UNUSED_SRC_1
;
904 o_inst
->op
= MAKE_VSF_OP(R200_VPI_OUT_OP_MAD
, t_dst(&dst
),
905 t_dst_mask(dst
.WriteMask
));
907 o_inst
->src0
= MAKE_VSF_SOURCE(t_src_index(vp
, &src
[1]),
908 t_swizzle(GET_SWZ(src
[1].Swizzle
, 1)), // y
909 t_swizzle(GET_SWZ(src
[1].Swizzle
, 2)), // z
910 t_swizzle(GET_SWZ(src
[1].Swizzle
, 0)), // x
911 t_swizzle(GET_SWZ(src
[1].Swizzle
, 3)), // w
912 t_src_class(src
[1].File
),
913 (!src
[1].NegateBase
) ? VSF_FLAG_ALL
: VSF_FLAG_NONE
) | (src
[1].RelAddr
<< 4);
915 o_inst
->src1
= MAKE_VSF_SOURCE(t_src_index(vp
, &src
[0]),
916 t_swizzle(GET_SWZ(src
[0].Swizzle
, 2)), // z
917 t_swizzle(GET_SWZ(src
[0].Swizzle
, 0)), // x
918 t_swizzle(GET_SWZ(src
[0].Swizzle
, 1)), // y
919 t_swizzle(GET_SWZ(src
[0].Swizzle
, 3)), // w
920 t_src_class(src
[0].File
),
921 src
[0].NegateBase
) | (src
[0].RelAddr
<< 4);
923 o_inst
->src2
= MAKE_VSF_SOURCE(u_temp_i
+1,
938 o_inst
->op
= MAKE_VSF_OP(t_opcode(vpi
->Opcode
), t_dst(&dst
),
939 t_dst_mask(dst
.WriteMask
));
944 o_inst
->src0
= t_src_scalar(vp
, &src
[0]);
945 o_inst
->src1
= UNUSED_SRC_0
;
946 o_inst
->src2
= UNUSED_SRC_1
;
950 o_inst
->src0
= t_src_scalar(vp
, &src
[0]);
951 o_inst
->src1
= t_src_scalar(vp
, &src
[1]);
952 o_inst
->src2
= UNUSED_SRC_1
;
956 o_inst
->src0
= t_src_scalar(vp
, &src
[0]);
957 o_inst
->src1
= t_src_scalar(vp
, &src
[1]);
958 o_inst
->src2
= t_src_scalar(vp
, &src
[2]);
962 fprintf(stderr
, "illegal number of operands %lu\n", operands
);
969 o_inst
->src0
= t_src(vp
, &src
[0]);
970 o_inst
->src1
= UNUSED_SRC_0
;
971 o_inst
->src2
= UNUSED_SRC_1
;
975 o_inst
->src0
= t_src(vp
, &src
[0]);
976 o_inst
->src1
= t_src(vp
, &src
[1]);
977 o_inst
->src2
= UNUSED_SRC_1
;
981 o_inst
->src0
= t_src(vp
, &src
[0]);
982 o_inst
->src1
= t_src(vp
, &src
[1]);
983 o_inst
->src2
= t_src(vp
, &src
[2]);
987 fprintf(stderr
, "illegal number of operands %lu\n", operands
);
996 if (vp
->fogmode
== GL_EXP
) {
997 o_inst
->op
= MAKE_VSF_OP(R200_VPI_OUT_OP_MUL
,
998 (fog_temp_i
<< R200_VPI_OUT_REG_INDEX_SHIFT
) | R200_VSF_OUT_CLASS_TMP
,
1000 o_inst
->src0
= EASY_VSF_SOURCE(fog_temp_i
, X
, X
, X
, X
, TMP
, NONE
);
1001 o_inst
->src1
= EASY_VSF_SOURCE(vp
->fogpidx
, X
, X
, X
, X
, PARAM
, NONE
);
1002 o_inst
->src2
= UNUSED_SRC_1
;
1004 o_inst
->op
= MAKE_VSF_OP(R200_VPI_OUT_OP_EXP_E
,
1005 R200_VSF_OUT_CLASS_RESULT_FOGC
,
1007 o_inst
->src0
= EASY_VSF_SOURCE(fog_temp_i
, X
, X
, X
, X
, TMP
, ALL
);
1008 o_inst
->src1
= UNUSED_SRC_0
;
1009 o_inst
->src2
= UNUSED_SRC_1
;
1011 else if (vp
->fogmode
== GL_EXP2
) {
1012 o_inst
->op
= MAKE_VSF_OP(R200_VPI_OUT_OP_MUL
,
1013 (fog_temp_i
<< R200_VPI_OUT_REG_INDEX_SHIFT
) | R200_VSF_OUT_CLASS_TMP
,
1015 o_inst
->src0
= EASY_VSF_SOURCE(fog_temp_i
, X
, X
, X
, X
, TMP
, NONE
);
1016 o_inst
->src1
= EASY_VSF_SOURCE(vp
->fogpidx
, X
, X
, X
, X
, PARAM
, NONE
);
1017 o_inst
->src2
= UNUSED_SRC_1
;
1019 o_inst
->op
= MAKE_VSF_OP(R200_VPI_OUT_OP_MUL
,
1020 (fog_temp_i
<< R200_VPI_OUT_REG_INDEX_SHIFT
) | R200_VSF_OUT_CLASS_TMP
,
1022 o_inst
->src0
= EASY_VSF_SOURCE(fog_temp_i
, X
, X
, X
, X
, TMP
, NONE
);
1023 o_inst
->src1
= EASY_VSF_SOURCE(fog_temp_i
, X
, X
, X
, X
, TMP
, NONE
);
1024 o_inst
->src2
= UNUSED_SRC_1
;
1026 o_inst
->op
= MAKE_VSF_OP(R200_VPI_OUT_OP_EXP_E
,
1027 R200_VSF_OUT_CLASS_RESULT_FOGC
,
1029 o_inst
->src0
= EASY_VSF_SOURCE(fog_temp_i
, X
, X
, X
, X
, TMP
, ALL
);
1030 o_inst
->src1
= UNUSED_SRC_0
;
1031 o_inst
->src2
= UNUSED_SRC_1
;
1033 else { /* fogmode == GL_LINEAR */
1034 /* could do that with single op (dot) if using params like
1035 with fixed function pipeline fog */
1036 o_inst
->op
= MAKE_VSF_OP(R200_VPI_OUT_OP_ADD
,
1037 (fog_temp_i
<< R200_VPI_OUT_REG_INDEX_SHIFT
) | R200_VSF_OUT_CLASS_TMP
,
1039 o_inst
->src0
= EASY_VSF_SOURCE(fog_temp_i
, X
, X
, X
, X
, TMP
, ALL
);
1040 o_inst
->src1
= EASY_VSF_SOURCE(vp
->fogpidx
, Z
, Z
, Z
, Z
, PARAM
, NONE
);
1041 o_inst
->src2
= UNUSED_SRC_1
;
1043 o_inst
->op
= MAKE_VSF_OP(R200_VPI_OUT_OP_MUL
,
1044 R200_VSF_OUT_CLASS_RESULT_FOGC
,
1046 o_inst
->src0
= EASY_VSF_SOURCE(fog_temp_i
, X
, X
, X
, X
, TMP
, NONE
);
1047 o_inst
->src1
= EASY_VSF_SOURCE(vp
->fogpidx
, W
, W
, W
, W
, PARAM
, NONE
);
1048 o_inst
->src2
= UNUSED_SRC_1
;
1054 if (mesa_vp
->Base
.NumNativeTemporaries
<
1055 (mesa_vp
->Base
.NumTemporaries
+ (R200_VSF_MAX_TEMPS
- 1 - u_temp_i
))) {
1056 mesa_vp
->Base
.NumNativeTemporaries
=
1057 mesa_vp
->Base
.NumTemporaries
+ (R200_VSF_MAX_TEMPS
- 1 - u_temp_i
);
1059 if (u_temp_i
< mesa_vp
->Base
.NumTemporaries
) {
1060 if (R200_DEBUG
& DEBUG_FALLBACKS
) {
1061 fprintf(stderr
, "Ran out of temps, num temps %d, us %d\n", mesa_vp
->Base
.NumTemporaries
, u_temp_i
);
1065 u_temp_i
= R200_VSF_MAX_TEMPS
- 1;
1066 if(o_inst
- vp
->instr
>= R200_VSF_MAX_INST
) {
1067 mesa_vp
->Base
.NumNativeInstructions
= 129;
1068 if (R200_DEBUG
& DEBUG_FALLBACKS
) {
1069 fprintf(stderr
, "more than 128 native instructions\n");
1073 if ((o_inst
->op
& R200_VSF_OUT_CLASS_MASK
) == R200_VSF_OUT_CLASS_RESULT_POS
) {
1074 vp
->pos_end
= (o_inst
- vp
->instr
);
1078 vp
->native
= GL_TRUE
;
1079 mesa_vp
->Base
.NumNativeInstructions
= (o_inst
- vp
->instr
);
1081 fprintf(stderr
, "hw program:\n");
1082 for(i
=0; i
< vp
->program
.length
; i
++)
1083 fprintf(stderr
, "%08x\n", vp
->instr
[i
]);
1088 void r200SetupVertexProg( GLcontext
*ctx
) {
1089 r200ContextPtr rmesa
= R200_CONTEXT(ctx
);
1090 struct r200_vertex_program
*vp
= (struct r200_vertex_program
*)ctx
->VertexProgram
.Current
;
1094 if (!vp
->translated
|| (ctx
->Fog
.Enabled
&& ctx
->Fog
.Mode
!= vp
->fogmode
)) {
1095 rmesa
->curr_vp_hw
= NULL
;
1096 r200_translate_vertex_program(ctx
, vp
);
1098 /* could optimize setting up vertex progs away for non-tcl hw */
1099 fallback
= !(vp
->native
&& r200VertexProgUpdateParams(ctx
, vp
) &&
1100 rmesa
->r200Screen
->drmSupportsVertexProgram
);
1101 TCL_FALLBACK(ctx
, R200_TCL_FALLBACK_VERTEX_PROGRAM
, fallback
);
1102 if (rmesa
->TclFallback
) return;
1104 R200_STATECHANGE( rmesa
, vap
);
1105 /* FIXME: fglrx sets R200_VAP_SINGLE_BUF_STATE_ENABLE too. Do we need it?
1106 maybe only when using more than 64 inst / 96 param? */
1107 rmesa
->hw
.vap
.cmd
[VAP_SE_VAP_CNTL
] |= R200_VAP_PROG_VTX_SHADER_ENABLE
/*| R200_VAP_SINGLE_BUF_STATE_ENABLE*/;
1109 R200_STATECHANGE( rmesa
, pvs
);
1111 rmesa
->hw
.pvs
.cmd
[PVS_CNTL_1
] = (0 << R200_PVS_CNTL_1_PROGRAM_START_SHIFT
) |
1112 ((vp
->mesa_program
.Base
.NumNativeInstructions
- 1) << R200_PVS_CNTL_1_PROGRAM_END_SHIFT
) |
1113 (vp
->pos_end
<< R200_PVS_CNTL_1_POS_END_SHIFT
);
1114 rmesa
->hw
.pvs
.cmd
[PVS_CNTL_2
] = (0 << R200_PVS_CNTL_2_PARAM_OFFSET_SHIFT
) |
1115 (vp
->mesa_program
.Base
.NumNativeParameters
<< R200_PVS_CNTL_2_PARAM_COUNT_SHIFT
);
1117 /* maybe user clip planes just work with vertex progs... untested */
1118 if (ctx
->Transform
.ClipPlanesEnabled
) {
1119 R200_STATECHANGE( rmesa
, tcl
);
1120 if (vp
->mesa_program
.IsPositionInvariant
) {
1121 rmesa
->hw
.tcl
.cmd
[TCL_UCP_VERT_BLEND_CTL
] |= (ctx
->Transform
.ClipPlanesEnabled
<< 2);
1124 rmesa
->hw
.tcl
.cmd
[TCL_UCP_VERT_BLEND_CTL
] &= ~(0xfc);
1128 if (vp
!= rmesa
->curr_vp_hw
) {
1129 GLuint count
= vp
->mesa_program
.Base
.NumNativeInstructions
;
1130 drm_radeon_cmd_header_t tmp
;
1132 R200_STATECHANGE( rmesa
, vpi
[0] );
1133 R200_STATECHANGE( rmesa
, vpi
[1] );
1135 /* FIXME: what about using a memcopy... */
1136 for (i
= 0; (i
< 64) && i
< count
; i
++) {
1137 rmesa
->hw
.vpi
[0].cmd
[VPI_OPDST_0
+ 4 * i
] = vp
->instr
[i
].op
;
1138 rmesa
->hw
.vpi
[0].cmd
[VPI_SRC0_0
+ 4 * i
] = vp
->instr
[i
].src0
;
1139 rmesa
->hw
.vpi
[0].cmd
[VPI_SRC1_0
+ 4 * i
] = vp
->instr
[i
].src1
;
1140 rmesa
->hw
.vpi
[0].cmd
[VPI_SRC2_0
+ 4 * i
] = vp
->instr
[i
].src2
;
1142 /* hack up the cmd_size so not the whole state atom is emitted always.
1143 This may require some more thought, we may emit half progs on lost state, but
1144 hopefully it won't matter?
1145 WARNING: must not use R200_DB_STATECHANGE, this will produce bogus (and rejected)
1146 packet emits (due to the mismatched cmd_size and count in cmd/last_cmd) */
1147 rmesa
->hw
.vpi
[0].cmd_size
= 1 + 4 * ((count
> 64) ? 64 : count
);
1148 tmp
.i
= rmesa
->hw
.vpi
[0].cmd
[VPI_CMD_0
];
1149 tmp
.veclinear
.count
= (count
> 64) ? 64 : count
;
1150 rmesa
->hw
.vpi
[0].cmd
[VPI_CMD_0
] = tmp
.i
;
1152 for (i
= 0; i
< (count
- 64); i
++) {
1153 rmesa
->hw
.vpi
[1].cmd
[VPI_OPDST_0
+ 4 * i
] = vp
->instr
[i
+ 64].op
;
1154 rmesa
->hw
.vpi
[1].cmd
[VPI_SRC0_0
+ 4 * i
] = vp
->instr
[i
+ 64].src0
;
1155 rmesa
->hw
.vpi
[1].cmd
[VPI_SRC1_0
+ 4 * i
] = vp
->instr
[i
+ 64].src1
;
1156 rmesa
->hw
.vpi
[1].cmd
[VPI_SRC2_0
+ 4 * i
] = vp
->instr
[i
+ 64].src2
;
1158 rmesa
->hw
.vpi
[1].cmd_size
= 1 + 4 * (count
- 64);
1159 tmp
.i
= rmesa
->hw
.vpi
[1].cmd
[VPI_CMD_0
];
1160 tmp
.veclinear
.count
= count
- 64;
1161 rmesa
->hw
.vpi
[1].cmd
[VPI_CMD_0
] = tmp
.i
;
1163 rmesa
->curr_vp_hw
= vp
;
1169 r200BindProgram(GLcontext
*ctx
, GLenum target
, struct gl_program
*prog
)
1171 r200ContextPtr rmesa
= R200_CONTEXT(ctx
);
1174 case GL_VERTEX_PROGRAM_ARB
:
1175 rmesa
->curr_vp_hw
= NULL
;
1178 _mesa_problem(ctx
, "Target not supported yet!");
1183 static struct gl_program
*
1184 r200NewProgram(GLcontext
*ctx
, GLenum target
, GLuint id
)
1186 struct r200_vertex_program
*vp
;
1189 case GL_VERTEX_PROGRAM_ARB
:
1190 vp
= CALLOC_STRUCT(r200_vertex_program
);
1191 return _mesa_init_vertex_program(ctx
, &vp
->mesa_program
, target
, id
);
1192 case GL_FRAGMENT_PROGRAM_ARB
:
1193 case GL_FRAGMENT_PROGRAM_NV
:
1194 return _mesa_init_fragment_program( ctx
, CALLOC_STRUCT(gl_fragment_program
), target
, id
);
1196 _mesa_problem(ctx
, "Bad target in r200NewProgram");
1203 r200DeleteProgram(GLcontext
*ctx
, struct gl_program
*prog
)
1205 _mesa_delete_program(ctx
, prog
);
1209 r200ProgramStringNotify(GLcontext
*ctx
, GLenum target
, struct gl_program
*prog
)
1211 struct r200_vertex_program
*vp
= (void *)prog
;
1212 r200ContextPtr rmesa
= R200_CONTEXT(ctx
);
1215 case GL_VERTEX_PROGRAM_ARB
:
1216 vp
->translated
= GL_FALSE
;
1218 /* memset(&vp->translated, 0, sizeof(struct r200_vertex_program) - sizeof(struct gl_vertex_program));*/
1219 r200_translate_vertex_program(ctx
, vp
);
1220 rmesa
->curr_vp_hw
= NULL
;
1222 case GL_FRAGMENT_SHADER_ATI
:
1223 rmesa
->afs_loaded
= NULL
;
1226 /* need this for tcl fallbacks */
1227 _tnl_program_string(ctx
, target
, prog
);
1231 r200IsProgramNative(GLcontext
*ctx
, GLenum target
, struct gl_program
*prog
)
1233 struct r200_vertex_program
*vp
= (void *)prog
;
1236 case GL_VERTEX_STATE_PROGRAM_NV
:
1237 case GL_VERTEX_PROGRAM_ARB
:
1238 if (!vp
->translated
) {
1239 r200_translate_vertex_program(ctx
, vp
);
1241 /* does not take parameters etc. into account */
1244 _mesa_problem(ctx
, "Bad target in r200NewProgram");
1249 void r200InitShaderFuncs(struct dd_function_table
*functions
)
1251 functions
->NewProgram
= r200NewProgram
;
1252 functions
->BindProgram
= r200BindProgram
;
1253 functions
->DeleteProgram
= r200DeleteProgram
;
1254 functions
->ProgramStringNotify
= r200ProgramStringNotify
;
1255 functions
->IsProgramNative
= r200IsProgramNative
;