1 /**************************************************************************
3 Copyright (C) 2005 Aapo Tahkola.
7 Permission is hereby granted, free of charge, to any person obtaining a
8 copy of this software and associated documentation files (the "Software"),
9 to deal in the Software without restriction, including without limitation
10 on the rights to use, copy, modify, merge, publish, distribute, sub
11 license, and/or sell copies of the Software, and to permit persons to whom
12 the Software is furnished to do so, subject to the following conditions:
14 The above copyright notice and this permission notice (including the next
15 paragraph) shall be included in all copies or substantial portions of the
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
21 THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
22 DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
23 OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
24 USE OR OTHER DEALINGS IN THE SOFTWARE.
26 **************************************************************************/
30 * Aapo Tahkola <aet@rasterburn.org>
31 * Roland Scheidegger <rscheidegger_lists@hispeed.ch>
33 #include "main/glheader.h"
34 #include "main/macros.h"
35 #include "main/enums.h"
36 #include "program/program.h"
37 #include "program/prog_instruction.h"
38 #include "program/prog_parameter.h"
39 #include "program/prog_statevars.h"
40 #include "program/programopt.h"
43 #include "r200_context.h"
44 #include "r200_vertprog.h"
45 #include "r200_ioctl.h"
/*
 * Compile-time sanity check.
 * The translation helpers in this file hand Mesa SWIZZLE_* and WRITEMASK_*
 * values to the hardware encoding unchanged (t_swizzle() and t_dst_mask()
 * are documented as no-op conversions), so the Mesa constants must be
 * numerically identical to the VSF_IN_COMPONENT_* / VSF_FLAG_* constants.
 * Compilation is aborted if any pair differs.
 */
48 #if SWIZZLE_X != VSF_IN_COMPONENT_X || \
49 SWIZZLE_Y != VSF_IN_COMPONENT_Y || \
50 SWIZZLE_Z != VSF_IN_COMPONENT_Z || \
51 SWIZZLE_W != VSF_IN_COMPONENT_W || \
52 SWIZZLE_ZERO != VSF_IN_COMPONENT_ZERO || \
53 SWIZZLE_ONE != VSF_IN_COMPONENT_ONE || \
54 WRITEMASK_X != VSF_FLAG_X || \
55 WRITEMASK_Y != VSF_FLAG_Y || \
56 WRITEMASK_Z != VSF_FLAG_Z || \
57 WRITEMASK_W != VSF_FLAG_W
58 #error Cannot change these!
/*
 * Layout of the per-opcode "ip" word stored in the opcode table:
 *   - low bits (OP_MASK): number of input operands,
 *   - bit 31 (SCALAR_FLAG): the opcode takes scalar sources.
 *
 * The flag constants are built from an unsigned 1 on purpose: shifting a
 * signed 1 into bit 31 is undefined behaviour, and the resulting negative
 * int would sign-extend when stored in the table's "unsigned long ip" field.
 */
#define SCALAR_FLAG (1u << 31)
#define FLAG_MASK (1u << 31)
#define OP_MASK (0xf) /* we are unlikely to have more than 15 */
/* Build one table entry: opcode name string, Mesa opcode value, ip word. */
#define OPN(operator, ip) {#operator, OPCODE_##operator, ip}
/*
 * Opcode table fragment.
 * Each entry's "ip" word carries the operand count, optionally or'ed with
 * SCALAR_FLAG for opcodes whose sources are scalars.
 */
69 unsigned long ip
; /* number of input operands and flags */
/* Scalar opcodes taking one operand: ARL, EX2, EXP, LG2, LOG, RCP, RSQ.
   POW is the only scalar opcode taking two operands. */
73 OPN(ARL
, 1|SCALAR_FLAG
),
78 OPN(EX2
, 1|SCALAR_FLAG
),
79 OPN(EXP
, 1|SCALAR_FLAG
),
82 OPN(LG2
, 1|SCALAR_FLAG
),
84 OPN(LOG
, 1|SCALAR_FLAG
),
90 OPN(POW
, 2|SCALAR_FLAG
),
91 OPN(RCP
, 1|SCALAR_FLAG
),
92 OPN(RSQ
, 1|SCALAR_FLAG
),
/*
 * Upload the vertex program's parameters into the vpp[0] / vpp[1] state atoms.
 *
 * ctx: GL context; used to find the r200 context and to refresh
 *      state-derived parameter values.
 * vp:  driver vertex program whose mesa_program.Parameters are uploaded.
 *
 * Every parameter is written as four consecutive floats. The first atom
 * holds at most 96 parameters; any remainder is emitted through the second
 * atom. Having more than R200_VSF_MAX_PARAM parameters is reported on stderr.
 */
102 static GLboolean
r200VertexProgUpdateParams(struct gl_context
*ctx
, struct r200_vertex_program
*vp
)
104 r200ContextPtr rmesa
= R200_CONTEXT( ctx
);
/* Write cursor: starts one dword past the command header of atom 0. */
105 GLfloat
*fcmd
= (GLfloat
*)&rmesa
->hw
.vpp
[0].cmd
[VPP_CMD_0
+ 1];
107 struct gl_program
*mesa_vp
= &vp
->mesa_program
;
108 struct gl_program_parameter_list
*paramList
;
109 drm_radeon_cmd_header_t tmp
;
/* Flag both parameter atoms as changed before their buffers are rewritten. */
111 R200_STATECHANGE( rmesa
, vpp
[0] );
112 R200_STATECHANGE( rmesa
, vpp
[1] );
113 assert(mesa_vp
->Parameters
);
/* Refresh the values of state-derived parameters before copying them. */
114 _mesa_load_state_parameters(ctx
, mesa_vp
->Parameters
);
115 paramList
= mesa_vp
->Parameters
;
117 if(paramList
->NumParameters
> R200_VSF_MAX_PARAM
){
118 fprintf(stderr
, "%s:Params exhausted\n", __func__
);
/* Copy the parameters one by one; pvo is the offset of the parameter's
   first component inside ParameterValues. */
122 for(pi
= 0; pi
< paramList
->NumParameters
; pi
++) {
123 unsigned pvo
= paramList
->ParameterValueOffset
[pi
];
125 switch(paramList
->Parameters
[pi
].Type
) {
/* State variables share the copy code of plain constants. */
126 case PROGRAM_STATE_VAR
:
127 //fprintf(stderr, "%s", vp->Parameters->Parameters[pi].Name);
128 case PROGRAM_CONSTANT
:
129 *fcmd
++ = paramList
->ParameterValues
[pvo
+ 0].f
;
130 *fcmd
++ = paramList
->ParameterValues
[pvo
+ 1].f
;
131 *fcmd
++ = paramList
->ParameterValues
[pvo
+ 2].f
;
132 *fcmd
++ = paramList
->ParameterValues
[pvo
+ 3].f
;
135 _mesa_problem(NULL
, "Bad param type in %s", __func__
);
/* Redirect the write cursor to the payload of the second atom.
   NOTE(review): the condition guarding this switch-over is not visible
   here; presumably it triggers once the first 96 parameters are written,
   matching the 96-entry split used below - confirm. */
139 fcmd
= (GLfloat
*)&rmesa
->hw
.vpp
[1].cmd
[VPP_CMD_0
+ 1];
142 /* hack up the cmd_size so not the whole state atom is emitted always. */
/* Atom 0: one header dword plus four dwords per parameter, capped at 96. */
143 rmesa
->hw
.vpp
[0].cmd_size
=
144 1 + 4 * ((paramList
->NumParameters
> 96) ? 96 : paramList
->NumParameters
);
/* Patch the vector count inside the atom's command header to match. */
145 tmp
.i
= rmesa
->hw
.vpp
[0].cmd
[VPP_CMD_0
];
146 tmp
.veclinear
.count
= (paramList
->NumParameters
> 96) ? 96 : paramList
->NumParameters
;
147 rmesa
->hw
.vpp
[0].cmd
[VPP_CMD_0
] = tmp
.i
;
/* Atom 1 carries whatever did not fit into the first 96 slots. */
148 if (paramList
->NumParameters
> 96) {
149 rmesa
->hw
.vpp
[1].cmd_size
= 1 + 4 * (paramList
->NumParameters
- 96);
150 tmp
.i
= rmesa
->hw
.vpp
[1].cmd
[VPP_CMD_0
];
151 tmp
.veclinear
.count
= paramList
->NumParameters
- 96;
152 rmesa
->hw
.vpp
[1].cmd
[VPP_CMD_0
] = tmp
.i
;
157 static inline unsigned long t_dst_mask(GLuint mask
)
159 /* WRITEMASK_* is equivalent to VSF_FLAG_* */
160 return mask
& VSF_FLAG_ALL
;
/*
 * Translate a Mesa destination register into the R200 output register
 * encoding: an output class, or'ed with the register index shifted by
 * R200_VPI_OUT_REG_INDEX_SHIFT where the class has more than one register.
 *
 * Temporaries keep their Mesa index. Outputs are mapped per varying slot
 * (COL1 is colour register 1, TEX0-5 become texcoord registers 0-5).
 * The address register must have index 0. Unknown output slots and unknown
 * register files are reported on stderr.
 */
163 static unsigned long t_dst(struct prog_dst_register
*dst
)
166 case PROGRAM_TEMPORARY
:
167 return ((dst
->Index
<< R200_VPI_OUT_REG_INDEX_SHIFT
)
168 | R200_VSF_OUT_CLASS_TMP
);
170 switch (dst
->Index
) {
171 case VARYING_SLOT_POS
:
172 return R200_VSF_OUT_CLASS_RESULT_POS
;
173 case VARYING_SLOT_COL0
:
174 return R200_VSF_OUT_CLASS_RESULT_COLOR
;
/* Secondary colour is register 1 of the colour output class. */
175 case VARYING_SLOT_COL1
:
176 return ((1 << R200_VPI_OUT_REG_INDEX_SHIFT
)
177 | R200_VSF_OUT_CLASS_RESULT_COLOR
);
178 case VARYING_SLOT_FOGC
:
179 return R200_VSF_OUT_CLASS_RESULT_FOGC
;
/* Texcoord outputs: register index is the slot's distance from TEX0. */
180 case VARYING_SLOT_TEX0
:
181 case VARYING_SLOT_TEX1
:
182 case VARYING_SLOT_TEX2
:
183 case VARYING_SLOT_TEX3
:
184 case VARYING_SLOT_TEX4
:
185 case VARYING_SLOT_TEX5
:
186 return (((dst
->Index
- VARYING_SLOT_TEX0
) << R200_VPI_OUT_REG_INDEX_SHIFT
)
187 | R200_VSF_OUT_CLASS_RESULT_TEXC
);
188 case VARYING_SLOT_PSIZ
:
189 return R200_VSF_OUT_CLASS_RESULT_POINTSIZE
;
191 fprintf(stderr
, "problem in %s, unknown dst output reg %d\n", __func__
, dst
->Index
);
/* Only a single address register (index 0) is expected. */
195 case PROGRAM_ADDRESS
:
196 assert (dst
->Index
== 0);
197 return R200_VSF_OUT_CLASS_ADDR
;
199 fprintf(stderr
, "problem in %s, unknown register type %d\n", __func__
, dst
->File
);
/*
 * Map a Mesa register file to the hardware source class:
 * temporaries to VSF_IN_CLASS_TMP, constants and state variables to
 * VSF_IN_CLASS_PARAM. VSF_IN_CLASS_ATTR is returned for a case whose label
 * is not visible here (presumably PROGRAM_INPUT - confirm).
 * Anything else is reported on stderr.
 */
205 static unsigned long t_src_class(gl_register_file file
)
209 case PROGRAM_TEMPORARY
:
210 return VSF_IN_CLASS_TMP
;
213 return VSF_IN_CLASS_ATTR
;
/* Constants and state variables both live in parameter memory. */
215 case PROGRAM_CONSTANT
:
216 case PROGRAM_STATE_VAR
:
217 return VSF_IN_CLASS_PARAM
;
220 case PROGRAM_ADDRESS:
223 fprintf(stderr
, "problem in %s", __func__
);
/*
 * Convert one Mesa swizzle selector into the hardware component selector.
 * The file's compile-time check guarantees both encodings are identical.
 */
228 static inline unsigned long t_swizzle(GLubyte swizzle
)
230 /* this is in fact a NOP as the Mesa SWIZZLE_* are all identical to VSF_IN_COMPONENT_* */
/*
 * Debug helper: print the vp->inputs[] attribute-to-hw-slot map on stderr
 * as "<caller>:<v0 v1 ... >", one entry per VERT_ATTRIB_MAX index.
 *
 * vp:     program to dump; the visible code also has a message for a null vp.
 * caller: name of the calling function, used as the line prefix.
 */
235 static void vp_dump_inputs(struct r200_vertex_program
*vp
, char *caller
)
240 fprintf(stderr
, "vp null in call to %s from %s\n", __func__
, caller
);
244 fprintf(stderr
, "%s:<", caller
);
245 for(i
=0; i
< VERT_ATTRIB_MAX
; i
++)
246 fprintf(stderr
, "%d ", vp
->inputs
[i
]);
247 fprintf(stderr
, ">\n");
/*
 * Return the hardware register index for a Mesa source register.
 *
 * For PROGRAM_INPUT sources this is the hw attribute slot that was assigned
 * to the Mesa attribute in vp->inputs[]; the slot must already be assigned.
 * For other files, a negative index triggers a warning on stderr.
 */
252 static unsigned long t_src_index(struct r200_vertex_program
*vp
, struct prog_src_register
*src
)
258 if(src
->File
== PROGRAM_INPUT
){
259 /* if(vp->inputs[src->Index] != -1)
260 return vp->inputs[src->Index];
262 for(i=0; i < VERT_ATTRIB_MAX; i++)
263 if(vp->inputs[i] > max_reg)
264 max_reg = vp->inputs[i];
266 vp->inputs[src->Index] = max_reg+1;*/
268 //vp_dump_inputs(vp, __func__);
/* -1 marks an attribute without a hw slot; using one here is a bug. */
269 assert(vp
->inputs
[src
->Index
] != -1);
270 return vp
->inputs
[src
->Index
];
/* Negative indices are only warned about, see the message text. */
272 if (src
->Index
< 0) {
273 fprintf(stderr
, "WARNING negative offsets for indirect addressing do not work\n");
280 static unsigned long t_src(struct r200_vertex_program
*vp
, struct prog_src_register
*src
)
283 return MAKE_VSF_SOURCE(t_src_index(vp
, src
),
284 t_swizzle(GET_SWZ(src
->Swizzle
, 0)),
285 t_swizzle(GET_SWZ(src
->Swizzle
, 1)),
286 t_swizzle(GET_SWZ(src
->Swizzle
, 2)),
287 t_swizzle(GET_SWZ(src
->Swizzle
, 3)),
288 t_src_class(src
->File
),
289 src
->Negate
) | (src
->RelAddr
<< 4);
292 static unsigned long t_src_scalar(struct r200_vertex_program
*vp
, struct prog_src_register
*src
)
295 return MAKE_VSF_SOURCE(t_src_index(vp
, src
),
296 t_swizzle(GET_SWZ(src
->Swizzle
, 0)),
297 t_swizzle(GET_SWZ(src
->Swizzle
, 0)),
298 t_swizzle(GET_SWZ(src
->Swizzle
, 0)),
299 t_swizzle(GET_SWZ(src
->Swizzle
, 0)),
300 t_src_class(src
->File
),
301 src
->Negate
? VSF_FLAG_ALL
: VSF_FLAG_NONE
) | (src
->RelAddr
<< 4);
/*
 * Map a Mesa opcode onto the hardware opcode with the same meaning.
 * Only opcodes with a direct hardware equivalent are listed (DP4 maps to
 * the hardware DOT op). Calling this with any other opcode is reported on
 * stderr ("Should not be called with opcode ...").
 */
304 static unsigned long t_opcode(enum prog_opcode opcode
)
308 case OPCODE_ADD
: return R200_VPI_OUT_OP_ADD
;
309 /* FIXME: ARL works fine, but negative offsets won't work - fglrx just
310 * seems to ignore neg offsets which isn't quite correct...
312 case OPCODE_ARL
: return R200_VPI_OUT_OP_ARL
;
313 case OPCODE_DP4
: return R200_VPI_OUT_OP_DOT
;
314 case OPCODE_DST
: return R200_VPI_OUT_OP_DST
;
315 case OPCODE_EX2
: return R200_VPI_OUT_OP_EX2
;
316 case OPCODE_EXP
: return R200_VPI_OUT_OP_EXP
;
317 case OPCODE_FRC
: return R200_VPI_OUT_OP_FRC
;
318 case OPCODE_LG2
: return R200_VPI_OUT_OP_LG2
;
319 case OPCODE_LIT
: return R200_VPI_OUT_OP_LIT
;
320 case OPCODE_LOG
: return R200_VPI_OUT_OP_LOG
;
321 case OPCODE_MAX
: return R200_VPI_OUT_OP_MAX
;
322 case OPCODE_MIN
: return R200_VPI_OUT_OP_MIN
;
323 case OPCODE_MUL
: return R200_VPI_OUT_OP_MUL
;
324 case OPCODE_RCP
: return R200_VPI_OUT_OP_RCP
;
325 case OPCODE_RSQ
: return R200_VPI_OUT_OP_RSQ
;
326 case OPCODE_SGE
: return R200_VPI_OUT_OP_SGE
;
327 case OPCODE_SLT
: return R200_VPI_OUT_OP_SLT
;
330 fprintf(stderr
, "%s: Should not be called with opcode %d!", __func__
, opcode
);
/*
 * Look up the operand-count/flags word ("ip") of a Mesa opcode.
 * The op_names table is searched linearly for a matching opcode, so the
 * result does not depend on the table being sorted. An opcode missing from
 * the table is reported on stderr.
 */
336 static unsigned long op_operands(enum prog_opcode opcode
)
340 /* Can we trust Mesa's opcodes to be in order? */
341 for(i
=0; i
< sizeof(op_names
) / sizeof(*op_names
); i
++)
342 if(op_names
[i
].opcode
== opcode
)
343 return op_names
[i
].ip
;
345 fprintf(stderr
, "op %d not found in op_names\n", opcode
);
/* TODO: Get rid of t_src_class call */
/*
 * CMP_SRCS(a, b) is true when two source registers refer to different
 * registers (different RelAddr or Index) of the same hardware class, where
 * that class is either PARAM or ATTR. The translator uses it to decide
 * whether one of the sources must first be copied into a temporary.
 * The parameters are parenthesized so that any struct-valued expression
 * (for example a dereferenced pointer) can be passed safely.
 */
#define CMP_SRCS(a, b) ((((a).RelAddr != (b).RelAddr) || ((a).Index != (b).Index)) && \
		       ((t_src_class((a).File) == VSF_IN_CLASS_PARAM && \
			 t_src_class((b).File) == VSF_IN_CLASS_PARAM) || \
			(t_src_class((a).File) == VSF_IN_CLASS_ATTR && \
			 t_src_class((b).File) == VSF_IN_CLASS_ATTR)))

/* fglrx on rv250 codes up unused sources as follows:
   unused but necessary sources are same as previous source, zero-ed out.
   unnecessary sources are same as previous source but with VSF_IN_CLASS_NONE set.
   i.e. an add (2 args) has its 2nd arg (if you use it as mov) zero-ed out, and 3rd arg
   set to VSF_IN_CLASS_NONE. Not sure if strictly necessary. */

/* use these simpler definitions. Must obviously not be used with not yet set up regs.
   Those are NOT semantically equivalent to the r300 ones, requires code changes */

/* ZERO_SRC_n: source n of the current instruction (o_inst) with its four
   component selectors replaced by SELECT_ZERO; all other bits are kept. */
#define ZERO_SRC_0 (((o_inst->src0 & ~(0xfff << R200_VPI_IN_X_SHIFT)) \
				   | ((R200_VPI_IN_SELECT_ZERO << R200_VPI_IN_X_SHIFT) \
				   | (R200_VPI_IN_SELECT_ZERO << R200_VPI_IN_Y_SHIFT) \
				   | (R200_VPI_IN_SELECT_ZERO << R200_VPI_IN_Z_SHIFT) \
				   | (R200_VPI_IN_SELECT_ZERO << R200_VPI_IN_W_SHIFT))))

#define ZERO_SRC_1 (((o_inst->src1 & ~(0xfff << R200_VPI_IN_X_SHIFT)) \
				   | ((R200_VPI_IN_SELECT_ZERO << R200_VPI_IN_X_SHIFT) \
				   | (R200_VPI_IN_SELECT_ZERO << R200_VPI_IN_Y_SHIFT) \
				   | (R200_VPI_IN_SELECT_ZERO << R200_VPI_IN_Z_SHIFT) \
				   | (R200_VPI_IN_SELECT_ZERO << R200_VPI_IN_W_SHIFT))))

#define ZERO_SRC_2 (((o_inst->src2 & ~(0xfff << R200_VPI_IN_X_SHIFT)) \
				   | ((R200_VPI_IN_SELECT_ZERO << R200_VPI_IN_X_SHIFT) \
				   | (R200_VPI_IN_SELECT_ZERO << R200_VPI_IN_Y_SHIFT) \
				   | (R200_VPI_IN_SELECT_ZERO << R200_VPI_IN_Z_SHIFT) \
				   | (R200_VPI_IN_SELECT_ZERO << R200_VPI_IN_W_SHIFT))))

/* UNUSED_SRC_n: source n of the current instruction with its low four bits
   replaced by the value 9 (presumably the VSF_IN_CLASS_NONE encoding
   mentioned above - confirm against the register header). */
#define UNUSED_SRC_0 ((o_inst->src0 & ~15) | 9)

#define UNUSED_SRC_1 ((o_inst->src1 & ~15) | 9)

#define UNUSED_SRC_2 ((o_inst->src2 & ~15) | 9)
391 * Generate an R200 vertex program from Mesa's internal representation.
393 * \return GL_TRUE for success, GL_FALSE for failure.
/*
 * Translate vp->mesa_program into R200 hardware vertex shader instructions.
 *
 * ctx: GL context; supplies the current fog mode and is passed to the Mesa
 *      helpers that may modify the program.
 * vp:  driver program; the hw instructions are written starting at
 *      vp->instr, and vp->inputs / vp->inputmap_rev, vp->fogmode,
 *      vp->fogpidx, vp->pos_end, vp->translated and vp->native are filled in.
 *
 * vp->native is set to GL_TRUE only at the very end, after all checks and
 * the instruction translation have completed.
 */
395 static GLboolean
r200_translate_vertex_program(struct gl_context
*ctx
, struct r200_vertex_program
*vp
)
397 struct gl_program
*mesa_vp
= &vp
->mesa_program
;
398 struct prog_instruction
*vpi
;
400 VERTEX_SHADER_INSTRUCTION
*o_inst
;
401 unsigned long operands
;
/* Start pessimistic: the program counts as translated from now on, but is
   not marked native until the whole translation has gone through. The fog
   mode is recorded because the emitted fog code depends on it. */
410 vp
->native
= GL_FALSE
;
411 vp
->translated
= GL_TRUE
;
412 vp
->fogmode
= ctx
->Fog
.Mode
;
414 if (mesa_vp
->arb
.NumInstructions
== 0)
/* Only POS, NORMAL, COLOR0/1, FOG and TEX0-5 are accepted in this input
   check; any other bit in inputs_read is reported when RADEON_FALLBACKS
   debugging is enabled. */
418 if ((mesa_vp
->info
.inputs_read
&
419 ~(VERT_BIT_POS
| VERT_BIT_NORMAL
| VERT_BIT_COLOR0
| VERT_BIT_COLOR1
|
420 VERT_BIT_FOG
| VERT_BIT_TEX0
| VERT_BIT_TEX1
| VERT_BIT_TEX2
|
421 VERT_BIT_TEX3
| VERT_BIT_TEX4
| VERT_BIT_TEX5
)) != 0) {
422 if (R200_DEBUG
& RADEON_FALLBACKS
) {
423 fprintf(stderr
, "can't handle vert prog inputs 0x%x\n",
424 mesa_vp
->info
.inputs_read
);
/* Same idea for outputs: POS, COL0/1, FOGC, TEX0-5 and PSIZ only. */
430 if ((mesa_vp
->info
.outputs_written
&
431 ~((1 << VARYING_SLOT_POS
) | (1 << VARYING_SLOT_COL0
) | (1 << VARYING_SLOT_COL1
) |
432 (1 << VARYING_SLOT_FOGC
) | (1 << VARYING_SLOT_TEX0
) | (1 << VARYING_SLOT_TEX1
) |
433 (1 << VARYING_SLOT_TEX2
) | (1 << VARYING_SLOT_TEX3
) | (1 << VARYING_SLOT_TEX4
) |
434 (1 << VARYING_SLOT_TEX5
) | (1 << VARYING_SLOT_PSIZ
))) != 0) {
435 if (R200_DEBUG
& RADEON_FALLBACKS
) {
436 fprintf(stderr
, "can't handle vert prog outputs 0x%llx\n",
437 (unsigned long long) mesa_vp
->info
.outputs_written
);
442 /* Initial value should be last tmp reg that hw supports.
443 Strangely enough r300 doesn't mind even though these would be out of range.
444 Smart enough to realize that it doesn't need it? */
445 int u_temp_i
= R200_VSF_MAX_TEMPS
- 1;
/* Working copies of the current instruction's operands; the translation
   edits these instead of the Mesa instruction itself. */
446 struct prog_src_register src
[3];
447 struct prog_dst_register dst
;
449 /* FIXME: is changing the prog safe to do here? */
450 if (mesa_vp
->arb
.IsPositionInvariant
&&
451 /* make sure we only do this once */
452 !(mesa_vp
->info
.outputs_written
& (1 << VARYING_SLOT_POS
))) {
453 _mesa_insert_mvp_code(ctx
, mesa_vp
);
456 /* for fogc, can't change mesa_vp, as it would hose swtnl, and exp with
457 base e isn't directly available neither. */
458 if ((mesa_vp
->info
.outputs_written
& (1 << VARYING_SLOT_FOGC
)) &&
460 struct gl_program_parameter_list
*paramList
;
461 gl_state_index16 tokens
[STATE_LENGTH
] = { STATE_FOG_PARAMS
, 0, 0, 0, 0 };
462 paramList
= mesa_vp
->Parameters
;
/* Remember which parameter slot holds the fog parameters; the fog code
   emitted further down reads it as a PARAM source. */
463 vp
->fogpidx
= _mesa_add_state_reference(paramList
, tokens
);
467 mesa_vp
->arb
.NumNativeInstructions
= 0;
468 if (mesa_vp
->Parameters
)
469 mesa_vp
->arb
.NumNativeParameters
= mesa_vp
->Parameters
->NumParameters
;
471 mesa_vp
->arb
.NumNativeParameters
= 0;
473 for(i
= 0; i
< VERT_ATTRIB_MAX
; i
++)
/* Reset the reverse map (15 entries) to 255 and build the bitmask of hw
   attribute slots that may still be handed out. 0x2ffd has bits 0, 2-11
   and 13 set, i.e. slots 1 and 12 are never available for assignment. */
475 for(i
= 0; i
< 15; i
++)
476 vp
->inputmap_rev
[i
] = 255;
477 free_inputs
= 0x2ffd;
479 /* fglrx uses fixed inputs as follows for conventional attribs.
480 generic attribs use non-fixed assignment, fglrx will always use the
481 lowest attrib values available. We'll just do the same.
482 There are 12 generic attribs possible, corresponding to attrib 0, 2-11
483 and 13 in a hw vertex prog.
484 attr 1 and 12 aren't used for generic attribs as those cannot be made vec4
485 (correspond to vertex normal/weight - maybe weight actually could be made vec4).
486 Additionally, not more than 12 arrays in total are possible I think.
487 attr 0 is pos, R200_VTX_XY1|R200_VTX_Z1|R200_VTX_W1 in R200_SE_VTX_FMT_0
488 attr 2-5 use colors 0-3 (R200_VTX_FP_RGBA << R200_VTX_COLOR_0/1/2/3_SHIFT in R200_SE_VTX_FMT_0)
489 attr 6-11 use tex 0-5 (4 << R200_VTX_TEX0/1/2/3/4/5_COMP_CNT_SHIFT in R200_SE_VTX_FMT_1)
490 attr 13 uses vtx1 pos (R200_VTX_XY1|R200_VTX_Z1|R200_VTX_W1 in R200_SE_VTX_FMT_0)
493 /* attr 4,5 and 13 are only used with generic attribs.
494 Haven't seen attr 14 used, maybe that's for the hw pointsize vec1 (which is
495 not possible to use with vertex progs as it is lacking in vert prog specification) */
496 /* may look different when using idx buf / input_route instead of se_vtx_fmt? */
497 if (mesa_vp
->info
.inputs_read
& VERT_BIT_POS
) {
498 vp
->inputs
[VERT_ATTRIB_POS
] = 0;
499 vp
->inputmap_rev
[0] = VERT_ATTRIB_POS
;
500 free_inputs
&= ~(1 << 0);
503 if (mesa_vp
->info
.inputs_read
& VERT_BIT_NORMAL
) {
504 vp
->inputs
[VERT_ATTRIB_NORMAL
] = 1;
505 vp
->inputmap_rev
[2] = VERT_ATTRIB_NORMAL
;
508 if (mesa_vp
->info
.inputs_read
& VERT_BIT_COLOR0
) {
509 vp
->inputs
[VERT_ATTRIB_COLOR0
] = 2;
510 vp
->inputmap_rev
[4] = VERT_ATTRIB_COLOR0
;
511 free_inputs
&= ~(1 << 2);
514 if (mesa_vp
->info
.inputs_read
& VERT_BIT_COLOR1
) {
515 vp
->inputs
[VERT_ATTRIB_COLOR1
] = 3;
516 vp
->inputmap_rev
[5] = VERT_ATTRIB_COLOR1
;
517 free_inputs
&= ~(1 << 3);
520 if (mesa_vp
->info
.inputs_read
& VERT_BIT_FOG
) {
521 vp
->inputs
[VERT_ATTRIB_FOG
] = 15; array_count
++;
522 vp
->inputmap_rev
[3] = VERT_ATTRIB_FOG
;
525 /* VERT_ATTRIB_TEX0-5 */
526 for (i
= 0; i
<= 5; i
++) {
527 if (mesa_vp
->info
.inputs_read
& VERT_BIT_TEX(i
)) {
528 vp
->inputs
[VERT_ATTRIB_TEX(i
)] = i
+ 6;
529 vp
->inputmap_rev
[8 + i
] = VERT_ATTRIB_TEX(i
);
530 free_inputs
&= ~(1 << (i
+ 6));
534 /* using VERT_ATTRIB_TEX6/7 would be illegal */
535 for (; i
< VERT_ATTRIB_TEX_MAX
; i
++) {
536 if (mesa_vp
->info
.inputs_read
& VERT_BIT_TEX(i
)) {
537 if (R200_DEBUG
& RADEON_FALLBACKS
) {
538 fprintf(stderr
, "texture attribute %d in vert prog\n", i
);
543 /* completely ignore aliasing? */
544 for (i
= 0; i
< VERT_ATTRIB_GENERIC_MAX
; i
++) {
546 /* completely ignore aliasing? */
547 if (mesa_vp
->info
.inputs_read
& VERT_BIT_GENERIC(i
)) {
549 if (array_count
> 12) {
550 if (R200_DEBUG
& RADEON_FALLBACKS
) {
551 fprintf(stderr
, "more than 12 attribs used in vert prog\n");
/* Give the generic attribute the lowest hw slot still marked free. */
555 for (j
= 0; j
< 14; j
++) {
556 /* will always find one due to limited array_count */
557 if (free_inputs
& (1 << j
)) {
558 free_inputs
&= ~(1 << j
);
559 vp
->inputs
[VERT_ATTRIB_GENERIC(i
)] = j
;
562 vp
->inputmap_rev
[j
] = VERT_ATTRIB_GENERIC(i
);
564 /* mapped to col/tex */
565 vp
->inputmap_rev
[j
+ 2] = VERT_ATTRIB_GENERIC(i
);
568 vp
->inputmap_rev
[j
+ 1] = VERT_ATTRIB_GENERIC(i
);
576 if (!(mesa_vp
->info
.outputs_written
& (1 << VARYING_SLOT_POS
))) {
577 if (R200_DEBUG
& RADEON_FALLBACKS
) {
578 fprintf(stderr
, "can't handle vert prog without position output\n");
/* Bit 0 still set means hw slot 0 (position) was never claimed above. */
582 if (free_inputs
& 1) {
583 if (R200_DEBUG
& RADEON_FALLBACKS
) {
584 fprintf(stderr
, "can't handle vert prog without position input\n");
/* Main translation loop: walk the Mesa instructions up to OPCODE_END.
   o_inst, the hw instruction write cursor, advances together with vpi.
   "operands" is the table word for the opcode: operand count plus
   SCALAR_FLAG. */
590 for (vpi
= mesa_vp
->arb
.Instructions
; vpi
->Opcode
!= OPCODE_END
; vpi
++, o_inst
++){
591 operands
= op_operands(vpi
->Opcode
);
592 are_srcs_scalar
= operands
& SCALAR_FLAG
;
595 for(i
= 0; i
< operands
; i
++) {
596 src
[i
] = vpi
->SrcReg
[i
];
597 /* hack up default attrib values as per spec as swizzling.
598 normal, fog, secondary color. Crazy?
599 May need more if we don't submit vec4 elements? */
600 if (src
[i
].File
== PROGRAM_INPUT
) {
601 if (src
[i
].Index
== VERT_ATTRIB_NORMAL
) {
603 for (j
= 0; j
< 4; j
++) {
604 if (GET_SWZ(src
[i
].Swizzle
, j
) == SWIZZLE_W
) {
605 src
[i
].Swizzle
&= ~(SWIZZLE_W
<< (j
*3));
606 src
[i
].Swizzle
|= SWIZZLE_ONE
<< (j
*3);
610 else if (src
[i
].Index
== VERT_ATTRIB_COLOR1
) {
612 for (j
= 0; j
< 4; j
++) {
613 if (GET_SWZ(src
[i
].Swizzle
, j
) == SWIZZLE_W
) {
614 src
[i
].Swizzle
&= ~(SWIZZLE_W
<< (j
*3));
615 src
[i
].Swizzle
|= SWIZZLE_ZERO
<< (j
*3);
619 else if (src
[i
].Index
== VERT_ATTRIB_FOG
) {
621 for (j
= 0; j
< 4; j
++) {
622 if (GET_SWZ(src
[i
].Swizzle
, j
) == SWIZZLE_W
) {
623 src
[i
].Swizzle
&= ~(SWIZZLE_W
<< (j
*3));
624 src
[i
].Swizzle
|= SWIZZLE_ONE
<< (j
*3);
626 else if ((GET_SWZ(src
[i
].Swizzle
, j
) == SWIZZLE_Y
) ||
627 GET_SWZ(src
[i
].Swizzle
, j
) == SWIZZLE_Z
) {
628 src
[i
].Swizzle
&= ~(SWIZZLE_W
<< (j
*3));
629 src
[i
].Swizzle
|= SWIZZLE_ZERO
<< (j
*3);
/* If src[2] conflicts with src[0] or src[1] according to CMP_SRCS, copy it
   into the scratch temp u_temp_i first (an ADD with a zeroed second source)
   and make src[2] refer to that temp instead. */
637 if( CMP_SRCS(src
[1], src
[2]) || CMP_SRCS(src
[0], src
[2]) ){
638 o_inst
->op
= MAKE_VSF_OP(R200_VPI_OUT_OP_ADD
,
639 (u_temp_i
<< R200_VPI_OUT_REG_INDEX_SHIFT
) | R200_VSF_OUT_CLASS_TMP
,
642 o_inst
->src0
= MAKE_VSF_SOURCE(t_src_index(vp
, &src
[2]),
643 SWIZZLE_X
, SWIZZLE_Y
,
644 SWIZZLE_Z
, SWIZZLE_W
,
645 t_src_class(src
[2].File
), VSF_FLAG_NONE
) | (src
[2].RelAddr
<< 4);
647 o_inst
->src1
= ZERO_SRC_0
;
648 o_inst
->src2
= UNUSED_SRC_1
;
651 src
[2].File
= PROGRAM_TEMPORARY
;
652 src
[2].Index
= u_temp_i
;
/* Same treatment for a conflict between src[0] and src[1]: src[0] is the
   one copied into the scratch temp. */
659 if( CMP_SRCS(src
[1], src
[0]) ){
660 o_inst
->op
= MAKE_VSF_OP(R200_VPI_OUT_OP_ADD
,
661 (u_temp_i
<< R200_VPI_OUT_REG_INDEX_SHIFT
) | R200_VSF_OUT_CLASS_TMP
,
664 o_inst
->src0
= MAKE_VSF_SOURCE(t_src_index(vp
, &src
[0]),
665 SWIZZLE_X
, SWIZZLE_Y
,
666 SWIZZLE_Z
, SWIZZLE_W
,
667 t_src_class(src
[0].File
), VSF_FLAG_NONE
) | (src
[0].RelAddr
<< 4);
669 o_inst
->src1
= ZERO_SRC_0
;
670 o_inst
->src2
= UNUSED_SRC_1
;
673 src
[0].File
= PROGRAM_TEMPORARY
;
674 src
[0].Index
= u_temp_i
;
/* A write to the X component of the fog coordinate output is redirected
   into a temp (fog_temp_i); the fog-mode specific code emitted further down
   computes the final fog output from that temp. */
681 if (dst
.File
== PROGRAM_OUTPUT
&&
682 dst
.Index
== VARYING_SLOT_FOGC
&&
683 dst
.WriteMask
& WRITEMASK_X
) {
684 fog_temp_i
= u_temp_i
;
685 dst
.File
= PROGRAM_TEMPORARY
;
686 dst
.Index
= fog_temp_i
;
691 /* These ops need special handling. */
694 /* pow takes only one argument, first scalar is in slot x, 2nd in slot z (other slots don't matter).
695 So may need to insert additional instruction */
696 if ((src
[0].File
== src
[1].File
) &&
697 (src
[0].Index
== src
[1].Index
)) {
698 o_inst
->op
= MAKE_VSF_OP(R200_VPI_OUT_OP_POW
, t_dst(&dst
),
699 t_dst_mask(dst
.WriteMask
));
700 o_inst
->src0
= MAKE_VSF_SOURCE(t_src_index(vp
, &src
[0]),
701 t_swizzle(GET_SWZ(src
[0].Swizzle
, 0)),
703 t_swizzle(GET_SWZ(src
[1].Swizzle
, 0)),
705 t_src_class(src
[0].File
),
706 src
[0].Negate
) | (src
[0].RelAddr
<< 4);
707 o_inst
->src1
= UNUSED_SRC_0
;
708 o_inst
->src2
= UNUSED_SRC_0
;
711 o_inst
->op
= MAKE_VSF_OP(R200_VPI_OUT_OP_ADD
,
712 (u_temp_i
<< R200_VPI_OUT_REG_INDEX_SHIFT
) | R200_VSF_OUT_CLASS_TMP
,
714 o_inst
->src0
= MAKE_VSF_SOURCE(t_src_index(vp
, &src
[0]),
715 t_swizzle(GET_SWZ(src
[0].Swizzle
, 0)),
716 SWIZZLE_ZERO
, SWIZZLE_ZERO
, SWIZZLE_ZERO
,
717 t_src_class(src
[0].File
),
718 src
[0].Negate
? VSF_FLAG_ALL
: VSF_FLAG_NONE
) | (src
[0].RelAddr
<< 4);
719 o_inst
->src1
= MAKE_VSF_SOURCE(t_src_index(vp
, &src
[1]),
720 SWIZZLE_ZERO
, SWIZZLE_ZERO
,
721 t_swizzle(GET_SWZ(src
[1].Swizzle
, 0)), SWIZZLE_ZERO
,
722 t_src_class(src
[1].File
),
723 src
[1].Negate
? VSF_FLAG_ALL
: VSF_FLAG_NONE
) | (src
[1].RelAddr
<< 4);
724 o_inst
->src2
= UNUSED_SRC_1
;
727 o_inst
->op
= MAKE_VSF_OP(R200_VPI_OUT_OP_POW
, t_dst(&dst
),
728 t_dst_mask(dst
.WriteMask
));
729 o_inst
->src0
= MAKE_VSF_SOURCE(u_temp_i
,
736 o_inst
->src1
= UNUSED_SRC_0
;
737 o_inst
->src2
= UNUSED_SRC_0
;
742 case OPCODE_MOV
://ADD RESULT 1.X Y Z W PARAM 0{} {X Y Z W} PARAM 0{} {ZERO ZERO ZERO ZERO}
744 o_inst
->op
= MAKE_VSF_OP(R200_VPI_OUT_OP_ADD
, t_dst(&dst
),
745 t_dst_mask(dst
.WriteMask
));
746 o_inst
->src0
= t_src(vp
, &src
[0]);
747 o_inst
->src1
= ZERO_SRC_0
;
748 o_inst
->src2
= UNUSED_SRC_1
;
752 /* only 2 read ports into temp memory thus may need the macro op MAD_2
753 instead (requiring 2 clocks) if all inputs are in temp memory
754 (and, only if they actually reference 3 distinct temps) */
755 hw_op
=(src
[0].File
== PROGRAM_TEMPORARY
&&
756 src
[1].File
== PROGRAM_TEMPORARY
&&
757 src
[2].File
== PROGRAM_TEMPORARY
&&
758 (((src
[0].RelAddr
<< 8) | src
[0].Index
) != ((src
[1].RelAddr
<< 8) | src
[1].Index
)) &&
759 (((src
[0].RelAddr
<< 8) | src
[0].Index
) != ((src
[2].RelAddr
<< 8) | src
[2].Index
)) &&
760 (((src
[1].RelAddr
<< 8) | src
[1].Index
) != ((src
[2].RelAddr
<< 8) | src
[2].Index
))) ?
761 R200_VPI_OUT_OP_MAD_2
: R200_VPI_OUT_OP_MAD
;
763 o_inst
->op
= MAKE_VSF_OP(hw_op
, t_dst(&dst
),
764 t_dst_mask(dst
.WriteMask
));
765 o_inst
->src0
= t_src(vp
, &src
[0]);
767 if ((o_inst
- vp
->instr
) == 31) {
768 /* fix up the broken vertex program of quake4 demo... */
769 o_inst
->src1
= MAKE_VSF_SOURCE(t_src_index(vp
, &src
[1]),
770 SWIZZLE_X
, SWIZZLE_X
, SWIZZLE_X
, SWIZZLE_X
,
771 t_src_class(src
[1].File
),
772 src
[1].Negate
) | (src
[1].RelAddr
<< 4);
773 o_inst
->src2
= MAKE_VSF_SOURCE(t_src_index(vp
, &src
[1]),
774 SWIZZLE_Y
, SWIZZLE_Y
, SWIZZLE_Y
, SWIZZLE_Y
,
775 t_src_class(src
[1].File
),
776 src
[1].Negate
) | (src
[1].RelAddr
<< 4);
779 o_inst
->src1
= t_src(vp
, &src
[1]);
780 o_inst
->src2
= t_src(vp
, &src
[2]);
783 o_inst
->src1
= t_src(vp
, &src
[1]);
784 o_inst
->src2
= t_src(vp
, &src
[2]);
788 case OPCODE_DP3
://DOT RESULT 1.X Y Z W PARAM 0{} {X Y Z ZERO} PARAM 0{} {X Y Z ZERO}
789 o_inst
->op
= MAKE_VSF_OP(R200_VPI_OUT_OP_DOT
, t_dst(&dst
),
790 t_dst_mask(dst
.WriteMask
));
792 o_inst
->src0
= MAKE_VSF_SOURCE(t_src_index(vp
, &src
[0]),
793 t_swizzle(GET_SWZ(src
[0].Swizzle
, 0)),
794 t_swizzle(GET_SWZ(src
[0].Swizzle
, 1)),
795 t_swizzle(GET_SWZ(src
[0].Swizzle
, 2)),
797 t_src_class(src
[0].File
),
798 src
[0].Negate
) | (src
[0].RelAddr
<< 4);
800 o_inst
->src1
= MAKE_VSF_SOURCE(t_src_index(vp
, &src
[1]),
801 t_swizzle(GET_SWZ(src
[1].Swizzle
, 0)),
802 t_swizzle(GET_SWZ(src
[1].Swizzle
, 1)),
803 t_swizzle(GET_SWZ(src
[1].Swizzle
, 2)),
805 t_src_class(src
[1].File
),
806 src
[1].Negate
) | (src
[1].RelAddr
<< 4);
808 o_inst
->src2
= UNUSED_SRC_1
;
811 case OPCODE_DPH
://DOT RESULT 1.X Y Z W PARAM 0{} {X Y Z ONE} PARAM 0{} {X Y Z W}
812 o_inst
->op
= MAKE_VSF_OP(R200_VPI_OUT_OP_DOT
, t_dst(&dst
),
813 t_dst_mask(dst
.WriteMask
));
815 o_inst
->src0
= MAKE_VSF_SOURCE(t_src_index(vp
, &src
[0]),
816 t_swizzle(GET_SWZ(src
[0].Swizzle
, 0)),
817 t_swizzle(GET_SWZ(src
[0].Swizzle
, 1)),
818 t_swizzle(GET_SWZ(src
[0].Swizzle
, 2)),
819 VSF_IN_COMPONENT_ONE
,
820 t_src_class(src
[0].File
),
821 src
[0].Negate
) | (src
[0].RelAddr
<< 4);
822 o_inst
->src1
= t_src(vp
, &src
[1]);
823 o_inst
->src2
= UNUSED_SRC_1
;
826 case OPCODE_SUB
://ADD RESULT 1.X Y Z W TMP 0{} {X Y Z W} PARAM 1{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W
827 o_inst
->op
= MAKE_VSF_OP(R200_VPI_OUT_OP_ADD
, t_dst(&dst
),
828 t_dst_mask(dst
.WriteMask
));
830 o_inst
->src0
= t_src(vp
, &src
[0]);
831 o_inst
->src1
= MAKE_VSF_SOURCE(t_src_index(vp
, &src
[1]),
832 t_swizzle(GET_SWZ(src
[1].Swizzle
, 0)),
833 t_swizzle(GET_SWZ(src
[1].Swizzle
, 1)),
834 t_swizzle(GET_SWZ(src
[1].Swizzle
, 2)),
835 t_swizzle(GET_SWZ(src
[1].Swizzle
, 3)),
836 t_src_class(src
[1].File
),
837 (!src
[1].Negate
) ? VSF_FLAG_ALL
: VSF_FLAG_NONE
) | (src
[1].RelAddr
<< 4);
838 o_inst
->src2
= UNUSED_SRC_1
;
841 case OPCODE_ABS
://MAX RESULT 1.X Y Z W PARAM 0{} {X Y Z W} PARAM 0{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W
842 o_inst
->op
=MAKE_VSF_OP(R200_VPI_OUT_OP_MAX
, t_dst(&dst
),
843 t_dst_mask(dst
.WriteMask
));
845 o_inst
->src0
=t_src(vp
, &src
[0]);
846 o_inst
->src1
=MAKE_VSF_SOURCE(t_src_index(vp
, &src
[0]),
847 t_swizzle(GET_SWZ(src
[0].Swizzle
, 0)),
848 t_swizzle(GET_SWZ(src
[0].Swizzle
, 1)),
849 t_swizzle(GET_SWZ(src
[0].Swizzle
, 2)),
850 t_swizzle(GET_SWZ(src
[0].Swizzle
, 3)),
851 t_src_class(src
[0].File
),
852 (!src
[0].Negate
) ? VSF_FLAG_ALL
: VSF_FLAG_NONE
) | (src
[0].RelAddr
<< 4);
853 o_inst
->src2
= UNUSED_SRC_1
;
857 /* FRC TMP 0.X Y Z W PARAM 0{} {X Y Z W}
858 ADD RESULT 1.X Y Z W PARAM 0{} {X Y Z W} TMP 0{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W */
860 o_inst
->op
= MAKE_VSF_OP(R200_VPI_OUT_OP_FRC
,
861 (u_temp_i
<< R200_VPI_OUT_REG_INDEX_SHIFT
) | R200_VSF_OUT_CLASS_TMP
,
862 t_dst_mask(dst
.WriteMask
));
864 o_inst
->src0
= t_src(vp
, &src
[0]);
865 o_inst
->src1
= UNUSED_SRC_0
;
866 o_inst
->src2
= UNUSED_SRC_1
;
869 o_inst
->op
= MAKE_VSF_OP(R200_VPI_OUT_OP_ADD
, t_dst(&dst
),
870 t_dst_mask(dst
.WriteMask
));
872 o_inst
->src0
= t_src(vp
, &src
[0]);
873 o_inst
->src1
= MAKE_VSF_SOURCE(u_temp_i
,
879 /* Not 100% sure about this */
880 (!src
[0].Negate
) ? VSF_FLAG_ALL
: VSF_FLAG_NONE
/*VSF_FLAG_ALL*/);
882 o_inst
->src2
= UNUSED_SRC_0
;
887 /* mul r0, r1.yzxw, r2.zxyw
888 mad r0, -r2.yzxw, r1.zxyw, r0
890 hw_op
=(src
[0].File
== PROGRAM_TEMPORARY
&&
891 src
[1].File
== PROGRAM_TEMPORARY
&&
892 (((src
[0].RelAddr
<< 8) | src
[0].Index
) != ((src
[1].RelAddr
<< 8) | src
[1].Index
))) ?
893 R200_VPI_OUT_OP_MAD_2
: R200_VPI_OUT_OP_MAD
;
895 o_inst
->op
= MAKE_VSF_OP(R200_VPI_OUT_OP_MUL
,
896 (u_temp_i
<< R200_VPI_OUT_REG_INDEX_SHIFT
) | R200_VSF_OUT_CLASS_TMP
,
897 t_dst_mask(dst
.WriteMask
));
899 o_inst
->src0
= MAKE_VSF_SOURCE(t_src_index(vp
, &src
[0]),
900 t_swizzle(GET_SWZ(src
[0].Swizzle
, 1)), // y
901 t_swizzle(GET_SWZ(src
[0].Swizzle
, 2)), // z
902 t_swizzle(GET_SWZ(src
[0].Swizzle
, 0)), // x
903 t_swizzle(GET_SWZ(src
[0].Swizzle
, 3)), // w
904 t_src_class(src
[0].File
),
905 src
[0].Negate
) | (src
[0].RelAddr
<< 4);
907 o_inst
->src1
= MAKE_VSF_SOURCE(t_src_index(vp
, &src
[1]),
908 t_swizzle(GET_SWZ(src
[1].Swizzle
, 2)), // z
909 t_swizzle(GET_SWZ(src
[1].Swizzle
, 0)), // x
910 t_swizzle(GET_SWZ(src
[1].Swizzle
, 1)), // y
911 t_swizzle(GET_SWZ(src
[1].Swizzle
, 3)), // w
912 t_src_class(src
[1].File
),
913 src
[1].Negate
) | (src
[1].RelAddr
<< 4);
915 o_inst
->src2
= UNUSED_SRC_1
;
919 o_inst
->op
= MAKE_VSF_OP(hw_op
, t_dst(&dst
),
920 t_dst_mask(dst
.WriteMask
));
922 o_inst
->src0
= MAKE_VSF_SOURCE(t_src_index(vp
, &src
[1]),
923 t_swizzle(GET_SWZ(src
[1].Swizzle
, 1)), // y
924 t_swizzle(GET_SWZ(src
[1].Swizzle
, 2)), // z
925 t_swizzle(GET_SWZ(src
[1].Swizzle
, 0)), // x
926 t_swizzle(GET_SWZ(src
[1].Swizzle
, 3)), // w
927 t_src_class(src
[1].File
),
928 (!src
[1].Negate
) ? VSF_FLAG_ALL
: VSF_FLAG_NONE
) | (src
[1].RelAddr
<< 4);
930 o_inst
->src1
= MAKE_VSF_SOURCE(t_src_index(vp
, &src
[0]),
931 t_swizzle(GET_SWZ(src
[0].Swizzle
, 2)), // z
932 t_swizzle(GET_SWZ(src
[0].Swizzle
, 0)), // x
933 t_swizzle(GET_SWZ(src
[0].Swizzle
, 1)), // y
934 t_swizzle(GET_SWZ(src
[0].Swizzle
, 3)), // w
935 t_src_class(src
[0].File
),
936 src
[0].Negate
) | (src
[0].RelAddr
<< 4);
938 o_inst
->src2
= MAKE_VSF_SOURCE(u_temp_i
+1,
953 o_inst
->op
= MAKE_VSF_OP(t_opcode(vpi
->Opcode
), t_dst(&dst
),
954 t_dst_mask(dst
.WriteMask
));
959 o_inst
->src0
= t_src_scalar(vp
, &src
[0]);
960 o_inst
->src1
= UNUSED_SRC_0
;
961 o_inst
->src2
= UNUSED_SRC_1
;
965 o_inst
->src0
= t_src_scalar(vp
, &src
[0]);
966 o_inst
->src1
= t_src_scalar(vp
, &src
[1]);
967 o_inst
->src2
= UNUSED_SRC_1
;
971 o_inst
->src0
= t_src_scalar(vp
, &src
[0]);
972 o_inst
->src1
= t_src_scalar(vp
, &src
[1]);
973 o_inst
->src2
= t_src_scalar(vp
, &src
[2]);
977 fprintf(stderr
, "illegal number of operands %lu\n", operands
);
984 o_inst
->src0
= t_src(vp
, &src
[0]);
985 o_inst
->src1
= UNUSED_SRC_0
;
986 o_inst
->src2
= UNUSED_SRC_1
;
990 o_inst
->src0
= t_src(vp
, &src
[0]);
991 o_inst
->src1
= t_src(vp
, &src
[1]);
992 o_inst
->src2
= UNUSED_SRC_1
;
996 o_inst
->src0
= t_src(vp
, &src
[0]);
997 o_inst
->src1
= t_src(vp
, &src
[1]);
998 o_inst
->src2
= t_src(vp
, &src
[2]);
1002 fprintf(stderr
, "illegal number of operands %lu\n", operands
);
1011 if (vp
->fogmode
== GL_EXP
) {
1012 o_inst
->op
= MAKE_VSF_OP(R200_VPI_OUT_OP_MUL
,
1013 (fog_temp_i
<< R200_VPI_OUT_REG_INDEX_SHIFT
) | R200_VSF_OUT_CLASS_TMP
,
1015 o_inst
->src0
= EASY_VSF_SOURCE(fog_temp_i
, X
, X
, X
, X
, TMP
, NONE
);
1016 o_inst
->src1
= EASY_VSF_SOURCE(vp
->fogpidx
, X
, X
, X
, X
, PARAM
, NONE
);
1017 o_inst
->src2
= UNUSED_SRC_1
;
1019 o_inst
->op
= MAKE_VSF_OP(R200_VPI_OUT_OP_EXP_E
,
1020 R200_VSF_OUT_CLASS_RESULT_FOGC
,
1022 o_inst
->src0
= EASY_VSF_SOURCE(fog_temp_i
, X
, X
, X
, X
, TMP
, ALL
);
1023 o_inst
->src1
= UNUSED_SRC_0
;
1024 o_inst
->src2
= UNUSED_SRC_1
;
1026 else if (vp
->fogmode
== GL_EXP2
) {
1027 o_inst
->op
= MAKE_VSF_OP(R200_VPI_OUT_OP_MUL
,
1028 (fog_temp_i
<< R200_VPI_OUT_REG_INDEX_SHIFT
) | R200_VSF_OUT_CLASS_TMP
,
1030 o_inst
->src0
= EASY_VSF_SOURCE(fog_temp_i
, X
, X
, X
, X
, TMP
, NONE
);
1031 o_inst
->src1
= EASY_VSF_SOURCE(vp
->fogpidx
, X
, X
, X
, X
, PARAM
, NONE
);
1032 o_inst
->src2
= UNUSED_SRC_1
;
1034 o_inst
->op
= MAKE_VSF_OP(R200_VPI_OUT_OP_MUL
,
1035 (fog_temp_i
<< R200_VPI_OUT_REG_INDEX_SHIFT
) | R200_VSF_OUT_CLASS_TMP
,
1037 o_inst
->src0
= EASY_VSF_SOURCE(fog_temp_i
, X
, X
, X
, X
, TMP
, NONE
);
1038 o_inst
->src1
= EASY_VSF_SOURCE(fog_temp_i
, X
, X
, X
, X
, TMP
, NONE
);
1039 o_inst
->src2
= UNUSED_SRC_1
;
1041 o_inst
->op
= MAKE_VSF_OP(R200_VPI_OUT_OP_EXP_E
,
1042 R200_VSF_OUT_CLASS_RESULT_FOGC
,
1044 o_inst
->src0
= EASY_VSF_SOURCE(fog_temp_i
, X
, X
, X
, X
, TMP
, ALL
);
1045 o_inst
->src1
= UNUSED_SRC_0
;
1046 o_inst
->src2
= UNUSED_SRC_1
;
1048 else { /* fogmode == GL_LINEAR */
1049 /* could do that with single op (dot) if using params like
1050 with fixed function pipeline fog */
1051 o_inst
->op
= MAKE_VSF_OP(R200_VPI_OUT_OP_ADD
,
1052 (fog_temp_i
<< R200_VPI_OUT_REG_INDEX_SHIFT
) | R200_VSF_OUT_CLASS_TMP
,
1054 o_inst
->src0
= EASY_VSF_SOURCE(fog_temp_i
, X
, X
, X
, X
, TMP
, ALL
);
1055 o_inst
->src1
= EASY_VSF_SOURCE(vp
->fogpidx
, Z
, Z
, Z
, Z
, PARAM
, NONE
);
1056 o_inst
->src2
= UNUSED_SRC_1
;
1058 o_inst
->op
= MAKE_VSF_OP(R200_VPI_OUT_OP_MUL
,
1059 R200_VSF_OUT_CLASS_RESULT_FOGC
,
1061 o_inst
->src0
= EASY_VSF_SOURCE(fog_temp_i
, X
, X
, X
, X
, TMP
, NONE
);
1062 o_inst
->src1
= EASY_VSF_SOURCE(vp
->fogpidx
, W
, W
, W
, W
, PARAM
, NONE
);
1063 o_inst
->src2
= UNUSED_SRC_1
;
/* Temp accounting: u_temp_used is the distance of u_temp_i from its initial
   value (R200_VSF_MAX_TEMPS - 1). NumNativeTemporaries is raised to the
   program's own temps plus these scratch temps when that sum is larger, and
   exceeding R200_VSF_MAX_TEMPS is reported.
   NOTE(review): the statements that move u_temp_i are not visible here;
   presumably it is decremented once per scratch temp taken - confirm. */
1069 u_temp_used
= (R200_VSF_MAX_TEMPS
- 1) - u_temp_i
;
1070 if (mesa_vp
->arb
.NumNativeTemporaries
<
1071 (mesa_vp
->arb
.NumTemporaries
+ u_temp_used
)) {
1072 mesa_vp
->arb
.NumNativeTemporaries
=
1073 mesa_vp
->arb
.NumTemporaries
+ u_temp_used
;
1075 if ((mesa_vp
->arb
.NumTemporaries
+ u_temp_used
) > R200_VSF_MAX_TEMPS
) {
1076 if (R200_DEBUG
& RADEON_FALLBACKS
) {
1077 fprintf(stderr
, "Ran out of temps, num temps %d, us %d\n", mesa_vp
->arb
.NumTemporaries
, u_temp_used
);
/* Scratch temps are released again: u_temp_i goes back to its start value. */
1081 u_temp_i
= R200_VSF_MAX_TEMPS
- 1;
/* Instruction budget: once R200_VSF_MAX_INST hw instructions have been
   written, NumNativeInstructions is set to 129, one more than the 128
   named in the debug message. */
1082 if(o_inst
- vp
->instr
>= R200_VSF_MAX_INST
) {
1083 mesa_vp
->arb
.NumNativeInstructions
= 129;
1084 if (R200_DEBUG
& RADEON_FALLBACKS
) {
1085 fprintf(stderr
, "more than 128 native instructions\n");
/* Record in vp->pos_end the index of an instruction whose output class is
   the position result (the value is overwritten on every match). */
1089 if ((o_inst
->op
& R200_VSF_OUT_CLASS_MASK
) == R200_VSF_OUT_CLASS_RESULT_POS
) {
1090 vp
->pos_end
= (o_inst
- vp
->instr
);
/* Translation finished: mark the program native and publish the number of
   hw instructions that were emitted. */
1094 vp
->native
= GL_TRUE
;
1095 mesa_vp
->arb
.NumNativeInstructions
= (o_inst
- vp
->instr
);
1097 fprintf(stderr
, "hw program:\n");
1098 for(i
=0; i
< vp
->program
.length
; i
++)
1099 fprintf(stderr
, "%08x\n", vp
->instr
[i
]);
/*
 * r200SetupVertexProg: prepare the hardware state atoms for the vertex
 * program found in ctx->VertexProgram.Current.
 *
 * Steps visible in this block:
 *  - (re)translate the program when it has not been translated yet, or when
 *    fog is enabled and ctx->Fog.Mode differs from vp->fogmode;
 *  - set or clear the R200_TCL_FALLBACK_VERTEX_PROGRAM fallback and return
 *    early if rmesa->radeon.TclFallback is set;
 *  - set R200_VAP_PROG_VTX_SHADER_ENABLE in VAP_SE_VAP_CNTL;
 *  - fill PVS_CNTL_1 / PVS_CNTL_2 from the program's native counts;
 *  - adjust TCL_UCP_VERT_BLEND_CTL when user clip planes are enabled;
 *  - if the program is not the one recorded in rmesa->curr_vp_hw, copy its
 *    instructions into the vpi[0] / vpi[1] atoms and record it.
 */
1104 void r200SetupVertexProg( struct gl_context
*ctx
) {
1105 r200ContextPtr rmesa
= R200_CONTEXT(ctx
);
1106 struct r200_vertex_program
*vp
= (struct r200_vertex_program
*)ctx
->VertexProgram
.Current
;
/* Clearing curr_vp_hw before translating makes the `vp != curr_vp_hw`
   test further down true, so the instruction copy runs again. */
1110 if (!vp
->translated
|| (ctx
->Fog
.Enabled
&& ctx
->Fog
.Mode
!= vp
->fogmode
)) {
1111 rmesa
->curr_vp_hw
= NULL
;
1112 r200_translate_vertex_program(ctx
, vp
);
1114 /* could optimize setting up vertex progs away for non-tcl hw */
/* Fall back unless the program is native AND
   r200VertexProgUpdateParams() reports success (non-zero). */
1115 fallback
= !(vp
->native
&& r200VertexProgUpdateParams(ctx
, vp
));
1116 TCL_FALLBACK(ctx
, R200_TCL_FALLBACK_VERTEX_PROGRAM
, fallback
);
/* No hardware state is touched below when a TCL fallback is active. */
1117 if (rmesa
->radeon
.TclFallback
) return;
1119 R200_STATECHANGE( rmesa
, vap
);
1120 /* FIXME: fglrx sets R200_VAP_SINGLE_BUF_STATE_ENABLE too. Do we need it?
1121 maybe only when using more than 64 inst / 96 param? */
1122 rmesa
->hw
.vap
.cmd
[VAP_SE_VAP_CNTL
] |= R200_VAP_PROG_VTX_SHADER_ENABLE
/*| R200_VAP_SINGLE_BUF_STATE_ENABLE*/;
1124 R200_STATECHANGE( rmesa
, pvs
);
/* PVS_CNTL_1: program start 0, program end = NumNativeInstructions - 1,
   combined with vp->pos_end. */
1126 rmesa
->hw
.pvs
.cmd
[PVS_CNTL_1
] = (0 << R200_PVS_CNTL_1_PROGRAM_START_SHIFT
) |
1127 ((vp
->mesa_program
.arb
.NumNativeInstructions
- 1) << R200_PVS_CNTL_1_PROGRAM_END_SHIFT
) |
1128 (vp
->pos_end
<< R200_PVS_CNTL_1_POS_END_SHIFT
);
/* PVS_CNTL_2: parameter offset 0, parameter count = NumNativeParameters. */
1129 rmesa
->hw
.pvs
.cmd
[PVS_CNTL_2
] = (0 << R200_PVS_CNTL_2_PARAM_OFFSET_SHIFT
) |
1130 (vp
->mesa_program
.arb
.NumNativeParameters
<< R200_PVS_CNTL_2_PARAM_COUNT_SHIFT
);
1132 /* maybe user clip planes just work with vertex progs... untested */
1133 if (ctx
->Transform
.ClipPlanesEnabled
) {
1134 R200_STATECHANGE( rmesa
, tcl
);
/* NOTE(review): this extract shows the OR of (ClipPlanesEnabled << 2)
   followed directly by the clear of mask 0xfc, with no `else` visible
   between them. Presumably they are the two arms of an if/else on
   IsPositionInvariant -- confirm against the complete file. */
1135 if (vp
->mesa_program
.arb
.IsPositionInvariant
) {
1136 rmesa
->hw
.tcl
.cmd
[TCL_UCP_VERT_BLEND_CTL
] |= (ctx
->Transform
.ClipPlanesEnabled
<< 2);
1139 rmesa
->hw
.tcl
.cmd
[TCL_UCP_VERT_BLEND_CTL
] &= ~(0xfc);
/* Copy instructions only when this program is not the one currently
   recorded in rmesa->curr_vp_hw. */
1143 if (vp
!= rmesa
->curr_vp_hw
) {
1144 GLuint count
= vp
->mesa_program
.arb
.NumNativeInstructions
;
1145 drm_radeon_cmd_header_t tmp
;
1147 R200_STATECHANGE( rmesa
, vpi
[0] );
1148 R200_STATECHANGE( rmesa
, vpi
[1] );
1150 /* FIXME: what about using a memcopy... */
/* First atom: at most 64 instructions; each instruction occupies four
   consecutive cmd slots (op, src0, src1, src2). */
1151 for (i
= 0; (i
< 64) && i
< count
; i
++) {
1152 rmesa
->hw
.vpi
[0].cmd
[VPI_OPDST_0
+ 4 * i
] = vp
->instr
[i
].op
;
1153 rmesa
->hw
.vpi
[0].cmd
[VPI_SRC0_0
+ 4 * i
] = vp
->instr
[i
].src0
;
1154 rmesa
->hw
.vpi
[0].cmd
[VPI_SRC1_0
+ 4 * i
] = vp
->instr
[i
].src1
;
1155 rmesa
->hw
.vpi
[0].cmd
[VPI_SRC2_0
+ 4 * i
] = vp
->instr
[i
].src2
;
1157 /* hack up the cmd_size so not the whole state atom is emitted always.
1158 This may require some more thought, we may emit half progs on lost state, but
1159 hopefully it won't matter?
1160 WARNING: must not use R200_DB_STATECHANGE, this will produce bogus (and rejected)
1161 packet emits (due to the mismatched cmd_size and count in cmd/last_cmd) */
/* cmd_size = one header slot + 4 slots per instruction, capped at 64
   instructions; the veclinear count in the header is patched to match. */
1162 rmesa
->hw
.vpi
[0].cmd_size
= 1 + 4 * ((count
> 64) ? 64 : count
);
1163 tmp
.i
= rmesa
->hw
.vpi
[0].cmd
[VPI_CMD_0
];
1164 tmp
.veclinear
.count
= (count
> 64) ? 64 : count
;
1165 rmesa
->hw
.vpi
[0].cmd
[VPI_CMD_0
] = tmp
.i
;
/* Second atom: instructions 64 and up.
   NOTE(review): count is GLuint, so `count - 64` wraps to a huge value when
   count < 64. No `count > 64` guard is visible in this extract; presumably
   one encloses this loop and the vpi[1] header patch below -- confirm. */
1167 for (i
= 0; i
< (count
- 64); i
++) {
1168 rmesa
->hw
.vpi
[1].cmd
[VPI_OPDST_0
+ 4 * i
] = vp
->instr
[i
+ 64].op
;
1169 rmesa
->hw
.vpi
[1].cmd
[VPI_SRC0_0
+ 4 * i
] = vp
->instr
[i
+ 64].src0
;
1170 rmesa
->hw
.vpi
[1].cmd
[VPI_SRC1_0
+ 4 * i
] = vp
->instr
[i
+ 64].src1
;
1171 rmesa
->hw
.vpi
[1].cmd
[VPI_SRC2_0
+ 4 * i
] = vp
->instr
[i
+ 64].src2
;
1173 rmesa
->hw
.vpi
[1].cmd_size
= 1 + 4 * (count
- 64);
1174 tmp
.i
= rmesa
->hw
.vpi
[1].cmd
[VPI_CMD_0
];
1175 tmp
.veclinear
.count
= count
- 64;
1176 rmesa
->hw
.vpi
[1].cmd
[VPI_CMD_0
] = tmp
.i
;
/* Remember which program the vpi atoms now hold. */
1178 rmesa
->curr_vp_hw
= vp
;
/*
 * r200NewProgram: allocate and initialise a program object for `target`.
 *
 * GL_VERTEX_PROGRAM_ARB:   allocates a struct r200_vertex_program with
 *                          rzalloc(NULL, ...) and initialises its embedded
 *                          mesa_program via _mesa_init_gl_program().
 * GL_FRAGMENT_PROGRAM_ARB: allocates a plain struct gl_program the same way.
 * A _mesa_problem() call with a "Bad target" message is also present,
 * presumably for any other target (the `default` label is not visible in
 * this extract).
 *
 * Returns whatever _mesa_init_gl_program() returns for the two handled
 * targets.
 *
 * NOTE(review): `is_arb_asm` is used below but its declaration is not
 * visible in this extract -- presumably a trailing parameter; confirm.
 * NOTE(review): the rzalloc() result is not checked before use here --
 * confirm that _mesa_init_gl_program() tolerates a failed allocation.
 */
1183 static struct gl_program
*
1184 r200NewProgram(struct gl_context
*ctx
, GLenum target
, GLuint id
,
1188 case GL_VERTEX_PROGRAM_ARB
: {
1189 struct r200_vertex_program
*vp
= rzalloc(NULL
,
1190 struct r200_vertex_program
);
1191 return _mesa_init_gl_program(&vp
->mesa_program
, target
, id
, is_arb_asm
);
1193 case GL_FRAGMENT_PROGRAM_ARB
: {
1194 struct gl_program
*prog
= rzalloc(NULL
, struct gl_program
);
1195 return _mesa_init_gl_program(prog
, target
, id
, is_arb_asm
);
1198 _mesa_problem(ctx
, "Bad target in r200NewProgram");
/*
 * r200DeleteProgram: delete a program object by delegating to
 * _mesa_delete_program(); no driver-specific teardown is visible here.
 */
1205 r200DeleteProgram(struct gl_context
*ctx
, struct gl_program
*prog
)
1207 _mesa_delete_program(ctx
, prog
);
/*
 * r200ProgramStringNotify: per-target handling for a program.
 *
 * GL_VERTEX_PROGRAM_ARB:  marks the program untranslated, translates it
 *                         immediately, and clears rmesa->curr_vp_hw.
 * GL_FRAGMENT_SHADER_ATI: clears rmesa->afs_loaded.
 *
 * _tnl_program_string() is then called and its result discarded.
 *
 * `prog` is cast to struct r200_vertex_program unconditionally, but in the
 * lines visible here `vp` is only dereferenced in the vertex-program case.
 */
1211 r200ProgramStringNotify(struct gl_context
*ctx
, GLenum target
, struct gl_program
*prog
)
1213 struct r200_vertex_program
*vp
= (void *)prog
;
1214 r200ContextPtr rmesa
= R200_CONTEXT(ctx
);
1217 case GL_VERTEX_PROGRAM_ARB
:
/* Reset the flag, then translate right away. */
1218 vp
->translated
= GL_FALSE
;
1220 /* memset(&vp->translated, 0, sizeof(struct r200_vertex_program) - sizeof(struct gl_program));*/
1221 r200_translate_vertex_program(ctx
, vp
);
/* With curr_vp_hw cleared, the `vp != rmesa->curr_vp_hw` test in
   r200SetupVertexProg is true for this program. */
1222 rmesa
->curr_vp_hw
= NULL
;
1224 case GL_FRAGMENT_SHADER_ATI
:
1225 rmesa
->afs_loaded
= NULL
;
1228 /* need this for tcl fallbacks */
1229 (void) _tnl_program_string(ctx
, target
, prog
);
1231 /* XXX check if program is legal, within limits */
/*
 * r200IsProgramNative: native-ness query for a program.
 *
 * For GL_VERTEX_PROGRAM_ARB the program is translated on demand if
 * vp->translated is not set. The statement that produces the result is not
 * visible in this extract (presumably it reports vp->native -- confirm).
 * A _mesa_problem() call is also present, presumably for other targets.
 *
 * NOTE(review): the _mesa_problem() message names "r200NewProgram" although
 * this function is r200IsProgramNative -- looks like a copy/paste slip;
 * correct the message when the code itself can be edited.
 */
1236 r200IsProgramNative(struct gl_context
*ctx
, GLenum target
, struct gl_program
*prog
)
1238 struct r200_vertex_program
*vp
= (void *)prog
;
1241 case GL_VERTEX_PROGRAM_ARB
:
/* Lazy translation: only when not already translated. */
1242 if (!vp
->translated
) {
1243 r200_translate_vertex_program(ctx
, vp
);
1245 /* does not take parameters etc. into account */
1248 _mesa_problem(ctx
, "Bad target in r200NewProgram");
/*
 * r200InitShaderFuncs: install this file's program hooks into the driver
 * function table.
 *
 * functions: table whose NewProgram, DeleteProgram, ProgramStringNotify and
 *            IsProgramNative entries are overwritten with the r200
 *            implementations; no other entry is touched in this block.
 */
1253 void r200InitShaderFuncs(struct dd_function_table
*functions
)
1255 functions
->NewProgram
= r200NewProgram
;
1256 functions
->DeleteProgram
= r200DeleteProgram
;
1257 functions
->ProgramStringNotify
= r200ProgramStringNotify
;
1258 functions
->IsProgramNative
= r200IsProgramNative
;