/**************************************************************************

Copyright (C) 2005 Aapo Tahkola <aet@rasterburn.org>
Copyright (C) 2008 Oliver McFadden <z3ro.geek@gmail.com>

Permission is hereby granted, free of charge, to any person obtaining a
copy of this software and associated documentation files (the "Software"),
to deal in the Software without restriction, including without limitation
on the rights to use, copy, modify, merge, publish, distribute, sub
license, and/or sell copies of the Software, and to permit persons to whom
the Software is furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice (including the next
paragraph) shall be included in all copies or substantial portions of the
Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
USE OR OTHER DEALINGS IN THE SOFTWARE.

**************************************************************************/
/* Radeon R5xx Acceleration, Revision 1.2 */
31 #include "main/glheader.h"
32 #include "main/macros.h"
33 #include "main/enums.h"
34 #include "shader/program.h"
35 #include "shader/programopt.h"
36 #include "shader/prog_instruction.h"
37 #include "shader/prog_optimize.h"
38 #include "shader/prog_parameter.h"
39 #include "shader/prog_print.h"
40 #include "shader/prog_statevars.h"
43 #include "compiler/radeon_compiler.h"
44 #include "compiler/radeon_nqssadce.h"
45 #include "r300_context.h"
46 #include "r300_state.h"
49 * Write parameter array for the given vertex program into dst.
50 * Return the total number of components written.
52 static int r300VertexProgUpdateParams(GLcontext
* ctx
, struct r300_vertex_program
*vp
, float *dst
)
56 if (vp
->Base
->IsNVProgram
) {
57 _mesa_load_tracked_matrices(ctx
);
59 if (vp
->Base
->Base
.Parameters
) {
60 _mesa_load_state_parameters(ctx
, vp
->Base
->Base
.Parameters
);
64 if (vp
->code
.constants
.Count
* 4 > VSF_MAX_FRAGMENT_LENGTH
) {
65 /* Should have checked this earlier... */
66 fprintf(stderr
, "%s:Params exhausted\n", __FUNCTION__
);
70 for(i
= 0; i
< vp
->code
.constants
.Count
; ++i
) {
71 const float * src
= 0;
72 const struct rc_constant
* constant
= &vp
->code
.constants
.Constants
[i
];
74 switch(constant
->Type
) {
75 case RC_CONSTANT_EXTERNAL
:
76 if (vp
->Base
->IsNVProgram
) {
77 src
= ctx
->VertexProgram
.Parameters
[constant
->u
.External
];
79 src
= vp
->Base
->Base
.Parameters
->ParameterValues
[constant
->u
.External
];
83 case RC_CONSTANT_IMMEDIATE
:
84 src
= constant
->u
.Immediate
;
89 dst
[4*i
+ 1] = src
[1];
90 dst
[4*i
+ 2] = src
[2];
91 dst
[4*i
+ 3] = src
[3];
94 return 4 * vp
->code
.constants
.Count
;
97 static GLbitfield
compute_required_outputs(struct gl_vertex_program
* vp
, GLbitfield fpreads
)
99 GLbitfield outputs
= 0;
102 #define ADD_OUTPUT(fp_attr, vp_result) \
104 if (fpreads & (1 << (fp_attr))) \
105 outputs |= (1 << (vp_result)); \
108 ADD_OUTPUT(FRAG_ATTRIB_COL0
, VERT_RESULT_COL0
);
109 ADD_OUTPUT(FRAG_ATTRIB_COL1
, VERT_RESULT_COL1
);
111 for (i
= 0; i
<= 7; ++i
) {
112 ADD_OUTPUT(FRAG_ATTRIB_TEX0
+ i
, VERT_RESULT_TEX0
+ i
);
117 if ((fpreads
& (1 << FRAG_ATTRIB_COL0
)) &&
118 (vp
->Base
.OutputsWritten
& (1 << VERT_RESULT_BFC0
)))
119 outputs
|= 1 << VERT_RESULT_BFC0
;
120 if ((fpreads
& (1 << FRAG_ATTRIB_COL1
)) &&
121 (vp
->Base
.OutputsWritten
& (1 << VERT_RESULT_BFC1
)))
122 outputs
|= 1 << VERT_RESULT_BFC1
;
124 outputs
|= 1 << VERT_RESULT_HPOS
;
125 if (vp
->Base
.OutputsWritten
& (1 << VERT_RESULT_PSIZ
))
126 outputs
|= 1 << VERT_RESULT_PSIZ
;
132 static void t_inputs_outputs(struct r300_vertex_program_compiler
* c
)
136 GLuint OutputsWritten
, InputsRead
;
138 OutputsWritten
= c
->Base
.Program
.OutputsWritten
;
139 InputsRead
= c
->Base
.Program
.InputsRead
;
142 for (i
= 0; i
< VERT_ATTRIB_MAX
; i
++) {
143 if (InputsRead
& (1 << i
))
144 c
->code
->inputs
[i
] = ++cur_reg
;
146 c
->code
->inputs
[i
] = -1;
150 for (i
= 0; i
< VERT_RESULT_MAX
; i
++)
151 c
->code
->outputs
[i
] = -1;
153 assert(OutputsWritten
& (1 << VERT_RESULT_HPOS
));
155 if (OutputsWritten
& (1 << VERT_RESULT_HPOS
)) {
156 c
->code
->outputs
[VERT_RESULT_HPOS
] = cur_reg
++;
159 if (OutputsWritten
& (1 << VERT_RESULT_PSIZ
)) {
160 c
->code
->outputs
[VERT_RESULT_PSIZ
] = cur_reg
++;
163 /* If we're writing back facing colors we need to send
164 * four colors to make front/back face colors selection work.
165 * If the vertex program doesn't write all 4 colors, lets
166 * pretend it does by skipping output index reg so the colors
167 * get written into appropriate output vectors.
169 if (OutputsWritten
& (1 << VERT_RESULT_COL0
)) {
170 c
->code
->outputs
[VERT_RESULT_COL0
] = cur_reg
++;
171 } else if (OutputsWritten
& (1 << VERT_RESULT_BFC0
) ||
172 OutputsWritten
& (1 << VERT_RESULT_BFC1
)) {
176 if (OutputsWritten
& (1 << VERT_RESULT_COL1
)) {
177 c
->code
->outputs
[VERT_RESULT_COL1
] = cur_reg
++;
178 } else if (OutputsWritten
& (1 << VERT_RESULT_BFC0
) ||
179 OutputsWritten
& (1 << VERT_RESULT_BFC1
)) {
183 if (OutputsWritten
& (1 << VERT_RESULT_BFC0
)) {
184 c
->code
->outputs
[VERT_RESULT_BFC0
] = cur_reg
++;
185 } else if (OutputsWritten
& (1 << VERT_RESULT_BFC1
)) {
189 if (OutputsWritten
& (1 << VERT_RESULT_BFC1
)) {
190 c
->code
->outputs
[VERT_RESULT_BFC1
] = cur_reg
++;
191 } else if (OutputsWritten
& (1 << VERT_RESULT_BFC0
)) {
195 for (i
= VERT_RESULT_TEX0
; i
<= VERT_RESULT_TEX7
; i
++) {
196 if (OutputsWritten
& (1 << i
)) {
197 c
->code
->outputs
[i
] = cur_reg
++;
201 if (OutputsWritten
& (1 << VERT_RESULT_FOGC
)) {
202 c
->code
->outputs
[VERT_RESULT_FOGC
] = cur_reg
++;
207 * The NV_vertex_program spec mandates that all registers be
208 * initialized to zero. We do this here unconditionally.
210 * \note We rely on dead-code elimination in the compiler.
212 static void initialize_NV_registers(struct radeon_compiler
* compiler
)
215 struct rc_instruction
* inst
;
217 for(reg
= 0; reg
< 12; ++reg
) {
218 inst
= rc_insert_new_instruction(compiler
, &compiler
->Program
.Instructions
);
219 inst
->I
.Opcode
= OPCODE_MOV
;
220 inst
->I
.DstReg
.File
= PROGRAM_TEMPORARY
;
221 inst
->I
.DstReg
.Index
= reg
;
222 inst
->I
.SrcReg
[0].File
= PROGRAM_BUILTIN
;
223 inst
->I
.SrcReg
[0].Swizzle
= SWIZZLE_0000
;
226 inst
= rc_insert_new_instruction(compiler
, &compiler
->Program
.Instructions
);
227 inst
->I
.Opcode
= OPCODE_ARL
;
228 inst
->I
.DstReg
.File
= PROGRAM_ADDRESS
;
229 inst
->I
.DstReg
.Index
= 0;
230 inst
->I
.DstReg
.WriteMask
= WRITEMASK_X
;
231 inst
->I
.SrcReg
[0].File
= PROGRAM_BUILTIN
;
232 inst
->I
.SrcReg
[0].Swizzle
= SWIZZLE_0000
;
235 static struct r300_vertex_program
*build_program(GLcontext
*ctx
,
236 struct r300_vertex_program_key
*wanted_key
,
237 const struct gl_vertex_program
*mesa_vp
)
239 struct r300_vertex_program
*vp
;
240 struct r300_vertex_program_compiler compiler
;
242 vp
= _mesa_calloc(sizeof(*vp
));
243 vp
->Base
= (struct gl_vertex_program
*) _mesa_clone_program(ctx
, &mesa_vp
->Base
);
244 _mesa_memcpy(&vp
->key
, wanted_key
, sizeof(vp
->key
));
246 rc_init(&compiler
.Base
);
247 compiler
.Base
.Debug
= (RADEON_DEBUG
& RADEON_VERTS
) ? GL_TRUE
: GL_FALSE
;
249 compiler
.code
= &vp
->code
;
250 compiler
.RequiredOutputs
= compute_required_outputs(vp
->Base
, vp
->key
.FpReads
);
251 compiler
.SetHwInputOutput
= &t_inputs_outputs
;
253 if (compiler
.Base
.Debug
) {
254 fprintf(stderr
, "Initial vertex program:\n");
255 _mesa_print_program(&vp
->Base
->Base
);
259 if (mesa_vp
->IsPositionInvariant
) {
260 _mesa_insert_mvp_code(ctx
, vp
->Base
);
263 rc_mesa_to_rc_program(&compiler
.Base
, &vp
->Base
->Base
);
265 if (mesa_vp
->IsNVProgram
)
266 initialize_NV_registers(&compiler
.Base
);
268 rc_move_output(&compiler
.Base
, VERT_RESULT_PSIZ
, VERT_RESULT_PSIZ
, WRITEMASK_X
);
270 if (vp
->key
.WPosAttr
!= FRAG_ATTRIB_MAX
) {
271 rc_copy_output(&compiler
.Base
,
273 vp
->key
.WPosAttr
- FRAG_ATTRIB_TEX0
+ VERT_RESULT_TEX0
);
276 if (vp
->key
.FogAttr
!= FRAG_ATTRIB_MAX
) {
277 rc_move_output(&compiler
.Base
,
279 vp
->key
.FogAttr
- FRAG_ATTRIB_TEX0
+ VERT_RESULT_TEX0
, WRITEMASK_X
);
282 r3xx_compile_vertex_program(&compiler
);
283 vp
->error
= compiler
.Base
.Error
;
285 vp
->Base
->Base
.InputsRead
= vp
->code
.InputsRead
;
286 vp
->Base
->Base
.OutputsWritten
= vp
->code
.OutputsWritten
;
288 rc_destroy(&compiler
.Base
);
293 struct r300_vertex_program
* r300SelectAndTranslateVertexShader(GLcontext
*ctx
)
295 r300ContextPtr r300
= R300_CONTEXT(ctx
);
296 struct r300_vertex_program_key wanted_key
= { 0 };
297 struct r300_vertex_program_cont
*vpc
;
298 struct r300_vertex_program
*vp
;
300 vpc
= (struct r300_vertex_program_cont
*)ctx
->VertexProgram
._Current
;
301 wanted_key
.FpReads
= r300
->selected_fp
->InputsRead
;
302 wanted_key
.FogAttr
= r300
->selected_fp
->fog_attr
;
303 wanted_key
.WPosAttr
= r300
->selected_fp
->wpos_attr
;
305 for (vp
= vpc
->progs
; vp
; vp
= vp
->next
) {
306 if (_mesa_memcmp(&vp
->key
, &wanted_key
, sizeof(wanted_key
))
308 return r300
->selected_vp
= vp
;
312 vp
= build_program(ctx
, &wanted_key
, &vpc
->mesa_program
);
313 vp
->next
= vpc
->progs
;
316 return r300
->selected_vp
= vp
;
/**
 * Raise the vpu.count field of the command header at \p ptr to
 * new_count / 4 if that is larger than the current value; the count is
 * never lowered.
 *
 * Wrapped in do { } while(0) so it behaves as a single statement.
 * The assert guards against truncation; vpu.count is presumably an 8-bit
 * field of drm_r300_cmd_header_t -- confirm against radeon_drm.h.
 */
#define bump_vpu_count(ptr, new_count)   do { \
		drm_r300_cmd_header_t* _p=((drm_r300_cmd_header_t*)(ptr)); \
		int _nc=(new_count)/4; \
		assert(_nc < 256); \
		if(_nc>_p->vpu.count)_p->vpu.count=_nc; \
	} while(0)
326 static void r300EmitVertexProgram(r300ContextPtr r300
, int dest
, struct r300_vertex_program_code
*code
)
330 assert((code
->length
> 0) && (code
->length
% 4 == 0));
332 R300_STATECHANGE( r300
, vap_flush
);
334 switch ((dest
>> 8) & 0xf) {
336 R300_STATECHANGE(r300
, vpi
);
337 for (i
= 0; i
< code
->length
; i
++)
338 r300
->hw
.vpi
.cmd
[R300_VPI_INSTR_0
+ i
+ 4 * (dest
& 0xff)] = (code
->body
.d
[i
]);
339 bump_vpu_count(r300
->hw
.vpi
.cmd
, code
->length
+ 4 * (dest
& 0xff));
342 R300_STATECHANGE(r300
, vpp
);
343 for (i
= 0; i
< code
->length
; i
++)
344 r300
->hw
.vpp
.cmd
[R300_VPP_PARAM_0
+ i
+ 4 * (dest
& 0xff)] = (code
->body
.d
[i
]);
345 bump_vpu_count(r300
->hw
.vpp
.cmd
, code
->length
+ 4 * (dest
& 0xff));
348 R300_STATECHANGE(r300
, vps
);
349 for (i
= 0; i
< code
->length
; i
++)
350 r300
->hw
.vps
.cmd
[1 + i
+ 4 * (dest
& 0xff)] = (code
->body
.d
[i
]);
351 bump_vpu_count(r300
->hw
.vps
.cmd
, code
->length
+ 4 * (dest
& 0xff));
354 fprintf(stderr
, "%s:%s don't know how to handle dest %04x\n", __FILE__
, __FUNCTION__
, dest
);
359 void r300SetupVertexProgram(r300ContextPtr rmesa
)
361 GLcontext
*ctx
= rmesa
->radeon
.glCtx
;
362 struct r300_vertex_program
*prog
= rmesa
->selected_vp
;
366 /* Reset state, in case we don't use something */
367 ((drm_r300_cmd_header_t
*) rmesa
->hw
.vpp
.cmd
)->vpu
.count
= 0;
368 ((drm_r300_cmd_header_t
*) rmesa
->hw
.vpi
.cmd
)->vpu
.count
= 0;
369 ((drm_r300_cmd_header_t
*) rmesa
->hw
.vps
.cmd
)->vpu
.count
= 0;
371 R300_STATECHANGE(rmesa
, vap_flush
);
372 R300_STATECHANGE(rmesa
, vpp
);
373 param_count
= r300VertexProgUpdateParams(ctx
, prog
, (float *)&rmesa
->hw
.vpp
.cmd
[R300_VPP_PARAM_0
]);
374 bump_vpu_count(rmesa
->hw
.vpp
.cmd
, param_count
);
377 r300EmitVertexProgram(rmesa
, R300_PVS_CODE_START
, &(prog
->code
));
378 inst_count
= (prog
->code
.length
/ 4) - 1;
380 r300VapCntl(rmesa
, _mesa_bitcount(prog
->code
.InputsRead
),
381 _mesa_bitcount(prog
->code
.OutputsWritten
), prog
->code
.num_temporaries
);
383 R300_STATECHANGE(rmesa
, pvs
);
384 rmesa
->hw
.pvs
.cmd
[R300_PVS_CNTL_1
] = (0 << R300_PVS_FIRST_INST_SHIFT
) | (inst_count
<< R300_PVS_XYZW_VALID_INST_SHIFT
) |
385 (inst_count
<< R300_PVS_LAST_INST_SHIFT
);
387 rmesa
->hw
.pvs
.cmd
[R300_PVS_CNTL_2
] = (0 << R300_PVS_CONST_BASE_OFFSET_SHIFT
) | (param_count
<< R300_PVS_MAX_CONST_ADDR_SHIFT
);
388 rmesa
->hw
.pvs
.cmd
[R300_PVS_CNTL_3
] = (inst_count
<< R300_PVS_LAST_VTX_SRC_INST_SHIFT
);