/**************************************************************************

Copyright (C) 2005 Aapo Tahkola <aet@rasterburn.org>
Copyright (C) 2008 Oliver McFadden <z3ro.geek@gmail.com>

Permission is hereby granted, free of charge, to any person obtaining a
copy of this software and associated documentation files (the "Software"),
to deal in the Software without restriction, including without limitation
on the rights to use, copy, modify, merge, publish, distribute, sub
license, and/or sell copies of the Software, and to permit persons to whom
the Software is furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice (including the next
paragraph) shall be included in all copies or substantial portions of the
Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
USE OR OTHER DEALINGS IN THE SOFTWARE.

**************************************************************************/

/* Radeon R5xx Acceleration, Revision 1.2 */
31 #include "main/glheader.h"
32 #include "main/macros.h"
33 #include "main/enums.h"
34 #include "program/program.h"
35 #include "program/programopt.h"
36 #include "program/prog_instruction.h"
37 #include "program/prog_parameter.h"
38 #include "program/prog_print.h"
39 #include "program/prog_statevars.h"
42 #include "compiler/radeon_compiler.h"
43 #include "radeon_mesa_to_rc.h"
44 #include "r300_context.h"
45 #include "r300_fragprog_common.h"
46 #include "r300_state.h"
49 * Write parameter array for the given vertex program into dst.
50 * Return the total number of components written.
52 static int r300VertexProgUpdateParams(GLcontext
* ctx
, struct r300_vertex_program
*vp
, float *dst
)
56 if (vp
->Base
->IsNVProgram
) {
57 _mesa_load_tracked_matrices(ctx
);
59 if (vp
->Base
->Base
.Parameters
) {
60 _mesa_load_state_parameters(ctx
, vp
->Base
->Base
.Parameters
);
64 for(i
= 0; i
< vp
->code
.constants
.Count
; ++i
) {
65 const float * src
= 0;
66 const struct rc_constant
* constant
= &vp
->code
.constants
.Constants
[i
];
68 switch(constant
->Type
) {
69 case RC_CONSTANT_EXTERNAL
:
70 if (vp
->Base
->IsNVProgram
) {
71 src
= ctx
->VertexProgram
.Parameters
[constant
->u
.External
];
73 src
= vp
->Base
->Base
.Parameters
->ParameterValues
[constant
->u
.External
];
77 case RC_CONSTANT_IMMEDIATE
:
78 src
= constant
->u
.Immediate
;
84 dst
[4*i
+ 1] = src
[1];
85 dst
[4*i
+ 2] = src
[2];
86 dst
[4*i
+ 3] = src
[3];
89 return 4 * vp
->code
.constants
.Count
;
92 static GLbitfield
compute_required_outputs(struct gl_vertex_program
* vp
, GLbitfield fpreads
)
94 GLbitfield outputs
= 0;
97 #define ADD_OUTPUT(fp_attr, vp_result) \
99 if (fpreads & (1 << (fp_attr))) \
100 outputs |= (1 << (vp_result)); \
103 ADD_OUTPUT(FRAG_ATTRIB_COL0
, VERT_RESULT_COL0
);
104 ADD_OUTPUT(FRAG_ATTRIB_COL1
, VERT_RESULT_COL1
);
106 for (i
= 0; i
<= 7; ++i
) {
107 ADD_OUTPUT(FRAG_ATTRIB_TEX0
+ i
, VERT_RESULT_TEX0
+ i
);
112 if ((fpreads
& (1 << FRAG_ATTRIB_COL0
)) &&
113 (vp
->Base
.OutputsWritten
& (1 << VERT_RESULT_BFC0
)))
114 outputs
|= 1 << VERT_RESULT_BFC0
;
115 if ((fpreads
& (1 << FRAG_ATTRIB_COL1
)) &&
116 (vp
->Base
.OutputsWritten
& (1 << VERT_RESULT_BFC1
)))
117 outputs
|= 1 << VERT_RESULT_BFC1
;
119 outputs
|= 1 << VERT_RESULT_HPOS
;
120 if (vp
->Base
.OutputsWritten
& (1 << VERT_RESULT_PSIZ
))
121 outputs
|= 1 << VERT_RESULT_PSIZ
;
127 static void t_inputs_outputs(struct r300_vertex_program_compiler
* c
)
131 GLuint OutputsWritten
, InputsRead
;
133 OutputsWritten
= c
->Base
.Program
.OutputsWritten
;
134 InputsRead
= c
->Base
.Program
.InputsRead
;
137 for (i
= 0; i
< VERT_ATTRIB_MAX
; i
++) {
138 if (InputsRead
& (1 << i
))
139 c
->code
->inputs
[i
] = ++cur_reg
;
141 c
->code
->inputs
[i
] = -1;
145 for (i
= 0; i
< VERT_RESULT_MAX
; i
++)
146 c
->code
->outputs
[i
] = -1;
148 assert(OutputsWritten
& (1 << VERT_RESULT_HPOS
));
150 if (OutputsWritten
& (1 << VERT_RESULT_HPOS
)) {
151 c
->code
->outputs
[VERT_RESULT_HPOS
] = cur_reg
++;
154 if (OutputsWritten
& (1 << VERT_RESULT_PSIZ
)) {
155 c
->code
->outputs
[VERT_RESULT_PSIZ
] = cur_reg
++;
158 /* If we're writing back facing colors we need to send
159 * four colors to make front/back face colors selection work.
160 * If the vertex program doesn't write all 4 colors, lets
161 * pretend it does by skipping output index reg so the colors
162 * get written into appropriate output vectors.
164 if (OutputsWritten
& (1 << VERT_RESULT_COL0
)) {
165 c
->code
->outputs
[VERT_RESULT_COL0
] = cur_reg
++;
166 } else if (OutputsWritten
& (1 << VERT_RESULT_BFC0
) ||
167 OutputsWritten
& (1 << VERT_RESULT_BFC1
)) {
171 if (OutputsWritten
& (1 << VERT_RESULT_COL1
)) {
172 c
->code
->outputs
[VERT_RESULT_COL1
] = cur_reg
++;
173 } else if (OutputsWritten
& (1 << VERT_RESULT_BFC0
) ||
174 OutputsWritten
& (1 << VERT_RESULT_BFC1
)) {
178 if (OutputsWritten
& (1 << VERT_RESULT_BFC0
)) {
179 c
->code
->outputs
[VERT_RESULT_BFC0
] = cur_reg
++;
180 } else if (OutputsWritten
& (1 << VERT_RESULT_BFC1
)) {
184 if (OutputsWritten
& (1 << VERT_RESULT_BFC1
)) {
185 c
->code
->outputs
[VERT_RESULT_BFC1
] = cur_reg
++;
186 } else if (OutputsWritten
& (1 << VERT_RESULT_BFC0
)) {
190 for (i
= VERT_RESULT_TEX0
; i
<= VERT_RESULT_TEX7
; i
++) {
191 if (OutputsWritten
& (1 << i
)) {
192 c
->code
->outputs
[i
] = cur_reg
++;
196 if (OutputsWritten
& (1 << VERT_RESULT_FOGC
)) {
197 c
->code
->outputs
[VERT_RESULT_FOGC
] = cur_reg
++;
202 * The NV_vertex_program spec mandates that all registers be
203 * initialized to zero. We do this here unconditionally.
205 * \note We rely on dead-code elimination in the compiler.
207 static void initialize_NV_registers(struct radeon_compiler
* compiler
)
210 struct rc_instruction
* inst
;
212 for(reg
= 0; reg
< 12; ++reg
) {
213 inst
= rc_insert_new_instruction(compiler
, &compiler
->Program
.Instructions
);
214 inst
->U
.I
.Opcode
= RC_OPCODE_MOV
;
215 inst
->U
.I
.DstReg
.File
= RC_FILE_TEMPORARY
;
216 inst
->U
.I
.DstReg
.Index
= reg
;
217 inst
->U
.I
.SrcReg
[0].File
= RC_FILE_NONE
;
218 inst
->U
.I
.SrcReg
[0].Swizzle
= RC_SWIZZLE_0000
;
221 inst
= rc_insert_new_instruction(compiler
, &compiler
->Program
.Instructions
);
222 inst
->U
.I
.Opcode
= RC_OPCODE_ARL
;
223 inst
->U
.I
.DstReg
.File
= RC_FILE_ADDRESS
;
224 inst
->U
.I
.DstReg
.Index
= 0;
225 inst
->U
.I
.DstReg
.WriteMask
= WRITEMASK_X
;
226 inst
->U
.I
.SrcReg
[0].File
= RC_FILE_NONE
;
227 inst
->U
.I
.SrcReg
[0].Swizzle
= RC_SWIZZLE_0000
;
230 static struct r300_vertex_program
*build_program(GLcontext
*ctx
,
231 struct r300_vertex_program_key
*wanted_key
,
232 const struct gl_vertex_program
*mesa_vp
)
234 struct r300_vertex_program
*vp
;
235 struct r300_vertex_program_compiler compiler
;
237 vp
= calloc(1, sizeof(*vp
));
238 vp
->Base
= _mesa_clone_vertex_program(ctx
, mesa_vp
);
239 memcpy(&vp
->key
, wanted_key
, sizeof(vp
->key
));
241 rc_init(&compiler
.Base
);
242 compiler
.Base
.Debug
= (RADEON_DEBUG
& RADEON_VERTS
) ? GL_TRUE
: GL_FALSE
;
244 compiler
.code
= &vp
->code
;
245 compiler
.RequiredOutputs
= compute_required_outputs(vp
->Base
, vp
->key
.FpReads
);
246 compiler
.SetHwInputOutput
= &t_inputs_outputs
;
247 compiler
.Base
.is_r500
= R300_CONTEXT(ctx
)->radeon
.radeonScreen
->chip_family
>= CHIP_FAMILY_RV515
;
248 compiler
.Base
.max_temp_regs
= 32;
249 compiler
.Base
.max_constants
= 256;
250 compiler
.Base
.max_alu_insts
= compiler
.Base
.is_r500
? 1024 : 256;
252 if (compiler
.Base
.Debug
) {
253 fprintf(stderr
, "Initial vertex program:\n");
254 _mesa_print_program(&vp
->Base
->Base
);
258 if (mesa_vp
->IsPositionInvariant
) {
259 _mesa_insert_mvp_code(ctx
, vp
->Base
);
262 radeon_mesa_to_rc_program(&compiler
.Base
, &vp
->Base
->Base
);
264 if (mesa_vp
->IsNVProgram
)
265 initialize_NV_registers(&compiler
.Base
);
267 rc_move_output(&compiler
.Base
, VERT_RESULT_PSIZ
, VERT_RESULT_PSIZ
, WRITEMASK_X
);
269 if (vp
->key
.WPosAttr
!= FRAG_ATTRIB_MAX
) {
270 unsigned int vp_wpos_attr
= vp
->key
.WPosAttr
- FRAG_ATTRIB_TEX0
+ VERT_RESULT_TEX0
;
272 /* Set empty writemask for instructions writing to vp_wpos_attr
273 * before moving the wpos attr there.
274 * Such instructions will be removed by DCE.
276 rc_move_output(&compiler
.Base
, vp_wpos_attr
, vp
->key
.WPosAttr
, 0);
277 rc_copy_output(&compiler
.Base
, VERT_RESULT_HPOS
, vp_wpos_attr
);
280 if (vp
->key
.FogAttr
!= FRAG_ATTRIB_MAX
) {
281 unsigned int vp_fog_attr
= vp
->key
.FogAttr
- FRAG_ATTRIB_TEX0
+ VERT_RESULT_TEX0
;
283 /* Set empty writemask for instructions writing to vp_fog_attr
284 * before moving the fog attr there.
285 * Such instructions will be removed by DCE.
287 rc_move_output(&compiler
.Base
, vp_fog_attr
, vp
->key
.FogAttr
, 0);
288 rc_move_output(&compiler
.Base
, VERT_RESULT_FOGC
, vp_fog_attr
, WRITEMASK_X
);
291 r3xx_compile_vertex_program(&compiler
);
293 if (vp
->code
.constants
.Count
> ctx
->Const
.VertexProgram
.MaxParameters
) {
294 rc_error(&compiler
.Base
, "Program exceeds constant buffer size limit\n");
297 vp
->error
= compiler
.Base
.Error
;
299 vp
->Base
->Base
.InputsRead
= vp
->code
.InputsRead
;
300 vp
->Base
->Base
.OutputsWritten
= vp
->code
.OutputsWritten
;
302 rc_destroy(&compiler
.Base
);
307 struct r300_vertex_program
* r300SelectAndTranslateVertexShader(GLcontext
*ctx
)
309 r300ContextPtr r300
= R300_CONTEXT(ctx
);
310 struct r300_vertex_program_key wanted_key
= { 0 };
311 struct r300_vertex_program_cont
*vpc
;
312 struct r300_vertex_program
*vp
;
314 vpc
= (struct r300_vertex_program_cont
*)ctx
->VertexProgram
._Current
;
316 if (!r300
->selected_fp
) {
317 /* This can happen when GetProgramiv is called to check
318 * whether the program runs natively.
320 * To be honest, this is not a very good solution,
321 * but solving the problem of reporting good values
322 * for those queries is tough anyway considering that
323 * we recompile vertex programs based on the precise
324 * fragment program that is in use.
326 r300SelectAndTranslateFragmentShader(ctx
);
329 assert(r300
->selected_fp
);
330 wanted_key
.FpReads
= r300
->selected_fp
->InputsRead
;
331 wanted_key
.FogAttr
= r300
->selected_fp
->fog_attr
;
332 wanted_key
.WPosAttr
= r300
->selected_fp
->wpos_attr
;
334 for (vp
= vpc
->progs
; vp
; vp
= vp
->next
) {
335 if (memcmp(&vp
->key
, &wanted_key
, sizeof(wanted_key
)) == 0) {
336 return r300
->selected_vp
= vp
;
340 vp
= build_program(ctx
, &wanted_key
, &vpc
->mesa_program
);
341 vp
->next
= vpc
->progs
;
344 return r300
->selected_vp
= vp
;
/*
 * Raise the vpu.count field of the command header at ptr to new_count / 4
 * when it is currently smaller; the count is never lowered.
 */
#define bump_vpu_count(ptr, new_count)   do { \
	drm_r300_cmd_header_t* _p=((drm_r300_cmd_header_t*)(ptr)); \
	int _nc=(new_count)/4; \
	if(_nc>_p->vpu.count)_p->vpu.count=_nc; \
} while(0)
353 static void r300EmitVertexProgram(r300ContextPtr r300
, int dest
, struct r300_vertex_program_code
*code
)
357 assert((code
->length
> 0) && (code
->length
% 4 == 0));
359 switch ((dest
>> 8) & 0xf) {
361 R300_STATECHANGE(r300
, vpi
);
362 for (i
= 0; i
< code
->length
; i
++)
363 r300
->hw
.vpi
.cmd
[R300_VPI_INSTR_0
+ i
+ 4 * (dest
& 0xff)] = (code
->body
.d
[i
]);
364 bump_vpu_count(r300
->hw
.vpi
.cmd
, code
->length
+ 4 * (dest
& 0xff));
367 R300_STATECHANGE(r300
, vpp
);
368 for (i
= 0; i
< code
->length
; i
++)
369 r300
->hw
.vpp
.cmd
[R300_VPP_PARAM_0
+ i
+ 4 * (dest
& 0xff)] = (code
->body
.d
[i
]);
370 bump_vpu_count(r300
->hw
.vpp
.cmd
, code
->length
+ 4 * (dest
& 0xff));
373 R300_STATECHANGE(r300
, vps
);
374 for (i
= 0; i
< code
->length
; i
++)
375 r300
->hw
.vps
.cmd
[1 + i
+ 4 * (dest
& 0xff)] = (code
->body
.d
[i
]);
376 bump_vpu_count(r300
->hw
.vps
.cmd
, code
->length
+ 4 * (dest
& 0xff));
379 fprintf(stderr
, "%s:%s don't know how to handle dest %04x\n", __FILE__
, __FUNCTION__
, dest
);
384 void r300SetupVertexProgram(r300ContextPtr rmesa
)
386 GLcontext
*ctx
= rmesa
->radeon
.glCtx
;
387 struct r300_vertex_program
*prog
= rmesa
->selected_vp
;
391 /* Reset state, in case we don't use something */
392 ((drm_r300_cmd_header_t
*) rmesa
->hw
.vpp
.cmd
)->vpu
.count
= 0;
393 ((drm_r300_cmd_header_t
*) rmesa
->hw
.vpi
.cmd
)->vpu
.count
= 0;
394 ((drm_r300_cmd_header_t
*) rmesa
->hw
.vps
.cmd
)->vpu
.count
= 0;
396 R300_STATECHANGE(rmesa
, vap_cntl
);
397 R300_STATECHANGE(rmesa
, vpp
);
398 param_count
= r300VertexProgUpdateParams(ctx
, prog
, (float *)&rmesa
->hw
.vpp
.cmd
[R300_VPP_PARAM_0
]);
399 if (!rmesa
->radeon
.radeonScreen
->kernel_mm
&& param_count
> 255 * 4) {
400 WARN_ONCE("Too many VP params, expect rendering errors\n");
402 /* Prevent the overflow (vpu.count is u8) */
403 bump_vpu_count(rmesa
->hw
.vpp
.cmd
, MIN2(255 * 4, param_count
));
406 r300EmitVertexProgram(rmesa
, R300_PVS_CODE_START
, &(prog
->code
));
407 inst_count
= (prog
->code
.length
/ 4) - 1;
409 r300VapCntl(rmesa
, _mesa_bitcount(prog
->code
.InputsRead
),
410 _mesa_bitcount(prog
->code
.OutputsWritten
), prog
->code
.num_temporaries
);
412 R300_STATECHANGE(rmesa
, pvs
);
413 rmesa
->hw
.pvs
.cmd
[R300_PVS_CNTL_1
] = (0 << R300_PVS_FIRST_INST_SHIFT
) | (inst_count
<< R300_PVS_XYZW_VALID_INST_SHIFT
) |
414 (inst_count
<< R300_PVS_LAST_INST_SHIFT
);
416 rmesa
->hw
.pvs
.cmd
[R300_PVS_CNTL_2
] = (0 << R300_PVS_CONST_BASE_OFFSET_SHIFT
) | ((param_count
- 1) << R300_PVS_MAX_CONST_ADDR_SHIFT
);
417 rmesa
->hw
.pvs
.cmd
[R300_PVS_CNTL_3
] = (inst_count
<< R300_PVS_LAST_VTX_SRC_INST_SHIFT
);