Add structs describing vertex and pixel shader state to r300_state.
src/mesa/drivers/dri/r300/r300_cmdbuf.c
/*
Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved.

The Weather Channel (TM) funded Tungsten Graphics to develop the
initial release of the Radeon 8500 driver under the XFree86 license.
This notice must be preserved.

Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:

The above copyright notice and this permission notice (including the
next paragraph) shall be included in all copies or substantial
portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

**************************************************************************/

/*
 * Authors:
 *   Nicolai Haehnle <prefect_@gmx.net>
 */

#include "glheader.h"
#include "state.h"
#include "imports.h"
#include "macros.h"
#include "context.h"
#include "swrast/swrast.h"
#include "simple_list.h"

#include "drm.h"
#include "radeon_drm.h"

#include "radeon_ioctl.h"
#include "r300_context.h"
#include "r300_ioctl.h"
#include "radeon_reg.h"
#include "r300_reg.h"
#include "r300_cmdbuf.h"
#include "r300_emit.h"


// Set this to 1 for extremely verbose debugging of command buffers
#define DEBUG_CMDBUF 0


/**
 * Send the current command buffer via ioctl to the hardware.
 */
int r300FlushCmdBuf(r300ContextPtr r300, const char* caller)
{
	int ret;
	int i;
	drm_radeon_cmd_buffer_t cmd;
	int start;

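	/* The buffer normally begins with a re-emit of the current clean state
	 * (see r300EmitState).  If the context was lost since the last flush,
	 * submit that prefix as well so the hardware state is restored;
	 * otherwise skip it and send only the newly added dwords. */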
	if (r300->radeon.lost_context)
		start = 0;
	else
		start = r300->cmdbuf.count_reemit;

	if (RADEON_DEBUG & DEBUG_IOCTL) {
		fprintf(stderr, "%s from %s - %i cliprects\n",
			__FUNCTION__, caller, r300->radeon.numClipRects);

		if (DEBUG_CMDBUF && RADEON_DEBUG & DEBUG_VERBOSE)
			for (i = start; i < r300->cmdbuf.count_used; ++i)
				fprintf(stderr, "%d: %08x\n", i,
					r300->cmdbuf.cmd_buf[i]);
	}

	LOCK_HARDWARE(&r300->radeon);

	cmd.buf = (char*)(r300->cmdbuf.cmd_buf + start);
	cmd.bufsz = (r300->cmdbuf.count_used - start) * 4;

	if (r300->radeon.state.scissor.enabled) {
		cmd.nbox = r300->radeon.state.scissor.numClipRects;
		cmd.boxes = (drm_clip_rect_t *)r300->radeon.state.scissor.pClipRects;
	} else {
		cmd.nbox = r300->radeon.numClipRects;
		cmd.boxes = (drm_clip_rect_t *)r300->radeon.pClipRects;
	}

	if (cmd.nbox) {
		ret = drmCommandWrite(r300->radeon.dri.fd,
				      DRM_RADEON_CMDBUF, &cmd, sizeof(cmd));
		if (ret) {
			UNLOCK_HARDWARE(&r300->radeon);
			fprintf(stderr, "drmCommandWrite: %d\n", ret);
			exit(-1);
		}

		if (RADEON_DEBUG & DEBUG_SYNC) {
			fprintf(stderr, "Syncing in %s\n\n", __FUNCTION__);
			radeonWaitForIdleLocked(&r300->radeon);
		}
	} else {
		if (RADEON_DEBUG & DEBUG_IOCTL)
			fprintf(stderr, "%s: No cliprects\n", __FUNCTION__);
	}

	UNLOCK_HARDWARE(&r300->radeon);

	r300->cmdbuf.count_used = 0;
	r300->cmdbuf.count_reemit = 0;

	return 0;
}


static void print_state_atom(struct r300_state_atom *state, int dwords)
{
	int i;

	fprintf(stderr, " emit %s/%d/%d\n", state->name, dwords, state->cmd_size);

	if (RADEON_DEBUG & DEBUG_VERBOSE)
		for (i = 0; i < dwords; i++)
			fprintf(stderr, " %s[%d]: %08X\n", state->name, i,
				state->cmd[i]);
}

/**
 * Emit all atoms with a dirty field equal to dirty.
 *
 * The caller must have ensured that there is enough space in the command
 * buffer.
 */
static __inline__ void r300DoEmitState(r300ContextPtr r300, GLboolean dirty)
{
	struct r300_state_atom* atom;
	uint32_t* dest;

	dest = r300->cmdbuf.cmd_buf + r300->cmdbuf.count_used;

	if (DEBUG_CMDBUF && RADEON_DEBUG & DEBUG_STATE) {
		foreach(atom, &r300->hw.atomlist) {
			if ((atom->dirty || r300->hw.all_dirty) == dirty) {
				int dwords = (*atom->check)(r300, atom);

				if (dwords)
					print_state_atom(atom, dwords);
				else
					fprintf(stderr, " skip state %s\n",
						atom->name);
			}
		}
	}

	foreach(atom, &r300->hw.atomlist) {
		if ((atom->dirty || r300->hw.all_dirty) == dirty) {
			int dwords = (*atom->check)(r300, atom);

			if (dwords) {
				memcpy(dest, atom->cmd, dwords*4);
				dest += dwords;
				r300->cmdbuf.count_used += dwords;
				atom->dirty = GL_FALSE;
			}
		}
	}
}


/**
 * Copy dirty hardware state atoms into the command buffer.
 *
 * We also copy out clean state if we're at the start of a buffer. That makes
 * it easy to recover from lost contexts.
 */
void r300EmitState(r300ContextPtr r300)
{
	if (RADEON_DEBUG & (DEBUG_STATE | DEBUG_PRIMS))
		fprintf(stderr, "%s\n", __FUNCTION__);

	if (r300->cmdbuf.count_used && !r300->hw.is_dirty && !r300->hw.all_dirty)
		return;

	/* To avoid going across the entire set of states multiple times, just check
	 * for enough space for the case of emitting all state, and inline the
	 * r300AllocCmdBuf code here without all the checks.
	 */
	r300EnsureCmdBufSpace(r300, r300->hw.max_state_size, __FUNCTION__);

	if (!r300->cmdbuf.count_used) {
		if (RADEON_DEBUG & DEBUG_STATE)
			fprintf(stderr, "Begin reemit state\n");

		r300DoEmitState(r300, GL_FALSE);
		r300->cmdbuf.count_reemit = r300->cmdbuf.count_used;
	}

	if (RADEON_DEBUG & DEBUG_STATE)
		fprintf(stderr, "Begin dirty state\n");

	r300DoEmitState(r300, GL_TRUE);

	assert(r300->cmdbuf.count_used < r300->cmdbuf.size);

	r300->hw.is_dirty = GL_FALSE;
	r300->hw.all_dirty = GL_FALSE;
}

#if 0

static __inline__ uint32_t cmducs(int reg, int count)
{
	drm_r300_cmd_header_t cmd;

	cmd.unchecked_state.cmd_type = R300_CMD_UNCHECKED_STATE;
	cmd.unchecked_state.count = count;
	cmd.unchecked_state.reghi = ((unsigned int)reg & 0xFF00) >> 8;
	cmd.unchecked_state.reglo = ((unsigned int)reg & 0x00FF);

	return cmd.u;
}

static __inline__ uint32_t cmdvpu(int addr, int count)
{
	drm_r300_cmd_header_t cmd;

	cmd.vpu.cmd_type = R300_CMD_VPU;
	cmd.vpu.count = count;
	cmd.vpu.adrhi = ((unsigned int)addr & 0xFF00) >> 8;
	cmd.vpu.adrlo = ((unsigned int)addr & 0x00FF);

	return cmd.u;
}
#endif
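
/* cmducs() packs an R300_CMD_UNCHECKED_STATE header dword (register offset
 * split into reghi/reglo plus a payload count) and cmdvpu() does the same
 * for R300_CMD_VPU vertex program uploads.  The copies above are disabled;
 * the definitions actually used below are presumably the equivalent
 * macros/inlines pulled in via r300_cmdbuf.h. */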

#define CHECK( NM, COUNT ) \
static int check_##NM( r300ContextPtr r300, \
		       struct r300_state_atom* atom ) \
{ \
	(void) atom; (void) r300; \
	return (COUNT); \
}

#define ucscount(ptr) (((drm_r300_cmd_header_t*)(ptr))->unchecked_state.count)
#define vpucount(ptr) (((drm_r300_cmd_header_t*)(ptr))->vpu.count)

CHECK( always, atom->cmd_size )
CHECK( never, 0 )
CHECK( variable, ucscount(atom->cmd) ? (1 + ucscount(atom->cmd)) : 0 )
CHECK( vpu, vpucount(atom->cmd) ? (1 + vpucount(atom->cmd)*4) : 0 )
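
/* A check function returns the number of dwords its atom will emit:
 * "always" atoms emit their full cmd_size, "never" atoms emit nothing,
 * "variable" atoms emit one header dword plus the payload count stored in
 * the header, and "vpu" atoms emit one header dword plus four dwords per
 * vertex program vector. */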

#undef ucscount

#define ALLOC_STATE( ATOM, CHK, SZ, NM, IDX ) \
	do { \
		r300->hw.ATOM.cmd_size = SZ; \
		r300->hw.ATOM.cmd = (uint32_t*)CALLOC(SZ * sizeof(uint32_t)); \
		r300->hw.ATOM.name = NM; \
		r300->hw.ATOM.idx = IDX; \
		r300->hw.ATOM.check = check_##CHK; \
		r300->hw.ATOM.dirty = GL_FALSE; \
		r300->hw.max_state_size += SZ; \
	} while (0)
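
/* ALLOC_STATE(atom, check, size, name, idx) reserves "size" dwords for the
 * atom's command stream (the command header dword included), hooks up the
 * matching check_<check> function and records the atom's debug name and
 * index.  The header dword itself is filled in with cmducs()/cmdvpu()
 * immediately after each ALLOC_STATE call below. */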


/**
 * Allocate memory for the command buffer and initialize the state atom
 * list. Note that the initial hardware state is set by r300InitState().
 */
void r300InitCmdBuf(r300ContextPtr r300)
{
	int size, i, mtu;

	r300->hw.max_state_size = 0;

	mtu = r300->radeon.glCtx->Const.MaxTextureUnits;
	fprintf(stderr, "Using %d maximum texture units..\n", mtu);

	/* Initialize state atoms */
	ALLOC_STATE( vpt, always, R300_VPT_CMDSIZE, "vpt", 0 );
	r300->hw.vpt.cmd[R300_VPT_CMD_0] = cmducs(R300_SE_VPORT_XSCALE, 6);
	ALLOC_STATE( unk2080, always, 2, "unk2080", 0 );
	r300->hw.unk2080.cmd[0] = cmducs(0x2080, 1);
	ALLOC_STATE( ovf, always, R300_OVF_CMDSIZE, "ovf", 0 );
	r300->hw.ovf.cmd[R300_OVF_CMD_0] = cmducs(R300_VAP_OUTPUT_VTX_FMT_0, 2);
	ALLOC_STATE( vte, always, 3, "vte", 0 );
	r300->hw.vte.cmd[0] = cmducs(R300_SE_VTE_CNTL, 2);
	ALLOC_STATE( unk2134, always, 3, "unk2134", 0 );
	r300->hw.unk2134.cmd[0] = cmducs(0x2134, 2);
	ALLOC_STATE( unk2140, always, 2, "unk2140", 0 );
	r300->hw.unk2140.cmd[0] = cmducs(0x2140, 1);
	ALLOC_STATE( vir[0], variable, R300_VIR_CMDSIZE, "vir/0", 0 );
	r300->hw.vir[0].cmd[R300_VIR_CMD_0] = cmducs(R300_VAP_INPUT_ROUTE_0_0, 1);
	ALLOC_STATE( vir[1], variable, R300_VIR_CMDSIZE, "vir/1", 1 );
	r300->hw.vir[1].cmd[R300_VIR_CMD_0] = cmducs(R300_VAP_INPUT_ROUTE_1_0, 1);
	ALLOC_STATE( vic, always, R300_VIC_CMDSIZE, "vic", 0 );
	r300->hw.vic.cmd[R300_VIC_CMD_0] = cmducs(R300_VAP_INPUT_CNTL_0, 2);
	ALLOC_STATE( unk21DC, always, 2, "unk21DC", 0 );
	r300->hw.unk21DC.cmd[0] = cmducs(0x21DC, 1);
	ALLOC_STATE( unk221C, always, 2, "unk221C", 0 );
	r300->hw.unk221C.cmd[0] = cmducs(0x221C, 1);
	ALLOC_STATE( unk2220, always, 5, "unk2220", 0 );
	r300->hw.unk2220.cmd[0] = cmducs(0x2220, 4);
	ALLOC_STATE( unk2288, always, 2, "unk2288", 0 );
	r300->hw.unk2288.cmd[0] = cmducs(0x2288, 1);
	ALLOC_STATE( vof, always, R300_VOF_CMDSIZE, "vof", 0 );
	r300->hw.vof.cmd[R300_VOF_CMD_0] = cmducs(R300_VAP_OUTPUT_VTX_FMT_0, 2);
	ALLOC_STATE( pvs, always, R300_PVS_CMDSIZE, "pvs", 0 );
	r300->hw.pvs.cmd[R300_PVS_CMD_0] = cmducs(R300_VAP_PVS_CNTL_1, 3);
	ALLOC_STATE( gb_enable, always, 2, "gb_enable", 0 );
	r300->hw.gb_enable.cmd[0] = cmducs(R300_GB_ENABLE, 1);
	ALLOC_STATE( gb_misc, always, R300_GB_MISC_CMDSIZE, "gb_misc", 0 );
	r300->hw.gb_misc.cmd[0] = cmducs(R300_GB_MSPOS0, 5);
	ALLOC_STATE( txe, always, R300_TXE_CMDSIZE, "txe", 0 );
	r300->hw.txe.cmd[R300_TXE_CMD_0] = cmducs(R300_TX_ENABLE, 1);
	ALLOC_STATE( unk4200, always, 5, "unk4200", 0 );
	r300->hw.unk4200.cmd[0] = cmducs(0x4200, 4);
	ALLOC_STATE( unk4214, always, 2, "unk4214", 0 );
	r300->hw.unk4214.cmd[0] = cmducs(0x4214, 1);
	ALLOC_STATE( ps, always, R300_PS_CMDSIZE, "ps", 0 );
	r300->hw.ps.cmd[0] = cmducs(R300_RE_POINTSIZE, 1);
	ALLOC_STATE( unk4230, always, 4, "unk4230", 0 );
	r300->hw.unk4230.cmd[0] = cmducs(0x4230, 3);
	ALLOC_STATE( unk4260, always, 4, "unk4260", 0 );
	r300->hw.unk4260.cmd[0] = cmducs(0x4260, 3);
	ALLOC_STATE( unk4274, always, 5, "unk4274", 0 );
	r300->hw.unk4274.cmd[0] = cmducs(0x4274, 4);
	ALLOC_STATE( unk4288, always, 6, "unk4288", 0 );
	r300->hw.unk4288.cmd[0] = cmducs(0x4288, 5);
	ALLOC_STATE( unk42A0, always, 2, "unk42A0", 0 );
	r300->hw.unk42A0.cmd[0] = cmducs(0x42A0, 1);
	ALLOC_STATE( unk42B4, always, 2, "unk42B4", 0 );
	r300->hw.unk42B4.cmd[0] = cmducs(0x42B4, 1);
	ALLOC_STATE( cul, always, R300_CUL_CMDSIZE, "cul", 0 );
	r300->hw.cul.cmd[R300_CUL_CMD_0] = cmducs(R300_RE_CULL_CNTL, 1);
	ALLOC_STATE( unk42C0, always, 3, "unk42C0", 0 );
	r300->hw.unk42C0.cmd[0] = cmducs(0x42C0, 2);
	ALLOC_STATE( rc, always, R300_RC_CMDSIZE, "rc", 0 );
	r300->hw.rc.cmd[R300_RC_CMD_0] = cmducs(R300_RS_CNTL_0, 2);
	ALLOC_STATE( ri, always, R300_RI_CMDSIZE, "ri", 0 );
	r300->hw.ri.cmd[R300_RI_CMD_0] = cmducs(R300_RS_INTERP_0, 8);
	ALLOC_STATE( rr, variable, R300_RR_CMDSIZE, "rr", 0 );
	r300->hw.rr.cmd[R300_RR_CMD_0] = cmducs(R300_RS_ROUTE_0, 1);
	ALLOC_STATE( unk43A4, always, 3, "unk43A4", 0 );
	r300->hw.unk43A4.cmd[0] = cmducs(0x43A4, 2);
	ALLOC_STATE( unk43E8, always, 2, "unk43E8", 0 );
	r300->hw.unk43E8.cmd[0] = cmducs(0x43E8, 1);
	ALLOC_STATE( fp, always, R300_FP_CMDSIZE, "fp", 0 );
	r300->hw.fp.cmd[R300_FP_CMD_0] = cmducs(R300_PFS_CNTL_0, 3);
	r300->hw.fp.cmd[R300_FP_CMD_1] = cmducs(R300_PFS_NODE_0, 4);
	ALLOC_STATE( unk46A4, always, 6, "unk46A4", 0 );
	r300->hw.unk46A4.cmd[0] = cmducs(0x46A4, 5);
	ALLOC_STATE( fpi[0], variable, R300_FPI_CMDSIZE, "fpi/0", 0 );
	r300->hw.fpi[0].cmd[R300_FPI_CMD_0] = cmducs(R300_PFS_INSTR0_0, 1);
	ALLOC_STATE( fpi[1], variable, R300_FPI_CMDSIZE, "fpi/1", 1 );
	r300->hw.fpi[1].cmd[R300_FPI_CMD_0] = cmducs(R300_PFS_INSTR1_0, 1);
	ALLOC_STATE( fpi[2], variable, R300_FPI_CMDSIZE, "fpi/2", 2 );
	r300->hw.fpi[2].cmd[R300_FPI_CMD_0] = cmducs(R300_PFS_INSTR2_0, 1);
	ALLOC_STATE( fpi[3], variable, R300_FPI_CMDSIZE, "fpi/3", 3 );
	r300->hw.fpi[3].cmd[R300_FPI_CMD_0] = cmducs(R300_PFS_INSTR3_0, 1);
	ALLOC_STATE( unk4BC0, always, 2, "unk4BC0", 0 );
	r300->hw.unk4BC0.cmd[0] = cmducs(0x4BC0, 1);
	ALLOC_STATE( unk4BC8, always, 4, "unk4BC8", 0 );
	r300->hw.unk4BC8.cmd[0] = cmducs(0x4BC8, 3);
	ALLOC_STATE( at, always, R300_AT_CMDSIZE, "at", 0 );
	r300->hw.at.cmd[R300_AT_CMD_0] = cmducs(R300_PP_ALPHA_TEST, 1);
	ALLOC_STATE( unk4BD8, always, 2, "unk4BD8", 0 );
	r300->hw.unk4BD8.cmd[0] = cmducs(0x4BD8, 1);
	ALLOC_STATE( unk4E00, always, 2, "unk4E00", 0 );
	r300->hw.unk4E00.cmd[0] = cmducs(0x4E00, 1);
	ALLOC_STATE( bld, always, R300_BLD_CMDSIZE, "bld", 0 );
	r300->hw.bld.cmd[R300_BLD_CMD_0] = cmducs(R300_RB3D_CBLEND, 2);
	ALLOC_STATE( cmk, always, R300_CMK_CMDSIZE, "cmk", 0 );
	r300->hw.cmk.cmd[R300_CMK_CMD_0] = cmducs(R300_RB3D_COLORMASK, 1);
	ALLOC_STATE( unk4E10, always, 4, "unk4E10", 0 );
	r300->hw.unk4E10.cmd[0] = cmducs(0x4E10, 3);
	ALLOC_STATE( cb, always, R300_CB_CMDSIZE, "cb", 0 );
	r300->hw.cb.cmd[R300_CB_CMD_0] = cmducs(R300_RB3D_COLOROFFSET0, 1);
	r300->hw.cb.cmd[R300_CB_CMD_1] = cmducs(R300_RB3D_COLORPITCH0, 1);
	ALLOC_STATE( unk4E50, always, 10, "unk4E50", 0 );
	r300->hw.unk4E50.cmd[0] = cmducs(0x4E50, 9);
	ALLOC_STATE( unk4E88, always, 2, "unk4E88", 0 );
	r300->hw.unk4E88.cmd[0] = cmducs(0x4E88, 1);
	ALLOC_STATE( unk4EA0, always, 3, "unk4EA0 R350 only", 0 );
	r300->hw.unk4EA0.cmd[0] = cmducs(0x4EA0, 2);
	ALLOC_STATE( zc, always, R300_ZC_CMDSIZE, "zc", 0 );
	r300->hw.zc.cmd[R300_ZC_CMD_0] = cmducs(R300_RB3D_ZCNTL_0, 2);
	ALLOC_STATE( unk4F08, always, 2, "unk4F08", 0 );
	r300->hw.unk4F08.cmd[0] = cmducs(0x4F08, 1);
	ALLOC_STATE( unk4F10, always, 5, "unk4F10", 0 );
	r300->hw.unk4F10.cmd[0] = cmducs(0x4F10, 4);
	ALLOC_STATE( zb, always, R300_ZB_CMDSIZE, "zb", 0 );
	r300->hw.zb.cmd[R300_ZB_CMD_0] = cmducs(R300_RB3D_DEPTHOFFSET, 2);
	ALLOC_STATE( unk4F28, always, 2, "unk4F28", 0 );
	r300->hw.unk4F28.cmd[0] = cmducs(0x4F28, 1);
	ALLOC_STATE( unk4F30, always, 3, "unk4F30", 0 );
	r300->hw.unk4F30.cmd[0] = cmducs(0x4F30, 2);
	ALLOC_STATE( unk4F44, always, 2, "unk4F44", 0 );
	r300->hw.unk4F44.cmd[0] = cmducs(0x4F44, 1);
	ALLOC_STATE( unk4F54, always, 2, "unk4F54", 0 );
	r300->hw.unk4F54.cmd[0] = cmducs(0x4F54, 1);

	ALLOC_STATE( vpi, vpu, R300_VPI_CMDSIZE, "vpi", 0 );
	r300->hw.vpi.cmd[R300_VPI_CMD_0] = cmdvpu(R300_PVS_UPLOAD_PROGRAM, 0);
	ALLOC_STATE( vpp, vpu, R300_VPP_CMDSIZE, "vpp", 0 );
	r300->hw.vpp.cmd[R300_VPP_CMD_0] = cmdvpu(R300_PVS_UPLOAD_PARAMETERS, 0);
	ALLOC_STATE( vps, vpu, R300_VPS_CMDSIZE, "vps", 0 );
	r300->hw.vps.cmd[R300_VPS_CMD_0] = cmdvpu(R300_PVS_UPLOAD_POINTSIZE, 1);

	/* Textures */
	ALLOC_STATE( tex.filter, always, mtu, "tex_filter", 0 );
	r300->hw.tex.filter.cmd[R300_TEX_CMD_0] = cmducs(R300_TX_FILTER_0, mtu-1);

	ALLOC_STATE( tex.unknown1, always, mtu, "tex_unknown1", 0 );
	r300->hw.tex.unknown1.cmd[R300_TEX_CMD_0] = cmducs(R300_TX_UNK1_0, mtu-1);

	ALLOC_STATE( tex.size, always, mtu, "tex_size", 0 );
	r300->hw.tex.size.cmd[R300_TEX_CMD_0] = cmducs(R300_TX_SIZE_0, mtu-1);

	ALLOC_STATE( tex.format, always, mtu, "tex_format", 0 );
	r300->hw.tex.format.cmd[R300_TEX_CMD_0] = cmducs(R300_TX_FORMAT_0, mtu-1);

	ALLOC_STATE( tex.offset, always, mtu, "tex_offset", 0 );
	r300->hw.tex.offset.cmd[R300_TEX_CMD_0] = cmducs(R300_TX_OFFSET_0, mtu-1);

	ALLOC_STATE( tex.unknown4, always, mtu, "tex_unknown4", 0 );
	r300->hw.tex.unknown4.cmd[R300_TEX_CMD_0] = cmducs(R300_TX_UNK4_0, mtu-1);

	ALLOC_STATE( tex.unknown5, always, mtu, "tex_unknown5", 0 );
	r300->hw.tex.unknown5.cmd[R300_TEX_CMD_0] = cmducs(R300_TX_UNK5_0, mtu-1);


	/* Setup the atom linked list */
	make_empty_list(&r300->hw.atomlist);
	r300->hw.atomlist.name = "atom-list";

	insert_at_tail(&r300->hw.atomlist, &r300->hw.vpt);
	insert_at_tail(&r300->hw.atomlist, &r300->hw.unk2080);
	insert_at_tail(&r300->hw.atomlist, &r300->hw.ovf);
	insert_at_tail(&r300->hw.atomlist, &r300->hw.vte);
	insert_at_tail(&r300->hw.atomlist, &r300->hw.unk2134);
	insert_at_tail(&r300->hw.atomlist, &r300->hw.unk2140);
	insert_at_tail(&r300->hw.atomlist, &r300->hw.vir[0]);
	insert_at_tail(&r300->hw.atomlist, &r300->hw.vir[1]);
	insert_at_tail(&r300->hw.atomlist, &r300->hw.vic);
	insert_at_tail(&r300->hw.atomlist, &r300->hw.unk21DC);
	insert_at_tail(&r300->hw.atomlist, &r300->hw.unk221C);
	insert_at_tail(&r300->hw.atomlist, &r300->hw.unk2220);
	insert_at_tail(&r300->hw.atomlist, &r300->hw.unk2288);
	insert_at_tail(&r300->hw.atomlist, &r300->hw.vof);
	insert_at_tail(&r300->hw.atomlist, &r300->hw.pvs);
	insert_at_tail(&r300->hw.atomlist, &r300->hw.gb_enable);
	insert_at_tail(&r300->hw.atomlist, &r300->hw.gb_misc);
	insert_at_tail(&r300->hw.atomlist, &r300->hw.txe);
	insert_at_tail(&r300->hw.atomlist, &r300->hw.unk4200);
	insert_at_tail(&r300->hw.atomlist, &r300->hw.unk4214);
	insert_at_tail(&r300->hw.atomlist, &r300->hw.ps);
	insert_at_tail(&r300->hw.atomlist, &r300->hw.unk4230);
	insert_at_tail(&r300->hw.atomlist, &r300->hw.unk4260);
	insert_at_tail(&r300->hw.atomlist, &r300->hw.unk4274);
	insert_at_tail(&r300->hw.atomlist, &r300->hw.unk4288);
	insert_at_tail(&r300->hw.atomlist, &r300->hw.unk42A0);
	insert_at_tail(&r300->hw.atomlist, &r300->hw.unk42B4);
	insert_at_tail(&r300->hw.atomlist, &r300->hw.cul);
	insert_at_tail(&r300->hw.atomlist, &r300->hw.unk42C0);
	insert_at_tail(&r300->hw.atomlist, &r300->hw.rc);
	insert_at_tail(&r300->hw.atomlist, &r300->hw.ri);
	insert_at_tail(&r300->hw.atomlist, &r300->hw.rr);
	insert_at_tail(&r300->hw.atomlist, &r300->hw.unk43A4);
	insert_at_tail(&r300->hw.atomlist, &r300->hw.unk43E8);
	insert_at_tail(&r300->hw.atomlist, &r300->hw.fp);
	insert_at_tail(&r300->hw.atomlist, &r300->hw.unk46A4);
	insert_at_tail(&r300->hw.atomlist, &r300->hw.fpi[0]);
	insert_at_tail(&r300->hw.atomlist, &r300->hw.fpi[1]);
	insert_at_tail(&r300->hw.atomlist, &r300->hw.fpi[2]);
	insert_at_tail(&r300->hw.atomlist, &r300->hw.fpi[3]);
	insert_at_tail(&r300->hw.atomlist, &r300->hw.unk4BC0);
	insert_at_tail(&r300->hw.atomlist, &r300->hw.unk4BC8);
	insert_at_tail(&r300->hw.atomlist, &r300->hw.at);
	insert_at_tail(&r300->hw.atomlist, &r300->hw.unk4BD8);
	insert_at_tail(&r300->hw.atomlist, &r300->hw.unk4E00);
	insert_at_tail(&r300->hw.atomlist, &r300->hw.bld);
	insert_at_tail(&r300->hw.atomlist, &r300->hw.cmk);
	insert_at_tail(&r300->hw.atomlist, &r300->hw.unk4E10);
	insert_at_tail(&r300->hw.atomlist, &r300->hw.cb);
	insert_at_tail(&r300->hw.atomlist, &r300->hw.unk4E50);
	insert_at_tail(&r300->hw.atomlist, &r300->hw.unk4E88);
	insert_at_tail(&r300->hw.atomlist, &r300->hw.unk4EA0);
	insert_at_tail(&r300->hw.atomlist, &r300->hw.zc);
	insert_at_tail(&r300->hw.atomlist, &r300->hw.unk4F08);
	insert_at_tail(&r300->hw.atomlist, &r300->hw.unk4F10);
	insert_at_tail(&r300->hw.atomlist, &r300->hw.zb);
	insert_at_tail(&r300->hw.atomlist, &r300->hw.unk4F28);
	insert_at_tail(&r300->hw.atomlist, &r300->hw.unk4F30);
	insert_at_tail(&r300->hw.atomlist, &r300->hw.unk4F44);
	insert_at_tail(&r300->hw.atomlist, &r300->hw.unk4F54);

	insert_at_tail(&r300->hw.atomlist, &r300->hw.vpi);
	insert_at_tail(&r300->hw.atomlist, &r300->hw.vpp);
	insert_at_tail(&r300->hw.atomlist, &r300->hw.vps);

	insert_at_tail(&r300->hw.atomlist, &r300->hw.tex.filter);
	insert_at_tail(&r300->hw.atomlist, &r300->hw.tex.unknown1);
	insert_at_tail(&r300->hw.atomlist, &r300->hw.tex.size);
	insert_at_tail(&r300->hw.atomlist, &r300->hw.tex.format);
	insert_at_tail(&r300->hw.atomlist, &r300->hw.tex.offset);
	insert_at_tail(&r300->hw.atomlist, &r300->hw.tex.unknown4);
	insert_at_tail(&r300->hw.atomlist, &r300->hw.tex.unknown5);

	r300->hw.is_dirty = GL_TRUE;
	r300->hw.all_dirty = GL_TRUE;

	/* Initialize command buffer */
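	/* The size is counted in dwords (size*4 bytes are allocated below);
	 * make sure at least two full state emits always fit. */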
	size = 256 * driQueryOptioni(&r300->radeon.optionCache, "command_buffer_size");
	if (size < 2*r300->hw.max_state_size)
		size = 2*r300->hw.max_state_size;

	if (RADEON_DEBUG & DEBUG_IOCTL)
		fprintf(stderr,
			"Allocating %d bytes command buffer (max state is %d bytes)\n",
			size*4, r300->hw.max_state_size*4);

	r300->cmdbuf.size = size;
	r300->cmdbuf.cmd_buf = (uint32_t*)CALLOC(size*4);
	r300->cmdbuf.count_used = 0;
	r300->cmdbuf.count_reemit = 0;
}


/**
 * Destroy the command buffer and state atoms.
 */
void r300DestroyCmdBuf(r300ContextPtr r300)
{
	struct r300_state_atom* atom;

	FREE(r300->cmdbuf.cmd_buf);

	foreach(atom, &r300->hw.atomlist) {
		FREE(atom->cmd);
	}
}

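/**
 * Emit a 2D screen-to-screen blit using the BITBLT_MULTI packet.
 * Pitches must be multiples of 64 bytes and offsets multiples of 1024
 * bytes (see the asserts below).  This path is apparently inherited
 * unchanged from the r200 driver.
 */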
void r300EmitBlit(r300ContextPtr rmesa,
		  GLuint color_fmt,
		  GLuint src_pitch,
		  GLuint src_offset,
		  GLuint dst_pitch,
		  GLuint dst_offset,
		  GLint srcx, GLint srcy,
		  GLint dstx, GLint dsty, GLuint w, GLuint h)
{
	drm_radeon_cmd_header_t *cmd;

	if (RADEON_DEBUG & DEBUG_IOCTL)
		fprintf(stderr,
			"%s src %x/%x %d,%d dst: %x/%x %d,%d sz: %dx%d\n",
			__FUNCTION__, src_pitch, src_offset, srcx, srcy,
			dst_pitch, dst_offset, dstx, dsty, w, h);

	assert((src_pitch & 63) == 0);
	assert((dst_pitch & 63) == 0);
	assert((src_offset & 1023) == 0);
	assert((dst_offset & 1023) == 0);
	assert(w < (1 << 16));
	assert(h < (1 << 16));

	cmd =
	    (drm_radeon_cmd_header_t *) r200AllocCmdBuf(rmesa, 8 * sizeof(int),
							__FUNCTION__);

	cmd[0].header.cmd_type = RADEON_CMD_PACKET3;
	cmd[1].i = R200_CP_CMD_BITBLT_MULTI | (5 << 16);
	cmd[2].i = (RADEON_GMC_SRC_PITCH_OFFSET_CNTL |
		    RADEON_GMC_DST_PITCH_OFFSET_CNTL |
		    RADEON_GMC_BRUSH_NONE |
		    (color_fmt << 8) |
		    RADEON_GMC_SRC_DATATYPE_COLOR |
		    RADEON_ROP3_S |
		    RADEON_DP_SRC_SOURCE_MEMORY |
		    RADEON_GMC_CLR_CMP_CNTL_DIS | RADEON_GMC_WR_MSK_DIS);

	cmd[3].i = ((src_pitch / 64) << 22) | (src_offset >> 10);
	cmd[4].i = ((dst_pitch / 64) << 22) | (dst_offset >> 10);
	cmd[5].i = (srcx << 16) | srcy;
	cmd[6].i = (dstx << 16) | dsty; /* dst */
	cmd[7].i = (w << 16) | h;
}

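/**
 * Emit a wait command so the CP stalls until the selected engines
 * (RADEON_WAIT_2D and/or RADEON_WAIT_3D) are idle.  Only emitted when
 * the DRM is new enough to understand it (minor version >= 6).
 */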
void r300EmitWait(r300ContextPtr rmesa, GLuint flags)
{
	if (rmesa->radeon.dri.drmMinor >= 6) {
		drm_radeon_cmd_header_t *cmd;

		assert(!(flags & ~(RADEON_WAIT_2D | RADEON_WAIT_3D)));

		cmd =
		    (drm_radeon_cmd_header_t *) r200AllocCmdBuf(rmesa,
								1 * sizeof(int),
								__FUNCTION__);
		cmd[0].i = 0;
		cmd[0].wait.cmd_type = RADEON_CMD_WAIT;
		cmd[0].wait.flags = flags;
	}
}

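/**
 * Emit a 3D_LOAD_VBPNTR packet describing the vertex arrays (AOS): the
 * first dword is the number of arrays, then every pair of arrays shares
 * one dword packing element sizes and strides, followed by one offset
 * dword per array; a trailing odd array gets its own size/stride dword.
 * "start" is the first vertex, so each offset is advanced by
 * start*4*stride.
 */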
void r300EmitLOAD_VBPNTR(r300ContextPtr rmesa, int start)
{
	int i, a, count;
	GLuint dw;
	LOCAL_VARS

	count=rmesa->state.aos_count;

	a=1+(count>>1)*3+(count & 1)*2;
	start_packet3(RADEON_CP_PACKET3_3D_LOAD_VBPNTR, a-1);
	e32(count);
	for(i=0;i+1<count;i+=2){
		e32( (rmesa->state.aos[i].element_size << 0)
			|(rmesa->state.aos[i].stride << 8)
			|(rmesa->state.aos[i+1].element_size << 16)
			|(rmesa->state.aos[i+1].stride << 24)
			);
		e32(rmesa->state.aos[i].offset+start*4*rmesa->state.aos[i].stride);
		e32(rmesa->state.aos[i+1].offset+start*4*rmesa->state.aos[i+1].stride);
	}
	if(count & 1){
		e32( (rmesa->state.aos[count-1].element_size << 0)
			|(rmesa->state.aos[count-1].stride << 8)
			);
		e32(rmesa->state.aos[count-1].offset+start*4*rmesa->state.aos[count-1].stride);
	}

	/* delay ? */
#if 0
	e32(RADEON_CP_PACKET2);
	e32(RADEON_CP_PACKET2);
#endif
}

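/**
 * Upload one vertex shader fragment (program code, matrix or vector
 * constants) into PVS memory at the given destination.  Empty fragments
 * are skipped; the length must be a multiple of 4 because the hardware
 * works on 4-dword vectors.
 */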
static inline void upload_vertex_shader_fragment(PREFIX int dest, struct r300_vertex_shader_fragment *vsf)
{
	int i;
	LOCAL_VARS

	if(vsf->length==0)return;

	if(vsf->length & 0x3){
		fprintf(stderr,"VERTEX_SHADER_FRAGMENT must have length divisible by 4\n");
		exit(-1);
	}

	vsf_start_fragment(dest, vsf->length);
	for(i=0;i<vsf->length;i++)
		e32(vsf->body.d[i]);

}

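/**
 * Upload the complete vertex shader state: the program code, the matrix
 * and vector constants, and the PVS control registers describing where
 * the program starts and ends and where its parameters live.
 */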
void r300EmitVertexShader(r300ContextPtr rmesa)
{
	LOCAL_VARS

	upload_vertex_shader_fragment(PASS_PREFIX VSF_DEST_PROGRAM, &(rmesa->state.vertex_shader.program));

	upload_vertex_shader_fragment(PASS_PREFIX VSF_DEST_MATRIX0, &(rmesa->state.vertex_shader.matrix[0]));
	upload_vertex_shader_fragment(PASS_PREFIX VSF_DEST_MATRIX1, &(rmesa->state.vertex_shader.matrix[1]));
	upload_vertex_shader_fragment(PASS_PREFIX VSF_DEST_MATRIX2, &(rmesa->state.vertex_shader.matrix[2]));

	upload_vertex_shader_fragment(PASS_PREFIX VSF_DEST_VECTOR0, &(rmesa->state.vertex_shader.vector[0]));
	upload_vertex_shader_fragment(PASS_PREFIX VSF_DEST_VECTOR1, &(rmesa->state.vertex_shader.vector[1]));

	upload_vertex_shader_fragment(PASS_PREFIX VSF_DEST_UNKNOWN1, &(rmesa->state.vertex_shader.unknown1));
	upload_vertex_shader_fragment(PASS_PREFIX VSF_DEST_UNKNOWN2, &(rmesa->state.vertex_shader.unknown2));

	reg_start(R300_VAP_PVS_CNTL_1, 2);
	e32( (rmesa->state.vertex_shader.program_start << R300_PVS_CNTL_1_PROGRAM_START_SHIFT)
		| (rmesa->state.vertex_shader.unknown_ptr1 << R300_PVS_CNTL_1_UNKNOWN_SHIFT)
		| (rmesa->state.vertex_shader.program_end << R300_PVS_CNTL_1_PROGRAM_END_SHIFT)
		);
	e32( (rmesa->state.vertex_shader.param_offset << R300_PVS_CNTL_2_PARAM_OFFSET_SHIFT)
		| (rmesa->state.vertex_shader.param_count << R300_PVS_CNTL_2_PARAM_COUNT_SHIFT)
		);
	e32( (rmesa->state.vertex_shader.unknown_ptr2 << R300_PVS_CNTL_3_PROGRAM_UNKNOWN_SHIFT)
		| (rmesa->state.vertex_shader.unknown_ptr3 << 0));

	reg_start(R300_VAP_PVS_WAITIDLE,0);
	e32(0x00000000);
}

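/**
 * Upload the pixel shader (fragment program): the texture instructions,
 * the four ALU instruction streams, the node table, the PFS control
 * registers and the float constants, in that order.
 */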
void r300EmitPixelShader(r300ContextPtr rmesa)
{
	int i,k;
	LOCAL_VARS

	if(rmesa->state.pixel_shader.program.tex.length>0){
		reg_start(R300_PFS_TEXI_0, rmesa->state.pixel_shader.program.tex.length-1);
		for(i=0;i<rmesa->state.pixel_shader.program.tex.length;i++)
			e32(rmesa->state.pixel_shader.program.tex.inst[i]);
	}

	if(rmesa->state.pixel_shader.program.alu.length>0){
#define OUTPUT_FIELD(reg, field) \
		reg_start(reg,rmesa->state.pixel_shader.program.alu.length-1); \
		for(i=0;i<rmesa->state.pixel_shader.program.alu.length;i++) \
			e32(rmesa->state.pixel_shader.program.alu.inst[i].field);

		OUTPUT_FIELD(R300_PFS_INSTR0_0, inst0);
		OUTPUT_FIELD(R300_PFS_INSTR1_0, inst1);
		OUTPUT_FIELD(R300_PFS_INSTR2_0, inst2);
		OUTPUT_FIELD(R300_PFS_INSTR3_0, inst3);
#undef OUTPUT_FIELD
	}

	reg_start(R300_PFS_NODE_0, 3);
	for(i=0;i<4;i++){
		e32( (rmesa->state.pixel_shader.program.node[i].alu_offset << R300_PFS_NODE_ALU_OFFSET_SHIFT)
			| (rmesa->state.pixel_shader.program.node[i].alu_end << R300_PFS_NODE_ALU_END_SHIFT)
			| (rmesa->state.pixel_shader.program.node[i].tex_offset << R300_PFS_NODE_TEX_OFFSET_SHIFT)
			| (rmesa->state.pixel_shader.program.node[i].tex_end << R300_PFS_NODE_TEX_END_SHIFT)
			| ( (i==3) ? R300_PFS_NODE_LAST_NODE : 0)
			);
	}

	reg_start(R300_PFS_CNTL_0, 2);
	/* PFS_CNTL_0 */
	e32((rmesa->state.pixel_shader.program.active_nodes-1) | (rmesa->state.pixel_shader.program.first_node_has_tex<<3));
	/* PFS_CNTL_1 */
	e32(rmesa->state.pixel_shader.program.temp_register_count);
	/* PFS_CNTL_2 */
	e32( (rmesa->state.pixel_shader.program.alu_offset << R300_PFS_CNTL_ALU_OFFSET_SHIFT)
		| (rmesa->state.pixel_shader.program.alu_end << R300_PFS_CNTL_ALU_END_SHIFT)
		| (rmesa->state.pixel_shader.program.tex_offset << R300_PFS_CNTL_TEX_OFFSET_SHIFT)
		| (rmesa->state.pixel_shader.program.tex_end << R300_PFS_CNTL_TEX_END_SHIFT)
		);

	if(rmesa->state.pixel_shader.param_length>0){
		reg_start(R300_PFS_PARAM_0_X, rmesa->state.pixel_shader.param_length*4-1);
		for(i=0;i<rmesa->state.pixel_shader.param_length;i++){
			efloat(rmesa->state.pixel_shader.param[i].x);
			efloat(rmesa->state.pixel_shader.param[i].y);
			efloat(rmesa->state.pixel_shader.param[i].z);
			efloat(rmesa->state.pixel_shader.param[i].w);
		}
	}

}