Merge remote branch 'main/master' into radeon-rewrite
[mesa.git] / src / mesa / drivers / dri / r200 / r200_cmdbuf.c
1 /*
2 Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved.
3
4 The Weather Channel (TM) funded Tungsten Graphics to develop the
5 initial release of the Radeon 8500 driver under the XFree86 license.
6 This notice must be preserved.
7
8 Permission is hereby granted, free of charge, to any person obtaining
9 a copy of this software and associated documentation files (the
10 "Software"), to deal in the Software without restriction, including
11 without limitation the rights to use, copy, modify, merge, publish,
12 distribute, sublicense, and/or sell copies of the Software, and to
13 permit persons to whom the Software is furnished to do so, subject to
14 the following conditions:
15
16 The above copyright notice and this permission notice (including the
17 next paragraph) shall be included in all copies or substantial
18 portions of the Software.
19
20 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
21 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
22 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
23 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
24 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
25 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
26 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
27 */
28
29 /*
30 * Authors:
31 * Keith Whitwell <keith@tungstengraphics.com>
32 */
33
34 #include "main/glheader.h"
35 #include "main/imports.h"
36 #include "main/macros.h"
37 #include "main/context.h"
38 #include "swrast/swrast.h"
39 #include "main/simple_list.h"
40
41 #include "radeon_common.h"
42 #include "r200_context.h"
43 #include "r200_state.h"
44 #include "r200_ioctl.h"
45 #include "r200_tcl.h"
46 #include "r200_sanity.h"
47 #include "radeon_reg.h"
48
49 /* The state atoms will be emitted in the order they appear in the atom list,
50 * so this step is important.
51 */
52 void r200SetUpAtomList( r200ContextPtr rmesa )
53 {
54 int i, mtu;
55
56 mtu = rmesa->radeon.glCtx->Const.MaxTextureUnits;
57
58 make_empty_list(&rmesa->radeon.hw.atomlist);
59 rmesa->radeon.hw.atomlist.name = "atom-list";
60
61 insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.ctx );
62 insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.set );
63 insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.lin );
64 insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.msk );
65 insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.vpt );
66 insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.vtx );
67 insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.vap );
68 insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.vte );
69 insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.msc );
70 insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.cst );
71 insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.zbs );
72 insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.tcl );
73 insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.msl );
74 insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.tcg );
75 insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.grd );
76 insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.fog );
77 insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.tam );
78 insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.tf );
79 insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.atf );
80 for (i = 0; i < mtu; ++i)
81 insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.tex[i] );
82 for (i = 0; i < mtu; ++i)
83 insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.cube[i] );
84 for (i = 0; i < 6; ++i)
85 insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.pix[i] );
86 insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.afs[0] );
87 insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.afs[1] );
88 for (i = 0; i < 8; ++i)
89 insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.lit[i] );
90 for (i = 0; i < 3 + mtu; ++i)
91 insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.mat[i] );
92 insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.eye );
93 insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.glt );
94 for (i = 0; i < 2; ++i)
95 insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.mtl[i] );
96 for (i = 0; i < 6; ++i)
97 insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.ucp[i] );
98 insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.spr );
99 insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.ptp );
100 insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.prf );
101 insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.pvs );
102 insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.vpp[0] );
103 insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.vpp[1] );
104 insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.vpi[0] );
105 insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.vpi[1] );
106 }
107
108 /* Fire a section of the retained (indexed_verts) buffer as a regular
109 * primtive.
110 */
111 void r200EmitVbufPrim( r200ContextPtr rmesa,
112 GLuint primitive,
113 GLuint vertex_nr )
114 {
115 drm_radeon_cmd_header_t *cmd;
116 BATCH_LOCALS(&rmesa->radeon);
117
118 assert(!(primitive & R200_VF_PRIM_WALK_IND));
119
120 radeonEmitState(&rmesa->radeon);
121
122 if (R200_DEBUG & (DEBUG_IOCTL|DEBUG_PRIMS))
123 fprintf(stderr, "%s cmd_used/4: %d prim %x nr %d\n", __FUNCTION__,
124 rmesa->store.cmd_used/4, primitive, vertex_nr);
125
126 BEGIN_BATCH(3);
127 OUT_BATCH_PACKET3_CLIP(R200_CP_CMD_3D_DRAW_VBUF_2, 0);
128 OUT_BATCH(primitive | R200_VF_PRIM_WALK_LIST | R200_VF_COLOR_ORDER_RGBA |
129 (vertex_nr << R200_VF_VERTEX_NUMBER_SHIFT));
130 END_BATCH();
131 }
132
133 static void r200FireEB(r200ContextPtr rmesa, int vertex_count, int type)
134 {
135 BATCH_LOCALS(&rmesa->radeon);
136
137 if (vertex_count > 0) {
138 BEGIN_BATCH(8+2);
139 OUT_BATCH_PACKET3(R200_CP_CMD_3D_DRAW_INDX_2, 0);
140 OUT_BATCH(R200_VF_PRIM_WALK_IND |
141 ((vertex_count + 0) << 16) |
142 type);
143
144 if (!rmesa->radeon.radeonScreen->kernel_mm) {
145 OUT_BATCH_PACKET3(R200_CP_CMD_INDX_BUFFER, 2);
146 OUT_BATCH((0x80 << 24) | (0 << 16) | 0x810);
147 OUT_BATCH_RELOC(rmesa->tcl.elt_dma_offset,
148 rmesa->tcl.elt_dma_bo,
149 rmesa->tcl.elt_dma_offset,
150 RADEON_GEM_DOMAIN_GTT, 0, 0);
151 OUT_BATCH(vertex_count/2);
152 } else {
153 OUT_BATCH_PACKET3(R200_CP_CMD_INDX_BUFFER, 2);
154 OUT_BATCH((0x80 << 24) | (0 << 16) | 0x810);
155 OUT_BATCH(rmesa->tcl.elt_dma_offset);
156 OUT_BATCH(vertex_count/2);
157 radeon_cs_write_reloc(rmesa->radeon.cmdbuf.cs,
158 rmesa->tcl.elt_dma_bo,
159 RADEON_GEM_DOMAIN_GTT, 0, 0);
160 }
161 END_BATCH();
162 }
163 }
164
165 void r200FlushElts(GLcontext *ctx)
166 {
167 r200ContextPtr rmesa = R200_CONTEXT(ctx);
168 int dwords;
169 int nr, elt_used = rmesa->tcl.elt_used;
170
171 if (R200_DEBUG & (DEBUG_IOCTL|DEBUG_PRIMS))
172 fprintf(stderr, "%s %x %d\n", __FUNCTION__, rmesa->tcl.hw_primitive, elt_used);
173
174 assert( rmesa->radeon.dma.flush == r200FlushElts );
175 rmesa->radeon.dma.flush = NULL;
176
177 elt_used = (elt_used + 2) & ~2;
178
179 nr = elt_used / 2;
180
181 radeon_bo_unmap(rmesa->tcl.elt_dma_bo);
182
183 r200FireEB(rmesa, nr, rmesa->tcl.hw_primitive);
184
185 radeon_bo_unref(rmesa->tcl.elt_dma_bo);
186 rmesa->tcl.elt_dma_bo = NULL;
187
188 if (R200_DEBUG & DEBUG_SYNC) {
189 fprintf(stderr, "%s: Syncing\n", __FUNCTION__);
190 radeonFinish( rmesa->radeon.glCtx );
191 }
192 }
193
194
195 GLushort *r200AllocEltsOpenEnded( r200ContextPtr rmesa,
196 GLuint primitive,
197 GLuint min_nr )
198 {
199 GLushort *retval;
200
201 if (R200_DEBUG & DEBUG_IOCTL)
202 fprintf(stderr, "%s %d prim %x\n", __FUNCTION__, min_nr, primitive);
203
204 assert((primitive & R200_VF_PRIM_WALK_IND));
205
206 radeonEmitState(&rmesa->radeon);
207
208 rmesa->tcl.elt_dma_bo = radeon_bo_open(rmesa->radeon.radeonScreen->bom,
209 0, R200_ELT_BUF_SZ, 4,
210 RADEON_GEM_DOMAIN_GTT, 0);
211 rmesa->tcl.elt_dma_offset = 0;
212 rmesa->tcl.elt_used = min_nr * 2;
213
214 radeon_bo_map(rmesa->tcl.elt_dma_bo, 1);
215 retval = rmesa->tcl.elt_dma_bo->ptr + rmesa->tcl.elt_dma_offset;
216
217
218 if (R200_DEBUG & DEBUG_PRIMS)
219 fprintf(stderr, "%s: header prim %x \n",
220 __FUNCTION__, primitive);
221
222 assert(!rmesa->radeon.dma.flush);
223 rmesa->radeon.glCtx->Driver.NeedFlush |= FLUSH_STORED_VERTICES;
224 rmesa->radeon.dma.flush = r200FlushElts;
225
226 return retval;
227 }
228
229
230
231 void r200EmitVertexAOS( r200ContextPtr rmesa,
232 GLuint vertex_size,
233 struct radeon_bo *bo,
234 GLuint offset )
235 {
236 BATCH_LOCALS(&rmesa->radeon);
237
238 if (R200_DEBUG & (DEBUG_PRIMS|DEBUG_IOCTL))
239 fprintf(stderr, "%s: vertex_size 0x%x offset 0x%x \n",
240 __FUNCTION__, vertex_size, offset);
241
242
243 BEGIN_BATCH(5);
244 OUT_BATCH_PACKET3(R200_CP_CMD_3D_LOAD_VBPNTR, 2);
245 OUT_BATCH(1);
246 OUT_BATCH(vertex_size | (vertex_size << 8));
247 OUT_BATCH_RELOC(offset, bo, offset, RADEON_GEM_DOMAIN_GTT, 0, 0);
248 END_BATCH();
249 }
250
251 void r200EmitAOS(r200ContextPtr rmesa, GLuint nr, GLuint offset)
252 {
253 BATCH_LOCALS(&rmesa->radeon);
254 uint32_t voffset;
255 int sz = 1 + (nr >> 1) * 3 + (nr & 1) * 2;
256 int i;
257
258 if (RADEON_DEBUG & DEBUG_VERTS)
259 fprintf(stderr, "%s: nr=%d, ofs=0x%08x\n", __FUNCTION__, nr,
260 offset);
261
262 BEGIN_BATCH(sz+2+ (nr*2));
263 OUT_BATCH_PACKET3(R200_CP_CMD_3D_LOAD_VBPNTR, sz - 1);
264 OUT_BATCH(nr);
265
266
267 if (!rmesa->radeon.radeonScreen->kernel_mm) {
268 for (i = 0; i + 1 < nr; i += 2) {
269 OUT_BATCH((rmesa->tcl.aos[i].components << 0) |
270 (rmesa->tcl.aos[i].stride << 8) |
271 (rmesa->tcl.aos[i + 1].components << 16) |
272 (rmesa->tcl.aos[i + 1].stride << 24));
273
274 voffset = rmesa->tcl.aos[i + 0].offset +
275 offset * 4 * rmesa->tcl.aos[i + 0].stride;
276 OUT_BATCH_RELOC(voffset,
277 rmesa->tcl.aos[i].bo,
278 voffset,
279 RADEON_GEM_DOMAIN_GTT,
280 0, 0);
281 voffset = rmesa->tcl.aos[i + 1].offset +
282 offset * 4 * rmesa->tcl.aos[i + 1].stride;
283 OUT_BATCH_RELOC(voffset,
284 rmesa->tcl.aos[i+1].bo,
285 voffset,
286 RADEON_GEM_DOMAIN_GTT,
287 0, 0);
288 }
289
290 if (nr & 1) {
291 OUT_BATCH((rmesa->tcl.aos[nr - 1].components << 0) |
292 (rmesa->tcl.aos[nr - 1].stride << 8));
293 voffset = rmesa->tcl.aos[nr - 1].offset +
294 offset * 4 * rmesa->tcl.aos[nr - 1].stride;
295 OUT_BATCH_RELOC(voffset,
296 rmesa->tcl.aos[nr - 1].bo,
297 voffset,
298 RADEON_GEM_DOMAIN_GTT,
299 0, 0);
300 }
301 } else {
302 for (i = 0; i + 1 < nr; i += 2) {
303 OUT_BATCH((rmesa->tcl.aos[i].components << 0) |
304 (rmesa->tcl.aos[i].stride << 8) |
305 (rmesa->tcl.aos[i + 1].components << 16) |
306 (rmesa->tcl.aos[i + 1].stride << 24));
307
308 voffset = rmesa->tcl.aos[i + 0].offset +
309 offset * 4 * rmesa->tcl.aos[i + 0].stride;
310 OUT_BATCH(voffset);
311 voffset = rmesa->tcl.aos[i + 1].offset +
312 offset * 4 * rmesa->tcl.aos[i + 1].stride;
313 OUT_BATCH(voffset);
314 }
315
316 if (nr & 1) {
317 OUT_BATCH((rmesa->tcl.aos[nr - 1].components << 0) |
318 (rmesa->tcl.aos[nr - 1].stride << 8));
319 voffset = rmesa->tcl.aos[nr - 1].offset +
320 offset * 4 * rmesa->tcl.aos[nr - 1].stride;
321 OUT_BATCH(voffset);
322 }
323 for (i = 0; i + 1 < nr; i += 2) {
324 voffset = rmesa->tcl.aos[i + 0].offset +
325 offset * 4 * rmesa->tcl.aos[i + 0].stride;
326 radeon_cs_write_reloc(rmesa->radeon.cmdbuf.cs,
327 rmesa->tcl.aos[i+0].bo,
328 RADEON_GEM_DOMAIN_GTT,
329 0, 0);
330 voffset = rmesa->tcl.aos[i + 1].offset +
331 offset * 4 * rmesa->tcl.aos[i + 1].stride;
332 radeon_cs_write_reloc(rmesa->radeon.cmdbuf.cs,
333 rmesa->tcl.aos[i+1].bo,
334 RADEON_GEM_DOMAIN_GTT,
335 0, 0);
336 }
337 if (nr & 1) {
338 voffset = rmesa->tcl.aos[nr - 1].offset +
339 offset * 4 * rmesa->tcl.aos[nr - 1].stride;
340 radeon_cs_write_reloc(rmesa->radeon.cmdbuf.cs,
341 rmesa->tcl.aos[nr-1].bo,
342 RADEON_GEM_DOMAIN_GTT,
343 0, 0);
344 }
345 }
346 END_BATCH();
347 }
348
349 void r200FireAOS(r200ContextPtr rmesa, int vertex_count, int type)
350 {
351 BATCH_LOCALS(&rmesa->radeon);
352
353 BEGIN_BATCH(3);
354 OUT_BATCH_PACKET3(R200_CP_CMD_3D_DRAW_VBUF_2, 0);
355 OUT_BATCH(R200_VF_PRIM_WALK_LIST | (vertex_count << 16) | type);
356 END_BATCH();
357 }
358