src/mesa/drivers/dri/r600/r600_cmdbuf.c
/*
Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved.

The Weather Channel (TM) funded Tungsten Graphics to develop the
initial release of the Radeon 8500 driver under the XFree86 license.
This notice must be preserved.

Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:

The above copyright notice and this permission notice (including the
next paragraph) shall be included in all copies or substantial
portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

**************************************************************************/

/**
 * Mostly copied from radeon/radeon_cs_legacy.c
 */

#include <errno.h>

#include "main/glheader.h"
#include "main/state.h"
#include "main/imports.h"
#include "main/macros.h"
#include "main/context.h"
#include "main/simple_list.h"
#include "swrast/swrast.h"

#include "drm.h"
#include "radeon_drm.h"

#include "r600_context.h"
#include "radeon_reg.h"
#include "r600_cmdbuf.h"
#include "r600_emit.h"
#include "radeon_bocs_wrapper.h"
#include "radeon_mipmap_tree.h"

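/*
 * Allocate a command stream for the legacy (non-GEM) path.  The requested
 * dword count is rounded up to a multiple of 0x400 (1024) dwords and the
 * packet buffer is sized at four bytes per dword.
 */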
static struct radeon_cs *r600_cs_create(struct radeon_cs_manager *csm,
                                        uint32_t ndw)
{
    struct radeon_cs *cs;

    cs = (struct radeon_cs*)calloc(1, sizeof(struct radeon_cs));
    if (cs == NULL) {
        return NULL;
    }
    cs->csm = csm;
    cs->ndw = (ndw + 0x3FF) & (~0x3FF);
    cs->packets = (uint32_t*)malloc(4*cs->ndw);
    if (cs->packets == NULL) {
        free(cs);
        return NULL;
    }
    cs->relocs_total_size = 0;
    return cs;
}

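/*
 * Record a relocation for a buffer object.  Exactly one of the read or
 * write domain must be set.  For every use of the bo, two placeholder
 * dwords (a PKT3 NOP header plus a relocation index) are reserved in the
 * packet stream; they are patched with real values later, when
 * r600_cs_process_relocs() builds the relocation chunk.
 */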
static int r600_cs_write_reloc(struct radeon_cs *cs,
                               struct radeon_bo *bo,
                               uint32_t read_domain,
                               uint32_t write_domain,
                               uint32_t flags)
{
    struct r600_cs_reloc_legacy *relocs;
    int i;

    relocs = (struct r600_cs_reloc_legacy *)cs->relocs;
    /* check domains */
    if ((read_domain && write_domain) || (!read_domain && !write_domain)) {
        /* in one CS a bo can only be in the read or the write domain,
         * not in both at the same time
         */
        return -EINVAL;
    }
    if (read_domain == RADEON_GEM_DOMAIN_CPU) {
        return -EINVAL;
    }
    if (write_domain == RADEON_GEM_DOMAIN_CPU) {
        return -EINVAL;
    }
    /* check if bo is already referenced */
    for (i = 0; i < cs->crelocs; i++) {
        uint32_t *indices;
        uint32_t *reloc_indices;

        if (relocs[i].base.bo->handle == bo->handle) {
            /* We already checked above that exactly one of the read or
             * write domain is set for this request, so here we only need
             * to verify that it matches the domain recorded for the
             * earlier relocation of the same bo.
             */
            if (relocs[i].base.read_domain && !read_domain) {
                return -EINVAL;
            }
            if (relocs[i].base.write_domain && !write_domain) {
                return -EINVAL;
            }
            relocs[i].base.read_domain |= read_domain;
            relocs[i].base.write_domain |= write_domain;
            /* save the index of this use */
            relocs[i].cindices++;
            indices = (uint32_t*)realloc(relocs[i].indices,
                                         relocs[i].cindices * 4);
            reloc_indices = (uint32_t*)realloc(relocs[i].reloc_indices,
                                               relocs[i].cindices * 4);
            if ((indices == NULL) || (reloc_indices == NULL)) {
                relocs[i].cindices -= 1;
                return -ENOMEM;
            }
            relocs[i].indices = indices;
            relocs[i].reloc_indices = reloc_indices;
            relocs[i].indices[relocs[i].cindices - 1] = cs->cdw;
            relocs[i].reloc_indices[relocs[i].cindices - 1] = cs->cdw;
            cs->section_cdw += 2;
            cs->cdw += 2;

            return 0;
        }
    }
    /* add bo to reloc */
    relocs = (struct r600_cs_reloc_legacy*)
             realloc(cs->relocs,
                     sizeof(struct r600_cs_reloc_legacy) * (cs->crelocs + 1));
    if (relocs == NULL) {
        return -ENOMEM;
    }
    cs->relocs = relocs;
    relocs[cs->crelocs].base.bo = bo;
    relocs[cs->crelocs].base.read_domain = read_domain;
    relocs[cs->crelocs].base.write_domain = write_domain;
    relocs[cs->crelocs].base.flags = flags;
    relocs[cs->crelocs].indices = (uint32_t*)malloc(4);
    relocs[cs->crelocs].reloc_indices = (uint32_t*)malloc(4);
    if ((relocs[cs->crelocs].indices == NULL) || (relocs[cs->crelocs].reloc_indices == NULL)) {
        return -ENOMEM;
    }

    relocs[cs->crelocs].indices[0] = cs->cdw;
    relocs[cs->crelocs].reloc_indices[0] = cs->cdw;
    cs->section_cdw += 2;
    cs->cdw += 2;
    relocs[cs->crelocs].cindices = 1;
    cs->relocs_total_size += radeon_bo_legacy_relocs_size(bo);
    cs->crelocs++;

    radeon_bo_ref(bo);

    return 0;
}

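/*
 * Open a packet section.  The caller declares up front how many dwords the
 * section will emit (ndw); r600_cs_end() later checks that exactly that
 * many were written.  The packet buffer is grown here if the section would
 * not fit.
 */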
static int r600_cs_begin(struct radeon_cs *cs,
                         uint32_t ndw,
                         const char *file,
                         const char *func,
                         int line)
{
    if (cs->section) {
        fprintf(stderr, "CS already in a section(%s,%s,%d)\n",
                cs->section_file, cs->section_func, cs->section_line);
        fprintf(stderr, "CS can't start section(%s,%s,%d)\n",
                file, func, line);
        return -EPIPE;
    }

    cs->section = 1;
    cs->section_ndw = ndw;
    cs->section_cdw = 0;
    cs->section_file = file;
    cs->section_func = func;
    cs->section_line = line;

    if (cs->cdw + ndw > cs->ndw) {
        uint32_t tmp, *ptr;
        int num = (ndw > 0x400) ? ndw : 0x400;

        tmp = (cs->cdw + num + 0x3FF) & (~0x3FF);
        ptr = (uint32_t*)realloc(cs->packets, 4 * tmp);
        if (ptr == NULL) {
            return -ENOMEM;
        }
        cs->packets = ptr;
        cs->ndw = tmp;
    }

    return 0;
}

static int r600_cs_end(struct radeon_cs *cs,
                       const char *file,
                       const char *func,
                       int line)
{
    if (!cs->section) {
        fprintf(stderr, "CS has no section to end at (%s,%s,%d)\n",
                file, func, line);
        return -EPIPE;
    }
    cs->section = 0;

    if (cs->section_ndw != cs->section_cdw) {
        fprintf(stderr, "CS section size mismatch, started at (%s,%s,%d) %d vs %d\n",
                cs->section_file, cs->section_func, cs->section_line, cs->section_ndw, cs->section_cdw);
        fprintf(stderr, "cs->section_ndw = %d, cs->cdw = %d, cs->section_cdw = %d\n",
                cs->section_ndw, cs->cdw, cs->section_cdw);
        fprintf(stderr, "CS section end at (%s,%s,%d)\n",
                file, func, line);
        return -EPIPE;
    }

    if (cs->cdw > cs->ndw) {
        fprintf(stderr, "CS section overflow at (%s,%s,%d) cdw %d ndw %d\n",
                cs->section_file, cs->section_func, cs->section_line, cs->cdw, cs->ndw);
        fprintf(stderr, "CS section end at (%s,%s,%d)\n",
                file, func, line);
        assert(0);
    }

    return 0;
}

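/*
 * Walk all recorded relocations, validate each buffer object to obtain its
 * current offset range, patch the placeholder dwords reserved in the packet
 * stream (PKT3 NOP header + index into the relocation chunk), and fill the
 * relocation chunk handed to the kernel.  Each bo takes four dwords in that
 * chunk; only dword 0 (the asic offset) and dword 3 (zero) are written
 * here, the remaining dwords are left as allocated (zero from the calloc()
 * in r600_cs_emit()).
 */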
static int r600_cs_process_relocs(struct radeon_cs *cs,
                                  uint32_t *reloc_chunk,
                                  uint32_t *length_dw_reloc_chunk)
{
    struct r600_cs_manager_legacy *csm = (struct r600_cs_manager_legacy*)cs->csm;
    struct r600_cs_reloc_legacy *relocs;
    int i, j, r;

    uint32_t offset_dw = 0;

    relocs = (struct r600_cs_reloc_legacy *)cs->relocs;
restart:
    for (i = 0; i < cs->crelocs; i++) {
        uint32_t soffset, eoffset, asicoffset;

        r = radeon_bo_legacy_validate(relocs[i].base.bo,
                                      &soffset, &eoffset);
        if (r == -EAGAIN) {
            goto restart;
        }
        if (r) {
            fprintf(stderr, "validate failed %p [0x%08X, 0x%08X]\n",
                    relocs[i].base.bo, soffset, eoffset);
            return r;
        }
        asicoffset = soffset;

        for (j = 0; j < relocs[i].cindices; j++) {
            if (asicoffset >= eoffset) {
                /* radeon_bo_debug(relocs[i].base.bo, 12); */
                fprintf(stderr, "validated %p [0x%08X, 0x%08X]\n",
                        relocs[i].base.bo, soffset, eoffset);
                fprintf(stderr, "above end: %p 0x%08X 0x%08X\n",
                        relocs[i].base.bo,
                        cs->packets[relocs[i].indices[j]],
                        eoffset);
                exit(0);
                return -EINVAL;
            }
            /* pkt3 nop header in ib chunk */
            cs->packets[relocs[i].reloc_indices[j]] = 0xC0001000;
            /* reloc index in ib chunk */
            cs->packets[relocs[i].reloc_indices[j] + 1] = offset_dw;
        }

        /* asic offset in reloc chunk; see r600_nomm_relocate in the drm */
        reloc_chunk[offset_dw] = asicoffset;
        reloc_chunk[offset_dw + 3] = 0;

        offset_dw += 4;
    }

    *length_dw_reloc_chunk = offset_dw;

    return 0;
}

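/*
 * After a successful submission, stamp every referenced bo with the age of
 * this command stream so the legacy bo code knows when it is safe to reuse
 * the memory, then drop the reference taken in r600_cs_write_reloc().
 */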
static int r600_cs_set_age(struct radeon_cs *cs)
{
    struct r600_cs_manager_legacy *csm = (struct r600_cs_manager_legacy*)cs->csm;
    struct r600_cs_reloc_legacy *relocs;
    int i;

    relocs = (struct r600_cs_reloc_legacy *)cs->relocs;
    for (i = 0; i < cs->crelocs; i++) {
        radeon_bo_legacy_pending(relocs[i].base.bo, csm->pending_age);
        radeon_bo_unref(relocs[i].base.bo);
    }
    return 0;
}

#if 0
static void dump_cmdbuf(struct radeon_cs *cs)
{
    int i;
    fprintf(stderr, "--start--\n");
    for (i = 0; i < cs->cdw; i++) {
        fprintf(stderr, "0x%08x\n", cs->packets[i]);
    }
    fprintf(stderr, "--end--\n");
}
#endif

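/*
 * Hand the finished command stream to the kernel.  Two chunks are passed
 * to the DRM_RADEON_CS ioctl: the raw indirect buffer (IB) and the
 * relocation chunk built by r600_cs_process_relocs().  The ioctl is
 * retried on -EAGAIN; on success the returned cs_id becomes the pending
 * age used to track buffer-object lifetimes.
 */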
static int r600_cs_emit(struct radeon_cs *cs)
{
    struct r600_cs_manager_legacy *csm = (struct r600_cs_manager_legacy*)cs->csm;
    struct drm_radeon_cs cs_cmd;
    struct drm_radeon_cs_chunk cs_chunk[2];
    uint32_t length_dw_reloc_chunk;
    uint64_t chunk_ptrs[2];
    uint32_t *reloc_chunk;
    int r;
    int retry = 0;

    /* TODO: put chip level things here if needed. */
    /* csm->ctx->vtbl.emit_cs_header(cs, csm->ctx); */

    csm->pending_count = 1;

    reloc_chunk = (uint32_t*)calloc(1, cs->crelocs * 4 * 4);

    r = r600_cs_process_relocs(cs, reloc_chunk, &length_dw_reloc_chunk);
    if (r) {
        free(reloc_chunk);
        return 0;
    }

    /* raw ib chunk */
    cs_chunk[0].chunk_id = RADEON_CHUNK_ID_IB;
    cs_chunk[0].length_dw = cs->cdw;
    cs_chunk[0].chunk_data = (unsigned long)(cs->packets);

    /* reloc chunk */
    cs_chunk[1].chunk_id = RADEON_CHUNK_ID_RELOCS;
    cs_chunk[1].length_dw = length_dw_reloc_chunk;
    cs_chunk[1].chunk_data = (unsigned long)reloc_chunk;

    chunk_ptrs[0] = (uint64_t)(unsigned long)&(cs_chunk[0]);
    chunk_ptrs[1] = (uint64_t)(unsigned long)&(cs_chunk[1]);

    cs_cmd.num_chunks = 2;
    /* cs_cmd.cs_id = 0; */
    cs_cmd.chunks = (uint64_t)(unsigned long)chunk_ptrs;

    /* dump_cmdbuf(cs); */

    do {
        r = drmCommandWriteRead(cs->csm->fd, DRM_RADEON_CS, &cs_cmd, sizeof(cs_cmd));
        retry++;
    } while (r == -EAGAIN && retry < 1000);

    if (r) {
        free(reloc_chunk);
        return r;
    }

    csm->pending_age = cs_cmd.cs_id;

    r600_cs_set_age(cs);

    cs->csm->read_used = 0;
    cs->csm->vram_write_used = 0;
    cs->csm->gart_write_used = 0;

    free(reloc_chunk);

    return 0;
}

static inline void r600_cs_free_reloc(void *relocs_p, int crelocs)
{
    struct r600_cs_reloc_legacy *relocs = relocs_p;
    int i;
    if (!relocs_p)
        return;
    for (i = 0; i < crelocs; i++) {
        free(relocs[i].indices);
        free(relocs[i].reloc_indices);
    }
}

static int r600_cs_destroy(struct radeon_cs *cs)
{
    r600_cs_free_reloc(cs->relocs, cs->crelocs);
    free(cs->relocs);
    free(cs->packets);
    free(cs);
    return 0;
}

static int r600_cs_erase(struct radeon_cs *cs)
{
    r600_cs_free_reloc(cs->relocs, cs->crelocs);
    free(cs->relocs);
    cs->relocs_total_size = 0;
    cs->relocs = NULL;
    cs->crelocs = 0;
    cs->cdw = 0;
    cs->section = 0;
    return 0;
}

static int r600_cs_need_flush(struct radeon_cs *cs)
{
    /* This function used to flush when the BO usage reached a certain
     * size; the higher levels now handle this better. */
    return 0;
}

static void r600_cs_print(struct radeon_cs *cs, FILE *file)
{
}

static struct radeon_cs_funcs r600_cs_funcs = {
    r600_cs_create,
    r600_cs_write_reloc,
    r600_cs_begin,
    r600_cs_end,
    r600_cs_emit,
    r600_cs_destroy,
    r600_cs_erase,
    r600_cs_need_flush,
    r600_cs_print
};

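/*
 * Illustrative only, not part of the driver: a minimal sketch of how a
 * caller exercises this vtable through the generic radeon_cs_* wrappers,
 * assuming those wrapper entry points dispatch to the function table
 * above.  Names outside this file (the example function itself, the exact
 * wrapper signatures) are assumptions and vary between Mesa releases.
 */
#if 0
static void example_submit(struct radeon_cs_manager *csm, struct radeon_bo *bo)
{
    struct radeon_cs *cs = radeon_cs_create(csm, 1024);

    /* declare 4 dwords: 2 packet dwords + 2 reserved by the relocation */
    radeon_cs_begin(cs, 4, __FILE__, __func__, __LINE__);
    radeon_cs_write_dword(cs, 0);   /* packet dwords (placeholders) */
    radeon_cs_write_dword(cs, 0);
    /* reserves two more dwords, patched at emit time by process_relocs */
    radeon_cs_write_reloc(cs, bo, RADEON_GEM_DOMAIN_VRAM, 0, 0);
    radeon_cs_end(cs, __FILE__, __func__, __LINE__);

    radeon_cs_emit(cs);
    radeon_cs_erase(cs);
    radeon_cs_destroy(cs);
}
#endif
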
struct radeon_cs_manager *r600_radeon_cs_manager_legacy_ctor(struct radeon_context *ctx)
{
    struct r600_cs_manager_legacy *csm;

    csm = (struct r600_cs_manager_legacy*)
          calloc(1, sizeof(struct r600_cs_manager_legacy));
    if (csm == NULL) {
        return NULL;
    }
    csm->base.funcs = &r600_cs_funcs;
    csm->base.fd = ctx->dri.fd;
    csm->ctx = ctx;
    csm->pending_age = 1;
    return (struct radeon_cs_manager*)csm;
}

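/*
 * Create the context's command buffer.  The "command_buffer_size" driconf
 * option is scaled by 256 (presumably the option is in KiB and there are
 * 256 dwords per KiB), enlarged when it cannot hold at least twice the
 * worst-case state emission, and capped at 64 * 256 dwords.  With a kernel
 * memory manager the GEM CS manager is used; otherwise the legacy manager
 * defined above.
 */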
void r600InitCmdBuf(context_t *r600) /* from rcommonInitCmdBuf */
{
    radeonContextPtr rmesa = &r600->radeon;
    GLuint size;

    r600InitAtoms(r600);

    /* Initialize command buffer */
    size = 256 * driQueryOptioni(&rmesa->optionCache,
                                 "command_buffer_size");
    if (size < 2 * rmesa->hw.max_state_size) {
        size = 2 * rmesa->hw.max_state_size + 65535;
    }
    if (size > 64 * 256)
        size = 64 * 256;

    if (rmesa->radeonScreen->kernel_mm) {
        int fd = rmesa->radeonScreen->driScreen->fd;
        rmesa->cmdbuf.csm = radeon_cs_manager_gem_ctor(fd);
    } else {
        rmesa->cmdbuf.csm = r600_radeon_cs_manager_legacy_ctor(rmesa);
    }
    if (rmesa->cmdbuf.csm == NULL) {
        /* FIXME: fatal error */
        return;
    }
    rmesa->cmdbuf.cs = radeon_cs_create(rmesa->cmdbuf.csm, size);
    assert(rmesa->cmdbuf.cs != NULL);
    rmesa->cmdbuf.size = size;

    radeon_cs_space_set_flush(rmesa->cmdbuf.cs,
                              (void (*)(void *))rmesa->glCtx->Driver.Flush, rmesa->glCtx);

    if (!rmesa->radeonScreen->kernel_mm) {
        radeon_cs_set_limit(rmesa->cmdbuf.cs, RADEON_GEM_DOMAIN_VRAM, rmesa->radeonScreen->texSize[0]);
        radeon_cs_set_limit(rmesa->cmdbuf.cs, RADEON_GEM_DOMAIN_GTT, rmesa->radeonScreen->gartTextures.size);
    } else {
        struct drm_radeon_gem_info mminfo;

        if (!drmCommandWriteRead(rmesa->dri.fd, DRM_RADEON_GEM_INFO, &mminfo, sizeof(mminfo))) {
            radeon_cs_set_limit(rmesa->cmdbuf.cs, RADEON_GEM_DOMAIN_VRAM, mminfo.vram_visible);
            radeon_cs_set_limit(rmesa->cmdbuf.cs, RADEON_GEM_DOMAIN_GTT, mminfo.gart_size);
        }
    }
}