Merge branch 'mesa_7_5_branch'
[mesa.git] / src / mesa / drivers / dri / r600 / r600_cmdbuf.c
1 /*
2 Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved.
3
4 The Weather Channel (TM) funded Tungsten Graphics to develop the
5 initial release of the Radeon 8500 driver under the XFree86 license.
6 This notice must be preserved.
7
8 Permission is hereby granted, free of charge, to any person obtaining
9 a copy of this software and associated documentation files (the
10 "Software"), to deal in the Software without restriction, including
11 without limitation the rights to use, copy, modify, merge, publish,
12 distribute, sublicense, and/or sell copies of the Software, and to
13 permit persons to whom the Software is furnished to do so, subject to
14 the following conditions:
15
16 The above copyright notice and this permission notice (including the
17 next paragraph) shall be included in all copies or substantial
18 portions of the Software.
19
20 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
21 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
22 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
23 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
24 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
25 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
26 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
27
28 **************************************************************************/
29
30 /**
31  * Mostly copied from radeon/radeon_cs_legacy.c
32  */
33
34 #include <errno.h>
35
36 #include "main/glheader.h"
37 #include "main/state.h"
38 #include "main/imports.h"
39 #include "main/macros.h"
40 #include "main/context.h"
41 #include "main/simple_list.h"
42 #include "swrast/swrast.h"
43
44 #include "drm.h"
45 #include "radeon_drm.h"
46
47 #include "r600_context.h"
48 #include "radeon_reg.h"
49 #include "r600_cmdbuf.h"
50 #include "r600_emit.h"
51 #include "radeon_bocs_wrapper.h"
52 #include "radeon_mipmap_tree.h"
53 #include "radeon_reg.h"
54
55
56
57 static struct radeon_cs * r600_cs_create(struct radeon_cs_manager *csm,
58 uint32_t ndw)
59 {
60 struct radeon_cs *cs;
61
62 cs = (struct radeon_cs*)calloc(1, sizeof(struct radeon_cs));
63 if (cs == NULL) {
64 return NULL;
65 }
66 cs->csm = csm;
67 cs->ndw = (ndw + 0x3FF) & (~0x3FF);
68 cs->packets = (uint32_t*)malloc(4*cs->ndw);
69 if (cs->packets == NULL) {
70 free(cs);
71 return NULL;
72 }
73 cs->relocs_total_size = 0;
74 return cs;
75 }
76
77 int r600_cs_write_reloc(struct radeon_cs *cs,
78 struct radeon_bo *bo,
79 uint32_t read_domain,
80 uint32_t write_domain,
81 uint32_t flags,
82 offset_modifiers* poffset_mod)
83 {
84 struct r600_cs_reloc_legacy *relocs;
85 int i;
86
87 relocs = (struct r600_cs_reloc_legacy *)cs->relocs;
88 /* check domains */
89 if ((read_domain && write_domain) || (!read_domain && !write_domain)) {
90 /* in one CS a bo can only be in read or write domain but not
91 * in read & write domain at the same sime
92 */
93 return -EINVAL;
94 }
95 if (read_domain == RADEON_GEM_DOMAIN_CPU) {
96 return -EINVAL;
97 }
98 if (write_domain == RADEON_GEM_DOMAIN_CPU) {
99 return -EINVAL;
100 }
101 /* check if bo is already referenced */
102 for(i = 0; i < cs->crelocs; i++) {
103 uint32_t *indices;
104 uint32_t *reloc_indices;
105
106 if (relocs[i].base.bo->handle == bo->handle) {
107 /* Check domains must be in read or write. As we check already
108 * checked that in argument one of the read or write domain was
109 * set we only need to check that if previous reloc as the read
110 * domain set then the read_domain should also be set for this
111 * new relocation.
112 */
113 if (relocs[i].base.read_domain && !read_domain) {
114 return -EINVAL;
115 }
116 if (relocs[i].base.write_domain && !write_domain) {
117 return -EINVAL;
118 }
119 relocs[i].base.read_domain |= read_domain;
120 relocs[i].base.write_domain |= write_domain;
121 /* save indice */
122 relocs[i].cindices++;
123 indices = (uint32_t*)realloc(relocs[i].indices,
124 relocs[i].cindices * 4);
125 reloc_indices = (uint32_t*)realloc(relocs[i].reloc_indices,
126 relocs[i].cindices * 4);
127 if ( (indices == NULL) || (reloc_indices == NULL) ) {
128 relocs[i].cindices -= 1;
129 return -ENOMEM;
130 }
131 relocs[i].indices = indices;
132 relocs[i].reloc_indices = reloc_indices;
133 relocs[i].indices[relocs[i].cindices - 1] = cs->cdw - 1;
134 relocs[i].reloc_indices[relocs[i].cindices - 1] = cs->section_cdw;
135 cs->section_ndw += 2;
136 cs->section_cdw += 2;
137
138 relocs[i].offset_mod.shift = poffset_mod->shift;
139 relocs[i].offset_mod.shiftbits = poffset_mod->shiftbits;
140 relocs[i].offset_mod.mask = poffset_mod->mask;
141
142 return 0;
143 }
144 }
145 /* add bo to reloc */
146 relocs = (struct r600_cs_reloc_legacy*)
147 realloc(cs->relocs,
148 sizeof(struct r600_cs_reloc_legacy) * (cs->crelocs + 1));
149 if (relocs == NULL) {
150 return -ENOMEM;
151 }
152 cs->relocs = relocs;
153 relocs[cs->crelocs].base.bo = bo;
154 relocs[cs->crelocs].base.read_domain = read_domain;
155 relocs[cs->crelocs].base.write_domain = write_domain;
156 relocs[cs->crelocs].base.flags = flags;
157 relocs[cs->crelocs].indices = (uint32_t*)malloc(4);
158 relocs[cs->crelocs].reloc_indices = (uint32_t*)malloc(4);
159 if ( (relocs[cs->crelocs].indices == NULL) || (relocs[cs->crelocs].reloc_indices == NULL) )
160 {
161 return -ENOMEM;
162 }
163 relocs[cs->crelocs].offset_mod.shift = poffset_mod->shift;
164 relocs[cs->crelocs].offset_mod.shiftbits = poffset_mod->shiftbits;
165 relocs[cs->crelocs].offset_mod.mask = poffset_mod->mask;
166
167 relocs[cs->crelocs].indices[0] = cs->cdw - 1;
168 relocs[cs->crelocs].reloc_indices[0] = cs->section_cdw;
169 cs->section_ndw += 2;
170 cs->section_cdw += 2;
171 relocs[cs->crelocs].cindices = 1;
172 cs->relocs_total_size += radeon_bo_legacy_relocs_size(bo);
173 cs->crelocs++;
174
175 radeon_bo_ref(bo);
176
177 return 0;
178 }
179
180 static int r600_cs_begin(struct radeon_cs *cs,
181 uint32_t ndw,
182 const char *file,
183 const char *func,
184 int line)
185 {
186 if (cs->section) {
187 fprintf(stderr, "CS already in a section(%s,%s,%d)\n",
188 cs->section_file, cs->section_func, cs->section_line);
189 fprintf(stderr, "CS can't start section(%s,%s,%d)\n",
190 file, func, line);
191 return -EPIPE;
192 }
193
194 if (cs->cdw + ndw + 32 > cs->ndw) { /* Left 32 DWORD (8 offset+pitch) spare room for reloc indices */
195 uint32_t tmp, *ptr;
196 int num = (ndw > 0x3FF) ? ndw : 0x3FF;
197
198 tmp = (cs->cdw + 1 + num) & (~num);
199 ptr = (uint32_t*)realloc(cs->packets, 4 * tmp);
200 if (ptr == NULL) {
201 return -ENOMEM;
202 }
203 cs->packets = ptr;
204 cs->ndw = tmp;
205 }
206
207 cs->section = 1;
208 cs->section_ndw = 0;
209 cs->section_cdw = cs->cdw + ndw; /* start of reloc indices. */
210 cs->section_file = file;
211 cs->section_func = func;
212 cs->section_line = line;
213
214 return 0;
215 }
216
217 static int r600_cs_end(struct radeon_cs *cs,
218 const char *file,
219 const char *func,
220 int line)
221
222 {
223 if (!cs->section) {
224 fprintf(stderr, "CS no section to end at (%s,%s,%d)\n",
225 file, func, line);
226 return -EPIPE;
227 }
228 cs->section = 0;
229
230 if ( (cs->section_ndw + cs->cdw) != cs->section_cdw )
231 {
232 fprintf(stderr, "CS section size missmatch start at (%s,%s,%d) %d vs %d\n",
233 cs->section_file, cs->section_func, cs->section_line, cs->section_ndw, cs->section_cdw);
234 fprintf(stderr, "cs->section_ndw = %d, cs->cdw = %d, cs->section_cdw = %d \n",
235 cs->section_ndw, cs->cdw, cs->section_cdw);
236 fprintf(stderr, "CS section end at (%s,%s,%d)\n",
237 file, func, line);
238 return -EPIPE;
239 }
240
241 cs->cdw = cs->section_cdw;
242 return 0;
243 }
244
245 static int r600_cs_process_relocs(struct radeon_cs *cs,
246 uint32_t * reloc_chunk,
247 uint32_t * length_dw_reloc_chunk)
248 {
249 struct r600_cs_manager_legacy *csm = (struct r600_cs_manager_legacy*)cs->csm;
250 struct r600_cs_reloc_legacy *relocs;
251 int i, j, r;
252
253 uint32_t offset_dw = 0;
254
255 csm = (struct r600_cs_manager_legacy*)cs->csm;
256 relocs = (struct r600_cs_reloc_legacy *)cs->relocs;
257 restart:
258 for (i = 0; i < cs->crelocs; i++)
259 {
260 for (j = 0; j < relocs[i].cindices; j++)
261 {
262 uint32_t soffset, eoffset, asicoffset;
263
264 r = radeon_bo_legacy_validate(relocs[i].base.bo,
265 &soffset, &eoffset);
266 if (r == -EAGAIN)
267 {
268 goto restart;
269 }
270 if (r)
271 {
272 fprintf(stderr, "validated %p [0x%08X, 0x%08X]\n",
273 relocs[i].base.bo, soffset, eoffset);
274 return r;
275 }
276 asicoffset = soffset;
277 if (asicoffset >= eoffset)
278 {
279 /* radeon_bo_debug(relocs[i].base.bo, 12); */
280 fprintf(stderr, "validated %p [0x%08X, 0x%08X]\n",
281 relocs[i].base.bo, soffset, eoffset);
282 fprintf(stderr, "above end: %p 0x%08X 0x%08X\n",
283 relocs[i].base.bo,
284 cs->packets[relocs[i].indices[j]],
285 eoffset);
286 exit(0);
287 return -EINVAL;
288 }
289 /* apply offset operator */
290 switch (relocs[i].offset_mod.shift)
291 {
292 case NO_SHIFT:
293 asicoffset = asicoffset & relocs[i].offset_mod.mask;
294 break;
295 case LEFT_SHIFT:
296 asicoffset = (asicoffset << relocs[i].offset_mod.shiftbits) & relocs[i].offset_mod.mask;
297 break;
298 case RIGHT_SHIFT:
299 asicoffset = (asicoffset >> relocs[i].offset_mod.shiftbits) & relocs[i].offset_mod.mask;
300 break;
301 default:
302 break;
303 };
304
305 /* pkt3 nop header in ib chunk */
306 cs->packets[relocs[i].reloc_indices[j]] = 0xC0001000;
307
308 /* reloc index in ib chunk */
309 cs->packets[relocs[i].reloc_indices[j] + 1] = offset_dw;
310
311 /* asic offset in reloc chunk */ /* see alex drm r600_nomm_relocate */
312 reloc_chunk[offset_dw] = asicoffset;
313 reloc_chunk[offset_dw + 3] = 0;
314
315 offset_dw += 4;
316 }
317 }
318
319 *length_dw_reloc_chunk = offset_dw;
320
321 return 0;
322 }
323
324 static int r600_cs_set_age(struct radeon_cs *cs) /* -------------- */
325 {
326 struct r600_cs_manager_legacy *csm = (struct r600_cs_manager_legacy*)cs->csm;
327 struct r600_cs_reloc_legacy *relocs;
328 int i;
329
330 relocs = (struct r600_cs_reloc_legacy *)cs->relocs;
331 for (i = 0; i < cs->crelocs; i++) {
332 radeon_bo_legacy_pending(relocs[i].base.bo, csm->pending_age);
333 radeon_bo_unref(relocs[i].base.bo);
334 }
335 return 0;
336 }
337
338 static void dump_cmdbuf(struct radeon_cs *cs)
339 {
340 int i;
341 fprintf(stderr,"--start--\n");
342 for (i = 0; i < cs->cdw; i++){
343 fprintf(stderr,"0x%08x\n", cs->packets[i]);
344 }
345 fprintf(stderr,"--end--\n");
346
347 }
348
349 static int r600_cs_emit(struct radeon_cs *cs)
350 {
351 struct r600_cs_manager_legacy *csm = (struct r600_cs_manager_legacy*)cs->csm;
352 struct drm_radeon_cs cs_cmd;
353 struct drm_radeon_cs_chunk cs_chunk[2];
354 drm_radeon_cmd_buffer_t cmd;
355 /* drm_r300_cmd_header_t age; */
356 uint32_t length_dw_reloc_chunk;
357 uint64_t ull;
358 uint64_t chunk_ptrs[2];
359 uint32_t reloc_chunk[128];
360 int r;
361 int retry = 0;
362
363 /* TODO : put chip level things here if need. */
364 /* csm->ctx->vtbl.emit_cs_header(cs, csm->ctx); */
365
366 BATCH_LOCALS(csm->ctx);
367 drm_radeon_getparam_t gp;
368 uint32_t current_scratchx_age;
369
370 gp.param = RADEON_PARAM_LAST_CLEAR;
371 gp.value = (int *)&current_scratchx_age;
372 r = drmCommandWriteRead(cs->csm->fd,
373 DRM_RADEON_GETPARAM,
374 &gp,
375 sizeof(gp));
376 if (r)
377 {
378 fprintf(stderr, "%s: drmRadeonGetParam: %d\n", __FUNCTION__, r);
379 exit(1);
380 }
381
382 csm->pending_age = 0;
383 csm->pending_count = 1;
384
385 current_scratchx_age++;
386 csm->pending_age = current_scratchx_age;
387
388 BEGIN_BATCH_NO_AUTOSTATE(3);
389 R600_OUT_BATCH(CP_PACKET3(R600_IT_SET_CONFIG_REG, 1));
390 R600_OUT_BATCH((SCRATCH_REG2 - R600_SET_CONFIG_REG_OFFSET) >> 2);
391 R600_OUT_BATCH(current_scratchx_age);
392 END_BATCH();
393 COMMIT_BATCH();
394
395 //TODO ioctl to get back cs id assigned in drm
396 //csm->pending_age = cs_id_back;
397
398 r = r600_cs_process_relocs(cs, &(reloc_chunk[0]), &length_dw_reloc_chunk);
399 if (r) {
400 return 0;
401 }
402
403 /* raw ib chunk */
404 cs_chunk[0].chunk_id = RADEON_CHUNK_ID_IB;
405 cs_chunk[0].length_dw = cs->cdw;
406 cs_chunk[0].chunk_data = (unsigned long)(cs->packets);
407
408 /* reloc chaunk */
409 cs_chunk[1].chunk_id = RADEON_CHUNK_ID_RELOCS;
410 cs_chunk[1].length_dw = length_dw_reloc_chunk;
411 cs_chunk[1].chunk_data = (unsigned long)&(reloc_chunk[0]);
412
413 chunk_ptrs[0] = (uint64_t)(unsigned long)&(cs_chunk[0]);
414 chunk_ptrs[1] = (uint64_t)(unsigned long)&(cs_chunk[1]);
415
416 cs_cmd.num_chunks = 2;
417 /* cs_cmd.cs_id = 0; */
418 cs_cmd.chunks = (uint64_t)(unsigned long)chunk_ptrs;
419
420 //dump_cmdbuf(cs);
421
422 do
423 {
424 r = drmCommandWriteRead(cs->csm->fd, DRM_RADEON_CS, &cs_cmd, sizeof(cs_cmd));
425 retry++;
426 } while (r == -EAGAIN && retry < 1000);
427
428 if (r) {
429 return r;
430 }
431
432 r600_cs_set_age(cs);
433
434 cs->csm->read_used = 0;
435 cs->csm->vram_write_used = 0;
436 cs->csm->gart_write_used = 0;
437
438 return 0;
439 }
440
441 static void inline r600_cs_free_reloc(void *relocs_p, int crelocs)
442 {
443 struct r600_cs_reloc_legacy *relocs = relocs_p;
444 int i;
445 if (!relocs_p)
446 return;
447 for (i = 0; i < crelocs; i++)
448 {
449 free(relocs[i].indices);
450 free(relocs[i].reloc_indices);
451 }
452 }
453
454 static int r600_cs_destroy(struct radeon_cs *cs)
455 {
456 r600_cs_free_reloc(cs->relocs, cs->crelocs);
457 free(cs->relocs);
458 free(cs->packets);
459 free(cs);
460 return 0;
461 }
462
463 static int r600_cs_erase(struct radeon_cs *cs)
464 {
465 r600_cs_free_reloc(cs->relocs, cs->crelocs);
466 free(cs->relocs);
467 cs->relocs_total_size = 0;
468 cs->relocs = NULL;
469 cs->crelocs = 0;
470 cs->cdw = 0;
471 cs->section = 0;
472 return 0;
473 }
474
/*
 * This function used to flush when the BO usage got to a certain size;
 * the higher levels handle this better now, so never request a flush.
 */
static int r600_cs_need_flush(struct radeon_cs *cs)
{
    return 0;
}
481
/* No-op: the legacy r600 CS implements no printer. */
static void r600_cs_print(struct radeon_cs *cs, FILE *file)
{
    (void)cs;
    (void)file;
}
485
486 static struct radeon_cs_funcs r600_cs_funcs = {
487 r600_cs_create,
488 r600_cs_write_reloc,
489 r600_cs_begin,
490 r600_cs_end,
491 r600_cs_emit,
492 r600_cs_destroy,
493 r600_cs_erase,
494 r600_cs_need_flush,
495 r600_cs_print
496 };
497
498 struct radeon_cs_manager * r600_radeon_cs_manager_legacy_ctor(struct radeon_context *ctx)
499 {
500 struct r600_cs_manager_legacy *csm;
501
502 csm = (struct r600_cs_manager_legacy*)
503 calloc(1, sizeof(struct r600_cs_manager_legacy));
504 if (csm == NULL) {
505 return NULL;
506 }
507 csm->base.funcs = &r600_cs_funcs;
508 csm->base.fd = ctx->dri.fd;
509 csm->ctx = ctx;
510 csm->pending_age = 1;
511 return (struct radeon_cs_manager*)csm;
512 }
513
514 void r600InitCmdBuf(context_t *r600) /* from rcommonInitCmdBuf */
515 {
516 radeonContextPtr rmesa = &r600->radeon;
517
518 GLuint size;
519 /* Initialize command buffer */
520 size = 256 * driQueryOptioni(&rmesa->optionCache,
521 "command_buffer_size");
522 if (size < 2 * rmesa->hw.max_state_size) {
523 size = 2 * rmesa->hw.max_state_size + 65535;
524 }
525 if (size > 64 * 256)
526 size = 64 * 256;
527
528 if (rmesa->radeonScreen->kernel_mm) {
529 int fd = rmesa->radeonScreen->driScreen->fd;
530 rmesa->cmdbuf.csm = radeon_cs_manager_gem_ctor(fd);
531 } else {
532 rmesa->cmdbuf.csm = r600_radeon_cs_manager_legacy_ctor(rmesa);
533 }
534 if (rmesa->cmdbuf.csm == NULL) {
535 /* FIXME: fatal error */
536 return;
537 }
538 rmesa->cmdbuf.cs = radeon_cs_create(rmesa->cmdbuf.csm, size);
539 assert(rmesa->cmdbuf.cs != NULL);
540 rmesa->cmdbuf.size = size;
541
542 if (!rmesa->radeonScreen->kernel_mm) {
543 radeon_cs_set_limit(rmesa->cmdbuf.cs, RADEON_GEM_DOMAIN_VRAM, rmesa->radeonScreen->texSize[0]);
544 radeon_cs_set_limit(rmesa->cmdbuf.cs, RADEON_GEM_DOMAIN_GTT, rmesa->radeonScreen->gartTextures.size);
545 } else {
546 struct drm_radeon_gem_info mminfo;
547
548 if (!drmCommandWriteRead(rmesa->dri.fd, DRM_RADEON_GEM_INFO, &mminfo, sizeof(mminfo)))
549 {
550 radeon_cs_set_limit(rmesa->cmdbuf.cs, RADEON_GEM_DOMAIN_VRAM, mminfo.vram_visible);
551 radeon_cs_set_limit(rmesa->cmdbuf.cs, RADEON_GEM_DOMAIN_GTT, mminfo.gart_size);
552 }
553 }
554 }
555