Merge branch 'master' of ssh://git.freedesktop.org/git/mesa/mesa into r600_state_predict
[mesa.git] / src / mesa / drivers / dri / r600 / r600_cmdbuf.c
1 /*
2 Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved.
3
4 The Weather Channel (TM) funded Tungsten Graphics to develop the
5 initial release of the Radeon 8500 driver under the XFree86 license.
6 This notice must be preserved.
7
8 Permission is hereby granted, free of charge, to any person obtaining
9 a copy of this software and associated documentation files (the
10 "Software"), to deal in the Software without restriction, including
11 without limitation the rights to use, copy, modify, merge, publish,
12 distribute, sublicense, and/or sell copies of the Software, and to
13 permit persons to whom the Software is furnished to do so, subject to
14 the following conditions:
15
16 The above copyright notice and this permission notice (including the
17 next paragraph) shall be included in all copies or substantial
18 portions of the Software.
19
20 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
21 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
22 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
23 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
24 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
25 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
26 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
27
28 **************************************************************************/
29
30 /**
31 * Mostly copied from \radeon\radeon_cs_legacy.c
32 */
33
34 #include <errno.h>
35
36 #include "main/glheader.h"
37 #include "main/state.h"
38 #include "main/imports.h"
39 #include "main/macros.h"
40 #include "main/context.h"
41 #include "main/simple_list.h"
42 #include "swrast/swrast.h"
43
44 #include "drm.h"
45 #include "radeon_drm.h"
46
47 #include "r600_context.h"
48 #include "radeon_reg.h"
49 #include "r600_cmdbuf.h"
50 #include "r600_emit.h"
51 #include "radeon_bocs_wrapper.h"
52 #include "radeon_mipmap_tree.h"
53 #include "radeon_reg.h"
54
55
56
57 static struct radeon_cs * r600_cs_create(struct radeon_cs_manager *csm,
58 uint32_t ndw)
59 {
60 struct radeon_cs *cs;
61
62 cs = (struct radeon_cs*)calloc(1, sizeof(struct radeon_cs));
63 if (cs == NULL) {
64 return NULL;
65 }
66 cs->csm = csm;
67 cs->ndw = (ndw + 0x3FF) & (~0x3FF);
68 cs->packets = (uint32_t*)malloc(4*cs->ndw);
69 if (cs->packets == NULL) {
70 free(cs);
71 return NULL;
72 }
73 cs->relocs_total_size = 0;
74 return cs;
75 }
76
77 int r600_cs_write_reloc(struct radeon_cs *cs,
78 struct radeon_bo *bo,
79 uint32_t read_domain,
80 uint32_t write_domain,
81 uint32_t flags)
82 {
83 struct r600_cs_reloc_legacy *relocs;
84 int i;
85
86 relocs = (struct r600_cs_reloc_legacy *)cs->relocs;
87 /* check domains */
88 if ((read_domain && write_domain) || (!read_domain && !write_domain)) {
89 /* in one CS a bo can only be in read or write domain but not
90 * in read & write domain at the same sime
91 */
92 return -EINVAL;
93 }
94 if (read_domain == RADEON_GEM_DOMAIN_CPU) {
95 return -EINVAL;
96 }
97 if (write_domain == RADEON_GEM_DOMAIN_CPU) {
98 return -EINVAL;
99 }
100 /* check if bo is already referenced */
101 for(i = 0; i < cs->crelocs; i++) {
102 uint32_t *indices;
103 uint32_t *reloc_indices;
104
105 if (relocs[i].base.bo->handle == bo->handle) {
106 /* Check domains must be in read or write. As we check already
107 * checked that in argument one of the read or write domain was
108 * set we only need to check that if previous reloc as the read
109 * domain set then the read_domain should also be set for this
110 * new relocation.
111 */
112 if (relocs[i].base.read_domain && !read_domain) {
113 return -EINVAL;
114 }
115 if (relocs[i].base.write_domain && !write_domain) {
116 return -EINVAL;
117 }
118 relocs[i].base.read_domain |= read_domain;
119 relocs[i].base.write_domain |= write_domain;
120 /* save indice */
121 relocs[i].cindices++;
122 indices = (uint32_t*)realloc(relocs[i].indices,
123 relocs[i].cindices * 4);
124 reloc_indices = (uint32_t*)realloc(relocs[i].reloc_indices,
125 relocs[i].cindices * 4);
126 if ( (indices == NULL) || (reloc_indices == NULL) ) {
127 relocs[i].cindices -= 1;
128 return -ENOMEM;
129 }
130 relocs[i].indices = indices;
131 relocs[i].reloc_indices = reloc_indices;
132 relocs[i].indices[relocs[i].cindices - 1] = cs->cdw;
133 relocs[i].reloc_indices[relocs[i].cindices - 1] = cs->cdw;
134 cs->section_cdw += 2;
135 cs->cdw += 2;
136
137 return 0;
138 }
139 }
140 /* add bo to reloc */
141 relocs = (struct r600_cs_reloc_legacy*)
142 realloc(cs->relocs,
143 sizeof(struct r600_cs_reloc_legacy) * (cs->crelocs + 1));
144 if (relocs == NULL) {
145 return -ENOMEM;
146 }
147 cs->relocs = relocs;
148 relocs[cs->crelocs].base.bo = bo;
149 relocs[cs->crelocs].base.read_domain = read_domain;
150 relocs[cs->crelocs].base.write_domain = write_domain;
151 relocs[cs->crelocs].base.flags = flags;
152 relocs[cs->crelocs].indices = (uint32_t*)malloc(4);
153 relocs[cs->crelocs].reloc_indices = (uint32_t*)malloc(4);
154 if ( (relocs[cs->crelocs].indices == NULL) || (relocs[cs->crelocs].reloc_indices == NULL) )
155 {
156 return -ENOMEM;
157 }
158
159 relocs[cs->crelocs].indices[0] = cs->cdw;
160 relocs[cs->crelocs].reloc_indices[0] = cs->cdw;
161 cs->section_cdw += 2;
162 cs->cdw += 2;
163 relocs[cs->crelocs].cindices = 1;
164 cs->relocs_total_size += radeon_bo_legacy_relocs_size(bo);
165 cs->crelocs++;
166
167 radeon_bo_ref(bo);
168
169 return 0;
170 }
171
172 static int r600_cs_begin(struct radeon_cs *cs,
173 uint32_t ndw,
174 const char *file,
175 const char *func,
176 int line)
177 {
178 if (cs->section) {
179 fprintf(stderr, "CS already in a section(%s,%s,%d)\n",
180 cs->section_file, cs->section_func, cs->section_line);
181 fprintf(stderr, "CS can't start section(%s,%s,%d)\n",
182 file, func, line);
183 return -EPIPE;
184 }
185
186 cs->section = 1;
187 cs->section_ndw = ndw;
188 cs->section_cdw = 0;
189 cs->section_file = file;
190 cs->section_func = func;
191 cs->section_line = line;
192
193 if (cs->cdw + ndw > cs->ndw) {
194 uint32_t tmp, *ptr;
195 int num = (ndw > 0x3FF) ? ndw : 0x3FF;
196
197 tmp = (cs->cdw + 1 + num) & (~num);
198 ptr = (uint32_t*)realloc(cs->packets, 4 * tmp);
199 if (ptr == NULL) {
200 return -ENOMEM;
201 }
202 cs->packets = ptr;
203 cs->ndw = tmp;
204 }
205
206 return 0;
207 }
208
209 static int r600_cs_end(struct radeon_cs *cs,
210 const char *file,
211 const char *func,
212 int line)
213
214 {
215 if (!cs->section) {
216 fprintf(stderr, "CS no section to end at (%s,%s,%d)\n",
217 file, func, line);
218 return -EPIPE;
219 }
220 cs->section = 0;
221
222 if ( cs->section_ndw != cs->section_cdw ) {
223 fprintf(stderr, "CS section size missmatch start at (%s,%s,%d) %d vs %d\n",
224 cs->section_file, cs->section_func, cs->section_line, cs->section_ndw, cs->section_cdw);
225 fprintf(stderr, "cs->section_ndw = %d, cs->cdw = %d, cs->section_cdw = %d \n",
226 cs->section_ndw, cs->cdw, cs->section_cdw);
227 fprintf(stderr, "CS section end at (%s,%s,%d)\n",
228 file, func, line);
229 return -EPIPE;
230 }
231
232 return 0;
233 }
234
235 static int r600_cs_process_relocs(struct radeon_cs *cs,
236 uint32_t * reloc_chunk,
237 uint32_t * length_dw_reloc_chunk)
238 {
239 struct r600_cs_manager_legacy *csm = (struct r600_cs_manager_legacy*)cs->csm;
240 struct r600_cs_reloc_legacy *relocs;
241 int i, j, r;
242
243 uint32_t offset_dw = 0;
244
245 csm = (struct r600_cs_manager_legacy*)cs->csm;
246 relocs = (struct r600_cs_reloc_legacy *)cs->relocs;
247 restart:
248 for (i = 0; i < cs->crelocs; i++) {
249 uint32_t soffset, eoffset, asicoffset;
250
251 r = radeon_bo_legacy_validate(relocs[i].base.bo,
252 &soffset, &eoffset);
253 if (r == -EAGAIN) {
254 goto restart;
255 }
256 if (r) {
257 fprintf(stderr, "validated %p [0x%08X, 0x%08X]\n",
258 relocs[i].base.bo, soffset, eoffset);
259 return r;
260 }
261 asicoffset = soffset;
262
263 for (j = 0; j < relocs[i].cindices; j++) {
264 if (asicoffset >= eoffset) {
265 /* radeon_bo_debug(relocs[i].base.bo, 12); */
266 fprintf(stderr, "validated %p [0x%08X, 0x%08X]\n",
267 relocs[i].base.bo, soffset, eoffset);
268 fprintf(stderr, "above end: %p 0x%08X 0x%08X\n",
269 relocs[i].base.bo,
270 cs->packets[relocs[i].indices[j]],
271 eoffset);
272 exit(0);
273 return -EINVAL;
274 }
275 /* pkt3 nop header in ib chunk */
276 cs->packets[relocs[i].reloc_indices[j]] = 0xC0001000;
277 /* reloc index in ib chunk */
278 cs->packets[relocs[i].reloc_indices[j] + 1] = offset_dw;
279 }
280
281 /* asic offset in reloc chunk */ /* see alex drm r600_nomm_relocate */
282 reloc_chunk[offset_dw] = asicoffset;
283 reloc_chunk[offset_dw + 3] = 0;
284
285 offset_dw += 4;
286 }
287
288 *length_dw_reloc_chunk = offset_dw;
289
290 return 0;
291 }
292
293 static int r600_cs_set_age(struct radeon_cs *cs) /* -------------- */
294 {
295 struct r600_cs_manager_legacy *csm = (struct r600_cs_manager_legacy*)cs->csm;
296 struct r600_cs_reloc_legacy *relocs;
297 int i;
298
299 relocs = (struct r600_cs_reloc_legacy *)cs->relocs;
300 for (i = 0; i < cs->crelocs; i++) {
301 radeon_bo_legacy_pending(relocs[i].base.bo, csm->pending_age);
302 radeon_bo_unref(relocs[i].base.bo);
303 }
304 return 0;
305 }
306
307 static void dump_cmdbuf(struct radeon_cs *cs)
308 {
309 int i;
310 fprintf(stderr,"--start--\n");
311 for (i = 0; i < cs->cdw; i++){
312 fprintf(stderr,"0x%08x\n", cs->packets[i]);
313 }
314 fprintf(stderr,"--end--\n");
315
316 }
317
318 static int r600_cs_emit(struct radeon_cs *cs)
319 {
320 struct r600_cs_manager_legacy *csm = (struct r600_cs_manager_legacy*)cs->csm;
321 struct drm_radeon_cs cs_cmd;
322 struct drm_radeon_cs_chunk cs_chunk[2];
323 uint32_t length_dw_reloc_chunk;
324 uint64_t chunk_ptrs[2];
325 uint32_t reloc_chunk[256];
326 int r;
327 int retry = 0;
328
329 /* TODO : put chip level things here if need. */
330 /* csm->ctx->vtbl.emit_cs_header(cs, csm->ctx); */
331
332 csm->pending_count = 1;
333
334 r = r600_cs_process_relocs(cs, &(reloc_chunk[0]), &length_dw_reloc_chunk);
335 if (r) {
336 return 0;
337 }
338
339 /* raw ib chunk */
340 cs_chunk[0].chunk_id = RADEON_CHUNK_ID_IB;
341 cs_chunk[0].length_dw = cs->cdw;
342 cs_chunk[0].chunk_data = (unsigned long)(cs->packets);
343
344 /* reloc chaunk */
345 cs_chunk[1].chunk_id = RADEON_CHUNK_ID_RELOCS;
346 cs_chunk[1].length_dw = length_dw_reloc_chunk;
347 cs_chunk[1].chunk_data = (unsigned long)&(reloc_chunk[0]);
348
349 chunk_ptrs[0] = (uint64_t)(unsigned long)&(cs_chunk[0]);
350 chunk_ptrs[1] = (uint64_t)(unsigned long)&(cs_chunk[1]);
351
352 cs_cmd.num_chunks = 2;
353 /* cs_cmd.cs_id = 0; */
354 cs_cmd.chunks = (uint64_t)(unsigned long)chunk_ptrs;
355
356 //dump_cmdbuf(cs);
357
358 do
359 {
360 r = drmCommandWriteRead(cs->csm->fd, DRM_RADEON_CS, &cs_cmd, sizeof(cs_cmd));
361 retry++;
362 } while (r == -EAGAIN && retry < 1000);
363
364 if (r) {
365 return r;
366 }
367
368 csm->pending_age = cs_cmd.cs_id;
369
370 r600_cs_set_age(cs);
371
372 cs->csm->read_used = 0;
373 cs->csm->vram_write_used = 0;
374 cs->csm->gart_write_used = 0;
375
376 return 0;
377 }
378
379 static void inline r600_cs_free_reloc(void *relocs_p, int crelocs)
380 {
381 struct r600_cs_reloc_legacy *relocs = relocs_p;
382 int i;
383 if (!relocs_p)
384 return;
385 for (i = 0; i < crelocs; i++)
386 {
387 free(relocs[i].indices);
388 free(relocs[i].reloc_indices);
389 }
390 }
391
392 static int r600_cs_destroy(struct radeon_cs *cs)
393 {
394 r600_cs_free_reloc(cs->relocs, cs->crelocs);
395 free(cs->relocs);
396 free(cs->packets);
397 free(cs);
398 return 0;
399 }
400
401 static int r600_cs_erase(struct radeon_cs *cs)
402 {
403 r600_cs_free_reloc(cs->relocs, cs->crelocs);
404 free(cs->relocs);
405 cs->relocs_total_size = 0;
406 cs->relocs = NULL;
407 cs->crelocs = 0;
408 cs->cdw = 0;
409 cs->section = 0;
410 return 0;
411 }
412
/**
 * Report whether the command stream should be flushed.
 *
 * This used to trigger a flush once buffer object usage reached a certain
 * size; the higher levels now handle that, so the answer is always "no".
 *
 * \param cs  unused
 * \return always 0
 */
static int r600_cs_need_flush(struct radeon_cs *cs)
{
    (void)cs;
    return 0;
}
419
/**
 * Print hook of the command stream function table; intentionally does
 * nothing.
 *
 * \param cs    unused
 * \param file  unused
 */
static void r600_cs_print(struct radeon_cs *cs, FILE *file)
{
    (void)cs;
    (void)file;
}
423
424 static struct radeon_cs_funcs r600_cs_funcs = {
425 r600_cs_create,
426 r600_cs_write_reloc,
427 r600_cs_begin,
428 r600_cs_end,
429 r600_cs_emit,
430 r600_cs_destroy,
431 r600_cs_erase,
432 r600_cs_need_flush,
433 r600_cs_print
434 };
435
436 struct radeon_cs_manager * r600_radeon_cs_manager_legacy_ctor(struct radeon_context *ctx)
437 {
438 struct r600_cs_manager_legacy *csm;
439
440 csm = (struct r600_cs_manager_legacy*)
441 calloc(1, sizeof(struct r600_cs_manager_legacy));
442 if (csm == NULL) {
443 return NULL;
444 }
445 csm->base.funcs = &r600_cs_funcs;
446 csm->base.fd = ctx->dri.fd;
447 csm->ctx = ctx;
448 csm->pending_age = 1;
449 return (struct radeon_cs_manager*)csm;
450 }
451
452 void r600InitCmdBuf(context_t *r600) /* from rcommonInitCmdBuf */
453 {
454 radeonContextPtr rmesa = &r600->radeon;
455 GLuint size;
456
457 r600InitAtoms(r600);
458
459 /* Initialize command buffer */
460 size = 256 * driQueryOptioni(&rmesa->optionCache,
461 "command_buffer_size");
462 if (size < 2 * rmesa->hw.max_state_size) {
463 size = 2 * rmesa->hw.max_state_size + 65535;
464 }
465 if (size > 64 * 256)
466 size = 64 * 256;
467
468 if (rmesa->radeonScreen->kernel_mm) {
469 int fd = rmesa->radeonScreen->driScreen->fd;
470 rmesa->cmdbuf.csm = radeon_cs_manager_gem_ctor(fd);
471 } else {
472 rmesa->cmdbuf.csm = r600_radeon_cs_manager_legacy_ctor(rmesa);
473 }
474 if (rmesa->cmdbuf.csm == NULL) {
475 /* FIXME: fatal error */
476 return;
477 }
478 rmesa->cmdbuf.cs = radeon_cs_create(rmesa->cmdbuf.csm, size);
479 assert(rmesa->cmdbuf.cs != NULL);
480 rmesa->cmdbuf.size = size;
481
482 if (!rmesa->radeonScreen->kernel_mm) {
483 radeon_cs_set_limit(rmesa->cmdbuf.cs, RADEON_GEM_DOMAIN_VRAM, rmesa->radeonScreen->texSize[0]);
484 radeon_cs_set_limit(rmesa->cmdbuf.cs, RADEON_GEM_DOMAIN_GTT, rmesa->radeonScreen->gartTextures.size);
485 } else {
486 struct drm_radeon_gem_info mminfo;
487
488 if (!drmCommandWriteRead(rmesa->dri.fd, DRM_RADEON_GEM_INFO, &mminfo, sizeof(mminfo)))
489 {
490 radeon_cs_set_limit(rmesa->cmdbuf.cs, RADEON_GEM_DOMAIN_VRAM, mminfo.vram_visible);
491 radeon_cs_set_limit(rmesa->cmdbuf.cs, RADEON_GEM_DOMAIN_GTT, mminfo.gart_size);
492 }
493 }
494 }
495