gallium/draw: initial code to properly support llvm in the draw module
[mesa.git] / src / mesa / drivers / dri / r600 / r600_cmdbuf.c
1 /*
2 Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved.
3
4 The Weather Channel (TM) funded Tungsten Graphics to develop the
5 initial release of the Radeon 8500 driver under the XFree86 license.
6 This notice must be preserved.
7
8 Permission is hereby granted, free of charge, to any person obtaining
9 a copy of this software and associated documentation files (the
10 "Software"), to deal in the Software without restriction, including
11 without limitation the rights to use, copy, modify, merge, publish,
12 distribute, sublicense, and/or sell copies of the Software, and to
13 permit persons to whom the Software is furnished to do so, subject to
14 the following conditions:
15
16 The above copyright notice and this permission notice (including the
17 next paragraph) shall be included in all copies or substantial
18 portions of the Software.
19
20 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
21 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
22 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
23 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
24 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
25 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
26 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
27
28 **************************************************************************/
29
30 /**
31 * Mostly copied from \radeon\radeon_cs_legacy.c
32 */
33
34 #include <errno.h>
35
36 #include "main/glheader.h"
37 #include "main/state.h"
38 #include "main/imports.h"
39 #include "main/macros.h"
40 #include "main/context.h"
41 #include "main/simple_list.h"
42
43 #include "drm.h"
44 #include "radeon_drm.h"
45
46 #include "r600_context.h"
47 #include "radeon_reg.h"
48 #include "r600_cmdbuf.h"
49 #include "r600_emit.h"
50 #include "radeon_bocs_wrapper.h"
51 #include "radeon_reg.h"
52
53 #ifdef HAVE_LIBDRM_RADEON
54 #include "radeon_cs_int.h"
55 #else
56 #include "radeon_cs_int_drm.h"
57 #endif
58
59 struct r600_cs_manager_legacy
60 {
61 struct radeon_cs_manager base;
62 struct radeon_context *ctx;
63 /* hack for scratch stuff */
64 uint32_t pending_age;
65 uint32_t pending_count;
66 };
67
68 struct r600_cs_reloc_legacy {
69 struct radeon_cs_reloc base;
70 uint32_t cindices;
71 uint32_t *indices;
72 uint32_t *reloc_indices;
73 };
74
75 static struct radeon_cs_int *r600_cs_create(struct radeon_cs_manager *csm,
76 uint32_t ndw)
77 {
78 struct radeon_cs_int *csi;
79
80 csi = (struct radeon_cs_int*)calloc(1, sizeof(struct radeon_cs_int));
81 if (csi == NULL) {
82 return NULL;
83 }
84 csi->csm = csm;
85 csi->ndw = (ndw + 0x3FF) & (~0x3FF);
86 csi->packets = (uint32_t*)malloc(4*csi->ndw);
87 if (csi->packets == NULL) {
88 free(csi);
89 return NULL;
90 }
91 csi->relocs_total_size = 0;
92 return csi;
93 }
94
95 static int r600_cs_write_reloc(struct radeon_cs_int *csi,
96 struct radeon_bo *bo,
97 uint32_t read_domain,
98 uint32_t write_domain,
99 uint32_t flags)
100 {
101 struct r600_cs_reloc_legacy *relocs;
102 int i;
103
104 relocs = (struct r600_cs_reloc_legacy *)csi->relocs;
105 /* check domains */
106 if ((read_domain && write_domain) || (!read_domain && !write_domain)) {
107 /* in one CS a bo can only be in read or write domain but not
108 * in read & write domain at the same sime
109 */
110 return -EINVAL;
111 }
112 if (read_domain == RADEON_GEM_DOMAIN_CPU) {
113 return -EINVAL;
114 }
115 if (write_domain == RADEON_GEM_DOMAIN_CPU) {
116 return -EINVAL;
117 }
118 /* check if bo is already referenced */
119 for(i = 0; i < csi->crelocs; i++) {
120 uint32_t *indices;
121 uint32_t *reloc_indices;
122
123 if (relocs[i].base.bo->handle == bo->handle) {
124 /* Check domains must be in read or write. As we check already
125 * checked that in argument one of the read or write domain was
126 * set we only need to check that if previous reloc as the read
127 * domain set then the read_domain should also be set for this
128 * new relocation.
129 */
130 if (relocs[i].base.read_domain && !read_domain) {
131 return -EINVAL;
132 }
133 if (relocs[i].base.write_domain && !write_domain) {
134 return -EINVAL;
135 }
136 relocs[i].base.read_domain |= read_domain;
137 relocs[i].base.write_domain |= write_domain;
138 /* save indice */
139 relocs[i].cindices++;
140 indices = (uint32_t*)realloc(relocs[i].indices,
141 relocs[i].cindices * 4);
142 reloc_indices = (uint32_t*)realloc(relocs[i].reloc_indices,
143 relocs[i].cindices * 4);
144 if ( (indices == NULL) || (reloc_indices == NULL) ) {
145 relocs[i].cindices -= 1;
146 return -ENOMEM;
147 }
148 relocs[i].indices = indices;
149 relocs[i].reloc_indices = reloc_indices;
150 relocs[i].indices[relocs[i].cindices - 1] = csi->cdw;
151 relocs[i].reloc_indices[relocs[i].cindices - 1] = csi->cdw;
152 csi->section_cdw += 2;
153 csi->cdw += 2;
154
155 return 0;
156 }
157 }
158 /* add bo to reloc */
159 relocs = (struct r600_cs_reloc_legacy*)
160 realloc(csi->relocs,
161 sizeof(struct r600_cs_reloc_legacy) * (csi->crelocs + 1));
162 if (relocs == NULL) {
163 return -ENOMEM;
164 }
165 csi->relocs = relocs;
166 relocs[csi->crelocs].base.bo = bo;
167 relocs[csi->crelocs].base.read_domain = read_domain;
168 relocs[csi->crelocs].base.write_domain = write_domain;
169 relocs[csi->crelocs].base.flags = flags;
170 relocs[csi->crelocs].indices = (uint32_t*)malloc(4);
171 relocs[csi->crelocs].reloc_indices = (uint32_t*)malloc(4);
172 if ( (relocs[csi->crelocs].indices == NULL) || (relocs[csi->crelocs].reloc_indices == NULL) )
173 {
174 return -ENOMEM;
175 }
176
177 relocs[csi->crelocs].indices[0] = csi->cdw;
178 relocs[csi->crelocs].reloc_indices[0] = csi->cdw;
179 csi->section_cdw += 2;
180 csi->cdw += 2;
181 relocs[csi->crelocs].cindices = 1;
182 csi->relocs_total_size += radeon_bo_legacy_relocs_size(bo);
183 csi->crelocs++;
184
185 radeon_bo_ref(bo);
186
187 return 0;
188 }
189
190 static int r600_cs_begin(struct radeon_cs_int *csi,
191 uint32_t ndw,
192 const char *file,
193 const char *func,
194 int line)
195 {
196 if (csi->section_ndw) {
197 fprintf(stderr, "CS already in a section(%s,%s,%d)\n",
198 csi->section_file, csi->section_func, csi->section_line);
199 fprintf(stderr, "CS can't start section(%s,%s,%d)\n",
200 file, func, line);
201 return -EPIPE;
202 }
203
204 csi->section_ndw = ndw;
205 csi->section_cdw = 0;
206 csi->section_file = file;
207 csi->section_func = func;
208 csi->section_line = line;
209
210 if (csi->cdw + ndw > csi->ndw) {
211 uint32_t tmp, *ptr;
212 int num = (ndw > 0x400) ? ndw : 0x400;
213
214 tmp = (csi->cdw + num + 0x3FF) & (~0x3FF);
215 ptr = (uint32_t*)realloc(csi->packets, 4 * tmp);
216 if (ptr == NULL) {
217 return -ENOMEM;
218 }
219 csi->packets = ptr;
220 csi->ndw = tmp;
221 }
222
223 return 0;
224 }
225
226 static int r600_cs_end(struct radeon_cs_int *csi,
227 const char *file,
228 const char *func,
229 int line)
230
231 {
232 if (!csi->section_ndw) {
233 fprintf(stderr, "CS no section to end at (%s,%s,%d)\n",
234 file, func, line);
235 return -EPIPE;
236 }
237
238 if ( csi->section_ndw != csi->section_cdw ) {
239 fprintf(stderr, "CS section size missmatch start at (%s,%s,%d) %d vs %d\n",
240 csi->section_file, csi->section_func, csi->section_line, csi->section_ndw, csi->section_cdw);
241 fprintf(stderr, "csi->section_ndw = %d, csi->cdw = %d, csi->section_cdw = %d \n",
242 csi->section_ndw, csi->cdw, csi->section_cdw);
243 fprintf(stderr, "CS section end at (%s,%s,%d)\n",
244 file, func, line);
245 return -EPIPE;
246 }
247 csi->section_ndw = 0;
248
249 if (csi->cdw > csi->ndw) {
250 fprintf(stderr, "CS section overflow at (%s,%s,%d) cdw %d ndw %d\n",
251 csi->section_file, csi->section_func, csi->section_line,csi->cdw,csi->ndw);
252 fprintf(stderr, "CS section end at (%s,%s,%d)\n",
253 file, func, line);
254 assert(0);
255 }
256
257 return 0;
258 }
259
260 static int r600_cs_process_relocs(struct radeon_cs_int *csi,
261 uint32_t * reloc_chunk,
262 uint32_t * length_dw_reloc_chunk)
263 {
264 struct r600_cs_manager_legacy *csm = (struct r600_cs_manager_legacy*)csi->csm;
265 struct r600_cs_reloc_legacy *relocs;
266 int i, j, r;
267
268 uint32_t offset_dw = 0;
269
270 csm = (struct r600_cs_manager_legacy*)csi->csm;
271 relocs = (struct r600_cs_reloc_legacy *)csi->relocs;
272 restart:
273 for (i = 0; i < csi->crelocs; i++) {
274 uint32_t soffset, eoffset;
275
276 r = radeon_bo_legacy_validate(relocs[i].base.bo,
277 &soffset, &eoffset);
278 if (r == -EAGAIN) {
279 goto restart;
280 }
281 if (r) {
282 fprintf(stderr, "invalid bo(%p) [0x%08X, 0x%08X]\n",
283 relocs[i].base.bo, soffset, eoffset);
284 return r;
285 }
286
287 for (j = 0; j < relocs[i].cindices; j++) {
288 /* pkt3 nop header in ib chunk */
289 csi->packets[relocs[i].reloc_indices[j]] = 0xC0001000;
290 /* reloc index in ib chunk */
291 csi->packets[relocs[i].reloc_indices[j] + 1] = offset_dw;
292 }
293
294 /* asic offset in reloc chunk */ /* see alex drm r600_nomm_relocate */
295 reloc_chunk[offset_dw] = soffset;
296 reloc_chunk[offset_dw + 3] = 0;
297
298 offset_dw += 4;
299 }
300
301 *length_dw_reloc_chunk = offset_dw;
302
303 return 0;
304 }
305
306 static int r600_cs_set_age(struct radeon_cs_int *csi) /* -------------- */
307 {
308 struct r600_cs_manager_legacy *csm = (struct r600_cs_manager_legacy*)csi->csm;
309 struct r600_cs_reloc_legacy *relocs;
310 int i;
311
312 relocs = (struct r600_cs_reloc_legacy *)csi->relocs;
313 for (i = 0; i < csi->crelocs; i++) {
314 radeon_bo_legacy_pending(relocs[i].base.bo, csm->pending_age);
315 radeon_bo_unref(relocs[i].base.bo);
316 }
317 return 0;
318 }
319
#if 0
/* Debug helper (compiled out): print every dword written so far to stderr. */
static void dump_cmdbuf(struct radeon_cs_int *csi)
{
    int dw = 0;

    fprintf(stderr,"--start--\n");
    while (dw < csi->cdw) {
        fprintf(stderr,"0x%08x\n", csi->packets[dw]);
        dw++;
    }
    fprintf(stderr,"--end--\n");
}
#endif
332
333 static int r600_cs_emit(struct radeon_cs_int *csi)
334 {
335 struct r600_cs_manager_legacy *csm = (struct r600_cs_manager_legacy*)csi->csm;
336 struct drm_radeon_cs cs_cmd;
337 struct drm_radeon_cs_chunk cs_chunk[2];
338 uint32_t length_dw_reloc_chunk;
339 uint64_t chunk_ptrs[2];
340 uint32_t *reloc_chunk;
341 int r;
342 int retry = 0;
343
344 /* TODO : put chip level things here if need. */
345 /* csm->ctx->vtbl.emit_cs_header(cs, csm->ctx); */
346
347 csm->pending_count = 1;
348
349 reloc_chunk = (uint32_t*)calloc(1, csi->crelocs * 4 * 4);
350
351 r = r600_cs_process_relocs(csi, reloc_chunk, &length_dw_reloc_chunk);
352 if (r) {
353 free(reloc_chunk);
354 return 0;
355 }
356
357 /* raw ib chunk */
358 cs_chunk[0].chunk_id = RADEON_CHUNK_ID_IB;
359 cs_chunk[0].length_dw = csi->cdw;
360 cs_chunk[0].chunk_data = (unsigned long)(csi->packets);
361
362 /* reloc chaunk */
363 cs_chunk[1].chunk_id = RADEON_CHUNK_ID_RELOCS;
364 cs_chunk[1].length_dw = length_dw_reloc_chunk;
365 cs_chunk[1].chunk_data = (unsigned long)reloc_chunk;
366
367 chunk_ptrs[0] = (uint64_t)(unsigned long)&(cs_chunk[0]);
368 chunk_ptrs[1] = (uint64_t)(unsigned long)&(cs_chunk[1]);
369
370 cs_cmd.num_chunks = 2;
371 /* cs_cmd.cs_id = 0; */
372 cs_cmd.chunks = (uint64_t)(unsigned long)chunk_ptrs;
373
374 //dump_cmdbuf(cs);
375
376 do
377 {
378 r = drmCommandWriteRead(csi->csm->fd, DRM_RADEON_CS, &cs_cmd, sizeof(cs_cmd));
379 retry++;
380 } while (r == -EAGAIN && retry < 1000);
381
382 if (r) {
383 free(reloc_chunk);
384 return r;
385 }
386
387 csm->pending_age = cs_cmd.cs_id;
388
389 r600_cs_set_age(csi);
390
391 csi->csm->read_used = 0;
392 csi->csm->vram_write_used = 0;
393 csi->csm->gart_write_used = 0;
394
395 free(reloc_chunk);
396
397 return 0;
398 }
399
400 static void inline r600_cs_free_reloc(void *relocs_p, int crelocs)
401 {
402 struct r600_cs_reloc_legacy *relocs = relocs_p;
403 int i;
404 if (!relocs_p)
405 return;
406 for (i = 0; i < crelocs; i++)
407 {
408 free(relocs[i].indices);
409 free(relocs[i].reloc_indices);
410 }
411 }
412
413 static int r600_cs_destroy(struct radeon_cs_int *csi)
414 {
415 r600_cs_free_reloc(csi->relocs, csi->crelocs);
416 free(csi->relocs);
417 free(csi->packets);
418 free(csi);
419 return 0;
420 }
421
422 static int r600_cs_erase(struct radeon_cs_int *csi)
423 {
424 r600_cs_free_reloc(csi->relocs, csi->crelocs);
425 free(csi->relocs);
426 csi->relocs_total_size = 0;
427 csi->relocs = NULL;
428 csi->crelocs = 0;
429 csi->cdw = 0;
430 return 0;
431 }
432
/**
 * Report whether the command stream has to be flushed.
 *
 * This used to request a flush once the BO usage reached a certain size;
 * the higher levels now handle that better, so no flush is ever requested
 * from here.
 *
 * \param csi  command stream (unused)
 * \return always 0
 */
static int r600_cs_need_flush(struct radeon_cs_int *csi)
{
    (void)csi;

    return 0;
}
439
/**
 * Print hook of the command-stream function table; intentionally a no-op.
 *
 * \param csi   command stream (unused)
 * \param file  output stream (unused)
 */
static void r600_cs_print(struct radeon_cs_int *csi, FILE *file)
{
    (void)csi;
    (void)file;
}
443
444 static struct radeon_cs_funcs r600_cs_funcs = {
445 r600_cs_create,
446 r600_cs_write_reloc,
447 r600_cs_begin,
448 r600_cs_end,
449 r600_cs_emit,
450 r600_cs_destroy,
451 r600_cs_erase,
452 r600_cs_need_flush,
453 r600_cs_print
454 };
455
456 struct radeon_cs_manager * r600_radeon_cs_manager_legacy_ctor(struct radeon_context *ctx)
457 {
458 struct r600_cs_manager_legacy *csm;
459
460 csm = (struct r600_cs_manager_legacy*)
461 calloc(1, sizeof(struct r600_cs_manager_legacy));
462 if (csm == NULL) {
463 return NULL;
464 }
465 csm->base.funcs = &r600_cs_funcs;
466 csm->base.fd = ctx->dri.fd;
467 csm->ctx = ctx;
468 csm->pending_age = 1;
469 return (struct radeon_cs_manager*)csm;
470 }
471
472 void r600InitCmdBuf(context_t *r600) /* from rcommonInitCmdBuf */
473 {
474 radeonContextPtr rmesa = &r600->radeon;
475 GLuint size;
476
477 r600InitAtoms(r600);
478
479 /* Initialize command buffer */
480 size = 256 * driQueryOptioni(&rmesa->optionCache,
481 "command_buffer_size");
482 if (size < 2 * rmesa->hw.max_state_size) {
483 size = 2 * rmesa->hw.max_state_size + 65535;
484 }
485 if (size > 64 * 256)
486 size = 64 * 256;
487
488 if (rmesa->radeonScreen->kernel_mm) {
489 int fd = rmesa->radeonScreen->driScreen->fd;
490 rmesa->cmdbuf.csm = radeon_cs_manager_gem_ctor(fd);
491 } else {
492 rmesa->cmdbuf.csm = r600_radeon_cs_manager_legacy_ctor(rmesa);
493 }
494 if (rmesa->cmdbuf.csm == NULL) {
495 /* FIXME: fatal error */
496 return;
497 }
498 rmesa->cmdbuf.cs = radeon_cs_create(rmesa->cmdbuf.csm, size);
499 assert(rmesa->cmdbuf.cs != NULL);
500 rmesa->cmdbuf.size = size;
501
502 radeon_cs_space_set_flush(rmesa->cmdbuf.cs,
503 (void (*)(void *))rmesa->glCtx->Driver.Flush, rmesa->glCtx);
504
505 if (!rmesa->radeonScreen->kernel_mm) {
506 radeon_cs_set_limit(rmesa->cmdbuf.cs, RADEON_GEM_DOMAIN_VRAM, rmesa->radeonScreen->texSize[0]);
507 radeon_cs_set_limit(rmesa->cmdbuf.cs, RADEON_GEM_DOMAIN_GTT, rmesa->radeonScreen->gartTextures.size);
508 } else {
509 struct drm_radeon_gem_info mminfo;
510
511 if (!drmCommandWriteRead(rmesa->dri.fd, DRM_RADEON_GEM_INFO, &mminfo, sizeof(mminfo)))
512 {
513 radeon_cs_set_limit(rmesa->cmdbuf.cs, RADEON_GEM_DOMAIN_VRAM, mminfo.vram_visible);
514 radeon_cs_set_limit(rmesa->cmdbuf.cs, RADEON_GEM_DOMAIN_GTT, mminfo.gart_size);
515 }
516 }
517 }
518