/**************************************************************************
 *
 * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 **************************************************************************/

#include "intel_context.h"
#include "intel_batchbuffer.h"
#include "intel_buffer_objects.h"
#include "intel_decode.h"
#include "intel_reg.h"
#include "intel_bufmgr.h"
#include "intel_buffers.h"

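/* A record of a state packet already emitted into the batch: the DWORD
 * offset of its header within the batch map and its size in bytes.  Used by
 * intel_batchbuffer_cached_advance() to avoid re-emitting a packet that is
 * identical to the previously cached one with the same opcode.
 */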
struct cached_batch_item {
   struct cached_batch_item *next;
   uint16_t header;
   uint16_t size;
};

static void clear_cache( struct intel_context *intel )
{
   struct cached_batch_item *item = intel->batch.cached_items;

   while (item) {
      struct cached_batch_item *next = item->next;
      free(item);
      item = next;
   }

   intel->batch.cached_items = NULL;
}

void
intel_batchbuffer_init(struct intel_context *intel)
{
   intel_batchbuffer_reset(intel);

   if (intel->gen == 6) {
      /* We can't just use brw_state_batch to get a chunk of space for
       * the gen6 workaround because it involves actually writing to
       * the buffer, and the kernel doesn't let us write to the batch.
       */
      intel->batch.workaround_bo = drm_intel_bo_alloc(intel->bufmgr,
                                                      "gen6 workaround",
                                                      4096, 4096);
   }
}

void
intel_batchbuffer_reset(struct intel_context *intel)
{
   if (intel->batch.last_bo != NULL) {
      drm_intel_bo_unreference(intel->batch.last_bo);
      intel->batch.last_bo = NULL;
   }
   intel->batch.last_bo = intel->batch.bo;

   clear_cache(intel);

   intel->batch.bo = drm_intel_bo_alloc(intel->bufmgr, "batchbuffer",
                                        intel->maxBatchSize, 4096);

   intel->batch.reserved_space = BATCH_RESERVED;
   intel->batch.state_batch_offset = intel->batch.bo->size;
   intel->batch.used = 0;
}

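/* Record how much of the batch has been used and how many relocations have
 * been emitted, so that intel_batchbuffer_reset_to_saved() can later roll
 * the batch back to this point (e.g. when a sequence of state emission has
 * to be abandoned and retried in a fresh batch).
 */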
void
intel_batchbuffer_save_state(struct intel_context *intel)
{
   intel->batch.saved.used = intel->batch.used;
   intel->batch.saved.reloc_count =
      drm_intel_gem_bo_get_reloc_count(intel->batch.bo);
}

void
intel_batchbuffer_reset_to_saved(struct intel_context *intel)
{
   drm_intel_gem_bo_clear_relocs(intel->batch.bo, intel->batch.saved.reloc_count);

   intel->batch.used = intel->batch.saved.used;

   /* Cached batch state is dead, since we just cleared some unknown part of the
    * batchbuffer.  Assume that the caller resets any other state necessary.
    */
   clear_cache(intel);
}

void
intel_batchbuffer_free(struct intel_context *intel)
{
   drm_intel_bo_unreference(intel->batch.last_bo);
   drm_intel_bo_unreference(intel->batch.bo);
   drm_intel_bo_unreference(intel->batch.workaround_bo);
   clear_cache(intel);
}


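/* Upload the accumulated command and state DWORDs from the local map into
 * the batch BO, then submit the batch to the kernel for execution on the
 * appropriate ring (the BLT ring for blit batches on gen6+, the render ring
 * otherwise).
 */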
/* TODO: Push this whole function into bufmgr.
 */
static int
do_flush_locked(struct intel_context *intel)
{
   struct intel_batchbuffer *batch = &intel->batch;
   int ret = 0;

   ret = drm_intel_bo_subdata(batch->bo, 0, 4*batch->used, batch->map);
   if (ret == 0 && batch->state_batch_offset != batch->bo->size) {
      ret = drm_intel_bo_subdata(batch->bo,
                                 batch->state_batch_offset,
                                 batch->bo->size - batch->state_batch_offset,
                                 (char *)batch->map + batch->state_batch_offset);
   }

   if (!intel->intelScreen->no_hw) {
      int ring;

      if (intel->gen < 6 || !batch->is_blit) {
         ring = I915_EXEC_RENDER;
      } else {
         ring = I915_EXEC_BLT;
      }

      if (ret == 0)
         ret = drm_intel_bo_mrb_exec(batch->bo, 4*batch->used, NULL, 0, 0, ring);
   }

   if (unlikely(INTEL_DEBUG & DEBUG_BATCH)) {
      drm_intel_bo_map(batch->bo, false);
      intel_decode(batch->bo->virtual, batch->used,
                   batch->bo->offset,
                   intel->intelScreen->deviceID, true);
      drm_intel_bo_unmap(batch->bo);

      if (intel->vtbl.debug_batch != NULL)
         intel->vtbl.debug_batch(intel);
   }

   if (ret != 0) {
      fprintf(stderr, "intel_do_flush_locked failed: %s\n", strerror(-ret));
      exit(1);
   }
   intel->vtbl.new_batch(intel);

   return ret;
}

int
_intel_batchbuffer_flush(struct intel_context *intel,
                         const char *file, int line)
{
   int ret;

   /* No batch should be emitted that uses a mapped region, because that would
    * cause the map to be incoherent with GPU rendering done by the
    * batchbuffer.  To ensure that condition, we assert a condition that is
    * stronger but easier to implement: that *no* region is mapped.
    */
   assert(intel->num_mapped_regions == 0);

   if (intel->batch.used == 0)
      return 0;

   if (intel->first_post_swapbuffers_batch == NULL) {
      intel->first_post_swapbuffers_batch = intel->batch.bo;
      drm_intel_bo_reference(intel->first_post_swapbuffers_batch);
   }

   if (unlikely(INTEL_DEBUG & DEBUG_BATCH))
      fprintf(stderr, "%s:%d: Batchbuffer flush with %db used\n", file, line,
              4*intel->batch.used);

   intel->batch.reserved_space = 0;

   /* Mark the end of the buffer. */
   intel_batchbuffer_emit_dword(intel, MI_BATCH_BUFFER_END);
   if (intel->batch.used & 1) {
      /* Round batchbuffer usage to 2 DWORDs. */
      intel_batchbuffer_emit_dword(intel, MI_NOOP);
   }

   if (intel->vtbl.finish_batch)
      intel->vtbl.finish_batch(intel);

   intel_upload_finish(intel);

   /* Check that we didn't just wrap our batchbuffer at a bad time. */
   assert(!intel->no_batch_wrap);

   ret = do_flush_locked(intel);

   if (unlikely(INTEL_DEBUG & DEBUG_SYNC)) {
      fprintf(stderr, "waiting for idle\n");
      drm_intel_bo_wait_rendering(intel->batch.bo);
   }

   /* Reset the buffer:
    */
   intel_batchbuffer_reset(intel);

   return ret;
}


/* This is the only way buffers get added to the validate list.
 */
bool
intel_batchbuffer_emit_reloc(struct intel_context *intel,
                             drm_intel_bo *buffer,
                             uint32_t read_domains, uint32_t write_domain,
                             uint32_t delta)
{
   int ret;

   ret = drm_intel_bo_emit_reloc(intel->batch.bo, 4*intel->batch.used,
                                 buffer, delta,
                                 read_domains, write_domain);
   assert(ret == 0);
   (void)ret;

   /*
    * Using the old buffer offset, write in what the right data would be, in case
    * the buffer doesn't move and we can short-circuit the relocation processing
    * in the kernel
    */
   intel_batchbuffer_emit_dword(intel, buffer->offset + delta);

   return true;
}

bool
intel_batchbuffer_emit_reloc_fenced(struct intel_context *intel,
                                    drm_intel_bo *buffer,
                                    uint32_t read_domains,
                                    uint32_t write_domain,
                                    uint32_t delta)
{
   int ret;

   ret = drm_intel_bo_emit_reloc_fence(intel->batch.bo, 4*intel->batch.used,
                                       buffer, delta,
                                       read_domains, write_domain);
   assert(ret == 0);
   (void)ret;

   /*
    * Using the old buffer offset, write in what the right data would
    * be, in case the buffer doesn't move and we can short-circuit the
    * relocation processing in the kernel
    */
   intel_batchbuffer_emit_dword(intel, buffer->offset + delta);

   return true;
}

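/* Copy a block of pre-built DWORDs into the batch, after making sure there
 * is enough space for them (and that the batch is on the right ring).
 */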
void
intel_batchbuffer_data(struct intel_context *intel,
                       const void *data, GLuint bytes, bool is_blit)
{
   assert((bytes & 3) == 0);
   intel_batchbuffer_require_space(intel, bytes, is_blit);
   __memcpy(intel->batch.map + intel->batch.used, data, bytes);
   intel->batch.used += bytes >> 2;
}

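/* Close out a state packet, de-duplicating it against packets emitted
 * earlier in this batch.  If the last cached packet with the same opcode has
 * identical contents, rewind batch.used so the new copy is dropped;
 * otherwise, cache this packet so later duplicates can be skipped.  The
 * cache is discarded whenever the batch is reset.
 */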
void
intel_batchbuffer_cached_advance(struct intel_context *intel)
{
   struct cached_batch_item **prev = &intel->batch.cached_items, *item;
   uint32_t sz = (intel->batch.used - intel->batch.emit) * sizeof(uint32_t);
   uint32_t *start = intel->batch.map + intel->batch.emit;
   uint16_t op = *start >> 16;

   while (*prev) {
      uint32_t *old;

      item = *prev;
      old = intel->batch.map + item->header;
      if (op == *old >> 16) {
         if (item->size == sz && memcmp(old, start, sz) == 0) {
            if (prev != &intel->batch.cached_items) {
               *prev = item->next;
               item->next = intel->batch.cached_items;
               intel->batch.cached_items = item;
            }
            intel->batch.used = intel->batch.emit;
            return;
         }

         goto emit;
      }
      prev = &item->next;
   }

   item = malloc(sizeof(struct cached_batch_item));
   if (item == NULL)
      return;

   item->next = intel->batch.cached_items;
   intel->batch.cached_items = item;

emit:
   item->size = sz;
   item->header = intel->batch.emit;
}

/**
 * Restriction [DevSNB, DevIVB]:
 *
 * Prior to changing Depth/Stencil Buffer state (i.e. any combination of
 * 3DSTATE_DEPTH_BUFFER, 3DSTATE_CLEAR_PARAMS, 3DSTATE_STENCIL_BUFFER,
 * 3DSTATE_HIER_DEPTH_BUFFER) SW must first issue a pipelined depth stall
 * (PIPE_CONTROL with Depth Stall bit set), followed by a pipelined depth
 * cache flush (PIPE_CONTROL with Depth Flush Bit set), followed by
 * another pipelined depth stall (PIPE_CONTROL with Depth Stall bit set),
 * unless SW can otherwise guarantee that the pipeline from WM onwards is
 * already flushed (e.g., via a preceding MI_FLUSH).
 */
void
intel_emit_depth_stall_flushes(struct intel_context *intel)
{
   assert(intel->gen >= 6 && intel->gen <= 7);

   BEGIN_BATCH(4);
   OUT_BATCH(_3DSTATE_PIPE_CONTROL);
   OUT_BATCH(PIPE_CONTROL_DEPTH_STALL);
   OUT_BATCH(0); /* address */
   OUT_BATCH(0); /* write data */
   ADVANCE_BATCH();

   BEGIN_BATCH(4);
   OUT_BATCH(_3DSTATE_PIPE_CONTROL);
   OUT_BATCH(PIPE_CONTROL_DEPTH_CACHE_FLUSH);
   OUT_BATCH(0); /* address */
   OUT_BATCH(0); /* write data */
   ADVANCE_BATCH();

   BEGIN_BATCH(4);
   OUT_BATCH(_3DSTATE_PIPE_CONTROL);
   OUT_BATCH(PIPE_CONTROL_DEPTH_STALL);
   OUT_BATCH(0); /* address */
   OUT_BATCH(0); /* write data */
   ADVANCE_BATCH();
}

/**
 * Emits a PIPE_CONTROL with a non-zero post-sync operation, for
 * implementing two workarounds on gen6.  From section 1.4.7.1
 * "PIPE_CONTROL" of the Sandy Bridge PRM volume 2 part 1:
 *
 * [DevSNB-C+{W/A}] Before any depth stall flush (including those
 * produced by non-pipelined state commands), software needs to first
 * send a PIPE_CONTROL with no bits set except Post-Sync Operation !=
 * 0.
 *
 * [Dev-SNB{W/A}]: Before a PIPE_CONTROL with Write Cache Flush Enable
 * =1, a PIPE_CONTROL with any non-zero post-sync-op is required.
 *
 * And the workaround for these two requires this workaround first:
 *
 * [Dev-SNB{W/A}]: Pipe-control with CS-stall bit set must be sent
 * BEFORE the pipe-control with a post-sync op and no write-cache
 * flushes.
 *
 * And this last workaround is tricky because of the requirements on
 * that bit.  From section 1.4.7.2.3 "Stall" of the Sandy Bridge PRM
 * volume 2 part 1:
 *
 *     "1 of the following must also be set:
 *      - Render Target Cache Flush Enable ([12] of DW1)
 *      - Depth Cache Flush Enable ([0] of DW1)
 *      - Stall at Pixel Scoreboard ([1] of DW1)
 *      - Depth Stall ([13] of DW1)
 *      - Post-Sync Operation ([13] of DW1)
 *      - Notify Enable ([8] of DW1)"
 *
 * The cache flushes require the workaround flush that triggered this
 * one, so we can't use it.  Depth stall would trigger the same.
 * Post-sync nonzero is what triggered this second workaround, so we
 * can't use that one either.  Notify enable is IRQs, which aren't
 * really our business.  That leaves only stall at scoreboard.
 */
void
intel_emit_post_sync_nonzero_flush(struct intel_context *intel)
{
   if (!intel->batch.need_workaround_flush)
      return;

   BEGIN_BATCH(4);
   OUT_BATCH(_3DSTATE_PIPE_CONTROL);
   OUT_BATCH(PIPE_CONTROL_CS_STALL |
             PIPE_CONTROL_STALL_AT_SCOREBOARD);
   OUT_BATCH(0); /* address */
   OUT_BATCH(0); /* write data */
   ADVANCE_BATCH();

   BEGIN_BATCH(4);
   OUT_BATCH(_3DSTATE_PIPE_CONTROL);
   OUT_BATCH(PIPE_CONTROL_WRITE_IMMEDIATE);
   OUT_RELOC(intel->batch.workaround_bo,
             I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, 0);
   OUT_BATCH(0); /* write data */
   ADVANCE_BATCH();

   intel->batch.need_workaround_flush = false;
}

/* Emit a pipelined flush to either flush render and texture cache for
 * reading from a FBO-drawn texture, or flush so that frontbuffer
 * render appears on the screen in DRI1.
 *
 * This is also used for the always_flush_cache driconf debug option.
 */
void
intel_batchbuffer_emit_mi_flush(struct intel_context *intel)
{
   if (intel->gen >= 6) {
      if (intel->batch.is_blit) {
         BEGIN_BATCH_BLT(4);
         OUT_BATCH(MI_FLUSH_DW);
         OUT_BATCH(0);
         OUT_BATCH(0);
         OUT_BATCH(0);
         ADVANCE_BATCH();
      } else {
         if (intel->gen == 6) {
            /* Hardware workaround: SNB B-Spec says:
             *
             *   [Dev-SNB{W/A}]: Before a PIPE_CONTROL with Write Cache
             *   Flush Enable =1, a PIPE_CONTROL with any non-zero
             *   post-sync-op is required.
             */
            intel_emit_post_sync_nonzero_flush(intel);
         }

         BEGIN_BATCH(4);
         OUT_BATCH(_3DSTATE_PIPE_CONTROL);
         OUT_BATCH(PIPE_CONTROL_INSTRUCTION_FLUSH |
                   PIPE_CONTROL_WRITE_FLUSH |
                   PIPE_CONTROL_DEPTH_CACHE_FLUSH |
                   PIPE_CONTROL_TC_FLUSH |
                   PIPE_CONTROL_NO_WRITE);
         OUT_BATCH(0); /* write address */
         OUT_BATCH(0); /* write data */
         ADVANCE_BATCH();
      }
   } else if (intel->gen >= 4) {
      BEGIN_BATCH(4);
      OUT_BATCH(_3DSTATE_PIPE_CONTROL |
                PIPE_CONTROL_WRITE_FLUSH |
                PIPE_CONTROL_NO_WRITE);
      OUT_BATCH(0); /* write address */
      OUT_BATCH(0); /* write data */
      OUT_BATCH(0); /* write data */
      ADVANCE_BATCH();
   } else {
      BEGIN_BATCH(1);
      OUT_BATCH(MI_FLUSH);
      ADVANCE_BATCH();
   }
}