/**************************************************************************
 *
 * Copyright 2006 VMware, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 **************************************************************************/
#include "intel_batchbuffer.h"
#include "intel_buffer_objects.h"
#include "intel_reg.h"
#include "intel_bufmgr.h"
#include "intel_buffers.h"
#include "intel_fbo.h"
#include "brw_context.h"
#include "brw_defines.h"
#include "brw_state.h"

/* For drmCommandNone(), DRM_I915_GEM_THROTTLE and the I915_EXEC_* flags
 * used below.
 */
#include <xf86drm.h>
#include <i915_drm.h>
static void
intel_batchbuffer_reset(struct brw_context *brw);
void
intel_batchbuffer_init(struct brw_context *brw)
{
   intel_batchbuffer_reset(brw);

   if (!brw->has_llc) {
      /* Without LLC, build the batch in a malloc'd shadow buffer and upload
       * it to the BO at flush time (see do_flush_locked below).
       */
      brw->batch.cpu_map = malloc(BATCH_SZ);
      brw->batch.map = brw->batch.cpu_map;
      brw->batch.map_next = brw->batch.cpu_map;
   }
}
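
/* Drop the old batch BO (keeping a reference to it in last_bo), allocate a
 * fresh one, and reset the write pointers and bookkeeping for a new batch.
 */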
static void
intel_batchbuffer_reset(struct brw_context *brw)
{
   if (brw->batch.last_bo != NULL) {
      drm_intel_bo_unreference(brw->batch.last_bo);
      brw->batch.last_bo = NULL;
   }
   brw->batch.last_bo = brw->batch.bo;

   brw_render_cache_set_clear(brw);

   brw->batch.bo = drm_intel_bo_alloc(brw->bufmgr, "batchbuffer",
                                      BATCH_SZ, 4096);
   if (brw->has_llc) {
      drm_intel_bo_map(brw->batch.bo, true);
      brw->batch.map = brw->batch.bo->virtual;
   }
   brw->batch.map_next = brw->batch.map;

   brw->batch.reserved_space = BATCH_RESERVED;
   brw->batch.state_batch_offset = brw->batch.bo->size;
   brw->batch.needs_sol_reset = false;

   /* We don't know what ring the new batch will be sent to until we see the
    * first BEGIN_BATCH or BEGIN_BATCH_BLT.  Mark it as unknown.
    */
   brw->batch.ring = UNKNOWN_RING;
}
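
/* Save the current batch write position and relocation count so the caller
 * can roll the batch back with intel_batchbuffer_reset_to_saved() if it has
 * to bail out partway through emitting commands.
 */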
void
intel_batchbuffer_save_state(struct brw_context *brw)
{
   brw->batch.saved.map_next = brw->batch.map_next;
   brw->batch.saved.reloc_count =
      drm_intel_gem_bo_get_reloc_count(brw->batch.bo);
}
void
intel_batchbuffer_reset_to_saved(struct brw_context *brw)
{
   drm_intel_gem_bo_clear_relocs(brw->batch.bo, brw->batch.saved.reloc_count);

   brw->batch.map_next = brw->batch.saved.map_next;
   if (USED_BATCH(brw->batch) == 0)
      brw->batch.ring = UNKNOWN_RING;
}
void
intel_batchbuffer_free(struct brw_context *brw)
{
   free(brw->batch.cpu_map);
   drm_intel_bo_unreference(brw->batch.last_bo);
   drm_intel_bo_unreference(brw->batch.bo);
}
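
/* Decode and print the batch we just submitted (INTEL_DEBUG=batch), using
 * libdrm's command decoder.
 */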
static void
do_batch_dump(struct brw_context *brw)
{
   struct drm_intel_decode *decode;
   struct intel_batchbuffer *batch = &brw->batch;
   int ret;

   decode = drm_intel_decode_context_alloc(brw->intelScreen->deviceID);
   if (!decode)
      return;

   ret = drm_intel_bo_map(batch->bo, false);
   if (ret == 0) {
      drm_intel_decode_set_batch_pointer(decode, batch->bo->virtual,
                                         batch->bo->offset64,
                                         USED_BATCH(*batch));
   } else {
      fprintf(stderr,
              "WARNING: failed to map batchbuffer (%s), "
              "dumping uploaded data instead.\n", strerror(ret));

      drm_intel_decode_set_batch_pointer(decode, batch->map,
                                         batch->bo->offset64,
                                         USED_BATCH(*batch));
   }

   drm_intel_decode_set_output_file(decode, stderr);
   drm_intel_decode(decode);
   drm_intel_decode_context_free(decode);

   if (ret == 0) {
      drm_intel_bo_unmap(batch->bo);
      brw_debug_batch(brw);
   }
}
void
intel_batchbuffer_emit_render_ring_prelude(struct brw_context *brw)
{
   /* We may need to enable and snapshot OA counters. */
   brw_perf_monitor_new_batch(brw);
}
/**
 * Called when starting a new batch buffer.
 */
static void
brw_new_batch(struct brw_context *brw)
{
   /* Create a new batchbuffer and reset the associated state: */
   drm_intel_gem_bo_clear_relocs(brw->batch.bo, 0);
   intel_batchbuffer_reset(brw);

   /* If the kernel supports hardware contexts, then most hardware state is
    * preserved between batches; we only need to re-emit state that is required
    * to be in every batch.  Otherwise we need to re-emit all the state that
    * would otherwise be stored in the context (which for all intents and
    * purposes means everything).
    */
   if (brw->hw_ctx == NULL)
      brw->ctx.NewDriverState |= BRW_NEW_CONTEXT;

   brw->ctx.NewDriverState |= BRW_NEW_BATCH;

   brw->state_batch_count = 0;

   /* We need to periodically reap the shader time results, because rollover
    * happens every few seconds.  We also want to see results every once in a
    * while, because many programs won't cleanly destroy our context, so the
    * end-of-run printout may not happen.
    */
   if (INTEL_DEBUG & DEBUG_SHADER_TIME)
      brw_collect_and_report_shader_time(brw);

   if (INTEL_DEBUG & DEBUG_PERFMON)
      brw_dump_perf_monitors(brw);
}
/**
 * Called from intel_batchbuffer_flush before emitting MI_BATCHBUFFER_END and
 * sending it off.
 *
 * This function can emit state (say, to preserve registers that aren't saved
 * between batches).  All of this state MUST fit in the reserved space at the
 * end of the batchbuffer.  If you add more GPU state, increase the reserved
 * space by updating the BATCH_RESERVED macro.
 */
static void
brw_finish_batch(struct brw_context *brw)
{
   /* Capture the closing pipeline statistics register values necessary to
    * support query objects (in the non-hardware context world).
    */
   brw_emit_query_end(brw);

   if (brw->batch.ring == RENDER_RING) {
      /* We may also need to snapshot and disable OA counters. */
      brw_perf_monitor_finish_batch(brw);

      if (brw->is_haswell) {
         /* From the Haswell PRM, Volume 2b, Command Reference: Instructions,
          * 3DSTATE_CC_STATE_POINTERS > "Note":
          *
          *    "SW must program 3DSTATE_CC_STATE_POINTERS command at the end
          *     of every 3D batch buffer followed by a PIPE_CONTROL with RC
          *     flush and CS stall."
          *
          * From the example in the docs, it seems to expect a regular pipe
          * control flush here as well.  We may have done it already, but meh.
          *
          * See also WaAvoidRCZCounterRollover.
          */
         brw_emit_mi_flush(brw);
         BEGIN_BATCH(2);
         OUT_BATCH(_3DSTATE_CC_STATE_POINTERS << 16 | (2 - 2));
         OUT_BATCH(brw->cc.state_offset | 1);
         ADVANCE_BATCH();
         brw_emit_pipe_control_flush(brw, PIPE_CONTROL_RENDER_TARGET_FLUSH |
                                          PIPE_CONTROL_CS_STALL);
      }
   }

   /* Mark that the current program cache BO has been used by the GPU.
    * It will be reallocated if we need to put new programs in for the
    * next batch.
    */
   brw->cache.bo_used_by_gpu = true;
}
static void
throttle(struct brw_context *brw)
{
   /* Wait for the swapbuffers before the one we just emitted, so we
    * don't get too many swaps outstanding for apps that are GPU-heavy
    * but not CPU-heavy.
    *
    * We're using intelDRI2Flush (called from the loader before
    * swapbuffer) and glFlush (for front buffer rendering) as the
    * indicator that a frame is done and then throttle when we get
    * here as we prepare to render the next frame.  At this point the
    * round trips for swap/copy and getting new buffers are done and
    * we'll spend less time waiting on the GPU.
    *
    * Unfortunately, we don't have a handle to the batch containing
    * the swap, and getting our hands on that doesn't seem worth it,
    * so we just use the first batch we emitted after the last swap.
    */
   if (brw->need_swap_throttle && brw->throttle_batch[0]) {
      if (brw->throttle_batch[1]) {
         if (!brw->disable_throttling)
            drm_intel_bo_wait_rendering(brw->throttle_batch[1]);
         drm_intel_bo_unreference(brw->throttle_batch[1]);
      }
      brw->throttle_batch[1] = brw->throttle_batch[0];
      brw->throttle_batch[0] = NULL;
      brw->need_swap_throttle = false;
      /* Throttling here is more precise than the throttle ioctl, so skip it */
      brw->need_flush_throttle = false;
   }

   if (brw->need_flush_throttle) {
      __DRIscreen *psp = brw->intelScreen->driScrnPriv;
      drmCommandNone(psp->fd, DRM_I915_GEM_THROTTLE);
      brw->need_flush_throttle = false;
   }
}
/* Drop when RS headers get pulled to libdrm */
#ifndef I915_EXEC_RESOURCE_STREAMER
#define I915_EXEC_RESOURCE_STREAMER (1<<15)
#endif
/* TODO: Push this whole function into bufmgr.
 */
static int
do_flush_locked(struct brw_context *brw)
{
   struct intel_batchbuffer *batch = &brw->batch;
   int ret = 0;

   if (brw->has_llc) {
      drm_intel_bo_unmap(batch->bo);
   } else {
      ret = drm_intel_bo_subdata(batch->bo, 0, 4 * USED_BATCH(*batch), batch->map);
      if (ret == 0 && batch->state_batch_offset != batch->bo->size) {
         ret = drm_intel_bo_subdata(batch->bo,
                                    batch->state_batch_offset,
                                    batch->bo->size - batch->state_batch_offset,
                                    (char *)batch->map + batch->state_batch_offset);
      }
   }

   if (!brw->intelScreen->no_hw) {
      int flags;

      if (brw->gen >= 6 && batch->ring == BLT_RING) {
         flags = I915_EXEC_BLT;
      } else {
         flags = I915_EXEC_RENDER |
            (brw->use_resource_streamer ? I915_EXEC_RESOURCE_STREAMER : 0);
      }
      if (batch->needs_sol_reset)
         flags |= I915_EXEC_GEN7_SOL_RESET;

      if (ret == 0) {
         if (unlikely(INTEL_DEBUG & DEBUG_AUB))
            brw_annotate_aub(brw);

         if (brw->hw_ctx == NULL || batch->ring != RENDER_RING) {
            ret = drm_intel_bo_mrb_exec(batch->bo, 4 * USED_BATCH(*batch),
                                        NULL, 0, 0, flags);
         } else {
            ret = drm_intel_gem_bo_context_exec(batch->bo, brw->hw_ctx,
                                                4 * USED_BATCH(*batch), flags);
         }
      }

      throttle(brw);
   }

   if (unlikely(INTEL_DEBUG & DEBUG_BATCH))
      do_batch_dump(brw);

   if (ret != 0) {
      fprintf(stderr, "intel_do_flush_locked failed: %s\n", strerror(-ret));
      exit(1);
   }

   return ret;
}
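
/* Finish the current batch, submit it to the kernel, and start a new one.
 * Callers normally reach this through the intel_batchbuffer_flush() wrapper
 * macro, which supplies __FILE__ and __LINE__ for the debug output below.
 */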
int
_intel_batchbuffer_flush(struct brw_context *brw,
                         const char *file, int line)
{
   int ret;

   if (USED_BATCH(brw->batch) == 0)
      return 0;

   if (brw->throttle_batch[0] == NULL) {
      brw->throttle_batch[0] = brw->batch.bo;
      drm_intel_bo_reference(brw->throttle_batch[0]);
   }

   if (unlikely(INTEL_DEBUG & DEBUG_BATCH)) {
      int bytes_for_commands = 4 * USED_BATCH(brw->batch);
      int bytes_for_state = brw->batch.bo->size - brw->batch.state_batch_offset;
      int total_bytes = bytes_for_commands + bytes_for_state;
      fprintf(stderr, "%s:%d: Batchbuffer flush with %4db (pkt) + "
              "%4db (state) = %4db (%0.1f%%)\n", file, line,
              bytes_for_commands, bytes_for_state, total_bytes,
              100.0f * total_bytes / BATCH_SZ);
   }

   brw->batch.reserved_space = 0;

   brw_finish_batch(brw);

   /* Mark the end of the buffer. */
   intel_batchbuffer_emit_dword(brw, MI_BATCH_BUFFER_END);
   if (USED_BATCH(brw->batch) & 1) {
      /* Round batchbuffer usage to 2 DWORDs. */
      intel_batchbuffer_emit_dword(brw, MI_NOOP);
   }

   intel_upload_finish(brw);

   /* Check that we didn't just wrap our batchbuffer at a bad time. */
   assert(!brw->no_batch_wrap);

   ret = do_flush_locked(brw);

   if (unlikely(INTEL_DEBUG & DEBUG_SYNC)) {
      fprintf(stderr, "waiting for idle\n");
      drm_intel_bo_wait_rendering(brw->batch.bo);
   }

   if (brw->use_resource_streamer)
      gen7_reset_hw_bt_pool_offsets(brw);

   /* Start a new batch buffer. */
   brw_new_batch(brw);

   return ret;
}
/* This is the only way buffers get added to the validate list.
 */
uint32_t
intel_batchbuffer_reloc(struct brw_context *brw,
                        drm_intel_bo *buffer, uint32_t offset,
                        uint32_t read_domains, uint32_t write_domain,
                        uint32_t delta)
{
   int ret;

   ret = drm_intel_bo_emit_reloc(brw->batch.bo, offset,
                                 buffer, delta,
                                 read_domains, write_domain);
   assert(ret == 0);
   (void) ret;

   /* Using the old buffer offset, write in what the right data would be, in
    * case the buffer doesn't move and we can short-circuit the relocation
    * processing in the kernel
    */
   return buffer->offset64 + delta;
}
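
/* 64-bit variant of the above, used for packets that take a full 48-bit
 * graphics address (e.g. via OUT_RELOC64 on Gen8+).
 */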
uint64_t
intel_batchbuffer_reloc64(struct brw_context *brw,
                          drm_intel_bo *buffer, uint32_t offset,
                          uint32_t read_domains, uint32_t write_domain,
                          uint32_t delta)
{
   int ret = drm_intel_bo_emit_reloc(brw->batch.bo, offset,
                                     buffer, delta,
                                     read_domains, write_domain);
   assert(ret == 0);
   (void) ret;

   /* Using the old buffer offset, write in what the right data would be, in
    * case the buffer doesn't move and we can short-circuit the relocation
    * processing in the kernel
    */
   return buffer->offset64 + delta;
}
void
intel_batchbuffer_data(struct brw_context *brw,
                       const void *data, GLuint bytes, enum brw_gpu_ring ring)
{
   assert((bytes & 3) == 0);
   intel_batchbuffer_require_space(brw, bytes, ring);
   memcpy(brw->batch.map_next, data, bytes);
   brw->batch.map_next += bytes >> 2;
}
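
/* Emit MI_LOAD_REGISTER_MEM commands to load `size` consecutive DWORDs from
 * a buffer object into consecutive 32-bit registers starting at `reg`.
 * Gen8+ uses the 4-DWORD form with a 64-bit relocation; Gen7 uses the
 * 3-DWORD form.
 */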
static void
load_sized_register_mem(struct brw_context *brw,
                        uint32_t reg,
                        drm_intel_bo *bo,
                        uint32_t read_domains, uint32_t write_domain,
                        uint32_t offset,
                        int size)
{
   int i;

   /* MI_LOAD_REGISTER_MEM only exists on Gen7+. */
   assert(brw->gen >= 7);

   if (brw->gen >= 8) {
      BEGIN_BATCH(4 * size);
      for (i = 0; i < size; i++) {
         OUT_BATCH(GEN7_MI_LOAD_REGISTER_MEM | (4 - 2));
         OUT_BATCH(reg + i * 4);
         OUT_RELOC64(bo, read_domains, write_domain, offset + i * 4);
      }
      ADVANCE_BATCH();
   } else {
      BEGIN_BATCH(3 * size);
      for (i = 0; i < size; i++) {
         OUT_BATCH(GEN7_MI_LOAD_REGISTER_MEM | (3 - 2));
         OUT_BATCH(reg + i * 4);
         OUT_RELOC(bo, read_domains, write_domain, offset + i * 4);
      }
      ADVANCE_BATCH();
   }
}
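
/* Load a single 32-bit register from a buffer object; the 64-bit variant
 * below loads two consecutive DWORDs.
 */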
void
brw_load_register_mem(struct brw_context *brw,
                      uint32_t reg,
                      drm_intel_bo *bo,
                      uint32_t read_domains, uint32_t write_domain,
                      uint32_t offset)
{
   load_sized_register_mem(brw, reg, bo, read_domains, write_domain, offset, 1);
}
void
brw_load_register_mem64(struct brw_context *brw,
                        uint32_t reg,
                        drm_intel_bo *bo,
                        uint32_t read_domains, uint32_t write_domain,
                        uint32_t offset)
{
   load_sized_register_mem(brw, reg, bo, read_domains, write_domain, offset, 2);
}