i965: Make intel_batchbuffer_reloc() take a batchbuffer argument
[mesa.git] / src/mesa/drivers/dri/i965/intel_batchbuffer.c
1 /*
2 * Copyright 2006 VMware, Inc.
3 * All Rights Reserved.
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the
7 * "Software"), to deal in the Software without restriction, including
8 * without limitation the rights to use, copy, modify, merge, publish,
9 * distribute, sublicense, and/or sell copies of the Software, and to
10 * permit persons to whom the Software is furnished to do so, subject to
11 * the following conditions:
12 *
13 * The above copyright notice and this permission notice (including the
14 * next paragraph) shall be included in all copies or substantial portions
15 * of the Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
18 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
19 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
20 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
21 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
22 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
23 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
24 */
25
26 #include "intel_batchbuffer.h"
27 #include "intel_buffer_objects.h"
28 #include "intel_bufmgr.h"
29 #include "intel_buffers.h"
30 #include "intel_fbo.h"
31 #include "brw_context.h"
32 #include "brw_defines.h"
33 #include "brw_state.h"
34
35 #include <xf86drm.h>
36 #include <i915_drm.h>
37
38 static void
39 intel_batchbuffer_reset(struct brw_context *brw);
40
41 void
42 intel_batchbuffer_init(struct brw_context *brw)
43 {
44 intel_batchbuffer_reset(brw);
45
46 if (!brw->has_llc) {
47 brw->batch.cpu_map = malloc(BATCH_SZ);
48 brw->batch.map = brw->batch.cpu_map;
49 brw->batch.map_next = brw->batch.cpu_map;
50 }
51 }
52
53 static void
54 intel_batchbuffer_reset(struct brw_context *brw)
55 {
56 if (brw->batch.last_bo != NULL) {
57 drm_intel_bo_unreference(brw->batch.last_bo);
58 brw->batch.last_bo = NULL;
59 }
60 brw->batch.last_bo = brw->batch.bo;
61
62 brw_render_cache_set_clear(brw);
63
64 brw->batch.bo = drm_intel_bo_alloc(brw->bufmgr, "batchbuffer",
65 BATCH_SZ, 4096);
66 if (brw->has_llc) {
67 drm_intel_bo_map(brw->batch.bo, true);
68 brw->batch.map = brw->batch.bo->virtual;
69 }
70 brw->batch.map_next = brw->batch.map;
71
72 brw->batch.reserved_space = BATCH_RESERVED;
73 brw->batch.state_batch_offset = brw->batch.bo->size;
74 brw->batch.needs_sol_reset = false;
75 brw->batch.state_base_address_emitted = false;
76
77 /* We don't know what ring the new batch will be sent to until we see the
78 * first BEGIN_BATCH or BEGIN_BATCH_BLT. Mark it as unknown.
79 */
80 brw->batch.ring = UNKNOWN_RING;
81 }
82
83 void
84 intel_batchbuffer_save_state(struct brw_context *brw)
85 {
86 brw->batch.saved.map_next = brw->batch.map_next;
87 brw->batch.saved.reloc_count =
88 drm_intel_gem_bo_get_reloc_count(brw->batch.bo);
89 }
90
91 void
92 intel_batchbuffer_reset_to_saved(struct brw_context *brw)
93 {
94 drm_intel_gem_bo_clear_relocs(brw->batch.bo, brw->batch.saved.reloc_count);
95
96 brw->batch.map_next = brw->batch.saved.map_next;
97 if (USED_BATCH(brw->batch) == 0)
98 brw->batch.ring = UNKNOWN_RING;
99 }
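
/* Sketch of the intended save/restore pattern (loosely modeled on how the
 * draw path uses it; the wrapper below and its aperture check are
 * illustrative, not part of this file): checkpoint the batch, emit a group
 * of packets that must not be split across batches, then roll back and
 * flush if the result will not fit.
 */
static void
example_emit_without_wrapping(struct brw_context *brw)
{
   intel_batchbuffer_save_state(brw);
   brw->no_batch_wrap = true;

   /* ... emit packets that must all land in this batch ... */

   brw->no_batch_wrap = false;

   if (drm_intel_bufmgr_check_aperture_space(&brw->batch.bo, 1) != 0) {
      /* Undo the partial emission and start over in a fresh batch. */
      intel_batchbuffer_reset_to_saved(brw);
      intel_batchbuffer_flush(brw);
   }
}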
100
101 void
102 intel_batchbuffer_free(struct brw_context *brw)
103 {
104 free(brw->batch.cpu_map);
105 drm_intel_bo_unreference(brw->batch.last_bo);
106 drm_intel_bo_unreference(brw->batch.bo);
107 }
108
109 void
110 intel_batchbuffer_require_space(struct brw_context *brw, GLuint sz,
111 enum brw_gpu_ring ring)
112 {
113 /* If we're switching rings, implicitly flush the batch. */
114 if (unlikely(ring != brw->batch.ring) && brw->batch.ring != UNKNOWN_RING &&
115 brw->gen >= 6) {
116 intel_batchbuffer_flush(brw);
117 }
118
119 #ifdef DEBUG
120 assert(sz < BATCH_SZ - BATCH_RESERVED);
121 #endif
122 if (intel_batchbuffer_space(brw) < sz)
123 intel_batchbuffer_flush(brw);
124
125 enum brw_gpu_ring prev_ring = brw->batch.ring;
126 /* The intel_batchbuffer_flush() calls above might have changed
127 * brw->batch.ring to UNKNOWN_RING, so we need to set it here at the end.
128 */
129 brw->batch.ring = ring;
130
131 if (unlikely(prev_ring == UNKNOWN_RING && ring == RENDER_RING))
132 intel_batchbuffer_emit_render_ring_prelude(brw);
133 }
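
/* Illustrative caller pattern (the helper below is not part of this file):
 * reserve space for a fixed-size packet on the desired ring, then write its
 * DWORDs. The BEGIN_BATCH()/OUT_BATCH() macros in intel_batchbuffer.h wrap
 * essentially the same steps for render-ring packets.
 */
static inline void
example_emit_noop(struct brw_context *brw)
{
   /* One DWORD is 4 bytes of batch space. */
   intel_batchbuffer_require_space(brw, 4, RENDER_RING);
   intel_batchbuffer_emit_dword(brw, MI_NOOP);
}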
134
135 static void
136 do_batch_dump(struct brw_context *brw)
137 {
138 struct drm_intel_decode *decode;
139 struct intel_batchbuffer *batch = &brw->batch;
140 int ret;
141
142 decode = drm_intel_decode_context_alloc(brw->screen->deviceID);
143 if (!decode)
144 return;
145
146 ret = drm_intel_bo_map(batch->bo, false);
147 if (ret == 0) {
148 drm_intel_decode_set_batch_pointer(decode,
149 batch->bo->virtual,
150 batch->bo->offset64,
151 USED_BATCH(*batch));
152 } else {
153 fprintf(stderr,
154 "WARNING: failed to map batchbuffer (%s), "
155 "dumping uploaded data instead.\n", strerror(ret));
156
157 drm_intel_decode_set_batch_pointer(decode,
158 batch->map,
159 batch->bo->offset64,
160 USED_BATCH(*batch));
161 }
162
163 drm_intel_decode_set_output_file(decode, stderr);
164 drm_intel_decode(decode);
165
166 drm_intel_decode_context_free(decode);
167
168 if (ret == 0) {
169 drm_intel_bo_unmap(batch->bo);
170
171 brw_debug_batch(brw);
172 }
173 }
174
175 void
176 intel_batchbuffer_emit_render_ring_prelude(struct brw_context *brw)
177 {
178 /* Currently unused. */
179 }
180
181 /**
182 * Called when starting a new batch buffer.
183 */
184 static void
185 brw_new_batch(struct brw_context *brw)
186 {
187 /* Create a new batchbuffer and reset the associated state: */
188 drm_intel_gem_bo_clear_relocs(brw->batch.bo, 0);
189 intel_batchbuffer_reset(brw);
190
191 /* If the kernel supports hardware contexts, then most hardware state is
192 * preserved between batches; we only need to re-emit state that is required
193 * to be in every batch. Otherwise we need to re-emit all the state that
194 * would otherwise be stored in the context (which for all intents and
195 * purposes means everything).
196 */
197 if (brw->hw_ctx == NULL)
198 brw->ctx.NewDriverState |= BRW_NEW_CONTEXT;
199
200 brw->ctx.NewDriverState |= BRW_NEW_BATCH;
201
202 brw->state_batch_count = 0;
203
204 brw->ib.type = -1;
205
206 /* We need to periodically reap the shader time results, because rollover
207 * happens every few seconds. We also want to see results every once in a
208 * while, because many programs won't cleanly destroy our context, so the
209 * end-of-run printout may not happen.
210 */
211 if (INTEL_DEBUG & DEBUG_SHADER_TIME)
212 brw_collect_and_report_shader_time(brw);
213 }
214
215 /**
216 * Called from intel_batchbuffer_flush before emitting MI_BATCHBUFFER_END and
217 * sending it off.
218 *
219 * This function can emit state (say, to preserve registers that aren't saved
220 * between batches). All of this state MUST fit in the reserved space at the
221 * end of the batchbuffer. If you add more GPU state, increase the reserved
222 * space by updating the BATCH_RESERVED macro.
223 */
224 static void
225 brw_finish_batch(struct brw_context *brw)
226 {
227 /* Capture the closing pipeline statistics register values necessary to
228 * support query objects (in the non-hardware context world).
229 */
230 brw_emit_query_end(brw);
231
232 if (brw->batch.ring == RENDER_RING) {
233 /* Work around L3 state leaking into contexts set up with MI_RESTORE_INHIBIT,
234 * which assume that the L3 cache is configured according to the hardware
235 * defaults.
236 */
237 if (brw->gen >= 7)
238 gen7_restore_default_l3_config(brw);
239
240 if (brw->is_haswell) {
241 /* From the Haswell PRM, Volume 2b, Command Reference: Instructions,
242 * 3DSTATE_CC_STATE_POINTERS > "Note":
243 *
244 * "SW must program 3DSTATE_CC_STATE_POINTERS command at the end of every
245 * 3D batch buffer followed by a PIPE_CONTROL with RC flush and CS stall."
246 *
247 * From the example in the docs, it seems to expect a regular pipe control
248 * flush here as well. We may have done it already, but meh.
249 *
250 * See also WaAvoidRCZCounterRollover.
251 */
252 brw_emit_mi_flush(brw);
253 BEGIN_BATCH(2);
254 OUT_BATCH(_3DSTATE_CC_STATE_POINTERS << 16 | (2 - 2));
255 OUT_BATCH(brw->cc.state_offset | 1);
256 ADVANCE_BATCH();
257 brw_emit_pipe_control_flush(brw, PIPE_CONTROL_RENDER_TARGET_FLUSH |
258 PIPE_CONTROL_CS_STALL);
259 }
260 }
261
262 /* Mark that the current program cache BO has been used by the GPU.
263 * It will be reallocated if we need to put new programs in for the
264 * next batch.
265 */
266 brw->cache.bo_used_by_gpu = true;
267 }
268
269 static void
270 throttle(struct brw_context *brw)
271 {
272 /* Wait for the swapbuffers before the one we just emitted, so we
273 * don't get too many swaps outstanding for apps that are GPU-heavy
274 * but not CPU-heavy.
275 *
276 * We're using intelDRI2Flush (called from the loader before
277 * swapbuffer) and glFlush (for front buffer rendering) as the
278 * indicator that a frame is done and then throttle when we get
279 * here as we prepare to render the next frame. At this point the
280 * round trips for swap/copy and getting new buffers are done and
281 * we'll spend less time waiting on the GPU.
282 *
283 * Unfortunately, we don't have a handle to the batch containing
284 * the swap, and getting our hands on that doesn't seem worth it,
285 * so we just use the first batch we emitted after the last swap.
286 */
287 if (brw->need_swap_throttle && brw->throttle_batch[0]) {
288 if (brw->throttle_batch[1]) {
289 if (!brw->disable_throttling)
290 drm_intel_bo_wait_rendering(brw->throttle_batch[1]);
291 drm_intel_bo_unreference(brw->throttle_batch[1]);
292 }
293 brw->throttle_batch[1] = brw->throttle_batch[0];
294 brw->throttle_batch[0] = NULL;
295 brw->need_swap_throttle = false;
296 /* Throttling here is more precise than the throttle ioctl, so skip it */
297 brw->need_flush_throttle = false;
298 }
299
300 if (brw->need_flush_throttle) {
301 __DRIscreen *dri_screen = brw->screen->driScrnPriv;
302 drmCommandNone(dri_screen->fd, DRM_I915_GEM_THROTTLE);
303 brw->need_flush_throttle = false;
304 }
305 }
306
307 /* Drop when RS headers get pulled to libdrm */
308 #ifndef I915_EXEC_RESOURCE_STREAMER
309 #define I915_EXEC_RESOURCE_STREAMER (1<<15)
310 #endif
311
312 /* TODO: Push this whole function into bufmgr.
313 */
314 static int
315 do_flush_locked(struct brw_context *brw)
316 {
317 struct intel_batchbuffer *batch = &brw->batch;
318 int ret = 0;
319
320 if (brw->has_llc) {
321 drm_intel_bo_unmap(batch->bo);
322 } else {
323 ret = drm_intel_bo_subdata(batch->bo, 0, 4 * USED_BATCH(*batch), batch->map);
324 if (ret == 0 && batch->state_batch_offset != batch->bo->size) {
325 ret = drm_intel_bo_subdata(batch->bo,
326 batch->state_batch_offset,
327 batch->bo->size - batch->state_batch_offset,
328 (char *)batch->map + batch->state_batch_offset);
329 }
330 }
331
332 if (!brw->screen->no_hw) {
333 int flags;
334
335 if (brw->gen >= 6 && batch->ring == BLT_RING) {
336 flags = I915_EXEC_BLT;
337 } else {
338 flags = I915_EXEC_RENDER |
339 (brw->use_resource_streamer ? I915_EXEC_RESOURCE_STREAMER : 0);
340 }
341 if (batch->needs_sol_reset)
342 flags |= I915_EXEC_GEN7_SOL_RESET;
343
344 if (ret == 0) {
345 if (unlikely(INTEL_DEBUG & DEBUG_AUB))
346 brw_annotate_aub(brw);
347
348 if (brw->hw_ctx == NULL || batch->ring != RENDER_RING) {
349 ret = drm_intel_bo_mrb_exec(batch->bo, 4 * USED_BATCH(*batch),
350 NULL, 0, 0, flags);
351 } else {
352 ret = drm_intel_gem_bo_context_exec(batch->bo, brw->hw_ctx,
353 4 * USED_BATCH(*batch), flags);
354 }
355 }
356
357 throttle(brw);
358 }
359
360 if (unlikely(INTEL_DEBUG & DEBUG_BATCH))
361 do_batch_dump(brw);
362
363 if (brw->ctx.Const.ResetStrategy == GL_LOSE_CONTEXT_ON_RESET_ARB)
364 brw_check_for_reset(brw);
365
366 if (ret != 0) {
367 fprintf(stderr, "intel_do_flush_locked failed: %s\n", strerror(-ret));
368 exit(1);
369 }
370
371 return ret;
372 }
373
374 int
375 _intel_batchbuffer_flush(struct brw_context *brw,
376 const char *file, int line)
377 {
378 int ret;
379
380 if (USED_BATCH(brw->batch) == 0)
381 return 0;
382
383 if (brw->throttle_batch[0] == NULL) {
384 brw->throttle_batch[0] = brw->batch.bo;
385 drm_intel_bo_reference(brw->throttle_batch[0]);
386 }
387
388 if (unlikely(INTEL_DEBUG & DEBUG_BATCH)) {
389 int bytes_for_commands = 4 * USED_BATCH(brw->batch);
390 int bytes_for_state = brw->batch.bo->size - brw->batch.state_batch_offset;
391 int total_bytes = bytes_for_commands + bytes_for_state;
392 fprintf(stderr, "%s:%d: Batchbuffer flush with %4db (pkt) + "
393 "%4db (state) = %4db (%0.1f%%)\n", file, line,
394 bytes_for_commands, bytes_for_state,
395 total_bytes,
396 100.0f * total_bytes / BATCH_SZ);
397 }
398
399 brw->batch.reserved_space = 0;
400
401 brw_finish_batch(brw);
402
403 /* Mark the end of the buffer. */
404 intel_batchbuffer_emit_dword(brw, MI_BATCH_BUFFER_END);
405 if (USED_BATCH(brw->batch) & 1) {
406 /* Round batchbuffer usage to 2 DWORDs. */
407 intel_batchbuffer_emit_dword(brw, MI_NOOP);
408 }
409
410 intel_upload_finish(brw);
411
412 /* Check that we didn't just wrap our batchbuffer at a bad time. */
413 assert(!brw->no_batch_wrap);
414
415 ret = do_flush_locked(brw);
416
417 if (unlikely(INTEL_DEBUG & DEBUG_SYNC)) {
418 fprintf(stderr, "waiting for idle\n");
419 drm_intel_bo_wait_rendering(brw->batch.bo);
420 }
421
422 if (brw->use_resource_streamer)
423 gen7_reset_hw_bt_pool_offsets(brw);
424
425 /* Start a new batch buffer. */
426 brw_new_batch(brw);
427
428 return ret;
429 }
430
431
432 /* This is the only way buffers get added to the validate list.
433 */
434 uint32_t
435 intel_batchbuffer_reloc(struct intel_batchbuffer *batch,
436 drm_intel_bo *buffer, uint32_t offset,
437 uint32_t read_domains, uint32_t write_domain,
438 uint32_t delta)
439 {
440 int ret;
441
442 ret = drm_intel_bo_emit_reloc(batch->bo, offset,
443 buffer, delta,
444 read_domains, write_domain);
445 assert(ret == 0);
446 (void)ret;
447
448 /* Using the old buffer offset, write in what the right data would be, in
449 * case the buffer doesn't move and we can short-circuit the relocation
450 * processing in the kernel
451 */
452 return buffer->offset64 + delta;
453 }
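
/* Illustrative sketch of how a caller consumes the return value (roughly
 * what the OUT_RELOC() macro does; the helper below is not part of the real
 * API): compute the byte offset of the DWORD about to be written, register
 * the relocation there, and emit the presumed address into the batch.
 */
static inline void
example_emit_reloc(struct brw_context *brw, drm_intel_bo *bo,
                   uint32_t read_domains, uint32_t write_domain,
                   uint32_t delta)
{
   uint32_t offset =
      (uint32_t) ((char *) brw->batch.map_next - (char *) brw->batch.map);

   intel_batchbuffer_emit_dword(brw,
      intel_batchbuffer_reloc(&brw->batch, bo, offset,
                              read_domains, write_domain, delta));
}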
454
455 uint64_t
456 intel_batchbuffer_reloc64(struct intel_batchbuffer *batch,
457 drm_intel_bo *buffer, uint32_t offset,
458 uint32_t read_domains, uint32_t write_domain,
459 uint32_t delta)
460 {
461 int ret = drm_intel_bo_emit_reloc(batch->bo, offset,
462 buffer, delta,
463 read_domains, write_domain);
464 assert(ret == 0);
465 (void) ret;
466
467 /* Using the old buffer offset, write in what the right data would be, in
468 * case the buffer doesn't move and we can short-circuit the relocation
469 * processing in the kernel
470 */
471 return buffer->offset64 + delta;
472 }
473
474
475 void
476 intel_batchbuffer_data(struct brw_context *brw,
477 const void *data, GLuint bytes, enum brw_gpu_ring ring)
478 {
479 assert((bytes & 3) == 0);
480 intel_batchbuffer_require_space(brw, bytes, ring);
481 memcpy(brw->batch.map_next, data, bytes);
482 brw->batch.map_next += bytes >> 2;
483 }
484
485 static void
486 load_sized_register_mem(struct brw_context *brw,
487 uint32_t reg,
488 drm_intel_bo *bo,
489 uint32_t read_domains, uint32_t write_domain,
490 uint32_t offset,
491 int size)
492 {
493 int i;
494
495 /* MI_LOAD_REGISTER_MEM only exists on Gen7+. */
496 assert(brw->gen >= 7);
497
498 if (brw->gen >= 8) {
499 BEGIN_BATCH(4 * size);
500 for (i = 0; i < size; i++) {
501 OUT_BATCH(GEN7_MI_LOAD_REGISTER_MEM | (4 - 2));
502 OUT_BATCH(reg + i * 4);
503 OUT_RELOC64(bo, read_domains, write_domain, offset + i * 4);
504 }
505 ADVANCE_BATCH();
506 } else {
507 BEGIN_BATCH(3 * size);
508 for (i = 0; i < size; i++) {
509 OUT_BATCH(GEN7_MI_LOAD_REGISTER_MEM | (3 - 2));
510 OUT_BATCH(reg + i * 4);
511 OUT_RELOC(bo, read_domains, write_domain, offset + i * 4);
512 }
513 ADVANCE_BATCH();
514 }
515 }
516
517 void
518 brw_load_register_mem(struct brw_context *brw,
519 uint32_t reg,
520 drm_intel_bo *bo,
521 uint32_t read_domains, uint32_t write_domain,
522 uint32_t offset)
523 {
524 load_sized_register_mem(brw, reg, bo, read_domains, write_domain, offset, 1);
525 }
526
527 void
528 brw_load_register_mem64(struct brw_context *brw,
529 uint32_t reg,
530 drm_intel_bo *bo,
531 uint32_t read_domains, uint32_t write_domain,
532 uint32_t offset)
533 {
534 load_sized_register_mem(brw, reg, bo, read_domains, write_domain, offset, 2);
535 }
536
537 /*
538 * Write an arbitrary 32-bit register to a buffer via MI_STORE_REGISTER_MEM.
539 */
540 void
541 brw_store_register_mem32(struct brw_context *brw,
542 drm_intel_bo *bo, uint32_t reg, uint32_t offset)
543 {
544 assert(brw->gen >= 6);
545
546 if (brw->gen >= 8) {
547 BEGIN_BATCH(4);
548 OUT_BATCH(MI_STORE_REGISTER_MEM | (4 - 2));
549 OUT_BATCH(reg);
550 OUT_RELOC64(bo, I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
551 offset);
552 ADVANCE_BATCH();
553 } else {
554 BEGIN_BATCH(3);
555 OUT_BATCH(MI_STORE_REGISTER_MEM | (3 - 2));
556 OUT_BATCH(reg);
557 OUT_RELOC(bo, I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
558 offset);
559 ADVANCE_BATCH();
560 }
561 }
562
563 /*
564 * Write an arbitrary 64-bit register to a buffer via MI_STORE_REGISTER_MEM.
565 */
566 void
567 brw_store_register_mem64(struct brw_context *brw,
568 drm_intel_bo *bo, uint32_t reg, uint32_t offset)
569 {
570 assert(brw->gen >= 6);
571
572 /* MI_STORE_REGISTER_MEM only stores a single 32-bit value, so to
573 * read a full 64-bit register, we need to do two of them.
574 */
575 if (brw->gen >= 8) {
576 BEGIN_BATCH(8);
577 OUT_BATCH(MI_STORE_REGISTER_MEM | (4 - 2));
578 OUT_BATCH(reg);
579 OUT_RELOC64(bo, I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
580 offset);
581 OUT_BATCH(MI_STORE_REGISTER_MEM | (4 - 2));
582 OUT_BATCH(reg + sizeof(uint32_t));
583 OUT_RELOC64(bo, I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
584 offset + sizeof(uint32_t));
585 ADVANCE_BATCH();
586 } else {
587 BEGIN_BATCH(6);
588 OUT_BATCH(MI_STORE_REGISTER_MEM | (3 - 2));
589 OUT_BATCH(reg);
590 OUT_RELOC(bo, I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
591 offset);
592 OUT_BATCH(MI_STORE_REGISTER_MEM | (3 - 2));
593 OUT_BATCH(reg + sizeof(uint32_t));
594 OUT_RELOC(bo, I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
595 offset + sizeof(uint32_t));
596 ADVANCE_BATCH();
597 }
598 }
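
/* Illustrative use (not part of this file): snapshot a 64-bit counter
 * register into a temporary buffer and read it back on the CPU once the
 * batch has completed. The register number is left to the caller.
 */
static uint64_t
example_read_register_u64(struct brw_context *brw, uint32_t reg)
{
   drm_intel_bo *bo = drm_intel_bo_alloc(brw->bufmgr, "register snapshot",
                                         4096, 4096);
   uint64_t value;

   brw_store_register_mem64(brw, bo, reg, 0);
   intel_batchbuffer_flush(brw);

   /* drm_intel_bo_map() waits for the GPU to finish writing the buffer. */
   drm_intel_bo_map(bo, false);
   value = *(uint64_t *) bo->virtual;
   drm_intel_bo_unmap(bo);
   drm_intel_bo_unreference(bo);

   return value;
}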
599
600 /*
601 * Write a 32-bit register using immediate data.
602 */
603 void
604 brw_load_register_imm32(struct brw_context *brw, uint32_t reg, uint32_t imm)
605 {
606 assert(brw->gen >= 6);
607
608 BEGIN_BATCH(3);
609 OUT_BATCH(MI_LOAD_REGISTER_IMM | (3 - 2));
610 OUT_BATCH(reg);
611 OUT_BATCH(imm);
612 ADVANCE_BATCH();
613 }
614
615 /*
616 * Write a 64-bit register using immediate data.
617 */
618 void
619 brw_load_register_imm64(struct brw_context *brw, uint32_t reg, uint64_t imm)
620 {
621 assert(brw->gen >= 6);
622
623 BEGIN_BATCH(5);
624 OUT_BATCH(MI_LOAD_REGISTER_IMM | (5 - 2));
625 OUT_BATCH(reg);
626 OUT_BATCH(imm & 0xffffffff);
627 OUT_BATCH(reg + 4);
628 OUT_BATCH(imm >> 32);
629 ADVANCE_BATCH();
630 }
631
632 /*
633 * Copies a 32-bit register.
634 */
635 void
636 brw_load_register_reg(struct brw_context *brw, uint32_t src, uint32_t dest)
637 {
638 assert(brw->gen >= 8 || brw->is_haswell);
639
640 BEGIN_BATCH(3);
641 OUT_BATCH(MI_LOAD_REGISTER_REG | (3 - 2));
642 OUT_BATCH(src);
643 OUT_BATCH(dest);
644 ADVANCE_BATCH();
645 }
646
647 /*
648 * Copies a 64-bit register.
649 */
650 void
651 brw_load_register_reg64(struct brw_context *brw, uint32_t src, uint32_t dest)
652 {
653 assert(brw->gen >= 8 || brw->is_haswell);
654
655 BEGIN_BATCH(6);
656 OUT_BATCH(MI_LOAD_REGISTER_REG | (3 - 2));
657 OUT_BATCH(src);
658 OUT_BATCH(dest);
659 OUT_BATCH(MI_LOAD_REGISTER_REG | (3 - 2));
660 OUT_BATCH(src + sizeof(uint32_t));
661 OUT_BATCH(dest + sizeof(uint32_t));
662 ADVANCE_BATCH();
663 }
664
665 /*
666 * Write 32-bits of immediate data to a GPU memory buffer.
667 */
668 void
669 brw_store_data_imm32(struct brw_context *brw, drm_intel_bo *bo,
670 uint32_t offset, uint32_t imm)
671 {
672 assert(brw->gen >= 6);
673
674 BEGIN_BATCH(4);
675 OUT_BATCH(MI_STORE_DATA_IMM | (4 - 2));
676 if (brw->gen >= 8)
677 OUT_RELOC64(bo, I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
678 offset);
679 else {
680 OUT_BATCH(0); /* MBZ */
681 OUT_RELOC(bo, I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
682 offset);
683 }
684 OUT_BATCH(imm);
685 ADVANCE_BATCH();
686 }
687
688 /*
689 * Write 64-bits of immediate data to a GPU memory buffer.
690 */
691 void
692 brw_store_data_imm64(struct brw_context *brw, drm_intel_bo *bo,
693 uint32_t offset, uint64_t imm)
694 {
695 assert(brw->gen >= 6);
696
697 BEGIN_BATCH(5);
698 OUT_BATCH(MI_STORE_DATA_IMM | (5 - 2));
699 if (brw->gen >= 8)
700 OUT_RELOC64(bo, I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
701 offset);
702 else {
703 OUT_BATCH(0); /* MBZ */
704 OUT_RELOC(bo, I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
705 offset);
706 }
707 OUT_BATCH(imm & 0xffffffffu);
708 OUT_BATCH(imm >> 32);
709 ADVANCE_BATCH();
710 }