X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fmesa%2Fdrivers%2Fdri%2Fi965%2Fintel_batchbuffer.h;h=91720dad5b4674eb4d92ba95467aa8de15ad745e;hb=eb3047c094abfa03e071453d7c373e9c2c574370;hp=779a7ccd05c023427945435aaa6204c4c9f59b26;hpb=f5dd608db2d6a67cfe27efed948408414a057fe3;p=mesa.git

diff --git a/src/mesa/drivers/dri/i965/intel_batchbuffer.h b/src/mesa/drivers/dri/i965/intel_batchbuffer.h
index 779a7ccd05c..91720dad5b4 100644
--- a/src/mesa/drivers/dri/i965/intel_batchbuffer.h
+++ b/src/mesa/drivers/dri/i965/intel_batchbuffer.h
@@ -4,149 +4,96 @@
 #include "main/mtypes.h"
 
 #include "brw_context.h"
-#include "intel_bufmgr.h"
-#include "intel_reg.h"
+#include "brw_bufmgr.h"
 
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-/**
- * Number of bytes to reserve for commands necessary to complete a batch.
- *
- * This includes:
- * - MI_BATCHBUFFER_END (4 bytes)
- * - Optional MI_NOOP for ensuring the batch length is qword aligned (4 bytes)
- * - Any state emitted by vtbl->finish_batch():
- *   - Gen4-5 record ending occlusion query values (4 * 4 = 16 bytes)
- *   - Disabling OA counters on Gen6+ (3 DWords = 12 bytes)
- *   - Ending MI_REPORT_PERF_COUNT on Gen5+, plus associated PIPE_CONTROLs:
- *     - Two sets of PIPE_CONTROLs, which become 3 PIPE_CONTROLs each on SNB,
- *       which are 4 DWords each ==> 2 * 3 * 4 * 4 = 96 bytes
- *     - 3 DWords for MI_REPORT_PERF_COUNT itself on Gen6+. ==> 12 bytes.
- *       On Ironlake, it's 6 DWords, but we have some slack due to the lack of
- *       Sandybridge PIPE_CONTROL madness.
+/* The kernel assumes batchbuffers are smaller than 256kB. */
+#define MAX_BATCH_SIZE (256 * 1024)
+
+/* 3DSTATE_BINDING_TABLE_POINTERS has a U16 offset from Surface State Base
+ * Address, which means that we can't put binding tables beyond 64kB. This
+ * effectively limits the maximum statebuffer size to 64kB.
  */
-#define BATCH_RESERVED 146
+#define MAX_STATE_SIZE (64 * 1024)
 
 struct intel_batchbuffer;
 
-void intel_batchbuffer_emit_render_ring_prelude(struct brw_context *brw);
 void intel_batchbuffer_init(struct brw_context *brw);
-void intel_batchbuffer_free(struct brw_context *brw);
+void intel_batchbuffer_free(struct intel_batchbuffer *batch);
 void intel_batchbuffer_save_state(struct brw_context *brw);
+bool intel_batchbuffer_saved_state_is_empty(struct brw_context *brw);
 void intel_batchbuffer_reset_to_saved(struct brw_context *brw);
-void intel_batchbuffer_clear_cache(struct brw_context *brw);
-
-int _intel_batchbuffer_flush(struct brw_context *brw,
-                             const char *file, int line);
-
-#define intel_batchbuffer_flush(intel) \
-        _intel_batchbuffer_flush(intel, __FILE__, __LINE__)
+void intel_batchbuffer_require_space(struct brw_context *brw, GLuint sz);
+int _intel_batchbuffer_flush_fence(struct brw_context *brw,
+                                   int in_fence_fd, int *out_fence_fd,
+                                   const char *file, int line);
 
+#define intel_batchbuffer_flush(brw) \
+   _intel_batchbuffer_flush_fence((brw), -1, NULL, __FILE__, __LINE__)
+#define intel_batchbuffer_flush_fence(brw, in_fence_fd, out_fence_fd) \
+   _intel_batchbuffer_flush_fence((brw), (in_fence_fd), (out_fence_fd), \
+                                  __FILE__, __LINE__)
 
 /* Unlike bmBufferData, this currently requires the buffer be mapped.
  * Consider it a convenience function wrapping multple
  * intel_buffer_dword() calls.
  */
 void intel_batchbuffer_data(struct brw_context *brw,
-                            const void *data, GLuint bytes,
-                            enum brw_gpu_ring ring);
-
-bool intel_batchbuffer_emit_reloc(struct brw_context *brw,
-                                  drm_intel_bo *buffer,
-                                  uint32_t read_domains,
-                                  uint32_t write_domain,
-                                  uint32_t offset);
-bool intel_batchbuffer_emit_reloc64(struct brw_context *brw,
-                                    drm_intel_bo *buffer,
-                                    uint32_t read_domains,
-                                    uint32_t write_domain,
-                                    uint32_t offset);
-void brw_emit_pipe_control_flush(struct brw_context *brw, uint32_t flags);
-void brw_emit_pipe_control_write(struct brw_context *brw, uint32_t flags,
-                                 drm_intel_bo *bo, uint32_t offset,
-                                 uint32_t imm_lower, uint32_t imm_upper);
-void intel_batchbuffer_emit_mi_flush(struct brw_context *brw);
-void intel_emit_post_sync_nonzero_flush(struct brw_context *brw);
-void intel_emit_depth_stall_flushes(struct brw_context *brw);
-void gen7_emit_vs_workaround_flush(struct brw_context *brw);
-void gen7_emit_cs_stall_flush(struct brw_context *brw);
-
-static inline uint32_t float_as_int(float f)
-{
-   union {
-      float f;
-      uint32_t d;
-   } fi;
+                            const void *data, GLuint bytes);
 
-   fi.f = f;
-   return fi.d;
-}
-
-/* Inline functions - might actually be better off with these
- * non-inlined. Certainly better off switching all command packets to
- * be passed as structs rather than dwords, but that's a little bit of
- * work...
- */
-static inline unsigned
-intel_batchbuffer_space(struct brw_context *brw)
+static inline bool
+brw_batch_has_aperture_space(struct brw_context *brw, uint64_t extra_space)
 {
-   return (brw->batch.state_batch_offset - brw->batch.reserved_space)
-      - brw->batch.used*4;
+   return brw->batch.aperture_space + extra_space <=
+          brw->screen->aperture_threshold;
 }
 
+bool brw_batch_references(struct intel_batchbuffer *batch, struct brw_bo *bo);
 
-static inline void
-intel_batchbuffer_emit_dword(struct brw_context *brw, GLuint dword)
-{
-#ifdef DEBUG
-   assert(intel_batchbuffer_space(brw) >= 4);
-#endif
-   brw->batch.map[brw->batch.used++] = dword;
-   assert(brw->batch.ring != UNKNOWN_RING);
-}
+#define RELOC_WRITE EXEC_OBJECT_WRITE
+#define RELOC_NEEDS_GGTT EXEC_OBJECT_NEEDS_GTT
+/* Inverted meaning, but using the same bit...emit_reloc will flip it. */
+#define RELOC_32BIT EXEC_OBJECT_SUPPORTS_48B_ADDRESS
 
-static inline void
-intel_batchbuffer_emit_float(struct brw_context *brw, float f)
-{
-   intel_batchbuffer_emit_dword(brw, float_as_int(f));
-}
+void brw_use_pinned_bo(struct intel_batchbuffer *batch, struct brw_bo *bo,
+                       unsigned writeable_flag);
 
-static inline void
-intel_batchbuffer_require_space(struct brw_context *brw, GLuint sz,
-                                enum brw_gpu_ring ring)
-{
-   /* If we're switching rings, implicitly flush the batch. */
-   if (unlikely(ring != brw->batch.ring) && brw->batch.ring != UNKNOWN_RING &&
-       brw->gen >= 6) {
-      intel_batchbuffer_flush(brw);
-   }
+uint64_t brw_batch_reloc(struct intel_batchbuffer *batch,
+                         uint32_t batch_offset,
+                         struct brw_bo *target,
+                         uint32_t target_offset,
+                         unsigned flags);
+uint64_t brw_state_reloc(struct intel_batchbuffer *batch,
+                         uint32_t batch_offset,
+                         struct brw_bo *target,
+                         uint32_t target_offset,
+                         unsigned flags);
 
-#ifdef DEBUG
-   assert(sz < BATCH_SZ - BATCH_RESERVED);
-#endif
-   if (intel_batchbuffer_space(brw) < sz)
-      intel_batchbuffer_flush(brw);
+#define USED_BATCH(_batch) \
+   ((uintptr_t)((_batch).map_next - (_batch).batch.map))
 
-   enum brw_gpu_ring prev_ring = brw->batch.ring;
-   /* The intel_batchbuffer_flush() calls above might have changed
-    * brw->batch.ring to UNKNOWN_RING, so we need to set it here at the end.
-    */
-   brw->batch.ring = ring;
+static inline uint32_t float_as_int(float f)
+{
+   union {
+      float f;
+      uint32_t d;
+   } fi;
 
-   if (unlikely(prev_ring == UNKNOWN_RING && ring == RENDER_RING))
-      intel_batchbuffer_emit_render_ring_prelude(brw);
+   fi.f = f;
+   return fi.d;
 }
 
 static inline void
-intel_batchbuffer_begin(struct brw_context *brw, int n, enum brw_gpu_ring ring)
+intel_batchbuffer_begin(struct brw_context *brw, int n)
 {
-   intel_batchbuffer_require_space(brw, n * 4, ring);
+   intel_batchbuffer_require_space(brw, n * 4);
 
-   brw->batch.emit = brw->batch.used;
 #ifdef DEBUG
+   brw->batch.emit = USED_BATCH(brw->batch);
    brw->batch.total = n;
 #endif
 }
@@ -156,7 +103,7 @@ intel_batchbuffer_advance(struct brw_context *brw)
 {
 #ifdef DEBUG
    struct intel_batchbuffer *batch = &brw->batch;
-   unsigned int _n = batch->used - batch->emit;
+   unsigned int _n = USED_BATCH(*batch) - batch->emit;
    assert(batch->total != 0);
    if (_n != batch->total) {
      fprintf(stderr, "ADVANCE_BATCH: %d of %d dwords emitted\n",
@@ -164,24 +111,52 @@ intel_batchbuffer_advance(struct brw_context *brw)
      abort();
    }
    batch->total = 0;
+#else
+   (void) brw;
 #endif
 }
 
-#define BEGIN_BATCH(n) intel_batchbuffer_begin(brw, n, RENDER_RING)
-#define BEGIN_BATCH_BLT(n) intel_batchbuffer_begin(brw, n, BLT_RING)
-#define OUT_BATCH(d) intel_batchbuffer_emit_dword(brw, d)
-#define OUT_BATCH_F(f) intel_batchbuffer_emit_float(brw, f)
-#define OUT_RELOC(buf, read_domains, write_domain, delta) do {          \
-   intel_batchbuffer_emit_reloc(brw, buf,                               \
-                                read_domains, write_domain, delta);     \
+static inline bool
+brw_ptr_in_state_buffer(struct intel_batchbuffer *batch, void *p)
+{
+   return (char *) p >= (char *) batch->state.map &&
+          (char *) p < (char *) batch->state.map + batch->state.bo->size;
+}
+
+#define BEGIN_BATCH(n) do {                            \
+   intel_batchbuffer_begin(brw, (n));                  \
+   uint32_t *__map = brw->batch.map_next;              \
+   brw->batch.map_next += (n)
+
+#define BEGIN_BATCH_BLT(n) do {                        \
+   assert(brw->screen->devinfo.gen < 6);               \
+   intel_batchbuffer_begin(brw, (n));                  \
+   uint32_t *__map = brw->batch.map_next;              \
+   brw->batch.map_next += (n)
+
+#define OUT_BATCH(d) *__map++ = (d)
+#define OUT_BATCH_F(f) OUT_BATCH(float_as_int((f)))
+
+#define OUT_RELOC(buf, flags, delta) do {                               \
+   uint32_t __offset = (__map - brw->batch.batch.map) * 4;              \
+   uint32_t reloc =                                                     \
+      brw_batch_reloc(&brw->batch, __offset, (buf), (delta), (flags));  \
+   OUT_BATCH(reloc);                                                    \
 } while (0)
 
 /* Handle 48-bit address relocations for Gen8+ */
-#define OUT_RELOC64(buf, read_domains, write_domain, delta) do {        \
-   intel_batchbuffer_emit_reloc64(brw, buf, read_domains, write_domain, delta); \
+#define OUT_RELOC64(buf, flags, delta) do {                             \
+   uint32_t __offset = (__map - brw->batch.batch.map) * 4;              \
+   uint64_t reloc64 =                                                   \
+      brw_batch_reloc(&brw->batch, __offset, (buf), (delta), (flags));  \
+   OUT_BATCH(reloc64);                                                  \
+   OUT_BATCH(reloc64 >> 32);                                            \
 } while (0)
 
-#define ADVANCE_BATCH() intel_batchbuffer_advance(brw);
+#define ADVANCE_BATCH()                                \
+   assert(__map == brw->batch.map_next);               \
+   intel_batchbuffer_advance(brw);                     \
+} while (0)
 
 #ifdef __cplusplus
 }
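
Usage note, not part of the diff: after this change the emit macros open and close an explicit do/while scope (BEGIN_BATCH declares a local __map cursor and advances map_next, ADVANCE_BATCH checks the dword count and supplies the closing "} while (0)"), and relocations take RELOC_* flags instead of the old read_domains/write_domain pair. A minimal caller sketch follows; the packet, its opcode dword, and the buffer object 'bo' are hypothetical placeholders, not taken from the patch.

/* Hypothetical 4-dword packet: placeholder opcode, a 48-bit write
 * relocation, and one immediate dword.
 */
static void
emit_example_packet(struct brw_context *brw, struct brw_bo *bo)
{
   BEGIN_BATCH(4);                    /* reserves 4 dwords, opens the do { } scope */
   OUT_BATCH(0x10000002);             /* placeholder command opcode/length dword */
   OUT_RELOC64(bo, RELOC_WRITE, 0);   /* low + high dwords from brw_batch_reloc() */
   OUT_BATCH(0xdeadbeef);             /* immediate payload */
   ADVANCE_BATCH();                   /* asserts exactly 4 dwords were written, closes scope */
}

Because ADVANCE_BATCH() now closes the block that BEGIN_BATCH()/BEGIN_BATCH_BLT() opens, each pair must appear in the same lexical scope.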
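
A second illustrative sketch, also not from the diff, of the fence-aware flush wrapper added above; the variable names, the -1 "no in-fence" convention shown in the macro, and the eventual consumer of the fd are assumptions.

/* Hypothetical sketch: flush the batch and export an out-fence fd. */
static void
flush_and_export_fence(struct brw_context *brw)
{
   int out_fence_fd = -1;

   /* No in-fence to wait on (-1); ask for an out-fence fd that a
    * consumer could wait on for this batch's completion.
    */
   intel_batchbuffer_flush_fence(brw, -1, &out_fence_fd);

   /* ... hand out_fence_fd to whoever needs to wait on the batch ... */
}

Callers that do not care about fences keep using intel_batchbuffer_flush(brw), which expands to the same helper with -1/NULL and so behaves like the old flush.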