ac/surface: compute tile swizzle for GFX9

[mesa.git] / src / gallium / drivers / radeon / r600_cs.h
diff --git a/src/gallium/drivers/radeon/r600_cs.h b/src/gallium/drivers/radeon/r600_cs.h

index 03a04b754d6bf869e1371fed9f42b3242e4b407d..89d6c7c16a111c41dae993900e90da5b6879114b 100644 (file)
--- a/src/gallium/drivers/radeon/r600_cs.h
+++ b/src/gallium/drivers/radeon/r600_cs.h
@@ -19,8 +19,6 @@
   * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
   * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
   * USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- * Authors: Marek Olšák <maraeo@gmail.com>
   */
  
  /**
@@ -30,103 +28,164 @@
  #ifndef R600_CS_H
  #define R600_CS_H
  
-#include "r600_pipe_common.h"
-#include "r600d_common.h"
+#include "radeonsi/si_pipe.h"
+#include "amd/common/sid.h"
  
-static inline unsigned r600_context_bo_reloc(struct r600_common_context *rctx,
-                                            struct r600_ring *ring,
-                                            struct r600_resource *rbo,
-                                            enum radeon_bo_usage usage,
-                                            enum radeon_bo_priority priority)
+/**
+ * Return true if there is enough memory in VRAM and GTT for the buffers
+ * added so far plus the extra "vram" and "gtt" sizes passed in.
+ *
+ * \param vram      VRAM memory size not added to the buffer list yet
+ * \param gtt       GTT memory size not added to the buffer list yet
+ */
+static inline bool
+radeon_cs_memory_below_limit(struct si_screen *screen,
+                            struct radeon_winsys_cs *cs,
+                            uint64_t vram, uint64_t gtt)
  {
-       assert(usage);
+       vram += cs->used_vram;
+       gtt += cs->used_gart;
+
+       /* Anything that goes above the VRAM size should go to GTT. */
+       if (vram > screen->info.vram_size)
+               gtt += vram - screen->info.vram_size;
  
-       /* Make sure that all previous rings are flushed so that everything
-        * looks serialized from the driver point of view.
-        */
-       if (!ring->flushing) {
-               if (ring == &rctx->rings.gfx) {
-                       if (rctx->rings.dma.cs) {
-                               /* flush dma ring */
-                               rctx->rings.dma.flush(rctx, RADEON_FLUSH_ASYNC, NULL);
-                       }
-               } else {
-                       /* flush gfx ring */
-                       rctx->rings.gfx.flush(rctx, RADEON_FLUSH_ASYNC, NULL);
-               }
-       }
-       return rctx->ws->cs_add_reloc(ring->cs, rbo->cs_buf, usage,
-                                     rbo->domains, priority) * 4;
+       /* Now check that the total GTT demand stays below 70% of the GART size. */
+       return gtt < screen->info.gart_size * 0.7;
  }
  
-static inline void r600_emit_reloc(struct r600_common_context *rctx,
-                                  struct r600_ring *ring, struct r600_resource *rbo,
-                                  enum radeon_bo_usage usage,
-                                  enum radeon_bo_priority priority)
+/**
+ * Add a buffer to the buffer list for the given command stream (CS).
+ *
+ * All buffers used by a CS must be added to the list. This tells the kernel
+ * driver which buffers are used by GPU commands. Other buffers can
+ * be swapped out (not accessible) during execution.
+ *
+ * The buffer list becomes empty after every context flush and must be
+ * rebuilt.
+ */
+static inline unsigned radeon_add_to_buffer_list(struct r600_common_context *rctx,
+                                                struct r600_ring *ring,
+                                                struct r600_resource *rbo,
+                                                enum radeon_bo_usage usage,
+                                                enum radeon_bo_priority priority)
  {
-       struct radeon_winsys_cs *cs = ring->cs;
-       bool has_vm = ((struct r600_common_screen*)rctx->b.screen)->info.r600_virtual_address;
-       unsigned reloc = r600_context_bo_reloc(rctx, ring, rbo, usage, priority);
-
-       if (!has_vm) {
-               radeon_emit(cs, PKT3(PKT3_NOP, 0, 0));
-               radeon_emit(cs, reloc);
-       }
+       assert(usage);
+       return rctx->ws->cs_add_buffer(
+               ring->cs, rbo->buf,
+               (enum radeon_bo_usage)(usage | RADEON_USAGE_SYNCHRONIZED),
+               rbo->domains, priority) * 4;
  }
  
-static inline void r600_write_config_reg_seq(struct radeon_winsys_cs *cs, unsigned reg, unsigned num)
+/**
+ * Same as radeon_add_to_buffer_list, but when check_mem is true it first
+ * calls ring->flush if adding the buffer would exceed the memory limit.
+ *
+ * When this SHOULD NOT be used:
+ *
+ * - if r600_context_add_resource_size has been called for the buffer
+ *   followed by *_need_cs_space for checking the memory usage
+ *
+ * - if r600_need_dma_space has been called for the buffer
+ *
+ * - when emitting state packets and draw packets (because preceding packets
+ *   can't be re-emitted at that point)
+ *
+ * - if shader resource "enabled_mask" is not up-to-date or there is
+ *   a different constraint disallowing a context flush
+ */
+static inline unsigned
+radeon_add_to_buffer_list_check_mem(struct r600_common_context *rctx,
+                                   struct r600_ring *ring,
+                                   struct r600_resource *rbo,
+                                   enum radeon_bo_usage usage,
+                                   enum radeon_bo_priority priority,
+                                   bool check_mem)
+{
+       if (check_mem &&
+           !radeon_cs_memory_below_limit(rctx->screen, ring->cs,
+                                         rctx->vram + rbo->vram_usage,
+                                         rctx->gtt + rbo->gart_usage))
+               ring->flush(rctx, PIPE_FLUSH_ASYNC, NULL);
+
+       return radeon_add_to_buffer_list(rctx, ring, rbo, usage, priority);
+}
+
+static inline void radeon_set_config_reg_seq(struct radeon_winsys_cs *cs, unsigned reg, unsigned num)
  {
-       assert(reg < R600_CONTEXT_REG_OFFSET);
-       assert(cs->cdw+2+num <= cs->max_dw);
+       assert(reg < SI_CONTEXT_REG_OFFSET);
+       assert(cs->current.cdw + 2 + num <= cs->current.max_dw);
         radeon_emit(cs, PKT3(PKT3_SET_CONFIG_REG, num, 0));
-       radeon_emit(cs, (reg - R600_CONFIG_REG_OFFSET) >> 2);
+       radeon_emit(cs, (reg - SI_CONFIG_REG_OFFSET) >> 2);
  }
  
-static inline void r600_write_config_reg(struct radeon_winsys_cs *cs, unsigned reg, unsigned value)
+static inline void radeon_set_config_reg(struct radeon_winsys_cs *cs, unsigned reg, unsigned value)
  {
-       r600_write_config_reg_seq(cs, reg, 1);
+       radeon_set_config_reg_seq(cs, reg, 1);
         radeon_emit(cs, value);
  }
  
-static inline void r600_write_context_reg_seq(struct radeon_winsys_cs *cs, unsigned reg, unsigned num)
+static inline void radeon_set_context_reg_seq(struct radeon_winsys_cs *cs, unsigned reg, unsigned num)
  {
-       assert(reg >= R600_CONTEXT_REG_OFFSET);
-       assert(cs->cdw+2+num <= cs->max_dw);
+       assert(reg >= SI_CONTEXT_REG_OFFSET);
+       assert(cs->current.cdw + 2 + num <= cs->current.max_dw);
         radeon_emit(cs, PKT3(PKT3_SET_CONTEXT_REG, num, 0));
-       radeon_emit(cs, (reg - R600_CONTEXT_REG_OFFSET) >> 2);
+       radeon_emit(cs, (reg - SI_CONTEXT_REG_OFFSET) >> 2);
  }
  
-static inline void r600_write_context_reg(struct radeon_winsys_cs *cs, unsigned reg, unsigned value)
+static inline void radeon_set_context_reg(struct radeon_winsys_cs *cs, unsigned reg, unsigned value)
  {
-       r600_write_context_reg_seq(cs, reg, 1);
+       radeon_set_context_reg_seq(cs, reg, 1);
         radeon_emit(cs, value);
  }
  
-static inline void si_write_sh_reg_seq(struct radeon_winsys_cs *cs, unsigned reg, unsigned num)
+static inline void radeon_set_context_reg_idx(struct radeon_winsys_cs *cs,
+                                             unsigned reg, unsigned idx,
+                                             unsigned value)
+{
+       assert(reg >= SI_CONTEXT_REG_OFFSET);
+       assert(cs->current.cdw + 3 <= cs->current.max_dw);
+       radeon_emit(cs, PKT3(PKT3_SET_CONTEXT_REG, 1, 0));
+       radeon_emit(cs, (reg - SI_CONTEXT_REG_OFFSET) >> 2 | (idx << 28));
+       radeon_emit(cs, value);
+}
+
+static inline void radeon_set_sh_reg_seq(struct radeon_winsys_cs *cs, unsigned reg, unsigned num)
  {
         assert(reg >= SI_SH_REG_OFFSET && reg < SI_SH_REG_END);
-       assert(cs->cdw+2+num <= cs->max_dw);
+       assert(cs->current.cdw + 2 + num <= cs->current.max_dw);
         radeon_emit(cs, PKT3(PKT3_SET_SH_REG, num, 0));
         radeon_emit(cs, (reg - SI_SH_REG_OFFSET) >> 2);
  }
  
-static inline void si_write_sh_reg(struct radeon_winsys_cs *cs, unsigned reg, unsigned value)
+static inline void radeon_set_sh_reg(struct radeon_winsys_cs *cs, unsigned reg, unsigned value)
  {
-       si_write_sh_reg_seq(cs, reg, 1);
+       radeon_set_sh_reg_seq(cs, reg, 1);
         radeon_emit(cs, value);
  }
  
-static inline void cik_write_uconfig_reg_seq(struct radeon_winsys_cs *cs, unsigned reg, unsigned num)
+static inline void radeon_set_uconfig_reg_seq(struct radeon_winsys_cs *cs, unsigned reg, unsigned num)
  {
         assert(reg >= CIK_UCONFIG_REG_OFFSET && reg < CIK_UCONFIG_REG_END);
-       assert(cs->cdw+2+num <= cs->max_dw);
+       assert(cs->current.cdw + 2 + num <= cs->current.max_dw);
         radeon_emit(cs, PKT3(PKT3_SET_UCONFIG_REG, num, 0));
         radeon_emit(cs, (reg - CIK_UCONFIG_REG_OFFSET) >> 2);
  }
  
-static inline void cik_write_uconfig_reg(struct radeon_winsys_cs *cs, unsigned reg, unsigned value)
+static inline void radeon_set_uconfig_reg(struct radeon_winsys_cs *cs, unsigned reg, unsigned value)
  {
-       cik_write_uconfig_reg_seq(cs, reg, 1);
+       radeon_set_uconfig_reg_seq(cs, reg, 1);
+       radeon_emit(cs, value);
+}
+
+static inline void radeon_set_uconfig_reg_idx(struct radeon_winsys_cs *cs,
+                                             unsigned reg, unsigned idx,
+                                             unsigned value)
+{
+       assert(reg >= CIK_UCONFIG_REG_OFFSET && reg < CIK_UCONFIG_REG_END);
+       assert(cs->current.cdw + 3 <= cs->current.max_dw);
+       radeon_emit(cs, PKT3(PKT3_SET_UCONFIG_REG, 1, 0));
+       radeon_emit(cs, (reg - CIK_UCONFIG_REG_OFFSET) >> 2 | (idx << 28));
         radeon_emit(cs, value);
  }