ac/surface: compute tile swizzle for GFX9

[mesa.git] / src / gallium / drivers / radeon / r600_cs.h
diff --git a/src/gallium/drivers/radeon/r600_cs.h b/src/gallium/drivers/radeon/r600_cs.h

index caf7deef37c0dd11645985a1d900fb8aaff860cd..89d6c7c16a111c41dae993900e90da5b6879114b 100644 (file)
--- a/src/gallium/drivers/radeon/r600_cs.h
+++ b/src/gallium/drivers/radeon/r600_cs.h
@@ -19,8 +19,6 @@
   * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
   * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
   * USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- * Authors: Marek Olšák <maraeo@gmail.com>
   */
  
  /**
@@ -30,8 +28,31 @@
  #ifndef R600_CS_H
  #define R600_CS_H
  
-#include "r600_pipe_common.h"
-#include "r600d_common.h"
+#include "radeonsi/si_pipe.h"
+#include "amd/common/sid.h"
+
+/**
+ * Return true if there is enough memory in VRAM and GTT for the buffers
+ * added so far.
+ *
+ * \param vram      VRAM memory size not added to the buffer list yet
+ * \param gtt       GTT memory size not added to the buffer list yet
+ */
+static inline bool
+radeon_cs_memory_below_limit(struct si_screen *screen,
+                            struct radeon_winsys_cs *cs,
+                            uint64_t vram, uint64_t gtt)
+{
+       vram += cs->used_vram;
+       gtt += cs->used_gart;
+
+       /* Anything that goes above the VRAM size should go to GTT. */
+       if (vram > screen->info.vram_size)
+               gtt += vram - screen->info.vram_size;
+
+       /* Now we just need to check if we have enough GTT. */
+       return gtt < screen->info.gart_size * 0.7;
+}
  
  /**
   * Add a buffer to the buffer list for the given command stream (CS).
@@ -50,31 +71,52 @@ static inline unsigned radeon_add_to_buffer_list(struct r600_common_context *rct
                                                  enum radeon_bo_priority priority)
  {
         assert(usage);
-       return rctx->ws->cs_add_buffer(ring->cs, rbo->buf, usage,
-                                     rbo->domains, priority) * 4;
+       return rctx->ws->cs_add_buffer(
+               ring->cs, rbo->buf,
+               (enum radeon_bo_usage)(usage | RADEON_USAGE_SYNCHRONIZED),
+               rbo->domains, priority) * 4;
  }
  
-static inline void r600_emit_reloc(struct r600_common_context *rctx,
-                                  struct r600_ring *ring, struct r600_resource *rbo,
-                                  enum radeon_bo_usage usage,
-                                  enum radeon_bo_priority priority)
+/**
+ * Same as above, but also checks memory usage and flushes the context
+ * accordingly.
+ *
+ * When this SHOULD NOT be used:
+ *
+ * - if r600_context_add_resource_size has been called for the buffer
+ *   followed by *_need_cs_space for checking the memory usage
+ *
+ * - if r600_need_dma_space has been called for the buffer
+ *
+ * - when emitting state packets and draw packets (because preceding packets
+ *   can't be re-emitted at that point)
+ *
+ * - if shader resource "enabled_mask" is not up-to-date or there is
+ *   a different constraint disallowing a context flush
+ */
+static inline unsigned
+radeon_add_to_buffer_list_check_mem(struct r600_common_context *rctx,
+                                   struct r600_ring *ring,
+                                   struct r600_resource *rbo,
+                                   enum radeon_bo_usage usage,
+                                   enum radeon_bo_priority priority,
+                                   bool check_mem)
  {
-       struct radeon_winsys_cs *cs = ring->cs;
-       bool has_vm = ((struct r600_common_screen*)rctx->b.screen)->info.r600_virtual_address;
-       unsigned reloc = radeon_add_to_buffer_list(rctx, ring, rbo, usage, priority);
-
-       if (!has_vm) {
-               radeon_emit(cs, PKT3(PKT3_NOP, 0, 0));
-               radeon_emit(cs, reloc);
-       }
+       if (check_mem &&
+           !radeon_cs_memory_below_limit(rctx->screen, ring->cs,
+                                         rctx->vram + rbo->vram_usage,
+                                         rctx->gtt + rbo->gart_usage))
+               ring->flush(rctx, PIPE_FLUSH_ASYNC, NULL);
+
+       return radeon_add_to_buffer_list(rctx, ring, rbo, usage, priority);
  }
  
  static inline void radeon_set_config_reg_seq(struct radeon_winsys_cs *cs, unsigned reg, unsigned num)
  {
-       assert(reg < R600_CONTEXT_REG_OFFSET);
-       assert(cs->cdw+2+num <= cs->max_dw);
+       assert(reg < SI_CONTEXT_REG_OFFSET);
+       assert(cs->current.cdw + 2 + num <= cs->current.max_dw);
         radeon_emit(cs, PKT3(PKT3_SET_CONFIG_REG, num, 0));
-       radeon_emit(cs, (reg - R600_CONFIG_REG_OFFSET) >> 2);
+       radeon_emit(cs, (reg - SI_CONFIG_REG_OFFSET) >> 2);
  }
  
  static inline void radeon_set_config_reg(struct radeon_winsys_cs *cs, unsigned reg, unsigned value)
@@ -85,10 +127,10 @@ static inline void radeon_set_config_reg(struct radeon_winsys_cs *cs, unsigned r
  
  static inline void radeon_set_context_reg_seq(struct radeon_winsys_cs *cs, unsigned reg, unsigned num)
  {
-       assert(reg >= R600_CONTEXT_REG_OFFSET);
-       assert(cs->cdw+2+num <= cs->max_dw);
+       assert(reg >= SI_CONTEXT_REG_OFFSET);
+       assert(cs->current.cdw + 2 + num <= cs->current.max_dw);
         radeon_emit(cs, PKT3(PKT3_SET_CONTEXT_REG, num, 0));
-       radeon_emit(cs, (reg - R600_CONTEXT_REG_OFFSET) >> 2);
+       radeon_emit(cs, (reg - SI_CONTEXT_REG_OFFSET) >> 2);
  }
  
  static inline void radeon_set_context_reg(struct radeon_winsys_cs *cs, unsigned reg, unsigned value)
@@ -97,10 +139,21 @@ static inline void radeon_set_context_reg(struct radeon_winsys_cs *cs, unsigned
         radeon_emit(cs, value);
  }
  
+static inline void radeon_set_context_reg_idx(struct radeon_winsys_cs *cs,
+                                             unsigned reg, unsigned idx,
+                                             unsigned value)
+{
+       assert(reg >= SI_CONTEXT_REG_OFFSET);
+       assert(cs->current.cdw + 3 <= cs->current.max_dw);
+       radeon_emit(cs, PKT3(PKT3_SET_CONTEXT_REG, 1, 0));
+       radeon_emit(cs, (reg - SI_CONTEXT_REG_OFFSET) >> 2 | (idx << 28));
+       radeon_emit(cs, value);
+}
+
  static inline void radeon_set_sh_reg_seq(struct radeon_winsys_cs *cs, unsigned reg, unsigned num)
  {
         assert(reg >= SI_SH_REG_OFFSET && reg < SI_SH_REG_END);
-       assert(cs->cdw+2+num <= cs->max_dw);
+       assert(cs->current.cdw + 2 + num <= cs->current.max_dw);
         radeon_emit(cs, PKT3(PKT3_SET_SH_REG, num, 0));
         radeon_emit(cs, (reg - SI_SH_REG_OFFSET) >> 2);
  }
@@ -114,7 +167,7 @@ static inline void radeon_set_sh_reg(struct radeon_winsys_cs *cs, unsigned reg,
  static inline void radeon_set_uconfig_reg_seq(struct radeon_winsys_cs *cs, unsigned reg, unsigned num)
  {
         assert(reg >= CIK_UCONFIG_REG_OFFSET && reg < CIK_UCONFIG_REG_END);
-       assert(cs->cdw+2+num <= cs->max_dw);
+       assert(cs->current.cdw + 2 + num <= cs->current.max_dw);
         radeon_emit(cs, PKT3(PKT3_SET_UCONFIG_REG, num, 0));
         radeon_emit(cs, (reg - CIK_UCONFIG_REG_OFFSET) >> 2);
  }
@@ -125,4 +178,15 @@ static inline void radeon_set_uconfig_reg(struct radeon_winsys_cs *cs, unsigned
         radeon_emit(cs, value);
  }
  
+static inline void radeon_set_uconfig_reg_idx(struct radeon_winsys_cs *cs,
+                                             unsigned reg, unsigned idx,
+                                             unsigned value)
+{
+       assert(reg >= CIK_UCONFIG_REG_OFFSET && reg < CIK_UCONFIG_REG_END);
+       assert(cs->current.cdw + 3 <= cs->current.max_dw);
+       radeon_emit(cs, PKT3(PKT3_SET_UCONFIG_REG, 1, 0));
+       radeon_emit(cs, (reg - CIK_UCONFIG_REG_OFFSET) >> 2 | (idx << 28));
+       radeon_emit(cs, value);
+}
+
  #endif