#include "brw_gs.h"
#include "brw_wm.h"
#include "brw_cs.h"
+#include "genxml/genX_bits.h"
#include "main/framebuffer.h"
+/**
+ * Select the replay (preemption) mode programmed into CS_CHICKEN1.
+ *
+ * When \c enable is true the GEN9_REPLAY_MODE_MIDOBJECT value is written,
+ * otherwise GEN9_REPLAY_MODE_MIDBUFFER.  The call is a no-op when
+ * brw->object_preemption already equals \c enable; otherwise an end-of-pipe
+ * sync is emitted, the register is written, and the new setting is cached
+ * in brw->object_preemption.
+ *
+ * Must only be called on gen9 and later (asserted).
+ */
+void
+brw_enable_obj_preemption(struct brw_context *brw, bool enable)
+{
+   ASSERTED const struct gen_device_info *devinfo = &brw->screen->devinfo;
+   assert(devinfo->gen >= 9);
+
+   /* Nothing to do if the cached state already matches the request. */
+   if (enable == brw->object_preemption)
+      return;
+
+   /* A fixed function pipe flush is required before modifying this field */
+   brw_emit_end_of_pipe_sync(brw, PIPE_CONTROL_RENDER_TARGET_FLUSH);
+
+   /* Hold the register field value in a full-width integer: a bool would
+    * collapse any non-zero GEN9_REPLAY_MODE_* constant to 1.
+    */
+   const uint32_t replay_mode = enable ?
+      GEN9_REPLAY_MODE_MIDOBJECT : GEN9_REPLAY_MODE_MIDBUFFER;
+
+   /* Program the selected replay mode together with its write mask. */
+   brw_load_register_imm32(brw, CS_CHICKEN1,
+                           replay_mode | GEN9_REPLAY_MODE_MASK);
+
+   brw->object_preemption = enable;
+}
+
+/**
+ * Upload a slice hashing table and enable it through 3DSTATE_3D_MODE.
+ *
+ * Returns without emitting anything when both pixel pipes report the same
+ * number of subslices (devinfo->ppipe_subslices[0] == [1]).  Otherwise a
+ * table of GEN11_SLICE_HASH_TABLE_length dwords is allocated from the state
+ * batch, filled with 4-bit slice entries, pointed to with
+ * 3DSTATE_SLICE_TABLE_STATE_POINTERS, and then switched on by setting
+ * SLICE_HASHING_TABLE_ENABLE in 3DSTATE_3D_MODE.
+ */
+static void
+brw_upload_gen11_slice_hashing_state(struct brw_context *brw)
+{
+   const struct gen_device_info *devinfo = &brw->screen->devinfo;
+   int subslices_delta =
+      devinfo->ppipe_subslices[0] - devinfo->ppipe_subslices[1];
+   if (subslices_delta == 0)
+      return;
+
+   /* One 32-bit dword per table entry. */
+   unsigned size = GEN11_SLICE_HASH_TABLE_length * 4;
+   uint32_t hash_address;
+
+   /* NOTE(review): the third argument (64) is presumably the required
+    * alignment of the table in bytes -- confirm against brw_state_batch().
+    */
+   uint32_t *map = brw_state_batch(brw, size, 64, &hash_address);
+
+   unsigned idx = 0;
+
+   /* sl_small is the pixel pipe with fewer subslices, sl_big the one with
+    * more.  A positive delta means pipe 0 is the bigger one.
+    */
+   unsigned sl_small = 0;
+   unsigned sl_big = 1;
+   if (subslices_delta > 0) {
+      sl_small = 1;
+      sl_big = 0;
+   }
+
+   /**
+    * Create a 16x16 slice hashing table like the following one:
+    *
+    * [ 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1 ]
+    * [ 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1 ]
+    * [ 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0 ]
+    * [ 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1 ]
+    * [ 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1 ]
+    * [ 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0 ]
+    * [ 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1 ]
+    * [ 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1 ]
+    * [ 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0 ]
+    * [ 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1 ]
+    * [ 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1 ]
+    * [ 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0 ]
+    * [ 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1 ]
+    * [ 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1 ]
+    * [ 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0 ]
+    * [ 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1 ]
+    *
+    * The table above is used when the pixel pipe 0 has less subslices than
+    * pixel pipe 1. When pixel pipe 0 has more subslices, then a similar table
+    * with 0's and 1's inverted is used.
+    */
+   for (int i = 0; i < GEN11_SLICE_HASH_TABLE_length; i++) {
+      uint32_t dw = 0;
+
+      /* Each dword packs eight 4-bit entries.  Every third entry overall
+       * (idx % 3 == 0) selects the smaller pipe; the other two select the
+       * bigger one.
+       */
+      for (int j = 0; j < 8; j++) {
+         unsigned slice = (idx++ % 3) ? sl_big : sl_small;
+         dw |= slice << (j * 4);
+      }
+      map[i] = dw;
+   }
+
+   /* NOTE(review): bit 0 OR'ed into the table address is presumably the
+    * "pointer valid" bit of this command -- confirm against the PRM.
+    */
+   BEGIN_BATCH(2);
+   OUT_BATCH(_3DSTATE_SLICE_TABLE_STATE_POINTERS << 16 | (2 - 2));
+   OUT_RELOC(brw->batch.state.bo, 0, hash_address | 1);
+   ADVANCE_BATCH();
+
+   /* From gen10/gen11 workaround table in h/w specs:
+    *
+    *    "On 3DSTATE_3D_MODE, driver must always program bits 31:16 of DW1
+    *     a value of 0xFFFF"
+    *
+    * This means that whenever we update a field with this instruction, we need
+    * to update all the others.
+    *
+    * Since this is the first time we emit this
+    * instruction, we are only setting the SLICE_HASHING_TABLE_ENABLE flag,
+    * and leaving everything else at their default state (0).
+    */
+   BEGIN_BATCH(2);
+   OUT_BATCH(_3DSTATE_3D_MODE << 16 | (2 - 2));
+   /* The 0xFFFF mask belongs in bits 31:16; the unsigned literal keeps the
+    * shift well-defined.
+    */
+   OUT_BATCH((0xffffu << 16) | SLICE_HASHING_TABLE_ENABLE);
+   ADVANCE_BATCH();
+}
+
static void
brw_upload_initial_gpu_state(struct brw_context *brw)
{
const struct gen_device_info *devinfo = &brw->screen->devinfo;
+ const struct brw_compiler *compiler = brw->screen->compiler;
/* On platforms with hardware contexts, we can set our initial GPU state
* right away rather than doing it via state atoms. This saves a small
brw_upload_invariant_state(brw);
- if (devinfo->gen == 10 || devinfo->gen == 11) {
- brw_load_register_imm32(brw, GEN10_CACHE_MODE_SS,
- REG_MASK(GEN10_FLOAT_BLEND_OPTIMIZATION_ENABLE) |
- GEN10_FLOAT_BLEND_OPTIMIZATION_ENABLE);
-
- /* From gen10 workaround table in h/w specs:
- *
- * "On 3DSTATE_3D_MODE, driver must always program bits 31:16 of DW1
- * a value of 0xFFFF"
- *
- * This means that we end up setting the entire 3D_MODE state. Bits
- * in this register control things such as slice hashing and we want
- * the default values of zero at the moment.
+ if (devinfo->gen == 11) {
+ /* The default behavior of bit 5 "Headerless Message for Pre-emptable
+ * Contexts" in SAMPLER MODE register is set to 0, which means
+ * headerless sampler messages are not allowed for pre-emptable
+ * contexts. Set the bit 5 to 1 to allow them.
*/
- BEGIN_BATCH(2);
- OUT_BATCH(_3DSTATE_3D_MODE << 16 | (2 - 2));
- OUT_BATCH(0xFFFF << 16);
- ADVANCE_BATCH();
+ brw_load_register_imm32(brw, GEN11_SAMPLER_MODE,
+ HEADERLESS_MESSAGE_FOR_PREEMPTABLE_CONTEXTS_MASK |
+ HEADERLESS_MESSAGE_FOR_PREEMPTABLE_CONTEXTS);
+
+ /* Bit 1 "Enabled Texel Offset Precision Fix" must be set in
+ * HALF_SLICE_CHICKEN7 register.
+ */
+ brw_load_register_imm32(brw, HALF_SLICE_CHICKEN7,
+ TEXEL_OFFSET_FIX_MASK |
+ TEXEL_OFFSET_FIX_ENABLE);
+
+ /* WA_1406697149: Bit 9 "Error Detection Behavior Control" must be set
+ * in L3CNTLREG register. The default setting of the bit is not the
+ * desirable behavior.
+ */
+ brw_load_register_imm32(brw, GEN8_L3CNTLREG,
+ GEN8_L3CNTLREG_EDBC_NO_HANG);
+ }
+
+ /* The hardware specification recommends disabling repacking for
+ * compatibility with the decompression mechanism in the display controller.
+ */
+ if (devinfo->disable_ccs_repack) {
+ brw_load_register_imm32(brw, GEN7_CACHE_MODE_0,
+ GEN11_DISABLE_REPACKING_FOR_COMPRESSION |
+ REG_MASK(GEN11_DISABLE_REPACKING_FOR_COMPRESSION));
}
if (devinfo->gen == 9) {
REG_MASK(GEN9_PARTIAL_RESOLVE_DISABLE_IN_VC) |
GEN9_FLOAT_BLEND_OPTIMIZATION_ENABLE |
GEN9_PARTIAL_RESOLVE_DISABLE_IN_VC);
-
- if (gen_device_info_is_9lp(devinfo)) {
- brw_load_register_imm32(brw, GEN7_GT_MODE,
- GEN9_SUBSLICE_HASHING_MASK_BITS |
- GEN9_SUBSLICE_HASHING_16x16);
- }
}
if (devinfo->gen >= 8) {
OUT_BATCH(0);
ADVANCE_BATCH();
}
+
+ /* Set the "CONSTANT_BUFFER Address Offset Disable" bit, so
+ * 3DSTATE_CONSTANT_XS buffer 0 is an absolute address.
+ *
+ * This is only safe on kernels with context isolation support.
+ */
+ if (!compiler->constant_buffer_0_is_relative) {
+ if (devinfo->gen >= 9) {
+ BEGIN_BATCH(3);
+ OUT_BATCH(MI_LOAD_REGISTER_IMM | (3 - 2));
+ OUT_BATCH(CS_DEBUG_MODE2);
+ OUT_BATCH(REG_MASK(CSDBG2_CONSTANT_BUFFER_ADDRESS_OFFSET_DISABLE) |
+ CSDBG2_CONSTANT_BUFFER_ADDRESS_OFFSET_DISABLE);
+ ADVANCE_BATCH();
+ } else if (devinfo->gen == 8) {
+ BEGIN_BATCH(3);
+ OUT_BATCH(MI_LOAD_REGISTER_IMM | (3 - 2));
+ OUT_BATCH(INSTPM);
+ OUT_BATCH(REG_MASK(INSTPM_CONSTANT_BUFFER_ADDRESS_OFFSET_DISABLE) |
+ INSTPM_CONSTANT_BUFFER_ADDRESS_OFFSET_DISABLE);
+ ADVANCE_BATCH();
+ }
+ }
+
+ brw->object_preemption = false;
+
+ if (devinfo->gen >= 10)
+ brw_enable_obj_preemption(brw, true);
+
+ if (devinfo->gen == 11)
+ brw_upload_gen11_slice_hashing_state(brw);
}
static inline const struct brw_tracked_state *
DEFINE_BIT(_NEW_TRANSFORM),
DEFINE_BIT(_NEW_VIEWPORT),
DEFINE_BIT(_NEW_TEXTURE_STATE),
- DEFINE_BIT(_NEW_ARRAY),
DEFINE_BIT(_NEW_RENDERMODE),
DEFINE_BIT(_NEW_BUFFERS),
DEFINE_BIT(_NEW_CURRENT_ATTRIB),
brw_select_pipeline(brw, pipeline);
+ if (pipeline == BRW_RENDER_PIPELINE && brw->current_hash_scale != 1)
+ brw_emit_hashing_mode(brw, UINT_MAX, UINT_MAX, 1);
+
if (unlikely(INTEL_DEBUG & DEBUG_REEMIT)) {
/* Always re-emit all state. */
brw->NewGLState = ~0;