From: Dave Airlie
Date: Fri, 9 Nov 2018 02:13:30 +0000 (+1000)
Subject: iris: handle qbo fragment shader invocation workaround
X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=df60241ff7d212eeecfdad7090a7a4aa9fc8a4a5;p=mesa.git

iris: handle qbo fragment shader invocation workaround
---

diff --git a/src/gallium/drivers/iris/iris_query.c b/src/gallium/drivers/iris/iris_query.c
index fcc443e422a..de579f25584 100644
--- a/src/gallium/drivers/iris/iris_query.c
+++ b/src/gallium/drivers/iris/iris_query.c
@@ -606,6 +606,52 @@ keep_gpr0_lower_n_bits(struct iris_context *ice, uint32_t n)
    iris_batch_emit(batch, math, sizeof(math));
 }
 
+/*
+ * GPR0 = GPR0 << 30;
+ */
+static void
+shl_gpr0_by_30_bits(struct iris_context *ice)
+{
+   struct iris_batch *batch = &ice->batches[IRIS_BATCH_RENDER];
+   /* First we mask 34 bits of GPR0 to prevent overflow */
+   keep_gpr0_lower_n_bits(ice, 34);
+
+   static const uint32_t shl_math[] = {
+      MI_ALU2(LOAD, SRCA, R0),
+      MI_ALU2(LOAD, SRCB, R0),
+      MI_ALU0(ADD),
+      MI_ALU2(STORE, R0, ACCU),
+   };
+
+   const uint32_t outer_count = 5;
+   const uint32_t inner_count = 6;
+   const uint32_t cmd_len = 1 + inner_count * ARRAY_SIZE(shl_math);
+   const uint32_t batch_len = cmd_len * outer_count;
+   uint32_t *map = iris_get_command_space(batch, batch_len * 4);
+   uint32_t offset = 0;
+   for (int o = 0; o < outer_count; o++) {
+      map[offset++] = MI_MATH | (cmd_len - 2);
+      for (int i = 0; i < inner_count; i++) {
+         memcpy(&map[offset], shl_math, sizeof(shl_math));
+         offset += 4;
+      }
+   }
+}
+
+/*
+ * GPR0 = GPR0 >> 2;
+ *
+ * Note that the upper 30 bits of GPR0 are lost!
+ */
+static void
+shr_gpr0_by_2_bits(struct iris_context *ice)
+{
+   struct iris_batch *batch = &ice->batches[IRIS_BATCH_RENDER];
+   shl_gpr0_by_30_bits(ice);
+   ice->vtbl.load_register_reg32(batch, CS_GPR(0) + 4, CS_GPR(0));
+   ice->vtbl.load_register_imm32(batch, CS_GPR(0) + 4, 0);
+}
+
 /**
  * Calculate the result and store it to CS_GPR0.
  */
@@ -614,6 +660,8 @@ calculate_result_on_gpu(struct iris_context *ice, struct iris_query *q)
 {
    struct iris_batch *batch = &ice->batches[q->batch_idx];
    struct iris_screen *screen = (void *) ice->ctx.screen;
+   const struct gen_device_info *devinfo = &batch->screen->devinfo;
+
    if (q->type == PIPE_QUERY_SO_OVERFLOW_PREDICATE ||
        q->type == PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE) {
       overflow_result_to_gpr0(ice, q);
@@ -646,6 +694,10 @@ calculate_result_on_gpu(struct iris_context *ice, struct iris_query *q)
    };
    iris_batch_emit(batch, math, sizeof(math));
 
+   /* WaDividePSInvocationCountBy4:HSW,BDW */
+   if (q->type == PIPE_QUERY_PIPELINE_STATISTICS && q->index == 7 && devinfo->gen == 8)
+      shr_gpr0_by_2_bits(ice);
+
    if (q->type == PIPE_QUERY_OCCLUSION_PREDICATE ||
        q->type == PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE)
       gpr0_to_bool(ice);