From ee21c8b1e6d3a506fa04d8f86e99b2afe9fca841 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Wed, 22 May 2013 11:26:03 -0700 Subject: [PATCH] i965/fs: Allocate more register classes on gen7. For texturing from GRFs, we now have payloads of arbitrary sizes up to the message length limit. v2 (Kenneth Graunke): Rebase on intel_context -> brw_context change. v3: Add some comment text. v4: Change some magic 16s to BRW_MAX_MRF (noted by Ken). Leave the 11, which is the magic "max sampler message length". BRW_MAX_MRF sizing on the little int arrays is retained because I could see us needing to extend in the future if we move to GRFs for FB writes (those go to at least 12 long in a quick scan of the specs) Reviewed-by: Kenneth Graunke (v2) Acked-by: Matt Turner --- src/mesa/drivers/dri/i965/brw_context.h | 7 ++-- .../drivers/dri/i965/brw_fs_reg_allocate.cpp | 41 ++++++++++++------- 2 files changed, 30 insertions(+), 18 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h index c6e66550329..fee4e1a7734 100644 --- a/src/mesa/drivers/dri/i965/brw_context.h +++ b/src/mesa/drivers/dri/i965/brw_context.h @@ -1300,10 +1300,11 @@ struct brw_context struct { struct ra_regs *regs; - /** Array of the ra classes for the unaligned contiguous - * register block sizes used. + /** + * Array of the ra classes for the unaligned contiguous register + * block sizes used, indexed by register size. */ - int *classes; + int classes[16]; /** * Mapping for register-allocated objects in *regs to the first diff --git a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp index 68501021eed..f0f4ad9a928 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp @@ -83,9 +83,9 @@ brw_alloc_reg_set(struct brw_context *brw, int reg_width) * aggregates of scalar values at the GLSL level were split to scalar * values by split_virtual_grfs(). * - * However, texture SEND messages return a series of contiguous registers. - * We currently always ask for 4 registers, but we may convert that to use - * less some day. + * However, texture SEND messages return a series of contiguous registers + * to write into. We currently always ask for 4 registers, but we may + * convert that to use less some day. * * Additionally, on gen5 we need aligned pairs of registers for the PLN * instruction, and on gen4 we need 8 contiguous regs for workaround simd16 @@ -94,9 +94,22 @@ brw_alloc_reg_set(struct brw_context *brw, int reg_width) * So we have a need for classes for 1, 2, 4, and 8 registers currently, * and we add in '3' to make indexing the array easier for the common case * (since we'll probably want it for texturing later). + * + * And, on gen7 and newer, we do texturing SEND messages from GRFs, which + * means that we may need any size up to the sampler message size limit (11 + * regs). */ - const int class_count = 5; - const int class_sizes[class_count] = {1, 2, 3, 4, 8}; + int class_count; + int class_sizes[BRW_MAX_MRF]; + + if (brw->gen >= 7) { + for (class_count = 0; class_count < 11; class_count++) + class_sizes[class_count] = class_count + 1; + } else { + for (class_count = 0; class_count < 4; class_count++) + class_sizes[class_count] = class_count + 1; + class_sizes[class_count++] = 8; + } /* Compute the total number of registers across all classes. */ int ra_reg_count = 0; @@ -159,7 +172,10 @@ brw_alloc_reg_set(struct brw_context *brw, int reg_width) ra_set_finalize(regs, NULL); brw->wm.reg_sets[index].regs = regs; - brw->wm.reg_sets[index].classes = classes; + for (unsigned i = 0; i < ARRAY_SIZE(brw->wm.reg_sets[index].classes); i++) + brw->wm.reg_sets[index].classes[i] = -1; + for (int i = 0; i < class_count; i++) + brw->wm.reg_sets[index].classes[class_sizes[i] - 1] = classes[i]; brw->wm.reg_sets[index].ra_reg_to_grf = ra_reg_to_grf; brw->wm.reg_sets[index].aligned_pairs_class = aligned_pairs_class; } @@ -411,17 +427,12 @@ fs_visitor::assign_regs() node_count); for (int i = 0; i < this->virtual_grf_count; i++) { - int size = this->virtual_grf_sizes[i]; + unsigned size = this->virtual_grf_sizes[i]; int c; - if (size == 8) { - c = 4; - } else { - assert(size >= 1 && - size <= 4 && - "Register allocation relies on split_virtual_grfs()"); - c = brw->wm.reg_sets[rsi].classes[size - 1]; - } + assert(size <= ARRAY_SIZE(brw->wm.reg_sets[rsi].classes) && + "Register allocation relies on split_virtual_grfs()"); + c = brw->wm.reg_sets[rsi].classes[size - 1]; /* Special case: on pre-GEN6 hardware that supports PLN, the * second operand of a PLN instruction needs to be an -- 2.30.2