From 0b830081f0ae633911f4dd3e60f27b4ebdb67a2f Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Thu, 17 May 2018 23:49:29 -0700 Subject: [PATCH] intel/fs: Rework KSP data to be SIMD width-based Reviewed-by: Matt Turner --- src/intel/compiler/brw_compiler.h | 53 +++++++++++++-------------- src/intel/compiler/brw_fs.cpp | 35 +++++++++--------- src/intel/compiler/brw_fs_visitor.cpp | 2 +- 3 files changed, 43 insertions(+), 47 deletions(-) diff --git a/src/intel/compiler/brw_compiler.h b/src/intel/compiler/brw_compiler.h index a8ae243e9f5..d3d1f35d016 100644 --- a/src/intel/compiler/brw_compiler.h +++ b/src/intel/compiler/brw_compiler.h @@ -683,11 +683,11 @@ struct brw_wm_prog_data { GLuint num_varying_inputs; - uint8_t reg_blocks_0; - uint8_t reg_blocks_2; + uint8_t reg_blocks_8; + uint8_t reg_blocks_16; - uint8_t dispatch_grf_start_reg_2; - uint32_t prog_offset_2; + uint8_t dispatch_grf_start_reg_16; + uint32_t prog_offset_16; struct { /** @{ @@ -784,51 +784,48 @@ brw_fs_simd_width_for_ksp(unsigned ksp_idx, bool simd8_enabled, static inline uint32_t _brw_wm_prog_data_prog_offset(const struct brw_wm_prog_data *prog_data, - unsigned ksp_idx) + unsigned simd_width) { - switch (ksp_idx) { - case 0: return 0; - case 1: return 0; - case 2: return prog_data->prog_offset_2; - default: - unreachable("Invalid KSP index"); + switch (simd_width) { + case 8: return 0; + case 16: return prog_data->prog_offset_16; + default: return 0; } } #define brw_wm_prog_data_prog_offset(prog_data, wm_state, ksp_idx) \ - _brw_wm_prog_data_prog_offset(prog_data, ksp_idx) + _brw_wm_prog_data_prog_offset(prog_data, \ + brw_wm_state_simd_width_for_ksp(wm_state, ksp_idx)) static inline uint8_t _brw_wm_prog_data_dispatch_grf_start_reg(const struct brw_wm_prog_data *prog_data, - unsigned ksp_idx) + unsigned simd_width) { - switch (ksp_idx) { - case 0: return prog_data->base.dispatch_grf_start_reg; - case 1: return 0; - case 2: return prog_data->dispatch_grf_start_reg_2; - default: - 
unreachable("Invalid KSP index"); + switch (simd_width) { + case 8: return prog_data->base.dispatch_grf_start_reg; + case 16: return prog_data->dispatch_grf_start_reg_16; + default: return 0; } } #define brw_wm_prog_data_dispatch_grf_start_reg(prog_data, wm_state, ksp_idx) \ - _brw_wm_prog_data_dispatch_grf_start_reg(prog_data, ksp_idx) + _brw_wm_prog_data_dispatch_grf_start_reg(prog_data, \ + brw_wm_state_simd_width_for_ksp(wm_state, ksp_idx)) static inline uint8_t _brw_wm_prog_data_reg_blocks(const struct brw_wm_prog_data *prog_data, - unsigned ksp_idx) + unsigned simd_width) { - switch (ksp_idx) { - case 0: return prog_data->reg_blocks_0; - case 1: return 0; - case 2: return prog_data->reg_blocks_2; - default: - unreachable("Invalid KSP index"); + switch (simd_width) { + case 8: return prog_data->reg_blocks_8; + case 16: return prog_data->reg_blocks_16; + default: return 0; } } #define brw_wm_prog_data_reg_blocks(prog_data, wm_state, ksp_idx) \ - _brw_wm_prog_data_reg_blocks(prog_data, ksp_idx) + _brw_wm_prog_data_reg_blocks(prog_data, \ + brw_wm_state_simd_width_for_ksp(wm_state, ksp_idx)) struct brw_push_const_block { unsigned dwords; /* Dword count, not reg aligned */ diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp index 5361e5cccf3..ee0d1967ecc 100644 --- a/src/intel/compiler/brw_fs.cpp +++ b/src/intel/compiler/brw_fs.cpp @@ -7099,8 +7099,6 @@ brw_compile_fs(const struct brw_compiler *compiler, void *log_data, brw_compute_barycentric_interp_modes(compiler->devinfo, shader); cfg_t *simd8_cfg = NULL, *simd16_cfg = NULL; - uint8_t simd8_grf_start = 0, simd16_grf_start = 0; - unsigned simd8_grf_used = 0, simd16_grf_used = 0; fs_visitor v8(compiler, log_data, mem_ctx, key, &prog_data->base, prog, shader, 8, @@ -7112,8 +7110,8 @@ brw_compile_fs(const struct brw_compiler *compiler, void *log_data, return NULL; } else if (likely(!(INTEL_DEBUG & DEBUG_NO8))) { simd8_cfg = v8.cfg; - simd8_grf_start = v8.payload.num_regs; - simd8_grf_used = 
v8.grf_used; + prog_data->base.dispatch_grf_start_reg = v8.payload.num_regs; + prog_data->reg_blocks_8 = brw_register_blocks(v8.grf_used); } if (v8.max_dispatch_width >= 16 && @@ -7129,8 +7127,8 @@ brw_compile_fs(const struct brw_compiler *compiler, void *log_data, v16.fail_msg); } else { simd16_cfg = v16.cfg; - simd16_grf_start = v16.payload.num_regs; - simd16_grf_used = v16.grf_used; + prog_data->dispatch_grf_start_reg_16 = v16.payload.num_regs; + prog_data->reg_blocks_16 = brw_register_blocks(v16.grf_used); } } @@ -7146,6 +7144,16 @@ brw_compile_fs(const struct brw_compiler *compiler, void *log_data, if (compiler->devinfo->gen < 5 && simd16_cfg) simd8_cfg = NULL; + if (compiler->devinfo->gen <= 5 && !simd8_cfg) { + /* Ironlake and earlier only have one Dispatch GRF start field. Make + * the data available in the base prog data struct for convenience. + */ + if (simd16_cfg) { + prog_data->base.dispatch_grf_start_reg = + prog_data->dispatch_grf_start_reg_16; + } + } + if (prog_data->persample_dispatch) { /* Starting with SandyBridge (where we first get MSAA), the different * pixel dispatch combinations are grouped into classifications A @@ -7184,20 +7192,11 @@ brw_compile_fs(const struct brw_compiler *compiler, void *log_data, if (simd8_cfg) { prog_data->dispatch_8 = true; g.generate_code(simd8_cfg, 8); - prog_data->base.dispatch_grf_start_reg = simd8_grf_start; - prog_data->reg_blocks_0 = brw_register_blocks(simd8_grf_used); + } - if (simd16_cfg) { - prog_data->dispatch_16 = true; - prog_data->prog_offset_2 = g.generate_code(simd16_cfg, 16); - prog_data->dispatch_grf_start_reg_2 = simd16_grf_start; - prog_data->reg_blocks_2 = brw_register_blocks(simd16_grf_used); - } - } else if (simd16_cfg) { + if (simd16_cfg) { prog_data->dispatch_16 = true; - g.generate_code(simd16_cfg, 16); - prog_data->base.dispatch_grf_start_reg = simd16_grf_start; - prog_data->reg_blocks_0 = brw_register_blocks(simd16_grf_used); + prog_data->prog_offset_16 = g.generate_code(simd16_cfg, 
16); } return g.get_assembly(); diff --git a/src/intel/compiler/brw_fs_visitor.cpp b/src/intel/compiler/brw_fs_visitor.cpp index a24808eac69..d3d69135711 100644 --- a/src/intel/compiler/brw_fs_visitor.cpp +++ b/src/intel/compiler/brw_fs_visitor.cpp @@ -126,7 +126,7 @@ fs_visitor::emit_dummy_fs() stage_prog_data->nr_pull_params = 0; stage_prog_data->curb_read_length = 0; stage_prog_data->dispatch_grf_start_reg = 2; - wm_prog_data->dispatch_grf_start_reg_2 = 2; + wm_prog_data->dispatch_grf_start_reg_16 = 2; grf_used = 1; /* Gen4-5 don't allow zero GRF blocks */ calculate_cfg(); -- 2.30.2