From dd50c88386c8220f4631115b68a10930378ead6c Mon Sep 17 00:00:00 2001 From: Kenneth Graunke Date: Mon, 26 Nov 2012 21:46:27 -0800 Subject: [PATCH] i965/vs: Move some functions from brw_vec4_emit.cpp to brw_vec4.cpp. This leaves only the final code generation stage in brw_vec4_emit.cpp, moving the payload setup, run(), and brw_vs_emit functions to brw_vec4.cpp. The fragment shader backend puts these functions in brw_fs.cpp, so this patch also helps with consistency. Reviewed-by: Eric Anholt Reviewed-by: Anuj Phogat --- src/mesa/drivers/dri/i965/brw_vec4.cpp | 265 ++++++++++++++++++++ src/mesa/drivers/dri/i965/brw_vec4_emit.cpp | 263 ------------------- 2 files changed, 265 insertions(+), 263 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp b/src/mesa/drivers/dri/i965/brw_vec4.cpp index 10a8310ff88..227accae45f 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp @@ -22,13 +22,18 @@ */ #include "brw_vec4.h" +#include "glsl/ir_print_visitor.h" + extern "C" { #include "main/macros.h" +#include "program/prog_print.h" #include "program/prog_parameter.h" } #define MAX_INSTRUCTION (1 << 30) +using namespace brw; + namespace brw { /** @@ -913,4 +918,264 @@ vec4_visitor::dump_instructions() } } +int +vec4_visitor::setup_attributes(int payload_reg) +{ + int nr_attributes; + int attribute_map[VERT_ATTRIB_MAX + 1]; + + nr_attributes = 0; + for (int i = 0; i < VERT_ATTRIB_MAX; i++) { + if (prog_data->inputs_read & BITFIELD64_BIT(i)) { + attribute_map[i] = payload_reg + nr_attributes; + nr_attributes++; + } + } + + /* VertexID is stored by the VF as the last vertex element, but we + * don't represent it with a flag in inputs_read, so we call it + * VERT_ATTRIB_MAX. + */ + if (prog_data->uses_vertexid) { + attribute_map[VERT_ATTRIB_MAX] = payload_reg + nr_attributes; + nr_attributes++; + } + + foreach_list(node, &this->instructions) { + vec4_instruction *inst = (vec4_instruction *)node; + + /* We have to support ATTR as a destination for GL_FIXED fixup. */ + if (inst->dst.file == ATTR) { + int grf = attribute_map[inst->dst.reg + inst->dst.reg_offset]; + + struct brw_reg reg = brw_vec8_grf(grf, 0); + reg.type = inst->dst.type; + reg.dw1.bits.writemask = inst->dst.writemask; + + inst->dst.file = HW_REG; + inst->dst.fixed_hw_reg = reg; + } + + for (int i = 0; i < 3; i++) { + if (inst->src[i].file != ATTR) + continue; + + int grf = attribute_map[inst->src[i].reg + inst->src[i].reg_offset]; + + struct brw_reg reg = brw_vec8_grf(grf, 0); + reg.dw1.bits.swizzle = inst->src[i].swizzle; + reg.type = inst->src[i].type; + if (inst->src[i].abs) + reg = brw_abs(reg); + if (inst->src[i].negate) + reg = negate(reg); + + inst->src[i].file = HW_REG; + inst->src[i].fixed_hw_reg = reg; + } + } + + /* The BSpec says we always have to read at least one thing from + * the VF, and it appears that the hardware wedges otherwise. + */ + if (nr_attributes == 0) + nr_attributes = 1; + + prog_data->urb_read_length = (nr_attributes + 1) / 2; + + unsigned vue_entries = MAX2(nr_attributes, c->prog_data.vue_map.num_slots); + + if (intel->gen == 6) + c->prog_data.urb_entry_size = ALIGN(vue_entries, 8) / 8; + else + c->prog_data.urb_entry_size = ALIGN(vue_entries, 4) / 4; + + return payload_reg + nr_attributes; +} + +int +vec4_visitor::setup_uniforms(int reg) +{ + /* The pre-gen6 VS requires that some push constants get loaded no + * matter what, or the GPU would hang. + */ + if (intel->gen < 6 && this->uniforms == 0) { + this->uniform_vector_size[this->uniforms] = 1; + + for (unsigned int i = 0; i < 4; i++) { + unsigned int slot = this->uniforms * 4 + i; + static float zero = 0.0; + c->prog_data.param[slot] = &zero; + } + + this->uniforms++; + reg++; + } else { + reg += ALIGN(uniforms, 2) / 2; + } + + c->prog_data.nr_params = this->uniforms * 4; + + c->prog_data.curb_read_length = reg - 1; + + return reg; +} + +void +vec4_visitor::setup_payload(void) +{ + int reg = 0; + + /* The payload always contains important data in g0, which contains + * the URB handles that are passed on to the URB write at the end + * of the thread. So, we always start push constants at g1. + */ + reg++; + + reg = setup_uniforms(reg); + + reg = setup_attributes(reg); + + this->first_non_payload_grf = reg; +} + +bool +vec4_visitor::run() +{ + emit_attribute_fixups(); + + /* Generate VS IR for main(). (the visitor only descends into + * functions called "main"). + */ + if (shader) { + visit_instructions(shader->ir); + } else { + emit_vertex_program_code(); + } + + if (c->key.userclip_active && !c->key.uses_clip_distance) + setup_uniform_clipplane_values(); + + emit_urb_writes(); + + /* Before any optimization, push array accesses out to scratch + * space where we need them to be. This pass may allocate new + * virtual GRFs, so we want to do it early. It also makes sure + * that we have reladdr computations available for CSE, since we'll + * often do repeated subexpressions for those. + */ + if (shader) { + move_grf_array_access_to_scratch(); + move_uniform_array_access_to_pull_constants(); + } else { + /* The ARB_vertex_program frontend emits pull constant loads directly + * rather than using reladdr, so we don't need to walk through all the + * instructions looking for things to move. There isn't anything. + * + * We do still need to split things to vec4 size. + */ + split_uniform_registers(); + } + pack_uniform_registers(); + move_push_constants_to_pull_constants(); + split_virtual_grfs(); + + bool progress; + do { + progress = false; + progress = dead_code_eliminate() || progress; + progress = opt_copy_propagation() || progress; + progress = opt_algebraic() || progress; + progress = opt_compute_to_mrf() || progress; + } while (progress); + + + if (failed) + return false; + + setup_payload(); + + if (false) { + /* Debug of register spilling: Go spill everything. */ + const int grf_count = virtual_grf_count; + float spill_costs[virtual_grf_count]; + bool no_spill[virtual_grf_count]; + evaluate_spill_costs(spill_costs, no_spill); + for (int i = 0; i < grf_count; i++) { + if (no_spill[i]) + continue; + spill_reg(i); + } + } + + while (!reg_allocate()) { + if (failed) + break; + } + + if (failed) + return false; + + brw_set_access_mode(p, BRW_ALIGN_16); + + generate_code(); + + return !failed; +} + } /* namespace brw */ + +extern "C" { + +bool +brw_vs_emit(struct gl_shader_program *prog, struct brw_vs_compile *c) +{ + struct brw_context *brw = c->func.brw; + struct intel_context *intel = &c->func.brw->intel; + bool start_busy = false; + float start_time = 0; + + if (unlikely(INTEL_DEBUG & DEBUG_PERF)) { + start_busy = (intel->batch.last_bo && + drm_intel_bo_busy(intel->batch.last_bo)); + start_time = get_time(); + } + + struct brw_shader *shader = NULL; + if (prog) + shader = (brw_shader *) prog->_LinkedShaders[MESA_SHADER_VERTEX]; + + if (unlikely(INTEL_DEBUG & DEBUG_VS)) { + if (shader) { + printf("GLSL IR for native vertex shader %d:\n", prog->Name); + _mesa_print_ir(shader->ir, NULL); + printf("\n\n"); + } else { + printf("ARB_vertex_program %d for native vertex shader\n", + c->vp->program.Base.Id); + _mesa_print_program(&c->vp->program.Base); + } + } + + if (unlikely(INTEL_DEBUG & DEBUG_PERF) && shader) { + if (shader->compiled_once) { + brw_vs_debug_recompile(brw, prog, &c->key); + } + if (start_busy && !drm_intel_bo_busy(intel->batch.last_bo)) { + perf_debug("VS compile took %.03f ms and stalled the GPU\n", + (get_time() - start_time) * 1000); + } + shader->compiled_once = true; + } + + vec4_visitor v(c, prog, shader); + if (!v.run()) { + prog->LinkStatus = false; + ralloc_strcat(&prog->InfoLog, v.fail_msg); + return false; + } + + return true; +} + +} /* extern "C" */ diff --git a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp index dbfcd0f8b1c..b911983c0ac 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp @@ -21,7 +21,6 @@ */ #include "brw_vec4.h" -#include "glsl/ir_print_visitor.h" extern "C" { #include "brw_eu.h" @@ -30,131 +29,8 @@ extern "C" { #include "program/prog_parameter.h" }; -using namespace brw; - namespace brw { -int -vec4_visitor::setup_attributes(int payload_reg) -{ - int nr_attributes; - int attribute_map[VERT_ATTRIB_MAX + 1]; - - nr_attributes = 0; - for (int i = 0; i < VERT_ATTRIB_MAX; i++) { - if (prog_data->inputs_read & BITFIELD64_BIT(i)) { - attribute_map[i] = payload_reg + nr_attributes; - nr_attributes++; - } - } - - /* VertexID is stored by the VF as the last vertex element, but we - * don't represent it with a flag in inputs_read, so we call it - * VERT_ATTRIB_MAX. - */ - if (prog_data->uses_vertexid) { - attribute_map[VERT_ATTRIB_MAX] = payload_reg + nr_attributes; - nr_attributes++; - } - - foreach_list(node, &this->instructions) { - vec4_instruction *inst = (vec4_instruction *)node; - - /* We have to support ATTR as a destination for GL_FIXED fixup. */ - if (inst->dst.file == ATTR) { - int grf = attribute_map[inst->dst.reg + inst->dst.reg_offset]; - - struct brw_reg reg = brw_vec8_grf(grf, 0); - reg.type = inst->dst.type; - reg.dw1.bits.writemask = inst->dst.writemask; - - inst->dst.file = HW_REG; - inst->dst.fixed_hw_reg = reg; - } - - for (int i = 0; i < 3; i++) { - if (inst->src[i].file != ATTR) - continue; - - int grf = attribute_map[inst->src[i].reg + inst->src[i].reg_offset]; - - struct brw_reg reg = brw_vec8_grf(grf, 0); - reg.dw1.bits.swizzle = inst->src[i].swizzle; - reg.type = inst->src[i].type; - if (inst->src[i].abs) - reg = brw_abs(reg); - if (inst->src[i].negate) - reg = negate(reg); - - inst->src[i].file = HW_REG; - inst->src[i].fixed_hw_reg = reg; - } - } - - /* The BSpec says we always have to read at least one thing from - * the VF, and it appears that the hardware wedges otherwise. - */ - if (nr_attributes == 0) - nr_attributes = 1; - - prog_data->urb_read_length = (nr_attributes + 1) / 2; - - unsigned vue_entries = MAX2(nr_attributes, c->prog_data.vue_map.num_slots); - - if (intel->gen == 6) - c->prog_data.urb_entry_size = ALIGN(vue_entries, 8) / 8; - else - c->prog_data.urb_entry_size = ALIGN(vue_entries, 4) / 4; - - return payload_reg + nr_attributes; -} - -int -vec4_visitor::setup_uniforms(int reg) -{ - /* The pre-gen6 VS requires that some push constants get loaded no - * matter what, or the GPU would hang. - */ - if (intel->gen < 6 && this->uniforms == 0) { - this->uniform_vector_size[this->uniforms] = 1; - - for (unsigned int i = 0; i < 4; i++) { - unsigned int slot = this->uniforms * 4 + i; - static float zero = 0.0; - c->prog_data.param[slot] = &zero; - } - - this->uniforms++; - reg++; - } else { - reg += ALIGN(uniforms, 2) / 2; - } - - c->prog_data.nr_params = this->uniforms * 4; - - c->prog_data.curb_read_length = reg - 1; - - return reg; -} - -void -vec4_visitor::setup_payload(void) -{ - int reg = 0; - - /* The payload always contains important data in g0, which contains - * the URB handles that are passed on to the URB write at the end - * of the thread. So, we always start push constants at g1. - */ - reg++; - - reg = setup_uniforms(reg); - - reg = setup_attributes(reg); - - this->first_non_payload_grf = reg; -} - struct brw_reg vec4_instruction::get_dst(void) { @@ -773,90 +649,6 @@ vec4_visitor::generate_vs_instruction(vec4_instruction *instruction, } } -bool -vec4_visitor::run() -{ - emit_attribute_fixups(); - - /* Generate VS IR for main(). (the visitor only descends into - * functions called "main"). - */ - if (shader) { - visit_instructions(shader->ir); - } else { - emit_vertex_program_code(); - } - - if (c->key.userclip_active && !c->key.uses_clip_distance) - setup_uniform_clipplane_values(); - - emit_urb_writes(); - - /* Before any optimization, push array accesses out to scratch - * space where we need them to be. This pass may allocate new - * virtual GRFs, so we want to do it early. It also makes sure - * that we have reladdr computations available for CSE, since we'll - * often do repeated subexpressions for those. - */ - if (shader) { - move_grf_array_access_to_scratch(); - move_uniform_array_access_to_pull_constants(); - } else { - /* The ARB_vertex_program frontend emits pull constant loads directly - * rather than using reladdr, so we don't need to walk through all the - * instructions looking for things to move. There isn't anything. - * - * We do still need to split things to vec4 size. - */ - split_uniform_registers(); - } - pack_uniform_registers(); - move_push_constants_to_pull_constants(); - split_virtual_grfs(); - - bool progress; - do { - progress = false; - progress = dead_code_eliminate() || progress; - progress = opt_copy_propagation() || progress; - progress = opt_algebraic() || progress; - progress = opt_compute_to_mrf() || progress; - } while (progress); - - - if (failed) - return false; - - setup_payload(); - - if (false) { - /* Debug of register spilling: Go spill everything. */ - const int grf_count = virtual_grf_count; - float spill_costs[virtual_grf_count]; - bool no_spill[virtual_grf_count]; - evaluate_spill_costs(spill_costs, no_spill); - for (int i = 0; i < grf_count; i++) { - if (no_spill[i]) - continue; - spill_reg(i); - } - } - - while (!reg_allocate()) { - if (failed) - break; - } - - if (failed) - return false; - - brw_set_access_mode(p, BRW_ALIGN_16); - - generate_code(); - - return !failed; -} - void vec4_visitor::generate_code() { @@ -1052,59 +844,4 @@ vec4_visitor::generate_code() } } -extern "C" { - -bool -brw_vs_emit(struct gl_shader_program *prog, struct brw_vs_compile *c) -{ - struct brw_context *brw = c->func.brw; - struct intel_context *intel = &c->func.brw->intel; - bool start_busy = false; - float start_time = 0; - - if (unlikely(INTEL_DEBUG & DEBUG_PERF)) { - start_busy = (intel->batch.last_bo && - drm_intel_bo_busy(intel->batch.last_bo)); - start_time = get_time(); - } - - struct brw_shader *shader = NULL; - if (prog) - shader = (brw_shader *) prog->_LinkedShaders[MESA_SHADER_VERTEX]; - - if (unlikely(INTEL_DEBUG & DEBUG_VS)) { - if (shader) { - printf("GLSL IR for native vertex shader %d:\n", prog->Name); - _mesa_print_ir(shader->ir, NULL); - printf("\n\n"); - } else { - printf("ARB_vertex_program %d for native vertex shader\n", - c->vp->program.Base.Id); - _mesa_print_program(&c->vp->program.Base); - } - } - - if (unlikely(INTEL_DEBUG & DEBUG_PERF) && shader) { - if (shader->compiled_once) { - brw_vs_debug_recompile(brw, prog, &c->key); - } - if (start_busy && !drm_intel_bo_busy(intel->batch.last_bo)) { - perf_debug("VS compile took %.03f ms and stalled the GPU\n", - (get_time() - start_time) * 1000); - } - shader->compiled_once = true; - } - - vec4_visitor v(c, prog, shader); - if (!v.run()) { - prog->LinkStatus = false; - ralloc_strcat(&prog->InfoLog, v.fail_msg); - return false; - } - - return true; -} - -} /* extern "C" */ - } /* namespace brw */ -- 2.30.2