X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fmesa%2Fdrivers%2Fdri%2Fi965%2Fbrw_vec4_visitor.cpp;h=76b2a05700f6b06d65070f159515186a70c53bb8;hb=cea360a7087f9533ce596f052070195254a28c9e;hp=2ab141fdf21a48f8ae0046d75f20bf5197810611;hpb=58d4751fa0c5a38069879e9f72047b75f8351d93;p=mesa.git

diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
index 2ab141fdf21..76b2a05700f 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
@@ -183,6 +183,7 @@ ALU3(MAD)
 ALU2_ACC(ADDC)
 ALU2_ACC(SUBB)
 ALU2(MAC)
+ALU1(DIM)
 
 /** Gen4 predicated IF. */
 vec4_instruction *
@@ -566,18 +567,12 @@ vec4_visitor::emit_pack_snorm_4x8(const dst_reg &dst, const src_reg &src0)
    emit(VEC4_OPCODE_PACK_BYTES, dst, bytes);
 }
 
-/**
- * Returns the minimum number of vec4 elements needed to pack a type.
- *
- * For simple types, it will return 1 (a single vec4); for matrices, the
- * number of columns; for array and struct, the sum of the vec4_size of
- * each of its elements; and for sampler and atomic, zero.
- *
- * This method is useful to calculate how much register space is needed to
- * store a particular type.
+/*
+ * Returns the minimum number of vec4 (as_vec4 == true) or dvec4 (as_vec4 ==
+ * false) elements needed to pack a type.
  */
-extern "C" int
-type_size_vec4(const struct glsl_type *type)
+static int
+type_size_xvec4(const struct glsl_type *type, bool as_vec4)
 {
    unsigned int i;
    int size;
@@ -587,23 +582,27 @@ type_size_vec4(const struct glsl_type *type)
    case GLSL_TYPE_INT:
    case GLSL_TYPE_FLOAT:
    case GLSL_TYPE_BOOL:
+   case GLSL_TYPE_DOUBLE:
       if (type->is_matrix()) {
-         return type->matrix_columns;
+         const glsl_type *col_type = type->column_type();
+         unsigned col_slots =
+            (as_vec4 && col_type->is_dual_slot()) ? 2 : 1;
+         return type->matrix_columns * col_slots;
       } else {
-         /* Regardless of size of vector, it gets a vec4. This is bad
-          * packing for things like floats, but otherwise arrays become a
-          * mess.  Hopefully a later pass over the code can pack scalars
-          * down if appropriate.
-          */
-         return 1;
+         /* Regardless of size of vector, it gets a vec4. This is bad
+          * packing for things like floats, but otherwise arrays become a
+          * mess.  Hopefully a later pass over the code can pack scalars
+          * down if appropriate.
+          */
+         return (as_vec4 && type->is_dual_slot()) ? 2 : 1;
       }
    case GLSL_TYPE_ARRAY:
      assert(type->length > 0);
-     return type_size_vec4(type->fields.array) * type->length;
+     return type_size_xvec4(type->fields.array, as_vec4) * type->length;
   case GLSL_TYPE_STRUCT:
      size = 0;
      for (i = 0; i < type->length; i++) {
-        size += type_size_vec4(type->fields.structure[i].type);
+        size += type_size_xvec4(type->fields.structure[i].type, as_vec4);
      }
      return size;
   case GLSL_TYPE_SUBROUTINE:
@@ -619,7 +618,6 @@ type_size_vec4(const struct glsl_type *type)
   case GLSL_TYPE_IMAGE:
      return DIV_ROUND_UP(BRW_IMAGE_PARAM_SIZE, 4);
   case GLSL_TYPE_VOID:
-  case GLSL_TYPE_DOUBLE:
  case GLSL_TYPE_ERROR:
  case GLSL_TYPE_INTERFACE:
  case GLSL_TYPE_FUNCTION:
@@ -629,6 +627,47 @@ type_size_vec4(const struct glsl_type *type)
    return 0;
 }
 
+/**
+ * Returns the minimum number of vec4 elements needed to pack a type.
+ *
+ * For simple types, it will return 1 (a single vec4); for matrices, the
+ * number of columns; for array and struct, the sum of the vec4_size of
+ * each of its elements; and for sampler and atomic, zero.
+ *
+ * This method is useful to calculate how much register space is needed to
+ * store a particular type.
+ */
+extern "C" int
+type_size_vec4(const struct glsl_type *type)
+{
+   return type_size_xvec4(type, true);
+}
+
+/**
+ * Returns the minimum number of dvec4 elements needed to pack a type.
+ *
+ * For simple types, it will return 1 (a single dvec4); for matrices, the
+ * number of columns; for array and struct, the sum of the dvec4_size of
+ * each of its elements; and for sampler and atomic, zero.
+ *
+ * This method is useful to calculate how much register space is needed to
+ * store a particular type.
+ *
+ * Measuring double-precision vertex inputs as dvec4 is required because
+ * ARB_vertex_attrib_64bit states that these use the same number of locations
+ * as the single-precision version. That is, two consecutive dvec4s would be
+ * located in locations "x" and "x+1", not "x+2".
+ *
+ * In order to map vec4/dvec4 vertex inputs to the proper ATTRs,
+ * remap_vs_attrs() will take into account both the location and also
+ * whether the type fits in one or two vec4 slots.
+ */
+extern "C" int
+type_size_dvec4(const struct glsl_type *type)
+{
+   return type_size_xvec4(type, false);
+}
+
 src_reg::src_reg(class vec4_visitor *v, const struct glsl_type *type)
 {
    init();
@@ -758,7 +797,7 @@ vec4_visitor::emit_pull_constant_load_reg(dst_reg dst,
       pull->mlen = 2;
       pull->header_size = 1;
    } else if (devinfo->gen >= 7) {
-      dst_reg grf_offset = dst_reg(this, glsl_type::int_type);
+      dst_reg grf_offset = dst_reg(this, glsl_type::uint_type);
 
       grf_offset.type = offset_reg.type;
 
@@ -868,7 +907,6 @@ vec4_visitor::emit_texture(ir_texture_opcode op,
                            uint32_t constant_offset,
                            src_reg offset_value,
                            src_reg mcs,
-                           bool is_cube_array,
                            uint32_t surface,
                            src_reg surface_reg,
                            uint32_t sampler,
@@ -1056,16 +1094,10 @@ vec4_visitor::emit_texture(ir_texture_opcode op,
    /* fixup num layers (z) for cube arrays: hardware returns faces * layers;
     * spec requires layers.
     */
-   if (op == ir_txs) {
-      if (is_cube_array) {
-         emit_math(SHADER_OPCODE_INT_QUOTIENT,
-                   writemask(inst->dst, WRITEMASK_Z),
-                   src_reg(inst->dst), brw_imm_d(6));
-      } else if (devinfo->gen < 7) {
-         /* Gen4-6 return 0 instead of 1 for single layer surfaces. */
-         emit_minmax(BRW_CONDITIONAL_GE, writemask(inst->dst, WRITEMASK_Z),
-                     src_reg(inst->dst), brw_imm_d(1));
-      }
+   if (op == ir_txs && devinfo->gen < 7) {
+      /* Gen4-6 return 0 instead of 1 for single layer surfaces. */
+      emit_minmax(BRW_CONDITIONAL_GE, writemask(inst->dst, WRITEMASK_Z),
+                  src_reg(inst->dst), brw_imm_d(1));
    }
 
    if (devinfo->gen == 6 && op == ir_tg4) {
@@ -1240,12 +1272,34 @@ vec4_visitor::emit_generic_urb_slot(dst_reg reg, int varying)
    assert(varying < VARYING_SLOT_MAX);
    assert(output_reg[varying].type == reg.type);
    current_annotation = output_reg_annotation[varying];
-   if (output_reg[varying].file != BAD_FILE)
+   if (output_reg[varying].file != BAD_FILE) {
       return emit(MOV(reg, src_reg(output_reg[varying])));
-   else
+   } else
       return NULL;
 }
 
+void
+vec4_visitor::emit_generic_urb_slot(dst_reg reg, int varying, int component)
+{
+   assert(varying < VARYING_SLOT_MAX);
+   assert(varying >= VARYING_SLOT_VAR0);
+   varying = varying - VARYING_SLOT_VAR0;
+
+   unsigned num_comps = output_generic_num_components[varying][component];
+   if (num_comps == 0)
+      return;
+
+   assert(output_generic_reg[varying][component].type == reg.type);
+   current_annotation = output_reg_annotation[varying];
+   if (output_generic_reg[varying][component].file != BAD_FILE) {
+      src_reg src = src_reg(output_generic_reg[varying][component]);
+      src.swizzle = BRW_SWZ_COMP_OUTPUT(component);
+      reg.writemask =
+         brw_writemask_for_component_packing(num_comps, component);
+      emit(MOV(reg, src));
+   }
+}
+
 void
 vec4_visitor::emit_urb_slot(dst_reg reg, int varying)
 {
@@ -1285,7 +1339,13 @@ vec4_visitor::emit_urb_slot(dst_reg reg, int varying)
       /* No need to write to this slot */
       break;
    default:
-      emit_generic_urb_slot(reg, varying);
+      if (varying >= VARYING_SLOT_VAR0) {
+         for (int i = 0; i < 4; i++) {
+            emit_generic_urb_slot(reg, varying, i);
+         }
+      } else {
+         emit_generic_urb_slot(reg, varying);
+      }
       break;
    }
 }
@@ -1410,27 +1470,6 @@ vec4_visitor::get_scratch_offset(bblock_t *block, vec4_instruction *inst,
    }
 }
 
-src_reg
-vec4_visitor::get_pull_constant_offset(bblock_t * block, vec4_instruction *inst,
-                                       src_reg *reladdr, int reg_offset)
-{
-   if (reladdr) {
-      src_reg index = src_reg(this, glsl_type::int_type);
-
-      emit_before(block, inst, ADD(dst_reg(index), *reladdr,
-                                   brw_imm_d(reg_offset * 16)));
-
-      return index;
-   } else if (devinfo->gen >= 8) {
-      /* Store the offset in a GRF so we can send-from-GRF. */
-      src_reg offset = src_reg(this, glsl_type::int_type);
-      emit_before(block, inst, MOV(dst_reg(offset), brw_imm_d(reg_offset * 16)));
-      return offset;
-   } else {
-      return brw_imm_d(reg_offset * 16);
-   }
-}
-
 /**
  * Emits an instruction before @inst to load the value named by @orig_src
  * from scratch space at @base_offset to @temp.
@@ -1608,12 +1647,24 @@ vec4_visitor::move_grf_array_access_to_scratch()
 void
 vec4_visitor::emit_pull_constant_load(bblock_t *block, vec4_instruction *inst,
                                       dst_reg temp, src_reg orig_src,
-                                      int base_offset)
+                                      int base_offset, src_reg indirect)
 {
    int reg_offset = base_offset + orig_src.reg_offset;
    const unsigned index = prog_data->base.binding_table.pull_constants_start;
-   src_reg offset = get_pull_constant_offset(block, inst, orig_src.reladdr,
-                                             reg_offset);
+
+   src_reg offset;
+   if (indirect.file != BAD_FILE) {
+      offset = src_reg(this, glsl_type::uint_type);
+
+      emit_before(block, inst, ADD(dst_reg(offset), indirect,
+                                   brw_imm_ud(reg_offset * 16)));
+   } else if (devinfo->gen >= 8) {
+      /* Store the offset in a GRF so we can send-from-GRF. */
+      offset = src_reg(this, glsl_type::uint_type);
+      emit_before(block, inst, MOV(dst_reg(offset), brw_imm_ud(reg_offset * 16)));
+   } else {
+      offset = brw_imm_d(reg_offset * 16);
+   }
 
    emit_pull_constant_load_reg(temp,
                                brw_imm_ud(index),
@@ -1638,61 +1689,65 @@ vec4_visitor::emit_pull_constant_load(bblock_t *block, vec4_instruction *inst,
 void
 vec4_visitor::move_uniform_array_access_to_pull_constants()
 {
+   /* The Vulkan driver doesn't support pull constants other than UBOs, so
+    * everything has to be pushed regardless.
+    */
+   if (stage_prog_data->pull_param == NULL) {
+      split_uniform_registers();
+      return;
+   }
+
    int pull_constant_loc[this->uniforms];
    memset(pull_constant_loc, -1, sizeof(pull_constant_loc));
-   bool nested_reladdr;
 
-   /* Walk through and find array access of uniforms.  Put a copy of that
-    * uniform in the pull constant buffer.
-    *
-    * Note that we don't move constant-indexed accesses to arrays.  No
-    * testing has been done of the performance impact of this choice.
+   /* First, walk through the instructions and determine which things need to
+    * be pulled.  We mark something as needing to be pulled by setting
+    * pull_constant_loc to 0.
     */
-   do {
-      nested_reladdr = false;
-
-      foreach_block_and_inst_safe(block, vec4_instruction, inst, cfg) {
-         for (int i = 0 ; i < 3; i++) {
-            if (inst->src[i].file != UNIFORM || !inst->src[i].reladdr)
-               continue;
+   foreach_block_and_inst(block, vec4_instruction, inst, cfg) {
+      /* We only care about MOV_INDIRECT of a uniform */
+      if (inst->opcode != SHADER_OPCODE_MOV_INDIRECT ||
+          inst->src[0].file != UNIFORM)
+         continue;
 
-            int uniform = inst->src[i].nr;
+      int uniform_nr = inst->src[0].nr + inst->src[0].reg_offset;
 
-            if (inst->src[i].reladdr->reladdr)
-               nested_reladdr = true;  /* will need another pass */
+      for (unsigned j = 0; j < DIV_ROUND_UP(inst->src[2].ud, 16); j++)
+         pull_constant_loc[uniform_nr + j] = 0;
+   }
 
-            /* If this array isn't already present in the pull constant buffer,
-             * add it.
-             */
-            if (pull_constant_loc[uniform] == -1) {
-               const gl_constant_value **values =
-                  &stage_prog_data->param[uniform * 4];
+   /* Next, we walk the list of uniforms and assign real pull constant
+    * locations and set their corresponding entries in pull_param.
+    */
+   for (int j = 0; j < this->uniforms; j++) {
+      if (pull_constant_loc[j] < 0)
+         continue;
 
-               pull_constant_loc[uniform] = stage_prog_data->nr_pull_params / 4;
+      pull_constant_loc[j] = stage_prog_data->nr_pull_params / 4;
 
-               assert(uniform < uniform_array_size);
-               for (int j = 0; j < uniform_size[uniform] * 4; j++) {
-                  stage_prog_data->pull_param[stage_prog_data->nr_pull_params++]
-                     = values[j];
-               }
-            }
+      for (int i = 0; i < 4; i++) {
+         stage_prog_data->pull_param[stage_prog_data->nr_pull_params++]
+            = stage_prog_data->param[j * 4 + i];
+      }
+   }
 
-            /* Set up the annotation tracking for new generated instructions. */
-            base_ir = inst->ir;
-            current_annotation = inst->annotation;
+   /* Finally, we can walk through the instructions and lower MOV_INDIRECT
+    * instructions to actual uniform pulls.
+    */
+   foreach_block_and_inst_safe(block, vec4_instruction, inst, cfg) {
+      /* We only care about MOV_INDIRECT of a uniform */
+      if (inst->opcode != SHADER_OPCODE_MOV_INDIRECT ||
+          inst->src[0].file != UNIFORM)
+         continue;
 
-            dst_reg temp = dst_reg(this, glsl_type::vec4_type);
+      int uniform_nr = inst->src[0].nr + inst->src[0].reg_offset;
 
-            emit_pull_constant_load(block, inst, temp, inst->src[i],
-                                    pull_constant_loc[uniform]);
+      assert(inst->src[0].swizzle == BRW_SWIZZLE_NOOP);
 
-            inst->src[i].file = temp.file;
-            inst->src[i].nr = temp.nr;
-            inst->src[i].reg_offset = temp.reg_offset;
-            inst->src[i].reladdr = NULL;
-         }
-      }
-   } while (nested_reladdr);
+      emit_pull_constant_load(block, inst, inst->dst, inst->src[0],
+                              pull_constant_loc[uniform_nr], inst->src[1]);
+      inst->remove(block);
+   }
 
    /* Now there are no accesses of the UNIFORM file with a reladdr, so
     * no need to track them as larger-than-vec4 objects.  This will be
@@ -1738,6 +1793,9 @@ vec4_visitor::vec4_visitor(const struct brw_compiler *compiler,
    this->current_annotation = NULL;
    memset(this->output_reg_annotation, 0,
           sizeof(this->output_reg_annotation));
+   memset(this->output_generic_num_components, 0,
+          sizeof(this->output_generic_num_components));
+
    this->virtual_grf_start = NULL;
    this->virtual_grf_end = NULL;
    this->live_intervals = NULL;
@@ -1745,17 +1803,6 @@ vec4_visitor::vec4_visitor(const struct brw_compiler *compiler,
    this->max_grf = devinfo->gen >= 7 ? GEN7_MRF_HACK_START : BRW_MAX_GRF;
 
    this->uniforms = 0;
-
-   /* Initialize uniform_array_size to at least 1 because pre-gen6 VS requires
-    * at least one. See setup_uniforms() in brw_vec4.cpp.
-    */
-   this->uniform_array_size = 1;
-   if (prog_data) {
-      this->uniform_array_size =
-         MAX2(DIV_ROUND_UP(stage_prog_data->nr_params, 4), 1);
-   }
-
-   this->uniform_size = rzalloc_array(mem_ctx, int, this->uniform_array_size);
 }
 
 vec4_visitor::~vec4_visitor()
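
The slot accounting that type_size_xvec4() implements in the diff can be checked in isolation. The following standalone sketch mirrors that logic with a toy stand-in for glsl_type; toy_type and toy_type_size_xvec4 are made-up names, not part of Mesa. Counted in vec4 units a dual-slot column costs two registers, counted in dvec4 units it costs one, which is exactly the ARB_vertex_attrib_64bit location rule described in the type_size_dvec4() comment.

   #include <cassert>
   #include <cstdio>

   /* Hypothetical stand-in for the glsl_type queries used by the diff: a
    * column is "dual slot" when it is double-based and has more than two
    * components (e.g. dvec3, dvec4). */
   struct toy_type {
      bool is_double;
      unsigned components;      /* per column */
      unsigned matrix_columns;  /* 1 for non-matrices */
      bool is_dual_slot() const { return is_double && components > 2; }
   };

   /* Mirrors the as_vec4 switch in type_size_xvec4(): in vec4 units a
    * dual-slot column occupies two registers; in dvec4 units it occupies
    * one. */
   static int toy_type_size_xvec4(const toy_type &t, bool as_vec4)
   {
      unsigned col_slots = (as_vec4 && t.is_dual_slot()) ? 2 : 1;
      return t.matrix_columns * col_slots;
   }

   int main()
   {
      const toy_type dvec4   = { true, 4, 1 };
      const toy_type dmat2x4 = { true, 4, 2 };

      assert(toy_type_size_xvec4(dvec4, true) == 2);    /* two vec4 slots */
      assert(toy_type_size_xvec4(dvec4, false) == 1);   /* one dvec4 slot */
      assert(toy_type_size_xvec4(dmat2x4, true) == 4);  /* 2 columns x 2 */
      assert(toy_type_size_xvec4(dmat2x4, false) == 2); /* 2 columns x 1 */
      printf("slot counts check out\n");
      return 0;
   }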
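The cube-array special case removed from emit_texture() rested on simple arithmetic: for cube arrays the hardware's resinfo reports faces * layers in the Z channel, while textureSize() must return layers, hence the integer divide by 6 (one slot per cube face). A toy check of that arithmetic, standalone and not Mesa code:

   #include <cassert>

   int main()
   {
      const int faces = 6;
      int hw_depth = 18;             /* resinfo result: 6 faces * 3 layers */
      int layers = hw_depth / faces; /* what the GLSL spec requires: 3 */
      assert(layers == 3);
      return 0;
   }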
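The new emit_generic_urb_slot() overload relies on brw_writemask_for_component_packing() to write only the channels a packed varying actually occupies. A plausible model of that helper, under the assumption that a varying packed at first_component spans num_comps consecutive channels of the vec4 URB slot; the function below is an illustrative stand-in, not the Mesa implementation:

   #include <cassert>

   /* Hypothetical model: build a 4-bit XYZW writemask covering num_comps
    * consecutive channels starting at first_component. */
   static unsigned toy_writemask_for_component_packing(unsigned num_comps,
                                                       unsigned first_component)
   {
      assert(first_component + num_comps <= 4);
      return ((1u << num_comps) - 1) << first_component;
   }

   int main()
   {
      /* A vec2 packed at component 2 lands in channels z and w (0b1100). */
      assert(toy_writemask_for_component_packing(2, 2) == 0xc);
      /* A float packed at component 1 lands in channel y (0b0010). */
      assert(toy_writemask_for_component_packing(1, 1) == 0x2);
      return 0;
   }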
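The rewritten move_uniform_array_access_to_pull_constants() is a three-pass algorithm: mark every uniform vec4 touched by a MOV_INDIRECT, assign each marked vec4 a compact pull-buffer slot, then lower the accesses to actual pulls. A self-contained sketch of the mark/assign bookkeeping with made-up inputs; the 16 comes from one vec4 being 16 bytes, matching the DIV_ROUND_UP(inst->src[2].ud, 16) in the diff:

   #include <cstdio>
   #include <vector>

   int main()
   {
      const int num_uniforms = 8;

      /* Each indirect access names a starting uniform vec4 and a byte
       * length, like MOV_INDIRECT's src[0] and src[2].ud. */
      struct access { int first_vec4; unsigned length_bytes; };
      std::vector<access> accesses = { {1, 32}, {5, 16} };

      std::vector<int> pull_constant_loc(num_uniforms, -1);

      /* Pass 1: mark.  A 32-byte access covers two vec4 slots. */
      for (const access &a : accesses)
         for (unsigned j = 0; j < (a.length_bytes + 15) / 16; j++)
            pull_constant_loc[a.first_vec4 + j] = 0;

      /* Pass 2: assign compact pull-buffer locations in order. */
      int nr_pull = 0;
      for (int j = 0; j < num_uniforms; j++)
         if (pull_constant_loc[j] >= 0)
            pull_constant_loc[j] = nr_pull++;

      /* Pass 3: a real pass would now rewrite each access to load from
       * its assigned slot; here we just print the mapping. */
      for (int j = 0; j < num_uniforms; j++)
         if (pull_constant_loc[j] >= 0)
            printf("uniform vec4 %d -> pull slot %d\n", j, pull_constant_loc[j]);
      return 0;
   }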