X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fmesa%2Fdrivers%2Fdri%2Fi965%2Fbrw_vec4_visitor.cpp;h=5346fde950a9b2082e61cce583a515a9047658a0;hb=b38fcd0aea8d17919ecd9cc7afc518cfb2c01c27;hp=6fee798038c4e44c57eb8311dd67b78af61fb8dd;hpb=80511d176a49e754a18ce585bab413db7af63bf7;p=mesa.git diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp index 6fee798038c..5346fde950a 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp @@ -338,7 +338,7 @@ vec4_visitor::fix_math_operand(src_reg src) return src_reg(expanded); } -void +vec4_instruction * vec4_visitor::emit_math(enum opcode opcode, const dst_reg &dst, const src_reg &src0, const src_reg &src1) @@ -350,11 +350,13 @@ vec4_visitor::emit_math(enum opcode opcode, /* MATH on Gen6 must be align1, so we can't do writemasks. */ math->dst = dst_reg(this, glsl_type::vec4_type); math->dst.type = dst.type; - emit(MOV(dst, src_reg(math->dst))); + math = emit(MOV(dst, src_reg(math->dst))); } else if (devinfo->gen < 6) { math->base_mrf = 1; math->mlen = src1.file == BAD_FILE ? 1 : 2; } + + return math; } void @@ -572,9 +574,18 @@ vec4_visitor::visit_instructions(const exec_list *list) } } - -static int -type_size(const struct glsl_type *type) +/** + * Returns the minimum number of vec4 elements needed to pack a type. + * + * For simple types, it will return 1 (a single vec4); for matrices, the + * number of columns; for array and struct, the sum of the vec4_size of + * each of its elements; and for sampler and atomic, zero. + * + * This method is useful to calculate how much register space is needed to + * store a particular type. + */ +int +vec4_visitor::type_size(const struct glsl_type *type) { unsigned int i; int size; @@ -629,7 +640,7 @@ src_reg::src_reg(class vec4_visitor *v, const struct glsl_type *type) init(); this->file = GRF; - this->reg = v->alloc.allocate(type_size(type)); + this->reg = v->alloc.allocate(v->type_size(type)); if (type->is_array() || type->is_record()) { this->swizzle = BRW_SWIZZLE_NOOP; @@ -647,7 +658,7 @@ src_reg::src_reg(class vec4_visitor *v, const struct glsl_type *type, int size) init(); this->file = GRF; - this->reg = v->alloc.allocate(type_size(type) * size); + this->reg = v->alloc.allocate(v->type_size(type) * size); this->swizzle = BRW_SWIZZLE_NOOP; @@ -659,7 +670,7 @@ dst_reg::dst_reg(class vec4_visitor *v, const struct glsl_type *type) init(); this->file = GRF; - this->reg = v->alloc.allocate(type_size(type)); + this->reg = v->alloc.allocate(v->type_size(type)); if (type->is_array() || type->is_record()) { this->writemask = WRITEMASK_XYZW; @@ -670,6 +681,21 @@ dst_reg::dst_reg(class vec4_visitor *v, const struct glsl_type *type) this->type = brw_type_for_base_type(type); } +void +vec4_visitor::setup_vector_uniform_values(const gl_constant_value *values, + unsigned n) +{ + static const gl_constant_value zero = { 0 }; + + for (unsigned i = 0; i < n; ++i) + stage_prog_data->param[4 * uniforms + i] = &values[i]; + + for (unsigned i = n; i < 4; ++i) + stage_prog_data->param[4 * uniforms + i] = &zero; + + uniform_vector_size[uniforms++] = n; +} + /* Our support for uniforms is piggy-backed on the struct * gl_fragment_program, because that's where the values actually * get stored, rather than in some global gl_shader_program uniform @@ -699,26 +725,13 @@ vec4_visitor::setup_uniform_values(ir_variable *ir) continue; } - gl_constant_value *components = storage->storage; - unsigned vector_count = (MAX2(storage->array_elements, 1) * - storage->type->matrix_columns); + const unsigned vector_count = (MAX2(storage->array_elements, 1) * + storage->type->matrix_columns); + const unsigned vector_size = storage->type->vector_elements; - for (unsigned s = 0; s < vector_count; s++) { - assert(uniforms < uniform_array_size); - uniform_vector_size[uniforms] = storage->type->vector_elements; - - int i; - for (i = 0; i < uniform_vector_size[uniforms]; i++) { - stage_prog_data->param[uniforms * 4 + i] = components; - components++; - } - for (; i < 4; i++) { - static gl_constant_value zero = { 0.0 }; - stage_prog_data->param[uniforms * 4 + i] = &zero; - } - - uniforms++; - } + for (unsigned s = 0; s < vector_count; s++) + setup_vector_uniform_values(&storage->storage[s * vector_size], + vector_size); } } @@ -1045,8 +1058,6 @@ vec4_visitor::visit(ir_variable *ir) for (int i = 0; i < type_size(ir->type); i++) { output_reg[ir->data.location + i] = *reg; output_reg[ir->data.location + i].reg_offset = i; - output_reg[ir->data.location + i].type = - brw_type_for_base_type(ir->type->get_scalar_type()); output_reg_annotation[ir->data.location + i] = ir->name; } break; @@ -1083,7 +1094,7 @@ vec4_visitor::visit(ir_variable *ir) break; case ir_var_system_value: - reg = make_reg_for_system_value(ir); + reg = make_reg_for_system_value(ir->data.location, ir->type); break; default: @@ -1255,7 +1266,7 @@ vec4_visitor::try_emit_b2f_of_compare(ir_expression *ir) return true; } -void +vec4_instruction * vec4_visitor::emit_minmax(enum brw_conditional_mod conditionalmod, dst_reg dst, src_reg src0, src_reg src1) { @@ -1270,9 +1281,11 @@ vec4_visitor::emit_minmax(enum brw_conditional_mod conditionalmod, dst_reg dst, inst = emit(BRW_OPCODE_SEL, dst, src0, src1); inst->predicate = BRW_PREDICATE_NORMAL; } + + return inst; } -void +vec4_instruction * vec4_visitor::emit_lrp(const dst_reg &dst, const src_reg &x, const src_reg &y, const src_reg &a) { @@ -1280,8 +1293,8 @@ vec4_visitor::emit_lrp(const dst_reg &dst, /* Note that the instruction's argument order is reversed from GLSL * and the IR. */ - emit(LRP(dst, - fix_3src_operand(a), fix_3src_operand(y), fix_3src_operand(x))); + return emit(LRP(dst, fix_3src_operand(a), fix_3src_operand(y), + fix_3src_operand(x))); } else { /* Earlier generations don't support three source operations, so we * need to emit x*(1-a) + y*a. @@ -1296,7 +1309,7 @@ vec4_visitor::emit_lrp(const dst_reg &dst, emit(MUL(y_times_a, y, a)); emit(ADD(one_minus_a, negate(a), src_reg(1.0f))); emit(MUL(x_times_one_minus_a, x, src_reg(one_minus_a))); - emit(ADD(dst, src_reg(x_times_one_minus_a), src_reg(y_times_a))); + return emit(ADD(dst, src_reg(x_times_one_minus_a), src_reg(y_times_a))); } } @@ -2437,6 +2450,8 @@ vec4_visitor::visit_atomic_counter_intrinsic(ir_call *ir) emit_untyped_atomic(BRW_AOP_PREDEC, surf_index, dst, offset, src_reg(), src_reg()); } + + brw_mark_surface_used(stage_prog_data, surf_index); } void @@ -3090,6 +3105,7 @@ vec4_visitor::emit_psiz_and_flags(dst_reg reg) vec4_instruction *inst; inst = emit(OR(header1_w, src_reg(header1_w), src_reg(1u << 6))); inst->predicate = BRW_PREDICATE_NORMAL; + output_reg[BRW_VARYING_SLOT_NDC].type = BRW_REGISTER_TYPE_F; inst = emit(MOV(output_reg[BRW_VARYING_SLOT_NDC], src_reg(0.0f))); inst->predicate = BRW_PREDICATE_NORMAL; } @@ -3102,18 +3118,23 @@ vec4_visitor::emit_psiz_and_flags(dst_reg reg) if (prog_data->vue_map.slots_valid & VARYING_BIT_PSIZ) { dst_reg reg_w = reg; reg_w.writemask = WRITEMASK_W; - emit(MOV(reg_w, src_reg(output_reg[VARYING_SLOT_PSIZ]))); + src_reg reg_as_src = src_reg(output_reg[VARYING_SLOT_PSIZ]); + reg_as_src.type = reg_w.type; + reg_as_src.swizzle = brw_swizzle_for_size(1); + emit(MOV(reg_w, reg_as_src)); } if (prog_data->vue_map.slots_valid & VARYING_BIT_LAYER) { dst_reg reg_y = reg; reg_y.writemask = WRITEMASK_Y; reg_y.type = BRW_REGISTER_TYPE_D; + output_reg[VARYING_SLOT_LAYER].type = reg_y.type; emit(MOV(reg_y, src_reg(output_reg[VARYING_SLOT_LAYER]))); } if (prog_data->vue_map.slots_valid & VARYING_BIT_VIEWPORT) { dst_reg reg_z = reg; reg_z.writemask = WRITEMASK_Z; reg_z.type = BRW_REGISTER_TYPE_D; + output_reg[VARYING_SLOT_VIEWPORT].type = reg_z.type; emit(MOV(reg_z, src_reg(output_reg[VARYING_SLOT_VIEWPORT]))); } } @@ -3151,8 +3172,8 @@ vec4_visitor::emit_clip_distances(dst_reg reg, int offset) vec4_instruction * vec4_visitor::emit_generic_urb_slot(dst_reg reg, int varying) { - assert (varying < VARYING_SLOT_MAX); - reg.type = output_reg[varying].type; + assert(varying < VARYING_SLOT_MAX); + assert(output_reg[varying].type == reg.type); current_annotation = output_reg_annotation[varying]; /* Copy the register, saturating if necessary */ return emit(MOV(reg, src_reg(output_reg[varying]))); @@ -3162,6 +3183,7 @@ void vec4_visitor::emit_urb_slot(dst_reg reg, int varying) { reg.type = BRW_REGISTER_TYPE_F; + output_reg[varying].type = reg.type; switch (varying) { case VARYING_SLOT_PSIZ: