X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fintel%2Fcompiler%2Fbrw_vec4_visitor.cpp;h=f18fd9e38ee688edb177b931a61d061f6a17ec95;hb=5c5555a862754a5b43fee2abf4fc34e888d22a06;hp=ae516196b15163b5d516b1ffe250b246d02e6080;hpb=8d90e2883954eb7022cf8fc98be3773cc5513e7b;p=mesa.git diff --git a/src/intel/compiler/brw_vec4_visitor.cpp b/src/intel/compiler/brw_vec4_visitor.cpp index ae516196b15..f18fd9e38ee 100644 --- a/src/intel/compiler/brw_vec4_visitor.cpp +++ b/src/intel/compiler/brw_vec4_visitor.cpp @@ -24,6 +24,7 @@ #include "brw_vec4.h" #include "brw_cfg.h" #include "brw_eu.h" +#include "util/u_math.h" namespace brw { @@ -46,6 +47,7 @@ vec4_instruction::vec4_instruction(enum opcode opcode, const dst_reg &dst, this->predicate_inverse = false; this->target = 0; this->shadow_compare = false; + this->eot = false; this->ir = NULL; this->urb_write_flags = BRW_URB_WRITE_NO_FLAGS; this->header_size = 0; @@ -574,7 +576,7 @@ vec4_visitor::emit_pack_snorm_4x8(const dst_reg &dst, const src_reg &src0) * false) elements needed to pack a type. */ static int -type_size_xvec4(const struct glsl_type *type, bool as_vec4) +type_size_xvec4(const struct glsl_type *type, bool as_vec4, bool bindless) { unsigned int i; int size; @@ -583,8 +585,13 @@ type_size_xvec4(const struct glsl_type *type, bool as_vec4) case GLSL_TYPE_UINT: case GLSL_TYPE_INT: case GLSL_TYPE_FLOAT: + case GLSL_TYPE_FLOAT16: case GLSL_TYPE_BOOL: case GLSL_TYPE_DOUBLE: + case GLSL_TYPE_UINT16: + case GLSL_TYPE_INT16: + case GLSL_TYPE_UINT8: + case GLSL_TYPE_INT8: case GLSL_TYPE_UINT64: case GLSL_TYPE_INT64: if (type->is_matrix()) { @@ -602,11 +609,14 @@ type_size_xvec4(const struct glsl_type *type, bool as_vec4) } case GLSL_TYPE_ARRAY: assert(type->length > 0); - return type_size_xvec4(type->fields.array, as_vec4) * type->length; + return type_size_xvec4(type->fields.array, as_vec4, bindless) * + type->length; case GLSL_TYPE_STRUCT: + case GLSL_TYPE_INTERFACE: size = 0; for (i = 0; i < type->length; i++) { - size += type_size_xvec4(type->fields.structure[i].type, as_vec4); + size += type_size_xvec4(type->fields.structure[i].type, as_vec4, + bindless); } return size; case GLSL_TYPE_SUBROUTINE: @@ -616,14 +626,13 @@ type_size_xvec4(const struct glsl_type *type, bool as_vec4) /* Samplers take up no register space, since they're baked in at * link time. */ - return 0; + return bindless ? 1 : 0; case GLSL_TYPE_ATOMIC_UINT: return 0; case GLSL_TYPE_IMAGE: - return DIV_ROUND_UP(BRW_IMAGE_PARAM_SIZE, 4); + return bindless ? 1 : DIV_ROUND_UP(BRW_IMAGE_PARAM_SIZE, 4); case GLSL_TYPE_VOID: case GLSL_TYPE_ERROR: - case GLSL_TYPE_INTERFACE: case GLSL_TYPE_FUNCTION: unreachable("not reached"); } @@ -642,9 +651,9 @@ type_size_xvec4(const struct glsl_type *type, bool as_vec4) * store a particular type. */ extern "C" int -type_size_vec4(const struct glsl_type *type) +type_size_vec4(const struct glsl_type *type, bool bindless) { - return type_size_xvec4(type, true); + return type_size_xvec4(type, true, bindless); } /** @@ -667,9 +676,9 @@ type_size_vec4(const struct glsl_type *type) * type fits in one or two vec4 slots. */ extern "C" int -type_size_dvec4(const struct glsl_type *type) +type_size_dvec4(const struct glsl_type *type, bool bindless) { - return type_size_xvec4(type, false); + return type_size_xvec4(type, false, bindless); } src_reg::src_reg(class vec4_visitor *v, const struct glsl_type *type) @@ -677,9 +686,9 @@ src_reg::src_reg(class vec4_visitor *v, const struct glsl_type *type) init(); this->file = VGRF; - this->nr = v->alloc.allocate(type_size_vec4(type)); + this->nr = v->alloc.allocate(type_size_vec4(type, false)); - if (type->is_array() || type->is_record()) { + if (type->is_array() || type->is_struct()) { this->swizzle = BRW_SWIZZLE_NOOP; } else { this->swizzle = brw_swizzle_for_size(type->vector_elements); @@ -695,7 +704,7 @@ src_reg::src_reg(class vec4_visitor *v, const struct glsl_type *type, int size) init(); this->file = VGRF; - this->nr = v->alloc.allocate(type_size_vec4(type) * size); + this->nr = v->alloc.allocate(type_size_vec4(type, false) * size); this->swizzle = BRW_SWIZZLE_NOOP; @@ -707,9 +716,9 @@ dst_reg::dst_reg(class vec4_visitor *v, const struct glsl_type *type) init(); this->file = VGRF; - this->nr = v->alloc.allocate(type_size_vec4(type)); + this->nr = v->alloc.allocate(type_size_vec4(type, false)); - if (type->is_array() || type->is_record()) { + if (type->is_array() || type->is_struct()) { this->writemask = WRITEMASK_XYZW; } else { this->writemask = (1 << type->vector_elements) - 1; @@ -727,34 +736,6 @@ vec4_visitor::emit_minmax(enum brw_conditional_mod conditionalmod, dst_reg dst, return inst; } -vec4_instruction * -vec4_visitor::emit_lrp(const dst_reg &dst, - const src_reg &x, const src_reg &y, const src_reg &a) -{ - if (devinfo->gen >= 6) { - /* Note that the instruction's argument order is reversed from GLSL - * and the IR. - */ - return emit(LRP(dst, fix_3src_operand(a), fix_3src_operand(y), - fix_3src_operand(x))); - } else { - /* Earlier generations don't support three source operations, so we - * need to emit x*(1-a) + y*a. - */ - dst_reg y_times_a = dst_reg(this, glsl_type::vec4_type); - dst_reg one_minus_a = dst_reg(this, glsl_type::vec4_type); - dst_reg x_times_one_minus_a = dst_reg(this, glsl_type::vec4_type); - y_times_a.writemask = dst.writemask; - one_minus_a.writemask = dst.writemask; - x_times_one_minus_a.writemask = dst.writemask; - - emit(MUL(y_times_a, y, a)); - emit(ADD(one_minus_a, negate(a), brw_imm_f(1.0f))); - emit(MUL(x_times_one_minus_a, x, src_reg(one_minus_a))); - return emit(ADD(dst, src_reg(x_times_one_minus_a), src_reg(y_times_a))); - } -} - /** * Emits the instructions needed to perform a pull constant load. before_block * and before_inst can be NULL in which case the instruction will be appended @@ -856,7 +837,7 @@ vec4_visitor::emit_mcs_fetch(const glsl_type *coordinate_type, dst_reg(this, glsl_type::uvec4_type)); inst->base_mrf = 2; inst->src[1] = surface; - inst->src[2] = surface; + inst->src[2] = brw_imm_ud(0); /* sampler */ int param_base; @@ -915,18 +896,6 @@ vec4_visitor::emit_texture(ir_texture_opcode op, src_reg surface_reg, src_reg sampler_reg) { - /* The sampler can only meaningfully compute LOD for fragment shader - * messages. For all other stages, we change the opcode to TXL and hardcode - * the LOD to 0. - * - * textureQueryLevels() is implemented in terms of TXS so we need to pass a - * valid LOD argument. - */ - if (op == ir_tex || op == ir_query_levels) { - assert(lod.file == BAD_FILE); - lod = brw_imm_f(0.0f); - } - enum opcode opcode; switch (op) { case ir_tex: opcode = SHADER_OPCODE_TXL; break; @@ -1206,12 +1175,14 @@ vec4_visitor::emit_psiz_and_flags(dst_reg reg) if (output_reg[VARYING_SLOT_CLIP_DIST0][0].file != BAD_FILE) { current_annotation = "Clipping flags"; dst_reg flags0 = dst_reg(this, glsl_type::uint_type); - dst_reg flags1 = dst_reg(this, glsl_type::uint_type); emit(CMP(dst_null_f(), src_reg(output_reg[VARYING_SLOT_CLIP_DIST0][0]), brw_imm_f(0.0f), BRW_CONDITIONAL_L)); emit(VS_OPCODE_UNPACK_FLAGS_SIMD4X2, flags0, brw_imm_d(0)); emit(OR(header1_w, src_reg(header1_w), src_reg(flags0))); + } + if (output_reg[VARYING_SLOT_CLIP_DIST1][0].file != BAD_FILE) { + dst_reg flags1 = dst_reg(this, glsl_type::uint_type); emit(CMP(dst_null_f(), src_reg(output_reg[VARYING_SLOT_CLIP_DIST1][0]), brw_imm_f(0.0f), BRW_CONDITIONAL_L)); emit(VS_OPCODE_UNPACK_FLAGS_SIMD4X2, flags1, brw_imm_d(0)); emit(SHL(flags1, src_reg(flags1), brw_imm_d(4))); @@ -1323,7 +1294,7 @@ vec4_visitor::emit_urb_slot(dst_reg reg, int varying) * determine which edges should be drawn as wireframe. */ current_annotation = "edge flag"; - int edge_attr = _mesa_bitcount_64(nir->info.inputs_read & + int edge_attr = util_bitcount64(nir->info.inputs_read & BITFIELD64_MASK(VERT_ATTRIB_EDGEFLAG)); emit(MOV(reg, src_reg(dst_reg(ATTR, edge_attr, glsl_type::float_type, WRITEMASK_XYZW)))); @@ -1340,8 +1311,8 @@ vec4_visitor::emit_urb_slot(dst_reg reg, int varying) } } -static int -align_interleaved_urb_mlen(const struct gen_device_info *devinfo, int mlen) +static unsigned +align_interleaved_urb_mlen(const struct gen_device_info *devinfo, unsigned mlen) { if (devinfo->gen >= 6) { /* URB data written (does not include the message header reg) must @@ -1751,8 +1722,6 @@ vec4_visitor::emit_pull_constant_load(bblock_t *block, vec4_instruction *inst, src = byte_offset(src, 16); } - brw_mark_surface_used(&prog_data->base, index); - if (is_64bit) { temp = retype(temp, BRW_REGISTER_TYPE_DF); shuffle_64bit_data(orig_temp, src_reg(temp), false, block, inst); @@ -1872,6 +1841,7 @@ vec4_visitor::vec4_visitor(const struct brw_compiler *compiler, prog_data(prog_data), fail_msg(NULL), first_non_payload_grf(0), + live_analysis(this), performance_analysis(this), need_all_constants_in_pull_buffer(false), no_spills(no_spills), shader_time_index(shader_time_index), @@ -1885,17 +1855,12 @@ vec4_visitor::vec4_visitor(const struct brw_compiler *compiler, memset(this->output_num_components, 0, sizeof(this->output_num_components)); - this->virtual_grf_start = NULL; - this->virtual_grf_end = NULL; - this->live_intervals = NULL; - this->max_grf = devinfo->gen >= 7 ? GEN7_MRF_HACK_START : BRW_MAX_GRF; this->uniforms = 0; -} -vec4_visitor::~vec4_visitor() -{ + this->nir_locals = NULL; + this->nir_ssa_values = NULL; }