X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fmesa%2Fdrivers%2Fdri%2Fi965%2Fbrw_vec4_visitor.cpp;h=76b2a05700f6b06d65070f159515186a70c53bb8;hb=cea360a7087f9533ce596f052070195254a28c9e;hp=6d1552858205950e886ea0c7378a221d87fd7f99;hpb=fed60e3c73c7be7c1e2194054daf29381d0ddc18;p=mesa.git diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp index 6d155285820..76b2a05700f 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp @@ -23,8 +23,8 @@ #include "brw_vec4.h" #include "brw_cfg.h" -#include "glsl/ir_uniform.h" -#include "program/sampler.h" +#include "brw_eu.h" +#include "brw_program.h" namespace brw { @@ -183,6 +183,7 @@ ALU3(MAD) ALU2_ACC(ADDC) ALU2_ACC(SUBB) ALU2(MAC) +ALU1(DIM) /** Gen4 predicated IF. */ vec4_instruction * @@ -237,8 +238,6 @@ vec4_visitor::CMP(dst_reg dst, src_reg src0, src_reg src1, * type to match src0 so we can compact the instruction. */ dst.type = src0.type; - if (dst.file == HW_REG) - dst.fixed_hw_reg.type = dst.type; resolve_ud_negate(&src0); resolve_ud_negate(&src1); @@ -410,7 +409,7 @@ vec4_visitor::emit_pack_half_2x16(dst_reg dst, src_reg src0) * You should inspect the disasm output in order to verify that the MOV is * not optimized away. */ - emit(MOV(tmp_dst, src_reg(0x12345678u))); + emit(MOV(tmp_dst, brw_imm_ud(0x12345678u))); #endif /* Give tmp the form below, where "." means untouched. @@ -429,7 +428,7 @@ vec4_visitor::emit_pack_half_2x16(dst_reg dst, src_reg src0) * 0xhhhh0000 */ tmp_src.swizzle = BRW_SWIZZLE_YYYY; - emit(SHL(dst, tmp_src, src_reg(16u))); + emit(SHL(dst, tmp_src, brw_imm_ud(16u))); /* Finally, give the write-channels of dst the form of packHalf2x16's * output: @@ -468,10 +467,10 @@ vec4_visitor::emit_unpack_half_2x16(dst_reg dst, src_reg src0) src_reg tmp_src(tmp_dst); tmp_dst.writemask = WRITEMASK_X; - emit(AND(tmp_dst, src0, src_reg(0xffffu))); + emit(AND(tmp_dst, src0, brw_imm_ud(0xffffu))); tmp_dst.writemask = WRITEMASK_Y; - emit(SHR(tmp_dst, src0, src_reg(16u))); + emit(SHR(tmp_dst, src0, brw_imm_ud(16u))); dst.writemask = WRITEMASK_XY; emit(F16TO32(dst, tmp_src)); @@ -486,7 +485,7 @@ vec4_visitor::emit_unpack_unorm_4x8(const dst_reg &dst, src_reg src0) * vector float and a type-converting MOV. */ dst_reg shift(this, glsl_type::uvec4_type); - emit(MOV(shift, src_reg(0x00, 0x60, 0x70, 0x78))); + emit(MOV(shift, brw_imm_vf4(0x00, 0x60, 0x70, 0x78))); dst_reg shifted(this, glsl_type::uvec4_type); src0.swizzle = BRW_SWIZZLE_XXXX; @@ -496,7 +495,7 @@ vec4_visitor::emit_unpack_unorm_4x8(const dst_reg &dst, src_reg src0) dst_reg f(this, glsl_type::vec4_type); emit(VEC4_OPCODE_MOV_BYTES, f, src_reg(shifted)); - emit(MUL(dst, src_reg(f), src_reg(1.0f / 255.0f))); + emit(MUL(dst, src_reg(f), brw_imm_f(1.0f / 255.0f))); } void @@ -508,7 +507,7 @@ vec4_visitor::emit_unpack_snorm_4x8(const dst_reg &dst, src_reg src0) * vector float and a type-converting MOV. 
     */
    dst_reg shift(this, glsl_type::uvec4_type);
-   emit(MOV(shift, src_reg(0x00, 0x60, 0x70, 0x78)));
+   emit(MOV(shift, brw_imm_vf4(0x00, 0x60, 0x70, 0x78)));
 
    dst_reg shifted(this, glsl_type::uvec4_type);
    src0.swizzle = BRW_SWIZZLE_XXXX;
@@ -519,11 +518,11 @@ vec4_visitor::emit_unpack_snorm_4x8(const dst_reg &dst, src_reg src0)
    emit(VEC4_OPCODE_MOV_BYTES, f, src_reg(shifted));
 
    dst_reg scaled(this, glsl_type::vec4_type);
-   emit(MUL(scaled, src_reg(f), src_reg(1.0f / 127.0f)));
+   emit(MUL(scaled, src_reg(f), brw_imm_f(1.0f / 127.0f)));
 
    dst_reg max(this, glsl_type::vec4_type);
-   emit_minmax(BRW_CONDITIONAL_GE, max, src_reg(scaled), src_reg(-1.0f));
-   emit_minmax(BRW_CONDITIONAL_L, dst, src_reg(max), src_reg(1.0f));
+   emit_minmax(BRW_CONDITIONAL_GE, max, src_reg(scaled), brw_imm_f(-1.0f));
+   emit_minmax(BRW_CONDITIONAL_L, dst, src_reg(max), brw_imm_f(1.0f));
 }
 
 void
@@ -534,7 +533,7 @@ vec4_visitor::emit_pack_unorm_4x8(const dst_reg &dst, const src_reg &src0)
    inst->saturate = true;
 
    dst_reg scaled(this, glsl_type::vec4_type);
-   emit(MUL(scaled, src_reg(saturated), src_reg(255.0f)));
+   emit(MUL(scaled, src_reg(saturated), brw_imm_f(255.0f)));
 
    dst_reg rounded(this, glsl_type::vec4_type);
    emit(RNDE(rounded, src_reg(scaled)));
@@ -550,13 +549,13 @@ void
 vec4_visitor::emit_pack_snorm_4x8(const dst_reg &dst, const src_reg &src0)
 {
    dst_reg max(this, glsl_type::vec4_type);
-   emit_minmax(BRW_CONDITIONAL_GE, max, src0, src_reg(-1.0f));
+   emit_minmax(BRW_CONDITIONAL_GE, max, src0, brw_imm_f(-1.0f));
 
    dst_reg min(this, glsl_type::vec4_type);
-   emit_minmax(BRW_CONDITIONAL_L, min, src_reg(max), src_reg(1.0f));
+   emit_minmax(BRW_CONDITIONAL_L, min, src_reg(max), brw_imm_f(1.0f));
 
    dst_reg scaled(this, glsl_type::vec4_type);
-   emit(MUL(scaled, src_reg(min), src_reg(127.0f)));
+   emit(MUL(scaled, src_reg(min), brw_imm_f(127.0f)));
 
    dst_reg rounded(this, glsl_type::vec4_type);
    emit(RNDE(rounded, src_reg(scaled)));
@@ -568,18 +567,12 @@ vec4_visitor::emit_pack_snorm_4x8(const dst_reg &dst, const src_reg &src0)
    emit(VEC4_OPCODE_PACK_BYTES, dst, bytes);
 }
 
-/**
- * Returns the minimum number of vec4 elements needed to pack a type.
- *
- * For simple types, it will return 1 (a single vec4); for matrices, the
- * number of columns; for array and struct, the sum of the vec4_size of
- * each of its elements; and for sampler and atomic, zero.
- *
- * This method is useful to calculate how much register space is needed to
- * store a particular type.
+/*
+ * Returns the minimum number of vec4 (as_vec4 == true) or dvec4 (as_vec4 ==
+ * false) elements needed to pack a type.
  */
-extern "C" int
-type_size_vec4(const struct glsl_type *type)
+static int
+type_size_xvec4(const struct glsl_type *type, bool as_vec4)
 {
    unsigned int i;
    int size;
@@ -589,23 +582,27 @@ type_size_vec4(const struct glsl_type *type)
    case GLSL_TYPE_INT:
    case GLSL_TYPE_FLOAT:
    case GLSL_TYPE_BOOL:
+   case GLSL_TYPE_DOUBLE:
       if (type->is_matrix()) {
-         return type->matrix_columns;
+         const glsl_type *col_type = type->column_type();
+         unsigned col_slots =
+            (as_vec4 && col_type->is_dual_slot()) ? 2 : 1;
+         return type->matrix_columns * col_slots;
       } else {
-         /* Regardless of size of vector, it gets a vec4. This is bad
-          * packing for things like floats, but otherwise arrays become a
-          * mess. Hopefully a later pass over the code can pack scalars
-          * down if appropriate.
-          */
-         return 1;
+         /* Regardless of size of vector, it gets a vec4. This is bad
+          * packing for things like floats, but otherwise arrays become a
+          * mess. Hopefully a later pass over the code can pack scalars
+          * down if appropriate.
+          */
+         return (as_vec4 && type->is_dual_slot()) ? 2 : 1;
       }
    case GLSL_TYPE_ARRAY:
       assert(type->length > 0);
-      return type_size_vec4(type->fields.array) * type->length;
+      return type_size_xvec4(type->fields.array, as_vec4) * type->length;
    case GLSL_TYPE_STRUCT:
       size = 0;
       for (i = 0; i < type->length; i++) {
-         size += type_size_vec4(type->fields.structure[i].type);
+         size += type_size_xvec4(type->fields.structure[i].type, as_vec4);
       }
       return size;
    case GLSL_TYPE_SUBROUTINE:
@@ -621,7 +618,6 @@
    case GLSL_TYPE_IMAGE:
       return DIV_ROUND_UP(BRW_IMAGE_PARAM_SIZE, 4);
    case GLSL_TYPE_VOID:
-   case GLSL_TYPE_DOUBLE:
    case GLSL_TYPE_ERROR:
    case GLSL_TYPE_INTERFACE:
    case GLSL_TYPE_FUNCTION:
@@ -631,12 +627,53 @@
    return 0;
 }
 
+/**
+ * Returns the minimum number of vec4 elements needed to pack a type.
+ *
+ * For simple types, it will return 1 (a single vec4); for matrices, the
+ * number of columns; for array and struct, the sum of the vec4_size of
+ * each of its elements; and for sampler and atomic, zero.
+ *
+ * This method is useful to calculate how much register space is needed to
+ * store a particular type.
+ */
+extern "C" int
+type_size_vec4(const struct glsl_type *type)
+{
+   return type_size_xvec4(type, true);
+}
+
+/**
+ * Returns the minimum number of dvec4 elements needed to pack a type.
+ *
+ * For simple types, it will return 1 (a single dvec4); for matrices, the
+ * number of columns; for array and struct, the sum of the dvec4_size of
+ * each of its elements; and for sampler and atomic, zero.
+ *
+ * This method is useful to calculate how much register space is needed to
+ * store a particular type.
+ *
+ * Measuring double-precision vertex inputs as dvec4 is required because
+ * ARB_vertex_attrib_64bit states that these use the same number of locations
+ * as the single-precision version. That is, two consecutive dvec4s would be
+ * located in location "x" and location "x+1", not "x+2".
+ *
+ * In order to map vec4/dvec4 vertex inputs to the proper ATTRs,
+ * remap_vs_attrs() will take into account both the location and also whether
+ * the type fits in one or two vec4 slots.
+ */ +extern "C" int +type_size_dvec4(const struct glsl_type *type) +{ + return type_size_xvec4(type, false); +} + src_reg::src_reg(class vec4_visitor *v, const struct glsl_type *type) { init(); - this->file = GRF; - this->reg = v->alloc.allocate(type_size_vec4(type)); + this->file = VGRF; + this->nr = v->alloc.allocate(type_size_vec4(type)); if (type->is_array() || type->is_record()) { this->swizzle = BRW_SWIZZLE_NOOP; @@ -653,8 +690,8 @@ src_reg::src_reg(class vec4_visitor *v, const struct glsl_type *type, int size) init(); - this->file = GRF; - this->reg = v->alloc.allocate(type_size_vec4(type) * size); + this->file = VGRF; + this->nr = v->alloc.allocate(type_size_vec4(type) * size); this->swizzle = BRW_SWIZZLE_NOOP; @@ -665,8 +702,8 @@ dst_reg::dst_reg(class vec4_visitor *v, const struct glsl_type *type) { init(); - this->file = GRF; - this->reg = v->alloc.allocate(type_size_vec4(type)); + this->file = VGRF; + this->nr = v->alloc.allocate(type_size_vec4(type)); if (type->is_array() || type->is_record()) { this->writemask = WRITEMASK_XYZW; @@ -681,18 +718,8 @@ vec4_instruction * vec4_visitor::emit_minmax(enum brw_conditional_mod conditionalmod, dst_reg dst, src_reg src0, src_reg src1) { - vec4_instruction *inst; - - if (devinfo->gen >= 6) { - inst = emit(BRW_OPCODE_SEL, dst, src0, src1); - inst->conditional_mod = conditionalmod; - } else { - emit(CMP(dst, src0, src1, conditionalmod)); - - inst = emit(BRW_OPCODE_SEL, dst, src0, src1); - inst->predicate = BRW_PREDICATE_NORMAL; - } - + vec4_instruction *inst = emit(BRW_OPCODE_SEL, dst, src0, src1); + inst->conditional_mod = conditionalmod; return inst; } @@ -718,7 +745,7 @@ vec4_visitor::emit_lrp(const dst_reg &dst, x_times_one_minus_a.writemask = dst.writemask; emit(MUL(y_times_a, y, a)); - emit(ADD(one_minus_a, negate(a), src_reg(1.0f))); + emit(ADD(one_minus_a, negate(a), brw_imm_f(1.0f))); emit(MUL(x_times_one_minus_a, x, src_reg(one_minus_a))); return emit(ADD(dst, src_reg(x_times_one_minus_a), src_reg(y_times_a))); } @@ -770,7 +797,7 @@ vec4_visitor::emit_pull_constant_load_reg(dst_reg dst, pull->mlen = 2; pull->header_size = 1; } else if (devinfo->gen >= 7) { - dst_reg grf_offset = dst_reg(this, glsl_type::int_type); + dst_reg grf_offset = dst_reg(this, glsl_type::uint_type); grf_offset.type = offset_reg.type; @@ -818,13 +845,14 @@ vec4_visitor::emit_uniformize(const src_reg &src) src_reg vec4_visitor::emit_mcs_fetch(const glsl_type *coordinate_type, - src_reg coordinate, src_reg sampler) + src_reg coordinate, src_reg surface) { vec4_instruction *inst = new(mem_ctx) vec4_instruction(SHADER_OPCODE_TXF_MCS, dst_reg(this, glsl_type::uvec4_type)); inst->base_mrf = 2; - inst->src[1] = sampler; + inst->src[1] = surface; + inst->src[2] = surface; int param_base; @@ -852,7 +880,7 @@ vec4_visitor::emit_mcs_fetch(const glsl_type *coordinate_type, coordinate)); emit(MOV(dst_reg(MRF, param_base, coordinate_type, zero_mask), - src_reg(0))); + brw_imm_d(0))); emit(inst); return src_reg(inst->dst); @@ -864,7 +892,7 @@ vec4_visitor::is_high_sampler(src_reg sampler) if (devinfo->gen < 8 && !devinfo->is_haswell) return false; - return sampler.file != IMM || sampler.fixed_hw_reg.dw1.ud >= 16; + return sampler.file != IMM || sampler.ud >= 16; } void @@ -879,17 +907,31 @@ vec4_visitor::emit_texture(ir_texture_opcode op, uint32_t constant_offset, src_reg offset_value, src_reg mcs, - bool is_cube_array, + uint32_t surface, + src_reg surface_reg, uint32_t sampler, src_reg sampler_reg) { + /* The sampler can only meaningfully compute LOD for fragment shader 
+    * messages. For all other stages, we change the opcode to TXL and hardcode
+    * the LOD to 0.
+    *
+    * textureQueryLevels() is implemented in terms of TXS so we need to pass a
+    * valid LOD argument.
+    */
+   if (op == ir_tex || op == ir_query_levels) {
+      assert(lod.file == BAD_FILE);
+      lod = brw_imm_f(0.0f);
+   }
+
    enum opcode opcode;
    switch (op) {
    case ir_tex: opcode = SHADER_OPCODE_TXL; break;
    case ir_txl: opcode = SHADER_OPCODE_TXL; break;
    case ir_txd: opcode = SHADER_OPCODE_TXD; break;
    case ir_txf: opcode = SHADER_OPCODE_TXF; break;
-   case ir_txf_ms: opcode = SHADER_OPCODE_TXF_CMS; break;
+   case ir_txf_ms: opcode = (devinfo->gen >= 9 ? SHADER_OPCODE_TXF_CMS_W :
+                             SHADER_OPCODE_TXF_CMS); break;
    case ir_txs: opcode = SHADER_OPCODE_TXS; break;
    case ir_tg4: opcode = offset_value.file != BAD_FILE ? SHADER_OPCODE_TG4_OFFSET : SHADER_OPCODE_TG4; break;
@@ -899,12 +941,18 @@ vec4_visitor::emit_texture(ir_texture_opcode op,
       unreachable("TXB is not valid for vertex shaders.");
    case ir_lod:
       unreachable("LOD is not valid for vertex shaders.");
+   case ir_samples_identical: {
+      /* There are some challenges implementing this for vec4, and it seems
+       * unlikely to be used anyway. For now, just always return false.
+       */
+      emit(MOV(dest, brw_imm_ud(0u)));
+      return;
+   }
    default:
      unreachable("Unrecognized tex op");
    }
 
-   vec4_instruction *inst = new(mem_ctx) vec4_instruction(
-      opcode, dst_reg(this, dest_type));
+   vec4_instruction *inst = new(mem_ctx) vec4_instruction(opcode, dest);
 
    inst->offset = constant_offset;
 
@@ -926,7 +974,8 @@ vec4_visitor::emit_texture(ir_texture_opcode op,
    inst->dst.writemask = WRITEMASK_XYZW;
    inst->shadow_compare = shadow_comparitor.file != BAD_FILE;
 
-   inst->src[1] = sampler_reg;
+   inst->src[1] = surface_reg;
+   inst->src[2] = sampler_reg;
 
    /* MRF for the first parameter */
    int param_base = inst->base_mrf + inst->header_size;
@@ -949,7 +998,7 @@ vec4_visitor::emit_texture(ir_texture_opcode op,
 
       if (zero_mask != 0) {
        emit(MOV(dst_reg(MRF, param_base, coordinate.type, zero_mask),
-                 src_reg(0)));
+                 brw_imm_d(0)));
      }
      /* Load the shadow comparitor */
      if (shadow_comparitor.file != BAD_FILE && op != ir_txd && (op != ir_tg4 || offset_value.file == BAD_FILE)) {
@@ -981,7 +1030,16 @@ vec4_visitor::emit_texture(ir_texture_opcode op,
      } else if (op == ir_txf_ms) {
         emit(MOV(dst_reg(MRF, param_base + 1, sample_index.type, WRITEMASK_X),
                  sample_index));
-        if (devinfo->gen >= 7) {
+        if (opcode == SHADER_OPCODE_TXF_CMS_W) {
+           /* MCS data is stored in the first two channels of `mcs`, but we
+            * need to get it into the .y and .z channels of the second vec4
+            * of params.
+            */
+           mcs.swizzle = BRW_SWIZZLE4(0, 0, 1, 1);
+           emit(MOV(dst_reg(MRF, param_base + 1,
+                            glsl_type::uint_type, WRITEMASK_YZ),
+                    mcs));
+        } else if (devinfo->gen >= 7) {
           /* MCS data is in the first channel of `mcs`, but we need to get it into
            * the .y channel of the second vec4 of params, so replicate .x across
            * the whole vec4 and then mask off everything except .y
@@ -1036,18 +1094,23 @@ vec4_visitor::emit_texture(ir_texture_opcode op,
    /* fixup num layers (z) for cube arrays: hardware returns faces * layers;
    * spec requires layers.
    */
-   if (op == ir_txs && is_cube_array) {
-      emit_math(SHADER_OPCODE_INT_QUOTIENT,
-                writemask(inst->dst, WRITEMASK_Z),
-                src_reg(inst->dst), src_reg(6));
+   if (op == ir_txs && devinfo->gen < 7) {
+      /* Gen4-6 return 0 instead of 1 for single layer surfaces. */
+      emit_minmax(BRW_CONDITIONAL_GE, writemask(inst->dst, WRITEMASK_Z),
+                  src_reg(inst->dst), brw_imm_d(1));
    }
 
    if (devinfo->gen == 6 && op == ir_tg4) {
-      emit_gen6_gather_wa(key_tex->gen6_gather_wa[sampler], inst->dst);
+      emit_gen6_gather_wa(key_tex->gen6_gather_wa[surface], inst->dst);
    }
 
-   swizzle_result(op, dest,
-                  src_reg(inst->dst), sampler, dest_type);
+   if (op == ir_query_levels) {
+      /* # levels is in .w */
+      src_reg swizzled(dest);
+      swizzled.swizzle = BRW_SWIZZLE4(SWIZZLE_W, SWIZZLE_W,
+                                      SWIZZLE_W, SWIZZLE_W);
+      emit(MOV(dest, swizzled));
+   }
 }
 
 /**
@@ -1064,7 +1127,7 @@ vec4_visitor::emit_gen6_gather_wa(uint8_t wa, dst_reg dst)
    dst_f.type = BRW_REGISTER_TYPE_F;
 
    /* Convert from UNORM to UINT */
-   emit(MUL(dst_f, src_reg(dst_f), src_reg((float)((1 << width) - 1))));
+   emit(MUL(dst_f, src_reg(dst_f), brw_imm_f((float)((1 << width) - 1))));
    emit(MOV(dst, src_reg(dst_f)));
 
    if (wa & WA_SIGN) {
@@ -1072,89 +1135,8 @@ vec4_visitor::emit_gen6_gather_wa(uint8_t wa, dst_reg dst)
      * shifting the sign bit into place, then shifting back
      * preserving sign.
      */
-      emit(SHL(dst, src_reg(dst), src_reg(32 - width)));
-      emit(ASR(dst, src_reg(dst), src_reg(32 - width)));
-   }
-}
-
-/**
- * Set up the gather channel based on the swizzle, for gather4.
- */
-uint32_t
-vec4_visitor::gather_channel(unsigned gather_component, uint32_t sampler)
-{
-   int swiz = GET_SWZ(key_tex->swizzles[sampler], gather_component);
-   switch (swiz) {
-   case SWIZZLE_X: return 0;
-   case SWIZZLE_Y:
-      /* gather4 sampler is broken for green channel on RG32F --
-       * we must ask for blue instead.
-       */
-      if (key_tex->gather_channel_quirk_mask & (1 << sampler))
-         return 2;
-      return 1;
-   case SWIZZLE_Z: return 2;
-   case SWIZZLE_W: return 3;
-   default:
-      unreachable("Not reached"); /* zero, one swizzles handled already */
-   }
-}
-
-void
-vec4_visitor::swizzle_result(ir_texture_opcode op, dst_reg dest,
-                             src_reg orig_val, uint32_t sampler,
-                             const glsl_type *dest_type)
-{
-   int s = key_tex->swizzles[sampler];
-
-   dst_reg swizzled_result = dest;
-
-   if (op == ir_query_levels) {
-      /* # levels is in .w */
-      orig_val.swizzle = BRW_SWIZZLE4(SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W);
-      emit(MOV(swizzled_result, orig_val));
-      return;
-   }
-
-   if (op == ir_txs || dest_type == glsl_type::float_type
-      || s == SWIZZLE_NOOP || op == ir_tg4) {
-      emit(MOV(swizzled_result, orig_val));
-      return;
-   }
-
-
-   int zero_mask = 0, one_mask = 0, copy_mask = 0;
-   int swizzle[4] = {0};
-
-   for (int i = 0; i < 4; i++) {
-      switch (GET_SWZ(s, i)) {
-      case SWIZZLE_ZERO:
-         zero_mask |= (1 << i);
-         break;
-      case SWIZZLE_ONE:
-         one_mask |= (1 << i);
-         break;
-      default:
-         copy_mask |= (1 << i);
-         swizzle[i] = GET_SWZ(s, i);
-         break;
-      }
-   }
-
-   if (copy_mask) {
-      orig_val.swizzle = BRW_SWIZZLE4(swizzle[0], swizzle[1], swizzle[2], swizzle[3]);
-      swizzled_result.writemask = copy_mask;
-      emit(MOV(swizzled_result, orig_val));
-   }
-
-   if (zero_mask) {
-      swizzled_result.writemask = zero_mask;
-      emit(MOV(swizzled_result, src_reg(0.0f)));
-   }
-
-   if (one_mask) {
-      swizzled_result.writemask = one_mask;
-      emit(MOV(swizzled_result, src_reg(1.0f)));
+      emit(SHL(dst, src_reg(dst), brw_imm_d(32 - width)));
+      emit(ASR(dst, src_reg(dst), brw_imm_d(32 - width)));
    }
 }
 
@@ -1170,55 +1152,6 @@ vec4_visitor::gs_end_primitive()
    unreachable("not reached");
 }
 
-void
-vec4_visitor::emit_untyped_atomic(unsigned atomic_op, unsigned surf_index,
-                                  dst_reg dst, src_reg offset,
-                                  src_reg src0, src_reg src1)
-{
-   unsigned mlen = 0;
-
-   /* Set the atomic operation offset. */
-   emit(MOV(brw_writemask(brw_uvec_mrf(8, mlen, 0), WRITEMASK_X), offset));
-   mlen++;
-
-   /* Set the atomic operation arguments. */
-   if (src0.file != BAD_FILE) {
-      emit(MOV(brw_writemask(brw_uvec_mrf(8, mlen, 0), WRITEMASK_X), src0));
-      mlen++;
-   }
-
-   if (src1.file != BAD_FILE) {
-      emit(MOV(brw_writemask(brw_uvec_mrf(8, mlen, 0), WRITEMASK_X), src1));
-      mlen++;
-   }
-
-   /* Emit the instruction. Note that this maps to the normal SIMD8
-    * untyped atomic message on Ivy Bridge, but that's OK because
-    * unused channels will be masked out.
-    */
-   vec4_instruction *inst = emit(SHADER_OPCODE_UNTYPED_ATOMIC, dst,
-                                 brw_message_reg(0),
-                                 src_reg(surf_index), src_reg(atomic_op));
-   inst->mlen = mlen;
-}
-
-void
-vec4_visitor::emit_untyped_surface_read(unsigned surf_index, dst_reg dst,
-                                        src_reg offset)
-{
-   /* Set the surface read offset. */
-   emit(MOV(brw_writemask(brw_uvec_mrf(8, 0, 0), WRITEMASK_X), offset));
-
-   /* Emit the instruction. Note that this maps to the normal SIMD8
-    * untyped surface read message, but that's OK because unused
-    * channels will be masked out.
-    */
-   vec4_instruction *inst = emit(SHADER_OPCODE_UNTYPED_SURFACE_READ, dst,
-                                 brw_message_reg(0),
-                                 src_reg(surf_index), src_reg(1));
-   inst->mlen = 1;
-}
-
 void
 vec4_visitor::emit_ndc_computation()
 {
@@ -1256,14 +1189,14 @@ vec4_visitor::emit_psiz_and_flags(dst_reg reg)
       dst_reg header1_w = header1;
       header1_w.writemask = WRITEMASK_W;
 
-      emit(MOV(header1, 0u));
+      emit(MOV(header1, brw_imm_ud(0u)));
 
      if (prog_data->vue_map.slots_valid & VARYING_BIT_PSIZ) {
        src_reg psiz = src_reg(output_reg[VARYING_SLOT_PSIZ]);
 
       current_annotation = "Point size";
-      emit(MUL(header1_w, psiz, src_reg((float)(1 << 11))));
-      emit(AND(header1_w, src_reg(header1_w), 0x7ff << 8));
+      emit(MUL(header1_w, psiz, brw_imm_f((float)(1 << 11))));
+      emit(AND(header1_w, src_reg(header1_w), brw_imm_d(0x7ff << 8)));
      }
 
     if (output_reg[VARYING_SLOT_CLIP_DIST0].file != BAD_FILE) {
@@ -1271,13 +1204,13 @@ vec4_visitor::emit_psiz_and_flags(dst_reg reg)
      dst_reg flags0 = dst_reg(this, glsl_type::uint_type);
      dst_reg flags1 = dst_reg(this, glsl_type::uint_type);
 
-      emit(CMP(dst_null_f(), src_reg(output_reg[VARYING_SLOT_CLIP_DIST0]), src_reg(0.0f), BRW_CONDITIONAL_L));
-      emit(VS_OPCODE_UNPACK_FLAGS_SIMD4X2, flags0, src_reg(0));
+      emit(CMP(dst_null_f(), src_reg(output_reg[VARYING_SLOT_CLIP_DIST0]), brw_imm_f(0.0f), BRW_CONDITIONAL_L));
+      emit(VS_OPCODE_UNPACK_FLAGS_SIMD4X2, flags0, brw_imm_d(0));
       emit(OR(header1_w, src_reg(header1_w), src_reg(flags0)));
 
-      emit(CMP(dst_null_f(), src_reg(output_reg[VARYING_SLOT_CLIP_DIST1]), src_reg(0.0f), BRW_CONDITIONAL_L));
-      emit(VS_OPCODE_UNPACK_FLAGS_SIMD4X2, flags1, src_reg(0));
-      emit(SHL(flags1, src_reg(flags1), src_reg(4)));
+      emit(CMP(dst_null_f(), src_reg(output_reg[VARYING_SLOT_CLIP_DIST1]), brw_imm_f(0.0f), BRW_CONDITIONAL_L));
+      emit(VS_OPCODE_UNPACK_FLAGS_SIMD4X2, flags1, brw_imm_d(0));
+      emit(SHL(flags1, src_reg(flags1), brw_imm_d(4)));
       emit(OR(header1_w, src_reg(header1_w), src_reg(flags1)));
     }
 
@@ -1294,20 +1227,20 @@ vec4_visitor::emit_psiz_and_flags(dst_reg reg)
         output_reg[BRW_VARYING_SLOT_NDC].file != BAD_FILE) {
      src_reg ndc_w = src_reg(output_reg[BRW_VARYING_SLOT_NDC]);
      ndc_w.swizzle = BRW_SWIZZLE_WWWW;
-      emit(CMP(dst_null_f(), ndc_w, src_reg(0.0f), BRW_CONDITIONAL_L));
+      emit(CMP(dst_null_f(), ndc_w, brw_imm_f(0.0f), BRW_CONDITIONAL_L));
      vec4_instruction *inst;
-      inst = emit(OR(header1_w, src_reg(header1_w), src_reg(1u << 6)));
+      inst = emit(OR(header1_w, src_reg(header1_w), brw_imm_ud(1u << 6)));
      inst->predicate = BRW_PREDICATE_NORMAL;
      output_reg[BRW_VARYING_SLOT_NDC].type = BRW_REGISTER_TYPE_F;
-      inst = emit(MOV(output_reg[BRW_VARYING_SLOT_NDC], src_reg(0.0f)));
+      inst = emit(MOV(output_reg[BRW_VARYING_SLOT_NDC], brw_imm_f(0.0f)));
      inst->predicate = BRW_PREDICATE_NORMAL;
     }
 
     emit(MOV(retype(reg, BRW_REGISTER_TYPE_UD), src_reg(header1)));
    } else if (devinfo->gen < 6) {
-      emit(MOV(retype(reg, BRW_REGISTER_TYPE_UD), 0u));
+      emit(MOV(retype(reg, BRW_REGISTER_TYPE_UD), brw_imm_ud(0u)));
    } else {
-      emit(MOV(retype(reg, BRW_REGISTER_TYPE_D), src_reg(0)));
+      emit(MOV(retype(reg, BRW_REGISTER_TYPE_D), brw_imm_d(0)));
       if (prog_data->vue_map.slots_valid & VARYING_BIT_PSIZ) {
          dst_reg reg_w = reg;
          reg_w.writemask = WRITEMASK_W;
@@ -1339,12 +1272,34 @@ vec4_visitor::emit_generic_urb_slot(dst_reg reg, int varying)
    assert(varying < VARYING_SLOT_MAX);
    assert(output_reg[varying].type == reg.type);
    current_annotation = output_reg_annotation[varying];
-   if (output_reg[varying].file != BAD_FILE)
+   if (output_reg[varying].file != BAD_FILE) {
       return emit(MOV(reg, src_reg(output_reg[varying])));
-   else
+   } else
      return NULL;
 }
 
+void
+vec4_visitor::emit_generic_urb_slot(dst_reg reg, int varying, int component)
+{
+   assert(varying < VARYING_SLOT_MAX);
+   assert(varying >= VARYING_SLOT_VAR0);
+   varying = varying - VARYING_SLOT_VAR0;
+
+   unsigned num_comps = output_generic_num_components[varying][component];
+   if (num_comps == 0)
+      return;
+
+   assert(output_generic_reg[varying][component].type == reg.type);
+   current_annotation = output_reg_annotation[varying];
+   if (output_generic_reg[varying][component].file != BAD_FILE) {
+      src_reg src = src_reg(output_generic_reg[varying][component]);
+      src.swizzle = BRW_SWZ_COMP_OUTPUT(component);
+      reg.writemask =
+         brw_writemask_for_component_packing(num_comps, component);
+      emit(MOV(reg, src));
+   }
+}
+
 void
 vec4_visitor::emit_urb_slot(dst_reg reg, int varying)
 {
@@ -1384,7 +1339,13 @@ vec4_visitor::emit_urb_slot(dst_reg reg, int varying)
      /* No need to write to this slot */
      break;
    default:
-      emit_generic_urb_slot(reg, varying);
+      if (varying >= VARYING_SLOT_VAR0) {
+         for (int i = 0; i < 4; i++) {
+            emit_generic_urb_slot(reg, varying, i);
+         }
+      } else {
+         emit_generic_urb_slot(reg, varying);
+      }
      break;
    }
 }
@@ -1499,42 +1460,13 @@ vec4_visitor::get_scratch_offset(bblock_t *block, vec4_instruction *inst,
      src_reg index = src_reg(this, glsl_type::int_type);
 
      emit_before(block, inst, ADD(dst_reg(index), *reladdr,
-                                   src_reg(reg_offset)));
+                                   brw_imm_d(reg_offset)));
      emit_before(block, inst, MUL(dst_reg(index), index,
-                                   src_reg(message_header_scale)));
-
-      return index;
-   } else {
-      return src_reg(reg_offset * message_header_scale);
-   }
-}
-
-src_reg
-vec4_visitor::get_pull_constant_offset(bblock_t * block, vec4_instruction *inst,
-                                       src_reg *reladdr, int reg_offset)
-{
-   if (reladdr) {
-      src_reg index = src_reg(this, glsl_type::int_type);
-
-      emit_before(block, inst, ADD(dst_reg(index), *reladdr,
-                                   src_reg(reg_offset)));
-
-      /* Pre-gen6, the message header uses byte offsets instead of vec4
-       * (16-byte) offset units.
-       */
-      if (devinfo->gen < 6) {
-         emit_before(block, inst, MUL(dst_reg(index), index, src_reg(16)));
-      }
+                                   brw_imm_d(message_header_scale)));
 
      return index;
-   } else if (devinfo->gen >= 8) {
-      /* Store the offset in a GRF so we can send-from-GRF. */
-      src_reg offset = src_reg(this, glsl_type::int_type);
-      emit_before(block, inst, MOV(dst_reg(offset), src_reg(reg_offset)));
-      return offset;
   } else {
-      int message_header_scale = devinfo->gen < 6 ? 16 : 1;
-      return src_reg(reg_offset * message_header_scale);
+      return brw_imm_d(reg_offset * message_header_scale);
   }
 }
 
@@ -1590,7 +1522,7 @@ vec4_visitor::emit_scratch_write(bblock_t *block, vec4_instruction *inst,
    inst->insert_after(block, write);
 
    inst->dst.file = temp.file;
-   inst->dst.reg = temp.reg;
+   inst->dst.nr = temp.nr;
    inst->dst.reg_offset = temp.reg_offset;
    inst->dst.reladdr = NULL;
 }
@@ -1617,10 +1549,10 @@ vec4_visitor::emit_resolve_reladdr(int scratch_loc[], bblock_t *block,
                                      *src.reladdr);
 
    /* Now handle scratch access on src */
-   if (src.file == GRF && scratch_loc[src.reg] != -1) {
+   if (src.file == VGRF && scratch_loc[src.nr] != -1) {
      dst_reg temp = dst_reg(this, glsl_type::vec4_type);
-      emit_scratch_read(block, inst, temp, src, scratch_loc[src.reg]);
-      src.reg = temp.reg;
+      emit_scratch_read(block, inst, temp, src, scratch_loc[src.nr]);
+      src.nr = temp.nr;
      src.reg_offset = temp.reg_offset;
      src.reladdr = NULL;
    }
@@ -1645,18 +1577,18 @@ vec4_visitor::move_grf_array_access_to_scratch()
    * scratch.
    */
  foreach_block_and_inst(block, vec4_instruction, inst, cfg) {
-      if (inst->dst.file == GRF && inst->dst.reladdr) {
-         if (scratch_loc[inst->dst.reg] == -1) {
-            scratch_loc[inst->dst.reg] = last_scratch;
-            last_scratch += this->alloc.sizes[inst->dst.reg];
+      if (inst->dst.file == VGRF && inst->dst.reladdr) {
+         if (scratch_loc[inst->dst.nr] == -1) {
+            scratch_loc[inst->dst.nr] = last_scratch;
+            last_scratch += this->alloc.sizes[inst->dst.nr];
          }
 
         for (src_reg *iter = inst->dst.reladdr;
             iter->reladdr;
             iter = iter->reladdr) {
-            if (iter->file == GRF && scratch_loc[iter->reg] == -1) {
-               scratch_loc[iter->reg] = last_scratch;
-               last_scratch += this->alloc.sizes[iter->reg];
+            if (iter->file == VGRF && scratch_loc[iter->nr] == -1) {
+               scratch_loc[iter->nr] = last_scratch;
+               last_scratch += this->alloc.sizes[iter->nr];
            }
         }
      }
@@ -1665,9 +1597,9 @@ vec4_visitor::move_grf_array_access_to_scratch()
        for (src_reg *iter = &inst->src[i];
           iter->reladdr;
           iter = iter->reladdr) {
-            if (iter->file == GRF && scratch_loc[iter->reg] == -1) {
-               scratch_loc[iter->reg] = last_scratch;
-               last_scratch += this->alloc.sizes[iter->reg];
+            if (iter->file == VGRF && scratch_loc[iter->nr] == -1) {
+               scratch_loc[iter->nr] = last_scratch;
+               last_scratch += this->alloc.sizes[iter->nr];
           }
        }
     }
@@ -1693,8 +1625,8 @@ vec4_visitor::move_grf_array_access_to_scratch()
     /* Now that we have handled any (possibly recursive) reladdr scratch
      * accesses for dst we can safely do the scratch write for dst itself
      */
-      if (inst->dst.file == GRF && scratch_loc[inst->dst.reg] != -1)
-         emit_scratch_write(block, inst, scratch_loc[inst->dst.reg]);
+      if (inst->dst.file == VGRF && scratch_loc[inst->dst.nr] != -1)
+         emit_scratch_write(block, inst, scratch_loc[inst->dst.nr]);
 
      /* Now handle scratch access on any src. In this case, since inst->src[i]
       * already is a src_reg, we can just call emit_resolve_reladdr with
@@ -1715,17 +1647,31 @@
 void
 vec4_visitor::emit_pull_constant_load(bblock_t *block, vec4_instruction *inst,
                                       dst_reg temp, src_reg orig_src,
-                                      int base_offset)
+                                      int base_offset, src_reg indirect)
 {
    int reg_offset = base_offset + orig_src.reg_offset;
 
-   src_reg index = src_reg(prog_data->base.binding_table.pull_constants_start);
-   src_reg offset = get_pull_constant_offset(block, inst, orig_src.reladdr,
-                                             reg_offset);
+   const unsigned index = prog_data->base.binding_table.pull_constants_start;
+
+   src_reg offset;
+   if (indirect.file != BAD_FILE) {
+      offset = src_reg(this, glsl_type::uint_type);
+
+      emit_before(block, inst, ADD(dst_reg(offset), indirect,
+                                   brw_imm_ud(reg_offset * 16)));
+   } else if (devinfo->gen >= 8) {
+      /* Store the offset in a GRF so we can send-from-GRF. */
+      offset = src_reg(this, glsl_type::uint_type);
+      emit_before(block, inst, MOV(dst_reg(offset), brw_imm_ud(reg_offset * 16)));
+   } else {
+      offset = brw_imm_d(reg_offset * 16);
+   }
 
    emit_pull_constant_load_reg(temp,
-                               index,
+                               brw_imm_ud(index),
                                offset,
                                block, inst);
+
+   brw_mark_surface_used(&prog_data->base, index);
 }
 
 /**
@@ -1743,61 +1689,65 @@ vec4_visitor::emit_pull_constant_load(bblock_t *block, vec4_instruction *inst,
 void
 vec4_visitor::move_uniform_array_access_to_pull_constants()
 {
+   /* The vulkan driver doesn't support pull constants other than UBOs so
+    * everything has to be pushed regardless.
+    */
+   if (stage_prog_data->pull_param == NULL) {
+      split_uniform_registers();
+      return;
+   }
+
    int pull_constant_loc[this->uniforms];
    memset(pull_constant_loc, -1, sizeof(pull_constant_loc));
-   bool nested_reladdr;
 
-   /* Walk through and find array access of uniforms.  Put a copy of that
-    * uniform in the pull constant buffer.
-    *
-    * Note that we don't move constant-indexed accesses to arrays.  No
-    * testing has been done of the performance impact of this choice.
+   /* First, walk through the instructions and determine which things need to
+    * be pulled.  We mark something as needing to be pulled by setting
+    * pull_constant_loc to 0.
     */
-   do {
-      nested_reladdr = false;
-
-      foreach_block_and_inst_safe(block, vec4_instruction, inst, cfg) {
-         for (int i = 0 ; i < 3; i++) {
-            if (inst->src[i].file != UNIFORM || !inst->src[i].reladdr)
-               continue;
+   foreach_block_and_inst(block, vec4_instruction, inst, cfg) {
+      /* We only care about MOV_INDIRECT of a uniform */
+      if (inst->opcode != SHADER_OPCODE_MOV_INDIRECT ||
+          inst->src[0].file != UNIFORM)
+         continue;
 
-            int uniform = inst->src[i].reg;
+      int uniform_nr = inst->src[0].nr + inst->src[0].reg_offset;
 
-            if (inst->src[i].reladdr->reladdr)
-               nested_reladdr = true;  /* will need another pass */
+      for (unsigned j = 0; j < DIV_ROUND_UP(inst->src[2].ud, 16); j++)
+         pull_constant_loc[uniform_nr + j] = 0;
+   }
 
-            /* If this array isn't already present in the pull constant buffer,
-             * add it.
-             */
-            if (pull_constant_loc[uniform] == -1) {
-               const gl_constant_value **values =
-                  &stage_prog_data->param[uniform * 4];
+   /* Next, we walk the list of uniforms and assign real pull constant
+    * locations and set their corresponding entries in pull_param.
+ */ + for (int j = 0; j < this->uniforms; j++) { + if (pull_constant_loc[j] < 0) + continue; - pull_constant_loc[uniform] = stage_prog_data->nr_pull_params / 4; + pull_constant_loc[j] = stage_prog_data->nr_pull_params / 4; - assert(uniform < uniform_array_size); - for (int j = 0; j < uniform_size[uniform] * 4; j++) { - stage_prog_data->pull_param[stage_prog_data->nr_pull_params++] - = values[j]; - } - } + for (int i = 0; i < 4; i++) { + stage_prog_data->pull_param[stage_prog_data->nr_pull_params++] + = stage_prog_data->param[j * 4 + i]; + } + } - /* Set up the annotation tracking for new generated instructions. */ - base_ir = inst->ir; - current_annotation = inst->annotation; + /* Finally, we can walk through the instructions and lower MOV_INDIRECT + * instructions to actual uniform pulls. + */ + foreach_block_and_inst_safe(block, vec4_instruction, inst, cfg) { + /* We only care about MOV_INDIRECT of a uniform */ + if (inst->opcode != SHADER_OPCODE_MOV_INDIRECT || + inst->src[0].file != UNIFORM) + continue; - dst_reg temp = dst_reg(this, glsl_type::vec4_type); + int uniform_nr = inst->src[0].nr + inst->src[0].reg_offset; - emit_pull_constant_load(block, inst, temp, inst->src[i], - pull_constant_loc[uniform]); + assert(inst->src[0].swizzle == BRW_SWIZZLE_NOOP); - inst->src[i].file = temp.file; - inst->src[i].reg = temp.reg; - inst->src[i].reg_offset = temp.reg_offset; - inst->src[i].reladdr = NULL; - } - } - } while (nested_reladdr); + emit_pull_constant_load(block, inst, inst->dst, inst->src[0], + pull_constant_loc[uniform_nr], inst->src[1]); + inst->remove(block); + } /* Now there are no accesses of the UNIFORM file with a reladdr, so * no need to track them as larger-than-vec4 objects. This will be @@ -1843,6 +1793,9 @@ vec4_visitor::vec4_visitor(const struct brw_compiler *compiler, this->current_annotation = NULL; memset(this->output_reg_annotation, 0, sizeof(this->output_reg_annotation)); + memset(this->output_generic_num_components, 0, + sizeof(this->output_generic_num_components)); + this->virtual_grf_start = NULL; this->virtual_grf_end = NULL; this->live_intervals = NULL; @@ -1850,17 +1803,6 @@ vec4_visitor::vec4_visitor(const struct brw_compiler *compiler, this->max_grf = devinfo->gen >= 7 ? GEN7_MRF_HACK_START : BRW_MAX_GRF; this->uniforms = 0; - - /* Initialize uniform_array_size to at least 1 because pre-gen6 VS requires - * at least one. See setup_uniforms() in brw_vec4.cpp. - */ - this->uniform_array_size = 1; - if (prog_data) { - this->uniform_array_size = - MAX2(DIV_ROUND_UP(stage_prog_data->nr_params, 4), 1); - } - - this->uniform_size = rzalloc_array(mem_ctx, int, this->uniform_array_size); } vec4_visitor::~vec4_visitor()
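
The notes below are editorial annotations keyed to the patch above; none of the code in them is part of the diff or of the mesa tree.

First, the emit_unpack_unorm_4x8()/emit_unpack_snorm_4x8() sequences in the patch (byte extraction via VEC4_OPCODE_MOV_BYTES, a multiply by 1.0f/255.0f or 1.0f/127.0f, and for snorm the clamp built from the two emit_minmax() calls) evaluate the GLSL unpack built-ins. A scalar CPU-side C++ reference of what the emitted IR computes, following the GLSL definitions (the function names here are illustrative, not mesa API):

#include <algorithm>
#include <cstdint>

/* Reference for unpackUnorm4x8(): byte i of the packed word maps to
 * out[i] in [0, 1]; the MOV_BYTES + MUL(1/255) pair does this per channel.
 */
static void unpack_unorm_4x8(uint32_t p, float out[4])
{
   for (int i = 0; i < 4; i++)
      out[i] = (float)((p >> (8 * i)) & 0xff) / 255.0f;
}

/* Reference for unpackSnorm4x8(): bytes are reinterpreted as signed,
 * scaled by 1/127, then clamped to [-1, 1]. The clamp is what the
 * emit_minmax(BRW_CONDITIONAL_GE)/emit_minmax(BRW_CONDITIONAL_L) pair
 * implements; without it, -128/127 would fall just below -1.
 */
static void unpack_snorm_4x8(uint32_t p, float out[4])
{
   for (int i = 0; i < 4; i++) {
      int8_t b = (int8_t)((p >> (8 * i)) & 0xff);
      out[i] = std::min(std::max((float)b / 127.0f, -1.0f), 1.0f);
   }
}

int main()
{
   float v[4];
   unpack_snorm_4x8(0x80u, v);   /* v[0] is -128/127, clamped to -1.0f */
   unpack_unorm_4x8(0xffu, v);   /* v[0] is 255/255 == 1.0f */
   return 0;
}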
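
Second, the vec4-versus-dvec4 counting rule introduced by type_size_xvec4() is easiest to verify with a worked example. The sketch below is a simplified, self-contained model of that rule only (SimpleType, is_dual_slot() and slots_for_type() are hypothetical stand-ins, not the glsl_type API), covering just the scalar/vector/matrix cases; "dual slot" means a double-precision vector wider than two components, which no longer fits a single 128-bit vec4 slot but, per ARB_vertex_attrib_64bit, still occupies a single attribute location:

#include <cassert>

struct SimpleType {
   bool is_double;           /* double-precision base type */
   unsigned components;      /* vector width (column height for matrices) */
   unsigned matrix_columns;  /* 1 for non-matrices */
};

static bool is_dual_slot(const SimpleType &t)
{
   return t.is_double && t.components > 2;
}

/* as_vec4 == true  -> measure in vec4 slots (register allocation)
 * as_vec4 == false -> measure in dvec4 slots (vertex attribute locations)
 */
static int slots_for_type(const SimpleType &t, bool as_vec4)
{
   unsigned col_slots = (as_vec4 && is_dual_slot(t)) ? 2 : 1;
   return t.matrix_columns * col_slots;
}

int main()
{
   SimpleType vec3  = { false, 3, 1 };
   SimpleType dvec4 = { true,  4, 1 };
   SimpleType dmat3 = { true,  3, 3 };

   assert(slots_for_type(vec3,  true)  == 1); /* any float vector: one vec4 */
   assert(slots_for_type(dvec4, true)  == 2); /* dvec4 needs two vec4 slots */
   assert(slots_for_type(dvec4, false) == 1); /* but only one location */
   assert(slots_for_type(dmat3, true)  == 6); /* three dvec3 columns x 2 */
   assert(slots_for_type(dmat3, false) == 3); /* three locations */
   return 0;
}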
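
Third, the WA_SIGN branch of emit_gen6_gather_wa() sign-extends a width-bit channel by shifting it up to bit 31 and arithmetically shifting back down. A minimal CPU-side illustration of the same trick with plain integers rather than GEN IR (sign_extend() is illustrative; it assumes 0 < width < 32 and an implementation with arithmetic right shift of signed integers, which all compilers mesa targets provide):

#include <cassert>
#include <cstdint>

static int32_t sign_extend(uint32_t x, unsigned width)
{
   /* Move the value's sign bit into bit 31 (this is the SHL)... */
   int32_t shifted = (int32_t)(x << (32 - width));
   /* ...then shift back down, replicating the sign bit through the
    * high bits (this is the ASR).
    */
   return shifted >> (32 - width);
}

int main()
{
   assert(sign_extend(0xffu, 8) == -1);      /* 8-bit 0xff is -1 */
   assert(sign_extend(0x7fu, 8) == 127);     /* positive values unchanged */
   assert(sign_extend(0x3fffu, 14) == -1);   /* 14-bit all-ones is -1 */
   return 0;
}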
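
Finally, the rewritten move_uniform_array_access_to_pull_constants() replaces the old do/while reladdr walk with a three-phase shape: scan to mark what must be pulled, assign compact locations, then rewrite the instructions. A generic sketch of that pattern under heavily simplified assumptions (Inst and lower_pulls() are hypothetical, not mesa types); the design point is that three linear passes avoid re-walking the instruction list for nested indirections, which is what the removed nested_reladdr loop had to do:

#include <vector>

struct Inst { bool needs_pull; int uniform; };

static void lower_pulls(std::vector<Inst> &insts, int num_uniforms)
{
   std::vector<int> loc(num_uniforms, -1);

   /* Phase 1: mark every uniform accessed indirectly (0 = "needs pull"). */
   for (const Inst &i : insts)
      if (i.needs_pull)
         loc[i.uniform] = 0;

   /* Phase 2: assign each marked uniform a compact pull-constant slot. */
   int next = 0;
   for (int u = 0; u < num_uniforms; u++)
      if (loc[u] == 0)
         loc[u] = next++;

   /* Phase 3: rewrite instructions against the assigned slots. */
   for (Inst &i : insts)
      if (i.needs_pull)
         i.uniform = loc[i.uniform];
}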