X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fmesa%2Fdrivers%2Fdri%2Fi965%2Fbrw_vec4_nir.cpp;h=20c063d0010fc64abf56a5b2db5a90bf4893a93d;hb=6ba291db4ba4f03ac94560eaae861bc162ac838e;hp=8a87759fcb97f45f05a52e1a47857e00ebad402e;hpb=fa4e3c3c9f6f3a72a032499fccaa6e222d6a7fa4;p=mesa.git

diff --git a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
index 8a87759fcb9..20c063d0010 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
@@ -58,25 +58,24 @@ vec4_visitor::nir_setup_system_value_intrinsic(nir_intrinsic_instr *instr)
       unreachable("should be lowered by lower_vertex_id().");
 
    case nir_intrinsic_load_vertex_id_zero_base:
-      reg = &this->nir_system_values[SYSTEM_VALUE_VERTEX_ID_ZERO_BASE];
+      reg = &nir_system_values[SYSTEM_VALUE_VERTEX_ID_ZERO_BASE];
       if (reg->file == BAD_FILE)
-         *reg =
-            *this->make_reg_for_system_value(SYSTEM_VALUE_VERTEX_ID_ZERO_BASE,
-                                             glsl_type::int_type);
+         *reg = *make_reg_for_system_value(SYSTEM_VALUE_VERTEX_ID_ZERO_BASE,
+                                           glsl_type::int_type);
       break;
 
    case nir_intrinsic_load_base_vertex:
-      reg = &this->nir_system_values[SYSTEM_VALUE_BASE_VERTEX];
+      reg = &nir_system_values[SYSTEM_VALUE_BASE_VERTEX];
       if (reg->file == BAD_FILE)
-         *reg = *this->make_reg_for_system_value(SYSTEM_VALUE_BASE_VERTEX,
-                                                 glsl_type::int_type);
+         *reg = *make_reg_for_system_value(SYSTEM_VALUE_BASE_VERTEX,
+                                           glsl_type::int_type);
       break;
 
    case nir_intrinsic_load_instance_id:
-      reg = &this->nir_system_values[SYSTEM_VALUE_INSTANCE_ID];
+      reg = &nir_system_values[SYSTEM_VALUE_INSTANCE_ID];
       if (reg->file == BAD_FILE)
-         *reg = *this->make_reg_for_system_value(SYSTEM_VALUE_INSTANCE_ID,
-                                                 glsl_type::int_type);
+         *reg = *make_reg_for_system_value(SYSTEM_VALUE_INSTANCE_ID,
+                                           glsl_type::int_type);
       break;
 
    default:
@@ -119,7 +118,7 @@ vec4_visitor::nir_setup_inputs(nir_shader *shader)
 
    foreach_list_typed(nir_variable, var, node, &shader->inputs) {
       int offset = var->data.driver_location;
-      unsigned size = type_size(var->type);
+      unsigned size = type_size_vec4(var->type);
       for (unsigned i = 0; i < size; i++) {
          src_reg src = src_reg(ATTR, var->data.location + i, var->type);
         nir_inputs[offset + i] = src;
@@ -132,20 +131,17 @@ vec4_visitor::nir_setup_uniforms(nir_shader *shader)
 {
    uniforms = 0;
 
-   nir_uniform_driver_location =
-      rzalloc_array(mem_ctx, unsigned, this->uniform_array_size);
-
    if (shader_prog) {
       foreach_list_typed(nir_variable, var, node, &shader->uniforms) {
          /* UBO's, atomics and samplers don't take up space in the
            uniform file */
         if (var->interface_type != NULL || var->type->contains_atomic() ||
-            type_size(var->type) == 0) {
+            type_size_vec4(var->type) == 0) {
            continue;
         }
 
         assert(uniforms < uniform_array_size);
-        this->uniform_size[uniforms] = type_size(var->type);
+        uniform_size[uniforms] = type_size_vec4(var->type);
 
         if (strncmp(var->name, "gl_", 3) == 0)
            nir_setup_builtin_uniform(var);
@@ -153,8 +149,37 @@ vec4_visitor::nir_setup_uniforms(nir_shader *shader)
            nir_setup_uniform(var);
      }
   } else {
-      /* ARB_vertex_program is not supported yet */
-      assert("Not implemented");
+      /* For ARB_vertex_program, only a single "parameters" variable is
+       * generated to support uniform data.
+       */
+      nir_variable *var = (nir_variable *) shader->uniforms.get_head();
+      assert(shader->uniforms.length() == 1 &&
+             strcmp(var->name, "parameters") == 0);
+
+      assert(uniforms < uniform_array_size);
+      uniform_size[uniforms] = type_size_vec4(var->type);
+
+      struct gl_program_parameter_list *plist = prog->Parameters;
+      for (unsigned p = 0; p < plist->NumParameters; p++) {
+         uniform_vector_size[uniforms] = plist->Parameters[p].Size;
+
+         /* Parameters should be either vec4 uniforms or single component
+          * constants; matrices and other larger types should have been
+          * broken down earlier.
+          */
+         assert(uniform_vector_size[uniforms] <= 4);
+
+         int i;
+         for (i = 0; i < uniform_vector_size[uniforms]; i++) {
+            stage_prog_data->param[uniforms * 4 + i] =
+               &plist->ParameterValues[p][i];
+         }
+         for (; i < 4; i++) {
+            static const gl_constant_value zero = { 0.0 };
+            stage_prog_data->param[uniforms * 4 + i] = &zero;
+         }
+
+         uniforms++;
+      }
    }
 }
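
The new else-branch above packs every gl_program_parameter into its own vec4 slot of stage_prog_data->param, padding parameters shorter than four components with a shared zero constant, so component c of uniform u always lives at param[u * 4 + c]. A standalone sketch of that indexing rule (the parameter sizes below are hypothetical, not taken from the change):

   #include <cstdio>

   int main()
   {
      const unsigned sizes[3] = { 4, 1, 3 };   /* Parameters[p].Size, made up */
      unsigned uniforms = 0;
      for (unsigned p = 0; p < 3; p++) {
         for (unsigned c = 0; c < 4; c++) {
            /* Real components point into ParameterValues; the rest at &zero */
            const char *src = c < sizes[p] ? "ParameterValues[p][c]" : "&zero";
            printf("param[%u] <- %s\n", uniforms * 4 + c, src);
         }
         uniforms++;
      }
      return 0;
   }
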
@@ -200,7 +225,6 @@ vec4_visitor::nir_setup_uniform(nir_variable *var)
          stage_prog_data->param[uniforms * 4 + i] = &zero;
       }
 
-   nir_uniform_driver_location[uniforms] = var->data.driver_location;
    uniforms++;
 }
 
@@ -218,10 +242,10 @@ vec4_visitor::nir_setup_builtin_uniform(nir_variable *var)
        * ParameterValues directly, since unlike brw_fs.cpp, we never
        * add new state references during compile.
        */
-      int index = _mesa_add_state_reference(this->prog->Parameters,
+      int index = _mesa_add_state_reference(prog->Parameters,
                                             (gl_state_index *)slots[i].tokens);
       gl_constant_value *values =
-         &this->prog->Parameters->ParameterValues[index][0];
+         &prog->Parameters->ParameterValues[index][0];
 
       assert(uniforms < uniform_array_size);
 
@@ -229,11 +253,10 @@
          stage_prog_data->param[uniforms * 4 + j] =
            &values[GET_SWZ(slots[i].swizzle, j)];
 
-      this->uniform_vector_size[uniforms] =
+      uniform_vector_size[uniforms] =
         (var->type->is_scalar() || var->type->is_vector() ||
          var->type->is_matrix() ? var->type->vector_elements : 4);
 
-      nir_uniform_driver_location[uniforms] = var->data.driver_location;
       uniforms++;
    }
 }
@@ -320,7 +343,7 @@ vec4_visitor::nir_emit_block(nir_block *block)
 void
 vec4_visitor::nir_emit_instr(nir_instr *instr)
 {
-   this->base_ir = instr;
+   base_ir = instr;
 
    switch (instr->type) {
    case nir_instr_type_load_const:
@@ -343,6 +366,10 @@ vec4_visitor::nir_emit_instr(nir_instr *instr)
       nir_emit_texture(nir_instr_as_tex(instr));
       break;
 
+   case nir_instr_type_ssa_undef:
+      nir_emit_undef(nir_instr_as_ssa_undef(instr));
+      break;
+
    default:
       fprintf(stderr, "VS instruction not yet implemented by NIR->vec4\n");
       break;
@@ -369,9 +396,14 @@ dst_reg_for_nir_reg(vec4_visitor *v, nir_register *nir_reg,
 dst_reg
 vec4_visitor::get_nir_dest(nir_dest dest)
 {
-   assert(!dest.is_ssa);
-   return dst_reg_for_nir_reg(this, dest.reg.reg, dest.reg.base_offset,
-                              dest.reg.indirect);
+   if (dest.is_ssa) {
+      dst_reg dst = dst_reg(GRF, alloc.allocate(1));
+      nir_ssa_values[dest.ssa.index] = dst;
+      return dst;
+   } else {
+      return dst_reg_for_nir_reg(this, dest.reg.reg, dest.reg.base_offset,
+                                 dest.reg.indirect);
+   }
 }
 
 dst_reg
@@ -426,15 +458,30 @@ void
 vec4_visitor::nir_emit_load_const(nir_load_const_instr *instr)
 {
    dst_reg reg = dst_reg(GRF, alloc.allocate(1));
-   reg.type = BRW_REGISTER_TYPE_F;
+   reg.type = BRW_REGISTER_TYPE_D;
+
+   unsigned remaining = brw_writemask_for_size(instr->def.num_components);
 
    /* @FIXME: consider emitting vector operations to save some MOVs in
     * cases where the components are representable in 8 bits.
-    * By now, we emit a MOV for each component.
+    * For now, we emit a MOV for each distinct value.
    */
-   for (unsigned i = 0; i < instr->def.num_components; ++i) {
-      reg.writemask = 1 << i;
-      emit(MOV(reg, src_reg(instr->value.f[i])));
+   for (unsigned i = 0; i < instr->def.num_components; i++) {
+      unsigned writemask = 1 << i;
+
+      if ((remaining & writemask) == 0)
+         continue;
+
+      for (unsigned j = i; j < instr->def.num_components; j++) {
+         if (instr->value.u[i] == instr->value.u[j]) {
+            writemask |= 1 << j;
+         }
+      }
+
+      reg.writemask = writemask;
+      emit(MOV(reg, src_reg(instr->value.i[i])));
+
+      remaining &= ~writemask;
    }
 
    /* Set final writemask */
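
The rewritten nir_emit_load_const above coalesces components that hold the same bit pattern into a single MOV by OR-ing their bits into one writemask, instead of emitting one MOV per component. A standalone sketch of the same loop over a hypothetical vec4(0, 0, 1.0f, 0) constant:

   #include <cstdio>

   int main()
   {
      const unsigned value[4] = { 0, 0, 0x3f800000, 0 };   /* vec4(0, 0, 1.0f, 0) */
      unsigned remaining = (1 << 4) - 1;                   /* full xyzw writemask */

      for (unsigned i = 0; i < 4; i++) {
         unsigned writemask = 1 << i;
         if ((remaining & writemask) == 0)
            continue;                     /* already covered by an earlier MOV */
         for (unsigned j = i; j < 4; j++)
            if (value[i] == value[j])
               writemask |= 1 << j;       /* fold equal components together */
         printf("MOV writemask=0x%x value=0x%x\n", writemask, value[i]);
         remaining &= ~writemask;
      }
      return 0;   /* prints two MOVs: writemask 0xb (xyw) and 0x4 (z) */
   }
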
@@ -494,30 +541,14 @@ vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
    case nir_intrinsic_load_vertex_id:
       unreachable("should be lowered by lower_vertex_id()");
 
-   case nir_intrinsic_load_vertex_id_zero_base: {
-      src_reg vertex_id =
-         src_reg(nir_system_values[SYSTEM_VALUE_VERTEX_ID_ZERO_BASE]);
-      assert(vertex_id.file != BAD_FILE);
-      dest = get_nir_dest(instr->dest, vertex_id.type);
-      emit(MOV(dest, vertex_id));
-      break;
-   }
-
-   case nir_intrinsic_load_base_vertex: {
-      src_reg base_vertex =
-         src_reg(nir_system_values[SYSTEM_VALUE_BASE_VERTEX]);
-      assert(base_vertex.file != BAD_FILE);
-      dest = get_nir_dest(instr->dest, base_vertex.type);
-      emit(MOV(dest, base_vertex));
-      break;
-   }
-
+   case nir_intrinsic_load_vertex_id_zero_base:
+   case nir_intrinsic_load_base_vertex:
    case nir_intrinsic_load_instance_id: {
-      src_reg instance_id =
-         src_reg(nir_system_values[SYSTEM_VALUE_INSTANCE_ID]);
-      assert(instance_id.file != BAD_FILE);
-      dest = get_nir_dest(instr->dest, instance_id.type);
-      emit(MOV(dest, instance_id));
+      gl_system_value sv = nir_system_value_from_intrinsic(instr->intrinsic);
+      src_reg val = src_reg(nir_system_values[sv]);
+      assert(val.file != BAD_FILE);
+      dest = get_nir_dest(instr->dest, val.type);
+      emit(MOV(dest, val));
       break;
    }
 
@@ -525,24 +556,14 @@ vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
       has_indirect = true;
       /* fallthrough */
    case nir_intrinsic_load_uniform: {
-      int uniform = instr->const_index[0];
-
       dest = get_nir_dest(instr->dest);
 
-      if (has_indirect) {
-         /* Split addressing into uniform and offset */
-         int offset = uniform - nir_uniform_driver_location[uniform];
-         assert(offset >= 0);
-
-         uniform -= offset;
-         assert(uniform >= 0);
+      src = src_reg(dst_reg(UNIFORM, instr->const_index[0]));
+      src.reg_offset = instr->const_index[1];
 
-         src = src_reg(dst_reg(UNIFORM, uniform));
-         src.reg_offset = offset;
+      if (has_indirect) {
          src_reg tmp = get_nir_src(instr->src[0], BRW_REGISTER_TYPE_D, 1);
          src.reladdr = new(mem_ctx) src_reg(tmp);
-      } else {
-         src = src_reg(dst_reg(UNIFORM, uniform));
       }
 
       emit(MOV(dest, src));
@@ -745,31 +766,35 @@ vec4_visitor::nir_emit_alu(nir_alu_instr *instr)
       break;
 
    case nir_op_imul: {
-      nir_const_value *value0 = nir_src_as_const_value(instr->src[0].src);
-      nir_const_value *value1 = nir_src_as_const_value(instr->src[1].src);
-
-      /* For integer multiplication, the MUL uses the low 16 bits of one of
-       * the operands (src0 through SNB, src1 on IVB and later). The MACH
-       * accumulates in the contribution of the upper 16 bits of that
-       * operand. If we can determine that one of the args is in the low
-       * 16 bits, though, we can just emit a single MUL.
-       */
-      if (value0 && value0->u[0] < (1 << 16)) {
-         if (devinfo->gen < 7)
-            emit(MUL(dst, op[0], op[1]));
-         else
-            emit(MUL(dst, op[1], op[0]));
-      } else if (value1 && value1->u[0] < (1 << 16)) {
-         if (devinfo->gen < 7)
-            emit(MUL(dst, op[1], op[0]));
-         else
-            emit(MUL(dst, op[0], op[1]));
+      if (devinfo->gen < 8) {
+         nir_const_value *value0 = nir_src_as_const_value(instr->src[0].src);
+         nir_const_value *value1 = nir_src_as_const_value(instr->src[1].src);
+
+         /* For integer multiplication, the MUL uses the low 16 bits of one
+          * of the operands (src0 through SNB, src1 on IVB and later). The
+          * MACH accumulates in the contribution of the upper 16 bits of
+          * that operand. If we can determine that one of the args is in
+          * the low 16 bits, though, we can just emit a single MUL.
+          */
+         if (value0 && value0->u[0] < (1 << 16)) {
+            if (devinfo->gen < 7)
+               emit(MUL(dst, op[0], op[1]));
+            else
+               emit(MUL(dst, op[1], op[0]));
+         } else if (value1 && value1->u[0] < (1 << 16)) {
+            if (devinfo->gen < 7)
+               emit(MUL(dst, op[1], op[0]));
+            else
+               emit(MUL(dst, op[0], op[1]));
+         } else {
+            struct brw_reg acc = retype(brw_acc_reg(8), dst.type);
+
+            emit(MUL(acc, op[0], op[1]));
+            emit(MACH(dst_null_d(), op[0], op[1]));
+            emit(MOV(dst, src_reg(acc)));
+         }
       } else {
-         struct brw_reg acc = retype(brw_acc_reg(8), dst.type);
-
-         emit(MUL(acc, op[0], op[1]));
-         emit(MACH(dst_null_d(), op[0], op[1]));
-         emit(MOV(dst, src_reg(acc)));
+         emit(MUL(dst, op[0], op[1]));
       }
       break;
    }
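
The 16-bit shortcut in the imul case rests on the identity a * b == a * lo16(b) + ((a * hi16(b)) << 16) modulo 2^32: MUL consumes only the low 16 bits of one source, MACH supplies the high-half contribution, so when a constant operand is known to fit in 16 bits the MACH term is zero and a single MUL is exact. A standalone check with hypothetical operands:

   #include <cstdint>
   #include <cstdio>

   int main()
   {
      uint32_t a = 0x12345678, b = 0x9abc;   /* b < (1 << 16) */
      uint32_t lo = a * (b & 0xffff);        /* what MUL computes */
      uint32_t hi = a * (b >> 16);           /* what MACH would contribute */
      uint32_t full = lo + (hi << 16);
      printf("%u == %u\n", full, a * b);     /* equal; and since b >> 16 == 0, */
      return 0;                              /* the MACH term vanishes here */
   }
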
@@ -990,18 +1015,33 @@ vec4_visitor::nir_emit_alu(nir_alu_instr *instr)
    }
 
    case nir_op_inot:
+      if (devinfo->gen >= 8) {
+         op[0] = resolve_source_modifiers(op[0]);
+      }
       emit(NOT(dst, op[0]));
       break;
 
    case nir_op_ixor:
+      if (devinfo->gen >= 8) {
+         op[0] = resolve_source_modifiers(op[0]);
+         op[1] = resolve_source_modifiers(op[1]);
+      }
       emit(XOR(dst, op[0], op[1]));
       break;
 
    case nir_op_ior:
+      if (devinfo->gen >= 8) {
+         op[0] = resolve_source_modifiers(op[0]);
+         op[1] = resolve_source_modifiers(op[1]);
+      }
       emit(OR(dst, op[0], op[1]));
       break;
 
    case nir_op_iand:
+      if (devinfo->gen >= 8) {
+         op[0] = resolve_source_modifiers(op[0]);
+         op[1] = resolve_source_modifiers(op[1]);
+      }
       emit(AND(dst, op[0], op[1]));
       break;
 
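
On Gen8+ the hardware reinterprets source modifiers on logical instructions (negate behaves as a bitwise inversion rather than an arithmetic negation), so any pending negate/abs has to be flushed through a MOV before the NOT/XOR/OR/AND is emitted. resolve_source_modifiers is defined elsewhere in the backend; a plausible shape for it, shown here only as a sketch, is:

   src_reg
   vec4_visitor::resolve_source_modifiers(const src_reg &src)
   {
      if (!src.abs && !src.negate)
         return src;   /* nothing to resolve */

      /* Emit a MOV so the modifiers are applied arithmetically and the
       * logical op sees a plain register.
       */
      dst_reg resolved = dst_reg(this, glsl_type::ivec4_type);
      resolved.type = src.type;
      emit(MOV(resolved, src));

      return src_reg(resolved);
   }
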
@@ -1213,36 +1253,294 @@ vec4_visitor::nir_emit_alu(nir_alu_instr *instr)
       inst->predicate = BRW_PREDICATE_NORMAL;
       break;
 
-   case nir_op_fdot2:
+   case nir_op_fdot_replicated2:
       inst = emit(BRW_OPCODE_DP2, dst, op[0], op[1]);
       inst->saturate = instr->dest.saturate;
       break;
 
-   case nir_op_fdot3:
+   case nir_op_fdot_replicated3:
       inst = emit(BRW_OPCODE_DP3, dst, op[0], op[1]);
       inst->saturate = instr->dest.saturate;
       break;
 
-   case nir_op_fdot4:
+   case nir_op_fdot_replicated4:
       inst = emit(BRW_OPCODE_DP4, dst, op[0], op[1]);
       inst->saturate = instr->dest.saturate;
       break;
 
+   case nir_op_bany2:
+   case nir_op_bany3:
+   case nir_op_bany4: {
+      dst_reg tmp = dst_reg(this, glsl_type::bool_type);
+      tmp.writemask = brw_writemask_for_size(nir_op_infos[instr->op].input_sizes[0]);
+
+      emit(CMP(tmp, op[0], src_reg(0), BRW_CONDITIONAL_NZ));
+
+      emit(MOV(dst, src_reg(0)));
+      inst = emit(MOV(dst, src_reg(~0)));
+      inst->predicate = BRW_PREDICATE_ALIGN16_ANY4H;
+      break;
+   }
+
+   case nir_op_fabs:
+   case nir_op_iabs:
+   case nir_op_fneg:
+   case nir_op_ineg:
+   case nir_op_fsat:
+      unreachable("not reached: should be lowered by lower_source mods");
+
+   case nir_op_fdiv:
+      unreachable("not reached: should be lowered by DIV_TO_MUL_RCP in the compiler");
+
+   case nir_op_fmod:
+      unreachable("not reached: should be lowered by MOD_TO_FLOOR in the compiler");
+
+   case nir_op_fsub:
+   case nir_op_isub:
+      unreachable("not reached: should be handled by ir_sub_to_add_neg");
+
    default:
       unreachable("Unimplemented ALU operation");
    }
+
+   /* If we need to do a boolean resolve, replace the result with -(x & 1)
+    * to sign extend the low bit to 0/~0
+    */
+   if (devinfo->gen <= 5 &&
+       (instr->instr.pass_flags & BRW_NIR_BOOLEAN_MASK) ==
+       BRW_NIR_BOOLEAN_NEEDS_RESOLVE) {
+      dst_reg masked = dst_reg(this, glsl_type::int_type);
+      masked.writemask = dst.writemask;
+      emit(AND(masked, src_reg(dst), src_reg(1)));
+      src_reg masked_neg = src_reg(masked);
+      masked_neg.negate = true;
+      emit(MOV(retype(dst, BRW_REGISTER_TYPE_D), masked_neg));
+   }
 }
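
The resolve epilogue added above relies on two's-complement arithmetic: ANDing with 1 isolates the boolean bit, and negating that bit yields 0x0 or 0xffffffff, the canonical false/true values the rest of the backend expects on Gen5 and earlier, where only the low bit of a comparison result is guaranteed. A standalone check of -(x & 1) over hypothetical raw results:

   #include <cstdint>
   #include <cstdio>

   int main()
   {
      const uint32_t raw[] = { 0x00000000u, 0x00000001u, 0x7fff0001u };
      for (unsigned i = 0; i < 3; i++) {
         int32_t resolved = -(int32_t)(raw[i] & 1);   /* the -(x & 1) trick */
         printf("0x%08x -> 0x%08x\n", raw[i], (uint32_t)resolved);
      }
      return 0;   /* prints 0x0, 0xffffffff, 0xffffffff */
   }
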
 
 void
 vec4_visitor::nir_emit_jump(nir_jump_instr *instr)
 {
-   /* @TODO: Not yet implemented */
+   switch (instr->type) {
+   case nir_jump_break:
+      emit(BRW_OPCODE_BREAK);
+      break;
+
+   case nir_jump_continue:
+      emit(BRW_OPCODE_CONTINUE);
+      break;
+
+   case nir_jump_return:
+      /* fall through */
+   default:
+      unreachable("unknown jump");
+   }
+}
+
+enum ir_texture_opcode
+ir_texture_opcode_for_nir_texop(nir_texop texop)
+{
+   enum ir_texture_opcode op;
+
+   switch (texop) {
+   case nir_texop_lod: op = ir_lod; break;
+   case nir_texop_query_levels: op = ir_query_levels; break;
+   case nir_texop_texture_samples: op = ir_texture_samples; break;
+   case nir_texop_tex: op = ir_tex; break;
+   case nir_texop_tg4: op = ir_tg4; break;
+   case nir_texop_txb: op = ir_txb; break;
+   case nir_texop_txd: op = ir_txd; break;
+   case nir_texop_txf: op = ir_txf; break;
+   case nir_texop_txf_ms: op = ir_txf_ms; break;
+   case nir_texop_txl: op = ir_txl; break;
+   case nir_texop_txs: op = ir_txs; break;
+   default:
+      unreachable("unknown texture opcode");
+   }
+
+   return op;
+}
+
+const glsl_type *
+glsl_type_for_nir_alu_type(nir_alu_type alu_type,
+                           unsigned components)
+{
+   switch (alu_type) {
+   case nir_type_float:
+      return glsl_type::vec(components);
+   case nir_type_int:
+      return glsl_type::ivec(components);
+   case nir_type_unsigned:
+      return glsl_type::uvec(components);
+   case nir_type_bool:
+      return glsl_type::bvec(components);
+   default:
+      return glsl_type::error_type;
+   }
+
+   return glsl_type::error_type;
+}
 
 void
 vec4_visitor::nir_emit_texture(nir_tex_instr *instr)
 {
-   /* @TODO: Not yet implemented */
+   unsigned sampler = instr->sampler_index;
+   src_reg sampler_reg = src_reg(sampler);
+   src_reg coordinate;
+   const glsl_type *coord_type = NULL;
+   src_reg shadow_comparitor;
+   src_reg offset_value;
+   src_reg lod, lod2;
+   src_reg sample_index;
+   src_reg mcs;
+
+   const glsl_type *dest_type =
+      glsl_type_for_nir_alu_type(instr->dest_type,
+                                 nir_tex_instr_dest_size(instr));
+   dst_reg dest = get_nir_dest(instr->dest, instr->dest_type);
+
+   /* When tg4 is used with the degenerate ZERO/ONE swizzles, don't bother
+    * emitting anything other than setting up the constant result.
+    */
+   if (instr->op == nir_texop_tg4) {
+      int swiz = GET_SWZ(key_tex->swizzles[sampler], instr->component);
+      if (swiz == SWIZZLE_ZERO || swiz == SWIZZLE_ONE) {
+         emit(MOV(dest, src_reg(swiz == SWIZZLE_ONE ? 1.0f : 0.0f)));
+         return;
+      }
+   }
+
+   /* Load the texture operation sources */
+   for (unsigned i = 0; i < instr->num_srcs; i++) {
+      switch (instr->src[i].src_type) {
+      case nir_tex_src_comparitor:
+         shadow_comparitor = get_nir_src(instr->src[i].src,
+                                         BRW_REGISTER_TYPE_F, 1);
+         break;
+
+      case nir_tex_src_coord: {
+         unsigned src_size = nir_tex_instr_src_size(instr, i);
+
+         switch (instr->op) {
+         case nir_texop_txf:
+         case nir_texop_txf_ms:
+            coordinate = get_nir_src(instr->src[i].src, BRW_REGISTER_TYPE_D,
+                                     src_size);
+            coord_type = glsl_type::ivec(src_size);
+            break;
+
+         default:
+            coordinate = get_nir_src(instr->src[i].src, BRW_REGISTER_TYPE_F,
+                                     src_size);
+            coord_type = glsl_type::vec(src_size);
+            break;
+         }
+         break;
+      }
+
+      case nir_tex_src_ddx:
+         lod = get_nir_src(instr->src[i].src, BRW_REGISTER_TYPE_F,
+                           nir_tex_instr_src_size(instr, i));
+         break;
+
+      case nir_tex_src_ddy:
+         lod2 = get_nir_src(instr->src[i].src, BRW_REGISTER_TYPE_F,
+                            nir_tex_instr_src_size(instr, i));
+         break;
+
+      case nir_tex_src_lod:
+         switch (instr->op) {
+         case nir_texop_txs:
+         case nir_texop_txf:
+            lod = get_nir_src(instr->src[i].src, BRW_REGISTER_TYPE_D, 1);
+            break;
+
+         default:
+            lod = get_nir_src(instr->src[i].src, BRW_REGISTER_TYPE_F, 1);
+            break;
+         }
+         break;
+
+      case nir_tex_src_ms_index: {
+         sample_index = get_nir_src(instr->src[i].src, BRW_REGISTER_TYPE_D, 1);
+         assert(coord_type != NULL);
+         if (devinfo->gen >= 7 &&
+             key_tex->compressed_multisample_layout_mask & (1 << sampler)) {
+            mcs = emit_mcs_fetch(coord_type, coordinate, sampler_reg);
+         } else {
+            mcs = src_reg(0u);
+         }
+         mcs = retype(mcs, BRW_REGISTER_TYPE_UD);
+         break;
+      }
+
+      case nir_tex_src_offset:
+         offset_value = get_nir_src(instr->src[i].src, BRW_REGISTER_TYPE_D, 2);
+         break;
+
+      case nir_tex_src_sampler_offset: {
+         /* The highest sampler which may be used by this operation is
+          * the last element of the array. Mark it here, because the generator
+          * doesn't have enough information to determine the bound.
+          */
+         uint32_t array_size = instr->sampler_array_size;
+         uint32_t max_used = sampler + array_size - 1;
+         if (instr->op == nir_texop_tg4) {
+            max_used += prog_data->base.binding_table.gather_texture_start;
+         } else {
+            max_used += prog_data->base.binding_table.texture_start;
+         }
+
+         brw_mark_surface_used(&prog_data->base, max_used);
+
+         /* Emit code to evaluate the actual indexing expression */
+         src_reg src = get_nir_src(instr->src[i].src, 1);
+         src_reg temp(this, glsl_type::uint_type);
+         emit(ADD(dst_reg(temp), src, src_reg(sampler)));
+         sampler_reg = emit_uniformize(temp);
+         break;
+      }
+
+      case nir_tex_src_projector:
+         unreachable("Should be lowered by do_lower_texture_projection");
+
+      case nir_tex_src_bias:
+         unreachable("LOD bias is not valid for vertex shaders.\n");
+
+      default:
+         unreachable("unknown texture source");
+      }
+   }
+
+   uint32_t constant_offset = 0;
+   for (unsigned i = 0; i < 3; i++) {
+      if (instr->const_offset[i] != 0) {
+         constant_offset = brw_texture_offset(instr->const_offset, 3);
+         break;
+      }
+   }
+
+   /* Stuff the channel select bits in the top of the texture offset */
+   if (instr->op == nir_texop_tg4)
+      constant_offset |= gather_channel(instr->component, sampler) << 16;
+
+   ir_texture_opcode op = ir_texture_opcode_for_nir_texop(instr->op);
+
+   bool is_cube_array =
+      instr->op == nir_texop_txs &&
+      instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE &&
+      instr->is_array;
+
+   emit_texture(op, dest, dest_type, coordinate, instr->coord_components,
+                shadow_comparitor,
+                lod, lod2, sample_index,
+                constant_offset, offset_value,
+                mcs, is_cube_array, sampler, sampler_reg);
+}
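
emit_texture receives the constant texel offset and the tg4 channel select packed into a single dword, with the gather channel stored above bit 16 as the code above shows. A standalone sketch of that packing (the offset value is hypothetical; in the driver it comes from brw_texture_offset):

   #include <cstdint>
   #include <cstdio>

   int main()
   {
      uint32_t constant_offset = 0x0042;   /* stand-in for brw_texture_offset() */
      unsigned gather_channel = 2;         /* e.g. textureGather(..., comp = 2) */

      /* Stuff the channel select bits in the top of the texture offset */
      constant_offset |= gather_channel << 16;

      printf("header dword = 0x%08x\n", constant_offset);   /* 0x00020042 */
      return 0;
   }
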
+
+void
+vec4_visitor::nir_emit_undef(nir_ssa_undef_instr *instr)
+{
+   nir_ssa_values[instr->def.index] = dst_reg(GRF, alloc.allocate(1));
+}
 
 }