X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fmesa%2Fstate_tracker%2Fst_glsl_to_tgsi.cpp;h=f7d79e9f50cd6b320d01d4f85b49a25e21359a17;hb=a43f68810a347f3e952a0bc401be6edb91e1baea;hp=d7a1ba80e1da9703e9549a4fe14b0d8cc85f03ac;hpb=5164244df02f33d6ad9e0a286f4b6d6af2dfbc75;p=mesa.git diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp index d7a1ba80e1d..f7d79e9f50c 100644 --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp @@ -295,6 +295,7 @@ public: bool indirect_addr_consts; int glsl_version; + bool native_integers; variable_storage *find_variable_storage(ir_variable *var); @@ -372,11 +373,11 @@ public: /** * Emit the correct dot-product instruction for the type of arguments */ - void emit_dp(ir_instruction *ir, - st_dst_reg dst, - st_src_reg src0, - st_src_reg src1, - unsigned elements); + glsl_to_tgsi_instruction *emit_dp(ir_instruction *ir, + st_dst_reg dst, + st_src_reg src0, + st_src_reg src1, + unsigned elements); void emit_scalar(ir_instruction *ir, unsigned op, st_dst_reg dst, st_src_reg src0); @@ -600,7 +601,7 @@ glsl_to_tgsi_visitor::get_opcode(ir_instruction *ir, unsigned op, if (src0.type == GLSL_TYPE_FLOAT || src1.type == GLSL_TYPE_FLOAT) type = GLSL_TYPE_FLOAT; - else if (glsl_version >= 130) + else if (native_integers) type = src0.type; #define case4(c, f, i, u) \ @@ -641,7 +642,7 @@ glsl_to_tgsi_visitor::get_opcode(ir_instruction *ir, unsigned op, return op; } -void +glsl_to_tgsi_instruction * glsl_to_tgsi_visitor::emit_dp(ir_instruction *ir, st_dst_reg dst, st_src_reg src0, st_src_reg src1, unsigned elements) @@ -650,7 +651,7 @@ glsl_to_tgsi_visitor::emit_dp(ir_instruction *ir, TGSI_OPCODE_DP2, TGSI_OPCODE_DP3, TGSI_OPCODE_DP4 }; - emit(ir, dot_opcodes[elements - 2], dst, src0, src1); + return emit(ir, dot_opcodes[elements - 2], dst, src0, src1); } /** @@ -881,7 +882,7 @@ glsl_to_tgsi_visitor::st_src_reg_for_int(int val) st_src_reg src(PROGRAM_IMMEDIATE, -1, GLSL_TYPE_INT); union gl_constant_value uval; - assert(glsl_version >= 130); + assert(native_integers); uval.i = val; src.index = add_constant(src.file, &uval, 1, GL_INT, &src.swizzle); @@ -892,7 +893,7 @@ glsl_to_tgsi_visitor::st_src_reg_for_int(int val) struct st_src_reg glsl_to_tgsi_visitor::st_src_reg_for_type(int type, int val) { - if (glsl_version >= 130) + if (native_integers) return type == GLSL_TYPE_FLOAT ? st_src_reg_for_float(val) : st_src_reg_for_int(val); else @@ -950,7 +951,7 @@ glsl_to_tgsi_visitor::get_temp(const glsl_type *type) { st_src_reg src; - src.type = glsl_version >= 130 ? type->base_type : GLSL_TYPE_FLOAT; + src.type = native_integers ? type->base_type : GLSL_TYPE_FLOAT; src.file = PROGRAM_TEMPORARY; src.index = next_temp; src.reladdr = NULL; @@ -1053,7 +1054,7 @@ glsl_to_tgsi_visitor::visit(ir_variable *ir) this->next_temp += type_size(ir->type); dst = st_dst_reg(st_src_reg(PROGRAM_TEMPORARY, storage->index, - glsl_version >= 130 ? ir->type->base_type : GLSL_TYPE_FLOAT)); + native_integers ? ir->type->base_type : GLSL_TYPE_FLOAT)); } @@ -1069,7 +1070,7 @@ glsl_to_tgsi_visitor::visit(ir_variable *ir) } } else { st_src_reg src(PROGRAM_STATE_VAR, index, - glsl_version >= 130 ? ir->type->base_type : GLSL_TYPE_FLOAT); + native_integers ? ir->type->base_type : GLSL_TYPE_FLOAT); src.swizzle = slots[i].swizzle; emit(ir, TGSI_OPCODE_MOV, dst, src); /* even a float takes up a whole vec4 reg in a struct/array. */ @@ -1335,7 +1336,17 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir) switch (ir->operation) { case ir_unop_logic_not: - emit(ir, TGSI_OPCODE_SEQ, result_dst, op[0], st_src_reg_for_type(result_dst.type, 0)); + if (result_dst.type != GLSL_TYPE_FLOAT) + emit(ir, TGSI_OPCODE_SEQ, result_dst, op[0], st_src_reg_for_type(result_dst.type, 0)); + else { + /* Previously 'SEQ dst, src, 0.0' was used for this. However, many + * older GPUs implement SEQ using multiple instructions (i915 uses two + * SGE instructions and a MUL instruction). Since our logic values are + * 0.0 and 1.0, 1-x also implements !x. + */ + op[0].negate = ~op[0].negate; + emit(ir, TGSI_OPCODE_ADD, result_dst, op[0], st_src_reg_for_float(1.0)); + } break; case ir_unop_neg: assert(result_dst.type == GLSL_TYPE_FLOAT || result_dst.type == GLSL_TYPE_INT); @@ -1444,7 +1455,7 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir) /* "==" operator producing a scalar boolean. */ if (ir->operands[0]->type->is_vector() || ir->operands[1]->type->is_vector()) { - st_src_reg temp = get_temp(glsl_version >= 130 ? + st_src_reg temp = get_temp(native_integers ? glsl_type::get_instance(ir->operands[0]->type->base_type, 4, 1) : glsl_type::vec4_type); assert(ir->operands[0]->type->base_type == GLSL_TYPE_FLOAT); @@ -1459,7 +1470,7 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir) /* "!=" operator producing a scalar boolean. */ if (ir->operands[0]->type->is_vector() || ir->operands[1]->type->is_vector()) { - st_src_reg temp = get_temp(glsl_version >= 130 ? + st_src_reg temp = get_temp(native_integers ? glsl_type::get_instance(ir->operands[0]->type->base_type, 4, 1) : glsl_type::vec4_type); assert(ir->operands[0]->type->base_type == GLSL_TYPE_FLOAT); @@ -1471,22 +1482,69 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir) } break; - case ir_unop_any: + case ir_unop_any: { assert(ir->operands[0]->type->is_vector()); - emit_dp(ir, result_dst, op[0], op[0], - ir->operands[0]->type->vector_elements); - emit(ir, TGSI_OPCODE_SNE, result_dst, result_src, st_src_reg_for_float(0.0)); + + /* After the dot-product, the value will be an integer on the + * range [0,4]. Zero stays zero, and positive values become 1.0. + */ + glsl_to_tgsi_instruction *const dp = + emit_dp(ir, result_dst, op[0], op[0], + ir->operands[0]->type->vector_elements); + if (this->prog->Target == GL_FRAGMENT_PROGRAM_ARB && + result_dst.type == GLSL_TYPE_FLOAT) { + /* The clamping to [0,1] can be done for free in the fragment + * shader with a saturate. + */ + dp->saturate = true; + } else if (result_dst.type == GLSL_TYPE_FLOAT) { + /* Negating the result of the dot-product gives values on the range + * [-4, 0]. Zero stays zero, and negative values become 1.0. This + * is achieved using SLT. + */ + st_src_reg slt_src = result_src; + slt_src.negate = ~slt_src.negate; + emit(ir, TGSI_OPCODE_SLT, result_dst, slt_src, st_src_reg_for_float(0.0)); + } + else { + /* Use SNE 0 if integers are being used as boolean values. */ + emit(ir, TGSI_OPCODE_SNE, result_dst, result_src, st_src_reg_for_int(0)); + } break; + } case ir_binop_logic_xor: emit(ir, TGSI_OPCODE_SNE, result_dst, op[0], op[1]); break; - case ir_binop_logic_or: - /* This could be a saturated add and skip the SNE. */ - emit(ir, TGSI_OPCODE_ADD, result_dst, op[0], op[1]); - emit(ir, TGSI_OPCODE_SNE, result_dst, result_src, st_src_reg_for_float(0.0)); + case ir_binop_logic_or: { + /* After the addition, the value will be an integer on the + * range [0,2]. Zero stays zero, and positive values become 1.0. + */ + glsl_to_tgsi_instruction *add = + emit(ir, TGSI_OPCODE_ADD, result_dst, op[0], op[1]); + if (this->prog->Target == GL_FRAGMENT_PROGRAM_ARB && + result_dst.type == GLSL_TYPE_FLOAT) { + /* The clamping to [0,1] can be done for free in the fragment + * shader with a saturate if floats are being used as boolean values. + */ + add->saturate = true; + } else if (result_dst.type == GLSL_TYPE_FLOAT) { + /* Negating the result of the addition gives values on the range + * [-2, 0]. Zero stays zero, and negative values become 1.0. This + * is achieved using SLT. + */ + st_src_reg slt_src = result_src; + slt_src.negate = ~slt_src.negate; + emit(ir, TGSI_OPCODE_SLT, result_dst, slt_src, st_src_reg_for_float(0.0)); + } else { + /* Use an SNE on the result of the addition. Zero stays zero, + * 1 stays 1, and 2 becomes 1. + */ + emit(ir, TGSI_OPCODE_SNE, result_dst, result_src, st_src_reg_for_int(0)); + } break; + } case ir_binop_logic_and: /* the bool args are stored as float 0.0 or 1.0, so "mul" gives us "and". */ @@ -1514,7 +1572,7 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir) break; case ir_unop_i2f: case ir_unop_b2f: - if (glsl_version >= 130) { + if (native_integers) { emit(ir, TGSI_OPCODE_I2F, result_dst, op[0]); break; } @@ -1526,7 +1584,7 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir) result_src = op[0]; break; case ir_unop_f2i: - if (glsl_version >= 130) + if (native_integers) emit(ir, TGSI_OPCODE_F2I, result_dst, op[0]); else emit(ir, TGSI_OPCODE_TRUNC, result_dst, op[0]); @@ -1567,7 +1625,7 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir) break; } case ir_unop_u2f: - if (glsl_version >= 130) { + if (native_integers) { emit(ir, TGSI_OPCODE_U2F, result_dst, op[0]); break; } @@ -1719,7 +1777,7 @@ glsl_to_tgsi_visitor::visit(ir_dereference_variable *ir) } this->result = st_src_reg(entry->file, entry->index, var->type); - if (glsl_version <= 120) + if (!native_integers) this->result.type = GLSL_TYPE_FLOAT; } @@ -1994,15 +2052,17 @@ glsl_to_tgsi_visitor::visit(ir_assignment *ir) } else if (ir->rhs->as_expression() && this->instructions.get_tail() && ir->rhs == ((glsl_to_tgsi_instruction *)this->instructions.get_tail())->ir && - type_size(ir->lhs->type) == 1) { + type_size(ir->lhs->type) == 1 && + l.writemask == ((glsl_to_tgsi_instruction *)this->instructions.get_tail())->dst.writemask) { /* To avoid emitting an extra MOV when assigning an expression to a * variable, emit the last instruction of the expression again, but * replace the destination register with the target of the assignment. * Dead code elimination will remove the original instruction. */ - glsl_to_tgsi_instruction *inst; + glsl_to_tgsi_instruction *inst, *new_inst; inst = (glsl_to_tgsi_instruction *)this->instructions.get_tail(); - emit(ir, inst->op, l, inst->src[0], inst->src[1], inst->src[2]); + new_inst = emit(ir, inst->op, l, inst->src[0], inst->src[1], inst->src[2]); + new_inst->saturate = inst->saturate; } else { for (i = 0; i < type_size(ir->lhs->type); i++) { emit(ir, TGSI_OPCODE_MOV, l, r); @@ -2107,27 +2167,27 @@ glsl_to_tgsi_visitor::visit(ir_constant *ir) } break; case GLSL_TYPE_UINT: - gl_type = glsl_version >= 130 ? GL_UNSIGNED_INT : GL_FLOAT; + gl_type = native_integers ? GL_UNSIGNED_INT : GL_FLOAT; for (i = 0; i < ir->type->vector_elements; i++) { - if (glsl_version >= 130) + if (native_integers) values[i].u = ir->value.u[i]; else values[i].f = ir->value.u[i]; } break; case GLSL_TYPE_INT: - gl_type = glsl_version >= 130 ? GL_INT : GL_FLOAT; + gl_type = native_integers ? GL_INT : GL_FLOAT; for (i = 0; i < ir->type->vector_elements; i++) { - if (glsl_version >= 130) + if (native_integers) values[i].i = ir->value.i[i]; else values[i].f = ir->value.i[i]; } break; case GLSL_TYPE_BOOL: - gl_type = glsl_version >= 130 ? GL_BOOL : GL_FLOAT; + gl_type = native_integers ? GL_BOOL : GL_FLOAT; for (i = 0; i < ir->type->vector_elements; i++) { - if (glsl_version >= 130) + if (native_integers) values[i].b = ir->value.b[i]; else values[i].f = ir->value.b[i]; @@ -3441,7 +3501,7 @@ glsl_to_tgsi_visitor::eliminate_dead_code_advanced(void) /* Continuing the block, clear any channels from the write array that * are read by this instruction. */ - for (int i = 0; i < 4; i++) { + for (unsigned i = 0; i < Elements(inst->src); i++) { if (inst->src[i].file == PROGRAM_TEMPORARY && inst->src[i].reladdr){ /* Any temporary might be read, so no dead code elimination * across this instruction. @@ -3609,6 +3669,7 @@ get_pixel_transfer_visitor(struct st_fragment_program *fp, v->ctx = original->ctx; v->prog = prog; v->glsl_version = original->glsl_version; + v->native_integers = original->native_integers; v->options = original->options; v->next_temp = original->next_temp; v->num_address_regs = original->num_address_regs; @@ -3737,6 +3798,7 @@ get_bitmap_visitor(struct st_fragment_program *fp, v->ctx = original->ctx; v->prog = prog; v->glsl_version = original->glsl_version; + v->native_integers = original->native_integers; v->options = original->options; v->next_temp = original->next_temp; v->num_address_regs = original->num_address_regs; @@ -4672,6 +4734,7 @@ get_mesa_program(struct gl_context *ctx, v->shader_program = shader_program; v->options = options; v->glsl_version = ctx->Const.GLSLVersion; + v->native_integers = ctx->Const.NativeIntegers; add_uniforms_to_parameters_list(shader_program, shader, prog);