X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fmesa%2Fstate_tracker%2Fst_glsl_to_tgsi.cpp;h=6cc655d70cf5f95b79f9ff36df30aef204e7b8e8;hb=20b0daf82de91fd57b7e8d825786789149f6358d;hp=b238c267c817d288ea345de6226343d8dcce02c6;hpb=f3dce133f0422c42ca61f07f488237107efc30e6;p=mesa.git diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp index b238c267c81..6cc655d70cf 100644 --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp @@ -42,17 +42,17 @@ #include "ir_optimization.h" #include "ast.h" -extern "C" { #include "main/mtypes.h" -#include "main/shaderapi.h" #include "main/shaderobj.h" -#include "main/uniforms.h" #include "program/hash_table.h" + +extern "C" { +#include "main/shaderapi.h" +#include "main/uniforms.h" #include "program/prog_instruction.h" #include "program/prog_optimize.h" #include "program/prog_print.h" #include "program/program.h" -#include "program/prog_uniform.h" #include "program/prog_parameter.h" #include "program/sampler.h" @@ -78,8 +78,17 @@ extern "C" { (1 << PROGRAM_CONSTANT) | \ (1 << PROGRAM_UNIFORM)) +/** + * Maximum number of temporary registers. + * + * It is too big for stack allocated arrays -- it will cause stack overflow on + * Windows and likely Mac OS X. + */ #define MAX_TEMPS 4096 +/* will be 4 for GLSL 4.00 */ +#define MAX_GLSL_TEXTURE_OFFSET 1 + class st_src_reg; class st_dst_reg; @@ -211,6 +220,8 @@ public: int sampler; /**< sampler index */ int tex_target; /**< One of TEXTURE_*_INDEX */ GLboolean tex_shadow; + struct tgsi_texture_offset tex_offsets[MAX_GLSL_TEXTURE_OFFSET]; + unsigned tex_offset_num_offset; int dead_mask; /**< Used in dead code elimination */ class function_entry *function; /* Set on TGSI_OPCODE_CAL or TGSI_OPCODE_BGNSUB */ @@ -385,14 +396,18 @@ public: void emit_scalar(ir_instruction *ir, unsigned op, st_dst_reg dst, st_src_reg src0, st_src_reg src1); + void try_emit_float_set(ir_instruction *ir, unsigned op, st_dst_reg dst); + void emit_arl(ir_instruction *ir, st_dst_reg dst, st_src_reg src0); void emit_scs(ir_instruction *ir, unsigned op, st_dst_reg dst, const st_src_reg &src); - GLboolean try_emit_mad(ir_expression *ir, - int mul_operand); - GLboolean try_emit_sat(ir_expression *ir); + bool try_emit_mad(ir_expression *ir, + int mul_operand); + bool try_emit_mad_for_and_not(ir_expression *ir, + int mul_operand); + bool try_emit_sat(ir_expression *ir); void emit_swz(ir_expression *ir); @@ -510,7 +525,7 @@ glsl_to_tgsi_visitor::emit(ir_instruction *ir, unsigned op, inst->function = NULL; - if (op == TGSI_OPCODE_ARL) + if (op == TGSI_OPCODE_ARL || op == TGSI_OPCODE_UARL) this->num_address_regs = 1; /* Update indirect addressing status used by TGSI */ @@ -560,7 +575,10 @@ glsl_to_tgsi_visitor::emit(ir_instruction *ir, unsigned op, } this->instructions.push_tail(inst); - + + if (native_integers) + try_emit_float_set(ir, op, dst); + return inst; } @@ -586,11 +604,28 @@ glsl_to_tgsi_visitor::emit(ir_instruction *ir, unsigned op) return emit(ir, op, undef_dst, undef_src, undef_src, undef_src); } + /** + * Emits the code to convert the result of float SET instructions to integers. + */ +void +glsl_to_tgsi_visitor::try_emit_float_set(ir_instruction *ir, unsigned op, + st_dst_reg dst) +{ + if ((op == TGSI_OPCODE_SEQ || + op == TGSI_OPCODE_SNE || + op == TGSI_OPCODE_SGE || + op == TGSI_OPCODE_SLT)) + { + st_src_reg src = st_src_reg(dst); + src.negate = ~src.negate; + dst.type = GLSL_TYPE_FLOAT; + emit(ir, TGSI_OPCODE_F2I, dst, src); + } +} + /** * Determines whether to use an integer, unsigned integer, or float opcode * based on the operands and input opcode, then emits the result. - * - * TODO: type checking for remaining TGSI opcodes */ unsigned glsl_to_tgsi_visitor::get_opcode(ir_instruction *ir, unsigned op, @@ -602,7 +637,7 @@ glsl_to_tgsi_visitor::get_opcode(ir_instruction *ir, unsigned op, if (src0.type == GLSL_TYPE_FLOAT || src1.type == GLSL_TYPE_FLOAT) type = GLSL_TYPE_FLOAT; else if (native_integers) - type = src0.type; + type = src0.type == GLSL_TYPE_BOOL ? GLSL_TYPE_INT : src0.type; #define case4(c, f, i, u) \ case TGSI_OPCODE_##c: \ @@ -628,12 +663,7 @@ glsl_to_tgsi_visitor::get_opcode(ir_instruction *ir, unsigned op, case3(SGE, ISGE, USGE); case3(SLT, ISLT, USLT); - case2iu(SHL, SHL); case2iu(ISHR, USHR); - case2iu(NOT, NOT); - case2iu(AND, AND); - case2iu(OR, OR); - case2iu(XOR, XOR); default: break; } @@ -722,16 +752,12 @@ void glsl_to_tgsi_visitor::emit_arl(ir_instruction *ir, st_dst_reg dst, st_src_reg src0) { - st_src_reg tmp = get_temp(glsl_type::float_type); + int op = TGSI_OPCODE_ARL; - if (src0.type == GLSL_TYPE_INT) - emit(NULL, TGSI_OPCODE_I2F, st_dst_reg(tmp), src0); - else if (src0.type == GLSL_TYPE_UINT) - emit(NULL, TGSI_OPCODE_U2F, st_dst_reg(tmp), src0); - else - tmp = src0; - - emit(NULL, TGSI_OPCODE_ARL, dst, tmp); + if (src0.type == GLSL_TYPE_INT || src0.type == GLSL_TYPE_UINT) + op = TGSI_OPCODE_UARL; + + emit(NULL, op, dst, src0); } /** @@ -864,7 +890,7 @@ glsl_to_tgsi_visitor::add_constant(gl_register_file file, } } -struct st_src_reg +st_src_reg glsl_to_tgsi_visitor::st_src_reg_for_float(float val) { st_src_reg src(PROGRAM_IMMEDIATE, -1, GLSL_TYPE_FLOAT); @@ -876,7 +902,7 @@ glsl_to_tgsi_visitor::st_src_reg_for_float(float val) return src; } -struct st_src_reg +st_src_reg glsl_to_tgsi_visitor::st_src_reg_for_int(int val) { st_src_reg src(PROGRAM_IMMEDIATE, -1, GLSL_TYPE_INT); @@ -890,7 +916,7 @@ glsl_to_tgsi_visitor::st_src_reg_for_int(int val) return src; } -struct st_src_reg +st_src_reg glsl_to_tgsi_visitor::st_src_reg_for_type(int type, int val) { if (native_integers) @@ -991,29 +1017,6 @@ glsl_to_tgsi_visitor::visit(ir_variable *ir) fp->OriginUpperLeft = ir->origin_upper_left; fp->PixelCenterInteger = ir->pixel_center_integer; - - } else if (strcmp(ir->name, "gl_FragDepth") == 0) { - struct gl_fragment_program *fp = (struct gl_fragment_program *)this->prog; - switch (ir->depth_layout) { - case ir_depth_layout_none: - fp->FragDepthLayout = FRAG_DEPTH_LAYOUT_NONE; - break; - case ir_depth_layout_any: - fp->FragDepthLayout = FRAG_DEPTH_LAYOUT_ANY; - break; - case ir_depth_layout_greater: - fp->FragDepthLayout = FRAG_DEPTH_LAYOUT_GREATER; - break; - case ir_depth_layout_less: - fp->FragDepthLayout = FRAG_DEPTH_LAYOUT_LESS; - break; - case ir_depth_layout_unchanged: - fp->FragDepthLayout = FRAG_DEPTH_LAYOUT_UNCHANGED; - break; - default: - assert(0); - break; - } } if (ir->mode == ir_var_uniform && strncmp(ir->name, "gl_", 3) == 0) { @@ -1033,7 +1036,7 @@ glsl_to_tgsi_visitor::visit(ir_variable *ir) } } - struct variable_storage *storage; + variable_storage *storage; st_dst_reg dst; if (i == ir->num_state_slots) { /* We'll set the index later. */ @@ -1184,7 +1187,7 @@ glsl_to_tgsi_visitor::visit(ir_function *ir) } } -GLboolean +bool glsl_to_tgsi_visitor::try_emit_mad(ir_expression *ir, int mul_operand) { int nonmul_operand = 1 - mul_operand; @@ -1210,7 +1213,47 @@ glsl_to_tgsi_visitor::try_emit_mad(ir_expression *ir, int mul_operand) return true; } -GLboolean +/** + * Emit MAD(a, -b, a) instead of AND(a, NOT(b)) + * + * The logic values are 1.0 for true and 0.0 for false. Logical-and is + * implemented using multiplication, and logical-or is implemented using + * addition. Logical-not can be implemented as (true - x), or (1.0 - x). + * As result, the logical expression (a & !b) can be rewritten as: + * + * - a * !b + * - a * (1 - b) + * - (a * 1) - (a * b) + * - a + -(a * b) + * - a + (a * -b) + * + * This final expression can be implemented as a single MAD(a, -b, a) + * instruction. + */ +bool +glsl_to_tgsi_visitor::try_emit_mad_for_and_not(ir_expression *ir, int try_operand) +{ + const int other_operand = 1 - try_operand; + st_src_reg a, b; + + ir_expression *expr = ir->operands[try_operand]->as_expression(); + if (!expr || expr->operation != ir_unop_logic_not) + return false; + + ir->operands[other_operand]->accept(this); + a = this->result; + expr->operands[0]->accept(this); + b = this->result; + + b.negate = ~b.negate; + + this->result = get_temp(ir->type); + emit(ir, TGSI_OPCODE_MAD, st_dst_reg(this->result), a, b, a); + + return true; +} + +bool glsl_to_tgsi_visitor::try_emit_sat(ir_expression *ir) { /* Saturates were only introduced to vertex programs in @@ -1291,6 +1334,16 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir) if (try_emit_mad(ir, 0)) return; } + + /* Quick peephole: Emit OPCODE_MAD(-a, -b, a) instead of AND(a, NOT(b)) + */ + if (ir->operation == ir_binop_logic_and) { + if (try_emit_mad_for_and_not(ir, 1)) + return; + if (try_emit_mad_for_and_not(ir, 0)) + return; + } + if (try_emit_sat(ir)) return; @@ -1337,7 +1390,7 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir) switch (ir->operation) { case ir_unop_logic_not: if (result_dst.type != GLSL_TYPE_FLOAT) - emit(ir, TGSI_OPCODE_SEQ, result_dst, op[0], st_src_reg_for_type(result_dst.type, 0)); + emit(ir, TGSI_OPCODE_NOT, result_dst, op[0]); else { /* Previously 'SEQ dst, src, 0.0' was used for this. However, many * older GPUs implement SEQ using multiple instructions (i915 uses two @@ -1437,10 +1490,10 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir) emit(ir, TGSI_OPCODE_SLT, result_dst, op[0], op[1]); break; case ir_binop_greater: - emit(ir, TGSI_OPCODE_SGT, result_dst, op[0], op[1]); + emit(ir, TGSI_OPCODE_SLT, result_dst, op[1], op[0]); break; case ir_binop_lequal: - emit(ir, TGSI_OPCODE_SLE, result_dst, op[0], op[1]); + emit(ir, TGSI_OPCODE_SGE, result_dst, op[1], op[0]); break; case ir_binop_gequal: emit(ir, TGSI_OPCODE_SGE, result_dst, op[0], op[1]); @@ -1458,10 +1511,53 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir) st_src_reg temp = get_temp(native_integers ? glsl_type::get_instance(ir->operands[0]->type->base_type, 4, 1) : glsl_type::vec4_type); - assert(ir->operands[0]->type->base_type == GLSL_TYPE_FLOAT); - emit(ir, TGSI_OPCODE_SNE, st_dst_reg(temp), op[0], op[1]); - emit_dp(ir, result_dst, temp, temp, vector_elements); - emit(ir, TGSI_OPCODE_SEQ, result_dst, result_src, st_src_reg_for_float(0.0)); + + if (native_integers) { + st_dst_reg temp_dst = st_dst_reg(temp); + st_src_reg temp1 = st_src_reg(temp), temp2 = st_src_reg(temp); + + emit(ir, TGSI_OPCODE_SEQ, st_dst_reg(temp), op[0], op[1]); + + /* Emit 1-3 AND operations to combine the SEQ results. */ + switch (ir->operands[0]->type->vector_elements) { + case 2: + break; + case 3: + temp_dst.writemask = WRITEMASK_Y; + temp1.swizzle = SWIZZLE_YYYY; + temp2.swizzle = SWIZZLE_ZZZZ; + emit(ir, TGSI_OPCODE_AND, temp_dst, temp1, temp2); + break; + case 4: + temp_dst.writemask = WRITEMASK_X; + temp1.swizzle = SWIZZLE_XXXX; + temp2.swizzle = SWIZZLE_YYYY; + emit(ir, TGSI_OPCODE_AND, temp_dst, temp1, temp2); + temp_dst.writemask = WRITEMASK_Y; + temp1.swizzle = SWIZZLE_ZZZZ; + temp2.swizzle = SWIZZLE_WWWW; + emit(ir, TGSI_OPCODE_AND, temp_dst, temp1, temp2); + } + + temp1.swizzle = SWIZZLE_XXXX; + temp2.swizzle = SWIZZLE_YYYY; + emit(ir, TGSI_OPCODE_AND, result_dst, temp1, temp2); + } else { + emit(ir, TGSI_OPCODE_SNE, st_dst_reg(temp), op[0], op[1]); + + /* After the dot-product, the value will be an integer on the + * range [0,4]. Zero becomes 1.0, and positive values become zero. + */ + emit_dp(ir, result_dst, temp, temp, vector_elements); + + /* Negating the result of the dot-product gives values on the range + * [-4, 0]. Zero becomes 1.0, and negative values become zero. + * This is achieved using SGE. + */ + st_src_reg sge_src = result_src; + sge_src.negate = ~sge_src.negate; + emit(ir, TGSI_OPCODE_SGE, result_dst, sge_src, st_src_reg_for_float(0.0)); + } } else { emit(ir, TGSI_OPCODE_SEQ, result_dst, op[0], op[1]); } @@ -1473,30 +1569,56 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir) st_src_reg temp = get_temp(native_integers ? glsl_type::get_instance(ir->operands[0]->type->base_type, 4, 1) : glsl_type::vec4_type); - assert(ir->operands[0]->type->base_type == GLSL_TYPE_FLOAT); emit(ir, TGSI_OPCODE_SNE, st_dst_reg(temp), op[0], op[1]); - /* After the dot-product, the value will be an integer on the - * range [0,4]. Zero stays zero, and positive values become 1.0. - */ - glsl_to_tgsi_instruction *const dp = - emit_dp(ir, result_dst, temp, temp, vector_elements); - if (this->prog->Target == GL_FRAGMENT_PROGRAM_ARB && - result_dst.type == GLSL_TYPE_FLOAT) { - /* The clamping to [0,1] can be done for free in the fragment - * shader with a saturate. - */ - dp->saturate = true; - } else if (result_dst.type == GLSL_TYPE_FLOAT) { - /* Negating the result of the dot-product gives values on the range - * [-4, 0]. Zero stays zero, and negative values become 1.0. This - * achieved using SLT. - */ - st_src_reg slt_src = result_src; - slt_src.negate = ~slt_src.negate; - emit(ir, TGSI_OPCODE_SLT, result_dst, slt_src, st_src_reg_for_float(0.0)); + if (native_integers) { + st_dst_reg temp_dst = st_dst_reg(temp); + st_src_reg temp1 = st_src_reg(temp), temp2 = st_src_reg(temp); + + /* Emit 1-3 OR operations to combine the SNE results. */ + switch (ir->operands[0]->type->vector_elements) { + case 2: + break; + case 3: + temp_dst.writemask = WRITEMASK_Y; + temp1.swizzle = SWIZZLE_YYYY; + temp2.swizzle = SWIZZLE_ZZZZ; + emit(ir, TGSI_OPCODE_OR, temp_dst, temp1, temp2); + break; + case 4: + temp_dst.writemask = WRITEMASK_X; + temp1.swizzle = SWIZZLE_XXXX; + temp2.swizzle = SWIZZLE_YYYY; + emit(ir, TGSI_OPCODE_OR, temp_dst, temp1, temp2); + temp_dst.writemask = WRITEMASK_Y; + temp1.swizzle = SWIZZLE_ZZZZ; + temp2.swizzle = SWIZZLE_WWWW; + emit(ir, TGSI_OPCODE_OR, temp_dst, temp1, temp2); + } + + temp1.swizzle = SWIZZLE_XXXX; + temp2.swizzle = SWIZZLE_YYYY; + emit(ir, TGSI_OPCODE_OR, result_dst, temp1, temp2); } else { - emit(ir, TGSI_OPCODE_SNE, result_dst, result_src, st_src_reg_for_float(0.0)); + /* After the dot-product, the value will be an integer on the + * range [0,4]. Zero stays zero, and positive values become 1.0. + */ + glsl_to_tgsi_instruction *const dp = + emit_dp(ir, result_dst, temp, temp, vector_elements); + if (this->prog->Target == GL_FRAGMENT_PROGRAM_ARB) { + /* The clamping to [0,1] can be done for free in the fragment + * shader with a saturate. + */ + dp->saturate = true; + } else { + /* Negating the result of the dot-product gives values on the range + * [-4, 0]. Zero stays zero, and negative values become 1.0. This + * achieved using SLT. + */ + st_src_reg slt_src = result_src; + slt_src.negate = ~slt_src.negate; + emit(ir, TGSI_OPCODE_SLT, result_dst, slt_src, st_src_reg_for_float(0.0)); + } } } else { emit(ir, TGSI_OPCODE_SNE, result_dst, op[0], op[1]); @@ -1535,41 +1657,52 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir) } case ir_binop_logic_xor: - emit(ir, TGSI_OPCODE_SNE, result_dst, op[0], op[1]); + if (native_integers) + emit(ir, TGSI_OPCODE_XOR, result_dst, op[0], op[1]); + else + emit(ir, TGSI_OPCODE_SNE, result_dst, op[0], op[1]); break; case ir_binop_logic_or: { - /* After the addition, the value will be an integer on the - * range [0,2]. Zero stays zero, and positive values become 1.0. - */ - glsl_to_tgsi_instruction *add = - emit(ir, TGSI_OPCODE_ADD, result_dst, op[0], op[1]); - if (this->prog->Target == GL_FRAGMENT_PROGRAM_ARB && - result_dst.type == GLSL_TYPE_FLOAT) { - /* The clamping to [0,1] can be done for free in the fragment - * shader with a saturate if floats are being used as boolean values. - */ - add->saturate = true; - } else if (result_dst.type == GLSL_TYPE_FLOAT) { - /* Negating the result of the addition gives values on the range - * [-2, 0]. Zero stays zero, and negative values become 1.0. This - * is achieved using SLT. + if (native_integers) { + /* If integers are used as booleans, we can use an actual "or" + * instruction. */ - st_src_reg slt_src = result_src; - slt_src.negate = ~slt_src.negate; - emit(ir, TGSI_OPCODE_SLT, result_dst, slt_src, st_src_reg_for_float(0.0)); + assert(native_integers); + emit(ir, TGSI_OPCODE_OR, result_dst, op[0], op[1]); } else { - /* Use an SNE on the result of the addition. Zero stays zero, - * 1 stays 1, and 2 becomes 1. + /* After the addition, the value will be an integer on the + * range [0,2]. Zero stays zero, and positive values become 1.0. */ - emit(ir, TGSI_OPCODE_SNE, result_dst, result_src, st_src_reg_for_int(0)); + glsl_to_tgsi_instruction *add = + emit(ir, TGSI_OPCODE_ADD, result_dst, op[0], op[1]); + if (this->prog->Target == GL_FRAGMENT_PROGRAM_ARB) { + /* The clamping to [0,1] can be done for free in the fragment + * shader with a saturate if floats are being used as boolean values. + */ + add->saturate = true; + } else { + /* Negating the result of the addition gives values on the range + * [-2, 0]. Zero stays zero, and negative values become 1.0. This + * is achieved using SLT. + */ + st_src_reg slt_src = result_src; + slt_src.negate = ~slt_src.negate; + emit(ir, TGSI_OPCODE_SLT, result_dst, slt_src, st_src_reg_for_float(0.0)); + } } break; } case ir_binop_logic_and: - /* the bool args are stored as float 0.0 or 1.0, so "mul" gives us "and". */ - emit(ir, TGSI_OPCODE_MUL, result_dst, op[0], op[1]); + /* If native integers are disabled, the bool args are stored as float 0.0 + * or 1.0, so "mul" gives us "and". If they're enabled, just use the + * actual AND opcode. + */ + if (native_integers) + emit(ir, TGSI_OPCODE_AND, result_dst, op[0], op[1]); + else + emit(ir, TGSI_OPCODE_MUL, result_dst, op[0], op[1]); break; case ir_binop_dot: @@ -1592,18 +1725,36 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir) emit_scalar(ir, TGSI_OPCODE_RSQ, result_dst, op[0]); break; case ir_unop_i2f: - case ir_unop_b2f: if (native_integers) { emit(ir, TGSI_OPCODE_I2F, result_dst, op[0]); break; } + /* fallthrough to next case otherwise */ + case ir_unop_b2f: + if (native_integers) { + emit(ir, TGSI_OPCODE_AND, result_dst, op[0], st_src_reg_for_float(1.0)); + break; + } + /* fallthrough to next case otherwise */ case ir_unop_i2u: case ir_unop_u2i: /* Converting between signed and unsigned integers is a no-op. */ - case ir_unop_b2i: - /* Booleans are stored as integers (or floats in GLSL 1.20 and lower). */ result_src = op[0]; break; + case ir_unop_b2i: + if (native_integers) { + /* Booleans are stored as integers using ~0 for true and 0 for false. + * GLSL requires that int(bool) return 1 for true and 0 for false. + * This conversion is done with AND, but it could be done with NEG. + */ + emit(ir, TGSI_OPCODE_AND, result_dst, op[0], st_src_reg_for_int(1)); + } else { + /* Booleans and integers are both stored as floats when native + * integers are disabled. + */ + result_src = op[0]; + } + break; case ir_unop_f2i: if (native_integers) emit(ir, TGSI_OPCODE_F2I, result_dst, op[0]); @@ -1611,9 +1762,13 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir) emit(ir, TGSI_OPCODE_TRUNC, result_dst, op[0]); break; case ir_unop_f2b: + emit(ir, TGSI_OPCODE_SNE, result_dst, op[0], st_src_reg_for_float(0.0)); + break; case ir_unop_i2b: - emit(ir, TGSI_OPCODE_SNE, result_dst, op[0], - st_src_reg_for_type(result_dst.type, 0)); + if (native_integers) + emit(ir, TGSI_OPCODE_INEG, result_dst, op[0]); + else + emit(ir, TGSI_OPCODE_SNE, result_dst, op[0], st_src_reg_for_float(0.0)); break; case ir_unop_trunc: emit(ir, TGSI_OPCODE_TRUNC, result_dst, op[0]); @@ -1641,7 +1796,7 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir) break; case ir_unop_bit_not: - if (glsl_version >= 130) { + if (native_integers) { emit(ir, TGSI_OPCODE_NOT, result_dst, op[0]); break; } @@ -1651,27 +1806,27 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir) break; } case ir_binop_lshift: - if (glsl_version >= 130) { + if (native_integers) { emit(ir, TGSI_OPCODE_SHL, result_dst, op[0]); break; } case ir_binop_rshift: - if (glsl_version >= 130) { + if (native_integers) { emit(ir, TGSI_OPCODE_ISHR, result_dst, op[0]); break; } case ir_binop_bit_and: - if (glsl_version >= 130) { + if (native_integers) { emit(ir, TGSI_OPCODE_AND, result_dst, op[0]); break; } case ir_binop_bit_xor: - if (glsl_version >= 130) { + if (native_integers) { emit(ir, TGSI_OPCODE_XOR, result_dst, op[0]); break; } case ir_binop_bit_or: - if (glsl_version >= 130) { + if (native_integers) { emit(ir, TGSI_OPCODE_OR, result_dst, op[0]); break; } @@ -1761,14 +1916,6 @@ glsl_to_tgsi_visitor::visit(ir_dereference_variable *ir) entry = new(mem_ctx) variable_storage(var, PROGRAM_INPUT, var->location); - if (this->prog->Target == GL_VERTEX_PROGRAM_ARB && - var->location >= VERT_ATTRIB_GENERIC0) { - _mesa_add_attribute(this->prog->Attributes, - var->name, - _mesa_sizeof_glsl_type(var->type->gl_type), - var->type->gl_type, - var->location - VERT_ATTRIB_GENERIC0); - } break; case ir_var_out: assert(var->location != -1); @@ -1828,17 +1975,19 @@ glsl_to_tgsi_visitor::visit(ir_dereference_array *ir) if (element_size == 1) { index_reg = this->result; } else { - index_reg = get_temp(glsl_type::float_type); + index_reg = get_temp(native_integers ? + glsl_type::int_type : glsl_type::float_type); emit(ir, TGSI_OPCODE_MUL, st_dst_reg(index_reg), - this->result, st_src_reg_for_float(element_size)); + this->result, st_src_reg_for_type(index_reg.type, element_size)); } /* If there was already a relative address register involved, add the * new and the old together to get the new offset. */ if (src.reladdr != NULL) { - st_src_reg accum_reg = get_temp(glsl_type::float_type); + st_src_reg accum_reg = get_temp(native_integers ? + glsl_type::int_type : glsl_type::float_type); emit(ir, TGSI_OPCODE_ADD, st_dst_reg(accum_reg), index_reg, *src.reladdr); @@ -2059,12 +2208,25 @@ glsl_to_tgsi_visitor::visit(ir_assignment *ir) for (i = 0; i < type_size(ir->lhs->type); i++) { st_src_reg l_src = st_src_reg(l); + st_src_reg condition_temp = condition; l_src.swizzle = swizzle_for_size(ir->lhs->type->vector_elements); + if (native_integers) { + /* This is necessary because TGSI's CMP instruction expects the + * condition to be a float, and we store booleans as integers. + * If TGSI had a UCMP instruction or similar, this extra + * instruction would not be necessary. + */ + condition_temp = get_temp(glsl_type::vec4_type); + condition.negate = 0; + emit(ir, TGSI_OPCODE_I2F, st_dst_reg(condition_temp), condition); + condition_temp.swizzle = condition.swizzle; + } + if (switch_order) { - emit(ir, TGSI_OPCODE_CMP, l, condition, l_src, r); + emit(ir, TGSI_OPCODE_CMP, l, condition_temp, l_src, r); } else { - emit(ir, TGSI_OPCODE_CMP, l, condition, r, l_src); + emit(ir, TGSI_OPCODE_CMP, l, condition_temp, r, l_src); } l.index++; @@ -2084,6 +2246,7 @@ glsl_to_tgsi_visitor::visit(ir_assignment *ir) inst = (glsl_to_tgsi_instruction *)this->instructions.get_tail(); new_inst = emit(ir, inst->op, l, inst->src[0], inst->src[1], inst->src[2]); new_inst->saturate = inst->saturate; + inst->dead_mask = inst->dst.writemask; } else { for (i = 0; i < type_size(ir->lhs->type); i++) { emit(ir, TGSI_OPCODE_MOV, l, r); @@ -2351,21 +2514,23 @@ glsl_to_tgsi_visitor::visit(ir_call *ir) void glsl_to_tgsi_visitor::visit(ir_texture *ir) { - st_src_reg result_src, coord, lod_info, projector, dx, dy; + st_src_reg result_src, coord, lod_info, projector, dx, dy, offset; st_dst_reg result_dst, coord_dst; glsl_to_tgsi_instruction *inst = NULL; unsigned opcode = TGSI_OPCODE_NOP; - ir->coordinate->accept(this); + if (ir->coordinate) { + ir->coordinate->accept(this); - /* Put our coords in a temp. We'll need to modify them for shadow, - * projection, or LOD, so the only case we'd use it as is is if - * we're doing plain old texturing. The optimization passes on - * glsl_to_tgsi_visitor should handle cleaning up our mess in that case. - */ - coord = get_temp(glsl_type::vec4_type); - coord_dst = st_dst_reg(coord); - emit(ir, TGSI_OPCODE_MOV, coord_dst, this->result); + /* Put our coords in a temp. We'll need to modify them for shadow, + * projection, or LOD, so the only case we'd use it as is is if + * we're doing plain old texturing. The optimization passes on + * glsl_to_tgsi_visitor should handle cleaning up our mess in that case. + */ + coord = get_temp(glsl_type::vec4_type); + coord_dst = st_dst_reg(coord); + emit(ir, TGSI_OPCODE_MOV, coord_dst, this->result); + } if (ir->projector) { ir->projector->accept(this); @@ -2399,11 +2564,24 @@ glsl_to_tgsi_visitor::visit(ir_texture *ir) ir->lod_info.grad.dPdy->accept(this); dy = this->result; break; - case ir_txf: /* TODO: use TGSI_OPCODE_TXF here */ - assert(!"GLSL 1.30 features unsupported"); + case ir_txs: + opcode = TGSI_OPCODE_TXQ; + ir->lod_info.lod->accept(this); + lod_info = this->result; + break; + case ir_txf: + opcode = TGSI_OPCODE_TXF; + ir->lod_info.lod->accept(this); + lod_info = this->result; + if (ir->offset) { + ir->offset->accept(this); + offset = this->result; + } break; } + const glsl_type *sampler_type = ir->sampler->type; + if (ir->projector) { if (opcode == TGSI_OPCODE_TEX) { /* Slot the projector in as the last component of the coord. */ @@ -2435,6 +2613,9 @@ glsl_to_tgsi_visitor::visit(ir_texture *ir) tmp_src = get_temp(glsl_type::vec4_type); st_dst_reg tmp_dst = st_dst_reg(tmp_src); + /* Projective division not allowed for array samplers. */ + assert(!sampler_type->sampler_array); + tmp_dst.writemask = WRITEMASK_Z; emit(ir, TGSI_OPCODE_MOV, tmp_dst, this->result); @@ -2459,12 +2640,21 @@ glsl_to_tgsi_visitor::visit(ir_texture *ir) * coord. */ ir->shadow_comparitor->accept(this); - coord_dst.writemask = WRITEMASK_Z; + + /* XXX This will need to be updated for cubemap array samplers. */ + if (sampler_type->sampler_dimensionality == GLSL_SAMPLER_DIM_2D && + sampler_type->sampler_array) { + coord_dst.writemask = WRITEMASK_W; + } else { + coord_dst.writemask = WRITEMASK_Z; + } + emit(ir, TGSI_OPCODE_MOV, coord_dst, this->result); coord_dst.writemask = WRITEMASK_XYZW; } - if (opcode == TGSI_OPCODE_TXL || opcode == TGSI_OPCODE_TXB) { + if (opcode == TGSI_OPCODE_TXL || opcode == TGSI_OPCODE_TXB || + opcode == TGSI_OPCODE_TXF) { /* TGSI stores LOD or LOD bias in the last channel of the coords. */ coord_dst.writemask = WRITEMASK_W; emit(ir, TGSI_OPCODE_MOV, coord_dst, lod_info); @@ -2473,7 +2663,11 @@ glsl_to_tgsi_visitor::visit(ir_texture *ir) if (opcode == TGSI_OPCODE_TXD) inst = emit(ir, opcode, result_dst, coord, dx, dy); - else + else if (opcode == TGSI_OPCODE_TXQ) + inst = emit(ir, opcode, result_dst, lod_info); + else if (opcode == TGSI_OPCODE_TXF) { + inst = emit(ir, opcode, result_dst, coord); + } else inst = emit(ir, opcode, result_dst, coord); if (ir->shadow_comparitor) @@ -2483,7 +2677,14 @@ glsl_to_tgsi_visitor::visit(ir_texture *ir) this->shader_program, this->prog); - const glsl_type *sampler_type = ir->sampler->type; + if (ir->offset) { + inst->tex_offset_num_offset = 1; + inst->tex_offsets[0].Index = offset.index; + inst->tex_offsets[0].File = offset.file; + inst->tex_offsets[0].SwizzleX = GET_SWZ(offset.swizzle, 0); + inst->tex_offsets[0].SwizzleY = GET_SWZ(offset.swizzle, 1); + inst->tex_offsets[0].SwizzleZ = GET_SWZ(offset.swizzle, 2); + } switch (sampler_type->sampler_dimensionality) { case GLSL_SAMPLER_DIM_1D: @@ -2506,6 +2707,9 @@ glsl_to_tgsi_visitor::visit(ir_texture *ir) case GLSL_SAMPLER_DIM_BUF: assert(!"FINISHME: Implement ARB_texture_buffer_object"); break; + case GLSL_SAMPLER_DIM_EXTERNAL: + inst->tex_target = TEXTURE_EXTERNAL_INDEX; + break; default: assert(!"Should not get here."); } @@ -2646,171 +2850,6 @@ count_resources(glsl_to_tgsi_visitor *v, gl_program *prog) _mesa_update_shader_textures_used(prog); } - -/** - * Check if the given vertex/fragment/shader program is within the - * resource limits of the context (number of texture units, etc). - * If any of those checks fail, record a linker error. - * - * XXX more checks are needed... - */ -static void -check_resources(const struct gl_context *ctx, - struct gl_shader_program *shader_program, - glsl_to_tgsi_visitor *prog, - struct gl_program *proginfo) -{ - switch (proginfo->Target) { - case GL_VERTEX_PROGRAM_ARB: - if (_mesa_bitcount(prog->samplers_used) > - ctx->Const.MaxVertexTextureImageUnits) { - fail_link(shader_program, "Too many vertex shader texture samplers"); - } - if (proginfo->Parameters->NumParameters > MAX_UNIFORMS) { - fail_link(shader_program, "Too many vertex shader constants"); - } - break; - case MESA_GEOMETRY_PROGRAM: - if (_mesa_bitcount(prog->samplers_used) > - ctx->Const.MaxGeometryTextureImageUnits) { - fail_link(shader_program, "Too many geometry shader texture samplers"); - } - if (proginfo->Parameters->NumParameters > - MAX_GEOMETRY_UNIFORM_COMPONENTS / 4) { - fail_link(shader_program, "Too many geometry shader constants"); - } - break; - case GL_FRAGMENT_PROGRAM_ARB: - if (_mesa_bitcount(prog->samplers_used) > - ctx->Const.MaxTextureImageUnits) { - fail_link(shader_program, "Too many fragment shader texture samplers"); - } - if (proginfo->Parameters->NumParameters > MAX_UNIFORMS) { - fail_link(shader_program, "Too many fragment shader constants"); - } - break; - default: - _mesa_problem(ctx, "unexpected program type in check_resources()"); - } -} - - - -struct uniform_sort { - struct gl_uniform *u; - int pos; -}; - -/* The shader_program->Uniforms list is almost sorted in increasing - * uniform->{Frag,Vert}Pos locations, but not quite when there are - * uniforms shared between targets. We need to add parameters in - * increasing order for the targets. - */ -static int -sort_uniforms(const void *a, const void *b) -{ - struct uniform_sort *u1 = (struct uniform_sort *)a; - struct uniform_sort *u2 = (struct uniform_sort *)b; - - return u1->pos - u2->pos; -} - -/* Add the uniforms to the parameters. The linker chose locations - * in our parameters lists (which weren't created yet), which the - * uniforms code will use to poke values into our parameters list - * when uniforms are updated. - */ -static void -add_uniforms_to_parameters_list(struct gl_shader_program *shader_program, - struct gl_shader *shader, - struct gl_program *prog) -{ - unsigned int i; - unsigned int next_sampler = 0, num_uniforms = 0; - struct uniform_sort *sorted_uniforms; - - sorted_uniforms = ralloc_array(NULL, struct uniform_sort, - shader_program->Uniforms->NumUniforms); - - for (i = 0; i < shader_program->Uniforms->NumUniforms; i++) { - struct gl_uniform *uniform = shader_program->Uniforms->Uniforms + i; - int parameter_index = -1; - - switch (shader->Type) { - case GL_VERTEX_SHADER: - parameter_index = uniform->VertPos; - break; - case GL_FRAGMENT_SHADER: - parameter_index = uniform->FragPos; - break; - case GL_GEOMETRY_SHADER: - parameter_index = uniform->GeomPos; - break; - } - - /* Only add uniforms used in our target. */ - if (parameter_index != -1) { - sorted_uniforms[num_uniforms].pos = parameter_index; - sorted_uniforms[num_uniforms].u = uniform; - num_uniforms++; - } - } - - qsort(sorted_uniforms, num_uniforms, sizeof(struct uniform_sort), - sort_uniforms); - - for (i = 0; i < num_uniforms; i++) { - struct gl_uniform *uniform = sorted_uniforms[i].u; - int parameter_index = sorted_uniforms[i].pos; - const glsl_type *type = uniform->Type; - unsigned int size; - - if (type->is_vector() || - type->is_scalar()) { - size = type->vector_elements; - } else { - size = type_size(type) * 4; - } - - gl_register_file file; - if (type->is_sampler() || - (type->is_array() && type->fields.array->is_sampler())) { - file = PROGRAM_SAMPLER; - } else { - file = PROGRAM_UNIFORM; - } - - GLint index = _mesa_lookup_parameter_index(prog->Parameters, -1, - uniform->Name); - - if (index < 0) { - index = _mesa_add_parameter(prog->Parameters, file, - uniform->Name, size, type->gl_type, - NULL, NULL, 0x0); - - /* Sampler uniform values are stored in prog->SamplerUnits, - * and the entry in that array is selected by this index we - * store in ParameterValues[]. - */ - if (file == PROGRAM_SAMPLER) { - for (unsigned int j = 0; j < size / 4; j++) - prog->Parameters->ParameterValues[index + j][0].f = next_sampler++; - } - - /* The location chosen in the Parameters list here (returned - * from _mesa_add_uniform) has to match what the linker chose. - */ - if (index != parameter_index) { - fail_link(shader_program, "Allocation of uniform `%s' to target " - "failed (%d vs %d)\n", - uniform->Name, index, parameter_index); - } - } - } - - ralloc_free(sorted_uniforms); -} - static void set_uniform_initializer(struct gl_context *ctx, void *mem_ctx, struct gl_shader_program *shader_program, @@ -2872,43 +2911,13 @@ set_uniform_initializer(struct gl_context *ctx, void *mem_ctx, element_type->matrix_columns, element_type->vector_elements, loc, 1, GL_FALSE, (GLfloat *)values); - loc += element_type->matrix_columns; } else { _mesa_uniform(ctx, shader_program, loc, element_type->matrix_columns, values, element_type->gl_type); - loc += type_size(element_type); } - } -} - -static void -set_uniform_initializers(struct gl_context *ctx, - struct gl_shader_program *shader_program) -{ - void *mem_ctx = NULL; - - for (unsigned int i = 0; i < MESA_SHADER_TYPES; i++) { - struct gl_shader *shader = shader_program->_LinkedShaders[i]; - - if (shader == NULL) - continue; - - foreach_iter(exec_list_iterator, iter, *shader->ir) { - ir_instruction *ir = (ir_instruction *)iter.get(); - ir_variable *var = ir->as_variable(); - - if (!var || var->mode != ir_var_uniform || !var->constant_value) - continue; - if (!mem_ctx) - mem_ctx = ralloc_context(NULL); - - set_uniform_initializer(ctx, mem_ctx, shader_program, var->name, - var->type, var->constant_value); - } + loc++; } - - ralloc_free(mem_ctx); } /* @@ -2928,9 +2937,13 @@ glsl_to_tgsi_visitor::remove_output_reads(gl_register_file type) GLint outputMap[VERT_RESULT_MAX]; GLint outputTypes[VERT_RESULT_MAX]; GLuint numVaryingReads = 0; - GLboolean usedTemps[MAX_TEMPS]; + GLboolean *usedTemps; GLuint firstTemp = 0; + usedTemps = new GLboolean[MAX_TEMPS]; + if (!usedTemps) { + return; + } _mesa_find_used_registers(prog, PROGRAM_TEMPORARY, usedTemps, MAX_TEMPS); @@ -2963,6 +2976,8 @@ glsl_to_tgsi_visitor::remove_output_reads(gl_register_file type) } } + delete [] usedTemps; + if (numVaryingReads == 0) return; /* nothing to be done */ @@ -3034,9 +3049,13 @@ get_src_arg_mask(st_dst_reg dst, st_src_reg src) void glsl_to_tgsi_visitor::simplify_cmp(void) { - unsigned tempWrites[MAX_TEMPS]; + unsigned *tempWrites; unsigned outputWrites[MAX_PROGRAM_OUTPUTS]; + tempWrites = new unsigned[MAX_TEMPS]; + if (!tempWrites) { + return; + } memset(tempWrites, 0, sizeof(tempWrites)); memset(outputWrites, 0, sizeof(outputWrites)); @@ -3052,7 +3071,7 @@ glsl_to_tgsi_visitor::simplify_cmp(void) inst->op == TGSI_OPCODE_END || inst->op == TGSI_OPCODE_ENDSUB || inst->op == TGSI_OPCODE_RET) { - return; + break; } if (inst->dst.file == PROGRAM_OUTPUT) { @@ -3077,6 +3096,8 @@ glsl_to_tgsi_visitor::simplify_cmp(void) inst->src[0] = inst->src[1]; } } + + delete [] tempWrites; } /* Replaces all references to a temporary register index with another index. */ @@ -3493,25 +3514,23 @@ glsl_to_tgsi_visitor::eliminate_dead_code_advanced(void) break; case TGSI_OPCODE_ENDIF: - --level; - break; - case TGSI_OPCODE_ELSE: - /* Clear all channels written inside the preceding if block from the - * write array, but leave those that were not touched. - * - * FIXME: This destroys opportunities to remove dead code inside of - * IF blocks that are followed by an ELSE block. + /* Promote the recorded level all channels written inside the preceding + * if or else block to the level above the if/else block. */ for (int r = 0; r < this->next_temp; r++) { for (int c = 0; c < 4; c++) { if (!writes[4 * r + c]) continue; - if (write_level[4 * r + c] >= level) - writes[4 * r + c] = NULL; + if (write_level[4 * r + c] == level) + write_level[4 * r + c] = level-1; } } + + if(inst->op == TGSI_OPCODE_ENDIF) + --level; + break; case TGSI_OPCODE_IF: @@ -3584,7 +3603,7 @@ glsl_to_tgsi_visitor::eliminate_dead_code_advanced(void) if (!inst->dead_mask || !inst->dst.writemask) continue; - else if (inst->dead_mask == inst->dst.writemask) { + else if ((inst->dst.writemask & ~inst->dead_mask) == 0) { iter.remove(); delete inst; removed++; @@ -3710,7 +3729,7 @@ get_pixel_transfer_visitor(struct st_fragment_program *fp, inst->sampler = 0; inst->tex_target = TEXTURE_2D_INDEX; - prog->InputsRead |= (1 << FRAG_ATTRIB_TEX0); + prog->InputsRead |= FRAG_BIT_TEX0; prog->SamplersUsed |= (1 << 0); /* mark sampler 0 as used */ v->samplers_used |= (1 << 0); @@ -3781,7 +3800,7 @@ get_pixel_transfer_visitor(struct st_fragment_program *fp, src_regs[i].index = src0.index; } else if (src_regs[i].file == PROGRAM_INPUT) - prog->InputsRead |= (1 << src_regs[i].index); + prog->InputsRead |= BITFIELD64_BIT(src_regs[i].index); } v->emit(NULL, inst->op, inst->dst, src_regs[0], src_regs[1], src_regs[2]); @@ -3790,8 +3809,6 @@ get_pixel_transfer_visitor(struct st_fragment_program *fp, /* Make modifications to fragment program info. */ prog->Parameters = _mesa_combine_parameter_lists(params, original->prog->Parameters); - prog->Attributes = _mesa_clone_parameter_list(original->prog->Attributes); - prog->Varying = _mesa_clone_parameter_list(original->prog->Varying); _mesa_free_parameter_list(params); count_resources(v, prog); fp->glsl_to_tgsi = v; @@ -3836,7 +3853,7 @@ get_bitmap_visitor(struct st_fragment_program *fp, inst->sampler = samplerIndex; inst->tex_target = TEXTURE_2D_INDEX; - prog->InputsRead |= (1 << FRAG_ATTRIB_TEX0); + prog->InputsRead |= FRAG_BIT_TEX0; prog->SamplersUsed |= (1 << samplerIndex); /* mark sampler as used */ v->samplers_used |= (1 << samplerIndex); @@ -3858,7 +3875,7 @@ get_bitmap_visitor(struct st_fragment_program *fp, for (int i=0; i<3; i++) { src_regs[i] = inst->src[i]; if (src_regs[i].file == PROGRAM_INPUT) - prog->InputsRead |= (1 << src_regs[i].index); + prog->InputsRead |= BITFIELD64_BIT(src_regs[i].index); } v->emit(NULL, inst->op, inst->dst, src_regs[0], src_regs[1], src_regs[2]); @@ -3866,8 +3883,6 @@ get_bitmap_visitor(struct st_fragment_program *fp, /* Make modifications to fragment program info. */ prog->Parameters = _mesa_clone_parameter_list(original->prog->Parameters); - prog->Attributes = _mesa_clone_parameter_list(original->prog->Attributes); - prog->Varying = _mesa_clone_parameter_list(original->prog->Varying); count_resources(v, prog); fp->glsl_to_tgsi = v; } @@ -3926,6 +3941,7 @@ struct st_translate { /** Map Mesa's SYSTEM_VALUE_x to TGSI_SEMANTIC_x */ static unsigned mesa_sysval_to_semantic[SYSTEM_VALUE_MAX] = { TGSI_SEMANTIC_FACE, + TGSI_SEMANTIC_VERTEXID, TGSI_SEMANTIC_INSTANCEID }; @@ -4164,14 +4180,33 @@ translate_src(struct st_translate *t, const st_src_reg *src_reg) return src; } +static struct tgsi_texture_offset +translate_tex_offset(struct st_translate *t, + const struct tgsi_texture_offset *in_offset) +{ + struct tgsi_texture_offset offset; + + assert(in_offset->File == PROGRAM_IMMEDIATE); + + offset.File = TGSI_FILE_IMMEDIATE; + offset.Index = in_offset->Index; + offset.SwizzleX = in_offset->SwizzleX; + offset.SwizzleY = in_offset->SwizzleY; + offset.SwizzleZ = in_offset->SwizzleZ; + + return offset; +} + static void compile_tgsi_instruction(struct st_translate *t, - const struct glsl_to_tgsi_instruction *inst) + const glsl_to_tgsi_instruction *inst) { struct ureg_program *ureg = t->ureg; GLuint i; struct ureg_dst dst[1]; struct ureg_src src[4]; + struct tgsi_texture_offset texoffsets[MAX_GLSL_TEXTURE_OFFSET]; + unsigned num_dst; unsigned num_src; @@ -4205,11 +4240,17 @@ compile_tgsi_instruction(struct st_translate *t, case TGSI_OPCODE_TXD: case TGSI_OPCODE_TXL: case TGSI_OPCODE_TXP: + case TGSI_OPCODE_TXQ: + case TGSI_OPCODE_TXF: src[num_src++] = t->samplers[inst->sampler]; + for (i = 0; i < inst->tex_offset_num_offset; i++) { + texoffsets[i] = translate_tex_offset(t, &inst->tex_offsets[i]); + } ureg_tex_insn(ureg, inst->op, dst, num_dst, translate_texture_target(inst->tex_target, inst->tex_shadow), + texoffsets, inst->tex_offset_num_offset, src, num_src); return; @@ -4228,37 +4269,15 @@ compile_tgsi_instruction(struct st_translate *t, } /** - * Emit the TGSI instructions to adjust the WPOS pixel center convention - * Basically, add (adjX, adjY) to the fragment position. - */ -static void -emit_adjusted_wpos(struct st_translate *t, - const struct gl_program *program, - float adjX, float adjY) -{ - struct ureg_program *ureg = t->ureg; - struct ureg_dst wpos_temp = ureg_DECL_temporary(ureg); - struct ureg_src wpos_input = t->inputs[t->inputMapping[FRAG_ATTRIB_WPOS]]; - - /* Note that we bias X and Y and pass Z and W through unchanged. - * The shader might also use gl_FragCoord.w and .z. - */ - ureg_ADD(ureg, wpos_temp, wpos_input, - ureg_imm4f(ureg, adjX, adjY, 0.0f, 0.0f)); - - t->inputs[t->inputMapping[FRAG_ATTRIB_WPOS]] = ureg_src(wpos_temp); -} - - -/** - * Emit the TGSI instructions for inverting the WPOS y coordinate. + * Emit the TGSI instructions for inverting and adjusting WPOS. * This code is unavoidable because it also depends on whether * a FBO is bound (STATE_FB_WPOS_Y_TRANSFORM). */ static void -emit_wpos_inversion(struct st_translate *t, - const struct gl_program *program, - bool invert) +emit_wpos_adjustment( struct st_translate *t, + const struct gl_program *program, + boolean invert, + GLfloat adjX, GLfloat adjY[2]) { struct ureg_program *ureg = t->ureg; @@ -4277,35 +4296,55 @@ emit_wpos_inversion(struct st_translate *t, unsigned wposTransConst = _mesa_add_state_reference(program->Parameters, wposTransformState); - struct ureg_src wpostrans = ureg_DECL_constant(ureg, wposTransConst); - struct ureg_dst wpos_temp; + struct ureg_src wpostrans = ureg_DECL_constant( ureg, wposTransConst ); + struct ureg_dst wpos_temp = ureg_DECL_temporary( ureg ); struct ureg_src wpos_input = t->inputs[t->inputMapping[FRAG_ATTRIB_WPOS]]; - /* MOV wpos_temp, input[wpos] - */ - if (wpos_input.File == TGSI_FILE_TEMPORARY) - wpos_temp = ureg_dst(wpos_input); - else { - wpos_temp = ureg_DECL_temporary(ureg); - ureg_MOV(ureg, wpos_temp, wpos_input); + /* First, apply the coordinate shift: */ + if (adjX || adjY[0] || adjY[1]) { + if (adjY[0] != adjY[1]) { + /* Adjust the y coordinate by adjY[1] or adjY[0] respectively + * depending on whether inversion is actually going to be applied + * or not, which is determined by testing against the inversion + * state variable used below, which will be either +1 or -1. + */ + struct ureg_dst adj_temp = ureg_DECL_temporary(ureg); + + ureg_CMP(ureg, adj_temp, + ureg_scalar(wpostrans, invert ? 2 : 0), + ureg_imm4f(ureg, adjX, adjY[0], 0.0f, 0.0f), + ureg_imm4f(ureg, adjX, adjY[1], 0.0f, 0.0f)); + ureg_ADD(ureg, wpos_temp, wpos_input, ureg_src(adj_temp)); + } else { + ureg_ADD(ureg, wpos_temp, wpos_input, + ureg_imm4f(ureg, adjX, adjY[0], 0.0f, 0.0f)); + } + wpos_input = ureg_src(wpos_temp); + } else { + /* MOV wpos_temp, input[wpos] + */ + ureg_MOV( ureg, wpos_temp, wpos_input ); } + /* Now the conditional y flip: STATE_FB_WPOS_Y_TRANSFORM.xy/zw will be + * inversion/identity, or the other way around if we're drawing to an FBO. + */ if (invert) { /* MAD wpos_temp.y, wpos_input, wpostrans.xxxx, wpostrans.yyyy */ - ureg_MAD(ureg, - ureg_writemask(wpos_temp, TGSI_WRITEMASK_Y), - wpos_input, - ureg_scalar(wpostrans, 0), - ureg_scalar(wpostrans, 1)); + ureg_MAD( ureg, + ureg_writemask(wpos_temp, TGSI_WRITEMASK_Y ), + wpos_input, + ureg_scalar(wpostrans, 0), + ureg_scalar(wpostrans, 1)); } else { /* MAD wpos_temp.y, wpos_input, wpostrans.zzzz, wpostrans.wwww */ - ureg_MAD(ureg, - ureg_writemask(wpos_temp, TGSI_WRITEMASK_Y), - wpos_input, - ureg_scalar(wpostrans, 2), - ureg_scalar(wpostrans, 3)); + ureg_MAD( ureg, + ureg_writemask(wpos_temp, TGSI_WRITEMASK_Y ), + wpos_input, + ureg_scalar(wpostrans, 2), + ureg_scalar(wpostrans, 3)); } /* Use wpos_temp as position input from here on: @@ -4326,8 +4365,37 @@ emit_wpos(struct st_context *st, const struct gl_fragment_program *fp = (const struct gl_fragment_program *) program; struct pipe_screen *pscreen = st->pipe->screen; + GLfloat adjX = 0.0f; + GLfloat adjY[2] = { 0.0f, 0.0f }; boolean invert = FALSE; + /* Query the pixel center conventions supported by the pipe driver and set + * adjX, adjY to help out if it cannot handle the requested one internally. + * + * The bias of the y-coordinate depends on whether y-inversion takes place + * (adjY[1]) or not (adjY[0]), which is in turn dependent on whether we are + * drawing to an FBO (causes additional inversion), and whether the the pipe + * driver origin and the requested origin differ (the latter condition is + * stored in the 'invert' variable). + * + * For height = 100 (i = integer, h = half-integer, l = lower, u = upper): + * + * center shift only: + * i -> h: +0.5 + * h -> i: -0.5 + * + * inversion only: + * l,i -> u,i: ( 0.0 + 1.0) * -1 + 100 = 99 + * l,h -> u,h: ( 0.5 + 0.0) * -1 + 100 = 99.5 + * u,i -> l,i: (99.0 + 1.0) * -1 + 100 = 0 + * u,h -> l,h: (99.5 + 0.0) * -1 + 100 = 0.5 + * + * inversion and center shift: + * l,i -> u,h: ( 0.0 + 0.5) * -1 + 100 = 99.5 + * l,h -> u,i: ( 0.5 + 0.5) * -1 + 100 = 99 + * u,i -> l,h: (99.0 + 0.5) * -1 + 100 = 0.5 + * u,h -> l,i: (99.5 + 0.5) * -1 + 100 = 0 + */ if (fp->OriginUpperLeft) { /* Fragment shader wants origin in upper-left */ if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT)) { @@ -4355,12 +4423,17 @@ emit_wpos(struct st_context *st, if (fp->PixelCenterInteger) { /* Fragment shader wants pixel center integer */ - if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER)) + if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER)) { /* the driver supports pixel center integer */ + adjY[1] = 1.0f; ureg_property_fs_coord_pixel_center(ureg, TGSI_FS_COORD_PIXEL_CENTER_INTEGER); - else if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER)) + } + else if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER)) { /* the driver supports pixel center half integer, need to bias X,Y */ - emit_adjusted_wpos(t, program, 0.5f, invert ? 0.5f : -0.5f); + adjX = -0.5f; + adjY[0] = -0.5f; + adjY[1] = 0.5f; + } else assert(0); } @@ -4371,8 +4444,8 @@ emit_wpos(struct st_context *st, } else if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER)) { /* the driver supports pixel center integer, need to bias X,Y */ + adjX = adjY[0] = adjY[1] = 0.5f; ureg_property_fs_coord_pixel_center(ureg, TGSI_FS_COORD_PIXEL_CENTER_INTEGER); - emit_adjusted_wpos(t, program, 0.5f, invert ? -0.5f : 0.5f); } else assert(0); @@ -4380,7 +4453,7 @@ emit_wpos(struct st_context *st, /* we invert after adjustment so that we avoid the MOV to temporary, * and reuse the adjustment ADD instead */ - emit_wpos_inversion(t, program, invert); + emit_wpos_adjustment(t, program, invert, adjX, adjY); } /** @@ -4451,14 +4524,19 @@ st_translate_program( const ubyte outputSemanticIndex[], boolean passthrough_edgeflags) { - struct st_translate translate, *t; + struct st_translate *t; unsigned i; enum pipe_error ret = PIPE_OK; assert(numInputs <= Elements(t->inputs)); assert(numOutputs <= Elements(t->outputs)); - t = &translate; + t = CALLOC_STRUCT(st_translate); + if (!t) { + ret = PIPE_ERROR_OUT_OF_MEMORY; + goto out; + } + memset(t, 0, sizeof *t); t->procType = procType; @@ -4513,7 +4591,8 @@ st_translate_program( break; default: assert(!"fragment shader outputs must be POSITION/STENCIL/COLOR"); - return PIPE_ERROR_BAD_INPUT; + ret = PIPE_ERROR_BAD_INPUT; + goto out; } } } @@ -4694,13 +4773,17 @@ st_translate_program( } out: - FREE(t->insn); - FREE(t->labels); - FREE(t->constants); - FREE(t->immediates); + if (t) { + FREE(t->insn); + FREE(t->labels); + FREE(t->constants); + FREE(t->immediates); + + if (t->error) { + debug_printf("%s: translate error flag set\n", __FUNCTION__); + } - if (t->error) { - debug_printf("%s: translate error flag set\n", __FUNCTION__); + FREE(t); } return ret; @@ -4718,6 +4801,8 @@ get_mesa_program(struct gl_context *ctx, { glsl_to_tgsi_visitor* v = new glsl_to_tgsi_visitor(); struct gl_program *prog; + struct pipe_screen * screen = st_context(ctx)->pipe->screen; + unsigned pipe_shader_type; GLenum target; const char *target_string; bool progress; @@ -4728,14 +4813,17 @@ get_mesa_program(struct gl_context *ctx, case GL_VERTEX_SHADER: target = GL_VERTEX_PROGRAM_ARB; target_string = "vertex"; + pipe_shader_type = PIPE_SHADER_VERTEX; break; case GL_FRAGMENT_SHADER: target = GL_FRAGMENT_PROGRAM_ARB; target_string = "fragment"; + pipe_shader_type = PIPE_SHADER_FRAGMENT; break; case GL_GEOMETRY_SHADER: target = GL_GEOMETRY_PROGRAM_NV; target_string = "geometry"; + pipe_shader_type = PIPE_SHADER_GEOMETRY; break; default: assert(!"should not be reached"); @@ -4748,8 +4836,6 @@ get_mesa_program(struct gl_context *ctx, if (!prog) return NULL; prog->Parameters = _mesa_new_parameter_list(); - prog->Varying = _mesa_new_parameter_list(); - prog->Attributes = _mesa_new_parameter_list(); v->ctx = ctx; v->prog = prog; v->shader_program = shader_program; @@ -4757,7 +4843,8 @@ get_mesa_program(struct gl_context *ctx, v->glsl_version = ctx->Const.GLSLVersion; v->native_integers = ctx->Const.NativeIntegers; - add_uniforms_to_parameters_list(shader_program, shader, prog); + _mesa_generate_parameters_list_for_uniforms(shader_program, shader, + prog->Parameters); /* Emit intermediate IR for main(). */ visit_exec_list(shader->ir, v); @@ -4805,10 +4892,13 @@ get_mesa_program(struct gl_context *ctx, } #endif - /* Remove reads to output registers, and to varyings in vertex shaders. */ - v->remove_output_reads(PROGRAM_OUTPUT); - if (target == GL_VERTEX_PROGRAM_ARB) - v->remove_output_reads(PROGRAM_VARYING); + if (!screen->get_shader_param(screen, pipe_shader_type, + PIPE_SHADER_CAP_OUTPUT_READ)) { + /* Remove reads to output registers, and to varyings in vertex shaders. */ + v->remove_output_reads(PROGRAM_OUTPUT); + if (target == GL_VERTEX_PROGRAM_ARB) + v->remove_output_reads(PROGRAM_VARYING); + } /* Perform optimizations on the instructions in the glsl_to_tgsi_visitor. */ v->simplify_cmp(); @@ -4836,18 +4926,26 @@ get_mesa_program(struct gl_context *ctx, _mesa_print_ir(shader->ir, NULL); printf("\n"); printf("\n"); + fflush(stdout); } prog->Instructions = NULL; prog->NumInstructions = 0; - do_set_program_inouts(shader->ir, prog); + do_set_program_inouts(shader->ir, prog, shader->Type == GL_FRAGMENT_SHADER); count_resources(v, prog); - check_resources(ctx, shader_program, v, prog); - _mesa_reference_program(ctx, &shader->Program, prog); + /* This has to be done last. Any operation the can cause + * prog->ParameterValues to get reallocated (e.g., anything that adds a + * program constant) has to happen before creating this linkage. + */ + _mesa_associate_uniform_storage(ctx, shader_program, prog->Parameters); + if (!shader_program->LinkStatus) { + return NULL; + } + struct st_vertex_program *stvp; struct st_fragment_program *stfp; struct st_geometry_program *stgp; @@ -4928,19 +5026,21 @@ st_link_shader(struct gl_context *ctx, struct gl_shader_program *prog) /* Lowering */ do_mat_op_to_vec(ir); lower_instructions(ir, (MOD_TO_FRACT | DIV_TO_MUL_RCP | EXP_TO_EXP2 - | LOG_TO_LOG2 + | LOG_TO_LOG2 | INT_DIV_TO_MUL_RCP | ((options->EmitNoPow) ? POW_TO_EXP2 : 0))); progress = do_lower_jumps(ir, true, true, options->EmitNoMainReturn, options->EmitNoCont, options->EmitNoLoops) || progress; - progress = do_common_optimization(ir, true, options->MaxUnrollIterations) || progress; + progress = do_common_optimization(ir, true, true, + options->MaxUnrollIterations) + || progress; progress = lower_quadop_vector(ir, false) || progress; - if (options->EmitNoIfs) { + if (options->MaxIfDepth == 0) progress = lower_discard(ir) || progress; - progress = lower_if_to_cond_assign(ir) || progress; - } + + progress = lower_if_to_cond_assign(ir, options->MaxIfDepth) || progress; if (options->EmitNoNoise) progress = lower_noise(ir) || progress; @@ -4973,29 +5073,18 @@ st_link_shader(struct gl_context *ctx, struct gl_shader_program *prog) linked_prog = get_mesa_program(ctx, prog, prog->_LinkedShaders[i]); if (linked_prog) { - bool ok = true; - - switch (prog->_LinkedShaders[i]->Type) { - case GL_VERTEX_SHADER: - _mesa_reference_vertprog(ctx, &prog->VertexProgram, - (struct gl_vertex_program *)linked_prog); - ok = ctx->Driver.ProgramStringNotify(ctx, GL_VERTEX_PROGRAM_ARB, - linked_prog); - break; - case GL_FRAGMENT_SHADER: - _mesa_reference_fragprog(ctx, &prog->FragmentProgram, - (struct gl_fragment_program *)linked_prog); - ok = ctx->Driver.ProgramStringNotify(ctx, GL_FRAGMENT_PROGRAM_ARB, - linked_prog); - break; - case GL_GEOMETRY_SHADER: - _mesa_reference_geomprog(ctx, &prog->GeometryProgram, - (struct gl_geometry_program *)linked_prog); - ok = ctx->Driver.ProgramStringNotify(ctx, GL_GEOMETRY_PROGRAM_NV, - linked_prog); - break; - } - if (!ok) { + static const GLenum targets[] = { + GL_VERTEX_PROGRAM_ARB, + GL_FRAGMENT_PROGRAM_ARB, + GL_GEOMETRY_PROGRAM_NV + }; + + _mesa_reference_program(ctx, &prog->_LinkedShaders[i]->Program, + linked_prog); + if (!ctx->Driver.ProgramStringNotify(ctx, targets[i], linked_prog)) { + _mesa_reference_program(ctx, &prog->_LinkedShaders[i]->Program, + NULL); + _mesa_reference_program(ctx, &linked_prog, NULL); return GL_FALSE; } } @@ -5006,53 +5095,4 @@ st_link_shader(struct gl_context *ctx, struct gl_shader_program *prog) return GL_TRUE; } - -/** - * Link a GLSL shader program. Called via glLinkProgram(). - */ -void -st_glsl_link_shader(struct gl_context *ctx, struct gl_shader_program *prog) -{ - unsigned int i; - - _mesa_clear_shader_program_data(ctx, prog); - - prog->LinkStatus = GL_TRUE; - - for (i = 0; i < prog->NumShaders; i++) { - if (!prog->Shaders[i]->CompileStatus) { - fail_link(prog, "linking with uncompiled shader"); - prog->LinkStatus = GL_FALSE; - } - } - - prog->Varying = _mesa_new_parameter_list(); - _mesa_reference_vertprog(ctx, &prog->VertexProgram, NULL); - _mesa_reference_fragprog(ctx, &prog->FragmentProgram, NULL); - _mesa_reference_geomprog(ctx, &prog->GeometryProgram, NULL); - - if (prog->LinkStatus) { - link_shaders(ctx, prog); - } - - if (prog->LinkStatus) { - if (!ctx->Driver.LinkShader(ctx, prog)) { - prog->LinkStatus = GL_FALSE; - } - } - - set_uniform_initializers(ctx, prog); - - if (ctx->Shader.Flags & GLSL_DUMP) { - if (!prog->LinkStatus) { - printf("GLSL shader program %d failed to link\n", prog->Name); - } - - if (prog->InfoLog && prog->InfoLog[0] != 0) { - printf("GLSL shader program %d info log:\n", prog->Name); - printf("%s\n", prog->InfoLog); - } - } -} - } /* extern "C" */