X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fmesa%2Fstate_tracker%2Fst_glsl_to_tgsi.cpp;h=4a68882603ff341af72de9c3983ab38d7c5608ef;hb=426ca34b7a2c3b9edfc0189daece8de3aff80627;hp=6cc655d70cf5f95b79f9ff36df30aef204e7b8e8;hpb=20b0daf82de91fd57b7e8d825786789149f6358d;p=mesa.git diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp index 6cc655d70cf..4a68882603f 100644 --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp @@ -34,7 +34,6 @@ #include "main/compiler.h" #include "ir.h" #include "ir_visitor.h" -#include "ir_print_visitor.h" #include "ir_expression_flattening.h" #include "glsl_types.h" #include "glsl_parser_extras.h" @@ -74,7 +73,6 @@ extern "C" { #define PROGRAM_ANY_CONST ((1 << PROGRAM_LOCAL_PARAM) | \ (1 << PROGRAM_ENV_PARAM) | \ (1 << PROGRAM_STATE_VAR) | \ - (1 << PROGRAM_NAMED_PARAM) | \ (1 << PROGRAM_CONSTANT) | \ (1 << PROGRAM_UNIFORM)) @@ -86,6 +84,11 @@ extern "C" { */ #define MAX_TEMPS 4096 +/** + * Maximum number of arrays + */ +#define MAX_ARRAYS 256 + /* will be 4 for GLSL 4.00 */ #define MAX_GLSL_TEXTURE_OFFSET 1 @@ -108,6 +111,7 @@ public: else this->swizzle = SWIZZLE_XYZW; this->negate = 0; + this->index2D = 0; this->type = type ? type->base_type : GLSL_TYPE_ERROR; this->reladdr = NULL; } @@ -117,6 +121,18 @@ public: this->type = type; this->file = file; this->index = index; + this->index2D = 0; + this->swizzle = SWIZZLE_XYZW; + this->negate = 0; + this->reladdr = NULL; + } + + st_src_reg(gl_register_file file, int index, int type, int index2D) + { + this->type = type; + this->file = file; + this->index = index; + this->index2D = index2D; this->swizzle = SWIZZLE_XYZW; this->negate = 0; this->reladdr = NULL; @@ -127,6 +143,7 @@ public: this->type = GLSL_TYPE_ERROR; this->file = PROGRAM_UNDEFINED; this->index = 0; + this->index2D = 0; this->swizzle = 0; this->negate = 0; this->reladdr = NULL; @@ -135,7 +152,8 @@ public: explicit st_src_reg(st_dst_reg reg); gl_register_file file; /**< PROGRAM_* from Mesa */ - int index; /**< temporary index, VERT_ATTRIB_*, FRAG_ATTRIB_*, etc. */ + int index; /**< temporary index, VERT_ATTRIB_*, VARYING_SLOT_*, etc. */ + int index2D; GLuint swizzle; /**< SWIZZLE_XYZWONEZERO swizzles from Mesa. */ int negate; /**< NEGATE_XYZW mask from mesa */ int type; /** GLSL_TYPE_* from GLSL IR (enum glsl_base_type) */ @@ -168,7 +186,7 @@ public: explicit st_dst_reg(st_src_reg reg); gl_register_file file; /**< PROGRAM_* from Mesa */ - int index; /**< temporary index, VERT_ATTRIB_*, FRAG_ATTRIB_*, etc. */ + int index; /**< temporary index, VERT_ATTRIB_*, VARYING_SLOT_*, etc. */ int writemask; /**< Bitfield of WRITEMASK_[XYZW] */ GLuint cond_mask:4; int type; /** GLSL_TYPE_* from GLSL IR (enum glsl_base_type) */ @@ -184,6 +202,7 @@ st_src_reg::st_src_reg(st_dst_reg reg) this->swizzle = SWIZZLE_XYZW; this->negate = 0; this->reladdr = reg.reladdr; + this->index2D = 0; } st_dst_reg::st_dst_reg(st_src_reg reg) @@ -286,7 +305,7 @@ public: st_src_reg return_reg; }; -class glsl_to_tgsi_visitor : public ir_visitor { +struct glsl_to_tgsi_visitor : public ir_visitor { public: glsl_to_tgsi_visitor(); ~glsl_to_tgsi_visitor(); @@ -300,13 +319,16 @@ public: int next_temp; + unsigned array_sizes[MAX_ARRAYS]; + unsigned next_array; + int num_address_regs; int samplers_used; - bool indirect_addr_temps; bool indirect_addr_consts; int glsl_version; bool native_integers; + bool have_sqrt; variable_storage *find_variable_storage(ir_variable *var); @@ -356,7 +378,7 @@ public: /** List of immediate_storage */ exec_list immediates; - int num_immediates; + unsigned num_immediates; /** List of function_entry */ exec_list function_signatures; @@ -413,7 +435,6 @@ public: bool process_move_condition(ir_rvalue *ir); - void remove_output_reads(gl_register_file type); void simplify_cmp(void); void rename_temp_register(int index, int new_index); @@ -428,6 +449,9 @@ public: void merge_registers(void); void renumber_registers(void); + void emit_block_mov(ir_assignment *ir, const struct glsl_type *type, + st_dst_reg *l, st_src_reg *r); + void *mem_ctx; }; @@ -531,13 +555,9 @@ glsl_to_tgsi_visitor::emit(ir_instruction *ir, unsigned op, /* Update indirect addressing status used by TGSI */ if (dst.reladdr) { switch(dst.file) { - case PROGRAM_TEMPORARY: - this->indirect_addr_temps = true; - break; case PROGRAM_LOCAL_PARAM: case PROGRAM_ENV_PARAM: case PROGRAM_STATE_VAR: - case PROGRAM_NAMED_PARAM: case PROGRAM_CONSTANT: case PROGRAM_UNIFORM: this->indirect_addr_consts = true; @@ -553,13 +573,9 @@ glsl_to_tgsi_visitor::emit(ir_instruction *ir, unsigned op, for (i=0; i<3; i++) { if(inst->src[i].reladdr) { switch(inst->src[i].file) { - case PROGRAM_TEMPORARY: - this->indirect_addr_temps = true; - break; case PROGRAM_LOCAL_PARAM: case PROGRAM_ENV_PARAM: case PROGRAM_STATE_VAR: - case PROGRAM_NAMED_PARAM: case PROGRAM_CONSTANT: case PROGRAM_UNIFORM: this->indirect_addr_consts = true; @@ -634,6 +650,11 @@ glsl_to_tgsi_visitor::get_opcode(ir_instruction *ir, unsigned op, { int type = GLSL_TYPE_FLOAT; + assert(src0.type != GLSL_TYPE_ARRAY); + assert(src0.type != GLSL_TYPE_STRUCT); + assert(src1.type != GLSL_TYPE_ARRAY); + assert(src1.type != GLSL_TYPE_STRUCT); + if (src0.type == GLSL_TYPE_FLOAT || src1.type == GLSL_TYPE_FLOAT) type = GLSL_TYPE_FLOAT; else if (native_integers) @@ -664,6 +685,9 @@ glsl_to_tgsi_visitor::get_opcode(ir_instruction *ir, unsigned op, case3(SLT, ISLT, USLT); case2iu(ISHR, USHR); + + case2fi(SSG, ISSG); + case3(ABS, IABS, IABS); default: break; } @@ -961,10 +985,13 @@ type_size(const struct glsl_type *type) * at link time. */ return 1; - default: - assert(0); - return 0; + case GLSL_TYPE_INTERFACE: + case GLSL_TYPE_VOID: + case GLSL_TYPE_ERROR: + assert(!"Invalid type in type_size"); + break; } + return 0; } /** @@ -978,17 +1005,28 @@ glsl_to_tgsi_visitor::get_temp(const glsl_type *type) st_src_reg src; src.type = native_integers ? type->base_type : GLSL_TYPE_FLOAT; - src.file = PROGRAM_TEMPORARY; - src.index = next_temp; src.reladdr = NULL; - next_temp += type_size(type); + src.negate = 0; + + if (!options->EmitNoIndirectTemp && + (type->is_array() || type->is_matrix())) { + + src.file = PROGRAM_ARRAY; + src.index = next_array << 16 | 0x8000; + array_sizes[next_array] = type_size(type); + ++next_array; + + } else { + src.file = PROGRAM_TEMPORARY; + src.index = next_temp; + next_temp += type_size(type); + } if (type->is_array() || type->is_record()) { src.swizzle = SWIZZLE_NOOP; } else { src.swizzle = swizzle_for_size(type->vector_elements); } - src.negate = 0; return src; } @@ -1051,13 +1089,11 @@ glsl_to_tgsi_visitor::visit(ir_variable *ir) */ assert((int) ir->num_state_slots == type_size(ir->type)); - storage = new(mem_ctx) variable_storage(ir, PROGRAM_TEMPORARY, - this->next_temp); - this->variables.push_tail(storage); - this->next_temp += type_size(ir->type); + dst = st_dst_reg(get_temp(ir->type)); - dst = st_dst_reg(st_src_reg(PROGRAM_TEMPORARY, storage->index, - native_integers ? ir->type->base_type : GLSL_TYPE_FLOAT)); + storage = new(mem_ctx) variable_storage(ir, dst.file, dst.index); + + this->variables.push_tail(storage); } @@ -1072,8 +1108,12 @@ glsl_to_tgsi_visitor::visit(ir_variable *ir) assert(index == storage->index + (int)i); } } else { - st_src_reg src(PROGRAM_STATE_VAR, index, - native_integers ? ir->type->base_type : GLSL_TYPE_FLOAT); + /* We use GLSL_TYPE_FLOAT here regardless of the actual type of + * the data being moved since MOV does not care about the type of + * data it is moving, and we don't want to declare registers with + * array or struct types. + */ + st_src_reg src(PROGRAM_STATE_VAR, index, GLSL_TYPE_FLOAT); src.swizzle = slots[i].swizzle; emit(ir, TGSI_OPCODE_MOV, dst, src); /* even a float takes up a whole vec4 reg in a struct/array. */ @@ -1256,11 +1296,12 @@ glsl_to_tgsi_visitor::try_emit_mad_for_and_not(ir_expression *ir, int try_operan bool glsl_to_tgsi_visitor::try_emit_sat(ir_expression *ir) { - /* Saturates were only introduced to vertex programs in - * NV_vertex_program3, so don't give them to drivers in the VP. + /* Emit saturates in the vertex shader only if SM 3.0 is supported. */ - if (this->prog->Target == GL_VERTEX_PROGRAM_ARB) + if (this->prog->Target == GL_VERTEX_PROGRAM_ARB && + !st_context(this->ctx)->has_shader_model3) { return false; + } ir_rvalue *sat_src = ir->as_rvalue_to_saturate(); if (!sat_src) @@ -1354,9 +1395,9 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir) this->result.file = PROGRAM_UNDEFINED; ir->operands[operand]->accept(this); if (this->result.file == PROGRAM_UNDEFINED) { - ir_print_visitor v; printf("Failed to get tree for expression operand:\n"); - ir->operands[operand]->accept(&v); + ir->operands[operand]->print(); + printf("\n"); exit(1); } op[operand] = this->result; @@ -1402,8 +1443,7 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir) } break; case ir_unop_neg: - assert(result_dst.type == GLSL_TYPE_FLOAT || result_dst.type == GLSL_TYPE_INT); - if (result_dst.type == GLSL_TYPE_INT) + if (result_dst.type == GLSL_TYPE_INT || result_dst.type == GLSL_TYPE_UINT) emit(ir, TGSI_OPCODE_INEG, result_dst, op[0]); else { op[0].negate = ~op[0].negate; @@ -1411,7 +1451,6 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir) } break; case ir_unop_abs: - assert(result_dst.type == GLSL_TYPE_FLOAT); emit(ir, TGSI_OPCODE_ABS, result_dst, op[0]); break; case ir_unop_sign: @@ -1448,9 +1487,29 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir) emit(ir, TGSI_OPCODE_DDX, result_dst, op[0]); break; case ir_unop_dFdy: - op[0].negate = ~op[0].negate; - emit(ir, TGSI_OPCODE_DDY, result_dst, op[0]); + { + /* The X component contains 1 or -1 depending on whether the framebuffer + * is a FBO or the window system buffer, respectively. + * It is then multiplied with the source operand of DDY. + */ + static const gl_state_index transform_y_state[STATE_LENGTH] + = { STATE_INTERNAL, STATE_FB_WPOS_Y_TRANSFORM }; + + unsigned transform_y_index = + _mesa_add_state_reference(this->prog->Parameters, + transform_y_state); + + st_src_reg transform_y = st_src_reg(PROGRAM_STATE_VAR, + transform_y_index, + glsl_type::vec4_type); + transform_y.swizzle = SWIZZLE_XXXX; + + st_src_reg temp = get_temp(glsl_type::vec4_type); + + emit(ir, TGSI_OPCODE_MUL, st_dst_reg(temp), transform_y, op[0]); + emit(ir, TGSI_OPCODE_DDY, result_dst, temp); break; + } case ir_unop_noise: { /* At some point, a motivated person could add a better @@ -1713,13 +1772,18 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir) break; case ir_unop_sqrt: - /* sqrt(x) = x * rsq(x). */ - emit_scalar(ir, TGSI_OPCODE_RSQ, result_dst, op[0]); - emit(ir, TGSI_OPCODE_MUL, result_dst, result_src, op[0]); - /* For incoming channels <= 0, set the result to 0. */ - op[0].negate = ~op[0].negate; - emit(ir, TGSI_OPCODE_CMP, result_dst, - op[0], result_src, st_src_reg_for_float(0.0)); + if (have_sqrt) { + emit_scalar(ir, TGSI_OPCODE_SQRT, result_dst, op[0]); + } + else { + /* sqrt(x) = x * rsq(x). */ + emit_scalar(ir, TGSI_OPCODE_RSQ, result_dst, op[0]); + emit(ir, TGSI_OPCODE_MUL, result_dst, result_src, op[0]); + /* For incoming channels <= 0, set the result to 0. */ + op[0].negate = ~op[0].negate; + emit(ir, TGSI_OPCODE_CMP, result_dst, + op[0], result_src, st_src_reg_for_float(0.0)); + } break; case ir_unop_rsq: emit_scalar(ir, TGSI_OPCODE_RSQ, result_dst, op[0]); @@ -1761,6 +1825,18 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir) else emit(ir, TGSI_OPCODE_TRUNC, result_dst, op[0]); break; + case ir_unop_f2u: + if (native_integers) + emit(ir, TGSI_OPCODE_F2U, result_dst, op[0]); + else + emit(ir, TGSI_OPCODE_TRUNC, result_dst, op[0]); + break; + case ir_unop_bitcast_f2i: + case ir_unop_bitcast_f2u: + case ir_unop_bitcast_i2f: + case ir_unop_bitcast_u2f: + result_src = op[0]; + break; case ir_unop_f2b: emit(ir, TGSI_OPCODE_SNE, result_dst, op[0], st_src_reg_for_float(0.0)); break; @@ -1774,13 +1850,14 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir) emit(ir, TGSI_OPCODE_TRUNC, result_dst, op[0]); break; case ir_unop_ceil: - op[0].negate = ~op[0].negate; - emit(ir, TGSI_OPCODE_FLR, result_dst, op[0]); - result_src.negate = ~result_src.negate; + emit(ir, TGSI_OPCODE_CEIL, result_dst, op[0]); break; case ir_unop_floor: emit(ir, TGSI_OPCODE_FLR, result_dst, op[0]); break; + case ir_unop_round_even: + emit(ir, TGSI_OPCODE_ROUND, result_dst, op[0]); + break; case ir_unop_fract: emit(ir, TGSI_OPCODE_FRC, result_dst, op[0]); break; @@ -1807,37 +1884,102 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir) } case ir_binop_lshift: if (native_integers) { - emit(ir, TGSI_OPCODE_SHL, result_dst, op[0]); + emit(ir, TGSI_OPCODE_SHL, result_dst, op[0], op[1]); break; } case ir_binop_rshift: if (native_integers) { - emit(ir, TGSI_OPCODE_ISHR, result_dst, op[0]); + emit(ir, TGSI_OPCODE_ISHR, result_dst, op[0], op[1]); break; } case ir_binop_bit_and: if (native_integers) { - emit(ir, TGSI_OPCODE_AND, result_dst, op[0]); + emit(ir, TGSI_OPCODE_AND, result_dst, op[0], op[1]); break; } case ir_binop_bit_xor: if (native_integers) { - emit(ir, TGSI_OPCODE_XOR, result_dst, op[0]); + emit(ir, TGSI_OPCODE_XOR, result_dst, op[0], op[1]); break; } case ir_binop_bit_or: if (native_integers) { - emit(ir, TGSI_OPCODE_OR, result_dst, op[0]); + emit(ir, TGSI_OPCODE_OR, result_dst, op[0], op[1]); break; } - case ir_unop_round_even: + assert(!"GLSL 1.30 features unsupported"); break; + case ir_binop_ubo_load: { + ir_constant *uniform_block = ir->operands[0]->as_constant(); + ir_constant *const_offset_ir = ir->operands[1]->as_constant(); + unsigned const_offset = const_offset_ir ? const_offset_ir->value.u[0] : 0; + st_src_reg index_reg = get_temp(glsl_type::uint_type); + st_src_reg cbuf; + + cbuf.type = glsl_type::vec4_type->base_type; + cbuf.file = PROGRAM_CONSTANT; + cbuf.index = 0; + cbuf.index2D = uniform_block->value.u[0] + 1; + cbuf.reladdr = NULL; + cbuf.negate = 0; + + assert(ir->type->is_vector() || ir->type->is_scalar()); + + if (const_offset_ir) { + index_reg = st_src_reg_for_int(const_offset / 16); + } else { + emit(ir, TGSI_OPCODE_USHR, st_dst_reg(index_reg), op[1], st_src_reg_for_int(4)); + } + + cbuf.swizzle = swizzle_for_size(ir->type->vector_elements); + cbuf.swizzle += MAKE_SWIZZLE4(const_offset % 16 / 4, + const_offset % 16 / 4, + const_offset % 16 / 4, + const_offset % 16 / 4); + + cbuf.reladdr = ralloc(mem_ctx, st_src_reg); + memcpy(cbuf.reladdr, &index_reg, sizeof(index_reg)); + + if (ir->type->base_type == GLSL_TYPE_BOOL) { + emit(ir, TGSI_OPCODE_USNE, result_dst, cbuf, st_src_reg_for_int(0)); + } else { + emit(ir, TGSI_OPCODE_MOV, result_dst, cbuf); + } + break; + } + case ir_triop_lrp: + /* note: we have to reorder the three args here */ + emit(ir, TGSI_OPCODE_LRP, result_dst, op[2], op[1], op[0]); + break; + case ir_unop_pack_snorm_2x16: + case ir_unop_pack_unorm_2x16: + case ir_unop_pack_half_2x16: + case ir_unop_pack_snorm_4x8: + case ir_unop_pack_unorm_4x8: + case ir_unop_unpack_snorm_2x16: + case ir_unop_unpack_unorm_2x16: + case ir_unop_unpack_half_2x16: + case ir_unop_unpack_half_2x16_split_x: + case ir_unop_unpack_half_2x16_split_y: + case ir_unop_unpack_snorm_4x8: + case ir_unop_unpack_unorm_4x8: + case ir_binop_pack_half_2x16_split: + case ir_unop_bitfield_reverse: + case ir_unop_bit_count: + case ir_unop_find_msb: + case ir_unop_find_lsb: + case ir_binop_bfm: + case ir_triop_bfi: + case ir_triop_bitfield_extract: + case ir_quadop_bitfield_insert: case ir_quadop_vector: - /* This operation should have already been handled. + case ir_binop_vector_extract: + case ir_triop_vector_insert: + /* This operation is not supported, or should have already been handled. */ - assert(!"Should not get here."); + assert(!"Invalid ir opcode in glsl_to_tgsi_visitor::visit()"); break; } @@ -1903,25 +2045,22 @@ glsl_to_tgsi_visitor::visit(ir_dereference_variable *ir) var->location); this->variables.push_tail(entry); break; - case ir_var_in: - case ir_var_inout: + case ir_var_shader_in: /* The linker assigns locations for varyings and attributes, * including deprecated builtins (like gl_Color), user-assign * generic attributes (glBindVertexLocation), and * user-defined varyings. - * - * FINISHME: We would hit this path for function arguments. Fix! */ assert(var->location != -1); entry = new(mem_ctx) variable_storage(var, PROGRAM_INPUT, var->location); break; - case ir_var_out: + case ir_var_shader_out: assert(var->location != -1); entry = new(mem_ctx) variable_storage(var, PROGRAM_OUTPUT, - var->location); + var->location + var->index); break; case ir_var_system_value: entry = new(mem_ctx) variable_storage(var, @@ -1930,11 +2069,11 @@ glsl_to_tgsi_visitor::visit(ir_dereference_variable *ir) break; case ir_var_auto: case ir_var_temporary: - entry = new(mem_ctx) variable_storage(var, PROGRAM_TEMPORARY, - this->next_temp); + st_src_reg src = get_temp(var->type); + + entry = new(mem_ctx) variable_storage(var, src.file, src.index); this->variables.push_tail(entry); - next_temp += type_size(var->type); break; } @@ -2005,6 +2144,9 @@ glsl_to_tgsi_visitor::visit(ir_dereference_array *ir) else src.swizzle = SWIZZLE_NOOP; + /* Change the register type to the element type of the array. */ + src.type = ir->type->base_type; + this->result = src; } @@ -2030,6 +2172,7 @@ glsl_to_tgsi_visitor::visit(ir_dereference_record *ir) this->result.swizzle = SWIZZLE_NOOP; this->result.index += offset; + this->result.type = ir->type->base_type; } /** @@ -2144,6 +2287,44 @@ glsl_to_tgsi_visitor::process_move_condition(ir_rvalue *ir) return switch_order; } +void +glsl_to_tgsi_visitor::emit_block_mov(ir_assignment *ir, const struct glsl_type *type, + st_dst_reg *l, st_src_reg *r) +{ + if (type->base_type == GLSL_TYPE_STRUCT) { + for (unsigned int i = 0; i < type->length; i++) { + emit_block_mov(ir, type->fields.structure[i].type, l, r); + } + return; + } + + if (type->is_array()) { + for (unsigned int i = 0; i < type->length; i++) { + emit_block_mov(ir, type->fields.array, l, r); + } + return; + } + + if (type->is_matrix()) { + const struct glsl_type *vec_type; + + vec_type = glsl_type::get_instance(GLSL_TYPE_FLOAT, + type->vector_elements, 1); + + for (int i = 0; i < type->matrix_columns; i++) { + emit_block_mov(ir, vec_type, l, r); + } + return; + } + + assert(type->is_scalar() || type->is_vector()); + + r->type = type->base_type; + emit(ir, TGSI_OPCODE_MOV, *l, *r); + l->index++; + r->index++; +} + void glsl_to_tgsi_visitor::visit(ir_assignment *ir) { @@ -2164,7 +2345,7 @@ glsl_to_tgsi_visitor::visit(ir_assignment *ir) assert(!ir->lhs->type->is_scalar() && !ir->lhs->type->is_vector()); l.writemask = WRITEMASK_XYZW; } else if (ir->lhs->type->is_scalar() && - ir->lhs->variable_referenced()->mode == ir_var_out) { + ir->lhs->variable_referenced()->mode == ir_var_shader_out) { /* FINISHME: This hack makes writing to gl_FragDepth, which lives in the * FINISHME: W component of fragment shader output zero, work correctly. */ @@ -2214,8 +2395,8 @@ glsl_to_tgsi_visitor::visit(ir_assignment *ir) if (native_integers) { /* This is necessary because TGSI's CMP instruction expects the * condition to be a float, and we store booleans as integers. - * If TGSI had a UCMP instruction or similar, this extra - * instruction would not be necessary. + * TODO: really want to avoid i2f path and use UCMP. Requires + * changes to process_move_condition though too. */ condition_temp = get_temp(glsl_type::vec4_type); condition.negate = 0; @@ -2248,11 +2429,7 @@ glsl_to_tgsi_visitor::visit(ir_assignment *ir) new_inst->saturate = inst->saturate; inst->dead_mask = inst->dst.writemask; } else { - for (i = 0; i < type_size(ir->lhs->type); i++) { - emit(ir, TGSI_OPCODE_MOV, l, r); - l.index++; - r.index++; - } + emit_block_mov(ir, ir->rhs->type, &l, &r); } } @@ -2372,7 +2549,7 @@ glsl_to_tgsi_visitor::visit(ir_constant *ir) gl_type = native_integers ? GL_BOOL : GL_FLOAT; for (i = 0; i < ir->type->vector_elements; i++) { if (native_integers) - values[i].b = ir->value.b[i]; + values[i].u = ir->value.b[i] ? ~0 : 0; else values[i].f = ir->value.b[i]; } @@ -2414,11 +2591,10 @@ glsl_to_tgsi_visitor::get_function_signature(ir_function_signature *sig) storage = find_variable_storage(param); assert(!storage); - storage = new(mem_ctx) variable_storage(param, PROGRAM_TEMPORARY, - this->next_temp); - this->variables.push_tail(storage); + st_src_reg src = get_temp(param->type); - this->next_temp += type_size(param->type); + storage = new(mem_ctx) variable_storage(param, src.file, src.index); + this->variables.push_tail(storage); } if (!sig->return_type->is_void()) { @@ -2435,7 +2611,7 @@ void glsl_to_tgsi_visitor::visit(ir_call *ir) { glsl_to_tgsi_instruction *call_inst; - ir_function_signature *sig = ir->get_callee(); + ir_function_signature *sig = ir->callee; function_entry *entry = get_function_signature(sig); int i; @@ -2445,8 +2621,8 @@ glsl_to_tgsi_visitor::visit(ir_call *ir) ir_rvalue *param_rval = (ir_rvalue *)iter.get(); ir_variable *param = (ir_variable *)sig_iter.get(); - if (param->mode == ir_var_in || - param->mode == ir_var_inout) { + if (param->mode == ir_var_function_in || + param->mode == ir_var_function_inout) { variable_storage *storage = find_variable_storage(param); assert(storage); @@ -2481,8 +2657,8 @@ glsl_to_tgsi_visitor::visit(ir_call *ir) ir_rvalue *param_rval = (ir_rvalue *)iter.get(); ir_variable *param = (ir_variable *)sig_iter.get(); - if (param->mode == ir_var_out || - param->mode == ir_var_inout) { + if (param->mode == ir_var_function_out || + param->mode == ir_var_function_inout) { variable_storage *storage = find_variable_storage(param); assert(storage); @@ -2514,10 +2690,18 @@ glsl_to_tgsi_visitor::visit(ir_call *ir) void glsl_to_tgsi_visitor::visit(ir_texture *ir) { - st_src_reg result_src, coord, lod_info, projector, dx, dy, offset; - st_dst_reg result_dst, coord_dst; + st_src_reg result_src, coord, cube_sc, lod_info, projector, dx, dy, offset, sample_index; + st_dst_reg result_dst, coord_dst, cube_sc_dst; glsl_to_tgsi_instruction *inst = NULL; unsigned opcode = TGSI_OPCODE_NOP; + const glsl_type *sampler_type = ir->sampler->type; + bool is_cube_array = false; + + /* if we are a cube array sampler */ + if ((sampler_type->sampler_dimensionality == GLSL_SAMPLER_DIM_CUBE && + sampler_type->sampler_array)) { + is_cube_array = true; + } if (ir->coordinate) { ir->coordinate->accept(this); @@ -2529,6 +2713,7 @@ glsl_to_tgsi_visitor::visit(ir_texture *ir) */ coord = get_temp(glsl_type::vec4_type); coord_dst = st_dst_reg(coord); + coord_dst.writemask = (1 << ir->coordinate->type->vector_elements) - 1; emit(ir, TGSI_OPCODE_MOV, coord_dst, this->result); } @@ -2540,22 +2725,34 @@ glsl_to_tgsi_visitor::visit(ir_texture *ir) /* Storage for our result. Ideally for an assignment we'd be using * the actual storage for the result here, instead. */ - result_src = get_temp(glsl_type::vec4_type); + result_src = get_temp(ir->type); result_dst = st_dst_reg(result_src); switch (ir->op) { case ir_tex: - opcode = TGSI_OPCODE_TEX; + opcode = (is_cube_array && ir->shadow_comparitor) ? TGSI_OPCODE_TEX2 : TGSI_OPCODE_TEX; + if (ir->offset) { + ir->offset->accept(this); + offset = this->result; + } break; case ir_txb: - opcode = TGSI_OPCODE_TXB; + opcode = is_cube_array ? TGSI_OPCODE_TXB2 : TGSI_OPCODE_TXB; ir->lod_info.bias->accept(this); lod_info = this->result; + if (ir->offset) { + ir->offset->accept(this); + offset = this->result; + } break; case ir_txl: - opcode = TGSI_OPCODE_TXL; + opcode = is_cube_array ? TGSI_OPCODE_TXL2 : TGSI_OPCODE_TXL; ir->lod_info.lod->accept(this); lod_info = this->result; + if (ir->offset) { + ir->offset->accept(this); + offset = this->result; + } break; case ir_txd: opcode = TGSI_OPCODE_TXD; @@ -2563,6 +2760,10 @@ glsl_to_tgsi_visitor::visit(ir_texture *ir) dx = this->result; ir->lod_info.grad.dPdy->accept(this); dy = this->result; + if (ir->offset) { + ir->offset->accept(this); + offset = this->result; + } break; case ir_txs: opcode = TGSI_OPCODE_TXQ; @@ -2574,14 +2775,20 @@ glsl_to_tgsi_visitor::visit(ir_texture *ir) ir->lod_info.lod->accept(this); lod_info = this->result; if (ir->offset) { - ir->offset->accept(this); - offset = this->result; + ir->offset->accept(this); + offset = this->result; } break; + case ir_txf_ms: + opcode = TGSI_OPCODE_TXF; + ir->lod_info.sample_index->accept(this); + sample_index = this->result; + break; + case ir_lod: + assert(!"Unexpected ir_lod opcode"); + break; } - const glsl_type *sampler_type = ir->sampler->type; - if (ir->projector) { if (opcode == TGSI_OPCODE_TEX) { /* Slot the projector in as the last component of the coord. */ @@ -2641,19 +2848,32 @@ glsl_to_tgsi_visitor::visit(ir_texture *ir) */ ir->shadow_comparitor->accept(this); - /* XXX This will need to be updated for cubemap array samplers. */ - if (sampler_type->sampler_dimensionality == GLSL_SAMPLER_DIM_2D && - sampler_type->sampler_array) { - coord_dst.writemask = WRITEMASK_W; - } else { - coord_dst.writemask = WRITEMASK_Z; + if (is_cube_array) { + cube_sc = get_temp(glsl_type::float_type); + cube_sc_dst = st_dst_reg(cube_sc); + cube_sc_dst.writemask = WRITEMASK_X; + emit(ir, TGSI_OPCODE_MOV, cube_sc_dst, this->result); + cube_sc_dst.writemask = WRITEMASK_X; + } + else { + if ((sampler_type->sampler_dimensionality == GLSL_SAMPLER_DIM_2D && + sampler_type->sampler_array) || + sampler_type->sampler_dimensionality == GLSL_SAMPLER_DIM_CUBE) { + coord_dst.writemask = WRITEMASK_W; + } else { + coord_dst.writemask = WRITEMASK_Z; + } + + emit(ir, TGSI_OPCODE_MOV, coord_dst, this->result); + coord_dst.writemask = WRITEMASK_XYZW; } - - emit(ir, TGSI_OPCODE_MOV, coord_dst, this->result); - coord_dst.writemask = WRITEMASK_XYZW; } - if (opcode == TGSI_OPCODE_TXL || opcode == TGSI_OPCODE_TXB || + if (ir->op == ir_txf_ms) { + coord_dst.writemask = WRITEMASK_W; + emit(ir, TGSI_OPCODE_MOV, coord_dst, sample_index); + coord_dst.writemask = WRITEMASK_XYZW; + } else if (opcode == TGSI_OPCODE_TXL || opcode == TGSI_OPCODE_TXB || opcode == TGSI_OPCODE_TXF) { /* TGSI stores LOD or LOD bias in the last channel of the coords. */ coord_dst.writemask = WRITEMASK_W; @@ -2667,7 +2887,11 @@ glsl_to_tgsi_visitor::visit(ir_texture *ir) inst = emit(ir, opcode, result_dst, lod_info); else if (opcode == TGSI_OPCODE_TXF) { inst = emit(ir, opcode, result_dst, coord); - } else + } else if (opcode == TGSI_OPCODE_TXL2 || opcode == TGSI_OPCODE_TXB2) { + inst = emit(ir, opcode, result_dst, coord, lod_info); + } else if (opcode == TGSI_OPCODE_TEX2) { + inst = emit(ir, opcode, result_dst, coord, cube_sc); + } else inst = emit(ir, opcode, result_dst, coord); if (ir->shadow_comparitor) @@ -2699,17 +2923,22 @@ glsl_to_tgsi_visitor::visit(ir_texture *ir) inst->tex_target = TEXTURE_3D_INDEX; break; case GLSL_SAMPLER_DIM_CUBE: - inst->tex_target = TEXTURE_CUBE_INDEX; + inst->tex_target = (sampler_type->sampler_array) + ? TEXTURE_CUBE_ARRAY_INDEX : TEXTURE_CUBE_INDEX; break; case GLSL_SAMPLER_DIM_RECT: inst->tex_target = TEXTURE_RECT_INDEX; break; case GLSL_SAMPLER_DIM_BUF: - assert(!"FINISHME: Implement ARB_texture_buffer_object"); + inst->tex_target = TEXTURE_BUFFER_INDEX; break; case GLSL_SAMPLER_DIM_EXTERNAL: inst->tex_target = TEXTURE_EXTERNAL_INDEX; break; + case GLSL_SAMPLER_DIM_MS: + inst->tex_target = (sampler_type->sampler_array) + ? TEXTURE_2D_MULTISAMPLE_ARRAY_INDEX : TEXTURE_2D_MULTISAMPLE_INDEX; + break; default: assert(!"Should not get here."); } @@ -2744,8 +2973,6 @@ glsl_to_tgsi_visitor::visit(ir_return *ir) void glsl_to_tgsi_visitor::visit(ir_discard *ir) { - struct gl_fragment_program *fp = (struct gl_fragment_program *)this->prog; - if (ir->condition) { ir->condition->accept(this); this->result.negate = ~this->result.negate; @@ -2753,39 +2980,20 @@ glsl_to_tgsi_visitor::visit(ir_discard *ir) } else { emit(ir, TGSI_OPCODE_KILP); } - - fp->UsesKill = GL_TRUE; } void glsl_to_tgsi_visitor::visit(ir_if *ir) { - glsl_to_tgsi_instruction *cond_inst, *if_inst; - glsl_to_tgsi_instruction *prev_inst; - - prev_inst = (glsl_to_tgsi_instruction *)this->instructions.get_tail(); + unsigned if_opcode; + glsl_to_tgsi_instruction *if_inst; ir->condition->accept(this); assert(this->result.file != PROGRAM_UNDEFINED); - if (this->options->EmitCondCodes) { - cond_inst = (glsl_to_tgsi_instruction *)this->instructions.get_tail(); + if_opcode = native_integers ? TGSI_OPCODE_UIF : TGSI_OPCODE_IF; - /* See if we actually generated any instruction for generating - * the condition. If not, then cook up a move to a temp so we - * have something to set cond_update on. - */ - if (cond_inst == prev_inst) { - st_src_reg temp = get_temp(glsl_type::bool_type); - cond_inst = emit(ir->condition, TGSI_OPCODE_MOV, st_dst_reg(temp), result); - } - cond_inst->cond_update = GL_TRUE; - - if_inst = emit(ir->condition, TGSI_OPCODE_IF); - if_inst->dst.cond_mask = COND_NE; - } else { - if_inst = emit(ir->condition, TGSI_OPCODE_IF, undef_dst, this->result); - } + if_inst = emit(ir->condition, if_opcode, undef_dst, this->result); this->instructions.push_tail(if_inst); @@ -2803,13 +3011,20 @@ glsl_to_tgsi_visitor::glsl_to_tgsi_visitor() { result.file = PROGRAM_UNDEFINED; next_temp = 1; + next_array = 0; next_signature_id = 1; num_immediates = 0; current_function = NULL; num_address_regs = 0; - indirect_addr_temps = false; + samplers_used = 0; indirect_addr_consts = false; + glsl_version = 0; + native_integers = false; mem_ctx = ralloc_context(NULL); + ctx = NULL; + prog = NULL; + shader_program = NULL; + options = NULL; } glsl_to_tgsi_visitor::~glsl_to_tgsi_visitor() @@ -2838,8 +3053,6 @@ count_resources(glsl_to_tgsi_visitor *v, gl_program *prog) if (is_tex_instruction(inst->op)) { v->samplers_used |= 1 << inst->sampler; - prog->SamplerTargets[inst->sampler] = - (gl_texture_index)inst->tex_target; if (inst->tex_shadow) { prog->ShadowSamplers |= 1 << inst->sampler; } @@ -2847,7 +3060,9 @@ count_resources(glsl_to_tgsi_visitor *v, gl_program *prog) } prog->SamplersUsed = v->samplers_used; - _mesa_update_shader_textures_used(prog); + + if (v->shader_program != NULL) + _mesa_update_shader_textures_used(v->shader_program, prog); } static void @@ -2872,13 +3087,15 @@ set_uniform_initializer(struct gl_context *ctx, void *mem_ctx, return; } - int loc = _mesa_get_uniform_location(ctx, shader_program, name); - - if (loc == -1) { + unsigned offset; + unsigned index = _mesa_get_uniform_location(ctx, shader_program, name, + &offset); + if (offset == GL_INVALID_INDEX) { fail_link(shader_program, "Couldn't find uniform for initializer %s\n", name); return; } + int loc = _mesa_uniform_merge_location_offset(shader_program, index, offset); for (unsigned int i = 0; i < (type->is_array() ? type->length : 1); i++) { ir_constant *element; @@ -2920,89 +3137,6 @@ set_uniform_initializer(struct gl_context *ctx, void *mem_ctx, } } -/* - * Scan/rewrite program to remove reads of custom (output) registers. - * The passed type has to be either PROGRAM_OUTPUT or PROGRAM_VARYING - * (for vertex shaders). - * In GLSL shaders, varying vars can be read and written. - * On some hardware, trying to read an output register causes trouble. - * So, rewrite the program to use a temporary register in this case. - * - * Based on _mesa_remove_output_reads from programopt.c. - */ -void -glsl_to_tgsi_visitor::remove_output_reads(gl_register_file type) -{ - GLuint i; - GLint outputMap[VERT_RESULT_MAX]; - GLint outputTypes[VERT_RESULT_MAX]; - GLuint numVaryingReads = 0; - GLboolean *usedTemps; - GLuint firstTemp = 0; - - usedTemps = new GLboolean[MAX_TEMPS]; - if (!usedTemps) { - return; - } - _mesa_find_used_registers(prog, PROGRAM_TEMPORARY, - usedTemps, MAX_TEMPS); - - assert(type == PROGRAM_VARYING || type == PROGRAM_OUTPUT); - assert(prog->Target == GL_VERTEX_PROGRAM_ARB || type != PROGRAM_VARYING); - - for (i = 0; i < VERT_RESULT_MAX; i++) - outputMap[i] = -1; - - /* look for instructions which read from varying vars */ - foreach_iter(exec_list_iterator, iter, this->instructions) { - glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get(); - const GLuint numSrc = num_inst_src_regs(inst->op); - GLuint j; - for (j = 0; j < numSrc; j++) { - if (inst->src[j].file == type) { - /* replace the read with a temp reg */ - const GLuint var = inst->src[j].index; - if (outputMap[var] == -1) { - numVaryingReads++; - outputMap[var] = _mesa_find_free_register(usedTemps, - MAX_TEMPS, - firstTemp); - outputTypes[var] = inst->src[j].type; - firstTemp = outputMap[var] + 1; - } - inst->src[j].file = PROGRAM_TEMPORARY; - inst->src[j].index = outputMap[var]; - } - } - } - - delete [] usedTemps; - - if (numVaryingReads == 0) - return; /* nothing to be done */ - - /* look for instructions which write to the varying vars identified above */ - foreach_iter(exec_list_iterator, iter, this->instructions) { - glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get(); - if (inst->dst.file == type && outputMap[inst->dst.index] >= 0) { - /* change inst to write to the temp reg, instead of the varying */ - inst->dst.file = PROGRAM_TEMPORARY; - inst->dst.index = outputMap[inst->dst.index]; - } - } - - /* insert new MOV instructions at the end */ - for (i = 0; i < VERT_RESULT_MAX; i++) { - if (outputMap[i] >= 0) { - /* MOV VAR[i], TEMP[tmp]; */ - st_src_reg src = st_src_reg(PROGRAM_TEMPORARY, outputMap[i], outputTypes[i]); - st_dst_reg dst = st_dst_reg(type, WRITEMASK_XYZW, outputTypes[i]); - dst.index = i; - this->emit(NULL, TGSI_OPCODE_MOV, dst, src); - } - } -} - /** * Returns the mask of channels (bitmask of WRITEMASK_X,Y,Z,W) which * are read from the given src in this instruction @@ -3056,7 +3190,7 @@ glsl_to_tgsi_visitor::simplify_cmp(void) if (!tempWrites) { return; } - memset(tempWrites, 0, sizeof(tempWrites)); + memset(tempWrites, 0, sizeof(unsigned) * MAX_TEMPS); memset(outputWrites, 0, sizeof(outputWrites)); foreach_iter(exec_list_iterator, iter, this->instructions) { @@ -3082,7 +3216,8 @@ glsl_to_tgsi_visitor::simplify_cmp(void) assert(inst->dst.index < MAX_TEMPS); prevWriteMask = tempWrites[inst->dst.index]; tempWrites[inst->dst.index] |= inst->dst.writemask; - } + } else + continue; /* For a CMP to be considered a conditional write, the destination * register and source register two must be the same. */ @@ -3337,6 +3472,7 @@ glsl_to_tgsi_visitor::copy_propagate(void) break; case TGSI_OPCODE_IF: + case TGSI_OPCODE_UIF: ++level; break; @@ -3415,6 +3551,8 @@ glsl_to_tgsi_visitor::copy_propagate(void) /* If this is a copy, add it to the ACP. */ if (inst->op == TGSI_OPCODE_MOV && inst->dst.file == PROGRAM_TEMPORARY && + !(inst->dst.file == inst->src[0].file && + inst->dst.index == inst->src[0].index) && !inst->dst.reladdr && !inst->saturate && !inst->src[0].reladdr && @@ -3505,18 +3643,23 @@ glsl_to_tgsi_visitor::eliminate_dead_code_advanced(void) switch (inst->op) { case TGSI_OPCODE_BGNLOOP: case TGSI_OPCODE_ENDLOOP: + case TGSI_OPCODE_CONT: + case TGSI_OPCODE_BRK: /* End of a basic block, clear the write array entirely. - * FIXME: This keeps us from killing dead code when the writes are + * + * This keeps us from killing dead code when the writes are * on either side of a loop, even when the register isn't touched - * inside the loop. + * inside the loop. However, glsl_to_tgsi_visitor doesn't seem to emit + * dead code of this type, so it shouldn't make a difference as long as + * the dead code elimination pass in the GLSL compiler does its job. */ memset(writes, 0, sizeof(*writes) * this->next_temp * 4); break; case TGSI_OPCODE_ENDIF: case TGSI_OPCODE_ELSE: - /* Promote the recorded level all channels written inside the preceding - * if or else block to the level above the if/else block. + /* Promote the recorded level of all channels written inside the + * preceding if or else block to the level above the if/else block. */ for (int r = 0; r < this->next_temp; r++) { for (int c = 0; c < 4; c++) { @@ -3534,6 +3677,7 @@ glsl_to_tgsi_visitor::eliminate_dead_code_advanced(void) break; case TGSI_OPCODE_IF: + case TGSI_OPCODE_UIF: ++level; /* fallthrough to default case to mark the condition as read */ @@ -3708,28 +3852,29 @@ get_pixel_transfer_visitor(struct st_fragment_program *fp, /* Copy attributes of the glsl_to_tgsi_visitor in the original shader. */ v->ctx = original->ctx; v->prog = prog; + v->shader_program = NULL; v->glsl_version = original->glsl_version; v->native_integers = original->native_integers; v->options = original->options; v->next_temp = original->next_temp; v->num_address_regs = original->num_address_regs; v->samplers_used = prog->SamplersUsed = original->samplers_used; - v->indirect_addr_temps = original->indirect_addr_temps; v->indirect_addr_consts = original->indirect_addr_consts; memcpy(&v->immediates, &original->immediates, sizeof(v->immediates)); + v->num_immediates = original->num_immediates; /* * Get initial pixel color from the texture. * TEX colorTemp, fragment.texcoord[0], texture[0], 2D; */ - coord = st_src_reg(PROGRAM_INPUT, FRAG_ATTRIB_TEX0, glsl_type::vec2_type); + coord = st_src_reg(PROGRAM_INPUT, VARYING_SLOT_TEX0, glsl_type::vec2_type); src0 = v->get_temp(glsl_type::vec4_type); dst0 = st_dst_reg(src0); inst = v->emit(NULL, TGSI_OPCODE_TEX, dst0, coord); inst->sampler = 0; inst->tex_target = TEXTURE_2D_INDEX; - prog->InputsRead |= FRAG_BIT_TEX0; + prog->InputsRead |= VARYING_BIT_TEX0; prog->SamplersUsed |= (1 << 0); /* mark sampler 0 as used */ v->samplers_used |= (1 << 0); @@ -3786,6 +3931,7 @@ get_pixel_transfer_visitor(struct st_fragment_program *fp, * new visitor. */ foreach_iter(exec_list_iterator, iter, original->instructions) { glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get(); + glsl_to_tgsi_instruction *newinst; st_src_reg src_regs[3]; if (inst->dst.file == PROGRAM_OUTPUT) @@ -3794,7 +3940,7 @@ get_pixel_transfer_visitor(struct st_fragment_program *fp, for (int i=0; i<3; i++) { src_regs[i] = inst->src[i]; if (src_regs[i].file == PROGRAM_INPUT && - src_regs[i].index == FRAG_ATTRIB_COL0) + src_regs[i].index == VARYING_SLOT_COL0) { src_regs[i].file = PROGRAM_TEMPORARY; src_regs[i].index = src0.index; @@ -3803,7 +3949,8 @@ get_pixel_transfer_visitor(struct st_fragment_program *fp, prog->InputsRead |= BITFIELD64_BIT(src_regs[i].index); } - v->emit(NULL, inst->op, inst->dst, src_regs[0], src_regs[1], src_regs[2]); + newinst = v->emit(NULL, inst->op, inst->dst, src_regs[0], src_regs[1], src_regs[2]); + newinst->tex_target = inst->tex_target; } /* Make modifications to fragment program info. */ @@ -3835,25 +3982,26 @@ get_bitmap_visitor(struct st_fragment_program *fp, /* Copy attributes of the glsl_to_tgsi_visitor in the original shader. */ v->ctx = original->ctx; v->prog = prog; + v->shader_program = NULL; v->glsl_version = original->glsl_version; v->native_integers = original->native_integers; v->options = original->options; v->next_temp = original->next_temp; v->num_address_regs = original->num_address_regs; v->samplers_used = prog->SamplersUsed = original->samplers_used; - v->indirect_addr_temps = original->indirect_addr_temps; v->indirect_addr_consts = original->indirect_addr_consts; memcpy(&v->immediates, &original->immediates, sizeof(v->immediates)); + v->num_immediates = original->num_immediates; /* TEX tmp0, fragment.texcoord[0], texture[0], 2D; */ - coord = st_src_reg(PROGRAM_INPUT, FRAG_ATTRIB_TEX0, glsl_type::vec2_type); + coord = st_src_reg(PROGRAM_INPUT, VARYING_SLOT_TEX0, glsl_type::vec2_type); src0 = v->get_temp(glsl_type::vec4_type); dst0 = st_dst_reg(src0); inst = v->emit(NULL, TGSI_OPCODE_TEX, dst0, coord); inst->sampler = samplerIndex; inst->tex_target = TEXTURE_2D_INDEX; - prog->InputsRead |= FRAG_BIT_TEX0; + prog->InputsRead |= VARYING_BIT_TEX0; prog->SamplersUsed |= (1 << samplerIndex); /* mark sampler as used */ v->samplers_used |= (1 << samplerIndex); @@ -3867,6 +4015,7 @@ get_bitmap_visitor(struct st_fragment_program *fp, * new visitor. */ foreach_iter(exec_list_iterator, iter, original->instructions) { glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get(); + glsl_to_tgsi_instruction *newinst; st_src_reg src_regs[3]; if (inst->dst.file == PROGRAM_OUTPUT) @@ -3878,7 +4027,8 @@ get_bitmap_visitor(struct st_fragment_program *fp, prog->InputsRead |= BITFIELD64_BIT(src_regs[i].index); } - v->emit(NULL, inst->op, inst->dst, src_regs[0], src_regs[1], src_regs[2]); + newinst = v->emit(NULL, inst->op, inst->dst, src_regs[0], src_regs[1], src_regs[2]); + newinst->tex_target = inst->tex_target; } /* Make modifications to fragment program info. */ @@ -3900,6 +4050,7 @@ struct st_translate { struct ureg_program *ureg; struct ureg_dst temps[MAX_TEMPS]; + struct ureg_dst arrays[MAX_ARRAYS]; struct ureg_src *constants; struct ureg_src *immediates; struct ureg_dst outputs[PIPE_MAX_SHADER_OUTPUTS]; @@ -3908,11 +4059,7 @@ struct st_translate { struct ureg_src samplers[PIPE_MAX_SAMPLERS]; struct ureg_src systemValues[SYSTEM_VALUE_MAX]; - /* Extra info for handling point size clamping in vertex shader */ - struct ureg_dst pointSizeResult; /**< Actual point size output register */ - struct ureg_src pointSizeConst; /**< Point size range constant register */ - GLint pointSizeOutIndex; /**< Temp point size output register */ - GLboolean prevInstWrotePointSize; + unsigned array_sizes[MAX_ARRAYS]; const GLuint *inputMapping; const GLuint *outputMapping; @@ -4024,26 +4171,41 @@ dst_register(struct st_translate *t, gl_register_file file, GLuint index) { + unsigned array; + switch(file) { case PROGRAM_UNDEFINED: return ureg_dst_undef(); case PROGRAM_TEMPORARY: + assert(index >= 0); + assert(index < (int) Elements(t->temps)); + if (ureg_dst_is_undef(t->temps[index])) - t->temps[index] = ureg_DECL_temporary(t->ureg); + t->temps[index] = ureg_DECL_local_temporary(t->ureg); return t->temps[index]; - case PROGRAM_OUTPUT: - if (t->procType == TGSI_PROCESSOR_VERTEX && index == VERT_RESULT_PSIZ) - t->prevInstWrotePointSize = GL_TRUE; + case PROGRAM_ARRAY: + array = index >> 16; + + assert(array >= 0); + assert(array < (int) Elements(t->arrays)); + if (ureg_dst_is_undef(t->arrays[array])) + t->arrays[array] = ureg_DECL_array_temporary( + t->ureg, t->array_sizes[array], TRUE); + + return ureg_dst_array_offset(t->arrays[array], + (int)(index & 0xFFFF) - 0x8000); + + case PROGRAM_OUTPUT: if (t->procType == TGSI_PROCESSOR_VERTEX) - assert(index < VERT_RESULT_MAX); + assert(index < VARYING_SLOT_MAX); else if (t->procType == TGSI_PROCESSOR_FRAGMENT) assert(index < FRAG_RESULT_MAX); else - assert(index < GEOM_RESULT_MAX); + assert(index < VARYING_SLOT_MAX); assert(t->outputMapping[index] < Elements(t->outputs)); @@ -4064,20 +4226,16 @@ dst_register(struct st_translate *t, static struct ureg_src src_register(struct st_translate *t, gl_register_file file, - GLuint index) + GLint index, GLint index2D) { switch(file) { case PROGRAM_UNDEFINED: return ureg_src_undef(); case PROGRAM_TEMPORARY: - assert(index >= 0); - assert(index < Elements(t->temps)); - if (ureg_dst_is_undef(t->temps[index])) - t->temps[index] = ureg_DECL_temporary(t->ureg); - return ureg_src(t->temps[index]); + case PROGRAM_ARRAY: + return ureg_src(dst_register(t, file, index)); - case PROGRAM_NAMED_PARAM: case PROGRAM_ENV_PARAM: case PROGRAM_LOCAL_PARAM: case PROGRAM_UNIFORM: @@ -4085,7 +4243,13 @@ src_register(struct st_translate *t, return t->constants[index]; case PROGRAM_STATE_VAR: case PROGRAM_CONSTANT: /* ie, immediate */ - if (index < 0) + if (index2D) { + struct ureg_src src; + src = ureg_src_register(TGSI_FILE_CONSTANT, 0); + src.Dimension = 1; + src.DimensionIndex = index2D; + return src; + } else if (index < 0) return ureg_DECL_constant(t->ureg, 0); else return t->constants[index]; @@ -4105,7 +4269,7 @@ src_register(struct st_translate *t, return ureg_src(t->address[index]); case PROGRAM_SYSTEM_VALUE: - assert(index < Elements(t->systemValues)); + assert(index < (int) Elements(t->systemValues)); return t->systemValues[index]; default: @@ -4120,7 +4284,7 @@ src_register(struct st_translate *t, static struct ureg_dst translate_dst(struct st_translate *t, const st_dst_reg *dst_reg, - bool saturate) + bool saturate, bool clamp_color) { struct ureg_dst dst = dst_register(t, dst_reg->file, @@ -4130,9 +4294,32 @@ translate_dst(struct st_translate *t, if (saturate) dst = ureg_saturate(dst); + else if (clamp_color && dst_reg->file == PROGRAM_OUTPUT) { + /* Clamp colors for ARB_color_buffer_float. */ + switch (t->procType) { + case TGSI_PROCESSOR_VERTEX: + /* XXX if the geometry shader is present, this must be done there + * instead of here. */ + if (dst_reg->index == VARYING_SLOT_COL0 || + dst_reg->index == VARYING_SLOT_COL1 || + dst_reg->index == VARYING_SLOT_BFC0 || + dst_reg->index == VARYING_SLOT_BFC1) { + dst = ureg_saturate(dst); + } + break; - if (dst_reg->reladdr != NULL) + case TGSI_PROCESSOR_FRAGMENT: + if (dst_reg->index >= FRAG_RESULT_COLOR) { + dst = ureg_saturate(dst); + } + break; + } + } + + if (dst_reg->reladdr != NULL) { + assert(dst_reg->file != PROGRAM_TEMPORARY); dst = ureg_dst_indirect(dst, ureg_src(t->address[0])); + } return dst; } @@ -4143,7 +4330,7 @@ translate_dst(struct st_translate *t, static struct ureg_src translate_src(struct st_translate *t, const st_src_reg *src_reg) { - struct ureg_src src = src_register(t, src_reg->file, src_reg->index); + struct ureg_src src = src_register(t, src_reg->file, src_reg->index, src_reg->index2D); src = ureg_swizzle(src, GET_SWZ(src_reg->swizzle, 0) & 0x3, @@ -4155,26 +4342,8 @@ translate_src(struct st_translate *t, const st_src_reg *src_reg) src = ureg_negate(src); if (src_reg->reladdr != NULL) { - /* Normally ureg_src_indirect() would be used here, but a stupid compiler - * bug in g++ makes ureg_src_indirect (an inline C function) erroneously - * set the bit for src.Negate. So we have to do the operation manually - * here to work around the compiler's problems. */ - /*src = ureg_src_indirect(src, ureg_src(t->address[0]));*/ - struct ureg_src addr = ureg_src(t->address[0]); - src.Indirect = 1; - src.IndirectFile = addr.File; - src.IndirectIndex = addr.Index; - src.IndirectSwizzle = addr.SwizzleX; - - if (src_reg->file != PROGRAM_INPUT && - src_reg->file != PROGRAM_OUTPUT) { - /* If src_reg->index was negative, it was set to zero in - * src_register(). Reassign it now. But don't do this - * for input/output regs since they get remapped while - * const buffers don't. - */ - src.Index = src_reg->index; - } + assert(src_reg->file != PROGRAM_TEMPORARY); + src = ureg_src_indirect(src, ureg_src(t->address[0])); } return src; @@ -4185,21 +4354,26 @@ translate_tex_offset(struct st_translate *t, const struct tgsi_texture_offset *in_offset) { struct tgsi_texture_offset offset; + struct ureg_src imm_src; assert(in_offset->File == PROGRAM_IMMEDIATE); + imm_src = t->immediates[in_offset->Index]; + offset.File = imm_src.File; + offset.Index = imm_src.Index; + offset.SwizzleX = imm_src.SwizzleX; + offset.SwizzleY = imm_src.SwizzleY; + offset.SwizzleZ = imm_src.SwizzleZ; offset.File = TGSI_FILE_IMMEDIATE; - offset.Index = in_offset->Index; - offset.SwizzleX = in_offset->SwizzleX; - offset.SwizzleY = in_offset->SwizzleY; - offset.SwizzleZ = in_offset->SwizzleZ; + offset.Padding = 0; return offset; } static void compile_tgsi_instruction(struct st_translate *t, - const glsl_to_tgsi_instruction *inst) + const glsl_to_tgsi_instruction *inst, + bool clamp_dst_color_output) { struct ureg_program *ureg = t->ureg; GLuint i; @@ -4209,6 +4383,7 @@ compile_tgsi_instruction(struct st_translate *t, unsigned num_dst; unsigned num_src; + unsigned tex_target; num_dst = num_inst_dst_regs(inst->op); num_src = num_inst_src_regs(inst->op); @@ -4216,7 +4391,8 @@ compile_tgsi_instruction(struct st_translate *t, if (num_dst) dst[0] = translate_dst(t, &inst->dst, - inst->saturate); + inst->saturate, + clamp_dst_color_output); for (i = 0; i < num_src; i++) src[i] = translate_src(t, &inst->src[i]); @@ -4227,6 +4403,7 @@ compile_tgsi_instruction(struct st_translate *t, case TGSI_OPCODE_ELSE: case TGSI_OPCODE_ENDLOOP: case TGSI_OPCODE_IF: + case TGSI_OPCODE_UIF: assert(num_dst == 0); ureg_label_insn(ureg, inst->op, @@ -4242,14 +4419,19 @@ compile_tgsi_instruction(struct st_translate *t, case TGSI_OPCODE_TXP: case TGSI_OPCODE_TXQ: case TGSI_OPCODE_TXF: + case TGSI_OPCODE_TEX2: + case TGSI_OPCODE_TXB2: + case TGSI_OPCODE_TXL2: src[num_src++] = t->samplers[inst->sampler]; for (i = 0; i < inst->tex_offset_num_offset; i++) { texoffsets[i] = translate_tex_offset(t, &inst->tex_offsets[i]); } + tex_target = st_translate_texture_target(inst->tex_target, inst->tex_shadow); + ureg_tex_insn(ureg, inst->op, - dst, num_dst, - translate_texture_target(inst->tex_target, inst->tex_shadow), + dst, num_dst, + tex_target, texoffsets, inst->tex_offset_num_offset, src, num_src); return; @@ -4298,7 +4480,7 @@ emit_wpos_adjustment( struct st_translate *t, struct ureg_src wpostrans = ureg_DECL_constant( ureg, wposTransConst ); struct ureg_dst wpos_temp = ureg_DECL_temporary( ureg ); - struct ureg_src wpos_input = t->inputs[t->inputMapping[FRAG_ATTRIB_WPOS]]; + struct ureg_src wpos_input = t->inputs[t->inputMapping[VARYING_SLOT_POS]]; /* First, apply the coordinate shift: */ if (adjX || adjY[0] || adjY[1]) { @@ -4308,7 +4490,7 @@ emit_wpos_adjustment( struct st_translate *t, * or not, which is determined by testing against the inversion * state variable used below, which will be either +1 or -1. */ - struct ureg_dst adj_temp = ureg_DECL_temporary(ureg); + struct ureg_dst adj_temp = ureg_DECL_local_temporary(ureg); ureg_CMP(ureg, adj_temp, ureg_scalar(wpostrans, invert ? 2 : 0), @@ -4349,7 +4531,7 @@ emit_wpos_adjustment( struct st_translate *t, /* Use wpos_temp as position input from here on: */ - t->inputs[t->inputMapping[FRAG_ATTRIB_WPOS]] = ureg_src(wpos_temp); + t->inputs[t->inputMapping[VARYING_SLOT_POS]] = ureg_src(wpos_temp); } @@ -4467,21 +4649,21 @@ emit_face_var(struct st_translate *t) { struct ureg_program *ureg = t->ureg; struct ureg_dst face_temp = ureg_DECL_temporary(ureg); - struct ureg_src face_input = t->inputs[t->inputMapping[FRAG_ATTRIB_FACE]]; + struct ureg_src face_input = t->inputs[t->inputMapping[VARYING_SLOT_FACE]]; /* MOV_SAT face_temp, input[face] */ face_temp = ureg_saturate(face_temp); ureg_MOV(ureg, face_temp, face_input); /* Use face_temp as face input from here on: */ - t->inputs[t->inputMapping[FRAG_ATTRIB_FACE]] = ureg_src(face_temp); + t->inputs[t->inputMapping[VARYING_SLOT_FACE]] = ureg_src(face_temp); } static void emit_edgeflags(struct st_translate *t) { struct ureg_program *ureg = t->ureg; - struct ureg_dst edge_dst = t->outputs[t->outputMapping[VERT_RESULT_EDGE]]; + struct ureg_dst edge_dst = t->outputs[t->outputMapping[VARYING_SLOT_EDGE]]; struct ureg_src edge_src = t->inputs[t->inputMapping[VERT_ATTRIB_EDGEFLAG]]; ureg_MOV(ureg, edge_dst, edge_src); @@ -4518,11 +4700,13 @@ st_translate_program( const ubyte inputSemanticName[], const ubyte inputSemanticIndex[], const GLuint interpMode[], + const GLboolean is_centroid[], GLuint numOutputs, const GLuint outputMapping[], const ubyte outputSemanticName[], const ubyte outputSemanticIndex[], - boolean passthrough_edgeflags) + boolean passthrough_edgeflags, + boolean clamp_color) { struct st_translate *t; unsigned i; @@ -4543,28 +4727,36 @@ st_translate_program( t->inputMapping = inputMapping; t->outputMapping = outputMapping; t->ureg = ureg; - t->pointSizeOutIndex = -1; - t->prevInstWrotePointSize = GL_FALSE; + + if (program->shader_program) { + for (i = 0; i < program->shader_program->NumUserUniformStorage; i++) { + struct gl_uniform_storage *const storage = + &program->shader_program->UniformStorage[i]; + + _mesa_uniform_detach_all_driver_storage(storage); + } + } /* * Declare input attributes. */ if (procType == TGSI_PROCESSOR_FRAGMENT) { for (i = 0; i < numInputs; i++) { - t->inputs[i] = ureg_DECL_fs_input(ureg, - inputSemanticName[i], - inputSemanticIndex[i], - interpMode[i]); + t->inputs[i] = ureg_DECL_fs_input_cyl_centroid(ureg, + inputSemanticName[i], + inputSemanticIndex[i], + interpMode[i], 0, + is_centroid[i]); } - if (proginfo->InputsRead & FRAG_BIT_WPOS) { + if (proginfo->InputsRead & VARYING_BIT_POS) { /* Must do this after setting up t->inputs, and before * emitting constant references, below: */ emit_wpos(st_context(ctx), t, proginfo, ureg); } - if (proginfo->InputsRead & FRAG_BIT_FACE) + if (proginfo->InputsRead & VARYING_BIT_FACE) emit_face_var(t); /* @@ -4621,25 +4813,6 @@ st_translate_program( t->outputs[i] = ureg_DECL_output(ureg, outputSemanticName[i], outputSemanticIndex[i]); - if ((outputSemanticName[i] == TGSI_SEMANTIC_PSIZE) && proginfo->Id) { - /* Writing to the point size result register requires special - * handling to implement clamping. - */ - static const gl_state_index pointSizeClampState[STATE_LENGTH] - = { STATE_INTERNAL, STATE_POINT_SIZE_IMPL_CLAMP, (gl_state_index)0, (gl_state_index)0, (gl_state_index)0 }; - /* XXX: note we are modifying the incoming shader here! Need to - * do this before emitting the constant decls below, or this - * will be missed. - */ - unsigned pointSizeClampConst = - _mesa_add_state_reference(proginfo->Parameters, - pointSizeClampState); - struct ureg_dst psizregtemp = ureg_DECL_temporary(ureg); - t->pointSizeConst = ureg_DECL_constant(ureg, pointSizeClampConst); - t->pointSizeResult = t->outputs[i]; - t->pointSizeOutIndex = i; - t->outputs[i] = psizregtemp; - } } if (passthrough_edgeflags) emit_edgeflags(t); @@ -4661,28 +4834,41 @@ st_translate_program( if (sysInputs & (1 << i)) { unsigned semName = mesa_sysval_to_semantic[i]; t->systemValues[i] = ureg_DECL_system_value(ureg, numSys, semName, 0); + if (semName == TGSI_SEMANTIC_INSTANCEID || + semName == TGSI_SEMANTIC_VERTEXID) { + /* From Gallium perspective, these system values are always + * integer, and require native integer support. However, if + * native integer is supported on the vertex stage but not the + * pixel stage (e.g, i915g + draw), Mesa will generate IR that + * assumes these system values are floats. To resolve the + * inconsistency, we insert a U2F. + */ + struct st_context *st = st_context(ctx); + struct pipe_screen *pscreen = st->pipe->screen; + assert(procType == TGSI_PROCESSOR_VERTEX); + assert(pscreen->get_shader_param(pscreen, PIPE_SHADER_VERTEX, PIPE_SHADER_CAP_INTEGERS)); + if (!ctx->Const.NativeIntegers) { + struct ureg_dst temp = ureg_DECL_local_temporary(t->ureg); + ureg_U2F( t->ureg, ureg_writemask(temp, TGSI_WRITEMASK_X), t->systemValues[i]); + t->systemValues[i] = ureg_scalar(ureg_src(temp), 0); + } + } numSys++; sysInputs &= ~(1 << i); } } } - if (program->indirect_addr_temps) { - /* If temps are accessed with indirect addressing, declare temporaries - * in sequential order. Else, we declare them on demand elsewhere. - * (Note: the number of temporaries is equal to program->next_temp) - */ - for (i = 0; i < (unsigned)program->next_temp; i++) { - /* XXX use TGSI_FILE_TEMPORARY_ARRAY when it's supported by ureg */ - t->temps[i] = ureg_DECL_temporary(t->ureg); - } - } + /* Copy over array sizes + */ + memcpy(t->array_sizes, program->array_sizes, sizeof(unsigned) * program->next_array); /* Emit constants and uniforms. TGSI uses a single index space for these, * so we put all the translated regs in t->constants. */ if (proginfo->Parameters) { - t->constants = (struct ureg_src *)CALLOC(proginfo->Parameters->NumParameters * sizeof(t->constants[0])); + t->constants = (struct ureg_src *) + calloc(proginfo->Parameters->NumParameters, sizeof(t->constants[0])); if (t->constants == NULL) { ret = PIPE_ERROR_OUT_OF_MEMORY; goto out; @@ -4693,7 +4879,6 @@ st_translate_program( case PROGRAM_ENV_PARAM: case PROGRAM_LOCAL_PARAM: case PROGRAM_STATE_VAR: - case PROGRAM_NAMED_PARAM: case PROGRAM_UNIFORM: t->constants[i] = ureg_DECL_constant(ureg, i); break; @@ -4718,10 +4903,19 @@ st_translate_program( } } } + + if (program->shader_program) { + unsigned num_ubos = program->shader_program->NumUniformBlocks; + + for (i = 0; i < num_ubos; i++) { + ureg_DECL_constant2D(t->ureg, 0, program->shader_program->UniformBlocks[i].UniformBufferSize / 4, i + 1); + } + } /* Emit immediate values. */ - t->immediates = (struct ureg_src *)CALLOC(program->num_immediates * sizeof(struct ureg_src)); + t->immediates = (struct ureg_src *) + calloc(program->num_immediates, sizeof(struct ureg_src)); if (t->immediates == NULL) { ret = PIPE_ERROR_OUT_OF_MEMORY; goto out; @@ -4729,11 +4923,13 @@ st_translate_program( i = 0; foreach_iter(exec_list_iterator, iter, program->immediates) { immediate_storage *imm = (immediate_storage *)iter.get(); + assert(i < program->num_immediates); t->immediates[i++] = emit_immediate(t, imm->values, imm->type, imm->size); } + assert(i == program->num_immediates); /* texture samplers */ - for (i = 0; i < ctx->Const.MaxTextureImageUnits; i++) { + for (i = 0; i < ctx->Const.FragmentProgram.MaxTextureImageUnits; i++) { if (program->samplers_used & (1 << i)) { t->samplers[i] = ureg_DECL_sampler(ureg, i); } @@ -4743,26 +4939,8 @@ st_translate_program( */ foreach_iter(exec_list_iterator, iter, program->instructions) { set_insn_start(t, ureg_get_instruction_number(ureg)); - compile_tgsi_instruction(t, (glsl_to_tgsi_instruction *)iter.get()); - - if (t->prevInstWrotePointSize && proginfo->Id) { - /* The previous instruction wrote to the (fake) vertex point size - * result register. Now we need to clamp that value to the min/max - * point size range, putting the result into the real point size - * register. - * Note that we can't do this easily at the end of program due to - * possible early return. - */ - set_insn_start(t, ureg_get_instruction_number(ureg)); - ureg_MAX(t->ureg, - ureg_writemask(t->outputs[t->pointSizeOutIndex], WRITEMASK_X), - ureg_src(t->outputs[t->pointSizeOutIndex]), - ureg_swizzle(t->pointSizeConst, 1,1,1,1)); - ureg_MIN(t->ureg, ureg_writemask(t->pointSizeResult, WRITEMASK_X), - ureg_src(t->outputs[t->pointSizeOutIndex]), - ureg_swizzle(t->pointSizeConst, 2,2,2,2)); - } - t->prevInstWrotePointSize = GL_FALSE; + compile_tgsi_instruction(t, (glsl_to_tgsi_instruction *)iter.get(), + clamp_color); } /* Fix up all emitted labels: @@ -4772,18 +4950,32 @@ st_translate_program( t->insn[t->labels[i].branch_target]); } + if (program->shader_program) { + /* This has to be done last. Any operation the can cause + * prog->ParameterValues to get reallocated (e.g., anything that adds a + * program constant) has to happen before creating this linkage. + */ + for (unsigned i = 0; i < MESA_SHADER_TYPES; i++) { + if (program->shader_program->_LinkedShaders[i] == NULL) + continue; + + _mesa_associate_uniform_storage(ctx, program->shader_program, + program->shader_program->_LinkedShaders[i]->Program->Parameters); + } + } + out: if (t) { - FREE(t->insn); - FREE(t->labels); - FREE(t->constants); - FREE(t->immediates); + free(t->insn); + free(t->labels); + free(t->constants); + free(t->immediates); if (t->error) { debug_printf("%s: translate error flag set\n", __FUNCTION__); } - FREE(t); + free(t); } return ret; @@ -4797,33 +4989,33 @@ out: static struct gl_program * get_mesa_program(struct gl_context *ctx, struct gl_shader_program *shader_program, - struct gl_shader *shader) + struct gl_shader *shader) { - glsl_to_tgsi_visitor* v = new glsl_to_tgsi_visitor(); + glsl_to_tgsi_visitor* v; struct gl_program *prog; - struct pipe_screen * screen = st_context(ctx)->pipe->screen; - unsigned pipe_shader_type; GLenum target; const char *target_string; bool progress; struct gl_shader_compiler_options *options = &ctx->ShaderCompilerOptions[_mesa_shader_type_to_index(shader->Type)]; + struct pipe_screen *pscreen = ctx->st->pipe->screen; + unsigned ptarget; switch (shader->Type) { case GL_VERTEX_SHADER: target = GL_VERTEX_PROGRAM_ARB; + ptarget = PIPE_SHADER_VERTEX; target_string = "vertex"; - pipe_shader_type = PIPE_SHADER_VERTEX; break; case GL_FRAGMENT_SHADER: target = GL_FRAGMENT_PROGRAM_ARB; + ptarget = PIPE_SHADER_FRAGMENT; target_string = "fragment"; - pipe_shader_type = PIPE_SHADER_FRAGMENT; break; case GL_GEOMETRY_SHADER: target = GL_GEOMETRY_PROGRAM_NV; + ptarget = PIPE_SHADER_GEOMETRY; target_string = "geometry"; - pipe_shader_type = PIPE_SHADER_GEOMETRY; break; default: assert(!"should not be reached"); @@ -4836,6 +5028,7 @@ get_mesa_program(struct gl_context *ctx, if (!prog) return NULL; prog->Parameters = _mesa_new_parameter_list(); + v = new glsl_to_tgsi_visitor(); v->ctx = ctx; v->prog = prog; v->shader_program = shader_program; @@ -4843,9 +5036,15 @@ get_mesa_program(struct gl_context *ctx, v->glsl_version = ctx->Const.GLSLVersion; v->native_integers = ctx->Const.NativeIntegers; + v->have_sqrt = pscreen->get_shader_param(pscreen, ptarget, + PIPE_SHADER_CAP_TGSI_SQRT_SUPPORTED); + _mesa_generate_parameters_list_for_uniforms(shader_program, shader, prog->Parameters); + /* Remove reads from output registers. */ + lower_output_reads(shader->ir); + /* Emit intermediate IR for main(). */ visit_exec_list(shader->ir, v); @@ -4892,29 +5091,14 @@ get_mesa_program(struct gl_context *ctx, } #endif - if (!screen->get_shader_param(screen, pipe_shader_type, - PIPE_SHADER_CAP_OUTPUT_READ)) { - /* Remove reads to output registers, and to varyings in vertex shaders. */ - v->remove_output_reads(PROGRAM_OUTPUT); - if (target == GL_VERTEX_PROGRAM_ARB) - v->remove_output_reads(PROGRAM_VARYING); - } - /* Perform optimizations on the instructions in the glsl_to_tgsi_visitor. */ v->simplify_cmp(); v->copy_propagate(); while (v->eliminate_dead_code_advanced()); - /* FIXME: These passes to optimize temporary registers don't work when there - * is indirect addressing of the temporary register space. We need proper - * array support so that we don't have to give up these passes in every - * shader that uses arrays. - */ - if (!v->indirect_addr_temps) { - v->eliminate_dead_code(); - v->merge_registers(); - v->renumber_registers(); - } + v->eliminate_dead_code(); + v->merge_registers(); + v->renumber_registers(); /* Write the END instruction. */ v->emit(NULL, TGSI_OPCODE_END); @@ -5020,45 +5204,62 @@ st_link_shader(struct gl_context *ctx, struct gl_shader_program *prog) const struct gl_shader_compiler_options *options = &ctx->ShaderCompilerOptions[_mesa_shader_type_to_index(prog->_LinkedShaders[i]->Type)]; + /* If there are forms of indirect addressing that the driver + * cannot handle, perform the lowering pass. + */ + if (options->EmitNoIndirectInput || options->EmitNoIndirectOutput || + options->EmitNoIndirectTemp || options->EmitNoIndirectUniform) { + lower_variable_index_to_cond_assign(ir, + options->EmitNoIndirectInput, + options->EmitNoIndirectOutput, + options->EmitNoIndirectTemp, + options->EmitNoIndirectUniform); + } + + if (ctx->Extensions.ARB_shading_language_packing) { + unsigned lower_inst = LOWER_PACK_SNORM_2x16 | + LOWER_UNPACK_SNORM_2x16 | + LOWER_PACK_UNORM_2x16 | + LOWER_UNPACK_UNORM_2x16 | + LOWER_PACK_SNORM_4x8 | + LOWER_UNPACK_SNORM_4x8 | + LOWER_UNPACK_UNORM_4x8 | + LOWER_PACK_UNORM_4x8 | + LOWER_PACK_HALF_2x16 | + LOWER_UNPACK_HALF_2x16; + + lower_packing_builtins(ir, lower_inst); + } + + do_mat_op_to_vec(ir); + lower_instructions(ir, + MOD_TO_FRACT | + DIV_TO_MUL_RCP | + EXP_TO_EXP2 | + LOG_TO_LOG2 | + (options->EmitNoPow ? POW_TO_EXP2 : 0) | + (!ctx->Const.NativeIntegers ? INT_DIV_TO_MUL_RCP : 0)); + + lower_ubo_reference(prog->_LinkedShaders[i], ir); + do_vec_index_to_cond_assign(ir); + lower_vector_insert(ir, true); + lower_quadop_vector(ir, false); + lower_noise(ir); + if (options->MaxIfDepth == 0) { + lower_discard(ir); + } + do { progress = false; - /* Lowering */ - do_mat_op_to_vec(ir); - lower_instructions(ir, (MOD_TO_FRACT | DIV_TO_MUL_RCP | EXP_TO_EXP2 - | LOG_TO_LOG2 | INT_DIV_TO_MUL_RCP - | ((options->EmitNoPow) ? POW_TO_EXP2 : 0))); - progress = do_lower_jumps(ir, true, true, options->EmitNoMainReturn, options->EmitNoCont, options->EmitNoLoops) || progress; progress = do_common_optimization(ir, true, true, - options->MaxUnrollIterations) + options->MaxUnrollIterations, options) || progress; - progress = lower_quadop_vector(ir, false) || progress; - - if (options->MaxIfDepth == 0) - progress = lower_discard(ir) || progress; - progress = lower_if_to_cond_assign(ir, options->MaxIfDepth) || progress; - if (options->EmitNoNoise) - progress = lower_noise(ir) || progress; - - /* If there are forms of indirect addressing that the driver - * cannot handle, perform the lowering pass. - */ - if (options->EmitNoIndirectInput || options->EmitNoIndirectOutput - || options->EmitNoIndirectTemp || options->EmitNoIndirectUniform) - progress = - lower_variable_index_to_cond_assign(ir, - options->EmitNoIndirectInput, - options->EmitNoIndirectOutput, - options->EmitNoIndirectTemp, - options->EmitNoIndirectUniform) - || progress; - - progress = do_vec_index_to_cond_assign(ir) || progress; } while (progress); validate_ir_tree(ir); @@ -5095,4 +5296,28 @@ st_link_shader(struct gl_context *ctx, struct gl_shader_program *prog) return GL_TRUE; } +void +st_translate_stream_output_info(glsl_to_tgsi_visitor *glsl_to_tgsi, + const GLuint outputMapping[], + struct pipe_stream_output_info *so) +{ + unsigned i; + struct gl_transform_feedback_info *info = + &glsl_to_tgsi->shader_program->LinkedTransformFeedback; + + for (i = 0; i < info->NumOutputs; i++) { + so->output[i].register_index = + outputMapping[info->Outputs[i].OutputRegister]; + so->output[i].start_component = info->Outputs[i].ComponentOffset; + so->output[i].num_components = info->Outputs[i].NumComponents; + so->output[i].output_buffer = info->Outputs[i].OutputBuffer; + so->output[i].dst_offset = info->Outputs[i].DstOffset; + } + + for (i = 0; i < PIPE_MAX_SO_BUFFERS; i++) { + so->stride[i] = info->BufferStride[i]; + } + so->num_outputs = info->NumOutputs; +} + } /* extern "C" */