X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fmesa%2Fstate_tracker%2Fst_glsl_to_tgsi.cpp;h=6cc655d70cf5f95b79f9ff36df30aef204e7b8e8;hb=20b0daf82de91fd57b7e8d825786789149f6358d;hp=18e8a1db44eb8626095bd8327d1d660ad1830d85;hpb=58a7461e1672935e7d30780a4dd40c00abbc28a5;p=mesa.git diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp index 18e8a1db44e..6cc655d70cf 100644 --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp @@ -53,7 +53,6 @@ extern "C" { #include "program/prog_optimize.h" #include "program/prog_print.h" #include "program/program.h" -#include "program/prog_uniform.h" #include "program/prog_parameter.h" #include "program/sampler.h" @@ -79,6 +78,12 @@ extern "C" { (1 << PROGRAM_CONSTANT) | \ (1 << PROGRAM_UNIFORM)) +/** + * Maximum number of temporary registers. + * + * It is too big for stack allocated arrays -- it will cause stack overflow on + * Windows and likely Mac OS X. + */ #define MAX_TEMPS 4096 /* will be 4 for GLSL 4.00 */ @@ -1012,29 +1017,6 @@ glsl_to_tgsi_visitor::visit(ir_variable *ir) fp->OriginUpperLeft = ir->origin_upper_left; fp->PixelCenterInteger = ir->pixel_center_integer; - - } else if (strcmp(ir->name, "gl_FragDepth") == 0) { - struct gl_fragment_program *fp = (struct gl_fragment_program *)this->prog; - switch (ir->depth_layout) { - case ir_depth_layout_none: - fp->FragDepthLayout = FRAG_DEPTH_LAYOUT_NONE; - break; - case ir_depth_layout_any: - fp->FragDepthLayout = FRAG_DEPTH_LAYOUT_ANY; - break; - case ir_depth_layout_greater: - fp->FragDepthLayout = FRAG_DEPTH_LAYOUT_GREATER; - break; - case ir_depth_layout_less: - fp->FragDepthLayout = FRAG_DEPTH_LAYOUT_LESS; - break; - case ir_depth_layout_unchanged: - fp->FragDepthLayout = FRAG_DEPTH_LAYOUT_UNCHANGED; - break; - default: - assert(0); - break; - } } if (ir->mode == ir_var_uniform && strncmp(ir->name, "gl_", 3) == 0) { @@ -2725,6 +2707,9 @@ glsl_to_tgsi_visitor::visit(ir_texture *ir) case GLSL_SAMPLER_DIM_BUF: assert(!"FINISHME: Implement ARB_texture_buffer_object"); break; + case GLSL_SAMPLER_DIM_EXTERNAL: + inst->tex_target = TEXTURE_EXTERNAL_INDEX; + break; default: assert(!"Should not get here."); } @@ -2865,55 +2850,6 @@ count_resources(glsl_to_tgsi_visitor *v, gl_program *prog) _mesa_update_shader_textures_used(prog); } - -/** - * Check if the given vertex/fragment/shader program is within the - * resource limits of the context (number of texture units, etc). - * If any of those checks fail, record a linker error. - * - * XXX more checks are needed... - */ -static void -check_resources(const struct gl_context *ctx, - struct gl_shader_program *shader_program, - glsl_to_tgsi_visitor *prog, - struct gl_program *proginfo) -{ - switch (proginfo->Target) { - case GL_VERTEX_PROGRAM_ARB: - if (_mesa_bitcount(prog->samplers_used) > - ctx->Const.MaxVertexTextureImageUnits) { - fail_link(shader_program, "Too many vertex shader texture samplers"); - } - if (proginfo->Parameters->NumParameters > MAX_UNIFORMS) { - fail_link(shader_program, "Too many vertex shader constants"); - } - break; - case MESA_GEOMETRY_PROGRAM: - if (_mesa_bitcount(prog->samplers_used) > - ctx->Const.MaxGeometryTextureImageUnits) { - fail_link(shader_program, "Too many geometry shader texture samplers"); - } - if (proginfo->Parameters->NumParameters > - MAX_GEOMETRY_UNIFORM_COMPONENTS / 4) { - fail_link(shader_program, "Too many geometry shader constants"); - } - break; - case GL_FRAGMENT_PROGRAM_ARB: - if (_mesa_bitcount(prog->samplers_used) > - ctx->Const.MaxTextureImageUnits) { - fail_link(shader_program, "Too many fragment shader texture samplers"); - } - if (proginfo->Parameters->NumParameters > MAX_UNIFORMS) { - fail_link(shader_program, "Too many fragment shader constants"); - } - break; - default: - _mesa_problem(ctx, "unexpected program type in check_resources()"); - } -} - - static void set_uniform_initializer(struct gl_context *ctx, void *mem_ctx, struct gl_shader_program *shader_program, @@ -2975,12 +2911,12 @@ set_uniform_initializer(struct gl_context *ctx, void *mem_ctx, element_type->matrix_columns, element_type->vector_elements, loc, 1, GL_FALSE, (GLfloat *)values); - loc += element_type->matrix_columns; } else { _mesa_uniform(ctx, shader_program, loc, element_type->matrix_columns, values, element_type->gl_type); - loc += type_size(element_type); } + + loc++; } } @@ -3001,9 +2937,13 @@ glsl_to_tgsi_visitor::remove_output_reads(gl_register_file type) GLint outputMap[VERT_RESULT_MAX]; GLint outputTypes[VERT_RESULT_MAX]; GLuint numVaryingReads = 0; - GLboolean usedTemps[MAX_TEMPS]; + GLboolean *usedTemps; GLuint firstTemp = 0; + usedTemps = new GLboolean[MAX_TEMPS]; + if (!usedTemps) { + return; + } _mesa_find_used_registers(prog, PROGRAM_TEMPORARY, usedTemps, MAX_TEMPS); @@ -3036,6 +2976,8 @@ glsl_to_tgsi_visitor::remove_output_reads(gl_register_file type) } } + delete [] usedTemps; + if (numVaryingReads == 0) return; /* nothing to be done */ @@ -3107,9 +3049,13 @@ get_src_arg_mask(st_dst_reg dst, st_src_reg src) void glsl_to_tgsi_visitor::simplify_cmp(void) { - unsigned tempWrites[MAX_TEMPS]; + unsigned *tempWrites; unsigned outputWrites[MAX_PROGRAM_OUTPUTS]; + tempWrites = new unsigned[MAX_TEMPS]; + if (!tempWrites) { + return; + } memset(tempWrites, 0, sizeof(tempWrites)); memset(outputWrites, 0, sizeof(outputWrites)); @@ -3125,7 +3071,7 @@ glsl_to_tgsi_visitor::simplify_cmp(void) inst->op == TGSI_OPCODE_END || inst->op == TGSI_OPCODE_ENDSUB || inst->op == TGSI_OPCODE_RET) { - return; + break; } if (inst->dst.file == PROGRAM_OUTPUT) { @@ -3150,6 +3096,8 @@ glsl_to_tgsi_visitor::simplify_cmp(void) inst->src[0] = inst->src[1]; } } + + delete [] tempWrites; } /* Replaces all references to a temporary register index with another index. */ @@ -3566,25 +3514,23 @@ glsl_to_tgsi_visitor::eliminate_dead_code_advanced(void) break; case TGSI_OPCODE_ENDIF: - --level; - break; - case TGSI_OPCODE_ELSE: - /* Clear all channels written inside the preceding if block from the - * write array, but leave those that were not touched. - * - * FIXME: This destroys opportunities to remove dead code inside of - * IF blocks that are followed by an ELSE block. + /* Promote the recorded level all channels written inside the preceding + * if or else block to the level above the if/else block. */ for (int r = 0; r < this->next_temp; r++) { for (int c = 0; c < 4; c++) { if (!writes[4 * r + c]) continue; - if (write_level[4 * r + c] >= level) - writes[4 * r + c] = NULL; + if (write_level[4 * r + c] == level) + write_level[4 * r + c] = level-1; } } + + if(inst->op == TGSI_OPCODE_ENDIF) + --level; + break; case TGSI_OPCODE_IF: @@ -3783,7 +3729,7 @@ get_pixel_transfer_visitor(struct st_fragment_program *fp, inst->sampler = 0; inst->tex_target = TEXTURE_2D_INDEX; - prog->InputsRead |= (1 << FRAG_ATTRIB_TEX0); + prog->InputsRead |= FRAG_BIT_TEX0; prog->SamplersUsed |= (1 << 0); /* mark sampler 0 as used */ v->samplers_used |= (1 << 0); @@ -3854,7 +3800,7 @@ get_pixel_transfer_visitor(struct st_fragment_program *fp, src_regs[i].index = src0.index; } else if (src_regs[i].file == PROGRAM_INPUT) - prog->InputsRead |= (1 << src_regs[i].index); + prog->InputsRead |= BITFIELD64_BIT(src_regs[i].index); } v->emit(NULL, inst->op, inst->dst, src_regs[0], src_regs[1], src_regs[2]); @@ -3907,7 +3853,7 @@ get_bitmap_visitor(struct st_fragment_program *fp, inst->sampler = samplerIndex; inst->tex_target = TEXTURE_2D_INDEX; - prog->InputsRead |= (1 << FRAG_ATTRIB_TEX0); + prog->InputsRead |= FRAG_BIT_TEX0; prog->SamplersUsed |= (1 << samplerIndex); /* mark sampler as used */ v->samplers_used |= (1 << samplerIndex); @@ -3929,7 +3875,7 @@ get_bitmap_visitor(struct st_fragment_program *fp, for (int i=0; i<3; i++) { src_regs[i] = inst->src[i]; if (src_regs[i].file == PROGRAM_INPUT) - prog->InputsRead |= (1 << src_regs[i].index); + prog->InputsRead |= BITFIELD64_BIT(src_regs[i].index); } v->emit(NULL, inst->op, inst->dst, src_regs[0], src_regs[1], src_regs[2]); @@ -3995,6 +3941,7 @@ struct st_translate { /** Map Mesa's SYSTEM_VALUE_x to TGSI_SEMANTIC_x */ static unsigned mesa_sysval_to_semantic[SYSTEM_VALUE_MAX] = { TGSI_SEMANTIC_FACE, + TGSI_SEMANTIC_VERTEXID, TGSI_SEMANTIC_INSTANCEID }; @@ -4322,37 +4269,15 @@ compile_tgsi_instruction(struct st_translate *t, } /** - * Emit the TGSI instructions to adjust the WPOS pixel center convention - * Basically, add (adjX, adjY) to the fragment position. - */ -static void -emit_adjusted_wpos(struct st_translate *t, - const struct gl_program *program, - float adjX, float adjY) -{ - struct ureg_program *ureg = t->ureg; - struct ureg_dst wpos_temp = ureg_DECL_temporary(ureg); - struct ureg_src wpos_input = t->inputs[t->inputMapping[FRAG_ATTRIB_WPOS]]; - - /* Note that we bias X and Y and pass Z and W through unchanged. - * The shader might also use gl_FragCoord.w and .z. - */ - ureg_ADD(ureg, wpos_temp, wpos_input, - ureg_imm4f(ureg, adjX, adjY, 0.0f, 0.0f)); - - t->inputs[t->inputMapping[FRAG_ATTRIB_WPOS]] = ureg_src(wpos_temp); -} - - -/** - * Emit the TGSI instructions for inverting the WPOS y coordinate. + * Emit the TGSI instructions for inverting and adjusting WPOS. * This code is unavoidable because it also depends on whether * a FBO is bound (STATE_FB_WPOS_Y_TRANSFORM). */ static void -emit_wpos_inversion(struct st_translate *t, - const struct gl_program *program, - bool invert) +emit_wpos_adjustment( struct st_translate *t, + const struct gl_program *program, + boolean invert, + GLfloat adjX, GLfloat adjY[2]) { struct ureg_program *ureg = t->ureg; @@ -4371,35 +4296,55 @@ emit_wpos_inversion(struct st_translate *t, unsigned wposTransConst = _mesa_add_state_reference(program->Parameters, wposTransformState); - struct ureg_src wpostrans = ureg_DECL_constant(ureg, wposTransConst); - struct ureg_dst wpos_temp; + struct ureg_src wpostrans = ureg_DECL_constant( ureg, wposTransConst ); + struct ureg_dst wpos_temp = ureg_DECL_temporary( ureg ); struct ureg_src wpos_input = t->inputs[t->inputMapping[FRAG_ATTRIB_WPOS]]; - /* MOV wpos_temp, input[wpos] - */ - if (wpos_input.File == TGSI_FILE_TEMPORARY) - wpos_temp = ureg_dst(wpos_input); - else { - wpos_temp = ureg_DECL_temporary(ureg); - ureg_MOV(ureg, wpos_temp, wpos_input); + /* First, apply the coordinate shift: */ + if (adjX || adjY[0] || adjY[1]) { + if (adjY[0] != adjY[1]) { + /* Adjust the y coordinate by adjY[1] or adjY[0] respectively + * depending on whether inversion is actually going to be applied + * or not, which is determined by testing against the inversion + * state variable used below, which will be either +1 or -1. + */ + struct ureg_dst adj_temp = ureg_DECL_temporary(ureg); + + ureg_CMP(ureg, adj_temp, + ureg_scalar(wpostrans, invert ? 2 : 0), + ureg_imm4f(ureg, adjX, adjY[0], 0.0f, 0.0f), + ureg_imm4f(ureg, adjX, adjY[1], 0.0f, 0.0f)); + ureg_ADD(ureg, wpos_temp, wpos_input, ureg_src(adj_temp)); + } else { + ureg_ADD(ureg, wpos_temp, wpos_input, + ureg_imm4f(ureg, adjX, adjY[0], 0.0f, 0.0f)); + } + wpos_input = ureg_src(wpos_temp); + } else { + /* MOV wpos_temp, input[wpos] + */ + ureg_MOV( ureg, wpos_temp, wpos_input ); } + /* Now the conditional y flip: STATE_FB_WPOS_Y_TRANSFORM.xy/zw will be + * inversion/identity, or the other way around if we're drawing to an FBO. + */ if (invert) { /* MAD wpos_temp.y, wpos_input, wpostrans.xxxx, wpostrans.yyyy */ - ureg_MAD(ureg, - ureg_writemask(wpos_temp, TGSI_WRITEMASK_Y), - wpos_input, - ureg_scalar(wpostrans, 0), - ureg_scalar(wpostrans, 1)); + ureg_MAD( ureg, + ureg_writemask(wpos_temp, TGSI_WRITEMASK_Y ), + wpos_input, + ureg_scalar(wpostrans, 0), + ureg_scalar(wpostrans, 1)); } else { /* MAD wpos_temp.y, wpos_input, wpostrans.zzzz, wpostrans.wwww */ - ureg_MAD(ureg, - ureg_writemask(wpos_temp, TGSI_WRITEMASK_Y), - wpos_input, - ureg_scalar(wpostrans, 2), - ureg_scalar(wpostrans, 3)); + ureg_MAD( ureg, + ureg_writemask(wpos_temp, TGSI_WRITEMASK_Y ), + wpos_input, + ureg_scalar(wpostrans, 2), + ureg_scalar(wpostrans, 3)); } /* Use wpos_temp as position input from here on: @@ -4420,8 +4365,37 @@ emit_wpos(struct st_context *st, const struct gl_fragment_program *fp = (const struct gl_fragment_program *) program; struct pipe_screen *pscreen = st->pipe->screen; + GLfloat adjX = 0.0f; + GLfloat adjY[2] = { 0.0f, 0.0f }; boolean invert = FALSE; + /* Query the pixel center conventions supported by the pipe driver and set + * adjX, adjY to help out if it cannot handle the requested one internally. + * + * The bias of the y-coordinate depends on whether y-inversion takes place + * (adjY[1]) or not (adjY[0]), which is in turn dependent on whether we are + * drawing to an FBO (causes additional inversion), and whether the the pipe + * driver origin and the requested origin differ (the latter condition is + * stored in the 'invert' variable). + * + * For height = 100 (i = integer, h = half-integer, l = lower, u = upper): + * + * center shift only: + * i -> h: +0.5 + * h -> i: -0.5 + * + * inversion only: + * l,i -> u,i: ( 0.0 + 1.0) * -1 + 100 = 99 + * l,h -> u,h: ( 0.5 + 0.0) * -1 + 100 = 99.5 + * u,i -> l,i: (99.0 + 1.0) * -1 + 100 = 0 + * u,h -> l,h: (99.5 + 0.0) * -1 + 100 = 0.5 + * + * inversion and center shift: + * l,i -> u,h: ( 0.0 + 0.5) * -1 + 100 = 99.5 + * l,h -> u,i: ( 0.5 + 0.5) * -1 + 100 = 99 + * u,i -> l,h: (99.0 + 0.5) * -1 + 100 = 0.5 + * u,h -> l,i: (99.5 + 0.5) * -1 + 100 = 0 + */ if (fp->OriginUpperLeft) { /* Fragment shader wants origin in upper-left */ if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT)) { @@ -4449,12 +4423,17 @@ emit_wpos(struct st_context *st, if (fp->PixelCenterInteger) { /* Fragment shader wants pixel center integer */ - if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER)) + if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER)) { /* the driver supports pixel center integer */ + adjY[1] = 1.0f; ureg_property_fs_coord_pixel_center(ureg, TGSI_FS_COORD_PIXEL_CENTER_INTEGER); - else if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER)) + } + else if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER)) { /* the driver supports pixel center half integer, need to bias X,Y */ - emit_adjusted_wpos(t, program, 0.5f, invert ? 0.5f : -0.5f); + adjX = -0.5f; + adjY[0] = -0.5f; + adjY[1] = 0.5f; + } else assert(0); } @@ -4465,8 +4444,8 @@ emit_wpos(struct st_context *st, } else if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER)) { /* the driver supports pixel center integer, need to bias X,Y */ + adjX = adjY[0] = adjY[1] = 0.5f; ureg_property_fs_coord_pixel_center(ureg, TGSI_FS_COORD_PIXEL_CENTER_INTEGER); - emit_adjusted_wpos(t, program, 0.5f, invert ? -0.5f : 0.5f); } else assert(0); @@ -4474,7 +4453,7 @@ emit_wpos(struct st_context *st, /* we invert after adjustment so that we avoid the MOV to temporary, * and reuse the adjustment ADD instead */ - emit_wpos_inversion(t, program, invert); + emit_wpos_adjustment(t, program, invert, adjX, adjY); } /** @@ -4545,14 +4524,19 @@ st_translate_program( const ubyte outputSemanticIndex[], boolean passthrough_edgeflags) { - struct st_translate translate, *t; + struct st_translate *t; unsigned i; enum pipe_error ret = PIPE_OK; assert(numInputs <= Elements(t->inputs)); assert(numOutputs <= Elements(t->outputs)); - t = &translate; + t = CALLOC_STRUCT(st_translate); + if (!t) { + ret = PIPE_ERROR_OUT_OF_MEMORY; + goto out; + } + memset(t, 0, sizeof *t); t->procType = procType; @@ -4607,7 +4591,8 @@ st_translate_program( break; default: assert(!"fragment shader outputs must be POSITION/STENCIL/COLOR"); - return PIPE_ERROR_BAD_INPUT; + ret = PIPE_ERROR_BAD_INPUT; + goto out; } } } @@ -4788,13 +4773,17 @@ st_translate_program( } out: - FREE(t->insn); - FREE(t->labels); - FREE(t->constants); - FREE(t->immediates); + if (t) { + FREE(t->insn); + FREE(t->labels); + FREE(t->constants); + FREE(t->immediates); + + if (t->error) { + debug_printf("%s: translate error flag set\n", __FUNCTION__); + } - if (t->error) { - debug_printf("%s: translate error flag set\n", __FUNCTION__); + FREE(t); } return ret; @@ -4812,6 +4801,8 @@ get_mesa_program(struct gl_context *ctx, { glsl_to_tgsi_visitor* v = new glsl_to_tgsi_visitor(); struct gl_program *prog; + struct pipe_screen * screen = st_context(ctx)->pipe->screen; + unsigned pipe_shader_type; GLenum target; const char *target_string; bool progress; @@ -4822,14 +4813,17 @@ get_mesa_program(struct gl_context *ctx, case GL_VERTEX_SHADER: target = GL_VERTEX_PROGRAM_ARB; target_string = "vertex"; + pipe_shader_type = PIPE_SHADER_VERTEX; break; case GL_FRAGMENT_SHADER: target = GL_FRAGMENT_PROGRAM_ARB; target_string = "fragment"; + pipe_shader_type = PIPE_SHADER_FRAGMENT; break; case GL_GEOMETRY_SHADER: target = GL_GEOMETRY_PROGRAM_NV; target_string = "geometry"; + pipe_shader_type = PIPE_SHADER_GEOMETRY; break; default: assert(!"should not be reached"); @@ -4898,10 +4892,13 @@ get_mesa_program(struct gl_context *ctx, } #endif - /* Remove reads to output registers, and to varyings in vertex shaders. */ - v->remove_output_reads(PROGRAM_OUTPUT); - if (target == GL_VERTEX_PROGRAM_ARB) - v->remove_output_reads(PROGRAM_VARYING); + if (!screen->get_shader_param(screen, pipe_shader_type, + PIPE_SHADER_CAP_OUTPUT_READ)) { + /* Remove reads to output registers, and to varyings in vertex shaders. */ + v->remove_output_reads(PROGRAM_OUTPUT); + if (target == GL_VERTEX_PROGRAM_ARB) + v->remove_output_reads(PROGRAM_VARYING); + } /* Perform optimizations on the instructions in the glsl_to_tgsi_visitor. */ v->simplify_cmp(); @@ -4929,18 +4926,26 @@ get_mesa_program(struct gl_context *ctx, _mesa_print_ir(shader->ir, NULL); printf("\n"); printf("\n"); + fflush(stdout); } prog->Instructions = NULL; prog->NumInstructions = 0; - do_set_program_inouts(shader->ir, prog); + do_set_program_inouts(shader->ir, prog, shader->Type == GL_FRAGMENT_SHADER); count_resources(v, prog); - check_resources(ctx, shader_program, v, prog); - _mesa_reference_program(ctx, &shader->Program, prog); + /* This has to be done last. Any operation the can cause + * prog->ParameterValues to get reallocated (e.g., anything that adds a + * program constant) has to happen before creating this linkage. + */ + _mesa_associate_uniform_storage(ctx, shader_program, prog->Parameters); + if (!shader_program->LinkStatus) { + return NULL; + } + struct st_vertex_program *stvp; struct st_fragment_program *stfp; struct st_geometry_program *stgp; @@ -5026,7 +5031,9 @@ st_link_shader(struct gl_context *ctx, struct gl_shader_program *prog) progress = do_lower_jumps(ir, true, true, options->EmitNoMainReturn, options->EmitNoCont, options->EmitNoLoops) || progress; - progress = do_common_optimization(ir, true, options->MaxUnrollIterations) || progress; + progress = do_common_optimization(ir, true, true, + options->MaxUnrollIterations) + || progress; progress = lower_quadop_vector(ir, false) || progress;