X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fmesa%2Fdrivers%2Fdri%2Fi965%2Fbrw_shader.cpp;h=951e6b250de95ed246a1a56d6cfade663183856c;hb=ed65e6ef49e17e9cae93a8f98e2968346de2bc6e;hp=d9545685b1b242760ca28a8a44f40d098ead5689;hpb=a5038427c3624e559f954124d77304f9ae9b884c;p=mesa.git diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp b/src/mesa/drivers/dri/i965/brw_shader.cpp index d9545685b1b..951e6b250de 100644 --- a/src/mesa/drivers/dri/i965/brw_shader.cpp +++ b/src/mesa/drivers/dri/i965/brw_shader.cpp @@ -26,143 +26,8 @@ #include "brw_eu.h" #include "brw_fs.h" #include "brw_nir.h" -#include "glsl/glsl_parser_extras.h" -#include "main/shaderobj.h" +#include "brw_vec4_tes.h" #include "main/uniforms.h" -#include "util/debug.h" - -static void -shader_debug_log_mesa(void *data, const char *fmt, ...) -{ - struct brw_context *brw = (struct brw_context *)data; - va_list args; - - va_start(args, fmt); - GLuint msg_id = 0; - _mesa_gl_vdebug(&brw->ctx, &msg_id, - MESA_DEBUG_SOURCE_SHADER_COMPILER, - MESA_DEBUG_TYPE_OTHER, - MESA_DEBUG_SEVERITY_NOTIFICATION, fmt, args); - va_end(args); -} - -static void -shader_perf_log_mesa(void *data, const char *fmt, ...) -{ - struct brw_context *brw = (struct brw_context *)data; - - va_list args; - va_start(args, fmt); - - if (unlikely(INTEL_DEBUG & DEBUG_PERF)) { - va_list args_copy; - va_copy(args_copy, args); - vfprintf(stderr, fmt, args_copy); - va_end(args_copy); - } - - if (brw->perf_debug) { - GLuint msg_id = 0; - _mesa_gl_vdebug(&brw->ctx, &msg_id, - MESA_DEBUG_SOURCE_SHADER_COMPILER, - MESA_DEBUG_TYPE_PERFORMANCE, - MESA_DEBUG_SEVERITY_MEDIUM, fmt, args); - } - va_end(args); -} - -struct brw_compiler * -brw_compiler_create(void *mem_ctx, const struct brw_device_info *devinfo) -{ - struct brw_compiler *compiler = rzalloc(mem_ctx, struct brw_compiler); - - compiler->devinfo = devinfo; - compiler->shader_debug_log = shader_debug_log_mesa; - compiler->shader_perf_log = shader_perf_log_mesa; - - brw_fs_alloc_reg_sets(compiler); - brw_vec4_alloc_reg_set(compiler); - - compiler->scalar_stage[MESA_SHADER_VERTEX] = - devinfo->gen >= 8 && !(INTEL_DEBUG & DEBUG_VEC4VS); - compiler->scalar_stage[MESA_SHADER_TESS_EVAL] = true; - compiler->scalar_stage[MESA_SHADER_GEOMETRY] = - devinfo->gen >= 8 && env_var_as_boolean("INTEL_SCALAR_GS", false); - compiler->scalar_stage[MESA_SHADER_FRAGMENT] = true; - compiler->scalar_stage[MESA_SHADER_COMPUTE] = true; - - nir_shader_compiler_options *nir_options = - rzalloc(compiler, nir_shader_compiler_options); - nir_options->native_integers = true; - /* In order to help allow for better CSE at the NIR level we tell NIR - * to split all ffma instructions during opt_algebraic and we then - * re-combine them as a later step. - */ - nir_options->lower_ffma = true; - nir_options->lower_sub = true; - /* In the vec4 backend, our dpN instruction replicates its result to all - * the components of a vec4. We would like NIR to give us replicated fdot - * instructions because it can optimize better for us. - * - * For the FS backend, it should be lowered away by the scalarizing pass so - * we should never see fdot anyway. - */ - nir_options->fdot_replicates = true; - - /* We want the GLSL compiler to emit code that uses condition codes */ - for (int i = 0; i < MESA_SHADER_STAGES; i++) { - compiler->glsl_compiler_options[i].MaxUnrollIterations = 32; - compiler->glsl_compiler_options[i].MaxIfDepth = - devinfo->gen < 6 ? 16 : UINT_MAX; - - compiler->glsl_compiler_options[i].EmitCondCodes = true; - compiler->glsl_compiler_options[i].EmitNoNoise = true; - compiler->glsl_compiler_options[i].EmitNoMainReturn = true; - compiler->glsl_compiler_options[i].EmitNoIndirectInput = true; - compiler->glsl_compiler_options[i].EmitNoIndirectUniform = false; - compiler->glsl_compiler_options[i].LowerClipDistance = true; - - bool is_scalar = compiler->scalar_stage[i]; - - compiler->glsl_compiler_options[i].EmitNoIndirectOutput = is_scalar; - compiler->glsl_compiler_options[i].EmitNoIndirectTemp = is_scalar; - compiler->glsl_compiler_options[i].OptimizeForAOS = !is_scalar; - - /* !ARB_gpu_shader5 */ - if (devinfo->gen < 7) - compiler->glsl_compiler_options[i].EmitNoIndirectSampler = true; - - compiler->glsl_compiler_options[i].NirOptions = nir_options; - - compiler->glsl_compiler_options[i].LowerBufferInterfaceBlocks = true; - } - - compiler->glsl_compiler_options[MESA_SHADER_TESS_EVAL].EmitNoIndirectInput = false; - - if (compiler->scalar_stage[MESA_SHADER_GEOMETRY]) - compiler->glsl_compiler_options[MESA_SHADER_GEOMETRY].EmitNoIndirectInput = false; - - compiler->glsl_compiler_options[MESA_SHADER_COMPUTE] - .LowerShaderSharedVariables = true; - - return compiler; -} - -extern "C" struct gl_shader * -brw_new_shader(struct gl_context *ctx, GLuint name, GLuint type) -{ - struct brw_shader *shader; - - shader = rzalloc(NULL, struct brw_shader); - if (shader) { - shader->base.Type = type; - shader->base.Stage = _mesa_shader_enum_to_shader_stage(type); - shader->base.Name = name; - _mesa_init_shader(ctx, &shader->base); - } - - return &shader->base; -} extern "C" void brw_mark_surface_used(struct brw_stage_prog_data *prog_data, @@ -198,10 +63,12 @@ brw_type_for_base_type(const struct glsl_type *type) return BRW_REGISTER_TYPE_UD; case GLSL_TYPE_IMAGE: return BRW_REGISTER_TYPE_UD; + case GLSL_TYPE_DOUBLE: + return BRW_REGISTER_TYPE_DF; case GLSL_TYPE_VOID: case GLSL_TYPE_ERROR: case GLSL_TYPE_INTERFACE: - case GLSL_TYPE_DOUBLE: + case GLSL_TYPE_FUNCTION: unreachable("not reached"); } @@ -280,22 +147,28 @@ brw_texture_offset(int *offsets, unsigned num_components) } const char * -brw_instruction_name(enum opcode op) +brw_instruction_name(const struct gen_device_info *devinfo, enum opcode op) { switch (op) { case BRW_OPCODE_ILLEGAL ... BRW_OPCODE_NOP: - assert(opcode_descs[op].name); - return opcode_descs[op].name; + /* The DO instruction doesn't exist on Gen6+, but we use it to mark the + * start of a loop in the IR. + */ + if (devinfo->gen >= 6 && op == BRW_OPCODE_DO) + return "do"; + + assert(brw_opcode_desc(devinfo, op)->name); + return brw_opcode_desc(devinfo, op)->name; case FS_OPCODE_FB_WRITE: return "fb_write"; case FS_OPCODE_FB_WRITE_LOGICAL: return "fb_write_logical"; - case FS_OPCODE_PACK_STENCIL_REF: - return "pack_stencil_ref"; - case FS_OPCODE_BLORP_FB_WRITE: - return "blorp_fb_write"; case FS_OPCODE_REP_FB_WRITE: return "rep_fb_write"; + case FS_OPCODE_FB_READ: + return "fb_read"; + case FS_OPCODE_FB_READ_LOGICAL: + return "fb_read_logical"; case SHADER_OPCODE_RCP: return "rcp"; @@ -330,10 +203,14 @@ brw_instruction_name(enum opcode op) return "txf"; case SHADER_OPCODE_TXF_LOGICAL: return "txf_logical"; + case SHADER_OPCODE_TXF_LZ: + return "txf_lz"; case SHADER_OPCODE_TXL: return "txl"; case SHADER_OPCODE_TXL_LOGICAL: return "txl_logical"; + case SHADER_OPCODE_TXL_LZ: + return "txl_lz"; case SHADER_OPCODE_TXS: return "txs"; case SHADER_OPCODE_TXS_LOGICAL: @@ -372,6 +249,8 @@ brw_instruction_name(enum opcode op) return "tg4_offset_logical"; case SHADER_OPCODE_SAMPLEINFO: return "sampleinfo"; + case SHADER_OPCODE_SAMPLEINFO_LOGICAL: + return "sampleinfo_logical"; case SHADER_OPCODE_SHADER_TIME_ADD: return "shader_time_add"; @@ -405,6 +284,8 @@ brw_instruction_name(enum opcode op) case SHADER_OPCODE_LOAD_PAYLOAD: return "load_payload"; + case FS_OPCODE_PACK: + return "pack"; case SHADER_OPCODE_GEN4_SCRATCH_READ: return "gen4_scratch_read"; @@ -463,10 +344,12 @@ brw_instruction_name(enum opcode op) return "uniform_pull_const"; case FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD_GEN7: return "uniform_pull_const_gen7"; - case FS_OPCODE_VARYING_PULL_CONSTANT_LOAD: - return "varying_pull_const"; + case FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_GEN4: + return "varying_pull_const_gen4"; case FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_GEN7: return "varying_pull_const_gen7"; + case FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_LOGICAL: + return "varying_pull_const_logical"; case FS_OPCODE_MOV_DISPATCH_TO_FLAGS: return "mov_dispatch_to_flags"; @@ -488,8 +371,6 @@ brw_instruction_name(enum opcode op) case FS_OPCODE_PLACEHOLDER_HALT: return "placeholder_halt"; - case FS_OPCODE_INTERPOLATE_AT_CENTROID: - return "interp_centroid"; case FS_OPCODE_INTERPOLATE_AT_SAMPLE: return "interp_sample"; case FS_OPCODE_INTERPOLATE_AT_SHARED_OFFSET: @@ -549,6 +430,33 @@ brw_instruction_name(enum opcode op) return "mulh"; case SHADER_OPCODE_MOV_INDIRECT: return "mov_indirect"; + + case VEC4_OPCODE_URB_READ: + return "urb_read"; + case TCS_OPCODE_GET_INSTANCE_ID: + return "tcs_get_instance_id"; + case TCS_OPCODE_URB_WRITE: + return "tcs_urb_write"; + case TCS_OPCODE_SET_INPUT_URB_OFFSETS: + return "tcs_set_input_urb_offsets"; + case TCS_OPCODE_SET_OUTPUT_URB_OFFSETS: + return "tcs_set_output_urb_offsets"; + case TCS_OPCODE_GET_PRIMITIVE_ID: + return "tcs_get_primitive_id"; + case TCS_OPCODE_CREATE_BARRIER_HEADER: + return "tcs_create_barrier_header"; + case TCS_OPCODE_SRC0_010_IS_ZERO: + return "tcs_src0<0,1,0>_is_zero"; + case TCS_OPCODE_RELEASE_INPUT: + return "tcs_release_input"; + case TCS_OPCODE_THREAD_END: + return "tcs_thread_end"; + case TES_OPCODE_CREATE_INPUT_READ_HEADER: + return "tes_create_input_read_header"; + case TES_OPCODE_ADD_INDIRECT_URB_OFFSET: + return "tes_add_indirect_urb_offset"; + case TES_OPCODE_GET_PRIMITIVE_ID: + return "tes_get_primitive_id"; } unreachable("not reached"); @@ -561,7 +469,19 @@ brw_saturate_immediate(enum brw_reg_type type, struct brw_reg *reg) unsigned ud; int d; float f; - } imm = { reg->ud }, sat_imm = { 0 }; + double df; + } imm, sat_imm = { 0 }; + + const unsigned size = type_sz(type); + + /* We want to either do a 32-bit or 64-bit data copy, the type is otherwise + * irrelevant, so just check the size of the type and copy from/to an + * appropriately sized field. + */ + if (size < 8) + imm.ud = reg->ud; + else + imm.df = reg->df; switch (type) { case BRW_REGISTER_TYPE_UD: @@ -575,6 +495,9 @@ brw_saturate_immediate(enum brw_reg_type type, struct brw_reg *reg) case BRW_REGISTER_TYPE_F: sat_imm.f = CLAMP(imm.f, 0.0f, 1.0f); break; + case BRW_REGISTER_TYPE_DF: + sat_imm.df = CLAMP(imm.df, 0.0, 1.0); + break; case BRW_REGISTER_TYPE_UB: case BRW_REGISTER_TYPE_B: unreachable("no UB/B immediates"); @@ -582,14 +505,20 @@ brw_saturate_immediate(enum brw_reg_type type, struct brw_reg *reg) case BRW_REGISTER_TYPE_UV: case BRW_REGISTER_TYPE_VF: unreachable("unimplemented: saturate vector immediate"); - case BRW_REGISTER_TYPE_DF: case BRW_REGISTER_TYPE_HF: - unreachable("unimplemented: saturate DF/HF immediate"); + unreachable("unimplemented: saturate HF immediate"); } - if (imm.ud != sat_imm.ud) { - reg->ud = sat_imm.ud; - return true; + if (size < 8) { + if (imm.ud != sat_imm.ud) { + reg->ud = sat_imm.ud; + return true; + } + } else { + if (imm.df != sat_imm.df) { + reg->df = sat_imm.df; + return true; + } } return false; } @@ -612,6 +541,9 @@ brw_negate_immediate(enum brw_reg_type type, struct brw_reg *reg) case BRW_REGISTER_TYPE_VF: reg->ud ^= 0x80808080; return true; + case BRW_REGISTER_TYPE_DF: + reg->df = -reg->df; + return true; case BRW_REGISTER_TYPE_UB: case BRW_REGISTER_TYPE_B: unreachable("no UB/B immediates"); @@ -621,9 +553,8 @@ brw_negate_immediate(enum brw_reg_type type, struct brw_reg *reg) case BRW_REGISTER_TYPE_UQ: case BRW_REGISTER_TYPE_Q: assert(!"unimplemented: negate UQ/Q immediate"); - case BRW_REGISTER_TYPE_DF: case BRW_REGISTER_TYPE_HF: - assert(!"unimplemented: negate DF/HF immediate"); + assert(!"unimplemented: negate HF immediate"); } return false; @@ -642,6 +573,9 @@ brw_abs_immediate(enum brw_reg_type type, struct brw_reg *reg) case BRW_REGISTER_TYPE_F: reg->f = fabsf(reg->f); return true; + case BRW_REGISTER_TYPE_DF: + reg->df = fabs(reg->df); + return true; case BRW_REGISTER_TYPE_VF: reg->ud &= ~0x80808080; return true; @@ -660,14 +594,60 @@ brw_abs_immediate(enum brw_reg_type type, struct brw_reg *reg) assert(!"unimplemented: abs V immediate"); case BRW_REGISTER_TYPE_Q: assert(!"unimplemented: abs Q immediate"); - case BRW_REGISTER_TYPE_DF: case BRW_REGISTER_TYPE_HF: - assert(!"unimplemented: abs DF/HF immediate"); + assert(!"unimplemented: abs HF immediate"); } return false; } +unsigned +tesslevel_outer_components(GLenum tes_primitive_mode) +{ + switch (tes_primitive_mode) { + case GL_QUADS: + return 4; + case GL_TRIANGLES: + return 3; + case GL_ISOLINES: + return 2; + default: + unreachable("Bogus tessellation domain"); + } + return 0; +} + +unsigned +tesslevel_inner_components(GLenum tes_primitive_mode) +{ + switch (tes_primitive_mode) { + case GL_QUADS: + return 2; + case GL_TRIANGLES: + return 1; + case GL_ISOLINES: + return 0; + default: + unreachable("Bogus tessellation domain"); + } + return 0; +} + +/** + * Given a normal .xyzw writemask, convert it to a writemask for a vector + * that's stored backwards, i.e. .wzyx. + */ +unsigned +writemask_for_backwards_vector(unsigned mask) +{ + unsigned new_mask = 0; + + for (int i = 0; i < 4; i++) + new_mask |= ((mask >> i) & 1) << (3 - i); + + return new_mask; +} + backend_shader::backend_shader(const struct brw_compiler *compiler, void *log_data, void *mem_ctx, @@ -685,13 +665,14 @@ backend_shader::backend_shader(const struct brw_compiler *compiler, debug_enabled = INTEL_DEBUG & intel_debug_flag_for_shader_stage(stage); stage_name = _mesa_shader_stage_to_string(stage); stage_abbrev = _mesa_shader_stage_to_abbrev(stage); + is_passthrough_shader = + nir->info.name && strcmp(nir->info.name, "passthrough") == 0; } bool backend_reg::equals(const backend_reg &r) const { - return memcmp((brw_reg *)this, (brw_reg *)&r, sizeof(brw_reg)) == 0 && - reg_offset == r.reg_offset; + return brw_regs_equal(this, &r) && offset == r.offset; } bool @@ -700,7 +681,17 @@ backend_reg::is_zero() const if (file != IMM) return false; - return d == 0; + switch (type) { + case BRW_REGISTER_TYPE_F: + return f == 0; + case BRW_REGISTER_TYPE_DF: + return df == 0; + case BRW_REGISTER_TYPE_D: + case BRW_REGISTER_TYPE_UD: + return d == 0; + default: + return false; + } } bool @@ -709,9 +700,17 @@ backend_reg::is_one() const if (file != IMM) return false; - return type == BRW_REGISTER_TYPE_F - ? f == 1.0 - : d == 1; + switch (type) { + case BRW_REGISTER_TYPE_F: + return f == 1.0f; + case BRW_REGISTER_TYPE_DF: + return df == 1.0; + case BRW_REGISTER_TYPE_D: + case BRW_REGISTER_TYPE_UD: + return d == 1; + default: + return false; + } } bool @@ -723,6 +722,8 @@ backend_reg::is_negative_one() const switch (type) { case BRW_REGISTER_TYPE_F: return f == -1.0; + case BRW_REGISTER_TYPE_DF: + return df == -1.0; case BRW_REGISTER_TYPE_D: return d == -1; default: @@ -743,15 +744,6 @@ backend_reg::is_accumulator() const return file == ARF && nr == BRW_ARF_ACCUMULATOR; } -bool -backend_reg::in_range(const backend_reg &r, unsigned n) const -{ - return (file == r.file && - nr == r.nr && - reg_offset >= r.reg_offset && - reg_offset < r.reg_offset + n); -} - bool backend_instruction::is_commutative() const { @@ -776,9 +768,9 @@ backend_instruction::is_commutative() const } bool -backend_instruction::is_3src() const +backend_instruction::is_3src(const struct gen_device_info *devinfo) const { - return ::is_3src(opcode); + return ::is_3src(devinfo, opcode); } bool @@ -788,15 +780,18 @@ backend_instruction::is_tex() const opcode == FS_OPCODE_TXB || opcode == SHADER_OPCODE_TXD || opcode == SHADER_OPCODE_TXF || + opcode == SHADER_OPCODE_TXF_LZ || opcode == SHADER_OPCODE_TXF_CMS || opcode == SHADER_OPCODE_TXF_CMS_W || opcode == SHADER_OPCODE_TXF_UMS || opcode == SHADER_OPCODE_TXF_MCS || opcode == SHADER_OPCODE_TXL || + opcode == SHADER_OPCODE_TXL_LZ || opcode == SHADER_OPCODE_TXS || opcode == SHADER_OPCODE_LOD || opcode == SHADER_OPCODE_TG4 || - opcode == SHADER_OPCODE_TG4_OFFSET); + opcode == SHADER_OPCODE_TG4_OFFSET || + opcode == SHADER_OPCODE_SAMPLEINFO); } bool @@ -955,7 +950,7 @@ backend_instruction::reads_accumulator_implicitly() const } bool -backend_instruction::writes_accumulator_implicitly(const struct brw_device_info *devinfo) const +backend_instruction::writes_accumulator_implicitly(const struct gen_device_info *devinfo) const { return writes_accumulator || (devinfo->gen < 6 && @@ -983,7 +978,10 @@ backend_instruction::has_side_effects() const case SHADER_OPCODE_URB_WRITE_SIMD8_MASKED: case SHADER_OPCODE_URB_WRITE_SIMD8_MASKED_PER_SLOT: case FS_OPCODE_FB_WRITE: + case FS_OPCODE_FB_WRITE_LOGICAL: case SHADER_OPCODE_BARRIER: + case TCS_OPCODE_URB_WRITE: + case TCS_OPCODE_RELEASE_INPUT: return true; default: return false; @@ -998,6 +996,9 @@ backend_instruction::is_volatile() const case SHADER_OPCODE_UNTYPED_SURFACE_READ_LOGICAL: case SHADER_OPCODE_TYPED_SURFACE_READ: case SHADER_OPCODE_TYPED_SURFACE_READ_LOGICAL: + case SHADER_OPCODE_URB_READ_SIMD8: + case SHADER_OPCODE_URB_READ_SIMD8_PER_SLOT: + case VEC4_OPCODE_URB_READ: return true; default: return false; @@ -1022,7 +1023,7 @@ static void adjust_later_block_ips(bblock_t *start_block, int ip_adjustment) { for (bblock_t *block_iter = start_block->next(); - !block_iter->link.is_tail_sentinel(); + block_iter; block_iter = block_iter->next()) { block_iter->start_ip += ip_adjustment; block_iter->end_ip += ip_adjustment; @@ -1032,6 +1033,8 @@ adjust_later_block_ips(bblock_t *start_block, int ip_adjustment) void backend_instruction::insert_after(bblock_t *block, backend_instruction *inst) { + assert(this != inst); + if (!this->is_head_sentinel()) assert(inst_is_in_block(block, this) || !"Instruction not in block"); @@ -1045,6 +1048,8 @@ backend_instruction::insert_after(bblock_t *block, backend_instruction *inst) void backend_instruction::insert_before(bblock_t *block, backend_instruction *inst) { + assert(this != inst); + if (!this->is_tail_sentinel()) assert(inst_is_in_block(block, this) || !"Instruction not in block"); @@ -1130,13 +1135,6 @@ backend_shader::calculate_cfg() cfg = new(mem_ctx) cfg_t(&this->instructions); } -void -backend_shader::invalidate_cfg() -{ - ralloc_free(this->cfg); - this->cfg = NULL; -} - /** * Sets up the starting offsets for the groups of binding table entries * commong to all pipeline stages. @@ -1145,16 +1143,16 @@ backend_shader::invalidate_cfg() * unused but also make sure that addition of small offsets to them will * trigger some of our asserts that surface indices are < BRW_MAX_SURFACES. */ -void +uint32_t brw_assign_common_binding_table_offsets(gl_shader_stage stage, - const struct brw_device_info *devinfo, + const struct gen_device_info *devinfo, const struct gl_shader_program *shader_prog, const struct gl_program *prog, struct brw_stage_prog_data *stage_prog_data, uint32_t next_binding_table_offset) { - const struct gl_shader *shader = NULL; - int num_textures = _mesa_fls(prog->SamplersUsed); + const struct gl_linked_shader *shader = NULL; + int num_textures = util_last_bit(prog->SamplersUsed); if (shader_prog) shader = shader_prog->_LinkedShaders[stage]; @@ -1212,9 +1210,19 @@ brw_assign_common_binding_table_offsets(gl_shader_stage stage, stage_prog_data->binding_table.pull_constants_start = next_binding_table_offset; next_binding_table_offset++; - assert(next_binding_table_offset <= BRW_MAX_SURFACES); + /* Plane 0 is just the regular texture section */ + stage_prog_data->binding_table.plane_start[0] = stage_prog_data->binding_table.texture_start; + + stage_prog_data->binding_table.plane_start[1] = next_binding_table_offset; + next_binding_table_offset += num_textures; + + stage_prog_data->binding_table.plane_start[2] = next_binding_table_offset; + next_binding_table_offset += num_textures; /* prog_data->base.binding_table.size will be set by brw_mark_surface_used. */ + + assert(next_binding_table_offset <= BRW_MAX_SURFACES); + return next_binding_table_offset; } static void @@ -1305,13 +1313,23 @@ brw_compile_tes(const struct brw_compiler *compiler, unsigned *final_assembly_size, char **error_str) { - const struct brw_device_info *devinfo = compiler->devinfo; - struct gl_shader *shader = + const struct gen_device_info *devinfo = compiler->devinfo; + struct gl_linked_shader *shader = shader_prog->_LinkedShaders[MESA_SHADER_TESS_EVAL]; const bool is_scalar = compiler->scalar_stage[MESA_SHADER_TESS_EVAL]; nir_shader *nir = nir_shader_clone(mem_ctx, src_shader); + nir->info.inputs_read = key->inputs_read; + nir->info.patch_inputs_read = key->patch_inputs_read; + + struct brw_vue_map input_vue_map; + brw_compute_tess_vue_map(&input_vue_map, + nir->info.inputs_read & ~VARYING_BIT_PRIMITIVE_ID, + nir->info.patch_inputs_read); + nir = brw_nir_apply_sampler_key(nir, devinfo, &key->tex, is_scalar); + brw_nir_lower_tes_inputs(nir, &input_vue_map); + brw_nir_lower_vue_outputs(nir, is_scalar); nir = brw_postprocess_nir(nir, compiler->devinfo, is_scalar); brw_compute_vue_map(devinfo, &prog_data->base.vue_map, @@ -1330,11 +1348,6 @@ brw_compile_tes(const struct brw_compiler *compiler, /* URB entry sizes are stored as a multiple of 64 bytes. */ prog_data->base.urb_entry_size = ALIGN(output_size_bytes, 64) / 64; - struct brw_vue_map input_vue_map; - brw_compute_tess_vue_map(&input_vue_map, - nir->info.inputs_read & ~VARYING_BIT_PRIMITIVE_ID, - nir->info.patch_inputs_read); - bool need_patch_header = nir->info.system_values_read & (BITFIELD64_BIT(SYSTEM_VALUE_TESS_LEVEL_OUTER) | BITFIELD64_BIT(SYSTEM_VALUE_TESS_LEVEL_INNER)); @@ -1363,11 +1376,12 @@ brw_compile_tes(const struct brw_compiler *compiler, return NULL; } + prog_data->base.base.dispatch_grf_start_reg = v.payload.num_regs; prog_data->base.dispatch_mode = DISPATCH_MODE_SIMD8; fs_generator g(compiler, log_data, mem_ctx, (void *) key, &prog_data->base.base, v.promoted_constants, false, - "TES"); + MESA_SHADER_TESS_EVAL); if (unlikely(INTEL_DEBUG & DEBUG_TES)) { g.enable_debug(ralloc_asprintf(mem_ctx, "%s tessellation evaluation shader %s", @@ -1380,6 +1394,19 @@ brw_compile_tes(const struct brw_compiler *compiler, return g.get_assembly(final_assembly_size); } else { - unreachable("XXX: vec4 tessellation evalation shaders not merged yet."); + brw::vec4_tes_visitor v(compiler, log_data, key, prog_data, + nir, mem_ctx, shader_time_index); + if (!v.run()) { + if (error_str) + *error_str = ralloc_strdup(mem_ctx, v.fail_msg); + return NULL; + } + + if (unlikely(INTEL_DEBUG & DEBUG_TES)) + v.dump_instructions(); + + return brw_vec4_generate_assembly(compiler, log_data, mem_ctx, nir, + &prog_data->base, v.cfg, + final_assembly_size); } }