X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fmesa%2Fdrivers%2Fdri%2Fi965%2Fbrw_vec4_tcs.cpp;h=d4a647d029f0bc8f717366b12c2bae8cf863aa39;hb=d0d4a5f43b4dd79bd7bfff7c7deaade10bfebf7c;hp=8f77b59ea0323fd151360fe1a22413053f90dfac;hpb=8151003ade952c3e9d8284fada9237e1311cf173;p=mesa.git diff --git a/src/mesa/drivers/dri/i965/brw_vec4_tcs.cpp b/src/mesa/drivers/dri/i965/brw_vec4_tcs.cpp index 8f77b59ea03..d4a647d029f 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_tcs.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_tcs.cpp @@ -29,6 +29,8 @@ #include "brw_nir.h" #include "brw_vec4_tcs.h" +#include "brw_fs.h" +#include "common/gen_debug.h" namespace brw { @@ -47,63 +49,13 @@ vec4_tcs_visitor::vec4_tcs_visitor(const struct brw_compiler *compiler, } -void -vec4_tcs_visitor::emit_nir_code() -{ - if (key->program_string_id != 0) { - /* We have a real application-supplied TCS, emit real code. */ - vec4_visitor::emit_nir_code(); - } else { - /* There is no TCS; automatically generate a passthrough shader - * that writes the API-specified default tessellation levels and - * copies VS outputs to TES inputs. - */ - uniforms = 2; - uniform_size[0] = 1; - uniform_size[1] = 1; - - uint64_t varyings = key->outputs_written; - - src_reg vertex_offset(this, glsl_type::uint_type); - emit(MUL(dst_reg(vertex_offset), invocation_id, - brw_imm_ud(prog_data->vue_map.num_per_vertex_slots))); - - while (varyings != 0) { - const int varying = ffsll(varyings) - 1; - - unsigned in_offset = input_vue_map->varying_to_slot[varying]; - unsigned out_offset = prog_data->vue_map.varying_to_slot[varying]; - assert(out_offset >= 2); - - dst_reg val(this, glsl_type::vec4_type); - emit_input_urb_read(val, invocation_id, in_offset, src_reg()); - emit_urb_write(src_reg(val), WRITEMASK_XYZW, out_offset, - vertex_offset); - - varyings &= ~BITFIELD64_BIT(varying); - } - - /* Only write the tessellation factors from invocation 0. - * There's no point in making other threads do redundant work. - */ - emit(CMP(dst_null_d(), invocation_id, brw_imm_ud(0), - BRW_CONDITIONAL_EQ)); - emit(IF(BRW_PREDICATE_NORMAL)); - emit_urb_write(src_reg(UNIFORM, 0, glsl_type::vec4_type), - WRITEMASK_XYZW, 0, src_reg()); - emit_urb_write(src_reg(UNIFORM, 1, glsl_type::vec4_type), - WRITEMASK_XYZW, 1, src_reg()); - emit(BRW_OPCODE_ENDIF); - } -} - void vec4_tcs_visitor::nir_setup_system_value_intrinsic(nir_intrinsic_instr *instr) { } dst_reg * -vec4_tcs_visitor::make_reg_for_system_value(int location, const glsl_type *type) +vec4_tcs_visitor::make_reg_for_system_value(int location) { return NULL; } @@ -143,9 +95,10 @@ vec4_tcs_visitor::emit_prolog() * HS instance dispatched will only have its bottom half doing real * work, and so we need to disable the upper half: */ - if (nir->info.tcs.vertices_out % 2) { + if (nir->info->tess.tcs_vertices_out % 2) { emit(CMP(dst_null_d(), invocation_id, - brw_imm_ud(nir->info.tcs.vertices_out), BRW_CONDITIONAL_L)); + brw_imm_ud(nir->info->tess.tcs_vertices_out), + BRW_CONDITIONAL_L)); /* Matching ENDIF is in emit_thread_end() */ emit(IF(BRW_PREDICATE_NORMAL)); @@ -159,7 +112,7 @@ vec4_tcs_visitor::emit_thread_end() vec4_instruction *inst; current_annotation = "thread end"; - if (nir->info.tcs.vertices_out % 2) { + if (nir->info->tess.tcs_vertices_out % 2) { emit(BRW_OPCODE_ENDIF); } @@ -184,7 +137,9 @@ vec4_tcs_visitor::emit_thread_end() * we don't have stride in the vec4 world, nor UV immediates in * align16, so we need an opcode to get invocation_id<0,4,0>. 
*/ - emit(TCS_OPCODE_SRC0_010_IS_ZERO, dst_null_d(), invocation_id); + set_condmod(BRW_CONDITIONAL_Z, + emit(TCS_OPCODE_SRC0_010_IS_ZERO, dst_null_d(), + invocation_id)); emit(IF(BRW_PREDICATE_NORMAL)); for (unsigned i = 0; i < key->input_vertices; i += 2) { /* If we have an odd number of input vertices, the last will be @@ -213,6 +168,7 @@ void vec4_tcs_visitor::emit_input_urb_read(const dst_reg &dst, const src_reg &vertex_index, unsigned base_offset, + unsigned first_component, const src_reg &indirect_offset) { vec4_instruction *inst; @@ -238,13 +194,16 @@ vec4_tcs_visitor::emit_input_urb_read(const dst_reg &dst, if (inst->offset == 0 && indirect_offset.file == BAD_FILE) { emit(MOV(dst, swizzle(src_reg(temp), BRW_SWIZZLE_WWWW))); } else { - emit(MOV(dst, src_reg(temp))); + src_reg src = src_reg(temp); + src.swizzle = BRW_SWZ_COMP_INPUT(first_component); + emit(MOV(dst, src)); } } void vec4_tcs_visitor::emit_output_urb_read(const dst_reg &dst, unsigned base_offset, + unsigned first_component, const src_reg &indirect_offset) { vec4_instruction *inst; @@ -252,14 +211,20 @@ vec4_tcs_visitor::emit_output_urb_read(const dst_reg &dst, /* Set up the message header to reference the proper parts of the URB */ dst_reg header = dst_reg(this, glsl_type::uvec4_type); inst = emit(TCS_OPCODE_SET_OUTPUT_URB_OFFSETS, header, - brw_imm_ud(dst.writemask), indirect_offset); + brw_imm_ud(dst.writemask << first_component), indirect_offset); inst->force_writemask_all = true; - /* Read into a temporary, ignoring writemasking. */ vec4_instruction *read = emit(VEC4_OPCODE_URB_READ, dst, src_reg(header)); read->offset = base_offset; read->mlen = 1; read->base_mrf = -1; + + if (first_component) { + /* Read into a temporary and copy with a swizzle and writemask. */ + read->dst = retype(dst_reg(this, glsl_type::ivec4_type), dst.type); + emit(MOV(dst, swizzle(src_reg(read->dst), + BRW_SWZ_COMP_INPUT(first_component)))); + } } void @@ -277,7 +242,8 @@ vec4_tcs_visitor::emit_urb_write(const src_reg &value, inst = emit(TCS_OPCODE_SET_OUTPUT_URB_OFFSETS, dst_reg(message), brw_imm_ud(writemask), indirect_offset); inst->force_writemask_all = true; - inst = emit(MOV(offset(dst_reg(retype(message, value.type)), 1), value)); + inst = emit(MOV(byte_offset(dst_reg(retype(message, value.type)), REG_SIZE), + value)); inst->force_writemask_all = true; inst = emit(TCS_OPCODE_URB_WRITE, dst_null_f(), message); @@ -286,53 +252,6 @@ vec4_tcs_visitor::emit_urb_write(const src_reg &value, inst->base_mrf = -1; } -static unsigned -tesslevel_outer_components(GLenum tes_primitive_mode) -{ - switch (tes_primitive_mode) { - case GL_QUADS: - return 4; - case GL_TRIANGLES: - return 3; - case GL_ISOLINES: - return 2; - default: - unreachable("Bogus tessellation domain"); - } - return 0; -} - -static unsigned -tesslevel_inner_components(GLenum tes_primitive_mode) -{ - switch (tes_primitive_mode) { - case GL_QUADS: - return 2; - case GL_TRIANGLES: - return 1; - case GL_ISOLINES: - return 0; - default: - unreachable("Bogus tessellation domain"); - } - return 0; -} - -/** - * Given a normal .xyzw writemask, convert it to a writemask for a vector - * that's stored backwards, i.e. .wzyx. 
- */ -static unsigned -writemask_for_backwards_vector(unsigned mask) -{ - unsigned new_mask = 0; - - for (int i = 0; i < 4; i++) - new_mask |= ((mask >> i) & 1) << (3 - i); - - return new_mask; -} - void vec4_tcs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr) { @@ -355,13 +274,40 @@ vec4_tcs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr) nir_const_value *vertex_const = nir_src_as_const_value(instr->src[0]); src_reg vertex_index = - vertex_const ? src_reg(brw_imm_ud(vertex_const->u[0])) + vertex_const ? src_reg(brw_imm_ud(vertex_const->u32[0])) : get_nir_src(instr->src[0], BRW_REGISTER_TYPE_UD, 1); - dst_reg dst = get_nir_dest(instr->dest, BRW_REGISTER_TYPE_D); - dst.writemask = brw_writemask_for_size(instr->num_components); + unsigned first_component = nir_intrinsic_component(instr); + if (nir_dest_bit_size(instr->dest) == 64) { + /* We need to emit up to two 32-bit URB reads, then shuffle + * the result into a temporary, then move to the destination + * honoring the writemask + * + * We don't need to divide first_component by 2 because + * emit_input_urb_read takes a 32-bit type. + */ + dst_reg tmp = dst_reg(this, glsl_type::dvec4_type); + dst_reg tmp_d = retype(tmp, BRW_REGISTER_TYPE_D); + emit_input_urb_read(tmp_d, vertex_index, imm_offset, + first_component, indirect_offset); + if (instr->num_components > 2) { + emit_input_urb_read(byte_offset(tmp_d, REG_SIZE), vertex_index, + imm_offset + 1, 0, indirect_offset); + } + + src_reg tmp_src = retype(src_reg(tmp_d), BRW_REGISTER_TYPE_DF); + dst_reg shuffled = dst_reg(this, glsl_type::dvec4_type); + shuffle_64bit_data(shuffled, tmp_src, false); - emit_input_urb_read(dst, vertex_index, imm_offset, indirect_offset); + dst_reg dst = get_nir_dest(instr->dest, BRW_REGISTER_TYPE_DF); + dst.writemask = brw_writemask_for_size(instr->num_components); + emit(MOV(dst, src_reg(shuffled))); + } else { + dst_reg dst = get_nir_dest(instr->dest, BRW_REGISTER_TYPE_D); + dst.writemask = brw_writemask_for_size(instr->num_components); + emit_input_urb_read(dst, vertex_index, imm_offset, + first_component, indirect_offset); + } break; } case nir_intrinsic_load_input: @@ -370,62 +316,13 @@ vec4_tcs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr) case nir_intrinsic_load_output: case nir_intrinsic_load_per_vertex_output: { src_reg indirect_offset = get_indirect_offset(instr); - unsigned imm_offset = instr->const_index[0];; + unsigned imm_offset = instr->const_index[0]; dst_reg dst = get_nir_dest(instr->dest, BRW_REGISTER_TYPE_D); dst.writemask = brw_writemask_for_size(instr->num_components); - if (imm_offset == 0 && indirect_offset.file == BAD_FILE) { - dst.type = BRW_REGISTER_TYPE_F; - - /* This is a read of gl_TessLevelInner[], which lives in the - * Patch URB header. The layout depends on the domain. - */ - switch (key->tes_primitive_mode) { - case GL_QUADS: { - /* DWords 3-2 (reversed); use offset 0 and WZYX swizzle. */ - dst_reg tmp(this, glsl_type::vec4_type); - emit_output_urb_read(tmp, 0, src_reg()); - emit(MOV(writemask(dst, WRITEMASK_XY), - swizzle(src_reg(tmp), BRW_SWIZZLE_WZYX))); - break; - } - case GL_TRIANGLES: - /* DWord 4; use offset 1 but normal swizzle/writemask. */ - emit_output_urb_read(writemask(dst, WRITEMASK_X), 1, src_reg()); - break; - case GL_ISOLINES: - /* All channels are undefined. 
*/ - return; - default: - unreachable("Bogus tessellation domain"); - } - } else if (imm_offset == 1 && indirect_offset.file == BAD_FILE) { - dst.type = BRW_REGISTER_TYPE_F; - - /* This is a read of gl_TessLevelOuter[], which lives in the - * high 4 DWords of the Patch URB header, in reverse order. - */ - switch (key->tes_primitive_mode) { - case GL_QUADS: - dst.writemask = WRITEMASK_XYZW; - break; - case GL_TRIANGLES: - dst.writemask = WRITEMASK_XYZ; - break; - case GL_ISOLINES: - dst.writemask = WRITEMASK_XY; - return; - default: - unreachable("Bogus tessellation domain"); - } - - dst_reg tmp(this, glsl_type::vec4_type); - emit_output_urb_read(tmp, 1, src_reg()); - emit(MOV(dst, swizzle(src_reg(tmp), BRW_SWIZZLE_WZYX))); - } else { - emit_output_urb_read(dst, imm_offset, indirect_offset); - } + emit_output_urb_read(dst, imm_offset, nir_intrinsic_component(instr), + indirect_offset); break; } case nir_intrinsic_store_output: @@ -437,50 +334,42 @@ vec4_tcs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr) src_reg indirect_offset = get_indirect_offset(instr); unsigned imm_offset = instr->const_index[0]; - if (imm_offset == 0 && indirect_offset.file == BAD_FILE) { - value.type = BRW_REGISTER_TYPE_F; - - mask &= (1 << tesslevel_inner_components(key->tes_primitive_mode)) - 1; + unsigned first_component = nir_intrinsic_component(instr); + if (first_component) { + if (nir_src_bit_size(instr->src[0]) == 64) + first_component /= 2; + assert(swiz == BRW_SWIZZLE_XYZW); + swiz = BRW_SWZ_COMP_OUTPUT(first_component); + mask = mask << first_component; + } - /* This is a write to gl_TessLevelInner[], which lives in the - * Patch URB header. The layout depends on the domain. + if (nir_src_bit_size(instr->src[0]) == 64) { + /* For 64-bit data we need to shuffle the data before we write and + * emit two messages. Also, since each channel is twice as large we + * need to fix the writemask in each 32-bit message to account for it. */ - switch (key->tes_primitive_mode) { - case GL_QUADS: - /* gl_TessLevelInner[].xy lives at DWords 3-2 (reversed). - * We use an XXYX swizzle to reverse put .xy in the .wz - * channels, and use a .zw writemask. - */ - swiz = BRW_SWIZZLE4(0, 0, 1, 0); - mask = writemask_for_backwards_vector(mask); - break; - case GL_TRIANGLES: - /* gl_TessLevelInner[].x lives at DWord 4, so we set the - * writemask to X and bump the URB offset by 1. - */ - imm_offset = 1; - break; - case GL_ISOLINES: - /* Skip; gl_TessLevelInner[] doesn't exist for isolines. */ - return; - default: - unreachable("Bogus tessellation domain"); + value = swizzle(retype(value, BRW_REGISTER_TYPE_DF), swiz); + dst_reg shuffled = dst_reg(this, glsl_type::dvec4_type); + shuffle_64bit_data(shuffled, value, true); + src_reg shuffled_float = src_reg(retype(shuffled, BRW_REGISTER_TYPE_F)); + + for (int n = 0; n < 2; n++) { + unsigned fixed_mask = 0; + if (mask & WRITEMASK_X) + fixed_mask |= WRITEMASK_XY; + if (mask & WRITEMASK_Y) + fixed_mask |= WRITEMASK_ZW; + emit_urb_write(shuffled_float, fixed_mask, + imm_offset, indirect_offset); + + shuffled_float = byte_offset(shuffled_float, REG_SIZE); + mask >>= 2; + imm_offset++; } - } else if (imm_offset == 1 && indirect_offset.file == BAD_FILE) { - value.type = BRW_REGISTER_TYPE_F; - - mask &= (1 << tesslevel_outer_components(key->tes_primitive_mode)) - 1; - - /* This is a write to gl_TessLevelOuter[] which lives in the - * Patch URB Header at DWords 4-7. However, it's reversed, so - * instead of .xyzw we have .wzyx. 
- */ - swiz = BRW_SWIZZLE_WZYX; - mask = writemask_for_backwards_vector(mask); + } else { + emit_urb_write(swizzle(value, swiz), mask, + imm_offset, indirect_offset); } - - emit_urb_write(swizzle(value, swiz), mask, - imm_offset, indirect_offset); break; } @@ -508,29 +397,34 @@ brw_compile_tcs(const struct brw_compiler *compiler, unsigned *final_assembly_size, char **error_str) { - const struct brw_device_info *devinfo = compiler->devinfo; + const struct gen_device_info *devinfo = compiler->devinfo; struct brw_vue_prog_data *vue_prog_data = &prog_data->base; const bool is_scalar = compiler->scalar_stage[MESA_SHADER_TESS_CTRL]; nir_shader *nir = nir_shader_clone(mem_ctx, src_shader); - nir->info.outputs_written = key->outputs_written; - nir->info.patch_outputs_written = key->patch_outputs_written; + nir->info->outputs_written = key->outputs_written; + nir->info->patch_outputs_written = key->patch_outputs_written; struct brw_vue_map input_vue_map; - brw_compute_vue_map(devinfo, &input_vue_map, - nir->info.inputs_read & ~VARYING_BIT_PRIMITIVE_ID, - true); - + brw_compute_vue_map(devinfo, &input_vue_map, nir->info->inputs_read, + nir->info->separate_shader); brw_compute_tess_vue_map(&vue_prog_data->vue_map, - nir->info.outputs_written, - nir->info.patch_outputs_written); + nir->info->outputs_written, + nir->info->patch_outputs_written); - nir = brw_nir_apply_sampler_key(nir, devinfo, &key->tex, is_scalar); + nir = brw_nir_apply_sampler_key(nir, compiler, &key->tex, is_scalar); brw_nir_lower_vue_inputs(nir, is_scalar, &input_vue_map); - brw_nir_lower_tcs_outputs(nir, &vue_prog_data->vue_map); - nir = brw_postprocess_nir(nir, compiler->devinfo, is_scalar); + brw_nir_lower_tcs_outputs(nir, &vue_prog_data->vue_map, + key->tes_primitive_mode); + if (key->quads_workaround) + brw_nir_apply_tcs_quads_workaround(nir); + + nir = brw_postprocess_nir(nir, compiler, is_scalar); - prog_data->instances = DIV_ROUND_UP(nir->info.tcs.vertices_out, 2); + if (is_scalar) + prog_data->instances = DIV_ROUND_UP(nir->info->tess.tcs_vertices_out, 8); + else + prog_data->instances = DIV_ROUND_UP(nir->info->tess.tcs_vertices_out, 2); /* Compute URB entry size. The maximum allowed URB entry size is 32k. * That divides up as follows: @@ -549,7 +443,8 @@ brw_compile_tcs(const struct brw_compiler *compiler, unsigned output_size_bytes = 0; /* Note that the patch header is counted in num_per_patch_slots. 
*/ output_size_bytes += num_per_patch_slots * 16; - output_size_bytes += nir->info.tcs.vertices_out * num_per_vertex_slots * 16; + output_size_bytes += nir->info->tess.tcs_vertices_out * + num_per_vertex_slots * 16; assert(output_size_bytes >= 1); if (output_size_bytes > GEN7_MAX_HS_URB_ENTRY_SIZE_BYTES) @@ -571,20 +466,50 @@ brw_compile_tcs(const struct brw_compiler *compiler, brw_print_vue_map(stderr, &vue_prog_data->vue_map); } - vec4_tcs_visitor v(compiler, log_data, key, prog_data, - nir, mem_ctx, shader_time_index, &input_vue_map); - if (!v.run()) { - if (error_str) - *error_str = ralloc_strdup(mem_ctx, v.fail_msg); - return NULL; - } + if (is_scalar) { + fs_visitor v(compiler, log_data, mem_ctx, (void *) key, + &prog_data->base.base, NULL, nir, 8, + shader_time_index, &input_vue_map); + if (!v.run_tcs_single_patch()) { + if (error_str) + *error_str = ralloc_strdup(mem_ctx, v.fail_msg); + return NULL; + } + + prog_data->base.base.dispatch_grf_start_reg = v.payload.num_regs; + prog_data->base.dispatch_mode = DISPATCH_MODE_SIMD8; + + fs_generator g(compiler, log_data, mem_ctx, (void *) key, + &prog_data->base.base, v.promoted_constants, false, + MESA_SHADER_TESS_CTRL); + if (unlikely(INTEL_DEBUG & DEBUG_TCS)) { + g.enable_debug(ralloc_asprintf(mem_ctx, + "%s tessellation control shader %s", + nir->info->label ? nir->info->label + : "unnamed", + nir->info->name)); + } - if (unlikely(INTEL_DEBUG & DEBUG_TCS)) - v.dump_instructions(); + g.generate_code(v.cfg, 8); - return brw_vec4_generate_assembly(compiler, log_data, mem_ctx, nir, - &prog_data->base, v.cfg, - final_assembly_size); + return g.get_assembly(final_assembly_size); + } else { + vec4_tcs_visitor v(compiler, log_data, key, prog_data, + nir, mem_ctx, shader_time_index, &input_vue_map); + if (!v.run()) { + if (error_str) + *error_str = ralloc_strdup(mem_ctx, v.fail_msg); + return NULL; + } + + if (unlikely(INTEL_DEBUG & DEBUG_TCS)) + v.dump_instructions(); + + + return brw_vec4_generate_assembly(compiler, log_data, mem_ctx, nir, + &prog_data->base, v.cfg, + final_assembly_size); + } }
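
A note on the 64-bit output path introduced in nir_emit_intrinsic() above: a DF channel occupies two DWords in the URB, so each of the two 32-bit write messages needs its writemask widened (.x covers DWords 0-1, .y covers DWords 2-3) before the mask is shifted down by two and the URB offset is bumped for the second message. The standalone sketch below illustrates only that mask mapping under those assumptions; the writemask constants are defined locally for the sketch and dvec_step_to_vec4_writemask() is a hypothetical helper, not a function in this patch.

#include <cstdio>

/* Locally defined for this sketch; in i965 these come from the driver headers. */
enum {
   WRITEMASK_X = 1 << 0,
   WRITEMASK_Y = 1 << 1,
   WRITEMASK_Z = 1 << 2,
   WRITEMASK_W = 1 << 3,
};

/* Widen one step of a dvec writemask into the 32-bit writemask used by a
 * single URB write message: each 64-bit channel enables two 32-bit channels.
 */
static unsigned
dvec_step_to_vec4_writemask(unsigned dvec_mask)
{
   unsigned fixed_mask = 0;
   if (dvec_mask & WRITEMASK_X)
      fixed_mask |= WRITEMASK_X | WRITEMASK_Y;
   if (dvec_mask & WRITEMASK_Y)
      fixed_mask |= WRITEMASK_Z | WRITEMASK_W;
   return fixed_mask;
}

int main()
{
   /* A dvec4 store with a full .xyzw mask becomes two URB messages, each
    * with a full XYZW 32-bit writemask at consecutive offsets, mirroring
    * the mask >>= 2 / imm_offset++ loop in the patch.
    */
   unsigned mask = WRITEMASK_X | WRITEMASK_Y | WRITEMASK_Z | WRITEMASK_W;
   for (int n = 0; n < 2; n++) {
      printf("message %d: writemask 0x%x\n", n, dvec_step_to_vec4_writemask(mask));
      mask >>= 2;
   }
   return 0;
}

For a dvec2 store of .xy, the first message comes out with an XYZW writemask and the second with an empty one, which is why the loop in the patch can always run twice and simply shift the mask between iterations.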