From b3a344db3035aa68361c934d08054eadfeab4e1f Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 19 Oct 2015 17:05:40 -0700 Subject: [PATCH] anv/compiler: Get rid of GS support. The geometry shader support is currently completely untested. As I go through and re-factor the compiler, I'd rather not refactor dead code that I don't have a way to know if I broke. Let's just remove it for now. We can put it back in easily enough later and then we'll do it properly. --- src/vulkan/anv_compiler.cpp | 279 +----------------------------------- 1 file changed, 3 insertions(+), 276 deletions(-) diff --git a/src/vulkan/anv_compiler.cpp b/src/vulkan/anv_compiler.cpp index 09c2edb3091..0fbc2d1ecba 100644 --- a/src/vulkan/anv_compiler.cpp +++ b/src/vulkan/anv_compiler.cpp @@ -371,265 +371,6 @@ really_do_wm_prog(struct brw_context *brw, return true; } -bool -anv_codegen_gs_prog(struct brw_context *brw, - struct gl_shader_program *prog, - struct brw_geometry_program *gp, - struct brw_gs_prog_key *key, - struct anv_pipeline *pipeline) -{ - struct brw_gs_compile c; - - memset(&c, 0, sizeof(c)); - c.key = *key; - c.gp = gp; - - c.prog_data.include_primitive_id = - (gp->program.Base.InputsRead & VARYING_BIT_PRIMITIVE_ID) != 0; - - c.prog_data.invocations = gp->program.Invocations; - - set_binding_table_layout(&c.prog_data.base.base, - pipeline, VK_SHADER_STAGE_GEOMETRY); - - /* Allocate the references to the uniforms that will end up in the - * prog_data associated with the compiled program, and which will be freed - * by the state cache. - * - * Note: param_count needs to be num_uniform_components * 4, since we add - * padding around uniform values below vec4 size, so the worst case is that - * every uniform is a float which gets padded to the size of a vec4. - */ - struct gl_shader *gs = prog->_LinkedShaders[MESA_SHADER_GEOMETRY]; - int param_count = gp->program.Base.nir->num_uniforms * 4; - - c.prog_data.base.base.param = - rzalloc_array(NULL, const gl_constant_value *, param_count); - c.prog_data.base.base.pull_param = - rzalloc_array(NULL, const gl_constant_value *, param_count); - c.prog_data.base.base.image_param = - rzalloc_array(NULL, struct brw_image_param, gs->NumImages); - c.prog_data.base.base.nr_params = param_count; - c.prog_data.base.base.nr_image_params = gs->NumImages; - - brw_nir_setup_glsl_uniforms(gp->program.Base.nir, prog, &gp->program.Base, - &c.prog_data.base.base, false); - - if (brw->gen >= 8) { - c.prog_data.static_vertex_count = !gp->program.Base.nir ? -1 : - nir_gs_count_vertices(gp->program.Base.nir); - } - - if (brw->gen >= 7) { - if (gp->program.OutputType == GL_POINTS) { - /* When the output type is points, the geometry shader may output data - * to multiple streams, and EndPrimitive() has no effect. So we - * configure the hardware to interpret the control data as stream ID. - */ - c.prog_data.control_data_format = GEN7_GS_CONTROL_DATA_FORMAT_GSCTL_SID; - - /* We only have to emit control bits if we are using streams */ - if (prog->Geom.UsesStreams) - c.control_data_bits_per_vertex = 2; - else - c.control_data_bits_per_vertex = 0; - } else { - /* When the output type is triangle_strip or line_strip, EndPrimitive() - * may be used to terminate the current strip and start a new one - * (similar to primitive restart), and outputting data to multiple - * streams is not supported. So we configure the hardware to interpret - * the control data as EndPrimitive information (a.k.a. "cut bits"). - */ - c.prog_data.control_data_format = GEN7_GS_CONTROL_DATA_FORMAT_GSCTL_CUT; - - /* We only need to output control data if the shader actually calls - * EndPrimitive(). - */ - c.control_data_bits_per_vertex = gp->program.UsesEndPrimitive ? 1 : 0; - } - } else { - /* There are no control data bits in gen6. */ - c.control_data_bits_per_vertex = 0; - - /* If it is using transform feedback, enable it */ - if (prog->TransformFeedback.NumVarying) - c.prog_data.gen6_xfb_enabled = true; - else - c.prog_data.gen6_xfb_enabled = false; - } - c.control_data_header_size_bits = - gp->program.VerticesOut * c.control_data_bits_per_vertex; - - /* 1 HWORD = 32 bytes = 256 bits */ - c.prog_data.control_data_header_size_hwords = - ALIGN(c.control_data_header_size_bits, 256) / 256; - - GLbitfield64 outputs_written = gp->program.Base.OutputsWritten; - - brw_compute_vue_map(brw->intelScreen->devinfo, - &c.prog_data.base.vue_map, outputs_written, - prog ? prog->SeparateShader : false); - - /* Compute the output vertex size. - * - * From the Ivy Bridge PRM, Vol2 Part1 7.2.1.1 STATE_GS - Output Vertex - * Size (p168): - * - * [0,62] indicating [1,63] 16B units - * - * Specifies the size of each vertex stored in the GS output entry - * (following any Control Header data) as a number of 128-bit units - * (minus one). - * - * Programming Restrictions: The vertex size must be programmed as a - * multiple of 32B units with the following exception: Rendering is - * disabled (as per SOL stage state) and the vertex size output by the - * GS thread is 16B. - * - * If rendering is enabled (as per SOL state) the vertex size must be - * programmed as a multiple of 32B units. In other words, the only time - * software can program a vertex size with an odd number of 16B units - * is when rendering is disabled. - * - * Note: B=bytes in the above text. - * - * It doesn't seem worth the extra trouble to optimize the case where the - * vertex size is 16B (especially since this would require special-casing - * the GEN assembly that writes to the URB). So we just set the vertex - * size to a multiple of 32B (2 vec4's) in all cases. - * - * The maximum output vertex size is 62*16 = 992 bytes (31 hwords). We - * budget that as follows: - * - * 512 bytes for varyings (a varying component is 4 bytes and - * gl_MaxGeometryOutputComponents = 128) - * 16 bytes overhead for VARYING_SLOT_PSIZ (each varying slot is 16 - * bytes) - * 16 bytes overhead for gl_Position (we allocate it a slot in the VUE - * even if it's not used) - * 32 bytes overhead for gl_ClipDistance (we allocate it 2 VUE slots - * whenever clip planes are enabled, even if the shader doesn't - * write to gl_ClipDistance) - * 16 bytes overhead since the VUE size must be a multiple of 32 bytes - * (see below)--this causes up to 1 VUE slot to be wasted - * 400 bytes available for varying packing overhead - * - * Worst-case varying packing overhead is 3/4 of a varying slot (12 bytes) - * per interpolation type, so this is plenty. - * - */ - unsigned output_vertex_size_bytes = c.prog_data.base.vue_map.num_slots * 16; - assert(brw->gen == 6 || - output_vertex_size_bytes <= GEN7_MAX_GS_OUTPUT_VERTEX_SIZE_BYTES); - c.prog_data.output_vertex_size_hwords = - ALIGN(output_vertex_size_bytes, 32) / 32; - - /* Compute URB entry size. The maximum allowed URB entry size is 32k. - * That divides up as follows: - * - * 64 bytes for the control data header (cut indices or StreamID bits) - * 4096 bytes for varyings (a varying component is 4 bytes and - * gl_MaxGeometryTotalOutputComponents = 1024) - * 4096 bytes overhead for VARYING_SLOT_PSIZ (each varying slot is 16 - * bytes/vertex and gl_MaxGeometryOutputVertices is 256) - * 4096 bytes overhead for gl_Position (we allocate it a slot in the VUE - * even if it's not used) - * 8192 bytes overhead for gl_ClipDistance (we allocate it 2 VUE slots - * whenever clip planes are enabled, even if the shader doesn't - * write to gl_ClipDistance) - * 4096 bytes overhead since the VUE size must be a multiple of 32 - * bytes (see above)--this causes up to 1 VUE slot to be wasted - * 8128 bytes available for varying packing overhead - * - * Worst-case varying packing overhead is 3/4 of a varying slot per - * interpolation type, which works out to 3072 bytes, so this would allow - * us to accommodate 2 interpolation types without any danger of running - * out of URB space. - * - * In practice, the risk of running out of URB space is very small, since - * the above figures are all worst-case, and most of them scale with the - * number of output vertices. So we'll just calculate the amount of space - * we need, and if it's too large, fail to compile. - * - * The above is for gen7+ where we have a single URB entry that will hold - * all the output. In gen6, we will have to allocate URB entries for every - * vertex we emit, so our URB entries only need to be large enough to hold - * a single vertex. Also, gen6 does not have a control data header. - */ - unsigned output_size_bytes; - if (brw->gen >= 7) { - output_size_bytes = - c.prog_data.output_vertex_size_hwords * 32 * gp->program.VerticesOut; - output_size_bytes += 32 * c.prog_data.control_data_header_size_hwords; - } else { - output_size_bytes = c.prog_data.output_vertex_size_hwords * 32; - } - - /* Broadwell stores "Vertex Count" as a full 8 DWord (32 byte) URB output, - * which comes before the control header. - */ - if (brw->gen >= 8) - output_size_bytes += 32; - - assert(output_size_bytes >= 1); - int max_output_size_bytes = GEN7_MAX_GS_URB_ENTRY_SIZE_BYTES; - if (brw->gen == 6) - max_output_size_bytes = GEN6_MAX_GS_URB_ENTRY_SIZE_BYTES; - if (output_size_bytes > max_output_size_bytes) - return false; - - - /* URB entry sizes are stored as a multiple of 64 bytes in gen7+ and - * a multiple of 128 bytes in gen6. - */ - if (brw->gen >= 7) - c.prog_data.base.urb_entry_size = ALIGN(output_size_bytes, 64) / 64; - else - c.prog_data.base.urb_entry_size = ALIGN(output_size_bytes, 128) / 128; - - /* FIXME: Need to pull this from nir shader. */ - c.prog_data.output_topology = _3DPRIM_TRISTRIP; - - /* The GLSL linker will have already matched up GS inputs and the outputs - * of prior stages. The driver does extend VS outputs in some cases, but - * only for legacy OpenGL or Gen4-5 hardware, neither of which offer - * geometry shader support. So we can safely ignore that. - * - * For SSO pipelines, we use a fixed VUE map layout based on variable - * locations, so we can rely on rendezvous-by-location making this work. - * - * However, we need to ignore VARYING_SLOT_PRIMITIVE_ID, as it's not - * written by previous stages and shows up via payload magic. - */ - GLbitfield64 inputs_read = - gp->program.Base.InputsRead & ~VARYING_BIT_PRIMITIVE_ID; - brw_compute_vue_map(brw->intelScreen->devinfo, - &c.input_vue_map, inputs_read, - prog->SeparateShader); - - /* GS inputs are read from the VUE 256 bits (2 vec4's) at a time, so we - * need to program a URB read length of ceiling(num_slots / 2). - */ - c.prog_data.base.urb_read_length = (c.input_vue_map.num_slots + 1) / 2; - - void *mem_ctx = ralloc_context(NULL); - unsigned program_size; - const unsigned *program = - brw_compile_gs(brw->intelScreen->compiler, brw, &c, gp->program.Base.nir, - prog, mem_ctx, -1, &program_size, NULL); - if (program == NULL) { - ralloc_free(mem_ctx); - return false; - } - - pipeline->gs_vec4 = upload_kernel(pipeline, program, program_size); - pipeline->gs_vertex_count = gp->program.VerticesIn; - - ralloc_free(mem_ctx); - - return true; -} - static bool brw_codegen_cs_prog(struct brw_context *brw, struct gl_shader_program *prog, @@ -1103,23 +844,9 @@ anv_compiler_run(struct anv_compiler *compiler, struct anv_pipeline *pipeline) pipeline->vs_vec4 = NO_KERNEL; } - - if (pipeline->shaders[VK_SHADER_STAGE_GEOMETRY]) { - struct brw_gs_prog_key gs_key; - struct gl_geometry_program *gp = (struct gl_geometry_program *) - program->_LinkedShaders[MESA_SHADER_GEOMETRY]->Program; - struct brw_geometry_program *bgp = brw_geometry_program(gp); - - success = anv_codegen_gs_prog(brw, program, bgp, &gs_key, pipeline); - fail_if(!success, "do_gs_prog failed\n"); - add_compiled_stage(pipeline, VK_SHADER_STAGE_GEOMETRY, - &pipeline->gs_prog_data.base.base); - - if (gp->Base.OutputsWritten & VARYING_SLOT_PSIZ) - pipeline->writes_point_size = true; - } else { - pipeline->gs_vec4 = NO_KERNEL; - } + /* Geometry shaders not yet supported */ + anv_assert(pipeline->shaders[VK_SHADER_STAGE_GEOMETRY] == NULL); + pipeline->gs_vec4 = NO_KERNEL; if (pipeline->shaders[VK_SHADER_STAGE_FRAGMENT]) { struct brw_wm_prog_key wm_key; -- 2.30.2