From: Iago Toral Quiroga Date: Tue, 1 Jul 2014 11:08:25 +0000 (+0200) Subject: i965/gen6/gs: Compute URB entry size for user-provided geometry shaders. X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=c09ddf82ffa13173b55a1b51075be2671378c4ea;p=mesa.git i965/gen6/gs: Compute URB entry size for user-provided geometry shaders. Acked-by: Kenneth Graunke Reviewed-by: Jordan Justen --- diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h index 0b5735cc02e..72a21e85d16 100644 --- a/src/mesa/drivers/dri/i965/brw_defines.h +++ b/src/mesa/drivers/dri/i965/brw_defines.h @@ -1595,10 +1595,14 @@ enum brw_message_target { # define GEN7_URB_ENTRY_SIZE_SHIFT 16 # define GEN7_URB_STARTING_ADDRESS_SHIFT 25 -/* "GS URB Entry Allocation Size" is a U9-1 field, so the maximum gs_size +/* Gen7 "GS URB Entry Allocation Size" is a U9-1 field, so the maximum gs_size * is 2^9, or 512. It's counted in multiples of 64 bytes. */ -#define GEN7_MAX_GS_URB_ENTRY_SIZE_BYTES (512*64) +#define GEN7_MAX_GS_URB_ENTRY_SIZE_BYTES (512*64) +/* Gen6 "GS URB Entry Allocation Size" is defined as a number of 1024-bit + * (128 bytes) URB rows and the maximum allowed value is 5 rows. + */ +#define GEN6_MAX_GS_URB_ENTRY_SIZE_BYTES (5*128) #define _3DSTATE_PUSH_CONSTANT_ALLOC_VS 0x7912 /* GEN7+ */ #define _3DSTATE_PUSH_CONSTANT_ALLOC_GS 0x7915 /* GEN7+ */ diff --git a/src/mesa/drivers/dri/i965/brw_vec4_gs.c b/src/mesa/drivers/dri/i965/brw_vec4_gs.c index b00e584f7a2..41c6d8b6593 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_gs.c +++ b/src/mesa/drivers/dri/i965/brw_vec4_gs.c @@ -75,31 +75,36 @@ do_gs_prog(struct brw_context *brw, */ c.prog_data.base.base.nr_params = ALIGN(param_count, 4) / 4 + gs->num_samplers; - if (gp->program.OutputType == GL_POINTS) { - /* When the output type is points, the geometry shader may output data - * to multiple streams, and EndPrimitive() has no effect. So we - * configure the hardware to interpret the control data as stream ID. - */ - c.prog_data.control_data_format = GEN7_GS_CONTROL_DATA_FORMAT_GSCTL_SID; - - /* We only have to emit control bits if we are using streams */ - if (prog->Geom.UsesStreams) - c.control_data_bits_per_vertex = 2; - else - c.control_data_bits_per_vertex = 0; + if (brw->gen >= 7) { + if (gp->program.OutputType == GL_POINTS) { + /* When the output type is points, the geometry shader may output data + * to multiple streams, and EndPrimitive() has no effect. So we + * configure the hardware to interpret the control data as stream ID. + */ + c.prog_data.control_data_format = GEN7_GS_CONTROL_DATA_FORMAT_GSCTL_SID; + + /* We only have to emit control bits if we are using streams */ + if (prog->Geom.UsesStreams) + c.control_data_bits_per_vertex = 2; + else + c.control_data_bits_per_vertex = 0; + } else { + /* When the output type is triangle_strip or line_strip, EndPrimitive() + * may be used to terminate the current strip and start a new one + * (similar to primitive restart), and outputting data to multiple + * streams is not supported. So we configure the hardware to interpret + * the control data as EndPrimitive information (a.k.a. "cut bits"). + */ + c.prog_data.control_data_format = GEN7_GS_CONTROL_DATA_FORMAT_GSCTL_CUT; + + /* We only need to output control data if the shader actually calls + * EndPrimitive(). + */ + c.control_data_bits_per_vertex = gp->program.UsesEndPrimitive ? 1 : 0; + } } else { - /* When the output type is triangle_strip or line_strip, EndPrimitive() - * may be used to terminate the current strip and start a new one - * (similar to primitive restart), and outputting data to multiple - * streams is not supported. So we configure the hardware to interpret - * the control data as EndPrimitive information (a.k.a. "cut bits"). - */ - c.prog_data.control_data_format = GEN7_GS_CONTROL_DATA_FORMAT_GSCTL_CUT; - - /* We only need to output control data if the shader actually calls - * EndPrimitive(). - */ - c.control_data_bits_per_vertex = gp->program.UsesEndPrimitive ? 1 : 0; + /* There are no control data bits in gen6. */ + c.control_data_bits_per_vertex = 0; } c.control_data_header_size_bits = gp->program.VerticesOut * c.control_data_bits_per_vertex; @@ -170,7 +175,8 @@ do_gs_prog(struct brw_context *brw, * */ unsigned output_vertex_size_bytes = c.prog_data.base.vue_map.num_slots * 16; - assert(output_vertex_size_bytes <= GEN7_MAX_GS_OUTPUT_VERTEX_SIZE_BYTES); + assert(brw->gen == 6 || + output_vertex_size_bytes <= GEN7_MAX_GS_OUTPUT_VERTEX_SIZE_BYTES); c.prog_data.output_vertex_size_hwords = ALIGN(output_vertex_size_bytes, 32) / 32; @@ -200,10 +206,20 @@ do_gs_prog(struct brw_context *brw, * the above figures are all worst-case, and most of them scale with the * number of output vertices. So we'll just calculate the amount of space * we need, and if it's too large, fail to compile. + * + * The above is for gen7+ where we have a single URB entry that will hold + * all the output. In gen6, we will have to allocate URB entries for every + * vertex we emit, so our URB entries only need to be large enough to hold + * a single vertex. Also, gen6 does not have a control data header. */ - unsigned output_size_bytes = - c.prog_data.output_vertex_size_hwords * 32 * gp->program.VerticesOut; - output_size_bytes += 32 * c.prog_data.control_data_header_size_hwords; + unsigned output_size_bytes; + if (brw->gen >= 7) { + output_size_bytes = + c.prog_data.output_vertex_size_hwords * 32 * gp->program.VerticesOut; + output_size_bytes += 32 * c.prog_data.control_data_header_size_hwords; + } else { + output_size_bytes = c.prog_data.output_vertex_size_hwords * 32; + } /* Broadwell stores "Vertex Count" as a full 8 DWord (32 byte) URB output, * which comes before the control header. @@ -212,11 +228,20 @@ do_gs_prog(struct brw_context *brw, output_size_bytes += 32; assert(output_size_bytes >= 1); - if (output_size_bytes > GEN7_MAX_GS_URB_ENTRY_SIZE_BYTES) + int max_output_size_bytes = GEN7_MAX_GS_URB_ENTRY_SIZE_BYTES; + if (brw->gen == 6) + max_output_size_bytes = GEN6_MAX_GS_URB_ENTRY_SIZE_BYTES; + if (output_size_bytes > max_output_size_bytes) return false; - /* URB entry sizes are stored as a multiple of 64 bytes. */ - c.prog_data.base.urb_entry_size = ALIGN(output_size_bytes, 64) / 64; + + /* URB entry sizes are stored as a multiple of 64 bytes in gen7+ and + * a multiple of 128 bytes in gen6. + */ + if (brw->gen >= 7) + c.prog_data.base.urb_entry_size = ALIGN(output_size_bytes, 64) / 64; + else + c.prog_data.base.urb_entry_size = ALIGN(output_size_bytes, 128) / 128; c.prog_data.output_topology = get_hw_prim_for_gl_prim(gp->program.OutputType);