From ef83c9b762ee083a2bf1948befdb5dd0fb8df00b Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Fri, 20 Feb 2015 11:42:19 +1000 Subject: [PATCH] st/mesa: add double input support including lowering (v3.1) This takes a different approach to previously, we cannot index into the inputMapping with anything but the mesa attribute index, so we can't use the just add one to index trick, we need more info to add one to it after we've mapped the input. (Fixed copy propgation and cleaned up a little) v2: drop float64 format check, just attr->Doubles. merge enable patch. v3: cleanup code a bit. v3.1: minor review fixups (comment, newline) (Ilia) Reviewed-by: Ilia Mirkin Signed-off-by: Dave Airlie --- src/mesa/state_tracker/st_atom_array.c | 172 ++++++++++++++++----- src/mesa/state_tracker/st_extensions.c | 4 +- src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 27 +++- src/mesa/state_tracker/st_program.c | 5 + src/mesa/state_tracker/st_program.h | 1 + 5 files changed, 164 insertions(+), 45 deletions(-) diff --git a/src/mesa/state_tracker/st_atom_array.c b/src/mesa/state_tracker/st_atom_array.c index d4fb8b862f6..56b8019a36f 100644 --- a/src/mesa/state_tracker/st_atom_array.c +++ b/src/mesa/state_tracker/st_atom_array.c @@ -44,7 +44,6 @@ #include "cso_cache/cso_context.h" #include "util/u_math.h" - #include "main/bufferobj.h" #include "main/glformats.h" @@ -311,6 +310,18 @@ st_pipe_vertex_format(GLenum type, GLuint size, GLenum format, return PIPE_FORMAT_NONE; /* silence compiler warning */ } +static const struct gl_client_array * +get_client_array(const struct st_vertex_program *vp, + const struct gl_client_array **arrays, + int attr) +{ + const GLuint mesaAttr = vp->index_to_input[attr]; + /* st_program uses 0xffffffff to denote a double placeholder attribute */ + if (mesaAttr == ST_DOUBLE_ATTRIB_PLACEHOLDER) + return NULL; + return arrays[mesaAttr]; +} + /** * Examine the active arrays to determine if we have interleaved * vertex arrays all living in one VBO, or all living in user space. @@ -327,11 +338,16 @@ is_interleaved_arrays(const struct st_vertex_program *vp, GLboolean userSpaceBuffer = GL_FALSE; for (attr = 0; attr < vpv->num_inputs; attr++) { - const GLuint mesaAttr = vp->index_to_input[attr]; - const struct gl_client_array *array = arrays[mesaAttr]; - const struct gl_buffer_object *bufObj = array->BufferObj; - const GLsizei stride = array->StrideB; /* in bytes */ + const struct gl_client_array *array; + const struct gl_buffer_object *bufObj; + GLsizei stride; + + array = get_client_array(vp, arrays, attr); + if (!array) + continue; + stride = array->StrideB; /* in bytes */ + bufObj = array->BufferObj; if (attr == 0) { /* save info about the first array */ firstStride = stride; @@ -358,6 +374,55 @@ is_interleaved_arrays(const struct st_vertex_program *vp, return GL_TRUE; } +static void init_velement(struct pipe_vertex_element *velement, + int src_offset, int format, + int instance_divisor, int vbo_index) +{ + velement->src_offset = src_offset; + velement->src_format = format; + velement->instance_divisor = instance_divisor; + velement->vertex_buffer_index = vbo_index; + assert(velement->src_format); +} + +static void init_velement_lowered(struct st_context *st, + struct pipe_vertex_element *velements, + int src_offset, int format, + int instance_divisor, int vbo_index, + int nr_components, GLboolean doubles, + GLuint *attr_idx) +{ + int idx = *attr_idx; + if (doubles) { + int lower_format; + + if (nr_components == 1) + lower_format = PIPE_FORMAT_R32G32_UINT; + else if (nr_components >= 2) + lower_format = PIPE_FORMAT_R32G32B32A32_UINT; + + init_velement(&velements[idx], src_offset, + lower_format, instance_divisor, vbo_index); + idx++; + + if (nr_components > 2) { + if (nr_components == 3) + lower_format = PIPE_FORMAT_R32G32_UINT; + else if (nr_components >= 4) + lower_format = PIPE_FORMAT_R32G32B32A32_UINT; + + init_velement(&velements[idx], src_offset + 4 * sizeof(float), + lower_format, instance_divisor, vbo_index); + idx++; + } + } else { + init_velement(&velements[idx], src_offset, + format, instance_divisor, vbo_index); + idx++; + } + *attr_idx = idx; +} + /** * Set up for drawing interleaved arrays that all live in one VBO * or all live in user space. @@ -365,13 +430,15 @@ is_interleaved_arrays(const struct st_vertex_program *vp, * \param velements returns vertex element info */ static boolean -setup_interleaved_attribs(const struct st_vertex_program *vp, +setup_interleaved_attribs(struct st_context *st, + const struct st_vertex_program *vp, const struct st_vp_variant *vpv, const struct gl_client_array **arrays, struct pipe_vertex_buffer *vbuffer, - struct pipe_vertex_element velements[]) + struct pipe_vertex_element velements[], + unsigned *num_velements) { - GLuint attr; + GLuint attr, attr_idx; const GLubyte *low_addr = NULL; GLboolean usingVBO; /* all arrays in a VBO? */ struct gl_buffer_object *bufobj; @@ -381,8 +448,10 @@ setup_interleaved_attribs(const struct st_vertex_program *vp, * Init bufobj and stride. */ if (vpv->num_inputs) { - const GLuint mesaAttr0 = vp->index_to_input[0]; - const struct gl_client_array *array = arrays[mesaAttr0]; + const struct gl_client_array *array; + + array = get_client_array(vp, arrays, 0); + assert(array); /* Since we're doing interleaved arrays, we know there'll be at most * one buffer object and the stride will be the same for all arrays. @@ -394,7 +463,11 @@ setup_interleaved_attribs(const struct st_vertex_program *vp, low_addr = arrays[vp->index_to_input[0]]->Ptr; for (attr = 1; attr < vpv->num_inputs; attr++) { - const GLubyte *start = arrays[vp->index_to_input[attr]]->Ptr; + const GLubyte *start; + array = get_client_array(vp, arrays, attr); + if (!array) + continue; + start = array->Ptr; low_addr = MIN2(low_addr, start); } } @@ -408,25 +481,33 @@ setup_interleaved_attribs(const struct st_vertex_program *vp, /* are the arrays in user space? */ usingVBO = _mesa_is_bufferobj(bufobj); + attr_idx = 0; for (attr = 0; attr < vpv->num_inputs; attr++) { - const GLuint mesaAttr = vp->index_to_input[attr]; - const struct gl_client_array *array = arrays[mesaAttr]; - unsigned src_offset = (unsigned) (array->Ptr - low_addr); + const struct gl_client_array *array; + unsigned src_offset; + unsigned src_format; + + array = get_client_array(vp, arrays, attr); + if (!array) + continue; + src_offset = (unsigned) (array->Ptr - low_addr); assert(array->_ElementSize == _mesa_bytes_per_vertex_attrib(array->Size, array->Type)); - velements[attr].src_offset = src_offset; - velements[attr].instance_divisor = array->InstanceDivisor; - velements[attr].vertex_buffer_index = 0; - velements[attr].src_format = st_pipe_vertex_format(array->Type, - array->Size, - array->Format, - array->Normalized, - array->Integer); - assert(velements[attr].src_format); + src_format = st_pipe_vertex_format(array->Type, + array->Size, + array->Format, + array->Normalized, + array->Integer); + + init_velement_lowered(st, velements, src_offset, src_format, + array->InstanceDivisor, 0, + array->Size, array->Doubles, &attr_idx); } + *num_velements = attr_idx; + /* * Return the vbuffer info and setup user-space attrib info, if needed. */ @@ -472,17 +553,25 @@ setup_non_interleaved_attribs(struct st_context *st, const struct st_vp_variant *vpv, const struct gl_client_array **arrays, struct pipe_vertex_buffer vbuffer[], - struct pipe_vertex_element velements[]) + struct pipe_vertex_element velements[], + unsigned *num_velements) { struct gl_context *ctx = st->ctx; - GLuint attr; + GLuint attr, attr_idx = 0; for (attr = 0; attr < vpv->num_inputs; attr++) { const GLuint mesaAttr = vp->index_to_input[attr]; - const struct gl_client_array *array = arrays[mesaAttr]; - struct gl_buffer_object *bufobj = array->BufferObj; - GLsizei stride = array->StrideB; + const struct gl_client_array *array; + struct gl_buffer_object *bufobj; + GLsizei stride; + unsigned src_format; + array = get_client_array(vp, arrays, attr); + if (!array) + continue; + + stride = array->StrideB; + bufobj = array->BufferObj; assert(array->_ElementSize == _mesa_bytes_per_vertex_attrib(array->Size, array->Type)); @@ -524,16 +613,19 @@ setup_non_interleaved_attribs(struct st_context *st, /* common-case setup */ vbuffer[attr].stride = stride; /* in bytes */ - velements[attr].src_offset = 0; - velements[attr].instance_divisor = array->InstanceDivisor; - velements[attr].vertex_buffer_index = attr; - velements[attr].src_format = st_pipe_vertex_format(array->Type, - array->Size, - array->Format, - array->Normalized, - array->Integer); - assert(velements[attr].src_format); + src_format = st_pipe_vertex_format(array->Type, + array->Size, + array->Format, + array->Normalized, + array->Integer); + + init_velement_lowered(st, velements, 0, src_format, + array->InstanceDivisor, attr, + array->Size, array->Doubles, &attr_idx); + } + + *num_velements = attr_idx; return TRUE; } @@ -563,25 +655,23 @@ static void update_array(struct st_context *st) * Setup the vbuffer[] and velements[] arrays. */ if (is_interleaved_arrays(vp, vpv, arrays)) { - if (!setup_interleaved_attribs(vp, vpv, arrays, vbuffer, velements)) { + if (!setup_interleaved_attribs(st, vp, vpv, arrays, vbuffer, velements, &num_velements)) { st->vertex_array_out_of_memory = TRUE; return; } num_vbuffers = 1; - num_velements = vpv->num_inputs; if (num_velements == 0) num_vbuffers = 0; } else { if (!setup_non_interleaved_attribs(st, vp, vpv, arrays, vbuffer, - velements)) { + velements, &num_velements)) { st->vertex_array_out_of_memory = TRUE; return; } num_vbuffers = vpv->num_inputs; - num_velements = vpv->num_inputs; } cso_set_vertex_buffers(st->cso_context, 0, num_vbuffers, vbuffer); diff --git a/src/mesa/state_tracker/st_extensions.c b/src/mesa/state_tracker/st_extensions.c index 82e4a30093f..b1057f3eadd 100644 --- a/src/mesa/state_tracker/st_extensions.c +++ b/src/mesa/state_tracker/st_extensions.c @@ -909,6 +909,8 @@ void st_init_extensions(struct pipe_screen *screen, if (screen->get_shader_param(screen, PIPE_SHADER_VERTEX, PIPE_SHADER_CAP_DOUBLES) && screen->get_shader_param(screen, PIPE_SHADER_FRAGMENT, - PIPE_SHADER_CAP_DOUBLES)) + PIPE_SHADER_CAP_DOUBLES)) { extensions->ARB_gpu_shader_fp64 = GL_TRUE; + extensions->ARB_vertex_attrib_64bit = GL_TRUE; + } } diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp index 08957dc2e1d..1fea8600a75 100644 --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp @@ -88,6 +88,7 @@ public: this->reladdr = NULL; this->reladdr2 = NULL; this->has_index2 = false; + this->double_reg2 = false; } st_src_reg(gl_register_file file, int index, int type) @@ -101,6 +102,7 @@ public: this->reladdr = NULL; this->reladdr2 = NULL; this->has_index2 = false; + this->double_reg2 = false; } st_src_reg(gl_register_file file, int index, int type, int index2D) @@ -114,6 +116,7 @@ public: this->reladdr = NULL; this->reladdr2 = NULL; this->has_index2 = false; + this->double_reg2 = false; } st_src_reg() @@ -127,6 +130,7 @@ public: this->reladdr = NULL; this->reladdr2 = NULL; this->has_index2 = false; + this->double_reg2 = false; } explicit st_src_reg(st_dst_reg reg); @@ -141,6 +145,11 @@ public: st_src_reg *reladdr; st_src_reg *reladdr2; bool has_index2; + /* + * Is this the second half of a double register pair? + * currently used for input mapping only. + */ + bool double_reg2; }; class st_dst_reg { @@ -197,6 +206,7 @@ st_src_reg::st_src_reg(st_dst_reg reg) this->index2D = 0; this->reladdr2 = NULL; this->has_index2 = false; + this->double_reg2 = false; } st_dst_reg::st_dst_reg(st_src_reg reg) @@ -677,8 +687,10 @@ glsl_to_tgsi_visitor::emit(ir_instruction *ir, unsigned op, if (dinst->src[j].type == GLSL_TYPE_DOUBLE) { dinst->src[j].index = initial_src_idx[j]; - if (swz > 1) + if (swz > 1) { + dinst->src[j].double_reg2 = true; dinst->src[j].index++; + } if (swz & 1) dinst->src[j].swizzle = MAKE_SWIZZLE4(SWIZZLE_Z, SWIZZLE_W, SWIZZLE_Z, SWIZZLE_W); @@ -3705,6 +3717,7 @@ glsl_to_tgsi_visitor::copy_propagate(void) } else { if (first->src[0].file != copy_chan->src[0].file || first->src[0].index != copy_chan->src[0].index || + first->src[0].double_reg2 != copy_chan->src[0].double_reg2 || first->src[0].index2D != copy_chan->src[0].index2D) { good = false; break; @@ -3720,6 +3733,7 @@ glsl_to_tgsi_visitor::copy_propagate(void) inst->src[r].index = first->src[0].index; inst->src[r].index2D = first->src[0].index2D; inst->src[r].has_index2 = first->src[0].has_index2; + inst->src[r].double_reg2 = first->src[0].double_reg2; int swizzle = 0; for (int i = 0; i < 4; i++) { @@ -4552,6 +4566,9 @@ dst_register(struct st_translate *t, static struct ureg_src src_register(struct st_translate *t, const st_src_reg *reg) { + int index = reg->index; + int double_reg2 = reg->double_reg2 ? 1 : 0; + switch(reg->file) { case PROGRAM_UNDEFINED: return ureg_imm4f(t->ureg, 0, 0, 0, 0); @@ -4577,8 +4594,12 @@ src_register(struct st_translate *t, const st_src_reg *reg) return t->immediates[reg->index]; case PROGRAM_INPUT: - assert(t->inputMapping[reg->index] < ARRAY_SIZE(t->inputs)); - return t->inputs[t->inputMapping[reg->index]]; + /* GLSL inputs are 64-bit containers, so we have to + * map back to the original index and add the offset after + * mapping. */ + index -= double_reg2; + assert(t->inputMapping[index] < ARRAY_SIZE(t->inputs)); + return t->inputs[t->inputMapping[index] + double_reg2]; case PROGRAM_OUTPUT: assert(t->outputMapping[reg->index] < ARRAY_SIZE(t->outputs)); diff --git a/src/mesa/state_tracker/st_program.c b/src/mesa/state_tracker/st_program.c index d93b3c7bcb8..a9110d3c674 100644 --- a/src/mesa/state_tracker/st_program.c +++ b/src/mesa/state_tracker/st_program.c @@ -194,6 +194,11 @@ st_prepare_vertex_program(struct gl_context *ctx, stvp->input_to_index[attr] = stvp->num_inputs; stvp->index_to_input[stvp->num_inputs] = attr; stvp->num_inputs++; + if ((stvp->Base.Base.DoubleInputsRead & BITFIELD64_BIT(attr)) != 0) { + /* add placeholder for second part of a double attribute */ + stvp->index_to_input[stvp->num_inputs] = ST_DOUBLE_ATTRIB_PLACEHOLDER; + stvp->num_inputs++; + } } } /* bit of a hack, presetup potentially unused edgeflag input */ diff --git a/src/mesa/state_tracker/st_program.h b/src/mesa/state_tracker/st_program.h index b2c86faecb7..a2c56062d6e 100644 --- a/src/mesa/state_tracker/st_program.h +++ b/src/mesa/state_tracker/st_program.h @@ -45,6 +45,7 @@ extern "C" { #endif +#define ST_DOUBLE_ATTRIB_PLACEHOLDER 0xffffffff /** Fragment program variant key */ struct st_fp_variant_key -- 2.30.2