radeonsi: implement legacy GL_DOUBLE vertex formats
author: Marek Olšák <marek.olsak@amd.com>
Fri, 10 Feb 2017 00:16:34 +0000 (01:16 +0100)
committer: Marek Olšák <marek.olsak@amd.com>
Tue, 14 Feb 2017 20:47:51 +0000 (21:47 +0100)
so that we can disable u_vbuf for GL core profiles.

Reviewed-by: Nicolai Hähnle <nicolai.haehnle@amd.com>
src/gallium/drivers/radeonsi/si_shader.c
src/gallium/drivers/radeonsi/si_shader.h
src/gallium/drivers/radeonsi/si_state.c

index 9196de4f926ce2d02bcf0fec6805dd8f4149473d..cfff54a5e00c7e89abc81083047d133e20d67256 100644 (file)
@@ -319,6 +319,21 @@ static LLVMValueRef get_instance_index_for_fetch(
                            LLVMGetParam(radeon_bld->main_fn, param_start_instance), "");
 }
 
+/* Reinterpret a <4 x float> value as <2 x double>, extract the element at
+ * double_index (0 or 1), and truncate it to ctx->f32 with FPTrunc. */
+static LLVMValueRef extract_double_to_float(struct si_shader_context *ctx,
+                                           LLVMValueRef vec4,
+                                           unsigned double_index)
+{
+       LLVMBuilderRef builder = ctx->gallivm.builder;
+       LLVMTypeRef f64 = LLVMDoubleTypeInContext(ctx->gallivm.context);
+       LLVMValueRef dvec2 = LLVMBuildBitCast(builder, vec4,
+                                             LLVMVectorType(f64, 2), "");
+       LLVMValueRef index = LLVMConstInt(ctx->i32, double_index, 0);
+       LLVMValueRef value = LLVMBuildExtractElement(builder, dvec2, index, "");
+       return LLVMBuildFPTrunc(builder, value, ctx->f32, "");
+}
+
 static void declare_input_vs(
        struct si_shader_context *ctx,
        unsigned input_index,
@@ -330,14 +345,15 @@ static void declare_input_vs(
 
        unsigned chan;
        unsigned fix_fetch;
+       unsigned num_fetches;
+       unsigned fetch_stride;
 
        LLVMValueRef t_list_ptr;
        LLVMValueRef t_offset;
        LLVMValueRef t_list;
-       LLVMValueRef attribute_offset;
-       LLVMValueRef buffer_index;
+       LLVMValueRef vertex_index;
        LLVMValueRef args[3];
-       LLVMValueRef input;
+       LLVMValueRef input[3];
 
        /* Load the T list */
        t_list_ptr = LLVMGetParam(ctx->main_fn, SI_PARAM_VERTEX_BUFFERS);
@@ -346,29 +362,42 @@ static void declare_input_vs(
 
        t_list = ac_build_indexed_load_const(&ctx->ac, t_list_ptr, t_offset);
 
-       /* Build the attribute offset */
-       attribute_offset = lp_build_const_int32(gallivm, 0);
-
-       buffer_index = LLVMGetParam(ctx->main_fn,
+       vertex_index = LLVMGetParam(ctx->main_fn,
                                    ctx->param_vertex_index0 +
                                    input_index);
 
+       fix_fetch = (ctx->shader->key.mono.vs.fix_fetch >> (4 * input_index)) & 0xf;
+
+       /* Do multiple loads for double formats. */
+       if (fix_fetch == SI_FIX_FETCH_RGB_64_FLOAT) {
+               num_fetches = 3; /* 3 2-dword loads */
+               fetch_stride = 8;
+       } else if (fix_fetch == SI_FIX_FETCH_RGBA_64_FLOAT) {
+               num_fetches = 2; /* 2 4-dword loads */
+               fetch_stride = 16;
+       } else {
+               num_fetches = 1;
+               fetch_stride = 0;
+       }
+
        args[0] = t_list;
-       args[1] = attribute_offset;
-       args[2] = buffer_index;
-       input = lp_build_intrinsic(gallivm->builder,
-               "llvm.SI.vs.load.input", ctx->v4f32, args, 3,
-               LP_FUNC_ATTR_READNONE);
+       args[2] = vertex_index;
+
+       for (unsigned i = 0; i < num_fetches; i++) {
+               args[1] = LLVMConstInt(ctx->i32, fetch_stride * i, 0);
+
+               input[i] = lp_build_intrinsic(gallivm->builder,
+                       "llvm.SI.vs.load.input", ctx->v4f32, args, 3,
+                       LP_FUNC_ATTR_READNONE);
+       }
 
        /* Break up the vec4 into individual components */
        for (chan = 0; chan < 4; chan++) {
                LLVMValueRef llvm_chan = lp_build_const_int32(gallivm, chan);
                out[chan] = LLVMBuildExtractElement(gallivm->builder,
-                                                   input, llvm_chan, "");
+                                                   input[0], llvm_chan, "");
        }
 
-       fix_fetch = (ctx->shader->key.mono.vs.fix_fetch >> (4 * input_index)) & 0xf;
-
        switch (fix_fetch) {
        case SI_FIX_FETCH_A2_SNORM:
        case SI_FIX_FETCH_A2_SSCALED:
@@ -464,6 +493,25 @@ static void declare_input_vs(
                                                    out[chan], ctx->f32, "");
                }
                break;
+       case SI_FIX_FETCH_RG_64_FLOAT:
+               for (chan = 0; chan < 2; chan++)
+                       out[chan] = extract_double_to_float(ctx, input[0], chan);
+
+               out[2] = LLVMConstReal(ctx->f32, 0);
+               out[3] = LLVMConstReal(ctx->f32, 1);
+               break;
+       case SI_FIX_FETCH_RGB_64_FLOAT:
+               for (chan = 0; chan < 3; chan++)
+                       out[chan] = extract_double_to_float(ctx, input[chan], 0);
+
+               out[3] = LLVMConstReal(ctx->f32, 1);
+               break;
+       case SI_FIX_FETCH_RGBA_64_FLOAT:
+               for (chan = 0; chan < 4; chan++) {
+                       out[chan] = extract_double_to_float(ctx, input[chan / 2],
+                                                           chan % 2);
+               }
+               break;
        }
 }
 
index 5464d6747d8b0ce9b9e42afa37704d6cc27da1ed..6398b39a0ecdbb9bb3bdb92eb0a81edba43f561e 100644 (file)
@@ -247,6 +247,10 @@ enum {
        SI_FIX_FETCH_RGBA_32_SSCALED,
        SI_FIX_FETCH_RGBA_32_FIXED,
        SI_FIX_FETCH_RGBX_32_FIXED,
+       SI_FIX_FETCH_RG_64_FLOAT,
+       SI_FIX_FETCH_RGB_64_FLOAT,
+       SI_FIX_FETCH_RGBA_64_FLOAT,
+       SI_FIX_FETCH_RESERVED_15, /* maximum */
 };
 
 struct si_shader;
index 1e0729c3b70c5a98d0fad00b694697e8d538ee3d..107bc06597db7bfd7665d1cc61faca1b1db7f19d 100644 (file)
@@ -1762,6 +1762,19 @@ static uint32_t si_translate_buffer_dataformat(struct pipe_screen *screen,
                        return V_008F0C_BUF_DATA_FORMAT_32_32_32_32;
                }
                break;
+       case 64:
+               /* Legacy double formats. */
+               switch (desc->nr_channels) {
+               case 1: /* 1 load */
+                       return V_008F0C_BUF_DATA_FORMAT_32_32;
+               case 2: /* 1 load */
+                       return V_008F0C_BUF_DATA_FORMAT_32_32_32_32;
+               case 3: /* 3 loads */
+                       return V_008F0C_BUF_DATA_FORMAT_32_32;
+               case 4: /* 2 loads */
+                       return V_008F0C_BUF_DATA_FORMAT_32_32_32_32;
+               }
+               break;
        }
 
        return V_008F0C_BUF_DATA_FORMAT_INVALID;
@@ -3359,6 +3372,7 @@ static void *si_create_vertex_elements(struct pipe_context *ctx,
                unsigned data_format, num_format;
                int first_non_void;
                unsigned vbo_index = elements[i].vertex_buffer_index;
+               unsigned char swizzle[4];
 
                if (vbo_index >= SI_NUM_VERTEX_BUFFERS) {
                        FREE(v);
@@ -3375,13 +3389,8 @@ static void *si_create_vertex_elements(struct pipe_context *ctx,
                data_format = si_translate_buffer_dataformat(ctx->screen, desc, first_non_void);
                num_format = si_translate_buffer_numformat(ctx->screen, desc, first_non_void);
                channel = first_non_void >= 0 ? &desc->channel[first_non_void] : NULL;
+               memcpy(swizzle, desc->swizzle, sizeof(swizzle));
 
-               v->rsrc_word3[i] = S_008F0C_DST_SEL_X(si_map_swizzle(desc->swizzle[0])) |
-                                  S_008F0C_DST_SEL_Y(si_map_swizzle(desc->swizzle[1])) |
-                                  S_008F0C_DST_SEL_Z(si_map_swizzle(desc->swizzle[2])) |
-                                  S_008F0C_DST_SEL_W(si_map_swizzle(desc->swizzle[3])) |
-                                  S_008F0C_NUM_FORMAT(num_format) |
-                                  S_008F0C_DATA_FORMAT(data_format);
                v->format_size[i] = desc->block.bits / 8;
 
                /* The hardware always treats the 2-bit alpha channel as
@@ -3421,8 +3430,43 @@ static void *si_create_vertex_elements(struct pipe_context *ctx,
                                        v->fix_fetch |= (uint64_t)SI_FIX_FETCH_RGBA_32_USCALED << (4 * i);
                                }
                        }
+               } else if (channel && channel->size == 64 &&
+                          channel->type == UTIL_FORMAT_TYPE_FLOAT) {
+                       switch (desc->nr_channels) {
+                       case 1:
+                       case 2:
+                               v->fix_fetch |= (uint64_t)SI_FIX_FETCH_RG_64_FLOAT << (4 * i);
+                               swizzle[0] = PIPE_SWIZZLE_X;
+                               swizzle[1] = PIPE_SWIZZLE_Y;
+                               swizzle[2] = desc->nr_channels == 2 ? PIPE_SWIZZLE_Z : PIPE_SWIZZLE_0;
+                               swizzle[3] = desc->nr_channels == 2 ? PIPE_SWIZZLE_W : PIPE_SWIZZLE_0;
+                               break;
+                       case 3:
+                               v->fix_fetch |= (uint64_t)SI_FIX_FETCH_RGB_64_FLOAT << (4 * i);
+                               swizzle[0] = PIPE_SWIZZLE_X; /* 3 loads */
+                               swizzle[1] = PIPE_SWIZZLE_Y;
+                               swizzle[2] = PIPE_SWIZZLE_0;
+                               swizzle[3] = PIPE_SWIZZLE_0;
+                               break;
+                       case 4:
+                               v->fix_fetch |= (uint64_t)SI_FIX_FETCH_RGBA_64_FLOAT << (4 * i);
+                               swizzle[0] = PIPE_SWIZZLE_X; /* 2 loads */
+                               swizzle[1] = PIPE_SWIZZLE_Y;
+                               swizzle[2] = PIPE_SWIZZLE_Z;
+                               swizzle[3] = PIPE_SWIZZLE_W;
+                               break;
+                       default:
+                               assert(0);
+                       }
                }
 
+               v->rsrc_word3[i] = S_008F0C_DST_SEL_X(si_map_swizzle(swizzle[0])) |
+                                  S_008F0C_DST_SEL_Y(si_map_swizzle(swizzle[1])) |
+                                  S_008F0C_DST_SEL_Z(si_map_swizzle(swizzle[2])) |
+                                  S_008F0C_DST_SEL_W(si_map_swizzle(swizzle[3])) |
+                                  S_008F0C_NUM_FORMAT(num_format) |
+                                  S_008F0C_DATA_FORMAT(data_format);
+
                /* We work around the fact that 8_8_8 and 16_16_16 data formats
                 * do not exist by using the corresponding 4-component formats.
                 * This requires a fixup of the descriptor for bounds checks.