nir/xfb: add varyings to nir_xfb_info and gather_info
authorAlejandro Piñeiro <apinheiro@igalia.com>
Wed, 9 Jan 2019 17:19:45 +0000 (18:19 +0100)
committerAlejandro Piñeiro <apinheiro@igalia.com>
Fri, 8 Mar 2019 14:00:50 +0000 (15:00 +0100)
This is needed so that it can be used for OpenGL (right now for ARB_gl_spirv).

This commit adds two new structures:

  * nir_xfb_varying_info: identifies each individual varying. For
    each one, we need to know the type, buffer and xfb_offset.

  * nir_xfb_buffer_info: for each buffer, in addition to the stride,
    we now need to know how many varyings are assigned to it.

In this patch, the only case where num_outputs != num_varyings is
doubles: a dvec3/dvec4 could require more than one output. There are
more cases though (like arrays of arrays), which will be handled in
following patches.

v2: updated after new nir general XFB support introduced for "anv: Add
    support for VK_EXT_transform_feedback"

v3: compute num_varyings beforehand for allocating, instead of relying
    on num_outputs as approximate value (Timothy Arceri)

Reviewed-by: Timothy Arceri <tarceri@itsqueeze.com>
src/amd/vulkan/radv_shader_info.c
src/compiler/nir/nir_gather_xfb_info.c
src/compiler/nir/nir_xfb_info.h
src/intel/vulkan/genX_pipeline.c

index 70f4690b09ee956cf16ce27a63a39d84fb266e93..002475953d96d39766efe7843c56f3272514fe40 100644 (file)
@@ -519,7 +519,7 @@ gather_xfb_info(const nir_shader *nir, struct radv_shader_info *info)
        }
 
        for (unsigned i = 0; i < NIR_MAX_XFB_BUFFERS; i++) {
-               so->strides[i] = xfb->strides[i] / 4;
+               so->strides[i] = xfb->buffers[i].stride / 4;
        }
 
        ralloc_free(xfb);
index 2c50a256a64ab1c0e018114e56b2f7945536734c..12c5d4507e3bd6d84de3714406a3cd0b4d88ab38 100644 (file)
 
 #include <util/u_math.h>
 
+static nir_xfb_info *
+nir_gather_xfb_info_create(void *mem_ctx, uint16_t output_count, uint16_t varying_count)
+{
+   nir_xfb_info *xfb = rzalloc_size(mem_ctx, sizeof(nir_xfb_info));
+
+   xfb->varyings = rzalloc_size(xfb, sizeof(nir_xfb_varying_info) * varying_count);
+   xfb->outputs = rzalloc_size(xfb, sizeof(nir_xfb_output_info) * output_count);
+
+   return xfb;
+}
+
 static void
 add_var_xfb_outputs(nir_xfb_info *xfb,
                     nir_variable *var,
@@ -51,11 +62,11 @@ add_var_xfb_outputs(nir_xfb_info *xfb,
    } else {
       assert(buffer < NIR_MAX_XFB_BUFFERS);
       if (xfb->buffers_written & (1 << buffer)) {
-         assert(xfb->strides[buffer] == var->data.xfb_stride);
+         assert(xfb->buffers[buffer].stride == var->data.xfb_stride);
          assert(xfb->buffer_to_stream[buffer] == var->data.stream);
       } else {
          xfb->buffers_written |= (1 << buffer);
-         xfb->strides[buffer] = var->data.xfb_stride;
+         xfb->buffers[buffer].stride = var->data.xfb_stride;
          xfb->buffer_to_stream[buffer] = var->data.stream;
       }
 
@@ -88,6 +99,12 @@ add_var_xfb_outputs(nir_xfb_info *xfb,
       uint8_t comp_mask = ((1 << comp_slots) - 1) << var->data.location_frac;
       unsigned comp_offset = var->data.location_frac;
 
+      nir_xfb_varying_info *varying = &xfb->varyings[xfb->varying_count++];
+      varying->type = type;
+      varying->buffer = var->data.xfb_buffer;
+      varying->offset = *offset;
+      xfb->buffers[var->data.xfb_buffer].varying_count++;
+
       while (comp_mask) {
          nir_xfb_output_info *output = &xfb->outputs[xfb->output_count++];
 
@@ -127,14 +144,17 @@ nir_gather_xfb_info(const nir_shader *shader, void *mem_ctx)
     * it should be good enough for allocation.
     */
    unsigned num_outputs = 0;
+   unsigned num_varyings = 0;
    nir_foreach_variable(var, &shader->outputs) {
-      if (var->data.explicit_xfb_buffer)
+      if (var->data.explicit_xfb_buffer) {
          num_outputs += glsl_count_attribute_slots(var->type, false);
+         num_varyings += glsl_varying_count(var->type);
+      }
    }
-   if (num_outputs == 0)
+   if (num_outputs == 0 || num_varyings == 0)
       return NULL;
 
-   nir_xfb_info *xfb = rzalloc_size(mem_ctx, nir_xfb_info_size(num_outputs));
+   nir_xfb_info *xfb = nir_gather_xfb_info_create(mem_ctx, num_outputs, num_varyings);
 
    /* Walk the list of outputs and add them to the array */
    nir_foreach_variable(var, &shader->outputs) {
index c6a171fde349db8a50b2ae97673b773c2d270bcc..f0b222b325d0940c2ecbaf850a5881f4a6423041 100644 (file)
 #define NIR_MAX_XFB_BUFFERS 4
 #define NIR_MAX_XFB_STREAMS 4
 
+typedef struct {
+   uint16_t stride;
+   uint16_t varying_count;
+} nir_xfb_buffer_info;
+
 typedef struct {
    uint8_t buffer;
    uint16_t offset;
@@ -37,17 +42,29 @@ typedef struct {
    uint8_t component_offset;
 } nir_xfb_output_info;
 
+typedef struct {
+   const struct glsl_type *type;
+   uint8_t buffer;
+   uint16_t offset;
+} nir_xfb_varying_info;
+
 typedef struct nir_xfb_info {
    uint8_t buffers_written;
    uint8_t streams_written;
 
-   uint16_t strides[NIR_MAX_XFB_BUFFERS];
+   nir_xfb_buffer_info buffers[NIR_MAX_XFB_BUFFERS];
    uint8_t buffer_to_stream[NIR_MAX_XFB_STREAMS];
 
+   uint16_t varying_count;
+   nir_xfb_varying_info *varyings;
+
    uint16_t output_count;
-   nir_xfb_output_info outputs[0];
+   nir_xfb_output_info *outputs;
 } nir_xfb_info;
 
+/* This method doesn't take into account varyings, as it is used to compute
+ * how much size is needed to copy only the outputs.
+ */
 static inline size_t
 nir_xfb_info_size(uint16_t output_count)
 {
index f5b667fa9475e8283860a983a1fbd95b5d8367e4..975052deb799d9785db86b139b0cadbf102c56d0 100644 (file)
@@ -1162,10 +1162,10 @@ emit_3dstate_streamout(struct anv_pipeline *pipeline,
          so.RenderStreamSelect = stream_info ?
                                  stream_info->rasterizationStream : 0;
 
-         so.Buffer0SurfacePitch = xfb_info->strides[0];
-         so.Buffer1SurfacePitch = xfb_info->strides[1];
-         so.Buffer2SurfacePitch = xfb_info->strides[2];
-         so.Buffer3SurfacePitch = xfb_info->strides[3];
+         so.Buffer0SurfacePitch = xfb_info->buffers[0].stride;
+         so.Buffer1SurfacePitch = xfb_info->buffers[1].stride;
+         so.Buffer2SurfacePitch = xfb_info->buffers[2].stride;
+         so.Buffer3SurfacePitch = xfb_info->buffers[3].stride;
 
          int urb_entry_read_offset = 0;
          int urb_entry_read_length =