src/compiler/nir/nir_gather_xfb_info.c
/*
 * Copyright © 2018 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "nir_xfb_info.h"

#include "util/u_math.h"

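/*
 * Gathers transform feedback (XFB) information from a shader's output
 * variables into a flat nir_xfb_info record: one nir_xfb_output_info entry
 * per captured vec4 slot, plus per-buffer stride/stream data, and optionally
 * a nir_xfb_varyings_info list describing the captured varyings.
 */
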
static void
add_var_xfb_varying(nir_xfb_info *xfb,
                    nir_xfb_varyings_info *varyings,
                    unsigned buffer,
                    unsigned offset,
                    const struct glsl_type *type)
{
   if (varyings == NULL)
      return;

   nir_xfb_varying_info *varying = &varyings->varyings[varyings->varying_count++];

   varying->type = type;
   varying->buffer = buffer;
   varying->offset = offset;
   xfb->buffers[buffer].varying_count++;
}

static nir_xfb_info *
nir_xfb_info_create(void *mem_ctx, uint16_t output_count)
{
   return rzalloc_size(mem_ctx, nir_xfb_info_size(output_count));
}

static size_t
nir_xfb_varyings_info_size(uint16_t varying_count)
{
   return sizeof(nir_xfb_varyings_info) +
          sizeof(nir_xfb_varying_info) * varying_count;
}

static nir_xfb_varyings_info *
nir_xfb_varyings_info_create(void *mem_ctx, uint16_t varying_count)
{
   return rzalloc_size(mem_ctx, nir_xfb_varyings_info_size(varying_count));
}

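/*
 * Recursively walks a variable's type, appending one nir_xfb_output_info
 * record per captured vec4 slot and advancing *location and *offset as it
 * goes.  varying_added tracks whether the enclosing array has already been
 * recorded as a single varying so that its elements are not added again.
 */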
static void
add_var_xfb_outputs(nir_xfb_info *xfb,
                    nir_xfb_varyings_info *varyings,
                    nir_variable *var,
                    unsigned buffer,
                    unsigned *location,
                    unsigned *offset,
                    const struct glsl_type *type,
                    bool varying_added)
{
   /* If this type contains a 64-bit value, align to 8 bytes */
   if (glsl_type_contains_64bit(type))
      *offset = ALIGN_POT(*offset, 8);

   if (glsl_type_is_array_or_matrix(type) && !var->data.compact) {
      unsigned length = glsl_get_length(type);

      const struct glsl_type *child_type = glsl_get_array_element(type);
      if (!glsl_type_is_array(child_type) &&
          !glsl_type_is_struct(child_type)) {

         add_var_xfb_varying(xfb, varyings, buffer, *offset, type);
         varying_added = true;
      }

      for (unsigned i = 0; i < length; i++)
         add_var_xfb_outputs(xfb, varyings, var, buffer, location, offset,
                             child_type, varying_added);
   } else if (glsl_type_is_struct_or_ifc(type)) {
      unsigned length = glsl_get_length(type);
      for (unsigned i = 0; i < length; i++) {
         const struct glsl_type *child_type = glsl_get_struct_field(type, i);
         add_var_xfb_outputs(xfb, varyings, var, buffer, location, offset,
                             child_type, varying_added);
      }
   } else {
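      /* Record (or cross-check) the per-buffer stride and stream.  Every
       * variable captured to the same buffer must agree on both.
       */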
      assert(buffer < NIR_MAX_XFB_BUFFERS);
      if (xfb->buffers_written & (1 << buffer)) {
         assert(xfb->buffers[buffer].stride == var->data.xfb.stride);
         assert(xfb->buffer_to_stream[buffer] == var->data.stream);
      } else {
         xfb->buffers_written |= (1 << buffer);
         xfb->buffers[buffer].stride = var->data.xfb.stride;
         xfb->buffer_to_stream[buffer] = var->data.stream;
      }

      assert(var->data.stream < NIR_MAX_XFB_STREAMS);
      xfb->streams_written |= (1 << var->data.stream);

      unsigned comp_slots;
      if (var->data.compact) {
         /* This only happens for clip/cull which are float arrays */
         assert(glsl_without_array(type) == glsl_float_type());
         assert(var->data.location == VARYING_SLOT_CLIP_DIST0 ||
                var->data.location == VARYING_SLOT_CLIP_DIST1);
         comp_slots = glsl_get_length(type);
      } else {
         comp_slots = glsl_get_component_slots(type);

         UNUSED unsigned attrib_slots = DIV_ROUND_UP(comp_slots, 4);
         assert(attrib_slots == glsl_count_attribute_slots(type, false));

         /* Ensure that we don't have, for instance, a dvec2 with a
          * location_frac of 2 which would make it cross a location boundary
          * even though it fits in a single slot.  However, you can have a
          * dvec3 which crosses the slot boundary with a location_frac of 2.
          */
         assert(DIV_ROUND_UP(var->data.location_frac + comp_slots, 4) ==
                attrib_slots);
      }

      assert(var->data.location_frac + comp_slots <= 8);
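      /* Build the mask of components this variable covers, starting at its
       * location_frac; for example, a vec3 with location_frac == 1 yields
       * comp_mask == 0b1110.
       */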
      uint8_t comp_mask = ((1 << comp_slots) - 1) << var->data.location_frac;
      unsigned comp_offset = var->data.location_frac;

      if (!varying_added) {
         add_var_xfb_varying(xfb, varyings, buffer, *offset, type);
      }

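      /* Emit one output record per vec4 slot the variable covers, advancing
       * the buffer offset by 4 bytes per captured component and moving to
       * the next location for each slot.
       */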
      while (comp_mask) {
         nir_xfb_output_info *output = &xfb->outputs[xfb->output_count++];

         output->buffer = buffer;
         output->offset = *offset;
         output->location = *location;
         output->component_mask = comp_mask & 0xf;
         output->component_offset = comp_offset;

         *offset += util_bitcount(output->component_mask) * 4;
         (*location)++;
         comp_mask >>= 4;
         comp_offset = 0;
      }
   }
}

static int
compare_xfb_varying_offsets(const void *_a, const void *_b)
{
   const nir_xfb_varying_info *a = _a, *b = _b;

   if (a->buffer != b->buffer)
      return a->buffer - b->buffer;

   return a->offset - b->offset;
}

static int
compare_xfb_output_offsets(const void *_a, const void *_b)
{
   const nir_xfb_output_info *a = _a, *b = _b;

   return a->offset - b->offset;
}

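/*
 * Gathers the XFB info for a shader, allocated against mem_ctx.  Returns
 * NULL when no output is captured.  A minimal usage sketch (the ralloc
 * context handling below is illustrative, not mandated by this file):
 *
 *    void *mem_ctx = ralloc_context(NULL);
 *    nir_xfb_info *xfb = nir_gather_xfb_info(nir, mem_ctx);
 *    if (xfb != NULL) {
 *       for (unsigned i = 0; i < xfb->output_count; i++) {
 *          const nir_xfb_output_info *out = &xfb->outputs[i];
 *          // Program out->buffer, out->offset, out->location and
 *          // out->component_mask into the driver's streamout state.
 *       }
 *    }
 *    ralloc_free(mem_ctx);
 */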
nir_xfb_info *
nir_gather_xfb_info(const nir_shader *shader, void *mem_ctx)
{
   return nir_gather_xfb_info_with_varyings(shader, mem_ctx, NULL);
}

nir_xfb_info *
nir_gather_xfb_info_with_varyings(const nir_shader *shader,
                                  void *mem_ctx,
                                  nir_xfb_varyings_info **varyings_info_out)
{
   assert(shader->info.stage == MESA_SHADER_VERTEX ||
          shader->info.stage == MESA_SHADER_TESS_EVAL ||
          shader->info.stage == MESA_SHADER_GEOMETRY);

   /* Compute the number of outputs we have.  This is simply the number of
    * cumulative locations consumed by all the variables.  If a location is
    * represented by multiple variables, then each counts separately in the
    * number of outputs.  This is only an estimate, as some variables may
    * have an xfb_buffer but not an output, so it may end up larger than we
    * need, but it should be good enough for allocation.
    */
   unsigned num_outputs = 0;
   unsigned num_varyings = 0;
   nir_xfb_varyings_info *varyings_info = NULL;
   nir_foreach_variable(var, &shader->outputs) {
      if (var->data.explicit_xfb_buffer) {
         num_outputs += glsl_count_attribute_slots(var->type, false);
         num_varyings += glsl_varying_count(var->type);
      }
   }
   if (num_outputs == 0 || num_varyings == 0)
      return NULL;

   nir_xfb_info *xfb = nir_xfb_info_create(mem_ctx, num_outputs);
   if (varyings_info_out != NULL) {
      *varyings_info_out = nir_xfb_varyings_info_create(mem_ctx, num_varyings);
      varyings_info = *varyings_info_out;
   }

   /* Walk the list of outputs and add them to the array */
   nir_foreach_variable(var, &shader->outputs) {
      if (!var->data.explicit_xfb_buffer)
         continue;

      unsigned location = var->data.location;

      /* Detecting an array of blocks can't be done just by checking whether
       * we have an interface type and the type is an array, because after
       * variable splitting we could receive a split struct that contains an
       * array.
       */
      bool is_array_block = var->interface_type != NULL &&
         glsl_type_is_array(var->type) &&
         glsl_without_array(var->type) == var->interface_type;

      if (var->data.explicit_offset && !is_array_block) {
         unsigned offset = var->data.offset;
         add_var_xfb_outputs(xfb, varyings_info, var, var->data.xfb.buffer,
                             &location, &offset, var->type, false);
      } else if (is_array_block) {
         assert(glsl_type_is_struct_or_ifc(var->interface_type));

         unsigned aoa_size = glsl_get_aoa_size(var->type);
         const struct glsl_type *itype = var->interface_type;
         unsigned nfields = glsl_get_length(itype);
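         /* Each element of the block array is captured to its own XFB
          * buffer; fields without an explicit offset only advance the
          * location and are not captured.
          */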
         for (unsigned b = 0; b < aoa_size; b++) {
            for (unsigned f = 0; f < nfields; f++) {
               int foffset = glsl_get_struct_field_offset(itype, f);
               const struct glsl_type *ftype = glsl_get_struct_field(itype, f);
               if (foffset < 0) {
                  location += glsl_count_attribute_slots(ftype, false);
                  continue;
               }

               unsigned offset = foffset;
               add_var_xfb_outputs(xfb, varyings_info, var, var->data.xfb.buffer + b,
                                   &location, &offset, ftype, false);
            }
         }
      }
   }

   /* Everything is easier in the state setup code if outputs and varyings
    * are sorted in order of output offset (and buffer for varyings).
    */
   qsort(xfb->outputs, xfb->output_count, sizeof(xfb->outputs[0]),
         compare_xfb_output_offsets);

   if (varyings_info != NULL) {
      qsort(varyings_info->varyings, varyings_info->varying_count,
            sizeof(varyings_info->varyings[0]),
            compare_xfb_varying_offsets);
   }

#ifndef NDEBUG
   /* Finally, sanity check that, within each buffer, the sorted outputs do
    * not overlap.
    */
   unsigned max_offset[NIR_MAX_XFB_BUFFERS] = {0};
   for (unsigned i = 0; i < xfb->output_count; i++) {
      assert(xfb->outputs[i].offset >= max_offset[xfb->outputs[i].buffer]);
      assert(xfb->outputs[i].component_mask != 0);
      unsigned comps = util_bitcount(xfb->outputs[i].component_mask);
      max_offset[xfb->outputs[i].buffer] = xfb->outputs[i].offset + comps * 4;
   }
#endif

   return xfb;
}