intel: Rewrite the world of push/pull params
[mesa.git] / src / intel / vulkan / anv_cmd_buffer.c
index e82cfd298cb7a9fdfbfa7a885eaa26235c03d4b0..64d1417f5b1ab6571eb34c6f38f6f480b8f8ad06 100644 (file)
@@ -117,8 +117,17 @@ anv_cmd_state_reset(struct anv_cmd_buffer *cmd_buffer)
 {
    struct anv_cmd_state *state = &cmd_buffer->state;
 
+   cmd_buffer->batch.status = VK_SUCCESS;
+
    memset(&state->descriptors, 0, sizeof(state->descriptors));
-   memset(&state->push_constants, 0, sizeof(state->push_constants));
+   for (uint32_t i = 0; i < ARRAY_SIZE(state->push_descriptors); i++) {
+      vk_free(&cmd_buffer->pool->alloc, state->push_descriptors[i]);
+      state->push_descriptors[i] = NULL;
+   }
+   for (uint32_t i = 0; i < MESA_SHADER_STAGES; i++) {
+      vk_free(&cmd_buffer->pool->alloc, state->push_constants[i]);
+      state->push_constants[i] = NULL;
+   }
    memset(state->binding_tables, 0, sizeof(state->binding_tables));
    memset(state->samplers, 0, sizeof(state->samplers));
 
@@ -131,6 +140,9 @@ anv_cmd_state_reset(struct anv_cmd_buffer *cmd_buffer)
    state->descriptors_dirty = 0;
    state->push_constants_dirty = 0;
    state->pipeline = NULL;
+   state->framebuffer = NULL;
+   state->pass = NULL;
+   state->subpass = NULL;
    state->push_constant_stages = 0;
    state->restart_index = UINT32_MAX;
    state->dynamic = default_dynamic_state;
@@ -138,10 +150,8 @@ anv_cmd_state_reset(struct anv_cmd_buffer *cmd_buffer)
    state->pma_fix_enabled = false;
    state->hiz_enabled = false;
 
-   if (state->attachments != NULL) {
-      vk_free(&cmd_buffer->pool->alloc, state->attachments);
-      state->attachments = NULL;
-   }
+   vk_free(&cmd_buffer->pool->alloc, state->attachments);
+   state->attachments = NULL;
 
    state->gen7.index_buffer = NULL;
 }
@@ -155,13 +165,17 @@ anv_cmd_buffer_ensure_push_constants_size(struct anv_cmd_buffer *cmd_buffer,
    if (*ptr == NULL) {
       *ptr = vk_alloc(&cmd_buffer->pool->alloc, size, 8,
                        VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
-      if (*ptr == NULL)
+      if (*ptr == NULL) {
+         anv_batch_set_error(&cmd_buffer->batch, VK_ERROR_OUT_OF_HOST_MEMORY);
          return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
+      }
    } else if ((*ptr)->size < size) {
       *ptr = vk_realloc(&cmd_buffer->pool->alloc, *ptr, size, 8,
                          VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
-      if (*ptr == NULL)
+      if (*ptr == NULL) {
+         anv_batch_set_error(&cmd_buffer->batch, VK_ERROR_OUT_OF_HOST_MEMORY);
          return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
+      }
    }
    (*ptr)->size = size;
 
@@ -182,6 +196,11 @@ static VkResult anv_create_cmd_buffer(
    if (cmd_buffer == NULL)
       return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
 
+   cmd_buffer->batch.status = VK_SUCCESS;
+
+   for (uint32_t i = 0; i < MESA_SHADER_STAGES; i++) {
+      cmd_buffer->state.push_constants[i] = NULL;
+   }
    cmd_buffer->_loader_data.loaderMagic = ICD_LOADER_MAGIC;
    cmd_buffer->device = device;
    cmd_buffer->pool = pool;
@@ -193,9 +212,12 @@ static VkResult anv_create_cmd_buffer(
       goto fail;
 
    anv_state_stream_init(&cmd_buffer->surface_state_stream,
-                         &device->surface_state_block_pool);
+                         &device->surface_state_pool, 4096);
    anv_state_stream_init(&cmd_buffer->dynamic_state_stream,
-                         &device->dynamic_state_block_pool);
+                         &device->dynamic_state_pool, 16384);
+
+   memset(cmd_buffer->state.push_descriptors, 0,
+          sizeof(cmd_buffer->state.push_descriptors));
 
    if (pool) {
       list_addtail(&cmd_buffer->pool_link, &pool->cmd_buffers);
@@ -254,7 +276,8 @@ anv_cmd_buffer_destroy(struct anv_cmd_buffer *cmd_buffer)
    anv_state_stream_finish(&cmd_buffer->surface_state_stream);
    anv_state_stream_finish(&cmd_buffer->dynamic_state_stream);
 
-   vk_free(&cmd_buffer->pool->alloc, cmd_buffer->state.attachments);
+   anv_cmd_state_reset(cmd_buffer);
+
    vk_free(&cmd_buffer->pool->alloc, cmd_buffer);
 }
 
@@ -284,11 +307,11 @@ anv_cmd_buffer_reset(struct anv_cmd_buffer *cmd_buffer)
 
    anv_state_stream_finish(&cmd_buffer->surface_state_stream);
    anv_state_stream_init(&cmd_buffer->surface_state_stream,
-                         &cmd_buffer->device->surface_state_block_pool);
+                         &cmd_buffer->device->surface_state_pool, 4096);
 
    anv_state_stream_finish(&cmd_buffer->dynamic_state_stream);
    anv_state_stream_init(&cmd_buffer->dynamic_state_stream,
-                         &cmd_buffer->device->dynamic_state_block_pool);
+                         &cmd_buffer->device->dynamic_state_pool, 16384);
    return VK_SUCCESS;
 }
 
@@ -313,6 +336,8 @@ anv_cmd_buffer_emit_state_base_address(struct anv_cmd_buffer *cmd_buffer)
       return gen8_cmd_buffer_emit_state_base_address(cmd_buffer);
    case 9:
       return gen9_cmd_buffer_emit_state_base_address(cmd_buffer);
+   case 10:
+      return gen10_cmd_buffer_emit_state_base_address(cmd_buffer);
    default:
       unreachable("unsupported gen\n");
    }
@@ -501,42 +526,31 @@ void anv_CmdBindDescriptorSets(
 
    assert(firstSet + descriptorSetCount < MAX_SETS);
 
+   uint32_t dynamic_slot = 0;
    for (uint32_t i = 0; i < descriptorSetCount; i++) {
       ANV_FROM_HANDLE(anv_descriptor_set, set, pDescriptorSets[i]);
       set_layout = layout->set[firstSet + i].layout;
 
-      if (cmd_buffer->state.descriptors[firstSet + i] != set) {
-         cmd_buffer->state.descriptors[firstSet + i] = set;
-         cmd_buffer->state.descriptors_dirty |= set_layout->shader_stages;
-      }
+      cmd_buffer->state.descriptors[firstSet + i] = set;
 
       if (set_layout->dynamic_offset_count > 0) {
-         anv_foreach_stage(s, set_layout->shader_stages) {
-            anv_cmd_buffer_ensure_push_constant_field(cmd_buffer, s, dynamic);
-
-            struct anv_push_constants *push =
-               cmd_buffer->state.push_constants[s];
-
-            unsigned d = layout->set[firstSet + i].dynamic_offset_start;
-            const uint32_t *offsets = pDynamicOffsets;
-            struct anv_descriptor *desc = set->descriptors;
-
-            for (unsigned b = 0; b < set_layout->binding_count; b++) {
-               if (set_layout->binding[b].dynamic_offset_index < 0)
-                  continue;
-
-               unsigned array_size = set_layout->binding[b].array_size;
-               for (unsigned j = 0; j < array_size; j++) {
-                  push->dynamic[d].offset = *(offsets++);
-                  push->dynamic[d].range = (desc->buffer_view) ?
-                                            desc->buffer_view->range : 0;
-                  desc++;
-                  d++;
-               }
-            }
-         }
-         cmd_buffer->state.push_constants_dirty |= set_layout->shader_stages;
+         uint32_t dynamic_offset_start =
+            layout->set[firstSet + i].dynamic_offset_start;
+
+         /* Assert that everything is in range */
+         assert(dynamic_offset_start + set_layout->dynamic_offset_count <=
+                ARRAY_SIZE(cmd_buffer->state.dynamic_offsets));
+         assert(dynamic_slot + set_layout->dynamic_offset_count <=
+                dynamicOffsetCount);
+
+         typed_memcpy(&cmd_buffer->state.dynamic_offsets[dynamic_offset_start],
+                      &pDynamicOffsets[dynamic_slot],
+                      set_layout->dynamic_offset_count);
+
+         dynamic_slot += set_layout->dynamic_offset_count;
       }
+
+      cmd_buffer->state.descriptors_dirty |= set_layout->shader_stages;
    }
 }
 
@@ -553,7 +567,7 @@ void anv_CmdBindVertexBuffers(
    /* We have to defer setting up vertex buffer since we need the buffer
     * stride from the pipeline. */
 
-   assert(firstBinding + bindingCount < MAX_VBS);
+   assert(firstBinding + bindingCount <= MAX_VBS);
    for (uint32_t i = 0; i < bindingCount; i++) {
       vb[firstBinding + i].buffer = anv_buffer_from_handle(pBuffers[i]);
       vb[firstBinding + i].offset = pOffsets[i];
@@ -587,8 +601,7 @@ anv_cmd_buffer_emit_dynamic(struct anv_cmd_buffer *cmd_buffer,
    state = anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, size, alignment);
    memcpy(state.map, data, size);
 
-   if (!cmd_buffer->device->info.has_llc)
-      anv_state_flush(state);
+   anv_state_flush(cmd_buffer->device, state);
 
    VG(VALGRIND_CHECK_MEM_IS_DEFINED(state.map, size));
 
@@ -609,14 +622,33 @@ anv_cmd_buffer_merge_dynamic(struct anv_cmd_buffer *cmd_buffer,
    for (uint32_t i = 0; i < dwords; i++)
       p[i] = a[i] | b[i];
 
-   if (!cmd_buffer->device->info.has_llc)
-      anv_state_flush(state);
+   anv_state_flush(cmd_buffer->device, state);
 
    VG(VALGRIND_CHECK_MEM_IS_DEFINED(p, dwords * 4));
 
    return state;
 }
 
+/**
+ * Resolve a single push-constant param entry to the 32-bit value that gets
+ * written into the push constant buffer.
+ *
+ * \param data   push constant storage to read from; data->size is compared
+ *               against the byte offset below, i.e. it is treated as the
+ *               number of valid bytes starting at \p data
+ * \param param  param entry: either a builtin (as reported by
+ *               BRW_PARAM_IS_BUILTIN) or a value that
+ *               ANV_PARAM_PUSH_OFFSET() turns into a byte offset into
+ *               \p data
+ *
+ * \return 0 for BRW_PARAM_BUILTIN_ZERO, the dword stored at the decoded
+ *         offset when it lies entirely inside \p data, and 0 otherwise.
+ */
+static uint32_t
+anv_push_constant_value(struct anv_push_constants *data, uint32_t param)
+{
+   if (BRW_PARAM_IS_BUILTIN(param)) {
+      switch (param) {
+      case BRW_PARAM_BUILTIN_ZERO:
+         return 0;
+      default:
+         unreachable("Invalid param builtin");
+      }
+   }
+
+   uint32_t offset = ANV_PARAM_PUSH_OFFSET(param);
+   assert(offset % sizeof(uint32_t) == 0);
+
+   /* The load below reads a full dword, so the whole dword has to fit in
+    * the storage, not just its first byte.  The comparison is written as a
+    * subtraction on the size side so it cannot wrap for large offsets.
+    */
+   if (data->size < sizeof(uint32_t) ||
+       offset > data->size - sizeof(uint32_t))
+      return 0;
+
+   return *(uint32_t *)((uint8_t *)data + offset);
+}
+
 struct anv_state
 anv_cmd_buffer_push_constants(struct anv_cmd_buffer *cmd_buffer,
                               gl_shader_stage stage)
@@ -641,13 +673,10 @@ anv_cmd_buffer_push_constants(struct anv_cmd_buffer *cmd_buffer,
 
    /* Walk through the param array and fill the buffer with data */
    uint32_t *u32_map = state.map;
-   for (unsigned i = 0; i < prog_data->nr_params; i++) {
-      uint32_t offset = (uintptr_t)prog_data->param[i];
-      u32_map[i] = *(uint32_t *)((uint8_t *)data + offset);
-   }
+   for (unsigned i = 0; i < prog_data->nr_params; i++)
+      u32_map[i] = anv_push_constant_value(data, prog_data->param[i]);
 
-   if (!cmd_buffer->device->info.has_llc)
-      anv_state_flush(state);
+   anv_state_flush(cmd_buffer->device, state);
 
    return state;
 }
@@ -684,8 +713,7 @@ anv_cmd_buffer_cs_push_constants(struct anv_cmd_buffer *cmd_buffer)
       for (unsigned i = 0;
            i < cs_prog_data->push.cross_thread.dwords;
            i++) {
-         uint32_t offset = (uintptr_t)prog_data->param[i];
-         u32_map[i] = *(uint32_t *)((uint8_t *)data + offset);
+         u32_map[i] = anv_push_constant_value(data, prog_data->param[i]);
       }
    }
 
@@ -697,8 +725,8 @@ anv_cmd_buffer_cs_push_constants(struct anv_cmd_buffer *cmd_buffer)
          unsigned src = cs_prog_data->push.cross_thread.dwords;
          for ( ; src < prog_data->nr_params; src++, dst++) {
             if (src != cs_prog_data->thread_local_id_index) {
-               uint32_t offset = (uintptr_t)prog_data->param[src];
-               u32_map[dst] = *(uint32_t *)((uint8_t *)data + offset);
+               u32_map[dst] =
+                  anv_push_constant_value(data, prog_data->param[src]);
             } else {
                u32_map[dst] = t * cs_prog_data->simd_size;
             }
@@ -706,8 +734,7 @@ anv_cmd_buffer_cs_push_constants(struct anv_cmd_buffer *cmd_buffer)
       }
    }
 
-   if (!cmd_buffer->device->info.has_llc)
-      anv_state_flush(state);
+   anv_state_flush(cmd_buffer->device, state);
 
    return state;
 }
@@ -723,7 +750,11 @@ void anv_CmdPushConstants(
    ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
 
    anv_foreach_stage(stage, stageFlags) {
-      anv_cmd_buffer_ensure_push_constant_field(cmd_buffer, stage, client_data);
+      VkResult result =
+         anv_cmd_buffer_ensure_push_constant_field(cmd_buffer,
+                                                   stage, client_data);
+      if (result != VK_SUCCESS)
+         return;
 
       memcpy(cmd_buffer->state.push_constants[stage]->client_data + offset,
              pValues, size);
@@ -809,14 +840,164 @@ anv_cmd_buffer_get_depth_stencil_view(const struct anv_cmd_buffer *cmd_buffer)
    const struct anv_subpass *subpass = cmd_buffer->state.subpass;
    const struct anv_framebuffer *fb = cmd_buffer->state.framebuffer;
 
-   if (subpass->depth_stencil_attachment == VK_ATTACHMENT_UNUSED)
+   if (subpass->depth_stencil_attachment.attachment == VK_ATTACHMENT_UNUSED)
       return NULL;
 
    const struct anv_image_view *iview =
-      fb->attachments[subpass->depth_stencil_attachment];
+      fb->attachments[subpass->depth_stencil_attachment.attachment];
 
    assert(iview->aspect_mask & (VK_IMAGE_ASPECT_DEPTH_BIT |
                                 VK_IMAGE_ASPECT_STENCIL_BIT));
 
    return iview;
 }
+
+/**
+ * Make sure cmd_buffer->state.push_descriptors[set] points at storage for a
+ * push descriptor set, allocating it from the command pool allocator the
+ * first time the slot is used.
+ *
+ * \return VK_SUCCESS when the slot is populated (already or newly);
+ *         on allocation failure the error is also recorded on the command
+ *         buffer's batch and VK_ERROR_OUT_OF_HOST_MEMORY is returned
+ *         through vk_error().
+ */
+static VkResult
+anv_cmd_buffer_ensure_push_descriptor_set(struct anv_cmd_buffer *cmd_buffer,
+                                          uint32_t set)
+{
+   struct anv_push_descriptor_set **slot =
+      &cmd_buffer->state.push_descriptors[set];
+
+   /* An earlier call already populated this slot; nothing to do. */
+   if (*slot != NULL)
+      return VK_SUCCESS;
+
+   *slot = vk_alloc(&cmd_buffer->pool->alloc, sizeof(**slot), 8,
+                    VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
+   if (*slot != NULL)
+      return VK_SUCCESS;
+
+   anv_batch_set_error(&cmd_buffer->batch, VK_ERROR_OUT_OF_HOST_MEMORY);
+   return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
+}
+
+/**
+ * Entry point for vkCmdPushDescriptorSetKHR.
+ *
+ * Fills the command buffer's own push descriptor set for index \p _set from
+ * the supplied writes, then binds that set at \p _set and marks the stages
+ * used by the set layout as having dirty descriptors.  Returns without
+ * binding anything if the push descriptor set storage cannot be allocated.
+ */
+void anv_CmdPushDescriptorSetKHR(
+    VkCommandBuffer commandBuffer,
+    VkPipelineBindPoint pipelineBindPoint,
+    VkPipelineLayout _layout,
+    uint32_t _set,
+    uint32_t descriptorWriteCount,
+    const VkWriteDescriptorSet* pDescriptorWrites)
+{
+   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
+   ANV_FROM_HANDLE(anv_pipeline_layout, layout, _layout);
+
+   assert(pipelineBindPoint == VK_PIPELINE_BIND_POINT_GRAPHICS ||
+          pipelineBindPoint == VK_PIPELINE_BIND_POINT_COMPUTE);
+   assert(_set < MAX_SETS);
+
+   const struct anv_descriptor_set_layout *set_layout =
+      layout->set[_set].layout;
+
+   VkResult result =
+      anv_cmd_buffer_ensure_push_descriptor_set(cmd_buffer, _set);
+   if (result != VK_SUCCESS)
+      return;
+
+   /* Point the embedded descriptor set at the requested layout and at the
+    * buffer view storage that lives next to it in the push set.
+    */
+   struct anv_push_descriptor_set *push_set =
+      cmd_buffer->state.push_descriptors[_set];
+   struct anv_descriptor_set *set = &push_set->set;
+
+   set->layout = set_layout;
+   set->size = anv_descriptor_set_layout_size(set_layout);
+   set->buffer_count = set_layout->buffer_count;
+   set->buffer_views = push_set->buffer_views;
+
+   /* Apply each client write, one array element at a time. */
+   for (uint32_t w = 0; w < descriptorWriteCount; w++) {
+      const VkWriteDescriptorSet *write = &pDescriptorWrites[w];
+      const VkDescriptorType type = write->descriptorType;
+      const uint32_t binding = write->dstBinding;
+      const uint32_t first_element = write->dstArrayElement;
+
+      switch (type) {
+      case VK_DESCRIPTOR_TYPE_SAMPLER:
+      case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
+      case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
+      case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
+      case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT:
+         for (uint32_t e = 0; e < write->descriptorCount; e++) {
+            anv_descriptor_set_write_image_view(set,
+                                                &cmd_buffer->device->info,
+                                                &write->pImageInfo[e],
+                                                type,
+                                                binding,
+                                                first_element + e);
+         }
+         break;
+
+      case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
+      case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
+         for (uint32_t e = 0; e < write->descriptorCount; e++) {
+            ANV_FROM_HANDLE(anv_buffer_view, bview,
+                            write->pTexelBufferView[e]);
+
+            anv_descriptor_set_write_buffer_view(set, type, bview,
+                                                 binding,
+                                                 first_element + e);
+         }
+         break;
+
+      case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
+      case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
+      case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
+      case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
+         for (uint32_t e = 0; e < write->descriptorCount; e++) {
+            const VkDescriptorBufferInfo *info = &write->pBufferInfo[e];
+
+            assert(info->buffer);
+            ANV_FROM_HANDLE(anv_buffer, buffer, info->buffer);
+            assert(buffer);
+
+            anv_descriptor_set_write_buffer(set,
+                                            cmd_buffer->device,
+                                            &cmd_buffer->surface_state_stream,
+                                            type,
+                                            buffer,
+                                            binding,
+                                            first_element + e,
+                                            info->offset,
+                                            info->range);
+         }
+         break;
+
+      default:
+         /* Any other descriptor type is left untouched. */
+         break;
+      }
+   }
+
+   cmd_buffer->state.descriptors[_set] = set;
+   cmd_buffer->state.descriptors_dirty |= set_layout->shader_stages;
+}
+
+/**
+ * Entry point for vkCmdPushDescriptorSetWithTemplateKHR.
+ *
+ * Fills the command buffer's own push descriptor set for index \p _set by
+ * applying \p descriptorUpdateTemplate to \p pData, then binds that set at
+ * \p _set and marks the stages used by the set layout as having dirty
+ * descriptors.  Returns without binding anything if the push descriptor set
+ * storage cannot be allocated.
+ */
+void anv_CmdPushDescriptorSetWithTemplateKHR(
+    VkCommandBuffer                             commandBuffer,
+    VkDescriptorUpdateTemplateKHR               descriptorUpdateTemplate,
+    VkPipelineLayout                            _layout,
+    uint32_t                                    _set,
+    const void*                                 pData)
+{
+   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
+   ANV_FROM_HANDLE(anv_descriptor_update_template, template,
+                   descriptorUpdateTemplate);
+   ANV_FROM_HANDLE(anv_pipeline_layout, layout, _layout);
+
+   /* _set is a descriptor set index (it indexes layout->set[] and
+    * state.descriptors[]), so it is bounded by MAX_SETS exactly as in
+    * anv_CmdPushDescriptorSetKHR, not by a per-set descriptor count limit.
+    */
+   assert(_set < MAX_SETS);
+
+   const struct anv_descriptor_set_layout *set_layout =
+      layout->set[_set].layout;
+
+   if (anv_cmd_buffer_ensure_push_descriptor_set(cmd_buffer, _set) != VK_SUCCESS)
+      return;
+   struct anv_push_descriptor_set *push_set =
+      cmd_buffer->state.push_descriptors[_set];
+   struct anv_descriptor_set *set = &push_set->set;
+
+   /* Point the embedded descriptor set at the requested layout and at the
+    * buffer view storage that lives next to it in the push set.
+    */
+   set->layout = set_layout;
+   set->size = anv_descriptor_set_layout_size(set_layout);
+   set->buffer_count = set_layout->buffer_count;
+   set->buffer_views = push_set->buffer_views;
+
+   anv_descriptor_set_write_template(set,
+                                     cmd_buffer->device,
+                                     &cmd_buffer->surface_state_stream,
+                                     template,
+                                     pData);
+
+   cmd_buffer->state.descriptors[_set] = set;
+   cmd_buffer->state.descriptors_dirty |= set_layout->shader_stages;
+}