vk/0.132: Add vkDestroyFence()
1 /*
2 * Copyright © 2015 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include <assert.h>
25 #include <stdbool.h>
26 #include <string.h>
27 #include <unistd.h>
28 #include <fcntl.h>
29
30 #include "private.h"
31 #include "mesa/main/git_sha1.h"
32
33 static int
34 anv_env_get_int(const char *name)
35 {
36 const char *val = getenv(name);
37
38 if (!val)
39 return 0;
40
41 return strtol(val, NULL, 0);
42 }
43
44 static void
45 anv_physical_device_finish(struct anv_physical_device *device)
46 {
47 if (device->fd >= 0)
48 close(device->fd);
49 }
50
51 static VkResult
52 anv_physical_device_init(struct anv_physical_device *device,
53 struct anv_instance *instance,
54 const char *path)
55 {
56 device->fd = open(path, O_RDWR | O_CLOEXEC);
57 if (device->fd < 0)
58 return vk_error(VK_ERROR_UNAVAILABLE);
59
60 device->instance = instance;
61 device->path = path;
62
63 device->chipset_id = anv_env_get_int("INTEL_DEVID_OVERRIDE");
64 device->no_hw = false;
65 if (device->chipset_id) {
66 /* INTEL_DEVID_OVERRIDE implies INTEL_NO_HW. */
67 device->no_hw = true;
68 } else {
69 device->chipset_id = anv_gem_get_param(device->fd, I915_PARAM_CHIPSET_ID);
70 }
71 if (!device->chipset_id)
72 goto fail;
73
74 device->name = brw_get_device_name(device->chipset_id);
75 device->info = brw_get_device_info(device->chipset_id, -1);
76 if (!device->info)
77 goto fail;
78
79 if (!anv_gem_get_param(device->fd, I915_PARAM_HAS_WAIT_TIMEOUT))
80 goto fail;
81
82 if (!anv_gem_get_param(device->fd, I915_PARAM_HAS_EXECBUF2))
83 goto fail;
84
85 if (!anv_gem_get_param(device->fd, I915_PARAM_HAS_LLC))
86 goto fail;
87
88 if (!anv_gem_get_param(device->fd, I915_PARAM_HAS_EXEC_CONSTANTS))
89 goto fail;
90
91 return VK_SUCCESS;
92
93 fail:
94 anv_physical_device_finish(device);
95 return vk_error(VK_ERROR_UNAVAILABLE);
96 }
97
98 static void *default_alloc(
99 void* pUserData,
100 size_t size,
101 size_t alignment,
102 VkSystemAllocType allocType)
103 {
104 return malloc(size);
105 }
106
107 static void default_free(
108 void* pUserData,
109 void* pMem)
110 {
111 free(pMem);
112 }
113
114 static const VkAllocCallbacks default_alloc_callbacks = {
115 .pUserData = NULL,
116 .pfnAlloc = default_alloc,
117 .pfnFree = default_free
118 };
119
120 VkResult anv_CreateInstance(
121 const VkInstanceCreateInfo* pCreateInfo,
122 VkInstance* pInstance)
123 {
124 struct anv_instance *instance;
125 const VkAllocCallbacks *alloc_callbacks = &default_alloc_callbacks;
126 void *user_data = NULL;
127
128 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO);
129
130 if (pCreateInfo->pAllocCb) {
131 alloc_callbacks = pCreateInfo->pAllocCb;
132 user_data = pCreateInfo->pAllocCb->pUserData;
133 }
134 instance = alloc_callbacks->pfnAlloc(user_data, sizeof(*instance), 8,
135 VK_SYSTEM_ALLOC_TYPE_API_OBJECT);
136 if (!instance)
137 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
138
139 instance->pAllocUserData = alloc_callbacks->pUserData;
140 instance->pfnAlloc = alloc_callbacks->pfnAlloc;
141 instance->pfnFree = alloc_callbacks->pfnFree;
142 instance->apiVersion = pCreateInfo->pAppInfo->apiVersion;
143 instance->physicalDeviceCount = 0;
144
145 *pInstance = anv_instance_to_handle(instance);
146
147 return VK_SUCCESS;
148 }
149
150 VkResult anv_DestroyInstance(
151 VkInstance _instance)
152 {
153 ANV_FROM_HANDLE(anv_instance, instance, _instance);
154
155 if (instance->physicalDeviceCount > 0) {
156 anv_physical_device_finish(&instance->physicalDevice);
157 }
158
159 instance->pfnFree(instance->pAllocUserData, instance);
160
161 return VK_SUCCESS;
162 }
163
164 VkResult anv_EnumeratePhysicalDevices(
165 VkInstance _instance,
166 uint32_t* pPhysicalDeviceCount,
167 VkPhysicalDevice* pPhysicalDevices)
168 {
169 ANV_FROM_HANDLE(anv_instance, instance, _instance);
170 VkResult result;
171
172 if (instance->physicalDeviceCount == 0) {
173 result = anv_physical_device_init(&instance->physicalDevice,
174 instance, "/dev/dri/renderD128");
175 if (result != VK_SUCCESS)
176 return result;
177
178 instance->physicalDeviceCount = 1;
179 }
180
181 /* pPhysicalDeviceCount is an out parameter if pPhysicalDevices is NULL;
182 * otherwise it's an inout parameter.
183 *
184 * The Vulkan spec (git aaed022) says:
185 *
186 * pPhysicalDeviceCount is a pointer to an unsigned integer variable
187 * that is initialized with the number of devices the application is
188 * prepared to receive handles to. pname:pPhysicalDevices is a pointer to
189 * an array of at least this many VkPhysicalDevice handles [...].
190 *
191 * Upon success, if pPhysicalDevices is NULL, vkEnumeratePhysicalDevices
192 * overwrites the contents of the variable pointed to by
193 * pPhysicalDeviceCount with the number of physical devices in the
194 * instance; otherwise, vkEnumeratePhysicalDevices overwrites
195 * pPhysicalDeviceCount with the number of physical handles written to
196 * pPhysicalDevices.
197 */
198 if (!pPhysicalDevices) {
199 *pPhysicalDeviceCount = instance->physicalDeviceCount;
200 } else if (*pPhysicalDeviceCount >= 1) {
201 pPhysicalDevices[0] = anv_physical_device_to_handle(&instance->physicalDevice);
202 *pPhysicalDeviceCount = 1;
203 } else {
204 *pPhysicalDeviceCount = 0;
205 }
206
207 return VK_SUCCESS;
208 }
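/* Illustrative application-side sketch (not part of this driver) of the
 * two-call idiom the spec excerpt above describes: query the count first,
 * then fetch the handles.
 *
 *    uint32_t count = 0;
 *    vkEnumeratePhysicalDevices(instance, &count, NULL);
 *    VkPhysicalDevice *devices = malloc(count * sizeof(*devices));
 *    vkEnumeratePhysicalDevices(instance, &count, devices);
 *
 * With this driver, count always comes back as 1.
 */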
209
210 VkResult anv_GetPhysicalDeviceFeatures(
211 VkPhysicalDevice physicalDevice,
212 VkPhysicalDeviceFeatures* pFeatures)
213 {
214 anv_finishme("Get correct values for PhysicalDeviceFeatures");
215
216 *pFeatures = (VkPhysicalDeviceFeatures) {
217 .robustBufferAccess = false,
218 .fullDrawIndexUint32 = false,
219 .imageCubeArray = false,
220 .independentBlend = false,
221 .geometryShader = true,
222 .tessellationShader = false,
223 .sampleRateShading = false,
224 .dualSourceBlend = true,
225 .logicOp = true,
226 .instancedDrawIndirect = true,
227 .depthClip = false,
228 .depthBiasClamp = false,
229 .fillModeNonSolid = true,
230 .depthBounds = false,
231 .wideLines = true,
232 .largePoints = true,
233 .textureCompressionETC2 = true,
234 .textureCompressionASTC_LDR = true,
235 .textureCompressionBC = true,
236 .pipelineStatisticsQuery = true,
237 .vertexSideEffects = false,
238 .tessellationSideEffects = false,
239 .geometrySideEffects = false,
240 .fragmentSideEffects = false,
241 .shaderTessellationPointSize = false,
242 .shaderGeometryPointSize = true,
243 .shaderTextureGatherExtended = true,
244 .shaderStorageImageExtendedFormats = false,
245 .shaderStorageImageMultisample = false,
246 .shaderStorageBufferArrayConstantIndexing = false,
247 .shaderStorageImageArrayConstantIndexing = false,
248 .shaderUniformBufferArrayDynamicIndexing = true,
249 .shaderSampledImageArrayDynamicIndexing = false,
250 .shaderStorageBufferArrayDynamicIndexing = false,
251 .shaderStorageImageArrayDynamicIndexing = false,
252 .shaderClipDistance = false,
253 .shaderCullDistance = false,
254 .shaderFloat64 = false,
255 .shaderInt64 = false,
256 .shaderFloat16 = false,
257 .shaderInt16 = false,
258 };
259
260 return VK_SUCCESS;
261 }
262
263 VkResult anv_GetPhysicalDeviceLimits(
264 VkPhysicalDevice physicalDevice,
265 VkPhysicalDeviceLimits* pLimits)
266 {
267 ANV_FROM_HANDLE(anv_physical_device, physical_device, physicalDevice);
268 const struct brw_device_info *devinfo = physical_device->info;
269
270 anv_finishme("Get correct values for PhysicalDeviceLimits");
271
272 *pLimits = (VkPhysicalDeviceLimits) {
273 .maxImageDimension1D = (1 << 14),
274 .maxImageDimension2D = (1 << 14),
275 .maxImageDimension3D = (1 << 10),
276 .maxImageDimensionCube = (1 << 14),
277 .maxImageArrayLayers = (1 << 10),
278 .maxTexelBufferSize = (1 << 14),
279 .maxUniformBufferSize = UINT32_MAX,
280 .maxStorageBufferSize = UINT32_MAX,
281 .maxPushConstantsSize = 128,
282 .maxMemoryAllocationCount = UINT32_MAX,
283 .maxBoundDescriptorSets = MAX_SETS,
284 .maxDescriptorSets = UINT32_MAX,
285 .maxPerStageDescriptorSamplers = 64,
286 .maxPerStageDescriptorUniformBuffers = 64,
287 .maxPerStageDescriptorStorageBuffers = 64,
288 .maxPerStageDescriptorSampledImages = 64,
289 .maxPerStageDescriptorStorageImages = 64,
290 .maxDescriptorSetSamplers = 256,
291 .maxDescriptorSetUniformBuffers = 256,
292 .maxDescriptorSetStorageBuffers = 256,
293 .maxDescriptorSetSampledImages = 256,
294 .maxDescriptorSetStorageImages = 256,
295 .maxVertexInputAttributes = 32,
296 .maxVertexInputAttributeOffset = 256,
297 .maxVertexInputBindingStride = 256,
298 .maxVertexOutputComponents = 32,
299 .maxTessGenLevel = 0,
300 .maxTessPatchSize = 0,
301 .maxTessControlPerVertexInputComponents = 0,
302 .maxTessControlPerVertexOutputComponents = 0,
303 .maxTessControlPerPatchOutputComponents = 0,
304 .maxTessControlTotalOutputComponents = 0,
305 .maxTessEvaluationInputComponents = 0,
306 .maxTessEvaluationOutputComponents = 0,
307 .maxGeometryShaderInvocations = 6,
308 .maxGeometryInputComponents = 16,
309 .maxGeometryOutputComponents = 16,
310 .maxGeometryOutputVertices = 16,
311 .maxGeometryTotalOutputComponents = 16,
312 .maxFragmentInputComponents = 16,
313 .maxFragmentOutputBuffers = 8,
314 .maxFragmentDualSourceBuffers = 2,
315 .maxFragmentCombinedOutputResources = 8,
316 .maxComputeSharedMemorySize = 1024,
317 .maxComputeWorkGroupCount = {
318 16 * devinfo->max_cs_threads,
319 16 * devinfo->max_cs_threads,
320 16 * devinfo->max_cs_threads,
321 },
322 .maxComputeWorkGroupInvocations = 16 * devinfo->max_cs_threads,
323 .maxComputeWorkGroupSize = {
324 16 * devinfo->max_cs_threads,
325 16 * devinfo->max_cs_threads,
326 16 * devinfo->max_cs_threads,
327 },
328 .subPixelPrecisionBits = 4 /* FIXME */,
329 .subTexelPrecisionBits = 4 /* FIXME */,
330 .mipmapPrecisionBits = 4 /* FIXME */,
331 .maxDrawIndexedIndexValue = UINT32_MAX,
332 .maxDrawIndirectInstanceCount = UINT32_MAX,
333 .primitiveRestartForPatches = UINT32_MAX,
334 .maxSamplerLodBias = 16,
335 .maxSamplerAnisotropy = 16,
336 .maxViewports = 16,
337 .maxDynamicViewportStates = UINT32_MAX,
338 .maxViewportDimensions = { (1 << 14), (1 << 14) },
339 .viewportBoundsRange = { -1.0, 1.0 }, /* FIXME */
340 .viewportSubPixelBits = 13, /* We take a float? */
341 .minMemoryMapAlignment = 64, /* A cache line */
342 .minTexelBufferOffsetAlignment = 1,
343 .minUniformBufferOffsetAlignment = 1,
344 .minStorageBufferOffsetAlignment = 1,
345 .minTexelOffset = 0, /* FIXME */
346 .maxTexelOffset = 0, /* FIXME */
347 .minTexelGatherOffset = 0, /* FIXME */
348 .maxTexelGatherOffset = 0, /* FIXME */
349 .minInterpolationOffset = 0, /* FIXME */
350 .maxInterpolationOffset = 0, /* FIXME */
351 .subPixelInterpolationOffsetBits = 0, /* FIXME */
352 .maxFramebufferWidth = (1 << 14),
353 .maxFramebufferHeight = (1 << 14),
354 .maxFramebufferLayers = (1 << 10),
355 .maxFramebufferColorSamples = 8,
356 .maxFramebufferDepthSamples = 8,
357 .maxFramebufferStencilSamples = 8,
358 .maxColorAttachments = MAX_RTS,
359 .maxSampledImageColorSamples = 8,
360 .maxSampledImageDepthSamples = 8,
361 .maxSampledImageIntegerSamples = 1,
362 .maxStorageImageSamples = 1,
363 .maxSampleMaskWords = 1,
364 .timestampFrequency = 1000 * 1000 * 1000 / 80,
365 .maxClipDistances = 0 /* FIXME */,
366 .maxCullDistances = 0 /* FIXME */,
367 .maxCombinedClipAndCullDistances = 0 /* FIXME */,
368 .pointSizeRange = { 0.125, 255.875 },
369 .lineWidthRange = { 0.0, 7.9921875 },
370 .pointSizeGranularity = (1.0 / 8.0),
371 .lineWidthGranularity = (1.0 / 128.0),
372 };
373
374 return VK_SUCCESS;
375 }
376
377 VkResult anv_GetPhysicalDeviceProperties(
378 VkPhysicalDevice physicalDevice,
379 VkPhysicalDeviceProperties* pProperties)
380 {
381 ANV_FROM_HANDLE(anv_physical_device, pdevice, physicalDevice);
382
383 *pProperties = (VkPhysicalDeviceProperties) {
384 .apiVersion = 1,
385 .driverVersion = 1,
386 .vendorId = 0x8086,
387 .deviceId = pdevice->chipset_id,
388 .deviceType = VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU,
389 };
390
391 strcpy(pProperties->deviceName, pdevice->name);
392 snprintf((char *)pProperties->pipelineCacheUUID, VK_UUID_LENGTH,
393 "anv-%s", MESA_GIT_SHA1 + 4);
394
395 return VK_SUCCESS;
396 }
397
398 VkResult anv_GetPhysicalDeviceQueueCount(
399 VkPhysicalDevice physicalDevice,
400 uint32_t* pCount)
401 {
402 *pCount = 1;
403
404 return VK_SUCCESS;
405 }
406
407 VkResult anv_GetPhysicalDeviceQueueProperties(
408 VkPhysicalDevice physicalDevice,
409 uint32_t count,
410 VkPhysicalDeviceQueueProperties* pQueueProperties)
411 {
412 assert(count == 1);
413
414 *pQueueProperties = (VkPhysicalDeviceQueueProperties) {
415 .queueFlags = VK_QUEUE_GRAPHICS_BIT |
416 VK_QUEUE_COMPUTE_BIT |
417 VK_QUEUE_DMA_BIT,
418 .queueCount = 1,
419 .supportsTimestamps = true,
420 };
421
422 return VK_SUCCESS;
423 }
424
425 VkResult anv_GetPhysicalDeviceMemoryProperties(
426 VkPhysicalDevice physicalDevice,
427 VkPhysicalDeviceMemoryProperties* pMemoryProperties)
428 {
429 ANV_FROM_HANDLE(anv_physical_device, physical_device, physicalDevice);
430
431 size_t aperture_size;
432 size_t heap_size;
433
434 if (anv_gem_get_aperture(physical_device, &aperture_size) == -1)
435 return vk_error(VK_ERROR_UNAVAILABLE);
436
437 /* Reserve some wiggle room for the driver by exposing only 75% of the
438 * aperture to the heap.
439 */
440 heap_size = 3 * aperture_size / 4;
441
442 /* The property flags below are valid only for llc platforms. */
443 pMemoryProperties->memoryTypeCount = 1;
444 pMemoryProperties->memoryTypes[0] = (VkMemoryType) {
445 .propertyFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT,
446 .heapIndex = 0,
447 };
448
449 pMemoryProperties->memoryHeapCount = 1;
450 pMemoryProperties->memoryHeaps[0] = (VkMemoryHeap) {
451 .size = heap_size,
452 .flags = VK_MEMORY_HEAP_HOST_LOCAL,
453 };
454
455 return VK_SUCCESS;
456 }
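/* Hypothetical application-side selection against the properties reported
 * above -- a sketch, not driver code; `props` is assumed to have been filled
 * by vkGetPhysicalDeviceMemoryProperties().  Since exactly one host-visible
 * type is exposed, the loop always picks index 0.
 *
 *    uint32_t type_index = UINT32_MAX;
 *    for (uint32_t i = 0; i < props.memoryTypeCount; i++) {
 *       if (props.memoryTypes[i].propertyFlags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) {
 *          type_index = i;
 *          break;
 *       }
 *    }
 */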
457
458 PFN_vkVoidFunction anv_GetInstanceProcAddr(
459 VkInstance instance,
460 const char* pName)
461 {
462 return anv_lookup_entrypoint(pName);
463 }
464
465 PFN_vkVoidFunction anv_GetDeviceProcAddr(
466 VkDevice device,
467 const char* pName)
468 {
469 return anv_lookup_entrypoint(pName);
470 }
471
472 static void
473 parse_debug_flags(struct anv_device *device)
474 {
475 const char *debug, *p, *end;
476
477 debug = getenv("INTEL_DEBUG");
478 device->dump_aub = false;
479 if (debug) {
480 for (p = debug; *p; p = end + 1) {
481 end = strchrnul(p, ',');
482 if (end - p == 3 && memcmp(p, "aub", 3) == 0)
483 device->dump_aub = true;
484 if (end - p == 5 && memcmp(p, "no_hw", 5) == 0)
485 device->no_hw = true;
486 if (*end == '\0')
487 break;
488 }
489 }
490 }
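/* Example: running with INTEL_DEBUG=aub,no_hw sets both flags parsed above,
 * so command buffers are dumped via anv_cmd_buffer_dump() at submit time and
 * nothing is actually sent to the hardware.
 */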
491
492 static VkResult
493 anv_queue_init(struct anv_device *device, struct anv_queue *queue)
494 {
495 queue->device = device;
496 queue->pool = &device->surface_state_pool;
497
498 queue->completed_serial = anv_state_pool_alloc(queue->pool, 4, 4);
499 if (queue->completed_serial.map == NULL)
500 return vk_error(VK_ERROR_OUT_OF_DEVICE_MEMORY);
501
502 *(uint32_t *)queue->completed_serial.map = 0;
503 queue->next_serial = 1;
504
505 return VK_SUCCESS;
506 }
507
508 static void
509 anv_queue_finish(struct anv_queue *queue)
510 {
511 #ifdef HAVE_VALGRIND
512 /* This gets torn down with the device so we only need to do this if
513 * valgrind is present.
514 */
515 anv_state_pool_free(queue->pool, queue->completed_serial);
516 #endif
517 }
518
519 static void
520 anv_device_init_border_colors(struct anv_device *device)
521 {
522 static const VkClearColorValue border_colors[] = {
523 [VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK] = { .f32 = { 0.0, 0.0, 0.0, 0.0 } },
524 [VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK] = { .f32 = { 0.0, 0.0, 0.0, 1.0 } },
525 [VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE] = { .f32 = { 1.0, 1.0, 1.0, 1.0 } },
526 [VK_BORDER_COLOR_INT_TRANSPARENT_BLACK] = { .u32 = { 0, 0, 0, 0 } },
527 [VK_BORDER_COLOR_INT_OPAQUE_BLACK] = { .u32 = { 0, 0, 0, 1 } },
528 [VK_BORDER_COLOR_INT_OPAQUE_WHITE] = { .u32 = { 1, 1, 1, 1 } },
529 };
530
531 device->border_colors =
532 anv_state_pool_alloc(&device->dynamic_state_pool,
533 sizeof(border_colors), 32);
534 memcpy(device->border_colors.map, border_colors, sizeof(border_colors));
535 }
536
537 static const uint32_t BATCH_SIZE = 8192;
538
539 VkResult anv_CreateDevice(
540 VkPhysicalDevice physicalDevice,
541 const VkDeviceCreateInfo* pCreateInfo,
542 VkDevice* pDevice)
543 {
544 ANV_FROM_HANDLE(anv_physical_device, physical_device, physicalDevice);
545 struct anv_instance *instance = physical_device->instance;
546 struct anv_device *device;
547
548 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO);
549
550 device = instance->pfnAlloc(instance->pAllocUserData,
551 sizeof(*device), 8,
552 VK_SYSTEM_ALLOC_TYPE_API_OBJECT);
553 if (!device)
554 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
555
556 device->no_hw = physical_device->no_hw;
557 parse_debug_flags(device);
558
559 device->instance = physical_device->instance;
560
561 /* XXX(chadv): Can we dup() physicalDevice->fd here? */
562 device->fd = open(physical_device->path, O_RDWR | O_CLOEXEC);
563 if (device->fd == -1)
564 goto fail_device;
565
566 device->context_id = anv_gem_create_context(device);
567 if (device->context_id == -1)
568 goto fail_fd;
569
570 anv_bo_pool_init(&device->batch_bo_pool, device, BATCH_SIZE);
571
572 anv_block_pool_init(&device->dynamic_state_block_pool, device, 2048);
573
574 anv_state_pool_init(&device->dynamic_state_pool,
575 &device->dynamic_state_block_pool);
576
577 anv_block_pool_init(&device->instruction_block_pool, device, 2048);
578 anv_block_pool_init(&device->surface_state_block_pool, device, 2048);
579
580 anv_state_pool_init(&device->surface_state_pool,
581 &device->surface_state_block_pool);
582
583 anv_block_pool_init(&device->scratch_block_pool, device, 0x10000);
584
585 device->info = *physical_device->info;
586
587 device->compiler = anv_compiler_create(device);
588 device->aub_writer = NULL;
589
590 pthread_mutex_init(&device->mutex, NULL);
591
592 anv_queue_init(device, &device->queue);
593
594 anv_device_init_meta(device);
595
596 anv_device_init_border_colors(device);
597
598 *pDevice = anv_device_to_handle(device);
599
600 return VK_SUCCESS;
601
602 fail_fd:
603 close(device->fd);
604 fail_device:
605 anv_device_free(device, device);
606
607 return vk_error(VK_ERROR_UNAVAILABLE);
608 }
609
610 VkResult anv_DestroyDevice(
611 VkDevice _device)
612 {
613 ANV_FROM_HANDLE(anv_device, device, _device);
614
615 anv_compiler_destroy(device->compiler);
616
617 anv_queue_finish(&device->queue);
618
619 anv_device_finish_meta(device);
620
621 #ifdef HAVE_VALGRIND
622 /* We only need to free these to prevent valgrind errors. The backing
623 * BO will go away in a couple of lines so we don't actually leak.
624 */
625 anv_state_pool_free(&device->dynamic_state_pool, device->border_colors);
626 #endif
627
628 anv_bo_pool_finish(&device->batch_bo_pool);
629 anv_block_pool_finish(&device->dynamic_state_block_pool);
630 anv_block_pool_finish(&device->instruction_block_pool);
631 anv_block_pool_finish(&device->surface_state_block_pool);
632
633 close(device->fd);
634
635 if (device->aub_writer)
636 anv_aub_writer_destroy(device->aub_writer);
637
638 anv_device_free(device, device);
639
640 return VK_SUCCESS;
641 }
642
643 static const VkExtensionProperties global_extensions[] = {
644 {
645 .extName = "VK_WSI_LunarG",
646 .version = 3
647 }
648 };
649
650 VkResult anv_GetGlobalExtensionCount(
651 uint32_t* pCount)
652 {
653 *pCount = ARRAY_SIZE(global_extensions);
654
655 return VK_SUCCESS;
656 }
657
658
659 VkResult anv_GetGlobalExtensionProperties(
660 uint32_t extensionIndex,
661 VkExtensionProperties* pProperties)
662 {
663 assert(extensionIndex < ARRAY_SIZE(global_extensions));
664
665 *pProperties = global_extensions[extensionIndex];
666
667 return VK_SUCCESS;
668 }
669
670 VkResult anv_GetPhysicalDeviceExtensionCount(
671 VkPhysicalDevice physicalDevice,
672 uint32_t* pCount)
673 {
674 /* None supported at this time */
675 *pCount = 0;
676
677 return VK_SUCCESS;
678 }
679
680 VkResult anv_GetPhysicalDeviceExtensionProperties(
681 VkPhysicalDevice physicalDevice,
682 uint32_t extensionIndex,
683 VkExtensionProperties* pProperties)
684 {
685 /* None supported at this time */
686 return vk_error(VK_ERROR_INVALID_EXTENSION);
687 }
688
689 VkResult anv_EnumerateLayers(
690 VkPhysicalDevice physicalDevice,
691 size_t maxStringSize,
692 size_t* pLayerCount,
693 char* const* pOutLayers,
694 void* pReserved)
695 {
696 *pLayerCount = 0;
697
698 return VK_SUCCESS;
699 }
700
701 VkResult anv_GetDeviceQueue(
702 VkDevice _device,
703 uint32_t queueNodeIndex,
704 uint32_t queueIndex,
705 VkQueue* pQueue)
706 {
707 ANV_FROM_HANDLE(anv_device, device, _device);
708
709 assert(queueIndex == 0);
710
711 *pQueue = anv_queue_to_handle(&device->queue);
712
713 return VK_SUCCESS;
714 }
715
716 VkResult
717 anv_reloc_list_init(struct anv_reloc_list *list, struct anv_device *device)
718 {
719 list->num_relocs = 0;
720 list->array_length = 256;
721 list->relocs =
722 anv_device_alloc(device, list->array_length * sizeof(*list->relocs), 8,
723 VK_SYSTEM_ALLOC_TYPE_INTERNAL);
724
725 if (list->relocs == NULL)
726 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
727
728 list->reloc_bos =
729 anv_device_alloc(device, list->array_length * sizeof(*list->reloc_bos), 8,
730 VK_SYSTEM_ALLOC_TYPE_INTERNAL);
731
732 if (list->reloc_bos == NULL) {
733 anv_device_free(device, list->relocs);
734 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
735 }
736
737 return VK_SUCCESS;
738 }
739
740 void
741 anv_reloc_list_finish(struct anv_reloc_list *list, struct anv_device *device)
742 {
743 anv_device_free(device, list->relocs);
744 anv_device_free(device, list->reloc_bos);
745 }
746
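/* Grow both relocation arrays geometrically (doubling) so that appending n
 * relocations stays amortized O(1).  The two arrays are reallocated together
 * to keep relocs[] and reloc_bos[] the same length.
 */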
747 static VkResult
748 anv_reloc_list_grow(struct anv_reloc_list *list, struct anv_device *device,
749 size_t num_additional_relocs)
750 {
751 if (list->num_relocs + num_additional_relocs <= list->array_length)
752 return VK_SUCCESS;
753
754 size_t new_length = list->array_length * 2;
755 while (new_length < list->num_relocs + num_additional_relocs)
756 new_length *= 2;
757
758 struct drm_i915_gem_relocation_entry *new_relocs =
759 anv_device_alloc(device, new_length * sizeof(*list->relocs), 8,
760 VK_SYSTEM_ALLOC_TYPE_INTERNAL);
761 if (new_relocs == NULL)
762 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
763
764 struct anv_bo **new_reloc_bos =
765 anv_device_alloc(device, new_length * sizeof(*list->reloc_bos), 8,
766 VK_SYSTEM_ALLOC_TYPE_INTERNAL);
767 if (new_reloc_bos == NULL) {
768 anv_device_free(device, new_relocs);
769 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
770 }
771
772 memcpy(new_relocs, list->relocs, list->num_relocs * sizeof(*list->relocs));
773 memcpy(new_reloc_bos, list->reloc_bos,
774 list->num_relocs * sizeof(*list->reloc_bos));
775
776 anv_device_free(device, list->relocs);
777 anv_device_free(device, list->reloc_bos);
778
779 list->relocs = new_relocs;
780 list->reloc_bos = new_reloc_bos;
781
782 return VK_SUCCESS;
783 }
784
785 static VkResult
786 anv_batch_bo_create(struct anv_device *device, struct anv_batch_bo **bbo_out)
787 {
788 VkResult result;
789
790 struct anv_batch_bo *bbo =
791 anv_device_alloc(device, sizeof(*bbo), 8, VK_SYSTEM_ALLOC_TYPE_INTERNAL);
792 if (bbo == NULL)
793 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
794
795 bbo->num_relocs = 0;
796 bbo->prev_batch_bo = NULL;
797
798 result = anv_bo_pool_alloc(&device->batch_bo_pool, &bbo->bo);
799 if (result != VK_SUCCESS) {
800 anv_device_free(device, bbo);
801 return result;
802 }
803
804 *bbo_out = bbo;
805
806 return VK_SUCCESS;
807 }
808
809 static void
810 anv_batch_bo_start(struct anv_batch_bo *bbo, struct anv_batch *batch,
811 size_t batch_padding)
812 {
813 batch->next = batch->start = bbo->bo.map;
814 batch->end = bbo->bo.map + bbo->bo.size - batch_padding;
815 bbo->first_reloc = batch->relocs.num_relocs;
816 }
817
818 static void
819 anv_batch_bo_finish(struct anv_batch_bo *bbo, struct anv_batch *batch)
820 {
821 assert(batch->start == bbo->bo.map);
822 bbo->length = batch->next - batch->start;
823 VG(VALGRIND_CHECK_MEM_IS_DEFINED(batch->start, bbo->length));
824 bbo->num_relocs = batch->relocs.num_relocs - bbo->first_reloc;
825 }
826
827 static void
828 anv_batch_bo_destroy(struct anv_batch_bo *bbo, struct anv_device *device)
829 {
830 anv_bo_pool_free(&device->batch_bo_pool, &bbo->bo);
831 anv_device_free(device, bbo);
832 }
833
834 void *
835 anv_batch_emit_dwords(struct anv_batch *batch, int num_dwords)
836 {
837 if (batch->next + num_dwords * 4 > batch->end)
838 batch->extend_cb(batch, batch->user_data);
839
840 void *p = batch->next;
841
842 batch->next += num_dwords * 4;
843 assert(batch->next <= batch->end);
844
845 return p;
846 }
847
848 static void
849 anv_reloc_list_append(struct anv_reloc_list *list, struct anv_device *device,
850 struct anv_reloc_list *other, uint32_t offset)
851 {
852 anv_reloc_list_grow(list, device, other->num_relocs);
853 /* TODO: Handle failure */
854
855 memcpy(&list->relocs[list->num_relocs], &other->relocs[0],
856 other->num_relocs * sizeof(other->relocs[0]));
857 memcpy(&list->reloc_bos[list->num_relocs], &other->reloc_bos[0],
858 other->num_relocs * sizeof(other->reloc_bos[0]));
859
860 for (uint32_t i = 0; i < other->num_relocs; i++)
861 list->relocs[i + list->num_relocs].offset += offset;
862
863 list->num_relocs += other->num_relocs;
864 }
865
866 static uint64_t
867 anv_reloc_list_add(struct anv_reloc_list *list, struct anv_device *device,
868 uint32_t offset, struct anv_bo *target_bo, uint32_t delta)
869 {
870 struct drm_i915_gem_relocation_entry *entry;
871 int index;
872
873 anv_reloc_list_grow(list, device, 1);
874 /* TODO: Handle failure */
875
876 /* XXX: Can we use I915_EXEC_HANDLE_LUT? */
877 index = list->num_relocs++;
878 list->reloc_bos[index] = target_bo;
879 entry = &list->relocs[index];
880 entry->target_handle = target_bo->gem_handle;
881 entry->delta = delta;
882 entry->offset = offset;
883 entry->presumed_offset = target_bo->offset;
884 entry->read_domains = 0;
885 entry->write_domain = 0;
886
887 return target_bo->offset + delta;
888 }
889
890 void
891 anv_batch_emit_batch(struct anv_batch *batch, struct anv_batch *other)
892 {
893 uint32_t size, offset;
894
895 size = other->next - other->start;
896 assert(size % 4 == 0);
897
898 if (batch->next + size > batch->end)
899 batch->extend_cb(batch, batch->user_data);
900
901 assert(batch->next + size <= batch->end);
902
903 memcpy(batch->next, other->start, size);
904
905 offset = batch->next - batch->start;
906 anv_reloc_list_append(&batch->relocs, batch->device,
907 &other->relocs, offset);
908
909 batch->next += size;
910 }
911
912 uint64_t
913 anv_batch_emit_reloc(struct anv_batch *batch,
914 void *location, struct anv_bo *bo, uint32_t delta)
915 {
916 return anv_reloc_list_add(&batch->relocs, batch->device,
917 location - batch->start, bo, delta);
918 }
919
920 VkResult anv_QueueSubmit(
921 VkQueue _queue,
922 uint32_t cmdBufferCount,
923 const VkCmdBuffer* pCmdBuffers,
924 VkFence _fence)
925 {
926 ANV_FROM_HANDLE(anv_queue, queue, _queue);
927 ANV_FROM_HANDLE(anv_fence, fence, _fence);
928 struct anv_device *device = queue->device;
929 int ret;
930
931 for (uint32_t i = 0; i < cmdBufferCount; i++) {
932 ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, pCmdBuffers[i]);
933
934 if (device->dump_aub)
935 anv_cmd_buffer_dump(cmd_buffer);
936
937 if (!device->no_hw) {
938 ret = anv_gem_execbuffer(device, &cmd_buffer->execbuf);
939 if (ret != 0)
940 return vk_error(VK_ERROR_UNKNOWN);
941
942 if (fence) {
943 ret = anv_gem_execbuffer(device, &fence->execbuf);
944 if (ret != 0)
945 return vk_error(VK_ERROR_UNKNOWN);
946 }
947
948 for (uint32_t i = 0; i < cmd_buffer->bo_count; i++)
949 cmd_buffer->exec2_bos[i]->offset = cmd_buffer->exec2_objects[i].offset;
950 } else {
951 *(uint32_t *)queue->completed_serial.map = cmd_buffer->serial;
952 }
953 }
954
955 return VK_SUCCESS;
956 }
957
958 VkResult anv_QueueWaitIdle(
959 VkQueue _queue)
960 {
961 ANV_FROM_HANDLE(anv_queue, queue, _queue);
962
963 return vkDeviceWaitIdle(anv_device_to_handle(queue->device));
964 }
965
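/* Wait for the GPU to go idle by submitting a trivial batch (just
 * MI_BATCH_BUFFER_END plus a padding MI_NOOP) on this device's context and
 * then blocking on its BO with an effectively infinite timeout.  Since the
 * kernel executes submissions on a single context in order, that batch
 * retires only after all previously submitted work has completed.
 */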
966 VkResult anv_DeviceWaitIdle(
967 VkDevice _device)
968 {
969 ANV_FROM_HANDLE(anv_device, device, _device);
970 struct anv_state state;
971 struct anv_batch batch;
972 struct drm_i915_gem_execbuffer2 execbuf;
973 struct drm_i915_gem_exec_object2 exec2_objects[1];
974 struct anv_bo *bo = NULL;
975 VkResult result;
976 int64_t timeout;
977 int ret;
978
979 state = anv_state_pool_alloc(&device->dynamic_state_pool, 32, 32);
980 bo = &device->dynamic_state_pool.block_pool->bo;
981 batch.start = batch.next = state.map;
982 batch.end = state.map + 32;
983 anv_batch_emit(&batch, GEN8_MI_BATCH_BUFFER_END);
984 anv_batch_emit(&batch, GEN8_MI_NOOP);
985
986 exec2_objects[0].handle = bo->gem_handle;
987 exec2_objects[0].relocation_count = 0;
988 exec2_objects[0].relocs_ptr = 0;
989 exec2_objects[0].alignment = 0;
990 exec2_objects[0].offset = bo->offset;
991 exec2_objects[0].flags = 0;
992 exec2_objects[0].rsvd1 = 0;
993 exec2_objects[0].rsvd2 = 0;
994
995 execbuf.buffers_ptr = (uintptr_t) exec2_objects;
996 execbuf.buffer_count = 1;
997 execbuf.batch_start_offset = state.offset;
998 execbuf.batch_len = batch.next - state.map;
999 execbuf.cliprects_ptr = 0;
1000 execbuf.num_cliprects = 0;
1001 execbuf.DR1 = 0;
1002 execbuf.DR4 = 0;
1003
1004 execbuf.flags =
1005 I915_EXEC_HANDLE_LUT | I915_EXEC_NO_RELOC | I915_EXEC_RENDER;
1006 execbuf.rsvd1 = device->context_id;
1007 execbuf.rsvd2 = 0;
1008
1009 if (!device->no_hw) {
1010 ret = anv_gem_execbuffer(device, &execbuf);
1011 if (ret != 0) {
1012 result = vk_error(VK_ERROR_UNKNOWN);
1013 goto fail;
1014 }
1015
1016 timeout = INT64_MAX;
1017 ret = anv_gem_wait(device, bo->gem_handle, &timeout);
1018 if (ret != 0) {
1019 result = vk_error(VK_ERROR_UNKNOWN);
1020 goto fail;
1021 }
1022 }
1023
1024 anv_state_pool_free(&device->dynamic_state_pool, state);
1025
1026 return VK_SUCCESS;
1027
1028 fail:
1029 anv_state_pool_free(&device->dynamic_state_pool, state);
1030
1031 return result;
1032 }
1033
1034 void *
1035 anv_device_alloc(struct anv_device * device,
1036 size_t size,
1037 size_t alignment,
1038 VkSystemAllocType allocType)
1039 {
1040 return device->instance->pfnAlloc(device->instance->pAllocUserData,
1041 size,
1042 alignment,
1043 allocType);
1044 }
1045
1046 void
1047 anv_device_free(struct anv_device * device,
1048 void * mem)
1049 {
1050 return device->instance->pfnFree(device->instance->pAllocUserData,
1051 mem);
1052 }
1053
1054 VkResult
1055 anv_bo_init_new(struct anv_bo *bo, struct anv_device *device, uint64_t size)
1056 {
1057 bo->gem_handle = anv_gem_create(device, size);
1058 if (!bo->gem_handle)
1059 return vk_error(VK_ERROR_OUT_OF_DEVICE_MEMORY);
1060
1061 bo->map = NULL;
1062 bo->index = 0;
1063 bo->offset = 0;
1064 bo->size = size;
1065
1066 return VK_SUCCESS;
1067 }
1068
1069 VkResult anv_AllocMemory(
1070 VkDevice _device,
1071 const VkMemoryAllocInfo* pAllocInfo,
1072 VkDeviceMemory* pMem)
1073 {
1074 ANV_FROM_HANDLE(anv_device, device, _device);
1075 struct anv_device_memory *mem;
1076 VkResult result;
1077
1078 assert(pAllocInfo->sType == VK_STRUCTURE_TYPE_MEMORY_ALLOC_INFO);
1079
1080 if (pAllocInfo->memoryTypeIndex != 0) {
1081 /* We support exactly one memory heap. */
1082 return vk_error(VK_ERROR_INVALID_VALUE);
1083 }
1084
1085 /* FINISHME: Fail if allocation request exceeds heap size. */
1086
1087 mem = anv_device_alloc(device, sizeof(*mem), 8,
1088 VK_SYSTEM_ALLOC_TYPE_API_OBJECT);
1089 if (mem == NULL)
1090 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
1091
1092 result = anv_bo_init_new(&mem->bo, device, pAllocInfo->allocationSize);
1093 if (result != VK_SUCCESS)
1094 goto fail;
1095
1096 *pMem = anv_device_memory_to_handle(mem);
1097
1098 return VK_SUCCESS;
1099
1100 fail:
1101 anv_device_free(device, mem);
1102
1103 return result;
1104 }
1105
1106 VkResult anv_FreeMemory(
1107 VkDevice _device,
1108 VkDeviceMemory _mem)
1109 {
1110 ANV_FROM_HANDLE(anv_device, device, _device);
1111 ANV_FROM_HANDLE(anv_device_memory, mem, _mem);
1112
1113 if (mem->bo.map)
1114 anv_gem_munmap(mem->bo.map, mem->bo.size);
1115
1116 if (mem->bo.gem_handle != 0)
1117 anv_gem_close(device, mem->bo.gem_handle);
1118
1119 anv_device_free(device, mem);
1120
1121 return VK_SUCCESS;
1122 }
1123
1124 VkResult anv_MapMemory(
1125 VkDevice _device,
1126 VkDeviceMemory _mem,
1127 VkDeviceSize offset,
1128 VkDeviceSize size,
1129 VkMemoryMapFlags flags,
1130 void** ppData)
1131 {
1132 ANV_FROM_HANDLE(anv_device, device, _device);
1133 ANV_FROM_HANDLE(anv_device_memory, mem, _mem);
1134
1135 /* FIXME: Is this supposed to be thread safe? Since vkUnmapMemory() only
1136 * takes a VkDeviceMemory pointer, it seems like only one map of the memory
1137 * at a time is valid. We could just mmap up front and return an offset
1138 * pointer here, but that may exhaust virtual memory on 32 bit
1139 * userspace. */
1140
1141 mem->map = anv_gem_mmap(device, mem->bo.gem_handle, offset, size);
1142 mem->map_size = size;
1143
1144 *ppData = mem->map;
1145
1146 return VK_SUCCESS;
1147 }
1148
1149 VkResult anv_UnmapMemory(
1150 VkDevice _device,
1151 VkDeviceMemory _mem)
1152 {
1153 ANV_FROM_HANDLE(anv_device_memory, mem, _mem);
1154
1155 anv_gem_munmap(mem->map, mem->map_size);
1156
1157 return VK_SUCCESS;
1158 }
1159
1160 VkResult anv_FlushMappedMemoryRanges(
1161 VkDevice device,
1162 uint32_t memRangeCount,
1163 const VkMappedMemoryRange* pMemRanges)
1164 {
1165 /* clflush here for !llc platforms */
1166
1167 return VK_SUCCESS;
1168 }
1169
1170 VkResult anv_InvalidateMappedMemoryRanges(
1171 VkDevice device,
1172 uint32_t memRangeCount,
1173 const VkMappedMemoryRange* pMemRanges)
1174 {
1175 return anv_FlushMappedMemoryRanges(device, memRangeCount, pMemRanges);
1176 }
1177
1178 VkResult anv_DestroyObject(
1179 VkDevice _device,
1180 VkObjectType objType,
1181 VkObject _object)
1182 {
1183 ANV_FROM_HANDLE(anv_device, device, _device);
1184 struct anv_object *object = (struct anv_object *) _object;
1185
1186 switch (objType) {
1187 case VK_OBJECT_TYPE_INSTANCE:
1188 return anv_DestroyInstance((VkInstance) _object);
1189
1190 case VK_OBJECT_TYPE_PHYSICAL_DEVICE:
1191 /* We don't want to actually destroy physical devices */
1192 return VK_SUCCESS;
1193
1194 case VK_OBJECT_TYPE_DEVICE:
1195 assert(_device == (VkDevice) _object);
1196 return anv_DestroyDevice((VkDevice) _object);
1197
1198 case VK_OBJECT_TYPE_QUEUE:
1199 /* TODO */
1200 return VK_SUCCESS;
1201
1202 case VK_OBJECT_TYPE_DEVICE_MEMORY:
1203 return anv_FreeMemory(_device, (VkDeviceMemory) _object);
1204
1205 case VK_OBJECT_TYPE_DESCRIPTOR_POOL:
1206 case VK_OBJECT_TYPE_PIPELINE_CACHE:
1207 /* These are just dummies anyway, so we don't need to destroy them */
1208 return VK_SUCCESS;
1209
1210 case VK_OBJECT_TYPE_BUFFER_VIEW:
1211 return anv_DestroyBufferView(_device, _object);
1212
1213 case VK_OBJECT_TYPE_IMAGE_VIEW:
1214 return anv_DestroyImageView(_device, _object);
1215
1216 case VK_OBJECT_TYPE_COLOR_ATTACHMENT_VIEW:
1217 return anv_DestroyColorAttachmentView(_device, _object);
1218
1219 case VK_OBJECT_TYPE_DEPTH_STENCIL_VIEW:
1220 return anv_DestroyDepthStencilView(_device, _object);
1221
1222 case VK_OBJECT_TYPE_IMAGE:
1223 return anv_DestroyImage(_device, _object);
1224
1225 case VK_OBJECT_TYPE_BUFFER:
1226 case VK_OBJECT_TYPE_SHADER:
1227 case VK_OBJECT_TYPE_SHADER_MODULE:
1228 case VK_OBJECT_TYPE_PIPELINE_LAYOUT:
1229 case VK_OBJECT_TYPE_SAMPLER:
1230 case VK_OBJECT_TYPE_DESCRIPTOR_SET:
1231 case VK_OBJECT_TYPE_DESCRIPTOR_SET_LAYOUT:
1232 case VK_OBJECT_TYPE_DYNAMIC_RS_STATE:
1233 case VK_OBJECT_TYPE_DYNAMIC_CB_STATE:
1234 case VK_OBJECT_TYPE_DYNAMIC_DS_STATE:
1235 case VK_OBJECT_TYPE_RENDER_PASS:
1236 /* These are trivially destroyable */
1237 anv_device_free(device, (void *) _object);
1238 return VK_SUCCESS;
1239
1240 case VK_OBJECT_TYPE_COMMAND_BUFFER:
1241 case VK_OBJECT_TYPE_PIPELINE:
1242 case VK_OBJECT_TYPE_DYNAMIC_VP_STATE:
1243 case VK_OBJECT_TYPE_FENCE:
1244 case VK_OBJECT_TYPE_QUERY_POOL:
1245 case VK_OBJECT_TYPE_FRAMEBUFFER:
1246 (object->destructor)(device, object, objType);
1247 return VK_SUCCESS;
1248
1249 case VK_OBJECT_TYPE_SEMAPHORE:
1250 case VK_OBJECT_TYPE_EVENT:
1251 stub_return(VK_UNSUPPORTED);
1252
1253 default:
1254 unreachable("Invalid object type");
1255 }
1256 }
1257
1258 VkResult anv_GetObjectMemoryRequirements(
1259 VkDevice device,
1260 VkObjectType objType,
1261 VkObject object,
1262 VkMemoryRequirements* pMemoryRequirements)
1263 {
1264
1265 /* The Vulkan spec (git aaed022) says:
1266 *
1267 * memoryTypeBits is a bitfield and contains one bit set for every
1268 * supported memory type for the resource. The bit `1<<i` is set if and
1269 * only if the memory type `i` in the VkPhysicalDeviceMemoryProperties
1270 * structure for the physical device is supported.
1271 *
1272 * We support exactly one memory type.
1273 */
1274 pMemoryRequirements->memoryTypeBits = 1;
1275
1276 switch (objType) {
1277 case VK_OBJECT_TYPE_BUFFER: {
1278 struct anv_buffer *buffer = anv_buffer_from_handle(object);
1279 pMemoryRequirements->size = buffer->size;
1280 pMemoryRequirements->alignment = 16;
1281 break;
1282 }
1283 case VK_OBJECT_TYPE_IMAGE: {
1284 struct anv_image *image = anv_image_from_handle(object);
1285 pMemoryRequirements->size = image->size;
1286 pMemoryRequirements->alignment = image->alignment;
1287 break;
1288 }
1289 default:
1290 pMemoryRequirements->size = 0;
1291 break;
1292 }
1293
1294 return VK_SUCCESS;
1295 }
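/* Hypothetical application-side check against the requirements returned
 * above (illustrative only): bit i of memoryTypeBits gates memory type i, and
 * because this driver always reports 0x1, only type 0 is usable.
 *
 *    VkMemoryRequirements reqs;
 *    vkGetObjectMemoryRequirements(device, VK_OBJECT_TYPE_BUFFER, buffer, &reqs);
 *    bool usable = (reqs.memoryTypeBits & (1u << 0)) != 0;   // always true here
 */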
1296
1297 VkResult anv_BindObjectMemory(
1298 VkDevice device,
1299 VkObjectType objType,
1300 VkObject object,
1301 VkDeviceMemory _mem,
1302 VkDeviceSize memOffset)
1303 {
1304 ANV_FROM_HANDLE(anv_device_memory, mem, _mem);
1305 struct anv_buffer *buffer;
1306 struct anv_image *image;
1307
1308 switch (objType) {
1309 case VK_OBJECT_TYPE_BUFFER:
1310 buffer = anv_buffer_from_handle(object);
1311 buffer->bo = &mem->bo;
1312 buffer->offset = memOffset;
1313 break;
1314 case VK_OBJECT_TYPE_IMAGE:
1315 image = anv_image_from_handle(object);
1316 image->bo = &mem->bo;
1317 image->offset = memOffset;
1318 break;
1319 default:
1320 break;
1321 }
1322
1323 return VK_SUCCESS;
1324 }
1325
1326 VkResult anv_QueueBindSparseBufferMemory(
1327 VkQueue queue,
1328 VkBuffer buffer,
1329 VkDeviceSize rangeOffset,
1330 VkDeviceSize rangeSize,
1331 VkDeviceMemory mem,
1332 VkDeviceSize memOffset)
1333 {
1334 stub_return(VK_UNSUPPORTED);
1335 }
1336
1337 VkResult anv_QueueBindSparseImageMemory(
1338 VkQueue queue,
1339 VkImage image,
1340 const VkImageMemoryBindInfo* pBindInfo,
1341 VkDeviceMemory mem,
1342 VkDeviceSize memOffset)
1343 {
1344 stub_return(VK_UNSUPPORTED);
1345 }
1346
1347 static void
1348 anv_fence_destroy(struct anv_device *device,
1349 struct anv_object *object,
1350 VkObjectType obj_type)
1351 {
1352 struct anv_fence *fence = (struct anv_fence *) object;
1353
1354 assert(obj_type == VK_OBJECT_TYPE_FENCE);
1355
1356 anv_DestroyFence(anv_device_to_handle(device),
1357 anv_fence_to_handle(fence));
1358 }
1359
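/* A fence is backed by a small BO containing nothing but
 * MI_BATCH_BUFFER_END (plus a padding MI_NOOP), together with a pre-built
 * execbuf that references it.  vkQueueSubmit executes the fence batch right
 * after the command buffers, so waiting on the fence BO (vkGetFenceStatus /
 * vkWaitForFences below) amounts to waiting for the submitted work to retire.
 */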
1360 VkResult anv_CreateFence(
1361 VkDevice _device,
1362 const VkFenceCreateInfo* pCreateInfo,
1363 VkFence* pFence)
1364 {
1365 ANV_FROM_HANDLE(anv_device, device, _device);
1366 struct anv_fence *fence;
1367 struct anv_batch batch;
1368 VkResult result;
1369
1370 const uint32_t fence_size = 128;
1371
1372 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_FENCE_CREATE_INFO);
1373
1374 fence = anv_device_alloc(device, sizeof(*fence), 8,
1375 VK_SYSTEM_ALLOC_TYPE_API_OBJECT);
1376 if (fence == NULL)
1377 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
1378
1379 result = anv_bo_init_new(&fence->bo, device, fence_size);
1380 if (result != VK_SUCCESS)
1381 goto fail;
1382
1383 fence->base.destructor = anv_fence_destroy;
1384
1385 fence->bo.map =
1386 anv_gem_mmap(device, fence->bo.gem_handle, 0, fence->bo.size);
1387 batch.next = batch.start = fence->bo.map;
1388 batch.end = fence->bo.map + fence->bo.size;
1389 anv_batch_emit(&batch, GEN8_MI_BATCH_BUFFER_END);
1390 anv_batch_emit(&batch, GEN8_MI_NOOP);
1391
1392 fence->exec2_objects[0].handle = fence->bo.gem_handle;
1393 fence->exec2_objects[0].relocation_count = 0;
1394 fence->exec2_objects[0].relocs_ptr = 0;
1395 fence->exec2_objects[0].alignment = 0;
1396 fence->exec2_objects[0].offset = fence->bo.offset;
1397 fence->exec2_objects[0].flags = 0;
1398 fence->exec2_objects[0].rsvd1 = 0;
1399 fence->exec2_objects[0].rsvd2 = 0;
1400
1401 fence->execbuf.buffers_ptr = (uintptr_t) fence->exec2_objects;
1402 fence->execbuf.buffer_count = 1;
1403 fence->execbuf.batch_start_offset = 0;
1404 fence->execbuf.batch_len = batch.next - fence->bo.map;
1405 fence->execbuf.cliprects_ptr = 0;
1406 fence->execbuf.num_cliprects = 0;
1407 fence->execbuf.DR1 = 0;
1408 fence->execbuf.DR4 = 0;
1409
1410 fence->execbuf.flags =
1411 I915_EXEC_HANDLE_LUT | I915_EXEC_NO_RELOC | I915_EXEC_RENDER;
1412 fence->execbuf.rsvd1 = device->context_id;
1413 fence->execbuf.rsvd2 = 0;
1414
1415 *pFence = anv_fence_to_handle(fence);
1416
1417 return VK_SUCCESS;
1418
1419 fail:
1420 anv_device_free(device, fence);
1421
1422 return result;
1423 }
1424
1425 VkResult anv_DestroyFence(
1426 VkDevice _device,
1427 VkFence _fence)
1428 {
1429 ANV_FROM_HANDLE(anv_device, device, _device);
1430 ANV_FROM_HANDLE(anv_fence, fence, _fence);
1431
1432 anv_gem_munmap(fence->bo.map, fence->bo.size);
1433 anv_gem_close(device, fence->bo.gem_handle);
1434 anv_device_free(device, fence);
1435
1436 return VK_SUCCESS;
1437 }
1438
1439 VkResult anv_ResetFences(
1440 VkDevice _device,
1441 uint32_t fenceCount,
1442 const VkFence* pFences)
1443 {
1444 struct anv_fence **fences = (struct anv_fence **) pFences;
1445
1446 for (uint32_t i = 0; i < fenceCount; i++)
1447 fences[i]->ready = false;
1448
1449 return VK_SUCCESS;
1450 }
1451
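/* Non-blocking status query: the zero timeout turns anv_gem_wait() into a
 * poll, so this returns VK_NOT_READY immediately if the fence batch has not
 * retired yet.
 */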
1452 VkResult anv_GetFenceStatus(
1453 VkDevice _device,
1454 VkFence _fence)
1455 {
1456 ANV_FROM_HANDLE(anv_device, device, _device);
1457 ANV_FROM_HANDLE(anv_fence, fence, _fence);
1458 int64_t t = 0;
1459 int ret;
1460
1461 if (fence->ready)
1462 return VK_SUCCESS;
1463
1464 ret = anv_gem_wait(device, fence->bo.gem_handle, &t);
1465 if (ret == 0) {
1466 fence->ready = true;
1467 return VK_SUCCESS;
1468 }
1469
1470 return VK_NOT_READY;
1471 }
1472
1473 VkResult anv_WaitForFences(
1474 VkDevice _device,
1475 uint32_t fenceCount,
1476 const VkFence* pFences,
1477 VkBool32 waitAll,
1478 uint64_t timeout)
1479 {
1480 ANV_FROM_HANDLE(anv_device, device, _device);
1481 int64_t t = timeout;
1482 int ret;
1483
1484 /* FIXME: handle !waitAll */
1485
1486 for (uint32_t i = 0; i < fenceCount; i++) {
1487 ANV_FROM_HANDLE(anv_fence, fence, pFences[i]);
1488 ret = anv_gem_wait(device, fence->bo.gem_handle, &t);
1489 if (ret == -1 && errno == ETIME)
1490 return VK_TIMEOUT;
1491 else if (ret == -1)
1492 return vk_error(VK_ERROR_UNKNOWN);
1493 }
1494
1495 return VK_SUCCESS;
1496 }
1497
1498 // Queue semaphore functions
1499
1500 VkResult anv_CreateSemaphore(
1501 VkDevice device,
1502 const VkSemaphoreCreateInfo* pCreateInfo,
1503 VkSemaphore* pSemaphore)
1504 {
1505 stub_return(VK_UNSUPPORTED);
1506 }
1507
1508 VkResult anv_QueueSignalSemaphore(
1509 VkQueue queue,
1510 VkSemaphore semaphore)
1511 {
1512 stub_return(VK_UNSUPPORTED);
1513 }
1514
1515 VkResult anv_QueueWaitSemaphore(
1516 VkQueue queue,
1517 VkSemaphore semaphore)
1518 {
1519 stub_return(VK_UNSUPPORTED);
1520 }
1521
1522 // Event functions
1523
1524 VkResult anv_CreateEvent(
1525 VkDevice device,
1526 const VkEventCreateInfo* pCreateInfo,
1527 VkEvent* pEvent)
1528 {
1529 stub_return(VK_UNSUPPORTED);
1530 }
1531
1532 VkResult anv_GetEventStatus(
1533 VkDevice device,
1534 VkEvent event)
1535 {
1536 stub_return(VK_UNSUPPORTED);
1537 }
1538
1539 VkResult anv_SetEvent(
1540 VkDevice device,
1541 VkEvent event)
1542 {
1543 stub_return(VK_UNSUPPORTED);
1544 }
1545
1546 VkResult anv_ResetEvent(
1547 VkDevice device,
1548 VkEvent event)
1549 {
1550 stub_return(VK_UNSUPPORTED);
1551 }
1552
1553 // Buffer functions
1554
1555 VkResult anv_CreateBuffer(
1556 VkDevice _device,
1557 const VkBufferCreateInfo* pCreateInfo,
1558 VkBuffer* pBuffer)
1559 {
1560 ANV_FROM_HANDLE(anv_device, device, _device);
1561 struct anv_buffer *buffer;
1562
1563 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO);
1564
1565 buffer = anv_device_alloc(device, sizeof(*buffer), 8,
1566 VK_SYSTEM_ALLOC_TYPE_API_OBJECT);
1567 if (buffer == NULL)
1568 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
1569
1570 buffer->size = pCreateInfo->size;
1571 buffer->bo = NULL;
1572 buffer->offset = 0;
1573
1574 *pBuffer = anv_buffer_to_handle(buffer);
1575
1576 return VK_SUCCESS;
1577 }
1578
1579 // Buffer view functions
1580
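/* Fill out a RENDER_SURFACE_STATE describing a typed buffer.  As the packing
 * below shows, the element count is split across the Width (bits 0..6),
 * Height (bits 7..20) and Depth (bits 21..26) fields, which is how buffer
 * surfaces encode their size in gen8 surface state.
 */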
1581 static void
1582 fill_buffer_surface_state(void *state, VkFormat format,
1583 uint32_t offset, uint32_t range)
1584 {
1585 const struct anv_format *info;
1586
1587 info = anv_format_for_vk_format(format);
1588 /* This assumes RGBA float format. */
1589 uint32_t stride = 4;
1590 uint32_t num_elements = range / stride;
1591
1592 struct GEN8_RENDER_SURFACE_STATE surface_state = {
1593 .SurfaceType = SURFTYPE_BUFFER,
1594 .SurfaceArray = false,
1595 .SurfaceFormat = info->surface_format,
1596 .SurfaceVerticalAlignment = VALIGN4,
1597 .SurfaceHorizontalAlignment = HALIGN4,
1598 .TileMode = LINEAR,
1599 .VerticalLineStride = 0,
1600 .VerticalLineStrideOffset = 0,
1601 .SamplerL2BypassModeDisable = true,
1602 .RenderCacheReadWriteMode = WriteOnlyCache,
1603 .MemoryObjectControlState = GEN8_MOCS,
1604 .BaseMipLevel = 0.0,
1605 .SurfaceQPitch = 0,
1606 .Height = (num_elements >> 7) & 0x3fff,
1607 .Width = num_elements & 0x7f,
1608 .Depth = (num_elements >> 21) & 0x3f,
1609 .SurfacePitch = stride - 1,
1610 .MinimumArrayElement = 0,
1611 .NumberofMultisamples = MULTISAMPLECOUNT_1,
1612 .XOffset = 0,
1613 .YOffset = 0,
1614 .SurfaceMinLOD = 0,
1615 .MIPCountLOD = 0,
1616 .AuxiliarySurfaceMode = AUX_NONE,
1617 .RedClearColor = 0,
1618 .GreenClearColor = 0,
1619 .BlueClearColor = 0,
1620 .AlphaClearColor = 0,
1621 .ShaderChannelSelectRed = SCS_RED,
1622 .ShaderChannelSelectGreen = SCS_GREEN,
1623 .ShaderChannelSelectBlue = SCS_BLUE,
1624 .ShaderChannelSelectAlpha = SCS_ALPHA,
1625 .ResourceMinLOD = 0.0,
1626 /* FIXME: We assume that the image must be bound at this time. */
1627 .SurfaceBaseAddress = { NULL, offset },
1628 };
1629
1630 GEN8_RENDER_SURFACE_STATE_pack(NULL, state, &surface_state);
1631 }
1632
1633 VkResult anv_CreateBufferView(
1634 VkDevice _device,
1635 const VkBufferViewCreateInfo* pCreateInfo,
1636 VkBufferView* pView)
1637 {
1638 ANV_FROM_HANDLE(anv_device, device, _device);
1639 ANV_FROM_HANDLE(anv_buffer, buffer, pCreateInfo->buffer);
1640 struct anv_surface_view *view;
1641
1642 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_BUFFER_VIEW_CREATE_INFO);
1643
1644 view = anv_device_alloc(device, sizeof(*view), 8,
1645 VK_SYSTEM_ALLOC_TYPE_API_OBJECT);
1646 if (view == NULL)
1647 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
1648
1649 view->bo = buffer->bo;
1650 view->offset = buffer->offset + pCreateInfo->offset;
1651 view->surface_state =
1652 anv_state_pool_alloc(&device->surface_state_pool, 64, 64);
1653 view->format = pCreateInfo->format;
1654 view->range = pCreateInfo->range;
1655
1656 fill_buffer_surface_state(view->surface_state.map,
1657 pCreateInfo->format, view->offset, pCreateInfo->range);
1658
1659 *pView = (VkBufferView) view;
1660
1661 return VK_SUCCESS;
1662 }
1663
1664 VkResult anv_DestroyBufferView(
1665 VkDevice _device,
1666 VkBufferView _view)
1667 {
1668 ANV_FROM_HANDLE(anv_device, device, _device);
1669
1670 anv_surface_view_destroy(device, (struct anv_surface_view *)_view);
1671
1672 return VK_SUCCESS;
1673 }
1674
1675 // Sampler functions
1676
1677 VkResult anv_CreateSampler(
1678 VkDevice _device,
1679 const VkSamplerCreateInfo* pCreateInfo,
1680 VkSampler* pSampler)
1681 {
1682 ANV_FROM_HANDLE(anv_device, device, _device);
1683 struct anv_sampler *sampler;
1684 uint32_t mag_filter, min_filter, max_anisotropy;
1685
1686 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO);
1687
1688 sampler = anv_device_alloc(device, sizeof(*sampler), 8,
1689 VK_SYSTEM_ALLOC_TYPE_API_OBJECT);
1690 if (!sampler)
1691 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
1692
1693 static const uint32_t vk_to_gen_tex_filter[] = {
1694 [VK_TEX_FILTER_NEAREST] = MAPFILTER_NEAREST,
1695 [VK_TEX_FILTER_LINEAR] = MAPFILTER_LINEAR
1696 };
1697
1698 static const uint32_t vk_to_gen_mipmap_mode[] = {
1699 [VK_TEX_MIPMAP_MODE_BASE] = MIPFILTER_NONE,
1700 [VK_TEX_MIPMAP_MODE_NEAREST] = MIPFILTER_NEAREST,
1701 [VK_TEX_MIPMAP_MODE_LINEAR] = MIPFILTER_LINEAR
1702 };
1703
1704 static const uint32_t vk_to_gen_tex_address[] = {
1705 [VK_TEX_ADDRESS_WRAP] = TCM_WRAP,
1706 [VK_TEX_ADDRESS_MIRROR] = TCM_MIRROR,
1707 [VK_TEX_ADDRESS_CLAMP] = TCM_CLAMP,
1708 [VK_TEX_ADDRESS_MIRROR_ONCE] = TCM_MIRROR_ONCE,
1709 [VK_TEX_ADDRESS_CLAMP_BORDER] = TCM_CLAMP_BORDER,
1710 };
1711
1712 static const uint32_t vk_to_gen_compare_op[] = {
1713 [VK_COMPARE_OP_NEVER] = PREFILTEROPNEVER,
1714 [VK_COMPARE_OP_LESS] = PREFILTEROPLESS,
1715 [VK_COMPARE_OP_EQUAL] = PREFILTEROPEQUAL,
1716 [VK_COMPARE_OP_LESS_EQUAL] = PREFILTEROPLEQUAL,
1717 [VK_COMPARE_OP_GREATER] = PREFILTEROPGREATER,
1718 [VK_COMPARE_OP_NOT_EQUAL] = PREFILTEROPNOTEQUAL,
1719 [VK_COMPARE_OP_GREATER_EQUAL] = PREFILTEROPGEQUAL,
1720 [VK_COMPARE_OP_ALWAYS] = PREFILTEROPALWAYS,
1721 };
1722
1723 if (pCreateInfo->maxAnisotropy > 1) {
1724 mag_filter = MAPFILTER_ANISOTROPIC;
1725 min_filter = MAPFILTER_ANISOTROPIC;
1726 max_anisotropy = (pCreateInfo->maxAnisotropy - 2) / 2;
1727 } else {
1728 mag_filter = vk_to_gen_tex_filter[pCreateInfo->magFilter];
1729 min_filter = vk_to_gen_tex_filter[pCreateInfo->minFilter];
1730 max_anisotropy = RATIO21;
1731 }
1732
1733 struct GEN8_SAMPLER_STATE sampler_state = {
1734 .SamplerDisable = false,
1735 .TextureBorderColorMode = DX10OGL,
1736 .LODPreClampMode = 0,
1737 .BaseMipLevel = 0.0,
1738 .MipModeFilter = vk_to_gen_mipmap_mode[pCreateInfo->mipMode],
1739 .MagModeFilter = mag_filter,
1740 .MinModeFilter = min_filter,
1741 .TextureLODBias = pCreateInfo->mipLodBias * 256,
1742 .AnisotropicAlgorithm = EWAApproximation,
1743 .MinLOD = pCreateInfo->minLod,
1744 .MaxLOD = pCreateInfo->maxLod,
1745 .ChromaKeyEnable = 0,
1746 .ChromaKeyIndex = 0,
1747 .ChromaKeyMode = 0,
1748 .ShadowFunction = vk_to_gen_compare_op[pCreateInfo->compareOp],
1749 .CubeSurfaceControlMode = 0,
1750
1751 .IndirectStatePointer =
1752 device->border_colors.offset +
1753 pCreateInfo->borderColor * sizeof(float) * 4,
1754
1755 .LODClampMagnificationMode = MIPNONE,
1756 .MaximumAnisotropy = max_anisotropy,
1757 .RAddressMinFilterRoundingEnable = 0,
1758 .RAddressMagFilterRoundingEnable = 0,
1759 .VAddressMinFilterRoundingEnable = 0,
1760 .VAddressMagFilterRoundingEnable = 0,
1761 .UAddressMinFilterRoundingEnable = 0,
1762 .UAddressMagFilterRoundingEnable = 0,
1763 .TrilinearFilterQuality = 0,
1764 .NonnormalizedCoordinateEnable = 0,
1765 .TCXAddressControlMode = vk_to_gen_tex_address[pCreateInfo->addressU],
1766 .TCYAddressControlMode = vk_to_gen_tex_address[pCreateInfo->addressV],
1767 .TCZAddressControlMode = vk_to_gen_tex_address[pCreateInfo->addressW],
1768 };
1769
1770 GEN8_SAMPLER_STATE_pack(NULL, sampler->state, &sampler_state);
1771
1772 *pSampler = anv_sampler_to_handle(sampler);
1773
1774 return VK_SUCCESS;
1775 }
1776
1777 // Descriptor set functions
1778
1779 VkResult anv_CreateDescriptorSetLayout(
1780 VkDevice _device,
1781 const VkDescriptorSetLayoutCreateInfo* pCreateInfo,
1782 VkDescriptorSetLayout* pSetLayout)
1783 {
1784 ANV_FROM_HANDLE(anv_device, device, _device);
1785 struct anv_descriptor_set_layout *set_layout;
1786
1787 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO);
1788
1789 uint32_t sampler_count[VK_SHADER_STAGE_NUM] = { 0, };
1790 uint32_t surface_count[VK_SHADER_STAGE_NUM] = { 0, };
1791 uint32_t num_dynamic_buffers = 0;
1792 uint32_t count = 0;
1793 uint32_t stages = 0;
1794 uint32_t s;
1795
1796 for (uint32_t i = 0; i < pCreateInfo->count; i++) {
1797 switch (pCreateInfo->pBinding[i].descriptorType) {
1798 case VK_DESCRIPTOR_TYPE_SAMPLER:
1799 case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
1800 for_each_bit(s, pCreateInfo->pBinding[i].stageFlags)
1801 sampler_count[s] += pCreateInfo->pBinding[i].arraySize;
1802 break;
1803 default:
1804 break;
1805 }
1806
1807 switch (pCreateInfo->pBinding[i].descriptorType) {
1808 case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
1809 case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
1810 case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
1811 case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
1812 case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
1813 case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
1814 case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
1815 case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
1816 case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
1817 for_each_bit(s, pCreateInfo->pBinding[i].stageFlags)
1818 surface_count[s] += pCreateInfo->pBinding[i].arraySize;
1819 break;
1820 default:
1821 break;
1822 }
1823
1824 switch (pCreateInfo->pBinding[i].descriptorType) {
1825 case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
1826 case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
1827 num_dynamic_buffers += pCreateInfo->pBinding[i].arraySize;
1828 break;
1829 default:
1830 break;
1831 }
1832
1833 stages |= pCreateInfo->pBinding[i].stageFlags;
1834 count += pCreateInfo->pBinding[i].arraySize;
1835 }
1836
1837 uint32_t sampler_total = 0;
1838 uint32_t surface_total = 0;
1839 for (uint32_t s = 0; s < VK_SHADER_STAGE_NUM; s++) {
1840 sampler_total += sampler_count[s];
1841 surface_total += surface_count[s];
1842 }
1843
1844 size_t size = sizeof(*set_layout) +
1845 (sampler_total + surface_total) * sizeof(set_layout->entries[0]);
1846 set_layout = anv_device_alloc(device, size, 8,
1847 VK_SYSTEM_ALLOC_TYPE_API_OBJECT);
1848 if (!set_layout)
1849 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
1850
1851 set_layout->num_dynamic_buffers = num_dynamic_buffers;
1852 set_layout->count = count;
1853 set_layout->shader_stages = stages;
1854
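/* Carve the trailing entries[] array into per-stage runs: for each shader
 * stage, first its surface slots and then its sampler slots.  The second pass
 * below fills these runs with the descriptor index (and dynamic-buffer slot,
 * if any) for every binding visible to that stage.
 */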
1855 struct anv_descriptor_slot *p = set_layout->entries;
1856 struct anv_descriptor_slot *sampler[VK_SHADER_STAGE_NUM];
1857 struct anv_descriptor_slot *surface[VK_SHADER_STAGE_NUM];
1858 for (uint32_t s = 0; s < VK_SHADER_STAGE_NUM; s++) {
1859 set_layout->stage[s].surface_count = surface_count[s];
1860 set_layout->stage[s].surface_start = surface[s] = p;
1861 p += surface_count[s];
1862 set_layout->stage[s].sampler_count = sampler_count[s];
1863 set_layout->stage[s].sampler_start = sampler[s] = p;
1864 p += sampler_count[s];
1865 }
1866
1867 uint32_t descriptor = 0;
1868 int8_t dynamic_slot = 0;
1869 bool is_dynamic;
1870 for (uint32_t i = 0; i < pCreateInfo->count; i++) {
1871 switch (pCreateInfo->pBinding[i].descriptorType) {
1872 case VK_DESCRIPTOR_TYPE_SAMPLER:
1873 case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
1874 for_each_bit(s, pCreateInfo->pBinding[i].stageFlags)
1875 for (uint32_t j = 0; j < pCreateInfo->pBinding[i].arraySize; j++) {
1876 sampler[s]->index = descriptor + j;
1877 sampler[s]->dynamic_slot = -1;
1878 sampler[s]++;
1879 }
1880 break;
1881 default:
1882 break;
1883 }
1884
1885 switch (pCreateInfo->pBinding[i].descriptorType) {
1886 case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
1887 case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
1888 is_dynamic = true;
1889 break;
1890 default:
1891 is_dynamic = false;
1892 break;
1893 }
1894
1895 switch (pCreateInfo->pBinding[i].descriptorType) {
1896 case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
1897 case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
1898 case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
1899 case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
1900 case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
1901 case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
1902 case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
1903 case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
1904 case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
1905 for_each_bit(s, pCreateInfo->pBinding[i].stageFlags)
1906 for (uint32_t j = 0; j < pCreateInfo->pBinding[i].arraySize; j++) {
1907 surface[s]->index = descriptor + j;
1908 if (is_dynamic)
1909 surface[s]->dynamic_slot = dynamic_slot + j;
1910 else
1911 surface[s]->dynamic_slot = -1;
1912 surface[s]++;
1913 }
1914 break;
1915 default:
1916 break;
1917 }
1918
1919 if (is_dynamic)
1920 dynamic_slot += pCreateInfo->pBinding[i].arraySize;
1921
1922 descriptor += pCreateInfo->pBinding[i].arraySize;
1923 }
1924
1925 *pSetLayout = anv_descriptor_set_layout_to_handle(set_layout);
1926
1927 return VK_SUCCESS;
1928 }
1929
1930 VkResult anv_CreateDescriptorPool(
1931 VkDevice device,
1932 VkDescriptorPoolUsage poolUsage,
1933 uint32_t maxSets,
1934 const VkDescriptorPoolCreateInfo* pCreateInfo,
1935 VkDescriptorPool* pDescriptorPool)
1936 {
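/* Descriptor pools are not really implemented yet; descriptor sets are
* allocated straight from the device heap in anv_AllocDescriptorSets, so
* for now we just hand back a dummy, non-zero handle.
*/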
1937 *pDescriptorPool = 1;
1938
1939 return VK_SUCCESS;
1940 }
1941
1942 VkResult anv_ResetDescriptorPool(
1943 VkDevice device,
1944 VkDescriptorPool descriptorPool)
1945 {
1946 return VK_SUCCESS;
1947 }
1948
1949 VkResult anv_AllocDescriptorSets(
1950 VkDevice _device,
1951 VkDescriptorPool descriptorPool,
1952 VkDescriptorSetUsage setUsage,
1953 uint32_t count,
1954 const VkDescriptorSetLayout* pSetLayouts,
1955 VkDescriptorSet* pDescriptorSets,
1956 uint32_t* pCount)
1957 {
1958 ANV_FROM_HANDLE(anv_device, device, _device);
1959 struct anv_descriptor_set *set;
1960 size_t size;
1961
1962 for (uint32_t i = 0; i < count; i++) {
1963 ANV_FROM_HANDLE(anv_descriptor_set_layout, layout, pSetLayouts[i]);
1964 size = sizeof(*set) + layout->count * sizeof(set->descriptors[0]);
1965 set = anv_device_alloc(device, size, 8,
1966 VK_SYSTEM_ALLOC_TYPE_API_OBJECT);
1967 if (!set) {
1968 *pCount = i;
1969 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
1970 }
1971
1972 /* Descriptor sets may not be 100% filled out so we need to memset to
1973 * ensure that we can properly detect and handle holes.
1974 */
1975 memset(set, 0, size);
1976
1977 pDescriptorSets[i] = anv_descriptor_set_to_handle(set);
1978 }
1979
1980 *pCount = count;
1981
1982 return VK_SUCCESS;
1983 }
1984
1985 VkResult anv_UpdateDescriptorSets(
1986 VkDevice device,
1987 uint32_t writeCount,
1988 const VkWriteDescriptorSet* pDescriptorWrites,
1989 uint32_t copyCount,
1990 const VkCopyDescriptorSet* pDescriptorCopies)
1991 {
1992 for (uint32_t i = 0; i < writeCount; i++) {
1993 const VkWriteDescriptorSet *write = &pDescriptorWrites[i];
1994 ANV_FROM_HANDLE(anv_descriptor_set, set, write->destSet);
1995
1996 switch (write->descriptorType) {
1997 case VK_DESCRIPTOR_TYPE_SAMPLER:
1998 case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
1999 for (uint32_t j = 0; j < write->count; j++) {
2000 set->descriptors[write->destBinding + j].sampler =
2001 anv_sampler_from_handle(write->pDescriptors[j].sampler);
2002 }
2003
2004 if (write->descriptorType == VK_DESCRIPTOR_TYPE_SAMPLER)
2005 break;
2006
2007 /* fallthrough */
2008
2009 case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
2010 case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
2011 for (uint32_t j = 0; j < write->count; j++) {
2012 set->descriptors[write->destBinding + j].view =
2013 (struct anv_surface_view *)write->pDescriptors[j].imageView;
2014 }
2015 break;
2016
2017 case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
2018 case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
2019 anv_finishme("texel buffers not implemented");
2020 break;
2021
2022 case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
2023 case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
2024 case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
2025 case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
2026 for (uint32_t j = 0; j < write->count; j++) {
2027 set->descriptors[write->destBinding + j].view =
2028 (struct anv_surface_view *)write->pDescriptors[j].bufferView;
2029 }
break;
2030 
2031 default:
2032 break;
2033 }
2034 }
2035
2036 for (uint32_t i = 0; i < copyCount; i++) {
2037 const VkCopyDescriptorSet *copy = &pDescriptorCopies[i];
2038 ANV_FROM_HANDLE(anv_descriptor_set, src, copy->srcSet);
2039 ANV_FROM_HANDLE(anv_descriptor_set, dest, copy->destSet);
2040 for (uint32_t j = 0; j < copy->count; j++) {
2041 dest->descriptors[copy->destBinding + j] =
2042 src->descriptors[copy->srcBinding + j];
2043 }
2044 }
2045
2046 return VK_SUCCESS;
2047 }
2048
2049 // State object functions
2050
2051 static inline int64_t
2052 clamp_int64(int64_t x, int64_t min, int64_t max)
2053 {
2054 if (x < min)
2055 return min;
2056 else if (x < max)
2057 return x;
2058 else
2059 return max;
2060 }
2061
2062 static void
2063 anv_dynamic_vp_state_destroy(struct anv_device *device,
2064 struct anv_object *object,
2065 VkObjectType obj_type)
2066 {
2067 struct anv_dynamic_vp_state *state = (void *)object;
2068
2069 assert(obj_type == VK_OBJECT_TYPE_DYNAMIC_VP_STATE);
2070
2071 anv_state_pool_free(&device->dynamic_state_pool, state->sf_clip_vp);
2072 anv_state_pool_free(&device->dynamic_state_pool, state->cc_vp);
2073 anv_state_pool_free(&device->dynamic_state_pool, state->scissor);
2074
2075 anv_device_free(device, state);
2076 }
2077
2078 VkResult anv_CreateDynamicViewportState(
2079 VkDevice _device,
2080 const VkDynamicViewportStateCreateInfo* pCreateInfo,
2081 VkDynamicViewportState* pState)
2082 {
2083 ANV_FROM_HANDLE(anv_device, device, _device);
2084 struct anv_dynamic_vp_state *state;
2085
2086 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_DYNAMIC_VP_STATE_CREATE_INFO);
2087
2088 state = anv_device_alloc(device, sizeof(*state), 8,
2089 VK_SYSTEM_ALLOC_TYPE_API_OBJECT);
2090 if (state == NULL)
2091 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
2092
2093 state->base.destructor = anv_dynamic_vp_state_destroy;
2094
2095 unsigned count = pCreateInfo->viewportAndScissorCount;
2096 state->sf_clip_vp = anv_state_pool_alloc(&device->dynamic_state_pool,
2097 count * 64, 64);
2098 state->cc_vp = anv_state_pool_alloc(&device->dynamic_state_pool,
2099 count * 8, 32);
2100 state->scissor = anv_state_pool_alloc(&device->dynamic_state_pool,
2101 count * 32, 32);
2102
2103 for (uint32_t i = 0; i < pCreateInfo->viewportAndScissorCount; i++) {
2104 const VkViewport *vp = &pCreateInfo->pViewports[i];
2105 const VkRect2D *s = &pCreateInfo->pScissors[i];
2106
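/* The SF_CLIP viewport matrix below maps NDC [-1, 1] to window
* coordinates: m00/m11/m22 scale x, y and depth by half the viewport
* extent (and depth range), and m30/m31/m32 translate to the viewport
* center (and depth midpoint). The guardband fields are left at the
* trivial [-1, 1] range.
*/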
2107 struct GEN8_SF_CLIP_VIEWPORT sf_clip_viewport = {
2108 .ViewportMatrixElementm00 = vp->width / 2,
2109 .ViewportMatrixElementm11 = vp->height / 2,
2110 .ViewportMatrixElementm22 = (vp->maxDepth - vp->minDepth) / 2,
2111 .ViewportMatrixElementm30 = vp->originX + vp->width / 2,
2112 .ViewportMatrixElementm31 = vp->originY + vp->height / 2,
2113 .ViewportMatrixElementm32 = (vp->maxDepth + vp->minDepth) / 2,
2114 .XMinClipGuardband = -1.0f,
2115 .XMaxClipGuardband = 1.0f,
2116 .YMinClipGuardband = -1.0f,
2117 .YMaxClipGuardband = 1.0f,
2118 .XMinViewPort = vp->originX,
2119 .XMaxViewPort = vp->originX + vp->width - 1,
2120 .YMinViewPort = vp->originY,
2121 .YMaxViewPort = vp->originY + vp->height - 1,
2122 };
2123
2124 struct GEN8_CC_VIEWPORT cc_viewport = {
2125 .MinimumDepth = vp->minDepth,
2126 .MaximumDepth = vp->maxDepth
2127 };
2128
2129 /* Since xmax and ymax are inclusive, we have to have xmax < xmin or
2130 * ymax < ymin for empty clips. In case the clip x, y, width, and height
2131 * are all 0, the clamps below produce 0 for xmin, ymin, xmax, and ymax,
2132 * which isn't what we want. Just special-case empty clips and produce a
2133 * canonical empty clip. */
2134 static const struct GEN8_SCISSOR_RECT empty_scissor = {
2135 .ScissorRectangleYMin = 1,
2136 .ScissorRectangleXMin = 1,
2137 .ScissorRectangleYMax = 0,
2138 .ScissorRectangleXMax = 0
2139 };
2140
2141 const int max = 0xffff;
2142 struct GEN8_SCISSOR_RECT scissor = {
2143 /* Do this math using int64_t so overflow gets clamped correctly. */
2144 .ScissorRectangleYMin = clamp_int64(s->offset.y, 0, max),
2145 .ScissorRectangleXMin = clamp_int64(s->offset.x, 0, max),
2146 .ScissorRectangleYMax = clamp_int64((uint64_t) s->offset.y + s->extent.height - 1, 0, max),
2147 .ScissorRectangleXMax = clamp_int64((uint64_t) s->offset.x + s->extent.width - 1, 0, max)
2148 };
2149
2150 GEN8_SF_CLIP_VIEWPORT_pack(NULL, state->sf_clip_vp.map + i * 64, &sf_clip_viewport);
2151 GEN8_CC_VIEWPORT_pack(NULL, state->cc_vp.map + i * 8, &cc_viewport);
2152
2153 if (s->extent.width <= 0 || s->extent.height <= 0) {
2154 GEN8_SCISSOR_RECT_pack(NULL, state->scissor.map + i * 32, &empty_scissor);
2155 } else {
2156 GEN8_SCISSOR_RECT_pack(NULL, state->scissor.map + i * 32, &scissor);
2157 }
2158 }
2159
2160 *pState = anv_dynamic_vp_state_to_handle(state);
2161
2162 return VK_SUCCESS;
2163 }
2164
2165 VkResult anv_CreateDynamicRasterState(
2166 VkDevice _device,
2167 const VkDynamicRasterStateCreateInfo* pCreateInfo,
2168 VkDynamicRasterState* pState)
2169 {
2170 ANV_FROM_HANDLE(anv_device, device, _device);
2171 struct anv_dynamic_rs_state *state;
2172
2173 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_DYNAMIC_RS_STATE_CREATE_INFO);
2174
2175 state = anv_device_alloc(device, sizeof(*state), 8,
2176 VK_SYSTEM_ALLOC_TYPE_API_OBJECT);
2177 if (state == NULL)
2178 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
2179
2180 struct GEN8_3DSTATE_SF sf = {
2181 GEN8_3DSTATE_SF_header,
2182 .LineWidth = pCreateInfo->lineWidth,
2183 };
2184
2185 GEN8_3DSTATE_SF_pack(NULL, state->state_sf, &sf);
2186
2187 bool enable_bias = pCreateInfo->depthBias != 0.0f ||
2188 pCreateInfo->slopeScaledDepthBias != 0.0f;
2189 struct GEN8_3DSTATE_RASTER raster = {
2190 .GlobalDepthOffsetEnableSolid = enable_bias,
2191 .GlobalDepthOffsetEnableWireframe = enable_bias,
2192 .GlobalDepthOffsetEnablePoint = enable_bias,
2193 .GlobalDepthOffsetConstant = pCreateInfo->depthBias,
2194 .GlobalDepthOffsetScale = pCreateInfo->slopeScaledDepthBias,
2195 .GlobalDepthOffsetClamp = pCreateInfo->depthBiasClamp
2196 };
2197
2198 GEN8_3DSTATE_RASTER_pack(NULL, state->state_raster, &raster);
2199
2200 *pState = anv_dynamic_rs_state_to_handle(state);
2201
2202 return VK_SUCCESS;
2203 }
2204
2205 VkResult anv_CreateDynamicColorBlendState(
2206 VkDevice _device,
2207 const VkDynamicColorBlendStateCreateInfo* pCreateInfo,
2208 VkDynamicColorBlendState* pState)
2209 {
2210 ANV_FROM_HANDLE(anv_device, device, _device);
2211 struct anv_dynamic_cb_state *state;
2212
2213 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_DYNAMIC_CB_STATE_CREATE_INFO);
2214
2215 state = anv_device_alloc(device, sizeof(*state), 8,
2216 VK_SYSTEM_ALLOC_TYPE_API_OBJECT);
2217 if (state == NULL)
2218 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
2219
2220 struct GEN8_COLOR_CALC_STATE color_calc_state = {
2221 .BlendConstantColorRed = pCreateInfo->blendConst[0],
2222 .BlendConstantColorGreen = pCreateInfo->blendConst[1],
2223 .BlendConstantColorBlue = pCreateInfo->blendConst[2],
2224 .BlendConstantColorAlpha = pCreateInfo->blendConst[3]
2225 };
2226
2227 GEN8_COLOR_CALC_STATE_pack(NULL, state->state_color_calc, &color_calc_state);
2228
2229 *pState = anv_dynamic_cb_state_to_handle(state);
2230
2231 return VK_SUCCESS;
2232 }
2233
2234 VkResult anv_CreateDynamicDepthStencilState(
2235 VkDevice _device,
2236 const VkDynamicDepthStencilStateCreateInfo* pCreateInfo,
2237 VkDynamicDepthStencilState* pState)
2238 {
2239 ANV_FROM_HANDLE(anv_device, device, _device);
2240 struct anv_dynamic_ds_state *state;
2241
2242 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_DYNAMIC_DS_STATE_CREATE_INFO);
2243
2244 state = anv_device_alloc(device, sizeof(*state), 8,
2245 VK_SYSTEM_ALLOC_TYPE_API_OBJECT);
2246 if (state == NULL)
2247 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
2248
2249 struct GEN8_3DSTATE_WM_DEPTH_STENCIL wm_depth_stencil = {
2250 GEN8_3DSTATE_WM_DEPTH_STENCIL_header,
2251
2252 /* Is this what we need to do? */
2253 .StencilBufferWriteEnable = pCreateInfo->stencilWriteMask != 0,
2254
2255 .StencilTestMask = pCreateInfo->stencilReadMask & 0xff,
2256 .StencilWriteMask = pCreateInfo->stencilWriteMask & 0xff,
2257
2258 .BackfaceStencilTestMask = pCreateInfo->stencilReadMask & 0xff,
2259 .BackfaceStencilWriteMask = pCreateInfo->stencilWriteMask & 0xff,
2260 };
2261
2262 GEN8_3DSTATE_WM_DEPTH_STENCIL_pack(NULL, state->state_wm_depth_stencil,
2263 &wm_depth_stencil);
2264
2265 struct GEN8_COLOR_CALC_STATE color_calc_state = {
2266 .StencilReferenceValue = pCreateInfo->stencilFrontRef,
2267 .BackFaceStencilReferenceValue = pCreateInfo->stencilBackRef
2268 };
2269
2270 GEN8_COLOR_CALC_STATE_pack(NULL, state->state_color_calc, &color_calc_state);
2271
2272 *pState = anv_dynamic_ds_state_to_handle(state);
2273
2274 return VK_SUCCESS;
2275 }
2276
2277 // Command buffer functions
2278
2279 static void
2280 anv_cmd_buffer_destroy(struct anv_device *device,
2281 struct anv_object *object,
2282 VkObjectType obj_type)
2283 {
2284 struct anv_cmd_buffer *cmd_buffer = (struct anv_cmd_buffer *) object;
2285
2286 assert(obj_type == VK_OBJECT_TYPE_COMMAND_BUFFER);
2287
2288 /* Destroy all of the batch buffers */
2289 struct anv_batch_bo *bbo = cmd_buffer->last_batch_bo;
2290 while (bbo) {
2291 struct anv_batch_bo *prev = bbo->prev_batch_bo;
2292 anv_batch_bo_destroy(bbo, device);
2293 bbo = prev;
2294 }
2295 anv_reloc_list_finish(&cmd_buffer->batch.relocs, device);
2296
2297 /* Destroy all of the surface state buffers */
2298 bbo = cmd_buffer->surface_batch_bo;
2299 while (bbo) {
2300 struct anv_batch_bo *prev = bbo->prev_batch_bo;
2301 anv_batch_bo_destroy(bbo, device);
2302 bbo = prev;
2303 }
2304 anv_reloc_list_finish(&cmd_buffer->surface_relocs, device);
2305
2306 anv_state_stream_finish(&cmd_buffer->surface_state_stream);
2307 anv_state_stream_finish(&cmd_buffer->dynamic_state_stream);
2308 anv_device_free(device, cmd_buffer->exec2_objects);
2309 anv_device_free(device, cmd_buffer->exec2_bos);
2310 anv_device_free(device, cmd_buffer);
2311 }
2312
2313 static VkResult
2314 anv_cmd_buffer_chain_batch(struct anv_batch *batch, void *_data)
2315 {
2316 struct anv_cmd_buffer *cmd_buffer = _data;
2317
2318 struct anv_batch_bo *new_bbo, *old_bbo = cmd_buffer->last_batch_bo;
2319
2320 VkResult result = anv_batch_bo_create(cmd_buffer->device, &new_bbo);
2321 if (result != VK_SUCCESS)
2322 return result;
2323
2324 /* We set the end of the batch a little short so that we are sure to
2325 * have room for the chaining command. Since we're about to emit the
2326 * chaining command, set the end back where it should go.
2327 */
2328 batch->end += GEN8_MI_BATCH_BUFFER_START_length * 4;
2329 assert(batch->end == old_bbo->bo.map + old_bbo->bo.size);
2330
2331 anv_batch_emit(batch, GEN8_MI_BATCH_BUFFER_START,
2332 GEN8_MI_BATCH_BUFFER_START_header,
2333 ._2ndLevelBatchBuffer = _1stlevelbatch,
2334 .AddressSpaceIndicator = ASI_PPGTT,
2335 .BatchBufferStartAddress = { &new_bbo->bo, 0 },
2336 );
2337
2338 /* Pad out to a 2-dword aligned boundary with zeros */
2339 if ((uintptr_t)batch->next % 8 != 0) {
2340 *(uint32_t *)batch->next = 0;
2341 batch->next += 4;
2342 }
2343
2344 anv_batch_bo_finish(cmd_buffer->last_batch_bo, batch);
2345
2346 new_bbo->prev_batch_bo = old_bbo;
2347 cmd_buffer->last_batch_bo = new_bbo;
2348
2349 anv_batch_bo_start(new_bbo, batch, GEN8_MI_BATCH_BUFFER_START_length * 4);
2350
2351 return VK_SUCCESS;
2352 }
2353
2354 VkResult anv_CreateCommandBuffer(
2355 VkDevice _device,
2356 const VkCmdBufferCreateInfo* pCreateInfo,
2357 VkCmdBuffer* pCmdBuffer)
2358 {
2359 ANV_FROM_HANDLE(anv_device, device, _device);
2360 struct anv_cmd_buffer *cmd_buffer;
2361 VkResult result;
2362
2363 assert(pCreateInfo->level == VK_CMD_BUFFER_LEVEL_PRIMARY);
2364
2365 cmd_buffer = anv_device_alloc(device, sizeof(*cmd_buffer), 8,
2366 VK_SYSTEM_ALLOC_TYPE_API_OBJECT);
2367 if (cmd_buffer == NULL)
2368 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
2369
2370 cmd_buffer->base.destructor = anv_cmd_buffer_destroy;
2371
2372 cmd_buffer->device = device;
2373 cmd_buffer->rs_state = NULL;
2374 cmd_buffer->vp_state = NULL;
2375 cmd_buffer->cb_state = NULL;
2376 cmd_buffer->ds_state = NULL;
2377 memset(&cmd_buffer->state_vf, 0, sizeof(cmd_buffer->state_vf));
2378 memset(&cmd_buffer->descriptors, 0, sizeof(cmd_buffer->descriptors));
2379
2380 result = anv_batch_bo_create(device, &cmd_buffer->last_batch_bo);
2381 if (result != VK_SUCCESS)
2382 goto fail;
2383
2384 result = anv_reloc_list_init(&cmd_buffer->batch.relocs, device);
2385 if (result != VK_SUCCESS)
2386 goto fail_batch_bo;
2387
2388 cmd_buffer->batch.device = device;
2389 cmd_buffer->batch.extend_cb = anv_cmd_buffer_chain_batch;
2390 cmd_buffer->batch.user_data = cmd_buffer;
2391
2392 anv_batch_bo_start(cmd_buffer->last_batch_bo, &cmd_buffer->batch,
2393 GEN8_MI_BATCH_BUFFER_START_length * 4);
2394
2395 result = anv_batch_bo_create(device, &cmd_buffer->surface_batch_bo);
2396 if (result != VK_SUCCESS)
2397 goto fail_batch_relocs;
2398 cmd_buffer->surface_batch_bo->first_reloc = 0;
2399
2400 result = anv_reloc_list_init(&cmd_buffer->surface_relocs, device);
2401 if (result != VK_SUCCESS)
2402 goto fail_ss_batch_bo;
2403
2404 /* Start surface_next at 1 so surface offset 0 is invalid. */
2405 cmd_buffer->surface_next = 1;
2406
2407 cmd_buffer->exec2_objects = NULL;
2408 cmd_buffer->exec2_bos = NULL;
2409 cmd_buffer->exec2_array_length = 0;
2410
2411 anv_state_stream_init(&cmd_buffer->surface_state_stream,
2412 &device->surface_state_block_pool);
2413 anv_state_stream_init(&cmd_buffer->dynamic_state_stream,
2414 &device->dynamic_state_block_pool);
2415
2416 cmd_buffer->dirty = 0;
2417 cmd_buffer->vb_dirty = 0;
2418 cmd_buffer->descriptors_dirty = 0;
2419 cmd_buffer->pipeline = NULL;
2420 cmd_buffer->vp_state = NULL;
2421 cmd_buffer->rs_state = NULL;
2422 cmd_buffer->ds_state = NULL;
2423
2424 *pCmdBuffer = anv_cmd_buffer_to_handle(cmd_buffer);
2425
2426 return VK_SUCCESS;
2427
2428 fail_ss_batch_bo:
2429 anv_batch_bo_destroy(cmd_buffer->surface_batch_bo, device);
2430 fail_batch_relocs:
2431 anv_reloc_list_finish(&cmd_buffer->batch.relocs, device);
2432 fail_batch_bo:
2433 anv_batch_bo_destroy(cmd_buffer->last_batch_bo, device);
2434 fail:
2435 anv_device_free(device, cmd_buffer);
2436
2437 return result;
2438 }
2439
2440 static void
2441 anv_cmd_buffer_emit_state_base_address(struct anv_cmd_buffer *cmd_buffer)
2442 {
2443 struct anv_device *device = cmd_buffer->device;
2444 struct anv_bo *scratch_bo = NULL;
2445
2446 cmd_buffer->scratch_size = device->scratch_block_pool.size;
2447 if (cmd_buffer->scratch_size > 0)
2448 scratch_bo = &device->scratch_block_pool.bo;
2449
2450 anv_batch_emit(&cmd_buffer->batch, GEN8_STATE_BASE_ADDRESS,
2451 .GeneralStateBaseAddress = { scratch_bo, 0 },
2452 .GeneralStateMemoryObjectControlState = GEN8_MOCS,
2453 .GeneralStateBaseAddressModifyEnable = true,
2454 .GeneralStateBufferSize = 0xfffff,
2455 .GeneralStateBufferSizeModifyEnable = true,
2456
2457 .SurfaceStateBaseAddress = { &cmd_buffer->surface_batch_bo->bo, 0 },
2458 .SurfaceStateMemoryObjectControlState = GEN8_MOCS,
2459 .SurfaceStateBaseAddressModifyEnable = true,
2460
2461 .DynamicStateBaseAddress = { &device->dynamic_state_block_pool.bo, 0 },
2462 .DynamicStateMemoryObjectControlState = GEN8_MOCS,
2463 .DynamicStateBaseAddressModifyEnable = true,
2464 .DynamicStateBufferSize = 0xfffff,
2465 .DynamicStateBufferSizeModifyEnable = true,
2466
2467 .IndirectObjectBaseAddress = { NULL, 0 },
2468 .IndirectObjectMemoryObjectControlState = GEN8_MOCS,
2469 .IndirectObjectBaseAddressModifyEnable = true,
2470 .IndirectObjectBufferSize = 0xfffff,
2471 .IndirectObjectBufferSizeModifyEnable = true,
2472
2473 .InstructionBaseAddress = { &device->instruction_block_pool.bo, 0 },
2474 .InstructionMemoryObjectControlState = GEN8_MOCS,
2475 .InstructionBaseAddressModifyEnable = true,
2476 .InstructionBufferSize = 0xfffff,
2477 .InstructionBuffersizeModifyEnable = true);
2478 }
2479
2480 VkResult anv_BeginCommandBuffer(
2481 VkCmdBuffer cmdBuffer,
2482 const VkCmdBufferBeginInfo* pBeginInfo)
2483 {
2484 ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer);
2485
2486 anv_cmd_buffer_emit_state_base_address(cmd_buffer);
2487 cmd_buffer->current_pipeline = UINT32_MAX;
2488
2489 return VK_SUCCESS;
2490 }
2491
2492 static VkResult
2493 anv_cmd_buffer_add_bo(struct anv_cmd_buffer *cmd_buffer,
2494 struct anv_bo *bo,
2495 struct drm_i915_gem_relocation_entry *relocs,
2496 size_t num_relocs)
2497 {
2498 struct drm_i915_gem_exec_object2 *obj;
2499
2500 if (bo->index < cmd_buffer->bo_count &&
2501 cmd_buffer->exec2_bos[bo->index] == bo)
2502 return VK_SUCCESS;
2503
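/* Grow the execbuf object and bo arrays geometrically, starting at 64
* entries, so repeated additions stay amortized O(1).
*/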
2504 if (cmd_buffer->bo_count >= cmd_buffer->exec2_array_length) {
2505 uint32_t new_len = cmd_buffer->exec2_objects ?
2506 cmd_buffer->exec2_array_length * 2 : 64;
2507
2508 struct drm_i915_gem_exec_object2 *new_objects =
2509 anv_device_alloc(cmd_buffer->device, new_len * sizeof(*new_objects),
2510 8, VK_SYSTEM_ALLOC_TYPE_INTERNAL);
2511 if (new_objects == NULL)
2512 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
2513
2514 struct anv_bo **new_bos =
2515 anv_device_alloc(cmd_buffer->device, new_len * sizeof(*new_bos),
2516 8, VK_SYSTEM_ALLOC_TYPE_INTERNAL);
2517 if (new_bos == NULL) {
2518 anv_device_free(cmd_buffer->device, new_objects);
2519 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
2520 }
2521
2522 if (cmd_buffer->exec2_objects) {
2523 memcpy(new_objects, cmd_buffer->exec2_objects,
2524 cmd_buffer->bo_count * sizeof(*new_objects));
2525 memcpy(new_bos, cmd_buffer->exec2_bos,
2526 cmd_buffer->bo_count * sizeof(*new_bos));
2527 }
2528
2529 cmd_buffer->exec2_objects = new_objects;
2530 cmd_buffer->exec2_bos = new_bos;
2531 cmd_buffer->exec2_array_length = new_len;
2532 }
2533
2534 assert(cmd_buffer->bo_count < cmd_buffer->exec2_array_length);
2535
2536 bo->index = cmd_buffer->bo_count++;
2537 obj = &cmd_buffer->exec2_objects[bo->index];
2538 cmd_buffer->exec2_bos[bo->index] = bo;
2539
2540 obj->handle = bo->gem_handle;
2541 obj->relocation_count = 0;
2542 obj->relocs_ptr = 0;
2543 obj->alignment = 0;
2544 obj->offset = bo->offset;
2545 obj->flags = 0;
2546 obj->rsvd1 = 0;
2547 obj->rsvd2 = 0;
2548
2549 if (relocs) {
2550 obj->relocation_count = num_relocs;
2551 obj->relocs_ptr = (uintptr_t) relocs;
2552 }
2553
2554 return VK_SUCCESS;
2555 }
2556
2557 static void
2558 anv_cmd_buffer_add_validate_bos(struct anv_cmd_buffer *cmd_buffer,
2559 struct anv_reloc_list *list)
2560 {
2561 for (size_t i = 0; i < list->num_relocs; i++)
2562 anv_cmd_buffer_add_bo(cmd_buffer, list->reloc_bos[i], NULL, 0);
2563 }
2564
2565 static void
2566 anv_cmd_buffer_process_relocs(struct anv_cmd_buffer *cmd_buffer,
2567 struct anv_reloc_list *list)
2568 {
2569 struct anv_bo *bo;
2570
2571 /* If the kernel supports I915_EXEC_NO_RELOC, it will compare the offset
2572 * in struct drm_i915_gem_exec_object2 against each bo's current offset
2573 * and, if none of the bos have moved, it will skip relocation processing
2574 * altogether. If I915_EXEC_NO_RELOC is not supported, the kernel ignores
2575 * the incoming value of offset, so we can set it either way. For that to
2576 * work we need to make sure all relocs use the same presumed offset.
2577 */
2578
2579 for (size_t i = 0; i < list->num_relocs; i++) {
2580 bo = list->reloc_bos[i];
2581 if (bo->offset != list->relocs[i].presumed_offset)
2582 cmd_buffer->need_reloc = true;
2583
2584 list->relocs[i].target_handle = bo->index;
2585 }
2586 }
2587
2588 VkResult anv_EndCommandBuffer(
2589 VkCmdBuffer cmdBuffer)
2590 {
2591 ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer);
2592 struct anv_device *device = cmd_buffer->device;
2593 struct anv_batch *batch = &cmd_buffer->batch;
2594
2595 anv_batch_emit(batch, GEN8_MI_BATCH_BUFFER_END);
2596
2597 /* Round batch up to an even number of dwords. */
2598 if ((batch->next - batch->start) & 4)
2599 anv_batch_emit(batch, GEN8_MI_NOOP);
2600
2601 anv_batch_bo_finish(cmd_buffer->last_batch_bo, &cmd_buffer->batch);
2602 cmd_buffer->surface_batch_bo->num_relocs =
2603 cmd_buffer->surface_relocs.num_relocs - cmd_buffer->surface_batch_bo->first_reloc;
2604 cmd_buffer->surface_batch_bo->length = cmd_buffer->surface_next;
2605
2606 cmd_buffer->bo_count = 0;
2607 cmd_buffer->need_reloc = false;
2608
2609 /* Lock for access to bo->index. */
2610 pthread_mutex_lock(&device->mutex);
2611
2612 /* Add surface state bos first so we can add them with their relocs. */
2613 for (struct anv_batch_bo *bbo = cmd_buffer->surface_batch_bo;
2614 bbo != NULL; bbo = bbo->prev_batch_bo) {
2615 anv_cmd_buffer_add_bo(cmd_buffer, &bbo->bo,
2616 &cmd_buffer->surface_relocs.relocs[bbo->first_reloc],
2617 bbo->num_relocs);
2618 }
2619
2620 /* Add all of the BOs referenced by surface state */
2621 anv_cmd_buffer_add_validate_bos(cmd_buffer, &cmd_buffer->surface_relocs);
2622
2623 /* Add all but the first batch BO */
2624 struct anv_batch_bo *batch_bo = cmd_buffer->last_batch_bo;
2625 while (batch_bo->prev_batch_bo) {
2626 anv_cmd_buffer_add_bo(cmd_buffer, &batch_bo->bo,
2627 &batch->relocs.relocs[batch_bo->first_reloc],
2628 batch_bo->num_relocs);
2629 batch_bo = batch_bo->prev_batch_bo;
2630 }
2631
2632 /* Add everything referenced by the batches */
2633 anv_cmd_buffer_add_validate_bos(cmd_buffer, &batch->relocs);
2634
2635 /* Add the first batch bo last */
2636 assert(batch_bo->prev_batch_bo == NULL && batch_bo->first_reloc == 0);
2637 anv_cmd_buffer_add_bo(cmd_buffer, &batch_bo->bo,
2638 &batch->relocs.relocs[batch_bo->first_reloc],
2639 batch_bo->num_relocs);
2640 assert(batch_bo->bo.index == cmd_buffer->bo_count - 1);
2641
2642 anv_cmd_buffer_process_relocs(cmd_buffer, &cmd_buffer->surface_relocs);
2643 anv_cmd_buffer_process_relocs(cmd_buffer, &batch->relocs);
2644
2645 cmd_buffer->execbuf.buffers_ptr = (uintptr_t) cmd_buffer->exec2_objects;
2646 cmd_buffer->execbuf.buffer_count = cmd_buffer->bo_count;
2647 cmd_buffer->execbuf.batch_start_offset = 0;
2648 cmd_buffer->execbuf.batch_len = batch->next - batch->start;
2649 cmd_buffer->execbuf.cliprects_ptr = 0;
2650 cmd_buffer->execbuf.num_cliprects = 0;
2651 cmd_buffer->execbuf.DR1 = 0;
2652 cmd_buffer->execbuf.DR4 = 0;
2653
2654 cmd_buffer->execbuf.flags = I915_EXEC_HANDLE_LUT;
2655 if (!cmd_buffer->need_reloc)
2656 cmd_buffer->execbuf.flags |= I915_EXEC_NO_RELOC;
2657 cmd_buffer->execbuf.flags |= I915_EXEC_RENDER;
2658 cmd_buffer->execbuf.rsvd1 = device->context_id;
2659 cmd_buffer->execbuf.rsvd2 = 0;
2660
2661 pthread_mutex_unlock(&device->mutex);
2662
2663 return VK_SUCCESS;
2664 }
2665
2666 VkResult anv_ResetCommandBuffer(
2667 VkCmdBuffer cmdBuffer)
2668 {
2669 ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer);
2670
2671 /* Delete all but the first batch bo */
2672 while (cmd_buffer->last_batch_bo->prev_batch_bo) {
2673 struct anv_batch_bo *prev = cmd_buffer->last_batch_bo->prev_batch_bo;
2674 anv_batch_bo_destroy(cmd_buffer->last_batch_bo, cmd_buffer->device);
2675 cmd_buffer->last_batch_bo = prev;
2676 }
2677 assert(cmd_buffer->last_batch_bo->prev_batch_bo == NULL);
2678
2679 cmd_buffer->batch.relocs.num_relocs = 0;
2680 anv_batch_bo_start(cmd_buffer->last_batch_bo, &cmd_buffer->batch,
2681 GEN8_MI_BATCH_BUFFER_START_length * 4);
2682
2683 /* Delete all but the first batch bo */
2684 while (cmd_buffer->surface_batch_bo->prev_batch_bo) {
2685 struct anv_batch_bo *prev = cmd_buffer->surface_batch_bo->prev_batch_bo;
2686 anv_batch_bo_destroy(cmd_buffer->surface_batch_bo, cmd_buffer->device);
2687 cmd_buffer->surface_batch_bo = prev;
2688 }
2689 assert(cmd_buffer->surface_batch_bo->prev_batch_bo == NULL);
2690
2691 cmd_buffer->surface_next = 1;
2692 cmd_buffer->surface_relocs.num_relocs = 0;
2693
2694 cmd_buffer->rs_state = NULL;
2695 cmd_buffer->vp_state = NULL;
2696 cmd_buffer->cb_state = NULL;
2697 cmd_buffer->ds_state = NULL;
2698
2699 return VK_SUCCESS;
2700 }
2701
2702 // Command buffer building functions
2703
2704 void anv_CmdBindPipeline(
2705 VkCmdBuffer cmdBuffer,
2706 VkPipelineBindPoint pipelineBindPoint,
2707 VkPipeline _pipeline)
2708 {
2709 ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer);
2710 ANV_FROM_HANDLE(anv_pipeline, pipeline, _pipeline);
2711
2712 switch (pipelineBindPoint) {
2713 case VK_PIPELINE_BIND_POINT_COMPUTE:
2714 cmd_buffer->compute_pipeline = pipeline;
2715 cmd_buffer->compute_dirty |= ANV_CMD_BUFFER_PIPELINE_DIRTY;
2716 break;
2717
2718 case VK_PIPELINE_BIND_POINT_GRAPHICS:
2719 cmd_buffer->pipeline = pipeline;
2720 cmd_buffer->vb_dirty |= pipeline->vb_used;
2721 cmd_buffer->dirty |= ANV_CMD_BUFFER_PIPELINE_DIRTY;
2722 break;
2723
2724 default:
2725 assert(!"invalid bind point");
2726 break;
2727 }
2728 }
2729
2730 void anv_CmdBindDynamicStateObject(
2731 VkCmdBuffer cmdBuffer,
2732 VkStateBindPoint stateBindPoint,
2733 VkDynamicStateObject dynamicState)
2734 {
2735 ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer);
2736
2737 switch (stateBindPoint) {
2738 case VK_STATE_BIND_POINT_VIEWPORT:
2739 cmd_buffer->vp_state = anv_dynamic_vp_state_from_handle(dynamicState);
2740 cmd_buffer->dirty |= ANV_CMD_BUFFER_VP_DIRTY;
2741 break;
2742 case VK_STATE_BIND_POINT_RASTER:
2743 cmd_buffer->rs_state = anv_dynamic_rs_state_from_handle(dynamicState);
2744 cmd_buffer->dirty |= ANV_CMD_BUFFER_RS_DIRTY;
2745 break;
2746 case VK_STATE_BIND_POINT_COLOR_BLEND:
2747 cmd_buffer->cb_state = anv_dynamic_cb_state_from_handle(dynamicState);
2748 cmd_buffer->dirty |= ANV_CMD_BUFFER_CB_DIRTY;
2749 break;
2750 case VK_STATE_BIND_POINT_DEPTH_STENCIL:
2751 cmd_buffer->ds_state = anv_dynamic_ds_state_from_handle(dynamicState);
2752 cmd_buffer->dirty |= ANV_CMD_BUFFER_DS_DIRTY;
2753 break;
2754 default:
2755 break;
2756 };
2757 }
2758
2759 static struct anv_state
2760 anv_cmd_buffer_alloc_surface_state(struct anv_cmd_buffer *cmd_buffer,
2761 uint32_t size, uint32_t alignment)
2762 {
2763 struct anv_state state;
2764
2765 state.offset = align_u32(cmd_buffer->surface_next, alignment);
2766 if (state.offset + size > cmd_buffer->surface_batch_bo->bo.size)
2767 return (struct anv_state) { 0 };
2768
2769 state.map = cmd_buffer->surface_batch_bo->bo.map + state.offset;
2770 state.alloc_size = size;
2771 cmd_buffer->surface_next = state.offset + size;
2772
2773 assert(state.offset + size <= cmd_buffer->surface_batch_bo->bo.size);
2774
2775 return state;
2776 }
2777
2778 static VkResult
2779 anv_cmd_buffer_new_surface_state_bo(struct anv_cmd_buffer *cmd_buffer)
2780 {
2781 struct anv_batch_bo *new_bbo, *old_bbo = cmd_buffer->surface_batch_bo;
2782
2783 /* Finish off the old buffer */
2784 old_bbo->num_relocs =
2785 cmd_buffer->surface_relocs.num_relocs - old_bbo->first_reloc;
2786 old_bbo->length = cmd_buffer->surface_next;
2787
2788 VkResult result = anv_batch_bo_create(cmd_buffer->device, &new_bbo);
2789 if (result != VK_SUCCESS)
2790 return result;
2791
2792 new_bbo->first_reloc = cmd_buffer->surface_relocs.num_relocs;
2793 cmd_buffer->surface_next = 1;
2794
2795 new_bbo->prev_batch_bo = old_bbo;
2796 cmd_buffer->surface_batch_bo = new_bbo;
2797
2798 /* Re-emit state base addresses so we get the new surface state base
2799 * address before we start emitting binding tables etc.
2800 */
2801 anv_cmd_buffer_emit_state_base_address(cmd_buffer);
2802
2803 /* It seems like just changing the state base addresses isn't enough.
2804 * Invalidating the cache seems to be enough to cause things to
2805 * propagate. However, I'm not 100% sure what we're supposed to do.
2806 */
2807 anv_batch_emit(&cmd_buffer->batch, GEN8_PIPE_CONTROL,
2808 .TextureCacheInvalidationEnable = true);
2809
2810 return VK_SUCCESS;
2811 }
2812
2813 void anv_CmdBindDescriptorSets(
2814 VkCmdBuffer cmdBuffer,
2815 VkPipelineBindPoint pipelineBindPoint,
2816 VkPipelineLayout _layout,
2817 uint32_t firstSet,
2818 uint32_t setCount,
2819 const VkDescriptorSet* pDescriptorSets,
2820 uint32_t dynamicOffsetCount,
2821 const uint32_t* pDynamicOffsets)
2822 {
2823 ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer);
2824 ANV_FROM_HANDLE(anv_pipeline_layout, layout, _layout);
2825 struct anv_descriptor_set_layout *set_layout;
2826
2827 assert(firstSet + setCount <= MAX_SETS);
2828
2829 uint32_t dynamic_slot = 0;
2830 for (uint32_t i = 0; i < setCount; i++) {
2831 ANV_FROM_HANDLE(anv_descriptor_set, set, pDescriptorSets[i]);
2832 set_layout = layout->set[firstSet + i].layout;
2833
2834 cmd_buffer->descriptors[firstSet + i].set = set;
2835
2836 assert(set_layout->num_dynamic_buffers <
2837 ARRAY_SIZE(cmd_buffer->descriptors[0].dynamic_offsets));
2838 memcpy(cmd_buffer->descriptors[firstSet + i].dynamic_offsets,
2839 pDynamicOffsets + dynamic_slot,
2840 set_layout->num_dynamic_buffers * sizeof(*pDynamicOffsets));
2841
2842 cmd_buffer->descriptors_dirty |= set_layout->shader_stages;
2843
2844 dynamic_slot += set_layout->num_dynamic_buffers;
2845 }
2846 }
2847
2848 void anv_CmdBindIndexBuffer(
2849 VkCmdBuffer cmdBuffer,
2850 VkBuffer _buffer,
2851 VkDeviceSize offset,
2852 VkIndexType indexType)
2853 {
2854 ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer);
2855 ANV_FROM_HANDLE(anv_buffer, buffer, _buffer);
2856
2857 static const uint32_t vk_to_gen_index_type[] = {
2858 [VK_INDEX_TYPE_UINT16] = INDEX_WORD,
2859 [VK_INDEX_TYPE_UINT32] = INDEX_DWORD,
2860 };
2861
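/* Record the cut (primitive restart) index as the maximum value for the
* bound index type. This is merged with the pipeline's 3DSTATE_VF dwords
* when state is flushed, so only the cut index itself is decided here.
*/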
2862 struct GEN8_3DSTATE_VF vf = {
2863 GEN8_3DSTATE_VF_header,
2864 .CutIndex = (indexType == VK_INDEX_TYPE_UINT16) ? UINT16_MAX : UINT32_MAX,
2865 };
2866 GEN8_3DSTATE_VF_pack(NULL, cmd_buffer->state_vf, &vf);
2867
2868 cmd_buffer->dirty |= ANV_CMD_BUFFER_INDEX_BUFFER_DIRTY;
2869
2870 anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_INDEX_BUFFER,
2871 .IndexFormat = vk_to_gen_index_type[indexType],
2872 .MemoryObjectControlState = GEN8_MOCS,
2873 .BufferStartingAddress = { buffer->bo, buffer->offset + offset },
2874 .BufferSize = buffer->size - offset);
2875 }
2876
2877 void anv_CmdBindVertexBuffers(
2878 VkCmdBuffer cmdBuffer,
2879 uint32_t startBinding,
2880 uint32_t bindingCount,
2881 const VkBuffer* pBuffers,
2882 const VkDeviceSize* pOffsets)
2883 {
2884 ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer);
2885 struct anv_vertex_binding *vb = cmd_buffer->vertex_bindings;
2886
2887 /* We have to defer setting up the vertex buffers since we need the
2888 * buffer strides from the pipeline. */
2889
2890 assert(startBinding + bindingCount <= MAX_VBS);
2891 for (uint32_t i = 0; i < bindingCount; i++) {
2892 vb[startBinding + i].buffer = anv_buffer_from_handle(pBuffers[i]);
2893 vb[startBinding + i].offset = pOffsets[i];
2894 cmd_buffer->vb_dirty |= 1 << (startBinding + i);
2895 }
2896 }
2897
2898 static VkResult
2899 cmd_buffer_emit_binding_table(struct anv_cmd_buffer *cmd_buffer,
2900 unsigned stage, struct anv_state *bt_state)
2901 {
2902 struct anv_pipeline_layout *layout;
2903 uint32_t color_attachments, bias, size;
2904
2905 if (stage == VK_SHADER_STAGE_COMPUTE)
2906 layout = cmd_buffer->compute_pipeline->layout;
2907 else
2908 layout = cmd_buffer->pipeline->layout;
2909
2910 if (stage == VK_SHADER_STAGE_FRAGMENT) {
2911 bias = MAX_RTS;
2912 color_attachments = cmd_buffer->framebuffer->color_attachment_count;
2913 } else {
2914 bias = 0;
2915 color_attachments = 0;
2916 }
2917
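/* For the fragment stage the first MAX_RTS binding table entries are
* reserved for render targets, so descriptor-set surfaces start at the
* bias offset computed above.
*/
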
2918 /* This is a little awkward: layout can be NULL but we still have to
2919 * allocate and set a binding table for the PS stage for render
2920 * targets. */
2921 uint32_t surface_count = layout ? layout->stage[stage].surface_count : 0;
2922
2923 if (color_attachments + surface_count == 0)
2924 return VK_SUCCESS;
2925
2926 size = (bias + surface_count) * sizeof(uint32_t);
2927 *bt_state = anv_cmd_buffer_alloc_surface_state(cmd_buffer, size, 32);
2928 uint32_t *bt_map = bt_state->map;
2929
2930 if (bt_state->map == NULL)
2931 return VK_ERROR_OUT_OF_DEVICE_MEMORY;
2932
2933 for (uint32_t ca = 0; ca < color_attachments; ca++) {
2934 const struct anv_surface_view *view =
2935 cmd_buffer->framebuffer->color_attachments[ca];
2936
2937 struct anv_state state =
2938 anv_cmd_buffer_alloc_surface_state(cmd_buffer, 64, 64);
2939
2940 if (state.map == NULL)
2941 return VK_ERROR_OUT_OF_DEVICE_MEMORY;
2942
2943 memcpy(state.map, view->surface_state.map, 64);
2944
2945 /* The address goes in dwords 8 and 9 of the SURFACE_STATE */
2946 *(uint64_t *)(state.map + 8 * 4) =
2947 anv_reloc_list_add(&cmd_buffer->surface_relocs,
2948 cmd_buffer->device,
2949 state.offset + 8 * 4,
2950 view->bo, view->offset);
2951
2952 bt_map[ca] = state.offset;
2953 }
2954
2955 if (layout == NULL)
2956 return VK_SUCCESS;
2957
2958 for (uint32_t set = 0; set < layout->num_sets; set++) {
2959 struct anv_descriptor_set_binding *d = &cmd_buffer->descriptors[set];
2960 struct anv_descriptor_set_layout *set_layout = layout->set[set].layout;
2961 struct anv_descriptor_slot *surface_slots =
2962 set_layout->stage[stage].surface_start;
2963
2964 uint32_t start = bias + layout->set[set].surface_start[stage];
2965
2966 for (uint32_t b = 0; b < set_layout->stage[stage].surface_count; b++) {
2967 struct anv_surface_view *view =
2968 d->set->descriptors[surface_slots[b].index].view;
2969
2970 if (!view)
2971 continue;
2972
2973 struct anv_state state =
2974 anv_cmd_buffer_alloc_surface_state(cmd_buffer, 64, 64);
2975
2976 if (state.map == NULL)
2977 return VK_ERROR_OUT_OF_DEVICE_MEMORY;
2978
2979 uint32_t offset;
2980 if (surface_slots[b].dynamic_slot >= 0) {
2981 uint32_t dynamic_offset =
2982 d->dynamic_offsets[surface_slots[b].dynamic_slot];
2983
2984 offset = view->offset + dynamic_offset;
2985 fill_buffer_surface_state(state.map, view->format, offset,
2986 view->range - dynamic_offset);
2987 } else {
2988 offset = view->offset;
2989 memcpy(state.map, view->surface_state.map, 64);
2990 }
2991
2992 /* The address goes in dwords 8 and 9 of the SURFACE_STATE */
2993 *(uint64_t *)(state.map + 8 * 4) =
2994 anv_reloc_list_add(&cmd_buffer->surface_relocs,
2995 cmd_buffer->device,
2996 state.offset + 8 * 4,
2997 view->bo, offset);
2998
2999 bt_map[start + b] = state.offset;
3000 }
3001 }
3002
3003 return VK_SUCCESS;
3004 }
3005
3006 static VkResult
3007 cmd_buffer_emit_samplers(struct anv_cmd_buffer *cmd_buffer,
3008 unsigned stage, struct anv_state *state)
3009 {
3010 struct anv_pipeline_layout *layout;
3011 uint32_t sampler_count;
3012
3013 if (stage == VK_SHADER_STAGE_COMPUTE)
3014 layout = cmd_buffer->compute_pipeline->layout;
3015 else
3016 layout = cmd_buffer->pipeline->layout;
3017
3018 sampler_count = layout ? layout->stage[stage].sampler_count : 0;
3019 if (sampler_count == 0)
3020 return VK_SUCCESS;
3021
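/* Each GEN8 SAMPLER_STATE entry is 4 dwords (16 bytes), laid out back to
* back in dynamic state memory.
*/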
3022 uint32_t size = sampler_count * 16;
3023 *state = anv_state_stream_alloc(&cmd_buffer->dynamic_state_stream, size, 32);
3024
3025 if (state->map == NULL)
3026 return VK_ERROR_OUT_OF_DEVICE_MEMORY;
3027
3028 for (uint32_t set = 0; set < layout->num_sets; set++) {
3029 struct anv_descriptor_set_binding *d = &cmd_buffer->descriptors[set];
3030 struct anv_descriptor_set_layout *set_layout = layout->set[set].layout;
3031 struct anv_descriptor_slot *sampler_slots =
3032 set_layout->stage[stage].sampler_start;
3033
3034 uint32_t start = layout->set[set].sampler_start[stage];
3035
3036 for (uint32_t b = 0; b < set_layout->stage[stage].sampler_count; b++) {
3037 struct anv_sampler *sampler =
3038 d->set->descriptors[sampler_slots[b].index].sampler;
3039
3040 if (!sampler)
3041 continue;
3042
3043 memcpy(state->map + (start + b) * 16,
3044 sampler->state, sizeof(sampler->state));
3045 }
3046 }
3047
3048 return VK_SUCCESS;
3049 }
3050
3051 static VkResult
3052 flush_descriptor_set(struct anv_cmd_buffer *cmd_buffer, uint32_t stage)
3053 {
3054 struct anv_state surfaces = { 0, }, samplers = { 0, };
3055 VkResult result;
3056
3057 result = cmd_buffer_emit_samplers(cmd_buffer, stage, &samplers);
3058 if (result != VK_SUCCESS)
3059 return result;
3060 result = cmd_buffer_emit_binding_table(cmd_buffer, stage, &surfaces);
3061 if (result != VK_SUCCESS)
3062 return result;
3063
3064 static const uint32_t sampler_state_opcodes[] = {
3065 [VK_SHADER_STAGE_VERTEX] = 43,
3066 [VK_SHADER_STAGE_TESS_CONTROL] = 44, /* HS */
3067 [VK_SHADER_STAGE_TESS_EVALUATION] = 45, /* DS */
3068 [VK_SHADER_STAGE_GEOMETRY] = 46,
3069 [VK_SHADER_STAGE_FRAGMENT] = 47,
3070 [VK_SHADER_STAGE_COMPUTE] = 0,
3071 };
3072
3073 static const uint32_t binding_table_opcodes[] = {
3074 [VK_SHADER_STAGE_VERTEX] = 38,
3075 [VK_SHADER_STAGE_TESS_CONTROL] = 39,
3076 [VK_SHADER_STAGE_TESS_EVALUATION] = 40,
3077 [VK_SHADER_STAGE_GEOMETRY] = 41,
3078 [VK_SHADER_STAGE_FRAGMENT] = 42,
3079 [VK_SHADER_STAGE_COMPUTE] = 0,
3080 };
3081
3082 if (samplers.alloc_size > 0) {
3083 anv_batch_emit(&cmd_buffer->batch,
3084 GEN8_3DSTATE_SAMPLER_STATE_POINTERS_VS,
3085 ._3DCommandSubOpcode = sampler_state_opcodes[stage],
3086 .PointertoVSSamplerState = samplers.offset);
3087 }
3088
3089 if (surfaces.alloc_size > 0) {
3090 anv_batch_emit(&cmd_buffer->batch,
3091 GEN8_3DSTATE_BINDING_TABLE_POINTERS_VS,
3092 ._3DCommandSubOpcode = binding_table_opcodes[stage],
3093 .PointertoVSBindingTable = surfaces.offset);
3094 }
3095
3096 return VK_SUCCESS;
3097 }
3098
3099 static void
3100 flush_descriptor_sets(struct anv_cmd_buffer *cmd_buffer)
3101 {
3102 uint32_t s, dirty = cmd_buffer->descriptors_dirty &
3103 cmd_buffer->pipeline->active_stages;
3104
3105 VkResult result = VK_SUCCESS;
3106 for_each_bit(s, dirty) {
3107 result = flush_descriptor_set(cmd_buffer, s);
3108 if (result != VK_SUCCESS)
3109 break;
3110 }
3111
3112 if (result != VK_SUCCESS) {
3113 assert(result == VK_ERROR_OUT_OF_DEVICE_MEMORY);
3114
3115 result = anv_cmd_buffer_new_surface_state_bo(cmd_buffer);
3116 assert(result == VK_SUCCESS);
3117
3118 /* Re-emit all active binding tables */
3119 for_each_bit(s, cmd_buffer->pipeline->active_stages) {
3120 result = flush_descriptor_set(cmd_buffer, s);
3121
3122 /* It had better succeed this time */
3123 assert(result == VK_SUCCESS);
3124 }
3125 }
3126
3127 cmd_buffer->descriptors_dirty &= ~cmd_buffer->pipeline->active_stages;
3128 }
3129
3130 static struct anv_state
3131 anv_cmd_buffer_emit_dynamic(struct anv_cmd_buffer *cmd_buffer,
3132 uint32_t *a, uint32_t dwords, uint32_t alignment)
3133 {
3134 struct anv_state state;
3135
3136 state = anv_state_stream_alloc(&cmd_buffer->dynamic_state_stream,
3137 dwords * 4, alignment);
3138 memcpy(state.map, a, dwords * 4);
3139
3140 VG(VALGRIND_CHECK_MEM_IS_DEFINED(state.map, dwords * 4));
3141
3142 return state;
3143 }
3144
3145 static struct anv_state
3146 anv_cmd_buffer_merge_dynamic(struct anv_cmd_buffer *cmd_buffer,
3147 uint32_t *a, uint32_t *b,
3148 uint32_t dwords, uint32_t alignment)
3149 {
3150 struct anv_state state;
3151 uint32_t *p;
3152
3153 state = anv_state_stream_alloc(&cmd_buffer->dynamic_state_stream,
3154 dwords * 4, alignment);
3155 p = state.map;
3156 for (uint32_t i = 0; i < dwords; i++)
3157 p[i] = a[i] | b[i];
3158
3159 VG(VALGRIND_CHECK_MEM_IS_DEFINED(p, dwords * 4));
3160
3161 return state;
3162 }
3163
3164 static VkResult
3165 flush_compute_descriptor_set(struct anv_cmd_buffer *cmd_buffer)
3166 {
3167 struct anv_device *device = cmd_buffer->device;
3168 struct anv_pipeline *pipeline = cmd_buffer->compute_pipeline;
3169 struct anv_state surfaces = { 0, }, samplers = { 0, };
3170 VkResult result;
3171
3172 result = cmd_buffer_emit_samplers(cmd_buffer,
3173 VK_SHADER_STAGE_COMPUTE, &samplers);
3174 if (result != VK_SUCCESS)
3175 return result;
3176 result = cmd_buffer_emit_binding_table(cmd_buffer,
3177 VK_SHADER_STAGE_COMPUTE, &surfaces);
3178 if (result != VK_SUCCESS)
3179 return result;
3180
3181 struct GEN8_INTERFACE_DESCRIPTOR_DATA desc = {
3182 .KernelStartPointer = pipeline->cs_simd,
3183 .KernelStartPointerHigh = 0,
3184 .BindingTablePointer = surfaces.offset,
3185 .BindingTableEntryCount = 0,
3186 .SamplerStatePointer = samplers.offset,
3187 .SamplerCount = 0,
3188 .NumberofThreadsinGPGPUThreadGroup = 0 /* FIXME: Really? */
3189 };
3190
3191 uint32_t size = GEN8_INTERFACE_DESCRIPTOR_DATA_length * sizeof(uint32_t);
3192 struct anv_state state =
3193 anv_state_pool_alloc(&device->dynamic_state_pool, size, 64);
3194
3195 GEN8_INTERFACE_DESCRIPTOR_DATA_pack(NULL, state.map, &desc);
3196
3197 anv_batch_emit(&cmd_buffer->batch, GEN8_MEDIA_INTERFACE_DESCRIPTOR_LOAD,
3198 .InterfaceDescriptorTotalLength = size,
3199 .InterfaceDescriptorDataStartAddress = state.offset);
3200
3201 return VK_SUCCESS;
3202 }
3203
3204 static void
3205 anv_cmd_buffer_flush_compute_state(struct anv_cmd_buffer *cmd_buffer)
3206 {
3207 struct anv_pipeline *pipeline = cmd_buffer->compute_pipeline;
3208 VkResult result;
3209
3210 assert(pipeline->active_stages == VK_SHADER_STAGE_COMPUTE_BIT);
3211
3212 if (cmd_buffer->current_pipeline != GPGPU) {
3213 anv_batch_emit(&cmd_buffer->batch, GEN8_PIPELINE_SELECT,
3214 .PipelineSelection = GPGPU);
3215 cmd_buffer->current_pipeline = GPGPU;
3216 }
3217
3218 if (cmd_buffer->compute_dirty & ANV_CMD_BUFFER_PIPELINE_DIRTY)
3219 anv_batch_emit_batch(&cmd_buffer->batch, &pipeline->batch);
3220
3221 if ((cmd_buffer->descriptors_dirty & VK_SHADER_STAGE_COMPUTE_BIT) ||
3222 (cmd_buffer->compute_dirty & ANV_CMD_BUFFER_PIPELINE_DIRTY)) {
3223 result = flush_compute_descriptor_set(cmd_buffer);
3224 if (result != VK_SUCCESS) {
3225 result = anv_cmd_buffer_new_surface_state_bo(cmd_buffer);
3226 assert(result == VK_SUCCESS);
3227 result = flush_compute_descriptor_set(cmd_buffer);
3228 assert(result == VK_SUCCESS);
3229 }
3230 cmd_buffer->descriptors_dirty &= ~VK_SHADER_STAGE_COMPUTE_BIT;
3231 }
3232
3233 cmd_buffer->compute_dirty = 0;
3234 }
3235
3236 static void
3237 anv_cmd_buffer_flush_state(struct anv_cmd_buffer *cmd_buffer)
3238 {
3239 struct anv_pipeline *pipeline = cmd_buffer->pipeline;
3240 uint32_t *p;
3241
3242 uint32_t vb_emit = cmd_buffer->vb_dirty & pipeline->vb_used;
3243
3244 assert((pipeline->active_stages & VK_SHADER_STAGE_COMPUTE_BIT) == 0);
3245
3246 if (cmd_buffer->current_pipeline != _3D) {
3247 anv_batch_emit(&cmd_buffer->batch, GEN8_PIPELINE_SELECT,
3248 .PipelineSelection = _3D);
3249 cmd_buffer->current_pipeline = _3D;
3250 }
3251
3252 if (vb_emit) {
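/* 3DSTATE_VERTEX_BUFFERS takes a one-dword header followed by one
* four-dword VERTEX_BUFFER_STATE entry per buffer, hence the dword
* count below.
*/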
3253 const uint32_t num_buffers = __builtin_popcount(vb_emit);
3254 const uint32_t num_dwords = 1 + num_buffers * 4;
3255
3256 p = anv_batch_emitn(&cmd_buffer->batch, num_dwords,
3257 GEN8_3DSTATE_VERTEX_BUFFERS);
3258 uint32_t vb, i = 0;
3259 for_each_bit(vb, vb_emit) {
3260 struct anv_buffer *buffer = cmd_buffer->vertex_bindings[vb].buffer;
3261 uint32_t offset = cmd_buffer->vertex_bindings[vb].offset;
3262
3263 struct GEN8_VERTEX_BUFFER_STATE state = {
3264 .VertexBufferIndex = vb,
3265 .MemoryObjectControlState = GEN8_MOCS,
3266 .AddressModifyEnable = true,
3267 .BufferPitch = pipeline->binding_stride[vb],
3268 .BufferStartingAddress = { buffer->bo, buffer->offset + offset },
3269 .BufferSize = buffer->size - offset
3270 };
3271
3272 GEN8_VERTEX_BUFFER_STATE_pack(&cmd_buffer->batch, &p[1 + i * 4], &state);
3273 i++;
3274 }
3275 }
3276
3277 if (cmd_buffer->dirty & ANV_CMD_BUFFER_PIPELINE_DIRTY) {
3278 /* If somebody compiled a pipeline after starting a command buffer the
3279 * scratch bo may have grown since we started this cmd buffer (and
3280 * emitted STATE_BASE_ADDRESS). If we're binding that pipeline now,
3281 * reemit STATE_BASE_ADDRESS so that we use the bigger scratch bo. */
3282 if (cmd_buffer->scratch_size < pipeline->total_scratch)
3283 anv_cmd_buffer_emit_state_base_address(cmd_buffer);
3284
3285 anv_batch_emit_batch(&cmd_buffer->batch, &pipeline->batch);
3286 }
3287
3288 if (cmd_buffer->descriptors_dirty)
3289 flush_descriptor_sets(cmd_buffer);
3290
3291 if (cmd_buffer->dirty & ANV_CMD_BUFFER_VP_DIRTY) {
3292 anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_SCISSOR_STATE_POINTERS,
3293 .ScissorRectPointer = cmd_buffer->vp_state->scissor.offset);
3294 anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_VIEWPORT_STATE_POINTERS_CC,
3295 .CCViewportPointer = cmd_buffer->vp_state->cc_vp.offset);
3296 anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP,
3297 .SFClipViewportPointer = cmd_buffer->vp_state->sf_clip_vp.offset);
3298 }
3299
3300 if (cmd_buffer->dirty & (ANV_CMD_BUFFER_PIPELINE_DIRTY | ANV_CMD_BUFFER_RS_DIRTY)) {
3301 anv_batch_emit_merge(&cmd_buffer->batch,
3302 cmd_buffer->rs_state->state_sf, pipeline->state_sf);
3303 anv_batch_emit_merge(&cmd_buffer->batch,
3304 cmd_buffer->rs_state->state_raster, pipeline->state_raster);
3305 }
3306
3307 if (cmd_buffer->ds_state &&
3308 (cmd_buffer->dirty & (ANV_CMD_BUFFER_PIPELINE_DIRTY | ANV_CMD_BUFFER_DS_DIRTY)))
3309 anv_batch_emit_merge(&cmd_buffer->batch,
3310 cmd_buffer->ds_state->state_wm_depth_stencil,
3311 pipeline->state_wm_depth_stencil);
3312
3313 if (cmd_buffer->dirty & (ANV_CMD_BUFFER_CB_DIRTY | ANV_CMD_BUFFER_DS_DIRTY)) {
3314 struct anv_state state;
3315 if (cmd_buffer->ds_state == NULL)
3316 state = anv_cmd_buffer_emit_dynamic(cmd_buffer,
3317 cmd_buffer->cb_state->state_color_calc,
3318 GEN8_COLOR_CALC_STATE_length, 64);
3319 else if (cmd_buffer->cb_state == NULL)
3320 state = anv_cmd_buffer_emit_dynamic(cmd_buffer,
3321 cmd_buffer->ds_state->state_color_calc,
3322 GEN8_COLOR_CALC_STATE_length, 64);
3323 else
3324 state = anv_cmd_buffer_merge_dynamic(cmd_buffer,
3325 cmd_buffer->ds_state->state_color_calc,
3326 cmd_buffer->cb_state->state_color_calc,
3327 GEN8_COLOR_CALC_STATE_length, 64);
3328
3329 anv_batch_emit(&cmd_buffer->batch,
3330 GEN8_3DSTATE_CC_STATE_POINTERS,
3331 .ColorCalcStatePointer = state.offset,
3332 .ColorCalcStatePointerValid = true);
3333 }
3334
3335 if (cmd_buffer->dirty & (ANV_CMD_BUFFER_PIPELINE_DIRTY | ANV_CMD_BUFFER_INDEX_BUFFER_DIRTY)) {
3336 anv_batch_emit_merge(&cmd_buffer->batch,
3337 cmd_buffer->state_vf, pipeline->state_vf);
3338 }
3339
3340 cmd_buffer->vb_dirty &= ~vb_emit;
3341 cmd_buffer->dirty = 0;
3342 }
3343
3344 void anv_CmdDraw(
3345 VkCmdBuffer cmdBuffer,
3346 uint32_t firstVertex,
3347 uint32_t vertexCount,
3348 uint32_t firstInstance,
3349 uint32_t instanceCount)
3350 {
3351 ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer);
3352
3353 anv_cmd_buffer_flush_state(cmd_buffer);
3354
3355 anv_batch_emit(&cmd_buffer->batch, GEN8_3DPRIMITIVE,
3356 .VertexAccessType = SEQUENTIAL,
3357 .VertexCountPerInstance = vertexCount,
3358 .StartVertexLocation = firstVertex,
3359 .InstanceCount = instanceCount,
3360 .StartInstanceLocation = firstInstance,
3361 .BaseVertexLocation = 0);
3362 }
3363
3364 void anv_CmdDrawIndexed(
3365 VkCmdBuffer cmdBuffer,
3366 uint32_t firstIndex,
3367 uint32_t indexCount,
3368 int32_t vertexOffset,
3369 uint32_t firstInstance,
3370 uint32_t instanceCount)
3371 {
3372 ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer);
3373
3374 anv_cmd_buffer_flush_state(cmd_buffer);
3375
3376 anv_batch_emit(&cmd_buffer->batch, GEN8_3DPRIMITIVE,
3377 .VertexAccessType = RANDOM,
3378 .VertexCountPerInstance = indexCount,
3379 .StartVertexLocation = firstIndex,
3380 .InstanceCount = instanceCount,
3381 .StartInstanceLocation = firstInstance,
3382 .BaseVertexLocation = vertexOffset);
3383 }
3384
3385 static void
3386 anv_batch_lrm(struct anv_batch *batch,
3387 uint32_t reg, struct anv_bo *bo, uint32_t offset)
3388 {
3389 anv_batch_emit(batch, GEN8_MI_LOAD_REGISTER_MEM,
3390 .RegisterAddress = reg,
3391 .MemoryAddress = { bo, offset });
3392 }
3393
3394 static void
3395 anv_batch_lri(struct anv_batch *batch, uint32_t reg, uint32_t imm)
3396 {
3397 anv_batch_emit(batch, GEN8_MI_LOAD_REGISTER_IMM,
3398 .RegisterOffset = reg,
3399 .DataDWord = imm);
3400 }
3401
3402 /* Auto-Draw / Indirect Registers */
3403 #define GEN7_3DPRIM_END_OFFSET 0x2420
3404 #define GEN7_3DPRIM_START_VERTEX 0x2430
3405 #define GEN7_3DPRIM_VERTEX_COUNT 0x2434
3406 #define GEN7_3DPRIM_INSTANCE_COUNT 0x2438
3407 #define GEN7_3DPRIM_START_INSTANCE 0x243C
3408 #define GEN7_3DPRIM_BASE_VERTEX 0x2440
3409
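/* The indirect draw paths below load the draw parameters from the buffer
* into the 3DPRIM registers above. The dword order matches the indirect
* command layouts: vertexCount, instanceCount, firstVertex, firstInstance
* for non-indexed draws, with vertexOffset inserted before firstInstance
* for indexed draws.
*/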
3410 void anv_CmdDrawIndirect(
3411 VkCmdBuffer cmdBuffer,
3412 VkBuffer _buffer,
3413 VkDeviceSize offset,
3414 uint32_t count,
3415 uint32_t stride)
3416 {
3417 ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer);
3418 ANV_FROM_HANDLE(anv_buffer, buffer, _buffer);
3419 struct anv_bo *bo = buffer->bo;
3420 uint32_t bo_offset = buffer->offset + offset;
3421
3422 anv_cmd_buffer_flush_state(cmd_buffer);
3423
3424 anv_batch_lrm(&cmd_buffer->batch, GEN7_3DPRIM_VERTEX_COUNT, bo, bo_offset);
3425 anv_batch_lrm(&cmd_buffer->batch, GEN7_3DPRIM_INSTANCE_COUNT, bo, bo_offset + 4);
3426 anv_batch_lrm(&cmd_buffer->batch, GEN7_3DPRIM_START_VERTEX, bo, bo_offset + 8);
3427 anv_batch_lrm(&cmd_buffer->batch, GEN7_3DPRIM_START_INSTANCE, bo, bo_offset + 12);
3428 anv_batch_lri(&cmd_buffer->batch, GEN7_3DPRIM_BASE_VERTEX, 0);
3429
3430 anv_batch_emit(&cmd_buffer->batch, GEN8_3DPRIMITIVE,
3431 .IndirectParameterEnable = true,
3432 .VertexAccessType = SEQUENTIAL);
3433 }
3434
3435 void anv_CmdDrawIndexedIndirect(
3436 VkCmdBuffer cmdBuffer,
3437 VkBuffer _buffer,
3438 VkDeviceSize offset,
3439 uint32_t count,
3440 uint32_t stride)
3441 {
3442 ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer);
3443 ANV_FROM_HANDLE(anv_buffer, buffer, _buffer);
3444 struct anv_bo *bo = buffer->bo;
3445 uint32_t bo_offset = buffer->offset + offset;
3446
3447 anv_cmd_buffer_flush_state(cmd_buffer);
3448
3449 anv_batch_lrm(&cmd_buffer->batch, GEN7_3DPRIM_VERTEX_COUNT, bo, bo_offset);
3450 anv_batch_lrm(&cmd_buffer->batch, GEN7_3DPRIM_INSTANCE_COUNT, bo, bo_offset + 4);
3451 anv_batch_lrm(&cmd_buffer->batch, GEN7_3DPRIM_START_VERTEX, bo, bo_offset + 8);
3452 anv_batch_lrm(&cmd_buffer->batch, GEN7_3DPRIM_BASE_VERTEX, bo, bo_offset + 12);
3453 anv_batch_lrm(&cmd_buffer->batch, GEN7_3DPRIM_START_INSTANCE, bo, bo_offset + 16);
3454
3455 anv_batch_emit(&cmd_buffer->batch, GEN8_3DPRIMITIVE,
3456 .IndirectParameterEnable = true,
3457 .VertexAccessType = RANDOM);
3458 }
3459
3460 void anv_CmdDispatch(
3461 VkCmdBuffer cmdBuffer,
3462 uint32_t x,
3463 uint32_t y,
3464 uint32_t z)
3465 {
3466 ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer);
3467 struct anv_pipeline *pipeline = cmd_buffer->compute_pipeline;
3468 struct brw_cs_prog_data *prog_data = &pipeline->cs_prog_data;
3469
3470 anv_cmd_buffer_flush_compute_state(cmd_buffer);
3471
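/* GPGPU_WALKER's SIMDSize is an encoded field rather than a width
* (SIMD8/16/32 map to 0/1/2), which integer-dividing the compiler's SIMD
* width by 16 happens to produce for all three cases.
*/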
3472 anv_batch_emit(&cmd_buffer->batch, GEN8_GPGPU_WALKER,
3473 .SIMDSize = prog_data->simd_size / 16,
3474 .ThreadDepthCounterMaximum = 0,
3475 .ThreadHeightCounterMaximum = 0,
3476 .ThreadWidthCounterMaximum = pipeline->cs_thread_width_max,
3477 .ThreadGroupIDXDimension = x,
3478 .ThreadGroupIDYDimension = y,
3479 .ThreadGroupIDZDimension = z,
3480 .RightExecutionMask = pipeline->cs_right_mask,
3481 .BottomExecutionMask = 0xffffffff);
3482
3483 anv_batch_emit(&cmd_buffer->batch, GEN8_MEDIA_STATE_FLUSH);
3484 }
3485
3486 #define GPGPU_DISPATCHDIMX 0x2500
3487 #define GPGPU_DISPATCHDIMY 0x2504
3488 #define GPGPU_DISPATCHDIMZ 0x2508
3489
3490 void anv_CmdDispatchIndirect(
3491 VkCmdBuffer cmdBuffer,
3492 VkBuffer _buffer,
3493 VkDeviceSize offset)
3494 {
3495 ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer);
3496 ANV_FROM_HANDLE(anv_buffer, buffer, _buffer);
3497 struct anv_pipeline *pipeline = cmd_buffer->compute_pipeline;
3498 struct brw_cs_prog_data *prog_data = &pipeline->cs_prog_data;
3499 struct anv_bo *bo = buffer->bo;
3500 uint32_t bo_offset = buffer->offset + offset;
3501
3502 anv_cmd_buffer_flush_compute_state(cmd_buffer);
3503
3504 anv_batch_lrm(&cmd_buffer->batch, GPGPU_DISPATCHDIMX, bo, bo_offset);
3505 anv_batch_lrm(&cmd_buffer->batch, GPGPU_DISPATCHDIMY, bo, bo_offset + 4);
3506 anv_batch_lrm(&cmd_buffer->batch, GPGPU_DISPATCHDIMZ, bo, bo_offset + 8);
3507
3508 anv_batch_emit(&cmd_buffer->batch, GEN8_GPGPU_WALKER,
3509 .IndirectParameterEnable = true,
3510 .SIMDSize = prog_data->simd_size / 16,
3511 .ThreadDepthCounterMaximum = 0,
3512 .ThreadHeightCounterMaximum = 0,
3513 .ThreadWidthCounterMaximum = pipeline->cs_thread_width_max,
3514 .RightExecutionMask = pipeline->cs_right_mask,
3515 .BottomExecutionMask = 0xffffffff);
3516
3517 anv_batch_emit(&cmd_buffer->batch, GEN8_MEDIA_STATE_FLUSH);
3518 }
3519
3520 void anv_CmdSetEvent(
3521 VkCmdBuffer cmdBuffer,
3522 VkEvent event,
3523 VkPipeEvent pipeEvent)
3524 {
3525 stub();
3526 }
3527
3528 void anv_CmdResetEvent(
3529 VkCmdBuffer cmdBuffer,
3530 VkEvent event,
3531 VkPipeEvent pipeEvent)
3532 {
3533 stub();
3534 }
3535
3536 void anv_CmdWaitEvents(
3537 VkCmdBuffer cmdBuffer,
3538 VkWaitEvent waitEvent,
3539 uint32_t eventCount,
3540 const VkEvent* pEvents,
3541 VkPipeEventFlags pipeEventMask,
3542 uint32_t memBarrierCount,
3543 const void* const* ppMemBarriers)
3544 {
3545 stub();
3546 }
3547
3548 void anv_CmdPipelineBarrier(
3549 VkCmdBuffer cmdBuffer,
3550 VkWaitEvent waitEvent,
3551 VkPipeEventFlags pipeEventMask,
3552 uint32_t memBarrierCount,
3553 const void* const* ppMemBarriers)
3554 {
3555 ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer);
3556 uint32_t b, *dw;
3557
3558 struct GEN8_PIPE_CONTROL cmd = {
3559 GEN8_PIPE_CONTROL_header,
3560 .PostSyncOperation = NoWrite,
3561 };
3562
3563 /* XXX: I think waitEvent is a no-op on our HW. We should verify that. */
3564
3565 if (anv_clear_mask(&pipeEventMask, VK_PIPE_EVENT_TOP_OF_PIPE_BIT)) {
3566 /* This is just what PIPE_CONTROL does */
3567 }
3568
3569 if (anv_clear_mask(&pipeEventMask,
3570 VK_PIPE_EVENT_VERTEX_PROCESSING_COMPLETE_BIT |
3571 VK_PIPE_EVENT_LOCAL_FRAGMENT_PROCESSING_COMPLETE_BIT |
3572 VK_PIPE_EVENT_FRAGMENT_PROCESSING_COMPLETE_BIT)) {
3573 cmd.StallAtPixelScoreboard = true;
3574 }
3575
3576
3577 if (anv_clear_mask(&pipeEventMask,
3578 VK_PIPE_EVENT_GRAPHICS_PIPELINE_COMPLETE_BIT |
3579 VK_PIPE_EVENT_COMPUTE_PIPELINE_COMPLETE_BIT |
3580 VK_PIPE_EVENT_TRANSFER_COMPLETE_BIT |
3581 VK_PIPE_EVENT_COMMANDS_COMPLETE_BIT)) {
3582 cmd.CommandStreamerStallEnable = true;
3583 }
3584
3585 if (anv_clear_mask(&pipeEventMask, VK_PIPE_EVENT_CPU_SIGNAL_BIT)) {
3586 anv_finishme("VK_PIPE_EVENT_CPU_SIGNAL_BIT");
3587 }
3588
3589 /* We checked all known VkPipeEventFlags. */
3590 anv_assert(pipeEventMask == 0);
3591
3592 /* XXX: For now we simply flush whatever categories the app asks for.
3593 * One of these days we may be able to do better, but in most areas this
3594 * is all the hardware allows for anyway.
3595 */
3596 VkMemoryOutputFlags out_flags = 0;
3597 VkMemoryInputFlags in_flags = 0;
3598
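/* Union the output (write) and input (read) masks of all the barriers;
 * they get folded into the single PIPE_CONTROL emitted below.
 */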
3599 for (uint32_t i = 0; i < memBarrierCount; i++) {
3600 const struct anv_common *common = ppMemBarriers[i];
3601 switch (common->sType) {
3602 case VK_STRUCTURE_TYPE_MEMORY_BARRIER: {
3603 const VkMemoryBarrier *barrier = (VkMemoryBarrier *)common;
3604 out_flags |= barrier->outputMask;
3605 in_flags |= barrier->inputMask;
3606 break;
3607 }
3608 case VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER: {
3609 const VkBufferMemoryBarrier *barrier = (VkBufferMemoryBarrier *)common;
3610 out_flags |= barrier->outputMask;
3611 in_flags |= barrier->inputMask;
3612 break;
3613 }
3614 case VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER: {
3615 const VkImageMemoryBarrier *barrier = (VkImageMemoryBarrier *)common;
3616 out_flags |= barrier->outputMask;
3617 in_flags |= barrier->inputMask;
3618 break;
3619 }
3620 default:
3621 unreachable("Invalid memory barrier type");
3622 }
3623 }
3624
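/* Writes that must become visible turn into cache flushes. */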
3625 for_each_bit(b, out_flags) {
3626 switch ((VkMemoryOutputFlags)(1 << b)) {
3627 case VK_MEMORY_OUTPUT_HOST_WRITE_BIT:
3628 break; /* FIXME: Little-core systems */
3629 case VK_MEMORY_OUTPUT_SHADER_WRITE_BIT:
3630 cmd.DCFlushEnable = true;
3631 break;
3632 case VK_MEMORY_OUTPUT_COLOR_ATTACHMENT_BIT:
3633 cmd.RenderTargetCacheFlushEnable = true;
3634 break;
3635 case VK_MEMORY_OUTPUT_DEPTH_STENCIL_ATTACHMENT_BIT:
3636 cmd.DepthCacheFlushEnable = true;
3637 break;
3638 case VK_MEMORY_OUTPUT_TRANSFER_BIT:
3639 cmd.RenderTargetCacheFlushEnable = true;
3640 cmd.DepthCacheFlushEnable = true;
3641 break;
3642 default:
3643 unreachable("Invalid memory output flag");
3644 }
3645 }
3646
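/* Reads that must observe those writes turn into cache invalidations. */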
3647 for_each_bit(b, in_flags) {
3648 switch ((VkMemoryInputFlags)(1 << b)) {
3649 case VK_MEMORY_INPUT_HOST_READ_BIT:
3650 break; /* FIXME: Little-core systems */
3651 case VK_MEMORY_INPUT_INDIRECT_COMMAND_BIT:
3652 case VK_MEMORY_INPUT_INDEX_FETCH_BIT:
3653 case VK_MEMORY_INPUT_VERTEX_ATTRIBUTE_FETCH_BIT:
3654 cmd.VFCacheInvalidationEnable = true;
3655 break;
3656 case VK_MEMORY_INPUT_UNIFORM_READ_BIT:
3657 cmd.ConstantCacheInvalidationEnable = true;
3658 /* fallthrough */
3659 case VK_MEMORY_INPUT_SHADER_READ_BIT:
3660 cmd.DCFlushEnable = true;
3661 cmd.TextureCacheInvalidationEnable = true;
3662 break;
3663 case VK_MEMORY_INPUT_COLOR_ATTACHMENT_BIT:
3664 case VK_MEMORY_INPUT_DEPTH_STENCIL_ATTACHMENT_BIT:
3665 break; /* XXX: Unclear whether attachment reads need any invalidation here. */
3666 case VK_MEMORY_INPUT_TRANSFER_BIT:
3667 cmd.TextureCacheInvalidationEnable = true;
3668 break;
3669 }
3670 }
3671
3672 dw = anv_batch_emit_dwords(&cmd_buffer->batch, GEN8_PIPE_CONTROL_length);
3673 GEN8_PIPE_CONTROL_pack(&cmd_buffer->batch, dw, &cmd);
3674 }
3675
3676 static void
3677 anv_framebuffer_destroy(struct anv_device *device,
3678 struct anv_object *object,
3679 VkObjectType obj_type)
3680 {
3681 struct anv_framebuffer *fb = (struct anv_framebuffer *)object;
3682
3683 assert(obj_type == VK_OBJECT_TYPE_FRAMEBUFFER);
3684
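/* Destroy the viewport/scissor state that anv_CreateFramebuffer created
 * on the framebuffer's behalf.
 */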
3685 anv_DestroyObject(anv_device_to_handle(device),
3686 VK_OBJECT_TYPE_DYNAMIC_VP_STATE,
3687 fb->vp_state);
3688
3689 anv_device_free(device, fb);
3690 }
3691
3692 VkResult anv_CreateFramebuffer(
3693 VkDevice _device,
3694 const VkFramebufferCreateInfo* pCreateInfo,
3695 VkFramebuffer* pFramebuffer)
3696 {
3697 ANV_FROM_HANDLE(anv_device, device, _device);
3698 struct anv_framebuffer *framebuffer;
3699
3700 static const struct anv_depth_stencil_view null_view =
3701 { .depth_format = D16_UNORM, .depth_stride = 0, .stencil_stride = 0 };
3702
3703 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO);
3704
3705 framebuffer = anv_device_alloc(device, sizeof(*framebuffer), 8,
3706 VK_SYSTEM_ALLOC_TYPE_API_OBJECT);
3707 if (framebuffer == NULL)
3708 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
3709
3710 framebuffer->base.destructor = anv_framebuffer_destroy;
3711
3712 framebuffer->color_attachment_count = pCreateInfo->colorAttachmentCount;
3713 for (uint32_t i = 0; i < pCreateInfo->colorAttachmentCount; i++) {
3714 framebuffer->color_attachments[i] =
3715 (struct anv_surface_view *) pCreateInfo->pColorAttachments[i].view;
3716 }
3717
3718 if (pCreateInfo->pDepthStencilAttachment) {
3719 framebuffer->depth_stencil =
3720 anv_depth_stencil_view_from_handle(pCreateInfo->pDepthStencilAttachment->view);
3721 } else {
3722 framebuffer->depth_stencil = &null_view;
3723 }
3724
3725 framebuffer->sample_count = pCreateInfo->sampleCount;
3726 framebuffer->width = pCreateInfo->width;
3727 framebuffer->height = pCreateInfo->height;
3728 framebuffer->layers = pCreateInfo->layers;
3729
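/* Create a default viewport/scissor covering the whole framebuffer.
 * This presumably serves as the fallback dynamic viewport state for
 * command buffers that never bind one of their own.
 */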
3730 anv_CreateDynamicViewportState(anv_device_to_handle(device),
3731 &(VkDynamicViewportStateCreateInfo) {
3732 .sType = VK_STRUCTURE_TYPE_DYNAMIC_VP_STATE_CREATE_INFO,
3733 .viewportAndScissorCount = 1,
3734 .pViewports = (VkViewport[]) {
3735 {
3736 .originX = 0,
3737 .originY = 0,
3738 .width = pCreateInfo->width,
3739 .height = pCreateInfo->height,
3740 .minDepth = 0,
3741 .maxDepth = 1
3742 },
3743 },
3744 .pScissors = (VkRect2D[]) {
3745 { { 0, 0 },
3746 { pCreateInfo->width, pCreateInfo->height } },
3747 }
3748 },
3749 &framebuffer->vp_state);
3750
3751 *pFramebuffer = anv_framebuffer_to_handle(framebuffer);
3752
3753 return VK_SUCCESS;
3754 }
3755
3756 VkResult anv_CreateRenderPass(
3757 VkDevice _device,
3758 const VkRenderPassCreateInfo* pCreateInfo,
3759 VkRenderPass* pRenderPass)
3760 {
3761 ANV_FROM_HANDLE(anv_device, device, _device);
3762 struct anv_render_pass *pass;
3763 size_t size;
3764
3765 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO);
3766
3767 size = sizeof(*pass) +
3768 pCreateInfo->layers * sizeof(struct anv_render_pass_layer);
3769 pass = anv_device_alloc(device, size, 8,
3770 VK_SYSTEM_ALLOC_TYPE_API_OBJECT);
3771 if (pass == NULL)
3772 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
3773
3774 pass->render_area = pCreateInfo->renderArea;
3775
3776 pass->num_layers = pCreateInfo->layers;
3777
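/* Count the layers that request VK_ATTACHMENT_LOAD_OP_CLEAR; the clear
 * path (anv_cmd_buffer_clear) presumably uses this to decide whether any
 * clears are needed at vkCmdBeginRenderPass time.
 */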
3778 pass->num_clear_layers = 0;
3779 for (uint32_t i = 0; i < pCreateInfo->layers; i++) {
3780 pass->layers[i].color_load_op = pCreateInfo->pColorLoadOps[i];
3781 pass->layers[i].clear_color = pCreateInfo->pColorLoadClearValues[i];
3782 if (pass->layers[i].color_load_op == VK_ATTACHMENT_LOAD_OP_CLEAR)
3783 pass->num_clear_layers++;
3784 }
3785
3786 *pRenderPass = anv_render_pass_to_handle(pass);
3787
3788 return VK_SUCCESS;
3789 }
3790
3791 VkResult anv_DestroyRenderPass(
3792 VkDevice _device,
3793 VkRenderPass renderPass)
3794 {
3795 ANV_FROM_HANDLE(anv_device, device, _device);
3796
3797 anv_device_free(device, anv_render_pass_from_handle(renderPass));
3798
3799 return VK_SUCCESS;
3800 }
3801
3802 VkResult anv_GetRenderAreaGranularity(
3803 VkDevice device,
3804 VkRenderPass renderPass,
3805 VkExtent2D* pGranularity)
3806 {
3807 *pGranularity = (VkExtent2D) { 1, 1 };
3808
3809 return VK_SUCCESS;
3810 }
3811
3812 static void
3813 anv_cmd_buffer_emit_depth_stencil(struct anv_cmd_buffer *cmd_buffer,
3814 struct anv_render_pass *pass)
3815 {
3816 const struct anv_depth_stencil_view *view =
3817 cmd_buffer->framebuffer->depth_stencil;
3818
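/* If the framebuffer has no depth/stencil attachment, `view` is the
 * null_view from anv_CreateFramebuffer; its zero strides disable depth
 * and stencil writes in the packets below.
 */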
3819 /* FIXME: Implement the PMA stall W/A */
3820 /* FIXME: Width and Height are wrong */
3821
3822 anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_DEPTH_BUFFER,
3823 .SurfaceType = SURFTYPE_2D,
3824 .DepthWriteEnable = view->depth_stride > 0,
3825 .StencilWriteEnable = view->stencil_stride > 0,
3826 .HierarchicalDepthBufferEnable = false,
3827 .SurfaceFormat = view->depth_format,
3828 .SurfacePitch = view->depth_stride > 0 ? view->depth_stride - 1 : 0,
3829 .SurfaceBaseAddress = { view->bo, view->depth_offset },
3830 .Height = pass->render_area.extent.height - 1,
3831 .Width = pass->render_area.extent.width - 1,
3832 .LOD = 0,
3833 .Depth = 1 - 1,
3834 .MinimumArrayElement = 0,
3835 .DepthBufferObjectControlState = GEN8_MOCS,
3836 .RenderTargetViewExtent = 1 - 1,
3837 .SurfaceQPitch = view->depth_qpitch >> 2);
3838
3839 /* Disable hierarchical depth buffers. */
3840 anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_HIER_DEPTH_BUFFER);
3841
3842 anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_STENCIL_BUFFER,
3843 .StencilBufferEnable = view->stencil_stride > 0,
3844 .StencilBufferObjectControlState = GEN8_MOCS,
3845 .SurfacePitch = view->stencil_stride > 0 ? view->stencil_stride - 1 : 0,
3846 .SurfaceBaseAddress = { view->bo, view->stencil_offset },
3847 .SurfaceQPitch = view->stencil_qpitch >> 2);
3848
3849 /* Clear the clear params. */
3850 anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_CLEAR_PARAMS);
3851 }
3852
3853 void anv_CmdPushConstants(
3854 VkCmdBuffer cmdBuffer,
3855 VkPipelineLayout layout,
3856 VkShaderStageFlags stageFlags,
3857 uint32_t start,
3858 uint32_t length,
3859 const void* values)
3860 {
3861 stub();
3862 }
3863
3864 void anv_CmdBeginRenderPass(
3865 VkCmdBuffer cmdBuffer,
3866 const VkRenderPassBegin* pRenderPassBegin)
3867 {
3868 ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer);
3869 ANV_FROM_HANDLE(anv_render_pass, pass, pRenderPassBegin->renderPass);
3870 ANV_FROM_HANDLE(anv_framebuffer, framebuffer, pRenderPassBegin->framebuffer);
3871
3872 assert(pRenderPassBegin->contents == VK_RENDER_PASS_CONTENTS_INLINE);
3873
3874 cmd_buffer->framebuffer = framebuffer;
3875
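/* The new framebuffer's render targets are referenced through the
 * fragment binding table, so make sure it gets re-emitted.
 */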
3876 cmd_buffer->descriptors_dirty |= VK_SHADER_STAGE_FRAGMENT_BIT;
3877
3878 anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_DRAWING_RECTANGLE,
3879 .ClippedDrawingRectangleYMin = pass->render_area.offset.y,
3880 .ClippedDrawingRectangleXMin = pass->render_area.offset.x,
3881 .ClippedDrawingRectangleYMax =
3882 pass->render_area.offset.y + pass->render_area.extent.height - 1,
3883 .ClippedDrawingRectangleXMax =
3884 pass->render_area.offset.x + pass->render_area.extent.width - 1,
3885 .DrawingRectangleOriginY = 0,
3886 .DrawingRectangleOriginX = 0);
3887
3888 anv_cmd_buffer_emit_depth_stencil(cmd_buffer, pass);
3889
3890 anv_cmd_buffer_clear(cmd_buffer, pass);
3891 }
3892
3893 void anv_CmdEndRenderPass(
3894 VkCmdBuffer cmdBuffer)
3895 {
3896 ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer);
3897
3898 /* Emit a flushing pipe control at the end of a pass. This is kind of a
3899 * hack but it ensures that render targets always actually get written.
3900 * Eventually, we should do flushing based on image format transitions
3901 * or something of that nature.
3902 */
3903 anv_batch_emit(&cmd_buffer->batch, GEN8_PIPE_CONTROL,
3904 .PostSyncOperation = NoWrite,
3905 .RenderTargetCacheFlushEnable = true,
3906 .InstructionCacheInvalidateEnable = true,
3907 .DepthCacheFlushEnable = true,
3908 .VFCacheInvalidationEnable = true,
3909 .TextureCacheInvalidationEnable = true,
3910 .CommandStreamerStallEnable = true);
3911 }
3912
3913 void anv_CmdExecuteCommands(
3914 VkCmdBuffer cmdBuffer,
3915 uint32_t cmdBuffersCount,
3916 const VkCmdBuffer* pCmdBuffers)
3917 {
3918 stub();
3919 }
3920
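/* The debug marker and object tag entry points are exported directly with
 * default visibility instead of going through the anv_* dispatch names;
 * they are implemented as no-ops below.
 */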
3921 void vkCmdDbgMarkerBegin(
3922 VkCmdBuffer cmdBuffer,
3923 const char* pMarker)
3924 __attribute__ ((visibility ("default")));
3925
3926 void vkCmdDbgMarkerEnd(
3927 VkCmdBuffer cmdBuffer)
3928 __attribute__ ((visibility ("default")));
3929
3930 VkResult vkDbgSetObjectTag(
3931 VkDevice device,
3932 VkObject object,
3933 size_t tagSize,
3934 const void* pTag)
3935 __attribute__ ((visibility ("default")));
3936
3937
3938 void vkCmdDbgMarkerBegin(
3939 VkCmdBuffer cmdBuffer,
3940 const char* pMarker)
3941 {
3942 }
3943
3944 void vkCmdDbgMarkerEnd(
3945 VkCmdBuffer cmdBuffer)
3946 {
3947 }
3948
3949 VkResult vkDbgSetObjectTag(
3950 VkDevice device,
3951 VkObject object,
3952 size_t tagSize,
3953 const void* pTag)
3954 {
3955 return VK_SUCCESS;
3956 }