src/vulkan/device.c
1 /*
2 * Copyright © 2015 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include <assert.h>
25 #include <stdbool.h>
26 #include <string.h>
27 #include <unistd.h>
28 #include <fcntl.h>
29
30 #include "private.h"
31 #include "mesa/main/git_sha1.h"
32
33 static int
34 anv_env_get_int(const char *name)
35 {
36 const char *val = getenv(name);
37
38 if (!val)
39 return 0;
40
41 return strtol(val, NULL, 0);
42 }
43
44 static VkResult
45 fill_physical_device(struct anv_physical_device *device,
46 struct anv_instance *instance,
47 const char *path)
48 {
49 int fd;
50
51 fd = open(path, O_RDWR | O_CLOEXEC);
52 if (fd < 0)
53 return vk_error(VK_ERROR_UNAVAILABLE);
54
55 device->instance = instance;
56 device->path = path;
57
58 device->chipset_id = anv_env_get_int("INTEL_DEVID_OVERRIDE");
59 device->no_hw = false;
60 if (device->chipset_id) {
61 /* INTEL_DEVID_OVERRIDE implies INTEL_NO_HW. */
62 device->no_hw = true;
63 } else {
64 device->chipset_id = anv_gem_get_param(fd, I915_PARAM_CHIPSET_ID);
65 }
66 if (!device->chipset_id)
67 goto fail;
68
69 device->name = brw_get_device_name(device->chipset_id);
70 device->info = brw_get_device_info(device->chipset_id, -1);
71 if (!device->info)
72 goto fail;
73
74 if (!anv_gem_get_param(fd, I915_PARAM_HAS_WAIT_TIMEOUT))
75 goto fail;
76
77 if (!anv_gem_get_param(fd, I915_PARAM_HAS_EXECBUF2))
78 goto fail;
79
80 if (!anv_gem_get_param(fd, I915_PARAM_HAS_LLC))
81 goto fail;
82
83 if (!anv_gem_get_param(fd, I915_PARAM_HAS_EXEC_CONSTANTS))
84 goto fail;
85
86 close(fd);
87
88 return VK_SUCCESS;
89
90 fail:
91 close(fd);
92
93 return vk_error(VK_ERROR_UNAVAILABLE);
94 }
95
96 static void *default_alloc(
97 void* pUserData,
98 size_t size,
99 size_t alignment,
100 VkSystemAllocType allocType)
101 {
102 return malloc(size);
103 }
104
105 static void default_free(
106 void* pUserData,
107 void* pMem)
108 {
109 free(pMem);
110 }
111
112 static const VkAllocCallbacks default_alloc_callbacks = {
113 .pUserData = NULL,
114 .pfnAlloc = default_alloc,
115 .pfnFree = default_free
116 };
117
118 VkResult anv_CreateInstance(
119 const VkInstanceCreateInfo* pCreateInfo,
120 VkInstance* pInstance)
121 {
122 struct anv_instance *instance;
123 const VkAllocCallbacks *alloc_callbacks = &default_alloc_callbacks;
124 void *user_data = NULL;
125
126 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO);
127
128 if (pCreateInfo->pAllocCb) {
129 alloc_callbacks = pCreateInfo->pAllocCb;
130 user_data = pCreateInfo->pAllocCb->pUserData;
131 }
132 instance = alloc_callbacks->pfnAlloc(user_data, sizeof(*instance), 8,
133 VK_SYSTEM_ALLOC_TYPE_API_OBJECT);
134 if (!instance)
135 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
136
137 instance->pAllocUserData = alloc_callbacks->pUserData;
138 instance->pfnAlloc = alloc_callbacks->pfnAlloc;
139 instance->pfnFree = alloc_callbacks->pfnFree;
140 instance->apiVersion = pCreateInfo->pAppInfo->apiVersion;
141 instance->physicalDeviceCount = 0;
142
143 *pInstance = anv_instance_to_handle(instance);
144
145 return VK_SUCCESS;
146 }
147
148 VkResult anv_DestroyInstance(
149 VkInstance _instance)
150 {
151 ANV_FROM_HANDLE(anv_instance, instance, _instance);
152
153 instance->pfnFree(instance->pAllocUserData, instance);
154
155 return VK_SUCCESS;
156 }
157
158 VkResult anv_EnumeratePhysicalDevices(
159 VkInstance _instance,
160 uint32_t* pPhysicalDeviceCount,
161 VkPhysicalDevice* pPhysicalDevices)
162 {
163 ANV_FROM_HANDLE(anv_instance, instance, _instance);
164 VkResult result;
165
166 if (instance->physicalDeviceCount == 0) {
167 result = fill_physical_device(&instance->physicalDevice,
168 instance, "/dev/dri/renderD128");
169 if (result != VK_SUCCESS)
170 return result;
171
172 instance->physicalDeviceCount = 1;
173 }
174
175 /* pPhysicalDeviceCount is an out parameter if pPhysicalDevices is NULL;
176 * otherwise it's an inout parameter.
177 *
178 * The Vulkan spec (git aaed022) says:
179 *
180 * pPhysicalDeviceCount is a pointer to an unsigned integer variable
181 * that is initialized with the number of devices the application is
182 * prepared to receive handles to. pname:pPhysicalDevices is pointer to
183 * an array of at least this many VkPhysicalDevice handles [...].
184 *
185 * Upon success, if pPhysicalDevices is NULL, vkEnumeratePhysicalDevices
186 * overwrites the contents of the variable pointed to by
187      *    pPhysicalDeviceCount with the number of physical devices in the
188 * instance; otherwise, vkEnumeratePhysicalDevices overwrites
189 * pPhysicalDeviceCount with the number of physical handles written to
190 * pPhysicalDevices.
191 */
192 if (!pPhysicalDevices) {
193 *pPhysicalDeviceCount = instance->physicalDeviceCount;
194 } else if (*pPhysicalDeviceCount >= 1) {
195 pPhysicalDevices[0] = anv_physical_device_to_handle(&instance->physicalDevice);
196 *pPhysicalDeviceCount = 1;
197 } else {
198 *pPhysicalDeviceCount = 0;
199 }
200
201 return VK_SUCCESS;
202 }
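/* Illustrative sketch (not part of the driver): applications typically call
 * this entry point twice, once with a NULL array to query the count and once
 * to fetch the handles:
 *
 *    uint32_t count = 0;
 *    vkEnumeratePhysicalDevices(instance, &count, NULL);
 *    VkPhysicalDevice *devs = malloc(count * sizeof(*devs));
 *    vkEnumeratePhysicalDevices(instance, &count, devs);
 */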
203
204 VkResult anv_GetPhysicalDeviceFeatures(
205 VkPhysicalDevice physicalDevice,
206 VkPhysicalDeviceFeatures* pFeatures)
207 {
208 anv_finishme("Get correct values for PhysicalDeviceFeatures");
209
210 *pFeatures = (VkPhysicalDeviceFeatures) {
211 .robustBufferAccess = false,
212 .fullDrawIndexUint32 = false,
213 .imageCubeArray = false,
214 .independentBlend = false,
215 .geometryShader = true,
216 .tessellationShader = false,
217 .sampleRateShading = false,
218 .dualSourceBlend = true,
219 .logicOp = true,
220 .instancedDrawIndirect = true,
221 .depthClip = false,
222 .depthBiasClamp = false,
223 .fillModeNonSolid = true,
224 .depthBounds = false,
225 .wideLines = true,
226 .largePoints = true,
227 .textureCompressionETC2 = true,
228 .textureCompressionASTC_LDR = true,
229 .textureCompressionBC = true,
230 .pipelineStatisticsQuery = true,
231 .vertexSideEffects = false,
232 .tessellationSideEffects = false,
233 .geometrySideEffects = false,
234 .fragmentSideEffects = false,
235 .shaderTessellationPointSize = false,
236 .shaderGeometryPointSize = true,
237 .shaderTextureGatherExtended = true,
238 .shaderStorageImageExtendedFormats = false,
239 .shaderStorageImageMultisample = false,
240 .shaderStorageBufferArrayConstantIndexing = false,
241 .shaderStorageImageArrayConstantIndexing = false,
242 .shaderUniformBufferArrayDynamicIndexing = true,
243 .shaderSampledImageArrayDynamicIndexing = false,
244 .shaderStorageBufferArrayDynamicIndexing = false,
245 .shaderStorageImageArrayDynamicIndexing = false,
246 .shaderClipDistance = false,
247 .shaderCullDistance = false,
248 .shaderFloat64 = false,
249 .shaderInt64 = false,
250 .shaderFloat16 = false,
251 .shaderInt16 = false,
252 };
253
254 return VK_SUCCESS;
255 }
256
257 VkResult anv_GetPhysicalDeviceLimits(
258 VkPhysicalDevice physicalDevice,
259 VkPhysicalDeviceLimits* pLimits)
260 {
261 ANV_FROM_HANDLE(anv_physical_device, physical_device, physicalDevice);
262 const struct brw_device_info *devinfo = physical_device->info;
263
264 anv_finishme("Get correct values for PhysicalDeviceLimits");
265
266 *pLimits = (VkPhysicalDeviceLimits) {
267 .maxImageDimension1D = (1 << 14),
268 .maxImageDimension2D = (1 << 14),
269 .maxImageDimension3D = (1 << 10),
270 .maxImageDimensionCube = (1 << 14),
271 .maxImageArrayLayers = (1 << 10),
272 .maxTexelBufferSize = (1 << 14),
273 .maxUniformBufferSize = UINT32_MAX,
274 .maxStorageBufferSize = UINT32_MAX,
275 .maxPushConstantsSize = 128,
276 .maxMemoryAllocationCount = UINT32_MAX,
277 .maxBoundDescriptorSets = MAX_SETS,
278 .maxDescriptorSets = UINT32_MAX,
279 .maxPerStageDescriptorSamplers = 64,
280 .maxPerStageDescriptorUniformBuffers = 64,
281 .maxPerStageDescriptorStorageBuffers = 64,
282 .maxPerStageDescriptorSampledImages = 64,
283 .maxPerStageDescriptorStorageImages = 64,
284 .maxDescriptorSetSamplers = 256,
285 .maxDescriptorSetUniformBuffers = 256,
286 .maxDescriptorSetStorageBuffers = 256,
287 .maxDescriptorSetSampledImages = 256,
288 .maxDescriptorSetStorageImages = 256,
289 .maxVertexInputAttributes = 32,
290 .maxVertexInputAttributeOffset = 256,
291 .maxVertexInputBindingStride = 256,
292 .maxVertexOutputComponents = 32,
293 .maxTessGenLevel = 0,
294 .maxTessPatchSize = 0,
295 .maxTessControlPerVertexInputComponents = 0,
296 .maxTessControlPerVertexOutputComponents = 0,
297 .maxTessControlPerPatchOutputComponents = 0,
298 .maxTessControlTotalOutputComponents = 0,
299 .maxTessEvaluationInputComponents = 0,
300 .maxTessEvaluationOutputComponents = 0,
301 .maxGeometryShaderInvocations = 6,
302 .maxGeometryInputComponents = 16,
303 .maxGeometryOutputComponents = 16,
304 .maxGeometryOutputVertices = 16,
305 .maxGeometryTotalOutputComponents = 16,
306 .maxFragmentInputComponents = 16,
307 .maxFragmentOutputBuffers = 8,
308 .maxFragmentDualSourceBuffers = 2,
309 .maxFragmentCombinedOutputResources = 8,
310 .maxComputeSharedMemorySize = 1024,
311 .maxComputeWorkGroupCount = {
312 16 * devinfo->max_cs_threads,
313 16 * devinfo->max_cs_threads,
314 16 * devinfo->max_cs_threads,
315 },
316 .maxComputeWorkGroupInvocations = 16 * devinfo->max_cs_threads,
317 .maxComputeWorkGroupSize = {
318 16 * devinfo->max_cs_threads,
319 16 * devinfo->max_cs_threads,
320 16 * devinfo->max_cs_threads,
321 },
322 .subPixelPrecisionBits = 4 /* FIXME */,
323 .subTexelPrecisionBits = 4 /* FIXME */,
324 .mipmapPrecisionBits = 4 /* FIXME */,
325 .maxDrawIndexedIndexValue = UINT32_MAX,
326 .maxDrawIndirectInstanceCount = UINT32_MAX,
327 .primitiveRestartForPatches = UINT32_MAX,
328 .maxSamplerLodBias = 16,
329 .maxSamplerAnisotropy = 16,
330 .maxViewports = 16,
331 .maxDynamicViewportStates = UINT32_MAX,
332 .maxViewportDimensions = { (1 << 14), (1 << 14) },
333 .viewportBoundsRange = { -1.0, 1.0 }, /* FIXME */
334 .viewportSubPixelBits = 13, /* We take a float? */
335 .minMemoryMapAlignment = 64, /* A cache line */
336 .minTexelBufferOffsetAlignment = 1,
337 .minUniformBufferOffsetAlignment = 1,
338 .minStorageBufferOffsetAlignment = 1,
339 .minTexelOffset = 0, /* FIXME */
340 .maxTexelOffset = 0, /* FIXME */
341 .minTexelGatherOffset = 0, /* FIXME */
342 .maxTexelGatherOffset = 0, /* FIXME */
343 .minInterpolationOffset = 0, /* FIXME */
344 .maxInterpolationOffset = 0, /* FIXME */
345 .subPixelInterpolationOffsetBits = 0, /* FIXME */
346 .maxFramebufferWidth = (1 << 14),
347 .maxFramebufferHeight = (1 << 14),
348 .maxFramebufferLayers = (1 << 10),
349 .maxFramebufferColorSamples = 8,
350 .maxFramebufferDepthSamples = 8,
351 .maxFramebufferStencilSamples = 8,
352 .maxColorAttachments = MAX_RTS,
353 .maxSampledImageColorSamples = 8,
354 .maxSampledImageDepthSamples = 8,
355 .maxSampledImageIntegerSamples = 1,
356 .maxStorageImageSamples = 1,
357 .maxSampleMaskWords = 1,
358 .timestampFrequency = 1000 * 1000 * 1000 / 80,
359 .maxClipDistances = 0 /* FIXME */,
360 .maxCullDistances = 0 /* FIXME */,
361 .maxCombinedClipAndCullDistances = 0 /* FIXME */,
362 .pointSizeRange = { 0.125, 255.875 },
363 .lineWidthRange = { 0.0, 7.9921875 },
364 .pointSizeGranularity = (1.0 / 8.0),
365 .lineWidthGranularity = (1.0 / 128.0),
366 };
367
368 return VK_SUCCESS;
369 }
370
371 VkResult anv_GetPhysicalDeviceProperties(
372 VkPhysicalDevice physicalDevice,
373 VkPhysicalDeviceProperties* pProperties)
374 {
375 ANV_FROM_HANDLE(anv_physical_device, pdevice, physicalDevice);
376
377 *pProperties = (VkPhysicalDeviceProperties) {
378 .apiVersion = 1,
379 .driverVersion = 1,
380 .vendorId = 0x8086,
381 .deviceId = pdevice->chipset_id,
382 .deviceType = VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU,
383 };
384
385 strcpy(pProperties->deviceName, pdevice->name);
386 snprintf((char *)pProperties->pipelineCacheUUID, VK_UUID_LENGTH,
387 "anv-%s", MESA_GIT_SHA1 + 4);
388
389 return VK_SUCCESS;
390 }
391
392 VkResult anv_GetPhysicalDeviceQueueCount(
393 VkPhysicalDevice physicalDevice,
394 uint32_t* pCount)
395 {
396 *pCount = 1;
397
398 return VK_SUCCESS;
399 }
400
401 VkResult anv_GetPhysicalDeviceQueueProperties(
402 VkPhysicalDevice physicalDevice,
403 uint32_t count,
404 VkPhysicalDeviceQueueProperties* pQueueProperties)
405 {
406 assert(count == 1);
407
408 *pQueueProperties = (VkPhysicalDeviceQueueProperties) {
409 .queueFlags = VK_QUEUE_GRAPHICS_BIT |
410 VK_QUEUE_COMPUTE_BIT |
411 VK_QUEUE_DMA_BIT,
412 .queueCount = 1,
413 .supportsTimestamps = true,
414 };
415
416 return VK_SUCCESS;
417 }
418
419 PFN_vkVoidFunction anv_GetInstanceProcAddr(
420 VkInstance instance,
421 const char* pName)
422 {
423 return anv_lookup_entrypoint(pName);
424 }
425
426 PFN_vkVoidFunction anv_GetDeviceProcAddr(
427 VkDevice device,
428 const char* pName)
429 {
430 return anv_lookup_entrypoint(pName);
431 }
432
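/* parse_debug_flags scans the comma-separated INTEL_DEBUG environment
 * variable. For example (assumed invocation), INTEL_DEBUG=aub,no_hw enables
 * AUB dumping and skips real execbuffer submission; tokens the loop does not
 * recognize are silently ignored. */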
433 static void
434 parse_debug_flags(struct anv_device *device)
435 {
436 const char *debug, *p, *end;
437
438 debug = getenv("INTEL_DEBUG");
439 device->dump_aub = false;
440 if (debug) {
441 for (p = debug; *p; p = end + 1) {
442 end = strchrnul(p, ',');
443 if (end - p == 3 && memcmp(p, "aub", 3) == 0)
444 device->dump_aub = true;
445 if (end - p == 5 && memcmp(p, "no_hw", 5) == 0)
446 device->no_hw = true;
447 if (*end == '\0')
448 break;
449 }
450 }
451 }
452
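/* Completion tracking uses a monotonically increasing serial: command buffers
 * are stamped from queue->next_serial, and the 4-byte completed_serial state
 * allocated below records the last serial known to have finished (in the
 * no_hw path, anv_QueueSubmit writes it directly from the CPU). */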
453 static VkResult
454 anv_queue_init(struct anv_device *device, struct anv_queue *queue)
455 {
456 queue->device = device;
457 queue->pool = &device->surface_state_pool;
458
459 queue->completed_serial = anv_state_pool_alloc(queue->pool, 4, 4);
460 if (queue->completed_serial.map == NULL)
461 return vk_error(VK_ERROR_OUT_OF_DEVICE_MEMORY);
462
463 *(uint32_t *)queue->completed_serial.map = 0;
464 queue->next_serial = 1;
465
466 return VK_SUCCESS;
467 }
468
469 static void
470 anv_queue_finish(struct anv_queue *queue)
471 {
472 #ifdef HAVE_VALGRIND
473 /* This gets torn down with the device so we only need to do this if
474 * valgrind is present.
475 */
476 anv_state_pool_free(queue->pool, queue->completed_serial);
477 #endif
478 }
479
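/* The border color table below is uploaded once into the dynamic state pool;
 * anv_CreateSampler later points SAMPLER_STATE's IndirectStatePointer at
 * device->border_colors.offset plus the chosen VkBorderColor index times
 * sizeof(float) * 4. */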
480 static void
481 anv_device_init_border_colors(struct anv_device *device)
482 {
483 static const VkClearColorValue border_colors[] = {
484 [VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK] = { .f32 = { 0.0, 0.0, 0.0, 0.0 } },
485 [VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK] = { .f32 = { 0.0, 0.0, 0.0, 1.0 } },
486 [VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE] = { .f32 = { 1.0, 1.0, 1.0, 1.0 } },
487 [VK_BORDER_COLOR_INT_TRANSPARENT_BLACK] = { .u32 = { 0, 0, 0, 0 } },
488 [VK_BORDER_COLOR_INT_OPAQUE_BLACK] = { .u32 = { 0, 0, 0, 1 } },
489 [VK_BORDER_COLOR_INT_OPAQUE_WHITE] = { .u32 = { 1, 1, 1, 1 } },
490 };
491
492 device->border_colors =
493 anv_state_pool_alloc(&device->dynamic_state_pool,
494 sizeof(border_colors), 32);
495 memcpy(device->border_colors.map, border_colors, sizeof(border_colors));
496 }
497
498 static const uint32_t BATCH_SIZE = 8192;
499
500 VkResult anv_CreateDevice(
501 VkPhysicalDevice physicalDevice,
502 const VkDeviceCreateInfo* pCreateInfo,
503 VkDevice* pDevice)
504 {
505 ANV_FROM_HANDLE(anv_physical_device, physical_device, physicalDevice);
506 struct anv_instance *instance = physical_device->instance;
507 struct anv_device *device;
508
509 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO);
510
511 device = instance->pfnAlloc(instance->pAllocUserData,
512 sizeof(*device), 8,
513 VK_SYSTEM_ALLOC_TYPE_API_OBJECT);
514 if (!device)
515 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
516
517 device->no_hw = physical_device->no_hw;
518 parse_debug_flags(device);
519
520 device->instance = physical_device->instance;
521 device->fd = open(physical_device->path, O_RDWR | O_CLOEXEC);
522 if (device->fd == -1)
523 goto fail_device;
524
525 device->context_id = anv_gem_create_context(device);
526 if (device->context_id == -1)
527 goto fail_fd;
528
529 anv_bo_pool_init(&device->batch_bo_pool, device, BATCH_SIZE);
530
531 anv_block_pool_init(&device->dynamic_state_block_pool, device, 2048);
532
533 anv_state_pool_init(&device->dynamic_state_pool,
534 &device->dynamic_state_block_pool);
535
536 anv_block_pool_init(&device->instruction_block_pool, device, 2048);
537 anv_block_pool_init(&device->surface_state_block_pool, device, 2048);
538
539 anv_state_pool_init(&device->surface_state_pool,
540 &device->surface_state_block_pool);
541
542 anv_block_pool_init(&device->scratch_block_pool, device, 0x10000);
543
544 device->info = *physical_device->info;
545
546 device->compiler = anv_compiler_create(device);
547 device->aub_writer = NULL;
548
549 pthread_mutex_init(&device->mutex, NULL);
550
551 anv_queue_init(device, &device->queue);
552
553 anv_device_init_meta(device);
554
555 anv_device_init_border_colors(device);
556
557 *pDevice = anv_device_to_handle(device);
558
559 return VK_SUCCESS;
560
561 fail_fd:
562 close(device->fd);
563 fail_device:
564 anv_device_free(device, device);
565
566 return vk_error(VK_ERROR_UNAVAILABLE);
567 }
568
569 VkResult anv_DestroyDevice(
570 VkDevice _device)
571 {
572 ANV_FROM_HANDLE(anv_device, device, _device);
573
574 anv_compiler_destroy(device->compiler);
575
576 anv_queue_finish(&device->queue);
577
578 anv_device_finish_meta(device);
579
580 #ifdef HAVE_VALGRIND
581 /* We only need to free these to prevent valgrind errors. The backing
582 * BO will go away in a couple of lines so we don't actually leak.
583 */
584 anv_state_pool_free(&device->dynamic_state_pool, device->border_colors);
585 #endif
586
587 anv_bo_pool_finish(&device->batch_bo_pool);
588 anv_block_pool_finish(&device->dynamic_state_block_pool);
589 anv_block_pool_finish(&device->instruction_block_pool);
590 anv_block_pool_finish(&device->surface_state_block_pool);
591
592 close(device->fd);
593
594 if (device->aub_writer)
595 anv_aub_writer_destroy(device->aub_writer);
596
597 anv_device_free(device, device);
598
599 return VK_SUCCESS;
600 }
601
602 static const VkExtensionProperties global_extensions[] = {
603 {
604 .extName = "VK_WSI_LunarG",
605 .version = 3
606 }
607 };
608
609 VkResult anv_GetGlobalExtensionCount(
610 uint32_t* pCount)
611 {
612 *pCount = ARRAY_SIZE(global_extensions);
613
614 return VK_SUCCESS;
615 }
616
617
618 VkResult anv_GetGlobalExtensionProperties(
619 uint32_t extensionIndex,
620 VkExtensionProperties* pProperties)
621 {
622 assert(extensionIndex < ARRAY_SIZE(global_extensions));
623
624 *pProperties = global_extensions[extensionIndex];
625
626 return VK_SUCCESS;
627 }
628
629 VkResult anv_GetPhysicalDeviceExtensionCount(
630 VkPhysicalDevice physicalDevice,
631 uint32_t* pCount)
632 {
633 /* None supported at this time */
634 *pCount = 0;
635
636 return VK_SUCCESS;
637 }
638
639 VkResult anv_GetPhysicalDeviceExtensionProperties(
640 VkPhysicalDevice physicalDevice,
641 uint32_t extensionIndex,
642 VkExtensionProperties* pProperties)
643 {
644 /* None supported at this time */
645 return vk_error(VK_ERROR_INVALID_EXTENSION);
646 }
647
648 VkResult anv_EnumerateLayers(
649 VkPhysicalDevice physicalDevice,
650 size_t maxStringSize,
651 size_t* pLayerCount,
652 char* const* pOutLayers,
653 void* pReserved)
654 {
655 *pLayerCount = 0;
656
657 return VK_SUCCESS;
658 }
659
660 VkResult anv_GetDeviceQueue(
661 VkDevice _device,
662 uint32_t queueNodeIndex,
663 uint32_t queueIndex,
664 VkQueue* pQueue)
665 {
666 ANV_FROM_HANDLE(anv_device, device, _device);
667
668 assert(queueIndex == 0);
669
670 *pQueue = anv_queue_to_handle(&device->queue);
671
672 return VK_SUCCESS;
673 }
674
675 VkResult
676 anv_reloc_list_init(struct anv_reloc_list *list, struct anv_device *device)
677 {
678 list->num_relocs = 0;
679 list->array_length = 256;
680 list->relocs =
681 anv_device_alloc(device, list->array_length * sizeof(*list->relocs), 8,
682 VK_SYSTEM_ALLOC_TYPE_INTERNAL);
683
684 if (list->relocs == NULL)
685 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
686
687 list->reloc_bos =
688 anv_device_alloc(device, list->array_length * sizeof(*list->reloc_bos), 8,
689 VK_SYSTEM_ALLOC_TYPE_INTERNAL);
690
691    if (list->reloc_bos == NULL) {
692 anv_device_free(device, list->relocs);
693 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
694 }
695
696 return VK_SUCCESS;
697 }
698
699 void
700 anv_reloc_list_finish(struct anv_reloc_list *list, struct anv_device *device)
701 {
702 anv_device_free(device, list->relocs);
703 anv_device_free(device, list->reloc_bos);
704 }
705
706 static VkResult
707 anv_reloc_list_grow(struct anv_reloc_list *list, struct anv_device *device,
708 size_t num_additional_relocs)
709 {
710 if (list->num_relocs + num_additional_relocs <= list->array_length)
711 return VK_SUCCESS;
712
713 size_t new_length = list->array_length * 2;
714 while (new_length < list->num_relocs + num_additional_relocs)
715 new_length *= 2;
716
717 struct drm_i915_gem_relocation_entry *new_relocs =
718 anv_device_alloc(device, new_length * sizeof(*list->relocs), 8,
719 VK_SYSTEM_ALLOC_TYPE_INTERNAL);
720 if (new_relocs == NULL)
721 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
722
723 struct anv_bo **new_reloc_bos =
724 anv_device_alloc(device, new_length * sizeof(*list->reloc_bos), 8,
725 VK_SYSTEM_ALLOC_TYPE_INTERNAL);
726    if (new_reloc_bos == NULL) {
727 anv_device_free(device, new_relocs);
728 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
729 }
730
731 memcpy(new_relocs, list->relocs, list->num_relocs * sizeof(*list->relocs));
732 memcpy(new_reloc_bos, list->reloc_bos,
733 list->num_relocs * sizeof(*list->reloc_bos));
734
735 anv_device_free(device, list->relocs);
736 anv_device_free(device, list->reloc_bos);
737
738 list->relocs = new_relocs;
739 list->reloc_bos = new_reloc_bos;
740
741 return VK_SUCCESS;
742 }
743
744 static VkResult
745 anv_batch_bo_create(struct anv_device *device, struct anv_batch_bo **bbo_out)
746 {
747 VkResult result;
748
749 struct anv_batch_bo *bbo =
750 anv_device_alloc(device, sizeof(*bbo), 8, VK_SYSTEM_ALLOC_TYPE_INTERNAL);
751 if (bbo == NULL)
752 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
753
754 bbo->num_relocs = 0;
755 bbo->prev_batch_bo = NULL;
756
757 result = anv_bo_pool_alloc(&device->batch_bo_pool, &bbo->bo);
758 if (result != VK_SUCCESS) {
759 anv_device_free(device, bbo);
760 return result;
761 }
762
763 *bbo_out = bbo;
764
765 return VK_SUCCESS;
766 }
767
768 static void
769 anv_batch_bo_start(struct anv_batch_bo *bbo, struct anv_batch *batch,
770 size_t batch_padding)
771 {
772 batch->next = batch->start = bbo->bo.map;
773 batch->end = bbo->bo.map + bbo->bo.size - batch_padding;
774 bbo->first_reloc = batch->relocs.num_relocs;
775 }
776
777 static void
778 anv_batch_bo_finish(struct anv_batch_bo *bbo, struct anv_batch *batch)
779 {
780 assert(batch->start == bbo->bo.map);
781 bbo->length = batch->next - batch->start;
782 VG(VALGRIND_CHECK_MEM_IS_DEFINED(batch->start, bbo->length));
783 bbo->num_relocs = batch->relocs.num_relocs - bbo->first_reloc;
784 }
785
786 static void
787 anv_batch_bo_destroy(struct anv_batch_bo *bbo, struct anv_device *device)
788 {
789 anv_bo_pool_free(&device->batch_bo_pool, &bbo->bo);
790 anv_device_free(device, bbo);
791 }
792
793 void *
794 anv_batch_emit_dwords(struct anv_batch *batch, int num_dwords)
795 {
796 if (batch->next + num_dwords * 4 > batch->end)
797 batch->extend_cb(batch, batch->user_data);
798
799 void *p = batch->next;
800
801 batch->next += num_dwords * 4;
802 assert(batch->next <= batch->end);
803
804 return p;
805 }
806
807 static void
808 anv_reloc_list_append(struct anv_reloc_list *list, struct anv_device *device,
809 struct anv_reloc_list *other, uint32_t offset)
810 {
811 anv_reloc_list_grow(list, device, other->num_relocs);
812 /* TODO: Handle failure */
813
814 memcpy(&list->relocs[list->num_relocs], &other->relocs[0],
815 other->num_relocs * sizeof(other->relocs[0]));
816 memcpy(&list->reloc_bos[list->num_relocs], &other->reloc_bos[0],
817 other->num_relocs * sizeof(other->reloc_bos[0]));
818
819 for (uint32_t i = 0; i < other->num_relocs; i++)
820 list->relocs[i + list->num_relocs].offset += offset;
821
822 list->num_relocs += other->num_relocs;
823 }
824
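/* Each relocation records where in the batch a graphics address was written
 * and which BO it refers to. presumed_offset is the address we assumed when
 * writing; if the kernel relocates the BO at execbuf time it uses these
 * entries to patch the batch. */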
825 static uint64_t
826 anv_reloc_list_add(struct anv_reloc_list *list, struct anv_device *device,
827 uint32_t offset, struct anv_bo *target_bo, uint32_t delta)
828 {
829 struct drm_i915_gem_relocation_entry *entry;
830 int index;
831
832 anv_reloc_list_grow(list, device, 1);
833 /* TODO: Handle failure */
834
835 /* XXX: Can we use I915_EXEC_HANDLE_LUT? */
836 index = list->num_relocs++;
837 list->reloc_bos[index] = target_bo;
838 entry = &list->relocs[index];
839 entry->target_handle = target_bo->gem_handle;
840 entry->delta = delta;
841 entry->offset = offset;
842 entry->presumed_offset = target_bo->offset;
843 entry->read_domains = 0;
844 entry->write_domain = 0;
845
846 return target_bo->offset + delta;
847 }
848
849 void
850 anv_batch_emit_batch(struct anv_batch *batch, struct anv_batch *other)
851 {
852 uint32_t size, offset;
853
854 size = other->next - other->start;
855 assert(size % 4 == 0);
856
857 if (batch->next + size > batch->end)
858 batch->extend_cb(batch, batch->user_data);
859
860 assert(batch->next + size <= batch->end);
861
862 memcpy(batch->next, other->start, size);
863
864 offset = batch->next - batch->start;
865 anv_reloc_list_append(&batch->relocs, batch->device,
866 &other->relocs, offset);
867
868 batch->next += size;
869 }
870
871 uint64_t
872 anv_batch_emit_reloc(struct anv_batch *batch,
873 void *location, struct anv_bo *bo, uint32_t delta)
874 {
875 return anv_reloc_list_add(&batch->relocs, batch->device,
876 location - batch->start, bo, delta);
877 }
878
879 VkResult anv_QueueSubmit(
880 VkQueue _queue,
881 uint32_t cmdBufferCount,
882 const VkCmdBuffer* pCmdBuffers,
883 VkFence _fence)
884 {
885 ANV_FROM_HANDLE(anv_queue, queue, _queue);
886 ANV_FROM_HANDLE(anv_fence, fence, _fence);
887 struct anv_device *device = queue->device;
888 int ret;
889
890 for (uint32_t i = 0; i < cmdBufferCount; i++) {
891 ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, pCmdBuffers[i]);
892
893 if (device->dump_aub)
894 anv_cmd_buffer_dump(cmd_buffer);
895
896 if (!device->no_hw) {
897 ret = anv_gem_execbuffer(device, &cmd_buffer->execbuf);
898 if (ret != 0)
899 return vk_error(VK_ERROR_UNKNOWN);
900
901 if (fence) {
902 ret = anv_gem_execbuffer(device, &fence->execbuf);
903 if (ret != 0)
904 return vk_error(VK_ERROR_UNKNOWN);
905 }
906
907 for (uint32_t i = 0; i < cmd_buffer->bo_count; i++)
908 cmd_buffer->exec2_bos[i]->offset = cmd_buffer->exec2_objects[i].offset;
909 } else {
910 *(uint32_t *)queue->completed_serial.map = cmd_buffer->serial;
911 }
912 }
913
914 return VK_SUCCESS;
915 }
916
917 VkResult anv_QueueWaitIdle(
918 VkQueue _queue)
919 {
920 ANV_FROM_HANDLE(anv_queue, queue, _queue);
921
922 return vkDeviceWaitIdle(anv_device_to_handle(queue->device));
923 }
924
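/* Device idle is implemented by submitting a trivial batch (just
 * MI_BATCH_BUFFER_END followed by MI_NOOP) on the render ring and then
 * blocking in anv_gem_wait on the backing BO with an effectively infinite
 * timeout. */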
925 VkResult anv_DeviceWaitIdle(
926 VkDevice _device)
927 {
928 ANV_FROM_HANDLE(anv_device, device, _device);
929 struct anv_state state;
930 struct anv_batch batch;
931 struct drm_i915_gem_execbuffer2 execbuf;
932 struct drm_i915_gem_exec_object2 exec2_objects[1];
933 struct anv_bo *bo = NULL;
934 VkResult result;
935 int64_t timeout;
936 int ret;
937
938 state = anv_state_pool_alloc(&device->dynamic_state_pool, 32, 32);
939 bo = &device->dynamic_state_pool.block_pool->bo;
940 batch.start = batch.next = state.map;
941 batch.end = state.map + 32;
942 anv_batch_emit(&batch, GEN8_MI_BATCH_BUFFER_END);
943 anv_batch_emit(&batch, GEN8_MI_NOOP);
944
945 exec2_objects[0].handle = bo->gem_handle;
946 exec2_objects[0].relocation_count = 0;
947 exec2_objects[0].relocs_ptr = 0;
948 exec2_objects[0].alignment = 0;
949 exec2_objects[0].offset = bo->offset;
950 exec2_objects[0].flags = 0;
951 exec2_objects[0].rsvd1 = 0;
952 exec2_objects[0].rsvd2 = 0;
953
954 execbuf.buffers_ptr = (uintptr_t) exec2_objects;
955 execbuf.buffer_count = 1;
956 execbuf.batch_start_offset = state.offset;
957 execbuf.batch_len = batch.next - state.map;
958 execbuf.cliprects_ptr = 0;
959 execbuf.num_cliprects = 0;
960 execbuf.DR1 = 0;
961 execbuf.DR4 = 0;
962
963 execbuf.flags =
964 I915_EXEC_HANDLE_LUT | I915_EXEC_NO_RELOC | I915_EXEC_RENDER;
965 execbuf.rsvd1 = device->context_id;
966 execbuf.rsvd2 = 0;
967
968 if (!device->no_hw) {
969 ret = anv_gem_execbuffer(device, &execbuf);
970 if (ret != 0) {
971 result = vk_error(VK_ERROR_UNKNOWN);
972 goto fail;
973 }
974
975 timeout = INT64_MAX;
976 ret = anv_gem_wait(device, bo->gem_handle, &timeout);
977 if (ret != 0) {
978 result = vk_error(VK_ERROR_UNKNOWN);
979 goto fail;
980 }
981 }
982
983 anv_state_pool_free(&device->dynamic_state_pool, state);
984
985 return VK_SUCCESS;
986
987 fail:
988 anv_state_pool_free(&device->dynamic_state_pool, state);
989
990 return result;
991 }
992
993 void *
994 anv_device_alloc(struct anv_device * device,
995 size_t size,
996 size_t alignment,
997 VkSystemAllocType allocType)
998 {
999 return device->instance->pfnAlloc(device->instance->pAllocUserData,
1000 size,
1001 alignment,
1002 allocType);
1003 }
1004
1005 void
1006 anv_device_free(struct anv_device * device,
1007 void * mem)
1008 {
1009 return device->instance->pfnFree(device->instance->pAllocUserData,
1010 mem);
1011 }
1012
1013 VkResult
1014 anv_bo_init_new(struct anv_bo *bo, struct anv_device *device, uint64_t size)
1015 {
1016 bo->gem_handle = anv_gem_create(device, size);
1017 if (!bo->gem_handle)
1018 return vk_error(VK_ERROR_OUT_OF_DEVICE_MEMORY);
1019
1020 bo->map = NULL;
1021 bo->index = 0;
1022 bo->offset = 0;
1023 bo->size = size;
1024
1025 return VK_SUCCESS;
1026 }
1027
1028 VkResult anv_AllocMemory(
1029 VkDevice _device,
1030 const VkMemoryAllocInfo* pAllocInfo,
1031 VkDeviceMemory* pMem)
1032 {
1033 ANV_FROM_HANDLE(anv_device, device, _device);
1034 struct anv_device_memory *mem;
1035 VkResult result;
1036
1037 assert(pAllocInfo->sType == VK_STRUCTURE_TYPE_MEMORY_ALLOC_INFO);
1038
1039 mem = anv_device_alloc(device, sizeof(*mem), 8,
1040 VK_SYSTEM_ALLOC_TYPE_API_OBJECT);
1041 if (mem == NULL)
1042 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
1043
1044 result = anv_bo_init_new(&mem->bo, device, pAllocInfo->allocationSize);
1045 if (result != VK_SUCCESS)
1046 goto fail;
1047
1048 *pMem = anv_device_memory_to_handle(mem);
1049
1050 return VK_SUCCESS;
1051
1052 fail:
1053 anv_device_free(device, mem);
1054
1055 return result;
1056 }
1057
1058 VkResult anv_FreeMemory(
1059 VkDevice _device,
1060 VkDeviceMemory _mem)
1061 {
1062 ANV_FROM_HANDLE(anv_device, device, _device);
1063 ANV_FROM_HANDLE(anv_device_memory, mem, _mem);
1064
1065 if (mem->bo.map)
1066 anv_gem_munmap(mem->bo.map, mem->bo.size);
1067
1068 if (mem->bo.gem_handle != 0)
1069 anv_gem_close(device, mem->bo.gem_handle);
1070
1071 anv_device_free(device, mem);
1072
1073 return VK_SUCCESS;
1074 }
1075
1076 VkResult anv_MapMemory(
1077 VkDevice _device,
1078 VkDeviceMemory _mem,
1079 VkDeviceSize offset,
1080 VkDeviceSize size,
1081 VkMemoryMapFlags flags,
1082 void** ppData)
1083 {
1084 ANV_FROM_HANDLE(anv_device, device, _device);
1085 ANV_FROM_HANDLE(anv_device_memory, mem, _mem);
1086
1087 /* FIXME: Is this supposed to be thread safe? Since vkUnmapMemory() only
1088 * takes a VkDeviceMemory pointer, it seems like only one map of the memory
1089 * at a time is valid. We could just mmap up front and return an offset
1090 * pointer here, but that may exhaust virtual memory on 32 bit
1091 * userspace. */
1092
1093 mem->map = anv_gem_mmap(device, mem->bo.gem_handle, offset, size);
1094 mem->map_size = size;
1095
1096 *ppData = mem->map;
1097
1098 return VK_SUCCESS;
1099 }
1100
1101 VkResult anv_UnmapMemory(
1102 VkDevice _device,
1103 VkDeviceMemory _mem)
1104 {
1105 ANV_FROM_HANDLE(anv_device_memory, mem, _mem);
1106
1107 anv_gem_munmap(mem->map, mem->map_size);
1108
1109 return VK_SUCCESS;
1110 }
1111
1112 VkResult anv_FlushMappedMemoryRanges(
1113 VkDevice device,
1114 uint32_t memRangeCount,
1115 const VkMappedMemoryRange* pMemRanges)
1116 {
1117 /* clflush here for !llc platforms */
1118
1119 return VK_SUCCESS;
1120 }
1121
1122 VkResult anv_InvalidateMappedMemoryRanges(
1123 VkDevice device,
1124 uint32_t memRangeCount,
1125 const VkMappedMemoryRange* pMemRanges)
1126 {
1127 return anv_FlushMappedMemoryRanges(device, memRangeCount, pMemRanges);
1128 }
1129
1130 VkResult anv_DestroyObject(
1131 VkDevice _device,
1132 VkObjectType objType,
1133 VkObject _object)
1134 {
1135 ANV_FROM_HANDLE(anv_device, device, _device);
1136 struct anv_object *object = (struct anv_object *) _object;
1137
1138 switch (objType) {
1139 case VK_OBJECT_TYPE_INSTANCE:
1140 return anv_DestroyInstance((VkInstance) _object);
1141
1142 case VK_OBJECT_TYPE_PHYSICAL_DEVICE:
1143 /* We don't want to actually destroy physical devices */
1144 return VK_SUCCESS;
1145
1146 case VK_OBJECT_TYPE_DEVICE:
1147 assert(_device == (VkDevice) _object);
1148 return anv_DestroyDevice((VkDevice) _object);
1149
1150 case VK_OBJECT_TYPE_QUEUE:
1151 /* TODO */
1152 return VK_SUCCESS;
1153
1154 case VK_OBJECT_TYPE_DEVICE_MEMORY:
1155 return anv_FreeMemory(_device, (VkDeviceMemory) _object);
1156
1157 case VK_OBJECT_TYPE_DESCRIPTOR_POOL:
1158 case VK_OBJECT_TYPE_PIPELINE_CACHE:
1159       /* These are just dummies anyway, so we don't need to destroy them */
1160 return VK_SUCCESS;
1161
1162 case VK_OBJECT_TYPE_BUFFER:
1163 case VK_OBJECT_TYPE_IMAGE:
1164 case VK_OBJECT_TYPE_DEPTH_STENCIL_VIEW:
1165 case VK_OBJECT_TYPE_SHADER:
1166 case VK_OBJECT_TYPE_SHADER_MODULE:
1167 case VK_OBJECT_TYPE_PIPELINE_LAYOUT:
1168 case VK_OBJECT_TYPE_SAMPLER:
1169 case VK_OBJECT_TYPE_DESCRIPTOR_SET:
1170 case VK_OBJECT_TYPE_DESCRIPTOR_SET_LAYOUT:
1171 case VK_OBJECT_TYPE_DYNAMIC_RS_STATE:
1172 case VK_OBJECT_TYPE_DYNAMIC_CB_STATE:
1173 case VK_OBJECT_TYPE_DYNAMIC_DS_STATE:
1174 case VK_OBJECT_TYPE_RENDER_PASS:
1175 /* These are trivially destroyable */
1176 anv_device_free(device, (void *) _object);
1177 return VK_SUCCESS;
1178
1179 case VK_OBJECT_TYPE_COMMAND_BUFFER:
1180 case VK_OBJECT_TYPE_PIPELINE:
1181 case VK_OBJECT_TYPE_DYNAMIC_VP_STATE:
1182 case VK_OBJECT_TYPE_FENCE:
1183 case VK_OBJECT_TYPE_QUERY_POOL:
1184 case VK_OBJECT_TYPE_FRAMEBUFFER:
1185 case VK_OBJECT_TYPE_BUFFER_VIEW:
1186 case VK_OBJECT_TYPE_IMAGE_VIEW:
1187 case VK_OBJECT_TYPE_COLOR_ATTACHMENT_VIEW:
1188 (object->destructor)(device, object, objType);
1189 return VK_SUCCESS;
1190
1191 case VK_OBJECT_TYPE_SEMAPHORE:
1192 case VK_OBJECT_TYPE_EVENT:
1193 stub_return(VK_UNSUPPORTED);
1194
1195 default:
1196 unreachable("Invalid object type");
1197 }
1198 }
1199
1200 VkResult anv_GetObjectMemoryRequirements(
1201 VkDevice device,
1202 VkObjectType objType,
1203 VkObject object,
1204 VkMemoryRequirements* pMemoryRequirements)
1205 {
1206 pMemoryRequirements->memPropsAllowed =
1207 VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
1208 /* VK_MEMORY_PROPERTY_HOST_NON_COHERENT_BIT | */
1209 /* VK_MEMORY_PROPERTY_HOST_UNCACHED_BIT | */
1210 VK_MEMORY_PROPERTY_HOST_WRITE_COMBINED_BIT;
1211
1212 pMemoryRequirements->memPropsRequired = 0;
1213
1214 switch (objType) {
1215 case VK_OBJECT_TYPE_BUFFER: {
1216 struct anv_buffer *buffer = anv_buffer_from_handle(object);
1217 pMemoryRequirements->size = buffer->size;
1218 pMemoryRequirements->alignment = 16;
1219 break;
1220 }
1221 case VK_OBJECT_TYPE_IMAGE: {
1222 struct anv_image *image = anv_image_from_handle(object);
1223 pMemoryRequirements->size = image->size;
1224 pMemoryRequirements->alignment = image->alignment;
1225 break;
1226 }
1227 default:
1228 pMemoryRequirements->size = 0;
1229 break;
1230 }
1231
1232 return VK_SUCCESS;
1233 }
1234
1235 VkResult anv_BindObjectMemory(
1236 VkDevice device,
1237 VkObjectType objType,
1238 VkObject object,
1239 VkDeviceMemory _mem,
1240 VkDeviceSize memOffset)
1241 {
1242 ANV_FROM_HANDLE(anv_device_memory, mem, _mem);
1243 struct anv_buffer *buffer;
1244 struct anv_image *image;
1245
1246 switch (objType) {
1247 case VK_OBJECT_TYPE_BUFFER:
1248 buffer = anv_buffer_from_handle(object);
1249 buffer->bo = &mem->bo;
1250 buffer->offset = memOffset;
1251 break;
1252 case VK_OBJECT_TYPE_IMAGE:
1253 image = anv_image_from_handle(object);
1254 image->bo = &mem->bo;
1255 image->offset = memOffset;
1256 break;
1257 default:
1258 break;
1259 }
1260
1261 return VK_SUCCESS;
1262 }
1263
1264 VkResult anv_QueueBindSparseBufferMemory(
1265 VkQueue queue,
1266 VkBuffer buffer,
1267 VkDeviceSize rangeOffset,
1268 VkDeviceSize rangeSize,
1269 VkDeviceMemory mem,
1270 VkDeviceSize memOffset)
1271 {
1272 stub_return(VK_UNSUPPORTED);
1273 }
1274
1275 VkResult anv_QueueBindSparseImageMemory(
1276 VkQueue queue,
1277 VkImage image,
1278 const VkImageMemoryBindInfo* pBindInfo,
1279 VkDeviceMemory mem,
1280 VkDeviceSize memOffset)
1281 {
1282 stub_return(VK_UNSUPPORTED);
1283 }
1284
1285 static void
1286 anv_fence_destroy(struct anv_device *device,
1287 struct anv_object *object,
1288 VkObjectType obj_type)
1289 {
1290 struct anv_fence *fence = (struct anv_fence *) object;
1291
1292 assert(obj_type == VK_OBJECT_TYPE_FENCE);
1293
1294 anv_gem_munmap(fence->bo.map, fence->bo.size);
1295 anv_gem_close(device, fence->bo.gem_handle);
1296 anv_device_free(device, fence);
1297 }
1298
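/* A fence is a tiny pre-built batch (MI_BATCH_BUFFER_END) with its own
 * execbuf; anv_QueueSubmit kicks it off after the real work, and signalling
 * then reduces to asking whether the fence BO has gone idle, which
 * anv_GetFenceStatus and anv_WaitForFences do via anv_gem_wait. */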
1299 VkResult anv_CreateFence(
1300 VkDevice _device,
1301 const VkFenceCreateInfo* pCreateInfo,
1302 VkFence* pFence)
1303 {
1304 ANV_FROM_HANDLE(anv_device, device, _device);
1305 struct anv_fence *fence;
1306 struct anv_batch batch;
1307 VkResult result;
1308
1309 const uint32_t fence_size = 128;
1310
1311 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_FENCE_CREATE_INFO);
1312
1313 fence = anv_device_alloc(device, sizeof(*fence), 8,
1314 VK_SYSTEM_ALLOC_TYPE_API_OBJECT);
1315 if (fence == NULL)
1316 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
1317
1318 result = anv_bo_init_new(&fence->bo, device, fence_size);
1319 if (result != VK_SUCCESS)
1320 goto fail;
1321
1322 fence->base.destructor = anv_fence_destroy;
1323
1324 fence->bo.map =
1325 anv_gem_mmap(device, fence->bo.gem_handle, 0, fence->bo.size);
1326 batch.next = batch.start = fence->bo.map;
1327 batch.end = fence->bo.map + fence->bo.size;
1328 anv_batch_emit(&batch, GEN8_MI_BATCH_BUFFER_END);
1329 anv_batch_emit(&batch, GEN8_MI_NOOP);
1330
1331 fence->exec2_objects[0].handle = fence->bo.gem_handle;
1332 fence->exec2_objects[0].relocation_count = 0;
1333 fence->exec2_objects[0].relocs_ptr = 0;
1334 fence->exec2_objects[0].alignment = 0;
1335 fence->exec2_objects[0].offset = fence->bo.offset;
1336 fence->exec2_objects[0].flags = 0;
1337 fence->exec2_objects[0].rsvd1 = 0;
1338 fence->exec2_objects[0].rsvd2 = 0;
1339
1340 fence->execbuf.buffers_ptr = (uintptr_t) fence->exec2_objects;
1341 fence->execbuf.buffer_count = 1;
1342 fence->execbuf.batch_start_offset = 0;
1343 fence->execbuf.batch_len = batch.next - fence->bo.map;
1344 fence->execbuf.cliprects_ptr = 0;
1345 fence->execbuf.num_cliprects = 0;
1346 fence->execbuf.DR1 = 0;
1347 fence->execbuf.DR4 = 0;
1348
1349 fence->execbuf.flags =
1350 I915_EXEC_HANDLE_LUT | I915_EXEC_NO_RELOC | I915_EXEC_RENDER;
1351 fence->execbuf.rsvd1 = device->context_id;
1352 fence->execbuf.rsvd2 = 0;
1353
1354 *pFence = anv_fence_to_handle(fence);
1355
1356 return VK_SUCCESS;
1357
1358 fail:
1359 anv_device_free(device, fence);
1360
1361 return result;
1362 }
1363
1364 VkResult anv_ResetFences(
1365 VkDevice _device,
1366 uint32_t fenceCount,
1367 const VkFence* pFences)
1368 {
1369 struct anv_fence **fences = (struct anv_fence **) pFences;
1370
1371 for (uint32_t i = 0; i < fenceCount; i++)
1372 fences[i]->ready = false;
1373
1374 return VK_SUCCESS;
1375 }
1376
1377 VkResult anv_GetFenceStatus(
1378 VkDevice _device,
1379 VkFence _fence)
1380 {
1381 ANV_FROM_HANDLE(anv_device, device, _device);
1382 ANV_FROM_HANDLE(anv_fence, fence, _fence);
1383 int64_t t = 0;
1384 int ret;
1385
1386 if (fence->ready)
1387 return VK_SUCCESS;
1388
1389 ret = anv_gem_wait(device, fence->bo.gem_handle, &t);
1390 if (ret == 0) {
1391 fence->ready = true;
1392 return VK_SUCCESS;
1393 }
1394
1395 return VK_NOT_READY;
1396 }
1397
1398 VkResult anv_WaitForFences(
1399 VkDevice _device,
1400 uint32_t fenceCount,
1401 const VkFence* pFences,
1402 bool32_t waitAll,
1403 uint64_t timeout)
1404 {
1405 ANV_FROM_HANDLE(anv_device, device, _device);
1406 int64_t t = timeout;
1407 int ret;
1408
1409 /* FIXME: handle !waitAll */
1410
1411 for (uint32_t i = 0; i < fenceCount; i++) {
1412 ANV_FROM_HANDLE(anv_fence, fence, pFences[i]);
1413 ret = anv_gem_wait(device, fence->bo.gem_handle, &t);
1414 if (ret == -1 && errno == ETIME)
1415 return VK_TIMEOUT;
1416 else if (ret == -1)
1417 return vk_error(VK_ERROR_UNKNOWN);
1418 }
1419
1420 return VK_SUCCESS;
1421 }
1422
1423 // Queue semaphore functions
1424
1425 VkResult anv_CreateSemaphore(
1426 VkDevice device,
1427 const VkSemaphoreCreateInfo* pCreateInfo,
1428 VkSemaphore* pSemaphore)
1429 {
1430 stub_return(VK_UNSUPPORTED);
1431 }
1432
1433 VkResult anv_QueueSignalSemaphore(
1434 VkQueue queue,
1435 VkSemaphore semaphore)
1436 {
1437 stub_return(VK_UNSUPPORTED);
1438 }
1439
1440 VkResult anv_QueueWaitSemaphore(
1441 VkQueue queue,
1442 VkSemaphore semaphore)
1443 {
1444 stub_return(VK_UNSUPPORTED);
1445 }
1446
1447 // Event functions
1448
1449 VkResult anv_CreateEvent(
1450 VkDevice device,
1451 const VkEventCreateInfo* pCreateInfo,
1452 VkEvent* pEvent)
1453 {
1454 stub_return(VK_UNSUPPORTED);
1455 }
1456
1457 VkResult anv_GetEventStatus(
1458 VkDevice device,
1459 VkEvent event)
1460 {
1461 stub_return(VK_UNSUPPORTED);
1462 }
1463
1464 VkResult anv_SetEvent(
1465 VkDevice device,
1466 VkEvent event)
1467 {
1468 stub_return(VK_UNSUPPORTED);
1469 }
1470
1471 VkResult anv_ResetEvent(
1472 VkDevice device,
1473 VkEvent event)
1474 {
1475 stub_return(VK_UNSUPPORTED);
1476 }
1477
1478 // Buffer functions
1479
1480 VkResult anv_CreateBuffer(
1481 VkDevice _device,
1482 const VkBufferCreateInfo* pCreateInfo,
1483 VkBuffer* pBuffer)
1484 {
1485 ANV_FROM_HANDLE(anv_device, device, _device);
1486 struct anv_buffer *buffer;
1487
1488 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO);
1489
1490 buffer = anv_device_alloc(device, sizeof(*buffer), 8,
1491 VK_SYSTEM_ALLOC_TYPE_API_OBJECT);
1492 if (buffer == NULL)
1493 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
1494
1495 buffer->size = pCreateInfo->size;
1496 buffer->bo = NULL;
1497 buffer->offset = 0;
1498
1499 *pBuffer = anv_buffer_to_handle(buffer);
1500
1501 return VK_SUCCESS;
1502 }
1503
1504 // Buffer view functions
1505
1506 static void
1507 fill_buffer_surface_state(void *state, VkFormat format,
1508 uint32_t offset, uint32_t range)
1509 {
1510 const struct anv_format *info;
1511
1512 info = anv_format_for_vk_format(format);
1513    /* FIXME: The element stride should come from the format; a 4-byte stride
1514     * is hard-coded here. */
1514 uint32_t stride = 4;
1515 uint32_t num_elements = range / stride;
1516
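   /* For SURFTYPE_BUFFER the Width/Height/Depth fields encode the element
    * count rather than real dimensions: the low 7 bits go in Width, the next
    * 14 in Height and the next 6 in Depth (see the packing below). */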
1517 struct GEN8_RENDER_SURFACE_STATE surface_state = {
1518 .SurfaceType = SURFTYPE_BUFFER,
1519 .SurfaceArray = false,
1520 .SurfaceFormat = info->surface_format,
1521 .SurfaceVerticalAlignment = VALIGN4,
1522 .SurfaceHorizontalAlignment = HALIGN4,
1523 .TileMode = LINEAR,
1524 .VerticalLineStride = 0,
1525 .VerticalLineStrideOffset = 0,
1526 .SamplerL2BypassModeDisable = true,
1527 .RenderCacheReadWriteMode = WriteOnlyCache,
1528 .MemoryObjectControlState = GEN8_MOCS,
1529 .BaseMipLevel = 0.0,
1530 .SurfaceQPitch = 0,
1531 .Height = (num_elements >> 7) & 0x3fff,
1532 .Width = num_elements & 0x7f,
1533 .Depth = (num_elements >> 21) & 0x3f,
1534 .SurfacePitch = stride - 1,
1535 .MinimumArrayElement = 0,
1536 .NumberofMultisamples = MULTISAMPLECOUNT_1,
1537 .XOffset = 0,
1538 .YOffset = 0,
1539 .SurfaceMinLOD = 0,
1540 .MIPCountLOD = 0,
1541 .AuxiliarySurfaceMode = AUX_NONE,
1542 .RedClearColor = 0,
1543 .GreenClearColor = 0,
1544 .BlueClearColor = 0,
1545 .AlphaClearColor = 0,
1546 .ShaderChannelSelectRed = SCS_RED,
1547 .ShaderChannelSelectGreen = SCS_GREEN,
1548 .ShaderChannelSelectBlue = SCS_BLUE,
1549 .ShaderChannelSelectAlpha = SCS_ALPHA,
1550 .ResourceMinLOD = 0.0,
1551 /* FIXME: We assume that the image must be bound at this time. */
1552 .SurfaceBaseAddress = { NULL, offset },
1553 };
1554
1555 GEN8_RENDER_SURFACE_STATE_pack(NULL, state, &surface_state);
1556 }
1557
1558 VkResult anv_CreateBufferView(
1559 VkDevice _device,
1560 const VkBufferViewCreateInfo* pCreateInfo,
1561 VkBufferView* pView)
1562 {
1563 ANV_FROM_HANDLE(anv_device, device, _device);
1564 ANV_FROM_HANDLE(anv_buffer, buffer, pCreateInfo->buffer);
1565 struct anv_surface_view *view;
1566
1567 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_BUFFER_VIEW_CREATE_INFO);
1568
1569 view = anv_device_alloc(device, sizeof(*view), 8,
1570 VK_SYSTEM_ALLOC_TYPE_API_OBJECT);
1571 if (view == NULL)
1572 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
1573
1574 view->base.destructor = anv_surface_view_destroy;
1575
1576 view->bo = buffer->bo;
1577 view->offset = buffer->offset + pCreateInfo->offset;
1578 view->surface_state =
1579 anv_state_pool_alloc(&device->surface_state_pool, 64, 64);
1580 view->format = pCreateInfo->format;
1581 view->range = pCreateInfo->range;
1582
1583 fill_buffer_surface_state(view->surface_state.map,
1584 pCreateInfo->format, view->offset, pCreateInfo->range);
1585
1586 *pView = (VkBufferView) view;
1587
1588 return VK_SUCCESS;
1589 }
1590
1591 // Sampler functions
1592
1593 VkResult anv_CreateSampler(
1594 VkDevice _device,
1595 const VkSamplerCreateInfo* pCreateInfo,
1596 VkSampler* pSampler)
1597 {
1598 ANV_FROM_HANDLE(anv_device, device, _device);
1599 struct anv_sampler *sampler;
1600 uint32_t mag_filter, min_filter, max_anisotropy;
1601
1602 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO);
1603
1604 sampler = anv_device_alloc(device, sizeof(*sampler), 8,
1605 VK_SYSTEM_ALLOC_TYPE_API_OBJECT);
1606 if (!sampler)
1607 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
1608
1609 static const uint32_t vk_to_gen_tex_filter[] = {
1610 [VK_TEX_FILTER_NEAREST] = MAPFILTER_NEAREST,
1611 [VK_TEX_FILTER_LINEAR] = MAPFILTER_LINEAR
1612 };
1613
1614 static const uint32_t vk_to_gen_mipmap_mode[] = {
1615 [VK_TEX_MIPMAP_MODE_BASE] = MIPFILTER_NONE,
1616 [VK_TEX_MIPMAP_MODE_NEAREST] = MIPFILTER_NEAREST,
1617 [VK_TEX_MIPMAP_MODE_LINEAR] = MIPFILTER_LINEAR
1618 };
1619
1620 static const uint32_t vk_to_gen_tex_address[] = {
1621 [VK_TEX_ADDRESS_WRAP] = TCM_WRAP,
1622 [VK_TEX_ADDRESS_MIRROR] = TCM_MIRROR,
1623 [VK_TEX_ADDRESS_CLAMP] = TCM_CLAMP,
1624 [VK_TEX_ADDRESS_MIRROR_ONCE] = TCM_MIRROR_ONCE,
1625 [VK_TEX_ADDRESS_CLAMP_BORDER] = TCM_CLAMP_BORDER,
1626 };
1627
1628 static const uint32_t vk_to_gen_compare_op[] = {
1629 [VK_COMPARE_OP_NEVER] = PREFILTEROPNEVER,
1630 [VK_COMPARE_OP_LESS] = PREFILTEROPLESS,
1631 [VK_COMPARE_OP_EQUAL] = PREFILTEROPEQUAL,
1632 [VK_COMPARE_OP_LESS_EQUAL] = PREFILTEROPLEQUAL,
1633 [VK_COMPARE_OP_GREATER] = PREFILTEROPGREATER,
1634 [VK_COMPARE_OP_NOT_EQUAL] = PREFILTEROPNOTEQUAL,
1635 [VK_COMPARE_OP_GREATER_EQUAL] = PREFILTEROPGEQUAL,
1636 [VK_COMPARE_OP_ALWAYS] = PREFILTEROPALWAYS,
1637 };
1638
1639 if (pCreateInfo->maxAnisotropy > 1) {
1640 mag_filter = MAPFILTER_ANISOTROPIC;
1641 min_filter = MAPFILTER_ANISOTROPIC;
1642 max_anisotropy = (pCreateInfo->maxAnisotropy - 2) / 2;
1643 } else {
1644 mag_filter = vk_to_gen_tex_filter[pCreateInfo->magFilter];
1645 min_filter = vk_to_gen_tex_filter[pCreateInfo->minFilter];
1646 max_anisotropy = RATIO21;
1647 }
1648
1649 struct GEN8_SAMPLER_STATE sampler_state = {
1650 .SamplerDisable = false,
1651 .TextureBorderColorMode = DX10OGL,
1652 .LODPreClampMode = 0,
1653 .BaseMipLevel = 0.0,
1654 .MipModeFilter = vk_to_gen_mipmap_mode[pCreateInfo->mipMode],
1655 .MagModeFilter = mag_filter,
1656 .MinModeFilter = min_filter,
1657 .TextureLODBias = pCreateInfo->mipLodBias * 256,
1658 .AnisotropicAlgorithm = EWAApproximation,
1659 .MinLOD = pCreateInfo->minLod,
1660 .MaxLOD = pCreateInfo->maxLod,
1661 .ChromaKeyEnable = 0,
1662 .ChromaKeyIndex = 0,
1663 .ChromaKeyMode = 0,
1664 .ShadowFunction = vk_to_gen_compare_op[pCreateInfo->compareOp],
1665 .CubeSurfaceControlMode = 0,
1666
1667 .IndirectStatePointer =
1668 device->border_colors.offset +
1669 pCreateInfo->borderColor * sizeof(float) * 4,
1670
1671 .LODClampMagnificationMode = MIPNONE,
1672 .MaximumAnisotropy = max_anisotropy,
1673 .RAddressMinFilterRoundingEnable = 0,
1674 .RAddressMagFilterRoundingEnable = 0,
1675 .VAddressMinFilterRoundingEnable = 0,
1676 .VAddressMagFilterRoundingEnable = 0,
1677 .UAddressMinFilterRoundingEnable = 0,
1678 .UAddressMagFilterRoundingEnable = 0,
1679 .TrilinearFilterQuality = 0,
1680 .NonnormalizedCoordinateEnable = 0,
1681 .TCXAddressControlMode = vk_to_gen_tex_address[pCreateInfo->addressU],
1682 .TCYAddressControlMode = vk_to_gen_tex_address[pCreateInfo->addressV],
1683 .TCZAddressControlMode = vk_to_gen_tex_address[pCreateInfo->addressW],
1684 };
1685
1686 GEN8_SAMPLER_STATE_pack(NULL, sampler->state, &sampler_state);
1687
1688 *pSampler = anv_sampler_to_handle(sampler);
1689
1690 return VK_SUCCESS;
1691 }
1692
1693 // Descriptor set functions
1694
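/* The set layout is one variable-length allocation: the
 * anv_descriptor_set_layout struct followed by per-stage arrays of
 * anv_descriptor_slot entries, surface slots first and then sampler slots for
 * each shader stage (see the pointer walk over set_layout->entries below). */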
1695 VkResult anv_CreateDescriptorSetLayout(
1696 VkDevice _device,
1697 const VkDescriptorSetLayoutCreateInfo* pCreateInfo,
1698 VkDescriptorSetLayout* pSetLayout)
1699 {
1700 ANV_FROM_HANDLE(anv_device, device, _device);
1701 struct anv_descriptor_set_layout *set_layout;
1702
1703 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO);
1704
1705 uint32_t sampler_count[VK_SHADER_STAGE_NUM] = { 0, };
1706 uint32_t surface_count[VK_SHADER_STAGE_NUM] = { 0, };
1707 uint32_t num_dynamic_buffers = 0;
1708 uint32_t count = 0;
1709 uint32_t stages = 0;
1710 uint32_t s;
1711
1712 for (uint32_t i = 0; i < pCreateInfo->count; i++) {
1713 switch (pCreateInfo->pBinding[i].descriptorType) {
1714 case VK_DESCRIPTOR_TYPE_SAMPLER:
1715 case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
1716 for_each_bit(s, pCreateInfo->pBinding[i].stageFlags)
1717 sampler_count[s] += pCreateInfo->pBinding[i].arraySize;
1718 break;
1719 default:
1720 break;
1721 }
1722
1723 switch (pCreateInfo->pBinding[i].descriptorType) {
1724 case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
1725 case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
1726 case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
1727 case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
1728 case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
1729 case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
1730 case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
1731 case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
1732 case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
1733 for_each_bit(s, pCreateInfo->pBinding[i].stageFlags)
1734 surface_count[s] += pCreateInfo->pBinding[i].arraySize;
1735 break;
1736 default:
1737 break;
1738 }
1739
1740 switch (pCreateInfo->pBinding[i].descriptorType) {
1741 case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
1742 case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
1743 num_dynamic_buffers += pCreateInfo->pBinding[i].arraySize;
1744 break;
1745 default:
1746 break;
1747 }
1748
1749 stages |= pCreateInfo->pBinding[i].stageFlags;
1750 count += pCreateInfo->pBinding[i].arraySize;
1751 }
1752
1753 uint32_t sampler_total = 0;
1754 uint32_t surface_total = 0;
1755 for (uint32_t s = 0; s < VK_SHADER_STAGE_NUM; s++) {
1756 sampler_total += sampler_count[s];
1757 surface_total += surface_count[s];
1758 }
1759
1760 size_t size = sizeof(*set_layout) +
1761 (sampler_total + surface_total) * sizeof(set_layout->entries[0]);
1762 set_layout = anv_device_alloc(device, size, 8,
1763 VK_SYSTEM_ALLOC_TYPE_API_OBJECT);
1764 if (!set_layout)
1765 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
1766
1767 set_layout->num_dynamic_buffers = num_dynamic_buffers;
1768 set_layout->count = count;
1769 set_layout->shader_stages = stages;
1770
1771 struct anv_descriptor_slot *p = set_layout->entries;
1772 struct anv_descriptor_slot *sampler[VK_SHADER_STAGE_NUM];
1773 struct anv_descriptor_slot *surface[VK_SHADER_STAGE_NUM];
1774 for (uint32_t s = 0; s < VK_SHADER_STAGE_NUM; s++) {
1775 set_layout->stage[s].surface_count = surface_count[s];
1776 set_layout->stage[s].surface_start = surface[s] = p;
1777 p += surface_count[s];
1778 set_layout->stage[s].sampler_count = sampler_count[s];
1779 set_layout->stage[s].sampler_start = sampler[s] = p;
1780 p += sampler_count[s];
1781 }
1782
1783 uint32_t descriptor = 0;
1784 int8_t dynamic_slot = 0;
1785 bool is_dynamic;
1786 for (uint32_t i = 0; i < pCreateInfo->count; i++) {
1787 switch (pCreateInfo->pBinding[i].descriptorType) {
1788 case VK_DESCRIPTOR_TYPE_SAMPLER:
1789 case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
1790 for_each_bit(s, pCreateInfo->pBinding[i].stageFlags)
1791 for (uint32_t j = 0; j < pCreateInfo->pBinding[i].arraySize; j++) {
1792 sampler[s]->index = descriptor + j;
1793 sampler[s]->dynamic_slot = -1;
1794 sampler[s]++;
1795 }
1796 break;
1797 default:
1798 break;
1799 }
1800
1801 switch (pCreateInfo->pBinding[i].descriptorType) {
1802 case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
1803 case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
1804 is_dynamic = true;
1805 break;
1806 default:
1807 is_dynamic = false;
1808 break;
1809 }
1810
1811 switch (pCreateInfo->pBinding[i].descriptorType) {
1812 case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
1813 case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
1814 case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
1815 case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
1816 case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
1817 case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
1818 case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
1819 case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
1820 case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
1821 for_each_bit(s, pCreateInfo->pBinding[i].stageFlags)
1822 for (uint32_t j = 0; j < pCreateInfo->pBinding[i].arraySize; j++) {
1823 surface[s]->index = descriptor + j;
1824 if (is_dynamic)
1825 surface[s]->dynamic_slot = dynamic_slot + j;
1826 else
1827 surface[s]->dynamic_slot = -1;
1828 surface[s]++;
1829 }
1830 break;
1831 default:
1832 break;
1833 }
1834
1835 if (is_dynamic)
1836 dynamic_slot += pCreateInfo->pBinding[i].arraySize;
1837
1838 descriptor += pCreateInfo->pBinding[i].arraySize;
1839 }
1840
1841 *pSetLayout = anv_descriptor_set_layout_to_handle(set_layout);
1842
1843 return VK_SUCCESS;
1844 }
1845
1846 VkResult anv_CreateDescriptorPool(
1847 VkDevice device,
1848 VkDescriptorPoolUsage poolUsage,
1849 uint32_t maxSets,
1850 const VkDescriptorPoolCreateInfo* pCreateInfo,
1851 VkDescriptorPool* pDescriptorPool)
1852 {
1853 *pDescriptorPool = 1;
1854
1855 return VK_SUCCESS;
1856 }
1857
1858 VkResult anv_ResetDescriptorPool(
1859 VkDevice device,
1860 VkDescriptorPool descriptorPool)
1861 {
1862 return VK_SUCCESS;
1863 }
1864
1865 VkResult anv_AllocDescriptorSets(
1866 VkDevice _device,
1867 VkDescriptorPool descriptorPool,
1868 VkDescriptorSetUsage setUsage,
1869 uint32_t count,
1870 const VkDescriptorSetLayout* pSetLayouts,
1871 VkDescriptorSet* pDescriptorSets,
1872 uint32_t* pCount)
1873 {
1874 ANV_FROM_HANDLE(anv_device, device, _device);
1875 struct anv_descriptor_set *set;
1876 size_t size;
1877
1878 for (uint32_t i = 0; i < count; i++) {
1879 ANV_FROM_HANDLE(anv_descriptor_set_layout, layout, pSetLayouts[i]);
1880 size = sizeof(*set) + layout->count * sizeof(set->descriptors[0]);
1881 set = anv_device_alloc(device, size, 8,
1882 VK_SYSTEM_ALLOC_TYPE_API_OBJECT);
1883 if (!set) {
1884 *pCount = i;
1885 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
1886 }
1887
1888 /* Descriptor sets may not be 100% filled out so we need to memset to
1889 * ensure that we can properly detect and handle holes.
1890 */
1891 memset(set, 0, size);
1892
1893 pDescriptorSets[i] = anv_descriptor_set_to_handle(set);
1894 }
1895
1896 *pCount = count;
1897
1898 return VK_SUCCESS;
1899 }
1900
1901 VkResult anv_UpdateDescriptorSets(
1902 VkDevice device,
1903 uint32_t writeCount,
1904 const VkWriteDescriptorSet* pDescriptorWrites,
1905 uint32_t copyCount,
1906 const VkCopyDescriptorSet* pDescriptorCopies)
1907 {
1908 for (uint32_t i = 0; i < writeCount; i++) {
1909 const VkWriteDescriptorSet *write = &pDescriptorWrites[i];
1910 ANV_FROM_HANDLE(anv_descriptor_set, set, write->destSet);
1911
1912 switch (write->descriptorType) {
1913 case VK_DESCRIPTOR_TYPE_SAMPLER:
1914 case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
1915 for (uint32_t j = 0; j < write->count; j++) {
1916 set->descriptors[write->destBinding + j].sampler =
1917 anv_sampler_from_handle(write->pDescriptors[j].sampler);
1918 }
1919
1920 if (write->descriptorType == VK_DESCRIPTOR_TYPE_SAMPLER)
1921 break;
1922
1923 /* fallthrough */
1924
1925 case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
1926 case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
1927 for (uint32_t j = 0; j < write->count; j++) {
1928 set->descriptors[write->destBinding + j].view =
1929 (struct anv_surface_view *)write->pDescriptors[j].imageView;
1930 }
1931 break;
1932
1933 case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
1934 case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
1935 anv_finishme("texel buffers not implemented");
1936 break;
1937
1938 case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
1939 case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
1940 case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
1941 case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
1942 for (uint32_t j = 0; j < write->count; j++) {
1943 set->descriptors[write->destBinding + j].view =
1944 (struct anv_surface_view *)write->pDescriptors[j].bufferView;
1945 }
1946          break;
1947 default:
1948 break;
1949 }
1950 }
1951
1952 for (uint32_t i = 0; i < copyCount; i++) {
1953 const VkCopyDescriptorSet *copy = &pDescriptorCopies[i];
1954       ANV_FROM_HANDLE(anv_descriptor_set, src, copy->srcSet);
1955 ANV_FROM_HANDLE(anv_descriptor_set, dest, copy->destSet);
1956 for (uint32_t j = 0; j < copy->count; j++) {
1957 dest->descriptors[copy->destBinding + j] =
1958 src->descriptors[copy->srcBinding + j];
1959 }
1960 }
1961
1962 return VK_SUCCESS;
1963 }
1964
1965 // State object functions
1966
1967 static inline int64_t
1968 clamp_int64(int64_t x, int64_t min, int64_t max)
1969 {
1970 if (x < min)
1971 return min;
1972 else if (x < max)
1973 return x;
1974 else
1975 return max;
1976 }
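
/* Illustrative values: clamp_int64(-5, 0, 0xffff) == 0,
 * clamp_int64(70000, 0, 0xffff) == 0xffff and clamp_int64(123, 0, 0xffff) ==
 * 123.  The scissor code below does the offset + extent - 1 math in 64 bits
 * precisely so it can be clamped here without wrapping first.
 */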
1977
1978 static void
1979 anv_dynamic_vp_state_destroy(struct anv_device *device,
1980 struct anv_object *object,
1981 VkObjectType obj_type)
1982 {
1983 struct anv_dynamic_vp_state *state = (void *)object;
1984
1985 assert(obj_type == VK_OBJECT_TYPE_DYNAMIC_VP_STATE);
1986
1987 anv_state_pool_free(&device->dynamic_state_pool, state->sf_clip_vp);
1988 anv_state_pool_free(&device->dynamic_state_pool, state->cc_vp);
1989 anv_state_pool_free(&device->dynamic_state_pool, state->scissor);
1990
1991 anv_device_free(device, state);
1992 }
1993
1994 VkResult anv_CreateDynamicViewportState(
1995 VkDevice _device,
1996 const VkDynamicVpStateCreateInfo* pCreateInfo,
1997 VkDynamicVpState* pState)
1998 {
1999 ANV_FROM_HANDLE(anv_device, device, _device);
2000 struct anv_dynamic_vp_state *state;
2001
2002 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_DYNAMIC_VP_STATE_CREATE_INFO);
2003
2004 state = anv_device_alloc(device, sizeof(*state), 8,
2005 VK_SYSTEM_ALLOC_TYPE_API_OBJECT);
2006 if (state == NULL)
2007 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
2008
2009 state->base.destructor = anv_dynamic_vp_state_destroy;
2010
2011 unsigned count = pCreateInfo->viewportAndScissorCount;
2012 state->sf_clip_vp = anv_state_pool_alloc(&device->dynamic_state_pool,
2013 count * 64, 64);
2014 state->cc_vp = anv_state_pool_alloc(&device->dynamic_state_pool,
2015 count * 8, 32);
2016 state->scissor = anv_state_pool_alloc(&device->dynamic_state_pool,
2017 count * 32, 32);
2018
2019 for (uint32_t i = 0; i < pCreateInfo->viewportAndScissorCount; i++) {
2020 const VkViewport *vp = &pCreateInfo->pViewports[i];
2021 const VkRect2D *s = &pCreateInfo->pScissors[i];
2022
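      /* The matrix elements below are the usual scale/translate pairs of the
       * viewport transform: screen = m_scale * ndc + m_translate per axis,
       * e.g. m00 = width/2 and m30 = originX + width/2 map x from [-1, 1] to
       * [originX, originX + width].
       */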
2023 struct GEN8_SF_CLIP_VIEWPORT sf_clip_viewport = {
2024 .ViewportMatrixElementm00 = vp->width / 2,
2025 .ViewportMatrixElementm11 = vp->height / 2,
2026 .ViewportMatrixElementm22 = (vp->maxDepth - vp->minDepth) / 2,
2027 .ViewportMatrixElementm30 = vp->originX + vp->width / 2,
2028 .ViewportMatrixElementm31 = vp->originY + vp->height / 2,
2029 .ViewportMatrixElementm32 = (vp->maxDepth + vp->minDepth) / 2,
2030 .XMinClipGuardband = -1.0f,
2031 .XMaxClipGuardband = 1.0f,
2032 .YMinClipGuardband = -1.0f,
2033 .YMaxClipGuardband = 1.0f,
2034 .XMinViewPort = vp->originX,
2035 .XMaxViewPort = vp->originX + vp->width - 1,
2036 .YMinViewPort = vp->originY,
2037 .YMaxViewPort = vp->originY + vp->height - 1,
2038 };
2039
2040 struct GEN8_CC_VIEWPORT cc_viewport = {
2041 .MinimumDepth = vp->minDepth,
2042 .MaximumDepth = vp->maxDepth
2043 };
2044
2045       /* Since xmax and ymax are inclusive, we have to have xmax < xmin or
2046        * ymax < ymin for empty clips. In case clip x, y, width and height are
2047        * all 0, the clamps below produce 0 for xmin, ymin, xmax and ymax, which
2048        * isn't what we want. Just special-case empty clips and produce a
2049        * canonical empty clip. */
2050 static const struct GEN8_SCISSOR_RECT empty_scissor = {
2051 .ScissorRectangleYMin = 1,
2052 .ScissorRectangleXMin = 1,
2053 .ScissorRectangleYMax = 0,
2054 .ScissorRectangleXMax = 0
2055 };
2056
2057 const int max = 0xffff;
2058 struct GEN8_SCISSOR_RECT scissor = {
2059 /* Do this math using int64_t so overflow gets clamped correctly. */
2060 .ScissorRectangleYMin = clamp_int64(s->offset.y, 0, max),
2061 .ScissorRectangleXMin = clamp_int64(s->offset.x, 0, max),
2062 .ScissorRectangleYMax = clamp_int64((uint64_t) s->offset.y + s->extent.height - 1, 0, max),
2063 .ScissorRectangleXMax = clamp_int64((uint64_t) s->offset.x + s->extent.width - 1, 0, max)
2064 };
2065
2066 GEN8_SF_CLIP_VIEWPORT_pack(NULL, state->sf_clip_vp.map + i * 64, &sf_clip_viewport);
2067       GEN8_CC_VIEWPORT_pack(NULL, state->cc_vp.map + i * 8, &cc_viewport);
2068
2069 if (s->extent.width <= 0 || s->extent.height <= 0) {
2070 GEN8_SCISSOR_RECT_pack(NULL, state->scissor.map + i * 32, &empty_scissor);
2071 } else {
2072 GEN8_SCISSOR_RECT_pack(NULL, state->scissor.map + i * 32, &scissor);
2073 }
2074 }
2075
2076 *pState = anv_dynamic_vp_state_to_handle(state);
2077
2078 return VK_SUCCESS;
2079 }
2080
2081 VkResult anv_CreateDynamicRasterState(
2082 VkDevice _device,
2083 const VkDynamicRsStateCreateInfo* pCreateInfo,
2084 VkDynamicRsState* pState)
2085 {
2086 ANV_FROM_HANDLE(anv_device, device, _device);
2087 struct anv_dynamic_rs_state *state;
2088
2089 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_DYNAMIC_RS_STATE_CREATE_INFO);
2090
2091 state = anv_device_alloc(device, sizeof(*state), 8,
2092 VK_SYSTEM_ALLOC_TYPE_API_OBJECT);
2093 if (state == NULL)
2094 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
2095
2096 struct GEN8_3DSTATE_SF sf = {
2097 GEN8_3DSTATE_SF_header,
2098 .LineWidth = pCreateInfo->lineWidth,
2099 };
2100
2101 GEN8_3DSTATE_SF_pack(NULL, state->state_sf, &sf);
2102
2103 bool enable_bias = pCreateInfo->depthBias != 0.0f ||
2104 pCreateInfo->slopeScaledDepthBias != 0.0f;
2105 struct GEN8_3DSTATE_RASTER raster = {
2106 .GlobalDepthOffsetEnableSolid = enable_bias,
2107 .GlobalDepthOffsetEnableWireframe = enable_bias,
2108 .GlobalDepthOffsetEnablePoint = enable_bias,
2109 .GlobalDepthOffsetConstant = pCreateInfo->depthBias,
2110 .GlobalDepthOffsetScale = pCreateInfo->slopeScaledDepthBias,
2111 .GlobalDepthOffsetClamp = pCreateInfo->depthBiasClamp
2112 };
2113
2114 GEN8_3DSTATE_RASTER_pack(NULL, state->state_raster, &raster);
2115
2116 *pState = anv_dynamic_rs_state_to_handle(state);
2117
2118 return VK_SUCCESS;
2119 }
2120
2121 VkResult anv_CreateDynamicColorBlendState(
2122 VkDevice _device,
2123 const VkDynamicCbStateCreateInfo* pCreateInfo,
2124 VkDynamicCbState* pState)
2125 {
2126 ANV_FROM_HANDLE(anv_device, device, _device);
2127 struct anv_dynamic_cb_state *state;
2128
2129 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_DYNAMIC_CB_STATE_CREATE_INFO);
2130
2131 state = anv_device_alloc(device, sizeof(*state), 8,
2132 VK_SYSTEM_ALLOC_TYPE_API_OBJECT);
2133 if (state == NULL)
2134 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
2135
2136 struct GEN8_COLOR_CALC_STATE color_calc_state = {
2137 .BlendConstantColorRed = pCreateInfo->blendConst[0],
2138 .BlendConstantColorGreen = pCreateInfo->blendConst[1],
2139 .BlendConstantColorBlue = pCreateInfo->blendConst[2],
2140 .BlendConstantColorAlpha = pCreateInfo->blendConst[3]
2141 };
2142
2143 GEN8_COLOR_CALC_STATE_pack(NULL, state->state_color_calc, &color_calc_state);
2144
2145 *pState = anv_dynamic_cb_state_to_handle(state);
2146
2147 return VK_SUCCESS;
2148 }
2149
2150 VkResult anv_CreateDynamicDepthStencilState(
2151 VkDevice _device,
2152 const VkDynamicDsStateCreateInfo* pCreateInfo,
2153 VkDynamicDsState* pState)
2154 {
2155 ANV_FROM_HANDLE(anv_device, device, _device);
2156 struct anv_dynamic_ds_state *state;
2157
2158 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_DYNAMIC_DS_STATE_CREATE_INFO);
2159
2160 state = anv_device_alloc(device, sizeof(*state), 8,
2161 VK_SYSTEM_ALLOC_TYPE_API_OBJECT);
2162 if (state == NULL)
2163 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
2164
2165 struct GEN8_3DSTATE_WM_DEPTH_STENCIL wm_depth_stencil = {
2166 GEN8_3DSTATE_WM_DEPTH_STENCIL_header,
2167
2168 /* Is this what we need to do? */
2169 .StencilBufferWriteEnable = pCreateInfo->stencilWriteMask != 0,
2170
2171 .StencilTestMask = pCreateInfo->stencilReadMask & 0xff,
2172 .StencilWriteMask = pCreateInfo->stencilWriteMask & 0xff,
2173
2174 .BackfaceStencilTestMask = pCreateInfo->stencilReadMask & 0xff,
2175 .BackfaceStencilWriteMask = pCreateInfo->stencilWriteMask & 0xff,
2176 };
2177
2178 GEN8_3DSTATE_WM_DEPTH_STENCIL_pack(NULL, state->state_wm_depth_stencil,
2179 &wm_depth_stencil);
2180
2181 struct GEN8_COLOR_CALC_STATE color_calc_state = {
2182 .StencilReferenceValue = pCreateInfo->stencilFrontRef,
2183 .BackFaceStencilReferenceValue = pCreateInfo->stencilBackRef
2184 };
2185
2186 GEN8_COLOR_CALC_STATE_pack(NULL, state->state_color_calc, &color_calc_state);
2187
2188 *pState = anv_dynamic_ds_state_to_handle(state);
2189
2190 return VK_SUCCESS;
2191 }
2192
2193 // Command buffer functions
2194
2195 static void
2196 anv_cmd_buffer_destroy(struct anv_device *device,
2197 struct anv_object *object,
2198 VkObjectType obj_type)
2199 {
2200 struct anv_cmd_buffer *cmd_buffer = (struct anv_cmd_buffer *) object;
2201
2202 assert(obj_type == VK_OBJECT_TYPE_COMMAND_BUFFER);
2203
2204 /* Destroy all of the batch buffers */
2205 struct anv_batch_bo *bbo = cmd_buffer->last_batch_bo;
2206 while (bbo) {
2207 struct anv_batch_bo *prev = bbo->prev_batch_bo;
2208 anv_batch_bo_destroy(bbo, device);
2209 bbo = prev;
2210 }
2211 anv_reloc_list_finish(&cmd_buffer->batch.relocs, device);
2212
2213 /* Destroy all of the surface state buffers */
2214 bbo = cmd_buffer->surface_batch_bo;
2215 while (bbo) {
2216 struct anv_batch_bo *prev = bbo->prev_batch_bo;
2217 anv_batch_bo_destroy(bbo, device);
2218 bbo = prev;
2219 }
2220 anv_reloc_list_finish(&cmd_buffer->surface_relocs, device);
2221
2222 anv_state_stream_finish(&cmd_buffer->surface_state_stream);
2223 anv_state_stream_finish(&cmd_buffer->dynamic_state_stream);
2224 anv_device_free(device, cmd_buffer->exec2_objects);
2225 anv_device_free(device, cmd_buffer->exec2_bos);
2226 anv_device_free(device, cmd_buffer);
2227 }
2228
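/* Batches are grown by chaining buffer objects together: when the current
 * batch BO fills up, the callback below allocates a fresh BO and emits an
 * MI_BATCH_BUFFER_START at the end of the old one that jumps to the new one,
 * so the kernel still sees a single logical batch.
 */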
2229 static VkResult
2230 anv_cmd_buffer_chain_batch(struct anv_batch *batch, void *_data)
2231 {
2232 struct anv_cmd_buffer *cmd_buffer = _data;
2233
2234 struct anv_batch_bo *new_bbo, *old_bbo = cmd_buffer->last_batch_bo;
2235
2236 VkResult result = anv_batch_bo_create(cmd_buffer->device, &new_bbo);
2237 if (result != VK_SUCCESS)
2238 return result;
2239
2240    /* We set the end of the batch a little short so that we're sure to have
2241     * room for the chaining command. Since we're about to emit that command,
2242     * set the end back where it should go.
2243     */
2244 batch->end += GEN8_MI_BATCH_BUFFER_START_length * 4;
2245 assert(batch->end == old_bbo->bo.map + old_bbo->bo.size);
2246
2247 anv_batch_emit(batch, GEN8_MI_BATCH_BUFFER_START,
2248 GEN8_MI_BATCH_BUFFER_START_header,
2249 ._2ndLevelBatchBuffer = _1stlevelbatch,
2250 .AddressSpaceIndicator = ASI_PPGTT,
2251 .BatchBufferStartAddress = { &new_bbo->bo, 0 },
2252 );
2253
2254 /* Pad out to a 2-dword aligned boundary with zeros */
2255 if ((uintptr_t)batch->next % 8 != 0) {
2256 *(uint32_t *)batch->next = 0;
2257 batch->next += 4;
2258 }
2259
2260 anv_batch_bo_finish(cmd_buffer->last_batch_bo, batch);
2261
2262 new_bbo->prev_batch_bo = old_bbo;
2263 cmd_buffer->last_batch_bo = new_bbo;
2264
2265 anv_batch_bo_start(new_bbo, batch, GEN8_MI_BATCH_BUFFER_START_length * 4);
2266
2267 return VK_SUCCESS;
2268 }
2269
2270 VkResult anv_CreateCommandBuffer(
2271 VkDevice _device,
2272 const VkCmdBufferCreateInfo* pCreateInfo,
2273 VkCmdBuffer* pCmdBuffer)
2274 {
2275 ANV_FROM_HANDLE(anv_device, device, _device);
2276 struct anv_cmd_buffer *cmd_buffer;
2277 VkResult result;
2278
2279 assert(pCreateInfo->level == VK_CMD_BUFFER_LEVEL_PRIMARY);
2280
2281 cmd_buffer = anv_device_alloc(device, sizeof(*cmd_buffer), 8,
2282 VK_SYSTEM_ALLOC_TYPE_API_OBJECT);
2283 if (cmd_buffer == NULL)
2284 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
2285
2286 cmd_buffer->base.destructor = anv_cmd_buffer_destroy;
2287
2288 cmd_buffer->device = device;
2289 cmd_buffer->rs_state = NULL;
2290 cmd_buffer->vp_state = NULL;
2291 cmd_buffer->cb_state = NULL;
2292 cmd_buffer->ds_state = NULL;
2293 memset(&cmd_buffer->state_vf, 0, sizeof(cmd_buffer->state_vf));
2294 memset(&cmd_buffer->descriptors, 0, sizeof(cmd_buffer->descriptors));
2295
2296 result = anv_batch_bo_create(device, &cmd_buffer->last_batch_bo);
2297 if (result != VK_SUCCESS)
2298 goto fail;
2299
2300 result = anv_reloc_list_init(&cmd_buffer->batch.relocs, device);
2301 if (result != VK_SUCCESS)
2302 goto fail_batch_bo;
2303
2304 cmd_buffer->batch.device = device;
2305 cmd_buffer->batch.extend_cb = anv_cmd_buffer_chain_batch;
2306 cmd_buffer->batch.user_data = cmd_buffer;
2307
2308 anv_batch_bo_start(cmd_buffer->last_batch_bo, &cmd_buffer->batch,
2309 GEN8_MI_BATCH_BUFFER_START_length * 4);
2310
2311 result = anv_batch_bo_create(device, &cmd_buffer->surface_batch_bo);
2312 if (result != VK_SUCCESS)
2313 goto fail_batch_relocs;
2314 cmd_buffer->surface_batch_bo->first_reloc = 0;
2315
2316 result = anv_reloc_list_init(&cmd_buffer->surface_relocs, device);
2317 if (result != VK_SUCCESS)
2318 goto fail_ss_batch_bo;
2319
2320 /* Start surface_next at 1 so surface offset 0 is invalid. */
2321 cmd_buffer->surface_next = 1;
2322
2323 cmd_buffer->exec2_objects = NULL;
2324 cmd_buffer->exec2_bos = NULL;
2325 cmd_buffer->exec2_array_length = 0;
2326
2327 anv_state_stream_init(&cmd_buffer->surface_state_stream,
2328 &device->surface_state_block_pool);
2329 anv_state_stream_init(&cmd_buffer->dynamic_state_stream,
2330 &device->dynamic_state_block_pool);
2331
2332 cmd_buffer->dirty = 0;
2333 cmd_buffer->vb_dirty = 0;
2334 cmd_buffer->descriptors_dirty = 0;
2335 cmd_buffer->pipeline = NULL;
2336 cmd_buffer->vp_state = NULL;
2337 cmd_buffer->rs_state = NULL;
2338 cmd_buffer->ds_state = NULL;
2339
2340 *pCmdBuffer = anv_cmd_buffer_to_handle(cmd_buffer);
2341
2342 return VK_SUCCESS;
2343
2344 fail_ss_batch_bo:
2345 anv_batch_bo_destroy(cmd_buffer->surface_batch_bo, device);
2346 fail_batch_relocs:
2347 anv_reloc_list_finish(&cmd_buffer->batch.relocs, device);
2348 fail_batch_bo:
2349 anv_batch_bo_destroy(cmd_buffer->last_batch_bo, device);
2350 fail:
2351 anv_device_free(device, cmd_buffer);
2352
2353 return result;
2354 }
2355
2356 static void
2357 anv_cmd_buffer_emit_state_base_address(struct anv_cmd_buffer *cmd_buffer)
2358 {
2359 struct anv_device *device = cmd_buffer->device;
2360 struct anv_bo *scratch_bo = NULL;
2361
2362 cmd_buffer->scratch_size = device->scratch_block_pool.size;
2363 if (cmd_buffer->scratch_size > 0)
2364 scratch_bo = &device->scratch_block_pool.bo;
2365
2366 anv_batch_emit(&cmd_buffer->batch, GEN8_STATE_BASE_ADDRESS,
2367 .GeneralStateBaseAddress = { scratch_bo, 0 },
2368 .GeneralStateMemoryObjectControlState = GEN8_MOCS,
2369 .GeneralStateBaseAddressModifyEnable = true,
2370 .GeneralStateBufferSize = 0xfffff,
2371 .GeneralStateBufferSizeModifyEnable = true,
2372
2373 .SurfaceStateBaseAddress = { &cmd_buffer->surface_batch_bo->bo, 0 },
2374 .SurfaceStateMemoryObjectControlState = GEN8_MOCS,
2375 .SurfaceStateBaseAddressModifyEnable = true,
2376
2377 .DynamicStateBaseAddress = { &device->dynamic_state_block_pool.bo, 0 },
2378 .DynamicStateMemoryObjectControlState = GEN8_MOCS,
2379 .DynamicStateBaseAddressModifyEnable = true,
2380 .DynamicStateBufferSize = 0xfffff,
2381 .DynamicStateBufferSizeModifyEnable = true,
2382
2383 .IndirectObjectBaseAddress = { NULL, 0 },
2384 .IndirectObjectMemoryObjectControlState = GEN8_MOCS,
2385 .IndirectObjectBaseAddressModifyEnable = true,
2386 .IndirectObjectBufferSize = 0xfffff,
2387 .IndirectObjectBufferSizeModifyEnable = true,
2388
2389 .InstructionBaseAddress = { &device->instruction_block_pool.bo, 0 },
2390 .InstructionMemoryObjectControlState = GEN8_MOCS,
2391 .InstructionBaseAddressModifyEnable = true,
2392 .InstructionBufferSize = 0xfffff,
2393 .InstructionBuffersizeModifyEnable = true);
2394 }
2395
2396 VkResult anv_BeginCommandBuffer(
2397 VkCmdBuffer cmdBuffer,
2398 const VkCmdBufferBeginInfo* pBeginInfo)
2399 {
2400 ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer);
2401
2402 anv_cmd_buffer_emit_state_base_address(cmd_buffer);
2403 cmd_buffer->current_pipeline = UINT32_MAX;
2404
2405 return VK_SUCCESS;
2406 }
2407
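/* Build up the execbuf2 object list.  Each BO is added at most once;
 * bo->index remembers its slot in exec2_objects/exec2_bos so that repeated
 * references (the common case for surface state and vertex buffers) are
 * deduplicated with a cheap check instead of a search.
 */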
2408 static VkResult
2409 anv_cmd_buffer_add_bo(struct anv_cmd_buffer *cmd_buffer,
2410 struct anv_bo *bo,
2411 struct drm_i915_gem_relocation_entry *relocs,
2412 size_t num_relocs)
2413 {
2414 struct drm_i915_gem_exec_object2 *obj;
2415
2416 if (bo->index < cmd_buffer->bo_count &&
2417 cmd_buffer->exec2_bos[bo->index] == bo)
2418 return VK_SUCCESS;
2419
2420 if (cmd_buffer->bo_count >= cmd_buffer->exec2_array_length) {
2421 uint32_t new_len = cmd_buffer->exec2_objects ?
2422 cmd_buffer->exec2_array_length * 2 : 64;
2423
2424 struct drm_i915_gem_exec_object2 *new_objects =
2425 anv_device_alloc(cmd_buffer->device, new_len * sizeof(*new_objects),
2426 8, VK_SYSTEM_ALLOC_TYPE_INTERNAL);
2427 if (new_objects == NULL)
2428 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
2429
2430 struct anv_bo **new_bos =
2431 anv_device_alloc(cmd_buffer->device, new_len * sizeof(*new_bos),
2432 8, VK_SYSTEM_ALLOC_TYPE_INTERNAL);
2433       if (new_bos == NULL) {
2434 anv_device_free(cmd_buffer->device, new_objects);
2435 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
2436 }
2437
2438 if (cmd_buffer->exec2_objects) {
2439 memcpy(new_objects, cmd_buffer->exec2_objects,
2440 cmd_buffer->bo_count * sizeof(*new_objects));
2441 memcpy(new_bos, cmd_buffer->exec2_bos,
2442 cmd_buffer->bo_count * sizeof(*new_bos));
2443 }
2444
2445 cmd_buffer->exec2_objects = new_objects;
2446 cmd_buffer->exec2_bos = new_bos;
2447 cmd_buffer->exec2_array_length = new_len;
2448 }
2449
2450 assert(cmd_buffer->bo_count < cmd_buffer->exec2_array_length);
2451
2452 bo->index = cmd_buffer->bo_count++;
2453 obj = &cmd_buffer->exec2_objects[bo->index];
2454 cmd_buffer->exec2_bos[bo->index] = bo;
2455
2456 obj->handle = bo->gem_handle;
2457 obj->relocation_count = 0;
2458 obj->relocs_ptr = 0;
2459 obj->alignment = 0;
2460 obj->offset = bo->offset;
2461 obj->flags = 0;
2462 obj->rsvd1 = 0;
2463 obj->rsvd2 = 0;
2464
2465 if (relocs) {
2466 obj->relocation_count = num_relocs;
2467 obj->relocs_ptr = (uintptr_t) relocs;
2468 }
2469
2470 return VK_SUCCESS;
2471 }
2472
2473 static void
2474 anv_cmd_buffer_add_validate_bos(struct anv_cmd_buffer *cmd_buffer,
2475 struct anv_reloc_list *list)
2476 {
2477 for (size_t i = 0; i < list->num_relocs; i++)
2478 anv_cmd_buffer_add_bo(cmd_buffer, list->reloc_bos[i], NULL, 0);
2479 }
2480
2481 static void
2482 anv_cmd_buffer_process_relocs(struct anv_cmd_buffer *cmd_buffer,
2483 struct anv_reloc_list *list)
2484 {
2485 struct anv_bo *bo;
2486
2487    /* If the kernel supports I915_EXEC_NO_RELOC, it will compare the offset in
2488     * struct drm_i915_gem_exec_object2 against the bo's current offset and, if
2489     * none of the bos have moved, it will skip relocation processing altogether.
2490 * If I915_EXEC_NO_RELOC is not supported, the kernel ignores the incoming
2491 * value of offset so we can set it either way. For that to work we need
2492 * to make sure all relocs use the same presumed offset.
2493 */
2494
2495 for (size_t i = 0; i < list->num_relocs; i++) {
2496 bo = list->reloc_bos[i];
2497 if (bo->offset != list->relocs[i].presumed_offset)
2498 cmd_buffer->need_reloc = true;
2499
2500 list->relocs[i].target_handle = bo->index;
2501 }
2502 }
2503
2504 VkResult anv_EndCommandBuffer(
2505 VkCmdBuffer cmdBuffer)
2506 {
2507 ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer);
2508 struct anv_device *device = cmd_buffer->device;
2509 struct anv_batch *batch = &cmd_buffer->batch;
2510
2511 anv_batch_emit(batch, GEN8_MI_BATCH_BUFFER_END);
2512
2513 /* Round batch up to an even number of dwords. */
2514 if ((batch->next - batch->start) & 4)
2515 anv_batch_emit(batch, GEN8_MI_NOOP);
2516
2517 anv_batch_bo_finish(cmd_buffer->last_batch_bo, &cmd_buffer->batch);
2518 cmd_buffer->surface_batch_bo->num_relocs =
2519 cmd_buffer->surface_relocs.num_relocs - cmd_buffer->surface_batch_bo->first_reloc;
2520 cmd_buffer->surface_batch_bo->length = cmd_buffer->surface_next;
2521
2522 cmd_buffer->bo_count = 0;
2523 cmd_buffer->need_reloc = false;
2524
2525 /* Lock for access to bo->index. */
2526 pthread_mutex_lock(&device->mutex);
2527
2528 /* Add surface state bos first so we can add them with their relocs. */
2529 for (struct anv_batch_bo *bbo = cmd_buffer->surface_batch_bo;
2530 bbo != NULL; bbo = bbo->prev_batch_bo) {
2531 anv_cmd_buffer_add_bo(cmd_buffer, &bbo->bo,
2532 &cmd_buffer->surface_relocs.relocs[bbo->first_reloc],
2533 bbo->num_relocs);
2534 }
2535
2536 /* Add all of the BOs referenced by surface state */
2537 anv_cmd_buffer_add_validate_bos(cmd_buffer, &cmd_buffer->surface_relocs);
2538
2539 /* Add all but the first batch BO */
2540 struct anv_batch_bo *batch_bo = cmd_buffer->last_batch_bo;
2541 while (batch_bo->prev_batch_bo) {
2542 anv_cmd_buffer_add_bo(cmd_buffer, &batch_bo->bo,
2543 &batch->relocs.relocs[batch_bo->first_reloc],
2544 batch_bo->num_relocs);
2545 batch_bo = batch_bo->prev_batch_bo;
2546 }
2547
2548 /* Add everything referenced by the batches */
2549 anv_cmd_buffer_add_validate_bos(cmd_buffer, &batch->relocs);
2550
2551 /* Add the first batch bo last */
2552 assert(batch_bo->prev_batch_bo == NULL && batch_bo->first_reloc == 0);
2553 anv_cmd_buffer_add_bo(cmd_buffer, &batch_bo->bo,
2554 &batch->relocs.relocs[batch_bo->first_reloc],
2555 batch_bo->num_relocs);
2556 assert(batch_bo->bo.index == cmd_buffer->bo_count - 1);
2557
2558 anv_cmd_buffer_process_relocs(cmd_buffer, &cmd_buffer->surface_relocs);
2559 anv_cmd_buffer_process_relocs(cmd_buffer, &batch->relocs);
2560
2561 cmd_buffer->execbuf.buffers_ptr = (uintptr_t) cmd_buffer->exec2_objects;
2562 cmd_buffer->execbuf.buffer_count = cmd_buffer->bo_count;
2563 cmd_buffer->execbuf.batch_start_offset = 0;
2564 cmd_buffer->execbuf.batch_len = batch->next - batch->start;
2565 cmd_buffer->execbuf.cliprects_ptr = 0;
2566 cmd_buffer->execbuf.num_cliprects = 0;
2567 cmd_buffer->execbuf.DR1 = 0;
2568 cmd_buffer->execbuf.DR4 = 0;
2569
2570 cmd_buffer->execbuf.flags = I915_EXEC_HANDLE_LUT;
2571 if (!cmd_buffer->need_reloc)
2572 cmd_buffer->execbuf.flags |= I915_EXEC_NO_RELOC;
2573 cmd_buffer->execbuf.flags |= I915_EXEC_RENDER;
2574 cmd_buffer->execbuf.rsvd1 = device->context_id;
2575 cmd_buffer->execbuf.rsvd2 = 0;
2576
2577 pthread_mutex_unlock(&device->mutex);
2578
2579 return VK_SUCCESS;
2580 }
2581
2582 VkResult anv_ResetCommandBuffer(
2583 VkCmdBuffer cmdBuffer)
2584 {
2585 ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer);
2586
2587 /* Delete all but the first batch bo */
2588 while (cmd_buffer->last_batch_bo->prev_batch_bo) {
2589 struct anv_batch_bo *prev = cmd_buffer->last_batch_bo->prev_batch_bo;
2590 anv_batch_bo_destroy(cmd_buffer->last_batch_bo, cmd_buffer->device);
2591 cmd_buffer->last_batch_bo = prev;
2592 }
2593 assert(cmd_buffer->last_batch_bo->prev_batch_bo == NULL);
2594
2595 cmd_buffer->batch.relocs.num_relocs = 0;
2596 anv_batch_bo_start(cmd_buffer->last_batch_bo, &cmd_buffer->batch,
2597 GEN8_MI_BATCH_BUFFER_START_length * 4);
2598
2599 /* Delete all but the first batch bo */
2600 while (cmd_buffer->surface_batch_bo->prev_batch_bo) {
2601 struct anv_batch_bo *prev = cmd_buffer->surface_batch_bo->prev_batch_bo;
2602 anv_batch_bo_destroy(cmd_buffer->surface_batch_bo, cmd_buffer->device);
2603 cmd_buffer->surface_batch_bo = prev;
2604 }
2605 assert(cmd_buffer->surface_batch_bo->prev_batch_bo == NULL);
2606
2607 cmd_buffer->surface_next = 1;
2608 cmd_buffer->surface_relocs.num_relocs = 0;
2609
2610 cmd_buffer->rs_state = NULL;
2611 cmd_buffer->vp_state = NULL;
2612 cmd_buffer->cb_state = NULL;
2613 cmd_buffer->ds_state = NULL;
2614
2615 return VK_SUCCESS;
2616 }
2617
2618 // Command buffer building functions
2619
2620 void anv_CmdBindPipeline(
2621 VkCmdBuffer cmdBuffer,
2622 VkPipelineBindPoint pipelineBindPoint,
2623 VkPipeline _pipeline)
2624 {
2625 ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer);
2626 ANV_FROM_HANDLE(anv_pipeline, pipeline, _pipeline);
2627
2628 switch (pipelineBindPoint) {
2629 case VK_PIPELINE_BIND_POINT_COMPUTE:
2630 cmd_buffer->compute_pipeline = pipeline;
2631 cmd_buffer->compute_dirty |= ANV_CMD_BUFFER_PIPELINE_DIRTY;
2632 break;
2633
2634 case VK_PIPELINE_BIND_POINT_GRAPHICS:
2635 cmd_buffer->pipeline = pipeline;
2636 cmd_buffer->vb_dirty |= pipeline->vb_used;
2637 cmd_buffer->dirty |= ANV_CMD_BUFFER_PIPELINE_DIRTY;
2638 break;
2639
2640 default:
2641 assert(!"invalid bind point");
2642 break;
2643 }
2644 }
2645
2646 void anv_CmdBindDynamicStateObject(
2647 VkCmdBuffer cmdBuffer,
2648 VkStateBindPoint stateBindPoint,
2649 VkDynamicStateObject dynamicState)
2650 {
2651 ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer);
2652
2653 switch (stateBindPoint) {
2654 case VK_STATE_BIND_POINT_VIEWPORT:
2655 cmd_buffer->vp_state = anv_dynamic_vp_state_from_handle(dynamicState);
2656 cmd_buffer->dirty |= ANV_CMD_BUFFER_VP_DIRTY;
2657 break;
2658 case VK_STATE_BIND_POINT_RASTER:
2659 cmd_buffer->rs_state = anv_dynamic_rs_state_from_handle(dynamicState);
2660 cmd_buffer->dirty |= ANV_CMD_BUFFER_RS_DIRTY;
2661 break;
2662 case VK_STATE_BIND_POINT_COLOR_BLEND:
2663 cmd_buffer->cb_state = anv_dynamic_cb_state_from_handle(dynamicState);
2664 cmd_buffer->dirty |= ANV_CMD_BUFFER_CB_DIRTY;
2665 break;
2666 case VK_STATE_BIND_POINT_DEPTH_STENCIL:
2667 cmd_buffer->ds_state = anv_dynamic_ds_state_from_handle(dynamicState);
2668 cmd_buffer->dirty |= ANV_CMD_BUFFER_DS_DIRTY;
2669 break;
2670 default:
2671 break;
2672 };
2673 }
2674
2675 static struct anv_state
2676 anv_cmd_buffer_alloc_surface_state(struct anv_cmd_buffer *cmd_buffer,
2677 uint32_t size, uint32_t alignment)
2678 {
2679 struct anv_state state;
2680
2681 state.offset = align_u32(cmd_buffer->surface_next, alignment);
2682 if (state.offset + size > cmd_buffer->surface_batch_bo->bo.size)
2683 return (struct anv_state) { 0 };
2684
2685 state.map = cmd_buffer->surface_batch_bo->bo.map + state.offset;
2686 state.alloc_size = size;
2687 cmd_buffer->surface_next = state.offset + size;
2688
2689 assert(state.offset + size <= cmd_buffer->surface_batch_bo->bo.size);
2690
2691 return state;
2692 }
2693
2694 static VkResult
2695 anv_cmd_buffer_new_surface_state_bo(struct anv_cmd_buffer *cmd_buffer)
2696 {
2697 struct anv_batch_bo *new_bbo, *old_bbo = cmd_buffer->surface_batch_bo;
2698
2699 /* Finish off the old buffer */
2700 old_bbo->num_relocs =
2701 cmd_buffer->surface_relocs.num_relocs - old_bbo->first_reloc;
2702 old_bbo->length = cmd_buffer->surface_next;
2703
2704 VkResult result = anv_batch_bo_create(cmd_buffer->device, &new_bbo);
2705 if (result != VK_SUCCESS)
2706 return result;
2707
2708 new_bbo->first_reloc = cmd_buffer->surface_relocs.num_relocs;
2709 cmd_buffer->surface_next = 1;
2710
2711 new_bbo->prev_batch_bo = old_bbo;
2712 cmd_buffer->surface_batch_bo = new_bbo;
2713
2714 /* Re-emit state base addresses so we get the new surface state base
2715 * address before we start emitting binding tables etc.
2716 */
2717 anv_cmd_buffer_emit_state_base_address(cmd_buffer);
2718
2719 /* It seems like just changing the state base addresses isn't enough.
2720 * Invalidating the cache seems to be enough to cause things to
2721 * propagate. However, I'm not 100% sure what we're supposed to do.
2722 */
2723 anv_batch_emit(&cmd_buffer->batch, GEN8_PIPE_CONTROL,
2724 .TextureCacheInvalidationEnable = true);
2725
2726 return VK_SUCCESS;
2727 }
2728
2729 void anv_CmdBindDescriptorSets(
2730 VkCmdBuffer cmdBuffer,
2731 VkPipelineBindPoint pipelineBindPoint,
2732 VkPipelineLayout _layout,
2733 uint32_t firstSet,
2734 uint32_t setCount,
2735 const VkDescriptorSet* pDescriptorSets,
2736 uint32_t dynamicOffsetCount,
2737 const uint32_t* pDynamicOffsets)
2738 {
2739 ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer);
2740 ANV_FROM_HANDLE(anv_pipeline_layout, layout, _layout);
2741 struct anv_descriptor_set_layout *set_layout;
2742
2743    assert(firstSet + setCount <= MAX_SETS);
2744
2745 uint32_t dynamic_slot = 0;
2746 for (uint32_t i = 0; i < setCount; i++) {
2747 ANV_FROM_HANDLE(anv_descriptor_set, set, pDescriptorSets[i]);
2748 set_layout = layout->set[firstSet + i].layout;
2749
2750 cmd_buffer->descriptors[firstSet + i].set = set;
2751
2752 assert(set_layout->num_dynamic_buffers <
2753 ARRAY_SIZE(cmd_buffer->descriptors[0].dynamic_offsets));
2754 memcpy(cmd_buffer->descriptors[firstSet + i].dynamic_offsets,
2755 pDynamicOffsets + dynamic_slot,
2756 set_layout->num_dynamic_buffers * sizeof(*pDynamicOffsets));
2757
2758 cmd_buffer->descriptors_dirty |= set_layout->shader_stages;
2759
2760 dynamic_slot += set_layout->num_dynamic_buffers;
2761 }
2762 }
2763
2764 void anv_CmdBindIndexBuffer(
2765 VkCmdBuffer cmdBuffer,
2766 VkBuffer _buffer,
2767 VkDeviceSize offset,
2768 VkIndexType indexType)
2769 {
2770 ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer);
2771 ANV_FROM_HANDLE(anv_buffer, buffer, _buffer);
2772
2773 static const uint32_t vk_to_gen_index_type[] = {
2774 [VK_INDEX_TYPE_UINT16] = INDEX_WORD,
2775 [VK_INDEX_TYPE_UINT32] = INDEX_DWORD,
2776 };
2777
2778 struct GEN8_3DSTATE_VF vf = {
2779 GEN8_3DSTATE_VF_header,
2780 .CutIndex = (indexType == VK_INDEX_TYPE_UINT16) ? UINT16_MAX : UINT32_MAX,
2781 };
2782 GEN8_3DSTATE_VF_pack(NULL, cmd_buffer->state_vf, &vf);
2783
2784 cmd_buffer->dirty |= ANV_CMD_BUFFER_INDEX_BUFFER_DIRTY;
2785
2786 anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_INDEX_BUFFER,
2787 .IndexFormat = vk_to_gen_index_type[indexType],
2788 .MemoryObjectControlState = GEN8_MOCS,
2789 .BufferStartingAddress = { buffer->bo, buffer->offset + offset },
2790 .BufferSize = buffer->size - offset);
2791 }
2792
2793 void anv_CmdBindVertexBuffers(
2794 VkCmdBuffer cmdBuffer,
2795 uint32_t startBinding,
2796 uint32_t bindingCount,
2797 const VkBuffer* pBuffers,
2798 const VkDeviceSize* pOffsets)
2799 {
2800 ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer);
2801 struct anv_vertex_binding *vb = cmd_buffer->vertex_bindings;
2802
2803    /* We have to defer setting up the vertex buffers since we need the
2804     * buffer stride from the pipeline. */
2805
2806    assert(startBinding + bindingCount <= MAX_VBS);
2807 for (uint32_t i = 0; i < bindingCount; i++) {
2808 vb[startBinding + i].buffer = anv_buffer_from_handle(pBuffers[i]);
2809 vb[startBinding + i].offset = pOffsets[i];
2810 cmd_buffer->vb_dirty |= 1 << (startBinding + i);
2811 }
2812 }
2813
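/* Binding table layout for a stage: the fragment stage reserves the first
 * MAX_RTS entries for render target surfaces (`bias'), all other stages start
 * at zero, and descriptor-set surfaces follow at
 * bias + layout->set[s].surface_start[stage].  Each entry is the offset of a
 * 64-byte SURFACE_STATE in the surface state BO.
 */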
2814 static VkResult
2815 cmd_buffer_emit_binding_table(struct anv_cmd_buffer *cmd_buffer,
2816 unsigned stage, struct anv_state *bt_state)
2817 {
2818 struct anv_pipeline_layout *layout;
2819 uint32_t color_attachments, bias, size;
2820
2821 if (stage == VK_SHADER_STAGE_COMPUTE)
2822 layout = cmd_buffer->compute_pipeline->layout;
2823 else
2824 layout = cmd_buffer->pipeline->layout;
2825
2826 if (stage == VK_SHADER_STAGE_FRAGMENT) {
2827 bias = MAX_RTS;
2828 color_attachments = cmd_buffer->framebuffer->color_attachment_count;
2829 } else {
2830 bias = 0;
2831 color_attachments = 0;
2832 }
2833
2834 /* This is a little awkward: layout can be NULL but we still have to
2835 * allocate and set a binding table for the PS stage for render
2836 * targets. */
2837 uint32_t surface_count = layout ? layout->stage[stage].surface_count : 0;
2838
2839 if (color_attachments + surface_count == 0)
2840 return VK_SUCCESS;
2841
2842 size = (bias + surface_count) * sizeof(uint32_t);
2843 *bt_state = anv_cmd_buffer_alloc_surface_state(cmd_buffer, size, 32);
2844 uint32_t *bt_map = bt_state->map;
2845
2846 if (bt_state->map == NULL)
2847 return VK_ERROR_OUT_OF_DEVICE_MEMORY;
2848
2849 for (uint32_t ca = 0; ca < color_attachments; ca++) {
2850 const struct anv_surface_view *view =
2851 cmd_buffer->framebuffer->color_attachments[ca];
2852
2853 struct anv_state state =
2854 anv_cmd_buffer_alloc_surface_state(cmd_buffer, 64, 64);
2855
2856 if (state.map == NULL)
2857 return VK_ERROR_OUT_OF_DEVICE_MEMORY;
2858
2859 memcpy(state.map, view->surface_state.map, 64);
2860
2861 /* The address goes in dwords 8 and 9 of the SURFACE_STATE */
2862 *(uint64_t *)(state.map + 8 * 4) =
2863 anv_reloc_list_add(&cmd_buffer->surface_relocs,
2864 cmd_buffer->device,
2865 state.offset + 8 * 4,
2866 view->bo, view->offset);
2867
2868 bt_map[ca] = state.offset;
2869 }
2870
2871 if (layout == NULL)
2872 return VK_SUCCESS;
2873
2874 for (uint32_t set = 0; set < layout->num_sets; set++) {
2875 struct anv_descriptor_set_binding *d = &cmd_buffer->descriptors[set];
2876 struct anv_descriptor_set_layout *set_layout = layout->set[set].layout;
2877 struct anv_descriptor_slot *surface_slots =
2878 set_layout->stage[stage].surface_start;
2879
2880 uint32_t start = bias + layout->set[set].surface_start[stage];
2881
2882 for (uint32_t b = 0; b < set_layout->stage[stage].surface_count; b++) {
2883 struct anv_surface_view *view =
2884 d->set->descriptors[surface_slots[b].index].view;
2885
2886 if (!view)
2887 continue;
2888
2889 struct anv_state state =
2890 anv_cmd_buffer_alloc_surface_state(cmd_buffer, 64, 64);
2891
2892 if (state.map == NULL)
2893 return VK_ERROR_OUT_OF_DEVICE_MEMORY;
2894
2895 uint32_t offset;
2896 if (surface_slots[b].dynamic_slot >= 0) {
2897 uint32_t dynamic_offset =
2898 d->dynamic_offsets[surface_slots[b].dynamic_slot];
2899
2900 offset = view->offset + dynamic_offset;
2901 fill_buffer_surface_state(state.map, view->format, offset,
2902 view->range - dynamic_offset);
2903 } else {
2904 offset = view->offset;
2905 memcpy(state.map, view->surface_state.map, 64);
2906 }
2907
2908 /* The address goes in dwords 8 and 9 of the SURFACE_STATE */
2909 *(uint64_t *)(state.map + 8 * 4) =
2910 anv_reloc_list_add(&cmd_buffer->surface_relocs,
2911 cmd_buffer->device,
2912 state.offset + 8 * 4,
2913 view->bo, offset);
2914
2915 bt_map[start + b] = state.offset;
2916 }
2917 }
2918
2919 return VK_SUCCESS;
2920 }
2921
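/* Sampler tables are 16 bytes (four dwords) per entry on gen8.  We copy the
 * SAMPLER_STATE dwords stored in each anv_sampler into a dynamic-state stream
 * allocation; flush_descriptor_set() below then points
 * 3DSTATE_SAMPLER_STATE_POINTERS_* at that table.
 */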
2922 static VkResult
2923 cmd_buffer_emit_samplers(struct anv_cmd_buffer *cmd_buffer,
2924 unsigned stage, struct anv_state *state)
2925 {
2926 struct anv_pipeline_layout *layout;
2927 uint32_t sampler_count;
2928
2929 if (stage == VK_SHADER_STAGE_COMPUTE)
2930 layout = cmd_buffer->compute_pipeline->layout;
2931 else
2932 layout = cmd_buffer->pipeline->layout;
2933
2934 sampler_count = layout ? layout->stage[stage].sampler_count : 0;
2935 if (sampler_count == 0)
2936 return VK_SUCCESS;
2937
2938 uint32_t size = sampler_count * 16;
2939 *state = anv_state_stream_alloc(&cmd_buffer->dynamic_state_stream, size, 32);
2940
2941 if (state->map == NULL)
2942 return VK_ERROR_OUT_OF_DEVICE_MEMORY;
2943
2944 for (uint32_t set = 0; set < layout->num_sets; set++) {
2945 struct anv_descriptor_set_binding *d = &cmd_buffer->descriptors[set];
2946 struct anv_descriptor_set_layout *set_layout = layout->set[set].layout;
2947 struct anv_descriptor_slot *sampler_slots =
2948 set_layout->stage[stage].sampler_start;
2949
2950 uint32_t start = layout->set[set].sampler_start[stage];
2951
2952 for (uint32_t b = 0; b < set_layout->stage[stage].sampler_count; b++) {
2953 struct anv_sampler *sampler =
2954 d->set->descriptors[sampler_slots[b].index].sampler;
2955
2956 if (!sampler)
2957 continue;
2958
2959 memcpy(state->map + (start + b) * 16,
2960 sampler->state, sizeof(sampler->state));
2961 }
2962 }
2963
2964 return VK_SUCCESS;
2965 }
2966
2967 static VkResult
2968 flush_descriptor_set(struct anv_cmd_buffer *cmd_buffer, uint32_t stage)
2969 {
2970 struct anv_state surfaces = { 0, }, samplers = { 0, };
2971 VkResult result;
2972
2973 result = cmd_buffer_emit_samplers(cmd_buffer, stage, &samplers);
2974 if (result != VK_SUCCESS)
2975 return result;
2976 result = cmd_buffer_emit_binding_table(cmd_buffer, stage, &surfaces);
2977 if (result != VK_SUCCESS)
2978 return result;
2979
2980 static const uint32_t sampler_state_opcodes[] = {
2981 [VK_SHADER_STAGE_VERTEX] = 43,
2982 [VK_SHADER_STAGE_TESS_CONTROL] = 44, /* HS */
2983 [VK_SHADER_STAGE_TESS_EVALUATION] = 45, /* DS */
2984 [VK_SHADER_STAGE_GEOMETRY] = 46,
2985 [VK_SHADER_STAGE_FRAGMENT] = 47,
2986 [VK_SHADER_STAGE_COMPUTE] = 0,
2987 };
2988
2989 static const uint32_t binding_table_opcodes[] = {
2990 [VK_SHADER_STAGE_VERTEX] = 38,
2991 [VK_SHADER_STAGE_TESS_CONTROL] = 39,
2992 [VK_SHADER_STAGE_TESS_EVALUATION] = 40,
2993 [VK_SHADER_STAGE_GEOMETRY] = 41,
2994 [VK_SHADER_STAGE_FRAGMENT] = 42,
2995 [VK_SHADER_STAGE_COMPUTE] = 0,
2996 };
2997
2998 if (samplers.alloc_size > 0) {
2999 anv_batch_emit(&cmd_buffer->batch,
3000 GEN8_3DSTATE_SAMPLER_STATE_POINTERS_VS,
3001 ._3DCommandSubOpcode = sampler_state_opcodes[stage],
3002 .PointertoVSSamplerState = samplers.offset);
3003 }
3004
3005 if (surfaces.alloc_size > 0) {
3006 anv_batch_emit(&cmd_buffer->batch,
3007 GEN8_3DSTATE_BINDING_TABLE_POINTERS_VS,
3008 ._3DCommandSubOpcode = binding_table_opcodes[stage],
3009 .PointertoVSBindingTable = surfaces.offset);
3010 }
3011
3012 return VK_SUCCESS;
3013 }
3014
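/* Emit binding tables and samplers for every dirty stage.  If the surface
 * state BO fills up part way through, grab a new one (which re-emits
 * STATE_BASE_ADDRESS) and then re-emit the tables for all active stages,
 * since offsets already emitted are relative to the old surface state base.
 */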
3015 static void
3016 flush_descriptor_sets(struct anv_cmd_buffer *cmd_buffer)
3017 {
3018 uint32_t s, dirty = cmd_buffer->descriptors_dirty &
3019 cmd_buffer->pipeline->active_stages;
3020
3021 VkResult result = VK_SUCCESS;
3022 for_each_bit(s, dirty) {
3023 result = flush_descriptor_set(cmd_buffer, s);
3024 if (result != VK_SUCCESS)
3025 break;
3026 }
3027
3028 if (result != VK_SUCCESS) {
3029 assert(result == VK_ERROR_OUT_OF_DEVICE_MEMORY);
3030
3031 result = anv_cmd_buffer_new_surface_state_bo(cmd_buffer);
3032 assert(result == VK_SUCCESS);
3033
3034 /* Re-emit all active binding tables */
3035 for_each_bit(s, cmd_buffer->pipeline->active_stages) {
3036 result = flush_descriptor_set(cmd_buffer, s);
3037
3038 /* It had better succeed this time */
3039 assert(result == VK_SUCCESS);
3040 }
3041 }
3042
3043 cmd_buffer->descriptors_dirty &= ~cmd_buffer->pipeline->active_stages;
3044 }
3045
3046 static struct anv_state
3047 anv_cmd_buffer_emit_dynamic(struct anv_cmd_buffer *cmd_buffer,
3048 uint32_t *a, uint32_t dwords, uint32_t alignment)
3049 {
3050 struct anv_state state;
3051
3052 state = anv_state_stream_alloc(&cmd_buffer->dynamic_state_stream,
3053 dwords * 4, alignment);
3054 memcpy(state.map, a, dwords * 4);
3055
3056 VG(VALGRIND_CHECK_MEM_IS_DEFINED(state.map, dwords * 4));
3057
3058 return state;
3059 }
3060
3061 static struct anv_state
3062 anv_cmd_buffer_merge_dynamic(struct anv_cmd_buffer *cmd_buffer,
3063 uint32_t *a, uint32_t *b,
3064 uint32_t dwords, uint32_t alignment)
3065 {
3066 struct anv_state state;
3067 uint32_t *p;
3068
3069 state = anv_state_stream_alloc(&cmd_buffer->dynamic_state_stream,
3070 dwords * 4, alignment);
3071 p = state.map;
3072 for (uint32_t i = 0; i < dwords; i++)
3073 p[i] = a[i] | b[i];
3074
3075 VG(VALGRIND_CHECK_MEM_IS_DEFINED(p, dwords * 4));
3076
3077 return state;
3078 }
3079
3080 static VkResult
3081 flush_compute_descriptor_set(struct anv_cmd_buffer *cmd_buffer)
3082 {
3083 struct anv_device *device = cmd_buffer->device;
3084 struct anv_pipeline *pipeline = cmd_buffer->compute_pipeline;
3085 struct anv_state surfaces = { 0, }, samplers = { 0, };
3086 VkResult result;
3087
3088 result = cmd_buffer_emit_samplers(cmd_buffer,
3089 VK_SHADER_STAGE_COMPUTE, &samplers);
3090 if (result != VK_SUCCESS)
3091 return result;
3092 result = cmd_buffer_emit_binding_table(cmd_buffer,
3093 VK_SHADER_STAGE_COMPUTE, &surfaces);
3094 if (result != VK_SUCCESS)
3095 return result;
3096
3097 struct GEN8_INTERFACE_DESCRIPTOR_DATA desc = {
3098 .KernelStartPointer = pipeline->cs_simd,
3099 .KernelStartPointerHigh = 0,
3100 .BindingTablePointer = surfaces.offset,
3101 .BindingTableEntryCount = 0,
3102 .SamplerStatePointer = samplers.offset,
3103 .SamplerCount = 0,
3104 .NumberofThreadsinGPGPUThreadGroup = 0 /* FIXME: Really? */
3105 };
3106
3107 uint32_t size = GEN8_INTERFACE_DESCRIPTOR_DATA_length * sizeof(uint32_t);
3108 struct anv_state state =
3109 anv_state_pool_alloc(&device->dynamic_state_pool, size, 64);
3110
3111 GEN8_INTERFACE_DESCRIPTOR_DATA_pack(NULL, state.map, &desc);
3112
3113 anv_batch_emit(&cmd_buffer->batch, GEN8_MEDIA_INTERFACE_DESCRIPTOR_LOAD,
3114 .InterfaceDescriptorTotalLength = size,
3115 .InterfaceDescriptorDataStartAddress = state.offset);
3116
3117 return VK_SUCCESS;
3118 }
3119
3120 static void
3121 anv_cmd_buffer_flush_compute_state(struct anv_cmd_buffer *cmd_buffer)
3122 {
3123 struct anv_pipeline *pipeline = cmd_buffer->compute_pipeline;
3124 VkResult result;
3125
3126 assert(pipeline->active_stages == VK_SHADER_STAGE_COMPUTE_BIT);
3127
3128 if (cmd_buffer->current_pipeline != GPGPU) {
3129 anv_batch_emit(&cmd_buffer->batch, GEN8_PIPELINE_SELECT,
3130 .PipelineSelection = GPGPU);
3131 cmd_buffer->current_pipeline = GPGPU;
3132 }
3133
3134 if (cmd_buffer->compute_dirty & ANV_CMD_BUFFER_PIPELINE_DIRTY)
3135 anv_batch_emit_batch(&cmd_buffer->batch, &pipeline->batch);
3136
3137 if ((cmd_buffer->descriptors_dirty & VK_SHADER_STAGE_COMPUTE_BIT) ||
3138 (cmd_buffer->compute_dirty & ANV_CMD_BUFFER_PIPELINE_DIRTY)) {
3139 result = flush_compute_descriptor_set(cmd_buffer);
3140 if (result != VK_SUCCESS) {
3141 result = anv_cmd_buffer_new_surface_state_bo(cmd_buffer);
3142 assert(result == VK_SUCCESS);
3143 result = flush_compute_descriptor_set(cmd_buffer);
3144 assert(result == VK_SUCCESS);
3145 }
3146       cmd_buffer->descriptors_dirty &= ~VK_SHADER_STAGE_COMPUTE_BIT;
3147 }
3148
3149 cmd_buffer->compute_dirty = 0;
3150 }
3151
3152 static void
3153 anv_cmd_buffer_flush_state(struct anv_cmd_buffer *cmd_buffer)
3154 {
3155 struct anv_pipeline *pipeline = cmd_buffer->pipeline;
3156 uint32_t *p;
3157
3158 uint32_t vb_emit = cmd_buffer->vb_dirty & pipeline->vb_used;
3159
3160 assert((pipeline->active_stages & VK_SHADER_STAGE_COMPUTE_BIT) == 0);
3161
3162 if (cmd_buffer->current_pipeline != _3D) {
3163 anv_batch_emit(&cmd_buffer->batch, GEN8_PIPELINE_SELECT,
3164 .PipelineSelection = _3D);
3165 cmd_buffer->current_pipeline = _3D;
3166 }
3167
3168 if (vb_emit) {
3169 const uint32_t num_buffers = __builtin_popcount(vb_emit);
3170 const uint32_t num_dwords = 1 + num_buffers * 4;
3171
3172 p = anv_batch_emitn(&cmd_buffer->batch, num_dwords,
3173 GEN8_3DSTATE_VERTEX_BUFFERS);
3174 uint32_t vb, i = 0;
3175 for_each_bit(vb, vb_emit) {
3176 struct anv_buffer *buffer = cmd_buffer->vertex_bindings[vb].buffer;
3177 uint32_t offset = cmd_buffer->vertex_bindings[vb].offset;
3178
3179 struct GEN8_VERTEX_BUFFER_STATE state = {
3180 .VertexBufferIndex = vb,
3181 .MemoryObjectControlState = GEN8_MOCS,
3182 .AddressModifyEnable = true,
3183 .BufferPitch = pipeline->binding_stride[vb],
3184 .BufferStartingAddress = { buffer->bo, buffer->offset + offset },
3185 .BufferSize = buffer->size - offset
3186 };
3187
3188 GEN8_VERTEX_BUFFER_STATE_pack(&cmd_buffer->batch, &p[1 + i * 4], &state);
3189 i++;
3190 }
3191 }
3192
3193 if (cmd_buffer->dirty & ANV_CMD_BUFFER_PIPELINE_DIRTY) {
3194 /* If somebody compiled a pipeline after starting a command buffer the
3195 * scratch bo may have grown since we started this cmd buffer (and
3196 * emitted STATE_BASE_ADDRESS). If we're binding that pipeline now,
3197 * reemit STATE_BASE_ADDRESS so that we use the bigger scratch bo. */
3198 if (cmd_buffer->scratch_size < pipeline->total_scratch)
3199 anv_cmd_buffer_emit_state_base_address(cmd_buffer);
3200
3201 anv_batch_emit_batch(&cmd_buffer->batch, &pipeline->batch);
3202 }
3203
3204 if (cmd_buffer->descriptors_dirty)
3205 flush_descriptor_sets(cmd_buffer);
3206
3207 if (cmd_buffer->dirty & ANV_CMD_BUFFER_VP_DIRTY) {
3208 anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_SCISSOR_STATE_POINTERS,
3209 .ScissorRectPointer = cmd_buffer->vp_state->scissor.offset);
3210 anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_VIEWPORT_STATE_POINTERS_CC,
3211 .CCViewportPointer = cmd_buffer->vp_state->cc_vp.offset);
3212 anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP,
3213 .SFClipViewportPointer = cmd_buffer->vp_state->sf_clip_vp.offset);
3214 }
3215
3216 if (cmd_buffer->dirty & (ANV_CMD_BUFFER_PIPELINE_DIRTY | ANV_CMD_BUFFER_RS_DIRTY)) {
3217 anv_batch_emit_merge(&cmd_buffer->batch,
3218 cmd_buffer->rs_state->state_sf, pipeline->state_sf);
3219 anv_batch_emit_merge(&cmd_buffer->batch,
3220 cmd_buffer->rs_state->state_raster, pipeline->state_raster);
3221 }
3222
3223 if (cmd_buffer->ds_state &&
3224 (cmd_buffer->dirty & (ANV_CMD_BUFFER_PIPELINE_DIRTY | ANV_CMD_BUFFER_DS_DIRTY)))
3225 anv_batch_emit_merge(&cmd_buffer->batch,
3226 cmd_buffer->ds_state->state_wm_depth_stencil,
3227 pipeline->state_wm_depth_stencil);
3228
3229 if (cmd_buffer->dirty & (ANV_CMD_BUFFER_CB_DIRTY | ANV_CMD_BUFFER_DS_DIRTY)) {
3230 struct anv_state state;
3231 if (cmd_buffer->ds_state == NULL)
3232 state = anv_cmd_buffer_emit_dynamic(cmd_buffer,
3233 cmd_buffer->cb_state->state_color_calc,
3234 GEN8_COLOR_CALC_STATE_length, 64);
3235 else if (cmd_buffer->cb_state == NULL)
3236 state = anv_cmd_buffer_emit_dynamic(cmd_buffer,
3237 cmd_buffer->ds_state->state_color_calc,
3238 GEN8_COLOR_CALC_STATE_length, 64);
3239 else
3240 state = anv_cmd_buffer_merge_dynamic(cmd_buffer,
3241 cmd_buffer->ds_state->state_color_calc,
3242 cmd_buffer->cb_state->state_color_calc,
3243 GEN8_COLOR_CALC_STATE_length, 64);
3244
3245 anv_batch_emit(&cmd_buffer->batch,
3246 GEN8_3DSTATE_CC_STATE_POINTERS,
3247 .ColorCalcStatePointer = state.offset,
3248 .ColorCalcStatePointerValid = true);
3249 }
3250
3251 if (cmd_buffer->dirty & (ANV_CMD_BUFFER_PIPELINE_DIRTY | ANV_CMD_BUFFER_INDEX_BUFFER_DIRTY)) {
3252 anv_batch_emit_merge(&cmd_buffer->batch,
3253 cmd_buffer->state_vf, pipeline->state_vf);
3254 }
3255
3256 cmd_buffer->vb_dirty &= ~vb_emit;
3257 cmd_buffer->dirty = 0;
3258 }
3259
3260 void anv_CmdDraw(
3261 VkCmdBuffer cmdBuffer,
3262 uint32_t firstVertex,
3263 uint32_t vertexCount,
3264 uint32_t firstInstance,
3265 uint32_t instanceCount)
3266 {
3267 ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer);
3268
3269 anv_cmd_buffer_flush_state(cmd_buffer);
3270
3271 anv_batch_emit(&cmd_buffer->batch, GEN8_3DPRIMITIVE,
3272 .VertexAccessType = SEQUENTIAL,
3273 .VertexCountPerInstance = vertexCount,
3274 .StartVertexLocation = firstVertex,
3275 .InstanceCount = instanceCount,
3276 .StartInstanceLocation = firstInstance,
3277 .BaseVertexLocation = 0);
3278 }
3279
3280 void anv_CmdDrawIndexed(
3281 VkCmdBuffer cmdBuffer,
3282 uint32_t firstIndex,
3283 uint32_t indexCount,
3284 int32_t vertexOffset,
3285 uint32_t firstInstance,
3286 uint32_t instanceCount)
3287 {
3288 ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer);
3289
3290 anv_cmd_buffer_flush_state(cmd_buffer);
3291
3292 anv_batch_emit(&cmd_buffer->batch, GEN8_3DPRIMITIVE,
3293 .VertexAccessType = RANDOM,
3294 .VertexCountPerInstance = indexCount,
3295 .StartVertexLocation = firstIndex,
3296 .InstanceCount = instanceCount,
3297 .StartInstanceLocation = firstInstance,
3298 .BaseVertexLocation = vertexOffset);
3299 }
3300
3301 static void
3302 anv_batch_lrm(struct anv_batch *batch,
3303 uint32_t reg, struct anv_bo *bo, uint32_t offset)
3304 {
3305 anv_batch_emit(batch, GEN8_MI_LOAD_REGISTER_MEM,
3306 .RegisterAddress = reg,
3307 .MemoryAddress = { bo, offset });
3308 }
3309
3310 static void
3311 anv_batch_lri(struct anv_batch *batch, uint32_t reg, uint32_t imm)
3312 {
3313 anv_batch_emit(batch, GEN8_MI_LOAD_REGISTER_IMM,
3314 .RegisterOffset = reg,
3315 .DataDWord = imm);
3316 }
3317
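/* Indirect draws load the VkDrawIndirectCommand / VkDrawIndexedIndirectCommand
 * fields straight from the buffer into the 3DPRIM_* registers below with
 * MI_LOAD_REGISTER_MEM, then emit 3DPRIMITIVE with IndirectParameterEnable so
 * the parameters never have to pass through the CPU.
 */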
3318 /* Auto-Draw / Indirect Registers */
3319 #define GEN7_3DPRIM_END_OFFSET 0x2420
3320 #define GEN7_3DPRIM_START_VERTEX 0x2430
3321 #define GEN7_3DPRIM_VERTEX_COUNT 0x2434
3322 #define GEN7_3DPRIM_INSTANCE_COUNT 0x2438
3323 #define GEN7_3DPRIM_START_INSTANCE 0x243C
3324 #define GEN7_3DPRIM_BASE_VERTEX 0x2440
3325
3326 void anv_CmdDrawIndirect(
3327 VkCmdBuffer cmdBuffer,
3328 VkBuffer _buffer,
3329 VkDeviceSize offset,
3330 uint32_t count,
3331 uint32_t stride)
3332 {
3333 ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer);
3334 ANV_FROM_HANDLE(anv_buffer, buffer, _buffer);
3335 struct anv_bo *bo = buffer->bo;
3336 uint32_t bo_offset = buffer->offset + offset;
3337
3338 anv_cmd_buffer_flush_state(cmd_buffer);
3339
3340 anv_batch_lrm(&cmd_buffer->batch, GEN7_3DPRIM_VERTEX_COUNT, bo, bo_offset);
3341 anv_batch_lrm(&cmd_buffer->batch, GEN7_3DPRIM_INSTANCE_COUNT, bo, bo_offset + 4);
3342 anv_batch_lrm(&cmd_buffer->batch, GEN7_3DPRIM_START_VERTEX, bo, bo_offset + 8);
3343 anv_batch_lrm(&cmd_buffer->batch, GEN7_3DPRIM_START_INSTANCE, bo, bo_offset + 12);
3344 anv_batch_lri(&cmd_buffer->batch, GEN7_3DPRIM_BASE_VERTEX, 0);
3345
3346 anv_batch_emit(&cmd_buffer->batch, GEN8_3DPRIMITIVE,
3347 .IndirectParameterEnable = true,
3348 .VertexAccessType = SEQUENTIAL);
3349 }
3350
3351 void anv_CmdDrawIndexedIndirect(
3352 VkCmdBuffer cmdBuffer,
3353 VkBuffer _buffer,
3354 VkDeviceSize offset,
3355 uint32_t count,
3356 uint32_t stride)
3357 {
3358 ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer);
3359 ANV_FROM_HANDLE(anv_buffer, buffer, _buffer);
3360 struct anv_bo *bo = buffer->bo;
3361 uint32_t bo_offset = buffer->offset + offset;
3362
3363 anv_cmd_buffer_flush_state(cmd_buffer);
3364
3365 anv_batch_lrm(&cmd_buffer->batch, GEN7_3DPRIM_VERTEX_COUNT, bo, bo_offset);
3366 anv_batch_lrm(&cmd_buffer->batch, GEN7_3DPRIM_INSTANCE_COUNT, bo, bo_offset + 4);
3367 anv_batch_lrm(&cmd_buffer->batch, GEN7_3DPRIM_START_VERTEX, bo, bo_offset + 8);
3368 anv_batch_lrm(&cmd_buffer->batch, GEN7_3DPRIM_BASE_VERTEX, bo, bo_offset + 12);
3369 anv_batch_lrm(&cmd_buffer->batch, GEN7_3DPRIM_START_INSTANCE, bo, bo_offset + 16);
3370
3371 anv_batch_emit(&cmd_buffer->batch, GEN8_3DPRIMITIVE,
3372 .IndirectParameterEnable = true,
3373 .VertexAccessType = RANDOM);
3374 }
3375
3376 void anv_CmdDispatch(
3377 VkCmdBuffer cmdBuffer,
3378 uint32_t x,
3379 uint32_t y,
3380 uint32_t z)
3381 {
3382 ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer);
3383 struct anv_pipeline *pipeline = cmd_buffer->compute_pipeline;
3384 struct brw_cs_prog_data *prog_data = &pipeline->cs_prog_data;
3385
3386 anv_cmd_buffer_flush_compute_state(cmd_buffer);
3387
3388 anv_batch_emit(&cmd_buffer->batch, GEN8_GPGPU_WALKER,
3389 .SIMDSize = prog_data->simd_size / 16,
3390 .ThreadDepthCounterMaximum = 0,
3391 .ThreadHeightCounterMaximum = 0,
3392 .ThreadWidthCounterMaximum = pipeline->cs_thread_width_max,
3393 .ThreadGroupIDXDimension = x,
3394 .ThreadGroupIDYDimension = y,
3395 .ThreadGroupIDZDimension = z,
3396 .RightExecutionMask = pipeline->cs_right_mask,
3397 .BottomExecutionMask = 0xffffffff);
3398
3399 anv_batch_emit(&cmd_buffer->batch, GEN8_MEDIA_STATE_FLUSH);
3400 }
3401
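/* Indirect dispatch works like indirect draw: the x/y/z group counts of
 * VkDispatchIndirectCommand are loaded from the buffer into the dispatch
 * dimension registers below before emitting GPGPU_WALKER with
 * IndirectParameterEnable set.
 */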
3402 #define GPGPU_DISPATCHDIMX 0x2500
3403 #define GPGPU_DISPATCHDIMY 0x2504
3404 #define GPGPU_DISPATCHDIMZ 0x2508
3405
3406 void anv_CmdDispatchIndirect(
3407 VkCmdBuffer cmdBuffer,
3408 VkBuffer _buffer,
3409 VkDeviceSize offset)
3410 {
3411 ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer);
3412 ANV_FROM_HANDLE(anv_buffer, buffer, _buffer);
3413 struct anv_pipeline *pipeline = cmd_buffer->compute_pipeline;
3414 struct brw_cs_prog_data *prog_data = &pipeline->cs_prog_data;
3415 struct anv_bo *bo = buffer->bo;
3416 uint32_t bo_offset = buffer->offset + offset;
3417
3418 anv_cmd_buffer_flush_compute_state(cmd_buffer);
3419
3420 anv_batch_lrm(&cmd_buffer->batch, GPGPU_DISPATCHDIMX, bo, bo_offset);
3421 anv_batch_lrm(&cmd_buffer->batch, GPGPU_DISPATCHDIMY, bo, bo_offset + 4);
3422 anv_batch_lrm(&cmd_buffer->batch, GPGPU_DISPATCHDIMZ, bo, bo_offset + 8);
3423
3424 anv_batch_emit(&cmd_buffer->batch, GEN8_GPGPU_WALKER,
3425 .IndirectParameterEnable = true,
3426 .SIMDSize = prog_data->simd_size / 16,
3427 .ThreadDepthCounterMaximum = 0,
3428 .ThreadHeightCounterMaximum = 0,
3429 .ThreadWidthCounterMaximum = pipeline->cs_thread_width_max,
3430 .RightExecutionMask = pipeline->cs_right_mask,
3431 .BottomExecutionMask = 0xffffffff);
3432
3433 anv_batch_emit(&cmd_buffer->batch, GEN8_MEDIA_STATE_FLUSH);
3434 }
3435
3436 void anv_CmdSetEvent(
3437 VkCmdBuffer cmdBuffer,
3438 VkEvent event,
3439 VkPipeEvent pipeEvent)
3440 {
3441 stub();
3442 }
3443
3444 void anv_CmdResetEvent(
3445 VkCmdBuffer cmdBuffer,
3446 VkEvent event,
3447 VkPipeEvent pipeEvent)
3448 {
3449 stub();
3450 }
3451
3452 void anv_CmdWaitEvents(
3453 VkCmdBuffer cmdBuffer,
3454 VkWaitEvent waitEvent,
3455 uint32_t eventCount,
3456 const VkEvent* pEvents,
3457 VkPipeEventFlags pipeEventMask,
3458 uint32_t memBarrierCount,
3459 const void* const* ppMemBarriers)
3460 {
3461 stub();
3462 }
3463
3464 void anv_CmdPipelineBarrier(
3465 VkCmdBuffer cmdBuffer,
3466 VkWaitEvent waitEvent,
3467 VkPipeEventFlags pipeEventMask,
3468 uint32_t memBarrierCount,
3469 const void* const* ppMemBarriers)
3470 {
3471 ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer);
3472 uint32_t b, *dw;
3473
3474 struct GEN8_PIPE_CONTROL cmd = {
3475 GEN8_PIPE_CONTROL_header,
3476 .PostSyncOperation = NoWrite,
3477 };
3478
3479 /* XXX: I think waitEvent is a no-op on our HW. We should verify that. */
3480
3481 if (anv_clear_mask(&pipeEventMask, VK_PIPE_EVENT_TOP_OF_PIPE_BIT)) {
3482 /* This is just what PIPE_CONTROL does */
3483 }
3484
3485 if (anv_clear_mask(&pipeEventMask,
3486 VK_PIPE_EVENT_VERTEX_PROCESSING_COMPLETE_BIT |
3487 VK_PIPE_EVENT_LOCAL_FRAGMENT_PROCESSING_COMPLETE_BIT |
3488 VK_PIPE_EVENT_FRAGMENT_PROCESSING_COMPLETE_BIT)) {
3489 cmd.StallAtPixelScoreboard = true;
3490 }
3491
3492
3493 if (anv_clear_mask(&pipeEventMask,
3494 VK_PIPE_EVENT_GRAPHICS_PIPELINE_COMPLETE_BIT |
3495 VK_PIPE_EVENT_COMPUTE_PIPELINE_COMPLETE_BIT |
3496 VK_PIPE_EVENT_TRANSFER_COMPLETE_BIT |
3497 VK_PIPE_EVENT_COMMANDS_COMPLETE_BIT)) {
3498 cmd.CommandStreamerStallEnable = true;
3499 }
3500
3501 if (anv_clear_mask(&pipeEventMask, VK_PIPE_EVENT_CPU_SIGNAL_BIT)) {
3502 anv_finishme("VK_PIPE_EVENT_CPU_SIGNAL_BIT");
3503 }
3504
3505 /* We checked all known VkPipeEventFlags. */
3506 anv_assert(pipeEventMask == 0);
3507
3508 /* XXX: Right now, we're really dumb and just flush whatever categories
3509 * the app asks for. One of these days we may make this a bit better
3510 * but right now that's all the hardware allows for in most areas.
3511 */
3512 VkMemoryOutputFlags out_flags = 0;
3513 VkMemoryInputFlags in_flags = 0;
3514
3515 for (uint32_t i = 0; i < memBarrierCount; i++) {
3516 const struct anv_common *common = ppMemBarriers[i];
3517 switch (common->sType) {
3518 case VK_STRUCTURE_TYPE_MEMORY_BARRIER: {
3519 const VkMemoryBarrier *barrier = (VkMemoryBarrier *)common;
3520 out_flags |= barrier->outputMask;
3521 in_flags |= barrier->inputMask;
3522 break;
3523 }
3524 case VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER: {
3525 const VkBufferMemoryBarrier *barrier = (VkBufferMemoryBarrier *)common;
3526 out_flags |= barrier->outputMask;
3527 in_flags |= barrier->inputMask;
3528 break;
3529 }
3530 case VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER: {
3531 const VkImageMemoryBarrier *barrier = (VkImageMemoryBarrier *)common;
3532 out_flags |= barrier->outputMask;
3533 in_flags |= barrier->inputMask;
3534 break;
3535 }
3536 default:
3537 unreachable("Invalid memory barrier type");
3538 }
3539 }
3540
3541 for_each_bit(b, out_flags) {
3542 switch ((VkMemoryOutputFlags)(1 << b)) {
3543 case VK_MEMORY_OUTPUT_HOST_WRITE_BIT:
3544 break; /* FIXME: Little-core systems */
3545 case VK_MEMORY_OUTPUT_SHADER_WRITE_BIT:
3546 cmd.DCFlushEnable = true;
3547 break;
3548 case VK_MEMORY_OUTPUT_COLOR_ATTACHMENT_BIT:
3549 cmd.RenderTargetCacheFlushEnable = true;
3550 break;
3551 case VK_MEMORY_OUTPUT_DEPTH_STENCIL_ATTACHMENT_BIT:
3552 cmd.DepthCacheFlushEnable = true;
3553 break;
3554 case VK_MEMORY_OUTPUT_TRANSFER_BIT:
3555 cmd.RenderTargetCacheFlushEnable = true;
3556 cmd.DepthCacheFlushEnable = true;
3557 break;
3558 default:
3559 unreachable("Invalid memory output flag");
3560 }
3561 }
3562
3563    for_each_bit(b, in_flags) {
3564 switch ((VkMemoryInputFlags)(1 << b)) {
3565 case VK_MEMORY_INPUT_HOST_READ_BIT:
3566 break; /* FIXME: Little-core systems */
3567 case VK_MEMORY_INPUT_INDIRECT_COMMAND_BIT:
3568 case VK_MEMORY_INPUT_INDEX_FETCH_BIT:
3569 case VK_MEMORY_INPUT_VERTEX_ATTRIBUTE_FETCH_BIT:
3570 cmd.VFCacheInvalidationEnable = true;
3571 break;
3572 case VK_MEMORY_INPUT_UNIFORM_READ_BIT:
3573 cmd.ConstantCacheInvalidationEnable = true;
3574 /* fallthrough */
3575 case VK_MEMORY_INPUT_SHADER_READ_BIT:
3576 cmd.DCFlushEnable = true;
3577 cmd.TextureCacheInvalidationEnable = true;
3578 break;
3579 case VK_MEMORY_INPUT_COLOR_ATTACHMENT_BIT:
3580 case VK_MEMORY_INPUT_DEPTH_STENCIL_ATTACHMENT_BIT:
3581 break; /* XXX: Unclear what, if anything, needs invalidating here. */
3582 case VK_MEMORY_INPUT_TRANSFER_BIT:
3583 cmd.TextureCacheInvalidationEnable = true;
3584 break;
3585 }
3586 }
3587
3588 dw = anv_batch_emit_dwords(&cmd_buffer->batch, GEN8_PIPE_CONTROL_length);
3589 GEN8_PIPE_CONTROL_pack(&cmd_buffer->batch, dw, &cmd);
3590 }
3591
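/* Framebuffer destructor: in addition to the framebuffer itself, this
 * destroys the dynamic viewport/scissor state that anv_CreateFramebuffer()
 * allocates below.
 */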
3592 static void
3593 anv_framebuffer_destroy(struct anv_device *device,
3594 struct anv_object *object,
3595 VkObjectType obj_type)
3596 {
3597 struct anv_framebuffer *fb = (struct anv_framebuffer *)object;
3598
3599 assert(obj_type == VK_OBJECT_TYPE_FRAMEBUFFER);
3600
3601 anv_DestroyObject(anv_device_to_handle(device),
3602 VK_OBJECT_TYPE_DYNAMIC_VP_STATE,
3603 fb->vp_state);
3604
3605 anv_device_free(device, fb);
3606 }
3607
3608 VkResult anv_CreateFramebuffer(
3609 VkDevice _device,
3610 const VkFramebufferCreateInfo* pCreateInfo,
3611 VkFramebuffer* pFramebuffer)
3612 {
3613 ANV_FROM_HANDLE(anv_device, device, _device);
3614 struct anv_framebuffer *framebuffer;
3615
3616 static const struct anv_depth_stencil_view null_view =
3617 { .depth_format = D16_UNORM, .depth_stride = 0, .stencil_stride = 0 };
3618
3619 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO);
3620
3621 framebuffer = anv_device_alloc(device, sizeof(*framebuffer), 8,
3622 VK_SYSTEM_ALLOC_TYPE_API_OBJECT);
3623 if (framebuffer == NULL)
3624 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
3625
3626 framebuffer->base.destructor = anv_framebuffer_destroy;
3627
3628 framebuffer->color_attachment_count = pCreateInfo->colorAttachmentCount;
3629 for (uint32_t i = 0; i < pCreateInfo->colorAttachmentCount; i++) {
3630 framebuffer->color_attachments[i] =
3631 (struct anv_surface_view *) pCreateInfo->pColorAttachments[i].view;
3632 }
3633
3634 if (pCreateInfo->pDepthStencilAttachment) {
3635 framebuffer->depth_stencil =
3636 anv_depth_stencil_view_from_handle(pCreateInfo->pDepthStencilAttachment->view);
3637 } else {
3638 framebuffer->depth_stencil = &null_view;
3639 }
3640
3641 framebuffer->sample_count = pCreateInfo->sampleCount;
3642 framebuffer->width = pCreateInfo->width;
3643 framebuffer->height = pCreateInfo->height;
3644 framebuffer->layers = pCreateInfo->layers;
3645
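/* Create a default viewport/scissor state covering the whole framebuffer;
 * it is cleaned up in anv_framebuffer_destroy().
 */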
3646 anv_CreateDynamicViewportState(anv_device_to_handle(device),
3647 &(VkDynamicVpStateCreateInfo) {
3648 .sType = VK_STRUCTURE_TYPE_DYNAMIC_VP_STATE_CREATE_INFO,
3649 .viewportAndScissorCount = 1,
3650 .pViewports = (VkViewport[]) {
3651 {
3652 .originX = 0,
3653 .originY = 0,
3654 .width = pCreateInfo->width,
3655 .height = pCreateInfo->height,
3656 .minDepth = 0,
3657 .maxDepth = 1
3658 },
3659 },
3660 .pScissors = (VkRect2D[]) {
3661 { { 0, 0 },
3662 { pCreateInfo->width, pCreateInfo->height } },
3663 }
3664 },
3665 &framebuffer->vp_state);
3666
3667 *pFramebuffer = anv_framebuffer_to_handle(framebuffer);
3668
3669 return VK_SUCCESS;
3670 }
3671
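/* Render passes currently record only the render area plus the per-layer
 * color load ops and clear values; num_clear_layers counts the layers that
 * request VK_ATTACHMENT_LOAD_OP_CLEAR.
 */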
3672 VkResult anv_CreateRenderPass(
3673 VkDevice _device,
3674 const VkRenderPassCreateInfo* pCreateInfo,
3675 VkRenderPass* pRenderPass)
3676 {
3677 ANV_FROM_HANDLE(anv_device, device, _device);
3678 struct anv_render_pass *pass;
3679 size_t size;
3680
3681 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO);
3682
3683 size = sizeof(*pass) +
3684 pCreateInfo->layers * sizeof(struct anv_render_pass_layer);
3685 pass = anv_device_alloc(device, size, 8,
3686 VK_SYSTEM_ALLOC_TYPE_API_OBJECT);
3687 if (pass == NULL)
3688 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
3689
3690 pass->render_area = pCreateInfo->renderArea;
3691
3692 pass->num_layers = pCreateInfo->layers;
3693
3694 pass->num_clear_layers = 0;
3695 for (uint32_t i = 0; i < pCreateInfo->layers; i++) {
3696 pass->layers[i].color_load_op = pCreateInfo->pColorLoadOps[i];
3697 pass->layers[i].clear_color = pCreateInfo->pColorLoadClearValues[i];
3698 if (pass->layers[i].color_load_op == VK_ATTACHMENT_LOAD_OP_CLEAR)
3699 pass->num_clear_layers++;
3700 }
3701
3702 *pRenderPass = anv_render_pass_to_handle(pass);
3703
3704 return VK_SUCCESS;
3705 }
3706
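/* We have no render-area alignment requirements, so report a 1x1
 * granularity.
 */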
3707 VkResult anv_GetRenderAreaGranularity(
3708 VkDevice device,
3709 VkRenderPass renderPass,
3710 VkExtent2D* pGranularity)
3711 {
3712 *pGranularity = (VkExtent2D) { 1, 1 };
3713
3714 return VK_SUCCESS;
3715 }
3716
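/* Emit 3DSTATE_DEPTH_BUFFER, 3DSTATE_HIER_DEPTH_BUFFER,
 * 3DSTATE_STENCIL_BUFFER and 3DSTATE_CLEAR_PARAMS for the current
 * framebuffer's depth/stencil view.  If the framebuffer has no
 * depth/stencil attachment, the zero-stride null_view set up in
 * anv_CreateFramebuffer() leaves depth and stencil writes disabled.
 */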
3717 static void
3718 anv_cmd_buffer_emit_depth_stencil(struct anv_cmd_buffer *cmd_buffer,
3719 struct anv_render_pass *pass)
3720 {
3721 const struct anv_depth_stencil_view *view =
3722 cmd_buffer->framebuffer->depth_stencil;
3723
3724 /* FIXME: Implement the PMA stall W/A */
3725 /* FIXME: Width and Height are wrong */
3726
3727 anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_DEPTH_BUFFER,
3728 .SurfaceType = SURFTYPE_2D,
3729 .DepthWriteEnable = view->depth_stride > 0,
3730 .StencilWriteEnable = view->stencil_stride > 0,
3731 .HierarchicalDepthBufferEnable = false,
3732 .SurfaceFormat = view->depth_format,
3733 .SurfacePitch = view->depth_stride > 0 ? view->depth_stride - 1 : 0,
3734 .SurfaceBaseAddress = { view->bo, view->depth_offset },
3735 .Height = pass->render_area.extent.height - 1,
3736 .Width = pass->render_area.extent.width - 1,
3737 .LOD = 0,
3738 .Depth = 1 - 1,
3739 .MinimumArrayElement = 0,
3740 .DepthBufferObjectControlState = GEN8_MOCS,
3741 .RenderTargetViewExtent = 1 - 1,
3742 .SurfaceQPitch = view->depth_qpitch >> 2);
3743
3744 /* Disable hierarchical depth buffers. */
3745 anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_HIER_DEPTH_BUFFER);
3746
3747 anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_STENCIL_BUFFER,
3748 .StencilBufferEnable = view->stencil_stride > 0,
3749 .StencilBufferObjectControlState = GEN8_MOCS,
3750 .SurfacePitch = view->stencil_stride > 0 ? view->stencil_stride - 1 : 0,
3751 .SurfaceBaseAddress = { view->bo, view->stencil_offset },
3752 .SurfaceQPitch = view->stencil_qpitch >> 2);
3753
3754 /* Clear the clear params. */
3755 anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_CLEAR_PARAMS);
3756 }
3757
3758 void anv_CmdPushConstants(
3759 VkCmdBuffer cmdBuffer,
3760 VkPipelineLayout layout,
3761 VkShaderStageFlags stageFlags,
3762 uint32_t start,
3763 uint32_t length,
3764 const void* values)
3765 {
3766 stub();
3767 }
3768
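/* Beginning a render pass binds the framebuffer, marks the fragment
 * descriptors dirty, programs the drawing rectangle to the render area,
 * emits the depth/stencil buffer state and performs any requested clears.
 */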
3769 void anv_CmdBeginRenderPass(
3770 VkCmdBuffer cmdBuffer,
3771 const VkRenderPassBegin* pRenderPassBegin)
3772 {
3773 ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer);
3774 ANV_FROM_HANDLE(anv_render_pass, pass, pRenderPassBegin->renderPass);
3775 ANV_FROM_HANDLE(anv_framebuffer, framebuffer, pRenderPassBegin->framebuffer);
3776
3777 assert(pRenderPassBegin->contents == VK_RENDER_PASS_CONTENTS_INLINE);
3778
3779 cmd_buffer->framebuffer = framebuffer;
3780
3781 cmd_buffer->descriptors_dirty |= VK_SHADER_STAGE_FRAGMENT_BIT;
3782
3783 anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_DRAWING_RECTANGLE,
3784 .ClippedDrawingRectangleYMin = pass->render_area.offset.y,
3785 .ClippedDrawingRectangleXMin = pass->render_area.offset.x,
3786 .ClippedDrawingRectangleYMax =
3787 pass->render_area.offset.y + pass->render_area.extent.height - 1,
3788 .ClippedDrawingRectangleXMax =
3789 pass->render_area.offset.x + pass->render_area.extent.width - 1,
3790 .DrawingRectangleOriginY = 0,
3791 .DrawingRectangleOriginX = 0);
3792
3793 anv_cmd_buffer_emit_depth_stencil(cmd_buffer, pass);
3794
3795 anv_cmd_buffer_clear(cmd_buffer, pass);
3796 }
3797
3798 void anv_CmdEndRenderPass(
3799 VkCmdBuffer cmdBuffer)
3800 {
3801 ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer);
3802
3803 /* Emit a flushing pipe control at the end of a pass. This is kind of a
3804 * hack but it ensures that render targets always actually get written.
3805 * Eventually, we should do flushing based on image format transitions
3806 * or something of that nature.
3807 */
3808 anv_batch_emit(&cmd_buffer->batch, GEN8_PIPE_CONTROL,
3809 .PostSyncOperation = NoWrite,
3810 .RenderTargetCacheFlushEnable = true,
3811 .InstructionCacheInvalidateEnable = true,
3812 .DepthCacheFlushEnable = true,
3813 .VFCacheInvalidationEnable = true,
3814 .TextureCacheInvalidationEnable = true,
3815 .CommandStreamerStallEnable = true);
3816 }
3817
3818 void anv_CmdExecuteCommands(
3819 VkCmdBuffer cmdBuffer,
3820 uint32_t cmdBuffersCount,
3821 const VkCmdBuffer* pCmdBuffers)
3822 {
3823 stub();
3824 }
3825
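/* Unlike the anv_-prefixed entrypoints, the DbgMarker and DbgSetObjectTag
 * functions are exported directly (hence the explicit default-visibility
 * declarations) and implemented as no-op stubs below.
 */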
3826 void vkCmdDbgMarkerBegin(
3827 VkCmdBuffer cmdBuffer,
3828 const char* pMarker)
3829 __attribute__ ((visibility ("default")));
3830
3831 void vkCmdDbgMarkerEnd(
3832 VkCmdBuffer cmdBuffer)
3833 __attribute__ ((visibility ("default")));
3834
3835 VkResult vkDbgSetObjectTag(
3836 VkDevice device,
3837 VkObject object,
3838 size_t tagSize,
3839 const void* pTag)
3840 __attribute__ ((visibility ("default")));
3841
3842
3843 void vkCmdDbgMarkerBegin(
3844 VkCmdBuffer cmdBuffer,
3845 const char* pMarker)
3846 {
3847 }
3848
3849 void vkCmdDbgMarkerEnd(
3850 VkCmdBuffer cmdBuffer)
3851 {
3852 }
3853
3854 VkResult vkDbgSetObjectTag(
3855 VkDevice device,
3856 VkObject object,
3857 size_t tagSize,
3858 const void* pTag)
3859 {
3860 return VK_SUCCESS;
3861 }