From: Chia-I Wu Date: Sat, 20 Jun 2015 15:27:08 +0000 (+0800) Subject: ilo: add ilo_state_compute X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=57bdcae9e0fbf639014cd375543a8dd356406ac0;p=mesa.git ilo: add ilo_state_compute Replace gen6_idrt_data with ilo_state_compute, which has a bunch of validations and is now preferred. --- diff --git a/src/gallium/drivers/ilo/Makefile.sources b/src/gallium/drivers/ilo/Makefile.sources index e5a0950dc7c..95b6b7a7b16 100644 --- a/src/gallium/drivers/ilo/Makefile.sources +++ b/src/gallium/drivers/ilo/Makefile.sources @@ -21,6 +21,8 @@ C_SOURCES := \ core/ilo_image.h \ core/ilo_state_cc.c \ core/ilo_state_cc.h \ + core/ilo_state_compute.c \ + core/ilo_state_compute.h \ core/ilo_state_raster.c \ core/ilo_state_raster.h \ core/ilo_state_sampler.c \ diff --git a/src/gallium/drivers/ilo/core/ilo_builder_media.h b/src/gallium/drivers/ilo/core/ilo_builder_media.h index 7fbe6d41635..7197104a23e 100644 --- a/src/gallium/drivers/ilo/core/ilo_builder_media.h +++ b/src/gallium/drivers/ilo/core/ilo_builder_media.h @@ -29,57 +29,30 @@ #define ILO_BUILDER_MEDIA_H #include "genhw/genhw.h" -#include "../ilo_shader.h" #include "intel_winsys.h" #include "ilo_core.h" #include "ilo_dev.h" +#include "ilo_state_compute.h" #include "ilo_builder.h" -struct gen6_idrt_data { - const struct ilo_shader_state *cs; - - uint32_t sampler_offset; - uint32_t binding_table_offset; - - unsigned curbe_size; - unsigned thread_group_size; -}; - static inline void gen6_MEDIA_VFE_STATE(struct ilo_builder *builder, - unsigned curbe_alloc, bool use_slm) + const struct ilo_state_compute *compute) { const uint8_t cmd_len = 8; - const unsigned idrt_alloc = - ((ilo_dev_gen(builder->dev) >= ILO_GEN(7.5)) ? 64 : 32) * 32; - int max_threads; uint32_t *dw; - ILO_DEV_ASSERT(builder->dev, 7, 7.5); - - max_threads = builder->dev->thread_count; - - curbe_alloc = align(curbe_alloc, 32); - assert(idrt_alloc + curbe_alloc <= builder->dev->urb_size / (use_slm + 1)); + ILO_DEV_ASSERT(builder->dev, 6, 7.5); ilo_builder_batch_pointer(builder, cmd_len, &dw); dw[0] = GEN6_RENDER_CMD(MEDIA, MEDIA_VFE_STATE) | (cmd_len - 2); - dw[1] = 0; /* scratch */ - - dw[2] = (max_threads - 1) << GEN6_VFE_DW2_MAX_THREADS__SHIFT | - 0 << GEN6_VFE_DW2_URB_ENTRY_COUNT__SHIFT | - GEN6_VFE_DW2_RESET_GATEWAY_TIMER | - GEN6_VFE_DW2_BYPASS_GATEWAY_CONTROL; - if (ilo_dev_gen(builder->dev) >= ILO_GEN(7)) - dw[2] |= GEN7_VFE_DW2_GPGPU_MODE; - + /* see compute_set_gen6_MEDIA_VFE_STATE() */ + dw[1] = compute->vfe[0]; + dw[2] = compute->vfe[1]; dw[3] = 0; - - dw[4] = 0 << GEN6_VFE_DW4_URB_ENTRY_SIZE__SHIFT | - (curbe_alloc / 32); - + dw[4] = compute->vfe[2]; dw[5] = 0; dw[6] = 0; dw[7] = 0; @@ -194,8 +167,10 @@ gen7_GPGPU_WALKER(struct ilo_builder *builder, static inline uint32_t gen6_INTERFACE_DESCRIPTOR_DATA(struct ilo_builder *builder, - const struct gen6_idrt_data *data, - int idrt_count) + const struct ilo_state_compute *compute, + const uint32_t *kernel_offsets, + const uint32_t *sampler_offsets, + const uint32_t *binding_table_offsets) { /* * From the Sandy Bridge PRM, volume 2 part 2, page 34: @@ -211,61 +186,26 @@ gen6_INTERFACE_DESCRIPTOR_DATA(struct ilo_builder *builder, * aligned address of the Interface Descriptor data." */ const int state_align = 32; - const int state_len = (32 / 4) * idrt_count; + const int state_len = (32 / 4) * compute->idrt_count; uint32_t state_offset, *dw; int i; - ILO_DEV_ASSERT(builder->dev, 7, 7.5); + ILO_DEV_ASSERT(builder->dev, 6, 7.5); state_offset = ilo_builder_dynamic_pointer(builder, ILO_BUILDER_ITEM_INTERFACE_DESCRIPTOR, state_align, state_len, &dw); - for (i = 0; i < idrt_count; i++) { - const struct gen6_idrt_data *idrt = &data[i]; - const struct ilo_shader_state *cs = idrt->cs; - unsigned sampler_count, bt_size, slm_size; - - sampler_count = - ilo_shader_get_kernel_param(cs, ILO_KERNEL_SAMPLER_COUNT); - assert(sampler_count <= 16); - sampler_count = (sampler_count + 3) / 4; - - bt_size = - ilo_shader_get_kernel_param(cs, ILO_KERNEL_SURFACE_TOTAL_COUNT); - if (bt_size > 31) - bt_size = 31; - - slm_size = ilo_shader_get_kernel_param(cs, ILO_KERNEL_CS_LOCAL_SIZE); - - assert(idrt->curbe_size / 32 <= 63); - - dw[0] = ilo_shader_get_kernel_offset(idrt->cs); + for (i = 0; i < compute->idrt_count; i++) { + /* see compute_set_gen6_INTERFACE_DESCRIPTOR_DATA() */ + dw[0] = compute->idrt[i][0] + kernel_offsets[i]; dw[1] = 0; - dw[2] = idrt->sampler_offset | - sampler_count << GEN6_IDRT_DW2_SAMPLER_COUNT__SHIFT; - dw[3] = idrt->binding_table_offset | - bt_size << GEN6_IDRT_DW3_BINDING_TABLE_SIZE__SHIFT; - - dw[4] = (idrt->curbe_size / 32) << GEN6_IDRT_DW4_CURBE_READ_LEN__SHIFT | - 0 << GEN6_IDRT_DW4_CURBE_READ_OFFSET__SHIFT; - - if (ilo_dev_gen(builder->dev) >= ILO_GEN(7)) { - dw[5] = GEN7_IDRT_DW5_ROUNDING_MODE_RTNE; - - if (slm_size) { - assert(slm_size <= 64 * 1024); - slm_size = util_next_power_of_two((slm_size + 4095) / 4096); - - dw[5] |= GEN7_IDRT_DW5_BARRIER_ENABLE | - slm_size << GEN7_IDRT_DW5_SLM_SIZE__SHIFT | - idrt->thread_group_size << - GEN7_IDRT_DW5_THREAD_GROUP_SIZE__SHIFT; - } - } else { - dw[5] = 0; - } - - dw[6] = 0; + dw[2] = compute->idrt[i][1] | + sampler_offsets[i]; + dw[3] = compute->idrt[i][2] | + binding_table_offsets[i]; + dw[4] = compute->idrt[i][3]; + dw[5] = compute->idrt[i][4]; + dw[6] = compute->idrt[i][5]; dw[7] = 0; dw += 8; diff --git a/src/gallium/drivers/ilo/core/ilo_state_compute.c b/src/gallium/drivers/ilo/core/ilo_state_compute.c new file mode 100644 index 00000000000..a5fe5e1a6b0 --- /dev/null +++ b/src/gallium/drivers/ilo/core/ilo_state_compute.c @@ -0,0 +1,435 @@ +/* + * Mesa 3-D graphics library + * + * Copyright (C) 2012-2015 LunarG, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Authors: + * Chia-I Wu + */ + +#include "ilo_debug.h" +#include "ilo_state_compute.h" + +struct compute_urb_configuration { + int idrt_entry_count; + int curbe_entry_count; + + int urb_entry_count; + /* in 256-bit register increments */ + int urb_entry_size; +}; + +static int +get_gen6_rob_entry_count(const struct ilo_dev *dev) +{ + ILO_DEV_ASSERT(dev, 6, 8); + + /* + * From the Ivy Bridge PRM, volume 2 part 2, page 60: + * + * "ROB has 64KB of storage; 2048 entries." + * + * From the valid ranges of "CURBE Allocation Size", we can also conclude + * that interface entries and CURBE data must be in ROB. And that ROB + * should be 16KB, or 512 entries, on Gen7 GT1. + */ + if (ilo_dev_gen(dev) >= ILO_GEN(7.5)) + return 2048; + else if (ilo_dev_gen(dev) >= ILO_GEN(7)) + return (dev->gt == 2) ? 2048 : 512; + else + return (dev->gt == 2) ? 2048 : 1024; +} + +static int +get_gen6_idrt_entry_count(const struct ilo_dev *dev) +{ + ILO_DEV_ASSERT(dev, 6, 8); + + /* + * From the Ivy Bridge PRM, volume 2 part 2, page 21: + * + * "The first 32 URB entries are reserved for the interface + * descriptor..." + * + * From the Haswell PRM, volume 7, page 836: + * + * "The first 64 URB entries are reserved for the interface + * description..." + */ + return (ilo_dev_gen(dev) >= ILO_GEN(7.5)) ? 64 : 32; +} + +static int +get_gen6_curbe_entry_count(const struct ilo_dev *dev, uint32_t curbe_size) +{ + /* + * From the Ivy Bridge PRM, volume 2 part 2, page 21: + * + * "(CURBE Allocation Size) Specifies the total length allocated for + * CURBE, in 256-bit register increments. + */ + const int entry_count = (curbe_size + 31) / 32; + + ILO_DEV_ASSERT(dev, 6, 8); + + assert(get_gen6_idrt_entry_count(dev) + entry_count <= + get_gen6_rob_entry_count(dev)); + + return entry_count; +} + +static bool +compute_get_gen6_urb_configuration(const struct ilo_dev *dev, + const struct ilo_state_compute_info *info, + struct compute_urb_configuration *urb) +{ + ILO_DEV_ASSERT(dev, 6, 8); + + urb->idrt_entry_count = get_gen6_idrt_entry_count(dev); + urb->curbe_entry_count = + get_gen6_curbe_entry_count(dev, info->curbe_alloc_size); + + /* + * From the Broadwell PRM, volume 2b, page 451: + * + * "Please note that 0 is not allowed for this field (Number of URB + * Entries)." + */ + urb->urb_entry_count = (ilo_dev_gen(dev) >= ILO_GEN(8)) ? 1 : 0; + + /* + * From the Ivy Bridge PRM, volume 2 part 2, page 52: + * + * "(URB Entry Allocation Size) Specifies the length of each URB entry + * used by the unit, in 256-bit register increments - 1." + */ + urb->urb_entry_size = 1; + + /* + * From the Ivy Bridge PRM, volume 2 part 2, page 22: + * + * MEDIA_VFE_STATE specifies the amount of CURBE space, the URB handle + * size and the number of URB handles. The driver must ensure that + * ((URB_handle_size * URB_num_handle) - CURBE - 32) <= + * URB_allocation_in_L3." + */ + assert(urb->idrt_entry_count + urb->curbe_entry_count + + urb->urb_entry_count * urb->urb_entry_size <= + info->cv_urb_alloc_size / 32); + + return true; +} + +static int +compute_interface_get_gen6_read_end(const struct ilo_dev *dev, + const struct ilo_state_compute_interface_info *interface) +{ + const int per_thread_read = (interface->curbe_read_length + 31) / 32; + const int cross_thread_read = + (interface->cross_thread_curbe_read_length + 31) / 32; + + ILO_DEV_ASSERT(dev, 6, 8); + + assert(interface->curbe_read_offset % 32 == 0); + + /* + * From the Ivy Bridge PRM, volume 2 part 2, page 60: + * + * "(Constant URB Entry Read Length) [0,63]" + */ + assert(per_thread_read <= 63); + + /* From the Haswell PRM, volume 2d, page 199: + * + * "(Cross-Thread Constant Data Read Length) [0,127]" + */ + if (ilo_dev_gen(dev) >= ILO_GEN(7.5)) + assert(cross_thread_read <= 127); + else + assert(!cross_thread_read); + + if (per_thread_read || cross_thread_read) { + return interface->curbe_read_offset / 32 + cross_thread_read + + per_thread_read * interface->thread_group_size; + } else { + return 0; + } +} + +static bool +compute_validate_gen6(const struct ilo_dev *dev, + const struct ilo_state_compute_info *info, + const struct compute_urb_configuration *urb) +{ + int min_curbe_entry_count; + uint8_t i; + + ILO_DEV_ASSERT(dev, 6, 8); + + assert(info->interface_count <= urb->idrt_entry_count); + + min_curbe_entry_count = 0; + for (i = 0; i < info->interface_count; i++) { + const int read_end = + compute_interface_get_gen6_read_end(dev, &info->interfaces[i]); + + if (min_curbe_entry_count < read_end) + min_curbe_entry_count = read_end; + } + + assert(min_curbe_entry_count <= urb->curbe_entry_count); + + /* + * From the Broadwell PRM, volume 2b, page 452: + * + * "CURBE Allocation Size should be 0 for GPGPU workloads that uses + * indirect instead of CURBE." + */ + if (!min_curbe_entry_count) + assert(!urb->curbe_entry_count); + + return true; +} + +static uint8_t +compute_get_gen6_scratch_space(const struct ilo_dev *dev, + const struct ilo_state_compute_info *info) +{ + uint32_t scratch_size = 0; + uint8_t i; + + ILO_DEV_ASSERT(dev, 6, 8); + + for (i = 0; i < info->interface_count; i++) { + if (scratch_size < info->interfaces[i].scratch_size) + scratch_size = info->interfaces[i].scratch_size; + } + + if (ilo_dev_gen(dev) >= ILO_GEN(8)) { + assert(scratch_size <= 2 * 1024 * 1024); + + /* next power of two, starting from 1KB */ + return (scratch_size > 1024) ? + (util_last_bit(scratch_size - 1) - 10): 0; + } else if (ilo_dev_gen(dev) >= ILO_GEN(7.5)) { + assert(scratch_size <= 2 * 1024 * 1024); + + /* next power of two, starting from 2KB */ + return (scratch_size > 2048) ? + (util_last_bit(scratch_size - 1) - 11): 0; + } else { + assert(scratch_size <= 12 * 1024); + + return (scratch_size > 1024) ? + (scratch_size - 1) / 1024 : 0; + } +} + +static bool +compute_set_gen6_MEDIA_VFE_STATE(struct ilo_state_compute *compute, + const struct ilo_dev *dev, + const struct ilo_state_compute_info *info) +{ + struct compute_urb_configuration urb; + uint8_t scratch_space; + + uint32_t dw1, dw2, dw4; + + ILO_DEV_ASSERT(dev, 6, 8); + + if (!compute_get_gen6_urb_configuration(dev, info, &urb) || + !compute_validate_gen6(dev, info, &urb)) + return false; + + scratch_space = compute_get_gen6_scratch_space(dev, info); + + dw1 = scratch_space << GEN6_VFE_DW1_SCRATCH_SPACE_PER_THREAD__SHIFT; + dw2 = (dev->thread_count - 1) << GEN6_VFE_DW2_MAX_THREADS__SHIFT | + urb.urb_entry_count << GEN6_VFE_DW2_URB_ENTRY_COUNT__SHIFT | + GEN6_VFE_DW2_RESET_GATEWAY_TIMER | + GEN6_VFE_DW2_BYPASS_GATEWAY_CONTROL; + + if (ilo_dev_gen(dev) >= ILO_GEN(7) && ilo_dev_gen(dev) <= ILO_GEN(7.5)) + dw2 |= GEN7_VFE_DW2_GPGPU_MODE; + + assert(urb.urb_entry_size); + + dw4 = (urb.urb_entry_size - 1) << GEN6_VFE_DW4_URB_ENTRY_SIZE__SHIFT | + urb.curbe_entry_count << GEN6_VFE_DW4_CURBE_SIZE__SHIFT; + + STATIC_ASSERT(ARRAY_SIZE(compute->vfe) >= 3); + compute->vfe[0] = dw1; + compute->vfe[1] = dw2; + compute->vfe[2] = dw4; + + return true; +} + +static uint8_t +compute_interface_get_gen6_sampler_count(const struct ilo_dev *dev, + const struct ilo_state_compute_interface_info *interface) +{ + ILO_DEV_ASSERT(dev, 6, 8); + return (interface->sampler_count <= 12) ? + (interface->sampler_count + 3) / 4 : 4; +} + +static uint8_t +compute_interface_get_gen6_surface_count(const struct ilo_dev *dev, + const struct ilo_state_compute_interface_info *interface) +{ + ILO_DEV_ASSERT(dev, 6, 8); + return (interface->surface_count <= 31) ? interface->surface_count : 31; +} + +static uint8_t +compute_interface_get_gen7_slm_size(const struct ilo_dev *dev, + const struct ilo_state_compute_interface_info *interface) +{ + ILO_DEV_ASSERT(dev, 7, 8); + + /* + * From the Ivy Bridge PRM, volume 2 part 2, page 61: + * + * "The amount is specified in 4k blocks, but only powers of 2 are + * allowed: 0, 4k, 8k, 16k, 32k and 64k per half-slice." + */ + assert(interface->slm_size <= 64 * 1024); + + return util_next_power_of_two((interface->slm_size + 4095) / 4096); +} + +static bool +compute_set_gen6_INTERFACE_DESCRIPTOR_DATA(struct ilo_state_compute *compute, + const struct ilo_dev *dev, + const struct ilo_state_compute_info *info) +{ + uint8_t i; + + ILO_DEV_ASSERT(dev, 6, 8); + + for (i = 0; i < info->interface_count; i++) { + const struct ilo_state_compute_interface_info *interface = + &info->interfaces[i]; + uint16_t read_offset, per_thread_read_len, cross_thread_read_len; + uint8_t sampler_count, surface_count; + uint32_t dw0, dw2, dw3, dw4, dw5, dw6; + + assert(interface->kernel_offset % 64 == 0); + assert(interface->thread_group_size); + + read_offset = interface->curbe_read_offset / 32; + per_thread_read_len = (interface->curbe_read_length + 31) / 32; + cross_thread_read_len = + (interface->cross_thread_curbe_read_length + 31) / 32; + + sampler_count = + compute_interface_get_gen6_sampler_count(dev, interface); + surface_count = + compute_interface_get_gen6_surface_count(dev, interface); + + dw0 = interface->kernel_offset; + dw2 = sampler_count << GEN6_IDRT_DW2_SAMPLER_COUNT__SHIFT; + dw3 = surface_count << GEN6_IDRT_DW3_BINDING_TABLE_SIZE__SHIFT; + dw4 = per_thread_read_len << GEN6_IDRT_DW4_CURBE_READ_LEN__SHIFT | + read_offset << GEN6_IDRT_DW4_CURBE_READ_OFFSET__SHIFT; + + dw5 = 0; + dw6 = 0; + if (ilo_dev_gen(dev) >= ILO_GEN(7)) { + const uint8_t slm_size = + compute_interface_get_gen7_slm_size(dev, interface); + + dw5 |= GEN7_IDRT_DW5_ROUNDING_MODE_RTNE; + + if (slm_size) { + dw5 |= GEN7_IDRT_DW5_BARRIER_ENABLE | + slm_size << GEN7_IDRT_DW5_SLM_SIZE__SHIFT; + } + + /* + * From the Haswell PRM, volume 2d, page 199: + * + * "(Number of Threads in GPGPU Thread Group) Specifies the + * number of threads that are in this thread group. Used to + * program the barrier for the number of messages to expect. The + * minimum value is 0 (which will disable the barrier), while + * the maximum value is the number of threads in a subslice for + * local barriers." + * + * From the Broadwell PRM, volume 2d, page 183: + * + * "(Number of Threads in GPGPU Thread Group) Specifies the + * number of threads that are in this thread group. The minimum + * value is 1, while the maximum value is the number of threads + * in a subslice for local barriers. See vol1b Configurations + * for the number of threads per subslice for different + * products. The maximum value for global barriers is limited + * by the number of threads in the system, or by 511, whichever + * is lower. This field should not be set to 0 even if the + * barrier is disabled, since an accurate value is needed for + * proper pre-emption." + */ + if (slm_size || ilo_dev_gen(dev) >= ILO_GEN(8)) { + dw5 |= interface->thread_group_size << + GEN7_IDRT_DW5_THREAD_GROUP_SIZE__SHIFT; + } + + if (ilo_dev_gen(dev) >= ILO_GEN(7.5)) { + dw6 |= cross_thread_read_len << + GEN75_IDRT_DW6_CROSS_THREAD_CURBE_READ_LEN__SHIFT; + } + } + + STATIC_ASSERT(ARRAY_SIZE(compute->idrt[i]) >= 6); + compute->idrt[i][0] = dw0; + compute->idrt[i][1] = dw2; + compute->idrt[i][2] = dw3; + compute->idrt[i][3] = dw4; + compute->idrt[i][4] = dw5; + compute->idrt[i][5] = dw6; + } + + return true; +} + +bool +ilo_state_compute_init(struct ilo_state_compute *compute, + const struct ilo_dev *dev, + const struct ilo_state_compute_info *info) +{ + bool ret = true; + + assert(ilo_is_zeroed(compute, sizeof(*compute))); + assert(ilo_is_zeroed(info->data, info->data_size)); + + assert(ilo_state_compute_data_size(dev, info->interface_count) <= + info->data_size); + compute->idrt = (uint32_t (*)[6]) info->data; + + ret &= compute_set_gen6_MEDIA_VFE_STATE(compute, dev, info); + ret &= compute_set_gen6_INTERFACE_DESCRIPTOR_DATA(compute, dev, info); + + assert(ret); + + return ret; +} diff --git a/src/gallium/drivers/ilo/core/ilo_state_compute.h b/src/gallium/drivers/ilo/core/ilo_state_compute.h new file mode 100644 index 00000000000..346f7b617f4 --- /dev/null +++ b/src/gallium/drivers/ilo/core/ilo_state_compute.h @@ -0,0 +1,92 @@ +/* + * Mesa 3-D graphics library + * + * Copyright (C) 2015 LunarG, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Authors: + * Chia-I Wu + */ + +#ifndef ILO_STATE_COMPUTE_H +#define ILO_STATE_COMPUTE_H + +#include "genhw/genhw.h" + +#include "ilo_core.h" +#include "ilo_dev.h" + +/* + * From the Haswell PRM, volume 7, page 836: + * + * "The first 64 URB entries are reserved for the interface + * description..." + */ +#define ILO_STATE_COMPUTE_MAX_INTERFACE_COUNT 64 + +struct ilo_state_compute_interface_info { + /* usually 0 unless there are multiple interfaces */ + uint32_t kernel_offset; + + uint32_t scratch_size; + + uint8_t sampler_count; + uint8_t surface_count; + + uint16_t thread_group_size; + uint32_t slm_size; + + uint16_t curbe_read_offset; + uint16_t curbe_read_length; + uint16_t cross_thread_curbe_read_length; +}; + +struct ilo_state_compute_info { + void *data; + size_t data_size; + + const struct ilo_state_compute_interface_info *interfaces; + uint8_t interface_count; + + uint32_t cv_urb_alloc_size; + uint32_t curbe_alloc_size; +}; + +struct ilo_state_compute { + uint32_t vfe[3]; + + uint32_t (*idrt)[6]; + uint8_t idrt_count; +}; + +static inline size_t +ilo_state_compute_data_size(const struct ilo_dev *dev, + uint8_t interface_count) +{ + const struct ilo_state_compute *compute = NULL; + return sizeof(compute->idrt[0]) * interface_count; +} + +bool +ilo_state_compute_init(struct ilo_state_compute *compute, + const struct ilo_dev *dev, + const struct ilo_state_compute_info *info); + +#endif /* ILO_STATE_COMPUTE_H */ diff --git a/src/gallium/drivers/ilo/ilo_render_dynamic.c b/src/gallium/drivers/ilo/ilo_render_dynamic.c index 5618920a507..3b4c80227a6 100644 --- a/src/gallium/drivers/ilo/ilo_render_dynamic.c +++ b/src/gallium/drivers/ilo/ilo_render_dynamic.c @@ -30,6 +30,7 @@ #include "ilo_common.h" #include "ilo_blitter.h" +#include "ilo_shader.h" #include "ilo_state.h" #include "ilo_render_gen.h" @@ -522,20 +523,39 @@ gen6_emit_launch_grid_dynamic_idrt(struct ilo_render *r, struct ilo_render_launch_grid_session *session) { const struct ilo_shader_state *cs = vec->cs; - struct gen6_idrt_data data; + struct ilo_state_compute_interface_info interface; + struct ilo_state_compute_info info; + uint32_t kernel_offset; ILO_DEV_ASSERT(r->dev, 7, 7.5); - memset(&data, 0, sizeof(data)); + memset(&interface, 0, sizeof(interface)); - data.cs = cs; - data.sampler_offset = r->state.cs.SAMPLER_STATE; - data.binding_table_offset = r->state.cs.BINDING_TABLE_STATE; + interface.sampler_count = + ilo_shader_get_kernel_param(cs, ILO_KERNEL_SAMPLER_COUNT); + interface.surface_count = + ilo_shader_get_kernel_param(cs, ILO_KERNEL_SURFACE_TOTAL_COUNT); + interface.thread_group_size = session->thread_group_size; + interface.slm_size = + ilo_shader_get_kernel_param(cs, ILO_KERNEL_CS_LOCAL_SIZE); + interface.curbe_read_length = r->state.cs.PUSH_CONSTANT_BUFFER_size; - data.curbe_size = r->state.cs.PUSH_CONSTANT_BUFFER_size; - data.thread_group_size = session->thread_group_size; + memset(&info, 0, sizeof(info)); + info.data = session->compute_data; + info.data_size = sizeof(session->compute_data); + info.interfaces = &interface; + info.interface_count = 1; + info.cv_urb_alloc_size = r->dev->urb_size; + info.curbe_alloc_size = r->state.cs.PUSH_CONSTANT_BUFFER_size; + + ilo_state_compute_init(&session->compute, r->dev, &info); + + kernel_offset = ilo_shader_get_kernel_offset(cs); + + session->idrt = gen6_INTERFACE_DESCRIPTOR_DATA(r->builder, + &session->compute, &kernel_offset, + &r->state.cs.SAMPLER_STATE, &r->state.cs.BINDING_TABLE_STATE); - session->idrt = gen6_INTERFACE_DESCRIPTOR_DATA(r->builder, &data, 1); session->idrt_size = 32; } diff --git a/src/gallium/drivers/ilo/ilo_render_gen.h b/src/gallium/drivers/ilo/ilo_render_gen.h index 00c8113a45d..aae4ef2f373 100644 --- a/src/gallium/drivers/ilo/ilo_render_gen.h +++ b/src/gallium/drivers/ilo/ilo_render_gen.h @@ -185,6 +185,9 @@ struct ilo_render_launch_grid_session { uint32_t idrt; int idrt_size; + + uint32_t compute_data[6]; + struct ilo_state_compute compute; }; int diff --git a/src/gallium/drivers/ilo/ilo_render_media.c b/src/gallium/drivers/ilo/ilo_render_media.c index 387920a912c..a0de0024d61 100644 --- a/src/gallium/drivers/ilo/ilo_render_media.c +++ b/src/gallium/drivers/ilo/ilo_render_media.c @@ -30,6 +30,7 @@ #include "core/ilo_builder_mi.h" #include "core/ilo_builder_render.h" +#include "ilo_shader.h" #include "ilo_state.h" #include "ilo_render_gen.h" @@ -206,7 +207,7 @@ ilo_render_emit_launch_grid_commands(struct ilo_render *render, gen6_state_base_address(render->builder, true); - gen6_MEDIA_VFE_STATE(render->builder, pcb_size, use_slm); + gen6_MEDIA_VFE_STATE(render->builder, &session->compute); if (pcb_size) gen6_MEDIA_CURBE_LOAD(render->builder, pcb, pcb_size); diff --git a/src/gallium/drivers/ilo/ilo_state.h b/src/gallium/drivers/ilo/ilo_state.h index e4746d0969b..537e5db120b 100644 --- a/src/gallium/drivers/ilo/ilo_state.h +++ b/src/gallium/drivers/ilo/ilo_state.h @@ -29,6 +29,7 @@ #define ILO_STATE_H #include "core/ilo_state_cc.h" +#include "core/ilo_state_compute.h" #include "core/ilo_state_raster.h" #include "core/ilo_state_sampler.h" #include "core/ilo_state_sbe.h"