}
}
- assert(prog_data->total_shared <= 64 * 1024);
- uint32_t slm_size = 0;
- if (prog_data->total_shared > 0) {
- /* slm_size is in 4k increments, but must be a power of 2. */
- slm_size = 4 * 1024;
- while (slm_size < prog_data->total_shared)
- slm_size <<= 1;
- slm_size /= 4 * 1024;
- }
+ const uint32_t slm_size = encode_slm_size(GEN_GEN, prog_data->total_shared);
struct anv_state state =
anv_state_pool_emit(&device->dynamic_state_pool,
}
}
- assert(prog_data->total_shared <= 64 * 1024);
- uint32_t slm_size = 0;
- if (prog_data->total_shared > 0) {
- /* slm_size is in 4k increments, but must be a power of 2. */
- slm_size = 4 * 1024;
- while (slm_size < prog_data->total_shared)
- slm_size <<= 1;
- slm_size /= 4 * 1024;
- }
+ const uint32_t slm_size = encode_slm_size(GEN_GEN, prog_data->total_shared);
struct anv_state state =
anv_state_pool_emit(&device->dynamic_state_pool,
#include <stdio.h>
#include "brw_device_info.h"
#include "main/mtypes.h"
+#include "main/macros.h"
#ifdef __cplusplus
extern "C" {
unsigned *final_assembly_size,
char **error_str);
+static inline uint32_t
+encode_slm_size(const struct brw_device_info *devinfo, uint32_t bytes)
+{
+ uint32_t slm_size = 0;
+
+ /* Encode a Shared Local Memory size given in bytes as the field value
+ * used for the hardware generation in devinfo->gen.
+ *
+ * Shared Local Memory is specified as powers of two, and encoded in
+ * INTERFACE_DESCRIPTOR_DATA with the following representations:
+ *
+ * Size | 0 kB | 1 kB | 2 kB | 4 kB | 8 kB | 16 kB | 32 kB | 64 kB |
+ * -------------------------------------------------------------------
+ * Gen7-8 | 0 | none | none | 1 | 2 | 4 | 8 | 16 |
+ * -------------------------------------------------------------------
+ * Gen9+ | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 |
+ *
+ * bytes must not exceed 64 kB (asserted below). A size of 0 encodes as 0;
+ * any other size is run through util_next_power_of_two() and raised to
+ * the generation's minimum (1 kB on Gen9+, 4 kB before) prior to encoding.
+ *
+ * NOTE(review): other call sites in this patch pass GEN_GEN as the first
+ * argument, while this signature takes a devinfo pointer -- confirm the
+ * two agree.
+ */
+ assert(bytes <= 64 * 1024);
+
+ if (bytes > 0) {
+ /* Shared Local Memory Size is specified as powers of two. */
+ slm_size = util_next_power_of_two(bytes);
+
+ if (devinfo->gen >= 9) {
+ /* Use a minimum of 1kB. ffs() is 1-based, so 1024 bytes (bit 10)
+ * gives 11, and subtracting 10 yields the encoding 1.
+ */
+ slm_size = ffs(MAX2(slm_size, 1024)) - 10;
+ } else {
+ /* Use a minimum of 4kB; convert to the pre-Gen9 representation. */
+ slm_size = MAX2(slm_size, 4096) / 4096;
+ }
+ }
+
+ return slm_size;
+}
+
#ifdef __cplusplus
} /* extern "C" */
#endif
struct brw_stage_state *stage_state = &brw->cs.base;
struct brw_cs_prog_data *cs_prog_data = brw->cs.prog_data;
struct brw_stage_prog_data *prog_data = &cs_prog_data->base;
+ const struct brw_device_info *devinfo = brw->intelScreen->devinfo;
if (INTEL_DEBUG & DEBUG_SHADER_TIME) {
brw->vtbl.emit_buffer_surface_state(
SET_FIELD(cs_prog_data->threads, MEDIA_GPGPU_THREAD_COUNT);
assert(cs_prog_data->threads <= brw->max_cs_threads);
- assert(prog_data->total_shared <= 64 * 1024);
- uint32_t slm_size = 0;
- if (prog_data->total_shared > 0) {
- /* slm_size is in 4k increments, but must be a power of 2. */
- slm_size = 4 * 1024;
- while (slm_size < prog_data->total_shared)
- slm_size <<= 1;
- slm_size /= 4 * 1024;
- }
+ const uint32_t slm_size = encode_slm_size(devinfo, prog_data->total_shared);
desc[dw++] =
SET_FIELD(cs_prog_data->uses_barrier, MEDIA_BARRIER_ENABLE) |