#include "ilo_state_3d.h"
#include "ilo_state_sampler.h"
#include "ilo_state_sol.h"
+#include "ilo_state_urb.h"
#include "ilo_builder.h"
static inline void
gen6_3DSTATE_URB(struct ilo_builder *builder,
- int vs_total_size, int gs_total_size,
- int vs_entry_size, int gs_entry_size)
+ const struct ilo_state_urb *urb)
{
const uint8_t cmd_len = 3;
- const int row_size = 128; /* 1024 bits */
- int vs_alloc_size, gs_alloc_size;
- int vs_num_entries, gs_num_entries;
uint32_t *dw;
- ILO_DEV_ASSERT(builder->dev, 6, 6);
-
- /* in 1024-bit URB rows */
- vs_alloc_size = (vs_entry_size + row_size - 1) / row_size;
- gs_alloc_size = (gs_entry_size + row_size - 1) / row_size;
-
- /* the valid range is [1, 5] */
- if (!vs_alloc_size)
- vs_alloc_size = 1;
- if (!gs_alloc_size)
- gs_alloc_size = 1;
- assert(vs_alloc_size <= 5 && gs_alloc_size <= 5);
-
- /* the valid range is [24, 256] in multiples of 4 */
- vs_num_entries = (vs_total_size / row_size / vs_alloc_size) & ~3;
- if (vs_num_entries > 256)
- vs_num_entries = 256;
- assert(vs_num_entries >= 24);
-
- /* the valid range is [0, 256] in multiples of 4 */
- gs_num_entries = (gs_total_size / row_size / gs_alloc_size) & ~3;
- if (gs_num_entries > 256)
- gs_num_entries = 256;
-
ilo_builder_batch_pointer(builder, cmd_len, &dw);
dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_URB) | (cmd_len - 2);
+ /*
+  * Emits 3DSTATE_URB from a caller-provided ilo_state_urb: dw[1] and dw[2]
+  * are copied as-is from urb->urb[0] and urb->urb[1].  This function no
+  * longer computes allocation sizes or entry counts, and it performs no
+  * range checks on the two values.
+  *
+  * See urb_set_gen6_3DSTATE_URB() for how the two dwords are produced.
+  * Note that the gen7 3DSTATE_URB_* helpers in this file read one element
+  * of urb->urb[] per command instead of two elements for one command.
+  */
+ dw[1] = urb->urb[0];
+ dw[2] = urb->urb[1];
}
static inline void
-gen7_3dstate_push_constant_alloc(struct ilo_builder *builder,
- int subop, int offset, int size)
+gen7_3DSTATE_PUSH_CONSTANT_ALLOC_VS(struct ilo_builder *builder,
+ const struct ilo_state_urb *urb)
{
- const uint32_t cmd = GEN6_RENDER_TYPE_RENDER |
- GEN6_RENDER_SUBTYPE_3D |
- subop;
const uint8_t cmd_len = 2;
- const int slice_count = ((ilo_dev_gen(builder->dev) == ILO_GEN(7.5) &&
- builder->dev->gt == 3) ||
- ilo_dev_gen(builder->dev) >= ILO_GEN(8)) ? 2 : 1;
uint32_t *dw;
- int end;
-
- ILO_DEV_ASSERT(builder->dev, 7, 8);
-
- /* VS, HS, DS, GS, and PS variants */
- assert(subop >= GEN7_RENDER_OPCODE_3DSTATE_PUSH_CONSTANT_ALLOC_VS &&
- subop <= GEN7_RENDER_OPCODE_3DSTATE_PUSH_CONSTANT_ALLOC_PS);
-
- /*
- * From the Ivy Bridge PRM, volume 2 part 1, page 68:
- *
- * "(A table that says the maximum size of each constant buffer is
- * 16KB")
- *
- * From the Ivy Bridge PRM, volume 2 part 1, page 115:
- *
- * "The sum of the Constant Buffer Offset and the Constant Buffer Size
- * may not exceed the maximum value of the Constant Buffer Size."
- *
- * Thus, the valid range of buffer end is [0KB, 16KB].
- */
- end = (offset + size) / 1024;
- if (end > 16 * slice_count) {
- assert(!"invalid constant buffer end");
- end = 16 * slice_count;
- }
-
- /* the valid range of buffer offset is [0KB, 15KB] */
- offset = (offset + 1023) / 1024;
- if (offset > 15 * slice_count) {
- assert(!"invalid constant buffer offset");
- offset = 15 * slice_count;
- }
-
- if (offset > end) {
- assert(!size);
- offset = end;
- }
-
- /* the valid range of buffer size is [0KB, 15KB] */
- size = end - offset;
- if (size > 15 * slice_count) {
- assert(!"invalid constant buffer size");
- size = 15 * slice_count;
- }
-
- assert(offset % slice_count == 0 && size % slice_count == 0);
ilo_builder_batch_pointer(builder, cmd_len, &dw);
- dw[0] = cmd | (cmd_len - 2);
- dw[1] = offset << GEN7_PCB_ALLOC_DW1_OFFSET__SHIFT |
- size;
-}
-
-static inline void
-gen7_3DSTATE_PUSH_CONSTANT_ALLOC_VS(struct ilo_builder *builder,
- int offset, int size)
-{
- gen7_3dstate_push_constant_alloc(builder,
- GEN7_RENDER_OPCODE_3DSTATE_PUSH_CONSTANT_ALLOC_VS, offset, size);
+ dw[0] = GEN7_RENDER_CMD(3D, 3DSTATE_PUSH_CONSTANT_ALLOC_VS) |
+ (cmd_len - 2);
+ /*
+  * Emits 3DSTATE_PUSH_CONSTANT_ALLOC_VS from a caller-provided
+  * ilo_state_urb: dw[1] is copied as-is from urb->pcb[0].  The offset/size
+  * clamping that used to live in the shared helper is gone from this file,
+  * so no validation happens here.
+  *
+  * See urb_set_gen7_3dstate_push_constant_alloc() for how the dword is
+  * produced.
+  */
+ dw[1] = urb->pcb[0];
}
static inline void
gen7_3DSTATE_PUSH_CONSTANT_ALLOC_HS(struct ilo_builder *builder,
- int offset, int size)
+ const struct ilo_state_urb *urb)
{
- gen7_3dstate_push_constant_alloc(builder,
- GEN7_RENDER_OPCODE_3DSTATE_PUSH_CONSTANT_ALLOC_HS, offset, size);
+ const uint8_t cmd_len = 2;
+ uint32_t *dw;
+
+ ilo_builder_batch_pointer(builder, cmd_len, &dw);
+
+ dw[0] = GEN7_RENDER_CMD(3D, 3DSTATE_PUSH_CONSTANT_ALLOC_HS) |
+ (cmd_len - 2);
+ /*
+  * Emits 3DSTATE_PUSH_CONSTANT_ALLOC_HS from a caller-provided
+  * ilo_state_urb: dw[1] is copied as-is from urb->pcb[1], with no
+  * validation done here.
+  *
+  * See urb_set_gen7_3dstate_push_constant_alloc() for how the dword is
+  * produced.
+  */
+ dw[1] = urb->pcb[1];
}
static inline void
gen7_3DSTATE_PUSH_CONSTANT_ALLOC_DS(struct ilo_builder *builder,
- int offset, int size)
+ const struct ilo_state_urb *urb)
{
- gen7_3dstate_push_constant_alloc(builder,
- GEN7_RENDER_OPCODE_3DSTATE_PUSH_CONSTANT_ALLOC_DS, offset, size);
+ const uint8_t cmd_len = 2;
+ uint32_t *dw;
+
+ ilo_builder_batch_pointer(builder, cmd_len, &dw);
+
+ dw[0] = GEN7_RENDER_CMD(3D, 3DSTATE_PUSH_CONSTANT_ALLOC_DS) |
+ (cmd_len - 2);
+ /*
+  * Emits 3DSTATE_PUSH_CONSTANT_ALLOC_DS from a caller-provided
+  * ilo_state_urb: dw[1] is copied as-is from urb->pcb[2], with no
+  * validation done here.
+  *
+  * See urb_set_gen7_3dstate_push_constant_alloc() for how the dword is
+  * produced.
+  */
+ dw[1] = urb->pcb[2];
}
static inline void
gen7_3DSTATE_PUSH_CONSTANT_ALLOC_GS(struct ilo_builder *builder,
- int offset, int size)
+ const struct ilo_state_urb *urb)
{
- gen7_3dstate_push_constant_alloc(builder,
- GEN7_RENDER_OPCODE_3DSTATE_PUSH_CONSTANT_ALLOC_GS, offset, size);
-}
+ const uint8_t cmd_len = 2;
+ uint32_t *dw;
-static inline void
-gen7_3DSTATE_PUSH_CONSTANT_ALLOC_PS(struct ilo_builder *builder,
- int offset, int size)
-{
- gen7_3dstate_push_constant_alloc(builder,
- GEN7_RENDER_OPCODE_3DSTATE_PUSH_CONSTANT_ALLOC_PS, offset, size);
+ ilo_builder_batch_pointer(builder, cmd_len, &dw);
+
+ dw[0] = GEN7_RENDER_CMD(3D, 3DSTATE_PUSH_CONSTANT_ALLOC_GS) |
+ (cmd_len - 2);
+ /*
+  * Emits 3DSTATE_PUSH_CONSTANT_ALLOC_GS from a caller-provided
+  * ilo_state_urb: dw[1] is copied as-is from urb->pcb[3], with no
+  * validation done here.
+  *
+  * See urb_set_gen7_3dstate_push_constant_alloc() for how the dword is
+  * produced.
+  */
+ dw[1] = urb->pcb[3];
}
static inline void
-gen7_3dstate_urb(struct ilo_builder *builder,
- int subop, int offset, int size,
- int entry_size)
+gen7_3DSTATE_PUSH_CONSTANT_ALLOC_PS(struct ilo_builder *builder,
+ const struct ilo_state_urb *urb)
{
- const uint32_t cmd = GEN6_RENDER_TYPE_RENDER |
- GEN6_RENDER_SUBTYPE_3D |
- subop;
const uint8_t cmd_len = 2;
- const int row_size = 64; /* 512 bits */
- int alloc_size, num_entries, min_entries, max_entries;
uint32_t *dw;
- ILO_DEV_ASSERT(builder->dev, 7, 8);
-
- /* VS, HS, DS, and GS variants */
- assert(subop >= GEN7_RENDER_OPCODE_3DSTATE_URB_VS &&
- subop <= GEN7_RENDER_OPCODE_3DSTATE_URB_GS);
-
- /* in multiples of 8KB */
- assert(offset % 8192 == 0);
- offset /= 8192;
-
- /* in multiple of 512-bit rows */
- alloc_size = (entry_size + row_size - 1) / row_size;
- if (!alloc_size)
- alloc_size = 1;
-
- /*
- * From the Ivy Bridge PRM, volume 2 part 1, page 34:
- *
- * "VS URB Entry Allocation Size equal to 4(5 512-bit URB rows) may
- * cause performance to decrease due to banking in the URB. Element
- * sizes of 16 to 20 should be programmed with six 512-bit URB rows."
- */
- if (subop == GEN7_RENDER_OPCODE_3DSTATE_URB_VS && alloc_size == 5)
- alloc_size = 6;
-
- /* in multiples of 8 */
- num_entries = (size / row_size / alloc_size) & ~7;
-
- switch (subop) {
- case GEN7_RENDER_OPCODE_3DSTATE_URB_VS:
- switch (ilo_dev_gen(builder->dev)) {
- case ILO_GEN(8):
- max_entries = 2560;
- min_entries = 64;
- break;
- case ILO_GEN(7.5):
- max_entries = (builder->dev->gt >= 2) ? 1664 : 640;
- min_entries = (builder->dev->gt >= 2) ? 64 : 32;
- break;
- case ILO_GEN(7):
- default:
- max_entries = (builder->dev->gt == 2) ? 704 : 512;
- min_entries = 32;
- break;
- }
-
- assert(num_entries >= min_entries);
- if (num_entries > max_entries)
- num_entries = max_entries;
- break;
- case GEN7_RENDER_OPCODE_3DSTATE_URB_HS:
- max_entries = (builder->dev->gt == 2) ? 64 : 32;
- if (num_entries > max_entries)
- num_entries = max_entries;
- break;
- case GEN7_RENDER_OPCODE_3DSTATE_URB_DS:
- if (num_entries)
- assert(num_entries >= 138);
- break;
- case GEN7_RENDER_OPCODE_3DSTATE_URB_GS:
- switch (ilo_dev_gen(builder->dev)) {
- case ILO_GEN(8):
- max_entries = 960;
- break;
- case ILO_GEN(7.5):
- max_entries = (builder->dev->gt >= 2) ? 640 : 256;
- break;
- case ILO_GEN(7):
- default:
- max_entries = (builder->dev->gt == 2) ? 320 : 192;
- break;
- }
-
- if (num_entries > max_entries)
- num_entries = max_entries;
- break;
- default:
- break;
- }
-
ilo_builder_batch_pointer(builder, cmd_len, &dw);
- dw[0] = cmd | (cmd_len - 2);
- dw[1] = offset << GEN7_URB_DW1_OFFSET__SHIFT |
- (alloc_size - 1) << GEN7_URB_DW1_ENTRY_SIZE__SHIFT |
- num_entries;
+ dw[0] = GEN7_RENDER_CMD(3D, 3DSTATE_PUSH_CONSTANT_ALLOC_PS) |
+ (cmd_len - 2);
+ /*
+  * Emits 3DSTATE_PUSH_CONSTANT_ALLOC_PS from a caller-provided
+  * ilo_state_urb: dw[1] is copied as-is from urb->pcb[4], with no
+  * validation done here.
+  *
+  * See urb_set_gen7_3dstate_push_constant_alloc() for how the dword is
+  * produced.
+  */
+ dw[1] = urb->pcb[4];
}
static inline void
gen7_3DSTATE_URB_VS(struct ilo_builder *builder,
- int offset, int size, int entry_size)
+ const struct ilo_state_urb *urb)
{
- gen7_3dstate_urb(builder, GEN7_RENDER_OPCODE_3DSTATE_URB_VS,
- offset, size, entry_size);
+ const uint8_t cmd_len = 2;
+ uint32_t *dw;
+
+ ilo_builder_batch_pointer(builder, cmd_len, &dw);
+
+ dw[0] = GEN7_RENDER_CMD(3D, 3DSTATE_URB_VS) | (cmd_len - 2);
+ /*
+  * Emits 3DSTATE_URB_VS from a caller-provided ilo_state_urb: dw[1] is
+  * copied as-is from urb->urb[0].  The entry-size rounding and per-gen
+  * entry-count clamping of the removed gen7_3dstate_urb() helper are not
+  * done here.
+  *
+  * see urb_set_gen7_3dstate_push_constant_alloc()
+  *
+  * NOTE(review): that reference looks copy-pasted from the
+  * PUSH_CONSTANT_ALLOC helpers; this value comes from urb->urb[], not
+  * urb->pcb[].  Confirm which routine actually packs urb->urb[] on gen7
+  * and point the comment at it.
+  */
+ dw[1] = urb->urb[0];
}
static inline void
gen7_3DSTATE_URB_HS(struct ilo_builder *builder,
- int offset, int size, int entry_size)
+ const struct ilo_state_urb *urb)
{
- gen7_3dstate_urb(builder, GEN7_RENDER_OPCODE_3DSTATE_URB_HS,
- offset, size, entry_size);
+ const uint8_t cmd_len = 2;
+ uint32_t *dw;
+
+ ilo_builder_batch_pointer(builder, cmd_len, &dw);
+
+ dw[0] = GEN7_RENDER_CMD(3D, 3DSTATE_URB_HS) | (cmd_len - 2);
+ /*
+  * Emits 3DSTATE_URB_HS from a caller-provided ilo_state_urb: dw[1] is
+  * copied as-is from urb->urb[1], with no validation done here.
+  *
+  * see urb_set_gen7_3dstate_push_constant_alloc()
+  *
+  * NOTE(review): that reference looks copy-pasted from the
+  * PUSH_CONSTANT_ALLOC helpers; this value comes from urb->urb[], not
+  * urb->pcb[].  Confirm which routine actually packs urb->urb[] on gen7
+  * and point the comment at it.
+  */
+ dw[1] = urb->urb[1];
}
static inline void
gen7_3DSTATE_URB_DS(struct ilo_builder *builder,
- int offset, int size, int entry_size)
+ const struct ilo_state_urb *urb)
{
- gen7_3dstate_urb(builder, GEN7_RENDER_OPCODE_3DSTATE_URB_DS,
- offset, size, entry_size);
+ const uint8_t cmd_len = 2;
+ uint32_t *dw;
+
+ ilo_builder_batch_pointer(builder, cmd_len, &dw);
+
+ dw[0] = GEN7_RENDER_CMD(3D, 3DSTATE_URB_DS) | (cmd_len - 2);
+ /*
+  * Emits 3DSTATE_URB_DS from a caller-provided ilo_state_urb: dw[1] is
+  * copied as-is from urb->urb[2], with no validation done here.
+  *
+  * see urb_set_gen7_3dstate_push_constant_alloc()
+  *
+  * NOTE(review): that reference looks copy-pasted from the
+  * PUSH_CONSTANT_ALLOC helpers; this value comes from urb->urb[], not
+  * urb->pcb[].  Confirm which routine actually packs urb->urb[] on gen7
+  * and point the comment at it.
+  */
+ dw[1] = urb->urb[2];
}
static inline void
gen7_3DSTATE_URB_GS(struct ilo_builder *builder,
- int offset, int size, int entry_size)
+ const struct ilo_state_urb *urb)
{
- gen7_3dstate_urb(builder, GEN7_RENDER_OPCODE_3DSTATE_URB_GS,
- offset, size, entry_size);
+ const uint8_t cmd_len = 2;
+ uint32_t *dw;
+
+ ilo_builder_batch_pointer(builder, cmd_len, &dw);
+
+ dw[0] = GEN7_RENDER_CMD(3D, 3DSTATE_URB_GS) | (cmd_len - 2);
+ /*
+  * Emits 3DSTATE_URB_GS from a caller-provided ilo_state_urb: dw[1] is
+  * copied as-is from urb->urb[3], with no validation done here.
+  *
+  * see urb_set_gen7_3dstate_push_constant_alloc()
+  *
+  * NOTE(review): that reference looks copy-pasted from the
+  * PUSH_CONSTANT_ALLOC helpers; this value comes from urb->urb[], not
+  * urb->pcb[].  Confirm which routine actually packs urb->urb[] on gen7
+  * and point the comment at it.
+  */
+ dw[1] = urb->urb[3];
}
static inline void
uint32_t depth_clear_value;
+ struct ilo_state_urb urb;
+
struct {
struct ilo_surface_cso dst;
unsigned width, height;
ilo_state_viewport_init_for_rectlist(&blitter->vp, blitter->ilo->dev,
blitter->vp_data, sizeof(blitter->vp_data));
+ ilo_state_urb_init_for_rectlist(&blitter->urb, blitter->ilo->dev,
+ blitter->ve.count + blitter->ve.prepend_nosrc_cso);
+
blitter->initialized = true;
return true;
session->prim_changed = true;
session->primitive_restart_changed = true;
+ ilo_state_urb_full_delta(&vec->urb, render->dev, &session->urb_delta);
+
ilo_state_raster_full_delta(&vec->rasterizer->rs, render->dev,
&session->rs_delta);
session->primitive_restart_changed =
(render->state.primitive_restart != vec->draw->primitive_restart);
+ ilo_state_urb_get_delta(&vec->urb, render->dev,
+ &render->state.urb, &session->urb_delta);
+
if (vec->dirty & ILO_DIRTY_RASTERIZER) {
ilo_state_raster_get_delta(&vec->rasterizer->rs, render->dev,
&render->state.rs, &session->rs_delta);
render->state.reduced_prim = session->reduced_prim;
render->state.primitive_restart = vec->draw->primitive_restart;
+ render->state.urb = vec->urb;
render->state.rs = vec->rasterizer->rs;
render->state.cc = vec->blend->cc;
}
int reduced_prim;
int so_max_vertices;
+ struct ilo_state_urb urb;
struct ilo_state_raster rs;
struct ilo_state_cc cc;
bool prim_changed;
bool primitive_restart_changed;
+ struct ilo_state_urb_delta urb_delta;
struct ilo_state_raster_delta rs_delta;
struct ilo_state_viewport_delta vp_delta;
struct ilo_state_cc_delta cc_delta;
const struct ilo_state_vector *vec,
struct ilo_render_draw_session *session)
{
- /* 3DSTATE_URB */
- if (DIRTY(VE) || DIRTY(VS) || DIRTY(GS)) {
- const bool gs_active = (vec->gs || (vec->vs &&
- ilo_shader_get_kernel_param(vec->vs, ILO_KERNEL_VS_GEN6_SO)));
- int vs_entry_size, gs_entry_size;
- int vs_total_size, gs_total_size;
-
- vs_entry_size = (vec->vs) ?
- ilo_shader_get_kernel_param(vec->vs, ILO_KERNEL_OUTPUT_COUNT) : 0;
-
- /*
- * As indicated by 2e712e41db0c0676e9f30fc73172c0e8de8d84d4, VF and VS
- * share VUE handles. The VUE allocation size must be large enough to
- * store either VF outputs (number of VERTEX_ELEMENTs) and VS outputs.
- *
- * I am not sure if the PRM explicitly states that VF and VS share VUE
- * handles. But here is a citation that implies so:
- *
- * From the Sandy Bridge PRM, volume 2 part 1, page 44:
- *
- * "Once a FF stage that spawn threads has sufficient input to
- * initiate a thread, it must guarantee that it is safe to request
- * the thread initiation. For all these FF stages, this check is
- * based on :
- *
- * - The availability of output URB entries:
- * - VS: As the input URB entries are overwritten with the
- * VS-generated output data, output URB availability isn't a
- * factor."
- */
- if (vs_entry_size < vec->ve->count + vec->ve->prepend_nosrc_cso)
- vs_entry_size = vec->ve->count + vec->ve->prepend_nosrc_cso;
+ const bool gs_active = (vec->gs || (vec->vs &&
+ ilo_shader_get_kernel_param(vec->vs, ILO_KERNEL_VS_GEN6_SO)));
- gs_entry_size = (vec->gs) ?
- ilo_shader_get_kernel_param(vec->gs, ILO_KERNEL_OUTPUT_COUNT) :
- (gs_active) ? vs_entry_size : 0;
-
- /* in bytes */
- vs_entry_size *= sizeof(float) * 4;
- gs_entry_size *= sizeof(float) * 4;
- vs_total_size = r->dev->urb_size;
-
- if (gs_active) {
- vs_total_size /= 2;
- gs_total_size = vs_total_size;
- }
- else {
- gs_total_size = 0;
- }
-
- gen6_3DSTATE_URB(r->builder, vs_total_size, gs_total_size,
- vs_entry_size, gs_entry_size);
+ /* 3DSTATE_URB */
+ if (session->urb_delta.dirty & (ILO_STATE_URB_3DSTATE_URB_VS |
+ ILO_STATE_URB_3DSTATE_URB_GS)) {
+ gen6_3DSTATE_URB(r->builder, &vec->urb);
if (r->state.gs.active && !gs_active)
gen6_wa_post_3dstate_urb_no_gs(r);
-
- r->state.gs.active = gs_active;
}
+
+ r->state.gs.active = gs_active;
}
static void
gen6_3DSTATE_VERTEX_ELEMENTS(r->builder, &blitter->ve);
- gen6_3DSTATE_URB(r->builder, r->dev->urb_size, 0,
- (blitter->ve.count + blitter->ve.prepend_nosrc_cso) * 4 * sizeof(float),
- 0);
+ gen6_3DSTATE_URB(r->builder, &blitter->urb);
if (r->state.gs.active) {
gen6_wa_post_3dstate_urb_no_gs(r);
struct ilo_render_draw_session *session)
{
/* 3DSTATE_URB_{VS,GS,HS,DS} */
- if (DIRTY(VE) || DIRTY(VS)) {
- /* the first 16KB are reserved for VS and PS PCBs */
- const int offset =
- (ilo_dev_gen(r->dev) >= ILO_GEN(8)) ||
- (ilo_dev_gen(r->dev) == ILO_GEN(7.5) && r->dev->gt == 3) ?
- 32768 : 16384;
- int vs_entry_size, vs_total_size;
-
- vs_entry_size = (vec->vs) ?
- ilo_shader_get_kernel_param(vec->vs, ILO_KERNEL_OUTPUT_COUNT) : 0;
-
- /*
- * From the Ivy Bridge PRM, volume 2 part 1, page 35:
- *
- * "Programming Restriction: As the VS URB entry serves as both the
- * per-vertex input and output of the VS shader, the VS URB
- * Allocation Size must be sized to the maximum of the vertex input
- * and output structures."
- */
- if (vs_entry_size < vec->ve->count + vec->ve->prepend_nosrc_cso)
- vs_entry_size = vec->ve->count + vec->ve->prepend_nosrc_cso;
-
- vs_entry_size *= sizeof(float) * 4;
- vs_total_size = r->dev->urb_size - offset;
-
+ if (session->urb_delta.dirty & (ILO_STATE_URB_3DSTATE_URB_VS |
+ ILO_STATE_URB_3DSTATE_URB_HS |
+ ILO_STATE_URB_3DSTATE_URB_DS |
+ ILO_STATE_URB_3DSTATE_URB_GS)) {
if (ilo_dev_gen(r->dev) == ILO_GEN(7))
gen7_wa_pre_vs(r);
- gen7_3DSTATE_URB_VS(r->builder,
- offset, vs_total_size, vs_entry_size);
-
- gen7_3DSTATE_URB_GS(r->builder, offset, 0, 0);
- gen7_3DSTATE_URB_HS(r->builder, offset, 0, 0);
- gen7_3DSTATE_URB_DS(r->builder, offset, 0, 0);
+ gen7_3DSTATE_URB_VS(r->builder, &vec->urb);
+ gen7_3DSTATE_URB_GS(r->builder, &vec->urb);
+ gen7_3DSTATE_URB_HS(r->builder, &vec->urb);
+ gen7_3DSTATE_URB_DS(r->builder, &vec->urb);
}
}
struct ilo_render_draw_session *session)
{
/* 3DSTATE_PUSH_CONSTANT_ALLOC_{VS,PS} */
- if (r->hw_ctx_changed) {
- /*
- * Push constant buffers are only allowed to take up at most the first
- * 16KB of the URB. Split the space evenly for VS and FS.
- */
- const int max_size =
- (ilo_dev_gen(r->dev) >= ILO_GEN(8)) ||
- (ilo_dev_gen(r->dev) == ILO_GEN(7.5) && r->dev->gt == 3) ?
- 32768 : 16384;
- const int size = max_size / 2;
- int offset = 0;
-
- gen7_3DSTATE_PUSH_CONSTANT_ALLOC_VS(r->builder, offset, size);
- offset += size;
-
- gen7_3DSTATE_PUSH_CONSTANT_ALLOC_PS(r->builder, offset, size);
+ if (session->urb_delta.dirty &
+ (ILO_STATE_URB_3DSTATE_PUSH_CONSTANT_ALLOC_VS |
+ ILO_STATE_URB_3DSTATE_PUSH_CONSTANT_ALLOC_HS |
+ ILO_STATE_URB_3DSTATE_PUSH_CONSTANT_ALLOC_DS |
+ ILO_STATE_URB_3DSTATE_PUSH_CONSTANT_ALLOC_GS |
+ ILO_STATE_URB_3DSTATE_PUSH_CONSTANT_ALLOC_PS)) {
+ gen7_3DSTATE_PUSH_CONSTANT_ALLOC_VS(r->builder, &vec->urb);
+ gen7_3DSTATE_PUSH_CONSTANT_ALLOC_GS(r->builder, &vec->urb);
+ gen7_3DSTATE_PUSH_CONSTANT_ALLOC_PS(r->builder, &vec->urb);
if (ilo_dev_gen(r->dev) == ILO_GEN(7))
gen7_wa_post_3dstate_push_constant_alloc_ps(r);
gen7_rectlist_pcb_alloc(struct ilo_render *r,
const struct ilo_blitter *blitter)
{
- /*
- * Push constant buffers are only allowed to take up at most the first
- * 16KB of the URB. Split the space evenly for VS and FS.
- */
- const int max_size =
- (ilo_dev_gen(r->dev) >= ILO_GEN(8)) ||
- (ilo_dev_gen(r->dev) == ILO_GEN(7.5) && r->dev->gt == 3) ?
- 32768 : 16384;
- const int size = max_size / 2;
- int offset = 0;
-
- gen7_3DSTATE_PUSH_CONSTANT_ALLOC_VS(r->builder, offset, size);
- offset += size;
-
- gen7_3DSTATE_PUSH_CONSTANT_ALLOC_PS(r->builder, offset, size);
+ gen7_3DSTATE_PUSH_CONSTANT_ALLOC_VS(r->builder, &blitter->urb);
+ gen7_3DSTATE_PUSH_CONSTANT_ALLOC_PS(r->builder, &blitter->urb);
if (ilo_dev_gen(r->dev) == ILO_GEN(7))
gen7_wa_post_3dstate_push_constant_alloc_ps(r);
gen7_rectlist_urb(struct ilo_render *r,
const struct ilo_blitter *blitter)
{
- /* the first 16KB are reserved for VS and PS PCBs */
- const int offset =
- (ilo_dev_gen(r->dev) >= ILO_GEN(8)) ||
- (ilo_dev_gen(r->dev) == ILO_GEN(7.5) && r->dev->gt == 3) ?
- 32768 : 16384;
-
- gen7_3DSTATE_URB_VS(r->builder, offset, r->dev->urb_size - offset,
- (blitter->ve.count + blitter->ve.prepend_nosrc_cso) *
- 4 * sizeof(float));
-
- gen7_3DSTATE_URB_GS(r->builder, offset, 0, 0);
- gen7_3DSTATE_URB_HS(r->builder, offset, 0, 0);
- gen7_3DSTATE_URB_DS(r->builder, offset, 0, 0);
+ gen7_3DSTATE_URB_VS(r->builder, &blitter->urb);
+ gen7_3DSTATE_URB_GS(r->builder, &blitter->urb);
+ gen7_3DSTATE_URB_HS(r->builder, &blitter->urb);
+ gen7_3DSTATE_URB_DS(r->builder, &blitter->urb);
}
static void
}
}
+static void
+finalize_urb(struct ilo_context *ilo)
+{ /*
+   * Collects URB sizing inputs from the bound vertex elements and the
+   * VS/GS/FS shaders into an ilo_state_urb_info, then passes it to
+   * ilo_state_urb_set_info() to update vec->urb.  Does nothing when none of
+   * the VE/VS/GS/FS dirty bits is set.
+   */
+ const uint16_t attr_size = sizeof(uint32_t) * 4; /* multiplier for the element/output counts below */
+ const struct ilo_dev *dev = ilo->dev;
+ struct ilo_state_vector *vec = &ilo->state_vector;
+ struct ilo_state_urb_info info;
+
+ if (!(vec->dirty & (ILO_DIRTY_VE | ILO_DIRTY_VS |
+ ILO_DIRTY_GS | ILO_DIRTY_FS)))
+ return; /* none of the VE/VS/GS/FS dirty bits is set */
+
+ memset(&info, 0, sizeof(info)); /* fields not assigned below stay zero */
+
+ info.ve_entry_size = attr_size *
+ (vec->ve->count + vec->ve->prepend_nosrc_cso);
+
+ if (vec->vs) {
+ info.vs_const_data = (bool) /* true when the sum of the two sizes is non-zero */
+ (ilo_shader_get_kernel_param(vec->vs, ILO_KERNEL_PCB_CBUF0_SIZE) +
+ ilo_shader_get_kernel_param(vec->vs, ILO_KERNEL_VS_PCB_UCP_SIZE));
+ info.vs_entry_size = attr_size *
+ ilo_shader_get_kernel_param(vec->vs, ILO_KERNEL_OUTPUT_COUNT);
+ }
+
+ if (vec->gs) {
+ info.gs_const_data = (bool)
+ ilo_shader_get_kernel_param(vec->gs, ILO_KERNEL_PCB_CBUF0_SIZE);
+
+ /*
+ * From the Ivy Bridge PRM, volume 2 part 1, page 189:
+ *
+ * "All outputs of a GS thread will be stored in the single GS
+ * thread output URB entry."
+ *
+ * TODO
+ *
+ * NOTE(review): the TODO does not say what is missing.  Given the
+ * quote, presumably gs_entry_size should cover every vertex a GS
+ * thread can output rather than one OUTPUT_COUNT worth of attributes
+ * -- confirm with the author.
+ */
+ info.gs_entry_size = attr_size *
+ ilo_shader_get_kernel_param(vec->gs, ILO_KERNEL_OUTPUT_COUNT);
+ }
+
+ if (vec->fs) { /* only the const-data flag is taken from the FS */
+ info.ps_const_data = (bool)
+ ilo_shader_get_kernel_param(vec->fs, ILO_KERNEL_PCB_CBUF0_SIZE);
+ }
+
+ ilo_state_urb_set_info(&vec->urb, dev, &info);
+}
+
static void
finalize_viewport(struct ilo_context *ilo)
{
finalize_index_buffer(ilo);
finalize_vertex_elements(ilo);
+ finalize_urb(ilo);
finalize_rasterizer(ilo);
finalize_viewport(ilo);
finalize_blend(ilo);
ilo_state_vector_init(const struct ilo_dev *dev,
struct ilo_state_vector *vec)
{
+ struct ilo_state_urb_info urb_info;
+
vec->sample_mask = ~0u;
ilo_state_viewport_init_data_only(&vec->viewport.vp, dev,
ilo_state_sampler_init_disabled(&vec->disabled_sampler, dev);
+ memset(&urb_info, 0, sizeof(urb_info));
+ ilo_state_urb_init(&vec->urb, dev, &urb_info);
+
util_dynarray_init(&vec->global_binding.bindings);
vec->dirty = ILO_DIRTY_ALL;
#include "core/ilo_state_sampler.h"
#include "core/ilo_state_sol.h"
#include "core/ilo_state_surface.h"
+#include "core/ilo_state_urb.h"
#include "core/ilo_state_viewport.h"
#include "core/ilo_state_zs.h"
#include "pipe/p_state.h"
struct ilo_fb_state fb;
+ struct ilo_state_urb urb;
+
/* shader resources */
struct ilo_sampler_state sampler[PIPE_SHADER_TYPES];
struct ilo_view_state view[PIPE_SHADER_TYPES];