From f790d6e0b48ca35291f50aeb7462130723d62870 Mon Sep 17 00:00:00 2001 From: Kenneth Graunke Date: Wed, 10 May 2017 12:41:39 -0700 Subject: [PATCH] i965: Port Gen4-5 VS_STATE to genxml. It's actually not that much code. Reviewed-by: Rafael Antognolli --- src/intel/genxml/gen4.xml | 2 +- src/intel/genxml/gen45.xml | 2 +- src/intel/genxml/gen5.xml | 2 +- src/mesa/drivers/dri/i965/Makefile.sources | 1 - src/mesa/drivers/dri/i965/brw_state.h | 1 - src/mesa/drivers/dri/i965/brw_structs.h | 35 ---- src/mesa/drivers/dri/i965/brw_vs_state.c | 192 ------------------ src/mesa/drivers/dri/i965/genX_state_upload.c | 74 ++++++- 8 files changed, 71 insertions(+), 238 deletions(-) delete mode 100644 src/mesa/drivers/dri/i965/brw_vs_state.c diff --git a/src/intel/genxml/gen4.xml b/src/intel/genxml/gen4.xml index d3a2f9264e1..4748e0b0d7a 100644 --- a/src/intel/genxml/gen4.xml +++ b/src/intel/genxml/gen4.xml @@ -753,7 +753,7 @@ - + diff --git a/src/intel/genxml/gen45.xml b/src/intel/genxml/gen45.xml index 547e2785444..04251e0ada0 100644 --- a/src/intel/genxml/gen45.xml +++ b/src/intel/genxml/gen45.xml @@ -704,7 +704,7 @@ - + diff --git a/src/intel/genxml/gen5.xml b/src/intel/genxml/gen5.xml index 933a434487e..e834bdf5d45 100644 --- a/src/intel/genxml/gen5.xml +++ b/src/intel/genxml/gen5.xml @@ -860,7 +860,7 @@ - + diff --git a/src/mesa/drivers/dri/i965/Makefile.sources b/src/mesa/drivers/dri/i965/Makefile.sources index 9e567cbc908..349777cf24a 100644 --- a/src/mesa/drivers/dri/i965/Makefile.sources +++ b/src/mesa/drivers/dri/i965/Makefile.sources @@ -70,7 +70,6 @@ i965_FILES = \ brw_util.h \ brw_vs.c \ brw_vs.h \ - brw_vs_state.c \ brw_vs_surface_state.c \ brw_wm.c \ brw_wm.h \ diff --git a/src/mesa/drivers/dri/i965/brw_state.h b/src/mesa/drivers/dri/i965/brw_state.h index 4727e2abce3..a50ec4d565c 100644 --- a/src/mesa/drivers/dri/i965/brw_state.h +++ b/src/mesa/drivers/dri/i965/brw_state.h @@ -79,7 +79,6 @@ extern const struct brw_tracked_state brw_tes_image_surfaces; extern const struct brw_tracked_state brw_gs_ubo_surfaces; extern const struct brw_tracked_state brw_gs_abo_surfaces; extern const struct brw_tracked_state brw_gs_image_surfaces; -extern const struct brw_tracked_state brw_vs_unit; extern const struct brw_tracked_state brw_renderbuffer_surfaces; extern const struct brw_tracked_state brw_renderbuffer_read_surfaces; extern const struct brw_tracked_state brw_texture_surfaces; diff --git a/src/mesa/drivers/dri/i965/brw_structs.h b/src/mesa/drivers/dri/i965/brw_structs.h index 55338c0e243..f5774138e22 100644 --- a/src/mesa/drivers/dri/i965/brw_structs.h +++ b/src/mesa/drivers/dri/i965/brw_structs.h @@ -483,41 +483,6 @@ struct brw_gs_unit_state }; -struct brw_vs_unit_state -{ - struct thread0 thread0; - struct thread1 thread1; - struct thread2 thread2; - struct thread3 thread3; - - struct - { - unsigned pad0:10; - unsigned stats_enable:1; - unsigned nr_urb_entries:7; - unsigned pad1:1; - unsigned urb_entry_allocation_size:5; - unsigned pad2:1; - unsigned max_threads:6; - unsigned pad3:1; - } thread4; - - struct - { - unsigned sampler_count:3; - unsigned pad0:2; - unsigned sampler_state_pointer:27; - } vs5; - - struct - { - unsigned vs_enable:1; - unsigned vert_cache_disable:1; - unsigned pad0:30; - } vs6; -}; - - struct brw_wm_unit_state { struct thread0 thread0; diff --git a/src/mesa/drivers/dri/i965/brw_vs_state.c b/src/mesa/drivers/dri/i965/brw_vs_state.c deleted file mode 100644 index fafe305f4f7..00000000000 --- a/src/mesa/drivers/dri/i965/brw_vs_state.c +++ /dev/null @@ -1,192 +0,0 @@ -/* - Copyright (C) Intel Corp. 2006. All Rights Reserved. - Intel funded Tungsten Graphics to - develop this 3D driver. - - Permission is hereby granted, free of charge, to any person obtaining - a copy of this software and associated documentation files (the - "Software"), to deal in the Software without restriction, including - without limitation the rights to use, copy, modify, merge, publish, - distribute, sublicense, and/or sell copies of the Software, and to - permit persons to whom the Software is furnished to do so, subject to - the following conditions: - - The above copyright notice and this permission notice (including the - next paragraph) shall be included in all copies or substantial - portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE - LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - - **********************************************************************/ - /* - * Authors: - * Keith Whitwell - */ - - - -#include "intel_batchbuffer.h" -#include "brw_context.h" -#include "brw_state.h" -#include "brw_defines.h" -#include "main/macros.h" - -static void -brw_upload_vs_unit(struct brw_context *brw) -{ - const struct gen_device_info *devinfo = &brw->screen->devinfo; - struct brw_stage_state *stage_state = &brw->vs.base; - const struct brw_stage_prog_data *prog_data = stage_state->prog_data; - const struct brw_vue_prog_data *vue_prog_data = - brw_vue_prog_data(stage_state->prog_data); - - struct brw_vs_unit_state *vs; - - vs = brw_state_batch(brw, sizeof(*vs), 32, &stage_state->state_offset); - memset(vs, 0, sizeof(*vs)); - - /* BRW_NEW_PROGRAM_CACHE | BRW_NEW_VS_PROG_DATA */ - vs->thread0.grf_reg_count = ALIGN(vue_prog_data->total_grf, 16) / 16 - 1; - vs->thread0.kernel_start_pointer = - brw_program_reloc(brw, - stage_state->state_offset + - offsetof(struct brw_vs_unit_state, thread0), - stage_state->prog_offset + - (vs->thread0.grf_reg_count << 1)) >> 6; - - if (prog_data->use_alt_mode) - vs->thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754; - else - vs->thread1.floating_point_mode = BRW_FLOATING_POINT_IEEE_754; - - /* Choosing multiple program flow means that we may get 2-vertex threads, - * which will have the channel mask for dwords 4-7 enabled in the thread, - * and those dwords will be written to the second URB handle when we - * brw_urb_WRITE() results. - */ - /* Force single program flow on Ironlake. We cannot reliably get - * all applications working without it. See: - * https://bugs.freedesktop.org/show_bug.cgi?id=29172 - * - * The most notable and reliably failing application is the Humus - * demo "CelShading" - */ - vs->thread1.single_program_flow = (brw->gen == 5); - - vs->thread1.binding_table_entry_count = - prog_data->binding_table.size_bytes / 4; - - if (prog_data->total_scratch != 0) { - vs->thread2.scratch_space_base_pointer = - stage_state->scratch_bo->offset64 >> 10; /* reloc */ - vs->thread2.per_thread_scratch_space = - ffs(stage_state->per_thread_scratch) - 11; - } else { - vs->thread2.scratch_space_base_pointer = 0; - vs->thread2.per_thread_scratch_space = 0; - } - - vs->thread3.urb_entry_read_length = vue_prog_data->urb_read_length; - vs->thread3.const_urb_entry_read_length = prog_data->curb_read_length; - vs->thread3.dispatch_grf_start_reg = prog_data->dispatch_grf_start_reg; - vs->thread3.urb_entry_read_offset = 0; - - /* BRW_NEW_PUSH_CONSTANT_ALLOCATION */ - vs->thread3.const_urb_entry_read_offset = brw->curbe.vs_start * 2; - - /* BRW_NEW_URB_FENCE */ - if (brw->gen == 5) { - switch (brw->urb.nr_vs_entries) { - case 8: - case 12: - case 16: - case 32: - case 64: - case 96: - case 128: - case 168: - case 192: - case 224: - case 256: - vs->thread4.nr_urb_entries = brw->urb.nr_vs_entries >> 2; - break; - default: - unreachable("not reached"); - } - } else { - switch (brw->urb.nr_vs_entries) { - case 8: - case 12: - case 16: - case 32: - break; - case 64: - assert(brw->is_g4x); - break; - default: - unreachable("not reached"); - } - vs->thread4.nr_urb_entries = brw->urb.nr_vs_entries; - } - - vs->thread4.urb_entry_allocation_size = brw->urb.vsize - 1; - - vs->thread4.max_threads = CLAMP(brw->urb.nr_vs_entries / 2, - 1, devinfo->max_vs_threads) - 1; - - if (brw->gen == 5) - vs->vs5.sampler_count = 0; /* hardware requirement */ - else { - vs->vs5.sampler_count = (stage_state->sampler_count + 3) / 4; - } - - /* Vertex program always enabled: - */ - vs->vs6.vs_enable = 1; - - /* Set the sampler state pointer, and its reloc - */ - if (stage_state->sampler_count) { - /* BRW_NEW_SAMPLER_STATE_TABLE - reloc */ - vs->vs5.sampler_state_pointer = - (brw->batch.bo->offset64 + stage_state->sampler_offset) >> 5; - brw_emit_reloc(&brw->batch, - stage_state->state_offset + - offsetof(struct brw_vs_unit_state, vs5), - brw->batch.bo, - (stage_state->sampler_offset | vs->vs5.sampler_count), - I915_GEM_DOMAIN_INSTRUCTION, 0); - } - - /* Emit scratch space relocation */ - if (prog_data->total_scratch != 0) { - brw_emit_reloc(&brw->batch, - stage_state->state_offset + - offsetof(struct brw_vs_unit_state, thread2), - stage_state->scratch_bo, - vs->thread2.per_thread_scratch_space, - I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER); - } - - brw->ctx.NewDriverState |= BRW_NEW_GEN4_UNIT_STATE; -} - -const struct brw_tracked_state brw_vs_unit = { - .dirty = { - .mesa = 0, - .brw = BRW_NEW_BATCH | - BRW_NEW_BLORP | - BRW_NEW_PROGRAM_CACHE | - BRW_NEW_PUSH_CONSTANT_ALLOCATION | - BRW_NEW_SAMPLER_STATE_TABLE | - BRW_NEW_URB_FENCE | - BRW_NEW_VS_PROG_DATA, - }, - .emit = brw_upload_vs_unit, -}; diff --git a/src/mesa/drivers/dri/i965/genX_state_upload.c b/src/mesa/drivers/dri/i965/genX_state_upload.c index fccc66864a7..b949d5b448d 100644 --- a/src/mesa/drivers/dri/i965/genX_state_upload.c +++ b/src/mesa/drivers/dri/i965/genX_state_upload.c @@ -132,6 +132,17 @@ instruction_bo(struct brw_bo *bo, uint32_t offset) }; } +static inline struct brw_address +instruction_ro_bo(struct brw_bo *bo, uint32_t offset) +{ + return (struct brw_address) { + .bo = bo, + .offset = offset, + .read_domains = I915_GEM_DOMAIN_INSTRUCTION, + .write_domain = 0, + }; +} + static inline struct brw_address vertex_bo(struct brw_bo *bo, uint32_t offset) { @@ -1693,8 +1704,22 @@ static const struct brw_tracked_state genX(wm_state) = { /* ---------------------------------------------------------------------- */ +#if GEN_GEN == 4 +static inline struct brw_address +KSP(struct brw_context *brw, uint32_t offset) +{ + return instruction_bo(brw->cache.bo, offset); +} +#else +static inline uint32_t +KSP(struct brw_context *brw, uint32_t offset) +{ + return offset; +} +#endif + #define INIT_THREAD_DISPATCH_FIELDS(pkt, prefix) \ - pkt.KernelStartPointer = stage_state->prog_offset; \ + pkt.KernelStartPointer = KSP(brw, stage_state->prog_offset); \ pkt.SamplerCount = \ DIV_ROUND_UP(CLAMP(stage_state->sampler_count, 0, 16), 4); \ pkt.BindingTableEntryCount = \ @@ -1716,12 +1741,12 @@ static const struct brw_tracked_state genX(wm_state) = { pkt.StatisticsEnable = true; \ pkt.Enable = true; -#if GEN_GEN >= 6 static void genX(upload_vs_state)(struct brw_context *brw) { + UNUSED struct gl_context *ctx = &brw->ctx; const struct gen_device_info *devinfo = &brw->screen->devinfo; - const struct brw_stage_state *stage_state = &brw->vs.base; + struct brw_stage_state *stage_state = &brw->vs.base; /* BRW_NEW_VS_PROG_DATA */ const struct brw_vue_prog_data *vue_prog_data = @@ -1755,11 +1780,44 @@ genX(upload_vs_state)(struct brw_context *brw) if (GEN_GEN == 7 && devinfo->is_ivybridge) gen7_emit_vs_workaround_flush(brw); +#if GEN_GEN >= 6 brw_batch_emit(brw, GENX(3DSTATE_VS), vs) { +#else + ctx->NewDriverState |= BRW_NEW_GEN4_UNIT_STATE; + brw_state_emit(brw, GENX(VS_STATE), 32, &stage_state->state_offset, vs) { +#endif INIT_THREAD_DISPATCH_FIELDS(vs, Vertex); vs.MaximumNumberofThreads = devinfo->max_vs_threads - 1; +#if GEN_GEN < 6 + vs.GRFRegisterCount = DIV_ROUND_UP(vue_prog_data->total_grf, 16) - 1; + vs.ConstantURBEntryReadLength = stage_prog_data->curb_read_length; + vs.ConstantURBEntryReadOffset = brw->curbe.vs_start * 2; + + vs.NumberofURBEntries = brw->urb.nr_vs_entries >> (GEN_GEN == 5 ? 2 : 0); + vs.URBEntryAllocationSize = brw->urb.vsize - 1; + + vs.MaximumNumberofThreads = + CLAMP(brw->urb.nr_vs_entries / 2, 1, devinfo->max_vs_threads) - 1; + + vs.StatisticsEnable = false; + vs.SamplerStateOffset = + instruction_ro_bo(brw->batch.bo, stage_state->sampler_offset); +#endif + +#if GEN_GEN == 5 + /* Force single program flow on Ironlake. We cannot reliably get + * all applications working without it. See: + * https://bugs.freedesktop.org/show_bug.cgi?id=29172 + * + * The most notable and reliably failing application is the Humus + * demo "CelShading" + */ + vs.SingleProgramFlow = true; + vs.SamplerCount = 0; /* hardware requirement */ +#endif + #if GEN_GEN >= 8 vs.SIMD8DispatchEnable = vue_prog_data->dispatch_mode == DISPATCH_MODE_SIMD8; @@ -1801,11 +1859,15 @@ static const struct brw_tracked_state genX(vs_state) = { BRW_NEW_BLORP | BRW_NEW_CONTEXT | BRW_NEW_VS_PROG_DATA | - (GEN_GEN == 6 ? BRW_NEW_VERTEX_PROGRAM : 0), + (GEN_GEN == 6 ? BRW_NEW_VERTEX_PROGRAM : 0) | + (GEN_GEN <= 5 ? BRW_NEW_PUSH_CONSTANT_ALLOCATION | + BRW_NEW_PROGRAM_CACHE | + BRW_NEW_SAMPLER_STATE_TABLE | + BRW_NEW_URB_FENCE + : 0), }, .emit = genX(upload_vs_state), }; -#endif /* ---------------------------------------------------------------------- */ @@ -3884,7 +3946,7 @@ genX(init_atoms)(struct brw_context *brw) &brw_wm_unit, &brw_sf_vp, &brw_sf_unit, - &brw_vs_unit, /* always required, enabled or not */ + &genX(vs_state), /* always required, enabled or not */ &brw_clip_unit, &brw_gs_unit, -- 2.30.2