i965/gen9: Optimize slice and subslice load balancing behavior.
[mesa.git] / src / mesa / drivers / dri / i965 / gen4_blorp_exec.h
1 /*
2 * Copyright © 2016 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 static inline struct blorp_address
25 dynamic_state_address(struct blorp_batch *batch, uint32_t offset)
26 {
27 assert(batch->blorp->driver_ctx == batch->driver_batch);
28 struct brw_context *brw = batch->driver_batch;
29
30 return (struct blorp_address) {
31 .buffer = brw->batch.state.bo,
32 .offset = offset,
33 };
34 }
35
36 static inline struct blorp_address
37 instruction_state_address(struct blorp_batch *batch, uint32_t offset)
38 {
39 assert(batch->blorp->driver_ctx == batch->driver_batch);
40 struct brw_context *brw = batch->driver_batch;
41
42 return (struct blorp_address) {
43 .buffer = brw->cache.bo,
44 .offset = offset,
45 };
46 }
47
48 static struct blorp_address
49 blorp_emit_vs_state(struct blorp_batch *batch)
50 {
51 assert(batch->blorp->driver_ctx == batch->driver_batch);
52 struct brw_context *brw = batch->driver_batch;
53
54 uint32_t offset;
55 blorp_emit_dynamic(batch, GENX(VS_STATE), vs, 64, &offset) {
56 vs.Enable = false;
57 vs.URBEntryAllocationSize = brw->urb.vsize - 1;
58 #if GEN_GEN == 5
59 vs.NumberofURBEntries = brw->urb.nr_vs_entries >> 2;
60 #else
61 vs.NumberofURBEntries = brw->urb.nr_vs_entries;
62 #endif
63 }
64
65 return dynamic_state_address(batch, offset);
66 }
67
68 static struct blorp_address
69 blorp_emit_sf_state(struct blorp_batch *batch,
70 const struct blorp_params *params)
71 {
72 assert(batch->blorp->driver_ctx == batch->driver_batch);
73 struct brw_context *brw = batch->driver_batch;
74 const struct brw_sf_prog_data *prog_data = params->sf_prog_data;
75
76 uint32_t offset;
77 blorp_emit_dynamic(batch, GENX(SF_STATE), sf, 64, &offset) {
78 #if GEN_GEN == 4
79 sf.KernelStartPointer =
80 instruction_state_address(batch, params->sf_prog_kernel);
81 #else
82 sf.KernelStartPointer = params->sf_prog_kernel;
83 #endif
84 sf.GRFRegisterCount = DIV_ROUND_UP(prog_data->total_grf, 16) - 1;
85 sf.VertexURBEntryReadLength = prog_data->urb_read_length;
86 sf.VertexURBEntryReadOffset = BRW_SF_URB_ENTRY_READ_OFFSET;
87 sf.DispatchGRFStartRegisterForURBData = 3;
88
89 sf.URBEntryAllocationSize = brw->urb.sfsize - 1;
90 sf.NumberofURBEntries = brw->urb.nr_sf_entries;
91
92 #if GEN_GEN == 5
93 sf.MaximumNumberofThreads = MIN2(48, brw->urb.nr_sf_entries) - 1;
94 #else
95 sf.MaximumNumberofThreads = MIN2(24, brw->urb.nr_sf_entries) - 1;
96 #endif
97
98 sf.ViewportTransformEnable = false;
99
100 sf.CullMode = CULLMODE_NONE;
101 }
102
103 return dynamic_state_address(batch, offset);
104 }
105
106 static struct blorp_address
107 blorp_emit_wm_state(struct blorp_batch *batch,
108 const struct blorp_params *params)
109 {
110 const struct brw_wm_prog_data *prog_data = params->wm_prog_data;
111
112 uint32_t offset;
113 blorp_emit_dynamic(batch, GENX(WM_STATE), wm, 64, &offset) {
114 if (params->src.enabled) {
115 /* Iron Lake can't do sampler prefetch */
116 wm.SamplerCount = (GEN_GEN != 5);
117 wm.BindingTableEntryCount = 2;
118 uint32_t sampler = blorp_emit_sampler_state(batch);
119 wm.SamplerStatePointer = dynamic_state_address(batch, sampler);
120 }
121
122 if (prog_data) {
123 wm.DispatchGRFStartRegisterForConstantSetupData0 =
124 prog_data->base.dispatch_grf_start_reg;
125 wm.SetupURBEntryReadLength = prog_data->num_varying_inputs * 2;
126 wm.SetupURBEntryReadOffset = 0;
127
128 wm.DepthCoefficientURBReadOffset = 1;
129 wm.PixelShaderKillsPixel = prog_data->uses_kill;
130 wm.ThreadDispatchEnable = true;
131 wm.EarlyDepthTestEnable = true;
132
133 wm._8PixelDispatchEnable = prog_data->dispatch_8;
134 wm._16PixelDispatchEnable = prog_data->dispatch_16;
135 wm._32PixelDispatchEnable = prog_data->dispatch_32;
136
137 #if GEN_GEN == 4
138 wm.KernelStartPointer0 =
139 instruction_state_address(batch, params->wm_prog_kernel);
140 wm.GRFRegisterCount0 = brw_wm_prog_data_reg_blocks(prog_data, wm, 0);
141 #else
142 wm.KernelStartPointer0 = params->wm_prog_kernel +
143 brw_wm_prog_data_prog_offset(prog_data, wm, 0);
144 wm.KernelStartPointer1 = params->wm_prog_kernel +
145 brw_wm_prog_data_prog_offset(prog_data, wm, 1);
146 wm.KernelStartPointer2 = params->wm_prog_kernel +
147 brw_wm_prog_data_prog_offset(prog_data, wm, 2);
148 wm.GRFRegisterCount0 = brw_wm_prog_data_reg_blocks(prog_data, wm, 0);
149 wm.GRFRegisterCount1 = brw_wm_prog_data_reg_blocks(prog_data, wm, 1);
150 wm.GRFRegisterCount2 = brw_wm_prog_data_reg_blocks(prog_data, wm, 2);
151 #endif
152 }
153
154 wm.MaximumNumberofThreads =
155 batch->blorp->compiler->devinfo->max_wm_threads - 1;
156 }
157
158 return dynamic_state_address(batch, offset);
159 }
160
161 static struct blorp_address
162 blorp_emit_color_calc_state(struct blorp_batch *batch)
163 {
164 uint32_t cc_viewport = blorp_emit_cc_viewport(batch);
165
166 uint32_t offset;
167 blorp_emit_dynamic(batch, GENX(COLOR_CALC_STATE), cc, 64, &offset) {
168 cc.CCViewportStatePointer = dynamic_state_address(batch, cc_viewport);
169 }
170
171 return dynamic_state_address(batch, offset);
172 }
173
174 static void
175 blorp_emit_pipeline(struct blorp_batch *batch,
176 const struct blorp_params *params)
177 {
178 assert(batch->blorp->driver_ctx == batch->driver_batch);
179 struct brw_context *brw = batch->driver_batch;
180
181 emit_urb_config(batch, params);
182
183 blorp_emit(batch, GENX(3DSTATE_PIPELINED_POINTERS), pp) {
184 pp.PointertoVSState = blorp_emit_vs_state(batch);
185 pp.GSEnable = false;
186 pp.ClipEnable = false;
187 pp.PointertoSFState = blorp_emit_sf_state(batch, params);
188 pp.PointertoWMState = blorp_emit_wm_state(batch, params);
189 pp.PointertoColorCalcState = blorp_emit_color_calc_state(batch);
190 }
191
192 brw_upload_urb_fence(brw);
193
194 blorp_emit(batch, GENX(CS_URB_STATE), curb);
195 blorp_emit(batch, GENX(CONSTANT_BUFFER), curb);
196 }