ilo: make ilo_render_emit_query() direct
[mesa.git] / src / gallium / drivers / ilo / ilo_render.c
1 /*
2 * Mesa 3-D graphics library
3 *
4 * Copyright (C) 2013 LunarG, Inc.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included
14 * in all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22 * DEALINGS IN THE SOFTWARE.
23 *
24 * Authors:
25 * Chia-I Wu <olv@lunarg.com>
26 */
27
28 #include "genhw/genhw.h"
29 #include "intel_winsys.h"
30
31 #include "ilo_builder.h"
32 #include "ilo_builder_mi.h"
33 #include "ilo_builder_render.h"
34 #include "ilo_query.h"
35 #include "ilo_render_gen.h"
36 #include "ilo_render_gen7.h"
37 #include "ilo_render.h"
38
/*
 * One sample location.  Both coordinates are in U0.4 fixed point, i.e. in
 * units of 1/16; ilo_render_get_sample_position() divides them by 16.0f to
 * get floats.
 */
struct sample_position {
   uint8_t x;
   uint8_t y;
};
43
44 /* \see gen6_get_sample_position() */
45 static const struct sample_position sample_position_1x[1] = {
46 { 8, 8 },
47 };
48
49 static const struct sample_position sample_position_4x[4] = {
50 { 6, 2 }, /* distance from the center is sqrt(40) */
51 { 14, 6 }, /* distance from the center is sqrt(40) */
52 { 2, 10 }, /* distance from the center is sqrt(40) */
53 { 10, 14 }, /* distance from the center is sqrt(40) */
54 };
55
56 static const struct sample_position sample_position_8x[8] = {
57 { 7, 9 }, /* distance from the center is sqrt(2) */
58 { 9, 13 }, /* distance from the center is sqrt(26) */
59 { 11, 3 }, /* distance from the center is sqrt(34) */
60 { 13, 11 }, /* distance from the center is sqrt(34) */
61 { 1, 7 }, /* distance from the center is sqrt(50) */
62 { 5, 1 }, /* distance from the center is sqrt(58) */
63 { 15, 5 }, /* distance from the center is sqrt(58) */
64 { 3, 15 }, /* distance from the center is sqrt(74) */
65 };
66
67 struct ilo_render *
68 ilo_render_create(struct ilo_builder *builder)
69 {
70 struct ilo_render *render;
71 int i;
72
73 render = CALLOC_STRUCT(ilo_render);
74 if (!render)
75 return NULL;
76
77 render->dev = builder->dev;
78 render->builder = builder;
79
80 switch (ilo_dev_gen(render->dev)) {
81 case ILO_GEN(6):
82 ilo_render_init_gen6(render);
83 break;
84 case ILO_GEN(7):
85 case ILO_GEN(7.5):
86 ilo_render_init_gen7(render);
87 break;
88 default:
89 assert(!"unsupported GEN");
90 FREE(render);
91 return NULL;
92 break;
93 }
94
95 render->workaround_bo = intel_winsys_alloc_buffer(builder->winsys,
96 "PIPE_CONTROL workaround", 4096, false);
97 if (!render->workaround_bo) {
98 ilo_warn("failed to allocate PIPE_CONTROL workaround bo\n");
99 FREE(render);
100 return NULL;
101 }
102
103 render->packed_sample_position_1x =
104 sample_position_1x[0].x << 4 |
105 sample_position_1x[0].y;
106
107 /* pack into dwords */
108 for (i = 0; i < 4; i++) {
109 render->packed_sample_position_4x |=
110 sample_position_4x[i].x << (8 * i + 4) |
111 sample_position_4x[i].y << (8 * i);
112
113 render->packed_sample_position_8x[0] |=
114 sample_position_8x[i].x << (8 * i + 4) |
115 sample_position_8x[i].y << (8 * i);
116
117 render->packed_sample_position_8x[1] |=
118 sample_position_8x[4 + i].x << (8 * i + 4) |
119 sample_position_8x[4 + i].y << (8 * i);
120 }
121
122 ilo_render_invalidate_hw(render);
123 ilo_render_invalidate_builder(render);
124
125 return render;
126 }
127
128 void
129 ilo_render_destroy(struct ilo_render *render)
130 {
131 if (render->workaround_bo)
132 intel_bo_unreference(render->workaround_bo);
133
134 FREE(render);
135 }
136
137 void
138 ilo_render_get_sample_position(const struct ilo_render *render,
139 unsigned sample_count,
140 unsigned sample_index,
141 float *x, float *y)
142 {
143 const struct sample_position *pos;
144
145 switch (sample_count) {
146 case 1:
147 assert(sample_index < Elements(sample_position_1x));
148 pos = sample_position_1x;
149 break;
150 case 4:
151 assert(sample_index < Elements(sample_position_4x));
152 pos = sample_position_4x;
153 break;
154 case 8:
155 assert(sample_index < Elements(sample_position_8x));
156 pos = sample_position_8x;
157 break;
158 default:
159 assert(!"unknown sample count");
160 *x = 0.5f;
161 *y = 0.5f;
162 return;
163 break;
164 }
165
166 *x = (float) pos[sample_index].x / 16.0f;
167 *y = (float) pos[sample_index].y / 16.0f;
168 }
169
170 void
171 ilo_render_invalidate_hw(struct ilo_render *render)
172 {
173 render->hw_ctx_changed = true;
174 }
175
176 void
177 ilo_render_invalidate_builder(struct ilo_render *render)
178 {
179 render->batch_bo_changed = true;
180 render->state_bo_changed = true;
181 render->instruction_bo_changed = true;
182
183 /* Kernel flushes everything. Shouldn't we set all bits here? */
184 render->state.current_pipe_control_dw1 = 0;
185 }
186
187 /**
188 * Return the command length of ilo_render_emit_flush().
189 */
190 int
191 ilo_render_get_flush_len(const struct ilo_render *render)
192 {
193 int len;
194
195 ILO_DEV_ASSERT(render->dev, 6, 7.5);
196
197 len = GEN6_PIPE_CONTROL__SIZE;
198
199 /* plus gen6_wa_pre_pipe_control() */
200 if (ilo_dev_gen(render->dev) == ILO_GEN(6))
201 len *= 3;
202
203 return len;
204 }
205
206 /**
207 * Emit PIPE_CONTROLs to flush all caches.
208 */
209 void
210 ilo_render_emit_flush(struct ilo_render *render)
211 {
212 const uint32_t dw1 = GEN6_PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE |
213 GEN6_PIPE_CONTROL_RENDER_CACHE_FLUSH |
214 GEN6_PIPE_CONTROL_DEPTH_CACHE_FLUSH |
215 GEN6_PIPE_CONTROL_VF_CACHE_INVALIDATE |
216 GEN6_PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE |
217 GEN6_PIPE_CONTROL_CS_STALL;
218
219 ILO_DEV_ASSERT(render->dev, 6, 7.5);
220
221 if (ilo_dev_gen(render->dev) == ILO_GEN(6))
222 gen6_wa_pre_pipe_control(render, dw1);
223
224 gen6_PIPE_CONTROL(render->builder, dw1, NULL, 0, false);
225
226 render->state.current_pipe_control_dw1 |= dw1;
227 render->state.deferred_pipe_control_dw1 &= ~dw1;
228 }
229
230 /**
231 * Return the command length of ilo_render_emit_query().
232 */
233 int
234 ilo_render_get_query_len(const struct ilo_render *render,
235 unsigned query_type)
236 {
237 int len;
238
239 ILO_DEV_ASSERT(render->dev, 6, 7.5);
240
241 switch (query_type) {
242 case PIPE_QUERY_OCCLUSION_COUNTER:
243 len = GEN6_PIPE_CONTROL__SIZE;
244 if (ilo_dev_gen(render->dev) == ILO_GEN(6))
245 len *= 3;
246 break;
247 case PIPE_QUERY_TIMESTAMP:
248 case PIPE_QUERY_TIME_ELAPSED:
249 len = GEN6_PIPE_CONTROL__SIZE;
250 if (ilo_dev_gen(render->dev) == ILO_GEN(6))
251 len *= 2;
252 break;
253 case PIPE_QUERY_PRIMITIVES_GENERATED:
254 case PIPE_QUERY_PRIMITIVES_EMITTED:
255 len = GEN6_PIPE_CONTROL__SIZE;
256 if (ilo_dev_gen(render->dev) == ILO_GEN(6))
257 len *= 3;
258
259 len += GEN6_MI_STORE_REGISTER_MEM__SIZE * 2;
260 break;
261 case PIPE_QUERY_PIPELINE_STATISTICS:
262 if (ilo_dev_gen(render->dev) >= ILO_GEN(7)) {
263 const int num_regs = 10;
264 const int num_pads = 1;
265
266 len = GEN6_PIPE_CONTROL__SIZE +
267 GEN6_MI_STORE_REGISTER_MEM__SIZE * 2 * num_regs +
268 GEN6_MI_STORE_DATA_IMM__SIZE * num_pads;
269 } else {
270 const int num_regs = 8;
271 const int num_pads = 3;
272
273 len = GEN6_PIPE_CONTROL__SIZE * 3 +
274 GEN6_MI_STORE_REGISTER_MEM__SIZE * 2 * num_regs +
275 GEN6_MI_STORE_DATA_IMM__SIZE * num_pads;
276 }
277 break;
278 default:
279 len = 0;
280 break;
281 }
282
283 return len;
284 }
285
286 /**
287 * Emit PIPE_CONTROLs or MI_STORE_REGISTER_MEMs to store register values.
288 */
289 void
290 ilo_render_emit_query(struct ilo_render *render,
291 struct ilo_query *q, uint32_t offset)
292 {
293 const uint32_t pipeline_statistics_regs[11] = {
294 GEN6_REG_IA_VERTICES_COUNT,
295 GEN6_REG_IA_PRIMITIVES_COUNT,
296 GEN6_REG_VS_INVOCATION_COUNT,
297 GEN6_REG_GS_INVOCATION_COUNT,
298 GEN6_REG_GS_PRIMITIVES_COUNT,
299 GEN6_REG_CL_INVOCATION_COUNT,
300 GEN6_REG_CL_PRIMITIVES_COUNT,
301 GEN6_REG_PS_INVOCATION_COUNT,
302 (ilo_dev_gen(render->dev) >= ILO_GEN(7)) ?
303 GEN7_REG_HS_INVOCATION_COUNT : 0,
304 (ilo_dev_gen(render->dev) >= ILO_GEN(7)) ?
305 GEN7_REG_DS_INVOCATION_COUNT : 0,
306 0,
307 };
308 const uint32_t primitives_generated_reg =
309 (ilo_dev_gen(render->dev) >= ILO_GEN(7) && q->index > 0) ?
310 GEN7_REG_SO_PRIM_STORAGE_NEEDED(q->index) :
311 GEN6_REG_CL_INVOCATION_COUNT;
312 const uint32_t primitives_emitted_reg =
313 (ilo_dev_gen(render->dev) >= ILO_GEN(7)) ?
314 GEN7_REG_SO_NUM_PRIMS_WRITTEN(q->index) :
315 GEN6_REG_SO_NUM_PRIMS_WRITTEN;
316 const uint32_t *regs;
317 int reg_count = 0, i;
318 uint32_t pipe_control_dw1 = 0;
319
320 ILO_DEV_ASSERT(render->dev, 6, 7.5);
321
322 switch (q->type) {
323 case PIPE_QUERY_OCCLUSION_COUNTER:
324 pipe_control_dw1 = GEN6_PIPE_CONTROL_DEPTH_STALL |
325 GEN6_PIPE_CONTROL_WRITE_PS_DEPTH_COUNT;
326 break;
327 case PIPE_QUERY_TIMESTAMP:
328 case PIPE_QUERY_TIME_ELAPSED:
329 pipe_control_dw1 = GEN6_PIPE_CONTROL_WRITE_TIMESTAMP;
330 break;
331 case PIPE_QUERY_PRIMITIVES_GENERATED:
332 regs = &primitives_generated_reg;
333 reg_count = 1;
334 break;
335 case PIPE_QUERY_PRIMITIVES_EMITTED:
336 regs = &primitives_emitted_reg;
337 reg_count = 1;
338 break;
339 case PIPE_QUERY_PIPELINE_STATISTICS:
340 regs = pipeline_statistics_regs;
341 reg_count = Elements(pipeline_statistics_regs);
342 break;
343 default:
344 break;
345 }
346
347 if (pipe_control_dw1) {
348 if (ilo_dev_gen(render->dev) == ILO_GEN(6))
349 gen6_wa_pre_pipe_control(render, pipe_control_dw1);
350
351 gen6_PIPE_CONTROL(render->builder, pipe_control_dw1,
352 q->bo, offset, true);
353
354 render->state.current_pipe_control_dw1 |= pipe_control_dw1;
355 render->state.deferred_pipe_control_dw1 &= ~pipe_control_dw1;
356 }
357
358 if (!reg_count)
359 return;
360
361 ilo_render_emit_flush(render);
362
363 for (i = 0; i < reg_count; i++) {
364 if (regs[i]) {
365 /* store lower 32 bits */
366 gen6_MI_STORE_REGISTER_MEM(render->builder, q->bo, offset, regs[i]);
367 /* store higher 32 bits */
368 gen6_MI_STORE_REGISTER_MEM(render->builder, q->bo,
369 offset + 4, regs[i] + 4);
370 } else {
371 gen6_MI_STORE_DATA_IMM(render->builder, q->bo, offset, 0, true);
372 }
373
374 offset += 8;
375 }
376 }