i965: Move program_id to intel_screen instead of brw_context.
[mesa.git] / src / mesa / drivers / dri / i965 / brw_program.c
1 /*
2 Copyright (C) Intel Corp. 2006. All Rights Reserved.
3 Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
4 develop this 3D driver.
5
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
13
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
17
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25
26 **********************************************************************/
27 /*
28 * Authors:
29 * Keith Whitwell <keith@tungstengraphics.com>
30 */
31
32 #include <pthread.h>
33 #include "main/imports.h"
34 #include "main/enums.h"
35 #include "main/shaderobj.h"
36 #include "program/prog_parameter.h"
37 #include "program/program.h"
38 #include "program/programopt.h"
39 #include "tnl/tnl.h"
40 #include "glsl/ralloc.h"
41
42 #include "brw_context.h"
43 #include "brw_wm.h"
44
45 static unsigned
46 get_new_program_id(struct intel_screen *screen)
47 {
48 static pthread_mutex_t m = PTHREAD_MUTEX_INITIALIZER;
49 pthread_mutex_lock(&m);
50 unsigned id = screen->program_id++;
51 pthread_mutex_unlock(&m);
52 return id;
53 }
54
55 static void brwBindProgram( struct gl_context *ctx,
56 GLenum target,
57 struct gl_program *prog )
58 {
59 struct brw_context *brw = brw_context(ctx);
60
61 switch (target) {
62 case GL_VERTEX_PROGRAM_ARB:
63 brw->state.dirty.brw |= BRW_NEW_VERTEX_PROGRAM;
64 break;
65 case GL_FRAGMENT_PROGRAM_ARB:
66 brw->state.dirty.brw |= BRW_NEW_FRAGMENT_PROGRAM;
67 break;
68 }
69 }
70
71 static struct gl_program *brwNewProgram( struct gl_context *ctx,
72 GLenum target,
73 GLuint id )
74 {
75 struct brw_context *brw = brw_context(ctx);
76
77 switch (target) {
78 case GL_VERTEX_PROGRAM_ARB: {
79 struct brw_vertex_program *prog = CALLOC_STRUCT(brw_vertex_program);
80 if (prog) {
81 prog->id = get_new_program_id(brw->intel.intelScreen);
82
83 return _mesa_init_vertex_program( ctx, &prog->program,
84 target, id );
85 }
86 else
87 return NULL;
88 }
89
90 case GL_FRAGMENT_PROGRAM_ARB: {
91 struct brw_fragment_program *prog = CALLOC_STRUCT(brw_fragment_program);
92 if (prog) {
93 prog->id = get_new_program_id(brw->intel.intelScreen);
94
95 return _mesa_init_fragment_program( ctx, &prog->program,
96 target, id );
97 }
98 else
99 return NULL;
100 }
101
102 default:
103 return _mesa_new_program(ctx, target, id);
104 }
105 }
106
/**
 * DeleteProgram driver hook: no driver-side work is done here, the
 * program is handed straight to _mesa_delete_program().
 */
static void
brwDeleteProgram(struct gl_context *ctx, struct gl_program *prog)
{
   _mesa_delete_program(ctx, prog);
}
112
113
114 static GLboolean
115 brwIsProgramNative(struct gl_context *ctx,
116 GLenum target,
117 struct gl_program *prog)
118 {
119 return true;
120 }
121
122 static GLboolean
123 brwProgramStringNotify(struct gl_context *ctx,
124 GLenum target,
125 struct gl_program *prog)
126 {
127 struct brw_context *brw = brw_context(ctx);
128
129 if (target == GL_FRAGMENT_PROGRAM_ARB) {
130 struct gl_fragment_program *fprog = (struct gl_fragment_program *) prog;
131 struct brw_fragment_program *newFP = brw_fragment_program(fprog);
132 const struct brw_fragment_program *curFP =
133 brw_fragment_program_const(brw->fragment_program);
134
135 if (newFP == curFP)
136 brw->state.dirty.brw |= BRW_NEW_FRAGMENT_PROGRAM;
137 newFP->id = get_new_program_id(brw->intel.intelScreen);
138 }
139 else if (target == GL_VERTEX_PROGRAM_ARB) {
140 struct gl_vertex_program *vprog = (struct gl_vertex_program *) prog;
141 struct brw_vertex_program *newVP = brw_vertex_program(vprog);
142 const struct brw_vertex_program *curVP =
143 brw_vertex_program_const(brw->vertex_program);
144
145 if (newVP == curVP)
146 brw->state.dirty.brw |= BRW_NEW_VERTEX_PROGRAM;
147 if (newVP->program.IsPositionInvariant) {
148 _mesa_insert_mvp_code(ctx, &newVP->program);
149 }
150 newVP->id = get_new_program_id(brw->intel.intelScreen);
151
152 /* Also tell tnl about it:
153 */
154 _tnl_program_string(ctx, target, prog);
155 }
156
157 brw_add_texrect_params(prog);
158
159 return true;
160 }
161
162 void
163 brw_add_texrect_params(struct gl_program *prog)
164 {
165 for (int texunit = 0; texunit < BRW_MAX_TEX_UNIT; texunit++) {
166 if (!(prog->TexturesUsed[texunit] & (1 << TEXTURE_RECT_INDEX)))
167 continue;
168
169 int tokens[STATE_LENGTH] = {
170 STATE_INTERNAL,
171 STATE_TEXRECT_SCALE,
172 texunit,
173 0,
174 0
175 };
176
177 _mesa_add_state_reference(prog->Parameters, (gl_state_index *)tokens);
178 }
179 }
180
/**
 * Round a scratch-space request up to the allocation granularity.
 *
 * Per-thread scratch space is a power-of-two multiple of 1KB, so the
 * result is the smallest value of the form 1024 * 2^k that is not less
 * than \p size.  Requests of 1024 bytes or less (including zero and
 * negative values) yield 1024.
 */
int
brw_get_scratch_size(int size)
{
   int scratch = 1024;

   while (scratch < size)
      scratch *= 2;

   return scratch;
}
192
193 void
194 brw_get_scratch_bo(struct intel_context *intel,
195 drm_intel_bo **scratch_bo, int size)
196 {
197 drm_intel_bo *old_bo = *scratch_bo;
198
199 if (old_bo && old_bo->size < size) {
200 drm_intel_bo_unreference(old_bo);
201 old_bo = NULL;
202 }
203
204 if (!old_bo) {
205 *scratch_bo = drm_intel_bo_alloc(intel->bufmgr, "scratch bo", size, 4096);
206 }
207 }
208
209 void brwInitFragProgFuncs( struct dd_function_table *functions )
210 {
211 assert(functions->ProgramStringNotify == _tnl_program_string);
212
213 functions->BindProgram = brwBindProgram;
214 functions->NewProgram = brwNewProgram;
215 functions->DeleteProgram = brwDeleteProgram;
216 functions->IsProgramNative = brwIsProgramNative;
217 functions->ProgramStringNotify = brwProgramStringNotify;
218
219 functions->NewShader = brw_new_shader;
220 functions->NewShaderProgram = brw_new_shader_program;
221 functions->LinkShader = brw_link_shader;
222 }
223
224 void
225 brw_init_shader_time(struct brw_context *brw)
226 {
227 struct intel_context *intel = &brw->intel;
228
229 const int max_entries = 4096;
230 brw->shader_time.bo = drm_intel_bo_alloc(intel->bufmgr, "shader time",
231 max_entries * 4, 4096);
232 brw->shader_time.programs = rzalloc_array(brw, struct gl_shader_program *,
233 max_entries);
234 brw->shader_time.types = rzalloc_array(brw, enum shader_time_shader_type,
235 max_entries);
236 brw->shader_time.cumulative = rzalloc_array(brw, uint64_t,
237 max_entries);
238 brw->shader_time.max_entries = max_entries;
239 }
240
/**
 * qsort() comparator for an array of pointers to uint64_t values.
 *
 * Orders by the pointed-to values, ascending.  The result is computed from
 * comparisons rather than a subtraction, because a 64-bit difference would
 * not survive being narrowed to int.
 *
 * \return -1, 0 or 1
 */
static int
compare_time(const void *a, const void *b)
{
   const uint64_t lhs = **(uint64_t * const *)a;
   const uint64_t rhs = **(uint64_t * const *)b;

   return (lhs > rhs) - (lhs < rhs);
}
255
256 static void
257 get_written_and_reset(struct brw_context *brw, int i,
258 uint64_t *written, uint64_t *reset)
259 {
260 enum shader_time_shader_type type = brw->shader_time.types[i];
261 assert(type == ST_VS || type == ST_FS8 || type == ST_FS16);
262
263 /* Find where we recorded written and reset. */
264 int wi, ri;
265
266 for (wi = i; brw->shader_time.types[wi] != type + 1; wi++)
267 ;
268
269 for (ri = i; brw->shader_time.types[ri] != type + 2; ri++)
270 ;
271
272 *written = brw->shader_time.cumulative[wi];
273 *reset = brw->shader_time.cumulative[ri];
274 }
275
/**
 * Print one row of the shader-time report to stdout.
 *
 * \param name        row label, left-justified and padded to 10 columns
 *                    (longer labels are printed in full)
 * \param shader_num  number printed after the label
 * \param time        cycle count for this row
 * \param total       cycle count that \p time is shown as a percentage of
 *
 * The cycle count is an unsigned 64-bit quantity, so it is printed with an
 * unsigned conversion; a signed one would show values above INT64_MAX as
 * negative numbers.
 */
static void
print_shader_time_line(const char *name, int shader_num,
                       uint64_t time, uint64_t total)
{
   printf("%-10s%4d: ", name, shader_num);

   printf("%16llu (%7.2f Gcycles) %4.1f%%\n",
          (unsigned long long)time,
          (double)time / 1000000000.0,
          (double)time / total * 100.0);
}
290
291 static void
292 brw_report_shader_time(struct brw_context *brw)
293 {
294 if (!brw->shader_time.bo || !brw->shader_time.num_entries)
295 return;
296
297 uint64_t scaled[brw->shader_time.num_entries];
298 uint64_t *sorted[brw->shader_time.num_entries];
299 uint64_t total_by_type[ST_FS16 + 1];
300 memset(total_by_type, 0, sizeof(total_by_type));
301 double total = 0;
302 for (int i = 0; i < brw->shader_time.num_entries; i++) {
303 uint64_t written = 0, reset = 0;
304 enum shader_time_shader_type type = brw->shader_time.types[i];
305
306 sorted[i] = &scaled[i];
307
308 switch (type) {
309 case ST_VS_WRITTEN:
310 case ST_VS_RESET:
311 case ST_FS8_WRITTEN:
312 case ST_FS8_RESET:
313 case ST_FS16_WRITTEN:
314 case ST_FS16_RESET:
315 /* We'll handle these when along with the time. */
316 scaled[i] = 0;
317 continue;
318
319 case ST_VS:
320 case ST_FS8:
321 case ST_FS16:
322 get_written_and_reset(brw, i, &written, &reset);
323 break;
324
325 default:
326 /* I sometimes want to print things that aren't the 3 shader times.
327 * Just print the sum in that case.
328 */
329 written = 1;
330 reset = 0;
331 break;
332 }
333
334 uint64_t time = brw->shader_time.cumulative[i];
335 if (written) {
336 scaled[i] = time / written * (written + reset);
337 } else {
338 scaled[i] = time;
339 }
340
341 switch (type) {
342 case ST_VS:
343 case ST_FS8:
344 case ST_FS16:
345 total_by_type[type] += scaled[i];
346 break;
347 default:
348 break;
349 }
350
351 total += scaled[i];
352 }
353
354 if (total == 0) {
355 printf("No shader time collected yet\n");
356 return;
357 }
358
359 qsort(sorted, brw->shader_time.num_entries, sizeof(sorted[0]), compare_time);
360
361 printf("\n");
362 printf("type ID cycles spent %% of total\n");
363 for (int s = 0; s < brw->shader_time.num_entries; s++) {
364 /* Work back from the sorted pointers times to a time to print. */
365 int i = sorted[s] - scaled;
366
367 if (scaled[i] == 0)
368 continue;
369
370 int shader_num = -1;
371 if (brw->shader_time.programs[i]) {
372 shader_num = brw->shader_time.programs[i]->Name;
373 }
374
375 switch (brw->shader_time.types[i]) {
376 case ST_VS:
377 print_shader_time_line("vs", shader_num, scaled[i], total);
378 break;
379 case ST_FS8:
380 print_shader_time_line("fs8", shader_num, scaled[i], total);
381 break;
382 case ST_FS16:
383 print_shader_time_line("fs16", shader_num, scaled[i], total);
384 break;
385 default:
386 print_shader_time_line("other", shader_num, scaled[i], total);
387 break;
388 }
389 }
390
391 printf("\n");
392 print_shader_time_line("total vs", -1, total_by_type[ST_VS], total);
393 print_shader_time_line("total fs8", -1, total_by_type[ST_FS8], total);
394 print_shader_time_line("total fs16", -1, total_by_type[ST_FS16], total);
395 }
396
397 static void
398 brw_collect_shader_time(struct brw_context *brw)
399 {
400 if (!brw->shader_time.bo)
401 return;
402
403 /* This probably stalls on the last rendering. We could fix that by
404 * delaying reading the reports, but it doesn't look like it's a big
405 * overhead compared to the cost of tracking the time in the first place.
406 */
407 drm_intel_bo_map(brw->shader_time.bo, true);
408
409 uint32_t *times = brw->shader_time.bo->virtual;
410
411 for (int i = 0; i < brw->shader_time.num_entries; i++) {
412 brw->shader_time.cumulative[i] += times[i];
413 }
414
415 /* Zero the BO out to clear it out for our next collection.
416 */
417 memset(times, 0, brw->shader_time.bo->size);
418 drm_intel_bo_unmap(brw->shader_time.bo);
419 }
420
421 void
422 brw_collect_and_report_shader_time(struct brw_context *brw)
423 {
424 brw_collect_shader_time(brw);
425
426 if (brw->shader_time.report_time == 0 ||
427 get_time() - brw->shader_time.report_time >= 1.0) {
428 brw_report_shader_time(brw);
429 brw->shader_time.report_time = get_time();
430 }
431 }
432
433 void
434 brw_destroy_shader_time(struct brw_context *brw)
435 {
436 drm_intel_bo_unreference(brw->shader_time.bo);
437 brw->shader_time.bo = NULL;
438 }