2 Copyright (C) Intel Corp. 2006. All Rights Reserved.
3 Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
4 develop this 3D driver.
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 **********************************************************************/
29 * Keith Whitwell <keith@tungstengraphics.com>
33 #include "main/imports.h"
34 #include "main/enums.h"
35 #include "main/shaderobj.h"
36 #include "program/prog_parameter.h"
37 #include "program/program.h"
38 #include "program/programopt.h"
40 #include "glsl/ralloc.h"
42 #include "brw_context.h"
46 get_new_program_id(struct intel_screen
*screen
)
48 static pthread_mutex_t m
= PTHREAD_MUTEX_INITIALIZER
;
49 pthread_mutex_lock(&m
);
50 unsigned id
= screen
->program_id
++;
51 pthread_mutex_unlock(&m
);
55 static void brwBindProgram( struct gl_context
*ctx
,
57 struct gl_program
*prog
)
59 struct brw_context
*brw
= brw_context(ctx
);
62 case GL_VERTEX_PROGRAM_ARB
:
63 brw
->state
.dirty
.brw
|= BRW_NEW_VERTEX_PROGRAM
;
65 case GL_FRAGMENT_PROGRAM_ARB
:
66 brw
->state
.dirty
.brw
|= BRW_NEW_FRAGMENT_PROGRAM
;
71 static struct gl_program
*brwNewProgram( struct gl_context
*ctx
,
75 struct brw_context
*brw
= brw_context(ctx
);
78 case GL_VERTEX_PROGRAM_ARB
: {
79 struct brw_vertex_program
*prog
= CALLOC_STRUCT(brw_vertex_program
);
81 prog
->id
= get_new_program_id(brw
->intel
.intelScreen
);
83 return _mesa_init_vertex_program( ctx
, &prog
->program
,
90 case GL_FRAGMENT_PROGRAM_ARB
: {
91 struct brw_fragment_program
*prog
= CALLOC_STRUCT(brw_fragment_program
);
93 prog
->id
= get_new_program_id(brw
->intel
.intelScreen
);
95 return _mesa_init_fragment_program( ctx
, &prog
->program
,
103 return _mesa_new_program(ctx
, target
, id
);
107 static void brwDeleteProgram( struct gl_context
*ctx
,
108 struct gl_program
*prog
)
110 _mesa_delete_program( ctx
, prog
);
115 brwIsProgramNative(struct gl_context
*ctx
,
117 struct gl_program
*prog
)
123 brwProgramStringNotify(struct gl_context
*ctx
,
125 struct gl_program
*prog
)
127 struct brw_context
*brw
= brw_context(ctx
);
129 if (target
== GL_FRAGMENT_PROGRAM_ARB
) {
130 struct gl_fragment_program
*fprog
= (struct gl_fragment_program
*) prog
;
131 struct brw_fragment_program
*newFP
= brw_fragment_program(fprog
);
132 const struct brw_fragment_program
*curFP
=
133 brw_fragment_program_const(brw
->fragment_program
);
136 brw
->state
.dirty
.brw
|= BRW_NEW_FRAGMENT_PROGRAM
;
137 newFP
->id
= get_new_program_id(brw
->intel
.intelScreen
);
139 else if (target
== GL_VERTEX_PROGRAM_ARB
) {
140 struct gl_vertex_program
*vprog
= (struct gl_vertex_program
*) prog
;
141 struct brw_vertex_program
*newVP
= brw_vertex_program(vprog
);
142 const struct brw_vertex_program
*curVP
=
143 brw_vertex_program_const(brw
->vertex_program
);
146 brw
->state
.dirty
.brw
|= BRW_NEW_VERTEX_PROGRAM
;
147 if (newVP
->program
.IsPositionInvariant
) {
148 _mesa_insert_mvp_code(ctx
, &newVP
->program
);
150 newVP
->id
= get_new_program_id(brw
->intel
.intelScreen
);
152 /* Also tell tnl about it:
154 _tnl_program_string(ctx
, target
, prog
);
157 brw_add_texrect_params(prog
);
163 brw_add_texrect_params(struct gl_program
*prog
)
165 for (int texunit
= 0; texunit
< BRW_MAX_TEX_UNIT
; texunit
++) {
166 if (!(prog
->TexturesUsed
[texunit
] & (1 << TEXTURE_RECT_INDEX
)))
169 int tokens
[STATE_LENGTH
] = {
177 _mesa_add_state_reference(prog
->Parameters
, (gl_state_index
*)tokens
);
181 /* Per-thread scratch space is a power-of-two multiple of 1KB. */
183 brw_get_scratch_size(int size
)
187 for (i
= 1024; i
< size
; i
*= 2)
194 brw_get_scratch_bo(struct intel_context
*intel
,
195 drm_intel_bo
**scratch_bo
, int size
)
197 drm_intel_bo
*old_bo
= *scratch_bo
;
199 if (old_bo
&& old_bo
->size
< size
) {
200 drm_intel_bo_unreference(old_bo
);
205 *scratch_bo
= drm_intel_bo_alloc(intel
->bufmgr
, "scratch bo", size
, 4096);
209 void brwInitFragProgFuncs( struct dd_function_table
*functions
)
211 assert(functions
->ProgramStringNotify
== _tnl_program_string
);
213 functions
->BindProgram
= brwBindProgram
;
214 functions
->NewProgram
= brwNewProgram
;
215 functions
->DeleteProgram
= brwDeleteProgram
;
216 functions
->IsProgramNative
= brwIsProgramNative
;
217 functions
->ProgramStringNotify
= brwProgramStringNotify
;
219 functions
->NewShader
= brw_new_shader
;
220 functions
->NewShaderProgram
= brw_new_shader_program
;
221 functions
->LinkShader
= brw_link_shader
;
225 brw_init_shader_time(struct brw_context
*brw
)
227 struct intel_context
*intel
= &brw
->intel
;
229 const int max_entries
= 4096;
230 brw
->shader_time
.bo
= drm_intel_bo_alloc(intel
->bufmgr
, "shader time",
231 max_entries
* 4, 4096);
232 brw
->shader_time
.programs
= rzalloc_array(brw
, struct gl_shader_program
*,
234 brw
->shader_time
.types
= rzalloc_array(brw
, enum shader_time_shader_type
,
236 brw
->shader_time
.cumulative
= rzalloc_array(brw
, uint64_t,
238 brw
->shader_time
.max_entries
= max_entries
;
242 compare_time(const void *a
, const void *b
)
244 uint64_t * const *a_val
= a
;
245 uint64_t * const *b_val
= b
;
247 /* We don't just subtract because we're turning the value to an int. */
248 if (**a_val
< **b_val
)
250 else if (**a_val
== **b_val
)
257 get_written_and_reset(struct brw_context
*brw
, int i
,
258 uint64_t *written
, uint64_t *reset
)
260 enum shader_time_shader_type type
= brw
->shader_time
.types
[i
];
261 assert(type
== ST_VS
|| type
== ST_FS8
|| type
== ST_FS16
);
263 /* Find where we recorded written and reset. */
266 for (wi
= i
; brw
->shader_time
.types
[wi
] != type
+ 1; wi
++)
269 for (ri
= i
; brw
->shader_time
.types
[ri
] != type
+ 2; ri
++)
272 *written
= brw
->shader_time
.cumulative
[wi
];
273 *reset
= brw
->shader_time
.cumulative
[ri
];
277 print_shader_time_line(const char *name
, int shader_num
,
278 uint64_t time
, uint64_t total
)
281 for (int i
= strlen(name
); i
< 10; i
++)
283 printf("%4d: ", shader_num
);
285 printf("%16lld (%7.2f Gcycles) %4.1f%%\n",
287 (double)time
/ 1000000000.0,
288 (double)time
/ total
* 100.0);
292 brw_report_shader_time(struct brw_context
*brw
)
294 if (!brw
->shader_time
.bo
|| !brw
->shader_time
.num_entries
)
297 uint64_t scaled
[brw
->shader_time
.num_entries
];
298 uint64_t *sorted
[brw
->shader_time
.num_entries
];
299 uint64_t total_by_type
[ST_FS16
+ 1];
300 memset(total_by_type
, 0, sizeof(total_by_type
));
302 for (int i
= 0; i
< brw
->shader_time
.num_entries
; i
++) {
303 uint64_t written
= 0, reset
= 0;
304 enum shader_time_shader_type type
= brw
->shader_time
.types
[i
];
306 sorted
[i
] = &scaled
[i
];
313 case ST_FS16_WRITTEN
:
315 /* We'll handle these when along with the time. */
322 get_written_and_reset(brw
, i
, &written
, &reset
);
326 /* I sometimes want to print things that aren't the 3 shader times.
327 * Just print the sum in that case.
334 uint64_t time
= brw
->shader_time
.cumulative
[i
];
336 scaled
[i
] = time
/ written
* (written
+ reset
);
345 total_by_type
[type
] += scaled
[i
];
355 printf("No shader time collected yet\n");
359 qsort(sorted
, brw
->shader_time
.num_entries
, sizeof(sorted
[0]), compare_time
);
362 printf("type ID cycles spent %% of total\n");
363 for (int s
= 0; s
< brw
->shader_time
.num_entries
; s
++) {
364 /* Work back from the sorted pointers times to a time to print. */
365 int i
= sorted
[s
] - scaled
;
371 if (brw
->shader_time
.programs
[i
]) {
372 shader_num
= brw
->shader_time
.programs
[i
]->Name
;
375 switch (brw
->shader_time
.types
[i
]) {
377 print_shader_time_line("vs", shader_num
, scaled
[i
], total
);
380 print_shader_time_line("fs8", shader_num
, scaled
[i
], total
);
383 print_shader_time_line("fs16", shader_num
, scaled
[i
], total
);
386 print_shader_time_line("other", shader_num
, scaled
[i
], total
);
392 print_shader_time_line("total vs", -1, total_by_type
[ST_VS
], total
);
393 print_shader_time_line("total fs8", -1, total_by_type
[ST_FS8
], total
);
394 print_shader_time_line("total fs16", -1, total_by_type
[ST_FS16
], total
);
398 brw_collect_shader_time(struct brw_context
*brw
)
400 if (!brw
->shader_time
.bo
)
403 /* This probably stalls on the last rendering. We could fix that by
404 * delaying reading the reports, but it doesn't look like it's a big
405 * overhead compared to the cost of tracking the time in the first place.
407 drm_intel_bo_map(brw
->shader_time
.bo
, true);
409 uint32_t *times
= brw
->shader_time
.bo
->virtual;
411 for (int i
= 0; i
< brw
->shader_time
.num_entries
; i
++) {
412 brw
->shader_time
.cumulative
[i
] += times
[i
];
415 /* Zero the BO out to clear it out for our next collection.
417 memset(times
, 0, brw
->shader_time
.bo
->size
);
418 drm_intel_bo_unmap(brw
->shader_time
.bo
);
422 brw_collect_and_report_shader_time(struct brw_context
*brw
)
424 brw_collect_shader_time(brw
);
426 if (brw
->shader_time
.report_time
== 0 ||
427 get_time() - brw
->shader_time
.report_time
>= 1.0) {
428 brw_report_shader_time(brw
);
429 brw
->shader_time
.report_time
= get_time();
434 brw_destroy_shader_time(struct brw_context
*brw
)
436 drm_intel_bo_unreference(brw
->shader_time
.bo
);
437 brw
->shader_time
.bo
= NULL
;