* Keith Whitwell <keith@tungstengraphics.com>
*/
+#include <pthread.h>
#include "main/imports.h"
#include "main/enums.h"
#include "main/shaderobj.h"
#include "program/program.h"
#include "program/programopt.h"
#include "tnl/tnl.h"
-#include "talloc.h"
+#include "glsl/ralloc.h"
#include "brw_context.h"
#include "brw_wm.h"
+/**
+ * Returns the current value of the screen's program_id counter and advances
+ * the counter by one.
+ *
+ * The read-and-increment runs while holding a function-local static mutex,
+ * so callers on different threads cannot interleave inside it.
+ */
+static unsigned
+get_new_program_id(struct intel_screen *screen)
+{
+   static pthread_mutex_t id_lock = PTHREAD_MUTEX_INITIALIZER;
+   unsigned new_id;
+
+   pthread_mutex_lock(&id_lock);
+   new_id = screen->program_id++;
+   pthread_mutex_unlock(&id_lock);
+
+   return new_id;
+}
+
static void brwBindProgram( struct gl_context *ctx,
GLenum target,
struct gl_program *prog )
case GL_VERTEX_PROGRAM_ARB:
brw->state.dirty.brw |= BRW_NEW_VERTEX_PROGRAM;
break;
+ case MESA_GEOMETRY_PROGRAM:
+ brw->state.dirty.brw |= BRW_NEW_GEOMETRY_PROGRAM;
+ break;
case GL_FRAGMENT_PROGRAM_ARB:
brw->state.dirty.brw |= BRW_NEW_FRAGMENT_PROGRAM;
break;
case GL_VERTEX_PROGRAM_ARB: {
struct brw_vertex_program *prog = CALLOC_STRUCT(brw_vertex_program);
if (prog) {
- prog->id = brw->program_id++;
+ prog->id = get_new_program_id(brw->intelScreen);
return _mesa_init_vertex_program( ctx, &prog->program,
target, id );
case GL_FRAGMENT_PROGRAM_ARB: {
struct brw_fragment_program *prog = CALLOC_STRUCT(brw_fragment_program);
if (prog) {
- prog->id = brw->program_id++;
+ prog->id = get_new_program_id(brw->intelScreen);
return _mesa_init_fragment_program( ctx, &prog->program,
target, id );
return NULL;
}
+ case MESA_GEOMETRY_PROGRAM: {
+ struct brw_geometry_program *prog = CALLOC_STRUCT(brw_geometry_program);
+ if (prog) {
+ prog->id = get_new_program_id(brw->intelScreen);
+
+ return _mesa_init_geometry_program(ctx, &prog->program, target, id);
+ } else {
+ return NULL;
+ }
+ }
+
default:
- return _mesa_new_program(ctx, target, id);
+ assert(!"Unsupported target in brwNewProgram()");
+ return NULL;
}
}
}
-static GLboolean brwIsProgramNative( struct gl_context *ctx,
- GLenum target,
- struct gl_program *prog )
+/**
+ * Always returns true; ctx, target and prog are not inspected.
+ */
+static GLboolean
+brwIsProgramNative(struct gl_context *ctx,
+ GLenum target,
+ struct gl_program *prog)
{
- return GL_TRUE;
+ return true;
}
-static void
-shader_error(struct gl_context *ctx, struct gl_program *prog, const char *msg)
-{
- struct gl_shader_program *shader;
-
- shader = _mesa_lookup_shader_program(ctx, prog->Id);
-
- if (shader) {
- shader->InfoLog = talloc_strdup_append(shader->InfoLog, msg);
- shader->LinkStatus = GL_FALSE;
- }
-}
-
-static GLboolean brwProgramStringNotify( struct gl_context *ctx,
- GLenum target,
- struct gl_program *prog )
+static GLboolean
+brwProgramStringNotify(struct gl_context *ctx,
+ GLenum target,
+ struct gl_program *prog)
{
struct brw_context *brw = brw_context(ctx);
- int i;
- if (target == GL_FRAGMENT_PROGRAM_ARB) {
+ switch (target) {
+ case GL_FRAGMENT_PROGRAM_ARB: {
struct gl_fragment_program *fprog = (struct gl_fragment_program *) prog;
struct brw_fragment_program *newFP = brw_fragment_program(fprog);
const struct brw_fragment_program *curFP =
brw_fragment_program_const(brw->fragment_program);
- struct gl_shader_program *shader_program;
-
- if (fprog->FogOption) {
- _mesa_append_fog_code(ctx, fprog);
- fprog->FogOption = GL_NONE;
- }
if (newFP == curFP)
brw->state.dirty.brw |= BRW_NEW_FRAGMENT_PROGRAM;
- newFP->id = brw->program_id++;
-
- /* Don't reject fragment shaders for their Mesa IR state when we're
- * using the new FS backend.
- */
- shader_program = _mesa_lookup_shader_program(ctx, prog->Id);
- if (shader_program
- && shader_program->_LinkedShaders[MESA_SHADER_FRAGMENT]) {
- return GL_TRUE;
- }
+ newFP->id = get_new_program_id(brw->intelScreen);
+ break;
}
- else if (target == GL_VERTEX_PROGRAM_ARB) {
+ case GL_VERTEX_PROGRAM_ARB: {
struct gl_vertex_program *vprog = (struct gl_vertex_program *) prog;
struct brw_vertex_program *newVP = brw_vertex_program(vprog);
const struct brw_vertex_program *curVP =
if (newVP->program.IsPositionInvariant) {
_mesa_insert_mvp_code(ctx, &newVP->program);
}
- newVP->id = brw->program_id++;
+ newVP->id = get_new_program_id(brw->intelScreen);
/* Also tell tnl about it:
*/
_tnl_program_string(ctx, target, prog);
+ break;
+ }
+ default:
+ /*
+ * driver->ProgramStringNotify is only called for ARB programs, fixed
+ * function vertex programs, and ir_to_mesa (which isn't used by the
+ * i965 back-end). Therefore, even after geometry shaders are added,
+ * this function should only ever be called with a target of
+ * GL_VERTEX_PROGRAM_ARB or GL_FRAGMENT_PROGRAM_ARB.
+ */
+ assert(!"Unexpected target in brwProgramStringNotify");
+ break;
}
- /* Reject programs with subroutines, which are totally broken at the moment
- * (all program flows return when any program flow returns, and
- * the VS also hangs if a function call calls a function.
- *
- * See piglit glsl-{vs,fs}-functions-[23] tests.
- */
- for (i = 0; i < prog->NumInstructions; i++) {
- struct prog_instruction *inst = prog->Instructions + i;
- int r;
-
- if (prog->Instructions[i].Opcode == OPCODE_CAL) {
- shader_error(ctx, prog,
- "i965 driver doesn't yet support uninlined function "
- "calls. Move to using a single return statement at "
- "the end of the function to work around it.\n");
- return GL_FALSE;
- }
+ brw_add_texrect_params(prog);
- if (prog->Instructions[i].Opcode == OPCODE_RET) {
- shader_error(ctx, prog,
- "i965 driver doesn't yet support \"return\" "
- "from main().\n");
- return GL_FALSE;
- }
+ return true;
+}
- for (r = 0; r < _mesa_num_inst_src_regs(inst->Opcode); r++) {
- if (prog->Instructions[i].SrcReg[r].RelAddr &&
- prog->Instructions[i].SrcReg[r].File == PROGRAM_INPUT) {
- shader_error(ctx, prog,
- "Variable indexing of shader inputs unsupported\n");
- return GL_FALSE;
- }
- }
+/**
+ * For every texture unit (0 .. BRW_MAX_TEX_UNIT - 1) whose TexturesUsed mask
+ * has the TEXTURE_RECT_INDEX bit set, adds a
+ * { STATE_INTERNAL, STATE_TEXRECT_SCALE, unit } state reference to the
+ * program's parameter list.
+ */
+void
+brw_add_texrect_params(struct gl_program *prog)
+{
+   int unit;
+
+   for (unit = 0; unit < BRW_MAX_TEX_UNIT; unit++) {
+      if (prog->TexturesUsed[unit] & (1 << TEXTURE_RECT_INDEX)) {
+         /* Trailing token slots are unused and left at zero. */
+         int state_tokens[STATE_LENGTH] = {
+            STATE_INTERNAL,
+            STATE_TEXRECT_SCALE,
+            unit,
+            0,
+            0
+         };
+
+         _mesa_add_state_reference(prog->Parameters,
+                                   (gl_state_index *)state_tokens);
+      }
+   }
+}
- if (target == GL_FRAGMENT_PROGRAM_ARB &&
- prog->Instructions[i].DstReg.RelAddr &&
- prog->Instructions[i].DstReg.File == PROGRAM_OUTPUT) {
- shader_error(ctx, prog,
- "Variable indexing of FS outputs unsupported\n");
- return GL_FALSE;
- }
- if (target == GL_FRAGMENT_PROGRAM_ARB) {
- if ((prog->Instructions[i].DstReg.RelAddr &&
- prog->Instructions[i].DstReg.File == PROGRAM_TEMPORARY) ||
- (prog->Instructions[i].SrcReg[0].RelAddr &&
- prog->Instructions[i].SrcReg[0].File == PROGRAM_TEMPORARY) ||
- (prog->Instructions[i].SrcReg[1].RelAddr &&
- prog->Instructions[i].SrcReg[1].File == PROGRAM_TEMPORARY) ||
- (prog->Instructions[i].SrcReg[2].RelAddr &&
- prog->Instructions[i].SrcReg[2].File == PROGRAM_TEMPORARY)) {
- shader_error(ctx, prog,
- "Variable indexing of variable arrays in the FS "
- "unsupported\n");
- return GL_FALSE;
- }
- }
+/**
+ * Rounds a per-thread scratch request up to the allocation granularity.
+ *
+ * Per-thread scratch space is a power-of-two multiple of 1KB, so the result
+ * is the smallest value of the form 1024 * 2^k that is >= size.  Any request
+ * of 1024 or less (including zero and negative values) yields 1024.
+ *
+ * 2^30 is the largest power of two a 32-bit int can hold.  The doubling
+ * stops there instead of overflowing, so requests above 2^30 return 2^30.
+ */
+int
+brw_get_scratch_size(int size)
+{
+   const int max_pot = 1 << 30;
+   int i;
+
+   /* Stop before "i *= 2" would overflow a signed int, which is undefined
+    * behavior and previously kept this loop from terminating sensibly.
+    */
+   for (i = 1024; i < size && i < max_pot; i *= 2)
+      ;
+
+   return i;
+}
+
+/**
+ * Makes *scratch_bo refer to a buffer object of at least size bytes.
+ *
+ * An existing BO whose size is already >= size is left untouched.  A BO that
+ * is too small is unreferenced, and whenever no usable BO remains a new one
+ * named "scratch bo" is allocated from brw->bufmgr and stored in *scratch_bo.
+ * The result of drm_intel_bo_alloc() is stored as-is and not checked here.
+ */
+void
+brw_get_scratch_bo(struct brw_context *brw,
+ drm_intel_bo **scratch_bo, int size)
+{
+ drm_intel_bo *old_bo = *scratch_bo;
+
+ /* Too small for this request: drop it so a replacement is allocated below. */
+ if (old_bo && old_bo->size < size) {
+ drm_intel_bo_unreference(old_bo);
+ old_bo = NULL;
}
- return GL_TRUE;
+ /* NOTE(review): the trailing 4096 is presumably the alignment -- confirm
+  * against drm_intel_bo_alloc().
+  */
+ if (!old_bo) {
+ *scratch_bo = drm_intel_bo_alloc(brw->bufmgr, "scratch bo", size, 4096);
+ }
}
void brwInitFragProgFuncs( struct dd_function_table *functions )
functions->NewShader = brw_new_shader;
functions->NewShaderProgram = brw_new_shader_program;
- functions->CompileShader = brw_compile_shader;
functions->LinkShader = brw_link_shader;
}
+/**
+ * Sets up the shader-time bookkeeping on the context.
+ *
+ * Allocates the "shader time" BO with room for 4096 records of
+ * SHADER_TIME_STRIDE bytes each, allocates the zero-initialized per-entry
+ * arrays (shader programs, programs, types, cumulative totals) with brw as
+ * the ralloc context, and records the capacity in max_entries.
+ */
+void
+brw_init_shader_time(struct brw_context *brw)
+{
+   const int capacity = 4096;
+
+   brw->shader_time.max_entries = capacity;
+
+   brw->shader_time.bo =
+      drm_intel_bo_alloc(brw->bufmgr, "shader time",
+                         capacity * SHADER_TIME_STRIDE, 4096);
+
+   brw->shader_time.shader_programs =
+      rzalloc_array(brw, struct gl_shader_program *, capacity);
+   brw->shader_time.programs =
+      rzalloc_array(brw, struct gl_program *, capacity);
+   brw->shader_time.types =
+      rzalloc_array(brw, enum shader_time_shader_type, capacity);
+   brw->shader_time.cumulative =
+      rzalloc_array(brw, uint64_t, capacity);
+}
+
+/**
+ * qsort() comparator for an array of pointers to uint64_t.
+ *
+ * Orders the elements by ascending pointed-to value; returns -1, 0 or 1.
+ */
+static int
+compare_time(const void *a, const void *b)
+{
+   const uint64_t lhs = **(uint64_t * const *)a;
+   const uint64_t rhs = **(uint64_t * const *)b;
+
+   /* Compare instead of subtracting: a 64-bit difference would not survive
+    * the conversion to the int return value.
+    */
+   return (lhs > rhs) - (lhs < rhs);
+}
+
+/**
+ * Looks up the "written" and "reset" counters that accompany the timing
+ * entry at index i.
+ *
+ * With type being the type recorded at index i (which must be ST_VS, ST_FS8
+ * or ST_FS16), the first entry at or after i whose type equals type + 1
+ * supplies *written, and the first whose type equals type + 2 supplies
+ * *reset.
+ *
+ * Both searches are limited to the num_entries slots in use.  If a companion
+ * entry is missing, the corresponding output is set to 0 rather than
+ * scanning past the end of the arrays.
+ */
+static void
+get_written_and_reset(struct brw_context *brw, int i,
+                      uint64_t *written, uint64_t *reset)
+{
+   const int num_entries = brw->shader_time.num_entries;
+   enum shader_time_shader_type type = brw->shader_time.types[i];
+   assert(type == ST_VS || type == ST_FS8 || type == ST_FS16);
+
+   /* Find where we recorded written and reset. */
+   int wi, ri;
+
+   for (wi = i;
+        wi < num_entries && brw->shader_time.types[wi] != type + 1;
+        wi++)
+      ;
+
+   for (ri = i;
+        ri < num_entries && brw->shader_time.types[ri] != type + 2;
+        ri++)
+      ;
+
+   *written = wi < num_entries ? brw->shader_time.cumulative[wi] : 0;
+   *reset = ri < num_entries ? brw->shader_time.cumulative[ri] : 0;
+}
+
+/**
+ * Prints one row of the shader-time table to stdout.
+ *
+ * The row holds the stage and name columns, the shader number (left blank
+ * when shader_num is -1), the raw cycle count, the same count in units of
+ * 10^9 cycles, and the count as a percentage of total.
+ */
+static void
+print_shader_time_line(const char *stage, const char *name,
+                       int shader_num, uint64_t time, uint64_t total)
+{
+   const double gcycles = (double)time / 1000000000.0;
+   const double percent = (double)time / total * 100.0;
+
+   printf("%-6s%-6s", stage, name);
+
+   if (shader_num == -1)
+      printf(" : ");
+   else
+      printf("%4d: ", shader_num);
+
+   printf("%16lld (%7.2f Gcycles) %4.1f%%\n",
+          (long long)time, gcycles, percent);
+}
+
+/**
+ * Prints the accumulated shader times to stdout.
+ *
+ * Returns without printing when there is no shader-time BO or no entries.
+ * Otherwise each entry's cumulative time is scaled (see below) and the
+ * entries are printed in ascending order of scaled time, skipping entries
+ * whose scaled time is zero.  Three summary rows follow with the totals for
+ * ST_VS, ST_FS8 and ST_FS16.  If the grand total is zero, only
+ * "No shader time collected yet" is printed.
+ */
+static void
+brw_report_shader_time(struct brw_context *brw)
+{
+ if (!brw->shader_time.bo || !brw->shader_time.num_entries)
+ return;
+
+ /* scaled[i] is the adjusted time of entry i; sorted[] holds pointers into
+  * scaled[] so the entries can be ordered without moving scaled[] itself.
+  */
+ uint64_t scaled[brw->shader_time.num_entries];
+ uint64_t *sorted[brw->shader_time.num_entries];
+ uint64_t total_by_type[ST_FS16 + 1];
+ memset(total_by_type, 0, sizeof(total_by_type));
+ double total = 0;
+ for (int i = 0; i < brw->shader_time.num_entries; i++) {
+ uint64_t written = 0, reset = 0;
+ enum shader_time_shader_type type = brw->shader_time.types[i];
+
+ sorted[i] = &scaled[i];
+
+ switch (type) {
+ case ST_VS_WRITTEN:
+ case ST_VS_RESET:
+ case ST_FS8_WRITTEN:
+ case ST_FS8_RESET:
+ case ST_FS16_WRITTEN:
+ case ST_FS16_RESET:
+ /* We'll handle these along with the time. */
+ scaled[i] = 0;
+ continue;
+
+ case ST_VS:
+ case ST_FS8:
+ case ST_FS16:
+ get_written_and_reset(brw, i, &written, &reset);
+ break;
+
+ default:
+ /* I sometimes want to print things that aren't the 3 shader times.
+ * Just print the sum in that case.
+ */
+ written = 1;
+ reset = 0;
+ break;
+ }
+
+ /* Scale the time by (written + reset) / written.  The division is done
+  * first, in integer arithmetic.
+  * NOTE(review): presumably this extrapolates over samples lost to
+  * resets -- confirm.
+  */
+ uint64_t time = brw->shader_time.cumulative[i];
+ if (written) {
+ scaled[i] = time / written * (written + reset);
+ } else {
+ scaled[i] = time;
+ }
+
+ switch (type) {
+ case ST_VS:
+ case ST_FS8:
+ case ST_FS16:
+ total_by_type[type] += scaled[i];
+ break;
+ default:
+ break;
+ }
+
+ total += scaled[i];
+ }
+
+ if (total == 0) {
+ printf("No shader time collected yet\n");
+ return;
+ }
+
+ /* Ascending by pointed-to scaled time (see compare_time). */
+ qsort(sorted, brw->shader_time.num_entries, sizeof(sorted[0]), compare_time);
+
+ printf("\n");
+ printf("type ID cycles spent %% of total\n");
+ for (int s = 0; s < brw->shader_time.num_entries; s++) {
+ const char *shader_name;
+ const char *stage;
+ /* Work back from the sorted pointers times to a time to print. */
+ int i = sorted[s] - scaled;
+
+ if (scaled[i] == 0)
+ continue;
+
+ int shader_num = -1;
+ if (brw->shader_time.shader_programs[i]) {
+ shader_num = brw->shader_time.shader_programs[i]->Name;
+
+ /* The fixed function fragment shader generates GLSL IR with a Name
+ * of 0, and nothing else does.
+ */
+ if (shader_num == 0 &&
+ (brw->shader_time.types[i] == ST_FS8 ||
+ brw->shader_time.types[i] == ST_FS16)) {
+ shader_name = "ff";
+ shader_num = -1;
+ } else {
+ shader_name = "glsl";
+ }
+ } else if (brw->shader_time.programs[i]) {
+ shader_num = brw->shader_time.programs[i]->Id;
+ if (shader_num == 0) {
+ shader_name = "ff";
+ shader_num = -1;
+ } else {
+ shader_name = "prog";
+ }
+ } else {
+ shader_name = "other";
+ }
+
+ switch (brw->shader_time.types[i]) {
+ case ST_VS:
+ stage = "vs";
+ break;
+ case ST_FS8:
+ stage = "fs8";
+ break;
+ case ST_FS16:
+ stage = "fs16";
+ break;
+ default:
+ stage = "other";
+ break;
+ }
+
+ print_shader_time_line(stage, shader_name, shader_num,
+ scaled[i], total);
+ }
+
+ printf("\n");
+ print_shader_time_line("total", "vs", -1, total_by_type[ST_VS], total);
+ print_shader_time_line("total", "fs8", -1, total_by_type[ST_FS8], total);
+ print_shader_time_line("total", "fs16", -1, total_by_type[ST_FS16], total);
+}
+
+/**
+ * Folds the values currently stored in the shader-time BO into the CPU-side
+ * cumulative totals, then zeroes the BO for the next collection.
+ *
+ * One 32-bit value is read per entry, at byte offset i * SHADER_TIME_STRIDE.
+ * Does nothing when there is no shader-time BO or when the BO cannot be
+ * mapped.
+ */
+static void
+brw_collect_shader_time(struct brw_context *brw)
+{
+   if (!brw->shader_time.bo)
+      return;
+
+   /* This probably stalls on the last rendering.  We could fix that by
+    * delaying reading the reports, but it doesn't look like it's a big
+    * overhead compared to the cost of tracking the time in the first place.
+    */
+   if (drm_intel_bo_map(brw->shader_time.bo, true) != 0) {
+      /* Mapping failed: bo->virtual is not usable, so skip this round
+       * instead of reading and clearing through an invalid pointer.
+       */
+      return;
+   }
+
+   uint32_t *times = brw->shader_time.bo->virtual;
+
+   for (int i = 0; i < brw->shader_time.num_entries; i++) {
+      brw->shader_time.cumulative[i] += times[i * SHADER_TIME_STRIDE / 4];
+   }
+
+   /* Zero the BO out to clear it out for our next collection.
+    */
+   memset(times, 0, brw->shader_time.bo->size);
+   drm_intel_bo_unmap(brw->shader_time.bo);
+}
+
+/**
+ * Collects the latest shader-time data and, when a report is due, prints it.
+ *
+ * A report is due when none has been printed yet (report_time == 0) or when
+ * get_time() has advanced by at least 1.0 since the previous report
+ * (presumably seconds -- confirm against get_time()).
+ */
+void
+brw_collect_and_report_shader_time(struct brw_context *brw)
+{
+   brw_collect_shader_time(brw);
+
+   /* Not the first report and the interval has not elapsed yet: done. */
+   if (brw->shader_time.report_time != 0 &&
+       !(get_time() - brw->shader_time.report_time >= 1.0))
+      return;
+
+   brw_report_shader_time(brw);
+   brw->shader_time.report_time = get_time();
+}
+
+/**
+ * Reserves the next free slot in the shader_time arrays and records what the
+ * slot tracks, for use in the printouts.
+ *
+ * The slot stores the given type and takes references to shader_prog and
+ * prog.  Holding those references may change the programs' lifetimes
+ * compared to normal operation.
+ *
+ * Returns the index of the reserved slot.
+ */
+int
+brw_get_shader_time_index(struct brw_context *brw,
+                          struct gl_shader_program *shader_prog,
+                          struct gl_program *prog,
+                          enum shader_time_shader_type type)
+{
+   const int slot = brw->shader_time.num_entries;
+
+   brw->shader_time.num_entries++;
+   assert(slot < brw->shader_time.max_entries);
+
+   brw->shader_time.types[slot] = type;
+
+   _mesa_reference_shader_program(&brw->ctx,
+                                  &brw->shader_time.shader_programs[slot],
+                                  shader_prog);
+
+   _mesa_reference_program(&brw->ctx,
+                           &brw->shader_time.programs[slot],
+                           prog);
+
+   return slot;
+}
+
+/**
+ * Drops the context's reference to the shader-time BO and clears the
+ * pointer, so later checks of shader_time.bo see that no BO exists.
+ */
+void
+brw_destroy_shader_time(struct brw_context *brw)
+{
+   drm_intel_bo **bo = &brw->shader_time.bo;
+
+   drm_intel_bo_unreference(*bo);
+   *bo = NULL;
+}