From c163072197b56e76b656cc472bbe6df650cf11ba Mon Sep 17 00:00:00 2001 From: Paul Berry Date: Wed, 28 Sep 2011 13:18:09 -0700 Subject: [PATCH] mesa: Create _mesa_bitcount_64() to replace i965's brw_count_bits() The i965 driver already had a function to count bits in a 64-bit uint (brw_count_bits()), but it was buggy (it only counted the bottom 32 bits) and it was clumsy (it had a strange and broken fallback for non-GCC-like compilers, which fortunately was never used). Since Mesa already has a _mesa_bitcount() function, it seems better to just create a _mesa_bitcount_64() function rather than special-case this in the i965 driver. This patch creates the new _mesa_bitcount_64() function and rewrites all of the old brw_count_bits() calls to refer to it. Reviewed-by: Ian Romanick Reviewed-by: Eric Anholt --- src/mesa/drivers/dri/i965/brw_clip.c | 2 +- src/mesa/drivers/dri/i965/brw_curbe.c | 2 +- src/mesa/drivers/dri/i965/brw_gs.c | 2 +- src/mesa/drivers/dri/i965/brw_sf.c | 2 +- src/mesa/drivers/dri/i965/brw_sf_emit.c | 4 ++-- src/mesa/drivers/dri/i965/brw_util.h | 9 +-------- src/mesa/drivers/dri/i965/brw_vs.c | 2 +- src/mesa/drivers/dri/i965/gen6_clip_state.c | 2 +- src/mesa/drivers/dri/i965/gen6_sf_state.c | 4 ++-- src/mesa/drivers/dri/i965/gen6_wm_state.c | 2 +- src/mesa/drivers/dri/i965/gen7_sf_state.c | 4 ++-- src/mesa/main/imports.c | 13 +++++++++++++ src/mesa/main/imports.h | 3 +++ 13 files changed, 30 insertions(+), 21 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_clip.c b/src/mesa/drivers/dri/i965/brw_clip.c index 2eb6044e22c..2dcf9e5df2d 100644 --- a/src/mesa/drivers/dri/i965/brw_clip.c +++ b/src/mesa/drivers/dri/i965/brw_clip.c @@ -150,7 +150,7 @@ static void upload_clip_prog(struct brw_context *brw) key.do_flat_shading = (ctx->Light.ShadeModel == GL_FLAT); key.pv_first = (ctx->Light.ProvokingVertex == GL_FIRST_VERTEX_CONVENTION); /* _NEW_TRANSFORM */ - key.nr_userclip = brw_count_bits(ctx->Transform.ClipPlanesEnabled); + key.nr_userclip = 
_mesa_bitcount_64(ctx->Transform.ClipPlanesEnabled); if (intel->gen == 5) key.clip_mode = BRW_CLIPMODE_KERNEL_CLIP; diff --git a/src/mesa/drivers/dri/i965/brw_curbe.c b/src/mesa/drivers/dri/i965/brw_curbe.c index 25c7e0908fd..0b7db0cf06f 100644 --- a/src/mesa/drivers/dri/i965/brw_curbe.c +++ b/src/mesa/drivers/dri/i965/brw_curbe.c @@ -66,7 +66,7 @@ static void calculate_curbe_offsets( struct brw_context *brw ) /* _NEW_TRANSFORM */ if (ctx->Transform.ClipPlanesEnabled) { - GLuint nr_planes = 6 + brw_count_bits(ctx->Transform.ClipPlanesEnabled); + GLuint nr_planes = 6 + _mesa_bitcount_64(ctx->Transform.ClipPlanesEnabled); nr_clip_regs = (nr_planes * 4 + 15) / 16; } diff --git a/src/mesa/drivers/dri/i965/brw_gs.c b/src/mesa/drivers/dri/i965/brw_gs.c index 0a37485171e..b7304b97134 100644 --- a/src/mesa/drivers/dri/i965/brw_gs.c +++ b/src/mesa/drivers/dri/i965/brw_gs.c @@ -159,7 +159,7 @@ static void populate_key( struct brw_context *brw, } /* _NEW_TRANSFORM */ - key->nr_userclip = brw_count_bits(ctx->Transform.ClipPlanesEnabled); + key->nr_userclip = _mesa_bitcount_64(ctx->Transform.ClipPlanesEnabled); key->need_gs_prog = (intel->gen >= 6) ? 
0 diff --git a/src/mesa/drivers/dri/i965/brw_sf.c b/src/mesa/drivers/dri/i965/brw_sf.c index 4e0434addbf..f4a6f54d1e3 100644 --- a/src/mesa/drivers/dri/i965/brw_sf.c +++ b/src/mesa/drivers/dri/i965/brw_sf.c @@ -154,7 +154,7 @@ static void upload_sf_prog(struct brw_context *brw) } /* _NEW_TRANSFORM */ - key.nr_userclip = brw_count_bits(ctx->Transform.ClipPlanesEnabled); + key.nr_userclip = _mesa_bitcount_64(ctx->Transform.ClipPlanesEnabled); /* _NEW_POINT */ key.do_point_sprite = ctx->Point.PointSprite; diff --git a/src/mesa/drivers/dri/i965/brw_sf_emit.c b/src/mesa/drivers/dri/i965/brw_sf_emit.c index 532d3272934..ebcbf944db4 100644 --- a/src/mesa/drivers/dri/i965/brw_sf_emit.c +++ b/src/mesa/drivers/dri/i965/brw_sf_emit.c @@ -167,7 +167,7 @@ static void do_flatshade_triangle( struct brw_sf_compile *c ) struct brw_compile *p = &c->func; struct intel_context *intel = &p->brw->intel; struct brw_reg ip = brw_ip_reg(); - GLuint nr = brw_count_bits(c->key.attrs & VERT_RESULT_COLOR_BITS); + GLuint nr = _mesa_bitcount_64(c->key.attrs & VERT_RESULT_COLOR_BITS); GLuint jmpi = 1; if (!nr) @@ -206,7 +206,7 @@ static void do_flatshade_line( struct brw_sf_compile *c ) struct brw_compile *p = &c->func; struct intel_context *intel = &p->brw->intel; struct brw_reg ip = brw_ip_reg(); - GLuint nr = brw_count_bits(c->key.attrs & VERT_RESULT_COLOR_BITS); + GLuint nr = _mesa_bitcount_64(c->key.attrs & VERT_RESULT_COLOR_BITS); GLuint jmpi = 1; if (!nr) diff --git a/src/mesa/drivers/dri/i965/brw_util.h b/src/mesa/drivers/dri/i965/brw_util.h index 940a8715502..c0fc1330e4b 100644 --- a/src/mesa/drivers/dri/i965/brw_util.h +++ b/src/mesa/drivers/dri/i965/brw_util.h @@ -34,15 +34,8 @@ #define BRW_UTIL_H #include "main/mtypes.h" +#include "main/imports.h" -#ifdef __GNUC__ -#define brw_count_bits(v) __builtin_popcount(v) -#else -static inline GLuint brw_count_bits(uint64_t v) -{ - return _mesa_popcount(v>>32) + _mesa_popcount(v&0xffffffff); -} -#endif extern GLuint 
brw_parameter_list_state_flags(struct gl_program_parameter_list *paramList); extern GLuint brw_translate_blend_factor( GLenum factor ); extern GLuint brw_translate_blend_equation( GLenum mode ); diff --git a/src/mesa/drivers/dri/i965/brw_vs.c b/src/mesa/drivers/dri/i965/brw_vs.c index 3a9b780382a..02e60dc20bf 100644 --- a/src/mesa/drivers/dri/i965/brw_vs.c +++ b/src/mesa/drivers/dri/i965/brw_vs.c @@ -287,7 +287,7 @@ static void brw_upload_vs_prog(struct brw_context *brw) * the inputs it asks for, whether they are varying or not. */ key.program_string_id = vp->id; - key.nr_userclip = brw_count_bits(ctx->Transform.ClipPlanesEnabled); + key.nr_userclip = _mesa_bitcount_64(ctx->Transform.ClipPlanesEnabled); key.uses_clip_distance = vp->program.UsesClipDistance; key.copy_edgeflag = (ctx->Polygon.FrontMode != GL_FILL || ctx->Polygon.BackMode != GL_FILL); diff --git a/src/mesa/drivers/dri/i965/gen6_clip_state.c b/src/mesa/drivers/dri/i965/gen6_clip_state.c index ffe2c53acfd..801b88fe047 100644 --- a/src/mesa/drivers/dri/i965/gen6_clip_state.c +++ b/src/mesa/drivers/dri/i965/gen6_clip_state.c @@ -46,7 +46,7 @@ brw_compute_userclip_flags(bool uses_clip_distance, * enable clipping planes 0 through n-1 in the hardware regardless of * which planes the user has selected. 
*/ - return (1 << brw_count_bits(clip_planes_enabled)) - 1; + return (1 << _mesa_bitcount_64(clip_planes_enabled)) - 1; } } diff --git a/src/mesa/drivers/dri/i965/gen6_sf_state.c b/src/mesa/drivers/dri/i965/gen6_sf_state.c index 3eca786814f..306a6c6f2ec 100644 --- a/src/mesa/drivers/dri/i965/gen6_sf_state.c +++ b/src/mesa/drivers/dri/i965/gen6_sf_state.c @@ -117,7 +117,7 @@ upload_sf_state(struct brw_context *brw) /* CACHE_NEW_VS_PROG */ GLbitfield64 vs_outputs_written = brw->vs.prog_data->outputs_written; /* BRW_NEW_FRAGMENT_PROGRAM */ - uint32_t num_outputs = brw_count_bits(brw->fragment_program->Base.InputsRead); + uint32_t num_outputs = _mesa_bitcount_64(brw->fragment_program->Base.InputsRead); uint32_t dw1, dw2, dw3, dw4, dw16, dw17; int i; /* _NEW_BUFFER */ @@ -129,7 +129,7 @@ upload_sf_state(struct brw_context *brw) int nr_userclip; /* _NEW_TRANSFORM */ - nr_userclip = brw_count_bits(ctx->Transform.ClipPlanesEnabled); + nr_userclip = _mesa_bitcount_64(ctx->Transform.ClipPlanesEnabled); brw_compute_vue_map(&vue_map, intel, nr_userclip, vs_outputs_written); urb_entry_read_length = (vue_map.num_slots + 1)/2 - urb_entry_read_offset; diff --git a/src/mesa/drivers/dri/i965/gen6_wm_state.c b/src/mesa/drivers/dri/i965/gen6_wm_state.c index 07e9995f53b..00fadb98f1b 100644 --- a/src/mesa/drivers/dri/i965/gen6_wm_state.c +++ b/src/mesa/drivers/dri/i965/gen6_wm_state.c @@ -180,7 +180,7 @@ upload_wm_state(struct brw_context *brw) dw6 |= GEN6_WM_PERSPECTIVE_PIXEL_BARYCENTRIC; - dw6 |= brw_count_bits(brw->fragment_program->Base.InputsRead) << + dw6 |= _mesa_bitcount_64(brw->fragment_program->Base.InputsRead) << GEN6_WM_NUM_SF_OUTPUTS_SHIFT; BEGIN_BATCH(9); diff --git a/src/mesa/drivers/dri/i965/gen7_sf_state.c b/src/mesa/drivers/dri/i965/gen7_sf_state.c index 75dc6dab4ca..c40abe4d4bc 100644 --- a/src/mesa/drivers/dri/i965/gen7_sf_state.c +++ b/src/mesa/drivers/dri/i965/gen7_sf_state.c @@ -38,13 +38,13 @@ upload_sbe_state(struct brw_context *brw) /* CACHE_NEW_VS_PROG */ 
GLbitfield64 vs_outputs_written = brw->vs.prog_data->outputs_written; /* BRW_NEW_FRAGMENT_PROGRAM */ - uint32_t num_outputs = brw_count_bits(brw->fragment_program->Base.InputsRead); + uint32_t num_outputs = _mesa_bitcount_64(brw->fragment_program->Base.InputsRead); uint32_t dw1, dw10, dw11; int i; int attr = 0, input_index = 0; /* _NEW_TRANSFORM */ int urb_entry_read_offset = 1; - int nr_userclip = brw_count_bits(ctx->Transform.ClipPlanesEnabled); + int nr_userclip = _mesa_bitcount_64(ctx->Transform.ClipPlanesEnabled); uint16_t attr_overrides[FRAG_ATTRIB_MAX]; brw_compute_vue_map(&vue_map, intel, nr_userclip, vs_outputs_written); diff --git a/src/mesa/main/imports.c b/src/mesa/main/imports.c index 8f097195922..345a1c53e2f 100644 --- a/src/mesa/main/imports.c +++ b/src/mesa/main/imports.c @@ -527,6 +527,19 @@ _mesa_bitcount(unsigned int n) } return bits; } + +/** + * Return the number of bits set in the 64-bit unsigned integer n, examining all 64 bits one at a time (imports.h declares this function only when it does not define _mesa_bitcount_64 as __builtin_popcountll). + */ +unsigned int +_mesa_bitcount_64(uint64_t n) +{ + unsigned int bits; + for (bits = 0; n > 0; n = n >> 1) { + bits += (n & 1); + } + return bits; +} #endif diff --git a/src/mesa/main/imports.h b/src/mesa/main/imports.h index 5fb5581efce..20fa148fe59 100644 --- a/src/mesa/main/imports.h +++ b/src/mesa/main/imports.h @@ -578,9 +578,12 @@ _mesa_init_sqrt_table(void); #if ((_GNUC__ == 3 && __GNUC_MINOR__ >= 4) || __GNUC__ >= 4) #define _mesa_bitcount(i) __builtin_popcount(i) +#define _mesa_bitcount_64(i) __builtin_popcountll(i) #else extern unsigned int _mesa_bitcount(unsigned int n); +extern unsigned int +_mesa_bitcount_64(uint64_t n); #endif #else -- 2.30.2