--- /dev/null
+
+/* Write ib_size bytes from index_buffer->ptr into bo at byte offset
+ * `offset`.  With kernel_exec_fencing set, the bo is mapped with
+ * drm_intel_gem_bo_map_gtt(), written through bo->virtual with memcpy()
+ * and unmapped again; otherwise the copy is handed to dri_bo_subdata().
+ *
+ * NOTE(review): intel, bo, offset, index_buffer and ib_size are neither
+ * parameters nor locals of this function -- confirm where they are meant
+ * to come from.  The result of the map call is not checked before
+ * bo->virtual is used.
+ */
+void brw_buffer_subdata()
+{
+ if (intel->intelScreen->kernel_exec_fencing) {
+ drm_intel_gem_bo_map_gtt(bo);
+ memcpy((char *)bo->virtual + offset, index_buffer->ptr, ib_size);
+ drm_intel_gem_bo_unmap_gtt(bo);
+ } else {
+ dri_bo_subdata(bo, offset, ib_size, index_buffer->ptr);
+ }
+}
};
struct brw_cc_unit_key {
- GLboolean stencil, stencil_two_side, color_blend, alpha_enabled;
-
- GLenum stencil_func[2], stencil_fail_op[2];
- GLenum stencil_pass_depth_fail_op[2], stencil_pass_depth_pass_op[2];
- GLubyte stencil_ref[2], stencil_write_mask[2], stencil_test_mask[2];
- GLenum logic_op;
-
- GLenum blend_eq_rgb, blend_eq_a;
- GLenum blend_src_rgb, blend_src_a;
- GLenum blend_dst_rgb, blend_dst_a;
-
- GLenum alpha_func;
- GLclampf alpha_ref;
-
- GLboolean dither;
-
- GLboolean depth_test, depth_write;
- GLenum depth_func;
+ struct pipe_depth_stencil_alpha_state dsa; /* copy of brw->curr.dsa.base */
+ struct pipe_blend_state blend; /* copy of brw->curr.blend.base, no color mask:
+ * cc_unit_populate_key() forces
+ * colormask to 0xf */
};
static void
cc_unit_populate_key(struct brw_context *brw, struct brw_cc_unit_key *key)
{
- GLcontext *ctx = &brw->intel.ctx;
- const unsigned back = ctx->Stencil._BackFace;
-
memset(key, 0, sizeof(*key));
+ /* Build the key by value-copying the current depth/stencil/alpha and
+  * blend state out of brw->curr.  The memset above runs first, so the
+  * whole struct is zeroed before the two members are assigned.
+  */
+ key->dsa = brw->curr.dsa.base;
+ key->blend = brw->curr.blend.base;
- key->stencil = ctx->Stencil._Enabled;
- key->stencil_two_side = ctx->Stencil._TestTwoSide;
-
- if (key->stencil) {
- key->stencil_func[0] = ctx->Stencil.Function[0];
- key->stencil_fail_op[0] = ctx->Stencil.FailFunc[0];
- key->stencil_pass_depth_fail_op[0] = ctx->Stencil.ZFailFunc[0];
- key->stencil_pass_depth_pass_op[0] = ctx->Stencil.ZPassFunc[0];
- key->stencil_ref[0] = ctx->Stencil.Ref[0];
- key->stencil_write_mask[0] = ctx->Stencil.WriteMask[0];
- key->stencil_test_mask[0] = ctx->Stencil.ValueMask[0];
- }
- if (key->stencil_two_side) {
- key->stencil_func[1] = ctx->Stencil.Function[back];
- key->stencil_fail_op[1] = ctx->Stencil.FailFunc[back];
- key->stencil_pass_depth_fail_op[1] = ctx->Stencil.ZFailFunc[back];
- key->stencil_pass_depth_pass_op[1] = ctx->Stencil.ZPassFunc[back];
- key->stencil_ref[1] = ctx->Stencil.Ref[back];
- key->stencil_write_mask[1] = ctx->Stencil.WriteMask[back];
- key->stencil_test_mask[1] = ctx->Stencil.ValueMask[back];
- }
-
- if (ctx->Color._LogicOpEnabled)
- key->logic_op = ctx->Color.LogicOp;
- else
- key->logic_op = GL_COPY;
-
- key->color_blend = ctx->Color.BlendEnabled;
- if (key->color_blend) {
- key->blend_eq_rgb = ctx->Color.BlendEquationRGB;
- key->blend_eq_a = ctx->Color.BlendEquationA;
- key->blend_src_rgb = ctx->Color.BlendSrcRGB;
- key->blend_dst_rgb = ctx->Color.BlendDstRGB;
- key->blend_src_a = ctx->Color.BlendSrcA;
- key->blend_dst_a = ctx->Color.BlendDstA;
- }
-
- key->alpha_enabled = ctx->Color.AlphaEnabled;
- if (key->alpha_enabled) {
- key->alpha_func = ctx->Color.AlphaFunc;
- key->alpha_ref = ctx->Color.AlphaRef;
- }
-
- key->dither = ctx->Color.DitherFlag;
-
- key->depth_test = ctx->Depth.Test;
- if (key->depth_test) {
- key->depth_func = ctx->Depth.Func;
- key->depth_write = ctx->Depth.Mask;
- }
+ /* Clear non-respected values: colormask is overwritten with the
+  * constant 0xf, so it never distinguishes one key from another.
+ */
+ key->blend.colormask = 0xf;
}
/**
memset(&cc, 0, sizeof(cc));
- /* _NEW_STENCIL */
- if (key->stencil) {
- cc.cc0.stencil_enable = 1;
- cc.cc0.stencil_func =
- intel_translate_compare_func(key->stencil_func[0]);
- cc.cc0.stencil_fail_op =
- intel_translate_stencil_op(key->stencil_fail_op[0]);
- cc.cc0.stencil_pass_depth_fail_op =
- intel_translate_stencil_op(key->stencil_pass_depth_fail_op[0]);
- cc.cc0.stencil_pass_depth_pass_op =
- intel_translate_stencil_op(key->stencil_pass_depth_pass_op[0]);
- cc.cc1.stencil_ref = key->stencil_ref[0];
- cc.cc1.stencil_write_mask = key->stencil_write_mask[0];
- cc.cc1.stencil_test_mask = key->stencil_test_mask[0];
-
- if (key->stencil_two_side) {
- cc.cc0.bf_stencil_enable = 1;
- cc.cc0.bf_stencil_func =
- intel_translate_compare_func(key->stencil_func[1]);
- cc.cc0.bf_stencil_fail_op =
- intel_translate_stencil_op(key->stencil_fail_op[1]);
- cc.cc0.bf_stencil_pass_depth_fail_op =
- intel_translate_stencil_op(key->stencil_pass_depth_fail_op[1]);
- cc.cc0.bf_stencil_pass_depth_pass_op =
- intel_translate_stencil_op(key->stencil_pass_depth_pass_op[1]);
- cc.cc1.bf_stencil_ref = key->stencil_ref[1];
- cc.cc2.bf_stencil_write_mask = key->stencil_write_mask[1];
- cc.cc2.bf_stencil_test_mask = key->stencil_test_mask[1];
- }
-
- /* Not really sure about this:
- */
- if (key->stencil_write_mask[0] ||
- (key->stencil_two_side && key->stencil_write_mask[1]))
- cc.cc0.stencil_write_enable = 1;
- }
-
- /* _NEW_COLOR */
- if (key->logic_op != GL_COPY) {
- cc.cc2.logicop_enable = 1;
- cc.cc5.logicop_func = intel_translate_logic_op(key->logic_op);
- } else if (key->color_blend) {
- GLenum eqRGB = key->blend_eq_rgb;
- GLenum eqA = key->blend_eq_a;
- GLenum srcRGB = key->blend_src_rgb;
- GLenum dstRGB = key->blend_dst_rgb;
- GLenum srcA = key->blend_src_a;
- GLenum dstA = key->blend_dst_a;
-
- if (eqRGB == GL_MIN || eqRGB == GL_MAX) {
- srcRGB = dstRGB = GL_ONE;
- }
-
- if (eqA == GL_MIN || eqA == GL_MAX) {
- srcA = dstA = GL_ONE;
- }
-
- cc.cc6.dest_blend_factor = brw_translate_blend_factor(dstRGB);
- cc.cc6.src_blend_factor = brw_translate_blend_factor(srcRGB);
- cc.cc6.blend_function = brw_translate_blend_equation(eqRGB);
-
- cc.cc5.ia_dest_blend_factor = brw_translate_blend_factor(dstA);
- cc.cc5.ia_src_blend_factor = brw_translate_blend_factor(srcA);
- cc.cc5.ia_blend_function = brw_translate_blend_equation(eqA);
-
- cc.cc3.blend_enable = 1;
- cc.cc3.ia_blend_enable = (srcA != srcRGB ||
- dstA != dstRGB ||
- eqA != eqRGB);
- }
-
- if (key->alpha_enabled) {
- cc.cc3.alpha_test = 1;
- cc.cc3.alpha_test_func = intel_translate_compare_func(key->alpha_func);
- cc.cc3.alpha_test_format = BRW_ALPHATEST_FORMAT_UNORM8;
-
- UNCLAMPED_FLOAT_TO_UBYTE(cc.cc7.alpha_ref.ub[0], key->alpha_ref);
- }
-
- if (key->dither) {
- cc.cc5.dither_enable = 1;
- cc.cc6.y_dither_offset = 0;
- cc.cc6.x_dither_offset = 0;
- }
-
- /* _NEW_DEPTH */
- if (key->depth_test) {
- cc.cc2.depth_test = 1;
- cc.cc2.depth_test_function = intel_translate_compare_func(key->depth_func);
- cc.cc2.depth_write_enable = key->depth_write;
- }
+ cc.cc0 = brw->dsa.cc0;
+ cc.cc1 = brw->dsa.cc1;
+ cc.cc2 = brw->dsa.cc2;
+ cc.cc3 = brw->dsa.cc3 | brw->blend.cc3;
/* CACHE_NEW_CC_VP */
cc.cc4.cc_viewport_state_offset = brw->cc.vp_bo->offset >> 5; /* reloc */
- if (INTEL_DEBUG & DEBUG_STATS)
- cc.cc5.statistics_enable = 1;
+ cc.cc5 = brw->blend.cc5 | brw->debug.cc5;
+
bo = brw_upload_cache(&brw->cache, BRW_CC_UNIT,
key, sizeof(*key),
const struct brw_tracked_state brw_cc_unit = {
.dirty = {
- .mesa = _NEW_STENCIL | _NEW_COLOR | _NEW_DEPTH,
+ .mesa = PIPE_NEW_DEPTH_STENCIL_ALPHA | PIPE_NEW_BLEND,
.brw = 0,
.cache = CACHE_NEW_CC_VP
},
* Keith Whitwell <keith@tungstengraphics.com>
*/
-#include "main/glheader.h"
-#include "main/macros.h"
-#include "main/enums.h"
+#include "pipe/p_state.h"
+
+#include "util/u_math.h"
#include "intel_batchbuffer.h"
delta += ATTR_SIZE;
}
- c.nr_attrs = brw_count_bits(c.key.attrs);
+ c.nr_attrs = util_count_bits(c.key.attrs);
if (BRW_IS_IGDNG(brw))
c.nr_regs = (c.nr_attrs + 1) / 2 + 3; /* are vertices packed, or reg-aligned? */
* do all three:
*/
switch (key->primitive) {
- case GL_TRIANGLES:
+ case PIPE_PRIM_TRIANGLES:
if (key->do_unfilled)
brw_emit_unfilled_clip( &c );
else
brw_emit_tri_clip( &c );
break;
- case GL_LINES:
+ case PIPE_PRIM_LINES:
brw_emit_line_clip( &c );
break;
- case GL_POINTS:
+ case PIPE_PRIM_POINTS:
brw_emit_point_clip( &c );
break;
default:
*/
static void upload_clip_prog(struct brw_context *brw)
{
- GLcontext *ctx = &brw->intel.ctx;
struct brw_clip_prog_key key;
memset(&key, 0, sizeof(key));
/* Populate the key:
*/
/* BRW_NEW_REDUCED_PRIMITIVE */
- key.primitive = brw->intel.reduced_primitive;
+ key.primitive = brw->reduced_primitive;
/* CACHE_NEW_VS_PROG */
key.attrs = brw->vs.prog_data->outputs_written;
- /* _NEW_LIGHT */
- key.do_flat_shading = (ctx->Light.ShadeModel == GL_FLAT);
- /* _NEW_TRANSFORM */
- key.nr_userclip = brw_count_bits(ctx->Transform.ClipPlanesEnabled);
+ /* PIPE_NEW_RAST */
+ key.do_flat_shading = brw->rast.base.flatshade;
+ /* PIPE_NEW_UCP */
+ key.nr_userclip = brw->nr_ucp;
if (BRW_IS_IGDNG(brw))
key.clip_mode = BRW_CLIPMODE_KERNEL_CLIP;
else
key.clip_mode = BRW_CLIPMODE_NORMAL;
- /* _NEW_POLYGON */
- if (key.primitive == GL_TRIANGLES) {
- if (ctx->Polygon.CullFlag &&
- ctx->Polygon.CullFaceMode == GL_FRONT_AND_BACK)
+ /* PIPE_NEW_RAST */
+ /* For triangles: reject everything when both windings are culled,
+  * otherwise derive the per-winding fill mode, offset flag and
+  * back-face-colour copy flag from the rasterizer state.
+  *
+  * NOTE(review): this hunk reaches the rasterizer state both as
+  * brw->rast->... and as brw->rast. ... -- only one form can match the
+  * declaration, confirm which and make the accesses uniform.
+  */
+ if (key.primitive == PIPE_PRIM_TRIANGLES) {
+ /* Comparison, not assignment: `=` here would overwrite cull_mode and
+  * make the test depend only on the value of PIPE_WINDING_BOTH.
+  */
+ if (brw->rast->cull_mode == PIPE_WINDING_BOTH)
key.clip_mode = BRW_CLIPMODE_REJECT_ALL;
else {
- GLuint fill_front = CLIP_CULL;
- GLuint fill_back = CLIP_CULL;
- GLuint offset_front = 0;
- GLuint offset_back = 0;
-
- if (!ctx->Polygon.CullFlag ||
- ctx->Polygon.CullFaceMode != GL_FRONT) {
- switch (ctx->Polygon.FrontMode) {
- case GL_FILL:
- fill_front = CLIP_FILL;
- offset_front = 0;
- break;
- case GL_LINE:
- fill_front = CLIP_LINE;
- offset_front = ctx->Polygon.OffsetLine;
- break;
- case GL_POINT:
- fill_front = CLIP_POINT;
- offset_front = ctx->Polygon.OffsetPoint;
- break;
- }
+ /* Start with both windings culled; a winding that is not in
+  * cull_mode gets its translated fill mode instead.
+  */
+ key.fill_ccw = CLIP_CULL;
+ key.fill_cw = CLIP_CULL;
+
+ if (!(brw->rast->cull_mode & PIPE_WINDING_CCW)) {
+ key.fill_ccw = translate_fill(brw->rast.fill_ccw);
}
- if (!ctx->Polygon.CullFlag ||
- ctx->Polygon.CullFaceMode != GL_BACK) {
- switch (ctx->Polygon.BackMode) {
- case GL_FILL:
- fill_back = CLIP_FILL;
- offset_back = 0;
- break;
- case GL_LINE:
- fill_back = CLIP_LINE;
- offset_back = ctx->Polygon.OffsetLine;
- break;
- case GL_POINT:
- fill_back = CLIP_POINT;
- offset_back = ctx->Polygon.OffsetPoint;
- break;
- }
+ if (!(brw->rast->cull_mode & PIPE_WINDING_CW)) {
+ key.fill_cw = translate_fill(brw->rast.fill_cw);
}
- if (ctx->Polygon.BackMode != GL_FILL ||
- ctx->Polygon.FrontMode != GL_FILL) {
+ if (key.fill_cw != CLIP_FILL ||
+ key.fill_ccw != CLIP_FILL) {
key.do_unfilled = 1;
-
- /* Most cases the fixed function units will handle. Cases where
- * one or more polygon faces are unfilled will require help:
- */
key.clip_mode = BRW_CLIPMODE_CLIP_NON_REJECTED;
+ }
+
+ key.offset_ccw = brw->rast.offset_ccw;
+ key.offset_cw = brw->rast.offset_cw;
+
+ if (brw->rast.light_twoside &&
+ key.fill_cw != CLIP_CULL)
+ key.copy_bfc_cw = 1;
- if (offset_back || offset_front) {
- /* _NEW_POLYGON, _NEW_BUFFERS */
- key.offset_units = ctx->Polygon.OffsetUnits * brw->intel.polygon_offset_scale;
- key.offset_factor = ctx->Polygon.OffsetFactor * ctx->DrawBuffer->_MRD;
- }
-
- switch (ctx->Polygon.FrontFace) {
- case GL_CCW:
- key.fill_ccw = fill_front;
- key.fill_cw = fill_back;
- key.offset_ccw = offset_front;
- key.offset_cw = offset_back;
- if (ctx->Light.Model.TwoSide &&
- key.fill_cw != CLIP_CULL)
- key.copy_bfc_cw = 1;
- break;
- case GL_CW:
- key.fill_cw = fill_front;
- key.fill_ccw = fill_back;
- key.offset_cw = offset_front;
- key.offset_ccw = offset_back;
- if (ctx->Light.Model.TwoSide &&
- key.fill_ccw != CLIP_CULL)
- key.copy_bfc_ccw = 1;
- break;
- }
+ if (brw->rast.light_twoside &&
+ key.fill_ccw != CLIP_CULL)
+ key.copy_bfc_ccw = 1;
+ /* NOTE(review): the do_unfilled block above now has its own closing
+  * brace, so the three unchanged closing braces below close the else,
+  * the PIPE_PRIM_TRIANGLES test and then the function body itself --
+  * confirm that no further statements of this function follow.
+  */
}
}
}
const struct brw_tracked_state brw_clip_prog = {
.dirty = {
- .mesa = (_NEW_LIGHT |
- _NEW_TRANSFORM |
- _NEW_POLYGON |
- _NEW_BUFFERS),
+ .mesa = (PIPE_NEW_RAST |
+ PIPE_NEW_UCP),
.brw = (BRW_NEW_REDUCED_PRIMITIVE),
.cache = CACHE_NEW_VS_PROG
},
*/
struct brw_clip_prog_key {
GLuint attrs:32;
+
GLuint primitive:4;
GLuint nr_userclip:3;
GLuint do_flat_shading:1;
GLuint fill_ccw:2; /* includes cull information */
GLuint offset_cw:1;
GLuint offset_ccw:1;
- GLuint pad0:17;
-
GLuint copy_bfc_cw:1;
GLuint copy_bfc_ccw:1;
GLuint clip_mode:3;
- GLuint pad1:27;
+ GLuint pad1:12;
GLfloat offset_factor;
GLfloat offset_units;
* Keith Whitwell <keith@tungstengraphics.com>
*/
-#include "main/glheader.h"
-#include "main/macros.h"
-#include "main/enums.h"
-#include "shader/program.h"
-
-#include "intel_batchbuffer.h"
-
#include "brw_defines.h"
#include "brw_context.h"
#include "brw_eu.h"
* Keith Whitwell <keith@tungstengraphics.com>
*/
-#include "main/glheader.h"
-#include "main/macros.h"
-#include "main/enums.h"
-#include "shader/program.h"
-
-#include "intel_batchbuffer.h"
-
#include "brw_defines.h"
#include "brw_context.h"
#include "brw_eu.h"
#include "brw_context.h"
#include "brw_state.h"
#include "brw_defines.h"
-#include "main/macros.h"
struct brw_clip_unit_key {
unsigned int total_grf;
key->nr_urb_entries = brw->urb.nr_clip_entries;
key->urb_size = brw->urb.vsize;
- /* _NEW_TRANSOFORM */
- key->depth_clamp = ctx->Transform.DepthClamp;
+ /* */
+ key->depth_clamp = 0; // XXX: add this to gallium: ctx->Transform.DepthClamp;
}
static dri_bo *
const struct brw_tracked_state brw_clip_unit = {
.dirty = {
- .mesa = _NEW_TRANSFORM,
+ .mesa = 0,
.brw = (BRW_NEW_CURBE_OFFSETS |
BRW_NEW_URB_FENCE),
.cache = CACHE_NEW_CLIP_PROG
* Keith Whitwell <keith@tungstengraphics.com>
*/
-#include "main/glheader.h"
-#include "main/macros.h"
-#include "main/enums.h"
-#include "shader/program.h"
-
-#include "intel_batchbuffer.h"
-
#include "brw_defines.h"
#include "brw_context.h"
#include "brw_eu.h"
* Keith Whitwell <keith@tungstengraphics.com>
*/
-#include "main/glheader.h"
-#include "main/macros.h"
-#include "main/enums.h"
-#include "shader/program.h"
-
#include "intel_batchbuffer.h"
#include "brw_defines.h"
*/
-#include "main/glheader.h"
-#include "main/macros.h"
-#include "main/enums.h"
-#include "shader/program.h"
-
-#include "intel_batchbuffer.h"
-
#include "brw_defines.h"
#include "brw_context.h"
#include "brw_eu.h"
#include "utils.h"
-/***************************************
- * Mesa's Driver Functions
- ***************************************/
-
-static void brwUseProgram(GLcontext *ctx, GLuint program)
-{
- _mesa_use_program(ctx, program);
-}
-
-static void brwInitProgFuncs( struct dd_function_table *functions )
-{
- functions->UseProgram = brwUseProgram;
-}
-static void brwInitDriverFunctions( struct dd_function_table *functions )
-{
- intelInitDriverFunctions( functions );
-
- brwInitFragProgFuncs( functions );
- brwInitProgFuncs( functions );
- brw_init_queryobj_functions(functions);
-
- functions->Viewport = intel_viewport;
-}
GLboolean brwCreateContext( const __GLcontextModes *mesaVis,
__DRIcontextPrivate *driContextPriv,
void *sharedContextPrivate)
{
- struct dd_function_table functions;
struct brw_context *brw = (struct brw_context *) CALLOC_STRUCT(brw_context);
- struct intel_context *intel = &brw->intel;
- GLcontext *ctx = &intel->ctx;
if (!brw) {
- _mesa_printf("%s: failed to alloc context\n", __FUNCTION__);
- return GL_FALSE;
- }
-
- brwInitVtbl( brw );
- brwInitDriverFunctions( &functions );
-
- if (!intelInitContext( intel, mesaVis, driContextPriv,
- sharedContextPrivate, &functions )) {
- _mesa_printf("%s: failed to init intel context\n", __FUNCTION__);
- FREE(brw);
+ debug_printf("%s: failed to alloc context\n", __FUNCTION__);
return GL_FALSE;
}
- /* Initialize swrast, tnl driver tables: */
- intelInitSpanFuncs(ctx);
-
- TNL_CONTEXT(ctx)->Driver.RunPipeline = _tnl_run_pipeline;
-
- ctx->Const.MaxTextureImageUnits = BRW_MAX_TEX_UNIT;
- ctx->Const.MaxTextureCoordUnits = 8; /* Mesa limit */
- ctx->Const.MaxTextureUnits = MIN2(ctx->Const.MaxTextureCoordUnits,
- ctx->Const.MaxTextureImageUnits);
- ctx->Const.MaxVertexTextureImageUnits = 0; /* no vertex shader textures */
-
- /* Mesa limits textures to 4kx4k; it would be nice to fix that someday
- */
- ctx->Const.MaxTextureLevels = 13;
- ctx->Const.Max3DTextureLevels = 9;
- ctx->Const.MaxCubeTextureLevels = 12;
- ctx->Const.MaxTextureRectSize = (1<<12);
-
- ctx->Const.MaxTextureMaxAnisotropy = 16.0;
-
- /* if conformance mode is set, swrast can handle any size AA point */
- ctx->Const.MaxPointSizeAA = 255.0;
-
/* We want the GLSL compiler to emit code that uses condition codes */
ctx->Shader.EmitCondCodes = GL_TRUE;
ctx->Shader.EmitNVTempInitialization = GL_TRUE;
- ctx->Const.VertexProgram.MaxNativeInstructions = (16 * 1024);
- ctx->Const.VertexProgram.MaxAluInstructions = 0;
- ctx->Const.VertexProgram.MaxTexInstructions = 0;
- ctx->Const.VertexProgram.MaxTexIndirections = 0;
- ctx->Const.VertexProgram.MaxNativeAluInstructions = 0;
- ctx->Const.VertexProgram.MaxNativeTexInstructions = 0;
- ctx->Const.VertexProgram.MaxNativeTexIndirections = 0;
- ctx->Const.VertexProgram.MaxNativeAttribs = 16;
- ctx->Const.VertexProgram.MaxNativeTemps = 256;
- ctx->Const.VertexProgram.MaxNativeAddressRegs = 1;
- ctx->Const.VertexProgram.MaxNativeParameters = 1024;
- ctx->Const.VertexProgram.MaxEnvParams =
- MIN2(ctx->Const.VertexProgram.MaxNativeParameters,
- ctx->Const.VertexProgram.MaxEnvParams);
-
- ctx->Const.FragmentProgram.MaxNativeInstructions = (16 * 1024);
- ctx->Const.FragmentProgram.MaxNativeAluInstructions = (16 * 1024);
- ctx->Const.FragmentProgram.MaxNativeTexInstructions = (16 * 1024);
- ctx->Const.FragmentProgram.MaxNativeTexIndirections = (16 * 1024);
- ctx->Const.FragmentProgram.MaxNativeAttribs = 12;
- ctx->Const.FragmentProgram.MaxNativeTemps = 256;
- ctx->Const.FragmentProgram.MaxNativeAddressRegs = 0;
- ctx->Const.FragmentProgram.MaxNativeParameters = 1024;
- ctx->Const.FragmentProgram.MaxEnvParams =
- MIN2(ctx->Const.FragmentProgram.MaxNativeParameters,
- ctx->Const.FragmentProgram.MaxEnvParams);
+ brw_init_query( brw ); /* NOTE(review): the two ctx->Shader.* assignments
+  * kept just above still use `ctx`, whose
+  * declaration this change removes -- confirm
+  * whether they should remain.  After this
+  * change mesaVis, driContextPriv and
+  * sharedContextPrivate are no longer referenced
+  * in the lines shown. */
brw_init_state( brw );
+ brw_draw_init( brw ); /* now runs before the dirty flags are set; it used
+  * to be the last call before returning */
brw->state.dirty.mesa = ~0;
brw->state.dirty.brw = ~0;
brw->emit_state_always = 0;
- ctx->VertexProgram._MaintainTnlProgram = GL_TRUE;
- ctx->FragmentProgram._MaintainTexEnvProgram = GL_TRUE;
-
make_empty_list(&brw->query.active_head);
- brw_draw_init( brw );
return GL_TRUE;
}
+/**
+ * called from intelDestroyContext()
+ *
+ * Tears down the driver state hanging off the context: destroys the
+ * state and draw modules, frees the WM compile data, releases the colour
+ * and depth regions and drops the context's reference on each buffer
+ * object listed below.  The brw_context structure itself is not freed
+ * here.
+ *
+ * NOTE(review): this still goes through struct intel_context and
+ * brw_context(&intel->ctx), while the same change makes brw_context()
+ * take a struct pipe_context pointer and removes the `intel` base member
+ * from struct brw_context -- confirm the intended signature.
+ */
+static void brw_destroy_context( struct intel_context *intel )
+{
+ struct brw_context *brw = brw_context(&intel->ctx);
+ int i;
+
+ brw_destroy_state(brw);
+ brw_draw_destroy( brw );
+
+ /* FREE rather than _mesa_free, matching the other frees converted
+  * by this change.
+  */
+ FREE(brw->wm.compile_data);
+
+ for (i = 0; i < brw->state.nr_color_regions; i++)
+ intel_region_release(&brw->state.color_regions[i]);
+ brw->state.nr_color_regions = 0;
+ intel_region_release(&brw->state.depth_region);
+
+ dri_bo_unreference(brw->curbe.curbe_bo);
+ dri_bo_unreference(brw->vs.prog_bo);
+ dri_bo_unreference(brw->vs.state_bo);
+ dri_bo_unreference(brw->vs.bind_bo);
+ dri_bo_unreference(brw->gs.prog_bo);
+ dri_bo_unreference(brw->gs.state_bo);
+ dri_bo_unreference(brw->clip.prog_bo);
+ dri_bo_unreference(brw->clip.state_bo);
+ dri_bo_unreference(brw->clip.vp_bo);
+ dri_bo_unreference(brw->sf.prog_bo);
+ dri_bo_unreference(brw->sf.state_bo);
+ dri_bo_unreference(brw->sf.vp_bo);
+ for (i = 0; i < BRW_MAX_TEX_UNIT; i++)
+ dri_bo_unreference(brw->wm.sdc_bo[i]);
+ dri_bo_unreference(brw->wm.bind_bo);
+ for (i = 0; i < BRW_WM_MAX_SURF; i++)
+ dri_bo_unreference(brw->wm.surf_bo[i]);
+ dri_bo_unreference(brw->wm.sampler_bo);
+ dri_bo_unreference(brw->wm.prog_bo);
+ dri_bo_unreference(brw->wm.state_bo);
+ dri_bo_unreference(brw->cc.prog_bo);
+ dri_bo_unreference(brw->cc.state_bo);
+ dri_bo_unreference(brw->cc.vp_bo);
+}
* Handles blending and (presumably) depth and stencil testing.
*/
-#define BRW_FALLBACK_TEXTURE 0x1
#define BRW_MAX_CURBE (32*16)
struct brw_context;
*/
struct brw_context
{
- struct intel_context intel; /**< base class, must be first field */
GLuint primitive;
GLboolean emit_state_always;
- GLboolean tmp_fallback;
GLboolean no_batch_wrap;
struct {
/*======================================================================
* brw_queryobj.c
*/
-void brw_init_queryobj_functions(struct dd_function_table *functions);
+void brw_init_query(struct brw_context *brw);
void brw_prepare_query_begin(struct brw_context *brw);
void brw_emit_query_begin(struct brw_context *brw);
void brw_emit_query_end(struct brw_context *brw);
* macros used previously:
*/
static INLINE struct brw_context *
-brw_context( GLcontext *ctx )
+brw_context( struct pipe_context *ctx ) /* unchecked pointer cast of ctx to
+ * the driver context.
+ * NOTE(review): only valid if struct brw_context begins with a struct
+ * pipe_context; the hunk that removes the old `intel` base member does
+ * not show a replacement being added -- confirm. */
{
return (struct brw_context *)ctx;
}
*/
-
-#include "main/glheader.h"
-#include "main/context.h"
-#include "main/macros.h"
-#include "main/enums.h"
-#include "shader/prog_parameter.h"
-#include "shader/prog_print.h"
-#include "shader/prog_statevars.h"
#include "intel_batchbuffer.h"
#include "intel_regions.h"
#include "brw_context.h"
GLuint nr_clip_regs = 0;
GLuint total_regs;
- /* _NEW_TRANSFORM */
- if (ctx->Transform.ClipPlanesEnabled) {
- GLuint nr_planes = 6 + brw_count_bits(ctx->Transform.ClipPlanesEnabled);
+ /* PIPE_NEW_UCP */
+ if (brw->nr_ucp) {
+ GLuint nr_planes = 6 + brw->nr_ucp;
nr_clip_regs = (nr_planes * 4 + 15) / 16;
}
total_regs = nr_fp_regs + nr_vp_regs + nr_clip_regs;
- /* This can happen - what to do? Probably rather than falling
- * back, the best thing to do is emit programs which code the
- * constants as immediate values. Could do this either as a static
- * cap on WM and VS, or adaptively.
- *
- * Unfortunately, this is currently dependent on the results of the
- * program generation process (in the case of wm), so this would
- * introduce the need to re-generate programs in the event of a
- * curbe allocation failure.
- */
- /* Max size is 32 - just large enough to
- * hold the 128 parameters allowed by
- * the fragment and vertex program
- * api's. It's not clear what happens
- * when both VP and FP want to use 128
- * parameters, though.
+ /* When this is > 32, want to use a true constant buffer to hold
+ * the extra constants.
*/
assert(total_regs <= 32);
brw->curbe.vs_size = nr_vp_regs; reg += nr_vp_regs;
brw->curbe.total_size = reg;
- if (0)
- _mesa_printf("curbe wm %d+%d clip %d+%d vs %d+%d\n",
+ if (BRW_DEBUG & DEBUG_CURBE)
+ debug_printf("curbe wm %d+%d clip %d+%d vs %d+%d\n",
brw->curbe.wm_start,
brw->curbe.wm_size,
brw->curbe.clip_start,
const struct brw_tracked_state brw_curbe_offsets = {
.dirty = {
- .mesa = _NEW_TRANSFORM,
+ .mesa = PIPE_NEW_UCP,
.brw = BRW_NEW_VERTEX_PROGRAM,
.cache = CACHE_NEW_WM_PROG
},
if (brw->curbe.wm_size) {
GLuint offset = brw->curbe.wm_start * 16;
- _mesa_load_state_parameters(ctx, fp->program.Base.Parameters);
+ /* map fs constant buffer */
/* copy float constants */
for (i = 0; i < brw->wm.prog_data->nr_params; i++)
buf[offset + i] = *brw->wm.prog_data->param[i];
+
+ /* unmap fs constant buffer */
}
buf[offset + i * 4 + 3] = fixed_plane[i][3];
}
- /* Clip planes: _NEW_TRANSFORM plus _NEW_PROJECTION to get to
- * clip-space:
+ /* Clip planes:
*/
- assert(MAX_CLIP_PLANES == 6);
- for (j = 0; j < MAX_CLIP_PLANES; j++) {
- if (ctx->Transform.ClipPlanesEnabled & (1<<j)) {
- buf[offset + i * 4 + 0] = ctx->Transform._ClipUserPlane[j][0];
- buf[offset + i * 4 + 1] = ctx->Transform._ClipUserPlane[j][1];
- buf[offset + i * 4 + 2] = ctx->Transform._ClipUserPlane[j][2];
- buf[offset + i * 4 + 3] = ctx->Transform._ClipUserPlane[j][3];
- i++;
- }
+ assert(brw->nr_ucp <= 6);
+ for (j = 0; j < brw->nr_ucp; j++) {
+ buf[offset + i * 4 + 0] = brw->ucp[j][0];
+ buf[offset + i * 4 + 1] = brw->ucp[j][1];
+ buf[offset + i * 4 + 2] = brw->ucp[j][2];
+ buf[offset + i * 4 + 3] = brw->ucp[j][3];
+ i++;
}
}
GLuint offset = brw->curbe.vs_start * 16;
GLuint nr = brw->vs.prog_data->nr_params / 4;
- if (brw->vertex_program->IsNVProgram)
- _mesa_load_tracked_matrices(ctx);
-
- /* Updates the ParamaterValues[i] pointers for all parameters of the
- * basic type of PROGRAM_STATE_VAR.
- */
- _mesa_load_state_parameters(ctx, vp->program.Base.Parameters);
+ /* map vs constant buffer */
/* XXX just use a memcpy here */
for (i = 0; i < nr; i++) {
buf[offset + i * 4 + 2] = value[2];
buf[offset + i * 4 + 3] = value[3];
}
+
+ /* unmap vs constant buffer */
}
if (0) {
for (i = 0; i < sz*16; i+=4)
- _mesa_printf("curbe %d.%d: %f %f %f %f\n", i/8, i&4,
+ debug_printf("curbe %d.%d: %f %f %f %f\n", i/8, i&4,
buf[i+0], buf[i+1], buf[i+2], buf[i+3]);
- _mesa_printf("last_buf %p buf %p sz %d/%d cmp %d\n",
+ debug_printf("last_buf %p buf %p sz %d/%d cmp %d\n",
brw->curbe.last_buf, buf,
bufsz, brw->curbe.last_bufsz,
brw->curbe.last_buf ? memcmp(buf, brw->curbe.last_buf, bufsz) : -1);
bufsz == brw->curbe.last_bufsz &&
memcmp(buf, brw->curbe.last_buf, bufsz) == 0) {
/* constants have not changed */
- _mesa_free(buf);
+ FREE(buf);
}
else {
/* constants have changed */
if (brw->curbe.last_buf)
- _mesa_free(brw->curbe.last_buf);
+ FREE(brw->curbe.last_buf);
brw->curbe.last_buf = buf;
brw->curbe.last_bufsz = bufsz;
ADVANCE_BATCH();
}
-/* This tracked state is unique in that the state it monitors varies
- * dynamically depending on the parameters tracked by the fragment and
- * vertex programs. This is the template used as a starting point,
- * each context will maintain a copy of this internally and update as
- * required.
- */
const struct brw_tracked_state brw_constant_buffer = {
.dirty = {
- .mesa = _NEW_PROGRAM_CONSTANTS,
+ .mesa = (PIPE_NEW_FS_CONSTANTS |
+ PIPE_NEW_VS_CONSTANTS |
+ PIPE_NEW_UCP),
.brw = (BRW_NEW_FRAGMENT_PROGRAM |
BRW_NEW_VERTEX_PROGRAM |
BRW_NEW_URB_FENCE | /* Implicit - hardware requires this, not used above */
#include "intel_chipset.h"
-#define BRW_IS_G4X(brw) (IS_G4X((brw)->intel.intelScreen->deviceID))
-#define BRW_IS_IGDNG(brw) (IS_IGDNG((brw)->intel.intelScreen->deviceID))
+#define BRW_IS_G4X(brw) (IS_G4X((brw)->brw_screen->deviceID))
+#define BRW_IS_IGDNG(brw) (IS_IGDNG((brw)->brw_screen->deviceID))
#define BRW_IS_965(brw) (!(BRW_IS_G4X(brw) || BRW_IS_IGDNG(brw)))
#define CMD_PIPELINE_SELECT(brw) ((BRW_IS_G4X(brw) || BRW_IS_IGDNG(brw)) ? CMD_PIPELINE_SELECT_GM45 : CMD_PIPELINE_SELECT_965)
#define CMD_VF_STATISTICS(brw) ((BRW_IS_G4X(brw) || BRW_IS_IGDNG(brw)) ? CMD_VF_STATISTICS_GM45 : CMD_VF_STATISTICS_965)
#include <unistd.h>
#include <stdarg.h>
-#include "main/mtypes.h"
-
#include "brw_context.h"
#include "brw_defines.h"
#include "brw_defines.h"
#include "brw_context.h"
#include "brw_state.h"
-#include "brw_fallback.h"
#include "intel_batchbuffer.h"
#include "intel_buffer_objects.h"
#define FILE_DEBUG_FLAG DEBUG_BATCH
-static GLuint prim_to_hw_prim[GL_POLYGON+1] = {
+static uint32_t prim_to_hw_prim[PIPE_PRIM_POLYGON+1] = {
_3DPRIM_POINTLIST,
_3DPRIM_LINELIST,
_3DPRIM_LINELOOP,
};
-static const GLenum reduced_prim[GL_POLYGON+1] = {
- GL_POINTS,
- GL_LINES,
- GL_LINES,
- GL_LINES,
- GL_TRIANGLES,
- GL_TRIANGLES,
- GL_TRIANGLES,
- GL_TRIANGLES,
- GL_TRIANGLES,
- GL_TRIANGLES
-};
-
/* When the primitive changes, set a state bit and re-validate. Not
* the nicest and would rather deal with this by having all the
brw->state.dirty.brw |= BRW_NEW_INPUT_DIMENSIONS;
}
-/* XXX: could split the primitive list to fallback only on the
- * non-conformant primitives.
- */
-static GLboolean check_fallbacks( struct brw_context *brw,
- const struct _mesa_prim *prim,
- GLuint nr_prims )
-{
- GLcontext *ctx = &brw->intel.ctx;
- GLuint i;
-
- /* If we don't require strict OpenGL conformance, never
- * use fallbacks. If we're forcing fallbacks, always
- * use fallfacks.
- */
- if (brw->intel.conformance_mode == 0)
- return GL_FALSE;
-
- if (brw->intel.conformance_mode == 2)
- return GL_TRUE;
-
- if (ctx->Polygon.SmoothFlag) {
- for (i = 0; i < nr_prims; i++)
- if (reduced_prim[prim[i].mode] == GL_TRIANGLES)
- return GL_TRUE;
- }
-
- /* BRW hardware will do AA lines, but they are non-conformant it
- * seems. TBD whether we keep this fallback:
- */
- if (ctx->Line.SmoothFlag) {
- for (i = 0; i < nr_prims; i++)
- if (reduced_prim[prim[i].mode] == GL_LINES)
- return GL_TRUE;
- }
-
- /* Stipple -- these fallbacks could be resolved with a little
- * bit of work?
- */
- if (ctx->Line.StippleFlag) {
- for (i = 0; i < nr_prims; i++) {
- /* GS doesn't get enough information to know when to reset
- * the stipple counter?!?
- */
- if (prim[i].mode == GL_LINE_LOOP || prim[i].mode == GL_LINE_STRIP)
- return GL_TRUE;
-
- if (prim[i].mode == GL_POLYGON &&
- (ctx->Polygon.FrontMode == GL_LINE ||
- ctx->Polygon.BackMode == GL_LINE))
- return GL_TRUE;
- }
- }
-
- if (ctx->Point.SmoothFlag) {
- for (i = 0; i < nr_prims; i++)
- if (prim[i].mode == GL_POINTS)
- return GL_TRUE;
- }
-
- /* BRW hardware doesn't handle GL_CLAMP texturing correctly;
- * brw_wm_sampler_state:translate_wrap_mode() treats GL_CLAMP
- * as GL_CLAMP_TO_EDGE instead. If we're using GL_CLAMP, and
- * we want strict conformance, force the fallback.
- * Right now, we only do this for 2D textures.
- */
- {
- int u;
- for (u = 0; u < ctx->Const.MaxTextureCoordUnits; u++) {
- struct gl_texture_unit *texUnit = &ctx->Texture.Unit[u];
- if (texUnit->Enabled) {
- if (texUnit->Enabled & TEXTURE_1D_BIT) {
- if (texUnit->CurrentTex[TEXTURE_1D_INDEX]->WrapS == GL_CLAMP) {
- return GL_TRUE;
- }
- }
- if (texUnit->Enabled & TEXTURE_2D_BIT) {
- if (texUnit->CurrentTex[TEXTURE_2D_INDEX]->WrapS == GL_CLAMP ||
- texUnit->CurrentTex[TEXTURE_2D_INDEX]->WrapT == GL_CLAMP) {
- return GL_TRUE;
- }
- }
- if (texUnit->Enabled & TEXTURE_3D_BIT) {
- if (texUnit->CurrentTex[TEXTURE_3D_INDEX]->WrapS == GL_CLAMP ||
- texUnit->CurrentTex[TEXTURE_3D_INDEX]->WrapT == GL_CLAMP ||
- texUnit->CurrentTex[TEXTURE_3D_INDEX]->WrapR == GL_CLAMP) {
- return GL_TRUE;
- }
- }
- }
- }
- }
-
- /* Nothing stopping us from the fast path now */
- return GL_FALSE;
-}
-
/* May fail if out of video memory for texture or vbo upload, or on
* fallback conditions.
*/
GLboolean retval = GL_FALSE;
GLboolean warn = GL_FALSE;
GLboolean first_time = GL_TRUE;
+ uint32_t hw_prim;
GLuint i;
if (ctx->NewState)
_mesa_update_state( ctx );
- /* We have to validate the textures *before* checking for fallbacks;
- * otherwise, the software fallback won't be able to rely on the
- * texture state, the firstLevel and lastLevel fields won't be
- * set in the intel texture object (they'll both be 0), and the
- * software fallback will segfault if it attempts to access any
- * texture level other than level 0.
- */
- brw_validate_textures( brw );
-
- if (check_fallbacks(brw, prim, nr_prims))
- return GL_FALSE;
-
/* Bind all inputs, derive varying and size information:
*/
brw_merge_inputs( brw, arrays );
brw->vb.max_index = max_index;
brw->state.dirty.brw |= BRW_NEW_VERTICES;
- /* Have to validate state quite late. Will rebuild tnl_program,
- * which depends on varying information.
- *
- * Note this is where brw->vs->prog_data.inputs_read is calculated,
- * so can't access it earlier.
- */
-
- LOCK_HARDWARE(intel);
-
- if (!intel->constant_cliprect && intel->driDrawable->numClipRects == 0) {
- UNLOCK_HARDWARE(intel);
- return GL_TRUE;
- }
-
- for (i = 0; i < nr_prims; i++) {
- uint32_t hw_prim;
-
- /* Flush the batch if it's approaching full, so that we don't wrap while
- * we've got validated state that needs to be in the same batch as the
- * primitives. This fraction is just a guess (minimal full state plus
- * a primitive is around 512 bytes), and would be better if we had
- * an upper bound of how much we might emit in a single
- * brw_try_draw_prims().
- */
- intel_batchbuffer_require_space(intel->batch, intel->batch->size / 4,
- LOOP_CLIPRECTS);
-
- hw_prim = brw_set_prim(brw, prim[i].mode);
-
- if (first_time || (brw->state.dirty.brw & BRW_NEW_PRIMITIVE)) {
- first_time = GL_FALSE;
-
- brw_validate_state(brw);
-
- /* Various fallback checks: */
- if (brw->intel.Fallback)
- goto out;
-
- /* Check that we can fit our state in with our existing batchbuffer, or
- * flush otherwise.
- */
- if (dri_bufmgr_check_aperture_space(brw->state.validated_bos,
- brw->state.validated_bo_count)) {
- static GLboolean warned;
- intel_batchbuffer_flush(intel->batch);
-
- /* Validate the state after we flushed the batch (which would have
- * changed the set of dirty state). If we still fail to
- * check_aperture, warn of what's happening, but attempt to continue
- * on since it may succeed anyway, and the user would probably rather
- * see a failure and a warning than a fallback.
- */
- brw_validate_state(brw);
- if (!warned &&
- dri_bufmgr_check_aperture_space(brw->state.validated_bos,
- brw->state.validated_bo_count)) {
- warn = GL_TRUE;
- warned = GL_TRUE;
- }
- }
-
- brw_upload_state(brw);
- }
+ hw_prim = brw_set_prim(brw, prim[i].mode);
- brw_emit_prim(brw, &prim[i], hw_prim);
+ brw_validate_state(brw);
- retval = GL_TRUE;
- }
+ /* Check that we can fit our state in with our existing batchbuffer, or
+ * flush otherwise.
+ */
+ ret = dri_bufmgr_check_aperture_space(brw->state.validated_bos,
+ brw->state.validated_bo_count);
+ if (ret)
+ return ret;
+
+ ret = brw_upload_state(brw);
+ if (ret)
+ return ret;
+
+ ret = brw_emit_prim(brw, &prim[i], hw_prim);
+ if (ret)
+ return ret;
if (intel->always_flush_batch)
intel_batchbuffer_flush(intel->batch);
- out:
- UNLOCK_HARDWARE(intel);
-
- brw_state_cache_check_size(brw);
-
- if (warn)
- fprintf(stderr, "i965: Single primitive emit potentially exceeded "
- "available aperture space\n");
- if (!retval)
- DBG("%s failed\n", __FUNCTION__);
-
- return retval;
+ return 0;
}
void brw_draw_prims( GLcontext *ctx,
GLuint min_index,
GLuint max_index )
{
- GLboolean retval;
+ enum pipe_error ret;
if (!vbo_all_varyings_in_vbos(arrays)) {
if (!index_bounds_valid)
vbo_get_minmax_index(ctx, prim, ib, &min_index, &max_index);
-
- /* Decide if we want to rebase. If so we end up recursing once
- * only into this function.
- */
- if (min_index != 0) {
- vbo_rebase_prims(ctx, arrays,
- prim, nr_prims,
- ib, min_index, max_index,
- brw_draw_prims );
- return;
- }
}
/* Make a first attempt at drawing:
*/
- retval = brw_try_draw_prims(ctx, arrays, prim, nr_prims, ib, min_index, max_index);
+ ret = brw_try_draw_prims(ctx, arrays, prim, nr_prims, ib, min_index, max_index);
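- /* Otherwise, we really are out of memory. Pass the drawing
- * command to the software tnl module and which will in turn call
- * swrast to do the drawing.
- */
+ /* If the first attempt reported an error, flush the batchbuffer and
+ * try exactly once more; the retry is asserted to succeed.
+ */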
- if (!retval) {
- _swsetup_Wakeup(ctx);
- _tnl_draw_prims(ctx, arrays, prim, nr_prims, ib, min_index, max_index);
+ if (ret != 0) {
+ intel_batchbuffer_flush(intel->batch);
+ ret = brw_try_draw_prims(ctx, arrays, prim, nr_prims, ib, min_index, max_index);
+ assert(ret == 0);
}
-
}
void brw_draw_init( struct brw_context *brw )
*
**************************************************************************/
+#include "pipe/p_context.h"
-#include "main/glheader.h"
-#include "main/bufferobj.h"
-#include "main/context.h"
-#include "main/state.h"
-#include "main/api_validate.h"
-#include "main/enums.h"
+#include "util/u_upload_mgr.h"
#include "brw_draw.h"
#include "brw_defines.h"
#include "intel_buffer_objects.h"
#include "intel_tex.h"
-static GLuint double_types[5] = {
- 0,
- BRW_SURFACEFORMAT_R64_FLOAT,
- BRW_SURFACEFORMAT_R64G64_FLOAT,
- BRW_SURFACEFORMAT_R64G64B64_FLOAT,
- BRW_SURFACEFORMAT_R64G64B64A64_FLOAT
-};
-
-static GLuint float_types[5] = {
- 0,
- BRW_SURFACEFORMAT_R32_FLOAT,
- BRW_SURFACEFORMAT_R32G32_FLOAT,
- BRW_SURFACEFORMAT_R32G32B32_FLOAT,
- BRW_SURFACEFORMAT_R32G32B32A32_FLOAT
-};
-
-static GLuint uint_types_norm[5] = {
- 0,
- BRW_SURFACEFORMAT_R32_UNORM,
- BRW_SURFACEFORMAT_R32G32_UNORM,
- BRW_SURFACEFORMAT_R32G32B32_UNORM,
- BRW_SURFACEFORMAT_R32G32B32A32_UNORM
-};
-
-static GLuint uint_types_scale[5] = {
- 0,
- BRW_SURFACEFORMAT_R32_USCALED,
- BRW_SURFACEFORMAT_R32G32_USCALED,
- BRW_SURFACEFORMAT_R32G32B32_USCALED,
- BRW_SURFACEFORMAT_R32G32B32A32_USCALED
-};
-
-static GLuint int_types_norm[5] = {
- 0,
- BRW_SURFACEFORMAT_R32_SNORM,
- BRW_SURFACEFORMAT_R32G32_SNORM,
- BRW_SURFACEFORMAT_R32G32B32_SNORM,
- BRW_SURFACEFORMAT_R32G32B32A32_SNORM
-};
-
-static GLuint int_types_scale[5] = {
- 0,
- BRW_SURFACEFORMAT_R32_SSCALED,
- BRW_SURFACEFORMAT_R32G32_SSCALED,
- BRW_SURFACEFORMAT_R32G32B32_SSCALED,
- BRW_SURFACEFORMAT_R32G32B32A32_SSCALED
-};
-
-static GLuint ushort_types_norm[5] = {
- 0,
- BRW_SURFACEFORMAT_R16_UNORM,
- BRW_SURFACEFORMAT_R16G16_UNORM,
- BRW_SURFACEFORMAT_R16G16B16_UNORM,
- BRW_SURFACEFORMAT_R16G16B16A16_UNORM
-};
-
-static GLuint ushort_types_scale[5] = {
- 0,
- BRW_SURFACEFORMAT_R16_USCALED,
- BRW_SURFACEFORMAT_R16G16_USCALED,
- BRW_SURFACEFORMAT_R16G16B16_USCALED,
- BRW_SURFACEFORMAT_R16G16B16A16_USCALED
-};
-
-static GLuint short_types_norm[5] = {
- 0,
- BRW_SURFACEFORMAT_R16_SNORM,
- BRW_SURFACEFORMAT_R16G16_SNORM,
- BRW_SURFACEFORMAT_R16G16B16_SNORM,
- BRW_SURFACEFORMAT_R16G16B16A16_SNORM
-};
-
-static GLuint short_types_scale[5] = {
- 0,
- BRW_SURFACEFORMAT_R16_SSCALED,
- BRW_SURFACEFORMAT_R16G16_SSCALED,
- BRW_SURFACEFORMAT_R16G16B16_SSCALED,
- BRW_SURFACEFORMAT_R16G16B16A16_SSCALED
-};
-static GLuint ubyte_types_norm[5] = {
- 0,
- BRW_SURFACEFORMAT_R8_UNORM,
- BRW_SURFACEFORMAT_R8G8_UNORM,
- BRW_SURFACEFORMAT_R8G8B8_UNORM,
- BRW_SURFACEFORMAT_R8G8B8A8_UNORM
-};
-static GLuint ubyte_types_scale[5] = {
- 0,
- BRW_SURFACEFORMAT_R8_USCALED,
- BRW_SURFACEFORMAT_R8G8_USCALED,
- BRW_SURFACEFORMAT_R8G8B8_USCALED,
- BRW_SURFACEFORMAT_R8G8B8A8_USCALED
-};
-
-static GLuint byte_types_norm[5] = {
- 0,
- BRW_SURFACEFORMAT_R8_SNORM,
- BRW_SURFACEFORMAT_R8G8_SNORM,
- BRW_SURFACEFORMAT_R8G8B8_SNORM,
- BRW_SURFACEFORMAT_R8G8B8A8_SNORM
-};
-static GLuint byte_types_scale[5] = {
- 0,
- BRW_SURFACEFORMAT_R8_SSCALED,
- BRW_SURFACEFORMAT_R8G8_SSCALED,
- BRW_SURFACEFORMAT_R8G8B8_SSCALED,
- BRW_SURFACEFORMAT_R8G8B8A8_SSCALED
-};
-
-
-/**
- * Given vertex array type/size/format/normalized info, return
- * the appopriate hardware surface type.
- * Format will be GL_RGBA or possibly GL_BGRA for GLubyte[4] color arrays.
- */
-static GLuint get_surface_type( GLenum type, GLuint size,
- GLenum format, GLboolean normalized )
+/**
+ * Translate a PIPE_FORMAT_* id into the matching BRW_SURFACEFORMAT_*
+ * hardware code.
+ *
+ * Handled: one- to four-channel R64 and R32 FLOAT, plus R32, R16 and R8
+ * in their UNORM, USCALED, SNORM and SSCALED variants.  Any other id
+ * hits assert(0); with asserts compiled out the function returns 0.
+ *
+ * NOTE(review): the GL code this replaces mapped GL_BGRA ubyte arrays to
+ * BRW_SURFACEFORMAT_B8G8R8A8_UNORM.  No BGRA format appears in the switch
+ * below -- confirm whether one is still required.
+ */
+unsigned brw_translate_surface_format( unsigned id )
{
- if (INTEL_DEBUG & DEBUG_VERTS)
- _mesa_printf("type %s size %d normalized %d\n",
- _mesa_lookup_enum_by_nr(type), size, normalized);
-
- if (normalized) {
- switch (type) {
- case GL_DOUBLE: return double_types[size];
- case GL_FLOAT: return float_types[size];
- case GL_INT: return int_types_norm[size];
- case GL_SHORT: return short_types_norm[size];
- case GL_BYTE: return byte_types_norm[size];
- case GL_UNSIGNED_INT: return uint_types_norm[size];
- case GL_UNSIGNED_SHORT: return ushort_types_norm[size];
- case GL_UNSIGNED_BYTE:
- if (format == GL_BGRA) {
- /* See GL_EXT_vertex_array_bgra */
- assert(size == 4);
- return BRW_SURFACEFORMAT_B8G8R8A8_UNORM;
- }
- else {
- return ubyte_types_norm[size];
- }
- default: assert(0); return 0;
- }
- }
- else {
- assert(format == GL_RGBA); /* sanity check */
- switch (type) {
- case GL_DOUBLE: return double_types[size];
- case GL_FLOAT: return float_types[size];
- case GL_INT: return int_types_scale[size];
- case GL_SHORT: return short_types_scale[size];
- case GL_BYTE: return byte_types_scale[size];
- case GL_UNSIGNED_INT: return uint_types_scale[size];
- case GL_UNSIGNED_SHORT: return ushort_types_scale[size];
- case GL_UNSIGNED_BYTE: return ubyte_types_scale[size];
- default: assert(0); return 0;
- }
+ switch (id) {
+ case PIPE_FORMAT_R64_FLOAT:
+ return BRW_SURFACEFORMAT_R64_FLOAT;
+ case PIPE_FORMAT_R64G64_FLOAT:
+ return BRW_SURFACEFORMAT_R64G64_FLOAT;
+ case PIPE_FORMAT_R64G64B64_FLOAT:
+ return BRW_SURFACEFORMAT_R64G64B64_FLOAT;
+ case PIPE_FORMAT_R64G64B64A64_FLOAT:
+ return BRW_SURFACEFORMAT_R64G64B64A64_FLOAT;
+
+ case PIPE_FORMAT_R32_FLOAT:
+ return BRW_SURFACEFORMAT_R32_FLOAT;
+ case PIPE_FORMAT_R32G32_FLOAT:
+ return BRW_SURFACEFORMAT_R32G32_FLOAT;
+ case PIPE_FORMAT_R32G32B32_FLOAT:
+ return BRW_SURFACEFORMAT_R32G32B32_FLOAT;
+ case PIPE_FORMAT_R32G32B32A32_FLOAT:
+ return BRW_SURFACEFORMAT_R32G32B32A32_FLOAT;
+
+ case PIPE_FORMAT_R32_UNORM:
+ return BRW_SURFACEFORMAT_R32_UNORM;
+ case PIPE_FORMAT_R32G32_UNORM:
+ return BRW_SURFACEFORMAT_R32G32_UNORM;
+ case PIPE_FORMAT_R32G32B32_UNORM:
+ return BRW_SURFACEFORMAT_R32G32B32_UNORM;
+ case PIPE_FORMAT_R32G32B32A32_UNORM:
+ return BRW_SURFACEFORMAT_R32G32B32A32_UNORM;
+
+ case PIPE_FORMAT_R32_USCALED:
+ return BRW_SURFACEFORMAT_R32_USCALED;
+ case PIPE_FORMAT_R32G32_USCALED:
+ return BRW_SURFACEFORMAT_R32G32_USCALED;
+ case PIPE_FORMAT_R32G32B32_USCALED:
+ return BRW_SURFACEFORMAT_R32G32B32_USCALED;
+ case PIPE_FORMAT_R32G32B32A32_USCALED:
+ return BRW_SURFACEFORMAT_R32G32B32A32_USCALED;
+
+ case PIPE_FORMAT_R32_SNORM:
+ return BRW_SURFACEFORMAT_R32_SNORM;
+ case PIPE_FORMAT_R32G32_SNORM:
+ return BRW_SURFACEFORMAT_R32G32_SNORM;
+ case PIPE_FORMAT_R32G32B32_SNORM:
+ return BRW_SURFACEFORMAT_R32G32B32_SNORM;
+ case PIPE_FORMAT_R32G32B32A32_SNORM:
+ return BRW_SURFACEFORMAT_R32G32B32A32_SNORM;
+
+ case PIPE_FORMAT_R32_SSCALED:
+ return BRW_SURFACEFORMAT_R32_SSCALED;
+ case PIPE_FORMAT_R32G32_SSCALED:
+ return BRW_SURFACEFORMAT_R32G32_SSCALED;
+ case PIPE_FORMAT_R32G32B32_SSCALED:
+ return BRW_SURFACEFORMAT_R32G32B32_SSCALED;
+ case PIPE_FORMAT_R32G32B32A32_SSCALED:
+ return BRW_SURFACEFORMAT_R32G32B32A32_SSCALED;
+
+ case PIPE_FORMAT_R16_UNORM:
+ return BRW_SURFACEFORMAT_R16_UNORM;
+ case PIPE_FORMAT_R16G16_UNORM:
+ return BRW_SURFACEFORMAT_R16G16_UNORM;
+ case PIPE_FORMAT_R16G16B16_UNORM:
+ return BRW_SURFACEFORMAT_R16G16B16_UNORM;
+ case PIPE_FORMAT_R16G16B16A16_UNORM:
+ return BRW_SURFACEFORMAT_R16G16B16A16_UNORM;
+
+ case PIPE_FORMAT_R16_USCALED:
+ return BRW_SURFACEFORMAT_R16_USCALED;
+ case PIPE_FORMAT_R16G16_USCALED:
+ return BRW_SURFACEFORMAT_R16G16_USCALED;
+ case PIPE_FORMAT_R16G16B16_USCALED:
+ return BRW_SURFACEFORMAT_R16G16B16_USCALED;
+ case PIPE_FORMAT_R16G16B16A16_USCALED:
+ return BRW_SURFACEFORMAT_R16G16B16A16_USCALED;
+
+ case PIPE_FORMAT_R16_SNORM:
+ return BRW_SURFACEFORMAT_R16_SNORM;
+ case PIPE_FORMAT_R16G16_SNORM:
+ return BRW_SURFACEFORMAT_R16G16_SNORM;
+ case PIPE_FORMAT_R16G16B16_SNORM:
+ return BRW_SURFACEFORMAT_R16G16B16_SNORM;
+ case PIPE_FORMAT_R16G16B16A16_SNORM:
+ return BRW_SURFACEFORMAT_R16G16B16A16_SNORM;
+
+ case PIPE_FORMAT_R16_SSCALED:
+ return BRW_SURFACEFORMAT_R16_SSCALED;
+ case PIPE_FORMAT_R16G16_SSCALED:
+ return BRW_SURFACEFORMAT_R16G16_SSCALED;
+ case PIPE_FORMAT_R16G16B16_SSCALED:
+ return BRW_SURFACEFORMAT_R16G16B16_SSCALED;
+ case PIPE_FORMAT_R16G16B16A16_SSCALED:
+ return BRW_SURFACEFORMAT_R16G16B16A16_SSCALED;
+
+ case PIPE_FORMAT_R8_UNORM:
+ return BRW_SURFACEFORMAT_R8_UNORM;
+ case PIPE_FORMAT_R8G8_UNORM:
+ return BRW_SURFACEFORMAT_R8G8_UNORM;
+ case PIPE_FORMAT_R8G8B8_UNORM:
+ return BRW_SURFACEFORMAT_R8G8B8_UNORM;
+ case PIPE_FORMAT_R8G8B8A8_UNORM:
+ return BRW_SURFACEFORMAT_R8G8B8A8_UNORM;
+
+ case PIPE_FORMAT_R8_USCALED:
+ return BRW_SURFACEFORMAT_R8_USCALED;
+ case PIPE_FORMAT_R8G8_USCALED:
+ return BRW_SURFACEFORMAT_R8G8_USCALED;
+ case PIPE_FORMAT_R8G8B8_USCALED:
+ return BRW_SURFACEFORMAT_R8G8B8_USCALED;
+ case PIPE_FORMAT_R8G8B8A8_USCALED:
+ return BRW_SURFACEFORMAT_R8G8B8A8_USCALED;
+
+ case PIPE_FORMAT_R8_SNORM:
+ return BRW_SURFACEFORMAT_R8_SNORM;
+ case PIPE_FORMAT_R8G8_SNORM:
+ return BRW_SURFACEFORMAT_R8G8_SNORM;
+ case PIPE_FORMAT_R8G8B8_SNORM:
+ return BRW_SURFACEFORMAT_R8G8B8_SNORM;
+ case PIPE_FORMAT_R8G8B8A8_SNORM:
+ return BRW_SURFACEFORMAT_R8G8B8A8_SNORM;
+
+ case PIPE_FORMAT_R8_SSCALED:
+ return BRW_SURFACEFORMAT_R8_SSCALED;
+ case PIPE_FORMAT_R8G8_SSCALED:
+ return BRW_SURFACEFORMAT_R8G8_SSCALED;
+ case PIPE_FORMAT_R8G8B8_SSCALED:
+ return BRW_SURFACEFORMAT_R8G8B8_SSCALED;
+ case PIPE_FORMAT_R8G8B8A8_SSCALED:
+ return BRW_SURFACEFORMAT_R8G8B8A8_SSCALED;
+
+ default:
+ assert(0);
+ return 0;
}
}
-
-static GLuint get_size( GLenum type )
-{
- switch (type) {
- case GL_DOUBLE: return sizeof(GLdouble);
- case GL_FLOAT: return sizeof(GLfloat);
- case GL_INT: return sizeof(GLint);
- case GL_SHORT: return sizeof(GLshort);
- case GL_BYTE: return sizeof(GLbyte);
- case GL_UNSIGNED_INT: return sizeof(GLuint);
- case GL_UNSIGNED_SHORT: return sizeof(GLushort);
- case GL_UNSIGNED_BYTE: return sizeof(GLubyte);
- default: return 0;
- }
-}
-
-static GLuint get_index_type(GLenum type)
+/**
+ * Map an index selector onto the hardware BRW_INDEX_* encoding:
+ * 1 -> BRW_INDEX_BYTE, 2 -> BRW_INDEX_WORD, 4 -> BRW_INDEX_DWORD.
+ * Any other value hits assert(0) and returns 0.
+ *
+ * NOTE(review): the accepted values look like the index element size in
+ * bytes (they replace GL_UNSIGNED_BYTE/SHORT/INT) -- confirm with the caller.
+ */
+static unsigned get_index_type(int type)
{
switch (type) {
- case GL_UNSIGNED_BYTE: return BRW_INDEX_BYTE;
- case GL_UNSIGNED_SHORT: return BRW_INDEX_WORD;
- case GL_UNSIGNED_INT: return BRW_INDEX_DWORD;
+ case 1: return BRW_INDEX_BYTE;
+ case 2: return BRW_INDEX_WORD;
+ case 4: return BRW_INDEX_DWORD;
default: assert(0); return 0;
}
}
-static void wrap_buffers( struct brw_context *brw,
- GLuint size )
-{
- if (size < BRW_UPLOAD_INIT_SIZE)
- size = BRW_UPLOAD_INIT_SIZE;
-
- brw->vb.upload.offset = 0;
-
- if (brw->vb.upload.bo != NULL)
- dri_bo_unreference(brw->vb.upload.bo);
- brw->vb.upload.bo = dri_bo_alloc(brw->intel.bufmgr, "temporary VBO",
- size, 1);
-
- /* Set the internal VBO\ to no-backing-store. We only use them as a
- * temporary within a brw_try_draw_prims while the lock is held.
- */
- /* DON'T DO THIS AS IF WE HAVE TO RE-ORG MEMORY WE NEED SOMEWHERE WITH
- FAKE TO PUSH THIS STUFF */
-// if (!brw->intel.ttm)
-// dri_bo_fake_disable_backing_store(brw->vb.upload.bo, NULL, NULL);
-}
-
-static void get_space( struct brw_context *brw,
- GLuint size,
- dri_bo **bo_return,
- GLuint *offset_return )
-{
- size = ALIGN(size, 64);
-
- if (brw->vb.upload.bo == NULL ||
- brw->vb.upload.offset + size > brw->vb.upload.bo->size) {
- wrap_buffers(brw, size);
- }
-
- assert(*bo_return == NULL);
- dri_bo_reference(brw->vb.upload.bo);
- *bo_return = brw->vb.upload.bo;
- *offset_return = brw->vb.upload.offset;
- brw->vb.upload.offset += size;
-}
-
-static void
-copy_array_to_vbo_array( struct brw_context *brw,
- struct brw_vertex_element *element,
- GLuint dst_stride)
-{
- struct intel_context *intel = &brw->intel;
- GLuint size = element->count * dst_stride;
-
- get_space(brw, size, &element->bo, &element->offset);
- if (element->glarray->StrideB == 0) {
- assert(element->count == 1);
- element->stride = 0;
- } else {
- element->stride = dst_stride;
- }
-
- if (dst_stride == element->glarray->StrideB) {
- if (intel->intelScreen->kernel_exec_fencing) {
- drm_intel_gem_bo_map_gtt(element->bo);
- memcpy((char *)element->bo->virtual + element->offset,
- element->glarray->Ptr, size);
- drm_intel_gem_bo_unmap_gtt(element->bo);
- } else {
- dri_bo_subdata(element->bo,
- element->offset,
- size,
- element->glarray->Ptr);
- }
- } else {
- char *dest;
- const unsigned char *src = element->glarray->Ptr;
- int i;
-
- if (intel->intelScreen->kernel_exec_fencing) {
- drm_intel_gem_bo_map_gtt(element->bo);
- dest = element->bo->virtual;
- dest += element->offset;
-
- for (i = 0; i < element->count; i++) {
- memcpy(dest, src, dst_stride);
- src += element->glarray->StrideB;
- dest += dst_stride;
- }
-
- drm_intel_gem_bo_unmap_gtt(element->bo);
- } else {
- void *data;
-
- data = _mesa_malloc(dst_stride * element->count);
- dest = data;
- for (i = 0; i < element->count; i++) {
- memcpy(dest, src, dst_stride);
- src += element->glarray->StrideB;
- dest += dst_stride;
- }
-
- dri_bo_subdata(element->bo,
- element->offset,
- size,
- data);
-
- _mesa_free(data);
- }
- }
-}
-static void brw_prepare_vertices(struct brw_context *brw)
+static boolean brw_prepare_vertices(struct brw_context *brw)
{
GLcontext *ctx = &brw->intel.ctx;
struct intel_context *intel = intel_context(ctx);
if (0)
_mesa_printf("%s %d..%d\n", __FUNCTION__, min_index, max_index);
- /* Accumulate the list of enabled arrays. */
- brw->vb.nr_enabled = 0;
- while (vs_inputs) {
- GLuint i = _mesa_ffsll(vs_inputs) - 1;
- struct brw_vertex_element *input = &brw->vb.inputs[i];
- vs_inputs &= ~(1 << i);
- brw->vb.enabled[brw->vb.nr_enabled++] = input;
- }
-
- /* XXX: In the rare cases where this happens we fallback all
- * the way to software rasterization, although a tnl fallback
- * would be sufficient. I don't know of *any* real world
- * cases with > 17 vertex attributes enabled, so it probably
- * isn't an issue at this point.
- */
- if (brw->vb.nr_enabled >= BRW_VEP_MAX) {
- intel->Fallback = 1;
- return;
- }
for (i = 0; i < brw->vb.nr_enabled; i++) {
struct brw_vertex_element *input = brw->vb.enabled[i];
input->element_size = get_size(input->glarray->Type) * input->glarray->Size;
- if (_mesa_is_bufferobj(input->glarray->BufferObj)) {
- struct intel_buffer_object *intel_buffer =
- intel_buffer_object(input->glarray->BufferObj);
-
- /* Named buffer object: Just reference its contents directly. */
- dri_bo_unreference(input->bo);
- input->bo = intel_bufferobj_buffer(intel, intel_buffer,
- INTEL_READ);
- dri_bo_reference(input->bo);
- input->offset = (unsigned long)input->glarray->Ptr;
- input->stride = input->glarray->StrideB;
- input->count = input->glarray->_MaxElement;
-
- /* This is a common place to reach if the user mistakenly supplies
- * a pointer in place of a VBO offset. If we just let it go through,
- * we may end up dereferencing a pointer beyond the bounds of the
- * GTT. We would hope that the VBO's max_index would save us, but
- * Mesa appears to hand us min/max values not clipped to the
- * array object's _MaxElement, and _MaxElement frequently appears
- * to be wrong anyway.
- *
- * The VBO spec allows application termination in this case, and it's
- * probably a service to the poor programmer to do so rather than
- * trying to just not render.
- */
- assert(input->offset < input->bo->size);
- } else {
- input->count = input->glarray->StrideB ? max_index + 1 - min_index : 1;
- if (input->bo != NULL) {
- /* Already-uploaded vertex data is present from a previous
- * prepare_vertices, but we had to re-validate state due to
- * check_aperture failing and a new batch being produced.
- */
- continue;
- }
-
- /* Queue the buffer object up to be uploaded in the next pass,
- * when we've decided if we're doing interleaved or not.
- */
- if (input->attrib == VERT_ATTRIB_POS) {
- /* Position array not properly enabled:
- */
- if (input->glarray->StrideB == 0) {
- intel->Fallback = 1;
- return;
- }
-
- interleave = input->glarray->StrideB;
- ptr = input->glarray->Ptr;
- }
- else if (interleave != input->glarray->StrideB ||
- (const unsigned char *)input->glarray->Ptr - ptr < 0 ||
- (const unsigned char *)input->glarray->Ptr - ptr > interleave)
- {
- interleave = 0;
- }
-
- upload[nr_uploads++] = input;
-
- /* We rebase drawing to start at element zero only when
- * varyings are not in vbos, which means we can end up
- * uploading non-varying arrays (stride != 0) when min_index
- * is zero. This doesn't matter as the amount to upload is
- * the same for these arrays whether the draw call is rebased
- * or not - we just have to upload the one element.
- */
- assert(min_index == 0 || input->glarray->StrideB == 0);
- }
- }
-
- /* Handle any arrays to be uploaded. */
- if (nr_uploads > 1 && interleave && interleave <= 256) {
- /* All uploads are interleaved, so upload the arrays together as
- * interleaved. First, upload the contents and set up upload[0].
- */
- copy_array_to_vbo_array(brw, upload[0], interleave);
-
- for (i = 1; i < nr_uploads; i++) {
- /* Then, just point upload[i] at upload[0]'s buffer. */
- upload[i]->stride = interleave;
- upload[i]->offset = upload[0]->offset +
- ((const unsigned char *)upload[i]->glarray->Ptr - ptr);
- upload[i]->bo = upload[0]->bo;
- dri_bo_reference(upload[i]->bo);
+ if (brw_is_user_buffer(vb)) {
+ u_upload_buffer( brw->upload,
+ min_index * vb->stride,
+ (max_index + 1 - min_index) * vb->stride,
+ &offset,
+ &buffer );
}
- }
- else {
- /* Upload non-interleaved arrays */
- for (i = 0; i < nr_uploads; i++) {
- copy_array_to_vbo_array(brw, upload[i], upload[i]->element_size);
+ else
+ {
+ offset = 0;
+ buffer = vb->buffer;
+ count = stride == 0 ? 1 : max_index + 1 - min_index;
}
+
+ /* Named buffer object: Just reference its contents directly. */
+ dri_bo_unreference(input->bo);
+ input->bo = intel_bufferobj_buffer(intel, intel_buffer,
+ INTEL_READ);
+ dri_bo_reference(input->bo);
+
+ input->offset = (unsigned long)offset;
+ input->stride = vb->stride;
+ input->count = count;
+
+ assert(input->offset < input->bo->size);
}
brw_prepare_query_begin(brw);
/* Straight upload
*/
- if (intel->intelScreen->kernel_exec_fencing) {
- drm_intel_gem_bo_map_gtt(bo);
- memcpy((char *)bo->virtual + offset, index_buffer->ptr, ib_size);
- drm_intel_gem_bo_unmap_gtt(bo);
- } else {
- dri_bo_subdata(bo, offset, ib_size, index_buffer->ptr);
- }
+ brw_bo_subdata(bo, offset, ib_size, index_buffer->ptr);
+
} else {
offset = (GLuint) (unsigned long) index_buffer->ptr;
brw->ib.start_vertex_offset = 0;
/* Need to locate the two positions present in vertex + header.
* These are currently hardcoded:
*/
- c.nr_attrs = brw_count_bits(c.key.attrs);
+ c.nr_attrs = util_count_bits(c.key.attrs);
if (BRW_IS_IGDNG(brw))
c.nr_regs = (c.nr_attrs + 1) / 2 + 3; /* are vertices packed, or reg-aligned? */
--- /dev/null
+
+ /* _NEW_COLOR */
+ if (key->logic_op != GL_COPY) {
+ cc.cc2.logicop_enable = 1;
+ cc.cc5.logicop_func = intel_translate_logic_op(key->logic_op);
+ } else if (key->color_blend) {
+ GLenum eqRGB = key->blend_eq_rgb;
+ GLenum eqA = key->blend_eq_a;
+ GLenum srcRGB = key->blend_src_rgb;
+ GLenum dstRGB = key->blend_dst_rgb;
+ GLenum srcA = key->blend_src_a;
+ GLenum dstA = key->blend_dst_a;
+
+ if (eqRGB == GL_MIN || eqRGB == GL_MAX) {
+ srcRGB = dstRGB = GL_ONE;
+ }
+
+ if (eqA == GL_MIN || eqA == GL_MAX) {
+ srcA = dstA = GL_ONE;
+ }
+
+ cc.cc6.dest_blend_factor = brw_translate_blend_factor(dstRGB);
+ cc.cc6.src_blend_factor = brw_translate_blend_factor(srcRGB);
+ cc.cc6.blend_function = brw_translate_blend_equation(eqRGB);
+
+ cc.cc5.ia_dest_blend_factor = brw_translate_blend_factor(dstA);
+ cc.cc5.ia_src_blend_factor = brw_translate_blend_factor(srcA);
+ cc.cc5.ia_blend_function = brw_translate_blend_equation(eqA);
+
+ cc.cc3.blend_enable = 1;
+ cc.cc3.ia_blend_enable = (srcA != srcRGB ||
+ dstA != dstRGB ||
+ eqA != eqRGB);
+ }
+
+ if (key->dither) {
+ cc.cc5.dither_enable = 1;
+ cc.cc6.y_dither_offset = 0;
+ cc.cc6.x_dither_offset = 0;
+ }
+
--- /dev/null
+ if (INTEL_DEBUG & DEBUG_STATS)
+ cc.cc5.statistics_enable = 1;
--- /dev/null
+ /* _NEW_STENCIL */
+ if (key->dsa.stencil[0].enable) {
+ cc.cc0.stencil_enable = 1;
+ cc.cc0.stencil_func =
+ intel_translate_compare_func(key->stencil_func[0]);
+ cc.cc0.stencil_fail_op =
+ intel_translate_stencil_op(key->stencil_fail_op[0]);
+ cc.cc0.stencil_pass_depth_fail_op =
+ intel_translate_stencil_op(key->stencil_pass_depth_fail_op[0]);
+ cc.cc0.stencil_pass_depth_pass_op =
+ intel_translate_stencil_op(key->stencil_pass_depth_pass_op[0]);
+ cc.cc1.stencil_ref = key->stencil_ref[0];
+ cc.cc1.stencil_write_mask = key->stencil_write_mask[0];
+ cc.cc1.stencil_test_mask = key->stencil_test_mask[0];
+
+ if (key->stencil_two_side) {
+ cc.cc0.bf_stencil_enable = 1;
+ cc.cc0.bf_stencil_func =
+ intel_translate_compare_func(key->stencil_func[1]);
+ cc.cc0.bf_stencil_fail_op =
+ intel_translate_stencil_op(key->stencil_fail_op[1]);
+ cc.cc0.bf_stencil_pass_depth_fail_op =
+ intel_translate_stencil_op(key->stencil_pass_depth_fail_op[1]);
+ cc.cc0.bf_stencil_pass_depth_pass_op =
+ intel_translate_stencil_op(key->stencil_pass_depth_pass_op[1]);
+ cc.cc1.bf_stencil_ref = key->stencil_ref[1];
+ cc.cc2.bf_stencil_write_mask = key->stencil_write_mask[1];
+ cc.cc2.bf_stencil_test_mask = key->stencil_test_mask[1];
+ }
+
+ /* Not really sure about this:
+ */
+ if (key->stencil_write_mask[0] ||
+ (key->stencil_two_side && key->stencil_write_mask[1]))
+ cc.cc0.stencil_write_enable = 1;
+ }
+
+
+ if (key->alpha_enabled) {
+ cc.cc3.alpha_test = 1;
+ cc.cc3.alpha_test_func = intel_translate_compare_func(key->alpha_func);
+ cc.cc3.alpha_test_format = BRW_ALPHATEST_FORMAT_UNORM8;
+
+ UNCLAMPED_FLOAT_TO_UBYTE(cc.cc7.alpha_ref.ub[0], key->alpha_ref);
+ }
+
+ /* _NEW_DEPTH */
+ if (key->depth_test) {
+ cc.cc2.depth_test = 1;
+ cc.cc2.depth_test_function = intel_translate_compare_func(key->depth_func);
+ cc.cc2.depth_write_enable = key->depth_write;
+ }
--- /dev/null
+
+/**
+ * called from intelDrawBuffer()
+ *
+ * Replaces the colour and depth regions tracked in brw->state with the
+ * ones supplied by the caller.
+ *
+ * \param intel              context whose brw state is updated
+ * \param color_regions      new colour regions, num_color_regions entries
+ * \param depth_region       new depth region
+ * \param num_color_regions  number of entries read from color_regions
+ *
+ * Statement order matters: the depth pointer is compared before anything
+ * is released, and the old regions are released before the new ones are
+ * referenced into the same slots.
+ */
+static void brw_set_draw_region( struct intel_context *intel,
+ struct intel_region *color_regions[],
+ struct intel_region *depth_region,
+ GLuint num_color_regions)
+{
+ struct brw_context *brw = brw_context(&intel->ctx);
+ GLuint i;
+
+ /* Flag the depth buffer dirty only when the depth region pointer
+ * actually changes.
+ */
+ /* release old color/depth regions */
+ if (brw->state.depth_region != depth_region)
+ brw->state.dirty.brw |= BRW_NEW_DEPTH_BUFFER;
+ /* Uses the previously stored count, not num_color_regions. */
+ for (i = 0; i < brw->state.nr_color_regions; i++)
+ intel_region_release(&brw->state.color_regions[i]);
+ intel_region_release(&brw->state.depth_region);
+
+ /* reference new color/depth regions */
+ for (i = 0; i < num_color_regions; i++)
+ intel_region_reference(&brw->state.color_regions[i], color_regions[i]);
+ intel_region_reference(&brw->state.depth_region, depth_region);
+ /* Record the new count last so the release loop above saw the old one. */
+ brw->state.nr_color_regions = num_color_regions;
+}
--- /dev/null
+
+/**
+ * Batch-finish hook, called from intel_batchbuffer_flush and children
+ * before a batchbuffer is sent off.  All it does is hand the brw context
+ * to brw_emit_query_end().
+ */
+static void brw_finish_batch(struct intel_context *intel)
+{
+   brw_emit_query_end(brw_context(&intel->ctx));
+}
+
+
+/**
+ * New-batch hook (called from intelFlushBatchLocked).
+ *
+ * Requests a new curbe buffer, marks every mesa/brw/cache dirty bit and
+ * drops the current vertex upload buffer, if there is one.
+ */
+static void brw_new_batch( struct intel_context *intel )
+{
+   struct brw_context *brw = brw_context(&intel->ctx);
+   struct brw_state_flags *dirty = &brw->state.dirty;
+
+   /* Check that we didn't just wrap our batchbuffer at a bad time. */
+   assert(!brw->no_batch_wrap);
+
+   brw->curbe.need_new_bo = GL_TRUE;
+
+   /* Mark all context state as needing to be re-emitted.
+    * This is probably not as severe as on 915, since almost all of our
+    * state is just in referenced buffers.
+    */
+   dirty->brw |= BRW_NEW_CONTEXT;
+
+   dirty->mesa |= ~0;
+   dirty->brw |= ~0;
+   dirty->cache |= ~0;
+
+   /* Nothing further to do when no upload buffer is held. */
+   if (brw->vb.upload.bo == NULL)
+      return;
+
+   /* Let go of the current upload buffer and reset the offset so that a
+    * new buffer is chosen next time.
+    */
+   dri_bo_unreference(brw->vb.upload.bo);
+   brw->vb.upload.bo = NULL;
+   brw->vb.upload.offset = 0;
+}
+
+
+/**
+ * Sets BRW_NEW_FENCE in the context's dirty brw flags.  The fence value
+ * itself is not read.
+ */
+static void brw_note_fence( struct intel_context *intel, GLuint fence )
+{
+   struct brw_context *brw = brw_context(&intel->ctx);
+
+   brw->state.dirty.brw |= BRW_NEW_FENCE;
+}
+
+/* called from intelWaitForIdle() and intelFlush()
+ *
+ * For now, just flush everything.  Could be smarter later.
+ *
+ * Builds a brw_mi_flush command (opcode CMD_MI_FLUSH, pad cleared,
+ * BRW_FLUSH_STATE_CACHE flag set) and returns it as one GLuint.
+ *
+ * The command is assembled inside a union and read back through the
+ * GLuint member.  Reading the struct through a casted GLuint pointer
+ * accesses it via an incompatible lvalue type, which is undefined under
+ * the C aliasing rules; reading another union member is permitted.
+ */
+static GLuint brw_flush_cmd( void )
+{
+   union {
+      struct brw_mi_flush flush;
+      GLuint dword;
+   } cmd;
+
+   /* Start from a fully defined dword so no indeterminate bits leak out. */
+   cmd.dword = 0;
+
+   cmd.flush.opcode = CMD_MI_FLUSH;
+   cmd.flush.pad = 0;
+   cmd.flush.flags = BRW_FLUSH_STATE_CACHE;
+
+   return cmd.dword;
+}
+
+
--- /dev/null
+ /* _NEW_BUFFERS */
+ if (IS_965(intel->intelScreen->deviceID) &&
+ !IS_G4X(intel->intelScreen->deviceID)) {
+ for (i = 0; i < ctx->DrawBuffer->_NumColorDrawBuffers; i++) {
+ struct gl_renderbuffer *rb = ctx->DrawBuffer->_ColorDrawBuffers[i];
+ struct intel_renderbuffer *irb = intel_renderbuffer(rb);
+
+ /* The original gen4 hardware couldn't set up WM surfaces pointing
+ * at an offset within a tile, which can happen when rendering to
+ * anything but the base level of a texture or the +X face/0 depth.
+ * This was fixed with the 4 Series hardware.
+ *
+ * For these original chips, you would have to make the depth and
+ * color destination surfaces include information on the texture
+ * type, LOD, face, and various limits to use them as a destination.
+ * I would have done this, but there's also a nasty requirement that
+ * the depth and the color surfaces all be of the same LOD, which
+ * may be a worse requirement than this alignment. (Also, we may
+ * want to just demote the texture to untiled, instead).
+ */
+ if (irb->region &&
+ irb->region->tiling != I915_TILING_NONE &&
+ (irb->region->draw_offset & 4095)) {
+ DBG("FALLBACK: non-tile-aligned destination for tiled FBO\n");
+ return GL_TRUE;
+ }
+ }
brw_init_compile(brw, &c.func);
c.key = *key;
- c.nr_attrs = brw_count_bits(c.key.attrs);
+ c.nr_attrs = util_count_bits(c.key.attrs);
c.nr_attr_regs = (c.nr_attrs+1)/2;
- c.nr_setup_attrs = brw_count_bits(c.key.attrs & DO_SETUP_BITS);
+ c.nr_setup_attrs = util_count_bits(c.key.attrs & DO_SETUP_BITS);
c.nr_setup_regs = (c.nr_setup_attrs+1)/2;
c.prog_data.urb_read_length = c.nr_attr_regs;
{
struct brw_compile *p = &c->func;
struct brw_reg ip = brw_ip_reg();
- GLuint nr = brw_count_bits(c->key.attrs & VERT_RESULT_COLOR_BITS);
+ GLuint nr = util_count_bits(c->key.attrs & VERT_RESULT_COLOR_BITS);
GLuint jmpi = 1;
if (!nr)
{
struct brw_compile *p = &c->func;
struct brw_reg ip = brw_ip_reg();
- GLuint nr = brw_count_bits(c->key.attrs & VERT_RESULT_COLOR_BITS);
+ GLuint nr = util_count_bits(c->key.attrs & VERT_RESULT_COLOR_BITS);
GLuint jmpi = 1;
if (!nr)
/***********************************************************************
* Emit all state:
*/
-void brw_validate_state( struct brw_context *brw )
+enum pipe_error brw_validate_state( struct brw_context *brw )
{
GLcontext *ctx = &brw->intel.ctx;
struct intel_context *intel = &brw->intel;
GLuint i;
brw_clear_validated_bos(brw);
-
- state->mesa |= brw->intel.NewGLState;
- brw->intel.NewGLState = 0;
-
brw_add_validated_bo(brw, intel->batch->buf);
if (brw->emit_state_always) {
state->cache |= ~0;
}
- if (brw->fragment_program != ctx->FragmentProgram._Current) {
- brw->fragment_program = ctx->FragmentProgram._Current;
- brw->state.dirty.brw |= BRW_NEW_FRAGMENT_PROGRAM;
- }
-
- if (brw->vertex_program != ctx->VertexProgram._Current) {
- brw->vertex_program = ctx->VertexProgram._Current;
- brw->state.dirty.brw |= BRW_NEW_VERTEX_PROGRAM;
- }
-
if (state->mesa == 0 &&
state->cache == 0 &&
state->brw == 0)
- return;
+ return 0;
if (brw->state.dirty.brw & BRW_NEW_CONTEXT)
brw_clear_batch_cache(brw);
- brw->intel.Fallback = 0;
-
/* do prepare stage for all atoms */
for (i = 0; i < Elements(atoms); i++) {
const struct brw_tracked_state *atom = atoms[i];
- if (brw->intel.Fallback)
- break;
-
if (check_state(state, &atom->dirty)) {
if (atom->prepare) {
- atom->prepare(brw);
+ ret = atom->prepare(brw);
+ if (ret)
+ return ret;
}
}
}
* If this fails, we can experience GPU lock-ups.
*/
{
- const struct brw_fragment_program *fp;
- fp = brw_fragment_program_const(brw->fragment_program);
+ const struct brw_fragment_program *fp = brw->fragment_program;
if (fp) {
- assert((fp->tex_units_used & ctx->Texture._EnabledUnits)
- == fp->tex_units_used);
+ assert(fp->info.max_sampler <= brw->nr_samplers &&
+ fp->info.max_texture <= brw->nr_textures);
}
}
+
+ return 0;
}
-void brw_upload_state(struct brw_context *brw)
+enum pipe_error brw_upload_state(struct brw_context *brw)
{
struct brw_state_flags *state = &brw->state.dirty;
int i;
_mesa_memset(&examined, 0, sizeof(examined));
prev = *state;
- for (i = 0; i < Elements(atoms); i++) {
+ for (i = 0; i < Elements(atoms); i++) {
const struct brw_tracked_state *atom = atoms[i];
struct brw_state_flags generated;
atom->dirty.brw ||
atom->dirty.cache);
- if (brw->intel.Fallback)
- break;
-
if (check_state(state, &atom->dirty)) {
if (atom->emit) {
- atom->emit( brw );
+ ret = atom->emit( brw );
+ if (ret)
+ return ret;
}
}
for (i = 0; i < Elements(atoms); i++) {
const struct brw_tracked_state *atom = atoms[i];
- if (brw->intel.Fallback)
- break;
-
if (check_state(state, &atom->dirty)) {
if (atom->emit) {
- atom->emit( brw );
+ ret = atom->emit( brw );
+ if (ret)
+ return ret;
}
}
}
brw_print_dirty_count(mesa_bits, state->mesa);
brw_print_dirty_count(brw_bits, state->brw);
brw_print_dirty_count(cache_bits, state->cache);
- fprintf(stderr, "\n");
+ debug_printf("\n");
}
}
-
- if (!brw->intel.Fallback)
- memset(state, 0, sizeof(*state));
+
+ /* Clear dirty flags:
+ */
+ memset(state, 0, sizeof(*state));
+
+ /* Every atom was emitted without error.  The function is declared to
+ * return enum pipe_error and the draw path tests that result, so
+ * falling off the end here would hand back an indeterminate value.
+ */
+ return 0;
}
--- /dev/null
+
+/**
+ * Decide whether a set of primitives has to take the fallback path.
+ *
+ * \param brw       context; GL state is read through brw->intel.ctx
+ * \param prim      primitives about to be drawn
+ * \param nr_prims  number of entries in prim
+ * \return GL_TRUE when a fallback is required, GL_FALSE when nothing
+ *         prevents the fast path.
+ *
+ * conformance_mode 0 never falls back and conformance_mode 2 always does.
+ * For any other mode the checks are, in order: polygon smoothing with
+ * triangles, line smoothing with lines, line stipple with loops, strips or
+ * line-mode polygons, point smoothing with points, GL_CLAMP wrap on an
+ * enabled 1D/2D/3D texture, the vertex element count, and a zero-stride
+ * position array.
+ *
+ * XXX: could split the primitive list to fallback only on the
+ * non-conformant primitives.
+ */
+static GLboolean check_fallbacks( struct brw_context *brw,
+                                  const struct _mesa_prim *prim,
+                                  GLuint nr_prims )
+{
+   GLcontext *ctx = &brw->intel.ctx;
+   GLuint i;
+
+   /* If we don't require strict OpenGL conformance, never
+    * use fallbacks.  If we're forcing fallbacks, always
+    * use fallbacks.
+    */
+   if (brw->intel.conformance_mode == 0)
+      return GL_FALSE;
+
+   if (brw->intel.conformance_mode == 2)
+      return GL_TRUE;
+
+   if (ctx->Polygon.SmoothFlag) {
+      for (i = 0; i < nr_prims; i++)
+         if (reduced_prim[prim[i].mode] == GL_TRIANGLES)
+            return GL_TRUE;
+   }
+
+   /* BRW hardware will do AA lines, but they are non-conformant it
+    * seems.  TBD whether we keep this fallback:
+    */
+   if (ctx->Line.SmoothFlag) {
+      for (i = 0; i < nr_prims; i++)
+         if (reduced_prim[prim[i].mode] == GL_LINES)
+            return GL_TRUE;
+   }
+
+   /* Stipple -- these fallbacks could be resolved with a little
+    * bit of work?
+    */
+   if (ctx->Line.StippleFlag) {
+      for (i = 0; i < nr_prims; i++) {
+         /* GS doesn't get enough information to know when to reset
+          * the stipple counter?!?
+          */
+         if (prim[i].mode == GL_LINE_LOOP || prim[i].mode == GL_LINE_STRIP)
+            return GL_TRUE;
+
+         if (prim[i].mode == GL_POLYGON &&
+             (ctx->Polygon.FrontMode == GL_LINE ||
+              ctx->Polygon.BackMode == GL_LINE))
+            return GL_TRUE;
+      }
+   }
+
+   if (ctx->Point.SmoothFlag) {
+      for (i = 0; i < nr_prims; i++)
+         if (prim[i].mode == GL_POINTS)
+            return GL_TRUE;
+   }
+
+   /* BRW hardware doesn't handle GL_CLAMP texturing correctly;
+    * brw_wm_sampler_state:translate_wrap_mode() treats GL_CLAMP
+    * as GL_CLAMP_TO_EDGE instead.  If we're using GL_CLAMP, and
+    * we want strict conformance, force the fallback.
+    * The wrap modes of enabled 1D, 2D and 3D textures are checked.
+    */
+   {
+      int u;
+      for (u = 0; u < ctx->Const.MaxTextureCoordUnits; u++) {
+         struct gl_texture_unit *texUnit = &ctx->Texture.Unit[u];
+         if (texUnit->Enabled) {
+            if (texUnit->Enabled & TEXTURE_1D_BIT) {
+               if (texUnit->CurrentTex[TEXTURE_1D_INDEX]->WrapS == GL_CLAMP) {
+                  return GL_TRUE;
+               }
+            }
+            if (texUnit->Enabled & TEXTURE_2D_BIT) {
+               if (texUnit->CurrentTex[TEXTURE_2D_INDEX]->WrapS == GL_CLAMP ||
+                   texUnit->CurrentTex[TEXTURE_2D_INDEX]->WrapT == GL_CLAMP) {
+                  return GL_TRUE;
+               }
+            }
+            if (texUnit->Enabled & TEXTURE_3D_BIT) {
+               if (texUnit->CurrentTex[TEXTURE_3D_INDEX]->WrapS == GL_CLAMP ||
+                   texUnit->CurrentTex[TEXTURE_3D_INDEX]->WrapT == GL_CLAMP ||
+                   texUnit->CurrentTex[TEXTURE_3D_INDEX]->WrapR == GL_CLAMP) {
+                  return GL_TRUE;
+               }
+            }
+         }
+      }
+   }
+
+   /* Exceeding hw limits on number of VS inputs?  Having no vertex
+    * elements at all also takes the fallback.
+    */
+   if (brw->nr_ve == 0 ||
+       brw->nr_ve >= BRW_VEP_MAX) {
+      return GL_TRUE;
+   }
+
+   /* Position array with zero stride?
+    *
+    * NOTE(review): this indexes brw->vs with brw->ve[0]; confirm that is
+    * the intended vertex-buffer lookup.
+    */
+   if (brw->vs[brw->ve[0]]->stride == 0)
+      return GL_TRUE;
+
+   /* Nothing stopping us from the fast path now */
+   return GL_FALSE;
+}
+
+
+
+
--- /dev/null
+#ifndef BRW_TYPES_H
+#define BRW_TYPES_H
+
+#include <stdint.h>
+
+/* GL-style type names expressed through the fixed-width C integer types.
+ *
+ * In a typedef the existing type comes first and the new name last.  The
+ * declarations used to be written the other way round, which defined
+ * uint32_t and friends in terms of GL types instead of providing the GL
+ * names.
+ */
+typedef uint32_t GLuint;
+typedef uint8_t GLubyte;
+typedef uint16_t GLushort;
+/* no GLenum, translate all away */
+
+typedef uint8_t GLboolean;
+
+#endif
#include "brw_util.h"
#include "brw_defines.h"
-GLuint brw_count_bits( GLuint val )
-{
- GLuint i;
- for (i = 0; val ; val >>= 1)
- if (val & 1)
- i++;
- return i;
-}
GLuint brw_translate_blend_equation( GLenum mode )
}
if (0)
- _mesa_print_program(&c.vp->program.Base);
-
-
+ tgsi_dump(&c.vp->tokens, 0);
/* Emit GEN4 code.
*/
* the inputs it asks for, whether they are varying or not.
*/
key.program_string_id = vp->id;
- key.nr_userclip = brw_count_bits(ctx->Transform.ClipPlanesEnabled);
- key.copy_edgeflag = (ctx->Polygon.FrontMode != GL_FILL ||
- ctx->Polygon.BackMode != GL_FILL);
+ key.nr_userclip = brw->nr_userclip;
+ key.copy_edgeflag = (brw->rast->fill_ccw != PIPE_POLYGON_MODE_FILL ||
+ brw->rast->fill_cw != PIPE_POLYGON_MODE_FILL);
/* Make an early check for the key.
*/
*/
const struct brw_tracked_state brw_vs_prog = {
.dirty = {
- .mesa = _NEW_TRANSFORM | _NEW_POLYGON,
+ .mesa = PIPE_NEW_UCP | PIPE_NEW_RAST,
.brw = BRW_NEW_VERTEX_PROGRAM,
.cache = 0
},
#include "main/macros.h"
#include "shader/program.h"
#include "shader/prog_parameter.h"
-#include "shader/prog_print.h"
+#include "pipe/p_shader_tokens.h"
#include "brw_context.h"
#include "brw_vs.h"
reg++;
}
}
+
/* If there are no inputs, we'll still be reading one attribute's worth
* because it's required -- see urb_read_length setting.
*/
* vertex urb, so is half the amount:
*/
c->prog_data.urb_read_length = (c->nr_inputs + 1) / 2;
+
/* Setting this field to 0 leads to undefined behavior according to the
* the VS_STATE docs. Our VUEs will always have at least one attribute
* sitting in them, even if it's padding.
/**
* Return the brw reg for the given instruction's src argument.
- * Will return mangled results for SWZ op. The emit_swz() function
- * ignores this result and recalculates taking extended swizzles into
- * account.
*/
static struct brw_reg get_arg( struct brw_vs_compile *c,
const struct prog_instruction *inst,
}
-static void emit_swz( struct brw_vs_compile *c,
- struct brw_reg dst,
- const struct prog_instruction *inst)
-{
- const GLuint argIndex = 0;
- const struct prog_src_register src = inst->SrcReg[argIndex];
- struct brw_compile *p = &c->func;
- GLuint zeros_mask = 0;
- GLuint ones_mask = 0;
- GLuint src_mask = 0;
- GLubyte src_swz[4];
- GLboolean need_tmp = (src.Negate &&
- dst.file != BRW_GENERAL_REGISTER_FILE);
- struct brw_reg tmp = dst;
- GLuint i;
-
- if (need_tmp)
- tmp = get_tmp(c);
-
- for (i = 0; i < 4; i++) {
- if (dst.dw1.bits.writemask & (1<<i)) {
- GLubyte s = GET_SWZ(src.Swizzle, i);
- switch (s) {
- case SWIZZLE_X:
- case SWIZZLE_Y:
- case SWIZZLE_Z:
- case SWIZZLE_W:
- src_mask |= 1<<i;
- src_swz[i] = s;
- break;
- case SWIZZLE_ZERO:
- zeros_mask |= 1<<i;
- break;
- case SWIZZLE_ONE:
- ones_mask |= 1<<i;
- break;
- }
- }
- }
-
- /* Do src first, in case dst aliases src:
- */
- if (src_mask) {
- struct brw_reg arg0;
-
- arg0 = get_src_reg(c, inst, argIndex);
-
- arg0 = brw_swizzle(arg0,
- src_swz[0], src_swz[1],
- src_swz[2], src_swz[3]);
-
- brw_MOV(p, brw_writemask(tmp, src_mask), arg0);
- }
-
- if (zeros_mask)
- brw_MOV(p, brw_writemask(tmp, zeros_mask), brw_imm_f(0));
-
- if (ones_mask)
- brw_MOV(p, brw_writemask(tmp, ones_mask), brw_imm_f(1));
-
- if (src.Negate)
- brw_MOV(p, brw_writemask(tmp, src.Negate), negate(tmp));
-
- if (need_tmp) {
- brw_MOV(p, dst, tmp);
- release_tmp(c, tmp);
- }
-}
/**
brw_set_compression_control(p, BRW_COMPRESSION_NONE);
brw_set_access_mode(p, BRW_ALIGN_16);
- /* Message registers can't be read, so copy the output into GRF register
- if they are used in source registers */
- for (insn = 0; insn < nr_insns; insn++) {
- GLuint i;
- struct prog_instruction *inst = &c->vp->program.Base.Instructions[insn];
- for (i = 0; i < 3; i++) {
- struct prog_src_register *src = &inst->SrcReg[i];
- GLuint index = src->Index;
- GLuint file = src->File;
- if (file == PROGRAM_OUTPUT && index != VERT_RESULT_HPOS)
- c->output_regs[index].used_in_src = GL_TRUE;
- }
- }
-
/* Static register allocation
*/
brw_vs_alloc_regs(c);
_mesa_print_instruction(inst);
#endif
- /* Get argument regs. SWZ is special and does this itself.
+ /* Get argument regs.
*/
- if (inst->Opcode != OPCODE_SWZ)
- for (i = 0; i < 3; i++) {
- const struct prog_src_register *src = &inst->SrcReg[i];
- index = src->Index;
- file = src->File;
- if (file == PROGRAM_OUTPUT && c->output_regs[index].used_in_src)
- args[i] = c->output_regs[index].reg;
- else
- args[i] = get_arg(c, inst, i);
- }
+ for (i = 0; i < 3; i++) {
+ const struct prog_src_register *src = &inst->SrcReg[i];
+ index = src->Index;
+ file = src->File;
+ args[i] = get_arg(c, inst, i);
+ }
/* Get dest regs. Note that it is possible for a reg to be both
* dst and arg, given the static allocation of registers. So
*/
index = inst->DstReg.Index;
file = inst->DstReg.File;
- if (file == PROGRAM_OUTPUT && c->output_regs[index].used_in_src)
- dst = c->output_regs[index].reg;
- else
- dst = get_dst(c, inst->DstReg);
+ dst = get_dst(c, inst->DstReg);
if (inst->SaturateMode != SATURATE_OFF) {
_mesa_problem(NULL, "Unsupported saturate %d in vertex shader",
}
switch (inst->Opcode) {
- case OPCODE_ABS:
+ case TGSI_OPCODE_ABS:
brw_MOV(p, dst, brw_abs(args[0]));
break;
- case OPCODE_ADD:
+ case TGSI_OPCODE_ADD:
brw_ADD(p, dst, args[0], args[1]);
break;
- case OPCODE_COS:
+ case TGSI_OPCODE_COS:
emit_math1(c, BRW_MATH_FUNCTION_COS, dst, args[0], BRW_MATH_PRECISION_FULL);
break;
- case OPCODE_DP3:
+ case TGSI_OPCODE_DP3:
brw_DP3(p, dst, args[0], args[1]);
break;
- case OPCODE_DP4:
+ case TGSI_OPCODE_DP4:
brw_DP4(p, dst, args[0], args[1]);
break;
- case OPCODE_DPH:
+ case TGSI_OPCODE_DPH:
brw_DPH(p, dst, args[0], args[1]);
break;
- case OPCODE_NRM3:
+ case TGSI_OPCODE_NRM3:
emit_nrm(c, dst, args[0], 3);
break;
- case OPCODE_NRM4:
+ case TGSI_OPCODE_NRM4:
emit_nrm(c, dst, args[0], 4);
break;
- case OPCODE_DST:
+ case TGSI_OPCODE_DST:
unalias2(c, dst, args[0], args[1], emit_dst_noalias);
break;
- case OPCODE_EXP:
+ case TGSI_OPCODE_EXP:
unalias1(c, dst, args[0], emit_exp_noalias);
break;
- case OPCODE_EX2:
+ case TGSI_OPCODE_EX2:
emit_math1(c, BRW_MATH_FUNCTION_EXP, dst, args[0], BRW_MATH_PRECISION_FULL);
break;
- case OPCODE_ARL:
+ case TGSI_OPCODE_ARL:
emit_arl(c, dst, args[0]);
break;
- case OPCODE_FLR:
+ case TGSI_OPCODE_FLR:
brw_RNDD(p, dst, args[0]);
break;
- case OPCODE_FRC:
+ case TGSI_OPCODE_FRC:
brw_FRC(p, dst, args[0]);
break;
- case OPCODE_LOG:
+ case TGSI_OPCODE_LOG:
unalias1(c, dst, args[0], emit_log_noalias);
break;
- case OPCODE_LG2:
+ case TGSI_OPCODE_LG2:
emit_math1(c, BRW_MATH_FUNCTION_LOG, dst, args[0], BRW_MATH_PRECISION_FULL);
break;
- case OPCODE_LIT:
+ case TGSI_OPCODE_LIT:
unalias1(c, dst, args[0], emit_lit_noalias);
break;
- case OPCODE_LRP:
+ case TGSI_OPCODE_LRP:
unalias3(c, dst, args[0], args[1], args[2], emit_lrp_noalias);
break;
- case OPCODE_MAD:
+ case TGSI_OPCODE_MAD:
brw_MOV(p, brw_acc_reg(), args[2]);
brw_MAC(p, dst, args[0], args[1]);
break;
- case OPCODE_MAX:
+ case TGSI_OPCODE_MAX:
emit_max(p, dst, args[0], args[1]);
break;
- case OPCODE_MIN:
+ case TGSI_OPCODE_MIN:
emit_min(p, dst, args[0], args[1]);
break;
- case OPCODE_MOV:
+ case TGSI_OPCODE_MOV:
brw_MOV(p, dst, args[0]);
break;
- case OPCODE_MUL:
+ case TGSI_OPCODE_MUL:
brw_MUL(p, dst, args[0], args[1]);
break;
- case OPCODE_POW:
+ case TGSI_OPCODE_POW:
emit_math2(c, BRW_MATH_FUNCTION_POW, dst, args[0], args[1], BRW_MATH_PRECISION_FULL);
break;
- case OPCODE_RCP:
+ case TGSI_OPCODE_RCP:
emit_math1(c, BRW_MATH_FUNCTION_INV, dst, args[0], BRW_MATH_PRECISION_FULL);
break;
- case OPCODE_RSQ:
+ case TGSI_OPCODE_RSQ:
emit_math1(c, BRW_MATH_FUNCTION_RSQ, dst, args[0], BRW_MATH_PRECISION_FULL);
break;
-
- case OPCODE_SEQ:
+ case TGSI_OPCODE_SEQ:
emit_seq(p, dst, args[0], args[1]);
break;
- case OPCODE_SIN:
+ case TGSI_OPCODE_SIN:
emit_math1(c, BRW_MATH_FUNCTION_SIN, dst, args[0], BRW_MATH_PRECISION_FULL);
break;
- case OPCODE_SNE:
+ case TGSI_OPCODE_SNE:
emit_sne(p, dst, args[0], args[1]);
break;
- case OPCODE_SGE:
+ case TGSI_OPCODE_SGE:
emit_sge(p, dst, args[0], args[1]);
break;
- case OPCODE_SGT:
+ case TGSI_OPCODE_SGT:
emit_sgt(p, dst, args[0], args[1]);
break;
- case OPCODE_SLT:
+ case TGSI_OPCODE_SLT:
emit_slt(p, dst, args[0], args[1]);
break;
- case OPCODE_SLE:
+ case TGSI_OPCODE_SLE:
emit_sle(p, dst, args[0], args[1]);
break;
- case OPCODE_SUB:
+ case TGSI_OPCODE_SUB:
brw_ADD(p, dst, args[0], negate(args[1]));
break;
- case OPCODE_SWZ:
- /* The args[0] value can't be used here as it won't have
- * correctly encoded the full swizzle:
- */
- emit_swz(c, dst, inst);
- break;
- case OPCODE_TRUNC:
+ case TGSI_OPCODE_TRUNC:
/* round toward zero */
brw_RNDZ(p, dst, args[0]);
break;
- case OPCODE_XPD:
+ case TGSI_OPCODE_XPD:
emit_xpd(p, dst, args[0], args[1]);
break;
- case OPCODE_IF:
+ case TGSI_OPCODE_IF:
assert(if_depth < MAX_IF_DEPTH);
if_inst[if_depth] = brw_IF(p, BRW_EXECUTE_8);
/* Note that brw_IF smashes the predicate_control field. */
if_inst[if_depth]->header.predicate_control = get_predicate(inst);
if_depth++;
break;
- case OPCODE_ELSE:
+ case TGSI_OPCODE_ELSE:
if_inst[if_depth-1] = brw_ELSE(p, if_inst[if_depth-1]);
break;
- case OPCODE_ENDIF:
+ case TGSI_OPCODE_ENDIF:
assert(if_depth > 0);
brw_ENDIF(p, if_inst[--if_depth]);
break;
- case OPCODE_BGNLOOP:
+ case TGSI_OPCODE_BGNLOOP:
loop_inst[loop_depth++] = brw_DO(p, BRW_EXECUTE_8);
break;
- case OPCODE_BRK:
+ case TGSI_OPCODE_BRK:
brw_set_predicate_control(p, get_predicate(inst));
brw_BREAK(p);
brw_set_predicate_control(p, BRW_PREDICATE_NONE);
break;
- case OPCODE_CONT:
+ case TGSI_OPCODE_CONT:
brw_set_predicate_control(p, get_predicate(inst));
brw_CONT(p);
brw_set_predicate_control(p, BRW_PREDICATE_NONE);
break;
- case OPCODE_ENDLOOP:
+ case TGSI_OPCODE_ENDLOOP:
{
struct brw_instruction *inst0, *inst1;
GLuint br = 1;
/* patch all the BREAK/CONT instructions from last BEGINLOOP */
while (inst0 > loop_inst[loop_depth]) {
inst0--;
- if (inst0->header.opcode == BRW_OPCODE_BREAK) {
+ if (inst0->header.opcode == BRW_TGSI_OPCODE_BREAK) {
inst0->bits3.if_else.jump_count = br * (inst1 - inst0 + 1);
inst0->bits3.if_else.pop_count = 0;
}
- else if (inst0->header.opcode == BRW_OPCODE_CONTINUE) {
+ else if (inst0->header.opcode == BRW_TGSI_OPCODE_CONTINUE) {
inst0->bits3.if_else.jump_count = br * (inst1 - inst0);
inst0->bits3.if_else.pop_count = 0;
}
}
}
break;
- case OPCODE_BRA:
+ case TGSI_OPCODE_BRA:
brw_set_predicate_control(p, get_predicate(inst));
brw_ADD(p, brw_ip_reg(), brw_ip_reg(), brw_imm_d(1*16));
brw_set_predicate_control(p, BRW_PREDICATE_NONE);
break;
- case OPCODE_CAL:
+ case TGSI_OPCODE_CAL:
brw_set_access_mode(p, BRW_ALIGN_1);
brw_ADD(p, deref_1d(stack_index, 0), brw_ip_reg(), brw_imm_d(3*16));
brw_set_access_mode(p, BRW_ALIGN_16);
brw_save_call(p, inst->Comment, p->nr_insn);
brw_ADD(p, brw_ip_reg(), brw_ip_reg(), brw_imm_d(1*16));
break;
- case OPCODE_RET:
+ case TGSI_OPCODE_RET:
brw_ADD(p, get_addr_reg(stack_index),
get_addr_reg(stack_index), brw_imm_d(-4));
brw_set_access_mode(p, BRW_ALIGN_1);
brw_MOV(p, brw_ip_reg(), deref_1d(stack_index, 0));
brw_set_access_mode(p, BRW_ALIGN_16);
break;
- case OPCODE_END:
+ case TGSI_OPCODE_END:
end_offset = p->nr_insn;
/* this instruction will get patched later to jump past subroutine
* code, etc.
*/
brw_ADD(p, brw_ip_reg(), brw_ip_reg(), brw_imm_d(1*16));
break;
- case OPCODE_PRINT:
+ case TGSI_OPCODE_PRINT:
/* no-op */
break;
- case OPCODE_BGNSUB:
+ case TGSI_OPCODE_BGNSUB:
brw_save_label(p, inst->Comment, p->nr_insn);
break;
- case OPCODE_ENDSUB:
+ case TGSI_OPCODE_ENDSUB:
/* no-op */
break;
default:
hw_insn->header.destreg__conditionalmod = BRW_CONDITIONAL_NZ;
}
- if ((inst->DstReg.File == PROGRAM_OUTPUT)
- && (inst->DstReg.Index != VERT_RESULT_HPOS)
- && c->output_regs[inst->DstReg.Index].used_in_src) {
- brw_MOV(p, get_dst(c, inst->DstReg), dst);
- }
-
- /* Result color clamping.
- *
- * When destination register is an output register and
- * it's primary/secondary front/back color, we have to clamp
- * the result to [0,1]. This is done by enabling the
- * saturation bit for the last instruction.
- *
- * We don't use brw_set_saturate() as it modifies
- * p->current->header.saturate, which affects all the subsequent
- * instructions. Instead, we directly modify the header
- * of the last (already stored) instruction.
- */
- if (inst->DstReg.File == PROGRAM_OUTPUT) {
- if ((inst->DstReg.Index == VERT_RESULT_COL0)
- || (inst->DstReg.Index == VERT_RESULT_COL1)
- || (inst->DstReg.Index == VERT_RESULT_BFC0)
- || (inst->DstReg.Index == VERT_RESULT_BFC1)) {
- p->store[p->nr_insn-1].header.saturate = 1;
- }
- }
-
release_tmps(c);
}
uses_depth,
key);
+ /* Revisit this, figure out if it's really useful, and either push
+ * it into the state tracker so that everyone benefits (use to
+ * create fs variants with TEX rather than TXP), or discard.
+ */
+ key->proj_attrib_mask = ~0; /*brw->wm.input_size_masks[4-1];*/
- /* BRW_NEW_WM_INPUT_DIMENSIONS */
- key->proj_attrib_mask = brw->wm.input_size_masks[4-1];
-
- /* _NEW_LIGHT */
- key->flat_shade = (ctx->Light.ShadeModel == GL_FLAT);
+ /* PIPE_NEW_RAST */
+ key->flat_shade = brw->rast.flat_shade;
- /* _NEW_HINT */
- key->linear_color = (ctx->Hint.PerspectiveCorrection == GL_FASTEST);
+ /* This can be determined by looking at the INTERP mode of each input decl.
+ */
+ key->linear_color = 0;
/* _NEW_TEXTURE */
for (i = 0; i < BRW_MAX_TEX_UNIT; i++) {
- const struct gl_texture_unit *unit = &ctx->Texture.Unit[i];
-
- if (unit->_ReallyEnabled) {
- const struct gl_texture_object *t = unit->_Current;
- const struct gl_texture_image *img = t->Image[0][t->BaseLevel];
+ if (i < brw->nr_textures) {
+ const struct gl_texture_unit *unit = &ctx->Texture.Unit[i];
+ const struct gl_texture_object *t = unit->_Current;
+ const struct gl_texture_image *img = t->Image[0][t->BaseLevel];
+
if (img->InternalFormat == GL_YCBCR_MESA) {
key->yuvtex_mask |= 1 << i;
if (img->TexFormat->MesaFormat == MESA_FORMAT_YCBCR)
- key->yuvtex_swap_mask |= 1 << i;
+ key->yuvtex_swap_mask |= 1 << i;
}
- key->tex_swizzles[i] = t->_Swizzle;
+ key->tex_swizzles[i] = t->_Swizzle;
+
+ if (0)
+ key->shadowtex_mask |= 1<<i;
}
else {
key->tex_swizzles[i] = SWIZZLE_NOOP;
}
}
- /* Shadow */
- key->shadowtex_mask = fp->program.Base.ShadowSamplers;
- /* _NEW_BUFFERS */
- /*
- * Include the draw buffer origin and height so that we can calculate
- * fragment position values relative to the bottom left of the drawable,
- * from the incoming screen origin relative position we get as part of our
- * payload.
- *
- * We could avoid recompiling by including this as a constant referenced by
- * our program, but if we were to do that it would also be nice to handle
- * getting that constant updated at batchbuffer submit time (when we
- * hold the lock and know where the buffer really is) rather than at emit
- * time when we don't hold the lock and are just guessing. We could also
- * just avoid using this as key data if the program doesn't use
- * fragment.position.
- *
- * This pretty much becomes moot with DRI2 and redirected buffers anyway,
- * as our origins will always be zero then.
- */
+ /* _NEW_FRAMEBUFFER */
if (brw->intel.driDrawable != NULL) {
- key->origin_x = brw->intel.driDrawable->x;
- key->origin_y = brw->intel.driDrawable->y;
- key->drawable_height = brw->intel.driDrawable->h;
+ key->drawable_height = brw->fb.cbufs[0].height;
}
/* CACHE_NEW_VS_PROG */
GLuint tex_swizzles[BRW_MAX_TEX_UNIT];
GLuint program_string_id:32;
- GLuint origin_x, origin_y;
GLuint drawable_height;
GLuint vp_outputs_written;
};
{
struct brw_compile *p = &c->func;
- /* Calculate the pixel offset from window bottom left into destination
- * X and Y channels.
- */
if (mask & WRITEMASK_X) {
- /* X' = X - origin */
- brw_ADD(p,
+ /* X' = X */
+ brw_MOV(p,
dst[0],
- retype(arg0[0], BRW_REGISTER_TYPE_W),
- brw_imm_d(0 - c->key.origin_x));
+ retype(arg0[0], BRW_REGISTER_TYPE_W));
}
+ /* XXX: is this needed any more, or is this a NOOP?
+ */
if (mask & WRITEMASK_Y) {
- /* Y' = height - (Y - origin_y) = height + origin_y - Y */
+ /* Y' = height - 1 - Y */
brw_ADD(p,
dst[1],
negate(retype(arg0[1], BRW_REGISTER_TYPE_W)),
- brw_imm_d(c->key.origin_y + c->key.drawable_height - 1));
+ brw_imm_d(c->key.drawable_height - 1));
}
}
break;
case OPCODE_MOV:
- case OPCODE_SWZ:
emit_alu1(p, brw_MOV, dst, dst_flags, args[0]);
break;
*/
-#include "main/glheader.h"
-#include "main/macros.h"
-#include "main/enums.h"
+#include "pipe/p_shader_constants.h"
+
#include "brw_context.h"
#include "brw_wm.h"
#include "brw_util.h"
-#include "shader/prog_parameter.h"
-#include "shader/prog_print.h"
-#include "shader/prog_statevars.h"
-
-
-/** An invalid texture target */
-#define TEX_TARGET_NONE NUM_TEXTURE_TARGETS
-
-/** An invalid texture unit */
-#define TEX_UNIT_NONE BRW_MAX_TEX_UNIT
-
-#define FIRST_INTERNAL_TEMP MAX_NV_FRAGMENT_PROGRAM_TEMPS
#define X 0
#define Y 1
"FRONTFACING",
};
-#if 0
-static const char *wm_file_strings[] = {
- "PAYLOAD"
-};
-#endif
/***********************************************************************
}
c->fp_temp |= 1<<(bit-1);
- return dst_reg(PROGRAM_TEMPORARY, FIRST_INTERNAL_TEMP+(bit-1));
+ return dst_reg(PROGRAM_TEMPORARY, c->first_internal_temp+(bit-1));
}
static void release_temp( struct brw_wm_compile *c, struct prog_dst_register temp )
{
- c->fp_temp &= ~(1 << (temp.Index - FIRST_INTERNAL_TEMP));
+ c->fp_temp &= ~(1 << (temp.Index - c->first_internal_temp));
}
return inst;
}
-static struct prog_instruction * emit_tex_op(struct brw_wm_compile *c,
- GLuint op,
- struct prog_dst_register dest,
- GLuint saturate,
- GLuint tex_src_unit,
- GLuint tex_src_target,
- GLuint tex_shadow,
- struct prog_src_register src0,
- struct prog_src_register src1,
- struct prog_src_register src2 )
+static struct prog_instruction * emit_op(struct brw_wm_compile *c,
+ GLuint op,
+ struct prog_dst_register dest,
+ GLuint saturate,
+ struct prog_src_register src0,
+ struct prog_src_register src1,
+ struct prog_src_register src2 )
{
struct prog_instruction *inst = get_fp_inst(c);
- assert(tex_src_unit < BRW_MAX_TEX_UNIT ||
- tex_src_unit == TEX_UNIT_NONE);
- assert(tex_src_target < NUM_TEXTURE_TARGETS ||
- tex_src_target == TEX_TARGET_NONE);
-
- /* update mask of which texture units are referenced by this program */
- if (tex_src_unit != TEX_UNIT_NONE)
- c->fp->tex_units_used |= (1 << tex_src_unit);
-
memset(inst, 0, sizeof(*inst));
inst->Opcode = op;
inst->DstReg = dest;
inst->SaturateMode = saturate;
- inst->TexSrcUnit = tex_src_unit;
- inst->TexSrcTarget = tex_src_target;
- inst->TexShadow = tex_shadow;
inst->SrcReg[0] = src0;
inst->SrcReg[1] = src1;
inst->SrcReg[2] = src2;
return inst;
}
-
-
-static struct prog_instruction * emit_op(struct brw_wm_compile *c,
- GLuint op,
- struct prog_dst_register dest,
- GLuint saturate,
- struct prog_src_register src0,
- struct prog_src_register src1,
- struct prog_src_register src2 )
-{
- return emit_tex_op(c, op, dest, saturate,
- TEX_UNIT_NONE, TEX_TARGET_NONE, 0, /* unit, tgt, shadow */
- src0, src1, src2);
-}
-/* Many Mesa opcodes produce the same value across all the result channels.
+/* Many opcodes produce the same value across all the result channels.
* We'd rather not have to support that splatting in the opcode implementations,
* and brw_wm_pass*.c wants to optimize them out by shuffling references around
* anyway. We can easily get both by emitting the opcode to one channel, and
other_channel_mask = inst0->DstReg.WriteMask & ~(1 << dst_chan);
if (other_channel_mask != 0) {
inst = emit_op(c,
- OPCODE_MOV,
+ TGSI_OPCODE_MOV,
dst_mask(inst0->DstReg, other_channel_mask),
0,
src_swizzle1(src_reg_from_dst(inst0->DstReg), dst_chan),
}
static void emit_interp( struct brw_wm_compile *c,
- GLuint idx )
+ GLuint semantic,
+ GLuint semantic_index,
+ GLuint interp_mode )
{
struct prog_dst_register dst = dst_reg(PROGRAM_INPUT, idx);
struct prog_src_register interp = src_reg(PROGRAM_PAYLOAD, idx);
* multiplied by 1/W in the SF program, and LINTERP on those
* which have not:
*/
- switch (idx) {
+ switch (semantic) {
case FRAG_ATTRIB_WPOS:
/* Have to treat wpos.xy specially:
*/
deltas,
src_undef());
break;
- case FRAG_ATTRIB_COL0:
- case FRAG_ATTRIB_COL1:
+
+ case TGSI_SEMANTIC_COLOR:
if (c->key.flat_shade) {
emit_op(c,
WM_CINTERP,
src_undef());
}
else {
- if (c->key.linear_color) {
- emit_op(c,
- WM_LINTERP,
- dst,
- 0,
- interp,
- deltas,
- src_undef());
- }
- else {
- /* perspective-corrected color interpolation */
- emit_op(c,
- WM_PINTERP,
- dst,
- 0,
- interp,
- deltas,
- get_pixel_w(c));
- }
+ emit_op(c,
+ translate_interp_mode(interp_mode),
+ dst,
+ 0,
+ interp,
+ deltas,
+ src_undef());
}
break;
case FRAG_ATTRIB_FOGC:
get_pixel_w(c));
emit_op(c,
- OPCODE_MOV,
+ TGSI_OPCODE_MOV,
dst_mask(dst, WRITEMASK_YZW),
0,
src_swizzle(interp,
get_pixel_w(c));
emit_op(c,
- OPCODE_MOV,
+ TGSI_OPCODE_MOV,
dst_mask(dst, WRITEMASK_ZW),
0,
src_swizzle(interp,
default:
emit_op(c,
- WM_PINTERP,
+ translate_interp_mode(interp_mode),
dst,
0,
interp,
get_pixel_w(c));
break;
}
-
- c->fp_interp_emitted |= 1<<idx;
}
/***********************************************************************
/* dst.y = mul src0.y, src1.y
*/
emit_op(c,
- OPCODE_MUL,
+ TGSI_OPCODE_MUL,
dst_mask(dst, WRITEMASK_Y),
inst->SaturateMode,
src0,
/* dst.xz = swz src0.1zzz
*/
swz = emit_op(c,
- OPCODE_SWZ,
+ TGSI_OPCODE_MOV,
dst_mask(dst, WRITEMASK_XZ),
inst->SaturateMode,
src_swizzle(src0, SWIZZLE_ONE, z, z, z),
/* dst.w = mov src1.w
*/
emit_op(c,
- OPCODE_MOV,
+ TGSI_OPCODE_MOV,
dst_mask(dst, WRITEMASK_W),
inst->SaturateMode,
src1,
/* dst.xw = swz src0.1111
*/
swz = emit_op(c,
- OPCODE_SWZ,
+ TGSI_OPCODE_MOV,
dst_mask(dst, WRITEMASK_XW),
0,
src_swizzle1(src0, SWIZZLE_ONE),
if (dst.WriteMask & WRITEMASK_YZ) {
emit_op(c,
- OPCODE_LIT,
+ TGSI_OPCODE_LIT,
dst_mask(dst, WRITEMASK_YZ),
inst->SaturateMode,
src0,
coord = src_reg_from_dst(tmpcoord);
/* tmpcoord = src0 (i.e.: coord = src0) */
- out = emit_op(c, OPCODE_MOV,
+ out = emit_op(c, TGSI_OPCODE_MOV,
tmpcoord,
0,
src0,
out->SrcReg[0].Abs = 1;
/* tmp0 = MAX(coord.X, coord.Y) */
- emit_op(c, OPCODE_MAX,
+ emit_op(c, TGSI_OPCODE_MAX,
tmp0,
0,
src_swizzle1(coord, X),
src_undef());
/* tmp1 = MAX(tmp0, coord.Z) */
- emit_op(c, OPCODE_MAX,
+ emit_op(c, TGSI_OPCODE_MAX,
tmp1,
0,
tmp0src,
src_undef());
/* tmp0 = 1 / tmp1 */
- emit_op(c, OPCODE_RCP,
+ emit_op(c, TGSI_OPCODE_RCP,
dst_mask(tmp0, WRITEMASK_X),
0,
tmp1src,
src_undef());
/* tmpCoord = src0 * tmp0 */
- emit_op(c, OPCODE_MUL,
+ emit_op(c, TGSI_OPCODE_MUL,
tmpcoord,
0,
src0,
/* coord.xy = MUL inst->SrcReg[0], { 1/width, 1/height }
*/
emit_op(c,
- OPCODE_MUL,
+ TGSI_OPCODE_MUL,
tmpcoord,
0,
inst->SrcReg[0],
/* tmp = TEX ...
*/
emit_tex_op(c,
- OPCODE_TEX,
+ TGSI_OPCODE_TEX,
tmp,
inst->SaturateMode,
unit,
/* tmp.xyz = ADD TMP, C0
*/
emit_op(c,
- OPCODE_ADD,
+ TGSI_OPCODE_ADD,
dst_mask(tmp, WRITEMASK_XYZ),
0,
tmpsrc,
*/
emit_op(c,
- OPCODE_MUL,
+ TGSI_OPCODE_MUL,
dst_mask(tmp, WRITEMASK_Y),
0,
tmpsrc,
*/
emit_op(c,
- OPCODE_MAD,
+ TGSI_OPCODE_MAD,
dst_mask(dst, WRITEMASK_XYZ),
0,
swap_uv?src_swizzle(tmpsrc, Z,Z,X,X):src_swizzle(tmpsrc, X,X,Z,Z),
/* RGB.y = MAD YUV.z, C1.w, RGB.y
*/
emit_op(c,
- OPCODE_MAD,
+ TGSI_OPCODE_MAD,
dst_mask(dst, WRITEMASK_Y),
0,
src_swizzle1(tmpsrc, Z),
else {
/* ordinary RGBA tex instruction */
emit_tex_op(c,
- OPCODE_TEX,
+ TGSI_OPCODE_TEX,
inst->DstReg,
inst->SaturateMode,
unit,
if (c->key.tex_swizzles[unit] != SWIZZLE_NOOP) {
/* swizzle the result of the TEX instruction */
struct prog_src_register tmpsrc = src_reg_from_dst(inst->DstReg);
- emit_op(c, OPCODE_SWZ,
+ emit_op(c, TGSI_OPCODE_MOV,
inst->DstReg,
SATURATE_OFF, /* saturate already done above */
src_swizzle4(tmpsrc, c->key.tex_swizzles[unit]),
const struct prog_src_register src = inst->SrcReg[0];
GLboolean retVal;
- assert(inst->Opcode == OPCODE_TXP);
+ assert(inst->Opcode == TGSI_OPCODE_TXP);
/* Only try to detect the simplest cases. Could detect (later)
* cases where we are trying to emit code like RCP {1.0}, MUL x,
/* tmp0.w = RCP inst.arg[0][3]
*/
emit_op(c,
- OPCODE_RCP,
+ TGSI_OPCODE_RCP,
dst_mask(tmp, WRITEMASK_W),
0,
src_swizzle1(src0, GET_SWZ(src0.Swizzle, W)),
/* tmp0.xyz = MUL inst.arg[0], tmp0.wwww
*/
emit_op(c,
- OPCODE_MUL,
+ TGSI_OPCODE_MUL,
dst_mask(tmp, WRITEMASK_XYZ),
0,
src0,
GLuint idx = inst->SrcReg[i].Index;
if (!(c->fp_interp_emitted & (1<<idx))) {
emit_interp(c, idx);
+ c->fp_interp_emitted |= 1<<idx;
}
}
}
*/
switch (inst->Opcode) {
- case OPCODE_SWZ:
+ case TGSI_OPCODE_ABS:
out = emit_insn(c, inst);
- out->Opcode = OPCODE_MOV;
- break;
-
- case OPCODE_ABS:
- out = emit_insn(c, inst);
- out->Opcode = OPCODE_MOV;
+ out->Opcode = TGSI_OPCODE_MOV;
out->SrcReg[0].Negate = NEGATE_NONE;
out->SrcReg[0].Abs = 1;
break;
- case OPCODE_SUB:
+ case TGSI_OPCODE_SUB:
out = emit_insn(c, inst);
- out->Opcode = OPCODE_ADD;
+ out->Opcode = TGSI_OPCODE_ADD;
out->SrcReg[1].Negate ^= NEGATE_XYZW;
break;
- case OPCODE_SCS:
+ case TGSI_OPCODE_SCS:
out = emit_insn(c, inst);
/* This should probably be done in the parser.
*/
out->DstReg.WriteMask &= WRITEMASK_XY;
break;
- case OPCODE_DST:
+ case TGSI_OPCODE_DST:
precalc_dst(c, inst);
break;
- case OPCODE_LIT:
+ case TGSI_OPCODE_LIT:
precalc_lit(c, inst);
break;
- case OPCODE_TEX:
+ case TGSI_OPCODE_TEX:
precalc_tex(c, inst);
break;
- case OPCODE_TXP:
+ case TGSI_OPCODE_TXP:
precalc_txp(c, inst);
break;
- case OPCODE_TXB:
+ case TGSI_OPCODE_TXB:
out = emit_insn(c, inst);
out->TexSrcUnit = fp->program.Base.SamplerUnits[inst->TexSrcUnit];
assert(out->TexSrcUnit < BRW_MAX_TEX_UNIT);
break;
- case OPCODE_XPD:
+ case TGSI_OPCODE_XPD:
out = emit_insn(c, inst);
/* This should probably be done in the parser.
*/
out->DstReg.WriteMask &= WRITEMASK_XYZ;
break;
- case OPCODE_KIL:
+ case TGSI_OPCODE_KIL:
out = emit_insn(c, inst);
/* This should probably be done in the parser.
*/
out->DstReg.WriteMask = 0;
break;
- case OPCODE_END:
+ case TGSI_OPCODE_END:
emit_fb_write(c);
break;
- case OPCODE_PRINT:
- break;
default:
if (brw_wm_is_scalar_result(inst->Opcode))
emit_scalar_insn(c, inst);
#include "brw_eu.h"
#include "brw_wm.h"
-enum _subroutine {
- SUB_NOISE1, SUB_NOISE2, SUB_NOISE3, SUB_NOISE4
-};
static struct brw_reg get_dst_reg(struct brw_wm_compile *c,
const struct prog_instruction *inst,
case OPCODE_CAL:
case OPCODE_BRK:
case OPCODE_RET:
- case OPCODE_NOISE1:
- case OPCODE_NOISE2:
- case OPCODE_NOISE3:
- case OPCODE_NOISE4:
case OPCODE_BGNLOOP:
return GL_TRUE;
default:
0, 16, 2 );
}
-/* One-, two- and three-dimensional Perlin noise, similar to the description
- in _Improving Noise_, Ken Perlin, Computer Graphics vol. 35 no. 3.
-
- noise1_sub emits the one-dimensional case. Its argument is read from the
- temporary found at lookup_tmp( c, mark - 2 ) -- presumably the one that
- emit_noise1 allocates and fills right before invoking this subroutine --
- and the result is written back to that same register by the final MUL.
- Every temporary allocated here is released again before returning. */
-static void noise1_sub( struct brw_wm_compile *c ) {
- struct brw_compile *p = &c->func;
- struct brw_reg param,
- x0, x1, /* gradients at each end */
- t, tmp[ 2 ], /* float temporaries */
- itmp[ 5 ]; /* unsigned integer temporaries (aliases of floats above) */
- int i;
- int mark = mark_tmps( c );
-
- x0 = alloc_tmp( c );
- x1 = alloc_tmp( c );
- t = alloc_tmp( c );
- tmp[ 0 ] = alloc_tmp( c );
- tmp[ 1 ] = alloc_tmp( c );
- itmp[ 0 ] = retype( tmp[ 0 ], BRW_REGISTER_TYPE_UD );
- itmp[ 1 ] = retype( tmp[ 1 ], BRW_REGISTER_TYPE_UD );
- itmp[ 2 ] = retype( x0, BRW_REGISTER_TYPE_UD ); /* itmp[ 2..4 ] alias x0, x1 and t; they carry the hash multipliers until the gradients are written */
- itmp[ 3 ] = retype( x1, BRW_REGISTER_TYPE_UD );
- itmp[ 4 ] = retype( t, BRW_REGISTER_TYPE_UD );
-
- param = lookup_tmp( c, mark - 2 ); /* argument on entry; overwritten with the result by the final MUL */
-
- brw_set_access_mode( p, BRW_ALIGN_1 );
-
- brw_MOV( p, itmp[ 2 ], brw_imm_ud( 0xBA97 ) ); /* constant used later */
-
- /* Arrange the two end coordinates into scalars (itmp0/itmp1) to
- be hashed. Also compute the remainder (offset within the unit
- length), interleaved to reduce register dependency penalties. */
- brw_RNDD( p, retype( itmp[ 0 ], BRW_REGISTER_TYPE_D ), param );
- brw_FRC( p, param, param );
- brw_ADD( p, itmp[ 1 ], itmp[ 0 ], brw_imm_ud( 1 ) );
- brw_MOV( p, itmp[ 3 ], brw_imm_ud( 0x79D9 ) ); /* constant used later */
- brw_MOV( p, itmp[ 4 ], brw_imm_ud( 0xD5B1 ) ); /* constant used later */
-
- /* We're now ready to perform the hashing. The two hashes are
- interleaved for performance. The hash function used is
- designed to rapidly achieve avalanche and require only 32x16
- bit multiplication, and 16-bit swizzles (which we get for
- free). We can't use immediate operands in the multiplies,
- because immediates are permitted only in src1 and the 16-bit
- factor is permitted only in src0. */
- for( i = 0; i < 2; i++ )
- brw_MUL( p, itmp[ i ], itmp[ 2 ], itmp[ i ] );
- for( i = 0; i < 2; i++ )
- brw_XOR( p, low_words( itmp[ i ] ), low_words( itmp[ i ] ),
- high_words( itmp[ i ] ) );
- for( i = 0; i < 2; i++ )
- brw_MUL( p, itmp[ i ], itmp[ 3 ], itmp[ i ] );
- for( i = 0; i < 2; i++ )
- brw_XOR( p, low_words( itmp[ i ] ), low_words( itmp[ i ] ),
- high_words( itmp[ i ] ) );
- for( i = 0; i < 2; i++ )
- brw_MUL( p, itmp[ i ], itmp[ 4 ], itmp[ i ] );
- for( i = 0; i < 2; i++ )
- brw_XOR( p, low_words( itmp[ i ] ), low_words( itmp[ i ] ),
- high_words( itmp[ i ] ) );
-
- /* Now we want to initialise the two gradients based on the
- hashes. Format conversion from signed integer to float leaves
- everything scaled too high by a factor of pow( 2, 31 ), but
- we correct for that right at the end. */
- brw_ADD( p, t, param, brw_imm_f( -1.0 ) ); /* t = param - 1; this overwrites the multiplier held in itmp[ 4 ] */
- brw_MOV( p, x0, retype( tmp[ 0 ], BRW_REGISTER_TYPE_D ) );
- brw_MOV( p, x1, retype( tmp[ 1 ], BRW_REGISTER_TYPE_D ) );
-
- brw_MUL( p, x0, x0, param );
- brw_MUL( p, x1, x1, t );
-
- /* We interpolate between the gradients using the polynomial
- 6t^5 - 15t^4 + 10t^3 (Perlin). */
- brw_MUL( p, tmp[ 0 ], param, brw_imm_f( 6.0 ) );
- brw_ADD( p, tmp[ 0 ], tmp[ 0 ], brw_imm_f( -15.0 ) );
- brw_MUL( p, tmp[ 0 ], tmp[ 0 ], param );
- brw_ADD( p, tmp[ 0 ], tmp[ 0 ], brw_imm_f( 10.0 ) );
- brw_MUL( p, tmp[ 0 ], tmp[ 0 ], param );
- brw_ADD( p, x1, x1, negate( x0 ) ); /* unrelated work to fill the
- pipeline */
- brw_MUL( p, tmp[ 0 ], tmp[ 0 ], param );
- brw_MUL( p, param, tmp[ 0 ], param ); /* param now holds the polynomial weight */
- brw_MUL( p, x1, x1, param );
- brw_ADD( p, x0, x0, x1 ); /* x0 += weight * ( x1 - x0 ) */
- /* scale by pow( 2, -30 ), to compensate for the format conversion
- above and an extra factor of 2 so that a single gradient covers
- the [-1,1] range */
- brw_MUL( p, param, x0, brw_imm_f( 0.000000000931322574615478515625 ) );
-
- release_tmps( c, mark );
-}
-/**
- * Emit a one-argument noise instruction.
- *
- * Copies the register returned by get_src_reg( c, inst, 0, 0 ) into a
- * freshly allocated temporary, hands noise1_sub to invoke_subroutine(),
- * and then moves that temporary into every destination channel selected
- * by inst->DstReg.WriteMask. Saturation is switched on around the result
- * moves when inst->SaturateMode is SATURATE_ZERO_ONE.
- */
-static void emit_noise1( struct brw_wm_compile *c,
- const struct prog_instruction *inst )
-{
- struct brw_compile *p = &c->func;
- struct brw_reg src, param, dst;
- GLuint mask = inst->DstReg.WriteMask;
- int i;
- int mark = mark_tmps( c );
-
- assert( mark == 0 ); /* NOTE(review): presumably no temporaries may be live on entry -- confirm against mark_tmps() */
-
- src = get_src_reg( c, inst, 0, 0 );
-
- param = alloc_tmp( c ); /* argument slot; read back below as the result */
-
- brw_MOV( p, param, src );
-
- invoke_subroutine( c, SUB_NOISE1, noise1_sub );
-
- /* Fill in the result: */
- brw_set_saturate( p, inst->SaturateMode == SATURATE_ZERO_ONE );
- for (i = 0 ; i < 4; i++) {
- if (mask & (1<<i)) {
- dst = get_dst_reg(c, inst, i);
- brw_MOV( p, dst, param ); /* the same scalar goes to every enabled channel */
- }
- }
- if( inst->SaturateMode == SATURATE_ZERO_ONE )
- brw_set_saturate( p, 0 );
-
- release_tmps( c, mark );
-}
-/**
- * Two-dimensional noise subroutine.
- *
- * The two arguments are read from the temporaries found at
- * lookup_tmp( c, mark - 3 ) and lookup_tmp( c, mark - 2 ) -- presumably the
- * ones emit_noise2 allocates and fills before invoking this subroutine.
- * The result is written to the first of them (param0) by the final MUL.
- * Every temporary allocated here is released again before returning.
- */
-static void noise2_sub( struct brw_wm_compile *c ) {
-
- struct brw_compile *p = &c->func;
- struct brw_reg param0, param1,
- x0y0, x0y1, x1y0, x1y1, /* gradients at each corner */
- t, tmp[ 4 ], /* float temporaries */
- itmp[ 7 ]; /* unsigned integer temporaries (aliases of floats above) */
- int i;
- int mark = mark_tmps( c );
-
- x0y0 = alloc_tmp( c );
- x0y1 = alloc_tmp( c );
- x1y0 = alloc_tmp( c );
- x1y1 = alloc_tmp( c );
- t = alloc_tmp( c );
- for( i = 0; i < 4; i++ ) {
- tmp[ i ] = alloc_tmp( c );
- itmp[ i ] = retype( tmp[ i ], BRW_REGISTER_TYPE_UD );
- }
- itmp[ 4 ] = retype( x0y0, BRW_REGISTER_TYPE_UD ); /* itmp[ 4..6 ] alias three gradient registers; they carry the hash multipliers until the gradients are written */
- itmp[ 5 ] = retype( x0y1, BRW_REGISTER_TYPE_UD );
- itmp[ 6 ] = retype( x1y0, BRW_REGISTER_TYPE_UD );
-
- param0 = lookup_tmp( c, mark - 3 ); /* first argument on entry; overwritten with the result by the final MUL */
- param1 = lookup_tmp( c, mark - 2 );
-
- brw_set_access_mode( p, BRW_ALIGN_1 );
-
- /* Arrange the four corner coordinates into scalars (itmp0..itmp3) to
- be hashed. Also compute the remainders (offsets within the unit
- square), interleaved to reduce register dependency penalties. */
- brw_RNDD( p, retype( itmp[ 0 ], BRW_REGISTER_TYPE_D ), param0 );
- brw_RNDD( p, retype( itmp[ 1 ], BRW_REGISTER_TYPE_D ), param1 );
- brw_FRC( p, param0, param0 );
- brw_FRC( p, param1, param1 );
- brw_MOV( p, itmp[ 4 ], brw_imm_ud( 0xBA97 ) ); /* constant used later */
- brw_ADD( p, high_words( itmp[ 0 ] ), high_words( itmp[ 0 ] ),
- low_words( itmp[ 1 ] ) ); /* add the low word of the rounded param1 into the high word of itmp[ 0 ] */
- brw_MOV( p, itmp[ 5 ], brw_imm_ud( 0x79D9 ) ); /* constant used later */
- brw_MOV( p, itmp[ 6 ], brw_imm_ud( 0xD5B1 ) ); /* constant used later */
- brw_ADD( p, itmp[ 1 ], itmp[ 0 ], brw_imm_ud( 0x10000 ) ); /* high-word part + 1 */
- brw_ADD( p, itmp[ 2 ], itmp[ 0 ], brw_imm_ud( 0x1 ) ); /* low-word part + 1 */
- brw_ADD( p, itmp[ 3 ], itmp[ 0 ], brw_imm_ud( 0x10001 ) ); /* both parts + 1 */
-
- /* We're now ready to perform the hashing. The four hashes are
- interleaved for performance. The hash function used is
- designed to rapidly achieve avalanche and require only 32x16
- bit multiplication, and 16-bit swizzles (which we get for
- free). We can't use immediate operands in the multiplies,
- because immediates are permitted only in src1 and the 16-bit
- factor is permitted only in src0. */
- for( i = 0; i < 4; i++ )
- brw_MUL( p, itmp[ i ], itmp[ 4 ], itmp[ i ] );
- for( i = 0; i < 4; i++ )
- brw_XOR( p, low_words( itmp[ i ] ), low_words( itmp[ i ] ),
- high_words( itmp[ i ] ) );
- for( i = 0; i < 4; i++ )
- brw_MUL( p, itmp[ i ], itmp[ 5 ], itmp[ i ] );
- for( i = 0; i < 4; i++ )
- brw_XOR( p, low_words( itmp[ i ] ), low_words( itmp[ i ] ),
- high_words( itmp[ i ] ) );
- for( i = 0; i < 4; i++ )
- brw_MUL( p, itmp[ i ], itmp[ 6 ], itmp[ i ] );
- for( i = 0; i < 4; i++ )
- brw_XOR( p, low_words( itmp[ i ] ), low_words( itmp[ i ] ),
- high_words( itmp[ i ] ) );
-
- /* Now we want to initialise the four gradients based on the
- hashes. Format conversion from signed integer to float leaves
- everything scaled too high by a factor of pow( 2, 15 ), but
- we correct for that right at the end. */
- brw_ADD( p, t, param0, brw_imm_f( -1.0 ) ); /* t = param0 - 1 */
- brw_MOV( p, x0y0, low_words( tmp[ 0 ] ) );
- brw_MOV( p, x0y1, low_words( tmp[ 1 ] ) );
- brw_MOV( p, x1y0, low_words( tmp[ 2 ] ) );
- brw_MOV( p, x1y1, low_words( tmp[ 3 ] ) );
-
- brw_MOV( p, tmp[ 0 ], high_words( tmp[ 0 ] ) );
- brw_MOV( p, tmp[ 1 ], high_words( tmp[ 1 ] ) );
- brw_MOV( p, tmp[ 2 ], high_words( tmp[ 2 ] ) );
- brw_MOV( p, tmp[ 3 ], high_words( tmp[ 3 ] ) );
-
- brw_MUL( p, x1y0, x1y0, t );
- brw_MUL( p, x1y1, x1y1, t );
- brw_ADD( p, t, param1, brw_imm_f( -1.0 ) ); /* t = param1 - 1 */
- brw_MUL( p, x0y0, x0y0, param0 );
- brw_MUL( p, x0y1, x0y1, param0 );
-
- brw_MUL( p, tmp[ 0 ], tmp[ 0 ], param1 );
- brw_MUL( p, tmp[ 2 ], tmp[ 2 ], param1 );
- brw_MUL( p, tmp[ 1 ], tmp[ 1 ], t );
- brw_MUL( p, tmp[ 3 ], tmp[ 3 ], t );
-
- brw_ADD( p, x0y0, x0y0, tmp[ 0 ] );
- brw_ADD( p, x1y0, x1y0, tmp[ 2 ] );
- brw_ADD( p, x0y1, x0y1, tmp[ 1 ] );
- brw_ADD( p, x1y1, x1y1, tmp[ 3 ] );
-
- /* We interpolate between the gradients using the polynomial
- 6t^5 - 15t^4 + 10t^3 (Perlin). */
- brw_MUL( p, tmp[ 0 ], param0, brw_imm_f( 6.0 ) );
- brw_MUL( p, tmp[ 1 ], param1, brw_imm_f( 6.0 ) );
- brw_ADD( p, tmp[ 0 ], tmp[ 0 ], brw_imm_f( -15.0 ) );
- brw_ADD( p, tmp[ 1 ], tmp[ 1 ], brw_imm_f( -15.0 ) );
- brw_MUL( p, tmp[ 0 ], tmp[ 0 ], param0 );
- brw_MUL( p, tmp[ 1 ], tmp[ 1 ], param1 );
- brw_ADD( p, x0y1, x0y1, negate( x0y0 ) ); /* unrelated work to fill the
- pipeline */
- brw_ADD( p, tmp[ 0 ], tmp[ 0 ], brw_imm_f( 10.0 ) );
- brw_ADD( p, tmp[ 1 ], tmp[ 1 ], brw_imm_f( 10.0 ) );
- brw_MUL( p, tmp[ 0 ], tmp[ 0 ], param0 );
- brw_MUL( p, tmp[ 1 ], tmp[ 1 ], param1 );
- brw_ADD( p, x1y1, x1y1, negate( x1y0 ) ); /* unrelated work to fill the
- pipeline */
- brw_MUL( p, tmp[ 0 ], tmp[ 0 ], param0 );
- brw_MUL( p, tmp[ 1 ], tmp[ 1 ], param1 );
- brw_MUL( p, param0, tmp[ 0 ], param0 ); /* param0 and param1 now hold the polynomial weights */
- brw_MUL( p, param1, tmp[ 1 ], param1 );
-
- /* Here we interpolate in the y dimension... */
- brw_MUL( p, x0y1, x0y1, param1 );
- brw_MUL( p, x1y1, x1y1, param1 );
- brw_ADD( p, x0y0, x0y0, x0y1 );
- brw_ADD( p, x1y0, x1y0, x1y1 );
-
- /* And now in x. There are horrible register dependencies here,
- but we have nothing else to do. */
- brw_ADD( p, x1y0, x1y0, negate( x0y0 ) );
- brw_MUL( p, x1y0, x1y0, param0 );
- brw_ADD( p, x0y0, x0y0, x1y0 );
-
- /* scale by pow( 2, -15 ), as described above */
- brw_MUL( p, param0, x0y0, brw_imm_f( 0.000030517578125 ) );
-
- release_tmps( c, mark );
-}
-/**
- * Emit a two-argument noise instruction.
- *
- * Copies the registers returned by get_src_reg( c, inst, 0, 0 ) and
- * get_src_reg( c, inst, 0, 1 ) into two freshly allocated temporaries, hands
- * noise2_sub to invoke_subroutine(), and then moves the first temporary
- * into every destination channel selected by inst->DstReg.WriteMask.
- * Saturation is switched on around the result moves when
- * inst->SaturateMode is SATURATE_ZERO_ONE.
- */
-static void emit_noise2( struct brw_wm_compile *c,
- const struct prog_instruction *inst )
-{
- struct brw_compile *p = &c->func;
- struct brw_reg src0, src1, param0, param1, dst;
- GLuint mask = inst->DstReg.WriteMask;
- int i;
- int mark = mark_tmps( c );
-
- assert( mark == 0 ); /* NOTE(review): presumably no temporaries may be live on entry -- confirm against mark_tmps() */
-
- src0 = get_src_reg( c, inst, 0, 0 ); /* NOTE(review): looks like channels 0 and 1 of source operand 0 -- confirm get_src_reg() argument order */
- src1 = get_src_reg( c, inst, 0, 1 );
-
- param0 = alloc_tmp( c ); /* first argument slot; read back below as the result */
- param1 = alloc_tmp( c );
-
- brw_MOV( p, param0, src0 );
- brw_MOV( p, param1, src1 );
-
- invoke_subroutine( c, SUB_NOISE2, noise2_sub );
-
- /* Fill in the result: */
- brw_set_saturate( p, inst->SaturateMode == SATURATE_ZERO_ONE );
- for (i = 0 ; i < 4; i++) {
- if (mask & (1<<i)) {
- dst = get_dst_reg(c, inst, i);
- brw_MOV( p, dst, param0 ); /* the same scalar goes to every enabled channel */
- }
- }
- if( inst->SaturateMode == SATURATE_ZERO_ONE )
- brw_set_saturate( p, 0 );
-
- release_tmps( c, mark );
-}
-
-/**
- * The three-dimensional case is much like the one- and two- versions above,
- * but since the number of corners is rapidly growing we now pack 16 16-bit
- * hashes into each register to extract more parallelism from the EUs.
- *
- * The three arguments are read from the temporaries found at
- * lookup_tmp( c, mark - 4 ), lookup_tmp( c, mark - 3 ) and
- * lookup_tmp( c, mark - 2 ) -- presumably the ones emit_noise3 allocates and
- * fills before invoking this subroutine. The result is written to the
- * first of them (param0) by the final MUL. Every temporary allocated here
- * is released again before returning.
- */
-static void noise3_sub( struct brw_wm_compile *c ) {
-
- struct brw_compile *p = &c->func;
- struct brw_reg param0, param1, param2,
- x0y0, x0y1, x1y0, x1y1, /* gradients at four of the corners */
- xi, yi, zi, /* interpolation coefficients */
- t, tmp[ 8 ], /* float temporaries */
- itmp[ 8 ], /* unsigned integer temporaries (aliases of floats above) */
- wtmp[ 8 ]; /* 16-way unsigned word temporaries (aliases of above) */
- int i;
- int mark = mark_tmps( c );
-
- x0y0 = alloc_tmp( c );
- x0y1 = alloc_tmp( c );
- x1y0 = alloc_tmp( c );
- x1y1 = alloc_tmp( c );
- xi = alloc_tmp( c );
- yi = alloc_tmp( c );
- zi = alloc_tmp( c );
- t = alloc_tmp( c );
- for( i = 0; i < 8; i++ ) {
- tmp[ i ] = alloc_tmp( c );
- itmp[ i ] = retype( tmp[ i ], BRW_REGISTER_TYPE_UD );
- wtmp[ i ] = brw_uw16_grf( tmp[ i ].nr, 0 );
- }
-
- param0 = lookup_tmp( c, mark - 4 ); /* first argument on entry; overwritten with the result by the final MUL */
- param1 = lookup_tmp( c, mark - 3 );
- param2 = lookup_tmp( c, mark - 2 );
-
- brw_set_access_mode( p, BRW_ALIGN_1 );
-
- /* Arrange the eight corner coordinates into scalars (itmp0..itmp3) to
- be hashed. Also compute the remainders (offsets within the unit
- cube), interleaved to reduce register dependency penalties. */
- brw_RNDD( p, retype( itmp[ 0 ], BRW_REGISTER_TYPE_D ), param0 );
- brw_RNDD( p, retype( itmp[ 1 ], BRW_REGISTER_TYPE_D ), param1 );
- brw_RNDD( p, retype( itmp[ 2 ], BRW_REGISTER_TYPE_D ), param2 );
- brw_FRC( p, param0, param0 );
- brw_FRC( p, param1, param1 );
- brw_FRC( p, param2, param2 );
- /* Since we now have only 16 bits of precision in the hash, we must
- be more careful about thorough mixing to maintain entropy as we
- squash the input vector into a small scalar. */
- brw_MUL( p, brw_null_reg(), low_words( itmp[ 0 ] ), brw_imm_uw( 0xBC8F ) );
- brw_MAC( p, brw_null_reg(), low_words( itmp[ 1 ] ), brw_imm_uw( 0xD0BD ) );
- brw_MAC( p, low_words( itmp[ 0 ] ), low_words( itmp[ 2 ] ),
- brw_imm_uw( 0x9B93 ) );
- brw_ADD( p, high_words( itmp[ 0 ] ), low_words( itmp[ 0 ] ),
- brw_imm_uw( 0xBC8F ) ); /* high word = low word + the multiplier applied to the rounded param0 */
-
- /* Temporarily disable the execution mask while we work with ExecSize=16
- channels (the mask is set for ExecSize=8 and is probably incorrect).
- Although this might cause execution of unwanted channels, the code
- writes only to temporary registers and has no side effects, so
- disabling the mask is harmless. */
- brw_push_insn_state( p );
- brw_set_mask_control( p, BRW_MASK_DISABLE );
- brw_ADD( p, wtmp[ 1 ], wtmp[ 0 ], brw_imm_uw( 0xD0BD ) ); /* + the multiplier applied to the rounded param1 */
- brw_ADD( p, wtmp[ 2 ], wtmp[ 0 ], brw_imm_uw( 0x9B93 ) ); /* + the multiplier applied to the rounded param2 */
- brw_ADD( p, wtmp[ 3 ], wtmp[ 1 ], brw_imm_uw( 0x9B93 ) ); /* + both of the above */
-
- /* We're now ready to perform the hashing. The eight hashes are
- interleaved for performance. The hash function used is
- designed to rapidly achieve avalanche and require only 16x16
- bit multiplication, and 8-bit swizzles (which we get for
- free). */
- for( i = 0; i < 4; i++ )
- brw_MUL( p, wtmp[ i ], wtmp[ i ], brw_imm_uw( 0x28D9 ) );
- for( i = 0; i < 4; i++ )
- brw_XOR( p, even_bytes( wtmp[ i ] ), even_bytes( wtmp[ i ] ),
- odd_bytes( wtmp[ i ] ) );
- for( i = 0; i < 4; i++ )
- brw_MUL( p, wtmp[ i ], wtmp[ i ], brw_imm_uw( 0xC6D5 ) );
- for( i = 0; i < 4; i++ )
- brw_XOR( p, even_bytes( wtmp[ i ] ), even_bytes( wtmp[ i ] ),
- odd_bytes( wtmp[ i ] ) );
- brw_pop_insn_state( p );
-
- /* Now we want to initialise the four rear gradients based on the
- hashes. Format conversion from signed integer to float leaves
- everything scaled too high by a factor of pow( 2, 15 ), but
- we correct for that right at the end. */
- /* x component */
- brw_ADD( p, t, param0, brw_imm_f( -1.0 ) ); /* t = param0 - 1 */
- brw_MOV( p, x0y0, low_words( tmp[ 0 ] ) );
- brw_MOV( p, x0y1, low_words( tmp[ 1 ] ) );
- brw_MOV( p, x1y0, high_words( tmp[ 0 ] ) );
- brw_MOV( p, x1y1, high_words( tmp[ 1 ] ) );
-
- brw_push_insn_state( p );
- brw_set_mask_control( p, BRW_MASK_DISABLE );
- brw_SHL( p, wtmp[ 0 ], wtmp[ 0 ], brw_imm_uw( 5 ) ); /* shift the hash words left by 5 before the next component reads them */
- brw_SHL( p, wtmp[ 1 ], wtmp[ 1 ], brw_imm_uw( 5 ) );
- brw_pop_insn_state( p );
-
- brw_MUL( p, x1y0, x1y0, t );
- brw_MUL( p, x1y1, x1y1, t );
- brw_ADD( p, t, param1, brw_imm_f( -1.0 ) ); /* t = param1 - 1 */
- brw_MUL( p, x0y0, x0y0, param0 );
- brw_MUL( p, x0y1, x0y1, param0 );
-
- /* y component */
- brw_MOV( p, tmp[ 5 ], low_words( tmp[ 1 ] ) );
- brw_MOV( p, tmp[ 7 ], high_words( tmp[ 1 ] ) );
- brw_MOV( p, tmp[ 4 ], low_words( tmp[ 0 ] ) );
- brw_MOV( p, tmp[ 6 ], high_words( tmp[ 0 ] ) );
-
- brw_push_insn_state( p );
- brw_set_mask_control( p, BRW_MASK_DISABLE );
- brw_SHL( p, wtmp[ 0 ], wtmp[ 0 ], brw_imm_uw( 5 ) );
- brw_SHL( p, wtmp[ 1 ], wtmp[ 1 ], brw_imm_uw( 5 ) );
- brw_pop_insn_state( p );
-
- brw_MUL( p, tmp[ 5 ], tmp[ 5 ], t );
- brw_MUL( p, tmp[ 7 ], tmp[ 7 ], t );
- brw_ADD( p, t, param0, brw_imm_f( -1.0 ) ); /* t = param0 - 1; next read by the front-face x component */
- brw_MUL( p, tmp[ 4 ], tmp[ 4 ], param1 );
- brw_MUL( p, tmp[ 6 ], tmp[ 6 ], param1 );
-
- brw_ADD( p, x0y1, x0y1, tmp[ 5 ] );
- brw_ADD( p, x1y1, x1y1, tmp[ 7 ] );
- brw_ADD( p, x0y0, x0y0, tmp[ 4 ] );
- brw_ADD( p, x1y0, x1y0, tmp[ 6 ] );
-
- /* z component */
- brw_MOV( p, tmp[ 4 ], low_words( tmp[ 0 ] ) );
- brw_MOV( p, tmp[ 5 ], low_words( tmp[ 1 ] ) );
- brw_MOV( p, tmp[ 6 ], high_words( tmp[ 0 ] ) );
- brw_MOV( p, tmp[ 7 ], high_words( tmp[ 1 ] ) );
-
- brw_MUL( p, tmp[ 4 ], tmp[ 4 ], param2 );
- brw_MUL( p, tmp[ 5 ], tmp[ 5 ], param2 );
- brw_MUL( p, tmp[ 6 ], tmp[ 6 ], param2 );
- brw_MUL( p, tmp[ 7 ], tmp[ 7 ], param2 );
-
- brw_ADD( p, x0y0, x0y0, tmp[ 4 ] );
- brw_ADD( p, x0y1, x0y1, tmp[ 5 ] );
- brw_ADD( p, x1y0, x1y0, tmp[ 6 ] );
- brw_ADD( p, x1y1, x1y1, tmp[ 7 ] );
-
- /* We interpolate between the gradients using the polynomial
- 6t^5 - 15t^4 + 10t^3 (Perlin). */
- brw_MUL( p, xi, param0, brw_imm_f( 6.0 ) );
- brw_MUL( p, yi, param1, brw_imm_f( 6.0 ) );
- brw_MUL( p, zi, param2, brw_imm_f( 6.0 ) );
- brw_ADD( p, xi, xi, brw_imm_f( -15.0 ) );
- brw_ADD( p, yi, yi, brw_imm_f( -15.0 ) );
- brw_ADD( p, zi, zi, brw_imm_f( -15.0 ) );
- brw_MUL( p, xi, xi, param0 );
- brw_MUL( p, yi, yi, param1 );
- brw_MUL( p, zi, zi, param2 );
- brw_ADD( p, xi, xi, brw_imm_f( 10.0 ) );
- brw_ADD( p, yi, yi, brw_imm_f( 10.0 ) );
- brw_ADD( p, zi, zi, brw_imm_f( 10.0 ) );
- brw_ADD( p, x0y1, x0y1, negate( x0y0 ) ); /* unrelated work */
- brw_ADD( p, x1y1, x1y1, negate( x1y0 ) ); /* unrelated work */
- brw_MUL( p, xi, xi, param0 );
- brw_MUL( p, yi, yi, param1 );
- brw_MUL( p, zi, zi, param2 );
- brw_MUL( p, xi, xi, param0 );
- brw_MUL( p, yi, yi, param1 );
- brw_MUL( p, zi, zi, param2 );
- brw_MUL( p, xi, xi, param0 );
- brw_MUL( p, yi, yi, param1 );
- brw_MUL( p, zi, zi, param2 );
-
- /* Here we interpolate in the y dimension... */
- brw_MUL( p, x0y1, x0y1, yi );
- brw_MUL( p, x1y1, x1y1, yi );
- brw_ADD( p, x0y0, x0y0, x0y1 );
- brw_ADD( p, x1y0, x1y0, x1y1 );
-
- /* And now in x. Leave the result in tmp[ 0 ] (see below)... */
- brw_ADD( p, x1y0, x1y0, negate( x0y0 ) );
- brw_MUL( p, x1y0, x1y0, xi );
- brw_ADD( p, tmp[ 0 ], x0y0, x1y0 );
-
- /* Now do the same thing for the front four gradients... */
- /* x component */
- brw_MOV( p, x0y0, low_words( tmp[ 2 ] ) );
- brw_MOV( p, x0y1, low_words( tmp[ 3 ] ) );
- brw_MOV( p, x1y0, high_words( tmp[ 2 ] ) );
- brw_MOV( p, x1y1, high_words( tmp[ 3 ] ) );
-
- brw_push_insn_state( p );
- brw_set_mask_control( p, BRW_MASK_DISABLE );
- brw_SHL( p, wtmp[ 2 ], wtmp[ 2 ], brw_imm_uw( 5 ) );
- brw_SHL( p, wtmp[ 3 ], wtmp[ 3 ], brw_imm_uw( 5 ) );
- brw_pop_insn_state( p );
-
- brw_MUL( p, x1y0, x1y0, t );
- brw_MUL( p, x1y1, x1y1, t );
- brw_ADD( p, t, param1, brw_imm_f( -1.0 ) ); /* t = param1 - 1 */
- brw_MUL( p, x0y0, x0y0, param0 );
- brw_MUL( p, x0y1, x0y1, param0 );
-
- /* y component */
- brw_MOV( p, tmp[ 5 ], low_words( tmp[ 3 ] ) );
- brw_MOV( p, tmp[ 7 ], high_words( tmp[ 3 ] ) );
- brw_MOV( p, tmp[ 4 ], low_words( tmp[ 2 ] ) );
- brw_MOV( p, tmp[ 6 ], high_words( tmp[ 2 ] ) );
-
- brw_push_insn_state( p );
- brw_set_mask_control( p, BRW_MASK_DISABLE );
- brw_SHL( p, wtmp[ 2 ], wtmp[ 2 ], brw_imm_uw( 5 ) );
- brw_SHL( p, wtmp[ 3 ], wtmp[ 3 ], brw_imm_uw( 5 ) );
- brw_pop_insn_state( p );
-
- brw_MUL( p, tmp[ 5 ], tmp[ 5 ], t );
- brw_MUL( p, tmp[ 7 ], tmp[ 7 ], t );
- brw_ADD( p, t, param2, brw_imm_f( -1.0 ) ); /* t = param2 - 1, read by the z component below */
- brw_MUL( p, tmp[ 4 ], tmp[ 4 ], param1 );
- brw_MUL( p, tmp[ 6 ], tmp[ 6 ], param1 );
-
- brw_ADD( p, x0y1, x0y1, tmp[ 5 ] );
- brw_ADD( p, x1y1, x1y1, tmp[ 7 ] );
- brw_ADD( p, x0y0, x0y0, tmp[ 4 ] );
- brw_ADD( p, x1y0, x1y0, tmp[ 6 ] );
-
- /* z component */
- brw_MOV( p, tmp[ 4 ], low_words( tmp[ 2 ] ) );
- brw_MOV( p, tmp[ 5 ], low_words( tmp[ 3 ] ) );
- brw_MOV( p, tmp[ 6 ], high_words( tmp[ 2 ] ) );
- brw_MOV( p, tmp[ 7 ], high_words( tmp[ 3 ] ) );
-
- brw_MUL( p, tmp[ 4 ], tmp[ 4 ], t );
- brw_MUL( p, tmp[ 5 ], tmp[ 5 ], t );
- brw_MUL( p, tmp[ 6 ], tmp[ 6 ], t );
- brw_MUL( p, tmp[ 7 ], tmp[ 7 ], t );
-
- brw_ADD( p, x0y0, x0y0, tmp[ 4 ] );
- brw_ADD( p, x0y1, x0y1, tmp[ 5 ] );
- brw_ADD( p, x1y0, x1y0, tmp[ 6 ] );
- brw_ADD( p, x1y1, x1y1, tmp[ 7 ] );
-
- /* The interpolation coefficients are still around from last time, so
- again interpolate in the y dimension... */
- brw_ADD( p, x0y1, x0y1, negate( x0y0 ) );
- brw_ADD( p, x1y1, x1y1, negate( x1y0 ) );
- brw_MUL( p, x0y1, x0y1, yi );
- brw_MUL( p, x1y1, x1y1, yi );
- brw_ADD( p, x0y0, x0y0, x0y1 );
- brw_ADD( p, x1y0, x1y0, x1y1 );
-
- /* And now in x. The rear face is in tmp[ 0 ] (see above), so this
- time put the front face in tmp[ 1 ] and we're nearly there... */
- brw_ADD( p, x1y0, x1y0, negate( x0y0 ) );
- brw_MUL( p, x1y0, x1y0, xi );
- brw_ADD( p, tmp[ 1 ], x0y0, x1y0 );
-
- /* The final interpolation, in the z dimension: */
- brw_ADD( p, tmp[ 1 ], tmp[ 1 ], negate( tmp[ 0 ] ) );
- brw_MUL( p, tmp[ 1 ], tmp[ 1 ], zi );
- brw_ADD( p, tmp[ 0 ], tmp[ 0 ], tmp[ 1 ] );
-
- /* scale by pow( 2, -15 ), as described above */
- brw_MUL( p, param0, tmp[ 0 ], brw_imm_f( 0.000030517578125 ) );
-
- release_tmps( c, mark );
-}
-/**
- * Emit a three-argument noise instruction.
- *
- * Copies the registers returned by get_src_reg( c, inst, 0, 0 ),
- * get_src_reg( c, inst, 0, 1 ) and get_src_reg( c, inst, 0, 2 ) into three
- * freshly allocated temporaries, hands noise3_sub to invoke_subroutine(),
- * and then moves the first temporary into every destination channel
- * selected by inst->DstReg.WriteMask. Saturation is switched on around the
- * result moves when inst->SaturateMode is SATURATE_ZERO_ONE.
- */
-static void emit_noise3( struct brw_wm_compile *c,
- const struct prog_instruction *inst )
-{
- struct brw_compile *p = &c->func;
- struct brw_reg src0, src1, src2, param0, param1, param2, dst;
- GLuint mask = inst->DstReg.WriteMask;
- int i;
- int mark = mark_tmps( c );
-
- assert( mark == 0 ); /* NOTE(review): presumably no temporaries may be live on entry -- confirm against mark_tmps() */
-
- src0 = get_src_reg( c, inst, 0, 0 ); /* NOTE(review): looks like channels 0..2 of source operand 0 -- confirm get_src_reg() argument order */
- src1 = get_src_reg( c, inst, 0, 1 );
- src2 = get_src_reg( c, inst, 0, 2 );
-
- param0 = alloc_tmp( c ); /* first argument slot; read back below as the result */
- param1 = alloc_tmp( c );
- param2 = alloc_tmp( c );
-
- brw_MOV( p, param0, src0 );
- brw_MOV( p, param1, src1 );
- brw_MOV( p, param2, src2 );
-
- invoke_subroutine( c, SUB_NOISE3, noise3_sub );
-
- /* Fill in the result: */
- brw_set_saturate( p, inst->SaturateMode == SATURATE_ZERO_ONE );
- for (i = 0 ; i < 4; i++) {
- if (mask & (1<<i)) {
- dst = get_dst_reg(c, inst, i);
- brw_MOV( p, dst, param0 ); /* the same scalar goes to every enabled channel */
- }
- }
- if( inst->SaturateMode == SATURATE_ZERO_ONE )
- brw_set_saturate( p, 0 );
-
- release_tmps( c, mark );
-}
-
-/**
- * For the four-dimensional case, the little micro-optimisation benefits
- * we obtain by unrolling all the loops aren't worth the massive bloat it
- * now causes. Instead, we loop twice around performing a similar operation
- * to noise3, once for the w=0 cube and once for the w=1, with a bit more
- * code to glue it all together.
- */
-static void noise4_sub( struct brw_wm_compile *c )
-{
- struct brw_compile *p = &c->func;
- struct brw_reg param[ 4 ],
- x0y0, x0y1, x1y0, x1y1, /* gradients at four of the corners */
- w0, /* noise for the w=0 cube */
- floors[ 2 ], /* integer coordinates of base corner of hypercube */
- interp[ 4 ], /* interpolation coefficients */
- t, tmp[ 8 ], /* float temporaries */
- itmp[ 8 ], /* unsigned integer temporaries (aliases of floats above) */
- wtmp[ 8 ]; /* 16-way unsigned word temporaries (aliases of above) */
- int i, j;
- int mark = mark_tmps( c );
- GLuint loop, origin;
-
- x0y0 = alloc_tmp( c );
- x0y1 = alloc_tmp( c );
- x1y0 = alloc_tmp( c );
- x1y1 = alloc_tmp( c );
- t = alloc_tmp( c );
- w0 = alloc_tmp( c );
- floors[ 0 ] = retype( alloc_tmp( c ), BRW_REGISTER_TYPE_UD );
- floors[ 1 ] = retype( alloc_tmp( c ), BRW_REGISTER_TYPE_UD );
-
- for( i = 0; i < 4; i++ ) {
- param[ i ] = lookup_tmp( c, mark - 5 + i );
- interp[ i ] = alloc_tmp( c );
- }
-
- for( i = 0; i < 8; i++ ) {
- tmp[ i ] = alloc_tmp( c );
- itmp[ i ] = retype( tmp[ i ], BRW_REGISTER_TYPE_UD );
- wtmp[ i ] = brw_uw16_grf( tmp[ i ].nr, 0 );
- }
-
- brw_set_access_mode( p, BRW_ALIGN_1 );
-
- /* We only want 16 bits of precision from the integral part of each
- co-ordinate, but unfortunately the RNDD semantics would saturate
- at 16 bits if we performed the operation directly to a 16-bit
- destination. Therefore, we round to 32-bit temporaries where
- appropriate, and then store only the lower 16 bits. */
- brw_RNDD( p, retype( floors[ 0 ], BRW_REGISTER_TYPE_D ), param[ 0 ] );
- brw_RNDD( p, retype( itmp[ 0 ], BRW_REGISTER_TYPE_D ), param[ 1 ] );
- brw_RNDD( p, retype( floors[ 1 ], BRW_REGISTER_TYPE_D ), param[ 2 ] );
- brw_RNDD( p, retype( itmp[ 1 ], BRW_REGISTER_TYPE_D ), param[ 3 ] );
- brw_MOV( p, high_words( floors[ 0 ] ), low_words( itmp[ 0 ] ) );
- brw_MOV( p, high_words( floors[ 1 ] ), low_words( itmp[ 1 ] ) );
-
- /* Modify the flag register here, because the side effect is useful
- later (see below). We know for certain that all flags will be
- cleared, since the FRC instruction cannot possibly generate
- negative results. Even for exceptional inputs (infinities, denormals,
- NaNs), the architecture guarantees that the L conditional is false. */
- brw_set_conditionalmod( p, BRW_CONDITIONAL_L );
- brw_FRC( p, param[ 0 ], param[ 0 ] );
- brw_set_predicate_control( p, BRW_PREDICATE_NONE );
- for( i = 1; i < 4; i++ )
- brw_FRC( p, param[ i ], param[ i ] );
-
- /* Calculate the interpolation coefficients (6t^5 - 15t^4 + 10t^3) first
- of all. */
- for( i = 0; i < 4; i++ )
- brw_MUL( p, interp[ i ], param[ i ], brw_imm_f( 6.0 ) );
- for( i = 0; i < 4; i++ )
- brw_ADD( p, interp[ i ], interp[ i ], brw_imm_f( -15.0 ) );
- for( i = 0; i < 4; i++ )
- brw_MUL( p, interp[ i ], interp[ i ], param[ i ] );
- for( i = 0; i < 4; i++ )
- brw_ADD( p, interp[ i ], interp[ i ], brw_imm_f( 10.0 ) );
- for( j = 0; j < 3; j++ )
- for( i = 0; i < 4; i++ )
- brw_MUL( p, interp[ i ], interp[ i ], param[ i ] );
-
- /* Mark the current address, as it will be a jump destination. The
- following code will be executed twice: first, with the flag
- register clear indicating the w=0 case, and second with flags
- set for w=1. */
- loop = p->nr_insn;
-
- /* Arrange the eight corner coordinates into scalars (itmp0..itmp3) to
- be hashed. Since we have only 16 bits of precision in the hash, we
- must be careful about thorough mixing to maintain entropy as we
- squash the input vector into a small scalar. */
- brw_MUL( p, brw_null_reg(), low_words( floors[ 0 ] ),
- brw_imm_uw( 0xBC8F ) );
- brw_MAC( p, brw_null_reg(), high_words( floors[ 0 ] ),
- brw_imm_uw( 0xD0BD ) );
- brw_MAC( p, brw_null_reg(), low_words( floors[ 1 ] ),
- brw_imm_uw( 0x9B93 ) );
- brw_MAC( p, low_words( itmp[ 0 ] ), high_words( floors[ 1 ] ),
- brw_imm_uw( 0xA359 ) );
- brw_ADD( p, high_words( itmp[ 0 ] ), low_words( itmp[ 0 ] ),
- brw_imm_uw( 0xBC8F ) );
-
- /* Temporarily disable the execution mask while we work with ExecSize=16
- channels (the mask is set for ExecSize=8 and is probably incorrect).
- Although this might cause execution of unwanted channels, the code
- writes only to temporary registers and has no side effects, so
- disabling the mask is harmless. */
- brw_push_insn_state( p );
- brw_set_mask_control( p, BRW_MASK_DISABLE );
- brw_ADD( p, wtmp[ 1 ], wtmp[ 0 ], brw_imm_uw( 0xD0BD ) );
- brw_ADD( p, wtmp[ 2 ], wtmp[ 0 ], brw_imm_uw( 0x9B93 ) );
- brw_ADD( p, wtmp[ 3 ], wtmp[ 1 ], brw_imm_uw( 0x9B93 ) );
-
- /* We're now ready to perform the hashing. The eight hashes are
- interleaved for performance. The hash function used is
- designed to rapidly achieve avalanche and require only 16x16
- bit multiplication, and 8-bit swizzles (which we get for
- free). */
- for( i = 0; i < 4; i++ )
- brw_MUL( p, wtmp[ i ], wtmp[ i ], brw_imm_uw( 0x28D9 ) );
- for( i = 0; i < 4; i++ )
- brw_XOR( p, even_bytes( wtmp[ i ] ), even_bytes( wtmp[ i ] ),
- odd_bytes( wtmp[ i ] ) );
- for( i = 0; i < 4; i++ )
- brw_MUL( p, wtmp[ i ], wtmp[ i ], brw_imm_uw( 0xC6D5 ) );
- for( i = 0; i < 4; i++ )
- brw_XOR( p, even_bytes( wtmp[ i ] ), even_bytes( wtmp[ i ] ),
- odd_bytes( wtmp[ i ] ) );
- brw_pop_insn_state( p );
-
- /* Now we want to initialise the four rear gradients based on the
- hashes. Format conversion from signed integer to float leaves
- everything scaled too high by a factor of pow( 2, 15 ), but
- we correct for that right at the end. */
- /* x component */
- brw_ADD( p, t, param[ 0 ], brw_imm_f( -1.0 ) );
- brw_MOV( p, x0y0, low_words( tmp[ 0 ] ) );
- brw_MOV( p, x0y1, low_words( tmp[ 1 ] ) );
- brw_MOV( p, x1y0, high_words( tmp[ 0 ] ) );
- brw_MOV( p, x1y1, high_words( tmp[ 1 ] ) );
-
- brw_push_insn_state( p );
- brw_set_mask_control( p, BRW_MASK_DISABLE );
- brw_SHL( p, wtmp[ 0 ], wtmp[ 0 ], brw_imm_uw( 4 ) );
- brw_SHL( p, wtmp[ 1 ], wtmp[ 1 ], brw_imm_uw( 4 ) );
- brw_pop_insn_state( p );
-
- brw_MUL( p, x1y0, x1y0, t );
- brw_MUL( p, x1y1, x1y1, t );
- brw_ADD( p, t, param[ 1 ], brw_imm_f( -1.0 ) );
- brw_MUL( p, x0y0, x0y0, param[ 0 ] );
- brw_MUL( p, x0y1, x0y1, param[ 0 ] );
-
- /* y component */
- brw_MOV( p, tmp[ 5 ], low_words( tmp[ 1 ] ) );
- brw_MOV( p, tmp[ 7 ], high_words( tmp[ 1 ] ) );
- brw_MOV( p, tmp[ 4 ], low_words( tmp[ 0 ] ) );
- brw_MOV( p, tmp[ 6 ], high_words( tmp[ 0 ] ) );
-
- brw_push_insn_state( p );
- brw_set_mask_control( p, BRW_MASK_DISABLE );
- brw_SHL( p, wtmp[ 0 ], wtmp[ 0 ], brw_imm_uw( 4 ) );
- brw_SHL( p, wtmp[ 1 ], wtmp[ 1 ], brw_imm_uw( 4 ) );
- brw_pop_insn_state( p );
-
- brw_MUL( p, tmp[ 5 ], tmp[ 5 ], t );
- brw_MUL( p, tmp[ 7 ], tmp[ 7 ], t );
- /* prepare t for the w component (used below): w the first time through
- the loop; w - 1 the second time) */
- brw_set_predicate_control( p, BRW_PREDICATE_NORMAL );
- brw_ADD( p, t, param[ 3 ], brw_imm_f( -1.0 ) );
- p->current->header.predicate_inverse = 1;
- brw_MOV( p, t, param[ 3 ] );
- p->current->header.predicate_inverse = 0;
- brw_set_predicate_control( p, BRW_PREDICATE_NONE );
- brw_MUL( p, tmp[ 4 ], tmp[ 4 ], param[ 1 ] );
- brw_MUL( p, tmp[ 6 ], tmp[ 6 ], param[ 1 ] );
-
- brw_ADD( p, x0y1, x0y1, tmp[ 5 ] );
- brw_ADD( p, x1y1, x1y1, tmp[ 7 ] );
- brw_ADD( p, x0y0, x0y0, tmp[ 4 ] );
- brw_ADD( p, x1y0, x1y0, tmp[ 6 ] );
-
- /* z component */
- brw_MOV( p, tmp[ 4 ], low_words( tmp[ 0 ] ) );
- brw_MOV( p, tmp[ 5 ], low_words( tmp[ 1 ] ) );
- brw_MOV( p, tmp[ 6 ], high_words( tmp[ 0 ] ) );
- brw_MOV( p, tmp[ 7 ], high_words( tmp[ 1 ] ) );
-
- brw_push_insn_state( p );
- brw_set_mask_control( p, BRW_MASK_DISABLE );
- brw_SHL( p, wtmp[ 0 ], wtmp[ 0 ], brw_imm_uw( 4 ) );
- brw_SHL( p, wtmp[ 1 ], wtmp[ 1 ], brw_imm_uw( 4 ) );
- brw_pop_insn_state( p );
-
- brw_MUL( p, tmp[ 4 ], tmp[ 4 ], param[ 2 ] );
- brw_MUL( p, tmp[ 5 ], tmp[ 5 ], param[ 2 ] );
- brw_MUL( p, tmp[ 6 ], tmp[ 6 ], param[ 2 ] );
- brw_MUL( p, tmp[ 7 ], tmp[ 7 ], param[ 2 ] );
-
- brw_ADD( p, x0y0, x0y0, tmp[ 4 ] );
- brw_ADD( p, x0y1, x0y1, tmp[ 5 ] );
- brw_ADD( p, x1y0, x1y0, tmp[ 6 ] );
- brw_ADD( p, x1y1, x1y1, tmp[ 7 ] );
-
- /* w component */
- brw_MOV( p, tmp[ 4 ], low_words( tmp[ 0 ] ) );
- brw_MOV( p, tmp[ 5 ], low_words( tmp[ 1 ] ) );
- brw_MOV( p, tmp[ 6 ], high_words( tmp[ 0 ] ) );
- brw_MOV( p, tmp[ 7 ], high_words( tmp[ 1 ] ) );
-
- brw_MUL( p, tmp[ 4 ], tmp[ 4 ], t );
- brw_MUL( p, tmp[ 5 ], tmp[ 5 ], t );
- brw_MUL( p, tmp[ 6 ], tmp[ 6 ], t );
- brw_MUL( p, tmp[ 7 ], tmp[ 7 ], t );
- brw_ADD( p, t, param[ 0 ], brw_imm_f( -1.0 ) );
-
- brw_ADD( p, x0y0, x0y0, tmp[ 4 ] );
- brw_ADD( p, x0y1, x0y1, tmp[ 5 ] );
- brw_ADD( p, x1y0, x1y0, tmp[ 6 ] );
- brw_ADD( p, x1y1, x1y1, tmp[ 7 ] );
-
- /* Here we interpolate in the y dimension... */
- brw_ADD( p, x0y1, x0y1, negate( x0y0 ) );
- brw_ADD( p, x1y1, x1y1, negate( x1y0 ) );
- brw_MUL( p, x0y1, x0y1, interp[ 1 ] );
- brw_MUL( p, x1y1, x1y1, interp[ 1 ] );
- brw_ADD( p, x0y0, x0y0, x0y1 );
- brw_ADD( p, x1y0, x1y0, x1y1 );
-
- /* And now in x. Leave the result in tmp[ 0 ] (see below)... */
- brw_ADD( p, x1y0, x1y0, negate( x0y0 ) );
- brw_MUL( p, x1y0, x1y0, interp[ 0 ] );
- brw_ADD( p, tmp[ 0 ], x0y0, x1y0 );
-
- /* Now do the same thing for the front four gradients... */
- /* x component */
- brw_MOV( p, x0y0, low_words( tmp[ 2 ] ) );
- brw_MOV( p, x0y1, low_words( tmp[ 3 ] ) );
- brw_MOV( p, x1y0, high_words( tmp[ 2 ] ) );
- brw_MOV( p, x1y1, high_words( tmp[ 3 ] ) );
-
- brw_push_insn_state( p );
- brw_set_mask_control( p, BRW_MASK_DISABLE );
- brw_SHL( p, wtmp[ 2 ], wtmp[ 2 ], brw_imm_uw( 4 ) );
- brw_SHL( p, wtmp[ 3 ], wtmp[ 3 ], brw_imm_uw( 4 ) );
- brw_pop_insn_state( p );
-
- brw_MUL( p, x1y0, x1y0, t );
- brw_MUL( p, x1y1, x1y1, t );
- brw_ADD( p, t, param[ 1 ], brw_imm_f( -1.0 ) );
- brw_MUL( p, x0y0, x0y0, param[ 0 ] );
- brw_MUL( p, x0y1, x0y1, param[ 0 ] );
-
- /* y component */
- brw_MOV( p, tmp[ 5 ], low_words( tmp[ 3 ] ) );
- brw_MOV( p, tmp[ 7 ], high_words( tmp[ 3 ] ) );
- brw_MOV( p, tmp[ 4 ], low_words( tmp[ 2 ] ) );
- brw_MOV( p, tmp[ 6 ], high_words( tmp[ 2 ] ) );
-
- brw_push_insn_state( p );
- brw_set_mask_control( p, BRW_MASK_DISABLE );
- brw_SHL( p, wtmp[ 2 ], wtmp[ 2 ], brw_imm_uw( 4 ) );
- brw_SHL( p, wtmp[ 3 ], wtmp[ 3 ], brw_imm_uw( 4 ) );
- brw_pop_insn_state( p );
-
- brw_MUL( p, tmp[ 5 ], tmp[ 5 ], t );
- brw_MUL( p, tmp[ 7 ], tmp[ 7 ], t );
- brw_ADD( p, t, param[ 2 ], brw_imm_f( -1.0 ) );
- brw_MUL( p, tmp[ 4 ], tmp[ 4 ], param[ 1 ] );
- brw_MUL( p, tmp[ 6 ], tmp[ 6 ], param[ 1 ] );
-
- brw_ADD( p, x0y1, x0y1, tmp[ 5 ] );
- brw_ADD( p, x1y1, x1y1, tmp[ 7 ] );
- brw_ADD( p, x0y0, x0y0, tmp[ 4 ] );
- brw_ADD( p, x1y0, x1y0, tmp[ 6 ] );
-
- /* z component */
- brw_MOV( p, tmp[ 4 ], low_words( tmp[ 2 ] ) );
- brw_MOV( p, tmp[ 5 ], low_words( tmp[ 3 ] ) );
- brw_MOV( p, tmp[ 6 ], high_words( tmp[ 2 ] ) );
- brw_MOV( p, tmp[ 7 ], high_words( tmp[ 3 ] ) );
-
- brw_push_insn_state( p );
- brw_set_mask_control( p, BRW_MASK_DISABLE );
- brw_SHL( p, wtmp[ 2 ], wtmp[ 2 ], brw_imm_uw( 4 ) );
- brw_SHL( p, wtmp[ 3 ], wtmp[ 3 ], brw_imm_uw( 4 ) );
- brw_pop_insn_state( p );
-
- brw_MUL( p, tmp[ 4 ], tmp[ 4 ], t );
- brw_MUL( p, tmp[ 5 ], tmp[ 5 ], t );
- brw_MUL( p, tmp[ 6 ], tmp[ 6 ], t );
- brw_MUL( p, tmp[ 7 ], tmp[ 7 ], t );
- /* prepare t for the w component (used below): w the first time through
- the loop; w - 1 the second time) */
- brw_set_predicate_control( p, BRW_PREDICATE_NORMAL );
- brw_ADD( p, t, param[ 3 ], brw_imm_f( -1.0 ) );
- p->current->header.predicate_inverse = 1;
- brw_MOV( p, t, param[ 3 ] );
- p->current->header.predicate_inverse = 0;
- brw_set_predicate_control( p, BRW_PREDICATE_NONE );
-
- brw_ADD( p, x0y0, x0y0, tmp[ 4 ] );
- brw_ADD( p, x0y1, x0y1, tmp[ 5 ] );
- brw_ADD( p, x1y0, x1y0, tmp[ 6 ] );
- brw_ADD( p, x1y1, x1y1, tmp[ 7 ] );
-
- /* w component */
- brw_MOV( p, tmp[ 4 ], low_words( tmp[ 2 ] ) );
- brw_MOV( p, tmp[ 5 ], low_words( tmp[ 3 ] ) );
- brw_MOV( p, tmp[ 6 ], high_words( tmp[ 2 ] ) );
- brw_MOV( p, tmp[ 7 ], high_words( tmp[ 3 ] ) );
-
- brw_MUL( p, tmp[ 4 ], tmp[ 4 ], t );
- brw_MUL( p, tmp[ 5 ], tmp[ 5 ], t );
- brw_MUL( p, tmp[ 6 ], tmp[ 6 ], t );
- brw_MUL( p, tmp[ 7 ], tmp[ 7 ], t );
-
- brw_ADD( p, x0y0, x0y0, tmp[ 4 ] );
- brw_ADD( p, x0y1, x0y1, tmp[ 5 ] );
- brw_ADD( p, x1y0, x1y0, tmp[ 6 ] );
- brw_ADD( p, x1y1, x1y1, tmp[ 7 ] );
-
- /* Interpolate in the y dimension: */
- brw_ADD( p, x0y1, x0y1, negate( x0y0 ) );
- brw_ADD( p, x1y1, x1y1, negate( x1y0 ) );
- brw_MUL( p, x0y1, x0y1, interp[ 1 ] );
- brw_MUL( p, x1y1, x1y1, interp[ 1 ] );
- brw_ADD( p, x0y0, x0y0, x0y1 );
- brw_ADD( p, x1y0, x1y0, x1y1 );
-
- /* And now in x. The rear face is in tmp[ 0 ] (see above), so this
- time put the front face in tmp[ 1 ] and we're nearly there... */
- brw_ADD( p, x1y0, x1y0, negate( x0y0 ) );
- brw_MUL( p, x1y0, x1y0, interp[ 0 ] );
- brw_ADD( p, tmp[ 1 ], x0y0, x1y0 );
-
- /* Another interpolation, in the z dimension: */
- brw_ADD( p, tmp[ 1 ], tmp[ 1 ], negate( tmp[ 0 ] ) );
- brw_MUL( p, tmp[ 1 ], tmp[ 1 ], interp[ 2 ] );
- brw_ADD( p, tmp[ 0 ], tmp[ 0 ], tmp[ 1 ] );
-
- /* Exit the loop if we've computed both cubes... */
- origin = p->nr_insn;
- brw_push_insn_state( p );
- brw_set_predicate_control( p, BRW_PREDICATE_NORMAL );
- brw_set_mask_control( p, BRW_MASK_DISABLE );
- brw_ADD( p, brw_ip_reg(), brw_ip_reg(), brw_imm_d( 0 ) );
- brw_pop_insn_state( p );
-
- /* Save the result for the w=0 case, and increment the w coordinate: */
- brw_MOV( p, w0, tmp[ 0 ] );
- brw_ADD( p, high_words( floors[ 1 ] ), high_words( floors[ 1 ] ),
- brw_imm_uw( 1 ) );
-
- /* Loop around for the other cube. Explicitly set the flag register
- (unfortunately we must spend an extra instruction to do this: we
- can't rely on a side effect of the previous MOV or ADD because
- conditional modifiers which are normally true might be false in
- exceptional circumstances, e.g. given a NaN input; the add to
- brw_ip_reg() is not suitable because the IP is not an 8-vector). */
- brw_push_insn_state( p );
- brw_set_mask_control( p, BRW_MASK_DISABLE );
- brw_MOV( p, brw_flag_reg(), brw_imm_uw( 0xFF ) );
- brw_ADD( p, brw_ip_reg(), brw_ip_reg(),
- brw_imm_d( ( loop - p->nr_insn ) << 4 ) );
- brw_pop_insn_state( p );
-
- /* Patch the previous conditional branch now that we know the
- destination address. */
- brw_set_src1( p->store + origin,
- brw_imm_d( ( p->nr_insn - origin ) << 4 ) );
-
- /* The very last interpolation. */
- brw_ADD( p, tmp[ 0 ], tmp[ 0 ], negate( w0 ) );
- brw_MUL( p, tmp[ 0 ], tmp[ 0 ], interp[ 3 ] );
- brw_ADD( p, tmp[ 0 ], tmp[ 0 ], w0 );
-
- /* scale by pow( 2, -15 ), as described above */
- brw_MUL( p, param[ 0 ], tmp[ 0 ], brw_imm_f( 0.000030517578125 ) );
-
- release_tmps( c, mark );
-}
-
-static void emit_noise4( struct brw_wm_compile *c,
- const struct prog_instruction *inst )
-{
- struct brw_compile *p = &c->func;
- struct brw_reg src0, src1, src2, src3, param0, param1, param2, param3, dst;
- GLuint mask = inst->DstReg.WriteMask;
- int i;
- int mark = mark_tmps( c );
-
- assert( mark == 0 );
-
- src0 = get_src_reg( c, inst, 0, 0 );
- src1 = get_src_reg( c, inst, 0, 1 );
- src2 = get_src_reg( c, inst, 0, 2 );
- src3 = get_src_reg( c, inst, 0, 3 );
-
- param0 = alloc_tmp( c );
- param1 = alloc_tmp( c );
- param2 = alloc_tmp( c );
- param3 = alloc_tmp( c );
-
- brw_MOV( p, param0, src0 );
- brw_MOV( p, param1, src1 );
- brw_MOV( p, param2, src2 );
- brw_MOV( p, param3, src3 );
-
- invoke_subroutine( c, SUB_NOISE4, noise4_sub );
-
- /* Fill in the result: */
- brw_set_saturate( p, inst->SaturateMode == SATURATE_ZERO_ONE );
- for (i = 0 ; i < 4; i++) {
- if (mask & (1<<i)) {
- dst = get_dst_reg(c, inst, i);
- brw_MOV( p, dst, param0 );
- }
- }
- if( inst->SaturateMode == SATURATE_ZERO_ONE )
- brw_set_saturate( p, 0 );
-
- release_tmps( c, mark );
-}
static void emit_wpos_xy(struct brw_wm_compile *c,
const struct prog_instruction *inst)
* X and Y channels.
*/
if (mask & WRITEMASK_X) {
- /* X' = X - origin_x */
- brw_ADD(p,
+ /* X' = X */
+ brw_MOV(p,
dst[0],
- retype(src0[0], BRW_REGISTER_TYPE_W),
- brw_imm_d(0 - c->key.origin_x));
+ retype(src0[0], BRW_REGISTER_TYPE_W));
}
if (mask & WRITEMASK_Y) {
- /* Y' = height - (Y - origin_y) = height + origin_y - Y */
+ /* Y' = height - 1 - Y */
brw_ADD(p,
dst[1],
negate(retype(src0[1], BRW_REGISTER_TYPE_W)),
- brw_imm_d(c->key.origin_y + c->key.drawable_height - 1));
+ brw_imm_d(c->key.drawable_height - 1));
}
}
emit_trunc(c, inst);
break;
case OPCODE_MOV:
- case OPCODE_SWZ:
emit_mov(c, inst);
break;
case OPCODE_DP3:
case OPCODE_MAD:
emit_mad(c, inst);
break;
- case OPCODE_NOISE1:
- emit_noise1(c, inst);
- break;
- case OPCODE_NOISE2:
- emit_noise2(c, inst);
- break;
- case OPCODE_NOISE3:
- emit_noise3(c, inst);
- break;
- case OPCODE_NOISE4:
- emit_noise4(c, inst);
- break;
case OPCODE_TEX:
emit_tex(c, inst);
break;
*/
switch (inst->Opcode) {
case OPCODE_MOV:
- case OPCODE_SWZ:
if (!inst->SaturateMode) {
pass0_precalc_mov(c, inst);
}
GLuint writemask;
GLuint read0, read1, read2;
- if (inst->opcode == OPCODE_KIL) {
+ if (inst->opcode == TGSI_OPCODE_KIL) {
track_arg(c, inst, 0, WRITEMASK_XYZW); /* All args contribute to final */
continue;
}
/* Mark all inputs which contribute to the marked outputs:
*/
switch (inst->opcode) {
- case OPCODE_ABS:
- case OPCODE_FLR:
- case OPCODE_FRC:
- case OPCODE_MOV:
- case OPCODE_SWZ:
- case OPCODE_TRUNC:
+ case TGSI_OPCODE_ABS:
+ case TGSI_OPCODE_FLR:
+ case TGSI_OPCODE_FRC:
+ case TGSI_OPCODE_MOV:
+ case TGSI_OPCODE_TRUNC:
read0 = writemask;
break;
- case OPCODE_SUB:
- case OPCODE_SLT:
- case OPCODE_SLE:
- case OPCODE_SGE:
- case OPCODE_SGT:
- case OPCODE_SEQ:
- case OPCODE_SNE:
- case OPCODE_ADD:
- case OPCODE_MAX:
- case OPCODE_MIN:
- case OPCODE_MUL:
+ case TGSI_OPCODE_SUB:
+ case TGSI_OPCODE_SLT:
+ case TGSI_OPCODE_SLE:
+ case TGSI_OPCODE_SGE:
+ case TGSI_OPCODE_SGT:
+ case TGSI_OPCODE_SEQ:
+ case TGSI_OPCODE_SNE:
+ case TGSI_OPCODE_ADD:
+ case TGSI_OPCODE_MAX:
+ case TGSI_OPCODE_MIN:
+ case TGSI_OPCODE_MUL:
read0 = writemask;
read1 = writemask;
break;
- case OPCODE_DDX:
- case OPCODE_DDY:
+ case TGSI_OPCODE_DDX:
+ case TGSI_OPCODE_DDY:
read0 = writemask;
break;
- case OPCODE_MAD:
- case OPCODE_CMP:
- case OPCODE_LRP:
+ case TGSI_OPCODE_MAD:
+ case TGSI_OPCODE_CMP:
+ case TGSI_OPCODE_LRP:
read0 = writemask;
read1 = writemask;
read2 = writemask;
break;
- case OPCODE_XPD:
+ case TGSI_OPCODE_XPD:
if (writemask & WRITEMASK_X) read0 |= WRITEMASK_YZ;
if (writemask & WRITEMASK_Y) read0 |= WRITEMASK_XZ;
if (writemask & WRITEMASK_Z) read0 |= WRITEMASK_XY;
read1 = read0;
break;
- case OPCODE_COS:
- case OPCODE_EX2:
- case OPCODE_LG2:
- case OPCODE_RCP:
- case OPCODE_RSQ:
- case OPCODE_SIN:
- case OPCODE_SCS:
+ case TGSI_OPCODE_COS:
+ case TGSI_OPCODE_EX2:
+ case TGSI_OPCODE_LG2:
+ case TGSI_OPCODE_RCP:
+ case TGSI_OPCODE_RSQ:
+ case TGSI_OPCODE_SIN:
+ case TGSI_OPCODE_SCS:
case WM_CINTERP:
case WM_PIXELXY:
read0 = WRITEMASK_X;
break;
- case OPCODE_POW:
+ case TGSI_OPCODE_POW:
read0 = WRITEMASK_X;
read1 = WRITEMASK_X;
break;
- case OPCODE_TEX:
- case OPCODE_TXP:
+ case TGSI_OPCODE_TEX:
+ case TGSI_OPCODE_TXP:
read0 = get_texcoord_mask(inst->tex_idx);
if (inst->tex_shadow)
read0 |= WRITEMASK_Z;
break;
- case OPCODE_TXB:
+ case TGSI_OPCODE_TXB:
/* Shadow ignored for txb.
*/
read0 = get_texcoord_mask(inst->tex_idx) | WRITEMASK_W;
read2 = WRITEMASK_W; /* pixel w */
break;
- case OPCODE_DP3:
+ case TGSI_OPCODE_DP3:
read0 = WRITEMASK_XYZ;
read1 = WRITEMASK_XYZ;
break;
- case OPCODE_DPH:
+ case TGSI_OPCODE_DPH:
read0 = WRITEMASK_XYZ;
read1 = WRITEMASK_XYZW;
break;
- case OPCODE_DP4:
+ case TGSI_OPCODE_DP4:
read0 = WRITEMASK_XYZW;
read1 = WRITEMASK_XYZW;
break;
- case OPCODE_LIT:
+ case TGSI_OPCODE_LIT:
read0 = WRITEMASK_XYW;
break;
- case OPCODE_DST:
+ case TGSI_OPCODE_DST:
case WM_FRONTFACING:
- case OPCODE_KIL_NV:
+ case TGSI_OPCODE_KIL_NV:
default:
break;
}
#define PCI_CHIP_Q45_G 0x2E12
#define PCI_CHIP_G45_G 0x2E22
#define PCI_CHIP_G41_G 0x2E32
-#define PCI_CHIP_B43_G 0x2E42
#define PCI_CHIP_ILD_G 0x0042
#define PCI_CHIP_ILM_G 0x0046
#define IS_G45(devid) (devid == PCI_CHIP_IGD_E_G || \
devid == PCI_CHIP_Q45_G || \
devid == PCI_CHIP_G45_G || \
- devid == PCI_CHIP_G41_G || \
- devid == PCI_CHIP_B43_G)
+ devid == PCI_CHIP_G41_G)
#define IS_GM45(devid) (devid == PCI_CHIP_GM45_GM)
#define IS_G4X(devid) (IS_G45(devid) || IS_GM45(devid))