Merge branch 'glsl-pp-rework-2'
authorMichal Krol <michal@vmware.com>
Thu, 17 Dec 2009 16:00:46 +0000 (17:00 +0100)
committerMichal Krol <michal@vmware.com>
Thu, 17 Dec 2009 16:00:46 +0000 (17:00 +0100)
93 files changed:
src/gallium/auxiliary/tgsi/tgsi_build.c
src/gallium/auxiliary/tgsi/tgsi_build.h
src/gallium/auxiliary/tgsi/tgsi_dump.c
src/gallium/auxiliary/tgsi/tgsi_dump.h
src/gallium/auxiliary/tgsi/tgsi_exec.c
src/gallium/auxiliary/tgsi/tgsi_iterate.c
src/gallium/auxiliary/tgsi/tgsi_iterate.h
src/gallium/auxiliary/tgsi/tgsi_parse.c
src/gallium/auxiliary/tgsi/tgsi_parse.h
src/gallium/auxiliary/tgsi/tgsi_ppc.c
src/gallium/auxiliary/tgsi/tgsi_sanity.c
src/gallium/auxiliary/tgsi/tgsi_scan.c
src/gallium/auxiliary/tgsi/tgsi_scan.h
src/gallium/auxiliary/tgsi/tgsi_sse2.c
src/gallium/auxiliary/tgsi/tgsi_text.c
src/gallium/auxiliary/tgsi/tgsi_transform.c
src/gallium/auxiliary/tgsi/tgsi_transform.h
src/gallium/auxiliary/tgsi/tgsi_ureg.c
src/gallium/auxiliary/tgsi/tgsi_ureg.h
src/gallium/auxiliary/util/Makefile
src/gallium/auxiliary/util/SConscript
src/gallium/auxiliary/util/u_blit.c
src/gallium/auxiliary/util/u_blitter.c [new file with mode: 0644]
src/gallium/auxiliary/util/u_blitter.h [new file with mode: 0644]
src/gallium/auxiliary/util/u_dl.c [new file with mode: 0644]
src/gallium/auxiliary/util/u_dl.h [new file with mode: 0644]
src/gallium/auxiliary/util/u_format_access.py
src/gallium/auxiliary/util/u_gen_mipmap.c
src/gallium/auxiliary/util/u_simple_shaders.c
src/gallium/auxiliary/util/u_simple_shaders.h
src/gallium/auxiliary/util/u_texture.c [new file with mode: 0644]
src/gallium/auxiliary/util/u_texture.h [new file with mode: 0644]
src/gallium/drivers/llvmpipe/lp_bld_arit.c
src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c
src/gallium/drivers/llvmpipe/lp_debug.h [new file with mode: 0644]
src/gallium/drivers/llvmpipe/lp_screen.c
src/gallium/drivers/llvmpipe/lp_state_fs.c
src/gallium/drivers/nouveau/nouveau_stateobj.h
src/gallium/drivers/nv04/nv04_surface_2d.c
src/gallium/drivers/nv30/nv30_context.c
src/gallium/drivers/nv30/nv30_context.h
src/gallium/drivers/nv30/nv30_state_emit.c
src/gallium/drivers/nv40/nv40_context.c
src/gallium/drivers/nv40/nv40_context.h
src/gallium/drivers/nv40/nv40_state_emit.c
src/gallium/drivers/nv50/nv50_context.h
src/gallium/drivers/nv50/nv50_miptree.c
src/gallium/drivers/nv50/nv50_program.c
src/gallium/drivers/nv50/nv50_program.h
src/gallium/drivers/nv50/nv50_query.c
src/gallium/drivers/nv50/nv50_screen.c
src/gallium/drivers/nv50/nv50_state.c
src/gallium/drivers/nv50/nv50_state_validate.c
src/gallium/drivers/nv50/nv50_surface.c
src/gallium/drivers/nv50/nv50_tex.c
src/gallium/drivers/nv50/nv50_texture.h
src/gallium/drivers/nv50/nv50_transfer.c
src/gallium/drivers/r300/Makefile
src/gallium/drivers/r300/SConscript
src/gallium/drivers/r300/r300_blit.c [new file with mode: 0644]
src/gallium/drivers/r300/r300_blit.h [new file with mode: 0644]
src/gallium/drivers/r300/r300_clear.c [deleted file]
src/gallium/drivers/r300/r300_clear.h [deleted file]
src/gallium/drivers/r300/r300_context.c
src/gallium/drivers/r300/r300_context.h
src/gallium/drivers/r300/r300_emit.c
src/gallium/drivers/r300/r300_reg.h
src/gallium/drivers/r300/r300_render.c
src/gallium/drivers/r300/r300_screen.c
src/gallium/drivers/r300/r300_state.c
src/gallium/drivers/r300/r300_state_invariant.c
src/gallium/include/pipe/p_defines.h
src/gallium/include/pipe/p_shader_tokens.h
src/gallium/winsys/drm/radeon/core/radeon_buffer.c
src/gallium/winsys/drm/radeon/core/radeon_buffer.h
src/gallium/winsys/drm/radeon/core/radeon_drm.c
src/mesa/drivers/dri/r300/r300_blit.c
src/mesa/drivers/dri/r300/r300_blit.h
src/mesa/drivers/dri/r300/r300_context.c
src/mesa/drivers/dri/r300/r300_tex.h
src/mesa/drivers/dri/r300/r300_texcopy.c
src/mesa/drivers/dri/r300/r300_texstate.c
src/mesa/drivers/dri/r600/r600_cmdbuf.c
src/mesa/drivers/dri/r600/r600_cmdbuf.h
src/mesa/drivers/dri/r600/r600_context.c
src/mesa/drivers/dri/r600/r700_assembler.c
src/mesa/drivers/dri/r600/r700_assembler.h
src/mesa/drivers/dri/r600/r700_chip.c
src/mesa/drivers/dri/r600/r700_fragprog.c
src/mesa/drivers/dri/r600/r700_fragprog.h
src/mesa/drivers/dri/r600/r700_shaderinst.h
src/mesa/drivers/dri/radeon/radeon_common_context.h
src/mesa/drivers/dri/radeon/radeon_dma.c

index 4092f78f4a2abe5f3b42829b6e167223ddfb4943..92903fe57f38f8b6979e553ad2a9ed2be54d4ce9 100644 (file)
@@ -942,3 +942,107 @@ tgsi_default_full_dst_register( void )
    return full_dst_register;
 }
 
+struct tgsi_property
+tgsi_default_property( void )
+{
+   struct tgsi_property property;
+
+   property.Type = TGSI_TOKEN_TYPE_PROPERTY;
+   property.NrTokens = 1;
+   property.PropertyName = TGSI_PROPERTY_GS_INPUT_PRIM;
+   property.Padding = 0;
+
+   return property;
+}
+
+struct tgsi_property
+tgsi_build_property(unsigned property_name,
+                    struct tgsi_header *header)
+{
+   struct tgsi_property property;
+
+   property = tgsi_default_property();
+   property.PropertyName = property_name;
+
+   header_bodysize_grow( header );
+
+   return property;
+}
+
+
+struct tgsi_full_property
+tgsi_default_full_property( void )
+{
+   struct tgsi_full_property  full_property;
+
+   full_property.Property  = tgsi_default_property();
+   memset(full_property.u, 0,
+          sizeof(struct tgsi_property_data) * 8);
+
+   return full_property;
+}
+
+static void
+property_grow(
+   struct tgsi_property *property,
+   struct tgsi_header *header )
+{
+   assert( property->NrTokens < 0xFF );
+
+   property->NrTokens++;
+
+   header_bodysize_grow( header );
+}
+
+struct tgsi_property_data
+tgsi_build_property_data(
+   unsigned value,
+   struct tgsi_property *property,
+   struct tgsi_header *header )
+{
+   struct tgsi_property_data property_data;
+
+   property_data.Data = value;
+
+   property_grow( property, header );
+
+   return property_data;
+}
+
+unsigned
+tgsi_build_full_property(
+   const struct tgsi_full_property *full_prop,
+   struct tgsi_token *tokens,
+   struct tgsi_header *header,
+   unsigned maxsize )
+{
+   unsigned size = 0, i;
+   struct tgsi_property *property;
+
+   if( maxsize <= size )
+      return 0;
+   property = (struct tgsi_property *) &tokens[size];
+   size++;
+
+   *property = tgsi_build_property(
+      TGSI_PROPERTY_GS_INPUT_PRIM,
+      header );
+
+   assert( full_prop->Property.NrTokens <= 8 + 1 );
+
+   for( i = 0; i < full_prop->Property.NrTokens - 1; i++ ) {
+      struct tgsi_property_data *data;
+
+      if( maxsize <= size )
+         return  0;
+      data = (struct tgsi_property_data *) &tokens[size];
+      size++;
+
+      *data = tgsi_build_property_data(
+         full_prop->u[i].Data,
+         property,
+         header );
+   }
+
+   return size;
+}
index ffea786770c3986efced3ea5ffa14883b349216b..9de2757fe40b9ecc579784a37d3af0f6d2c43c73 100644 (file)
@@ -126,6 +126,34 @@ tgsi_build_full_immediate(
    struct tgsi_header *header,
    unsigned maxsize );
 
+/*
+ * properties
+ */
+
+struct tgsi_property
+tgsi_default_property( void );
+
+struct tgsi_property
+tgsi_build_property(
+   unsigned property_name,
+   struct tgsi_header *header );
+
+struct tgsi_full_property
+tgsi_default_full_property( void );
+
+struct tgsi_property_data
+tgsi_build_property_data(
+   unsigned value,
+   struct tgsi_property *property,
+   struct tgsi_header *header );
+
+unsigned
+tgsi_build_full_property(
+   const struct tgsi_full_property *full_prop,
+   struct tgsi_token *tokens,
+   struct tgsi_header *header,
+   unsigned maxsize );
+
 /*
  * instruction
  */
index d09ab925656ce50b08872701bd708e41ed62a2c9..ba1357697d1041da2f4ce643804ef41b94caa59c 100644 (file)
@@ -101,7 +101,8 @@ static const char *file_names[TGSI_FILE_COUNT] =
    "ADDR",
    "IMM",
    "LOOP",
-   "PRED"
+   "PRED",
+   "SV"
 };
 
 static const char *interpolate_names[] =
@@ -149,6 +150,27 @@ static const char *texture_names[] =
    "SHADOWRECT"
 };
 
+static const char *property_names[] =
+{
+   "GS_INPUT_PRIMITIVE",
+   "GS_OUTPUT_PRIMITIVE",
+   "GS_MAX_OUTPUT_VERTICES"
+};
+
+static const char *primitive_names[] =
+{
+   "POINTS",
+   "LINES",
+   "LINE_LOOP",
+   "LINE_STRIP",
+   "TRIANGLES",
+   "TRIANGLE_STRIP",
+   "TRIANGLE_FAN",
+   "QUADS",
+   "QUAD_STRIP",
+   "POLYGON"
+};
+
 
 static void
 _dump_register(
@@ -272,6 +294,50 @@ tgsi_dump_declaration(
    iter_declaration( &ctx.iter, (struct tgsi_full_declaration *)decl );
 }
 
+static boolean
+iter_property(
+   struct tgsi_iterate_context *iter,
+   struct tgsi_full_property *prop )
+{
+   int i;
+   struct dump_ctx *ctx = (struct dump_ctx *)iter;
+
+   assert(Elements(property_names) == TGSI_PROPERTY_COUNT);
+
+   TXT( "PROPERTY " );
+   ENM(prop->Property.PropertyName, property_names);
+
+   if (prop->Property.NrTokens > 1)
+      TXT(" ");
+
+   for (i = 0; i < prop->Property.NrTokens - 1; ++i) {
+      switch (prop->Property.PropertyName) {
+      case TGSI_PROPERTY_GS_INPUT_PRIM:
+      case TGSI_PROPERTY_GS_OUTPUT_PRIM:
+         ENM(prop->u[i].Data, primitive_names);
+         break;
+      default:
+         SID( prop->u[i].Data );
+         break;
+      }
+      if (i < prop->Property.NrTokens - 2)
+         TXT( ", " );
+   }
+   EOL();
+
+   return TRUE;
+}
+
+void tgsi_dump_property(
+   const struct tgsi_full_property *prop )
+{
+   struct dump_ctx ctx;
+
+   ctx.printf = dump_ctx_printf;
+
+   iter_property( &ctx.iter, (struct tgsi_full_property *)prop );
+}
+
 static boolean
 iter_immediate(
    struct tgsi_iterate_context *iter,
@@ -492,6 +558,7 @@ tgsi_dump(
    ctx.iter.iterate_instruction = iter_instruction;
    ctx.iter.iterate_declaration = iter_declaration;
    ctx.iter.iterate_immediate = iter_immediate;
+   ctx.iter.iterate_property = iter_property;
    ctx.iter.epilog = NULL;
 
    ctx.instno = 0;
@@ -546,6 +613,7 @@ tgsi_dump_str(
    ctx.base.iter.iterate_instruction = iter_instruction;
    ctx.base.iter.iterate_declaration = iter_declaration;
    ctx.base.iter.iterate_immediate = iter_immediate;
+   ctx.base.iter.iterate_property = iter_property;
    ctx.base.iter.epilog = NULL;
 
    ctx.base.instno = 0;
index ad1e647ec9000b0b2956627e4393d3c38c082a5c..4cd27317b36792b5d60e53c8efca3ee5dc51b054 100644 (file)
@@ -49,6 +49,7 @@ tgsi_dump(
 struct tgsi_full_immediate;
 struct tgsi_full_instruction;
 struct tgsi_full_declaration;
+struct tgsi_full_property;
 
 void
 tgsi_dump_immediate(
@@ -63,6 +64,10 @@ void
 tgsi_dump_declaration(
    const struct tgsi_full_declaration *decl );
 
+void
+tgsi_dump_property(
+   const struct tgsi_full_property *prop );
+
 #if defined __cplusplus
 }
 #endif
index 22984c32320e8531895d8a55cb437f28266b21b5..717358620c58ce6da45b05278eb1fbbc26b8c1e1 100644 (file)
@@ -336,6 +336,9 @@ tgsi_exec_machine_bind_shader(
          numInstructions++;
          break;
 
+      case TGSI_TOKEN_TYPE_PROPERTY:
+         break;
+
       default:
          assert( 0 );
       }
@@ -1158,6 +1161,7 @@ fetch_src_file_channel(
          break;
 
       case TGSI_FILE_INPUT:
+      case TGSI_FILE_SYSTEM_VALUE:
          chan->u[0] = mach->Inputs[index->i[0]].xyzw[swizzle].u[0];
          chan->u[1] = mach->Inputs[index->i[1]].xyzw[swizzle].u[1];
          chan->u[2] = mach->Inputs[index->i[2]].xyzw[swizzle].u[2];
@@ -1302,6 +1306,7 @@ fetch_source(
        */
       switch (reg->Register.File) {
       case TGSI_FILE_INPUT:
+      case TGSI_FILE_SYSTEM_VALUE:
          index.i[0] *= TGSI_EXEC_MAX_INPUT_ATTRIBS;
          index.i[1] *= TGSI_EXEC_MAX_INPUT_ATTRIBS;
          index.i[2] *= TGSI_EXEC_MAX_INPUT_ATTRIBS;
@@ -1892,7 +1897,8 @@ exec_declaration(struct tgsi_exec_machine *mach,
                  const struct tgsi_full_declaration *decl)
 {
    if (mach->Processor == TGSI_PROCESSOR_FRAGMENT) {
-      if (decl->Declaration.File == TGSI_FILE_INPUT) {
+      if (decl->Declaration.File == TGSI_FILE_INPUT ||
+          decl->Declaration.File == TGSI_FILE_SYSTEM_VALUE) {
          uint first, last, mask;
 
          first = decl->Range.First;
index 7b384f5e12a44810ba03f11f148def8d4c8f1328..0ba5fe48419bdcd0df467824ac4c996506e11535 100644 (file)
@@ -66,6 +66,12 @@ tgsi_iterate_shader(
                goto fail;
          break;
 
+      case TGSI_TOKEN_TYPE_PROPERTY:
+         if (ctx->iterate_property)
+            if (!ctx->iterate_property( ctx,  &parse.FullToken.FullProperty ))
+               goto fail;
+         break;
+
       default:
          assert( 0 );
       }
index ef5a33ebce9f7464f529a483cc1685cf77c7c381..8d67f22c4295d16a660f55e692e0cb856a9147e2 100644 (file)
@@ -56,6 +56,11 @@ struct tgsi_iterate_context
       struct tgsi_iterate_context *ctx,
       struct tgsi_full_immediate *imm );
 
+   boolean
+   (* iterate_property)(
+      struct tgsi_iterate_context *ctx,
+      struct tgsi_full_property *prop );
+
    boolean
    (* epilog)(
       struct tgsi_iterate_context *ctx );
index 8f2b6a307d38b56829cdf6fcda2d48f5cd258771..fa65ecb99754722924fbaa6a3e22253f086ae8f8 100644 (file)
@@ -220,6 +220,22 @@ tgsi_parse_token(
       break;
    }
 
+   case TGSI_TOKEN_TYPE_PROPERTY:
+   {
+      struct tgsi_full_property *prop = &ctx->FullToken.FullProperty;
+      uint prop_count;
+
+      memset(prop, 0, sizeof *prop);
+      copy_token(&prop->Property, &token);
+
+      prop_count = prop->Property.NrTokens - 1;
+      for (i = 0; i < prop_count; i++) {
+         next_token(ctx, &prop->u[i]);
+      }
+
+      break;
+   }
+
    default:
       assert( 0 );
    }
index 3aa1979a63a5e40a5ecb3706c1535acfd1c0b1ea..439a57269b7ef4e85565b08113d88b08cdb455b9 100644 (file)
@@ -67,6 +67,12 @@ struct tgsi_full_immediate
    union tgsi_immediate_data u[4];
 };
 
+struct tgsi_full_property
+{
+   struct tgsi_property   Property;
+   struct tgsi_property_data u[8];
+};
+
 #define TGSI_FULL_MAX_DST_REGISTERS 2
 #define TGSI_FULL_MAX_SRC_REGISTERS 4 /* TXD has 4 */
 
@@ -86,6 +92,7 @@ union tgsi_full_token
    struct tgsi_full_declaration  FullDeclaration;
    struct tgsi_full_immediate    FullImmediate;
    struct tgsi_full_instruction  FullInstruction;
+   struct tgsi_full_property     FullProperty;
 };
 
 struct tgsi_parse_context
index da6ad6da04ce7176b5da9abc887aaf48e061921a..138d2d095bb037577148113ccc8a43ee958c5a10 100644 (file)
@@ -293,6 +293,7 @@ emit_fetch(struct gen_context *gen,
    case TGSI_SWIZZLE_W:
       switch (reg->Register.File) {
       case TGSI_FILE_INPUT:
+      case TGSI_FILE_SYSTEM_VALUE:
          {
             int offset = (reg->Register.Index * 4 + swizzle) * 16;
             int offset_reg = emit_li_offset(gen, offset);
@@ -1173,7 +1174,8 @@ emit_declaration(
    struct ppc_function *func,
    struct tgsi_full_declaration *decl )
 {
-   if( decl->Declaration.File == TGSI_FILE_INPUT ) {
+   if( decl->Declaration.File == TGSI_FILE_INPUT ||
+       decl->Declaration.File == TGSI_FILE_SYSTEM_VALUE ) {
 #if 0
       unsigned first, last, mask;
       unsigned i, j;
@@ -1339,6 +1341,9 @@ tgsi_emit_ppc(const struct tgsi_token *tokens,
          }
          break;
 
+      case TGSI_TOKEN_TYPE_PROPERTY:
+         break;
+
       default:
         ok = 0;
          assert( 0 );
index b5d1faa897a7297b1186ea2fa920031a2f881b04..c27579e79427270411c36238f6c06c70d34ee935 100644 (file)
@@ -324,6 +324,17 @@ iter_immediate(
    return TRUE;
 }
 
+
+static boolean
+iter_property(
+   struct tgsi_iterate_context *iter,
+   struct tgsi_full_property *prop )
+{
+   /*struct sanity_check_ctx *ctx = (struct sanity_check_ctx *) iter;*/
+
+   return TRUE;
+}
+
 static boolean
 epilog(
    struct tgsi_iterate_context *iter )
@@ -367,6 +378,7 @@ tgsi_sanity_check(
    ctx.iter.iterate_instruction = iter_instruction;
    ctx.iter.iterate_declaration = iter_declaration;
    ctx.iter.iterate_immediate = iter_immediate;
+   ctx.iter.iterate_property = iter_property;
    ctx.iter.epilog = epilog;
 
    memset( ctx.regs_decl, 0, sizeof( ctx.regs_decl ) );
index a5d2db04ec1f8f8c2302c024848269ccf3b7115f..5f5c95bfbdb41c2a81f5bed85f0a9b2e9302f8bd 100644 (file)
@@ -97,7 +97,8 @@ tgsi_scan_shader(const struct tgsi_token *tokens,
                for (i = 0; i < fullinst->Instruction.NumSrcRegs; i++) {
                   const struct tgsi_full_src_register *src =
                      &fullinst->Src[i];
-                  if (src->Register.File == TGSI_FILE_INPUT) {
+                  if (src->Register.File == TGSI_FILE_INPUT ||
+                      src->Register.File == TGSI_FILE_SYSTEM_VALUE) {
                      const int ind = src->Register.Index;
                      if (info->input_semantic_name[ind] == TGSI_SEMANTIC_FOG) {
                         if (src->Register.SwizzleX == TGSI_SWIZZLE_X) {
@@ -128,7 +129,7 @@ tgsi_scan_shader(const struct tgsi_token *tokens,
                info->file_count[file]++;
                info->file_max[file] = MAX2(info->file_max[file], (int)reg);
 
-               if (file == TGSI_FILE_INPUT) {
+               if (file == TGSI_FILE_INPUT || file == TGSI_FILE_SYSTEM_VALUE) {
                   info->input_semantic_name[reg] = (ubyte)fulldecl->Semantic.Name;
                   info->input_semantic_index[reg] = (ubyte)fulldecl->Semantic.Index;
                   info->input_interpolate[reg] = (ubyte)fulldecl->Declaration.Interpolate;
@@ -160,6 +161,19 @@ tgsi_scan_shader(const struct tgsi_token *tokens,
             info->file_max[file] = MAX2(info->file_max[file], (int)reg);
          }
          break;
+      case TGSI_TOKEN_TYPE_PROPERTY:
+      {
+         const struct tgsi_full_property *fullprop
+            = &parse.FullToken.FullProperty;
+
+         info->properties[info->num_properties].name =
+            fullprop->Property.PropertyName;
+         memcpy(info->properties[info->num_properties].data,
+                fullprop->u, 8 * sizeof(unsigned));;
+
+         ++info->num_properties;
+      }
+      break;
 
       default:
          assert( 0 );
@@ -212,6 +226,7 @@ tgsi_is_passthrough_shader(const struct tgsi_token *tokens)
             /* Do a whole bunch of checks for a simple move */
             if (fullinst->Instruction.Opcode != TGSI_OPCODE_MOV ||
                 src->Register.File != TGSI_FILE_INPUT ||
+                src->Register.File != TGSI_FILE_SYSTEM_VALUE ||
                 dst->Register.File != TGSI_FILE_OUTPUT ||
                 src->Register.Index != dst->Register.Index ||
 
@@ -235,6 +250,8 @@ tgsi_is_passthrough_shader(const struct tgsi_token *tokens)
          /* fall-through */
       case TGSI_TOKEN_TYPE_IMMEDIATE:
          /* fall-through */
+      case TGSI_TOKEN_TYPE_PROPERTY:
+         /* fall-through */
       default:
          ; /* no-op */
       }
index 8a7ee0c7e4f1f1e2e69f159586f45d0a8cd28018..a1e8a4f6bb7c7072c9f9fc636acd099f66177be6 100644 (file)
@@ -33,7 +33,6 @@
 #include "pipe/p_state.h"
 #include "pipe/p_shader_tokens.h"
 
-
 /**
  * Shader summary info
  */
@@ -61,8 +60,13 @@ struct tgsi_shader_info
    boolean uses_kill;  /**< KIL or KILP instruction used? */
    boolean uses_fogcoord; /**< fragment shader uses fog coord? */
    boolean uses_frontfacing; /**< fragment shader uses front/back-face flag? */
-};
 
+   struct {
+      unsigned name;
+      unsigned data[8];
+   } properties[TGSI_PROPERTY_COUNT];
+   uint num_properties;
+};
 
 extern void
 tgsi_scan_shader(const struct tgsi_token *tokens,
index 76051ea0d8e0b81a1ff626f713c9b5565603fab4..d63c75dafb37aae87982a6ca48ccbc5805cdf84b 100644 (file)
@@ -1288,6 +1288,7 @@ emit_fetch(
          break;
 
       case TGSI_FILE_INPUT:
+      case TGSI_FILE_SYSTEM_VALUE:
          emit_inputf(
             func,
             xmm,
@@ -2633,7 +2634,8 @@ emit_declaration(
    struct x86_function *func,
    struct tgsi_full_declaration *decl )
 {
-   if( decl->Declaration.File == TGSI_FILE_INPUT ) {
+   if( decl->Declaration.File == TGSI_FILE_INPUT ||
+       decl->Declaration.File == TGSI_FILE_SYSTEM_VALUE ) {
       unsigned first, last, mask;
       unsigned i, j;
 
@@ -2952,6 +2954,9 @@ tgsi_emit_sse2(
             num_immediates++;
          }
          break;
+      case TGSI_TOKEN_TYPE_PROPERTY:
+         /* we just ignore them for now */
+         break;
 
       default:
         ok = 0;
index eb376fa9572c50144996dae3bcc96403307211ad..f000958bfc0ecd059a11067902b0ffc53071b270 100644 (file)
@@ -27,6 +27,7 @@
 
 #include "util/u_debug.h"
 #include "util/u_memory.h"
+#include "pipe/p_defines.h"
 #include "tgsi_text.h"
 #include "tgsi_build.h"
 #include "tgsi_info.h"
@@ -59,6 +60,21 @@ static boolean uprcase( char c )
    return c;
 }
 
+/*
+ * Ignore case of str1 and assume str2 is already uppercase.
+ * Return TRUE iff str1 and str2 are equal.
+ */
+static int
+streq_nocase_uprcase(const char *str1,
+                     const char *str2)
+{
+   while (*str1 && uprcase(*str1) == *str2) {
+      str1++;
+      str2++;
+   }
+   return *str1 == *str2;
+}
+
 static boolean str_match_no_case( const char **pcur, const char *str )
 {
    const char *cur = *pcur;
@@ -110,6 +126,20 @@ static boolean parse_uint( const char **pcur, uint *val )
    return FALSE;
 }
 
+static boolean parse_identifier( const char **pcur, char *ret )
+{
+   const char *cur = *pcur;
+   int i = 0;
+   if (is_alpha_underscore( cur )) {
+      ret[i++] = *cur++;
+      while (is_alpha_underscore( cur ))
+         ret[i++] = *cur++;
+      *pcur = cur;
+      return TRUE;
+   }
+   return FALSE;
+}
+
 /* Parse floating point.
  */
 static boolean parse_float( const char **pcur, float *val )
@@ -229,7 +259,8 @@ static const char *file_names[TGSI_FILE_COUNT] =
    "ADDR",
    "IMM",
    "LOOP",
-   "PRED"
+   "PRED",
+   "SV"
 };
 
 static boolean
@@ -939,6 +970,106 @@ static boolean parse_immediate( struct translate_ctx *ctx )
    return TRUE;
 }
 
+static const char *property_names[] =
+{
+   "GS_INPUT_PRIMITIVE",
+   "GS_OUTPUT_PRIMITIVE",
+   "GS_MAX_OUTPUT_VERTICES"
+};
+
+static const char *primitive_names[] =
+{
+   "POINTS",
+   "LINES",
+   "LINE_LOOP",
+   "LINE_STRIP",
+   "TRIANGLES",
+   "TRIANGLE_STRIP",
+   "TRIANGLE_FAN",
+   "QUADS",
+   "QUAD_STRIP",
+   "POLYGON"
+};
+
+static boolean
+parse_primitive( const char **pcur, uint *primitive )
+{
+   uint i;
+
+   for (i = 0; i < PIPE_PRIM_MAX; i++) {
+      const char *cur = *pcur;
+
+      if (str_match_no_case( &cur, primitive_names[i])) {
+         *primitive = i;
+         *pcur = cur;
+         return TRUE;
+      }
+   }
+   return FALSE;
+}
+
+
+static boolean parse_property( struct translate_ctx *ctx )
+{
+   struct tgsi_full_property prop;
+   uint property_name;
+   uint values[8];
+   uint advance;
+   char id[64];
+
+   if (!eat_white( &ctx->cur )) {
+      report_error( ctx, "Syntax error" );
+      return FALSE;
+   }
+   if (!parse_identifier( &ctx->cur, id )) {
+      report_error( ctx, "Syntax error" );
+      return FALSE;
+   }
+   for (property_name = 0; property_name < TGSI_PROPERTY_COUNT;
+        ++property_name) {
+      if (streq_nocase_uprcase(id, property_names[property_name])) {
+         break;
+      }
+   }
+   if (property_name >= TGSI_PROPERTY_COUNT) {
+      debug_printf( "\nError: Unknown property : '%s'", id );
+      return FALSE;
+   }
+
+   eat_opt_white( &ctx->cur );
+   switch(property_name) {
+   case TGSI_PROPERTY_GS_INPUT_PRIM:
+   case TGSI_PROPERTY_GS_OUTPUT_PRIM:
+      if (!parse_primitive(&ctx->cur, &values[0] )) {
+         report_error( ctx, "Unknown primitive name as property!" );
+         return FALSE;
+      }
+      break;
+   default:
+      if (!parse_uint(&ctx->cur, &values[0] )) {
+         report_error( ctx, "Expected unsigned integer as property!" );
+         return FALSE;
+      }
+   }
+
+   prop = tgsi_default_full_property();
+   prop.Property.PropertyName = property_name;
+   prop.Property.NrTokens += 1;
+   prop.u[0].Data = values[0];
+
+   advance = tgsi_build_full_property(
+      &prop,
+      ctx->tokens_cur,
+      ctx->header,
+      (uint) (ctx->tokens_end - ctx->tokens_cur) );
+   if (advance == 0)
+      return FALSE;
+   ctx->tokens_cur += advance;
+
+   return TRUE;
+}
+
+
 static boolean translate( struct translate_ctx *ctx )
 {
    eat_opt_white( &ctx->cur );
@@ -947,7 +1078,6 @@ static boolean translate( struct translate_ctx *ctx )
 
    while (*ctx->cur != '\0') {
       uint label_val = 0;
-
       if (!eat_white( &ctx->cur )) {
          report_error( ctx, "Syntax error" );
          return FALSE;
@@ -955,7 +1085,6 @@ static boolean translate( struct translate_ctx *ctx )
 
       if (*ctx->cur == '\0')
          break;
-
       if (parse_label( ctx, &label_val )) {
          if (!parse_instruction( ctx, TRUE ))
             return FALSE;
@@ -968,6 +1097,10 @@ static boolean translate( struct translate_ctx *ctx )
          if (!parse_immediate( ctx ))
             return FALSE;
       }
+      else if (str_match_no_case( &ctx->cur, "PROPERTY" )) {
+         if (!parse_property( ctx ))
+            return FALSE;
+      }
       else if (!parse_instruction( ctx, FALSE )) {
          return FALSE;
       }
index 8b8f489b355b1b2e9d0d7cfb92c8f5cec491996a..ae875f29abfb23334a42fa5c55d42d6461909455 100644 (file)
@@ -79,6 +79,19 @@ emit_immediate(struct tgsi_transform_context *ctx,
 }
 
 
+static void
+emit_property(struct tgsi_transform_context *ctx,
+              const struct tgsi_full_property *prop)
+{
+   uint ti = ctx->ti;
+
+   ti += tgsi_build_full_property(prop,
+                                  ctx->tokens_out + ti,
+                                  ctx->header,
+                                  ctx->max_tokens_out - ti);
+   ctx->ti = ti;
+}
+
 
 /**
  * Apply user-defined transformations to the input shader to produce
@@ -110,6 +123,7 @@ tgsi_transform_shader(const struct tgsi_token *tokens_in,
    ctx->emit_instruction = emit_instruction;
    ctx->emit_declaration = emit_declaration;
    ctx->emit_immediate = emit_immediate;
+   ctx->emit_property = emit_property;
    ctx->tokens_out = tokens_out;
    ctx->max_tokens_out = max_tokens_out;
 
@@ -182,6 +196,17 @@ tgsi_transform_shader(const struct tgsi_token *tokens_in,
                ctx->emit_immediate(ctx, fullimm);
          }
          break;
+      case TGSI_TOKEN_TYPE_PROPERTY:
+         {
+            struct tgsi_full_property *fullprop
+               = &parse.FullToken.FullProperty;
+
+            if (ctx->transform_property)
+               ctx->transform_property(ctx, fullprop);
+            else
+               ctx->emit_property(ctx, fullprop);
+         }
+         break;
 
       default:
          assert( 0 );
index a121adbaef4056fa296fc0f7591dadb72479a176..818478e277a91e8ed55105453578987550965e7f 100644 (file)
@@ -53,6 +53,8 @@ struct tgsi_transform_context
 
    void (*transform_immediate)(struct tgsi_transform_context *ctx,
                                struct tgsi_full_immediate *imm);
+   void (*transform_property)(struct tgsi_transform_context *ctx,
+                              struct tgsi_full_property *prop);
 
    /**
     * Called at end of input program to allow caller to append extra
@@ -73,6 +75,8 @@ struct tgsi_transform_context
                             const struct tgsi_full_declaration *decl);
    void (*emit_immediate)(struct tgsi_transform_context *ctx,
                           const struct tgsi_full_immediate *imm);
+   void (*emit_property)(struct tgsi_transform_context *ctx,
+                         const struct tgsi_full_property *prop);
 
    struct tgsi_header *header;
    uint max_tokens_out;
index 3f943845f5b8ab8b3674f4b6ad77d0762355afc9..1e730e534276be4d20d50a48063a3ef0a9f0f029 100644 (file)
@@ -89,6 +89,11 @@ struct ureg_program
 
    unsigned vs_inputs[UREG_MAX_INPUT/32];
 
+   struct {
+      unsigned index;
+   } gs_input[UREG_MAX_INPUT];
+   unsigned nr_gs_inputs;
+
    struct {
       unsigned semantic_name;
       unsigned semantic_index;
@@ -278,6 +283,22 @@ ureg_DECL_vs_input( struct ureg_program *ureg,
 }
 
 
+struct ureg_src
+ureg_DECL_gs_input(struct ureg_program *ureg,
+                   unsigned index)
+{
+   if (ureg->nr_gs_inputs < UREG_MAX_INPUT) {
+      ureg->gs_input[ureg->nr_gs_inputs].index = index;
+      ureg->nr_gs_inputs++;
+   } else {
+      set_bad(ureg);
+   }
+
+   /* XXX: Add suport for true 2D input registers. */
+   return ureg_src_register(TGSI_FILE_INPUT, index);
+}
+
+
 struct ureg_dst 
 ureg_DECL_output( struct ureg_program *ureg,
                   unsigned name,
@@ -964,8 +985,7 @@ static void emit_decls( struct ureg_program *ureg )
             emit_decl_range( ureg, TGSI_FILE_INPUT, i, 1 );
          }
       }
-   }
-   else {
+   } else if (ureg->processor == TGSI_PROCESSOR_FRAGMENT) {
       for (i = 0; i < ureg->nr_fs_inputs; i++) {
          emit_decl( ureg, 
                     TGSI_FILE_INPUT, 
@@ -974,6 +994,13 @@ static void emit_decls( struct ureg_program *ureg )
                     ureg->fs_input[i].semantic_index,
                     ureg->fs_input[i].interp );
       }
+   } else {
+      for (i = 0; i < ureg->nr_gs_inputs; i++) {
+         emit_decl_range(ureg, 
+                         TGSI_FILE_INPUT, 
+                         ureg->gs_input[i].index,
+                         1);
+      }
    }
 
    for (i = 0; i < ureg->nr_outputs; i++) {
index 94cc70a208230655b7def30f3a53a42ef066fa0b..7e3e7bcf1d36aea5f04e90987c44b42214d67489 100644 (file)
@@ -133,6 +133,10 @@ struct ureg_src
 ureg_DECL_vs_input( struct ureg_program *,
                     unsigned index );
 
+struct ureg_src
+ureg_DECL_gs_input(struct ureg_program *,
+                   unsigned index);
+
 struct ureg_dst
 ureg_DECL_output( struct ureg_program *,
                   unsigned semantic_name,
index 1d8bb55bbd68d4bd9bdae78f69bd2ed268007fca..3ed90fd1b70dd615ad424643a0271a6c3a1c77aa 100644 (file)
@@ -9,8 +9,10 @@ C_SOURCES = \
        u_debug_symbol.c \
        u_debug_stack.c \
        u_blit.c \
+       u_blitter.c \
        u_cache.c \
        u_cpu_detect.c \
+       u_dl.c \
        u_draw_quad.c \
        u_format.c \
        u_format_access.c \
@@ -30,6 +32,7 @@ C_SOURCES = \
        u_stream_stdc.c \
        u_stream_wd.c \
        u_surface.c \
+       u_texture.c \
        u_tile.c \
        u_time.c \
        u_timed_winsys.c \
index 8d99106d0b852b252fb62110cadf46437fc8bd0e..2a546d19dc0d796698b11d12036be3f83f6b1e57 100644 (file)
@@ -23,6 +23,7 @@ util = env.ConvenienceLibrary(
        source = [
                'u_bitmask.c',
                'u_blit.c',
+               'u_blitter.c',
                'u_cache.c',
                'u_cpu_detect.c',
                'u_debug.c',
@@ -30,6 +31,7 @@ util = env.ConvenienceLibrary(
                'u_debug_memory.c',
                'u_debug_stack.c',
                'u_debug_symbol.c',
+               'u_dl.c',
                'u_draw_quad.c',
                'u_format.c',
                'u_format_access.c',
@@ -48,6 +50,7 @@ util = env.ConvenienceLibrary(
                'u_stream_stdc.c',
                'u_stream_wd.c',
                'u_surface.c',
+               'u_texture.c',
                'u_tile.c',
                'u_time.c',
                'u_timed_winsys.c',
index abe1de3302bf87e577b8f99cbb1e265e22cfc717..c9050ca864c15db419f45f34c27655dd20fc5fe0 100644 (file)
@@ -126,7 +126,8 @@ util_create_blit(struct pipe_context *pipe, struct cso_context *cso)
    }
 
    /* fragment shader */
-   ctx->fs[TGSI_WRITEMASK_XYZW] = util_make_fragment_tex_shader(pipe);
+   ctx->fs[TGSI_WRITEMASK_XYZW] =
+      util_make_fragment_tex_shader(pipe, TGSI_TEXTURE_2D);
    ctx->vbuf = NULL;
 
    /* init vertex data that doesn't change */
@@ -420,7 +421,9 @@ util_blit_pixels_writemask(struct blit_state *ctx,
    cso_set_sampler_textures(ctx->cso, 1, &tex);
 
    if (ctx->fs[writemask] == NULL)
-      ctx->fs[writemask] = util_make_fragment_tex_shader_writemask(pipe, writemask);
+      ctx->fs[writemask] =
+         util_make_fragment_tex_shader_writemask(pipe, TGSI_TEXTURE_2D,
+                                                 writemask);
 
    /* shaders */
    cso_set_fragment_shader_handle(ctx->cso, ctx->fs[writemask]);
diff --git a/src/gallium/auxiliary/util/u_blitter.c b/src/gallium/auxiliary/util/u_blitter.c
new file mode 100644 (file)
index 0000000..895af2c
--- /dev/null
@@ -0,0 +1,718 @@
+/**************************************************************************
+ *
+ * Copyright 2009 Marek Olšák <maraeo@gmail.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/**
+ * @file
+ * Blitter utility to facilitate acceleration of the clear, surface_copy,
+ * and surface_fill functions.
+ *
+ * @author Marek Olšák
+ */
+
+#include "pipe/p_context.h"
+#include "pipe/p_defines.h"
+#include "pipe/p_inlines.h"
+#include "pipe/p_shader_tokens.h"
+#include "pipe/p_state.h"
+
+#include "util/u_memory.h"
+#include "util/u_math.h"
+#include "util/u_blitter.h"
+#include "util/u_draw_quad.h"
+#include "util/u_pack_color.h"
+#include "util/u_rect.h"
+#include "util/u_simple_shaders.h"
+#include "util/u_texture.h"
+
+struct blitter_context_priv
+{
+   struct blitter_context blitter;
+
+   struct pipe_context *pipe; /**< pipe context */
+   struct pipe_buffer *vbuf;  /**< quad */
+
+   float vertices[4][2][4];   /**< {pos, color} or {pos, texcoord} */
+
+   /* Templates for various state objects. */
+   struct pipe_depth_stencil_alpha_state template_dsa;
+   struct pipe_sampler_state template_sampler_state;
+
+   /* Constant state objects. */
+   /* Vertex shaders. */
+   void *vs_col; /**< Vertex shader which passes {pos, color} to the output */
+   void *vs_tex; /**<Vertex shader which passes {pos, texcoord} to the output.*/
+
+   /* Fragment shaders. */
+   /* FS which outputs a color to multiple color buffers. */
+   void *fs_col[PIPE_MAX_COLOR_BUFS];
+
+   /* FS which outputs a color from a texture,
+      where the index is PIPE_TEXTURE_* to be sampled. */
+   void *fs_texfetch_col[PIPE_MAX_TEXTURE_TYPES];
+
+   /* FS which outputs a depth from a texture,
+      where the index is PIPE_TEXTURE_* to be sampled. */
+   void *fs_texfetch_depth[PIPE_MAX_TEXTURE_TYPES];
+
+   /* Blend state. */
+   void *blend_write_color;   /**< blend state with writemask of RGBA */
+   void *blend_keep_color;    /**< blend state with writemask of 0 */
+
+   /* Depth stencil alpha state. */
+   void *dsa_write_depth_stencil[0xff]; /**< indices are stencil clear values */
+   void *dsa_write_depth_keep_stencil;
+   void *dsa_keep_depth_stencil;
+
+   /* Sampler state for clamping to a miplevel. */
+   void *sampler_state[PIPE_MAX_TEXTURE_LEVELS];
+
+   /* Rasterizer state. */
+   void *rs_state;
+};
+
+struct blitter_context *util_blitter_create(struct pipe_context *pipe)
+{
+   struct blitter_context_priv *ctx;
+   struct pipe_blend_state blend;
+   struct pipe_depth_stencil_alpha_state *dsa;
+   struct pipe_rasterizer_state rs_state;
+   struct pipe_sampler_state *sampler_state;
+   unsigned i;
+
+   ctx = CALLOC_STRUCT(blitter_context_priv);
+   if (!ctx)
+      return NULL;
+
+   ctx->pipe = pipe;
+
+   /* init state objects for them to be considered invalid */
+   ctx->blitter.saved_fb_state.nr_cbufs = ~0;
+   ctx->blitter.saved_num_textures = ~0;
+   ctx->blitter.saved_num_sampler_states = ~0;
+
+   /* blend state objects */
+   memset(&blend, 0, sizeof(blend));
+   ctx->blend_keep_color = pipe->create_blend_state(pipe, &blend);
+
+   blend.colormask = PIPE_MASK_RGBA;
+   ctx->blend_write_color = pipe->create_blend_state(pipe, &blend);
+
+   /* depth stencil alpha state objects */
+   dsa = &ctx->template_dsa;
+   ctx->dsa_keep_depth_stencil =
+      pipe->create_depth_stencil_alpha_state(pipe, dsa);
+
+   dsa->depth.enabled = 1;
+   dsa->depth.writemask = 1;
+   dsa->depth.func = PIPE_FUNC_ALWAYS;
+   ctx->dsa_write_depth_keep_stencil =
+      pipe->create_depth_stencil_alpha_state(pipe, dsa);
+
+   dsa->stencil[0].enabled = 1;
+   dsa->stencil[0].func = PIPE_FUNC_ALWAYS;
+   dsa->stencil[0].fail_op = PIPE_STENCIL_OP_REPLACE;
+   dsa->stencil[0].zpass_op = PIPE_STENCIL_OP_REPLACE;
+   dsa->stencil[0].zfail_op = PIPE_STENCIL_OP_REPLACE;
+   dsa->stencil[0].valuemask = 0xff;
+   dsa->stencil[0].writemask = 0xff;
+   /* The DSA state objects which write depth and stencil are created
+    * on-demand. */
+
+   /* sampler state */
+   sampler_state = &ctx->template_sampler_state;
+   sampler_state->wrap_s = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
+   sampler_state->wrap_t = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
+   sampler_state->wrap_r = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
+   /* The sampler state objects which sample from a specified mipmap level
+    * are created on-demand. */
+
+   /* rasterizer state */
+   memset(&rs_state, 0, sizeof(rs_state));
+   rs_state.front_winding = PIPE_WINDING_CW;
+   rs_state.cull_mode = PIPE_WINDING_NONE;
+   rs_state.bypass_vs_clip_and_viewport = 1;
+   rs_state.gl_rasterization_rules = 1;
+   ctx->rs_state = pipe->create_rasterizer_state(pipe, &rs_state);
+
+   /* fragment shaders are created on-demand */
+
+   /* vertex shaders */
+   {
+      const uint semantic_names[] = { TGSI_SEMANTIC_POSITION,
+                                      TGSI_SEMANTIC_COLOR };
+      const uint semantic_indices[] = { 0, 0 };
+      ctx->vs_col =
+         util_make_vertex_passthrough_shader(pipe, 2, semantic_names,
+                                             semantic_indices);
+   }
+   {
+      const uint semantic_names[] = { TGSI_SEMANTIC_POSITION,
+                                      TGSI_SEMANTIC_GENERIC };
+      const uint semantic_indices[] = { 0, 0 };
+      ctx->vs_tex =
+         util_make_vertex_passthrough_shader(pipe, 2, semantic_names,
+                                             semantic_indices);
+   }
+
+   /* set invariant vertex coordinates */
+   for (i = 0; i < 4; i++)
+      ctx->vertices[i][0][3] = 1; /*v.w*/
+
+   /* create the vertex buffer */
+   ctx->vbuf = pipe_buffer_create(ctx->pipe->screen,
+                                  32,
+                                  PIPE_BUFFER_USAGE_VERTEX,
+                                  sizeof(ctx->vertices));
+
+   return &ctx->blitter;
+}
+
+void util_blitter_destroy(struct blitter_context *blitter)
+{
+   struct blitter_context_priv *ctx = (struct blitter_context_priv*)blitter;
+   struct pipe_context *pipe = ctx->pipe;
+   int i;
+
+   pipe->delete_blend_state(pipe, ctx->blend_write_color);
+   pipe->delete_blend_state(pipe, ctx->blend_keep_color);
+   pipe->delete_depth_stencil_alpha_state(pipe, ctx->dsa_keep_depth_stencil);
+   pipe->delete_depth_stencil_alpha_state(pipe,
+                                          ctx->dsa_write_depth_keep_stencil);
+
+   for (i = 0; i < 0xff; i++)
+      if (ctx->dsa_write_depth_stencil[i])
+         pipe->delete_depth_stencil_alpha_state(pipe,
+            ctx->dsa_write_depth_stencil[i]);
+
+   pipe->delete_rasterizer_state(pipe, ctx->rs_state);
+   pipe->delete_vs_state(pipe, ctx->vs_col);
+   pipe->delete_vs_state(pipe, ctx->vs_tex);
+
+   for (i = 0; i < PIPE_MAX_TEXTURE_TYPES; i++) {
+      if (ctx->fs_texfetch_col[i])
+         pipe->delete_fs_state(pipe, ctx->fs_texfetch_col[i]);
+      if (ctx->fs_texfetch_depth[i])
+         pipe->delete_fs_state(pipe, ctx->fs_texfetch_depth[i]);
+   }
+
+   for (i = 0; i < PIPE_MAX_COLOR_BUFS && ctx->fs_col[i]; i++)
+      if (ctx->fs_col[i])
+         pipe->delete_fs_state(pipe, ctx->fs_col[i]);
+
+   for (i = 0; i < PIPE_MAX_TEXTURE_LEVELS; i++)
+      if (ctx->sampler_state[i])
+         pipe->delete_sampler_state(pipe, ctx->sampler_state[i]);
+
+   pipe_buffer_reference(&ctx->vbuf, NULL);
+   FREE(ctx);
+}
+
+static void blitter_check_saved_CSOs(struct blitter_context_priv *ctx)
+{
+   /* make sure these CSOs have been saved */
+   assert(ctx->blitter.saved_blend_state &&
+          ctx->blitter.saved_dsa_state &&
+          ctx->blitter.saved_rs_state &&
+          ctx->blitter.saved_fs &&
+          ctx->blitter.saved_vs);
+}
+
+static void blitter_restore_CSOs(struct blitter_context_priv *ctx)
+{
+   struct pipe_context *pipe = ctx->pipe;
+
+   /* restore the state objects which are always required to be saved */
+   pipe->bind_blend_state(pipe, ctx->blitter.saved_blend_state);
+   pipe->bind_depth_stencil_alpha_state(pipe, ctx->blitter.saved_dsa_state);
+   pipe->bind_rasterizer_state(pipe, ctx->blitter.saved_rs_state);
+   pipe->bind_fs_state(pipe, ctx->blitter.saved_fs);
+   pipe->bind_vs_state(pipe, ctx->blitter.saved_vs);
+
+   ctx->blitter.saved_blend_state = 0;
+   ctx->blitter.saved_dsa_state = 0;
+   ctx->blitter.saved_rs_state = 0;
+   ctx->blitter.saved_fs = 0;
+   ctx->blitter.saved_vs = 0;
+
+   /* restore the state objects which are required to be saved before copy/fill
+    */
+   if (ctx->blitter.saved_fb_state.nr_cbufs != ~0) {
+      pipe->set_framebuffer_state(pipe, &ctx->blitter.saved_fb_state);
+      ctx->blitter.saved_fb_state.nr_cbufs = ~0;
+   }
+
+   if (ctx->blitter.saved_num_sampler_states != ~0) {
+      pipe->bind_fragment_sampler_states(pipe,
+                                         ctx->blitter.saved_num_sampler_states,
+                                         ctx->blitter.saved_sampler_states);
+      ctx->blitter.saved_num_sampler_states = ~0;
+   }
+
+   if (ctx->blitter.saved_num_textures != ~0) {
+      pipe->set_fragment_sampler_textures(pipe,
+                                          ctx->blitter.saved_num_textures,
+                                          ctx->blitter.saved_textures);
+      ctx->blitter.saved_num_textures = ~0;
+   }
+}
+
+static void blitter_set_rectangle(struct blitter_context_priv *ctx,
+                                  unsigned x1, unsigned y1,
+                                  unsigned x2, unsigned y2,
+                                  float depth)
+{
+   int i;
+
+   /* set vertex positions */
+   ctx->vertices[0][0][0] = x1; /*v0.x*/
+   ctx->vertices[0][0][1] = y1; /*v0.y*/
+
+   ctx->vertices[1][0][0] = x2; /*v1.x*/
+   ctx->vertices[1][0][1] = y1; /*v1.y*/
+
+   ctx->vertices[2][0][0] = x2; /*v2.x*/
+   ctx->vertices[2][0][1] = y2; /*v2.y*/
+
+   ctx->vertices[3][0][0] = x1; /*v3.x*/
+   ctx->vertices[3][0][1] = y2; /*v3.y*/
+
+   for (i = 0; i < 4; i++)
+      ctx->vertices[i][0][2] = depth; /*z*/
+}
+
+static void blitter_set_clear_color(struct blitter_context_priv *ctx,
+                                    const float *rgba)
+{
+   int i;
+
+   for (i = 0; i < 4; i++) {
+      ctx->vertices[i][1][0] = rgba[0];
+      ctx->vertices[i][1][1] = rgba[1];
+      ctx->vertices[i][1][2] = rgba[2];
+      ctx->vertices[i][1][3] = rgba[3];
+   }
+}
+
+static void blitter_set_texcoords_2d(struct blitter_context_priv *ctx,
+                                     struct pipe_surface *surf,
+                                     unsigned x1, unsigned y1,
+                                     unsigned x2, unsigned y2)
+{
+   int i;
+   float s1 = x1 / (float)surf->width;
+   float t1 = y1 / (float)surf->height;
+   float s2 = x2 / (float)surf->width;
+   float t2 = y2 / (float)surf->height;
+
+   ctx->vertices[0][1][0] = s1; /*t0.s*/
+   ctx->vertices[0][1][1] = t1; /*t0.t*/
+
+   ctx->vertices[1][1][0] = s2; /*t1.s*/
+   ctx->vertices[1][1][1] = t1; /*t1.t*/
+
+   ctx->vertices[2][1][0] = s2; /*t2.s*/
+   ctx->vertices[2][1][1] = t2; /*t2.t*/
+
+   ctx->vertices[3][1][0] = s1; /*t3.s*/
+   ctx->vertices[3][1][1] = t2; /*t3.t*/
+
+   for (i = 0; i < 4; i++) {
+      ctx->vertices[i][1][2] = 0; /*r*/
+      ctx->vertices[i][1][3] = 1; /*q*/
+   }
+}
+
+static void blitter_set_texcoords_3d(struct blitter_context_priv *ctx,
+                                     struct pipe_surface *surf,
+                                     unsigned x1, unsigned y1,
+                                     unsigned x2, unsigned y2)
+{
+   int i;
+   float depth = u_minify(surf->texture->depth0, surf->level);
+   float r = surf->zslice / depth;
+
+   blitter_set_texcoords_2d(ctx, surf, x1, y1, x2, y2);
+
+   for (i = 0; i < 4; i++)
+      ctx->vertices[i][1][2] = r; /*r*/
+}
+
+static void blitter_set_texcoords_cube(struct blitter_context_priv *ctx,
+                                       struct pipe_surface *surf,
+                                       unsigned x1, unsigned y1,
+                                       unsigned x2, unsigned y2)
+{
+   int i;
+   float s1 = x1 / (float)surf->width;
+   float t1 = y1 / (float)surf->height;
+   float s2 = x2 / (float)surf->width;
+   float t2 = y2 / (float)surf->height;
+   const float st[4][2] = {
+      {s1, t1}, {s2, t1}, {s2, t2}, {s1, t2}
+   };
+
+   util_map_texcoords2d_onto_cubemap(surf->face,
+                                     /* pointer, stride in floats */
+                                     &st[0][0], 2,
+                                     &ctx->vertices[0][1][0], 8);
+
+   for (i = 0; i < 4; i++)
+      ctx->vertices[i][1][3] = 1; /*q*/
+}
+
+static void blitter_draw_quad(struct blitter_context_priv *ctx)
+{
+   struct pipe_context *pipe = ctx->pipe;
+
+   /* write vertices and draw them */
+   pipe_buffer_write(pipe->screen, ctx->vbuf,
+                     0, sizeof(ctx->vertices), ctx->vertices);
+
+   util_draw_vertex_buffer(pipe, ctx->vbuf, 0, PIPE_PRIM_TRIANGLE_FAN,
+                           4,  /* verts */
+                           2); /* attribs/vert */
+}
+
+static INLINE
+void *blitter_get_state_write_depth_stencil(
+               struct blitter_context_priv *ctx,
+               unsigned stencil)
+{
+   struct pipe_context *pipe = ctx->pipe;
+
+   stencil &= 0xff;
+
+   /* Create the DSA state on-demand. */
+   if (!ctx->dsa_write_depth_stencil[stencil]) {
+      ctx->template_dsa.stencil[0].ref_value = stencil;
+
+      ctx->dsa_write_depth_stencil[stencil] =
+         pipe->create_depth_stencil_alpha_state(pipe, &ctx->template_dsa);
+   }
+
+   return ctx->dsa_write_depth_stencil[stencil];
+}
+
+static INLINE
+void **blitter_get_sampler_state(struct blitter_context_priv *ctx,
+                                 int miplevel)
+{
+   struct pipe_context *pipe = ctx->pipe;
+   struct pipe_sampler_state *sampler_state = &ctx->template_sampler_state;
+
+   assert(miplevel < PIPE_MAX_TEXTURE_LEVELS);
+
+   /* Create the sampler state on-demand. */
+   if (!ctx->sampler_state[miplevel]) {
+      sampler_state->lod_bias = miplevel;
+      sampler_state->min_lod = miplevel;
+      sampler_state->max_lod = miplevel;
+
+      ctx->sampler_state[miplevel] = pipe->create_sampler_state(pipe,
+                                                                sampler_state);
+   }
+
+   /* Return void** so that it can be passed to bind_fragment_sampler_states
+    * directly. */
+   return &ctx->sampler_state[miplevel];
+}
+
+static INLINE
+void *blitter_get_fs_col(struct blitter_context_priv *ctx, unsigned num_cbufs)
+{
+   struct pipe_context *pipe = ctx->pipe;
+   unsigned index = num_cbufs ? num_cbufs - 1 : 0;
+
+   assert(num_cbufs <= PIPE_MAX_COLOR_BUFS);
+
+   if (!ctx->fs_col[index])
+      ctx->fs_col[index] =
+         util_make_fragment_clonecolor_shader(pipe, num_cbufs);
+
+   return ctx->fs_col[index];
+}
+
+static INLINE
+void *blitter_get_fs_texfetch_col(struct blitter_context_priv *ctx,
+                                  unsigned tex_target)
+{
+   struct pipe_context *pipe = ctx->pipe;
+
+   assert(tex_target < PIPE_MAX_TEXTURE_TYPES);
+
+   /* Create the fragment shader on-demand. */
+   if (!ctx->fs_texfetch_col[tex_target]) {
+      switch (tex_target) {
+         case PIPE_TEXTURE_1D:
+            ctx->fs_texfetch_col[PIPE_TEXTURE_1D] =
+               util_make_fragment_tex_shader(pipe, TGSI_TEXTURE_1D);
+            break;
+         case PIPE_TEXTURE_2D:
+            ctx->fs_texfetch_col[PIPE_TEXTURE_2D] =
+               util_make_fragment_tex_shader(pipe, TGSI_TEXTURE_2D);
+            break;
+         case PIPE_TEXTURE_3D:
+            ctx->fs_texfetch_col[PIPE_TEXTURE_3D] =
+               util_make_fragment_tex_shader(pipe, TGSI_TEXTURE_3D);
+            break;
+         case PIPE_TEXTURE_CUBE:
+            ctx->fs_texfetch_col[PIPE_TEXTURE_CUBE] =
+               util_make_fragment_tex_shader(pipe, TGSI_TEXTURE_CUBE);
+            break;
+         default:;
+      }
+   }
+
+   return ctx->fs_texfetch_col[tex_target];
+}
+
+static INLINE
+void *blitter_get_fs_texfetch_depth(struct blitter_context_priv *ctx,
+                                    unsigned tex_target)
+{
+   struct pipe_context *pipe = ctx->pipe;
+
+   assert(tex_target < PIPE_MAX_TEXTURE_TYPES);
+
+   /* Create the fragment shader on-demand. */
+   if (!ctx->fs_texfetch_depth[tex_target]) {
+      switch (tex_target) {
+         case PIPE_TEXTURE_1D:
+            ctx->fs_texfetch_depth[PIPE_TEXTURE_1D] =
+               util_make_fragment_tex_shader_writedepth(pipe, TGSI_TEXTURE_1D);
+            break;
+         case PIPE_TEXTURE_2D:
+            ctx->fs_texfetch_depth[PIPE_TEXTURE_2D] =
+               util_make_fragment_tex_shader_writedepth(pipe, TGSI_TEXTURE_2D);
+            break;
+         case PIPE_TEXTURE_3D:
+            ctx->fs_texfetch_depth[PIPE_TEXTURE_3D] =
+               util_make_fragment_tex_shader_writedepth(pipe, TGSI_TEXTURE_3D);
+            break;
+         case PIPE_TEXTURE_CUBE:
+            ctx->fs_texfetch_depth[PIPE_TEXTURE_CUBE] =
+               util_make_fragment_tex_shader_writedepth(pipe,TGSI_TEXTURE_CUBE);
+            break;
+         default:;
+      }
+   }
+
+   return ctx->fs_texfetch_depth[tex_target];
+}
+
+void util_blitter_clear(struct blitter_context *blitter,
+                        unsigned width, unsigned height,
+                        unsigned num_cbufs,
+                        unsigned clear_buffers,
+                        const float *rgba,
+                        double depth, unsigned stencil)
+{
+   struct blitter_context_priv *ctx = (struct blitter_context_priv*)blitter;
+   struct pipe_context *pipe = ctx->pipe;
+
+   assert(num_cbufs <= PIPE_MAX_COLOR_BUFS);
+
+   blitter_check_saved_CSOs(ctx);
+
+   /* bind CSOs */
+   if (clear_buffers & PIPE_CLEAR_COLOR)
+      pipe->bind_blend_state(pipe, ctx->blend_write_color);
+   else
+      pipe->bind_blend_state(pipe, ctx->blend_keep_color);
+
+   if (clear_buffers & PIPE_CLEAR_DEPTHSTENCIL)
+      pipe->bind_depth_stencil_alpha_state(pipe,
+         blitter_get_state_write_depth_stencil(ctx, stencil));
+   else
+      pipe->bind_depth_stencil_alpha_state(pipe, ctx->dsa_keep_depth_stencil);
+
+   pipe->bind_rasterizer_state(pipe, ctx->rs_state);
+   pipe->bind_fs_state(pipe, blitter_get_fs_col(ctx, num_cbufs));
+   pipe->bind_vs_state(pipe, ctx->vs_col);
+
+   blitter_set_clear_color(ctx, rgba);
+   blitter_set_rectangle(ctx, 0, 0, width, height, depth);
+   blitter_draw_quad(ctx);
+   blitter_restore_CSOs(ctx);
+}
+
+void util_blitter_copy(struct blitter_context *blitter,
+                       struct pipe_surface *dst,
+                       unsigned dstx, unsigned dsty,
+                       struct pipe_surface *src,
+                       unsigned srcx, unsigned srcy,
+                       unsigned width, unsigned height,
+                       boolean ignore_stencil)
+{
+   struct blitter_context_priv *ctx = (struct blitter_context_priv*)blitter;
+   struct pipe_context *pipe = ctx->pipe;
+   struct pipe_screen *screen = pipe->screen;
+   struct pipe_framebuffer_state fb_state;
+   boolean is_stencil, is_depth;
+   unsigned dst_tex_usage;
+
+   /* give up if textures are not set */
+   assert(dst->texture && src->texture);
+   if (!dst->texture || !src->texture)
+      return;
+
+   is_depth = pf_get_component_bits(src->format, PIPE_FORMAT_COMP_Z) != 0;
+   is_stencil = pf_get_component_bits(src->format, PIPE_FORMAT_COMP_S) != 0;
+   dst_tex_usage = is_depth || is_stencil ? PIPE_TEXTURE_USAGE_DEPTH_STENCIL :
+                                            PIPE_TEXTURE_USAGE_RENDER_TARGET;
+
+   /* check if we can sample from and render to the surfaces */
+   /* (assuming copying a stencil buffer is not possible) */
+   if ((!ignore_stencil && is_stencil) ||
+       !screen->is_format_supported(screen, dst->format, dst->texture->target,
+                                    dst_tex_usage, 0) ||
+       !screen->is_format_supported(screen, src->format, src->texture->target,
+                                    PIPE_TEXTURE_USAGE_SAMPLER, 0)) {
+      util_surface_copy(pipe, FALSE, dst, dstx, dsty, src, srcx, srcy,
+                        width, height);
+      return;
+   }
+
+   /* check whether the states are properly saved */
+   blitter_check_saved_CSOs(ctx);
+   assert(blitter->saved_fb_state.nr_cbufs != ~0);
+   assert(blitter->saved_num_textures != ~0);
+   assert(blitter->saved_num_sampler_states != ~0);
+   assert(src->texture->target < PIPE_MAX_TEXTURE_TYPES);
+
+   /* bind CSOs */
+   fb_state.width = dst->width;
+   fb_state.height = dst->height;
+
+   if (is_depth) {
+      pipe->bind_blend_state(pipe, ctx->blend_keep_color);
+      pipe->bind_depth_stencil_alpha_state(pipe,
+                                           ctx->dsa_write_depth_keep_stencil);
+      pipe->bind_fs_state(pipe,
+         blitter_get_fs_texfetch_depth(ctx, src->texture->target));
+
+      fb_state.nr_cbufs = 0;
+      fb_state.zsbuf = dst;
+   } else {
+      pipe->bind_blend_state(pipe, ctx->blend_write_color);
+      pipe->bind_depth_stencil_alpha_state(pipe, ctx->dsa_keep_depth_stencil);
+      pipe->bind_fs_state(pipe,
+         blitter_get_fs_texfetch_col(ctx, src->texture->target));
+
+      fb_state.nr_cbufs = 1;
+      fb_state.cbufs[0] = dst;
+      fb_state.zsbuf = 0;
+   }
+
+   pipe->bind_rasterizer_state(pipe, ctx->rs_state);
+   pipe->bind_vs_state(pipe, ctx->vs_tex);
+   pipe->bind_fragment_sampler_states(pipe, 1,
+      blitter_get_sampler_state(ctx, src->level));
+   pipe->set_fragment_sampler_textures(pipe, 1, &src->texture);
+   pipe->set_framebuffer_state(pipe, &fb_state);
+
+   /* set texture coordinates */
+   switch (src->texture->target) {
+      case PIPE_TEXTURE_1D:
+      case PIPE_TEXTURE_2D:
+         blitter_set_texcoords_2d(ctx, src, srcx, srcy,
+                                  srcx+width, srcy+height);
+         break;
+      case PIPE_TEXTURE_3D:
+         blitter_set_texcoords_3d(ctx, src, srcx, srcy,
+                                  srcx+width, srcy+height);
+         break;
+      case PIPE_TEXTURE_CUBE:
+         blitter_set_texcoords_cube(ctx, src, srcx, srcy,
+                                    srcx+width, srcy+height);
+         break;
+      default:
+         assert(0);
+   }
+
+   blitter_set_rectangle(ctx, dstx, dsty, dstx+width, dsty+height, 0);
+   blitter_draw_quad(ctx);
+   blitter_restore_CSOs(ctx);
+}
+
+void util_blitter_fill(struct blitter_context *blitter,
+                       struct pipe_surface *dst,
+                       unsigned dstx, unsigned dsty,
+                       unsigned width, unsigned height,
+                       unsigned value)
+{
+   struct blitter_context_priv *ctx = (struct blitter_context_priv*)blitter;
+   struct pipe_context *pipe = ctx->pipe;
+   struct pipe_screen *screen = pipe->screen;
+   struct pipe_framebuffer_state fb_state;
+   float rgba[4];
+   ubyte ub_rgba[4] = {0};
+   union util_color color;
+   int i;
+
+   assert(dst->texture);
+   if (!dst->texture)
+      return;
+
+   /* check if we can render to the surface */
+   if (pf_is_depth_or_stencil(dst->format) || /* unlikely, but you never know */
+       !screen->is_format_supported(screen, dst->format, dst->texture->target,
+                                    PIPE_TEXTURE_USAGE_RENDER_TARGET, 0)) {
+      util_surface_fill(pipe, dst, dstx, dsty, width, height, value);
+      return;
+   }
+
+   /* unpack the color */
+   color.ui = value;
+   util_unpack_color_ub(dst->format, &color,
+                        ub_rgba, ub_rgba+1, ub_rgba+2, ub_rgba+3);
+   for (i = 0; i < 4; i++)
+      rgba[i] = ubyte_to_float(ub_rgba[i]);
+
+   /* check the saved state */
+   blitter_check_saved_CSOs(ctx);
+   assert(blitter->saved_fb_state.nr_cbufs != ~0);
+
+   /* bind CSOs */
+   pipe->bind_blend_state(pipe, ctx->blend_write_color);
+   pipe->bind_depth_stencil_alpha_state(pipe, ctx->dsa_keep_depth_stencil);
+   pipe->bind_rasterizer_state(pipe, ctx->rs_state);
+   pipe->bind_fs_state(pipe, blitter_get_fs_col(ctx, 1));
+   pipe->bind_vs_state(pipe, ctx->vs_col);
+
+   /* set a framebuffer state */
+   fb_state.width = dst->width;
+   fb_state.height = dst->height;
+   fb_state.nr_cbufs = 1;
+   fb_state.cbufs[0] = dst;
+   fb_state.zsbuf = 0;
+   pipe->set_framebuffer_state(pipe, &fb_state);
+
+   blitter_set_clear_color(ctx, rgba);
+   blitter_set_rectangle(ctx, 0, 0, width, height, 0);
+   blitter_draw_quad(ctx);
+   blitter_restore_CSOs(ctx);
+}
diff --git a/src/gallium/auxiliary/util/u_blitter.h b/src/gallium/auxiliary/util/u_blitter.h
new file mode 100644 (file)
index 0000000..3da5a6c
--- /dev/null
@@ -0,0 +1,230 @@
+/**************************************************************************
+ *
+ * Copyright 2009 Marek Olšák <maraeo@gmail.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#ifndef U_BLITTER_H
+#define U_BLITTER_H
+
+#include "util/u_memory.h"
+
+#include "pipe/p_state.h"
+
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+struct pipe_context;
+
+struct blitter_context
+{
+   /* Private members, really. */
+   void *saved_blend_state;   /**< blend state */
+   void *saved_dsa_state;     /**< depth stencil alpha state */
+   void *saved_rs_state;      /**< rasterizer state */
+   void *saved_fs, *saved_vs; /**< fragment shader, vertex shader */
+
+   struct pipe_framebuffer_state saved_fb_state;  /**< framebuffer state */
+
+   int saved_num_sampler_states;
+   void *saved_sampler_states[32];
+
+   int saved_num_textures;
+   struct pipe_texture *saved_textures[32]; /* is 32 enough? */
+};
+
+/**
+ * Create a blitter context.
+ */
+struct blitter_context *util_blitter_create(struct pipe_context *pipe);
+
+/**
+ * Destroy a blitter context.
+ */
+void util_blitter_destroy(struct blitter_context *blitter);
+
+/*
+ * These CSOs must be saved before any of the following functions is called:
+ * - blend state
+ * - depth stencil alpha state
+ * - rasterizer state
+ * - vertex shader
+ * - fragment shader
+ */
+
+/**
+ * Clear a specified set of currently bound buffers to specified values.
+ */
+void util_blitter_clear(struct blitter_context *blitter,
+                        unsigned width, unsigned height,
+                        unsigned num_cbufs,
+                        unsigned clear_buffers,
+                        const float *rgba,
+                        double depth, unsigned stencil);
+
+/**
+ * Copy a block of pixels from one surface to another.
+ *
+ * You can copy from any color format to any other color format provided
+ * the former can be sampled and the latter can be rendered to. Otherwise,
+ * a software fallback path is taken and both surfaces must be of the same
+ * format.
+ *
+ * The same holds for depth-stencil formats with the exception that stencil
+ * cannot be copied unless you set ignore_stencil to FALSE. In that case,
+ * a software fallback path is taken and both surfaces must be of the same
+ * format.
+ *
+ * Use pipe_screen->is_format_supported to know your options.
+ *
+ * These states must be saved in the blitter in addition to the state objects
+ * already required to be saved:
+ * - framebuffer state
+ * - fragment sampler states
+ * - fragment sampler textures
+ */
+void util_blitter_copy(struct blitter_context *blitter,
+                       struct pipe_surface *dst,
+                       unsigned dstx, unsigned dsty,
+                       struct pipe_surface *src,
+                       unsigned srcx, unsigned srcy,
+                       unsigned width, unsigned height,
+                       boolean ignore_stencil);
+
+/**
+ * Fill a region of a surface with a constant value.
+ *
+ * If the surface cannot be rendered to or it's a depth-stencil format,
+ * a software fallback path is taken.
+ *
+ * These states must be saved in the blitter in addition to the state objects
+ * already required to be saved:
+ * - framebuffer state
+ */
+void util_blitter_fill(struct blitter_context *blitter,
+                       struct pipe_surface *dst,
+                       unsigned dstx, unsigned dsty,
+                       unsigned width, unsigned height,
+                       unsigned value);
+
+/**
+ * Copy all pixels from one surface to another.
+ *
+ * The rules are the same as in util_blitter_copy with the addition that
+ * surfaces must have the same size.
+ */
+static INLINE
+void util_blitter_copy_surface(struct blitter_context *blitter,
+                               struct pipe_surface *dst,
+                               struct pipe_surface *src,
+                               boolean ignore_stencil)
+{
+   assert(dst->width == src->width && dst->height == src->height);
+
+   util_blitter_copy(blitter, dst, 0, 0, src, 0, 0, src->width, src->height,
+                     ignore_stencil);
+}
+
+
+/* The functions below should be used to save currently bound constant state
+ * objects inside a driver. The objects are automatically restored at the end
+ * of the util_blitter_{clear, fill, copy, copy_surface} functions and then
+ * forgotten.
+ *
+ * CSOs not listed here are not affected by util_blitter. */
+
+static INLINE
+void util_blitter_save_blend(struct blitter_context *blitter,
+                             void *state)
+{
+   blitter->saved_blend_state = state;
+}
+
+static INLINE
+void util_blitter_save_depth_stencil_alpha(struct blitter_context *blitter,
+                                           void *state)
+{
+   blitter->saved_dsa_state = state;
+}
+
+static INLINE
+void util_blitter_save_rasterizer(struct blitter_context *blitter,
+                                  void *state)
+{
+   blitter->saved_rs_state = state;
+}
+
+static INLINE
+void util_blitter_save_fragment_shader(struct blitter_context *blitter,
+                                       void *fs)
+{
+   blitter->saved_fs = fs;
+}
+
+static INLINE
+void util_blitter_save_vertex_shader(struct blitter_context *blitter,
+                                     void *vs)
+{
+   blitter->saved_vs = vs;
+}
+
+static INLINE
+void util_blitter_save_framebuffer(struct blitter_context *blitter,
+                                   struct pipe_framebuffer_state *state)
+{
+   blitter->saved_fb_state = *state;
+}
+
+static INLINE
+void util_blitter_save_fragment_sampler_states(
+                  struct blitter_context *blitter,
+                  int num_sampler_states,
+                  void **sampler_states)
+{
+   assert(num_sampler_states <= Elements(blitter->saved_sampler_states));
+
+   blitter->saved_num_sampler_states = num_sampler_states;
+   memcpy(blitter->saved_sampler_states, sampler_states,
+          num_sampler_states * sizeof(void *));
+}
+
+static INLINE
+void util_blitter_save_fragment_sampler_textures(
+                  struct blitter_context *blitter,
+                  int num_textures,
+                  struct pipe_texture **textures)
+{
+   assert(num_textures <= Elements(blitter->saved_textures));
+
+   blitter->saved_num_textures = num_textures;
+   memcpy(blitter->saved_textures, textures,
+          num_textures * sizeof(struct pipe_texture *));
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/src/gallium/auxiliary/util/u_dl.c b/src/gallium/auxiliary/util/u_dl.c
new file mode 100644 (file)
index 0000000..b42b429
--- /dev/null
@@ -0,0 +1,79 @@
+/**************************************************************************
+ *
+ * Copyright 2009 VMware, Inc.
+ * Copyright 1999-2008  Brian Paul
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ **************************************************************************/
+
+
+#include "pipe/p_config.h"
+
+#if defined(PIPE_OS_UNIX)
+#include <dlfcn.h>
+#endif
+#if defined(PIPE_OS_WINDOWS)
+#include <windows.h>
+#endif
+
+#include "u_dl.h"
+
+
+struct util_dl_library *
+util_dl_open(const char *filename)
+{
+#if defined(PIPE_OS_UNIX)
+   return (struct util_dl_library *)dlopen(filename, RTLD_LAZY | RTLD_GLOBAL);
+#elif defined(PIPE_OS_WINDOWS)
+   return (struct util_dl_library *)LoadLibraryA(filename);
+#else
+   return NULL;
+#endif
+}
+
+
+util_dl_proc
+util_dl_get_proc_address(struct util_dl_library *library,
+                         const char *procname)
+{
+#if defined(PIPE_OS_UNIX)
+   return (util_dl_proc)dlsym((void *)library, procname);
+#elif defined(PIPE_OS_WINDOWS)
+   return (util_dl_proc)GetProcAddress((HMODULE)library, procname);
+#else
+   return (util_dl_proc)NULL;
+#endif
+}
+
+
+void
+util_dl_close(struct util_dl_library *library)
+{
+#if defined(PIPE_OS_UNIX)
+   dlclose((void *)library);
+#elif defined(PIPE_OS_WINDOWS)
+   FreeLibrary((HMODULE)library);
+#else
+   (void)library;
+#endif
+}
diff --git a/src/gallium/auxiliary/util/u_dl.h b/src/gallium/auxiliary/util/u_dl.h
new file mode 100644 (file)
index 0000000..018b385
--- /dev/null
@@ -0,0 +1,61 @@
+/**************************************************************************
+ *
+ * Copyright 2009 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ **************************************************************************/
+
+
+#ifndef U_DL_H_
+#define U_DL_H_
+
+
+struct util_dl_library;
+
+
+typedef void (*util_dl_proc)(void);
+
+
+/**
+ * Open a library dynamically.
+ */
+struct util_dl_library *
+util_dl_open(const char *filename);
+
+
+/**
+ * Lookup a function in a library.
+ */
+util_dl_proc
+util_dl_get_proc_address(struct util_dl_library *library,
+                         const char *procname);
+
+
+/**
+ * Close a library.
+ */
+void
+util_dl_close(struct util_dl_library *library);
+
+
+#endif /* U_DL_H_ */
index eeb1a9657fd05ce5fdcce2a8f162dee9f290e31b..0b05ddb9312c700911b3073177fb6b1412bbbdd8 100644 (file)
@@ -325,14 +325,14 @@ def generate_format_read(format, dst_type, dst_native_type, dst_suffix):
             elif swizzle == SWIZZLE_0:
                 value = '0'
             elif swizzle == SWIZZLE_1:
-                value = '1'
+                value = get_one(dst_type)
             else:
                 assert False
         elif format.colorspace == 'zs':
             if i < 3:
                 value = 'z'
             else:
-                value = '1'
+                value = get_one(dst_type)
         else:
             assert False
         print '         *dst_pixel++ = %s; /* %s */' % (value, 'rgba'[i])
index 83263d9fe647fca19cf3d4641760ac4185406d94..69ff3b9dd9e96f4a6bd0ef2c6c9d7f66417eb577 100644 (file)
@@ -46,6 +46,7 @@
 #include "util/u_gen_mipmap.h"
 #include "util/u_simple_shaders.h"
 #include "util/u_math.h"
+#include "util/u_texture.h"
 
 #include "cso_cache/cso_context.h"
 
@@ -1317,7 +1318,7 @@ util_create_gen_mipmap(struct pipe_context *pipe,
    }
 
    /* fragment shader */
-   ctx->fs = util_make_fragment_tex_shader(pipe);
+   ctx->fs = util_make_fragment_tex_shader(pipe, TGSI_TEXTURE_2D);
 
    /* vertex data that doesn't change */
    for (i = 0; i < 4; i++) {
@@ -1383,59 +1384,9 @@ set_vertex_data(struct gen_mipmap_state *ctx,
       static const float st[4][2] = {
          {0.0f, 0.0f}, {1.0f, 0.0f}, {1.0f, 1.0f}, {0.0f, 1.0f}
       };
-      float rx, ry, rz;
-      uint i;
-
-      /* loop over quad verts */
-      for (i = 0; i < 4; i++) {
-         /* Compute sc = +/-scale and tc = +/-scale.
-          * Not +/-1 to avoid cube face selection ambiguity near the edges,
-          * though that can still sometimes happen with this scale factor...
-          */
-         const float scale = 0.9999f;
-         const float sc = (2.0f * st[i][0] - 1.0f) * scale;
-         const float tc = (2.0f * st[i][1] - 1.0f) * scale;
-
-         switch (face) {
-         case PIPE_TEX_FACE_POS_X:
-            rx = 1.0f;
-            ry = -tc;
-            rz = -sc;
-            break;
-         case PIPE_TEX_FACE_NEG_X:
-            rx = -1.0f;
-            ry = -tc;
-            rz = sc;
-            break;
-         case PIPE_TEX_FACE_POS_Y:
-            rx = sc;
-            ry = 1.0f;
-            rz = tc;
-            break;
-         case PIPE_TEX_FACE_NEG_Y:
-            rx = sc;
-            ry = -1.0f;
-            rz = -tc;
-            break;
-         case PIPE_TEX_FACE_POS_Z:
-            rx = sc;
-            ry = -tc;
-            rz = 1.0f;
-            break;
-         case PIPE_TEX_FACE_NEG_Z:
-            rx = -sc;
-            ry = -tc;
-            rz = -1.0f;
-            break;
-         default:
-            rx = ry = rz = 0.0f;
-            assert(0);
-         }
 
-         ctx->vertices[i][1][0] = rx; /*s*/
-         ctx->vertices[i][1][1] = ry; /*t*/
-         ctx->vertices[i][1][2] = rz; /*r*/
-      }
+      util_map_texcoords2d_onto_cubemap(face, &st[0][0], 2,
+                                        &ctx->vertices[0][1][0], 8);
    }
    else {
       /* 1D/2D */
index 1c8b157d91fcd1ac9d21b00fa69b3d9c01fd4e53..8172ead0201e836548178033f762dabbfa2ef680 100644 (file)
@@ -2,6 +2,7 @@
  *
  * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
  * All Rights Reserved.
+ * Copyright 2009 Marek Olšák <maraeo@gmail.com>
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
  * copy of this software and associated documentation files (the
@@ -30,6 +31,7 @@
  * Simple vertex/fragment shader generators.
  *  
  * @author Brian Paul
+           Marek Olšák
  */
 
 
@@ -87,6 +89,7 @@ util_make_vertex_passthrough_shader(struct pipe_context *pipe,
  */
 void *
 util_make_fragment_tex_shader_writemask(struct pipe_context *pipe,
+                                        unsigned tex_target,
                                         unsigned writemask )
 {
    struct ureg_program *ureg;
@@ -116,30 +119,82 @@ util_make_fragment_tex_shader_writemask(struct pipe_context *pipe,
 
    ureg_TEX( ureg, 
              ureg_writemask(out, writemask),
-             TGSI_TEXTURE_2D, tex, sampler );
+             tex_target, tex, sampler );
    ureg_END( ureg );
 
    return ureg_create_shader_and_destroy( ureg, pipe );
 }
 
 void *
-util_make_fragment_tex_shader(struct pipe_context *pipe )
+util_make_fragment_tex_shader(struct pipe_context *pipe, unsigned tex_target )
 {
    return util_make_fragment_tex_shader_writemask( pipe,
+                                                   tex_target,
                                                    TGSI_WRITEMASK_XYZW );
 }
 
+/**
+ * Make a simple fragment texture shader which reads an X component from
+ * a texture and writes it as depth.
+ */
+void *
+util_make_fragment_tex_shader_writedepth(struct pipe_context *pipe,
+                                         unsigned tex_target)
+{
+   struct ureg_program *ureg;
+   struct ureg_src sampler;
+   struct ureg_src tex;
+   struct ureg_dst out, depth;
+   struct ureg_src imm;
 
+   ureg = ureg_create( TGSI_PROCESSOR_FRAGMENT );
+   if (ureg == NULL)
+      return NULL;
+
+   sampler = ureg_DECL_sampler( ureg, 0 );
+
+   tex = ureg_DECL_fs_input( ureg,
+                             TGSI_SEMANTIC_GENERIC, 0,
+                             TGSI_INTERPOLATE_PERSPECTIVE );
+
+   out = ureg_DECL_output( ureg,
+                           TGSI_SEMANTIC_COLOR,
+                           0 );
+
+   depth = ureg_DECL_output( ureg,
+                             TGSI_SEMANTIC_POSITION,
+                             0 );
+
+   imm = ureg_imm4f( ureg, 0, 0, 0, 1 );
+
+   ureg_MOV( ureg, out, imm );
+
+   ureg_TEX( ureg,
+             ureg_writemask(depth, TGSI_WRITEMASK_Z),
+             tex_target, tex, sampler );
+   ureg_END( ureg );
+
+   return ureg_create_shader_and_destroy( ureg, pipe );
+}
 
 /**
  * Make simple fragment color pass-through shader.
  */
 void *
 util_make_fragment_passthrough_shader(struct pipe_context *pipe)
+{
+   return util_make_fragment_clonecolor_shader(pipe, 1);
+}
+
+void *
+util_make_fragment_clonecolor_shader(struct pipe_context *pipe, int num_cbufs)
 {
    struct ureg_program *ureg;
    struct ureg_src src;
-   struct ureg_dst dst;
+   struct ureg_dst dst[8];
+   int i;
+
+   assert(num_cbufs <= 8);
 
    ureg = ureg_create( TGSI_PROCESSOR_FRAGMENT );
    if (ureg == NULL)
@@ -148,12 +203,13 @@ util_make_fragment_passthrough_shader(struct pipe_context *pipe)
    src = ureg_DECL_fs_input( ureg, TGSI_SEMANTIC_COLOR, 0, 
                              TGSI_INTERPOLATE_PERSPECTIVE );
 
-   dst = ureg_DECL_output( ureg, TGSI_SEMANTIC_COLOR, 0 );
+   for (i = 0; i < num_cbufs; i++)
+      dst[i] = ureg_DECL_output( ureg, TGSI_SEMANTIC_COLOR, i );
+
+   for (i = 0; i < num_cbufs; i++)
+      ureg_MOV( ureg, dst[i], src );
 
-   ureg_MOV( ureg, dst, src );
    ureg_END( ureg );
 
    return ureg_create_shader_and_destroy( ureg, pipe );
 }
-
-
index d2e80d6eb4d1dcaf9c63ec09bd36288fc8d3200e..6e760942e25242d48fa9587d9e52626071a43378 100644 (file)
@@ -51,16 +51,25 @@ util_make_vertex_passthrough_shader(struct pipe_context *pipe,
 
 extern void *
 util_make_fragment_tex_shader_writemask(struct pipe_context *pipe, 
-                                        unsigned writemask );
+                                        unsigned tex_target,
+                                        unsigned writemask);
 
 extern void *
-util_make_fragment_tex_shader(struct pipe_context *pipe);
+util_make_fragment_tex_shader(struct pipe_context *pipe, unsigned tex_target);
+
+
+extern void *
+util_make_fragment_tex_shader_writedepth(struct pipe_context *pipe,
+                                         unsigned tex_target);
 
 
 extern void *
 util_make_fragment_passthrough_shader(struct pipe_context *pipe);
 
 
+extern void *
+util_make_fragment_clonecolor_shader(struct pipe_context *pipe, int num_cbufs);
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/src/gallium/auxiliary/util/u_texture.c b/src/gallium/auxiliary/util/u_texture.c
new file mode 100644 (file)
index 0000000..cd477ab
--- /dev/null
@@ -0,0 +1,102 @@
+/**************************************************************************
+ *
+ * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * Copyright 2008 VMware, Inc.  All rights reserved.
+ * Copyright 2009 Marek Olšák <maraeo@gmail.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/**
+ * @file
+ * Texture mapping utility functions.
+ *
+ * @author Brian Paul
+ *         Marek Olšák
+ */
+
+#include "pipe/p_defines.h"
+
+#include "util/u_texture.h"
+
+void util_map_texcoords2d_onto_cubemap(unsigned face,
+                                       const float *in_st, unsigned in_stride,
+                                       float *out_str, unsigned out_stride)
+{
+   int i;
+   float rx, ry, rz;
+
+   /* loop over quad verts */
+   for (i = 0; i < 4; i++) {
+      /* Compute sc = +/-scale and tc = +/-scale.
+       * Not +/-1 to avoid cube face selection ambiguity near the edges,
+       * though that can still sometimes happen with this scale factor...
+       */
+      const float scale = 0.9999f;
+      const float sc = (2 * in_st[0] - 1) * scale;
+      const float tc = (2 * in_st[1] - 1) * scale;
+
+      switch (face) {
+         case PIPE_TEX_FACE_POS_X:
+            rx = 1;
+            ry = -tc;
+            rz = -sc;
+            break;
+         case PIPE_TEX_FACE_NEG_X:
+            rx = -1;
+            ry = -tc;
+            rz = sc;
+            break;
+         case PIPE_TEX_FACE_POS_Y:
+            rx = sc;
+            ry = 1;
+            rz = tc;
+            break;
+         case PIPE_TEX_FACE_NEG_Y:
+            rx = sc;
+            ry = -1;
+            rz = -tc;
+            break;
+         case PIPE_TEX_FACE_POS_Z:
+            rx = sc;
+            ry = -tc;
+            rz = 1;
+            break;
+         case PIPE_TEX_FACE_NEG_Z:
+            rx = -sc;
+            ry = -tc;
+            rz = -1;
+            break;
+         default:
+            rx = ry = rz = 0;
+            assert(0);
+      }
+
+      out_str[0] = rx; /*s*/
+      out_str[1] = ry; /*t*/
+      out_str[2] = rz; /*r*/
+
+      in_st += in_stride;
+      out_str += out_stride;
+   }
+}
diff --git a/src/gallium/auxiliary/util/u_texture.h b/src/gallium/auxiliary/util/u_texture.h
new file mode 100644 (file)
index 0000000..93b2f1e
--- /dev/null
@@ -0,0 +1,54 @@
+/**************************************************************************
+ *
+ * Copyright 2009 Marek Olšák <maraeo@gmail.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#ifndef U_TEXTURE_H
+#define U_TEXTURE_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * Convert 2D texture coordinates of 4 vertices into cubemap coordinates
+ * in the given face.
+ * Coordinates must be in the range [0,1].
+ *
+ * \param face          Cubemap face.
+ * \param in_st         4 pairs of 2D texture coordinates to convert.
+ * \param in_stride     Stride of in_st in floats.
+ * \param out_str       STR cubemap texture coordinates to compute.
+ * \param out_stride    Stride of out_str in floats.
+ */
+void util_map_texcoords2d_onto_cubemap(unsigned face,
+                                       const float *in_st, unsigned in_stride,
+                                       float *out_str, unsigned out_stride);
+
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
index 9c59677a741adc361320751f2af0333fb274e5aa..eea6b5d6a5c9d2d1002fa9ff5b6edac8dba0c07c 100644 (file)
@@ -629,7 +629,8 @@ lp_build_abs(struct lp_build_context *bld,
    if(type.floating) {
       /* Mask out the sign bit */
       LLVMTypeRef int_vec_type = lp_build_int_vec_type(type);
-      LLVMValueRef mask = lp_build_int_const_scalar(type, ((unsigned long long)1 << type.width) - 1);
+      unsigned long absMask = ~(1 << (type.width - 1));
+      LLVMValueRef mask = lp_build_int_const_scalar(type, ((unsigned long long) absMask));
       a = LLVMBuildBitCast(bld->builder, a, int_vec_type, "");
       a = LLVMBuildAnd(bld->builder, a, mask, "");
       a = LLVMBuildBitCast(bld->builder, a, vec_type, "");
@@ -1083,7 +1084,7 @@ lp_build_log(struct lp_build_context *bld,
              LLVMValueRef x)
 {
    /* log(2) */
-   LLVMValueRef log2 = lp_build_const_scalar(bld->type, 1.4426950408889634);
+   LLVMValueRef log2 = lp_build_const_scalar(bld->type, 0.69314718055994529);
 
    return lp_build_mul(bld, log2, lp_build_exp2(bld, x));
 }
@@ -1095,7 +1096,7 @@ lp_build_log(struct lp_build_context *bld,
 
 /**
  * Generate polynomial.
- * Ex:  x^2 * coeffs[0] + x * coeffs[1] + coeffs[2].
+ * Ex:  coeffs[0] + x * coeffs[1] + x^2 * coeffs[2].
  */
 static LLVMValueRef
 lp_build_polynomial(struct lp_build_context *bld,
@@ -1285,13 +1286,13 @@ lp_build_log2_approx(struct lp_build_context *bld,
       /* mant = (float) mantissa(x) */
       mant = LLVMBuildAnd(bld->builder, i, mantmask, "");
       mant = LLVMBuildOr(bld->builder, mant, one, "");
-      mant = LLVMBuildSIToFP(bld->builder, mant, vec_type, "");
+      mant = LLVMBuildBitCast(bld->builder, mant, vec_type, "");
 
       logmant = lp_build_polynomial(bld, mant, lp_build_log2_polynomial,
                                     Elements(lp_build_log2_polynomial));
 
       /* This effectively increases the polynomial degree by one, but ensures that log2(1) == 0*/
-      logmant = LLVMBuildMul(bld->builder, logmant, LLVMBuildMul(bld->builder, mant, bld->one, ""), "");
+      logmant = LLVMBuildMul(bld->builder, logmant, LLVMBuildSub(bld->builder, mant, bld->one, ""), "");
 
       res = LLVMBuildAdd(bld->builder, logmant, logexp, "");
    }
index 3eb0e0c57cb5d5b272521cfcf244c062991e38bf..a67c70ff25a277a394476e9d916e2443aa1b7692 100644 (file)
@@ -763,7 +763,7 @@ emit_instruction(
       FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
          src0 = emit_fetch( bld, inst, 0, chan_index );
          tmp0 = lp_build_floor(&bld->base, src0);
-         tmp0 = lp_build_sub(&bld->base, tmp0, src0);
+         tmp0 = lp_build_sub(&bld->base, src0, tmp0);
          dst0[chan_index] = tmp0;
       }
       break;
diff --git a/src/gallium/drivers/llvmpipe/lp_debug.h b/src/gallium/drivers/llvmpipe/lp_debug.h
new file mode 100644 (file)
index 0000000..74b2757
--- /dev/null
@@ -0,0 +1,71 @@
+/**************************************************************************
+ * 
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+
+#ifndef LP_DEBUG_H
+#define LP_DEBUG_H
+
+#include "pipe/p_compiler.h"
+#include "util/u_debug.h"
+
+extern void
+st_print_current(void);
+
+
+#define DEBUG_PIPE      0x1
+#define DEBUG_TGSI      0x2
+#define DEBUG_TEX       0x4
+#define DEBUG_ASM       0x8
+#define DEBUG_SETUP     0x10
+#define DEBUG_RAST      0x20
+#define DEBUG_QUERY     0x40
+#define DEBUG_SCREEN    0x80
+#define DEBUG_JIT       0x100
+
+#ifdef DEBUG
+extern int LP_DEBUG;
+#else
+#define LP_DEBUG 0
+#endif
+
+void st_debug_init( void );
+
+static INLINE void
+LP_DBG( unsigned flag, const char *fmt, ... )
+{
+    if (LP_DEBUG & flag)
+    {
+        va_list args;
+
+        va_start( args, fmt );
+        debug_vprintf( fmt, args );
+        va_end( args );
+    }
+}
+
+
+#endif /* LP_DEBUG_H */
index 19fe2850fd1332466bd0b65afbd1d57abfa4dd68..9b47415f003350c7592894fb6338f50c7943e2b6 100644 (file)
 #include "lp_winsys.h"
 #include "lp_jit.h"
 #include "lp_screen.h"
+#include "lp_debug.h"
+
+#ifdef DEBUG
+int LP_DEBUG = 0;
+
+static const struct debug_named_value lp_debug_flags[] = {
+   { "pipe",   DEBUG_PIPE },
+   { "tgsi",   DEBUG_TGSI },
+   { "tex",    DEBUG_TEX },
+   { "asm",    DEBUG_ASM },
+   { "setup",  DEBUG_SETUP },
+   { "rast",   DEBUG_RAST },
+   { "query",  DEBUG_QUERY },
+   { "screen", DEBUG_SCREEN },
+   { "jit",    DEBUG_JIT },
+   {NULL, 0}
+};
+#endif
 
 
 static const char *
@@ -259,6 +277,10 @@ llvmpipe_create_screen(struct llvmpipe_winsys *winsys)
 {
    struct llvmpipe_screen *screen = CALLOC_STRUCT(llvmpipe_screen);
 
+#ifdef DEBUG
+   LP_DEBUG = debug_get_flags_option("LP_DEBUG", lp_debug_flags, 0 );
+#endif
+
    if (!screen)
       return NULL;
 
index ee0f69b2af9723e6a58d74486de64225af5f3f7c..22683ff8b428581ec91b0312a0887af640324d42 100644 (file)
@@ -87,6 +87,7 @@
 #include "lp_state.h"
 #include "lp_quad.h"
 #include "lp_tex_sample.h"
+#include "lp_debug.h"
 
 
 static const unsigned char quad_offset_x[4] = {0, 1, 0, 1};
@@ -408,59 +409,58 @@ generate_fragment(struct llvmpipe_context *lp,
    unsigned i;
    unsigned chan;
 
-#ifdef DEBUG
-   tgsi_dump(shader->base.tokens, 0);
-   if(key->depth.enabled) {
-      debug_printf("depth.format = %s\n", pf_name(key->zsbuf_format));
-      debug_printf("depth.func = %s\n", debug_dump_func(key->depth.func, TRUE));
-      debug_printf("depth.writemask = %u\n", key->depth.writemask);
-   }
-   if(key->alpha.enabled) {
-      debug_printf("alpha.func = %s\n", debug_dump_func(key->alpha.func, TRUE));
-      debug_printf("alpha.ref_value = %f\n", key->alpha.ref_value);
-   }
-   if(key->blend.logicop_enable) {
-      debug_printf("blend.logicop_func = %u\n", key->blend.logicop_func);
-   }
-   else if(key->blend.blend_enable) {
-      debug_printf("blend.rgb_func = %s\n",   debug_dump_blend_func  (key->blend.rgb_func, TRUE));
-      debug_printf("rgb_src_factor = %s\n",   debug_dump_blend_factor(key->blend.rgb_src_factor, TRUE));
-      debug_printf("rgb_dst_factor = %s\n",   debug_dump_blend_factor(key->blend.rgb_dst_factor, TRUE));
-      debug_printf("alpha_func = %s\n",       debug_dump_blend_func  (key->blend.alpha_func, TRUE));
-      debug_printf("alpha_src_factor = %s\n", debug_dump_blend_factor(key->blend.alpha_src_factor, TRUE));
-      debug_printf("alpha_dst_factor = %s\n", debug_dump_blend_factor(key->blend.alpha_dst_factor, TRUE));
-   }
-   debug_printf("blend.colormask = 0x%x\n", key->blend.colormask);
-   for(i = 0; i < PIPE_MAX_SAMPLERS; ++i) {
-      if(key->sampler[i].format) {
-         debug_printf("sampler[%u] = \n", i);
-         debug_printf("  .format = %s\n",
-                      pf_name(key->sampler[i].format));
-         debug_printf("  .target = %s\n",
-                      debug_dump_tex_target(key->sampler[i].target, TRUE));
-         debug_printf("  .pot = %u %u %u\n",
-                      key->sampler[i].pot_width,
-                      key->sampler[i].pot_height,
-                      key->sampler[i].pot_depth);
-         debug_printf("  .wrap = %s %s %s\n",
-                      debug_dump_tex_wrap(key->sampler[i].wrap_s, TRUE),
-                      debug_dump_tex_wrap(key->sampler[i].wrap_t, TRUE),
-                      debug_dump_tex_wrap(key->sampler[i].wrap_r, TRUE));
-         debug_printf("  .min_img_filter = %s\n",
-                      debug_dump_tex_filter(key->sampler[i].min_img_filter, TRUE));
-         debug_printf("  .min_mip_filter = %s\n",
-                      debug_dump_tex_mipfilter(key->sampler[i].min_mip_filter, TRUE));
-         debug_printf("  .mag_img_filter = %s\n",
-                      debug_dump_tex_filter(key->sampler[i].mag_img_filter, TRUE));
-         if(key->sampler[i].compare_mode)
-            debug_printf("  .compare_mode = %s\n", debug_dump_func(key->sampler[i].compare_func, TRUE));
-         debug_printf("  .normalized_coords = %u\n", key->sampler[i].normalized_coords);
-         debug_printf("  .prefilter = %u\n", key->sampler[i].prefilter);
+   if (LP_DEBUG & DEBUG_JIT) {
+      tgsi_dump(shader->base.tokens, 0);
+      if(key->depth.enabled) {
+         debug_printf("depth.format = %s\n", pf_name(key->zsbuf_format));
+         debug_printf("depth.func = %s\n", debug_dump_func(key->depth.func, TRUE));
+         debug_printf("depth.writemask = %u\n", key->depth.writemask);
+      }
+      if(key->alpha.enabled) {
+         debug_printf("alpha.func = %s\n", debug_dump_func(key->alpha.func, TRUE));
+         debug_printf("alpha.ref_value = %f\n", key->alpha.ref_value);
+      }
+      if(key->blend.logicop_enable) {
+         debug_printf("blend.logicop_func = %u\n", key->blend.logicop_func);
+      }
+      else if(key->blend.blend_enable) {
+         debug_printf("blend.rgb_func = %s\n",   debug_dump_blend_func  (key->blend.rgb_func, TRUE));
+         debug_printf("rgb_src_factor = %s\n",   debug_dump_blend_factor(key->blend.rgb_src_factor, TRUE));
+         debug_printf("rgb_dst_factor = %s\n",   debug_dump_blend_factor(key->blend.rgb_dst_factor, TRUE));
+         debug_printf("alpha_func = %s\n",       debug_dump_blend_func  (key->blend.alpha_func, TRUE));
+         debug_printf("alpha_src_factor = %s\n", debug_dump_blend_factor(key->blend.alpha_src_factor, TRUE));
+         debug_printf("alpha_dst_factor = %s\n", debug_dump_blend_factor(key->blend.alpha_dst_factor, TRUE));
+      }
+      debug_printf("blend.colormask = 0x%x\n", key->blend.colormask);
+      for(i = 0; i < PIPE_MAX_SAMPLERS; ++i) {
+         if(key->sampler[i].format) {
+            debug_printf("sampler[%u] = \n", i);
+            debug_printf("  .format = %s\n",
+                         pf_name(key->sampler[i].format));
+            debug_printf("  .target = %s\n",
+                         debug_dump_tex_target(key->sampler[i].target, TRUE));
+            debug_printf("  .pot = %u %u %u\n",
+                         key->sampler[i].pot_width,
+                         key->sampler[i].pot_height,
+                         key->sampler[i].pot_depth);
+            debug_printf("  .wrap = %s %s %s\n",
+                         debug_dump_tex_wrap(key->sampler[i].wrap_s, TRUE),
+                         debug_dump_tex_wrap(key->sampler[i].wrap_t, TRUE),
+                         debug_dump_tex_wrap(key->sampler[i].wrap_r, TRUE));
+            debug_printf("  .min_img_filter = %s\n",
+                         debug_dump_tex_filter(key->sampler[i].min_img_filter, TRUE));
+            debug_printf("  .min_mip_filter = %s\n",
+                         debug_dump_tex_mipfilter(key->sampler[i].min_mip_filter, TRUE));
+            debug_printf("  .mag_img_filter = %s\n",
+                         debug_dump_tex_filter(key->sampler[i].mag_img_filter, TRUE));
+            if(key->sampler[i].compare_mode)
+               debug_printf("  .compare_mode = %s\n", debug_dump_func(key->sampler[i].compare_func, TRUE));
+            debug_printf("  .normalized_coords = %u\n", key->sampler[i].normalized_coords);
+            debug_printf("  .prefilter = %u\n", key->sampler[i].prefilter);
+         }
       }
    }
 
-#endif
-
    variant = CALLOC_STRUCT(lp_fragment_shader_variant);
    if(!variant)
       return NULL;
@@ -599,8 +599,8 @@ generate_fragment(struct llvmpipe_context *lp,
    }
 
    lp_build_conv_mask(builder, fs_type, blend_type,
-                               fs_mask, num_fs,
-                               &blend_mask, 1);
+                      fs_mask, num_fs,
+                      &blend_mask, 1);
 
    /*
     * Blending.
@@ -631,16 +631,15 @@ generate_fragment(struct llvmpipe_context *lp,
 
    LLVMRunFunctionPassManager(screen->pass, variant->function);
 
-#ifdef DEBUG
-   LLVMDumpValue(variant->function);
-   debug_printf("\n");
-#endif
+   if (LP_DEBUG & DEBUG_JIT) {
+      LLVMDumpValue(variant->function);
+      debug_printf("\n");
+   }
 
    variant->jit_function = (lp_jit_frag_func)LLVMGetPointerToGlobal(screen->engine, variant->function);
 
-#ifdef DEBUG
-   lp_disassemble(variant->jit_function);
-#endif
+   if (LP_DEBUG & DEBUG_ASM)
+      lp_disassemble(variant->jit_function);
 
    variant->next = shader->variants;
    shader->variants = variant;
index 62990f9b6a607333921dc4ecf16850f391095685..9aee9e495664e3c8b3caf97669106e2b729ff256 100644 (file)
@@ -112,20 +112,30 @@ so_emit(struct nouveau_channel *chan, struct nouveau_stateobj *so)
 {
        struct nouveau_pushbuf *pb = chan->pushbuf;
        unsigned nr, i;
+       int ret = 0;
 
        nr = so->cur - so->push;
-       if (pb->remaining < nr)
-               nouveau_pushbuf_flush(chan, nr);
+       /* This will flush if we need space.
+        * We don't actually need the marker.
+        */
+       if ((ret = nouveau_pushbuf_marker_emit(chan, nr, so->cur_reloc))) {
+               debug_printf("so_emit failed marker emit with error %d\n", ret);
+               return;
+       }
        pb->remaining -= nr;
 
        memcpy(pb->cur, so->push, nr * 4);
        for (i = 0; i < so->cur_reloc; i++) {
                struct nouveau_stateobj_reloc *r = &so->reloc[i];
 
-               nouveau_pushbuf_emit_reloc(chan, pb->cur + r->offset,
+               if ((ret = nouveau_pushbuf_emit_reloc(chan, pb->cur + r->offset,
                                           r->bo, r->data, 0, r->flags,
-                                          r->vor, r->tor);
+                                          r->vor, r->tor))) {
+                       debug_printf("so_emit failed reloc with error %d\n", ret);
+                       goto out;
+               }
        }
+out:
        pb->cur += nr;
 }
 
@@ -134,26 +144,45 @@ so_emit_reloc_markers(struct nouveau_channel *chan, struct nouveau_stateobj *so)
 {
        struct nouveau_pushbuf *pb = chan->pushbuf;
        unsigned i;
+       int ret = 0;
 
        if (!so)
                return;
 
        i = so->cur_reloc << 1;
-       if (pb->remaining < i)
-               nouveau_pushbuf_flush(chan, i);
+       /* This will flush if we need space.
+        * We don't actually need the marker.
+        */
+       if ((ret = nouveau_pushbuf_marker_emit(chan, i, i))) {
+               debug_printf("so_emit_reloc_markers failed marker emit with" \
+                       "error %d\n", ret);
+               return;
+       }
        pb->remaining -= i;
 
        for (i = 0; i < so->cur_reloc; i++) {
                struct nouveau_stateobj_reloc *r = &so->reloc[i];
 
-               nouveau_pushbuf_emit_reloc(chan, pb->cur++, r->bo, r->packet, 0,
+               if ((ret = nouveau_pushbuf_emit_reloc(chan, pb->cur++, r->bo,
+                                          r->packet, 0,
                                           (r->flags & (NOUVEAU_BO_VRAM |
                                                        NOUVEAU_BO_GART |
                                                        NOUVEAU_BO_RDWR)) |
-                                          NOUVEAU_BO_DUMMY, 0, 0);
-               nouveau_pushbuf_emit_reloc(chan, pb->cur++, r->bo, r->data, 0,
+                                          NOUVEAU_BO_DUMMY, 0, 0))) {
+                       debug_printf("so_emit_reloc_markers failed reloc" \
+                                               "with error %d\n", ret);
+                       pb->remaining += ((so->cur_reloc - i) << 1);
+                       return;
+               }
+               if ((ret = nouveau_pushbuf_emit_reloc(chan, pb->cur++, r->bo,
+                                          r->data, 0,
                                           r->flags | NOUVEAU_BO_DUMMY,
-                                          r->vor, r->tor);
+                                          r->vor, r->tor))) {
+                       debug_printf("so_emit_reloc_markers failed reloc" \
+                                               "with error %d\n", ret);
+                       pb->remaining += ((so->cur_reloc - i) << 1) - 1;
+                       return;
+               }
        }
 }
 
index 932893eef597bfc58ce0ad96338cc885e794670b..3020806c5d65735f8d6afab1d7faef720c99b597 100644 (file)
@@ -133,6 +133,9 @@ nv04_surface_copy_swizzle(struct nv04_surface_2d *ctx,
        assert(sub_w == w || util_is_pot(sub_w));
        assert(sub_h == h || util_is_pot(sub_h));
 
+       MARK_RING (chan, 8 + ((w+sub_w)/sub_w)*((h+sub_h)/sub_h)*17, 2 +
+                        ((w+sub_w)/sub_w)*((h+sub_h)/sub_h)*2);
+
        BEGIN_RING(chan, swzsurf, NV04_SWIZZLED_SURFACE_DMA_IMAGE, 1);
        OUT_RELOCo(chan, dst_bo,
                         NOUVEAU_BO_GART | NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
@@ -202,7 +205,7 @@ nv04_surface_copy_m2mf(struct nv04_surface_2d *ctx,
        unsigned src_offset = src->offset + sy * src_pitch +
                              sx * pf_get_blocksize(src->texture->format);
 
-       WAIT_RING (chan, 3 + ((h / 2047) + 1) * 9);
+       MARK_RING (chan, 3 + ((h / 2047) + 1) * 9, 2 + ((h / 2047) + 1) * 2);
        BEGIN_RING(chan, m2mf, NV04_MEMORY_TO_MEMORY_FORMAT_DMA_BUFFER_IN, 2);
        OUT_RELOCo(chan, src_bo,
                   NOUVEAU_BO_GART | NOUVEAU_BO_VRAM | NOUVEAU_BO_RD);
@@ -250,7 +253,7 @@ nv04_surface_copy_blit(struct nv04_surface_2d *ctx, struct pipe_surface *dst,
        if (format < 0)
                return 1;
 
-       WAIT_RING (chan, 12);
+       MARK_RING (chan, 12, 4);
        BEGIN_RING(chan, surf2d, NV04_CONTEXT_SURFACES_2D_DMA_IMAGE_SOURCE, 2);
        OUT_RELOCo(chan, src_bo, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD);
        OUT_RELOCo(chan, dst_bo, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
@@ -315,7 +318,7 @@ nv04_surface_fill(struct nv04_surface_2d *ctx, struct pipe_surface *dst,
        gdirect_format = nv04_rect_format(dst->format);
        assert(gdirect_format >= 0);
 
-       WAIT_RING (chan, 16);
+       MARK_RING (chan, 16, 4);
        BEGIN_RING(chan, surf2d, NV04_CONTEXT_SURFACES_2D_DMA_IMAGE_SOURCE, 2);
        OUT_RELOCo(chan, dst_bo, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
        OUT_RELOCo(chan, dst_bo, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
index d8300fd69f647a95ed00ce0581deed5012707a3b..46a821a48b1f03b22e071120083e425d946918f8 100644 (file)
@@ -58,6 +58,9 @@ nv30_create(struct pipe_screen *pscreen, unsigned pctx_id)
        nv30->pipe.is_texture_referenced = nouveau_is_texture_referenced;
        nv30->pipe.is_buffer_referenced = nouveau_is_buffer_referenced;
 
+       screen->base.channel->user_private = nv30;
+       screen->base.channel->flush_notify = nv30_state_flush_notify;
+
        nv30_init_query_functions(nv30);
        nv30_init_surface_functions(nv30);
        nv30_init_state_functions(nv30);
index 8d49366dfcb813dd91e54812593e52caeb760466..6f44b1c7fe1a42cc07c73c459411f58325c0cfff 100644 (file)
@@ -184,6 +184,7 @@ extern void nv30_fragtex_bind(struct nv30_context *);
 /* nv30_state.c and friends */
 extern boolean nv30_state_validate(struct nv30_context *nv30);
 extern void nv30_state_emit(struct nv30_context *nv30);
+extern void nv30_state_flush_notify(struct nouveau_channel *chan);
 extern struct nv30_state_entry nv30_state_rasterizer;
 extern struct nv30_state_entry nv30_state_scissor;
 extern struct nv30_state_entry nv30_state_stipple;
index 621b8846c8ed3835c97fd809ddfbe0dcba208e26..ac52d946f021232356e8912b47041080fe19d049 100644 (file)
@@ -41,7 +41,7 @@ nv30_state_emit(struct nv30_context *nv30)
        struct nouveau_channel *chan = nv30->screen->base.channel;
        struct nv30_state *state = &nv30->state;
        struct nv30_screen *screen = nv30->screen;
-       unsigned i, samplers;
+       unsigned i;
        uint64_t states;
 
        if (nv30->pctx_id != screen->cur_pctx) {
@@ -63,6 +63,14 @@ nv30_state_emit(struct nv30_context *nv30)
        }
 
        state->dirty = 0;
+}
+
+void
+nv30_state_flush_notify(struct nouveau_channel *chan)
+{
+       struct nv30_context *nv30 = chan->user_private;
+       struct nv30_state *state = &nv30->state;
+       unsigned i, samplers;
 
        so_emit_reloc_markers(chan, state->hw[NV30_STATE_FB]);
        for (i = 0, samplers = state->fp_samplers; i < 16 && samplers; i++) {
index 7f008274a4e301d5621234853afbfb036e51bb3e..eb9cce4c786718b45708de45a47eb83e2d8538a3 100644 (file)
@@ -58,6 +58,9 @@ nv40_create(struct pipe_screen *pscreen, unsigned pctx_id)
        nv40->pipe.is_texture_referenced = nouveau_is_texture_referenced;
        nv40->pipe.is_buffer_referenced = nouveau_is_buffer_referenced;
 
+       screen->base.channel->user_private = nv40;
+       screen->base.channel->flush_notify = nv40_state_flush_notify;
+
        nv40_init_query_functions(nv40);
        nv40_init_surface_functions(nv40);
        nv40_init_state_functions(nv40);
index a3d594167aad1c668c93ff74a85996220397a658..cf33b64a86dbb91b058bafce61522e0ffcbbbe54 100644 (file)
@@ -204,6 +204,7 @@ extern void nv40_fragtex_bind(struct nv40_context *);
 extern boolean nv40_state_validate(struct nv40_context *nv40);
 extern boolean nv40_state_validate_swtnl(struct nv40_context *nv40);
 extern void nv40_state_emit(struct nv40_context *nv40);
+extern void nv40_state_flush_notify(struct nouveau_channel *chan);
 extern struct nv40_state_entry nv40_state_rasterizer;
 extern struct nv40_state_entry nv40_state_scissor;
 extern struct nv40_state_entry nv40_state_stipple;
index 198692965dc4ad3d482c2826161af03c04457524..ba0fbcb26a995585914ea7dafd4679e823f1a1e1 100644 (file)
@@ -57,7 +57,7 @@ nv40_state_emit(struct nv40_context *nv40)
        struct nouveau_channel *chan = nv40->screen->base.channel;
        struct nv40_state *state = &nv40->state;
        struct nv40_screen *screen = nv40->screen;
-       unsigned i, samplers;
+       unsigned i;
        uint64_t states;
 
        if (nv40->pctx_id != screen->cur_pctx) {
@@ -87,6 +87,14 @@ nv40_state_emit(struct nv40_context *nv40)
        }
 
        state->dirty = 0;
+}
+
+void
+nv40_state_flush_notify(struct nouveau_channel *chan)
+{
+       struct nv40_context *nv40 = chan->user_private;
+       struct nv40_state *state = &nv40->state;
+       unsigned i, samplers;
 
        so_emit_reloc_markers(chan, state->hw[NV40_STATE_FB]);
        for (i = 0, samplers = state->fp_samplers; i < 16 && samplers; i++) {
index 79135f2f3628f0e28d0bdcf37177f452b220a6ed..5578a5838fb4e32254129907b7550c6c44112c71 100644 (file)
@@ -126,7 +126,7 @@ struct nv50_state {
        unsigned viewport_bypass;
        struct nouveau_stateobj *tsc_upload;
        struct nouveau_stateobj *tic_upload;
-       unsigned miptree_nr;
+       unsigned miptree_nr[PIPE_SHADER_TYPES];
        struct nouveau_stateobj *vertprog;
        struct nouveau_stateobj *fragprog;
        struct nouveau_stateobj *programs;
@@ -162,10 +162,10 @@ struct nv50_context {
        unsigned vtxbuf_nr;
        struct pipe_vertex_element vtxelt[PIPE_MAX_ATTRIBS];
        unsigned vtxelt_nr;
-       struct nv50_sampler_stateobj *sampler[PIPE_MAX_SAMPLERS];
-       unsigned sampler_nr;
-       struct nv50_miptree *miptree[PIPE_MAX_SAMPLERS];
-       unsigned miptree_nr;
+       struct nv50_sampler_stateobj *sampler[PIPE_SHADER_TYPES][PIPE_MAX_SAMPLERS];
+       unsigned sampler_nr[PIPE_SHADER_TYPES];
+       struct nv50_miptree *miptree[PIPE_SHADER_TYPES][PIPE_MAX_SAMPLERS];
+       unsigned miptree_nr[PIPE_SHADER_TYPES];
 
        uint16_t vbo_fifo;
 };
@@ -218,7 +218,7 @@ extern void nv50_state_flush_notify(struct nouveau_channel *chan);
 extern void nv50_so_init_sifc(struct nv50_context *nv50,
                              struct nouveau_stateobj *so,
                              struct nouveau_bo *bo, unsigned reloc,
-                             unsigned size);
+                             unsigned offset, unsigned size);
 
 /* nv50_tex.c */
 extern void nv50_tex_validate(struct nv50_context *);
index 40ee6659993b566439eb62d1c3ecda3eeb3e0669..9e083b662dd3d08e4dccb52136e4a87f5ae0aec3 100644 (file)
@@ -55,6 +55,20 @@ get_tile_mode(unsigned ny, unsigned d)
        return tile_mode | 0x10;
 }
 
+static INLINE unsigned
+get_zslice_offset(unsigned tile_mode, unsigned z, unsigned pitch, unsigned nb_h)
+{
+       unsigned tile_h = get_tile_height(tile_mode);
+       unsigned tile_d = get_tile_depth(tile_mode);
+
+       /* pitch_2d == to next slice within this volume-tile */
+       /* pitch_3d == size (in bytes) of a volume-tile */
+       unsigned pitch_2d = tile_h * 64;
+       unsigned pitch_3d = tile_d * align(nb_h, tile_h) * pitch;
+
+       return (z % tile_d) * pitch_2d + (z / tile_d) * pitch_3d;
+}
+
 static struct pipe_texture *
 nv50_miptree_create(struct pipe_screen *pscreen, const struct pipe_texture *tmp)
 {
@@ -130,6 +144,8 @@ nv50_miptree_create(struct pipe_screen *pscreen, const struct pipe_texture *tmp)
                                  mt->level[0].tile_mode, tile_flags,
                                  &mt->base.bo);
        if (ret) {
+               for (l = 0; l < pt->last_level; ++l)
+                       FREE(mt->level[l].image_offset);
                FREE(mt);
                return NULL;
        }
@@ -169,6 +185,10 @@ static void
 nv50_miptree_destroy(struct pipe_texture *pt)
 {
        struct nv50_miptree *mt = nv50_miptree(pt);
+       unsigned l;
+
+       for (l = 0; l < pt->last_level; ++l)
+               FREE(mt->level[l].image_offset);
 
        nouveau_bo_ref(NULL, &mt->base.bo);
        FREE(mt);
@@ -182,15 +202,10 @@ nv50_miptree_surface_new(struct pipe_screen *pscreen, struct pipe_texture *pt,
        struct nv50_miptree *mt = nv50_miptree(pt);
        struct nv50_miptree_level *lvl = &mt->level[level];
        struct pipe_surface *ps;
-       int img;
+       unsigned img = 0;
 
        if (pt->target == PIPE_TEXTURE_CUBE)
                img = face;
-       else
-       if (pt->target == PIPE_TEXTURE_3D)
-               img = zslice;
-       else
-               img = 0;
 
        ps = CALLOC_STRUCT(pipe_surface);
        if (!ps)
@@ -206,6 +221,12 @@ nv50_miptree_surface_new(struct pipe_screen *pscreen, struct pipe_texture *pt,
        ps->zslice = zslice;
        ps->offset = lvl->image_offset[img];
 
+       if (pt->target == PIPE_TEXTURE_3D) {
+               unsigned nb_h = pf_get_nblocksy(pt->format, ps->height);
+               ps->offset += get_zslice_offset(lvl->tile_mode, zslice,
+                                               lvl->pitch, nb_h);
+       }
+
        return ps;
 }
 
index f0fe7e616844bac6b3d03bfad6fd8f629007bb7f..e496cf4cad84ef0774fad5d14efe7af2057dd693 100644 (file)
@@ -119,7 +119,7 @@ struct nv50_pc {
        struct nv50_reg *param;
        int param_nr;
        struct nv50_reg *immd;
-       float *immd_buf;
+       uint32_t *immd_buf;
        int immd_nr;
        struct nv50_reg **addr;
        int addr_nr;
@@ -131,6 +131,9 @@ struct nv50_pc {
        struct nv50_reg *r_brdc;
        struct nv50_reg *r_dst[4];
 
+       struct nv50_reg reg_instances[16];
+       unsigned reg_instance_nr;
+
        unsigned interp_mode[32];
        /* perspective interpolation registers */
        struct nv50_reg *iv_p;
@@ -150,6 +153,20 @@ struct nv50_pc {
        boolean allow32;
 };
 
+static INLINE struct nv50_reg *
+reg_instance(struct nv50_pc *pc, struct nv50_reg *reg)
+{
+       struct nv50_reg *ri;
+
+       assert(pc->reg_instance_nr < 16);
+       ri = &pc->reg_instances[pc->reg_instance_nr++];
+       if (reg) {
+               *ri = *reg;
+               reg->mod = 0;
+       }
+       return ri;
+}
+
 static INLINE void
 ctor_reg(struct nv50_reg *reg, unsigned type, int index, int hw)
 {
@@ -342,25 +359,34 @@ static void
 kill_temp_temp(struct nv50_pc *pc)
 {
        int i;
-       
+
        for (i = 0; i < pc->temp_temp_nr; i++)
                free_temp(pc, pc->temp_temp[i]);
        pc->temp_temp_nr = 0;
 }
 
 static int
-ctor_immd(struct nv50_pc *pc, float x, float y, float z, float w)
+ctor_immd_4u32(struct nv50_pc *pc,
+              uint32_t x, uint32_t y, uint32_t z, uint32_t w)
 {
-       pc->immd_buf = REALLOC(pc->immd_buf, (pc->immd_nr * 4 * sizeof(float)),
-                              (pc->immd_nr + 1) * 4 * sizeof(float));
+       unsigned size = pc->immd_nr * 4 * sizeof(uint32_t);
+
+       pc->immd_buf = REALLOC(pc->immd_buf, size, size + 4 * sizeof(uint32_t));
+
        pc->immd_buf[(pc->immd_nr * 4) + 0] = x;
        pc->immd_buf[(pc->immd_nr * 4) + 1] = y;
        pc->immd_buf[(pc->immd_nr * 4) + 2] = z;
        pc->immd_buf[(pc->immd_nr * 4) + 3] = w;
-       
+
        return pc->immd_nr++;
 }
 
+static INLINE int
+ctor_immd_4f32(struct nv50_pc *pc, float x, float y, float z, float w)
+{
+       return ctor_immd_4u32(pc, fui(x), fui(y), fui(z), fui(w));
+}
+
 static struct nv50_reg *
 alloc_immd(struct nv50_pc *pc, float f)
 {
@@ -368,11 +394,11 @@ alloc_immd(struct nv50_pc *pc, float f)
        unsigned hw;
 
        for (hw = 0; hw < pc->immd_nr * 4; hw++)
-               if (pc->immd_buf[hw] == f)
+               if (pc->immd_buf[hw] == fui(f))
                        break;
 
        if (hw == pc->immd_nr * 4)
-               hw = ctor_immd(pc, f, -f, 0.5 * f, 0) * 4;
+               hw = ctor_immd_4f32(pc, f, -f, 0.5 * f, 0) * 4;
 
        ctor_reg(r, P_IMMD, -1, hw);
        return r;
@@ -464,22 +490,24 @@ set_dst(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_program_exec *e)
 static INLINE void
 set_immd(struct nv50_pc *pc, struct nv50_reg *imm, struct nv50_program_exec *e)
 {
-       unsigned val;
-       float f = pc->immd_buf[imm->hw];
+       union {
+               float f;
+               uint32_t ui;
+       } u;
+       u.ui = pc->immd_buf[imm->hw];
 
-       if (imm->mod & NV50_MOD_ABS)
-               f = fabsf(f);
-       val = fui((imm->mod & NV50_MOD_NEG) ? -f : f);
+       u.f = (imm->mod & NV50_MOD_ABS) ? fabsf(u.f) : u.f;
+       u.f = (imm->mod & NV50_MOD_NEG) ? -u.f : u.f;
 
        set_long(pc, e);
-       /*XXX: can't be predicated - bits overlap.. catch cases where both
-        *     are required and avoid them. */
+       /* XXX: can't be predicated - bits overlap; cases where both
+        * are required should be avoided by using pc->allow32 */
        set_pred(pc, 0, 0, e);
        set_pred_wr(pc, 0, 0, e);
 
        e->inst[1] |= 0x00000002 | 0x00000001;
-       e->inst[0] |= (val & 0x3f) << 16;
-       e->inst[1] |= (val >> 6) << 2;
+       e->inst[0] |= (u.ui & 0x3f) << 16;
+       e->inst[1] |= (u.ui >> 6) << 2;
 }
 
 static INLINE void
@@ -644,7 +672,7 @@ emit_mov(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src)
        if (src->type == P_IMMD || src->type == P_CONST) {
                set_long(pc, e);
                set_data(pc, src, 0x7f, 9, e);
-               e->inst[1] |= 0x20000000; /* src0 const? */
+               e->inst[1] |= 0x20000000; /* mov from c[] */
        } else {
                if (src->type == P_ATTR) {
                        set_long(pc, e);
@@ -659,9 +687,9 @@ emit_mov(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src)
 
        if (is_long(e) && !is_immd(e)) {
                e->inst[1] |= 0x04000000; /* 32-bit */
-               e->inst[1] |= 0x0000c000; /* "subsubop" 0x3 */
+               e->inst[1] |= 0x0000c000; /* 32-bit c[] load / lane mask 0:1 */
                if (!(e->inst[1] & 0x20000000))
-                       e->inst[1] |= 0x00030000; /* "subsubop" 0xf */
+                       e->inst[1] |= 0x00030000; /* lane mask 2:3 */
        } else
                e->inst[0] |= 0x00008000;
 
@@ -676,6 +704,17 @@ emit_mov_immdval(struct nv50_pc *pc, struct nv50_reg *dst, float f)
        FREE(imm);
 }
 
+static void
+emit_nop(struct nv50_pc *pc)
+{
+       struct nv50_program_exec *e = exec(pc);
+
+       e->inst[0] = 0xf0000000;
+       set_long(pc, e);
+       e->inst[1] = 0xe0000000;
+       emit(pc, e);
+}
+
 static boolean
 check_swap_src_0_1(struct nv50_pc *pc,
                   struct nv50_reg **s0, struct nv50_reg **s1)
@@ -794,6 +833,33 @@ set_src_2(struct nv50_pc *pc, struct nv50_reg *src, struct nv50_program_exec *e)
        e->inst[1] |= ((src->hw & 127) << 14);
 }
 
+static void
+emit_mov_from_pred(struct nv50_pc *pc, struct nv50_reg *dst, int pred)
+{
+       struct nv50_program_exec *e = exec(pc);
+
+       assert(dst->type == P_TEMP);
+       e->inst[1] = 0x20000000 | (pred << 12);
+       set_long(pc, e);
+       set_dst(pc, dst, e);
+
+       emit(pc, e);
+}
+
+static void
+emit_mov_to_pred(struct nv50_pc *pc, int pred, struct nv50_reg *src)
+{
+       struct nv50_program_exec *e = exec(pc);
+
+       e->inst[0] = 0x000001fc;
+       e->inst[1] = 0xa0000008;
+       set_long(pc, e);
+       set_pred_wr(pc, 1, pred, e);
+       set_src_0_restricted(pc, src, e);
+
+       emit(pc, e);
+}
+
 static void
 emit_mul(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0,
         struct nv50_reg *src1)
@@ -898,7 +964,6 @@ static INLINE void
 emit_sub(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0,
         struct nv50_reg *src1)
 {
-       assert(src0 != src1);
        src1->mod ^= NV50_MOD_NEG;
        emit_add(pc, dst, src0, src1);
        src1->mod ^= NV50_MOD_NEG;
@@ -967,7 +1032,6 @@ static INLINE void
 emit_msb(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0,
         struct nv50_reg *src1, struct nv50_reg *src2)
 {
-       assert(src2 != src0 && src2 != src1);
        src2->mod ^= NV50_MOD_NEG;
        emit_mad(pc, dst, src0, src1, src2);
        src2->mod ^= NV50_MOD_NEG;
@@ -1257,9 +1321,68 @@ emit_kil(struct nv50_pc *pc, struct nv50_reg *src)
        emit(pc, e);
 }
 
+static struct nv50_program_exec *
+emit_branch(struct nv50_pc *pc, int pred, unsigned cc,
+           struct nv50_program_exec **join)
+{
+       struct nv50_program_exec *e = exec(pc);
+
+       if (join) {
+               set_long(pc, e);
+               e->inst[0] |= 0xa0000002;
+               emit(pc, e);
+               *join = e;
+               e = exec(pc);
+       }
+
+       set_long(pc, e);
+       e->inst[0] |= 0x10000002;
+       if (pred >= 0)
+               set_pred(pc, cc, pred, e);
+       emit(pc, e);
+       return pc->p->exec_tail;
+}
+
+#define QOP_ADD 0
+#define QOP_SUBR 1
+#define QOP_SUB 2
+#define QOP_MOV_SRC1 3
+
+/* For a quad of threads / top left, top right, bottom left, bottom right
+ * pixels, do a different operation, and take src0 from a specific thread.
+ */
+static void
+emit_quadop(struct nv50_pc *pc, struct nv50_reg *dst, int wp, int lane_src0,
+           struct nv50_reg *src0, struct nv50_reg *src1, ubyte qop)
+{
+       struct nv50_program_exec *e = exec(pc);
+
+       e->inst[0] = 0xc0000000;
+       e->inst[1] = 0x80000000;
+       set_long(pc, e);
+       e->inst[0] |= lane_src0 << 16;
+       set_src_0(pc, src0, e);
+       set_src_2(pc, src1, e);
+
+       if (wp >= 0)
+              set_pred_wr(pc, 1, wp, e);
+
+       if (dst)
+              set_dst(pc, dst, e);
+       else {
+              e->inst[0] |= 0x000001fc;
+              e->inst[1] |= 0x00000008;
+       }
+
+       e->inst[0] |= (qop & 3) << 20;
+       e->inst[1] |= (qop >> 2) << 22;
+
+       emit(pc, e);
+}
+
 static void
 load_cube_tex_coords(struct nv50_pc *pc, struct nv50_reg *t[4],
-                    struct nv50_reg **src, boolean proj)
+                    struct nv50_reg **src, unsigned arg, boolean proj)
 {
        int mod[3] = { src[0]->mod, src[1]->mod, src[2]->mod };
 
@@ -1276,6 +1399,10 @@ load_cube_tex_coords(struct nv50_pc *pc, struct nv50_reg *t[4],
 
        if (proj && 0 /* looks more correct without this */)
                emit_mul(pc, t[2], t[2], src[3]);
+       else
+       if (arg == 4) /* there is no textureProj(samplerCubeShadow) */
+               emit_mov(pc, t[3], src[3]);
+
        emit_flop(pc, 0, t[2], t[2]);
 
        emit_mul(pc, t[0], src[0], t[2]);
@@ -1284,89 +1411,214 @@ load_cube_tex_coords(struct nv50_pc *pc, struct nv50_reg *t[4],
 }
 
 static void
-emit_tex(struct nv50_pc *pc, struct nv50_reg **dst, unsigned mask,
-        struct nv50_reg **src, unsigned unit, unsigned type, boolean proj)
+load_proj_tex_coords(struct nv50_pc *pc, struct nv50_reg *t[4],
+                    struct nv50_reg **src, unsigned dim, unsigned arg)
 {
-       struct nv50_reg *t[4];
-       struct nv50_program_exec *e;
+       unsigned c, mode;
+
+       if (src[0]->type == P_TEMP && src[0]->rhw != -1) {
+               mode = pc->interp_mode[src[0]->index] | INTERP_PERSPECTIVE;
 
-       unsigned c, mode, dim;
+               t[3]->rhw = src[3]->rhw;
+               emit_interp(pc, t[3], NULL, (mode & INTERP_CENTROID));
+               emit_flop(pc, 0, t[3], t[3]);
 
+               for (c = 0; c < dim; ++c) {
+                       t[c]->rhw = src[c]->rhw;
+                       emit_interp(pc, t[c], t[3], mode);
+               }
+               if (arg != dim) { /* depth reference value */
+                       t[dim]->rhw = src[2]->rhw;
+                       emit_interp(pc, t[dim], t[3], mode);
+               }
+       } else {
+               /* XXX: for some reason the blob sometimes uses MAD
+                * (mad f32 $rX $rY $rZ neg $r63)
+                */
+               emit_flop(pc, 0, t[3], src[3]);
+               for (c = 0; c < dim; ++c)
+                       emit_mul(pc, t[c], src[c], t[3]);
+               if (arg != dim) /* depth reference value */
+                       emit_mul(pc, t[dim], src[2], t[3]);
+       }
+}
+
+static INLINE void
+get_tex_dim(unsigned type, unsigned *dim, unsigned *arg)
+{
        switch (type) {
        case TGSI_TEXTURE_1D:
-               dim = 1;
+               *arg = *dim = 1;
+               break;
+       case TGSI_TEXTURE_SHADOW1D:
+               *dim = 1;
+               *arg = 2;
                break;
        case TGSI_TEXTURE_UNKNOWN:
        case TGSI_TEXTURE_2D:
-       case TGSI_TEXTURE_SHADOW1D: /* XXX: x, z */
        case TGSI_TEXTURE_RECT:
-               dim = 2;
+               *arg = *dim = 2;
+               break;
+       case TGSI_TEXTURE_SHADOW2D:
+       case TGSI_TEXTURE_SHADOWRECT:
+               *dim = 2;
+               *arg = 3;
                break;
        case TGSI_TEXTURE_3D:
        case TGSI_TEXTURE_CUBE:
-       case TGSI_TEXTURE_SHADOW2D:
-       case TGSI_TEXTURE_SHADOWRECT: /* XXX */
-               dim = 3;
+               *dim = *arg = 3;
                break;
        default:
                assert(0);
                break;
        }
+}
 
-       /* some cards need t[0]'s hw index to be a multiple of 4 */
-       alloc_temp4(pc, t, 0);
+/* We shouldn't execute TEXLOD if any of the pixels in a quad have
+ * different LOD values, so branch off groups of equal LOD.
+ */
+static void
+emit_texlod_sequence(struct nv50_pc *pc, struct nv50_reg *tlod,
+                    struct nv50_reg *src, struct nv50_program_exec *tex)
+{
+       struct nv50_program_exec *join_at;
+       unsigned i, target = pc->p->exec_size + 7 * 2;
 
-       if (type == TGSI_TEXTURE_CUBE) {
-               load_cube_tex_coords(pc, t, src, proj);
-       } else
-       if (proj) {
-               if (src[0]->type == P_TEMP && src[0]->rhw != -1) {
-                       mode = pc->interp_mode[src[0]->index];
-
-                       t[3]->rhw = src[3]->rhw;
-                       emit_interp(pc, t[3], NULL, (mode & INTERP_CENTROID));
-                       emit_flop(pc, 0, t[3], t[3]);
-
-                       for (c = 0; c < dim; c++) {
-                               t[c]->rhw = src[c]->rhw;
-                               emit_interp(pc, t[c], t[3],
-                                           (mode | INTERP_PERSPECTIVE));
-                       }
-               } else {
-                       emit_flop(pc, 0, t[3], src[3]);
-                       for (c = 0; c < dim; c++)
-                               emit_mul(pc, t[c], src[c], t[3]);
+       /* Subtract lod of each pixel from lod of top left pixel, jump
+        * texlod insn if result is 0, then repeat for 2 other pixels.
+        */
+       emit_quadop(pc, NULL, 0, 0, tlod, tlod, 0x55);
+       emit_branch(pc, 0, 2, &join_at)->param.index = target;
 
-                       /* XXX: for some reason the blob sometimes uses MAD:
-                        * emit_mad(pc, t[c], src[0][c], t[3], t[3])
-                        * pc->p->exec_tail->inst[1] |= 0x080fc000;
-                        */
+       for (i = 1; i < 4; ++i) {
+               emit_quadop(pc, NULL, 0, i, tlod, tlod, 0x55);
+               emit_branch(pc, 0, 2, NULL)->param.index = target;
+       }
+
+       emit_mov(pc, tlod, src); /* target */
+       emit(pc, tex); /* texlod */
+
+       join_at->param.index = target + 2 * 2;
+       emit_nop(pc);
+       pc->p->exec_tail->inst[1] |= 2; /* join _after_ tex */
+}
+
+static void
+emit_texbias_sequence(struct nv50_pc *pc, struct nv50_reg *t[4], unsigned arg,
+                     struct nv50_program_exec *tex)
+{
+       struct nv50_program_exec *e;
+       struct nv50_reg imm_1248, *t123[4][4], *r_bits = alloc_temp(pc, NULL);
+       int r_pred = 0;
+       unsigned n, c, i, cc[4] = { 0x0a, 0x13, 0x11, 0x10 };
+
+       pc->allow32 = FALSE;
+       ctor_reg(&imm_1248, P_IMMD, -1, ctor_immd_4u32(pc, 1, 2, 4, 8) * 4);
+
+       /* Subtract bias value of thread i from bias values of each thread,
+        * store result in r_pred, and set bit i in r_bits if result was 0.
+        */
+       assert(arg < 4);
+       for (i = 0; i < 4; ++i, ++imm_1248.hw) {
+               emit_quadop(pc, NULL, r_pred, i, t[arg], t[arg], 0x55);
+               emit_mov(pc, r_bits, &imm_1248);
+               set_pred(pc, 2, r_pred, pc->p->exec_tail);
+       }
+       emit_mov_to_pred(pc, r_pred, r_bits);
+
+       /* The lanes of a quad are now grouped by the bit in r_pred they have
+        * set. Put the input values for TEX into a new register set for each
+        * group and execute TEX only for a specific group.
+        * We cannot use the same register set for each group because we need
+        * the derivatives, which are implicitly calculated, to be correct.
+        */
+       for (i = 1; i < 4; ++i) {
+               alloc_temp4(pc, t123[i], 0);
+
+               for (c = 0; c <= arg; ++c)
+                       emit_mov(pc, t123[i][c], t[c]);
+
+               *(e = exec(pc)) = *(tex);
+               e->inst[0] &= ~0x01fc;
+               set_dst(pc, t123[i][0], e);
+               set_pred(pc, cc[i], r_pred, e);
+               emit(pc, e);
+       }
+       /* finally TEX on the original regs (where we kept the input) */
+       set_pred(pc, cc[0], r_pred, tex);
+       emit(pc, tex);
+
+       /* put the 3 * n other results into regs for lane 0 */
+       n = popcnt4(((e->inst[0] >> 25) & 0x3) | ((e->inst[1] >> 12) & 0xc));
+       for (i = 1; i < 4; ++i) {
+               for (c = 0; c < n; ++c) {
+                       emit_mov(pc, t[c], t123[i][c]);
+                       set_pred(pc, cc[i], r_pred, pc->p->exec_tail);
                }
-       } else {
-               for (c = 0; c < dim; c++)
-                       emit_mov(pc, t[c], src[c]);
+               free_temp4(pc, t123[i]);
        }
 
+       emit_nop(pc);
+       free_temp(pc, r_bits);
+}
+
+static void
+emit_tex(struct nv50_pc *pc, struct nv50_reg **dst, unsigned mask,
+        struct nv50_reg **src, unsigned unit, unsigned type,
+        boolean proj, int bias_lod)
+{
+       struct nv50_reg *t[4];
+       struct nv50_program_exec *e;
+       unsigned c, dim, arg;
+
+       /* t[i] must be within a single 128 bit super-reg */
+       alloc_temp4(pc, t, 0);
+
        e = exec(pc);
+       e->inst[0] = 0xf0000000;
        set_long(pc, e);
-       e->inst[0] |= 0xf0000000;
-       e->inst[1] |= 0x00000004;
        set_dst(pc, t[0], e);
-       e->inst[0] |= (unit << 9);
 
-       if (dim == 2)
-               e->inst[0] |= 0x00400000;
-       else
-       if (dim == 3) {
-               e->inst[0] |= 0x00800000;
-               if (type == TGSI_TEXTURE_CUBE)
-                       e->inst[0] |= 0x08000000;
+       /* TIC and TSC binding indices (TSC is ignored as TSC_LINKED = TRUE): */
+       e->inst[0] |= (unit << 9) /* | (unit << 17) */;
+
+       /* live flag (don't set if TEX results affect input to another TEX): */
+       /* e->inst[0] |= 0x00000004; */
+
+       get_tex_dim(type, &dim, &arg);
+
+       if (type == TGSI_TEXTURE_CUBE) {
+               e->inst[0] |= 0x08000000;
+               load_cube_tex_coords(pc, t, src, arg, proj);
+       } else
+       if (proj)
+               load_proj_tex_coords(pc, t, src, dim, arg);
+       else {
+               for (c = 0; c < dim; c++)
+                       emit_mov(pc, t[c], src[c]);
+               if (arg != dim) /* depth reference value (always src.z here) */
+                       emit_mov(pc, t[dim], src[2]);
        }
 
        e->inst[0] |= (mask & 0x3) << 25;
        e->inst[1] |= (mask & 0xc) << 12;
 
-       emit(pc, e);
+       if (!bias_lod) {
+               e->inst[0] |= (arg - 1) << 22;
+               emit(pc, e);
+       } else
+       if (bias_lod < 0) {
+               e->inst[0] |= arg << 22;
+               e->inst[1] |= 0x20000000; /* texbias */
+               emit_mov(pc, t[arg], src[3]);
+               emit_texbias_sequence(pc, t, arg, e);
+       } else {
+               e->inst[0] |= arg << 22;
+               e->inst[1] |= 0x40000000; /* texlod */
+               emit_mov(pc, t[arg], src[3]);
+               emit_texlod_sequence(pc, t[arg], src[3], e);
+       }
+
 #if 1
        c = 0;
        if (mask & 1) emit_mov(pc, dst[0], t[c++]);
@@ -1388,38 +1640,6 @@ emit_tex(struct nv50_pc *pc, struct nv50_reg **dst, unsigned mask,
 #endif
 }
 
-static void
-emit_branch(struct nv50_pc *pc, int pred, unsigned cc,
-           struct nv50_program_exec **join)
-{
-       struct nv50_program_exec *e = exec(pc);
-
-       if (join) {
-               set_long(pc, e);
-               e->inst[0] |= 0xa0000002;
-               emit(pc, e);
-               *join = e;
-               e = exec(pc);
-       }
-
-       set_long(pc, e);
-       e->inst[0] |= 0x10000002;
-       if (pred >= 0)
-               set_pred(pc, cc, pred, e);
-       emit(pc, e);
-}
-
-static void
-emit_nop(struct nv50_pc *pc)
-{
-       struct nv50_program_exec *e = exec(pc);
-
-       e->inst[0] = 0xf0000000;
-       set_long(pc, e);
-       e->inst[1] = 0xe0000000;
-       emit(pc, e);
-}
-
 static void
 emit_ddx(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src)
 {
@@ -1515,8 +1735,6 @@ convert_to_long(struct nv50_pc *pc, struct nv50_program_exec *e)
 static boolean
 negate_supported(const struct tgsi_full_instruction *insn, int i)
 {
-       int s;
-
        switch (insn->Instruction.Opcode) {
        case TGSI_OPCODE_DDY:
        case TGSI_OPCODE_DP3:
@@ -1526,29 +1744,14 @@ negate_supported(const struct tgsi_full_instruction *insn, int i)
        case TGSI_OPCODE_ADD:
        case TGSI_OPCODE_SUB:
        case TGSI_OPCODE_MAD:
-               break;
+               return TRUE;
        case TGSI_OPCODE_POW:
                if (i == 1)
-                       break;
+                       return TRUE;
                return FALSE;
        default:
                return FALSE;
        }
-
-       /* Watch out for possible multiple uses of an nv50_reg, we
-        * can't use nv50_reg::neg in these cases.
-        */
-       for (s = 0; s < insn->Instruction.NumSrcRegs; ++s) {
-               if (s == i)
-                       continue;
-               if ((insn->Src[s].Register.Index ==
-                    insn->Src[i].Register.Index) &&
-                   (insn->Src[s].Register.File ==
-                    insn->Src[i].Register.File))
-                       return FALSE;
-       }
-
-       return TRUE;
 }
 
 /* Return a read mask for source registers deduced from opcode & write mask. */
@@ -1576,9 +1779,13 @@ nv50_tgsi_src_mask(const struct tgsi_full_instruction *insn, int c)
        case TGSI_OPCODE_RSQ:
        case TGSI_OPCODE_SCS:
                return 0x1;
+       case TGSI_OPCODE_IF:
+               return 0x1;
        case TGSI_OPCODE_LIT:
                return 0xb;
        case TGSI_OPCODE_TEX:
+       case TGSI_OPCODE_TXB:
+       case TGSI_OPCODE_TXL:
        case TGSI_OPCODE_TXP:
        {
                const struct tgsi_instruction_texture *tex;
@@ -1587,13 +1794,17 @@ nv50_tgsi_src_mask(const struct tgsi_full_instruction *insn, int c)
                tex = &insn->Texture;
 
                mask = 0x7;
-               if (insn->Instruction.Opcode == TGSI_OPCODE_TXP)
-                       mask |= 0x8;
+               if (insn->Instruction.Opcode != TGSI_OPCODE_TEX &&
+                   insn->Instruction.Opcode != TGSI_OPCODE_TXD)
+                       mask |= 0x8; /* bias, lod or proj */
 
                switch (tex->Texture) {
                case TGSI_TEXTURE_1D:
                        mask &= 0x9;
                        break;
+               case TGSI_TEXTURE_SHADOW1D:
+                       mask &= 0x5;
+                       break;
                case TGSI_TEXTURE_2D:
                        mask &= 0xb;
                        break;
@@ -1676,7 +1887,7 @@ tgsi_src(struct nv50_pc *pc, int chan, const struct tgsi_full_src_register *src,
                        /* Indicate indirection by setting r->acc < 0 and
                         * use the index field to select the address reg.
                         */
-                       r = MALLOC_STRUCT(nv50_reg);
+                       r = reg_instance(pc, NULL);
                        swz = tgsi_util_get_src_register_swizzle(
                                                 &src->Indirect, 0);
                        ctor_reg(r, P_CONST,
@@ -1730,6 +1941,8 @@ tgsi_src(struct nv50_pc *pc, int chan, const struct tgsi_full_src_register *src,
                break;
        }
 
+       if (r && r->acc >= 0 && r != temp)
+               return reg_instance(pc, r);
        return r;
 }
 
@@ -1785,6 +1998,8 @@ nv50_tgsi_dst_revdep(unsigned op, int s, int c)
        case TGSI_OPCODE_LIT:
        case TGSI_OPCODE_SCS:
        case TGSI_OPCODE_TEX:
+       case TGSI_OPCODE_TXB:
+       case TGSI_OPCODE_TXL:
        case TGSI_OPCODE_TXP:
                /* these take care of dangerous swizzles themselves */
                return 0x0;
@@ -2187,11 +2402,19 @@ nv50_program_tx_insn(struct nv50_pc *pc,
                break;
        case TGSI_OPCODE_TEX:
                emit_tex(pc, dst, mask, src[0], unit,
-                        inst->Texture.Texture, FALSE);
+                        inst->Texture.Texture, FALSE, 0);
+               break;
+       case TGSI_OPCODE_TXB:
+               emit_tex(pc, dst, mask, src[0], unit,
+                        inst->Texture.Texture, FALSE, -1);
+               break;
+       case TGSI_OPCODE_TXL:
+               emit_tex(pc, dst, mask, src[0], unit,
+                        inst->Texture.Texture, FALSE, 1);
                break;
        case TGSI_OPCODE_TXP:
                emit_tex(pc, dst, mask, src[0], unit,
-                        inst->Texture.Texture, TRUE);
+                        inst->Texture.Texture, TRUE, 0);
                break;
        case TGSI_OPCODE_TRUNC:
                for (c = 0; c < 4; c++) {
@@ -2245,20 +2468,9 @@ nv50_program_tx_insn(struct nv50_pc *pc,
                }
        }
 
-       for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
-               for (c = 0; c < 4; c++) {
-                       if (!src[i][c])
-                               continue;
-                       src[i][c]->mod = 0;
-                       if (src[i][c]->index == -1 && src[i][c]->type == P_IMMD)
-                               FREE(src[i][c]);
-                       else
-                       if (src[i][c]->acc < 0 && src[i][c]->type == P_CONST)
-                               FREE(src[i][c]); /* indirect constant */
-               }
-       }
-
        kill_temp_temp(pc);
+       pc->reg_instance_nr = 0;
+
        return TRUE;
 }
 
@@ -2541,10 +2753,10 @@ nv50_program_tx_prep(struct nv50_pc *pc)
                        const struct tgsi_full_immediate *imm =
                                &tp.FullToken.FullImmediate;
 
-                       ctor_immd(pc, imm->u[0].Float,
-                                     imm->u[1].Float,
-                                     imm->u[2].Float,
-                                     imm->u[3].Float);
+                       ctor_immd_4f32(pc, imm->u[0].Float,
+                                      imm->u[1].Float,
+                                      imm->u[2].Float,
+                                      imm->u[3].Float);
                }
                        break;
                case TGSI_TOKEN_TYPE_DECLARATION:
@@ -3024,7 +3236,7 @@ nv50_program_validate(struct nv50_context *nv50, struct nv50_program *p)
 }
 
 static void
-nv50_program_upload_data(struct nv50_context *nv50, float *map,
+nv50_program_upload_data(struct nv50_context *nv50, uint32_t *map,
                        unsigned start, unsigned count, unsigned cbuf)
 {
        struct nouveau_channel *chan = nv50->screen->base.channel;
@@ -3072,8 +3284,8 @@ nv50_program_validate_data(struct nv50_context *nv50, struct nv50_program *p)
 
        if (p->param_nr) {
                unsigned cb;
-               float *map = pipe_buffer_map(pscreen, nv50->constbuf[p->type],
-                                            PIPE_BUFFER_USAGE_CPU_READ);
+               uint32_t *map = pipe_buffer_map(pscreen, nv50->constbuf[p->type],
+                                               PIPE_BUFFER_USAGE_CPU_READ);
 
                if (p->type == PIPE_SHADER_VERTEX)
                        cb = NV50_CB_PVP;
index 255c7c737ef823da68c39d238a6db51bb6e82df9..4a90c372ce34610c5eee7f04dd60ca811626f784 100644 (file)
@@ -37,7 +37,7 @@ struct nv50_program {
 
        struct nouveau_bo *bo;
 
-       float *immd;
+       uint32_t *immd;
        unsigned immd_nr;
        unsigned param_nr;
 
index 5305c93d59ab15de76094c8a14e667d8d2b335a2..268c9823f7d8887ceab9b3022662d61a6c372da8 100644 (file)
@@ -93,7 +93,7 @@ nv50_query_end(struct pipe_context *pipe, struct pipe_query *pq)
        struct nouveau_grobj *tesla = nv50->screen->tesla;
        struct nv50_query *q = nv50_query(pq);
 
-       WAIT_RING (chan, 5);
+       MARK_RING (chan, 5, 2); /* flush on lack of space or relocs */
        BEGIN_RING(chan, tesla, NV50TCL_QUERY_ADDRESS_HIGH, 4);
        OUT_RELOCh(chan, q->bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
        OUT_RELOCl(chan, q->bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
index e1b2f11239a4e1cc3859c98d112d3b59818878ce..d443ca3ad06905a714816da5425308aa164a2548 100644 (file)
@@ -76,6 +76,7 @@ nv50_screen_is_format_supported(struct pipe_screen *pscreen,
                case PIPE_FORMAT_DXT3_RGBA:
                case PIPE_FORMAT_DXT5_RGBA:
                case PIPE_FORMAT_Z24S8_UNORM:
+               case PIPE_FORMAT_S8Z24_UNORM:
                case PIPE_FORMAT_Z32_FLOAT:
                case PIPE_FORMAT_R16G16B16A16_SNORM:
                case PIPE_FORMAT_R16G16B16A16_UNORM:
@@ -97,6 +98,10 @@ nv50_screen_get_param(struct pipe_screen *pscreen, int param)
        switch (param) {
        case PIPE_CAP_MAX_TEXTURE_IMAGE_UNITS:
                return 32;
+       case PIPE_CAP_MAX_VERTEX_TEXTURE_UNITS:
+               return 32;
+       case PIPE_CAP_MAX_COMBINED_SAMPLERS:
+               return 64;
        case PIPE_CAP_NPOT_TEXTURES:
                return 1;
        case PIPE_CAP_TWO_SIDED_STENCIL:
@@ -122,8 +127,6 @@ nv50_screen_get_param(struct pipe_screen *pscreen, int param)
        case PIPE_CAP_TEXTURE_MIRROR_CLAMP:
        case PIPE_CAP_TEXTURE_MIRROR_REPEAT:
                return 1;
-       case PIPE_CAP_MAX_VERTEX_TEXTURE_UNITS:
-               return 0;
        case PIPE_CAP_TGSI_CONT_SUPPORTED:
                return 0;
        case PIPE_CAP_BLEND_EQUATION_SEPARATE:
@@ -315,6 +318,9 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev)
        so_method(so, screen->tesla, 0x1400, 1);
        so_data  (so, 0xf);
 
+       /* max TIC (bits 4:8) & TSC (ignored) bindings, per program type */
+       so_method(so, screen->tesla, 0x13b4, 1);
+       so_data  (so, 0x54);
        so_method(so, screen->tesla, 0x13bc, 1);
        so_data  (so, 0x54);
        /* origin is top left (set to 1 for bottom left) */
@@ -387,7 +393,8 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev)
        so_method(so, screen->tesla, NV50TCL_SET_PROGRAM_CB, 1);
        so_data  (so, 0x00000131 | (NV50_CB_PFP << 12));
 
-       ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 0, 64*8*4, &screen->tic);
+       ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 0, PIPE_SHADER_TYPES*32*32,
+                            &screen->tic);
        if (ret) {
                nv50_screen_destroy(pscreen);
                return NULL;
@@ -398,9 +405,10 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev)
                  NOUVEAU_BO_RD | NOUVEAU_BO_HIGH, 0, 0);
        so_reloc (so, screen->tic, 0, NOUVEAU_BO_VRAM |
                  NOUVEAU_BO_RD | NOUVEAU_BO_LOW, 0, 0);
-       so_data  (so, 0x000007ff);
+       so_data  (so, PIPE_SHADER_TYPES * 32 - 1);
 
-       ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 0, 64*8*4, &screen->tsc);
+       ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 0, PIPE_SHADER_TYPES*32*32,
+                            &screen->tsc);
        if (ret) {
                nv50_screen_destroy(pscreen);
                return NULL;
@@ -411,7 +419,7 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev)
                  NOUVEAU_BO_RD | NOUVEAU_BO_HIGH, 0, 0);
        so_reloc (so, screen->tsc, 0, NOUVEAU_BO_VRAM |
                  NOUVEAU_BO_RD | NOUVEAU_BO_LOW, 0, 0);
-       so_data  (so, 0x00000000);
+       so_data  (so, 0x00000000); /* ignored if TSC_LINKED (0x1234) = 1 */
 
 
        /* Vertex array limits - max them out */
@@ -425,6 +433,7 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev)
        so_data  (so, fui(0.0));
        so_data  (so, fui(1.0));
 
+       /* no dynamic combination of TIC & TSC entries => only BIND_TIC used */
        so_method(so, screen->tesla, 0x1234, 1);
        so_data  (so, 1);
 
index 07318f23947bc34ba33d37527e2729e885e8b18e..88aef52d08c6bb57218cea85a52d9af073581b7c 100644 (file)
@@ -196,8 +196,9 @@ nv50_sampler_state_create(struct pipe_context *pipe,
        }
 
        if (cso->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) {
-               tsc[0] |= (1 << 8);
-               tsc[0] |= (nvgl_comparison_op(cso->compare_func) & 0x7);
+               /* XXX: must be deactivated for non-shadow textures */
+               tsc[0] |= (1 << 9);
+               tsc[0] |= (nvgl_comparison_op(cso->compare_func) & 0x7) << 10;
        }
 
        limit = CLAMP(cso->lod_bias, -16.0, 15.0);
@@ -215,41 +216,66 @@ nv50_sampler_state_create(struct pipe_context *pipe,
        return (void *)sso;
 }
 
-static void
-nv50_sampler_state_bind(struct pipe_context *pipe, unsigned nr, void **sampler)
+static INLINE void
+nv50_sampler_state_bind(struct pipe_context *pipe, unsigned type,
+                       unsigned nr, void **sampler)
 {
        struct nv50_context *nv50 = nv50_context(pipe);
-       int i;
 
-       nv50->sampler_nr = nr;
-       for (i = 0; i < nv50->sampler_nr; i++)
-               nv50->sampler[i] = sampler[i];
+       memcpy(nv50->sampler[type], sampler, nr * sizeof(void *));
 
+       nv50->sampler_nr[type] = nr;
        nv50->dirty |= NV50_NEW_SAMPLER;
 }
 
+static void
+nv50_vp_sampler_state_bind(struct pipe_context *pipe, unsigned nr, void **s)
+{
+       nv50_sampler_state_bind(pipe, PIPE_SHADER_VERTEX, nr, s);
+}
+
+static void
+nv50_fp_sampler_state_bind(struct pipe_context *pipe, unsigned nr, void **s)
+{
+       nv50_sampler_state_bind(pipe, PIPE_SHADER_FRAGMENT, nr, s);
+}
+
 static void
 nv50_sampler_state_delete(struct pipe_context *pipe, void *hwcso)
 {
        FREE(hwcso);
 }
 
-static void
-nv50_set_sampler_texture(struct pipe_context *pipe, unsigned nr,
-                        struct pipe_texture **pt)
+static INLINE void
+nv50_set_sampler_texture(struct pipe_context *pipe, unsigned type,
+                        unsigned nr, struct pipe_texture **pt)
 {
        struct nv50_context *nv50 = nv50_context(pipe);
-       int i;
+       unsigned i;
 
        for (i = 0; i < nr; i++)
-               pipe_texture_reference((void *)&nv50->miptree[i], pt[i]);
-       for (i = nr; i < nv50->miptree_nr; i++)
-               pipe_texture_reference((void *)&nv50->miptree[i], NULL);
+               pipe_texture_reference((void *)&nv50->miptree[type][i], pt[i]);
+       for (i = nr; i < nv50->miptree_nr[type]; i++)
+               pipe_texture_reference((void *)&nv50->miptree[type][i], NULL);
 
-       nv50->miptree_nr = nr;
+       nv50->miptree_nr[type] = nr;
        nv50->dirty |= NV50_NEW_TEXTURE;
 }
 
+static void
+nv50_set_vp_sampler_textures(struct pipe_context *pipe,
+                            unsigned nr, struct pipe_texture **pt)
+{
+       nv50_set_sampler_texture(pipe, PIPE_SHADER_VERTEX, nr, pt);
+}
+
+static void
+nv50_set_fp_sampler_textures(struct pipe_context *pipe,
+                            unsigned nr, struct pipe_texture **pt)
+{
+       nv50_set_sampler_texture(pipe, PIPE_SHADER_FRAGMENT, nr, pt);
+}
+
 static void *
 nv50_rasterizer_state_create(struct pipe_context *pipe,
                             const struct pipe_rasterizer_state *cso)
@@ -648,9 +674,11 @@ nv50_init_state_functions(struct nv50_context *nv50)
        nv50->pipe.delete_blend_state = nv50_blend_state_delete;
 
        nv50->pipe.create_sampler_state = nv50_sampler_state_create;
-       nv50->pipe.bind_fragment_sampler_states = nv50_sampler_state_bind;
        nv50->pipe.delete_sampler_state = nv50_sampler_state_delete;
-       nv50->pipe.set_fragment_sampler_textures = nv50_set_sampler_texture;
+       nv50->pipe.bind_fragment_sampler_states = nv50_fp_sampler_state_bind;
+       nv50->pipe.bind_vertex_sampler_states   = nv50_vp_sampler_state_bind;
+       nv50->pipe.set_fragment_sampler_textures = nv50_set_fp_sampler_textures;
+       nv50->pipe.set_vertex_sampler_textures   = nv50_set_vp_sampler_textures;
 
        nv50->pipe.create_rasterizer_state = nv50_rasterizer_state_create;
        nv50->pipe.bind_rasterizer_state = nv50_rasterizer_state_bind;
index c871acaab8deacafe238c991c9eebb44f5bd7092..871e8097b65a9650485c512946b4e6c1adad7dfa 100644 (file)
@@ -155,6 +155,30 @@ nv50_state_validate_fb(struct nv50_context *nv50)
        so_ref(NULL, &so);
 }
 
+static void
+nv50_validate_samplers(struct nv50_context *nv50, struct nouveau_stateobj *so,
+                      unsigned p)
+{
+       struct nouveau_grobj *eng2d = nv50->screen->eng2d;
+       unsigned i, j, dw = nv50->sampler_nr[p] * 8;
+
+       if (!dw)
+               return;
+       nv50_so_init_sifc(nv50, so, nv50->screen->tsc, NOUVEAU_BO_VRAM,
+                         p * (32 * 8 * 4), dw * 4);
+
+       so_method(so, eng2d, NV50_2D_SIFC_DATA | (2 << 29), dw);
+
+       for (i = 0; i < nv50->sampler_nr[p]; ++i) {
+               if (nv50->sampler[p][i])
+                       so_datap(so, nv50->sampler[p][i]->tsc, 8);
+               else {
+                       for (j = 0; j < 8; ++j) /* you get punished */
+                               so_data(so, 0); /* ... for leaving holes */
+               }
+       }
+}
+
 static void
 nv50_state_emit(struct nv50_context *nv50)
 {
@@ -246,7 +270,6 @@ boolean
 nv50_state_validate(struct nv50_context *nv50)
 {
        struct nouveau_grobj *tesla = nv50->screen->tesla;
-       struct nouveau_grobj *eng2d = nv50->screen->eng2d;
        struct nouveau_stateobj *so;
        unsigned i;
 
@@ -369,22 +392,16 @@ scissor_uptodate:
 viewport_uptodate:
 
        if (nv50->dirty & NV50_NEW_SAMPLER) {
-               unsigned i;
+               unsigned nr = 0;
 
-               so = so_new(nv50->sampler_nr * 9 + 23 + 4, 2);
+               for (i = 0; i < PIPE_SHADER_TYPES; ++i)
+                       nr += nv50->sampler_nr[i];
 
-               nv50_so_init_sifc(nv50, so, nv50->screen->tsc, NOUVEAU_BO_VRAM,
-                                 nv50->sampler_nr * 8 * 4);
+               so = so_new(nr * 8 + 24 * PIPE_SHADER_TYPES + 2, 4);
 
-               for (i = 0; i < nv50->sampler_nr; i++) {
-                       if (!nv50->sampler[i])
-                               continue;
-                       so_method(so, eng2d, NV50_2D_SIFC_DATA | (2 << 29), 8);
-                       so_datap (so, nv50->sampler[i]->tsc, 8);
-               }
+               nv50_validate_samplers(nv50, so, PIPE_SHADER_VERTEX);
+               nv50_validate_samplers(nv50, so, PIPE_SHADER_FRAGMENT);
 
-               so_method(so, tesla, 0x1440, 1); /* sync SIFC */
-               so_data  (so, 0);
                so_method(so, tesla, 0x1334, 1); /* flush TSC */
                so_data  (so, 0);
 
@@ -407,10 +424,13 @@ viewport_uptodate:
 
 void nv50_so_init_sifc(struct nv50_context *nv50,
                       struct nouveau_stateobj *so,
-                      struct nouveau_bo *bo, unsigned reloc, unsigned size)
+                      struct nouveau_bo *bo, unsigned reloc,
+                      unsigned offset, unsigned size)
 {
        struct nouveau_grobj *eng2d = nv50->screen->eng2d;
 
+       reloc |= NOUVEAU_BO_WR;
+
        so_method(so, eng2d, NV50_2D_DST_FORMAT, 2);
        so_data  (so, NV50_2D_DST_FORMAT_R8_UNORM);
        so_data  (so, 1);
@@ -418,8 +438,8 @@ void nv50_so_init_sifc(struct nv50_context *nv50,
        so_data  (so, 262144);
        so_data  (so, 65536);
        so_data  (so, 1);
-       so_reloc (so, bo, 0, reloc | NOUVEAU_BO_WR | NOUVEAU_BO_HIGH, 0, 0);
-       so_reloc (so, bo, 0, reloc | NOUVEAU_BO_WR | NOUVEAU_BO_LOW, 0, 0);
+       so_reloc (so, bo, offset, reloc | NOUVEAU_BO_HIGH, 0, 0);
+       so_reloc (so, bo, offset, reloc | NOUVEAU_BO_LOW, 0, 0);
        so_method(so, eng2d, NV50_2D_SIFC_UNK0800, 2);
        so_data  (so, 0);
        so_data  (so, NV50_2D_SIFC_FORMAT_R8_UNORM);
index 6bf6f773b0c26b075282175c1d3292a1e3ddd753..79655fc08d5a6b782b33eadb5916d0a6697fed19 100644 (file)
@@ -62,6 +62,7 @@ nv50_surface_set(struct nv50_screen *screen, struct pipe_surface *ps, int dst)
                return 1;
 
        if (!bo->tile_flags) {
+               MARK_RING (chan, 9, 2); /* flush on lack of space or relocs */
                BEGIN_RING(chan, eng2d, mthd, 2);
                OUT_RING  (chan, format);
                OUT_RING  (chan, 1);
@@ -72,6 +73,7 @@ nv50_surface_set(struct nv50_screen *screen, struct pipe_surface *ps, int dst)
                OUT_RELOCh(chan, bo, ps->offset, flags);
                OUT_RELOCl(chan, bo, ps->offset, flags);
        } else {
+               MARK_RING (chan, 11, 2); /* flush on lack of space or relocs */
                BEGIN_RING(chan, eng2d, mthd, 5);
                OUT_RING  (chan, format);
                OUT_RING  (chan, 0);
index 417d367942284f44a587e2ffa3b19a4593ab1fa5..120aa6f362b0d75220668db90d78a129b484c0e1 100644 (file)
@@ -68,6 +68,7 @@ static const struct nv50_texture_format nv50_tex_format_list[] =
        _(DXT5_RGBA, UNORM, C0, C1, C2, C3, DXT5),
 
        _MIXED(Z24S8_UNORM, UINT, UNORM, UINT, UINT, C1, C1, C1, ONE, 24_8),
+       _MIXED(S8Z24_UNORM, UNORM, UINT, UINT, UINT, C0, C0, C0, ONE, 8_24),
 
        _(R16G16B16A16_SNORM, UNORM, C0, C1, C2, C3, 16_16_16_16),
        _(R16G16B16A16_UNORM, SNORM, C0, C1, C2, C3, 16_16_16_16),
@@ -85,7 +86,7 @@ static const struct nv50_texture_format nv50_tex_format_list[] =
 
 static int
 nv50_tex_construct(struct nv50_context *nv50, struct nouveau_stateobj *so,
-                  struct nv50_miptree *mt, int unit)
+                  struct nv50_miptree *mt, int unit, unsigned p)
 {
        unsigned i;
        uint32_t mode;
@@ -96,7 +97,7 @@ nv50_tex_construct(struct nv50_context *nv50, struct nouveau_stateobj *so,
        if (i == NV50_TEX_FORMAT_LIST_SIZE)
                 return 1;
 
-       if (nv50->sampler[unit]->normalized)
+       if (nv50->sampler[p][unit]->normalized)
                mode = 0x50001000 | (1 << 31);
        else {
                mode = 0x50001000 | (7 << 14);
@@ -140,48 +141,78 @@ nv50_tex_construct(struct nv50_context *nv50, struct nouveau_stateobj *so,
        return 0;
 }
 
-void
-nv50_tex_validate(struct nv50_context *nv50)
+#ifndef NV50TCL_BIND_TIC
+#define NV50TCL_BIND_TIC(n) (0x1448 + 8 * n)
+#endif
+
+static boolean
+nv50_validate_textures(struct nv50_context *nv50, struct nouveau_stateobj *so,
+                      unsigned p)
 {
+       static const unsigned p_remap[PIPE_SHADER_TYPES] = { 0, 2 };
+
        struct nouveau_grobj *eng2d = nv50->screen->eng2d;
        struct nouveau_grobj *tesla = nv50->screen->tesla;
-       struct nouveau_stateobj *so;
-       unsigned i, unit, push;
-
-       push = MAX2(nv50->miptree_nr, nv50->state.miptree_nr) * 2 + 23 + 6;
-       so = so_new(nv50->miptree_nr * 9 + push, nv50->miptree_nr * 2 + 2);
+       unsigned unit, j, p_hw = p_remap[p];
 
        nv50_so_init_sifc(nv50, so, nv50->screen->tic, NOUVEAU_BO_VRAM,
-                         nv50->miptree_nr * 8 * 4);
+                         p * (32 * 8 * 4), nv50->miptree_nr[p] * 8 * 4);
 
-       for (i = 0, unit = 0; unit < nv50->miptree_nr; ++unit) {
-               struct nv50_miptree *mt = nv50->miptree[unit];
-
-               if (!mt)
-                       continue;
+       for (unit = 0; unit < nv50->miptree_nr[p]; ++unit) {
+               struct nv50_miptree *mt = nv50->miptree[p][unit];
 
                so_method(so, eng2d, NV50_2D_SIFC_DATA | (2 << 29), 8);
-               if (nv50_tex_construct(nv50, so, mt, unit)) {
-                       NOUVEAU_ERR("failed tex validate\n");
-                       so_ref(NULL, &so);
-                       return;
+               if (mt) {
+                       if (nv50_tex_construct(nv50, so, mt, unit, p))
+                               return FALSE;
+                       /* Set TEX insn $t src binding $unit in program type p
+                        * to TIC, TSC entry (32 * p + unit), mark valid (1).
+                        */
+                       so_method(so, tesla, NV50TCL_BIND_TIC(p_hw), 1);
+                       so_data  (so, ((32 * p + unit) << 9) | (unit << 1) | 1);
+               } else {
+                       for (j = 0; j < 8; ++j)
+                               so_data(so, 0);
+                       so_method(so, tesla, NV50TCL_BIND_TIC(p_hw), 1);
+                       so_data  (so, (unit << 1) | 0);
                }
+       }
+
+       for (; unit < nv50->state.miptree_nr[p]; unit++) {
+               /* Make other bindings invalid. */
+               so_method(so, tesla, NV50TCL_BIND_TIC(p_hw), 1);
+               so_data  (so, (unit << 1) | 0);
+       }
+
+       nv50->state.miptree_nr[p] = nv50->miptree_nr[p];
+       return TRUE;
+}
 
-               so_method(so, tesla, NV50TCL_SET_SAMPLER_TEX, 1);
-               so_data  (so, (i++ << NV50TCL_SET_SAMPLER_TEX_TIC_SHIFT) |
-                         (unit << NV50TCL_SET_SAMPLER_TEX_SAMPLER_SHIFT) |
-                         NV50TCL_SET_SAMPLER_TEX_VALID);
+void
+nv50_tex_validate(struct nv50_context *nv50)
+{
+       struct nouveau_stateobj *so;
+       struct nouveau_grobj *tesla = nv50->screen->tesla;
+       unsigned p, push, nrlc;
+
+       for (nrlc = 0, push = 0, p = 0; p < PIPE_SHADER_TYPES; ++p) {
+               push += MAX2(nv50->miptree_nr[p], nv50->state.miptree_nr[p]);
+               nrlc += nv50->miptree_nr[p];
        }
+       push = push * 11 + 23 * PIPE_SHADER_TYPES + 4;
+       nrlc = nrlc * 2 + 2 * PIPE_SHADER_TYPES;
+
+       so = so_new(push, nrlc);
+
+       if (nv50_validate_textures(nv50, so, PIPE_SHADER_VERTEX) == FALSE ||
+           nv50_validate_textures(nv50, so, PIPE_SHADER_FRAGMENT) == FALSE) {
+               so_ref(NULL, &so);
 
-       for (; unit < nv50->state.miptree_nr; unit++) {
-               so_method(so, tesla, NV50TCL_SET_SAMPLER_TEX, 1);
-               so_data  (so,
-                         (unit << NV50TCL_SET_SAMPLER_TEX_SAMPLER_SHIFT) | 0);
+               NOUVEAU_ERR("failed tex validate\n");
+               return;
        }
 
        /* not sure if the following really do what I think: */
-       so_method(so, tesla, 0x1440, 1); /* sync SIFC */
-       so_data  (so, 0);
        so_method(so, tesla, 0x1330, 1); /* flush TIC */
        so_data  (so, 0);
        so_method(so, tesla, 0x1338, 1); /* flush texture caches */
@@ -189,6 +220,4 @@ nv50_tex_validate(struct nv50_context *nv50)
 
        so_ref(so, &nv50->state.tic_upload);
        so_ref(NULL, &so);
-       nv50->state.miptree_nr = nv50->miptree_nr;
 }
-
index d531e6113278463b2f2915e72f2d86843ca6cfb9..b870302019a80359d63eba90e176d5e4045f2ee2 100644 (file)
@@ -82,6 +82,7 @@
 #define NV50TIC_0_0_FMT_RGTC1                                     0x00000027
 #define NV50TIC_0_0_FMT_RGTC2                                     0x00000028
 #define NV50TIC_0_0_FMT_24_8                                      0x00000029
+#define NV50TIC_0_0_FMT_8_24                                      0x0000002a
 #define NV50TIC_0_0_FMT_32_DEPTH                                  0x0000002f
 #define NV50TIC_0_0_FMT_32_8                                      0x00000030
 
index 4705f96f57284374b995aa57cd6d88984d0acb0c..6240a0c757ad2ffff87646dcc5b558b9efbc4a3e 100644 (file)
@@ -16,6 +16,7 @@ struct nv50_transfer {
        int level_depth;
        int level_x;
        int level_y;
+       int level_z;
        unsigned nblocksx;
        unsigned nblocksy;
 };
@@ -24,10 +25,10 @@ static void
 nv50_transfer_rect_m2mf(struct pipe_screen *pscreen,
                        struct nouveau_bo *src_bo, unsigned src_offset,
                        int src_pitch, unsigned src_tile_mode,
-                       int sx, int sy, int sw, int sh, int sd,
+                       int sx, int sy, int sz, int sw, int sh, int sd,
                        struct nouveau_bo *dst_bo, unsigned dst_offset,
                        int dst_pitch, unsigned dst_tile_mode,
-                       int dx, int dy, int dw, int dh, int dd,
+                       int dx, int dy, int dz, int dw, int dh, int dd,
                        int cpp, int width, int height,
                        unsigned src_reloc, unsigned dst_reloc)
 {
@@ -56,7 +57,7 @@ nv50_transfer_rect_m2mf(struct pipe_screen *pscreen,
                OUT_RING  (chan, sw * cpp);
                OUT_RING  (chan, sh);
                OUT_RING  (chan, sd);
-               OUT_RING  (chan, 0);
+               OUT_RING  (chan, sz); /* copying only 1 zslice per call */
        }
 
        if (!dst_bo->tile_flags) {
@@ -75,13 +76,13 @@ nv50_transfer_rect_m2mf(struct pipe_screen *pscreen,
                OUT_RING  (chan, dw * cpp);
                OUT_RING  (chan, dh);
                OUT_RING  (chan, dd);
-               OUT_RING  (chan, 0);
+               OUT_RING  (chan, dz); /* copying only 1 zslice per call */
        }
 
        while (height) {
                int line_count = height > 2047 ? 2047 : height;
 
-               WAIT_RING (chan, 15);
+               MARK_RING (chan, 15, 4); /* flush on lack of space or relocs */
                BEGIN_RING(chan, m2mf,
                        NV50_MEMORY_TO_MEMORY_FORMAT_OFFSET_IN_HIGH, 2);
                OUT_RELOCh(chan, src_bo, src_offset, src_reloc);
@@ -118,20 +119,6 @@ nv50_transfer_rect_m2mf(struct pipe_screen *pscreen,
        }
 }
 
-static INLINE unsigned
-get_zslice_offset(unsigned tile_mode, unsigned z, unsigned pitch, unsigned ny)
-{
-       unsigned tile_h = get_tile_height(tile_mode);
-       unsigned tile_d = get_tile_depth(tile_mode);
-
-       /* pitch_2d == to next slice within this volume-tile */
-       /* pitch_3d == to next slice in next 2D array of blocks */
-       unsigned pitch_2d = tile_h * 64;
-       unsigned pitch_3d = tile_d * align(ny, tile_h) * pitch;
-
-       return (z % tile_d) * pitch_2d + (z / tile_d) * pitch_3d;
-}
-
 static struct pipe_transfer *
 nv50_transfer_new(struct pipe_screen *pscreen, struct pipe_texture *pt,
                  unsigned face, unsigned level, unsigned zslice,
@@ -166,6 +153,7 @@ nv50_transfer_new(struct pipe_screen *pscreen, struct pipe_texture *pt,
        tx->level_depth = u_minify(mt->base.base.depth0, level);
        tx->level_offset = lvl->image_offset[image];
        tx->level_tiling = lvl->tile_mode;
+       tx->level_z = zslice;
        tx->level_x = pf_get_nblocksx(pt->format, x);
        tx->level_y = pf_get_nblocksy(pt->format, y);
        ret = nouveau_bo_new(dev, NOUVEAU_BO_GART | NOUVEAU_BO_MAP, 0,
@@ -175,23 +163,18 @@ nv50_transfer_new(struct pipe_screen *pscreen, struct pipe_texture *pt,
                return NULL;
        }
 
-       if (pt->target == PIPE_TEXTURE_3D)
-               tx->level_offset += get_zslice_offset(lvl->tile_mode, zslice,
-                                                     lvl->pitch,
-                                                     tx->nblocksy);
-
        if (usage & PIPE_TRANSFER_READ) {
                nx = pf_get_nblocksx(pt->format, tx->base.width);
                ny = pf_get_nblocksy(pt->format, tx->base.height);
 
                nv50_transfer_rect_m2mf(pscreen, mt->base.bo, tx->level_offset,
                                        tx->level_pitch, tx->level_tiling,
-                                       x, y,
+                                       x, y, zslice,
                                        tx->nblocksx, tx->nblocksy,
                                        tx->level_depth,
                                        tx->bo, 0,
                                        tx->base.stride, tx->bo->tile_mode,
-                                       0, 0,
+                                       0, 0, 0,
                                        tx->nblocksx, tx->nblocksy, 1,
                                        pf_get_blocksize(pt->format), nx, ny,
                                        NOUVEAU_BO_VRAM | NOUVEAU_BO_GART,
@@ -216,11 +199,11 @@ nv50_transfer_del(struct pipe_transfer *ptx)
 
                nv50_transfer_rect_m2mf(pscreen, tx->bo, 0,
                                        tx->base.stride, tx->bo->tile_mode,
-                                       0, 0,
+                                       0, 0, 0,
                                        tx->nblocksx, tx->nblocksy, 1,
                                        mt->base.bo, tx->level_offset,
                                        tx->level_pitch, tx->level_tiling,
-                                       tx->level_x, tx->level_y,
+                                       tx->level_x, tx->level_y, tx->level_z,
                                        tx->nblocksx, tx->nblocksy,
                                        tx->level_depth,
                                        pf_get_blocksize(pt->format), nx, ny,
@@ -282,7 +265,7 @@ nv50_upload_sifc(struct nv50_context *nv50,
 
        reloc |= NOUVEAU_BO_WR;
 
-       WAIT_RING (chan, 32);
+       MARK_RING (chan, 32, 2); /* flush on lack of space or relocs */
 
        if (bo->tile_flags) {
                BEGIN_RING(chan, eng2d, NV50_2D_DST_FORMAT, 5);
index 9c9fc6f64b38d11e937aeb5a40cb2f617d913507..8cfd4147c203c9773bb873ed084d15078b5ac60d 100644 (file)
@@ -4,8 +4,8 @@ include $(TOP)/configs/current
 LIBNAME = r300
 
 C_SOURCES = \
+       r300_blit.c \
        r300_chipset.c \
-       r300_clear.c \
        r300_context.c \
        r300_debug.c \
        r300_emit.c \
index 97989040d2e06ee5d2be36bfedf360ea4315bb1d..0d2de17be93205b8c5375709b18010b04a242fa7 100644 (file)
@@ -9,8 +9,8 @@ env.Append(CPPPATH = ['#/src/mesa/drivers/dri/r300/compiler', '#/include', '#/sr
 r300 = env.ConvenienceLibrary(
     target = 'r300',
     source = [
+        'r300_blit.c',
         'r300_chipset.c',
-        'r300_clear.c',
         'r300_context.c',
         'r300_debug.c',
         'r300_emit.c',
diff --git a/src/gallium/drivers/r300/r300_blit.c b/src/gallium/drivers/r300/r300_blit.c
new file mode 100644 (file)
index 0000000..ffe066d
--- /dev/null
@@ -0,0 +1,130 @@
+/*
+ * Copyright 2009 Marek Olšák <maraeo@gmail.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE. */
+
+#include "r300_blit.h"
+#include "r300_context.h"
+
+#include "util/u_rect.h"
+
+static void r300_blitter_save_states(struct r300_context* r300)
+{
+    util_blitter_save_blend(r300->blitter, r300->blend_state);
+    util_blitter_save_depth_stencil_alpha(r300->blitter, r300->dsa_state);
+    util_blitter_save_rasterizer(r300->blitter, r300->rs_state);
+    util_blitter_save_fragment_shader(r300->blitter, r300->fs);
+    util_blitter_save_vertex_shader(r300->blitter, r300->vs);
+}
+
+/* Clear currently bound buffers. */
+void r300_clear(struct pipe_context* pipe,
+                unsigned buffers,
+                const float* rgba,
+                double depth,
+                unsigned stencil)
+{
+    /* XXX Implement fastfill.
+     *
+     * If fastfill is enabled, a few facts should be considered:
+     *
+     * 1) Zbuffer must be micro-tiled and whole microtiles must be
+     *    written.
+     *
+     * 2) ZB_DEPTHCLEARVALUE is used to clear a zbuffer and Z Mask must be
+     *    equal to 0.
+     *
+     * 3) RB3D_COLOR_CLEAR_VALUE is used to clear a colorbuffer and
+     *    RB3D_COLOR_CHANNEL_MASK must be equal to 0.
+     *
+     * 4) ZB_CB_CLEAR can be used to make the ZB units help in clearing
+     *    the colorbuffer. The color clear value is supplied through both
+     *    RB3D_COLOR_CLEAR_VALUE and ZB_DEPTHCLEARVALUE, and the colorbuffer
+     *    must be set in ZB_DEPTHOFFSET and ZB_DEPTHPITCH in addition to
+     *    RB3D_COLOROFFSET and RB3D_COLORPITCH. It's obvious that the zbuffer
+     *    will not be cleared and multiple render targets cannot be cleared
+     *    this way either.
+     *
+     * 5) For 16-bit integer buffering, compression causes a hung with one or
+     *    two samples and should not be used.
+     *
+     * 6) Fastfill must not be used if reading of compressed Z data is disabled
+     *    and writing of compressed Z data is enabled (RD/WR_COMP_ENABLE),
+     *    i.e. it cannot be used to compress the zbuffer.
+     *    (what the hell does that mean and how does it fit in clearing
+     *    the buffers?)
+     *
+     * - Marek
+     */
+
+    struct r300_context* r300 = r300_context(pipe);
+
+    r300_blitter_save_states(r300);
+
+    util_blitter_clear(r300->blitter,
+                       r300->framebuffer_state.width,
+                       r300->framebuffer_state.height,
+                       r300->framebuffer_state.nr_cbufs,
+                       buffers, rgba, depth, stencil);
+}
+
+/* Copy a block of pixels from one surface to another. */
+void r300_surface_copy(struct pipe_context* pipe,
+                       struct pipe_surface* dst,
+                       unsigned dstx, unsigned dsty,
+                       struct pipe_surface* src,
+                       unsigned srcx, unsigned srcy,
+                       unsigned width, unsigned height)
+{
+    struct r300_context* r300 = r300_context(pipe);
+
+    /* Yeah we have to save all those states to ensure this blitter operation
+     * is really transparent. The states will be restored by the blitter once
+     * copying is done. */
+    r300_blitter_save_states(r300);
+    util_blitter_save_framebuffer(r300->blitter, &r300->framebuffer_state);
+
+    util_blitter_save_fragment_sampler_states(
+        r300->blitter, r300->sampler_count, (void**)r300->sampler_states);
+
+    util_blitter_save_fragment_sampler_textures(
+        r300->blitter, r300->texture_count,
+        (struct pipe_texture**)r300->textures);
+
+    /* Do a copy */
+    util_blitter_copy(r300->blitter,
+                      dst, dstx, dsty, src, srcx, srcy, width, height, TRUE);
+}
+
+/* Fill a region of a surface with a constant value. */
+void r300_surface_fill(struct pipe_context* pipe,
+                       struct pipe_surface* dst,
+                       unsigned dstx, unsigned dsty,
+                       unsigned width, unsigned height,
+                       unsigned value)
+{
+    struct r300_context* r300 = r300_context(pipe);
+
+    r300_blitter_save_states(r300);
+    util_blitter_save_framebuffer(r300->blitter, &r300->framebuffer_state);
+
+    util_blitter_fill(r300->blitter,
+                      dst, dstx, dsty, width, height, value);
+}
diff --git a/src/gallium/drivers/r300/r300_blit.h b/src/gallium/drivers/r300/r300_blit.h
new file mode 100644 (file)
index 0000000..029e4f9
--- /dev/null
@@ -0,0 +1,48 @@
+/*
+ * Copyright 2008 Marek Olšák <maraeo@gmail.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE. */
+
+#ifndef R300_BLIT_H
+#define R300_BLIT_H
+
+struct pipe_context;
+struct pipe_surface;
+
+void r300_clear(struct pipe_context* pipe,
+                unsigned buffers,
+                const float* rgba,
+                double depth,
+                unsigned stencil);
+
+void r300_surface_copy(struct pipe_context* pipe,
+                       struct pipe_surface* dst,
+                       unsigned dstx, unsigned dsty,
+                       struct pipe_surface* src,
+                       unsigned srcx, unsigned srcy,
+                       unsigned width, unsigned height);
+
+void r300_surface_fill(struct pipe_context* pipe,
+                       struct pipe_surface* dst,
+                       unsigned dstx, unsigned dsty,
+                       unsigned width, unsigned height,
+                       unsigned value);
+
+#endif /* R300_BLIT_H */
diff --git a/src/gallium/drivers/r300/r300_clear.c b/src/gallium/drivers/r300/r300_clear.c
deleted file mode 100644 (file)
index 02d6d50..0000000
+++ /dev/null
@@ -1,38 +0,0 @@
-/*
- * Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com>
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * on the rights to use, copy, modify, merge, publish, distribute, sub
- * license, and/or sell copies of the Software, and to permit persons to whom
- * the Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
- * USE OR OTHER DEALINGS IN THE SOFTWARE. */
-
-#include "r300_clear.h"
-#include "r300_context.h"
-
-#include "util/u_clear.h"
-
-/* Clears currently bound buffers. */
-void r300_clear(struct pipe_context* pipe,
-                unsigned buffers,
-                const float* rgba,
-                double depth,
-                unsigned stencil)
-{
-    /* XXX we can and should do one clear if both color and zs are set */
-    util_clear(pipe, &r300_context(pipe)->framebuffer_state,
-            buffers, rgba, depth, stencil);
-}
diff --git a/src/gallium/drivers/r300/r300_clear.h b/src/gallium/drivers/r300/r300_clear.h
deleted file mode 100644 (file)
index b8fcdf2..0000000
+++ /dev/null
@@ -1,34 +0,0 @@
-/*
- * Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com>
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * on the rights to use, copy, modify, merge, publish, distribute, sub
- * license, and/or sell copies of the Software, and to permit persons to whom
- * the Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
- * USE OR OTHER DEALINGS IN THE SOFTWARE. */
-
-#ifndef R300_CLEAR_H
-#define R300_CLEAR_H
-
-struct pipe_context;
-
-void r300_clear(struct pipe_context* pipe,
-                unsigned buffers,
-                const float* rgba,
-                double depth,
-                unsigned stencil);
-
-#endif /* R300_CLEAR_H */
index 5b337f03ac49d71ec840ec0a035e4ca35e358192..d5c2d63d393babb83a5c18c6c400abc21b77b90b 100644 (file)
@@ -28,7 +28,7 @@
 #include "util/u_memory.h"
 #include "util/u_simple_list.h"
 
-#include "r300_clear.h"
+#include "r300_blit.h"
 #include "r300_context.h"
 #include "r300_flush.h"
 #include "r300_query.h"
@@ -52,6 +52,8 @@ static void r300_destroy_context(struct pipe_context* context)
     struct r300_context* r300 = r300_context(context);
     struct r300_query* query, * temp;
 
+    util_blitter_destroy(r300->blitter);
+
     util_hash_table_foreach(r300->shader_hash_table, r300_clear_hash_table,
         NULL);
     util_hash_table_destroy(r300->shader_hash_table);
@@ -124,6 +126,8 @@ struct pipe_context* r300_create_context(struct pipe_screen* screen,
     r300->context.destroy = r300_destroy_context;
 
     r300->context.clear = r300_clear;
+    r300->context.surface_copy = r300_surface_copy;
+    r300->context.surface_fill = r300_surface_fill;
 
     if (r300screen->caps->has_tcl) {
         r300->context.draw_arrays = r300_draw_arrays;
@@ -175,5 +179,8 @@ struct pipe_context* r300_create_context(struct pipe_screen* screen,
     r300->winsys->set_flush_cb(r300->winsys, r300_flush_cb, r300);
     r300->dirty_state = R300_NEW_KITCHEN_SINK;
     r300->dirty_hw++;
+
+    r300->blitter = util_blitter_create(&r300->context);
+
     return &r300->context;
 }
index 0be190392a53fae0825d65b466c13883a7e30d12..6bd2766730f3785675806a0e46b28e694aa6637c 100644 (file)
@@ -25,6 +25,8 @@
 
 #include "draw/draw_vertex.h"
 
+#include "util/u_blitter.h"
+
 #include "pipe/p_context.h"
 #include "pipe/p_inlines.h"
 
@@ -248,6 +250,8 @@ struct r300_context {
     struct radeon_winsys* winsys;
     /* Draw module. Used mostly for SW TCL. */
     struct draw_context* draw;
+    /* Accelerated blit support. */
+    struct blitter_context* blitter;
 
     /* Vertex buffer for rendering. */
     struct pipe_buffer* vbo;
index f784e1fa8e5f9724e3819414e65675dea15e3afb..55c8aa07bdf5d8f433e702ad8e06f008a1decb82 100644 (file)
@@ -41,9 +41,16 @@ void r300_emit_blend_state(struct r300_context* r300,
     CS_LOCALS(r300);
     BEGIN_CS(8);
     OUT_CS_REG_SEQ(R300_RB3D_CBLEND, 3);
-    OUT_CS(blend->blend_control);
-    OUT_CS(blend->alpha_blend_control);
-    OUT_CS(blend->color_channel_mask);
+    if (r300->framebuffer_state.nr_cbufs) {
+        OUT_CS(blend->blend_control);
+        OUT_CS(blend->alpha_blend_control);
+        OUT_CS(blend->color_channel_mask);
+    } else {
+        OUT_CS(0);
+        OUT_CS(0);
+        OUT_CS(0);
+        /* XXX also disable fastfill here once it's supported */
+    }
     OUT_CS_REG(R300_RB3D_ROPCNTL, blend->rop);
     OUT_CS_REG(R300_RB3D_DITHER_CTL, blend->dither);
     END_CS;
@@ -335,7 +342,7 @@ void r300_emit_fb_state(struct r300_context* r300,
     assert(fb->nr_cbufs <= 4);
 
     BEGIN_CS((10 * fb->nr_cbufs) + (2 * (4 - fb->nr_cbufs)) +
-             (fb->zsbuf ? 10 : 0) + 4);
+             (fb->zsbuf ? 10 : 0) + 6);
 
     /* Flush and free renderbuffer caches. */
     OUT_CS_REG(R300_RB3D_DSTCACHE_CTLSTAT,
@@ -345,6 +352,9 @@ void r300_emit_fb_state(struct r300_context* r300,
         R300_ZB_ZCACHE_CTLSTAT_ZC_FLUSH_FLUSH_AND_FREE |
         R300_ZB_ZCACHE_CTLSTAT_ZC_FREE_FREE);
 
+    /* Set the number of colorbuffers. */
+    OUT_CS_REG(R300_RB3D_CCTL, R300_RB3D_CCTL_NUM_MULTIWRITES(fb->nr_cbufs));
+
     /* Set up colorbuffers. */
     for (i = 0; i < fb->nr_cbufs; i++) {
         surf = fb->cbufs[i];
@@ -873,10 +883,21 @@ void r300_emit_viewport_state(struct r300_context* r300,
 
 void r300_emit_texture_count(struct r300_context* r300)
 {
+    uint32_t tx_enable = 0;
+    int i;
     CS_LOCALS(r300);
 
+    /* Notice that texture_count and sampler_count are just sizes
+     * of the respective arrays. We still have to check for the individual
+     * elements. */
+    for (i = 0; i < MIN2(r300->sampler_count, r300->texture_count); i++) {
+        if (r300->textures[i]) {
+            tx_enable |= 1 << i;
+        }
+    }
+
     BEGIN_CS(2);
-    OUT_CS_REG(R300_TX_ENABLE, (1 << r300->texture_count) - 1);
+    OUT_CS_REG(R300_TX_ENABLE, tx_enable);
     END_CS;
 
 }
index c1ea87d11e9485d6373efa09f787912b5f5a12b3..d8d08fbe26417ff889ceab23295542b57a05cc88 100644 (file)
@@ -2145,6 +2145,7 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
 
 /* Unpipelined. */
 #define R300_RB3D_CCTL                      0x4e00
+#      define R300_RB3D_CCTL_NUM_MULTIWRITES(x)       (MAX2(((x)-1), 0) << 5)
 #      define R300_RB3D_CCTL_NUM_MULTIWRITES_1_BUFFER                (0 << 5)
 #      define R300_RB3D_CCTL_NUM_MULTIWRITES_2_BUFFERS               (1 << 5)
 #      define R300_RB3D_CCTL_NUM_MULTIWRITES_3_BUFFERS               (2 << 5)
index 35b335df6a1640100efc3ea3c41c5b590ecdb1dd..4b210f72db2ac9efcec8ea5fcc37e8b899e7ffaf 100644 (file)
@@ -82,8 +82,9 @@ static void r300_emit_draw_arrays(struct r300_context *r300,
 {
     CS_LOCALS(r300);
 
-    BEGIN_CS(4);
-    OUT_CS_REG(R300_VAP_VF_MAX_VTX_INDX, count);
+    BEGIN_CS(6);
+    OUT_CS_REG(R300_VAP_VF_MIN_VTX_INDX, 0);
+    OUT_CS_REG(R300_VAP_VF_MAX_VTX_INDX, count - 1);
     OUT_CS_PKT3(R300_PACKET3_3D_DRAW_VBUF_2, 0);
     OUT_CS(R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_LIST | (count << 16) |
            r300_translate_primitive(mode));
@@ -108,7 +109,8 @@ static void r300_emit_draw_elements(struct r300_context *r300,
     assert((start * indexSize)  % 4 == 0);
     assert(offset_dwords == 0);
 
-    BEGIN_CS(10);
+    BEGIN_CS(12);
+    OUT_CS_REG(R300_VAP_VF_MIN_VTX_INDX, minIndex);
     OUT_CS_REG(R300_VAP_VF_MAX_VTX_INDX, maxIndex);
     OUT_CS_PKT3(R300_PACKET3_3D_DRAW_INDX_2, 0);
     if (indexSize == 4) {
index c0d9797020dacb73acd4ff4cd3f2fafc14134168..feb571a23dd762cdf819b129e4a82843a4169f00 100644 (file)
@@ -220,12 +220,18 @@ static boolean check_tex_format(enum pipe_format format, uint32_t usage,
 
         /* Z buffer or texture */
         case PIPE_FORMAT_Z16_UNORM:
+            retval = usage &
+                (PIPE_TEXTURE_USAGE_DEPTH_STENCIL |
+                 PIPE_TEXTURE_USAGE_SAMPLER);
+            break;
+
+        /* 24bit Z buffer can only be used as a texture on R500. */
         case PIPE_FORMAT_Z24X8_UNORM:
         /* Z buffer with stencil or texture */
         case PIPE_FORMAT_Z24S8_UNORM:
             retval = usage &
                 (PIPE_TEXTURE_USAGE_DEPTH_STENCIL |
-                 PIPE_TEXTURE_USAGE_SAMPLER);
+                 (is_r500 ? PIPE_TEXTURE_USAGE_SAMPLER : 0));
             break;
 
         /* Definitely unsupported formats. */
index edf7114bbb5023c70a75ad6dae7826f2fac55cee..91cf972edee4fa46d026f3b09b645d62c6444c3b 100644 (file)
@@ -339,6 +339,7 @@ static void
         r300->dirty_state |= R300_NEW_SCISSOR;
     }
     r300->dirty_state |= R300_NEW_FRAMEBUFFERS;
+    r300->dirty_state |= R300_NEW_BLEND;
 }
 
 /* Create fragment shader state. */
@@ -626,8 +627,6 @@ static void r300_set_sampler_textures(struct pipe_context* pipe,
         return;
     }
     
-    r300->context.flush(&r300->context, 0, NULL);
-
     for (i = 0; i < count; i++) {
         if (r300->textures[i] != (struct r300_texture*)texture[i]) {
             pipe_texture_reference((struct pipe_texture**)&r300->textures[i],
index 46d1cb39b54d9663c8e0c9669a60654478d0f6cf..bcd4c030f9c0a5c25135d03f4300b4779359f6ce 100644 (file)
@@ -43,7 +43,7 @@ void r300_emit_invariant_state(struct r300_context* r300)
     struct r300_capabilities* caps = r300_screen(r300->context.screen)->caps;
     CS_LOCALS(r300);
 
-    BEGIN_CS(24 + (caps->has_tcl ? 2: 0));
+    BEGIN_CS(20 + (caps->has_tcl ? 2: 0));
 
     /*** Graphics Backend (GB) ***/
     /* Various GB enables */
@@ -70,9 +70,6 @@ void r300_emit_invariant_state(struct r300_context* r300)
     OUT_CS_REG(R300_US_W_FMT, 0x0);
 
     /*** VAP ***/
-    /* Max and min vertex index clamp. */
-    OUT_CS_REG(R300_VAP_VF_MIN_VTX_INDX, 0x0);
-    OUT_CS_REG(R300_VAP_VF_MAX_VTX_INDX, 0xffffff);
     /* Sign/normalize control */
     OUT_CS_REG(R300_VAP_PSC_SGN_NORM_CNTL, R300_SGN_NORM_NO_ZERO);
     /* TCL-only stuff */
@@ -84,15 +81,11 @@ void r300_emit_invariant_state(struct r300_context* r300)
     END_CS;
 
     /* XXX unsorted stuff from surface_fill */
-    BEGIN_CS(56 + (caps->has_tcl ? 5 : 0) + (caps->is_r500 ? 4 : 0));
-    /* Flush PVS. */
-    OUT_CS_REG(R300_VAP_PVS_STATE_FLUSH_REG, 0x0);
+    BEGIN_CS(44 + (caps->has_tcl ? 7 : 0) + (caps->is_r500 ? 4 : 0));
 
-    OUT_CS_REG(R300_SE_VTE_CNTL, R300_VPORT_X_SCALE_ENA |
-        R300_VPORT_X_OFFSET_ENA | R300_VPORT_Y_SCALE_ENA |
-        R300_VPORT_Y_OFFSET_ENA | R300_VPORT_Z_SCALE_ENA |
-        R300_VPORT_Z_OFFSET_ENA | R300_VTX_W0_FMT);
     if (caps->has_tcl) {
+        /*Flushing PVS is required before the VAP_GB registers can be changed*/
+        OUT_CS_REG(R300_VAP_PVS_STATE_FLUSH_REG, 0);
         OUT_CS_REG_SEQ(R300_VAP_GB_VERT_CLIP_ADJ, 4);
         OUT_CS_32F(1.0);
         OUT_CS_32F(1.0);
@@ -123,19 +116,15 @@ void r300_emit_invariant_state(struct r300_context* r300)
     OUT_CS_REG(R300_SU_DEPTH_OFFSET, 0x00000000);
     OUT_CS_REG(R300_SC_HYPERZ, 0x0000001C);
     OUT_CS_REG(R300_SC_EDGERULE, 0x2DA49525);
-    OUT_CS_REG(R300_RB3D_CCTL, 0x00000000);
     OUT_CS_REG(R300_RB3D_AARESOLVE_CTL, 0x00000000);
     if (caps->is_r500) {
         OUT_CS_REG(R500_RB3D_DISCARD_SRC_PIXEL_LTE_THRESHOLD, 0x00000000);
         OUT_CS_REG(R500_RB3D_DISCARD_SRC_PIXEL_GTE_THRESHOLD, 0xFFFFFFFF);
     }
-    OUT_CS_REG(R300_ZB_FORMAT, 0x00000002);
-    OUT_CS_REG(R300_ZB_ZCACHE_CTLSTAT, 0x00000003);
     OUT_CS_REG(R300_ZB_BW_CNTL, 0x00000000);
     OUT_CS_REG(R300_ZB_DEPTHCLEARVALUE, 0x00000000);
     OUT_CS_REG(R300_ZB_HIZ_OFFSET, 0x00000000);
     OUT_CS_REG(R300_ZB_HIZ_PITCH, 0x00000000);
-    OUT_CS_REG(R300_SE_VTE_CNTL, 0x0000043F);
 
     /* XXX */
     OUT_CS_REG(R300_SC_CLIP_RULE, 0xaaaa);
index 69a0970d5f80d4c5c467208faf0572228c38eb26..fe1390d765f2272b642649b69e9cf5eb4d480200 100644 (file)
@@ -140,7 +140,8 @@ enum pipe_texture_target {
    PIPE_TEXTURE_1D   = 0,
    PIPE_TEXTURE_2D   = 1,
    PIPE_TEXTURE_3D   = 2,
-   PIPE_TEXTURE_CUBE = 3
+   PIPE_TEXTURE_CUBE = 3,
+   PIPE_MAX_TEXTURE_TYPES
 };
 
 #define PIPE_TEX_FACE_POS_X 0
index 588ca5e026d482c784ee9c7e14890d0b96dfc761..79f3d3f056677e286c73ed9ea9762a49b3cd90d1 100644 (file)
@@ -55,6 +55,7 @@ struct tgsi_processor
 #define TGSI_TOKEN_TYPE_DECLARATION    0
 #define TGSI_TOKEN_TYPE_IMMEDIATE      1
 #define TGSI_TOKEN_TYPE_INSTRUCTION    2
+#define TGSI_TOKEN_TYPE_PROPERTY       3
 
 struct tgsi_token
 {
@@ -64,16 +65,17 @@ struct tgsi_token
 };
 
 enum tgsi_file_type {
-   TGSI_FILE_NULL        =0,
-   TGSI_FILE_CONSTANT    =1,
-   TGSI_FILE_INPUT       =2,
-   TGSI_FILE_OUTPUT      =3,
-   TGSI_FILE_TEMPORARY   =4,
-   TGSI_FILE_SAMPLER     =5,
-   TGSI_FILE_ADDRESS     =6,
-   TGSI_FILE_IMMEDIATE   =7,
-   TGSI_FILE_LOOP        =8,
-   TGSI_FILE_PREDICATE   =9,
+   TGSI_FILE_NULL         =0,
+   TGSI_FILE_CONSTANT     =1,
+   TGSI_FILE_INPUT        =2,
+   TGSI_FILE_OUTPUT       =3,
+   TGSI_FILE_TEMPORARY    =4,
+   TGSI_FILE_SAMPLER      =5,
+   TGSI_FILE_ADDRESS      =6,
+   TGSI_FILE_IMMEDIATE    =7,
+   TGSI_FILE_LOOP         =8,
+   TGSI_FILE_PREDICATE    =9,
+   TGSI_FILE_SYSTEM_VALUE =10,
    TGSI_FILE_COUNT      /**< how many TGSI_FILE_ types */
 };
 
@@ -151,6 +153,22 @@ union tgsi_immediate_data
    float Float;
 };
 
+#define TGSI_PROPERTY_GS_INPUT_PRIM          0
+#define TGSI_PROPERTY_GS_OUTPUT_PRIM         1
+#define TGSI_PROPERTY_GS_MAX_VERTICES        2
+#define TGSI_PROPERTY_COUNT                  3
+
+struct tgsi_property {
+   unsigned Type         : 4;  /**< TGSI_TOKEN_TYPE_PROPERTY */
+   unsigned NrTokens     : 8;  /**< UINT */
+   unsigned PropertyName : 8;  /**< one of TGSI_PROPERTY */
+   unsigned Padding      : 12;
+};
+
+struct tgsi_property_data {
+   unsigned Data;
+};
+
 /* TGSI opcodes.  
  * 
  * For more information on semantics of opcodes and
index 2a8daed051d4777f566b5d0566995cae55762f95..76acc99ad7388d51ac6af53c9d98f6417b7d9d2d 100644 (file)
@@ -81,6 +81,7 @@ static struct pipe_buffer *radeon_buffer_create(struct pipe_winsys *ws,
         domain |= RADEON_GEM_DOMAIN_GTT;
     }
 
+    radeon_buffer->ws = radeon_ws;
     radeon_buffer->bo = radeon_bo_open(radeon_ws->priv->bom, 0, size,
             alignment, domain, 0);
     if (radeon_buffer->bo == NULL) {
@@ -131,6 +132,11 @@ static void radeon_buffer_del(struct pipe_buffer *buffer)
 {
     struct radeon_pipe_buffer *radeon_buffer =
         (struct radeon_pipe_buffer*)buffer;
+    struct radeon_winsys_priv *priv = radeon_buffer->ws->priv;
+
+    if (radeon_bo_is_referenced_by_cs(radeon_buffer->bo, priv->cs)) {
+        priv->cs->space_flush_fn(priv->cs->space_flush_data);
+    }
 
     radeon_bo_unref(radeon_buffer->bo);
     free(radeon_buffer);
index bfe2221d1ed8c9e50d79e188ffc26a24c2f8e010..1e91e18927a84ed040408ffbcd71199da0222aff 100644 (file)
@@ -50,6 +50,7 @@
 struct radeon_pipe_buffer {
     struct pipe_buffer  base;
     struct radeon_bo    *bo;
+    struct radeon_winsys *ws;
     boolean flinked;
     uint32_t flink;
 };
index dec7c065036d374bd4e3bac5ea6fe6667144a150..2f7fbc72423b7445ecf33668ff8912b68058bf45 100644 (file)
@@ -171,6 +171,7 @@ struct pipe_buffer* radeon_buffer_from_handle(struct drm_api* api,
     radeon_buffer->base.screen = screen;
     radeon_buffer->base.usage = PIPE_BUFFER_USAGE_PIXEL;
     radeon_buffer->bo = bo;
+    radeon_buffer->ws = (struct radeon_winsys*)screen->winsys;
     return &radeon_buffer->base;
 }
 
index 10e1b3c9124885d0b84e8f23a42decd9a9398e93..ca6dd3bcf8e4d6ea231d54bf68cd7b99044b6417 100644 (file)
@@ -141,7 +141,7 @@ static void r300_emit_tx_setup(struct r300_context *r300,
 
     assert(width <= 2048);
     assert(height <= 2048);
-    assert(r300TranslateTexFormat(mesa_format) != 0);
+    assert(r300TranslateTexFormat(mesa_format) >= 0);
     assert(offset % 32 == 0);
 
     BEGIN_BATCH(17);
@@ -315,8 +315,8 @@ static void r300_emit_rs_setup(struct r300_context *r300)
     OUT_BATCH_REGVAL(R300_RS_IP_0,
                      R300_RS_TEX_PTR(0) |
                      R300_RS_SEL_S(R300_RS_SEL_C0) |
-                     R300_RS_SEL_R(R300_RS_SEL_C1) |
-                     R300_RS_SEL_T(R300_RS_SEL_K0) |
+                     R300_RS_SEL_T(R300_RS_SEL_C1) |
+                     R300_RS_SEL_R(R300_RS_SEL_K0) |
                      R300_RS_SEL_Q(R300_RS_SEL_K1));
     END_BATCH();
 }
@@ -428,6 +428,7 @@ static void emit_cb_setup(struct r300_context *r300,
                           struct radeon_bo *bo,
                           intptr_t offset,
                           gl_format mesa_format,
+                          unsigned pitch,
                           unsigned width,
                           unsigned height)
 {
@@ -448,7 +449,7 @@ static void emit_cb_setup(struct r300_context *r300,
 
     r300_emit_cb_setup(r300, bo, offset, mesa_format,
                        _mesa_get_format_bytes(mesa_format),
-                       _mesa_format_row_stride(mesa_format, width));
+                       _mesa_format_row_stride(mesa_format, pitch));
 
     BEGIN_BATCH_NO_AUTOSTATE(5);
     OUT_BATCH_REGSEQ(R300_SC_SCISSORS_TL, 2);
@@ -468,18 +469,14 @@ GLboolean r300_blit(struct r300_context *r300,
                     struct radeon_bo *dst_bo,
                     intptr_t dst_offset,
                     gl_format dst_mesaformat,
+                    unsigned dst_pitch,
                     unsigned dst_width,
                     unsigned dst_height)
 {
-    //assert(src_width == dst_width);
-    //assert(src_height == dst_height);
-
     if (src_bo == dst_bo) {
         return GL_FALSE;
     }
 
-    //return GL_FALSE;
-
     if (0) {
         fprintf(stderr, "src: width %d, height %d, pitch %d vs %d, format %s\n",
                 src_width, src_height, src_pitch,
@@ -511,14 +508,13 @@ GLboolean r300_blit(struct r300_context *r300,
     emit_pvs_setup(r300, r300->blit.vp_code.body.d, 2);
     emit_vap_setup(r300, dst_width, dst_height);
 
-    emit_cb_setup(r300, dst_bo, dst_offset, dst_mesaformat, dst_width, dst_height);
+    emit_cb_setup(r300, dst_bo, dst_offset, dst_mesaformat, dst_pitch, dst_width, dst_height);
 
     emit_draw_packet(r300, dst_width, dst_height);
 
     r300EmitCacheFlush(r300);
 
     radeonFlush(r300->radeon.glCtx);
-    //r300ResetHwState(r300);
 
     return GL_TRUE;
 }
\ No newline at end of file
index 29c5aa95149cb2bae41bf6ad741ca9f9e324b3ea..28ffd4ea42118d63984c1bcce3b0d72c6e6bcef0 100644 (file)
@@ -40,6 +40,7 @@ GLboolean r300_blit(struct r300_context *r300,
                     struct radeon_bo *dst_bo,
                     intptr_t dst_offset,
                     gl_format dst_mesaformat,
+                    unsigned dst_pitch,
                     unsigned dst_width,
                     unsigned dst_height);
 
index 05005f61c30ea6c42d332cca4e04883e517fac48..3c6ec2a34a8fc9f85fa338198e25904da1266021 100644 (file)
@@ -486,7 +486,6 @@ GLboolean r300CreateContext(const __GLcontextModes * glVisual,
        r300_init_vtbl(&r300->radeon);
 
        _mesa_init_driver_functions(&functions);
-       r300_init_texcopy_functions(&functions);
        r300InitIoctlFuncs(&functions);
        r300InitStateFuncs(&functions);
        r300InitTextureFuncs(&functions);
@@ -494,6 +493,10 @@ GLboolean r300CreateContext(const __GLcontextModes * glVisual,
        radeonInitQueryObjFunctions(&functions);
        radeonInitBufferObjectFuncs(&functions);
 
+       if (r300->radeon.radeonScreen->kernel_mm) {
+               r300_init_texcopy_functions(&functions);
+       }
+
        if (!radeonInitContext(&r300->radeon, &functions,
                               glVisual, driContextPriv,
                               sharedContextPrivate)) {
index beb10072e9c69470d48c2efbc6f9a700e4832e37..6ede0fe25c9047241a2bea540403b6f4e050f55d 100644 (file)
@@ -51,6 +51,6 @@ extern GLboolean r300ValidateBuffers(GLcontext * ctx);
 
 extern void r300InitTextureFuncs(struct dd_function_table *functions);
 
-uint32_t r300TranslateTexFormat(gl_format mesaFormat);
+int32_t r300TranslateTexFormat(gl_format mesaFormat);
 
 #endif                         /* __r300_TEX_H__ */
index 5e3a724d4e637698e794e7a78f09a8fbe36ab1d2..7702a1d67dfa057391f98ae2f585590df8447ec7 100644 (file)
@@ -63,7 +63,6 @@ do_copy_texsubimage(GLcontext *ctx,
     assert(timg->mt->bo);
     assert(timg->base.Width >= dstx + width);
     assert(timg->base.Height >= dsty + height);
-    //assert(tobj->mt == timg->mt);
 
     intptr_t src_offset = rrb->draw_offset + x * rrb->cpp + y * rrb->pitch;
     intptr_t dst_offset = radeon_miptree_image_offset(timg->mt, _mesa_tex_target_to_face(target), level);
@@ -87,7 +86,7 @@ do_copy_texsubimage(GLcontext *ctx,
     /* blit from src buffer to texture */
     return r300_blit(r300, rrb->bo, src_offset, rrb->base.Format, rrb->pitch,
                      rrb->base.Width, rrb->base.Height, timg->mt->bo ? timg->mt->bo : timg->bo, dst_offset,
-                     timg->base.TexFormat, width, height);
+                     timg->base.TexFormat, timg->base.Width, width, height);
 }
 
 static void
index 6db56ba618f801bab66f19f3eb9252a55ab6b50b..d4a728381e46a2cc11834d28307ffbf311aea439 100644 (file)
@@ -59,7 +59,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  * identically.  -- paulus
  */
 
-uint32_t r300TranslateTexFormat(gl_format mesaFormat)
+int32_t r300TranslateTexFormat(gl_format mesaFormat)
 {
        switch (mesaFormat)
        {
@@ -168,7 +168,7 @@ uint32_t r300TranslateTexFormat(gl_format mesaFormat)
                case MESA_FORMAT_SRGBA_DXT5:
                        return R300_EASY_TX_FORMAT(Y, Z, W, X, DXT5) | R300_TX_FORMAT_GAMMA;
                default:
-                       return 0;
+                       return -1;
        }
 };
 
@@ -252,12 +252,13 @@ static void setup_hardware_state(r300ContextPtr rmesa, radeonTexObj *t)
                if (firstImage->_BaseFormat == GL_DEPTH_COMPONENT) {
                        r300SetDepthTexMode(&t->base);
                } else {
-                       t->pp_txformat = r300TranslateTexFormat(firstImage->TexFormat);
-                       if (t->pp_txformat == 0) {
+                       int32_t txformat = r300TranslateTexFormat(firstImage->TexFormat);
+                       if (txformat < 0) {
                                _mesa_problem(rmesa->radeon.glCtx, "%s: Invalid format %s",
                                                          __FUNCTION__, _mesa_get_format_name(firstImage->TexFormat));
                                _mesa_exit(1);
                        }
+                       t->pp_txformat = (uint32_t) txformat;
                }
        }
 
index d27a3245a394a8a9661320a3773cc84d14f4a2b2..5e1504872d627881e5ccfa995acb34b8c8f02ea4 100644 (file)
@@ -52,6 +52,21 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 #include "radeon_mipmap_tree.h"
 #include "radeon_reg.h"
 
+struct r600_cs_manager_legacy
+{
+    struct radeon_cs_manager    base;
+    struct radeon_context       *ctx;
+    /* hack for scratch stuff */
+    uint32_t                    pending_age;
+    uint32_t                    pending_count;
+};
+
+struct r600_cs_reloc_legacy {
+    struct radeon_cs_reloc  base;
+    uint32_t                cindices;
+    uint32_t                *indices;
+    uint32_t                *reloc_indices;
+};
 
 
 static struct radeon_cs * r600_cs_create(struct radeon_cs_manager *csm,
index eba43d37b6b53b8ad4b3d8c75e4f175dbff0d13c..dff000969993c1b87a1e6ac390fd037eb7c04ffb 100644 (file)
@@ -118,22 +118,6 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 #define R600_IT_SET_CTL_CONST                     0x00006F00
 #define R600_IT_SURFACE_BASE_UPDATE               0x00007300
 
-struct r600_cs_manager_legacy
-{
-    struct radeon_cs_manager    base;
-    struct radeon_context       *ctx;
-    /* hack for scratch stuff */
-    uint32_t                    pending_age;
-    uint32_t                    pending_count;
-};
-
-struct r600_cs_reloc_legacy {
-    struct radeon_cs_reloc  base;
-    uint32_t                cindices;
-    uint32_t                *indices;
-    uint32_t                *reloc_indices;
-};
-
 struct radeon_cs_manager * r600_radeon_cs_manager_legacy_ctor(struct radeon_context *ctx);
 
 /**
index 25314eff56320caa11e92c77606cb6d1bd467026..b66fe78ac3b306976169917c72316f2c4d80d584 100644 (file)
@@ -317,20 +317,8 @@ static void r600InitGLExtensions(GLcontext *ctx)
 
 #ifdef R600_ENABLE_GLSL_TEST
     driInitExtensions(ctx, gl_20_extension, GL_TRUE);
-    //_mesa_enable_2_0_extensions(ctx);
-    //1.5
-    ctx->Extensions.ARB_occlusion_query = GL_TRUE;
-    ctx->Extensions.ARB_vertex_buffer_object = GL_TRUE;
-    ctx->Extensions.EXT_shadow_funcs = GL_TRUE;
-    //2.0
-    ctx->Extensions.ARB_draw_buffers = GL_TRUE;
-    ctx->Extensions.ARB_point_sprite = GL_TRUE;
-    ctx->Extensions.ARB_shader_objects = GL_TRUE;
-    ctx->Extensions.ARB_vertex_shader = GL_TRUE;
-    ctx->Extensions.ARB_fragment_shader = GL_TRUE;
-    ctx->Extensions.EXT_blend_equation_separate = GL_TRUE;
-    ctx->Extensions.ATI_separate_stencil = GL_TRUE;
-
+    _mesa_enable_2_0_extensions(ctx);
+    
     /* glsl compiler has problem if this is not GL_TRUE */
     ctx->Shader.EmitCondCodes = GL_TRUE;
 #endif /* R600_ENABLE_GLSL_TEST */
index e84f5245253bb64d688a70569215510fd003d629..e10b23b97f1130126c125d86415a09c39d163016 100644 (file)
@@ -539,12 +539,15 @@ int Init_r700_AssemblerBase(SHADER_PIPE_TYPE spt, r700_AssemblerBase* pAsm, R700
     pAsm->unNumPresub       = 0;
     pAsm->unCurNumILInsts   = 0;
 
+    pAsm->unVetTexBits      = 0;
+
     return 0;
 }
 
 GLboolean IsTex(gl_inst_opcode Opcode)
 {
-    if( (OPCODE_TEX==Opcode) || (OPCODE_TXP==Opcode) || (OPCODE_TXB==Opcode) )
+    if( (OPCODE_TEX==Opcode) || (OPCODE_TXP==Opcode) || (OPCODE_TXB==Opcode) ||
+        (OPCODE_DDX==Opcode) || (OPCODE_DDY==Opcode) )
     {
         return GL_TRUE;
     }
@@ -1412,43 +1415,65 @@ GLboolean tex_src(r700_AssemblerBase *pAsm)
             pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
             break;
         case PROGRAM_INPUT:
-            switch (pILInst->SrcReg[0].Index)
+            if(SPT_VP == pAsm->currentShaderType)
+            {
+                switch (pILInst->SrcReg[0].Index)
+                {
+                    case VERT_ATTRIB_TEX0:
+                    case VERT_ATTRIB_TEX1:
+                    case VERT_ATTRIB_TEX2:
+                    case VERT_ATTRIB_TEX3:
+                    case VERT_ATTRIB_TEX4:
+                    case VERT_ATTRIB_TEX5:
+                    case VERT_ATTRIB_TEX6:
+                    case VERT_ATTRIB_TEX7:
+                        bValidTexCoord = GL_TRUE;
+                        pAsm->S[0].src.reg   =
+                            pAsm->ucVP_AttributeMap[pILInst->SrcReg[0].Index];
+                        pAsm->S[0].src.rtype = SRC_REG_INPUT;
+                        break;
+                }
+            }
+            else
             {
-                case FRAG_ATTRIB_WPOS:
-                case FRAG_ATTRIB_COL0:
-                case FRAG_ATTRIB_COL1:
-                case FRAG_ATTRIB_FOGC:
-                case FRAG_ATTRIB_TEX0:
-                case FRAG_ATTRIB_TEX1:
-                case FRAG_ATTRIB_TEX2:
-                case FRAG_ATTRIB_TEX3:
-                case FRAG_ATTRIB_TEX4:
-                case FRAG_ATTRIB_TEX5:
-                case FRAG_ATTRIB_TEX6:
-                case FRAG_ATTRIB_TEX7:
-                    bValidTexCoord = GL_TRUE;
+                switch (pILInst->SrcReg[0].Index)
+                {
+                    case FRAG_ATTRIB_WPOS:
+                    case FRAG_ATTRIB_COL0:
+                    case FRAG_ATTRIB_COL1:
+                    case FRAG_ATTRIB_FOGC:
+                    case FRAG_ATTRIB_TEX0:
+                    case FRAG_ATTRIB_TEX1:
+                    case FRAG_ATTRIB_TEX2:
+                    case FRAG_ATTRIB_TEX3:
+                    case FRAG_ATTRIB_TEX4:
+                    case FRAG_ATTRIB_TEX5:
+                    case FRAG_ATTRIB_TEX6:
+                    case FRAG_ATTRIB_TEX7:
+                        bValidTexCoord = GL_TRUE;
+                        pAsm->S[0].src.reg   =
+                            pAsm->uiFP_AttributeMap[pILInst->SrcReg[0].Index];
+                        pAsm->S[0].src.rtype = SRC_REG_INPUT;
+                        break;
+                    case FRAG_ATTRIB_FACE:
+                        fprintf(stderr, "FRAG_ATTRIB_FACE unsupported\n");
+                        break;
+                    case FRAG_ATTRIB_PNTC:
+                        fprintf(stderr, "FRAG_ATTRIB_PNTC unsupported\n");
+                        break;
+                }
+
+                if( (pILInst->SrcReg[0].Index >= FRAG_ATTRIB_VAR0) ||
+                    (pILInst->SrcReg[0].Index < FRAG_ATTRIB_MAX) )
+                {
+                                   bValidTexCoord = GL_TRUE;
                     pAsm->S[0].src.reg   =
                         pAsm->uiFP_AttributeMap[pILInst->SrcReg[0].Index];
                     pAsm->S[0].src.rtype = SRC_REG_INPUT;
-                    break;
-                case FRAG_ATTRIB_FACE:
-                    fprintf(stderr, "FRAG_ATTRIB_FACE unsupported\n");
-                    break;
-                case FRAG_ATTRIB_PNTC:
-                    fprintf(stderr, "FRAG_ATTRIB_PNTC unsupported\n");
-                    break;
-            }
-
-            if( (pILInst->SrcReg[0].Index >= FRAG_ATTRIB_VAR0) ||
-                (pILInst->SrcReg[0].Index < FRAG_ATTRIB_MAX) )
-            {
-                               bValidTexCoord = GL_TRUE;
-                pAsm->S[0].src.reg   =
-                    pAsm->uiFP_AttributeMap[pILInst->SrcReg[0].Index];
-                pAsm->S[0].src.rtype = SRC_REG_INPUT;
+                }
             }
 
-        break;
+            break;
         }
     }
 
@@ -1493,8 +1518,17 @@ GLboolean assemble_tex_instruction(r700_AssemblerBase *pAsm, GLboolean normalize
     tex_instruction_ptr->m_Word0.f.tex_inst         = pAsm->D.dst.opcode;
     tex_instruction_ptr->m_Word0.f.bc_frac_mode     = 0x0;
     tex_instruction_ptr->m_Word0.f.fetch_whole_quad = 0x0;
+    tex_instruction_ptr->m_Word0.f.alt_const        = 0;
 
-    tex_instruction_ptr->m_Word0.f.resource_id      = texture_unit_source->reg;
+    if(SPT_VP == pAsm->currentShaderType)
+    {
+        tex_instruction_ptr->m_Word0.f.resource_id      = texture_unit_source->reg + VERT_ATTRIB_MAX;
+        pAsm->unVetTexBits |= 1 << texture_unit_source->reg;
+    }
+    else
+    {
+        tex_instruction_ptr->m_Word0.f.resource_id      = texture_unit_source->reg;
+    }
 
     tex_instruction_ptr->m_Word1.f.lod_bias     = 0x0;
     if (normalized) {
@@ -1513,7 +1547,6 @@ GLboolean assemble_tex_instruction(r700_AssemblerBase *pAsm, GLboolean normalize
     tex_instruction_ptr->m_Word2.f.offset_x   = 0x0;
     tex_instruction_ptr->m_Word2.f.offset_y   = 0x0;
     tex_instruction_ptr->m_Word2.f.offset_z   = 0x0;
-
     tex_instruction_ptr->m_Word2.f.sampler_id = texture_unit_source->reg;
 
     // dst
@@ -4331,13 +4364,20 @@ GLboolean assemble_TEX(r700_AssemblerBase *pAsm)
 
     }
 
-    if(pAsm->pILInst[pAsm->uiCurInst].Opcode == OPCODE_TXB)
+    switch(pAsm->pILInst[pAsm->uiCurInst].Opcode)
     {
-        pAsm->D.dst.opcode = SQ_TEX_INST_SAMPLE_L;
-    }
-    else
-    {
-        pAsm->D.dst.opcode = SQ_TEX_INST_SAMPLE;
+        case OPCODE_DDX:
+            /* will these need WQM(1) on CF inst ? */
+            pAsm->D.dst.opcode = SQ_TEX_INST_GET_GRADIENTS_H;
+            break;
+        case OPCODE_DDY:
+            pAsm->D.dst.opcode = SQ_TEX_INST_GET_GRADIENTS_V;
+            break;
+        case OPCODE_TXB:
+            pAsm->D.dst.opcode = SQ_TEX_INST_SAMPLE_L;
+            break;
+        default:
+            pAsm->D.dst.opcode = SQ_TEX_INST_SAMPLE;
     }
 
     pAsm->is_tex = GL_TRUE;
@@ -5650,7 +5690,8 @@ GLboolean AssembleInstr(GLuint uiFirstInst,
                 }
             }
             break;
-
+        case OPCODE_DDX:
+        case OPCODE_DDY:
         case OPCODE_TEX: 
         case OPCODE_TXB:  
         case OPCODE_TXP: 
index 6ef945dfda3368759ce9be0a53d13ead04bb02b4..dbd9860f7d0b578dc3486ca4b4909a4aadb1412b 100644 (file)
@@ -485,6 +485,8 @@ typedef struct r700_AssemblerBase
     GLuint        unNumPresub;
     GLuint        unCurNumILInsts;
 
+    GLuint    unVetTexBits;
+
 } r700_AssemblerBase;
 
 //Internal use
index ee2a0a4c8a6410099a517bd183c94e97283ec15a..c124e02184f473691a6e31788d25890fb050d025 100644 (file)
@@ -45,6 +45,9 @@ static void r700SendTexState(GLcontext *ctx, struct radeon_state_atom *atom)
 {
        context_t         *context = R700_CONTEXT(ctx);
        R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(&context->hw);
+
+    struct r700_vertex_program *vp = context->selected_vp;
+
        struct radeon_bo *bo = NULL;
        unsigned int i;
        BATCH_LOCALS(&context->radeon);
@@ -52,7 +55,7 @@ static void r700SendTexState(GLcontext *ctx, struct radeon_state_atom *atom)
        radeon_print(RADEON_STATE, RADEON_VERBOSE, "%s\n", __func__);
 
        for (i = 0; i < R700_TEXTURE_NUMBERUNITS; i++) {
-               if (ctx->Texture.Unit[i]._ReallyEnabled) {
+               if (ctx->Texture.Unit[i]._ReallyEnabled) {            
                        radeonTexObj *t = r700->textures[i];
                        uint32_t offset;
                        if (t) {
@@ -71,7 +74,16 @@ static void r700SendTexState(GLcontext *ctx, struct radeon_state_atom *atom)
 
                                        BEGIN_BATCH_NO_AUTOSTATE(9 + 4);
                                        R600_OUT_BATCH(CP_PACKET3(R600_IT_SET_RESOURCE, 7));
-                                       R600_OUT_BATCH(i * 7);
+
+                    if( (1<<i) & vp->r700AsmCode.unVetTexBits )                    
+                    {   /* vs texture */                                     
+                        R600_OUT_BATCH((i + VERT_ATTRIB_MAX + SQ_FETCH_RESOURCE_VS_OFFSET) * FETCH_RESOURCE_STRIDE);
+                    }
+                    else
+                    {
+                                           R600_OUT_BATCH(i * 7);
+                    }
+
                                        R600_OUT_BATCH(r700->textures[i]->SQ_TEX_RESOURCE0);
                                        R600_OUT_BATCH(r700->textures[i]->SQ_TEX_RESOURCE1);
                                        R600_OUT_BATCH(r700->textures[i]->SQ_TEX_RESOURCE2);
@@ -95,21 +107,35 @@ static void r700SendTexState(GLcontext *ctx, struct radeon_state_atom *atom)
        }
 }
 
+#define SAMPLER_STRIDE                 3
+
 static void r700SendTexSamplerState(GLcontext *ctx, struct radeon_state_atom *atom)
 {
        context_t         *context = R700_CONTEXT(ctx);
        R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(&context->hw);
        unsigned int i;
+
+    struct r700_vertex_program *vp = context->selected_vp;
+
        BATCH_LOCALS(&context->radeon);
        radeon_print(RADEON_STATE, RADEON_VERBOSE, "%s\n", __func__);
 
        for (i = 0; i < R700_TEXTURE_NUMBERUNITS; i++) {
-               if (ctx->Texture.Unit[i]._ReallyEnabled) {
+               if (ctx->Texture.Unit[i]._ReallyEnabled) {            
                        radeonTexObj *t = r700->textures[i];
                        if (t) {
                                BEGIN_BATCH_NO_AUTOSTATE(5);
                                R600_OUT_BATCH(CP_PACKET3(R600_IT_SET_SAMPLER, 3));
-                               R600_OUT_BATCH(i * 3);
+
+                if( (1<<i) & vp->r700AsmCode.unVetTexBits )                    
+                {   /* vs texture */
+                    R600_OUT_BATCH((i+SQ_TEX_SAMPLER_VS_OFFSET) * SAMPLER_STRIDE); //work 1
+                }
+                else
+                {
+                                   R600_OUT_BATCH(i * 3);
+                }
+
                                R600_OUT_BATCH(r700->textures[i]->SQ_TEX_SAMPLER0);
                                R600_OUT_BATCH(r700->textures[i]->SQ_TEX_SAMPLER1);
                                R600_OUT_BATCH(r700->textures[i]->SQ_TEX_SAMPLER2);
index d15f013710754a39e95f5cd31a2f3fd12fa043c4..ca0710b6813c9dbf52b8d4a71cdb49325b8ea218 100644 (file)
@@ -34,6 +34,7 @@
 #include "main/imports.h"
 #include "shader/prog_parameter.h"
 #include "shader/prog_statevars.h"
+#include "shader/program.h"
 
 #include "r600_context.h"
 #include "r600_cmdbuf.h"
 
 #include "r700_debug.h"
 
+void insert_wpos_code(GLcontext *ctx, struct gl_fragment_program *fprog)
+{
+    static const gl_state_index winstate[STATE_LENGTH]
+         = { STATE_INTERNAL, STATE_FB_SIZE, 0, 0, 0};
+    struct prog_instruction *newInst, *inst;
+    GLint  win_size;  /* state reference */
+    GLuint wpos_temp; /* temp register */
+    int i, j;
+
+    /* PARAM win_size = STATE_FB_SIZE */
+    win_size = _mesa_add_state_reference(fprog->Base.Parameters, winstate);
+
+    wpos_temp = fprog->Base.NumTemporaries++;
+
+    /* scan program where WPOS is used and replace with wpos_temp */
+    inst = fprog->Base.Instructions;
+    for (i = 0; i < fprog->Base.NumInstructions; i++) {
+        for (j=0; j < 3; j++) {
+            if(inst->SrcReg[j].File == PROGRAM_INPUT && 
+               inst->SrcReg[j].Index == FRAG_ATTRIB_WPOS) {
+                inst->SrcReg[j].File = PROGRAM_TEMPORARY;
+                inst->SrcReg[j].Index = wpos_temp;
+            }
+        }
+        inst++;
+    }
+
+    _mesa_insert_instructions(&(fprog->Base), 0, 1);
+
+    newInst = fprog->Base.Instructions;
+    /* invert wpos.y
+     * wpos_temp.xyzw = wpos.x-yzw + winsize.0y00 */
+    newInst[0].Opcode = OPCODE_ADD;
+    newInst[0].DstReg.File = PROGRAM_TEMPORARY;
+    newInst[0].DstReg.Index = wpos_temp;
+    newInst[0].DstReg.WriteMask = WRITEMASK_XYZW;
+
+    newInst[0].SrcReg[0].File = PROGRAM_INPUT;
+    newInst[0].SrcReg[0].Index = FRAG_ATTRIB_WPOS;
+    newInst[0].SrcReg[0].Swizzle = SWIZZLE_XYZW;
+    newInst[0].SrcReg[0].Negate = NEGATE_Y;
+
+    newInst[0].SrcReg[1].File = PROGRAM_STATE_VAR;
+    newInst[0].SrcReg[1].Index = win_size;
+    newInst[0].SrcReg[1].Swizzle = MAKE_SWIZZLE4(SWIZZLE_ZERO, SWIZZLE_Y, SWIZZLE_ZERO, SWIZZLE_ZERO);
+
+}
+
 //TODO : Validate FP input with VP output.
 void Map_Fragment_Program(r700_AssemblerBase         *pAsm,
                                                  struct gl_fragment_program *mesa_fp,
@@ -155,6 +204,12 @@ void Map_Fragment_Program(r700_AssemblerBase         *pAsm,
         pAsm->uiFP_AttributeMap[FRAG_ATTRIB_FACE] = pAsm->number_used_registers++;
     }
 
+    unBit = 1 << FRAG_ATTRIB_PNTC;
+    if(mesa_fp->Base.InputsRead & unBit)
+    {
+        pAsm->uiFP_AttributeMap[FRAG_ATTRIB_PNTC] = pAsm->number_used_registers++;
+    }
+
 /* Map temporary registers (GPRs) */
     pAsm->starting_temp_register_number = pAsm->number_used_registers;
 
@@ -312,7 +367,13 @@ GLboolean r700TranslateFragmentShader(struct r700_fragment_program *fp,
 
     //Init_Program
        Init_r700_AssemblerBase( SPT_FP, &(fp->r700AsmCode), &(fp->r700Shader) );
-       Map_Fragment_Program(&(fp->r700AsmCode), mesa_fp, ctx); 
+
+    if(mesa_fp->Base.InputsRead & FRAG_BIT_WPOS)
+    {
+        insert_wpos_code(ctx, mesa_fp);
+    }
+
+    Map_Fragment_Program(&(fp->r700AsmCode), mesa_fp, ctx); 
 
     if( GL_FALSE == Find_Instruction_Dependencies_fp(fp, mesa_fp) )
        {
@@ -479,6 +540,21 @@ GLboolean r700SetupFragmentProgram(GLcontext * ctx)
     }
 
 
+    if (mesa_fp->Base.InputsRead & (1 << FRAG_ATTRIB_PNTC))
+    {
+        ui++;
+        SETfield(r700->SPI_PS_IN_CONTROL_0.u32All, ui, NUM_INTERP_shift, NUM_INTERP_mask);
+        SETbit(r700->SPI_INTERP_CONTROL_0.u32All, PNT_SPRITE_ENA_bit);
+        SETfield(r700->SPI_INTERP_CONTROL_0.u32All, SPI_PNT_SPRITE_SEL_S, PNT_SPRITE_OVRD_X_shift, PNT_SPRITE_OVRD_X_mask);
+        SETfield(r700->SPI_INTERP_CONTROL_0.u32All, SPI_PNT_SPRITE_SEL_T, PNT_SPRITE_OVRD_Y_shift, PNT_SPRITE_OVRD_Y_mask);
+        //SETbit(r700->SPI_INTERP_CONTROL_0.u32All, PNT_SPRITE_TOP_1_bit);
+    }
+    else
+    {
+        CLEARbit(r700->SPI_INTERP_CONTROL_0.u32All, PNT_SPRITE_ENA_bit);
+    }
+
+
     ui = (unNumOfReg < ui) ? ui : unNumOfReg;
 
     SETfield(r700->ps.SQ_PGM_RESOURCES_PS.u32All, ui, NUM_GPRS_shift, NUM_GPRS_mask);
@@ -498,6 +574,10 @@ GLboolean r700SetupFragmentProgram(GLcontext * ctx)
     struct r700_vertex_program_cont *vpc =
                       (struct r700_vertex_program_cont *)ctx->VertexProgram._Current;
     GLbitfield OutputsWritten = vpc->mesa_program.Base.OutputsWritten;
+    
+    for(ui = 0; ui < R700_MAX_SHADER_EXPORTS; ui++)
+        r700->SPI_PS_INPUT_CNTL[ui].u32All = 0;
+
     unBit = 1 << FRAG_ATTRIB_WPOS;
     if(mesa_fp->Base.InputsRead & unBit)
     {
@@ -575,6 +655,22 @@ GLboolean r700SetupFragmentProgram(GLcontext * ctx)
             else
                     CLEARbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, FLAT_SHADE_bit);
     }
+    unBit = 1 << FRAG_ATTRIB_PNTC;
+    if(mesa_fp->Base.InputsRead & unBit)
+    {
+            ui = pAsm->uiFP_AttributeMap[FRAG_ATTRIB_PNTC];
+            SETbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, SEL_CENTROID_bit);
+            SETfield(r700->SPI_PS_INPUT_CNTL[ui].u32All, ui,
+                     SEMANTIC_shift, SEMANTIC_mask);
+            if (r700->SPI_INTERP_CONTROL_0.u32All & FLAT_SHADE_ENA_bit)
+                    SETbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, FLAT_SHADE_bit);
+            else
+                    CLEARbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, FLAT_SHADE_bit);
+            SETbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, PT_SPRITE_TEX_bit);
+    }
+
+
+
 
     for(i=VERT_RESULT_VAR0; i<VERT_RESULT_MAX; i++)
        {
index e562bfa4789e0dc1163478e16d34e1c634d333b6..39c59c9201df9e56ae9f590b43e23ad85c3baf3a 100644 (file)
@@ -48,6 +48,8 @@ struct r700_fragment_program
 };
 
 /* Internal */
+void insert_wpos_code(GLcontext *ctx, struct gl_fragment_program *fprog);
+
 void Map_Fragment_Program(r700_AssemblerBase         *pAsm,
                          struct gl_fragment_program *mesa_fp,
                           GLcontext *ctx); 
index 2829cca0a3cb1c87af4f583d6dac42ece0591420..cdb9a570f7ccf036442b979fee927eeff5631a06 100644 (file)
 #define SQ_FETCH_RESOURCE_VS_OFFSET    0x000000a0
 #define SQ_FETCH_RESOURCE_VS_COUNT     0x000000b0
 
+//richard dec.10 glsl
+#define SQ_TEX_SAMPLER_PS_OFFSET       0x00000000
+#define SQ_TEX_SAMPLER_PS_COUNT        0x00000012
+#define SQ_TEX_SAMPLER_VS_OFFSET       0x00000012
+#define SQ_TEX_SAMPLER_VS_COUNT        0x00000012
+//-------------------
+
 #define SHADERINST_TYPEMASK_CF  0x10
 #define SHADERINST_TYPEMASK_ALU 0x20
 #define SHADERINST_TYPEMASK_TEX 0x40
index 49a9ec561061e384f7a8f2ddcb82b2327348d679..0739496e0323af37cf5674407bc779413fac8963 100644 (file)
@@ -406,9 +406,6 @@ struct radeon_state {
        struct radeon_depthbuffer_state depth;
        struct radeon_scissor_state scissor;
        struct radeon_stencilbuffer_state stencil;
-
-       struct radeon_cs_space_check bos[RADEON_MAX_BOS];
-       int validated_bo_count;
 };
 
 /**
index b8c65f4ce621ace32269fd355e3dd9cf7aa8ac36..d31e4e47ddbbae1401bd5e14b7c373a76fc0eb42 100644 (file)
@@ -205,7 +205,6 @@ again_alloc:
                   counter on unused buffers for later freeing them from
                   begin of list */
                dma_bo = last_elem(&rmesa->dma.free);
-               assert(dma_bo->bo->cref == 1);
                remove_from_list(dma_bo);
                insert_at_head(&rmesa->dma.reserved, dma_bo);
        }