swr/rast: Misc cleanup

author George Kyriazis <george.kyriazis@intel.com>

Fri, 2 Feb 2018 07:12:29 +0000 (01:12 -0600)

committer George Kyriazis <george.kyriazis@intel.com>

Fri, 16 Feb 2018 16:54:00 +0000 (10:54 -0600)
author George Kyriazis <george.kyriazis@intel.com>
Fri, 2 Feb 2018 07:12:29 +0000 (01:12 -0600)
committer George Kyriazis <george.kyriazis@intel.com>
Fri, 16 Feb 2018 16:54:00 +0000 (10:54 -0600)
diff --git a/src/gallium/drivers/swr/rasterizer/core/clip.h b/src/gallium/drivers/swr/rasterizer/core/clip.h

index 519367228eaf4df69bfc6dc0c13c9acf3841024c..ddee3b1a94024251ece4220c49ad9c352419dcbc 100644 (file)
--- a/src/gallium/drivers/swr/rasterizer/core/clip.h
+++ b/src/gallium/drivers/swr/rasterizer/core/clip.h
@@ -703,7 +703,7 @@ public:
          primMask &= ~ComputeNaNMask(prim);
  
          // user cull distance cull 
-        if (state.backendState.cullDistanceMask)
+        if (state.backendState.cullDistanceMask | state.backendState.clipDistanceMask)
          {
              primMask &= ~ComputeUserClipCullMask(pa, prim);
          }
diff --git a/src/gallium/drivers/swr/rasterizer/core/context.h b/src/gallium/drivers/swr/rasterizer/core/context.h

index a284c422dff9a3a24dca347330fcd37157ec2885..489aa7862cbf3dea610bfe6c381f0d41e3cfb704 100644 (file)
--- a/src/gallium/drivers/swr/rasterizer/core/context.h
+++ b/src/gallium/drivers/swr/rasterizer/core/context.h
@@ -227,8 +227,9 @@ OSALIGNLINE(struct) API_STATE
      // Vertex Buffers
      SWR_VERTEX_BUFFER_STATE vertexBuffers[KNOB_NUM_STREAMS];
  
-    // Index Buffer
-    SWR_INDEX_BUFFER_STATE  indexBuffer;
+    // GS - Geometry Shader State
+    SWR_GS_STATE            gsState;
+    PFN_GS_FUNC             pfnGsFunc;
  
      // FS - Fetch Shader State
      PFN_FETCH_FUNC          pfnFetchFunc;
@@ -236,9 +237,8 @@ OSALIGNLINE(struct) API_STATE
      // VS - Vertex Shader State
      PFN_VERTEX_FUNC         pfnVertexFunc;
  
-    // GS - Geometry Shader State
-    PFN_GS_FUNC             pfnGsFunc;
-    SWR_GS_STATE            gsState;
+    // Index Buffer
+    SWR_INDEX_BUFFER_STATE  indexBuffer;
  
      // CS - Compute Shader
      PFN_CS_FUNC             pfnCsFunc;
@@ -265,8 +265,6 @@ OSALIGNLINE(struct) API_STATE
      // Number of attributes used by the frontend (vs, so, gs)
      uint32_t                feNumAttributes;
  
-    PRIMITIVE_TOPOLOGY      topology;
-    bool                    forceFront;
  
      // RS - Rasterizer State
      SWR_RASTSTATE           rastState;
@@ -282,8 +280,12 @@ OSALIGNLINE(struct) API_STATE
      SWR_RECT                scissorsInFixedPoint[KNOB_NUM_VIEWPORTS_SCISSORS];
      bool                    scissorsTileAligned;
  
+    bool                    forceFront;
+    PRIMITIVE_TOPOLOGY      topology;
+
+
      // Backend state
-    SWR_BACKEND_STATE       backendState;
+    OSALIGNLINE(SWR_BACKEND_STATE) backendState;
  
      SWR_DEPTH_BOUNDS_STATE  depthBoundsState;
  
@@ -400,8 +402,6 @@ struct DRAW_CONTEXT
          DispatchQueue*  pDispatch;      // Queue for thread groups. (isCompute)
      };
      DRAW_STATE*     pState;             // Read-only state. Core should not update this outside of API thread.
-    DRAW_DYNAMIC_STATE dynState;
-
      CachingArena*   pArena;
  
      uint32_t        drawId;
@@ -412,11 +412,13 @@ struct DRAW_CONTEXT
  
      FE_WORK         FeWork;
  
+    SYNC_DESC       retireCallback; // Call this func when this DC is retired.
+
+    DRAW_DYNAMIC_STATE dynState;
+
      volatile OSALIGNLINE(bool)       doneFE;         // Is FE work done for this draw?
      volatile OSALIGNLINE(uint32_t)   FeLock;
      volatile OSALIGNLINE(uint32_t)   threadsDone;
-
-    SYNC_DESC       retireCallback; // Call this func when this DC is retired.
  };
  
  static_assert((sizeof(DRAW_CONTEXT) & 63) == 0, "Invalid size for DRAW_CONTEXT");
diff --git a/src/gallium/drivers/swr/rasterizer/core/state.h b/src/gallium/drivers/swr/rasterizer/core/state.h

index d959c64361dbf0f787431bf583ac12a261517513..6b108d9c21e687ed9af1b9f1b5845bcfc4bf292b 100644 (file)
--- a/src/gallium/drivers/swr/rasterizer/core/state.h
+++ b/src/gallium/drivers/swr/rasterizer/core/state.h
@@ -228,8 +228,8 @@ struct SWR_VS_CONTEXT
  #if USE_SIMD16_FRONTEND
      uint32_t AlternateOffset;   // IN: amount to offset for interleaving even/odd simd8 in simd16vertex output
  #if USE_SIMD16_VS
-    simd16scalari mask16;      // IN: Active mask for shader (16-wide)
-    simd16scalari VertexID16;  // IN: Vertex ID (16-wide)
+    simd16scalari mask16;       // IN: Active mask for shader (16-wide)
+    simd16scalari VertexID16;   // IN: Vertex ID (16-wide)
  #endif
  #endif
  };
@@ -553,11 +553,10 @@ struct SWR_SURFACE_STATE
  // in the fetch shader jit
  struct SWR_VERTEX_BUFFER_STATE
  {
+    gfxptr_t xpData;
      uint32_t index;
      uint32_t pitch;
-    gfxptr_t xpData;
      uint32_t size;
-    uint32_t numaNode;
      uint32_t minVertex;             // min vertex (for bounds checking)
      uint32_t maxVertex;             // size / pitch.  precalculated value used by fetch shader for OOB checks
      uint32_t partialInboundsSize;   // size % pitch.  precalculated value used by fetch shader for partially OOB vertices
@@ -565,9 +564,9 @@ struct SWR_VERTEX_BUFFER_STATE
  
  struct SWR_INDEX_BUFFER_STATE
  {
+    const void *pIndices;
      // Format type for indices (e.g. UINT16, UINT32, etc.)
      SWR_FORMAT format; // @llvm_enum
-    const void *pIndices;
      uint32_t size;
  };
  
@@ -646,12 +645,15 @@ OSALIGNLINE(struct) SWR_STATS_FE
  
  struct SWR_STREAMOUT_BUFFER
  {
-    bool enable;
-    bool soWriteEnable;
-
      // Pointers to streamout buffers.
      uint32_t* pBuffer;
  
+    // Pointer to the SO write offset. If not null then we update the offset here.
+    uint32_t* pWriteOffset;
+
+    bool enable;
+    bool soWriteEnable;
+
      // Size of buffer in dwords.
      uint32_t bufferSize;
  
@@ -660,10 +662,6 @@ struct SWR_STREAMOUT_BUFFER
  
      // Offset into buffer in dwords. SOS will increment this offset.
      uint32_t streamOffset;
-
-    // Offset to the SO write offset. If not null then we update offset here.
-    uint32_t* pWriteOffset;
-
  };
  
  //////////////////////////////////////////////////////////////////////////
@@ -718,6 +716,11 @@ struct SWR_GS_STATE
  {
      bool gsEnable;
  
+    // If true, geometry shader emits a single stream, with separate cut buffer.
+    // If false, geometry shader emits vertices for multiple streams to the stream buffer, with a separate StreamID buffer
+    // to map vertices to streams
+    bool isSingleStream;
+
      // Number of input attributes per vertex. Used by the frontend to
      // optimize assembling primitives for GS
      uint32_t numInputAttribs;
@@ -730,15 +733,10 @@ struct SWR_GS_STATE
  
      // Maximum number of verts that can be emitted by a single instance of the GS
      uint32_t maxNumVerts;
-    
+
      // Instance count
      uint32_t instanceCount;
  
-    // If true, geometry shader emits a single stream, with separate cut buffer.
-    // If false, geometry shader emits vertices for multiple streams to the stream buffer, with a separate StreamID buffer
-    // to map vertices to streams
-    bool isSingleStream;
-
      // When single stream is enabled, singleStreamID dictates which stream is being output.
      // field ignored if isSingleStream is false
      uint32_t singleStreamID;
@@ -768,7 +766,11 @@ struct SWR_GS_STATE
      // Set this to non-zero to indicate that the shader outputs a static number of verts. If zero, shader is
      // expected to store the final vertex count in the first dword of the gs output stream.
      uint32_t staticVertexCount;
+
+    uint32_t pad;
  };
+static_assert(sizeof(SWR_GS_STATE) == 64,
+    "Adjust padding to keep size (or remove this assert)");
  
  
  //////////////////////////////////////////////////////////////////////////
@@ -814,6 +816,7 @@ enum SWR_TS_DOMAIN
  struct SWR_TS_STATE
  {
      bool                    tsEnable;
+
      SWR_TS_OUTPUT_TOPOLOGY  tsOutputTopology;   // @llvm_enum
      SWR_TS_PARTITIONING     partitioning;       // @llvm_enum
      SWR_TS_DOMAIN           domain;             // @llvm_enum
@@ -863,11 +866,11 @@ struct SWR_BLEND_STATE
      float constantColor[4];
  
      // alpha test reference value in unorm8 or float32
-    uint32_t alphaTestReference; 
+    uint32_t alphaTestReference;
      uint32_t sampleMask;
      // all RT's have the same sample count
      ///@todo move this to Output Merger state when we refactor
-    SWR_MULTISAMPLE_COUNT sampleCount;  // @llvm_enum 
+    SWR_MULTISAMPLE_COUNT sampleCount;  // @llvm_enum
  
      SWR_RENDER_TARGET_BLEND_STATE renderTarget[SWR_NUM_RENDERTARGETS];
  };
@@ -889,8 +892,8 @@ typedef void(__cdecl *PFN_CS_FUNC)(HANDLE hPrivateData, SWR_CS_CONTEXT* pCsConte
  typedef void(__cdecl *PFN_SO_FUNC)(SWR_STREAMOUT_CONTEXT& soContext);
  typedef void(__cdecl *PFN_PIXEL_KERNEL)(HANDLE hPrivateData, SWR_PS_CONTEXT *pContext);
  typedef void(__cdecl *PFN_CPIXEL_KERNEL)(HANDLE hPrivateData, SWR_PS_CONTEXT *pContext);
-typedef void(__cdecl *PFN_BLEND_JIT_FUNC)(const SWR_BLEND_STATE*, 
-    simdvector& vSrc, simdvector& vSrc1, simdscalar& vSrc0Alpha, uint32_t sample, 
+typedef void(__cdecl *PFN_BLEND_JIT_FUNC)(const SWR_BLEND_STATE*,
+    simdvector& vSrc, simdvector& vSrc1, simdscalar& vSrc0Alpha, uint32_t sample,
      uint8_t* pDst, simdvector& vResult, simdscalari* vOMask, simdscalari* vCoverageMask);
  typedef simdscalar(*PFN_QUANTIZE_DEPTH)(simdscalar const &);
  
@@ -917,7 +920,7 @@ struct SWR_FRONTEND_STATE
      } provokingVertex;
      uint32_t topologyProvokingVertex; // provoking vertex for the draw topology
  
-    // Size of a vertex in simdvector units. Should be sized to the 
+    // Size of a vertex in simdvector units. Should be sized to the
      // maximum of the input/output of the vertex shader.
      uint32_t vsVertexSize;
  };
@@ -1013,7 +1016,7 @@ public:
      INLINE const simdscalar& vY(uint32_t sampleNum) const { return _vY[sampleNum]; }; // @llvm_func
      INLINE const __m128i& TileSampleOffsetsX() const { return tileSampleOffsetsX; }; // @llvm_func
      INLINE const __m128i& TileSampleOffsetsY() const { return tileSampleOffsetsY; }; // @llvm_func
-    
+
      INLINE void PrecalcSampleData(int numSamples); //@llvm_func
  
  private:
@@ -1081,7 +1084,7 @@ enum SWR_CONSTANT_SOURCE
  
  struct SWR_ATTRIB_SWIZZLE
  {
-    uint16_t sourceAttrib : 5;          // source attribute 
+    uint16_t sourceAttrib : 5;          // source attribute
      uint16_t constantSource : 2;        // constant source to apply
      uint16_t componentOverrideMask : 4; // override component with constant source
  };
@@ -1092,27 +1095,33 @@ struct SWR_BACKEND_STATE
      uint32_t constantInterpolationMask;     // bitmask indicating which attributes have constant interpolation
      uint32_t pointSpriteTexCoordMask;       // bitmask indicating the attribute(s) which should be interpreted as tex coordinates
  
-    uint8_t numAttributes;                  // total number of attributes to send to backend (up to 32)
-    uint8_t numComponents[32];              // number of components to setup per attribute, this reduces some calculations for unneeded components
-
-    bool swizzleEnable;                 // when enabled, core will parse the swizzle map when 
+    bool swizzleEnable;                 // when enabled, core will parse the swizzle map when
                                          // setting up attributes for the backend, otherwise
                                          // all attributes up to numAttributes will be sent
-    SWR_ATTRIB_SWIZZLE swizzleMap[32];
+    uint8_t numAttributes;                  // total number of attributes to send to backend (up to 32)
+    uint8_t numComponents[32];              // number of components to setup per attribute, this reduces some calculations for unneeded components
  
      bool readRenderTargetArrayIndex;    // Forward render target array index from last FE stage to the backend
      bool readViewportArrayIndex;        // Read viewport array index from last FE stage during binning
-    
-       // Offset to the start of the attributes of the input vertices, in simdvector units
-    uint32_t vertexAttribOffset;
  
      // User clip/cull distance enables
      uint8_t cullDistanceMask;
      uint8_t clipDistanceMask;
  
+    // padding to ensure swizzleMap starts at a 64B offset from the start of the struct
+    // and that the next fields are dword aligned.
+    uint8_t pad[10];
+
+    // Offset to the start of the attributes of the input vertices, in simdvector units
+    uint32_t vertexAttribOffset;
+
      // Offset to clip/cull attrib section of the vertex, in simdvector units
      uint32_t vertexClipCullOffset;
+
+    SWR_ATTRIB_SWIZZLE swizzleMap[32];
  };
+static_assert(sizeof(SWR_BACKEND_STATE) == 128,
+    "Adjust padding to keep size (or remove this assert)");
  
  
  union SWR_DEPTH_STENCIL_STATE
@@ -1167,8 +1176,8 @@ enum SWR_INPUT_COVERAGE
  
  enum SWR_PS_POSITION_OFFSET
  {
-    SWR_PS_POSITION_SAMPLE_NONE, 
-    SWR_PS_POSITION_SAMPLE_OFFSET, 
+    SWR_PS_POSITION_SAMPLE_NONE,
+    SWR_PS_POSITION_SAMPLE_OFFSET,
      SWR_PS_POSITION_CENTROID_OFFSET,
      SWR_PS_POSITION_OFFSET_COUNT,
  };
@@ -1194,7 +1203,7 @@ struct SWR_PS_STATE
      uint32_t shadingRate            : 2;    // shading per pixel / sample / coarse pixel
      uint32_t posOffset              : 2;    // type of offset (none, sample, centroid) to add to pixel position
      uint32_t barycentricsMask       : 3;    // which type(s) of barycentric coords does the PS interpolate attributes with
-    uint32_t usesUAV                : 1;    // pixel shader accesses UAV 
+    uint32_t usesUAV                : 1;    // pixel shader accesses UAV
      uint32_t forceEarlyZ            : 1;    // force execution of early depth/stencil test
  
      uint8_t renderTargetMask;               // Mask of render targets written
author	George Kyriazis <george.kyriazis@intel.com>
	Fri, 2 Feb 2018 07:12:29 +0000 (01:12 -0600)
committer	George Kyriazis <george.kyriazis@intel.com>
	Fri, 16 Feb 2018 16:54:00 +0000 (10:54 -0600)
src/gallium/drivers/swr/rasterizer/core/clip.h		patch \| blob \| history
src/gallium/drivers/swr/rasterizer/core/context.h		patch \| blob \| history
src/gallium/drivers/swr/rasterizer/core/state.h		patch \| blob \| history