X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=libcpp%2Flex.c;h=9d23002d84be19b2a19ff83a246cbbde5c0880af;hb=63737e7a0c43a6f78c3ed5f1960a991e3c6bb72a;hp=fcec329d8b4970a67de3271e2476c2d8f700a2db;hpb=3ce4f9e4d2643273f11647e97b2c7796a64a73dd;p=gcc.git diff --git a/libcpp/lex.c b/libcpp/lex.c index fcec329d8b4..9d23002d84b 100644 --- a/libcpp/lex.c +++ b/libcpp/lex.c @@ -1,6 +1,6 @@ /* CPP Library - lexical analysis. Copyright (C) 2000, 2001, 2002, 2003, 2004, 2005, 2007, 2008, 2009, 2010, - 2011 Free Software Foundation, Inc. + 2011, 2012 Free Software Foundation, Inc. Contributed by Per Bothner, 1994-95. Based on CCCP program by Paul Rubin, June 1986 Adapted to ANSI C, Richard Stallman, Jan 1987 @@ -267,7 +267,6 @@ search_line_acc_char (const uchar *s, const uchar *end ATTRIBUTE_UNUSED) /* Disable on Solaris 2/x86 until the following problems can be properly autoconfed: - The Solaris 8 assembler cannot assemble SSE2/SSE4.2 insns. The Solaris 9 assembler cannot assemble SSE4.2 insns. Before Solaris 9 Update 6, SSE insns cannot be executed. 
The Solaris 10+ assembler tags objects with the instruction set @@ -477,7 +476,8 @@ search_line_sse42 (const uchar *s, const uchar *end) typedef const uchar * (*search_line_fast_type) (const uchar *, const uchar *); static search_line_fast_type search_line_fast; -static void __attribute__((constructor)) +#define HAVE_init_vectorized_lexer 1 +static inline void init_vectorized_lexer (void) { unsigned dummy, ecx = 0, edx = 0; @@ -629,6 +629,69 @@ search_line_fast (const uchar *s, const uchar *end ATTRIBUTE_UNUSED) } } +#elif defined (__ARM_NEON__) +#include "arm_neon.h" + +static const uchar * +search_line_fast (const uchar *s, const uchar *end ATTRIBUTE_UNUSED) +{ + const uint8x16_t repl_nl = vdupq_n_u8 ('\n'); + const uint8x16_t repl_cr = vdupq_n_u8 ('\r'); + const uint8x16_t repl_bs = vdupq_n_u8 ('\\'); + const uint8x16_t repl_qm = vdupq_n_u8 ('?'); + const uint8x16_t xmask = (uint8x16_t) vdupq_n_u64 (0x8040201008040201ULL); + + unsigned int misalign, found, mask; + const uint8_t *p; + uint8x16_t data; + + /* Align the source pointer. */ + misalign = (uintptr_t)s & 15; + p = (const uint8_t *)((uintptr_t)s & -16); + data = vld1q_u8 (p); + + /* Create a mask for the bytes that are valid within the first + 16-byte block. The idea here is that the AND with the mask + within the loop is "free", since we need some AND or TEST + insn in order to set the flags for the branch anyway. */ + mask = (-1u << misalign) & 0xffff; + + /* Main loop, processing 16 bytes at a time. 
*/ + goto start; + + do + { + uint8x8_t l; + uint16x4_t m; + uint32x2_t n; + uint8x16_t t, u, v, w; + + p += 16; + data = vld1q_u8 (p); + mask = 0xffff; + + start: + t = vceqq_u8 (data, repl_nl); + u = vceqq_u8 (data, repl_cr); + v = vorrq_u8 (t, vceqq_u8 (data, repl_bs)); + w = vorrq_u8 (u, vceqq_u8 (data, repl_qm)); + t = vandq_u8 (vorrq_u8 (v, w), xmask); + l = vpadd_u8 (vget_low_u8 (t), vget_high_u8 (t)); + m = vpaddl_u8 (l); + n = vpaddl_u16 (m); + + found = vget_lane_u32 ((uint32x2_t) vorr_u64 ((uint64x1_t) n, + vshr_n_u64 ((uint64x1_t) n, 24)), 0); + found &= mask; + } + while (!found); + + /* FOUND contains 1 in bits for which we matched a relevant + character. Conversion to the byte index is trivial. */ + found = __builtin_ctz (found); + return (const uchar *)p + found; +} + #else /* We only have one accelerated alternative. Use a direct call so that @@ -638,6 +701,16 @@ search_line_fast (const uchar *s, const uchar *end ATTRIBUTE_UNUSED) #endif +/* Initialize the lexer if needed. */ + +void +_cpp_init_lexer (void) +{ +#ifdef HAVE_init_vectorized_lexer + init_vectorized_lexer (); +#endif +} + /* Returns with a logical line that contains no escaped newlines or trigraphs. This is a time-critical inner loop. */ void