+
+/* Implementation of class cpp_string_location_reader. */
+
+/* Construct a cpp_string_location_reader positioned at the first
+   location of SRC_LOC's source range, using LINE_TABLE for all
+   location lookups.  */
+
+cpp_string_location_reader::
+cpp_string_location_reader (location_t src_loc,
+                            line_maps *line_table)
+{
+  /* Only the start of the range denoted by SRC_LOC matters here.  */
+  const location_t range_start
+    = get_range_from_loc (line_table, src_loc).m_start;
+
+  /* RANGE_START might be a macro location.  Column-by-column
+     calculations only make sense on ordinary maps, so resolve it to
+     the corresponding spelling location in an ordinary map.  */
+  m_loc = linemap_resolve_location (line_table, range_start,
+                                    LRK_SPELLING_LOCATION, NULL);
+
+  /* The step applied to m_loc per column is derived from the number of
+     range bits of the ordinary map that contains m_loc.  */
+  const line_map_ordinary *const ord_map
+    = linemap_check_ordinary (linemap_lookup (line_table, m_loc));
+  m_offset_per_column = 1 << ord_map->m_range_bits;
+}
+
+/* Return the range of the next source byte: a range whose start and
+   finish are both the reader's current location.  Afterwards the reader
+   is stepped forward by one column, unless its location is already
+   above LINE_MAP_MAX_LOCATION_WITH_COLS.  */
+
+source_range
+cpp_string_location_reader::get_next ()
+{
+  source_range byte_range;
+  byte_range.m_finish = byte_range.m_start = m_loc;
+
+  /* Stop stepping once the location has passed the limit; presumably
+     column information is no longer tracked beyond it.  */
+  if (m_loc <= LINE_MAP_MAX_LOCATION_WITH_COLS)
+    m_loc += m_offset_per_column;
+
+  return byte_range;
+}
+
+/* Construct a cpp_display_width_computation over the DATA_LENGTH bytes
+   that begin at DATA.  TABSTOP is the distance between tab stops used
+   when expanding tabs, and must be positive.  No bytes have been
+   consumed and no display columns counted initially.  */
+
+cpp_display_width_computation::
+cpp_display_width_computation (const char *data, int data_length, int tabstop)
+  : m_begin (data),
+    m_next (data),
+    m_bytes_left (data_length),
+    m_tabstop (tabstop),
+    m_display_cols (0)
+{
+  /* process_next_codepoint() takes a remainder modulo m_tabstop, so a
+     zero or negative value cannot be allowed.  */
+  gcc_assert (m_tabstop > 0);
+}
+
+
+/* The main implementation function for class cpp_display_width_computation.
+   On entry, m_next points to the start of the UTF-8 encoding of the next
+   character and m_bytes_left holds the remaining size of the buffer into
+   which m_next points.  On exit, m_next points just after the last byte of
+   that encoding and m_bytes_left has been reduced accordingly.  If m_next
+   does not point to a valid UTF-8-encoded sequence, a single byte is
+   consumed and treated as having display width 1.  A tab is expanded to the
+   next tab stop relative to m_display_cols, the number of display columns
+   processed so far.  The width of the item just processed is added to
+   m_display_cols and returned.  */
+
+int
+cpp_display_width_computation::process_next_codepoint ()
+{
+  int width;
+
+  if (*m_next == '\t')
+    {
+      /* A tab occupies whatever is needed to reach the next tab stop.  */
+      ++m_next;
+      --m_bytes_left;
+      width = m_tabstop - (m_display_cols % m_tabstop);
+    }
+  else
+    {
+      cppchar_t c;
+      if (one_utf8_to_cppchar ((const uchar **) &m_next, &m_bytes_left, &c)
+          == 0)
+        {
+          /* Successful decode: one_utf8_to_cppchar() has already updated
+             m_next and m_bytes_left for us.  */
+          width = cpp_wcwidth (c);
+        }
+      else
+        {
+          /* The input is not valid UTF-8.  That can be fine, e.g. inside a
+             string literal, so don't complain; just consume one byte and
+             give it a width of one.  */
+          ++m_next;
+          --m_bytes_left;
+          width = 1;
+        }
+    }
+
+  m_display_cols += width;
+  return width;
+}
+
+/* Consume as little of the byte stream as is needed to cover N display
+   columns, and return how many display columns were actually covered.
+   The result is smaller than N when the data runs out first, and larger
+   than N when the last character consumed is wide enough to overshoot.  */
+
+int
+cpp_display_width_computation::advance_display_cols (int n)
+{
+  const int cols_before = m_display_cols;
+  const int goal = cols_before + n;
+
+  for (;;)
+    {
+      /* Stop as soon as the goal is reached or the input is exhausted.  */
+      if (m_display_cols >= goal || done ())
+        break;
+      process_next_codepoint ();
+    }
+
+  return m_display_cols - cols_before;
+}
+
+/* Compute the number of display columns occupied by the first COLUMN
+   bytes of the string of DATA_LENGTH bytes starting at DATA.  COLUMN is
+   allowed to exceed DATA_LENGTH; each phantom byte past the end of the
+   string counts as one display column.  Tabs are expanded to the next
+   tab stop (every TABSTOP columns), relative to the start of DATA.  */
+
+int
+cpp_byte_column_to_display_column (const char *data, int data_length,
+                                   int column, int tabstop)
+{
+  /* Split COLUMN into bytes that really exist in DATA and phantom bytes
+     lying beyond its end.  */
+  const int phantom_bytes = MAX (0, column - data_length);
+  const int real_bytes = column - phantom_bytes;
+
+  /* Measure the real bytes one codepoint at a time.  */
+  cpp_display_width_computation width_calc (data, real_bytes, tabstop);
+  for (; !width_calc.done (); )
+    width_calc.process_next_codepoint ();
+
+  return phantom_bytes + width_calc.display_cols_processed ();
+}
+
+/* Compute the smallest number of bytes of the string of DATA_LENGTH bytes
+   starting at DATA that occupy at least DISPLAY_COL display columns, with
+   tabs expanded every TABSTOP columns.  If the whole string is narrower
+   than DISPLAY_COL, each missing column is counted as one extra byte, so
+   the result may exceed DATA_LENGTH.  */
+
+int
+cpp_display_column_to_byte_column (const char *data, int data_length,
+                                   int display_col, int tabstop)
+{
+  cpp_display_width_computation width_calc (data, data_length, tabstop);
+  const int cols_consumed = width_calc.advance_display_cols (display_col);
+
+  /* Columns still owed after the available data has been used up.  */
+  const int shortfall = MAX (0, display_col - cols_consumed);
+  return width_calc.bytes_processed () + shortfall;
+}
+
+/* Our own version of wcwidth().  The wcwidth() in glibc is not used
+   directly because it inspects the user's locale; in an ASCII locale in
+   particular it returns nothing useful for extended characters, whereas
+   GCC otherwise (see e.g. _cpp_default_encoding()) behaves as if
+   everything is UTF-8.  A few tweaks are also applied that suit the way
+   GCC uses this data, e.g. tabs and other control characters are treated
+   as having width 1.  The lookup tables are generated by
+   contrib/unicode/gen_wcwidth.py, which simply calls glibc wcwidth() on
+   all codepoints and then applies the small tweaks.  The tables are not
+   highly optimized, but they are sufficient for the present purpose of
+   outputting diagnostics.
+
+   wcwidth_range_ends[I] is the last codepoint of range I, and
+   wcwidth_widths[I] is the width shared by every codepoint in that
+   range.  Codepoints beyond the last range get width 1.  */
+
+#include "generated_cpp_wcwidth.h"
+int
+cpp_wcwidth (cppchar_t c)
+{
+  /* Fast path: the first range is expected to cover most lookups.  */
+  if (__builtin_expect (c <= wcwidth_range_ends[0], true))
+    return wcwidth_widths[0];
+
+  /* Binary search the remaining ranges for the first one whose end is
+     not below C.  LO is the lowest candidate index and REMAINING is the
+     number of candidates still under consideration.  */
+  static const int n_ranges
+    = sizeof wcwidth_range_ends / sizeof (*wcwidth_range_ends);
+  int lo = 1;
+  int remaining = n_ranges - lo;
+  do
+    {
+      const int half = remaining / 2;
+      const int probe = lo + half;
+      if (c <= wcwidth_range_ends[probe])
+        remaining = half;
+      else
+        {
+          lo = probe + 1;
+          remaining -= half + 1;
+        }
+    }
+  while (remaining != 0);
+
+  /* C lies beyond every range in the table.  */
+  if (__builtin_expect (lo == n_ranges, false))
+    return 1;
+  return wcwidth_widths[lo];
+}