convert_UTF: rewrite in C++

This allows us to namespace the symbols properly.

Bug: google-breakpad:725
Change-Id: Iea8052547eef6c0acb299c1995781735c6d8994f
Reviewed-on: https://chromium-review.googlesource.com/c/breakpad/breakpad/+/1769236
Reviewed-by: Mark Mentovai <mark@chromium.org>
author: Mike Frysinger <vapier@chromium.org> 2019-08-03 12:12:40 -0400
committer: Mike Frysinger <vapier@chromium.org> 2019-09-04 20:25:23 +0000
commit: db1cda26539c711c3da7ed4d410dfe8190e89b8f (patch)
tree: ece5c30abb27e436ada9a0b2a07a08a7eb7da938 /src/common
parent: codereview.settings: do not force squashing behavior (diff)
download: breakpad-db1cda26539c711c3da7ed4d410dfe8190e89b8f.tar.xz
3 files changed, 27 insertions, 17 deletions
diff --git a/src/common/common.gyp b/src/common/common.gyp
index fe646b47..7d5e5c7d 100644
--- a/src/common/common.gyp
+++ b/src/common/common.gyp
@@ -61,7 +61,7 @@
         'android/ucontext_constants.h',
         'basictypes.h',
         'byte_cursor.h',
-        'convert_UTF.c',
+        'convert_UTF.cc',
         'convert_UTF.h',
         'dwarf/bytereader-inl.h',
         'dwarf/bytereader.cc',
diff --git a/src/common/convert_UTF.c b/src/common/convert_UTF.cc
index 12a3c891..fed04e78 100644
--- a/src/common/convert_UTF.c
+++ b/src/common/convert_UTF.cc
@@ -60,10 +60,16 @@ See the header file "ConvertUTF.h" for complete documentation.
 #include <stdio.h>
 #endif
 
-static const int halfShift  = 10; /* used for shifting by 10 bits */
+namespace google_breakpad {
 
-static const UTF32 halfBase = 0x0010000UL;
-static const UTF32 halfMask = 0x3FFUL;
+namespace {
+
+const int halfShift  = 10; /* used for shifting by 10 bits */
+
+const UTF32 halfBase = 0x0010000UL;
+const UTF32 halfMask = 0x3FFUL;
+
+}  // namespace
 
 #define UNI_SUR_HIGH_START  (UTF32)0xD800
 #define UNI_SUR_HIGH_END    (UTF32)0xDBFF
@@ -183,6 +189,8 @@ ConversionResult ConvertUTF16toUTF32 (const UTF16** sourceStart, const UTF16* so
 
 /* --------------------------------------------------------------------- */
 
+namespace {
+
 /*
  * Index into the table below with the first byte of a UTF-8 sequence to
  * get the number of trailing bytes that are supposed to follow it.
@@ -190,7 +198,7 @@ ConversionResult ConvertUTF16toUTF32 (const UTF16** sourceStart, const UTF16* so
  * left as-is for anyone who may want to do such conversion, which was
  * allowed in earlier algorithms.
  */
-static const char trailingBytesForUTF8[256] = {
+const char trailingBytesForUTF8[256] = {
   0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
   0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
   0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
@@ -206,7 +214,7 @@ static const char trailingBytesForUTF8[256] = {
  * This table contains as many values as there might be trailing bytes
  * in a UTF-8 sequence.
  */
-static const UTF32 offsetsFromUTF8[6] = { 0x00000000UL, 0x00003080UL, 0x000E2080UL,
+const UTF32 offsetsFromUTF8[6] = { 0x00000000UL, 0x00003080UL, 0x000E2080UL,
   0x03C82080UL, 0xFA082080UL, 0x82082080UL };
 
 /*
@@ -216,7 +224,7 @@ static const UTF32 offsetsFromUTF8[6] = { 0x00000000UL, 0x00003080UL, 0x000E2080
  * (I.e., one byte sequence, two byte... etc.). Remember that sequencs
  * for *legal* UTF-8 will be 4 or fewer bytes total.
  */
-static const UTF8 firstByteMark[7] = { 0x00, 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC };
+const UTF8 firstByteMark[7] = { 0x00, 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC };
 
 /* --------------------------------------------------------------------- */
 
@@ -228,6 +236,8 @@ static const UTF8 firstByteMark[7] = { 0x00, 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC
 * into an inline function.
 */
 
+}  // namespace
+
 /* --------------------------------------------------------------------- */
 
 ConversionResult ConvertUTF16toUTF8 (const UTF16** sourceStart, const UTF16* sourceEnd,
@@ -299,6 +309,8 @@ return result;
 
 /* --------------------------------------------------------------------- */
 
+namespace {
+
 /*
  * Utility routine to tell whether a sequence of bytes is legal UTF-8.
  * This must be called with the length pre-determined by the first byte.
@@ -309,8 +321,7 @@ return result;
  * If presented with a length > 4, this returns false.  The Unicode
  * definition of UTF-8 goes up to 4-byte sequences.
  */
-
-static Boolean isLegalUTF8(const UTF8 *source, int length) {
+Boolean isLegalUTF8(const UTF8 *source, int length) {
   UTF8 a;
   const UTF8 *srcptr = source+length;
   switch (length) {
@@ -335,6 +346,8 @@ static Boolean isLegalUTF8(const UTF8 *source, int length) {
   return true;
 }
 
+}  // namespace
+
 /* --------------------------------------------------------------------- */
 
 /*
@@ -552,3 +565,5 @@ In UTF-8 writing code, the switches on "bytesToWrite" are
 similarly unrolled loops.
 
 --------------------------------------------------------------------- */
+
+}  // namespace google_breakpad
diff --git a/src/common/convert_UTF.h b/src/common/convert_UTF.h
index 644d0995..2f69495d 100644
--- a/src/common/convert_UTF.h
+++ b/src/common/convert_UTF.h
@@ -106,6 +106,8 @@ All should be unsigned values to avoid sign extension during
 bit mask & shift operations.
 ------------------------------------------------------------------------ */
 
+namespace google_breakpad {
+
 typedef unsigned long	UTF32;	/* at least 32 bits */
 typedef unsigned short	UTF16;	/* at least 16 bits */
 typedef unsigned char	UTF8;	/* typically 8 bits */
@@ -130,11 +132,6 @@ typedef enum {
 	lenientConversion
 } ConversionFlags;
 
-/* This is for C++ and does no harm in C */
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 ConversionResult ConvertUTF8toUTF16 (const UTF8** sourceStart, const UTF8* sourceEnd,
                                      UTF16** targetStart, UTF16* targetEnd, ConversionFlags flags);
 
@@ -155,9 +152,7 @@ ConversionResult ConvertUTF32toUTF16 (const UTF32** sourceStart, const UTF32* so
 
 Boolean isLegalUTF8Sequence(const UTF8 *source, const UTF8 *sourceEnd);
 
-#ifdef __cplusplus
-}
-#endif
+}  // namespace google_breakpad
 
 /* --------------------------------------------------------------------- */
author	Mike Frysinger <vapier@chromium.org>	2019-08-03 12:12:40 -0400
committer	Mike Frysinger <vapier@chromium.org>	2019-09-04 20:25:23 +0000
commit	db1cda26539c711c3da7ed4d410dfe8190e89b8f (patch)
tree	ece5c30abb27e436ada9a0b2a07a08a7eb7da938 /src/common
parent	codereview.settings: do not force squashing behavior (diff)
download	breakpad-db1cda26539c711c3da7ed4d410dfe8190e89b8f.tar.xz