Static Public Member Functions | List of all members
utf_decoder< Traits, opt_swap > Struct Template Reference

Static Public Member Functions

static Traits::value_type decode_utf8_block (const uint8_t *data, size_t size, typename Traits::value_type result)
 
static Traits::value_type decode_utf16_block (const uint16_t *data, size_t size, typename Traits::value_type result)
 
static Traits::value_type decode_utf32_block (const uint32_t *data, size_t size, typename Traits::value_type result)
 
static Traits::value_type decode_latin1_block (const uint8_t *data, size_t size, typename Traits::value_type result)
 
static Traits::value_type decode_wchar_block_impl (const uint16_t *data, size_t size, typename Traits::value_type result)
 
static Traits::value_type decode_wchar_block_impl (const uint32_t *data, size_t size, typename Traits::value_type result)
 
static Traits::value_type decode_wchar_block (const wchar_t *data, size_t size, typename Traits::value_type result)
 

Detailed Description

template<typename Traits, typename opt_swap = opt_false>
struct utf_decoder< Traits, opt_swap >

Definition at line 952 of file pugixml.cpp.

Member Function Documentation

◆ decode_latin1_block()

template<typename Traits , typename opt_swap = opt_false>
static Traits::value_type utf_decoder< Traits, opt_swap >::decode_latin1_block ( const uint8_t *  data,
size_t  size,
typename Traits::value_type  result 
)
inline static

Definition at line 1085 of file pugixml.cpp.

1086 {
1087 for (size_t i = 0; i < size; ++i)
1088 {
1089 result = Traits::low(result, data[i]);
1090 }
1091
1092 return result;
1093 }

Referenced by convert_buffer_latin1().

◆ decode_utf16_block()

template<typename Traits , typename opt_swap = opt_false>
static Traits::value_type utf_decoder< Traits, opt_swap >::decode_utf16_block ( const uint16_t *  data,
size_t  size,
typename Traits::value_type  result 
)
inline static

Definition at line 1016 of file pugixml.cpp.

1017 {
1018 const uint16_t* end = data + size;
1019
1020 while (data < end)
1021 {
1022 unsigned int lead = opt_swap::value ? endian_swap(*data) : *data;
1023
1024 // U+0000..U+D7FF
1025 if (lead < 0xD800)
1026 {
1027 result = Traits::low(result, lead);
1028 data += 1;
1029 }
1030 // U+E000..U+FFFF
1031 else if (static_cast<unsigned int>(lead - 0xE000) < 0x2000)
1032 {
1033 result = Traits::low(result, lead);
1034 data += 1;
1035 }
1036 // surrogate pair lead
1037 else if (static_cast<unsigned int>(lead - 0xD800) < 0x400 && data + 1 < end)
1038 {
1039 uint16_t next = opt_swap::value ? endian_swap(data[1]) : data[1];
1040
1041 if (static_cast<unsigned int>(next - 0xDC00) < 0x400)
1042 {
1043 result = Traits::high(result, 0x10000 + ((lead & 0x3ff) << 10) + (next & 0x3ff));
1044 data += 2;
1045 }
1046 else
1047 {
1048 data += 1;
1049 }
1050 }
1051 else
1052 {
1053 data += 1;
1054 }
1055 }
1056
1057 return result;
1058 }
PUGI__NS_END PUGI__NS_BEGIN uint16_t endian_swap(uint16_t value)
Definition pugixml.cpp:752
unsigned short uint16_t
Definition stdint_msvc.h:80

References endian_swap().

Referenced by convert_buffer_utf16(), and utf_decoder< Traits, opt_swap >::decode_wchar_block_impl().

◆ decode_utf32_block()

template<typename Traits , typename opt_swap = opt_false>
static Traits::value_type utf_decoder< Traits, opt_swap >::decode_utf32_block ( const uint32_t *  data,
size_t  size,
typename Traits::value_type  result 
)
inline static

Definition at line 1060 of file pugixml.cpp.

1061 {
1062 const uint32_t* end = data + size;
1063
1064 while (data < end)
1065 {
1066 uint32_t lead = opt_swap::value ? endian_swap(*data) : *data;
1067
1068 // U+0000..U+FFFF
1069 if (lead < 0x10000)
1070 {
1071 result = Traits::low(result, lead);
1072 data += 1;
1073 }
1074 // U+10000..U+10FFFF
1075 else
1076 {
1077 result = Traits::high(result, lead);
1078 data += 1;
1079 }
1080 }
1081
1082 return result;
1083 }
unsigned int uint32_t
Definition stdint_msvc.h:81

References endian_swap().

Referenced by convert_buffer_utf32(), and utf_decoder< Traits, opt_swap >::decode_wchar_block_impl().

◆ decode_utf8_block()

template<typename Traits , typename opt_swap = opt_false>
static Traits::value_type utf_decoder< Traits, opt_swap >::decode_utf8_block ( const uint8_t *  data,
size_t  size,
typename Traits::value_type  result 
)
inline static

Definition at line 954 of file pugixml.cpp.

955 {
956 const uint8_t utf8_byte_mask = 0x3f;
957
958 while (size)
959 {
960 uint8_t lead = *data;
961
962 // 0xxxxxxx -> U+0000..U+007F
963 if (lead < 0x80)
964 {
965 result = Traits::low(result, lead);
966 data += 1;
967 size -= 1;
968
969 // process aligned single-byte (ascii) blocks
970 if ((reinterpret_cast<uintptr_t>(data) & 3) == 0)
971 {
972 // round-trip through void* to silence 'cast increases required alignment of target type' warnings
973 while (size >= 4 && (*static_cast<const uint32_t*>(static_cast<const void*>(data)) & 0x80808080) == 0)
974 {
975 result = Traits::low(result, data[0]);
976 result = Traits::low(result, data[1]);
977 result = Traits::low(result, data[2]);
978 result = Traits::low(result, data[3]);
979 data += 4;
980 size -= 4;
981 }
982 }
983 }
984 // 110xxxxx -> U+0080..U+07FF
985 else if (static_cast<unsigned int>(lead - 0xC0) < 0x20 && size >= 2 && (data[1] & 0xc0) == 0x80)
986 {
987 result = Traits::low(result, ((lead & ~0xC0) << 6) | (data[1] & utf8_byte_mask));
988 data += 2;
989 size -= 2;
990 }
991 // 1110xxxx -> U+0800-U+FFFF
992 else if (static_cast<unsigned int>(lead - 0xE0) < 0x10 && size >= 3 && (data[1] & 0xc0) == 0x80 && (data[2] & 0xc0) == 0x80)
993 {
994 result = Traits::low(result, ((lead & ~0xE0) << 12) | ((data[1] & utf8_byte_mask) << 6) | (data[2] & utf8_byte_mask));
995 data += 3;
996 size -= 3;
997 }
998 // 11110xxx -> U+10000..U+10FFFF
999 else if (static_cast<unsigned int>(lead - 0xF0) < 0x08 && size >= 4 && (data[1] & 0xc0) == 0x80 && (data[2] & 0xc0) == 0x80 && (data[3] & 0xc0) == 0x80)
1000 {
1001 result = Traits::high(result, ((lead & ~0xF0) << 18) | ((data[1] & utf8_byte_mask) << 12) | ((data[2] & utf8_byte_mask) << 6) | (data[3] & utf8_byte_mask));
1002 data += 4;
1003 size -= 4;
1004 }
1005 // 10xxxxxx or 11111xxx -> invalid
1006 else
1007 {
1008 data += 1;
1009 size -= 1;
1010 }
1011 }
1012
1013 return result;
1014 }
_W64 unsigned int uintptr_t
unsigned char uint8_t
Definition stdint_msvc.h:79

Referenced by as_wide_impl(), and convert_buffer_output().

◆ decode_wchar_block()

template<typename Traits , typename opt_swap = opt_false>
static Traits::value_type utf_decoder< Traits, opt_swap >::decode_wchar_block ( const wchar_t *  data,
size_t  size,
typename Traits::value_type  result 
)
inline static

Definition at line 1105 of file pugixml.cpp.

1106 {
1107 return decode_wchar_block_impl(reinterpret_cast<const wchar_selector<sizeof(wchar_t)>::type*>(data), size, result);
1108 }
static Traits::value_type decode_wchar_block_impl(const uint16_t *data, size_t size, typename Traits::value_type result)
Definition pugixml.cpp:1095

References utf_decoder< Traits, opt_swap >::decode_wchar_block_impl().

Referenced by as_utf8_begin(), and as_utf8_end().

◆ decode_wchar_block_impl() [1/2]

template<typename Traits , typename opt_swap = opt_false>
static Traits::value_type utf_decoder< Traits, opt_swap >::decode_wchar_block_impl ( const uint16_t *  data,
size_t  size,
typename Traits::value_type  result 
)
inline static

Definition at line 1095 of file pugixml.cpp.

1096 {
1097 return decode_utf16_block(data, size, result);
1098 }
static Traits::value_type decode_utf16_block(const uint16_t *data, size_t size, typename Traits::value_type result)
Definition pugixml.cpp:1016

References utf_decoder< Traits, opt_swap >::decode_utf16_block().

Referenced by utf_decoder< Traits, opt_swap >::decode_wchar_block().

◆ decode_wchar_block_impl() [2/2]

template<typename Traits , typename opt_swap = opt_false>
static Traits::value_type utf_decoder< Traits, opt_swap >::decode_wchar_block_impl ( const uint32_t *  data,
size_t  size,
typename Traits::value_type  result 
)
inline static

Definition at line 1100 of file pugixml.cpp.

1101 {
1102 return decode_utf32_block(data, size, result);
1103 }
static Traits::value_type decode_utf32_block(const uint32_t *data, size_t size, typename Traits::value_type result)
Definition pugixml.cpp:1060

References utf_decoder< Traits, opt_swap >::decode_utf32_block().


The documentation for this struct was generated from the following file:

pugixml.cpp

Generated on Thu May 22 2025 08:23:50 for QuickFIX by doxygen 1.9.8 written by Dimitri van Heesch, © 1997-2001