// File-viewer metadata: 80 lines, 2.8 KiB, C++, executable file.
#ifndef UTF_H
|
|
#define UTF_H
|
|
|
|
#include <cstddef>
#include <cstdint>
#include <exception>
#include <sstream>
#include <string>
|
|
|
|
/**
 * @brief Exception class signalling a failed transcoding.
 * @details As a rule, an exception of this type means that the source string
 * does not match the specified encoding.
 */
class bad_conversion : public std::exception
{
private:
    std::string _msg;        ///< Fully formatted text handed out by what().
    char32_t    _badSymbol;  ///< Copy of the badData constructor argument.

public:
    /**
     * @brief Builds the message "<msg> bad symbol: <badData>".
     * @param msg     Description of the failure. A null pointer is treated as
     *                an empty description.
     * @param badData The offending symbol; it is rendered as its numeric value
     *                in decimal.
     */
    bad_conversion(const char* msg, char32_t badData)
        : _badSymbol(badData)
    {
        std::ostringstream errStream;

        // Streaming a null character pointer is undefined behaviour, so it is
        // simply skipped.
        if (msg != nullptr)
        {
            errStream << msg;
        }

        // operator<< for char32_t on narrow streams is deleted since C++20.
        // The explicit cast keeps the decimal rendering that earlier standards
        // produced through integral promotion.
        errStream << " bad symbol: " << static_cast<std::uint32_t>(_badSymbol);

        _msg = errStream.str();
    }

    ~bad_conversion() noexcept override = default;

    /**
     * @brief Returns the message assembled by the constructor.
     * @return Null-terminated string owned by this object.
     */
    const char* what() const noexcept override
    {
        return _msg.c_str();
    }
};
|
|
|
|
// Element counts of the BOM arrays declared below. The arrays hold
// unsigned char, so these are also their sizes in bytes.
#define UTF8_BOM_SIZE  3
#define UTF16_BOM_SIZE 2
#define UTF32_BOM_SIZE 4

// BOM tables for each encoding / byte-order combination. They are only
// declared here: the definitions, and therefore the byte values, live
// outside this header.
extern const unsigned char g_utf8_bom[UTF8_BOM_SIZE];
extern const unsigned char g_utf16le_bom[UTF16_BOM_SIZE];
extern const unsigned char g_utf16be_bom[UTF16_BOM_SIZE];
extern const unsigned char g_utf32le_bom[UTF32_BOM_SIZE];
extern const unsigned char g_utf32be_bom[UTF32_BOM_SIZE];
|
|
|
|
/// Byte-order selector taken by the UTF-16 / UTF-32 helpers declared in this
/// header.
enum byte_order
{
    BYTE_ORDER_LITTLE_ENDIAN,  ///< First enumerator (value 0).
    BYTE_ORDER_BIG_ENDIAN      ///< Second enumerator (value 1).
};
|
|
|
|
/**
 * @brief Reports a byte order as a byte_order value.
 * @return One of the byte_order enumerators.
 * @note Only the declaration is visible here. Judging by the name, this is
 *       presumably the byte order of the host; confirm against the definition.
 */
byte_order current_byte_order();
|
|
// NOTE(review): both helpers are declared `inline`, yet no definition appears
// in the visible part of this header. An inline function must be defined in
// every translation unit that odr-uses it, so confirm that each includer can
// reach the definitions.

/// Presumably returns val with its two bytes swapped; confirm against the
/// definition.
inline char16_t invert_byte_order_16(char16_t val);

/// Presumably returns val with its four bytes reversed; confirm against the
/// definition.
inline char32_t invert_byte_order_32(char32_t val);
|
|
|
|
// Only declarations are visible in this header. The descriptions below are
// inferred from names and signatures and should be confirmed against the
// definitions.

/// Presumably the bit mask for the first byte of a UTF-8 sequence that is
/// symbolSize bytes long.
std::size_t utf8_first_byte_mask(std::size_t symbolSize);

/// Presumably returns str advanced past a leading UTF-8 BOM, if one is present.
const char* utf8_skip_bom(const char* str);

/// Presumably returns str advanced past a leading UTF-16 BOM, if one is present.
const char16_t* utf16_skip_bom(const char16_t* str);

/// Presumably returns str advanced past a leading UTF-32 BOM, if one is present.
const char32_t* utf32_skip_bom(const char32_t* str);

/// Presumably the number of significant bits in v.
std::size_t significant_bits(std::uint32_t v);
|
|
/**
 * @brief Judging by the name, copies UTF-32 data from src to dst while
 *        converting its byte order (declaration only; confirm against the
 *        definition).
 * @param src       Source buffer (read-only).
 * @param dst       Destination buffer.
 * @param num       Count argument; presumably in char32_t elements, confirm.
 * @param byteOrder Byte-order argument, BYTE_ORDER_LITTLE_ENDIAN by default.
 *                  Whether it describes the source or the destination is not
 *                  visible here.
 */
void utf32_strcpy_with_convert_byteorder(const char32_t* src,
                                         char32_t* dst,
                                         std::size_t num,
                                         byte_order byteOrder = BYTE_ORDER_LITTLE_ENDIAN);
|
|
|
|
// UTF-8 <-> UCS-4 converters (declarations only). Both write through dst and
// return nothing. How `symbols` is counted, what needUnescape enables, and how
// failures are reported are not visible here; confirm against the definitions.

/// Presumably decodes UTF-8 text from src into UCS-4 code points in dst.
void utf8_to_ucs4(const char* src, char32_t* dst, std::size_t symbols, bool needUnescape);

/// Presumably encodes UCS-4 code points from src as UTF-8 into dst.
void ucs4_to_utf8(const char32_t* src, char* dst, std::size_t symbols);
|
|
// UTF-16 <-> UCS-4 converters (declarations only). byteOrder defaults to
// BYTE_ORDER_LITTLE_ENDIAN; it presumably names the byte order of the UTF-16
// side. Confirm against the definitions.

/// Presumably decodes UTF-16 text from src into UCS-4 code points in dst.
void utf16_to_ucs4(const char16_t* src,
                   char32_t* dst,
                   std::size_t symbols,
                   byte_order byteOrder = BYTE_ORDER_LITTLE_ENDIAN);

/// Presumably encodes UCS-4 code points from src as UTF-16 into dst.
void ucs4_to_utf16(const char32_t* src,
                   char16_t* dst,
                   std::size_t symbols,
                   byte_order byteOrder = BYTE_ORDER_LITTLE_ENDIAN);
|
|
|
|
// UTF-8 measuring helpers (declarations only). The unit of the results and
// the effect of needUnescape are not visible here; confirm against the
// definitions.

/// Presumably the length of the UTF-8 string str, counted in symbols.
std::size_t utf8_str_len(const char* str, bool needUnescape);

/// Presumably the size of the UTF-8 symbol that starts at str.
std::size_t utf8_get_symbol_size(const char* str, bool needUnescape);
|
|
// UTF-16 measuring helpers (declarations only). byteOrder defaults to
// BYTE_ORDER_LITTLE_ENDIAN and presumably names the byte order of str.
// Confirm against the definitions.

/// Presumably the length of the UTF-16 string str, counted in symbols.
std::size_t utf16_str_len(const char16_t* str,
                          byte_order byteOrder = BYTE_ORDER_LITTLE_ENDIAN);

/// Presumably the size of the UTF-16 symbol that starts at str.
std::size_t utf16_get_symbol_size(const char16_t* str,
                                  byte_order byteOrder = BYTE_ORDER_LITTLE_ENDIAN);
|
|
/// Presumably the length of the UTF-32 string str (declaration only; the
/// terminator convention and unit are not visible here, so confirm against the
/// definition).
std::size_t utf32_str_len(const char32_t* str);
|
|
|
|
// Size queries for encoding UCS-4 data (declarations only). The descriptions
// are inferred from the names; confirm units and terminator handling against
// the definitions.

/// Presumably the size of `symbol` once encoded as UTF-8.
std::size_t ucs4_get_utf8_symbol_size(const char32_t symbol);

/// Presumably the number of bytes needed to encode the UCS-4 string str as UTF-8.
std::size_t ucs4_get_utf8_str_bytes(const char32_t* str);

/// Presumably the size of `symbol` once encoded as UTF-16.
std::size_t ucs4_get_utf16_symbol_size(const char32_t symbol);

/// Presumably the number of bytes needed to encode the UCS-4 string str as UTF-16.
std::size_t ucs4_get_utf16_str_bytes(const char32_t* str);
|
|
|
|
#endif // UTF_H
|