-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathucode8.h
84 lines (71 loc) · 2.1 KB
/
ucode8.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
#ifndef _H_UCODE8
#define _H_UCODE8
#include <cstdint>
#include <string>
//! Read one Unicode code number from the front of a char buffer.
//! (UTF-8 input is implied by the function name; the decoder itself is
//! implemented outside this header.)
//! @param cpt  buffer pointer
//! @param slen size of the buffer
//! @param d    receives the 32-bit unicode value
//! @return number of character units consumed
//! NOTE(review): what is stored in d / returned for malformed or truncated
//! input is not visible from this declaration - confirm in the implementation.
unsigned int ucode8Fore(const char* cpt, unsigned int slen, char32_t& d);
//! Read one Unicode code number from the front of a char16_t buffer.
//! (UTF-16 input is implied by the function name; the decoder itself is
//! implemented outside this header.)
//! @param cp   pointer to the first 16-bit unit to examine
//! @param slen number of units available (presumably counted in char16_t
//!             units - confirm against the implementation)
//! @param uc   receives the unicode code number
//! @return number of character units consumed
unsigned int ucode16Fore(const char16_t* cp, unsigned int slen, char32_t& uc);
//! Encoder for a single char32_t value into UTF-8.
//! The encoded bytes are left in `result` as a null terminated string of
//! char, and encode() returns the length.
struct EncodeUTF8 {
    //! Output buffer: one to several bytes of result.
    char result[8];

    //! Turn a Unicode 32-bit character into a multibyte UTF-8 string.
    //! @param d the character to encode
    //! @return length of the encoded string placed in `result`
    //! NOTE(review): handling of values that are not valid code points is
    //! decided by the implementation, which is not part of this header.
    unsigned int encode(const char32_t d);
};
//! Encoder for a single char32_t value into UTF-16.
//! encode() reports how many char16_t units it produced.
struct EncodeUTF16 {
    //! Output buffer: a one or two unit UTF-16 result.
    char16_t result[2];

    //! Encode d into `result`.
    //! @param d the character to encode
    //! @return count of encoded units put in `result`
    unsigned int encode(const char32_t d);
};
//! Arbitrary numbering for a few Byte-Order-Mark sequences.
//! The numeric values are spelled out explicitly so they stay fixed if
//! enumerators are ever added or reordered.
enum BOM_CODE {
    BAD_BOM  = -1,  //!< the only negative code (exact meaning set by the producers of this value)
    NO_BOM   = 0,   //!< zero: no byte-order mark
    UTF_8    = 1,
    UTF_16LE = 2,
    UTF_16BE = 3,
    UTF_32LE = 4,
    UTF_32BE = 5
};
/*! Detects the native endian direction at run time.
    A 32-bit probe holding 0x01020304 is examined through its first byte in
    memory: if that byte is 0x01 the platform is big-endian.
    If it isn't big, presume it's little.
*/
class TestEndian {
private:
    //! Probe value; each of its four bytes is distinct.
    uint32_t probe = 0x01020304;
public:
    //! True if most significant byte is at the lowest memory address.
    bool isBigEnd() const {
        // Look at the object representation through unsigned char, which the
        // language permits for any object, instead of writing one union
        // member and reading another (undefined behaviour in C++).
        return *reinterpret_cast<const unsigned char*>(&probe) == 1;
    }
};
//! Sentinel char32_t value: all bits set (-1 converted to char32_t).
constexpr char32_t INVALID_CHAR = static_cast<char32_t>(-1);
//! Name for a BOM_CODE value.
//! @param code one of the BOM_CODE enumerators
//! @return pointer to a C string naming the code
//! NOTE(review): the exact strings, and the lifetime of the returned pointer,
//! are determined by the implementation - confirm before storing the pointer.
const char* getBOMName(
    BOM_CODE code);
//! Byte by byte check of a buffer for a Byte-Order-Mark.
//! @param sptr pointer to the bytes to check
//! @param len  number of bytes available at sptr
//! @return the BOM_CODE that was recognised
//! NOTE(review): which inputs yield NO_BOM versus BAD_BOM is decided by the
//! implementation, which is not part of this header.
BOM_CODE getBOMCode(
    const char* sptr,
    unsigned int len);
//! Convert a 16-bit character buffer, UTF-16 as used on this platform, to UTF-8.
//! @param cp     pointer to the UTF-16 input units
//! @param wlen   length of the input (presumably in char16_t units - confirm)
//! @param output string that receives the UTF-8 result
//! @return bool status (presumably true on success - confirm in the implementation)
bool convertUTF16(
    const char16_t* cp,
    unsigned int wlen,
    std::string& output);
//! Convert input to UTF-16 (going by the function name).
//! The result is returned in a std::string, because PHP uses string for
//! buffer storage.
//! @param input  source text
//! @param output receives the converted result
//! @return int whose meaning is set by the implementation
//!         (NOTE(review): count or status code? confirm)
int toUTF16(
    const std::string& input,
    std::string& output);
//! Operates in place on a buffer of 16-bit units.
//! NOTE(review): the name suggests it swaps the byte order of each unit;
//! confirm against the implementation.
//! @param cp   pointer to the (mutable) buffer
//! @param wlen length of the buffer (presumably in char16_t units - confirm)
void swap16buffer(
    char16_t* cp,
    unsigned int wlen);
/*!
 * Check which encoding the value of input is in, and make sure it is UTF-8,
 * converting it if possible.
 * @param input text to check; passed by non-const reference, so a conversion
 *              can replace its contents
 * @return an offset to the end of the BOM, if any remains
 */
unsigned int ensureUTF8(
    std::string& input);
#endif