unicode.c

Include dependency graph for unicode.c:

digraph {
    graph [bgcolor="#00000000"]
    node [shape=rectangle style=filled fillcolor="#FFFFFF" font=Helvetica padding=2]
    edge [color="#1414CE"]
    "2" [label="stdbool.h" tooltip="stdbool.h"]
    "4" [label="utils.h" tooltip="utils.h"]
    "8" [label="stdint.h" tooltip="stdint.h"]
    "6" [label="stdlib.h" tooltip="stdlib.h"]
    "3" [label="stddef.h" tooltip="stddef.h"]
    "1" [label="/__w/AtomVM/AtomVM/src/libAtomVM/unicode.c" tooltip="/__w/AtomVM/AtomVM/src/libAtomVM/unicode.c" fillcolor="#BFBFBF"]
    "7" [label="unicode.h" tooltip="unicode.h"]
    "5" [label="stdio.h" tooltip="stdio.h"]
    "4" -> "3" [dir=forward tooltip="include"]
    "4" -> "5" [dir=forward tooltip="include"]
    "4" -> "6" [dir=forward tooltip="include"]
    "1" -> "2" [dir=forward tooltip="include"]
    "1" -> "3" [dir=forward tooltip="include"]
    "1" -> "4" [dir=forward tooltip="include"]
    "1" -> "7" [dir=forward tooltip="include"]
    "7" -> "2" [dir=forward tooltip="include"]
    "7" -> "3" [dir=forward tooltip="include"]
    "7" -> "8" [dir=forward tooltip="include"]
}

Defines

UTF8_ACCEPT 0
UTF8_REJECT 12

Functions

static inline uint32_t decode(uint32_t *state, uint32_t *codep, uint32_t byte)
enum UnicodeTransformDecodeResult unicode_utf8_decode(const uint8_t *buf, size_t len, uint32_t *c, size_t *out_size)

Decode a character from UTF-8.

Parameters:
  • buf – the buffer from which to decode the string

  • len – the length (in bytes) of buf

  • c – pointer to the uint32_t that receives the decoded character value

  • out_size – the size in bytes, on output (if not NULL)

Returns:

UnicodeTransformDecodeSuccess if decoding was successful, UnicodeTransformDecodeFail if the character starting at buf is not a valid Unicode character, or UnicodeTransformDecodeIncomplete if the character starting at buf is a valid but incomplete transformation.

bool unicode_is_valid_utf8_buf(const uint8_t *buf, size_t len)
size_t unicode_buf_utf8_len(const uint8_t *buf, size_t buf_len)
bool unicode_buf_is_ascii(const uint8_t *buf, size_t len)
size_t unicode_latin1_buf_size_as_utf8(const uint8_t *buf, size_t len)

Variables

static const uint8_t utf8d[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 8, 8, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 10, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 3, 3, 11, 6, 6, 6, 5, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 0, 12, 24, 36, 60, 96, 84, 12, 12, 12, 48, 72, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 0, 12, 12, 12, 12, 12, 0, 12, 0, 12, 12, 12, 24, 12, 12, 12, 12, 12, 24, 12, 24, 12, 12, 12, 12, 12, 12, 12, 12, 12, 24, 12, 12, 12, 12, 12, 24, 12, 12, 12, 12, 12, 12, 12, 24, 12, 12, 12, 12, 12, 12, 12, 12, 12, 36, 12, 36, 12, 12, 12, 36, 12, 12, 12, 12, 12, 36, 12, 36, 12, 12, 12, 36, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,}