Libparserutils
inputstream.h
Go to the documentation of this file.
1/*
2 * This file is part of LibParserUtils.
3 * Licensed under the MIT License,
4 * http://www.opensource.org/licenses/mit-license.php
5 * Copyright 2007 John-Mark Bell <jmb@netsurf-browser.org>
6 */
7
8#ifndef parserutils_input_inputstream_h_
9#define parserutils_input_inputstream_h_
10
11#ifdef __cplusplus
12extern "C"
13{
14#endif
15
16#include <stdbool.h>
17#ifndef NDEBUG
18#include <stdio.h>
19#endif
20#include <stdlib.h>
21#include <inttypes.h>
22
23#include <parserutils/errors.h>
25#include <parserutils/types.h>
28
33 const uint8_t *data, size_t len,
34 uint16_t *mibenum, uint32_t *source);
35
47
48/* Create an input stream */
50 uint32_t encsrc, parserutils_charset_detect_func csdetect,
52/* Destroy an input stream */
55
56/* Append data to an input stream */
59 const uint8_t *data, size_t len);
60/* Insert data into stream at current location */
63 const uint8_t *data, size_t len);
64
65/* Slow form of parserutils_inputstream_peek. */
68 size_t offset, const uint8_t **ptr, size_t *length);
69
92 parserutils_inputstream *stream, size_t offset,
93 const uint8_t **ptr, size_t *length)
94{
96 const parserutils_buffer *utf8;
97 const uint8_t *utf8_data;
98 size_t len, off, utf8_len;
99
100 if (stream == NULL || ptr == NULL || length == NULL)
101 return PARSERUTILS_BADPARM;
102
103#ifndef NDEBUG
104#ifdef VERBOSE_INPUTSTREAM
105 fprintf(stdout, "Peek: len: %zu cur: %u off: %zu\n",
106 stream->utf8->length, stream->cursor, offset);
107#endif
108#ifdef RANDOMISE_INPUTSTREAM
110#endif
111#endif
112
113 utf8 = stream->utf8;
114 utf8_data = utf8->data;
115 utf8_len = utf8->length;
116 off = stream->cursor + offset;
117
118#define IS_ASCII(x) (((x) & 0x80) == 0)
119
120 if (off < utf8_len) {
121 if (IS_ASCII(utf8_data[off])) {
122 /* Early exit for ASCII case */
123 (*length) = 1;
124 (*ptr) = (utf8_data + off);
125 return PARSERUTILS_OK;
126 } else {
128 utf8_data + off, &len);
129
130 if (error == PARSERUTILS_OK) {
131 (*length) = len;
132 (*ptr) = (utf8_data + off);
133 return PARSERUTILS_OK;
134 } else if (error != PARSERUTILS_NEEDDATA) {
135 return error;
136 }
137 }
138 }
139
140#undef IS_ASCII
141
142 return parserutils_inputstream_peek_slow(stream, offset, ptr, length);
143}
144
152 parserutils_inputstream *stream, size_t bytes)
153{
154 if (stream == NULL)
155 return;
156
157#if !defined(NDEBUG) && defined(VERBOSE_INPUTSTREAM)
158 fprintf(stdout, "Advance: len: %zu cur: %u bytes: %zu\n",
159 stream->utf8->length, stream->cursor, bytes);
160#endif
161
162 if (bytes > stream->utf8->length - stream->cursor)
163 bytes = stream->utf8->length - stream->cursor;
164
165 if (stream->cursor == stream->utf8->length)
166 return;
167
168 stream->cursor += bytes;
169}
170
171/* Read the document charset */
173 parserutils_inputstream *stream, uint32_t *source);
174/* Change the document charset */
177 const char *enc, uint32_t source);
178
179#ifdef __cplusplus
180}
181#endif
182
183#endif
184
parserutils_error parserutils_buffer_randomise(parserutils_buffer *buffer)
Definition buffer.c:249
size_t len
Definition codec_8859.c:23
parserutils_error
Definition errors.h:18
@ PARSERUTILS_OK
Definition errors.h:19
@ PARSERUTILS_NEEDDATA
Definition errors.h:25
@ PARSERUTILS_BADPARM
Definition errors.h:22
static void parserutils_inputstream_advance(parserutils_inputstream *stream, size_t bytes)
Advance the stream's current position.
parserutils_error parserutils_inputstream_insert(parserutils_inputstream *stream, const uint8_t *data, size_t len)
Insert data into stream at current location.
parserutils_error(* parserutils_charset_detect_func)(const uint8_t *data, size_t len, uint16_t *mibenum, uint32_t *source)
Type of charset detection function.
Definition inputstream.h:32
const char * parserutils_inputstream_read_charset(parserutils_inputstream *stream, uint32_t *source)
Read the source charset of the input stream.
parserutils_error parserutils_inputstream_create(const char *enc, uint32_t encsrc, parserutils_charset_detect_func csdetect, parserutils_inputstream **stream)
Create an input stream.
Definition inputstream.c:59
parserutils_error parserutils_inputstream_change_charset(parserutils_inputstream *stream, const char *enc, uint32_t source)
Change the source charset of the input stream.
#define IS_ASCII(x)
parserutils_error parserutils_inputstream_peek_slow(parserutils_inputstream *stream, size_t offset, const uint8_t **ptr, size_t *length)
Look at the character in the stream that starts at offset bytes from the cursor (slow version).
static parserutils_error parserutils_inputstream_peek(parserutils_inputstream *stream, size_t offset, const uint8_t **ptr, size_t *length)
Look at the character in the stream that starts at offset bytes from the cursor.
Definition inputstream.h:91
parserutils_error parserutils_inputstream_destroy(parserutils_inputstream *stream)
Destroy an input stream.
parserutils_error parserutils_inputstream_append(parserutils_inputstream *stream, const uint8_t *data, size_t len)
Append data to an input stream.
uint8_t * data
Definition buffer.h:22
Input stream object.
Definition inputstream.h:40
parserutils_buffer * utf8
Buffer containing UTF-8 data.
Definition inputstream.h:41
uint32_t cursor
Byte offset of current position.
Definition inputstream.h:43
bool had_eof
Whether EOF has been reached.
Definition inputstream.h:45
UTF-8 manipulation functions (interface).
parserutils_error parserutils_charset_utf8_char_byte_length(const uint8_t *s, size_t *len)
Calculate the length (in bytes) of a UTF-8 character.
Definition utf8.c:107