Skip to content

Commit d8c4a84

Browse files
authored
io: add a string_reader submodule (#20893)
1 parent a9c1042 commit d8c4a84

File tree

6 files changed

+454
-11
lines changed

6 files changed

+454
-11
lines changed

vlib/io/buffered_reader.v

+1
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ pub struct BufferedReaderConfig {
2424
// BufferedReadLineConfig holds the options that can be passed to read_line().
@[params]
pub struct BufferedReadLineConfig {
pub:
	// byte that terminates a line, a newline unless overridden
	delim u8 = `\n`
}
2930

vlib/io/reader_test.v

+10-10
Original file line numberDiff line numberDiff line change
@@ -38,13 +38,13 @@ fn test_read_all_huge() {
3838
assert res == '123'.repeat(100000).bytes()
3939
}
4040

41-
struct StringReader {
41+
struct StringReaderTest {
4242
text string
4343
mut:
4444
place int
4545
}
4646

47-
fn (mut s StringReader) read(mut buf []u8) !int {
47+
fn (mut s StringReaderTest) read(mut buf []u8) !int {
4848
if s.place >= s.text.len {
4949
return Eof{}
5050
}
@@ -55,9 +55,9 @@ fn (mut s StringReader) read(mut buf []u8) !int {
5555

5656
const newline_count = 100000
5757

58-
fn test_stringreader() {
58+
fn test_stringreadertest() {
5959
text := '12345\n'.repeat(io.newline_count)
60-
mut s := StringReader{
60+
mut s := StringReaderTest{
6161
text: text
6262
}
6363
mut r := new_buffered_reader(reader: s)
@@ -80,9 +80,9 @@ fn test_stringreader() {
8080
}
8181
}
8282

83-
fn test_stringreader2() {
83+
fn test_stringreadertest2() {
8484
text := '12345\r\n'.repeat(io.newline_count)
85-
mut s := StringReader{
85+
mut s := StringReaderTest{
8686
text: text
8787
}
8888
mut r := new_buffered_reader(reader: s)
@@ -107,7 +107,7 @@ fn test_stringreader2() {
107107

108108
fn test_leftover() {
109109
text := 'This is a test\r\nNice try!'
110-
mut s := StringReader{
110+
mut s := StringReaderTest{
111111
text: text
112112
}
113113
mut r := new_buffered_reader(reader: s)
@@ -129,7 +129,7 @@ fn test_leftover() {
129129

130130
fn test_totalread_read() {
131131
text := 'Some testing text'
132-
mut s := StringReader{
132+
mut s := StringReaderTest{
133133
text: text
134134
}
135135
mut r := new_buffered_reader(reader: s)
@@ -145,7 +145,7 @@ fn test_totalread_read() {
145145

146146
fn test_totalread_readline() {
147147
text := 'Some testing text\nmore_enters'
148-
mut s := StringReader{
148+
mut s := StringReaderTest{
149149
text: text
150150
}
151151
mut r := new_buffered_reader(reader: s)
@@ -164,7 +164,7 @@ fn test_totalread_readline() {
164164

165165
fn test_read_line_until_zero_terminated() {
166166
text := 'This is a test\0Nice try!\0'
167-
mut s := StringReader{
167+
mut s := StringReaderTest{
168168
text: text
169169
}
170170
mut r := new_buffered_reader(reader: s)

vlib/io/string_reader/string_reader.v

+292
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,292 @@
1+
module string_reader
2+
3+
import io
4+
import strings
5+
6+
@[params]
pub struct StringReaderParams {
pub:
	// optional upstream reader that the buffer can be (re)filled from
	reader ?io.Reader
	// optional string that the builder is pre-filled with
	source ?string
	// initial size of the string builder, only used when no source is given
	initial_size int
}
15+
16+
// StringReader reads data from a Reader interface and/or a source string into a dynamically
// growing buffer that is backed by a string builder. Unlike the BufferedReader, a StringReader
// keeps the entire contents of the buffer in memory, so that incoming data can be reused
// and read in an efficient manner. No maximum capacity is set on the string builder's buffer,
// so it can grow very large.
pub struct StringReader {
mut:
	// optional upstream reader
	reader ?io.Reader
	// current read offset in the buffer
	offset int
pub mut:
	// whether the end of the upstream reader was reached
	end_of_stream bool
	// buffer holding the data read so far
	builder strings.Builder
}
29+
30+
// new creates a new StringReader that uses the optional `reader` of `params` as its upstream.
// If a `source` string is given, the string builder is sized to hold it and pre-filled with it,
// otherwise an empty string builder of size `initial_size` is created.
pub fn StringReader.new(params StringReaderParams) StringReader {
	if text := params.source {
		mut prefilled := strings.new_builder(text.len)
		prefilled.write_string(text)
		return StringReader{
			reader:  params.reader
			builder: prefilled
		}
	}

	return StringReader{
		reader:  params.reader
		builder: strings.new_builder(params.initial_size)
	}
}
46+
47+
// needs_fill reports whether all buffered data has been consumed,
// i.e. whether the read offset has reached the end of the buffer
pub fn (r StringReader) needs_fill() bool {
	return r.builder.len <= r.offset
}
51+
52+
// needs_fill_until returns whether the buffer needs refilling in order to read
// `n` bytes, i.e. whether fewer than `n` unread bytes are buffered
pub fn (r StringReader) needs_fill_until(n int) bool {
	// when exactly `n` unread bytes are buffered (offset + n == len) no refill is needed
	return r.offset + n > r.builder.len
}
57+
58+
// fill_buffer tries to read data from the reader into the buffer until either a 0 length read
// or, if `read_till_end_of_stream` is true, the end of the stream.
// It returns the number of bytes that were added to the buffer.
// It returns io.Eof when the end of the stream was already reached or when no bytes
// could be read, and an error when no reader is set.
pub fn (mut r StringReader) fill_buffer(read_till_end_of_stream bool) !int {
	if r.end_of_stream {
		return io.Eof{}
	}

	mut reader := r.reader or { return error('reader is not set') }

	start := r.builder.len
	mut end := start

	// make sure there is enough room in the string builder
	unsafe { r.builder.grow_len(io.read_all_len) }
	defer {
		// shrink the length of the buffer to the total of bytes read
		r.builder.go_back(r.builder.len - end)
	}

	for {
		// write behind the bytes read so far (`end`, not `start`), so that
		// consecutive reads are appended instead of overwriting each other
		read := reader.read(mut r.builder[end..]) or {
			r.end_of_stream = true
			break
		}
		end += read

		if !read_till_end_of_stream && read == 0 {
			break
		} else if r.builder.len == end {
			// the reserved room is used up, make room for the next read
			unsafe { r.builder.grow_len(io.read_all_grow_len) }
		}
	}

	if end == start {
		return io.Eof{}
	}

	return end - start
}
97+
98+
// fill_buffer_until tries to read `n` bytes from the reader into the buffer
// and returns the actual number of bytes read.
// It returns io.Eof when the end of the stream was already reached or when no bytes
// could be read, and an error when no reader is set.
pub fn (mut r StringReader) fill_buffer_until(n int) !int {
	if r.end_of_stream {
		return io.Eof{}
	}

	mut reader := r.reader or { return error('reader is not set') }

	// a non-positive request can not yield any bytes; report it like every other
	// zero byte fill and never hand a negative amount to grow_len
	if n <= 0 {
		return io.Eof{}
	}

	start := r.builder.len
	mut end := start

	// make sure there is enough room in the string builder, but reserve it in
	// chunks so that a large `n` does not allocate everything up front
	first_chunk := if n > io.read_all_len { io.read_all_len } else { n }
	unsafe { r.builder.grow_len(first_chunk) }
	defer {
		// shrink the length of the buffer to the total of bytes read, so that
		// reserved but unused room is never mistaken for data
		r.builder.go_back(r.builder.len - end)
	}

	for {
		// write behind the bytes read so far (`end`, not `start`), so that
		// consecutive reads are appended instead of overwriting each other
		read := reader.read(mut r.builder[end..]) or {
			r.end_of_stream = true
			break
		}
		end += read

		// `end` is a position in the buffer, while `n` is a byte count
		remaining := n - (end - start)
		if read == 0 || remaining <= 0 {
			break
		} else if r.builder.len == end {
			grow_by := if remaining > io.read_all_grow_len {
				io.read_all_grow_len
			} else {
				remaining
			}
			unsafe { r.builder.grow_len(grow_by) }
		}
	}

	if end == start {
		return io.Eof{}
	}
	return end - start
}
139+
140+
// read_all_bytes fills the buffer from the reader until either a 0 length read or, if
// `read_till_end_of_stream` is true, the end of the stream. It returns a copy of all
// bytes between the current offset and the end of the buffer and moves the offset to
// the end of the buffer. It returns io.Eof when there are no such bytes.
pub fn (mut r StringReader) read_all_bytes(read_till_end_of_stream bool) ![]u8 {
	from := r.offset
	// errors of fill_buffer are ignored on purpose: unread data can still be buffered
	r.fill_buffer(read_till_end_of_stream) or {}
	to := r.builder.len
	r.offset = to
	if from == to {
		// nothing was buffered and nothing new could be read
		return io.Eof{}
	}

	return r.get_part(from, to - from)!
}
154+
155+
// read_all works like read_all_bytes, but produces a string from the read data
pub fn (mut r StringReader) read_all(read_till_end_of_stream bool) !string {
	raw := r.read_all_bytes(read_till_end_of_stream)!
	// the string takes over the freshly copied block, no second copy is made
	return unsafe { tos(raw.data, raw.len) }
}
161+
162+
// read_bytes tries to read `n` bytes, starting at the current offset, and returns a copy
// of them. Bytes that are already buffered are used first and only the missing bytes are
// requested from the reader. Fewer than `n` bytes are returned when the reader can not
// deliver more. An error (io.Eof at the end of the stream) is returned only when no
// bytes at all are available.
pub fn (mut r StringReader) read_bytes(n int) ![]u8 {
	start := r.offset
	mut available := r.builder.len - start

	if n > available {
		got := r.fill_buffer_until(n - available) or {
			// the upstream can not deliver anything (anymore); that is only an
			// error when there is no buffered data left to hand out either
			if available <= 0 {
				return err
			}
			0
		}
		// count the fresh bytes on top of the ones that were buffered already
		available += got
	}

	count := if n < available { n } else { available }
	r.offset = start + count
	return r.get_part(start, count)!
}
175+
176+
// read_string tries to read `n` bytes from the reader, like read_bytes,
// and produces a string from the read data
pub fn (mut r StringReader) read_string(n int) !string {
	raw := r.read_bytes(n)!
	// the string takes over the freshly copied block, no second copy is made
	return unsafe { tos(raw.data, raw.len) }
}
182+
183+
// read implements the Reader interface. It copies unread data from the buffer into `buf`,
// refilling the buffer from the upstream reader first when all buffered data was consumed.
// It returns the number of bytes copied into `buf` and advances the offset by that amount.
// Errors of the refill (io.Eof at the end of the stream) are passed on to the caller.
pub fn (mut r StringReader) read(mut buf []u8) !int {
	mut data_end := r.builder.len
	if r.needs_fill() {
		// ask for at most as many bytes as fit into `buf`
		filled := r.fill_buffer_until(buf.len)!
		data_end = r.offset + filled
	}

	// copy stops at the shorter of both slices and returns the number of bytes copied
	copied := copy(mut buf, r.builder[r.offset..data_end])
	r.offset += copied
	return copied
}
193+
194+
// read_line attempts to read a line from the reader.
// It will read until it finds the specified line delimiter
// such as (\n, the default or \0) or the end of stream.
// The delimiter is not part of the returned line. When the delimiter is `\n`,
// a `\r` directly in front of it is dropped from the line as well.
// It returns io.Eof when no more data is available.
@[direct_array_access]
pub fn (mut r StringReader) read_line(config io.BufferedReadLineConfig) !string {
	if r.end_of_stream && r.needs_fill() {
		return io.Eof{}
	}

	start := r.offset
	for {
		if r.needs_fill() {
			r.fill_buffer(false) or {
				// no more data can be read: hand out what is left of the
				// current line, or Eof when nothing is left
				if r.offset == start {
					return io.Eof{}
				}
				return r.get_string_part(start, r.offset - start)!
			}
		}
		// scan the unread part of the buffer for the delimiter
		mut i := r.offset
		for ; i < r.builder.len; i++ {
			if r.builder[i] != config.delim {
				continue
			}
			mut line_end := i
			// strip the `\r` of a `\r\n` pair, but only when the `\r` is part of
			// this line (i > start); a `\r` in front of `start` was consumed
			// already and stripping it would make the line length negative
			if i > start && config.delim == `\n` && r.builder[i - 1] == `\r` {
				line_end--
			}
			r.offset = i + 1
			return r.get_string_part(start, line_end - start)!
		}
		// no delimiter found yet: remember how far was scanned and get more data
		r.offset = i
	}

	return io.Eof{}
}
234+
235+
// write implements the Writer interface by appending `buf` to the string builder.
// It returns the result of the builder's write.
pub fn (mut r StringReader) write(buf []u8) !int {
	written := r.builder.write(buf)!
	return written
}
239+
240+
// get_data returns a copy of the whole buffer, independent of the current offset
@[inline]
pub fn (r StringReader) get_data() []u8 {
	size := r.builder.len
	unsafe {
		mut block := malloc_noscan(size)
		vmemcpy(block, &u8(r.builder.data), size)
		return block.vbytes(size)
	}
}
249+
250+
// get_part returns a copy of a part of the buffer from `start` till `start` + `n`.
// It returns io.Eof when the part reaches beyond the end of the buffer
// and an error when `start` or `n` is negative.
pub fn (r StringReader) get_part(start int, n int) ![]u8 {
	// a negative value must never reach the raw allocation and copy below
	if start < 0 || n < 0 {
		return error('invalid range: start and n must not be negative')
	}
	if start + n > r.builder.len {
		return io.Eof{}
	}

	unsafe {
		mut x := malloc_noscan(n)
		vmemcpy(x, &u8(r.builder.data) + start, n)
		return x.vbytes(n)
	}
}
262+
263+
// get_string produces a string from all the bytes in the buffer,
// independent of the current offset
@[inline]
pub fn (r StringReader) get_string() string {
	total := r.builder.len
	return r.builder.spart(0, total)
}
268+
269+
// get_string_part produces a string from `start` till `start` + `n` of the buffer.
// It returns io.Eof when the part reaches beyond the end of the buffer
// and an error when `start` or `n` is negative.
pub fn (r StringReader) get_string_part(start int, n int) !string {
	// a negative value must never reach the raw copy done by spart
	if start < 0 || n < 0 {
		return error('invalid range: start and n must not be negative')
	}
	if start + n > r.builder.len {
		return io.Eof{}
	}

	return r.builder.spart(start, n)
}
277+
278+
// flush returns the string produced by the string builder's str() and
// resets the StringReader's offset to 0
pub fn (mut r StringReader) flush() string {
	content := r.builder.str()
	r.offset = 0
	return content
}
284+
285+
// free releases the memory block used by the string builder's buffer. Afterwards the
// StringReader holds a new string builder of size 0 and its offset is reset to 0.
@[unsafe]
pub fn (mut r StringReader) free() {
	r.offset = 0
	unsafe { r.builder.free() }
	r.builder = strings.new_builder(0)
}

0 commit comments

Comments
 (0)