Loading...
Searching...
No Matches
json.h
Go to the documentation of this file.
1// BSD 3-Clause License; see https://github.com/scikit-hep/awkward/blob/main/LICENSE
2
3#ifndef AWKWARD_IO_JSON_H_
4#define AWKWARD_IO_JSON_H_
5
6#include <complex>
7#include <cstdio>
8#include <string>
9
10#include "awkward/common.h"
15#include "awkward/util.h"
16
17namespace awkward {
23 public:
24 virtual int64_t read(int64_t num_bytes, char* buffer) = 0;
25 };
26
43 EXPORT_SYMBOL void
45 ArrayBuilder& builder,
46 int64_t buffersize,
47 bool read_one,
48 const char* nan_string = nullptr,
49 const char* posinf_string = nullptr,
50 const char* neginf_string = nullptr);
51
53 public:
55 int64_t buffersize,
56 bool read_one,
57 const char* nan_string,
58 const char* posinf_string,
59 const char* neginf_string,
60 const char* jsonassembly,
61 int64_t initial,
62 double resize);
63
64 // Delete copy constructor
66
67 // Delete copy-assignment operator
69
71 inline int64_t current_stack_depth() const noexcept {
72 return current_stack_depth_;
73 }
74
76 inline int64_t current_instruction() const noexcept {
77 return current_instruction_;
78 }
79
81 inline int64_t instruction() const noexcept {
82 return instructions_.data()[current_instruction_ * 4];
83 }
84
86 inline int64_t argument1() const noexcept {
87 return instructions_.data()[current_instruction_ * 4 + 1];
88 }
89
91 inline int64_t argument2() const noexcept {
92 return instructions_.data()[current_instruction_ * 4 + 2];
93 }
94
96 inline int64_t argument3() const noexcept {
97 return instructions_.data()[current_instruction_ * 4 + 3];
98 }
99
101 inline void step_forward() noexcept {
102 current_instruction_++;
103 }
104
106 inline void step_backward() noexcept {
107 current_instruction_--;
108 }
109
111 inline void push_stack(int64_t jump_to) noexcept {
112 instruction_stack_.data()[current_stack_depth_] = current_instruction_;
113 current_stack_depth_++;
114 current_instruction_ = jump_to;
115 }
116
118 inline void pop_stack() noexcept {
119 current_stack_depth_--;
120 current_instruction_ = instruction_stack_.data()[current_stack_depth_];
121 }
122
124 inline int64_t find_enum(const char* str) noexcept {
125 int64_t* offsets = string_offsets_.data();
126 char* chars = characters_.data();
127 int64_t stringsstart = argument2();
128 int64_t start;
129 int64_t stop;
130 for (int64_t i = stringsstart; i < argument3(); i++) {
131 start = offsets[i];
132 stop = offsets[i + 1];
133 if (strncmp(str, &chars[start], (size_t)(stop - start)) == 0) {
134 return i - stringsstart;
135 }
136 }
137 return -1;
138 }
139
141 inline int64_t find_key(const char* str) noexcept {
142 int64_t* offsets = string_offsets_.data();
143 char* chars = characters_.data();
144 int64_t i;
145 int64_t j;
146 int64_t stringi;
147 int64_t start;
148 int64_t stop;
149 uint64_t chunkmask;
150 // optimistic: fields in data are in the order specified by the schema
151 if (argument1() != 0) {
152 // increment the current (last seen) field with wrap-around
153 record_current_field_[argument2()]++;
154 if (record_current_field_[argument2()] == argument1()) {
155 record_current_field_[argument2()] = 0;
156 }
157 j = record_current_field_[argument2()];
158 // use the record_current_field_ (as j)
159 i = current_instruction_ + 1 + j;
160 stringi = instructions_.data()[i * 4 + 1];
161 start = offsets[stringi];
162 stop = offsets[stringi + 1];
163 if (strncmp(str, &chars[start], (size_t)(stop - start)) == 0) {
164 // ensure that the checklist bit is 1
165 chunkmask = (uint64_t)1 << (j & 0x3f);
166 if ((record_checklist_[argument2()][j >> 6] & chunkmask) == 0) {
167 return -1; // ignore the value of a duplicate key
168 }
169 // set the checklist bit to 0
170 record_checklist_[argument2()][j >> 6] &= ~chunkmask;
171 return key_instruction_at(i);
172 }
173 }
174 // pessimistic: try all field names, starting from the first
175 for (i = current_instruction_ + 1; i <= current_instruction_ + argument1(); i++) {
176 // not including the one optimistic trial
177 if (i != current_instruction_ + 1 + record_current_field_[argument2()]) {
178 stringi = instructions_.data()[i * 4 + 1];
179 start = offsets[stringi];
180 stop = offsets[stringi + 1];
181 if (strncmp(str, &chars[start], (size_t)(stop - start)) == 0) {
182 // set the record_current_field_
183 j = i - (current_instruction_ + 1);
184 record_current_field_[argument2()] = j;
185 // ensure that the checklist bit is 1
186 chunkmask = (uint64_t)1 << (j & 0x3f);
187 if ((record_checklist_[argument2()][j >> 6] & chunkmask) == 0) {
188 return -1; // ignore the value of a duplicate key
189 }
190 // set the checklist bit to 0
191 record_checklist_[argument2()][j >> 6] &= ~chunkmask;
192 return key_instruction_at(i);
193 }
194 }
195 }
196 return -1;
197 }
198
200 inline bool key_already_filled(int64_t record_identifier, int64_t j) const noexcept {
201 uint64_t chunkmask = (uint64_t)1 << (j & 0x3f);
202 return (record_checklist_[record_identifier][j >> 6] & chunkmask) == 0;
203 }
204
206 inline int64_t key_instruction_at(int64_t i) const noexcept {
207 return instructions_.data()[i * 4 + 2];
208 }
209
211 inline void start_object(int64_t keytableheader_instruction) noexcept {
212 int64_t record_identifier = instructions_.data()[keytableheader_instruction * 4 + 2];
213 record_checklist_[record_identifier].assign(
214 record_checklist_init_[record_identifier].begin(),
215 record_checklist_init_[record_identifier].end()
216 );
217 }
218
220 inline bool end_object(int64_t keytableheader_instruction) const noexcept {
221 int64_t record_identifier = instructions_.data()[keytableheader_instruction * 4 + 2];
222 uint64_t should_be_zero = 0;
223 for (uint64_t chunk : record_checklist_[record_identifier]) {
224 should_be_zero |= chunk;
225 }
226 return should_be_zero == 0;
227 }
228
230 inline void write_int8(int64_t index, int8_t x) noexcept {
231 buffers_uint8_[(size_t)index].append(*reinterpret_cast<uint8_t*>(&x));
232 }
233
235 inline void write_uint8(int64_t index, uint8_t x) noexcept {
236 buffers_uint8_[(size_t)index].append(x);
237 }
238
240 inline void write_many_uint8(int64_t index, int64_t num_items, const uint8_t* values) noexcept {
241 buffers_uint8_[(size_t)index].extend(values, (size_t)num_items);
242 }
243
245 inline void write_int64(int64_t index, int64_t x) noexcept {
246 buffers_int64_[(size_t)index].append(x);
247 }
248
250 inline void write_uint64(int64_t index, uint64_t x) noexcept {
251 buffers_int64_[(size_t)index].append(static_cast<int64_t>(x));
252 }
253
255 inline void write_add_int64(int64_t index, int64_t x) noexcept {
256 buffers_int64_[(size_t)index].append(buffers_int64_[(size_t)index].last() + x);
257 }
258
260 inline void write_float64(int64_t index, double x) noexcept {
261 buffers_float64_[(size_t)index].append(x);
262 }
263
265 inline int64_t get_and_increment(int64_t index) noexcept {
266 return counters_[(size_t)index]++;
267 }
268
270 int64_t length() const noexcept {
271 return length_;
272 }
273
275 inline void add_to_length(int64_t length) noexcept {
276 length_ += length;
277 }
278
280 std::string debug() const noexcept;
281
283 int64_t num_outputs() const {
284 return (int64_t)output_names_.size();
285 }
286
288 std::string output_name(int64_t i) const {
289 return output_names_[(size_t)i];
290 }
291
293 std::string output_dtype(int64_t i) const {
294 switch (output_dtypes_[(size_t)i]) {
295 case util::dtype::int8:
296 return "int8";
297 case util::dtype::uint8:
298 return "uint8";
299 case util::dtype::int64:
300 return "int64";
301 case util::dtype::float64:
302 return "float64";
303 default:
304 return "unknown";
305 }
306 }
307
309 int64_t output_num_items(int64_t i) const {
310 switch (output_dtypes_[(size_t)i]) {
311 case util::dtype::int8:
312 return (int64_t)buffers_uint8_[(size_t)output_which_[(size_t)i]].nbytes();
313 case util::dtype::uint8:
314 return (int64_t)buffers_uint8_[(size_t)output_which_[(size_t)i]].nbytes();
315 case util::dtype::int64:
316 return (int64_t)buffers_int64_[(size_t)output_which_[(size_t)i]].nbytes() / 8;
317 case util::dtype::float64:
318 return (int64_t)buffers_float64_[(size_t)output_which_[(size_t)i]].nbytes() / 8;
319 default:
320 return -1;
321 }
322 }
323
325 void output_fill(int64_t i, void* external_pointer) const {
326 switch (output_dtypes_[(size_t)i]) {
327 case util::dtype::int8:
328 buffers_uint8_[(size_t)output_which_[(size_t)i]].concatenate(
329 reinterpret_cast<uint8_t*>(external_pointer)
330 );
331 break;
332 case util::dtype::uint8:
333 buffers_uint8_[(size_t)output_which_[(size_t)i]].concatenate(
334 reinterpret_cast<uint8_t*>(external_pointer)
335 );
336 break;
337 case util::dtype::int64:
338 buffers_int64_[(size_t)output_which_[(size_t)i]].concatenate(
339 reinterpret_cast<int64_t*>(external_pointer)
340 );
341 break;
342 case util::dtype::float64:
343 buffers_float64_[(size_t)output_which_[(size_t)i]].concatenate(
344 reinterpret_cast<double*>(external_pointer)
345 );
346 break;
347 default:
348 break;
349 }
350 }
351
352 private:
353 std::vector<int64_t> instructions_;
354 std::vector<char> characters_;
355 std::vector<int64_t> string_offsets_;
356
357 std::vector<int64_t> record_current_field_;
358 std::vector<std::vector<uint64_t>> record_checklist_init_;
359 std::vector<std::vector<uint64_t>> record_checklist_;
360
361 std::vector<std::string> output_names_;
362 std::vector<util::dtype> output_dtypes_;
363 std::vector<int64_t> output_which_;
364 std::vector<GrowableBuffer<uint8_t>> buffers_uint8_;
365 std::vector<GrowableBuffer<int64_t>> buffers_int64_;
366 std::vector<GrowableBuffer<double>> buffers_float64_;
367
368 int64_t current_instruction_;
369 std::vector<int64_t> instruction_stack_;
370 int64_t current_stack_depth_;
371 std::vector<int64_t> counters_;
372
373 int64_t length_;
374 };
375
376}
377
378#endif // AWKWARD_IO_JSON_H_
User interface to the Builder system: the ArrayBuilder is a fixed reference while the Builder subclas...
Definition ArrayBuilder.h:23
Abstract class to represent a file-like object, something with a read(num_bytes) method....
Definition json.h:22
virtual int64_t read(int64_t num_bytes, char *buffer)=0
Definition json.h:52
void write_many_uint8(int64_t index, int64_t num_items, const uint8_t *values) noexcept
HERE.
Definition json.h:240
FromJsonObjectSchema(const FromJsonObjectSchema &)=delete
FromJsonObjectSchema & operator=(FromJsonObjectSchema &)=delete
void step_forward() noexcept
HERE.
Definition json.h:101
void pop_stack() noexcept
HERE.
Definition json.h:118
void start_object(int64_t keytableheader_instruction) noexcept
HERE.
Definition json.h:211
FromJsonObjectSchema(FileLikeObject *source, int64_t buffersize, bool read_one, const char *nan_string, const char *posinf_string, const char *neginf_string, const char *jsonassembly, int64_t initial, double resize)
void write_uint64(int64_t index, uint64_t x) noexcept
HERE.
Definition json.h:250
void output_fill(int64_t i, void *external_pointer) const
HERE.
Definition json.h:325
int64_t instruction() const noexcept
HERE.
Definition json.h:81
int64_t length() const noexcept
HERE.
Definition json.h:270
void write_float64(int64_t index, double x) noexcept
HERE.
Definition json.h:260
int64_t find_enum(const char *str) noexcept
HERE.
Definition json.h:124
void write_int64(int64_t index, int64_t x) noexcept
HERE.
Definition json.h:245
int64_t current_instruction() const noexcept
HERE.
Definition json.h:76
int64_t argument1() const noexcept
HERE.
Definition json.h:86
bool end_object(int64_t keytableheader_instruction) const noexcept
HERE.
Definition json.h:220
void step_backward() noexcept
HERE.
Definition json.h:106
void write_uint8(int64_t index, uint8_t x) noexcept
HERE.
Definition json.h:235
int64_t get_and_increment(int64_t index) noexcept
HERE.
Definition json.h:265
void write_add_int64(int64_t index, int64_t x) noexcept
HERE.
Definition json.h:255
int64_t argument2() const noexcept
HERE.
Definition json.h:91
void add_to_length(int64_t length) noexcept
HERE.
Definition json.h:275
std::string output_name(int64_t i) const
HERE.
Definition json.h:288
void write_int8(int64_t index, int8_t x) noexcept
HERE.
Definition json.h:230
void push_stack(int64_t jump_to) noexcept
HERE.
Definition json.h:111
int64_t find_key(const char *str) noexcept
HERE.
Definition json.h:141
int64_t argument3() const noexcept
HERE.
Definition json.h:96
std::string debug() const noexcept
HERE.
int64_t output_num_items(int64_t i) const
HERE.
Definition json.h:309
std::string output_dtype(int64_t i) const
HERE.
Definition json.h:293
bool key_already_filled(int64_t record_identifier, int64_t j) const noexcept
HERE.
Definition json.h:200
int64_t key_instruction_at(int64_t i) const noexcept
HERE.
Definition json.h:206
int64_t current_stack_depth() const noexcept
HERE.
Definition json.h:71
#define EXPORT_SYMBOL
Definition common.h:25
Definition ArrayBuilder.h:14
EXPORT_SYMBOL void fromjsonobject(FileLikeObject *source, ArrayBuilder &builder, int64_t buffersize, bool read_one, const char *nan_string=nullptr, const char *posinf_string=nullptr, const char *neginf_string=nullptr)
Parses a JSON-encoded file-like object using an ArrayBuilder.