All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Pages
json.h
Go to the documentation of this file.
1// BSD 3-Clause License; see https://github.com/scikit-hep/awkward-1.0/blob/main/LICENSE
2
3#ifndef AWKWARD_IO_JSON_H_
4#define AWKWARD_IO_JSON_H_
5
6#include <complex>
7#include <cstdio>
8#include <string>
9
10#include "awkward/common.h"
15#include "awkward/util.h"
16
17namespace awkward {
23 public:
24 virtual int64_t read(int64_t num_bytes, char* buffer) = 0;
25 };
26
45 ArrayBuilder& builder,
46 int64_t buffersize,
47 bool read_one,
48 const char* nan_string = nullptr,
49 const char* posinf_string = nullptr,
50 const char* neginf_string = nullptr);
51
53 public:
55 int64_t buffersize,
56 bool read_one,
57 const char* nan_string,
58 const char* posinf_string,
59 const char* neginf_string,
60 const char* jsonassembly,
61 int64_t initial,
62 double resize);
63
65 inline int64_t current_stack_depth() const noexcept {
66 return current_stack_depth_;
67 }
68
70 inline int64_t current_instruction() const noexcept {
71 return current_instruction_;
72 }
73
75 inline int64_t instruction() const noexcept {
76 return instructions_.data()[current_instruction_ * 4];
77 }
78
80 inline int64_t argument1() const noexcept {
81 return instructions_.data()[current_instruction_ * 4 + 1];
82 }
83
85 inline int64_t argument2() const noexcept {
86 return instructions_.data()[current_instruction_ * 4 + 2];
87 }
88
90 inline int64_t argument3() const noexcept {
91 return instructions_.data()[current_instruction_ * 4 + 3];
92 }
93
95 inline void step_forward() noexcept {
96 current_instruction_++;
97 }
98
100 inline void step_backward() noexcept {
101 current_instruction_--;
102 }
103
105 inline void push_stack(int64_t jump_to) noexcept {
106 instruction_stack_.data()[current_stack_depth_] = current_instruction_;
107 current_stack_depth_++;
108 current_instruction_ = jump_to;
109 }
110
112 inline void pop_stack() noexcept {
113 current_stack_depth_--;
114 current_instruction_ = instruction_stack_.data()[current_stack_depth_];
115 }
116
118 inline int64_t find_enum(const char* str) noexcept {
119 int64_t* offsets = string_offsets_.data();
120 char* chars = characters_.data();
121 int64_t stringsstart = argument2();
122 int64_t start;
123 int64_t stop;
124 for (int64_t i = stringsstart; i < argument3(); i++) {
125 start = offsets[i];
126 stop = offsets[i + 1];
127 if (strncmp(str, &chars[start], (size_t)(stop - start)) == 0) {
128 return i - stringsstart;
129 }
130 }
131 return -1;
132 }
133
135 inline int64_t find_key(const char* str) noexcept {
136 int64_t* offsets = string_offsets_.data();
137 char* chars = characters_.data();
138 int64_t i;
139 int64_t j;
140 int64_t stringi;
141 int64_t start;
142 int64_t stop;
143 uint64_t chunkmask;
144 // optimistic: fields in data are in the order specified by the schema
145 if (argument1() != 0) {
146 // increment the current (last seen) field with wrap-around
147 record_current_field_[argument2()]++;
148 if (record_current_field_[argument2()] == argument1()) {
149 record_current_field_[argument2()] = 0;
150 }
151 j = record_current_field_[argument2()];
152 // use the record_current_field_ (as j)
153 i = current_instruction_ + 1 + j;
154 stringi = instructions_.data()[i * 4 + 1];
155 start = offsets[stringi];
156 stop = offsets[stringi + 1];
157 if (strncmp(str, &chars[start], (size_t)(stop - start)) == 0) {
158 // ensure that the checklist bit is 1
159 chunkmask = (uint64_t)1 << (j & 0x3f);
160 if ((record_checklist_[argument2()][j >> 6] & chunkmask) == 0) {
161 return -1; // ignore the value of a duplicate key
162 }
163 // set the checklist bit to 0
164 record_checklist_[argument2()][j >> 6] &= ~chunkmask;
165 return key_instruction_at(i);
166 }
167 }
168 // pessimistic: try all field names, starting from the first
169 for (i = current_instruction_ + 1; i <= current_instruction_ + argument1(); i++) {
170 // not including the one optimistic trial
171 if (i != current_instruction_ + 1 + record_current_field_[argument2()]) {
172 stringi = instructions_.data()[i * 4 + 1];
173 start = offsets[stringi];
174 stop = offsets[stringi + 1];
175 if (strncmp(str, &chars[start], (size_t)(stop - start)) == 0) {
176 // set the record_current_field_
177 j = i - (current_instruction_ + 1);
178 record_current_field_[argument2()] = j;
179 // ensure that the checklist bit is 1
180 chunkmask = (uint64_t)1 << (j & 0x3f);
181 if ((record_checklist_[argument2()][j >> 6] & chunkmask) == 0) {
182 return -1; // ignore the value of a duplicate key
183 }
184 // set the checklist bit to 0
185 record_checklist_[argument2()][j >> 6] &= ~chunkmask;
186 return key_instruction_at(i);
187 }
188 }
189 }
190 return -1;
191 }
192
194 inline bool key_already_filled(int64_t record_identifier, int64_t j) const noexcept {
195 uint64_t chunkmask = (uint64_t)1 << (j & 0x3f);
196 return (record_checklist_[record_identifier][j >> 6] & chunkmask) == 0;
197 }
198
200 inline int64_t key_instruction_at(int64_t i) const noexcept {
201 return instructions_.data()[i * 4 + 2];
202 }
203
205 inline void start_object(int64_t keytableheader_instruction) noexcept {
206 int64_t record_identifier = instructions_.data()[keytableheader_instruction * 4 + 2];
207 record_checklist_[record_identifier].assign(
208 record_checklist_init_[record_identifier].begin(),
209 record_checklist_init_[record_identifier].end()
210 );
211 }
212
214 inline bool end_object(int64_t keytableheader_instruction) const noexcept {
215 int64_t record_identifier = instructions_.data()[keytableheader_instruction * 4 + 2];
216 uint64_t should_be_zero = 0;
217 for (uint64_t chunk : record_checklist_[record_identifier]) {
218 should_be_zero |= chunk;
219 }
220 return should_be_zero == 0;
221 }
222
224 inline void write_int8(int64_t index, int8_t x) noexcept {
225 buffers_uint8_[(size_t)index].append(*reinterpret_cast<uint8_t*>(&x));
226 }
227
229 inline void write_uint8(int64_t index, uint8_t x) noexcept {
230 buffers_uint8_[(size_t)index].append(x);
231 }
232
234 inline void write_many_uint8(int64_t index, int64_t num_items, const uint8_t* values) noexcept {
235 buffers_uint8_[(size_t)index].extend(values, (size_t)num_items);
236 }
237
239 inline void write_int64(int64_t index, int64_t x) noexcept {
240 buffers_int64_[(size_t)index].append(x);
241 }
242
244 inline void write_uint64(int64_t index, uint64_t x) noexcept {
245 buffers_int64_[(size_t)index].append(static_cast<int64_t>(x));
246 }
247
249 inline void write_add_int64(int64_t index, int64_t x) noexcept {
250 buffers_int64_[(size_t)index].append(buffers_int64_[(size_t)index].last() + x);
251 }
252
254 inline void write_float64(int64_t index, double x) noexcept {
255 buffers_float64_[(size_t)index].append(x);
256 }
257
259 inline int64_t get_and_increment(int64_t index) noexcept {
260 return counters_[(size_t)index]++;
261 }
262
264 int64_t length() const noexcept {
265 return length_;
266 }
267
269 inline void add_to_length(int64_t length) noexcept {
270 length_ += length;
271 }
272
/// @brief Returns a string, presumably describing the current state for
/// debugging; the definition is not part of this header.
274 std::string debug() const noexcept;
275
277 int64_t num_outputs() const {
278 return (int64_t)output_names_.size();
279 }
280
282 std::string output_name(int64_t i) const {
283 return output_names_[(size_t)i];
284 }
285
287 std::string output_dtype(int64_t i) const {
288 switch (output_dtypes_[(size_t)i]) {
290 return "int8";
292 return "uint8";
294 return "int64";
296 return "float64";
297 default:
298 return "unknown";
299 }
300 }
301
303 int64_t output_num_items(int64_t i) const {
304 switch (output_dtypes_[(size_t)i]) {
306 return (int64_t)buffers_uint8_[(size_t)output_which_[(size_t)i]].nbytes();
308 return (int64_t)buffers_uint8_[(size_t)output_which_[(size_t)i]].nbytes();
310 return (int64_t)buffers_int64_[(size_t)output_which_[(size_t)i]].nbytes() / 8;
312 return (int64_t)buffers_float64_[(size_t)output_which_[(size_t)i]].nbytes() / 8;
313 default:
314 return -1;
315 }
316 }
317
319 void output_fill(int64_t i, void* external_pointer) const {
320 switch (output_dtypes_[(size_t)i]) {
322 buffers_uint8_[(size_t)output_which_[(size_t)i]].concatenate(
323 reinterpret_cast<uint8_t*>(external_pointer)
324 );
325 break;
327 buffers_uint8_[(size_t)output_which_[(size_t)i]].concatenate(
328 reinterpret_cast<uint8_t*>(external_pointer)
329 );
330 break;
332 buffers_int64_[(size_t)output_which_[(size_t)i]].concatenate(
333 reinterpret_cast<int64_t*>(external_pointer)
334 );
335 break;
337 buffers_float64_[(size_t)output_which_[(size_t)i]].concatenate(
338 reinterpret_cast<double*>(external_pointer)
339 );
340 break;
341 default:
342 break;
343 }
344 }
345
346 private:
// Flat instruction table: instruction k occupies the four consecutive
// slots [4k, 4k + 3] (read by instruction() and argument1..3()).
347 std::vector<int64_t> instructions_;
// Concatenated string data; string k occupies
// characters_[string_offsets_[k], string_offsets_[k + 1]).
348 std::vector<char> characters_;
349 std::vector<int64_t> string_offsets_;
350
// Per record: index of the field matched most recently by find_key.
351 std::vector<int64_t> record_current_field_;
// Per record: checklist contents copied into record_checklist_ by
// start_object.
352 std::vector<std::vector<uint64_t>> record_checklist_init_;
// Per record: bitmask with 64 fields per word; find_key clears a field's
// bit when the field is matched, and end_object requires all words to be 0.
353 std::vector<std::vector<uint64_t>> record_checklist_;
354
// Per output: its name, its dtype, and its position within the buffer
// list that corresponds to that dtype.
355 std::vector<std::string> output_names_;
356 std::vector<util::dtype> output_dtypes_;
357 std::vector<int64_t> output_which_;
// Output storage. int8 values are written into the uint8 buffers and
// uint64 values into the int64 buffers (see write_int8, write_uint64).
358 std::vector<GrowableBuffer<uint8_t>> buffers_uint8_;
359 std::vector<GrowableBuffer<int64_t>> buffers_int64_;
360 std::vector<GrowableBuffer<double>> buffers_float64_;
361
// Index of the instruction currently selected.
362 int64_t current_instruction_;
// Saved instruction indices (push_stack / pop_stack) and the number of
// entries currently in use.
363 std::vector<int64_t> instruction_stack_;
364 int64_t current_stack_depth_;
// Counters read and advanced by get_and_increment.
365 std::vector<int64_t> counters_;
366
// Running total maintained by add_to_length.
367 int64_t length_;
368 };
369
370}
371
372#endif // AWKWARD_IO_JSON_H_
User interface to the Builder system: the ArrayBuilder is a fixed reference while the Builder subclas...
Definition ArrayBuilder.h:23
Abstract class to represent a file-like object, something with a read(num_bytes) method....
Definition json.h:22
virtual int64_t read(int64_t num_bytes, char *buffer)=0
Definition json.h:52
void write_many_uint8(int64_t index, int64_t num_items, const uint8_t *values) noexcept
Appends num_items bytes starting at values to the uint8 buffer at position index.
Definition json.h:234
void step_forward() noexcept
Advances the current instruction index by one.
Definition json.h:95
void pop_stack() noexcept
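Removes the most recently saved instruction index from the instruction stack and makes it the current instruction.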
Definition json.h:112
void start_object(int64_t keytableheader_instruction) noexcept
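Resets the field checklist of the record identified by the given key-table header instruction to its initial contents.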
Definition json.h:205
FromJsonObjectSchema(FileLikeObject *source, int64_t buffersize, bool read_one, const char *nan_string, const char *posinf_string, const char *neginf_string, const char *jsonassembly, int64_t initial, double resize)
void write_uint64(int64_t index, uint64_t x) noexcept
Appends x, converted to int64_t, to the int64 buffer at position index.
Definition json.h:244
void output_fill(int64_t i, void *external_pointer) const
Passes external_pointer, cast to the element type of output i, to that output buffer's concatenate().
Definition json.h:319
int64_t instruction() const noexcept
Returns the first of the four int64 slots of the current instruction.
Definition json.h:75
int64_t length() const noexcept
Returns the length accumulated so far through add_to_length.
Definition json.h:264
void write_float64(int64_t index, double x) noexcept
Appends x to the float64 buffer at position index.
Definition json.h:254
int64_t find_enum(const char *str) noexcept
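Returns the zero-based position of the first enum string of the current instruction that str matches, or -1 if none match.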
Definition json.h:118
void write_int64(int64_t index, int64_t x) noexcept
Appends x to the int64 buffer at position index.
Definition json.h:239
int64_t current_instruction() const noexcept
Returns the index of the current instruction.
Definition json.h:70
int64_t argument1() const noexcept
Returns the second of the four int64 slots of the current instruction.
Definition json.h:80
int64_t num_outputs() const
Returns the number of named outputs.
Definition json.h:277
bool end_object(int64_t keytableheader_instruction) const noexcept
Returns true if every bit of the record's field checklist has been cleared.
Definition json.h:214
void step_backward() noexcept
Moves the current instruction index back by one.
Definition json.h:100
void write_uint8(int64_t index, uint8_t x) noexcept
Appends x to the uint8 buffer at position index.
Definition json.h:229
int64_t get_and_increment(int64_t index) noexcept
Returns the counter at position index and then increments it.
Definition json.h:259
void write_add_int64(int64_t index, int64_t x) noexcept
Appends the buffer's last value plus x to the int64 buffer at position index.
Definition json.h:249
int64_t argument2() const noexcept
Returns the third of the four int64 slots of the current instruction.
Definition json.h:85
void add_to_length(int64_t length) noexcept
Adds the given amount to the accumulated length.
Definition json.h:269
std::string output_name(int64_t i) const
Returns the name of output i.
Definition json.h:282
void write_int8(int64_t index, int8_t x) noexcept
Appends x, reinterpreted as a uint8_t, to the uint8 buffer at position index.
Definition json.h:224
void push_stack(int64_t jump_to) noexcept
Saves the current instruction index on the instruction stack and makes jump_to the current instruction.
Definition json.h:105
int64_t find_key(const char *str) noexcept
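Looks up str among the field names of the current key table; returns the value stored with the matching field, or -1 if no field matches or the key is a duplicate.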
Definition json.h:135
int64_t argument3() const noexcept
Returns the fourth of the four int64 slots of the current instruction.
Definition json.h:90
std::string debug() const noexcept
Returns a string, presumably for debugging; the definition is not in this header.
int64_t output_num_items(int64_t i) const
Returns the number of items stored for output i, or -1 if its dtype is not recognized.
Definition json.h:303
std::string output_dtype(int64_t i) const
Returns the dtype of output i as a string: "int8", "uint8", "int64", "float64", or "unknown".
Definition json.h:287
bool key_already_filled(int64_t record_identifier, int64_t j) const noexcept
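Returns true if the checklist bit for field j of the given record is zero, i.e. the field has already been seen.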
Definition json.h:194
int64_t key_instruction_at(int64_t i) const noexcept
Returns the third of the four int64 slots of instruction i.
Definition json.h:200
int64_t current_stack_depth() const noexcept
Returns the number of instruction indices currently saved on the instruction stack.
Definition json.h:65
#define LIBAWKWARD_EXPORT_SYMBOL
Definition common.h:44
Definition ArrayBuilder.h:14
LIBAWKWARD_EXPORT_SYMBOL void fromjsonobject(FileLikeObject *source, ArrayBuilder &builder, int64_t buffersize, bool read_one, const char *nan_string=nullptr, const char *posinf_string=nullptr, const char *neginf_string=nullptr)
Parses a JSON-encoded file-like object using an ArrayBuilder.