1 | /*
|
2 | * Souffle - A Datalog Compiler
|
3 | * Copyright (c) 2020, The Souffle Developers. All rights reserved
|
4 | * Licensed under the Universal Permissive License v 1.0 as shown at:
|
5 | * - https://opensource.org/licenses/UPL
|
6 | * - <souffle root>/licenses/SOUFFLE-UPL.txt
|
7 | */
|
8 |
|
9 | /************************************************************************
|
10 | *
|
11 | * @file ReadStreamJSON.h
|
12 | *
|
13 | ***********************************************************************/
|
14 |
|
15 | #pragma once
|
16 |
|
17 | #include "souffle/RamTypes.h"
|
18 | #include "souffle/RecordTable.h"
|
19 | #include "souffle/SymbolTable.h"
|
20 | #include "souffle/io/ReadStream.h"
|
21 | #include "souffle/utility/ContainerUtil.h"
|
22 | #include "souffle/utility/FileUtil.h"
|
23 | #include "souffle/utility/StringUtil.h"
|
24 |
|
25 | #include <algorithm>
|
26 | #include <cassert>
|
27 | #include <cstddef>
|
28 | #include <cstdint>
|
29 | #include <fstream>
|
30 | #include <iostream>
|
31 | #include <map>
|
32 | #include <memory>
|
33 | #include <queue>
|
34 | #include <sstream>
|
35 | #include <stdexcept>
|
36 | #include <string>
|
37 | #include <tuple>
|
38 | #include <vector>
|
39 |
|
40 | namespace souffle {
|
41 |
|
/**
 * Concatenate all arguments into one message and raise it as an error.
 *
 * Every argument is written, in order, to a string stream using its
 * `operator<<`; the resulting text becomes the `what()` of the exception.
 *
 * @param t values to stream into the error message
 * @throws std::runtime_error always
 */
template <typename... T>
[[noreturn]] static void throwError(T const&... t) {
    std::ostringstream message;
    // comma-fold: stream each argument left to right
    ((message << t), ...);
    const std::string text = message.str();
    throw std::runtime_error(text);
}
|
48 |
|
49 | class ReadStreamJSON : public ReadStream {
|
50 | public:
|
51 | ReadStreamJSON(std::istream& file, const std::map<std::string, std::string>& rwOperation,
|
52 | SymbolTable& symbolTable, RecordTable& recordTable)
|
53 | : ReadStream(rwOperation, symbolTable, recordTable), file(file), pos(0), isInitialized(false) {
|
54 | std::string err;
|
55 | params = Json::parse(rwOperation.at("params"), err);
|
56 | if (err.length() > 0) {
|
57 | throwError("cannot get internal params: ", err);
|
58 | }
|
59 | }
|
60 |
|
61 | protected:
|
62 | std::istream& file;
|
63 | std::size_t pos;
|
64 | Json jsonSource;
|
65 | Json params;
|
66 | bool isInitialized;
|
67 | bool useObjects;
|
68 | std::map<const std::string, const std::size_t> paramIndex;
|
69 |
|
70 | Own<RamDomain[]> readNextTuple() override {
|
71 | // for some reasons we cannot initalized our json objects in constructor
|
72 | // otherwise it will segfault, so we initialize in the first call
|
73 | if (!isInitialized) {
|
74 | isInitialized = true;
|
75 | std::string error = "";
|
76 | std::string source(std::istreambuf_iterator<char>(file), {});
|
77 |
|
78 | jsonSource = Json::parse(source, error);
|
79 | // it should be wrapped by an extra array
|
80 | if (error.length() > 0 || !jsonSource.is_array()) {
|
81 | throwError("cannot deserialize json because ", error, ":\n", source);
|
82 | }
|
83 |
|
84 | if (jsonSource.array_items().empty()) {
|
85 | // No tuples defined
|
86 | return nullptr;
|
87 | }
|
88 |
|
89 | // we only check the first one, since there are extra checks
|
90 | // in readNextTupleObject/readNextTupleList
|
91 | if (jsonSource[0].is_array()) {
|
92 | useObjects = false;
|
93 | } else if (jsonSource[0].is_object()) {
|
94 | useObjects = true;
|
95 | std::size_t index_pos = 0;
|
96 | for (auto param : params["relation"]["params"].array_items()) {
|
97 | paramIndex.insert(std::make_pair(param.string_value(), index_pos));
|
98 | index_pos++;
|
99 | }
|
100 | } else {
|
101 | throwError("the input is neither list nor object format");
|
102 | }
|
103 | }
|
104 |
|
105 | if (useObjects) {
|
106 | return readNextTupleObject();
|
107 | } else {
|
108 | return readNextTupleList();
|
109 | }
|
110 | }
|
111 |
|
112 | Own<RamDomain[]> readNextTupleList() {
|
113 | if (pos >= jsonSource.array_items().size()) {
|
114 | return nullptr;
|
115 | }
|
116 |
|
117 | Own<RamDomain[]> tuple = mk<RamDomain[]>(typeAttributes.size());
|
118 | const Json& jsonObj = jsonSource[pos];
|
119 | assert(jsonObj.is_array() && "the input is not json array");
|
120 | pos++;
|
121 | for (std::size_t i = 0; i < typeAttributes.size(); ++i) {
|
122 | try {
|
123 | auto&& ty = typeAttributes.at(i);
|
124 | switch (ty[0]) {
|
125 | case 's': {
|
126 | tuple[i] = symbolTable.encode(jsonObj[i].string_value());
|
127 | break;
|
128 | }
|
129 | case 'r': {
|
130 | tuple[i] = readNextElementList(jsonObj[i], ty);
|
131 | break;
|
132 | }
|
133 | case 'i': {
|
134 | tuple[i] = jsonObj[i].int_value();
|
135 | break;
|
136 | }
|
137 | case 'u': {
|
138 | tuple[i] = jsonObj[i].int_value();
|
139 | break;
|
140 | }
|
141 | case 'f': {
|
142 | tuple[i] = static_cast<RamDomain>(jsonObj[i].number_value());
|
143 | break;
|
144 | }
|
145 | default: throwError("invalid type attribute: '", ty[0], "'");
|
146 | }
|
147 | } catch (...) {
|
148 | std::stringstream errorMessage;
|
149 | if (jsonObj.is_array() && i < jsonObj.array_items().size()) {
|
150 | errorMessage << "Error converting: " << jsonObj[i].dump();
|
151 | } else {
|
152 | errorMessage << "Invalid index: " << i;
|
153 | }
|
154 | throw std::invalid_argument(errorMessage.str());
|
155 | }
|
156 | }
|
157 |
|
158 | return tuple;
|
159 | }
|
160 |
|
161 | RamDomain readNextElementList(const Json& source, const std::string& recordTypeName) {
|
162 | auto&& recordInfo = types["records"][recordTypeName];
|
163 |
|
164 | if (recordInfo.is_null()) {
|
165 | throw std::invalid_argument("Missing record type information: " + recordTypeName);
|
166 | }
|
167 |
|
168 | // Handle null case
|
169 | if (source.is_null()) {
|
170 | return 0;
|
171 | }
|
172 |
|
173 | assert(source.is_array() && "the input is not json array");
|
174 | auto&& recordTypes = recordInfo["types"];
|
175 | const std::size_t recordArity = recordInfo["arity"].long_value();
|
176 | std::vector<RamDomain> recordValues(recordArity);
|
177 | for (std::size_t i = 0; i < recordArity; ++i) {
|
178 | const std::string& recordType = recordTypes[i].string_value();
|
179 | switch (recordType[0]) {
|
180 | case 's': {
|
181 | recordValues[i] = symbolTable.encode(source[i].string_value());
|
182 | break;
|
183 | }
|
184 | case 'r': {
|
185 | recordValues[i] = readNextElementList(source[i], recordType);
|
186 | break;
|
187 | }
|
188 | case 'i': {
|
189 | recordValues[i] = source[i].int_value();
|
190 | break;
|
191 | }
|
192 | case 'u': {
|
193 | recordValues[i] = source[i].int_value();
|
194 | break;
|
195 | }
|
196 | case 'f': {
|
197 | recordValues[i] = static_cast<RamDomain>(source[i].number_value());
|
198 | break;
|
199 | }
|
200 | default: throwError("invalid type attribute");
|
201 | }
|
202 | }
|
203 |
|
204 | return recordTable.pack(recordValues.data(), recordValues.size());
|
205 | }
|
206 |
|
207 | Own<RamDomain[]> readNextTupleObject() {
|
208 | if (pos >= jsonSource.array_items().size()) {
|
209 | return nullptr;
|
210 | }
|
211 |
|
212 | Own<RamDomain[]> tuple = mk<RamDomain[]>(typeAttributes.size());
|
213 | const Json& jsonObj = jsonSource[pos];
|
214 | assert(jsonObj.is_object() && "the input is not json object");
|
215 | pos++;
|
216 | for (auto p : jsonObj.object_items()) {
|
217 | try {
|
218 | // get the corresponding position by parameter name
|
219 | if (paramIndex.find(p.first) == paramIndex.end()) {
|
220 | throwError("invalid parameter: ", p.first);
|
221 | }
|
222 | std::size_t i = paramIndex.at(p.first);
|
223 | auto&& ty = typeAttributes.at(i);
|
224 | switch (ty[0]) {
|
225 | case 's': {
|
226 | tuple[i] = symbolTable.encode(p.second.string_value());
|
227 | break;
|
228 | }
|
229 | case 'r': {
|
230 | tuple[i] = readNextElementObject(p.second, ty);
|
231 | break;
|
232 | }
|
233 | case 'i': {
|
234 | tuple[i] = p.second.int_value();
|
235 | break;
|
236 | }
|
237 | case 'u': {
|
238 | tuple[i] = p.second.int_value();
|
239 | break;
|
240 | }
|
241 | case 'f': {
|
242 | tuple[i] = static_cast<RamDomain>(p.second.number_value());
|
243 | break;
|
244 | }
|
245 | default: throwError("invalid type attribute: '", ty[0], "'");
|
246 | }
|
247 | } catch (...) {
|
248 | std::stringstream errorMessage;
|
249 | errorMessage << "Error converting: " << p.second.dump();
|
250 | throw std::invalid_argument(errorMessage.str());
|
251 | }
|
252 | }
|
253 |
|
254 | return tuple;
|
255 | }
|
256 |
|
257 | RamDomain readNextElementObject(const Json& source, const std::string& recordTypeName) {
|
258 | auto&& recordInfo = types["records"][recordTypeName];
|
259 | const std::string recordName = recordTypeName.substr(2);
|
260 | std::map<const std::string, const std::size_t> recordIndex;
|
261 |
|
262 | std::size_t index_pos = 0;
|
263 | for (auto param : params["records"][recordName]["params"].array_items()) {
|
264 | recordIndex.insert(std::make_pair(param.string_value(), index_pos));
|
265 | index_pos++;
|
266 | }
|
267 |
|
268 | if (recordInfo.is_null()) {
|
269 | throw std::invalid_argument("Missing record type information: " + recordTypeName);
|
270 | }
|
271 |
|
272 | // Handle null case
|
273 | if (source.is_null()) {
|
274 | return 0;
|
275 | }
|
276 |
|
277 | assert(source.is_object() && "the input is not json object");
|
278 | auto&& recordTypes = recordInfo["types"];
|
279 | const std::size_t recordArity = recordInfo["arity"].long_value();
|
280 | std::vector<RamDomain> recordValues(recordArity);
|
281 | recordValues.reserve(recordIndex.size());
|
282 | for (auto readParam : source.object_items()) {
|
283 | // get the corresponding position by parameter name
|
284 | if (recordIndex.find(readParam.first) == recordIndex.end()) {
|
285 | throwError("invalid parameter: ", readParam.first);
|
286 | }
|
287 | std::size_t i = recordIndex.at(readParam.first);
|
288 | auto&& type = recordTypes[i].string_value();
|
289 | switch (type[0]) {
|
290 | case 's': {
|
291 | recordValues[i] = symbolTable.encode(readParam.second.string_value());
|
292 | break;
|
293 | }
|
294 | case 'r': {
|
295 | recordValues[i] = readNextElementObject(readParam.second, type);
|
296 | break;
|
297 | }
|
298 | case 'i': {
|
299 | recordValues[i] = readParam.second.int_value();
|
300 | break;
|
301 | }
|
302 | case 'u': {
|
303 | recordValues[i] = readParam.second.int_value();
|
304 | break;
|
305 | }
|
306 | case 'f': {
|
307 | recordValues[i] = static_cast<RamDomain>(readParam.second.number_value());
|
308 | break;
|
309 | }
|
310 | default: throwError("invalid type attribute: '", type[0], "'");
|
311 | }
|
312 | }
|
313 |
|
314 | return recordTable.pack(recordValues.data(), recordValues.size());
|
315 | }
|
316 | };
|
317 |
|
318 | class ReadFileJSON : public ReadStreamJSON {
|
319 | public:
|
320 | ReadFileJSON(const std::map<std::string, std::string>& rwOperation, SymbolTable& symbolTable,
|
321 | RecordTable& recordTable)
|
322 | // FIXME: This is bordering on UB - we're passing an unconstructed
|
323 | // object (fileHandle) to the base class
|
324 | : ReadStreamJSON(fileHandle, rwOperation, symbolTable, recordTable),
|
325 | baseName(souffle::baseName(getFileName(rwOperation))),
|
326 | fileHandle(getFileName(rwOperation), std::ios::in | std::ios::binary) {
|
327 | if (!fileHandle.is_open()) {
|
328 | throw std::invalid_argument("Cannot open json file " + baseName + "\n");
|
329 | }
|
330 | }
|
331 |
|
332 | ~ReadFileJSON() override = default;
|
333 |
|
334 | protected:
|
335 | /**
|
336 | * Return given filename or construct from relation name.
|
337 | * Default name is [configured path]/[relation name].json
|
338 | *
|
339 | * @param rwOperation map of IO configuration options
|
340 | * @return input filename
|
341 | */
|
342 | static std::string getFileName(const std::map<std::string, std::string>& rwOperation) {
|
343 | auto name = getOr(rwOperation, "filename", rwOperation.at("name") + ".json");
|
344 | if (name.front() != '/') {
|
345 | name = getOr(rwOperation, "fact-dir", ".") + "/" + name;
|
346 | }
|
347 | return name;
|
348 | }
|
349 |
|
350 | std::string baseName;
|
351 | std::ifstream fileHandle;
|
352 | };
|
353 |
|
354 | class ReadCinJSONFactory : public ReadStreamFactory {
|
355 | public:
|
356 | Own<ReadStream> getReader(const std::map<std::string, std::string>& rwOperation, SymbolTable& symbolTable,
|
357 | RecordTable& recordTable) override {
|
358 | return mk<ReadStreamJSON>(std::cin, rwOperation, symbolTable, recordTable);
|
359 | }
|
360 |
|
361 | const std::string& getName() const override {
|
362 | static const std::string name = "json";
|
363 | return name;
|
364 | }
|
365 | ~ReadCinJSONFactory() override = default;
|
366 | };
|
367 |
|
368 | class ReadFileJSONFactory : public ReadStreamFactory {
|
369 | public:
|
370 | Own<ReadStream> getReader(const std::map<std::string, std::string>& rwOperation, SymbolTable& symbolTable,
|
371 | RecordTable& recordTable) override {
|
372 | return mk<ReadFileJSON>(rwOperation, symbolTable, recordTable);
|
373 | }
|
374 |
|
375 | const std::string& getName() const override {
|
376 | static const std::string name = "jsonfile";
|
377 | return name;
|
378 | }
|
379 |
|
380 | ~ReadFileJSONFactory() override = default;
|
381 | };
|
382 | } // namespace souffle
|