| 1 | /*
 | 
| 2 |  * Souffle - A Datalog Compiler
 | 
| 3 |  * Copyright (c) 2020, The Souffle Developers. All rights reserved
 | 
| 4 |  * Licensed under the Universal Permissive License v 1.0 as shown at:
 | 
| 5 |  * - https://opensource.org/licenses/UPL
 | 
| 6 |  * - <souffle root>/licenses/SOUFFLE-UPL.txt
 | 
| 7 |  */
 | 
| 8 | 
 | 
| 9 | /************************************************************************
 | 
| 10 |  *
 | 
| 11 |  * @file WriteStreamJSON.h
 | 
| 12 |  *
 | 
| 13 |  ***********************************************************************/
 | 
| 14 | 
 | 
| 15 | #pragma once
 | 
| 16 | 
 | 
| 17 | #include "souffle/RamTypes.h"
 | 
| 18 | #include "souffle/SymbolTable.h"
 | 
| 19 | #include "souffle/io/WriteStream.h"
 | 
| 20 | #include "souffle/utility/ContainerUtil.h"
 | 
| 21 | #include "souffle/utility/json11.h"
 | 
| 22 | 
 | 
| 23 | #include <map>
 | 
| 24 | #include <ostream>
 | 
| 25 | #include <queue>
 | 
| 26 | #include <stack>
 | 
| 27 | #include <string>
 | 
| 28 | #include <variant>
 | 
| 29 | #include <vector>
 | 
| 30 | 
 | 
| 31 | namespace souffle {
 | 
| 32 | 
 | 
| 33 | class WriteStreamJSON : public WriteStream {
 | 
| 34 | protected:
 | 
| 35 |     WriteStreamJSON(const std::map<std::string, std::string>& rwOperation, const SymbolTable& symbolTable,
 | 
| 36 |             const RecordTable& recordTable)
 | 
| 37 |             : WriteStream(rwOperation, symbolTable, recordTable),
 | 
| 38 |               useObjects(getOr(rwOperation, "format", "list") == "object") {
 | 
| 39 |         if (useObjects) {
 | 
| 40 |             std::string err;
 | 
| 41 |             params = Json::parse(rwOperation.at("params"), err);
 | 
| 42 |             if (err.length() > 0) {
 | 
| 43 |                 fatal("cannot get internal param names: %s", err);
 | 
| 44 |             }
 | 
| 45 |         }
 | 
| 46 |     };
 | 
| 47 | 
 | 
| 48 |     const bool useObjects;
 | 
| 49 |     Json params;
 | 
| 50 | 
 | 
| 51 |     void writeNextTupleJSON(std::ostream& destination, const RamDomain* tuple) {
 | 
| 52 |         std::vector<Json> result;
 | 
| 53 | 
 | 
| 54 |         if (useObjects)
 | 
| 55 |             destination << "{";
 | 
| 56 |         else
 | 
| 57 |             destination << "[";
 | 
| 58 | 
 | 
| 59 |         for (std::size_t col = 0; col < arity; ++col) {
 | 
| 60 |             if (col > 0) {
 | 
| 61 |                 destination << ", ";
 | 
| 62 |             }
 | 
| 63 | 
 | 
| 64 |             if (useObjects) {
 | 
| 65 |                 destination << params["relation"]["params"][col].dump() << ": ";
 | 
| 66 |                 writeNextTupleObject(destination, typeAttributes.at(col), tuple[col]);
 | 
| 67 |             } else {
 | 
| 68 |                 writeNextTupleList(destination, typeAttributes.at(col), tuple[col]);
 | 
| 69 |             }
 | 
| 70 |         }
 | 
| 71 | 
 | 
| 72 |         if (useObjects)
 | 
| 73 |             destination << "}";
 | 
| 74 |         else
 | 
| 75 |             destination << "]";
 | 
| 76 |     }
 | 
| 77 | 
 | 
| 78 |     void writeNextTupleList(std::ostream& destination, const std::string& name, const RamDomain value) {
 | 
| 79 |         using ValueTuple = std::pair<const std::string, const RamDomain>;
 | 
| 80 |         std::stack<std::variant<ValueTuple, std::string>> worklist;
 | 
| 81 |         worklist.push(std::make_pair(name, value));
 | 
| 82 | 
 | 
| 83 |         // the Json11 output is not tail recursive, therefore highly inefficient for recursive record
 | 
| 84 |         // in addition the JSON object is immutable, so has memory overhead
 | 
| 85 |         while (!worklist.empty()) {
 | 
| 86 |             std::variant<ValueTuple, std::string> curr = worklist.top();
 | 
| 87 |             worklist.pop();
 | 
| 88 | 
 | 
| 89 |             if (std::holds_alternative<std::string>(curr)) {
 | 
| 90 |                 destination << std::get<std::string>(curr);
 | 
| 91 |                 continue;
 | 
| 92 |             }
 | 
| 93 | 
 | 
| 94 |             const std::string& currType = std::get<ValueTuple>(curr).first;
 | 
| 95 |             const RamDomain currValue = std::get<ValueTuple>(curr).second;
 | 
| 96 |             assert(currType.length() > 2 && "Invalid type length");
 | 
| 97 |             switch (currType[0]) {
 | 
| 98 |                 // since some strings may need to be escaped, we use dump here
 | 
| 99 |                 case 's': destination << Json(symbolTable.decode(currValue)).dump(); break;
 | 
| 100 |                 case 'i': destination << currValue; break;
 | 
| 101 |                 case 'u': destination << (int)ramBitCast<RamUnsigned>(currValue); break;
 | 
| 102 |                 case 'f': destination << ramBitCast<RamFloat>(currValue); break;
 | 
| 103 |                 case 'r': {
 | 
| 104 |                     auto&& recordInfo = types["records"][currType];
 | 
| 105 |                     assert(!recordInfo.is_null() && "Missing record type information");
 | 
| 106 |                     if (currValue == 0) {
 | 
| 107 |                         destination << "null";
 | 
| 108 |                         break;
 | 
| 109 |                     }
 | 
| 110 | 
 | 
| 111 |                     auto&& recordTypes = recordInfo["types"];
 | 
| 112 |                     const std::size_t recordArity = recordInfo["arity"].long_value();
 | 
| 113 |                     const RamDomain* tuplePtr = recordTable.unpack(currValue, recordArity);
 | 
| 114 |                     worklist.push("]");
 | 
| 115 |                     for (auto i = (long long)(recordArity - 1); i >= 0; --i) {
 | 
| 116 |                         if (i != (long long)(recordArity - 1)) {
 | 
| 117 |                             worklist.push(", ");
 | 
| 118 |                         }
 | 
| 119 |                         const std::string& recordType = recordTypes[i].string_value();
 | 
| 120 |                         const RamDomain recordValue = tuplePtr[i];
 | 
| 121 |                         worklist.push(std::make_pair(recordType, recordValue));
 | 
| 122 |                     }
 | 
| 123 | 
 | 
| 124 |                     worklist.push("[");
 | 
| 125 |                     break;
 | 
| 126 |                 }
 | 
| 127 |                 default: fatal("unsupported type attribute: `%c`", currType[0]);
 | 
| 128 |             }
 | 
| 129 |         }
 | 
| 130 |     }
 | 
| 131 | 
 | 
| 132 |     void writeNextTupleObject(std::ostream& destination, const std::string& name, const RamDomain value) {
 | 
| 133 |         using ValueTuple = std::pair<const std::string, const RamDomain>;
 | 
| 134 |         std::stack<std::variant<ValueTuple, std::string>> worklist;
 | 
| 135 |         worklist.push(std::make_pair(name, value));
 | 
| 136 | 
 | 
| 137 |         // the Json11 output is not tail recursive, therefore highly inefficient for recursive record
 | 
| 138 |         // in addition the JSON object is immutable, so has memory overhead
 | 
| 139 |         while (!worklist.empty()) {
 | 
| 140 |             std::variant<ValueTuple, std::string> curr = worklist.top();
 | 
| 141 |             worklist.pop();
 | 
| 142 | 
 | 
| 143 |             if (std::holds_alternative<std::string>(curr)) {
 | 
| 144 |                 destination << std::get<std::string>(curr);
 | 
| 145 |                 continue;
 | 
| 146 |             }
 | 
| 147 | 
 | 
| 148 |             const std::string& currType = std::get<ValueTuple>(curr).first;
 | 
| 149 |             const RamDomain currValue = std::get<ValueTuple>(curr).second;
 | 
| 150 |             const std::string& typeName = currType.substr(2);
 | 
| 151 |             assert(currType.length() > 2 && "Invalid type length");
 | 
| 152 |             switch (currType[0]) {
 | 
| 153 |                 // since some strings may need to be escaped, we use dump here
 | 
| 154 |                 case 's': destination << Json(symbolTable.decode(currValue)).dump(); break;
 | 
| 155 |                 case 'i': destination << currValue; break;
 | 
| 156 |                 case 'u': destination << (int)ramBitCast<RamUnsigned>(currValue); break;
 | 
| 157 |                 case 'f': destination << ramBitCast<RamFloat>(currValue); break;
 | 
| 158 |                 case 'r': {
 | 
| 159 |                     auto&& recordInfo = types["records"][currType];
 | 
| 160 |                     assert(!recordInfo.is_null() && "Missing record type information");
 | 
| 161 |                     if (currValue == 0) {
 | 
| 162 |                         destination << "null";
 | 
| 163 |                         break;
 | 
| 164 |                     }
 | 
| 165 | 
 | 
| 166 |                     auto&& recordTypes = recordInfo["types"];
 | 
| 167 |                     const std::size_t recordArity = recordInfo["arity"].long_value();
 | 
| 168 |                     const RamDomain* tuplePtr = recordTable.unpack(currValue, recordArity);
 | 
| 169 |                     worklist.push("}");
 | 
| 170 |                     for (auto i = (long long)(recordArity - 1); i >= 0; --i) {
 | 
| 171 |                         if (i != (long long)(recordArity - 1)) {
 | 
| 172 |                             worklist.push(", ");
 | 
| 173 |                         }
 | 
| 174 |                         const std::string& recordType = recordTypes[i].string_value();
 | 
| 175 |                         const RamDomain recordValue = tuplePtr[i];
 | 
| 176 |                         worklist.push(std::make_pair(recordType, recordValue));
 | 
| 177 |                         worklist.push(": ");
 | 
| 178 | 
 | 
| 179 |                         auto&& recordParam = params["records"][typeName]["params"][i];
 | 
| 180 |                         assert(recordParam.is_string());
 | 
| 181 |                         worklist.push(recordParam.dump());
 | 
| 182 |                     }
 | 
| 183 | 
 | 
| 184 |                     worklist.push("{");
 | 
| 185 |                     break;
 | 
| 186 |                 }
 | 
| 187 |                 default: fatal("unsupported type attribute: `%c`", currType[0]);
 | 
| 188 |             }
 | 
| 189 |         }
 | 
| 190 |     }
 | 
| 191 | };
 | 
| 192 | 
 | 
| 193 | class WriteFileJSON : public WriteStreamJSON {
 | 
| 194 | public:
 | 
| 195 |     WriteFileJSON(const std::map<std::string, std::string>& rwOperation, const SymbolTable& symbolTable,
 | 
| 196 |             const RecordTable& recordTable)
 | 
| 197 |             : WriteStreamJSON(rwOperation, symbolTable, recordTable), isFirst(true),
 | 
| 198 |               file(getFileName(rwOperation), std::ios::out | std::ios::binary) {
 | 
| 199 |         file << "[";
 | 
| 200 |     }
 | 
| 201 | 
 | 
| 202 |     ~WriteFileJSON() override {
 | 
| 203 |         file << "]\n";
 | 
| 204 |         file.close();
 | 
| 205 |     }
 | 
| 206 | 
 | 
| 207 | protected:
 | 
| 208 |     bool isFirst;
 | 
| 209 |     std::ofstream file;
 | 
| 210 | 
 | 
| 211 |     void writeNullary() override {
 | 
| 212 |         file << "null\n";
 | 
| 213 |     }
 | 
| 214 | 
 | 
| 215 |     void writeNextTuple(const RamDomain* tuple) override {
 | 
| 216 |         if (!isFirst) {
 | 
| 217 |             file << ",\n";
 | 
| 218 |         } else {
 | 
| 219 |             isFirst = false;
 | 
| 220 |         }
 | 
| 221 |         writeNextTupleJSON(file, tuple);
 | 
| 222 |     }
 | 
| 223 | 
 | 
| 224 |     /**
 | 
| 225 |      * Return given filename or construct from relation name.
 | 
| 226 |      * Default name is [configured path]/[relation name].json
 | 
| 227 |      *
 | 
| 228 |      * @param rwOperation map of IO configuration options
 | 
| 229 |      * @return input filename
 | 
| 230 |      */
 | 
| 231 |     static std::string getFileName(const std::map<std::string, std::string>& rwOperation) {
 | 
| 232 |         auto name = getOr(rwOperation, "filename", rwOperation.at("name") + ".json");
 | 
| 233 |         if (name.front() != '/') {
 | 
| 234 |             name = getOr(rwOperation, "output-dir", ".") + "/" + name;
 | 
| 235 |         }
 | 
| 236 |         return name;
 | 
| 237 |     }
 | 
| 238 | };
 | 
| 239 | 
 | 
| 240 | class WriteCoutJSON : public WriteStreamJSON {
 | 
| 241 | public:
 | 
| 242 |     WriteCoutJSON(const std::map<std::string, std::string>& rwOperation, const SymbolTable& symbolTable,
 | 
| 243 |             const RecordTable& recordTable)
 | 
| 244 |             : WriteStreamJSON(rwOperation, symbolTable, recordTable), isFirst(true) {
 | 
| 245 |         std::cout << "[";
 | 
| 246 |     }
 | 
| 247 | 
 | 
| 248 |     ~WriteCoutJSON() override {
 | 
| 249 |         std::cout << "]\n";
 | 
| 250 |     };
 | 
| 251 | 
 | 
| 252 | protected:
 | 
| 253 |     bool isFirst;
 | 
| 254 | 
 | 
| 255 |     void writeNullary() override {
 | 
| 256 |         std::cout << "null\n";
 | 
| 257 |     }
 | 
| 258 | 
 | 
| 259 |     void writeNextTuple(const RamDomain* tuple) override {
 | 
| 260 |         if (!isFirst) {
 | 
| 261 |             std::cout << ",\n";
 | 
| 262 |         } else {
 | 
| 263 |             isFirst = false;
 | 
| 264 |         }
 | 
| 265 |         writeNextTupleJSON(std::cout, tuple);
 | 
| 266 |     }
 | 
| 267 | };
 | 
| 268 | 
 | 
| 269 | class WriteFileJSONFactory : public WriteStreamFactory {
 | 
| 270 | public:
 | 
| 271 |     Own<WriteStream> getWriter(const std::map<std::string, std::string>& rwOperation,
 | 
| 272 |             const SymbolTable& symbolTable, const RecordTable& recordTable) override {
 | 
| 273 |         return mk<WriteFileJSON>(rwOperation, symbolTable, recordTable);
 | 
| 274 |     }
 | 
| 275 | 
 | 
| 276 |     const std::string& getName() const override {
 | 
| 277 |         static const std::string name = "jsonfile";
 | 
| 278 |         return name;
 | 
| 279 |     }
 | 
| 280 | 
 | 
| 281 |     ~WriteFileJSONFactory() override = default;
 | 
| 282 | };
 | 
| 283 | 
 | 
| 284 | class WriteCoutJSONFactory : public WriteStreamFactory {
 | 
| 285 | public:
 | 
| 286 |     Own<WriteStream> getWriter(const std::map<std::string, std::string>& rwOperation,
 | 
| 287 |             const SymbolTable& symbolTable, const RecordTable& recordTable) override {
 | 
| 288 |         return mk<WriteCoutJSON>(rwOperation, symbolTable, recordTable);
 | 
| 289 |     }
 | 
| 290 | 
 | 
| 291 |     const std::string& getName() const override {
 | 
| 292 |         static const std::string name = "json";
 | 
| 293 |         return name;
 | 
| 294 |     }
 | 
| 295 | 
 | 
| 296 |     ~WriteCoutJSONFactory() override = default;
 | 
| 297 | };
 | 
| 298 | }  // namespace souffle
 |