| 1 | /*
 | 
| 2 |  * Souffle - A Datalog Compiler
 | 
| 3 |  * Copyright (c) 2016, The Souffle Developers. All rights reserved
 | 
| 4 |  * Licensed under the Universal Permissive License v 1.0 as shown at:
 | 
| 5 |  * - https://opensource.org/licenses/UPL
 | 
| 6 |  * - <souffle root>/licenses/SOUFFLE-UPL.txt
 | 
| 7 |  */
 | 
| 8 | 
 | 
| 9 | #pragma once
 | 
| 10 | 
 | 
| 11 | #include "souffle/profile/CellInterface.h"
 | 
| 12 | #include "souffle/profile/Row.h"
 | 
| 13 | #include "souffle/profile/Table.h"
 | 
| 14 | #include <algorithm>
 | 
| 15 | #include <chrono>
 | 
| 16 | #include <cmath>
 | 
| 17 | #include <cstdio>
 | 
| 18 | #include <fstream>
 | 
| 19 | #include <iomanip>
 | 
| 20 | #include <ios>
 | 
| 21 | #include <memory>
 | 
| 22 | #include <sstream>
 | 
| 23 | #include <string>
 | 
| 24 | #include <vector>
 | 
| 25 | 
 | 
| 26 | #ifndef _MSC_VER
 | 
| 27 | #include <unistd.h>
 | 
| 28 | #endif
 | 
| 29 | 
 | 
| 30 | #include <sys/stat.h>
 | 
| 31 | 
 | 
| 32 | namespace souffle {
 | 
| 33 | namespace profile {
 | 
| 34 | 
 | 
| 35 | /*
 | 
| 36 |  * A series of functions necessary throughout the code
 | 
| 37 |  * Mostly string manipulation
 | 
| 38 |  */
 | 
| 39 | namespace Tools {
 | 
/**
 * Magnitude suffixes used by formatNum(int, int64_t).
 * Entry i labels values expressed in multiples of 1000^(i+1).
 */
static const std::vector<std::string> abbreviations{
        "K", "M", "B", "t", "q", "Q", "s", "S", "o", "n", "d", "U"};

/** @brief Render a floating point value with the default stream formatting. */
inline std::string formatNum(double amount) {
    std::stringstream ss;
    ss << amount;
    return ss.str();
}

/**
 * @brief Abbreviate an integer to `precision` significant digits plus a magnitude suffix.
 *
 * Digits beyond the requested precision are truncated, never rounded:
 * whole digits that are cut off become zeros ("120K") and at most six
 * decimals are emitted after the decimal point.
 *
 * @param precision number of significant digits to keep; a value <= 0
 *                  requests the full, unabbreviated number
 * @param amount    the value to format; zero, negative values and values
 *                  below 1000 are returned unabbreviated
 * @return the formatted number, e.g. formatNum(3, 1234567) == "1.23M"
 */
inline std::string formatNum(int precision, int64_t amount) {
    if (amount == 0) {
        return "0";
    }

    // Nothing to abbreviate: full output requested, or the value (which
    // includes every negative number) is below one thousand.
    if (precision <= 0 || amount < 1000) {
        return std::to_string(amount);
    }

    const uint64_t value = static_cast<uint64_t>(amount);
    const std::size_t wanted = static_cast<std::size_t>(precision);

    // `unit` is 1000^(i+1). Pure integer arithmetic keeps exact powers of
    // 1000 in the right bucket (1000000 is "1.00M", not "100K") and avoids
    // floating point rounding pushing 999.9999... up to 1000.
    uint64_t unit = 1000;
    for (std::size_t i = 0; i < abbreviations.size(); ++i, unit *= 1000) {
        const uint64_t whole = value / unit;
        if (whole >= 1000) {
            // Too large for this suffix. Any int64_t is below 1000^7, so a
            // match is found before `unit` could overflow.
            continue;
        }

        // 1 <= whole <= 999 here.
        std::string result = std::to_string(whole);
        const std::size_t wholeDigits = result.size();

        if (wanted < wholeDigits) {
            // Keep the leading digits and zero out the rest: 123 -> "120".
            for (std::size_t k = wanted; k < wholeDigits; ++k) {
                result[k] = '0';
            }
        } else if (wanted > wholeDigits) {
            // Spend the remaining precision on decimals, capped at six.
            const std::size_t decimals = std::min<std::size_t>(wanted - wholeDigits, 6);
            uint64_t rest = value % unit;
            result += '.';
            for (std::size_t k = 0; k < decimals; ++k) {
                rest *= 10;  // rest < unit <= 10^18, so this fits in 64 bits
                result += static_cast<char>('0' + rest / unit);
                rest %= unit;
            }
        }

        result += abbreviations.at(i);
        return result;
    }

    // If we ever have integers too large to handle, fall back to this
    return std::to_string(amount);
}
 | 
| 99 | 
 | 
/**
 * @brief Render a size given in kilobytes as a whole number with a unit suffix.
 *
 * A unit is used while the value is below two of the next larger unit;
 * the division truncates. Everything beyond the GB range is shown in TB.
 */
inline std::string formatMemory(uint64_t kbytes) {
    const uint64_t step = 1024;
    const char* const suffixes[] = {"kB", "MB", "GB"};

    uint64_t divisor = 1;
    for (const char* suffix : suffixes) {
        if (kbytes < divisor * step * 2) {
            return std::to_string(kbytes / divisor) + suffix;
        }
        divisor *= step;
    }
    return std::to_string(kbytes / divisor) + "TB";
}
 | 
| 110 | 
 | 
/**
 * @brief Render a duration compactly, choosing the unit from its magnitude.
 *
 * Output shapes (all values are truncated, never rounded):
 *  - below 1 second:      ".mmms"  (milliseconds, zero padded to three digits)
 *  - 1 to 9 seconds:      "s.hhs"  (two decimals)
 *  - 10 to 99 seconds:    "NNs"
 *  - 100 s to < 10 min:   "M.tm"   (one decimal, tenths of a minute)
 *  - 10 to 99 minutes:    "NNm"
 *  - 100 min to 99 hours: "NNh"
 *  - 100 hours and more:  "ND"     (days)
 *
 * @param number the duration to format
 * @return the formatted duration
 */
inline std::string formatTime(std::chrono::microseconds number) {
    const auto micros = number.count();
    const uint64_t sec = micros / 1000000;

    if (sec >= 100) {
        const uint64_t min = sec / 60;
        if (min >= 100) {
            const uint64_t hours = min / 60;
            if (hours >= 100) {
                return std::to_string(hours / 24) + "D";
            }
            return std::to_string(hours) + "h";
        }
        if (min < 10) {
            // Tenths of a minute: the 0..59 leftover seconds in 6 second
            // steps, which is always a single digit (0..9).
            const uint64_t tenths = (sec - min * 60) / 6;
            return std::to_string(min) + "." + std::to_string(tenths) + "m";
        }
        return std::to_string(min) + "m";
    }

    if (sec >= 10) {
        return std::to_string(sec) + "s";
    }

    if (micros >= 1000000) {
        // 1 <= seconds < 10: hundredths of a second form a three digit number.
        const std::string hundredths = std::to_string(micros / 10000);
        return hundredths.substr(0, 1) + "." + hundredths.substr(1, 2) + "s";
    }

    if (micros < 1000) {
        return ".000s";
    }

    // 1 <= milliseconds <= 999: left pad with zeros to three digits.
    const std::string millis = std::to_string(micros / 1000);
    return "." + std::string(3 - millis.size(), '0') + millis + "s";
}
 | 
| 147 | 
 | 
| 148 | inline std::vector<std::vector<std::string>> formatTable(Table table, int precision) {
 | 
| 149 |     std::vector<std::vector<std::string>> result;
 | 
| 150 |     for (auto& row : table.getRows()) {
 | 
| 151 |         std::vector<std::string> result_row;
 | 
| 152 |         for (auto& cell : row->getCells()) {
 | 
| 153 |             if (cell != nullptr) {
 | 
| 154 |                 result_row.push_back(cell->toString(precision));
 | 
| 155 |             } else {
 | 
| 156 |                 result_row.push_back("-");
 | 
| 157 |             }
 | 
| 158 |         }
 | 
| 159 |         result.push_back(result_row);
 | 
| 160 |     }
 | 
| 161 |     return result;
 | 
| 162 | }
 | 
| 163 | 
 | 
/**
 * @brief Split a string on every occurrence of a delimiter.
 *
 * An empty piece in front of a leading delimiter and an empty piece after a
 * trailing delimiter are dropped; empty pieces between two adjacent
 * delimiters in the middle of the string are kept.
 *
 * @param toSplit   the string to split
 * @param delimiter the separator; may be longer than one character. An empty
 *                  delimiter never matches, so the input is returned as a
 *                  single element (or no element if the input is empty).
 * @return the pieces in order of appearance
 */
inline std::vector<std::string> split(std::string toSplit, std::string delimiter) {
    std::vector<std::string> elements;

    // find() reports an empty needle at the search position itself, so the
    // loop below would never advance. Treat it as "no delimiter present".
    if (delimiter.empty()) {
        if (!toSplit.empty()) {
            elements.push_back(toSplit);
        }
        return elements;
    }

    std::string::size_type lastPos = 0;
    auto pos = toSplit.find(delimiter, lastPos);

    while (pos != std::string::npos) {
        // Only a delimiter at the very start of the string is skipped.
        if (pos > 0) {
            elements.push_back(toSplit.substr(lastPos, pos - lastPos));
        }
        lastPos = pos + delimiter.size();
        pos = toSplit.find(delimiter, lastPos);
    }

    // Whatever follows the last delimiter, unless nothing is left.
    if (lastPos < toSplit.size()) {
        elements.push_back(toSplit.substr(lastPos));
    }

    return elements;
}
 | 
| 184 | 
 | 
/**
 * @brief Strip leading and trailing spaces and tabs.
 *
 * Only ' ' and '\t' are removed; other characters such as newlines are kept.
 * A string made up solely of spaces and tabs yields an empty string.
 */
inline std::string trimWhitespace(std::string str) {
    const char* const blanks = " \t";

    const std::size_t begin = str.find_first_not_of(blanks);
    if (begin == std::string::npos) {
        return std::string();
    }

    // A non-blank character exists, so this search cannot fail.
    const std::size_t end = str.find_last_not_of(blanks);
    return str.substr(begin, end - begin + 1);
}
 | 
| 198 | 
 | 
/**
 * @brief Check whether stat() succeeds for the given path and reports a file type.
 *
 * @param name path to query
 * @return true if stat() returns 0 and the file type bits (S_IFMT) of
 *         st_mode are non-zero, false otherwise
 */
inline bool file_exists(const std::string& name) {
    struct stat info = {};
    const bool found = stat(name.c_str(), &info) == 0;
    return found && (info.st_mode & S_IFMT) != 0;
}
 | 
/**
 * @brief Remove \n and \t characters, \n and \t sequence of two chars, and wrapping quotes
 *
 * Processing order:
 *  1. Every backslash is deleted; if the character that followed it is 'n'
 *     or 't', that character is replaced by a space. Consecutive
 *     backslashes are all deleted.
 *  2. If the result still has at least two characters and both starts and
 *     ends with '"', that pair of quotes is removed.
 *  3. Literal newline and tab characters are replaced by spaces.
 *
 * Strings shorter than two characters are returned untouched.
 */
inline std::string cleanString(std::string val) {
    if (val.size() < 2) {
        return val;
    }

    std::size_t start_pos = 0;
    while ((start_pos = val.find('\\', start_pos)) != std::string::npos) {
        val.erase(start_pos, 1);
        // start_pos now indexes the character that followed the backslash.
        if (start_pos < val.size()) {
            if (val[start_pos] == 'n' || val[start_pos] == 't') {
                val.replace(start_pos, 1, " ");
            }
        }
    }

    // Deleting backslashes may have shrunk the string to zero or one
    // character, so the size has to be checked again: an empty string has
    // no front/back, and a lone quote is not a wrapping pair.
    if (val.size() >= 2 && val.front() == '"' && val.back() == '"') {
        val = val.substr(1, val.size() - 2);
    }

    std::replace(val.begin(), val.end(), '\n', ' ');
    std::replace(val.begin(), val.end(), '\t', ' ');

    return val;
}
 | 
| 233 | 
 | 
/**
 * @brief escape escapes and quotes, and remove surrounding quotes
 *
 * A pair of wrapping double quotes is stripped first (only when the string
 * has at least two characters); afterwards every backslash and every double
 * quote in the remaining text is prefixed with a backslash.
 */
inline std::string cleanJsonOut(std::string value) {
    const bool wrapped = value.size() >= 2 && value.front() == '"' && value.back() == '"';
    if (wrapped) {
        value.erase(value.size() - 1);
        value.erase(0, 1);
    }

    std::string escaped;
    escaped.reserve(value.size());
    for (const char ch : value) {
        if (ch == '\\' || ch == '"') {
            escaped.push_back('\\');
        }
        escaped.push_back(ch);
    }
    return escaped;
}
 | 
| 254 | 
 | 
/**
 * @brief Convert doubles to NaN or scientific notation
 *
 * @return "NaN" for a not-a-number input, otherwise the value in scientific
 *         notation with six digits after the decimal point
 */
inline std::string cleanJsonOut(double val) {
    if (!std::isnan(val)) {
        std::ostringstream out;
        out.setf(std::ios::scientific, std::ios::floatfield);
        out.precision(6);
        out << val;
        return out.str();
    }
    return "NaN";
}
 | 
| 264 | }  // namespace Tools
 | 
| 265 | 
 | 
| 266 | }  // namespace profile
 | 
| 267 | }  // namespace souffle
 |