OILS / vendor / souffle / utility / StringUtil.h View on Github | oilshell.org

494 lines, 254 significant
1/*
2 * Souffle - A Datalog Compiler
3 * Copyright (c) 2021, The Souffle Developers. All rights reserved
4 * Licensed under the Universal Permissive License v 1.0 as shown at:
5 * - https://opensource.org/licenses/UPL
6 * - <souffle root>/licenses/SOUFFLE-UPL.txt
7 */
8
9/************************************************************************
10 *
11 * @file StringUtil.h
12 *
13 * @brief Datalog project utilities
14 *
15 ***********************************************************************/
16
17#pragma once
18
19#include "souffle/RamTypes.h"
20#include <algorithm>
21#include <cctype>
22#include <cstdlib>
23#include <fstream>
24#include <limits>
25#include <set>
26#include <sstream>
27#include <stdexcept>
28#include <string>
29#include <type_traits>
30#include <typeinfo>
31#include <vector>
32
33namespace souffle {
34
35// Forward declaration
36inline bool isPrefix(const std::string& prefix, const std::string& element);
37
38/**
39 * Converts a string to a RamSigned
40 *
41 * This procedure has similar behaviour to std::stoi/stoll.
42 *
43 * The procedure accepts prefixes 0b (if base = 2) and 0x (if base = 16)
44 * If base = 0, the procedure will try to infer the base from the prefix, if present.
45 */
46inline RamSigned RamSignedFromString(
47 const std::string& str, std::size_t* position = nullptr, const int base = 10) {
48 RamSigned val;
49
50 if (base == 0) {
51 if (isPrefix("-0b", str) || isPrefix("0b", str)) {
52 return RamSignedFromString(str, position, 2);
53 } else if (isPrefix("-0x", str) || isPrefix("0x", str)) {
54 return RamSignedFromString(str, position, 16);
55 } else {
56 return RamSignedFromString(str, position);
57 }
58 }
59 std::string binaryNumber;
60 bool parsingBinary = base == 2;
61
62 // stoi/stoll can't handle base 2 prefix by default.
63 if (parsingBinary) {
64 if (isPrefix("-0b", str)) {
65 binaryNumber = "-" + str.substr(3);
66 } else if (isPrefix("0b", str)) {
67 binaryNumber = str.substr(2);
68 }
69 }
70 const std::string& tmp = parsingBinary ? binaryNumber : str;
71
72#if RAM_DOMAIN_SIZE == 64
73 val = std::stoll(tmp, position, base);
74#else
75 val = std::stoi(tmp, position, base);
76#endif
77
78 if (parsingBinary && position != nullptr) {
79 *position += 2;
80 }
81
82 return val;
83}
84
85/**
86 * Converts a string to a RamFloat
87 */
88inline RamFloat RamFloatFromString(const std::string& str, std::size_t* position = nullptr) {
89 RamFloat val;
90#if RAM_DOMAIN_SIZE == 64
91 val = std::stod(str, position);
92#else
93 val = std::stof(str, position);
94#endif
95 return static_cast<RamFloat>(val);
96}
97/**
98 * Converts a string to a RamUnsigned
99 *
100 * This procedure has similar behaviour to std::stoul/stoull.
101 *
102 * The procedure accepts prefixes 0b (if base = 2) and 0x (if base = 16)
103 * If base = 0, the procedure will try to infer the base from the prefix, if present.
104 */
105inline RamUnsigned RamUnsignedFromString(
106 const std::string& str, std::size_t* position = nullptr, const int base = 10) {
107 // Be default C++ (stoul) allows unsigned numbers starting with "-".
108 if (isPrefix("-", str)) {
109 throw std::invalid_argument("Unsigned number can't start with minus.");
110 }
111
112 if (base == 0) {
113 if (isPrefix("0b", str)) {
114 return RamUnsignedFromString(str, position, 2);
115 } else if (isPrefix("0x", str)) {
116 return RamUnsignedFromString(str, position, 16);
117 } else {
118 return RamUnsignedFromString(str, position);
119 }
120 }
121
122 // stoul/stoull can't handle binary prefix by default.
123 std::string binaryNumber;
124 bool parsingBinary = false;
125 if (base == 2 && isPrefix("0b", str)) {
126 binaryNumber = str.substr(2);
127 parsingBinary = true;
128 }
129 const std::string& tmp = parsingBinary ? binaryNumber : str;
130
131 RamUnsigned val;
132#if RAM_DOMAIN_SIZE == 64
133 val = std::stoull(tmp, position, base);
134#else
135 val = std::stoul(tmp, position, base);
136#endif
137
138 if (parsingBinary && position != nullptr) {
139 *position += 2;
140 }
141
142 // check if it's safe to cast (stoul returns unsigned long)
143 if (val > std::numeric_limits<RamUnsigned>::max()) {
144 throw std::invalid_argument("Unsigned number of of bounds");
145 }
146
147 return static_cast<RamUnsigned>(val);
148}
149
150/**
151 * Can a string be parsed as RamSigned.
152 *
153 * Souffle (parser, not fact file readers) accepts: hex, binary and base 10.
154 * Integer can be negative, in all 3 formats this means that it
155 * starts with minus (c++ default semantics).
156 */
157inline bool canBeParsedAsRamSigned(const std::string& string) {
158 std::size_t charactersRead = 0;
159
160 try {
161 RamSignedFromString(string, &charactersRead, 0);
162 } catch (...) {
163 return false;
164 }
165
166 return charactersRead == string.size();
167}
168
169/**
170 * Can a string be parsed as RamUnsigned.
171 *
172 * Souffle accepts: hex, binary and base 10.
173 */
174inline bool canBeParsedAsRamUnsigned(const std::string& string) {
175 std::size_t charactersRead = 0;
176 try {
177 RamUnsignedFromString(string, &charactersRead, 0);
178 } catch (...) {
179 return false;
180 }
181 return charactersRead == string.size();
182}
183
184/**
185 * Can a string be parsed as RamFloat.
186 */
187inline bool canBeParsedAsRamFloat(const std::string& string) {
188 std::size_t charactersRead = 0;
189 try {
190 RamFloatFromString(string, &charactersRead);
191 } catch (...) {
192 return false;
193 }
194 return charactersRead == string.size();
195}
196
197#if RAM_DOMAIN_SIZE == 64
198inline RamDomain stord(const std::string& str, std::size_t* pos = nullptr, int base = 10) {
199 return static_cast<RamDomain>(std::stoull(str, pos, base));
200}
201#elif RAM_DOMAIN_SIZE == 32
202inline RamDomain stord(const std::string& str, std::size_t* pos = nullptr, int base = 10) {
203 return static_cast<RamDomain>(std::stoul(str, pos, base));
204}
205#else
206#error RAM Domain is neither 32bit nor 64bit
207#endif
208
/**
 * Check whether a C string is a sequence of decimal digits.
 *
 * @param str NUL-terminated string, may be null
 * @return false for a null pointer or if any character is not a digit;
 *         true otherwise (including for the empty string)
 */
inline bool isNumber(const char* str) {
    if (str == nullptr) {
        return false;
    }

    for (; *str != 0; ++str) {
        // std::isdigit requires a value representable as unsigned char (or EOF);
        // passing a negative plain char is undefined behaviour.
        if (std::isdigit(static_cast<unsigned char>(*str)) == 0) {
            return false;
        }
    }
    return true;
}
225
/**
 * Trivial toString overload for strings: hands back the very same string object.
 */
inline auto toString(const std::string& str) -> const std::string& {
    return str;
}
232
namespace detail {

/**
 * Trait telling whether a value of type T can be written to a std::ostream.
 * Primary template: by default a type is considered not printable.
 */
template <typename T, typename filter = void>
struct is_printable : std::false_type {};

/**
 * Specialization selected when `std::ostream& << T` is a valid expression.
 */
template <typename T>
struct is_printable<T, std::void_t<decltype(std::declval<std::ostream&>() << std::declval<T>())>>
        : std::true_type {};

/**
 * Trait telling whether T offers a `printHTML(std::ostream&)` member.
 * Primary template: by default a type is considered not HTML-printable.
 */
template <typename T, typename filter = void>
struct is_html_printable : std::false_type {};

/**
 * Specialization selected when `t.printHTML(std::ostream&)` is a valid expression.
 */
template <typename T>
struct is_html_printable<T,
        std::void_t<decltype(std::declval<T>().printHTML(std::declval<std::ostream&>()))>>
        : std::true_type {};

}  // namespace detail
260
261/**
262 * A generic function converting arbitrary objects to strings by utilizing
263 * their print capability.
264 *
265 * This function is mainly intended for implementing test cases and debugging
266 * operations.
267 */
268template <typename T>
269typename std::enable_if<detail::is_printable<T>::value, std::string>::type toString(const T& value) {
270 // write value into stream and return result
271 std::stringstream ss;
272 ss << value;
273 return ss.str();
274}
275
276/**
277 * A fallback for the to-string function in case an unprintable object is supposed
278 * to be printed.
279 */
280template <typename T>
281typename std::enable_if<!detail::is_printable<T>::value, std::string>::type toString(const T&) {
282 std::stringstream ss;
283 ss << "(print for type ";
284 ss << typeid(T).name();
285 ss << " not supported)";
286 return ss.str();
287}
288
289template <typename T>
290auto toHtml(const T& obj) -> typename std::enable_if<detail::is_html_printable<T>::value, std::string>::type {
291 std::stringstream out;
292 obj.printHTML(out);
293 return out.str();
294}
295
296/** Fallback to `toString` */
297template <typename T>
298auto toHtml(const T& obj) ->
299 typename std::enable_if<not detail::is_html_printable<T>::value, std::string>::type {
300 return toString(obj);
301}
302
303// -------------------------------------------------------------------------------
304// String Utils
305// -------------------------------------------------------------------------------
306
/**
 * Determine if one string is a prefix of another.
 *
 * @return true iff `element` starts with `prefix` (the empty prefix matches everything)
 */
inline bool isPrefix(const std::string& prefix, const std::string& element) {
    // a prefix longer than the element can never match
    if (prefix.size() > element.size()) {
        return false;
    }
    return std::equal(prefix.begin(), prefix.end(), element.begin());
}
324
/**
 * Determines whether `value` ends with `ending`.
 *
 * @return true iff the last `ending.size()` characters of `value` equal `ending`
 *         (the empty ending matches everything)
 */
inline bool endsWith(const std::string& value, const std::string& ending) {
    const std::size_t valueLen = value.size();
    const std::size_t endingLen = ending.size();
    return endingLen <= valueLen && value.compare(valueLen - endingLen, endingLen, ending) == 0;
}
335
/**
 * Splits a string at every occurrence of a delimiter.
 *
 * The returned views refer to the memory behind `toSplit`; no characters are copied.
 * Adjacent, leading or trailing delimiters produce empty parts.
 *
 * @param toSplit   text to split
 * @param delimiter separator text
 * @return the parts in order; a single part holding all of `toSplit` when `toSplit`
 *         is empty or when `delimiter` is empty (an empty delimiter never splits)
 */
inline std::vector<std::string_view> splitView(std::string_view toSplit, std::string_view delimiter) {
    // An empty delimiter "matches" at position 0 of every tail, including the exhausted one,
    // which used to run the cursor past the end and throw std::out_of_range.
    if (toSplit.empty() || delimiter.empty()) return {toSplit};

    std::vector<std::string_view> parts;
    std::string_view tail = toSplit;
    for (;;) {
        const auto pos = tail.find(delimiter);
        // substr clamps the length, so pos == npos yields the whole remaining tail
        parts.push_back(tail.substr(0, pos));
        if (pos == std::string_view::npos) break;

        tail.remove_prefix(pos + delimiter.size());
    }

    return parts;
}
355
356/**
357 * Splits a string given a delimiter
358 */
359inline std::vector<std::string> splitString(std::string_view str, char delimiter) {
360 std::vector<std::string> xs;
361 for (auto&& x : splitView(str, std::string_view{&delimiter, 1}))
362 xs.push_back(std::string(x));
363 return xs;
364}
365
366/**
367 * Strips the prefix of a given string if it exists. No change otherwise.
368 */
369inline std::string stripPrefix(const std::string& prefix, const std::string& element) {
370 return isPrefix(prefix, element) ? element.substr(prefix.length()) : element;
371}
372
/**
 * Stringify a string: backslash, semicolon, double-quote, newline and tab are
 * replaced by the two-character sequences \\ \; \" \n and \t respectively.
 * All other characters are copied unchanged.
 */
inline std::string stringify(const std::string& input) {
    std::string escaped;
    escaped.reserve(input.size());

    for (const char ch : input) {
        switch (ch) {
            case '\\': escaped += "\\\\"; break;
            case ';': escaped += "\\;"; break;
            case '"': escaped += "\\\""; break;
            case '\n': escaped += "\\n"; break;
            case '\t': escaped += "\\t"; break;
            default: escaped += ch; break;
        }
    }
    return escaped;
}
411
/**
 * Escape JSON string: every double-quote in the input is preceded by a backslash.
 * All other characters are copied unchanged.
 */
inline std::string escapeJSONstring(const std::string& JSONstr) {
    std::string escaped;

    // visit every character of the input
    for (const char ch : JSONstr) {
        if (ch == '"') {
            escaped.push_back('\\');
        }
        escaped.push_back(ch);
    }
    return escaped;
}
427
/**
 * Turns a string into a valid C++ identifier by replacing offending characters with '_'.
 *
 * The first character must be alphabetic; every other character must be alphanumeric
 * or an underscore. Note that this does not ensure the uniqueness of identifiers returned.
 */
inline std::string identifier(std::string id) {
    for (std::size_t i = 0; i < id.length(); i++) {
        // The <cctype> classifiers require a value representable as unsigned char;
        // passing a negative plain char is undefined behaviour.
        const auto ch = static_cast<unsigned char>(id[i]);
        const bool badFirst = (i == 0) && (std::isalpha(ch) == 0);
        const bool badAnywhere = (std::isalnum(ch) == 0) && (id[i] != '_');
        if (badFirst || badAnywhere) {
            id[i] = '_';
        }
    }
    return id;
}
437
438// TODO (b-scholz): tidy up unescape/escape functions
439
/**
 * Returns a copy of `inputString` in which every occurrence of `needle` is replaced
 * by `replacement`. Text produced by a replacement is not searched again.
 *
 * An empty `needle` matches nowhere: the input is returned unchanged (previously this
 * case never terminated).
 */
inline std::string unescape(
        const std::string& inputString, const std::string& needle, const std::string& replacement) {
    std::string result = inputString;
    if (needle.empty()) {
        return result;
    }

    std::size_t pos = 0;
    while ((pos = result.find(needle, pos)) != std::string::npos) {
        result.replace(pos, needle.length(), replacement);
        // continue behind the inserted text
        pos += replacement.length();
    }
    return result;
}
450
451inline std::string unescape(const std::string& inputString) {
452 std::string unescaped = unescape(inputString, "\\\"", "\"");
453 unescaped = unescape(unescaped, "\\t", "\t");
454 unescaped = unescape(unescaped, "\\r", "\r");
455 unescaped = unescape(unescaped, "\\n", "\n");
456 return unescaped;
457}
458
/**
 * Returns a copy of `inputString` in which every occurrence of `needle` is replaced
 * by `replacement`. Text produced by a replacement is not searched again.
 *
 * An empty `needle` matches nowhere: the input is returned unchanged (previously this
 * case never terminated).
 */
inline std::string escape(
        const std::string& inputString, const std::string& needle, const std::string& replacement) {
    std::string result = inputString;
    if (needle.empty()) {
        return result;
    }

    std::size_t pos = 0;
    while ((pos = result.find(needle, pos)) != std::string::npos) {
        result.replace(pos, needle.length(), replacement);
        // continue behind the inserted text so it is not escaped again
        pos += replacement.length();
    }
    return result;
}
469
470inline std::string escape(const std::string& inputString) {
471 std::string escaped = escape(inputString, "\"", "\\\"");
472 escaped = escape(escaped, "\t", "\\t");
473 escaped = escape(escaped, "\r", "\\r");
474 escaped = escape(escaped, "\n", "\\n");
475 return escaped;
476}
477
/**
 * Writes `str` to the sink `os`, emitting `esc` in front of every character that is
 * contained in `needs_escape`.
 *
 * @param os           output sink supporting `operator<<` for std::string_view and char
 * @param str          text to write
 * @param needs_escape characters that must be preceded by `esc`
 * @param esc          escape text
 * @return the sink itself: an rvalue sink is moved into the result (as before), an lvalue
 *         sink is returned by reference. The previous `auto` return type tried to copy
 *         lvalue sinks, which does not compile for standard streams.
 */
template <typename C>
C escape(C&& os, std::string_view str, std::set<char> const& needs_escape, std::string_view esc) {
    for (const char ch : str) {
        if (needs_escape.find(ch) != needs_escape.end()) {
            os << esc;
        }
        os << ch;
    }

    return std::forward<C>(os);
}
489
490inline std::string escape(std::string_view str, std::set<char> const& needs_escape, std::string_view esc) {
491 return escape(std::stringstream{}, str, needs_escape, esc).str();
492}
493
494} // end namespace souffle