| 1 | #include "mycpp/gc_str.h"
 | 
| 2 | 
 | 
| 3 | #include <ctype.h>  // isalpha(), isdigit()
 | 
| 4 | #include <stdarg.h>
 | 
| 5 | 
 | 
| 6 | #include <regex>
 | 
| 7 | 
 | 
| 8 | #include "mycpp/common.h"
 | 
| 9 | #include "mycpp/gc_alloc.h"     // NewStr()
 | 
| 10 | #include "mycpp/gc_builtins.h"  // StringToInt()
 | 
| 11 | #include "mycpp/gc_list.h"      // join(), split() use it
 | 
| 12 | 
 | 
| 13 | GLOBAL_STR(kEmptyString, "");
 | 
| 14 | 
 | 
| 15 | static const std::regex gStrFmtRegex("([^%]*)(?:%(-?[0-9]*)(.))?");
 | 
| 16 | static const int kMaxFmtWidth = 256;  // arbitrary...
 | 
| 17 | 
 | 
| 18 | int BigStr::find(BigStr* needle, int start, int end) {
 | 
| 19 |   if (end == -1) {
 | 
| 20 |     end = len(this);
 | 
| 21 |   }
 | 
| 22 |   int needle_len = len(needle);
 | 
| 23 | 
 | 
| 24 |   if (needle_len > (end - start)) {
 | 
| 25 |     return -1;  // needle is too long to be found (Python behavior)
 | 
| 26 |   }
 | 
| 27 | 
 | 
| 28 |   if (needle_len == 1) {
 | 
| 29 |     char c = needle->data_[0];
 | 
| 30 |     // For 'aaa'.find('a', 0, 1)
 | 
| 31 |     // end = 1, needle_len = 1, last_start = 1 which means we go through once
 | 
| 32 |     for (int i = start; i < end; ++i) {
 | 
| 33 |       if (data_[i] == c) {
 | 
| 34 |         return i;
 | 
| 35 |       }
 | 
| 36 |     }
 | 
| 37 |   } else {
 | 
| 38 |     // Note: this works for finding the empty string.  Empty string is found in
 | 
| 39 |     // empty range like [5, 5), but not in [5, 4)
 | 
| 40 | 
 | 
| 41 |     // For 'aaa'.find('aa', 0, 2)
 | 
| 42 |     // end = 2, needle_len = 2, last_start = 1 which means we go through once
 | 
| 43 | 
 | 
| 44 |     int last_start = end - needle_len + 1;
 | 
| 45 |     // could use a smarter substring search algorithm
 | 
| 46 |     for (int i = start; i < last_start; ++i) {
 | 
| 47 |       if (memcmp(data_ + i, needle->data_, needle_len) == 0) {
 | 
| 48 |         return i;
 | 
| 49 |       }
 | 
| 50 |     }
 | 
| 51 |   }
 | 
| 52 |   return -1;
 | 
| 53 | }
 | 
| 54 | 
 | 
| 55 | int BigStr::rfind(BigStr* needle) {
 | 
| 56 |   int length = len(this);
 | 
| 57 |   DCHECK(len(needle) == 1);  // Oils usage
 | 
| 58 |   char c = needle->data_[0];
 | 
| 59 |   for (int i = length - 1; i >= 0; --i) {
 | 
| 60 |     if (data_[i] == c) {
 | 
| 61 |       return i;
 | 
| 62 |     }
 | 
| 63 |   }
 | 
| 64 |   return -1;
 | 
| 65 | }
 | 
| 66 | 
 | 
| 67 | bool BigStr::isdigit() {
 | 
| 68 |   int n = len(this);
 | 
| 69 |   if (n == 0) {
 | 
| 70 |     return false;  // special case
 | 
| 71 |   }
 | 
| 72 |   for (int i = 0; i < n; ++i) {
 | 
| 73 |     if (!::isdigit(data_[i])) {
 | 
| 74 |       return false;
 | 
| 75 |     }
 | 
| 76 |   }
 | 
| 77 |   return true;
 | 
| 78 | }
 | 
| 79 | 
 | 
| 80 | bool BigStr::isalpha() {
 | 
| 81 |   int n = len(this);
 | 
| 82 |   if (n == 0) {
 | 
| 83 |     return false;  // special case
 | 
| 84 |   }
 | 
| 85 |   for (int i = 0; i < n; ++i) {
 | 
| 86 |     if (!::isalpha(data_[i])) {
 | 
| 87 |       return false;
 | 
| 88 |     }
 | 
| 89 |   }
 | 
| 90 |   return true;
 | 
| 91 | }
 | 
| 92 | 
 | 
| 93 | // e.g. for osh/braces.py
 | 
| 94 | bool BigStr::isupper() {
 | 
| 95 |   int n = len(this);
 | 
| 96 |   if (n == 0) {
 | 
| 97 |     return false;  // special case
 | 
| 98 |   }
 | 
| 99 |   for (int i = 0; i < n; ++i) {
 | 
| 100 |     if (!::isupper(data_[i])) {
 | 
| 101 |       return false;
 | 
| 102 |     }
 | 
| 103 |   }
 | 
| 104 |   return true;
 | 
| 105 | }
 | 
| 106 | 
 | 
| 107 | bool BigStr::startswith(BigStr* s) {
 | 
| 108 |   int n = len(s);
 | 
| 109 |   if (n > len(this)) {
 | 
| 110 |     return false;
 | 
| 111 |   }
 | 
| 112 |   return memcmp(data_, s->data_, n) == 0;
 | 
| 113 | }
 | 
| 114 | 
 | 
| 115 | bool BigStr::endswith(BigStr* s) {
 | 
| 116 |   int len_s = len(s);
 | 
| 117 |   int len_this = len(this);
 | 
| 118 |   if (len_s > len_this) {
 | 
| 119 |     return false;
 | 
| 120 |   }
 | 
| 121 |   const char* start = data_ + len_this - len_s;
 | 
| 122 |   return memcmp(start, s->data_, len_s) == 0;
 | 
| 123 | }
 | 
| 124 | 
 | 
| 125 | // Get a string with one character
 | 
| 126 | BigStr* BigStr::at(int i) {
 | 
| 127 |   int length = len(this);
 | 
| 128 |   if (i < 0) {
 | 
| 129 |     i = length + i;
 | 
| 130 |   }
 | 
| 131 |   DCHECK(0 <= i);
 | 
| 132 |   DCHECK(i < length);  // had a problem here!
 | 
| 133 | 
 | 
| 134 |   BigStr* result = NewStr(1);
 | 
| 135 |   result->data_[0] = data_[i];
 | 
| 136 |   return result;
 | 
| 137 | }
 | 
| 138 | 
 | 
| 139 | // s[begin:]
 | 
| 140 | BigStr* BigStr::slice(int begin) {
 | 
| 141 |   return slice(begin, len(this));
 | 
| 142 | }
 | 
| 143 | 
 | 
| 144 | // s[begin:end]
 | 
| 145 | BigStr* BigStr::slice(int begin, int end) {
 | 
| 146 |   int length = len(this);
 | 
| 147 |   SLICE_ADJUST(begin, end, length);
 | 
| 148 | 
 | 
| 149 |   DCHECK(0 <= begin && begin <= length);
 | 
| 150 |   DCHECK(0 <= end && end <= length);
 | 
| 151 | 
 | 
| 152 |   int new_len = end - begin;
 | 
| 153 |   DCHECK(0 <= new_len && new_len <= length);
 | 
| 154 | 
 | 
| 155 |   BigStr* result = NewStr(new_len);  // has kEmptyString optimization
 | 
| 156 |   memcpy(result->data_, data_ + begin, new_len);
 | 
| 157 | 
 | 
| 158 |   return result;
 | 
| 159 | }
 | 
| 160 | 
 | 
| 161 | // Used by 'help' builtin and --help, neither of which translate yet.
 | 
| 162 | 
 | 
| 163 | List<BigStr*>* BigStr::splitlines(bool keep) {
 | 
| 164 |   DCHECK(keep == true);
 | 
| 165 |   FAIL(kNotImplemented);
 | 
| 166 | }
 | 
| 167 | 
 | 
| 168 | BigStr* BigStr::upper() {
 | 
| 169 |   int length = len(this);
 | 
| 170 |   BigStr* result = NewStr(length);
 | 
| 171 |   char* buffer = result->data();
 | 
| 172 |   for (int char_index = 0; char_index < length; ++char_index) {
 | 
| 173 |     buffer[char_index] = toupper(data_[char_index]);
 | 
| 174 |   }
 | 
| 175 |   return result;
 | 
| 176 | }
 | 
| 177 | 
 | 
| 178 | BigStr* BigStr::lower() {
 | 
| 179 |   int length = len(this);
 | 
| 180 |   BigStr* result = NewStr(length);
 | 
| 181 |   char* buffer = result->data();
 | 
| 182 |   for (int char_index = 0; char_index < length; ++char_index) {
 | 
| 183 |     buffer[char_index] = tolower(data_[char_index]);
 | 
| 184 |   }
 | 
| 185 |   return result;
 | 
| 186 | }
 | 
| 187 | 
 | 
| 188 | BigStr* BigStr::ljust(int width, BigStr* fillchar) {
 | 
| 189 |   DCHECK(len(fillchar) == 1);
 | 
| 190 | 
 | 
| 191 |   int length = len(this);
 | 
| 192 |   int num_fill = width - length;
 | 
| 193 |   if (num_fill < 0) {
 | 
| 194 |     return this;
 | 
| 195 |   } else {
 | 
| 196 |     BigStr* result = NewStr(width);
 | 
| 197 |     char c = fillchar->data_[0];
 | 
| 198 |     memcpy(result->data_, data_, length);
 | 
| 199 |     for (int i = length; i < width; ++i) {
 | 
| 200 |       result->data_[i] = c;
 | 
| 201 |     }
 | 
| 202 |     return result;
 | 
| 203 |   }
 | 
| 204 | }
 | 
| 205 | 
 | 
| 206 | BigStr* BigStr::rjust(int width, BigStr* fillchar) {
 | 
| 207 |   DCHECK(len(fillchar) == 1);
 | 
| 208 | 
 | 
| 209 |   int length = len(this);
 | 
| 210 |   int num_fill = width - length;
 | 
| 211 |   if (num_fill < 0) {
 | 
| 212 |     return this;
 | 
| 213 |   } else {
 | 
| 214 |     BigStr* result = NewStr(width);
 | 
| 215 |     char c = fillchar->data_[0];
 | 
| 216 |     for (int i = 0; i < num_fill; ++i) {
 | 
| 217 |       result->data_[i] = c;
 | 
| 218 |     }
 | 
| 219 |     memcpy(result->data_ + num_fill, data_, length);
 | 
| 220 |     return result;
 | 
| 221 |   }
 | 
| 222 | }
 | 
| 223 | 
 | 
| 224 | BigStr* BigStr::replace(BigStr* old, BigStr* new_str) {
 | 
| 225 |   // Use -1 as in python2: "aaaa".replace(-1) -> "AAAA"
 | 
| 226 |   return replace(old, new_str, -1);
 | 
| 227 | }
 | 
| 228 | 
 | 
| 229 | BigStr* BigStr::replace(BigStr* old, BigStr* new_str, int count) {
 | 
| 230 |   // log("replacing %s with %s", old_data, new_str->data_);
 | 
| 231 |   const char* old_data = old->data_;
 | 
| 232 | 
 | 
| 233 |   int this_len = len(this);
 | 
| 234 |   int old_len = len(old);
 | 
| 235 | 
 | 
| 236 |   const char* last_possible = data_ + this_len - old_len;
 | 
| 237 | 
 | 
| 238 |   const char* p_this = data_;  // advances through 'this'
 | 
| 239 | 
 | 
| 240 |   // First pass: Calculate number of replacements, and hence new length
 | 
| 241 |   int replace_count = 0;
 | 
| 242 |   if (old_len == 0) {
 | 
| 243 |     replace_count = this_len + 1;
 | 
| 244 |     if (count > 0) {
 | 
| 245 |       replace_count = min(replace_count, count);
 | 
| 246 |     }
 | 
| 247 |   } else {
 | 
| 248 |     while (p_this <= last_possible) {
 | 
| 249 |       if (replace_count != count &&  // limit replacements (if count != -1)
 | 
| 250 |           memcmp(p_this, old_data, old_len) == 0) {  // equal
 | 
| 251 |         replace_count++;
 | 
| 252 |         p_this += old_len;
 | 
| 253 |       } else {
 | 
| 254 |         p_this++;
 | 
| 255 |       }
 | 
| 256 |     }
 | 
| 257 |   }
 | 
| 258 | 
 | 
| 259 |   // log("replacements %d", replace_count);
 | 
| 260 | 
 | 
| 261 |   if (replace_count == 0) {
 | 
| 262 |     return this;  // Reuse the string if there were no replacements
 | 
| 263 |   }
 | 
| 264 | 
 | 
| 265 |   int new_str_len = len(new_str);
 | 
| 266 |   int result_len =
 | 
| 267 |       this_len - (replace_count * old_len) + (replace_count * new_str_len);
 | 
| 268 | 
 | 
| 269 |   BigStr* result = NewStr(result_len);
 | 
| 270 | 
 | 
| 271 |   const char* new_data = new_str->data_;
 | 
| 272 |   const size_t new_len = new_str_len;
 | 
| 273 | 
 | 
| 274 |   // Second pass: Copy pieces into 'result'
 | 
| 275 |   p_this = data_;                  // back to beginning
 | 
| 276 |   char* p_result = result->data_;  // advances through 'result'
 | 
| 277 |   replace_count = 0;
 | 
| 278 | 
 | 
| 279 |   if (old_len == 0) {
 | 
| 280 |     // Should place new_str between each char in this
 | 
| 281 |     while (p_this < last_possible && replace_count != count) {
 | 
| 282 |       replace_count++;
 | 
| 283 |       memcpy(p_result, new_data, new_len);  // Copy from new_str
 | 
| 284 |       p_result += new_len;                  // Move past new_str
 | 
| 285 | 
 | 
| 286 |       // Write a char from this
 | 
| 287 |       *p_result = *p_this;
 | 
| 288 |       p_this++;
 | 
| 289 |       p_result++;
 | 
| 290 |     }
 | 
| 291 | 
 | 
| 292 |     if (replace_count != count) {
 | 
| 293 |       // Write a copy of new_str at the end
 | 
| 294 |       assert(p_this == last_possible);
 | 
| 295 |       memcpy(p_result, new_data, new_len);
 | 
| 296 |     } else if (p_this <= last_possible) {
 | 
| 297 |       // Write the last part of string
 | 
| 298 |       memcpy(p_result, p_this, data_ + this_len - p_this);
 | 
| 299 |     }
 | 
| 300 |   } else {
 | 
| 301 |     while (p_this <= last_possible) {
 | 
| 302 |       // Note: would be more efficient if we remembered the match positions
 | 
| 303 |       if (replace_count != count &&  // limit replacements (if count != -1)
 | 
| 304 |           memcmp(p_this, old_data, old_len) == 0) {  // equal
 | 
| 305 |         memcpy(p_result, new_data, new_len);         // Copy from new_str
 | 
| 306 |         replace_count++;
 | 
| 307 |         p_result += new_len;
 | 
| 308 |         p_this += old_len;
 | 
| 309 |       } else {  // copy 1 byte
 | 
| 310 |         *p_result = *p_this;
 | 
| 311 |         p_result++;
 | 
| 312 |         p_this++;
 | 
| 313 |       }
 | 
| 314 |     }
 | 
| 315 |     memcpy(p_result, p_this, data_ + this_len - p_this);  // last part of string
 | 
| 316 |   }
 | 
| 317 | 
 | 
| 318 |   return result;
 | 
| 319 | }
 | 
| 320 | 
 | 
| 321 | enum class StripWhere {
 | 
| 322 |   Left,
 | 
| 323 |   Right,
 | 
| 324 |   Both,
 | 
| 325 | };
 | 
| 326 | 
 | 
| 327 | const int kWhitespace = -1;
 | 
| 328 | 
 | 
| 329 | bool OmitChar(int ch, int what) {
 | 
| 330 |   if (what == kWhitespace) {
 | 
| 331 |     // Intentional incompatibility with Python, where say \v is whitespace
 | 
| 332 |     // '\v'.strip() == ''
 | 
| 333 |     //
 | 
| 334 |     // But it is consistent with the JSON spec [ \t\r\n] and the rules in
 | 
| 335 |     // frontend/lexer_def.py
 | 
| 336 |     //
 | 
| 337 |     // Note that the YSH is separate, and Str => trim() respects Unicode.
 | 
| 338 |     return IsAsciiWhitespace(ch);
 | 
| 339 |   } else {
 | 
| 340 |     return what == ch;
 | 
| 341 |   }
 | 
| 342 | }
 | 
| 343 | 
 | 
| 344 | // StripAny is modeled after CPython's do_strip() in stringobject.c, and can
 | 
| 345 | // implement 6 functions:
 | 
| 346 | //
 | 
| 347 | //   strip / lstrip / rstrip
 | 
| 348 | //   strip(char) / lstrip(char) / rstrip(char)
 | 
| 349 | //
 | 
| 350 | // Args:
 | 
| 351 | //   where: which ends to strip from
 | 
| 352 | //   what: kWhitespace, or an ASCII code 0-255
 | 
| 353 | 
 | 
| 354 | BigStr* StripAny(BigStr* s, StripWhere where, int what) {
 | 
| 355 |   int length = len(s);
 | 
| 356 |   const char* char_data = s->data();
 | 
| 357 | 
 | 
| 358 |   int i = 0;
 | 
| 359 |   if (where != StripWhere::Right) {
 | 
| 360 |     while (i < length && OmitChar(char_data[i], what)) {
 | 
| 361 |       i++;
 | 
| 362 |     }
 | 
| 363 |   }
 | 
| 364 | 
 | 
| 365 |   int j = length;
 | 
| 366 |   if (where != StripWhere::Left) {
 | 
| 367 |     do {
 | 
| 368 |       j--;
 | 
| 369 |     } while (j >= i && OmitChar(char_data[j], what));
 | 
| 370 |     j++;
 | 
| 371 |   }
 | 
| 372 | 
 | 
| 373 |   if (i == j) {  // Optimization to reuse existing object
 | 
| 374 |     return kEmptyString;
 | 
| 375 |   }
 | 
| 376 | 
 | 
| 377 |   if (i == 0 && j == length) {  // nothing stripped
 | 
| 378 |     return s;
 | 
| 379 |   }
 | 
| 380 | 
 | 
| 381 |   // Note: makes a copy in leaky version, and will in GC version too
 | 
| 382 |   int new_len = j - i;
 | 
| 383 |   BigStr* result = NewStr(new_len);
 | 
| 384 |   memcpy(result->data(), s->data() + i, new_len);
 | 
| 385 |   return result;
 | 
| 386 | }
 | 
| 387 | 
 | 
| 388 | BigStr* BigStr::strip() {
 | 
| 389 |   return StripAny(this, StripWhere::Both, kWhitespace);
 | 
| 390 | }
 | 
| 391 | 
 | 
| 392 | // Used for CommandSub in osh/cmd_exec.py
 | 
| 393 | BigStr* BigStr::rstrip(BigStr* chars) {
 | 
| 394 |   DCHECK(len(chars) == 1);
 | 
| 395 |   int c = chars->data_[0];
 | 
| 396 |   return StripAny(this, StripWhere::Right, c);
 | 
| 397 | }
 | 
| 398 | 
 | 
| 399 | BigStr* BigStr::rstrip() {
 | 
| 400 |   return StripAny(this, StripWhere::Right, kWhitespace);
 | 
| 401 | }
 | 
| 402 | 
 | 
| 403 | BigStr* BigStr::lstrip(BigStr* chars) {
 | 
| 404 |   DCHECK(len(chars) == 1);
 | 
| 405 |   int c = chars->data_[0];
 | 
| 406 |   return StripAny(this, StripWhere::Left, c);
 | 
| 407 | }
 | 
| 408 | 
 | 
| 409 | BigStr* BigStr::lstrip() {
 | 
| 410 |   return StripAny(this, StripWhere::Left, kWhitespace);
 | 
| 411 | }
 | 
| 412 | 
 | 
| 413 | BigStr* BigStr::join(List<BigStr*>* items) {
 | 
| 414 |   int length = 0;
 | 
| 415 | 
 | 
| 416 |   int num_parts = len(items);
 | 
| 417 | 
 | 
| 418 |   // " ".join([]) == ""
 | 
| 419 |   if (num_parts == 0) {
 | 
| 420 |     return kEmptyString;
 | 
| 421 |   }
 | 
| 422 | 
 | 
| 423 |   // Common case
 | 
| 424 |   // 'anything'.join(["foo"]) == "foo"
 | 
| 425 |   if (num_parts == 1) {
 | 
| 426 |     return items->at(0);
 | 
| 427 |   }
 | 
| 428 | 
 | 
| 429 |   for (int i = 0; i < num_parts; ++i) {
 | 
| 430 |     length += len(items->at(i));
 | 
| 431 |   }
 | 
| 432 |   // add length of all the separators
 | 
| 433 |   int this_len = len(this);
 | 
| 434 |   length += this_len * (num_parts - 1);
 | 
| 435 | 
 | 
| 436 |   BigStr* result = NewStr(length);
 | 
| 437 |   char* p_result = result->data_;  // advances through
 | 
| 438 | 
 | 
| 439 |   for (int i = 0; i < num_parts; ++i) {
 | 
| 440 |     // log("i %d", i);
 | 
| 441 |     if (i != 0 && this_len) {             // optimize common case of ''.join()
 | 
| 442 |       memcpy(p_result, data_, this_len);  // copy the separator
 | 
| 443 |       p_result += this_len;
 | 
| 444 |       // log("this_len %d", this_len);
 | 
| 445 |     }
 | 
| 446 | 
 | 
| 447 |     int n = len(items->at(i));
 | 
| 448 |     // log("n: %d", n);
 | 
| 449 |     memcpy(p_result, items->at(i)->data_, n);  // copy the list item
 | 
| 450 |     p_result += n;
 | 
| 451 |   }
 | 
| 452 | 
 | 
| 453 |   return result;
 | 
| 454 | }
 | 
| 455 | 
 | 
| 456 | static void AppendPart(List<BigStr*>* result, BigStr* s, int left, int right) {
 | 
| 457 |   int new_len = right - left;
 | 
| 458 |   BigStr* part;
 | 
| 459 |   if (new_len == 0) {
 | 
| 460 |     part = kEmptyString;
 | 
| 461 |   } else {
 | 
| 462 |     part = NewStr(new_len);
 | 
| 463 |     memcpy(part->data_, s->data_ + left, new_len);
 | 
| 464 |   }
 | 
| 465 |   result->append(part);
 | 
| 466 | }
 | 
| 467 | 
 | 
| 468 | // Split BigStr into List<BigStr*> of parts separated by 'sep'.
 | 
| 469 | // The code structure is taken from CPython's Objects/stringlib/split.h.
 | 
| 470 | List<BigStr*>* BigStr::split(BigStr* sep, int max_split) {
 | 
| 471 |   DCHECK(sep != nullptr);
 | 
| 472 |   DCHECK(len(sep) == 1);  // we can only split one char
 | 
| 473 |   char sep_char = sep->data_[0];
 | 
| 474 | 
 | 
| 475 |   int str_len = len(this);
 | 
| 476 |   if (str_len == 0) {
 | 
| 477 |     // weird case consistent with Python: ''.split(':') == ['']
 | 
| 478 |     return NewList<BigStr*>({kEmptyString});
 | 
| 479 |   }
 | 
| 480 | 
 | 
| 481 |   List<BigStr*>* result = NewList<BigStr*>({});
 | 
| 482 |   int left = 0;
 | 
| 483 |   int right = 0;
 | 
| 484 |   int num_parts = 0;  // 3 splits results in 4 parts
 | 
| 485 | 
 | 
| 486 |   while (right < str_len && num_parts < max_split) {
 | 
| 487 |     // search for separator
 | 
| 488 |     for (; right < str_len; right++) {
 | 
| 489 |       if (data_[right] == sep_char) {
 | 
| 490 |         AppendPart(result, this, left, right);
 | 
| 491 |         right++;
 | 
| 492 |         left = right;
 | 
| 493 |         num_parts++;
 | 
| 494 |         break;
 | 
| 495 |       }
 | 
| 496 |     }
 | 
| 497 |   }
 | 
| 498 |   if (num_parts == 0) {  // Optimization when there is no split
 | 
| 499 |     result->append(this);
 | 
| 500 |   } else if (left <= str_len) {  // Last part
 | 
| 501 |     AppendPart(result, this, left, str_len);
 | 
| 502 |   }
 | 
| 503 | 
 | 
| 504 |   return result;
 | 
| 505 | }
 | 
| 506 | 
 | 
| 507 | List<BigStr*>* BigStr::split(BigStr* sep) {
 | 
| 508 |   return this->split(sep, len(this));
 | 
| 509 | }
 | 
| 510 | 
 | 
| 511 | unsigned BigStr::hash(HashFunc h) {
 | 
| 512 |   if (!is_hashed_) {
 | 
| 513 |     hash_ = h(data_, len(this)) >> 1;
 | 
| 514 |     is_hashed_ = 1;
 | 
| 515 |   }
 | 
| 516 |   return hash_;
 | 
| 517 | }
 | 
| 518 | 
 | 
| 519 | static inline BigStr* _StrFormat(const char* fmt, int fmt_len, va_list args) {
 | 
| 520 |   auto beg = std::cregex_iterator(fmt, fmt + fmt_len, gStrFmtRegex);
 | 
| 521 |   auto end = std::cregex_iterator();
 | 
| 522 | 
 | 
| 523 |   char int_buf[kMaxFmtWidth];
 | 
| 524 |   std::string buf;
 | 
| 525 |   for (std::cregex_iterator it = beg; it != end; ++it) {
 | 
| 526 |     const std::cmatch& match = *it;
 | 
| 527 | 
 | 
| 528 |     const std::csub_match& lit_m = match[1];
 | 
| 529 |     DCHECK(lit_m.matched);
 | 
| 530 |     const std::string& lit_s = lit_m.str();
 | 
| 531 |     buf.append(lit_s);
 | 
| 532 | 
 | 
| 533 |     int width = 0;
 | 
| 534 |     bool zero_pad = false;
 | 
| 535 |     bool pad_back = false;
 | 
| 536 |     const std::csub_match& width_m = match[2];
 | 
| 537 |     const std::string& width_s = width_m.str();
 | 
| 538 |     bool ok = false;
 | 
| 539 |     if (width_m.matched && !width_s.empty()) {
 | 
| 540 |       if (width_s[0] == '0') {
 | 
| 541 |         zero_pad = true;
 | 
| 542 |         DCHECK(width_s.size() > 1);
 | 
| 543 |         ok = StringToInt(width_s.c_str() + 1, width_s.size() - 1, 10, &width);
 | 
| 544 |         DCHECK(ok);
 | 
| 545 |         (void)ok;  // silence unused var warning in opt
 | 
| 546 |       } else {
 | 
| 547 |         ok = StringToInt(width_s.c_str(), width_s.size(), 10, &width);
 | 
| 548 |         DCHECK(ok);
 | 
| 549 |       }
 | 
| 550 |       if (width < 0) {
 | 
| 551 |         pad_back = true;
 | 
| 552 |         width *= -1;
 | 
| 553 |       }
 | 
| 554 |       DCHECK(0 <= width && width < kMaxFmtWidth);
 | 
| 555 |     }
 | 
| 556 | 
 | 
| 557 |     char const* str_to_add = nullptr;
 | 
| 558 |     int add_len = 0;
 | 
| 559 |     const std::csub_match& code_m = match[3];
 | 
| 560 |     const std::string& code_s = code_m.str();
 | 
| 561 |     if (!code_m.matched) {
 | 
| 562 |       DCHECK(!width_m.matched);  // python errors on invalid format operators
 | 
| 563 |       break;
 | 
| 564 |     }
 | 
| 565 |     DCHECK(code_s.size() == 1);
 | 
| 566 |     switch (code_s[0]) {
 | 
| 567 |     case '%': {
 | 
| 568 |       str_to_add = code_s.c_str();
 | 
| 569 |       add_len = 1;
 | 
| 570 |       break;
 | 
| 571 |     }
 | 
| 572 |     case 's': {
 | 
| 573 |       BigStr* s = va_arg(args, BigStr*);
 | 
| 574 |       // Check type unconditionally because mycpp doesn't always check it
 | 
| 575 |       CHECK(ObjHeader::FromObject(s)->type_tag == TypeTag::BigStr);
 | 
| 576 | 
 | 
| 577 |       str_to_add = s->data();
 | 
| 578 |       add_len = len(s);
 | 
| 579 |       zero_pad = false;  // python ignores the 0 directive for strings
 | 
| 580 |       break;
 | 
| 581 |     }
 | 
| 582 |     case 'r': {
 | 
| 583 |       BigStr* s = va_arg(args, BigStr*);
 | 
| 584 |       // Check type unconditionally because mycpp doesn't always check it
 | 
| 585 |       CHECK(ObjHeader::FromObject(s)->type_tag == TypeTag::BigStr);
 | 
| 586 | 
 | 
| 587 |       s = repr(s);
 | 
| 588 |       str_to_add = s->data();
 | 
| 589 |       add_len = len(s);
 | 
| 590 |       zero_pad = false;  // python ignores the 0 directive for strings
 | 
| 591 |       break;
 | 
| 592 |     }
 | 
| 593 |     case 'd':  // fallthrough
 | 
| 594 |     case 'o': {
 | 
| 595 |       int d = va_arg(args, int);
 | 
| 596 |       add_len = snprintf(int_buf, kMaxFmtWidth,
 | 
| 597 |                          match.str().c_str() + lit_s.size(), d);
 | 
| 598 |       DCHECK(add_len > 0);
 | 
| 599 |       str_to_add = int_buf;
 | 
| 600 |       break;
 | 
| 601 |     }
 | 
| 602 |     default:
 | 
| 603 |       DCHECK(0);
 | 
| 604 |       break;
 | 
| 605 |     }
 | 
| 606 |     DCHECK(str_to_add != nullptr);
 | 
| 607 | 
 | 
| 608 |     if (pad_back) {
 | 
| 609 |       buf.append(str_to_add, add_len);
 | 
| 610 |     }
 | 
| 611 |     if (add_len < width) {
 | 
| 612 |       for (int i = 0; i < width - add_len; ++i) {
 | 
| 613 |         buf.push_back(zero_pad ? '0' : ' ');
 | 
| 614 |       }
 | 
| 615 |     }
 | 
| 616 |     if (!pad_back) {
 | 
| 617 |       buf.append(str_to_add, add_len);
 | 
| 618 |     }
 | 
| 619 |   }
 | 
| 620 | 
 | 
| 621 |   return StrFromC(buf.c_str(), buf.size());
 | 
| 622 | }
 | 
| 623 | 
 | 
| 624 | BigStr* StrIter::Value() {  // similar to at()
 | 
| 625 |   BigStr* result = NewStr(1);
 | 
| 626 |   result->data_[0] = s_->data_[i_];
 | 
| 627 |   DCHECK(result->data_[1] == '\0');
 | 
| 628 |   return result;
 | 
| 629 | }
 | 
| 630 | 
 | 
| 631 | BigStr* StrFormat(const char* fmt, ...) {
 | 
| 632 |   va_list args;
 | 
| 633 |   va_start(args, fmt);
 | 
| 634 |   BigStr* ret = _StrFormat(fmt, strlen(fmt), args);
 | 
| 635 |   va_end(args);
 | 
| 636 |   return ret;
 | 
| 637 | }
 | 
| 638 | 
 | 
| 639 | BigStr* StrFormat(BigStr* fmt, ...) {
 | 
| 640 |   va_list args;
 | 
| 641 |   va_start(args, fmt);
 | 
| 642 |   BigStr* ret = _StrFormat(fmt->data(), len(fmt), args);
 | 
| 643 |   va_end(args);
 | 
| 644 |   return ret;
 | 
| 645 | }
 |