| 1 | // Micro Syntax
 | 
| 2 | //
 | 
| 3 | // See doctools/micro-syntax.md
 | 
| 4 | 
 | 
| 5 | #include "micro_syntax.h"  // requires -I $BASE_DIR
 | 
| 6 | 
 | 
| 7 | #include <assert.h>
 | 
| 8 | #include <errno.h>
 | 
| 9 | #include <getopt.h>
 | 
| 10 | #include <stdarg.h>  // va_list, etc.
 | 
| 11 | #include <stdbool.h>
 | 
| 12 | #include <stdio.h>
 | 
| 13 | #include <stdlib.h>  // free
 | 
| 14 | #include <string.h>
 | 
| 15 | 
 | 
| 16 | #include <string>
 | 
| 17 | #include <vector>
 | 
| 18 | 
 | 
// ANSI escape sequences written to stdout by AnsiPrinter / AnsiOutput.

// Text attributes.  RESET is emitted after every colored span.
const char* RESET = "\x1b[0;0m";
const char* BOLD = "\x1b[1m";
const char* UNDERLINE = "\x1b[4m";
const char* REVERSE = "\x1b[7m";  // reverse video

// Foreground colors, codes 30-37
const char* BLACK = "\x1b[30m";
const char* RED = "\x1b[31m";
const char* GREEN = "\x1b[32m";
const char* YELLOW = "\x1b[33m";
const char* BLUE = "\x1b[34m";
const char* PURPLE = "\x1b[35m";
const char* CYAN = "\x1b[36m";
const char* WHITE = "\x1b[37m";

// High-intensity foreground variants, codes 90-97
const char* BLACK2 = "\x1b[90m";
const char* RED2 = "\x1b[91m";
const char* BLUE2 = "\x1b[94m";
 | 
| 36 | 
 | 
// Writes a printf-style formatted message to stderr, followed by a newline.
void Log(const char* fmt, ...) {
  va_list ap;
  va_start(ap, fmt);
  vfprintf(stderr, fmt, ap);
  va_end(ap);

  fputc('\n', stderr);
}
 | 
| 44 | 
 | 
// Source language of the input.  ScanFiles() switches on this to choose the
// lexer mode type that ScanOne<T>() is instantiated with.
//
// NOTE(review): the switch in ScanFiles() has no case for Ysh, so that value
// reaches its default assert(0) -- confirm whether this is intended.
enum class lang_e {
  PlainText,

  Cpp,  // including C
  Py,
  Shell,
  Ysh,  // ''' etc.
  Asdl,
  R,  // uses # comments

  // JS,  // uses // comments
};
 | 
| 57 | 
 | 
// Reads a stream one line at a time with POSIX getline().
//
// We don't care about internal NUL, so this interface doesn't allow it: each
// line is exposed as a NUL-terminated C string.
class Reader {
 public:
  // f: stream to read from; this class never closes it.
  // filename: for error messages only, nullptr for stdin.
  Reader(FILE* f, const char* filename)
      : f_(f),
        filename_(filename),
        line_(nullptr),
        allocated_size_(0),
        err_num_(0) {
  }

  // The getline() buffer is owned by this object; a copy would free it twice.
  Reader(const Reader&) = delete;
  Reader& operator=(const Reader&) = delete;

  ~Reader() {
    // Release the buffer if the caller stopped before EOF.  free(nullptr) is
    // a no-op, so this is also fine after EOF or an error.
    free(line_);
  }

  const char* Filename() {  // for error messages only, nullptr for stdin
    return filename_;
  }

  // Reads the next line.
  //
  // Returns false if there was an I/O error, and sets err_num_.
  // Returns true otherwise; Current() is then the line, or nullptr on EOF.
  bool NextLine() {
    // getline() returns -1 for both EOF and failure, and only failure sets
    // errno.  Clear it first so that a stale value left by an unrelated
    // earlier call is not mistaken for a read error at EOF.
    errno = 0;

    // Note: getline() reuses / reallocates the previous buffer, so we don't
    // have to free it between lines.
    ssize_t len = getline(&line_, &allocated_size_, f_);

    if (len < 0) {  // EOF is -1
      int saved_errno = errno;  // don't let free() disturb it

      // man page says the buffer should be freed if getline() fails
      free(line_);
      line_ = nullptr;  // tell the caller not to continue
      allocated_size_ = 0;

      if (saved_errno != 0) {  // I/O error
        err_num_ = saved_errno;
        return false;
      }
    }
    return true;
  }

  // Returns the line read by the last NextLine(), or nullptr on EOF / error.
  char* Current() {
    return line_;
  }

  FILE* f_;
  const char* filename_;

  char* line_;  // valid for one NextLine() call, nullptr on EOF or error
  size_t allocated_size_;  // capacity of line_, maintained by getline()
  int err_num_;            // errno of the last failed NextLine(), else 0
};
 | 
| 103 | 
 | 
| 104 | class Printer {
 | 
| 105 |  public:
 | 
| 106 |   virtual void PrintLineNumber(int line_num) = 0;
 | 
| 107 |   virtual void PrintLineEnd() {
 | 
| 108 |   }
 | 
| 109 |   virtual void PrintToken(const char* line, int line_num, int start_col,
 | 
| 110 |                           Token token) = 0;
 | 
| 111 |   virtual void Swap(std::string* s) {
 | 
| 112 |     assert(0);
 | 
| 113 |   }
 | 
| 114 |   virtual ~Printer() {
 | 
| 115 |   }
 | 
| 116 | };
 | 
| 117 | 
 | 
| 118 | class HtmlPrinter : public Printer {
 | 
| 119 |  public:
 | 
| 120 |   HtmlPrinter() : Printer(), out_() {
 | 
| 121 |   }
 | 
| 122 | 
 | 
| 123 |   virtual void Swap(std::string* s) {
 | 
| 124 |     // assert(s != nullptr);
 | 
| 125 |     out_.swap(*s);
 | 
| 126 |   }
 | 
| 127 | 
 | 
| 128 |   virtual void PrintLineNumber(int line_num) {
 | 
| 129 |     char buf[16];
 | 
| 130 |     snprintf(buf, 16, "%d", line_num);
 | 
| 131 | 
 | 
| 132 |     out_.append("<tr><td class=num>");  // <tr> closed by PrintLineEnd()
 | 
| 133 |     out_.append(buf);
 | 
| 134 |     out_.append("</td><td id=L");  // jump to line with foo.html#L32
 | 
| 135 |     out_.append(buf);
 | 
| 136 |     out_.append(" class=line>");  // <td> closed by PrintLineEnd()
 | 
| 137 |   }
 | 
| 138 | 
 | 
| 139 |   virtual void PrintLineEnd() {
 | 
| 140 |     out_.append("</td></tr>");
 | 
| 141 |   }
 | 
| 142 | 
 | 
| 143 |   virtual void PrintToken(const char* line, int line_num, int start_col,
 | 
| 144 |                           Token tok) {
 | 
| 145 |     const char* p_start = line + start_col;
 | 
| 146 |     int num_bytes = tok.end_col - start_col;
 | 
| 147 | 
 | 
| 148 |     switch (tok.id) {
 | 
| 149 |     case Id::Comm:
 | 
| 150 |       PrintSpan("comm", p_start, num_bytes);
 | 
| 151 |       break;
 | 
| 152 | 
 | 
| 153 |     case Id::Name:
 | 
| 154 |       PrintEscaped(p_start, num_bytes);
 | 
| 155 |       break;
 | 
| 156 | 
 | 
| 157 |     case Id::PreprocCommand:
 | 
| 158 |     case Id::LineCont:
 | 
| 159 |       PrintSpan("preproc", p_start, num_bytes);
 | 
| 160 |       break;
 | 
| 161 | 
 | 
| 162 |     case Id::Re2c:
 | 
| 163 |       PrintSpan("re2c", p_start, num_bytes);
 | 
| 164 |       break;
 | 
| 165 | 
 | 
| 166 |     case Id::Other:
 | 
| 167 |       // PrintSpan("other", p_start, num_bytes);
 | 
| 168 |       PrintEscaped(p_start, num_bytes);
 | 
| 169 |       break;
 | 
| 170 | 
 | 
| 171 |       // for now these are strings
 | 
| 172 |     case Id::HereBegin:
 | 
| 173 |     case Id::HereEnd:
 | 
| 174 |     case Id::Str:
 | 
| 175 |       PrintSpan("str", p_start, num_bytes);
 | 
| 176 |       break;
 | 
| 177 | 
 | 
| 178 |     case Id::LBrace:
 | 
| 179 |     case Id::RBrace:
 | 
| 180 |       PrintSpan("brace", p_start, num_bytes);
 | 
| 181 |       break;
 | 
| 182 | 
 | 
| 183 |     case Id::Unknown:
 | 
| 184 |       PrintSpan("x", p_start, num_bytes);
 | 
| 185 |       break;
 | 
| 186 | 
 | 
| 187 |     default:
 | 
| 188 |       PrintEscaped(p_start, num_bytes);
 | 
| 189 |       break;
 | 
| 190 |     }
 | 
| 191 |   }
 | 
| 192 | 
 | 
| 193 |  private:
 | 
| 194 |   void PrintEscaped(const char* s, int len) {
 | 
| 195 |     // HTML escape the code string
 | 
| 196 |     for (int i = 0; i < len; ++i) {
 | 
| 197 |       char c = s[i];
 | 
| 198 | 
 | 
| 199 |       switch (c) {
 | 
| 200 |       case '<':
 | 
| 201 |         out_.append("<");
 | 
| 202 |         break;
 | 
| 203 |       case '>':
 | 
| 204 |         out_.append(">");
 | 
| 205 |         break;
 | 
| 206 |       case '&':
 | 
| 207 |         out_.append("&");
 | 
| 208 |         break;
 | 
| 209 |       default:
 | 
| 210 |         // Is this inefficient?  Fill 1 char
 | 
| 211 |         out_.append(1, s[i]);
 | 
| 212 |         break;
 | 
| 213 |       }
 | 
| 214 |     }
 | 
| 215 |   }
 | 
| 216 | 
 | 
| 217 |   void PrintSpan(const char* css_class, const char* s, int len) {
 | 
| 218 |     out_.append("<span class=");
 | 
| 219 |     out_.append(css_class);
 | 
| 220 |     out_.append(">");
 | 
| 221 | 
 | 
| 222 |     PrintEscaped(s, len);
 | 
| 223 | 
 | 
| 224 |     out_.append("</span>");
 | 
| 225 |   }
 | 
| 226 | 
 | 
| 227 |   std::string out_;
 | 
| 228 | };
 | 
| 229 | 
 | 
// Options for a run of the tool.
// NOTE(review): presumably filled in from the command line -- the parsing
// code is not in this part of the file.
struct Flags {
  lang_e lang;  // selects the lexer in ScanFiles()
  bool tsv;     // presumably selects TsvPrinter output -- confirm
  bool web;     // presumably selects HtmlPrinter output -- confirm
  bool more_color;     // AnsiPrinter: also color Id::Other and Id::WS tokens
  bool comments_only;  // AnsiPrinter: show comments, blank out other text

  // presumably the remaining (non-flag) arguments -- confirm against caller
  int argc;
  char** argv;
};
 | 
| 240 | 
 | 
| 241 | class AnsiPrinter : public Printer {
 | 
| 242 |  public:
 | 
| 243 |   AnsiPrinter(const Flags& flag) : Printer(), flag_(flag) {
 | 
| 244 |   }
 | 
| 245 | 
 | 
| 246 |   virtual void PrintLineNumber(int line_num) {
 | 
| 247 |     if (flag_.comments_only) {
 | 
| 248 |       return;
 | 
| 249 |     }
 | 
| 250 |     printf("%s%5d%s ", BLACK2, line_num, RESET);
 | 
| 251 |   }
 | 
| 252 | 
 | 
| 253 |   virtual void PrintToken(const char* line, int line_num, int start_col,
 | 
| 254 |                           Token tok) {
 | 
| 255 |     const char* p_start = line + start_col;
 | 
| 256 |     int num_bytes = tok.end_col - start_col;
 | 
| 257 |     switch (tok.id) {
 | 
| 258 |     case Id::Comm:
 | 
| 259 |       if (flag_.comments_only) {
 | 
| 260 |         PrintAlways(p_start, num_bytes);
 | 
| 261 |       } else {
 | 
| 262 |         PrintColor(BLUE, p_start, num_bytes);
 | 
| 263 |       }
 | 
| 264 |       break;
 | 
| 265 | 
 | 
| 266 |     case Id::Name:
 | 
| 267 |       PrintText(p_start, num_bytes);
 | 
| 268 |       break;
 | 
| 269 | 
 | 
| 270 |     case Id::PreprocCommand:
 | 
| 271 |     case Id::LineCont:
 | 
| 272 |       PrintColor(PURPLE, p_start, num_bytes);
 | 
| 273 |       break;
 | 
| 274 | 
 | 
| 275 |     case Id::Re2c:
 | 
| 276 |       PrintColor(PURPLE, p_start, num_bytes);
 | 
| 277 |       break;
 | 
| 278 | 
 | 
| 279 |     case Id::Other:
 | 
| 280 |       if (flag_.more_color) {
 | 
| 281 |         PrintColor(PURPLE, p_start, num_bytes);
 | 
| 282 |       } else {
 | 
| 283 |         PrintText(p_start, num_bytes);
 | 
| 284 |       }
 | 
| 285 |       break;
 | 
| 286 | 
 | 
| 287 |     case Id::WS:
 | 
| 288 |       if (flag_.more_color) {
 | 
| 289 |         fputs(REVERSE, stdout);
 | 
| 290 |         PrintColor(WHITE, p_start, num_bytes);
 | 
| 291 |       } else {
 | 
| 292 |         PrintText(p_start, num_bytes);
 | 
| 293 |       }
 | 
| 294 |       break;
 | 
| 295 | 
 | 
| 296 |     case Id::Str:
 | 
| 297 |       PrintColor(RED, p_start, num_bytes);
 | 
| 298 |       break;
 | 
| 299 | 
 | 
| 300 |     case Id::HereBegin:
 | 
| 301 |     case Id::HereEnd: {
 | 
| 302 |       PrintColor(RED2, p_start, num_bytes);
 | 
| 303 | 
 | 
| 304 |       // Debug submatch extraction
 | 
| 305 | #if 0
 | 
| 306 |       fputs(RED, stdout);
 | 
| 307 |       int n = tok.submatch_len;
 | 
| 308 |       fwrite(tok.submatch_start, 1, n, stdout);
 | 
| 309 |       fputs(RESET, stdout);
 | 
| 310 | #endif
 | 
| 311 |     } break;
 | 
| 312 | 
 | 
| 313 |     case Id::DelimStrBegin:
 | 
| 314 |     case Id::DelimStrEnd: {
 | 
| 315 |       PrintColor(RED2, p_start, num_bytes);
 | 
| 316 | 
 | 
| 317 |       // Debug submatch extraction
 | 
| 318 | #if 0
 | 
| 319 |       fputs(RED, stdout);
 | 
| 320 |       int n = tok.submatch_len;
 | 
| 321 |       fwrite(tok.submatch_start, 1, n, stdout);
 | 
| 322 |       fputs(RESET, stdout);
 | 
| 323 | #endif
 | 
| 324 |     } break;
 | 
| 325 | 
 | 
| 326 |     case Id::LBrace:
 | 
| 327 |     case Id::RBrace:
 | 
| 328 |       PrintColor(GREEN, p_start, num_bytes);
 | 
| 329 |       break;
 | 
| 330 | 
 | 
| 331 |     case Id::Unknown:
 | 
| 332 |       // Make errors red
 | 
| 333 |       fputs(REVERSE, stdout);
 | 
| 334 |       PrintColor(RED, p_start, num_bytes);
 | 
| 335 |       break;
 | 
| 336 | 
 | 
| 337 |     default:
 | 
| 338 |       PrintText(p_start, num_bytes);
 | 
| 339 |       break;
 | 
| 340 |     }
 | 
| 341 |   }
 | 
| 342 | 
 | 
| 343 |  private:
 | 
| 344 |   void PrintColor(const char* color, const char* s, int n) {
 | 
| 345 |     fputs(color, stdout);
 | 
| 346 |     PrintText(s, n);
 | 
| 347 |     fputs(RESET, stdout);
 | 
| 348 |   }
 | 
| 349 | 
 | 
| 350 |   void PrintText(const char* s, int n) {
 | 
| 351 |     if (flag_.comments_only) {
 | 
| 352 |       for (int i = 0; i < n; ++i) {
 | 
| 353 |         // Replace everything but newline with space
 | 
| 354 |         // TODO: I think we always want a newline token, including in comments.
 | 
| 355 |         // That will simplify this.
 | 
| 356 |         char c = (s[i] == '\n') ? '\n' : ' ';
 | 
| 357 |         fwrite(&c, 1, 1, stdout);
 | 
| 358 |       }
 | 
| 359 |     } else {
 | 
| 360 |       fwrite(s, 1, n, stdout);
 | 
| 361 |     }
 | 
| 362 |   }
 | 
| 363 | 
 | 
| 364 |   void PrintAlways(const char* s, int n) {
 | 
| 365 |     fwrite(s, 1, n, stdout);
 | 
| 366 |   }
 | 
| 367 | 
 | 
| 368 |   const Flags& flag_;
 | 
| 369 | };
 | 
| 370 | 
 | 
| 371 | const char* Id_str(Id id) {
 | 
| 372 |   switch (id) {
 | 
| 373 |   case Id::Comm:
 | 
| 374 |     return "Comm";
 | 
| 375 |   case Id::MaybeComment:  // fix-up doesn't guarantee this is gone
 | 
| 376 |     return "MaybeComment";
 | 
| 377 |   case Id::WS:
 | 
| 378 |     return "WS";
 | 
| 379 |   case Id::Re2c:
 | 
| 380 |     return "Re2c";
 | 
| 381 | 
 | 
| 382 |   case Id::MaybePreproc:  // fix-up doesn't guarantee this is gone
 | 
| 383 |     return "MaybePreproc";
 | 
| 384 |   case Id::PreprocCommand:
 | 
| 385 |     return "PreprocCommand";
 | 
| 386 |   case Id::PreprocOther:
 | 
| 387 |     return "PreprocOther";
 | 
| 388 |   case Id::LineCont:
 | 
| 389 |     return "LineCont";
 | 
| 390 | 
 | 
| 391 |   case Id::Name:
 | 
| 392 |     return "Name";
 | 
| 393 |   case Id::Other:
 | 
| 394 |     return "Other";
 | 
| 395 | 
 | 
| 396 |   case Id::Str:
 | 
| 397 |     return "Str";
 | 
| 398 | 
 | 
| 399 |   case Id::HereBegin:
 | 
| 400 |     return "HereBegin";
 | 
| 401 |   case Id::HereEnd:
 | 
| 402 |     return "HereEnd";
 | 
| 403 |   case Id::DelimStrBegin:
 | 
| 404 |     return "DelimStrBegin";
 | 
| 405 |   case Id::DelimStrEnd:
 | 
| 406 |     return "DelimStrEnd";
 | 
| 407 | 
 | 
| 408 |   case Id::LBrace:
 | 
| 409 |     return "LBrace";
 | 
| 410 |   case Id::RBrace:
 | 
| 411 |     return "RBrace";
 | 
| 412 | 
 | 
| 413 |   case Id::Unknown:
 | 
| 414 |     return "Unknown";
 | 
| 415 |   default:
 | 
| 416 |     assert(0);
 | 
| 417 |   }
 | 
| 418 | }
 | 
| 419 | 
 | 
| 420 | class TsvPrinter : public Printer {
 | 
| 421 |  public:
 | 
| 422 |   virtual void PrintLineNumber(int line_num) {
 | 
| 423 |     ;
 | 
| 424 |   }
 | 
| 425 | 
 | 
| 426 |   virtual void Swap(std::string* s) {
 | 
| 427 |     // out_.swap(*s);
 | 
| 428 |   }
 | 
| 429 | 
 | 
| 430 |   virtual void PrintToken(const char* line, int line_num, int start_col,
 | 
| 431 |                           Token tok) {
 | 
| 432 |     printf("%d\t%s\t%d\t%d\n", line_num, Id_str(tok.id), start_col,
 | 
| 433 |            tok.end_col);
 | 
| 434 |     // printf("  -> mode %d\n", lexer.line_mode);
 | 
| 435 |   }
 | 
| 436 |   virtual ~TsvPrinter() {
 | 
| 437 |   }
 | 
| 438 | };
 | 
| 439 | 
 | 
| 440 | bool TokenIsSignificant(Id id) {
 | 
| 441 |   switch (id) {
 | 
| 442 |   case Id::Name:
 | 
| 443 |   case Id::Other:
 | 
| 444 |   case Id::PreprocCommand:
 | 
| 445 |   case Id::PreprocOther:
 | 
| 446 |   case Id::Re2c:
 | 
| 447 |     return true;
 | 
| 448 | 
 | 
| 449 |   // Comments, whitespace, and string literals aren't significant
 | 
| 450 |   // TODO: can abort on Id::Unknown?
 | 
| 451 |   default:
 | 
| 452 |     break;
 | 
| 453 |   }
 | 
| 454 |   return false;
 | 
| 455 | }
 | 
| 456 | 
 | 
| 457 | class OutputStream {
 | 
| 458 |   // stdout contains either
 | 
| 459 |   // - netstrings of HTML, or TSV Token structs
 | 
| 460 |   // - ANSI text
 | 
| 461 | 
 | 
| 462 |  public:
 | 
| 463 |   OutputStream(Printer* pr) : pr_(pr) {
 | 
| 464 |   }
 | 
| 465 |   virtual void PathBegin(const char* path) = 0;
 | 
| 466 |   virtual void Line(int line_num, const char* line,
 | 
| 467 |                     const std::vector<Token>& tokens) = 0;
 | 
| 468 |   virtual void PathEnd(int num_lines, int num_sig_lines) = 0;
 | 
| 469 |   virtual ~OutputStream() {
 | 
| 470 |   }
 | 
| 471 | 
 | 
| 472 |  protected:
 | 
| 473 |   Printer* pr_;  // how to print each file
 | 
| 474 | };
 | 
| 475 | 
 | 
| 476 | class NetStringOutput : public OutputStream {
 | 
| 477 |  public:
 | 
| 478 |   NetStringOutput(Printer* pr) : OutputStream(pr) {
 | 
| 479 |   }
 | 
| 480 | 
 | 
| 481 |   virtual void PathBegin(const char* path) {
 | 
| 482 |     if (path == nullptr) {
 | 
| 483 |       path = "<stdin>";
 | 
| 484 |     }
 | 
| 485 |     PrintNetString(path, strlen(path));
 | 
| 486 |   }
 | 
| 487 | 
 | 
| 488 |   virtual void Line(int line_num, const char* line,
 | 
| 489 |                     const std::vector<Token>& tokens) {
 | 
| 490 |     pr_->PrintLineNumber(line_num);
 | 
| 491 | 
 | 
| 492 |     int start_col = 0;
 | 
| 493 |     for (auto tok : tokens) {
 | 
| 494 |       pr_->PrintToken(line, line_num, start_col, tok);
 | 
| 495 |       start_col = tok.end_col;
 | 
| 496 |     }
 | 
| 497 | 
 | 
| 498 |     pr_->PrintLineEnd();
 | 
| 499 |   }
 | 
| 500 | 
 | 
| 501 |   virtual void PathEnd(int num_lines, int num_sig_lines) {
 | 
| 502 |     std::string string_for_file;
 | 
| 503 |     pr_->Swap(&string_for_file);
 | 
| 504 | 
 | 
| 505 |     PrintNetString(string_for_file.c_str(), string_for_file.size());
 | 
| 506 | 
 | 
| 507 |     // Output summary in JSON
 | 
| 508 |     // TODO: change this to a 4th column
 | 
| 509 |     char buf[64];
 | 
| 510 |     int n = snprintf(buf, 64, "{\"num_lines\": %d, \"num_sig_lines\": %d}",
 | 
| 511 |                      num_lines, num_sig_lines);
 | 
| 512 |     PrintNetString(buf, n);
 | 
| 513 |   }
 | 
| 514 | 
 | 
| 515 |  private:
 | 
| 516 |   void PrintNetString(const char* s, int len) {
 | 
| 517 |     fprintf(stdout, "%d:%*s,", len, len, s);
 | 
| 518 |   }
 | 
| 519 | };
 | 
| 520 | 
 | 
| 521 | class AnsiOutput : public OutputStream {
 | 
| 522 |  public:
 | 
| 523 |   AnsiOutput(Printer* pr) : OutputStream(pr) {
 | 
| 524 |   }
 | 
| 525 | 
 | 
| 526 |   // TODO: Can respect --comments-only
 | 
| 527 |   virtual void PathBegin(const char* path) {
 | 
| 528 |     if (path == nullptr) {
 | 
| 529 |       path = "<stdin>";
 | 
| 530 |     }
 | 
| 531 |     // diff uses +++ ---
 | 
| 532 |     printf("\n");
 | 
| 533 |     printf("=== %s%s%s%s ===\n", BOLD, PURPLE, path, RESET);
 | 
| 534 |     printf("\n");
 | 
| 535 |   }
 | 
| 536 | 
 | 
| 537 |   virtual void Line(int line_num, const char* line,
 | 
| 538 |                     const std::vector<Token>& tokens) {
 | 
| 539 |     pr_->PrintLineNumber(line_num);
 | 
| 540 | 
 | 
| 541 |     int start_col = 0;
 | 
| 542 |     for (auto tok : tokens) {
 | 
| 543 |       pr_->PrintToken(line, line_num, start_col, tok);
 | 
| 544 |       start_col = tok.end_col;
 | 
| 545 |     }
 | 
| 546 | 
 | 
| 547 |     pr_->PrintLineEnd();
 | 
| 548 |   };
 | 
| 549 | 
 | 
| 550 |   // TODO: Can respect --comments-only
 | 
| 551 |   virtual void PathEnd(int num_lines, int num_sig_lines) {
 | 
| 552 |     fprintf(stdout, "%s%d lines, %d significant%s\n", GREEN, num_lines,
 | 
| 553 |             num_sig_lines, RESET);
 | 
| 554 |   };
 | 
| 555 | };
 | 
| 556 | 
 | 
| 557 | void PrintTokens(std::vector<Token>& toks) {
 | 
| 558 |   int start_col = 0;
 | 
| 559 |   int i = 0;
 | 
| 560 |   Log("===");
 | 
| 561 |   for (auto tok : toks) {
 | 
| 562 |     Log("%2d %10s %2d %2d", i, Id_str(tok.id), start_col, tok.end_col);
 | 
| 563 |     start_col = tok.end_col;
 | 
| 564 |     ++i;
 | 
| 565 |   }
 | 
| 566 |   Log("===");
 | 
| 567 | }
 | 
| 568 | 
 | 
| 569 | // BUGGY, needs unit tests
 | 
| 570 | 
 | 
| 571 | // Fiddly function, reduces the size of the output a bit
 | 
| 572 | // "hi" becomes 1 Id::DQ token instead of 3 separate Id::DQ tokens
 | 
| 573 | void Optimize(std::vector<Token>* tokens) {
 | 
| 574 |   std::vector<Token>& toks = *tokens;  // alias
 | 
| 575 | 
 | 
| 576 |   // PrintTokens(toks);
 | 
| 577 | 
 | 
| 578 |   int n = toks.size();
 | 
| 579 |   if (n < 1) {  // nothing to de-duplicate
 | 
| 580 |     return;
 | 
| 581 |   }
 | 
| 582 | 
 | 
| 583 |   int left = 0;
 | 
| 584 |   int right = 1;
 | 
| 585 |   while (right < n) {
 | 
| 586 |     Log("right ID = %s, end %d", Id_str(toks[right].id), toks[right].end_col);
 | 
| 587 | 
 | 
| 588 |     if (toks[left].id == toks[right].id) {
 | 
| 589 |       //  Join the tokens together
 | 
| 590 |       toks[left].end_col = toks[right].end_col;
 | 
| 591 |     } else {
 | 
| 592 |       toks[left] = toks[right];
 | 
| 593 |       left++;
 | 
| 594 |       Log("  not eq, left = %d", left);
 | 
| 595 |     }
 | 
| 596 |     right++;
 | 
| 597 |   }
 | 
| 598 |   Log("left = %d, right = %d", left, right);
 | 
| 599 | 
 | 
| 600 |   // Fiddly condition: one more iteration.  Need some unit tests for this.
 | 
| 601 |   toks[left] = toks[right - 1];
 | 
| 602 |   left++;
 | 
| 603 |   assert(left <= n);
 | 
| 604 | 
 | 
| 605 |   // Erase the remaining ones
 | 
| 606 |   toks.resize(left);
 | 
| 607 | 
 | 
| 608 |   // PrintTokens(toks);
 | 
| 609 | }
 | 
| 610 | 
 | 
| 611 | // Version of the above that's not in-place, led to a bug fix
 | 
| 612 | void Optimize2(std::vector<Token>* tokens) {
 | 
| 613 |   std::vector<Token> optimized;
 | 
| 614 | 
 | 
| 615 |   int n = tokens->size();
 | 
| 616 |   if (n < 1) {
 | 
| 617 |     return;
 | 
| 618 |   }
 | 
| 619 | 
 | 
| 620 |   optimized.reserve(n);
 | 
| 621 | 
 | 
| 622 |   int left = 0;
 | 
| 623 |   int right = 1;
 | 
| 624 |   while (right < n) {
 | 
| 625 |     optimized.push_back((*tokens)[left]);
 | 
| 626 |     left++;
 | 
| 627 |     right++;
 | 
| 628 |   }
 | 
| 629 |   optimized.push_back((*tokens)[left]);
 | 
| 630 |   left++;
 | 
| 631 | 
 | 
| 632 |   tokens->swap(optimized);
 | 
| 633 | }
 | 
| 634 | 
 | 
// Returns whether `line` terminates a here doc whose delimiter is
// `here_delim`: the delimiter, optionally followed by whitespace only.
//
// For the delimiter EOF, these all match:
//
//   "EOF\n"   "EOF\t\n"   "EOF\r\n"   "\tEOF\n"   "EOF"
//
// The last form is the final line of a file that has no trailing newline.
//
// line: NUL-terminated line, including its newline if it has one.
// here_delim: delimiter text; not modified.
bool LineEqualsHereDelim(const char* line, std::string& here_delim) {
  // Hack: skip leading tab unconditionally, even though that's only allowed
  // in <<-  Really we should capture the operator and the delim?
  if (*line == '\t') {
    line++;
  }

  const size_t line_len = strlen(line);
  const size_t delim_len = here_delim.size();

  if (line_len < delim_len) {
    return false;  // too short to contain the delimiter
  }

  if (memcmp(line, here_delim.data(), delim_len) != 0) {
    return false;  // doesn't start with the delimiter
  }

  // Only whitespace may follow the delimiter
  for (const char* p = line + delim_len; *p != '\0'; ++p) {
    switch (*p) {
    case ' ':
    case '\t':
    case '\r':
    case '\n':
      break;
    default:
      return false;
    }
  }

  return true;
}
 | 
| 679 | 
 | 
| 680 | void CppHook::TryPreprocess(char* line, std::vector<Token>* tokens) {
 | 
| 681 |   // Fills tokens, which can be checked for beginning and end tokens
 | 
| 682 | 
 | 
| 683 |   Lexer<pp_mode_e> lexer(line);
 | 
| 684 |   Matcher<pp_mode_e> matcher;
 | 
| 685 | 
 | 
| 686 |   while (true) {  // tokens on each line
 | 
| 687 |     Token tok;
 | 
| 688 |     // Log("Match %d", lexer.p_current - lexer.line_);
 | 
| 689 |     bool eol = matcher.Match(&lexer, &tok);
 | 
| 690 |     // Log("EOL %d", eol);
 | 
| 691 |     if (eol) {
 | 
| 692 |       break;
 | 
| 693 |     }
 | 
| 694 |     // Log("TOK %s %d", Id_str(tok.id), tok.end_col);
 | 
| 695 |     tokens->push_back(tok);  // make a copy
 | 
| 696 |   }
 | 
| 697 | }
 | 
| 698 | 
 | 
| 699 | void FixShellComments(std::vector<Token>& tokens) {
 | 
| 700 |   int n = tokens.size();
 | 
| 701 |   for (int i = 0; i < n; ++i) {
 | 
| 702 |     // # comment at start of line
 | 
| 703 |     if (tokens[i].id == Id::MaybeComment) {
 | 
| 704 |       if (i == 0) {
 | 
| 705 |         tokens[i].id = Id::Comm;
 | 
| 706 |       }
 | 
| 707 |       if (i != 0 and tokens[i - 1].id == Id::WS) {
 | 
| 708 |         tokens[i].id = Id::Comm;
 | 
| 709 |       }
 | 
| 710 |     }
 | 
| 711 |   }
 | 
| 712 | }
 | 
| 713 | 
 | 
| 714 | // This templated method causes some code expansion, but not too much.  The
 | 
| 715 | // binary went from 38 KB to 42 KB, after being stripped.
 | 
| 716 | // We get a little type safety with py_mode_e vs cpp_mode_e.
 | 
| 717 | 
 | 
| 718 | template <typename T>
 | 
| 719 | int ScanOne(Reader* reader, OutputStream* out, Hook* hook) {
 | 
| 720 |   Lexer<T> lexer(nullptr);
 | 
| 721 |   Matcher<T> matcher;
 | 
| 722 | 
 | 
| 723 |   int line_num = 1;
 | 
| 724 |   int num_sig = 0;
 | 
| 725 | 
 | 
| 726 |   std::vector<std::string> here_list;  // delimiters to pop
 | 
| 727 |   std::vector<int> here_start_num;
 | 
| 728 | 
 | 
| 729 |   // For multi-line strings.  This has 0 or 1 entries, and the 1 entry can be
 | 
| 730 |   // the empty string.
 | 
| 731 |   std::vector<std::string> delim_begin;
 | 
| 732 | 
 | 
| 733 |   while (true) {  // read each line, handling errors
 | 
| 734 |     if (!reader->NextLine()) {
 | 
| 735 |       const char* name = reader->Filename() ?: "<stdin>";
 | 
| 736 |       Log("micro-syntax: getline() error on %s: %s", name,
 | 
| 737 |           strerror(reader->err_num_));
 | 
| 738 |       return 1;
 | 
| 739 |     }
 | 
| 740 |     char* line = reader->Current();
 | 
| 741 |     if (line == nullptr) {
 | 
| 742 |       break;  // EOF
 | 
| 743 |     }
 | 
| 744 | 
 | 
| 745 |     std::vector<Token> pre_tokens;
 | 
| 746 | 
 | 
| 747 |     hook->TryPreprocess(line, &pre_tokens);
 | 
| 748 | 
 | 
| 749 |     // e.g #define at beginning of line
 | 
| 750 |     if (pre_tokens.size() && pre_tokens[0].id == Id::MaybePreproc) {
 | 
| 751 |       pre_tokens[0].id = Id::PreprocCommand;
 | 
| 752 | 
 | 
| 753 |       out->Line(line_num, line, pre_tokens);
 | 
| 754 | 
 | 
| 755 |       line_num += 1;
 | 
| 756 |       num_sig += 1;
 | 
| 757 | 
 | 
| 758 |       Token last = pre_tokens.back();
 | 
| 759 |       while (last.id == Id::LineCont) {
 | 
| 760 |         const char* blame = reader->Filename() ?: "<stdin>";
 | 
| 761 |         if (!reader->NextLine()) {
 | 
| 762 |           Log("micro-syntax: getline() error on %s: %s", blame,
 | 
| 763 |               strerror(reader->err_num_));
 | 
| 764 |           return 1;
 | 
| 765 |         }
 | 
| 766 |         char* line = reader->Current();
 | 
| 767 |         if (line == nullptr) {
 | 
| 768 |           Log("Unexpected end-of-file in preprocessor in %s", blame);
 | 
| 769 |           return 1;
 | 
| 770 |         }
 | 
| 771 | 
 | 
| 772 |         pre_tokens.clear();
 | 
| 773 |         hook->TryPreprocess(line, &pre_tokens);
 | 
| 774 | 
 | 
| 775 |         out->Line(line_num, line, pre_tokens);
 | 
| 776 | 
 | 
| 777 |         line_num += 1;
 | 
| 778 |         num_sig += 1;
 | 
| 779 | 
 | 
| 780 |         last = pre_tokens.back();
 | 
| 781 |       }
 | 
| 782 |       continue;  // Skip the rest of the loop
 | 
| 783 |     }
 | 
| 784 | 
 | 
| 785 |     //
 | 
| 786 |     // Main Loop for "normal" lines (not preprocessor or here doc)
 | 
| 787 |     //
 | 
| 788 | 
 | 
| 789 |     std::vector<Token> tokens;
 | 
| 790 |     lexer.SetLine(line);
 | 
| 791 | 
 | 
| 792 |     bool line_is_sig = false;
 | 
| 793 |     while (true) {  // tokens on each line
 | 
| 794 |       Token tok;
 | 
| 795 |       bool eol = matcher.Match(&lexer, &tok);
 | 
| 796 |       if (eol) {
 | 
| 797 |         break;
 | 
| 798 |       }
 | 
| 799 | 
 | 
| 800 |       switch (tok.id) {
 | 
| 801 |       case Id::HereBegin: {
 | 
| 802 |         // Put a copy on the stack
 | 
| 803 |         int n = tok.submatch_end - tok.submatch_start;
 | 
| 804 |         here_list.emplace_back(line + tok.submatch_start, n);
 | 
| 805 |         here_start_num.push_back(line_num);
 | 
| 806 |       } break;
 | 
| 807 | 
 | 
| 808 |       case Id::DelimStrBegin: {
 | 
| 809 |         if (delim_begin.empty()) {
 | 
| 810 |           int n = tok.submatch_end - tok.submatch_start;
 | 
| 811 |           delim_begin.emplace_back(line + tok.submatch_start, n);
 | 
| 812 |         } else {
 | 
| 813 |           // We have entered cpp_mode_e::DelimStr, which means we should never
 | 
| 814 |           // return another DelimStrBegin
 | 
| 815 |           assert(0);
 | 
| 816 |         }
 | 
| 817 |       } break;
 | 
| 818 | 
 | 
| 819 |       case Id::DelimStrEnd: {
 | 
| 820 |         if (delim_begin.empty()) {
 | 
| 821 |           // We should never get this unless we got a DelimStrBegin first
 | 
| 822 |           assert(0);
 | 
| 823 |         } else {
 | 
| 824 |           size_t n = tok.submatch_end - tok.submatch_start;
 | 
| 825 |           std::string end_delim(line + tok.submatch_start, n);
 | 
| 826 | 
 | 
| 827 |           if (end_delim == delim_begin.back()) {
 | 
| 828 |             lexer.line_mode = T::Outer;  // the string is ended
 | 
| 829 |             delim_begin.pop_back();
 | 
| 830 |           } else {
 | 
| 831 |             tok.id = Id::Str;  // mismatched delimiter is just a string
 | 
| 832 |           }
 | 
| 833 |         }
 | 
| 834 |       } break;
 | 
| 835 | 
 | 
| 836 |       default:
 | 
| 837 |         break;
 | 
| 838 |       }
 | 
| 839 | 
 | 
| 840 |       tokens.push_back(tok);  // make a copy
 | 
| 841 | 
 | 
| 842 |       if (TokenIsSignificant(tok.id)) {
 | 
| 843 |         line_is_sig = true;
 | 
| 844 |       }
 | 
| 845 |     }
 | 
| 846 | 
 | 
| 847 | #if 0
 | 
| 848 |     PrintTokens(tokens);
 | 
| 849 |     Log("%d tokens before", tokens.size());
 | 
| 850 |     Optimize(&tokens);
 | 
| 851 |     Log("%d tokens after", tokens.size());
 | 
| 852 |     PrintTokens(tokens);
 | 
| 853 | #endif
 | 
| 854 | 
 | 
| 855 |     FixShellComments(tokens);
 | 
| 856 | 
 | 
| 857 |     out->Line(line_num, line, tokens);
 | 
| 858 |     tokens.clear();
 | 
| 859 | 
 | 
| 860 |     // Potentially multiple here docs for this line
 | 
| 861 |     int here_index = 0;
 | 
| 862 |     for (auto here_delim : here_list) {
 | 
| 863 |       // Log("HERE %s", here_delim.c_str());
 | 
| 864 | 
 | 
| 865 |       while (true) {
 | 
| 866 |         const char* blame = reader->Filename() ?: "<stdin>";
 | 
| 867 |         if (!reader->NextLine()) {
 | 
| 868 |           Log("micro-syntax: getline() error on %s: %s", blame,
 | 
| 869 |               strerror(reader->err_num_));
 | 
| 870 |           return 1;
 | 
| 871 |         }
 | 
| 872 |         char* line = reader->Current();
 | 
| 873 |         if (line == nullptr) {
 | 
| 874 |           int start_line = here_start_num[here_index];
 | 
| 875 |           Log("Unexpected end-of-file in here doc in %s, start line %d", blame,
 | 
| 876 |               start_line);
 | 
| 877 |           return 1;
 | 
| 878 |         }
 | 
| 879 | 
 | 
| 880 |         line_num++;
 | 
| 881 | 
 | 
| 882 |         if (LineEqualsHereDelim(line, here_delim)) {
 | 
| 883 |           int n = strlen(line);
 | 
| 884 |           Token whole_line(Id::HereEnd, n);
 | 
| 885 |           tokens.push_back(whole_line);
 | 
| 886 |           out->Line(line_num, line, tokens);
 | 
| 887 |           tokens.clear();
 | 
| 888 |           break;
 | 
| 889 | 
 | 
| 890 |         } else {
 | 
| 891 |           int n = strlen(line);
 | 
| 892 |           Token whole_line(Id::Str, n);
 | 
| 893 |           tokens.push_back(whole_line);
 | 
| 894 |           out->Line(line_num, line, tokens);
 | 
| 895 |           tokens.clear();
 | 
| 896 | 
 | 
| 897 |           // Log("  not equal: %s", line);
 | 
| 898 |         }
 | 
| 899 |       }
 | 
| 900 |       here_index++;
 | 
| 901 |     }
 | 
| 902 |     here_list.clear();
 | 
| 903 |     here_start_num.clear();
 | 
| 904 | 
 | 
| 905 |     line_num++;
 | 
| 906 |     num_sig += line_is_sig;
 | 
| 907 |   }
 | 
| 908 | 
 | 
| 909 |   out->PathEnd(line_num - 1, num_sig);
 | 
| 910 |   return 0;
 | 
| 911 | }
 | 
| 912 | 
 | 
| 913 | int ScanFiles(const Flags& flag, std::vector<char*> files, OutputStream* out,
 | 
| 914 |               Hook* hook) {
 | 
| 915 |   Reader* reader = nullptr;
 | 
| 916 | 
 | 
| 917 |   int status = 0;
 | 
| 918 |   for (auto path : files) {
 | 
| 919 |     FILE* f;
 | 
| 920 |     if (path == nullptr) {
 | 
| 921 |       f = stdin;
 | 
| 922 |     } else {
 | 
| 923 |       f = fopen(path, "r");
 | 
| 924 |       if (f == nullptr) {
 | 
| 925 |         Log("Error opening %s: %s", path, strerror(errno));
 | 
| 926 |         return 1;
 | 
| 927 |       }
 | 
| 928 |     }
 | 
| 929 |     out->PathBegin(path);
 | 
| 930 | 
 | 
| 931 |     reader = new Reader(f, path);
 | 
| 932 | 
 | 
| 933 |     switch (flag.lang) {
 | 
| 934 |     case lang_e::PlainText:
 | 
| 935 |       status = ScanOne<text_mode_e>(reader, out, hook);
 | 
| 936 |       break;
 | 
| 937 | 
 | 
| 938 |     case lang_e::Py:
 | 
| 939 |       status = ScanOne<py_mode_e>(reader, out, hook);
 | 
| 940 |       break;
 | 
| 941 | 
 | 
| 942 |     case lang_e::Cpp:
 | 
| 943 |       status = ScanOne<cpp_mode_e>(reader, out, hook);
 | 
| 944 |       break;
 | 
| 945 | 
 | 
| 946 |     case lang_e::Shell:
 | 
| 947 |       status = ScanOne<sh_mode_e>(reader, out, hook);
 | 
| 948 |       break;
 | 
| 949 | 
 | 
| 950 |     case lang_e::Asdl:
 | 
| 951 |       status = ScanOne<asdl_mode_e>(reader, out, hook);
 | 
| 952 |       break;
 | 
| 953 | 
 | 
| 954 |     case lang_e::R:
 | 
| 955 |       status = ScanOne<R_mode_e>(reader, out, hook);
 | 
| 956 |       break;
 | 
| 957 | 
 | 
| 958 |     default:
 | 
| 959 |       assert(0);
 | 
| 960 |     }
 | 
| 961 | 
 | 
| 962 |     delete reader;
 | 
| 963 | 
 | 
| 964 |     if (path == nullptr) {
 | 
| 965 |       ;
 | 
| 966 |     } else {
 | 
| 967 |       fclose(f);
 | 
| 968 |     }
 | 
| 969 | 
 | 
| 970 |     if (status != 0) {
 | 
| 971 |       break;
 | 
| 972 |     }
 | 
| 973 |   }
 | 
| 974 | 
 | 
| 975 |   return status;
 | 
| 976 | }
 | 
| 977 | 
 | 
// Prints the command-line usage summary to stdout.
//
// NOTE(review): main() parses options with getopt() and the spec "+hl:motw",
// so the long spellings (--help, --lang, ...) and the -n / -e / --color
// entries listed below don't appear to be accepted yet -- confirm before
// relying on them.
void PrintHelp() {
  puts(R"(Usage: micro-syntax FLAGS* FILE*

Recognizes the syntax of each file, and prints it to stdout.

If there are no files, reads stdin.

Flags:
  -h --help   This help

  -l --lang   Language: py|cpp|shell|...
  -t          Print tokens as TSV, instead of ANSI color
  -w          Print HTML for the web

  -m          More color, useful for debugging tokens

  -n --no-comments    Omit comments
  -o --comments-only  Only print comments
  -e --empty-strs     Substitute string literals for empty strings
     --color          on off always more

)");
}
 | 
| 1001 | 
 | 
| 1002 | int main(int argc, char** argv) {
 | 
| 1003 |   Flags flag = {lang_e::PlainText};
 | 
| 1004 | 
 | 
| 1005 |   // http://www.gnu.org/software/libc/manual/html_node/Example-of-Getopt.html
 | 
| 1006 |   // + means to be strict about flag parsing.
 | 
| 1007 |   int c;
 | 
| 1008 |   while ((c = getopt(argc, argv, "+hl:motw")) != -1) {
 | 
| 1009 |     switch (c) {
 | 
| 1010 |     case 'h':
 | 
| 1011 |       PrintHelp();
 | 
| 1012 |       return 0;
 | 
| 1013 | 
 | 
| 1014 |     case 'l':
 | 
| 1015 |       if (strcmp(optarg, "cpp") == 0) {
 | 
| 1016 |         flag.lang = lang_e::Cpp;
 | 
| 1017 | 
 | 
| 1018 |       } else if (strcmp(optarg, "py") == 0) {
 | 
| 1019 |         flag.lang = lang_e::Py;
 | 
| 1020 | 
 | 
| 1021 |       } else if (strcmp(optarg, "shell") == 0) {
 | 
| 1022 |         flag.lang = lang_e::Shell;
 | 
| 1023 | 
 | 
| 1024 |       } else if (strcmp(optarg, "asdl") == 0) {
 | 
| 1025 |         flag.lang = lang_e::Asdl;
 | 
| 1026 | 
 | 
| 1027 |       } else if (strcmp(optarg, "R") == 0) {
 | 
| 1028 |         flag.lang = lang_e::R;
 | 
| 1029 | 
 | 
| 1030 |         // TODO: implement all of these
 | 
| 1031 |       } else if (strcmp(optarg, "js") == 0) {
 | 
| 1032 |         flag.lang = lang_e::PlainText;
 | 
| 1033 | 
 | 
| 1034 |       } else if (strcmp(optarg, "css") == 0) {
 | 
| 1035 |         flag.lang = lang_e::PlainText;
 | 
| 1036 | 
 | 
| 1037 |       } else if (strcmp(optarg, "md") == 0) {
 | 
| 1038 |         flag.lang = lang_e::PlainText;
 | 
| 1039 | 
 | 
| 1040 |       } else if (strcmp(optarg, "yaml") == 0) {
 | 
| 1041 |         flag.lang = lang_e::PlainText;
 | 
| 1042 | 
 | 
| 1043 |       } else if (strcmp(optarg, "txt") == 0) {
 | 
| 1044 |         flag.lang = lang_e::PlainText;
 | 
| 1045 | 
 | 
| 1046 |       } else if (strcmp(optarg, "other") == 0) {
 | 
| 1047 |         flag.lang = lang_e::PlainText;
 | 
| 1048 | 
 | 
| 1049 |       } else {
 | 
| 1050 |         Log("Expected -l LANG to be cpp|py|shell|asdl|R|js|css|md|yaml|txt, "
 | 
| 1051 |             "got %s",
 | 
| 1052 |             optarg);
 | 
| 1053 |         return 2;
 | 
| 1054 |       }
 | 
| 1055 |       break;
 | 
| 1056 | 
 | 
| 1057 |     case 'm':
 | 
| 1058 |       flag.more_color = true;
 | 
| 1059 |       break;
 | 
| 1060 | 
 | 
| 1061 |     case 'o':
 | 
| 1062 |       flag.comments_only = true;
 | 
| 1063 |       break;
 | 
| 1064 | 
 | 
| 1065 |     case 't':
 | 
| 1066 |       flag.tsv = true;
 | 
| 1067 |       break;
 | 
| 1068 | 
 | 
| 1069 |     case 'w':
 | 
| 1070 |       flag.web = true;
 | 
| 1071 |       break;
 | 
| 1072 | 
 | 
| 1073 |     case '?':  // getopt library will print error
 | 
| 1074 |       return 2;
 | 
| 1075 | 
 | 
| 1076 |     default:
 | 
| 1077 |       abort();  // should never happen
 | 
| 1078 |     }
 | 
| 1079 |   }
 | 
| 1080 | 
 | 
| 1081 |   int a = optind;  // index into argv
 | 
| 1082 |   flag.argv = argv + a;
 | 
| 1083 |   flag.argc = argc - a;
 | 
| 1084 | 
 | 
| 1085 |   std::vector<char*> files;  // filename, or nullptr for stdin
 | 
| 1086 |   if (flag.argc != 0) {
 | 
| 1087 |     for (int i = 0; i < flag.argc; ++i) {
 | 
| 1088 |       files.push_back(flag.argv[i]);
 | 
| 1089 |     }
 | 
| 1090 |   } else {
 | 
| 1091 |     files.push_back(nullptr);  // stands for stdin
 | 
| 1092 |   }
 | 
| 1093 | 
 | 
| 1094 |   Printer* pr;        // for each file
 | 
| 1095 |   OutputStream* out;  // the entire stream
 | 
| 1096 | 
 | 
| 1097 |   if (flag.tsv) {
 | 
| 1098 |     pr = new TsvPrinter();
 | 
| 1099 |     out = new NetStringOutput(pr);
 | 
| 1100 |   } else if (flag.web) {
 | 
| 1101 |     pr = new HtmlPrinter();
 | 
| 1102 |     out = new NetStringOutput(pr);
 | 
| 1103 |   } else {
 | 
| 1104 |     pr = new AnsiPrinter(flag);
 | 
| 1105 |     out = new AnsiOutput(pr);
 | 
| 1106 |   }
 | 
| 1107 | 
 | 
| 1108 |   Hook* hook = nullptr;
 | 
| 1109 |   if (flag.lang == lang_e::Cpp) {
 | 
| 1110 |     hook = new CppHook();
 | 
| 1111 |   } else {
 | 
| 1112 |     hook = new Hook();  // default hook
 | 
| 1113 |   }
 | 
| 1114 | 
 | 
| 1115 |   int status = ScanFiles(flag, files, out, hook);
 | 
| 1116 | 
 | 
| 1117 |   delete hook;
 | 
| 1118 |   delete pr;
 | 
| 1119 |   delete out;
 | 
| 1120 | 
 | 
| 1121 |   return status;
 | 
| 1122 | }
 |