Subversion Repositories Code-Repo

Rev

Details | Last modification | View Log | RSS feed

Rev Author Line No. Line
92 Kevin 1
// Protocol Buffers - Google's data interchange format
2
// Copyright 2008 Google Inc.  All rights reserved.
3
// http://code.google.com/p/protobuf/
4
//
5
// Redistribution and use in source and binary forms, with or without
6
// modification, are permitted provided that the following conditions are
7
// met:
8
//
9
//     * Redistributions of source code must retain the above copyright
10
// notice, this list of conditions and the following disclaimer.
11
//     * Redistributions in binary form must reproduce the above
12
// copyright notice, this list of conditions and the following disclaimer
13
// in the documentation and/or other materials provided with the
14
// distribution.
15
//     * Neither the name of Google Inc. nor the names of its
16
// contributors may be used to endorse or promote products derived from
17
// this software without specific prior written permission.
18
//
19
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30
 
31
package com.google.protobuf;
32
 
33
import com.google.protobuf.Descriptors.Descriptor;
34
import com.google.protobuf.Descriptors.FieldDescriptor;
35
import com.google.protobuf.Descriptors.EnumDescriptor;
36
import com.google.protobuf.Descriptors.EnumValueDescriptor;
37
 
38
import java.io.IOException;
39
import java.nio.CharBuffer;
40
import java.math.BigInteger;
41
import java.util.ArrayList;
42
import java.util.List;
43
import java.util.Locale;
44
import java.util.Map;
45
import java.util.regex.Matcher;
46
import java.util.regex.Pattern;
47
 
48
/**
49
 * Provide text parsing and formatting support for proto2 instances.
50
 * The implementation largely follows google/protobuf/text_format.cc.
51
 *
52
 * @author wenboz@google.com Wenbo Zhu
53
 * @author kenton@google.com Kenton Varda
54
 */
55
public final class TextFormat {
56
  // Private constructor: this final class is used only through its static
  // members and is never instantiated.
  private TextFormat() {}
57
 
58
  // Printer built with singleLineMode = false, i.e. newline-separated output.
  private static final Printer DEFAULT_PRINTER = new Printer(false);
59
  // Printer built with singleLineMode = true, i.e. space-separated output.
  private static final Printer SINGLE_LINE_PRINTER = new Printer(true);
60
 
61
  /**
62
   * Outputs a textual representation of the Protocol Message supplied into
63
   * the parameter output. (This representation is the new version of the
64
   * classic "ProtocolPrinter" output from the original Protocol Buffer system)
65
   */
66
  public static void print(final Message message, final Appendable output)
67
                           throws IOException {
68
    DEFAULT_PRINTER.print(message, new TextGenerator(output));
69
  }
70
 
71
  /** Outputs a textual representation of {@code fields} to {@code output}. */
72
  public static void print(final UnknownFieldSet fields,
73
                           final Appendable output)
74
                           throws IOException {
75
    DEFAULT_PRINTER.printUnknownFields(fields, new TextGenerator(output));
76
  }
77
 
78
  /**
79
   * Generates a human readable form of this message, useful for debugging and
80
   * other purposes, with no newline characters.
81
   */
82
  public static String shortDebugString(final Message message) {
83
    try {
84
      final StringBuilder sb = new StringBuilder();
85
      SINGLE_LINE_PRINTER.print(message, new TextGenerator(sb));
86
      // Single line mode currently might have an extra space at the end.
87
      return sb.toString().trim();
88
    } catch (IOException e) {
89
      throw new IllegalStateException(e);
90
    }
91
  }
92
 
93
  /**
94
   * Generates a human readable form of the unknown fields, useful for debugging
95
   * and other purposes, with no newline characters.
96
   */
97
  public static String shortDebugString(final UnknownFieldSet fields) {
98
    try {
99
      final StringBuilder sb = new StringBuilder();
100
      SINGLE_LINE_PRINTER.printUnknownFields(fields, new TextGenerator(sb));
101
      // Single line mode currently might have an extra space at the end.
102
      return sb.toString().trim();
103
    } catch (IOException e) {
104
      throw new IllegalStateException(e);
105
    }
106
  }
107
 
108
  /**
109
   * Like {@code print()}, but writes directly to a {@code String} and
110
   * returns it.
111
   */
112
  public static String printToString(final Message message) {
113
    try {
114
      final StringBuilder text = new StringBuilder();
115
      print(message, text);
116
      return text.toString();
117
    } catch (IOException e) {
118
      throw new IllegalStateException(e);
119
    }
120
  }
121
 
122
  /**
123
   * Like {@code print()}, but writes directly to a {@code String} and
124
   * returns it.
125
   */
126
  public static String printToString(final UnknownFieldSet fields) {
127
    try {
128
      final StringBuilder text = new StringBuilder();
129
      print(fields, text);
130
      return text.toString();
131
    } catch (IOException e) {
132
      throw new IllegalStateException(e);
133
    }
134
  }
135
 
136
  public static void printField(final FieldDescriptor field,
137
                                final Object value,
138
                                final Appendable output)
139
                                throws IOException {
140
    DEFAULT_PRINTER.printField(field, value, new TextGenerator(output));
141
  }
142
 
143
  public static String printFieldToString(final FieldDescriptor field,
144
                                          final Object value) {
145
    try {
146
      final StringBuilder text = new StringBuilder();
147
      printField(field, value, text);
148
      return text.toString();
149
    } catch (IOException e) {
150
      throw new IllegalStateException(e);
151
    }
152
  }
153
 
154
  /**
155
   * Outputs a textual representation of the value of given field value.
156
   *
157
   * @param field the descriptor of the field
158
   * @param value the value of the field
159
   * @param output the output to which to append the formatted value
160
   * @throws ClassCastException if the value is not appropriate for the
161
   *     given field descriptor
162
   * @throws IOException if there is an exception writing to the output
163
   */
164
  public static void printFieldValue(final FieldDescriptor field,
165
                                     final Object value,
166
                                     final Appendable output)
167
                                     throws IOException {
168
    DEFAULT_PRINTER.printFieldValue(field, value, new TextGenerator(output));
169
  }
170
 
171
  /**
172
   * Outputs a textual representation of the value of an unknown field.
173
   *
174
   * @param tag the field's tag number
175
   * @param value the value of the field
176
   * @param output the output to which to append the formatted value
177
   * @throws ClassCastException if the value is not appropriate for the
178
   *     given field descriptor
179
   * @throws IOException if there is an exception writing to the output
180
   */
181
  public static void printUnknownFieldValue(final int tag,
182
                                            final Object value,
183
                                            final Appendable output)
184
                                            throws IOException {
185
    printUnknownFieldValue(tag, value, new TextGenerator(output));
186
  }
187
 
188
  private static void printUnknownFieldValue(final int tag,
189
                                             final Object value,
190
                                             final TextGenerator generator)
191
                                             throws IOException {
192
    switch (WireFormat.getTagWireType(tag)) {
193
      case WireFormat.WIRETYPE_VARINT:
194
        generator.print(unsignedToString((Long) value));
195
        break;
196
      case WireFormat.WIRETYPE_FIXED32:
197
        generator.print(
198
            String.format((Locale) null, "0x%08x", (Integer) value));
199
        break;
200
      case WireFormat.WIRETYPE_FIXED64:
201
        generator.print(String.format((Locale) null, "0x%016x", (Long) value));
202
        break;
203
      case WireFormat.WIRETYPE_LENGTH_DELIMITED:
204
        generator.print("\"");
205
        generator.print(escapeBytes((ByteString) value));
206
        generator.print("\"");
207
        break;
208
      case WireFormat.WIRETYPE_START_GROUP:
209
        DEFAULT_PRINTER.printUnknownFields((UnknownFieldSet) value, generator);
210
        break;
211
      default:
212
        throw new IllegalArgumentException("Bad tag: " + tag);
213
    }
214
  }
215
 
216
  /** Helper class for converting protobufs to text. */
217
  private static final class Printer {
218
    /** Whether to omit newlines from the output. */
219
    final boolean singleLineMode;
220
 
221
    private Printer(final boolean singleLineMode) {
222
      this.singleLineMode = singleLineMode;
223
    }
224
 
225
    private void print(final Message message, final TextGenerator generator)
226
        throws IOException {
227
      for (Map.Entry<FieldDescriptor, Object> field
228
          : message.getAllFields().entrySet()) {
229
        printField(field.getKey(), field.getValue(), generator);
230
      }
231
      printUnknownFields(message.getUnknownFields(), generator);
232
    }
233
 
234
    private void printField(final FieldDescriptor field, final Object value,
235
        final TextGenerator generator) throws IOException {
236
      if (field.isRepeated()) {
237
        // Repeated field.  Print each element.
238
        for (Object element : (List<?>) value) {
239
          printSingleField(field, element, generator);
240
        }
241
      } else {
242
        printSingleField(field, value, generator);
243
      }
244
    }
245
 
246
    private void printSingleField(final FieldDescriptor field,
247
                                  final Object value,
248
                                  final TextGenerator generator)
249
                                  throws IOException {
250
      if (field.isExtension()) {
251
        generator.print("[");
252
        // We special-case MessageSet elements for compatibility with proto1.
253
        if (field.getContainingType().getOptions().getMessageSetWireFormat()
254
            && (field.getType() == FieldDescriptor.Type.MESSAGE)
255
            && (field.isOptional())
256
            // object equality
257
            && (field.getExtensionScope() == field.getMessageType())) {
258
          generator.print(field.getMessageType().getFullName());
259
        } else {
260
          generator.print(field.getFullName());
261
        }
262
        generator.print("]");
263
      } else {
264
        if (field.getType() == FieldDescriptor.Type.GROUP) {
265
          // Groups must be serialized with their original capitalization.
266
          generator.print(field.getMessageType().getName());
267
        } else {
268
          generator.print(field.getName());
269
        }
270
      }
271
 
272
      if (field.getJavaType() == FieldDescriptor.JavaType.MESSAGE) {
273
        if (singleLineMode) {
274
          generator.print(" { ");
275
        } else {
276
          generator.print(" {\n");
277
          generator.indent();
278
        }
279
      } else {
280
        generator.print(": ");
281
      }
282
 
283
      printFieldValue(field, value, generator);
284
 
285
      if (field.getJavaType() == FieldDescriptor.JavaType.MESSAGE) {
286
        if (singleLineMode) {
287
          generator.print("} ");
288
        } else {
289
          generator.outdent();
290
          generator.print("}\n");
291
        }
292
      } else {
293
        if (singleLineMode) {
294
          generator.print(" ");
295
        } else {
296
          generator.print("\n");
297
        }
298
      }
299
    }
300
 
301
    private void printFieldValue(final FieldDescriptor field,
302
                                 final Object value,
303
                                 final TextGenerator generator)
304
                                 throws IOException {
305
      switch (field.getType()) {
306
        case INT32:
307
        case SINT32:
308
        case SFIXED32:
309
          generator.print(((Integer) value).toString());
310
          break;
311
 
312
        case INT64:
313
        case SINT64:
314
        case SFIXED64:
315
          generator.print(((Long) value).toString());
316
          break;
317
 
318
        case BOOL:
319
          generator.print(((Boolean) value).toString());
320
          break;
321
 
322
        case FLOAT:
323
          generator.print(((Float) value).toString());
324
          break;
325
 
326
        case DOUBLE:
327
          generator.print(((Double) value).toString());
328
          break;
329
 
330
        case UINT32:
331
        case FIXED32:
332
          generator.print(unsignedToString((Integer) value));
333
          break;
334
 
335
        case UINT64:
336
        case FIXED64:
337
          generator.print(unsignedToString((Long) value));
338
          break;
339
 
340
        case STRING:
341
          generator.print("\"");
342
          generator.print(escapeText((String) value));
343
          generator.print("\"");
344
          break;
345
 
346
        case BYTES:
347
          generator.print("\"");
348
          generator.print(escapeBytes((ByteString) value));
349
          generator.print("\"");
350
          break;
351
 
352
        case ENUM:
353
          generator.print(((EnumValueDescriptor) value).getName());
354
          break;
355
 
356
        case MESSAGE:
357
        case GROUP:
358
          print((Message) value, generator);
359
          break;
360
      }
361
    }
362
 
363
    private void printUnknownFields(final UnknownFieldSet unknownFields,
364
                                    final TextGenerator generator)
365
                                    throws IOException {
366
      for (Map.Entry<Integer, UnknownFieldSet.Field> entry :
367
               unknownFields.asMap().entrySet()) {
368
        final int number = entry.getKey();
369
        final UnknownFieldSet.Field field = entry.getValue();
370
        printUnknownField(number, WireFormat.WIRETYPE_VARINT,
371
            field.getVarintList(), generator);
372
        printUnknownField(number, WireFormat.WIRETYPE_FIXED32,
373
            field.getFixed32List(), generator);
374
        printUnknownField(number, WireFormat.WIRETYPE_FIXED64,
375
            field.getFixed64List(), generator);
376
        printUnknownField(number, WireFormat.WIRETYPE_LENGTH_DELIMITED,
377
            field.getLengthDelimitedList(), generator);
378
        for (final UnknownFieldSet value : field.getGroupList()) {
379
          generator.print(entry.getKey().toString());
380
          if (singleLineMode) {
381
            generator.print(" { ");
382
          } else {
383
            generator.print(" {\n");
384
            generator.indent();
385
          }
386
          printUnknownFields(value, generator);
387
          if (singleLineMode) {
388
            generator.print("} ");
389
          } else {
390
            generator.outdent();
391
            generator.print("}\n");
392
          }
393
        }
394
      }
395
    }
396
 
397
    private void printUnknownField(final int number,
398
                                   final int wireType,
399
                                   final List<?> values,
400
                                   final TextGenerator generator)
401
                                   throws IOException {
402
      for (final Object value : values) {
403
        generator.print(String.valueOf(number));
404
        generator.print(": ");
405
        printUnknownFieldValue(wireType, value, generator);
406
        generator.print(singleLineMode ? " " : "\n");
407
      }
408
    }
409
  }
410
 
411
  /** Convert an unsigned 32-bit integer to a string. */
412
  private static String unsignedToString(final int value) {
413
    if (value >= 0) {
414
      return Integer.toString(value);
415
    } else {
416
      return Long.toString(((long) value) & 0x00000000FFFFFFFFL);
417
    }
418
  }
419
 
420
  /** Convert an unsigned 64-bit integer to a string. */
421
  private static String unsignedToString(final long value) {
422
    if (value >= 0) {
423
      return Long.toString(value);
424
    } else {
425
      // Pull off the most-significant bit so that BigInteger doesn't think
426
      // the number is negative, then set it again using setBit().
427
      return BigInteger.valueOf(value & 0x7FFFFFFFFFFFFFFFL)
428
                       .setBit(63).toString();
429
    }
430
  }
431
 
432
  /**
433
   * An inner class for writing text to the output stream.
434
   */
435
  private static final class TextGenerator {
436
    private final Appendable output;
437
    private final StringBuilder indent = new StringBuilder();
438
    private boolean atStartOfLine = true;
439
 
440
    private TextGenerator(final Appendable output) {
441
      this.output = output;
442
    }
443
 
444
    /**
445
     * Indent text by two spaces.  After calling Indent(), two spaces will be
446
     * inserted at the beginning of each line of text.  Indent() may be called
447
     * multiple times to produce deeper indents.
448
     */
449
    public void indent() {
450
      indent.append("  ");
451
    }
452
 
453
    /**
454
     * Reduces the current indent level by two spaces, or crashes if the indent
455
     * level is zero.
456
     */
457
    public void outdent() {
458
      final int length = indent.length();
459
      if (length == 0) {
460
        throw new IllegalArgumentException(
461
            " Outdent() without matching Indent().");
462
      }
463
      indent.delete(length - 2, length);
464
    }
465
 
466
    /**
467
     * Print text to the output stream.
468
     */
469
    public void print(final CharSequence text) throws IOException {
470
      final int size = text.length();
471
      int pos = 0;
472
 
473
      for (int i = 0; i < size; i++) {
474
        if (text.charAt(i) == '\n') {
475
          write(text.subSequence(pos, size), i - pos + 1);
476
          pos = i + 1;
477
          atStartOfLine = true;
478
        }
479
      }
480
      write(text.subSequence(pos, size), size - pos);
481
    }
482
 
483
    private void write(final CharSequence data, final int size)
484
                       throws IOException {
485
      if (size == 0) {
486
        return;
487
      }
488
      if (atStartOfLine) {
489
        atStartOfLine = false;
490
        output.append(indent);
491
      }
492
      output.append(data);
493
    }
494
  }
495
 
496
  // =================================================================
497
  // Parsing
498
 
499
  /**
500
   * Represents a stream of tokens parsed from a {@code String}.
501
   *
502
   * <p>The Java standard library provides many classes that you might think
503
   * would be useful for implementing this, but aren't.  For example:
504
   *
505
   * <ul>
506
   * <li>{@code java.io.StreamTokenizer}:  This almost does what we want -- or,
507
   *   at least, something that would get us close to what we want -- except
508
   *   for one fatal flaw:  It automatically un-escapes strings using Java
509
   *   escape sequences, which do not include all the escape sequences we
510
   *   need to support (e.g. '\x').
511
   * <li>{@code java.util.Scanner}:  This seems like a great way at least to
512
   *   parse regular expressions out of a stream (so we wouldn't have to load
513
   *   the entire input into a single string before parsing).  Sadly,
514
   *   {@code Scanner} requires that tokens be delimited with some delimiter.
515
   *   Thus, although the text "foo:" should parse to two tokens ("foo" and
516
   *   ":"), {@code Scanner} would recognize it only as a single token.
517
   *   Furthermore, {@code Scanner} provides no way to inspect the contents
518
   *   of delimiters, making it impossible to keep track of line and column
519
   *   numbers.
520
   * </ul>
521
   *
522
   * <p>Luckily, Java's regular expression support does manage to be useful to
523
   * us.  (Barely:  We need {@code Matcher.usePattern()}, which is new in
524
   * Java 1.5.)  So, we can use that, at least.  Unfortunately, this implies
525
   * that we need to have the entire input in one contiguous string.
526
   */
527
  private static final class Tokenizer {
528
    private final CharSequence text;
529
    private final Matcher matcher;
530
    private String currentToken;
531
 
532
    // The character index within this.text at which the current token begins.
533
    private int pos = 0;
534
 
535
    // The line and column numbers of the current token.
536
    private int line = 0;
537
    private int column = 0;
538
 
539
    // The line and column numbers of the previous token (allows throwing
540
    // errors *after* consuming).
541
    private int previousLine = 0;
542
    private int previousColumn = 0;
543
 
544
    // We use possessive quantifiers (*+ and ++) because otherwise the Java
545
    // regex matcher has stack overflows on large inputs.
546
    private static final Pattern WHITESPACE =
547
      Pattern.compile("(\\s|(#.*$))++", Pattern.MULTILINE);
548
    private static final Pattern TOKEN = Pattern.compile(
549
      "[a-zA-Z_][0-9a-zA-Z_+-]*+|" +                // an identifier
550
      "[.]?[0-9+-][0-9a-zA-Z_.+-]*+|" +             // a number
551
      "\"([^\"\n\\\\]|\\\\.)*+(\"|\\\\?$)|" +       // a double-quoted string
552
      "\'([^\'\n\\\\]|\\\\.)*+(\'|\\\\?$)",         // a single-quoted string
553
      Pattern.MULTILINE);
554
 
555
    private static final Pattern DOUBLE_INFINITY = Pattern.compile(
556
      "-?inf(inity)?",
557
      Pattern.CASE_INSENSITIVE);
558
    private static final Pattern FLOAT_INFINITY = Pattern.compile(
559
      "-?inf(inity)?f?",
560
      Pattern.CASE_INSENSITIVE);
561
    private static final Pattern FLOAT_NAN = Pattern.compile(
562
      "nanf?",
563
      Pattern.CASE_INSENSITIVE);
564
 
565
    /** Construct a tokenizer that parses tokens from the given text. */
566
    private Tokenizer(final CharSequence text) {
      this.text = text;
      // One Matcher is reused for the whole input; its pattern is switched
      // between WHITESPACE and TOKEN as scanning proceeds.
      this.matcher = WHITESPACE.matcher(text);
      // Skip leading whitespace/comments, then load the first token.
      skipWhitespace();
      nextToken();
    }
572
 
573
    /** Are we at the end of the input? */
574
    public boolean atEnd() {
575
      return currentToken.length() == 0;
576
    }
577
 
578
    /** Advance to the next token. */
579
    public void nextToken() {
580
      previousLine = line;
581
      previousColumn = column;
582
 
583
      // Advance the line counter to the current position.
584
      while (pos < matcher.regionStart()) {
585
        if (text.charAt(pos) == '\n') {
586
          ++line;
587
          column = 0;
588
        } else {
589
          ++column;
590
        }
591
        ++pos;
592
      }
593
 
594
      // Match the next token.
595
      if (matcher.regionStart() == matcher.regionEnd()) {
596
        // EOF
597
        currentToken = "";
598
      } else {
599
        matcher.usePattern(TOKEN);
600
        if (matcher.lookingAt()) {
601
          currentToken = matcher.group();
602
          matcher.region(matcher.end(), matcher.regionEnd());
603
        } else {
604
          // Take one character.
605
          currentToken = String.valueOf(text.charAt(pos));
606
          matcher.region(pos + 1, matcher.regionEnd());
607
        }
608
 
609
        skipWhitespace();
610
      }
611
    }
612
 
613
    /**
614
     * Skip over any whitespace so that the matcher region starts at the next
615
     * token.
616
     */
617
    private void skipWhitespace() {
618
      matcher.usePattern(WHITESPACE);
619
      if (matcher.lookingAt()) {
620
        matcher.region(matcher.end(), matcher.regionEnd());
621
      }
622
    }
623
 
624
    /**
625
     * If the next token exactly matches {@code token}, consume it and return
626
     * {@code true}.  Otherwise, return {@code false} without doing anything.
627
     */
628
    public boolean tryConsume(final String token) {
629
      if (currentToken.equals(token)) {
630
        nextToken();
631
        return true;
632
      } else {
633
        return false;
634
      }
635
    }
636
 
637
    /**
638
     * If the next token exactly matches {@code token}, consume it.  Otherwise,
639
     * throw a {@link ParseException}.
640
     */
641
    public void consume(final String token) throws ParseException {
642
      if (!tryConsume(token)) {
643
        throw parseException("Expected \"" + token + "\".");
644
      }
645
    }
646
 
647
    /**
648
     * Returns {@code true} if the next token is an integer, but does
649
     * not consume it.
650
     */
651
    public boolean lookingAtInteger() {
652
      if (currentToken.length() == 0) {
653
        return false;
654
      }
655
 
656
      final char c = currentToken.charAt(0);
657
      return ('0' <= c && c <= '9') ||
658
             c == '-' || c == '+';
659
    }
660
 
661
    /**
662
     * If the next token is an identifier, consume it and return its value.
663
     * Otherwise, throw a {@link ParseException}.
664
     */
665
    public String consumeIdentifier() throws ParseException {
666
      for (int i = 0; i < currentToken.length(); i++) {
667
        final char c = currentToken.charAt(i);
668
        if (('a' <= c && c <= 'z') ||
669
            ('A' <= c && c <= 'Z') ||
670
            ('0' <= c && c <= '9') ||
671
            (c == '_') || (c == '.')) {
672
          // OK
673
        } else {
674
          throw parseException("Expected identifier.");
675
        }
676
      }
677
 
678
      final String result = currentToken;
679
      nextToken();
680
      return result;
681
    }
682
 
683
    /**
684
     * If the next token is a 32-bit signed integer, consume it and return its
685
     * value.  Otherwise, throw a {@link ParseException}.
686
     */
687
    public int consumeInt32() throws ParseException {
688
      try {
689
        final int result = parseInt32(currentToken);
690
        nextToken();
691
        return result;
692
      } catch (NumberFormatException e) {
693
        throw integerParseException(e);
694
      }
695
    }
696
 
697
    /**
698
     * If the next token is a 32-bit unsigned integer, consume it and return its
699
     * value.  Otherwise, throw a {@link ParseException}.
700
     */
701
    public int consumeUInt32() throws ParseException {
702
      try {
703
        final int result = parseUInt32(currentToken);
704
        nextToken();
705
        return result;
706
      } catch (NumberFormatException e) {
707
        throw integerParseException(e);
708
      }
709
    }
710
 
711
    /**
712
     * If the next token is a 64-bit signed integer, consume it and return its
713
     * value.  Otherwise, throw a {@link ParseException}.
714
     */
715
    public long consumeInt64() throws ParseException {
716
      try {
717
        final long result = parseInt64(currentToken);
718
        nextToken();
719
        return result;
720
      } catch (NumberFormatException e) {
721
        throw integerParseException(e);
722
      }
723
    }
724
 
725
    /**
726
     * If the next token is a 64-bit unsigned integer, consume it and return its
727
     * value.  Otherwise, throw a {@link ParseException}.
728
     */
729
    public long consumeUInt64() throws ParseException {
730
      try {
731
        final long result = parseUInt64(currentToken);
732
        nextToken();
733
        return result;
734
      } catch (NumberFormatException e) {
735
        throw integerParseException(e);
736
      }
737
    }
738
 
739
    /**
740
     * If the next token is a double, consume it and return its value.
741
     * Otherwise, throw a {@link ParseException}.
742
     */
743
    public double consumeDouble() throws ParseException {
744
      // We need to parse infinity and nan separately because
745
      // Double.parseDouble() does not accept "inf", "infinity", or "nan".
746
      if (DOUBLE_INFINITY.matcher(currentToken).matches()) {
747
        final boolean negative = currentToken.startsWith("-");
748
        nextToken();
749
        return negative ? Double.NEGATIVE_INFINITY : Double.POSITIVE_INFINITY;
750
      }
751
      if (currentToken.equalsIgnoreCase("nan")) {
752
        nextToken();
753
        return Double.NaN;
754
      }
755
      try {
756
        final double result = Double.parseDouble(currentToken);
757
        nextToken();
758
        return result;
759
      } catch (NumberFormatException e) {
760
        throw floatParseException(e);
761
      }
762
    }
763
 
764
    /**
765
     * If the next token is a float, consume it and return its value.
766
     * Otherwise, throw a {@link ParseException}.
767
     */
768
    public float consumeFloat() throws ParseException {
769
      // We need to parse infinity and nan separately because
770
      // Float.parseFloat() does not accept "inf", "infinity", or "nan".
771
      if (FLOAT_INFINITY.matcher(currentToken).matches()) {
772
        final boolean negative = currentToken.startsWith("-");
773
        nextToken();
774
        return negative ? Float.NEGATIVE_INFINITY : Float.POSITIVE_INFINITY;
775
      }
776
      if (FLOAT_NAN.matcher(currentToken).matches()) {
777
        nextToken();
778
        return Float.NaN;
779
      }
780
      try {
781
        final float result = Float.parseFloat(currentToken);
782
        nextToken();
783
        return result;
784
      } catch (NumberFormatException e) {
785
        throw floatParseException(e);
786
      }
787
    }
788
 
789
    /**
790
     * If the next token is a boolean, consume it and return its value.
791
     * Otherwise, throw a {@link ParseException}.
792
     */
793
    public boolean consumeBoolean() throws ParseException {
794
      if (currentToken.equals("true") ||
795
          currentToken.equals("t") ||
796
          currentToken.equals("1")) {
797
        nextToken();
798
        return true;
799
      } else if (currentToken.equals("false") ||
800
                 currentToken.equals("f") ||
801
                 currentToken.equals("0")) {
802
        nextToken();
803
        return false;
804
      } else {
805
        throw parseException("Expected \"true\" or \"false\".");
806
      }
807
    }
808
 
809
    /**
810
     * If the next token is a string, consume it and return its (unescaped)
811
     * value.  Otherwise, throw a {@link ParseException}.
812
     */
813
    public String consumeString() throws ParseException {
814
      return consumeByteString().toStringUtf8();
815
    }
816
 
817
    /**
818
     * If the next token is a string, consume it, unescape it as a
819
     * {@link ByteString}, and return it.  Otherwise, throw a
820
     * {@link ParseException}.
821
     */
822
    public ByteString consumeByteString() throws ParseException {
823
      List<ByteString> list = new ArrayList<ByteString>();
824
      consumeByteString(list);
825
      while (currentToken.startsWith("'") || currentToken.startsWith("\"")) {
826
        consumeByteString(list);
827
      }
828
      return ByteString.copyFrom(list);
829
    }
830
 
831
    /**
832
     * Like {@link #consumeByteString()} but adds each token of the string to
833
     * the given list.  String literals (whether bytes or text) may come in
834
     * multiple adjacent tokens which are automatically concatenated, like in
835
     * C or Python.
836
     */
837
    private void consumeByteString(List<ByteString> list) throws ParseException {
838
      final char quote = currentToken.length() > 0 ? currentToken.charAt(0)
839
                                                   : '\0';
840
      if (quote != '\"' && quote != '\'') {
841
        throw parseException("Expected string.");
842
      }
843
 
844
      if (currentToken.length() < 2 ||
845
          currentToken.charAt(currentToken.length() - 1) != quote) {
846
        throw parseException("String missing ending quote.");
847
      }
848
 
849
      try {
850
        final String escaped =
851
            currentToken.substring(1, currentToken.length() - 1);
852
        final ByteString result = unescapeBytes(escaped);
853
        nextToken();
854
        list.add(result);
855
      } catch (InvalidEscapeSequenceException e) {
856
        throw parseException(e.getMessage());
857
      }
858
    }
859
 
860
    /**
861
     * Returns a {@link ParseException} with the current line and column
862
     * numbers in the description, suitable for throwing.
863
     */
864
    public ParseException parseException(final String description) {
865
      // Note:  People generally prefer one-based line and column numbers.
866
      return new ParseException(
867
        (line + 1) + ":" + (column + 1) + ": " + description);
868
    }
869
 
870
    /**
871
     * Returns a {@link ParseException} with the line and column numbers of
872
     * the previous token in the description, suitable for throwing.
873
     */
874
    public ParseException parseExceptionPreviousToken(
875
        final String description) {
876
      // Note:  People generally prefer one-based line and column numbers.
877
      return new ParseException(
878
        (previousLine + 1) + ":" + (previousColumn + 1) + ": " + description);
879
    }
880
 
881
    /**
882
     * Constructs an appropriate {@link ParseException} for the given
883
     * {@code NumberFormatException} when trying to parse an integer.
884
     */
885
    private ParseException integerParseException(
886
        final NumberFormatException e) {
887
      return parseException("Couldn't parse integer: " + e.getMessage());
888
    }
889
 
890
    /**
891
     * Constructs an appropriate {@link ParseException} for the given
892
     * {@code NumberFormatException} when trying to parse a float or double.
893
     */
894
    private ParseException floatParseException(final NumberFormatException e) {
895
      return parseException("Couldn't parse number: " + e.getMessage());
896
    }
897
  }
898
 
899
  /** Thrown when parsing an invalid text format message. */
900
  public static class ParseException extends IOException {
901
    private static final long serialVersionUID = 3196188060225107702L;
902
 
903
    public ParseException(final String message) {
904
      super(message);
905
    }
906
  }
907
 
908
  /**
909
   * Parse a text-format message from {@code input} and merge the contents
910
   * into {@code builder}.
911
   */
912
  public static void merge(final Readable input,
913
                           final Message.Builder builder)
914
                           throws IOException {
915
    merge(input, ExtensionRegistry.getEmptyRegistry(), builder);
916
  }
917
 
918
  /**
919
   * Parse a text-format message from {@code input} and merge the contents
920
   * into {@code builder}.
921
   */
922
  public static void merge(final CharSequence input,
923
                           final Message.Builder builder)
924
                           throws ParseException {
925
    merge(input, ExtensionRegistry.getEmptyRegistry(), builder);
926
  }
927
 
928
  /**
929
   * Parse a text-format message from {@code input} and merge the contents
930
   * into {@code builder}.  Extensions will be recognized if they are
931
   * registered in {@code extensionRegistry}.
932
   */
933
  public static void merge(final Readable input,
934
                           final ExtensionRegistry extensionRegistry,
935
                           final Message.Builder builder)
936
                           throws IOException {
937
    // Read the entire input to a String then parse that.
938
 
939
    // If StreamTokenizer were not quite so crippled, or if there were a kind
940
    // of Reader that could read in chunks that match some particular regex,
941
    // or if we wanted to write a custom Reader to tokenize our stream, then
942
    // we would not have to read to one big String.  Alas, none of these is
943
    // the case.  Oh well.
944
 
945
    merge(toStringBuilder(input), extensionRegistry, builder);
946
  }
947
 
948
  private static final int BUFFER_SIZE = 4096;
949
 
950
  // TODO(chrisn): See if working around java.io.Reader#read(CharBuffer)
951
  // overhead is worthwhile
952
  private static StringBuilder toStringBuilder(final Readable input)
953
      throws IOException {
954
    final StringBuilder text = new StringBuilder();
955
    final CharBuffer buffer = CharBuffer.allocate(BUFFER_SIZE);
956
    while (true) {
957
      final int n = input.read(buffer);
958
      if (n == -1) {
959
        break;
960
      }
961
      buffer.flip();
962
      text.append(buffer, 0, n);
963
    }
964
    return text;
965
  }
966
 
967
  /**
968
   * Parse a text-format message from {@code input} and merge the contents
969
   * into {@code builder}.  Extensions will be recognized if they are
970
   * registered in {@code extensionRegistry}.
971
   */
972
  public static void merge(final CharSequence input,
973
                           final ExtensionRegistry extensionRegistry,
974
                           final Message.Builder builder)
975
                           throws ParseException {
976
    final Tokenizer tokenizer = new Tokenizer(input);
977
 
978
    while (!tokenizer.atEnd()) {
979
      mergeField(tokenizer, extensionRegistry, builder);
980
    }
981
  }
982
 
983
  /**
984
   * Parse a single field from {@code tokenizer} and merge it into
985
   * {@code builder}.
986
   */
987
  private static void mergeField(final Tokenizer tokenizer,
988
                                 final ExtensionRegistry extensionRegistry,
989
                                 final Message.Builder builder)
990
                                 throws ParseException {
991
    FieldDescriptor field;
992
    final Descriptor type = builder.getDescriptorForType();
993
    ExtensionRegistry.ExtensionInfo extension = null;
994
 
995
    if (tokenizer.tryConsume("[")) {
996
      // An extension.
997
      final StringBuilder name =
998
          new StringBuilder(tokenizer.consumeIdentifier());
999
      while (tokenizer.tryConsume(".")) {
1000
        name.append('.');
1001
        name.append(tokenizer.consumeIdentifier());
1002
      }
1003
 
1004
      extension = extensionRegistry.findExtensionByName(name.toString());
1005
 
1006
      if (extension == null) {
1007
        throw tokenizer.parseExceptionPreviousToken(
1008
          "Extension \"" + name + "\" not found in the ExtensionRegistry.");
1009
      } else if (extension.descriptor.getContainingType() != type) {
1010
        throw tokenizer.parseExceptionPreviousToken(
1011
          "Extension \"" + name + "\" does not extend message type \"" +
1012
          type.getFullName() + "\".");
1013
      }
1014
 
1015
      tokenizer.consume("]");
1016
 
1017
      field = extension.descriptor;
1018
    } else {
1019
      final String name = tokenizer.consumeIdentifier();
1020
      field = type.findFieldByName(name);
1021
 
1022
      // Group names are expected to be capitalized as they appear in the
1023
      // .proto file, which actually matches their type names, not their field
1024
      // names.
1025
      if (field == null) {
1026
        // Explicitly specify US locale so that this code does not break when
1027
        // executing in Turkey.
1028
        final String lowerName = name.toLowerCase(Locale.US);
1029
        field = type.findFieldByName(lowerName);
1030
        // If the case-insensitive match worked but the field is NOT a group,
1031
        if (field != null && field.getType() != FieldDescriptor.Type.GROUP) {
1032
          field = null;
1033
        }
1034
      }
1035
      // Again, special-case group names as described above.
1036
      if (field != null && field.getType() == FieldDescriptor.Type.GROUP &&
1037
          !field.getMessageType().getName().equals(name)) {
1038
        field = null;
1039
      }
1040
 
1041
      if (field == null) {
1042
        throw tokenizer.parseExceptionPreviousToken(
1043
          "Message type \"" + type.getFullName() +
1044
          "\" has no field named \"" + name + "\".");
1045
      }
1046
    }
1047
 
1048
    Object value = null;
1049
 
1050
    if (field.getJavaType() == FieldDescriptor.JavaType.MESSAGE) {
1051
      tokenizer.tryConsume(":");  // optional
1052
 
1053
      final String endToken;
1054
      if (tokenizer.tryConsume("<")) {
1055
        endToken = ">";
1056
      } else {
1057
        tokenizer.consume("{");
1058
        endToken = "}";
1059
      }
1060
 
1061
      final Message.Builder subBuilder;
1062
      if (extension == null) {
1063
        subBuilder = builder.newBuilderForField(field);
1064
      } else {
1065
        subBuilder = extension.defaultInstance.newBuilderForType();
1066
      }
1067
 
1068
      while (!tokenizer.tryConsume(endToken)) {
1069
        if (tokenizer.atEnd()) {
1070
          throw tokenizer.parseException(
1071
            "Expected \"" + endToken + "\".");
1072
        }
1073
        mergeField(tokenizer, extensionRegistry, subBuilder);
1074
      }
1075
 
1076
      value = subBuilder.build();
1077
 
1078
    } else {
1079
      tokenizer.consume(":");
1080
 
1081
      switch (field.getType()) {
1082
        case INT32:
1083
        case SINT32:
1084
        case SFIXED32:
1085
          value = tokenizer.consumeInt32();
1086
          break;
1087
 
1088
        case INT64:
1089
        case SINT64:
1090
        case SFIXED64:
1091
          value = tokenizer.consumeInt64();
1092
          break;
1093
 
1094
        case UINT32:
1095
        case FIXED32:
1096
          value = tokenizer.consumeUInt32();
1097
          break;
1098
 
1099
        case UINT64:
1100
        case FIXED64:
1101
          value = tokenizer.consumeUInt64();
1102
          break;
1103
 
1104
        case FLOAT:
1105
          value = tokenizer.consumeFloat();
1106
          break;
1107
 
1108
        case DOUBLE:
1109
          value = tokenizer.consumeDouble();
1110
          break;
1111
 
1112
        case BOOL:
1113
          value = tokenizer.consumeBoolean();
1114
          break;
1115
 
1116
        case STRING:
1117
          value = tokenizer.consumeString();
1118
          break;
1119
 
1120
        case BYTES:
1121
          value = tokenizer.consumeByteString();
1122
          break;
1123
 
1124
        case ENUM:
1125
          final EnumDescriptor enumType = field.getEnumType();
1126
 
1127
          if (tokenizer.lookingAtInteger()) {
1128
            final int number = tokenizer.consumeInt32();
1129
            value = enumType.findValueByNumber(number);
1130
            if (value == null) {
1131
              throw tokenizer.parseExceptionPreviousToken(
1132
                "Enum type \"" + enumType.getFullName() +
1133
                "\" has no value with number " + number + '.');
1134
            }
1135
          } else {
1136
            final String id = tokenizer.consumeIdentifier();
1137
            value = enumType.findValueByName(id);
1138
            if (value == null) {
1139
              throw tokenizer.parseExceptionPreviousToken(
1140
                "Enum type \"" + enumType.getFullName() +
1141
                "\" has no value named \"" + id + "\".");
1142
            }
1143
          }
1144
 
1145
          break;
1146
 
1147
        case MESSAGE:
1148
        case GROUP:
1149
          throw new RuntimeException("Can't get here.");
1150
      }
1151
    }
1152
 
1153
    if (field.isRepeated()) {
1154
      builder.addRepeatedField(field, value);
1155
    } else {
1156
      builder.setField(field, value);
1157
    }
1158
  }
1159
 
1160
  // =================================================================
1161
  // Utility functions
1162
  //
1163
  // Some of these methods are package-private because Descriptors.java uses
1164
  // them.
1165
 
1166
  /**
1167
   * Escapes bytes in the format used in protocol buffer text format, which
1168
   * is the same as the format used for C string literals.  All bytes
1169
   * that are not printable 7-bit ASCII characters are escaped, as well as
1170
   * backslash, single-quote, and double-quote characters.  Characters for
1171
   * which no defined short-hand escape sequence is defined will be escaped
1172
   * using 3-digit octal sequences.
1173
   */
1174
  static String escapeBytes(final ByteString input) {
1175
    final StringBuilder builder = new StringBuilder(input.size());
1176
    for (int i = 0; i < input.size(); i++) {
1177
      final byte b = input.byteAt(i);
1178
      switch (b) {
1179
        // Java does not recognize \a or \v, apparently.
1180
        case 0x07: builder.append("\\a" ); break;
1181
        case '\b': builder.append("\\b" ); break;
1182
        case '\f': builder.append("\\f" ); break;
1183
        case '\n': builder.append("\\n" ); break;
1184
        case '\r': builder.append("\\r" ); break;
1185
        case '\t': builder.append("\\t" ); break;
1186
        case 0x0b: builder.append("\\v" ); break;
1187
        case '\\': builder.append("\\\\"); break;
1188
        case '\'': builder.append("\\\'"); break;
1189
        case '"' : builder.append("\\\""); break;
1190
        default:
1191
          // Note:  Bytes with the high-order bit set should be escaped.  Since
1192
          //   bytes are signed, such bytes will compare less than 0x20, hence
1193
          //   the following line is correct.
1194
          if (b >= 0x20) {
1195
            builder.append((char) b);
1196
          } else {
1197
            builder.append('\\');
1198
            builder.append((char) ('0' + ((b >>> 6) & 3)));
1199
            builder.append((char) ('0' + ((b >>> 3) & 7)));
1200
            builder.append((char) ('0' + (b & 7)));
1201
          }
1202
          break;
1203
      }
1204
    }
1205
    return builder.toString();
1206
  }
1207
 
1208
  /**
1209
   * Un-escape a byte sequence as escaped using
1210
   * {@link #escapeBytes(ByteString)}.  Two-digit hex escapes (starting with
1211
   * "\x") are also recognized.
1212
   */
1213
  static ByteString unescapeBytes(final CharSequence charString)
1214
      throws InvalidEscapeSequenceException {
1215
    // First convert the Java characater sequence to UTF-8 bytes.
1216
    ByteString input = ByteString.copyFromUtf8(charString.toString());
1217
    // Then unescape certain byte sequences introduced by ASCII '\\'.  The valid
1218
    // escapes can all be expressed with ASCII characters, so it is safe to
1219
    // operate on bytes here.
1220
    //
1221
    // Unescaping the input byte array will result in a byte sequence that's no
1222
    // longer than the input.  That's because each escape sequence is between
1223
    // two and four bytes long and stands for a single byte.
1224
    final byte[] result = new byte[input.size()];
1225
    int pos = 0;
1226
    for (int i = 0; i < input.size(); i++) {
1227
      byte c = input.byteAt(i);
1228
      if (c == '\\') {
1229
        if (i + 1 < input.size()) {
1230
          ++i;
1231
          c = input.byteAt(i);
1232
          if (isOctal(c)) {
1233
            // Octal escape.
1234
            int code = digitValue(c);
1235
            if (i + 1 < input.size() && isOctal(input.byteAt(i + 1))) {
1236
              ++i;
1237
              code = code * 8 + digitValue(input.byteAt(i));
1238
            }
1239
            if (i + 1 < input.size() && isOctal(input.byteAt(i + 1))) {
1240
              ++i;
1241
              code = code * 8 + digitValue(input.byteAt(i));
1242
            }
1243
            // TODO: Check that 0 <= code && code <= 0xFF.
1244
            result[pos++] = (byte)code;
1245
          } else {
1246
            switch (c) {
1247
              case 'a' : result[pos++] = 0x07; break;
1248
              case 'b' : result[pos++] = '\b'; break;
1249
              case 'f' : result[pos++] = '\f'; break;
1250
              case 'n' : result[pos++] = '\n'; break;
1251
              case 'r' : result[pos++] = '\r'; break;
1252
              case 't' : result[pos++] = '\t'; break;
1253
              case 'v' : result[pos++] = 0x0b; break;
1254
              case '\\': result[pos++] = '\\'; break;
1255
              case '\'': result[pos++] = '\''; break;
1256
              case '"' : result[pos++] = '\"'; break;
1257
 
1258
              case 'x':
1259
                // hex escape
1260
                int code = 0;
1261
                if (i + 1 < input.size() && isHex(input.byteAt(i + 1))) {
1262
                  ++i;
1263
                  code = digitValue(input.byteAt(i));
1264
                } else {
1265
                  throw new InvalidEscapeSequenceException(
1266
                      "Invalid escape sequence: '\\x' with no digits");
1267
                }
1268
                if (i + 1 < input.size() && isHex(input.byteAt(i + 1))) {
1269
                  ++i;
1270
                  code = code * 16 + digitValue(input.byteAt(i));
1271
                }
1272
                result[pos++] = (byte)code;
1273
                break;
1274
 
1275
              default:
1276
                throw new InvalidEscapeSequenceException(
1277
                    "Invalid escape sequence: '\\" + (char)c + '\'');
1278
            }
1279
          }
1280
        } else {
1281
          throw new InvalidEscapeSequenceException(
1282
              "Invalid escape sequence: '\\' at end of string.");
1283
        }
1284
      } else {
1285
        result[pos++] = c;
1286
      }
1287
    }
1288
 
1289
    return ByteString.copyFrom(result, 0, pos);
1290
  }
1291
 
1292
  /**
1293
   * Thrown by {@link TextFormat#unescapeBytes} and
1294
   * {@link TextFormat#unescapeText} when an invalid escape sequence is seen.
1295
   */
1296
  static class InvalidEscapeSequenceException extends IOException {
1297
    private static final long serialVersionUID = -8164033650142593304L;
1298
 
1299
    InvalidEscapeSequenceException(final String description) {
1300
      super(description);
1301
    }
1302
  }
1303
 
1304
  /**
1305
   * Like {@link #escapeBytes(ByteString)}, but escapes a text string.
1306
   * Non-ASCII characters are first encoded as UTF-8, then each byte is escaped
1307
   * individually as a 3-digit octal escape.  Yes, it's weird.
1308
   */
1309
  static String escapeText(final String input) {
1310
    return escapeBytes(ByteString.copyFromUtf8(input));
1311
  }
1312
 
1313
  /**
1314
   * Un-escape a text string as escaped using {@link #escapeText(String)}.
1315
   * Two-digit hex escapes (starting with "\x") are also recognized.
1316
   */
1317
  static String unescapeText(final String input)
1318
                             throws InvalidEscapeSequenceException {
1319
    return unescapeBytes(input).toStringUtf8();
1320
  }
1321
 
1322
  /** Is this an octal digit? */
1323
  private static boolean isOctal(final byte c) {
1324
    return '0' <= c && c <= '7';
1325
  }
1326
 
1327
  /** Is this a hex digit? */
1328
  private static boolean isHex(final byte c) {
1329
    return ('0' <= c && c <= '9') ||
1330
           ('a' <= c && c <= 'f') ||
1331
           ('A' <= c && c <= 'F');
1332
  }
1333
 
1334
  /**
1335
   * Interpret a character as a digit (in any base up to 36) and return the
1336
   * numeric value.  This is like {@code Character.digit()} but we don't accept
1337
   * non-ASCII digits.
1338
   */
1339
  private static int digitValue(final byte c) {
1340
    if ('0' <= c && c <= '9') {
1341
      return c - '0';
1342
    } else if ('a' <= c && c <= 'z') {
1343
      return c - 'a' + 10;
1344
    } else {
1345
      return c - 'A' + 10;
1346
    }
1347
  }
1348
 
1349
  /**
1350
   * Parse a 32-bit signed integer from the text.  Unlike the Java standard
1351
   * {@code Integer.parseInt()}, this function recognizes the prefixes "0x"
1352
   * and "0" to signify hexidecimal and octal numbers, respectively.
1353
   */
1354
  static int parseInt32(final String text) throws NumberFormatException {
1355
    return (int) parseInteger(text, true, false);
1356
  }
1357
 
1358
  /**
1359
   * Parse a 32-bit unsigned integer from the text.  Unlike the Java standard
1360
   * {@code Integer.parseInt()}, this function recognizes the prefixes "0x"
1361
   * and "0" to signify hexidecimal and octal numbers, respectively.  The
1362
   * result is coerced to a (signed) {@code int} when returned since Java has
1363
   * no unsigned integer type.
1364
   */
1365
  static int parseUInt32(final String text) throws NumberFormatException {
1366
    return (int) parseInteger(text, false, false);
1367
  }
1368
 
1369
  /**
1370
   * Parse a 64-bit signed integer from the text.  Unlike the Java standard
1371
   * {@code Integer.parseInt()}, this function recognizes the prefixes "0x"
1372
   * and "0" to signify hexidecimal and octal numbers, respectively.
1373
   */
1374
  static long parseInt64(final String text) throws NumberFormatException {
1375
    return parseInteger(text, true, true);
1376
  }
1377
 
1378
  /**
1379
   * Parse a 64-bit unsigned integer from the text.  Unlike the Java standard
1380
   * {@code Integer.parseInt()}, this function recognizes the prefixes "0x"
1381
   * and "0" to signify hexidecimal and octal numbers, respectively.  The
1382
   * result is coerced to a (signed) {@code long} when returned since Java has
1383
   * no unsigned long type.
1384
   */
1385
  static long parseUInt64(final String text) throws NumberFormatException {
1386
    return parseInteger(text, false, true);
1387
  }
1388
 
1389
  private static long parseInteger(final String text,
1390
                                   final boolean isSigned,
1391
                                   final boolean isLong)
1392
                                   throws NumberFormatException {
1393
    int pos = 0;
1394
 
1395
    boolean negative = false;
1396
    if (text.startsWith("-", pos)) {
1397
      if (!isSigned) {
1398
        throw new NumberFormatException("Number must be positive: " + text);
1399
      }
1400
      ++pos;
1401
      negative = true;
1402
    }
1403
 
1404
    int radix = 10;
1405
    if (text.startsWith("0x", pos)) {
1406
      pos += 2;
1407
      radix = 16;
1408
    } else if (text.startsWith("0", pos)) {
1409
      radix = 8;
1410
    }
1411
 
1412
    final String numberText = text.substring(pos);
1413
 
1414
    long result = 0;
1415
    if (numberText.length() < 16) {
1416
      // Can safely assume no overflow.
1417
      result = Long.parseLong(numberText, radix);
1418
      if (negative) {
1419
        result = -result;
1420
      }
1421
 
1422
      // Check bounds.
1423
      // No need to check for 64-bit numbers since they'd have to be 16 chars
1424
      // or longer to overflow.
1425
      if (!isLong) {
1426
        if (isSigned) {
1427
          if (result > Integer.MAX_VALUE || result < Integer.MIN_VALUE) {
1428
            throw new NumberFormatException(
1429
              "Number out of range for 32-bit signed integer: " + text);
1430
          }
1431
        } else {
1432
          if (result >= (1L << 32) || result < 0) {
1433
            throw new NumberFormatException(
1434
              "Number out of range for 32-bit unsigned integer: " + text);
1435
          }
1436
        }
1437
      }
1438
    } else {
1439
      BigInteger bigValue = new BigInteger(numberText, radix);
1440
      if (negative) {
1441
        bigValue = bigValue.negate();
1442
      }
1443
 
1444
      // Check bounds.
1445
      if (!isLong) {
1446
        if (isSigned) {
1447
          if (bigValue.bitLength() > 31) {
1448
            throw new NumberFormatException(
1449
              "Number out of range for 32-bit signed integer: " + text);
1450
          }
1451
        } else {
1452
          if (bigValue.bitLength() > 32) {
1453
            throw new NumberFormatException(
1454
              "Number out of range for 32-bit unsigned integer: " + text);
1455
          }
1456
        }
1457
      } else {
1458
        if (isSigned) {
1459
          if (bigValue.bitLength() > 63) {
1460
            throw new NumberFormatException(
1461
              "Number out of range for 64-bit signed integer: " + text);
1462
          }
1463
        } else {
1464
          if (bigValue.bitLength() > 64) {
1465
            throw new NumberFormatException(
1466
              "Number out of range for 64-bit unsigned integer: " + text);
1467
          }
1468
        }
1469
      }
1470
 
1471
      result = bigValue.longValue();
1472
    }
1473
 
1474
    return result;
1475
  }
1476
}