Blame | Last modification | View Log | Download | RSS feed
// Protocol Buffers - Google's data interchange format// Copyright 2008 Google Inc. All rights reserved.// http://code.google.com/p/protobuf///// Redistribution and use in source and binary forms, with or without// modification, are permitted provided that the following conditions are// met://// * Redistributions of source code must retain the above copyright// notice, this list of conditions and the following disclaimer.// * Redistributions in binary form must reproduce the above// copyright notice, this list of conditions and the following disclaimer// in the documentation and/or other materials provided with the// distribution.// * Neither the name of Google Inc. nor the names of its// contributors may be used to endorse or promote products derived from// this software without specific prior written permission.//// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.package com.google.protobuf;import com.google.protobuf.Descriptors.Descriptor;import com.google.protobuf.Descriptors.FieldDescriptor;import com.google.protobuf.Descriptors.EnumDescriptor;import com.google.protobuf.Descriptors.EnumValueDescriptor;import java.io.IOException;import java.nio.CharBuffer;import java.math.BigInteger;import java.util.ArrayList;import java.util.List;import java.util.Locale;import java.util.Map;import java.util.regex.Matcher;import java.util.regex.Pattern;/*** Provide text parsing and formatting support for proto2 instances.* The implementation largely follows google/protobuf/text_format.cc.** @author wenboz@google.com Wenbo Zhu* @author kenton@google.com Kenton Varda*/public final class TextFormat {private TextFormat() {}private static final Printer DEFAULT_PRINTER = new Printer(false);private static final Printer SINGLE_LINE_PRINTER = new Printer(true);/*** Outputs a textual representation of the Protocol Message supplied into* the parameter output. (This representation is the new version of the* classic "ProtocolPrinter" output from the original Protocol Buffer system)*/public static void print(final Message message, final Appendable output)throws IOException {DEFAULT_PRINTER.print(message, new TextGenerator(output));}/** Outputs a textual representation of {@code fields} to {@code output}. */public static void print(final UnknownFieldSet fields,final Appendable output)throws IOException {DEFAULT_PRINTER.printUnknownFields(fields, new TextGenerator(output));}/*** Generates a human readable form of this message, useful for debugging and* other purposes, with no newline characters.*/public static String shortDebugString(final Message message) {try {final StringBuilder sb = new StringBuilder();SINGLE_LINE_PRINTER.print(message, new TextGenerator(sb));// Single line mode currently might have an extra space at the end.return sb.toString().trim();} catch (IOException e) {throw new IllegalStateException(e);}}/*** Generates a human readable form of the unknown fields, useful for debugging* and other purposes, with no newline characters.*/public static String shortDebugString(final UnknownFieldSet fields) {try {final StringBuilder sb = new StringBuilder();SINGLE_LINE_PRINTER.printUnknownFields(fields, new TextGenerator(sb));// Single line mode currently might have an extra space at the end.return sb.toString().trim();} catch (IOException e) {throw new IllegalStateException(e);}}/*** Like {@code print()}, but writes directly to a {@code String} and* returns it.*/public static String printToString(final Message message) {try {final StringBuilder text = new StringBuilder();print(message, text);return text.toString();} catch (IOException e) {throw new IllegalStateException(e);}}/*** Like {@code print()}, but writes directly to a {@code String} and* returns it.*/public static String printToString(final UnknownFieldSet fields) {try {final StringBuilder text = new StringBuilder();print(fields, text);return text.toString();} catch (IOException e) {throw new IllegalStateException(e);}}public static void printField(final FieldDescriptor field,final Object value,final Appendable output)throws IOException {DEFAULT_PRINTER.printField(field, value, new TextGenerator(output));}public static String printFieldToString(final FieldDescriptor field,final Object value) {try {final StringBuilder text = new StringBuilder();printField(field, value, text);return text.toString();} catch (IOException e) {throw new IllegalStateException(e);}}/*** Outputs a textual representation of the value of given field value.** @param field the descriptor of the field* @param value the value of the field* @param output the output to which to append the formatted value* @throws ClassCastException if the value is not appropriate for the* given field descriptor* @throws IOException if there is an exception writing to the output*/public static void printFieldValue(final FieldDescriptor field,final Object value,final Appendable output)throws IOException {DEFAULT_PRINTER.printFieldValue(field, value, new TextGenerator(output));}/*** Outputs a textual representation of the value of an unknown field.** @param tag the field's tag number* @param value the value of the field* @param output the output to which to append the formatted value* @throws ClassCastException if the value is not appropriate for the* given field descriptor* @throws IOException if there is an exception writing to the output*/public static void printUnknownFieldValue(final int tag,final Object value,final Appendable output)throws IOException {printUnknownFieldValue(tag, value, new TextGenerator(output));}private static void printUnknownFieldValue(final int tag,final Object value,final TextGenerator generator)throws IOException {switch (WireFormat.getTagWireType(tag)) {case WireFormat.WIRETYPE_VARINT:generator.print(unsignedToString((Long) value));break;case WireFormat.WIRETYPE_FIXED32:generator.print(String.format((Locale) null, "0x%08x", (Integer) value));break;case WireFormat.WIRETYPE_FIXED64:generator.print(String.format((Locale) null, "0x%016x", (Long) value));break;case WireFormat.WIRETYPE_LENGTH_DELIMITED:generator.print("\"");generator.print(escapeBytes((ByteString) value));generator.print("\"");break;case WireFormat.WIRETYPE_START_GROUP:DEFAULT_PRINTER.printUnknownFields((UnknownFieldSet) value, generator);break;default:throw new IllegalArgumentException("Bad tag: " + tag);}}/** Helper class for converting protobufs to text. */private static final class Printer {/** Whether to omit newlines from the output. */final boolean singleLineMode;private Printer(final boolean singleLineMode) {this.singleLineMode = singleLineMode;}private void print(final Message message, final TextGenerator generator)throws IOException {for (Map.Entry<FieldDescriptor, Object> field: message.getAllFields().entrySet()) {printField(field.getKey(), field.getValue(), generator);}printUnknownFields(message.getUnknownFields(), generator);}private void printField(final FieldDescriptor field, final Object value,final TextGenerator generator) throws IOException {if (field.isRepeated()) {// Repeated field. Print each element.for (Object element : (List<?>) value) {printSingleField(field, element, generator);}} else {printSingleField(field, value, generator);}}private void printSingleField(final FieldDescriptor field,final Object value,final TextGenerator generator)throws IOException {if (field.isExtension()) {generator.print("[");// We special-case MessageSet elements for compatibility with proto1.if (field.getContainingType().getOptions().getMessageSetWireFormat()&& (field.getType() == FieldDescriptor.Type.MESSAGE)&& (field.isOptional())// object equality&& (field.getExtensionScope() == field.getMessageType())) {generator.print(field.getMessageType().getFullName());} else {generator.print(field.getFullName());}generator.print("]");} else {if (field.getType() == FieldDescriptor.Type.GROUP) {// Groups must be serialized with their original capitalization.generator.print(field.getMessageType().getName());} else {generator.print(field.getName());}}if (field.getJavaType() == FieldDescriptor.JavaType.MESSAGE) {if (singleLineMode) {generator.print(" { ");} else {generator.print(" {\n");generator.indent();}} else {generator.print(": ");}printFieldValue(field, value, generator);if (field.getJavaType() == FieldDescriptor.JavaType.MESSAGE) {if (singleLineMode) {generator.print("} ");} else {generator.outdent();generator.print("}\n");}} else {if (singleLineMode) {generator.print(" ");} else {generator.print("\n");}}}private void printFieldValue(final FieldDescriptor field,final Object value,final TextGenerator generator)throws IOException {switch (field.getType()) {case INT32:case SINT32:case SFIXED32:generator.print(((Integer) value).toString());break;case INT64:case SINT64:case SFIXED64:generator.print(((Long) value).toString());break;case BOOL:generator.print(((Boolean) value).toString());break;case FLOAT:generator.print(((Float) value).toString());break;case DOUBLE:generator.print(((Double) value).toString());break;case UINT32:case FIXED32:generator.print(unsignedToString((Integer) value));break;case UINT64:case FIXED64:generator.print(unsignedToString((Long) value));break;case STRING:generator.print("\"");generator.print(escapeText((String) value));generator.print("\"");break;case BYTES:generator.print("\"");generator.print(escapeBytes((ByteString) value));generator.print("\"");break;case ENUM:generator.print(((EnumValueDescriptor) value).getName());break;case MESSAGE:case GROUP:print((Message) value, generator);break;}}private void printUnknownFields(final UnknownFieldSet unknownFields,final TextGenerator generator)throws IOException {for (Map.Entry<Integer, UnknownFieldSet.Field> entry :unknownFields.asMap().entrySet()) {final int number = entry.getKey();final UnknownFieldSet.Field field = entry.getValue();printUnknownField(number, WireFormat.WIRETYPE_VARINT,field.getVarintList(), generator);printUnknownField(number, WireFormat.WIRETYPE_FIXED32,field.getFixed32List(), generator);printUnknownField(number, WireFormat.WIRETYPE_FIXED64,field.getFixed64List(), generator);printUnknownField(number, WireFormat.WIRETYPE_LENGTH_DELIMITED,field.getLengthDelimitedList(), generator);for (final UnknownFieldSet value : field.getGroupList()) {generator.print(entry.getKey().toString());if (singleLineMode) {generator.print(" { ");} else {generator.print(" {\n");generator.indent();}printUnknownFields(value, generator);if (singleLineMode) {generator.print("} ");} else {generator.outdent();generator.print("}\n");}}}}private void printUnknownField(final int number,final int wireType,final List<?> values,final TextGenerator generator)throws IOException {for (final Object value : values) {generator.print(String.valueOf(number));generator.print(": ");printUnknownFieldValue(wireType, value, generator);generator.print(singleLineMode ? " " : "\n");}}}/** Convert an unsigned 32-bit integer to a string. */private static String unsignedToString(final int value) {if (value >= 0) {return Integer.toString(value);} else {return Long.toString(((long) value) & 0x00000000FFFFFFFFL);}}/** Convert an unsigned 64-bit integer to a string. */private static String unsignedToString(final long value) {if (value >= 0) {return Long.toString(value);} else {// Pull off the most-significant bit so that BigInteger doesn't think// the number is negative, then set it again using setBit().return BigInteger.valueOf(value & 0x7FFFFFFFFFFFFFFFL).setBit(63).toString();}}/*** An inner class for writing text to the output stream.*/private static final class TextGenerator {private final Appendable output;private final StringBuilder indent = new StringBuilder();private boolean atStartOfLine = true;private TextGenerator(final Appendable output) {this.output = output;}/*** Indent text by two spaces. After calling Indent(), two spaces will be* inserted at the beginning of each line of text. Indent() may be called* multiple times to produce deeper indents.*/public void indent() {indent.append(" ");}/*** Reduces the current indent level by two spaces, or crashes if the indent* level is zero.*/public void outdent() {final int length = indent.length();if (length == 0) {throw new IllegalArgumentException(" Outdent() without matching Indent().");}indent.delete(length - 2, length);}/*** Print text to the output stream.*/public void print(final CharSequence text) throws IOException {final int size = text.length();int pos = 0;for (int i = 0; i < size; i++) {if (text.charAt(i) == '\n') {write(text.subSequence(pos, size), i - pos + 1);pos = i + 1;atStartOfLine = true;}}write(text.subSequence(pos, size), size - pos);}private void write(final CharSequence data, final int size)throws IOException {if (size == 0) {return;}if (atStartOfLine) {atStartOfLine = false;output.append(indent);}output.append(data);}}// =================================================================// Parsing/*** Represents a stream of tokens parsed from a {@code String}.** <p>The Java standard library provides many classes that you might think* would be useful for implementing this, but aren't. For example:** <ul>* <li>{@code java.io.StreamTokenizer}: This almost does what we want -- or,* at least, something that would get us close to what we want -- except* for one fatal flaw: It automatically un-escapes strings using Java* escape sequences, which do not include all the escape sequences we* need to support (e.g. '\x').* <li>{@code java.util.Scanner}: This seems like a great way at least to* parse regular expressions out of a stream (so we wouldn't have to load* the entire input into a single string before parsing). Sadly,* {@code Scanner} requires that tokens be delimited with some delimiter.* Thus, although the text "foo:" should parse to two tokens ("foo" and* ":"), {@code Scanner} would recognize it only as a single token.* Furthermore, {@code Scanner} provides no way to inspect the contents* of delimiters, making it impossible to keep track of line and column* numbers.* </ul>** <p>Luckily, Java's regular expression support does manage to be useful to* us. (Barely: We need {@code Matcher.usePattern()}, which is new in* Java 1.5.) So, we can use that, at least. Unfortunately, this implies* that we need to have the entire input in one contiguous string.*/private static final class Tokenizer {private final CharSequence text;private final Matcher matcher;private String currentToken;// The character index within this.text at which the current token begins.private int pos = 0;// The line and column numbers of the current token.private int line = 0;private int column = 0;// The line and column numbers of the previous token (allows throwing// errors *after* consuming).private int previousLine = 0;private int previousColumn = 0;// We use possesive quantifiers (*+ and ++) because otherwise the Java// regex matcher has stack overflows on large inputs.private static final Pattern WHITESPACE =Pattern.compile("(\\s|(#.*$))++", Pattern.MULTILINE);private static final Pattern TOKEN = Pattern.compile("[a-zA-Z_][0-9a-zA-Z_+-]*+|" + // an identifier"[.]?[0-9+-][0-9a-zA-Z_.+-]*+|" + // a number"\"([^\"\n\\\\]|\\\\.)*+(\"|\\\\?$)|" + // a double-quoted string"\'([^\'\n\\\\]|\\\\.)*+(\'|\\\\?$)", // a single-quoted stringPattern.MULTILINE);private static final Pattern DOUBLE_INFINITY = Pattern.compile("-?inf(inity)?",Pattern.CASE_INSENSITIVE);private static final Pattern FLOAT_INFINITY = Pattern.compile("-?inf(inity)?f?",Pattern.CASE_INSENSITIVE);private static final Pattern FLOAT_NAN = Pattern.compile("nanf?",Pattern.CASE_INSENSITIVE);/** Construct a tokenizer that parses tokens from the given text. */private Tokenizer(final CharSequence text) {this.text = text;this.matcher = WHITESPACE.matcher(text);skipWhitespace();nextToken();}/** Are we at the end of the input? */public boolean atEnd() {return currentToken.length() == 0;}/** Advance to the next token. */public void nextToken() {previousLine = line;previousColumn = column;// Advance the line counter to the current position.while (pos < matcher.regionStart()) {if (text.charAt(pos) == '\n') {++line;column = 0;} else {++column;}++pos;}// Match the next token.if (matcher.regionStart() == matcher.regionEnd()) {// EOFcurrentToken = "";} else {matcher.usePattern(TOKEN);if (matcher.lookingAt()) {currentToken = matcher.group();matcher.region(matcher.end(), matcher.regionEnd());} else {// Take one character.currentToken = String.valueOf(text.charAt(pos));matcher.region(pos + 1, matcher.regionEnd());}skipWhitespace();}}/*** Skip over any whitespace so that the matcher region starts at the next* token.*/private void skipWhitespace() {matcher.usePattern(WHITESPACE);if (matcher.lookingAt()) {matcher.region(matcher.end(), matcher.regionEnd());}}/*** If the next token exactly matches {@code token}, consume it and return* {@code true}. Otherwise, return {@code false} without doing anything.*/public boolean tryConsume(final String token) {if (currentToken.equals(token)) {nextToken();return true;} else {return false;}}/*** If the next token exactly matches {@code token}, consume it. Otherwise,* throw a {@link ParseException}.*/public void consume(final String token) throws ParseException {if (!tryConsume(token)) {throw parseException("Expected \"" + token + "\".");}}/*** Returns {@code true} if the next token is an integer, but does* not consume it.*/public boolean lookingAtInteger() {if (currentToken.length() == 0) {return false;}final char c = currentToken.charAt(0);return ('0' <= c && c <= '9') ||c == '-' || c == '+';}/*** If the next token is an identifier, consume it and return its value.* Otherwise, throw a {@link ParseException}.*/public String consumeIdentifier() throws ParseException {for (int i = 0; i < currentToken.length(); i++) {final char c = currentToken.charAt(i);if (('a' <= c && c <= 'z') ||('A' <= c && c <= 'Z') ||('0' <= c && c <= '9') ||(c == '_') || (c == '.')) {// OK} else {throw parseException("Expected identifier.");}}final String result = currentToken;nextToken();return result;}/*** If the next token is a 32-bit signed integer, consume it and return its* value. Otherwise, throw a {@link ParseException}.*/public int consumeInt32() throws ParseException {try {final int result = parseInt32(currentToken);nextToken();return result;} catch (NumberFormatException e) {throw integerParseException(e);}}/*** If the next token is a 32-bit unsigned integer, consume it and return its* value. Otherwise, throw a {@link ParseException}.*/public int consumeUInt32() throws ParseException {try {final int result = parseUInt32(currentToken);nextToken();return result;} catch (NumberFormatException e) {throw integerParseException(e);}}/*** If the next token is a 64-bit signed integer, consume it and return its* value. Otherwise, throw a {@link ParseException}.*/public long consumeInt64() throws ParseException {try {final long result = parseInt64(currentToken);nextToken();return result;} catch (NumberFormatException e) {throw integerParseException(e);}}/*** If the next token is a 64-bit unsigned integer, consume it and return its* value. Otherwise, throw a {@link ParseException}.*/public long consumeUInt64() throws ParseException {try {final long result = parseUInt64(currentToken);nextToken();return result;} catch (NumberFormatException e) {throw integerParseException(e);}}/*** If the next token is a double, consume it and return its value.* Otherwise, throw a {@link ParseException}.*/public double consumeDouble() throws ParseException {// We need to parse infinity and nan separately because// Double.parseDouble() does not accept "inf", "infinity", or "nan".if (DOUBLE_INFINITY.matcher(currentToken).matches()) {final boolean negative = currentToken.startsWith("-");nextToken();return negative ? Double.NEGATIVE_INFINITY : Double.POSITIVE_INFINITY;}if (currentToken.equalsIgnoreCase("nan")) {nextToken();return Double.NaN;}try {final double result = Double.parseDouble(currentToken);nextToken();return result;} catch (NumberFormatException e) {throw floatParseException(e);}}/*** If the next token is a float, consume it and return its value.* Otherwise, throw a {@link ParseException}.*/public float consumeFloat() throws ParseException {// We need to parse infinity and nan separately because// Float.parseFloat() does not accept "inf", "infinity", or "nan".if (FLOAT_INFINITY.matcher(currentToken).matches()) {final boolean negative = currentToken.startsWith("-");nextToken();return negative ? Float.NEGATIVE_INFINITY : Float.POSITIVE_INFINITY;}if (FLOAT_NAN.matcher(currentToken).matches()) {nextToken();return Float.NaN;}try {final float result = Float.parseFloat(currentToken);nextToken();return result;} catch (NumberFormatException e) {throw floatParseException(e);}}/*** If the next token is a boolean, consume it and return its value.* Otherwise, throw a {@link ParseException}.*/public boolean consumeBoolean() throws ParseException {if (currentToken.equals("true") ||currentToken.equals("t") ||currentToken.equals("1")) {nextToken();return true;} else if (currentToken.equals("false") ||currentToken.equals("f") ||currentToken.equals("0")) {nextToken();return false;} else {throw parseException("Expected \"true\" or \"false\".");}}/*** If the next token is a string, consume it and return its (unescaped)* value. Otherwise, throw a {@link ParseException}.*/public String consumeString() throws ParseException {return consumeByteString().toStringUtf8();}/*** If the next token is a string, consume it, unescape it as a* {@link ByteString}, and return it. Otherwise, throw a* {@link ParseException}.*/public ByteString consumeByteString() throws ParseException {List<ByteString> list = new ArrayList<ByteString>();consumeByteString(list);while (currentToken.startsWith("'") || currentToken.startsWith("\"")) {consumeByteString(list);}return ByteString.copyFrom(list);}/*** Like {@link #consumeByteString()} but adds each token of the string to* the given list. String literals (whether bytes or text) may come in* multiple adjacent tokens which are automatically concatenated, like in* C or Python.*/private void consumeByteString(List<ByteString> list) throws ParseException {final char quote = currentToken.length() > 0 ? currentToken.charAt(0): '\0';if (quote != '\"' && quote != '\'') {throw parseException("Expected string.");}if (currentToken.length() < 2 ||currentToken.charAt(currentToken.length() - 1) != quote) {throw parseException("String missing ending quote.");}try {final String escaped =currentToken.substring(1, currentToken.length() - 1);final ByteString result = unescapeBytes(escaped);nextToken();list.add(result);} catch (InvalidEscapeSequenceException e) {throw parseException(e.getMessage());}}/*** Returns a {@link ParseException} with the current line and column* numbers in the description, suitable for throwing.*/public ParseException parseException(final String description) {// Note: People generally prefer one-based line and column numbers.return new ParseException((line + 1) + ":" + (column + 1) + ": " + description);}/*** Returns a {@link ParseException} with the line and column numbers of* the previous token in the description, suitable for throwing.*/public ParseException parseExceptionPreviousToken(final String description) {// Note: People generally prefer one-based line and column numbers.return new ParseException((previousLine + 1) + ":" + (previousColumn + 1) + ": " + description);}/*** Constructs an appropriate {@link ParseException} for the given* {@code NumberFormatException} when trying to parse an integer.*/private ParseException integerParseException(final NumberFormatException e) {return parseException("Couldn't parse integer: " + e.getMessage());}/*** Constructs an appropriate {@link ParseException} for the given* {@code NumberFormatException} when trying to parse a float or double.*/private ParseException floatParseException(final NumberFormatException e) {return parseException("Couldn't parse number: " + e.getMessage());}}/** Thrown when parsing an invalid text format message. */public static class ParseException extends IOException {private static final long serialVersionUID = 3196188060225107702L;public ParseException(final String message) {super(message);}}/*** Parse a text-format message from {@code input} and merge the contents* into {@code builder}.*/public static void merge(final Readable input,final Message.Builder builder)throws IOException {merge(input, ExtensionRegistry.getEmptyRegistry(), builder);}/*** Parse a text-format message from {@code input} and merge the contents* into {@code builder}.*/public static void merge(final CharSequence input,final Message.Builder builder)throws ParseException {merge(input, ExtensionRegistry.getEmptyRegistry(), builder);}/*** Parse a text-format message from {@code input} and merge the contents* into {@code builder}. Extensions will be recognized if they are* registered in {@code extensionRegistry}.*/public static void merge(final Readable input,final ExtensionRegistry extensionRegistry,final Message.Builder builder)throws IOException {// Read the entire input to a String then parse that.// If StreamTokenizer were not quite so crippled, or if there were a kind// of Reader that could read in chunks that match some particular regex,// or if we wanted to write a custom Reader to tokenize our stream, then// we would not have to read to one big String. Alas, none of these is// the case. Oh well.merge(toStringBuilder(input), extensionRegistry, builder);}private static final int BUFFER_SIZE = 4096;// TODO(chrisn): See if working around java.io.Reader#read(CharBuffer)// overhead is worthwhileprivate static StringBuilder toStringBuilder(final Readable input)throws IOException {final StringBuilder text = new StringBuilder();final CharBuffer buffer = CharBuffer.allocate(BUFFER_SIZE);while (true) {final int n = input.read(buffer);if (n == -1) {break;}buffer.flip();text.append(buffer, 0, n);}return text;}/*** Parse a text-format message from {@code input} and merge the contents* into {@code builder}. Extensions will be recognized if they are* registered in {@code extensionRegistry}.*/public static void merge(final CharSequence input,final ExtensionRegistry extensionRegistry,final Message.Builder builder)throws ParseException {final Tokenizer tokenizer = new Tokenizer(input);while (!tokenizer.atEnd()) {mergeField(tokenizer, extensionRegistry, builder);}}/*** Parse a single field from {@code tokenizer} and merge it into* {@code builder}.*/private static void mergeField(final Tokenizer tokenizer,final ExtensionRegistry extensionRegistry,final Message.Builder builder)throws ParseException {FieldDescriptor field;final Descriptor type = builder.getDescriptorForType();ExtensionRegistry.ExtensionInfo extension = null;if (tokenizer.tryConsume("[")) {// An extension.final StringBuilder name =new StringBuilder(tokenizer.consumeIdentifier());while (tokenizer.tryConsume(".")) {name.append('.');name.append(tokenizer.consumeIdentifier());}extension = extensionRegistry.findExtensionByName(name.toString());if (extension == null) {throw tokenizer.parseExceptionPreviousToken("Extension \"" + name + "\" not found in the ExtensionRegistry.");} else if (extension.descriptor.getContainingType() != type) {throw tokenizer.parseExceptionPreviousToken("Extension \"" + name + "\" does not extend message type \"" +type.getFullName() + "\".");}tokenizer.consume("]");field = extension.descriptor;} else {final String name = tokenizer.consumeIdentifier();field = type.findFieldByName(name);// Group names are expected to be capitalized as they appear in the// .proto file, which actually matches their type names, not their field// names.if (field == null) {// Explicitly specify US locale so that this code does not break when// executing in Turkey.final String lowerName = name.toLowerCase(Locale.US);field = type.findFieldByName(lowerName);// If the case-insensitive match worked but the field is NOT a group,if (field != null && field.getType() != FieldDescriptor.Type.GROUP) {field = null;}}// Again, special-case group names as described above.if (field != null && field.getType() == FieldDescriptor.Type.GROUP &&!field.getMessageType().getName().equals(name)) {field = null;}if (field == null) {throw tokenizer.parseExceptionPreviousToken("Message type \"" + type.getFullName() +"\" has no field named \"" + name + "\".");}}Object value = null;if (field.getJavaType() == FieldDescriptor.JavaType.MESSAGE) {tokenizer.tryConsume(":"); // optionalfinal String endToken;if (tokenizer.tryConsume("<")) {endToken = ">";} else {tokenizer.consume("{");endToken = "}";}final Message.Builder subBuilder;if (extension == null) {subBuilder = builder.newBuilderForField(field);} else {subBuilder = extension.defaultInstance.newBuilderForType();}while (!tokenizer.tryConsume(endToken)) {if (tokenizer.atEnd()) {throw tokenizer.parseException("Expected \"" + endToken + "\".");}mergeField(tokenizer, extensionRegistry, subBuilder);}value = subBuilder.build();} else {tokenizer.consume(":");switch (field.getType()) {case INT32:case SINT32:case SFIXED32:value = tokenizer.consumeInt32();break;case INT64:case SINT64:case SFIXED64:value = tokenizer.consumeInt64();break;case UINT32:case FIXED32:value = tokenizer.consumeUInt32();break;case UINT64:case FIXED64:value = tokenizer.consumeUInt64();break;case FLOAT:value = tokenizer.consumeFloat();break;case DOUBLE:value = tokenizer.consumeDouble();break;case BOOL:value = tokenizer.consumeBoolean();break;case STRING:value = tokenizer.consumeString();break;case BYTES:value = tokenizer.consumeByteString();break;case ENUM:final EnumDescriptor enumType = field.getEnumType();if (tokenizer.lookingAtInteger()) {final int number = tokenizer.consumeInt32();value = enumType.findValueByNumber(number);if (value == null) {throw tokenizer.parseExceptionPreviousToken("Enum type \"" + enumType.getFullName() +"\" has no value with number " + number + '.');}} else {final String id = tokenizer.consumeIdentifier();value = enumType.findValueByName(id);if (value == null) {throw tokenizer.parseExceptionPreviousToken("Enum type \"" + enumType.getFullName() +"\" has no value named \"" + id + "\".");}}break;case MESSAGE:case GROUP:throw new RuntimeException("Can't get here.");}}if (field.isRepeated()) {builder.addRepeatedField(field, value);} else {builder.setField(field, value);}}// =================================================================// Utility functions//// Some of these methods are package-private because Descriptors.java uses// them./*** Escapes bytes in the format used in protocol buffer text format, which* is the same as the format used for C string literals. All bytes* that are not printable 7-bit ASCII characters are escaped, as well as* backslash, single-quote, and double-quote characters. Characters for* which no defined short-hand escape sequence is defined will be escaped* using 3-digit octal sequences.*/static String escapeBytes(final ByteString input) {final StringBuilder builder = new StringBuilder(input.size());for (int i = 0; i < input.size(); i++) {final byte b = input.byteAt(i);switch (b) {// Java does not recognize \a or \v, apparently.case 0x07: builder.append("\\a" ); break;case '\b': builder.append("\\b" ); break;case '\f': builder.append("\\f" ); break;case '\n': builder.append("\\n" ); break;case '\r': builder.append("\\r" ); break;case '\t': builder.append("\\t" ); break;case 0x0b: builder.append("\\v" ); break;case '\\': builder.append("\\\\"); break;case '\'': builder.append("\\\'"); break;case '"' : builder.append("\\\""); break;default:// Note: Bytes with the high-order bit set should be escaped. Since// bytes are signed, such bytes will compare less than 0x20, hence// the following line is correct.if (b >= 0x20) {builder.append((char) b);} else {builder.append('\\');builder.append((char) ('0' + ((b >>> 6) & 3)));builder.append((char) ('0' + ((b >>> 3) & 7)));builder.append((char) ('0' + (b & 7)));}break;}}return builder.toString();}/*** Un-escape a byte sequence as escaped using* {@link #escapeBytes(ByteString)}. Two-digit hex escapes (starting with* "\x") are also recognized.*/static ByteString unescapeBytes(final CharSequence charString)throws InvalidEscapeSequenceException {// First convert the Java characater sequence to UTF-8 bytes.ByteString input = ByteString.copyFromUtf8(charString.toString());// Then unescape certain byte sequences introduced by ASCII '\\'. The valid// escapes can all be expressed with ASCII characters, so it is safe to// operate on bytes here.//// Unescaping the input byte array will result in a byte sequence that's no// longer than the input. That's because each escape sequence is between// two and four bytes long and stands for a single byte.final byte[] result = new byte[input.size()];int pos = 0;for (int i = 0; i < input.size(); i++) {byte c = input.byteAt(i);if (c == '\\') {if (i + 1 < input.size()) {++i;c = input.byteAt(i);if (isOctal(c)) {// Octal escape.int code = digitValue(c);if (i + 1 < input.size() && isOctal(input.byteAt(i + 1))) {++i;code = code * 8 + digitValue(input.byteAt(i));}if (i + 1 < input.size() && isOctal(input.byteAt(i + 1))) {++i;code = code * 8 + digitValue(input.byteAt(i));}// TODO: Check that 0 <= code && code <= 0xFF.result[pos++] = (byte)code;} else {switch (c) {case 'a' : result[pos++] = 0x07; break;case 'b' : result[pos++] = '\b'; break;case 'f' : result[pos++] = '\f'; break;case 'n' : result[pos++] = '\n'; break;case 'r' : result[pos++] = '\r'; break;case 't' : result[pos++] = '\t'; break;case 'v' : result[pos++] = 0x0b; break;case '\\': result[pos++] = '\\'; break;case '\'': result[pos++] = '\''; break;case '"' : result[pos++] = '\"'; break;case 'x':// hex escapeint code = 0;if (i + 1 < input.size() && isHex(input.byteAt(i + 1))) {++i;code = digitValue(input.byteAt(i));} else {throw new InvalidEscapeSequenceException("Invalid escape sequence: '\\x' with no digits");}if (i + 1 < input.size() && isHex(input.byteAt(i + 1))) {++i;code = code * 16 + digitValue(input.byteAt(i));}result[pos++] = (byte)code;break;default:throw new InvalidEscapeSequenceException("Invalid escape sequence: '\\" + (char)c + '\'');}}} else {throw new InvalidEscapeSequenceException("Invalid escape sequence: '\\' at end of string.");}} else {result[pos++] = c;}}return ByteString.copyFrom(result, 0, pos);}/*** Thrown by {@link TextFormat#unescapeBytes} and* {@link TextFormat#unescapeText} when an invalid escape sequence is seen.*/static class InvalidEscapeSequenceException extends IOException {private static final long serialVersionUID = -8164033650142593304L;InvalidEscapeSequenceException(final String description) {super(description);}}/*** Like {@link #escapeBytes(ByteString)}, but escapes a text string.* Non-ASCII characters are first encoded as UTF-8, then each byte is escaped* individually as a 3-digit octal escape. Yes, it's weird.*/static String escapeText(final String input) {return escapeBytes(ByteString.copyFromUtf8(input));}/*** Un-escape a text string as escaped using {@link #escapeText(String)}.* Two-digit hex escapes (starting with "\x") are also recognized.*/static String unescapeText(final String input)throws InvalidEscapeSequenceException {return unescapeBytes(input).toStringUtf8();}/** Is this an octal digit? */private static boolean isOctal(final byte c) {return '0' <= c && c <= '7';}/** Is this a hex digit? */private static boolean isHex(final byte c) {return ('0' <= c && c <= '9') ||('a' <= c && c <= 'f') ||('A' <= c && c <= 'F');}/*** Interpret a character as a digit (in any base up to 36) and return the* numeric value. This is like {@code Character.digit()} but we don't accept* non-ASCII digits.*/private static int digitValue(final byte c) {if ('0' <= c && c <= '9') {return c - '0';} else if ('a' <= c && c <= 'z') {return c - 'a' + 10;} else {return c - 'A' + 10;}}/*** Parse a 32-bit signed integer from the text. Unlike the Java standard* {@code Integer.parseInt()}, this function recognizes the prefixes "0x"* and "0" to signify hexidecimal and octal numbers, respectively.*/static int parseInt32(final String text) throws NumberFormatException {return (int) parseInteger(text, true, false);}/*** Parse a 32-bit unsigned integer from the text. Unlike the Java standard* {@code Integer.parseInt()}, this function recognizes the prefixes "0x"* and "0" to signify hexidecimal and octal numbers, respectively. The* result is coerced to a (signed) {@code int} when returned since Java has* no unsigned integer type.*/static int parseUInt32(final String text) throws NumberFormatException {return (int) parseInteger(text, false, false);}/*** Parse a 64-bit signed integer from the text. Unlike the Java standard* {@code Integer.parseInt()}, this function recognizes the prefixes "0x"* and "0" to signify hexidecimal and octal numbers, respectively.*/static long parseInt64(final String text) throws NumberFormatException {return parseInteger(text, true, true);}/*** Parse a 64-bit unsigned integer from the text. Unlike the Java standard* {@code Integer.parseInt()}, this function recognizes the prefixes "0x"* and "0" to signify hexidecimal and octal numbers, respectively. The* result is coerced to a (signed) {@code long} when returned since Java has* no unsigned long type.*/static long parseUInt64(final String text) throws NumberFormatException {return parseInteger(text, false, true);}private static long parseInteger(final String text,final boolean isSigned,final boolean isLong)throws NumberFormatException {int pos = 0;boolean negative = false;if (text.startsWith("-", pos)) {if (!isSigned) {throw new NumberFormatException("Number must be positive: " + text);}++pos;negative = true;}int radix = 10;if (text.startsWith("0x", pos)) {pos += 2;radix = 16;} else if (text.startsWith("0", pos)) {radix = 8;}final String numberText = text.substring(pos);long result = 0;if (numberText.length() < 16) {// Can safely assume no overflow.result = Long.parseLong(numberText, radix);if (negative) {result = -result;}// Check bounds.// No need to check for 64-bit numbers since they'd have to be 16 chars// or longer to overflow.if (!isLong) {if (isSigned) {if (result > Integer.MAX_VALUE || result < Integer.MIN_VALUE) {throw new NumberFormatException("Number out of range for 32-bit signed integer: " + text);}} else {if (result >= (1L << 32) || result < 0) {throw new NumberFormatException("Number out of range for 32-bit unsigned integer: " + text);}}}} else {BigInteger bigValue = new BigInteger(numberText, radix);if (negative) {bigValue = bigValue.negate();}// Check bounds.if (!isLong) {if (isSigned) {if (bigValue.bitLength() > 31) {throw new NumberFormatException("Number out of range for 32-bit signed integer: " + text);}} else {if (bigValue.bitLength() > 32) {throw new NumberFormatException("Number out of range for 32-bit unsigned integer: " + text);}}} else {if (isSigned) {if (bigValue.bitLength() > 63) {throw new NumberFormatException("Number out of range for 64-bit signed integer: " + text);}} else {if (bigValue.bitLength() > 64) {throw new NumberFormatException("Number out of range for 64-bit unsigned integer: " + text);}}}result = bigValue.longValue();}return result;}}