| 87 |
Kevin |
1 |
// Protocol Buffers - Google's data interchange format
|
|
|
2 |
// Copyright 2008 Google Inc. All rights reserved.
|
|
|
3 |
// http://code.google.com/p/protobuf/
|
|
|
4 |
//
|
|
|
5 |
// Redistribution and use in source and binary forms, with or without
|
|
|
6 |
// modification, are permitted provided that the following conditions are
|
|
|
7 |
// met:
|
|
|
8 |
//
|
|
|
9 |
// * Redistributions of source code must retain the above copyright
|
|
|
10 |
// notice, this list of conditions and the following disclaimer.
|
|
|
11 |
// * Redistributions in binary form must reproduce the above
|
|
|
12 |
// copyright notice, this list of conditions and the following disclaimer
|
|
|
13 |
// in the documentation and/or other materials provided with the
|
|
|
14 |
// distribution.
|
|
|
15 |
// * Neither the name of Google Inc. nor the names of its
|
|
|
16 |
// contributors may be used to endorse or promote products derived from
|
|
|
17 |
// this software without specific prior written permission.
|
|
|
18 |
//
|
|
|
19 |
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
|
|
20 |
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
|
|
21 |
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
|
|
22 |
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
|
|
23 |
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
|
|
24 |
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
|
|
25 |
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
|
|
26 |
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
|
|
27 |
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
|
|
28 |
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
|
|
29 |
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
|
30 |
|
|
|
31 |
package com.google.protobuf;
|
|
|
32 |
|
|
|
33 |
import java.io.UnsupportedEncodingException;
|
|
|
34 |
|
|
|
35 |
/**
|
|
|
36 |
* The classes contained within are used internally by the Protocol Buffer
|
|
|
37 |
* library and generated message implementations. They are public only because
|
|
|
38 |
* those generated messages do not reside in the {@code protobuf} package.
|
|
|
39 |
* Others should not use this class directly.
|
|
|
40 |
*
|
|
|
41 |
* @author kenton@google.com (Kenton Varda)
|
|
|
42 |
*/
|
|
|
43 |
public class Internal {
|
|
|
44 |
/**
|
|
|
45 |
* Helper called by generated code to construct default values for string
|
|
|
46 |
* fields.
|
|
|
47 |
* <p>
|
|
|
48 |
* The protocol compiler does not actually contain a UTF-8 decoder -- it
|
|
|
49 |
* just pushes UTF-8-encoded text around without touching it. The one place
|
|
|
50 |
* where this presents a problem is when generating Java string literals.
|
|
|
51 |
* Unicode characters in the string literal would normally need to be encoded
|
|
|
52 |
* using a Unicode escape sequence, which would require decoding them.
|
|
|
53 |
* To get around this, protoc instead embeds the UTF-8 bytes into the
|
|
|
54 |
* generated code and leaves it to the runtime library to decode them.
|
|
|
55 |
* <p>
|
|
|
56 |
* It gets worse, though. If protoc just generated a byte array, like:
|
|
|
57 |
* new byte[] {0x12, 0x34, 0x56, 0x78}
|
|
|
58 |
* Java actually generates *code* which allocates an array and then fills
|
|
|
59 |
* in each value. This is much less efficient than just embedding the bytes
|
|
|
60 |
* directly into the bytecode. To get around this, we need another
|
|
|
61 |
* work-around. String literals are embedded directly, so protoc actually
|
|
|
62 |
* generates a string literal corresponding to the bytes. The easiest way
|
|
|
63 |
* to do this is to use the ISO-8859-1 character set, which corresponds to
|
|
|
64 |
* the first 256 characters of the Unicode range. Protoc can then use
|
|
|
65 |
* good old CEscape to generate the string.
|
|
|
66 |
* <p>
|
|
|
67 |
* So we have a string literal which represents a set of bytes which
|
|
|
68 |
* represents another string. This function -- stringDefaultValue --
|
|
|
69 |
* converts from the generated string to the string we actually want. The
|
|
|
70 |
* generated code calls this automatically.
|
|
|
71 |
*/
|
|
|
72 |
public static String stringDefaultValue(String bytes) {
|
|
|
73 |
try {
|
|
|
74 |
return new String(bytes.getBytes("ISO-8859-1"), "UTF-8");
|
|
|
75 |
} catch (UnsupportedEncodingException e) {
|
|
|
76 |
// This should never happen since all JVMs are required to implement
|
|
|
77 |
// both of the above character sets.
|
|
|
78 |
throw new IllegalStateException(
|
|
|
79 |
"Java VM does not support a standard character set.", e);
|
|
|
80 |
}
|
|
|
81 |
}
|
|
|
82 |
|
|
|
83 |
/**
|
|
|
84 |
* Helper called by generated code to construct default values for bytes
|
|
|
85 |
* fields.
|
|
|
86 |
* <p>
|
|
|
87 |
* This is a lot like {@link #stringDefaultValue}, but for bytes fields.
|
|
|
88 |
* In this case we only need the second of the two hacks -- allowing us to
|
|
|
89 |
* embed raw bytes as a string literal with ISO-8859-1 encoding.
|
|
|
90 |
*/
|
|
|
91 |
public static ByteString bytesDefaultValue(String bytes) {
|
|
|
92 |
try {
|
|
|
93 |
return ByteString.copyFrom(bytes.getBytes("ISO-8859-1"));
|
|
|
94 |
} catch (UnsupportedEncodingException e) {
|
|
|
95 |
// This should never happen since all JVMs are required to implement
|
|
|
96 |
// ISO-8859-1.
|
|
|
97 |
throw new IllegalStateException(
|
|
|
98 |
"Java VM does not support a standard character set.", e);
|
|
|
99 |
}
|
|
|
100 |
}
|
|
|
101 |
|
|
|
102 |
/**
|
|
|
103 |
* Helper called by generated code to determine if a byte array is a valid
|
|
|
104 |
* UTF-8 encoded string such that the original bytes can be converted to
|
|
|
105 |
* a String object and then back to a byte array round tripping the bytes
|
|
|
106 |
* without loss.
|
|
|
107 |
* <p>
|
|
|
108 |
* This is inspired by UTF_8.java in sun.nio.cs.
|
|
|
109 |
*
|
|
|
110 |
* @param byteString the string to check
|
|
|
111 |
* @return whether the byte array is round trippable
|
|
|
112 |
*/
|
|
|
113 |
public static boolean isValidUtf8(ByteString byteString) {
|
|
|
114 |
int index = 0;
|
|
|
115 |
int size = byteString.size();
|
|
|
116 |
// To avoid the masking, we could change this to use bytes;
|
|
|
117 |
// Then X > 0xC2 gets turned into X < -0xC2; X < 0x80
|
|
|
118 |
// gets turned into X >= 0, etc.
|
|
|
119 |
|
|
|
120 |
while (index < size) {
|
|
|
121 |
int byte1 = byteString.byteAt(index++) & 0xFF;
|
|
|
122 |
if (byte1 < 0x80) {
|
|
|
123 |
// fast loop for single bytes
|
|
|
124 |
continue;
|
|
|
125 |
|
|
|
126 |
// we know from this point on that we have 2-4 byte forms
|
|
|
127 |
} else if (byte1 < 0xC2 || byte1 > 0xF4) {
|
|
|
128 |
// catch illegal first bytes: < C2 or > F4
|
|
|
129 |
return false;
|
|
|
130 |
}
|
|
|
131 |
if (index >= size) {
|
|
|
132 |
// fail if we run out of bytes
|
|
|
133 |
return false;
|
|
|
134 |
}
|
|
|
135 |
int byte2 = byteString.byteAt(index++) & 0xFF;
|
|
|
136 |
if (byte2 < 0x80 || byte2 > 0xBF) {
|
|
|
137 |
// general trail-byte test
|
|
|
138 |
return false;
|
|
|
139 |
}
|
|
|
140 |
if (byte1 <= 0xDF) {
|
|
|
141 |
// two-byte form; general trail-byte test is sufficient
|
|
|
142 |
continue;
|
|
|
143 |
}
|
|
|
144 |
|
|
|
145 |
// we know from this point on that we have 3 or 4 byte forms
|
|
|
146 |
if (index >= size) {
|
|
|
147 |
// fail if we run out of bytes
|
|
|
148 |
return false;
|
|
|
149 |
}
|
|
|
150 |
int byte3 = byteString.byteAt(index++) & 0xFF;
|
|
|
151 |
if (byte3 < 0x80 || byte3 > 0xBF) {
|
|
|
152 |
// general trail-byte test
|
|
|
153 |
return false;
|
|
|
154 |
}
|
|
|
155 |
if (byte1 <= 0xEF) {
|
|
|
156 |
// three-byte form. Vastly more frequent than four-byte forms
|
|
|
157 |
// The following has an extra test, but not worth restructuring
|
|
|
158 |
if (byte1 == 0xE0 && byte2 < 0xA0 ||
|
|
|
159 |
byte1 == 0xED && byte2 > 0x9F) {
|
|
|
160 |
// check special cases of byte2
|
|
|
161 |
return false;
|
|
|
162 |
}
|
|
|
163 |
|
|
|
164 |
} else {
|
|
|
165 |
// four-byte form
|
|
|
166 |
|
|
|
167 |
if (index >= size) {
|
|
|
168 |
// fail if we run out of bytes
|
|
|
169 |
return false;
|
|
|
170 |
}
|
|
|
171 |
int byte4 = byteString.byteAt(index++) & 0xFF;
|
|
|
172 |
if (byte4 < 0x80 || byte4 > 0xBF) {
|
|
|
173 |
// general trail-byte test
|
|
|
174 |
return false;
|
|
|
175 |
}
|
|
|
176 |
// The following has an extra test, but not worth restructuring
|
|
|
177 |
if (byte1 == 0xF0 && byte2 < 0x90 ||
|
|
|
178 |
byte1 == 0xF4 && byte2 > 0x8F) {
|
|
|
179 |
// check special cases of byte2
|
|
|
180 |
return false;
|
|
|
181 |
}
|
|
|
182 |
}
|
|
|
183 |
}
|
|
|
184 |
return true;
|
|
|
185 |
}
|
|
|
186 |
|
|
|
187 |
/**
|
|
|
188 |
* Interface for an enum value or value descriptor, to be used in FieldSet.
|
|
|
189 |
* The lite library stores enum values directly in FieldSets but the full
|
|
|
190 |
* library stores EnumValueDescriptors in order to better support reflection.
|
|
|
191 |
*/
|
|
|
192 |
public interface EnumLite {
|
|
|
193 |
int getNumber();
|
|
|
194 |
}
|
|
|
195 |
|
|
|
196 |
/**
|
|
|
197 |
* Interface for an object which maps integers to {@link EnumLite}s.
|
|
|
198 |
* {@link Descriptors.EnumDescriptor} implements this interface by mapping
|
|
|
199 |
* numbers to {@link Descriptors.EnumValueDescriptor}s. Additionally,
|
|
|
200 |
* every generated enum type has a static method internalGetValueMap() which
|
|
|
201 |
* returns an implementation of this type that maps numbers to enum values.
|
|
|
202 |
*/
|
|
|
203 |
public interface EnumLiteMap<T extends EnumLite> {
|
|
|
204 |
T findValueByNumber(int number);
|
|
|
205 |
}
|
|
|
206 |
}
|