OLD | NEW |
1 // -*- mode: C++ -*- | 1 // -*- mode: C++ -*- |
2 | 2 |
3 // Copyright (c) 2010 Google Inc. All Rights Reserved. | 3 // Copyright (c) 2010 Google Inc. All Rights Reserved. |
4 // | 4 // |
5 // Redistribution and use in source and binary forms, with or without | 5 // Redistribution and use in source and binary forms, with or without |
6 // modification, are permitted provided that the following conditions are | 6 // modification, are permitted provided that the following conditions are |
7 // met: | 7 // met: |
8 // | 8 // |
9 // * Redistributions of source code must retain the above copyright | 9 // * Redistributions of source code must retain the above copyright |
10 // notice, this list of conditions and the following disclaimer. | 10 // notice, this list of conditions and the following disclaimer. |
(...skipping 13 matching lines...) Expand all Loading... |
24 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | 24 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
25 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | 25 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
26 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | 26 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
27 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | 27 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
28 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | 28 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
29 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | 29 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
30 | 30 |
31 #ifndef COMMON_DWARF_BYTEREADER_H__ | 31 #ifndef COMMON_DWARF_BYTEREADER_H__ |
32 #define COMMON_DWARF_BYTEREADER_H__ | 32 #define COMMON_DWARF_BYTEREADER_H__ |
33 | 33 |
| 34 #include <stdint.h> |
| 35 |
34 #include <string> | 36 #include <string> |
| 37 |
35 #include "common/dwarf/types.h" | 38 #include "common/dwarf/types.h" |
36 #include "common/dwarf/dwarf2enums.h" | 39 #include "common/dwarf/dwarf2enums.h" |
37 | 40 |
38 namespace dwarf2reader { | 41 namespace dwarf2reader { |
39 | 42 |
40 // We can't use the obvious names LITTLE_ENDIAN and BIG_ENDIAN | 43 // We can't use the obvious names LITTLE_ENDIAN and BIG_ENDIAN |
41 // because they conflict with existing macros | 44 // because they conflict with existing macros |
42 enum Endianness { | 45 enum Endianness { |
43 ENDIANNESS_BIG, | 46 ENDIANNESS_BIG, |
44 ENDIANNESS_LITTLE | 47 ENDIANNESS_LITTLE |
45 }; | 48 }; |
46 | 49 |
47 // A ByteReader knows how to read single- and multi-byte values of | 50 // A ByteReader knows how to read single- and multi-byte values of |
48 // various endiannesses, sizes, and encodings, as used in DWARF | 51 // various endiannesses, sizes, and encodings, as used in DWARF |
49 // debugging information and Linux C++ exception handling data. | 52 // debugging information and Linux C++ exception handling data. |
50 class ByteReader { | 53 class ByteReader { |
51 public: | 54 public: |
52 // Construct a ByteReader capable of reading one-, two-, four-, and | 55 // Construct a ByteReader capable of reading one-, two-, four-, and |
53 // eight-byte values according to ENDIANNESS, absolute machine-sized | 56 // eight-byte values according to ENDIANNESS, absolute machine-sized |
54 // addresses, DWARF-style "initial length" values, signed and | 57 // addresses, DWARF-style "initial length" values, signed and |
55 // unsigned LEB128 numbers, and Linux C++ exception handling data's | 58 // unsigned LEB128 numbers, and Linux C++ exception handling data's |
56 // encoded pointers. | 59 // encoded pointers. |
57 explicit ByteReader(enum Endianness endianness); | 60 explicit ByteReader(enum Endianness endianness); |
58 virtual ~ByteReader(); | 61 virtual ~ByteReader(); |
59 | 62 |
60 // Read a single byte from BUFFER and return it as an unsigned 8 bit | 63 // Read a single byte from BUFFER and return it as an unsigned 8 bit |
61 // number. | 64 // number. |
62 uint8 ReadOneByte(const char* buffer) const; | 65 uint8 ReadOneByte(const uint8_t *buffer) const; |
63 | 66 |
64 // Read two bytes from BUFFER and return them as an unsigned 16 bit | 67 // Read two bytes from BUFFER and return them as an unsigned 16 bit |
65 // number, using this ByteReader's endianness. | 68 // number, using this ByteReader's endianness. |
66 uint16 ReadTwoBytes(const char* buffer) const; | 69 uint16 ReadTwoBytes(const uint8_t *buffer) const; |
67 | 70 |
68 // Read four bytes from BUFFER and return them as an unsigned 32 bit | 71 // Read four bytes from BUFFER and return them as an unsigned 32 bit |
69 // number, using this ByteReader's endianness. This function returns | 72 // number, using this ByteReader's endianness. This function returns |
70 // a uint64 so that it is compatible with ReadAddress and | 73 // a uint64 so that it is compatible with ReadAddress and |
71 // ReadOffset. The number it returns will never be outside the range | 74 // ReadOffset. The number it returns will never be outside the range |
72 // of an unsigned 32 bit integer. | 75 // of an unsigned 32 bit integer. |
73 uint64 ReadFourBytes(const char* buffer) const; | 76 uint64 ReadFourBytes(const uint8_t *buffer) const; |
74 | 77 |
75 // Read eight bytes from BUFFER and return them as an unsigned 64 | 78 // Read eight bytes from BUFFER and return them as an unsigned 64 |
76 // bit number, using this ByteReader's endianness. | 79 // bit number, using this ByteReader's endianness. |
77 uint64 ReadEightBytes(const char* buffer) const; | 80 uint64 ReadEightBytes(const uint8_t *buffer) const; |
78 | 81 |
79 // Read an unsigned LEB128 (Little Endian Base 128) number from | 82 // Read an unsigned LEB128 (Little Endian Base 128) number from |
80 // BUFFER and return it as an unsigned 64 bit integer. Set LEN to | 83 // BUFFER and return it as an unsigned 64 bit integer. Set LEN to |
81 // the number of bytes read. | 84 // the number of bytes read. |
82 // | 85 // |
83 // The unsigned LEB128 representation of an integer N is a variable | 86 // The unsigned LEB128 representation of an integer N is a variable |
84 // number of bytes: | 87 // number of bytes: |
85 // | 88 // |
86 // - If N is between 0 and 0x7f, then its unsigned LEB128 | 89 // - If N is between 0 and 0x7f, then its unsigned LEB128 |
87 // representation is a single byte whose value is N. | 90 // representation is a single byte whose value is N. |
88 // | 91 // |
89 // - Otherwise, its unsigned LEB128 representation is (N & 0x7f) | | 92 // - Otherwise, its unsigned LEB128 representation is (N & 0x7f) | |
90 // 0x80, followed by the unsigned LEB128 representation of N / | 93 // 0x80, followed by the unsigned LEB128 representation of N / |
91 // 128, rounded towards negative infinity. | 94 // 128, rounded towards negative infinity. |
92 // | 95 // |
93 // In other words, we break VALUE into groups of seven bits, put | 96 // In other words, we break VALUE into groups of seven bits, put |
94 // them in little-endian order, and then write them as eight-bit | 97 // them in little-endian order, and then write them as eight-bit |
95 // bytes with the high bit on all but the last. | 98 // bytes with the high bit on all but the last. |
96 uint64 ReadUnsignedLEB128(const char* buffer, size_t* len) const; | 99 uint64 ReadUnsignedLEB128(const uint8_t *buffer, size_t *len) const; |
97 | 100 |
98 // Read a signed LEB128 number from BUFFER and return it as a | 101 // Read a signed LEB128 number from BUFFER and return it as a |
99 // signed 64 bit integer. Set LEN to the number of bytes read. | 102 // signed 64 bit integer. Set LEN to the number of bytes read. |
100 // | 103 // |
101 // The signed LEB128 representation of an integer N is a variable | 104 // The signed LEB128 representation of an integer N is a variable |
102 // number of bytes: | 105 // number of bytes: |
103 // | 106 // |
104 // - If N is between -0x40 and 0x3f, then its signed LEB128 | 107 // - If N is between -0x40 and 0x3f, then its signed LEB128 |
105 // representation is a single byte whose value is N in two's | 108 // representation is a single byte whose value is N in two's |
106 // complement. | 109 // complement. |
107 // | 110 // |
108 // - Otherwise, its signed LEB128 representation is (N & 0x7f) | | 111 // - Otherwise, its signed LEB128 representation is (N & 0x7f) | |
109 // 0x80, followed by the signed LEB128 representation of N / 128, | 112 // 0x80, followed by the signed LEB128 representation of N / 128, |
110 // rounded towards negative infinity. | 113 // rounded towards negative infinity. |
111 // | 114 // |
112 // In other words, we break VALUE into groups of seven bits, put | 115 // In other words, we break VALUE into groups of seven bits, put |
113 // them in little-endian order, and then write them as eight-bit | 116 // them in little-endian order, and then write them as eight-bit |
114 // bytes with the high bit on all but the last. | 117 // bytes with the high bit on all but the last. |
115 int64 ReadSignedLEB128(const char* buffer, size_t* len) const; | 118 int64 ReadSignedLEB128(const uint8_t *buffer, size_t *len) const; |
116 | 119 |
117 // Indicate that addresses on this architecture are SIZE bytes long. SIZE | 120 // Indicate that addresses on this architecture are SIZE bytes long. SIZE |
118 // must be either 4 or 8. (DWARF allows addresses to be any number of | 121 // must be either 4 or 8. (DWARF allows addresses to be any number of |
119 // bytes in length from 1 to 255, but we only support 32- and 64-bit | 122 // bytes in length from 1 to 255, but we only support 32- and 64-bit |
120 // addresses at the moment.) You must call this before using the | 123 // addresses at the moment.) You must call this before using the |
121 // ReadAddress member function. | 124 // ReadAddress member function. |
122 // | 125 // |
123 // For data in a .debug_info section, or something that .debug_info | 126 // For data in a .debug_info section, or something that .debug_info |
124 // refers to like line number or macro data, the compilation unit | 127 // refers to like line number or macro data, the compilation unit |
125 // header's address_size field indicates the address size to use. Call | 128 // header's address_size field indicates the address size to use. Call |
126 // frame information doesn't indicate its address size (a shortcoming of | 129 // frame information doesn't indicate its address size (a shortcoming of |
127 // the spec); you must supply the appropriate size based on the | 130 // the spec); you must supply the appropriate size based on the |
128 // architecture of the target machine. | 131 // architecture of the target machine. |
129 void SetAddressSize(uint8 size); | 132 void SetAddressSize(uint8 size); |
130 | 133 |
131 // Return the current address size, in bytes. This is either 4, | 134 // Return the current address size, in bytes. This is either 4, |
132 // indicating 32-bit addresses, or 8, indicating 64-bit addresses. | 135 // indicating 32-bit addresses, or 8, indicating 64-bit addresses. |
133 uint8 AddressSize() const { return address_size_; } | 136 uint8 AddressSize() const { return address_size_; } |
134 | 137 |
135 // Read an address from BUFFER and return it as an unsigned 64 bit | 138 // Read an address from BUFFER and return it as an unsigned 64 bit |
136 // integer, respecting this ByteReader's endianness and address size. You | 139 // integer, respecting this ByteReader's endianness and address size. You |
137 // must call SetAddressSize before calling this function. | 140 // must call SetAddressSize before calling this function. |
138 uint64 ReadAddress(const char* buffer) const; | 141 uint64 ReadAddress(const uint8_t *buffer) const; |
139 | 142 |
140 // DWARF actually defines two slightly different formats: 32-bit DWARF | 143 // DWARF actually defines two slightly different formats: 32-bit DWARF |
141 // and 64-bit DWARF. This is *not* related to the size of registers or | 144 // and 64-bit DWARF. This is *not* related to the size of registers or |
142 // addresses on the target machine; it refers only to the size of section | 145 // addresses on the target machine; it refers only to the size of section |
143 // offsets and data lengths appearing in the DWARF data. One only needs | 146 // offsets and data lengths appearing in the DWARF data. One only needs |
144 // 64-bit DWARF when the debugging data itself is larger than 4GiB. | 147 // 64-bit DWARF when the debugging data itself is larger than 4GiB. |
145 // 32-bit DWARF can handle x86_64 or PPC64 code just fine, unless the | 148 // 32-bit DWARF can handle x86_64 or PPC64 code just fine, unless the |
146 // debugging data itself is very large. | 149 // debugging data itself is very large. |
147 // | 150 // |
148 // DWARF information identifies itself as 32-bit or 64-bit DWARF: each | 151 // DWARF information identifies itself as 32-bit or 64-bit DWARF: each |
(...skipping 16 matching lines...) Expand all Loading... |
165 // | 168 // |
166 // A DWARF initial length is either: | 169 // A DWARF initial length is either: |
167 // | 170 // |
168 // - a byte count stored as an unsigned 32-bit value less than | 171 // - a byte count stored as an unsigned 32-bit value less than |
169 // 0xffffff00, indicating that the data whose length is being | 172 // 0xffffff00, indicating that the data whose length is being |
170 // measured uses the 32-bit DWARF format, or | 173 // measured uses the 32-bit DWARF format, or |
171 // | 174 // |
172 // - The 32-bit value 0xffffffff, followed by a 64-bit byte count, | 175 // - The 32-bit value 0xffffffff, followed by a 64-bit byte count, |
173 // indicating that the data whose length is being measured uses | 176 // indicating that the data whose length is being measured uses |
174 // the 64-bit DWARF format. | 177 // the 64-bit DWARF format. |
175 uint64 ReadInitialLength(const char* start, size_t* len); | 178 uint64 ReadInitialLength(const uint8_t *start, size_t *len); |
176 | 179 |
177 // Read an offset from BUFFER and return it as an unsigned 64 bit | 180 // Read an offset from BUFFER and return it as an unsigned 64 bit |
178 // integer, respecting the ByteReader's endianness. In 32-bit DWARF, the | 181 // integer, respecting the ByteReader's endianness. In 32-bit DWARF, the |
179 // offset is 4 bytes long; in 64-bit DWARF, the offset is eight bytes | 182 // offset is 4 bytes long; in 64-bit DWARF, the offset is eight bytes |
180 // long. You must call ReadInitialLength or SetOffsetSize before calling | 183 // long. You must call ReadInitialLength or SetOffsetSize before calling |
181 // this function; see the comments above for details. | 184 // this function; see the comments above for details. |
182 uint64 ReadOffset(const char* buffer) const; | 185 uint64 ReadOffset(const uint8_t *buffer) const; |
183 | 186 |
184 // Return the current offset size, in bytes. | 187 // Return the current offset size, in bytes. |
185 // A return value of 4 indicates that we are reading 32-bit DWARF. | 188 // A return value of 4 indicates that we are reading 32-bit DWARF. |
186 // A return value of 8 indicates that we are reading 64-bit DWARF. | 189 // A return value of 8 indicates that we are reading 64-bit DWARF. |
187 uint8 OffsetSize() const { return offset_size_; } | 190 uint8 OffsetSize() const { return offset_size_; } |
188 | 191 |
189 // Indicate that section offsets and lengths are SIZE bytes long. SIZE | 192 // Indicate that section offsets and lengths are SIZE bytes long. SIZE |
190 // must be either 4 (meaning 32-bit DWARF) or 8 (meaning 64-bit DWARF). | 193 // must be either 4 (meaning 32-bit DWARF) or 8 (meaning 64-bit DWARF). |
191 // Usually, you should not call this function yourself; instead, let a | 194 // Usually, you should not call this function yourself; instead, let a |
192 // call to ReadInitialLength establish the data's offset size | 195 // call to ReadInitialLength establish the data's offset size |
(...skipping 34 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
227 // There are also two options that fall outside that matrix | 230 // There are also two options that fall outside that matrix |
228 // altogether: the pointer may be omitted, or it may have padding to | 231 // altogether: the pointer may be omitted, or it may have padding to |
229 // align it on an appropriate address boundary. (That last option | 232 // align it on an appropriate address boundary. (That last option |
230 // may seem like it should be just another axis, but it is not.) | 233 // may seem like it should be just another axis, but it is not.) |
231 | 234 |
232 // Indicate that the exception handling data is loaded starting at | 235 // Indicate that the exception handling data is loaded starting at |
233 // SECTION_BASE, and that the start of its buffer in our own memory | 236 // SECTION_BASE, and that the start of its buffer in our own memory |
234 // is BUFFER_BASE. This allows us to find the address that a given | 237 // is BUFFER_BASE. This allows us to find the address that a given |
235 // byte in our buffer would have when loaded into the program the | 238 // byte in our buffer would have when loaded into the program the |
236 // data describes. We need this to resolve DW_EH_PE_pcrel pointers. | 239 // data describes. We need this to resolve DW_EH_PE_pcrel pointers. |
237 void SetCFIDataBase(uint64 section_base, const char *buffer_base); | 240 void SetCFIDataBase(uint64 section_base, const uint8_t *buffer_base); |
238 | 241 |
239 // Indicate that the base address of the program's ".text" section | 242 // Indicate that the base address of the program's ".text" section |
240 // is TEXT_BASE. We need this to resolve DW_EH_PE_textrel pointers. | 243 // is TEXT_BASE. We need this to resolve DW_EH_PE_textrel pointers. |
241 void SetTextBase(uint64 text_base); | 244 void SetTextBase(uint64 text_base); |
242 | 245 |
243 // Indicate that the base address for DW_EH_PE_datarel pointers is | 246 // Indicate that the base address for DW_EH_PE_datarel pointers is |
244 // DATA_BASE. The proper value depends on the ABI; it is usually the | 247 // DATA_BASE. The proper value depends on the ABI; it is usually the |
245 // address of the global offset table, held in a designated register in | 248 // address of the global offset table, held in a designated register in |
246 // position-independent code. You will need to look at the startup code | 249 // position-independent code. You will need to look at the startup code |
247 // for the target system to be sure. I tried; my eyes bled. | 250 // for the target system to be sure. I tried; my eyes bled. |
(...skipping 18 matching lines...) Expand all Loading... |
266 bool UsableEncoding(DwarfPointerEncoding encoding) const; | 269 bool UsableEncoding(DwarfPointerEncoding encoding) const; |
267 | 270 |
268 // Read an encoded pointer from BUFFER using ENCODING; return the | 271 // Read an encoded pointer from BUFFER using ENCODING; return the |
269 // absolute address it represents, and set *LEN to the pointer's | 272 // absolute address it represents, and set *LEN to the pointer's |
270 // length in bytes, including any padding for aligned pointers. | 273 // length in bytes, including any padding for aligned pointers. |
271 // | 274 // |
272 // This function calls 'abort' if ENCODING is invalid or refers to a | 275 // This function calls 'abort' if ENCODING is invalid or refers to a |
273 // base address this reader hasn't been given, so you should check | 276 // base address this reader hasn't been given, so you should check |
274 // with ValidEncoding and UsableEncoding first if you would rather | 277 // with ValidEncoding and UsableEncoding first if you would rather |
275 // die in a more helpful way. | 278 // die in a more helpful way. |
276 uint64 ReadEncodedPointer(const char *buffer, DwarfPointerEncoding encoding, | 279 uint64 ReadEncodedPointer(const uint8_t *buffer, |
| 280 DwarfPointerEncoding encoding, |
277 size_t *len) const; | 281 size_t *len) const; |
278 | 282 |
279 private: | 283 private: |
280 | 284 |
281 // Function pointer type for our address and offset readers. | 285 // Function pointer type for our address and offset readers. |
282 typedef uint64 (ByteReader::*AddressReader)(const char*) const; | 286 typedef uint64 (ByteReader::*AddressReader)(const uint8_t *) const; |
283 | 287 |
284 // Read an offset from BUFFER and return it as an unsigned 64 bit | 288 // Read an offset from BUFFER and return it as an unsigned 64 bit |
285 // integer. DWARF2/3 define offsets as either 4 or 8 bytes, | 289 // integer. DWARF2/3 define offsets as either 4 or 8 bytes, |
286 // generally depending on the amount of DWARF2/3 info present. | 290 // generally depending on the amount of DWARF2/3 info present. |
287 // This function pointer gets set by SetOffsetSize. | 291 // This function pointer gets set by SetOffsetSize. |
288 AddressReader offset_reader_; | 292 AddressReader offset_reader_; |
289 | 293 |
290 // Read an address from BUFFER and return it as an unsigned 64 bit | 294 // Read an address from BUFFER and return it as an unsigned 64 bit |
291 // integer. DWARF2/3 allow addresses to be any size from 1-255 | 295 // integer. DWARF2/3 allow addresses to be any size from 1-255 |
292 // bytes currently. Internally we support 4 and 8 byte addresses, | 296 // bytes currently. Internally we support 4 and 8 byte addresses, |
293 // and will CHECK on anything else. | 297 // and will CHECK on anything else. |
294 // This function pointer gets set by SetAddressSize. | 298 // This function pointer gets set by SetAddressSize. |
295 AddressReader address_reader_; | 299 AddressReader address_reader_; |
296 | 300 |
297 Endianness endian_; | 301 Endianness endian_; |
298 uint8 address_size_; | 302 uint8 address_size_; |
299 uint8 offset_size_; | 303 uint8 offset_size_; |
300 | 304 |
301 // Base addresses for Linux C++ exception handling data's encoded pointers. | 305 // Base addresses for Linux C++ exception handling data's encoded pointers. |
302 bool have_section_base_, have_text_base_, have_data_base_; | 306 bool have_section_base_, have_text_base_, have_data_base_; |
303 bool have_function_base_; | 307 bool have_function_base_; |
304 uint64 section_base_, text_base_, data_base_, function_base_; | 308 uint64 section_base_, text_base_, data_base_, function_base_; |
305 const char *buffer_base_; | 309 const uint8_t *buffer_base_; |
306 }; | 310 }; |
307 | 311 |
308 } // namespace dwarf2reader | 312 } // namespace dwarf2reader |
309 | 313 |
310 #endif // COMMON_DWARF_BYTEREADER_H__ | 314 #endif // COMMON_DWARF_BYTEREADER_H__ |
OLD | NEW |