OLD | NEW |
(Empty) | |
| 1 /////////////////////////////////////////////////////////////////////////////// |
| 2 // |
| 3 /// \file fastpos.h |
| 4 /// \brief Kind of two-bit version of bit scan reverse |
| 5 /// |
| 6 // Authors: Igor Pavlov |
| 7 // Lasse Collin |
| 8 // |
| 9 // This file has been put into the public domain. |
| 10 // You can do whatever you want with this file. |
| 11 // |
| 12 /////////////////////////////////////////////////////////////////////////////// |
| 13 |
| 14 #ifndef LZMA_FASTPOS_H |
| 15 #define LZMA_FASTPOS_H |
| 16 |
| 17 // LZMA encodes match distances (positions) by storing the highest two |
| 18 // bits using a six-bit value [0, 63], and then the missing lower bits. |
| 19 // Dictionary size is also stored using this encoding in the new .lzma |
| 20 // file format header. |
| 21 // |
| 22 // fastpos.h provides a way to quickly find out the correct six-bit |
| 23 // values. The following table gives some examples of this encoding: |
| 24 // |
| 25 // pos return |
| 26 // 0 0 |
| 27 // 1 1 |
| 28 // 2 2 |
| 29 // 3 3 |
| 30 // 4 4 |
| 31 // 5 4 |
| 32 // 6 5 |
| 33 // 7 5 |
| 34 // 8 6 |
| 35 // 11 6 |
| 36 // 12 7 |
| 37 // ... ... |
| 38 // 15 7 |
| 39 // 16 8 |
| 40 // 17 8 |
| 41 // ... ... |
| 42 // 23 8 |
| 43 // 24 9 |
| 44 // 25 9 |
| 45 // ... ... |
| 46 // |
| 47 // |
| 48 // Provided functions or macros |
| 49 // ---------------------------- |
| 50 // |
| 51 // get_pos_slot(pos) is the basic version. get_pos_slot_2(pos) |
| 52 // assumes that pos >= FULL_DISTANCES, thus the result is at least |
| 53 // FULL_DISTANCES_BITS * 2. Using get_pos_slot(pos) instead of |
| 54 // get_pos_slot_2(pos) would give the same result, but get_pos_slot_2(pos) |
| 55 // should be tiny bit faster due to the assumption being made. |
| 56 // |
| 57 // |
| 58 // Size vs. speed |
| 59 // -------------- |
| 60 // |
| 61 // With some CPUs that have fast BSR (bit scan reverse) instruction, the |
| 62 // size optimized version is slightly faster than the bigger table based |
| 63 // approach. Such CPUs include Intel Pentium Pro, Pentium II, Pentium III |
| 64 // and Core 2 (possibly others). AMD K7 seems to have slower BSR, but that |
| 65 // would still have speed roughly comparable to the table version. Older |
| 66 // x86 CPUs like the original Pentium have very slow BSR; on those systems |
| 67 // the table version is a lot faster. |
| 68 // |
| 69 // On some CPUs, the table version is a lot faster when using position |
| 70 // dependent code, but with position independent code the size optimized |
| 71 // version is slightly faster. This occurs at least on 32-bit SPARC (no |
| 72 // ASM optimizations). |
| 73 // |
| 74 // I'm making the table version the default, because that has good speed |
| 75 // on all systems I have tried. The size optimized version is sometimes |
| 76 // slightly faster, but sometimes it is a lot slower. |
| 77 |
| 78 #ifdef HAVE_SMALL |
| 79 # define get_pos_slot(pos) ((pos) <= 4 ? (pos) : get_pos_slot_2(pos)) |
| 80 |
| 81 static inline uint32_t |
| 82 get_pos_slot_2(uint32_t pos) |
| 83 { |
| 84 const uint32_t i = bsr32(pos); |
| 85 return (i + i) + ((pos >> (i - 1)) & 1); |
| 86 } |
| 87 |
| 88 |
| 89 #else |
| 90 |
| 91 #define FASTPOS_BITS 13 |
| 92 |
| 93 extern const uint8_t lzma_fastpos[1 << FASTPOS_BITS]; |
| 94 |
| 95 |
| 96 #define fastpos_shift(extra, n) \ |
| 97 ((extra) + (n) * (FASTPOS_BITS - 1)) |
| 98 |
| 99 #define fastpos_limit(extra, n) \ |
| 100 (UINT32_C(1) << (FASTPOS_BITS + fastpos_shift(extra, n))) |
| 101 |
| 102 #define fastpos_result(pos, extra, n) \ |
| 103 lzma_fastpos[(pos) >> fastpos_shift(extra, n)] \ |
| 104 + 2 * fastpos_shift(extra, n) |
| 105 |
| 106 |
| 107 static inline uint32_t |
| 108 get_pos_slot(uint32_t pos) |
| 109 { |
| 110 // If it is small enough, we can pick the result directly from |
| 111 // the precalculated table. |
| 112 if (pos < fastpos_limit(0, 0)) |
| 113 return lzma_fastpos[pos]; |
| 114 |
| 115 if (pos < fastpos_limit(0, 1)) |
| 116 return fastpos_result(pos, 0, 1); |
| 117 |
| 118 return fastpos_result(pos, 0, 2); |
| 119 } |
| 120 |
| 121 |
| 122 #ifdef FULL_DISTANCES_BITS |
| 123 static inline uint32_t |
| 124 get_pos_slot_2(uint32_t pos) |
| 125 { |
| 126 assert(pos >= FULL_DISTANCES); |
| 127 |
| 128 if (pos < fastpos_limit(FULL_DISTANCES_BITS - 1, 0)) |
| 129 return fastpos_result(pos, FULL_DISTANCES_BITS - 1, 0); |
| 130 |
| 131 if (pos < fastpos_limit(FULL_DISTANCES_BITS - 1, 1)) |
| 132 return fastpos_result(pos, FULL_DISTANCES_BITS - 1, 1); |
| 133 |
| 134 return fastpos_result(pos, FULL_DISTANCES_BITS - 1, 2); |
| 135 } |
| 136 #endif |
| 137 |
| 138 #endif |
| 139 |
| 140 #endif |
OLD | NEW |