FreeNOS
Lz4Decompressor.cpp
Go to the documentation of this file.
1 /*
2  * Copyright (C) 2020 Niek Linnenbank
3  *
4  * This program is free software: you can redistribute it and/or modify
5  * it under the terms of the GNU General Public License as published by
6  * the Free Software Foundation, either version 3 of the License, or
7  * (at your option) any later version.
8  *
9  * This program is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12  * GNU General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public License
15  * along with this program. If not, see <http://www.gnu.org/licenses/>.
16  */
17 
18 #include <FreeNOS/User.h>
19 #include <Log.h>
20 #include <ByteOrder.h>
21 #include <MemoryBlock.h>
22 #include "Lz4Decompressor.h"
23 
24 Lz4Decompressor::Lz4Decompressor(const void *data, const Size size)
25  : m_inputData(static_cast<const u8 *>(data))
26  , m_inputSize(size)
27  , m_frameDescSize(3)
28  , m_blockChecksums(false)
29  , m_contentChecksum(false)
30  , m_contentSize(0)
31  , m_blockMaximumSize(0)
32 {
33 }
34 
36 {
37  // Reset state
38  m_frameDescSize = 3;
39 
40  // Verify minimum input size
41  if (m_inputSize < 27)
42  {
43  ERROR("invalid size of input data: " << m_inputSize);
44  return InvalidArgument;
45  }
46 
47  // Verify the input is an actual LZ4 frame
49  {
50  ERROR("invalid magic value " << readLe32(m_inputData) << " != " << FrameMagic);
51  return InvalidArgument;
52  }
53 
54  // Read the FLG byte
55  const u8 flg = *(m_inputData + sizeof(u32));
56 
57  // Verify the version bits
58  const u8 version = flg >> FrameVersionShift;
59  if (version != FrameVersion)
60  {
61  ERROR("invalid version value " << version << " != " << FrameVersion);
62  return InvalidArgument;
63  }
64 
65  // This code only supports independent blocks
66  const bool independent = (flg >> FrameBlockIndShift) & 0x1;
67  if (!independent)
68  {
69  ERROR("inter-dependent blocks not supported");
70  return NotSupported;
71  }
72 
73  // Check for block checksum flag
74  m_blockChecksums = (flg >> FrameBlockChkShift) & 0x1;
75 
76  // Check for content size flag
77  if ((flg >> FrameContentSzShift) & 0x1)
78  {
79  m_contentSize = readLe64(m_inputData + sizeof(u32) + (sizeof(u8) * 2));
80  m_frameDescSize += 8;
81  }
82 
83  // Content size must be non-zero
84  if (m_contentSize == 0)
85  {
86  ERROR("content size must not be zero");
87  return NotSupported;
88  }
89 
90  // Check for the content checksum flag
91  m_contentChecksum = (flg >> FrameContentChkShift) & 0x1 ? true : false;
92 
93  // Check for the DictID flag
94  if ((flg >> FrameDictIdShift) & 0x1)
95  {
96  m_frameDescSize += 4;
97  }
98 
99  // Read the BD byte which contains the maximum block size
100  const u8 bd = *(m_inputData + sizeof(u32) + sizeof(u8));
101  switch (bd >> 4)
102  {
103  case 4:
105  break;
106  case 5:
108  break;
109  case 6:
111  break;
112  case 7:
114  break;
115  default:
116  {
117  ERROR("invalid maximum block size value: " << (bd >> 4));
118  return InvalidArgument;
119  }
120  }
121 
122  return Success;
123 }
124 
126 {
127  return m_contentSize;
128 }
129 
131  const Size size) const
132 {
133  const u8 *input = m_inputData + m_frameDescSize + sizeof(u32);
134  const u8 *inputEnd = m_inputData + m_inputSize;
135  u8 *output = static_cast<u8 *>(buffer);
136  Size copied = 0;
137 
138  while (copied < size && input < inputEnd)
139  {
140  // Fetch the next block
141  const u32 blockSizeByte = readLe32(input);
142  const u32 blockSize = blockSizeByte & ~(1 << 31);
143  const bool isCompressed = blockSizeByte & (1 << 31) ? false : true;
144  Size uncompSize;
145 
146  // Last block has the EndMark as size value
147  if (blockSize == EndMark)
148  {
149  break;
150  }
151  assert(blockSize <= m_blockMaximumSize);
152  input += sizeof(u32);
153 
154  // Decompress the block
155  if (isCompressed)
156  {
157  uncompSize = decompress(input, blockSize, output, size - copied);
158  }
159  // Return data as-is when the block is not compressed
160  else
161  {
162  MemoryBlock::copy(output, input, blockSize);
163  uncompSize = blockSize;
164  }
165 
166  // Move to the next block
167  copied += uncompSize;
168  output += uncompSize;
169  input += blockSize;
170  if (m_blockChecksums)
171  {
172  input += sizeof(u32);
173  }
174  }
175 
176  return Success;
177 }
178 
179 inline const u32 Lz4Decompressor::integerDecode(const u32 initial,
180  const u8 *next,
181  Size &byteCount) const
182 {
183  u32 value = initial;
184 
185  if (initial < 0xf)
186  {
187  return initial;
188  }
189 
190  for (byteCount = 1; ; byteCount++)
191  {
192  const u8 byte = *next++;
193  value += byte;
194 
195  if (byte < 0xff)
196  {
197  break;
198  }
199  }
200 
201  return value;
202 }
203 
204 const u32 Lz4Decompressor::decompress(const u8 *input,
205  const Size inputSize,
206  u8 *output,
207  const Size outputSize) const
208 {
209  const u8 *inputEnd = input + inputSize;
210  Size outputOffset = 0;
211 
212  // Decompress the whole block
213  while (input < inputEnd && outputOffset < outputSize)
214  {
215  u32 literalBytes = 0;
216  u32 matchBytes = 0;
217 
218  // Read the token
219  const u8 token = *input;
220  input++;
221 
222  // Read literals count
223  const u32 literalsCount = integerDecode(token >> 4, input, literalBytes);
224  input += literalBytes;
225  DEBUG("token = " << token << " literalsCount = " << literalsCount << " literalBytes = " << literalBytes);
226 
227  // Copy literals
228  if (literalsCount > 0)
229  {
230  MemoryBlock::copy(output + outputOffset, input, literalsCount);
231  input += literalsCount;
232  outputOffset += literalsCount;
233  }
234 
235  // End of block reached? Last 5 bytes are only literals
236  if (input >= inputEnd)
237  {
238  break;
239  }
240 
241  // Read match offset
242  const u16 off = readLe16(input);
243  assert(off <= outputOffset);
244  const u32 matchOffset = outputOffset - off;
245  input += sizeof(u16);
246 
247  // Read match length
248  const u32 matchCount = integerDecode(token & 0xf, input, matchBytes) + 4u;
249  input += matchBytes;
250 
251  // Copy the match from previous decoded bytes
252  DEBUG("matchOffset = " << matchOffset << " matchCount = " << matchCount);
253  MemoryBlock::copy(output + outputOffset, output + matchOffset, matchCount);
254  outputOffset += matchCount;
255  }
256 
257  assert(outputOffset <= m_blockMaximumSize);
258  return outputOffset;
259 }
Lz4Decompressor::FrameVersionShift
@ FrameVersionShift
Definition: Lz4Decompressor.h:56
MemoryBlock::copy
static Size copy(void *dest, const void *src, Size count)
Copy memory from one place to another.
Definition: MemoryBlock.cpp:36
readLe16
const u16 readLe16(const void *data)
Read 16-bit little endian integer.
Definition: ByteOrder.h:356
Lz4Decompressor::InvalidArgument
@ InvalidArgument
Definition: Lz4Decompressor.h:74
Lz4Decompressor::m_blockMaximumSize
Size m_blockMaximumSize
Maximum block size in bytes of the uncompressed content.
Definition: Lz4Decompressor.h:163
Lz4Decompressor::FrameContentSzShift
@ FrameContentSzShift
Definition: Lz4Decompressor.h:53
Lz4Decompressor::FrameMagic
static const u32 FrameMagic
Magic number value marks the start of the frame header.
Definition: Lz4Decompressor.h:44
Lz4Decompressor::getUncompressedSize
u64 getUncompressedSize() const
Get size of the uncompressed data.
Definition: Lz4Decompressor.cpp:125
Lz4Decompressor::m_frameDescSize
Size m_frameDescSize
Size of the frame descriptor in bytes.
Definition: Lz4Decompressor.h:151
Lz4Decompressor::FrameVersion
static const u8 FrameVersion
Current supported version of the LZ4 algorithm.
Definition: Lz4Decompressor.h:60
Lz4Decompressor::initialize
Result initialize()
Initialize the decompressor.
Definition: Lz4Decompressor.cpp:35
Lz4Decompressor::FrameBlockChkShift
@ FrameBlockChkShift
Definition: Lz4Decompressor.h:54
MemoryBlock.h
Lz4Decompressor::m_inputData
const u8 * m_inputData
Compressed input data.
Definition: Lz4Decompressor.h:145
readLe64
const u64 readLe64(const void *data)
Read 64-bit little endian integer.
Definition: ByteOrder.h:328
Lz4Decompressor::FrameDictIdShift
@ FrameDictIdShift
Definition: Lz4Decompressor.h:51
Lz4Decompressor::FrameContentChkShift
@ FrameContentChkShift
Definition: Lz4Decompressor.h:52
Log.h
Lz4Decompressor::Success
@ Success
Definition: Lz4Decompressor.h:72
MegaByte
#define MegaByte(v)
Convert megabytes to bytes.
Definition: Macros.h:57
u64
unsigned long long u64
Unsigned 64-bit number.
Definition: Types.h:50
Lz4Decompressor.h
Lz4Decompressor::m_inputSize
const Size m_inputSize
Total size in bytes of the compressed input data.
Definition: Lz4Decompressor.h:148
KiloByte
#define KiloByte(v)
Convert kilobytes to bytes.
Definition: Macros.h:54
DEBUG
#define DEBUG(msg)
Output a debug message to standard output.
Definition: Log.h:89
Lz4Decompressor::m_blockChecksums
bool m_blockChecksums
True if blocks have checksums enabled.
Definition: Lz4Decompressor.h:154
u16
unsigned short u16
Unsigned 16-bit number.
Definition: Types.h:56
Lz4Decompressor::decompress
const u32 decompress(const u8 *input, const Size inputSize, u8 *output, const Size outputSize) const
Decompress a block of compressed data.
Definition: Lz4Decompressor.cpp:204
ByteOrder.h
u32
unsigned int u32
Unsigned 32-bit number.
Definition: Types.h:53
Size
unsigned int Size
Any sane size indicator cannot go negative.
Definition: Types.h:128
Lz4Decompressor::Result
Result
Result codes.
Definition: Lz4Decompressor.h:70
Lz4Decompressor::NotSupported
@ NotSupported
Definition: Lz4Decompressor.h:75
readLe32
const u32 readLe32(const void *data)
Read 32-bit little endian integer.
Definition: ByteOrder.h:342
Lz4Decompressor::Lz4Decompressor
Lz4Decompressor(const void *data, const Size size)
Constructor function.
Definition: Lz4Decompressor.cpp:24
Lz4Decompressor::m_contentChecksum
bool m_contentChecksum
True if the input data buffer contains content checksum.
Definition: Lz4Decompressor.h:157
assert
#define assert(exp)
Insert program diagnostics.
Definition: assert.h:60
Lz4Decompressor::read
Result read(void *buffer, const Size size) const
Reads compressed data.
Definition: Lz4Decompressor.cpp:130
ERROR
#define ERROR(msg)
Output an error message.
Definition: Log.h:61
u8
unsigned char u8
Unsigned 8-bit number.
Definition: Types.h:59
Lz4Decompressor::FrameBlockIndShift
@ FrameBlockIndShift
Definition: Lz4Decompressor.h:55
Lz4Decompressor::EndMark
static const u32 EndMark
EndMark marks the end of the data stream.
Definition: Lz4Decompressor.h:63
Lz4Decompressor::m_contentSize
u64 m_contentSize
Content size as specified in the frame header.
Definition: Lz4Decompressor.h:160
Lz4Decompressor::integerDecode
const u32 integerDecode(const u32 initial, const u8 *next, Size &byteCount) const
Decode input data as integer (little-endian, 32-bit unsigned)
Definition: Lz4Decompressor.cpp:179