| | 1 | | using System; |
| | 2 | | using System.IO; |
| | 3 | | using ICSharpCode.SharpZipLib.Checksum; |
| | 4 | | using ICSharpCode.SharpZipLib.Zip.Compression; |
| | 5 | | using ICSharpCode.SharpZipLib.Zip.Compression.Streams; |
| | 6 | |
|
| | 7 | | namespace ICSharpCode.SharpZipLib.GZip |
| | 8 | | { |
| | 9 | |
|
/// <summary>
/// This filter stream is used to decompress a "GZIP" format stream.
/// The "GZIP" format is described in RFC 1952.
///
/// author of the original java version : John Leuner
/// </summary>
/// <example> This sample shows how to unzip a gzipped file
/// <code>
/// using System;
/// using System.IO;
///
/// using ICSharpCode.SharpZipLib.Core;
/// using ICSharpCode.SharpZipLib.GZip;
///
/// class MainClass
/// {
///     public static void Main(string[] args)
///     {
///         using (Stream inStream = new GZipInputStream(File.OpenRead(args[0])))
///         using (FileStream outStream = File.Create(Path.GetFileNameWithoutExtension(args[0]))) {
///             byte[] buffer = new byte[4096];
///             StreamUtils.Copy(inStream, outStream, buffer);
///         }
///     }
/// }
/// </code>
/// </example>
public class GZipInputStream : InflaterInputStream
{
    #region Instance Fields
    /// <summary>
    /// CRC-32 value for uncompressed data.
    /// A fresh instance is created every time a member header is parsed.
    /// </summary>
    protected Crc32 crc;

    /// <summary>
    /// Flag to indicate if we've read the GZIP header yet for the current member (block of compressed data).
    /// This is tracked per-block as the file is parsed.
    /// </summary>
    bool readGZIPHeader;
    #endregion

    #region Constructors
    /// <summary>
    /// Creates a GZipInputStream with the default buffer size (4096 bytes)
    /// </summary>
    /// <param name="baseInputStream">
    /// The stream to read compressed data from (in GZIP format)
    /// </param>
    public GZipInputStream(Stream baseInputStream)
        : this(baseInputStream, 4096)
    {
    }

    /// <summary>
    /// Creates a GZipInputStream with the specified buffer size
    /// </summary>
    /// <param name="baseInputStream">
    /// The stream to read compressed data from (in GZIP format)
    /// </param>
    /// <param name="size">
    /// Size of the buffer to use
    /// </param>
    public GZipInputStream(Stream baseInputStream, int size)
        : base(baseInputStream, new Inflater(true), size)
    {
    }
    #endregion

    #region Stream overrides
    /// <summary>
    /// Reads uncompressed data into an array of bytes
    /// </summary>
    /// <param name="buffer">
    /// The buffer to read uncompressed data into
    /// </param>
    /// <param name="offset">
    /// The offset indicating where the data should be placed
    /// </param>
    /// <param name="count">
    /// The number of uncompressed bytes to be read
    /// </param>
    /// <returns>
    /// Returns the number of bytes actually read. Zero is returned at the end of the
    /// stream, and also when <paramref name="count"/> is zero.
    /// </returns>
    /// <exception cref="EndOfStreamException">The header or footer of a member is truncated.</exception>
    /// <exception cref="GZipException">The header or footer of a member is invalid.</exception>
    public override int Read(byte[] buffer, int offset, int count)
    {
        // A GZIP file can contain multiple blocks of compressed data, although this is quite rare.
        // A compressed block could potentially be empty, so we need to loop until we reach EOF or
        // we find data.
        while (true) {

            // If we haven't read the header for this block, read it
            if (!readGZIPHeader) {

                // Try to read header. If there is no header (0 bytes available), this is EOF. If there is
                // an incomplete header, this will throw an exception.
                if (!ReadHeader()) {
                    return 0;
                }
            }

            // Try to read compressed data
            int bytesRead = base.Read(buffer, offset, count);
            if (bytesRead > 0) {
                crc.Update(buffer, offset, bytesRead);
            }

            // If this is the end of the current member, validate its footer
            if (inf.IsFinished) {
                ReadFooter();
            }

            // A request for zero bytes can never produce data, so return instead of
            // spinning in this loop forever.
            if (bytesRead > 0 || count == 0) {
                return bytesRead;
            }
        }
    }
    #endregion

    #region Support routines
    /// <summary>
    /// Reads one byte of the GZIP header from the input buffer.
    /// </summary>
    /// <returns>The byte that was read.</returns>
    /// <exception cref="EndOfStreamException">The input buffer reported a negative value.</exception>
    int ReadHeaderByte()
    {
        int value = inputBuffer.ReadLeByte();
        if (value < 0) {
            throw new EndOfStreamException("EOS reading GZIP header");
        }
        return value;
    }

    /// <summary>
    /// Reads one byte of the GZIP header and folds it into the running header checksum.
    /// </summary>
    /// <param name="headCRC">Checksum covering the header bytes read so far.</param>
    /// <returns>The byte that was read.</returns>
    /// <exception cref="EndOfStreamException">The input buffer reported a negative value.</exception>
    int ReadHeaderByte(Crc32 headCRC)
    {
        int value = ReadHeaderByte();
        headCRC.Update(value);
        return value;
    }

    /// <summary>
    /// Skips a zero-terminated header field (file name or comment), including its terminator,
    /// while keeping the header checksum up to date.
    /// </summary>
    /// <param name="headCRC">Checksum covering the header bytes read so far.</param>
    void SkipZeroTerminatedField(Crc32 headCRC)
    {
        while (ReadHeaderByte(headCRC) != 0) {
            // Keep consuming until the terminating zero byte has been read.
        }
    }

    /// <summary>
    /// Parses and validates the header of the next GZIP member.
    /// </summary>
    /// <returns>
    /// <c>true</c> if a header was read; <c>false</c> if no more input is available (end of file).
    /// </returns>
    /// <exception cref="EndOfStreamException">The header is truncated.</exception>
    /// <exception cref="GZipException">
    /// The magic bytes, compression type, reserved flag bits or header CRC are invalid.
    /// </exception>
    bool ReadHeader()
    {
        // Initialize CRC for this block
        crc = new Crc32();

        // Make sure there is data in file. We can't rely on ReadLeByte() to fill the buffer, as this could be EOF,
        // which is fine, but ReadLeByte() throws an exception if it doesn't find data, so we do this part ourselves.
        if (inputBuffer.Available <= 0) {
            inputBuffer.Fill();
            if (inputBuffer.Available <= 0) {
                // No header, EOF.
                return false;
            }
        }

        var headCRC = new Crc32();

        // 1. Check the two magic bytes
        if (ReadHeaderByte(headCRC) != (GZipConstants.GZIP_MAGIC >> 8)) {
            throw new GZipException("Error GZIP header, first magic byte doesn't match");
        }

        if (ReadHeaderByte(headCRC) != (GZipConstants.GZIP_MAGIC & 0xFF)) {
            throw new GZipException("Error GZIP header, second magic byte doesn't match");
        }

        // 2. Check the compression type (must be 8)
        if (ReadHeaderByte(headCRC) != 8) {
            throw new GZipException("Error GZIP header, data not in deflate format");
        }

        // 3. Check the flags
        int flags = ReadHeaderByte(headCRC);

        /*  This flag byte is divided into individual bits as follows:

            bit 0   FTEXT
            bit 1   FHCRC
            bit 2   FEXTRA
            bit 3   FNAME
            bit 4   FCOMMENT
            bit 5   reserved
            bit 6   reserved
            bit 7   reserved
        */

        // 3.1 Check the reserved bits are zero
        if ((flags & 0xE0) != 0) {
            throw new GZipException("Reserved flag bits in GZIP header != 0");
        }

        // 4.-6. Skip the modification time, extra flags, and OS type
        for (int i = 0; i < 6; i++) {
            ReadHeaderByte(headCRC);
        }

        // 7. Read extra field
        if ((flags & GZipConstants.FEXTRA) != 0) {

            // XLEN is total length of extra subfields, we will skip them all
            int len1 = ReadHeaderByte(headCRC);
            int len2 = ReadHeaderByte(headCRC);

            int extraLen = (len2 << 8) | len1; // gzip is LSB first
            for (int i = 0; i < extraLen; i++) {
                ReadHeaderByte(headCRC);
            }
        }

        // 8. Read file name
        if ((flags & GZipConstants.FNAME) != 0) {
            SkipZeroTerminatedField(headCRC);
        }

        // 9. Read comment
        if ((flags & GZipConstants.FCOMMENT) != 0) {
            SkipZeroTerminatedField(headCRC);
        }

        // 10. Read header CRC
        if ((flags & GZipConstants.FHCRC) != 0) {
            // The CRC16 bytes themselves are not part of the checksummed header data.
            int lowByte = ReadHeaderByte();
            int highByte = ReadHeaderByte();

            // Like every multi-byte field in a GZIP stream the CRC16 is stored LSB first (RFC 1952).
            int crcval = lowByte | (highByte << 8);
            if (crcval != ((int)headCRC.Value & 0xffff)) {
                throw new GZipException("Header CRC value mismatch");
            }
        }

        readGZIPHeader = true;
        return true;
    }

    /// <summary>
    /// Reads the 8 byte footer of the current GZIP member and checks the stored CRC-32 and
    /// byte count against the data that was decompressed. On success the inflater has been
    /// reset and the stream is ready to parse another member header.
    /// </summary>
    /// <exception cref="EndOfStreamException">The footer is truncated.</exception>
    /// <exception cref="GZipException">The CRC or the byte count does not match.</exception>
    void ReadFooter()
    {
        byte[] footer = new byte[8];

        // End of stream; reclaim all bytes from inf, read the final byte count, and reset the inflator.
        // TotalOut must be captured before the reset.
        long bytesRead = inf.TotalOut & 0xffffffff;
        inputBuffer.Available += inf.RemainingInput;
        inf.Reset();

        // Read footer from inputBuffer
        int needed = footer.Length;
        while (needed > 0) {
            int count = inputBuffer.ReadClearTextBuffer(footer, footer.Length - needed, needed);
            if (count <= 0) {
                throw new EndOfStreamException("EOS reading GZIP footer");
            }
            needed -= count; // Jewel Jan 16
        }

        // Calculate CRC (stored LSB first)
        int crcval = (footer[0] & 0xff) | ((footer[1] & 0xff) << 8) | ((footer[2] & 0xff) << 16) | (footer[3] << 24);
        if (crcval != (int)crc.Value) {
            throw new GZipException("GZIP crc sum mismatch, theirs \"" + crcval + "\" and ours \"" + (int)crc.Value + "\"");
        }

        // NOTE The total here is the original total modulo 2 ^ 32.
        uint total =
            (uint)((uint)footer[4] & 0xff) |
            (uint)(((uint)footer[5] & 0xff) << 8) |
            (uint)(((uint)footer[6] & 0xff) << 16) |
            (uint)((uint)footer[7] << 24);

        if (bytesRead != total) {
            throw new GZipException("Number of bytes mismatch in footer");
        }

        // Mark header read as false so if another header exists, we'll continue reading through the file
        readGZIPHeader = false;
    }
    #endregion
}
| | 330 | | } |